Esempio n. 1
0
def test_summarize_labels():
    """Check the summary lines for a default track and a color='none' track.

    The expected text shows the default track's span in square brackets and
    the color='none' track's span in parentheses.
    """
    figure = Figure()
    for track_options in ({'label': "Foo"},
                          {'label': "Bar", 'color': 'none'}):
        figure.add(Track(1, 200, **track_options))

    summary = summarize_figure(figure)

    assert summary == "Foo[1-200]\nBar(1-200)\n"
Esempio n. 2
0
def test_summarize_label_objects():
    """Check that a Label object and a plain string label both get a line.

    Both tracks span 0 to 0, and the expected summary lists only their label
    text.
    """
    figure = Figure()
    label_object = Label(25, "Foo:")
    figure.add(Track(0, 0, label=label_object))
    figure.add(Track(0, 0, label="Bar:"))

    summary = summarize_figure(figure)

    assert summary == "Foo:\nBar:\n"
Esempio n. 3
0
 def test_add_track(self):
     """Smoke test: add a forward and a reverse track, then render.

     Nothing is asserted; the test only checks that add_track() and show()
     run without raising.
     """
     figure = Figure()
     figure.add_track(
         Track(50,
               300,
               direction='f',
               # Keep the label in a single literal: a backslash continuation
               # inside the string would embed the following line's
               # indentation in the label text.
               label="Another sequence",
               regions=[(50, 100, 'lightblue')]))
     figure.add_track(
         Track(110,
               410,
               direction='r',
               label="Sequence 1",
               regions=[(150, 200, 'salmon')]))
     figure.show()
Esempio n. 4
0
 def test_multitrack(self):
     """Smoke test: render ten joined pairs of forward and reverse tracks.

     Nothing is asserted; the test only checks that the figure can be built
     and shown without raising.
     """
     figure = Figure()
     for offset in range(10):
         forward = Track(offset,
                         offset + 10,
                         direction='f',
                         label=f'Track {offset}F')
         reverse = Track(offset + 20,
                         offset + 30,
                         direction='r',
                         label=f'Track {offset}R')
         figure.add_track(Multitrack([forward, reverse], join=True))
     figure.show()
Esempio n. 5
0
def test_summarize_multitracks():
    """Check that a multitrack's subtracks share one comma-separated line."""
    figure = Figure()
    figure.add(Track(0, 0, label="Foo:"))
    subtracks = [Track(10, 20, label="Bar"), Track(30, 40, label="Baz")]
    figure.add(Multitrack(subtracks))

    summary = summarize_figure(figure)

    assert summary == "Foo:\nBar[10-20], Baz[30-40]\n"
Esempio n. 6
0
def add_partial_banner(f, position_offset, max_position):
    """ Build a dashed line with dashes 500 wide.

    Adds one Multitrack to the figure: alternating dashes and gaps covering
    positions position_offset + 1 to max_position, plus a color='none' track
    over the same span that carries the 'Partial Blast Results' label.

    :param f: figure to add the banner to, through its add() method
    :param position_offset: offset added to every position, so the banner
        starts at position_offset + 1
    :param max_position: last position of the banner; the final dash is
        clipped to it
    """
    dash_width = 500
    banner_width = max_position - position_offset
    # Step straight from one dash start to the next, skipping the gap between
    # them. Stopping before banner_width guarantees that no dash starts past
    # max_position, so a width that is an exact even multiple of the dash
    # width (or zero) cannot produce a dash whose start is after its end.
    subtracks = [
        Track(dash_start + position_offset + 1,
              min(dash_start + dash_width + position_offset, max_position))
        for dash_start in range(0, banner_width, 2 * dash_width)
    ]
    # The label rides on a colourless track that spans the whole banner.
    subtracks.append(
        Track(position_offset + 1,
              max_position,
              label='Partial Blast Results',
              color='none'))
    f.add(Multitrack(subtracks))
Esempio n. 7
0
def test_summarize_zero_coverage():
    """Summarizing a coverage element with only zero depths should raise.

    The test pins the current behaviour: summarize_figure() is expected to
    fail with ZeroDivisionError.
    """
    all_zero_depths = [0, 0, 0]
    figure = Figure()
    figure.add(Coverage(10, 20, all_zero_depths), gap=-4)
    figure.add(Track(10, 20, label="Bar"))

    with pytest.raises(ZeroDivisionError):
        summarize_figure(figure)
Esempio n. 8
0
def test_arrow_group(svg_differ):
    """An ArrowGroup of two arrows should render like adding them by hand.

    The expected figure adds the first arrow with a gap of minus the arrow
    height and then the second arrow; the group must produce the same SVG.
    """
    h = 30

    # Build the reference drawing from individual arrows.
    expected_figure = Figure()
    expected_figure.add(Track(1, 500, label='Header'))
    expected_figure.add(Arrow(1, 200, label='X', h=h), gap=-h)
    expected_figure.add(Arrow(300, 500, label='Y', h=h))
    expected_svg = expected_figure.show()

    # Build the same drawing from a single group.
    f = Figure()
    f.add(Track(1, 500, label='Header'))
    grouped_arrows = [Arrow(1, 200, label='X', h=h),
                      Arrow(300, 500, label='Y', h=h)]
    f.add(ArrowGroup(grouped_arrows))
    svg = f.show()

    svg_differ.assert_equal(svg, expected_svg, 'test_arrow_group')
Esempio n. 9
0
def test_summarize_multitracks_with_separate_label():
    """Check the summary of unlabelled subtracks plus a label-only subtrack.

    The expected line lists the two unlabelled spans in square brackets and
    the color='none' labelled span in parentheses.
    """
    figure = Figure()
    figure.add(Track(0, 0, label="Foo:"))
    subtracks = [Track(10, 20),
                 Track(30, 40),
                 Track(10, 40, label="Bar", color='none')]
    figure.add(Multitrack(subtracks))

    summary = summarize_figure(figure)

    assert summary == "Foo:\n[10-20], [30-40], Bar(10-40)\n"
Esempio n. 10
0
def test_draw_coverage(svg_differ):
    """SmoothCoverage should draw blue rectangles for the nonzero depths.

    The expected drawing is a spacer and a labelled track, with three blue
    rectangles inserted at the front of the drawing's elements.
    """
    expected_figure = Figure()
    expected_figure.add(Track(0, 1, color='', h=-4))  # Just a spacer.
    expected_figure.add(Track(100, 200, label='Bar'))
    expected_svg = expected_figure.show()
    # Positional arguments for each expected rectangle, in insertion order.
    # No rectangle is expected for the run of zero depths.
    rectangle_args = [(100, 20, 25, 5), (125, 20, 25, 10), (175, 20, 25, 1)]
    for index, args in enumerate(rectangle_args):
        expected_svg.insert(index, draw.Rectangle(*args, fill='blue'))

    # Four runs of 25 positions each, at depths 5, 10, 0, and 1.
    coverage_depths = []
    for depth in (5, 10, 0, 1):
        coverage_depths.extend([depth] * 25)
    figure = Figure()
    figure.add(SmoothCoverage(100, 200, coverage_depths), gap=-4)
    figure.add(Track(100, 200, label="Bar"))

    svg = figure.show()

    svg_differ.assert_equal(svg, expected_svg, 'test_draw_coverage')
Esempio n. 11
0
def test_arrow_group_reverse_overlap(svg_differ):
    """An ArrowGroup with an overlapping reverse arrow should stack them.

    The second arrow runs from 400 back to 250, overlapping the first arrow
    (1 to 300). The expected figure adds the two arrows one after the other
    with a gap of 3 instead of putting them side by side.
    """
    expected_figure = Figure()
    expected_figure.add(Track(1, 500, label='Header'))
    h = 20
    expected_figure.add(Arrow(1, 300, label='X', h=h), gap=3)
    expected_figure.add(Arrow(400, 250, label='Y', h=h))
    expected_svg = expected_figure.show()

    f = Figure()
    f.add(Track(1, 500, label='Header'))
    f.add(
        ArrowGroup(
            [Arrow(1, 300, label='X', h=h),
             Arrow(400, 250, label='Y', h=h)]))
    svg = f.show()

    # Use this test's own name for the comparison, so its output cannot be
    # confused with (or collide with) that of the other arrow group tests.
    svg_differ.assert_equal(svg, expected_svg,
                            'test_arrow_group_reverse_overlap')
Esempio n. 12
0
def test_arrow_group_small_neighbour(svg_differ):
    """An ArrowGroup should cope with a short arrow right after a long one.

    The group holds arrow 1.1 (1 to 300) followed by the much shorter arrow
    1.2 (301 to 315). The expected figure adds 1.2 first with a gap of minus
    the arrow height, and then 1.1.
    """
    expected_figure = Figure()
    expected_figure.add(Track(1, 500, label='Header'))
    h = 20
    expected_figure.add(Arrow(301, 315, elevation=-1, label='1.2', h=h),
                        gap=-h)
    expected_figure.add(Arrow(1, 300, elevation=-1, label='1.1', h=h))
    expected_svg = expected_figure.show()

    f = Figure()
    f.add(Track(1, 500, label='Header'))
    f.add(
        ArrowGroup([
            Arrow(1, 300, elevation=-1, label='1.1', h=h),
            Arrow(301, 315, elevation=-1, label='1.2', h=h)
        ]))
    svg = f.show()

    # Use this test's own name for the comparison, so its output cannot be
    # confused with (or collide with) that of the other arrow group tests.
    svg_differ.assert_equal(svg, expected_svg,
                            'test_arrow_group_small_neighbour')
Esempio n. 13
0
def test_summarize_smooth_coverage_ten_percent():
    """Check the summary of smoothed depths 100, 110, 111, and 50.

    The expected text reports 100 and 110 together as '100x2', and lists 111
    and 50 on their own.
    """
    depths = [100, 110, 111, 50]
    figure = Figure()
    figure.add(SmoothCoverage(10, 20, depths), gap=-4)
    figure.add(Track(12, 22, label="Bar"))

    summary = summarize_figure(figure)

    assert summary == "Coverage 100x2, 111, 50\nBar[12-22]\n"
Esempio n. 14
0
def test_summarize_smooth_coverage():
    """Check that repeated smoothed depths are summarized as depth x count."""
    depths = [11, 11, 21, 1, 1, 1]
    figure = Figure()
    figure.add(SmoothCoverage(10, 20, depths), gap=-4)
    figure.add(Track(12, 22, label="Bar"))

    summary = summarize_figure(figure)

    assert summary == "Coverage 11x2, 21, 1x3\nBar[12-22]\n"
Esempio n. 15
0
def test_summarize_regions():
    """Check that a track's coloured regions follow it on its summary line.

    Each region is expected as its colour name followed by its span in
    braces.
    """
    coloured_regions = [(50, 100, 'lightgreen'), (110, 120, 'red')]
    figure = Figure()
    figure.add(Track(1, 200, label="Foo", regions=coloured_regions))

    summary = summarize_figure(figure)

    assert summary == "Foo[1-200], lightgreen{50-100}, red{110-120}\n"
Esempio n. 16
0
def start_drawing(width, height):
    """Start a figure with a header track, plus the drawing expected for it.

    :param width: width of the drawing, also used as the header track's end
    :param height: height of the drawing
    :return: (figure, expected_svg), where the figure holds one 'Header'
        track from 0 to width, and expected_svg holds a light grey rectangle
        with the centred 'Header' text
    """
    header_top = height - 15
    expected_svg = Drawing(width, height, origin=(0, 0))
    header_bar = Rectangle(0,
                           header_top,
                           200,
                           10,
                           stroke='lightgrey',
                           fill='lightgrey')
    expected_svg.append(header_bar)
    header_text = Text('Header',
                       10,
                       width / 2,
                       header_top,
                       font_family='monospace',
                       text_anchor='middle')
    expected_svg.append(header_text)

    figure = Figure()
    figure.add(Track(0, width, label='Header'))
    return figure, expected_svg
Esempio n. 17
0
def build_contig(reader, f, contig_name, max_position, position_offset,
                 blast_rows):
    """ Add the tracks for one contig to a figure.

    Depending on the data, this adds up to three elements to the figure: an
    ArrowGroup with one arrow per matching blast row, a ShadedCoverage
    element when the maximum coverage is above zero, and a Multitrack with
    the covered ranges plus a color='none' track that carries the label and
    the highlighted insertion (lightgreen) and unmatched (yellow) regions.

    :param reader: iterable of row dictionaries with a 'contig' field, plus
        'coordinates', 'refseq_nuc_pos', 'query_nuc_pos', 'coverage', 'dels',
        and optionally 'link'. Rows are grouped with groupby(), so only the
        first run of consecutive rows for contig_name is used. The position,
        coverage, and dels fields of those rows are converted in place.
    :param f: figure to add elements to, through its add() method
    :param contig_name: name to look for in the rows' 'contig' field; names
        starting with 'contig-' never get blast arrows
    :param max_position: end position of the label track
    :param position_offset: offset added to every displayed position
    :param blast_rows: row dictionaries with 'start' and 'end' fields, tested
        against the contig with ContigMatcher.is_match()
    """
    contig_matcher = ContigMatcher(contig_name)
    blast_ranges = []  # [[start, end, blast_num]]
    blast_starts = defaultdict(set)  # {start: {blast_num}}
    blast_ends = defaultdict(set)  # {end: {blast_num}}
    if not contig_name.startswith('contig-'):
        for blast_row in blast_rows:
            if not contig_matcher.is_match(blast_row):
                continue
            # Number the matches from 1. The displayed start and end get
            # filled in later, while walking through the contig rows.
            blast_num = len(blast_ranges) + 1
            blast_ranges.append([None, None, blast_num])
            blast_starts[blast_row['start']].add(blast_num)
            blast_ends[blast_row['end']].add(blast_num)
    event_positions = set(blast_starts)
    event_positions.update(blast_ends)
    # Sorted in descending order, so pop() returns the smallest position left.
    event_positions = sorted(event_positions, reverse=True)

    insertion_size = 0
    insertion_ranges = []  # [(start, end)]
    unmatched_ranges = []  # [[start, end]]
    for contig_name2, contig_rows in groupby(reader, itemgetter('contig')):
        if contig_name2 != contig_name:
            continue
        contig_rows = list(contig_rows)
        # Use reference positions when the rows name a coordinate reference,
        # otherwise fall back to positions within the contig itself.
        coordinates_name = contig_rows[0]['coordinates']
        if coordinates_name:
            pos_field = 'refseq_nuc_pos'
        else:
            pos_field = 'query_nuc_pos'
        # Convert the numeric fields in place: blank becomes None.
        for contig_row in contig_rows:
            for field_name in (pos_field, 'coverage', 'dels'):
                field_text = contig_row[field_name]
                field_value = None if field_text == '' else int(field_text)
                contig_row[field_name] = field_value
        start = contig_rows[0][pos_field]
        end = contig_rows[-1][pos_field]
        coverage = [0] * (end - start + 1)
        pos = 0
        for contig_row in contig_rows:
            pos = contig_row[pos_field]
            if pos is None:
                # A row without a position counts toward an insertion.
                insertion_size += 1
            else:
                if insertion_size:
                    # Record the pending insertion at the first position
                    # that follows it.
                    insertion_ranges.append((pos, pos + insertion_size - 1))
                    insertion_size = 0
                if contig_row['coverage'] is not None:
                    coverage[pos - start] = (contig_row['coverage'] -
                                             contig_row['dels'])
                contig_pos = int(contig_row['query_nuc_pos'])
                # Blast starts and ends are compared to the contig's own
                # position, but recorded as the displayed position.
                while event_positions and event_positions[-1] <= contig_pos:
                    event_pos = event_positions.pop()
                    for blast_num in blast_starts[event_pos]:
                        blast_ranges[blast_num - 1][0] = pos
                    for blast_num in blast_ends[event_pos]:
                        blast_ranges[blast_num - 1][1] = pos
            link = contig_row.get('link')
            if link == 'U':
                # Position is unmatched, add to list.
                # NOTE(review): pos may be None here if an insertion row can
                # have link 'U' - confirm against the input data.
                if not unmatched_ranges or unmatched_ranges[-1][-1] != pos - 1:
                    unmatched_ranges.append([pos, pos])
                else:
                    unmatched_ranges[-1][-1] = pos
        while event_positions:
            # Use up any events that went past the end of the contig.
            event_pos = event_positions.pop()
            for blast_num in blast_starts[event_pos]:
                blast_ranges[blast_num - 1][0] = pos
            for blast_num in blast_ends[event_pos]:
                blast_ranges[blast_num - 1][1] = pos

        # One arrow per blast match, labelled <contig number>.<match number>.
        arrows = []
        for arrow_start, arrow_end, blast_num in blast_ranges:
            arrows.append(
                Arrow(arrow_start + position_offset,
                      arrow_end + position_offset,
                      elevation=-1,
                      label=f'{contig_matcher.num}.{blast_num}'))
        if arrows:
            f.add(ArrowGroup(arrows))
        # One subtrack for each run of positions with nonzero coverage.
        subtracks = []
        for has_coverage, group_positions in groupby(
                enumerate(coverage), lambda item: item[1] != 0):
            if has_coverage:
                group_positions = list(group_positions)
                group_start, _ = group_positions[0]
                group_end, _ = group_positions[-1]
                subtracks.append(
                    Track(start + group_start + position_offset,
                          start + group_end + position_offset))
        if not subtracks:
            # No position had coverage, so show each run of consecutive
            # positions that appear in the rows instead.
            group_start = prev_pos = None
            included_positions = [row[pos_field] for row in contig_rows]
            included_positions.append(None)  # Trigger final section.
            for pos in included_positions:
                if group_start is None:
                    group_start = pos
                else:
                    if pos != prev_pos + 1:
                        subtracks.append(
                            Track(group_start + position_offset,
                                  prev_pos + position_offset))
                        group_start = pos
                prev_pos = pos
        if max(coverage) <= 0:
            track_label = contig_name
        else:
            f.add(ShadedCoverage(start + position_offset,
                                 end + position_offset, coverage),
                  gap=-4)
            track_label = f"{contig_name} - depth {max(coverage)}"
        # The label and the highlighted regions go on a color='none' track
        # that runs from 1 to max_position.
        subtracks.append(
            Track(1,
                  max_position,
                  label=track_label,
                  color='none',
                  regions=[
                      (a + position_offset, b + position_offset, 'lightgreen')
                      for a, b in insertion_ranges
                  ] + [(a + position_offset, b + position_offset, 'yellow')
                       for a, b in unmatched_ranges]))
        f.add(Multitrack(subtracks))
        # Only the first matching group of rows gets drawn.
        break
Esempio n. 18
0
def build_coverage_figure(genome_coverage_csv, blast_csv=None):
    """ Build a figure of contig coverage, grouped by coordinate reference.

    For each coordinates name that has coverage data, this adds the landmark
    tracks of the matching reference set (or a partial banner when no set
    matches), an ArrowGroup for the blast matches of its contigs, and then
    the tracks from build_contig() for each contig.

    :param genome_coverage_csv: open CSV file with the fields 'contig',
        'coordinates', 'query_nuc_pos', 'refseq_nuc_pos', 'coverage', and
        'dels'. It has to support seek(), because it gets read again from
        the start for every contig.
    :param blast_csv: optional open CSV file with the fields 'start', 'end',
        'ref_start', 'ref_end', and 'ref_name', plus whatever
        ContigMatcher.is_match() reads
    :return: the Figure; when nothing else was added, it holds a single
        track labelled 'No contigs found.'
    """
    min_position, max_position = 1, 500
    coordinate_depths = Counter()
    contig_depths = Counter()
    contig_groups = defaultdict(set)  # {coordinates_name: {contig_name}}
    # First pass: find the range of positions, and the maximum depth of each
    # coordinate reference and each contig.
    reader = DictReader(genome_coverage_csv)
    for row in reader:
        query_nuc_pos = int(row['query_nuc_pos'])
        if row['refseq_nuc_pos']:
            refseq_nuc_pos = int(row['refseq_nuc_pos'])
        else:
            # A blank reference position must not change the range.
            refseq_nuc_pos = min_position
        min_position = min(min_position, refseq_nuc_pos, query_nuc_pos)
        max_position = max(max_position, refseq_nuc_pos, query_nuc_pos)
        coordinates_name = row['coordinates']
        contig_name = row['contig']
        if row['coverage'] != '':
            row_coverage = int(row['coverage']) - int(row['dels'])
            coordinate_depths[coordinates_name] = max(
                coordinate_depths[coordinates_name], row_coverage)
            contig_depths[contig_name] = max(contig_depths[contig_name],
                                             row_coverage)
        contig_groups[coordinates_name].add(contig_name)
    if '' in coordinate_depths:
        # Force partial contigs to come last.
        coordinate_depths[''] = -1
    # Shift every position so that the smallest one is displayed at 1.
    position_offset = -min_position + 1
    max_position += position_offset

    blast_rows = []
    if blast_csv is not None:
        for blast_row in DictReader(blast_csv):
            for field_name in ('start', 'end', 'ref_start', 'ref_end'):
                # noinspection PyTypeChecker
                blast_row[field_name] = int(blast_row[field_name])
            blast_rows.append(blast_row)
    blast_rows.sort(key=itemgetter('start', 'ref_start'))

    # The landmarks file is found relative to this module's parent folder.
    landmarks_path = (Path(__file__).parent.parent / "data" /
                      "landmark_references.yaml")
    landmark_groups = yaml.safe_load(landmarks_path.read_text())
    projects = ProjectConfig.loadDefault()
    f = Figure()
    # Visit the coordinate references from deepest to shallowest, breaking
    # ties by name.
    for _, coordinates_name in sorted(
        (-depth, name) for name, depth in coordinate_depths.items()):
        for reference_set in landmark_groups:
            if coordinates_name != reference_set['coordinates']:
                continue
            prev_landmark = None
            # Fill in defaults: frame 0, and a missing end is one before the
            # start of the next landmark in start order.
            for i, landmark in enumerate(
                    sorted(reference_set['landmarks'],
                           key=itemgetter('start'))):
                landmark.setdefault('frame', 0)
                if prev_landmark and 'end' not in prev_landmark:
                    prev_landmark['end'] = landmark['start'] - 1
                prev_landmark = landmark
            # NOTE(review): groupby() only merges neighbouring items, and
            # this iterates the landmarks in their original order, not the
            # sorted order above - confirm that the data file keeps each
            # frame's landmarks together.
            for frame, frame_landmarks in groupby(reference_set['landmarks'],
                                                  itemgetter('frame')):
                subtracks = []
                for landmark in frame_landmarks:
                    landmark_colour = landmark.get('colour')
                    if landmark_colour is None:
                        # Landmarks without a colour are not drawn.
                        continue
                    subtracks.append(
                        Track(landmark['start'] + position_offset,
                              landmark['end'] + position_offset,
                              label=landmark['name'],
                              color=landmark_colour))
                    # Widen the figure to fit the landmark, if needed.
                    max_position = max(max_position,
                                       landmark['end'] + position_offset)
                f.add(Multitrack(subtracks))
            break
        else:
            # No reference set matched this coordinates name.
            add_partial_banner(f, position_offset, max_position)
        contig_names = contig_groups[coordinates_name]
        sorted_contig_names = sort_contig_names(contig_names, contig_depths)
        ref_arrows = []
        for contig_name in sorted_contig_names:
            if contig_name.startswith('contig-'):
                # No arrows on original contig tracks.
                continue
            contig_matcher = ContigMatcher(contig_name)
            ref_positions = None
            arrow_count = 0
            for blast_row in blast_rows:
                if not contig_matcher.is_match(blast_row):
                    continue
                # When the blast reference differs from the coordinate
                # reference, look up a position mapping between them. It is
                # only looked up while ref_positions is still None.
                if (ref_positions is None and coordinates_name != ''
                        and blast_row['ref_name'] != coordinates_name):
                    ref_positions = map_references(blast_row['ref_name'],
                                                   coordinates_name, projects)
                arrow_count += 1
                ref_start = int(blast_row['ref_start'])
                ref_end = int(blast_row['ref_end'])
                if ref_positions is None:
                    coordinate_start = ref_start
                    coordinate_end = ref_end
                else:
                    coordinate_start = ref_positions[ref_start]
                    coordinate_end = ref_positions[ref_end]
                ref_arrows.append(
                    Arrow(coordinate_start + position_offset,
                          coordinate_end + position_offset,
                          elevation=1,
                          label=f'{contig_matcher.num}.{arrow_count}'))
        if ref_arrows:
            f.add(ArrowGroup(ref_arrows))
        for contig_name in sorted_contig_names:
            # Read the coverage file from the start again for each contig.
            genome_coverage_csv.seek(0)
            reader = DictReader(genome_coverage_csv)
            build_contig(reader, f, contig_name, max_position, position_offset,
                         blast_rows)

    if not f.elements:
        f.add(Track(1, max_position, label='No contigs found.', color='none'))
    return f