def test_summarize_labels():
    figure = Figure()
    figure.add(Track(1, 200, label="Foo"))
    figure.add(Track(1, 200, label="Bar", color='none'))
    expected_summary = """\
Foo[1-200]
Bar(1-200)
"""

    summary = summarize_figure(figure)

    assert summary == expected_summary


def test_summarize_label_objects():
    figure = Figure()
    figure.add(Track(0, 0, label=Label(25, "Foo:")))
    figure.add(Track(0, 0, label="Bar:"))
    expected_summary = """\
Foo:
Bar:
"""

    summary = summarize_figure(figure)

    assert summary == expected_summary


def test_add_track(self):
    figure = Figure()
    figure.add_track(Track(50, 300, direction='f', label="Another sequence",
                           regions=[(50, 100, 'lightblue')]))
    figure.add_track(Track(110, 410, direction='r', label="Sequence 1",
                           regions=[(150, 200, 'salmon')]))
    figure.show()


def test_multitrack(self):
    figure = Figure()
    for i in range(0, 10):
        figure.add_track(Multitrack([
            Track(i, i + 10, direction='f', label='Track {}F'.format(i)),
            Track(i + 20, i + 30, direction='r', label='Track {}R'.format(i))
        ], join=True))
    figure.show()


def test_summarize_multitracks():
    figure = Figure()
    figure.add(Track(0, 0, label="Foo:"))
    figure.add(Multitrack([Track(10, 20, label="Bar"),
                           Track(30, 40, label="Baz")]))
    expected_summary = """\
Foo:
Bar[10-20], Baz[30-40]
"""

    summary = summarize_figure(figure)

    assert summary == expected_summary


def add_partial_banner(f, position_offset, max_position):
    """ Build a dashed line with dashes 500 wide. """
    dash_width = 500
    banner_width = max_position - position_offset
    subtracks = [Track(i * dash_width + position_offset + 1,
                       min((i + 1) * dash_width + position_offset,
                           max_position))
                 for i in range((banner_width + dash_width) // dash_width)
                 if not i % 2]
    subtracks.append(Track(position_offset + 1,
                           max_position,
                           label='Partial Blast Results',
                           color='none'))
    f.add(Multitrack(subtracks))


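
# A quick worked example of what add_partial_banner produces (the numbers here
# are hypothetical, not taken from any caller in this module): calling
#
#     add_partial_banner(f, position_offset=0, max_position=1200)
#
# gives banner_width = 1200, so range((1200 + 500) // 500) yields i = 0, 1, 2,
# and the even indices produce the dash tracks Track(1, 500) and
# Track(1001, 1200).  The final subtrack spans the full 1-1200 width with
# color='none', so it contributes only the 'Partial Blast Results' label to
# the multitrack.

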
def test_summarize_zero_coverage():
    figure = Figure()
    figure.add(Coverage(10, 20, [0, 0, 0]), gap=-4)
    figure.add(Track(10, 20, label="Bar"))

    with pytest.raises(ZeroDivisionError):
        summarize_figure(figure)


def test_arrow_group(svg_differ):
    expected_figure = Figure()
    expected_figure.add(Track(1, 500, label='Header'))
    h = 30
    expected_figure.add(Arrow(1, 200, label='X', h=h), gap=-h)
    expected_figure.add(Arrow(300, 500, label='Y', h=h))
    expected_svg = expected_figure.show()

    f = Figure()
    f.add(Track(1, 500, label='Header'))
    f.add(ArrowGroup([Arrow(1, 200, label='X', h=h),
                      Arrow(300, 500, label='Y', h=h)]))
    svg = f.show()

    svg_differ.assert_equal(svg, expected_svg, 'test_arrow_group')


def test_summarize_multitracks_with_separate_label():
    figure = Figure()
    figure.add(Track(0, 0, label="Foo:"))
    figure.add(Multitrack([Track(10, 20),
                           Track(30, 40),
                           Track(10, 40, label="Bar", color='none')]))
    expected_summary = """\
Foo:
[10-20], [30-40], Bar(10-40)
"""

    summary = summarize_figure(figure)

    assert summary == expected_summary


def test_draw_coverage(svg_differ):
    expected_figure = Figure()
    expected_figure.add(Track(0, 1, color='', h=-4))  # Just a spacer.
    expected_figure.add(Track(100, 200, label='Bar'))
    expected_svg = expected_figure.show()
    expected_svg.insert(0, draw.Rectangle(100, 20, 25, 5, fill='blue'))
    expected_svg.insert(1, draw.Rectangle(125, 20, 25, 10, fill='blue'))
    expected_svg.insert(2, draw.Rectangle(175, 20, 25, 1, fill='blue'))

    figure = Figure()
    coverage_depths = 25 * [5] + 25 * [10] + 25 * [0] + 25 * [1]
    figure.add(SmoothCoverage(100, 200, coverage_depths), gap=-4)
    figure.add(Track(100, 200, label="Bar"))
    svg = figure.show()

    svg_differ.assert_equal(svg, expected_svg, 'test_draw_coverage')


def test_arrow_group_reverse_overlap(svg_differ):
    expected_figure = Figure()
    expected_figure.add(Track(1, 500, label='Header'))
    h = 20
    expected_figure.add(Arrow(1, 300, label='X', h=h), gap=3)
    expected_figure.add(Arrow(400, 250, label='Y', h=h))
    expected_svg = expected_figure.show()

    f = Figure()
    f.add(Track(1, 500, label='Header'))
    f.add(ArrowGroup([Arrow(1, 300, label='X', h=h),
                      Arrow(400, 250, label='Y', h=h)]))
    svg = f.show()

    svg_differ.assert_equal(svg, expected_svg, 'test_arrow_group')


def test_arrow_group_small_neighbour(svg_differ):
    expected_figure = Figure()
    expected_figure.add(Track(1, 500, label='Header'))
    h = 20
    expected_figure.add(Arrow(301, 315, elevation=-1, label='1.2', h=h),
                        gap=-h)
    expected_figure.add(Arrow(1, 300, elevation=-1, label='1.1', h=h))
    expected_svg = expected_figure.show()

    f = Figure()
    f.add(Track(1, 500, label='Header'))
    f.add(ArrowGroup([Arrow(1, 300, elevation=-1, label='1.1', h=h),
                      Arrow(301, 315, elevation=-1, label='1.2', h=h)]))
    svg = f.show()

    svg_differ.assert_equal(svg, expected_svg, 'test_arrow_group')


def test_summarize_smooth_coverage_ten_percent():
    figure = Figure()
    figure.add(SmoothCoverage(10, 20, [100, 110, 111, 50]), gap=-4)
    figure.add(Track(12, 22, label="Bar"))
    expected_summary = """\
Coverage 100x2, 111, 50
Bar[12-22]
"""

    summary = summarize_figure(figure)

    assert summary == expected_summary


def test_summarize_smooth_coverage():
    figure = Figure()
    figure.add(SmoothCoverage(10, 20, [11, 11, 21, 1, 1, 1]), gap=-4)
    figure.add(Track(12, 22, label="Bar"))
    expected_summary = """\
Coverage 11x2, 21, 1x3
Bar[12-22]
"""

    summary = summarize_figure(figure)

    assert summary == expected_summary


def test_summarize_regions():
    figure = Figure()
    figure.add(Track(1, 200, label="Foo", regions=[(50, 100, 'lightgreen'),
                                                   (110, 120, 'red')]))
    expected_summary = """\
Foo[1-200], lightgreen{50-100}, red{110-120}
"""

    summary = summarize_figure(figure)

    assert summary == expected_summary


def start_drawing(width, height):
    expected_svg = Drawing(width, height, origin=(0, 0))
    expected_svg.append(Rectangle(0,
                                  height - 15,
                                  200,
                                  10,
                                  stroke='lightgrey',
                                  fill='lightgrey'))
    expected_svg.append(Text('Header',
                             10,
                             width / 2,
                             height - 15,
                             font_family='monospace',
                             text_anchor='middle'))
    f = Figure()
    f.add(Track(0, width, label='Header'))
    return f, expected_svg


def build_contig(reader,
                 f,
                 contig_name,
                 max_position,
                 position_offset,
                 blast_rows):
    """ Add one contig's coverage, blast arrows, and annotations to figure f. """
    contig_matcher = ContigMatcher(contig_name)
    blast_ranges = []  # [[start, end, blast_num]]
    blast_starts = defaultdict(set)  # {start: {blast_num}}
    blast_ends = defaultdict(set)  # {end: {blast_num}}
    if not contig_name.startswith('contig-'):
        for blast_row in blast_rows:
            if not contig_matcher.is_match(blast_row):
                continue
            blast_num = len(blast_ranges) + 1
            blast_ranges.append([None, None, blast_num])
            blast_starts[blast_row['start']].add(blast_num)
            blast_ends[blast_row['end']].add(blast_num)
    event_positions = set(blast_starts)
    event_positions.update(blast_ends)
    event_positions = sorted(event_positions, reverse=True)
    insertion_size = 0
    insertion_ranges = []  # [(start, end)]
    unmatched_ranges = []  # [[start, end]]
    for contig_name2, contig_rows in groupby(reader, itemgetter('contig')):
        if contig_name2 != contig_name:
            continue
        contig_rows = list(contig_rows)
        coordinates_name = contig_rows[0]['coordinates']
        if coordinates_name:
            pos_field = 'refseq_nuc_pos'
        else:
            pos_field = 'query_nuc_pos'
        for contig_row in contig_rows:
            for field_name in (pos_field, 'coverage', 'dels'):
                field_text = contig_row[field_name]
                field_value = None if field_text == '' else int(field_text)
                contig_row[field_name] = field_value
        start = contig_rows[0][pos_field]
        end = contig_rows[-1][pos_field]
        coverage = [0] * (end - start + 1)
        pos = 0
        for contig_row in contig_rows:
            pos = contig_row[pos_field]
            if pos is None:
                insertion_size += 1
            else:
                if insertion_size:
                    insertion_ranges.append((pos, pos + insertion_size - 1))
                    insertion_size = 0
                if contig_row['coverage'] is not None:
                    coverage[pos - start] = (contig_row['coverage'] -
                                             contig_row['dels'])
                contig_pos = int(contig_row['query_nuc_pos'])
                while event_positions and event_positions[-1] <= contig_pos:
                    event_pos = event_positions.pop()
                    for blast_num in blast_starts[event_pos]:
                        blast_ranges[blast_num - 1][0] = pos
                    for blast_num in blast_ends[event_pos]:
                        blast_ranges[blast_num - 1][1] = pos
                link = contig_row.get('link')
                if link == 'U':
                    # Position is unmatched, add to list.
                    if not unmatched_ranges or unmatched_ranges[-1][-1] != pos - 1:
                        unmatched_ranges.append([pos, pos])
                    else:
                        unmatched_ranges[-1][-1] = pos
        while event_positions:
            # Use up any events that went past the end of the contig.
            event_pos = event_positions.pop()
            for blast_num in blast_starts[event_pos]:
                blast_ranges[blast_num - 1][0] = pos
            for blast_num in blast_ends[event_pos]:
                blast_ranges[blast_num - 1][1] = pos
        arrows = []
        for arrow_start, arrow_end, blast_num in blast_ranges:
            arrows.append(Arrow(arrow_start + position_offset,
                                arrow_end + position_offset,
                                elevation=-1,
                                label=f'{contig_matcher.num}.{blast_num}'))
        if arrows:
            f.add(ArrowGroup(arrows))
        subtracks = []
        for has_coverage, group_positions in groupby(
                enumerate(coverage),
                lambda item: item[1] != 0):
            if has_coverage:
                group_positions = list(group_positions)
                group_start, _ = group_positions[0]
                group_end, _ = group_positions[-1]
                subtracks.append(Track(start + group_start + position_offset,
                                       start + group_end + position_offset))
        if not subtracks:
            group_start = prev_pos = None
            included_positions = [row[pos_field] for row in contig_rows]
            included_positions.append(None)  # Trigger final section.
            for pos in included_positions:
                if group_start is None:
                    group_start = pos
                else:
                    if pos != prev_pos + 1:
                        subtracks.append(Track(group_start + position_offset,
                                               prev_pos + position_offset))
                        group_start = pos
                prev_pos = pos
        if max(coverage) <= 0:
            track_label = contig_name
        else:
            f.add(ShadedCoverage(start + position_offset,
                                 end + position_offset,
                                 coverage),
                  gap=-4)
            track_label = f"{contig_name} - depth {max(coverage)}"
        subtracks.append(Track(
            1,
            max_position,
            label=track_label,
            color='none',
            regions=[(a + position_offset, b + position_offset, 'lightgreen')
                     for a, b in insertion_ranges] +
                    [(a + position_offset, b + position_offset, 'yellow')
                     for a, b in unmatched_ranges]))
        f.add(Multitrack(subtracks))
        break


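
# A small sketch of the coverage grouping inside build_contig above (the values
# are hypothetical, not taken from any CSV in this project): if a contig starts
# at position 100 with coverage = [5, 5, 0, 3] and position_offset = 0, then
# groupby(enumerate(coverage), lambda item: item[1] != 0) yields two covered
# runs, producing Track(100, 101) and Track(103, 103); the uncovered position
# 102 is left as a gap between the subtracks.

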
def build_coverage_figure(genome_coverage_csv, blast_csv=None):
    """ Build a figure with landmark, blast arrow, and coverage tracks. """
    min_position, max_position = 1, 500
    coordinate_depths = Counter()
    contig_depths = Counter()
    contig_groups = defaultdict(set)  # {coordinates_name: {contig_name}}
    reader = DictReader(genome_coverage_csv)
    for row in reader:
        query_nuc_pos = int(row['query_nuc_pos'])
        if row['refseq_nuc_pos']:
            refseq_nuc_pos = int(row['refseq_nuc_pos'])
        else:
            refseq_nuc_pos = min_position
        min_position = min(min_position, refseq_nuc_pos, query_nuc_pos)
        max_position = max(max_position, refseq_nuc_pos, query_nuc_pos)
        coordinates_name = row['coordinates']
        contig_name = row['contig']
        if row['coverage'] != '':
            row_coverage = int(row['coverage']) - int(row['dels'])
            coordinate_depths[coordinates_name] = max(
                coordinate_depths[coordinates_name],
                row_coverage)
            contig_depths[contig_name] = max(contig_depths[contig_name],
                                             row_coverage)
        contig_groups[coordinates_name].add(contig_name)
    if '' in coordinate_depths:
        # Force partial contigs to come last.
        coordinate_depths[''] = -1
    position_offset = -min_position + 1
    max_position += position_offset

    blast_rows = []
    if blast_csv is not None:
        for blast_row in DictReader(blast_csv):
            for field_name in ('start', 'end', 'ref_start', 'ref_end'):
                # noinspection PyTypeChecker
                blast_row[field_name] = int(blast_row[field_name])
            blast_rows.append(blast_row)
    blast_rows.sort(key=itemgetter('start', 'ref_start'))

    landmarks_path = (Path(__file__).parent.parent / "data" /
                      "landmark_references.yaml")
    landmark_groups = yaml.safe_load(landmarks_path.read_text())
    projects = ProjectConfig.loadDefault()
    f = Figure()
    for _, coordinates_name in sorted(
            (-depth, name)
            for name, depth in coordinate_depths.items()):
        for reference_set in landmark_groups:
            if coordinates_name != reference_set['coordinates']:
                continue
            prev_landmark = None
            for i, landmark in enumerate(sorted(reference_set['landmarks'],
                                                key=itemgetter('start'))):
                landmark.setdefault('frame', 0)
                if prev_landmark and 'end' not in prev_landmark:
                    prev_landmark['end'] = landmark['start'] - 1
                prev_landmark = landmark
            for frame, frame_landmarks in groupby(reference_set['landmarks'],
                                                  itemgetter('frame')):
                subtracks = []
                for landmark in frame_landmarks:
                    landmark_colour = landmark.get('colour')
                    if landmark_colour is None:
                        continue
                    subtracks.append(Track(landmark['start'] + position_offset,
                                           landmark['end'] + position_offset,
                                           label=landmark['name'],
                                           color=landmark_colour))
                    max_position = max(max_position,
                                       landmark['end'] + position_offset)
                f.add(Multitrack(subtracks))
            break
        else:
            add_partial_banner(f, position_offset, max_position)
        contig_names = contig_groups[coordinates_name]
        sorted_contig_names = sort_contig_names(contig_names, contig_depths)
        ref_arrows = []
        for contig_name in sorted_contig_names:
            if contig_name.startswith('contig-'):
                # No arrows on original contig tracks.
                continue
            contig_matcher = ContigMatcher(contig_name)
            ref_positions = None
            arrow_count = 0
            for blast_row in blast_rows:
                if not contig_matcher.is_match(blast_row):
                    continue
                if (ref_positions is None and
                        coordinates_name != '' and
                        blast_row['ref_name'] != coordinates_name):
                    ref_positions = map_references(blast_row['ref_name'],
                                                   coordinates_name,
                                                   projects)
                arrow_count += 1
                ref_start = int(blast_row['ref_start'])
                ref_end = int(blast_row['ref_end'])
                if ref_positions is None:
                    coordinate_start = ref_start
                    coordinate_end = ref_end
                else:
                    coordinate_start = ref_positions[ref_start]
                    coordinate_end = ref_positions[ref_end]
                ref_arrows.append(
                    Arrow(coordinate_start + position_offset,
                          coordinate_end + position_offset,
                          elevation=1,
                          label=f'{contig_matcher.num}.{arrow_count}'))
        if ref_arrows:
            f.add(ArrowGroup(ref_arrows))
        for contig_name in sorted_contig_names:
            genome_coverage_csv.seek(0)
            reader = DictReader(genome_coverage_csv)
            build_contig(reader,
                         f,
                         contig_name,
                         max_position,
                         position_offset,
                         blast_rows)
    if not f.elements:
        f.add(Track(1, max_position, label='No contigs found.', color='none'))
    return f
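

# A minimal sketch of driving build_coverage_figure (the file names here are
# hypothetical; the real pipeline may open and pass these files differently):
#
#     with open('genome_coverage.csv') as genome_coverage_csv, \
#             open('blast.csv') as blast_csv:
#         figure = build_coverage_figure(genome_coverage_csv, blast_csv)
#         svg = figure.show()  # show() returns the drawing, as in the tests above.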