def test_diagram_via_object_pdf(self):
    """Construct and draw PDF using object approach.

    Tracks, feature sets and graph sets are created as stand-alone objects
    and attached to the diagram afterwards.  Cross links are added between
    background boxes on the CDS track and the gene track.  Four PDF files
    are written into the ``Graphics`` directory: full and partial circular
    views, and full and partial linear views.
    """
    record = self.record
    diagram = Diagram('Test Diagram')

    cds_track = Track('CDS features', greytrack=True,
                      scale_largetick_interval=1e4,
                      scale_smalltick_interval=1e3,
                      greytrack_labels=10,
                      greytrack_font_color="red",
                      scale_format="SInt")
    gene_track = Track('gene features', greytrack=1,
                       scale_largetick_interval=1e4)

    # The two "background" sets hold the boxes joined by cross links,
    # the remaining four hold the annotated features themselves.
    cds_backgrounds = FeatureSet(name='CDS backgrounds')
    gene_backgrounds = FeatureSet(name='gene background')
    cds_features = FeatureSet(name='CDS features')
    gene_features = FeatureSet(name='gene features')
    misc_features = FeatureSet(name='misc_features')
    repeat_features = FeatureSet(name='repeat regions')

    # Link each CDS to the gene feature seen since the previous linked CDS.
    pending_gene = None
    cds_seen = 0
    for feature in record.features:
        if feature.type == 'gene':
            pending_gene = feature
            continue
        if feature.type != 'CDS':
            continue
        cds_seen += 1
        if not pending_gene:
            continue
        # Assuming the pending gene goes with this CDS!
        if cds_seen % 2:
            dark, light = colors.burlywood, colors.bisque
        else:
            dark, light = colors.peru, colors.tan
        # Background for CDS,
        cds_box = SeqFeature(FeatureLocation(feature.location.start,
                                             feature.location.end,
                                             strand=0))
        a = cds_backgrounds.add_feature(cds_box, color=dark)
        # Background for gene,
        gene_box = SeqFeature(FeatureLocation(pending_gene.location.start,
                                              pending_gene.location.end,
                                              strand=0))
        b = gene_backgrounds.add_feature(gene_box, color=dark)
        # Cross link,
        diagram.cross_track_links.append(CrossLink(a, b, light, dark))
        pending_gene = None

    # Hand-placed cross links, as
    # (base colour, span on CDS track, span on gene track, flip).
    manual_links = [
        # Some cross links on the same linear diagram fragment,
        (colors.red, (2220, 2230), (2200, 2210), False),
        (colors.blue, (2150, 2200), (2220, 2290), True),
        (colors.green, (2250, 2560), (2300, 2860), False),
        # Some cross links where both parts are saddling the linear
        # diagram fragment boundary,
        (colors.red, (3155, 3250), (3130, 3300), False),
        # Nestled within that (drawn on top),
        (colors.blue, (3160, 3275), (3180, 3225), True),
        # Some cross links where two features are on either side of the
        # linear diagram fragment boundary,
        (colors.green, (6450, 6550), (6265, 6365), False),
        (colors.gold, (6265, 6365), (6450, 6550), False),
        (colors.red, (6275, 6375), (6430, 6530), True),
        (colors.blue, (6430, 6530), (6275, 6375), True),
    ]
    for base, cds_span, gene_span, flipped in manual_links:
        fill, edge = fill_and_border(base)
        a = cds_backgrounds.add_feature(
            SeqFeature(FeatureLocation(*cds_span)), color=fill, border=edge)
        b = gene_backgrounds.add_feature(
            SeqFeature(FeatureLocation(*gene_span)), color=fill, border=edge)
        diagram.cross_track_links.append(
            CrossLink(a, b, color=fill, border=edge, flip=flipped))

    # Distribute the record's features over the four feature sets.
    cds_seen = 0
    for feature in record.features:
        kind = feature.type
        if kind == 'CDS':
            cds_seen += 1
            shade = colors.red if cds_seen % 2 else colors.pink
            cds_features.add_feature(feature, color=shade, sigil="ARROW")
        elif kind == 'gene':
            # Note we set the colour of ALL the genes later on as a test,
            gene_features.add_feature(feature, sigil="ARROW")
        elif kind == 'misc_feature':
            misc_features.add_feature(feature, color=colors.orange)
        elif kind == 'repeat_region':
            repeat_features.add_feature(feature, color=colors.purple)

    for labelled in (cds_features, gene_features,
                     misc_features, repeat_features):
        labelled.set_all_features('label', 1)
    for shown in (misc_features, repeat_features):
        shown.set_all_features('hide', 0)
    gene_features.set_all_features('color', colors.blue)

    cds_track.add_set(cds_backgrounds)  # Before CDS so under them!
    cds_track.add_set(cds_features)

    gene_track.add_set(gene_backgrounds)  # Before genes so under them!
    gene_track.add_set(gene_features)

    misc_track = Track('misc features and repeats', greytrack=1,
                       scale_largetick_interval=1e4)
    misc_track.add_set(misc_features)
    misc_track.add_set(repeat_features)

    # Now the graph sets.  Use a fairly large step so we can easily tell
    # the difference between the bar and line graphs.
    window = len(record) // 200
    skew_set = GraphSet('GC skew')
    skew_data = apply_to_window(record.seq, window, calc_gc_skew, window)
    skew_set.new_graph(skew_data, 'GC Skew', style='bar',
                       color=colors.violet, altcolor=colors.purple)
    skew_track = Track('GC Skew (bar)', height=1.94, greytrack=1,
                       scale_largetick_interval=1e4)
    skew_track.add_set(skew_set)

    content_set = GraphSet('GC and AT Content')
    gc_data = apply_to_window(record.seq, window, calc_gc_content, window)
    content_set.new_graph(gc_data, 'GC content', style='line',
                          color=colors.lightgreen,
                          altcolor=colors.darkseagreen)
    at_data = apply_to_window(record.seq, window, calc_at_content, window)
    content_set.new_graph(at_data, 'AT content', style='line',
                          color=colors.orange, altcolor=colors.red)
    content_track = Track('GC Content(green line), AT Content(red line)',
                          height=1.94, greytrack=1,
                          scale_largetick_interval=1e4)
    content_track.add_set(content_set)

    dinuc_set = GraphSet('Di-nucleotide count')
    window = len(record) // 400  # smaller step
    dinuc_data = apply_to_window(record.seq, window,
                                 calc_dinucleotide_counts, window)
    dinuc_set.new_graph(dinuc_data, 'Di-nucleotide count', style='heat',
                        color=colors.red, altcolor=colors.orange)
    dinuc_track = Track('Di-nucleotide count', height=0.5,
                        greytrack=False, scale=False)
    dinuc_track.add_set(dinuc_set)

    # Add the tracks (from both features and graphs) on levels 3 to 8,
    # leaving some white space in the middle/bottom.
    ordered_tracks = [
        skew_track,     # GC skew
        content_track,  # GC and AT content
        cds_track,      # CDS features
        gene_track,     # Gene features
        misc_track,     # Misc features and repeat feature
        dinuc_track,    # Feature depth
    ]
    for level, track in enumerate(ordered_tracks, start=3):
        diagram.add_track(track, level)

    # Finally draw it in both formats, and full view and partial
    diagram.draw(format='circular', orientation='landscape',
                 tracklines=0, pagesize='A0')
    diagram.write(os.path.join('Graphics', 'GD_by_obj_circular.pdf'), 'PDF')

    diagram.circular = False
    diagram.draw(format='circular', orientation='landscape',
                 tracklines=0, pagesize='A0', start=3000, end=6300)
    diagram.write(os.path.join('Graphics', 'GD_by_obj_frag_circular.pdf'),
                  'PDF')

    diagram.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize='A0', fragments=3)
    diagram.write(os.path.join('Graphics', 'GD_by_obj_linear.pdf'), 'PDF')

    diagram.set_all_tracks("greytrack_labels", 2)
    diagram.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize=(30 * cm, 10 * cm), fragments=1,
                 start=3000, end=6300)
    diagram.write(os.path.join('Graphics', 'GD_by_obj_frag_linear.pdf'),
                  'PDF')
def test_diagram_via_object_pdf(self):
    """Construct and draw PDF using object approach.

    Every track, feature set and graph set is built as its own object
    before being attached to the diagram.  Background boxes on the CDS and
    gene tracks are joined by cross links.  The diagram is then drawn four
    times (circular and linear, whole record and the 3000-6300 region) and
    each drawing is written as a PDF under ``Graphics``.
    """
    record = self.record
    diagram = Diagram('Test Diagram')

    track_cds = Track('CDS features', greytrack=True,
                      scale_largetick_interval=1e4,
                      scale_smalltick_interval=1e3,
                      greytrack_labels=10,
                      greytrack_font_color="red",
                      scale_format="SInt")
    track_gene = Track('gene features', greytrack=1,
                       scale_largetick_interval=1e4)

    # First the feature sets: two for cross-link backgrounds, four for
    # the real annotation.
    back_cds = FeatureSet(name='CDS backgrounds')
    back_gene = FeatureSet(name='gene background')
    set_cds = FeatureSet(name='CDS features')
    set_gene = FeatureSet(name='gene features')
    set_misc = FeatureSet(name='misc_features')
    set_repeat = FeatureSet(name='repeat regions')

    # Pair every CDS with the most recent unpaired gene feature.
    last_gene = None
    n_cds = 0
    for feat in record.features:
        if feat.type == 'gene':
            last_gene = feat
            continue
        if feat.type != 'CDS':
            continue
        n_cds += 1
        if not last_gene:
            continue
        # Assuming the remembered gene goes with this CDS!
        if n_cds % 2:
            dark, light = colors.burlywood, colors.bisque
        else:
            dark, light = colors.peru, colors.tan
        # Background for CDS,
        cds_location = FeatureLocation(feat.location.start,
                                       feat.location.end, strand=0)
        a = back_cds.add_feature(SeqFeature(cds_location), color=dark)
        # Background for gene,
        gene_location = FeatureLocation(last_gene.location.start,
                                        last_gene.location.end, strand=0)
        b = back_gene.add_feature(SeqFeature(gene_location), color=dark)
        # Cross link,
        diagram.cross_track_links.append(CrossLink(a, b, light, dark))
        last_gene = None

    # Extra cross links at fixed coordinates:
    # (base colour, CDS-track span, gene-track span, flip).
    fixed_links = (
        # Some cross links on the same linear diagram fragment,
        (colors.red, (2220, 2230), (2200, 2210), False),
        (colors.blue, (2150, 2200), (2220, 2290), True),
        (colors.green, (2250, 2560), (2300, 2860), False),
        # Some cross links where both parts are saddling the linear
        # diagram fragment boundary,
        (colors.red, (3155, 3250), (3130, 3300), False),
        # Nestled within that (drawn on top),
        (colors.blue, (3160, 3275), (3180, 3225), True),
        # Some cross links where two features are on either side of the
        # linear diagram fragment boundary,
        (colors.green, (6450, 6550), (6265, 6365), False),
        (colors.gold, (6265, 6365), (6450, 6550), False),
        (colors.red, (6275, 6375), (6430, 6530), True),
        (colors.blue, (6430, 6530), (6275, 6375), True),
    )
    for base, span_cds, span_gene, flipped in fixed_links:
        fill, outline = fill_and_border(base)
        a = back_cds.add_feature(
            SeqFeature(FeatureLocation(*span_cds)),
            color=fill, border=outline)
        b = back_gene.add_feature(
            SeqFeature(FeatureLocation(*span_gene)),
            color=fill, border=outline)
        diagram.cross_track_links.append(
            CrossLink(a, b, color=fill, border=outline, flip=flipped))

    # Sort the record's own features into their sets.
    n_cds = 0
    for feat in record.features:
        if feat.type == 'CDS':
            n_cds += 1
            tint = colors.red if n_cds % 2 else colors.pink
            set_cds.add_feature(feat, color=tint, sigil="ARROW")
        elif feat.type == 'gene':
            # Note we set the colour of ALL the genes later on as a test,
            set_gene.add_feature(feat, sigil="ARROW")
        elif feat.type == 'misc_feature':
            set_misc.add_feature(feat, color=colors.orange)
        elif feat.type == 'repeat_region':
            set_repeat.add_feature(feat, color=colors.purple)

    for feature_set in (set_cds, set_gene, set_misc, set_repeat):
        feature_set.set_all_features('label', 1)
    for feature_set in (set_misc, set_repeat):
        feature_set.set_all_features('hide', 0)
    set_gene.set_all_features('color', colors.blue)

    track_cds.add_set(back_cds)  # Before CDS so under them!
    track_cds.add_set(set_cds)

    track_gene.add_set(back_gene)  # Before genes so under them!
    track_gene.add_set(set_gene)

    track_misc = Track('misc features and repeats', greytrack=1,
                       scale_largetick_interval=1e4)
    track_misc.add_set(set_misc)
    track_misc.add_set(set_repeat)

    # Now add some graph sets.  Use a fairly large step so we can easily
    # tell the difference between the bar and line graphs.
    size = len(record) // 200
    graphs_skew = GraphSet('GC skew')
    values = apply_to_window(record.seq, size, calc_gc_skew, size)
    graphs_skew.new_graph(values, 'GC Skew', style='bar',
                          color=colors.violet, altcolor=colors.purple)
    track_skew = Track('GC Skew (bar)', height=1.94, greytrack=1,
                       scale_largetick_interval=1e4)
    track_skew.add_set(graphs_skew)

    graphs_content = GraphSet('GC and AT Content')
    values = apply_to_window(record.seq, size, calc_gc_content, size)
    graphs_content.new_graph(values, 'GC content', style='line',
                             color=colors.lightgreen,
                             altcolor=colors.darkseagreen)
    values = apply_to_window(record.seq, size, calc_at_content, size)
    graphs_content.new_graph(values, 'AT content', style='line',
                             color=colors.orange, altcolor=colors.red)
    track_content = Track('GC Content(green line), AT Content(red line)',
                          height=1.94, greytrack=1,
                          scale_largetick_interval=1e4)
    track_content.add_set(graphs_content)

    graphs_dinuc = GraphSet('Di-nucleotide count')
    size = len(record) // 400  # smaller step
    values = apply_to_window(record.seq, size,
                             calc_dinucleotide_counts, size)
    graphs_dinuc.new_graph(values, 'Di-nucleotide count', style='heat',
                           color=colors.red, altcolor=colors.orange)
    track_dinuc = Track('Di-nucleotide count', height=0.5,
                        greytrack=False, scale=False)
    track_dinuc.add_set(graphs_dinuc)

    # Add the tracks (from both features and graphs)
    # Leave some white space in the middle/bottom
    placement = {
        3: track_skew,     # GC skew
        4: track_content,  # GC and AT content
        5: track_cds,      # CDS features
        6: track_gene,     # Gene features
        7: track_misc,     # Misc features and repeat feature
        8: track_dinuc,    # Feature depth
    }
    for level in sorted(placement):
        diagram.add_track(placement[level], level)

    # Finally draw it in both formats, and full view and partial
    graphics_dir = 'Graphics'

    diagram.draw(format='circular', orientation='landscape',
                 tracklines=0, pagesize='A0')
    diagram.write(os.path.join(graphics_dir, 'GD_by_obj_circular.pdf'),
                  'PDF')

    diagram.circular = False
    diagram.draw(format='circular', orientation='landscape',
                 tracklines=0, pagesize='A0', start=3000, end=6300)
    diagram.write(os.path.join(graphics_dir, 'GD_by_obj_frag_circular.pdf'),
                  'PDF')

    diagram.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize='A0', fragments=3)
    diagram.write(os.path.join(graphics_dir, 'GD_by_obj_linear.pdf'),
                  'PDF')

    diagram.set_all_tracks("greytrack_labels", 2)
    diagram.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize=(30 * cm, 10 * cm), fragments=1,
                 start=3000, end=6300)
    diagram.write(os.path.join(graphics_dir, 'GD_by_obj_frag_linear.pdf'),
                  'PDF')
def test_diagram_via_object_pdf(self):
    """Construct and draw PDF using object approach.

    Feature sets, graph sets and tracks are created individually and then
    attached to the diagram, which is drawn in circular and linear form
    and written as two PDF files under ``Graphics``.
    """
    genbank_entry = self.record
    gdd = Diagram('Test Diagram')

    # First add some feature sets:
    gdfs1 = FeatureSet(name='CDS features')
    gdfs2 = FeatureSet(name='gene features')
    gdfs3 = FeatureSet(name='misc_features')
    gdfs4 = FeatureSet(name='repeat regions')

    cds_count = 0
    for feature in genbank_entry.features:
        if feature.type == 'CDS':
            cds_count += 1
            # Alternate the CDS colour so neighbours can be told apart.
            if cds_count % 2 == 0:
                gdfs1.add_feature(feature, color=colors.pink)
            else:
                gdfs1.add_feature(feature, color=colors.red)

        if feature.type == 'gene':
            gdfs2.add_feature(feature)

        if feature.type == 'misc_feature':
            gdfs3.add_feature(feature, color=colors.orange)

        if feature.type == 'repeat_region':
            gdfs4.add_feature(feature, color=colors.purple)

    gdfs1.set_all_features('label', 1)
    gdfs2.set_all_features('label', 1)
    gdfs3.set_all_features('label', 1)
    gdfs4.set_all_features('label', 1)

    gdfs3.set_all_features('hide', 0)
    gdfs4.set_all_features('hide', 0)

    # This overrides the colour of ALL the gene features at once.
    gdfs2.set_all_features('color', colors.blue)

    gdt1 = Track('CDS features', greytrack=True,
                 scale_largetick_interval=1e4,
                 scale_smalltick_interval=1e3,
                 greytrack_labels=10,
                 greytrack_font_color="red",
                 scale_format="SInt")
    gdt1.add_set(gdfs1)

    gdt2 = Track('gene features', greytrack=1,
                 scale_largetick_interval=1e4)
    gdt2.add_set(gdfs2)

    gdt3 = Track('misc features and repeats', greytrack=1,
                 scale_largetick_interval=1e4)
    gdt3.add_set(gdfs3)
    gdt3.add_set(gdfs4)

    # Now add some graph sets:

    # Use a fairly large step so we can easily tell the difference
    # between the bar and line graphs.
    # Floor division keeps the step an int on Python 3 as well; true
    # division would give a float.  (apply_to_window presumably needs an
    # integer window size and step -- confirm against its definition.)
    step = len(genbank_entry) // 200
    gdgs1 = GraphSet('GC skew')

    graphdata1 = apply_to_window(genbank_entry.seq, step, calc_gc_skew, step)
    gdgs1.new_graph(graphdata1, 'GC Skew', style='bar',
                    color=colors.violet,
                    altcolor=colors.purple)

    gdt4 = Track('GC Skew (bar)',
                 height=1.94, greytrack=1,
                 scale_largetick_interval=1e4)
    gdt4.add_set(gdgs1)

    gdgs2 = GraphSet('GC and AT Content')
    gdgs2.new_graph(apply_to_window(genbank_entry.seq, step,
                                    calc_gc_content, step),
                    'GC content', style='line',
                    color=colors.lightgreen,
                    altcolor=colors.darkseagreen)

    gdgs2.new_graph(apply_to_window(genbank_entry.seq, step,
                                    calc_at_content, step),
                    'AT content', style='line',
                    color=colors.orange,
                    altcolor=colors.red)

    gdt5 = Track('GC Content(green line), AT Content(red line)',
                 height=1.94, greytrack=1,
                 scale_largetick_interval=1e4)
    gdt5.add_set(gdgs2)

    gdgs3 = GraphSet('Di-nucleotide count')
    step = len(genbank_entry) // 400  # smaller step
    gdgs3.new_graph(apply_to_window(genbank_entry.seq, step,
                                    calc_dinucleotide_counts, step),
                    'Di-nucleotide count', style='heat',
                    color=colors.red, altcolor=colors.orange)
    gdt6 = Track('Di-nucleotide count', height=0.5,
                 greytrack=False, scale=False)
    gdt6.add_set(gdgs3)

    # Add the tracks (from both features and graphs)
    # Leave some white space in the middle
    gdd.add_track(gdt4, 3)  # GC skew
    gdd.add_track(gdt5, 4)  # GC and AT content
    gdd.add_track(gdt1, 5)  # CDS features
    gdd.add_track(gdt2, 6)  # Gene features
    gdd.add_track(gdt3, 7)  # Misc features and repeat feature
    gdd.add_track(gdt6, 8)  # Feature depth

    # Finally draw it in both formats,
    gdd.draw(format='circular', orientation='landscape',
             tracklines=0, pagesize='A0', circular=True)
    output_filename = os.path.join('Graphics', 'GD_by_obj_circular.pdf')
    gdd.write(output_filename, 'PDF')

    gdd.draw(format='linear', orientation='landscape',
             tracklines=0, pagesize='A0', fragments=3)
    output_filename = os.path.join('Graphics', 'GD_by_obj_linear.pdf')
    gdd.write(output_filename, 'PDF')
class DiagramTest(unittest.TestCase):
    """Creating feature sets, graph sets, tracks etc individually for the diagram."""

    def setUp(self):
        """Test setup, just loads a GenBank file as a SeqRecord.

        Also creates ``self.gdd``, a diagram holding a single empty track
        on level 1, which the track-management tests operate on.
        """
        # Context manager so the handle is closed even if parsing fails.
        with open(os.path.join("GenBank", "NC_005816.gb"), 'r') as handle:
            self.record = SeqIO.read(handle, "genbank")

        self.gdd = Diagram('Test Diagram')
        # Add a track of features,
        self.gdd.new_track(1, greytrack=True, name="CDS Features",
                           greytrack_labels=0, height=0.5)

    def tearDown(self):
        """Release the drawing objects."""
        del self.gdd

    def test_str(self):
        """Test diagram's info as string."""
        expected = (
            "\n<<class 'Bio.Graphics.GenomeDiagram._Diagram.Diagram'>: Test Diagram>"
            "\n1 tracks"
            "\nTrack 1: "
            "\n<<class 'Bio.Graphics.GenomeDiagram._Track.Track'>: CDS Features>"
            "\n0 sets"
            "\n"
        )
        self.assertEqual(expected, str(self.gdd))

    def test_add_track(self):
        """Add track."""
        track = Track(name="Annotated Features")
        self.gdd.add_track(track, 2)
        self.assertEqual(2, len(self.gdd.get_tracks()))

    def test_add_track_to_occupied_level(self):
        """Add track to occupied level."""
        new_track = self.gdd.get_tracks()[0]
        # Level 1 already holds the track created in setUp.
        self.gdd.add_track(new_track, 1)
        self.assertEqual(2, len(self.gdd.get_tracks()))

    def test_add_track_error(self):
        """Test adding unspecified track."""
        self.assertRaises(ValueError, self.gdd.add_track, None, 1)

    def test_del_tracks(self):
        """Delete track."""
        self.gdd.del_track(1)
        self.assertEqual(0, len(self.gdd.get_tracks()))

    def test_get_tracks(self):
        """Get track."""
        self.assertEqual(1, len(self.gdd.get_tracks()))

    def test_move_track(self):
        """Move a track."""
        self.gdd.move_track(1, 2)
        expected = (
            "\n<<class 'Bio.Graphics.GenomeDiagram._Diagram.Diagram'>: Test Diagram>"
            "\n1 tracks"
            "\nTrack 2: "
            "\n<<class 'Bio.Graphics.GenomeDiagram._Track.Track'>: CDS Features>"
            "\n0 sets"
            "\n"
        )
        self.assertEqual(expected, str(self.gdd))

    def test_renumber(self):
        """Test renumbering tracks."""
        self.gdd.renumber_tracks(0)
        expected = (
            "\n<<class 'Bio.Graphics.GenomeDiagram._Diagram.Diagram'>: Test Diagram>"
            "\n1 tracks"
            "\nTrack 0: "
            "\n<<class 'Bio.Graphics.GenomeDiagram._Track.Track'>: CDS Features>"
            "\n0 sets"
            "\n"
        )
        self.assertEqual(expected, str(self.gdd))

    def test_write_arguments(self):
        """Check how the write methods respond to output format arguments."""
        gdd = Diagram('Test Diagram')
        gdd.drawing = None  # Hack - need the ReportLab drawing object to be created.
        filename = os.path.join("Graphics", "error.txt")
        # We (now) allow valid formats in any case.
        for output in ["XXX", "xxx", None, 123, 5.9]:
            with self.assertRaises(ValueError):
                gdd.write(filename, output)
            with self.assertRaises(ValueError):
                gdd.write_to_string(output)

    def test_partial_diagram(self):
        """Construct and draw SVG and PDF for just part of a SeqRecord."""
        genbank_entry = self.record
        start = 6500
        end = 8750

        gdd = Diagram('Test Diagram',
                      # For the circular diagram we don't want a closed circle:
                      circular=False,
                      )
        # Add a track of features,
        gdt_features = gdd.new_track(1, greytrack=True,
                                     name="CDS Features",
                                     scale_largetick_interval=1000,
                                     scale_smalltick_interval=100,
                                     scale_format="SInt",
                                     greytrack_labels=False,
                                     height=0.5)
        # We'll just use one feature set for these features,
        gds_features = gdt_features.new_set()
        for feature in genbank_entry.features:
            if feature.type != "CDS":
                # We're going to ignore these.
                continue
            if feature.location.end.position < start:
                # Out of frame (too far left)
                continue
            if feature.location.start.position > end:
                # Out of frame (too far right)
                continue

            # This URL should work in SVG output from recent versions
            # of ReportLab.  You need ReportLab 2.4 or later
            try:
                url = "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi" +\
                      "?db=protein&id=%s" % feature.qualifiers["protein_id"][0]
            except KeyError:
                url = None

            # Note that I am using strings for color names, instead
            # of passing in color objects.  This should also work!
            if len(gds_features) % 2 == 0:
                color = "white"  # for testing the automatic black border!
            else:
                color = "red"
            # Checking it can cope with the old UK spelling colour.
            # Also show the labels perpendicular to the track.
            gds_features.add_feature(feature, colour=color,
                                     url=url,
                                     sigil="ARROW",
                                     label_position=None,
                                     label_size=8,
                                     label_angle=90,
                                     label=True)

        # And draw it...
        gdd.draw(format='linear', orientation='landscape',
                 tracklines=False, pagesize=(10 * cm, 6 * cm), fragments=1,
                 start=start, end=end)
        output_filename = os.path.join('Graphics', 'GD_region_linear.pdf')
        gdd.write(output_filename, 'PDF')

        # Also check the write_to_string (bytes string) method matches.
        # A unittest assertion (not a bare assert) so the check survives
        # python -O, and a context manager so the file is closed.
        with open(output_filename, "rb") as handle:
            self.assertEqual(handle.read(), gdd.write_to_string('PDF'))

        output_filename = os.path.join('Graphics', 'GD_region_linear.svg')
        gdd.write(output_filename, 'SVG')

        # Circular with a particular start/end is a bit odd, but by setting
        # circular=False (above) a sweep of 90% is used (a wedge is left out)
        gdd.draw(format='circular',
                 tracklines=False, pagesize=(10 * cm, 10 * cm),
                 start=start, end=end)
        output_filename = os.path.join('Graphics', 'GD_region_circular.pdf')
        gdd.write(output_filename, 'PDF')
        output_filename = os.path.join('Graphics', 'GD_region_circular.svg')
        gdd.write(output_filename, 'SVG')

    def test_diagram_via_methods_pdf(self):
        """Construct and draw PDF using method approach."""
        genbank_entry = self.record
        gdd = Diagram('Test Diagram')

        # Add a track of features,
        gdt_features = gdd.new_track(1, greytrack=True,
                                     name="CDS Features",
                                     greytrack_labels=0,
                                     height=0.5)
        # We'll just use one feature set for the genes and misc_features,
        gds_features = gdt_features.new_set()
        for feature in genbank_entry.features:
            if feature.type == "gene":
                if len(gds_features) % 2 == 0:
                    color = "blue"
                else:
                    color = "lightblue"
                gds_features.add_feature(feature, color=color,
                                         label_position="start",
                                         label_size=11,
                                         sigil="ARROW",
                                         label=True)

        # I want to include some strandless features, so for an example
        # will use EcoRI recognition sites etc.
        for site, name, color in [("GAATTC", "EcoRI", "green"),
                                  ("CCCGGG", "SmaI", "orange"),
                                  ("AAGCTT", "HindIII", "red"),
                                  ("GGATCC", "BamHI", "purple")]:
            index = 0
            while True:
                index = genbank_entry.seq.find(site, start=index)
                if index == -1:
                    break
                # The feature spans exactly the recognition site.
                feature = SeqFeature(FeatureLocation(index, index + len(site)),
                                     strand=None)

                # This URL should work in SVG output from recent versions
                # of ReportLab.  You need ReportLab 2.4 or later
                # NOTE(review): a freshly built SeqFeature presumably has no
                # "protein_id" qualifier, so url is expected to end up None
                # here -- confirm.
                try:
                    url = "http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi" +\
                          "?db=protein&id=%s" % feature.qualifiers["protein_id"][0]
                except KeyError:
                    url = None

                gds_features.add_feature(feature, color=color,
                                         url=url,
                                         label_size=10,
                                         label_color=color,
                                         name=name,
                                         label=True)
                # Resume the search after this match.
                index += len(site)
            del index

        # Now add a graph track...
        gdt_at_gc = gdd.new_track(2, greytrack=True,
                                  name="AT and GC content",
                                  greytrack_labels=True)
        gds_at_gc = gdt_at_gc.new_set(type="graph")

        step = len(genbank_entry) // 200
        gds_at_gc.new_graph(apply_to_window(genbank_entry.seq, step,
                                            calc_gc_content, step),
                            'GC content', style='line',
                            color=colors.lightgreen,
                            altcolor=colors.darkseagreen)
        gds_at_gc.new_graph(apply_to_window(genbank_entry.seq, step,
                                            calc_at_content, step),
                            'AT content', style='line',
                            color=colors.orange,
                            altcolor=colors.red)

        # Finally draw it in both formats,
        gdd.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize='A4', fragments=3)
        output_filename = os.path.join('Graphics', 'GD_by_meth_linear.pdf')
        gdd.write(output_filename, 'PDF')

        gdd.draw(format='circular', tracklines=False, circle_core=0.8,
                 pagesize=(20 * cm, 20 * cm), circular=True)
        output_filename = os.path.join('Graphics', 'GD_by_meth_circular.pdf')
        gdd.write(output_filename, 'PDF')

    def test_diagram_via_object_pdf(self):
        """Construct and draw PDF using object approach.

        Tracks, feature sets and graph sets are built as separate objects,
        cross links are added between the CDS and gene tracks, and four
        PDF files (circular/linear, full/partial) are written under
        ``Graphics``.
        """
        genbank_entry = self.record
        gdd = Diagram('Test Diagram')

        gdt1 = Track('CDS features', greytrack=True,
                     scale_largetick_interval=1e4,
                     scale_smalltick_interval=1e3,
                     greytrack_labels=10,
                     greytrack_font_color="red",
                     scale_format="SInt")
        gdt2 = Track('gene features', greytrack=1,
                     scale_largetick_interval=1e4)

        # First add some feature sets:
        gdfsA = FeatureSet(name='CDS backgrounds')
        gdfsB = FeatureSet(name='gene background')

        gdfs1 = FeatureSet(name='CDS features')
        gdfs2 = FeatureSet(name='gene features')
        gdfs3 = FeatureSet(name='misc_features')
        gdfs4 = FeatureSet(name='repeat regions')

        prev_gene = None
        cds_count = 0
        for feature in genbank_entry.features:
            if feature.type == 'CDS':
                cds_count += 1
                if prev_gene:
                    # Assuming it goes with this CDS!
                    if cds_count % 2 == 0:
                        dark, light = colors.peru, colors.tan
                    else:
                        dark, light = colors.burlywood, colors.bisque
                    # Background for CDS,
                    a = gdfsA.add_feature(
                        SeqFeature(FeatureLocation(feature.location.start,
                                                   feature.location.end,
                                                   strand=0)),
                        color=dark)
                    # Background for gene,
                    b = gdfsB.add_feature(
                        SeqFeature(FeatureLocation(prev_gene.location.start,
                                                   prev_gene.location.end,
                                                   strand=0)),
                        color=dark)
                    # Cross link,
                    gdd.cross_track_links.append(CrossLink(a, b, light, dark))
                    prev_gene = None
            if feature.type == 'gene':
                prev_gene = feature

        # Hand-placed cross links, as
        # (base colour, span on CDS track, span on gene track, flip).
        manual_links = [
            # Some cross links on the same linear diagram fragment,
            (colors.red, (2220, 2230), (2200, 2210), False),
            (colors.blue, (2150, 2200), (2220, 2290), True),
            (colors.green, (2250, 2560), (2300, 2860), False),
            # Some cross links where both parts are saddling the linear
            # diagram fragment boundary,
            (colors.red, (3155, 3250), (3130, 3300), False),
            # Nestled within that (drawn on top),
            (colors.blue, (3160, 3275), (3180, 3225), True),
            # Some cross links where two features are on either side of
            # the linear diagram fragment boundary,
            (colors.green, (6450, 6550), (6265, 6365), False),
            (colors.gold, (6265, 6365), (6450, 6550), False),
            (colors.red, (6275, 6375), (6430, 6530), True),
            (colors.blue, (6430, 6530), (6275, 6375), True),
        ]
        for base, cds_span, gene_span, flip in manual_links:
            f, c = fill_and_border(base)
            a = gdfsA.add_feature(SeqFeature(FeatureLocation(*cds_span)),
                                  color=f, border=c)
            b = gdfsB.add_feature(SeqFeature(FeatureLocation(*gene_span)),
                                  color=f, border=c)
            gdd.cross_track_links.append(
                CrossLink(a, b, color=f, border=c, flip=flip))

        cds_count = 0
        for feature in genbank_entry.features:
            if feature.type == 'CDS':
                cds_count += 1
                if cds_count % 2 == 0:
                    gdfs1.add_feature(feature, color=colors.pink, sigil="ARROW")
                else:
                    gdfs1.add_feature(feature, color=colors.red, sigil="ARROW")

            if feature.type == 'gene':
                # Note we set the colour of ALL the genes later on as a test,
                gdfs2.add_feature(feature, sigil="ARROW")

            if feature.type == 'misc_feature':
                gdfs3.add_feature(feature, color=colors.orange)

            if feature.type == 'repeat_region':
                gdfs4.add_feature(feature, color=colors.purple)

        gdfs1.set_all_features('label', 1)
        gdfs2.set_all_features('label', 1)
        gdfs3.set_all_features('label', 1)
        gdfs4.set_all_features('label', 1)

        gdfs3.set_all_features('hide', 0)
        gdfs4.set_all_features('hide', 0)

        gdfs2.set_all_features('color', colors.blue)

        gdt1.add_set(gdfsA)  # Before CDS so under them!
        gdt1.add_set(gdfs1)

        gdt2.add_set(gdfsB)  # Before genes so under them!
        gdt2.add_set(gdfs2)

        gdt3 = Track('misc features and repeats', greytrack=1,
                     scale_largetick_interval=1e4)
        gdt3.add_set(gdfs3)
        gdt3.add_set(gdfs4)

        # Now add some graph sets:

        # Use a fairly large step so we can easily tell the difference
        # between the bar and line graphs.
        step = len(genbank_entry) // 200
        gdgs1 = GraphSet('GC skew')

        graphdata1 = apply_to_window(genbank_entry.seq, step, calc_gc_skew, step)
        gdgs1.new_graph(graphdata1, 'GC Skew', style='bar',
                        color=colors.violet,
                        altcolor=colors.purple)

        gdt4 = Track('GC Skew (bar)',
                     height=1.94, greytrack=1,
                     scale_largetick_interval=1e4)
        gdt4.add_set(gdgs1)

        gdgs2 = GraphSet('GC and AT Content')
        gdgs2.new_graph(apply_to_window(genbank_entry.seq, step,
                                        calc_gc_content, step),
                        'GC content', style='line',
                        color=colors.lightgreen,
                        altcolor=colors.darkseagreen)

        gdgs2.new_graph(apply_to_window(genbank_entry.seq, step,
                                        calc_at_content, step),
                        'AT content', style='line',
                        color=colors.orange,
                        altcolor=colors.red)

        gdt5 = Track('GC Content(green line), AT Content(red line)',
                     height=1.94, greytrack=1,
                     scale_largetick_interval=1e4)
        gdt5.add_set(gdgs2)

        gdgs3 = GraphSet('Di-nucleotide count')
        step = len(genbank_entry) // 400  # smaller step
        gdgs3.new_graph(apply_to_window(genbank_entry.seq, step,
                                        calc_dinucleotide_counts, step),
                        'Di-nucleotide count', style='heat',
                        color=colors.red, altcolor=colors.orange)
        gdt6 = Track('Di-nucleotide count', height=0.5,
                     greytrack=False, scale=False)
        gdt6.add_set(gdgs3)

        # Add the tracks (from both features and graphs)
        # Leave some white space in the middle/bottom
        gdd.add_track(gdt4, 3)  # GC skew
        gdd.add_track(gdt5, 4)  # GC and AT content
        gdd.add_track(gdt1, 5)  # CDS features
        gdd.add_track(gdt2, 6)  # Gene features
        gdd.add_track(gdt3, 7)  # Misc features and repeat feature
        gdd.add_track(gdt6, 8)  # Feature depth

        # Finally draw it in both formats, and full view and partial
        gdd.draw(format='circular', orientation='landscape',
                 tracklines=0, pagesize='A0')
        output_filename = os.path.join('Graphics', 'GD_by_obj_circular.pdf')
        gdd.write(output_filename, 'PDF')

        gdd.circular = False
        gdd.draw(format='circular', orientation='landscape',
                 tracklines=0, pagesize='A0', start=3000, end=6300)
        output_filename = os.path.join('Graphics', 'GD_by_obj_frag_circular.pdf')
        gdd.write(output_filename, 'PDF')

        gdd.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize='A0', fragments=3)
        output_filename = os.path.join('Graphics', 'GD_by_obj_linear.pdf')
        gdd.write(output_filename, 'PDF')

        gdd.set_all_tracks("greytrack_labels", 2)
        gdd.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize=(30 * cm, 10 * cm), fragments=1,
                 start=3000, end=6300)
        output_filename = os.path.join('Graphics', 'GD_by_obj_frag_linear.pdf')
        gdd.write(output_filename, 'PDF')
class DiagramTest(unittest.TestCase):
    """Creating feature sets, graph sets, tracks etc individually for the diagram."""

    def setUp(self):
        """Load the GenBank file as a SeqRecord and build a one-track diagram."""
        # Use a context manager so the handle is closed even if parsing fails.
        with open(os.path.join("GenBank", "NC_005816.gb"), 'r') as handle:
            self.record = SeqIO.read(handle, "genbank")

        self.gdd = Diagram('Test Diagram')
        # Add a track of features,
        self.gdd.new_track(1, greytrack=True, name="CDS Features",
                           greytrack_labels=0, height=0.5)

    def tearDown(self):
        """Drop the diagram built in setUp."""
        del self.gdd

    @staticmethod
    def _expected_str(level):
        """Return the expected str() of the setUp diagram with its track at ``level``."""
        # Adjacent literals are joined first, so the single %i is filled from level.
        return ("\n<<class 'Bio.Graphics.GenomeDiagram._Diagram.Diagram'>: Test Diagram>"
                "\n1 tracks"
                "\nTrack %i: "
                "\n<<class 'Bio.Graphics.GenomeDiagram._Track.Track'>: CDS Features>"
                "\n0 sets"
                "\n" % level)

    def test_str(self):
        """Test diagram's info as string."""
        self.assertEqual(self._expected_str(1), str(self.gdd))

    def test_add_track(self):
        """Test adding a new track at a free level."""
        track = Track(name="Annotated Features")
        self.gdd.add_track(track, 2)
        self.assertEqual(2, len(self.gdd.get_tracks()))

    def test_add_track_to_occupied_level(self):
        """Test adding a track at a level which is already in use."""
        # Re-add the existing track at the level it already occupies.
        new_track = self.gdd.get_tracks()[0]
        self.gdd.add_track(new_track, 1)
        self.assertEqual(2, len(self.gdd.get_tracks()))

    def test_add_track_error(self):
        """Test adding unspecified track."""
        self.assertRaises(ValueError, self.gdd.add_track, None, 1)

    def test_del_tracks(self):
        """Test deleting the only track."""
        self.gdd.del_track(1)
        self.assertEqual(0, len(self.gdd.get_tracks()))

    def test_get_tracks(self):
        """Test the track list holds the single track from setUp."""
        self.assertEqual(1, len(self.gdd.get_tracks()))

    def test_move_track(self):
        """Test moving the track from level 1 to level 2."""
        self.gdd.move_track(1, 2)
        self.assertEqual(self._expected_str(2), str(self.gdd))

    def test_renumber(self):
        """Test renumbering tracks."""
        self.gdd.renumber_tracks(0)
        self.assertEqual(self._expected_str(0), str(self.gdd))

    def test_write_arguments(self):
        """Check how the write methods respond to output format arguments."""
        gdd = Diagram('Test Diagram')
        gdd.drawing = None  # Hack - need the ReportLab drawing object to be created.
        filename = os.path.join("Graphics", "error.txt")
        # We (now) allow valid formats in any case.
        for output in ["XXX", "xxx", None, 123, 5.9]:
            # self.fail() rather than a bare assert, so the check still
            # runs when Python is started with -O.
            try:
                gdd.write(filename, output)
            except ValueError:
                pass  # Good!
            else:
                self.fail("Should have rejected %s as an output format" % output)
            try:
                gdd.write_to_string(output)
            except ValueError:
                pass  # Good!
            else:
                self.fail("Should have rejected %s as an output format" % output)

    def test_partial_diagram(self):
        """Construct and draw SVG and PDF for just part of a SeqRecord."""
        genbank_entry = self.record
        start = 6500
        end = 8750

        gdd = Diagram(
            'Test Diagram',
            # For the circular diagram we don't want a closed circle:
            circular=False,
        )
        # Add a track of features,
        gdt_features = gdd.new_track(1, greytrack=True,
                                     name="CDS Features",
                                     scale_largetick_interval=1000,
                                     scale_smalltick_interval=100,
                                     scale_format="SInt",
                                     greytrack_labels=False,
                                     height=0.5)
        # We'll just use one feature set for these features,
        gds_features = gdt_features.new_set()
        for feature in genbank_entry.features:
            if feature.type != "CDS":
                # We're going to ignore these.
                continue
            if feature.location.end.position < start:
                # Out of frame (too far left)
                continue
            if feature.location.start.position > end:
                # Out of frame (too far right)
                continue

            # This URL should work in SVG output from recent versions
            # of ReportLab. You need ReportLab 2.4 or later
            try:
                url = ("http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi"
                       + "?db=protein&id=%s" % feature.qualifiers["protein_id"][0])
            except KeyError:
                url = None

            # Note that I am using strings for color names, instead
            # of passing in color objects. This should also work!
            if len(gds_features) % 2 == 0:
                color = "white"  # for testing the automatic black border!
            else:
                color = "red"
            # Checking it can cope with the old UK spelling colour.
            # Also show the labels perpendicular to the track.
            gds_features.add_feature(feature, colour=color,
                                     url=url, sigil="ARROW",
                                     label_position=None,
                                     label_size=8,
                                     label_angle=90,
                                     label=True)

        # And draw it...
        gdd.draw(format='linear', orientation='landscape',
                 tracklines=False, pagesize=(10 * cm, 6 * cm), fragments=1,
                 start=start, end=end)
        output_filename = os.path.join('Graphics', 'GD_region_linear.pdf')
        gdd.write(output_filename, 'PDF')

        # Also check the write_to_string (bytes string) method matches,
        # (Note the possible confusion over new lines on Windows)
        with open(output_filename, "rb") as handle:
            written = handle.read()
        self.assertEqual(written.replace(b"\r\n", b"\n"),
                         gdd.write_to_string('PDF').replace(b"\r\n", b"\n"))

        output_filename = os.path.join('Graphics', 'GD_region_linear.svg')
        gdd.write(output_filename, 'SVG')

        # Circular with a particular start/end is a bit odd, but by setting
        # circular=False (above) a sweep of 90% is used (a wedge is left out)
        gdd.draw(format='circular',
                 tracklines=False, pagesize=(10 * cm, 10 * cm),
                 start=start, end=end)
        output_filename = os.path.join('Graphics', 'GD_region_circular.pdf')
        gdd.write(output_filename, 'PDF')
        output_filename = os.path.join('Graphics', 'GD_region_circular.svg')
        gdd.write(output_filename, 'SVG')

    def test_diagram_via_methods_pdf(self):
        """Construct and draw PDF using method approach."""
        genbank_entry = self.record
        gdd = Diagram('Test Diagram')

        # Add a track of features,
        gdt_features = gdd.new_track(1, greytrack=True,
                                     name="CDS Features",
                                     greytrack_labels=0, height=0.5)
        # We'll just use one feature set for the genes and misc_features,
        gds_features = gdt_features.new_set()
        for feature in genbank_entry.features:
            if feature.type == "gene":
                if len(gds_features) % 2 == 0:
                    color = "blue"
                else:
                    color = "lightblue"
                gds_features.add_feature(
                    feature, color=color,
                    # label_position="middle",
                    # label_position="end",
                    label_position="start",
                    label_size=11,
                    # label_angle=90,
                    sigil="ARROW",
                    label=True)

        # I want to include some strandless features, so for an example
        # will use EcoRI recognition sites etc.
        for site, name, color in [("GAATTC", "EcoRI", "green"),
                                  ("CCCGGG", "SmaI", "orange"),
                                  ("AAGCTT", "HindIII", "red"),
                                  ("GGATCC", "BamHI", "purple")]:
            index = 0
            while True:
                index = genbank_entry.seq.find(site, start=index)
                if index == -1:
                    break
                # Width follows the site itself (every site listed here is
                # six bases), matching the len(site) step used below.
                feature = SeqFeature(FeatureLocation(index, index + len(site)),
                                     strand=None)

                # This URL should work in SVG output from recent versions
                # of ReportLab. You need ReportLab 2.4 or later
                # NOTE(review): no qualifiers are set on this new feature, so
                # presumably this always ends up as None - confirm.
                try:
                    url = ("http://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi"
                           + "?db=protein&id=%s" % feature.qualifiers["protein_id"][0])
                except KeyError:
                    url = None

                gds_features.add_feature(
                    feature, color=color,
                    url=url,
                    # label_position="middle",
                    label_size=10,
                    label_color=color,
                    # label_angle=90,
                    name=name,
                    label=True)
                index += len(site)
            del index

        # Now add a graph track...
        gdt_at_gc = gdd.new_track(2, greytrack=True,
                                  name="AT and GC content",
                                  greytrack_labels=True)
        gds_at_gc = gdt_at_gc.new_set(type="graph")

        step = len(genbank_entry) // 200
        gds_at_gc.new_graph(apply_to_window(genbank_entry.seq, step, calc_gc_content, step),
                            'GC content', style='line',
                            color=colors.lightgreen,
                            altcolor=colors.darkseagreen)
        gds_at_gc.new_graph(apply_to_window(genbank_entry.seq, step, calc_at_content, step),
                            'AT content', style='line',
                            color=colors.orange,
                            altcolor=colors.red)

        # Finally draw it in both formats,
        gdd.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize='A4', fragments=3)
        output_filename = os.path.join('Graphics', 'GD_by_meth_linear.pdf')
        gdd.write(output_filename, 'PDF')

        gdd.draw(format='circular', tracklines=False, circle_core=0.8,
                 pagesize=(20 * cm, 20 * cm), circular=True)
        output_filename = os.path.join('Graphics', 'GD_by_meth_circular.pdf')
        gdd.write(output_filename, 'PDF')

    def test_diagram_via_object_pdf(self):
        """Construct and draw PDF using object approach."""
        genbank_entry = self.record
        gdd = Diagram('Test Diagram')

        gdt1 = Track('CDS features', greytrack=True,
                     scale_largetick_interval=1e4,
                     scale_smalltick_interval=1e3,
                     greytrack_labels=10,
                     greytrack_font_color="red",
                     scale_format="SInt")
        gdt2 = Track('gene features', greytrack=1, scale_largetick_interval=1e4)

        # First add some feature sets:
        gdfsA = FeatureSet(name='CDS backgrounds')
        gdfsB = FeatureSet(name='gene background')

        gdfs1 = FeatureSet(name='CDS features')
        gdfs2 = FeatureSet(name='gene features')
        gdfs3 = FeatureSet(name='misc_features')
        gdfs4 = FeatureSet(name='repeat regions')

        # Pair each CDS with the gene feature seen just before it and link
        # their background boxes across the two tracks.
        prev_gene = None
        cds_count = 0
        for feature in genbank_entry.features:
            if feature.type == 'CDS':
                cds_count += 1
                if prev_gene:
                    # Assuming it goes with this CDS!
                    if cds_count % 2 == 0:
                        dark, light = colors.peru, colors.tan
                    else:
                        dark, light = colors.burlywood, colors.bisque
                    # Background for CDS,
                    a = gdfsA.add_feature(SeqFeature(
                        FeatureLocation(feature.location.start,
                                        feature.location.end, strand=0)),
                        color=dark)
                    # Background for gene,
                    b = gdfsB.add_feature(SeqFeature(
                        FeatureLocation(prev_gene.location.start,
                                        prev_gene.location.end, strand=0)),
                        color=dark)
                    # Cross link,
                    gdd.cross_track_links.append(CrossLink(a, b, light, dark))
                    prev_gene = None
            if feature.type == 'gene':
                prev_gene = feature

        # Some cross links on the same linear diagram fragment,
        f, c = fill_and_border(colors.red)
        a = gdfsA.add_feature(SeqFeature(FeatureLocation(2220, 2230)), color=f, border=c)
        b = gdfsB.add_feature(SeqFeature(FeatureLocation(2200, 2210)), color=f, border=c)
        gdd.cross_track_links.append(CrossLink(a, b, f, c))

        f, c = fill_and_border(colors.blue)
        a = gdfsA.add_feature(SeqFeature(FeatureLocation(2150, 2200)), color=f, border=c)
        b = gdfsB.add_feature(SeqFeature(FeatureLocation(2220, 2290)), color=f, border=c)
        gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True))

        f, c = fill_and_border(colors.green)
        a = gdfsA.add_feature(SeqFeature(FeatureLocation(2250, 2560)), color=f, border=c)
        b = gdfsB.add_feature(SeqFeature(FeatureLocation(2300, 2860)), color=f, border=c)
        gdd.cross_track_links.append(CrossLink(a, b, f, c))

        # Some cross links where both parts are saddling the linear diagram fragment boundary,
        f, c = fill_and_border(colors.red)
        a = gdfsA.add_feature(SeqFeature(FeatureLocation(3155, 3250)), color=f, border=c)
        b = gdfsB.add_feature(SeqFeature(FeatureLocation(3130, 3300)), color=f, border=c)
        gdd.cross_track_links.append(CrossLink(a, b, f, c))
        # Nestled within that (drawn on top),
        f, c = fill_and_border(colors.blue)
        a = gdfsA.add_feature(SeqFeature(FeatureLocation(3160, 3275)), color=f, border=c)
        b = gdfsB.add_feature(SeqFeature(FeatureLocation(3180, 3225)), color=f, border=c)
        gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True))

        # Some cross links where two features are on either side of the linear diagram fragment boundary,
        f, c = fill_and_border(colors.green)
        a = gdfsA.add_feature(SeqFeature(FeatureLocation(6450, 6550)), color=f, border=c)
        b = gdfsB.add_feature(SeqFeature(FeatureLocation(6265, 6365)), color=f, border=c)
        gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c))
        f, c = fill_and_border(colors.gold)
        a = gdfsA.add_feature(SeqFeature(FeatureLocation(6265, 6365)), color=f, border=c)
        b = gdfsB.add_feature(SeqFeature(FeatureLocation(6450, 6550)), color=f, border=c)
        gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c))
        f, c = fill_and_border(colors.red)
        a = gdfsA.add_feature(SeqFeature(FeatureLocation(6275, 6375)), color=f, border=c)
        b = gdfsB.add_feature(SeqFeature(FeatureLocation(6430, 6530)), color=f, border=c)
        gdd.cross_track_links.append(
            CrossLink(a, b, color=f, border=c, flip=True))
        f, c = fill_and_border(colors.blue)
        a = gdfsA.add_feature(SeqFeature(FeatureLocation(6430, 6530)), color=f, border=c)
        b = gdfsB.add_feature(SeqFeature(FeatureLocation(6275, 6375)), color=f, border=c)
        gdd.cross_track_links.append(
            CrossLink(a, b, color=f, border=c, flip=True))

        cds_count = 0
        for feature in genbank_entry.features:
            if feature.type == 'CDS':
                cds_count += 1
                if cds_count % 2 == 0:
                    gdfs1.add_feature(feature, color=colors.pink, sigil="ARROW")
                else:
                    gdfs1.add_feature(feature, color=colors.red, sigil="ARROW")

            if feature.type == 'gene':
                # Note we set the colour of ALL the genes later on as a test,
                gdfs2.add_feature(feature, sigil="ARROW")

            if feature.type == 'misc_feature':
                gdfs3.add_feature(feature, color=colors.orange)

            if feature.type == 'repeat_region':
                gdfs4.add_feature(feature, color=colors.purple)

        # gdd.cross_track_links = gdd.cross_track_links[:1]

        gdfs1.set_all_features('label', 1)
        gdfs2.set_all_features('label', 1)
        gdfs3.set_all_features('label', 1)
        gdfs4.set_all_features('label', 1)

        gdfs3.set_all_features('hide', 0)
        gdfs4.set_all_features('hide', 0)

        # gdfs1.set_all_features('color', colors.red)
        gdfs2.set_all_features('color', colors.blue)

        gdt1.add_set(gdfsA)  # Before CDS so under them!
        gdt1.add_set(gdfs1)

        gdt2.add_set(gdfsB)  # Before genes so under them!
        gdt2.add_set(gdfs2)

        gdt3 = Track('misc features and repeats', greytrack=1,
                     scale_largetick_interval=1e4)
        gdt3.add_set(gdfs3)
        gdt3.add_set(gdfs4)

        # Now add some graph sets:

        # Use a fairly large step so we can easily tell the difference
        # between the bar and line graphs.
        step = len(genbank_entry) // 200
        gdgs1 = GraphSet('GC skew')

        graphdata1 = apply_to_window(genbank_entry.seq, step, calc_gc_skew, step)
        gdgs1.new_graph(graphdata1, 'GC Skew', style='bar',
                        color=colors.violet,
                        altcolor=colors.purple)

        gdt4 = Track('GC Skew (bar)',
                     height=1.94, greytrack=1,
                     scale_largetick_interval=1e4)
        gdt4.add_set(gdgs1)

        gdgs2 = GraphSet('GC and AT Content')
        gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_gc_content, step),
                        'GC content', style='line',
                        color=colors.lightgreen,
                        altcolor=colors.darkseagreen)

        gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_at_content, step),
                        'AT content', style='line',
                        color=colors.orange,
                        altcolor=colors.red)

        gdt5 = Track('GC Content(green line), AT Content(red line)',
                     height=1.94, greytrack=1,
                     scale_largetick_interval=1e4)
        gdt5.add_set(gdgs2)

        gdgs3 = GraphSet('Di-nucleotide count')
        step = len(genbank_entry) // 400  # smaller step
        gdgs3.new_graph(apply_to_window(genbank_entry.seq, step, calc_dinucleotide_counts, step),
                        'Di-nucleotide count', style='heat',
                        color=colors.red, altcolor=colors.orange)
        gdt6 = Track('Di-nucleotide count', height=0.5, greytrack=False, scale=False)
        gdt6.add_set(gdgs3)

        # Add the tracks (from both features and graphs)
        # Leave some white space in the middle/bottom
        gdd.add_track(gdt4, 3)  # GC skew
        gdd.add_track(gdt5, 4)  # GC and AT content
        gdd.add_track(gdt1, 5)  # CDS features
        gdd.add_track(gdt2, 6)  # Gene features
        gdd.add_track(gdt3, 7)  # Misc features and repeat feature
        gdd.add_track(gdt6, 8)  # Di-nucleotide count (heat map)

        # Finally draw it in both formats, and full view and partial
        gdd.draw(format='circular', orientation='landscape',
                 tracklines=0, pagesize='A0')
        output_filename = os.path.join('Graphics', 'GD_by_obj_circular.pdf')
        gdd.write(output_filename, 'PDF')

        gdd.circular = False
        gdd.draw(format='circular', orientation='landscape',
                 tracklines=0, pagesize='A0', start=3000, end=6300)
        output_filename = os.path.join('Graphics', 'GD_by_obj_frag_circular.pdf')
        gdd.write(output_filename, 'PDF')

        gdd.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize='A0', fragments=3)
        output_filename = os.path.join('Graphics', 'GD_by_obj_linear.pdf')
        gdd.write(output_filename, 'PDF')

        gdd.set_all_tracks("greytrack_labels", 2)
        gdd.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize=(30 * cm, 10 * cm), fragments=1,
                 start=3000, end=6300)
        output_filename = os.path.join('Graphics', 'GD_by_obj_frag_linear.pdf')
        gdd.write(output_filename, 'PDF')
def test_diagram_via_object_pdf(self):
    """Build a diagram from explicit set/track objects and write it as PDF.

    Feature sets and graph sets are created first, attached to tracks,
    the tracks are added to the diagram, and the result is drawn once in
    circular and once in linear format.
    """
    record = self.record
    diagram = Diagram('Test Diagram')

    # One feature set per feature type of interest.
    cds_set = FeatureSet(name='CDS features')
    gene_set = FeatureSet(name='gene features')
    misc_set = FeatureSet(name='misc_features')
    repeat_set = FeatureSet(name='repeat regions')

    cds_seen = 0
    for feat in record.features:
        if feat.type == 'CDS':
            cds_seen += 1
            # Alternate the CDS colours: even count pink, odd count red.
            shade = colors.red if cds_seen % 2 else colors.pink
            cds_set.add_feature(feat, color=shade)
        elif feat.type == 'gene':
            gene_set.add_feature(feat)
        elif feat.type == 'misc_feature':
            misc_set.add_feature(feat, color=colors.orange)
        elif feat.type == 'repeat_region':
            repeat_set.add_feature(feat, color=colors.purple)

    for feature_set in (cds_set, gene_set, misc_set, repeat_set):
        feature_set.set_all_features('label', 1)
    for feature_set in (misc_set, repeat_set):
        feature_set.set_all_features('hide', 0)
    # Only the gene set has its colour overridden in bulk.
    gene_set.set_all_features('color', colors.blue)

    cds_track = Track('CDS features', greytrack=True,
                      scale_largetick_interval=1e4,
                      scale_smalltick_interval=1e3,
                      greytrack_labels=10,
                      greytrack_font_color="red",
                      scale_format="SInt")
    cds_track.add_set(cds_set)

    gene_track = Track('gene features', greytrack=1,
                       scale_largetick_interval=1e4)
    gene_track.add_set(gene_set)

    misc_track = Track('misc features and repeats', greytrack=1,
                       scale_largetick_interval=1e4)
    for feature_set in (misc_set, repeat_set):
        misc_track.add_set(feature_set)

    # Graph sets next. The window is fairly large so that the bar and
    # line graphs are easy to tell apart.
    window = len(record) // 200

    skew_set = GraphSet('GC skew')
    skew_data = apply_to_window(record.seq, window, calc_gc_skew, window)
    skew_set.new_graph(skew_data, 'GC Skew', style='bar',
                       color=colors.violet,
                       altcolor=colors.purple)
    skew_track = Track('GC Skew (bar)',
                       height=1.94, greytrack=1,
                       scale_largetick_interval=1e4)
    skew_track.add_set(skew_set)

    content_set = GraphSet('GC and AT Content')
    gc_data = apply_to_window(record.seq, window, calc_gc_content, window)
    content_set.new_graph(gc_data, 'GC content', style='line',
                          color=colors.lightgreen,
                          altcolor=colors.darkseagreen)
    at_data = apply_to_window(record.seq, window, calc_at_content, window)
    content_set.new_graph(at_data, 'AT content', style='line',
                          color=colors.orange,
                          altcolor=colors.red)
    content_track = Track('GC Content(green line), AT Content(red line)',
                          height=1.94, greytrack=1,
                          scale_largetick_interval=1e4)
    content_track.add_set(content_set)

    dinuc_set = GraphSet('Di-nucleotide count')
    window = len(record) // 400  # smaller window for the heat map
    dinuc_data = apply_to_window(record.seq, window,
                                 calc_dinucleotide_counts, window)
    dinuc_set.new_graph(dinuc_data, 'Di-nucleotide count', style='heat',
                        color=colors.red, altcolor=colors.orange)
    dinuc_track = Track('Di-nucleotide count', height=0.5,
                        greytrack=False, scale=False)
    dinuc_track.add_set(dinuc_set)

    # Place the tracks at levels 3 to 8, leaving some white space in the
    # middle: GC skew, GC/AT content, CDS, genes, misc + repeats,
    # di-nucleotide count.
    ordered_tracks = (skew_track, content_track, cds_track,
                      gene_track, misc_track, dinuc_track)
    for level, track in enumerate(ordered_tracks, 3):
        diagram.add_track(track, level)

    # Finally draw it in both formats,
    diagram.draw(format='circular', orientation='landscape',
                 tracklines=0, pagesize='A0', circular=True)
    circular_pdf = os.path.join('Graphics', 'GD_by_obj_circular.pdf')
    diagram.write(circular_pdf, 'PDF')

    diagram.draw(format='linear', orientation='landscape',
                 tracklines=0, pagesize='A0', fragments=3)
    linear_pdf = os.path.join('Graphics', 'GD_by_obj_linear.pdf')
    diagram.write(linear_pdf, 'PDF')