def test_diagram_via_object_pdf(self): """Construct and draw PDF using object approach.""" genbank_entry = self.record gdd = Diagram('Test Diagram') gdt1 = Track('CDS features', greytrack=True, scale_largetick_interval=1e4, scale_smalltick_interval=1e3, greytrack_labels=10, greytrack_font_color="red", scale_format="SInt") gdt2 = Track('gene features', greytrack=1, scale_largetick_interval=1e4) # First add some feature sets: gdfsA = FeatureSet(name='CDS backgrounds') gdfsB = FeatureSet(name='gene background') gdfs1 = FeatureSet(name='CDS features') gdfs2 = FeatureSet(name='gene features') gdfs3 = FeatureSet(name='misc_features') gdfs4 = FeatureSet(name='repeat regions') prev_gene = None cds_count = 0 for feature in genbank_entry.features: if feature.type == 'CDS': cds_count += 1 if prev_gene: # Assuming it goes with this CDS! if cds_count % 2 == 0: dark, light = colors.peru, colors.tan else: dark, light = colors.burlywood, colors.bisque # Background for CDS, a = gdfsA.add_feature(SeqFeature( FeatureLocation(feature.location.start, feature.location.end, strand=0)), color=dark) # Background for gene, b = gdfsB.add_feature(SeqFeature( FeatureLocation(prev_gene.location.start, prev_gene.location.end, strand=0)), color=dark) # Cross link, gdd.cross_track_links.append(CrossLink(a, b, light, dark)) prev_gene = None if feature.type == 'gene': prev_gene = feature # Some cross links on the same linear diagram fragment, f, c = fill_and_border(colors.red) a = gdfsA.add_feature(SeqFeature(FeatureLocation(2220, 2230)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(2200, 2210)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, f, c)) f, c = fill_and_border(colors.blue) a = gdfsA.add_feature(SeqFeature(FeatureLocation(2150, 2200)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(2220, 2290)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True)) f, c = fill_and_border(colors.green) a = gdfsA.add_feature(SeqFeature(FeatureLocation(2250, 2560)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(2300, 2860)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, f, c)) # Some cross links where both parts are saddling the linear diagram fragment boundary, f, c = fill_and_border(colors.red) a = gdfsA.add_feature(SeqFeature(FeatureLocation(3155, 3250)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(3130, 3300)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, f, c)) # Nestled within that (drawn on top), f, c = fill_and_border(colors.blue) a = gdfsA.add_feature(SeqFeature(FeatureLocation(3160, 3275)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(3180, 3225)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True)) # Some cross links where two features are on either side of the linear diagram fragment boundary, f, c = fill_and_border(colors.green) a = gdfsA.add_feature(SeqFeature(FeatureLocation(6450, 6550)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(6265, 6365)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c)) f, c = fill_and_border(colors.gold) a = gdfsA.add_feature(SeqFeature(FeatureLocation(6265, 6365)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(6450, 6550)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c)) f, c = fill_and_border(colors.red) a = gdfsA.add_feature(SeqFeature(FeatureLocation(6275, 6375)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(6430, 6530)), color=f, border=c) gdd.cross_track_links.append( CrossLink(a, b, color=f, border=c, flip=True)) f, c = fill_and_border(colors.blue) a = gdfsA.add_feature(SeqFeature(FeatureLocation(6430, 6530)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(6275, 6375)), color=f, border=c) gdd.cross_track_links.append( CrossLink(a, b, color=f, border=c, flip=True)) cds_count = 0 for feature in genbank_entry.features: if feature.type == 'CDS': cds_count += 1 if cds_count % 2 == 0: gdfs1.add_feature(feature, color=colors.pink, sigil="ARROW") else: gdfs1.add_feature(feature, color=colors.red, sigil="ARROW") if feature.type == 'gene': # Note we set the colour of ALL the genes later on as a test, gdfs2.add_feature(feature, sigil="ARROW") if feature.type == 'misc_feature': gdfs3.add_feature(feature, color=colors.orange) if feature.type == 'repeat_region': gdfs4.add_feature(feature, color=colors.purple) # gdd.cross_track_links = gdd.cross_track_links[:1] gdfs1.set_all_features('label', 1) gdfs2.set_all_features('label', 1) gdfs3.set_all_features('label', 1) gdfs4.set_all_features('label', 1) gdfs3.set_all_features('hide', 0) gdfs4.set_all_features('hide', 0) # gdfs1.set_all_features('color', colors.red) gdfs2.set_all_features('color', colors.blue) gdt1.add_set(gdfsA) # Before CDS so under them! gdt1.add_set(gdfs1) gdt2.add_set(gdfsB) # Before genes so under them! gdt2.add_set(gdfs2) gdt3 = Track('misc features and repeats', greytrack=1, scale_largetick_interval=1e4) gdt3.add_set(gdfs3) gdt3.add_set(gdfs4) # Now add some graph sets: # Use a fairly large step so we can easily tell the difference # between the bar and line graphs. step = len(genbank_entry) // 200 gdgs1 = GraphSet('GC skew') graphdata1 = apply_to_window(genbank_entry.seq, step, calc_gc_skew, step) gdgs1.new_graph(graphdata1, 'GC Skew', style='bar', color=colors.violet, altcolor=colors.purple) gdt4 = Track('GC Skew (bar)', height=1.94, greytrack=1, scale_largetick_interval=1e4) gdt4.add_set(gdgs1) gdgs2 = GraphSet('GC and AT Content') gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_gc_content, step), 'GC content', style='line', color=colors.lightgreen, altcolor=colors.darkseagreen) gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_at_content, step), 'AT content', style='line', color=colors.orange, altcolor=colors.red) gdt5 = Track('GC Content(green line), AT Content(red line)', height=1.94, greytrack=1, scale_largetick_interval=1e4) gdt5.add_set(gdgs2) gdgs3 = GraphSet('Di-nucleotide count') step = len(genbank_entry) // 400 # smaller step gdgs3.new_graph(apply_to_window(genbank_entry.seq, step, calc_dinucleotide_counts, step), 'Di-nucleotide count', style='heat', color=colors.red, altcolor=colors.orange) gdt6 = Track('Di-nucleotide count', height=0.5, greytrack=False, scale=False) gdt6.add_set(gdgs3) # Add the tracks (from both features and graphs) # Leave some white space in the middle/bottom gdd.add_track(gdt4, 3) # GC skew gdd.add_track(gdt5, 4) # GC and AT content gdd.add_track(gdt1, 5) # CDS features gdd.add_track(gdt2, 6) # Gene features gdd.add_track(gdt3, 7) # Misc features and repeat feature gdd.add_track(gdt6, 8) # Feature depth # Finally draw it in both formats, and full view and partial gdd.draw(format='circular', orientation='landscape', tracklines=0, pagesize='A0') output_filename = os.path.join('Graphics', 'GD_by_obj_circular.pdf') gdd.write(output_filename, 'PDF') gdd.circular = False gdd.draw(format='circular', orientation='landscape', tracklines=0, pagesize='A0', start=3000, end=6300) output_filename = os.path.join('Graphics', 'GD_by_obj_frag_circular.pdf') gdd.write(output_filename, 'PDF') gdd.draw(format='linear', orientation='landscape', tracklines=0, pagesize='A0', fragments=3) output_filename = os.path.join('Graphics', 'GD_by_obj_linear.pdf') gdd.write(output_filename, 'PDF') gdd.set_all_tracks("greytrack_labels", 2) gdd.draw(format='linear', orientation='landscape', tracklines=0, pagesize=(30 * cm, 10 * cm), fragments=1, start=3000, end=6300) output_filename = os.path.join('Graphics', 'GD_by_obj_frag_linear.pdf') gdd.write(output_filename, 'PDF')
def test_diagram_via_object_pdf(self): """Construct and draw PDF using object approach.""" genbank_entry = self.record gdd = Diagram('Test Diagram') gdt1 = Track('CDS features', greytrack=True, scale_largetick_interval=1e4, scale_smalltick_interval=1e3, greytrack_labels=10, greytrack_font_color="red", scale_format = "SInt") gdt2 = Track('gene features', greytrack=1, scale_largetick_interval=1e4) #First add some feature sets: gdfsA = FeatureSet(name='CDS backgrounds') gdfsB = FeatureSet(name='gene background') gdfs1 = FeatureSet(name='CDS features') gdfs2 = FeatureSet(name='gene features') gdfs3 = FeatureSet(name='misc_features') gdfs4 = FeatureSet(name='repeat regions') prev_gene = None cds_count = 0 for feature in genbank_entry.features: if feature.type == 'CDS': cds_count += 1 if prev_gene: #Assuming it goes with this CDS! if cds_count % 2 == 0: dark, light = colors.peru, colors.tan else: dark, light = colors.burlywood, colors.bisque #Background for CDS, a = gdfsA.add_feature(SeqFeature(FeatureLocation(feature.location.start, feature.location.end, strand=0)), color=dark) #Background for gene, b = gdfsB.add_feature(SeqFeature(FeatureLocation(prev_gene.location.start, prev_gene.location.end, strand=0)), color=dark) #Cross link, gdd.cross_track_links.append(CrossLink(a, b, light, dark)) prev_gene = None if feature.type == 'gene': prev_gene = feature #Some cross links on the same linear diagram fragment, f, c = fill_and_border(colors.red) a = gdfsA.add_feature(SeqFeature(FeatureLocation(2220,2230)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(2200,2210)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, f, c)) f, c = fill_and_border(colors.blue) a = gdfsA.add_feature(SeqFeature(FeatureLocation(2150,2200)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(2220,2290)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True)) f, c = fill_and_border(colors.green) a = gdfsA.add_feature(SeqFeature(FeatureLocation(2250,2560)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(2300,2860)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, f, c)) #Some cross links where both parts are saddling the linear diagram fragment boundary, f, c = fill_and_border(colors.red) a = gdfsA.add_feature(SeqFeature(FeatureLocation(3155,3250)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(3130,3300)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, f, c)) #Nestled within that (drawn on top), f, c = fill_and_border(colors.blue) a = gdfsA.add_feature(SeqFeature(FeatureLocation(3160,3275)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(3180,3225)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, f, c, flip=True)) #Some cross links where two features are on either side of the linear diagram fragment boundary, f, c = fill_and_border(colors.green) a = gdfsA.add_feature(SeqFeature(FeatureLocation(6450,6550)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(6265,6365)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c)) f, c = fill_and_border(colors.gold) a = gdfsA.add_feature(SeqFeature(FeatureLocation(6265,6365)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(6450,6550)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c)) f, c = fill_and_border(colors.red) a = gdfsA.add_feature(SeqFeature(FeatureLocation(6275,6375)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(6430,6530)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c, flip=True)) f, c = fill_and_border(colors.blue) a = gdfsA.add_feature(SeqFeature(FeatureLocation(6430,6530)), color=f, border=c) b = gdfsB.add_feature(SeqFeature(FeatureLocation(6275,6375)), color=f, border=c) gdd.cross_track_links.append(CrossLink(a, b, color=f, border=c, flip=True)) cds_count = 0 for feature in genbank_entry.features: if feature.type == 'CDS': cds_count += 1 if cds_count % 2 == 0: gdfs1.add_feature(feature, color=colors.pink, sigil="ARROW") else: gdfs1.add_feature(feature, color=colors.red, sigil="ARROW") if feature.type == 'gene': #Note we set the colour of ALL the genes later on as a test, gdfs2.add_feature(feature, sigil="ARROW") if feature.type == 'misc_feature': gdfs3.add_feature(feature, color=colors.orange) if feature.type == 'repeat_region': gdfs4.add_feature(feature, color=colors.purple) #gdd.cross_track_links = gdd.cross_track_links[:1] gdfs1.set_all_features('label', 1) gdfs2.set_all_features('label', 1) gdfs3.set_all_features('label', 1) gdfs4.set_all_features('label', 1) gdfs3.set_all_features('hide', 0) gdfs4.set_all_features('hide', 0) #gdfs1.set_all_features('color', colors.red) gdfs2.set_all_features('color', colors.blue) gdt1.add_set(gdfsA) # Before CDS so under them! gdt1.add_set(gdfs1) gdt2.add_set(gdfsB) # Before genes so under them! gdt2.add_set(gdfs2) gdt3 = Track('misc features and repeats', greytrack=1, scale_largetick_interval=1e4) gdt3.add_set(gdfs3) gdt3.add_set(gdfs4) #Now add some graph sets: #Use a fairly large step so we can easily tell the difference #between the bar and line graphs. step = len(genbank_entry)//200 gdgs1 = GraphSet('GC skew') graphdata1 = apply_to_window(genbank_entry.seq, step, calc_gc_skew, step) gdgs1.new_graph(graphdata1, 'GC Skew', style='bar', color=colors.violet, altcolor=colors.purple) gdt4 = Track( 'GC Skew (bar)', height=1.94, greytrack=1, scale_largetick_interval=1e4) gdt4.add_set(gdgs1) gdgs2 = GraphSet('GC and AT Content') gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_gc_content, step), 'GC content', style='line', color=colors.lightgreen, altcolor=colors.darkseagreen) gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_at_content, step), 'AT content', style='line', color=colors.orange, altcolor=colors.red) gdt5 = Track( 'GC Content(green line), AT Content(red line)', height=1.94, greytrack=1, scale_largetick_interval=1e4) gdt5.add_set(gdgs2) gdgs3 = GraphSet('Di-nucleotide count') step = len(genbank_entry) // 400 # smaller step gdgs3.new_graph(apply_to_window(genbank_entry.seq, step, calc_dinucleotide_counts, step), 'Di-nucleotide count', style='heat', color=colors.red, altcolor=colors.orange) gdt6 = Track('Di-nucleotide count', height=0.5, greytrack=False, scale=False) gdt6.add_set(gdgs3) #Add the tracks (from both features and graphs) #Leave some white space in the middle/bottom gdd.add_track(gdt4, 3) # GC skew gdd.add_track(gdt5, 4) # GC and AT content gdd.add_track(gdt1, 5) # CDS features gdd.add_track(gdt2, 6) # Gene features gdd.add_track(gdt3, 7) # Misc features and repeat feature gdd.add_track(gdt6, 8) # Feature depth #Finally draw it in both formats, and full view and partial gdd.draw(format='circular', orientation='landscape', tracklines=0, pagesize='A0') output_filename = os.path.join('Graphics', 'GD_by_obj_circular.pdf') gdd.write(output_filename, 'PDF') gdd.circular=False gdd.draw(format='circular', orientation='landscape', tracklines=0, pagesize='A0', start=3000, end=6300) output_filename = os.path.join('Graphics', 'GD_by_obj_frag_circular.pdf') gdd.write(output_filename, 'PDF') gdd.draw(format='linear', orientation='landscape', tracklines=0, pagesize='A0', fragments=3) output_filename = os.path.join('Graphics', 'GD_by_obj_linear.pdf') gdd.write(output_filename, 'PDF') gdd.set_all_tracks("greytrack_labels", 2) gdd.draw(format='linear', orientation='landscape', tracklines=0, pagesize=(30*cm,10*cm), fragments=1, start=3000, end=6300) output_filename = os.path.join('Graphics', 'GD_by_obj_frag_linear.pdf') gdd.write(output_filename, 'PDF')
def generateCircleGraph(self, file, windows_to_top_topologies, topologies_to_colors, window_size, window_offset, sites_to_informative): """ Creates genetic circle graph showing the windows and the areas where each topology appears Inputs: i. file --- phylip file inputted in GUI ii. windows_to_top_topologies --- mapping outputted by windows_to_newick()[0] iii. topologies_to_colors --- mapping outputted by topology_colors()[0] iv. window_size --- size inputted in GUI v. window_offset --- size inputted in GUI Returns: A genetic circle graph GenomeAtlas. """ ############################# Format Data ############################# # get the length of the sequence with open(file) as f: length_of_sequences = int(f.readline().split()[1]) f.close() # accounts for offset windows_to_top_topologies2 = {} for window in windows_to_top_topologies: windows_to_top_topologies2[ window * window_offset] = windows_to_top_topologies[window] windows_to_top_topologies = windows_to_top_topologies2.items() # gets windows windows = [] for window_topology in windows_to_top_topologies: windows.append(window_topology[0]) for i in range(length_of_sequences): if i not in windows: windows_to_top_topologies.append((i, 0)) # maps data to topologies topologies_to_data = {} for topology in topologies_to_colors: topologies_to_data[topology] = [] for topology in topologies_to_colors: for window in windows_to_top_topologies: if topology == window[1]: topologies_to_data[topology].append(tuple([window[0], 1])) else: topologies_to_data[topology].append( tuple([window[0], -0.1])) # maps data to colors data_to_colors = {} for topology in topologies_to_data: data_to_colors[str( topologies_to_data[topology])] = topologies_to_colors[topology] includeOther = False if 'Other' in topologies_to_data: includeOther = True # -1 because top_topologies_to_colors includes 'Other' number_of_top_topologies = len(topologies_to_colors) - 1 else: number_of_top_topologies = len(topologies_to_colors) # removes 'Other' from mapping if includeOther: minor_topology_data = topologies_to_data['Other'] del topologies_to_data['Other'] data = topologies_to_data.values() # separates data into windowed and un-windowed windowed_data = [] nonwindowed_data = [] for i in range(length_of_sequences): if i not in windows: nonwindowed_data.append(tuple([i, 1])) windowed_data.append(tuple([i, 0])) else: for j in range(i, i + window_size): windowed_data.append((j, 1)) nonwindowed_data.append(tuple([i, 0])) ############################# Build Graph ############################# # name of the figure name = "../plots/GenomeAtlas" graphStyle = 'bar' # create the diagram -- highest level container for everything diagram = GenomeDiagram.Diagram(name) diagram.new_track(1, greytrack=0, name="Track", height=2, hide=0, scale=1, scale_color=colors.black, scale_font='Helvetica', scale_fontsize=6, scale_fontangle=45, scale_ticks=1, scale_largeticks=0.3, scale_smallticks=0.1, scale_largetick_interval=(length_of_sequences / 6), scale_smalltick_interval=(length_of_sequences / 12), scale_largetick_labels=1, scale_smalltick_labels=0) if includeOther: diagram \ .new_track(2, name="Minor Topologies", height=1.0, hide=0, greytrack=0, greytrack_labels=2, greytrack_font_size=8, grey_track_font_color=colors.black, scale=1, scale_ticks=0, axis_labels=0) \ .new_set('graph') \ .new_graph(minor_topology_data, style=graphStyle, colour=colors.HexColor(data_to_colors[str(minor_topology_data)]), altcolour=colors.transparent, linewidth=1) for i in range(number_of_top_topologies): # create tracks -- and add them to the diagram if i == 0: diagram \ .new_track(i + 3, name="Track" + str(i + 1), height=1.0, hide=0, greytrack=0, greytrack_labels=2, greytrack_font_size=8, grey_track_font_color=colors.black, scale=1, scale_ticks=0, axis_labels=0) \ .new_set('graph') \ .new_graph(data[i], style=graphStyle, colour=colors.HexColor(data_to_colors[str(data[i])]), altcolour=colors.transparent, linewidth=1) else: diagram \ .new_track(i + 3, name="Track" + str(i + 1), height=1.0, hide=0, greytrack=0, greytrack_labels=2, greytrack_font_size=8, grey_track_font_color=colors.black, scale=0) \ .new_set('graph') \ .new_graph(data[i], style=graphStyle, colour=colors.HexColor(data_to_colors[str(data[i])]), altcolour=colors.transparent, linewidth=1) # outer ring shit graph_set = GraphSet('graph') graph_set.new_graph(nonwindowed_data, style=graphStyle, color=colors.HexColor('#cccccc'), altcolour=colors.transparent) graph_set.new_graph(windowed_data, style=graphStyle, color=colors.HexColor('#2f377c'), altcolour=colors.transparent) # diagram \ # .new_track(i + 5, name="Track" + str(i + 2), height=2, hide=0, greytrack=0, greytrack_labels=2, # greytrack_font_size=8, grey_track_font_color=colors.black, scale=0) \ # .add_set(graph_set) ############################################ diagram \ .new_track(i + 4, name="Track" + str(i + 1), height=1.0, hide=0, greytrack=0, greytrack_labels=2, greytrack_font_size=8, grey_track_font_color=colors.black, scale=0) \ .new_set('graph') \ .new_graph(sites_to_informative.items(), style='line', colour=colors.coral, altcolour=colors.transparent, linewidth=0.01) ########################################################### diagram.draw(format="circular", pagesize='A5', orientation='landscape', x=0.0, y=0.0, track_size=1.88, tracklines=0, circular=0, circle_core=0.3, start=0, end=length_of_sequences - 1) # save the file # diagram.write("../plots/GenomeAtlas.eps", "EPS") # diagram.write("plots/GenomeAtlas.svg", "SVG") diagram.write("plots/GenomeAtlas.pdf", "PDF") diagram.write("plots/GenomeAtlas.png", "PNG") self.emit(QtCore.SIGNAL('CIRCLE_GRAPH_COMPLETE'))
def test_diagram_via_object_pdf(self): """Construct and draw PDF using object approach.""" genbank_entry = self.record gdd = Diagram('Test Diagram') #First add some feature sets: gdfs1 = FeatureSet(name='CDS features') gdfs2 = FeatureSet(name='gene features') gdfs3 = FeatureSet(name='misc_features') gdfs4 = FeatureSet(name='repeat regions') cds_count = 0 for feature in genbank_entry.features: if feature.type == 'CDS': cds_count += 1 if cds_count % 2 == 0: gdfs1.add_feature(feature, color=colors.pink) else: gdfs1.add_feature(feature, color=colors.red) if feature.type == 'gene': gdfs2.add_feature(feature) if feature.type == 'misc_feature': gdfs3.add_feature(feature, color=colors.orange) if feature.type == 'repeat_region': gdfs4.add_feature(feature, color=colors.purple) gdfs1.set_all_features('label', 1) gdfs2.set_all_features('label', 1) gdfs3.set_all_features('label', 1) gdfs4.set_all_features('label', 1) gdfs3.set_all_features('hide', 0) gdfs4.set_all_features('hide', 0) #gdfs1.set_all_features('color', colors.red) gdfs2.set_all_features('color', colors.blue) gdt1 = Track('CDS features', greytrack=True, scale_largetick_interval=1e4, scale_smalltick_interval=1e3, greytrack_labels=10, greytrack_font_color="red", scale_format = "SInt") gdt1.add_set(gdfs1) gdt2 = Track('gene features', greytrack=1, scale_largetick_interval=1e4) gdt2.add_set(gdfs2) gdt3 = Track('misc features and repeats', greytrack=1, scale_largetick_interval=1e4) gdt3.add_set(gdfs3) gdt3.add_set(gdfs4) #Now add some graph sets: #Use a fairly large step so we can easily tell the difference #between the bar and line graphs. step = len(genbank_entry)/200 gdgs1 = GraphSet('GC skew') graphdata1 = apply_to_window(genbank_entry.seq, step, calc_gc_skew, step) gdgs1.new_graph(graphdata1, 'GC Skew', style='bar', color=colors.violet, altcolor=colors.purple) gdt4 = Track(\ 'GC Skew (bar)', height=1.94, greytrack=1, scale_largetick_interval=1e4) gdt4.add_set(gdgs1) gdgs2 = GraphSet('GC and AT Content') gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_gc_content, step), 'GC content', style='line', color=colors.lightgreen, altcolor=colors.darkseagreen) gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_at_content, step), 'AT content', style='line', color=colors.orange, altcolor=colors.red) gdt5 = Track(\ 'GC Content(green line), AT Content(red line)', height=1.94, greytrack=1, scale_largetick_interval=1e4) gdt5.add_set(gdgs2) gdgs3 = GraphSet('Di-nucleotide count') step = len(genbank_entry)/400 #smaller step gdgs3.new_graph(apply_to_window(genbank_entry.seq, step, calc_dinucleotide_counts, step), 'Di-nucleotide count', style='heat', color=colors.red, altcolor=colors.orange) gdt6 = Track('Di-nucleotide count', height=0.5, greytrack=False, scale=False) gdt6.add_set(gdgs3) #Add the tracks (from both features and graphs) #Leave some white space in the middle gdd.add_track(gdt4, 3) # GC skew gdd.add_track(gdt5, 4) # GC and AT content gdd.add_track(gdt1, 5) # CDS features gdd.add_track(gdt2, 6) # Gene features gdd.add_track(gdt3, 7) # Misc features and repeat feature gdd.add_track(gdt6, 8) # Feature depth #Finally draw it in both formats, gdd.draw(format='circular', orientation='landscape', tracklines=0, pagesize='A0', circular=True) output_filename = os.path.join('Graphics', 'GD_by_obj_circular.pdf') gdd.write(output_filename, 'PDF') gdd.draw(format='linear', orientation='landscape', tracklines=0, pagesize='A0', fragments=3) output_filename = os.path.join('Graphics', 'GD_by_obj_linear.pdf') gdd.write(output_filename, 'PDF')
def test_diagram_via_object_pdf(self): """Construct and draw PDF using object approach.""" genbank_entry = self.record gdd = Diagram('Test Diagram') #First add some feature sets: gdfs1 = FeatureSet(name='CDS features') gdfs2 = FeatureSet(name='gene features') gdfs3 = FeatureSet(name='misc_features') gdfs4 = FeatureSet(name='repeat regions') cds_count = 0 for feature in genbank_entry.features: if feature.type == 'CDS': cds_count += 1 if cds_count % 2 == 0: gdfs1.add_feature(feature, color=colors.pink) else: gdfs1.add_feature(feature, color=colors.red) if feature.type == 'gene': gdfs2.add_feature(feature) if feature.type == 'misc_feature': gdfs3.add_feature(feature, color=colors.orange) if feature.type == 'repeat_region': gdfs4.add_feature(feature, color=colors.purple) gdfs1.set_all_features('label', 1) gdfs2.set_all_features('label', 1) gdfs3.set_all_features('label', 1) gdfs4.set_all_features('label', 1) gdfs3.set_all_features('hide', 0) gdfs4.set_all_features('hide', 0) #gdfs1.set_all_features('color', colors.red) gdfs2.set_all_features('color', colors.blue) gdt1 = Track('CDS features', greytrack=True, scale_largetick_interval=1e4, scale_smalltick_interval=1e3, greytrack_labels=10, greytrack_font_color="red", scale_format="SInt") gdt1.add_set(gdfs1) gdt2 = Track('gene features', greytrack=1, scale_largetick_interval=1e4) gdt2.add_set(gdfs2) gdt3 = Track('misc features and repeats', greytrack=1, scale_largetick_interval=1e4) gdt3.add_set(gdfs3) gdt3.add_set(gdfs4) #Now add some graph sets: #Use a fairly large step so we can easily tell the difference #between the bar and line graphs. step = len(genbank_entry) // 200 gdgs1 = GraphSet('GC skew') graphdata1 = apply_to_window(genbank_entry.seq, step, calc_gc_skew, step) gdgs1.new_graph(graphdata1, 'GC Skew', style='bar', color=colors.violet, altcolor=colors.purple) gdt4 = Track(\ 'GC Skew (bar)', height=1.94, greytrack=1, scale_largetick_interval=1e4) gdt4.add_set(gdgs1) gdgs2 = GraphSet('GC and AT Content') gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_gc_content, step), 'GC content', style='line', color=colors.lightgreen, altcolor=colors.darkseagreen) gdgs2.new_graph(apply_to_window(genbank_entry.seq, step, calc_at_content, step), 'AT content', style='line', color=colors.orange, altcolor=colors.red) gdt5 = Track(\ 'GC Content(green line), AT Content(red line)', height=1.94, greytrack=1, scale_largetick_interval=1e4) gdt5.add_set(gdgs2) gdgs3 = GraphSet('Di-nucleotide count') step = len(genbank_entry) // 400 #smaller step gdgs3.new_graph(apply_to_window(genbank_entry.seq, step, calc_dinucleotide_counts, step), 'Di-nucleotide count', style='heat', color=colors.red, altcolor=colors.orange) gdt6 = Track('Di-nucleotide count', height=0.5, greytrack=False, scale=False) gdt6.add_set(gdgs3) #Add the tracks (from both features and graphs) #Leave some white space in the middle gdd.add_track(gdt4, 3) # GC skew gdd.add_track(gdt5, 4) # GC and AT content gdd.add_track(gdt1, 5) # CDS features gdd.add_track(gdt2, 6) # Gene features gdd.add_track(gdt3, 7) # Misc features and repeat feature gdd.add_track(gdt6, 8) # Feature depth #Finally draw it in both formats, gdd.draw(format='circular', orientation='landscape', tracklines=0, pagesize='A0', circular=True) output_filename = os.path.join('Graphics', 'GD_by_obj_circular.pdf') gdd.write(output_filename, 'PDF') gdd.draw(format='linear', orientation='landscape', tracklines=0, pagesize='A0', fragments=3) output_filename = os.path.join('Graphics', 'GD_by_obj_linear.pdf') gdd.write(output_filename, 'PDF')