def drawsvg(clust):
    """Write one linear SVG diagram per cluster in ``clust``.

    Every key of ``clust`` identifies a cluster; its value must provide
    ``'gene'`` (an indexable collection of features), ``'start'`` and
    ``'end'``.  The page width is the cluster span divided by 1000, in cm,
    and the page height is 5 cm.  Output files are named
    ``<pref>.<key>.svg``, where ``pref`` is looked up in the enclosing
    module scope.
    """
    for cluster_id in clust:
        cluster = clust[cluster_id]

        # All genes of a cluster share the same big-arrow styling.
        arrow_style = dict(
            color=colors.lightgreen,
            label=True,
            label_size=10,
            label_angle=90,
            sigil="BIGARROW",
            arrowshaft_height=0.5,
            arrowhead_length=0.25,
        )
        gene_set = GenomeDiagram.FeatureSet()
        genes = cluster['gene']
        for gene_key in genes:
            gene_set.add_feature(genes[gene_key], **arrow_style)

        gene_track = GenomeDiagram.Track(name=cluster_id)
        gene_track.add_set(gene_set)
        diagram = GenomeDiagram.Diagram()
        diagram.add_track(gene_track, 1)

        # Page width scales with the cluster span.
        page_width = float(cluster['end'] - cluster['start']) / 1000.0
        diagram.draw(
            format="linear",
            orientation="landscape",
            pagesize=(page_width * cm, 5 * cm),
            fragments=1,
            start=cluster['start'],
            end=cluster['end'],
        )

        svg_name = '.'.join([pref, cluster_id, 'svg'])
        diagram.write(svg_name, "SVG")
def bottom_up():
    """Assemble a GenomeDiagram for plasmid pPCP1 bottom-up.

    Reads ``NC_005816.gb`` (GenBank format) from the working directory,
    collects every ``gene`` feature into a feature set using alternating
    blue / light-blue colours, then attaches feature set -> track -> diagram.
    The diagram is only assembled: this function does not draw it, write it
    or return it.
    """
    from Bio import SeqIO
    from Bio.Graphics import GenomeDiagram
    from reportlab.lib import colors
    from reportlab.lib.units import cm

    record = SeqIO.read("NC_005816.gb", "genbank")

    # Step 1: the feature set, holding gene features only.
    gd_feature_set = GenomeDiagram.FeatureSet()
    gene_features = (f for f in record.features if f.type == "gene")
    for gene in gene_features:
        # The set's current size decides the colour, so neighbours alternate.
        shade = colors.lightblue if len(gd_feature_set) % 2 else colors.blue
        gd_feature_set.add_feature(gene, color=shade, label=True)

    # Step 2: the containers.
    gd_diagram = GenomeDiagram.Diagram(
        "Yersinia pestis biovar Microtus plasmid pPCP1")
    gd_track_for_features = GenomeDiagram.Track(name="Annotated Features")

    # Step 3: glue the pieces together.
    gd_track_for_features.add_set(gd_feature_set)
    gd_diagram.add_track(gd_track_for_features, 1)
def main(name):
    """Draw the gene features of ``Genome.gb`` as a circular JPG map.

    Args:
        name: not used by this function.

    Reads ``Genome.gb`` (GenBank format) from the working directory and
    writes ``tomato_curly_stunt_virus.JPG``.
    """
    record = SeqIO.read("Genome.gb", "genbank")

    # Gene features only, coloured purple / light blue in turn.
    gd_feature_set = GenomeDiagram.FeatureSet()
    for gene in (f for f in record.features if f.type == "gene"):
        if len(gd_feature_set) % 2:
            shade = colors.lightblue
        else:
            shade = colors.purple
        gd_feature_set.add_feature(
            gene,
            color=shade,
            label=True,
            label_size=14,
            label_angle=10,
        )

    # Feature set -> track -> diagram.
    gd_track_for_features = GenomeDiagram.Track(name="Annotated Features")
    gd_track_for_features.add_set(gd_feature_set)
    gd_diagram = GenomeDiagram.Diagram("Curly Tomato Stunt Virus")
    gd_diagram.add_track(gd_track_for_features, 1)

    page = (20 * cm, 20 * cm)
    gd_diagram.draw(
        format="circular",
        circular=True,
        pagesize=page,
        start=0,
        end=len(record),
        circle_core=0.7,
    )
    gd_diagram.write("tomato_curly_stunt_virus.JPG", "JPG")
def make_diagram(genome_record: SeqIO.SeqRecord, pseudo_record: SeqIO.SeqRecord, outfile: str):
    """Plot the genome on one circular track and its pseudogenes on a second.

    Args:
        genome_record: record from the original GenBank file; only its
            ``gene`` features are drawn (track level 1, blue shades).
        pseudo_record: record from the pseudogene annotation; all of its
            features are drawn (track level 2, red shades).
        outfile: path of the PDF file to write.
    """

    def add_alternating(feature_set, features, even_colour, odd_colour):
        # Colour depends on how many features the set already holds.
        for item in features:
            shade = odd_colour if len(feature_set) % 2 else even_colour
            feature_set.add_feature(item, color=shade)

    diagram = GenomeDiagram.Diagram()

    # Track 1: genes from the original annotation.
    original_features = GenomeDiagram.FeatureSet()
    genes_only = (f for f in genome_record.features if f.type == "gene")
    add_alternating(original_features, genes_only,
                    colors.blue, colors.lightblue)
    track_for_original_features = GenomeDiagram.Track(
        name="Original Features",
        scale_largetick_interval=100000,
        scale_largeticks=5,
        scale_fontangle=180,
        scale_fontsize=10,
    )
    track_for_original_features.add_set(original_features)
    diagram.add_track(track=track_for_original_features, track_level=1)

    # Track 2: every feature of the pseudogene annotation.
    pseudo_features = GenomeDiagram.FeatureSet()
    add_alternating(pseudo_features, pseudo_record.features,
                    colors.red, colors.lightcoral)
    track_for_pseudogenes = GenomeDiagram.Track(
        name="Pseudogenes",
        scale_largetick_labels=0,
    )
    track_for_pseudogenes.add_set(pseudo_features)
    diagram.add_track(track=track_for_pseudogenes, track_level=2)

    diagram.draw(
        format="circular",
        circular=True,
        start=0,
        end=len(genome_record),
        circle_core=0.8,
    )
    diagram.write(filename=outfile, output="PDF")
def __init__(self, trackName):
    """Create an empty named track together with a separate feature set.

    Args:
        trackName: name given to the underlying ``GenomeDiagram.Track``.
    """
    # Both counters start at zero; nothing in this constructor updates them.
    self.nbFeats = 0
    self.size = 0

    self.trackName = trackName
    # The feature set is created here but not attached to the track.
    self.gdFeature = GenomeDiagram.FeatureSet()
    self.gdTrack = GenomeDiagram.Track(
        name=self.trackName,
        greytrack=True,
        greytrack_labels=1,
        greytrack_fontsize=33,
    )
def WriteImg(seqRecIn):
    """Render the CDS features of ``seqRecIn`` as a circular map in a JPG file.

    Each CDS is drawn as an arrow without a head.  Colours cycle through
    dark green, dark orange and light blue, and the shaft height cycles
    through medium, thinnest and thickest.  The result is written to
    ``TCSV_Sequence_Map.jpg``.
    """
    palette = (colors.darkgreen, colors.darkorange, colors.lightblue)

    cds_set = GenomeDiagram.FeatureSet()
    cds_features = (f for f in seqRecIn.features if f.type == "CDS")
    for position, cds in enumerate(cds_features):
        cds_set.add_feature(
            cds,
            color=palette[position % 3],
            label=True,
            label_size=20,
            sigil="ARROW",
            # Shaft height repeats every three CDS features.
            arrowshaft_height=(0.5 + ((0.2) - (((position + 1) % 3) * (0.2)))),
            # Zero head length leaves just the shaft.
            arrowhead_length=0,
        )

    # A single track carries all features.
    track = GenomeDiagram.Track(name="TCSV Sequence Features")
    track.add_set(cds_set)

    # Drawing options are passed to the constructor, so draw() needs none.
    diagram = GenomeDiagram.Diagram(
        name="Tomato Curly Stunt Virus",
        format="circular",
        pagesize=(1000, 1000),
        circular=True,
        start=0,
        end=len(seqRecIn),
        circle_core=0.5,
    )
    diagram.add_track(track, 1)

    diagram.draw()
    diagram.write("TCSV_Sequence_Map.jpg", "JPG")
def _create_track(self, track, track_options=None, feature_options=None):
    """Build the GenomeDiagram track for ``track``, feature set included.

    Args:
        track (str): track identifier.
        track_options (dict): extra keyword arguments for
            ``GenomeDiagram.Track``; they take precedence over
            ``self.track_options``.  ``name`` defaults to ``track`` and
            ``end`` to the stored length of this track.
        feature_options (dict): keyword arguments forwarded to
            ``self._create_feature_set``.

    Return:
        The new ``GenomeDiagram.Track`` with its feature set attached.
    """
    index = self._trackindex(track)

    # Instance-wide defaults first, per-call overrides on top.
    track_kwargs = dict(self.track_options)
    if track_options:
        track_kwargs.update(track_options)
    track_kwargs.setdefault("name", track)
    track_kwargs.setdefault("end", self._tracklens[index])

    feature_kwargs = {} if feature_options is None else feature_options
    feature_set = self._create_feature_set(track, **feature_kwargs)

    new_track = GenomeDiagram.Track(**track_kwargs)
    new_track.add_set(feature_set)
    return new_track
def write_visuals(seq_name, df, seq_length, results_pdf):
    """Draw the motifs and blocks in ``df`` as a linear diagram and save it as PDF.

    A placeholder row (``seq_name``, start 1, end 1) is placed in front of
    ``df``.  Rows whose motif name is all lower case are drawn as
    forward-strand "motifs"; every other row is drawn as a reverse-strand
    "blocks" feature whose colour depends on its name.

    Args:
        seq_name: diagram name, label of the placeholder feature and stem of
            the PDF file name.
        df: DataFrame with "Motif", "Start" and "End" columns.
        seq_length: kept for interface compatibility only.  As in the
            original code, the drawn range ends at the "End" value of the
            last row, not at this value.
        results_pdf: list to which the path of the written PDF is appended.
    """
    # (hex colour, label size, label position) for the named blocks.
    block_styles = {
        "O": ("#ffc69e", 10, "center"),
        "P": ("#fff6d4", 10, "middle"),
        "Q": ("#f6f9eb", 10, "middle"),
        "R": ("#ebf9f6", 10, "middle"),
        "S": ("#f9ebf6", 10, "middle"),
    }
    other_block_style = ("#C8C4B7", 20, "right")

    gd_diagram = GenomeDiagram.Diagram(seq_name, track_size=1)

    placeholder = pn.DataFrame(
        {"Motif": seq_name, "Start": 1, "End": 1}, index=[0])
    df = pn.concat([placeholder, df]).reset_index(drop=True)

    NCCR = GenomeDiagram.FeatureSet()
    last_end = 1
    for index, row in df.iterrows():
        motif_name = row["Motif"]
        start_motif = int(row["Start"])
        end_motif = int(row["End"])
        last_end = end_motif

        # Compare by value: `index is 0` only works by accident of small-int
        # caching and fails for non-builtin integer index labels.
        if index == 0:
            block = SeqFeature(
                FeatureLocation(start_motif, end_motif, strand=-1),
                type="blocks", id=motif_name)
            NCCR.add_feature(block, color=colors.HexColor("#8DD35F"),
                             name=motif_name, label=True, label_size=8,
                             label_position="middle", label_angle=180)
        elif motif_name.islower():
            motif = SeqFeature(
                FeatureLocation(start_motif, end_motif, strand=+1),
                type="motifs", id=motif_name)
            NCCR.add_feature(motif, color=colors.HexColor("#8DD35F"),
                             name=motif_name, label=True, label_size=10,
                             label_position="left", label_angle=90)
        else:
            hex_colour, label_size, label_position = block_styles.get(
                motif_name, other_block_style)
            block = SeqFeature(
                FeatureLocation(start_motif, end_motif, strand=-1),
                type="blocks", id=motif_name)
            NCCR.add_feature(block, color=colors.HexColor(hex_colour),
                             name=motif_name, label=True,
                             label_size=label_size,
                             label_position=label_position,
                             label_angle=180)

    NCCR_track = GenomeDiagram.Track(name="Annotated Features", height=0.3)
    NCCR_track.add_set(NCCR)
    gd_diagram.add_track(NCCR_track, 3)

    # NOTE(review): the range end comes from the last row, so the
    # `seq_length` argument has no effect -- confirm this is intended.
    gd_diagram.draw(format='linear', tracklines=0, pagesize='A4',
                    orientation='landscape', fragments=4, start=1,
                    end=last_end)

    pdf_filepath = os.path.join('results', '{}.pdf'.format(seq_name))
    gd_diagram.write(pdf_filepath, 'PDF', dpi=300)
    results_pdf.append(pdf_filepath)
def write_schemadelica_plot(self, path='./'):
    """Draw the primer scheme as a linear diagram and save PNG, PDF and SVG.

    A scale track sits at level 2.  Regions with an even region number are
    drawn on the track at level 4, odd ones on the track at level 6.  Each
    region contributes a labelled span from the left primer start to the
    right primer start, plus the two primers themselves.

    Args:
        path: directory receiving ``<prefix>.png``, ``<prefix>.pdf`` and
            ``<prefix>.svg``.
    """
    logger.info('Writing plot')
    gd_diagram = GenomeDiagram.Diagram("Primer Scheme", track_size=1)

    scale_track = GenomeDiagram.Track(name='scale',
                                      scale=True,
                                      scale_fontsize=10,
                                      scale_largetick_interval=1000,
                                      height=0.1)
    gd_diagram.add_track(scale_track, 2)

    even_regions = GenomeDiagram.FeatureSet()
    odd_regions = GenomeDiagram.FeatureSet()

    for r in self.regions:
        left = r.top_pair.left
        right = r.top_pair.right
        region = str(r.region_num)

        fwd_feature = SeqFeature(
            FeatureLocation(int(left.start), int(left.end), strand=0))
        rev_feature = SeqFeature(
            FeatureLocation(int(right.end), int(right.start), strand=0))
        region_feature = SeqFeature(
            FeatureLocation(int(left.start), int(right.start), strand=0))

        # Only the destination set differs between even and odd regions.
        target = even_regions if int(region) % 2 == 0 else odd_regions
        target.add_feature(region_feature,
                           color=colors.palevioletred,
                           name=region,
                           label=True,
                           label_size=10,
                           label_position="middle",
                           label_angle=0)
        for primer in (fwd_feature, rev_feature):
            target.add_feature(primer,
                               color=colors.red,
                               name=region,
                               label=False)

    for feature_set, level in ((even_regions, 4), (odd_regions, 6)):
        primer_track = GenomeDiagram.Track(name="Annotated Features",
                                           height=0.1)
        primer_track.add_set(feature_set)
        gd_diagram.add_track(primer_track, level)

    # One fragment per 10 kb of reference, never fewer than two.
    rows = max(2, int(round(len(self.primary_reference) / 10000.0)))
    gd_diagram.draw(format='linear',
                    pagesize=(300 * rows, 200 * rows),
                    fragments=rows,
                    start=0,
                    end=len(self.primary_reference))

    for extension, output_format in (('png', 'PNG'),
                                     ('pdf', 'PDF'),
                                     ('svg', 'SVG')):
        filepath = os.path.join(path,
                                '{}.{}'.format(self.prefix, extension))
        gd_diagram.write(filepath, output_format, dpi=300)
def write_schemadelica_plot(self):
    """Save the primer scheme, with a GC-content graph, as PDF and SVG.

    Track level 1 holds a line graph computed over 50-base windows of the
    primary reference; track level 2 holds the regions and primers.  Odd
    region numbers get strand +1, even ones strand -1.  Files are written
    to ``self.outpath`` as ``<prefix>.plot.pdf`` and ``<prefix>.plot.svg``.
    """
    reference = self.primary_ref
    gd_diagram = GenomeDiagram.Diagram("Primer Scheme", track_size=0.15)
    primer_feature_set = GenomeDiagram.FeatureSet()

    # GC-content graph.
    window = 50
    gc_set = GenomeDiagram.GraphSet("GC content")
    gc_values = self.apply_to_window(reference.seq, window, self.calc_gc)
    gc_set.new_graph(
        gc_values,
        "GC content",
        style="line",
        color=colors.violet,
        altcolor=colors.purple,
    )
    gc_track = GenomeDiagram.Track(
        "GC content",
        height=1.5,
        greytrack=0,
        scale_largetick_interval=1e3,
    )
    gc_track.add_set(gc_set)

    # Regions and primers.
    primer_color = colors.red
    region_color = colors.palevioletred
    for r in self.regions:
        region = str(r.region_num)
        if r.region_num % 2:
            strand, angle = 1, 0
        else:
            strand, angle = -1, -180

        fwd_location = FeatureLocation(r.left.start, r.left.end,
                                       strand=strand)
        fwd_feature = SeqFeature(fwd_location)
        rev_location = FeatureLocation(r.right.end, r.right.start,
                                       strand=strand)
        rev_feature = SeqFeature(rev_location)
        region_location = FeatureLocation(r.left.start, r.right.start,
                                          strand=strand)
        region_feature = SeqFeature(region_location)

        primer_feature_set.add_feature(
            region_feature,
            color=region_color,
            name=region,
            label=True,
            label_position="middle",
            label_angle=angle,
        )
        for primer in (fwd_feature, rev_feature):
            primer_feature_set.add_feature(primer, color=primer_color,
                                           name=region)

    primer_track = GenomeDiagram.Track(name="Annotated Features", height=1)
    primer_track.add_set(primer_feature_set)
    gd_diagram.add_track(primer_track, 2)
    gd_diagram.add_track(gc_track, 1)

    # One fragment per 10 kb of reference, never fewer than two.
    reference_length = len(reference)
    rows = max(2, int(round(reference_length / 10000.0)))
    gd_diagram.draw(
        format="linear",
        pagesize=(300 * rows, 200 * rows),
        fragments=rows,
        start=0,
        end=reference_length,
    )

    pdf_filepath = self.outpath / f"{self.prefix}.plot.pdf"
    svg_filepath = self.outpath / f"{self.prefix}.plot.svg"
    logger.info(f"Writing {pdf_filepath}")
    logger.info(f"Writing {svg_filepath}")
    gd_diagram.write(str(pdf_filepath), "PDF", dpi=300)
    gd_diagram.write(str(svg_filepath), "SVG", dpi=300)
colors.lightskyblue ] itr = iter(col_list) itr_copy = iter(col_list) record = SeqIO.read("Genome.gb", "genbank") track_list = [] cdsfs = GenomeDiagram.FeatureSet(name='CDS features') for feature in record.features: if feature.type == "CDS": cdsfs.add_feature(feature, sigil="ARROW", color=next(itr_copy)) gdt1 = GenomeDiagram.Track('CDS features', greytrack=1, greytrack_labels=3) gdt1.add_set(cdsfs) track_list.append(gdt1) for feature in record.features: if feature.type != "gene": continue gd_feature_set = GenomeDiagram.FeatureSet() gd_feature_set.add_feature(feature, sigil="ARROW", color=next(itr), label=True, label_size=15, label_angle=0) if (len(track_list) == 0 or len(track_list) == 4): track_for_this_feature = GenomeDiagram.Track(
import sys from Bio import SeqIO from reportlab.lib import colors from reportlab.lib.units import cm from Bio.Graphics import GenomeDiagram from Bio.SeqFeature import SeqFeature, FeatureLocation record = list(SeqIO.parse(open(sys.argv[1], 'r'), 'fasta'))[0] gd_track_for_features = GenomeDiagram.Track(name="Annotated Features", scale=0, scale_largetick_labels=True, scale_largetick_interval=5000, height=1) gd_diagram = GenomeDiagram.Diagram("Zika Brazil", track_size=0.8) scale_track = GenomeDiagram.Track(name='scale', scale=1, scale_largetick_labels=True, scale_largetick_interval=5000, height=0.3) gd_diagram.add_track(scale_track, 1) primer_feature_set = GenomeDiagram.FeatureSet() for line in open(sys.argv[2], 'r'): cols = line.strip().split() strand = (+1 if cols[0].split('_')[-1] == 'L' else -1) colour = (colors.red if cols[0].split('_')[-2] == 'in' else colors.blue) feature = SeqFeature( FeatureLocation(int(cols[3]), int(cols[4]), strand=strand))
# Bottom-up approach: create the objects first, then combine them.
# Create the feature set and its feature objects.
gd_feature_set2 = GenomeDiagram.FeatureSet()
for feature in record.features:
    if feature.type != "gene":
        # Exclude this feature
        continue
    # Alternate colours by the number of features already in the set.
    if len(gd_feature_set2) % 2 == 0:
        color = colors.blue
    else:
        color = colors.lightblue
    gd_feature_set2.add_feature(feature, color=color, label=True)

# Create a track, and a diagram.
gd_track_for_features = GenomeDiagram.Track(name="Annotated Features")
gd_diagram = GenomeDiagram.Diagram(
    "Yersinia pestis biovar Microtus plasmid pPCP1")

# Now glue the bits together.  The set filled above is the one to attach;
# the original attached `gd_feature_set`, leaving `gd_feature_set2` unused.
gd_track_for_features.add_set(gd_feature_set2)
gd_diagram.add_track(gd_track_for_features, 1)

gd_diagram.draw(format="linear",
                orientation="landscape",
                pagesize='A4',
                fragments=1,
                start=0,
                end=len(record))
gd_diagram.write("plasmid_linear4.pdf", "PDF")
gd_first_set.add_feature(feature, color=color, label=True, sigil="OCTO", label_size=25) if len(gd_total_set) % 2 == 0: color = colors.red else: color = colors.pink gd_total_set.add_feature(feature, color=color, label=True, sigil="OCTO", label_size=25) first_track_for_features = GenomeDiagram.Track() second_track_for_features = GenomeDiagram.Track() total_track = GenomeDiagram.Track() gd_diagram = GenomeDiagram.Diagram("Tomato curly stunt virus") gd_diagram_overlap = GenomeDiagram.Diagram("Tomato curly stunt virus Overlap") total_track.add_set(gd_total_set) gd_diagram_overlap.add_track(total_track, 1) first_track_for_features.add_set(gd_first_set) gd_diagram.add_track(first_track_for_features, 2) second_track_for_features.add_set(gd_second_set) gd_diagram.add_track(second_track_for_features, 1) gd_diagram.draw( format="circular",
def write_schemadelica_plot(self, path='./'):
    """Save the primer scheme, with a GC-skew graph, as PDF and SVG.

    Track level 1 holds a line graph computed over 50-base windows of the
    primary reference; track level 2 holds the regions and primers.  Odd
    region numbers get strand +1, even ones strand -1.

    Args:
        path: directory receiving ``<prefix>.pdf`` and ``<prefix>.svg``.
    """
    logger.info('Writing plot')
    reference = self.primary_reference
    gd_diagram = GenomeDiagram.Diagram("Primer Scheme", track_size=0.15)
    primer_feature_set = GenomeDiagram.FeatureSet()

    # GC-skew graph.
    window = 50
    gc_set = GenomeDiagram.GraphSet('GC skew')
    skew_values = self.apply_to_window(reference.seq, window,
                                       self.calc_gc_skew)
    gc_set.new_graph(
        skew_values,
        'GC Skew',
        style='line',
        color=colors.violet,
        altcolor=colors.purple,
    )
    gc_track = GenomeDiagram.Track(
        'GC Skew',
        height=1.5,
        greytrack=0,
        scale_largetick_interval=1e3,
    )
    gc_track.add_set(gc_set)

    # Regions and primers; colours are the same on both strands.
    primer_color = colors.red
    region_color = colors.palevioletred
    for r in self.regions:
        region = str(r.region_num)
        if r.region_num % 2:
            strand, angle = 1, 0
        else:
            strand, angle = -1, -180

        fwd_location = FeatureLocation(r.top_pair.left.start,
                                       r.top_pair.left.end,
                                       strand=strand)
        fwd_feature = SeqFeature(fwd_location)
        rev_location = FeatureLocation(r.top_pair.right.end,
                                       r.top_pair.right.start,
                                       strand=strand)
        rev_feature = SeqFeature(rev_location)
        region_location = FeatureLocation(r.top_pair.left.start,
                                          r.top_pair.right.start,
                                          strand=strand)
        region_feature = SeqFeature(region_location)

        primer_feature_set.add_feature(
            region_feature,
            color=region_color,
            name=region,
            label=True,
            label_position="middle",
            label_angle=angle,
        )
        for primer in (fwd_feature, rev_feature):
            primer_feature_set.add_feature(primer, color=primer_color,
                                           name=region)

    primer_track = GenomeDiagram.Track(name="Annotated Features", height=1)
    primer_track.add_set(primer_feature_set)
    gd_diagram.add_track(primer_track, 2)
    gd_diagram.add_track(gc_track, 1)

    # One fragment per 10 kb of reference, never fewer than two.
    reference_length = len(reference)
    rows = max(2, int(round(reference_length / 10000.0)))
    gd_diagram.draw(
        format='linear',
        pagesize=(300 * rows, 200 * rows),
        fragments=rows,
        start=0,
        end=reference_length,
    )

    pdf_filepath = os.path.join(path, '{}.pdf'.format(self.prefix))
    svg_filepath = os.path.join(path, '{}.svg'.format(self.prefix))
    gd_diagram.write(pdf_filepath, 'PDF', dpi=300)
    gd_diagram.write(svg_filepath, 'SVG', dpi=300)
return row gd_diagram = GenomeDiagram.Diagram(fragments=1, x=0.01, yt=0.05, yb=0, start=0, end=SUBJ_LEN, tracklines=False) # Scale track gd_track_for_scale = GenomeDiagram.Track(scale=True, scale_ticks=True, scale_largetick_interval=10000, scale_smalltick_interval=1000, scale_largetick_labels=True, scale_smalltick_labels=True, scale_fontangle=315) gd_diagram.add_track(gd_track_for_scale, 1) # Query tracks index = 1 csvreader = csv.reader(result_handle, delimiter='\t') for row in csvreader: if len(row) == 0: continue row = prepare_values(row) if row[LENGTH] < LEN_LIMIT: continue
# label_angle=90, # label_position='start', # label_strand=1, # strand=None) else: feature_set2.add_feature(feature, color='coral', label=True, label_size=14, label_angle=270, label_position='end', label_strand=-1, strand=None) i += 1 genes_track = GenomeDiagram.Track('genes', greytrack=False, scale=False) genes_track.add_set(feature_set1) genes_track.add_set(feature_set2) #%% from Bio.SeqFeature import SeqFeature, FeatureLocation snv_df = pd.read_csv(tab_dir + '/SNV_HUMAN_YFV_RESULTS.csv') snv_series = snv_df.iloc[:, 2] feature_set_SNV = GenomeDiagram.FeatureSet() for position in snv_series: snv = SeqFeature(FeatureLocation(position, position), strand=+1) feature_set_SNV.add_feature(snv, color='red', strand=None)