def bottom_up():
    """Assemble a GenomeDiagram for ``NC_005816.gb`` from the bottom up.

    Every ``gene`` feature of the GenBank record goes into a single
    feature set (colours alternate blue / light blue), the set is placed
    on a track, and the track is attached to a diagram at level 1.
    The diagram is neither drawn nor returned by this function.
    """
    from reportlab.lib import colors
    from reportlab.lib.units import cm
    from Bio.Graphics import GenomeDiagram
    from Bio import SeqIO

    plasmid = SeqIO.read("NC_005816.gb", "genbank")

    # Innermost objects first: the feature set and the features it holds.
    shades = (colors.blue, colors.lightblue)
    gene_set = GenomeDiagram.FeatureSet()
    only_genes = (feat for feat in plasmid.features if feat.type == "gene")
    for position, gene in enumerate(only_genes):
        # position equals the number of features already in the set.
        gene_set.add_feature(gene, color=shades[position % 2], label=True)

    # Then the containers: one track and the diagram that will own it.
    feature_track = GenomeDiagram.Track(name="Annotated Features")
    diagram = GenomeDiagram.Diagram(
        "Yersinia pestis biovar Microtus plasmid pPCP1")

    # Finally glue the pieces together.
    feature_track.add_set(gene_set)
    diagram.add_track(feature_track, 1)
def drawsvg(clust):
    """Write one linear SVG diagram per cluster in *clust*.

    Each ``clust[key]`` is indexed with ``'gene'`` (a container that is
    iterated and then indexed by the iterated values), ``'start'`` and
    ``'end'``. The page width is the cluster span divided by 1000, in cm;
    the page height is 5 cm. The output file name is
    ``<pref>.<key>.svg``, where ``pref`` is a name defined outside this
    function.
    """
    for cluster_id in clust:
        cluster = clust[cluster_id]

        # One big arrow per gene of the cluster.
        genes = cluster['gene']
        arrows = GenomeDiagram.FeatureSet()
        for gene_id in genes:
            arrows.add_feature(
                genes[gene_id],
                color=colors.lightgreen,
                label=True,
                label_size=10,
                label_angle=90,
                sigil="BIGARROW",
                arrowshaft_height=0.5,
                arrowhead_length=0.25,
            )

        cluster_track = GenomeDiagram.Track(name=cluster_id)
        diagram = GenomeDiagram.Diagram()
        cluster_track.add_set(arrows)
        diagram.add_track(cluster_track, 1)

        # Scale the page width with the cluster length.
        first, last = cluster['start'], cluster['end']
        width_units = float(last - first) / 1000.0
        diagram.draw(
            format="linear",
            orientation="landscape",
            pagesize=(width_units * cm, 5 * cm),
            fragments=1,
            start=first,
            end=last,
        )
        diagram.write('.'.join([pref, cluster_id, 'svg']), "SVG")
def main(name):
    """Draw the gene features of ``Genome.gb`` as a circular map.

    Gene features alternate between purple and light blue. The result is
    written to ``tomato_curly_stunt_virus.JPG``. The *name* argument is
    accepted but not used by the body.
    """
    genome = SeqIO.read("Genome.gb", "genbank")

    # Collect the gene features, alternating the two fill colours.
    palette = (colors.purple, colors.lightblue)
    gene_set = GenomeDiagram.FeatureSet()
    gene_features = (feat for feat in genome.features if feat.type == "gene")
    for position, gene in enumerate(gene_features):
        gene_set.add_feature(
            gene,
            color=palette[position % 2],
            label=True,
            label_size=14,
            label_angle=10,
        )

    # Single track on a single diagram.
    feature_track = GenomeDiagram.Track(name="Annotated Features")
    diagram = GenomeDiagram.Diagram("Curly Tomato Stunt Virus")
    feature_track.add_set(gene_set)
    diagram.add_track(feature_track, 1)

    page_side = 20 * cm
    diagram.draw(
        format="circular",
        circular=True,
        pagesize=(page_side, page_side),
        start=0,
        end=len(genome),
        circle_core=0.7,
    )
    diagram.write("tomato_curly_stunt_virus.JPG", "JPG")
def make_diagram(genome_record: SeqIO.SeqRecord,
                 pseudo_record: SeqIO.SeqRecord, outfile: str):
    """Plot the genome's genes and the pseudogenes on two circular tracks.

    Track level 1 holds the ``gene`` features of *genome_record*
    (alternating blue / light blue); track level 2 holds every feature of
    *pseudo_record* (alternating red / light coral). The diagram spans
    ``0 .. len(genome_record)`` and is written to *outfile* as a PDF.
    """
    diagram = GenomeDiagram.Diagram()

    # --- Track 1: genes taken from the original GenBank record ----------
    gene_palette = (colors.blue, colors.lightblue)
    gene_set = GenomeDiagram.FeatureSet()
    genes = (feat for feat in genome_record.features if feat.type == "gene")
    for position, gene in enumerate(genes):
        gene_set.add_feature(gene, color=gene_palette[position % 2])

    gene_track = GenomeDiagram.Track(
        name="Original Features",
        scale_largetick_interval=100000,
        scale_largeticks=5,
        scale_fontangle=180,
        scale_fontsize=10,
    )
    gene_track.add_set(gene_set)
    diagram.add_track(track=gene_track, track_level=1)

    # --- Track 2: features from the pseudogene annotation ---------------
    pseudo_palette = (colors.red, colors.lightcoral)
    pseudo_set = GenomeDiagram.FeatureSet()
    for position, pseudo in enumerate(pseudo_record.features):
        pseudo_set.add_feature(pseudo, color=pseudo_palette[position % 2])

    pseudo_track = GenomeDiagram.Track(name="Pseudogenes",
                                       scale_largetick_labels=0)
    pseudo_track.add_set(pseudo_set)
    diagram.add_track(track=pseudo_track, track_level=2)

    diagram.draw(
        format="circular",
        circular=True,
        start=0,
        end=len(genome_record),
        circle_core=0.8,
    )
    diagram.write(filename=outfile, output="PDF")
def __init__(self, trackName):
    """Set up an empty grey-backed GenomeDiagram track named *trackName*.

    Creates the track and a separate, empty feature set, and starts both
    counters (``size`` and ``nbFeats``) at zero.
    """
    self.trackName = trackName
    self.gdTrack = GenomeDiagram.Track(
        name=self.trackName,
        greytrack=True,
        greytrack_labels=1,
        greytrack_fontsize=33,
    )
    self.gdFeature = GenomeDiagram.FeatureSet()
    # Counters start empty.
    self.size = 0
    self.nbFeats = 0
def _create_feature_set(self, track, **feature_options):
    """Build the FeatureSet holding every repeat of the given track

    Args:
        track (str): track identifier, looked up in self._trackindices
        **feature_options (mixed): extra keyword arguments forwarded to
            FeatureSet.add_feature(). They take precedence over
            self.feature_options.

    Return:
        FeatureSet for the track
    """
    feature_set = GenomeDiagram.FeatureSet()
    locations = self._repeats[self._trackindices[track]]
    for position, location in enumerate(locations):
        repeat = SeqFeature(location)
        # Instance-wide defaults first, per-call overrides on top.
        merged = dict(self.feature_options, **feature_options)
        # The rainbow colour applies only when no colour was supplied.
        merged.setdefault("color", self.rainbow(position))
        feature_set.add_feature(repeat, **merged)
    return feature_set
def WriteImg(seqRecIn):
    """Draw the CDS features of *seqRecIn* as a circular map.

    Each CDS is drawn as an arrow with a zero-length head, cycling through
    three colours and three shaft heights. The diagram spans the whole
    record and is saved as ``TCSV_Sequence_Map.jpg``.
    """
    palette = [colors.darkgreen, colors.darkorange, colors.lightblue]

    cds_set = GenomeDiagram.FeatureSet()
    coding = (feat for feat in seqRecIn.features if feat.type == "CDS")
    for drawn, cds in enumerate(coding):
        # Shaft heights cycle 0.5 -> 0.3 -> 0.7 (medium, thinnest,
        # thickest), chosen to accommodate the TCSV sequence.
        shaft = 0.5 + (0.2 - ((drawn + 1) % 3) * 0.2)
        cds_set.add_feature(
            cds,
            color=palette[drawn % 3],
            label=True,
            label_size=20,
            sigil="ARROW",
            arrowshaft_height=shaft,
            # A zero-length head turns the arrowhead into a thin line.
            arrowhead_length=0,
        )

    # A single track is enough for the circular layout.
    cds_track = GenomeDiagram.Track(name="TCSV Sequence Features")
    cds_track.add_set(cds_set)

    # Drawing options are given up front, so draw() needs no arguments.
    layout = {
        "format": "circular",
        "pagesize": (1000, 1000),
        "circular": True,
        "start": 0,
        "end": len(seqRecIn),  # covers a sequence of any length
        "circle_core": 0.5,
    }
    diagram = GenomeDiagram.Diagram(name="Tomato Curly Stunt Virus", **layout)
    diagram.add_track(cds_track, 1)

    # Render in memory, then write the image file.
    diagram.draw()
    diagram.write("TCSV_Sequence_Map.jpg", "JPG")
# query feature coordinates q_feat_start = row[SSTART] - row[QSTART] q_feat_end = q_feat_start + row[QLEN] # trim query coordinates to fit inside the subject. Render an arrow on the end which does not fit. arrowhead_length_qstart = 0 arrowhead_length_qend = 0 if q_feat_start < 0: q_feat_start = 0 arrowhead_length_qstart = 0.5 if q_feat_end > SUBJ_LEN: q_feat_end = SUBJ_END arrowhead_length_qend = 0.5 gd_feature_set = GenomeDiagram.FeatureSet() gd_track = GenomeDiagram.Track() # because a feature can only have one arrow, render two features for the query # add/subtract 200 to not cover the arrow ends query_feature_start = SeqFeature(FeatureLocation(q_feat_start, q_feat_end - 200), strand=-1) gd_feature_set.add_feature(query_feature_start, color=colors.lightblue, sigil="BIGARROW", arrowshaft_height=1.0, arrowhead_length=arrowhead_length_qstart) query_feature_end = SeqFeature(FeatureLocation(q_feat_start + 200, q_feat_end),
def write_visuals(seq_name, df, seq_length, results_pdf):
    """Draw the motifs/blocks listed in *df* as a linear PDF diagram.

    A header row ``(seq_name, 1, 1)`` is prepended to *df*; the rows are
    then turned into features on a single track:

    * the header row becomes a green reverse-strand block;
    * rows whose ``Motif`` is lower-case become green forward-strand motifs;
    * all other rows become reverse-strand blocks, coloured per name for
      ``O``/``P``/``Q``/``R``/``S`` and grey otherwise.

    Args:
        seq_name: diagram title, header-row label and PDF base name.
        df: DataFrame with ``Motif``, ``Start`` and ``End`` columns.
        seq_length: accepted for interface compatibility; the drawn range
            ends at the ``End`` value of the last row instead.
        results_pdf: list that receives the path of the written PDF.

    Returns:
        None. The PDF is written to ``results/<seq_name>.pdf`` and its
        path is appended to *results_pdf*.
    """
    default_green = colors.HexColor("#8DD35F")
    # (fill colour, label position) for the specially coloured blocks.
    named_blocks = {
        "O": ("#ffc69e", "center"),
        "P": ("#fff6d4", "middle"),
        "Q": ("#f6f9eb", "middle"),
        "R": ("#ebf9f6", "middle"),
        "S": ("#f9ebf6", "middle"),
    }

    gd_diagram = GenomeDiagram.Diagram(seq_name, track_size=1)
    new_row = pn.DataFrame({
        "Motif": seq_name,
        "Start": 1,
        "End": 1
    }, index=[0])
    df = pn.concat([new_row, df]).reset_index(drop=True)

    NCCR = GenomeDiagram.FeatureSet()
    last_end = 1
    for index, row in df.iterrows():
        motif_name = row["Motif"]
        start_motif = int(row["Start"])
        end_motif = int(row["End"])
        last_end = end_motif

        # Compare by value: ``index is 0`` relied on int identity, which
        # is a SyntaxWarning on Python 3.8+ and fails for non-int indices.
        if index == 0:
            strand, feature_type = -1, "blocks"
            style = dict(color=default_green, label_size=8,
                         label_position="middle", label_angle=180)
        elif motif_name.islower():
            strand, feature_type = +1, "motifs"
            style = dict(color=default_green, label_size=10,
                         label_position="left", label_angle=90)
        else:
            strand, feature_type = -1, "blocks"
            if motif_name in named_blocks:
                hex_code, position = named_blocks[motif_name]
                style = dict(color=colors.HexColor(hex_code), label_size=10,
                             label_position=position, label_angle=180)
            else:
                style = dict(color=colors.HexColor("#C8C4B7"), label_size=20,
                             label_position="right", label_angle=180)

        feature = SeqFeature(
            FeatureLocation(start_motif, end_motif, strand=strand),
            type=feature_type,
            id=motif_name)
        NCCR.add_feature(feature, name=motif_name, label=True, **style)

    NCCR_track = GenomeDiagram.Track(name="Annotated Features", height=0.3)
    NCCR_track.add_set(NCCR)
    gd_diagram.add_track(NCCR_track, 3)

    # NOTE(review): the drawn range ends at the last row's End value, not
    # at the seq_length argument - kept as in the original; confirm intent.
    seq_length = last_end
    gd_diagram.draw(format='linear',
                    tracklines=0,
                    pagesize='A4',
                    orientation='landscape',
                    fragments=4,
                    start=1,
                    end=int(seq_length))

    pdf_filepath = os.path.join('results', '{}.pdf'.format(seq_name))
    gd_diagram.write(pdf_filepath, 'PDF', dpi=300)
    results_pdf.append(pdf_filepath)
def write_schemadelica_plot(self, path='./'):
    """Draw the primer scheme and write it as PNG, PDF and SVG.

    A scale track sits at level 2. Regions with an even region number are
    drawn on the track at level 4, the others on the track at level 6.
    Every region contributes three unstranded features to its track: the
    labelled span from the left primer start to the right primer start,
    plus the left and right primers themselves.

    Args:
        path: directory the ``<prefix>.png`` / ``.pdf`` / ``.svg`` files
            are written to.
    """
    logger.info('Writing plot')
    gd_diagram = GenomeDiagram.Diagram("Primer Scheme", track_size=1)
    scale_track = GenomeDiagram.Track(name='scale',
                                      scale=True,
                                      scale_fontsize=10,
                                      scale_largetick_interval=1000,
                                      height=0.1)
    gd_diagram.add_track(scale_track, 2)

    even_regions = GenomeDiagram.FeatureSet()
    odd_regions = GenomeDiagram.FeatureSet()

    for r in self.regions:
        left = r.top_pair.left
        right = r.top_pair.right
        region = str(r.region_num)

        fwd_feature = SeqFeature(
            FeatureLocation(int(left.start), int(left.end), strand=0))
        rev_feature = SeqFeature(
            FeatureLocation(int(right.end), int(right.start), strand=0))
        region_feature = SeqFeature(
            FeatureLocation(int(left.start), int(right.start), strand=0))

        # Both parities are drawn identically; only the target set differs.
        target = even_regions if int(region) % 2 == 0 else odd_regions
        target.add_feature(region_feature,
                           color=colors.palevioletred,
                           name=region,
                           label=True,
                           label_size=10,
                           label_position="middle",
                           label_angle=0)
        for primer in (fwd_feature, rev_feature):
            target.add_feature(primer,
                               color=colors.red,
                               name=region,
                               label=False)

    for feature_set, level in ((even_regions, 4), (odd_regions, 6)):
        primer_track = GenomeDiagram.Track(name="Annotated Features",
                                           height=0.1)
        primer_track.add_set(feature_set)
        gd_diagram.add_track(primer_track, level)

    # One fragment (row) per ~10 kb of reference, never fewer than two;
    # the page grows with the number of rows.
    rows = max(2, int(round(len(self.primary_reference) / 10000.0)))
    gd_diagram.draw(format='linear',
                    pagesize=(300 * rows, 200 * rows),
                    fragments=rows,
                    start=0,
                    end=len(self.primary_reference))

    for extension, output_format in (('png', 'PNG'), ('pdf', 'PDF'),
                                     ('svg', 'SVG')):
        filepath = os.path.join(path, '{}.{}'.format(self.prefix, extension))
        gd_diagram.write(filepath, output_format, dpi=300)
def write_schemadelica_plot(self):
    """Render the primer scheme with a GC content graph as PDF and SVG.

    Track level 1 carries a line graph computed over 50-wide windows of
    the primary reference sequence; track level 2 carries the primer
    regions. Regions with an odd region number are placed on strand +1,
    the others on strand -1 with their labels rotated by -180.
    Output goes to ``<prefix>.plot.pdf`` and ``<prefix>.plot.svg`` under
    ``self.outpath``.
    """
    diagram = GenomeDiagram.Diagram("Primer Scheme", track_size=0.15)
    primers = GenomeDiagram.FeatureSet()

    # --- GC track -------------------------------------------------------
    window = 50
    gc_graphs = GenomeDiagram.GraphSet("GC content")
    gc_values = self.apply_to_window(self.primary_ref.seq, window,
                                     self.calc_gc)
    gc_graphs.new_graph(
        gc_values,
        "GC content",
        style="line",
        color=colors.violet,
        altcolor=colors.purple,
    )
    gc_track = GenomeDiagram.Track(
        "GC content",
        height=1.5,
        greytrack=0,
        scale_largetick_interval=1e3,
    )
    gc_track.add_set(gc_graphs)

    # --- Primer track ---------------------------------------------------
    primer_color = colors.red
    region_color = colors.palevioletred
    for r in self.regions:
        label = str(r.region_num)
        if r.region_num % 2:
            strand, angle = 1, 0
        else:
            strand, angle = -1, -180

        span = SeqFeature(
            FeatureLocation(r.left.start, r.right.start, strand=strand))
        left_primer = SeqFeature(
            FeatureLocation(r.left.start, r.left.end, strand=strand))
        right_primer = SeqFeature(
            FeatureLocation(r.right.end, r.right.start, strand=strand))

        primers.add_feature(
            span,
            color=region_color,
            name=label,
            label=True,
            label_position="middle",
            label_angle=angle,
        )
        for primer in (left_primer, right_primer):
            primers.add_feature(primer, color=primer_color, name=label)

    primer_track = GenomeDiagram.Track(name="Annotated Features", height=1)
    primer_track.add_set(primers)

    gd_levels = ((primer_track, 2), (gc_track, 1))
    for track, level in gd_levels:
        diagram.add_track(track, level)

    # One fragment per ~10 kb of reference, with a minimum of two.
    reference_length = len(self.primary_ref)
    rows = max(2, int(round(reference_length / 10000.0)))
    diagram.draw(
        format="linear",
        pagesize=(300 * rows, 200 * rows),
        fragments=rows,
        start=0,
        end=reference_length,
    )

    pdf_filepath = self.outpath / f"{self.prefix}.plot.pdf"
    svg_filepath = self.outpath / f"{self.prefix}.plot.svg"
    # Both destinations are logged before anything is written.
    logger.info(f"Writing {pdf_filepath}")
    logger.info(f"Writing {svg_filepath}")
    diagram.write(str(pdf_filepath), "PDF", dpi=300)
    diagram.write(str(svg_filepath), "SVG", dpi=300)
from Bio import SeqIO from Bio import SeqUtils from Bio.Seq import Seq col_list = [ colors.coral, colors.blue, colors.crimson, colors.navy, colors.red, colors.lightskyblue ] itr = iter(col_list) itr_copy = iter(col_list) record = SeqIO.read("Genome.gb", "genbank") track_list = [] cdsfs = GenomeDiagram.FeatureSet(name='CDS features') for feature in record.features: if feature.type == "CDS": cdsfs.add_feature(feature, sigil="ARROW", color=next(itr_copy)) gdt1 = GenomeDiagram.Track('CDS features', greytrack=1, greytrack_labels=3) gdt1.add_set(cdsfs) track_list.append(gdt1) for feature in record.features: if feature.type != "gene": continue gd_feature_set = GenomeDiagram.FeatureSet() gd_feature_set.add_feature(feature, sigil="ARROW",
# -*- coding: utf-8 -*- """ Created on Thu Jan 21 16:18:02 2021 @author: sodasim """ from reportlab.lib import colors from reportlab.lib.units import cm from Bio.Graphics import GenomeDiagram from Bio import SeqIO record = SeqIO.read("Genome.gb", "genbank") gd_first_set = GenomeDiagram.FeatureSet() gd_second_set = GenomeDiagram.FeatureSet() gd_total_set = GenomeDiagram.FeatureSet() for feature in record.features: firstTrack = True if feature.type != "gene": continue for other_feature in gd_first_set.get_features(): if other_feature.type != "gene": continue # Determines if feature should be placed on the second track if feature.location.start < other_feature.location.end and feature.location.start > other_feature.location.start: if len(gd_second_set) % 2 == 0: color = colors.blue else: color = colors.lightblue gd_second_set.add_feature(feature,
def write_schemadelica_plot(self, path='./'):
    """Render the primer scheme with a GC skew graph as PDF and SVG.

    Track level 1 carries a line graph computed over 50-wide windows of
    the primary reference sequence with ``self.calc_gc_skew``; track
    level 2 carries the primer regions. Regions with an odd region number
    go on strand +1, the others on strand -1 with labels rotated by -180.

    Args:
        path: directory the ``<prefix>.pdf`` and ``<prefix>.svg`` files
            are written to.
    """
    logger.info('Writing plot')
    diagram = GenomeDiagram.Diagram("Primer Scheme", track_size=0.15)
    primers = GenomeDiagram.FeatureSet()

    # GC skew graph track
    window = 50
    skew_graphs = GenomeDiagram.GraphSet('GC skew')
    skew_values = self.apply_to_window(self.primary_reference.seq, window,
                                       self.calc_gc_skew)
    skew_graphs.new_graph(skew_values,
                          'GC Skew',
                          style='line',
                          color=colors.violet,
                          altcolor=colors.purple)
    gc_track = GenomeDiagram.Track('GC Skew',
                                   height=1.5,
                                   greytrack=0,
                                   scale_largetick_interval=1e3)
    gc_track.add_set(skew_graphs)

    # Primer track: one labelled span plus two primers per region
    primer_color = colors.red
    region_color = colors.palevioletred
    for r in self.regions:
        pair = r.top_pair
        label = str(r.region_num)
        strand = 1 if r.region_num % 2 else -1
        angle = 0 if strand == 1 else -180

        left_primer = SeqFeature(
            FeatureLocation(pair.left.start, pair.left.end, strand=strand))
        right_primer = SeqFeature(
            FeatureLocation(pair.right.end, pair.right.start, strand=strand))
        span = SeqFeature(
            FeatureLocation(pair.left.start, pair.right.start,
                            strand=strand))

        primers.add_feature(span,
                            color=region_color,
                            name=label,
                            label=True,
                            label_position="middle",
                            label_angle=angle)
        primers.add_feature(left_primer, color=primer_color, name=label)
        primers.add_feature(right_primer, color=primer_color, name=label)

    primer_track = GenomeDiagram.Track(name="Annotated Features", height=1)
    primer_track.add_set(primers)
    diagram.add_track(primer_track, 2)
    diagram.add_track(gc_track, 1)

    # One fragment per ~10 kb of reference, with a minimum of two.
    reference_length = len(self.primary_reference)
    rows = max(2, int(round(reference_length / 10000.0)))
    diagram.draw(format='linear',
                 pagesize=(300 * rows, 200 * rows),
                 fragments=rows,
                 start=0,
                 end=reference_length)

    pdf_filepath = os.path.join(path, '{}.pdf'.format(self.prefix))
    svg_filepath = os.path.join(path, '{}.svg'.format(self.prefix))
    for destination, kind in ((pdf_filepath, 'PDF'), (svg_filepath, 'SVG')):
        diagram.write(destination, kind, dpi=300)
# Track intended for the annotated features (its own scale is switched off).
gd_track_for_features = GenomeDiagram.Track(name="Annotated Features",
                                            scale=0,
                                            scale_largetick_labels=True,
                                            scale_largetick_interval=5000,
                                            height=1)
gd_diagram = GenomeDiagram.Diagram("Zika Brazil", track_size=0.8)

# Separate, thinner track that only carries the scale.
scale_track = GenomeDiagram.Track(name='scale',
                                  scale=1,
                                  scale_largetick_labels=True,
                                  scale_largetick_interval=5000,
                                  height=0.3)
gd_diagram.add_track(scale_track, 1)

# One feature per primer line of the file given as second CLI argument.
# Column 0 is the primer name, columns 3 and 4 are its start and end.
primer_feature_set = GenomeDiagram.FeatureSet()
with open(sys.argv[2], 'r') as primer_file:  # closed even if parsing fails
    for line in primer_file:
        cols = line.strip().split()
        if not cols:
            # Blank lines would otherwise crash on cols[0].
            continue
        name_parts = cols[0].split('_')
        # A name ending in "_L" goes on the forward strand, others reverse.
        strand = (+1 if name_parts[-1] == 'L' else -1)
        # Names whose second-to-last token is "in" are red, others blue.
        colour = (colors.red if name_parts[-2] == 'in' else colors.blue)
        feature = SeqFeature(
            FeatureLocation(int(cols[3]), int(cols[4]), strand=strand))
        primer_feature_set.add_feature(feature,
                                       color=colour,
                                       name=cols[0],
                                       label=True,
                                       label_size=4,
                                       label_position="start",
                                       label_angle=45)
for col in seq_df.columns: bases = seq_df[col] bases.dropna(how='any', inplace=True) ent = entropy1(bases) pos_ent = (col, ent) alignment_entropy.append(pos_ent) #%% for feature in ref_polyprot.features: print(feature) # Bottom Up Approach feature_set1 = GenomeDiagram.FeatureSet() feature_set2 = GenomeDiagram.FeatureSet() i = 1 for feature in ref_polyprot.features: if feature.type != "mat_peptide": #Exclude this feature continue if i % 2 == 0: feature_set1.add_feature(feature, color='green', label=True, label_size=14, label_angle=90, label_position='start', label_strand=1,