Exemple #1
0
def bottom_up():
    """Assemble a GenomeDiagram for the genes in ``NC_005816.gb``.

    The pieces are built individually (feature set, then track, then
    diagram) and attached to each other afterwards. The diagram is only
    assembled here: nothing is drawn, written or returned.
    """
    from reportlab.lib import colors
    from reportlab.lib.units import cm
    from Bio.Graphics import GenomeDiagram
    from Bio import SeqIO

    genbank_record = SeqIO.read("NC_005816.gb", "genbank")

    # Collect every "gene" feature, alternating blue / light blue.
    gene_set = GenomeDiagram.FeatureSet()
    for candidate in genbank_record.features:
        if candidate.type == "gene":
            shade = colors.lightblue if len(gene_set) % 2 else colors.blue
            gene_set.add_feature(candidate, color=shade, label=True)

    # One track on one diagram.
    feature_track = GenomeDiagram.Track(name="Annotated Features")
    diagram = GenomeDiagram.Diagram(
        "Yersinia pestis biovar Microtus plasmid pPCP1")

    # Wire the parts together: set -> track -> diagram (track level 1).
    feature_track.add_set(gene_set)
    diagram.add_track(feature_track, 1)
Exemple #2
0
def drawsvg(clust):
    """Write one linear SVG gene map per cluster.

    Args:
        clust: mapping of cluster name -> dict with the keys ``'gene'``
            (mapping whose values are passed to ``FeatureSet.add_feature``),
            ``'start'`` and ``'end'`` (the coordinate range to draw).

    Each cluster is written to ``<pref>.<cluster name>.svg``, where ``pref``
    is read from module scope. The page is 5 cm tall and 1 cm wide for every
    1000 units of ``end - start``.
    """
    for k in clust:
        cluster = clust[k]
        genes = cluster['gene']
        # The earlier version drew and wrote the diagram from inside the
        # per-gene loop, so a cluster without genes never produced a file.
        # Keep that outcome while avoiding the redundant redraws.
        if len(genes) == 0:
            continue

        fs = GenomeDiagram.FeatureSet()
        for g in genes:
            fs.add_feature(
                genes[g],
                color=colors.lightgreen,
                label=True,
                label_size=10,
                label_angle=90,
                sigil="BIGARROW",
                arrowshaft_height=0.5,
                arrowhead_length=0.25,
            )

        # Build, draw and write the diagram once, after all genes are added.
        track = GenomeDiagram.Track(name=k)
        diag = GenomeDiagram.Diagram()
        track.add_set(fs)
        diag.add_track(track, 1)
        pglen = float(cluster['end'] - cluster['start']) / float(1000)
        diag.draw(
            format="linear",
            orientation="landscape",
            pagesize=(pglen * cm, 5 * cm),
            fragments=1,
            start=cluster['start'],
            end=cluster['end'],
        )
        diag.write('.'.join([pref, k, 'svg']), "SVG")
Exemple #3
0
def main(name):
    """Draw the genes of ``Genome.gb`` as a circular map and save it as a JPG.

    Args:
        name: unused; kept so existing callers continue to work.

    Gene features alternate between purple and light blue. The diagram is
    written to ``tomato_curly_stunt_virus.JPG``. When the record contains no
    ``gene`` feature nothing is written, as before.
    """
    record = SeqIO.read("Genome.gb", "genbank")

    # Create the feature set and its feature objects
    gd_feature_set = GenomeDiagram.FeatureSet()
    for feature in record.features:
        if feature.type != "gene":
            continue
        if len(gd_feature_set) % 2 == 0:
            color = colors.purple
        else:
            color = colors.lightblue
        gd_feature_set.add_feature(feature,
                                   color=color,
                                   label=True,
                                   label_size=14,
                                   label_angle=10)

    # The earlier version rebuilt the track/diagram and rewrote the image
    # for every gene inside the loop; the final image is the same when it
    # is drawn a single time here.
    if len(gd_feature_set) == 0:
        return

    # Create a track and a diagram
    gd_track_for_features = GenomeDiagram.Track(name="Annotated Features")
    gd_diagram = GenomeDiagram.Diagram("Curly Tomato Stunt Virus")

    gd_track_for_features.add_set(gd_feature_set)
    gd_diagram.add_track(gd_track_for_features, 1)

    gd_diagram.draw(
        format="circular",
        circular=True,
        pagesize=(20 * cm, 20 * cm),
        start=0,
        end=len(record),
        circle_core=0.7,
    )
    gd_diagram.write("tomato_curly_stunt_virus.JPG", "JPG")
def make_diagram(genome_record: SeqIO.SeqRecord,
                 pseudo_record: SeqIO.SeqRecord, outfile: str):
    """Draw a two-track circular genome map and save it as a PDF.

    Track level 1 holds the ``gene`` features of ``genome_record`` (the
    original GenBank record); track level 2 holds every feature of
    ``pseudo_record`` (the pseudogene annotation). Colours alternate within
    each track. The drawn range is ``0..len(genome_record)`` and the result
    is written to ``outfile``.
    """
    diagram = GenomeDiagram.Diagram()

    # Track level 1: genes from the original record, blue / light blue.
    gene_set = GenomeDiagram.FeatureSet()
    for candidate in genome_record.features:
        if candidate.type == "gene":
            shade = colors.lightblue if len(gene_set) % 2 else colors.blue
            gene_set.add_feature(candidate, color=shade)

    gene_track = GenomeDiagram.Track(name="Original Features",
                                     scale_largetick_interval=100000,
                                     scale_largeticks=5,
                                     scale_fontangle=180,
                                     scale_fontsize=10)
    gene_track.add_set(gene_set)
    diagram.add_track(track=gene_track, track_level=1)

    # Track level 2: all pseudogene features, red / light coral.
    pseudo_set = GenomeDiagram.FeatureSet()
    for pseudo in pseudo_record.features:
        shade = colors.lightcoral if len(pseudo_set) % 2 else colors.red
        pseudo_set.add_feature(pseudo, color=shade)

    pseudo_track = GenomeDiagram.Track(name="Pseudogenes",
                                       scale_largetick_labels=0)
    pseudo_track.add_set(pseudo_set)
    diagram.add_track(track=pseudo_track, track_level=2)

    diagram.draw(format="circular",
                 circular=True,
                 start=0,
                 end=len(genome_record),
                 circle_core=0.8)
    diagram.write(filename=outfile, output="PDF")
Exemple #5
0
 def __init__(self, trackName):
     """Set up an empty grey GenomeDiagram track called ``trackName``.

     The track and an empty feature set are created here; ``size`` and
     ``nbFeats`` both start at zero.
     """
     self.trackName = trackName
     self.gdTrack = GenomeDiagram.Track(
         name=trackName,
         greytrack=True,
         greytrack_labels=1,
         greytrack_fontsize=33,
     )
     self.gdFeature = GenomeDiagram.FeatureSet()
     self.size = 0
     self.nbFeats = 0
    def _create_feature_set(self, track, **feature_options):
        """Build the FeatureSet holding the repeats of one track.

        Args:
            track (str): track identifier, looked up in ``self._trackindices``.
            **feature_options (mixed): extra keyword arguments forwarded to
                FeatureSet.add_feature(). They take precedence over
                ``self.feature_options``.

        Return:
            FeatureSet with one feature per location in the track. A feature
            gets ``self.rainbow(i)`` as its colour unless a ``color`` option
            was supplied.
        """
        feature_set = GenomeDiagram.FeatureSet()
        locations = self._repeats[self._trackindices[track]]
        for position, location in enumerate(locations):
            repeat = SeqFeature(location)
            # Instance-wide defaults first, per-call overrides on top.
            merged = dict(self.feature_options, **feature_options)
            merged.setdefault("color", self.rainbow(position))
            feature_set.add_feature(repeat, **merged)
        return feature_set
Exemple #7
0
def WriteImg(seqRecIn):
    """Draw the CDS features of ``seqRecIn`` as a circular map.

    Each CDS is drawn as an arrow with a zero-length head (so it renders as
    a bar). Colours cycle through three values and shaft heights cycle
    through 0.5, 0.3 and 0.7. The image is written to
    ``TCSV_Sequence_Map.jpg``.
    """
    palette = [colors.darkgreen, colors.darkorange, colors.lightblue]

    cds_set = GenomeDiagram.FeatureSet()
    coding = (feat for feat in seqRecIn.features if feat.type == "CDS")
    for position, cds in enumerate(coding):
        # Height pattern tuned for the TCSV sequence:
        # medium -> thinnest -> thickest -> medium -> ...
        shaft = (0.5 + ((0.2) - (((position + 1) % 3) * (0.2))))
        cds_set.add_feature(cds,
                            color=palette[position % 3],
                            label=True,
                            label_size=20,
                            sigil="ARROW",
                            arrowshaft_height=shaft,
                            arrowhead_length=0)

    # A single track is enough for the circular layout.
    cds_track = GenomeDiagram.Track(name="TCSV Sequence Features")
    cds_track.add_set(cds_set)

    # Drawing options are given to the Diagram itself, so draw() below is
    # called without arguments. `end` follows the record length.
    tcsv_map = GenomeDiagram.Diagram(name="Tomato Curly Stunt Virus",
                                     format="circular",
                                     pagesize=(1000, 1000),
                                     circular=True,
                                     start=0,
                                     end=len(seqRecIn),
                                     circle_core=0.5)
    tcsv_map.add_track(cds_track, 1)

    # Render in memory, then save through reportlab.
    tcsv_map.draw()
    tcsv_map.write("TCSV_Sequence_Map.jpg", "JPG")
    # query feature coordinates
    q_feat_start = row[SSTART] - row[QSTART]
    q_feat_end = q_feat_start + row[QLEN]

    # trim query coordinates to fit inside the subject. Render an arrow on the end which does not fit.
    arrowhead_length_qstart = 0
    arrowhead_length_qend = 0
    if q_feat_start < 0:
        q_feat_start = 0
        arrowhead_length_qstart = 0.5
    if q_feat_end > SUBJ_LEN:
        q_feat_end = SUBJ_END
        arrowhead_length_qend = 0.5

    gd_feature_set = GenomeDiagram.FeatureSet()
    gd_track = GenomeDiagram.Track()

    # because a feature can only have one arrow, render two features for the query
    # add/subtract 200 to not cover the arrow ends
    query_feature_start = SeqFeature(FeatureLocation(q_feat_start,
                                                     q_feat_end - 200),
                                     strand=-1)
    gd_feature_set.add_feature(query_feature_start,
                               color=colors.lightblue,
                               sigil="BIGARROW",
                               arrowshaft_height=1.0,
                               arrowhead_length=arrowhead_length_qstart)

    query_feature_end = SeqFeature(FeatureLocation(q_feat_start + 200,
                                                   q_feat_end),
Exemple #9
0
def write_visuals(seq_name, df, seq_length, results_pdf):
    """Draw the motifs/blocks of one sequence as a linear PDF diagram.

    Args:
        seq_name: sequence name; used as the diagram title, as the label of
            the leading placeholder feature and as the PDF file name.
        df: DataFrame with the columns ``Motif``, ``Start`` and ``End``.
        seq_length: overwritten before use -- the drawn range ends at the
            ``End`` value of the last row (behaviour kept from the earlier
            version; NOTE(review): confirm the argument is meant to be
            ignored).
        results_pdf: list to which the path of the written PDF is appended.

    The PDF is written to ``results/<seq_name>.pdf``; the directory is not
    created here.
    """
    # Fill colours of the named upper-case blocks; anything else that is not
    # lower-case falls back to the grey default further down.
    block_hex = {
        "O": "#ffc69e",
        "P": "#fff6d4",
        "Q": "#f6f9eb",
        "R": "#ebf9f6",
        "S": "#f9ebf6",
    }

    gd_diagram = GenomeDiagram.Diagram(seq_name, track_size=1)

    # Prepend a 1..1 placeholder row that carries the sequence name.
    new_row = pn.DataFrame({
        "Motif": seq_name,
        "Start": 1,
        "End": 1
    },
                           index=[0])
    df = pn.concat([new_row, df]).reset_index(drop=True)

    NCCR = GenomeDiagram.FeatureSet()
    for index, row in df.iterrows():
        motif_name, start_motif, end_motif = (row["Motif"], row["Start"],
                                              row["End"])

        # Compare by value: `index is 0` only worked through CPython's
        # small-integer caching and raises a SyntaxWarning on Python 3.8+.
        if index == 0:
            strand, feature_type, hex_color = -1, "blocks", "#8DD35F"
            label_size, label_position, label_angle = 8, "middle", 180
        elif motif_name.islower():
            # Lower-case names are motifs, drawn on the forward strand.
            strand, feature_type, hex_color = +1, "motifs", "#8DD35F"
            label_size, label_position, label_angle = 10, "left", 90
        elif motif_name in block_hex:
            strand, feature_type = -1, "blocks"
            hex_color = block_hex[motif_name]
            label_size, label_angle = 10, 180
            label_position = "center" if motif_name == "O" else "middle"
        else:
            strand, feature_type, hex_color = -1, "blocks", "#C8C4B7"
            label_size, label_position, label_angle = 20, "right", 180

        feature = SeqFeature(FeatureLocation(int(start_motif),
                                             int(end_motif),
                                             strand=strand),
                             type=feature_type,
                             id=motif_name)
        NCCR.add_feature(feature,
                         color=colors.HexColor(hex_color),
                         name=motif_name,
                         label=True,
                         label_size=label_size,
                         label_position=label_position,
                         label_angle=label_angle)

    NCCR_track = GenomeDiagram.Track(name="Annotated Features", height=0.3)
    NCCR_track.add_set(NCCR)
    gd_diagram.add_track(NCCR_track, 3)

    # The drawn range ends where the last row ends.
    seq_length = int(end_motif)
    gd_diagram.draw(format='linear',
                    tracklines=0,
                    pagesize='A4',
                    orientation='landscape',
                    fragments=4,
                    start=1,
                    end=int(seq_length))

    pdf_filepath = os.path.join('results', '{}.pdf'.format(seq_name))
    gd_diagram.write(pdf_filepath, 'PDF', dpi=300)
    results_pdf.append(pdf_filepath)
Exemple #10
0
    def write_schemadelica_plot(self, path='./'):
        """Draw the primer scheme and write it as PNG, PDF and SVG.

        Args:
            path: directory for the output files, which are named
                ``<self.prefix>.png``, ``.pdf`` and ``.svg``.

        A scale track sits at level 2. Regions with an even ``region_num``
        are drawn on the track at level 4 and the others on the track at
        level 6. Each region contributes a labelled amplicon feature plus
        its two unlabelled primer features.
        """
        logger.info('Writing plot')
        gd_diagram = GenomeDiagram.Diagram("Primer Scheme", track_size=1)
        scale_track = GenomeDiagram.Track(name='scale',
                                          scale=True,
                                          scale_fontsize=10,
                                          scale_largetick_interval=1000,
                                          height=0.1)
        gd_diagram.add_track(scale_track, 2)

        primer_feature_set_1 = GenomeDiagram.FeatureSet()  # even regions
        primer_feature_set_2 = GenomeDiagram.FeatureSet()  # odd regions

        for r in self.regions:
            left = r.top_pair.left
            right = r.top_pair.right
            region = str(r.region_num)

            fwd_feature = SeqFeature(
                FeatureLocation(int(left.start), int(left.end), strand=0))
            # The right primer is stored end-before-start in the scheme.
            rev_feature = SeqFeature(
                FeatureLocation(int(right.end), int(right.start), strand=0))
            region_feature = SeqFeature(
                FeatureLocation(int(left.start), int(right.start), strand=0))

            # Choose the destination once instead of repeating all three
            # add_feature calls in both branches.
            if int(region) % 2 == 0:
                target_set = primer_feature_set_1
            else:
                target_set = primer_feature_set_2

            target_set.add_feature(region_feature,
                                   color=colors.palevioletred,
                                   name=region,
                                   label=True,
                                   label_size=10,
                                   label_position="middle",
                                   label_angle=0)
            for primer_feature in (fwd_feature, rev_feature):
                target_set.add_feature(primer_feature,
                                       color=colors.red,
                                       name=region,
                                       label=False)

        for track_level, feature_set in ((4, primer_feature_set_1),
                                         (6, primer_feature_set_2)):
            primer_track = GenomeDiagram.Track(name="Annotated Features",
                                               height=0.1)
            primer_track.add_set(feature_set)
            gd_diagram.add_track(primer_track, track_level)

        # One fragment (row) per 10 kb of reference, but never fewer than 2;
        # the page grows with the number of rows.
        rows = max(2, int(round(len(self.primary_reference) / 10000.0)))
        gd_diagram.draw(format='linear',
                        pagesize=(300 * rows, 200 * rows),
                        fragments=rows,
                        start=0,
                        end=len(self.primary_reference))

        png_filepath = os.path.join(path, '{}.png'.format(self.prefix))
        pdf_filepath = os.path.join(path, '{}.pdf'.format(self.prefix))
        svg_filepath = os.path.join(path, '{}.svg'.format(self.prefix))
        gd_diagram.write(png_filepath, 'PNG', dpi=300)
        gd_diagram.write(pdf_filepath, 'PDF', dpi=300)
        gd_diagram.write(svg_filepath, 'SVG', dpi=300)
Exemple #11
0
    def write_schemadelica_plot(self):
        """Render the primer scheme with a GC-content graph as PDF and SVG."""
        diagram = GenomeDiagram.Diagram("Primer Scheme", track_size=0.15)
        primers = GenomeDiagram.FeatureSet()

        # GC-content graph, computed over 50-base windows.
        window_size = 50
        gc_graphs = GenomeDiagram.GraphSet("GC content")
        gc_values = self.apply_to_window(self.primary_ref.seq, window_size,
                                         self.calc_gc)
        gc_graphs.new_graph(gc_values,
                            "GC content",
                            style="line",
                            color=colors.violet,
                            altcolor=colors.purple)
        gc_track = GenomeDiagram.Track("GC content",
                                       height=1.5,
                                       greytrack=0,
                                       scale_largetick_interval=1e3)
        gc_track.add_set(gc_graphs)

        # Primer features: odd region numbers on strand +1, even on -1.
        for r in self.regions:
            label = str(r.region_num)
            strand = 1 if r.region_num % 2 else -1
            left, right = r.left, r.right

            forward = SeqFeature(
                FeatureLocation(left.start, left.end, strand=strand))
            reverse = SeqFeature(
                FeatureLocation(right.end, right.start, strand=strand))
            amplicon = SeqFeature(
                FeatureLocation(left.start, right.start, strand=strand))

            # The amplicon carries the label; the two primers sit on top.
            primers.add_feature(amplicon,
                                color=colors.palevioletred,
                                name=label,
                                label=True,
                                label_position="middle",
                                label_angle=0 if strand == 1 else -180)
            for primer in (forward, reverse):
                primers.add_feature(primer, color=colors.red, name=label)

        primer_track = GenomeDiagram.Track(name="Annotated Features", height=1)
        primer_track.add_set(primers)

        diagram.add_track(primer_track, 2)
        diagram.add_track(gc_track, 1)

        # One fragment per 10 kb of reference, minimum two.
        reference_length = len(self.primary_ref)
        rows = max(2, int(round(reference_length / 10000.0)))
        diagram.draw(format="linear",
                     pagesize=(300 * rows, 200 * rows),
                     fragments=rows,
                     start=0,
                     end=reference_length)

        pdf_target = self.outpath / f"{self.prefix}.plot.pdf"
        svg_target = self.outpath / f"{self.prefix}.plot.svg"
        logger.info(f"Writing {pdf_target}")
        logger.info(f"Writing {svg_target}")
        diagram.write(str(pdf_target), "PDF", dpi=300)
        diagram.write(str(svg_target), "SVG", dpi=300)
Exemple #12
0
from Bio import SeqIO
from Bio import SeqUtils
from Bio.Seq import Seq

# Colour palette for the CDS arrows (six entries).
col_list = [
    colors.coral, colors.blue, colors.crimson, colors.navy, colors.red,
    colors.lightskyblue
]
# Two independent iterators over the palette. Only itr_copy is consumed in
# the statements below; itr is presumably used further down -- TODO confirm.
itr = iter(col_list)
itr_copy = iter(col_list)

record = SeqIO.read("Genome.gb", "genbank")

# Tracks collected for the diagram.
track_list = []

cdsfs = GenomeDiagram.FeatureSet(name='CDS features')

# Give each CDS feature the next palette colour.
# NOTE(review): next(itr_copy) has no default, so a record with more than
# six CDS features raises StopIteration here.
for feature in record.features:
    if feature.type == "CDS":
        cdsfs.add_feature(feature, sigil="ARROW", color=next(itr_copy))

# Put the CDS feature set on its own grey track and queue that track.
gdt1 = GenomeDiagram.Track('CDS features', greytrack=1, greytrack_labels=3)
gdt1.add_set(cdsfs)
track_list.append(gdt1)

for feature in record.features:
    if feature.type != "gene":
        continue
    gd_feature_set = GenomeDiagram.FeatureSet()
    gd_feature_set.add_feature(feature,
                               sigil="ARROW",
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 21 16:18:02 2021

@author: sodasim
"""
from reportlab.lib import colors
from reportlab.lib.units import cm
from Bio.Graphics import GenomeDiagram
from Bio import SeqIO

# Load the single GenBank record to be drawn.
record = SeqIO.read("Genome.gb", "genbank")

# Feature sets filled by the loop below: gd_first_set is scanned for
# existing genes, and gd_second_set receives genes whose start falls inside
# one of them. How gd_total_set is used is not visible here -- TODO confirm.
gd_first_set = GenomeDiagram.FeatureSet()
gd_second_set = GenomeDiagram.FeatureSet()
gd_total_set = GenomeDiagram.FeatureSet()

for feature in record.features:
    firstTrack = True
    if feature.type != "gene":
        continue
    for other_feature in gd_first_set.get_features():
        if other_feature.type != "gene":
            continue
        # Determines if feature should be placed on the second track
        if feature.location.start < other_feature.location.end and feature.location.start > other_feature.location.start:
            if len(gd_second_set) % 2 == 0:
                color = colors.blue
            else:
                color = colors.lightblue
            gd_second_set.add_feature(feature,
Exemple #14
0
    def write_schemadelica_plot(self, path='./'):
        """Write the primer scheme with a GC-skew graph as PDF and SVG.

        The files are named ``<self.prefix>.pdf`` and ``<self.prefix>.svg``
        and are placed in ``path``.
        """
        logger.info('Writing plot')

        plot = GenomeDiagram.Diagram("Primer Scheme", track_size=0.15)
        primer_set = GenomeDiagram.FeatureSet()

        # GC-skew line graph over 50-base windows of the reference.
        window = 50
        skew_graphs = GenomeDiagram.GraphSet('GC skew')
        skew_values = self.apply_to_window(self.primary_reference.seq, window,
                                           self.calc_gc_skew)
        skew_graphs.new_graph(skew_values,
                              'GC Skew',
                              style='line',
                              color=colors.violet,
                              altcolor=colors.purple)
        skew_track = GenomeDiagram.Track('GC Skew',
                                         height=1.5,
                                         greytrack=0,
                                         scale_largetick_interval=1e3)
        skew_track.add_set(skew_graphs)

        # Primer track: odd regions on strand +1, even regions on strand -1.
        for r in self.regions:
            region_label = str(r.region_num)
            strand = 1 if r.region_num % 2 else -1
            pair = r.top_pair

            fwd = SeqFeature(
                FeatureLocation(pair.left.start, pair.left.end,
                                strand=strand))
            rev = SeqFeature(
                FeatureLocation(pair.right.end, pair.right.start,
                                strand=strand))
            span = SeqFeature(
                FeatureLocation(pair.left.start, pair.right.start,
                                strand=strand))

            # Labels on the reverse strand are turned by -180 degrees.
            angle = 0 if strand == 1 else -180
            primer_set.add_feature(span,
                                   color=colors.palevioletred,
                                   name=region_label,
                                   label=True,
                                   label_position="middle",
                                   label_angle=angle)
            for primer in (fwd, rev):
                primer_set.add_feature(primer,
                                       color=colors.red,
                                       name=region_label)

        primer_track = GenomeDiagram.Track(name="Annotated Features", height=1)
        primer_track.add_set(primer_set)

        plot.add_track(primer_track, 2)
        plot.add_track(skew_track, 1)

        # One fragment per 10 kb of reference, minimum two.
        total = len(self.primary_reference)
        rows = max(2, int(round(total / 10000.0)))
        plot.draw(format='linear',
                  pagesize=(300 * rows, 200 * rows),
                  fragments=rows,
                  start=0,
                  end=total)

        for extension, output_format in (('pdf', 'PDF'), ('svg', 'SVG')):
            target = os.path.join(path,
                                  '{}.{}'.format(self.prefix, extension))
            plot.write(target, output_format, dpi=300)
Exemple #15
0
# Track that will carry the primer features (its own scale is switched off).
gd_track_for_features = GenomeDiagram.Track(name="Annotated Features",
                                            scale=0,
                                            scale_largetick_labels=True,
                                            scale_largetick_interval=5000,
                                            height=1)
gd_diagram = GenomeDiagram.Diagram("Zika Brazil", track_size=0.8)

# Separate, thinner track that only shows the scale.
scale_track = GenomeDiagram.Track(name='scale',
                                  scale=1,
                                  scale_largetick_labels=True,
                                  scale_largetick_interval=5000,
                                  height=0.3)
gd_diagram.add_track(scale_track, 1)

primer_feature_set = GenomeDiagram.FeatureSet()

# One primer per line of the file named by the second command-line argument.
# Column 0 is the primer name, columns 3 and 4 its start and end; the name
# is expected to end in "_L" for forward-strand primers and to have "in" as
# its second-to-last "_"-separated part for primers drawn in red.
# The context manager closes the file, which the bare open() call did not.
with open(sys.argv[2], 'r') as primer_file:
    for line in primer_file:
        cols = line.strip().split()
        if not cols:
            # Blank lines would otherwise fail with IndexError on cols[0].
            continue
        name_parts = cols[0].split('_')
        strand = (+1 if name_parts[-1] == 'L' else -1)
        colour = (colors.red if name_parts[-2] == 'in' else colors.blue)
        feature = SeqFeature(
            FeatureLocation(int(cols[3]), int(cols[4]), strand=strand))
        primer_feature_set.add_feature(feature,
                                       color=colour,
                                       name=cols[0],
                                       label=True,
                                       label_size=4,
                                       label_position="start",
                                       label_angle=45)
Exemple #16
0
# Record one (column, entropy) pair per column of seq_df, computed on the
# column after its missing values have been dropped.
for col in seq_df.columns:
    bases = seq_df[col]
    # NOTE(review): dropna(inplace=True) acts on the Series taken from the
    # frame; confirm this does not trigger a pandas chained-assignment warning.
    bases.dropna(how='any', inplace=True)
    ent = entropy1(bases)
    pos_ent = (col, ent)
    alignment_entropy.append(pos_ent)

#%%

# Print every feature of the reference polyprotein record.
for feature in ref_polyprot.features:
    print(feature)

# Bottom Up Approach

# Two feature sets; the loop below adds features to feature_set1 when the
# counter i is even.
feature_set1 = GenomeDiagram.FeatureSet()
feature_set2 = GenomeDiagram.FeatureSet()

# Counter tested for parity in the loop below.
i = 1
for feature in ref_polyprot.features:
    if feature.type != "mat_peptide":
        #Exclude this feature
        continue
    if i % 2 == 0:
        feature_set1.add_feature(feature,
                                 color='green',
                                 label=True,
                                 label_size=14,
                                 label_angle=90,
                                 label_position='start',
                                 label_strand=1,