else:
            start = int(sp_line[2])
            end = int(sp_line[3])
            strand = 1

        if series in series_features:
            series_features[series].append((gene_name, strand, start, end))
        else:
            series_features[series] = [(gene_name, strand, start, end)]
            series_indexes[series] = series_index
            series_index = series_index + 1

# Initialise the coordinate bounds with sentinel values (largest int / -1).
# NOTE(review): presumably narrowed to the real min/max further down, in code
# that is not visible here -- confirm.
start = sys.maxsize
end = -1

# One diagram holding a single scaled track with one feature set.
gdd = GenomeDiagram.Diagram("diagram", tracklines=False, y=0.4)
gd_track_for_features = gdd.new_track(1,
                                      scale=True,
                                      height=1,
                                      scale_smallticks=0)
gds_features = gd_track_for_features.new_set()

# Seed the generator from the command-line argument so the random fallback
# colors chosen below depend only on that seed.
seed(int(arg.random_seed))

for series in series_features.keys():
    # Series whose index fits in colors_list get a predefined color; any
    # further series gets a color built from three random() components.
    if series_indexes[series] < len(colors_list):
        current_color = colors_list[series_indexes[series]]
    else:
        current_color = colors.Color(random(), random(), random())

    for i in range(0, len(series_features[series])):
Example #2
0
'''
first try, not using Diagram_class
'''

from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.Graphics import GenomeDiagram
from Bio.Graphics.GenomeDiagram import CrossLink
from reportlab.lib.units import cm
from reportlab.lib import colors

# Build a diagram with two feature tracks, each holding one feature set.
# Both tracks are requested at level 1.
gdd = GenomeDiagram.Diagram('Diagram')
gdt1_features = gdd.new_track(1, greytrack=False)
gds1_features = gdt1_features.new_set()
gdt2_features = gdd.new_track(1, greytrack=False)
gds2_features = gdt2_features.new_set()

# NOTE(review): the file handle is never closed in the visible code; a
# `with open(...)` block would release it deterministically.
inFile = open('ABL1_NC')
NC_len = 5894
num = 0
startend = []
for line in inFile:
    # num is the 1-based line counter and drives the white -> firebrick ramp.
    # NOTE(review): the interpolation range is 0..10; presumably the input is
    # expected to have at most 10 lines -- confirm.
    num += 1
    color = colors.linearlyInterpolatedColor(colors.white, colors.firebrick, 0,
                                             10, num)
    line = line.strip()
    fields = line.split('\t')
    # Columns 7-10 (0-based) of the tab-separated line are read as integers.
    # NOTE(review): looks like BLAST tabular query/subject start/end -- confirm.
    q_start = int(fields[7])
    q_end = int(fields[8])
    s_start = int(fields[9])
    s_end = int(fields[10])
#!/home/pjsola/env/bin/python

import os
import csv

from Bio.SeqFeature import SeqFeature, FeatureLocation
from reportlab.lib import colors
from reportlab.lib.units import cm
from Bio.Graphics import GenomeDiagram
from Bio.Graphics.GenomeDiagram import CrossLink


diagram_name = 'TEST_CL'
gd_diagram = GenomeDiagram.Diagram(diagram_name)
# Record identifier -> integer value.
# NOTE(review): the loop below names the value record_length, so these are
# presumably sequence lengths in bp -- confirm.
dict_records = {'NC_016838':122799, 'NC_016839':105974, 'NC_016846':111195}

# NC_016838.1 vs NC_016839.1 made-up relations
# NOTE(review): each tuple presumably is (score, gene in first record, gene in
# second record) -- confirm against the code that consumes it.
A_vs_B = [
    (99, "mcpQ", "tetR"),
    (33, "ligA", "rhsC")
]

# Same tuple layout, for the second pair of records.
B_vs_C = [
    (99, "tetA", "pld"),
    (33, "rhsC", "traC")
]
i = 0
for record,record_length in dict_records.items():
    # Allocate tracks 5 (top), 3, 1 (bottom) for A, B, C
    # (empty tracks 2 and 4 add useful white space to emphasise the cross links
    # and also serve to make the tracks vertically more compressed)
Example #4
0
def _first_qualifier(feature, key):
    """Return the first value of qualifier *key* on *feature*, or None if absent."""
    try:
        return feature.qualifiers[key][0]
    except (KeyError, IndexError, TypeError):
        return None


def _cds_with_orthogroup(record):
    """Return (feature, orthogroup) pairs for the CDS features of *record* that have an orthogroup."""
    pairs = []
    for feature in record.features:
        if feature.type != "CDS":
            continue
        group = _first_qualifier(feature, "orthogroup")
        if group is not None:
            pairs.append((feature, group))
    return pairs


def _orient_records_like_predecessor(region_record_list):
    """Reverse-complement records in place so each one agrees with the record before it.

    For every adjacent pair, CDS features sharing an orthogroup vote on
    whether they lie on the same strand; when "different strand" wins, the
    second record is replaced by its reverse complement. In all cases the
    record name is set to its description.
    """
    region_record_list[0].name = region_record_list[0].description
    for x in range(len(region_record_list) - 1):
        # The left record is read after a possible flip in the previous round.
        reference = _cds_with_orthogroup(region_record_list[x])
        candidate = region_record_list[x + 1]
        candidate_cds = _cds_with_orthogroup(candidate)

        same_strand_count = 0
        different_strand_count = 0
        for feature_1, group1 in reference:
            for feature_2, group2 in candidate_cds:
                if group1 != group2:
                    continue
                if feature_1.location.strand == feature_2.location.strand:
                    same_strand_count += 1
                else:
                    different_strand_count += 1

        if different_strand_count > same_strand_count:
            region_record_list[x + 1] = candidate.reverse_complement(
                id=candidate.id, name=candidate.description)
        else:
            candidate.name = candidate.description


def _add_orthogroup_cross_links(gd_diagram, region_record_list, feature_sets,
                                cursor, identity_to_hex):
    """Link CDS features of adjacent records that share an orthogroup.

    Each linked pair gets a light-grey, strandless box on both tracks and a
    CrossLink whose fill and border color come from ``identity_to_hex``
    applied to the identity returned by
    ``orthogroup_identity_db.check_identity``. A failed lookup is reported on
    stdout and drawn with identity 0.
    """
    for x in range(len(region_record_list) - 1):
        set_X = feature_sets[x]
        set_Y = feature_sets[x + 1]
        cds_Y = _cds_with_orthogroup(region_record_list[x + 1])
        for feature_1, group1 in _cds_with_orthogroup(region_record_list[x]):
            for feature_2, group2 in cds_Y:
                if group1 != group2:
                    continue
                try:
                    identity = orthogroup_identity_db.check_identity(
                        cursor, group1,
                        feature_1.qualifiers["locus_tag"][0],
                        feature_2.qualifiers["locus_tag"][0])
                except Exception:
                    # Best effort: a missing identity must not abort the plot.
                    identity = 0
                    print(
                        "problem with identity table %s and locus %s %s" %
                        (group1, _first_qualifier(feature_1, "locus_tag"),
                         _first_qualifier(feature_2, "locus_tag")))

                link_hex = identity_to_hex(float(identity))

                F_x = set_X.add_feature(
                    SeqFeature(
                        FeatureLocation(feature_1.location.start,
                                        feature_1.location.end,
                                        strand=0)),
                    color=colors.lightgrey,
                    border=colors.lightgrey,
                    set_id=feature_1.qualifiers["locus_tag"])
                F_y = set_Y.add_feature(
                    SeqFeature(
                        FeatureLocation(feature_2.location.start,
                                        feature_2.location.end,
                                        strand=0)),
                    color=colors.lightgrey,
                    border=colors.lightgrey)
                gd_diagram.cross_track_links.append(
                    CrossLink(F_x, F_y, colors.HexColor(link_hex),
                              colors.HexColor(link_hex)))


def _add_record_features(gd_feature_set, record, is_plasmid,
                         target_protein_list, color_locus_list,
                         color_orthogroup_list):
    """Draw the features of *record* into *gd_feature_set*.

    Returns the locus tags of the drawn rRNA, tRNA and CDS features, in
    feature order.
    """
    if is_plasmid:
        base_even = colors.HexColor('#2837B7')
        base_odd = colors.blue
    else:
        base_even = colors.HexColor('#40F13A')
        base_odd = colors.HexColor('#0F600C')

    def add_labelled(feature, sigil, color, label_size):
        gd_feature_set.add_feature(feature,
                                   sigil=sigil,
                                   color=color,
                                   label=True,
                                   label_position="middle",
                                   label_strand=1,
                                   label_size=label_size,
                                   label_angle=40)

    drawn_loci = []
    for feature in record.features:
        feature_type = feature.type

        if feature_type == "tblast_target":
            feature.name = 'match'
            gd_feature_set.add_feature(feature,
                                       sigil="BOX",
                                       color="#ff4a0c86",
                                       label=False,
                                       label_position="middle",
                                       label_size=25,
                                       label_angle=0)
        elif feature_type == "assembly_gap":
            feature.location.strand = None
            add_labelled(feature, "BOX", "red", 14)
        elif feature_type in ("rRNA", "tRNA"):
            add_labelled(feature, "ARROW", "orange", 10)
            locus = _first_qualifier(feature, "locus_tag")
            if locus is None:
                print('no locus tag for:')
                print(feature)
            else:
                drawn_loci.append(locus)
        elif feature_type == "repeat_region":
            add_labelled(feature, "BOX", "blue", 14)

        # Pseudo features get an octagon in addition to whatever was drawn
        # above, and are never treated as regular CDS.
        if 'pseudo' in feature.qualifiers:
            add_labelled(feature, "OCTO", "#6E6E6E", 10)
            continue
        if feature_type != "CDS":
            continue

        # CDS without a locus tag or an orthogroup are not drawn.
        locus = _first_qualifier(feature, "locus_tag")
        group = _first_qualifier(feature, "orthogroup")
        if locus is None or group is None:
            continue

        # Alternate two shades based on how many features the set already
        # holds. A locus OR orthogroup match selects the highlight palette
        # (previously the orthogroup test always overwrote the locus match).
        even = len(gd_feature_set) % 2 == 0
        if locus in color_locus_list or group in color_orthogroup_list:
            if even:
                color = colors.HexColor('#ca4700')
            else:
                color = colors.HexColor('#fd7a32')
        else:
            color = base_even if even else base_odd

        locus_tags = feature.qualifiers["locus_tag"]
        if any(target in locus_tags for target in target_protein_list):
            color = colors.red

        add_labelled(feature, "ARROW", color, 10)
        drawn_loci.append(locus)

    return drawn_loci


def plot_multiple_regions_crosslink(target_protein_list,
                                    region_record_list,
                                    plasmid_list,
                                    out_name,
                                    biodb_name="chlamydia_03_15",
                                    color_locus_list=None,
                                    flip_record_based_on_first=True,
                                    color_orthogroup_list=None):
    """Draw several genomic regions as stacked linear tracks with orthogroup cross links.

    One track is created per record, CDS features of adjacent records that
    share an orthogroup are connected by identity-colored cross links, and the
    result is written to ``out_name`` as SVG after post-processing by
    ``edit_svg.edit_svg``. The output file is then made read-only (mode 444).

    Args:
        target_protein_list: locus tags whose CDS are drawn in red.
        region_record_list: records to draw, first one on top. The list is
            modified in place when ``flip_record_based_on_first`` is true
            (names are replaced by descriptions, records may be replaced by
            their reverse complement).
        plasmid_list: one truthy/falsy flag per record selecting the blue
            (plasmid) or green palette.
        out_name: path of the SVG file to write.
        biodb_name: suffix of the ``orth_<biodb_name>`` MySQL database and
            third argument of ``edit_svg.edit_svg``.
        color_locus_list: locus tags to highlight (default: none).
        flip_record_based_on_first: orient each record like its predecessor.
        color_orthogroup_list: orthogroups to highlight (default: none).

    Returns:
        Locus tags of the drawn rRNA/tRNA/CDS features; tags of later records
        come first.

    Raises:
        KeyError: if the ``SQLPSW`` environment variable is not set.
    """
    import io
    import os

    import matplotlib.cm as cm
    import MySQLdb
    from matplotlib.colors import Normalize, rgb2hex

    from chlamdb.plots import edit_svg

    color_locus_list = [] if color_locus_list is None else color_locus_list
    color_orthogroup_list = ([] if color_orthogroup_list is None else
                             color_orthogroup_list)

    sqlpsw = os.environ['SQLPSW']

    # Identity is mapped onto the Blues colormap over the range -30..100.
    identity_mapper = cm.ScalarMappable(norm=Normalize(vmin=-30, vmax=100),
                                        cmap=cm.Blues)

    def identity_to_hex(identity):
        return rgb2hex(identity_mapper.to_rgba(identity))

    n_records = len(region_record_list)
    record_length = [len(record) for record in region_record_list]
    gd_diagram = GenomeDiagram.Diagram("geomic_region")

    conn = MySQLdb.connect(
        host="127.0.0.1",  # your host, usually localhost
        user="******",  # your username
        passwd=sqlpsw,  # your password
        db="orth_%s" % biodb_name)  # name of the data base
    try:
        cursor = conn.cursor()

        if flip_record_based_on_first:
            _orient_records_like_predecessor(region_record_list)

        # One track per record, first record on the highest level.
        feature_sets = []
        seen_names = set()
        max_len = 0
        for i, record in enumerate(region_record_list):
            max_len = max(max_len, len(record))
            gd_track_for_features = gd_diagram.new_track(
                n_records - 1 - i,
                name=record.name,
                greytrack=True,
                height=0.4,
                start=0,
                end=len(record))
            if record.name in seen_names:
                # Warn only: every record still needs its own feature set so
                # that feature_sets stays aligned with region_record_list.
                print("already in feature_sets!")
                print(record)
            seen_names.add(record.name)
            feature_sets.append(gd_track_for_features.new_set())

        _add_orthogroup_cross_links(gd_diagram, region_record_list,
                                    feature_sets, cursor, identity_to_hex)
    finally:
        # The database is only needed for the identity lookups above.
        conn.close()

    all_locus = []
    for n, record in enumerate(region_record_list):
        one_row_locus = _add_record_features(feature_sets[n], record,
                                             plasmid_list[n],
                                             target_protein_list,
                                             color_locus_list,
                                             color_orthogroup_list)
        all_locus = one_row_locus + all_locus

    # Page dimensions: one grows with the number of tracks, the other with
    # the longest record; the orientation follows whichever is larger.
    hauteur = 300 if n_records == 2 else 150 * n_records
    largeur = max(record_length) / 30
    orientation = 'portrait' if hauteur > largeur else 'landscape'
    gd_diagram.draw(format="linear",
                    pagesize=(hauteur, largeur),
                    orientation=orientation,
                    fragments=1,
                    start=0,
                    end=max_len)

    svg_diagram = io.StringIO()
    gd_diagram.write(svg_diagram, "SVG")
    svg_diagram.flush()

    with_links = edit_svg.edit_svg(svg_diagram.getvalue(), all_locus,
                                   biodb_name)
    with_links.write(out_name)

    # Make the result read-only without going through a shell, so unusual
    # characters in out_name cannot break or alter the command.
    try:
        os.chmod(out_name, 0o444)
    except OSError as err:
        print("could not make %s read-only: %s" % (out_name, err))

    return all_locus
Example #5
0
from reportlab.lib import colors
from reportlab.lib.units import cm
from Bio.Graphics import GenomeDiagram
from Bio import SeqIO
from Bio.SeqFeature import SeqFeature, FeatureLocation
import sys
# Only the first record of the GenBank file named on the command line is used.
record = next(SeqIO.parse(sys.argv[1], "genbank"))
print(record)
gd_diagram = GenomeDiagram.Diagram(record.id)
gd_track_for_features = gd_diagram.new_track(1, name="Annotated Features")
gd_feature_set = gd_track_for_features.new_set()
# A synthetic feature spanning the whole sequence, drawn as a black arrow
# labelled "1".
contig = SeqFeature(FeatureLocation(0, len(record.seq)))
gd_feature_set.add_feature(contig,
                           sigil="ARROW",
                           color="black",
                           label=True,
                           name="1",
                           arrowshaft_height=1.0,
                           label_size=14,
                           label_angle=0)
# NOTE(review): this call is identical to the one above, so the same contig
# arrow is added twice -- confirm whether the duplicate is intentional.
gd_feature_set.add_feature(contig,
                           sigil="ARROW",
                           color="black",
                           label=True,
                           name="1",
                           arrowshaft_height=1.0,
                           label_size=14,
                           label_angle=0)

for feature in record.features:
    if feature.type != "gene":
Example #6
0
# import library to create genome diagram
from Bio import SeqIO
from Bio.Graphics import GenomeDiagram

# import library to parse file
from reportlab.lib import colors
from reportlab.lib.units import cm

# Load the single GenBank record that will be drawn.
record = SeqIO.read("Genome.gb", "genbank")

# Diagram -> one track -> one feature set.
gd_diagram = GenomeDiagram.Diagram("DNA sequence visualization")
gd_track_for_features = gd_diagram.new_track(1, name="Annotated Features")
gd_feature_set = gd_track_for_features.new_set()

# Draw every gene; the color alternates with the parity of the number of
# features already in the set (even -> green, odd -> dark cyan) and is used
# for both the feature and its label.
for feature in record.features:
    if feature.type == "gene":
        color = colors.darkcyan if len(gd_feature_set) % 2 else colors.green
        gd_feature_set.add_feature(feature,
                                   label_color=color,
                                   label_size=20,
                                   label=True,
                                   color=color)
                        line = line.split()
                        rev_data.append((int(line[1]), float(line[2])))
                        if float(line[2]) > scale_height:
                            scale_height = float(line[2])
                    rev_data.append((len(record.seq) + 5, scale_height))
                # Report the largest value seen while reading the data above.
                print(filename)
                print("Max height: " + str(scale_height))

                # NOTE(review): every branch below assigns scale_height, so
                # the maximum measured above is always discarded in favour of
                # a fixed value chosen from the file name -- confirm this
                # override is intended.
                if "ICP1" in filename:
                    scale_height = 10
                elif "chromosome" in filename:
                    scale_height = 1000
                else:
                    scale_height = 50000

                # Track 5 holds the annotated features; its scale and scale
                # ticks are switched off and the grey track carries no labels.
                gd_diagram = GenomeDiagram.Diagram("temp")
                gd_track_for_features = gd_diagram.new_track(
                    5,
                    name="Annotated Features",
                    height=1,
                    scale_ticks=0,
                    greytrack=1,
                    greytrack_labels=0,
                    scale=0)
                gd_feature_set = gd_track_for_features.new_set(type='feature')

                gd_track_for_feature_names = gd_diagram.new_track(
                    2,
                    name="Annotated names",
                    height=1,
                    scale_ticks=1,
Example #8
0
        ret_list.append((i, x))
        i = i + 100
    return ret_list


# GC skew values paired with positions by gcSkewData.
graphdata2 = gcSkewData(SeqUtils.GC_skew(seq_string))

# Plot the skew series computed just above. The original passed `graphdata`
# here, which left graphdata2 unused and drew another series under the
# "GC Skew" name.
gdgs2 = GenomeDiagram.GraphSet('GC Skew')
gdgs2.new_graph(graphdata2, 'GC Skew', style='line', linewidth=2)

gdt3 = GenomeDiagram.Track('GC Skew', greytrack=1, greytrack_labels=4)
gdt3.add_set(gdgs2)

track_list.append(gdt3)

gd_diagram = GenomeDiagram.Diagram("Tomato Curly Stunt Virus, complete genome",
                                   track_size=0.7)

# Tracks are placed on consecutive levels starting at 1, in list order.
for track_level, track in enumerate(track_list, start=1):
    gd_diagram.add_track(track, track_level)

gd_diagram.draw(format="circular",
                circular=True,
                pagesize=(50 * cm, 50 * cm),
                start=0,
                end=len(record),
                circle_core=0.3)

gd_diagram.write("circularGenomeTCSV.pdf", "PDF", dpi=72)
Example #9
0
def record2graph(fnames, beds, r, minlog, window, verbose):
    """Build a linear diagram with genes, GC content and one coverage track per file.

    Track 1 shows the CDS features of ``r`` (grey) together with the GC
    graph returned by ``seq2gcgraph``. Tracks 2.. are created one per entry
    of ``fnames``; each receives the colored features produced by
    ``bed2SeqFeature`` and the line graph produced by ``bed2graph``.

    Args:
        fnames: track names, one per coverage track. Must be at least as
            long as the bed sequences iterated below.
        beds: indexed as ``beds[k][0]`` and ``beds[k][1]`` for k = 0, 1, 2;
            the six sequences are iterated in lockstep.
            NOTE(review): presumably (bed data, expected count) for coverage
            and two SNP sets -- confirm with the caller.
        r: record providing ``features``; also passed to ``seq2gcgraph``.
        minlog: forwarded to ``bed2graph``.
        window: forwarded to ``bed2SeqFeature`` and ``bed2graph``.
        verbose: not used by this function.

    Returns:
        The drawn ``GenomeDiagram.Diagram`` (nothing is written to disk).
    """
    gdd = GenomeDiagram.Diagram()

    # Annotation track: CDS features plus the GC-content graph.
    gdt1 = gdd.new_track(1,
                         greytrack=1,
                         name="Genes and GC",
                         height=1.5,
                         scale_smalltick_interval=5 * 10**4,
                         scale_smallticks=0.15,
                         scale_largeticks=1.0,
                         scale_largetick_labels=1,
                         scale_largetick_interval=250 * 10**3,
                         scale_fontangle=0)
    gdt1.greytrack_fontcolor = colors.black
    gdfs = gdt1.new_set("feature")
    for feature in r.features:
        if feature.type == "CDS":
            gdfs.add_feature(feature, color=colors.grey)

    gdgs = gdt1.new_set("graph")
    gcgraph = seq2gcgraph(r, beds[0][0][0])
    gdgs.new_graph(gcgraph,
                   "GC content",
                   style="line",
                   color=colors.blue,
                   center=50)

    # The label font shrinks as the number of tracks grows, but never below 1.
    fsize = max(8 - len(fnames) / 5, 1)

    # One track per file, each with a feature set and a graph set.
    ggraphs, gtracks = [], []
    for level, fn in enumerate(fnames, start=2):
        gdt = gdd.new_track(level,
                            greytrack=1,
                            name=fn,
                            height=1,
                            scale_smalltick_interval=5 * 10**4,
                            scale_smallticks=0.15,
                            scale_largetick_labels=0,
                            scale_fontangle=0)
        gdt.greytrack_fontcolor = colors.black
        gdt.greytrack_fontsize = fsize
        gtracks.append(gdt.new_set("feature"))
        ggraphs.append(gdt.new_set("graph"))

    bed_columns = zip(beds[0][0], beds[0][1], beds[1][0], beds[1][1],
                      beds[2][0], beds[2][1])
    for i, (bed, expcount, bed1, expcount1, bed2,
            expcount2) in enumerate(bed_columns):
        # Colored SNP features.
        for feature, color in bed2SeqFeature(bed1, expcount1, bed2, expcount2,
                                             window):
            gtracks[i].add_feature(feature, colour=color)
        # Coverage graph, named after the file of its own track. The original
        # used the loop variable left over from the track-creation loop, so
        # every graph carried the last file name.
        gdata = bed2graph(bed, window, expcount, minlog)
        ggraphs[i].new_graph(gdata,
                             fnames[i],
                             style="line",
                             linewidth=0.5,
                             center=0,
                             color="blue")

    # Fixed page of 841.89 x 595.28 points with 5% margins left and right.
    xl = xr = 0.05
    width = 841.8897637795275
    height = 595.275590551181
    gdd.draw(format="linear",
             pagesize=(width, height),
             xl=xl,
             xr=xr,
             orientation="landscape",
             tracklines=0,
             fragments=1,
             circular=0,
             track_size=0.75)
    return gdd
Example #10
0
def cre(project_data_dir="",
        global_data_dir="",
        feature_name="",
        construct_name="CRE",
        extract_from="",
        main_record_file="",
        align_with=None,
        primer_list=None,
        restriction_interval=None,
        rb=None,
        write=False,
        pagesize="A4",
        scale_fontsize=3,
        label_size=2,
        greytrack_fontsize=7,
        x=0.05,
        y=0.01,
        track_size=0.3):
    """Draw a linear diagram of a construct with optional restriction, primer and alignment tracks.

    The main record is either extracted with ``extract_feature`` (when
    ``extract_from`` is given) or read from ``main_record_file`` (FASTA or
    GenBank, chosen by file extension).

    Args:
        project_data_dir: directory prefix for the ``align_with`` FASTA files
            and for sequences written by ``write_seq``.
        global_data_dir: directory prefix passed to ``extract_feature`` and
            used to locate ``primers/<name>.fasta``.
        feature_name: forwarded to ``extract_feature`` as ``feature_names``.
        construct_name: used in the diagram, track and output file names.
        extract_from: sequence id to extract the main record from.
        main_record_file: path of the main record, used when
            ``extract_from`` is empty.
        align_with: names (without ``.fasta``) of sequences to align with.
        primer_list: entries whose first two items are primer names; both
            primers of an entry share one color.
        restriction_interval, rb: a restriction track is drawn when either is
            truthy.
        write: write the diagram to a PDF and print its path.
        pagesize, scale_fontsize, label_size, greytrack_fontsize, x, y,
        track_size: layout settings forwarded to the diagram and tracks.

    Returns:
        The drawn ``GenomeDiagram.Diagram``.

    Raises:
        ValueError: if no main record can be loaded (neither ``extract_from``
            nor ``main_record_file`` given, or an unsupported extension).
    """
    #define color palettes:
    primer_colors = [
        colors.orchid, colors.cornflower, colors.lightseagreen, colors.salmon
    ]

    if extract_from:
        main_record, main_record_file = extract_feature(
            sequence_id=extract_from,
            data_dir=global_data_dir,
            feature_names=feature_name,
            write_file=True)
    elif main_record_file:
        record_stem, record_ext = splitext(basename(main_record_file))
        extract_from = record_stem
        if record_ext == ".fasta":
            main_record = SeqIO.read(main_record_file, "fasta")
        elif record_ext in [".gb", ".gbk", ".genbank"]:
            main_record = SeqIO.read(main_record_file, "gb")
            # Downstream helpers receive main_record_file as a FASTA path.
            main_record_file = convert_seq(main_record_file, "genbank",
                                           "fasta")
            print(main_record_file)
        else:
            raise ValueError("unsupported main record file extension: %r" %
                             record_ext)
    else:
        raise ValueError(
            "either extract_from or main_record_file must be given")

    gdd = GenomeDiagram.Diagram(construct_name + ' Construct Diagram',
                                x=x,
                                y=y,
                                track_size=track_size)

    genbank_track, genbank_features = new_track(
        gdd,
        construct_name + " features",
        smalltick=10,
        scale_fontsize=scale_fontsize,
        greytrack_fontsize=greytrack_fontsize)
    for feature in main_record.features:
        # Features mentioning "Cre" anywhere in their qualifiers stand out.
        if "Cre" in str(feature.qualifiers):
            color = colors.lavender
        else:
            color = colors.grey
        genbank_features.add_feature(feature,
                                     sigil="ARROW",
                                     color=color,
                                     label_color=color,
                                     label=True,
                                     label_size=label_size,
                                     label_angle=30,
                                     arrowshaft_height=1)

    if restriction_interval or rb:
        # NOTE(review): the restriction_interval and rb arguments only switch
        # this track on; the values passed to enzyme_selector are hard-coded.
        # Confirm whether the arguments should be forwarded instead.
        restriction_dict = enzyme_selector(sequence=main_record,
                                           restriction_interval=[0, 690],
                                           genome_frequency=[700, 2000],
                                           deterministic_overhangs=True,
                                           rb=["XceI", "PsuI"])
        restriction_track, restriction_features = new_track(
            gdd,
            construct_name + " restriction sites",
            smalltick=10,
            scale_fontsize=scale_fontsize,
            greytrack_fontsize=greytrack_fontsize)
        draw_digest(restriction_features, restriction_dict)

    # plotting primers
    if primer_list:
        color_cycle = cycle(primer_colors)
        primer_track, primer_features = new_track(
            gdd,
            construct_name + " primers",
            smalltick=10,
            scale_fontsize=scale_fontsize,
            greytrack_fontsize=greytrack_fontsize)
        for primer_entry in primer_list:
            primer_color = next(color_cycle)
            for primer_name in (primer_entry[0], primer_entry[1]):
                add_to_track(primer_features,
                             global_data_dir + "primers/" + primer_name +
                             ".fasta",
                             main_record_file,
                             annotation=primer_name,
                             feature_color=primer_color,
                             label_angle=30,
                             label_size=label_size)

    # turn entry names into actual file paths
    align_with = [
        project_data_dir + entry + ".fasta" for entry in (align_with or [])
    ]
    for align_path in align_with:
        align_label = splitext(align_path)[0][-6:]
        hit_track_back, hit_features_back = new_track(
            gdd,
            align_label,
            smalltick=10,
            end=len(SeqIO.read(align_path, 'fasta')),
            scale_fontsize=scale_fontsize,
            greytrack_fontsize=greytrack_fontsize)
        add_to_track(hit_features_back,
                     main_record_file,
                     align_path,
                     annotation=" " + construct_name,
                     feature_color=colors.red,
                     label_angle=30,
                     forceone=True,
                     label_size=label_size)
        hit_track, hit_features = new_track(
            gdd,
            construct_name + " alignment hits",
            smalltick=10,
            scale_fontsize=scale_fontsize,
            greytrack_fontsize=greytrack_fontsize)
        hsp_list = add_to_track(hit_features,
                                align_path,
                                main_record_file,
                                annotation=" " + align_label,
                                feature_color=colors.red,
                                label_angle=30,
                                forceone=True,
                                label_size=label_size)

        record = SeqIO.read(align_path, "fasta")
        #for loop only takes the first alignment, then breaks
        for hsp in hsp_list:
            truncated_record = SeqRecord(
                record.seq[hsp.query_end:],
                id="Region downstream of Cre in ePet-cre mice, i=" +
                align_path)
            write_seq(sequence_write_path=project_data_dir,
                      record=truncated_record,
                      ID=splitext(basename(align_path))[0] + "_3-unmatched")
            break

    if align_with:
        # NOTE(review): `record` and `hsp` are whatever the loop above left
        # behind (last file read, last hit seen); if no file produced a hit,
        # `hsp` is unbound here. Behavior kept as in the original.
        record = SeqRecord(main_record.seq + record.seq[hsp.query_end:],
                           id="Cre and following bases in ePet-cre construct.")
        write_seq(sequence_write_path=project_data_dir,
                  record=record,
                  ID="cre-ff-current")

    gdd.draw(format="linear",
             pagesize=pagesize,
             fragments=1,
             start=0,
             end=len(main_record))
    if write:
        out_path = ("/home/chymera/src/AutoTransGeno/output/" +
                    construct_name + "_from_" + extract_from + ".pdf")
        gdd.write(out_path, "PDF")
        print(out_path)
    return gdd
Example #11
0
def geneclusterview(gene_cluster, cutoff):
	"""Draw a query gene cluster and its hit clusters as stacked GenomeDiagram tracks.

	Records are obtained from ``seq_builder(gene_cluster, cutoff, save = False)``;
	the first record is treated as the query cluster and the remaining ones as
	hits.  For every hit record the locus tags of its features are looked up in
	the ``t_antismash2blast_reduced`` table to obtain a percent identity (used
	to pick the arrow colour) and a rank number (used as the arrow label).
	The finished diagram is written as PDF and SVG to a hard-coded path built
	from ``gene_cluster``.

	Parameters:
		gene_cluster: cluster identifier string.  It is interpolated into the
			SQL queries and the output file name, and its trailing one or two
			digits (depending on whether ``gene_cluster[-2]`` is ``'_'``) are
			parsed as an integer N that sizes the query track's ranks/colours.
		cutoff: only forwarded to ``seq_builder``.

	Returns:
		None.

	NOTE(review): both SQL statements are assembled with ``%`` string
	formatting; if ``gene_cluster`` can come from an untrusted source this
	should be switched to driver-side parameter binding.
	"""
	cursor = asp_con(path='192.38.13.196', user='******', pw='1234')

	#recA = SeqIO.read("GC_11_382281_6.gb", "gb")
	#recB = SeqIO.read("GC_28_10278_8.gb", "gb")
	#recC = SeqIO.read("GC_28_3654_7.gb", "gb")

	# Colour constants; `red` is assigned here but not referenced again in this function.
	red = PCMYKColor(0,  100.0,  0.0, 0.0)
	blue  = PCMYKColor(91.0,  43.0,  0.0, 0.0)
	black = PCMYKColor(0, 0, 20.0, 0)

	# Palette derived from `blue`; indexed 0..4 by the identity bins further down.
	pal = fade(blue,[100,60,40,20,5,0]) # TODO better coloring!

	#records = [recA, recB, recC] # for testing
	# TODO if isfile
	records = seq_builder(gene_cluster, cutoff, save = False)

	# records = [recA, recB, recC]
	##########################################
	# Doing some black magic on rank numbers #
	##########################################

	# One entry per hit record (records[1:]), kept in the same order:
	#   hits[k]     -> {h_seqkey: [pident, ...]}
	#   tagcol[k]   -> list of locus tags of that record's features
	#   rank_col[k] -> {h_seqkey: [rank, ...]}
	hits = []
	tagcol = []
	#all_h_tags=[]
	rank_col = []
	for i in records[1:]: # Leaving query cluster out # Exchange hardcoded query cluster later!
		tags = []
		for n in i.features:
			# NOTE(review): a [2:] slice does not normally evaluate to None, so
			# this branch looks unreachable - confirm what was meant to be tested.
			if n.qualifiers['locus_tag'][2:] is None:
				print "cannot get rank for this protein"
				print n 
			else:
				tags.append(n.qualifiers['locus_tag'][2:]) # Had to put a 0 here before... don't know why it doesn't need that now...
		# Turn the Python list repr "['a', 'b']" into the SQL tuple "('a', 'b')".
		tags_formatted = "("+str(tags)[1:-1]+")"
		#all_h_tags.append(str(tags)[1:-1])
		# Query 1: percent identity of every hit protein of this record.
		query ="SELECT  h_seqkey,CAST(pident AS UNSIGNED) from t_antismash2blast_reduced where clust_id = '%s' and h_seqkey IN %s;" % (gene_cluster,tags_formatted)
		cursor.execute(query)

		result = list(cursor.fetchall())
		# Query 2: (q_seqkey, h_seqkey) pairs for the self-hit rows of this
		# cluster, left-joined onto this record's hits.
		query =" SELECT ta.q_seqkey, tx.h_seqkey  from t_antismash2blast_reduced as ta  left join (SELECT * from t_antismash2blast_reduced as tb where tb.clust_id = '%s' and tb.h_seqkey IN %s ) tx on ta.q_seqkey = tx.q_seqkey where ta.clust_id = '%s' and ta.h_seqkey = ta.q_seqkey; "% (gene_cluster, tags_formatted, gene_cluster)
		cursor.execute(query)
		ranks_raw = cursor.fetchall()

		ranks = {}

		rank_counter = 1

		# Assign consecutive rank numbers to the returned rows; the counter only
		# advances for rows whose h_seqkey is not the string 'NULL'.
		# NOTE(review): database drivers commonly return None (not 'NULL') for
		# SQL NULL - confirm that this comparison filters what it is meant to.
		for line in ranks_raw:
			a,b = line
			if b in ranks:
				if b!='NULL':
					ranks[b].append(rank_counter)
					rank_counter+=1
			else:
				if b!='NULL':
					ranks[b] = list()
					ranks[b].append(rank_counter)
					rank_counter+=1

		rank_col.append(ranks)



		# Group the percent identities of query 1 by hit sequence key.
		tag_pid = {}

		for line in result:
			a,b = line
			if a in tag_pid:
				tag_pid[a].append(int(b))
			else:
				tag_pid[a] = list()
				tag_pid[a].append(int(b))
		hits.append(tag_pid)
		tagcol.append(tags)

	#print rank_col
	#f_all_h_tags= ','.join(all_h_tags)

	#formatted = "('"+str(f_all_h_tags)[1:-1]+"')"

	#print tagcol
	#print '\n'
	#print hits
	# Parse the trailing number N of the cluster id (one digit if the
	# second-to-last character is '_', otherwise two) and build 0..N.
	if gene_cluster[-2]=='_':
		fix = range(int(gene_cluster[-1])+1)
	else:
		fix = range(int(gene_cluster[-2:])+1)
	print fix
	
	# Ranks 1..N used as labels for the query track.
	rankA=fix[1:] #HERE!!!!
	print rankA

	# Let's see if [[] for i in range(len(records))] works because some records from org 3 show empty features

	# Allocated with len(records) slots, but only the first len(tagcol)
	# (= number of hit records) slots are filled by the loop below.
	ranklist = [[] for i in range(len(records))]
	#print range(len(records))

	cols=[[] for i in range(len(records))]

	counter= -1
	for i in tagcol:
		counter+=1
		for n in i:
			#print counter
			# Label: first rank recorded for this tag, or 0 meaning "no label".
			if n in rank_col[counter]:
				ranklist[counter].append(rank_col[counter][n][0])
			else:
				ranklist[counter].append(0)
			# Colour: bin the first percent identity into a palette index;
			# tags without any hit are drawn with `black`.
			if n in hits[counter]:
				calc = round(hits[counter][n][0])
				if calc > 80:
					index=0
				elif calc >60:
					index = 1
				elif calc > 40:
					index = 2
				elif calc > 20:
					index = 3
				else:
					index = 4
				cols[counter].append(pal[index])
			else:
				cols[counter].append(black)



	#print ranklist
	# Same trailing-number parse as above; the query track gets N+1 `blue` entries.
	if gene_cluster[-2]=='_':
		dummy_int = int(gene_cluster[-1])+1
	else:
		dummy_int = int(gene_cluster[-2:])+1

	colA=[blue]*dummy_int

	# Empty placeholder; it is not called anywhere in this function.
	def set_color():
		pass


	name = "/home/seth/300asp/Scripts/genecluster_plots/gc_view_%s" % gene_cluster
	gd_diagram = GenomeDiagram.Diagram(name)
	max_len = 0

	#print type([rankA]+ranklist)
	#print [rankA]+ranklist

	# Pair each record with its colour list and rank list; the query record
	# gets colA/rankA.  zip() stops at the shortest of the three sequences.
	for record, gene_colors, rank in zip(records, [colA]+cols, [rankA]+ranklist):
		max_len = max(max_len, len(record))
		gd_track_for_features = gd_diagram.new_track(1,
								name=record.description + record.id,
								greytrack=True,
								greytrack_labels = 1,
								greytrack_font_rotation = 0,
								greytrack_font_colour = Color(1,0,0),
								greytrack_fontsize = 5,
								axis_labels = True,
								scale_smallticks = 0.6,
								start=0, end=len(record))
		gd_feature_set = gd_track_for_features.new_set()

		# `i` indexes gene_colors in step with the features that are drawn.
		i = 0
		#print len(rank)
		#print len(record.features)
		#print rank
		# zip() truncates: features beyond the length of `rank` are not drawn.
		for feature, single_rank in zip(record.features, rank):
			#if feature.type != "gene": # Doesn't work with my records because they don't have genes, only cds.... should work but doesn't....
				#Exclude this feature
			#    continue
			# Label angle depends on the strand of the feature.
			if feature.strand == -1:
				temp_angle = 180
			else:
				temp_angle = 1
			# Rank 0 is the "no rank found" marker and yields an empty label.
			if single_rank == 0:
				temp_name = ''
			else:
				temp_name =   str(single_rank) # str(feature.qualifiers['locus_tag'])+'M'+str(single_rank)
			gd_feature_set.add_feature(feature, sigil="BIGARROW",
									   color=gene_colors[i], label=True,
									   name = temp_name, # this is too much, maybe implement if other label is found: str(feature.qualifiers['locus_tag'])+'M'+str(single_rank),
									   label_position="middle",
									   label_size = 6, label_angle= temp_angle)
			i+=1

	# All tracks share one linear page that spans the longest record.
	gd_diagram.draw(format="linear", pagesize='A4', fragments=1, orientation = 'portrait', track_size = 0.6, xl = 0.15,
					start=0, end=max_len)
	gd_diagram.write(name + ".pdf", "PDF")
	#gd_diagram.write(name + ".eps", "EPS")
	gd_diagram.write(name + ".svg", "SVG")
Example #12
0
    def plot_motif_sites(self, cluster_num, motif_num):
        """Draw every site of one motif on its own GenomeDiagram track and show it.

        THIS NEEDS MORE WORK but has the beginnings of something...
        TODO: multiple motifs on same tracks, include ALL genes (i.e. in operons that were not included),
              do reverse-complement positioning correctly (based on gene strand),
              use MAST scan output (from b.tables['motif_annotations'])

        To get this to work: download http://www.reportlab.com/ftp/fonts/pfbfer.zip
        and unzip it into /usr/lib/python2.7/dist-packages/reportlab/fonts/

        Args:
            cluster_num: cluster number, forwarded to ``self.get_motif_sites``.
            motif_num: motif number, forwarded to ``self.get_motif_sites``.

        Returns:
            The ``GenomeDiagram.Diagram`` that was drawn.  As a side effect the
            diagram is rendered to PNG in memory and displayed with
            ``plt.imshow``.
        """
        from Bio.SeqFeature import SeqFeature, FeatureLocation
        from Bio.Graphics import GenomeDiagram
        from reportlab.lib.units import cm
        from reportlab.lib import colors

        motif_sites = self.get_motif_sites(cluster_num, motif_num)
        # Per-site score: -log10(p-value) shifted down by 4.  Dividing a score
        # by the largest score (pv_range) gives the alpha used for that site.
        scores = -np.log10(motif_sites.pvalue.values) - 4
        pv_range = np.max(scores)
        len_range = np.max(motif_sites.start.values) + 10

        gdd = GenomeDiagram.Diagram('Motif sites: %d, %d' %
                                    (cluster_num, motif_num))

        for i in range(motif_sites.shape[0]):
            gdt_features = gdd.new_track(1,
                                         start=0,
                                         end=len_range,
                                         greytrack=True,
                                         greytrack_labels=1,
                                         name=motif_sites.names.values[i],
                                         scale=True,
                                         greytrack_fontsize=4)
            gds_features = gdt_features.new_set()
            col = colors.red.clone()
            if pv_range > 0:
                # A site scoring below zero would otherwise get a negative
                # alpha; clamp it to fully transparent instead.
                col.alpha = max(0.0, scores[i] / pv_range)
            else:
                # No site scores above zero, so the scaling is undefined
                # (division by zero or a negative range); draw opaque.
                col.alpha = 1.0
            m_start = motif_sites.start.values[i]
            m_len = len(motif_sites.seq.values[i])
            # NOTE(review): a ``reverse`` flag of 0 is mapped to strand -1 and
            # any other value is used as the strand directly; this looks
            # inverted and is covered by the TODO above - confirm before use.
            m_strand = motif_sites.reverse.values[i]
            if m_strand == 0:
                m_strand = -1
            # The strand belongs to the location: recent Biopython releases no
            # longer accept ``strand=`` on SeqFeature itself.
            location = FeatureLocation(m_start,
                                       m_start + m_len - 1,
                                       strand=m_strand)
            feature = SeqFeature(location)
            gds_features.add_feature(feature,
                                     name=str(i + 1),
                                     label=False,
                                     color=col)

        gdd.draw(format='linear',
                 pagesize=(15 * cm, motif_sites.shape[0] * cm / 2),
                 fragments=1,
                 start=0,
                 end=len_range + 10)
        ##gdd.write("GD_labels_default.pdf", "pdf") ## looks like only output is to file, so do this:
        #output = cStringIO.StringIO()
        #gdd.write(output, 'png', dpi=300)
        #output.seek(0)
        # Render to an in-memory PNG and hand it to matplotlib for display.
        output = gdd.write_to_string(output='png', dpi=300)
        output = cStringIO.StringIO(output)
        img = mpimg.imread(output)
        plt.axis('off')
        imgplot = plt.imshow(img, interpolation='bicubic')
        output.close()
        return gdd
Example #13
0
 def __init__(self):
     """Start with a fresh GenomeDiagram diagram and an empty list of tracks."""
     self.gdd = GenomeDiagram.Diagram('Diagram')  # Is this name useful?
     self.tracks = []
Example #14
0
    max_len += SPACER + len(record)
# The accumulation above adds one SPACER per record; drop the trailing one.
max_len -= SPACER

# Prefer the annotated GenBank reference when it exists, else use the FASTA.
if os.path.isfile(reference_genbank):
    reference_parser = SeqIO.parse(reference_genbank, "genbank")
else:
    reference_parser = SeqIO.parse(reference_fasta, "fasta")

if output_fasta:
    sys.stderr.write(
        "WARNING - Consider using order_assembly.py instead for FASTA output\n"
    )
    # Opened here and intentionally left open; presumably written to and
    # closed further down the script - verify against the remaining code.
    fasta_handle = open(output_fasta, "w")
    fasta_saved_count = 0
    fasta_short_dropped = 0

gd_diagram = GenomeDiagram.Diagram("Comparison")
gd_track_for_features = gd_diagram.new_track(1,
                                             name="reference",
                                             greytrack=False,
                                             height=0.5,
                                             start=0,
                                             end=max_len)
gd_feature_set = gd_track_for_features.new_set()
# Add a dark grey background.
# (The original statement ended in a stray comma, which wrapped the call's
# result in a throw-away one-element tuple.)
gd_feature_set.add_feature(SeqFeature(FeatureLocation(0, len(record))),
                           sigil="BOX",
                           color="grey",
                           label=False)

offset = 0
ref_offsets = {}
Example #15
0
def record2graph(fnames, beds, r, minlog, window, verbose):
    """Build a diagram with a gene/GC track and one coverage track per file.

    Track 1 shows the CDS features of ``r`` together with a GC-content line
    graph.  Tracks 2.. hold one graph set per entry of ``fnames``; every
    group in ``beds`` contributes one line graph to each of those tracks.

    Args:
        fnames: names of the coverage files; one track is created per name and
            the name is used as track and graph label.
        beds: sequence of groups, each indexed as ``group[0]`` (one bed object
            per file) and ``group[1]`` (the matching experiment counts).  The
            groups are drawn in reverse order; the list itself is not
            modified.
        r: record providing ``id`` and ``features``.
        minlog: forwarded to ``bed2graph``.
        window: forwarded to ``bed2graph``.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        The drawn ``GenomeDiagram.Diagram``.
    """
    # create diagram
    gdd = GenomeDiagram.Diagram()  #GDDiagram(gb)
    #gdd.name = r.id
    # add annotation
    gdt1 = gdd.new_track(1,
                         greytrack=1,
                         name="[%s] Genes and GC" % r.id,
                         height=1.5,
                         scale_smalltick_interval=5 * 10**4,
                         scale_smallticks=0.15,
                         scale_largeticks=1.0,
                         scale_largetick_labels=1,
                         scale_largetick_interval=250 * 10**3,
                         scale_fontangle=0)
    gdt1.greytrack_fontcolor = colors.black
    gdfs = gdt1.new_set("feature")
    for feature in r.features:
        if feature.type == "CDS":
            gdfs.add_feature(feature, color=colors.grey)
    # add GC
    gdgs = gdt1.new_set("graph")
    # get gc graph (read from the first bed of the first group, before any
    # reordering)
    gcgraph = seq2gcgraph(r, beds[0][0][0])
    gdgs.new_graph(gcgraph,
                   "GC content",
                   style="line",
                   color=colors.blue,
                   center=50)

    # add one coverage track (with an empty graph set) per bed tracks file
    gdgslist = []
    for i, fn in enumerate(fnames):
        gdt = gdd.new_track(i + 2,
                            greytrack=1,
                            name=fn,
                            height=2,
                            scale_smalltick_interval=5 * 10**4,
                            scale_smallticks=0.15,
                            scale_largetick_labels=0,
                            scale_fontangle=0)
        gdt.greytrack_fontcolor = colors.black
        gdgslist.append(gdt.new_set("graph"))

    # Colours are paired with the groups in reversed order.
    colorstuple = ["blue", "darkgrey", "orange"]
    colorstuple.reverse()
    # Iterate over a reversed view rather than reversing ``beds`` in place:
    # the in-place reverse changed the caller's list, so calling this function
    # again with the same list flipped the colour/line-width pairing.
    for j, bedexps in enumerate(reversed(beds)):
        for i, (bed, expcount) in enumerate(zip(bedexps[0], bedexps[1])):
            if bed:
                gdata = bed2graph(bed, window, expcount, minlog)
                # the third group drawn gets a thicker line
                linewidth = 0.3
                if j == 2:
                    linewidth = 1.0
                # Label the graph with the file of *this* track; previously
                # the loop variable ``fn`` left over from the track-creation
                # loop was used, i.e. always the last file name.
                gdgslist[i].new_graph(gdata,
                                      fnames[i],
                                      style="line",
                                      linewidth=linewidth,
                                      center=0,
                                      color=colorstuple[j])
                # only needed by the disabled page-scaling code below
                clen = gdgslist[i].range()[1]

    # write
    xl = xr = 0.05
    width = 841.8897637795275
    height = 595.275590551181
    # Disabled page scaling, kept for reference:
    # if clen<10.0**6:
    #     xr = 1.0 - clen / 10.0**6 * 0.95
    # else:
    #     width = clen * width / 10.0**6
    # if len(fnames)>12:
    #     height = len(fnames)/12.0 * height
    # draw
    gdd.draw(format="linear",
             pagesize=(width, height),
             xl=xl,
             xr=xr,
             orientation="landscape",
             tracklines=0,
             fragments=1,
             circular=0,
             track_size=0.75)  # ,pagesize="A3"
    return gdd
Example #16
0
#!/home/pjsola/env/bin/python

import os
import csv

from Bio.SeqFeature import SeqFeature, FeatureLocation
from reportlab.lib import colors
from reportlab.lib.units import cm
from Bio.Graphics import GenomeDiagram

# Name of the diagram object created below.
diagram_name = 'TEST_3'
gdd = GenomeDiagram.Diagram(diagram_name)
# Record identifier -> length; the length is used as the end of that
# record's track.
dict_records = {
    'NC_016838.1': 122799,
    'NC_016839.1': 105974,
    'NC_016846.1': 111195
}

# One greytrack-labelled track per record.
# NOTE(review): every track is requested at level 1 - confirm that the
# tracks are meant to share that level.
for record, record_length in dict_records.items():

    gd_track_for_features = gdd.new_track(1,
                                          name=record,
                                          greytrack=True,
                                          start=0,
                                          end=record_length)
    gd_set_features = gd_track_for_features.new_set()

    # The tab-separated coordinates file is re-opened for every record.
    with open('KPN.gff.forward.coordinates', 'r') as bed_forward_file:
        bed_readed = csv.reader(bed_forward_file, delimiter="\t")

        #record = None
Example #17
0
    (28, "orf54", "lin2566"),
]


def get_feature(features, id, tags=("locus_tag", "gene", "old_locus_tag")):
    """Return the first feature that lists *id* under one of the given tags.

    Features are scanned in order; for each one the qualifier values stored
    under every tag in *tags* are compared with *id*.

    Raises:
        KeyError: if no feature carries *id* under any of the tags.
    """
    for candidate in features:
        # A tag that is absent from this feature simply contributes no values.
        if any(value == id
               for tag in tags
               for value in candidate.qualifiers.get(tag, [])):
            return candidate
    raise KeyError(id)


# Build one diagram holding a feature track per record; `feature_sets` maps
# each record name to the feature set of its track.
gd_diagram = GenomeDiagram.Diagram(name)
feature_sets = {}
# Length of the longest record seen so far.
max_len = 0
for i, record in enumerate([A_rec, B_rec, C_rec]):
    max_len = max(max_len, len(record))
    # Allocate tracks 5 (top), 3, 1 (bottom) for A, B, C
    # (empty tracks 2 and 4 add useful white space to emphasise the cross links
    # and also serve to make the tracks vertically more compressed)
    gd_track_for_features = gd_diagram.new_track(5 - 2 * i,
                                                 name=record.name,
                                                 greytrack=True,
                                                 height=0.5,
                                                 start=0,
                                                 end=len(record))
    # Record names are the dictionary keys, so they must be unique.
    assert record.name not in feature_sets
    feature_sets[record.name] = gd_track_for_features.new_set()
Example #18
0
    def write_schemadelica_plot(self):
        """Render the primer scheme and a GC-content graph to PDF and SVG.

        Two tracks are drawn: track 1 holds a windowed GC-content line graph of
        ``self.primary_ref.seq`` and track 2 holds, for every entry of
        ``self.regions``, the amplicon plus its left and right primers.  The
        files are written to ``self.outpath`` as ``<prefix>.plot.pdf`` and
        ``<prefix>.plot.svg``.
        """
        diagram = GenomeDiagram.Diagram("Primer Scheme", track_size=0.15)
        primer_features = GenomeDiagram.FeatureSet()

        # GC-content track: one data point per 50-base window.
        window = 50
        gc_graphs = GenomeDiagram.GraphSet("GC content")
        gc_points = self.apply_to_window(self.primary_ref.seq, window,
                                         self.calc_gc)
        gc_graphs.new_graph(
            gc_points,
            "GC content",
            style="line",
            color=colors.violet,
            altcolor=colors.purple,
        )
        gc_track = GenomeDiagram.Track("GC content",
                                       height=1.5,
                                       greytrack=0,
                                       scale_largetick_interval=1e3)
        gc_track.add_set(gc_graphs)

        # Primer track: odd-numbered regions go on the forward strand and
        # even-numbered ones on the reverse strand.
        primer_color = colors.red
        region_color = colors.palevioletred
        for r in self.regions:
            label = str(r.region_num)
            if r.region_num % 2:
                strand = 1
            else:
                strand = -1

            left_primer = SeqFeature(
                FeatureLocation(r.left.start, r.left.end, strand=strand))
            right_primer = SeqFeature(
                FeatureLocation(r.right.end, r.right.start, strand=strand))
            amplicon = SeqFeature(
                FeatureLocation(r.left.start, r.right.start, strand=strand))

            # The amplicon is added first and carries the visible label.
            primer_features.add_feature(
                amplicon,
                color=region_color,
                name=label,
                label=True,
                label_position="middle",
                label_angle=0 if strand == 1 else -180,
            )
            for primer in (left_primer, right_primer):
                primer_features.add_feature(primer,
                                            color=primer_color,
                                            name=label)

        primer_track = GenomeDiagram.Track(name="Annotated Features", height=1)
        primer_track.add_set(primer_features)

        diagram.add_track(primer_track, 2)
        diagram.add_track(gc_track, 1)

        # One fragment (row) per ~10 kb of reference, but never fewer than two.
        rows = max(2, int(round(len(self.primary_ref) / 10000.0)))
        diagram.draw(
            format="linear",
            pagesize=(300 * rows, 200 * rows),
            fragments=rows,
            start=0,
            end=len(self.primary_ref),
        )

        outputs = [(self.outpath / f"{self.prefix}.plot.{fmt.lower()}", fmt)
                   for fmt in ("PDF", "SVG")]
        # Announce both files before writing either of them.
        for path, _ in outputs:
            logger.info(f"Writing {path}")
        for path, fmt in outputs:
            diagram.write(str(path), fmt, dpi=300)
# from a specified genbank file input

from reportlab.lib import colors
from reportlab.lib.units import cm
from Bio.Graphics import GenomeDiagram
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio import SeqIO
# Ask for the GenBank file and the coordinates/orientation/name of the
# putative gene.  The prompts are spelled as single literals; they print
# exactly the same text as before.
record1 = input("What is your genbank filename? Ex: sequence.gb \n")
record = SeqIO.read(record1, "genbank")
pgene_start = int(input("What is the start location of your putative gene?\n"))
pgene_end = int(input("What is the end location of your putative gene?\n"))
pgene_ori = int(input("Is putative gene forward (1) or reverse (-1)?\n"))
pgene = str(input("What would you like to name your putative gene?\n"))

# Start from an empty diagram holding one empty track and one empty feature set.
gd_diagram = GenomeDiagram.Diagram("S. cerevisiae Chromosome IX")
gd_track_for_features = gd_diagram.new_track(1, name="Annotated Features")
gd_feature_set = gd_track_for_features.new_set()

# Draw every "gene" feature of the record as an arrow; all other feature
# types are left out.  Colours alternate with the number of features already
# in the set.
for feature in record.features:
    if feature.type == "gene":
        color = colors.lightblue if len(gd_feature_set) % 2 == 0 else colors.blue
        gd_feature_set.add_feature(feature,
                                   sigil="ARROW",
                                   color=color,
                                   arrowshaft_height=1.0,
                                   label=True,
                                   label_size=8,
                                   label_angle=30)
Example #20
0
# Scan the FASTA handle once: record every sequence length by id, and keep
# track of the number of sequences and of the longest one.
seqCount = 0
longest = 0
for record in SeqIO.parse(handle, "fasta"):
    seqId = record.id
    seqLen = len(record)
    seqLenMap[seqId] = seqLen
    longest = max(longest, seqLen)
    seqCount += 1
handle.close()

# One colour per entry of motifMap, produced by color_code(index, total).
colorMap = [color_code(i, len(motifMap))for i in range(len(motifMap))]

# Grey with alpha 0.2.
seqColor = colors.grey.clone(alpha=0.2)

# Match positions, read line by line below.
posBED_FH = open(os.path.join(outDir, "sequences_cluster_match_position.bed"), "r")
gdd = GenomeDiagram.Diagram()

# State carried across lines of the position file.
prevSeqId = ""
trackId = 0
for line in posBED_FH:
    f = line.rstrip('\n').split('\t')
    seqId = f[0]
    seqLen = int(seqLenMap[seqId])
    padLen = max(0, int((longest - seqLen) / 2))
    start = int(f[1]) + padLen
    end = int(f[2]) + padLen
    cluster_id = int(f[3])
    if prevSeqId != seqId:
        gd_track = gdd.new_track(2 * trackId,
                                 greytrack=True,
                                 start=0,
# to parse the data
from Bio import SeqIO
from Bio.Graphics import GenomeDiagram
#to present the data
from reportlab.lib import colors
from reportlab.lib.units import cm

# Colours handed out to the gene features, in this order.
# NOTE(review): the loop below indexes this list with len(gd_feature_set)
# without wrapping around, so a record with more gene features than colours
# would presumably raise IndexError - confirm, or index modulo len(color_set).
color_set = [
    colors.green, colors.orange, colors.red, colors.purple, colors.cyan
]

# The file is expected to hold exactly one GenBank record (SeqIO.read).
record = SeqIO.read("Genome.gb", "genbank")

# Empty diagram with a single track and a single, still empty, feature set.
gd_diagram = GenomeDiagram.Diagram("Tomato Curly Stunt Virus")
gd_track_for_features = gd_diagram.new_track(1, name="Annotated Features")
gd_feature_set = gd_track_for_features.new_set()

for feature in record.features:
    if feature.type != "gene":
        # dont consider the feature since not a gene.
        continue

    color = color_set[(len(gd_feature_set))]

    # the parameters for representation of each feature.
    gd_feature_set.add_feature(feature,
                               sigil="ARROW",
                               arrowshaft_height=0.5,
                               color=color,
                               label=True,
                               label_size=25,
Example #22
0
def hyperdraw(genome, dataset, label, intensity=False):
    """Draw each record of *genome* as a series of 50 kb diagram pages.

    rRNA/tRNA features are drawn as grey octagons.  CDS features are drawn as
    arrows whose fill, border and shaft height are taken from *dataset*
    (keyed by locus tag) when the locus is present there.  Every page is
    written as PDF, EPS, SVG and PNG below a hard-coded Windows directory.

    Args:
        genome: iterable of records providing ``id``, ``features``, ``len()``
            and slicing.
        dataset: mapping from locus tag to an indexable row; index 1 is read
            as a float in both modes, indices 2 and 3 only when *intensity*
            is false.
        label: prefix of the output file names.
        intensity: if true, colour by the value at index 1 only; otherwise
            colour by its sign and magnitude, take the border from index 2
            (threshold 0.05) and the arrow shaft height from index 3.
    """
    # Product names containing any of these words are replaced by the locus tag.
    vague_words = 'hypothetical putative probable family domain unknown possible partial'.split()
    output_formats = ((".pdf", "PDF"), (".eps", "EPS"), (".svg", "SVG"),
                      (".png", "PNG"))

    for genoslice in genome:
        step = 50000
        print(len(genoslice), ' in ', step, ' is ', len(genoslice) / step)

        gd_diagram = GenomeDiagram.Diagram(genoslice.id)
        feature_track = gd_diagram.new_track(
            1,
            name="Annotated Features",
            scale_ticks=1,
            scale_largetick_interval=1000,
            scale_smalltick_interval=100,
            scale_smallticks=0.05,
            scale_largeticks=0.2,
            scale_smalltick_labels=0)
        feature_set = feature_track.new_set()

        for feature in genoslice.features:
            #if feature.type == "operon":
            #    gd_feature_set.add_feature(feature, sigil="BOX", ##pointy boxes
            #                           color=colors.grey, label=False)
            if feature.type in ("rRNA", "tRNA"):
                feature_set.add_feature(
                    feature,
                    sigil="OCTO",  ##pointy boxes
                    color=colors.grey)
                continue
            if feature.type != "CDS":
                continue

            locus = feature.qualifiers['locus_tag'][0]
            # Defaults for loci without data: white fill, light border,
            # no arrow shaft.
            red = green = blue = 1
            border = colors.gainsboro
            shaft = 0
            has_data = locus in dataset
            if has_data and intensity:
                level = 1 - float(dataset[locus][1]) / 10
                red = green = level
            elif has_data:
                shaft = dataset[locus][3]
                # keep the shaft height inside [0, 1]
                shaft = 0 if shaft < 0 else (1 if shaft > 1 else shaft)
                change = float(dataset[locus][1])
                sig = float(dataset[locus][2])
                if change > 0:
                    red = blue = 1 - change / 10
                else:
                    green = blue = 1 - abs(change / 10)
                border = colors.black if sig < 0.05 else colors.gray
            # Only loci with data and at least 200 positions get a label.
            labellable = has_data and len(feature) >= 200
            color = colors.Color(red, green, blue)

            #shortened=feature.qualifiers['locus_tag'][0]+' '+feature.qualifiers['product'][0]
            #if len(shortened)>20:
            #    shortened=shortened[0:20]+'...'
            product = feature.qualifiers['product'][0]
            if any(word in product.lower() for word in vague_words):
                shortened = locus
            else:
                shortened = product
            # Allow 30 label characters per 1000 positions of feature length.
            allocated = math.floor(len(feature) / 1000 * 30)
            if len(shortened) > allocated:
                shortened = shortened[0:allocated] + '...'

            feature_set.add_feature(
                feature,
                sigil="ARROW",
                arrowshaft_height=shaft,  ##pointy boxes=1
                color=color,
                label=labellable,
                height=0.8,  #not the actually setting...
                name=shortened,
                label_position="start",
                border=border,
                #label_strand=1,
                label_size=8,
                label_angle=0)

        # One page per window of `step` positions plus a 100-position overlap.
        for x in range(step + 1, len(genoslice) - 100, step):
            page_start = x - step
            page_end = x + 100
            print(page_start, page_end)
            gs = genoslice[page_start:page_end]
            gd_diagram.draw(format="linear",
                            pagesize='A4',
                            orientation='landscape',
                            fragments=10,
                            start=page_start,
                            end=page_end)
            # The file name is built from the locus tags of the window's
            # second and last features.
            first_tag = gs.features[1].qualifiers['locus_tag'][0]
            last_tag = gs.features[-1].qualifiers['locus_tag'][0]
            print(first_tag, last_tag)
            op = 'C:\\Users\\Cass\\Desktop\\test2\\' + label + first_tag + '-' + last_tag
            for extension, fmt in output_formats:
                gd_diagram.write(op + extension, fmt)
Example #23
0
def plot_multiple_regions_crosslink2(target_protein_list, region_record_list,
                                     plasmid_list, out_name):
    """Draw stacked linear tracks for several regions and link shared CDS.

    One track is created per record. For every pair of consecutive records,
    CDS features whose first ``orthogroup`` qualifier is equal are joined by a
    light grey cross-link. CDS features carrying a ``locus_tag`` are then drawn
    as arrows in two alternating colours, and in red when one of the target
    locus tags matches. The diagram is written to ``out_name`` as SVG.

    Args:
        target_protein_list: locus tags whose CDS arrows are drawn in red.
        region_record_list: records exposing ``name``, ``features`` and
            ``len()``; drawn top to bottom in the given order.
        plasmid_list: one truthy/falsy flag per record; a truthy flag selects
            the blue colour pair, a falsy flag the green one.
        out_name: path of the SVG file to write.

    Raises:
        ValueError: if ``region_record_list`` is empty.
    """

    def first_orthogroup(feature):
        """Return the first ``orthogroup`` qualifier of *feature*, or None."""
        try:
            return feature.qualifiers["orthogroup"][0]
        except (KeyError, IndexError):
            return None

    if not region_record_list:
        # The original failed later on max() of an empty list; fail early with
        # the same exception type and a clearer message.
        raise ValueError("region_record_list must contain at least one record")

    gd_diagram = GenomeDiagram.Diagram("geomic_region")
    feature_sets = []
    n_records = len(region_record_list)
    record_length = [len(record) for record in region_record_list]
    max_len = max(record_length)

    seen_names = set()
    for i, record in enumerate(region_record_list):
        # Only odd track levels are used (top record gets the highest level);
        # the empty even levels leave white space for the cross-links.
        gd_track_for_features = gd_diagram.new_track(
            (2 * n_records - 1) - 2 * i,
            name=record.name,
            greytrack=True,
            height=0.5,
            start=0,
            end=len(record))
        # The former check compared the record name against FeatureSet objects
        # and could never fire. Every record still needs its own set because
        # feature_sets is indexed by position below, so a duplicate name is
        # only reported.
        if record.name in seen_names:
            print("Warning: duplicate record name %s" % record.name)
        seen_names.add(record.name)
        feature_sets.append(gd_track_for_features.new_set())

    link_color = colors.lightgrey
    for x in range(n_records - 1):
        set_X = feature_sets[x]
        set_Y = feature_sets[x + 1]

        # Resolve the orthogroup of each CDS in the lower record once instead
        # of once per feature pair.
        grouped_Y = []
        for feature_2 in region_record_list[x + 1].features:
            if feature_2.type != "CDS":
                continue
            group2 = first_orthogroup(feature_2)
            if group2 is not None:
                grouped_Y.append((feature_2, group2))

        for feature_1 in region_record_list[x].features:
            if feature_1.type != "CDS":
                continue
            group1 = first_orthogroup(feature_1)
            if group1 is None:
                # CDS without an orthogroup never links to anything.
                continue
            for feature_2, group2 in grouped_Y:
                if group1 != group2:
                    continue
                F_x = set_X.add_feature(SeqFeature(
                    FeatureLocation(feature_1.location.start,
                                    feature_1.location.end,
                                    strand=0)),
                                        color=link_color,
                                        border=link_color)
                F_y = set_Y.add_feature(SeqFeature(
                    FeatureLocation(feature_2.location.start,
                                    feature_2.location.end,
                                    strand=0)),
                                        color=link_color,
                                        border=link_color)
                gd_diagram.cross_track_links.append(
                    CrossLink(F_x, F_y, link_color, link_color))

    for n, record in enumerate(region_record_list):
        gd_feature_set = feature_sets[n]

        if plasmid_list[n]:
            color1 = colors.HexColor('#2837B7')
            color2 = colors.blue
        else:
            color1 = colors.HexColor('#40F13A')
            color2 = colors.HexColor('#0F600C')

        for feature in record.features:
            if feature.type != "CDS":
                continue
            if "locus_tag" not in feature.qualifiers:
                # e.g. pseudogenes annotated as CDS without a locus tag
                continue

            # Alternate colours based on how many features the set already
            # holds (this count includes the cross-link boxes added above).
            if len(gd_feature_set) % 2 == 0:
                color = color1
            else:
                color = color2

            for target_protein in target_protein_list:
                if target_protein in feature.qualifiers["locus_tag"]:
                    color = colors.red

            gd_feature_set.add_feature(feature,
                                       sigil="ARROW",
                                       color=color,
                                       label=True,
                                       label_position="middle",
                                       label_strand=1,
                                       label_size=12,
                                       label_angle=45)

    # Page height grows with the number of records, width with the longest one.
    if n_records == 2:
        hauteur = 700
    else:
        hauteur = 250 * n_records
    largeur = max_len / 30

    orientation = 'portrait' if hauteur > largeur else 'landscape'
    gd_diagram.draw(format="linear",
                    pagesize=(hauteur, largeur),
                    orientation=orientation,
                    fragments=1,
                    start=0,
                    end=max_len)

    gd_diagram.write(out_name, "SVG")
Example #24
0
    def run_module(self):
        """Design primers for the selected circRNAs and render the results.

        Steps, in order:

        1. Collect exon sequences per circRNA, either from the FASTA file in
           ``self.input_circRNA`` or by intersecting the entries of
           ``self.dcc_file`` with the exons read from ``self.gtf_file``.
        2. Run ``circtools_primex_wrapper.R`` on the collected sequences.
        3. Unless ``self.no_blast`` is set or 50 or more circRNAs were
           extracted, send the designed primers to BLAST via
           ``self.call_blast`` and record the hit titles.
        4. Run ``circtools_primex_formatter.R`` and store its output as the
           HTML report in ``self.output_dir``.
        5. Write one circular SVG per circRNA with a primer pair.

        The process exits with status -1 when the temporary or output
        directory is not writable, when no circRNA matches the selection, or
        when the BLAST call raises.
        """

        def add_primer_arrow(feature_set, label, begin, stop, strand=None):
            """Add one green BIGARROW primer feature spanning begin..stop."""
            if strand is None:
                primer = SeqFeature(FeatureLocation(begin, stop))
            else:
                primer = SeqFeature(FeatureLocation(begin, stop),
                                    strand=strand)
            feature_set.add_feature(primer,
                                    name=label,
                                    label=False,
                                    sigil="BIGARROW",
                                    color="#75ff68",
                                    arrowshaft_height=0.3,
                                    arrowhead_length=0.1,
                                    label_size=22)

        # the first ID may actually be a file holding one circRNA ID per line
        if self.id_list and os.access(self.id_list[0], os.R_OK):
            print("Detected supplied circRNA ID file.")
            with open(self.id_list[0]) as f:
                lines = f.read().splitlines()
            self.id_list = lines

        # let's first check if the temporary directory is writable
        if not (os.access(self.temp_dir, os.W_OK)):
            print("Temporary directory %s not writable." % self.temp_dir)
            # exit with -1 error if we can't use it
            exit(-1)

        # the same check for the output directory
        if not (os.access(self.output_dir, os.W_OK)):
            print("Output directory %s not writable." % self.output_dir)
            # exit with -1 error if we can't use it
            exit(-1)

        circ_rna_number = 0

        # define temporary files; os.path.join keeps the paths inside the
        # directories whether or not they were given with a trailing separator
        exon_storage_tmp = os.path.join(self.temp_dir,
                                        "circtools_flanking_exons.tmp")
        blast_storage_tmp = os.path.join(self.temp_dir,
                                         "circtools_blast_results.tmp")
        blast_xml_tmp = os.path.join(self.temp_dir,
                                     "circtools_blast_results.xml")

        output_html_file = os.path.join(
            self.output_dir,
            self.experiment_title.replace(" ", "_") + ".html")

        # erase old contents
        open(exon_storage_tmp, 'w').close()

        # define cache dicts
        exon_cache = {}
        flanking_exon_cache = {}
        primer_to_circ_cache = {}

        if self.input_circRNA:
            from Bio import SeqIO
            with open(exon_storage_tmp, 'a') as data_store:
                for record in SeqIO.parse(self.input_circRNA, "fasta"):

                    # from the FASTA file we cannot tell the coordinates of the circRNA
                    name = str(record.id) + "_0_0_" + str(len(
                        record.seq)) + "_0"

                    data_store.write("\t".join(
                        [name, str(record.seq), "", "\n"]))
                    exon_cache[name] = {1: str(record.seq), 2: ""}

        else:
            exons = self.read_annotation_file(self.gtf_file, entity="exon")

            with open(self.dcc_file) as fp:

                for line in fp:

                    # make sure we remove the header
                    if line.startswith('Chr\t'):
                        continue

                    line = line.rstrip()
                    current_line = line.split('\t')

                    if current_line[3] == "not_annotated":
                        continue

                    if self.gene_list and not self.id_list and current_line[
                            3] not in self.gene_list:
                        continue

                    # circRNA identifier: gene_chr_start_end_strand
                    name = "_".join([
                        current_line[3], current_line[0], current_line[1],
                        current_line[2], current_line[5]
                    ])

                    if self.id_list and not self.gene_list and name not in self.id_list:
                        continue

                    flanking_exon_cache[name] = {}

                    bed_string = "\t".join([
                        current_line[0], current_line[1], current_line[2],
                        current_line[3],
                        str(0), current_line[5]
                    ])

                    virtual_bed_file = pybedtools.BedTool(bed_string,
                                                          from_string=True)
                    result = exons.intersect(virtual_bed_file, s=True)

                    fasta_bed_line_start = ""
                    fasta_bed_line_stop = ""

                    # flags: has an exon matching the circRNA start / end
                    # coordinate been found yet?
                    start = 0
                    stop = 0

                    for result_line in str(result).splitlines():
                        bed_feature = result_line.split('\t')

                        # this is a single-exon circRNA
                        if bed_feature[1] == current_line[1] and bed_feature[
                                2] == current_line[2]:
                            fasta_bed_line_start += result_line + "\n"
                            start = 1
                            stop = 1

                        if bed_feature[1] == current_line[1] and start == 0:
                            fasta_bed_line_start += result_line + "\n"
                            start = 1

                        if bed_feature[2] == current_line[2] and stop == 0:
                            fasta_bed_line_stop += result_line + "\n"
                            stop = 1

                        # these exons are kept for correctly drawing the circRNAs later
                        # not used for primer design
                        # (coordinates are compared as integers; comparing the
                        # raw strings would order "9" after "10")
                        if int(bed_feature[1]) > int(current_line[1]) and int(
                                bed_feature[2]) < int(current_line[2]):
                            flanking_exon_cache[name][bed_feature[1] + "_" +
                                                      bed_feature[2]] = 1

                    virtual_bed_file_start = pybedtools.BedTool(
                        fasta_bed_line_start, from_string=True)
                    virtual_bed_file_stop = pybedtools.BedTool(
                        fasta_bed_line_stop, from_string=True)

                    virtual_bed_file_start = virtual_bed_file_start.sequence(
                        fi=self.fasta_file)
                    virtual_bed_file_stop = virtual_bed_file_stop.sequence(
                        fi=self.fasta_file)

                    if stop == 0 or start == 0:
                        print(
                            "Could not identify the exact exon-border of the circRNA."
                        )
                        print(
                            "Will continue with non-annotated, manually extracted sequence."
                        )

                        # we have to manually reset the start position

                        fasta_bed_line = "\t".join([
                            current_line[0], current_line[1], current_line[2],
                            current_line[5]
                        ])

                        virtual_bed_file_start = pybedtools.BedTool(
                            fasta_bed_line, from_string=True)
                        virtual_bed_file_start = virtual_bed_file_start.sequence(
                            fi=self.fasta_file)
                        virtual_bed_file_stop = ""

                    exon1 = ""
                    exon2 = ""

                    # the sequence file is FASTA: drop the header line and
                    # keep the rest
                    if virtual_bed_file_start:
                        with open(virtual_bed_file_start.seqfn) as seq_handle:
                            exon1 = seq_handle.read().split("\n",
                                                            1)[1].rstrip()

                    if virtual_bed_file_stop:
                        with open(virtual_bed_file_stop.seqfn) as seq_handle:
                            exon2 = seq_handle.read().split("\n",
                                                            1)[1].rstrip()

                    circ_rna_number += 1
                    print("extracting flanking exons for circRNA #",
                          circ_rna_number,
                          name,
                          end="\n",
                          flush=True)

                    if exon2 and not exon1:
                        exon1 = exon2
                        exon2 = ""

                    exon_cache[name] = {1: exon1, 2: exon2}

                    with open(exon_storage_tmp, 'a') as data_store:
                        data_store.write("\t".join([name, exon1, exon2, "\n"]))

        if not exon_cache:
            print(
                "Could not find any circRNAs matching your criteria, exiting.")
            exit(-1)

        # need to define path top R wrapper
        primer_script = 'circtools_primex_wrapper.R'

        # ------------------------------------ run script and check output -----------------------

        # NOTE(review): the command line is assembled as a shell string, so
        # paths containing spaces or shell metacharacters are interpreted by
        # the shell.
        script_result = os.popen(primer_script + " " + exon_storage_tmp + " " +
                                 str(self.product_range[0]) + "," +
                                 str(self.product_range[1]) + " " +
                                 self.junction).read()

        # this is the first time we look through the input file
        # we collect the primer sequences and unify everything in one blast query

        blast_object_cache = {}
        blast_result_cache = {}

        blast_input_file = ""

        if circ_rna_number < 50:

            for line in script_result.splitlines():
                entry = line.split('\t')
                circular_rna_id = entry[0].split('_')

                if entry[1] == "NA":
                    continue

                # only blast 1
                elif entry[2] in blast_object_cache and not entry[
                        1] in blast_object_cache:
                    blast_input_file += "\n>" + entry[1] + "\n" + entry[1]
                    blast_object_cache[entry[1]] = 1
                    primer_to_circ_cache[entry[1]] = circular_rna_id[0]

                # only blast 2
                elif entry[1] in blast_object_cache and not entry[
                        2] in blast_object_cache:
                    blast_input_file += "\n>" + entry[2] + "\n" + entry[2]
                    blast_object_cache[entry[2]] = 1
                    primer_to_circ_cache[entry[2]] = circular_rna_id[0]

                # seen both already, skip
                elif entry[1] in blast_object_cache and entry[
                        2] in blast_object_cache:
                    continue

                # nothing seen yet, blast both
                else:
                    blast_input_file += "\n>" + entry[1] + "\n" + entry[
                        1] + "\n>" + entry[2] + "\n" + entry[2]
                    blast_object_cache[entry[1]] = 1
                    blast_object_cache[entry[2]] = 1
                    primer_to_circ_cache[entry[1]] = circular_rna_id[0]
                    primer_to_circ_cache[entry[2]] = circular_rna_id[0]
        else:
            print("Too many circRNAs selected, skipping BLAST step.")

        if self.no_blast:
            print("User disabled BLAST search, skipping.")

        run_blast = 0

        # check if we have to blast
        if not self.no_blast and blast_input_file:

            try:
                print("Sending " + str(len(blast_object_cache)) +
                      " primers to BLAST")
                print("This may take a few minutes, please be patient.")
                result_handle = self.call_blast(blast_input_file,
                                                self.organism)
                run_blast = 1
            except Exception as exc:
                print(exc)
                exit(-1)

            # keep a copy of the raw BLAST XML in the temporary directory
            with open(blast_xml_tmp, "w") as out_handle:
                out_handle.write(result_handle.read())

            result_handle.close()

            # parse the stored copy; the parser reads lazily, so the file has
            # to stay open until all records are consumed
            with open(blast_xml_tmp) as result_handle:

                for blast_record in NCBIXML.parse(result_handle):

                    if blast_record.query not in blast_result_cache:
                        blast_result_cache[blast_record.query] = []

                    for description in blast_record.descriptions:

                        # filter out the host gene we're in now
                        # also filter out all "PREDICTED" stuff
                        if description.title.find(primer_to_circ_cache[blast_record.query]) == -1 and\
                                description.title.find("PREDICTED") == -1:
                            blast_result_cache[blast_record.query].append(
                                description.title)

        # if we encounter NAs nothing has been blasted, we manually set the values now

        blast_result_cache["NA"] = ["Not blasted, no primer pair found"]

        primex_data_with_blast_results = ""

        for line in script_result.splitlines():
            entry = line.split('\t')

            # split up the identifier for final plotting
            line = line.replace("_", "\t")

            if run_blast == 1:
                left_result = "No hits"
                right_result = "No hits"
            else:
                left_result = "Not blasted, no primer pair found"
                right_result = left_result

            if entry[1] in blast_result_cache:
                left_result = ";".join(blast_result_cache[entry[1]])

            if entry[2] in blast_result_cache:
                right_result = ";".join(blast_result_cache[entry[2]])

            # update line
            primex_data_with_blast_results += line + "\t" + left_result + "\t" + right_result + "\n"

        with open(blast_storage_tmp, 'w') as data_store:
            data_store.write(primex_data_with_blast_results)

        # need to define path top R wrapper
        primer_script = 'circtools_primex_formatter.R'

        # ------------------------------------ run script and check output -----------------------

        primex_data_formatted = os.popen(primer_script + " " +
                                         blast_storage_tmp + " " + "\"" +
                                         self.experiment_title + "\"").read()

        with open(output_html_file, 'w') as data_store:
            data_store.write(primex_data_formatted)

        print("Writing results to " + output_html_file)

        # here we create the circular graphics for primer visualisation
        for line in primex_data_with_blast_results.splitlines():
            entry = line.split('\t')

            # no primers, no graphics
            if entry[6] == "NA":
                continue

            circular_rna_id = "_".join(
                [entry[0], entry[1], entry[2], entry[3], entry[4]])

            if circular_rna_id in exon_cache:

                circular_rna_id_isoform = circular_rna_id + "_" + entry[5]

                circrna_length = int(entry[3]) - int(entry[2])

                exon1_length = len(exon_cache[circular_rna_id][1])
                exon2_length = len(exon_cache[circular_rna_id][2])

                exon2_colour = "#ffac68"

                if exon2_length == 0:
                    # only one sequence stored: draw it as two halves in the
                    # same colour
                    single_exon_length = exon1_length
                    exon1_length = single_exon_length // 2 + 1
                    exon2_length = single_exon_length // 2
                    exon2_colour = "#ff6877"

                forward_fields = entry[8].split(',')
                reverse_fields = entry[9].split(',')

                forward_primer_start = int(
                    forward_fields[0]) + circrna_length - exon2_length
                forward_primer_length = int(forward_fields[1])

                reverse_primer_start = int(reverse_fields[0]) - exon2_length
                reverse_primer_length = int(reverse_fields[1])

                product_size = entry[14]

                gdd = GenomeDiagram.Diagram('circRNA primer diagram')
                gdt_features = gdd.new_track(
                    1,
                    greytrack=True,
                    name="",
                )
                gds_features = gdt_features.new_set()

                feature = SeqFeature(FeatureLocation(0, exon1_length),
                                     strand=+1)
                gds_features.add_feature(feature,
                                         name="Exon 1",
                                         label=False,
                                         color="#ff6877",
                                         label_size=22)

                feature = SeqFeature(FeatureLocation(
                    circrna_length - exon2_length, circrna_length),
                                     strand=+1)
                gds_features.add_feature(feature,
                                         name="Exon 2",
                                         label=False,
                                         color=exon2_colour,
                                         label_size=22)

                # the product is drawn in two parts, one on each side of
                # position 0
                feature = SeqFeature(FeatureLocation(forward_primer_start,
                                                     circrna_length),
                                     strand=-1)
                gds_features.add_feature(feature,
                                         name="Product",
                                         label=False,
                                         color="#6881ff")

                feature = SeqFeature(FeatureLocation(0, reverse_primer_start),
                                     strand=-1)
                gds_features.add_feature(feature,
                                         name="Product: " + product_size +
                                         "bp",
                                         label=False,
                                         color="#6881ff",
                                         label_size=22,
                                         label_position="middle")

                if self.junction == "f":

                    add_primer_arrow(
                        gds_features, "Reverse",
                        reverse_primer_start - reverse_primer_length,
                        reverse_primer_start, -1)

                    # the primer spans the BSJ, therefore we have to draw it in two pieces:
                    # piece 1: primer start to circRNA end
                    # piece 2: remaining primer portion beginning from 0

                    # piece 1:
                    add_primer_arrow(gds_features, "Forward",
                                     forward_primer_start, circrna_length)

                    # piece 2:
                    add_primer_arrow(
                        gds_features, "Forward", 0, forward_primer_length -
                        (circrna_length - forward_primer_start))
                elif self.junction == "r":
                    # the primer spans the BSJ, therefore we have to draw it in two pieces:
                    # piece 1: primer start of circRNA to circRNA end
                    # piece 2: remaining primer portion beginning from 0

                    # piece 1:
                    add_primer_arrow(gds_features, "Reverse",
                                     circrna_length - reverse_primer_start,
                                     circrna_length, -1)

                    # piece 2:
                    add_primer_arrow(gds_features, "Reverse", 0,
                                     reverse_primer_start, -1)

                    add_primer_arrow(
                        gds_features, "Forward", forward_primer_start,
                        forward_primer_start + forward_primer_length)
                else:
                    add_primer_arrow(
                        gds_features, "Reverse",
                        reverse_primer_start - reverse_primer_length,
                        reverse_primer_start, -1)

                    add_primer_arrow(
                        gds_features, "Forward", forward_primer_start,
                        forward_primer_start + forward_primer_length)

                # one-base marker at position 0 carrying the "BSJ" label
                feature = SeqFeature(FeatureLocation(0, 1))
                gds_features.add_feature(feature,
                                         name="BSJ",
                                         label=True,
                                         color="white",
                                         label_size=22)

                if circular_rna_id in flanking_exon_cache:
                    for exon in flanking_exon_cache[circular_rna_id]:
                        exon_start, exon_stop = exon.split('_')

                        # shift genomic coordinates so the circRNA starts at 0
                        exon_start = int(exon_start) - int(entry[2])
                        exon_stop = int(exon_stop) - int(entry[2])

                        feature = SeqFeature(FeatureLocation(
                            exon_start, exon_stop),
                                             strand=+1)
                        gds_features.add_feature(feature,
                                                 name="Exon",
                                                 label=False,
                                                 color="grey",
                                                 label_size=22)

                gdd.draw(format='circular',
                         pagesize=(600, 600),
                         circle_core=0.6,
                         track_size=0.3,
                         tracklines=0,
                         x=0.00,
                         y=0.00,
                         start=0,
                         end=circrna_length - 1)

                gdd.write(
                    os.path.join(self.output_dir,
                                 circular_rna_id_isoform + ".svg"), "svg")
Example #25
0
    def write_schemadelica_plot(self, path='./'):
        """Draw the primer scheme as a linear diagram and save PNG, PDF and SVG.

        A scale track is added at level 2. Regions are split by the parity of
        ``region_num`` over two feature tracks: even-numbered regions on the
        track at level 4, odd-numbered regions on the track at level 6. Every
        region contributes three features to its track: a labelled span from
        the left primer start to the right primer start, and one unlabelled
        feature for each of the two primers.

        :param path: directory that receives ``<prefix>.png``,
            ``<prefix>.pdf`` and ``<prefix>.svg``.
        """
        logger.info('Writing plot')
        gd_diagram = GenomeDiagram.Diagram("Primer Scheme", track_size=1)
        scale_track = GenomeDiagram.Track(name='scale',
                                          scale=True,
                                          scale_fontsize=10,
                                          scale_largetick_interval=1000,
                                          height=0.1)
        gd_diagram.add_track(scale_track, 2)

        # Index 0 collects even-numbered regions, index 1 odd-numbered ones,
        # so ``region_num % 2`` selects the target set directly.
        feature_sets = (GenomeDiagram.FeatureSet(), GenomeDiagram.FeatureSet())

        region_style = dict(color=colors.palevioletred,
                            label=True,
                            label_size=10,
                            label_position="middle",
                            label_angle=0)
        primer_style = dict(color=colors.red, label=False)

        for r in self.regions:
            left = r.top_pair.left
            right = r.top_pair.right
            region = str(r.region_num)
            left_start = int(left.start)
            right_start = int(right.start)

            # The right primer is drawn from its ``end`` to its ``start``
            # coordinate, and the region spans left start to right start.
            region_feature = SeqFeature(
                FeatureLocation(left_start, right_start, strand=0))
            fwd_feature = SeqFeature(
                FeatureLocation(left_start, int(left.end), strand=0))
            rev_feature = SeqFeature(
                FeatureLocation(int(right.end), right_start, strand=0))

            target_set = feature_sets[int(region) % 2]
            target_set.add_feature(region_feature, name=region, **region_style)
            target_set.add_feature(fwd_feature, name=region, **primer_style)
            target_set.add_feature(rev_feature, name=region, **primer_style)

        for level, feature_set in zip((4, 6), feature_sets):
            primer_track = GenomeDiagram.Track(name="Annotated Features",
                                               height=0.1)
            primer_track.add_set(feature_set)
            gd_diagram.add_track(primer_track, level)

        # One fragment (row) per 10 kb of reference, never fewer than two;
        # the page grows with the number of rows.
        rows = max(2, int(round(len(self.primary_reference) / 10000.0)))
        gd_diagram.draw(format='linear',
                        pagesize=(300 * rows, 200 * rows),
                        fragments=rows,
                        start=0,
                        end=len(self.primary_reference))

        outputs = [(os.path.join(path, '{}.{}'.format(self.prefix, ext)),
                    ext.upper()) for ext in ('png', 'pdf', 'svg')]
        for filepath, output_format in outputs:
            gd_diagram.write(filepath, output_format, dpi=300)
Example #26
0
    def export_plasmidmap(gbfile, filename=None):
        """Draw linear and circular plasmid maps for a GenBank file.

        The file is parsed with ``SeqIO.read``. Features of type ``primer``
        and ``misc_feature`` are left out; every other feature is drawn as an
        arrow, alternating between light blue and blue. Both maps are written
        as PNG.

        :param gbfile: path to the GenBank (.gb) file.
        :type gbfile: str
        :param filename: output paths as ``(linear, circular)``; when omitted
            the maps go to ``plasmid_linear.png`` and ``plasmid_circular.png``.
        :type filename: tuple, optional
        :return: the ``id`` of the parsed record.
        :rtype: str
        """
        skipped_types = ("primer", "misc_feature")

        record = SeqIO.read(gbfile, "genbank")

        gd_diagram = GenomeDiagram.Diagram(record.id)
        gd_track_for_features = gd_diagram.new_track(1,
                                                     name="Annotated Features")
        gd_feature_set = gd_track_for_features.new_set()

        for feature in record.features:
            if feature.type in skipped_types:
                continue
            # Alternate colours based on how many features were already added.
            already_added = len(gd_feature_set)
            color = colors.blue if already_added % 2 else colors.lightblue
            gd_feature_set.add_feature(feature,
                                       sigil="ARROW",
                                       color=color,
                                       label_size=12,
                                       label_angle=0,
                                       label=True)

        # Linear map: landscape A4 split into four fragments.
        gd_diagram.draw(format="linear",
                        orientation="landscape",
                        pagesize="A4",
                        fragments=4,
                        start=0,
                        end=len(record))

        if filename is not None:
            linfile, circfile = filename[0], filename[1]
        else:
            linfile, circfile = "plasmid_linear.png", "plasmid_circular.png"

        gd_diagram.write(linfile, "PNG")

        # Circular map: the same diagram is redrawn and written to a second file.
        gd_diagram.draw(format="circular",
                        circular=True,
                        pagesize=(25 * cm, 20 * cm),
                        start=0,
                        end=len(record),
                        circle_core=0.5)
        gd_diagram.write(circfile, "PNG")

        return record.id
Example #27
0
    scaffold2orf2coord[ scaffold ][orf] = (int(start),int(end),int(strand))
    print(scaffold2orf2coord[ scaffold ][orf])
file.close()



from reportlab.lib import colors
from reportlab.lib.units import cm
from Bio.Graphics import GenomeDiagram
from Bio import SeqIO


# Build one GenomeDiagram per scaffold and create a CDS feature for each of
# its ORFs. The loop body continues past the end of this fragment.
for scaffold,orf2coord in scaffold2orf2coord.items() :
    print(scaffold)
    # Output name derived from the scaffold; it is not used in the visible
    # part of the loop.
    scaffold_filename = scaffold+'_genomicDiagram.pdf'
    gd_diagram = GenomeDiagram.Diagram("a new type of nitrogenase")
    gd_track_for_features = gd_diagram.new_track(1, name="Annotated Features")
    gd_feature_set = gd_track_for_features.new_set()

    
    # print('\t'+str(scaffold2max[scaffold]))
    # print('\t'+str(scaffold2min[scaffold]))

    for orf,coord in orf2coord.items() :
        # coord is the (start, end, strand) tuple stored for this ORF.
        start = coord[0]
        end = coord[1]
        strand = coord[2]
        # NOTE(review): SeqFeature and FeatureLocation are not among the
        # imports visible just above this loop - confirm they are imported.
        # NOTE(review): recent Biopython releases expect ``strand`` on
        # FeatureLocation rather than on SeqFeature - confirm against the
        # installed version.
        feature = SeqFeature( FeatureLocation( int( int(start) ) , int( int(end) ) ) , strand=int(strand), type = 'CDS' )
        # Use the colour registered for this ORF in orf2color when there is one.
        if orf in orf2color :
            color = orf2color[orf]
        else:
from Bio.Graphics import GenomeDiagram
from reportlab.lib import colors
from reportlab.lib.units import cm
from Bio import SeqIO, SeqFeature
from Bio.SeqFeature import SeqFeature, FeatureLocation

#this code is from https://biopython-tutorial.readthedocs.io/en/latest/notebooks/17%20-%20Graphics%20including%20GenomeDiagram.html
#I used it, and that web page, to learn how to use GenomeDiagram from Biopython
#define parameters of drawing
# One diagram holding a single track (grey background off) with one feature set.
gdd = GenomeDiagram.Diagram('Test Diagram')
gdt_features = gdd.new_track(1, greytrack=False)
gds_features = gdt_features.new_set()

# Add three features to show the strand options (forward, none, reverse).
# The strand is set on the FeatureLocation: current Biopython releases no
# longer accept a ``strand`` argument on SeqFeature itself.
feature = SeqFeature(FeatureLocation(25, 125, strand=+1))
gds_features.add_feature(feature, name="Forward", label=True, sigil="ARROW")
feature = SeqFeature(FeatureLocation(150, 250, strand=None))
gds_features.add_feature(feature, name="Strandless", label=True, sigil="ARROW")
feature = SeqFeature(FeatureLocation(275, 375, strand=-1))
gds_features.add_feature(feature, name="Reverse", label=True, sigil="ARROW")

# Draw positions 0-400 as a single linear fragment and save it as PNG.
gdd.draw(format='linear',
         pagesize=(15 * cm, 4 * cm),
         fragments=1,
         start=0,
         end=400)
gdd.write("GD_labels_default.png", "png")
Example #29
0
# BLAST: convert both GenBank inputs to FASTA, then run blastn with A as the
# query and B as the subject, writing tabular output to a text file.
SeqIO.convert(A, "genbank", A + ".fasta", "fasta")
SeqIO.convert(B, "genbank", B + ".fasta", "fasta")

comando_blastn = NcbiblastnCommandline(
    query=A + ".fasta",
    subject=B + ".fasta",
    outfmt='6 qstart qend sstart send pident',
    out="blast_" + A + "_" + B + ".txt",
)
stdout, stderr = comando_blastn()
# NOTE(review): this handle is not closed anywhere in the visible code -
# confirm it is closed once the hits have been read.
blast = open("blast_" + A + "_" + B + ".txt")

# Start the figure: one track per input, each with its own feature set.
name = A + "_" + B
gd = GenomeDiagram.Diagram(name)

gA = gd.new_track(1, name="A", height=0.5, start=0, end=len(A1))
gA1 = gA.new_set()
gB = gd.new_track(3, name="B", height=0.5, start=0, end=len(B1))
gB1 = gB.new_set()

# CDS colours, used alternately
c1 = "#79B134"
c2 = "#8DE91D"

# Colour one box for each CDS of file A
cont = 1
for i in A1.features:
Example #30
0
# Pair every (p1, p2) interval with every (p3, p4) interval and record an
# overlap whenever one interval's start lies strictly inside the other.
# NOTE(review): the recorded end is taken from a fixed side (pppp or pp), not
# the smaller of the two ends - confirm this is intended for nested intervals.
p5 = []
p6 = []
for p, pp in zip(p1, p2):
    for ppp, pppp in zip(p3, p4):
        if ppp < p < pppp:
            p5.append(p)
            p6.append(pppp)
        elif p < ppp < pp:
            p5.append(ppp)
            p6.append(pp)

# Register each overlap on the record as a strandless feature o1, o2, ...
for i, (p, pp) in enumerate(zip(p5, p6), start=1):
    record.features.append(
        SeqFeature(location=FeatureLocation(p, pp, strand=None),
                   type='overlap',
                   id='o{}'.format(i)))

gd_diagram = GenomeDiagram.Diagram("Promoter region with GRE and TF binding")
gd_track_for_features = gd_diagram.new_track(1, name="GREs and TFBS")
gd_feature_set = gd_track_for_features.new_set()

# Only GRE and TF features are drawn; other types (including the 'overlap'
# features added above) are skipped.
feature_colors = {"GRE": colors.green, "TF": colors.red}
for feature in record.features:
    if feature.type in feature_colors:
        color = feature_colors[feature.type]
        # GenomeDiagram takes the label text from ``name``; ``label`` is only
        # an on/off switch, so the id goes in ``name``.
        gd_feature_set.add_feature(feature,
                                   color=color,
                                   name=feature.id,
                                   label=True,
                                   label_size=12,
                                   label_angle=20,
                                   sigil="OCTO")

gd_diagram.draw(format="linear", orientation="landscape", pagesize=(300,80),
                fragments=1, start=0, end=len(record))