def _compose_crosslink(self, index, result, features1, features2):
    """Add one cross-track link for every usable entry of ``result``.

    ``result`` is iterated as ``(f1, f2, col)`` triples. Entries whose
    ``f1`` is ``None`` are ignored. For every other entry, ``f1`` is added
    to ``features1`` and ``f2`` to ``features2`` (fill and border both set
    to the entry colour) and a ``CrossLink`` joining the two returned track
    features is appended to ``self.diagram.cross_track_links``.

    When ``self.no_color`` is truthy the colour is first converted with
    ``colors.color2bw``. ``index`` is accepted but not used by this method.
    """
    for feat_a, feat_b, shade in result:
        if feat_a is not None:
            paint = colors.color2bw(shade) if self.no_color else shade
            style = {"color": paint, "border": paint}
            drawn_a = features1.add_feature(feat_a, **style)
            drawn_b = features2.add_feature(feat_b, **style)
            self.diagram.cross_track_links.append(
                CrossLink(drawn_a, drawn_b, paint, paint))
flip = not flip q_start, q_end = q_end, q_start if s_start > s_end: flip = not flip s_start, s_end = s_end, s_start if flip: c = colors.Color(0, 0, 1, alpha=0.25) b = False else: c = colors.Color(1, 0, 0, alpha=0.25) b = False q_feature = q_set.add_feature(SeqFeature(FeatureLocation(q_start-1, q_end)), color=c, border=b) s_feature = s_set.add_feature(SeqFeature(FeatureLocation(s_start-1, s_end)), color=c, border=b) gd_diagram.cross_track_links.append(CrossLink(q_feature, s_feature, c, b)) #NOTE: We are using the same colour for all the matches, #with transparency. This means overlayed matches will appear darker. #It also means the drawing order not very important. #Note ACT puts long hits at the back, and colours by hit score handle.close() print("Drawing CDS features...") for f, format in genomes: record = records[f] feature_set = feature_sets[f] #Mark the CDS features for cds in record.features: if cds.type != "CDS": continue feature_set.add_feature(cds, sigil="ARROW",
if flip: c = colors.Color(0, 0, 1, alpha=0.25) b = False else: c = colors.Color(1, 0, 0, alpha=0.25) b = False q_feature = q_set.add_feature(SeqFeature( FeatureLocation(q_start - 1, q_end)), color=c, border=b) s_feature = s_set.add_feature(SeqFeature( FeatureLocation(s_start - 1, s_end)), color=c, border=b) gd_diagram.cross_track_links.append( CrossLink(q_feature, s_feature, c, b)) #NOTE: We are using the same colour for all the matches, #with transparency. This means overlayed matches will appear darker. #It also means the drawing order not very important. #Note ACT puts long hits at the back, and colours by hit score handle.close() print("Drawing CDS features...") for f, format in genomes: record = records[f] feature_set = feature_sets[f] #Mark the CDS features for cds in record.features: if cds.type != "CDS": continue feature_set.add_feature(cds,
def crosslinks(fileName, GenBank_1, GenBank_2):
    """Draw two GenBank records with UBLAST cross-links and write a PDF.

    Steps, in order:

    1. Read both GenBank files and write a FASTA copy of each via
       ``writeFasta``. The FASTA name is the GenBank path with its last
       three characters removed plus ".fasta".
    2. Run ``ublastfeatures`` on the two FASTA files, turn its output into
       ``(score, id_A, id_B)`` entries with ``getCrossLinks`` and print
       every entry.
    3. Ask on stdin whether one gene name should be highlighted.
    4. Mark every occurrence of the concatenation linker sequence in both
       records as a 35 bp "Concat" CDS on the reverse strand.
    5. Draw the CDS features of both records as labelled arrows, add one
       ``CrossLink`` per BLAST entry, and write ``fileName + ".pdf"``.

    Arrow colours: a gene named like the highlighted gene is red. When no
    highlight was requested, colours cycle through the first six palette
    entries. Otherwise names found in the first row of
    'Backbones_2_Clean.csv' are blue, names in the first row of
    'AntibioticResistanceGenesClean.csv' are green, 'Tnp'/'Int' are
    orange and everything else is grey. Features without a 'product'
    qualifier are grey and labelled 'No Name'.
    """
    gd_diagram = GenomeDiagram.Diagram(fileName)

    # Load both records and create the FASTA files compared by UBLAST.
    A_rec = SeqIO.read(GenBank_1, 'gb')
    fasta_file_name_A = GenBank_1[:-3] + ".fasta"
    writeFasta(GenBank_1, fasta_file_name_A)
    B_rec = SeqIO.read(GenBank_2, 'gb')
    fasta_file_name_B = GenBank_2[:-3] + ".fasta"
    writeFasta(GenBank_2, fasta_file_name_B)

    # Tab file with the UBLAST output, then the cross-links parsed from it.
    blastfile = ublastfeatures(fasta_file_name_A, fasta_file_name_B)
    A_vs_B = getCrossLinks(blastfile)
    for hit in A_vs_B:
        print(hit)

    # Ask the user for a gene name to highlight.
    gene_search = raw_input(
        "would you like to highlight a specific gene name?\n \t1) Yes\n \t2) No\n"
    )
    if gene_search == "1" or gene_search.lower() == "yes":
        gene_highlight = raw_input(
            "What is the name of the gene you would like to highlight?\n")
        print(gene_highlight +
              " will be printed in red on the genome diagram, all other genes will be grey")
    else:
        gene_highlight = "NONE"
    # Palette for the arrows; the same list is used whichever answer was
    # given. Only the first six entries are cycled through below.
    C_colors = [yellow, orange, brown, lightblue, purple, green, grey]

    # Create new features for the concatenation linkers in both records.
    linker = 'NNNNNCACACACTTAATTAATTAAGTGTGTGNNNNN'
    for target in (A_rec, B_rec):
        for hit in re.finditer(linker, str(target.seq)):
            target.features.append(
                SeqFeature(FeatureLocation(hit.start(), hit.start() + 35,
                                           strand=-1),
                           id="Concat",
                           type="CDS",
                           qualifiers={'product': 'Concat'}))

    # Read in the lists of gene names and types.
    with open('Backbones_2_Clean.csv', 'r') as hand1:
        backbone = list(csv.reader(hand1))
    with open('AntibioticResistanceGenesClean.csv', 'r') as hand2:
        An_Re = list(csv.reader(hand2))

    # Add each CDS of each record to its own track with a colour and name.
    max_len = 0
    i = 0  # running arrow counter shared by both records
    for record in (A_rec, B_rec):
        gene_colors = C_colors
        max_len = max(max_len, len(record))
        gd_track_for_features = gd_diagram.new_track(1,
                                                     name=record.name,
                                                     greytrack=True,
                                                     start=0,
                                                     end=len(record))
        gd_feature_set = gd_track_for_features.new_set()
        for feature in record.features:
            if feature.type != "CDS":
                continue
            # Choose the annotation colour from the gene name.
            try:
                Gname = feature.qualifiers['product'][0]
            except KeyError:
                # No gene name: draw it grey.
                Gname = 'No Name'
                geneColor = grey
            else:
                if Gname == gene_highlight:
                    geneColor = red
                elif gene_highlight == "NONE":
                    geneColor = gene_colors[i % 6]
                elif Gname in backbone[0]:
                    # Backbone genes
                    geneColor = blue
                elif Gname in An_Re[0]:
                    # Antibiotic resistance genes
                    geneColor = green
                elif Gname == 'Tnp' or Gname == 'Int':
                    # Transposase and integrase
                    geneColor = orange
                else:
                    geneColor = grey
            gd_feature_set.add_feature(feature,
                                       sigil="BIGARROW",
                                       arrowhead_length=.25,
                                       color=geneColor,
                                       label=True,
                                       name=Gname,
                                       label_position="start",
                                       label_size=6,
                                       label_angle=45)
            i += 1

    # A-record features are linked on tracks[2], B-record ones on tracks[1].
    track_X = gd_diagram.tracks[2]
    track_Y = gd_diagram.tracks[1]
    for score, id_X, id_Y in A_vs_B:
        try:
            feature_X = get_feature(A_rec.features, id_X)
            feature_Y = get_feature(B_rec.features, id_Y)
            color = colors.linearlyInterpolatedColor(colors.white,
                                                     colors.firebrick, 0, 100,
                                                     score)
            link_xy = CrossLink(
                (track_X, feature_X.location.start, feature_X.location.end),
                (track_Y, feature_Y.location.start, feature_Y.location.end),
                color, colors.lightgrey)
            print("Link made ")
            gd_diagram.cross_track_links.append(link_xy)
        except KeyError:
            # Nameless genes cannot be looked up.
            print("Feature qualifier for crosslink not found")

    gd_diagram.draw(format="linear",
                    pagesize=(1200, 2400),
                    fragments=1,
                    start=0,
                    end=max_len)
    print(max_len)
    gd_diagram.write(fileName + ".pdf", "PDF")
break # ========================================================================== # Build links # ========================================================================== for id, link in links.iteritems(): if id in merged_links: continue feat1 = features[link["fid1"]] feat2 = features[link["fid2"]] ''' if "NapDC1_B04" not in name_mapping[feat1["cid"]] and "NapDC1_B04" not in name_mapping[feat2["cid"]]: continue ''' color = colors.Color(50, 50, 50, alpha=0) gd_link = CrossLink((link["track1"], link["start1"], link["end1"]), (link["track2"], link["start2"], link["end2"]), color, colors.lightgrey, feat1["dir"] != feat2["dir"]) gd_diagram.cross_track_links.append(gd_link) print "Number of links: " + str(len(gd_diagram.cross_track_links)) # ========================================================================== # Write to file # ========================================================================== gd_diagram.draw(format="linear", pagesize="A1") gd_diagram.write(mapping_filename + ".svg", output="SVG", dpi=600)
alpha=(hsp.ident_pct / 200.0)) loc = FeatureLocation(offset + hsp.query_start, offset + hsp.query_end, strand=0) q = gd_contig_features.add_feature(SeqFeature(loc), color=color, border=border) r_offset = ref_offsets[hack_ncbi_fasta_name(hsp.hit_id)] loc = FeatureLocation(r_offset + hsp.hit_start, r_offset + hsp.hit_end, strand=0) h = gd_record_features.add_feature(SeqFeature(loc), color=color, border=border) gd_diagram.cross_track_links.append( CrossLink(q, h, color, border, flip)) #Now add the unmatched contigs on outside position = 0 gd_contig_features = None unplaced = 0 for contig in SeqIO.parse(assembly_fasta, "fasta"): contig_id = contig.id if contig_id in contigs_shown: continue #print("Adding unmapped contig %s (len %i bp), offset now %i" % (contig_id, contig_len, position)) unplaced += 1 contig_len = len(contig) if output_fasta: if min_len <= contig_len: fasta_handle.write(contigs.get_raw(contig_id))
gB1.add_feature(i, label=False,\ label_position="start",color=color_atual) # Marca na figura os trechos sintenicos for b in blast: qstart = int(b.split("\t")[0]) qend = int(b.split("\t")[1]) sstart = int(b.split("\t")[2]) send = int(b.split("\t")[3]) identidade = (float(b.split("\t")[4])) / 100 #identidade = (float(b.split("\t")[4])*0.8)/100 # Detectando inversoes qinv = qend - qstart sinv = send - sstart if (qinv > 0 and sinv > 0) or \ (qinv < 0 and sinv < 0): cor = colors.Color\ (1,.341176,.341176,identidade) else: cor = colors.firebrick if identidade >= 1: gd.cross_track_links.append(CrossLink((gA, \ qstart, qend),(gB, sstart, send),color=cor)) gd.draw(format="linear", pagesize=(8*cm,29.7*cm), \ fragments=1) gd.write(name + ".pdf", "PDF")
def draw_alignment(self, _gdd, query_id, query_length, blast_hits):
    """Draw the annotated reference, the query hits and their cross-links.

    A first track (level 1, positions 1..``self.ref_length``, no grey
    track) receives one labelled BOX per entry of ``self.annotation`` and
    a 'PCR start' marker at ``self.start``; the diagram is then drawn once.
    A second track (level 1, positions 0..``query_length``, named
    ``query_id``) receives one BOX per entry of ``blast_hits``.

    Each annotation entry is read as ``[start, end, name, ..., strand]``
    where the last item is the string '1' or '-1' (anything else means no
    strand). Each hit ``r`` is read as: ``r[0]``/``r[1]`` coordinates on
    the query track, ``r[2]``/``r[3]`` coordinates for the other end of the
    link, ``r[4]`` the value handed to ``self.scale_color`` and ``r[-1]``
    the strand. Hits with ``max(r[2], r[3]) < self.start`` get no link.

    NOTE(review): every hit is added to the query feature set twice (once
    per loop below); kept as-is to preserve the drawn output.
    """
    def hit_box(hit):
        # Query-side box for one BLAST hit.
        return SeqFeature(FeatureLocation(hit[0], hit[1]), strand=hit[-1])

    # Reference track with its annotations.
    ref_track = _gdd.new_track(1,
                               greytrack=False,
                               start=1,
                               end=self.ref_length)
    ref_set = ref_track.new_set()
    max_length = 0
    for annot in self.annotation:
        strand_tag = annot[-1]
        strand = 1 if strand_tag == '1' else (
            -1 if strand_tag == '-1' else None)
        start = int(annot[0])
        end = int(annot[1])
        max_length = max(max_length, end)
        ref_set.add_feature(SeqFeature(FeatureLocation(start, end),
                                       strand=strand,
                                       id=annot[2]),
                            label=True,
                            name=annot[2],
                            sigil='BOX',
                            label_size=14,
                            label_angle=0,
                            arrowhead_length=0.1,
                            arrowshaft_height=1)
    pcr_marker = SeqFeature(FeatureLocation(self.start, self.start + 1))
    ref_set.add_feature(pcr_marker,
                        label=True,
                        name='PCR start',
                        label_size=14,
                        color='black')
    _gdd.draw(format='linear', fragments=1, start=1, end=max_length)

    # Query track with one box per hit.
    query_track = _gdd.new_track(1,
                                 greytrack=True,
                                 start=0,
                                 end=query_length,
                                 name=query_id)
    query_set = query_track.new_set()
    for hit in blast_hits:
        query_set.add_feature(hit_box(hit), label=False, sigil='BOX')
    query_track.greytrack_fontcolor = colors.black
    query_track.greytrack_fontsize = 12

    # Cross-links, shaded from white to firebrick by the scaled value.
    for hit in blast_hits:
        shade = colors.linearlyInterpolatedColor(colors.white,
                                                 colors.firebrick, 0, 100,
                                                 self.scale_color(hit[4]))
        query_set.add_feature(hit_box(hit), label=False, sigil='BOX')
        if max(hit[2], hit[3]) >= self.start:
            _gdd.cross_track_links.append(
                CrossLink((_gdd.tracks[1], hit[0], hit[1]),
                          (_gdd.tracks[2], hit[2], hit[3]), shade))
    _gdd.draw(format='linear', fragments=1)
def plot_multiple_regions_crosslink2(target_protein_list, region_record_list,
                                     plasmid_list, out_name):
    """Draw stacked genomic regions with orthogroup cross-links as SVG.

    Every record of ``region_record_list`` gets its own track (levels
    ``2 * n - 1, 2 * n - 3, ...`` so that an empty level separates
    neighbouring records). For each pair of neighbouring records, every
    pair of CDS features sharing the same first "orthogroup" qualifier
    value is joined by a light grey ``CrossLink``. CDS features carrying a
    "locus_tag" qualifier are then drawn as labelled arrows.

    Args:
        target_protein_list: strings; a CDS whose "locus_tag" qualifier
            contains one of them is drawn in red.
        region_record_list: records to draw, top to bottom. Must support
            ``len()`` and expose ``name`` and ``features``.
        plasmid_list: one truthy/falsy flag per record selecting the blue
            (truthy) or green (falsy) pair of alternating arrow colours.
        out_name: destination passed to ``Diagram.write`` (SVG output).

    Raises:
        ValueError: if ``region_record_list`` is empty (from ``max``).
        IndexError: if ``plasmid_list`` is shorter than the record list.
    """
    def orthogroup_of(feature):
        # First "orthogroup" value, or None when the qualifier is missing
        # or empty; such features are never linked.
        try:
            return feature.qualifiers["orthogroup"][0]
        except (KeyError, IndexError):
            return None

    gd_diagram = GenomeDiagram.Diagram("geomic_region")
    n_records = len(region_record_list)
    record_length = [len(record) for record in region_record_list]

    # One track and one feature set per record, in record order, so that
    # feature_sets[k] always belongs to region_record_list[k].
    feature_sets = []
    max_len = 0
    for i, record in enumerate(region_record_list):
        max_len = max(max_len, len(record))
        gd_track_for_features = gd_diagram.new_track(
            (2 * n_records - 1) - 2 * i,
            name=record.name,
            greytrack=True,
            height=0.5,
            start=0,
            end=len(record))
        feature_sets.append(gd_track_for_features.new_set())

    # Cross-links between neighbouring records.
    link_color = colors.lightgrey
    for x in range(n_records - 1):
        set_X = feature_sets[x]
        set_Y = feature_sets[x + 1]

        # Index the lower record's CDS by orthogroup once, instead of
        # re-reading qualifiers for every (upper, lower) feature pair.
        # Lists keep feature order, so links are created in the same
        # order as a plain nested loop would create them.
        lower_by_group = {}
        for feature_2 in region_record_list[x + 1].features:
            if feature_2.type != "CDS":
                continue
            group = orthogroup_of(feature_2)
            if group is not None:
                lower_by_group.setdefault(group, []).append(feature_2)

        for feature_1 in region_record_list[x].features:
            if feature_1.type != "CDS":
                continue
            group = orthogroup_of(feature_1)
            if group is None:
                continue
            for feature_2 in lower_by_group.get(group, ()):
                F_x = set_X.add_feature(SeqFeature(
                    FeatureLocation(feature_1.location.start,
                                    feature_1.location.end,
                                    strand=0)),
                                        color=link_color,
                                        border=link_color)
                F_y = set_Y.add_feature(SeqFeature(
                    FeatureLocation(feature_2.location.start,
                                    feature_2.location.end,
                                    strand=0)),
                                        color=link_color,
                                        border=link_color)
                gd_diagram.cross_track_links.append(
                    CrossLink(F_x, F_y, link_color, link_color))

    # Gene arrows.
    for n, record in enumerate(region_record_list):
        gd_feature_set = feature_sets[n]
        if plasmid_list[n]:
            color1 = colors.HexColor('#2837B7')
            color2 = colors.blue
        else:
            color1 = colors.HexColor('#40F13A')
            color2 = colors.HexColor('#0F600C')
        for feature in record.features:
            if feature.type != "CDS":
                continue
            try:
                locus_tags = feature.qualifiers["locus_tag"]
            except KeyError:
                # CDS without a locus tag (e.g. pseudogenes) are not drawn.
                continue
            # Alternate the two colours on the number of features already
            # in the set (link boxes added above are counted too).
            color = color1 if len(gd_feature_set) % 2 == 0 else color2
            if any(target_protein in locus_tags
                   for target_protein in target_protein_list):
                color = colors.red
            gd_feature_set.add_feature(feature,
                                       sigil="ARROW",
                                       color=color,
                                       label=True,
                                       label_position="middle",
                                       label_strand=1,
                                       label_size=12,
                                       label_angle=45)

    # Page size: fixed for two records, otherwise 250 units per record;
    # the other dimension scales with the longest record.
    if n_records == 2:
        hauteur = 700
    else:
        hauteur = 250 * n_records
    largeur = max(record_length) / 30
    orientation = 'portrait' if hauteur > largeur else 'landscape'
    gd_diagram.draw(format="linear",
                    pagesize=(hauteur, largeur),
                    orientation=orientation,
                    fragments=1,
                    start=0,
                    end=max_len)
    gd_diagram.write(out_name, "SVG")
track_x_name = name_for_featureset.name track_x = name_for_featureset if name_for_featureset[feature_number].name == cross_link_feature_B and name_for_featureset.name != str(track_x_name): feature_y = name_for_featureset[feature_number] track_y_name = name_for_featureset.name track_y = name_for_featureset color = colors.linearlyInterpolatedColor(colors.white, colors.firebrick, 0, 100, score) border = colors.lightgrey # link_xy = CrossLink((track_x, feature_x.location.start, feature_x.location.end), # (track_y, feature_y.location.start, feature_y.location.end), # color, colors.lightgrey) # gd_diagram.cross_track_links.append(link_xy) gd_diagram.cross_track_links.append(CrossLink(feature_x, feature_y, color, border)) print('MATCH') i += 1 #print (len(name_for_featureset)) #for score, nameA, nameB in A_vs_B: #if name_for_featureset.name == nameA: # print(score) # print(nameA) #print(name_for_featureset.features[4].name) #for score, x, y in A_vs_B: #color = colors.linearlyInterpolatedColor(colors.white, colors.firebrick, 0, 100, 50) # border = colors.lightgrey
def plot_multiple_regions_crosslink(target_protein_list,
                                    region_record_list,
                                    plasmid_list,
                                    out_name,
                                    biodb_name="chlamydia_03_15",
                                    color_locus_list=None,
                                    flip_record_based_on_first=True,
                                    color_orthogroup_list=None):
    """Draw stacked regions with identity-shaded cross-links; write an SVG.

    Neighbouring records are linked wherever two CDS features share the
    same first "orthogroup" qualifier value. Each link is shaded on a
    matplotlib ``Blues`` scale (normalised over -30..100) by the value
    returned from ``orthogroup_identity_db.check_identity``; a failed
    lookup is reported on stdout and shaded as identity 0.

    Args:
        target_protein_list: strings; a CDS whose "locus_tag" qualifier
            contains one of them is drawn in red.
        region_record_list: list of records to draw. It is modified in
            place when ``flip_record_based_on_first`` is true: record
            names are replaced by their descriptions and records may be
            replaced by their reverse complement.
        plasmid_list: one truthy/falsy flag per record selecting the blue
            (truthy) or green (falsy) pair of alternating arrow colours.
        out_name: path the edited SVG is written to; it is then made
            read-only with ``chmod 444``.
        biodb_name: suffix of the MySQL database name (``orth_<name>``),
            also forwarded to ``edit_svg.edit_svg``.
        color_locus_list: locus tags to draw in the orange highlight
            colours. ``None`` means no highlighted locus.
        flip_record_based_on_first: when true, each record is reverse
            complemented if more of the orthogroup pairs it shares with
            the record above lie on different strands than on the same
            strand.
        color_orthogroup_list: orthogroups to draw in the orange
            highlight colours. ``None`` means no highlighted group.

    Returns:
        List of locus tags of the rRNA, tRNA and CDS arrows that were
        drawn; tags of later records come before those of earlier ones.

    Raises:
        KeyError: if the ``SQLPSW`` environment variable is not set.
    """
    import io
    import os

    import matplotlib as mpl
    import matplotlib.cm as cm
    import MySQLdb
    from matplotlib.colors import rgb2hex

    from chlamdb.biosqldb import shell_command
    from chlamdb.plots import edit_svg

    if color_locus_list is None:
        color_locus_list = ()
    if color_orthogroup_list is None:
        color_orthogroup_list = ()

    norm = mpl.colors.Normalize(vmin=-30, vmax=100)
    identity_scale = cm.ScalarMappable(norm=norm, cmap=cm.Blues)

    def identity_color(identity):
        # Map an identity value onto the Blues colour scale.
        return colors.HexColor(
            rgb2hex(identity_scale.to_rgba(float(identity))))

    conn = MySQLdb.connect(
        host="127.0.0.1",
        user="******",
        passwd=os.environ['SQLPSW'],
        db="orth_%s" % biodb_name)
    # The connection is only needed while links are built; close it even
    # if one of the stages below fails.
    try:
        cursor = conn.cursor()
        gd_diagram = GenomeDiagram.Diagram("geomic_region")
        n_records = len(region_record_list)
        record_length = [len(record) for record in region_record_list]

        if flip_record_based_on_first:
            _crosslink_orient_records(region_record_list)

        # One track and one feature set per record, in record order, so
        # that feature_sets[k] always belongs to region_record_list[k].
        feature_sets = []
        max_len = 0
        for i, record in enumerate(region_record_list):
            max_len = max(max_len, len(record))
            gd_track_for_features = gd_diagram.new_track(
                n_records - 1 - i,
                name=record.name,
                greytrack=True,
                height=0.4,
                start=0,
                end=len(record))
            feature_sets.append(gd_track_for_features.new_set())

        _crosslink_add_identity_links(gd_diagram, region_record_list,
                                      feature_sets, cursor, identity_color)
    finally:
        conn.close()

    all_locus = []
    for n, record in enumerate(region_record_list):
        if plasmid_list[n]:
            color1 = colors.HexColor('#2837B7')
            color2 = colors.blue
        else:
            color1 = colors.HexColor('#40F13A')
            color2 = colors.HexColor('#0F600C')
        one_row_locus = _crosslink_draw_features(
            feature_sets[n], record, color1, color2, target_protein_list,
            color_locus_list, color_orthogroup_list)
        all_locus = one_row_locus + all_locus

    # Page size: fixed for two records, otherwise 150 units per record;
    # the other dimension scales with the longest record.
    if n_records == 2:
        hauteur = 300
    else:
        hauteur = 150 * n_records
    largeur = max(record_length) / 30
    orientation = 'portrait' if hauteur > largeur else 'landscape'
    gd_diagram.draw(format="linear",
                    pagesize=(hauteur, largeur),
                    orientation=orientation,
                    fragments=1,
                    start=0,
                    end=max_len)

    # Render to memory so the SVG can be post-processed before it is saved.
    svg_diagram = io.StringIO()
    gd_diagram.write(svg_diagram, "SVG")
    with_links = edit_svg.edit_svg(svg_diagram.getvalue(), all_locus,
                                   biodb_name)
    with_links.write(out_name)

    # NOTE(review): out_name is interpolated unquoted into a shell command;
    # a path containing spaces or shell metacharacters will misbehave.
    shell_command.shell_command('chmod 444 %s' % out_name)
    return all_locus


def _crosslink_first_qualifier(feature, key):
    """Return the first value of qualifier ``key``, or None if absent/empty."""
    try:
        return feature.qualifiers[key][0]
    except (KeyError, IndexError):
        return None


def _crosslink_cds_by_orthogroup(features):
    """Map each orthogroup to its CDS features, keeping feature order."""
    grouped = {}
    for feature in features:
        if feature.type != "CDS":
            continue
        group = _crosslink_first_qualifier(feature, "orthogroup")
        if group is not None:
            grouped.setdefault(group, []).append(feature)
    return grouped


def _crosslink_orient_records(region_record_list):
    """Name records after their description and flip opposite-strand ones.

    Works in place. Each record is compared with the (possibly already
    flipped) record before it.
    """
    region_record_list[0].name = region_record_list[0].description
    for x in range(len(region_record_list) - 1):
        upper = region_record_list[x]
        lower = region_record_list[x + 1]
        lower_by_group = _crosslink_cds_by_orthogroup(lower.features)
        same_strand_count = 0
        different_strand_count = 0
        for feature_1 in upper.features:
            if feature_1.type != "CDS":
                continue
            group = _crosslink_first_qualifier(feature_1, "orthogroup")
            if group is None:
                continue
            for feature_2 in lower_by_group.get(group, ()):
                if feature_1.location.strand == feature_2.location.strand:
                    same_strand_count += 1
                else:
                    different_strand_count += 1
        if different_strand_count > same_strand_count:
            region_record_list[x + 1] = lower.reverse_complement(
                id=lower.id, name=lower.description)
        else:
            lower.name = lower.description


def _crosslink_add_identity_links(gd_diagram, region_record_list,
                                  feature_sets, cursor, identity_color):
    """Link same-orthogroup CDS of neighbouring records, shaded by identity."""
    box_color = colors.lightgrey
    for x in range(len(region_record_list) - 1):
        set_X = feature_sets[x]
        set_Y = feature_sets[x + 1]
        lower_by_group = _crosslink_cds_by_orthogroup(
            region_record_list[x + 1].features)
        for feature_1 in region_record_list[x].features:
            if feature_1.type != "CDS":
                continue
            group = _crosslink_first_qualifier(feature_1, "orthogroup")
            if group is None:
                continue
            for feature_2 in lower_by_group.get(group, ()):
                try:
                    identity = orthogroup_identity_db.check_identity(
                        cursor, group,
                        feature_1.qualifiers["locus_tag"][0],
                        feature_2.qualifiers["locus_tag"][0])
                except Exception:
                    # Best effort: a failed lookup must not abort the
                    # plot; report both loci and fall back to identity 0.
                    identity = 0
                    print("problem with identity table %s and locus %s %s" %
                          (group,
                           _crosslink_first_qualifier(feature_1, "locus_tag"),
                           _crosslink_first_qualifier(feature_2, "locus_tag")))
                link_color = identity_color(identity)
                F_x = set_X.add_feature(
                    SeqFeature(
                        FeatureLocation(feature_1.location.start,
                                        feature_1.location.end,
                                        strand=0)),
                    color=box_color,
                    border=box_color,
                    set_id=feature_1.qualifiers["locus_tag"])
                F_y = set_Y.add_feature(SeqFeature(
                    FeatureLocation(feature_2.location.start,
                                    feature_2.location.end,
                                    strand=0)),
                                        color=box_color,
                                        border=box_color)
                gd_diagram.cross_track_links.append(
                    CrossLink(F_x, F_y, link_color, link_color))


def _crosslink_draw_features(gd_feature_set, record, color1, color2,
                             target_protein_list, color_locus_list,
                             color_orthogroup_list):
    """Add the record's drawable features; return the drawn locus tags."""
    highlight_even = colors.HexColor('#ca4700')
    highlight_odd = colors.HexColor('#fd7a32')
    one_row_locus = []
    for feature in record.features:
        if feature.type == "tblast_target":
            feature.name = 'match'
            gd_feature_set.add_feature(feature,
                                       sigil="BOX",
                                       color="#ff4a0c86",
                                       label=False,
                                       label_position="middle",
                                       label_size=25,
                                       label_angle=0)
        elif feature.type == "assembly_gap":
            feature.location.strand = None
            gd_feature_set.add_feature(feature,
                                       sigil="BOX",
                                       color="red",
                                       label=True,
                                       label_position="middle",
                                       label_strand=1,
                                       label_size=14,
                                       label_angle=40)
        elif feature.type in ("rRNA", "tRNA"):
            gd_feature_set.add_feature(feature,
                                       sigil="ARROW",
                                       color="orange",
                                       label=True,
                                       label_position="middle",
                                       label_strand=1,
                                       label_size=10,
                                       label_angle=40)
            locus = _crosslink_first_qualifier(feature, "locus_tag")
            if locus is not None:
                one_row_locus.append(locus)
            elif feature.type == "tRNA":
                # Only tRNAs without a locus tag are reported.
                print('no locus tag for:')
                print(feature)
        elif feature.type == "repeat_region":
            gd_feature_set.add_feature(feature,
                                       sigil="BOX",
                                       color="blue",
                                       label=True,
                                       label_position="middle",
                                       label_strand=1,
                                       label_size=14,
                                       label_angle=40)

        # Pseudo features of any type get an octagon (in addition to
        # whatever was drawn above) and are never drawn as CDS arrows.
        if 'pseudo' in feature.qualifiers:
            gd_feature_set.add_feature(feature,
                                       sigil="OCTO",
                                       color="#6E6E6E",
                                       label=True,
                                       label_position="middle",
                                       label_strand=1,
                                       label_size=10,
                                       label_angle=40)
            continue
        if feature.type != "CDS":
            continue

        # CDS lacking a locus tag or an orthogroup are not drawn.
        locus = _crosslink_first_qualifier(feature, "locus_tag")
        group = _crosslink_first_qualifier(feature, "orthogroup")
        if locus is None or group is None:
            continue

        # Alternate two shades on the number of features already in the
        # set. A CDS is highlighted when either its locus tag or its
        # orthogroup was requested.
        even = len(gd_feature_set) % 2 == 0
        if locus in color_locus_list or group in color_orthogroup_list:
            color = highlight_even if even else highlight_odd
        else:
            color = color1 if even else color2
        locus_tags = feature.qualifiers["locus_tag"]
        if any(target_protein in locus_tags
               for target_protein in target_protein_list):
            color = colors.red
        gd_feature_set.add_feature(feature,
                                   sigil="ARROW",
                                   color=color,
                                   label=True,
                                   label_position="middle",
                                   label_strand=1,
                                   label_size=10,
                                   label_angle=40)
        one_row_locus.append(locus)
    return one_row_locus
if name_for_featureset[ feature_number].name == cross_link_feature_B and name_for_featureset.name != str( track_x_name): features_y_link.insert(number, name_for_featureset[feature_number]) #feature_y_n = name_for_featureset[feature_number] track_y_name = name_for_featureset.name track_y = name_for_featureset print("Test FEATUREA 2: ") print(number) print(name_for_featureset[feature_number].name) print(features_y_link) for feature_x, feature_y in zip(features_x_link, features_y_link): #print("SCORE: " + str(count_feature)) score = A_vs_B[count_feature][0] color = colors.linearlyInterpolatedColor(colors.white, colors.firebrick, 0, 100, score) border = colors.lightgrey gd_diagram.cross_track_links.append( CrossLink(feature_x, feature_y, color, border)) count_feature += 1 i += 1 gd_diagram.draw(format='linear', pagesize='A4', fragments=1, start=0) gd_diagram.write(diagram_name + ".pdf", "pdf") gd_diagram.write(diagram_name + ".svg", "SVG")
print fname color = colors.Color(0.8, 0.8, 0.8, 0.9) border = colors.lightgrey with open("ch_comp.out") as f: for line in f: ls = line.split() id1 = ls[0][7:] if id1 in rnconv: id1 = rnconv[id1] id2 = ls[1] print(id1, id2) if id1 in rbars1 and id2 in rbars2: gd_diagram.cross_track_links.append( CrossLink(rbars1[id1], rbars2[id2], color, border)) for pair in [("rrs", "rrs"), ("rrl", "rrl")]: if pair[0] in rbars1 and pair[1] in rbars2: gd_diagram.cross_track_links.append( CrossLink(rbars1[pair[0]], rbars2[pair[1]], color, border)) print border gd_diagram.draw(format="linear", pagesize='A4', fragments=1) #gd_diagram.write(name + ".pdf", "PDF") gd_diagram.write(name + "_linear.png", "PNG") gd_diagram.write(name + "_linear.eps", "EPS") #gd_diagram.write(name + ".svg", "SVG")
sys_exit( "Could not find offset key %r for hit %r in dict (query id %r)" % (hack_ncbi_fasta_name( hsp.hit_id), hsp.hit_id, hsp.query_id)) else: sys_exit( "Could not find offset for hit %r in dict (query id %r)" % (hsp.hit_id, hsp.query_id)) loc = FeatureLocation(r_offset + hsp.hit_start, r_offset + hsp.hit_end, strand=0) hit = gd_ref_features.add_feature(SeqFeature(loc), color=color, border=border) gd_diagram.cross_track_links.append( CrossLink(query, hit, color, border, flip)) offset += SPACER + contig_len # Ready for next pairwise comparison, reference_fasta = assembly_fasta ref_offsets = contig_offsets gd_ref_features = gd_contig_features # Set size based on max track length? page = (2 * cm + 5 * cm * len(assemblies_fasta), 100 * cm * max_len / 5000000) gd_diagram.draw(format="linear", fragments=1, pagesize=page, start=0, end=max_len)
def GenomeMap(file, GenomeId, grid=10000, cross=True):
    """Draw a linear comparison of several genomes and write it as PDF.

    Args:
        file: path of a CSV file with one cross-link per row. Columns 0
            and 1 hold the two genome identifiers (only the text before
            the first space is used); columns 2-3 and 4-5 hold the integer
            start/end of the linked region on each genome.
        GenomeId: list of genome identifiers. ``<Id>.gb`` is read from the
            working directory; when it cannot be read or parsed, the
            record is fetched through ``Entrez.efetch`` and cached there.
        grid: spacing of the large scale ticks; small ticks use
            ``grid / 20``.
        cross: when true, cross-links are drawn and the result goes to the
            module-level ``output`` name; when false, only the tracks are
            drawn and the result is written to "T7.pdf".

    Link colours are interpolated by the position of the second genome in
    ``GenomeId``: green to yellow when both genomes are in ``CLASS1``,
    purple to red when both are in ``CLASS2``, blue to cyan otherwise.
    """
    gd_diagram = GenomeDiagram.Diagram('phages')
    with open(file, 'r') as f:
        data = list(csv.reader(f))

    records = []
    ref = {}
    for Id in GenomeId:
        gb_path = Id + ".gb"
        try:
            record = SeqIO.read(gb_path, "genbank")
        except (FileNotFoundError, IOError, ValueError):
            # No usable local copy: download the record and cache it.
            hd = Entrez.efetch(db="nucleotide",
                               id=Id,
                               rettype='gb',
                               retmode="text")
            try:
                record = SeqIO.read(hd, 'genbank')
            finally:
                hd.close()
            with open(gb_path, 'w') as fw:
                SeqIO.write(record, fw, 'genbank')
        for cached in SeqIO.parse(gb_path, "genbank"):
            ref[Id] = cached.annotations['keywords']
        records.append(record)

    feature_sets = {}
    max_len = 0
    for i, record in enumerate(records):
        max_len = max(max_len, len(record))
        gd_track_for_features = gd_diagram.new_track(
            5 - 2 * i,
            name=record.description,
            greytrack=True,
            greytrack_fontsize=16,
            greytrack_labels=1,
            largetick=True,
            smalltick=True,
            scale_ticks=True,
            scale_largeticks=0.5,
            scale_smallticks=0.1,
            scale_largetick_interval=grid,
            scale_smalltick_interval=grid / 20,
            scale_largetick_labels=True,
            start=0,
            end=len(record),
        )
        # Sets are keyed by record.id, so that is the key that must be
        # unique.
        assert record.id not in feature_sets
        feature_sets[record.id] = gd_track_for_features.new_set()

    if cross:
        for crosslink in data:
            id_x = crosslink[0].split(' ')[0]
            id_y = crosslink[1].split(' ')[0]
            set_X = feature_sets[id_x]
            set_Y = feature_sets[id_y]
            if id_x in CLASS1 and id_y in CLASS1:
                low, high = colors.green, colors.yellow
            elif id_x in CLASS2 and id_y in CLASS2:
                low, high = colors.purple, colors.red
            else:
                low, high = colors.blue, colors.cyan
            color = colors.linearlyInterpolatedColor(
                low, high, 0, len(GenomeId), GenomeId.index(id_y))
            F_x = set_X.add_feature(
                SeqFeature(
                    FeatureLocation(int(crosslink[2]),
                                    int(crosslink[3]),
                                    strand=0)),
                color=color,
                border=color,
            )
            F_y = set_Y.add_feature(
                SeqFeature(
                    FeatureLocation(int(crosslink[4]),
                                    int(crosslink[5]),
                                    strand=0)),
                color=color,
                border=color,
            )
            gd_diagram.cross_track_links.append(
                CrossLink(F_x, F_y, color, color))

    # Built once; features index into it by their length modulo 163.
    named_colors = list(colors.getAllNamedColors().keys())
    for record in records:
        gd_feature_set = feature_sets[record.id]

        # Rotate the record so that it starts at its replication origin.
        for feature in record.features:
            if feature.type == 'rep_origin':
                origin = feature.location.start
                print(record.description + ' 的起始位点在:' + str(origin))
                record = record[origin:] + record[:origin]
                if record.features[0].strand == -1:
                    print('daole')
                    record = record.reverse_complement(
                        id=True,
                        name=True,
                        description=True,
                        features=True,
                        annotations=True,
                        letter_annotations=True)
                break
        print(record.description + ' 的起始位点已校正')

        # Draw "gene" features when the record has keywords, otherwise
        # draw its "CDS" features (with a longer arrow head).
        if ref[record.id] != ['']:
            wanted_type, arrowhead = "gene", 0.1
        else:
            wanted_type, arrowhead = "CDS", 0.2
        for feature in record.features:
            if feature.type != wanted_type:
                continue
            gd_feature_set.add_feature(feature,
                                       color=named_colors[len(feature) % 163],
                                       label=True,
                                       label_size=10,
                                       label_angle=90,
                                       sigil="ARROW",
                                       arrowshaft_height=1.0,
                                       arrowhead_length=arrowhead)

    if not cross:
        # Single-sequence drawing.
        gd_diagram.draw(format="linear",
                        pagesize='A4',
                        fragments=5,
                        start=0,
                        end=max_len,
                        fragment_size=1)
        gd_diagram.write("T7.pdf", "PDF")
    else:
        # Comparison drawing.
        gd_diagram.draw(format="linear",
                        pagesize=(10 * len(GenomeId) * cm, 120 * cm),
                        fragments=1,
                        start=0,
                        end=max_len,
                        fragment_size=1)
        # NOTE(review): `output` is not defined in this function; it must
        # exist at module level or this line raises NameError.
        gd_diagram.write(output, "PDF")
    print("已输出为PDF")
set_Y = feature_sets[Y] for score, x, y in X_vs_Y: color = colors.linearlyInterpolatedColor(colors.white, colors.firebrick, 0, 100, score) border = colors.lightgrey f_x = get_feature(features_X, x) F_x = set_X.add_feature(SeqFeature(FeatureLocation(f_x.location.start, f_x.location.end, strand=0)), color=color, border=border) f_y = get_feature(features_Y, y) F_y = set_Y.add_feature(SeqFeature(FeatureLocation(f_y.location.start, f_y.location.end, strand=0)), color=color, border=border) gd_diagram.cross_track_links.append(CrossLink(F_x, F_y, color, border)) for record, gene_colors in zip([A_rec, B_rec, C_rec], [A_colors, B_colors, C_colors]): gd_feature_set = feature_sets[record.name] i = 0 for feature in record.features: if feature.type != "gene": # Exclude this feature continue try: g_color = gene_colors[i] except IndexError: print("Don't have color for %s gene %i" % (record.name, i))
identity = float(each_line_split[2]) query_start = int(each_line_split[6]) query_end = int(each_line_split[7]) target_start = int(each_line_split[8]) target_end = int(each_line_split[9]) # use color to reflect identity color = colors.linearlyInterpolatedColor(colors.white, colors.red, 50, 100, identity) # determine which is which (query/target to contig_1/contig_2) # if query is contig_1 if query == gene1_contig.name: link = CrossLink( (contig_1_gene_content_track, query_start, query_end), (contig_2_gene_content_track, target_start, target_end), color=color, border=color, flip=False) diagram.cross_track_links.append(link) # if query is contig_2 elif query == gene2_contig.name: link = CrossLink( (contig_2_gene_content_track, query_start, query_end), (contig_1_gene_content_track, target_start, target_end), color=color, border=color, flip=False) diagram.cross_track_links.append(link) # Draw and Export
def test_diagram_via_object_pdf(self):
    """Build a diagram object by object and write it out as four PDFs.

    Feature tracks, graph tracks and cross-track links are assembled
    explicitly from ``self.record``; the diagram is then drawn as a full
    and a partial circular view and as a full and a partial linear view,
    each written into the ``Graphics`` directory.
    """
    record = self.record
    diagram = Diagram('Test Diagram')

    cds_track = Track('CDS features', greytrack=True,
                      scale_largetick_interval=1e4,
                      scale_smalltick_interval=1e3,
                      greytrack_labels=10,
                      greytrack_font_color="red",
                      scale_format="SInt")
    gene_track = Track('gene features', greytrack=1,
                       scale_largetick_interval=1e4)

    # Feature sets: two "background" sets that carry the cross-link end
    # points, then one set per kind of annotated feature.
    cds_backgrounds = FeatureSet(name='CDS backgrounds')
    gene_backgrounds = FeatureSet(name='gene background')
    cds_features = FeatureSet(name='CDS features')
    gene_features = FeatureSet(name='gene features')
    misc_features = FeatureSet(name='misc_features')
    repeat_features = FeatureSet(name='repeat regions')

    # Link every CDS to the gene seen just before it (assuming that gene
    # goes with this CDS), alternating between two colour pairs.
    pending_gene = None
    cds_seen = 0
    for feat in record.features:
        if feat.type == 'CDS':
            cds_seen += 1
            if pending_gene:
                if cds_seen % 2 == 0:
                    dark, light = colors.peru, colors.tan
                else:
                    dark, light = colors.burlywood, colors.bisque
                cds_span = FeatureLocation(feat.location.start,
                                           feat.location.end,
                                           strand=0)
                cds_end = cds_backgrounds.add_feature(SeqFeature(cds_span),
                                                      color=dark)
                gene_span = FeatureLocation(pending_gene.location.start,
                                            pending_gene.location.end,
                                            strand=0)
                gene_end = gene_backgrounds.add_feature(
                    SeqFeature(gene_span), color=dark)
                diagram.cross_track_links.append(
                    CrossLink(cds_end, gene_end, light, dark))
                pending_gene = None
        if feat.type == 'gene':
            pending_gene = feat

    # Hand-placed cross-links, drawn in the order listed:
    # (base colour, span on the CDS background set,
    #  span on the gene background set, flip)
    demo_links = [
        # Some cross links on the same linear diagram fragment,
        (colors.red, (2220, 2230), (2200, 2210), False),
        (colors.blue, (2150, 2200), (2220, 2290), True),
        (colors.green, (2250, 2560), (2300, 2860), False),
        # Both parts saddling the linear diagram fragment boundary,
        (colors.red, (3155, 3250), (3130, 3300), False),
        # Nestled within that (drawn on top),
        (colors.blue, (3160, 3275), (3180, 3225), True),
        # Two features on either side of the linear diagram fragment
        # boundary,
        (colors.green, (6450, 6550), (6265, 6365), False),
        (colors.gold, (6265, 6365), (6450, 6550), False),
        (colors.red, (6275, 6375), (6430, 6530), True),
        (colors.blue, (6430, 6530), (6275, 6375), True),
    ]
    for base_color, cds_range, gene_range, flip in demo_links:
        fill, edge = fill_and_border(base_color)
        cds_end = cds_backgrounds.add_feature(
            SeqFeature(FeatureLocation(*cds_range)), color=fill, border=edge)
        gene_end = gene_backgrounds.add_feature(
            SeqFeature(FeatureLocation(*gene_range)), color=fill, border=edge)
        diagram.cross_track_links.append(
            CrossLink(cds_end, gene_end, color=fill, border=edge, flip=flip))

    # Now the real features, dispatched on their type.
    cds_seen = 0
    for feat in record.features:
        kind = feat.type
        if kind == 'CDS':
            cds_seen += 1
            cds_features.add_feature(
                feat,
                color=colors.pink if cds_seen % 2 == 0 else colors.red,
                sigil="ARROW")
        elif kind == 'gene':
            # No colour here: ALL the genes are recoloured below as a test.
            gene_features.add_feature(feat, sigil="ARROW")
        elif kind == 'misc_feature':
            misc_features.add_feature(feat, color=colors.orange)
        elif kind == 'repeat_region':
            repeat_features.add_feature(feat, color=colors.purple)

    # Bulk attribute changes applied to whole feature sets.
    for feature_set in (cds_features, gene_features,
                        misc_features, repeat_features):
        feature_set.set_all_features('label', 1)
    for feature_set in (misc_features, repeat_features):
        feature_set.set_all_features('hide', 0)
    gene_features.set_all_features('color', colors.blue)

    # Backgrounds are added before the real features so they sit under them.
    cds_track.add_set(cds_backgrounds)
    cds_track.add_set(cds_features)
    gene_track.add_set(gene_backgrounds)
    gene_track.add_set(gene_features)

    misc_track = Track('misc features and repeats', greytrack=1,
                       scale_largetick_interval=1e4)
    misc_track.add_set(misc_features)
    misc_track.add_set(repeat_features)

    # Graph sets.  A fairly large step makes it easy to tell the bar and
    # line graphs apart.
    window = len(record) // 200

    skew_graphs = GraphSet('GC skew')
    skew_data = apply_to_window(record.seq, window, calc_gc_skew, window)
    skew_graphs.new_graph(skew_data, 'GC Skew', style='bar',
                          color=colors.violet, altcolor=colors.purple)
    skew_track = Track('GC Skew (bar)', height=1.94, greytrack=1,
                       scale_largetick_interval=1e4)
    skew_track.add_set(skew_graphs)

    content_graphs = GraphSet('GC and AT Content')
    gc_data = apply_to_window(record.seq, window, calc_gc_content, window)
    content_graphs.new_graph(gc_data, 'GC content', style='line',
                             color=colors.lightgreen,
                             altcolor=colors.darkseagreen)
    at_data = apply_to_window(record.seq, window, calc_at_content, window)
    content_graphs.new_graph(at_data, 'AT content', style='line',
                             color=colors.orange, altcolor=colors.red)
    content_track = Track('GC Content(green line), AT Content(red line)',
                          height=1.94, greytrack=1,
                          scale_largetick_interval=1e4)
    content_track.add_set(content_graphs)

    dinucleotide_graphs = GraphSet('Di-nucleotide count')
    window = len(record) // 400  # smaller step
    dinucleotide_data = apply_to_window(record.seq, window,
                                        calc_dinucleotide_counts, window)
    dinucleotide_graphs.new_graph(dinucleotide_data, 'Di-nucleotide count',
                                  style='heat', color=colors.red,
                                  altcolor=colors.orange)
    dinucleotide_track = Track('Di-nucleotide count', height=0.5,
                               greytrack=False, scale=False)
    dinucleotide_track.add_set(dinucleotide_graphs)

    # Add the tracks (from both features and graphs).  Levels start at 3
    # to leave some white space in the middle/bottom.
    track_levels = (
        (skew_track, 3),          # GC skew
        (content_track, 4),       # GC and AT content
        (cds_track, 5),           # CDS features
        (gene_track, 6),          # Gene features
        (misc_track, 7),          # Misc features and repeat feature
        (dinucleotide_track, 8),  # Di-nucleotide heat map
    )
    for track, level in track_levels:
        diagram.add_track(track, level)

    def render(pdf_name, **draw_options):
        """Draw the diagram with the shared options and save it as a PDF."""
        diagram.draw(orientation='landscape', tracklines=0, **draw_options)
        diagram.write(os.path.join('Graphics', pdf_name), 'PDF')

    # Finally draw it in both formats, and full view and partial.
    render('GD_by_obj_circular.pdf', format='circular', pagesize='A0')

    # NOTE(review): presumably stops the partial view from being treated
    # as a closed circle -- confirm against the Diagram implementation.
    diagram.circular = False
    render('GD_by_obj_frag_circular.pdf', format='circular', pagesize='A0',
           start=3000, end=6300)

    render('GD_by_obj_linear.pdf', format='linear', pagesize='A0',
           fragments=3)

    diagram.set_all_tracks("greytrack_labels", 2)
    render('GD_by_obj_frag_linear.pdf', format='linear',
           pagesize=(30 * cm, 10 * cm), fragments=1, start=3000, end=6300)
if i.type == "rRNA": color_atual = colors.blue gB1.add_feature(i, label=False, label_position="start", color=color_atual) # Marca na figura os trechos sintenicos for b in blast: qstart = int(b.split("\t")[0]) qend = int(b.split("\t")[1]) sstart = int(b.split("\t")[2]) send = int(b.split("\t")[3]) identidade = (float(b.split("\t")[4]) * 0.8) / 100 # Detectando inversoes qinv = qend - qstart sinv = send - sstart if (qinv > 0 and sinv > 0) or (qinv < 0 and sinv < 0): cor = colors.Color(1, .341176, .341176, identidade) else: cor = colors.firebrick gd.cross_track_links.append( CrossLink((gA, qstart, qend), (gB, sstart, send), color=cor)) gd.draw(format="linear", pagesize=(8 * cm, 29.7 * cm), fragments=1) gd.write(name + ".pdf", "PDF")