def plot(self, ax, gr: GenomeRange, **kwargs):
    """Draw the rows fetched for ``gr`` on ``ax`` as a GraphicRecord.

    The data frame returned by ``self.fetch_plot_data(gr)`` is optionally
    narrowed by the ``row_filter`` property (``;``-separated expressions of
    the form ``<column><operator><value>``), then reduced to rows whose
    ``end - start`` exceeds ``length_ratio_thresh`` times the region length.
    Each remaining row becomes one GraphicFeature with a randomly chosen
    colour from ``self.colors``.
    """
    self.ax = ax
    df = self.fetch_plot_data(gr)

    if self.has_prop("row_filter"):
        for filter_ in self.properties["row_filter"].split(";"):
            operator = re.search("[=><!]", filter_)
            if operator is None:
                # No comparison operator at all: nothing to evaluate.
                log.warning(f"row filter {filter_} is not valid.")
                continue
            column = filter_[:operator.start()].strip()
            condition = filter_[operator.start():]
            try:
                # NOTE(review): eval() runs text taken from the track
                # properties; only safe if those are trusted.
                df = eval(f'df[df["{column}"]{condition}]')
            except IndexError:
                log.warning(f"row filter {filter_} is not valid.")

    region_length = gr.end - gr.start
    min_length = region_length * self.properties["length_ratio_thresh"]
    df = df[(df["end"] - df["start"]) > min_length]

    features = [
        GraphicFeature(
            start=row['start'],
            end=row['end'],
            strand=(1 if row['strand'] == '+' else -1),
            label=row['gene_name'],
            color=random.choice(self.colors),
        )
        for _, row in df.iterrows()
    ]
    record = GraphicRecord(sequence_length=region_length,
                           features=features,
                           first_index=gr.start)
    record.plot(ax=ax, with_ruler=False, draw_line=False)
    self.plot_label()
def test_cropping():
    """Crop a 1000 bp record to (425, 650) and expect three features left."""
    specs = [
        (5, 20, +1, "#ffd700", "Small feature"),
        (20, 500, +1, "#ffcccc", "Gene 1 with a very long name"),
        (400, 700, -1, "#cffccc", "Gene 2"),
        (600, 900, +1, "#ccccff", "Gene 3"),
    ]
    features = [
        GraphicFeature(start=start, end=end, strand=strand, color=color,
                       label=label)
        for start, end, strand, color, label in specs
    ]

    # Build the linear record, then crop it
    record = GraphicRecord(sequence_length=1000, features=features)
    cropped_record = record.crop((425, 650))
    assert len(cropped_record.features) == 3
def test_cropping_on_the_edge():
    """Cropping the last 10 positions returns the last repeat of the sequence."""
    repeated_sequence = "ATGCATGCAT"
    full_sequence = 100 * repeated_sequence
    graphic_record = GraphicRecord(sequence_length=1000,
                                   sequence=full_sequence)
    tail = graphic_record.crop((990, 1000))
    assert tail.sequence == repeated_sequence
def contig_visualization_onefile(contig_array, genome, drug, rgi):
    """Plot every gene of ``contig_array`` on one record and save it as a PNG.

    Parameters
    ----------
    contig_array
        pandas DataFrame with the columns ``GeneStart``, ``GeneEnd``,
        ``Strand``, ``Genecolor`` and ``GeneName``. Its index is reset
        in place.
    genome
        Not used by this function.
    drug, rgi
        Combined as ``<drug>_<rgi>`` for the plot title and the file name.

    Raises
    ------
    IndexError
        If ``contig_array`` has no rows.
    """
    save_path = "contigend_visualizations_single_genome/"
    contig_array.reset_index(drop=True, inplace=True)

    features = []
    spans = []
    for i in contig_array.index:
        gene_start = contig_array["GeneStart"][i]
        gene_end = contig_array["GeneEnd"][i]
        features.append(
            GraphicFeature(start=gene_start,
                           end=gene_end,
                           strand=contig_array["Strand"][i],
                           color=contig_array["Genecolor"][i],
                           label=str(contig_array["GeneName"][i])))
        spans.append((gene_start, gene_end))

    # The record runs from the first gene's start to the last gene's end.
    first_index = spans[0][0]
    length = spans[-1][1] - first_index
    record = GraphicRecord(first_index=first_index,
                           sequence_length=length,
                           features=features)
    ax, _ = record.plot(figure_width=20, strand_in_label_threshold=7)

    title = drug + "_" + str(rgi)
    ax.set_title(title)

    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(save_path, exist_ok=True)
    ax.figure.savefig(os.path.join(save_path, title + ".png"))
def contigend_visualization(contig_array, end_direction, genome,
                            reverse_term, drug, rgi_gene):
    """Plot the genes of a contig plus a "Contig_Ends" marker and save a PNG.

    Parameters
    ----------
    contig_array
        pandas DataFrame with the columns ``GeneStart``, ``GeneEnd``,
        ``Strand``, ``Genecolor`` and ``GeneName``. Its index is reset
        in place.
    end_direction
        ``"upward"`` puts the marker in the 1500 positions before the first
        gene; ``"downward"`` puts it 1500 positions after the last gene end.
        Any other value draws no marker.
    genome
        Used in the plot title and as the output file name.
    reverse_term
        When equal to the string ``"-1"`` the x axis is inverted.
    drug
        Prefix of the plot title.
    rgi_gene
        Not used by this function.

    Raises
    ------
    IndexError
        If no feature at all was collected (empty ``contig_array`` and no
        marker).
    """
    features = []
    spans = []
    contig_array.reset_index(drop=True, inplace=True)

    if end_direction == "upward":
        first_start = contig_array["GeneStart"][0]
        features.append(
            GraphicFeature(start=first_start - 1500,
                           end=first_start,
                           strand=+1,
                           color="#0A090A",
                           label="Contig_Ends"))
        spans.append((first_start - 1500, first_start))

    for i in contig_array.index:
        gene_start = contig_array["GeneStart"][i]
        gene_end = contig_array["GeneEnd"][i]
        features.append(
            GraphicFeature(start=gene_start,
                           end=gene_end,
                           strand=contig_array["Strand"][i],
                           color=contig_array["Genecolor"][i],
                           label=str(contig_array["GeneName"][i])))
        spans.append((gene_start, gene_end))

    if end_direction == "downward":
        marker = contig_array["GeneEnd"].iloc[-1] + 1500
        # NOTE(review): start == end, so this marker has zero width.
        # Confirm whether a non-empty span was intended.
        features.append(
            GraphicFeature(start=marker,
                           end=marker,
                           strand=+1,
                           color="#0A090A",
                           label="Contig_Ends"))
        spans.append((marker, marker))

    # The record runs from the first collected start to the last collected end.
    first_index = spans[0][0]
    length = spans[-1][1] - first_index
    record = GraphicRecord(first_index=first_index,
                           sequence_length=length,
                           features=features)
    ax, _ = record.plot(figure_width=20, strand_in_label_threshold=7)
    if reverse_term == "-1":
        ax.invert_xaxis()
    ax.set_title(drug + "_" + genome)

    save_path = "contigend_visualizations/"
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs(save_path, exist_ok=True)
    ax.figure.savefig(os.path.join(save_path, str(genome) + ".png"))
def show_feature(self, figure_width=8, xlabel=""):
    """Plot ``self._features`` on a record of length ``self._max_length``.

    Prints a message and returns ``None`` when there are no features;
    otherwise returns the matplotlib axes, with ``xlabel`` set in bold.
    """
    if len(self._features) == 0:
        print("No feautres to show")
        return

    graphic_record = GraphicRecord(sequence_length=self._max_length,
                                   features=self._features)
    axes, _ = graphic_record.plot(figure_width=figure_width)
    axes.set_xlabel(xlabel, fontweight="bold", fontsize=16)
    return axes
def demo_dna_features_viewer():
    """Plot a four-feature demo record and return the ``plt`` module."""
    specs = (
        (0, 20, +1, "#ffd700", "Small feature"),
        (20, 500, +1, "#ffcccc", "Gene 1 with a very long name"),
        (400, 700, -1, "#cffccc", "Gene 2"),
        (600, 900, +1, "#ccccff", "Gene 3"),
    )
    features = []
    for start, end, strand, color, label in specs:
        features.append(
            GraphicFeature(start=start, end=end, strand=strand, color=color,
                           label=label))
    GraphicRecord(sequence_length=1000, features=features).plot(figure_width=5)
    return plt
def vis_sccmec(faa_file_sccmec, annotation_file, length_sccmec, core_proteins,
               blastp):
    """Plot the annotated SCCmec element and save it as ``SCCmec_<id>.png``.

    The annotation is loaded with ``annotation_data`` and updated with
    ``update_annotation``. Rows whose gene is ``'core-proteins'`` are drawn
    in ``#ff8848`` without a label; other rows take their colour and label
    from the ``colors`` / ``labels`` tables (grey / no label when missing).
    ``<id>`` is the text between the last ``_`` and the next ``.`` of
    ``annotation_file``.
    """

    def lookup(table, key, fallback):
        # Table access with a fallback for unknown genes.
        try:
            return table[key]
        except KeyError:
            return fallback

    # use faa file from prokka annotation on sccmec
    faa_dict_sccmec = fasta2dict(faa_file_sccmec)
    # update annotation based on core proteins in cluster
    datafile = annotation_data(annotation_file)
    update_datafile = update_annotation(datafile, blastp, faa_dict_sccmec,
                                        core_proteins)

    # one GraphicFeature per strand sign found in the row's sense field
    features = []
    for _id, sense, start, end, _size, _length, gene in update_datafile:
        if gene == 'core-proteins':
            color, label = '#ff8848', None
        else:
            color = lookup(colors, gene, 'grey')
            label = lookup(labels, gene, None)
        for sign, strand in (('-', -1), ('+', +1)):
            if sign in sense:
                features.append(
                    GraphicFeature(start=int(start), end=int(end),
                                   strand=strand, color=color, label=label))

    record = GraphicRecord(sequence_length=length_sccmec, features=features)
    ax, _ = record.plot(figure_width=20)
    id_ = annotation_file.split('_')[-1].split('.')[0]
    ax.figure.savefig('SCCmec_{}.png'.format(id_), dpi=300)
def test_to_biopython_record():
    """Feature locations and labels survive conversion to a Biopython record."""
    expected = [(5, 20, "a"), (20, 500, "b"), (400, 700, "c")]
    strands = (+1, +1, -1)
    graphic_features = [
        GraphicFeature(start=start, end=end, strand=strand, label=label)
        for (start, end, label), strand in zip(expected, strands)
    ]
    record = GraphicRecord(sequence_length=50, features=graphic_features)

    biopython_record = record.to_biopython_record(sequence=50 * "A")
    features = sorted(
        (f.location.start, f.location.end, f.qualifiers["label"])
        for f in biopython_record.features
    )
    assert features == [(5, 20, "a"), (20, 500, "b"), (400, 700, "c")]
def contigend_visualization(contig_array, end_direction, genome):
    """Plot a contig's genes with a "Contig_Ends" marker; save ``<genome>.png``.

    ``end_direction == "upward"`` places the marker on the 1500 positions
    before the first gene start; ``"downward"`` places it from 1500 to 2500
    positions after the last gene end. The index of ``contig_array`` is reset
    in place.
    """

    def contig_end_marker(start, end):
        # Black forward-strand feature marking the contig end.
        return GraphicFeature(start=start, end=end, strand=+1,
                              color="#0A090A", label="Contig_Ends")

    drawn = []
    extents = []
    contig_array.reset_index(drop=True, inplace=True)

    if end_direction == "upward":
        marker_start = contig_array["GeneStart"][0] - 1500
        marker_end = contig_array["GeneStart"][0]
        drawn.append(contig_end_marker(marker_start, marker_end))
        extents.append((marker_start, marker_end))

    for row in contig_array.index:
        gene_start = contig_array["GeneStart"][row]
        gene_end = contig_array["GeneEnd"][row]
        drawn.append(
            GraphicFeature(start=gene_start,
                           end=gene_end,
                           strand=contig_array["Strand"][row],
                           color=contig_array["Genecolor"][row],
                           label=str(contig_array["GeneName"][row])))
        extents.append((gene_start, gene_end))

    if end_direction == "downward":
        marker_start = contig_array["GeneEnd"].iloc[-1] + 1500
        marker_end = contig_array["GeneEnd"].iloc[-1] + 2500
        drawn.append(contig_end_marker(marker_start, marker_end))
        extents.append((marker_start, marker_end))

    # Record spans from the first collected start to the last collected end.
    origin = extents[0][0]
    record = GraphicRecord(first_index=origin,
                           sequence_length=extents[-1][1] - origin,
                           features=drawn)
    ax, _ = record.plot(figure_width=20, strand_in_label_threshold=7)
    ax.figure.savefig(str(genome) + ".png")
def create_dna_structure(file_name):
    """Draw the repeats and spacers from the request JSON and save a PNG.

    Reads ``spacerRepeats`` and ``length`` from the JSON body of the current
    request. Each entry yields a "Repeat_<n>" feature and, when it has a
    ``spacer`` key, a "Spacer_<n>" feature. The record is cropped to 50
    positions either side of the repeat array and written to
    ``static/logos/<file_name>.png``.
    """
    results = request.get_json()
    entries = results['spacerRepeats']

    features = []
    for number, entry in enumerate(entries, start=1):
        repeat_start = entry['position']
        repeat_end = repeat_start + len(entry['repeat'])
        features.append(
            GraphicFeature(start=repeat_start, end=repeat_end, strand=+1,
                           color="#cffccc", label="Repeat_" + str(number)))
        if 'spacer' in entry:
            features.append(
                GraphicFeature(start=repeat_end + 1,
                               end=repeat_end + entry['lengths'][1],
                               strand=+1, color="#ccccff",
                               label="Spacer_" + str(number)))

    record = GraphicRecord(sequence_length=results['length'],
                           features=features)

    first_entry = entries[0]
    last_entry = entries[len(entries) - 1]
    window = (first_entry['position'] - 50,
              last_entry['position'] + len(last_entry['repeat']) + 50)
    record = record.crop(window)

    ax, _ = record.plot(figure_width=10)
    ax.figure.savefig('static/logos/' + str(file_name) + '.png',
                      bbox_inches='tight')
    return jsonify('{"success":1}')
def visualize_mrna_strand(self, dpi=120, cmap='viridis'):
    """Plot the tag, the protein and the probe sites of this transcript.

    Parameters
    ----------
    dpi
        Resolution passed to ``plt.subplots``.
    cmap
        Matplotlib colormap (name or instance) used to pick one colour per
        row of ``self.probe_loc``.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    features = [
        GraphicFeature(start=0, end=self.tag_length,
                       color=self._colors[0], label='Tag'),
        GraphicFeature(start=self.tag_length, end=self.total_length,
                       color=self._colors[1], label='Protein'),
    ]

    probe = self.probe_loc
    # matplotlib.cm.get_cmap was removed in Matplotlib 3.9;
    # pyplot.get_cmap accepts the same name-or-instance argument.
    cmap = plt.get_cmap(cmap)

    hits = np.where(probe == 1)
    color, location = hits[0], hits[1]
    ncolors = probe.shape[0]
    colors = cmap(np.linspace(.01, .95, ncolors))
    colorlabels = ['Color %d' % i for i in range(ncolors)]

    # One two-position feature per entry of probe_loc equal to 1.
    for c, loc in zip(color, location):
        features.append(
            GraphicFeature(start=loc, end=loc + 2,
                           color=colors[c], linecolor=colors[c]))

    record = GraphicRecord(sequence_length=self.total_length,
                           features=features)

    fig, ax = plt.subplots(1, dpi=dpi)
    # Degenerate lines drawn first so the legend picks up the probe colours.
    for c in range(ncolors):
        ax.plot([0, 0], [0, 0], color=colors[c])

    ax, _ = record.plot(figure_width=6, ax=ax)
    ax.axes.legend(colorlabels, loc=7)
    ax.text(0, 5, 'Transcript Name: %s' % self.name)
    ax.text(0, 4, 'Total Length: %d codons' % self.total_length)
    ax.text(0, 3, 'Seq: %s ...' % self.aa_seq[:10])
    fig.show()
def test_split_overflowing_features():
    """Features overflowing a 50 bp record are split in two circularly."""
    spans = [(10, 20, "a"), (40, 55, "b"), (-20, 2, "c")]
    features = [
        GraphicFeature(start=start, end=end, strand=+1, label=label)
        for start, end, label in spans
    ]

    record = GraphicRecord(sequence_length=50, features=features)
    record.split_overflowing_features_circularly()

    new_features_locations_and_labels = sorted(
        (f.start, f.end, f.label) for f in record.features
    )
    assert new_features_locations_and_labels == [
        (0, 2, "c"),
        (0, 5, "b"),
        (10, 20, "a"),
        (30, 49, "c"),
        (40, 49, "b"),
    ]
def test_by_hand(tmpdir):
    """Build a GraphicRecord "by hand" and export linear and circular plots."""
    out_dir = str(tmpdir)
    specs = [
        (5, 20, +1, "#ffd700", "Small feature"),
        (20, 500, +1, "#ffcccc", "Gene 1 with a very long name"),
        (400, 700, -1, "#cffccc", "Gene 2"),
        (600, 900, +1, "#ccccff", "Gene 3"),
    ]
    features = [
        GraphicFeature(start=start, end=end, strand=strand, color=color,
                       label=label)
        for start, end, strand, color, label in specs
    ]

    # Linear view
    record = GraphicRecord(sequence_length=1000, features=features)
    record.plot(figure_width=5, with_ruler=False)  # lazy, just for coverage
    ax, _ = record.plot(figure_width=5)
    ax.figure.savefig(os.path.join(out_dir, "by_hand.png"))

    # Circular view
    circular_rec = CircularGraphicRecord(sequence_length=1000,
                                         features=features)
    ax2, _ = circular_rec.plot(figure_width=4)
    ax2.figure.tight_layout()
    ax2.figure.savefig(os.path.join(out_dir, "by_hand_circular.png"),
                       bbox_inches="tight")
def visualize_markup(self, index=1):
    """Show the 'gene' features of one sequence as a Bokeh plot.

    For every entry of ``self.seq`` the GFF record with the same id is
    looked up in ``self.gff``; its features of type ``'gene'`` are turned
    into GraphicFeature objects. The list at position ``index`` is then
    plotted on a record of fixed length 1000 and written to ``test.html``.

    NOTE(review): the nesting below was reconstructed from a flattened
    source. ``sequences.append(features)`` is assumed to sit inside the
    ``if gff_record:`` branch, so ``index`` counts only sequences that have
    a matching GFF record. Confirm against the original file.
    """
    sequences = []
    for seq in self.seq:
        # First GFF record whose id matches this sequence, or None.
        gff_record = next(
            (x for x in self.gff if str(x.id) == str(seq.id)), None)
        if gff_record:
            # Filter 'gene' features
            genes = [x for x in gff_record.features if x.type == 'gene']
            features = []
            for gene in genes:
                start, end, strand = gene.location.start, gene.location.end, gene.location.strand
                features.append(
                    GraphicFeature(start, end, strand, label=gene.qualifiers['Name'], color="#cffccc"))
            sequences.append(features)
    output_file("test.html")
    # NOTE(review): sequence_length is hard-coded to 1000 rather than taken
    # from the sequence. Confirm this is intended.
    record = GraphicRecord(sequence_length=1000, features=sequences[index])
    show(record.plot_with_bokeh(figure_width=5))
def plot_align(self, ax, genome_range):
    """Draw the mapped reads overlapping ``genome_range`` on ``ax``.

    Parameters
    ----------
    ax
        Matplotlib axes handed to ``GraphicRecord.plot``.
    genome_range
        Object providing ``start`` and ``length``; passed to
        ``self.fetch_intervals``.

    Reads with flag bit ``0b100`` set are dropped, as are reads whose
    sequence is not longer than ``length_ratio_thresh`` (default 0.005)
    times the region length. Returns ``None``; nothing is drawn when no
    read remains.
    """
    gr = genome_range
    df = self.fetch_intervals(gr)

    mapped = df[np.bitwise_and(df['flag'], 0b100) == 0]
    len_thresh = self.properties.get("length_ratio_thresh", 0.005)
    mapped = mapped[mapped['seq'].str.len() > (gr.length * len_thresh)]
    if mapped.shape[0] <= 0:
        return

    features = []
    for _, row in mapped.iterrows():
        start = row['pos'] - gr.start
        end = row['pos'] + len(row['seq']) - gr.start
        # Read the strand bit from the row itself. Looking it up with
        # .iloc[<index label>] is only right for a 0..n-1 index.
        strand = -1 if row['flag'] & 0b10000 else 1
        features.append(
            GraphicFeature(
                start=start,
                end=end,
                strand=strand,
                color=self.properties['color'],
            ))

    record = GraphicRecord(sequence_length=gr.length, features=features)
    record.plot(ax=ax, with_ruler=False, draw_line=False)
def vis_pegRNA2(df,genome_fasta=None,**kwargs):
	"""Given one instance of easy-prime prediction (rawX format), generate DNA visualization

	Input
	--------
	the data frame contains 4 rows: RTT, PBS, sgRNA, ngRNA

	Returns
	--------
	0, as the code stands: the function returns right after the features
	are plotted, so the statements that would build the base64 PNG data URI
	are never reached (see the NOTE(review) below).

	"""
	pegRNA_id = df.index.tolist()[0]
	variant_id = pegRNA_id.split("_")[0]
	chr = df['CHROM'][0]
	# Widen the window: start is rounded down to a multiple of 10 and
	# shifted by -1; end is rounded down to a multiple of 10 and then
	# extended by 10.
	start = df['start'].min()
	start -= start%10
	start -= 1
	end = df['end'].max()
	end -= end%10
	end += 10
	variant_pos = df.POS.min()
	ref = df.REF[0]
	alt = df.ALT[0]
	predicted_efficiency = df.predicted_efficiency[0]*100
	# Variant position relative to the plotted window.
	pos = variant_pos-start
	sequence = get_fasta_single(chr,start,end,genome_fasta).upper()
	fig,ax = plt.subplots()
	feature_list = []
	for s,r in df.iterrows():
		# Feature coordinates relative to the window start.
		r_start = r.start-start
		r_end = r_start+(r.end-r.start)
		r_strand = get_strand(r.strand)
		gf = GraphicFeature(start=r_start, end=r_end, strand=r_strand, color=my_colors[r.type],label=r.type)
		feature_list.append(gf)
	record = GraphicRecord(sequence=sequence, features=feature_list)
	# ax, _ = record.plot(figure_width=int(len(sequence)/5))
	record.plot(ax=ax,figure_width=int(len(sequence)/5))
	# NOTE(review): this early return makes everything below unreachable.
	# It looks like a debugging leftover; confirm whether callers expect 0
	# or the data URI built at the end.
	return 0
	record.plot_sequence(ax)
	# Shade the column of the variant.
	ax.fill_between((pos-1.5, pos-0.5), +1000, -1000, alpha=0.5,color=my_colors['variant'])
	locs, labels = plt.xticks()
	new_labels = []
	flag = True
	# Relabel ticks with genomic coordinates; only the first tick carries
	# the chromosome name.
	for i in locs:
		if flag:
			new_labels.append("%s %s"%(chr,int(start+i+1)))
			flag=False
		else:
			new_labels.append(int(start+i+1))
	plt.xticks(locs,new_labels)
	plt.title("ID: %s, CHR: %s, POS: %s, REF: %s, ALT: %s \n Predicted efficiency: %.1f"%(variant_id,chr,variant_pos,ref,alt,predicted_efficiency)+"%")
	# Render the figure to an in-memory PNG and wrap it as a data URI.
	my_stringIObytes = io.BytesIO()
	ax.figure.savefig(my_stringIObytes, format='png',bbox_inches='tight')
	my_stringIObytes.seek(0)
	img_string = base64.b64encode(my_stringIObytes.read())
	return "data:image/png;base64,%s"%(img_string.decode("utf-8"))
def plot_align(self, ax, gr: GenomeRange):
    """Draw the mapped reads overlapping ``gr`` on ``ax``.

    Parameters
    ----------
    ax
        Matplotlib axes handed to ``GraphicRecord.plot``.
    gr
        Genome range to plot; passed to ``self.fetch_plot_data``.

    Reads with flag bit ``0b100`` set are dropped, as are reads whose
    sequence is not longer than ``length_ratio_thresh`` times the region
    length. Returns ``None``; nothing is drawn when no read remains.
    """
    assert isinstance(
        gr, GenomeRange), "The input gr should be type GenomeRange"
    df = self.fetch_plot_data(gr)

    mapped = df[np.bitwise_and(df['flag'], 0b100) == 0]
    len_thresh = self.properties["length_ratio_thresh"]
    mapped = mapped[mapped['seq'].str.len() > (gr.length * len_thresh)]
    if mapped.shape[0] <= 0:
        return

    features = []
    for _, row in mapped.iterrows():
        start = row['pos'] - gr.start
        end = row['pos'] + len(row['seq']) - gr.start
        # Read the strand bit from the row itself. Looking it up with
        # .iloc[<index label>] is only right for a 0..n-1 index.
        strand = -1 if row['flag'] & 0b10000 else 1
        features.append(
            GraphicFeature(
                start=start,
                end=end,
                strand=strand,
                color=self.properties['color'],
            ))

    record = GraphicRecord(sequence_length=gr.length, features=features)
    record.plot(ax=ax, with_ruler=False, draw_line=False)
def Protein_structure(ID,exons,domains,path,trID,exons_in_interface):
    """Save a two-panel image (coding exons, Pfam domains) to ``path + trID``.

    The feature lists and the common record length come from
    ``Visualize_transciript``. ``ID`` is not used here.
    """
    exon_features, domain_features, fend = Visualize_transciript(
        exons, domains, exons_in_interface)

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(16, 3.5))

    # Upper panel: exons, no ruler
    exon_record = GraphicRecord(sequence_length=fend, features=exon_features)
    exon_record.plot(ax=ax1, figure_width=23, with_ruler=False)

    # Lower panel: domains, ruler and inline labels
    domain_record = GraphicRecord(sequence_length=fend,
                                  features=domain_features)
    domain_record.plot(ax=ax2, figure_width=23, with_ruler=True,
                       annotate_inline=True)

    # Titles are positioned below each panel
    for axis, text, position in (
            (ax1, 'Coding Exons', [.5, -0.4]),
            (ax2, 'Pfam Domains', [.5, -0.5])):
        axis.title.set_text(text)
        axis.title.set_position(position)

    fig.savefig(path + trID, bbox_inches='tight')
    return
def haplotype_blocks_fig(model, ref_seq):
    """Plot the two aligned alleles of ``model`` over ``ref_seq``.

    Each allele is drawn as a forward feature starting at 0 with the length
    of its alignment string. The sequence and a translation of positions
    (8, 23) are added, and the figure is saved as ``haplotypes.png``.
    """
    s1, s2 = model.align_alleles()
    allele_features = [
        GraphicFeature(start=0, end=len(allele), strand=+1, color=color)
        for allele, color in ((s1, '#ffcccc'), (s2, '#cffccc'))
    ]
    record = GraphicRecord(sequence=ref_seq,
                           sequence_length=len(ref_seq),
                           features=allele_features)
    ax, _ = record.plot(figure_width=5)
    record.plot_sequence(ax)
    record.plot_translation(ax, (8, 23), fontdict={'weight': 'bold'})
    ax.figure.savefig('haplotypes.png', bbox_inches='tight')
def test_sequence_and_translation_plotting():
    """Smoke test: plot a record, its sequence and a translation."""
    from dna_features_viewer import (
        GraphicFeature,
        GraphicRecord,
        CircularGraphicRecord,
    )

    first = GraphicFeature(start=5, end=10, strand=+1, color="#ffd700",
                           label="bbS-1")
    second = GraphicFeature(start=8, end=15, strand=+1, color="#ffcccc",
                            label="CrC")
    record = GraphicRecord(sequence=7 * "ATGC", features=[first, second])

    ax, _ = record.plot(figure_width=5)
    record.plot_sequence(ax)
    record.plot_translation(ax, (8, 23), fontdict={"weight": "bold"})
def get_map(phage_id, UPLOAD_FOLDER):
    """Creates and returns a map of the genome.

    Args:
        phage_id:
            Id used to select the rows of ``Annotations``.
        UPLOAD_FOLDER:
            The folder containing all of the uploaded files; the map image
            is also written there.

    Returns:
        A dictionary with ``status`` set to "success" and ``image`` set to
        ``str()`` of the base64-encoded PNG bytes.
    """
    annotations = db.session.query(Annotations).filter_by(
        phage_id=phage_id).order_by(Annotations.left)

    features = []
    for cds in annotations:
        # Skip annotations flagged as deleted.
        if cds.function == '@DELETED' or cds.status == 'trnaDELETED':
            continue
        forward = cds.strand == '+'
        if cds.status == "tRNA":
            color = "#7570b3"
        elif forward:
            color = "#1b9e77"
        else:
            color = "#d95f02"
        features.append(
            GraphicFeature(start=cds.left, end=cds.right,
                           strand=+1 if forward else -1,
                           color=color, label=cds.id))

    fasta_file = helper.get_file_path("fasta", UPLOAD_FOLDER)
    sequence = str(SeqIO.read(fasta_file, "fasta").seq)

    record = GraphicRecord(sequence_length=len(sequence), features=features)
    ax, _ = record.plot(figure_width=len(sequence) / 1000)

    image_path = os.path.join(UPLOAD_FOLDER, 'sequence_and_translation.png')
    ax.figure.savefig(image_path, bbox_inches='tight')
    with open(image_path, "rb") as image_file:
        image_byte_string = base64.b64encode(image_file.read())

    return {
        'status': "success",
        'image': str(image_byte_string),
    }
# Example script: plot a short sequence with two overlapping features, its
# nucleotides and a translation, then save the figure to
# sequence_and_translation.png.
from dna_features_viewer import GraphicFeature, GraphicRecord

record = GraphicRecord(sequence="ATGCATGCATGCATGCATGCATGCATGC", features=[
    GraphicFeature(start=5, end=10, strand=+1, color='#ffcccc'),
    GraphicFeature(start=8, end=15, strand=+1, color='#ccccff')
])

ax, _ = record.plot(figure_width=5)
# Draw the nucleotides, then the translation of positions (8, 23) in bold.
record.plot_sequence(ax)
record.plot_translation(ax, (8, 23), fontdict={'weight': 'bold'})
ax.figure.savefig('sequence_and_translation.png', bbox_inches='tight')
def plot(self, ax=None, plot_coverage=True, plot_reference=False,
         reference_ax=None, figsize="auto", features_filters=(),
         features_properties=None, reference_reads_shares="auto"):
    """Plot the sequencing matches.

    Useful to get a general overview of the sequencing (coverage,
    mutations etc.)

    Parameters
    ----------

    ax
      Matplotlib ax on which to plot the alignments. If None, one will be
      automatically created.

    plot_coverage
      If True, the plots will display in the background a filled blue line
      indicating how many times each nucleotide of the sequence is covered
      by the successful alignments.

    plot_reference
      If True, a schema of the reference record will be plotted, by default
      above the reads plot.

    reference_ax
      If provided and plot_reference is True, the reference record will be
      plotted on this ax.

    figsize
      Size of the final figure. Leave it to 'auto' for a figure of width 12
      and automatically chosen height. Or e.g. (16, 'auto') for a figure of
      width 16 and automatically chosen height.

    features_filters
      List of functions (feature=>True/False). Features for which at least
      one test is False will not appear in the reference record plot.

    features_properties
      DNA Features Viewer property functions that can be used to change the
      appearance of the reference record.

    reference_reads_shares
      Relative shares of the pictures that should be occupied by the
      reference and by the reads. It is an experimental parameter so leave
      it to 'auto' for now.

    Returns
    -------

    ax
      The Matplotlib ax on which the reads were plotted.
    """

    class AnnotationsGraphicTranslator(BiopythonTranslator):
        """Translator used for the reference record schema."""

        def compute_feature_color(self, f):
            return "#f9d277"

        def compute_feature_label(self, f):
            # NOTE(review): called on the class with only ``f``; this works
            # only if the base method is a static method. Confirm against
            # the installed dna_features_viewer version.
            return BiopythonTranslator.compute_feature_label(f)[:20]

        def compute_filtered_features(self, features):
            # Hide features labelled 'cover' or 'no_primer'.
            def is_not_parameter(f):
                label = "".join(f.qualifiers.get('label', ''))
                return label not in ('cover', 'no_primer')
            return [f for f in features if is_not_parameter(f)]

    if plot_reference:
        translator = AnnotationsGraphicTranslator(
            features_filters=features_filters,
            features_properties=features_properties)
        grecord = translator.translate_record(self.reference)
        if not self.linear:
            grecord.split_overflowing_features_circularly()

    if figsize == "auto":
        figsize = (12, "auto")
    if figsize[1] == "auto":
        sequencing_ax_height = 2 + 0.35 * len(self.read_reference_matches)
        if not plot_reference:
            figure_height = sequencing_ax_height
        else:
            # Throw-away plot of the reference, only to measure its height.
            ref_ax, _ = grecord.plot(with_ruler=False,
                                     figure_width=figsize[0])
            ref_fig_height = ref_ax.figure.get_size_inches()[1]
            figure_height = sequencing_ax_height + ref_fig_height
            if reference_reads_shares == "auto":
                reference_reads_shares = (int(100 * ref_fig_height),
                                          int(100 * sequencing_ax_height))
            plt.close(ref_ax.figure)
        figsize = (figsize[0], figure_height)
    elif reference_reads_shares == "auto":
        reference_reads_shares = (1, 2)

    if plot_reference:
        if reference_ax is None:
            gs = gridspec.GridSpec(sum(reference_reads_shares), 1)
            fig = plt.figure(figsize=figsize, facecolor="w")
            reference_ax = fig.add_subplot(gs[:reference_reads_shares[0]])
            ax = fig.add_subplot(gs[reference_reads_shares[0]:])
        grecord.plot(reference_ax, with_ruler=False, annotate_inline=True)
        # Keep the returned ax: when the caller supplied reference_ax but no
        # ax, the recursive call creates the reads ax, and it is needed for
        # set_xlim and the return value below.
        ax = self.plot(ax=ax, plot_coverage=plot_coverage,
                       plot_reference=False)
        ax.set_xlim(reference_ax.get_xlim())
        return ax

    # so the first read in the list gets displayed on top
    read_reference_matches = OrderedDict(
        [item for item in list(self.read_reference_matches.items())[::-1]])
    L = len(self.reference)
    if ax is None:
        fig, ax = plt.subplots(1, figsize=figsize)
    ax.set_xlim(-2, L)
    ax.set_ylim(0, len(read_reference_matches) + 2)
    ax.set_yticks(range(1, len(read_reference_matches) + 1))
    ax.set_yticklabels([name for name in read_reference_matches])
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('bottom')

    # One scratch record reused to split and draw each match.
    gr_record = GraphicRecord(sequence_length=L, features=[])

    for i, (read_name, matches) in enumerate(read_reference_matches.items()):
        y = i + 1
        ax.axhline(y, ls=":", lw=0.5, color="#aaaaaa", zorder=-1000)
        if matches.primer.metadata.get('available', False):
            color = '#f7a3f6'
        else:
            color = "#a3c3f7"
        for match in matches.read_matches:
            gr_record.features = [
                GraphicFeature(start=match.start, end=match.end,
                               strand=match.strand, color=color)
            ]
            gr_record.split_overflowing_features_circularly()
            for feature in gr_record.features:
                gr_record.plot_feature(ax, feature, y, linewidth=0.2)
        for match in matches.primer_matches:
            feature = GraphicFeature(start=match.start, end=match.end,
                                     strand=match.strand, color="#e85558")
            gr_record.plot_feature(ax, feature, y, linewidth=0.2)

    if plot_coverage:
        ax.fill_between(range(len(self.coverage)), self.coverage,
                        zorder=-2000, alpha=0.2, facecolor="#a3c3f7")
    return ax
def probe_graph(file_name, path):
    """Plot every probe pair listed in a results file and write the probe set.

    Parameters
    ----------
    file_name
        Text file of ``:``-separated lines. The first field selects the
        meaning: ``Sequence``, ``Initiator`` (two initiators), ``Probes``
        (probe1, start, probe2, end) or ``Name``.
    path
        Directory receiving ``<name>_Probe_Set.txt`` and one plot per probe
        pair (``<name>Plots for Probes<k>``).

    Raises
    ------
    NameError (UnboundLocalError)
        If the file lacks a ``Name`` line, or lacks a ``Sequence`` or
        ``Initiator`` line while probes are present.
    """
    probes = []
    # Context manager so the input handle is closed even on error.
    with open(file_name, "r") as f:
        for line in f:
            print(line)
            line_info = line.split(':')
            if (line_info[0] == "Sequence"):
                sequence = line_info[1]
            elif (line_info[0] == "Initiator"):
                initiator1 = line_info[1].upper()
                initiator2 = line_info[2].upper()
            elif (line_info[0] == "Probes"):
                # Stored flat as probe1, probe2, start, end.
                probes.append(line_info[1].upper())
                probes.append(line_info[3].upper())
                probes.append(line_info[2])
                probes.append(line_info[4])
            elif (line_info[0] == "Name"):
                name = line_info[1].rstrip()

    file_write = path + "/" + name + "_Probe_Set" ".txt"
    print(probes)
    print(name)

    # Context manager so the output is flushed and closed even if plotting
    # fails part-way through.
    with open(file_write, "w+") as w:
        for x, a in enumerate(range(0, len(probes), 4), start=1):
            gstart = 28
            gend = 3
            probe1 = str(Seq(probes[a]).complement())
            probe2 = str(Seq(probes[a + 1]).complement())
            start = int(probes[a + 2])
            end = probes[a + 3]
            start -= 4
            if start <= 0:
                # Clamp the window to the start of the sequence and shift
                # the feature coordinates accordingly.
                gend = 0 - start - 1
                gstart = gend + 25
                start = 0
            end = int(end)
            subseq = sequence[start:end]
            subseq = subseq.upper()
            record = GraphicRecord(
                sequence=subseq,
                features=[
                    GraphicFeature(start=gstart, end=gend, strand=+1,
                                   color='#ffcccc', label=probe1),
                    GraphicFeature(start=gstart + 28, end=gstart + 2,
                                   strand=+1, color='#ccccff', label=probe2),
                    GraphicFeature(start=gstart, end=gstart, strand=-1,
                                   color='m', label="space"),
                    GraphicFeature(start=gstart + 1, end=gstart + 1,
                                   strand=-1, color='m', label="space"),
                    GraphicFeature(start=gstart,
                                   end=(gstart - len(initiator1)),
                                   strand=-1, color='y', label=initiator1),
                    GraphicFeature(start=gstart + 2,
                                   end=(gstart + 2 + len(initiator1)),
                                   strand=+1, color='y', label=initiator2)
                ])
            ax, _ = record.plot(figure_width=10)
            record.plot_sequence(ax)

            total1 = initiator1 + "TT" + probe1[::-1]
            total2 = probe2[::-1] + "TT" + initiator2
            w.write("PROBE SET" + str(x) + "\n")
            w.write("Probe1:" + total1 + "\n")
            w.write("Probe2:" + total2 + "\n")

            tosave = path + "/" + name + "Plots for Probes" + str(x)
            ax.figure.savefig(tosave, bbox_inches='tight')
# Example script (the part visible here): build a 1000-nucleotide record
# with four features, crop a 30-position detail window from it, and create
# a two-panel figure.
from dna_features_viewer import GraphicFeature, GraphicRecord
import matplotlib.pyplot as plt

record = GraphicRecord(sequence=250 * "ATGC", features=[
    GraphicFeature(start=5, end=20, strand=+1, color="#ffd700",
                   label="Small feature"),
    GraphicFeature(
        start=20, end=500, strand=+1, color="#ffcccc",
        label="Gene 1 with a very long name"),
    GraphicFeature(start=400, end=700, strand=-1, color="#cffccc",
                   label="Gene 2"),
    GraphicFeature(start=600, end=900, strand=+1, color="#ccccff",
                   label="Gene 3")
])
zoom_start, zoom_end = 398, 428  # coordinates of the "detail"
cropped_record = record.crop((zoom_start, zoom_end))

# Two side-by-side axes; nothing is drawn on them in the part visible here.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 3))
output_file_utr ]), shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) p.wait() features = [] with open(output_file_utr) as fp: next(fp) for line in fp: print(line) content = line.split("\t") features.append( GraphicFeature(start=int(content[3]), end=int(content[4]), strand=+1, color=get_color(content[8]), label=re.sub(r'mmu-', '', content[1]))) record = GraphicRecord(sequence_length=len(str(utr.seq)), features=features) # Circular record.plot(figure_width=12) plt.title(' '.join([utr.id, 'sequence'])) patch1 = mpatches.Patch(color="#00ff99", label='6mer') patch2 = mpatches.Patch(color="#9999ff", label='7mer-1a') patch3 = mpatches.Patch(color="#ff66cc", label='7mer-m8') patch4 = mpatches.Patch(color="#ff0000", label='8mer-1a') plt.legend(handles=[patch1, patch2, patch3, patch4]) plt.show()
# Example script: plot a three-gene record inside Matplotlib's xkcd context
# and save it to cartoon_style.png.
import matplotlib.pyplot as plt
from dna_features_viewer import (GraphicFeature, GraphicRecord,
                                 CircularGraphicRecord)

features = [
    GraphicFeature(start=20, end=500, strand=+1, color="#ffcccc",
                   label="Gene 1 with a name"),
    GraphicFeature(start=400, end=700, strand=-1, color="#cffccc",
                   label="Gene 2"),
    GraphicFeature(start=600, end=900, strand=+1, color="#0000ff",
                   label="Gene 3"),
]

record = GraphicRecord(sequence_length=1000, features=features)
# Record-level label defaults: no box colour, custom font family.
record.default_box_color = None
record.default_font_family = 'Walter Turncoat'

# NOTE(review): the extent of the ``with`` block was reconstructed from a
# flattened source; everything down to savefig is assumed to be inside it.
with plt.xkcd():
    plt.rcParams["font.family"] = 'Permanent Marker'  # ruler font
    plt.rcParams["xtick.labelsize"] = 'small'
    ax, _ = record.plot(figure_width=5, annotate_inline=False)
    ax.figure.tight_layout()
    ax.figure.savefig("cartoon_style.png", dpi=200)
def Visualiser_sekvens(gen):
    """Plot the whole GenBank record or one hard-coded gene.

    ``gen == 'alle'`` plots the record ``Artemisia%20annua.gb`` through
    ``ChangeFeatures``. ``"aldh1"`` and ``'CYP71AV1'`` plot a built-in
    sequence with its promoter and gene features, the nucleotides, and the
    translation of the gene span. Any other value plots nothing. Biopython
    parser warnings are silenced. Returns ``None``.
    """
    import warnings
    from Bio import BiopythonParserWarning

    warnings.simplefilter('ignore', BiopythonParserWarning)

    def draw_gene(sequence, promoter, gene, figure_width):
        # promoter and gene are (start, end, color, label) tuples; the gene
        # span is also the translated region.
        record = GraphicRecord(sequence=sequence, features=[
            GraphicFeature(start=promoter[0], end=promoter[1], strand=+1,
                           color=promoter[2], label=promoter[3]),
            GraphicFeature(start=gene[0], end=gene[1], strand=+1,
                           color=gene[2], label=gene[3])
        ])
        ax, _ = record.plot(figure_width=figure_width)
        record.plot_sequence(ax)
        record.plot_translation(ax, (gene[0], gene[1]),
                                fontdict={'weight': 'bold'})

    if gen == 'alle':
        fil = 'Artemisia%20annua.gb'
        graphic_record = ChangeFeatures().translate_record(fil)
        ax, _ = graphic_record.plot(figure_width=20)
        ax.figure.tight_layout()
        return

    if gen == "aldh1":
        draw_gene(
            "CTGTGTCTAGATTTACGGTTTTGTTGAGTATGGAGTATTTATCCCTGTGTCTAGATTTACGGTTTGAAGACTCAGGAAACTCTCATTAAGCGATCAACGTAGCATGATCATCAAAAGCATGGTTTTGTAAACTCGACATGTCAATGTACCAGCCGATCCAAGTATCCAAGCAATTGGTTCACCACACCAAAAGAGTTTTACACTTAAAAACAACAATTAATTCTAAATAGTCTATGTAATGAAATATGTTTTGTGTGGGTTAGTTTAGTTCATAGTTGCGCCATAAGTATTTACAGCAA",
            promoter=(0, 28, '#ffd700', "Promotor"),
            gene=(29, 299, "#ffcccc", "aldh1"),
            figure_width=50,
        )
        return

    if gen == 'CYP71AV1':
        draw_gene(
            "ATTTTTGGGGGCCCCCCCCCATTTTTTGGGGGGCGCGCGATGAAGTTGGTCATTCGAAATATACTTCCAAAATATGAAGTTGGTCATTCGAAATATACTTCCAAACAACCGAGCTGGTCAGGTAGATTTTGTTTCAGATGAAGATGCAATCCACCGTTGGGGGAGTTTCATGAATAACAATCGCAAATAAGATATATTGTTGATTCTTGATGATGTTTGGTCTGATACCATCATCACCGACCTCCAATTCAGGTCACGTGGATACAAGATCCTCGTGACCTCTGAAACAACCTTTAAGAGATTCGATACATATAAAGTGAGACCTCTCAGTGTTCAAGATGCCATCAATCTGTTATGCTATTCAACACTTTCGGAGCGTGCAAGTCAAGCCACAAATGACATACAGACCTTGTTGACAAGGTGAAATTTCAAATTATTCCAAGATTCATGTTTCATACCTTTATAAGAAAGTAATATCTAAACCATATTAACAAATACTAACAATTAACTTTCAAATGTTTTTGTAGTTAACCAAATGTTGCAAGAAGAATCCGCTCGCCTTAAGTGTCATTGGTGGTCGCCTAAAGGGGACACAAATGGAAAGTTGGCATCATACACTGAAAAAGCTATCTCAAGCCACACACCCTCTTATCGACCTTCCTTTGGATGAGGCAAACAGATTTCATCTCGCAAGAGCTCTCGGTTTACTCAAAGATGATGAACGCAACAGCCCCAGAAGTTCAACCTCGAAATTGACCCGATCTTACCAAGTCA",
            promoter=(1, 38, '#cffccc', "Promotor"),
            gene=(39, 774, "#cff77d", "CYP71AV1"),
            figure_width=100,
        )
        return

    return
def upload(request):
    """Search an uploaded gene sequence for the motifs in ``myapp/Jaspar.txt``.

    On POST, the uploaded document is decoded as UTF-8, truncated to the
    first ``position`` characters and scanned with every motif sequence
    (used as a regular expression). Matches are plotted and saved under
    ``myapp/static/``. The ``upload.html`` template receives ``posted``,
    ``zipped`` (one tuple per matching motif), ``figure_name`` and
    ``gene_sequence``. For other methods the template is rendered with
    empty defaults.
    """
    posted = False
    gene_sequence = ''
    figure_name = ''
    zipped = {}

    if request.method == 'POST':
        posted = True
        uploaded_file = request.FILES['document']
        position = int(request.POST.get('position'))

        # Motif table rows: id; sequence; organism; database
        motifs = []
        with open('myapp/Jaspar.txt', "r") as file:
            for line in file:
                fields = line.split(';')
                motifs.append((fields[0], fields[1], fields[2],
                               fields[3].rstrip('\n')))

        for chunk in uploaded_file:
            gene_sequence += chunk.decode('utf-8')
        gene_sequence = gene_sequence[0:position]

        matched_ids = []
        matched_sequences = []
        matched_organisms = []
        matched_database = []
        all_locations = []
        features = []
        for motif_id, sequence, organism, source in motifs:
            locations = [
                m.start() for m in re.finditer(sequence, gene_sequence)
            ]
            if not locations:
                continue
            matched_sequences.append(sequence)
            all_locations.append(locations)
            matched_organisms.append(organism)
            matched_database.append(source)
            matched_ids.append(motif_id)
            features.extend(
                GraphicFeature(start=location,
                               end=location + len(sequence),
                               strand=+1,
                               color="#ffd700",
                               label=sequence)
                for location in locations)

        record = GraphicRecord(sequence_length=len(gene_sequence),
                               features=features)
        ax, _ = record.plot(figure_width=30)
        figure_name = uploaded_file.name + str(position) + '.png'
        ax.figure.savefig('myapp/static/' + uploaded_file.name +
                          str(position) + '.png',
                          bbox_inches='tight')

        print(matched_sequences)
        print(matched_organisms)
        print(all_locations)
        print(matched_database)

        sequence_nos = list(range(len(matched_sequences)))
        zipped = tuple(
            zip(matched_ids, matched_sequences, matched_organisms,
                matched_database, all_locations, sequence_nos))

    context = {
        'posted': posted,
        'zipped': zipped,
        'figure_name': figure_name,
        'gene_sequence': gene_sequence
    }
    return render(request, 'upload.html', context)
GraphicFeature(start=14920, end=14947, strand=+1, color="purple"), GraphicFeature(start=14956, end=14969, strand=+1, color="purple"), GraphicFeature(start=15305, end=15442, strand=+1, color="purple"), GraphicFeature(start=15804, end=15836, strand=+1, color="purple"), GraphicFeature(start=15903, end=16204, strand=+1, color="purple"), GraphicFeature(start=16222, end=16665, strand=+1, color="purple"), GraphicFeature(start=16666, end=16712, strand=+1, color="purple"), GraphicFeature(start=16823, end=17181, strand=+1, color="purple"), GraphicFeature(start=17192, end=17448, strand=+1, color="purple"), GraphicFeature(start=18106, end=18148, strand=+1, color="purple"), GraphicFeature(start=18851, end=19485, strand=+1, color="purple"), GraphicFeature(start=19501, end=19522, strand=+1, color="purple"), GraphicFeature(start=19535, end=19630, strand=+1, color="purple"), GraphicFeature(start=19906, end=20212, strand=+1, color="purple"), GraphicFeature(start=20265, end=20324, strand=+1, color="purple"), GraphicFeature(start=20347, end=20364, strand=+1, color="purple"), GraphicFeature(start=20526, end=20656, strand=+1, color="purple"), GraphicFeature(start=23548, end=23571, strand=+1, color="purple"), GraphicFeature(start=23588, end=23595, strand=+1, color="purple"), GraphicFeature(start=23624, end=23650, strand=+1, color="purple"), GraphicFeature(start=23671, end=23688, strand=+1, color="purple"), GraphicFeature(start=23707, end=23736, strand=+1, color="purple"), GraphicFeature(start=24257, end=24624, strand=+1, color="purple"), GraphicFeature(start=24633, end=24653, strand=+1, color="purple"), GraphicFeature(start=24676, end=25173, strand=+1, color="purple"), ] record = GraphicRecord(sequence_length=25525, features=features) ax, _ = record.plot(figure_width=20) ax.figure.savefig("images.png")