def plot_gc_content(self, fontsize=16, ec="k", bins=100):
    """Plot the GC content histogram together with a normal curve.

    The histogram is normalised (``density=True``) and a normal probability
    density function, parameterised by the mean and standard deviation of
    the GC content values, is drawn on top as a dashed red line.

    :param fontsize: font size of the x-axis label
    :param ec: edge colour of the histogram bars (black by default)
    :param bins: either a number of bin edges spread evenly between 0 and
        100, or an array-like of bin edges (it is copied, never modified)

    .. plot::
        :include-source:

        from sequana import BAM, sequana_data
        b = BAM(sequana_data('test.bam'))
        b.plot_gc_content()

    """
    data = self.get_gc_content()

    # A scalar is a number of edges; anything else is taken as the edges
    # themselves. An explicit check avoids hiding unrelated errors behind a
    # bare "except".
    if np.ndim(bins) == 0:
        X = np.linspace(0, 100, bins)
    else:
        X = np.array(bins)

    # "normed" was removed from matplotlib's hist(); "density" replaces it.
    pylab.hist(data, X, density=True, ec=ec)
    pylab.grid(True)

    mu = np.mean(data)
    sigma = np.std(data)

    X = np.linspace(X.min(), X.max(), 100)
    # pylab.normpdf is no longer provided by matplotlib, so the normal
    # density is computed directly.
    pdf = np.exp(-0.5 * ((X - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
    pylab.plot(X, pdf, lw=2, color="r", ls="--")
    pylab.xlabel("GC content", fontsize=fontsize)
def plot_contigs(res_best, ax, mode="score"):
    """Draw every contig of ``res_best`` as a horizontal segment on ``ax``.

    The segment spans the contig start and end on the genome, both shifted
    by the row's ``rotation`` value. ``len_genome`` and ``colors`` are read
    from the enclosing scope.

    :param res_best: table with the columns ``qName``, ``tStart``, ``tEnd``,
        ``rotation`` and (in "score" mode) ``score_norm``
    :param ax: axes-like object whose ``plot`` method is called per contig
    :param mode: "score" places each contig at ``score_norm / -5``; any
        other value spreads the contigs evenly between 0 and 1
    :return: list of ``[start, end]`` pairs, one per contig
    """
    n_contigs = res_best.shape[0]
    use_score = mode == "score"

    # evenly spaced heights are only needed when the score is not used
    if not use_score:
        y = list(np.linspace(0, 1, n_contigs))

    list_contigs = []
    for i in range(n_contigs):
        row = res_best.iloc[i, :]
        contig = row["qName"]

        t_start = int(row["tStart"])
        shift = int(row["rotation"])
        t_end = int(row["tEnd"])
        start = t_start + shift
        end = t_end + shift

        # wrap both coordinates when the shifted start lies one genome
        # length away from the origin
        if int(abs(start / len_genome)) == 1:
            start %= len_genome
            end %= len_genome

        list_contigs.append([start, end])

        if use_score:
            height = float(row["score_norm"] / float(-5))
        else:
            height = y[i]

        ax.plot(
            [start, end],
            [height] * 2,
            ls='-',
            lw=5,
            color=colors[i],
            solid_capstyle="butt",
        )

    return list_contigs
def plot_gc_content(self, fontsize=16, ec="k", bins=100):
    """Plot the GC content histogram together with a normal curve.

    The histogram is normalised (``density=True``) and a normal probability
    density function, parameterised by the mean and standard deviation of
    the GC content values, is drawn on top as a dashed red line.

    :param fontsize: font size of the x-axis label
    :param ec: edge colour of the histogram bars (black by default)
    :param bins: either a number of bin edges spread evenly between 0 and
        100, or an array-like of bin edges (it is copied, never modified)

    .. plot::
        :include-source:

        from sequana import BAM, sequana_data
        b = BAM(sequana_data('test.bam'))
        b.plot_gc_content()

    """
    data = self.get_gc_content()

    # A scalar is a number of edges; anything else is taken as the edges
    # themselves. An explicit check avoids hiding unrelated errors behind a
    # bare "except".
    if np.ndim(bins) == 0:
        X = np.linspace(0, 100, bins)
    else:
        X = np.array(bins)

    pylab.hist(data, X, density=True, ec=ec)
    pylab.grid(True)

    mu = pylab.mean(data)
    sigma = pylab.std(data)

    X = pylab.linspace(X.min(), X.max(), 100)

    from sequana.misc import normpdf

    pylab.plot(X, normpdf(X, mu, sigma), lw=2, color="r", ls="--")
    # honour the caller's font size instead of a hard-coded value
    pylab.xlabel("GC content", fontsize=fontsize)
def _create_template_fft(self, M=1000):
    """Build a windowed step-like template of ``M`` points.

    The template is a plateau at -0.5, a linear ramp from -0.5 to 0.5 and a
    plateau at 0.5 (each plateau holds ``int(M / 3)`` points), multiplied
    point by point by a Hanning window of length ``M``.

    :param M: number of points in the template
    :return: list holding the ``M`` windowed template values
    """
    plateau_size = int(M / 3)
    lower_plateau = np.full(plateau_size, -0.5)
    ramp = np.linspace(-0.5, 0.5, M - 2 * plateau_size)
    upper_plateau = np.full(plateau_size, 0.5)
    template = np.concatenate((lower_plateau, ramp, upper_plateau))
    return list(template * np.hanning(M))
] return genome_not_covered ################################ PLOT ############################################################################################## if save_not_covered: df_not_covered_all = pd.DataFrame(columns=["start", "end", "reference"]) if do_plot: ##### Plots for all references cmap = pylab.cm.get_cmap(colormap) # shuffle colors : in case 2 adjacent contigs have the same color, user can plot again to see better shuffle_col = list(np.linspace(0, 1, res_best.shape[0])) shuffle(shuffle_col) colors = [cmap(i) for i in shuffle_col] pylab.plot(res_best["qLength"], res_best["score_norm"], "bo", alpha=0.5) pylab.xlabel("Length of contig") pylab.ylabel("Score blasr (normalised by length)") pylab.title(title_plot) if save_plot: pylab.savefig(file_plot.replace(".png", "_scores.png")) else: pylab.show() ##### Plot by reference ref_found = list(res_best["reference"].unique())
def plot(self, fig=None, grid=True, rotation=30, lower=None, upper=None,
         shrink=0.9, facecolor='white', colorbar=True, label_color='black',
         fontsize='small', edgecolor='black', method='ellipse',
         order_method='complete', order_metric='euclidean', cmap=None,
         ax=None, binarise_color=False):
    """Plot the correlation matrix from the content of :attr:`df` (dataframe)

    By default, the correlation is shown on the upper and lower triangle and
    is symmetric with respect to the diagonal. The symbols are ellipses. The
    symbols can be changed to e.g. rectangle. The symbols are shown on upper
    and lower sides but you could choose a symbol for the upper side and
    another for the lower side using the **lower** and **upper** parameters.

    :param fig: Create a new figure by default. If an instance of an
        existing figure is provided, the corrplot is overlaid on the figure
        provided. Can also be the number of the figure.
    :param grid: add grid (defaults to grey color). You can set it to False
        or a color.
    :param rotation: rotation applied to the labels drawn along the x-axis
        (or along the diagonal when only the lower triangle is shown)
    :param lower: if set to a valid method, plots the data on the lower
        triangle
    :param upper: if set to a valid method, plots the data on the upper
        triangle
    :param float shrink: maximum space used (in percent) by a symbol.
        If negative values are provided, the absolute value is taken.
        If greater than 1, the symbols will overlap.
    :param facecolor: color of the background (defaults to white).
    :param colorbar: add the colorbar (defaults to True).
    :param str label_color: (defaults to black).
    :param fontsize: size of the fonts defaults to 'small'.
    :param edgecolor: stored in :attr:`edgecolor` before the patches are
        added (defaults to black).
    :param method: shape to be used in 'ellipse', 'square', 'rectangle',
        'color', 'text', 'circle', 'number', 'pie'.
    :param order_method: see :meth:`order`.
    :param order_metric: see :meth:`order`.
    :param cmap: a valid cmap from matplotlib or colormap package (e.g.,
        'jet', or 'copper'), or a sequence of colors passed on to the
        colormap builder. Default is red/white/blue colors.
    :param ax: a matplotlib axes. If provided, it is cleared and reused.
    :param binarise_color: stored in :attr:`binarise_color` before the
        patches are added.
    :return: the colorbar instance when *colorbar* is true, None otherwise.

    The colorbar can be tuned with the parameters stored in :attr:`params`.

    Here is an example. See notebook for other examples::

        c = corrplot.Corrplot(dataframe)
        c.plot(cmap=('Orange', 'white', 'green'))
        c.plot(method='circle')
        c.plot(colorbar=False, shrink=.8, upper='circle' )

    """
    # colormap: user-provided one if it can be built, default otherwise
    if cmap is not None:
        try:
            if isinstance(cmap, str):
                self.cm = cmap_builder(cmap)
            else:
                self.cm = cmap_builder(*cmap)
        except Exception:
            # best effort: any failure to build the colormap falls back to
            # the default, without swallowing KeyboardInterrupt/SystemExit
            logger.warning("incorrect cmap. Use default one")
            self._set_default_cmap()
    else:
        self._set_default_cmap()

    self.shrink = abs(shrink)
    self.fontsize = fontsize
    self.edgecolor = edgecolor

    df = self.order(method=order_method, metric=order_metric)

    # figure can be a number or an instance; otherwise creates it
    if isinstance(fig, int):
        fig = plt.figure(num=fig, facecolor=facecolor)
    elif fig is not None:
        fig = plt.figure(num=fig.number, facecolor=facecolor)
    else:
        fig = plt.figure(num=None, facecolor=facecolor)

    # do we have an axes to plot the data in ?
    if ax is None:
        ax = plt.subplot(1, 1, 1, aspect='equal', facecolor=facecolor)
    else:
        # if so, clear the axes. Colorbar cannot be removed easily.
        plt.sca(ax)
        ax.clear()

    # subplot resets the bg color, let us set it again
    fig.set_facecolor(facecolor)

    width, height = df.shape
    labels = df.columns

    # decide which triangles are drawn
    # TODO check value of lower and upper
    if upper is None and lower is None:
        mode = 'method'
    elif upper and lower:
        mode = 'both'
    elif lower is not None:
        mode = 'lower'
    else:
        mode = 'upper'

    # add all patches to the figure
    self.binarise_color = binarise_color
    if mode == 'upper':
        self._add_patches(df, upper, 'upper', ax, diagonal=True)
    elif mode == 'lower':
        self._add_patches(df, lower, 'lower', ax, diagonal=True)
    elif mode == 'method':
        self._add_patches(df, method, 'both', ax, diagonal=True)
    elif mode == 'both':
        self._add_patches(df, upper, 'upper', ax, diagonal=False)
        self._add_patches(df, lower, 'lower', ax, diagonal=False)

    # set xticks/xlabels on top
    ax.xaxis.tick_top()
    xtickslocs = np.arange(len(labels))
    ax.set_xticks(xtickslocs)
    ax.set_xticklabels(labels, rotation=rotation, color=label_color,
                       fontsize=fontsize, ha='left')

    ytickslocs = np.arange(len(labels))
    ax.set_yticks(ytickslocs)
    ax.set_yticklabels(labels, fontsize=fontsize, color=label_color)
    plt.tight_layout()

    # shift the limits to englobe the patches correctly
    # This should be here after set_xticks
    ax.set_xlim(-0.5, width - .5)
    ax.set_ylim(-0.5, height - .5)
    ax.invert_yaxis()

    if grid is not False:
        if grid is True:
            grid = 'grey'
        for i in range(0, width):
            ratio1 = float(i) / width
            ratio2 = float(i + 2) / width
            # TODO 1- set axis off
            # 2 - set xlabels along the diagonal
            # set colorbar either on left or bottom
            if mode == 'lower':
                plt.axvline(i + .5, ymin=1 - ratio1, ymax=0., color=grid)
                plt.axhline(i + .5, xmin=0, xmax=ratio2, color=grid)
            if mode == 'upper':
                plt.axvline(i + .5, ymin=1 - ratio2, ymax=1, color=grid)
                plt.axhline(i + .5, xmin=ratio1, xmax=1, color=grid)
            if mode in ['method', 'both']:
                plt.axvline(i + .5, color=grid)
                plt.axhline(i + .5, color=grid)

        # Single-triangle layouts: draw the outer frame, hide the regular
        # axis and write the labels along the diagonal instead.
        # can probably be simplified
        if mode == 'lower':
            plt.axvline(-.5, ymin=0, ymax=1, color='grey')
            plt.axvline(width - .5, ymin=0, ymax=1. / width,
                        color='grey', lw=2)
            plt.axhline(width - .5, xmin=0, xmax=1, color='grey', lw=2)
            plt.axhline(-.5, xmin=0, xmax=1. / width, color='grey', lw=2)
            plt.xticks([])
            for i in range(0, width):
                plt.text(i, i - .6, labels[i], fontsize=fontsize,
                         color=label_color, rotation=rotation,
                         verticalalignment='bottom')
                plt.text(-.6, i, labels[i], fontsize=fontsize,
                         color=label_color, rotation=0,
                         horizontalalignment='right')
            plt.axis('off')
        # can probably be simplified
        elif mode == 'upper':
            plt.axvline(width - .5, ymin=0, ymax=1, color='grey', lw=2)
            plt.axvline(-.5, ymin=1 - 1. / width, ymax=1,
                        color='grey', lw=2)
            plt.axhline(-.5, xmin=0, xmax=1, color='grey', lw=2)
            plt.axhline(width - .5, xmin=1 - 1. / width, xmax=1,
                        color='grey', lw=2)
            plt.yticks([])
            for i in range(0, width):
                plt.text(-.6 + i, i, labels[i], fontsize=fontsize,
                         color=label_color, horizontalalignment='right',
                         rotation=0)
                plt.text(i, -.5, labels[i], fontsize=fontsize,
                         color=label_color, rotation=rotation,
                         verticalalignment='bottom')
            plt.axis('off')

    # set all ticks length to zero
    ax = plt.gca()
    ax.tick_params(axis='both', which='both', length=0)

    if colorbar:
        N = self.params['colorbar.N'] + 1
        assert N >= 2
        # make sure the colorbar limits remains between the min (0) and the
        # max (1) (remember colormap are normalised) so that if data is
        # between -0.5 and let us say +1, the colors do not start at -0.5
        # but -1 indeed.
        self.collection.set_clim(0, 1)
        orientation = self.params['colorbar.orientation']
        cb = plt.gcf().colorbar(
            self.collection,
            orientation=orientation,
            shrink=self.params['colorbar.shrink'],
            boundaries=np.linspace(0, 1, N),
            ticks=[0, .25, 0.5, 0.75, 1])
        # The ticks sit on the long axis of the colorbar, so relabel the
        # axis that actually carries them (normalised 0..1 shown as -1..1).
        tick_labels = [-1, -.5, 0, .5, 1]
        if orientation == 'horizontal':
            cb.ax.set_xticklabels(tick_labels)
        else:
            cb.ax.set_yticklabels(tick_labels)
        return cb
summary_variants.columns = header_df print("Save summary") summary_variants.to_csv(filename_summary + ".csv") ################################ PLOTS ############################################################################################## print("Create plots") #colors = ['m','r','y','g','b','c','k'] cmap = pylab.cm.get_cmap(colormap) #colors = [cmap(i) for i in np.linspace(0,1,len(list_analysis))] # positions of genome gen_pos = [[i, i + step - 1] for i in range(0, len_genome, step)] y_pos = list(np.linspace(0, 1, len(analysis_names) + 2)) if custom_colors: y_col = [colors[i] for i in range(len(analysis_names))] else: y_col = [cmap(i) for i in np.linspace(0, 1, len(analysis_names))] pylab.close('all') # create figure fig, axarr = pylab.subplots(len(gen_pos), 1, figsize=(int(step / 20000), int(len(gen_pos)) * 1.1)) for i in range(len(gen_pos)): subplot_variant_position(df_result, i, gen_pos, axarr, analysis_names,
################################ SAVE ############################################################################################## # save clean result if save_result: res_blasr.to_csv(file_blasr + "_scores.csv", index=False) df_variants.to_csv(file_blasr + "_variants.csv", index=False) ################################ PLOTS ############################################################################################## if do_plots: # title title_plot = file_blasr.split("/")[-1] # colors cmap = pylab.cm.get_cmap(colormap) # shuffel colors : in case 2 adjacent contigs have the same color, user can plot again to see better shuffle_col = list(np.linspace(0, 1, res_blasr.shape[0])) shuffle(shuffle_col) colors = [cmap(i) for i in shuffle_col] # plot score nor normalised pylab.plot(res_blasr["qLength"], res_blasr["score"], "bo", alpha=0.5) pylab.xlabel("Length of contig") pylab.ylabel("Score blasr (not normalised)") pylab.title(title_plot) pylab.show() # plot score normalised by lenght pylab.plot(res_blasr["qLength"], res_blasr["score_norm"], "bo", alpha=0.5) pylab.xlabel("Length of contig") pylab.ylabel("Score blasr (normalised by length)") pylab.title(title_plot)
""" # read genome reference ref_fasta = SeqIO.read(filename_ref, "fasta") sequence_reference = str(ref_fasta.seq).upper() len_genome = len(sequence_reference) return sequence_reference, len_genome ################################ EXECUTE ################################################################################################ # create N rotations of reference genome sequence_reference, len_genome = read_genome(filename_ref) seq_duplicated = sequence_reference+sequence_reference start_point = np.linspace(0,len_genome,N+1) start_point = [int(round(i)) for i in start_point[0:(len(start_point)-1)]] leak = start_point[1] # number of bases to keep at the end of the rotation : duplicate of the begining #print(leak) for start in start_point: seq_rotated = seq_duplicated[start:(start+len_genome+leak)] record_rotated = SeqRecord(Seq(seq_rotated,generic_dna), id = "%s genome rotated start=%s" % (filename_output.split("/")[-1],str(start))) SeqIO.write(record_rotated, "%s_rotate_%s.fasta" % (filename_output,str(start)), "fasta")
from sequana.lazy import pandas as pd from sequana.lazy import numpy as np from sklearn.metrics import precision_recall_curve import sys ################################ PARAMETERS ############################################################################################## file_result = str(sys.argv[1]) file_variants = str(sys.argv[2]) len_genome = int(sys.argv[3]) file_fig = str(sys.argv[4]) colormap = 'nipy_spectral_r' if len(sys.argv) > 5: cmap = pylab.cm.get_cmap(colormap) colors = [cmap(i) for i in np.linspace(0, 1, 7)] custom_colormap = [colors[0]] for i in range(1, 7): custom_colormap.extend([colors[i]] * 3) custom_colormap.extend([colors[-1]]) ################################ FUNCTIONS ################################################################################################ def compute_table_performance(analysis, df_results): """ return [TP, FP, FN, TN] TP and FP are lists of scores """ TP = [] FP = []
def _create_template_fft(self, M=1000): M_3 = int(M / 3) W = [-0.5] * M_3 + list(np.linspace(-0.5, 0.5, M - 2*M_3)) + [0.5] * M_3 return list(W * np.hanning(M))