예제 #1
0
    def plot_gc_content(self, fontsize=16, ec="k", bins=100):
        """Plot the GC content histogram with a fitted normal curve on top.

        The histogram is normalised (``density=True``) and a Gaussian using
        the mean and standard deviation of the data is drawn as a dashed red
        line over the same x-range.

        :param fontsize: font size of the x-axis label
        :param ec: edge color of the bars (default adds a black contour)
        :param bins: a value for the number of bins or an array (with a copy()
            method)

        .. plot::
            :include-source:

            from sequana import BAM, sequana_data
            b = BAM(sequana_data('test.bam'))
            b.plot_gc_content()

        """
        data = self.get_gc_content()
        try:
            X = np.linspace(0, 100, bins)
        except TypeError:
            # bins is not a scalar count: use a copy of it as the bin edges
            X = bins.copy()

        # ``normed`` was removed from matplotlib's hist(); ``density`` is its
        # replacement
        pylab.hist(data, X, density=True, ec=ec)
        pylab.grid(True)
        mu = pylab.mean(data)
        sigma = pylab.std(data)

        X = pylab.linspace(X.min(), X.max(), 100)
        # normal probability density computed with numpy, since
        # ``pylab.normpdf`` is no longer shipped with matplotlib
        pdf = np.exp(-0.5 * ((X - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
        pylab.plot(X, pdf, lw=2, color="r", ls="--")
        pylab.xlabel("GC content", fontsize=fontsize)
def plot_contigs(res_best, ax, mode="score"):
	"""
	Draw every row of ``res_best`` as a horizontal segment on ``ax``.

	res_best : table indexed with ``.iloc`` whose rows provide "qName",
		"tStart", "tEnd", "rotation" and (in score mode) "score_norm"
	ax : axes object on which ``plot`` is called
	mode : string : "score" puts each segment at score_norm / -5, any other
		value spreads the segments evenly between 0 and 1

	Relies on the module-level names ``len_genome`` and ``colors``.
	Returns the list of [start, end] pairs that were drawn.
	"""
	n_rows = res_best.shape[0]
	by_score = mode == "score"

	# evenly spaced heights are only required when the score is not used
	if not by_score:
		y = list(np.linspace(0, 1, n_rows))

	list_contigs = []

	for idx in range(n_rows):
		row = res_best.iloc[idx, :]
		contig = row["qName"]
		t_start = int(row["tStart"])
		shift = int(row["rotation"])
		t_end = int(row["tEnd"])
		start = t_start + shift
		end = t_end + shift

		# bring both coordinates back onto the genome when the shifted start
		# lies one full genome length away
		if int(abs(start / len_genome)) == 1:
			start %= len_genome
			end %= len_genome

		list_contigs.append([start, end])

		# vertical position of the segment
		if by_score:
			height = float(row["score_norm"] / float(-5))
		else:
			height = y[idx]
		ax.plot([start, end], [height] * 2, ls='-', lw=5, color=colors[idx], solid_capstyle="butt")

	return list_contigs
예제 #3
0
파일: bamtools.py 프로젝트: sequana/sequana
    def plot_gc_content(self, fontsize=16, ec="k", bins=100):
        """Plot the GC content histogram with a fitted normal curve on top.

        The histogram is normalised (``density=True``) and a Gaussian using
        the mean and standard deviation of the data is drawn as a dashed red
        line over the same x-range.

        :param fontsize: font size of the x-axis label
        :param ec: edge color of the bars (default adds a black contour)
        :param bins: a value for the number of bins or an array (with a copy()
            method)

        .. plot::
            :include-source:

            from sequana import BAM, sequana_data
            b = BAM(sequana_data('test.bam'))
            b.plot_gc_content()

        """
        data = self.get_gc_content()
        try:
            X = np.linspace(0, 100, bins)
        except TypeError:
            # bins is not a scalar count: use a copy of it as the bin edges
            X = bins.copy()

        pylab.hist(data, X, density=True, ec=ec)
        pylab.grid(True)
        mu = pylab.mean(data)
        sigma = pylab.std(data)

        X = pylab.linspace(X.min(), X.max(), 100)

        from sequana.misc import normpdf

        pylab.plot(X, normpdf(X, mu, sigma), lw=2, color="r", ls="--")
        # honour the fontsize argument instead of a hard-coded 16
        pylab.xlabel("GC content", fontsize=fontsize)
예제 #4
0
 def _create_template_fft(self, M=1000):
     M_3 = int(M / 3)
     W = [-0.5] * M_3 + list(np.linspace(-0.5, 0.5,
                                         M - 2 * M_3)) + [0.5] * M_3
     return list(W * np.hanning(M))
예제 #5
0
    ]

    return genome_not_covered


################################ PLOT ##############################################################################################
if save_not_covered:
    # empty table with the start / end / reference columns
    df_not_covered_all = pd.DataFrame(columns=["start", "end", "reference"])

if do_plot:

    ##### Plots for all references

    # pyplot-level lookup: matplotlib.cm.get_cmap was deprecated in
    # matplotlib 3.7 and removed in 3.9
    cmap = pylab.get_cmap(colormap)
    # shuffle colors: in case 2 adjacent contigs have the same color, user can plot again to see better
    shuffle_col = list(np.linspace(0, 1, res_best.shape[0]))
    shuffle(shuffle_col)
    colors = [cmap(i) for i in shuffle_col]

    # normalised score against contig length, all references together
    pylab.plot(res_best["qLength"], res_best["score_norm"], "bo", alpha=0.5)
    pylab.xlabel("Length of contig")
    pylab.ylabel("Score blasr (normalised by length)")
    pylab.title(title_plot)
    if save_plot:
        pylab.savefig(file_plot.replace(".png", "_scores.png"))
    else:
        pylab.show()

    ##### Plot by reference

    ref_found = list(res_best["reference"].unique())
예제 #6
0
    def plot(self,
             fig=None,
             grid=True,
             rotation=30,
             lower=None,
             upper=None,
             shrink=0.9,
             facecolor='white',
             colorbar=True,
             label_color='black',
             fontsize='small',
             edgecolor='black',
             method='ellipse',
             order_method='complete',
             order_metric='euclidean',
             cmap=None,
             ax=None,
             binarise_color=False):
        """plot the correlation matrix from the content of :attr:`df`
        (dataframe)

        By default, the correlation is shown on the upper and lower triangle and is
        symmetric with respect to the diagonal. The symbols are ellipses. The symbols can
        be changed to e.g. rectangle. The symbols are shown on upper and lower sides but
        you could choose a symbol for the upper side and another for the lower side using
        the **lower** and **upper** parameters.

        :param fig: Create a new figure by default. If an instance of an existing
            figure is provided, the corrplot is overlaid on the figure provided.
            Can also be the number of the figure.
        :param grid: add grid (Defaults to grey color). You can set it to False or a color.
        :param rotation: rotation of the column labels (passed to the x tick
            labels, or to the diagonal labels in lower/upper mode)
        :param lower: if set to a valid method, plots the data on the lower
            left triangle
        :param upper: if set to a valid method, plots the data on the upper
            right triangle
        :param float shrink: maximum space used (as a fraction) by a symbol.
            If negative values are provided, the absolute value is taken.
            If greater than 1, the symbols will overlap.
        :param facecolor: color of the background (defaults to white).
        :param colorbar: add the colorbar (defaults to True).
        :param str label_color: (defaults to black).
        :param fontsize: size of the fonts defaults to 'small'.
        :param edgecolor: stored in :attr:`edgecolor` (defaults to black).
        :param method: shape to be used in 'ellipse', 'square', 'rectangle',
            'color', 'text', 'circle',  'number', 'pie'. Only used when
            neither **lower** nor **upper** is provided.

        :param order_method: see :meth:`order`.
        :param order_metric: see :meth:`order`.
        :param cmap: a valid cmap from matplotlib or colormap package (e.g.,
            'jet', or 'copper'), or a sequence of colors. Default is
            red/white/blue colors. An invalid value falls back to the default
            with a warning.
        :param ax: a matplotlib axes. If provided, it is cleared before
            plotting.
        :param binarise_color: stored in :attr:`binarise_color` before the
            patches are added (presumably read by the patch drawing code;
            to be confirmed).
        :return: the colorbar instance if **colorbar** is set, otherwise None.

        The colorbar can be tuned with the parameters stored in :attr:`params`.

        Here is an example. See notebook for other examples::

            c = corrplot.Corrplot(dataframe)
            c.plot(cmap=('Orange', 'white', 'green'))
            c.plot(method='circle')
            c.plot(colorbar=False, shrink=.8, upper='circle'  )

        """
        # colormap: user-provided one if valid, default one otherwise
        if cmap is not None:
            try:
                if isinstance(cmap, str):
                    self.cm = cmap_builder(cmap)
                else:
                    self.cm = cmap_builder(*cmap)
            except Exception:
                logger.warning("incorrect cmap. Use default one")
                self._set_default_cmap()
        else:
            self._set_default_cmap()

        self.shrink = abs(shrink)
        self.fontsize = fontsize
        self.edgecolor = edgecolor

        df = self.order(method=order_method, metric=order_metric)

        # figure can be a number or an instance; otherwise creates it
        if isinstance(fig, int):
            fig = plt.figure(num=fig, facecolor=facecolor)
        elif fig is not None:
            fig = plt.figure(num=fig.number, facecolor=facecolor)
        else:
            fig = plt.figure(num=None, facecolor=facecolor)

        # do we have an axes to plot the data in ?
        if ax is None:
            ax = plt.subplot(1, 1, 1, aspect='equal', facecolor=facecolor)
        else:
            # if so, clear the axes. Colorbar cannot be removed easily.
            plt.sca(ax)
            ax.clear()

        # subplot resets the bg color, let us set it again
        fig.set_facecolor(facecolor)

        width, height = df.shape
        labels = (df.columns)

        # add all patches to the figure
        # TODO check value of lower and upper

        # decide which triangle(s) are drawn and with which symbol
        if upper is None and lower is None:
            mode = 'method'
            diagonal = True
        elif upper and lower:
            mode = 'both'
            diagonal = False
        elif lower is not None:
            mode = 'lower'
            diagonal = True
        elif upper is not None:
            mode = 'upper'
            diagonal = True

        self.binarise_color = binarise_color
        if mode == 'upper':
            self._add_patches(df, upper, 'upper', ax, diagonal=True)
        elif mode == 'lower':
            self._add_patches(df, lower, 'lower', ax, diagonal=True)
        elif mode == 'method':
            self._add_patches(df, method, 'both', ax, diagonal=True)
        elif mode == 'both':
            self._add_patches(df, upper, 'upper', ax, diagonal=False)
            self._add_patches(df, lower, 'lower', ax, diagonal=False)

        # set xticks/xlabels on top
        ax.xaxis.tick_top()
        xtickslocs = np.arange(len(labels))
        ax.set_xticks(xtickslocs)
        ax.set_xticklabels(labels,
                           rotation=rotation,
                           color=label_color,
                           fontsize=fontsize,
                           ha='left')

        ytickslocs = np.arange(len(labels))
        ax.set_yticks(ytickslocs)
        ax.set_yticklabels(labels, fontsize=fontsize, color=label_color)
        plt.tight_layout()

        # shift the limits to englobe the patches correctly
        # This should be here after set_xticks
        ax.set_xlim(-0.5, width - .5)
        ax.set_ylim(-0.5, height - .5)
        ax.invert_yaxis()

        if grid is not False:
            # True means the default grid color
            if grid is True:
                grid = 'grey'
            for i in range(0, width):
                # axes-fraction bounds used to restrict the grid lines to
                # the drawn triangle
                ratio1 = float(i) / width
                ratio2 = float(i + 2) / width
                # TODO 1- set axis off
                # 2 - set xlabels along the diagonal
                # set colorbar either on left or bottom
                if mode == 'lower':
                    plt.axvline(i + .5, ymin=1 - ratio1, ymax=0., color=grid)
                    plt.axhline(i + .5, xmin=0, xmax=ratio2, color=grid)
                if mode == 'upper':
                    plt.axvline(i + .5, ymin=1 - ratio2, ymax=1, color=grid)
                    plt.axhline(i + .5, xmin=ratio1, xmax=1, color=grid)
                if mode in ['method', 'both']:
                    plt.axvline(i + .5, color=grid)
                    plt.axhline(i + .5, color=grid)

            # can probably be simplified
            if mode == 'lower':
                # outer border of the lower triangle
                plt.axvline(-.5, ymin=0, ymax=1, color='grey')
                plt.axvline(width - .5,
                            ymin=0,
                            ymax=1. / width,
                            color='grey',
                            lw=2)
                plt.axhline(width - .5, xmin=0, xmax=1, color='grey', lw=2)
                plt.axhline(-.5, xmin=0, xmax=1. / width, color='grey', lw=2)
                plt.xticks([])
                # labels are written as text: along the diagonal and on the
                # left side
                for i in range(0, width):
                    plt.text(i,
                             i - .6,
                             labels[i],
                             fontsize=fontsize,
                             color=label_color,
                             rotation=rotation,
                             verticalalignment='bottom')
                    plt.text(-.6,
                             i,
                             labels[i],
                             fontsize=fontsize,
                             color=label_color,
                             rotation=0,
                             horizontalalignment='right')
                plt.axis('off')
            # can probably be simplified
            elif mode == 'upper':
                # outer border of the upper triangle
                plt.axvline(width - .5, ymin=0, ymax=1, color='grey', lw=2)
                plt.axvline(-.5,
                            ymin=1 - 1. / width,
                            ymax=1,
                            color='grey',
                            lw=2)
                plt.axhline(-.5, xmin=0, xmax=1, color='grey', lw=2)
                plt.axhline(width - .5,
                            xmin=1 - 1. / width,
                            xmax=1,
                            color='grey',
                            lw=2)
                plt.yticks([])
                # labels are written as text: along the diagonal and on the
                # top side
                for i in range(0, width):
                    plt.text(-.6 + i,
                             i,
                             labels[i],
                             fontsize=fontsize,
                             color=label_color,
                             horizontalalignment='right',
                             rotation=0)
                    plt.text(i,
                             -.5,
                             labels[i],
                             fontsize=fontsize,
                             color=label_color,
                             rotation=rotation,
                             verticalalignment='bottom')
                plt.axis('off')

        # set all ticks length to zero
        ax = plt.gca()
        ax.tick_params(axis='both', which='both', length=0)

        if colorbar:
            N = self.params['colorbar.N'] + 1
            assert N >= 2
            # make sure the colorbar limits remains between the min (0) and the
            # max (1) (remember colormap are normalised) so that if data is
            # between -0.5 and let us say +1, the colors do not start at -0.5
            # but -1 indeed.
            self.collection.set_clim(0, 1)
            cb = plt.gcf().colorbar(
                self.collection,
                orientation=self.params['colorbar.orientation'],
                shrink=self.params['colorbar.shrink'],
                boundaries=np.linspace(0, 1, N),
                ticks=[0, .25, 0.5, 0.75, 1])
            # ticks are in normalised [0, 1] units; relabel them as
            # correlation values in [-1, 1]
            cb.ax.set_yticklabels([-1, -.5, 0, .5, 1])
            return cb
예제 #7
0
summary_variants.columns = header_df

print("Save summary")
summary_variants.to_csv(filename_summary + ".csv")

################################ PLOTS ##############################################################################################

print("Create plots")
#colors = ['m','r','y','g','b','c','k']

# pyplot-level lookup: matplotlib.cm.get_cmap was deprecated in matplotlib 3.7
# and removed in 3.9
cmap = pylab.get_cmap(colormap)
#colors = [cmap(i) for i in np.linspace(0,1,len(list_analysis))]

# positions of genome: consecutive [first, last] windows of `step` positions
gen_pos = [[i, i + step - 1] for i in range(0, len_genome, step)]
# len(analysis_names) + 2 evenly spaced values between 0 and 1
y_pos = list(np.linspace(0, 1, len(analysis_names) + 2))

# one color per analysis: user-provided colors or sampled from the colormap
if custom_colors:
    y_col = [colors[i] for i in range(len(analysis_names))]
else:
    y_col = [cmap(i) for i in np.linspace(0, 1, len(analysis_names))]

pylab.close('all')

# create figure: one row of axes per genome window
fig, axarr = pylab.subplots(len(gen_pos),
                            1,
                            figsize=(int(step / 20000),
                                     len(gen_pos) * 1.1))
for i in range(len(gen_pos)):
    subplot_variant_position(df_result, i, gen_pos, axarr, analysis_names,
예제 #8
0
################################ SAVE ##############################################################################################

# save clean result
if save_result:
    # scores and variants are written next to the blasr file
    res_blasr.to_csv(file_blasr + "_scores.csv", index=False)
    df_variants.to_csv(file_blasr + "_variants.csv", index=False)

################################ PLOTS ##############################################################################################

if do_plots:
    # title: last component of the blasr file path
    title_plot = file_blasr.split("/")[-1]
    # colors
    # pyplot-level lookup: matplotlib.cm.get_cmap was deprecated in
    # matplotlib 3.7 and removed in 3.9
    cmap = pylab.get_cmap(colormap)
    # shuffle colors: in case 2 adjacent contigs have the same color, user can plot again to see better
    shuffle_col = list(np.linspace(0, 1, res_blasr.shape[0]))
    shuffle(shuffle_col)
    colors = [cmap(i) for i in shuffle_col]

    # plot score not normalised
    pylab.plot(res_blasr["qLength"], res_blasr["score"], "bo", alpha=0.5)
    pylab.xlabel("Length of contig")
    pylab.ylabel("Score blasr (not normalised)")
    pylab.title(title_plot)
    pylab.show()

    # plot score normalised by length
    pylab.plot(res_blasr["qLength"], res_blasr["score_norm"], "bo", alpha=0.5)
    pylab.xlabel("Length of contig")
    pylab.ylabel("Score blasr (normalised by length)")
    pylab.title(title_plot)
예제 #9
0
    """
    # read genome reference
    ref_fasta = SeqIO.read(filename_ref, "fasta")
    sequence_reference = str(ref_fasta.seq).upper()
    len_genome = len(sequence_reference)
    return sequence_reference, len_genome



################################ EXECUTE ################################################################################################


# create N rotations of reference genome
sequence_reference, len_genome = read_genome(filename_ref)
# the doubled sequence lets every rotation be taken as a plain slice
seq_duplicated = sequence_reference * 2

# N evenly spaced start positions (the end point of the range is dropped)
start_point = [int(round(i)) for i in np.linspace(0, len_genome, N + 1)[:-1]]

# number of extra bases appended to each rotation; they repeat its beginning
leak = start_point[1]

for start in start_point:
    stop = start + len_genome + leak
    seq_rotated = seq_duplicated[start:stop]
    record_rotated = SeqRecord(
        Seq(seq_rotated, generic_dna),
        id="%s genome rotated start=%s" % (filename_output.split("/")[-1], str(start)),
    )
    SeqIO.write(record_rotated, "%s_rotate_%s.fasta" % (filename_output, str(start)), "fasta")




from sequana.lazy import pandas as pd
from sequana.lazy import numpy as np
from sklearn.metrics import precision_recall_curve
import sys

################################ PARAMETERS ##############################################################################################

# positional command-line arguments (sys.argv items are already strings)
file_result = sys.argv[1]
file_variants = sys.argv[2]
len_genome = int(sys.argv[3])
file_fig = sys.argv[4]

colormap = 'nipy_spectral_r'
# any extra command-line argument enables the custom colormap
if len(sys.argv) > 5:
    # NOTE(review): pylab is not among the imports visible above this block;
    # confirm it is imported before this line runs.
    # pyplot-level lookup: matplotlib.cm.get_cmap was deprecated in
    # matplotlib 3.7 and removed in 3.9
    cmap = pylab.get_cmap(colormap)
    colors = [cmap(i) for i in np.linspace(0, 1, 7)]
    # 20 entries: the first color once, colors 1 to 6 three times each,
    # then the last color once more
    custom_colormap = [colors[0]]
    for i in range(1, 7):
        custom_colormap.extend([colors[i]] * 3)
    custom_colormap.extend([colors[-1]])

################################ FUNCTIONS ################################################################################################


def compute_table_performance(analysis, df_results):
    """
	return [TP, FP, FN, TN]
	TP and FP are lists of scores
	"""
    TP = []
    FP = []
예제 #11
0
파일: sequence.py 프로젝트: sequana/sequana
 def _create_template_fft(self, M=1000):
     M_3 =  int(M / 3)
     W = [-0.5] * M_3 + list(np.linspace(-0.5, 0.5, M - 2*M_3)) + [0.5] * M_3
     return list(W * np.hanning(M))