Example #1
0
def plot_violin():
    """Draw a violin plot of eight synthetic, log-shifted normal samples.

    The data are a 40 x 8 matrix of standard-normal draws from a fixed seed
    (``RandomState(0)``); column ``k`` (1-based) is shifted by
    ``10 - 5 * log(k)``.  The plot is drawn on a newly created figure.
    Nothing is returned, shown or saved by this function.
    """
    # The parenthesised form prints the same text on Python 2 and Python 3.
    print("Plotting violin...")
    # Switch matplotlib's interactive mode off before any figure is created.
    plt.ioff()

    sns.set(style="ticks")

    # Seeded generator, so every call plots identical data.
    rs = np.random.RandomState(0)
    n, p = 40, 8
    d = rs.normal(0, 1, (n, p))
    d += np.log(np.arange(1, p + 1)) * -5 + 10

    # Exactly one figure is opened; the seaborn calls below are given no
    # explicit axes and are expected to act on this current figure/axes.
    plt.subplots()
    sns.offset_spines()
    sns.violinplot(d)
    sns.despine(trim=True)
Example #2
0
def plot_violin():
  """Plot violins for eight columns of seeded, log-shifted normal noise.

  A 40 x 8 sample is drawn from ``RandomState(0)`` and column ``k``
  (counting from 1) is offset by ``10 - 5 * log(k)``.  The violins are
  drawn on a new figure; the function returns nothing and neither shows
  nor saves the figure.
  """
  # print(...) with one argument behaves identically on Python 2 and 3.
  print("Plotting violin...")
  # Interactive mode is disabled before the figure is created.
  plt.ioff()

  sns.set(style="ticks")

  # Fixed seed keeps the example reproducible.
  rs = np.random.RandomState(0)
  n, p = 40, 8
  d = rs.normal(0, 1, (n, p))
  d += np.log(np.arange(1, p + 1)) * -5 + 10

  # A single figure is opened here; the seaborn calls that follow receive
  # no explicit axes and are expected to use this current figure/axes.
  plt.subplots()
  sns.offset_spines()
  sns.violinplot(d)
  sns.despine(trim=True)
Example #3
0
def graph_mult_data(file_list, xcolumn, ycolumn, plot_labels, x_label, y_label,
                    title, file_title, colors=None, cfgs=None, seaborn=False):
    """Plot one column against another for several data files on one axes.

    Each file is read with ``numpy.loadtxt`` and drawn as a single line on a
    shared axes; the figure is then handed to ``plot_markups``.

    Parameters
    ----------
    file_list : iterable
        Files (anything ``numpy.loadtxt`` accepts) to read, one per line drawn.
    xcolumn, ycolumn : int
        Column indices of the loaded 2-D data used for the x and y values.
    plot_labels : iterable
        Label for each line, paired with ``file_list`` by position.
    x_label, y_label, title, file_title :
        Forwarded unchanged to ``plot_markups``.
    colors : iterable, optional
        Line color for each file, paired by position.  When ``None``,
        no ``color`` argument is passed to ``axis.plot``.
    cfgs : optional
        Forwarded unchanged to ``plot_markups``.
    seaborn : bool, optional
        Request seaborn styling.  The value actually used is whatever
        ``test_seaborn()`` returns.
    """
    if seaborn:
        # NOTE(review): the import below runs whatever test_seaborn()
        # returns -- confirm that is intended.
        seaborn = test_seaborn()
        import seaborn as sns
    logger.info("unpacking data")
    # Identity test on purpose: ``colors == None`` is evaluated element-wise
    # for a numpy array, and the ``if`` would then raise ValueError.
    if colors is None:
        line_kwargs = [{"label": label} for label in plot_labels]
    else:
        line_kwargs = [{"label": label, "color": color}
                       for label, color in zip(plot_labels, colors)]
    # zip() stops at its shortest input, so surplus files, labels or colors
    # are silently ignored.
    datalist = [(numpy.loadtxt(filename), kwargs)
                for filename, kwargs in zip(file_list, line_kwargs)]
    if seaborn:
        sns.set_style("ticks", rc={'font.family': 'Helvetica'})
        sns.set_context("paper")
    logger.info("plotting")
    fig = plt.figure()
    axis = fig.add_subplot(111)
    if seaborn:
        sns.offset_spines(fig=fig)
    for data, kwargs in datalist:
        axis.plot(data[:, xcolumn], data[:, ycolumn], **kwargs)
    plot_markups(fig, axis, title, x_label, y_label, file_title,
                 cfgs=cfgs, seaborn=seaborn)
Example #4
0
  def make_violin(self):
    """
    Save a violin plot of the redshift residuals in bins of measured redshift.

    A random subsample of the objects is binned by ``self.measured`` in bins
    of width 0.1.  For every bin that holds more than one sampled object, one
    violin shows the distribution of ``(predicted - measured) / (1 + measured)``.
    The axes are stored on ``self.kde_ax`` and the figure is written to
    ``PHOTZ_VIOLIN_<family_name>.pdf`` in the current directory.
    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    self.logger.info("Generating violin plot...")
    # Draw 10000 indices with replacement and keep the distinct ones, so the
    # subsample has at most 10000 rows.
    # NOTE(review): the sample size is hard-coded here while the message
    # below reports self.reduce_size -- confirm the two are meant to agree.
    rows = np.unique(np.random.choice(len(self.outliers), 10000))
    self.logger.info("Using a smaller size for space ({0} objects)".format(self.reduce_size))

    measured = self.measured[rows]
    predicted = self.predicted[rows]

    plt.figure()

    # Bin edges span the full (unsampled) range of measured values.
    bins = np.arange(0, self.measured.max() + 0.1, 0.1)
    digitized = np.digitize(measured, bins)
    residuals = (predicted - measured) / (measured + 1)

    # Violins are labelled with the centre of their bin.
    centres = bins + (bins[1] - bins[0]) / 2.

    final_violin, final_names = [], []
    # np.digitize assigns index i to values in [bins[i-1], bins[i]), hence
    # the 1-based bin id paired with the (0-based) lower-edge centre.
    for bin_id, centre in enumerate(centres[:-1], 1):
      members = residuals[digitized == bin_id]
      if len(members) > 1:
        final_violin.append(members)
        final_names.append(centre)

    pal = sns.blend_palette([self.color_palette, "lightblue"], 4)

    sns.offset_spines()
    ax = sns.violinplot(final_violin, names=final_names, color=pal)
    sns.despine(trim=True)

    ax.set_ylabel(r"$(z_{\rm phot}-z_{\rm spec})/(1+z_{\rm spec})$", fontsize=self.fontsize)
    ax.set_xlabel(r"$z_{\rm spec}$", fontsize=self.fontsize)
    ax.set_ylim([-0.5, 0.5])

    # Blank every second x tick label.
    tick_texts = [tick.get_text() for tick in ax.get_xticklabels()]
    ax.set_xticklabels([text if pos % 2 == 0 else ""
                        for pos, text in enumerate(tick_texts)])

    for item in (ax.xaxis.label, ax.yaxis.label):
      item.set_fontsize(self.fontsize)

    for item in (ax.get_xticklabels() + ax.get_yticklabels()):
      item.set_fontsize(self.fontsize - 10)

    ax.set_position([.15, .17, .75, .75])

    self.kde_ax = ax
    plt.savefig("PHOTZ_VIOLIN_{0}.pdf".format(self.family_name), format="pdf")
Example #5
0
    def make_violin(self):
        """
        Save a violin plot of the redshift residuals per measured-redshift bin.

        The objects are randomly subsampled and grouped by ``self.measured``
        into bins of width 0.1.  Each bin containing more than one sampled
        object contributes one violin showing the distribution of
        ``(predicted - measured) / (1 + measured)``.  The resulting axes are
        kept on ``self.kde_ax`` and the figure is saved as
        ``PHOTZ_VIOLIN_<family_name>.pdf`` in the current directory.
        """
        import matplotlib.pyplot as plt
        import seaborn as sns

        self.logger.info("Generating violin plot...")
        # 10000 indices are drawn with replacement and de-duplicated, so the
        # subsample holds at most 10000 rows.
        # NOTE(review): the sample size is hard-coded, yet the message below
        # reports self.reduce_size -- confirm the two are meant to agree.
        rows = np.unique(np.random.choice(len(self.outliers), 10000))
        self.logger.info("Using a smaller size for space ({0} objects)".format(
            self.reduce_size))

        measured = self.measured[rows]
        predicted = self.predicted[rows]

        plt.figure()

        # Bin edges cover the full (unsampled) range of measured values.
        bins = np.arange(0, self.measured.max() + 0.1, 0.1)
        digitized = np.digitize(measured, bins)
        residuals = (predicted - measured) / (measured + 1)

        # Each violin is labelled with the centre of its bin.
        centres = bins + (bins[1] - bins[0]) / 2.

        final_violin, final_names = [], []
        # np.digitize gives index i to values in [bins[i-1], bins[i]), which
        # is why the bin id starts at 1 while the centres start at index 0.
        for bin_id, centre in enumerate(centres[:-1], 1):
            members = residuals[digitized == bin_id]
            if len(members) > 1:
                final_violin.append(members)
                final_names.append(centre)

        pal = sns.blend_palette([self.color_palette, "lightblue"], 4)

        sns.offset_spines()
        ax = sns.violinplot(final_violin, names=final_names, color=pal)
        sns.despine(trim=True)

        ax.set_ylabel(r"$(z_{\rm phot}-z_{\rm spec})/(1+z_{\rm spec})$",
                      fontsize=self.fontsize)
        ax.set_xlabel(r"$z_{\rm spec}$", fontsize=self.fontsize)
        ax.set_ylim([-0.5, 0.5])

        # Keep the text of every other x tick label and blank the rest.
        tick_texts = [tick.get_text() for tick in ax.get_xticklabels()]
        ax.set_xticklabels([text if pos % 2 == 0 else ""
                            for pos, text in enumerate(tick_texts)])

        for item in (ax.xaxis.label, ax.yaxis.label):
            item.set_fontsize(self.fontsize)

        for item in (ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(self.fontsize - 10)

        ax.set_position([.15, .17, .75, .75])

        self.kde_ax = ax
        plt.savefig("PHOTZ_VIOLIN_{0}.pdf".format(self.family_name),
                    format="pdf")
Example #6
0
def _save_histogram(values, xlabel, title, filename):
    """Clear the current figure, draw a histogram of *values*, save it to *filename*."""
    pl.clf()
    sns.set(style='ticks', font='serif')
    sns.distplot(values, kde=False)
    sns.despine(trim=True)
    sns.offset_spines()
    pl.xlabel(xlabel)
    # NOTE(review): with kde=False the bars may be raw counts rather than
    # fractions -- confirm this label against the seaborn version in use.
    pl.ylabel("Fraction")
    pl.title(title)
    pl.tight_layout()
    pl.savefig(filename)


def main(options):
    """Main logic of the script.

    Counts how often each miRNA occurs in ``options.input``, pairs the
    log count with the log of its mean expression read from
    ``options.expressions``, and writes three figures to the current
    directory: ``hybrids_vs_expression.png``, ``hist_of_hybrid_number.png``
    and ``hist_of_expressions.png``.

    ``options`` must provide ``input``, ``clustered``, ``expressions``,
    ``level``, ``verbose`` and ``top``.  A miRNA that is counted but absent
    from the expressions file raises ``KeyError``.
    """
    df = pd.read_table(options.input, header=None)
    # The counted names sit in column 3 of clustered input, column 6 otherwise.
    name_column = 3 if options.clustered else 6
    # Count once; the same counter feeds the data frame and the verbose report.
    top_mirnas = Counter(df[name_column].tolist())
    ndf = pd.DataFrame({'hybrids': top_mirnas})
    ndf.hybrids = np.log(ndf.hybrids)

    exp = {}
    with open(options.expressions) as e:
        for row in csv.reader(e, delimiter='\t'):
            # csv yields [] for a blank line; skip it together with the header.
            if not row or row[0] == 'Mirnas/Samples':
                continue
            mean_expression = np.mean([float(value) for value in row[1:]])
            exp[row[0].replace("(star)", "*")] = np.log(mean_expression)

    ndf['expr'] = [exp[i] for i in ndf.index]
    ndf = ndf[ndf['expr'] >= options.level]
    if options.verbose:
        syserr("Number of miRNAs: %i\n" % len(ndf.index))
        for name, count in top_mirnas.most_common(options.top):
            syserr("%s\t%i\n" % (name, count))

    # Joint plot: scatter with regression line plus marginal histograms.
    sns.set(style='white', font='serif')
    grid = sns.JointGrid('hybrids',
                         'expr',
                         data=ndf,
                         dropna=True,
                         size=8,
                         ratio=10,
                         space=0.2)
    grid.plot_marginals(sns.distplot,
                        color="firebrick",
                        rug=False,
                        hist=True,
                        kde=False)
    grid.plot_joint(sns.regplot,
                    scatter_kws={"color": "slategray"},
                    line_kws={
                        "linewidth": 1,
                        "color": "firebrick"
                    })
    grid.annotate(stats.pearsonr,
                  stat="pearson r",
                  template="{stat} = {val:.2g}\np-value = {p:.2g}")
    grid.set_axis_labels('log(Number of hybrids)', 'log(Expression value)')
    pl.savefig("hybrids_vs_expression.png")

    _save_histogram(ndf.hybrids,
                    "log(Number of hybrids)",
                    "Number of hybrids found per miRNA",
                    "hist_of_hybrid_number.png")
    _save_histogram(ndf.expr,
                    "log(Expression value)",
                    "Expression of miRNAs",
                    "hist_of_expressions.png")
"""
Violinplots
===========

"""
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's "ticks" style before any figure is created.
sns.set(style="ticks")

# n observations for each of p groups, drawn from a seeded generator so the
# example always plots the same data.
n, p = 40, 8
rs = np.random.RandomState(0)

# Per-column shift: 10 for the first column, decreasing logarithmically
# for the following ones.
column_shift = np.log(np.arange(1, p + 1)) * -5 + 10
d = rs.normal(0, 1, (n, p)) + column_shift

# One figure with a single axes; the seaborn calls below are given no
# explicit axes.
f, ax = plt.subplots()
sns.offset_spines()
sns.violinplot(d)
sns.despine(trim=True)