Example #1
0
def multiDimensionGraph():
    """Draw three seaborn relplots of return versus PBR from the global ``df``.

    1. One facet column per ``size`` value, using the current plotting
       context.
    2. The same faceted plot inside a "notebook" plotting context with the
       fonts scaled by 1.2.
    3. A single-panel plot in that same context where ``size`` is encoded as
       marker size instead of as facet columns.

    Reads the module-level names ``df`` and ``sns``; returns nothing.
    """
    # Arguments shared by all three plots.
    common = dict(
        x="PBR(IFRS-연결)",
        y="수익률(%)",
        hue="베타 (M,5Yr)",
        palette="coolwarm",
        data=df,
    )

    sns.relplot(col="size", **common)

    with sns.plotting_context("notebook", font_scale=1.2):
        sns.relplot(col="size", **common)
        # Use `size` instead of `col`: one panel, marker size encodes the group.
        sns.relplot(size="size", **common)
Example #2
0
def set_jw_style():
    """Apply a compact seaborn "ticks" theme with small Arial fonts.

    Sets matplotlib's PDF and PS font type to 42, then calls ``sns.set`` with
    explicit sizes for axis labels, titles, tick labels and legend text, thin
    axis/tick lines, and black text and axis colours. All changes are global
    and stay in effect after the function returns.
    """
    import matplotlib
    import seaborn as sns

    matplotlib.rcParams["pdf.fonttype"] = 42
    matplotlib.rcParams["ps.fonttype"] = 42
    sns.set(
        style="ticks",
        font="Arial",
        font_scale=1,
        rc={
            "axes.linewidth": 0.05,
            "axes.labelsize": 7,
            "axes.titlesize": 7,
            "xtick.labelsize": 6,
            "ytick.labelsize": 6,
            "legend.fontsize": 6,
            "xtick.major.width": 0.25,
            "xtick.minor.width": 0.25,
            # NOTE(review): "ytick.minor.width" is not set although the
            # x-axis counterpart is — confirm whether that is intended.
            "ytick.major.width": 0.25,
            "text.color": "Black",
            "axes.labelcolor": "Black",
            "xtick.color": "Black",
            "ytick.color": "Black",
        },
    )
Example #3
0
def plot_values(skews, kurt, fname):
    """Save one scatter figure for skewness and one for kurtosis.

    Both figures plot the ``count`` column against ``droplet number`` (also
    used for the hue) without a regression fit, and are written to
    ``fname + '_skewplot.png'`` and ``fname + '_kurtplot.png'``.

    Args:
        skews: data for the skewness figure (drawn with font scale 1.25 and
            a y range of -1 to 1).
        kurt: data for the kurtosis figure (drawn with font scale 1.5).
        fname: prefix of the two output file names.
    """
    # (font scale, data, axis settings, file-name suffix), in drawing order.
    figures = (
        (1.25, skews,
         dict(ylabel='Skewness',
              ylim=(-1, 1),
              title='Skew of intensity for each droplet'),
         '_skewplot.png'),
        (1.5, kurt,
         dict(ylabel='Kurtosis',
              title='Kurtosis of intensity for each droplet'),
         '_kurtplot.png'),
    )

    for scale, frame, axis_settings, suffix in figures:
        with sns.plotting_context('notebook', font_scale=scale):
            grid = sns.lmplot(x='droplet number',
                              y='count',
                              data=frame,
                              fit_reg=False,
                              hue='droplet number',
                              palette='magma')
            grid.set(**axis_settings)
            grid.savefig(fname + suffix)
def boxplot_stats(dfin, title):
    """Configure global matplotlib/seaborn settings for the box-plot figure.

    The visible body only changes global plotting state; ``dfin`` and
    ``title`` are accepted but not used here.

    Args:
        dfin: input data (unused in this body).
        title: figure title (unused in this body).
    """
    plotlt.rcParams['pdf.fonttype'] = 42
    plotlt.rcParams['ps.fonttype'] = 42
    # hue --> give colour for FDR<0.1
    # size scale based on relab
    colour = ['r', 'black']  # currently not referenced below
    sns.set(style="white")
    # `sns.plotting_context(...)` only returns a settings object; it has to be
    # applied with `set_context` (or used in a `with`) to have any effect, and
    # `font_scale` is only honoured together with a named context.
    sns.set_context("notebook", font_scale=0.5)
Example #5
0
    def setDefaultStyle(self, fontscale=1.2, font='monospace'):
        """Set the global seaborn theme used by this object's plots.

        Applies a white figure background with light-grey axes, the "ticks"
        style with the requested font family and a framed legend, and a
        "notebook" context with explicit legend, tick, label and title font
        sizes.

        Args:
            fontscale: font scaling factor passed to seaborn.
            font: value for the ``font.family`` style setting.
        """
        import seaborn as sns

        sns.set(font_scale=fontscale,
                rc={'figure.facecolor': 'white', 'axes.facecolor': '#F7F7F7'})
        sns.set_style("ticks", {'font.family': font,
                                'axes.facecolor': '#F7F7F7',
                                'legend.frameon': True})
        # `plotting_context` merely returns the settings; `set_context` is the
        # call that applies them. `font_scale` is repeated so the scale chosen
        # above is not reset to 1 for the sizes that `rc` does not override.
        sns.set_context('notebook',
                        font_scale=fontscale,
                        rc={'legend.fontsize': 16,
                            'xtick.labelsize': 12,
                            'ytick.labelsize': 12,
                            'axes.labelsize': 14,
                            'axes.titlesize': 16})
        return
Example #6
0
def dropplot(data, feature='median_conservation', genome_len=10**4):
    """Draw one horizontal track per sequence with markers for each row.

    For every unique ``seq_id`` in ``data`` a black line from 1 to
    ``genome_len`` is drawn at its own y position. Each row of that sequence
    adds square markers at its ``start`` and ``end`` values, sized by
    ``2 * drop_size`` and coloured by the rank of its ``feature`` value within
    a 'seismic' palette. Afterwards the legend and a swatch plot of the
    palette are drawn and ``plt.show()`` is called.

    Parameters
    ----------
    data : DataFrame
        Must provide the columns ``seq_id``, ``start``, ``end``,
        ``drop_size``, ``product`` and the column named by ``feature``.
    feature : str
        Column whose distinct values select the marker colour.
    genome_len : int
        Right end of each sequence line.
    """
    vals = np.sort(data[feature].unique())
    # Sorted rank of each distinct feature value (keyed by its string form),
    # used as the index into the palette.
    mapping = {str(cons): i for i, cons in enumerate(vals)}

    n_colors = 2
    if vals.shape[0] > 2:
        n_colors = max(8, vals.shape[0])

    # NOTE(review): 'y.labelsize' and 'aspect' below are not matplotlib rc
    # names; seaborn appears to drop unknown context keys silently — confirm
    # and remove them if so.
    with sns.plotting_context(
            rc={
                "font.size": 14,
                "axes.titlesize": 18,
                "axes.labelsize": 18,
                "xtick.labelsize": 14,
                "ytick.labelsize": 14,
                'y.labelsize': 16
            }):

        pal = sns.mpl_palette('seismic', n_colors)
        with sns.plotting_context(
                rc={
                    "font.size": 12,
                    "axes.labelsize": 15,
                    "xtick.labelsize": 14,
                    "ytick.labelsize": 12,
                    'aspect': 10
                }):
            f, ax = plt.subplots(figsize=(14, 4))
            # Iterate over the sequences of `data` (the previous code read an
            # undefined name here).
            for i, seq in enumerate(data['seq_id'].unique()):
                g_tag = data[data['seq_id'] == seq]
                ax.plot([1, genome_len], [i, i],
                        color="black",
                        alpha=0.7,
                        linewidth=4)
                for _, row in g_tag.iterrows():
                    ax.scatter([row['start'], row['end']], [i, i],
                               marker='s',
                               s=2 * row['drop_size'],
                               c=pal[mapping[str(row[feature])]],
                               label="{} {}".format(row['product'],
                                                    row['start']))

        plt.legend(bbox_to_anchor=[1.1, 1.1])
        sns.palplot(sns.mpl_palette('seismic', n_colors))
        plt.show()
Example #7
0
def multi_qqplot(data, max_pval=1.0):
    """Draw a grid of QQ plots, one panel per value of the ``method`` column.

    Panels are wrapped after three columns, do not share the y axis, and are
    filled by mapping ``qqplot`` over the ``p-value`` column. Shared axis
    labels are added with ``set_axes_label`` and both axes of every panel are
    limited to ``(0, max_pval)``.

    Side effect: sets the global matplotlib ``savefig`` dpi to 300.

    Parameters
    ----------
    data : DataFrame
        Must contain the columns ``method`` and ``p-value``.
    max_pval : float
        Upper limit applied to the x and y axes of every panel.
    """
    import matplotlib as mpl

    with sns.axes_style('ticks'), sns.plotting_context('paper',
                                                       font_scale=2.5):
        # change dpi
        mpl.rc('savefig', dpi=300)

        # make qq plot for each method
        g = sns.FacetGrid(data,
                          col="method",
                          col_wrap=3,
                          sharey=False,
                          aspect=1.5)
        g.map(qqplot, "p-value")
        plt.tight_layout()

        set_axes_label(g.fig,
                       'Expected p-value',
                       'Observed p-value',
                       ylab_xoffset=-0.03,
                       ylab_yoffset=.62,
                       xlab_yoffset=-0.02)
        g.set_titles('{col_name}')

        # set axis limits on every panel
        for myax in g.axes:
            myax.set_xlim((0, max_pval))
            myax.set_ylim((0, max_pval))
Example #8
0
def plot_feature(cv, index, model_index=1, ascending=False):
    """Plot the feature importances of a fitted search's best pipeline.

    The importances are read from step ``model_index`` of
    ``cv.best_estimator_`` and labelled with the entries of ``index`` that
    step 0 of the pipeline kept (its ``get_support()`` mask).

    Parameters
    ----------
    cv : estimator
        Fitted parameter search whose ``best_estimator_`` exposes ``steps``.
    index :
        The training data columns; must support boolean-mask indexing.
    model_index : int
        Position of the pipeline step that provides
        ``feature_importances_``.
    ascending : bool
        Sort order of the bars by importance. The default (False) puts the
        most important feature first.

    Returns
    -------
    The matplotlib axes holding the horizontal bar plot.
    """
    value = cv.best_estimator_.steps[model_index][1].feature_importances_
    new_index = index[cv.best_estimator_.steps[0][1].get_support()]

    data = pd.DataFrame(value, index=new_index)
    # After reset_index the labels are in column "index" and the importances
    # in column 0.
    data = data.reset_index().sort_values(0, ascending=ascending)

    with sns.axes_style("dark"), sns.plotting_context("paper", font_scale=1.5):
        plt.figure(figsize=(7, 7))
        ax = plt.subplot()
        sns.barplot(
            y="index", x=0, data=data, orient="h", palette=["#8c8c91"], ax=ax
        )
        plt.xlabel("")
        plt.ylabel("")

    return ax
def plot_entropies(results, rotate='oblimin', 
                   dpi=300, figsize=(20,8), ext='png', plot_dir=None): 
    """ Plots real and null entropies across factor solutions as box plots
    
    Args:
        results: a dimensional structure results object; its ``EFA.results``
            must hold 'entropies_<rotate>' and 'null_entropies_<rotate>'
        rotate: the rotation name used to look up those two entries
        dpi: the final dpi for the saved image
        figsize: the (width, height) passed to ``plt.figure``
        ext: the extension for the saved figure
        plot_dir: the directory to save the figure. If none, do not save
            (and leave the figure open)
    """
    EFA = results.EFA
    # Work on copies so the stored results are not modified by the labelling.
    entropies = EFA.results['entropies_%s' % rotate].copy()
    null_entropies = EFA.results['null_entropies_%s' % rotate].copy()
    entropies.loc[:, 'group'] = 'real'
    null_entropies.loc[:, 'group'] = 'null'
    # `axis` is given by keyword: newer pandas accepts only `objs`
    # positionally.
    stacked = pd.concat([entropies, null_entropies], axis=0)
    # Long form: one row per (group, factor solution, entropy value).
    stacked = stacked.melt(id_vars='group',
                           var_name='EFA',
                           value_name='entropy')
    with sns.plotting_context('notebook', font_scale=1.8):
        f = plt.figure(figsize=figsize)
        sns.boxplot(x='EFA', y='entropy', data=stacked, hue='group')
        plt.xlabel('# Factors')
        plt.ylabel('Entropy')
        plt.title('Distribution of Measure Specificity across Factor Solutions')
        if plot_dir is not None:
            f.savefig(path.join(plot_dir, 'entropies_across_factors.%s' % ext), 
                      bbox_inches='tight', dpi=dpi)
            plt.close()
Example #10
0
def draw_heatmap(df,
                 x_labels=True,
                 y_labels=True,
                 title=None,
                 xlabel=None,
                 ylabel=None,
                 **kwargs):
    """Draw an annotated 'RdYlGn' heatmap of ``df`` and return its figure.

    ``x_labels`` / ``y_labels`` are forwarded as tick labels; when one of
    them is callable it is first applied to every column (respectively index)
    entry. Extra keyword arguments go to the heatmap call. The figure is
    resized to 10 inches wide and half an inch per row of ``df``, at 120 dpi.
    """
    if callable(x_labels):
        x_labels = [x_labels(column) for column in df.columns]
    if callable(y_labels):
        y_labels = [y_labels(entry) for entry in df.index]

    with sb.axes_style('white'), sb.plotting_context('paper'):
        heat_axes = sb.heatmap(df,
                               xticklabels=x_labels,
                               yticklabels=y_labels,
                               annot=True,
                               cmap='RdYlGn',
                               robust=True,
                               **kwargs)
        set_labels(heat_axes,
                   title=title,
                   xlabel=xlabel,
                   ylabel=ylabel,
                   x_tick_params=dict(labelrotation=90))
        figure = heat_axes.get_figure()
        figure.set_size_inches(10, df.shape[0] / 2)
        figure.set_dpi(120)
    return figure
Example #11
0
def plot_beneficiaries(df, title):
    """Save and show a heatmap and a bar plot of amount per beneficiary.

    Both figures are 12 x 16 inches, carry ``title`` and no axis labels, and
    are written to ``plots/<title in lower case>_heatmap.png`` and
    ``plots/<title in lower case>_barplot.png`` at 200 dpi before being
    shown.

    Args:
        df: data with the columns ``principal_beneficiary`` and ``amount``.
        title: figure title, also used to build the file names.
    """

    def new_canvas():
        # One subplot on a 12 x 16 inch figure.
        canvas, axes = plt.subplots(1)
        canvas.set_size_inches(12, 16)
        return canvas, axes

    def finish(canvas, axes, kind):
        # Title, blank axis labels, write to disk, then display.
        axes.set_title(title, fontsize=28, pad=20)
        axes.set_xlabel('')
        axes.set_ylabel('')
        canvas.savefig(f"plots/{title.lower()}_{kind}.png",
                       bbox_inches='tight',
                       dpi=200)
        plt.show()

    with sns.plotting_context('notebook', font_scale=1.6):
        heat_fig, heat_ax = new_canvas()
        by_beneficiary = df.set_index('principal_beneficiary')
        sns.heatmap(by_beneficiary[['amount']],
                    annot=True,
                    square=False,
                    xticklabels=True,
                    yticklabels=True,
                    fmt='.2f',
                    ax=heat_ax)
        finish(heat_fig, heat_ax, 'heatmap')

        bar_fig, bar_ax = new_canvas()
        sns.barplot(x='amount',
                    y='principal_beneficiary',
                    data=df,
                    palette='Spectral',
                    ax=bar_ax)
        finish(bar_fig, bar_ax, 'barplot')
Example #12
0
def violinplot_combined_one_isoform(args):
    """Save violin plots of ``args.y_axis`` per read count, one row per family.

    Reads the tab-separated file ``args.tsv_input``, builds a FacetGrid with
    one row for each of the families TSPY13P, HSFY2 and DAZ2, maps violin
    plots of ``read_count`` against ``args.y_axis`` split by ``TOOL``
    (ISOCON, ICE), and writes the result to ``args.outfile``. The y axis is
    switched to a log scale when ``args.y_axis == "FP"``.

    Args:
        args: object with the attributes ``tsv_input``, ``y_axis`` and
            ``outfile``.
    """
    plt.clf()
    with sns.plotting_context("paper", font_scale=1.8):
        true_positives = pd.read_csv(args.tsv_input, sep="\t")
        # The FacetGrid creates its own figure, so no separate
        # `plt.subplots()` is opened here: it would never be drawn on or
        # closed, and one empty figure would pile up per call.
        # NOTE(review): `size=` is the older seaborn spelling of `height=`;
        # confirm against the seaborn version this project pins.
        g = sns.FacetGrid(true_positives,
                          row="Family",
                          size=3,
                          aspect=1.6,
                          row_order=["TSPY13P", "HSFY2", "DAZ2"],
                          legend_out=True)
        sns.set(style="whitegrid", palette="muted")

        (g.map(sns.violinplot,
               "read_count",
               args.y_axis,
               "TOOL",
               cut=0,
               hue_order=["ISOCON", "ICE"],
               palette=sns.color_palette("muted", 2))
         .despine(left=True)
         .add_legend(title="TOOL", label_order=["ISOCON", "ICE"]))
        g.set_titles(row_template="{row_name}", fontweight='bold', size=16)
        g.set_yticklabels(["", 0, 0.2, 0.4, 0.6, 0.8, 1.0])

        if args.y_axis == "FP":
            g.set(yscale="log")

        plt.savefig(args.outfile)
        plt.close()
    def visualize_embeddings_umap(self,
                                  title='',
                                  ext="png",
                                  save=True,
                                  **umap_kwargs):
        """Project ``self.embeddings`` with UMAP and scatter-plot the result.

        Points are coloured by ``self.true_labels`` and a colour bar with one
        tick per class (labelled from ``self.categorical_labels``) is added.

        Args:
            title: plot title; also passed to ``self.save_fig``.
            ext: file extension passed to ``self.save_fig``.
            save: when true, hand the figure to ``self.save_fig``.
            **umap_kwargs: forwarded to ``umap.UMAP``.
        """
        # Init umapper
        umapper = umap.UMAP(**umap_kwargs)
        # Compute umap embeddings
        umap_embeddings = umapper.fit_transform(self.embeddings)
        # Plot embeddings
        with sns.plotting_context(context="poster"):
            # Keep the figure handle: it is needed for saving below.
            fig, ax = plt.subplots(1, figsize=(14, 10))
            plt.scatter(*umap_embeddings.T,
                        s=0.8,
                        c=self.true_labels,
                        cmap="tab20b",
                        alpha=1)

            plt.setp(ax, xticks=[], yticks=[])
            # Boundaries at half-integers so each class gets its own band.
            cbar = plt.colorbar(boundaries=np.arange(self.n_classes + 1) - 0.5)
            cbar.set_ticks(np.arange(self.n_classes))
            cbar.set_ticklabels(self.categorical_labels)
            plt.title(title)

        if save:
            self.save_fig(title, ext, fig)
Example #14
0
def plot_tsne(
    x: np.ndarray,
    y: np.ndarray,
    metadata: Dict[str, Any],
    kwargs: Dict[str, Any],
) -> sns.FacetGrid:
    """Scatter the first two columns of ``x`` coloured by ``y`` and label it.

    ``metadata`` supplies the ``palette``, the ``labels`` and the data set
    ``name`` used in the title; ``kwargs["relplot_kwargs"]`` is forwarded to
    ``sns.relplot``. The colour bar skips the first palette/label entry.
    """
    colours = metadata["palette"]
    with sns.plotting_context(context="paper"):
        graph = sns.relplot(
            x=x[:, 0],
            y=x[:, 1],
            hue=y,
            palette=colours,
            **kwargs["relplot_kwargs"],
        )
        bar_colours = list(metadata["palette"].values())[1:]
        bar_labels = list(metadata["labels"].values())[1:]
        _plot_colorbar(figure=graph.fig, palette=bar_colours, labels=bar_labels)

        axis_settings = dict(
            title=f'{metadata["name"]} t-SNE Projection',
            xlabel="t-SNE Component 1",
            xticks=[],
            ylabel="t-SNE Component 2",
            yticks=[],
        )
        graph.set(**axis_settings)
    return graph
Example #15
0
        def wrapper(*args, **kwargs):
            """Call ``func`` inside a seaborn plotting context and axes style.

            Three keyword arguments are consumed here and not passed on:

            * ``context`` - seaborn context name (default "notebook")
            * ``style``   - seaborn style name (default "whitegrid")
            * ``params``  - optional dict of extra style rc parameters

            Everything else is forwarded to ``func`` and its result is
            returned.
            """
            _context = kwargs.pop("context", "notebook")
            _style = kwargs.pop("style", "whitegrid")
            _params = kwargs.pop("params", None)

            # All four spines are hidden by default.
            _default_params = {
              "axes.spines.left": False,
              "axes.spines.bottom": False,
              "axes.spines.right": False,
              "axes.spines.top": False,
              }
            if _params is not None:
                # NOTE(review): the defaults are unpacked last, so on a key
                # clash they win over the caller's `params` — confirm this
                # precedence is intended.
                merged_params = {**_params, **_default_params}
            else:
                merged_params = _default_params
            with sns.plotting_context(context=_context), sns.axes_style(style=_style, rc=merged_params):
                return func(*args, **kwargs)
Example #16
0
def plotSentResults(csvPath):
    """
    Plot the positive and negative sentiment series of a csv result file and
    save the line plot as a png next to it in ``result_folder``.
    :param csvPath: name of the csv result file; ".csv" is appended when the
        name does not contain it
    :return: None
    """
    if ".csv" not in csvPath:
        csvPath = csvPath + ".csv"
    is_fox = "Fox" in csvPath

    frame = pd.read_csv(result_folder + csvPath)
    positives = list(frame["pos"])
    negatives = list(frame["neg"])
    dates = list(frame["date"])

    # Long form: every date appears twice, once per sentiment series.
    long_form = pd.DataFrame({
        'Day': dates + dates,
        'value': positives + negatives,
        'variable': ["positive"] * len(positives) + ["negative"] * len(negatives),
    })

    if is_fox:
        tick_positions = np.arange(0, 120, 5)

    plt.figure(figsize=(12, 9))
    with sns.plotting_context("notebook", font_scale=2.0):
        axes = sns.lineplot(x='Day',
                            y='value',
                            hue='variable',
                            data=long_form)
        if is_fox:
            plt.xticks(ticks=tick_positions, fontsize=13, rotation=60)
        figure = axes.get_figure()
        axes.set(xlabel='Date', ylabel='Weighted frequency')
        # The first legend entry is dropped.
        legend_handles, legend_labels = axes.get_legend_handles_labels()
        axes.legend(handles=legend_handles[1:], labels=legend_labels[1:])
        png_name = csvPath.split(".")[0] + "Sentiment" + ".png"
        figure.savefig(result_folder + png_name)
def plot_bar(d, k, pv, figurename):
    """Save a log-scale bar plot of fragment counts per sample.

    Args:
        d: data with the columns ``fragment_counts``, ``normalized_count``
            and ``samplename``.
        k: value shown as the chi-square statistic in the annotation.
        pv: value shown as the P-value in the annotation.
        figurename: output path handed to ``plt.savefig``.

    Returns:
        0
    """
    plt.figure()
    fs = 20
    palette = sns.color_palette(['black', 'green'])
    with sns.plotting_context('paper', font_scale=2):
        ax = sns.barplot(data=d,
                         x='fragment_counts',
                         y='normalized_count',
                         hue='samplename',
                         palette=palette)
    # The backslash before `chi` is escaped explicitly; `\n` stays a newline.
    ax.annotate('$\\chi^{2}$: %.2f\nP-value: %.3f' % (k, pv),
                xy=(2, 1),
                fontsize=fs)
    ax.legend(title=' ', fontsize=fs, loc=(0.5, 0.4))
    ax.set_xlim(-0.5, 5)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.set_yscale('log')
    ax.set_xlabel('Count per unique fragment', fontsize=fs)
    ax.set_ylabel('% Fragments', fontsize=fs)

    def _format_tick(y, pos):
        # Show as many decimals as the magnitude of the tick needs
        # (none for values of 1 and above).
        decimals = int(np.maximum(-np.log10(y), 0))
        return '{{:.{:1d}f}}'.format(decimals).format(y)

    ax.yaxis.set_major_formatter(ticker.FuncFormatter(_format_tick))

    plt.savefig(figurename, transparent=True, bbox_inches='tight')
    # Parenthesised form works as a statement on Python 2 and as a call on 3.
    print('Plotted %s' % figurename)
    return 0
Example #18
0
def context(context='notebook', font_scale=1.5, rc=None):
    """Create pyfolio default plotting style context.
    Under the hood, calls and returns seaborn.plotting_context() with
    some custom settings. Usually you would use in a with-context.
    Parameters
    ----------
    context : str, optional
        Name of seaborn context.
    font_scale : float, optional
        Scale font by factor font_scale.
    rc : dict, optional
        Config flags.
        By default, {'lines.linewidth': 1.5,
                     'axes.facecolor': '0.995',
                     'figure.facecolor': '0.97'}
        is being used and will be added to any
        rc passed in, unless explicitly overridden.
        The dict passed in is not modified.
    Returns
    -------
    seaborn plotting context For more information, see seaborn.plotting_context().
    """
    rc_default = {'lines.linewidth': 1.5,
                  'axes.facecolor': '0.995',
                  'figure.facecolor': '0.97'}
    # Start from the defaults and let the caller's entries win. A fresh dict
    # is built so the caller's `rc` object is left untouched.
    merged_rc = dict(rc_default)
    if rc is not None:
        merged_rc.update(rc)

    return sns.plotting_context(context=context, font_scale=font_scale,
                                rc=merged_rc)
Example #19
0
 def get_figure(self,
         data,
         *args,
         caption="Caption not provided",
         **kwargs):
     """
     Draw the dataframe obtained from ``data`` as a bar catplot and wrap it,
     together with ``caption``, in a ``Figure``.

     An optional ``title`` keyword argument sets the plot title; the x tick
     labels are rotated by 90 degrees.
     """
     plt.figure()
     plotting_context = seaborn.plotting_context(
         self.context,
         font_scale=self.font_scale,
         rc=self.rc_params())
     with plotting_context:
         grid = seaborn.catplot(
             data=self.get_dataframe(data),
             x=self.xvar,
             y=self.yvar,
             ci=self.confidence_interval,
             kind="bar",
             hue=self.gvar or None,
             height=self.height_figure,
             aspect=self.aspect_ratio_figure)
         graphic = grid.set(
             xlabel=self.xlabel,
             ylabel=self.ylabel,
             title=kwargs.pop("title", ""))
         plt.xticks(rotation=90)
         return Figure(graphic, caption=caption)
     raise RuntimeError(
         "Something must have gone wrong.")
Example #20
0
def plot_confusion_matrix(cm, labels=None, cmap='Blues', title=None, norm=False, context=None, annot=True):
    """Draw a confusion matrix as a seaborn heatmap.

    Args:
        cm: the confusion matrix to draw.
        labels: tick labels for both axes. A non-string iterable is
            title-cased element by element; ``None`` lets seaborn pick the
            labels.
        cmap: colour map name passed to the heatmap.
        title: plot title. Defaults to "Confusion Matrix", or "Normalized
            Confusion Matrix" when ``norm`` is true.
        norm: when true, pass ``cm`` through ``normalize_confusion_matrix``
            first.
        context: context manager to draw in. Defaults to the seaborn
            "notebook" context with a font scale of 1.5.
        annot: whether to write the values into the cells.
    """
    # `collections.Iterable` was removed in Python 3.10; the ABC lives in
    # `collections.abc`.
    from collections.abc import Iterable

    if labels is None:
        labels = True

    if isinstance(labels, Iterable) and not isinstance(labels, str):
        labels = [label.title() for label in labels]

    if norm:
        cm = normalize_confusion_matrix(cm)

    if title is None:
        if norm:
            title = "Normalized Confusion Matrix"
        else:
            title = "Confusion Matrix"

    if context is None:
        context = sns.plotting_context("notebook", font_scale=1.5)

    with context:
        ax = sns.heatmap(cm,
                         xticklabels=labels,
                         yticklabels=labels,
                         cmap=cmap,
                         annot=annot
                        )
        ax.set_title(title)
Example #21
0
File: net.py Project: a3609640/Test
 def plot_2d_pca(data):
     """
     Show a PC1/PC2 scatter plot of ``pc_df`` with one arrow per variable.

     The first ``n`` rows of ``coeff`` give the arrow end points (PC1 and PC2
     score of each variable); each arrow is labelled with the matching column
     name of ``data``. The axis labels report ``var_explained`` in percent.
     ``pc_df``, ``n``, ``coeff`` and ``var_explained`` are read from the
     enclosing scope.
     """
     plt.figure(figsize=(12,10))
     arrow_style = dict(color='k',
                        alpha=0.9,
                        head_width=0.02,
                        head_length=0.05,
                        linestyle='-',
                        linewidth=1.5,
                        overhang=0.2)
     label_style = dict(color='k', ha='center', va='center', fontsize=10)
     with sbn.plotting_context("notebook",font_scale=1.25):
         sbn.scatterplot(x="PC1", y="PC2", data=pc_df, s=100)
         # Plot the variable scores as arrows from the origin; the label sits
         # slightly beyond the arrow tip.
         for idx in range(n):
             tip_x = coeff[idx,0]
             tip_y = coeff[idx,1]
             plt.arrow(0, 0, tip_x, tip_y, **arrow_style)
             plt.text(tip_x * 1.15,
                      tip_y * 1.15,
                      data.columns.values[idx],
                      **label_style)
     plt.xlabel(f"PC1: {var_explained[0]:.0f}%")
     plt.ylabel(f"PC2: {var_explained[1]:.0f}%")
     plt.show()
Example #22
0
    def setup_figure(self):
        """
        Prepare the matplotlib figure for plotting.

        This method sets the default font, and the overall appearance of the
        figure. In xkcd mode a comic-style font replaces the configured
        figure font (keeping its point size) when one is installed, and
        matplotlib's xkcd mode is switched on. Finally the FacetGrid for the
        table is created and stored in ``self.g``.
        """

        if options.cfg.xkcd:
            fonts = QtGui.QFontDatabase().families()
            # Preferred families, matched by exact name.
            family = next(
                (name for name in ["Humor Sans", "DigitalStrip", "Comic Sans MS"]
                 if name in fonts),
                None)
            if family is None:
                # Otherwise take the first installed family whose name
                # contains one of the keywords. The installed family name is
                # used for the font, not the keyword itself.
                family = next(
                    (installed
                     for keyword in ["comic", "cartoon"]
                     for installed in fonts
                     if keyword.lower() in installed.lower()),
                    None)
            if family is not None:
                self.options["figure_font"] = QtGui.QFont(
                    family,
                    pointSize=self.options["figure_font"].pointSize())
            plt.xkcd()

        with sns.plotting_context("paper"):
            self.g = sns.FacetGrid(self._table,
                                   col=self._col_factor,
                                   col_wrap=self._col_wrap,
                                   row=self._row_factor,
                                   sharex=True,
                                   sharey=True)
    def make_sns_heatmap(self, characteristic,
                         cmap=sns.light_palette("green"),
                         ret=False,
                         context='paper',
                         dim=[list('ABCDEFGH'), list(range(1, 13))],
                         replicates=False):
        '''
        Draw an annotated seaborn heatmap of one well characteristic and save
        it as "output.svg".

        characteristic selects the value shown per well, cmap is the
        colormap, context is the seaborn plotting context (relative element
        sizes) and dim holds the row and column labels of the wells to show.
        The heatmap axes are returned only when ret is true. replicates is
        accepted but not used here.
        '''
        row_labels, column_labels = dim[0], dim[1]
        matrix = tools.as_matrix(self.well_list, characteristic, *dim)

        with sns.plotting_context(context):
            axes = sns.heatmap(matrix,
                               cmap=cmap,
                               annot=True,
                               linewidths=.5,
                               fmt='.2f')
            axes.set_xticklabels(column_labels)
            axes.set_yticklabels(row_labels, rotation=0)
            figure = axes.get_figure()
            figure.set_size_inches(13, 8)
            figure.savefig("output.svg")

        if ret:
            return axes
Example #24
0
def plot_pca(
    x: np.ndarray,
    y: np.ndarray,
    variance: np.ndarray,
    metadata: Dict[str, Any],
    kwargs: Dict[str, Any],
) -> sns.FacetGrid:
    """Scatter the first two columns of ``x`` coloured by ``y`` and label it.

    The axis labels report ``variance[0]`` and ``variance[1]`` as percentages.
    ``metadata`` supplies the ``palette``, the ``labels`` and the data set
    ``name`` used in the title; ``kwargs["relplot_kwargs"]`` is forwarded to
    ``sns.relplot``. The colour bar skips the first palette/label entry.
    """
    colours = metadata["palette"]
    with sns.plotting_context(context="paper"):
        graph = sns.relplot(
            x=x[:, 0],
            y=x[:, 1],
            hue=y,
            palette=colours,
            **kwargs["relplot_kwargs"],
        )
        bar_colours = list(metadata["palette"].values())[1:]
        bar_labels = list(metadata["labels"].values())[1:]
        _plot_colorbar(figure=graph.fig, palette=bar_colours, labels=bar_labels)

        axis_settings = dict(
            title=f'{metadata["name"]} PCA Projection',
            xlabel=f"Principal Component 1 - {variance[0]*100:.1f}% Explained Variance",
            xticks=[],
            ylabel=f"Principal Component 2 - {variance[1]*100:.1f}% Explained Variance",
            yticks=[],
        )
        graph.set(**axis_settings)
    return graph
Example #25
0
def make_violins(exp, brain_areas, gene_list):
    """Draw split violins of expression per brain area, by gene-list membership.

    Args:
        exp: expression table; the ``brain_areas`` columns are selected and
            the index (expected to come out of ``reset_index`` as
            ``gene_symbol``) is tested against ``gene_list``.
        brain_areas: column labels to include, one violin each.
        gene_list: genes drawn as the second half of every violin.
    """
    selected = exp.loc[:, brain_areas]
    selected['in_gene_list'] = selected.index.isin(gene_list)
    # One row per (gene, brain area) pair.
    long_form = selected.reset_index().melt(
        id_vars=['gene_symbol', 'in_gene_list'],
        var_name='brain_area',
        value_name='expression')

    half_colours = {False: '#636364', True: '#D9D4D3'}

    with sns.plotting_context('notebook', font_scale=1.25):
        fig, ax = plt.subplots(figsize=(12, 9))
        sns.violinplot(y='brain_area',
                       x='expression',
                       edgecolor='black',
                       hue='in_gene_list',
                       palette=half_colours,
                       cut=2,
                       split=True,
                       inner='quartiles',
                       data=long_form,
                       ax=ax)
        ax.set_xlabel('Expression (z-scored)')
        ax.set_ylabel('')

        legend = ax.get_legend()
        legend.set_title('')
        legend._loc = 7
        # Replace the legend texts, in order, with readable wording.
        readable = ('Background genes', 'Disease genes')
        for entry, wording in zip(legend.texts, readable):
            entry.set_text(wording)

        sns.despine()
Example #26
0
def generate_pca_classification(pc_df):
    """Draw the two principal components side by side, coloured by cluster.

    The left panel colours the points by the ``kmeans_clusters`` column of
    ``pc_df`` and the right panel by ``GMM_clusters``; both use '+' markers.
    """
    font_settings = {
        "font.size": 14,
        "axes.titlesize": 18,
        "axes.labelsize": 18,
        "xtick.labelsize": 14,
        "ytick.labelsize": 14,
        'y.labelsize': 16
    }
    cluster_columns = ('kmeans_clusters', 'GMM_clusters')

    with sns.plotting_context(rc=font_settings):
        f, axes = plt.subplots(1, 2)
        for panel, cluster_column in enumerate(cluster_columns):
            sns.scatterplot(x='principal component 1',
                            y='principal component 2',
                            hue=cluster_column,
                            marker='+',
                            data=pc_df,
                            ax=axes[panel])

        sns.despine()
Example #27
0
def predicted_to_member_id(args):
    """Save count plots of ``MEMBER_ID``, one panel per ``FAMILY``.

    Reads the tab-separated file ``args.tsv_input`` (its path is printed),
    keeps the rows whose ``MEMBER_ID`` is not null, draws the counts in
    panels wrapped after four columns and writes the figure to
    ``args.outfile``.
    """
    # For the pseudo-vs-coding hue variant see:
    # https://stackoverflow.com/questions/37331937/seaborn-facetgrid-countplot-hue

    plt.clf()
    with sns.plotting_context("paper", font_scale=1.0):
        print(args.tsv_input)
        table = pd.read_csv(args.tsv_input, sep="\t")
        sns.set(style="whitegrid", palette="muted")

        with_member = table[table.MEMBER_ID.notnull()]
        grid = sns.factorplot("MEMBER_ID",
                              col="FAMILY",
                              col_wrap=4,
                              data=with_member,
                              kind="count",
                              size=2.5,
                              aspect=.8)

        # Leave room above the panels for the overall title.
        plt.subplots_adjust(top=0.9)
        grid.fig.suptitle("Potential Isoforms per member",
                          fontweight='bold',
                          size=16)

        plt.savefig(args.outfile)
        plt.close()
def draw(df, output):
    """Save a grid of step plots, one row per systematic and column per process.

    Underscores in the ``syst`` column of ``df`` are replaced by spaces (the
    column is overwritten in place). ``count`` is then drawn against ``bins``
    as a step line per ``variation``, with the y axis limited to (0.5, 1.5),
    and the grid is written to ``output`` as a PDF.

    The top x ticks and right y ticks are switched off while drawing and
    switched on again afterwards, also when drawing fails.

    Args:
        df: data with the columns ``syst``, ``process``, ``variation``,
            ``bins`` and ``count``.
        output: path of the PDF to write.
    """
    df["syst"] = np.array([s.replace("_", " ") for s in df["syst"].values])

    plt.rcParams['xtick.top'] = False
    plt.rcParams['ytick.right'] = False
    try:
        with sns.plotting_context(context='paper', font_scale=1.8):
            g = sns.FacetGrid(
                df, row='syst', col='process', hue='variation',
                margin_titles=True, legend_out=True,
            )
            g.map(plt.step, "bins", "count", where='post').add_legend()
            g.set(ylim=(0.5, 1.5))

            # Report
            print("Creating {}".format(output))

            # Actually save the figure
            g.fig.savefig(output, format="pdf", bbox_inches="tight")
            plt.close(g.fig)
    finally:
        # Runs on the error path too, so a failed plot does not leave the
        # global tick settings switched off.
        plt.rcParams['xtick.top'] = True
        plt.rcParams['ytick.right'] = True
def _show_results(results: pd.DataFrame):
    """
    Given a DataFrame of performance testing results, this function
    plots the results in a figure. In addition, it dumps the results as a string.

    The figure is a grouped bar chart (``Input`` on x, ``Performance`` on y,
    one bar colour per ``Function``) saved as
    ``visualizations/<name>.png``, where ``<name>`` is the base name of the
    source file found two frames up the call stack.

    :param results: a DataFrame containing the results of a performance test
    """
    print(results.to_string())
    sns.set_theme()
    with sns.plotting_context("paper", font_scale=1.5):
        sns.catplot(
            x="Input",
            y="Performance",
            hue="Function",
            kind="bar",
            data=pd.DataFrame(results),
            legend=False,
            height=8,
            aspect=2
        )
    plt.title("How to Python: Function Performance Comparison", fontsize=16)
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, title="Functions", fontsize='12', title_fontsize='12')
    plt.tight_layout()
    # The output name depends on call depth: frame [2] is the caller's caller.
    filename = os.path.splitext(os.path.basename(inspect.stack()[2].filename))[0]
    # savefig does not create missing directories, so make sure it exists.
    os.makedirs('visualizations', exist_ok=True)
    plt.savefig(f"{os.path.join('visualizations', filename)}.png")
def plot_bp_qual(tsv_file, outfile):
    """Plot error probability against homopolymer length, one panel per pass count.

    Reads a tab-separated file, drops rows whose ``P_error`` is null and draws
    a seaborn factor plot of ``P_error`` over ``Homopolymenr_length`` with one
    hue per ``Base`` and one column (wrapped at 3) per ``Passes`` value 1-12.
    Axis limits are fixed to x in (0, 6) and y in (0, 1).

    :param tsv_file: path of the tab-separated input file.
    :param outfile: path the current figure is saved to.
    """
    # ``sns.plt`` was dropped from seaborn; pyplot is used directly instead.
    plt.clf()
    with sns.plotting_context("paper", font_scale=1.8):
        indata = pd.read_csv(tsv_file, sep="\t")

        # No separate ``plt.subplots()`` here: the factor plot creates its own
        # figure, so an extra one was never drawn on and never closed.
        # NOTE(review): ``cut`` and ``bw`` look like leftovers from a
        # ``kind="violin"`` variant - confirm the plot kind accepts them.
        g = sns.factorplot(x="Homopolymenr_length",
                           y="P_error",
                           col="Passes",
                           col_order=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
                           col_wrap=3,
                           hue="Base",
                           data=indata[indata.P_error.notnull()],
                           size=3,
                           aspect=1.6,
                           palette="Set3",
                           dodge=True,
                           cut=0,
                           bw=.2)

        sns.set(style="whitegrid", palette="muted")
        g.set(xlim=(0, 6))
        g.set(ylim=(0, 1))
        plt.savefig(outfile)
        plt.close()
def welch_matrix(scores, images):
    """Correlate median-split trait scores with image traits and plot each row.

    For each of the five traits, the corresponding ``scores`` column is
    binarised at its median (values above the median become +100, the rest
    -100) and Kendall's tau is computed against every trait column of
    ``images``. After each row is computed, a bar chart of that row is shown.

    Every off-diagonal computation also zeroes the mirrored cell, so the
    returned matrix keeps the statistic on and below the diagonal and holds
    zeros above it.

    :param scores: DataFrame with one column per trait name.
    :param images: DataFrame with one column per trait name.
    :return: DataFrame of the statistics, rows and columns labelled by trait.
    """
    traits = ["Openness", "Conscientiousness", "Extraversion", "Agreeableness", "Neuroticism"]
    ret = np.zeros((len(traits), len(traits)))
    for i, trait_i in enumerate(traits):
        # ``Series.as_matrix()`` no longer exists in pandas; ``.values`` is
        # what the ``images`` columns below already use.
        arr_i = scores[trait_i].values
        arr_i = (arr_i > np.median(arr_i)).astype(float) * 200 - 100
        for j, trait_j in enumerate(traits):
            arr_j = images[trait_j].values
            ret[i, j] = scipy.stats.kendalltau(arr_i, arr_j)[0]
            if i != j:
                ret[j,i] = 0
        with sns.plotting_context("notebook", font_scale=1.5):
            pylab.clf()
            cmap = sns.cubehelix_palette(100, start=0, rot=-0.25, reverse=False)
            # Map the row's values linearly onto the 100 palette entries.
            v = ret[i, :].max() - ret[i, :].min()
            color = (99 * (ret[i, :] - ret[i, :].min()) / v).astype(int)
            for idx in range(len(traits)):
                pylab.bar(idx, height=ret[i, idx], color=cmap[color[idx]])
                # Trait name just above positive bars, just below negative ones.
                pylab.text(idx, ret[i, idx] + v * (0.025 if ret[i,idx] > 0 else -0.04), traits[idx], horizontalalignment="center", fontsize=12)
            # NOTE(review): the label says Spearman but the statistic computed
            # above is scipy.stats.kendalltau - confirm which one is intended.
            pylab.ylabel("Spearman $\\rho$")
            pylab.tight_layout()
            pylab.gca().set_xticks([])
            pylab.show()
    return pandas.DataFrame(ret, columns=[traits], index=[traits])
Example #32
0
    def plot_region_heatmap(self, clim=None):
        """
        Plot a frequency x region heatmap of mean t-statistics.

        ``self.group_df`` is averaged within subject by region and frequency,
        then across subjects; rows with an empty region are dropped.

        :param clim: symmetric colour limit (the map spans ``-clim`` to
            ``clim``). Defaults to the largest absolute value in the table.
        """

        # mean t-stat within subject by region and frequency, then mean across subjects
        mean_df = self.group_df.groupby(['subject', 'regions', 'frequency']).mean().groupby(['regions', 'frequency']).mean()
        mean_df = mean_df.reset_index()

        # ignore data without a region; assign the result back instead of
        # calling replace(inplace=True) on a column selection, which may act
        # on a temporary copy and leave mean_df unchanged
        mean_df['regions'] = mean_df['regions'].replace('', np.nan)
        mean_df = mean_df.dropna(subset=['regions'])

        # reshape it for easier plotting with seaborn
        mean_df = mean_df.pivot_table(index='frequency', columns='regions', values='t-stat')

        # center the colormap and plot
        if clim is None:
            clim = np.max(np.abs(mean_df.values))
        with sns.plotting_context("talk"):
            sns.heatmap(mean_df, cmap='RdBu_r',
                        yticklabels=mean_df.index.values.round(2),
                        vmin=-clim,
                        vmax=clim,
                        cbar_kws={'label': 't-stat'})
            plt.gca().invert_yaxis()
            plt.ylabel('Frequency')
            plt.xlabel('')

        plt.gcf().set_size_inches(12, 9)
Example #33
0
    def plot_region_heatmap(self, clim=None):
        """
        Plot a frequency x region heatmap of mean t-statistics.

        ``self.group_df`` is averaged within subject by region and frequency,
        then across subjects; rows with an empty region are dropped.

        :param clim: symmetric colour limit (the map spans ``-clim`` to
            ``clim``). Defaults to the largest absolute value in the table.
        """

        # mean t-stat within subject by region and frequency, then mean across subjects
        mean_df = self.group_df.groupby(['subject', 'regions', 'frequency']).mean().groupby(['regions', 'frequency']).mean()
        mean_df = mean_df.reset_index()

        # ignore data without a region; assign the result back instead of
        # calling replace(inplace=True) on a column selection, which may act
        # on a temporary copy and leave mean_df unchanged
        mean_df['regions'] = mean_df['regions'].replace('', np.nan)
        mean_df = mean_df.dropna(subset=['regions'])

        # reshape it for easier plotting with seaborn
        mean_df = mean_df.pivot_table(index='frequency', columns='regions', values='t-stat')

        # center the colormap and plot
        if clim is None:
            clim = np.max(np.abs(mean_df.values))
        with sns.plotting_context("talk"):
            sns.heatmap(mean_df, cmap='RdBu_r',
                        yticklabels=mean_df.index.values.round(2),
                        vmin=-clim,
                        vmax=clim,
                        cbar_kws={'label': 't-stat'})
            plt.gca().invert_yaxis()
            plt.ylabel('Frequency')
            plt.xlabel('')

        plt.gcf().set_size_inches(12, 9)
Example #34
0
def plotDf(df, save_directory, filename, drawdev=False,
           font_scale=0.8, hue_order=None, legend_loc=1):
    """Draw one line per method across datasets and save the figure as a PDF.

    The frame needs ``dataset`` and ``method`` columns plus the plotted value
    column: ``value`` when ``drawdev`` is true, ``method_mean`` otherwise.

    :param df: long-format DataFrame to plot.
    :param save_directory: directory the PDF is written into.
    :param filename: output file name without the ``.pdf`` extension.
    :param drawdev: plot the raw ``value`` column instead of ``method_mean``.
    :param font_scale: font scale; also used as line and grid line width.
    :param hue_order: optional ordering of the ``method`` hue levels.
    :param legend_loc: matplotlib legend location code.
    """
    sns.set_style("whitegrid")
    setLateXFonts()

    y_column = "value" if drawdev else "method_mean"
    context_rc = {"lines.linewidth": font_scale, "grid.linewidth": font_scale}

    with sns.plotting_context("poster", font_scale=font_scale, rc=context_rc):
        fig, ax = plt.subplots()

        # Only forward hue_order when the caller actually supplied one.
        line_kwargs = dict(x="dataset", y=y_column, hue="method",
                           data=df, sort=False)
        if hue_order is not None:
            line_kwargs["hue_order"] = hue_order
        line_axes = sns.lineplot(**line_kwargs)

        ax.set(xlabel='dataset', ylabel='retrieval loss')
        ax.legend(loc=legend_loc)

        fig = line_axes.get_figure()
        fig.savefig(f"{save_directory}/{filename}.pdf", bbox_inches='tight')
        plt.show()
        plt.clf()
Example #35
0
def recall_per_abundance_normalized(args):
    """Plot recall against read depth, one panel per abundance level.

    Reads the tab-separated file ``args.recallfile`` and draws a seaborn
    factor plot of ``recall`` over ``read_count`` with one hue per ``ed`` and
    one column (wrapped at 3) per ``abundance`` level. The figure is saved as
    ``recall_per_abundance_normalized.pdf`` inside ``args.outfolder``.

    :param args: object providing ``recallfile`` and ``outfolder``.
    """
    plt.clf()
    with sns.plotting_context("paper", font_scale=1.8):
        data = pd.read_csv(args.recallfile, sep="\t")
        # NOTE(review): this function used to call
        # ``data.apply(pd.to_numeric, errors='coerce')`` and throw the result
        # away, so it never affected the plot. The dead call is dropped; if
        # coercion is wanted it must be assigned back, which would turn every
        # non-numeric column (possibly ``ed``) into NaN.
        g = sns.factorplot(x="read_count",
                           y="recall",
                           hue="ed",
                           col="abundance",
                           data=data,
                           col_wrap=3,
                           size=3,
                           aspect=1.6,
                           col_order=[0.5, 0.2, 0.1, 0.05, 0.01, 0.005])
        g.set(ylim=(0.0, 1.0))
        g.set_ylabels("Recall")
        g.set_xlabels("Total read depth")
        outfile = os.path.join(args.outfolder,
                               "recall_per_abundance_normalized.pdf")
        plt.savefig(outfile)
        plt.close()
def plot_clustering_similarity(results, plot_dir=None, verbose=False, ext='png'):
    """Compare the clustering solutions stored in ``results.HCA`` as heatmaps.

    Two square matrices over all clustering solutions are built:

    * distance similarity - correlation between the condensed
      ``distance_df`` matrices of each pair of solutions (symmetric);
    * cluster similarity - adjusted Rand score in the upper triangle and
      adjusted mutual information in the lower triangle, both computed from
      the solutions' ``labels``.

    :param results: object exposing ``HCA.results`` (name -> solution dict)
        and ``HCA.dist_metric``.
    :param plot_dir: directory to save both figures in; nothing is saved or
        closed when ``None``.
    :param verbose: print the correlation between the Rand and MI scores.
    :param ext: file extension for the saved figures.
    """
    HCA = results.HCA
    # get all clustering solutions
    clusterings = HCA.results.items()
    # Label rows/columns with the same shortened name (text after the last
    # '-') that the loop below uses for ``.loc``. Indexing the frames by the
    # full key made every ``.loc`` write silently append new rows/columns
    # whenever a key contained a '-'.
    names = [key.split('-')[-1] for key, _ in clusterings]
    n_solutions = len(names)
    cluster_similarity = pd.DataFrame(np.zeros((n_solutions, n_solutions)),
                                      index=names,
                                      columns=names)
    distance_similarity = pd.DataFrame(np.zeros((n_solutions, n_solutions)),
                                       index=names,
                                       columns=names)
    # plot cluster agreement across embedding spaces
    for clustering1, clustering2 in combinations(clusterings, 2):
        name1 = clustering1[0].split('-')[-1]
        name2 = clustering2[0].split('-')[-1]
        # record similarity of distance_df
        dist_corr = np.corrcoef(squareform(clustering1[1]['distance_df']),
                                squareform(clustering2[1]['distance_df']))[1,0]
        distance_similarity.loc[name1, name2] = dist_corr
        distance_similarity.loc[name2, name1] = dist_corr
        # record similarity of clustering of dendrogram
        clusters1 = clustering1[1]['labels']
        clusters2 = clustering2[1]['labels']
        rand_score = adjusted_rand_score(clusters1, clusters2)
        MI_score = adjusted_mutual_info_score(clusters1, clusters2)
        # upper triangle: Rand, lower triangle: MI (matches the plot title)
        cluster_similarity.loc[name1, name2] = rand_score
        cluster_similarity.loc[name2, name1] = MI_score

    with sns.plotting_context(context='notebook', font_scale=1.4):
        clust_fig = plt.figure(figsize = (12,12))
        sns.heatmap(cluster_similarity, square=True)
        plt.title('Cluster Similarity: TRIL: Adjusted MI, TRIU: Adjusted Rand',
                  y=1.02)

        dist_fig = plt.figure(figsize = (12,12))
        sns.heatmap(distance_similarity, square=True)
        plt.title('Distance Similarity, metric: %s' % HCA.dist_metric,
                  y=1.02)

    if plot_dir is not None:
        save_figure(clust_fig, path.join(plot_dir,
                                   'cluster_similarity_across_measures.%s' % ext),
                    {'bbox_inches': 'tight'})
        save_figure(dist_fig, path.join(plot_dir,
                                   'distance_similarity_across_measures.%s' % ext),
                    {'bbox_inches': 'tight'})
        plt.close(clust_fig)
        plt.close(dist_fig)

    if verbose:
        # assess relationship between two measurements
        rand_scores = cluster_similarity.values[np.triu_indices_from(cluster_similarity, k=1)]
        MI_scores = cluster_similarity.T.values[np.triu_indices_from(cluster_similarity, k=1)]
        score_consistency = np.corrcoef(rand_scores, MI_scores)[0,1]
        print('Correlation between measures of cluster consistency: %.2f' \
              % score_consistency)
Example #37
0
def plot_alpha(metadata, category, hue):
    """Draw box plots of the ``Alpha diversity`` column grouped by ``category``.

    The figure is 8 units high and 4 units wide per distinct ``category``
    value, drawn with seaborn's "darkgrid" style and "notebook" context.

    :param metadata: DataFrame with an ``Alpha diversity`` column.
    :param category: column placed on the x-axis; rows are sorted by it.
    :param hue: column used to split each box by colour.
    """
    import seaborn as sns
    with plt.rc_context(dict(sns.axes_style("darkgrid"),
                             **sns.plotting_context("notebook", font_scale=2))):
        width = len(metadata[category].unique())
        plt.figure(figsize=(width*4, 8))
        # ``DataFrame.sort`` was removed from pandas; ``sort_values`` is the
        # replacement for sorting rows by a column.
        sns.boxplot(x=category, y='Alpha diversity',
                    data=metadata.sort_values(category), hue=hue,
                    palette='cubehelix')
Example #38
0
def context(context='notebook', font_scale=1.5, rc=None):
    """Return a seaborn plotting context pre-filled with project defaults.

    :param context: name of the seaborn context.
    :param font_scale: font scaling factor.
    :param rc: optional overrides; defaults are added only for keys that are
        missing. The dict passed in is left untouched.
    :return: the object returned by ``sns.plotting_context``.
    """
    rc_default = {'lines.linewidth': 1.5,
                  'axes.facecolor': '0.995',
                  'figure.facecolor': '0.97'}

    # Work on a copy so the defaults are not written into the caller's dict.
    rc = {} if rc is None else dict(rc)

    # Add defaults if they do not exist
    for name, val in rc_default.items():
        rc.setdefault(name, val)

    # NOTE(review): seaborn documents that plotting_context only applies rc
    # keys belonging to the context definition, so the two facecolor defaults
    # may have no effect here - confirm.
    return sns.plotting_context(context=context, font_scale=font_scale,
                                rc=rc)
Example #39
0
def analyze_solution(recovered_users, hidden_user_idx, edges, verbose=False,
                     drawing=False):
    """Compare recovered user vectors with the reference ones from ``USERS``.

    Both sets of vectors are scaled to unit L2 norm and the per-user Euclidean
    distance between them is computed. For every user whose first recovered
    component is greater than -100, a column of four values is stored:
    the distance, the number of graph neighbors, the fraction of neighbors
    that are themselves in ``hidden_user_idx``, and the mean distance between
    the user's ``USERS`` feature vector and those of its neighbors.

    ``recovered_users`` is normalised in place.

    :param recovered_users: 2-D array with one row per hidden user.
    :param hidden_user_idx: indices into ``USERS`` of the hidden users.
    :param edges: iterable of 4-tuples whose first two items are the endpoints.
    :param verbose: print the mean and selected percentiles of the distance.
    :param drawing: draw a regression plot of the distance against each of
        the three other rows.
    :return: array of shape ``(4, number of kept users)``.
    """
    global USERS
    # undirected adjacency: node -> set of neighbor nodes
    adj = defaultdict(set)
    for i, j, _, _ in edges:
        adj[i].add(j)
        adj[j].add(i)
    # scale every row to unit L2 norm (modifies the caller's array)
    recovered_users /= np.sqrt((recovered_users ** 2).sum(-1))[..., np.newaxis]
    # NOTE(review): whether this selection is a copy or a view of USERS
    # depends on the type of hidden_user_idx - confirm USERS is not modified.
    gold_users = USERS[hidden_user_idx, FEATURE_START:]
    gold_users /= np.sqrt((gold_users ** 2).sum(-1))[..., np.newaxis]
    # per-user Euclidean distance between reference and recovered vectors
    diff = np.sqrt(((gold_users - recovered_users) ** 2).sum(-1))
    # rows whose first component is > -100 are the ones kept below
    non_zeros = np.where(recovered_users[:, 0] > -100)[0]
    if verbose:
        print('average distance {:.3f}'.format(np.mean(diff[non_zeros])))
        prct = [5, 25, 50, 75, 95]
        vals = np.percentile(diff[non_zeros], prct)
        print('Percentile: '+'\t'.join(['{}'.format(str(_).ljust(5))
                                        for _ in prct]))
        print('            '+'\t'.join(['{:.3f}'.format(_) for _ in vals]))

    # rows: distance, #neighbors, fraction of hidden neighbors, mean distance
    embeddings = np.zeros((4, non_zeros.size))
    i = 0
    for uidx in range(len(recovered_users)):
        neighbors = adj[hidden_user_idx[uidx]]
        hidden_neighbors = {_ for _ in neighbors if _ in hidden_user_idx}
        tot_dst = 0
        # NOTE(review): neighbors are looked up with hidden_user_idx[uidx] but
        # the feature row is read with uidx itself - confirm this is intended.
        # NOTE(review): if USERS is a NumPy array this slice is a view, so the
        # in-place division below rescales the row inside USERS - confirm.
        me = USERS[uidx, FEATURE_START:]
        me /= np.linalg.norm(me)
        for n in neighbors:
            nei = USERS[n, FEATURE_START:]
            tot_dst += np.linalg.norm(nei/np.linalg.norm(nei) - me)
        if uidx in non_zeros:
            # a kept user without any neighbor raises ZeroDivisionError here
            embeddings[:, i] = [diff[uidx], len(neighbors),
                                len(hidden_neighbors)/len(neighbors),
                                tot_dst/len(neighbors)]
            i += 1

    if drawing:
        labels = ['number of neighbors',
                  'fraction of unknown neighbors',
                  'mean distance from all neighbors']
        # one regression of the distance (row 0) against each other row
        for i in range(1, 4):
            # NOTE(review): seaborn documents that plotting_context only
            # applies rc keys that are part of the context definition, so
            # "figure.figsize" may be ignored here - confirm.
            with sns.plotting_context("notebook", font_scale=1.7,
                                      rc={"figure.figsize": (20, 10)}):
                sns.regplot(embeddings[i, :], embeddings[0, :],
                            label=labels[i-1])
        ppl.legend()
    return embeddings
Example #40
0
def plot_boundary(predict_fun, dataset, method):
    """Draw the decision regions of a classifier on the unit square and save them.

    ``predict_fun`` is evaluated on a regular grid with step 0.002 over
    [0, 1) x [0, 1) and the result is shown as a filled contour plot without
    ticks or tick labels. The image is written to
    ``<dataset>_<method>.png`` (with a ``_full`` suffix when the module-level
    ``FULL`` flag is truthy).

    :param predict_fun: callable taking an ``(n, 2)`` array of points and
        returning ``n`` values.
    :param dataset: dataset name used in the output file name.
    :param method: method name used in the output file name.
    """
    plot_step = .002
    xx, yy = np.meshgrid(np.arange(0,1, plot_step),
                         np.arange(0,1, plot_step))
    Z = predict_fun(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    size = (26/8)*2.54
    # NOTE(review): seaborn documents that plotting_context only applies rc
    # keys that are part of the context definition, so 'figure.figsize' may
    # be ignored here - confirm the resulting figure size.
    with sns.plotting_context(rc={'figure.figsize': (size,size)}):
        fig, ax1 = plt.subplots()
        ax1.contourf(xx, yy, Z, 2, cmap=RdGr)
        ax1.set_xlim(0, 1)
        ax1.set_ylim(0, 1)
        plt.axis('equal')
        # Booleans rather than 'off': current matplotlib treats the string as
        # truthy, which would leave the ticks and labels visible.
        plt.tick_params(axis='both', which='both', bottom=False, top=False,
                        left=False, labelbottom=False, labelleft=False)
        # Save before showing: once show() returns the figure may already be
        # closed, and saving afterwards can produce an empty image.
        fig.savefig('{}_{}{}.png'.format(dataset, method, '_full' if FULL else ''), dpi=300, bbox_inches='tight', pad_inches=0)
        plt.show()
def isoformCheck(path):
    """Collect TP53 mutations per patient from tab-separated files and plot them.

    Every file in ``path`` is read (after changing the working directory to
    ``path``). The patient ID is the part of the file name before the first
    ``-``. The first line of each file is skipped as a header. A row is kept
    when column 11 equals ``TP53`` and column 36 is at least 15; columns 14,
    17 and 18 of kept rows are stored as mutation type, mutation and isoform.
    Reading of a file stops at the first row with fewer than 35 columns.

    The collected data are printed and shown as a count plot of mutation type
    split by isoform.

    :param path: directory containing the input files.
    """
    # ``sns.plt`` was dropped from seaborn, so pyplot is imported directly.
    import matplotlib.pyplot as plt

    gene = 'TP53'
    patientIso = {}
    noPatIDonlyTups = []
    os.chdir(path)
    for files in os.listdir(path):
        with open(files) as handle:
            # extracts patient ID from file name (text before the first '-')
            patientID = files.split('-', 1)[0]
            # comment out header line if no header is included in input
            next(handle)
            for line in handle:
                line = line.split('\t')
                # NOTE(review): the guard checks for 35 columns but column 36
                # is read below; a matching row with 35-36 columns raises
                # IndexError - confirm the intended minimum width.
                if len(line) < 35:
                    print(line)
                    break
                if (line[11] == gene) and (float(line[36].rstrip('\n')) >= 15):
                    record = (str(line[14]), str(line[17]), str(line[18]))
                    noPatIDonlyTups.append(list(record))
                    patientIso.setdefault(patientID, []).append(record)

    print(patientIso)
    print(len(patientIso))
    print(noPatIDonlyTups)
    # shorten the longest mutation-type label for plotting
    for entry in noPatIDonlyTups:
        if entry[0] == 'NON_SYNONYMOUS_CODING':
            entry[0] = 'NON_SYN'
    dataframe = pandas.DataFrame(noPatIDonlyTups, columns=['mutation type', 'mutation', 'isoform'])
    print(dataframe)

    with sns.plotting_context("notebook", font_scale=1.5):
        sns.countplot(y="mutation type", hue="isoform", data=dataframe, palette="Set2")
        plt.show()
def plot_factor_correlation(results, c, rotate='oblimin', title=True,
                            DA=False, size=4.6, dpi=300, ext='png', plot_dir=None):
    """Plot the factor correlation matrix (Phi) of a factor solution.

    The matrix is drawn twice on the same axes: once in full without numbers,
    then again with numeric annotations and the cells strictly below the
    diagonal masked, so only the diagonal and upper triangle carry numbers.

    :param results: object exposing ``EFA``, ``DA`` and ``ID``.
    :param c: key of the factor solution to plot (also scales annotation
        size).
    :param rotate: rotation name used to look up the loadings and Phi.
    :param title: add a title derived from ``results.ID`` when ``True``.
    :param DA: use ``results.DA`` instead of ``results.EFA``.
    :param size: figure height; fonts and the figure width scale with it.
    :param dpi: resolution used when saving.
    :param ext: file extension used when saving.
    :param plot_dir: directory to save into; nothing is saved when falsy.
    """
    if DA:
        EFA = results.DA
    else:
        EFA = results.EFA
    loading = EFA.get_loading(c, rotate=rotate)
    # get factor correlation matrix
    reorder_vec = EFA.get_factor_reorder(c)
    phi = get_attr(EFA.results['factor_tree_Rout_%s' % rotate][c],'Phi')
    phi = pd.DataFrame(phi, columns=loading.columns, index=loading.columns)
    phi = phi.iloc[reorder_vec, reorder_vec]
    # mask the cells strictly below the diagonal
    mask = np.zeros_like(phi)
    mask[np.tril_indices_from(mask, -1)] = True
    # Two context managers separated by a comma: ``A and B`` evaluates to B
    # alone, so the plotting context would never be entered.
    with sns.plotting_context('notebook', font_scale=2), sns.axes_style('white'):
        f = plt.figure(figsize=(size*5/4, size))
        ax1 = f.add_axes([0,0,.9,.9])
        cbar_ax = f.add_axes([.91, .05, .03, .8])
        sns.heatmap(phi, ax=ax1, square=True, vmax=1, vmin=-1,
                    cbar_ax=cbar_ax,
                    cmap=sns.diverging_palette(220,15,n=100,as_cmap=True))
        sns.heatmap(phi, ax=ax1, square=True, vmax=1, vmin=-1,
                    cbar_ax=cbar_ax, annot=True, annot_kws={"size": size/c*15},
                    cmap=sns.diverging_palette(220,15,n=100,as_cmap=True),
                    mask=mask)
        yticklabels = ax1.get_yticklabels()
        ax1.set_yticklabels(yticklabels, rotation=0, ha="right")
        ax1.set_xticklabels(ax1.get_xticklabels(), rotation=90)
        if title == True:
            ax1.set_title('%s Factor Correlations' % results.ID.split('_')[0].title(),
                      weight='bold', y=1.05, fontsize=size*3)
        ax1.tick_params(labelsize=size*3)
        # format cbar
        cbar_ax.tick_params(axis='y', length=0)
        cbar_ax.tick_params(labelsize=size*2)
        cbar_ax.set_ylabel('Pearson Correlation', rotation=-90, labelpad=size*4, fontsize=size*3)

    if plot_dir:
        filename = 'factor_correlations_EFA%s.%s' % (c, ext)
        save_figure(f, path.join(plot_dir, filename),
                    {'bbox_inches': 'tight', 'dpi': dpi})
        plt.close()
Example #43
0
def plotting_context(context='notebook', font_scale=1.5, rc=None):
    """Create alphalens default plotting style context.

    Under the hood, calls and returns seaborn.plotting_context() with
    some custom settings. Usually you would use in a with-context.

    Parameters
    ----------
    context : str, optional
        Name of seaborn context.
    font_scale : float, optional
        Scale font by factor font_scale.
    rc : dict, optional
        Config flags.
        By default, {'lines.linewidth': 1.5}
        is being used and will be added to any
        rc passed in, unless explicitly overridden.
        The dict passed in is not modified.

    Returns
    -------
    seaborn plotting context

    Example
    -------
    with alphalens.plotting.plotting_context(font_scale=2):
        alphalens.create_full_tear_sheet(..., set_context=False)

    See also
    --------
    For more information, see seaborn.plotting_context().

    """
    rc_default = {'lines.linewidth': 1.5}

    # Work on a copy so the defaults are not written into the caller's dict.
    rc = {} if rc is None else dict(rc)

    # Add defaults if they do not exist
    for name, val in rc_default.items():
        rc.setdefault(name, val)

    return sns.plotting_context(context=context, font_scale=font_scale, rc=rc)
Example #44
0
def plot_stacked_bar(df):
    """Draw one stacked bar per column of ``df``, one randomly coloured layer per row.

    Each legend entry lists the row's values (two decimals, in brackets)
    followed by the row label. The y-axis is fixed to [0, 1].

    :param df: DataFrame whose rows are stacked and whose columns are the bars.
    """
    import seaborn as sns

    style_rc = dict(sns.axes_style("darkgrid"),
                    **sns.plotting_context("notebook", font_scale=1.8))
    with plt.rc_context(style_rc):
        f, ax = plt.subplots(1, figsize=(10, 10))

        n_bars = len(df.columns)
        positions = list(range(n_bars))
        bottom = np.array([0] * n_bars)
        colour_digits = list('ABCDEF123456789')

        cat_percents = []
        for id_ in df.index:
            # random six-digit hex colour for this layer
            color = '#' + ''.join(np.random.choice(colour_digits, 6))
            layer = df.loc[id_]
            ax.bar(positions, layer, color=color, bottom=bottom, align='center')
            # the next layer starts on top of everything drawn so far
            bottom = layer + bottom
            formatted = ["[{0:.2f}] ".format(value) for value in layer.tolist()]
            cat_percents.append(''.join(formatted))

        legend_labels = []
        for percents, row_label in zip(cat_percents, df.index.tolist()):
            legend_labels.append(' '.join((percents, row_label)))

        ax.set_xticks(positions)
        ax.set_xticklabels(df.columns.tolist())
        ax.set_ylim([0, 1])
        ax.legend(legend_labels, loc='center left', bbox_to_anchor=(1, 0.5))
Example #45
0
def Graph(predictedEndAccuracy, task = DEFAULT):
    """Plot a predicted end accuracy against the stored group distribution.

    The group end-accuracy values for the given task level are unpickled from
    the matching ``GROUP_END_ACCURACY_LEVEL*`` file and drawn as a shaded
    density curve, with a vertical line at ``predictedEndAccuracy``.

    :param predictedEndAccuracy: value marked on the plot and compared with
        the group values.
    :param task: one of the ``TASK_LEVEL_*`` constants.
    :return: ``(figure, percent)`` where ``percent`` is the percentage of
        group values strictly below ``predictedEndAccuracy``; for an unknown
        task a message is printed and ``EMPTY_STRING`` is returned instead.
    """
    # load graph model
    graph = None
    if task == TASK_LEVEL_1:
        graph = GROUP_END_ACCURACY_LEVEL1
    elif task == TASK_LEVEL_2:
        graph = GROUP_END_ACCURACY_LEVEL2
    elif task == TASK_LEVEL_3:
        graph = GROUP_END_ACCURACY_LEVEL3
    elif task == TASK_LEVEL_4:
        graph = GROUP_END_ACCURACY_LEVEL4
    elif task == TASK_LEVEL_5:
        graph = GROUP_END_ACCURACY_LEVEL5
    else:
        print("Unknow task level value entered: %s" % task)
        return EMPTY_STRING

    # Close the file handle deterministically instead of leaving it open.
    # NOTE: pickle must only be used on trusted files.
    with open(graph, "rb") as pickle_file:
        groupEndAccuracy = pickle.load(pickle_file)

    # generate plot
    rcParams.update({'figure.autolayout': True})
    fig = plt.figure(figsize = (5, 3.4), dpi = 100)
    ax = fig.add_subplot()
    with sns.plotting_context(PLOTTING_CONTEXT, font_scale = 1.2):
        ax = sns.kdeplot(groupEndAccuracy, shade = True, color = sns.xkcd_rgb[PLOT_COLOR],  linewidth = 3)

    ax.set(yticks = [])
    # The legend may be absent, depending on what kdeplot drew.
    if ax.legend_ is not None:
        ax.legend_.remove()
    ax.set_xlabel(X_LABEL, fontsize = 12)
    # No ``ax.hold(True)``: that method no longer exists in matplotlib and
    # axes always keep earlier artists when new ones are added.
    plt.axvline(x = predictedEndAccuracy, ymin = 0, ymax = 1.2, linewidth = 2, color = LINE_COLOR)
    ax.set_xlim([0, 1.2])
    # calculate statistics
    sortedValues = pd.Series.sort_values(groupEndAccuracy)
    percentHigherThanPopulation = float(np.array(np.where(sortedValues < predictedEndAccuracy)).shape[1]) / len(sortedValues) * 100

    return (fig, percentHigherThanPopulation)
    
def plot_heatmap_genus(dataframe, high, low, oxy, rep, plot_dir):
    """
    Make a heatmap at Genus level, using organisms within the specified
    abundance cutoffs. One facet is drawn per replicate, with weeks as
    columns and genera as rows; all facets share one colour scale.

    :param dataframe: dataframe to pass
    :param high: highest abundance to include genera for
    :param low: lowest abundance to include genera for
    :param oxy: oxygen tension, "Low" or "High", or 'all' to keep both
    :param rep: replicates to keep (passed to ``isin``), or 'all'
    :param plot_dir: directory to save plots in.
    :return: None; the figure is saved as a PDF in ``plot_dir``.
    """
    # get rid of oxygen levels and replicates if specified.
    # Compare by value: ``is not 'all'`` tests object identity, which is not
    # guaranteed to hold for equal strings.
    if oxy != 'all':
        print("keep only {} oxygen samples".format(oxy))
        dataframe = dataframe[dataframe['oxy'] == oxy]
    # ``rep`` may be a list or array, so only compare it when it is a string.
    keep_all_replicates = isinstance(rep, str) and rep == 'all'
    if not keep_all_replicates:
        print("keep only replicate levels:", rep)
        dataframe = dataframe[dataframe['rep'].isin(rep)]
    dataframe = abundance_utils.filter_by_abundance(
        data=dataframe,
        abundance_column='fraction of reads',
        high=high, low=low)
    dataframe['facet_replicate'] = 'replicate ' + dataframe['rep'].astype(str)

    # make height of the plot a function of the number of rows (Genera):
    num_data_rows = len(dataframe['Genus'].unique())
    plot_size = 2 + num_data_rows / 7
    # NOTE(review): the original tested ``> 9`` twice (.65, then .6), so .65
    # was never used; the second threshold was probably meant to be larger.
    if num_data_rows > 9:
        plot_aspect = .6
    elif num_data_rows > 6:
        plot_aspect = .85
    else:
        plot_aspect = 2

    def facet_heatmap(data, **kws):
        """
        Used to fill the subplots with data.

        :param data: rows belonging to one facet
        :param kws: keyword arguments forwarded to ``sns.heatmap``
        :return:
        """

        facet_data = data.pivot(index='Genus', columns='week',
                                values='fraction of reads')
        # Pass kwargs to heatmap  cmap used to be 'Blue'
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)

    with sns.plotting_context(font_scale=7):
        g = sns.FacetGrid(dataframe, col='facet_replicate',
                          margin_titles=True,
                          size=plot_size, aspect=plot_aspect)
        g.set_xticklabels(rotation=90)

    # Create a colorbar axes
    cbar_ax = g.fig.add_axes([.94, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax, vmin=0,
                        # specify vmax = max abundance seen or each will
                        # have its own scale (and you might not know it!)
                        vmax=dataframe['fraction of reads'].max(),
                        )

    g.set_titles(col_template="{col_name}", fontweight='bold', fontsize=18)
    g.set_axis_labels('week')

    # Add space so the colorbar doesn't overlap the plot
    g.fig.subplots_adjust(right=.9)

    # add a supertitle, you bet.
    plt.subplots_adjust(top=0.80)
    supertitle = str(low) + ' < fraction of reads < ' + str(
        high) + ', {} oxygen'.format(oxy)
    g.fig.suptitle(supertitle, size=18)

    # write a filename and save.
    filename = oxy + "_oxygen--{0}_to_{1}_abundance".format(low, high)
    print('filename:', filename)

    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)

    # save figure
    g.savefig(plot_dir + filename + '.pdf')
def heatmap_from_taxa_dict(dataframe, taxa_dict,
                           title=False,
                           facet='rep', annotate=False,
                           summarise_other=True,
                           main_dir='./',
                           cap_facet_labels=True,
                           plot_dir='./plots/mixed_taxonomy/',
                           size_spec=False,
                           aspect_spec=False,
                           check_totals_sum_to_1=True,
                           svg=False):
    """
    Make a plot using a taxa_dict.

    The taxa_dict is used to make a summary dataframe using
    aggregate_mixed_taxonomy(), and the result is plotted as a grid of heat
    maps sharing one colour scale.

    :param dataframe: dataframe to source all data from
    :param taxa_dict: a dictionary with taxonomic levels as keys and
    names as values.  E.g. {'Phylum':['Bacteroidetes'],
    'Order':['Burkholderiales','Methylophilales', 'Methylococcales']}
    :param title: add a supertitle describing taxa_dict to the figure?
    :param facet: The rows to facet the subplots by.  Defaults to replicates,
    so weeks will be the columns.  Values other than 'rep' are only
    supported together with cap_facet_labels=False.
    :param annotate: print numerical values inside each square?  (Makes big
    plots *really* big; not recommended for default use.)
    :param summarise_other: include a bar for "other"?  (Or just don't show)
    :param main_dir: main dir to consider "home", so notebooks can be run in
    remote directories.
    :param cap_facet_labels: use capitalized column names for the facet,
    row and column variables.
    :param plot_dir: path to save plots at, relative to main_dir
    :param size_spec: manually specify the figure size (useful when default
    is ugly)
    :param aspect_spec: manually specify the figure aspect ratio (useful when
    default is ugly)
    :param check_totals_sum_to_1: forwarded to aggregate_mixed_taxonomy()
    :param svg: also save an .svg copy next to the .pdf
    :return: saves and returns a seaborn heat map
    :raises ValueError: if cap_facet_labels is set and facet is not 'rep'
    """
    # Capitalized labels are only defined for faceting by replicate. Without
    # this check the function printed a message and then failed with a
    # NameError on the never-assigned label variables.
    if cap_facet_labels and facet != "rep":
        raise ValueError("not set up for facet != rep")

    # Cherry pick out the rows for the specified taxa.
    # If you give conflicting taxa as input, aggregate_mixed_taxonomy() will
    # throw an error.
    plot_data = aggregate_mixed_taxonomy(
        dataframe=dataframe,
        taxa_dict=taxa_dict,
        main_dir=main_dir,
        summarise_other=summarise_other,
        check_totals_sum_to_1=check_totals_sum_to_1)

    # store the maximum abundance level.  We will need to tell all the
    # sub-heat maps to use this same maximum so they aren't each on their
    # own scale.
    max_abundance = plot_data['abundance sum'].max()

    print('plot_data.head()')
    print(plot_data.head())

    def pivot_so_columns_are_plotting_variable(dataframe, groupby):
        """Reshape so taxa are rows and the ``groupby`` values are columns."""
        return dataframe.pivot(index='taxonomic name',
                               columns=groupby,
                               values='abundance sum')

    def facet_heatmap(data, groupby, xrotation, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe to plot
        :param groupby: column to group on
        :param xrotation: degrees to rotate x labels by
        :param kws: keyword arguments for plotting
        :return:
        """
        # pivot only supports one column for now.
        # http://stackoverflow.com/questions/32805267/pandas-pivot-on-multiple-columns-gives-the-truth-value-of-a-dataframe-is-ambigu
        facet_data = pivot_so_columns_are_plotting_variable(
            dataframe=data, groupby=groupby)
        # Pass kwargs to heatmap  cmap used to be 'Blue'
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)
        # ``g`` is the FacetGrid created further down in the enclosing scope.
        g.set_xticklabels(rotation=xrotation)

    # todo: add a label at the bottom like "replicate" or "week"
    # currently replicate is turned into facet_replicate but should just
    # make a label that says replicate.  Week

    # Control plot aesthetics depending on facet option.
    # Size and aspect depend on the number of rows per subplot.
    num_rows = len(plot_data['taxonomic name'].unique())
    if facet == 'week':
        xrotation = 0
        size = 2 * 0.2*num_rows
        aspect = 1
    else: # (facet = "rep")
        xrotation = 90
        size = 1 + 0.22*num_rows
        aspect = 1.5  # aspect for each sub-plot, not a single tile
    space_for_cbar = 0.85

    if size_spec:
        size = size_spec
    if aspect_spec:
        aspect = aspect_spec

    print(plot_data.head())

    if cap_facet_labels:
        row_var = '$O_2$'
        col_var = 'Week'
        facet_var = "Replicate"
        plot_data = capitalize_some_column_names(plot_data)
    else:
        facet_var = 'rep'
        row_var = 'oxy'
        col_var = 'week'

    with sns.plotting_context(font_scale=8):
        g = sns.FacetGrid(plot_data,
                          col=facet_var,
                          row=row_var,
                          size=size,
                          aspect=aspect,
                          margin_titles=True)

    # Add axes for the colorbar.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.92, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax,
                        # NEED vmax = MAX ABUNDANCE or each plot will have
                        # its own color scale!
                        vmin=0, vmax=max_abundance,
                        annot=annotate,
                        groupby=col_var,
                        xrotation=xrotation)

    g.set_axis_labels(col_var)

    # add space for x label
    g.fig.subplots_adjust(bottom=0.2)

    # todo: add an x-label for each facet (I want only 1)

    # Add space so the colorbar doesn't overlap the plot.
    g.fig.subplots_adjust(right=space_for_cbar)
    # todo: still not enough room for
    # Order-Burkholderiales_Methylophilales_Methylococcales--
    # Phylum-Bacteroidetes--rep.pdf

    # Format the y strings in each subplot of the Seaborn grid.
    # The formatter function is passed uncalled (no parentheses).
    # Todo: make the 2nd argument a function
    y_label_formatter(g, italics_unless_other)

    supertitle = taxa_dict_to_descriptive_string(taxa_dict)
    if title:
        # TODO: they are currently being converted to LaTeX
        # add a supertitle, you bet.
        plt.subplots_adjust(top=0.80)
        g.fig.suptitle(supertitle, size=16)

    # Tight layout --> title and cbar overlap heat maps.  Boo.
    # NO: plt.tight_layout()
    g.fig.subplots_adjust(wspace=.05, hspace=.05)

    # prepare filename and save.
    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)
    print("plot directory: {}".format(plot_dir))
    filepath = plot_dir + supertitle
    filepath += "--{}".format(facet)
    if annotate:
        filepath += "--annotated"
    filepath += ".pdf"
    print(filepath)
    g.fig.savefig(filepath)
    if svg:
        # Swap the extension by slicing: rstrip("pdf") strips any trailing
        # 'p'/'d'/'f' characters rather than the literal suffix.
        g.fig.savefig(filepath[:-len(".pdf")] + ".svg")

    return g
Example #48
0
def plot_fluxnet_comparison_one_site(driver, science_test_data_dir,
                                     compare_data_dict, result_dir, plot_dir,
                                     plots_to_make, context, style, var_names,
                                     months, obs_dir, subdir):
    """Plot observed vs. simulated mean diurnal flux cycles for one site.

    Does nothing when ``check_site_files(obs_dir, subdir)`` is falsy.
    Otherwise every entry of ``compare_data_dict`` is loaded (``"ecflux"``
    via ``read_fluxnet_obs``, ``"VIC.4.2.d"`` via ``read_vic_42_output``,
    any other key via ``read_vic_5_output``) and the requested figures are
    saved below ``plot_dir`` as ``<lat>_<lng>.png``.

    :param driver: not used by this function.
    :param science_test_data_dir: directory handed to the observation and
        VIC 4.2 readers.
    :param compare_data_dict: maps a dataset key to the options passed to
        its reader; each value must also provide the ``linewidth``,
        ``color``, ``linestyle`` and ``zorder`` entries used for drawing.
    :param result_dir: directory handed to ``read_vic_5_output``.
    :param plot_dir: root directory for the figures; the ``annual_mean``
        and ``monthly_mean`` sub-directories are created on demand.
    :param plots_to_make: container of plot names;
        ``'annual_mean_diurnal_cycle'`` and ``'monthly_mean_diurnal_cycle'``
        are the ones handled here.
    :param context: seaborn plotting context passed to
        ``sns.plotting_context``.
    :param style: seaborn axes style passed to ``sns.axes_style``.
    :param var_names: mapping of data column name to the label used on the
        y axis.  Rows of the 4-row subplot grids are addressed by position,
        so at most four entries fit.
    :param months: month labels used as column titles of the monthly figure
        (the grid has 12 columns; month ``j + 1`` is drawn in column ``j``).
    :param obs_dir: observation directory handed to the site helpers.
    :param subdir: site sub-directory handed to the site helpers.
    :return: None
    """
    if not check_site_files(obs_dir, subdir):
        return

    # get CSV file from site directory to get lat/lng for site
    lat, lng = get_fluxnet_lat_lon(obs_dir, subdir)
    print(lat, lng)

    # Load every dataset to compare.  A dataset that cannot be read is
    # reported with a warning and left out instead of aborting the site.
    data = {}
    for key, items in compare_data_dict.items():
        if key == "ecflux":
            try:
                # load Ameriflux data
                data[key] = read_fluxnet_obs(subdir,
                                             science_test_data_dir,
                                             items)
            except OSError:
                warnings.warn(
                    "this %s site does not have data" % subdir)
        elif key == "VIC.4.2.d":
            try:
                # load VIC 4.2 simulations
                data[key] = read_vic_42_output(lat, lng,
                                               science_test_data_dir,
                                               items)
            except OSError:
                warnings.warn(
                    "this site has a lat/lng precision issue")
        else:
            try:
                # load VIC 5 simulations
                data[key] = read_vic_5_output(lat, lng,
                                              result_dir,
                                              items)
            except OSError:
                warnings.warn(
                    "this site has a lat/lng precision issue")

    # plot preferences
    fs = 15
    dpi = 150
    plotname = '%s_%s.png' % (lat, lng)

    if 'annual_mean_diurnal_cycle' in plots_to_make:

        with plt.rc_context(dict(sns.axes_style(style),
                                 **sns.plotting_context(context))):
            f, axarr = plt.subplots(4, 1, figsize=(8, 8), sharex=True)

            for i, (vic_var, variable_name) in enumerate(
                    var_names.items()):

                # mean over all days for each hour of the day, per dataset
                annual_mean = {
                    key: pd.DataFrame(
                        site_df[vic_var].groupby(
                            site_df.index.hour).mean())
                    for key, site_df in data.items()}

                cycles = pd.DataFrame(
                    {key: d[vic_var] for key, d in annual_mean.items()
                     if vic_var in d})

                # DataFrame.items() replaces iteritems(), which was
                # removed in pandas 2.0.
                for key, series in cycles.items():
                    line = compare_data_dict[key]
                    series.plot(
                        linewidth=line['linewidth'],
                        ax=axarr[i],
                        color=line['color'],
                        linestyle=line['linestyle'],
                        zorder=line['zorder'])

                axarr[i].legend(loc='upper left')
                axarr[i].set_ylabel(
                    '%s ($W/{m^2}$)' % variable_name,
                    size=fs)
                axarr[i].set_xlabel('Time of Day (Hour)', size=fs)
                axarr[i].set_xlim([0, 24])
                axarr[i].xaxis.set_ticks(np.arange(0, 24, 3))

            # save plot
            os.makedirs(os.path.join(plot_dir, 'annual_mean'),
                        exist_ok=True)
            savepath = os.path.join(plot_dir, 'annual_mean', plotname)
            plt.savefig(savepath, bbox_inches='tight', dpi=dpi)

            plt.clf()
            plt.close()

    if 'monthly_mean_diurnal_cycle' in plots_to_make:

        with plt.rc_context(dict(sns.axes_style(style),
                                 **sns.plotting_context(context))):
            f, axarr = plt.subplots(4, 12, figsize=(35, 7),
                                    sharex=True,
                                    sharey=True)

            for i, (vic_var, variable_name) in enumerate(
                    var_names.items()):

                # mean for each (month, hour) pair, per dataset
                monthly_mean = {
                    key: pd.DataFrame(
                        site_df[vic_var].groupby(
                            [site_df.index.month,
                             site_df.index.hour]).mean())
                    for key, site_df in data.items()}

                cycles = pd.DataFrame(
                    {key: d[vic_var] for key, d in monthly_mean.items()
                     if vic_var in d})

                for j, month in enumerate(months):

                    for key, series in cycles.items():
                        line = compare_data_dict[key]
                        # first index level is the month number (1-based)
                        series[j + 1].plot(
                            linewidth=line['linewidth'],
                            ax=axarr[i, j],
                            color=line['color'],
                            linestyle=line['linestyle'],
                            zorder=line['zorder'])

                    axarr[i, j].set_ylabel(
                        '%s \n ($W/{m^2}$)' % variable_name,
                        size=fs)
                    axarr[i, j].set_xlabel('', size=fs)
                    axarr[i, j].set_xlim([0, 24])
                    axarr[i, j].xaxis.set_ticks(np.arange(0, 24, 3))
                    if i == 0:
                        axarr[i, j].set_title(month, size=fs)

            # add legend
            axarr[0, -1].legend(loc='center left',
                                bbox_to_anchor=(1, 0.5))

            # add common x label
            f.text(0.5, 0.04, 'Time of Day (Hour)', ha='center',
                   size=fs)

            # save plot
            os.makedirs(os.path.join(plot_dir, 'monthly_mean'),
                        exist_ok=True)
            savepath = os.path.join(plot_dir,
                                    'monthly_mean', plotname)
            plt.savefig(savepath, bbox_inches='tight', dpi=dpi)

            plt.clf()
            plt.close()
Example #49
0
def plot_snotel_comparison_one_site(
        driver, science_test_data_dir,
        compare_data_dict,
        result_dir, plot_dir,
        plots_to_make,
        plot_variables, context, style, filename):
    """Plot observed vs. simulated time series for one SNOTEL site.

    The site's latitude and longitude are taken from the third and fourth
    ``_``-separated fields of ``filename`` (a trailing ``.txt`` is dropped
    from the longitude).  Each entry of ``compare_data_dict`` is loaded
    (``"snotel"`` via ``read_snotel_swe_obs``, ``"VIC.4.2.d"`` via
    ``read_vic_42_output``, any other key via ``read_vic_5_output``).  When
    ``'water_year'`` is requested, one figure per plot variable is saved as
    ``<plot_dir>/<variable>/<lat>_<lng>.png``.

    :param driver: not used by this function.
    :param science_test_data_dir: directory handed to the observation and
        VIC 4.2 readers.
    :param compare_data_dict: maps a dataset key to the options passed to
        its reader; each value must also provide the ``linewidth``,
        ``color``, ``linestyle`` and ``zorder`` entries used for drawing.
    :param result_dir: directory handed to ``read_vic_5_output``.
    :param plot_dir: root directory for the figures.
    :param plots_to_make: container of plot names; only ``'water_year'``
        is handled here.
    :param plot_variables: mapping of variable (column) name to the units
        shown in the y-axis label.
    :param context: seaborn plotting context passed to
        ``sns.plotting_context``.
    :param style: seaborn axes style passed to ``sns.axes_style``.
    :param filename: observation file name that encodes lat/lng.
    :return: None
    """
    print(plots_to_make)

    # get lat/lng from filename
    file_split = filename.split('_')
    lng = file_split[3].split('.txt')[0]
    lat = file_split[2]
    print('Plotting {} {}'.format(lat, lng))

    # read in every dataset to compare
    data = {}
    for key, items in compare_data_dict.items():
        if key == "snotel":
            data[key] = read_snotel_swe_obs(filename,
                                            science_test_data_dir,
                                            items)
        elif key == "VIC.4.2.d":
            data[key] = read_vic_42_output(lat, lng,
                                           science_test_data_dir,
                                           items)
        else:
            data[key] = read_vic_5_output(lat, lng,
                                          result_dir,
                                          items)

    # The request does not depend on the variable, so test it once.
    if 'water_year' not in plots_to_make:
        return

    plotname = '%s_%s.png' % (lat, lng)

    for plot_variable, units in plot_variables.items():

        with plt.rc_context(dict(sns.axes_style(style),
                                 **sns.plotting_context(context))):
            fig, ax = plt.subplots(figsize=(10, 10))

            # datasets lacking this variable are simply left out
            combined = pd.DataFrame(
                {key: d[plot_variable] for key, d in data.items()
                 if plot_variable in d})

            # DataFrame.items() replaces iteritems(), which was removed
            # in pandas 2.0.
            for key, series in combined.items():
                line = compare_data_dict[key]
                series.plot(
                    use_index=True,
                    linewidth=line['linewidth'],
                    ax=ax,
                    color=line['color'],
                    linestyle=line['linestyle'],
                    zorder=line['zorder'])

            ax.legend(loc='upper left')
            ax.set_ylabel("%s [%s]" % (plot_variable, units))

            # save figure
            os.makedirs(os.path.join(plot_dir, plot_variable),
                        exist_ok=True)
            savepath = os.path.join(plot_dir, plot_variable, plotname)
            plt.savefig(savepath, bbox_inches='tight')
            print(savepath)
            plt.clf()
            plt.close()
def heatmap_all_below(dataframe, taxa_dict, plot_dir, low_cutoff=0.001,
                      cap_facet_labels=True,
                      title=False, svg=False):
    """
    Make a heatmap of all the taxa below the taxa specified in taxa_dict.

    The figure is a grid of heatmaps (replicates across the grid columns,
    oxygen levels down the grid rows, weeks along each heatmap's x axis)
    sharing one color scale.  It is saved as a PDF in plot_dir.

    :param dataframe: dataframe of data to harvest excerpts from.  The
    caller's frame is not modified; a filtered copy is used.
    :param taxa_dict: a dictionary with taxonomic levels as keys and
    names as values.  E.g. {'Order':['Burkholderiales']}.  Only the first
    key and the first name in its list are used.
    :param plot_dir: path to save plots to; it is passed through
    elviz_utils.prepare_plot_dir first.
    :param low_cutoff: lowest abundance to include.  A taxa must be above
    this threshold in at least one sample to be included.
    :param cap_facet_labels: if True, rename columns with
    capitalize_some_column_names and facet on 'Replicate', '$O_2$' and
    'Week' instead of 'rep', 'oxy' and 'week'.
    :param title: if True, add a supertitle naming the taxa and the cutoff.
    :param svg: if True, also save an .svg copy next to the .pdf.
    :return: the seaborn FacetGrid that was drawn.
    """
    # TODO: this function has a lot of commonality with heatmap_from_taxa_dict
    # and could/should be factored.

    # grab the data for that taxa:
    # for now assume just 1 key and 1 value.
    taxa_level = list(taxa_dict.keys())[0]
    taxa_name = list(taxa_dict.values())[0][0]
    # Take an explicit copy: the rows are modified below, and writing into
    # a filtered slice triggers pandas' SettingWithCopy warning.
    dataframe = dataframe[dataframe[taxa_level] == taxa_name].copy()
    print(dataframe.head())

    # Columns to form a concatenated label from:
    label_cols = taxonomy_levels_below(taxa_level=taxa_level)
    print('label_cols: {}'.format(label_cols))

    # change nan cells to 'unknown'
    dataframe = dataframe.fillna('unknown')

    def label_building_lambda(f, column_value_list, taxa_name):
        """
        Returns a function that builds one row label.
        :param f: function called with (list of row values, taxa_name)
        :param column_value_list: column names whose row values are passed
        to f, in order
        :param taxa_name: passed through to f unchanged
        :return: function taking a dataframe row
        """
        print("columns passed: {}".format(column_value_list))
        print("Use those in {}".format(f))
        # e.g. with column_value_list = ['Family', 'Genus'] a row yields
        # f(['Comamonadaceae', 'Curvibacter'], taxa_name)
        return lambda row: f([row[col] for col in column_value_list],
                             taxa_name)

    # TODO: use the taxa_dict to get the columns to use!
    # make a name_string per row.  It's something like
    # "Comamonadaceae, Curvibacter" or "other"
    dataframe['name_string'] = dataframe.apply(
        label_building_lambda(f=label_from_taxa_colnames,
                              column_value_list=label_cols,
                              taxa_name=taxa_name),
        axis=1)
    print("dataframe.head() for name_string:")
    print(dataframe.head())

    # reduce to only name_string rows with at least one abundance > the
    # threshold set by low_cutoff so we don't have a zillion rows:
    # todo: allow high to change?
    dataframe = \
        abundance_utils.filter_by_abundance(data=dataframe,
                                            abundance_column='fraction of '
                                                             'reads',
                                            high=1,
                                            low=low_cutoff,
                                            taxonomy_column='name_string')

    def facet_heatmap(data, groupby, xrotation, **kws):
        """
        Used to fill the subplots with data.

        :param data: dataframe to plot (one facet's rows)
        :param groupby: column whose values become the heatmap columns
        :param xrotation: rotation applied to the x tick labels
        :param kws: forwarded to sns.heatmap
        :return:
        """
        # pivot only supports one column for now.
        # http://stackoverflow.com/questions/32805267/pandas-pivot-on-multiple-columns-gives-the-truth-value-of-a-dataframe-is-ambigu
        facet_data = data.pivot(index='name_string',
                                columns=groupby,
                                values='fraction of reads')
        sns.heatmap(facet_data, cmap="YlGnBu", **kws)
        # `g` is the FacetGrid created further down in the enclosing
        # function; it exists by the time this callback runs.
        g.set_xticklabels(rotation=xrotation)

    # set some plotting parameters
    xrotation = 90
    # Calculate the size, aspect depending on the number of
    #  rows per subplot
    num_rows = len(dataframe['name_string'].unique())
    size = 1 + 0.22*num_rows
    aspect = 1.5

    if cap_facet_labels:
        dataframe = capitalize_some_column_names(dataframe)
        facet_var = "Replicate"
        row_var = '$O_2$'
        col_var = "Week"
    else:
        facet_var = 'rep'
        row_var = 'oxy'
        col_var = 'week'

    # todo: this doesn't seem to be changing the font size.  Probably isn't
    # for other plotting calls either!
    # NOTE(review): seaborn appears to apply font_scale only when a named
    # context is also given, which would explain the todo above -- confirm.
    # NOTE(review): newer seaborn releases renamed FacetGrid's `size` to
    # `height` -- confirm against the seaborn version this project pins.
    with sns.plotting_context(font_scale=40):
        g = sns.FacetGrid(dataframe,
                          col=facet_var,
                          row=row_var,
                          size=size,
                          aspect=aspect,
                          margin_titles=True)

    g.set_axis_labels(col_var)

    # Add axes for the colorbar.  [left, bottom, width, height]
    cbar_ax = g.fig.add_axes([.94, .3, .02, .4], title='fraction \n of reads')

    g = g.map_dataframe(facet_heatmap,
                        cbar_ax=cbar_ax, vmin=0,
                        # MUST SET VMAX or all of the subplots will be on
                        # their own color scale and you might not know it.
                        vmax=dataframe['fraction of reads'].max(),
                        annot=False,
                        groupby=col_var,
                        xrotation=xrotation)

    # modify labels
    # Todo: make the 2nd argument a function
    y_label_formatter(g, italics_unless_other)

    # add space for x label
    g.fig.subplots_adjust(bottom=0.2)

    # room for colorbar (cbar)
    g.fig.subplots_adjust(right=0.85)

    # add a supertitle, you bet.
    supertitle_base = taxa_dict_to_descriptive_string(taxa_dict)
    if title:
        # adjust the grid's own figure rather than whichever figure pyplot
        # currently considers active
        g.fig.subplots_adjust(top=0.80)
        supertitle = \
            supertitle_base + '.  Min fraction of reads cutoff = {}'.format(
                low_cutoff)
        g.fig.suptitle(supertitle, size=15)

    # Also summarise # of taxa rows being grouped together.

    # prepare filename and save.
    plot_dir = elviz_utils.prepare_plot_dir(plot_dir)
    filepath = plot_dir + supertitle_base
    filepath += "--min_{}".format(low_cutoff)
    filepath += "--{}".format('x-week')
    filepath += ".pdf"
    print(filepath)
    g.fig.savefig(filepath)

    if svg:
        # Swap the extension by slicing: str.rstrip("pdf") strips any
        # trailing run of the characters p/d/f, not the literal suffix.
        g.fig.savefig(filepath[:-len("pdf")] + "svg")

    return g
def target_gene_expression_analysis(mirna2age, mirna2disease,mirna2family,gene2age):
    """Build the tissue-expression similarity heatmap for miRNAs.

    Loads the miRNA target table and the tissue-expression table from their
    hard-coded locations and hands the expression table to
    ``generate_matrix`` with the output name ``'tis_exp_heatmap_jaccard'``.

    None of the four arguments is used on this path.  The family/age violin
    figure that used to follow an unconditional ``return`` (and therefore
    never ran) lives in ``_plot_family_expression_violin``; call it with the
    expression table, ``mirna2age`` and ``mirna2family`` to regenerate it.

    :param mirna2age: mapping of miRNA name to age (unused here).
    :param mirna2disease: mapping of miRNA name to diseases (unused here).
    :param mirna2family: mapping of family name to member miRNAs (unused
        here).
    :param gene2age: mapping of gene name to age (unused here).
    :return: None
    """
    # The target table is loaded but only the expression heatmap is built;
    # pass mir_targetdb with 'target_heatmap_jaccard' to generate_matrix to
    # build the target-based variant as well.
    mir_targetdb = pd.read_csv('/Users/virpatel/Desktop/pub_stuff/relevant_data/mir_target_vectordb.txt', sep='\t',index_col=[0], encoding='utf-8')
    mir_expdb = pd.read_csv('/Users/virpatel/Desktop/pub_stuff/relevant_data/exp_data_alldmir.txt', sep='\t',index_col=[0])

    generate_matrix(mir_expdb,'tis_exp_heatmap_jaccard')


def _plot_family_expression_violin(mir_expdb, mirna2age, mirna2family):
    """Draw tissue-count violins per miRNA age, split by family membership.

    Not called by ``target_gene_expression_analysis``.  A miRNA counts as
    "in a family" when it belongs to a family with more than three members.
    miRNAs without an entry in ``mirna2age`` are skipped.  The figure is
    written to ``figures/violin_fam_no_fam_exp.pdf``.

    :param mir_expdb: expression table indexed by miRNA name; each row is
        summed to give the plotted "Number of Tissues" value.
    :param mirna2age: mapping of miRNA name to age.
    :param mirna2family: mapping of family name to list of member miRNAs.
    :return: None
    """
    # a set keeps the per-miRNA membership test cheap
    in_large_family = set()
    for members in mirna2family.values():
        if len(members) > 3:
            in_large_family.update(members)

    rows = []
    for mirna in mir_expdb.index:
        if mirna not in mirna2age:
            continue
        if mirna in in_large_family:
            membership = 'In miRNA Family'
        else:
            membership = 'Not In miRNA Family'
        rows.append([float(sum(mir_expdb.loc[mirna].tolist())),
                     float(mirna2age[mirna]),
                     membership])

    expdb = pd.DataFrame(rows, columns=['Number of Tissues','miRNA Age','In miRNA Family?'])
    # DataFrame.sort() no longer exists in pandas; sort_values() replaces it.
    expdb = expdb.sort_values('miRNA Age', ascending=True)

    # NOTE(review): seaborn appears to apply font_scale only when a named
    # context is also given -- confirm whether this has any effect.
    with sns.plotting_context(font_scale=300):
        sns.violinplot(x='miRNA Age',y='Number of Tissues',hue='In miRNA Family?',data=expdb,palette="muted", width=.7,legend=False,cut = 0)

        fig = plt.gcf()
        # 'lower right' is the valid matplotlib name for this corner;
        # 'bottom right' is not an accepted location string.
        plt.legend(frameon=True, loc='lower right')
        fig.set_size_inches(30, 10.5)

        ax1 = plt.gca()
        ax1.set_xlim([-1,17])
        ax1.set_ylim([-0.1, 20.5])

        plt.savefig('figures/violin_fam_no_fam_exp.pdf',bbox_inches='tight')

        plt.close()
Example #52
0
from matplotlib import rcParams
import matplotlib.cm as cm
import matplotlib as mpl

from matplotlib import rc

import random

from matplotlib.font_manager import FontProperties

import seaborn as sns
import time

# Module-wide seaborn defaults.
# NOTE(review): the palette returned by this call is discarded, so it does
# not change the active palette; sns.set_palette("bright") would -- confirm
# what was intended before changing it, since every figure would be affected.
sns.color_palette("bright")
sns.set(font_scale=1.2)
# NOTE(review): called outside a ``with`` block and with its result unused,
# so this presumably leaves the "talk" context unapplied -- confirm, and use
# sns.set_context("talk") if it was meant to take effect.
sns.plotting_context(context="talk", rc=None)

# Text properties for regular labels (presumably passed as a matplotlib
# fontdict -- verify against the callers).
font = {'family': 'Serif',
        'color':  'Black',
        'weight': 'normal',
        'size': 13,
        }
# Same as ``font`` but semibold and two points larger, for titles.
title_font = {'family': 'Serif',
        'color':  'Black',
        'weight': 'semibold',
        'size': 15,
        }

def organize_mrmr_ranking(pickle_file_path, folder_path, write_plot_path, figure_size, color_code, sensitive_features, methodology, figure_name, font, Title_font, target):

	final_feature_importance_dictionary = {}
# Final touches on the existing ``le`` grid: drop the left spine, keep a
# small right margin, put the legend in the lower-right corner and move the
# x label slightly below the axis.
le.despine(left=True)
le.fig.subplots_adjust(right=0.95)
legend = le.ax.legend(loc='lower right', shadow=True)
le.ax.xaxis.set_label_coords(0.5, -0.1)

#%% Plotting timeseries
flatui = ["#9b59b6", "#3498db", "#e74c3c"]

# Palette for the three series: the first two colors of ``pal`` plus the
# second color of seaborn's 'Paired' palette.
pal_ts = [pal[0], pal[1], sns.color_palette('Paired')[1]]

sns.set(
    context="poster",
    style='darkgrid',
    palette=pal_ts,
    rc={
        'axes.labelsize': 20.0,
        'figure.figsize': [10, 10],
        'legend.fontsize': 16.0,
        'xtick.labelsize': 20.0,
        'ytick.labelsize': 20.0,
    },
)

with sns.plotting_context('poster'):
    # First panel: the two modelled fluxes against the scintillometer.
    ax1 = plt.subplot(311)
    plt.plot_date(dates, H_mod_PT, 'o--', label='TSEB', lw=1)
    plt.plot_date(dates, H_mod_OS, 'o--', label='OSEB', lw=1)
    plt.plot_date(dates, H_Scinti, 'o--', label='Scintillometer', lw=1)
    plt.legend(loc='center left', bbox_to_anchor=(1.1, 0.5),
               prop={'size': 20})
    # x range: from the day of the first sample to the day after the last.
    start = dates[0].astype('datetime64[D]')
    stop = (dates[len(dates) - 1].astype('datetime64[D]')
            + np.timedelta64(1, 'D'))
    ax1.set_xlim([start, stop])
    ax1.set_ylabel('Energy flux (W/m2)', labelpad=20, fontsize=20)

    # Second panel: eddy-covariance data.
    ecData, ecTime = met.loadMetDataPN('EC')

    ax2 = plt.subplot(312)
    # index of the EC timestamp closest to the start of the plotted range
    dateCol_start = (np.abs(ecTime - start.astype('datetime64[s]'))).argmin()
import pandas as pd
import seaborn as sns
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure

#change directory to where my app files ares
import os
os.chdir("/Users/Charm/OneDrive/app_files/store")

# Use a context manager so the pickle file handle is closed after loading.
with open("groupEndAccuracyLevel5.p", "rb") as pickle_file:
    GroupEndAccuracy = pickle.load(pickle_file)


fig = Figure(figsize=(5, 4), dpi=100)
ax = fig.add_subplot(111)
canvas = FigureCanvas(fig)
# NOTE(review): kdeplot is not given ``ax=``, so it presumably draws on the
# current pyplot axes rather than on ``fig`` above, and ``ax`` is rebound to
# whatever axes it returns -- confirm which figure is meant to be shown.
with sns.plotting_context("notebook", font_scale=1.2):
    ax = sns.kdeplot(GroupEndAccuracy, shade=True,
                     color=sns.xkcd_rgb["light blue"], linewidth=3)
ax.set(yticks=[])
# Only remove a legend that is actually attached to the axes.
if ax.legend_ is not None:
    ax.legend_.remove()
ax.set_xlabel('End Accuracy', fontsize=16)
# No ax.hold(True) here: Matplotlib 3.x has no Axes.hold, and new artists
# are always added to the existing axes.
ax.set_title('Level5', fontsize=16)

predictedEnd = 0.85  # value output by my model from the App
plt.axvline(x=predictedEnd, ymin=0, ymax=1.2, linewidth=2, color='r')
aa = pd.Series.sort_values(GroupEndAccuracy)
# percentage of the population scoring below the predicted value
HigherThanPopulation = (
    float(np.array(np.where(aa < predictedEnd)).shape[1]) / len(aa) * 100)

print("Patient is likely to perform better than %d percent of the population on this task" %HigherThanPopulation)
def plot_bar_factor(loading, ax=None, bootstrap_err=None, grouping=None,
                    width=4, height=8, label_rows=True, title=None,
                    color_grouping=False, separate_ticklabels=True):
    """ Plots one factor loading as a horizontal bar plot (one bar per row)

    Bars show the absolute loading; the bar color encodes its sign.

    Args:
        loading: factor loadings as a dataframe or series
        ax: optional, plot axis. A new figure of size (width, height) is
            created when it is omitted
        bootstrap_err: a dataframe/series with the same index as loading. Used
            to plot confidence intervals on bars
        grouping: optional, output of "get_factor_groups", used to plot separating
            horizontal lines. The length of the second item of each entry
            is taken as that group's size
        width: figure width; also used as the padding of the y tick labels
        height: figure height; line widths, tick sizes and font sizes are
            scaled from it
        label_rows: boolean, whether to put ylabels
        title: optional text, drawn as the (rotated) x label
        color_grouping: boolean, alternate the y tick label color from one
            group to the next. Only used when grouping is given
        separate_ticklabels: not used by this function

    Returns:
        The figure created here when ax was not supplied, otherwise None
    """

    # font size of the row labels; max() avoids a division by zero when
    # loading has fewer than two rows
    DV_fontsize = height/max(loading.shape[0]//2, 1)*20
    # set up plot variables; remember the figure only if we create it, so
    # that it can be handed back to the caller at the end
    f = None
    if ax is None:
        f, ax = plt.subplots(1,1, figsize=(width, height))
    # change axis border width
    for axis in ['top','bottom','left','right']:
        ax.spines[axis].set_linewidth(height/8)
    with sns.plotting_context(font_scale=1.3):
        # plot absolute values, colored by the sign of the loading
        colors = sns.diverging_palette(220,15,n=2)
        ordered_colors = [colors[int(i)] for i in (np.sign(loading)+1)/2]
        if bootstrap_err is None:
            abs(loading).plot(kind='barh', ax=ax, color=ordered_colors,
                                width=.7)
        else:
            abs(loading).plot(kind='barh', ax=ax, color=ordered_colors,
                                width=.7, xerr=bootstrap_err,
                                error_kw={'linewidth': height/10})
        # draw lines separating groups
        if grouping is not None:
            factor_breaks = np.cumsum([len(i[1]) for i in grouping])[:-1]
            for y_val in factor_breaks:
                ax.hlines(y_val-.5, 0, 1.1, lw=height/10,
                          color='grey', linestyle='dashed')
        # set axes properties
        # NOTE: the order of the next five statements is kept exactly as it
        # was; the labels read back here are re-applied further down.
        ax.set_xlim(0, max(max(abs(loading)), 1.1))
        ax.set_yticklabels('')
        ax.set_xticklabels('')
        labels = ax.get_yticklabels()
        locs = ax.yaxis.get_ticklocs()
        # add factor label to plot
        if title:
            ax.set_xlabel(title, ha='center', va='top', fontsize=height/2,
                          weight='bold', rotation=90)
        ax.tick_params(axis='x', bottom=False, labelbottom=False)
        tick_colors = ['#000000','#444098']
        ax.set_facecolor('#DBDCE7')
        # faint white guide line behind every third bar
        for location in locs[2::3]:
            ax.axhline(y=location, xmin=0, xmax=1, color='w',
                       zorder=-1, lw=height/10)
        if label_rows:
            # pad each label with a trailing space, then put them back
            for label in labels:
                label.set_text('%s ' % (label.get_text()))
            ax.set_yticks(locs)
            left_labels=ax.set_yticklabels(labels,fontsize=DV_fontsize)
            ax.tick_params(axis='y', size=height/4, width=height/10, pad=width)
            if grouping is not None and color_grouping:
                # change colors of ticks based on factor group
                color_i = 1
                last_group = None
                for j, label in enumerate(left_labels):
                    group = np.digitize(locs[j], factor_breaks)
                    if last_group is None or group != last_group:
                        color_i = 1-color_i
                        last_group = group
                    color = tick_colors[color_i]
                    label.set_color(color)
        else:
            ax.set_yticklabels('')
            ax.tick_params(axis='y', size=0)
    return f
Example #56
0
                for j in range(IL):
                    table.append(a + sim_out[i*IL + j])

        # create a pandas frame
        print 'Making PANDAS frame...'
        df = pd.DataFrame(table, columns=columns)

        # turns out all we need is the follow pivoted table
        #perf = pd.pivot_table(df, values='Error', index=['SNR'], columns=['Algorithm'], aggfunc=np.mean)

        with open(pickle_file, 'wb') as f:
            pickle.dump([df, parameters], f)
            f.close()

    sns.set(style='whitegrid')
    sns.plotting_context(context='poster', font_scale=2.)
    pal = sns.cubehelix_palette(8, start=0.5, rot=-.75)

    # Draw the figure
    print 'Plotting...'

    df_rand = df[df['algorithm'] == 'RANDOM']
    df_det = df[df['algorithm'] == 'DETERMINISTIC']

    # Plot random
    p_rand = pd.pivot_table(df_rand, values='success', index=['K'], columns=['C'], aggfunc=np.mean)
    p_rand = p_rand.reindex_axis(sorted(p_rand.columns, key=int), axis=1)
    p_rand = p_rand.reindex_axis(sorted(p_rand.index, key=int), axis=0)

    p_det = pd.pivot_table(df_det, values='success', index=['K'], columns=['C'], aggfunc=np.mean)
    p_det = p_det.reindex_axis(sorted(p_det.columns, key=int), axis=1)
Example #57
0
 def get_grid(self, **kwargs):
     """Create a seaborn FacetGrid over ``self.df``.

     The grid is built while this object's axes style and plotting context
     are active.  Keyword arguments are forwarded to ``sns.FacetGrid``; a
     ``data`` keyword supplied by the caller is replaced by ``self.df``.
     """
     grid_options = dict(kwargs, data=self.df)
     with sns.axes_style(self.axes_style), \
             sns.plotting_context(self.plotting_context):
         return sns.FacetGrid(**grid_options)
Example #58
0
    def get_fig_for_dataframe(self):
        """Draw this visualisation as a seaborn FacetGrid and return its figure.

        Rows are kept only when every "string field" column holds one of the
        values listed as ``initial`` for that field in the column form data.
        The plotting function, the x tick labels and whether axis limits are
        derived from the data all come from the ``GRAPH_MAPPINGS`` entry for
        ``self.visualisation_type``.  The figure background is made fully
        transparent before it is returned.
        """
        def none_if_unset(value):
            # the string 'None' stands for "no selection"
            return value if value != 'None' else None

        unique_fields = self.get_column_form_data()["string_field_uniques"]
        allowed_values = {field["name"]: field["initial"]
                          for field in unique_fields}
        field_names = [field["name"] for field in unique_fields]

        df = self.data_mapping_revision.get_data()
        row_mask = df.isin(allowed_values)[field_names]
        df = DataFrame(df[row_mask.all(1)])

        # read for parity with the other split options; not used below
        split_y_axis_by = none_if_unset(self.split_y_axis_by)
        split_colour_by = none_if_unset(self.split_colour_by)

        kwargs = {
            "size": 5,
            "aspect": 1.75,
            "sharex": True,
            "sharey": True,
            "hue": split_colour_by,
            "legend": False,
            "legend_out": True,
        }

        split_by = none_if_unset(self.split_by)
        if split_by:
            # one facet per value, wrapped after four columns
            kwargs["row"] = None
            kwargs["col"] = split_by
            kwargs["col_wrap"] = 4

        mapping = GRAPH_MAPPINGS[self.visualisation_type]
        if mapping["xy"] == True and df.count()[0] > 0:
            # leave headroom beyond the largest x and y values
            kwargs["xlim"] = (0, float(max(df[self.x_axis])) * 1.3)
            kwargs["ylim"] = (0, float(max(df[self.y_axis])) * 1.1)

        with plotting_context("poster"):
            sns.set_style("white")
            labels = mapping["get_label_function"](self, df)
            g_kwargs = {"x_order": labels} if labels else {}
            print(kwargs)
            g = sns.FacetGrid(df, **kwargs)

            g.map(mapping["function"], self.x_axis, self.y_axis, **g_kwargs)
            if labels:
                if split_by:
                    for ax in g.axes:
                        ax.set_xticklabels(labels, rotation=90)
                else:
                    g.set_xticklabels(labels, rotation=90)
            g.set_legend()

            fig = g.fig
            fig.tight_layout()
            if self.visualisation_title:
                # reserve a strip at the top for the title (20px is ~0.3 inches)
                title_height_fraction = 0.2 / (fig.get_figheight() ** (0.5))
                fig.suptitle(self.visualisation_title, fontsize=20)
                fig.tight_layout(rect=(0, 0, 1, 1 - title_height_fraction))
            fig.patch.set_alpha(0.0)
            return fig
import argparse
import os
import sys

import cnvlib
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn

# Command-line interface: one or more .cnr files plus an optional output
# filename.  parse_args() runs at module level, so the arguments are parsed
# as soon as this file is executed or imported.
AP = argparse.ArgumentParser(description=__doc__)
AP.add_argument('cnr_files', nargs='+', help="All sample .cnr files.")
AP.add_argument('-o', '--output', help="Output filename.")
args = AP.parse_args()

# NOTE(review): the result of plotting_context() is discarded and it is not
# used as a ``with`` block, so the "poster" context is presumably never
# applied; seaborn.set(context="poster", ...) would apply it -- confirm the
# intended look before changing, since every figure would be affected.
seaborn.plotting_context("poster")
seaborn.set(font="Sans", style="darkgrid")



def load_depths_logs(cnr_fnames):
    logs = []
    depths = []
    for fname in cnr_fnames:
        cnr = cnvlib.read(fname)
        logs.append(cnr['log2'])
        depths.append(cnr['depth'])
        # Ninja move
        if len(cnr_fnames) == 1:
            plt.title(cnr.sample_id)