def testMakeTrinaryData(self):
    """
    Checks that makeTrinaryData, applied to the provider's normalized
    data, yields a dataframe that helpers.isValidDataFrame accepts for
    the normalized data's columns.
    """
    if IGNORE_TEST:
        return
    provider = self.provider
    df_trinary = transform_data.makeTrinaryData(df=provider.df_normalized)
    # Expected columns are read after the transformation, as before.
    expected_columns = provider.df_normalized.columns
    is_valid = helpers.isValidDataFrame(df_trinary, expected_columns)
    self.assertTrue(is_valid)
def __init__(self, df_trinary=None):
    """
    Stores the trinary data and initializes the group placeholders.

    :param pd.DataFrame df_trinary: trinary valued DF (has values -1, 0, 1).
        If None, it is built with transform_data.makeTrinaryData from
        the df_normalized of a new DataProvider (after calling do()),
        using is_include_nan=False.
    """
    if df_trinary is None:
        provider = DataProvider()
        provider.do()
        # Hand the provider's data over explicitly instead of relying on
        # whatever makeTrinaryData does when it is given no dataframe.
        df_trinary = transform_data.makeTrinaryData(
            df=provider.df_normalized, is_include_nan=False)
    self.df_trinary = df_trinary
    self.df_group = None  # Dataframe describing groups
    self.df_gene_group = None  # Genes by group
def plotThresholdHeatmap(provider=None, df=None, ax=None, is_plot=True,
                         min_abs=1.0, **kwargs):
    """
    Plots normalized thresholded expression levels.

    :param DataProvider provider: resolved through getProvider
    :param pd.DataFrame df: dataframe to plot with trinary values.
        If None, provider.df_normalized is converted with
        transform_data.makeTrinaryData; otherwise a copy of df is plotted.
    :param plt.Axis ax: axis to draw on; if None, a new 16x10 figure is
        created and its current axis is used
    :param bool is_plot: shows plot if True
    :param float min_abs: minimum absolute for threshold
        (only used when df is None)
    :param dict kwargs: plotting options; only "title" is read here
        (default "Normalized Data")
    """
    # Data setup
    provider = getProvider(provider)
    if df is None:
        df_plot = transform_data.makeTrinaryData(
            provider.df_normalized, min_abs=min_abs)
    else:
        df_plot = df.copy()
    # Plot construct
    if ax is None:
        plt.figure(figsize=(16, 10))
        ax = plt.gca()
    plot_kwargs = {
        cup.XLABEL: "times",
        cup.YLABEL: "gene",
        cup.TITLE: kwargs.get("title", "Normalized Data"),
    }
    # Showing is deferred so the tick labels and state transitions below
    # are drawn on the same axis before anything is displayed.
    cup.plotTrinaryHeatmap(df_plot, ax=ax, is_plot=False, **plot_kwargs)
    columns = df_plot.columns
    ax.set_xticks(np.arange(len(columns)))
    ax.set_xticklabels(columns)
    plotStateTransitions(provider=provider, ax=ax, ymax=len(df_plot),
                         is_plot=False)
    if is_plot:
        plt.show()
def plotClusteredHeatmap(provider=None, ncluster=5, **kwargs):
    """
    Plots a heatmap where categorical axes are grouped with similar values.

    :param DataProvider provider: resolved through getProvider
    :param int ncluster: number of clusters
    :param dict kwargs: plot parameters forwarded to plotThresholdHeatmap;
        "title" defaults to "Clustered Differential Expression" and may
        be overridden by the caller
    """
    provider = getProvider(provider)
    df = transform_data.makeTrinaryData(provider.df_normalized,
                                        is_include_nan=False)
    df = util_dataframe.pruneSmallRows(df, min_abs=1.0)
    kmeans = KMeans(n_clusters=ncluster, random_state=0).fit(df)
    # Temporarily attach the cluster labels so rows can be ordered by
    # cluster, then drop the helper column again.
    df[CLUSTER] = kmeans.labels_
    df = df.sort_values(CLUSTER)
    del df[CLUSTER]
    # Blank out (NaN) the values that are numerically zero; vectorized
    # replacement for the deprecated element-wise DataFrame.applymap.
    df = df.mask(np.isclose(df.values, 0))
    # Use a default rather than a fixed keyword so that a caller-supplied
    # title no longer collides with it (duplicate-keyword TypeError).
    kwargs.setdefault("title", "Clustered Differential Expression")
    plotThresholdHeatmap(provider=provider, df=df, **kwargs)