def make_hist(df, title, num_bins=8, code_percentile=.99):
    '''
    Takes data, title, number of bins (max 10), and percentile.
    Outputs a histogram.
    '''
    title = title.title()
    data_list = df.dropna().tolist()
    top_code_val = df.quantile(code_percentile)
    # Never ask for more bins than there are distinct values.
    distinct_vals = len(set(data_list))
    num_bins = min(distinct_vals, num_bins)
    # Top-code the x-range at the chosen percentile.
    sns.displot(df, bins=np.linspace(0, top_code_val, num_bins + 1))
    plt.xlabel(title)
    plt.title('Histogram of ' + title.replace("_", " "))
    plt.tight_layout()
    plt.savefig('Histogram_' + title + '.png', format='png')
    plt.close()
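# Usage sketch for make_hist (illustrative only; the sample data and names
# below are assumptions, not from the original project). make_hist expects a
# pandas Series, since it calls Series methods such as .quantile().
def _demo_make_hist():
    rng = np.random.default_rng(0)
    rents = pd.Series(rng.lognormal(mean=7, sigma=0.4, size=500))
    # Caps the x-range at the 95th percentile and uses at most 10 bins.
    make_hist(rents, "monthly_rent", num_bins=10, code_percentile=0.95)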
def plot_density_plots(metric):
    data = (pd.read_csv(f"../generated_data/data_{metric}.csv")
            .drop(columns="Unnamed: 0")
            .dropna()
            .rename(columns={"value": "distance"}))
    sns.displot(
        data=data,
        x="distance",
        hue="variable",
        multiple="stack",
        height=6,
        aspect=0.7,
    )
    plt.subplots_adjust(top=0.85)
    plt.title("\n".join(wrap(
        f"Distribution of the {metric} distances "
        f"among diagnostic categories",
        50,
    )))
    plt.savefig(DOTENV_KEY2VAL["GEN_FIGURES_DIR"] + metric + "_histogram.png")
def plot_median_score_distribution(df, title, path, file_name):
    if not os.path.exists(path):
        os.mkdir(path)
    dis_plt = sns.displot(df, x="median_scores", hue="dose", kind="hist",
                          multiple="stack", palette='viridis',
                          height=6.5, aspect=1.7)
    dis_plt.fig.suptitle(title)
    dis_plt.fig.subplots_adjust(top=.92)
    plt.savefig(os.path.join(path, file_name))
    plt.show()
def plot_p_value_dist(df, path, file_name):
    """Plot the p-value frequency distribution per dose"""
    if not os.path.exists(path):
        os.mkdir(path)
    dis_plt = sns.displot(df, x="p_values", col="dose", col_wrap=3,
                          binwidth=0.03)
    dis_plt.fig.suptitle("P-value distribution across all doses (1-6)",
                         size=16)
    dis_plt.fig.subplots_adjust(top=.92)
    plt.savefig(os.path.join(path, file_name))
    plt.show()
def general_plots():
    '''Make plots to illustrate the results of the scRNA-Seq analysis'''
    valuetype, use_spikeins, biotype_to_use = "Tpms", False, "protein_coding"
    adata, phases = read_counts_and_phases(valuetype, use_spikeins, biotype_to_use)

    # QC plots before filtering
    sc.pl.highest_expr_genes(adata, n_top=20, show=False, save=True)
    shutil.move("figures/highest_expr_genes.pdf",
                "figures/highest_expr_genes_AllCells.pdf")

    # Post-filtering QC
    do_log_normalization = True
    do_remove_blob = False
    adata, phasesfilt = qc_filtering(adata, do_log_normalization, do_remove_blob)
    sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
    sc.pl.highly_variable_genes(adata, show=False, save=True)
    shutil.move("figures/filter_genes_dispersion.pdf",
                "figures/filter_genes_dispersionAllCells.pdf")

    # UMAP plots
    # Idea: based on the expression of all genes, do the cell cycle phases cluster together?
    # Execution: scanpy methods: UMAP statistics first, then make UMAP
    # Output: UMAP plots
    sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
    sc.tl.umap(adata)
    plt.rcParams['figure.figsize'] = (10, 10)
    sc.pl.umap(adata, color=["phase"], show=False, save=True)
    shutil.move("figures/umap.pdf", "figures/umapAllCellsSeqCenterPhase.pdf")

    # General display of RNA abundances in TPMs
    sbn.displot(np.concatenate(adata.X), color="tab:orange")
    plt.xlabel("TPM")
    plt.ylabel("Density")
    plt.savefig("figures/rna_abundance_density.pdf")
    # plt.show()
    plt.close()
def _plot_metric_nodes_distribution(self, leaf_metrics: pd.DataFrame,
                                    dist_type: str = 'kde') -> None:
    g = sns.displot(data=leaf_metrics, x=self.metric_col,
                    hue=self.__NODE_RULES__COL, kind=dist_type,
                    common_norm=False)
    g.savefig(
        f"bias_distribution_{self.metric_col}-{self.dataset_name}.png",
        dpi=600)
    plt.show()
def distToCentroid(self, labels, distances):
    """
    Method to compute and plot the distance of each point to the centroid
    of its cluster

    input
        labels: list containing the cluster label attached to each index
        distances: distances from each point to every centroid
    output
        distribution plot of distances from each point to its cluster's centroid
    """
    self.clustersPCA = pd.DataFrame([list(i) for i in zip(labels, distances)],
                                    columns=['cluster', 'distance'])
    self.clustersPCA['distanceToCluster'] = self.clustersPCA['distance'].apply(lambda x: min(x))
    self.clustersPCA['distToCluster1'] = self.clustersPCA['distance'].apply(lambda x: x[0])
    self.clustersPCA['distToCluster2'] = self.clustersPCA['distance'].apply(lambda x: x[1])
    self.clustersPCA['distToCluster3'] = self.clustersPCA['distance'].apply(lambda x: x[2])
    # Relabel clusters from 0-based to 1-based for display.
    self.clustersPCA.cluster.replace({0: 1, 1: 2, 2: 3}, inplace=True)
    sns.displot(data=self.clustersPCA, x='distanceToCluster', hue='cluster', kde=True)
    plt.show()
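# Usage sketch (illustrative; assumes scikit-learn, which may or may not be
# what the original class uses). KMeans provides exactly the inputs that
# distToCentroid expects: .labels_ and, via .transform(), the distance from
# each point to every one of the k=3 centroids.
def _demo_dist_to_centroid(obj, X):
    from sklearn.cluster import KMeans
    km = KMeans(n_clusters=3, n_init=10, random_state=0).fit(X)
    distances = km.transform(X)  # shape (n_samples, 3)
    obj.distToCentroid(km.labels_, distances)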
def plot_island_distribution(data: PlotData, island_size_: int, tmp_dir: Path):
    csv_out = Path(tmp_dir, f"out_{island_size_}.csv")
    with time_func(f"Populating the CSV at {csv_out}"):
        populate_csv(csv_out, data.distributions, [island_size_])
    with time_func("Reading the CSV"):
        data_set = pd.read_csv(csv_out)
    for normalize in (True, False):
        xs = "avg_occurr" if not normalize else "ln_avg_occurr"
        with time_func("Displaying the dataset:"):
            sns.displot(data_set,
                        x=xs,
                        hue="edge_length",
                        kind="kde",
                        palette=sns.color_palette("Paired", data.lambdas))
        title = f"island_size_{island_size_}"
        if normalize:
            title = "normalized_" + title
        out_file = Path(data.out_dir, f"{title}.png")
        plt.title(title)
        plt.savefig(str(out_file))
def map_total_calc_cov(target_moment_df, col_name, title, file_path: str = None):
    # Remove any previously saved copy of the same file.
    if file_path and path.exists(file_path):
        os.remove(file_path)
    sns_plt = sns.displot(data=target_moment_df, x=col_name)
    plt.title(title)
    sns_plt.savefig(file_path)
def analysis(request):
    data = pd.read_csv(
        r"C:/Users/Ajay's/Desktop/Prediction/HousePricePrediction/ml/static/js/USA_Housing.csv"
    )
    sns.displot(data=data, x="Price", y="Avg. Area Number of Rooms",
                kind="kde", rug=True)
    plt.savefig(
        r"C:/Users/Ajay's/Desktop/Prediction/HousePricePrediction/ml/static/img/price_and_room.png"
    )
    sns.displot(data=data, x="Price", y="Avg. Area House Age", kind="kde")
    plt.savefig(
        r"C:/Users/Ajay's/Desktop/Prediction/HousePricePrediction/ml/static/img/price_and_house.png"
    )
    data = data.drop(['Address'], axis=1)
    X = data.drop('Price', axis=1)
    Y = data['Price']
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.30)
    model = LinearRegression()
    model.fit(X_train, Y_train)
    coeff_df = pd.DataFrame(model.coef_, X.columns, columns=['Coefficient'])
    v1 = round(coeff_df.at['Avg. Area Income', 'Coefficient'], 3)
    v2 = round(coeff_df.at['Avg. Area House Age', 'Coefficient'], 3)
    v3 = round(coeff_df.at['Avg. Area Number of Rooms', 'Coefficient'], 3)
    v4 = round(coeff_df.at['Avg. Area Number of Bedrooms', 'Coefficient'], 3)
    v5 = round(coeff_df.at['Area Population', 'Coefficient'], 3)
    prediction = model.predict(X_test)
    # RMSE: square root of the mean squared error.
    error = round(np.sqrt(metrics.mean_squared_error(Y_test, prediction)), 3)
    return render(request, 'dash.html', {
        "v1": v1,
        "v2": v2,
        "v3": v3,
        "v4": v4,
        "v5": v5,
        "error": error,
    })
def plot_congestion_dist(columns, dataframe, path, prefix, save, show):
    xlabels = {
        'TempExMax': 'Maximum temporal extent [min]',
        'SpatExMax': 'Maximum spatial extent [m]',
        'TempDist': 'Minimum temporal distance to incident [min]',
        'SpatDist': 'Minimum spatial distance to incident [m]',
        'temporalGlobalLoc': 'Relative temporal location',
        'spatialGlobalLoc': 'Relative spatial location',
        'temporalInternalLoc': 'Internal relative temporal location',
        'spatialInternalLoc': 'Internal relative spatial location',
        'Coverage': r'Ratio of jammed cells in covering rectangle [\%]',
        'TimeLossCar': 'Time loss per car [s]',
        'TimeLossHGV': 'Time loss per HGV [s]',
    }
    for atr in columns:
        plt.style.use('seaborn')
        plt.rcParams.update(tex_fonts)
        # displot is figure-level and creates its own figure, so the title
        # and labels must be set after the call.
        sns.displot(x=atr, data=dataframe, palette='Spectral')
        plt.title('Distribution of ' + atr)
        plt.ylabel('Count')
        plt.xlabel(xlabels.get(atr, atr))
        if save:
            plt.savefig(path + prefix + '_congestion_dist_' + atr + '.pdf')
            if not show:
                plt.close()
        if show:
            plt.show()
        else:
            plt.close()
def hist_pair2(df, stat, col2, cum=False, title=None):
    """Histogram of population pair stats.

    dd12_{n}_{k}_pop{kj}
    ddRank12_{n}_{k}_pop{kj}

    Parameters
    ----------
    df : pd.DataFrame
        Input data with one column per pair stat.
    stat : str
        Stat prefix to select columns by.
    col2 : str
        Name for the melted value column.
    cum : bool, optional
        Plot the ECDF instead of a histogram. The default is False.
    title : str, optional
        Plot title. The default is None.

    Returns
    -------
    None.

    """
    stat_cols = [col for col in df.columns if stat == col.split("_")[0]]
    df_stat = pd.melt(df.filter(regex=f"{stat}"), value_vars=stat_cols,
                      var_name=stat, value_name=col2)
    # add pop column
    pop = df_stat[stat].str.split("_", n=-1, expand=True)
    df_stat["pops"] = pop[pop.columns[-1]]
    df_stat["subpop"] = pop[2]
    df_stat[stat] = pop[0]
    # plotting if just obs or just sims
    if title is None:
        title = stat
    name = ""
    # sns.histplot has no `col` or `kind` arguments; faceting needs the
    # figure-level sns.displot in both branches.
    if cum:
        g = sns.displot(data=df_stat, x=col2, hue="subpop", col="pops",
                        kind="ecdf")
        name = "cum"
    else:
        g = sns.displot(data=df_stat, x=col2, hue="subpop", col="pops",
                        kind="hist")
    g.savefig(f"{stat}.{name}histpair.pdf", bbox_inches='tight')
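# Usage sketch (illustrative; the column names follow the dd12_{n}_{k}_pop{kj}
# pattern from the docstring, but the data itself is made up).
def _demo_hist_pair2():
    rng = np.random.default_rng(6)
    demo = pd.DataFrame({
        "dd12_1_2_pop1": rng.normal(size=100),
        "dd12_1_3_pop1": rng.normal(size=100),
        "dd12_1_2_pop2": rng.normal(size=100),
        "dd12_1_3_pop2": rng.normal(size=100),
    })
    # Facets by pop (pop1/pop2), hue by the k index (2/3);
    # writes dd12.histpair.pdf
    hist_pair2(demo, stat="dd12", col2="value")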
def kolm_test(file, pref, source, target, meta_file):
    """
    Performs a KS test for the optimization process

    Args:
        file (str): Path to the file with the TopoCMap output table
        pref (str): Prefix for all statistical files (e.g. histograms etc.)
        source (str): Source cell type
        target (str): Target cell type
        meta_file (str): Path to the file with drugs metadata

    Returns:
        statistics (:obj:`list` of :obj:`tuple` of :obj:`float`): Output of
            the ks_2samp function for all 10 iterations
        mean_1 (float): Mean of the 'Golden Standard' molecules distribution
        mean_2 (float): Mean of the means of all molecules distributions
    """
    dist = []
    cids = []
    cmap_db = pd.read_csv(file)
    drug_meta = pd.read_csv(meta_file)
    cids_cur = stand_chems(source, target)
    cids_cur = [float(cid) for cid in cids_cur]
    pert_cur = []
    cids_cur = pd.unique(cids_cur)
    for ind, chem in enumerate(drug_meta['pubchem_cid']):
        for chem_1 in cids_cur:
            try:
                if int(chem) == int(chem_1):
                    pert_cur.append(drug_meta['pert_id'].loc[ind])
            except ValueError:
                continue
    for ind, chem in enumerate(cmap_db['pert_id']):
        for chem_1 in pert_cur:
            if chem == chem_1:
                cids.append(chem)
                dist.append(cmap_db['cosine_dist'].loc[ind])
    statistics = []
    mean_1 = np.mean(dist)
    means = []
    cmap_db = cmap_db[~cmap_db["pert_id"].isin(pert_cur)]
    print(len(cmap_db))
    for i in range(10):
        # Compare the reference distances against a random sample of equal size.
        dist_rand = np.random.choice(list(cmap_db['cosine_dist']), len(dist))
        means.append(np.mean(dist_rand))
        stat, pval = stats.ks_2samp(dist, dist_rand)
        statistics.append((stat, pval))
        sns_plot = sns.displot([dist, dist_rand], kde=True)
        sns_plot.savefig(pref + str(i) + ".png")
    mean_2 = np.mean(means)
    return statistics, mean_1, mean_2
def plot_2d_kde(x, y, hue, data):
    """
    Plot a bivariate kernel density estimate

    Parameters
    ----------
    x : str or array
        x-axis variable (column name in `data` or a vector)
    y : str or array
        y-axis variable (column name in `data` or a vector)
    hue : str or array
        Variable to map to colors in order to visually distinguish
        separate bivariate densities
    data : pd.DataFrame
        Data to plot

    Returns
    -------
    None
    """
    # displot is figure-level and creates its own figure, so the size is
    # set through height/aspect rather than plt.figure(figsize=...).
    sns.displot(x=x, y=y, hue=hue, kind='kde', data=data, height=16)
    plt.show()
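# Usage sketch (illustrative data): with a DataFrame, the x/y/hue arguments
# can be column names, mirroring seaborn's data/x/y/hue convention.
def _demo_plot_2d_kde():
    rng = np.random.default_rng(1)
    demo = pd.DataFrame({
        "x": rng.normal(size=200),
        "y": rng.normal(size=200),
        "group": rng.choice(["a", "b"], size=200),
    })
    plot_2d_kde(x="x", y="y", hue="group", data=demo)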
def plot_cont(df, plt_typ):
    numeric_columns = df.select_dtypes(include=['number']).columns.tolist()
    df = df[numeric_columns]
    for i in range(0, len(numeric_columns), 2):
        if len(numeric_columns) > i + 1:
            plt.figure(figsize=(10, 4))
            plt.subplot(121)
            if plt_typ == 'boxplot':
                sns.boxplot(df[numeric_columns[i]])
                plt.subplot(122)
                sns.boxplot(df[numeric_columns[i + 1]])
            elif plt_typ == 'displot':
                # The figure-level sns.displot cannot draw into an existing
                # subplot; the axes-level sns.histplot is used here instead.
                sns.histplot(df[numeric_columns[i]])
                plt.subplot(122)
                sns.histplot(df[numeric_columns[i + 1]])
            else:
                print('Pass either displot/boxplot')
            plt.tight_layout()
            plt.show()
def GCdistribut(self, _indf, out, X='query_length', Dup=[], log=False, title=''):
    if not _indf.empty:
        indef = _indf.copy()
        if Dup:
            indef = indef[Dup + [X]].drop_duplicates(keep='first')
        indef[X] = indef[X].astype(float)
        dp = sns.displot(data=indef, x=X, kde=True, log_scale=log)
        dp.set_xticklabels(rotation=270)
        if title:
            plt.title(title)
        plt.tight_layout()
        plt.savefig(out)
        plt.close()
def create_fig(data, draw=False):
    if draw:
        df = pd.DataFrame(data)
        df.columns = ['Node', 'Edges']
        # One bin per possible edge count.
        fig = sns.displot(df['Edges'], bins=max(df['Edges']) + 1, kde=True)
        plt.title('Amount of edges per node')
        plt.show()
def DerCore_Ratio(enh):
    cdratio = enh.loc[enh["core_remodeling"] == 1].groupby(
        ['enh_id', 'core'])["arch"].count().reset_index()
    cdratio.columns = ["enh_id", "core", "num derived regions per enh"]
    data = cdratio.loc[cdratio["core"] == 0].describe()
    sns.set("talk")
    g = sns.displot(cdratio.loc[cdratio["core"] == 0,
                                "num derived regions per enh"],
                    kind="ecdf")
    outf = f"{RE}cdf_n_der.pdf"
    plt.savefig(outf, bbox_inches="tight")
    return data
def show_sentence_length():
    train_data['sentence_length'] = list(
        map(lambda x: len(x), train_data['sentence']))
    # Plot the sentence-length distribution of the training set
    sns.countplot(train_data["sentence_length"])
    plt.xticks([])
    plt.show()
    # Plot the length distribution as a displot
    sns.displot(train_data["sentence_length"])
    plt.yticks([])
    plt.show()

    valid_data['sentence_length'] = list(
        map(lambda x: len(x), valid_data['sentence']))
    # Plot the sentence-length distribution of the validation set
    sns.countplot(valid_data["sentence_length"])
    plt.xticks([])
    plt.show()
    # Plot the length distribution as a displot
    sns.displot(valid_data["sentence_length"])
    plt.yticks([])
    plt.show()
def _plot_univariate(y_test, y_sampled):
    # Tag each value with 1 (original) or 2 (sampled), then map to labels.
    idlist = [[i + 1] * len(x) for i, x in enumerate([y_test, y_sampled])]
    df = pd.DataFrame(np.array(
        [np.concatenate([y_test, y_sampled]),
         np.concatenate(idlist)]).T,
        columns=['value', 'type'])
    df['type'] = df['type'].map({1: 'original', 2: 'sampled'})
    return sns.displot(df, x='value', rug=True, kind='kde', color='black',
                       hue='type')
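# Usage sketch (illustrative data): compares a held-out target vector with a
# synthetically sampled one; the function returns the seaborn FacetGrid.
def _demo_plot_univariate():
    rng = np.random.default_rng(2)
    y_test = rng.normal(0.0, 1.0, 300)
    y_sampled = rng.normal(0.2, 1.1, 300)
    g = _plot_univariate(y_test, y_sampled)
    g.savefig("univariate_kde.png")  # hypothetical output path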
def get_charging_time(self):
    # Plot the PDF of the EV charging time
    sns.set_theme(style="darkgrid")
    # Convert the vector of curves into a vector of hours
    hour = 1
    nem_charging_time = []
    for item in self.ev_charging_time:
        if item > 0:
            aux = [hour] * abs(int(item))
            nem_charging_time.extend(aux)
        hour += 1
        if hour == 25:
            hour = 1
    ev_charging_dict = {"value": nem_charging_time}
    df = pd.DataFrame.from_dict(ev_charging_dict)
    sns.displot(data=df, x="value", kind="kde")
    plt.gcf().subplots_adjust(bottom=0.15)
    plt.xlabel('Time [h]')
    plt.xlim(0, 24)
    plt.ylabel('PDF')
    plt.savefig("plot_charging_time.png", dpi=199)
    plt.show()
def plot_distance_from_median_pl(distance_files, patient_types):
    print("-------- Distance from median PL --------")
    distance_stats_df = pd.DataFrame()
    for distances, patient_type in zip(distance_files, patient_types):
        distances = pd.read_csv(DOTENV_KEY2VAL["GEN_DATA_DIR"] + distances)
        distances = distances.set_index("Unnamed: 0")
        for i in range(len(HOMOLOGY_DIMENSIONS)):
            distance_data = distances.iloc[:, i]
            print(f"-------- {patient_type} H_{i} --------")
            distance_stats_dict = dict()
            distance_stats_dict["Mean"] = np.mean(distance_data)
            distance_stats_dict["Median"] = np.median(distance_data)
            distance_stats_dict["Standard deviation"] = np.std(distance_data)
            distance_stats_dict["Q3"] = np.quantile(distance_data, 0.75)
            distance_stats_dict["Max"] = np.max(distance_data)
            # distance_stats_dict["kurtosis"] = kurtosis(distance_data)
            distance_stats_dict["Skewness"] = skew(distance_data)
            distance_stats_dict["Shapiro-Wilk test"] = shapiro(
                distance_data
            ).pvalue
            print(f"Shapiro-Wilk: {shapiro(distance_data)}")
            distance_stats_df_entry = pd.DataFrame.from_dict(
                distance_stats_dict, orient="index"
            )
            distance_stats_df_entry.columns = [f"{patient_type} $H_{i}$"]
            # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
            distance_stats_df = pd.concat(
                [distance_stats_df, distance_stats_df_entry.T]
            )
            ax = sns.displot(distance_data, kde=True)
            # ax.set(ylim=(0, 1))
            # Finetuned to the data
            ax.set(xlim=(0, 12))
            plt.savefig(
                DOTENV_KEY2VAL["GEN_FIGURES_DIR"]
                + "/median_pls/"
                + f"median_pl_{patient_type}_H_{i}_displot.png",
                bbox_inches="tight",
            )
    test_results = pd.DataFrame(
        distance_stats_df["Shapiro-Wilk test"]
    ).applymap(lambda x: format_tex(x))
    stats = distance_stats_df[
        ["Mean", "Median", "Standard deviation", "Q3", "Max", "Skewness"]
    ].applymap(lambda x: format_tex_numbers(x))
    distance_stats_df = stats.join(test_results)
    print(test_results)
    distance_stats_df.to_latex(
        DOTENV_KEY2VAL["GEN_DATA_DIR"]
        + "distance_from_median_pl_statistics.tex",
        float_format="{:0.2f}".format,
        escape=False,
    )
def Exploration_Taille_Masques():
    df_train = cheminSources + dftrain
    st.title("Size of the shape masks\n")
    st.write('\n')
    st.subheader("Distribution of the areas (in pixels) of the encoded masks:")
    file2 = df_train
    df_train = load_data(file2)
    sns.histplot(df_train[df_train['nb_pixels'] != 0].nb_pixels,
                 bins=20, kde=True, stat="density")
    st.pyplot()
    nb_form = df_train[df_train['nb_pixels'] != 0].shape[0]
    st.subheader(
        "Distribution of the areas (in pixels) of the encoded masks by label:"
    )
    df_form = df_train[(df_train['nb_pixels'] != 0)]
    sns.set_context(font_scale=2)
    sns.displot(data=df_form, bins=20, x='nb_pixels', col="Label",
                stat="density")
    st.pyplot()
    st.subheader("Associated boxplots:")
    sns.boxplot(y='nb_pixels', x='Label', data=df_form,
                # width=0.5,
                palette="colorblind")
    st.pyplot()
def ETC2Run():
    delta_vals = [i * 0.04 for i in range(1, 25)]
    ub_regrets = []
    actual_regrets = []
    num_runs = 500
    for delta in delta_vals:
        ub_regret_sum, actual_regret_sum = 0, 0
        for run_no in range(num_runs):
            ub_regret, actual_regret = ExploreThenCommit2(n=1000, delta=delta)
            ub_regret_sum += ub_regret
            actual_regret_sum += actual_regret
        ub_regrets.append(ub_regret_sum / num_runs)
        actual_regrets.append(actual_regret_sum / num_runs)
    plt.plot(delta_vals, ub_regrets, label="Upper Bound")
    plt.plot(delta_vals, actual_regrets, label="Actual Regret")
    plt.legend()
    plt.show()

    m_vals = [i * 15 for i in range(1, 25)]
    actual_regrets = np.zeros((len(m_vals), num_runs))
    for i, m in enumerate(m_vals):
        for run_no in range(num_runs):
            _, actual_regret = ExploreThenCommit2(n=2000, delta=0.1, m=m)
            actual_regrets[i, run_no] = actual_regret
    plt.plot(m_vals, actual_regrets.mean(axis=1))
    plt.show()
    plt.plot(m_vals, actual_regrets.std(axis=1))
    plt.show()
    hue = np.array(m_vals)
    hue = np.repeat(hue, num_runs)
    sns.displot(x=actual_regrets.flatten(), hue=hue, kind="kde")
    plt.show()
def plot_input_length(df, split_folder):
    """
    Plots the input length of the decisions in the given dataframe
    :param df:              the dataframe containing the decision texts
    :param split_folder:    where to save the plots and csv files
    :return:
    """
    # compute median input length
    input_length_distribution = df[['num_tokens_spacy', 'num_tokens_bert']].describe().round(0).astype(int)
    input_length_distribution.to_csv(split_folder / 'input_length_distribution.csv', index_label='measure')

    # bin outliers together at the cutoff point
    cutoff = 4000
    # .copy() avoids pandas' SettingWithCopyWarning on the clipped columns
    cut_df = df[['num_tokens_spacy', 'num_tokens_bert']].copy()
    cut_df.num_tokens_spacy = cut_df.num_tokens_spacy.clip(upper=cutoff)
    cut_df.num_tokens_bert = cut_df.num_tokens_bert.clip(upper=cutoff)

    hist_df = pd.concat([cut_df.num_tokens_spacy, cut_df.num_tokens_bert], keys=['spacy', 'bert']).to_frame()
    hist_df = hist_df.reset_index(level=0)
    hist_df = hist_df.rename(columns={'level_0': 'tokenizer', 0: 'Number of tokens'})

    plot = sns.displot(hist_df, x="Number of tokens", hue="tokenizer",
                       bins=100, kde=True, fill=True, height=5, aspect=2.5,
                       legend=False)
    plot.set(xticks=list(range(0, 4500, 500)))
    plt.ylabel('Number of court cases')
    plt.legend(["BERT", "SpaCy"], loc='upper right', title='Tokenizer',
               fontsize=16, title_fontsize=18)
    plot.savefig(split_folder / 'input_length_distribution-histogram.png', bbox_inches="tight")
    plt.clf()

    plot = sns.displot(hist_df, x="Number of tokens", hue="tokenizer",
                       kind="ecdf", legend=False)
    plt.ylabel('Number of court cases')
    plt.legend(["BERT", "SpaCy"], loc='lower right', title='Tokenizer')
    plot.savefig(split_folder / 'input_length_distribution-cumulative.png', bbox_inches="tight")
    plt.clf()

    plot = sns.displot(cut_df, x="num_tokens_spacy", y="num_tokens_bert")
    plot.savefig(split_folder / 'input_length_distribution-bivariate.png', bbox_inches="tight")
    plt.clf()
def distributions(
    df: pd.DataFrame,
    dist_class: str,
    column: str,
    show: bool,
    save_location: str,
) -> None:
    """
    Plot the distribution of the same x variable for multiple classes

    Args:
        df (pd.DataFrame): Data
        dist_class (str): Column of df used to split the values into classes
        column (str): Column of df to plot the distribution of
        show (bool): Flag to show the plot
        save_location (str): Path where the plot should be saved
    """
    sns.displot(x=df[column], hue=df[dist_class], kind="kde", clip=(1.0, 8.0))
    plt.savefig(save_location)
    if show:
        plt.show()
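# Usage sketch (illustrative data): one KDE per class of `species`, clipped
# to the function's hard-coded (1.0, 8.0) support.
def _demo_distributions():
    rng = np.random.default_rng(3)
    demo = pd.DataFrame({
        "length": rng.uniform(1.0, 8.0, 400),
        "species": rng.choice(["a", "b", "c"], size=400),
    })
    distributions(demo, dist_class="species", column="length",
                  show=False, save_location="length_by_species.png")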
def make_perc_coverred_dist(self):
    """
    From the full summary, plot the percent covered by sequencing data
    For All, HIP, Supp-SGD and Supp-PROT
    :return:
    """
    print(self._all_summary.columns)
    print(self._all_mut.columns)
    not_fully_covered = self._all_summary.loc[
        (self._all_summary["aligned_perc"] < 1) &
        (self._all_summary["found"] == "y")]
    print(self._all_summary["aligned_perc"])
    print(not_fully_covered.shape)
    fig, ax = plt.subplots(figsize=(10, 12))
    # sns.displot is figure-level and does not accept an `ax` argument;
    # the axes-level sns.histplot draws into the prepared subplot instead.
    sns.histplot(not_fully_covered.aligned_perc * 100, bins=40, ax=ax,
                 color="#084c61", edgecolor="#084c61")
    plt.title("Human 9.1 ORFs")
    plt.xlabel("Percent of ORF len aligned")
    plt.tight_layout()
    plt.savefig(os.path.join(self._dir, "nfully_human91_perc_dist.png"))
def displot(data, key, aim, **kwargs):
    sns.set_style('white')
    g = sns.displot(data=data, x=aim, kind="ecdf", hue="algorithm",
                    hue_order=['DRPA', 'FP', 'WMMSE', 'maximum', 'random'],
                    height=3, aspect=1.5, facet_kws=dict(legend_out=False),
                    **kwargs)
    # displot is figure-level; the returned FacetGrid owns the figure.
    fig = g.fig
    # Reposition the legend without touching the private Legend._loc attribute.
    sns.move_legend(g, "center right", title='')
    plt.xlabel(f'Average {aim} (bps/Hz)')
    plt.grid(axis="y")
    return fig, g
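# Usage sketch (illustrative data): the wrapper expects long-form data with
# an "algorithm" column matching hue_order and a numeric column named by `aim`.
def _demo_displot_wrapper():
    rng = np.random.default_rng(4)
    algos = ['DRPA', 'FP', 'WMMSE', 'maximum', 'random']
    demo = pd.DataFrame({
        "algorithm": np.repeat(algos, 100),
        "rate": rng.gamma(2.0, 1.5, 500),
    })
    fig, g = displot(demo, key=None, aim="rate")
    fig.savefig("rate_ecdf.png")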
def oneVarDistribution(xName, data, title, catName=""):
    # Numeric variables get a distribution plot, categorical ones a count
    # plot; an optional category column is mapped to hue.
    preparation()
    hue = catName if catName != "" else None
    if is_numeric_dtype(data.loc[:, xName]):
        sns.displot(data=data, x=xName, hue=hue)
    else:
        sns.countplot(data=data, x=xName, hue=hue)
    plt.title(title)
    plt.show()
def plot_correlation_distribution(countries, delta=timedelta(days=60), weekly=True):
    """
    Args:
        countries: country codes to include ('CZ', 'PL', 'SE', 'IT')
        delta: time window for the correlations
        weekly: whether to aggregate by week
    """
    # regions
    x = None
    regions = []
    if 'CZ' in countries:
        regions.append(CZ_regions)
    if 'PL' in countries:
        regions.append(PL_regions)
    if 'SE' in countries:
        regions.append(SE_regions)
    if 'IT' in countries:
        regions.append(IT_regions)
    # compute correlations
    for country in regions:
        components = 'ID' if country[0][:2] in {'PL', 'SE'} else 'IRD'
        corrs = prediction_data_correlation(country, components, delta=delta,
                                            weekly=weekly)
        corrs['Country'] = country[0][:2]
        if x is None:
            x = corrs
        else:
            x = pd.concat([x, corrs])
    # plot
    fig, ax = plt.subplots(figsize=(8, 6))
    # sns.displot is figure-level and has no `ax` parameter; the axes-level
    # sns.histplot draws into the prepared axes.
    sns.histplot(data=x, x="D", hue="Country", element="step",
                 multiple="stack", bins=20, ax=ax)
    ax.set_xlim([-1, 1])
def plot_prob_dist(data, output_path, cluster_type, n_cluster):
    """
    KDE plot of cluster probabilities.

    Args:
        data: Neuron activation dataframe with cluster labels
        output_path: Output path to save to
        cluster_type: Cluster algorithm used, for file naming
        n_cluster: Number of unique clusters in the clustering algorithm

    Returns:

    """
    # displot is figure-level; its size is controlled via height/aspect.
    sns.displot(data=data, x='label prob', hue='label', multiple='stack',
                palette='dark', kind='kde', aspect=2)
    plt.xlabel('Cluster Probability', fontsize=14)
    plt.ylabel('Density', fontsize=14)
    plt.savefig(join(output_path, f'{cluster_type}_{n_cluster}_prob_dist.png'),
                bbox_inches='tight')
def confoundplot(tseries, gs_ts, gs_dist=None, name=None, normalize=True,
                 units=None, tr=None, hide_x=True, color='b', nskip=0,
                 cutoff=None, ylims=None):

    # Define TR and number of frames
    notr = False
    if tr is None:
        notr = True
        tr = 1.
    ntsteps = len(tseries)

    # Normalize time series
    tseries = np.array(tseries)
    if normalize:
        tseries /= tr

    # Define nested GridSpec
    gs = mgs.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs_ts,
                                     width_ratios=[1, 100], wspace=0.0)

    ax_ts = plt.subplot(gs[1])
    ax_ts.grid(False)
    ax_ts.plot(tseries, color=color)
    ax_ts.set_xlim((0, ntsteps - 1))

    # Set 10 frame markers in X axis
    interval = ntsteps // 10
    xticks = list(range(0, ntsteps)[::interval]) + [ntsteps - 1]
    ax_ts.set_xticks(xticks)

    if not hide_x:
        if notr:
            ax_ts.set_xlabel('time (frame #)')
        else:
            ax_ts.set_xlabel('time (s)')
            labels = tr * np.array(xticks)
            ax_ts.set_xticklabels(['%.02f' % t for t in labels.tolist()])
    else:
        ax_ts.set_xticklabels([])

    no_scale = notr or not normalize
    if name is not None:
        var_label = name
        if units is not None:
            var_label += (' [{}]' if no_scale else ' [{}/s]').format(units)
        ax_ts.set_ylabel(var_label)

    for side in ["top", "right"]:
        ax_ts.spines[side].set_color('none')
        ax_ts.spines[side].set_visible(False)

    if not hide_x:
        ax_ts.spines["bottom"].set_position(('outward', 20))
        ax_ts.xaxis.set_ticks_position('bottom')
    else:
        ax_ts.spines["bottom"].set_color('none')
        ax_ts.spines["bottom"].set_visible(False)

    ax_ts.spines["left"].set_position(('outward', 30))
    ax_ts.yaxis.set_ticks_position('left')

    # Calculate Y limits
    def_ylims = [0.95 * tseries[~np.isnan(tseries)].min(),
                 1.1 * tseries[~np.isnan(tseries)].max()]
    if ylims is not None:
        if ylims[0] is not None:
            def_ylims[0] = min([def_ylims[0], ylims[0]])
        if ylims[1] is not None:
            def_ylims[1] = max([def_ylims[1], ylims[1]])
    ax_ts.set_ylim(def_ylims)
    yticks = sorted(def_ylims)
    ax_ts.set_yticks(yticks)
    ax_ts.set_yticklabels(['%.02f' % y for y in yticks])
    yrange = def_ylims[1] - def_ylims[0]

    # Plot average
    if cutoff is None:
        cutoff = []
    cutoff.insert(0, tseries[~np.isnan(tseries)].mean())

    for i, thr in enumerate(cutoff):
        ax_ts.plot((0, ntsteps - 1), [thr] * 2,
                   linewidth=.75,
                   linestyle='-' if i == 0 else ':',
                   color=color if i == 0 else 'k')

        if i == 0:
            mean_label = r'$\mu$=%.3f%s' % (thr, units if units is not None else '')
            ax_ts.annotate(
                mean_label, xy=(ntsteps - 1, thr), xytext=(11, 0),
                textcoords='offset points', va='center', color='w', size=10,
                bbox=dict(boxstyle='round', fc=color, ec='none',
                          color='none', lw=0),
                arrowprops=dict(
                    arrowstyle='wedge,tail_width=0.8', lw=0, patchA=None,
                    patchB=None, fc=color, ec='none', relpos=(0.01, 0.5)))
        else:
            y_off = [0.0, 0.0]
            for pth in cutoff[:i]:
                inc = abs(thr - pth)
                if inc < yrange:
                    factor = (- (inc / yrange) + 1) ** 2
                    if (thr - pth) < 0.0:
                        y_off[0] -= factor * 20
                    else:
                        y_off[1] += factor * 20

            offset = y_off[0] if abs(y_off[0]) > y_off[1] else y_off[1]
            a_label = '%.2f%s' % (thr, units if units is not None else '')
            ax_ts.annotate(
                a_label, xy=(ntsteps - 1, thr), xytext=(11, offset),
                textcoords='offset points', va='center', color='w', size=10,
                bbox=dict(boxstyle='round', fc='dimgray', ec='none',
                          color='none', lw=0),
                arrowprops=dict(
                    arrowstyle='wedge,tail_width=.9', lw=0, patchA=None,
                    patchB=None, fc='dimgray', ec='none', relpos=(.1, .5)))

    if gs_dist is not None:
        ax_dist = plt.subplot(gs_dist)
        # The figure-level sns.displot takes neither `vertical` nor `ax`;
        # sns.histplot(y=...) draws the vertical marginal into ax_dist.
        sns.histplot(y=tseries, ax=ax_dist)
        ax_dist.set_xlabel('Timesteps')
        ax_dist.set_ylim(ax_ts.get_ylim())
        ax_dist.set_yticklabels([])

        return [ax_ts, ax_dist], gs
    else:
        return ax_ts, gs
def confoundplot(tseries, gs_ts, gs_dist=None, name=None, normalize=True,
                 units=None, tr=None, hide_x=True, color='b', nskip=4):

    # Define TR and number of frames
    notr = False
    if tr is None:
        notr = True
        tr = 1.
    ntsteps = len(tseries)

    # Normalize time series
    tseries = np.array(tseries)
    if normalize:
        tseries /= tr

    # Define nested GridSpec
    gs = mgs.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs_ts,
                                     width_ratios=[1, 100], wspace=0.0)

    ax_ts = plt.subplot(gs[1])
    ax_ts.grid(False)
    ax_ts.plot(tseries, color=color)
    ax_ts.set_xlim((0, ntsteps - 1))

    # Set 10 frame markers in X axis
    interval = ntsteps // 10
    xticks = list(range(0, ntsteps)[::interval]) + [ntsteps - 1]
    ax_ts.set_xticks(xticks)

    if not hide_x:
        if notr:
            ax_ts.set_xlabel('time (frame #)')
        else:
            ax_ts.set_xlabel('time (s)')
            labels = tr * np.array(xticks)
            ax_ts.set_xticklabels(['%.02f' % t for t in labels.tolist()])
    else:
        ax_ts.set_xticklabels([])

    if name is not None:
        var_label = name
        if units is not None:
            var_label += (' [{}]' if notr else ' [{}/s]').format(units)
        ax_ts.set_ylabel(var_label)

    for side in ["top", "right"]:
        ax_ts.spines[side].set_color('none')
        ax_ts.spines[side].set_visible(False)

    if not hide_x:
        ax_ts.spines["bottom"].set_position(('outward', 20))
        ax_ts.xaxis.set_ticks_position('bottom')
    else:
        ax_ts.spines["bottom"].set_color('none')
        ax_ts.spines["bottom"].set_visible(False)

    ax_ts.spines["left"].set_position(('outward', 30))
    ax_ts.yaxis.set_ticks_position('left')

    # Plot average
    ax_ts.plot((0, ntsteps), [tseries.mean()] * 2, color=color, linestyle=':')
    ax_ts.set_ylim(tseries[nskip:].min(), tseries[nskip:].max())

    if gs_dist is not None:
        ax_dist = plt.subplot(gs_dist)
        # The figure-level sns.displot takes neither `vertical` nor `ax`;
        # sns.histplot(y=...) draws the vertical marginal into ax_dist.
        sns.histplot(y=tseries, ax=ax_dist)
        ax_dist.set_xlabel('Timesteps')
        ax_dist.set_ylim(ax_ts.get_ylim())
        ax_dist.set_yticklabels([])

        return [ax_ts, ax_dist], gs
    else:
        return ax_ts, gs
def confoundplot(tseries, gs_ts, gs_dist=None, name=None, units=None,
                 tr=None, hide_x=True, color='b', nskip=0, cutoff=None,
                 ylims=None):

    # Define TR and number of frames
    notr = False
    if tr is None:
        notr = True
        tr = 1.
    ntsteps = len(tseries)
    tseries = np.array(tseries)

    # Define nested GridSpec
    gs = mgs.GridSpecFromSubplotSpec(1, 2, subplot_spec=gs_ts,
                                     width_ratios=[1, 100], wspace=0.0)

    ax_ts = plt.subplot(gs[1])
    ax_ts.grid(False)

    # Set 10 frame markers in X axis
    interval = max((ntsteps // 10, ntsteps // 5, 1))
    xticks = list(range(0, ntsteps)[::interval])
    ax_ts.set_xticks(xticks)

    if not hide_x:
        if notr:
            ax_ts.set_xlabel('time (frame #)')
        else:
            ax_ts.set_xlabel('time (s)')
            labels = tr * np.array(xticks)
            ax_ts.set_xticklabels(['%.02f' % t for t in labels.tolist()])
    else:
        ax_ts.set_xticklabels([])

    if name is not None:
        if units is not None:
            name += ' [%s]' % units

        ax_ts.annotate(
            name, xy=(0.0, 0.7), xytext=(0, 0), xycoords='axes fraction',
            textcoords='offset points', va='center', ha='left',
            color=color, size=8,
            bbox={'boxstyle': 'round', 'fc': 'w', 'ec': 'none',
                  'color': 'none', 'lw': 0, 'alpha': 0.8})

    for side in ["top", "right"]:
        ax_ts.spines[side].set_color('none')
        ax_ts.spines[side].set_visible(False)

    if not hide_x:
        ax_ts.spines["bottom"].set_position(('outward', 20))
        ax_ts.xaxis.set_ticks_position('bottom')
    else:
        ax_ts.spines["bottom"].set_color('none')
        ax_ts.spines["bottom"].set_visible(False)

    # ax_ts.spines["left"].set_position(('outward', 30))
    ax_ts.spines["left"].set_color('none')
    ax_ts.spines["left"].set_visible(False)
    # ax_ts.yaxis.set_ticks_position('left')

    ax_ts.set_yticks([])
    ax_ts.set_yticklabels([])

    nonnan = tseries[~np.isnan(tseries)]
    if nonnan.size > 0:
        # Calculate Y limits
        valrange = (nonnan.max() - nonnan.min())
        def_ylims = [nonnan.min() - 0.1 * valrange,
                     nonnan.max() + 0.1 * valrange]
        if ylims is not None:
            if ylims[0] is not None:
                def_ylims[0] = min([def_ylims[0], ylims[0]])
            if ylims[1] is not None:
                def_ylims[1] = max([def_ylims[1], ylims[1]])

        # Add space for plot title and mean/SD annotation
        def_ylims[0] -= 0.1 * (def_ylims[1] - def_ylims[0])
        ax_ts.set_ylim(def_ylims)

        # Annotate stats
        maxv = nonnan.max()
        mean = nonnan.mean()
        stdv = nonnan.std()
        p95 = np.percentile(nonnan, 95.0)
    else:
        maxv = 0
        mean = 0
        stdv = 0
        p95 = 0

    stats_label = (r'max: {max:.3f}{units} $\bullet$ mean: {mean:.3f}{units} '
                   r'$\bullet$ $\sigma$: {sigma:.3f}').format(
        max=maxv, mean=mean, units=units or '', sigma=stdv)
    ax_ts.annotate(
        stats_label, xy=(0.98, 0.7), xycoords='axes fraction',
        xytext=(0, 0), textcoords='offset points', va='center', ha='right',
        color=color, size=4,
        bbox={'boxstyle': 'round', 'fc': 'w', 'ec': 'none', 'color': 'none',
              'lw': 0, 'alpha': 0.8})

    # Annotate percentile 95
    ax_ts.plot((0, ntsteps - 1), [p95] * 2, linewidth=.1, color='lightgray')
    ax_ts.annotate(
        '%.2f' % p95, xy=(0, p95), xytext=(-1, 0),
        textcoords='offset points', va='center', ha='right',
        color='lightgray', size=3)

    if cutoff is None:
        cutoff = []

    for i, thr in enumerate(cutoff):
        ax_ts.plot((0, ntsteps - 1), [thr] * 2, linewidth=.2, color='dimgray')
        ax_ts.annotate(
            '%.2f' % thr, xy=(0, thr), xytext=(-1, 0),
            textcoords='offset points', va='center', ha='right',
            color='dimgray', size=3)

    ax_ts.plot(tseries, color=color, linewidth=.8)
    ax_ts.set_xlim((0, ntsteps - 1))

    if gs_dist is not None:
        ax_dist = plt.subplot(gs_dist)
        # The figure-level sns.displot takes neither `vertical` nor `ax`;
        # sns.histplot(y=...) draws the vertical marginal into ax_dist.
        sns.histplot(y=tseries, ax=ax_dist)
        ax_dist.set_xlabel('Timesteps')
        ax_dist.set_ylim(ax_ts.get_ylim())
        ax_dist.set_yticklabels([])

        return [ax_ts, ax_dist], gs

    return ax_ts, gs
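# Usage sketch (illustrative data; works with any of the confoundplot
# variants above). confoundplot draws into slots of an outer GridSpec: a
# wide slot for the time series and, optionally, a narrow one for the
# vertical marginal distribution.
def _demo_confoundplot():
    import matplotlib.gridspec as mgs
    rng = np.random.default_rng(5)
    fd = np.abs(rng.normal(0.2, 0.1, 200))  # e.g. a framewise-displacement trace
    fig = plt.figure(figsize=(10, 2))
    outer = mgs.GridSpec(1, 2, width_ratios=[5, 1], wspace=0.05)
    confoundplot(fd, outer[0], gs_dist=outer[1], name='FD', units='mm',
                 tr=2.0, hide_x=False)
    fig.savefig("fd_trace.png")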