def pairwise_comp(data, cty_prop, prop_list, params, sig_level=0.05):
    """
    Pairwise comparison of parameters between cell-types.

    For every parameter in ``params`` and every unordered pair of cell-types
    drawn from ``prop_list``, two one-sided Mann-Whitney U tests are run
    (``x < y`` and ``y < x``). The direction with the smaller p-value is
    recorded, and all recorded p-values are then FDR-corrected together.

    Parameters
    ----------
    data : DataFrame-like
        Table indexed with ``data.loc[data[cty_prop] == <cell-type>, param]``.
    cty_prop : str
        Column of ``data`` holding the cell-type label.
    prop_list : iterable
        Cell-type labels to compare pairwise.
    params : iterable
        Columns of ``data`` to test.
    sig_level : float, optional
        Not used by the computation; kept so existing callers that pass it
        keep working. (The ``sig_level`` column of the result is a text
        label, unrelated to this argument.)

    Returns
    -------
    pandas.DataFrame
        One row per (parameter, pair) with columns ``Comp_type``
        (``'<a><<b>'`` for the direction with the smaller p-value),
        ``param``, ``p_val`` (FDR-corrected) and ``sig_level`` (label from
        ``man_utils.pval_to_sig``). When there is nothing to compare, an
        empty frame with these same four columns is returned.
    """
    result_columns = ['Comp_type', 'param', 'p_val', 'sig_level']
    diff_param_list = []
    p_val_list = []

    for param in params:
        for cty_x, cty_y in combinations(prop_list, 2):
            paramx = data.loc[data[cty_prop] == cty_x, param].values
            paramy = data.loc[data[cty_prop] == cty_y, param].values

            # Test both directions; the smaller p-value decides which
            # ordering is reported.
            _, p_val_x = mannwhitneyu(paramx, paramy, alternative='less')
            _, p_val_y = mannwhitneyu(paramy, paramx, alternative='less')

            if p_val_x < p_val_y:
                comp_type = '%s<%s' % (cty_x, cty_y)
            else:
                # Ties fall through to the reversed ordering.
                comp_type = '%s<%s' % (cty_y, cty_x)

            diff_param_list.append({'Comp_type': comp_type, 'param': param})
            p_val_list.append(min(p_val_x, p_val_y))

    if not p_val_list:
        # Nothing was compared (no params, or fewer than two cell-types).
        # Return a frame that still carries the expected columns so that
        # downstream groupby/filter calls do not fail on a missing column.
        return pd.DataFrame(columns=result_columns)

    # FDR correction for multiple comparison
    _, p_val_corrected = fdrcorrection(p_val_list)

    diff_param_df = pd.DataFrame(diff_param_list)
    diff_param_df['p_val'] = p_val_corrected
    diff_param_df['sig_level'] = diff_param_df['p_val'].apply(
        man_utils.pval_to_sig)
    return diff_param_df
def draw_significance(mu_dist1, mu_dist2, pval, ax, height_offset=.1):
    """
    Draw a significance bracket between the means of two distributions.

    The bracket rises from the peak of each distribution's 10-bin density
    histogram (at that distribution's mean on the x-axis) up to a common
    height, ``(1 + height_offset)`` times the taller of the two peaks. The
    label returned by ``man_utils.pval_to_sig(pval)`` is centred above it.

    Parameters
    ----------
    mu_dist1, mu_dist2 : array-like
        Samples of the two distributions.
    pval : float
        p-value turned into the text label.
    ax : axis object
        Axis providing ``plot`` and ``text``; drawn on in place.
    height_offset : float, optional
        Fractional head-room of the bracket above the taller peak.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    label = man_utils.pval_to_sig(pval)

    # Peak density of each sample, taken from a 10-bin normalised histogram.
    density1 = np.histogram(mu_dist1, density=True, bins=10)[0]
    density2 = np.histogram(mu_dist2, density=True, bins=10)[0]

    # x-positions of the two bracket legs.
    x_left = np.mean(mu_dist1)
    x_right = np.mean(mu_dist2)

    peak1 = np.max(density1)
    peak2 = np.max(density2)
    bracket_top = (1 + height_offset) * np.max([peak1, peak2])

    # Up from the first peak, across, then down to the second peak.
    bracket_x = [x_left, x_left, x_right, x_right]
    bracket_y = [peak1, bracket_top, bracket_top, peak2]
    ax.plot(bracket_x, bracket_y, color='k')

    ax.text((x_left + x_right) * .5, bracket_top, label,
            ha='center', va='bottom', color='k')
    return ax
def sig_test(feature_df):
    """
    Two-sided pairwise significance test of features between subclasses.

    For every feature in the module-level ``all_renamed_feats`` and every
    pair of subclasses from the module-level ``inh_subclasses``, the values
    of rows whose ``ttype`` matches each subclass are compared with a
    two-sided Mann-Whitney U test. All p-values are then FDR-corrected
    together.

    Parameters
    ----------
    feature_df : DataFrame-like
        Table with a ``ttype`` attribute/column and one column per feature.

    Returns
    -------
    pandas.DataFrame
        One row per (feature, pair) with columns ``comp_type``
        (``'<a>~<b>'``), ``feature``, ``p_val`` (FDR-corrected) and
        ``sig_level`` (label from ``man_utils.pval_to_sig``).
    """
    records = []
    raw_p_values = []

    for efeat in all_renamed_feats:
        # 2 for pairs, 3 for triplets, etc
        for subclass_a, subclass_b in combinations(inh_subclasses, 2):
            rows_a = feature_df.ttype == subclass_a
            values_a = feature_df.loc[rows_a, efeat].values
            rows_b = feature_df.ttype == subclass_b
            values_b = feature_df.loc[rows_b, efeat].values

            _stat, raw_p = mannwhitneyu(values_a, values_b,
                                        alternative='two-sided')

            records.append({
                'comp_type': '%s~%s' % (subclass_a, subclass_b),
                'feature': efeat,
            })
            raw_p_values.append(raw_p)

    # Correct all comparisons jointly for multiple testing.
    _rejected, corrected_p = fdrcorrection(raw_p_values)

    result_df = pd.DataFrame(records)
    result_df['p_val'] = corrected_p
    result_df['sig_level'] = result_df['p_val'].apply(man_utils.pval_to_sig)
    return result_df
inter_mask_blkdiag = np.kron( inter_mask_cell, np.ones((len(unique_cell_ids), len(unique_cell_ids)))) masked_intra_conductance = np.multiply(conductance_dist_matrix, intra_mask_blkdiag).flatten() masked_intra_conductance = masked_intra_conductance[ masked_intra_conductance != 0] masked_inter_conductance = np.multiply(conductance_dist_matrix, inter_mask_blkdiag).flatten() masked_inter_conductance = masked_inter_conductance[ masked_inter_conductance != 0] _, p_less_ = mannwhitneyu(masked_intra_conductance, masked_inter_conductance, alternative='less') sig_text = man_utils.pval_to_sig(p_less_) intra_dist[subclass_] = masked_intra_conductance.tolist() inter_dist[subclass_] = masked_inter_conductance.tolist() axis_fontsize = 14 tick_fontsize = 12 legend_fontsize = axis_fontsize ax[ii // 2, ii % 2] = sns.distplot(intra_dist[subclass_], norm_hist=True, ax=ax[ii // 2, ii % 2], hist_kws={'label': 'intra'}, color=intra_cell_intra_class_col) ax[ii // 2, ii % 2] = sns.distplot(inter_dist[subclass_], norm_hist=True, ax=ax[ii // 2, ii % 2],
cre_x, cre_y) if p_val_x < p_val_y else '%s<%s' % (cre_y, cre_x) p_val = min(p_val_x, p_val_y) sig_dict = { 'Comp_type': comp_type, 'gene': gene_, } diff_gene_expression_df.append(sig_dict) p_val_list.append(p_val) # FDR correction @5% _, p_val_corrected = fdrcorrection(p_val_list) diff_gene_expression_df = pd.DataFrame(diff_gene_expression_df) diff_gene_expression_df['p_val'] = p_val_corrected diff_gene_expression_df['sig_level'] = diff_gene_expression_df['p_val'].apply( lambda x: man_utils.pval_to_sig(x)) diff_gene_expression_df = diff_gene_expression_df.loc[ diff_gene_expression_df.sig_level != 'n.s.', ] gene_sig_grouped = diff_gene_expression_df.groupby('gene') exc_expression_melted = pd.melt(exc_expression_data, id_vars=['sample_id', 'Cre_line'], value_vars=h_genes, var_name='gene', value_name='cpm') exc_expression_melted['Cre_gene'] = exc_expression_melted.apply( lambda x: x.gene + '.' + x.Cre_line, axis=1) comp_types = exc_expression_melted['Cre_gene'].unique().tolist() data_types = exc_expression_melted.gene.unique().tolist()
for comb in combinations(inh_lines, 2): # 2 for pairs, 3 for triplets, etc cre_x,cre_y = comb cre_x_efeat = select_spiking_df.loc[select_spiking_df.Cre_line == cre_x,efeat].values cre_y_efeat = select_spiking_df.loc[select_spiking_df.Cre_line == cre_y,efeat].values _,p_val = mannwhitneyu(cre_x_efeat,cre_y_efeat,alternative='two-sided') comp_type = '%s~%s'%(cre_x,cre_y) sig_dict = {'comp_type' : comp_type, 'feature': efeat} diff_ephys_df.append(sig_dict) p_val_list.append(p_val) _,p_val_corrected = fdrcorrection(p_val_list) diff_ephys_df = pd.DataFrame(diff_ephys_df) diff_ephys_df['p_val'] = p_val_corrected diff_ephys_df['sig_level'] = diff_ephys_df['p_val'].apply(lambda x: man_utils.pval_to_sig(x)) spiking_melt_df = pd.melt(select_spiking_df,id_vars=['Cell_id','Cre_line'], value_vars=all_renamed_feats,var_name='features',value_name='value') # filtered ME cells filtered_me_inh_cells = utility.load_pickle(filtered_me_inh_cells_filename) spiking_melt_df = spiking_melt_df.loc[spiking_melt_df.Cell_id.isin(filtered_me_inh_cells),] ylim_list = [1.6, 60,220, 2.0] sns.set(style='whitegrid') fig,ax = plt.subplots(1,len(all_renamed_feats),sharey=False,figsize=(12,3)) for ii,feat_ in enumerate(all_renamed_feats): data = spiking_melt_df.loc[spiking_melt_df.features == feat_,]
'Model': sag_features_model, # 'Rbp4like':sag_perturbed_Rbp4, # 'Nr5like':sag_perturbed_Nr5 } sig_dict_list = [] for type_, data_ in data_dict.items(): feat_Rbp4 = data_.loc[data_.Cre_line == "Rbp4-Cre_KL100", select_sag_feature].values feat_Nr5 = data_.loc[data_.Cre_line == "Nr5a1-Cre", select_sag_feature].values _, p_val = mannwhitneyu(feat_Nr5, feat_Rbp4, alternative='two-sided') sig_dict_list.append({ 'data_type': type_, 'sig_level': man_utils.pval_to_sig(p_val), 'Comp_type': "Nr5a1-Cre~Rbp4-Cre_KL100" }) sig_df = pd.DataFrame(sig_dict_list) ephys_sig_group = sig_df.groupby('data_type') sig_vars = sig_df.data_type.tolist() sag_features_all = pd.concat( [ sag_features_exp, sag_features_exp_selected, sag_features_model, #sag_perturbed_Rbp4,sag_perturbed_Nr5 ], sort=False)