def find_genes_CX(drug, model, gdsc_expr, gdsc_dr, test_tcga_expr):
    """Fit a CXPlain explainer on GDSC data and rank pathways on TCGA samples.

    The function builds masked inputs with ``get_masked_data_for_CXPlain``,
    stores the returned baselines, fits a CXPlain ensemble (``num_models=3``)
    on the GDSC data, attributes the test samples per pathway group, and
    writes two pathway rankings plus the fitted explainer to disk.

    Relies on module-level names not defined in this block: ``res_dir``,
    ``pathway_matrix``, ``pathway_names``, ``SEED``,
    ``get_masked_data_for_CXPlain`` and ``get_ranked_list``.

    Args:
        drug: String appended to ``res_dir`` to form the output directory.
        model: Object handed to ``get_masked_data_for_CXPlain`` and to
            ``CXPlain`` (presumably the trained predictor -- confirm with
            the caller).
        gdsc_expr: DataFrame of training inputs; its ``index`` and
            ``values`` are used.
        gdsc_dr: Training targets; only ``.values`` is used.
        test_tcga_expr: DataFrame of samples to explain; its ``index`` and
            ``values`` are used.

    Returns:
        None. Outputs are ``baselines.csv``, ``pathways.csv`` and an
        ``explainer/`` directory under ``res_dir + drug``.
    """
    drug_dir = res_dir + drug
    explainer_dir = drug_dir + '/explainer/'
    # Create the output directory before the first write; exist_ok avoids the
    # check-then-create race and also creates missing parent directories.
    os.makedirs(drug_dir, exist_ok=True)

    print('obtaining masked data...')
    masked_data, list_of_baselines = get_masked_data_for_CXPlain(
        model, gdsc_expr, pathway_matrix, subtract_mean=True)

    # Reshape to one row per training sample; the columns are labelled with
    # pathway_names below, so the row width must equal len(pathway_names).
    lb = np.concatenate(list_of_baselines).reshape(len(gdsc_expr), -1)
    lb = pd.DataFrame(lb, index=gdsc_expr.index, columns=pathway_names)
    lb.to_csv(drug_dir + '/baselines.csv')
    print('obtained masked data...')

    # TensorFlow and CXPlain are imported inside the function, and v2
    # behaviour is disabled before any CXPlain object is constructed.
    import tensorflow as tf
    tf.compat.v1.disable_v2_behavior()
    tf.keras.backend.clear_session()
    tf.random.set_seed(SEED)
    from tensorflow.python.keras.losses import mean_squared_error as loss
    from cxplain import CXPlain
    from cxplain.backend.model_builders.custom_mlp import CustomMLPModelBuilder

    n_pathways = len(pathway_names)
    model_builder = CustomMLPModelBuilder(
        num_layers=2,
        num_units=512,
        batch_size=16,
        learning_rate=0.001,
        n_feature_groups=n_pathways,
    )

    print(gdsc_expr.values.shape, gdsc_dr.values.shape)
    print("Fitting CXPlain model")
    # No masking operation is passed (None): the masked data computed above
    # is supplied directly to fit().
    explainer = CXPlain(model, model_builder, None, loss, num_models=3)
    explainer.fit(gdsc_expr.values, gdsc_dr.values, masked_data=masked_data)

    print("Attributing using CXPlain")
    # explain_groups returns a pair; only the first element is used here.
    attr, _ = explainer.explain_groups(test_tcga_expr.values)
    print('attr')
    attr = pd.DataFrame(attr, index=test_tcga_expr.index, columns=pathway_names)

    # Two rankings over all pathways: one from get_ranked_list, one by mean
    # absolute attribution across the test samples.
    borda = get_ranked_list(attr, k=n_pathways)
    attr_mean = list(np.abs(attr).mean(axis=0).nlargest(n_pathways).index)

    out = pd.DataFrame(columns=['borda', 'mean'])
    out['borda'] = borda
    out['mean'] = attr_mean
    out.to_csv(drug_dir + '/pathways.csv', index=False)

    os.makedirs(explainer_dir, exist_ok=True)
    explainer.save(explainer_dir, custom_model_saver=None)
def find_genes_CX(drug, model, gdsc_expr, gdsc_dr, test_tcga_expr):
    """Fit CXPlain from precalculated omegas and rank pathways on TCGA samples.

    The function calls ``precalculate_omegas`` (mode ``'delta-scaled'``) with
    a per-element torch MSE loss, optionally stores the returned baselines,
    fits a CXPlain ensemble (``num_models=3``) through ``fit_precalc``,
    attributes the test samples per pathway group, and writes two pathway
    rankings plus the fitted explainer to disk.

    Relies on module-level names not defined in this block: ``res_dir``,
    ``pathway_matrix``, ``pathway_names``, ``SEED``, ``torch``,
    ``precalculate_omegas`` and ``get_ranked_list``.

    Args:
        drug: String appended to ``res_dir`` to form the output directory.
        model: Object handed to ``precalculate_omegas`` and to ``CXPlain``
            (presumably the trained predictor -- confirm with the caller).
        gdsc_expr: DataFrame of training inputs; its ``index`` and
            ``values`` are used.
        gdsc_dr: Training targets; passed to ``precalculate_omegas`` as-is
            and to ``fit_precalc`` via ``.values``.
        test_tcga_expr: DataFrame of samples to explain; its ``index`` and
            ``values`` are used.

    Returns:
        None. Outputs are ``pathways.csv`` and an ``explainer/`` directory
        under ``res_dir + drug``, plus ``baselines.csv`` when baselines are
        returned.
    """
    drug_dir = res_dir + drug
    explainer_dir = drug_dir + '/explainer/'
    # Create the output directory before the first write; exist_ok avoids the
    # check-then-create race and also creates missing parent directories.
    os.makedirs(drug_dir, exist_ok=True)

    print('obtaining precalculating omegas...')
    # Kept under its own name so it is not silently replaced by the
    # TensorFlow loss imported further down.
    torch_loss = torch.nn.MSELoss(reduction='none')

    # Earlier experiments used the default mode, 'scaled-difference' and
    # 'delta-of-delta'; 'delta-scaled' is the one currently in use.
    omegas, lb = precalculate_omegas(
        model, gdsc_expr, gdsc_dr, pathway_matrix, torch_loss,
        mode='delta-scaled')

    # Baselines may be absent (None); save them only when they are returned.
    if lb is not None:
        lb = pd.DataFrame(lb, index=gdsc_expr.index, columns=pathway_names)
        lb.to_csv(drug_dir + '/baselines.csv')
    print('obtained masked data...')

    # TensorFlow and CXPlain are imported inside the function, and v2
    # behaviour is disabled before any CXPlain object is constructed.
    import tensorflow as tf
    tf.compat.v1.disable_v2_behavior()
    tf.keras.backend.clear_session()
    tf.random.set_seed(SEED)
    from tensorflow.python.keras.losses import mean_squared_error as tf_loss
    from cxplain import CXPlain
    from cxplain.backend.model_builders.custom_mlp_precalc import CustomMLPModelBuilder

    n_pathways = len(pathway_names)
    model_builder = CustomMLPModelBuilder(
        num_layers=2,
        num_units=512,
        batch_size=16,
        learning_rate=0.001,
        n_feature_groups=n_pathways,
    )

    print(gdsc_expr.values.shape, gdsc_dr.values.shape)
    print("Fitting CXPlain model")
    # No masking operation is passed (None): the precalculated omegas are
    # supplied directly to fit_precalc().
    explainer = CXPlain(model, model_builder, None, tf_loss, num_models=3)
    explainer.fit_precalc(gdsc_expr.values, gdsc_dr.values, omega=omegas)

    print("Attributing using CXPlain")
    # explain_groups returns a pair; only the first element is used here.
    attr, _ = explainer.explain_groups(test_tcga_expr.values)
    print('attr')
    attr = pd.DataFrame(attr, index=test_tcga_expr.index, columns=pathway_names)

    # Two rankings over all pathways: one from get_ranked_list, one by mean
    # absolute attribution across the test samples.
    borda = get_ranked_list(attr, k=n_pathways)
    attr_mean = list(np.abs(attr).mean(axis=0).nlargest(n_pathways).index)

    out = pd.DataFrame(columns=['borda', 'mean'])
    out['borda'] = borda
    out['mean'] = attr_mean
    out.to_csv(drug_dir + '/pathways.csv', index=False)

    os.makedirs(explainer_dir, exist_ok=True)
    explainer.save(explainer_dir, custom_model_saver=None)
# NOTE(review): this physical line has lost its original line breaks. It opens
# with the closing `return` of a function whose header is not visible here,
# followed by a module-level CXPlain demo: build a CustomMLPModelBuilder
# (2 layers, 32 units, batch size 32, 10 feature groups), fit CXPlain on the
# first 200 rows of x/y using masked data from get_masked_data_for_CXPlain,
# explain rows 200 onward, print the attribution shape and row 200, and pick
# the first held-out row where |column 20| < 0.1 and |column 40| > 1.
# As written, everything after the first '#' is parsed as a comment; restore
# the newlines before relying on this code.
# NOTE(review): find_genes_CX in this file unpacks explain_groups as
# `attr, _ = ...`, whereas here the result is passed straight to pd.DataFrame
# -- confirm which return shape applies to this version of cxplain.
return (x, y_pred, masked_outs) model_builder = CustomMLPModelBuilder(num_layers=2, num_units=32, batch_size=32, learning_rate=0.001, n_feature_groups=10) # masking_operation = ZeroMasking() # masking_operation = FastZeroMasking() k = get_masked_data_for_CXPlain(model, x[:200]) explainer = CXPlain(model, model_builder, None, loss) #,downsample_factors=(5,)) explainer.fit(x[:200], y[:200], masked_data=k) attributions = explainer.explain_groups(x[200:]) attr = pd.DataFrame(attributions, index=range(200, 300)) # plt.plot(range(50), np.abs(attr).mean(axis=0), label='attr') # #plt.plot(range(50), np.abs(mult).mean(axis=0), label='mult') # plt.show() print(attr.shape) print(attr.loc[200]) df = pd.DataFrame(x[200:], index=range(200, 300)) idx = df.loc[np.abs(df[20]) < 0.1].loc[np.abs(df[40]) > 1].index[0] # plt.plot(range(50), np.abs(attr.loc[idx]), label='attr') # plt.legend() # plt.show()