Beispiel #1
0
def find_genes_CX(drug, model, gdsc_expr, gdsc_dr, test_tcga_expr):
    """Fit a CXPlain explainer on masked GDSC data and rank pathways for ``drug``.

    Despite the function name, attributions are produced per feature group:
    the attribution table has one column per entry of the module-level
    ``pathway_names``.

    Files written under ``res_dir + drug`` (this directory is not created
    here and must already exist):

    * ``baselines.csv`` -- the baselines returned by
      ``get_masked_data_for_CXPlain``, one row per ``gdsc_expr`` sample.
    * ``pathways.csv`` -- two rankings of the pathway names: ``borda``
      (output of ``get_ranked_list``; presumably a Borda-count ranking --
      confirm against that helper) and ``mean`` (descending mean absolute
      attribution over the explained samples).
    * ``explainer/`` -- the fitted explainer, via ``explainer.save``.

    Args:
        drug: Name appended to ``res_dir`` to form the output directory.
        model: Model being explained; handed to
            ``get_masked_data_for_CXPlain`` and to ``CXPlain``.
        gdsc_expr: Training inputs. Must support ``len()``, ``.index`` and
            ``.values`` (presumably a pandas DataFrame -- confirm).
        gdsc_dr: Training targets; only ``.values`` is used.
        test_tcga_expr: Samples to explain; ``.values`` and ``.index`` are
            used.

    Returns:
        None. Results are written to disk.
    """
    # Plain concatenation is kept (rather than os.path.join) so the produced
    # paths are identical to before regardless of how res_dir is terminated.
    out_dir = res_dir + drug

    print('obtaining masked data...')
    masked_data, list_of_baselines = get_masked_data_for_CXPlain(
        model, gdsc_expr, pathway_matrix, subtract_mean=True)
    # Flatten the per-sample baselines into one row per training sample.
    lb = np.concatenate(list_of_baselines).reshape(len(gdsc_expr), -1)
    lb = pd.DataFrame(lb, index=gdsc_expr.index, columns=pathway_names)
    lb.to_csv(out_dir + '/baselines.csv')
    print('obtained masked data...')

    # TensorFlow/CXPlain are imported locally, presumably so that TF is only
    # loaded and reconfigured when an explainer is actually fitted.
    import tensorflow as tf
    tf.compat.v1.disable_v2_behavior()
    tf.keras.backend.clear_session()
    tf.random.set_seed(SEED)
    # NOTE(review): tensorflow.python.keras is a private import path; kept
    # as-is because the CXPlain build in use may depend on it -- confirm.
    from tensorflow.python.keras.losses import mean_squared_error as loss
    from cxplain import CXPlain
    from cxplain.backend.model_builders.custom_mlp import CustomMLPModelBuilder

    n_pathways = len(pathway_names)
    model_builder = CustomMLPModelBuilder(
        num_layers=2,
        num_units=512,
        batch_size=16,
        learning_rate=0.001,
        n_feature_groups=n_pathways,
    )

    print(gdsc_expr.values.shape, gdsc_dr.values.shape)

    print("Fitting CXPlain model")
    # No masking operation is supplied (third argument is None); the
    # precomputed masked_data is passed to fit() instead.
    explainer = CXPlain(model, model_builder, None, loss, num_models=3)
    explainer.fit(gdsc_expr.values, gdsc_dr.values, masked_data=masked_data)
    print("Attributing using CXPlain")

    attr, _ = explainer.explain_groups(test_tcga_expr.values)
    print('attr')

    attr = pd.DataFrame(attr, index=test_tcga_expr.index, columns=pathway_names)
    borda = get_ranked_list(attr, k=n_pathways)

    # Pathway names ordered by descending mean |attribution| across samples.
    attr_mean = list(np.abs(attr).mean(axis=0).nlargest(n_pathways).index)
    out = pd.DataFrame(columns=['borda', 'mean'])
    out['borda'] = borda
    out['mean'] = attr_mean

    out.to_csv(out_dir + '/pathways.csv', index=False)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    explainer_dir = out_dir + '/explainer/'
    os.makedirs(explainer_dir, exist_ok=True)

    explainer.save(explainer_dir, custom_model_saver=None)
Beispiel #2
0
def find_genes_CX(drug, model, gdsc_expr, gdsc_dr, test_tcga_expr):
    """Fit a CXPlain explainer from precalculated omegas and rank pathways.

    Despite the function name, attributions are produced per feature group:
    the attribution table has one column per entry of the module-level
    ``pathway_names``.

    Files written under ``res_dir + drug`` (this directory is not created
    here and must already exist):

    * ``baselines.csv`` -- only when ``precalculate_omegas`` returns
      baselines (second return value is not ``None``); indexed by
      ``gdsc_expr.index``.
    * ``pathways.csv`` -- two rankings of the pathway names: ``borda``
      (output of ``get_ranked_list``; presumably a Borda-count ranking --
      confirm against that helper) and ``mean`` (descending mean absolute
      attribution over the explained samples).
    * ``explainer/`` -- the fitted explainer, via ``explainer.save``.

    Args:
        drug: Name appended to ``res_dir`` to form the output directory.
        model: Model being explained; handed to ``precalculate_omegas`` and
            to ``CXPlain``.
        gdsc_expr: Training inputs. ``.index`` and ``.values`` are used
            (presumably a pandas DataFrame -- confirm).
        gdsc_dr: Training targets; passed to ``precalculate_omegas`` and,
            as ``.values``, to the explainer.
        test_tcga_expr: Samples to explain; ``.values`` and ``.index`` are
            used.

    Returns:
        None. Results are written to disk.
    """
    # Plain concatenation is kept (rather than os.path.join) so the produced
    # paths are identical to before regardless of how res_dir is terminated.
    out_dir = res_dir + drug

    print('obtaining precalculating omegas...')
    # Element-wise torch loss used only for the omega precalculation. It is
    # named separately from the TensorFlow loss below; previously both shared
    # the local name ``loss`` and the second silently replaced the first.
    torch_loss = torch.nn.MSELoss(reduction='none')
    # Other variants tried earlier: precalculate_omegas with its default
    # mode, mode='scaled-difference', mode='delta-of-delta', and
    # precalculate_omegas_scaled.
    omegas, lb = precalculate_omegas(model,
                                     gdsc_expr,
                                     gdsc_dr,
                                     pathway_matrix,
                                     torch_loss,
                                     mode='delta-scaled')

    if lb is not None:
        lb = pd.DataFrame(lb, index=gdsc_expr.index, columns=pathway_names)
        lb.to_csv(out_dir + '/baselines.csv')
    print('obtained masked data...')

    # TensorFlow/CXPlain are imported locally, presumably so that TF is only
    # loaded and reconfigured when an explainer is actually fitted.
    import tensorflow as tf
    tf.compat.v1.disable_v2_behavior()
    tf.keras.backend.clear_session()
    tf.random.set_seed(SEED)
    # NOTE(review): tensorflow.python.keras is a private import path; kept
    # as-is because the CXPlain build in use may depend on it -- confirm.
    from tensorflow.python.keras.losses import mean_squared_error as tf_loss
    from cxplain import CXPlain
    from cxplain.backend.model_builders.custom_mlp_precalc import CustomMLPModelBuilder

    n_pathways = len(pathway_names)
    model_builder = CustomMLPModelBuilder(num_layers=2,
                                          num_units=512,
                                          batch_size=16,
                                          learning_rate=0.001,
                                          n_feature_groups=n_pathways)

    print(gdsc_expr.values.shape, gdsc_dr.values.shape)

    print("Fitting CXPlain model")
    # No masking operation is supplied (third argument is None); the
    # precalculated omegas are passed to fit_precalc() instead.
    explainer = CXPlain(model, model_builder, None, tf_loss, num_models=3)
    explainer.fit_precalc(gdsc_expr.values, gdsc_dr.values, omega=omegas)
    print("Attributing using CXPlain")

    attr, _ = explainer.explain_groups(test_tcga_expr.values)
    print('attr')

    attr = pd.DataFrame(attr,
                        index=test_tcga_expr.index,
                        columns=pathway_names)
    borda = get_ranked_list(attr, k=n_pathways)

    # Pathway names ordered by descending mean |attribution| across samples.
    attr_mean = list(np.abs(attr).mean(axis=0).nlargest(n_pathways).index)
    out = pd.DataFrame(columns=['borda', 'mean'])
    out['borda'] = borda
    out['mean'] = attr_mean

    out.to_csv(out_dir + '/pathways.csv', index=False)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    explainer_dir = out_dir + '/explainer/'
    os.makedirs(explainer_dir, exist_ok=True)

    explainer.save(explainer_dir, custom_model_saver=None)
Beispiel #3
0
    return (x, y_pred, masked_outs)


# Demo run: fit a CXPlain explainer on the first 200 entries of x / y and
# attribute the entries from position 200 onwards.
# NOTE(review): the hard-coded index range(200, 300) assumes x holds exactly
# 300 entries -- confirm against where x is built.
_fit_rows = slice(None, 200)
_explain_rows = slice(200, None)
_explain_index = range(200, 300)

model_builder = CustomMLPModelBuilder(
    num_layers=2,
    num_units=32,
    batch_size=32,
    learning_rate=0.001,
    n_feature_groups=10,
)

# No masking operation is given to CXPlain (third argument is None; ZeroMasking
# and FastZeroMasking were tried and disabled). The precomputed output of
# get_masked_data_for_CXPlain is handed to fit() as masked_data instead.
k = get_masked_data_for_CXPlain(model, x[_fit_rows])
# The optional downsample_factors=(5,) argument is intentionally left out.
explainer = CXPlain(model, model_builder, None, loss)
explainer.fit(x[_fit_rows], y[_fit_rows], masked_data=k)
attributions = explainer.explain_groups(x[_explain_rows])

# Attribution table labelled with the original positions 200..299.
attr = pd.DataFrame(attributions, index=_explain_index)

# Disabled visual check of the mean absolute attribution per column:
# plt.plot(range(50), np.abs(attr).mean(axis=0), label='attr')
# plt.plot(range(50), np.abs(mult).mean(axis=0), label='mult')
# plt.show()

print(attr.shape)
print(attr.loc[200])

# First explained sample whose column 20 is close to zero (|v| < 0.1) while
# column 40 is large (|v| > 1); raises IndexError if no sample qualifies.
df = pd.DataFrame(x[_explain_rows], index=_explain_index)
idx = (
    df.loc[df[20].abs() < 0.1]
    .loc[df[40].abs() > 1]
    .index[0]
)

# Disabled visual check of the attribution for that single sample:
# plt.plot(range(50), np.abs(attr.loc[idx]), label='attr')
# plt.legend()
# plt.show()