def test_boston_housing_load_save_valid(self):
        """Check that a fitted CXPlain explainer survives a save/load round trip.

        For each ensemble size in ``[1, 2]`` an explainer is fitted on the
        Boston housing training split and its predictions on the test split
        are recorded. The explainer is then saved to a fresh temporary
        directory, saved again with ``overwrite=False`` (which must raise
        ``ValueError`` because the directory is already populated), saved
        once more with ``overwrite=True``, and finally loaded back. The
        predictions of the loaded explainer must equal the recorded ones.
        """
        (x_train, y_train), (x_test, _) = TestUtil.get_boston_housing()
        explained_model = RandomForestRegressor(n_estimators=64, max_depth=5, random_state=1)
        explained_model.fit(x_train, y_train)

        model_builder = MLPModelBuilder(num_layers=2, num_units=32, activation="relu", p_dropout=0.2, verbose=0,
                                        batch_size=32, learning_rate=0.001, num_epochs=2, early_stopping_patience=128)
        masking_operation = ZeroMasking()
        loss = mean_squared_error

        num_models_settings = [1, 2]
        for num_models in num_models_settings:
            explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                                num_models=num_models)

            explainer.fit(x_train, y_train)
            median_1 = explainer.predict(x_test)

            tmp_dir_name = tempfile.mkdtemp()
            try:
                explainer.save(tmp_dir_name)

                # The directory now holds a saved explainer, so a second save
                # without permission to overwrite has to be rejected.
                with self.assertRaises(ValueError):
                    explainer.save(tmp_dir_name, overwrite=False)

                explainer.save(tmp_dir_name, overwrite=True)

                # NOTE(review): assumes load() returns the restored explainer
                # (it is used as `exp = CXPlain.load(...)` by callers).
                # Predicting with the returned object is what actually
                # exercises the round trip; predicting with `explainer` again
                # would compare the in-memory model against itself.
                loaded_explainer = explainer.load(tmp_dir_name)
                median_2 = loaded_explainer.predict(x_test)

                self.assertTrue(np.array_equal(median_1, median_2))
            finally:
                # Always clean up, even when a save/load call or an assertion
                # fails; ignore_errors keeps a cleanup problem from masking
                # the real test failure.
                shutil.rmtree(tmp_dir_name, ignore_errors=True)
Exemple #2
0
def find_genes_CX(drug, test_tcga_expr):
    """Aggregate per-seed CXPlain attributions for one drug into gene rankings.

    Loads the ten saved explainers for ``drug`` (seeds 1 through 10), explains
    ``test_tcga_expr`` with each of them, stacks the attributions into one
    long-format table with one row per (sample, seed) pair, and passes that
    table to the three Borda aggregation helpers.

    Args:
        drug: Drug identifier, interpolated into the explainer path.
        test_tcga_expr: DataFrame of expression values; its ``.values`` are
            explained and its index labels the samples.

    Returns:
        A tuple ``(boat, seed_then_sample, sample_then_seed)`` holding the
        results of ``borda_of_all_tuples``, ``sample_borda_of_seed_bordas``
        and ``seed_borda_of_sample_bordas`` respectively.
    """
    tf.keras.backend.clear_session()

    num_seeds = 10
    attr_ind = []
    for seed in range(1, num_seeds + 1):
        exp = CXPlain.load('gene_finding/results/CX_ind1/%s/seed%d/explainer' %
                           (drug, seed),
                           relpath=True)
        attr = exp.explain(test_tcga_expr.values)
        attr_ind.append(pd.DataFrame(attr,
                                     index=test_tcga_expr.index,
                                     columns=dataset.hgnc))

    # Build the long table in a single construction. Growing a DataFrame one
    # row at a time via `.loc[i] = ...` reallocates the whole frame on every
    # insert, which is quadratic in the number of rows.
    # Row order is sample-major, seed-minor. The 'seed' column holds the
    # 0-based position in attr_ind (0..9), not the 1-based seed used in the
    # explainer path; the same 0..9 range is handed to the aggregation below.
    rows = [
        [seed, sample] + list(attr_ind[seed].loc[sample])
        for sample in attr_ind[0].index
        for seed in range(num_seeds)
    ]
    all_attr = pd.DataFrame(rows,
                            columns=['seed', 'sample'] + list(dataset.hgnc))

    print('boat')
    boat = borda_of_all_tuples(all_attr)
    print('seed then sample')
    seed_then_sample = sample_borda_of_seed_bordas(all_attr,
                                                   test_tcga_expr.index)
    print('sample then seed')
    sample_then_seed = seed_borda_of_sample_bordas(all_attr, range(num_seeds))

    return boat, seed_then_sample, sample_then_seed
Exemple #3
0
#     _, _, _, test_tcga_expr = dataset.filter_and_normalize_data(drug)
#     exp = CXPlain.load('gene_finding/results/%s/%s/explainer'%(folder, drug), custom_model_loader=None, relpath=True)
#     attr,_ = exp.explain(test_tcga_expr.values)
#     attr = pd.DataFrame(attr, index=test_tcga_expr.index, columns=dataset.genes)
#     attr_dict[drug]=attr

# Per-drug attribution tables, keyed by drug name. Each table is the mean over
# the ten seed-specific explainers of the attributions for that drug's test set.
attr_dict = {}
conf_dict = {}

for i, drug in enumerate(drugs):
    print(drug)
    _, _, _, test_tcga_expr = dataset.filter_and_normalize_data(drug)

    # Running sum of attributions, one row per test sample and one column per gene.
    attr_all = np.zeros((len(test_tcga_expr.index), len(dataset.genes)))

    for seed in range(1, 11):
        exp = CXPlain.load(
            'gene_finding/results/%s/%s/seed%d/explainer' % (folder, drug, seed),
            custom_model_loader=None,
            relpath=True,
        )
        attr = exp.explain(test_tcga_expr.values)
        attr_all += attr

    # Divide the accumulated sum by the number of seeds to get the mean.
    attr = pd.DataFrame(
        attr_all / 10.0,
        index=test_tcga_expr.index,
        columns=dataset.genes,
    )
    attr_dict[drug] = attr

# 7 x 2 grid of axes on a single 14 x 35 figure.
fig, axes = plt.subplots(7, 2, figsize=(14, 35))

# Excel workbook for the aggregated top-gene output.
writer_a = pd.ExcelWriter(
    'gene_finding/results/%s/top_genes_mean_of_means_aggregation.xlsx' % folder,
    engine='xlsxwriter',
)

# Lookup table indexed by dataset.genes with a single 'hgnc' column.
conv = pd.DataFrame(index=dataset.genes, columns=['hgnc'])
conv['hgnc'] = dataset.hgnc

for i, drug in enumerate(drugs):