def test_boston_housing_load_save_valid(self):
    """Check that a fitted CXPlain explainer survives a save/load round trip.

    For every ``num_models`` setting in ``[1, 2]`` the explainer is fitted on
    the training split returned by ``TestUtil.get_boston_housing()`` and its
    predictions on the test split are recorded. The explainer is then saved
    to a fresh temporary directory, saved again with ``overwrite=False``
    (which must raise ``ValueError``), saved once more with
    ``overwrite=True``, loaded back, and asked to predict again. The two
    prediction arrays must be exactly equal.

    The temporary directory is removed in a ``finally`` clause so a failing
    assertion or an exception during save/load does not leave it behind.
    """
    (x_train, y_train), (x_test, _) = TestUtil.get_boston_housing()

    explained_model = RandomForestRegressor(n_estimators=64, max_depth=5, random_state=1)
    explained_model.fit(x_train, y_train)

    model_builder = MLPModelBuilder(num_layers=2, num_units=32, activation="relu", p_dropout=0.2, verbose=0,
                                    batch_size=32, learning_rate=0.001, num_epochs=2,
                                    early_stopping_patience=128)
    masking_operation = ZeroMasking()
    loss = mean_squared_error

    num_models_settings = [1, 2]
    for num_models in num_models_settings:
        explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                            num_models=num_models)

        explainer.fit(x_train, y_train)
        median_1 = explainer.predict(x_test)

        tmp_dir_name = tempfile.mkdtemp()
        try:
            # mkdtemp() already created the directory, so the first save
            # writes into an existing (empty) directory.
            explainer.save(tmp_dir_name)

            # A second save without overwrite must be rejected.
            with self.assertRaises(ValueError):
                explainer.save(tmp_dir_name, overwrite=False)

            explainer.save(tmp_dir_name, overwrite=True)

            # NOTE(review): the return value of load() is discarded. If load()
            # returns a new explainer instead of restoring state in place,
            # median_2 below comes from the original in-memory explainer and
            # the round trip is not actually exercised -- confirm.
            explainer.load(tmp_dir_name)
            median_2 = explainer.predict(x_test)

            self.assertTrue(np.array_equal(median_1, median_2))
        finally:
            shutil.rmtree(tmp_dir_name)  # Cleanup, even when the test fails.
def find_genes_CX(drug, test_tcga_expr):
    """Aggregate per-seed CXPlain attributions for *drug* into three rankings.

    Ten saved explainers (seed directories 1 through 10) are loaded and each
    is asked to explain ``test_tcga_expr.values``. The resulting attribution
    frames are flattened into one long table with a row per (sample, seed)
    pair, which is then handed to the three borda aggregation helpers.

    Args:
        drug: Name substituted into the explainer path.
        test_tcga_expr: DataFrame whose ``.values`` are explained and whose
            ``.index`` labels the samples.

    Returns:
        A 3-tuple ``(boat, seed_then_sample, sample_then_seed)`` holding the
        results of ``borda_of_all_tuples``, ``sample_borda_of_seed_bordas``
        and ``seed_borda_of_sample_bordas`` respectively.
    """
    tf.keras.backend.clear_session()

    def _load_attributions(seed):
        # One attribution frame per seed: rows are samples, columns are genes.
        explainer = CXPlain.load('gene_finding/results/CX_ind1/%s/seed%d/explainer' % (drug, seed), relpath=True)
        scores = explainer.explain(test_tcga_expr.values)
        return pd.DataFrame(scores, index=test_tcga_expr.index, columns=dataset.hgnc)

    per_seed_frames = [_load_attributions(seed) for seed in range(1, 11)]

    long_table = pd.DataFrame(columns=['seed', 'sample'] + list(dataset.hgnc))

    # Sample-major, seed-minor ordering. The 'seed' column stores the
    # zero-based position in per_seed_frames (0..9), not the 1..10 number
    # used in the directory name.
    flattened = (
        (sample, slot, frame)
        for sample in per_seed_frames[0].index
        for slot, frame in enumerate(per_seed_frames)
    )
    for row_id, (sample, slot, frame) in enumerate(flattened):
        long_table.loc[row_id] = [slot, sample] + list(frame.loc[sample])

    print('boat')
    boat = borda_of_all_tuples(long_table)

    print('seed then sample')
    seed_then_sample = sample_borda_of_seed_bordas(long_table, test_tcga_expr.index)

    print('sample then seed')
    sample_then_seed = seed_borda_of_sample_bordas(long_table, range(len(per_seed_frames)))

    return boat, seed_then_sample, sample_then_seed
# _, _, _, test_tcga_expr = dataset.filter_and_normalize_data(drug) # exp = CXPlain.load('gene_finding/results/%s/%s/explainer'%(folder, drug), custom_model_loader=None, relpath=True) # attr,_ = exp.explain(test_tcga_expr.values) # attr = pd.DataFrame(attr, index=test_tcga_expr.index, columns=dataset.genes) # attr_dict[drug]=attr attr_dict = {} conf_dict = {} for i, drug in enumerate(drugs): print(drug) _, _, _, test_tcga_expr = dataset.filter_and_normalize_data(drug) attr_all = np.zeros((len(test_tcga_expr.index), len(dataset.genes))) for seed in range(1, 11): exp = CXPlain.load('gene_finding/results/%s/%s/seed%d/explainer'%(folder, drug, seed), custom_model_loader=None, relpath=True) attr = exp.explain(test_tcga_expr.values) attr_all += attr attr = pd.DataFrame(attr_all/10.0, index=test_tcga_expr.index, columns=dataset.genes) attr_dict[drug]=attr fig, axes = plt.subplots(7, 2, figsize=(14, 35)) writer_a = pd.ExcelWriter('gene_finding/results/%s/top_genes_mean_of_means_aggregation.xlsx'%folder, engine='xlsxwriter') conv = pd.DataFrame(index=dataset.genes, columns=['hgnc']) conv['hgnc'] = dataset.hgnc for i, drug in enumerate(drugs):