def predict_with_existing(self,
                          X_train, y_train_regression, y_train_classification,
                          X_val, y_val_regression, y_val_classification,
                          X_test, y_test_regression, y_test_classification,
                          PMTNN_weight_file):
    """Reload saved ensemble weights and print ranking metrics per split.

    The model comes from ``self.setup_model_ensemble()`` and its weights are
    read from ``PMTNN_weight_file``.  Precision, ROC and BEDROC scores are
    printed for the train, validation and test splits, followed by one
    enrichment-factor line per entry of ``self.EF_ratio_list`` (test split
    only).

    The ``*_regression`` arguments are accepted but never read here; only
    the ``*_classification`` labels are passed to the metric helpers.

    Returns None.
    """
    net = self.setup_model_ensemble()
    net.load_weights(PMTNN_weight_file)

    train_scores = net.predict(X_train)
    val_scores = net.predict(X_val)
    test_scores = net.predict(X_test)

    # A bare ``print`` is the Python 2 statement form: it emits a blank line.
    print
    train_precision = precision_auc_single(y_train_classification, train_scores)
    print('train precision: {}'.format(train_precision))
    train_roc = roc_auc_single(y_train_classification, train_scores)
    print('train roc: {}'.format(train_roc))
    train_bedroc = bedroc_auc_single(y_train_classification, train_scores)
    print('train bedroc: {}'.format(train_bedroc))
    print

    val_precision = precision_auc_single(y_val_classification, val_scores)
    print('validation precision: {}'.format(val_precision))
    val_roc = roc_auc_single(y_val_classification, val_scores)
    print('validation roc: {}'.format(val_roc))
    val_bedroc = bedroc_auc_single(y_val_classification, val_scores)
    print('validation bedroc: {}'.format(val_bedroc))
    print

    test_precision = precision_auc_single(y_test_classification, test_scores)
    print('test precision: {}'.format(test_precision))
    test_roc = roc_auc_single(y_test_classification, test_scores)
    print('test roc: {}'.format(test_roc))
    test_bedroc = bedroc_auc_single(y_test_classification, test_scores)
    print('test bedroc: {}'.format(test_bedroc))
    print

    for ratio in self.EF_ratio_list:
        # The third element of the helper's result is not reported here.
        n_actives, ef, _ = enrichment_factor_single(
            y_test_classification, test_scores, ratio)
        print('ratio: {}, EF: {},\tactive: {}'.format(ratio, ef, n_actives))
    return
def predict_with_existing(self, X_train, y_train, X_val, y_val, X_test, y_test, PMTNN_weight_file): model = self.setup_model() model.load_weights(PMTNN_weight_file) y_pred_on_train = model.predict(X_train)[:, -1] y_train = y_train[:, -1] y_pred_on_val = model.predict(X_val)[:, -1] y_val = y_val[:, -1] if X_test is not None: y_pred_on_test = model.predict(X_test)[:, -1] y_test = y_test[:, -1] print y_train.shape, '\t', y_pred_on_test.shape print print('train precision: {}'.format( precision_auc_single(y_train, y_pred_on_train))) print('train roc: {}'.format(roc_auc_single(y_train, y_pred_on_train))) print('train bedroc: {}'.format( bedroc_auc_single(y_train, y_pred_on_train))) print print('validation precision: {}'.format( precision_auc_single(y_val, y_pred_on_val))) print('validation roc: {}'.format(roc_auc_single(y_val, y_pred_on_val))) print('validation bedroc: {}'.format( bedroc_auc_single(y_val, y_pred_on_val))) print if X_test is not None: print('test precision: {}'.format( precision_auc_single(y_test, y_pred_on_test))) print('test roc: {}'.format(roc_auc_single(y_test, y_pred_on_test))) print('test bedroc: {}'.format( bedroc_auc_single(y_test, y_pred_on_test))) print if X_test is not None: for EF_ratio in self.EF_ratio_list: n_actives, ef, ef_max = enrichment_factor_single( y_test, y_pred_on_test, EF_ratio) nef = ef / ef_max print('ratio: {}, EF: {},\tactive: {}'.format( EF_ratio, ef, n_actives)) print('ratio: {}, NEF: {}'.format(EF_ratio, nef)) return
def train_and_predict(self, X_train, y_train_regression, y_train_classification, X_val, y_val_regression, y_val_classification, X_test, y_test_regression, y_test_classification, PMTNN_weight_file): model = self.setup_model() sw = get_sample_weight(self, y_train_regression) print 'Sample Weight\t', sw model.compile(loss=self.compile_loss, optimizer=self.compile_optimizer) model.fit(x=X_train, y=y_train_regression, nb_epoch=self.fit_nb_epoch, batch_size=self.fit_batch_size, verbose=self.fit_verbose, sample_weight=sw, validation_data=[X_val, y_val_regression], shuffle=True) model.save_weights(PMTNN_weight_file) y_pred_on_train = model.predict(X_train) y_pred_on_val = model.predict(X_val) if X_test is not None: y_pred_on_test = model.predict(X_test) print print('train precision: {}'.format(precision_auc_single(y_train_classification, y_pred_on_train))) print('train roc: {}'.format(roc_auc_single(y_train_classification, y_pred_on_train))) print('train bedroc: {}'.format(bedroc_auc_single(y_train_classification, y_pred_on_train))) print print('validation precision: {}'.format(precision_auc_single(y_val_classification, y_pred_on_val))) print('validation roc: {}'.format(roc_auc_single(y_val_classification, y_pred_on_val))) print('validation bedroc: {}'.format(bedroc_auc_single(y_val_classification, y_pred_on_val))) print if X_test is not None: print('test precision: {}'.format(precision_auc_single(y_test_classification, y_pred_on_test))) print('test roc: {}'.format(roc_auc_single(y_test_classification, y_pred_on_test))) print('test bedroc: {}'.format(bedroc_auc_single(y_test_classification, y_pred_on_test))) print if X_test is not None: for EF_ratio in self.EF_ratio_list: n_actives, ef, ef_max = enrichment_factor_single(y_test_classification, y_pred_on_test, EF_ratio) nef = ef / ef_max print('ratio: {}, EF: {},\tactive: {}'.format(EF_ratio, ef, n_actives)) print('ratio: {}, NEF: {}'.format(EF_ratio, nef)) return
def get_rf(self, X_train, y_train, X_val, y_val, X_test, y_test):
    """Fit a fixed-configuration random forest and print ranking metrics.

    The forest uses 4000 trees, ``'log2'`` max features, balanced class
    weights, 3 jobs and seed 1337 (also applied to ``np.random``).  Hard
    predictions from ``rf.predict`` are passed through
    ``reshape_data_into_2_dim`` before being scored on the train,
    validation and test splits; enrichment factors are printed for the test
    split for every ratio in ``self.EF_ratio_list``.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    seed = 1337
    np.random.seed(seed=seed)

    forest = RandomForestClassifier(
        n_estimators=4000,
        max_features='log2',
        min_samples_leaf=1,
        n_jobs=3,
        class_weight='balanced',
        random_state=seed,
        oob_score=False,
        verbose=1,
    )
    forest.fit(X_train, y_train)

    train_pred = reshape_data_into_2_dim(forest.predict(X_train))
    val_pred = reshape_data_into_2_dim(forest.predict(X_val))
    test_pred = reshape_data_into_2_dim(forest.predict(X_test))

    train_precision = precision_auc_single(y_train, train_pred)
    print('train precision: {}'.format(train_precision))
    train_roc = roc_auc_single(y_train, train_pred)
    print('train roc: {}'.format(train_roc))
    train_bedroc = bedroc_auc_single(y_train, train_pred)
    print('train bedroc: {}'.format(train_bedroc))
    # A bare ``print`` is the Python 2 statement form: it emits a blank line.
    print

    val_precision = precision_auc_single(y_val, val_pred)
    print('validation precision: {}'.format(val_precision))
    val_roc = roc_auc_single(y_val, val_pred)
    print('validation roc: {}'.format(val_roc))
    val_bedroc = bedroc_auc_single(y_val, val_pred)
    print('validation bedroc: {}'.format(val_bedroc))
    print

    test_precision = precision_auc_single(y_test, test_pred)
    print('test precision: {}'.format(test_precision))
    test_roc = roc_auc_single(y_test, test_pred)
    print('test roc: {}'.format(test_roc))
    test_bedroc = bedroc_auc_single(y_test, test_pred)
    print('test bedroc: {}'.format(test_bedroc))
    print

    for ratio in self.EF_ratio_list:
        # The third element of the helper's result is not reported here.
        n_actives, ef, _ = enrichment_factor_single(y_test, test_pred, ratio)
        print('ratio: {}, EF: {},\tactive: {}'.format(ratio, ef, n_actives))

    return forest
def get_EF_score_with_existing_model(self, X_test, y_test,
                                     y_test_classification, file_path,
                                     EF_ratio):
    """Reload ensemble weights and print test metrics plus one EF value.

    The model comes from ``self.setup_model_ensemble()`` with weights read
    from ``file_path``.  Precision, ROC and BEDROC are printed for the test
    split, then the enrichment factor at the single ratio ``EF_ratio``.

    ``y_test`` is accepted but never read; scoring uses
    ``y_test_classification``.

    Returns None.
    """
    net = self.setup_model_ensemble()
    net.load_weights(file_path)
    test_scores = net.predict(X_test)

    test_precision = precision_auc_single(y_test_classification, test_scores)
    print('test precision: {}'.format(test_precision))
    test_roc = roc_auc_single(y_test_classification, test_scores)
    print('test roc: {}'.format(test_roc))
    test_bedroc = bedroc_auc_single(y_test_classification, test_scores)
    print('test bedroc: {}'.format(test_bedroc))
    # A bare ``print`` is the Python 2 statement form: it emits a blank line.
    print

    # The third element of the helper's result is not reported here.
    n_actives, ef, _ = enrichment_factor_single(
        y_test_classification, test_scores, EF_ratio)
    print('EF: {},\tactive: {}'.format(ef, n_actives))
    return
def get_EF_values_single_task(task, X_test, y_test, model_weight,
                              EF_ratio_list):
    """Score a saved single-task model and collect its enrichment factors.

    Args:
        task: object providing ``setup_model()``.
        X_test: test features passed to ``model.predict``.
        y_test: test labels passed to the metric helpers.
        model_weight: path handed to ``model.load_weights``.
        EF_ratio_list: iterable of ratios to evaluate.

    Returns:
        ``(ef_values, ef_max_values)``: two lists holding, for each ratio
        in order, the second and third elements returned by
        ``enrichment_factor_single``.
    """
    net = task.setup_model()
    net.load_weights(model_weight)
    test_scores = net.predict(X_test)

    test_precision = precision_auc_single(y_test, test_scores)
    print('test precision: {}'.format(test_precision))
    test_roc = roc_auc_single(y_test, test_scores)
    print('test roc: {}'.format(test_roc))
    test_bedroc = bedroc_auc_single(y_test, test_scores)
    print('test bedroc: {}'.format(test_bedroc))
    # A bare ``print`` is the Python 2 statement form: it emits a blank line.
    print

    # Gather (EF, EF max) per ratio first, then split into the two lists.
    pairs = []
    for ratio in EF_ratio_list:
        _, ef, ef_max = enrichment_factor_single(y_test, test_scores, ratio)
        pairs.append((ef, ef_max))

    ef_values = [pair[0] for pair in pairs]
    ef_max_values = [pair[1] for pair in pairs]
    return ef_values, ef_max_values
def train_and_predict(self, X_train, y_train, y_train_classification, X_val, y_val, y_val_classification, X_test, y_test, y_test_classification, mode):
    """Build, fit and evaluate a two-hidden-layer network in the given mode.

    ``mode == 'classification'`` builds ReLU hidden layers with a sigmoid
    output trained on binary cross-entropy; any other value builds sigmoid
    hidden layers with a linear output trained on MSE.  Both variants use
    the Adam optimizer, 0.5 dropout after each hidden layer and, when
    ``self.batch_is_use`` is truthy, one BatchNormalization layer
    configured from ``self.conf['batch']``.

    The model is fitted on ``(X_train, y_train)`` with ``(X_val, y_val)``
    as validation data.  Metrics are always computed against the
    ``*_classification`` labels.  ``y_test`` is accepted but never read.

    Returns:
        The model's predictions on ``X_test``.
    """
    model = Sequential()

    # Pull every BatchNormalization setting from the 'batch' config section.
    conf = self.conf
    batch_normalizer_epsilon = conf['batch']['epsilon']
    batch_normalizer_mode = conf['batch']['mode']
    batch_normalizer_axis = conf['batch']['axis']
    batch_normalizer_momentum = conf['batch']['momentum']
    batch_normalizer_weights = conf['batch']['weights']
    batch_normalizer_beta_init = conf['batch']['beta_init']
    batch_normalizer_gamma_init = conf['batch']['gamma_init']
    # The layer object is created unconditionally but only added to the
    # model when self.batch_is_use is truthy.
    batch_normalizer = BatchNormalization(epsilon=batch_normalizer_epsilon,
                                          mode=batch_normalizer_mode,
                                          axis=batch_normalizer_axis,
                                          momentum=batch_normalizer_momentum,
                                          weights=batch_normalizer_weights,
                                          beta_init=batch_normalizer_beta_init,
                                          gamma_init=batch_normalizer_gamma_init)

    if mode == 'classification':
        # Input width is fixed at 1024 features.
        model.add(Dense(2048, input_dim=1024, init='glorot_normal', activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(1024, init='glorot_normal', activation='relu'))
        model.add(Dropout(0.5))
        if self.batch_is_use:
            model.add(batch_normalizer)
        model.add(Dense(1, init='glorot_normal', activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam')
    else:
        # Every non-'classification' mode falls through to the regression
        # architecture; the mode string is not validated.
        model.add(Dense(2048, input_dim=1024, init='glorot_normal', activation='sigmoid'))
        model.add(Dropout(0.5))
        model.add(Dense(1024, init='glorot_normal', activation='sigmoid'))
        model.add(Dropout(0.5))
        if self.batch_is_use:
            model.add(batch_normalizer)
        model.add(Dense(1, init='glorot_normal', activation='linear'))
        model.compile(loss='mse', optimizer='adam')

    model.fit(X_train, y_train, batch_size=self.fit_batch_size, nb_epoch=self.fit_nb_epoch, verbose=self.fit_verbose, validation_data=(X_val, y_val))

    y_pred_on_train = model.predict(X_train)
    y_pred_on_val = model.predict(X_val)
    y_pred_on_test = model.predict(X_test)

    # A bare ``print`` is the Python 2 statement form: it emits a blank line.
    print
    print 'this is mode ', mode
    print('train precision: {}'.format(precision_auc_single(y_train_classification, y_pred_on_train)))
    print('train roc: {}'.format(roc_auc_single(y_train_classification, y_pred_on_train)))
    print('train bedroc: {}'.format(bedroc_auc_single(y_train_classification, y_pred_on_train)))
    print

    print('validation precision: {}'.format(precision_auc_single(y_val_classification, y_pred_on_val)))
    print('validation roc: {}'.format(roc_auc_single(y_val_classification, y_pred_on_val)))
    print('validation bedroc: {}'.format(bedroc_auc_single(y_val_classification, y_pred_on_val)))
    print

    print('test precision: {}'.format(precision_auc_single(y_test_classification, y_pred_on_test)))
    print('test roc: {}'.format(roc_auc_single(y_test_classification, y_pred_on_test)))
    print('test bedroc: {}'.format(bedroc_auc_single(y_test_classification, y_pred_on_test)))
    print

    # Enrichment factors are reported for the test split only; ef_max is
    # unpacked but not printed.
    for EF_ratio in self.EF_ratio_list:
        n_actives, ef, ef_max = enrichment_factor_single(y_test_classification, y_pred_on_test, EF_ratio)
        print('ratio: {}, EF: {},\tactive: {}'.format(EF_ratio, ef, n_actives))
    return y_pred_on_test
def train_and_predict_ensemble(self, X_train, y_train_regression, y_train_classification, X_val, y_val_regression, y_val_classification, X_test, y_test_regression, y_test_classification, PMTNN_weight_file): model = self.setup_model_ensemble() # TODO: remove print model.summary() if self.weight_schema == 'weighted': loss_weight = {'classification_output_layer': 1., 'regression_output_layer': 100.} elif self.weight_schema == 'no_weight': loss_weight = {'classification_output_layer': 1., 'regression_output_layer': 1.} else: raise ValueError('Wrong weight schema. Should be no_weight, or weighted.') model.compile(optimizer=self.compile_optimizer, loss={'classification_output_layer': 'binary_crossentropy', 'regression_output_layer': 'mse'}, loss_weights=loss_weight) model.fit({'input_layer': X_train}, {'classification_output_layer': y_train_classification, 'regression_output_layer': y_train_regression}, nb_epoch=self.fit_nb_epoch, batch_size=self.fit_batch_size, verbose=self.fit_verbose, validation_data=({'input_layer': X_val}, {'classification_output_layer': y_val_classification, 'regression_output_layer': y_val_regression}), shuffle=True) model.save_weights(PMTNN_weight_file) y_pred_on_train_ensemble = np.array(model.predict(X_train)) y_pred_on_val_ensmble = np.array(model.predict(X_val)) y_pred_on_test_ensemble = np.array(model.predict(X_test)) print print 'TreeNet Ensemble' mode_list = ['TreeNet classification', 'TreeNet regression'] for mode in range(2): print print mode_list[mode] y_pred_on_train = y_pred_on_train_ensemble[mode] y_pred_on_val = y_pred_on_val_ensmble[mode] y_pred_on_test = y_pred_on_test_ensemble[mode] print('train precision: {}'.format(precision_auc_single(y_train_classification, y_pred_on_train))) print('train roc: {}'.format(roc_auc_single(y_train_classification, y_pred_on_train))) print('train bedroc: {}'.format(bedroc_auc_single(y_train_classification, y_pred_on_train))) print print('validation precision: 
{}'.format(precision_auc_single(y_val_classification, y_pred_on_val))) print('validation roc: {}'.format(roc_auc_single(y_val_classification, y_pred_on_val))) print('validation bedroc: {}'.format(bedroc_auc_single(y_val_classification, y_pred_on_val))) print print('test precision: {}'.format(precision_auc_single(y_test_classification, y_pred_on_test))) print('test roc: {}'.format(roc_auc_single(y_test_classification, y_pred_on_test))) print('test bedroc: {}'.format(bedroc_auc_single(y_test_classification, y_pred_on_test))) print for EF_ratio in self.EF_ratio_list: n_actives, ef, ef_max = enrichment_factor_single(y_test_classification, y_pred_on_test, EF_ratio) print('ratio: {}, EF: {},\tactive: {}'.format(EF_ratio, ef, n_actives)) return y_pred_on_test_ensemble
def train_and_predict(self, X_train, y_train, X_val, y_val, X_test, y_test, PMTNN_weight_file):
    """Fit the classification model with class weights and print metrics.

    ``self.early_stopping_option`` selects an optional callback: ``'auc'``
    uses ``KeckCallBackOnROC``, ``'precision'`` uses
    ``KeckCallBackOnPrecision``; any other value trains without callbacks.
    When a callback is used, the model evaluated afterwards is the one
    returned by its ``get_best_model()``.

    Validation data is handed to the callback only; it is not passed to
    ``model.fit``.  When ``X_test`` is ``None`` the test split is neither
    predicted nor reported.

    Returns None.
    """
    model = self.setup_model()

    # Both callbacks receive the train/validation data, the patience and
    # PMTNN_weight_file as file_path.
    # NOTE(review): presumably the callback checkpoints the best weights to
    # file_path -- confirm against the callback implementation.
    if self.early_stopping_option == 'auc':
        early_stopping = KeckCallBackOnROC(
            X_train, y_train, X_val, y_val,
            patience=self.early_stopping_patience,
            file_path=PMTNN_weight_file)
        callbacks = [early_stopping]
    elif self.early_stopping_option == 'precision':
        early_stopping = KeckCallBackOnPrecision(
            X_train, y_train, X_val, y_val,
            patience=self.early_stopping_patience,
            file_path=PMTNN_weight_file)
        callbacks = [early_stopping]
    else:
        callbacks = []

    cw = get_class_weight(self, y_train)
    print 'cw ', cw

    model.compile(loss=self.compile_loss, optimizer=self.compile_optimizer)
    model.fit(X_train, y_train,
              nb_epoch=self.fit_nb_epoch,
              batch_size=self.fit_batch_size,
              verbose=self.fit_verbose,
              class_weight=cw,
              shuffle=True,
              callbacks=callbacks)

    # early_stopping is only bound in the two branches tested here, so this
    # condition must stay in sync with the callback selection above.
    if self.early_stopping_option == 'auc' or self.early_stopping_option == 'precision':
        model = early_stopping.get_best_model()

    y_pred_on_train = model.predict(X_train)
    y_pred_on_val = model.predict(X_val)
    if X_test is not None:
        y_pred_on_test = model.predict(X_test)

    # A bare ``print`` is the Python 2 statement form: it emits a blank line.
    print
    print('train precision: {}'.format(
        precision_auc_single(y_train, y_pred_on_train)))
    print('train roc: {}'.format(roc_auc_single(y_train, y_pred_on_train)))
    print('train bedroc: {}'.format(
        bedroc_auc_single(y_train, y_pred_on_train)))
    print

    print('validation precision: {}'.format(
        precision_auc_single(y_val, y_pred_on_val)))
    print('validation roc: {}'.format(roc_auc_single(y_val, y_pred_on_val)))
    print('validation bedroc: {}'.format(
        bedroc_auc_single(y_val, y_pred_on_val)))
    print

    if X_test is not None:
        print('test precision: {}'.format(
            precision_auc_single(y_test, y_pred_on_test)))
        print('test roc: {}'.format(roc_auc_single(y_test, y_pred_on_test)))
        print('test bedroc: {}'.format(
            bedroc_auc_single(y_test, y_pred_on_test)))
        # NOTE(review): nesting of this blank-line print was reconstructed
        # from a whitespace-collapsed source -- confirm it belongs here.
        print

    if X_test is not None:
        for EF_ratio in self.EF_ratio_list:
            n_actives, ef, ef_max = enrichment_factor_single(
                y_test, y_pred_on_test, EF_ratio)
            # Normalised enrichment factor: EF relative to its maximum.
            nef = ef / ef_max
            print('ratio: {}, EF: {},\tactive: {}'.format(
                EF_ratio, ef, n_actives))
            print('ratio: {}, NEF: {}'.format(EF_ratio, nef))
    return