def validation_step(self, batch, batch_idx, dataloader_idx):
    """Run one evaluation step and return loss plus AUROC/AUPRC/beta metrics.

    NOTE(review): dataloader_idx == 0 is logged under the "train" prefix and
    any other index under "val" — confirm this matches the dataloader order
    passed to the trainer.
    """
    x, y, lengths = batch
    output, _ = self(x)
    prefix = "val" if dataloader_idx else "train"
    loss = self.loss(output, y)

    # Move tensors to host memory once and reuse the numpy views for every
    # metric computation below.
    y_np = y.cpu().detach().numpy()
    logits_np = output.cpu().detach().numpy()
    probs_np = torch.sigmoid(output.cpu()).detach().numpy()

    auroc, auprc = compute_auc(y_np, probs_np, self.hparams.num_classes)
    hard_prediction = probs_to_hard_predictions(logits_np, self.hparams.num_classes)
    accuracy, f_measure, f_beta, g_beta = compute_beta_score(
        y_np, hard_prediction, 1, self.hparams.num_classes)

    # Assemble the prefixed metric dict; keys match the original literals.
    step_output = {"{}_loss".format(prefix): loss}
    for metric_name, metric_value in (("auroc", auroc),
                                      ("auprc", auprc),
                                      ("accuracy", accuracy),
                                      ("F_measure", f_measure),
                                      ("F_beta", f_beta),
                                      ("G_beta", g_beta)):
        step_output["{}_{}".format(prefix, metric_name)] = torch.tensor(metric_value)
    step_output["output"] = output.detach()
    step_output["y"] = y.detach()
    return step_output
def test_auc(self):
    """compute_auc should agree with sklearn's trapezoidal AUC on ramp data."""
    from sklearn.metrics import auc
    ranks, labels = self._gen_metric_data()
    res = metrics.compute_auc(ranks, labels)
    # Three evenly spaced ROC points with TPR == FPR (diagonal ROC curve).
    TPRs = [1 / 3, 2 / 3, 1]
    FPRs = [1 / 3, 2 / 3, 1]
    # FIX: `reorder` was deprecated in scikit-learn 0.20 and removed in 0.22,
    # so passing it raises TypeError on current sklearn. FPRs is already
    # monotonically increasing, so no reordering is needed.
    self.assertAllCloseAccordingToType(res['AUC'], auc(FPRs, TPRs))
def xgboost_test(extractor, opt):
    """Leave-one-study-out evaluation of an XGBoost classifier.

    For each of 7 held-out studies: extracts features with `extractor`,
    trains an XGBClassifier with early stopping on the held-out study, and
    accumulates train/test metrics; finally prints per-metric averages.
    """
    import xgboost as xgb

    res = defaultdict(list)
    res_train = defaultdict(list)
    for study_num in range(7):
        train_set, test_set = get_merged_common_dataset(opt, skip_study=study_num)
        train_data, train_labels = get_data(train_set)
        val_data, val_labels = get_data(test_set)

        # Features come from the learned extractor; the raw-data path of the
        # original experiment was disabled.
        train_features = extractor(train_data).detach().numpy()
        val_features = extractor(val_data).detach().numpy()

        # Fit with AUC-based early stopping on the held-out study.
        model = xgb.XGBClassifier()
        clf = model.fit(train_features, train_labels.astype(int),
                        eval_set=[(val_features, val_labels)],
                        early_stopping_rounds=50,
                        verbose=True,
                        eval_metric='auc')

        print(val_data.shape)
        # Positive-class prevalence of the held-out study.
        res['bias'].append(val_labels.sum() / len(val_labels))
        print(res['bias'][-1])

        y_pred = clf.predict_proba(val_features)[:, 1]
        x_pred = clf.predict_proba(train_features)[:, 1]
        compute_metrics(res, val_labels.flatten() > 0.5, y_pred > 0.5)
        compute_auc(res, val_labels.flatten() > 0.5, y_pred)
        compute_metrics(res_train, train_labels.flatten() > 0.5, x_pred > 0.5)
        compute_auc(res_train, train_labels.flatten() > 0.5, x_pred)

    # Report per-metric means across the 7 folds.
    for key in res_train:
        ave = numpy.asarray(res_train[key]).mean(axis=0)
        print('Train {0}: {1}'.format(key, ave))
    for key in res:
        ave = numpy.asarray(res[key]).mean(axis=0)
        print('Test {0}: {1}'.format(key, ave))
def test_DFM_avazu(data, train, test):
    """Grid-evaluate DeepFM on the avazu dataset.

    Sweeps three hyperparameters independently — activation function,
    dropout rate, and hidden-unit configuration — training one model per
    setting and recording AUC / logloss / RMSE on the test split. Optionally
    plots the results when the module-level PLOT flag is set.

    Parameters
    ----------
    data : full dataframe; used only for vocabulary sizes via ``nunique()``.
    train, test : train/test splits. Column 0 is the binary target, columns
        1..22 are the sparse categorical features.
    """
    print("\nTesting DFM on avazu dataset...\n")
    results_activation_function = {"auc": [], "logloss": [], "rmse": []}
    results_dropout = {"auc": [], "logloss": [], "rmse": []}
    results_number_of_neurons = {"auc": [], "logloss": [], "rmse": []}

    features_labels = train.columns
    sparse_features_labels = features_labels[1:23]
    target_label = features_labels[0]

    # Identical sparse-feature spec for both the linear and the DNN parts.
    dnn_feature_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for feat in sparse_features_labels
    ]
    linear_feature_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for feat in sparse_features_labels
    ]
    feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)
    train_model_input = {name: train[name] for name in feature_names}
    test_model_input = {name: test[name] for name in feature_names}
    true_y = test[target_label].values

    def _fit_and_score(results, **deepfm_kwargs):
        # Train one DeepFM variant and append its test metrics to `results`.
        # (Extracted from three verbatim copies of this sequence.)
        model = DeepFM(linear_feature_columns, dnn_feature_columns,
                       task='binary', **deepfm_kwargs)
        model.compile(
            "adam",
            "binary_crossentropy",
            metrics=['binary_crossentropy'],
        )
        model.fit(
            train_model_input,
            train[target_label].values,
            batch_size=256,
            epochs=10,
            verbose=0,
            validation_split=TEST_PROPORTION,
        )
        pred_y = model.predict(test_model_input, batch_size=256)
        results["auc"].append(compute_auc(true_y, pred_y))
        results["logloss"].append(compute_log_loss(true_y, pred_y))
        results["rmse"].append(compute_rmse(true_y, pred_y))

    print("\t\t-- ACTIVATION FUNCTIONS --\t\t")
    for dnn_activation in dnn_activation_list:
        print("\nTesting {dnn_activation}...".format(
            dnn_activation=dnn_activation))
        _fit_and_score(results_activation_function,
                       dnn_activation=dnn_activation)

    print("\t\t-- DROPOUT RATES --\t\t")
    for dnn_dropout in dnn_dropout_list:
        print("\nTesting {dnn_dropout}...".format(dnn_dropout=dnn_dropout))
        _fit_and_score(results_dropout, dnn_dropout=dnn_dropout)

    print("\t\t-- HIDDEN UNITS --\t\t")
    for dnn_hidden_units in dnn_hidden_units_list:
        print("\nTesting {dnn_hidden_units}...".format(
            dnn_hidden_units=dnn_hidden_units))
        _fit_and_score(results_number_of_neurons,
                       dnn_hidden_units=dnn_hidden_units)

    if PLOT:
        create_plots("DFM", "avazu", results_activation_function,
                     "Activation Function", "activation_func",
                     dnn_activation_list)
        create_plots("DFM", "avazu", results_dropout, "Dropout Rate",
                     "dropout", dnn_dropout_list)
        create_plots("DFM", "avazu", results_number_of_neurons,
                     "Number of Neurons per layer", "nr_neurons",
                     dnn_hidden_units_list)