def evaluate(dataset, limit_num_sents: bool):
    """Train a Rasa NLU intent model and evaluate OOS-threshold performance.

    The training split is serialized to Rasa's YAML format, a model is trained
    from ``config.yml``, a confidence threshold is tuned on the validation
    split, and the tuned threshold is scored on the test split.

    :param dataset: dict with 'train', 'val', 'test', 'oos_val', 'oos_test' splits
    :param limit_num_sents: whether to cap the number of sentences per intent
    :return: results dict produced by ``Testing.test_threshold``
    """
    training_yaml = dataset_2_string_rasa(dataset['train'],
                                          limit_num_sents=limit_num_sents,
                                          set_type='train')
    X_val, y_val = get_X_y_rasa(dataset['val'] + dataset['oos_val'],
                                limit_num_sents=limit_num_sents,
                                set_type='val')
    X_test, y_test = get_X_y_rasa(dataset['test'] + dataset['oos_test'],
                                  limit_num_sents=limit_num_sents,
                                  set_type='test')

    # Rasa's loader wants a file path, so round-trip the YAML through a
    # temporary file that lives only for the duration of training.
    with NamedTemporaryFile(suffix='.yml') as tmp:
        tmp.write(training_yaml.encode('utf8'))
        tmp.seek(0)

        training_data = rasa.shared.nlu.training_data.loading.load_data(tmp.name)
        config = rasa.nlu.config.load('config.yml')
        model = rasa.nlu.model.Trainer(config).train(training_data)

    # Collect (prediction, gold-label) pairs on validation to tune the
    # confidence threshold for out-of-scope detection.
    val_predictions_labels = []
    for sent, true_int_label in zip(X_val, y_val):
        parsed = model.parse(sent)
        prediction = (parsed['intent']['name'], parsed['intent']['confidence'])
        val_predictions_labels.append((prediction, true_int_label))

    threshold = find_best_threshold(val_predictions_labels, 'oos')

    # Final evaluation on the held-out test split.
    testing = Testing(model, X_test, y_test, 'rasa', 'oos')
    return testing.test_threshold(threshold)
def evaluate_verification(self, ref_embeddings, qry_embeddings, labels):
    """Evaluate verification performance from embedding cosine similarity.

    For each key in ``qry_embeddings``, computes cosine similarity between
    reference and query embeddings, then runs 10-fold cross-validation:
    the best decision threshold is chosen on the train folds and accuracy
    is measured on the held-out fold.

    :param ref_embeddings: dict of reference embedding tensors, keyed like qry_embeddings
    :param qry_embeddings: dict of query embedding tensors
    :param labels: tensor of ground-truth same/different labels, stacked next to the similarities
    :return: dict mapping each key to dict(acc=..., std=..., thr=...), plus a
             'criterion' entry holding the mean accuracy across keys
    """
    metric_dict = {}
    for k in qry_embeddings.keys():
        cos_sim = torch.nn.functional.cosine_similarity(
            ref_embeddings[k], qry_embeddings[k])
        # Column 0: similarity score, column 1: gold label.
        predictions = torch.stack((cos_sim, labels), dim=1).numpy()
        accs, thrs = [], []
        # Candidate thresholds sweep the full cosine-similarity range.
        thresholds = np.arange(-1.0, 1.0, 0.005)
        # NOTE(review): KFold(n=..., n_folds=...) iterated directly is the
        # pre-0.18 scikit-learn API; modern sklearn uses
        # KFold(n_splits=10).split(X). Confirm the pinned sklearn version.
        for train_idx, test_idx in KFold(n=len(self), n_folds=10):
            best_thr = find_best_threshold(thresholds, predictions[train_idx])
            accs.append(eval_acc(best_thr, predictions[test_idx]))
            thrs.append(best_thr)
        metric_dict[k] = dict(acc=np.mean(accs), std=np.std(accs),
                              thr=np.mean(thrs))
    # Aggregate criterion: mean accuracy over all per-key entries
    # (computed before the 'criterion' key itself is inserted).
    metric_dict['criterion'] = np.mean(
        np.array([metric_dict[k]['acc'] for k in metric_dict.keys()]))
    return metric_dict
def evaluate(dataset, limit_num_sents: bool):
    """Train a linear SVM intent classifier and evaluate OOS thresholding.

    Vectorizes the splits (fitting only on train), trains an SVM with
    probability estimates, tunes a confidence threshold on validation,
    and scores it on test.

    :param dataset: dict with 'train', 'val', 'test', 'oos_val', 'oos_test' splits
    :param limit_num_sents: whether to cap the number of sentences per intent
    :return: results dict produced by ``Testing.test_threshold``
    """
    split = Split()
    # fit only on first dataset
    X_train, y_train = split.get_X_y(dataset['train'], fit=True,
                                     limit_num_sents=limit_num_sents,
                                     set_type='train')
    X_val, y_val = split.get_X_y(dataset['val'] + dataset['oos_val'],
                                 fit=False,
                                 limit_num_sents=limit_num_sents,
                                 set_type='val')
    X_test, y_test = split.get_X_y(dataset['test'] + dataset['oos_test'],
                                   fit=False,
                                   limit_num_sents=limit_num_sents,
                                   set_type='test')

    classifier = svm.SVC(C=1, kernel='linear', probability=True)
    classifier = classifier.fit(X_train, y_train)

    # Collect (prediction, gold-label) pairs on validation to tune the
    # confidence threshold for out-of-scope detection.
    val_predictions_labels = []
    for sent_vec, true_int_label in zip(X_val, y_val):
        # Row 0: probabilities for this single sentence vector.
        # NOTE(review): argmax returns an index into classifier.classes_;
        # this equals the label only if labels are contiguous 0..n-1 — confirm.
        probabilities = classifier.predict_proba(sent_vec)[0]
        best_idx = argmax(probabilities)
        val_predictions_labels.append(
            ((best_idx, probabilities[best_idx]), true_int_label))

    threshold = find_best_threshold(val_predictions_labels,
                                    split.intents_dct['oos'])

    # Final evaluation on the held-out test split.
    testing = Testing(classifier, X_test, y_test, 'svm',
                      split.intents_dct['oos'])
    return testing.test_threshold(threshold)
def evaluate(dataset, dim: int, limit_num_sents: bool):
    """Train a fastText supervised classifier and evaluate OOS thresholding.

    Serializes the training split to fastText's text format, trains a
    supervised model initialized from pretrained vectors of the given
    dimension, tunes a confidence threshold on validation, and scores it
    on test.

    :param dataset: dict with 'train', 'val', 'test', 'oos_val', 'oos_test' splits
    :param dim: embedding dimension; selects the matching pretrained vector file
    :param limit_num_sents: whether to cap the number of sentences per intent
    :return: results dict produced by ``Testing.test_threshold``
    """
    train_corpus = dataset_2_string(dataset['train'],
                                    limit_num_sents=limit_num_sents,
                                    set_type='train')
    X_val, y_val = get_X_y_fasttext(dataset['val'] + dataset['oos_val'],
                                    limit_num_sents=limit_num_sents,
                                    set_type='val')
    X_test, y_test = get_X_y_fasttext(dataset['test'] + dataset['oos_test'],
                                      limit_num_sents=limit_num_sents,
                                      set_type='test')

    # fastText trains from a file path, so round-trip the corpus through a
    # temporary file.
    with NamedTemporaryFile() as tmp:
        tmp.write(train_corpus.encode('utf8'))
        tmp.seek(0)

        # Train model for in-scope queries
        model = fasttext.train_supervised(
            input=tmp.name,
            dim=dim,
            pretrainedVectors=f'{PRETRAINED_VECTORS_PATH}/cc.en.{dim}.vec')

    # Collect (prediction, gold-label) pairs on validation to tune the
    # confidence threshold for out-of-scope detection.
    val_predictions_labels = []
    for sent, true_int_label in zip(X_val, y_val):
        # predict() returns (labels, confidences); take the top-1 of each.
        labels, confidences = model.predict(sent)
        val_predictions_labels.append(
            ((labels[0], confidences[0]), true_int_label))

    threshold = find_best_threshold(val_predictions_labels, '__label__oos')

    # Final evaluation on the held-out test split.
    testing = Testing(model, X_test, y_test, 'fasttext', '__label__oos')
    return testing.test_threshold(threshold)
def evaluate(dataset, limit_num_sents: bool):
    """Fine-tune a TF BERT intent classifier and evaluate OOS thresholding.

    Splits and tokenizes the dataset with a BERT tokenizer, fine-tunes
    ``TFBertForSequenceClassification`` for 5 epochs, tunes a softmax
    confidence threshold on the validation split, and evaluates on test.

    :param dataset: dict with 'train', 'val', 'test', 'oos_val', 'oos_test' splits
    :param limit_num_sents: whether to cap the number of sentences per intent
    :return: results dict produced by ``Testing.test_threshold``
    """
    # Split and tokenize dataset
    split = Split_BERT()
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    X_train, y_train = split.get_X_y(dataset['train'],
                                     limit_num_sents=limit_num_sents,
                                     set_type='train')
    X_val, y_val = split.get_X_y(dataset['val'] + dataset['oos_val'],
                                 limit_num_sents=limit_num_sents,
                                 set_type='val')
    X_test, y_test = split.get_X_y(dataset['test'] + dataset['oos_test'],
                                   limit_num_sents=limit_num_sents,
                                   set_type='test')

    train_ids, train_attention_masks, train_labels = tokenize_BERT(
        X_train, y_train, tokenizer)
    val_ids, val_attention_masks, val_labels = tokenize_BERT(
        X_val, y_val, tokenizer)
    test_ids, test_attention_masks, test_labels = tokenize_BERT(
        X_test, y_test, tokenizer)

    num_labels = len(split.intents_dct.keys(
    )) - 1  # minus 1 because 'oos' label isn't used in training

    # Train model
    model = TFBertForSequenceClassification.from_pretrained(
        'bert-base-uncased',
        num_labels=num_labels)  # we have to adjust the number of labels
    print('\nBert Model', model.summary())

    log_dir = 'tensorboard_data/tb_bert'
    model_save_path = './models/bert_model.h5'

    # Checkpoint the best weights by validation loss; log to TensorBoard.
    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(filepath=model_save_path,
                                           save_weights_only=True,
                                           monitor='val_loss',
                                           mode='min',
                                           save_best_only=True),
        tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    ]

    # Model outputs raw logits, hence from_logits=True.
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    optimizer = tf.keras.optimizers.Adam(learning_rate=4e-5)

    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    history = model.fit([train_ids, train_attention_masks],
                        train_labels,
                        batch_size=32,
                        epochs=5,
                        validation_data=([val_ids, val_attention_masks],
                                         val_labels),
                        callbacks=callbacks)

    val_predictions_labels = []  # used to find threshold
    for sent, true_int_label in zip(X_val, y_val):
        predict_input = tokenizer.encode(sent,
                                         truncation=True,
                                         padding=True,
                                         return_tensors="tf")
        tf_output = model.predict(predict_input)[0]
        pred_probs = tf.nn.softmax(
            tf_output, axis=1).numpy()[0]  # intent prediction probabilities
        pred_label = argmax(pred_probs)  # intent prediction
        similarity = pred_probs[pred_label]

        pred = (pred_label, similarity)
        val_predictions_labels.append((pred, true_int_label))

    threshold = find_best_threshold(val_predictions_labels,
                                    split.intents_dct['oos'])

    # Test
    testing = Testing(model, {
        'test_ids': test_ids,
        'test_attention_masks': test_attention_masks
    }, test_labels, 'bert', split.intents_dct['oos'])
    results_dct = testing.test_threshold(threshold)

    return results_dct
# Pull the pre-processed xgboost splits.
# NOTE(review): xgb_trainx is referenced below but assigned earlier, outside
# this chunk — presumably processed_data["xgboost_data"]["train_x"]; confirm.
xgb_trainy = processed_data["xgboost_data"]["train_y"]
xgb_validx = processed_data["xgboost_data"]["valid_x"]
xgb_validy = processed_data["xgboost_data"]["valid_y"]
xgb_testx = processed_data["xgboost_data"]["test_x"]
xgb_testy = processed_data["xgboost_data"]["test_y"]

# build xgboost model
print("Training xgboost model...")
xgb_clf = XGBClassifier(n_estimators=100, max_depth=4)
xgb_clf.fit(xgb_trainx, xgb_trainy)

# evaluate xgboost model
print("------Evaluating xgboost model------")
# Probability of the positive class (column 1).
test_pred = xgb_clf.predict_proba(xgb_testx)[:, 1]
xgb_auc = roc_auc_score(xgb_testy, test_pred)
# Tune the decision threshold on validation, then score F1 on test with it.
xgb_threshold, _ = find_best_threshold(xgb_clf, xgb_validx, xgb_validy)
xgb_f1 = find_best_threshold(xgb_clf,
                             xgb_testx,
                             xgb_testy,
                             best_thresh=xgb_threshold)
print("AUC = %.4f, F1-score = %.4f" % (xgb_auc, xgb_f1))

# Precision and Recall
# For each top-percentile cutoff of predicted risk, report how many
# transactions exceed it and the precision/recall among them.
y_prob = test_pred
for i in [99, 98, 95, 90]:
    threshold = np.percentile(y_prob, i)
    print(
        f'Checking top {100-i}% suspicious transactions: {len(y_prob[y_prob > threshold])}'
    )
    # NOTE(review): precision/recall are computed but not printed or stored
    # in this chunk — presumably used further down; confirm.
    precision = np.mean(xgb_testy[y_prob > threshold])
    recall = sum(xgb_testy[y_prob > threshold]) / sum(xgb_testy)
shuffle=True, num_workers=1) if int(args.finetune) == 2: model_name = "model_checkpoint_finetune_2_fold_{}.pth".format(n) elif int(args.finetune) == 1: model_name = "model_checkpoint_finetune_1_fold_{}.pth".format(n) elif int(args.finetune) == 0: model_name = "model_checkpoint_fold_{}.pth".format(n) if torch.cuda.is_available(): model.load_state_dict(torch.load(model_name)) else: model.load_state_dict(torch.load(model_name), map_location='cpu') if n == saved_best_cv: best_threshold = find_best_threshold(model, val_loader) if n == 0: y_pred_test = infer_prediction(model, test_loader) else: y_pred_test += infer_prediction(model, test_loader) y_pred_test = y_pred_test/4 binary_prediction = (y_pred_test > best_threshold).astype(int) else: valid_idx = valid_indexes[saved_best_cv] salt_ID_dataset_valid = saltIDDataset(path_train, SaltLevel.train_ids.iloc[valid_idx].values, transforms=False, train="valid") val_loader = torch.utils.data.DataLoader(dataset=salt_ID_dataset_valid, batch_size=2, shuffle=True,