Example #1
def evaluate(dataset, limit_num_sents: bool):
    train_str = dataset_2_string_rasa(dataset['train'], limit_num_sents=limit_num_sents, set_type='train')
    X_val, y_val = get_X_y_rasa(dataset['val'] + dataset['oos_val'], limit_num_sents=limit_num_sents,
                                set_type='val')
    X_test, y_test = get_X_y_rasa(dataset['test'] + dataset['oos_test'], limit_num_sents=limit_num_sents,
                                  set_type='test')

    with NamedTemporaryFile(suffix='.yml') as f:
        f.write(train_str.encode('utf8'))
        f.seek(0)

        training_data = rasa.shared.nlu.training_data.loading.load_data(f.name)

    config = rasa.nlu.config.load('config.yml')
    trainer = rasa.nlu.model.Trainer(config)
    model = trainer.train(training_data)

    val_predictions_labels = []  # used to find threshold

    for sent, true_int_label in zip(X_val, y_val):
        pred = model.parse(sent)
        pred_label = pred['intent']['name']
        similarity = pred['intent']['confidence']

        pred = (pred_label, similarity)
        val_predictions_labels.append((pred, true_int_label))

    threshold = find_best_threshold(val_predictions_labels, 'oos')

    # Test
    testing = Testing(model, X_test, y_test, 'rasa', 'oos')
    results_dct = testing.test_threshold(threshold)

    return results_dct
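None of these examples include `find_best_threshold` itself. A minimal sketch consistent with the call sites in Examples #1, #3, #4 and #5 — which pass a list of `((predicted_label, confidence), true_label)` pairs plus the out-of-scope label — would sweep candidate thresholds, route low-confidence predictions to the out-of-scope label, and keep the threshold with the best validation accuracy. The sweep grid and the accuracy criterion below are assumptions, not the original implementation:

import numpy as np

def find_best_threshold(predictions_labels, oos_label):
    """Hypothetical sketch; predictions_labels holds ((pred_label, confidence), true_label) pairs."""
    best_threshold, best_accuracy = 0.0, -1.0
    for threshold in np.linspace(0.0, 1.0, 101):  # assumed sweep grid
        correct = 0
        for (pred_label, confidence), true_label in predictions_labels:
            # predictions below the threshold are treated as out-of-scope
            final_label = pred_label if confidence >= threshold else oos_label
            correct += final_label == true_label
        accuracy = correct / len(predictions_labels)
        if accuracy > best_accuracy:
            best_threshold, best_accuracy = threshold, accuracy
    return best_threshold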
Example #2
    def evaluate_verification(self, ref_embeddings, qry_embeddings, labels):
        metric_dict = {}
        for k in qry_embeddings.keys():
            cos_sim = torch.nn.functional.cosine_similarity(
                ref_embeddings[k], qry_embeddings[k])
            predictions = torch.stack((cos_sim, labels), dim=1).numpy()

            accs, thrs = [], []
            thresholds = np.arange(-1.0, 1.0, 0.005)
            # modern sklearn.model_selection API; the original
            # cross_validation.KFold(n, n_folds) call was removed in sklearn 0.20
            for train_idx, test_idx in KFold(n_splits=10).split(predictions):
                best_thr = find_best_threshold(thresholds,
                                               predictions[train_idx])
                accs.append(eval_acc(best_thr, predictions[test_idx]))
                thrs.append(best_thr)
            metric_dict[k] = dict(acc=np.mean(accs),
                                  std=np.std(accs),
                                  thr=np.mean(thrs))
        metric_dict['criterion'] = np.mean(
            np.array([metric_dict[k]['acc'] for k in metric_dict.keys()]))
        return metric_dict
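Example #2 calls `find_best_threshold` with a different signature: a grid of candidate thresholds and an array of `(cosine_similarity, label)` rows, plus an `eval_acc` helper. A hedged sketch of both, assuming binary 0/1 labels and verification accuracy as the selection criterion:

import numpy as np

def eval_acc(threshold, predictions):
    # predictions[:, 0] holds cosine similarities, predictions[:, 1] the 0/1 labels
    y_pred = (predictions[:, 0] > threshold).astype(int)
    return float(np.mean(y_pred == predictions[:, 1].astype(int)))

def find_best_threshold(thresholds, predictions):
    # keep the candidate threshold with the highest accuracy on the given split
    accuracies = [eval_acc(t, predictions) for t in thresholds]
    return thresholds[int(np.argmax(accuracies))]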
Example #3
def evaluate(dataset, limit_num_sents: bool):
    # Split dataset
    split = Split()

    X_train, y_train = split.get_X_y(
        dataset['train'],
        fit=True,
        limit_num_sents=limit_num_sents,
        set_type='train')  # fit only on first dataset
    X_val, y_val = split.get_X_y(dataset['val'] + dataset['oos_val'],
                                 fit=False,
                                 limit_num_sents=limit_num_sents,
                                 set_type='val')
    X_test, y_test = split.get_X_y(dataset['test'] + dataset['oos_test'],
                                   fit=False,
                                   limit_num_sents=limit_num_sents,
                                   set_type='test')

    svc_int = svm.SVC(C=1, kernel='linear',
                      probability=True).fit(X_train, y_train)

    val_predictions_labels = []  # used to find threshold

    for sent_vec, true_int_label in zip(X_val, y_val):
        # intent prediction probabilities
        pred_probs = svc_int.predict_proba(sent_vec)[0]
        pred_label = argmax(pred_probs)  # intent prediction
        similarity = pred_probs[pred_label]

        pred = (pred_label, similarity)
        val_predictions_labels.append((pred, true_int_label))

    threshold = find_best_threshold(val_predictions_labels,
                                    split.intents_dct['oos'])

    # Test
    testing = Testing(svc_int, X_test, y_test, 'svm', split.intents_dct['oos'])
    results_dct = testing.test_threshold(threshold)

    return results_dct
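One caveat in this example: `argmax(pred_probs)` is an index into the columns of `predict_proba`, which follow `svc_int.classes_` (the sorted training labels), not necessarily a class label. The two coincide here only if `split.intents_dct` maps intents to the contiguous integers 0..N-1. A toy demonstration of the explicit mapping (data and labels are illustrative):

import numpy as np
from sklearn import svm

X = [[0.0], [0.1], [0.2], [0.3],
     [1.0], [1.1], [1.2], [1.3],
     [2.0], [2.1], [2.2], [2.3]]
y = [10] * 4 + [20] * 4 + [30] * 4  # deliberately non-contiguous labels

clf = svm.SVC(C=1, kernel='linear', probability=True).fit(X, y)
probs = clf.predict_proba([[1.9]])[0]
pred_label = clf.classes_[np.argmax(probs)]  # map the column index back to a label
print(pred_label)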
Example #4
def evaluate(dataset, dim: int, limit_num_sents: bool):
    train_str = dataset_2_string(dataset['train'],
                                 limit_num_sents=limit_num_sents,
                                 set_type='train')
    X_val, y_val = get_X_y_fasttext(dataset['val'] + dataset['oos_val'],
                                    limit_num_sents=limit_num_sents,
                                    set_type='val')
    X_test, y_test = get_X_y_fasttext(dataset['test'] + dataset['oos_test'],
                                      limit_num_sents=limit_num_sents,
                                      set_type='test')

    with NamedTemporaryFile() as f:
        f.write(train_str.encode('utf8'))
        f.seek(0)

        # Train model for in-scope queries
        model = fasttext.train_supervised(
            input=f.name,
            dim=dim,
            pretrainedVectors=f'{PRETRAINED_VECTORS_PATH}/cc.en.{dim}.vec')

    val_predictions_labels = []  # used to find threshold

    for sent, true_int_label in zip(X_val, y_val):
        pred = model.predict(sent)
        pred_label = pred[0][0]
        similarity = pred[1][0]

        pred = (pred_label, similarity)
        val_predictions_labels.append((pred, true_int_label))

    threshold = find_best_threshold(val_predictions_labels, '__label__oos')

    # Test
    testing = Testing(model, X_test, y_test, 'fasttext', '__label__oos')
    results_dct = testing.test_threshold(threshold)

    return results_dct
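For context, `fasttext.train_supervised` expects one example per line, with each label carrying the `__label__` prefix; that is why the out-of-scope class appears as `__label__oos` here. `dataset_2_string` presumably emits lines such as these (illustrative sentences, not the actual dataset):

__label__translate how do you say hello in french
__label__oos who won the 1986 world cup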
Example #5
def evaluate(dataset, limit_num_sents: bool):
    # Split and tokenize dataset
    split = Split_BERT()
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    X_train, y_train = split.get_X_y(dataset['train'],
                                     limit_num_sents=limit_num_sents,
                                     set_type='train')
    X_val, y_val = split.get_X_y(dataset['val'] + dataset['oos_val'],
                                 limit_num_sents=limit_num_sents,
                                 set_type='val')
    X_test, y_test = split.get_X_y(dataset['test'] + dataset['oos_test'],
                                   limit_num_sents=limit_num_sents,
                                   set_type='test')

    train_ids, train_attention_masks, train_labels = tokenize_BERT(
        X_train, y_train, tokenizer)
    val_ids, val_attention_masks, val_labels = tokenize_BERT(
        X_val, y_val, tokenizer)
    test_ids, test_attention_masks, test_labels = tokenize_BERT(
        X_test, y_test, tokenizer)

    # minus 1 because the 'oos' label isn't used in training
    num_labels = len(split.intents_dct) - 1

    # Train model
    model = TFBertForSequenceClassification.from_pretrained(
        'bert-base-uncased',
        num_labels=num_labels)  # we have to adjust the number of labels
    print('\nBert Model')
    model.summary()  # summary() prints the architecture itself and returns None

    log_dir = 'tensorboard_data/tb_bert'
    model_save_path = './models/bert_model.h5'

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(filepath=model_save_path,
                                           save_weights_only=True,
                                           monitor='val_loss',
                                           mode='min',
                                           save_best_only=True),
        tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    ]

    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    optimizer = tf.keras.optimizers.Adam(learning_rate=4e-5)

    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    history = model.fit([train_ids, train_attention_masks],
                        train_labels,
                        batch_size=32,
                        epochs=5,
                        validation_data=([val_ids,
                                          val_attention_masks], val_labels),
                        callbacks=callbacks)

    val_predictions_labels = []  # used to find threshold

    for sent, true_int_label in zip(X_val, y_val):
        predict_input = tokenizer.encode(sent,
                                         truncation=True,
                                         padding=True,
                                         return_tensors="tf")

        tf_output = model.predict(predict_input)[0]
        # intent prediction probabilities
        pred_probs = tf.nn.softmax(tf_output, axis=1).numpy()[0]
        pred_label = argmax(pred_probs)  # intent prediction
        similarity = pred_probs[pred_label]

        pred = (pred_label, similarity)
        val_predictions_labels.append((pred, true_int_label))

    threshold = find_best_threshold(val_predictions_labels,
                                    split.intents_dct['oos'])

    # Test
    testing = Testing(model, {
        'test_ids': test_ids,
        'test_attention_masks': test_attention_masks
    }, test_labels, 'bert', split.intents_dct['oos'])
    results_dct = testing.test_threshold(threshold)

    return results_dct
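The validation loop above tokenizes and predicts one sentence at a time. A batched variant is usually much faster; this sketch reuses `tokenizer`, `model`, `X_val` and `y_val` from the example, mirrors the `[0]`-indexing of the loop above, and assumes a transformers version with a callable tokenizer (the batching itself is not part of the original):

# hypothetical batched replacement for the per-sentence validation loop
encoded = tokenizer(list(X_val), truncation=True, padding=True, return_tensors="tf")
tf_output = model.predict([encoded["input_ids"], encoded["attention_mask"]])[0]
pred_probs = tf.nn.softmax(tf_output, axis=1).numpy()
val_predictions_labels = [
    ((int(probs.argmax()), float(probs.max())), true_label)
    for probs, true_label in zip(pred_probs, y_val)
]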
Example #6
        # (enclosing function definition truncated in the source)
        xgb_trainx = processed_data["xgboost_data"]["train_x"]
        xgb_trainy = processed_data["xgboost_data"]["train_y"]
        xgb_validx = processed_data["xgboost_data"]["valid_x"]
        xgb_validy = processed_data["xgboost_data"]["valid_y"]
        xgb_testx = processed_data["xgboost_data"]["test_x"]
        xgb_testy = processed_data["xgboost_data"]["test_y"]

        # build xgboost model
        print("Training xgboost model...")
        xgb_clf = XGBClassifier(n_estimators=100, max_depth=4)
        xgb_clf.fit(xgb_trainx, xgb_trainy)

        # evaluate xgboost model
        print("------Evaluating xgboost model------")
        test_pred = xgb_clf.predict_proba(xgb_testx)[:, 1]
        xgb_auc = roc_auc_score(xgb_testy, test_pred)
        xgb_threshold, _ = find_best_threshold(xgb_clf, xgb_validx, xgb_validy)
        xgb_f1 = find_best_threshold(xgb_clf,
                                     xgb_testx,
                                     xgb_testy,
                                     best_thresh=xgb_threshold)
        print("AUC = %.4f, F1-score = %.4f" % (xgb_auc, xgb_f1))

        # Precision and Recall
        y_prob = test_pred
        for i in [99, 98, 95, 90]:
            threshold = np.percentile(y_prob, i)
            print(
                f'Checking top {100-i}% suspicious transactions: {len(y_prob[y_prob > threshold])}'
            )
            precision = np.mean(xgb_testy[y_prob > threshold])
            recall = sum(xgb_testy[y_prob > threshold]) / sum(xgb_testy)
            print(f'Precision: {precision:.4f}, Recall: {recall:.4f}')
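The two call sites above imply a dual-mode helper: with no `best_thresh` it searches a grid and returns `(threshold, f1)`; with `best_thresh` given it just scores that threshold. A sketch under those assumptions (the grid and the F1 metric are guesses, not the original code):

import numpy as np
from sklearn.metrics import f1_score

def find_best_threshold(clf, X, y, best_thresh=None):
    # score positive-class probabilities against binarized predictions
    y_prob = clf.predict_proba(X)[:, 1]
    if best_thresh is not None:
        return f1_score(y, (y_prob > best_thresh).astype(int))
    thresholds = np.arange(0.01, 1.0, 0.01)  # assumed search grid
    f1s = [f1_score(y, (y_prob > t).astype(int)) for t in thresholds]
    best = int(np.argmax(f1s))
    return thresholds[best], f1s[best]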
Example #7
            # (beginning of this snippet, including the DataLoader setup, is truncated in the source)
            if int(args.finetune) == 2:
                model_name = "model_checkpoint_finetune_2_fold_{}.pth".format(n)
            elif int(args.finetune) == 1:
                model_name = "model_checkpoint_finetune_1_fold_{}.pth".format(n)
            elif int(args.finetune) == 0:
                model_name = "model_checkpoint_fold_{}.pth".format(n)

            if torch.cuda.is_available():
                model.load_state_dict(torch.load(model_name))
            else:
                # map_location is an argument of torch.load, not load_state_dict
                model.load_state_dict(torch.load(model_name, map_location='cpu'))

            if n == saved_best_cv:
                best_threshold = find_best_threshold(model, val_loader)

            if n == 0:
                y_pred_test = infer_prediction(model, test_loader)
            else:
                y_pred_test += infer_prediction(model, test_loader)

        y_pred_test = y_pred_test / 4  # average the predictions over the 4 folds
        binary_prediction = (y_pred_test > best_threshold).astype(int)

    else:
        valid_idx = valid_indexes[saved_best_cv]
        salt_ID_dataset_valid = saltIDDataset(path_train, SaltLevel.train_ids.iloc[valid_idx].values, transforms=False, train="valid")
        val_loader = torch.utils.data.DataLoader(dataset=salt_ID_dataset_valid,
                                                 batch_size=2,
                                                 shuffle=True)
        # (remainder of the snippet truncated in the source)