def evaluate(binary_dataset, model_int, X_int_test, y_int_test):
    """Train a binary (in-scope vs. OOS) Rasa model and evaluate the intent model with it.

    :param binary_dataset: dict with a 'train' split used to fit the binary model
    :param model_int: already-trained Rasa intent-classification model
    :param X_int_test: test sentences for the intent model
    :param y_int_test: gold labels for X_int_test
    :returns: results dict produced by ``Testing.test_binary()``
    """
    train_yaml = dataset_2_string_rasa(binary_dataset['train'], limit_num_sents=False, set_type='train')

    # Rasa's loader reads training data from disk, so round-trip through a temp file.
    with NamedTemporaryFile(suffix='.yml') as tmp:
        tmp.write(train_yaml.encode('utf8'))
        tmp.seek(0)
        training_data = rasa.shared.nlu.training_data.loading.load_data(tmp.name)

    config = rasa.nlu.config.load('config.yml')
    trainer = rasa.nlu.model.Trainer(config)
    model_bin = trainer.train(training_data)

    # Test
    testing = Testing(model_int, X_int_test, y_int_test, 'rasa', 'oos', bin_model=model_bin)
    return testing.test_binary()
def evaluate(binary_dataset, model_int, X_int_test, y_int_test, dim):
    """Train a binary (in-scope vs. OOS) fastText model and evaluate the intent model with it.

    :param binary_dataset: dict with a 'train' split used to fit the binary model
    :param model_int: already-trained fastText intent-classification model
    :param X_int_test: test sentences for the intent model
    :param y_int_test: gold labels for X_int_test
    :param dim: embedding dimensionality; also selects the pretrained vector file
    :returns: results dict produced by ``Testing.test_binary()``
    """
    train_str = dataset_2_string(binary_dataset['train'], limit_num_sents=False, set_type='train')

    # fastText trains from a file on disk, so round-trip through a temp file.
    with NamedTemporaryFile() as tmp:
        tmp.write(train_str.encode('utf8'))
        tmp.seek(0)

        # Train model for in-scope queries
        model_bin = fasttext.train_supervised(
            input=tmp.name,
            dim=dim,
            pretrainedVectors=f'{PRETRAINED_VECTORS_PATH}/cc.en.{dim}.vec')

    # Test
    testing = Testing(model_int, X_int_test, y_int_test, 'fasttext', '__label__oos', bin_model=model_bin)
    return testing.test_binary()
def evaluate(binary_dataset, mlp_int, X_int_test, y_int_test, split):
    """Train a binary (in-scope vs. OOS) MLP and evaluate the intent classifier with it.

    :param binary_dataset: dict with a 'train' split used to fit the binary model
    :param mlp_int: already-trained MLP intent classifier
    :param X_int_test: vectorized test sentences for the intent classifier
    :param y_int_test: gold labels for X_int_test
    :param split: fitted Split helper providing ``get_X_y`` and ``intents_dct``
    :returns: results dict produced by ``Testing.test_binary()``
    """
    # fit=False: reuse the vectorizer already fitted on the intent data.
    X_bin_train, y_bin_train = split.get_X_y(
        binary_dataset['train'], fit=False, limit_num_sents=False, set_type='train')

    mlp_bin = MLPClassifier(activation='tanh').fit(X_bin_train, y_bin_train)

    # Test
    testing = Testing(mlp_int, X_int_test, y_int_test, 'mlp',
                      split.intents_dct['oos'], bin_model=mlp_bin)
    return testing.test_binary()
def evaluate(binary_dataset, svc_int, X_int_test, y_int_test, split):
    """Train a binary (in-scope vs. OOS) linear SVM and evaluate the intent classifier with it.

    :param binary_dataset: dict with a 'train' split used to fit the binary model
    :param svc_int: already-trained SVM intent classifier
    :param X_int_test: vectorized test sentences for the intent classifier
    :param y_int_test: gold labels for X_int_test
    :param split: fitted Split helper providing ``get_X_y`` and ``intents_dct``
    :returns: results dict produced by ``Testing.test_binary()``
    """
    # fit=False: reuse the vectorizer already fitted on the intent data.
    X_bin_train, y_bin_train = split.get_X_y(
        binary_dataset['train'], fit=False, limit_num_sents=False, set_type='train')

    svc_bin = svm.SVC(C=1, kernel='linear').fit(X_bin_train, y_bin_train)

    # Test
    testing = Testing(svc_int, X_int_test, y_int_test, 'svm',
                      split.intents_dct['oos'], bin_model=svc_bin)
    return testing.test_binary()
def evaluate(binary_dataset, model_int, X_int_test, y_int_test, split_int):
    """Fine-tune a binary (in-scope vs. OOS) BERT model and evaluate the intent model with it.

    :param binary_dataset: dict with 'train' and 'val' splits for the binary model
    :param model_int: already-trained BERT intent-classification model
    :param X_int_test: test sentences for the intent model
    :param y_int_test: gold labels for X_int_test
    :param split_int: Split_BERT used for the intent model (provides ``intents_dct``)
    :returns: results dict produced by ``Testing.test_binary()``
    """
    # Split and tokenize dataset.
    # We have to create a new split because the labels of BERT have to be
    # between [0, num_labels - 1].
    split_bin = Split_BERT()
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    X_bin_train, y_bin_train = split_bin.get_X_y(binary_dataset['train'], limit_num_sents=False, set_type='train')
    X_bin_val, y_bin_val = split_bin.get_X_y(binary_dataset['val'], limit_num_sents=False, set_type='val')

    train_bin_ids, train_bin_attention_masks, train_bin_labels = tokenize_BERT(
        X_bin_train, y_bin_train, tokenizer)
    val_bin_ids, val_bin_attention_masks, val_bin_labels = tokenize_BERT(
        X_bin_val, y_bin_val, tokenizer)

    # Train model — binary classifier, so we adjust the number of labels to 2.
    model_bin = TFBertForSequenceClassification.from_pretrained(
        'bert-base-uncased', num_labels=2)

    # BUG FIX: Keras Model.summary() prints the summary itself and returns None,
    # so the original print('\nBert Model', model_bin.summary()) also printed a
    # spurious "None". Print the header, then let summary() do its own printing.
    print('\nBert Model')
    model_bin.summary()

    log_dir = 'tensorboard_data/tb_bert_bin'
    model_save_path = './models/bert_model_bin.h5'

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(filepath=model_save_path,
                                           save_weights_only=True,
                                           monitor='val_loss',
                                           mode='min',
                                           save_best_only=True),
        tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    ]

    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    optimizer = tf.keras.optimizers.Adam(learning_rate=4e-5)

    model_bin.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    # The returned History object was never used, so don't bind it.
    model_bin.fit(
        [train_bin_ids, train_bin_attention_masks],
        train_bin_labels,
        batch_size=32,
        epochs=5,
        validation_data=([val_bin_ids, val_bin_attention_masks], val_bin_labels),
        callbacks=callbacks)

    # Test
    testing = Testing(model_int, X_int_test, y_int_test, 'bert',
                      split_int.intents_dct['oos'],
                      bin_model=model_bin,
                      bin_oos_label=split_bin.intents_dct['oos'])
    results_dct = testing.test_binary()

    return results_dct