Example #1
print("Training completed!")

os.makedirs("finetuning_bert", exist_ok=True)
model.save_pretrained("finetuning_bert")
tokenizer.save_pretrained("finetuning_bert")
# Evaluation
predictions = []
true_labels = []
# put the model in evaluation mode and predict on the validation set
model.eval()
for batch in validation_dataloader:
    # move the batch to the GPU
    batch = tuple(t.to('cuda') for t in batch)
    b_input_ids, b_input_mask, b_labels = batch
    with torch.no_grad():
        outputs = model(b_input_ids,
                        token_type_ids=None,
                        attention_mask=b_input_mask)
        logits = outputs[0]
        # move logits and labels to the CPU for numpy post-processing
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()
        predictions.append(logits)
        true_labels.append(label_ids)
# flatten the per-batch outputs, then take the argmax over the label
# dimension to get one predicted class per example
flat_predictions = [item for sublist in predictions for item in sublist]
flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
flat_true_labels = [item for sublist in true_labels for item in sublist]
bert_evaluation_scores, bert_cm = evaluation.multilabel_evaluation(
    flat_true_labels, flat_predictions, "SciBERT Embeddings Finetuning")
with open("classifier_optimization_finetuning_scibert.txt",
          "w+") as documentation_file_modelopt:
    documentation_file_modelopt.write(bert_evaluation_scores)
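
# A minimal sketch of reloading the saved artifacts for later inference,
# using the standard Hugging Face from_pretrained API (the Auto* classes are
# an assumption; any class matching the fine-tuned model works):
from transformers import AutoModelForSequenceClassification, AutoTokenizer
reloaded_tokenizer = AutoTokenizer.from_pretrained("finetuning_bert")
reloaded_model = AutoModelForSequenceClassification.from_pretrained(
    "finetuning_bert")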
Example #2
                       "Random Undersampling")
sampling_strategies.append(triple_undersampled)
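
# For reference, the (features, labels, name) triple appended above could be
# built with imblearn's RandomUnderSampler; a minimal sketch, assuming
# q_train / d_train hold the full training features and labels (both names
# are hypothetical here):
from imblearn.under_sampling import RandomUnderSampler

undersampler = RandomUnderSampler(random_state=42)
q_train_under, d_train_under = undersampler.fit_resample(q_train, d_train)
triple_undersampled_sketch = (q_train_under, d_train_under,
                              "Random Undersampling")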

for q_train_sample, d_train_sample, name in sampling_strategies:
    documentation_file_modelopt.write(str(name) + "\n")

    # Linear SVM model training and evaluation
    print("SVM model evaluation")
    classifier_svm = svm.LinearSVC()
    classifier_svm.fit(np.asarray(q_train_sample), np.asarray(d_train_sample))
    pred_svm = classifier_svm.predict(np.asarray(q_test))
    # evaluate the model
    svm_evaluation_scores, svm_cm = evaluation.multilabel_evaluation(
        d_test, label_encoder.inverse_transform(pred_svm), "LinearSVM")
    documentation_file_modelopt.write(svm_evaluation_scores)

    # Random Forest model training and evaluation (parameters fixed here;
    # a GridSearchCV sketch follows below)
    print("Random Forest model evaluation")
    classifier_rf = RandomForestClassifier(class_weight='balanced',
                                           max_depth=100)
    classifier_rf.fit(np.asarray(q_train_sample), np.asarray(d_train_sample))
    pred_rf = classifier_rf.predict(np.asarray(q_test))
    # evaluate the model
    rf_evaluation_scores, rf_cm = evaluation.multilabel_evaluation(
        d_test, label_encoder.inverse_transform(pred_rf), "Random Forest")
    documentation_file_modelopt.write(rf_evaluation_scores)
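
    # A minimal sketch of the grid search mentioned above, using sklearn's
    # GridSearchCV (the parameter grid below is hypothetical):
    from sklearn.model_selection import GridSearchCV
    rf_grid = GridSearchCV(
        RandomForestClassifier(class_weight='balanced'),
        {'n_estimators': [100, 300], 'max_depth': [50, 100, None]},
        cv=3, scoring='f1_macro')
    rf_grid.fit(np.asarray(q_train_sample), np.asarray(d_train_sample))
    pred_rf_grid = rf_grid.best_estimator_.predict(np.asarray(q_test))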

    # Logistic Regression: optimizing parameters with grid search
    print("Logistic Regression model evaluation")
Example #3
}
cnn_scan = talos.Scan(x=X,
                      y=d_train_array,
                      model=cnn_optimization,
                      params=cnn_params,
                      experiment_name='CNN_Optimization',
                      round_limit=10,
                      fraction_limit=0.05)
cnn_analyze = talos.Analyze(cnn_scan)
documentation_file_parameteropt.write(
    "CNN: Best parameters {}, reached score: {} \n".format(
        cnn_analyze.best_params('accuracy', ['accuracy', 'loss', 'val_loss']),
        cnn_analyze.high('accuracy')))
pred_cnn = talos.Predict(cnn_scan).predict(x_t, metric='val_f1score', asc=True)
# evaluate the model
cnn_evaluation_scores, cnn_cm = evaluation.multilabel_evaluation(
    d_test_array, label_binarizer.inverse_transform(pred_cnn), "CNN")
documentation_file_modelopt.write(cnn_evaluation_scores)
# deploy the best model
model_cnn = talos.Deploy(cnn_scan, "model_cnn_scibert", metric='val_accuracy')
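
# A minimal sketch of reloading the deployed package later, assuming talos's
# Restore utility (Deploy writes a .zip archive under the given name):
cnn_restore = talos.Restore("model_cnn_scibert.zip")
restored_cnn_model = cnn_restore.model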

# build and evaluate the LSTM model
print("LSTM model evaluation")


def lstm_optimization(x_train, y_train, x_test, y_test, params):
    """Randomized search to optimize parameters of Neural Network."""
    optimization_model = models.Sequential()
    optimization_model.add(layers.LSTM(params['units'], return_sequences=True))
    optimization_model.add(layers.LSTM(params['units'],
                                       return_sequences=False))
    optimization_model.add(layers.Dropout(0.5))
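    # (snippet truncated here) A Talos model function must end by compiling
    # the network, fitting it, and returning (history, model); a minimal
    # sketch of that tail, with hypothetical output size, optimizer, and
    # params keys:
    optimization_model.add(layers.Dense(y_train.shape[1],
                                        activation='softmax'))
    optimization_model.compile(optimizer='adam',
                               loss='categorical_crossentropy',
                               metrics=['accuracy'])
    out = optimization_model.fit(x_train, y_train,
                                 batch_size=params['batch_size'],
                                 epochs=params['epochs'],
                                 validation_data=(x_test, y_test),
                                 verbose=0)
    return out, optimization_model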