# Persist the fine-tuned SciBERT model, evaluate it on the validation set,
# and write the evaluation scores to a report file.
print("Training completed!")

# exist_ok=True: re-running the script must not crash if the output
# directory already exists (plain os.makedirs raises FileExistsError).
os.makedirs("finetuning_bert", exist_ok=True)
model.save_pretrained("finetuning_bert")
tokenizer.save_pretrained("finetuning_bert")

# Evaluation
predictions = []
true_labels = []
model.eval()
for batch in validation_dataloader:
    # NOTE(review): device is hard-coded to 'cuda'; assumes the model was
    # moved to the GPU earlier in the script — confirm, or derive the
    # device from next(model.parameters()).device instead.
    batch = tuple(t.to('cuda') for t in batch)
    b_input_ids, b_input_mask, b_labels = batch
    # no_grad: inference only, skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(b_input_ids,
                        token_type_ids=None,
                        attention_mask=b_input_mask)
    logits = outputs[0]
    predictions.append(logits.detach().cpu().numpy())
    true_labels.append(b_labels.to('cpu').numpy())

# Stack per-batch arrays in one step instead of a nested-list flatten:
# logits -> (n_samples, n_classes), labels -> (n_samples,).
flat_predictions = np.argmax(np.concatenate(predictions, axis=0), axis=1)
flat_true_labels = np.concatenate(true_labels, axis=0)

bert_evaluation_scores, bert_cm = evaluation.multilabel_evaluation(
    flat_true_labels, flat_predictions, "SciBERT Embeddings Finetuning")

# Context manager guarantees the report file is closed even if the
# write raises; plain "w" mode — the file is only written, never read.
with open("classifier_optimization_finetuning_scibert.txt",
          "w") as documentation_file_modelopt:
    documentation_file_modelopt.write(bert_evaluation_scores)
# Fragment (continues a statement opened before this view and is cut off
# after the "Logistic Regression" header): registers a random-undersampled
# (features, labels, name) triple, then for each sampling strategy trains
# and evaluates a LinearSVC and a RandomForest on the resampled training
# data, writing each strategy's scores to the open report file.
# NOTE(review): predictions are decoded with label_encoder.inverse_transform
# before evaluation — presumably d_test holds the original (unencoded)
# labels; verify against where d_test is built.
"Random Undersampling") sampling_strategies.append(triple_undersampled) for sampling in sampling_strategies: q_train_sample = sampling[0] d_train_sample = sampling[1] name = sampling[2] documentation_file_modelopt.write(str(name) + "\n") #Linear SVM model training and evaluation print("SVM model evaluation") classifier_svm = svm.LinearSVC() classifier_svm.fit(np.asarray(q_train_sample), np.asarray(d_train_sample)) pred_svm = classifier_svm.predict(np.asarray(q_test)) #evaluate the model svm_evaluation_scores, svm_cm = evaluation.multilabel_evaluation( d_test, label_encoder.inverse_transform(pred_svm), "LinearSVM") documentation_file_modelopt.write(svm_evaluation_scores) # Random Forest: optimizing parameters with grid search print("Random Forest model evaluation") classifier_rf = RandomForestClassifier(class_weight='balanced', max_depth=100) classifier_rf.fit(np.asarray(q_train_sample), np.asarray(d_train_sample)) pred_rf = classifier_rf.predict(np.asarray(q_test)) #evaluate the model rf_evaluation_scores, rf_cm = evaluation.multilabel_evaluation( d_test, label_encoder.inverse_transform(pred_rf), "Random Forest") documentation_file_modelopt.write(rf_evaluation_scores) # Logistic Regression: optimizing parameters with grid search print("Logistic Regression model evaluation")
# Fragment (the cnn_params dict opens before this view; the LSTM builder is
# cut off mid-body): runs a talos hyperparameter Scan for the CNN (10 rounds,
# 5% of the parameter grid), logs the best parameters/score, predicts on the
# test set with the best model by val_f1score, evaluates via
# evaluation.multilabel_evaluation, deploys the best CNN, then begins
# defining an analogous two-layer LSTM optimization model.
# NOTE(review): Scan optimizes 'accuracy' for reporting but Predict selects
# by 'val_f1score' (asc=True) — confirm this metric mismatch is intentional.
} cnn_scan = talos.Scan(x=X, y=d_train_array, model=cnn_optimization, params=cnn_params, experiment_name='CNN_Optimization', round_limit=10, fraction_limit=0.05) cnn_analyze = talos.Analyze(cnn_scan) documentation_file_parameteropt.write( "CNN: Best parameters {}, reached score: {} \n".format( cnn_analyze.best_params('accuracy', ['accuracy', 'loss', 'val_loss']), cnn_analyze.high('accuracy'))) pred_cnn = talos.Predict(cnn_scan).predict(x_t, metric='val_f1score', asc=True) #evaluate the model cnn_evaluation_scores, cnn_cm = evaluation.multilabel_evaluation( d_test_array, label_binarizer.inverse_transform(pred_cnn), "CNN") documentation_file_modelopt.write(cnn_evaluation_scores) #deploy best model model_cnn = talos.Deploy(cnn_scan, "model_cnn_scibert", metric='val_accuracy') #build LSTM model and evaluate the model print("LSTM model evaluation") def lstm_optimization(x_train, y_train, x_test, y_test, params): """Randomized search to optimize parameters of Neural Network.""" optimization_model = models.Sequential() optimization_model.add(layers.LSTM(params['units'], return_sequences=True)) optimization_model.add(layers.LSTM(params['units'], return_sequences=False)) optimization_model.add(layers.Dropout(0.5))