from joblib import dump, load
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import cross_val_score
from sklearn.metrics import (confusion_matrix as cfm, f1_score as f1s,
                             precision_score as ps, recall_score as rs,
                             classification_report as cr, accuracy_score as asc)


def predictResult(x_train, y_train, y_test, x_test):
    # Read the file with the value to be tested: take its feature columns
    # and put the values into an array.
    data2 = pd.read_csv("/tmp/predict_result.csv", header=0)
    cols2 = data2.columns[data2.columns != columnResultName]
    fts2 = data2[cols2]
    fts2 = Normalizer().fit_transform(fts2)

    scores = cross_val_score(logisticR, x_train, y_train, n_jobs=30)
    print("cross-validation scores")
    print(scores)

    logisticR.fit(x_train, y_train)
    dump(logisticR, 'logistic.model')
    logisticLoaded = load('logistic.model')
    prFit = logisticLoaded.predict(x_test)
    print("prediction:", prFit)
    print("Confusion matrix LR:")
    print(cfm(y_test, prFit))
    print("F1 score LR:")
    print(f1s(y_test, prFit))
    print("Precision score LR:")
    print(ps(y_test, prFit))
    print("Recall score LR:")
    print(rs(y_test, prFit))
    print("Classification report")
    print(cr(y_test, prFit))
    print("Accuracy score")
    print(asc(y_test, prFit))

    # Confusion-matrix heatmap
    class_names = [0, 1]  # names of the classes
    fig, ax = plt.subplots()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names)
    plt.yticks(tick_marks, class_names)
    sns.heatmap(pd.DataFrame(cfm(y_test, prFit)), annot=True, cmap="YlGnBu", fmt='g')
    ax.xaxis.set_label_position("top")
    plt.tight_layout()
    plt.title('Confusion matrix', y=1.1)
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    plt.show()

    # ROC curve from the predicted probability of the positive class
    y_pred_proba = logisticLoaded.predict_proba(x_test)[:, 1]
    fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)
    auc = metrics.roc_auc_score(y_test, y_pred_proba)
    plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
    plt.legend(loc=4)
    plt.show()

    pr1 = logisticLoaded.predict(fts2)
    print("single prediction", pr1)
    return pr1
def predictResult(x_train, y_train, y_test, x_test):
    # Read the file with the value to be tested: take its feature columns
    # and put the values into an array.
    data2 = pd.read_csv("/tmp/predict_result.csv", header=0)
    cols2 = data2.columns[data2.columns != columnResultName]
    fts2 = data2[cols2]
    fts2 = Normalizer().fit_transform(fts2)

    randomForest.fit(x_train, y_train)
    dump(randomForest, 'randomForest.model')
    randomForestLoaded = load('randomForest.model')
    prFit = randomForestLoaded.predict(x_test)
    print("prediction:", prFit)
    print("Confusion matrix RF:")
    print(cfm(y_test, prFit))
    print("F1 score RF:")
    print(f1s(y_test, prFit))
    print("Precision score RF:")
    print(ps(y_test, prFit))
    print("Recall score RF:")
    print(rs(y_test, prFit))
    print("Classification report")
    print(cr(y_test, prFit))

    pr1 = randomForestLoaded.predict(fts2)
    print("single prediction", pr1)
    return pr1
def predictResult(betterN, x_train, y_train, y_test, x_test):
    # Read the file with the value to be tested: take its feature columns
    # and put the values into an array.
    data2 = pd.read_csv("/tmp/predict_result.csv", header=0)
    cols2 = data2.columns[data2.columns != columnResultName]
    fts2 = np.array(data2[cols2])

    # When no betterN value is passed, it means the model was loaded from disk,
    # so only refit (and optionally save) when betterN is given.
    if betterN > 0:
        knn.n_neighbors = betterN
        knn.fit(x_train, y_train)
        # dump(knn, 'models/knn_teste.joblib')

    prFit = knn.predict(x_test)
    print("prediction:", prFit)
    print("Confusion matrix KNN:")
    print(cfm(y_test, prFit))
    print("F1 score KNN:")
    print(f1s(y_test, prFit))
    print("Precision score KNN:")
    print(ps(y_test, prFit))
    print("Recall score KNN:")
    print(rs(y_test, prFit))
    print("Classification report")
    print(cr(y_test, prFit))

    pr1 = knn.predict(fts2)
    print("single prediction", int(pr1[0]))
    print("single prediction score")
    print(pr1)
    return pr1
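# A minimal sketch (assumed helper, not part of the original code) of how the
# betterN argument above could be chosen: cross-validate n_neighbors and keep
# the best value. The candidate grid is illustrative.
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

def find_better_n(x_train, y_train):
    # search a small grid of neighbor counts with 5-fold cross-validation
    search = GridSearchCV(KNeighborsClassifier(),
                          {"n_neighbors": [1, 3, 5, 7, 13]}, cv=5)
    search.fit(x_train, y_train)
    return search.best_params_["n_neighbors"]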
def confusion_matrix(y_valid, y_pred):
    # sklearn convention: rows are true labels, columns are predictions.
    # Class 0 is treated as the positive class here, so sensitivity and
    # specificity are computed row-wise (true positives over all true
    # positives plus false negatives, and the mirror for the negative row).
    conf_mat = cfm(y_valid, y_pred)
    sensitivity = conf_mat[0, 0] / (conf_mat[0, 0] + conf_mat[0, 1])
    specificity = conf_mat[1, 1] / (conf_mat[1, 1] + conf_mat[1, 0])
    print("sensitivity = {}/({}+{}) = {}".format(
        conf_mat[0, 0], conf_mat[0, 0], conf_mat[0, 1], sensitivity))
    print("specificity = {}/({}+{}) = {}".format(
        conf_mat[1, 1], conf_mat[1, 1], conf_mat[1, 0], specificity))
def confusion_matrix(truth, predictions):
    conf_mat = cfm(truth, predictions)
    return conf_mat
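# Minimal self-contained sketch (illustrative data, not from the original)
# of sklearn's index convention used by the helpers above: the binary
# confusion matrix unpacks row-major as tn, fp, fn, tp when class 1 is the
# positive class.
import numpy as np
from sklearn.metrics import confusion_matrix as cfm

y_true = np.array([1, 0, 1, 1, 0, 1])
y_pred = np.array([1, 0, 0, 1, 1, 1])
tn, fp, fn, tp = cfm(y_true, y_pred).ravel()
print("sensitivity:", tp / (tp + fn))  # recall of the positive class
print("specificity:", tn / (tn + fp))  # recall of the negative class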
    save_best_only=True, mode='max', period=2)
tensorboard = TensorBoard(log_dir='./logs', batch_size=batch_size)
callbacks_list = [checkpoint, tensorboard]

epochs = 25
steps_per_epoch = 100
number_of_validation_batches = generator_validation.n / batch_size
history = new_model.fit_generator(
    generator=generator_train,
    epochs=epochs,
    steps_per_epoch=steps_per_epoch,
    validation_data=generator_validation,
    validation_steps=number_of_validation_batches,
    callbacks=callbacks_list)
acc = history.history['categorical_accuracy']

# create dataloader for this one.
prediction = new_model.predict_generator(generator_test)
print(type(prediction), type(test_ID_list))
pred_name = np.argmax(prediction, axis=1)
mat = cfm(generator_test.classes, pred_name)
print("confusion matrix")
print(mat)

# test accuracy: sum of the diagonal (correct predictions) over all test samples
true = 0.0
for i in range(len(mat)):
    true += mat[i][i]
acc = true / len(test_ID_list)
print("test accuracy", acc)
    for (index, replacement) in zip(unique, range(len(unique))):
        ID_list[ID_list == index] = replacement
    return imgs, ID_list

# data arrays
test_image_array, test_ID_list = parse(test_dir)
train_image_array, train_ID_list = parse(train_dir)
print("arrays created")

# get output from the VGG fc2 layer
layer_name = 'fc2'
fc2_layer_model = Model(inputs=model.input,
                        outputs=model.get_layer(layer_name).output)

# get train and test features
feature = fc2_layer_model.predict(train_image_array)
testfeature = fc2_layer_model.predict(test_image_array)
print("features created")

# SVM fitting: the scaler and the SVM are fit on the training features only;
# fitting on the test features would leak test labels into the reported score
scaler = StandardScaler()
scaler.fit(feature)
feature = scaler.transform(feature)
testfeature = scaler.transform(testfeature)
svc = LinearSVC(random_state=5)
svc.fit(feature, train_ID_list)
ycap = svc.predict(testfeature)
print(cfm(test_ID_list, ycap))
print('test score is:', svc.score(testfeature, test_ID_list))
    print(
        cr(test_label.argmax(axis=1),
           prediction_1.argmax(axis=1),
           target_names=label_names))  # classification report

    # Training and validation curves
    training_and_validation_accuracy(result_1)
    training_and_validation_loss(result_1)

    # Confusion matrix visualization
    prediction_class_1 = np.argmax(prediction_1, axis=1)  # convert one-hot predictions to class indices
    test_label_cfm = np.argmax(test_label, axis=1)  # convert one-hot test labels to class indices
    confusion_mtx = cfm(test_label_cfm, prediction_class_1)  # compute the confusion matrix
    plot_confusion_matrix(confusion_mtx, classes=label_names)  # plot the confusion matrix
    random_test_images(model_1)
else:
    model_2, result_2 = optimize(cnn_model_two())  # train CNN model 2
    prediction_2 = model_2.predict(test_data)
    print("Evaluate Test")
    model_2.evaluate(test_data, test_label)
    print(
        cr(test_label.argmax(axis=1),
           prediction_2.argmax(axis=1),
           target_names=label_names))  # classification report
else: print("No aspect/opinion pairs") if has_outliers: outliers_dic[stars] += 1 print( "----------------------------------------------------------------------------------------------------------" ) print( "----------------------------------------------------------------------------------------------------------" ) input() y_true = [p[0] for p in predictions] y_pred = [p[1] for p in predictions] cm = cfm(y_true, y_pred) labels = ['NEG', "NEU", "POS"] f = seaborn.heatmap(cm, annot=True, xticklabels=labels, yticklabels=labels, fmt='g') print("Total reviews: " + str(len(revs))) print("Predicted: " + str(len(predictions))) print(metrics.classification_report(y_true, y_pred)) inc_revs = {"revs": incoherent_revs} print("Incoherent reviews: " + str(len(incoherent_revs))) print("Outliers Stats: ", outliers_dic)
    labels, test_size=0.25, shuffle=True, stratify=labels, random_state=5)
print("number of training data points: ", x_train.shape[0])
print("number of validation data points: ", x_val.shape[0])

# Build and fit a simple logistic regression model on the training data
log_reg = LogisticRegression()
log_reg.fit(x_train, y_train)

# Results of the logistic regression model
y_pred = log_reg.predict(x_val)
print('accuracy: ', log_reg.score(x_val, y_val))
print('f1_score: ', f1_score(y_val, y_pred))
print('confusion matrix: ')
print(cfm(y_val, y_pred))

# Use the trained model to predict a virality likelihood score on the test data
prediction = log_reg.predict_proba(test_vectorised)
model_predictions = pd.DataFrame({
    'news': test_news,
    'virality_likelihood': prediction[:, 1] * 100
})
model_predictions.head()

# Save predictions to a csv file
model_predictions.to_csv('predicted_virality.csv')
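# Sketch under assumptions (the threshold and variable names are illustrative,
# not from the source): predict() implies a 50% cut-off, but a likelihood score
# can be thresholded directly from predict_proba when a different operating
# point is wanted.
threshold = 0.7
likely_viral = (prediction[:, 1] >= threshold).astype(int)
print('news items above threshold:', likely_viral.sum())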
pred_arr = np.array([])
real_df = pd.read_csv('id_label_val.csv')
pred_df = pd.read_csv('output.csv')
real_arr = real_df.LABEL.values
for i in range(3595, 4793):
    temp1 = pred_df[pred_df.ID == i]
    if len(temp1[temp1.LABEL == 1]) > 0:
        pred_arr = np.append(pred_arr, 1)
    else:
        pred_arr = np.append(pred_arr, 0)

# Note: the ROC curve is built from hard 0/1 predictions, so it has a single
# operating point; scores or probabilities would give a smoother curve.
fpr, tpr, _ = roc_curve(real_arr, pred_arr)
roc_auc = auc(fpr, tpr)

# Plot of a ROC curve for a specific class
plt.figure()
plt.plot(fpr, tpr, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()

# confusion matrix
print(cfm(real_arr, pred_arr))
# print(clr(real_arr, pred_arr))
def evaluate(params, mode="test", confusion=False):
    if mode == "valid":
        data_dir = ENV.valid_dir
        num_batches = ENV.num_valid_batches
        pattern = "valid"
    elif mode == "test":
        data_dir = ENV.test_dir
        num_batches = ENV.num_test_batches
        pattern = "test"
    else:
        raise ValueError("Evaluate doesn't know passed mode")

    with tf.variable_scope("train"):
        x, y = create_data_reader(data_dir, mode="eval",
                                  batch_size=params.batch_size, pattern=pattern)
        rn = build_model(x, y, params, mode="eval")

    save_to = os.path.join(ENV.save_to_dir, mode)
    model_dir = os.path.join(ENV.save_to_dir, "train")
    saver = tf.train.Saver()
    writer = tf.summary.FileWriter(save_to)
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    tf.train.start_queue_runners(sess)

    while True:
        # time.sleep(30)
        checkpoint = tf.train.get_checkpoint_state(model_dir)
        if not checkpoint or not checkpoint.model_checkpoint_path:
            tf.logging.error("Model not found {0}".format(model_dir))
            continue
        tf.logging.info("Start loading checkpoint at {0}".format(model_dir))
        saver.restore(sess, checkpoint.model_checkpoint_path)

        correct, total, total_loss = 0.0, 0.0, 0.0
        cfm_pred = []
        cfm_true = []
        for i in range(num_batches):
            run = [rn.inference, rn.labels, rn.loss, rn.global_step, rn.summary]
            pred, true, loss, step, summary = sess.run(run)
            pred = np.argmax(pred, axis=1)
            true = np.argmax(true, axis=1)
            if confusion:
                cfm_pred = np.append(cfm_pred, pred)
                cfm_true = np.append(cfm_true, true)
            correct += np.sum(true == pred)
            total_loss += loss
            total += pred.shape[0]

        # fraction of correct predictions over all evaluated batches
        precision = correct / total
        summary_precision = tf.Summary()
        summary_precision.value.add(tag=mode + "_precision", simple_value=precision)
        writer.add_summary(summary_precision, step)

        summary_loss = tf.Summary()
        total_loss /= num_batches
        summary_loss.value.add(tag=mode + "_loss", simple_value=total_loss)
        writer.add_summary(summary_loss, step)

        msg = "{0}_precision: {1:.5f}, {0}_loss: {2:.5f}"
        tf.logging.info(msg.format(mode, precision, total_loss))
        writer.flush()

        if confusion:
            confusion_matrix = cfm(cfm_true, cfm_pred)
            cfm_path = ENV.save_to_dir + "/confusion_matrix.p"
            with open(cfm_path, "wb") as cfm_file:
                pickle.dump(confusion_matrix, cfm_file)
        break
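# Companion sketch (assumed usage, not in the original): reload the confusion
# matrix that evaluate(..., confusion=True) pickles, for offline inspection.
import pickle

with open(ENV.save_to_dir + "/confusion_matrix.p", "rb") as cfm_file:
    saved_cfm = pickle.load(cfm_file)
print(saved_cfm)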
from sklearn.metrics import confusion_matrix as cfm

with open("output_dev.txt") as f:
    data = f.readlines()

pred = []
y = []
label = {"drug": 1, "person": 2, "place": 3, "movie": 4, "company": 5}
for line in data:
    strs = line.split()
    for s in strs:
        if "guess=" in s:
            pred.append(label[s.replace("guess=", "")])
        if "gold=" in s:
            y.append(label[s.replace("gold=", "")])

print(cfm(y, pred))

# show the misclassified lines
index = [i for i, e in enumerate(y) if e != pred[i]]
for idx in index:
    print(data[idx].strip())
neighbors = [3, 5, 7, 13, 1]
for n in neighbors:
    print("Number of neighbors:", n)
    knn3 = KNeighborsClassifier(n_neighbors=n)
    knn3.fit(x_train, y_train)
    print("Accuracy Training KNN:", knn3.score(x_train, y_train))
    predictions = knn3.predict(x_test)
    accuracy = metrics.accuracy_score(y_test, predictions)
    print("Accuracy Test KNN:", accuracy)
    print("Confusion matrix KNN:")
    print(cfm(y_test, predictions))
    print("F1 score KNN:")
    print(f1s(y_test, predictions))
    print("Precision score KNN:")
    print(ps(y_test, predictions))
    print("Recall score KNN:")
    print(rs(y_test, predictions))

# SVM with a linear kernel; renamed to avoid shadowing the svm module
svmLinear = svm.SVC(kernel='linear', C=1.0)
svmLinear.fit(x_train, y_train)
predictionsSvm = svmLinear.predict(x_test)
accuracySvm = metrics.accuracy_score(y_test, predictionsSvm)
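# A plausible continuation mirroring the KNN block above (an assumption; the
# original snippet is truncated right after accuracySvm):
print("Accuracy Test SVM:", accuracySvm)
print("Confusion matrix SVM:")
print(cfm(y_test, predictionsSvm))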