def svm_func(train_A, words_of_tweets, extra_features, feature_selection, encoding, print_file):
    reading = Twitter_Depression_Detection.Reader()  # Import the Twitter_Depression_Detection.py file, to get the encoding

    print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
    print(words_of_tweets)
    x = np.array(words_of_tweets)
    y = train_A['label']

    # Initialize the roc-auc score running average list
    # Initialize a count to print the number of folds
    # Initialize metrics to print their average
    av_roc = 0.
    count = 0
    precision = 0
    accuracy = 0
    recall = 0
    f1score = 0

    # Below 3 variables are used for the ROC-AUC curve
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    # Initialize your 10-fold cross-validation
    # Set shuffle=True to randomize your splits on your training data
    kf = KFold(n_splits=10, random_state=41, shuffle=True)

    # Set up a for loop to run for the number of cross-validation folds you defined above
    for train_index, test_index in kf.split(x):
        count += 1
        print('Fold #: ', count)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write('Fold #: ' + str(count) + '\n')

        # This indexes your train and test data for your cross-validation and sorts them in random order, since we used shuffle=True
        x_train, x_test = reading.get_enc(x[train_index], 1, y[train_index], train_index, extra_features,
                                          feature_selection, encoding, print_file), \
                          reading.get_enc(x[test_index], 0, y[test_index], test_index, extra_features,
                                          feature_selection, encoding, print_file)
        y_train, y_test = y[train_index], y[test_index]

        # Assumes you have X (predictors) and y (target) for the training set and x_test (predictors) for the test set
        # Create the classification object
        # For very large C, the margin is hard, and points cannot lie in it. For smaller C, the margin is softer and can grow to encompass some points.
        # gamma: the higher the value of gamma, the more closely the model fits the training set, which can hurt generalization and cause over-fitting.
        model = naive_bayes.GaussianNB()

        #######################################################################################################################
        # Feature Scaling
        minMaxScaler = MinMaxScaler(feature_range=(0, 1))
        # Get points and discard classification labels
        #x_train = minMaxScaler.fit_transform(x_train)
        #x_test = minMaxScaler.transform(x_test)
        #######################################################################################################################

        oversample = SMOTE(sampling_strategy='minority', k_neighbors=10, random_state=0)

        model.fit(x_train, y_train)
        # Return the base model fitted on the first fold; the evaluation code below is not executed
        return model

        #######################################################################################################################
        # Visualization of normal and oversampled data
        '''visualize_data(x_train, y_train, "Normal Dataset")'''

        # 'minority': resample only the minority class
        x_train, y_train = oversample.fit_resample(x_train, y_train)

        '''visualize_data(x_train, y_train, "Oversampled Dataset")'''
        #######################################################################################################################

        model.score(x_train, y_train)

        # Predict Output
        y_pred = model.predict(x_test)
        #return model

        #######################################################################################################################

        # Your model is fit. Time to predict our output and test our training data
        print("Evaluating model...")
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write("Evaluating model..." + '\n')

        #roc = roc_auc_score(y_test, y_pred)
        # Print your ROC-AUC score for your kfold, and the running score average
        #print('ROC: ', roc)
        #av_roc += roc
        #print('Continued Avg: ', av_roc / count)
        #with open(print_file, "a") as myfile:  # Write above print into output file
        #    myfile.write('ROC: ' + str(roc) + '\n' + 'Continued Avg: ' + str(av_roc / count) + '\n')

        #y_pred = (y_pred > 0.5)

        # -----------------------------------------------------------------------------------------------------------------
        '''
        # Compute ROC curve and area under the curve
        fpr, tpr, thresholds = roc_curve(y_test, y_pred)
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (count - 1, roc_auc))
        '''
        # -----------------------------------------------------------------------------------------------------------------
        '''
        # Creating the Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        print(cm)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write(str(cm) + '\n')
        '''

        print(y_pred)
        temp_accuracy = accuracy_score(y_test, y_pred)
        temp_precision, temp_recall, temp_f1_score, _ = precision_recall_fscore_support(y_test, y_pred, average='macro')

        accuracy += temp_accuracy
        precision += temp_precision
        recall += temp_recall
        f1score += temp_f1_score

        print("Accuracy: ", temp_accuracy)
        print("Precision: ", temp_precision)
        print("Recall: ", temp_recall)
        print("F1 score: ", temp_f1_score)
        print(metrics.classification_report(y_test, y_pred))

    # Create ROC-AUC curve
    # compute_ROC_Curve(tprs, mean_fpr, aucs)

    ##########################################################################################################################

    # Print average of metrics
    print("Average Precision: ", precision / 10)
    print("Average Accuracy: ", accuracy / 10)
    print("Average Recall: ", recall / 10)
    print("Average F1-score: ", f1score / 10)

    # Print your final average ROC-AUC score and organize your model's predictions in a dataframe
    #print('Average ROC:', av_roc / 10)
    with open(print_file, "a") as myfile:  # Write above print into output file
        myfile.write("Average Precision: " + str(precision / 10) + '\n' +
                     "Average Accuracy: " + str(accuracy / 10) + '\n' +
                     "Average Recall: " + str(recall / 10) + '\n' +
                     "Average F1-score: " + str(f1score / 10) + '\n' +
                     'Average ROC:' + str(av_roc / 10) + '\n')
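# The commented-out '''visualize_data(...)''' calls in svm_func refer to a plotting helper that is not
# shown in this section. The sketch below is only an illustration of what such a helper might do,
# assuming matplotlib and scikit-learn are available: it projects the feature matrix to 2-D with PCA and
# scatter-plots the two classes so the effect of SMOTE oversampling can be inspected. Only the name and
# the (x, y, title) argument list come from the calls above; the body is a guess, not the project's
# actual implementation (which, if it exists, may live elsewhere in this module).
def visualize_data(x, y, title):
    from sklearn.decomposition import PCA
    import matplotlib.pyplot as plt

    points = PCA(n_components=2).fit_transform(x)  # 2-D projection of the feature vectors
    labels = np.asarray(y)
    for label, color in [(0, 'tab:blue'), (1, 'tab:red')]:
        mask = labels == label
        plt.scatter(points[mask, 0], points[mask, 1], s=8, c=color, label='class %d' % label)
    plt.title(title)
    plt.legend()
    plt.show()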
def svm_func2(model2, train_A, words_of_tweets, extra_features, feature_selection, encoding, print_file):
    reading = Twitter_Depression_Detection.Reader()  # Import the Twitter_Depression_Detection.py file, to get the encoding

    x = np.array(words_of_tweets)
    y = train_A['label']

    # Initialize the roc-auc score running average list
    # Initialize a count to print the number of folds
    # Initialize metrics to print their average
    av_roc = 0.
    count = 0
    precision = 0
    accuracy = 0
    recall = 0
    f1score = 0

    # Below 3 variables are used for the ROC-AUC curve
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    # Initialize your 10-fold cross-validation
    # Set shuffle=True to randomize your splits on your training data
    kf = KFold(n_splits=10, random_state=7, shuffle=True)
    print(x.size)

    # Set up a for loop to run for the number of cross-validation folds you defined above
    for train_index, test_index in kf.split(x):
        count += 1
        print('Fold #: ', count)
        print(train_index)
        print(test_index)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write('Fold #: ' + str(count) + '\n')

        # This indexes your train and test data for your cross-validation and sorts them in random order, since we used shuffle=True
        x_train, x_test = reading.get_enc(x[train_index], 1, y[train_index], train_index, extra_features,
                                          feature_selection, encoding, print_file), \
                          reading.get_enc(x[test_index], 0, y[test_index], test_index, extra_features,
                                          feature_selection, encoding, print_file)
        y_train, y_test = y[train_index], y[test_index]

        # Replace the raw features with the base model's predicted class probabilities (stacking)
        x_train = model2.predict_proba(x_train)
        x_test = model2.predict_proba(x_test)

        # Assumes you have X (predictors) and y (target) for the training set and x_test (predictors) for the test set
        # Create SVM classification object
        # For very large C, the margin is hard, and points cannot lie in it. For smaller C, the margin is softer and can grow to encompass some points.
        # gamma: the higher the value of gamma, the more closely the model fits the training set, which can hurt generalization and cause over-fitting.
        model = svm.SVC(kernel='rbf', C=100, gamma=0.1)

        #######################################################################################################################
        # Feature Scaling
        minMaxScaler = MinMaxScaler(feature_range=(0, 1))
        # Get points and discard classification labels
        x_train = minMaxScaler.fit_transform(x_train)
        x_test = minMaxScaler.transform(x_test)
        #######################################################################################################################

        model.fit(x_train, y_train)
        model.score(x_train, y_train)

        # Predict Output
        y_pred = model.predict(x_test)
        #return model

        #######################################################################################################################

        # Your model is fit. Time to predict our output and test our training data
        print("Evaluating model...")
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write("Evaluating model..." + '\n')

        #roc = roc_auc_score(y_test, y_pred)
        # Print your ROC-AUC score for your kfold, and the running score average
        #print('ROC: ', roc)
        #av_roc += roc
        #print('Continued Avg: ', av_roc / count)
        #with open(print_file, "a") as myfile:  # Write above print into output file
        #    myfile.write('ROC: ' + str(roc) + '\n' + 'Continued Avg: ' + str(av_roc / count) + '\n')

        #y_pred = (y_pred > 0.5)

        # -----------------------------------------------------------------------------------------------------------------
        '''
        # Compute ROC curve and area under the curve
        fpr, tpr, thresholds = roc_curve(y_test, y_pred)
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (count - 1, roc_auc))
        '''
        # -----------------------------------------------------------------------------------------------------------------
        '''
        # Creating the Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        print(cm)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write(str(cm) + '\n')
        '''

        print(y_pred)
        temp_accuracy = accuracy_score(y_test, y_pred)
        temp_precision, temp_recall, temp_f1_score, _ = precision_recall_fscore_support(y_test, y_pred, average='macro')

        accuracy += temp_accuracy
        precision += temp_precision
        recall += temp_recall
        f1score += temp_f1_score

        print("Accuracy: ", temp_accuracy)
        print("Precision: ", temp_precision)
        print("Recall: ", temp_recall)
        print("F1 score: ", temp_f1_score)

        # =============================================================================
        # Plot HEATMAP
        # =============================================================================
        '''
        plt.title('SVM - Confusion Matrix '
                  '\n[Accuracy = %0.2f, Recall = %0.2f, Precision = %0.2f, F1-Score = %0.2f]'
                  '\n[True Positive = %d, False Positive = %d'
                  '\nFalse Negative = %d, True Negative = %d]' % (
                      temp_accuracy * 100, temp_recall * 100, temp_precision * 100, temp_f1_score * 100,
                      cm[0][0], cm[0][1], cm[1][0], cm[1][1]))

        sns.heatmap(cm, cmap='Oranges',  # Color of heatmap
                    annot=True, fmt="d",  # Show the integer values inside the heatmap boxes
                    cbar=False,  # Delete the heat bar (shows the numbers corresponding to colors)
                    xticklabels=["depression", "no depression"],
                    yticklabels=["depression", "no depression"]  # Name the x and y value labels
                    ).tick_params(left=False, bottom=False)  # Remove the tick dashes from the x and y axes

        # Fix a bug where the top and bottom heatmap boxes are cut off
        b, t = plt.ylim()  # discover the values for bottom and top
        b += 0.5  # Add 0.5 to the bottom
        t -= 0.5  # Subtract 0.5 from the top
        plt.ylim(b, t)  # update the ylim(bottom, top) values

        plt.xlabel('True output')
        plt.ylabel('Predicted output')
        plt.show()
        '''
        # =============================================================================

    # Create ROC-AUC curve
    # compute_ROC_Curve(tprs, mean_fpr, aucs)

    ##########################################################################################################################

    # Print average of metrics
    print("Average Precision: ", precision / 10)
    print("Average Accuracy: ", accuracy / 10)
    print("Average Recall: ", recall / 10)
    print("Average F1-score: ", f1score / 10)

    # Print your final average ROC-AUC score and organize your model's predictions in a dataframe
    #print('Average ROC:', av_roc / 10)
    with open(print_file, "a") as myfile:  # Write above print into output file
        myfile.write("Average Precision: " + str(precision / 10) + '\n' +
                     "Average Accuracy: " + str(accuracy / 10) + '\n' +
                     "Average Recall: " + str(recall / 10) + '\n' +
                     "Average F1-score: " + str(f1score / 10) + '\n' +
                     'Average ROC:' + str(av_roc / 10) + '\n')
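# Hypothetical driver showing how the two functions above appear to fit together: svm_func trains a
# Gaussian Naive Bayes base model (returning it after the first fold), and svm_func2 stacks an RBF SVM
# on top of that model's predicted class probabilities. The name run_stacked_svm and this wrapper are
# not part of the original code; they are only a usage sketch under that assumption.
def run_stacked_svm(train_A, words_of_tweets, extra_features, feature_selection, encoding, print_file):
    # First level: Gaussian Naive Bayes fitted inside svm_func
    base_model = svm_func(train_A, words_of_tweets, extra_features, feature_selection, encoding, print_file)
    # Second level: RBF SVM trained and evaluated on the base model's predict_proba output
    svm_func2(base_model, train_A, words_of_tweets, extra_features, feature_selection, encoding, print_file)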
def Bayes(train_A, words_of_tweets, extra_features, feature_selection, encoding, print_file):
    reading = Twitter_Depression_Detection.Reader()  # Import the ClassRead.py file, to get the encoding

    x = np.array(words_of_tweets)
    y = train_A['label']

    # Initialize the roc-auc score running average list
    # Initialize a count to print the number of folds
    # Initialize metrics to print their average
    av_roc = 0.
    count = 0
    precision = 0
    accuracy = 0
    recall = 0
    f1score = 0

    # Below 3 variables are used for the ROC-AUC curve
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    # Initialize your 10-fold cross-validation
    # Set shuffle=True to randomize your splits on your training data
    kf = KFold(n_splits=10, random_state=41, shuffle=True)

    # Set up a for loop to run for the number of cross-validation folds you defined above
    for train_index, test_index in kf.split(x):
        count += 1
        print('Fold #: ', count)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write('Fold #: ' + str(count) + '\n')

        # This indexes your train and test data for your cross-validation and sorts them in random order, since we used shuffle=True
        x_train, x_test = reading.get_enc(x[train_index], 1, y[train_index], train_index, extra_features,
                                          feature_selection, encoding, print_file), \
                          reading.get_enc(x[test_index], 0, y[test_index], test_index, extra_features,
                                          feature_selection, encoding, print_file)
        y_train, y_test = y[train_index], y[test_index]

        #######################################################################################################################
        model = GaussianNB()
        #######################################################################################################################

        # 'minority': resample only the minority class
        oversample = SMOTE(sampling_strategy='minority', k_neighbors=10, random_state=0)
        x_train, y_train = oversample.fit_resample(x_train, y_train)

        # Fit Gaussian Naive Bayes according to x, y and make a prediction with the fitted model
        # x : array-like, shape (n_samples, n_features) - training vectors, where n_samples is the number of samples and n_features is the number of features
        # y : array-like, shape (n_samples,) - target values
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)

        #######################################################################################################################

        # Your model is fit. Time to predict our output and test our training data
        print("Evaluating model...")
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write("Evaluating model..." + '\n')

        roc = roc_auc_score(y_test, y_pred)

        # Print your ROC-AUC score for your kfold, and the running score average
        print('ROC: ', roc)
        av_roc += roc
        print('Continued Avg: ', av_roc / count)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write('ROC: ' + str(roc) + '\n' + 'Continued Avg: ' + str(av_roc / count) + '\n')

        y_pred = (y_pred > 0.5)

        # -----------------------------------------------------------------------------------------------------------------
        '''
        # Compute ROC curve and area under the curve
        fpr, tpr, thresholds = roc_curve(y_test, y_pred)
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (count - 1, roc_auc))
        '''
        # -----------------------------------------------------------------------------------------------------------------

        # Creating the Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        print(cm)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write(str(cm) + '\n')

        temp_accuracy = accuracy_score(y_test, y_pred)
        temp_precision, temp_recall, temp_f1_score, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')

        accuracy += temp_accuracy
        precision += temp_precision
        recall += temp_recall
        f1score += temp_f1_score

        print("Accuracy: ", temp_accuracy)
        print("Precision: ", temp_precision)
        print("Recall: ", temp_recall)
        print("F1 score: ", temp_f1_score)

    # Create ROC-AUC curve
    # compute_ROC_Curve(tprs, mean_fpr, aucs)

    # Print average of metrics
    print("Average Precision: ", precision / 10)
    print("Average Accuracy: ", accuracy / 10)
    print("Average Recall: ", recall / 10)
    print("Average F1-score: ", f1score / 10)

    # Print your final average ROC-AUC score and organize your model's predictions in a dataframe
    print('Average ROC:', av_roc / 10)
    with open(print_file, "a") as myfile:  # Write above print into output file
        myfile.write("Average Precision: " + str(precision / 10) + '\n' +
                     "Average Accuracy: " + str(accuracy / 10) + '\n' +
                     "Average Recall: " + str(recall / 10) + '\n' +
                     "Average F1-score: " + str(f1score / 10) + '\n' +
                     'Average ROC:' + str(av_roc / 10) + '\n')
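# Several functions in this module end with a commented-out call to compute_ROC_Curve(tprs, mean_fpr, aucs),
# a plotting helper that is not shown in this section. The sketch below is only an illustration of what such
# a helper might do, assuming matplotlib is available and that the commented-out roc_curve blocks (which fill
# tprs and aucs per fold) have been re-enabled. The body is a guess, not the project's actual implementation.
def compute_ROC_Curve(tprs, mean_fpr, aucs):
    import matplotlib.pyplot as plt

    mean_tpr = np.mean(tprs, axis=0)         # average the per-fold TPR curves interpolated on mean_fpr
    mean_tpr[-1] = 1.0                       # force the curve to end at (1, 1)
    mean_auc = np.trapz(mean_tpr, mean_fpr)  # area under the averaged curve

    plt.plot(mean_fpr, mean_tpr, color='b', lw=2, alpha=0.8,
             label='Mean ROC (AUC = %0.2f, fold std = %0.2f)' % (mean_auc, np.std(aucs)))
    plt.plot([0, 1], [0, 1], linestyle='--', lw=1, color='r', alpha=0.8, label='Chance')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc='lower right')
    plt.show()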
def conv1d_class(train_A, words_of_tweets, extra_features, feature_selection, encoding, print_file):
    reading = Twitter_Depression_Detection.Reader()  # Import the ClassRead.py file, to get the encoding

    # fix random seed for reproducibility
    np.random.seed(7)

    x = np.array(words_of_tweets)
    y = train_A['label']

    # -----------------------------------------------------------------------------------------------------------------
    # Initialize the roc-auc score running average list
    # Initialize a count to print the number of folds
    # Initialize metrics to print their average
    av_roc = 0.
    count = 0
    precision = 0
    accuracy = 0
    recall = 0
    f1score = 0
    # -----------------------------------------------------------------------------------------------------------------

    # Initialize your 10-fold cross-validation
    # Set shuffle=True to randomize your splits on your training data
    kf = KFold(n_splits=10, random_state=41, shuffle=True)

    # Set up a for loop to run for the number of cross-validation folds you defined above
    for train_index, test_index in kf.split(x):
        count += 1
        print('Fold #: ', count)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write('Fold #: ' + str(count) + '\n')

        # This indexes your train and test data for your cross-validation and sorts them in random order, since we used shuffle=True
        x_train, x_test = reading.get_enc(x[train_index], 1, y[train_index], train_index, extra_features,
                                          feature_selection, encoding, print_file), \
                          reading.get_enc(x[test_index], 0, y[test_index], test_index, extra_features,
                                          feature_selection, encoding, print_file)
        y_train, y_test = y[train_index], y[test_index]

        # -----------------------------------------------------------------------------------------------------------------
        # Initializing Neural Network
        classifier = Sequential()

        # 'minority': resample only the minority class
        oversample = SMOTE(sampling_strategy='minority', k_neighbors=10, random_state=0)
        x_train, y_train = oversample.fit_resample(x_train, y_train)
        # -----------------------------------------------------------------------------------------------------------------

        print(x_train.shape[0], ' ', x_train.shape[1])
        print(x_test.shape[0], ' ', x_test.shape[1])

        # Conv1D expects 3-D input (samples, timesteps, features); use a single timestep per tweet
        x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
        x_test = x_test.reshape(x_test.shape[0], 1, x_test.shape[1])

        classifier.add(Dense(20, kernel_initializer='glorot_uniform', activation='softsign',
                             kernel_constraint=maxnorm(2), input_shape=(1, x_train.shape[2])))
        classifier.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
        classifier.add(Dropout(0.2))
        classifier.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
        classifier.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
        classifier.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
        classifier.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'))
        classifier.add(Dropout(0.2))
        classifier.add(GlobalAveragePooling1D())
        classifier.add(Dense(500, kernel_initializer='glorot_uniform', activation='softsign',
                             kernel_constraint=maxnorm(2)))

        # Adding the output layer with 1 output
        classifier.add(Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid'))

        optimizer = RMSprop(lr=0.001)

        # Compiling Neural Network
        classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

        # -----------------------------------------------------------------------------------------------------------------
        # Stop training when the monitored validation loss stops improving; the callback only takes effect
        # when passed to fit(), so hold out part of the fold's training data for it to monitor
        early_stopping = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')

        # Fitting our model
        classifier.fit(x_train, y_train, batch_size=20, epochs=50,
                       validation_split=0.1, callbacks=[early_stopping])
        # -----------------------------------------------------------------------------------------------------------------

        # Your model is fit. Time to predict our output and test our training data
        print("Evaluating model...")
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write("Evaluating model..." + '\n')

        test_preds = classifier.predict_proba(x_test, verbose=0)
        roc = roc_auc_score(y_test, test_preds)
        scores = classifier.evaluate(x_test, y_test)
        print(scores)

        # Print your model summary
        print(classifier.summary())

        # Print your ROC-AUC score for your kfold, and the running score average
        print('ROC: ', roc)
        av_roc += roc
        print('Continued Avg: ', av_roc / count)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write('Scores: ' + str(scores) + '\n' +
                         'Classifier summary: ' + str(classifier.summary()) + '\n' +
                         'ROC: ' + str(roc) + '\n' +
                         'Continued Avg: ' + str(av_roc / count) + '\n')

        # -----------------------------------------------------------------------------------------------------------------
        # Predicting the Test set results
        y_pred = classifier.predict(x_test)
        y_pred = (y_pred > 0.5)

        # Creating the Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        print(cm)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write(str(cm) + '\n')

        temp_accuracy = accuracy_score(y_test, y_pred)
        temp_precision, temp_recall, temp_f1_score, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')

        accuracy += temp_accuracy
        precision += temp_precision
        recall += temp_recall
        f1score += temp_f1_score

        print("Accuracy: ", temp_accuracy)
        print("Precision: ", temp_precision)
        print("Recall: ", temp_recall)
        print("F1 score: ", temp_f1_score)

    # Print average of metrics
    print("Average Precision: ", precision / 10)
    print("Average Accuracy: ", accuracy / 10)
    print("Average Recall: ", recall / 10)
    print("Average F1-score: ", f1score / 10)

    # Print your final average ROC-AUC score and organize your model's predictions in a dataframe
    print('Average ROC:', av_roc / 10)
    with open(print_file, "a") as myfile:  # Write above print into output file
        myfile.write("Average Precision: " + str(precision / 10) + '\n' +
                     "Average Accuracy: " + str(accuracy / 10) + '\n' +
                     "Average Recall: " + str(recall / 10) + '\n' +
                     "Average F1-score: " + str(f1score / 10) + '\n' +
                     'Average ROC:' + str(av_roc / 10) + '\n')
def K_Neighbors(train_A, words_of_tweets, extra_features, feature_selection, encoding, print_file):
    reading = Twitter_Depression_Detection.Reader()  # Import the ClassRead.py file, to get the encoding

    x = np.array(words_of_tweets)
    y = train_A['label']

    # Initialize the roc-auc score running average list
    # Initialize a count to print the number of folds
    # Initialize metrics to print their average
    av_roc = 0.
    count = 0
    precision = 0
    accuracy = 0
    recall = 0
    f1score = 0

    # Below 3 variables are used for the ROC-AUC curve
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    # Initialize your 10-fold cross-validation
    # Set shuffle=True to randomize your splits on your training data
    kf = KFold(n_splits=10, random_state=41, shuffle=True)

    # Set up a for loop to run for the number of cross-validation folds you defined above
    for train_index, test_index in kf.split(x):
        count += 1
        print('Fold #: ', count)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write('Fold #: ' + str(count) + '\n')

        # This indexes your train and test data for your cross-validation and sorts them in random order, since we used shuffle=True
        x_train, x_test = reading.get_enc(x[train_index], 1, y[train_index], train_index, extra_features,
                                          feature_selection, encoding, print_file), \
                          reading.get_enc(x[test_index], 0, y[test_index], test_index, extra_features,
                                          feature_selection, encoding, print_file)
        y_train, y_test = y[train_index], y[test_index]

        #######################################################################################################################
        # KNeighborsClassifier parameters (see the illustrative helper after this function):
        # leaf_size : int, optional (default=30)
        # p : integer, optional (default=2)
        #     When p = 1, this is equivalent to using manhattan_distance (l1),
        #     and euclidean_distance (l2) for p = 2.
        #     For arbitrary p, minkowski_distance (l_p) is used.
        # algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, optional - algorithm used to compute the nearest neighbors:
        #     'ball_tree' will use BallTree
        #     'kd_tree' will use KDTree
        #     'brute' will use a brute-force search
        #     'auto' will attempt to decide the most appropriate algorithm based on the values passed to fit
        # weights : str or callable, optional (default='uniform') - weight function used in prediction. Possible values:
        #     'uniform' : uniform weights; all points in each neighborhood are weighted equally
        #     'distance' : weight points by the inverse of their distance; closer neighbors of a query point
        #                  will have a greater influence than neighbors which are further away

        scaler = Normalizer()
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)

        classifier = KNeighborsClassifier(n_neighbors=40)

        # 'minority': resample only the minority class
        oversample = SMOTE(sampling_strategy='minority', k_neighbors=10, random_state=0)
        x_train, y_train = oversample.fit_resample(x_train, y_train)

        classifier.fit(x_train, y_train)
        y_pred = classifier.predict(x_test)

        #######################################################################################################################

        # Your model is fit. Time to predict our output and test our training data
        print("Evaluating model...")
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write("Evaluating model..." + '\n')

        roc = roc_auc_score(y_test, y_pred)

        # Print your ROC-AUC score for your kfold, and the running score average
        print('ROC: ', roc)
        av_roc += roc
        print('Continued Avg: ', av_roc / count)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write('ROC: ' + str(roc) + '\n' + 'Continued Avg: ' + str(av_roc / count) + '\n')

        # -----------------------------------------------------------------------------------------------------------------
        '''
        # Compute ROC curve and area under the curve
        fpr, tpr, thresholds = roc_curve(y_test, y_pred)
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (count - 1, roc_auc))
        '''
        # -----------------------------------------------------------------------------------------------------------------

        y_pred = (y_pred > 0.5)

        # Creating the Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        print(cm)
        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write(str(cm) + '\n')

        report = classification_report(y_test, y_pred)
        print(report)

        temp_accuracy = accuracy_score(y_test, y_pred)
        temp_precision, temp_recall, temp_f1_score, _ = precision_recall_fscore_support(y_test, y_pred, average='binary')

        accuracy += temp_accuracy
        precision += temp_precision
        recall += temp_recall
        f1score += temp_f1_score

        print("Accuracy: ", temp_accuracy)
        print("Precision: ", temp_precision)
        print("Recall: ", temp_recall)
        print("F1 score: ", temp_f1_score)

    # Create ROC-AUC curve
    # compute_ROC_Curve(tprs, mean_fpr, aucs)

    # Print average of metrics
    print("Average Precision: ", precision / 10)
    print("Average Accuracy: ", accuracy / 10)
    print("Average Recall: ", recall / 10)
    print("Average F1-score: ", f1score / 10)

    # Print your final average ROC-AUC score and organize your model's predictions in a dataframe
    print('Average ROC:', av_roc / 10)
    with open(print_file, "a") as myfile:  # Write above print into output file
        myfile.write("Average Precision: " + str(precision / 10) + '\n' +
                     "Average Accuracy: " + str(accuracy / 10) + '\n' +
                     "Average Recall: " + str(recall / 10) + '\n' +
                     "Average F1-score: " + str(f1score / 10) + '\n' +
                     'Average ROC:' + str(av_roc / 10) + '\n')
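# The parameter notes inside K_Neighbors describe knobs that the call KNeighborsClassifier(n_neighbors=40)
# leaves at their scikit-learn defaults. Purely as an illustration of those options, the helper below builds
# a distance-weighted, KDTree-backed variant; the name make_weighted_knn is hypothetical and this
# configuration is not one used by the original pipeline.
def make_weighted_knn(n_neighbors=40):
    # weights='distance': closer neighbors of a query point get a larger vote than distant ones
    # algorithm='kd_tree': force a KDTree instead of letting 'auto' pick the search structure
    # leaf_size=30: the default leaf size; affects tree build/query speed, not the predictions
    # p=2: Minkowski distance with p=2, i.e. plain Euclidean distance
    return KNeighborsClassifier(n_neighbors=n_neighbors,
                                weights='distance',
                                algorithm='kd_tree',
                                leaf_size=30,
                                p=2)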