Example #1
import numpy as np
from sklearn.model_selection import KFold
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score, precision_recall_fscore_support
import ClassRead  # project-local module providing the Reader / encoding helpers

def Multinomial_NB(train_A, words_of_tweets, extra_features, feature_selection, encoding, print_file):
    reading = ClassRead.Reader()  # The Reader from ClassRead.py provides the encoding helpers

    x = np.array(words_of_tweets)
    y = train_A['label']

    # Initialize a running sum for the average ROC-AUC score
    # Initialize a count to print the number of folds
    # Initialize metrics to print their average
    av_roc = 0.
    count = 0
    precision = 0
    accuracy = 0
    recall = 0
    f1score = 0

    # Initialize your 10-fold cross-validation
    # Set shuffle=True to randomize the splits of your training data
    kf = KFold(n_splits=10, random_state=41, shuffle=True)

    # Loop once per fold of the cross-validation defined above
    for train_index, test_index in kf.split(x):
        count += 1
        print('Fold #: ', count)

        with open(print_file, "a") as myfile: # Write above print into output file
            myfile.write('Fold #: ' + str(count) + '\n')

        # This indexes your train and test data for this fold and puts them in random order, since shuffle=True was set above
        x_train, x_test = reading.get_enc(x[train_index], 1, y[train_index], train_index, extra_features, feature_selection, encoding, print_file), reading.get_enc(x[test_index], 0, y[test_index], test_index, extra_features, feature_selection, encoding, print_file)
        y_train, y_test = y[train_index], y[test_index]

#######################################################################################################################

        model = MultinomialNB()

        # Fit Multinomial Naive Bayes according to x, y
        # Make a prediction using the Multinomial Naive Bayes Model
        model.fit(x_train, y_train) # x : array-like, shape (n_samples, n_features)   Training vectors, where n_samples is the number of samples and n_features is the number of features.
                                    # y : array-like, shape (n_samples,)   Target values.

        y_pred = model.predict(x_test)

#######################################################################################################################

        # The model is fit; time to predict and evaluate on the held-out fold
        print("Evaluating model...")

        with open(print_file, "a") as myfile: # Write above print into output file
            myfile.write("Evaluating model..." + '\n')

        roc = roc_auc_score(y_test, y_pred)

        # Print your ROC-AUC score for your kfold, and the running score average
        print('ROC: ', roc)
        av_roc += roc
        print('Continued Avg: ', av_roc / count)

        with open(print_file, "a") as myfile: # Write above print into output file
            myfile.write('ROC: ' + str(roc) + '\n' + 'Continued Avg: ' + str(av_roc / count) + '\n')

#######################################################################################################################

        # predict() already returns hard class labels, so this thresholding is
        # effectively a no-op; it only matters for probability outputs
        y_pred = (y_pred > 0.5)

        # Creating the Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        print(cm)

        with open(print_file, "a") as myfile: # Write above print into output file
            myfile.write(str(cm) + '\n')

        temp_accuracy = accuracy_score(y_test, y_pred)
        temp_precision, temp_recall, temp_f1_score, _ = precision_recall_fscore_support(y_test, y_pred,
                                                                                        average='binary')

        accuracy += temp_accuracy
        precision += temp_precision
        recall += temp_recall
        f1score += temp_f1_score

        print("Accuracy: ", temp_accuracy)
        print("Precision: ", temp_precision)
        print("Recall: ", temp_recall)
        print("F1 score: ", temp_f1_score)

    # Print average of metrics
    print("Average Precision: ", precision / 10)
    print("Average Accuracy: ", accuracy / 10)
    print("Average Recall: ", recall / 10)
    print("Average F1-score: ", f1score / 10)

    # Print your final average ROC-AUC score
    print('Average ROC:', av_roc / 10)

    with open(print_file, "a") as myfile:  # Write above print into output file
        myfile.write("Average Precision: " + str(precision / 10) + '\n' + "Average Accuracy: " + str(accuracy / 10) + '\n' + "Average Recall: " + str(recall / 10) + '\n' + "Average F1-score: " + str(f1score / 10) + '\n' + 'Average ROC:' + str(av_roc / 10) + '\n')
Example #2
import ClassRead  # Reads the input and the training sets
import KNeighbors  # Implements KNeighbors classification
import MultinomialNB  # Implements MultinomialNB classification
import VotingEnsembles  # Implements VotingEnsembles classification
import LSTM  # Implements LSTM classification
import Conv1D  # Implements Conv1D classification
import os.path

##############################################################################################################################################################
##############################################################################################################################################################

# Main

##############################################################################################################################################################
##############################################################################################################################################################

reading = ClassRead.Reader()  # The Reader from ClassRead.py reads the input and the training sets
dir = os.getcwd()  # Gets the current working directory (note: this shadows the built-in dir())

##############################################################################################################################################################

# Read input and training file, check if the dataset is imbalanced

##############################################################################################################################################################

reading.readTrain()
#reading.checkImbalance()

##############################################################################################################################################################

# Call all algorithms with different combinations of feature selection and encoding
Example #3
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score, precision_recall_fscore_support
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.optimizers import RMSprop
from keras.constraints import maxnorm
from keras import callbacks
import ClassRead  # project-local module providing the Reader / encoding helpers

def lstm(train_A, words_of_tweets, extra_features, feature_selection, encoding, print_file):
    reading = ClassRead.Reader()  # The Reader from ClassRead.py provides the encoding helpers

    x = np.array(words_of_tweets)
    y = train_A['label']

# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    # Initialize a running sum for the average ROC-AUC score
    # Initialize a count to print the number of folds
    # Initialize metrics to print their average
    av_roc = 0.
    count = 0
    precision = 0
    accuracy = 0
    recall = 0
    f1score = 0
    # The 3 variables below are used for the ROC-AUC curve
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    # fix random seed for reproducibility
    np.random.seed(7)


# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    # Initialize your 10-fold cross-validation
    # Set shuffle=True to randomize the splits of your training data
    kf = KFold(n_splits=10, random_state=41, shuffle=True)

    # Loop once per fold of the cross-validation defined above
    for train_index, test_index in kf.split(x):
        count += 1
        print('Fold #: ', count)

        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write('Fold #: ' + str(count) + '\n')

        # This indexes your train and test data for this fold and puts them in random order, since shuffle=True was set above
        x_train, x_test = reading.get_enc(x[train_index], 1, y[train_index], train_index, extra_features, feature_selection, encoding, print_file), reading.get_enc(x[test_index], 0, y[test_index], test_index, extra_features, feature_selection, encoding, print_file)
        y_train, y_test = y[train_index], y[test_index]


# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

        # Initializing Neural Network
        classifier = Sequential()

        print(x_train.shape[0], ' ', x_train.shape[1])
        print(x_test.shape[0], ' ', x_test.shape[1])
        x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
        x_test = x_test.reshape(x_test.shape[0], 1, x_test.shape[1])


        classifier.add(LSTM(10, input_shape=(1, x_train.shape[2]), return_sequences=True, activation='softplus'))
        classifier.add(Dropout(0.2))
        classifier.add(LSTM(20, activation='softplus'))
        classifier.add(Dropout(0.2))

        classifier.add(Dense(500, kernel_initializer='glorot_uniform', activation='softsign', kernel_constraint=maxnorm(2)))

        # Adding the output layer with 1 output
        classifier.add(Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid'))

        optimizer = RMSprop(lr=0.001)

        # Compiling Neural Network
        classifier.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])


# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

        # An EarlyStopping callback only takes effect if it is passed to fit();
        # monitoring val_loss also requires some held-out validation data
        early_stopping = callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')

        # Fitting our model
        classifier.fit(x_train, y_train, batch_size=20, epochs=50, validation_split=0.1, callbacks=[early_stopping])

# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

        # The model is fit; time to predict and evaluate on the held-out fold
        print("Evaluating model...")

        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write("Evaluating model..." + '\n')

        test_preds = classifier.predict_proba(x_test, verbose=0)

        roc = roc_auc_score(y_test, test_preds)
        scores = classifier.evaluate(x_test, y_test)
        print(scores)

        # Print your model summary (summary() prints directly and returns None)
        classifier.summary()

        # Print your ROC-AUC score for your kfold, and the running score average
        print('ROC: ', roc)
        av_roc += roc
        print('Continued Avg: ', av_roc / count)

        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write('Scores: ' + str(scores) + '\n' + 'Classifier summary: ' + str(
                classifier.summary()) + '\n' + 'ROC: ' + str(roc) + '\n' + 'Continued Avg: ' + str(
                av_roc / count) + '\n')

# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        '''
        # Compute ROC curve and area the curve

        fpr, tpr, thresholds = roc_curve(y_test, test_preds)
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (count - 1, roc_auc))
        '''
# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

        # Predicting the Test set results
        y_pred = classifier.predict(x_test)
        y_pred = (y_pred > 0.5)

        # Creating the Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        print(cm)

        with open(print_file, "a") as myfile:  # Write above print into output file
            myfile.write(str(cm) + '\n')

        temp_accuracy = accuracy_score(y_test, y_pred)
        temp_precision, temp_recall, temp_f1_score, _ = precision_recall_fscore_support(y_test, y_pred,
                                                                                        average='binary')

        accuracy += temp_accuracy
        precision += temp_precision
        recall += temp_recall
        f1score += temp_f1_score

        print("Accuracy: ", temp_accuracy)
        print("Precision: ", temp_precision)
        print("Recall: ", temp_recall)
        print("F1 score: ", temp_f1_score)



    # Create ROC-AUC curve
#    compute_ROC_Curve(tprs, mean_fpr, aucs)


    # Print average of metrics
    print("Average Precision: ", precision / 10)
    print("Average Accuracy: ", accuracy / 10)
    print("Average Recall: ", recall / 10)
    print("Average F1-score: ", f1score / 10)

    # Print your final average ROC-AUC score
    print('Average ROC:', av_roc / 10)

    with open(print_file, "a") as myfile:  # Write above print into output file
        myfile.write("Average Precision: " + str(precision / 10) + '\n' + "Average Accuracy: " + str(
            accuracy / 10) + '\n' + "Average Recall: " + str(recall / 10) + '\n' + "Average F1-score: " + str(
            f1score / 10) + '\n' + 'Average ROC:' + str(av_roc / 10) + '\n')
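
# The reshape inside the fold loop turns the 2-D encoded matrix
# (samples, features) into the 3-D (samples, timesteps, features) layout that
# Keras LSTMs require, using a single timestep per tweet. A self-contained
# sketch of that shape contract with toy numbers:
import numpy as np

encoded = np.arange(12, dtype='float32').reshape(4, 3)       # 4 samples, 3 features
sequences = encoded.reshape(encoded.shape[0], 1, encoded.shape[1])
print(sequences.shape)  # (4, 1, 3) -> matches input_shape=(1, n_features) above
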
Example #4
import numpy as np
from sklearn import svm
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score, precision_recall_fscore_support
import ClassRead  # project-local module providing the Reader / encoding helpers

def Voting_Ensembles(train_A, words_of_tweets, extra_features, feature_selection, encoding, print_file):
    reading = ClassRead.Reader()  # The Reader from ClassRead.py provides the encoding helpers

    x = np.array(words_of_tweets)
    y = train_A['label']

    # Initialize a running sum for the average ROC-AUC score
    # Initialize a count to print the number of folds
    # Initialize metrics to print their average
    av_roc = 0.
    count = 0
    precision = 0
    accuracy = 0
    recall = 0
    f1score = 0
    # The 3 variables below are used for the ROC-AUC curve
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    # Initialize your 10-fold cross-validation
    # Set shuffle=True to randomize the splits of your training data
    kf = KFold(n_splits=10, random_state=41, shuffle=True)

    # Loop once per fold of the cross-validation defined above
    for train_index, test_index in kf.split(x):
        count += 1
        print('Fold #: ', count)

        with open(print_file, "a") as myfile: # Write above print into output file
            myfile.write('Fold #: ' + str(count) + '\n')

        # This indexes your train and test data for this fold and puts them in random order, since shuffle=True was set above
        x_train, x_test = reading.get_enc(x[train_index], 1, y[train_index], train_index, extra_features, feature_selection, encoding, print_file), reading.get_enc(x[test_index], 0, y[test_index], test_index, extra_features, feature_selection, encoding, print_file)
        y_train, y_test = y[train_index], y[test_index]

#######################################################################################################################

        class1 = svm.SVC(kernel='rbf', C=10000, gamma=0.1)
        class2 = svm.SVC(kernel='rbf', C=1000, gamma=0.1)
        class3 = svm.SVC(kernel='rbf', C=100, gamma=0.1)
        class4 = svm.SVC(kernel='rbf', C=10, gamma=0.1)
        class5 = KNeighborsClassifier(n_neighbors=140)
        class6 = BernoulliNB()

        model = VotingClassifier(
            estimators=[('svm1', class1), ('svm2', class2), ('svm3', class3), ('svm4', class4), ('kneigh', class5),
                        ('bern', class6)], voting='hard')

#######################################################################################################################

        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)

#######################################################################################################################

        # The model is fit; time to predict and evaluate on the held-out fold
        print("Evaluating model...")

        with open(print_file, "a") as myfile: # Write above print into output file
            myfile.write("Evaluating model..." + '\n')

        # With voting='hard' the ensemble can only return class labels, not
        # probabilities, so the ROC-AUC below is computed from hard predictions
        roc = roc_auc_score(y_test, y_pred)

        # Print your ROC-AUC score for your kfold, and the running score average
        print('ROC: ', roc)
        av_roc += roc
        print('Continued Avg: ', av_roc / count)

        with open(print_file, "a") as myfile: # Write above print into output file
            myfile.write('ROC: ' + str(roc) + '\n' + 'Continued Avg: ' + str(av_roc / count) + '\n')



        # predict() already returns hard class labels, so this thresholding is
        # effectively a no-op; it only matters for probability outputs
        y_pred = (y_pred > 0.5)

# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        '''
        # Compute ROC curve and area the curve

        fpr, tpr, thresholds = roc_curve(y_test, y_pred)
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (count - 1, roc_auc))
        '''
# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

        # Creating the Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        print(cm)

        with open(print_file, "a") as myfile: # Write above print into output file
            myfile.write(str(cm) + '\n')

        temp_accuracy = accuracy_score(y_test, y_pred)
        temp_precision, temp_recall, temp_f1_score, _ = precision_recall_fscore_support(y_test, y_pred,
                                                                                        average='binary')

        accuracy += temp_accuracy
        precision += temp_precision
        recall += temp_recall
        f1score += temp_f1_score

        print("Accuracy: ", temp_accuracy)
        print("Precision: ", temp_precision)
        print("Recall: ", temp_recall)
        print("F1 score: ", temp_f1_score)


    # Create ROC-AUC curve
#    compute_ROC_Curve(tprs, mean_fpr, aucs)

    # Print average of metrics
    print("Average Precision: ", precision / 10)
    print("Average Accuracy: ", accuracy / 10)
    print("Average Recall: ", recall / 10)
    print("Average F1-score: ", f1score / 10)

    # Print your final average ROC-AUC score
    print('Average ROC:', av_roc / 10)

    with open(print_file, "a") as myfile:  # Write above print into output file
        myfile.write("Average Precision: " + str(precision / 10) + '\n' + "Average Accuracy: " + str(accuracy / 10) + '\n' + "Average Recall: " + str(recall / 10) + '\n' + "Average F1-score: " + str(f1score / 10) + '\n' + 'Average ROC:' + str(av_roc / 10) + '\n')
Example #5
import numpy as np
from sklearn import svm
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score, precision_recall_fscore_support
import ClassRead  # project-local module providing the Reader / encoding helpers

def svm_func(train_A, words_of_tweets, extra_features, feature_selection,
             encoding, print_file):
    reading = ClassRead.Reader()  # The Reader from ClassRead.py provides the encoding helpers

    x = np.array(words_of_tweets)
    y = train_A['label']

    # Initialize a running sum for the average ROC-AUC score
    # Initialize a count to print the number of folds
    # Initialize metrics to print their average
    av_roc = 0.
    count = 0
    precision = 0
    accuracy = 0
    recall = 0
    f1score = 0
    # Below 3 variables are used for ROC-AUC curve
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    # Initialize your 10-fold cross-validation
    # Set shuffle=True to randomize the splits of your training data
    kf = KFold(n_splits=10, random_state=41, shuffle=True)

    # Loop once per fold of the cross-validation defined above
    for train_index, test_index in kf.split(x):
        count += 1
        print('Fold #: ', count)

        with open(print_file,
                  "a") as myfile:  # Write above print into output file
            myfile.write('Fold #: ' + str(count) + '\n')

        # This indexes your train and test data for this fold and puts them in random order, since shuffle=True was set above
        x_train, x_test = reading.get_enc(x[train_index], 1, y[train_index],
                                          train_index, extra_features,
                                          feature_selection, encoding,
                                          print_file), reading.get_enc(
                                              x[test_index], 0, y[test_index],
                                              test_index, extra_features,
                                              feature_selection, encoding,
                                              print_file)
        y_train, y_test = y[train_index], y[test_index]

        # Assumes you have X (predictors) and Y (target) for the training set and x_test (predictors) for the test set
        # Create the SVM classification object
        # For very large C the margin is hard and points cannot lie inside it; for smaller C the margin is softer and can grow to encompass some points.
        # gamma: the higher the value, the more exactly the training set is fit, which raises generalization error and can cause over-fitting.
        model = svm.SVC(kernel='rbf', C=100, gamma=0.1)

        #######################################################################################################################
        # Feature Scaling
        #sc = StandardScaler()
        #x_train = sc.fit_transform(x_train)
        #x_test = sc.transform(x_test)
        #######################################################################################################################

        model.fit(x_train, y_train)

        # Accuracy on the training folds, printed as a quick sanity check
        print('Training accuracy: ', model.score(x_train, y_train))
        # Predict Output
        y_pred = model.predict(x_test)

        #######################################################################################################################

        # The model is fit; time to predict and evaluate on the held-out fold
        print("Evaluating model...")

        with open(print_file,
                  "a") as myfile:  # Write above print into output file
            myfile.write("Evaluating model..." + '\n')

        roc = roc_auc_score(y_test, y_pred)

        # Print your ROC-AUC score for your kfold, and the running score average
        print('ROC: ', roc)
        av_roc += roc
        print('Continued Avg: ', av_roc / count)

        with open(print_file,
                  "a") as myfile:  # Write above print into output file
            myfile.write('ROC: ' + str(roc) + '\n' + 'Continued Avg: ' +
                         str(av_roc / count) + '\n')

        # predict() already returns hard class labels, so this thresholding is
        # effectively a no-op; it only matters for probability outputs
        y_pred = (y_pred > 0.5)

        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        '''
        # Compute ROC curve and area the curve

        fpr, tpr, thresholds = roc_curve(y_test, y_pred)
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (count - 1, roc_auc))
        '''
        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

        # Creating the Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        print(cm)

        with open(print_file,
                  "a") as myfile:  # Write above print into output file
            myfile.write(str(cm) + '\n')

        temp_accuracy = accuracy_score(y_test, y_pred)
        temp_precision, temp_recall, temp_f1_score, _ = precision_recall_fscore_support(
            y_test, y_pred, average='binary')

        accuracy += temp_accuracy
        precision += temp_precision
        recall += temp_recall
        f1score += temp_f1_score

        print("Accuracy: ", temp_accuracy)
        print("Precision: ", temp_precision)
        print("Recall: ", temp_recall)
        print("F1 score: ", temp_f1_score)

    # Create ROC-AUC curve
#    compute_ROC_Curve(tprs, mean_fpr, aucs)

##########################################################################################################################

    # Print average of metrics
    print("Average Precision: ", precision / 10)
    print("Average Accuracy: ", accuracy / 10)
    print("Average Recall: ", recall / 10)
    print("Average F1-score: ", f1score / 10)

    # Print your final average ROC-AUC score
    print('Average ROC:', av_roc / 10)

    with open(print_file, "a") as myfile:  # Write above print into output file
        myfile.write("Average Precision: " + str(precision / 10) + '\n' +
                     "Average Accuracy: " + str(accuracy / 10) + '\n' +
                     "Average Recall: " + str(recall / 10) + '\n' +
                     "Average F1-score: " + str(f1score / 10) + '\n' +
                     'Average ROC:' + str(av_roc / 10) + '\n')
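
# The C=100, gamma=0.1 pair above is hard-coded. A minimal sketch of tuning it
# with an inner grid search on the training folds; the grid values are
# illustrative, not taken from the original code:
from sklearn import svm
from sklearn.model_selection import GridSearchCV

def tuned_rbf_svm(x_train, y_train):
    param_grid = {'C': [1, 10, 100, 1000, 10000], 'gamma': [0.01, 0.1, 1]}
    grid = GridSearchCV(svm.SVC(kernel='rbf'), param_grid, cv=3)
    grid.fit(x_train, y_train)
    print('Best params: ', grid.best_params_)
    return grid.best_estimator_
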
Example #6
import numpy as np
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import Normalizer
from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score, precision_recall_fscore_support, classification_report
import ClassRead  # project-local module providing the Reader / encoding helpers

def K_Neighbors(train_A, words_of_tweets, extra_features, feature_selection,
                encoding, print_file):
    reading = ClassRead.Reader()  # The Reader from ClassRead.py provides the encoding helpers

    x = np.array(words_of_tweets)
    y = train_A['label']

    # Initialize a running sum for the average ROC-AUC score
    # Initialize a count to print the number of folds
    # Initialize metrics to print their average
    av_roc = 0.
    count = 0
    precision = 0
    accuracy = 0
    recall = 0
    f1score = 0
    # The 3 variables below are used for the ROC-AUC curve
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    # Initialize your 10-fold cross-validation
    # Set shuffle=True to randomize the splits of your training data
    kf = KFold(n_splits=10, random_state=41, shuffle=True)

    # Loop once per fold of the cross-validation defined above
    for train_index, test_index in kf.split(x):
        count += 1
        print('Fold #: ', count)

        with open(print_file,
                  "a") as myfile:  # Write above print into output file
            myfile.write('Fold #: ' + str(count) + '\n')

        # This indexes your train and test data for this fold and puts them in random order, since shuffle=True was set above
        x_train, x_test = reading.get_enc(x[train_index], 1, y[train_index],
                                          train_index, extra_features,
                                          feature_selection, encoding,
                                          print_file), reading.get_enc(
                                              x[test_index], 0, y[test_index],
                                              test_index, extra_features,
                                              feature_selection, encoding,
                                              print_file)
        y_train, y_test = y[train_index], y[test_index]

        #######################################################################################################################

        # leaf_size : int, optional (default = 30)
        # Leaf size passed to BallTree or KDTree; it affects build/query speed and memory, not the predictions.

        # p : integer, optional (default = 2)
        # When p = 1, this is equivalent to using manhattan_distance (l1),
        # and euclidean_distance (l2) for p = 2.
        # For arbitrary p, minkowski_distance (l_p) is used.

        # algorithm : {‘auto’, ‘ball_tree’, ‘kd_tree’, ‘brute’}, optional Algorithm used to compute the nearest neighbors:
        # ‘ball_tree’ will use BallTree
        # ‘kd_tree’ will use KDTree
        # ‘brute’ will use a brute-force search.
        # ‘auto’ will attempt to decide the most appropriate algorithm based on the values passed to fit method.

        # weights : str or callable, optional (default = ‘uniform’) weight function used in prediction. Possible values:
        # ‘uniform’ : uniform weights. All points in each neighborhood are weighted equally.
        # ‘distance’ : weight points by the inverse of their distance. in this case, closer neighbors of a query point will have a greater influence than neighbors which are further away.

        scaler = Normalizer()
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)

        classifier = KNeighborsClassifier(n_neighbors=140)
        classifier.fit(x_train, y_train)
        y_pred = classifier.predict(x_test)

        #######################################################################################################################

        # The model is fit; time to predict and evaluate on the held-out fold
        print("Evaluating model...")

        with open(print_file,
                  "a") as myfile:  # Write above print into output file
            myfile.write("Evaluating model..." + '\n')

        roc = roc_auc_score(y_test, y_pred)

        # Print your ROC-AUC score for your kfold, and the running score average
        print('ROC: ', roc)
        av_roc += roc
        print('Continued Avg: ', av_roc / count)

        with open(print_file,
                  "a") as myfile:  # Write above print into output file
            myfile.write('ROC: ' + str(roc) + '\n' + 'Continued Avg: ' +
                         str(av_roc / count) + '\n')

# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        '''
        # Compute ROC curve and area the curve

        fpr, tpr, thresholds = roc_curve(y_test, y_pred)
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (count - 1, roc_auc))
        '''
        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

        # predict() already returns hard class labels, so this thresholding is
        # effectively a no-op; it only matters for probability outputs
        y_pred = (y_pred > 0.5)

        # Creating the Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        print(cm)

        with open(print_file,
                  "a") as myfile:  # Write above print into output file
            myfile.write(str(cm) + '\n')

        report = classification_report(y_test, y_pred)
        print(report)

        temp_accuracy = accuracy_score(y_test, y_pred)
        temp_precision, temp_recall, temp_f1_score, _ = precision_recall_fscore_support(
            y_test, y_pred, average='binary')

        accuracy += temp_accuracy
        precision += temp_precision
        recall += temp_recall
        f1score += temp_f1_score

        print("Accuracy: ", temp_accuracy)
        print("Precision: ", temp_precision)
        print("Recall: ", temp_recall)
        print("F1 score: ", temp_f1_score)

    # Create ROC-AUC curve
#    compute_ROC_Curve(tprs, mean_fpr, aucs)

    # Print average of metrics
    print("Average Precision: ", precision / 10)
    print("Average Accuracy: ", accuracy / 10)
    print("Average Recall: ", recall / 10)
    print("Average F1-score: ", f1score / 10)

    # Print your final average ROC-AUC score
    print('Average ROC:', av_roc / 10)

    with open(print_file, "a") as myfile:  # Write above print into output file
        myfile.write("Average Precision: " + str(precision / 10) + '\n' +
                     "Average Accuracy: " + str(accuracy / 10) + '\n' +
                     "Average Recall: " + str(recall / 10) + '\n' +
                     "Average F1-score: " + str(f1score / 10) + '\n' +
                     'Average ROC:' + str(av_roc / 10) + '\n')
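
# n_neighbors=140 above is hard-coded. A minimal sketch of choosing it by
# cross-validation on the (already normalized) training folds; the candidate
# values are illustrative:
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier

def best_n_neighbors(x_train, y_train, candidates=(20, 60, 100, 140, 180)):
    mean_scores = [cross_val_score(KNeighborsClassifier(n_neighbors=k),
                                   x_train, y_train, cv=3).mean()
                   for k in candidates]
    return candidates[int(np.argmax(mean_scores))]
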
Example #7
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score, precision_recall_fscore_support
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
from keras.constraints import maxnorm
from keras import callbacks
import ClassRead  # project-local module providing the Reader / encoding helpers

def neural(train_A, words_of_tweets, extra_features, feature_selection,
           encoding, print_file):
    reading = ClassRead.Reader()  # The Reader from ClassRead.py provides the encoding helpers

    x = np.array(words_of_tweets)
    y = train_A['label']

    # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
    # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    # Initialize a running sum for the average ROC-AUC score
    # Initialize a count to print the number of folds
    # Initialize metrics to print their average
    av_roc = 0.
    count = 0
    precision = 0
    accuracy = 0
    recall = 0
    f1score = 0
    # The 3 variables below are used for the ROC-AUC curve
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, 100)

    # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
    # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

    # Initialize your 10-fold cross-validation
    # Set shuffle=True to randomize the splits of your training data
    kf = KFold(n_splits=10, random_state=41, shuffle=True)

    # Loop once per fold of the cross-validation defined above
    for train_index, test_index in kf.split(x):
        count += 1
        print('Fold #: ', count)

        with open(print_file,
                  "a") as myfile:  # Write above print into output file
            myfile.write('Fold #: ' + str(count) + '\n')

        # This indexes your train and test data for this fold and puts them in random order, since shuffle=True was set above
        x_train, x_test = reading.get_enc(x[train_index], 1, y[train_index],
                                          train_index, extra_features,
                                          feature_selection, encoding,
                                          print_file), reading.get_enc(
                                              x[test_index], 0, y[test_index],
                                              test_index, extra_features,
                                              feature_selection, encoding,
                                              print_file)
        y_train, y_test = y[train_index], y[test_index]

        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

        # Initializing Neural Network
        classifier = Sequential()

        feature_dimensions = x_train.shape[1]
        print("second dimension (feature dimension): ", x_train.shape[1])

        # Adding the input layer and the first hidden layer (20 neurons)
        classifier.add(
            Dense(20,
                  kernel_initializer='glorot_uniform',
                  activation='softsign',
                  input_dim=feature_dimensions,
                  kernel_constraint=maxnorm(2)))
        classifier.add(Dropout(0.2))

        # Adding the second hidden layer (10 neurons)
        classifier.add(
            Dense(10,
                  kernel_initializer='glorot_uniform',
                  activation='softsign',
                  kernel_constraint=maxnorm(2)))
        classifier.add(Dropout(0.2))

        # Adding the output layer with 1 output
        classifier.add(
            Dense(1, kernel_initializer='glorot_uniform',
                  activation='sigmoid'))

        optimizer = RMSprop(lr=0.001)

        # Compiling Neural Network
        classifier.compile(optimizer=optimizer,
                           loss='binary_crossentropy',
                           metrics=['accuracy'])

        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        '''

# How to Tune Batch Size and Number of Epochs
        # create model
        model = KerasClassifier(build_fn=create_model, verbose=0)
        # define the grid search parameters
        batch_size = [10, 20, 40, 60, 80, 100]
        epochs = [10, 20, 40]
        param_grid = dict(batch_size=batch_size, epochs=epochs)
        '''
        '''
		
		# create model
        model = KerasClassifier(build_fn=create_model, epochs=20, batch_size=20, verbose=0)
		
# How to Tune the Training Optimization Algorithm
        # define the grid search parameters
        optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
        param_grid = dict(optimizer=optimizer)
        
        
# How to Tune Learning Rate and Momentum
        # define the grid search parameters
        learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
       # momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
        param_grid = dict(learn_rate=learn_rate)
        

# How to Tune Network Weight Initialization
        # define the grid search parameters
        init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal',
                     'he_uniform']
        param_grid = dict(init_mode=init_mode)
       

# How to Tune the Neuron Activation Function
        # define the grid search parameters
        activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
        param_grid = dict(activation=activation)
        
# How to Tune Dropout Regularization
        # define the grid search parameters
        weight_constraint = [1, 2, 3, 4, 5]
        dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        param_grid = dict(dropout_rate=dropout_rate, weight_constraint=weight_constraint)
       




# How to Tune the Number of Neurons in the Hidden Layer
        # define the grid search parameters
        neurons = [1, 5, 10, 15, 20, 25, 30, 35, 40]
        param_grid = dict(neurons=neurons)

        '''

        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        '''
        grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
        # Use only the training data set (cannot use whole data set cause it is not encoded)
        grid_result = grid.fit(x_train, y_train)
        # summarize results
        print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
        means = grid_result.cv_results_['mean_test_score']
        stds = grid_result.cv_results_['std_test_score']
        params = grid_result.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))
        '''

        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

        #    classifier = model

        #    classifier = create_model()

        # An EarlyStopping callback only takes effect if it is passed to fit();
        # monitoring val_loss also requires some held-out validation data
        early_stopping = callbacks.EarlyStopping(monitor='val_loss',
                                                 min_delta=0,
                                                 patience=2,
                                                 verbose=0,
                                                 mode='auto')

        # Fitting our model
        classifier.fit(x_train, y_train, batch_size=20, epochs=50,
                       validation_split=0.1, callbacks=[early_stopping])

        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

        # The model is fit; time to predict and evaluate on the held-out fold
        print("Evaluating model...")

        with open(print_file,
                  "a") as myfile:  # Write above print into output file
            myfile.write("Evaluating model..." + '\n')

        test_preds = classifier.predict_proba(x_test, verbose=0)

        roc = roc_auc_score(y_test, test_preds)
        scores = classifier.evaluate(x_test, y_test)
        print(scores)

        # Print your model summary (summary() prints directly and returns None)
        classifier.summary()

        # Print your ROC-AUC score for your kfold, and the running score average
        print('ROC: ', roc)
        av_roc += roc
        print('Continued Avg: ', av_roc / count)

        with open(print_file,
                  "a") as myfile:  # Write above print into output file
            myfile.write('Scores: ' + str(scores) + '\n' + 'ROC: ' + str(roc) +
                         '\n' + 'Continued Avg: ' + str(av_roc / count) + '\n')

# -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
        '''
        
        # Compute ROC curve and area the curve

        fpr, tpr, thresholds = roc_curve(y_test, test_preds)
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        roc_auc = auc(fpr, tpr)
        aucs.append(roc_auc)
        plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.2f)' % (count-1, roc_auc))
        
        '''
        # -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

        # Predicting the Test set results
        y_pred = classifier.predict(x_test)
        y_pred = (y_pred > 0.5)

        # Creating the Confusion Matrix
        cm = confusion_matrix(y_test, y_pred)
        print(cm)

        with open(print_file,
                  "a") as myfile:  # Write above print into output file
            myfile.write(str(cm) + '\n')

        temp_accuracy = accuracy_score(y_test, y_pred)
        temp_precision, temp_recall, temp_f1_score, _ = precision_recall_fscore_support(
            y_test, y_pred, average='binary')

        accuracy += temp_accuracy
        precision += temp_precision
        recall += temp_recall
        f1score += temp_f1_score

        print("Accuracy: ", temp_accuracy)
        print("Precision: ", temp_precision)
        print("Recall: ", temp_recall)
        print("F1 score: ", temp_f1_score)

    # Create ROC-AUC curve
#    compute_ROC_Curve(tprs, mean_fpr, aucs)

    # Print average of metrics
    print("Average Precision: ", precision / 10)
    print("Average Accuracy: ", accuracy / 10)
    print("Average Recall: ", recall / 10)
    print("Average F1-score: ", f1score / 10)

    # Print your final average ROC-AUC score
    print('Average ROC:', av_roc / 10)

    with open(print_file, "a") as myfile:  # Write above print into output file
        myfile.write("Average Precision: " + str(precision / 10) + '\n' +
                     "Average Accuracy: " + str(accuracy / 10) + '\n' +
                     "Average Recall: " + str(recall / 10) + '\n' +
                     "Average F1-score: " + str(f1score / 10) + '\n' +
                     'Average ROC:' + str(av_roc / 10) + '\n')
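
# The commented-out grid searches above call a create_model() builder that is
# never defined in this snippet. A hedged sketch of what it might look like for
# the architecture used here, exposing a subset of the tuned parameters; the
# feature_dimensions default is an assumption, and KerasClassifier lives in
# keras.wrappers.scikit_learn in the Keras versions this code targets:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import RMSprop
from keras.constraints import maxnorm

def create_model(feature_dimensions=1000, neurons=20, dropout_rate=0.2,
                 activation='softsign', init_mode='glorot_uniform',
                 weight_constraint=2, learn_rate=0.001):
    model = Sequential()
    model.add(Dense(neurons, kernel_initializer=init_mode, activation=activation,
                    input_dim=feature_dimensions,
                    kernel_constraint=maxnorm(weight_constraint)))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1, kernel_initializer=init_mode, activation='sigmoid'))
    model.compile(optimizer=RMSprop(lr=learn_rate),
                  loss='binary_crossentropy', metrics=['accuracy'])
    return model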