Example #1
# imports assumed by this example (scikit-learn preprocessing and the Keras API)
from sklearn import preprocessing
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization, Activation
from keras.utils import np_utils

# scale the GloVe document vectors before feeding them to the network
scl = preprocessing.StandardScaler()
xtrain_glove_scl = scl.fit_transform(xtrain_glove)
xvalid_glove_scl = scl.transform(xvalid_glove)


# we need to one-hot encode the labels for the neural net
ytrain_enc = np_utils.to_categorical(ytrain)
yvalid_enc = np_utils.to_categorical(yvalid)


# create a simple 3-layer sequential neural net
model = Sequential()

model.add(Dense(300, input_dim=300, activation='relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(300, activation='relu'))
model.add(Dropout(0.3))
model.add(BatchNormalization())

model.add(Dense(3))
model.add(Activation('softmax'))

# compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam')

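# A minimal training call to round out the example; the epoch count and batch
# size below are illustrative assumptions, not values from the original.
model.fit(xtrain_glove_scl, ytrain_enc,
          validation_data=(xvalid_glove_scl, yvalid_enc),
          epochs=5, batch_size=64, verbose=1)
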
Example #2
    def train_model(self):

        if self.ui.radioButton_4.isChecked():
            pick_in = open('dataIndecises.pickle', 'rb')

            # Load the pickle file into data variable
            data = pickle.load(pick_in)
            pick_in.close()
            random.shuffle(data)
            features = []
            labels = []

            # Split the elements in data into features and labels
            for feature, label in data:
                features.append(feature)
                labels.append(label)

            # Split the data into train (70%) and test data (30%)
            xtrain, xtest, ytrain, ytest = train_test_split(features,
                                                            labels,
                                                            test_size=0.3)

            # Get an instance of the model
            # Parameter tuning: find the number of neighbors that results in best predictions

            accuracies = []
            models = []
            # k runs from 1 up to and including the user-entered maximum
            for k in range(1, int(self.ui.lineEdit_2.text()) + 1):
                knn_model = KNeighborsClassifier(n_neighbors=k)
                # Train the model
                knn_model.fit(xtrain, ytrain)
                # Predict on the test data and compare with the true labels
                predictions = knn_model.predict(xtest)
                accuracies.append(np.mean(predictions == ytest))
                models.append(knn_model)

            for k in range(1, len(accuracies) + 1):
                print("Accuracy for k: ", k)
                print(accuracies[k - 1])


            # Convert accuracies to percentages
            percentages = [100 * acc for acc in accuracies]

            # Find the maximum percentage and its corresponding k value
            maximum = max(percentages)
            best_k = percentages.index(maximum) + 1
            print("Max percentage: ", maximum)
            print("K-value: ", best_k)

            plt.plot(range(1, len(percentages) + 1), percentages)
            plt.ylabel("Accuracy (%)")
            plt.xlabel("Number of Neighbours (k)")
            plt.title("KNN Accuracy by k")
            plt.grid()
            plt.savefig('knnplot.jpg')
            knnplot = Image.open('knnplot.jpg')
            new_knn_plot = knnplot.resize((510, 110))
            new_knn_plot.save('knnplot.jpg')
            plt.show()

            self.ui.label_17.setPixmap(QPixmap('knnplot.jpg'))

            optimized_model = models[best_k - 1]
            # Save the best model to the 'knn_model.sav' file
            pick = open('knn_model.sav', 'wb')
            pickle.dump(optimized_model, pick)
            pick.close()

        if self.ui.radioButton_5.isChecked():
            pick_in = open('dataIndecises.pickle', 'rb')

            # Load the pickle file into data variable
            data = pickle.load(pick_in)
            pick_in.close()
            random.shuffle(data)
            features = []
            labels = []

            # Split the elements in data into features and labels
            for feature, label in data:
                features.append(feature)
                labels.append(label)

            # Split the data into train (70%) and test data (30%)
            xtrain, xtest, ytrain, ytest = train_test_split(features,
                                                            labels,
                                                            test_size=0.3)

            decision_trees_model = tree.DecisionTreeClassifier()
            decision_trees_model.fit(xtrain, ytrain)
            prediction = decision_trees_model.predict(xtest)

            self.ui.label_17.setText(classification_report(ytest, prediction))

            print("depth: ", decision_trees_model.get_depth())
            print("prediction", prediction)
            # print("Testing accuracy ", score)
            # print("Numpy accuracy ", np.mean(ytest == prediction))

            # Save the model to the 'decision_trees_model.sav' file
            pick = open('decision_trees_model.sav', 'wb')
            pickle.dump(decision_trees_model, pick)
            pick.close()

        if self.ui.radioButton_6.isChecked():
            # Read the pickle file containing the labeled data
            pick_in = open('dataIndecises.pickle', 'rb')
            # Load the pickle file into data variable
            data = pickle.load(pick_in)
            pick_in.close()

            # Shuffle the data
            random.shuffle(data)
            features = []
            labels = []

            # Split the elements in data into features and labels
            for feature, label in data:
                features.append(feature)
                labels.append(label)

            # Split the data into train (70%) and test data (30%)
            xtrain, xtest, ytrain, ytest = train_test_split(features,
                                                            labels,
                                                            test_size=0.3)

            # Define a parameter grid for the SVM model
            param_grid = {
                'C': [0.1, 1, 10, 100, 1000],
                'gamma': [0.01, 0.001, 0.0001],
                'kernel': ['rbf', 'poly', 'linear', 'sigmoid']
            }

            # Define the SVM model
            svc = svm.SVC(probability=True)
            # Chooses the best parameters from param_grid for the SVM model
            model = GridSearchCV(svc, param_grid, cv=3)
            # Trains the model on the specified training data
            model.fit(xtrain, ytrain)

            # Save the model to the 'model_svm.sav' file
            pick = open('model_svm.sav', 'wb')
            pickle.dump(model, pick)
            pick.close()
            print("svm")

            # Testing phase: predict and store the predictions of the testing data in model_predictions
            model_predictions = model.predict(xtest)
            # Print a classification report for the model (precision, recall, f1-score, support)
            self.ui.label_17.setText(
                classification_report(ytest, model_predictions))
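            # A hedged addition: GridSearchCV exposes the winning
            # hyper-parameters after fitting, which is worth surfacing
            print("Best SVM parameters: ", model.best_params_)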

        if self.ui.radioButton_7.isChecked():
            # load the trained pickle file
            pick = open("dataIndecises.pickle", "rb")
            data = pickle.load(pick)
            pick.close()

            # Split the elements in data into features and labels

            random.shuffle(data)
            features = []
            labels = []
            for feature, label in data:
                features.append(feature)
                labels.append(label)

            # number of features per sample
            size = len(features[0])

            # Split the data into train (70%) and test data (30%)
            xtrain, xtest, ytrain, ytest = train_test_split(features,
                                                            labels,
                                                            test_size=0.3)

            # reshape training and testing features lists based on number of features selected by the user
            xtrain = np.reshape(xtrain, (-1, size, 1, 1))
            xtest = np.reshape(xtest, (-1, size, 1, 1))

            # convert to tensors
            xtrain = tf.convert_to_tensor(xtrain, dtype=tf.float32)
            xtest = tf.convert_to_tensor(xtest)
            ytrain = tf.convert_to_tensor(ytrain)
            ytest = tf.convert_to_tensor(ytest)

            # define the CNN Sequential Model

            model = Sequential()

            model.add(Conv2D(64, (3, 1), input_shape=xtrain.shape[1:]))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))

            model.add(Conv2D(64, (3, 1)))
            model.add(Activation('relu'))
            model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))

            model.add(Flatten())

            model.add(Dense(1))
            model.add(Activation('sigmoid'))

            model.compile(loss='binary_crossentropy',
                          optimizer='adam',
                          metrics=['accuracy'])

            model.fit(xtrain,
                      ytrain,
                      batch_size=1,
                      epochs=19,
                      validation_data=(xtest, ytest))

            model.save('CNN_Ratios.model')
            model = tf.keras.models.load_model('CNN_Ratios.model')
            # Predict on the held-out test features
            prediction = model.predict(xtest)
            print("cnn")

        if self.ui.radioButton_8.isChecked():
            print("random")
            # Read the pickle file containing the labeled data
            pick_in = open('dataIndecises.pickle', 'rb')
            # Load the pickle file into data variable
            data = pickle.load(pick_in)
            pick_in.close()

            # Shuffle the data
            random.shuffle(data)
            dataInd = []
            labels = []
            # Split the elements in data into features and labels
            for ind, label in data:
                dataInd.append(ind)
                labels.append(label)


            X_train, X_test, y_train, y_test = train_test_split(dataInd,
                                                                labels,
                                                                test_size=0.1,
                                                                random_state=0)

            # Feature Scaling

            sc = StandardScaler()
            X_train = sc.fit_transform(X_train)
            X_test = sc.transform(X_test)

            classifier = RandomForestClassifier(n_estimators=20,
                                                random_state=0)
            classifier.fit(X_train, y_train)

            # Save the model to the 'model_forest.sav' file
            pick = open('model_forest.sav', 'wb')
            pickle.dump(classifier, pick)
            pick.close()
            y_pred = classifier.predict(X_test)

            plot_confusion_matrix(classifier,
                                  X_test,
                                  y_test,
                                  values_format='d',
                                  display_labels=["old", "young"])
            plt.show()
            self.ui.label_17.setText(classification_report(y_test, y_pred))
            # print(accuracy_score(y_test, y_pred.round(), normalize=True))
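            # A hedged addition: overall test accuracy via the estimator's
            # built-in score method, complementing the report above
            print("Random forest accuracy: ", classifier.score(X_test, y_test))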

        if self.ui.radioButton.isChecked():

            def get_dataset():
                # Read the pickle file containing the labeled data
                pick_in = open('dataIndecises.pickle', 'rb')
                # Load the pickle file into data variable
                data = pickle.load(pick_in)
                pick_in.close()

                # Shuffle the data
                random.shuffle(data)
                dataInd = []
                labels = []
                # Split the elements in data into features and labels
                for ind, label in data:
                    dataInd.append(ind)
                    labels.append(label)

                return dataInd, labels

            # define the base models
            level0 = list()
            if self.ui.checkBox_11.isChecked():
                level0.append(('knn', KNeighborsClassifier()))
            if self.ui.checkBox_12.isChecked():
                level0.append(('cart', DecisionTreeClassifier()))
            if self.ui.checkBox_13.isChecked():
                level0.append(('svm', SVC()))
            if self.ui.checkBox_14.isChecked():
                level0.append(('lr', LogisticRegression()))
            if self.ui.checkBox_15.isChecked():
                level0.append(('bayes', GaussianNB()))
            # define meta learner model
            level1 = LogisticRegression()

            # define the stacking ensemble
            model = StackingClassifier(estimators=level0,
                                       final_estimator=level1,
                                       cv=5)

            # fit the model on all available data
            dataInd, labels = get_dataset()
            xtrain, xtest, ytrain, ytest = train_test_split(dataInd,
                                                            labels,
                                                            test_size=0.1,
                                                            random_state=1,
                                                            stratify=labels)

            model.fit(xtrain, ytrain)
            # Save the model to the 'model_stacking.sav' file
            pick = open('model_stacking.sav', 'wb')
            pickle.dump(model, pick)
            pick.close()

            # Testing phase: predict and store the predictions of the testing data in model_predictions
            model_predictions = model.predict(xtest)
            # Print a classification report for the model (precision, recall, f1-score, support)
            print("stacking")
            self.ui.label_17.setText(
                classification_report(ytest, model_predictions))
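
# A hedged usage sketch, not part of the class above: any model pickled by
# train_model can be reloaded later for inference. 'model_stacking.sav' is the
# file written above; 'sample_features' is a hypothetical 2-D feature array.
import pickle

with open('model_stacking.sav', 'rb') as f:
    loaded_model = pickle.load(f)
# predictions = loaded_model.predict(sample_features)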
Example #3
def run_model(training_data=None,
              testing_data=None,
              training_y=None,
              testing_y=None,
              svm_flag=False,
              gs_flag=False):

    x_train = training_data
    x_test = testing_data
    y_train = training_y
    y_test = testing_y

    if svm_flag:

        if gs_flag:

            logging.getLogger('regular.time').info(
                'running GRIDSEARCH SVM model')
            param_grid = [
                {
                    'C': [1, 10, 100, 1000],
                    'kernel': ['linear']
                },
                {
                    'C': [1, 10, 100, 1000],
                    'gamma': [0.001, 0.0001],
                    'kernel': ['rbf']
                },
            ]
            model = GridSearchCV(estimator=svm.SVC(),
                                 param_grid=param_grid,
                                 n_jobs=-1)
            model.fit(x_train, y_train)
            logging.getLogger('regular.time').debug('finished training model')

            # View the best cross-validation score found by the grid search
            logging.getLogger('regular').debug(
                'Best score: {0}'.format(model.best_score_))

            # View the best parameters for the model found using grid search
            logging.getLogger('regular').debug('Best C: {0}'.format(
                model.best_estimator_.C))
            logging.getLogger('regular').debug('Best Kernel: {0}'.format(
                model.best_estimator_.kernel))
            logging.getLogger('regular').debug('Best Gamma: {0}'.format(
                model.best_estimator_.gamma))

        else:
            logging.getLogger('regular.time').info('running SVM model')
            model = svm.SVC()
            model.fit(x_train, y_train)
            logging.getLogger('regular.time').debug('finished training model')

        svm_score = model.score(x_test, y_test)
        logging.getLogger('regular').info("score: {0}".format(svm_score))

    else:

        logging.getLogger('regular').info('running basic NN model')
        logging.getLogger('regular.time').debug('creating and compiling model')
        model = Sequential()
        model.add(Dense(12, input_dim=np.shape(x_train)[1], activation='relu'))
        model.add(Dense(8, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        logging.getLogger('regular.time').info('training model')
        logging.getLogger('regular').debug(
            'training dataset size processed = {0}'.format(np.shape(x_train)))
        logging.getLogger('regular').debug(
            'testing dataset size processed = {0}'.format(np.shape(x_test)))
        model.fit(x_train, y_train, epochs=150, batch_size=5, verbose=1)

        logging.getLogger('regular.time').info('evaluating model')
        scores = model.evaluate(x_test, y_test, verbose=0)
        logging.getLogger('regular').info(
            "%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
Example #4
# Convert the dataset into x_train and y_train.
# Imports assumed by this example; 'dataset', 'train', 'new_data', and 'valid'
# (a train/validation split of the time series) are defined earlier.
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)

x_train, y_train = [], []
for i in range(60, len(train)):
    x_train.append(scaled_data[i - 60:i, 0])
    y_train.append(scaled_data[i, 0])
x_train, y_train = np.array(x_train), np.array(y_train)

x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

# create and fit the LSTM network
model = Sequential()
model.add(
    LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)))
model.add(LSTM(units=50))
model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=1, batch_size=1, verbose=2)

# predicting 246 values, using the past 60 observations from the train data
inputs = new_data[len(new_data) - len(valid) - 60:].values
inputs = inputs.reshape(-1, 1)
inputs = scaler.transform(inputs)

X_test = []
for i in range(60, inputs.shape[0]):
    X_test.append(inputs[i - 60:i, 0])
X_test = np.array(X_test)
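
# A hedged completion of the example: reshape the test windows into the 3-D
# form the LSTM expects, predict, and invert the MinMax scaling so predictions
# come back on the original scale.
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
closing_price = model.predict(X_test)
closing_price = scaler.inverse_transform(closing_price)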