# Standardise the `*_glove` feature matrices. The scaler is fitted on the
# training split only and then re-used, unchanged, on the validation split.
scl = preprocessing.StandardScaler()
xtrain_glove_scl = scl.fit_transform(xtrain_glove)
xvalid_glove_scl = scl.transform(xvalid_glove)

# In[ ]:

# The network is trained with categorical cross-entropy, so the integer
# class labels are one-hot encoded first.
ytrain_enc = np_utils.to_categorical(ytrain)
yvalid_enc = np_utils.to_categorical(yvalid)

# In[ ]:

# Fully connected network: two 300-unit ReLU layers (each followed by
# dropout and batch normalisation) and a 3-way softmax output.
model = Sequential([
    Dense(300, input_dim=300, activation='relu'),
    Dropout(0.2),
    BatchNormalization(),
    Dense(300, activation='relu'),
    Dropout(0.3),
    BatchNormalization(),
    Dense(3),
    Activation('softmax'),
])

# Compile the model.
model.compile(loss='categorical_crossentropy', optimizer='adam')

# In[ ]:
def train_model(self):
    """Train, evaluate and save the classifier(s) selected in the UI.

    Each radio button enables one independent training branch:

    * ``radioButton_4`` - k-nearest neighbours; tries every ``k`` in
      ``range(1, int(lineEdit_2.text()))``, plots the accuracies and saves
      the best model to ``knn_model.sav``.
    * ``radioButton_5`` - decision tree, saved to
      ``decision_trees_model.sav``.
    * ``radioButton_6`` - SVM tuned with a grid search, saved to
      ``model_svm.sav``.
    * ``radioButton_7`` - small CNN, saved to ``CNN_Ratios.model``.
    * ``radioButton_8`` - random forest, saved to ``model_forest.sav``.
    * ``radioButton`` - stacking ensemble built from the base learners
      ticked in ``checkBox_11`` .. ``checkBox_15``, saved to
      ``model_stacking.sav``.

    All branches read their labelled data from ``dataIndecises.pickle`` and
    report their results in ``label_17`` (the CNN branch only prints).

    Raises:
        ValueError: if the KNN branch is selected and the upper bound in
            ``lineEdit_2`` is not an integer greater than 1.
    """

    def load_labeled_data():
        """Return shuffled ``(features, labels)`` lists from the pickle."""
        # NOTE: pickle executes arbitrary code while loading; this file
        # must only ever come from a trusted source.
        with open('dataIndecises.pickle', 'rb') as pick_in:
            data = pickle.load(pick_in)
        random.shuffle(data)
        features = [feature for feature, _ in data]
        labels = [label for _, label in data]
        return features, labels

    def save_model(fitted_model, path):
        """Pickle *fitted_model* to *path*, always closing the file."""
        with open(path, 'wb') as pick:
            pickle.dump(fitted_model, pick)

    if self.ui.radioButton_4.isChecked():
        features, labels = load_labeled_data()
        # Split the data into train (70%) and test data (30%)
        xtrain, xtest, ytrain, ytest = train_test_split(features, labels,
                                                        test_size=0.3)

        # Parameter tuning: find the number of neighbours that results in
        # the best predictions. The upper bound is exclusive.
        upper_bound = int(self.ui.lineEdit_2.text())
        accuracies = []
        models = []
        for k in range(1, upper_bound):
            knn_model = KNeighborsClassifier(n_neighbors=k)
            knn_model.fit(xtrain, ytrain)
            # Predict from the test data and compare with actual labels
            predictions = knn_model.predict(xtest)
            accuracies.append(np.mean(predictions == ytest))
            models.append(knn_model)

        if not accuracies:
            raise ValueError(
                "the maximum number of neighbours must be greater than 1")

        # Report over the k values that were actually trained instead of
        # a hard-coded 1..10 range.
        k_values = range(1, len(accuracies) + 1)
        for k, accuracy in zip(k_values, accuracies):
            print("Accuracy for k: ", k)
            print(accuracy)

        # Get accuracies as percentages
        percentages = [100 * accuracy for accuracy in accuracies]

        # Find maximum value and corresponding K (first best on ties)
        maximum = max(percentages)
        index = percentages.index(maximum) + 1
        print("Max percentage: ", maximum)
        print("K-value: ", index)
        print(percentages[index - 1])

        plt.plot(k_values, percentages)
        plt.ylabel("Accuracies Percentages")
        plt.xlabel("# of Neighbours")
        plt.title("Percentages")
        plt.grid()
        plt.savefig('knnplot.jpg')

        # Image.resize takes the target size as ONE (width, height) tuple.
        with Image.open('knnplot.jpg') as knnplot:
            new_knn_plot = knnplot.resize((510, 110))
        new_knn_plot.save('knnplot.jpg')
        plt.show()
        self.ui.label_17.setPixmap(QPixmap('knnplot.jpg'))

        # Save the best-scoring model, not the last one that was trained.
        optimized_model = models[index - 1]
        save_model(optimized_model, 'knn_model.sav')

    if self.ui.radioButton_5.isChecked():
        features, labels = load_labeled_data()
        # Split the data into train (70%) and test data (30%)
        xtrain, xtest, ytrain, ytest = train_test_split(features, labels,
                                                        test_size=0.3)
        decision_trees_model = tree.DecisionTreeClassifier()
        decision_trees_model.fit(xtrain, ytrain)
        prediction = decision_trees_model.predict(xtest)
        self.ui.label_17.setText(classification_report(ytest, prediction))
        print("depth: ", decision_trees_model.get_depth())
        print("prediction", prediction)
        save_model(decision_trees_model, 'decision_trees_model.sav')

    if self.ui.radioButton_6.isChecked():
        features, labels = load_labeled_data()
        # Split the data into train (70%) and test data (30%)
        xtrain, xtest, ytrain, ytest = train_test_split(features, labels,
                                                        test_size=0.3)
        # Parameter grid searched for the SVM model
        param_grid = {
            'C': [0.1, 1, 10, 100, 1000],
            'gamma': [0.01, 0.001, 0.0001],
            'kernel': ['rbf', 'poly', 'linear', 'sigmoid'],
        }
        svc = svm.SVC(probability=True)
        # Choose the best parameters from param_grid (3-fold CV)
        model = GridSearchCV(svc, param_grid, cv=3)
        model.fit(xtrain, ytrain)
        save_model(model, 'model_svm.sav')
        print("svm")
        # Testing phase: predict on the held-out data and show the
        # classification report (precision, recall, f-score, accuracy)
        model_predictions = model.predict(xtest)
        self.ui.label_17.setText(
            classification_report(ytest, model_predictions))

    if self.ui.radioButton_7.isChecked():
        features, labels = load_labeled_data()
        # Number of values per sample, taken from the last sample.
        size = len(features[-1])
        # Split the data into train (70%) and test data (30%)
        xtrain, xtest, ytrain, ytest = train_test_split(features, labels,
                                                        test_size=0.3)
        # Reshape every sample to (size, 1, 1) so Conv2D accepts it
        xtrain = np.reshape(xtrain, (-1, size, 1, 1))
        xtest = np.reshape(xtest, (-1, size, 1, 1))
        # Convert to tensors
        xtrain = tf.convert_to_tensor(xtrain, dtype=tf.float32)
        xtest = tf.convert_to_tensor(xtest)
        ytrain = tf.convert_to_tensor(ytrain)
        ytest = tf.convert_to_tensor(ytest)

        # Define the CNN: two conv/pool stages and a single sigmoid unit
        model = Sequential()
        model.add(Conv2D(64, (3, 1), input_shape=xtrain.shape[1:]))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
        model.add(Conv2D(64, (3, 1)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
        model.add(Flatten())
        model.add(Dense(1))
        model.add(Activation('sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        model.fit(xtrain, ytrain, batch_size=1, epochs=19,
                  validation_data=(xtest, ytest))
        model.save('CNN_Ratios.model')

        model = tf.keras.models.load_model('CNN_Ratios.model')
        # NOTE(review): `dataIndex` is not defined in this method; it is
        # presumably a module-level variable - confirm, otherwise this
        # line raises NameError. The result is currently unused.
        prediction = model.predict(dataIndex)
        print("cnn")

    if self.ui.radioButton_8.isChecked():
        print("random")
        dataInd, labels = load_labeled_data()
        X_train, X_test, y_train, y_test = train_test_split(
            dataInd, labels, test_size=0.1, random_state=0)

        # Feature scaling: fit on the training split only.
        # NOTE(review): the fitted scaler is not saved with the model, so
        # whoever loads 'model_forest.sav' must scale inputs the same way.
        sc = StandardScaler()
        X_train = sc.fit_transform(X_train)
        X_test = sc.transform(X_test)

        classifier = RandomForestClassifier(n_estimators=20, random_state=0)
        classifier.fit(X_train, y_train)

        # Save the forest that was just trained (previously this dumped
        # `model`, which is not assigned in this branch).
        save_model(classifier, 'model_forest.sav')

        y_pred = classifier.predict(X_test)
        plot_confusion_matrix(classifier, X_test, y_test, values_format='d',
                              display_labels=["old", "young"])
        plt.show()
        self.ui.label_17.setText(classification_report(y_test, y_pred))

    if self.ui.radioButton.isChecked():
        # Base (level-0) models, as ticked by the user
        level0 = []
        if self.ui.checkBox_11.isChecked():
            level0.append(('knn', KNeighborsClassifier()))
        if self.ui.checkBox_12.isChecked():
            level0.append(('cart', DecisionTreeClassifier()))
        if self.ui.checkBox_13.isChecked():
            level0.append(('svm', SVC()))
        if self.ui.checkBox_14.isChecked():
            level0.append(('lr', LogisticRegression()))
        if self.ui.checkBox_15.isChecked():
            level0.append(('bayes', GaussianNB()))
        # Meta learner (level-1) model
        level1 = LogisticRegression()
        # Define the stacking ensemble
        model = StackingClassifier(estimators=level0, final_estimator=level1,
                                   cv=5)

        dataInd, labels = load_labeled_data()
        xtrain, xtest, ytrain, ytest = train_test_split(
            dataInd, labels, test_size=0.1, random_state=1, stratify=labels)
        model.fit(xtrain, ytrain)
        save_model(model, 'model_stacking.sav')

        # Testing phase: predict on the held-out data and show the
        # classification report (precision, recall, f-score, accuracy)
        model_predictions = model.predict(xtest)
        print("stacking")
        self.ui.label_17.setText(
            classification_report(ytest, model_predictions))
def run_model(training_data='', testing_data='', training_y='', testing_y='',
              svm_flag=False, gs_flag=False):
    """Train one model on the given split and log its test performance.

    Args:
        training_data: training feature matrix.
        testing_data: test feature matrix.
        training_y: training targets.
        testing_y: test targets.
        svm_flag: when true, fit an SVM; otherwise fit a small dense
            network with a single sigmoid output.
        gs_flag: only read when ``svm_flag`` is true; selects a grid
            search over SVM hyper-parameters instead of a default ``SVC``.

    Returns:
        None. Results are reported through the ``regular`` and
        ``regular.time`` loggers.
    """
    x_train, x_test = training_data, testing_data
    y_train, y_test = training_y, testing_y

    log = logging.getLogger('regular')
    time_log = logging.getLogger('regular.time')

    if not svm_flag:
        # Dense network path: 12 -> 8 -> 1 units.
        log.info('running basic NN model')
        time_log.debug('creating and compiling model')
        model = Sequential()
        model.add(
            Dense(12, input_dim=np.shape(x_train)[1], activation='relu'))
        model.add(Dense(8, activation='relu'))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])

        time_log.info('training model')
        log.debug('training dataset size processed = {0}'.format(
            np.shape(x_train)))
        log.debug('testing dataset size processed = {0}'.format(
            np.shape(x_test)))
        model.fit(x_train, y_train, epochs=150, batch_size=5, verbose=1)

        time_log.info('evaluating model')
        scores = model.evaluate(x_test, y_test, verbose=0)
        log.info("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))
        return

    if gs_flag:
        time_log.info('running GRIDSEARCH SVM model')
        linear_grid = {
            'C': [1, 10, 100, 1000],
            'kernel': ['linear'],
        }
        rbf_grid = {
            'C': [1, 10, 100, 1000],
            'gamma': [0.001, 0.0001],
            'kernel': ['rbf'],
        }
        model = GridSearchCV(estimator=svm.SVC(),
                             param_grid=[linear_grid, rbf_grid],
                             n_jobs=-1)
        model.fit(x_train, y_train)
        time_log.debug('finished training model')

        # Report the best cross-validated score and the winning parameters.
        log.debug('Best score for data1: {0}'.format(model.best_score_))
        best = model.best_estimator_
        log.debug('Best C: {0}'.format(best.C))
        log.debug('Best Kernel: {0}'.format(best.kernel))
        log.debug('Best Gamma: {0}'.format(best.gamma))
    else:
        time_log.info('running SVM model')
        model = svm.SVC()
        model.fit(x_train, y_train)
        time_log.debug('finished training model')

    # Both SVM variants are scored on the test split.
    svm_score = model.score(x_test, y_test)
    log.info("score: {0}".format(svm_score))
# Scale the data to [0, 1], then build the supervised training set: each
# sample is a window of 60 consecutive scaled values and its target is the
# value that immediately follows the window.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)

x_train = []
y_train = []
for i in range(60, len(train)):
    x_train.append(scaled_data[i - 60:i, 0])
    y_train.append(scaled_data[i, 0])
x_train = np.array(x_train)
y_train = np.array(y_train)
# Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1)
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))

# Create and fit the LSTM network: two stacked 50-unit LSTM layers feeding
# a single linear output unit.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    LSTM(units=50),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=1, batch_size=1, verbose=2)

# Build the prediction inputs: the last len(valid) rows of new_data plus
# the 60 rows before them, scaled with the already-fitted scaler.
inputs = scaler.transform(
    new_data[len(new_data) - len(valid) - 60:].values.reshape(-1, 1))

X_test = []
for i in range(60, inputs.shape[0]):
    X_test.append(inputs[i - 60:i, 0])
X_test = np.array(X_test)