class Classification(Supervised):
    def __init__(self, X, y, split=True, split_ratio=0.2):
        Supervised.__init__(self, X, y, split, split_ratio)
        self.LR = None
        self.DTC = None
        self.RFC = None
        self.GNB = None

    def fit(self):
        """
        Acronyms
        ----------
        LR : Logistic Regression
        DTC : Decision Tree Classifier
        RFC : Random Forest Classifier
        GNB : Gaussian Naive Bayes

        Returns
        -------
        None
        """
        self.LR = LogisticRegression(random_state=0).fit(self.X_train, self.y_train)
        self.DTC = DecisionTreeClassifier().fit(self.X_train, self.y_train)
        self.RFC = RandomForestClassifier(max_depth=None, random_state=0).fit(
            self.X_train, self.y_train)
        self.GNB = GaussianNB().fit(self.X_train, self.y_train)

    def evaluate(self):
        # sklearn estimators expose score(), not evaluate()
        if self.X_test is not None:
            lr_eval = self.LR.score(self.X_test, self.y_test)
            dtc_eval = self.DTC.score(self.X_test, self.y_test)
            rfc_eval = self.RFC.score(self.X_test, self.y_test)
            gnb_eval = self.GNB.score(self.X_test, self.y_test)
            # return the scores so callers can compare the four models
            return {'LR': lr_eval, 'DTC': dtc_eval, 'RFC': rfc_eval, 'GNB': gnb_eval}
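# Usage sketch (assumes the Supervised base class stores self.X_train,
# self.y_train, self.X_test and self.y_test after splitting; X and y below
# are placeholders for your own feature matrix and labels):
clf = Classification(X, y, split=True, split_ratio=0.2)
clf.fit()
print(clf.evaluate())  # e.g. {'LR': 0.91, 'DTC': 0.88, 'RFC': 0.93, 'GNB': 0.85}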
model.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))  # `init=` in Keras 1
model.compile(loss="binary_crossentropy", metrics=['accuracy'], optimizer='adam')
model.summary()

history = model.fit(X_train, Y_train, epochs=100, batch_size=100)

plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])  # only available if fit() is given validation data
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')
plt.show()

model.evaluate(X_test, Y_test)
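# To enable the commented-out validation-loss curve above, fit() needs
# validation data; a minimal sketch holding out a fraction of the training
# set (0.2 is an assumed value):
history = model.fit(X_train, Y_train, epochs=100, batch_size=100,
                    validation_split=0.2)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.legend(['train', 'validation'], loc='upper left')
plt.show()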
# Add the input layer
model.add(Dense(16, activation='relu', input_dim=10))
# Add hidden layers
model.add(Dense(12, activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(8, activation='relu'))
# Add the output layer: 9 units correspond to the number of predicted classes
model.add(Dense(9, activation='softmax'))

# Compile and run the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(xx, yy, epochs=100, validation_data=(val_x, pd.get_dummies(val_y).values))

# For evaluate() too, val_y has to be converted to one-hot encoded dummy variables
model.evaluate(val_x, pd.get_dummies(val_y).values)
model.summary()
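# What pd.get_dummies does to the class labels used above -- a small
# illustration (the label values here are made up for the example):
import pandas as pd
val_y_demo = pd.Series([0, 2, 1, 0])
print(pd.get_dummies(val_y_demo).values)
# [[1 0 0]
#  [0 0 1]
#  [0 1 0]
#  [1 0 0]]
# (0/1 integers in older pandas; booleans from pandas 2.0 onward)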
# Display model summary and show parameters
model.summary()

# Start training our classifier
batch_size = 10
epochs = 50

history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_split=0.2)  # assumed hold-out fraction; without it,
                                           # history has no 'val_loss' for the plot below

predictions1 = model.predict(X_test)

score = model.evaluate(X_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Plotting our loss charts
import matplotlib.pyplot as plt

history_dict = history.history
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']
epochs_range = range(1, len(loss_values) + 1)  # avoid shadowing the `epochs` count above

line1 = plt.plot(epochs_range, val_loss_values, label='Validation/Test Loss')
line2 = plt.plot(epochs_range, loss_values, label='Training Loss')
plt.setp(line1, linewidth=2.0, marker='+', markersize=10.0)
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(5, activation='softmax'))

    # stochastic gradient descent with Nesterov momentum
    sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    return model

model = createModel()
history = model.fit(train_features, train_labels,
                    epochs=20, batch_size=10,
                    validation_data=(test_features, test_labels),
                    callbacks=[plot_losses])

scores = model.evaluate(test_features, test_labels)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

# Artificial Neural Network
from sklearn.model_selection import train_test_split
train_features, test_features, train_labels, test_labels = train_test_split(
    mega_data, labels_one_hot, test_size=0.3, random_state=9, stratify=labels_one_hot)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(train_features)
train_features = scaler.transform(train_features)
test_features = scaler.transform(test_features)
print(train_features.shape)
print(test_features.shape)
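# `plot_losses` is passed to fit() above but not defined in this snippet.
# A minimal sketch of such a callback (an assumed implementation -- it just
# redraws the loss curves at the end of every epoch):
from keras.callbacks import Callback
import matplotlib.pyplot as plt

class PlotLosses(Callback):
    def on_train_begin(self, logs=None):
        self.losses, self.val_losses = [], []

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        plt.clf()
        plt.plot(self.losses, label='loss')
        plt.plot(self.val_losses, label='val_loss')
        plt.legend()
        plt.pause(0.01)  # refresh the figure without blocking training

plot_losses = PlotLosses()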
epochs = 10
checkpoint = ModelCheckpoint('models/WWII_names.h5f', monitor='val_loss',
                             save_best_only=True, verbose=1)
callbacks = [checkpoint]

H = model.fit(trainX, trainY, epochs=epochs,
              validation_data=(testX, testY),
              callbacks=callbacks, verbose=1)

# reload the best checkpoint saved during training
model = load_model('models/WWII_names.h5f')
score, acc = model.evaluate(testX, testY)
print(score, acc)

prediction = model.predict(testX)
prediction = prediction.argmax(axis=1)
print(classification_report(testY.argmax(axis=1), prediction))

plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, epochs), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, epochs), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, epochs), H.history["acc"], label="train_acc")      # 'accuracy' in newer Keras
plt.plot(np.arange(0, epochs), H.history["val_acc"], label="val_acc")    # 'val_accuracy' in newer Keras
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
# (continuation of an sklearn classifier constructor started earlier in the file)
                              class_weight='balanced', n_estimators=50)
model.fit(x_train, y_train)

# add predictions to dataset
df['PREDICTIONS'] = model.predict(df['FEATURES'].values.tolist())

# train LSTM model
max_features = len(word_to_index)
maxlen = len(features[0])
batch_size = 32

model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

# try using different optimizers and different optimizer configs
x_train, x_test, y_train, y_test = (np.array(x_train), np.array(x_test),
                                    np.array(y_train), np.array(y_test))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=1,
          validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
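# The `features` used above (maxlen = len(features[0])) are assumed to be
# integer sequences padded to a fixed length. A minimal sketch of that
# preprocessing step, assuming `texts` holds the raw documents and 200 is
# an assumed padding length:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
word_to_index = tokenizer.word_index
features = pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=200)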
y_proba = model.predict_proba(x_test)
f1_scor = f1_score_(y_proba, y_test)  # f1_score_ is a helper defined elsewhere in the project

# LSTM model
embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_features, embed_dim, input_length=X_train.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.summary())

batch_size = 32
model.fit(X_train, Y_train, epochs=7, batch_size=batch_size, verbose=2)

score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=batch_size)
print("score: %.2f" % score)
print("acc: %.2f" % acc)
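# Getting hard class labels from the 2-unit softmax output above -- a short
# usage sketch (which index counts as the "positive" class depends on how
# Y_train was encoded):
probs = model.predict(X_test)            # shape (n_samples, 2)
predicted_classes = probs.argmax(axis=1)
print(predicted_classes[:10])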
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD
from keras import backend as K  # needed for K.clear_session below
import pandas as pd
import matplotlib.pyplot as plt

K.clear_session()  # clear model from memory
model = Sequential()
model.add(Dense(1, input_shape=(4,), activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

# train model and record history of training progress
history = model.fit(X_train, y_train, epochs=10)
result = model.evaluate(X_test, y_test)

# visualize the training process
historydf = pd.DataFrame(history.history, index=history.epoch)
historydf.plot(ylim=(0, 1))
plt.title("Test accuracy: {:3.1f} %".format(result[1] * 100), fontsize=15)

# ===================================
# manually tune learning rate
# ===================================
dflist = []
learning_rates = [0.01, 0.05, 0.1, 0.5]
for lr in learning_rates:
model = Sequential()
model.add(Dense(1, input_shape=(4,), activation='sigmoid'))
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=30, verbose=2, validation_split=0.1)

# model.evaluate returns [loss, accuracy] (indices 0 and 1 respectively),
# while model.predict returns the raw model output for the given input
result = model.evaluate(X_test, y_test)

history = pd.DataFrame(history.history, index=history.epoch)
history.plot(ylim=(0, 1))
plt.title('Test set accuracy: {:.1f} %'.format(result[1] * 100), fontsize=15)

dflist = []
learning_rates = [0.01, 0.05, 0.1, 0.5]
for lr in learning_rates:
    K.clear_session()
    model = Sequential()
    model.add(Dense(1, input_shape=(4,), activation='sigmoid'))
    model.compile(Adam(lr=lr),
                  loss='binary_crossentropy',  # loss/metrics assumed,
                  metrics=['accuracy'])        # mirroring the compile above
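    # A sketch of how the tuning loop might finish (assumed -- only the
    # compile call survives in the original): fit each model, then stash
    # its per-epoch history so the learning rates can be compared.
    history = model.fit(X_train, y_train, epochs=30, verbose=0)
    dflist.append(pd.DataFrame(history.history, index=history.epoch))

# compare the loss curves across learning rates
historydf = pd.concat(dflist, axis=1, keys=learning_rates)
historydf.xs('loss', axis=1, level=1).plot(title='Loss by learning rate')
plt.show()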
def train(alg_str, tempx_train, tempy_train, tempx_val, tempy_val, x_test, y_test):
    # print("Algorithm:", alg_str)
    # print("tempx_train shape:", tempx_train.shape)
    # print("tempy_train shape:", tempy_train.shape)
    if alg_str not in ("cnn", "dnn"):
        if alg_str == "knn":
            kVals = range(1, 5)
            accuracies_knn = []
            # loop over various values of `k` for the k-Nearest Neighbor classifier
            for k in kVals:
                # train the k-Nearest Neighbor classifier with the current value of `k`
                knnmodel = KNeighborsClassifier(n_neighbors=k)
                knnmodel.fit(tempx_train, tempy_train)
                score = knnmodel.score(tempx_val, tempy_val)
                print("k=%d, accuracy=%.2f%%" % (k, score * 100))
                accuracies_knn.append(score)
            # find the value of k that has the largest accuracy
            j = int(np.argmax(accuracies_knn))
            print("k=%d achieved highest accuracy of %.2f%% on validation data"
                  % (kVals[j], accuracies_knn[j] * 100))
            model = KNeighborsClassifier(n_neighbors=kVals[j])
        elif alg_str == "rf":
            kVals = [100]
            accuracies_rf = []
            # loop over candidate numbers of trees for the Random Forest classifier,
            # using the validation set to pick the best value
            for k in kVals:
                model = RandomForestClassifier(n_estimators=k)
                model.fit(tempx_train, tempy_train)
                score = model.score(tempx_val, tempy_val)
                print("k=%d, accuracy=%.2f%%" % (k, score * 100))
                accuracies_rf.append(score)
            # find the number of trees that has the largest accuracy
            j = int(np.argmax(accuracies_rf))
            print("k=%d achieved highest accuracy of %.2f%% on validation data"
                  % (kVals[j], accuracies_rf[j] * 100))
            model = RandomForestClassifier(n_estimators=kVals[j])
        else:
            # svm or lin case
            model = skmodel_dict[alg_str]

        model.fit(tempx_train, tempy_train)
        predictions = model.predict(x_test)
        acc = accuracy_score(y_test, predictions)
        print(alg_str + " accuracy: ", acc)
        # record the accuracy for variance tracking (`variance` and `i` are
        # module-level, set by the calling loop); format: [[5, 0.123], [5, 0.135], ...]
        variance.append([i, acc])
        return round(acc, 4)
    else:
        # cnn or dnn case
        if alg_str == "cnn":
            filepath = 'cnnbestweights.hdf5'
            model = cnnmodel()
        else:
            filepath = 'dnnbestweights.hdf5'
            model = dnnmodel()
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=20, verbose=1),
            ModelCheckpoint(filepath, monitor='val_loss',
                            save_best_only=True, verbose=1),
        ]

        # fitting the model
        model.fit(tempx_train, tempy_train,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=1,
                  callbacks=callbacks,
                  validation_data=(tempx_val, tempy_val))

        # testing the model on the testing set
        testscore = model.evaluate(x_test, y_test, verbose=0)
        print("testscore: ", testscore[1])
        # record the accuracy for variance tracking; same format as above
        variance.append([i, testscore[1]])
        return round(testscore[1], 4)
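# Usage sketch: run each algorithm over the same split and collect the test
# accuracies (skmodel_dict, cnnmodel, dnnmodel, batch_size, epochs, variance
# and i are assumed to be defined at module level, as in the function above):
results = {}
for alg in ["knn", "rf", "svm", "lin", "dnn", "cnn"]:
    results[alg] = train(alg, tempx_train, tempy_train,
                         tempx_val, tempy_val, x_test, y_test)
print(results)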