import numpy as np
from tensorflow import keras
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier


class my_classification_model:
    def __init__(self, model_name, no_class):
        self.model_name = model_name
        self.no_class = no_class
        if model_name == 'lr':
            self.model = LogisticRegression()
        if model_name == 'sv':
            self.model = SVC()
        if model_name == 'rf':
            self.model = RandomForestClassifier()
        if model_name == 'dnn1':
            self.model = keras.Sequential([
                keras.layers.Dense(128, activation='relu'),
                keras.layers.Dense(self.no_class, activation='softmax')
            ])
            self.model.compile(optimizer='adam',
                               loss='sparse_categorical_crossentropy',
                               metrics=['accuracy'])
        if model_name == 'dnn2':
            self.model = keras.Sequential([
                keras.layers.Dense(256, activation='relu'),
                keras.layers.Dense(128, activation='relu'),
                keras.layers.Dense(self.no_class, activation='softmax')
            ])
            self.model.compile(optimizer='adam',
                               loss='sparse_categorical_crossentropy',
                               metrics=['accuracy'])

    def fit(self, X_train, y_train):
        if self.model_name in ['lr', 'sv', 'rf']:
            self.model.fit(X_train, y_train)
        if self.model_name in ['dnn1', 'dnn2']:
            self.model.fit(X_train, y_train, epochs=150, verbose=0)

    def predict(self, X_test):
        if self.model_name in ['lr', 'sv', 'rf']:
            return self.model.predict(X_test)
        if self.model_name in ['dnn1', 'dnn2']:
            return np.argmax(self.model.predict(X_test), axis=1)

    def predict_s(self, X_test):
        # Expected-class score: probability vector dotted with the class indices.
        if self.model_name == 'sv':
            return self.model.predict(X_test)
        if self.model_name in ['lr', 'rf']:
            return self.model.predict_proba(X_test) @ np.arange(self.no_class)
        if self.model_name in ['dnn1', 'dnn2']:
            return self.model.predict(X_test) @ np.arange(self.no_class)
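# Minimal usage sketch for the wrapper above, on synthetic data (the dataset
# and split below are assumptions for illustration, not the original pipeline):
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=200, n_classes=3, n_informative=5,
                           random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)
clf = my_classification_model('rf', no_class=3)
clf.fit(X_tr, y_tr)
print(clf.predict(X_te)[:5])    # hard class labels
print(clf.predict_s(X_te)[:5])  # expected-class scores, useful for ranking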
def main():
    width = 128
    height = 128
    depth = 3
    classes = 2
    NUM_EPOCHS = 50

    # initialize the optimizer and model
    opt = tf.keras.optimizers.SGD(learning_rate=0.01)

    project_dir = "deepfake-detection-challenge"
    # train_metadata, train_videos, labels, originals = load_json(project_dir)
    train_sub_dir = "/train_sample_videos/"
    dest_train_1 = '/train_1/'
    # break_to_frames_train(project_dir, train_videos, labels, width, height)
    # test_video_names, test_videos = load_test_videos(project_dir)
    test_sub_dir = "/test_videos/"
    dest_test_1 = '/test_1/'
    # break_to_frames_test(project_dir, test_videos, width, height)

    train_new_csv = make_dataframe_train(project_dir)
    test_new_csv = make_dataframe_test(project_dir)
    train_new_csv = '/train_new.csv'
    X_train, y_train, X_test, y_test, train, y_train_original, y_test_original = \
        get_Xy(project_dir, train_new_csv, width, height, depth)

    # Normalization
    X_train = X_train.astype("float") / 255.0
    X_test = X_test.astype("float") / 255.0
    # One-hot encode y

    choice = 4

    if choice == 1:  # not working
        base_model = vgg16Model(X_train, X_test, width, height, depth, classes)
        # checkpointing to save the weights of the best model
        mcp_save = tf.keras.callbacks.ModelCheckpoint('weight.hdf5',
                                                      save_best_only=True,
                                                      monitor='val_loss',
                                                      mode='min')
        # compiling the model
        base_model.compile(loss='categorical_crossentropy', optimizer='Adam',
                           metrics=['accuracy'])
        # training the model
        H = base_model.fit(X_train, y_train, epochs=50,
                           validation_data=(X_test, y_test),
                           callbacks=[mcp_save], batch_size=128)
        print("Base Model - Test Data Loss and Accuracy: ",
              base_model.evaluate(X_test, y_test))
        print("Final Plot ")
        plotAccLoss(H, NUM_EPOCHS)

    if choice == 2:
        # Feature extraction and usage of a secondary model
        vggModel = tf.keras.applications.VGG16(weights='imagenet',
                                               include_top=False,
                                               input_shape=(width, height, depth))
        print(vggModel.summary())
        X_train_new = vggModel.predict(X_train)
        X_train_new = X_train_new.reshape(X_train_new.shape[0], -1)
        X_val_new = vggModel.predict(X_test)
        X_val_new = X_val_new.reshape(X_val_new.shape[0], -1)

        secondary_model = 'random_forest'
        if secondary_model == 'random_forest':
            print("Secondary Model - Random Forest ")
            model = RandomForestClassifier(200)
            model.fit(X_train_new, y_train)
            # evaluate the model
            results = model.predict(X_val_new)
            print("Random Forest Accuracy ",
                  metrics.accuracy_score(y_test, results))
        if secondary_model == 'naive_bayes':
            print("Secondary Model - Using Naive Bayes")
            nBayes = GaussianNB()
            nBayes = nBayes.fit(X_train_new, y_train)
            accuracy = nBayes.score(X_val_new, y_test)
            print("Naive Bayes Accuracy ", accuracy)

    if choice == 3:  # not working
        # Fine-tuning
        inceptionV3Model = tf.keras.applications.InceptionV3(
            weights='imagenet', include_top=False,
            input_shape=(width, height, depth))
        inceptionV3Model.trainable = False
        model = tf.keras.models.Sequential()
        model.add(inceptionV3Model)
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dropout(0.5))
        model.add(tf.keras.layers.Dense(256, activation='relu'))
        model.add(tf.keras.layers.Dense(classes, activation='sigmoid'))
        print(model.summary())

        NUM_EPOCHS = 50
        opt = tf.keras.optimizers.SGD(learning_rate=0.001)
        model.compile(loss="sparse_categorical_crossentropy", optimizer=opt,
                      metrics=["accuracy"])
        H = model.fit(X_train, y_train, epochs=50, batch_size=32,
                      validation_data=(X_test, y_test))
        plotAccLoss(H, NUM_EPOCHS)

        print("\n Phase B - Fine Tune Fully Connected Layer and Selected Convolutional Layers \n")
        inceptionV3Model.trainable = True
        trainableFlag = False
        for layer in inceptionV3Model.layers:
            # NOTE: 'block4_conv1' is a VGG16 layer name; InceptionV3 has no
            # layer by that name, so this flag never flips (likely why this
            # branch is marked "not working").
            if layer.name == 'block4_conv1':
                trainableFlag = True
            layer.trainable = trainableFlag
        opt = tf.keras.optimizers.SGD(learning_rate=0.00001)
        model.compile(loss="sparse_categorical_crossentropy", optimizer=opt,
                      metrics=["accuracy"])
        print(model.summary())
        H = model.fit(X_train, y_train, epochs=NUM_EPOCHS, batch_size=32,
                      validation_data=(X_test, y_test))
        print("Final Plot ")
        plotAccLoss(H, NUM_EPOCHS)

    if choice == 4:  # works
        # Feature extraction and usage of a secondary model
        inceptionV3Model = tf.keras.applications.InceptionV3(
            weights='imagenet', include_top=False,
            input_shape=(width, height, depth))
        inceptionV3Model.trainable = False
        print(inceptionV3Model.summary())
        X_train_new = inceptionV3Model.predict(X_train)
        X_train_new = X_train_new.reshape(X_train_new.shape[0], -1)
        X_val_new = inceptionV3Model.predict(X_test)
        X_val_new = X_val_new.reshape(X_val_new.shape[0], -1)

        secondary_model = 'random_forest'
        if secondary_model == 'random_forest':
            print("Secondary Model - Random Forest ")
            model = RandomForestClassifier(200)
            model.fit(X_train_new, y_train)
            # evaluate the model
            accuracy = evaluate(model, X_val_new, y_test)
            # results = model.predict(X_val_new)
            # print("Random Forest Accuracy ", metrics.accuracy_score(results, y_test))
            print("Random Forest Accuracy ", accuracy)
        if secondary_model == 'naive_bayes':
            print("Secondary Model - Using Naive Bayes")
            nBayes = GaussianNB()
            nBayes = nBayes.fit(X_train_new, y_train)
            accuracy = nBayes.score(X_val_new, y_test)
            print("Naive Bayes Accuracy ", accuracy)

    if choice == 41:  # works
        # Feature extraction and usage of a secondary model
        inceptionV3Model = tf.keras.applications.InceptionV3(
            weights='imagenet', include_top=False,
            input_shape=(width, height, depth))
        inceptionV3Model.trainable = False
        print(inceptionV3Model.summary())
        X_train_new = inceptionV3Model.predict(X_train)
        X_train_new = X_train_new.reshape(X_train_new.shape[0], -1)
        X_val_new = inceptionV3Model.predict(X_test)
        print("X_val_new before reshaping ", X_val_new)
        X_val_new = X_val_new.reshape(X_val_new.shape[0], -1)

        secondary_model = 'random_forest'
        if secondary_model == 'random_forest':
            print("Secondary Model - Random Forest ")
            model = RandomForestClassifier(200)
            model.fit(X_train_new, y_train)
            # evaluate the model
            predY = model.predict(X_val_new)
            # accuracy on the individual frames
            print("Images - Random Forest Accuracy ",
                  metrics.accuracy_score(y_test, predY))
            # Video-level idea: each frame name encodes its video; group all
            # frames that share the same video name and count the predictions.
            # If at least 3 of the 11 frames are predicted fake, label the
            # video fake.
            # Storing the frames and their predicted class in a dataframe:
            # print("train.head() ", train.head(), train.shape)
            # print("y_test ", y_test, y_test.shape)
            # print("predY ", predY, predY.shape)
            # print("X_val_new ", X_val_new, X_val_new.shape)
            # pred_data_frame = train.copy(deep=True)
            # video_names = []
            # image_names = train['image']
            # for i in range(len(image_names)):
            #     # get the video name from the frame name,
            #     # e.g. aagfhgtpmv.mp4_frame0.jpg -> aagfhgtpmv.mp4
            #     video_names.append(image_names[i].split("_")[0])
            # pred_data_frame['video'] = video_names
            # print("pred_data_frame.head() ", pred_data_frame.head())
            # pred_data_frame['pred_image_fake'] = predY[:, 0]
            # pred_data_frame['pred_image_real'] = predY[:, 1]
            # pred_video_label1 = []
            # # sort the df based on video names
            # pred_data_frame = pred_data_frame.sort_values(by=['video'])
            # pred_video_label = pred_data_frame.groupby(['video'])['pred_image_label'].count()
            # print(pred_video_label.head())
            # print("Video Classification Accuracy ", metrics.accuracy_score(predY, y_test))
        # if secondary_model == 'naive_bayes':
        #     print("Secondary Model - Using Naive Bayes")
        #     nBayes = GaussianNB()
        #     nBayes = nBayes.fit(X_train_new, y_train)
        #     accuracy = nBayes.score(X_val_new, y_test)
        #     print("Naive Bayes Accuracy ", accuracy)

    if choice == 5:  # LSTM
        model = Sequential()
        model.add(LSTM(256, dropout=0.2,
                       input_shape=(train_data.shape[1], train_data.shape[2])))
        model.add(Dense(1024, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(5, activation='softmax'))
        sgd = SGD(learning_rate=0.00005, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(optimizer=sgd, loss='categorical_crossentropy',
                      metrics=['accuracy'])
        # model.load_weights('video_1_LSTM_1_512.h5')
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=10, verbose=0),
            ModelCheckpoint('video_1_LSTM_1_1024.h5', monitor='val_loss',
                            save_best_only=True, verbose=0)
        ]
        nb_epoch = 500
        model.fit(train_data, train_labels,
                  validation_data=(validation_data, validation_labels),
                  batch_size=batch_size, epochs=nb_epoch,
                  callbacks=callbacks, shuffle=True, verbose=1)
        return model

    if choice == 6:  # ensemble
        vggModel = tf.keras.applications.VGG16(weights='imagenet',
                                               include_top=False,
                                               input_shape=(128, 128, 3))
        model1 = tf.keras.models.Sequential()
        model1.add(vggModel)
        model1.add(tf.keras.layers.Flatten())
        model1.add(tf.keras.layers.Dropout(0.5))
        model1.add(tf.keras.layers.Dense(256, activation='relu'))
        model1.add(tf.keras.layers.Dense(17, activation='softmax'))

        inceptionv3model = tf.keras.applications.InceptionV3(
            weights='imagenet', include_top=False, input_shape=(128, 128, 3))
        model2 = tf.keras.models.Sequential()
        model2.add(inceptionv3model)
        model2.add(tf.keras.layers.Flatten())
        model2.add(tf.keras.layers.Dropout(0.5))
        model2.add(tf.keras.layers.Dense(256, activation='relu'))
        model2.add(tf.keras.layers.Dense(17, activation='softmax'))

        # NOTE: featuresTrain/featuresVal/trainY/testY/testX are expected from
        # a prior feature-extraction step (see choice 7 below).
        model_name = 'knn'
        if model_name == 'randomforest':
            model = RandomForestClassifier(200)
            model.fit(featuresTrain, trainY)
            # evaluate the model
            results = model.predict(featuresVal)
            print(metrics.accuracy_score(testY, results))
        if model_name == 'knn':
            print("using knn")
            knn = KNeighborsClassifier(n_neighbors=3)
            knn.fit(featuresTrain, trainY)
            results = knn.predict(featuresVal)
            print(metrics.accuracy_score(testY, results))
        if model_name == 'naive_bayes':
            print("Using Naive Bayes")
            nBayes = GaussianNB()
            nBayes = nBayes.fit(featuresTrain, trainY)
            accuracy = nBayes.score(featuresVal, testY)
            print("Naive Bayes Accuracy ", accuracy)
        if model_name == 'svm':
            print("Using SVM")
            svc = SVC(gamma='auto')
            svc = svc.fit(featuresTrain, trainY)
            # accuracy = svc.score(featuresVal, testY)
            accuracy = evaluate(svc, featuresVal, testY)
            print("SVM Accuracy ", accuracy)

        # resnet50model = tf.keras.applications.ResNet50(weights='imagenet',
        #                                                include_top=False,
        #                                                input_shape=(128, 128, 3))
        # model3 = tf.keras.models.Sequential()
        # model3.add(resnet50model)
        # model3.add(tf.keras.layers.Flatten())
        # model3.add(tf.keras.layers.Dropout(0.5))
        # model3.add(tf.keras.layers.Dense(256, activation='relu'))
        # model3.add(tf.keras.layers.Dense(17, activation='softmax'))

        # Find the probabilities of all 17 classes for each test instance
        # (shape should be 340 x 17).
        predicted_vals1 = model1.predict(testX)
        print("predicted_vals1 shape ", predicted_vals1.shape)
        print("predicted_vals1 ", predicted_vals1)
        predicted_vals2 = model2.predict(testX)
        print("predicted_vals2 shape ", predicted_vals2.shape)
        print("predicted_vals2 ", predicted_vals2)
        # predicted_vals3 = model3.predict(testX)
        # print("predicted_vals3 shape ", predicted_vals3.shape)
        # print("predicted_vals3 ", predicted_vals3)

        # Element-wise addition sums the per-class probabilities across models;
        # with two models here, the sum is multiplied by 1/2 to take the
        # average (the third, ResNet50, is commented out above).
        predY_sum = predicted_vals1 + predicted_vals2
        element_wise_sum_avg = predY_sum * (1 / 2)
        # Now take the argmax over classes
        predY = np.argmax(element_wise_sum_avg, axis=1)
        print("predY ", predY)
        print("Checking shapes of testY and predY ", testY.shape, " ", predY.shape)
        accuracy = accuracy_score(testY, predY)
        print(accuracy)

    if choice == 7:
        resnet101model = tf.keras.applications.ResNet101(
            weights='imagenet', include_top=False, input_shape=(128, 128, 3))
        print(resnet101model.summary())
        featuresTrain = resnet101model.predict(trainX)
        featuresTrain = featuresTrain.reshape(featuresTrain.shape[0], -1)
        featuresVal = resnet101model.predict(testX)
        featuresVal = featuresVal.reshape(featuresVal.shape[0], -1)
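# The pipeline above calls plotAccLoss and evaluate without defining them.
# Plausible sketches, assuming H is a Keras History object and evaluate
# returns plain accuracy (assumed signatures, not the original code):
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics


def plotAccLoss(H, num_epochs):
    # Plot training/validation loss and accuracy over the epochs in H.history.
    plt.figure()
    epochs = np.arange(0, num_epochs)
    plt.plot(epochs, H.history["loss"], label="train_loss")
    plt.plot(epochs, H.history["val_loss"], label="val_loss")
    plt.plot(epochs, H.history["accuracy"], label="train_acc")
    plt.plot(epochs, H.history["val_accuracy"], label="val_acc")
    plt.xlabel("Epoch")
    plt.legend()
    plt.show()


def evaluate(model, X, y):
    # Accuracy of a fitted sklearn-style model on held-out data.
    return metrics.accuracy_score(y, model.predict(X))


# Hedged sketch of the video-level vote described in choice 41's comments:
# group frame predictions by video name and call a video fake when at least
# 3 frames are predicted fake (column names and fake_label are illustrative).
import pandas as pd


def video_level_vote(image_names, predY, fake_label=0, min_fake_frames=3):
    df = pd.DataFrame({
        "video": [name.split("_")[0] for name in image_names],
        "pred": predY,
    })
    fakes_per_video = df.groupby("video")["pred"].apply(
        lambda p: (p == fake_label).sum())
    return (fakes_per_video >= min_fake_frames).astype(int)  # 1 = fake video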
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

print(padded_test)

# %%
# RandomForest model fitting
model = RandomForestClassifier(n_estimators=100)
model.fit(padded_train, y_train)

# %%
y_pred = model.predict(padded_test)
acc = accuracy_score(y_test, y_pred)
print(acc * 100, "%")

# %%
vocab_size = 50_000
one_hots = [one_hot(word, vocab_size) for word in X_train]
print(one_hots)

# %%
padded = pad_sequences(one_hots, padding='post', maxlen=5)
print(padded)

# %%
model = Sequential()
model.add(Embedding(vocab_size, 50))
model.compile("adam", "mse")

# %%
predict = model.predict(padded)

# %%
predict.shape
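# %%
# Tiny demo of the hashing + padding steps above, on toy sentences (assumed
# for illustration, not from the dataset): one_hot hashes each word to an
# index in [1, vocab), and pad_sequences right-pads the ragged lists.
sample = ["the movie was great", "terrible plot"]
encoded = [one_hot(s, 50) for s in sample]   # hash-based, collisions possible
print(encoded)                                # e.g. [[12, 7, 33, 4], [21, 9]]
padded_demo = pad_sequences(encoded, padding='post', maxlen=5)
print(padded_demo.shape)                      # (2, 5); short rows zero-padded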
def main():
    print("......... Welcome to SBA Loan Data Analysis ....... ")
    print()
    ip1 = int(input("What do you want to do : \n 1) Prediction \n or \n 2) Analyze the data..?? \n\n "))

    if ip1 == 1:
        print("Menu :\n \
 1)Random Forest \n \
 2)Decision Tree \n \
 3)Naive Bayes \n \
 4)SVM \n \
 5)XG Boost \n \
 6)KNN \n \
 7)Keras Neural Network ")
        ip2 = int(input("Enter a value from Above Menu : "))

        # Importing the libraries
        import numpy as np
        import pandas as pd

        # Importing the dataset
        D7aFY1991_FY1999 = pd.read_csv('D:/CDAC_DATA/CDAC_PROJECT/15-1-MachineL/7aFY1991_FY1999_1.csv')
        D7aFY2000_FY2009 = pd.read_csv('D:/CDAC_DATA/CDAC_PROJECT/15-1-MachineL/7aFY2000_FY2009_1.csv')
        D7aFY2010_Present = pd.read_csv('D:/CDAC_DATA/CDAC_PROJECT/15-1-MachineL/7aFY2010_Present_1.csv')

        # Merge the dataframes
        Data_7a = pd.concat([D7aFY1991_FY1999, D7aFY2000_FY2009, D7aFY2010_Present])

        # Create sample data
        # Data_sample_7a = Data_7a.sample(frac=0.1, random_state=0)
        Data_sample_7a = Data_7a
        Data_sample_7a = Data_sample_7a.iloc[:, [4, 6, 9, 11, 12, 14, 16, 17, 18,
                                                 19, 20, 24, 25, 26, 28, 29]].values
        # Convert back into a pandas dataframe
        Data_sample_7a = pd.DataFrame(data=Data_sample_7a)

        # Taking care of missing data
        from sklearn.impute import SimpleImputer
        imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
        Data_sample_7a.iloc[:, 10:11] = imputer.fit_transform(Data_sample_7a.iloc[:, 10:11])
        imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
        Data_sample_7a.iloc[:, 8:9] = imputer.fit_transform(Data_sample_7a.iloc[:, 8:9])
        Data_sample_7a = Data_sample_7a.dropna()

        # Split the data columns into independent and dependent variables
        X7a = Data_sample_7a.iloc[:, 0:14].values     # independent
        y7a = Data_sample_7a.iloc[:, [15]].values     # dependent

        # Convert numpy objects to pandas dataframes
        pd_X7a = pd.DataFrame(data=X7a[0:, 0:])
        pd_y7a = pd.DataFrame(data=y7a[0:, 0:])

        # Encoding categorical data in the independent variables
        pd_X7a = np.asarray(pd_X7a)  # convert pandas dataframe into numpy array
        pd_X7a = encoder(pd_X7a)

        # Get user data and encode it
        Userdata = getdata(pd_X7a)
        Userdata = np.asarray(Userdata)  # convert pandas dataframe into numpy array
        Userdata = encoder(Userdata)

        # Encoding the dependent variable: PIF -> 0, everything else -> 1
        pd_y7a[0] = pd_y7a[0].replace(['PIF'], '0')
        pd_y7a[0] = pd_y7a[0].replace(['CANCLD', 'EXEMPT', 'CHGOFF', 'COMMIT'], '1')
        pd_y7a = np.asarray(pd_y7a)  # convert pandas dataframe into numpy array

        # Splitting the dataset into the Training set and Test set
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(pd_X7a, pd_y7a,
                                                            test_size=0.25,
                                                            random_state=0)

        # Feature scaling of the train/test data
        (X_train1, X_test) = scalingFunction(X_train, X_test)

        # Feature scaling of the user data
        from sklearn.preprocessing import StandardScaler
        sc = StandardScaler()
        Userdata = sc.fit_transform(Userdata)

        def report_results(classifier, y_pred):
            # Confusion matrix and accuracy on the test set, then the
            # prediction for the user-supplied record.
            from sklearn.metrics import confusion_matrix
            cm = confusion_matrix(y_test, y_pred)
            print("Confusion Matrix : \n")
            print(cm)
            print("Accuracy rate is : \n ")
            print((cm[0, 0] + cm[1, 1]) / len(y_pred))
            user_pred = classifier.predict(Userdata)
            converter(user_pred)
            print("The Loan will be : ")
            print(user_pred)

        if ip2 == 1:
            # Fitting Random Forest Classification to the Training set
            from sklearn.ensemble import RandomForestClassifier
            classifier = RandomForestClassifier(n_estimators=10,
                                                criterion='entropy',
                                                random_state=0)
            classifier.fit(X_train1, y_train)
            report_results(classifier, classifier.predict(X_test))
        elif ip2 == 2:
            # Fitting Decision Tree Classification to the Training set
            from sklearn.tree import DecisionTreeClassifier
            classifier = DecisionTreeClassifier(criterion='entropy', random_state=0)
            classifier.fit(X_train, y_train)
            report_results(classifier, classifier.predict(X_test))
        elif ip2 == 3:
            # Fitting Naive Bayes to the Training set
            from sklearn.naive_bayes import GaussianNB
            classifier = GaussianNB()
            classifier.fit(X_train, y_train)
            report_results(classifier, classifier.predict(X_test))
        elif ip2 == 4:
            # Fitting SVM to the Training set
            from sklearn.svm import SVC
            classifier = SVC(kernel='linear', random_state=0)
            classifier.fit(X_train, y_train)
            report_results(classifier, classifier.predict(X_test))
        elif ip2 == 5:
            # Fitting XGBoost to the Training set
            from xgboost import XGBClassifier
            classifier = XGBClassifier()
            classifier.fit(X_train, y_train)
            report_results(classifier, classifier.predict(X_test))
        elif ip2 == 6:
            # Fitting K-NN to the Training set
            from sklearn.neighbors import KNeighborsClassifier
            classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
            classifier.fit(X_train, y_train)
            report_results(classifier, classifier.predict(X_test))
        elif ip2 == 7:
            # Importing the Keras libraries and packages
            import keras
            from keras.models import Sequential
            from keras.layers import Dense

            # Initialising the ANN
            classifier = Sequential()
            # Adding the input layer and the first hidden layer
            classifier.add(Dense(units=7, kernel_initializer='uniform',
                                 activation='relu', input_dim=14))
            # Adding the second hidden layer
            classifier.add(Dense(units=7, kernel_initializer='uniform',
                                 activation='relu'))
            # Adding the output layer
            classifier.add(Dense(units=1, kernel_initializer='uniform',
                                 activation='sigmoid'))
            # Compiling the ANN
            classifier.compile(optimizer='adam', loss='binary_crossentropy',
                               metrics=['accuracy'])
            # Fitting the ANN to the Training set
            classifier.fit(X_train, y_train, batch_size=10, epochs=100)

            # Part 3 - Making predictions and evaluating the model
            from sklearn.metrics import confusion_matrix
            y_pred = classifier.predict(X_test)
            y_pred = (y_pred > 0.5)
            cm = confusion_matrix(y_test, y_pred)
            print("Confusion Matrix : \n")
            print(cm)
            print("Accuracy rate is : \n ")
            print((cm[0, 0] + cm[1, 1]) / len(y_pred))
            # Predicting the user set results (threshold the sigmoid output)
            user_pred = classifier.predict(Userdata)
            user_pred = (user_pred > 0.5)
            converter(user_pred)
            print("The Loan will be : ")
            print(user_pred)

    elif ip1 == 2:
        print("Menu : \n \
 1)Business wise JobsSupported \n \
 2)Compare: Gross Approval Vs SBA Approval \n \
 3)DistOffice wise SBAapproval \n \
 4)GrossApproval Per LoanStatus \n \
 5)GrossApproval Per DeliveryMethod \n \
 6)JobsSupported per LoanStatus \n \
 7)SBAapproval Loan Status \n \
 ")
        ip2 = int(input("Enter a value from Above Menu : "))
        from PIL import Image
        plots = {
            1: 'BusinnJobsSupp.png',
            2: 'Comp_GrossSBA.png',
            3: 'DistOff_wise_SBAappr.png',
            4: 'GrossAppLoanSt.png',
            5: 'GrossAppr_DeliveryMethod.png',
            6: 'JobsSuppLoanSt.png',
            7: 'SBAapprLoanSt.png',
        }
        if ip2 in plots:
            img = Image.open('D:/CDAC_DATA/CDAC_PROJECT/ggwp/' + plots[ip2])
            img.format = "PNG"
            img.show()
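# The script above relies on four helpers (encoder, getdata, scalingFunction,
# converter) that are not defined in this file. Hedged sketches, with behavior
# inferred from the call sites only (not the original implementations):
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler


def encoder(data):
    # Label-encode every non-numeric column of a 2-D array.
    data = pd.DataFrame(data)
    for col in data.columns:
        if data[col].dtype == object:
            data[col] = LabelEncoder().fit_transform(data[col].astype(str))
    return data.values


def getdata(reference):
    # Prompt the user for one row of the 14 model features
    # (assumed interactive input; the original may differ).
    values = [input("Feature %d: " % i) for i in range(14)]
    return pd.DataFrame([values])


def scalingFunction(X_train, X_test):
    # Standardize features: fit on train, apply to both splits.
    sc = StandardScaler()
    return sc.fit_transform(X_train), sc.transform(X_test)


def converter(pred):
    # Map the 0/1 prediction to a human-readable label (assumed mapping,
    # based on the PIF-vs-rest encoding above).
    mapping = {0: 'Paid In Full', 1: 'Charged Off / Cancelled'}
    print([mapping.get(int(p), p) for p in np.ravel(pred)])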
# Adding the second hidden layer
# classifier.add(Dense(units=150, kernel_initializer='uniform', activation='sigmoid'))
# Adding the third hidden layer
classifier.add(Dense(units=80, kernel_initializer='uniform', activation='relu'))
# Adding the fourth hidden layer
classifier.add(Dense(units=12, kernel_initializer='uniform', activation='sigmoid'))
# Adding the output layer
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
# Compiling the ANN
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size=10, epochs=100)

# Part 3 - Making predictions and evaluating the model
# Predicting the Test set results
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
# Making the Confusion Matrix
cmANN = confusion_matrix(y_test, y_pred)
print('\n Confusion Matrix using ANN')
import keras.backend as K
import matplotlib.pyplot as plt
import pandas as pd
from keras.layers import Dense
from keras.models import Sequential
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split

model = RandomForestClassifier()
print(cross_val_score(model, X, y_true))

X_train, X_test, y_train, y_test = train_test_split(X, y_true, test_size=0.3,
                                                    random_state=42)

model = Sequential()
model.add(Dense(1, input_shape=(4, ), activation='sigmoid'))
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=30, verbose=2, validation_split=0.1)

# model.evaluate returns the loss and accuracy (indices 0 and 1 respectively),
# while model.predict returns the raw outputs for the given inputs.
result = model.evaluate(X_test, y_test)

history = pd.DataFrame(history.history, index=history.epoch)
history.plot(ylim=(0, 1))
plt.title('the accuracy for the test set is {:.3f}'.format(result[1] * 100))
# %tensorflow_version 2.x
# If you wish to use TensorFlow 1.x, run the following line and then restart
# the runtime:
# %tensorflow_version 1.x
# You'll need to change your import statements from tensorflow.keras to keras
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(18, kernel_initializer="uniform", activation="relu", input_dim=16))
model.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Display Model Summary and Show Parameters
model.summary()

# Start Training Our Classifier
batch_size = 10
epochs = 50
history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1)
def get_classifier(clf, input_shape=None):
    """
    Return a classifier object defined by clf.

    INPUTS:
    @clf         : a string indicating which classifier will be used
    @input_shape : when a CNN model will be trained, the input shape
                   (it is necessary to build the model)
    OUTPUT:
    @classifier  : a classifier object
    """
    if clf == 'SVM':
        classifier = svm.SVC(C=1e5, kernel='rbf', class_weight="balanced")
    elif clf == 'log_reg':
        classifier = linear_model.LogisticRegression(C=1e5, class_weight="balanced")
    elif clf == 'rf':
        classifier = RandomForestClassifier(n_estimators=50, max_depth=10,
                                            class_weight="balanced")
    elif clf == 'boost':
        classifier = AdaBoostClassifier()
    elif clf == 'cnn':
        # CNN network to classify patches
        input_img = Input(shape=input_shape)
        x = input_img
        x = Conv2D(32, (3, 3), padding='same')(x)
        x = LeakyReLU()(x)
        x = Conv2D(32, (3, 3), padding='same')(x)
        x = LeakyReLU()(x)
        x = Conv2D(32, (3, 3), padding='same')(x)
        x = LeakyReLU()(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Conv2D(64, (3, 3), padding='same')(x)
        x = LeakyReLU()(x)
        x = Conv2D(64, (3, 3), padding='same')(x)
        x = LeakyReLU()(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Conv2D(128, (3, 3), padding='same')(x)
        x = LeakyReLU()(x)
        x = Conv2D(128, (3, 3), padding='same')(x)
        x = LeakyReLU()(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        f = x
        x = Flatten()(x)
        x = Dense(16)(x)
        x = LeakyReLU()(x)
        x = Dropout(0.5)(x)
        x = Dense(2)(x)
        o = Activation('softmax')(x)
        # model train
        classifier = Model(input_img, o)
        classifier.summary()
        classifier.compile(loss='categorical_crossentropy',
                           optimizer='Adadelta', metrics=[fmeasure])
    elif clf == 'Unet':
        # U-Net, after https://github.com/jocicmarko/ultrasound-nerve-segmentation
        inputs = Input(input_shape)
        conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
        conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
        pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
        conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1)
        conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2)
        pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
        conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2)
        conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3)
        pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
        conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3)
        conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4)
        pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)
        conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4)
        conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5)

        # Concatenation on axis 1 assumes channels-first data (as in the
        # original Keras 1 concat_axis=1); use axis=-1 for channels-last.
        up6 = concatenate([UpSampling2D(size=(2, 2))(conv5), conv4], axis=1)
        conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6)
        conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6)
        up7 = concatenate([UpSampling2D(size=(2, 2))(conv6), conv3], axis=1)
        conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7)
        conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7)
        up8 = concatenate([UpSampling2D(size=(2, 2))(conv7), conv2], axis=1)
        conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8)
        conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8)
        up9 = concatenate([UpSampling2D(size=(2, 2))(conv8), conv1], axis=1)
        conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9)
        conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9)
        conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)

        classifier = Model(inputs=inputs, outputs=conv10)
        classifier.summary()
        classifier.compile(optimizer=Adam(learning_rate=1e-3),
                           loss='binary_crossentropy', metrics=[fmeasure])
    else:
        sys.exit("The classifier you chose is not implemented")
    return classifier
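# fmeasure is referenced above but never defined here. A hedged sketch of the
# classic batch-wise F1 metric (assumed to match the old Keras 1 fmeasure):
import keras.backend as K


def fmeasure(y_true, y_pred):
    # F1 score computed from thresholded predictions, per batch.
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))
    tp = K.sum(K.round(K.clip(y_true * y_pred_pos, 0, 1)))
    precision = tp / (K.sum(y_pred_pos) + K.epsilon())
    recall = tp / (K.sum(K.round(K.clip(y_true, 0, 1))) + K.epsilon())
    return 2 * precision * recall / (precision + recall + K.epsilon())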
if args.classifier == "random_forest":
    final_model = RandomForestClassifier(n_estimators=100, max_depth=32,
                                         random_state=0, n_jobs=-1, verbose=True)
elif args.classifier == "logistic_regression_keras":
    classes = 26
    final_model = Sequential()
    final_model.add(Dense(classes, activation='softmax',
                          kernel_regularizer=regularizers.l1(0.0000001),
                          input_shape=(293, )))
    final_model.compile(optimizer=optimizers.Adam(learning_rate=0.01),
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    final_model.fit(final_X, to_categorical(final_Y), epochs=100, batch_size=32)
elif args.classifier == "logistic_regression_scikit":
    final_model = LogisticRegression(penalty='l1', C=1000,
                                     multi_class="multinomial", solver="saga",
                                     max_iter=100, verbose=True, n_jobs=-1)
    final_model.fit(final_X, final_Y)
# allAlgorithms = all_estimators(type_filter='regressor')
# print(allAlgorithms)
# print(len(allAlgorithms))
# print(type(allAlgorithms))
# for (name, algorithm) in allAlgorithms:
#     model = algorithm()
#     model.fit(x_train, y_train)
#     y_pred = model.predict(x_test)
#     print(name, "loss = ", r2_score(y_test, y_pred))

model = RandomForestClassifier()
model.fit(x1_train, y1_train)
'''
# 5. Train the model
from keras.callbacks import EarlyStopping, TensorBoard
# td_hist = TensorBoard(log_dir='./graph',
#                       histogram_freq=0,
#                       write_graph=True,
#                       write_images=True)

early_stopping = EarlyStopping(monitor='loss', patience=60, mode='auto')
# adam is a solid default optimizer; this is also why acc shows up below.
model.compile(loss='mae', optimizer='adam', metrics=['mse'])
model.fit(x1_train_scaled, y1_train, epochs=10, batch_size=10, validation_split=0.2,
index = list(range(len(df)))
# Randomly pick 80% of each class for training; the remaining samples
# (roughly 2000) form the test set.
train_index = random.sample(index0, int(0.8 * len(index0))) + \
    random.sample(index1, int(0.8 * len(index1)))
test_index = []  # test_index is the index of the test data
for i in index:  # train_index is the index of the train data
    if i not in train_index:
        test_index.append(i)
print(len(train_index))

model = Sequential()
model.add(Dense(units=50, input_dim=len(df[0]), activation='relu'))
model.add(Dense(units=20, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit(df[train_index], label[train_index], epochs=1000, batch_size=20)

pred = (model.predict(df[test_index]) > 0.5).astype(int).reshape(len(test_index))
print(pred)
k = 0
for i in range(len(pred)):
    if pred[i] == label[test_index][i]:
        k = k + 1
print(k / len(test_index))
# model.save_weights('E:\...\my_model_weights.h5')
'''
The neural network's accuracy is quite high, and the network does not need to
be large; too large a network tends to overfit and lose accuracy.
'''
def main():
    # df = combine_datasets()
    df = pd.read_csv('./data/combined.csv', index_col=0)
    # df.fillna(-1, inplace=True)
    # df = df.drop(df[~df['certificate'].isin(['G', 'PG', 'PG-13', 'R', 'Not Rated'])].index)
    # df = add_award_points(df)

    # Data preprocessing/encoding
    df = df.drop(['movie', 'movie_id', 'synopsis', 'genre'], axis=1)
    df['popularity'] = 1 / np.array(df['popularity']) * 100
    df = pd.get_dummies(df, columns=['certificate'])
    cols = df.columns.tolist()
    cols = cols[df.columns.get_loc('oscar_animated') + 1:] + \
        cols[:df.columns.get_loc('oscar_animated') + 1]
    df = df[cols]
    df = df.reset_index(drop=True)
    splitIndex = df.index[df['year'] == 2018][0]
    df = df.drop(['year'], axis=1)

    # Split the data into training and testing sets
    oscarStart = df.columns.get_loc('oscar_best_picture')
    x = df.iloc[:, :oscarStart].values
    y = df.iloc[:, oscarStart:].values
    y[(y > 0) & (y < 1)] = 0.5  # winner is 1, nominee is 0.5, nothing is 0
    xTrain, xTest = x[:splitIndex], x[splitIndex:]
    yTrain, yTest = y[:splitIndex], y[splitIndex:]

    # Check how imbalanced the data is
    unique, counts = np.unique(yTrain, return_counts=True)
    print(dict(zip(unique, counts)))

    # Scale inputs to avoid one variable having more weight than another
    sc = StandardScaler()
    xTrain = sc.fit_transform(xTrain)
    xTest = sc.transform(xTest)

    modelType = 'neuralnetwork'
    predictCategory = True

    if modelType == 'randomforest':
        model = RandomForestClassifier(random_state=21)
        model.fit(xTrain, yTrain)
        yPred = model.predict(xTest)
        p = np.where(yPred == 2)
        v = np.where(yTest == 2)
    elif modelType == 'neuralnetwork':
        if not predictCategory:
            # One-hot encoding for the softmax activation function
            trainTargets = []
            for i in yTrain:
                if 1 in i:
                    trainTargets.append([1, 0, 0])
                elif 0.5 in i:
                    trainTargets.append([0, 1, 0])
                else:
                    trainTargets.append([0, 0, 1])
            yTrain = np.array(trainTargets)
            testTargets = []
            for i in yTest:
                if 1 in i:
                    testTargets.append([1, 0, 0])
                elif 0.5 in i:
                    testTargets.append([0, 1, 0])
                else:
                    testTargets.append([0, 0, 1])
            yTest = np.array(testTargets)

            model = Sequential()
            model.add(Dense(256, input_dim=xTrain.shape[1]))
            model.add(Activation('relu'))
            model.add(Dropout(0.2))
            model.add(Dense(3))
            model.add(Activation('softmax'))
            model.compile(optimizer=Adam(learning_rate=0.01),
                          loss='categorical_crossentropy', metrics=['mse'])
            classWeights = {
                0: counts.sum() / counts[2],
                1: counts.sum() / counts[1],
                2: counts.sum() / counts[0]
            }
            model.fit(xTrain, yTrain, epochs=512, batch_size=32,
                      class_weight=classWeights)
        else:
            # One-hot encoding for the softmax activation function,
            # with one target array per award category
            trainTargets = [[] for i in range(0, 6)]
            for i in yTrain:
                for idx, j in enumerate(i):
                    if j == 1:  # winner
                        trainTargets[idx].append([1, 0, 0])
                    elif j == 0.5:  # nominee
                        trainTargets[idx].append([0, 1, 0])
                    else:  # loser/nothing
                        trainTargets[idx].append([0, 0, 1])
            yTrain = [np.array(i) for i in trainTargets]
            testTargets = [[] for i in range(0, 6)]
            for i in yTest:
                for idx, j in enumerate(i):
                    if j == 1:  # winner
                        testTargets[idx].append([1, 0, 0])
                    elif j == 0.5:  # nominee
                        testTargets[idx].append([0, 1, 0])
                    else:  # loser/nothing
                        testTargets[idx].append([0, 0, 1])
            yTest = [np.array(i) for i in testTargets]

            if os.path.exists('best.h5'):
                model = load_model('best.h5')
            else:
                inputs = Input(shape=(xTrain.shape[1], ))
                x = Dense(128, activation='relu')(inputs)
                x = BatchNormalization()(x)
                x = Dropout(0.2)(x)
                output1 = Dense(3, activation='softmax')(x)
                output2 = Dense(3, activation='softmax')(x)
                output3 = Dense(3, activation='softmax')(x)
                output4 = Dense(3, activation='softmax')(x)
                output5 = Dense(3, activation='softmax')(x)
                output6 = Dense(3, activation='softmax')(x)
                model = Model(inputs=inputs,
                              outputs=[output1, output2, output3,
                                       output4, output5, output6])
                model.compile(optimizer=Adam(learning_rate=0.01),
                              loss='categorical_crossentropy')
                classWeights = {
                    0: counts.sum() / counts[2],
                    1: counts.sum() / counts[1],
                    2: counts.sum() / counts[0]
                }
                model.fit(xTrain, yTrain, epochs=512, batch_size=32,
                          class_weight=classWeights)
                # model.save('best.h5')

    # Training accuracy (put the training data back in) and testing accuracy
    compute_model_accuracies(predictCategory, '(TRAINING)', model, xTrain,
                             yTrain, splitIndex)
    compute_model_accuracies(predictCategory, '(TESTING)', model, xTest,
                             yTest, splitIndex)
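# compute_model_accuracies is called above but not defined in this file.
# A hedged sketch with the signature inferred from the call sites; the role of
# splitIndex is assumed, and the per-head report is an illustration:
def compute_model_accuracies(predictCategory, tag, model, x, y, splitIndex):
    preds = model.predict(x)
    if predictCategory:
        # One softmax head per award category; compare argmax to one-hot targets.
        for idx, (p, t) in enumerate(zip(preds, y)):
            acc = np.mean(np.argmax(p, axis=1) == np.argmax(t, axis=1))
            print("%s category %d accuracy: %.3f" % (tag, idx, acc))
    else:
        acc = np.mean(np.argmax(preds, axis=1) == np.argmax(y, axis=1))
        print("%s accuracy: %.3f" % (tag, acc))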
def tenfoldcrossvalidation(feature_map, id_truth_map, index, id_tweet_map):
    feature_map = dict(sorted(feature_map.items(), key=operator.itemgetter(1)))
    tweets = []
    truth = []
    keys = []
    for key, feature in feature_map.items():
        tweets.append(feature)
        truth.append(index[id_truth_map[key]])
        keys.append(key)

    accuracy = 0.0
    tp = 0.0
    tn = 0.0
    fp = 0.0
    fn = 0.0
    for i in range(10):
        tenth = len(tweets) // 10
        start = i * tenth
        end = (i + 1) * tenth
        test_index = range(start, end)
        train_index = [j for j in range(len(tweets)) if j not in test_index]

        train_tweets, train_keys, train_truth = [], [], []
        test_tweets, test_keys, test_truth = [], [], []
        for j in range(len(tweets)):
            if j in train_index:
                train_tweets.append(tweets[j])
                train_truth.append(truth[j])
                train_keys.append(keys[j])
            else:
                test_tweets.append(tweets[j])
                test_truth.append(truth[j])
                test_keys.append(keys[j])

        new_train_tweets = featureselection(train_tweets, train_tweets, train_truth)
        new_test_tweets = featureselection(test_tweets, train_tweets, train_truth)

        if sys.argv[1] == "rbfsvm":
            print("RBF kernel SVM")
            clf = svm.SVC(kernel='rbf', C=1000, gamma=0.0001)
            clf.fit(np.array(new_train_tweets), np.array(train_truth))
            test_predicted = clf.predict(np.array(new_test_tweets))
        elif sys.argv[1] == "randomforest":
            # Using Random Forest for classification.
            print('Random forest')
            clf = RandomForestClassifier(n_estimators=10, max_depth=None)
            clf.fit(np.array(new_train_tweets), np.array(train_truth))
            test_predicted = clf.predict(np.array(new_test_tweets))
            # getaccuracy(test_predicted, test_truth)
        elif sys.argv[1] == "linearsvm":
            # Using linear SVM for classification.
            print('Linear SVM')
            clf = svm.LinearSVC(random_state=20)
            clf.fit(np.array(new_train_tweets), np.array(train_truth))
            test_predicted = clf.predict(np.array(new_test_tweets))
            # print("F-score:")
            # print(f1_score(test_predicted, test_truth, average="micro"))
            # print("Accuracy:")
            # print(accuracy_score(test_predicted, test_truth, normalize="False"))
            # getaccuracy(test_predicted, test_truth)
        # elif sys.argv[1] == "polysvm":
        #     print('Poly SVM')
        #     clf = svm.SVC(kernel='poly')
        #     clf.fit(np.array(new_train_tweets), np.array(train_truth))
        #     test_predicted = clf.predict(np.array(new_test_tweets))
        elif sys.argv[1] == "nn":
            print('Neural Network')
            clf = Sequential()
            clf.add(Dense(7460, activation='relu'))
            clf.add(Dense(5000, activation='relu'))
            clf.add(Dense(2000, activation='relu'))
            clf.add(Dense(500, activation='relu'))
            # A single-unit softmax would always output 1; sigmoid is the
            # correct activation for a one-unit binary output.
            clf.add(Dense(1, activation='sigmoid'))
            clf.compile(optimizer='adam', loss='binary_crossentropy',
                        metrics=['accuracy'])
            clf.fit(np.array(new_train_tweets), np.array(train_truth),
                    batch_size=64, epochs=10, validation_split=0.1)
            # threshold the sigmoid output to get hard labels
            test_predicted = (clf.predict(np.array(new_test_tweets)) > 0.5) \
                .astype(int).ravel()
            print(f1_score(test_truth, test_predicted, average="micro"))
        elif sys.argv[1] == "xgb":
            xgb_model = xgb.XGBClassifier(objective="binary:logistic")
            xgb_model.fit(np.array(new_train_tweets), np.array(train_truth))
            test_predicted = xgb_model.predict(np.array(new_test_tweets))

        accuracy += getaccuracy(test_predicted, test_truth)
        tp += gettp(test_predicted, test_truth)
        tn += gettn(test_predicted, test_truth)
        fp += getfp(test_predicted, test_truth)
        fn += getfn(test_predicted, test_truth)

        if sys.argv[1] == "nn":
            print(accuracy)
            # print(tp, tn, fp, fn)
            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            print("F-score:")
            print((2 * precision * recall) / (precision + recall))
            break

    print(accuracy / 10.0)
    # print(tp, tn, fp, fn)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    print("F-score:")
    print((2 * precision * recall) / (precision + recall))
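# The counting helpers above (getaccuracy, gettp, gettn, getfp, getfn) are not
# defined in this file. Hedged sketches, assuming binary labels with 1 as the
# positive class:
def getaccuracy(pred, truth):
    return sum(int(p == t) for p, t in zip(pred, truth)) / float(len(truth))


def gettp(pred, truth):
    return sum(int(p == 1 and t == 1) for p, t in zip(pred, truth))


def gettn(pred, truth):
    return sum(int(p == 0 and t == 0) for p, t in zip(pred, truth))


def getfp(pred, truth):
    return sum(int(p == 1 and t == 0) for p, t in zip(pred, truth))


def getfn(pred, truth):
    return sum(int(p == 0 and t == 1) for p, t in zip(pred, truth))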
metrics = Metrics()

from sklearn.neural_network import MLPClassifier
model = MLPClassifier(hidden_layer_sizes=(200, ), max_iter=500, alpha=0.0001,
                      solver='adam', verbose=10, random_state=0,
                      tol=0.00000001, batch_size=100)
"""
model.add(Dense(8, input_dim=8, activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
"""
# compile the model
"""
model.compile(loss='binary_crossentropy', optimizer='adagrad', metrics=['accuracy'])
model.summary()
"""
model.fit(X_train, y_train)
y_predict = model.predict(X_test)
print(accuracy_score(y_test, y_predict))
# print("Score of Neural Network--->", score[0])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=42)

import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD

K.clear_session()  # clear model from memory

model = Sequential()
model.add(Dense(1, input_shape=(4, ), activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='sgd', metrics=['accuracy'])

# train model, recording the history of training progress
history = model.fit(X_train, y_train, epochs=10)
result = model.evaluate(X_test, y_test)

# visualize the training process
historydf = pd.DataFrame(history.history, index=history.epoch)
historydf.plot(ylim=(0, 1))
plt.title("Test accuracy: {:3.1f} %".format(result[1] * 100), fontsize=15)

# ===================================
# manually tune learning rate
# ===================================
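# A hedged sketch of what the header above announces: retrain the same
# one-unit model under several SGD learning rates and compare the final
# accuracies. The rate grid is illustrative, and this assumes a Keras version
# where SGD accepts learning_rate (older versions use lr).
for lr in [0.01, 0.1, 1.0]:
    K.clear_session()
    model = Sequential()
    model.add(Dense(1, input_shape=(4, ), activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer=SGD(learning_rate=lr),
                  metrics=['accuracy'])
    h = model.fit(X_train, y_train, epochs=10, verbose=0)
    # older Keras versions store this metric under 'acc' instead of 'accuracy'
    acc = h.history.get('accuracy', h.history.get('acc'))[-1]
    print("lr={}: final training accuracy {:.3f}".format(lr, acc))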
def algorithm(method_A, OneVsRest, OneVsOne, randomized):
    print("Selecting algorithm...")
    print(" ")
    if method_A == "svm":
        print("Starting with " + method_A)
        print(" ")
        parameters_svm = {
            'kernel': ('linear', 'rbf'),
            'C': [1, 3, 10, 100],
            'gamma': [0.01, 0.001]
        }
        model = svm.SVC()
        model = search_par(randomized, model, parameters_svm)
    if method_A == "random_forest":
        print("Starting with " + method_A)
        print(" ")
        parameters_random = {
            "max_depth": [2, 3, None],
            "max_features": [2, 4, 6],
            "min_samples_split": [2, 4, 6],
            "min_samples_leaf": [2, 4, 6],
            "bootstrap": [True, False],
            "criterion": ["gini", "entropy"]
        }
        model = RandomForestClassifier(n_estimators=100)
        model = search_par(randomized, model, parameters_random)
    if method_A == "logistic":
        print("Starting with " + method_A)
        print(" ")
        parameters_logistic = {'C': [100, 1000], 'tol': [0.001, 0.0001]}
        model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
        model = search_par(randomized, model, parameters_logistic)
    if method_A == "neural_networks":
        print("Starting with " + method_A)
        print(" ")
        # model = MLPClassifier()
        model = Sequential()
        # Input layer sized to the number of features, plus one node for the
        # bias term.
        model.add(Dense(991, input_dim=179, kernel_initializer='normal'))
        model.add(Activation('relu'))
        model.add(Dropout(0.2))
        # A common rule of thumb for decent performance: one hidden layer
        # whose width is the mean of the input and output layer sizes.
        model.add(Dense(495, kernel_initializer='normal'))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        # With softmax, the output layer has one node per class label.
        model.add(Dense(99, kernel_initializer='normal'))
        model.add(Activation('softmax'))
        sgd = SGD(learning_rate=0.1, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(loss='categorical_crossentropy', optimizer='rmsprop',
                      metrics=['accuracy'])
        # the sklearn multiclass wrappers do not apply to a Keras model
        OneVsRest = False
        OneVsOne = False

    if OneVsRest:
        print("Using OneVsRest ")
        print(" ")
        return OneVsRestClassifier(model)
    if OneVsOne:
        print("Using OneVsOne")
        print(" ")
        return OneVsOneClassifier(model)
    print("Algorithm selected: " + method_A)
    print(" ")
    return model
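# search_par is used above but not defined here. A hedged sketch with the
# signature inferred from the call sites: wrap the model in RandomizedSearchCV
# or GridSearchCV depending on the flag (cv and n_iter values are assumptions):
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV


def search_par(randomized, model, parameters):
    if randomized:
        return RandomizedSearchCV(model, parameters, n_iter=10, cv=3)
    return GridSearchCV(model, parameters, cv=3)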
# Add the input layer
model.add(Dense(16, activation='relu', input_dim=10))
# Add a hidden layer
model.add(Dense(12, activation='relu'))
# Add another hidden layer
model.add(Dense(12, activation='relu'))
# Add another hidden layer
model.add(Dense(8, activation='relu'))
# Add the output layer; 9 units corresponds to the number of predicted classes
model.add(Dense(9, activation='softmax'))

# compile and run the model
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(xx, yy, epochs=100,
          validation_data=(val_x, pd.get_dummies(val_y).values))

# For evaluate as well, val_y has to be converted to one-hot encoded dummies
model.evaluate(val_x, pd.get_dummies(val_y).values)
model.summary()
model = RandomForestClassifier(class_weight='balanced', n_estimators=50)
model.fit(x_train, y_train)

# add predictions to dataset
df['PREDICTIONS'] = model.predict(df['FEATURES'].values.tolist())

# train LSTM model
max_features = len(word_to_index)
maxlen = len(features[0])
batch_size = 32

model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

# try using different optimizers and different optimizer configs
x_train, x_test, y_train, y_test = np.array(x_train), np.array(x_test), \
    np.array(y_train), np.array(y_test)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=1,
          validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
print('Test score:', score)
print('Test accuracy:', acc)
# model
y_prediction = model.predict(X_test)
print("\naccuracy",
      np.sum(y_prediction == df_test['label'].values) / float(len(y_test)))

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

start = time()
model = Sequential()
model.add(Dense(512, input_shape=(784, )))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='sgd',
              metrics=['accuracy'])
model.fit(X_train, y_train_onehot)
print('\ntime taken %s seconds' % str(time() - start))

y_prediction = model.predict_classes(X_test)
print("\n\naccuracy", np.sum(y_prediction == y_test) / float(len(y_test)))
class Vorace_agent:
    initialState = None
    classifier = None
    history = None
    callbacks_list = None
    epochs = 40
    batch_size = 0

    def __init__(self, typeP, nClasses, inputLayer=None, batch_size=0,
                 callbacks_list=None, n_classifiers=10):
        self.initialState = None
        self.classifier = None
        self.history = None
        if typeP == 6:
            typeP = random.randint(0, 5)
        if typeP == 3:
            typeP = random.randint(0, 2)
        self.batch_size = batch_size
        self.callbacks_list = callbacks_list
        # print(typeP)
        if typeP == 0:
            self.classifier = Vorace_agent.getModel(nClasses, inputLayer)
            self.classifier = Model(inputLayer, self.classifier)
            if nClasses == 2:
                self.classifier.compile(loss='binary_crossentropy',
                                        metrics=['accuracy'], optimizer='adam')
            else:
                self.classifier.compile(loss='categorical_crossentropy',
                                        metrics=['accuracy'], optimizer='adam')
            self.initialState = self.classifier.get_weights()
        elif typeP == 1:
            if random.randint(0, 1) == 0:
                self.classifier = DecisionTreeClassifier(
                    criterion="gini", max_depth=random.randint(5, 25),
                    random_state=0)
            else:
                self.classifier = DecisionTreeClassifier(
                    criterion="entropy", max_depth=random.randint(5, 25),
                    random_state=0)
            self.initialState = clone(self.classifier)
        elif typeP == 2:
            A = math.log(pow(2, -5))
            B = math.log(pow(2, 5))
            c_value = math.exp(random.uniform(A, B))
            if random.randint(0, 1) == 0:
                self.classifier = svm.SVC(kernel='rbf', C=c_value,
                                          gamma='auto', probability=True)
            else:
                A = 3
                B = 5
                degree = int(round(random.uniform(A, B)))
                # print("C: {} DEGREE: {}".format(c_value, degree))
                self.classifier = svm.SVC(kernel='poly', degree=degree,
                                          C=c_value, gamma='auto',
                                          probability=True)
            self.initialState = clone(self.classifier)
        elif typeP == 4:
            value_lists = {
                'bootstrap': [True, False],
                'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
                'max_features': ['auto', 'sqrt'],
                'min_samples_leaf': [1, 2, 4],
                'min_samples_split': [2, 5, 10],
                'n_estimators': [10, 20, 50, 100, 200]
            }
            # 'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]
            params = {
                'bootstrap': random.choice(value_lists['bootstrap']),
                'max_depth': random.choice(value_lists['max_depth']),
                'max_features': random.choice(value_lists['max_features']),
                'min_samples_leaf': random.choice(value_lists['min_samples_leaf']),
                'min_samples_split': random.choice(value_lists['min_samples_split']),
                'n_estimators': random.choice(value_lists['n_estimators']),
            }
            self.classifier = RandomForestClassifier(**params)
            self.initialState = clone(self.classifier)
        elif typeP == 5:
            self.classifier = xgb.XGBClassifier(
                max_depth=random.randint(3, 25),
                n_estimators=n_classifiers,
                subsample=random.random(),
                colsample_bytree=random.random())
            self.initialState = clone(self.classifier)

    def reset(self):
        # print(type(self.classifier))
        if type(self.classifier) == Model:
            self.classifier.set_weights(self.initialState)
        else:
            self.classifier = clone(self.initialState)

    def fit(self, x, y, y_oneHot=None):
        if type(self.classifier) == Model:
            self.history = self.classifier.fit(x, y_oneHot,
                                               epochs=self.epochs,
                                               batch_size=self.batch_size,
                                               shuffle=True,
                                               callbacks=self.callbacks_list,
                                               verbose=0)
            self.history = self.history.history['acc'][-1]
        else:
            self.classifier.fit(x, y)
            y_pred = self.classifier.predict(x)
            self.history = metrics.accuracy_score(y, y_pred)

    def predict(self, x):
        if type(self.classifier) == Model:
            y_pred = self.classifier.predict(x)
        else:
            y_pred = self.classifier.predict_proba(x)
        return y_pred

    @staticmethod
    def getModel(nClass, inputLayer, nHLayers=4):
        n = random.randint(2, nHLayers)
        nInput = K.int_shape(inputLayer)[1]
        # A = math.log(nInput)
        # B = math.log(nInput ** 2)
        A = math.log(16)
        B = math.log(128)
        # print("A:" + str(A))
        # print("B:" + str(B))
        activation = ('relu', 'tanh')
        nNodes = int(round(math.exp(random.uniform(A, B))))
        act_fun = random.randint(0, len(activation) - 1)
        # print("nNodes:" + str(nNodes))
        x = Dense(nNodes, activation=activation[act_fun])(inputLayer)
        # print(K.int_shape(inputLayer)[1])
        for i in range(1, n):
            # nNodes = random.randint(nInput * 2, nInput ** 2)
            nNodes = int(round(math.exp(random.uniform(A, B))))
            # print(nNodes)
            act_fun = random.randint(0, len(activation) - 1)
            # print(act_fun)
            x = Dense(nNodes, activation=activation[act_fun])(x)
        if nClass == 2:
            x = Dense(nClass, activation='sigmoid')(x)
        else:
            x = Dense(nClass, activation='softmax')(x)
        return x
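# Hedged usage sketch for Vorace_agent (assumes the file-level imports the
# class relies on: random, math, sklearn's clone/metrics/tree/svm, Keras).
# typeP=1 draws a random decision tree, so no Keras input layer is needed:
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=300, n_classes=2, random_state=0)
agent = Vorace_agent(typeP=1, nClasses=2)
agent.fit(X, y)
print("training accuracy:", agent.history)
probs = agent.predict(X)  # predict_proba output for the sklearn classifiers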
# DecisionTree Classifier
tree_params = {
    "criterion": ["gini", "entropy"],
    "max_depth": list(range(2, 4, 1)),
    "min_samples_leaf": list(range(5, 7, 1))
}
grid_tree = GridSearchCV(DecisionTreeClassifier(), tree_params)
grid_tree.fit(X_train, Y_train)
# tree best estimator
tree_clf = grid_tree.best_estimator_

model = Sequential()
model.add(Dense(128, kernel_initializer="uniform", input_dim=13,
                activation='relu'))
model.add(Dense(64, kernel_initializer="uniform", activation="relu"))
model.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))
model.compile(loss="binary_crossentropy", metrics=['accuracy'],
              optimizer='adam')
model.summary()

history = model.fit(X_train, Y_train, epochs=100, batch_size=100)
plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()
model.evaluate(X_test, Y_test)
# Kernel SVM Classifier: RBF gives 94% accuracy on the test set,
# linear gives 96%
from sklearn.svm import SVC
classifier = SVC(kernel="linear", random_state=0)
cm_svm = evaluate_classifier(classifier, X_train, y_train)

# Neural Network
from keras.layers import Dense, Dropout
from keras.models import Sequential

classifier = Sequential()
classifier.add(Dense(50, input_dim=100, activation="relu",
                     kernel_initializer="uniform"))
classifier.add(Dropout(0.1))
classifier.add(Dense(50, activation="relu", kernel_initializer="uniform"))
classifier.add(Dropout(0.1))
classifier.add(Dense(6, activation="softmax", kernel_initializer="uniform"))
classifier.compile(optimizer="adam", loss="categorical_crossentropy",
                   metrics=["accuracy"])
classifier.fit(X_train_pca, y_train, batch_size=25, epochs=100)

y_pred = classifier.predict(X_test_pca)
y_prediction = np.argmax(y_pred, axis=1)
y_test = np.argmax(y_test, axis=1)
# y_prediction = np.array([1, 2, 3, 4, 5, 6])
from sklearn.metrics import confusion_matrix
cm_nn = confusion_matrix(y_test, y_prediction)
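# evaluate_classifier is called above but not defined here. A hedged sketch
# with the signature inferred from the call site: fit on an internal split and
# return the confusion matrix (the split strategy is an assumption):
from sklearn.model_selection import train_test_split


def evaluate_classifier(classifier, X, y, test_size=0.2):
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=test_size,
                                              random_state=0)
    classifier.fit(X_tr, y_tr)
    return confusion_matrix(y_te, classifier.predict(X_te))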