# Logistic regression (kept commented out)
'''model = LogisticRegression()
model.fit(x_train, y_train)
prediction = model.predict(x_test)
print(accuracy_score(prediction, y_test))
print(confusion_matrix(y_test, prediction))
print(classification_report(y_test, prediction))'''

# Random forest
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier()
model.fit(x_train, y_train)
prediction = model.predict(x_test)
print(accuracy_score(prediction, y_test))
print(confusion_matrix(y_test, prediction))
print(classification_report(y_test, prediction))

# DL (kept commented out)
'''import keras
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(9, activation='relu', input_dim=18))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=10, epochs=16, validation_data=(x_test, y_test))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])'''

# Random forest gave the benchmark for the prediction accuracy.
# Checking which features have high importance in predicting the outcome.
feature_imp = pd.DataFrame(model.feature_importances_,
                           index=pd.DataFrame(x_train).columns,
                           columns=['importance']).sort_values('importance',
                                                               ascending=False)
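# A minimal follow-up sketch (assuming matplotlib is available and feature_imp
# was built as above): visualize the most important features found by the
# random forest. The cutoff of 10 features is an arbitrary choice for display.
import matplotlib.pyplot as plt

feature_imp.head(10).plot(kind='barh', legend=False)
plt.xlabel('importance')
plt.title('Top 10 random forest feature importances')
plt.tight_layout()
plt.show()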
# Try FC-NN model
from keras.models import Sequential
from keras.layers import Dense

# xx is the train data as a DataFrame with shape (390144, 10); rows are samples
# and columns are features. The number of features also corresponds to
# input_dim for the first NN layer.
xx = train_x
# Output is multi-class, so train_y has to be converted to one-hot encoding.
yy = pd.get_dummies(train_y).values

model = Sequential()
# Add the input layer
model.add(Dense(16, activation='relu', input_dim=10))
# Add a hidden layer
model.add(Dense(12, activation='relu'))
# Add a hidden layer
model.add(Dense(12, activation='relu'))
# Add a hidden layer
model.add(Dense(8, activation='relu'))
# Add the output layer; 9 corresponds to the number of predicted classes
model.add(Dense(9, activation='softmax'))
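# The snippet stops before compiling; a minimal hedged sketch of how training
# could proceed (the optimizer, batch size and epoch count are assumptions,
# not from the source):
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(xx, yy, epochs=10, batch_size=128, validation_split=0.1)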
# Commented out IPython magic to ensure Python compatibility.
# Neural nets example
# %tensorflow_version 2.x
# If you wish to use Tensorflow 1.X run the following line and then restart runtime
# %tensorflow_version 1.x
# You'll need to change your import statements from tensorflow.keras to keras
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(18, kernel_initializer="uniform", activation="relu", input_dim=16))
model.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Display Model Summary and Show Parameters
model.summary()

# Start Training Our Classifier
batch_size = 10
epochs = 50
history = model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs)
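# Since the fit call captures a History object, a hedged sketch of inspecting
# the training curves (assumes pandas and matplotlib; not part of the
# original snippet):
import pandas as pd
import matplotlib.pyplot as plt

pd.DataFrame(history.history).plot()
plt.xlabel('epoch')
plt.title('Training loss and accuracy per epoch')
plt.show()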
print(padded_test)

#%%
# RandomForest model fitting
model = RandomForestClassifier(n_estimators=100)
model.fit(padded_train, y_train)

#%%
y_pred = model.predict(padded_test)
acc = accuracy_score(y_pred, y_test)
print(acc * 100, "%")

#%%
vocab_size = 50_000
one_hots = [one_hot(word, vocab_size) for word in X_train]
print(one_hots)

#%%
padded = pad_sequences(one_hots, padding='post', maxlen=5)
print(padded)

#%%
model = Sequential()
model.add(Embedding(vocab_size, 50))
model.compile("adam", "mse")

#%%
predict = model.predict(padded)

#%%
predict.shape
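#%%
# For context, a small hedged illustration of what keras' one_hot and
# pad_sequences do (hash words to integer ids, then right-pad each row to a
# fixed length). The example strings are made up:
from keras.preprocessing.text import one_hot
from keras.preprocessing.sequence import pad_sequences

demo = [one_hot(s, 50) for s in ["good movie", "bad plot but good cast"]]
print(demo)  # e.g. [[13, 7], [42, 9, 31, 13, 22]] (ids come from hashing)
print(pad_sequences(demo, padding='post', maxlen=5))  # rows padded to length 5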
def algorithm(method_A, OneVsRest, OneVsOne, randomized):
    print("Selecting algorithm...")
    print(" ")
    if method_A == "svm":
        print("Starting with " + method_A)
        print(" ")
        parameters_svm = {
            'kernel': ('linear', 'rbf'),
            'C': [1, 3, 10, 100],
            'gamma': [0.01, 0.001]
        }
        model = svm.SVC()
        model = search_par(randomized, model, parameters_svm)
    if method_A == "random_forest":
        print("Starting with " + method_A)
        print(" ")
        parameters_random = {
            "max_depth": [2, 3, None],
            "max_features": [2, 4, 6],
            "min_samples_split": [2, 4, 6],
            "min_samples_leaf": [2, 4, 6],
            "bootstrap": [True, False],
            "criterion": ["gini", "entropy"]
        }
        model = RandomForestClassifier(n_estimators=100)
        model = search_par(randomized, model, parameters_random)
    if method_A == "logistic":
        print("Starting with " + method_A)
        print(" ")
        parameters_logistic = {'C': [100, 1000], 'tol': [0.001, 0.0001]}
        model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
        model = search_par(randomized, model, parameters_logistic)
    if method_A == "neural_networks":
        print("Starting with " + method_A)
        print(" ")
        # model = MLPClassifier()
        model = Sequential()
        # Number of features of the data + 1 node for the bias term.
        model.add(Dense(991, input_dim=179, kernel_initializer='normal'))
        model.add(Activation('relu'))
        model.add(Dropout(0.2))
        # In sum, for most problems one could probably get decent performance
        # (even without a second optimization step) by setting the hidden layer
        # configuration using just two rules: (i) the number of hidden layers
        # equals one; and (ii) the number of neurons in that layer is the mean
        # of the neurons in the input and output layers.
        model.add(Dense(495, kernel_initializer='normal'))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        # If the NN is a classifier it has a single output node, unless softmax
        # is used, in which case the output layer has one node per class label.
        model.add(Dense(99, kernel_initializer='normal'))
        model.add(Activation('softmax'))
        sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
        # Note: sgd is built but the model is compiled with 'rmsprop'.
        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])
        # These flags are forced off here, overriding the arguments passed in,
        # since the Keras model cannot be wrapped in the sklearn meta-estimators.
        OneVsRest = False
        OneVsOne = False
    if OneVsRest:
        print("Using OneVsRest ")
        print(" ")
        return OneVsRestClassifier(model)
    if OneVsOne:
        print("Using OneVsOne")
        print(" ")
        return OneVsOneClassifier(model)
    print("Algorithm selected: " + method_A)
    print(" ")
    return model
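# search_par is not defined in this snippet; a hypothetical sketch of what it
# plausibly does, given the `randomized` flag (the function body, cv and
# n_iter values are assumptions):
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

def search_par(randomized, model, parameters):
    # Return an unfitted search object; the caller fits it later.
    if randomized:
        return RandomizedSearchCV(model, parameters, n_iter=10, cv=3)
    return GridSearchCV(model, parameters, cv=3)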
# Applying k-fold cross-validation
from sklearn.model_selection import cross_val_score

# cv parameter is the number of folds to split the data
accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10)
m = accuracies.mean()
print(m)
s = accuracies.std()
print(s)

# Calculating train score and test score
train_scores = [classifier.score(X_train, y_train)]
test_scores = [classifier.score(X_test, y_test)]

# ANN (kept commented out)
'''# Importing the Keras Libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD
from keras.regularizers import l2
from keras.constraints import maxnorm

classifier = Sequential()
# Adding the input layer and the first hidden layer
classifier.add(Dense(output_dim=11, init='uniform', activation='relu',
                     bias_regularizer='l2', input_dim=12))'''
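# A hedged sketch tying the two halves together: the commented-out Keras ANN
# could be cross-validated with the same cross_val_score call by wrapping it.
# build_classifier and all its hyperparameters are assumptions:
from keras.wrappers.scikit_learn import KerasClassifier
from keras.models import Sequential
from keras.layers import Dense

def build_classifier():
    clf = Sequential()
    clf.add(Dense(11, kernel_initializer='uniform', activation='relu', input_dim=12))
    clf.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))
    clf.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return clf

wrapped = KerasClassifier(build_fn=build_classifier, batch_size=10,
                          epochs=100, verbose=0)
accuracies = cross_val_score(estimator=wrapped, X=X_train, y=y_train, cv=10)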
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cmSVM = confusion_matrix(y_test, y_pred)

# ------------------------------------- ANN -----------------------------
import keras
from keras.models import Sequential
from keras.layers import Dense

# Initialising the ANN
classifier = Sequential()
# Adding the input layer and the first hidden layer
classifier.add(Dense(units=12, kernel_initializer='uniform', activation='relu',
                     input_dim=6))
# Adding the second hidden layer
# classifier.add(Dense(units=150, kernel_initializer='uniform', activation='sigmoid'))
# Adding the third hidden layer
classifier.add(Dense(units=80, kernel_initializer='uniform', activation='relu'))
# Adding the fourth hidden layer
classifier.add(Dense(units=12, kernel_initializer='uniform', activation='sigmoid'))
# Adding the output layer
classifier.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
# Compiling the ANN (the snippet was cut off here; binary_crossentropy with
# accuracy is the natural completion for the single sigmoid output)
classifier.compile(optimizer='adam', loss='binary_crossentropy',
                   metrics=['accuracy'])
from sklearn.preprocessing import OneHotEncoder

onehot_direct = OneHotEncoder()
y_train = onehot_direct.fit_transform(np.array(y_train).reshape(-1, 1)).toarray()
y_test = onehot_direct.fit_transform(np.array(y_test).reshape(600, 1)).toarray()

## Deep Learning
from keras.models import Sequential, Model
from keras.layers import Dense, Input

# Functional-API version (kept commented out)
'''deep_inp = Input(shape=x_train.shape, name='input')
deep = Dense(100, activation='tanh')(deep_inp)
deep = Dense(100, activation='tanh')(deep)
deep_out = Dense(4, activation='softmax')(deep)
model = Model(inputs=deep_inp, outputs=deep_out)'''

model = Sequential()
model.add(Dense(units=100, activation='tanh', kernel_initializer='he_uniform'))
model.add(Dense(units=100, activation='tanh', kernel_initializer='he_uniform'))
# softmax (rather than sigmoid) pairs with the categorical_crossentropy loss
# and the 4-class one-hot targets
model.add(Dense(units=4, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

'''y_train_oh = np.array(y_train)
y_train_oh = y_train_oh.reshape(len(y_train_oh), 1)
y_train_oh.shape
y_train_oh = onehot_direct.fit_transform(y_train_oh).values
y_train_oh'''
model = RandomForestClassifier(n_estimators=100,
                               verbose=1,
                               class_weight={0: 1., 1: weight_1})
# model = GaussianNB()
# model = linear_model.LogisticRegression(verbose=1)
# model = svm.SVC(kernel='sigmoid', gamma=5, C=1, verbose=1)

DNN = False
if DNN:
    model = Sequential()
    model.add(Dense(2000, input_dim=tr.shape[1] - 1,
                    kernel_initializer='normal', activation='relu'))
    # model.add(Dropout(0.5))
    # model.add(Dense(1000, input_dim=1000, kernel_initializer='normal', activation='relu'))
    # model.add(Dropout(0.5))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])

X = tr.loc[:, tr.columns != 'order']  # .ix was removed from pandas; use .loc
X = lsa.fit_transform(X)
y = tr['order']
t1 = time.time()
step 3) Baseline Model 2: Logistic Regression
"""

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=42)

import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD

K.clear_session()  # clear model from memory

model = Sequential()
model.add(Dense(1, input_shape=(4,), activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='sgd',
              metrics=['accuracy'])

# train model and record history of training progress
history = model.fit(X_train, y_train, epochs=10)
result = model.evaluate(X_test, y_test)

# visualize the training process
historydf = pd.DataFrame(history.history, index=history.epoch)
historydf.plot(ylim=(0, 1))
plt.title("Test accuracy: {:3.1f} %".format(result[1] * 100), fontsize=15)
# ===================================
        _val_f1 = f1_score(val_targ, val_predict)
        _val_recall = recall_score(val_targ, val_predict)
        _val_precision = precision_score(val_targ, val_predict)
        self.val_f1s.append(_val_f1)
        self.val_recalls.append(_val_recall)
        self.val_precisions.append(_val_precision)
        print(" — val_f1: %f — val_precision: %f — val_recall: %f"
              % (_val_f1, _val_precision, _val_recall))
        return

metrics = Metrics()

from sklearn.neural_network import MLPClassifier

# The solver argument was left empty in the original; 'adam' (the
# MLPClassifier default) is assumed here.
model = MLPClassifier(hidden_layer_sizes=(200,), max_iter=500, alpha=0.0001,
                      solver='adam', verbose=10, random_state=0,
                      tol=0.00000001, batch_size=100)

# Keras alternative (kept commented out)
"""
model.add(Dense(8, input_dim=8, activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
"""
# compile the model
"""
model.compile(loss='binary_crossentropy', optimizer='adagrad', metrics=['accuracy'])
model.summary()
"""
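# A hedged usage sketch for the Metrics callback above (the model, the arrays
# and the batch size are assumptions): the callback reads validation data at
# the end of each epoch, so validation_data must be supplied to fit.
model.fit(X_train, y_train,
          validation_data=(X_val, y_val),
          epochs=10, batch_size=100,
          callbacks=[metrics])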
# In[17]:
# Download the dataset
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# In[18]:
from keras.models import Sequential
from keras.layers import Dense, Activation

# In[19]:
# Build the architecture
model = Sequential()
model.add(Dense(20, input_dim=784))
model.add(Activation('relu'))
model.add(Dense(20))
model.add(Activation('relu'))
model.add(Dense(10))
model.add(Activation('softmax'))
model.summary()

# In[20]:
# Set the optimizer and the loss
from keras.optimizers import SGD
opt = SGD(lr=0.001)
model.compile(optimizer=opt, loss='categorical_crossentropy',
              metrics=['accuracy'])
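# The dense network expects flat 784-dim inputs and one-hot targets, while
# mnist.load_data() returns 28x28 integer images; a hedged preprocessing and
# training sketch (not shown in the original cells, epoch/batch values are
# assumptions):
from keras.utils import to_categorical

x_train = x_train.reshape(-1, 784).astype('float32') / 255.0
x_test = x_test.reshape(-1, 784).astype('float32') / 255.0
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)
model.fit(x_train, y_train, epochs=5, batch_size=128,
          validation_data=(x_test, y_test))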
model = model.fit(x_train, y_train)
y_prediction = model.predict(x_test)
print("\naccuracy", np.sum(y_prediction == y_test) / float(len(y_test)))

# 1-layer Neural Network ###############################
from time import time

from keras.models import Sequential
from keras.layers import Dense, Activation

start = time()
model = Sequential()
model.add(Dense(2, input_dim=4))
model.add(Activation("softmax"))
model.compile(loss='categorical_crossentropy', optimizer='sgd',
              metrics=['accuracy'])
model.fit(x_train, y_train_onehot)
print('\ntime taken %s seconds' % str(time() - start))

y_prediction = model.predict_classes(x_test)
print('\n\naccuracy', np.sum(y_prediction == y_test) / float(len(y_test)))
##########################################
from keras.models import Sequential
from keras.optimizers import Adam, SGD
from keras.layers import Dense

model = RandomForestClassifier()
cross_val_score(model, X, y_true)

X_train, X_test, y_train, y_test = train_test_split(X, y_true, test_size=0.3,
                                                    random_state=42)

import keras.backend as K

model = Sequential()
model.add(Dense(1, input_shape=(4,), activation='sigmoid'))
model.compile(optimizer='Adam', loss='binary_crossentropy',
              metrics=['accuracy'])
history = model.fit(X_train, y_train, epochs=30, verbose=2,
                    validation_split=0.1)

# model.evaluate returns the loss and accuracy (indices 0 and 1 respectively),
# while model.predict returns the model's output for the given input.
result = model.evaluate(X_test, y_test)
history = pd.DataFrame(history.history, index=history.epoch)
final_X = final_data[:, 0:-1]
final_Y = final_data[:, -1]

# Based on user choice, choose the classifier to be trained
if args.classifier == "random_forest":
    final_model = RandomForestClassifier(n_estimators=100,
                                         max_depth=32,
                                         random_state=0,
                                         n_jobs=-1,
                                         verbose=True)
elif args.classifier == "logistic_regression_keras":
    classes = 26
    final_model = Sequential()
    final_model.add(Dense(classes,
                          activation='softmax',
                          kernel_regularizer=regularizers.l1(0.0000001),
                          input_shape=(293,)))
    final_model.compile(optimizer=optimizers.Adam(lr=0.01),
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    final_model.fit(final_X, to_categorical(final_Y), epochs=100, batch_size=32)
elif args.classifier == "logistic_regression_scikit":
    final_model = LogisticRegression(penalty='l1',
                                     C=1000,
                                     multi_class="multinomial",
                                     solver="saga",
                                     max_iter=100)
from keras.layers import Dense, Activation, Dropout, Conv2D, MaxPooling2D, Flatten

start = time()
img_rows, img_cols = 28, 28
nb_filters = 32
pool_size = (2, 2)
kernel_size = (3, 3)

X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)

model = Sequential()
# Convolution2D with border_mode is the Keras 1 API; Conv2D with padding is
# the Keras 2 equivalent.
model.add(Conv2D(nb_filters, kernel_size, padding='valid',
                 input_shape=input_shape))
model.add(Activation('relu'))
model.add(Conv2D(nb_filters, kernel_size))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=pool_size))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(10))
model.add(Activation('softmax'))
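# The CNN is built but never compiled or trained in this snippet; a minimal
# hedged continuation (the optimizer, batch size, epoch count and the one-hot
# target names y_train_onehot/y_test_onehot are assumptions):
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(X_train, y_train_onehot, batch_size=128, epochs=5,
          validation_data=(X_test, y_test_onehot))
print('time taken %s seconds' % str(time() - start))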
print('Accuracy Score', acc)
accuracy.append(acc)
y_proba = model.predict_proba(x_test)
f1_scor = f1_score_(y_proba, y_test)

# LSTM model
embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_fatures, embed_dim, input_length=X_train.shape[1]))
model.add(SpatialDropout1D(0.4))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

batch_size = 32
model.fit(X_train, Y_train, epochs=7, batch_size=batch_size, verbose=2)
score, acc = model.evaluate(X_test, Y_test, verbose=2, batch_size=batch_size)
print("score: %.2f" % (score))
print("acc: %.2f" % (acc))
cm_values = list()
for i in range(10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    from keras.models import Sequential
    from keras.layers import Dense

    # Initialising the ANN
    classifier = Sequential()
    # Adding the input layer and the first hidden layer
    classifier.add(Dense(units=20, kernel_initializer='uniform',
                         activation='relu', input_dim=72))
    # Adding the second hidden layer
    classifier.add(Dense(units=5, kernel_initializer='uniform',
                         activation='relu'))
    # Adding the output layer
    classifier.add(Dense(units=1, kernel_initializer='uniform',
                         activation='hard_sigmoid'))
    # Compiling the ANN
    classifier.compile(optimizer='adam', loss='binary_crossentropy',
                       metrics=['accuracy'])
    # Fitting the ANN to the Training set
    classifier.fit(X_train, y_train, batch_size=10, epochs=10)
print('Test AUC %.2f' % roc_auc_score(y_test, model.predict(x_test)))
print('Test accuracy %.2f' % model.score(x_test, y_test))

model = RandomForestClassifier(max_depth=6, class_weight='balanced',
                               n_estimators=50)
model.fit(x_train, y_train)

# add predictions to dataset
df['PREDICTIONS'] = model.predict(df['FEATURES'].values.tolist())

# train LSTM model
max_features = len(word_to_index)
maxlen = len(features[0])
batch_size = 32

model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

x_train, x_test, y_train, y_test = (np.array(x_train), np.array(x_test),
                                    np.array(y_train), np.array(y_test))

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=1,
          validation_data=(x_test, y_test))
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
acc = model.score(x_test, y_test)
y_pred = model.predict(x_test)
acc2 = accuracy_score(y_test, y_pred)

from keras.models import Sequential, Model
from keras.layers import Dense, LSTM, Input
from keras.utils import np_utils

y_train = np_utils.to_categorical(y_train, 11)
y_test = np_utils.to_categorical(y_test, 11)

model = Sequential()
model.add(Dense(100, input_dim=11, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(11, activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['acc'])

from keras.callbacks import EarlyStopping
callback1 = EarlyStopping(monitor='loss', patience=20, mode='auto')

# The callback was defined but never used in the original; it has to be passed
# to fit to take effect.
model.fit(x_train, y_train, epochs=1000, batch_size=10, callbacks=[callback1])
# Note: acc still holds the earlier model's score, not this network's.
print(acc)
def main():
    print("......... Welcome to SBA Loan Data Analysis ....... ")
    print()
    ip1 = int(input("What you want to do : \n 1) Prediction \n or \n 2) Analyze the data..?? \n\n "))
    if ip1 == 1:
        print("Menu :\n \
 1)Random forest \n \
 2)Decision Tree \n \
 3)Naive Bayes \n \
 4)SVM \n \
 5)XG Boost \n \
 6)KNN \n \
 7)Keras Neural Network ")
        ip2 = int(input("Enter a value from Above Menu : "))

        # Importing the libraries
        import numpy as np
        import pandas as pd

        # Importing the dataset (raw strings so the Windows backslashes are literal)
        D7aFY1991_FY1999 = pd.read_csv(r'D:/CDAC_DATA\CDAC_PROJECT/15-1-MachineL/7aFY1991_FY1999_1.csv')
        D7aFY2000_FY2009 = pd.read_csv(r'D:/CDAC_DATA\CDAC_PROJECT/15-1-MachineL/7aFY2000_FY2009_1.csv')
        D7aFY2010_Present = pd.read_csv(r'D:/CDAC_DATA\CDAC_PROJECT/15-1-MachineL/7aFY2010_Present_1.csv')

        # Merge the dataframes
        Data_7a = D7aFY1991_FY1999.append(D7aFY2000_FY2009)
        Data_7a = Data_7a.append(D7aFY2010_Present)

        # Create sample data
        # Data_sample_7a = Data_7a.sample(frac=0.1, random_state=0)
        Data_sample_7a = Data_7a
        Data_sample_7a = Data_sample_7a.iloc[:, [4, 6, 9, 11, 12, 14, 16, 17, 18, 19, 20, 24, 25, 26, 28, 29]].values
        # Convert into pandas dataframe
        Data_sample_7a = pd.DataFrame(data=Data_sample_7a)

        # Taking care of missing data
        # (Imputer was removed in later scikit-learn; SimpleImputer is the
        # modern equivalent)
        from sklearn.preprocessing import Imputer
        imputer = Imputer(missing_values='NaN', strategy='most_frequent', axis=0)
        imputer = imputer.fit(Data_sample_7a.iloc[:, 10:11])
        Data_sample_7a.iloc[:, 10:11] = imputer.transform(Data_sample_7a.iloc[:, 10:11])

        imputer = Imputer(missing_values='NaN', strategy='mean', axis=0)
        imputer = imputer.fit(Data_sample_7a.iloc[:, 8:9])
        Data_sample_7a.iloc[:, 8:9] = imputer.transform(Data_sample_7a.iloc[:, 8:9])
        Data_sample_7a = Data_sample_7a.dropna()

        # Split data columns into dependent and independent variables
        X7a = Data_sample_7a.iloc[:, 0:14].values    # independent
        y7a = Data_sample_7a.iloc[:, [15]].values    # dependent

        # =====================================================================
        # Convert numpy objects to pandas dataframes
        pd_X7a = pd.DataFrame(data=X7a[0:, 0:])
        pd_y7a = pd.DataFrame(data=y7a[0:, 0:])
        # =====================================================================

        # Encoding categorical data in the independent variable
        pd_X7a = np.asarray(pd_X7a)  # convert pandas dataframe into numpy array
        pd_X7a = encoder(pd_X7a)

        # Get user data and encode it
        Userdata = getdata(pd_X7a)
        Userdata = np.asarray(Userdata)  # convert pandas dataframe into numpy array
        Userdata = encoder(Userdata)

        # Encoding the dependent variable
        pd_y7a[0] = pd_y7a[0].replace(['PIF'], '0')
        pd_y7a[0] = pd_y7a[0].replace(['CANCLD', 'EXEMPT', 'CHGOFF', 'COMMIT'], '1')
        pd_y7a = np.asarray(pd_y7a)  # convert pandas dataframe into numpy array

        # Splitting the dataset into the Training set and Test set
        # (sklearn.cross_validation was removed; model_selection is the home now)
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(pd_X7a, pd_y7a,
                                                            test_size=0.25,
                                                            random_state=0)

        # Feature Scaling of accuracy data
        (X_train1, X_test) = scalingFunction(X_train, X_test)

        # Feature Scaling of user data
        from sklearn.preprocessing import StandardScaler
        sc = StandardScaler()
        Userdata = sc.fit_transform(Userdata)

        if ip2 == 1:
            # Fitting Random Forest Classification to the Training set
            from sklearn.ensemble import RandomForestClassifier
            classifier = RandomForestClassifier(n_estimators=10,
                                                criterion='entropy',
                                                random_state=0)
            classifier.fit(X_train1, y_train)

            # Predicting the Test set results
            y_pred = classifier.predict(X_test)
            y_pred = pd.DataFrame(data=y_pred[0:])  # converting to data frame

            # Making the Confusion Matrix
            from sklearn.metrics import confusion_matrix
            cm = confusion_matrix(y_test, y_pred)
            print("Confusion Matrix : \n")
            print(cm)
            print("Accuracy rate is : \n ")
            print((cm[0, 0] + cm[1, 1]) / (len(y_pred)))

            # Predicting the user set results
            user_pred = classifier.predict(Userdata)
            converter(user_pred)
            print("The Loan will be : ")
            print(user_pred)
        elif ip2 == 2:
            # Fitting Decision Tree Classification to the Training set
            from sklearn.tree import DecisionTreeClassifier
            classifier = DecisionTreeClassifier(criterion='entropy',
                                                random_state=0)
            classifier.fit(X_train, y_train)

            # Predicting the Test set results
            y_pred = classifier.predict(X_test)

            # Making the Confusion Matrix
            from sklearn.metrics import confusion_matrix
            cm = confusion_matrix(y_test, y_pred)
            print("Confusion Matrix : \n")
            print(cm)
            print("Accuracy rate is : \n ")
            print((cm[0, 0] + cm[1, 1]) / (len(y_pred)))

            # Predicting the user set results
            user_pred = classifier.predict(Userdata)
            converter(user_pred)
            print("The Loan will be : ")
            print(user_pred)
        elif ip2 == 3:
            # Fitting Naive Bayes to the Training set
            from sklearn.naive_bayes import GaussianNB
            classifier = GaussianNB()
            classifier.fit(X_train, y_train)

            # Predicting the Test set results
            y_pred = classifier.predict(X_test)

            # Making the Confusion Matrix
            from sklearn.metrics import confusion_matrix
            cm = confusion_matrix(y_test, y_pred)
            print("Confusion Matrix : \n")
            print(cm)
            print("Accuracy rate is : \n ")
            print((cm[0, 0] + cm[1, 1]) / (len(y_pred)))

            # Predicting the user set results
            user_pred = classifier.predict(Userdata)
            converter(user_pred)
            print("The Loan will be : ")
            print(user_pred)
        elif ip2 == 4:
            # Fitting SVM to the Training set
            from sklearn.svm import SVC
            classifier = SVC(kernel='linear', random_state=0)
            classifier.fit(X_train, y_train)

            # Predicting the Test set results
            y_pred = classifier.predict(X_test)

            # Making the Confusion Matrix
            from sklearn.metrics import confusion_matrix
            cm = confusion_matrix(y_test, y_pred)
            print("Confusion Matrix : \n")
            print(cm)
            print("Accuracy rate is : \n ")
            print((cm[0, 0] + cm[1, 1]) / (len(y_pred)))

            # Predicting the user set results
            user_pred = classifier.predict(Userdata)
            converter(user_pred)
            print("The Loan will be : ")
            print(user_pred)
        elif ip2 == 5:
            # Fitting XGBoost to the Training set
            from xgboost import XGBClassifier
            classifier = XGBClassifier()
            classifier.fit(X_train, y_train)

            # Predicting the Test set results
            y_pred = classifier.predict(X_test)

            # Making the Confusion Matrix
            from sklearn.metrics import confusion_matrix
            cm = confusion_matrix(y_test, y_pred)
            print("Confusion Matrix : \n")
            print(cm)
            print("Accuracy rate is : \n ")
            print((cm[0, 0] + cm[1, 1]) / (len(y_pred)))

            # Predicting the user set results
            user_pred = classifier.predict(Userdata)
            converter(user_pred)
            print("The Loan will be : ")
            print(user_pred)
        elif ip2 == 6:
            # Fitting K-NN to the Training set
            from sklearn.neighbors import KNeighborsClassifier
            classifier = KNeighborsClassifier(n_neighbors=5,
                                              metric='minkowski', p=2)
            classifier.fit(X_train, y_train)

            # Predicting the Test set results
            y_pred = classifier.predict(X_test)

            # Making the Confusion Matrix
            from sklearn.metrics import confusion_matrix
            cm = confusion_matrix(y_test, y_pred)
            print("Confusion Matrix : \n")
            print(cm)
            print("Accuracy rate is : \n ")
            print((cm[0, 0] + cm[1, 1]) / (len(y_pred)))

            # Predicting the user set results
            user_pred = classifier.predict(Userdata)
            converter(user_pred)
            print("The Loan will be : ")
            print(user_pred)
        elif ip2 == 7:
            # Importing the Keras libraries and packages
            import keras
            from keras.models import Sequential
            from keras.layers import Dense

            # Initialising the ANN
            classifier = Sequential()
            # Adding the input layer and the first hidden layer
            classifier.add(Dense(units=7, kernel_initializer='uniform',
                                 activation='relu', input_dim=14))
            # Adding the second hidden layer
            classifier.add(Dense(units=7, kernel_initializer='uniform',
                                 activation='relu'))
            # Adding the output layer
            classifier.add(Dense(units=1, kernel_initializer='uniform',
                                 activation='sigmoid'))
            # Compiling the ANN
            classifier.compile(optimizer='adam', loss='binary_crossentropy',
                               metrics=['accuracy'])
            # Fitting the ANN to the Training set
            classifier.fit(X_train, y_train, batch_size=10, epochs=100)

            # Part 3 - Making the predictions and evaluating the model
            # Predicting the Test set results
            y_pred = classifier.predict(X_test)
            y_pred = (y_pred > 0.5)

            # Making the Confusion Matrix
            from sklearn.metrics import confusion_matrix
            cm = confusion_matrix(y_test, y_pred)
            print("Confusion Matrix : \n")
            print(cm)
            print("Accuracy rate is : \n ")
            print((cm[0, 0] + cm[1, 1]) / (len(y_pred)))

            # Predicting the user set results
            user_pred = classifier.predict(Userdata)
            user_pred = (user_pred > 0.5)
            converter(user_pred)
            print("The Loan will be : ")
            print(user_pred)
    elif ip1 == 2:
        print("Menu : \n \
 1)Business wise JobsSupported \n \
 2)Compare:Gross Approval Vs SBA Approval \n \
 3)DistOffice wise SBAapproval \n \
 4)GrossApproval Per LoanStatus \n \
 5)GrossApproval Per DeliveryMethod \n \
 6)JobsSupported per LoanStatus \n \
 7)SBAapproval Loan Status \n \
 ")
        ip2 = int(input("Enter a value from Above Menu : "))
        from PIL import Image
        if ip2 == 1:
            img = Image.open(r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\BusinnJobsSupp.png')
            img.format = "PNG"
            img.show()
        elif ip2 == 2:
            img = Image.open(r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\Comp_GrossSBA.png')
            img.format = "PNG"
            img.show()
        elif ip2 == 3:
            img = Image.open(r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\DistOff_wise_SBAappr.png')
            img.format = "PNG"
            img.show()
        elif ip2 == 4:
            img = Image.open(r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\GrossAppLoanSt.png')
            img.format = "PNG"
            img.show()
        elif ip2 == 5:
            img = Image.open(r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\GrossAppr_DeliveryMethod.png')
            img.format = "PNG"
            img.show()
        elif ip2 == 6:
            img = Image.open(r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\JobsSuppLoanSt.png')
            img.format = "PNG"
            img.show()
        elif ip2 == 7:
            img = Image.open(r'D:\CDAC_DATA\CDAC_PROJECT\ggwp\SBAapprLoanSt.png')
            img.format = "PNG"
            img.show()
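# encoder, getdata, converter and scalingFunction are referenced above but not
# defined in this excerpt; a hypothetical sketch of scalingFunction, assuming
# it standard-scales the train and test sets consistently:
def scalingFunction(X_train, X_test):
    from sklearn.preprocessing import StandardScaler
    sc = StandardScaler()
    # fit on the training data only, then apply the same transform to test
    return sc.fit_transform(X_train), sc.transform(X_test)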
def tenfoldcrossvalidation(feature_map, id_truth_map, index, id_tweet_map):
    feature_map = dict(sorted(feature_map.items(), key=operator.itemgetter(1)))
    tweets = []
    truth = []
    keys = []
    # .iteritems(), xrange and print statements below were Python 2;
    # ported here to their Python 3 equivalents
    for key, feature in feature_map.items():
        tweets.append(feature)
        truth.append(index[id_truth_map[key]])
        keys.append(key)

    accuracy = 0.0
    tp = 0.0
    tn = 0.0
    fp = 0.0
    fn = 0.0
    for i in range(10):
        tenth = len(tweets) // 10
        start = i * tenth
        end = (i + 1) * tenth
        test_index = range(start, end)
        train_index = [j for j in range(len(tweets)) if j not in test_index]
        train_tweets = []
        train_keys = []
        test_tweets = []
        test_keys = []
        train_truth = []
        test_truth = []
        for j in range(len(tweets)):
            if j in train_index:
                train_tweets.append(tweets[j])
                train_truth.append(truth[j])
                train_keys.append(keys[j])
            else:
                test_tweets.append(tweets[j])
                test_truth.append(truth[j])
                test_keys.append(keys[j])

        new_train_tweets = featureselection(train_tweets, train_tweets, train_truth)
        new_test_tweets = featureselection(test_tweets, train_tweets, train_truth)

        if sys.argv[1] == "rbfsvm":
            print("RBF kernel SVM")
            clf = svm.SVC(kernel='rbf', C=1000, gamma=0.0001)
            clf.fit(np.array(new_train_tweets), np.array(train_truth))
            test_predicted = clf.predict(np.array(new_test_tweets))
        elif sys.argv[1] == "randomforest":
            # Using Random forest for classification.
            print('Random forest')
            clf = RandomForestClassifier(n_estimators=10, max_depth=None)
            clf.fit(np.array(new_train_tweets), np.array(train_truth))
            test_predicted = clf.predict(np.array(new_test_tweets))
            # getaccuracy(test_predicted, test_truth)
        elif sys.argv[1] == "linearsvm":
            # Using Linear svm for classification.
            print('Linear SVM')
            clf = svm.LinearSVC(random_state=20)
            clf.fit(np.array(new_train_tweets), np.array(train_truth))
            test_predicted = clf.predict(np.array(new_test_tweets))
            # print("F.score:")
            # print(f1_score(test_predicted, test_truth, average="micro"))
            # print("Accuracy:")
            # print(accuracy_score(test_predicted, test_truth, normalize="False"))
            # getaccuracy(test_predicted, test_truth)
        # elif sys.argv[1] == "polysvm":
        #     print('Poly SVM')
        #     clf = svm.SVC(kernel='poly')
        #     clf.fit(np.array(new_train_tweets), np.array(train_truth))
        #     test_predicted = clf.predict(np.array(new_test_tweets))
        elif sys.argv[1] == "nn":
            print('Neural Network')
            clf = Sequential()
            clf.add(Dense(7460, activation='relu'))
            clf.add(Dense(5000, activation='relu'))
            clf.add(Dense(2000, activation='relu'))
            clf.add(Dense(500, activation='relu'))
            # sigmoid (not softmax) on the single output unit, to match the
            # binary_crossentropy loss below; softmax over one unit always
            # outputs 1
            clf.add(Dense(1, activation='sigmoid'))
            clf.compile(optimizer='adam', loss='binary_crossentropy',
                        metrics=['accuracy'])
            clf.fit(np.array(new_train_tweets), np.array(train_truth),
                    batch_size=64, epochs=10, validation_split=0.1)
            test_predicted = clf.predict(np.array(new_test_tweets))
            print(f1_score(test_predicted, test_truth, average="micro"))
        elif sys.argv[1] == "xgb":
            xgb_model = xgb.XGBClassifier(objective="binary:logistic")
            xgb_model.fit(np.array(new_train_tweets), np.array(train_truth))
            test_predicted = xgb_model.predict(np.array(new_test_tweets))

        accuracy += getaccuracy(test_predicted, test_truth)
        tp += gettp(test_predicted, test_truth)
        tn += gettn(test_predicted, test_truth)
        fp += getfp(test_predicted, test_truth)
        fn += getfn(test_predicted, test_truth)
        if sys.argv[1] == "nn":
            print(accuracy)
            # print(tp, tn, fp, fn)
            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            print("F-score:")
            print((2 * precision * recall) / (precision + recall))
            break

    print(accuracy / 10.0)
    # print(tp, tn, fp, fn)
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    print("F-score:")
    print((2 * precision * recall) / (precision + recall))
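# For reference, a hedged sketch of the same 10-fold split using
# scikit-learn's KFold instead of the manual index arithmetic above (variable
# names reuse the ones inside the function):
from sklearn.model_selection import KFold

for train_index, test_index in KFold(n_splits=10).split(tweets):
    train_tweets = [tweets[j] for j in train_index]
    test_tweets = [tweets[j] for j in test_index]
    train_truth = [truth[j] for j in train_index]
    test_truth = [truth[j] for j in test_index]
    # ... feature selection, fit and evaluation proceed as in the function ...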
def main():
    width = 128
    height = 128
    depth = 3
    classes = 2
    NUM_EPOCHS = 50

    # initialize the optimizer and model
    opt = tf.keras.optimizers.SGD(lr=0.01)

    project_dir = "deepfake-detection-challenge"
    # train_metadata, train_videos, labels, originals = load_json(project_dir)
    train_sub_dir = "/train_sample_videos/"
    dest_train_1 = '/train_1/'
    # break_to_frames_train(project_dir, train_videos, labels, width, height)
    # test_video_names, test_videos = load_test_videos(project_dir)
    test_sub_dir = "/test_videos/"
    dest_test_1 = '/test_1/'
    # break_to_frames_test(project_dir, test_videos, width, height)

    train_new_csv = make_dataframe_train(project_dir)
    test_new_csv = make_dataframe_test(project_dir)
    train_new_csv = '/train_new.csv'
    X_train, y_train, X_test, y_test, train, y_train_original, y_test_original = get_Xy(
        project_dir, train_new_csv, width, height, depth)

    # Normalization
    X_train = X_train.astype("float") / 255.0
    X_test = X_test.astype("float") / 255.0
    # One hot encode y

    choice = 4

    if choice == 1:  # not working
        base_model = vgg16Model(X_train, X_test, width, height, depth, classes)
        # checkpointing to save the weights of the best model
        mcp_save = tf.keras.callbacks.ModelCheckpoint('weight.hdf5',
                                                      save_best_only=True,
                                                      monitor='val_loss',
                                                      mode='min')
        # compiling the model
        base_model.compile(loss='categorical_crossentropy', optimizer='Adam',
                           metrics=['accuracy'])
        # training the model
        H = base_model.fit(X_train, y_train, epochs=50,
                           validation_data=(X_test, y_test),
                           callbacks=[mcp_save], batch_size=128)
        # (the original evaluated an undefined `model` here; base_model is meant)
        print("Base Model - Test Data Loss and Accuracy: ",
              base_model.evaluate(X_test, y_test))
        print("Final Plot ")
        plotAccLoss(H, NUM_EPOCHS)

    if choice == 2:
        # Feature Extraction and Usage of Secondary Model
        vggModel = tf.keras.applications.VGG16(weights='imagenet',
                                               include_top=False,
                                               input_shape=(width, height, depth))
        print(vggModel.summary())
        X_train_new = vggModel.predict(X_train)
        X_train_new = X_train_new.reshape(X_train_new.shape[0], -1)
        X_val_new = vggModel.predict(X_test)
        X_val_new = X_val_new.reshape(X_val_new.shape[0], -1)

        secondary_model = 'random_forest'
        if secondary_model == 'random_forest':
            print("Secondary Model - Random Forest ")
            model = RandomForestClassifier(200)
            model.fit(X_train_new, y_train)
            # evaluate the model
            results = model.predict(X_val_new)
            print("Random Forest Accuracy ",
                  metrics.accuracy_score(results, y_test))
        if secondary_model == 'naive_bayes':
            print("Secondary Model - Using Naive Bayes")
            nBayes = GaussianNB()
            nBayes = nBayes.fit(X_train_new, y_train)
            accuracy = nBayes.score(X_val_new, y_test)
            print("Naive Bayes Accuracy ", accuracy)

    if choice == 3:  # not working
        # FineTuning
        inceptionV3Model = tf.keras.applications.InceptionV3(
            weights='imagenet', include_top=False,
            input_shape=(width, height, depth))
        inceptionV3Model.trainable = False
        model = tf.keras.models.Sequential()
        model.add(inceptionV3Model)
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dropout(0.5))
        model.add(tf.keras.layers.Dense(256, 'relu'))
        model.add(tf.keras.layers.Dense(classes, activation='sigmoid'))
        print(model.summary())
        NUM_EPOCHS = 50
        opt = tf.keras.optimizers.SGD(lr=0.001)
        model.compile(loss="sparse_categorical_crossentropy", optimizer=opt,
                      metrics=["accuracy"])
        H = model.fit(X_train, y_train, epochs=50, batch_size=32,
                      validation_data=(X_test, y_test))
        plotAccLoss(H, NUM_EPOCHS)

        print("\n Phase B - Fine Tune Fully Connected Layer and Selected Convolutional Layers \n")
        inceptionV3Model.trainable = True
        trainableFlag = False
        for layer in inceptionV3Model.layers:
            if layer.name == 'block4_conv1':
                trainableFlag = True
            layer.trainable = trainableFlag
        opt = tf.keras.optimizers.SGD(lr=0.00001)
        model.compile(loss="sparse_categorical_crossentropy", optimizer=opt,
                      metrics=["accuracy"])
        print(model.summary())
        H = model.fit(trainX, trainY, epochs=NUM_EPOCHS, batch_size=32,
                      validation_data=(testX, testY))
        print("Final Plot ")
        plotAccLoss(H, NUM_EPOCHS)

    if choice == 4:  # works
        # Feature Extraction and Usage of Secondary Model
        inceptionV3Model = tf.keras.applications.InceptionV3(
            weights='imagenet', include_top=False,
            input_shape=(width, height, depth))
        inceptionV3Model.trainable = False
        print(inceptionV3Model.summary())
        X_train_new = inceptionV3Model.predict(X_train)
        X_train_new = X_train_new.reshape(X_train_new.shape[0], -1)
        X_val_new = inceptionV3Model.predict(X_test)
        X_val_new = X_val_new.reshape(X_val_new.shape[0], -1)

        secondary_model = 'random_forest'
        if secondary_model == 'random_forest':
            print("Secondary Model - Random Forest ")
            model = RandomForestClassifier(200)
            model.fit(X_train_new, y_train)
            # evaluate the model
            accuracy = evaluate(model, X_val_new, y_test)
            # results = model.predict(X_val_new)
            # print("Random Forest Accuracy ", metrics.accuracy_score(results, y_test))
            print("Random Forest Accuracy ", accuracy)
        if secondary_model == 'naive_bayes':
            print("Secondary Model - Using Naive Bayes")
            nBayes = GaussianNB()
            nBayes = nBayes.fit(X_train_new, y_train)
            accuracy = nBayes.score(X_val_new, y_test)
            print("Naive Bayes Accuracy ", accuracy)

    if choice == 41:  # works
        # Feature Extraction and Usage of Secondary Model
        inceptionV3Model = tf.keras.applications.InceptionV3(
            weights='imagenet', include_top=False,
            input_shape=(width, height, depth))
        inceptionV3Model.trainable = False
        print(inceptionV3Model.summary())
        X_train_new = inceptionV3Model.predict(X_train)
        X_train_new = X_train_new.reshape(X_train_new.shape[0], -1)
        X_val_new = inceptionV3Model.predict(X_test)
        print("X_val_new b4 reshaping ", X_val_new)
        X_val_new = X_val_new.reshape(X_val_new.shape[0], -1)

        secondary_model = 'random_forest'
        if secondary_model == 'random_forest':
            print("Secondary Model - Random Forest ")
            model = RandomForestClassifier(200)
            model.fit(X_train_new, y_train)
            # evaluate the model
            predY = model.predict(X_val_new)
            # accuracy on the images
            print("Images - Random Forest Accuracy ",
                  metrics.accuracy_score(predY, y_test))
            # name of the video, label in X_val_new
            # collect all the images / group by all images with the same first
            # name and count probability; if out of 11 frames at least 3 are
            # fake, then the video is fake
            # storing the images and their class in a dataframe
            # print("train.head() ", train.head(), train.shape)
            # print("y_test ", y_test, y_test.shape)
            # print("predY ", predY, predY.shape)
            # print("predY[:,0] ", predY[:,0])  # this is a series
            # print("X_val_new ", X_val_new, X_val_new.shape)
            # pred_data_frame = train.copy(deep=True)
            # video_names = []
            # image_names = train['image']
            # for i in range(len(image_names)):
            #     # get the video name from the frame e.g. aagfhgtpmv.mp4_frame0.jpg
            #     video_names.append(image_names[i].split("_")[0])
            # pred_data_frame['video'] = video_names
            # print("pred_data_frame.head() ", pred_data_frame.head())
            # pred_data_frame['pred_image_fake'] = predY[:, 0]
            # pred_data_frame['pred_image_real'] = predY[:, 1]
            # pred_video_label1 = []
            # # sort the df based on video names
            # # pred_data_frame = pred_data_frame.sort_values(by=['video'])
            # pred_video_label = pred_data_frame.groupby(['video'])['pred_image_label'].count()
            # print(pred_video_label.head())
            # print("Video Classification Accuracy ", metrics.accuracy_score(predY, y_test))
        # if secondary_model == 'naive_bayes':
        #     print("Secondary Model - Using Naive Bayes")
        #     nBayes = GaussianNB()
        #     nBayes = nBayes.fit(X_train_new, y_train)
        #     accuracy = nBayes.score(X_val_new, y_test)
        #     print("Naive Bayes Accuracy ", accuracy)

    if choice == 5:  # lstm
        model = Sequential()
        model.add(LSTM(256, dropout=0.2,
                       input_shape=(train_data.shape[1], train_data.shape[2])))
        model.add(Dense(1024, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(5, activation='softmax'))
        sgd = SGD(lr=0.00005, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(optimizer=sgd, loss='categorical_crossentropy',
                      metrics=['accuracy'])
        # model.load_weights('video_1_LSTM_1_512.h5')
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=10, verbose=0),
            ModelCheckpoint('video_1_LSTM_1_1024.h5', monitor='val_loss',
                            save_best_only=True, verbose=0)
        ]
        nb_epoch = 500
        model.fit(train_data, train_labels,
                  validation_data=(validation_data, validation_labels),
                  batch_size=batch_size, epochs=nb_epoch,
                  callbacks=callbacks, shuffle=True, verbose=1)
        return model

    if choice == 6:  # ensemble
        vggModel = tf.keras.applications.VGG16(weights='imagenet',
                                               include_top=False,
                                               input_shape=(128, 128, 3))
        model1 = tf.keras.models.Sequential()
        model1.add(vggModel)
        model1.add(tf.keras.layers.Flatten())
        model1.add(tf.keras.layers.Dropout(0.5))
        model1.add(tf.keras.layers.Dense(256, 'relu'))
        model1.add(tf.keras.layers.Dense(17, activation='softmax'))

        inceptionv3model = tf.keras.applications.InceptionV3(
            weights='imagenet', include_top=False, input_shape=(128, 128, 3))
        model2 = tf.keras.models.Sequential()
        model2.add(inceptionv3model)
        model2.add(tf.keras.layers.Flatten())
        model2.add(tf.keras.layers.Dropout(0.5))
        model2.add(tf.keras.layers.Dense(256, 'relu'))
        model2.add(tf.keras.layers.Dense(17, activation='softmax'))

        model_name = 'knn'
        if model_name == 'randomforest':
            model = RandomForestClassifier(200)
            model.fit(featuresTrain, trainY)
            # evaluate the model
            results = model.predict(featuresVal)
            print(metrics.accuracy_score(results, testY))
        if model_name == 'knn':
            print("using knn")
            knn = KNeighborsClassifier(n_neighbors=3)
            knn.fit(featuresTrain, trainY)
            results = knn.predict(featuresVal)
            print(metrics.accuracy_score(results, testY))
        if model_name == 'naive_bayes':
            print("Using Naive Bayes")
            nBayes = GaussianNB()
            nBayes = nBayes.fit(featuresTrain, trainY)
            accuracy = nBayes.score(featuresVal, testY)
            print("Naive Bayes Accuracy ", accuracy)
        if model_name == 'svm':
            print("Using SVM")
            svc = SVC(gamma='auto')
            svc = svc.fit(featuresTrain, trainY)
            # accuracy = svc.score(test_features, test_labels)
            accuracy = evaluate(svc, featuresVal, testY)
            print("SVM Accuracy ", accuracy)

        # resnet50model = tf.keras.applications.resnet50(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
        # model3 = tf.keras.models.Sequential()
        # model3.add(resnet50model)
        # model3.add(tf.keras.layers.Flatten())
        # model3.add(tf.keras.layers.Dropout(0.5))
        # model3.add(tf.keras.layers.Dense(256, 'relu'))
        # model3.add(tf.keras.layers.Dense(17, activation='softmax'))

        # Find the probabilities of all 17 classes in each instance of test
        # data - should be 340 * 17
        predicted_vals1 = model1.predict(testX)
        print("predicted_vals1 shape ", predicted_vals1.shape)
        print("predicted_vals1 ", predicted_vals1)
        predicted_vals2 = model2.predict(testX)
        print("predicted_vals2 shape ", predicted_vals2.shape)
        print("predicted_vals2 ", predicted_vals2)
        # predicted_vals3 = model3.predict(testX)
        # print("predicted_vals3 shape ", predicted_vals3.shape)
        # print("predicted_vals3 ", predicted_vals3)

        # Element-wise addition, since we want to add the probabilities of
        # each class for each image, then take the average (with the 2 models
        # in use the sum is multiplied by 1/2).
        predY_sum = predicted_vals1 + predicted_vals2
        element_wise_sum_avg = predY_sum * (1 / 2)
        # Now doing np.argmax
        predY = np.argmax(element_wise_sum_avg, axis=1)
        print("predY ", predY)
        print("Checking shapes of testY and predY ", testY.shape, " ", predY.shape)
        accuracy = accuracy_score(testY, predY)
        print(accuracy)

    if choice == 7:
        resnet101model = tf.keras.applications.ResNet101(
            weights='imagenet', include_top=False, input_shape=(128, 128, 3))
        print(resnet101model.summary())
        featuresTrain = resnet101model.predict(trainX)
        featuresTrain = featuresTrain.reshape(featuresTrain.shape[0], -1)
        featuresVal = resnet101model.predict(testX)
        featuresVal = featuresVal.reshape(featuresVal.shape[0], -1)
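# evaluate() is called in the choices above but not defined in this excerpt; a
# hypothetical sketch consistent with how it is used (model, features and
# labels in, accuracy out):
def evaluate(model, X, y):
    predictions = model.predict(X)
    return metrics.accuracy_score(y, predictions)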
def main():
    # df = combine_datasets()
    df = pd.read_csv('./data/combined.csv', index_col=0)
    # df.fillna(-1, inplace=True)
    # df = df.drop(df[~df['certificate'].isin(['G', 'PG', 'PG-13', 'R', 'Not Rated'])].index)
    # df = add_award_points(df)

    # Data preprocessing/encoding
    df = df.drop(['movie', 'movie_id', 'synopsis', 'genre'], axis=1)
    df['popularity'] = 1 / np.array(df['popularity']) * 100
    df = pd.get_dummies(df, columns=['certificate'])
    cols = df.columns.tolist()
    cols = (cols[df.columns.get_loc('oscar_animated') + 1:] +
            cols[:df.columns.get_loc('oscar_animated') + 1])
    df = df[cols]
    df = df.reset_index(drop=True)
    splitIndex = df.index[df['year'] == 2018][0]
    df = df.drop(['year'], axis=1)

    # Splits data into training and testing sets
    oscarStart = df.columns.get_loc('oscar_best_picture')
    x = df.iloc[:, :oscarStart].values
    y = df.iloc[:, oscarStart:].values
    y[(y > 0) & (y < 1)] = 0.5  # winner is 1, nominee is 0.5, nothing is 0
    xTrain, xTest = x[:splitIndex], x[splitIndex:]
    yTrain, yTest = y[:splitIndex], y[splitIndex:]

    # Checks how imbalanced the data is
    unique, counts = np.unique(yTrain, return_counts=True)
    print(dict(zip(unique, counts)))

    # Scales inputs to avoid one variable having more weight than another
    sc = StandardScaler()
    xTrain = sc.fit_transform(xTrain)
    xTest = sc.transform(xTest)

    modelType = 'neuralnetwork'
    predictCategory = True

    if modelType == 'randomforest':
        model = RandomForestClassifier(random_state=21)
        model.fit(xTrain, yTrain)
        yPred = model.predict(xTest)
        p = np.where(yPred == 2)
        v = np.where(yTest == 2)
    elif modelType == 'neuralnetwork':
        if not predictCategory:
            # One hot encoding for softmax activation function
            trainTargets = []
            for i in yTrain:
                if 1 in i:
                    trainTargets.append([1, 0, 0])
                elif 0.5 in i:
                    trainTargets.append([0, 1, 0])
                else:
                    trainTargets.append([0, 0, 1])
            yTrain = np.array(trainTargets)

            testTargets = []
            for i in yTest:
                if 1 in i:
                    testTargets.append([1, 0, 0])
                elif 0.5 in i:
                    testTargets.append([0, 1, 0])
                else:
                    testTargets.append([0, 0, 1])
            yTest = np.array(testTargets)

            model = Sequential()
            model.add(Dense(256, input_dim=xTrain.shape[1]))
            model.add(Activation('relu'))
            model.add(Dropout(0.2))
            model.add(Dense(3))
            model.add(Activation('softmax'))
            model.compile(optimizer=Adam(lr=0.01),
                          loss='categorical_crossentropy', metrics=['mse'])
            classWeights = {
                0: counts.sum() / counts[2],
                1: counts.sum() / counts[1],
                2: counts.sum() / counts[0]
            }
            model.fit(xTrain, yTrain, epochs=512, batch_size=32,
                      class_weight=classWeights)
        else:
            # One hot encoding for softmax activation function
            trainTargets = [[] for i in range(0, 6)]
            for i in yTrain:
                for idx, j in enumerate(i):
                    if j == 1:      # winner
                        trainTargets[idx].append([1, 0, 0])
                    elif j == 0.5:  # nominee
                        trainTargets[idx].append([0, 1, 0])
                    else:           # loser/nothing
                        trainTargets[idx].append([0, 0, 1])
            yTrain = [np.array(i) for i in trainTargets]

            testTargets = [[] for i in range(0, 6)]
            for i in yTest:
                for idx, j in enumerate(i):
                    if j == 1:      # winner
                        testTargets[idx].append([1, 0, 0])
                    elif j == 0.5:  # nominee
                        testTargets[idx].append([0, 1, 0])
                    else:           # loser/nothing
                        testTargets[idx].append([0, 0, 1])
            yTest = [np.array(i) for i in testTargets]

            if os.path.exists('best.h5'):
                model = load_model('best.h5')
            else:
                input = Input(shape=(xTrain.shape[1],))
                x = Dense(128, activation='relu')(input)
                x = BatchNormalization()(x)
                x = Dropout(0.2)(x)
                output1 = Dense(3, activation='softmax')(x)
                output2 = Dense(3, activation='softmax')(x)
                output3 = Dense(3, activation='softmax')(x)
                output4 = Dense(3, activation='softmax')(x)
                output5 = Dense(3, activation='softmax')(x)
                output6 = Dense(3, activation='softmax')(x)
                model = Model(inputs=input,
                              outputs=[output1, output2, output3,
                                       output4, output5, output6])
                model.compile(optimizer=Adam(lr=0.01),
                              loss='categorical_crossentropy')
                classWeights = {
                    0: counts.sum() / counts[2],
                    1: counts.sum() / counts[1],
                    2: counts.sum() / counts[0]
                }
                model.fit(xTrain, yTrain, epochs=512, batch_size=32,
                          class_weight=classWeights)
                # model.save('best.h5')

    # Training accuracy (put training data back in) and testing accuracy
    compute_model_accuracies(predictCategory, '(TRAINING)', model, xTrain,
                             yTrain, splitIndex)
    compute_model_accuracies(predictCategory, '(TESTING)', model, xTest,
                             yTest, splitIndex)
testY = to_categorical(testY, num_classes=len(labels) + 1)

model = RandomForestClassifier(criterion='gini',
                               max_depth=138,
                               max_features='auto',
                               n_estimators=1)
model.fit(trainX, trainY)
y_pred = model.predict(testX)
print('accuracy RR %s' % metrics.accuracy_score(y_pred, testY))

# neural network
model = Sequential()
model.add(Embedding(vocab_size, output_dim=1500, input_length=max_len,
                    trainable=True))
model.add(Bidirectional(CuDNNLSTM(128, return_sequences=False)))
model.add(Dropout(0.1))
model.add(Dense(units=1024, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(len(labels) + 1))
model.add(Activation("softmax"))
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])

epochs = 10
checkpoint = ModelCheckpoint('models/WWII_names.h5f')
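# The snippet cuts off at the checkpoint; a hedged sketch of how training
# would plausibly continue (the monitored metric and batch size are
# assumptions, not from the source):
model.fit(trainX, trainY,
          validation_data=(testX, testY),
          epochs=epochs, batch_size=64,
          callbacks=[checkpoint])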
    index1.append(i)

import random

index = list(range(len(df)))
train_index = random.sample(index0, int(0.8 * len(index0))) + random.sample(
    index1, int(0.8 * len(index1)))
# test_index is the index of the test data (2000 samples are randomly left
# out as the test set)
test_index = []
# train_index is the index of the train data
for i in index:
    if i not in train_index:
        test_index.append(i)
print(len(train_index))

model = Sequential()
model.add(Dense(units=50, input_dim=len(df[0]), activation='relu'))
model.add(Dense(units=20, activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit(df[train_index], label[train_index], epochs=1000, batch_size=20)

pred = model.predict_classes(df[test_index]).reshape(len(test_index))
print(pred)

k = 0
for i in range(len(pred)):
    if pred[i] == label[test_index][i]:
        k = k + 1
print(k / len(test_index))
# model.save_weights('E:\...\my_model_weights.h5')
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(random_state=0, verbose=3)
model = model.fit(X_train, df_train['label'].values)
y_prediction = model.predict(X_test)
print("\naccuracy",
      np.sum(y_prediction == df_test['label'].values) / float(len(df_test)))

from time import time

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

start = time()
model = Sequential()
model.add(Dense(512, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='sgd',
              metrics=['accuracy'])
# the original fitted a stray `mode` variable here; the freshly built
# Sequential model is what should be trained
model.fit(X_train, y_train_onehot)
print('\ntime taken %s seconds' % str(time() - start))
neural_data = np.loadtxt('stats_noheader.csv', delimiter=',')
# split into input (X) and output (Y) variables
X_neural = neural_data[:, 0:47]
Y_neural = neural_data[:, 47]

# split into 75% for train and 25% for test
X_train, X_test, y_train, y_test = train_test_split(X_neural, Y_neural,
                                                    test_size=0.25,
                                                    random_state=7)

# create model
model = Sequential()
model.add(Dense(12, input_dim=47, kernel_initializer='uniform',
                activation='relu'))
model.add(Dense(8, kernel_initializer='uniform', activation='relu'))
model.add(Dense(1, kernel_initializer='uniform', activation='sigmoid'))

# compile model
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])

# fit the model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test),
                    epochs=100, batch_size=10)

# list all data in history
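# A hedged continuation of the "list all data in history" step (matplotlib
# imported as plt is an assumption; the exact key names depend on the Keras
# version, e.g. 'acc'/'val_acc' in older releases):
print(history.history.keys())
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()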
# SVC best estimator
svc = grid_svc.best_estimator_

# DecisionTree Classifier
tree_params = {
    "criterion": ["gini", "entropy"],
    "max_depth": list(range(2, 4, 1)),
    "min_samples_leaf": list(range(5, 7, 1))
}
grid_tree = GridSearchCV(DecisionTreeClassifier(), tree_params)
grid_tree.fit(X_train, Y_train)

# tree best estimator
tree_clf = grid_tree.best_estimator_

model = Sequential()
model.add(Dense(128, kernel_initializer="uniform", input_dim=13,
                activation='relu'))
model.add(Dense(64, kernel_initializer="uniform", activation="relu"))
model.add(Dense(1, kernel_initializer="uniform", activation="sigmoid"))
model.compile(loss="binary_crossentropy", metrics=['accuracy'],
              optimizer='adam')
model.summary()

history = model.fit(X_train, Y_train, epochs=100, batch_size=100)

plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train'], loc='upper left')  # only the training loss is plotted
plt.show()
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(criterion="gini", random_state=0)
cm_random_forest = evaluate_classifier(classifier, X_train, y_train)

# Kernel SVM Classifier - RBF gives 94% accuracy on the test set; linear gives 96%
from sklearn.svm import SVC
classifier = SVC(kernel="linear", random_state=0)
cm_svm = evaluate_classifier(classifier, X_train, y_train)

# Neural Network
from keras.layers import Dense
from keras.models import Sequential
from keras.layers import Dropout

classifier = Sequential()
classifier.add(Dense(units=50, input_dim=100, activation="relu",
                     kernel_initializer="uniform"))
classifier.add(Dropout(0.1))
classifier.add(Dense(units=50, activation="relu",
                     kernel_initializer="uniform"))
classifier.add(Dropout(0.1))
classifier.add(Dense(units=6, activation="softmax",
                     kernel_initializer="uniform"))
classifier.compile(optimizer="adam", loss="categorical_crossentropy",
                   metrics=["accuracy"])
classifier.fit(X_train_pca, y_train, batch_size=25, epochs=100)

y_pred = classifier.predict(X_test_pca)
y_prediction = np.argmax(y_pred, axis=1)
y_test = np.argmax(y_test, axis=1)
# y_prediction = np.array([1,2,3,4,5,6])
from sklearn.metrics import confusion_matrix
# the snippet was cut off here; following the cm_random_forest/cm_svm pattern:
cm_nn = confusion_matrix(y_test, y_prediction)