done = False i = 0 bad_indices = np.arange(len(training_data)) jokes_indices_list = np.array([]) serious_indices_list = np.array([]) while not done: ################ CLASSIFICATION METHOD ########################## model = Sequential() model.add(Dense(128, activation='relu', input_dim=n_dim)) model.add(Dropout(0.2)) model.add(Dense(128, activation='relu')) model.add(Dense(1, activation='sigmoid')) model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=[AUC()]) model.fit(train_vecs_w2v, y_train, epochs=10, batch_size=32, verbose=2) score = model.evaluate(test_vecs_w2v, y_test, batch_size=128, verbose=2) print("Evaluation accuracy: ", score[1]) predictions = model.predict(train_vecs) pred = predictions.flatten() # should be a 0d numpy vector of predictions ###################################################################### bad_indices_dict = {} for j in bad_indices: bad_indices_dict[j] = pred[j] thresh = (i + 4)**4
# Notebook-style evaluation script: persist the trained model, plot the
# training curves, and score validation predictions with a Keras AUC metric.
from keras.metrics import AUC
from sklearn.preprocessing import LabelBinarizer

loss.head()
model.save('em_best.h5')
loss[['loss','val_loss']].plot()
loss[['accuracy','val_accuracy']].plot()

predict = model.predict_generator(validation_generator)
predict.shape

m = AUC(num_thresholds=50,curve='ROC')
# NOTE(review): the scores are thresholded to booleans before being fed to
# the AUC metric, so the ROC curve is built from hard decisions rather than
# raw scores - confirm this is intended.
predict = predict>0.5
validation_generator.classes

# Encode the ground-truth labels BEFORE updating the metric; the original
# order referenced y_enco_true one statement before it was assigned.
binarise = LabelBinarizer()
y_enco_true = binarise.fit_transform(validation_generator.classes)
y_enco_true.shape
m.update_state(y_enco_true,predict)
) inputs = model.inputs[:2] dense = model.get_layer('NSP-Dense').output dropout = keras.layers.Dropout(0.5)(dense) outputs_t = keras.layers.Dense(N_TOPICS, name="topic", activation="sigmoid")(dropout) outputs = keras.layers.Dense(1, name="label", activation="sigmoid")(dropout) model = keras.models.Model(inputs, [outputs, outputs_t]) #model.summary() losses = { "label": "binary_crossentropy", "topic": cos_similarity } metrics = { "label": AUC(name="auc"), "topic": cos_distance } lossWeights = {"label": ALPHA, "topic": 1-ALPHA} model.compile( Adam(lr=LR), loss=losses, loss_weights=lossWeights, metrics=metrics, ) try: os.remove("best_model.h5") except: print ("file not found")
class NasFcAnn(object):
    '''Fully connected network whose hidden-layer sizes are grown greedily.

    The class bundles data loading, pre-processing, an architecture search
    (setUpModelTrain), evaluation and a set of export helpers that write
    text/CSV/PNG artefacts below the directories listed in ``paths``.

    Required keyword arguments of the constructor: dataPath, name, type,
    normalize, regRate, positiveRegion.
    '''
    #Add class attributes simple to complex.
    #The following is just a partial list with default values. Add as needed.
    __name = 'default'
    __type = 'slice'

    #Path Parameters
    #'INPUT/Voi_Data/', 'EXP1/','EXP1/CHKPNT/'
    paths = ('CadLung/INPUT/Voi_Data/', 'CadLung/EXP2/',
             'CadLung/EXP2/CHKPNT/', 'CadLung/EXP2/REPORT/')
    __dataPath = 'none'

    #Data Parameters
    __normalize = 'none'  #Options: {Default: 'none', 'ra', 'zs'}
    __positiveRegion = 'y'  #Options: {Default: 'n', 'y'}
    positiveRegionMin = 0.0001  #{Default: 0.0001}

    #Weight Option Parameters
    weightThreshold = 0.01  #0.001

    #Model Parameters
    learningRate = 0.01  #Default: 0.01
    valSplit = 0.15
    epochs = 500
    batchSize = 32
    METRICS = [
        'accuracy',
        AUC(name='AUC'),
        TruePositives(),
        FalsePositives(),
        TrueNegatives(),
        FalseNegatives()
    ]
    lossFn = 'mean_squared_error'  #'binary_crossentropy'
    initializer = 'random_normal'
    optMthd = SGD(learning_rate=learningRate)  #Adam(learning_rate=learningRate)
    __regRate = 0.001

    #Network Architecture Parameters
    #first layer, hidden layers, and output layer.
    #hidden notes > 1.
    maxNumNodes = (None, 5, 5, 5, 1)

    #Activation Functions and Shape Parameters
    #Creating custom swish activation functions
    swishBeta = 0.5  #Default = 1

    def swish(x, beta=swishBeta):
        return (x * sigmoid(beta * x))

    def customRelu(x):
        #https://keras.io/api/layers/activations/
        return relu(x, alpha=0.2, max_value=None, threshold=0)

    #Registered at class-definition time so the names can be used as
    #activation strings below.
    get_custom_objects().update({'swish': swish, 'customRelu': customRelu})

    #setting activation functions
    activationFn = (None, 'swish', 'swish', 'swish', 'sigmoid')

    #derived attributes
    regFn = rg.l2(__regRate)
    checkpoint_filepath = paths[2] + 'checkpoint.hdf5'
    lenMaxNumHidenLayer = len(maxNumNodes) - 2
    modelChkPnt_cBk = cB.ModelCheckpoint(filepath=checkpoint_filepath,
                                         save_weights_only=True,
                                         monitor='val_AUC',
                                         mode='max',
                                         save_best_only=True)
    clBacks = [modelChkPnt_cBk]

    def __init__(self, **kwarg):
        '''Initialization

        Reads dataPath, name, type, normalize, regRate and positiveRegion
        from the keyword arguments; a missing key raises KeyError.
        '''
        self.__dataPath = kwarg['dataPath']
        self.__name = kwarg['name']
        self.__type = kwarg['type']
        self.__normalize = kwarg['normalize']
        self.__regRate = kwarg['regRate']
        self.__positiveRegion = kwarg['positiveRegion']
        #Re-derive the regularizer: the class-level regFn was built from the
        #default rate, so without this the regRate argument had no effect.
        self.regFn = rg.l2(self.__regRate)

    #
    def _buildMetrics(self):
        '''helper returning a list with new metric instances on every call'''
        return [
            'accuracy',
            AUC(name='AUC'),
            TruePositives(),
            FalsePositives(),
            TrueNegatives(),
            FalseNegatives()
        ]

    #
    def _savedModelPath(self):
        '''helper returning the hdf5 path used by exportModel/loadModel'''
        return self.paths[1] + '/MODEL/{}Model.hdf5'.format(self.__name)

    #
    def _loadSavedModel(self):
        '''helper that loads the saved hdf5 model and re-compiles it'''
        model = keras.models.load_model(self._savedModelPath(),
                                        custom_objects={
                                            'AUC': AUC(),
                                            'TP': TruePositives(),
                                            'FP': FalsePositives(),
                                            'TN': TrueNegatives(),
                                            'FN': FalseNegatives()
                                        },
                                        compile=False)
        model.compile(loss=self.lossFn,
                      optimizer=self.optMthd,
                      metrics=self._buildMetrics())
        return model

    #
    def _drawPerformanceChart(self):
        '''helper that draws the accuracy/AUC bar chart on the current figure'''
        performance = [self.testAcc, self.testAUC]
        x = ['Accuracy', 'AUC of ROC']  #creating performance labels
        x_pos = list(range(len(x)))
        plt.bar(x_pos, performance, color=('blue', 'red'))
        plt.ylim([0, 1])  #setting performance score range
        plt.xticks(x_pos, x)
        plt.title('Model Performance Metrics')

    #
    def exportParam(self, **kwarg):
        '''function to export parameters to tf file'''
        parameter_dict = {
            'Name': self.__name,
            'Normalization': self.__normalize
        }
        with open(self.paths[1] + '/MODEL/{}Parameters.tf'.format(self.__name),
                  'w') as file:
            for key, value in parameter_dict.items():
                file.write("%s,%s\n" % (key, value))

    #
    def loadData(self, **kwarg):
        '''function to load data

        For type 'slice' four arrays are read one after another from a single
        file below paths[0]; otherwise the data path is opened directly with
        np.load and the arrays are taken from keys arr_0 .. arr_3.
        '''
        if self.__type == 'slice':
            dataset_filepath = self.paths[0] + self.__dataPath
            with open(dataset_filepath, 'rb') as f2:
                self.train_set_all = np.load(f2)
                self.train_label_all = np.load(f2)
                self.test_set_all = np.load(f2)
                self.test_label_all = np.load(f2)
        else:
            file = np.load(self.__dataPath)
            self.train_set_all = file['arr_0']
            self.train_label_all = file['arr_1']
            self.test_set_all = file['arr_2']
            self.test_label_all = file['arr_3']

    #
    def exportData(self, **kwarg):
        '''function to export data to a text file (one sample per line)'''
        with open(self.paths[1] + '/INPUT/{}Data.tf'.format(self.__name),
                  'w') as file:
            for sample in self.train_set_all:
                file.write(str(sample) + '\n')
            for sample in self.test_set_all:
                file.write(str(sample) + '\n')

    #
    def doPreProcess(self, **kwarg):
        '''function to do pre-processing on the data

        Optionally shifts samples into the positive region, optionally
        normalizes ('ra' = range, 'zs' = z-score) and finally reshapes both
        sets to (samples, 1, features).
        '''

        #Normalize your data
        def rangeNormalize(data, lower, upper):  #lower, upper = range
            """function to range normalize data"""
            scaler = MinMaxScaler(feature_range=(lower, upper))
            return scaler.fit_transform(data)

        #
        def positiveNormalize(data):
            """function to move data to the positive region"""
            for i in range(len(data)):
                dataMin = min(data[i])
                if dataMin < self.positiveRegionMin:
                    #shift the sample so its minimum equals positiveRegionMin
                    data[i] = data[i] + (self.positiveRegionMin - dataMin)
            return data

        #
        if self.__positiveRegion == 'y':
            self.train_set_all = positiveNormalize(self.train_set_all)
            self.test_set_all = positiveNormalize(self.test_set_all)

        #NOTE(review): train and test sets are normalized independently of
        #each other - confirm this is intended.
        if self.__normalize == 'ra':
            self.train_set_all = rangeNormalize(self.train_set_all, 0, 1)
            self.test_set_all = rangeNormalize(self.test_set_all, 0, 1)
        elif self.__normalize == 'zs':
            self.train_set_all = stats.zscore(self.train_set_all)
            self.test_set_all = stats.zscore(self.test_set_all)

        #Insert a singleton middle axis: (samples, features) -> (samples, 1, features)
        self.train_set_all = np.reshape(
            self.train_set_all,
            (self.train_set_all.shape[0], 1, self.train_set_all.shape[1]))
        self.test_set_all = np.reshape(
            self.test_set_all,
            (self.test_set_all.shape[0], 1, self.test_set_all.shape[1]))

    #
    def exportPreProcData(self, **kwarg):
        '''function to export pre processed data to a text file'''
        with open(
                self.paths[1] +
                'INPUT/PROCESSED/{}PreProcessedData.tf'.format(self.__name),
                'w') as file:
            for sample in self.train_set_all:
                file.write(str(sample) + '\n')
            #The original wrote the stale train-loop variable here, so the
            #last training sample was repeated instead of the test samples.
            for sample in self.test_set_all:
                file.write(str(sample) + '\n')

    #
    def setUpModelTrain(self, **kwarg):
        '''function to find the best model structure

        Greedy search: hidden layers are added one at a time and, within the
        newest layer, nodes are added one at a time while the test AUC keeps
        improving. The winner is stored in self.bestModel and its fit history
        in self.modelFitBest. If a previously exported model exists and has a
        higher test AUC, it replaces the newly found model.
        '''
        print('Training...')

        #Evaluate a previously exported model (if any) for later comparison
        loadedModel = None
        loadAUC = None
        if os.path.exists(self._savedModelPath()):
            loadedModel = self._loadSavedModel()
            loadAUC = loadedModel.evaluate(self.test_set_all,
                                           self.test_label_all,
                                           verbose=0)[2]

        bestAUC = 0  #best AUC score

        #index 0 is for the input; entry hL = number of nodes in hidden layer hL
        numNodeLastHidden = np.zeros(self.lenMaxNumHidenLayer + 1)

        #Searching the best network architecture
        for hL in range(1, self.lenMaxNumHidenLayer + 1):  #Hidden Layer Loop
            for _ in range(1, self.maxNumNodes[hL] + 1):  #Node loop
                numNodeLastHidden[hL] += 1  #A new node added to the current layer

                #Re-create the temp model with a new node at the layer
                modelTmp = keras.Sequential()  #initialize temporary model
                modelTmp.add(Flatten())  #Input layer
                for iL in range(1, hL + 1):  #Adds number of hidden layers
                    #Each layer uses its own configured activation (the
                    #original indexed with hL, i.e. the newest layer's one).
                    modelTmp.add(
                        Dense(int(numNodeLastHidden[iL]),
                              activation=self.activationFn[iL],
                              kernel_initializer=self.initializer,
                              kernel_regularizer=self.regFn))
                #output layer
                modelTmp.add(
                    Dense(1,
                          activation=self.activationFn[-1],
                          kernel_initializer=self.initializer,
                          kernel_regularizer=self.regFn))
                modelTmp.compile(loss=self.lossFn,
                                 optimizer=self.optMthd,
                                 metrics=self._buildMetrics())
                modelFitTmp = modelTmp.fit(self.train_set_all,
                                           self.train_label_all,
                                           batch_size=self.batchSize,
                                           epochs=self.epochs,
                                           verbose=0,
                                           callbacks=self.clBacks,
                                           validation_split=self.valSplit)

                #Restore the weights saved by the checkpoint callback
                modelTmp.load_weights(self.checkpoint_filepath,
                                      by_name=True,
                                      skip_mismatch=True)

                #modelTmp test evaluation
                tmpAUC = modelTmp.evaluate(self.test_set_all,
                                           self.test_label_all,
                                           verbose=0)[2]

                #compare against the last "bestAUC"
                if tmpAUC > bestAUC:
                    #update the best model and continue adding a node to this layer
                    bestAUC = tmpAUC
                    self.bestModel = modelTmp
                    self.modelFitBest = modelFitTmp
                else:
                    #adding a new node did not improve the performance.
                    if numNodeLastHidden[hL] != 1:
                        numNodeLastHidden[hL] -= 1  #going back to best number of nodes
                    #Stop adding a new node to this layer
                    break

        #Comparing best Model to saved Model
        if loadedModel is not None:
            if loadAUC > bestAUC:
                print('Saved Model Performed Better')
                self.bestModel = loadedModel
            else:
                print('New Model Performed Better')

        #Printing best model structure
        print(self.bestModel.summary())

    #
    def convertWeights(self, **kwarg):
        '''function to convert model weights

        Every kernel weight whose magnitude is below weightThreshold is set
        to zero; biases are left untouched.
        '''
        #first layer empty, index 0 = weights, index 1 = bias
        for layer in self.bestModel.layers[1:]:
            weights = layer.get_weights()
            kernel = weights[0]
            kernel[np.abs(kernel) < self.weightThreshold] = 0
            #setting new weights
            layer.set_weights(weights)

    #
    def exportModel(self, **kwarg):
        '''function to save model to hdf5 file'''
        self.bestModel.save(self._savedModelPath())  #saving best model

    #
    def loadModel(self, **kwarg):
        '''function to load the saved hdf5 model into self.bestModel'''
        self.bestModel = self._loadSavedModel()

    #
    def testModel(self, **kwarg):
        '''function to predict on the test set; rounds for type "volume"'''
        #making test prediction, reshaped to one value per test label
        predictions = self.bestModel.predict(self.test_set_all).reshape(
            self.test_label_all.shape[0])
        if self.__type == 'volume':
            self.test_pred = [p.round() for p in predictions]
        else:
            self.test_pred = list(predictions)

    #
    def exportPredict(self, **kwarg):
        '''function to export model predictions to tf file'''
        with open(
                self.paths[1] +
                'OUTPUT/{}TestPredictions.tf'.format(self.__name),
                'w') as file:
            for prediction in self.test_pred:
                file.write(str(prediction) + '\n')

    #
    def evaluate(self, **kwarg):
        '''function to evaluate performance of model'''
        #Evaluate performance of the model
        (self.testLoss, self.testAcc, self.testAUC, self.testTP, self.testFP,
         self.testTN, self.testFN) = self.bestModel.evaluate(
             self.test_set_all, self.test_label_all, verbose=0)
        print('Accuracy: {}'.format(self.testAcc))
        print('AUC of ROC: {}'.format(self.testAUC))
        print('True Positives: {}'.format(self.testTP))
        print('False Positives: {}'.format(self.testFP))
        print('True Negatives: {}'.format(self.testTN))
        print('False Negatives: {}'.format(self.testFN))

    #
    def exportTestPerf(self, **kwarg):
        '''function to export test performance to csv file'''
        testPerformance_dict = {
            'Accuracy': self.testAcc,
            'AUC': self.testAUC,
            'True Positives': self.testTP,
            'False Positives': self.testFP,
            'True Negatives': self.testTN,
            'False Negatives': self.testFN
        }
        with open(
                self.paths[3] +
                '/PERFORMANCE/{}TestPerformance.csv'.format(self.__name),
                'w') as file:
            for key, value in testPerformance_dict.items():
                file.write("%s,%s\n" % (key, value))

    #
    def visualPerf(self, **kwarg):
        '''function to visualize model performance'''
        self._drawPerformanceChart()
        plt.show()

    #
    def exportChart(self, **kwarg):
        '''function to save chart as a png file'''
        self._drawPerformanceChart()
        plt.savefig(self.paths[1] +
                    '/OUTPUT/{}ModelPerformance.png'.format(self.__name))

    #
    def exportTrainPred(self, **kwarg):
        '''function to export training predictions to the next Model'''
        #making train predictions
        train_pred = self.bestModel.predict(self.train_set_all).reshape(
            self.train_label_all.shape[0])
        with open(
                self.paths[1] +
                'OUTPUT/{}TrainPredictions.tf'.format(self.__name),
                'w') as file:
            for prediction in train_pred:
                file.write(str(prediction) + '\n')

    #
    def exportModelWeights(self, **kwarg):
        '''function to export Model Weights

        Writes one CSV column per node ("layer<L>node<N>") holding that
        node's incoming kernel weights.
        '''
        weight_dict = {}
        numLayers = len(self.bestModel.layers)
        for layer in range(1, numLayers):  #number of layers
            #first layer empty, index 0 = weights, index 1 = bias
            kernel = self.bestModel.layers[layer].get_weights()[0]
            for node in range(len(kernel[0])):  #number of nodes
                weight_dict['layer{}node{}'.format(layer,
                                                   node + 1)] = kernel[:, node]
        #pd.Series pads columns of different length
        df = pd.DataFrame(
            {key: pd.Series(value) for key, value in weight_dict.items()})
        df.to_csv(self.paths[3] + 'WEIGHTS/{}Weights.csv'.format(self.__name),
                  encoding='utf-8',
                  index=False)

    #
    def exportTrainingError(self, **kwarg):
        '''function to export Training Error'''
        error_dict = {
            'Training Error': self.modelFitBest.history['loss'],
            'Validation Error': self.modelFitBest.history['val_loss']
        }
        df = pd.DataFrame(
            {key: pd.Series(value) for key, value in error_dict.items()})
        df.to_csv(self.paths[3] + 'ERROR/{}Error.csv'.format(self.__name),
                  encoding='utf-8',
                  index=False)

    #
    def exportCovCorrCoef(self, **kwarg):
        '''function to export Covariance and Correlation Matrices'''
        #Only using first weight layer
        kernel = self.bestModel.layers[1].get_weights()[0]
        numNodes = len(kernel[0])  #number of nodes
        columnNames = ['W_1_{}'.format(node) for node in range(numNodes)]
        weight_matrix = [kernel[:, node] for node in range(numNodes)]

        #With a single node np.cov/np.corrcoef return 0-d results; promote
        #them to 1x1 so both exports work. This replaces a bare "except:"
        #that also swallowed unrelated errors such as failed file writes.
        cov_matrix = np.atleast_2d(np.cov(weight_matrix))
        corrCoef_matrix = np.atleast_2d(np.corrcoef(weight_matrix))

        covdf = pd.DataFrame(data=cov_matrix,
                             columns=columnNames,
                             index=columnNames)
        covdf.to_csv(self.paths[3] +
                     'COVandCORR/{}Cov.csv'.format(self.__name),
                     encoding='utf-8')
        corrdf = pd.DataFrame(data=corrCoef_matrix,
                              columns=columnNames,
                              index=columnNames)
        corrdf.to_csv(self.paths[3] +
                      'COVandCORR/{}CorrCoef.csv'.format(self.__name),
                      encoding='utf-8')
def setUpModelTrain(self, **kwarg):
    '''function to find the best model structure

    Greedy search: hidden layers are added one at a time and, within the
    newest layer, nodes are added one at a time while the test AUC keeps
    improving. The winner is stored in self.bestModel and its fit history
    in self.modelFitBest. If a previously exported model exists and has a
    higher test AUC, it replaces the newly found model.
    '''
    print('Training...')

    def newMetrics():
        #new metric instances for every compile call
        return [
            'accuracy',
            AUC(name='AUC'),
            TruePositives(),
            FalsePositives(),
            TrueNegatives(),
            FalseNegatives()
        ]

    savedModelPath = self.paths[1] + '/MODEL/{}Model.hdf5'.format(self.__name)

    #Evaluate a previously exported model (if any) for later comparison
    loadedModel = None
    loadAUC = None
    if os.path.exists(savedModelPath):
        loadedModel = keras.models.load_model(savedModelPath,
                                              custom_objects={
                                                  'AUC': AUC(),
                                                  'TP': TruePositives(),
                                                  'FP': FalsePositives(),
                                                  'TN': TrueNegatives(),
                                                  'FN': FalseNegatives()
                                              },
                                              compile=False)
        loadedModel.compile(loss=self.lossFn,
                            optimizer=self.optMthd,
                            metrics=newMetrics())
        loadAUC = loadedModel.evaluate(self.test_set_all,
                                       self.test_label_all,
                                       verbose=0)[2]

    bestAUC = 0  #best AUC score

    #index 0 is for the input; entry hL = number of nodes in hidden layer hL
    numNodeLastHidden = np.zeros(self.lenMaxNumHidenLayer + 1)

    #Searching the best network architecture
    for hL in range(1, self.lenMaxNumHidenLayer + 1):  #Hidden Layer Loop
        for _ in range(1, self.maxNumNodes[hL] + 1):  #Node loop
            numNodeLastHidden[hL] += 1  #A new node added to the current layer

            #Re-create the temp model with a new node at the layer
            modelTmp = keras.Sequential()  #initialize temporary model
            modelTmp.add(Flatten())  #Input layer
            for iL in range(1, hL + 1):  #Adds number of hidden layers
                #Each layer uses its own configured activation (the
                #original indexed with hL, i.e. the newest layer's one).
                modelTmp.add(
                    Dense(int(numNodeLastHidden[iL]),
                          activation=self.activationFn[iL],
                          kernel_initializer=self.initializer,
                          kernel_regularizer=self.regFn))
            #output layer
            modelTmp.add(
                Dense(1,
                      activation=self.activationFn[-1],
                      kernel_initializer=self.initializer,
                      kernel_regularizer=self.regFn))
            modelTmp.compile(loss=self.lossFn,
                             optimizer=self.optMthd,
                             metrics=newMetrics())
            modelFitTmp = modelTmp.fit(self.train_set_all,
                                       self.train_label_all,
                                       batch_size=self.batchSize,
                                       epochs=self.epochs,
                                       verbose=0,
                                       callbacks=self.clBacks,
                                       validation_split=self.valSplit)

            #Restore the weights saved by the checkpoint callback
            modelTmp.load_weights(self.checkpoint_filepath,
                                  by_name=True,
                                  skip_mismatch=True)

            #modelTmp test evaluation
            tmpAUC = modelTmp.evaluate(self.test_set_all,
                                       self.test_label_all,
                                       verbose=0)[2]

            #compare against the last "bestAUC"
            if tmpAUC > bestAUC:
                #update the best model and continue adding a node to this layer
                bestAUC = tmpAUC
                self.bestModel = modelTmp
                self.modelFitBest = modelFitTmp
            else:
                #adding a new node did not improve the performance.
                if numNodeLastHidden[hL] != 1:
                    numNodeLastHidden[hL] -= 1  #going back to best number of nodes
                #Stop adding a new node to this layer
                break

    #Comparing best Model to saved Model
    #(reuses the result of the first existence check, so loadAUC can never be
    #referenced while unbound)
    if loadedModel is not None:
        if loadAUC > bestAUC:
            print('Saved Model Performed Better')
            self.bestModel = loadedModel
        else:
            print('New Model Performed Better')

    #Printing best model structure
    print(self.bestModel.summary())
MaxPooling2D(), Conv2D(64 * 4, (3, 3), activation='relu'), MaxPooling2D(), Conv2D(128 * 4, (3, 3), activation='relu'), MaxPooling2D(), Flatten(), Dense(1024, activation='relu'), Dense(2048, activation='relu'), Dense(2, activation='sigmoid') ]) # model.summary() model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['accuracy', AUC(), f1_m]) e_s = EarlyStopping(monitor='val_loss', patience=10) hist = model.fit(x_train, y_train, epochs=nb_epochs, validation_data=[x_test, y_test], batch_size=32, callbacks=[e_s]) pd.DataFrame(hist.history).to_csv(model_name + '_history.csv') test_loss, test_acc, test_AUC, test_f1 = model.evaluate(x_test, y_test) print("-------------------")