# StratifiedKFold preserves the target classification's categorical balance in each fold.
# Imports and initializations below are inferred from usage (the surrounding script is not shown).
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.layers import Dense
from tensorflow.keras.metrics import (BinaryAccuracy, FalseNegatives,
                                      FalsePositives, TrueNegatives, TruePositives)
from tensorflow.keras.models import Sequential

total_folds = 10  # the comment below refers to 10-fold CV
fold_num = 1      # incremented in the loop but never initialized in the original excerpt
# data_x, data_y are assumed to be loaded earlier (9 feature values per sample)

for train_idx, test_idx in StratifiedKFold(n_splits=total_folds,
                                           shuffle=True,
                                           random_state=1).split(data_x, data_y):
    print('Fold {}/{}'.format(fold_num, total_folds))
    fold_num += 1

    # Set up the training and testing sets
    X_train, X_test = data_x[train_idx], data_x[test_idx]
    y_train, y_test = data_y[train_idx], data_y[test_idx]

    # Set up the metrics we want to collect
    accuracy = BinaryAccuracy()  # Will change this to Categorical if the target classification is categorical
    tp = TruePositives()  # These could be collected with a confusion matrix, however translating back
    tn = TrueNegatives()  # and forth from an image may be frustrating (it was last time I did it)
    fp = FalsePositives()
    fn = FalseNegatives()
    metrics = [accuracy, tp, tn, fp, fn]

    # The model must be reinitialized, otherwise it will have trained on all of the data
    # (that wouldn't be true 10-fold CV)
    model = Sequential()
    model.add(Dense(128, input_shape=(9,)))    # Input layer, needs same shape as input data (9 values, 1D)
    model.add(Dense(64, activation='relu'))    # Hidden layer of nodes
    model.add(Dense(32, activation='relu'))    # Hidden layer of nodes
    model.add(Dense(1, activation='sigmoid'))  # Output layer of only one node (on/off)
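    # --- Hedged sketch (not from the original excerpt): one plausible way to
    # finish each fold -- compile with the metrics list above, fit on the fold's
    # training split, and score the held-out split. The loss, optimizer, epoch,
    # and batch settings here are illustrative assumptions.
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=metrics)
    model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)
    fold_scores = model.evaluate(X_test, y_test, verbose=0)
    print(dict(zip(model.metrics_names, fold_scores)))  # loss plus the five metrics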
# Imports below are inferred from usage (tensorflow.keras paths assumed); PATH
# (the dataset root), get_path_images, DIM, model_unet, and dice_coef_loss are
# defined in earlier cells not shown in this excerpt.
import os
from tensorflow.keras.metrics import (BinaryAccuracy, FalseNegatives,
                                      FalsePositives, TrueNegatives, TruePositives)
from tensorflow.keras.optimizers import Adam

PATH_MODEL = 'model.h5'
BATCH_SIZE = 10

# %% [code]
lung_path = os.path.join(PATH, 'CXR_png')
mask_path = os.path.join(PATH, 'masks')
test_path = os.path.join(PATH, 'test')
weight_path = "{}_weights.best.hdf5".format('cxr_reg')

# %% [code]
train_lung_path, train_mask_path, test_lung_path = get_path_images(lung_path, mask_path, test_path)

# %% [code]
metrics = [TruePositives(name='tp'),    # Correctly predicted positives
           TrueNegatives(name='tn'),    # Correctly predicted negatives
           FalsePositives(name='fp'),   # Incorrectly predicted positives
           FalseNegatives(name='fn'),   # Incorrectly predicted negatives
           BinaryAccuracy(name='accuracy')]

# %% [code]
filtros = 32
depth = 5
act = 'elu'

# Creation and compilation of the proposed model 1
model = model_unet((DIM, DIM, 1), filter_root=filtros, depth=depth, activation=act)
model.compile(optimizer=Adam(lr=1e-3), loss=dice_coef_loss, metrics=metrics)
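# %% [code]
# --- Hedged sketch (not from the original notebook): dice_coef_loss is used in
# the compile call above but never defined in this excerpt. A common soft-Dice
# formulation is shown below; the notebook's actual implementation may differ
# (e.g., in smoothing constant or per-axis reduction), and this cell would need
# to run before the compile call.
import tensorflow.keras.backend as K

def dice_coef(y_true, y_pred, smooth=1.0):
    # Flatten both masks, then compare overlap against total predicted + true area
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2.0 * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def dice_coef_loss(y_true, y_pred):
    return 1.0 - dice_coef(y_true, y_pred)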
# Imports below are inferred from usage (tensorflow.keras paths assumed);
# model_unet, dice_coef_loss, and DIM are defined earlier in the original file.
import os
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.metrics import (BinaryAccuracy, FalseNegatives,
                                      FalsePositives, TrueNegatives, TruePositives)
from tensorflow.keras.optimizers import Adam

# The head of this ReduceLROnPlateau call was cut off in the excerpt; the
# monitor/factor/patience arguments below are assumptions added for completeness.
reduceLROnPlat = ReduceLROnPlateau(
    monitor="val_loss",  # assumed
    factor=0.5,          # assumed
    patience=10,         # assumed
    mode="min",
    epsilon=1e-2,
    cooldown=2,
    min_lr=1e-8,
)
# Metric for stopping training early
early = EarlyStopping(monitor="val_loss",
                      mode="min",
                      restore_best_weights=True,
                      patience=40)
# `checkpoint` (presumably a ModelCheckpoint) is defined in a part of the
# original file not included in this excerpt
callbacks_list = [checkpoint, early, reduceLROnPlat]

model = model_unet((DIM, DIM, 1), filter_root=16, depth=4, activation="relu")
metrics = [
    TruePositives(name="tp"),   # Correctly predicted positives
    TrueNegatives(name="tn"),   # Correctly predicted negatives
    FalsePositives(name="fp"),  # Incorrectly predicted positives
    FalseNegatives(name="fn"),  # Incorrectly predicted negatives
    BinaryAccuracy(name="accuracy"),
]

# This second model_unet call supersedes the smaller model built above
weight_path = "./.model/weight_val_acc_96.34.h5"
model = model_unet((DIM, DIM, 1), filter_root=32, depth=5, activation="relu")
model.compile(optimizer=Adam(lr=1e-3), loss=dice_coef_loss, metrics=metrics)
model.summary()
model.load_weights(weight_path)

new_data = "./data"
old_data = os.listdir("./old_data")
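# --- Hedged sketch (not from the original): how callbacks_list above would
# typically be wired into training. The generator names and epoch count are
# illustrative assumptions, not values from the original file.
history = model.fit(
    train_generator,                # hypothetical training generator
    validation_data=val_generator,  # hypothetical validation generator
    epochs=100,                     # assumed
    callbacks=callbacks_list,       # checkpointing + early stopping + LR reduction
)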
# Imports below are inferred from usage (tensorflow.keras paths assumed);
# train_datagen, proj_dir, and model come from earlier in the original script.
from tensorflow.keras import optimizers
from tensorflow.keras.metrics import (FalseNegatives, FalsePositives, Precision,
                                      Recall, TrueNegatives, TruePositives)

# The head of the train_generator definition was cut off in this excerpt; it is
# reconstructed by analogy with val_generator below, so the "train"
# subdirectory name is an assumption.
train_generator = train_datagen.flow_from_directory(proj_dir / "0" / "train",  # assumed path
                                                    target_size=(299, 299),
                                                    color_mode='rgb',
                                                    batch_size=40,
                                                    class_mode='categorical',
                                                    shuffle=True)
val_generator = train_datagen.flow_from_directory(proj_dir / "0" / "test",
                                                  target_size=(299, 299),
                                                  color_mode='rgb',
                                                  batch_size=40,
                                                  class_mode='categorical',
                                                  shuffle=True)

op = optimizers.SGD(lr=0.1, momentum=0.1, decay=0.01, nesterov=False)  # defined but unused below
adam = optimizers.Adam(lr=0.001)

# Adam optimizer; the loss function will be categorical cross-entropy
model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=[Precision(), Recall(), TruePositives(),
                       FalsePositives(), FalseNegatives(), TrueNegatives()])

print("Beginning Training")
step_size_train = train_generator.n // train_generator.batch_size
validation_steps = val_generator.n // val_generator.batch_size
history = model.fit_generator(generator=train_generator,
                              steps_per_epoch=step_size_train,
                              epochs=200,
                              validation_data=val_generator,
                              validation_steps=validation_steps)
# (the fit_generator call was truncated in the excerpt; any further arguments,
# e.g. callbacks, are not shown)
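# --- Hedged sketch (not from the original): deriving an F1 score from the
# tracked precision/recall once training finishes. Keras's default metric
# names ('precision', 'recall') are assumed here; they gain numeric suffixes
# if the metric objects are instantiated more than once in a session.
final_p = history.history['val_precision'][-1]
final_r = history.history['val_recall'][-1]
f1 = 2 * final_p * final_r / (final_p + final_r + 1e-7)  # epsilon avoids divide-by-zero
print('Validation F1: {:.4f}'.format(f1))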
# Imports inferred from usage below; the original import block was not part of
# this excerpt, so the tensorflow.keras paths are assumptions.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
from tensorflow import keras
from tensorflow.keras import callbacks as cB
from tensorflow.keras import regularizers as rg
from tensorflow.keras.activations import relu, sigmoid
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.metrics import (AUC, FalseNegatives, FalsePositives,
                                      TrueNegatives, TruePositives)
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import get_custom_objects


class NasFcAnn(object):
    '''Attributes'''
    #Add class attributes simple to complex.
    #The following is just a partial list with default values. Add as needed.
    __name = 'default'
    __type = 'slice'

    #Path Parameters
    #'INPUT/Voi_Data/', 'EXP1/', 'EXP1/CHKPNT/'
    paths = ('CadLung/INPUT/Voi_Data/', 'CadLung/EXP2/',
             'CadLung/EXP2/CHKPNT/', 'CadLung/EXP2/REPORT/')
    __dataPath = 'none'

    #Data Parameters
    __normalize = 'none'        #Options: {Default: 'none', 'ra', 'zs'}
    __positiveRegion = 'y'      #Options: {Default: 'n', 'y'}
    positiveRegionMin = 0.0001  #{Default: 0.0001}

    #Weight Option Parameters
    weightThreshold = 0.01  #0.001

    #Model Parameters
    learningRate = 0.01  #Default: 0.01
    valSplit = 0.15
    epochs = 500
    batchSize = 32
    METRICS = ['accuracy', AUC(name='AUC'), TruePositives(),
               FalsePositives(), TrueNegatives(), FalseNegatives()]
    lossFn = 'mean_squared_error'  #'binary_crossentropy'
    initializer = 'random_normal'
    optMthd = SGD(learning_rate=learningRate)  #Adam(learning_rate=learningRate)
    __regRate = 0.001

    #Network Architecture Parameters
    #first layer, hidden layers, and output layer. hidden nodes > 1.
    maxNumNodes = (None, 5, 5, 5, 1)

    #Activation Functions and Shape Parameters
    #Creating custom swish activation function
    swishBeta = 0.5  #Default = 1

    def swish(x, beta=swishBeta):
        return x * sigmoid(beta * x)

    def customRelu(x):
        #https://keras.io/api/layers/activations/
        return relu(x, alpha=0.2, max_value=None, threshold=0)

    get_custom_objects().update({'swish': swish, 'customRelu': customRelu})

    #setting activation functions
    activationFn = (None, 'swish', 'swish', 'swish', 'sigmoid')

    #derived attributes
    regFn = rg.l2(__regRate)
    checkpoint_filepath = paths[2] + 'checkpoint.hdf5'
    lenMaxNumHidenLayer = len(maxNumNodes) - 2
    modelChkPnt_cBk = cB.ModelCheckpoint(filepath=checkpoint_filepath,
                                         save_weights_only=True,
                                         monitor='val_AUC',
                                         mode='max',
                                         save_best_only=True)
    clBacks = [modelChkPnt_cBk]

    def __init__(self, **kwarg):
        '''Initialization'''
        self.__dataPath = kwarg['dataPath']
        self.__name = kwarg['name']
        self.__type = kwarg['type']
        self.__normalize = kwarg['normalize']
        self.__regRate = kwarg['regRate']
        self.__positiveRegion = kwarg['positiveRegion']
    #

    def exportParam(self, **kwarg):
        '''function to export parameters to tf file'''
        parameter_dict = {'Name': self.__name, 'Normalization': self.__normalize}
        with open(self.paths[1] + '/MODEL/{}Parameters.tf'.format(self.__name), 'w') as file:
            for key in parameter_dict.keys():
                file.write("%s,%s\n" % (key, parameter_dict[key]))
    #

    def loadData(self, **kwarg):
        '''function to load data'''
        if self.__type == 'slice':
            dataset_filepath = self.paths[0] + self.__dataPath
            with open(dataset_filepath, 'rb') as f2:
                self.train_set_all = np.load(f2)
                self.train_label_all = np.load(f2)
                self.test_set_all = np.load(f2)
                self.test_label_all = np.load(f2)
        else:
            file = np.load(self.__dataPath)
            self.train_set_all = file['arr_0']
            self.train_label_all = file['arr_1']
            self.test_set_all = file['arr_2']
            self.test_label_all = file['arr_3']
    #

    def exportData(self, **kwarg):
        '''function to export data to bin file'''
        data_list = [self.train_set_all, self.test_set_all]
        with open(self.paths[1] + '/INPUT/{}Data.tf'.format(self.__name), 'w') as file:
            for i in data_list[0]:
                file.write(str(i) + '\n')
            for j in data_list[1]:
                file.write(str(j) + '\n')
    #

    def doPreProcess(self, **kwarg):
        '''function to do pre-processing on the data'''
        #Normalize your data
        def rangeNormalize(data, lower, upper):  #lower, upper = range
            """function to range normalize data"""
            scaler = MinMaxScaler(feature_range=(lower, upper))
            normalized = scaler.fit_transform(data)
            return normalized
        #

        def positiveNormalize(data):
            """function to move data to the positive region"""
            for i in range(len(data)):
                dataMin = min(data[i])
                if dataMin < self.positiveRegionMin:
                    scal = self.positiveRegionMin - dataMin
                    data[i] = data[i] + scal  #shifting elements to make minimum 0.0001
            return data
        #

        if self.__positiveRegion == 'y':
            self.train_set_all = positiveNormalize(self.train_set_all)
            self.test_set_all = positiveNormalize(self.test_set_all)

        if self.__normalize == 'ra':
            self.train_set_all = rangeNormalize(self.train_set_all, 0, 1)
            self.test_set_all = rangeNormalize(self.test_set_all, 0, 1)
        elif self.__normalize == 'zs':
            self.train_set_all = stats.zscore(self.train_set_all)
            self.test_set_all = stats.zscore(self.test_set_all)
        #

        #Dim1: Batch, (Dim2,Dim3): Flattened images
        self.train_set_all = np.reshape(
            self.train_set_all,
            (self.train_set_all.shape[0], 1, self.train_set_all.shape[1]))
        #Dim1: Batch, (Dim2,Dim3): Flattened images
        self.test_set_all = np.reshape(
            self.test_set_all,
            (self.test_set_all.shape[0], 1, self.test_set_all.shape[1]))
    #

    def exportPreProcData(self, **kwarg):
        '''function to export pre processed data to bin file'''
        preProcessedData_list = [self.train_set_all, self.test_set_all]
        with open(self.paths[1] + 'INPUT/PROCESSED/{}PreProcessedData.tf'.format(self.__name), 'w') as file:
            for i in preProcessedData_list[0]:
                file.write(str(i) + '\n')
            for j in preProcessedData_list[1]:
                file.write(str(j) + '\n')  #fixed: the original wrote str(i) here, re-exporting the last training row
    #

    def setUpModelTrain(self, **kwarg):
        '''function to find the best model structure'''
        print('Training...')
        if os.path.exists(self.paths[1] + '/MODEL/{}Model.hdf5'.format(self.__name)):
            loadedModel = keras.models.load_model(
                self.paths[1] + '/MODEL/{}Model.hdf5'.format(self.__name),
                custom_objects={'AUC': AUC(), 'TP': TruePositives(),
                                'FP': FalsePositives(), 'TN': TrueNegatives(),
                                'FN': FalseNegatives()},
                compile=False)
            loadedModel.compile(loss=self.lossFn,
                                optimizer=self.optMthd,
                                metrics=['accuracy', AUC(name='AUC'),
                                         TruePositives(), FalsePositives(),
                                         TrueNegatives(), FalseNegatives()])
            loadLoss, loadAcc, loadAUC, loadTP, loadFP, loadTN, loadFN = loadedModel.evaluate(
                self.test_set_all, self.test_label_all, verbose=0)

        bestAUC = 0  #best AUC score
        #[0 0 0 0] 1st one is for input. number of nodes added to the current layer
        numNodeLastHidden = np.zeros(self.lenMaxNumHidenLayer + 1)

        #Searching the best network architecture
        for hL in range(1, self.lenMaxNumHidenLayer + 1):  #Hidden Layer Loop (1 to 4)
            for j in range(1, self.maxNumNodes[hL] + 1):  #Node loop (1 to 6), 3 times
                numNodeLastHidden[hL] += 1  #A new node added to the current layer
                #Re-create the temp model with a new node at the layer
                modelTmp = keras.Sequential()  #initialize temporary model
                modelTmp.add(Flatten())  #Input layer
                for iL in range(1, hL + 1):  #Adds number of hidden layers
                    modelTmp.add(Dense(int(numNodeLastHidden[iL]),
                                       activation=self.activationFn[hL],
                                       kernel_initializer=self.initializer,
                                       kernel_regularizer=self.regFn))
                #output layer
                modelTmp.add(Dense(1,
                                   activation=self.activationFn[-1],
                                   kernel_initializer=self.initializer,
                                   kernel_regularizer=self.regFn))
                modelTmp.compile(loss=self.lossFn,
                                 optimizer=self.optMthd,
                                 metrics=['accuracy', AUC(name='AUC'),
                                          TruePositives(), FalsePositives(),
                                          TrueNegatives(), FalseNegatives()])
                modelFitTmp = modelTmp.fit(self.train_set_all,
                                           self.train_label_all,
                                           batch_size=self.batchSize,
                                           epochs=self.epochs,
                                           verbose=0,
                                           callbacks=self.clBacks,
                                           validation_split=self.valSplit)
                #After pulling out the best weights and the corresponding model "modelFitTmp",
                modelTmp.load_weights(self.checkpoint_filepath,
                                      by_name=True,
                                      skip_mismatch=True)  #loading test weights
                #modelTmp test evaluation
                tmpLoss, tmpAcc, tmpAUC, tmpTP, tmpFP, tmpTN, tmpFN = modelTmp.evaluate(
                    self.test_set_all, self.test_label_all, verbose=0)
                #compare against the last "bestAUC"
                if tmpAUC > bestAUC:
                    #update the best model and continue adding a node to this layer
                    bestAUC = tmpAUC
                    self.bestModel = modelTmp
                    self.modelFitBest = modelFitTmp
                    del modelTmp  #WHY ?
                else:  #adding a new node did not improve the performance.
                    if numNodeLastHidden[hL] != 1:
                        numNodeLastHidden[hL] -= 1  #going back to best number of nodes
                    #Stop adding a new node to this layer
                    break
            #for j
        #for hL

        #Comparing best Model to saved Model
        if os.path.exists(self.paths[1] + '/MODEL/{}Model.hdf5'.format(self.__name)):
            if loadAUC > bestAUC:
                print('Saved Model Performed Better')
                self.bestModel = loadedModel
            else:
                print('New Model Performed Better')

        #Printing best model structure
        print(self.bestModel.summary())
    #

    def convertWeights(self, **kwarg):
        '''function to convert model weights'''
        numLayers = len(self.bestModel.layers)
        for layer in range(1, numLayers):
            #first layer empty, index 0 = weights, index 1 = bias
            weights = self.bestModel.layers[layer].get_weights()
            for inputs in range(len(weights[0])):
                for w in range(len(weights[0][inputs])):
                    if abs(weights[0][inputs][w]) < self.weightThreshold:
                        weights[0][inputs][w] = 0
            #setting new weights
            self.bestModel.layers[layer].set_weights(weights)
    #

    def exportModel(self, **kwarg):
        '''function to save model to hdf5 file'''
        self.bestModel.save(self.paths[1] + '/MODEL/{}Model.hdf5'.format(self.__name))  #saving best model
    #

    def loadModel(self, **kwarg):
        self.bestModel = keras.models.load_model(
            self.paths[1] + '/MODEL/{}Model.hdf5'.format(self.__name),
            custom_objects={'AUC': AUC(), 'TP': TruePositives(),
                            'FP': FalsePositives(), 'TN': TrueNegatives(),
                            'FN': FalseNegatives()},
            compile=False)
        self.bestModel.compile(loss=self.lossFn,
                               optimizer=self.optMthd,
                               metrics=['accuracy', AUC(name='AUC'),
                                        TruePositives(), FalsePositives(),
                                        TrueNegatives(), FalseNegatives()])
    #

    def testModel(self, **kwarg):
        #making test prediction
        self.test_pred = []
        #reshape from (1092, 1) to (1092)
        for i in self.bestModel.predict(self.test_set_all).reshape(self.test_label_all.shape[0]):
            if self.__type == 'volume':
                self.test_pred.append(i.round())
            else:
                self.test_pred.append(i)
    #

    def exportPredict(self, **kwarg):
        '''function to export model predictions to tf file'''
        with open(self.paths[1] + 'OUTPUT/{}TestPredictions.tf'.format(self.__name), 'w') as file:
            for i in self.test_pred:
                file.write(str(i) + '\n')
    #

    def evaluate(self, **kwarg):
        '''function to evaluate performance of model'''
        #Evaluate performance of the model
        (self.testLoss, self.testAcc, self.testAUC, self.testTP, self.testFP,
         self.testTN, self.testFN) = self.bestModel.evaluate(
             self.test_set_all, self.test_label_all, verbose=0)
        print('Accuracy: {}'.format(self.testAcc))
        print('AUC of ROC: {}'.format(self.testAUC))
        print('True Positives: {}'.format(self.testTP))
        print('False Positives: {}'.format(self.testFP))
        print('True Negatives: {}'.format(self.testTN))
        print('False Negatives: {}'.format(self.testFN))
    #

    def exportTestPerf(self, **kwarg):
        '''function to export test performance to csv file'''
        testPerformance_dict = {'Accuracy': self.testAcc,
                                'AUC': self.testAUC,
                                'True Positives': self.testTP,
                                'False Positives': self.testFP,
                                'True Negatives': self.testTN,
                                'False Negatives': self.testFN}
        with open(self.paths[3] + '/PERFORMANCE/{}TestPerformance.csv'.format(self.__name), 'w') as file:
            for key in testPerformance_dict.keys():
                file.write("%s,%s\n" % (key, testPerformance_dict[key]))
    #

    def visualPerf(self, **kwarg):
        '''function to visualize model performance'''
        performance = [self.testAcc, self.testAUC]
        x = ['Accuracy', 'AUC of ROC']  #creating performance labels
        x_pos = [i for i, _ in enumerate(x)]
        plt.bar(x_pos, performance, color=('blue', 'red'))
        plt.ylim([0, 1])  #setting performance score range
        plt.xticks(x_pos, x)
        plt.title('Model Performance Metrics')
        plt.show()
    #
    def exportChart(self, **kwarg):
        '''function to save chart as a png file'''
        performance = [self.testAcc, self.testAUC]
        x = ['Accuracy', 'AUC of ROC']  #creating performance labels
        x_pos = [i for i, _ in enumerate(x)]
        plt.bar(x_pos, performance, color=('blue', 'red'))
        plt.ylim([0, 1])  #setting performance score range
        plt.xticks(x_pos, x)
        plt.title('Model Performance Metrics')
        plt.savefig(self.paths[1] + '/OUTPUT/{}ModelPerformance.png'.format(self.__name))
    #

    def exportTrainPred(self, **kwarg):
        '''function to export training predictions to the next Model'''
        #making train predictions
        train_pred = []
        for i in self.bestModel.predict(self.train_set_all).reshape(self.train_label_all.shape[0]):
            train_pred.append(i)
        with open(self.paths[1] + 'OUTPUT/{}TrainPredictions.tf'.format(self.__name), 'w') as file:
            for i in train_pred:
                file.write(str(i) + '\n')
    #

    def exportModelWeights(self, **kwarg):
        '''function to export Model Weights'''
        weight_dict = {}
        numLayers = len(self.bestModel.layers)
        for layer in range(1, numLayers):  #number of layers
            #first layer empty, index 0 = weights, index 1 = bias
            for node in range(0, len(self.bestModel.layers[layer].get_weights()[0][0])):  #number of nodes
                nodeW = self.bestModel.layers[layer].get_weights()[0][:, node]
                weight_dict['layer{}node{}'.format(layer, node + 1)] = nodeW
        df = pd.DataFrame({key: pd.Series(value) for key, value in weight_dict.items()})
        df.to_csv(self.paths[3] + 'WEIGHTS/{}Weights.csv'.format(self.__name),
                  encoding='utf-8', index=False)
    #

    def exportTrainingError(self, **kwarg):
        '''function to export Training Error'''
        error = self.modelFitBest.history['loss']
        val_error = self.modelFitBest.history['val_loss']
        epochs = range(len(error))  #number of epochs
        error_dict = {'Training Error': error, 'Validation Error': val_error}
        df = pd.DataFrame({key: pd.Series(value) for key, value in error_dict.items()})
        df.to_csv(self.paths[3] + 'ERROR/{}Error.csv'.format(self.__name),
                  encoding='utf-8', index=False)
    #

    def exportCovCorrCoef(self, **kwarg):
        '''function to export Covariance and Correlation Matrices'''
        weight_matrix = []
        columnNames = []
        #Only using first weight layer
        for node in range(0, len(self.bestModel.layers[1].get_weights()[0][0])):  #number of nodes
            columnNames.append('W_1_{}'.format(node))
            nodeW = self.bestModel.layers[1].get_weights()[0][:, node]  #getting weights of node
            weight_matrix.append(nodeW)
        cov_matrix = np.cov(weight_matrix)
        corrCoef_matrix = np.corrcoef(weight_matrix)
        try:
            covdf = pd.DataFrame(data=cov_matrix, columns=columnNames, index=columnNames)
            covdf.to_csv(self.paths[3] + 'COVandCORR/{}Cov.csv'.format(self.__name), encoding='utf-8')
        except ValueError:  #np.cov collapses to a 0-d array when there is a single node
            covdf = pd.DataFrame(data=float(cov_matrix), columns=columnNames, index=columnNames)
            covdf.to_csv(self.paths[3] + 'COVandCORR/{}Cov.csv'.format(self.__name), encoding='utf-8')
        corrdf = pd.DataFrame(data=corrCoef_matrix, columns=columnNames, index=columnNames)
        corrdf.to_csv(self.paths[3] + 'COVandCORR/{}CorrCoef.csv'.format(self.__name), encoding='utf-8')
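# --- Hedged sketch (not from the original source): a plausible end-to-end
# driver for NasFcAnn. The keyword arguments match __init__ above; the
# dataPath and name values are placeholders, and the call order simply
# follows the method definitions.
if __name__ == '__main__':
    ann = NasFcAnn(dataPath='sliceData.bin',  # placeholder file name
                   name='demo',
                   type='slice',
                   normalize='ra',
                   regRate=0.001,
                   positiveRegion='y')
    ann.loadData()
    ann.doPreProcess()
    ann.setUpModelTrain()
    ann.testModel()
    ann.evaluate()
    ann.exportModel()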