def CascadeTraining(model, X_train, Y_train, X_test, Y_test,
                    stringOfHistory=None, dataAugmentation=None,
                    X_val=None, Y_val=None, epochs=20,
                    loss='categorical_crossentropy', optimizer='sgd',
                    initialLr=0.01, weightDecay=10e-4, patience=10,
                    windowSize=5, batch_size=128, outNeurons=64,
                    nb_classes=10, index=0, fast=True, gradient=False):
    """Cascade-train ``model`` one slice of core layers at a time.

    Core layers are those whose config name starts with ``'c'`` (conv) or
    ``'f'`` (flatten).  Each iteration trains one slice of them plus a fresh
    9-layer dense output block, then folds the trained slice (without the
    output block) into a growing frozen feature extractor
    (``nextModelToPredict``) that generates the pseudo-inputs of the next
    iteration.

    # Arguments
        model: model to cascade.
        X_train, Y_train: training inputs/targets.
        X_test, Y_test: test inputs/targets.
        stringOfHistory: path prefix used to checkpoint/restore intermediate
            models and history (None disables saving/resuming).
        dataAugmentation: data augmentation generator; its ``modelToPredict``
            attribute is (re)assigned every iteration.
        X_val, Y_val: validation data handed to the LearningRateC callback.
        epochs: base number of epochs; iteration ``i`` runs ``epochs + 10*i``.
        loss: training loss.
        optimizer: optimizer to use in every training phase.
            NOTE(review): the default string ``'sgd'`` cannot work with
            ``K.set_value(optimizer.lr, ...)`` below — callers appear to pass
            a real optimizer instance; confirm.
        initialLr: learning rate restored at the start of each iteration.
        weightDecay: l2 weight decay of the dense output block.
        patience, windowSize: parameters used in the callback.
        batch_size: batch size of training.
        outNeurons: width of the first hidden Dense of the output block
            (the second one is ``outNeurons // 2``).
        nb_classes: number of classes.
        index: suffix used in checkpoint file names.
        fast: caches the pseudo-inputs in memory if True (enough memory is
            required); otherwise streams them through ``fit_generator``.
        gradient: computes the gradients and adds them to the history
            dictionary if True.

    # Returns
        (cascaded model, history dict with per-iteration results).
    """
    # Finished run already on disk: return the cascaded model and its history.
    if stringOfHistory is not None and os.path.isfile(
            stringOfHistory + 'cascaded_model' + str(index) + '.h5'):
        # FIX: open the pickle in binary mode and close the handle.
        with open(stringOfHistory + 'history' + str(index) + '.txt', 'rb') as historyFile:
            savedHistory = cPickle.load(historyFile)
        return load_model(stringOfHistory + 'cascaded_model' + str(index) + '.h5'), savedHistory

    nextModelToTrain = Sequential()  # placeholder; rebuilt every iteration
    saveImportLayersIndexes = list()  # indexes of core layers in `model`

    # Resume: reload the history checkpoint of an interrupted run, if any.
    if stringOfHistory is not None and os.path.isfile(
            stringOfHistory + 'history_tmp' + str(index) + '.txt'):
        with open(stringOfHistory + 'history_tmp' + str(index) + '.txt', 'rb') as historyFile:
            history = cPickle.load(historyFile)
    else:
        history = dict()

    # Resume: reload the frozen feature extractor built so far, if any.
    if stringOfHistory is not None and os.path.isfile(
            stringOfHistory + 'model_to_predict' + str(index) + '.h5'):
        nextModelToPredict = load_model(
            stringOfHistory + 'model_to_predict' + str(index) + '.h5')
        nextModelToPredict = nextModelToPredict.layers  # kept as a layer list
    else:
        nextModelToPredict = None

    # Indexes of core layers: names starting with 'c' (conv) or 'f' (flatten).
    for layerIndex, currentLayer in enumerate(model.layers):
        if currentLayer.get_config()['name'][0] in ('c', 'f'):
            saveImportLayersIndexes.append(layerIndex)

    for i in range(len(saveImportLayersIndexes) - 1):  # up to the flatten layer
        if 'iter' + str(i) in history:
            continue  # slice already trained in a previous (checkpointed) run
        history['iter' + str(i)] = dict()  # results of the current run
        print('ITERATION %d' % (i))

        if i == 0:
            # First slice: Sequential of the original leading layers plus a
            # fresh dense output block (9 layers from Flatten to softmax).
            nextModelToTrain = Sequential()
            for coreLayer in model.layers[0:saveImportLayersIndexes[1]]:
                nextModelToTrain.add(coreLayer)
            nextModelToTrain.add(Flatten())
            nextModelToTrain.add(Dropout(0.5))
            nextModelToTrain.add(Dense(outNeurons, kernel_regularizer=l2(weightDecay)))
            nextModelToTrain.add(Activation('relu'))
            nextModelToTrain.add(Dropout(0.5))
            # FIX: integer division — Dense units must be an int (the original
            # `outNeurons / 2` is a float under Python 3).
            nextModelToTrain.add(Dense(outNeurons // 2, kernel_regularizer=l2(weightDecay)))
            nextModelToTrain.add(Activation('relu'))
            nextModelToTrain.add(Dense(nb_classes, kernel_regularizer=l2(weightDecay)))
            nextModelToTrain.add(Activation('softmax'))
        else:
            # Rebuild the frozen feature extractor as a functional Model.
            nextModelToPredictShape = (X_train.shape[1], X_train.shape[2], X_train.shape[3])
            inputs = Input(shape=nextModelToPredictShape)
            x = nextModelToPredict[1](inputs)  # [0] is the stored InputLayer
            for k in range(1, len(nextModelToPredict) - 1):
                x = nextModelToPredict[k + 1](x)
            nextModelToPredict = Model(inputs=inputs, outputs=x)
            # Compiled only so that predict() can be used; the optimizer here
            # is never stepped.
            nextModelToPredict.compile(loss=loss, optimizer='sgd', metrics=['accuracy'])
            if stringOfHistory is not None:  # checkpoint in case the script crashes
                save_full_model(model=nextModelToPredict, history=None,
                                path=stringOfHistory,
                                name='model_to_predict' + str(index) + '.h5')

            # Layers of the next slice to train...
            nextModelToTrain = list()
            for k in model.layers[saveImportLayersIndexes[i]:saveImportLayersIndexes[i + 1]]:
                nextModelToTrain.append(k)
            # ...whose input shape is the extractor's output shape.
            nextShape = nextModelToPredict.predict(X_train[[0]]).shape[1::]
            # Fresh dense output block.
            nextModelToTrain.append(Flatten())
            nextModelToTrain.append(Dropout(0.5))
            # NOTE(review): reconstructed from a whitespace-mangled source.
            # This guard is always True inside the loop (i + 1 never equals
            # len(saveImportLayersIndexes) when i < len - 1), so an extra
            # Dropout is always appended here — confirm against the reference
            # implementation, since the slicing below assumes a 9-layer head.
            if not (i + 1 == len(saveImportLayersIndexes)):
                nextModelToTrain.append(Dropout(0.5))
            nextModelToTrain.append(Dense(outNeurons, kernel_regularizer=l2(weightDecay)))
            nextModelToTrain.append(Activation('relu'))
            nextModelToTrain.append(Dropout(0.5))
            # FIX: integer division (see above).
            nextModelToTrain.append(Dense(outNeurons // 2, kernel_regularizer=l2(weightDecay)))
            nextModelToTrain.append(Activation('relu'))
            nextModelToTrain.append(Dense(nb_classes, kernel_regularizer=l2(weightDecay)))
            nextModelToTrain.append(Activation('softmax'))

            # Wire the layer list into a functional Model.
            nextModelToTrainInputs = Input(shape=nextShape)
            x = nextModelToTrain[0](nextModelToTrainInputs)
            for current_layer_index in range(len(nextModelToTrain) - 1):
                x = nextModelToTrain[current_layer_index + 1](x)
            nextModelToTrain = Model(inputs=nextModelToTrainInputs, outputs=x)

        # Reset the learning rate (previous iterations may have decayed it).
        K.set_value(optimizer.lr, initialLr)
        nextModelToTrain.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

        if nextModelToPredict is not None:
            print('MODEL TO PREDICT LAYERS')
            for k in nextModelToPredict.layers:
                print(k.get_config()['name'])
        print('MODEL TO TRAIN LAYERS')
        for k in nextModelToTrain.layers:
            print(k.get_config()['name'])

        currentEpochs = epochs + 10 * i  # deeper iterations train longer
        # Generator pushes X through the frozen extractor to make pseudo-inputs.
        dataAugmentation.modelToPredict = nextModelToPredict
        # Callback: reduces the learning rate and records validation/test
        # results during training.
        learningCall = LearningRateC(X_val, Y_val, X_test, Y_test,
                                     dataAugmentation, batch_size,
                                     patience=patience, windowSize=windowSize,
                                     gradient=gradient)
        if fast:
            # Materialize the pseudo-inputs once (memory traded for speed).
            tmpX = list()
            tmpY = list()
            progbar = generic_utils.Progbar(len(X_train))
            print('LOADING TRAINING DATA')
            for k, (X_batch, Y_batch) in enumerate(
                    dataAugmentation.flow(X_train, Y_train, batch_size=1)):
                tmpX.append(X_batch[0, :])
                tmpY.append(Y_batch[0, :])
                progbar.add(1)
                if k >= len(X_train) - 1:  # flow() loops forever; stop after one pass
                    print('\n')
                    break
            tmpX = np.asarray(tmpX)
            tmpY = np.asarray(tmpY)
            tmpHistory = nextModelToTrain.fit(tmpX, tmpY,
                                              batch_size=batch_size,
                                              epochs=currentEpochs,
                                              verbose=2,
                                              callbacks=[learningCall])
        else:
            tmpHistory = nextModelToTrain.fit_generator(
                dataAugmentation.flow(X_train, Y_train, batch_size=batch_size),
                steps_per_epoch=np.ceil(1. * len(X_train) / batch_size).astype(int),
                epochs=currentEpochs,
                verbose=1,
                callbacks=[learningCall])

        # Fold the freshly trained slice (minus its 9-layer output block)
        # into the growing feature-extractor layer list.
        if nextModelToPredict is None:
            nextModelToPredict = nextModelToTrain.layers[0:-9]
        else:
            nextModelToPredict = nextModelToPredict.layers
            nextModelToPredict.extend(nextModelToTrain.layers[1:-9])  # skip InputLayer

        # Merge the callback's curves with the training curves.
        history['iter' + str(i)].update(learningCall.history)
        history['iter' + str(i)]['lossTraining'] = tmpHistory.history['loss']
        history['iter' + str(i)]['accuracyTraining'] = tmpHistory.history['acc']
        if stringOfHistory is not None:
            save_full_model(history=history, path=stringOfHistory,
                            name='_tmp' + str(index))

    # Assemble the whole cascaded model from the accumulated layer list,
    # re-attaching the final iteration's output block.
    input_model_predict = Input(shape=X_train.shape[1::])
    x = nextModelToPredict[1](input_model_predict)
    for featureLayer in nextModelToPredict[2::]:
        x = featureLayer(x)
    for outputLayer in nextModelToTrain.layers[-9::]:
        x = outputLayer(x)

    if stringOfHistory is not None:
        # Training finished cleanly: drop the crash-recovery checkpoints.
        # FIX: guard the removals — 'model_to_predict' is only written when
        # the cascade ran more than one iteration, so an unconditional
        # os.remove could raise for single-iteration models.
        for tmpFile in (stringOfHistory + 'model_to_predict' + str(index) + '.h5',
                        stringOfHistory + 'history_tmp' + str(index) + '.txt'):
            if os.path.isfile(tmpFile):
                os.remove(tmpFile)

    modelToReturn = Model(inputs=input_model_predict, outputs=x)
    return modelToReturn, history  # cascaded model and results of training