Example #1
# Imports assumed by this example (Keras 2.x API); LearningRateC and
# save_full_model are project-local helpers that are not shown here.
import os

import numpy as np
from keras import backend as K
from keras.layers import Activation, Dense, Dropout, Flatten, Input
from keras.models import Model, Sequential, load_model
from keras.regularizers import l2
from keras.utils import generic_utils

try:
    import cPickle  # Python 2
except ImportError:
    import pickle as cPickle  # Python 3

def CascadeTraining(model,
                    X_train,
                    Y_train,
                    X_test,
                    Y_test,
                    stringOfHistory=None,
                    dataAugmentation=None,
                    X_val=None,
                    Y_val=None,
                    epochs=20,
                    loss='categorical_crossentropy',
                    optimizer='sgd',
                    initialLr=0.01,
                    weightDecay=10e-4,
                    patience=10,
                    windowSize=5,
                    batch_size=128,
                    outNeurons=64,
                    nb_classes=10,
                    index=0,
                    fast=True,
                    gradient=False):
    """Method to cascade a given model

        # Arguments
            model: model to cascade.
            X_train: training inputs.
            Y_train: training targets.
            X_test: test inputs.
            Y_test: test targets.
            stringOfHistory: location to save results
            dataAugmentation: data augmentation generator.
            optimizer: optimizer to ise in every training phase.
            initialLr: initial learning rate.
            weightDecay: weight decay of the training function.
            patience, windowSize: parameters used in the callback.
            batch_size: batch size of training
            outNeurons: number of neurons in output block
            nb_classes: number of classes
            fast: catches the pseudo-inputs if True (Enough memory is required)
            gradient: computes the gradients and adds them to the history dictionary if True
        #Returns
            Results of training (accuracy, loss), and full model once cascaded
    """
    if stringOfHistory is None or not os.path.isfile(
            stringOfHistory + 'cascaded_model' + str(index) +
            '.h5'):  #TRAIN ONLY IF NO CASCADED MODEL HAS BEEN SAVED YET
        nextModelToTrain = Sequential()  #INIT MODEL TO TRAIN
        saveImportLayersIndexes = list(
        )  #INIT VARIABLE TO STORE THE INDEXES OF CORE LAYERS

        if stringOfHistory is not None and os.path.isfile(
                stringOfHistory + 'history_tmp' + str(index) +
                '.txt'):  #LOAD HISTORY FILE IF IT EXISTS
            with open(stringOfHistory + 'history_tmp' + str(index) + '.txt',
                      'rb') as historyFile:  #PICKLE REQUIRES BINARY MODE
                history = cPickle.load(historyFile)
        else:  #OTHERWISE INITIALIZE
            history = dict()
        if stringOfHistory is not None and os.path.isfile(
                stringOfHistory + 'model_to_predict' + str(index) +
                '.h5'):  #LOAD MODEL TO PREDICT
            nextModelToPredict = load_model(stringOfHistory +
                                            'model_to_predict' + str(index) +
                                            '.h5')
            nextModelToPredict = nextModelToPredict.layers  #KEEP ONLY THE LAYER LIST
        else:  #OTHERWISE INITIALIZE
            nextModelToPredict = None
        #SAVE IMPORTANT LAYERS INDEXES
        for i, currentLayer in enumerate(
                model.layers):  #GET THE INDEX OF CORE LAYERS IN GIVEN MODEL
            #CORE LAYERS ARE IDENTIFIED BY THE FIRST LETTER OF THEIR NAME:
            #'c' (CONVOLUTION) OR 'f' (FLATTEN)
            if currentLayer.get_config()['name'][0] in ('c', 'f'):
                saveImportLayersIndexes.append(i)
        for i in range(len(saveImportLayersIndexes) -
                       1):  #UP TO THE FLATTEN LAYER
            if ('iter' + str(i)
                    not in history.keys()):  #IF THE LAYER HAS NOT BEEN TRAINED
                history['iter' + str(i)] = dict(
                )  #INITIALIZE DICTIONARY TO SAVE RESULTS OF CURRENT RUN
                print('ITERATION %d' % (i))
                if i == 0:  #IF IT IS THE FIRST ITERATION
                    nextModelToTrain = Sequential()  #CREATE KERAS MODEL
                    for j in model.layers[:saveImportLayersIndexes[
                            1]]:  #APPEND THE LAYERS OF THE FIRST CORE BLOCK
                        nextModelToTrain.add(j)

                    nextModelToTrain.add(Flatten())
                    nextModelToTrain.add(Dropout(0.5))
                    nextModelToTrain.add(
                        Dense(outNeurons, kernel_regularizer=l2(weightDecay)))
                    nextModelToTrain.add(Activation('relu'))
                    nextModelToTrain.add(Dropout(0.5))
                    nextModelToTrain.add(
                        Dense(outNeurons // 2,
                              kernel_regularizer=l2(weightDecay)))
                    nextModelToTrain.add(Activation('relu'))
                    # nextModelToTrain.add(Dropout(0.5))
                    nextModelToTrain.add(
                        Dense(nb_classes, kernel_regularizer=l2(weightDecay)))
                    nextModelToTrain.add(Activation('softmax'))
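                    #NOTE: THE OUTPUT HEAD BUILT ABOVE IS EXACTLY 9 LAYERS
                    #(FLATTEN, DROPOUT, DENSE, RELU, DROPOUT, DENSE, RELU,
                    #DENSE, SOFTMAX); THE [-9] SLICES BELOW RELY ON THAT COUNT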

                else:  #IF IT IS NOT THE FIRST ITERATION
                    nextModelToTrain = list()
                    nextModelToPredictShape = (X_train.shape[1],
                                               X_train.shape[2],
                                               X_train.shape[3])
                    inputs = Input(shape=nextModelToPredictShape)
                    #REBUILD THE FROZEN MODEL FROM ITS LAYER LIST,
                    #SKIPPING THE ORIGINAL INPUT LAYER AT INDEX 0
                    x = nextModelToPredict[1](inputs)
                    for layer in nextModelToPredict[2:]:
                        x = layer(x)
                    nextModelToPredict = Model(inputs=inputs, outputs=x)
                    nextModelToPredict.compile(loss=loss,
                                               optimizer='sgd',
                                               metrics=['accuracy'
                                                        ])  #COMPILE MODEL
                    if stringOfHistory is not None:  #IF SAVING IS REQUIRED (IN CASE THE SCRIPT CRASHES)
                        save_full_model(model=nextModelToPredict,
                                        history=None,
                                        path=stringOfHistory,
                                        name='model_to_predict' + str(index) +
                                        '.h5')
                    for k in model.layers[
                            saveImportLayersIndexes[i]:saveImportLayersIndexes[
                                i +
                                1]]:  #GET THE LAYERS OF NEXT MODEL TO TRAIN
                        nextModelToTrain.append(k)
                    nextShape = nextModelToPredict.predict(X_train[[
                        0
                    ]]).shape[1:]  #GET INPUT SHAPE OF THE MODEL TO TRAIN
                    nextModelToTrain.append(Flatten())  #FLATTEN THE OUTPUT
                    if not (i + 1 == len(saveImportLayersIndexes)
                            ):  #ADD DROPOUT IF THE OUTPUT BLOCK HAS NOT BEEN CONNECTED
                        nextModelToTrain.append(Dropout(0.5))
                    nextModelToTrain.append(
                        Dense(outNeurons, kernel_regularizer=l2(weightDecay)))
                    nextModelToTrain.append(Activation('relu'))
                    nextModelToTrain.append(Dropout(0.5))
                    nextModelToTrain.append(
                        Dense(outNeurons // 2,
                              kernel_regularizer=l2(weightDecay)))
                    nextModelToTrain.append(Activation('relu'))
                    nextModelToTrain.append(
                        Dense(nb_classes, kernel_regularizer=l2(weightDecay)))
                    nextModelToTrain.append(Activation('softmax'))

                    #INITIALIZE KERAS MODEL USING LAYERS IN nextModelToTrain LIST
                    nextModelToTrainInputs = Input(shape=nextShape)
                    x = nextModelToTrain[0](nextModelToTrainInputs)
                    for layer in nextModelToTrain[1:]:
                        x = layer(x)
                    nextModelToTrain = Model(inputs=nextModelToTrainInputs,
                                             outputs=x)
                K.set_value(
                    optimizer.lr, initialLr
                )  #SET INITIAL LEARNING RATE (IT MIGHT HAVE BEEN CHANGED BY PREVIOUS ITERATIONS)
                nextModelToTrain.compile(loss=loss,
                                         optimizer=optimizer,
                                         metrics=['accuracy'])

                if nextModelToPredict is not None:  #IF MODEL TO PREDICT EXISTS
                    print('MODEL TO PREDICT LAYERS'
                          )  #PRINT THE LAYERS OF THE MODEL
                    for k in nextModelToPredict.layers:
                        print(k.get_config()['name'])

                print('MODEL TO TRAIN LAYERS')  #PRINT LAYERS OF MODEL TO TRAIN
                for k in nextModelToTrain.layers:
                    print(k.get_config()['name'])
                # currentEpochs = epochs+5*i
                currentEpochs = epochs + 10 * i  #SET THE NUMBER OF EPOCHS OF CURRENT RUN
                # if currentEpochs > 50: #MAXIMUM NUMBER OF EPOCHS ON CASCADE LEARNING IS 50
                #     currentEpochs = 50
                dataAugmentation.modelToPredict = nextModelToPredict  #SET MODEL TO GENERATE ARTIFICIAL INPUTS IN GENERATOR (dataAugmentation MUST NOT BE None HERE)
                if fast:
                    tmpX = list()
                    tmpY = list()
                    # if nextModelToPredict != None:
                    #     tmpX = np.zeros([len(X_train)]+[nextModelToPredict.input_shape])
                    #     tmpY = np.zeros([len(X_train)]+[nextModelToPredict.output_shape])
                    # else:
                    #     tmpX = np.zeros(X_train.shape)
                    #     tmpY = np.zeros(Y_train.shape)
                    progbar = generic_utils.Progbar(len(X_train))
                    #LOAD ARTIFICIAL INPUTS
                    print('LOADING TRAINING DATA')
                    for k, (X_batch, Y_batch) in enumerate(
                            dataAugmentation.flow(X_train,
                                                  Y_train,
                                                  batch_size=1)):
                        tmpX.append(X_batch[0, :])
                        tmpY.append(Y_batch[0, :])
                        progbar.add(1)
                        if (k >= len(X_train) - 1):
                            print('\n')
                            break
                    tmpX = np.asarray(tmpX)
                    tmpY = np.asarray(tmpY)
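                    #AT THIS POINT THE WHOLE PSEUDO-INPUT TRAINING SET IS HELD
                    #IN MEMORY, WHICH IS WHY fast=True REQUIRES ENOUGH MEMORY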
                    # CALLBACK TO REDUCE THE LEARNING RATE AND STORE INFORMATION OF VALIDATION AND TESTING RESULTS DURING TRAINING
                    learningCall = LearningRateC(X_val,
                                                 Y_val,
                                                 X_test,
                                                 Y_test,
                                                 dataAugmentation,
                                                 batch_size,
                                                 patience=patience,
                                                 windowSize=windowSize,
                                                 gradient=gradient)
                    #TRAIN THE MODEL
                    tmpHistory = nextModelToTrain.fit(tmpX,
                                                      tmpY,
                                                      batch_size=batch_size,
                                                      epochs=currentEpochs,
                                                      verbose=2,
                                                      callbacks=[learningCall])
                else:
                    # dataAugmentation.modelToPredict = nextModelToPredict #SET MODEL TO GENERATE ARTIFICIAL INPUTS IN GENERATOR
                    # progbar = generic_utils.Progbar(len(X_train))
                    # #CALLBACK TO REDUCE THE LEARNING RATE AND STORE INFORMATION OF VALIDATION AND TESTING RESULTS DURING TRAINING
                    learningCall = LearningRateC(X_val,
                                                 Y_val,
                                                 X_test,
                                                 Y_test,
                                                 dataAugmentation,
                                                 batch_size,
                                                 patience=patience,
                                                 windowSize=windowSize,
                                                 gradient=gradient)
                    #TRAIN THE MODEL
                    tmpHistory = nextModelToTrain.fit_generator(
                        dataAugmentation.flow(X_train,
                                              Y_train,
                                              batch_size=batch_size),
                        steps_per_epoch=np.ceil(1. * len(X_train) /
                                                batch_size).astype(int),
                        epochs=currentEpochs,
                        verbose=1,
                        callbacks=[learningCall])

                if nextModelToPredict is None:  #IF MODEL TO PREDICT DOES NOT EXIST
                    nextModelToPredict = nextModelToTrain.layers[
                        0:-9]  #TAKE THE LAYERS OF nextModelToTrain WITHOUT THE 9-LAYER OUTPUT HEAD
                else:  #OTHERWISE APPEND LAYERS (WITHOUT THE OUTPUT HEAD) OF nextModelToTrain TO nextModelToPredict
                    nextModelToPredict = nextModelToPredict.layers
                    nextModelToPredict.extend(nextModelToTrain.layers[1:-9])
                #SAVE RESULTS IN SINGLE DICTIONARY, ALSO CALCULATE THE CONFUSION MATRIX OF THE TRAINED MODEL
                history['iter' + str(i)].update(learningCall.history)
                history['iter' +
                        str(i)]['lossTraining'] = tmpHistory.history['loss']
                history['iter' +
                        str(i)]['accuracyTraining'] = tmpHistory.history['acc']
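                #NOTE: 'acc' IS THE ACCURACY KEY IN KERAS 2.x HISTORIES;
                #NEWER VERSIONS (2.3+/tf.keras) USE 'accuracy'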
                # history['iter'+str(i)]['confusionMatrix'] = GetConfusionMatrix(nextModelToTrain,X_test,Y_test,dataAugmentation)
                if stringOfHistory is not None:
                    save_full_model(history=history,
                                    path=stringOfHistory,
                                    name='_tmp' + str(index))

        # plot_history(history, stringOfHistory)
        #GET WHOLE CASCADED MODEL
        input_model_predict = Input(shape=X_train.shape[1:])
        # input_model_predict = nextModelToPredict[0]
        x = nextModelToPredict[1](input_model_predict)
        for layer in nextModelToPredict[2:]:
            x = layer(x)
        for layer in nextModelToTrain.layers[-9:]:  #RE-ATTACH THE 9-LAYER OUTPUT HEAD
            x = layer(x)
        if stringOfHistory is not None:
            os.remove(stringOfHistory + 'model_to_predict' + str(index) +
                      '.h5')
            os.remove(stringOfHistory + 'history_tmp' + str(index) + '.txt')
        modelToReturn = Model(inputs=input_model_predict, outputs=x)
        return modelToReturn, history  #RETURN CASCADED MODEL AND RESULTS OF TRAINING
    else:
        with open(stringOfHistory + 'history' + str(index) + '.txt',
                  'rb') as historyFile:  #PICKLE REQUIRES BINARY MODE
            history = cPickle.load(historyFile)
        return load_model(stringOfHistory + 'cascaded_model' + str(index) +
                          '.h5'), history
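

# ---------------------------------------------------------------------------
# Usage sketch (illustrative; not part of the original source). CascadeTraining
# locates core layers by the first letter of each layer name ('c' for
# convolutional blocks, 'f' for the flatten layer), so the given model must
# follow that naming convention. The dataAugmentation object must expose a
# modelToPredict attribute and a flow(X, Y, batch_size) method; the
# hypothetical PseudoInputGenerator below is a minimal stand-in that feeds
# each batch through the frozen model to produce pseudo-inputs.
class PseudoInputGenerator(object):
    def __init__(self):
        self.modelToPredict = None  #SET BY CascadeTraining BEFORE EACH RUN

    def flow(self, X, Y, batch_size=32):
        while True:  #KERAS GENERATORS ARE EXPECTED TO YIELD FOREVER
            indexes = np.random.permutation(len(X))
            for start in range(0, len(X), batch_size):
                batch = indexes[start:start + batch_size]
                X_batch, Y_batch = X[batch], Y[batch]
                if self.modelToPredict is not None:  #MAP RAW INPUTS TO PSEUDO-INPUTS
                    X_batch = self.modelToPredict.predict(X_batch)
                yield X_batch, Y_batch


# Hypothetical call, assuming the data arrays are already loaded and `model`
# is a Sequential CNN named as described above. The optimizer must be a Keras
# optimizer instance (not a string), because CascadeTraining resets its
# learning rate with K.set_value at every iteration:
#
# from keras.optimizers import SGD
# cascaded_model, history = CascadeTraining(model,
#                                           X_train, Y_train,
#                                           X_test, Y_test,
#                                           stringOfHistory='./results/',
#                                           dataAugmentation=PseudoInputGenerator(),
#                                           X_val=X_val, Y_val=Y_val,
#                                           optimizer=SGD(lr=0.01, momentum=0.9),
#                                           nb_classes=10)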