def evaluateModel(self,
                      BIDIRECTIONAL,
                      N_HIDDEN_LIST,
                      batch_size,
                      dataName,
                      wavDir,
                      data_store_dir,
                      meanStd_path,
                      model_load,
                      nbMFCCs,
                      store_dir,
                      force_overwrite=False):
        logger_evaluate.info("\n\n\n")

        ####### THE DATA you want to evaluate ##########
        data_store_path = data_store_dir + dataName.replace(
            '/', '_') + "_nbMFCC" + str(nbMFCCs)
        if not os.path.exists(data_store_dir): os.makedirs(data_store_dir)
        predictions_path = store_dir + os.sep + dataName.replace(
            '/', '_') + "_predictions.pkl"

        # log file
        logFile = store_dir + os.sep + "Evaluation" + dataName.replace(
            '/', '_') + '.log'
        if os.path.exists(logFile) and not force_overwrite:
            from general_tools import query_yes_no
            if not query_yes_no(
                    "Log file already exists at %s\nDo you want to evaluate again and overwrite?"
                    % logFile, "y"):
                logger_evaluate.info(
                    "Log file already exists, not re-evaluating...")
                return 0
        fh = logging.FileHandler(logFile, 'w')  # create new logFile
        fh.setLevel(logging.INFO)
        fh.setFormatter(formatter)
        logger_evaluate.addHandler(fh)
        logger_evaluate.info("\n  MODEL:    %s", model_load)
        logger_evaluate.info("\n  WAV_DIR:  %s", wavDir)
        logger_evaluate.info("\n  PREDICTS: %s", predictions_path)
        logger_evaluate.info("\n  LOG:      %s", logFile)
        logger_evaluate.info("\n")

        # GATHERING DATA
        logger_evaluate.info("* Gathering Data ...")
        if os.path.exists(data_store_path + ".pkl"):
            [inputs, targets,
             valid_frames] = unpickle(data_store_path + ".pkl")
            calculateAccuracy = True
            logger_evaluate.info(
                "Successfully loaded preprocessed data, with targets")

        elif os.path.exists(
                data_store_path + "_noTargets.pkl"
        ):  # TODO: make it work for unlabeled datasets. see RNN_tools_lstm.py, eg iterate_minibatch_noTargets.
            [inputs] = unpickle(data_store_path + "_noTargets.pkl")
            calculateAccuracy = False  # we can't as we don't know the correct labels
            logger_evaluate.info(
                "Successfully loaded preprocessed data, no targets")

        else:
            logger_evaluate.info("Data not found, preprocessing...")

            # From WAVS, generate X, y and valid_frames; also store under data_store_dir
            def preprocessLabeledWavs(wavDir, store_dir, name):
                # fixWavs -> suppose this is done
                # convert to pkl
                X, y, valid_frames = preprocessWavs.preprocess_dataset(
                    source_path=wavDir,
                    nbMFCCs=nbMFCCs,
                    logger=logger_evaluate)

                X_data_type = 'float32'
                X = preprocessWavs.set_type(X, X_data_type)
                y_data_type = 'int32'
                y = preprocessWavs.set_type(y, y_data_type)
                valid_frames_data_type = 'int32'
                valid_frames = preprocessWavs.set_type(valid_frames,
                                                       valid_frames_data_type)

                return X, y, valid_frames

            def preprocessUnlabeledWavs(wavDir, store_dir, name):  #TODO
                # fixWavs -> suppose this is done
                # convert to pkl
                X = preprocessWavs.preprocess_unlabeled_dataset(
                    source_path=wavDir,
                    nbMFCCs=nbMFCCs,
                    logger=logger_evaluate)

                X_data_type = 'float32'
                X = preprocessWavs.set_type(X, X_data_type)

                return X

            # load wavs and labels
            wav_files = transform.loadWavs(wavDir)
            wav_filenames = [
                str(
                    os.path.basename(
                        os.path.dirname(
                            os.path.dirname(os.path.dirname(wav_file)))) +
                    os.sep + os.path.basename(
                        os.path.dirname(os.path.dirname(wav_file))) + os.sep +
                    os.path.basename(os.path.dirname(wav_file)) + os.sep +
                    os.path.basename(wav_file)) for wav_file in wav_files
            ]
            logger_evaluate.info("Found %s files to evaluate \n Example: %s",
                                 len(wav_filenames), wav_filenames[0])
            label_files = transform.loadPhns(wavDir)

            # if source dir doesn't contain labels, we can't calculate accuracy
            calculateAccuracy = True
            if len(wav_files) != len(label_files):
                calculateAccuracy = False
                inputs = preprocessUnlabeledWavs(wavDir=wavDir,
                                                 store_dir=store_dir,
                                                 name=dataName)
            else:
                inputs, targets, valid_frames = preprocessLabeledWavs(
                    wavDir=wavDir, store_dir=store_dir, name=dataName)

            # normalize inputs using dataset Mean and Std_dev;  convert to float32 for GPU evaluation
            with open(meanStd_path, 'rb') as cPickle_file:
                [mean_val, std_val] = cPickle.load(cPickle_file)
            inputs = preprocessWavs.normalize(inputs, mean_val, std_val)

            # just to be sure
            X_data_type = 'float32'
            inputs = preprocessWavs.set_type(inputs, X_data_type)

            # Print some information
            logger_evaluate.debug("* Data information")
            logger_evaluate.debug('  inputs')
            logger_evaluate.debug('%s %s', type(inputs), len(inputs))
            logger_evaluate.debug('%s %s', type(inputs[0]), inputs[0].shape)
            logger_evaluate.debug('%s %s', type(inputs[0][0]),
                                  inputs[0][0].shape)
            logger_evaluate.debug('%s', type(inputs[0][0][0]))
            logger_evaluate.debug('y train')
            logger_evaluate.debug('  %s %s', type(targets), len(targets))
            logger_evaluate.debug('  %s %s', type(targets[0]),
                                  targets[0].shape)
            logger_evaluate.debug('  %s %s', type(targets[0][0]),
                                  targets[0][0].shape)

            # slice to have a number of inputs that is a multiple of batch size
            logger_evaluate.info(
                "Not evaluating %s last files (batch size mismatch)",
                len(inputs) % batch_size)
            inputs = inputs[:-(len(inputs) % batch_size) or None]
            if calculateAccuracy:
                targets = targets[:-(len(targets) % batch_size) or None]
                valid_frames = valid_frames[:-(len(valid_frames) %
                                               batch_size) or None]
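            # Note on the `or None` idiom above: when the length is an exact
            # multiple of batch_size, -(0) or None evaluates to None and the
            # slice [:None] keeps everything; e.g. 103 files with batch_size 10
            # become inputs[:-3], while 100 files become inputs[:None].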

            # pad the inputs to process batches easily
            inputs = pad_sequences_X(inputs)
            if calculateAccuracy: targets = pad_sequences_y(targets)

            # save the preprocessed data
            logger_evaluate.info("storing preprocessed data to: %s",
                                 data_store_path)
            if calculateAccuracy:
                general_tools.saveToPkl(data_store_path + '.pkl',
                                        [inputs, targets, valid_frames])
            else:
                general_tools.saveToPkl(data_store_path + '_noTargets.pkl',
                                        [inputs])

        # Gather filenames; for debugging
        wav_files = transform.loadWavs(wavDir)
        wav_filenames = [
            str(
                os.path.basename(
                    os.path.dirname(os.path.dirname(os.path.dirname(
                        wav_file)))) + os.sep +
                os.path.basename(os.path.dirname(os.path.dirname(wav_file))) +
                os.sep + os.path.basename(os.path.dirname(wav_file)) + os.sep +
                os.path.basename(wav_file)) for wav_file in wav_files
        ]
        logger_evaluate.debug(" # inputs: %s, # wav files: %s", len(inputs),
                              len(wav_files))

        # make a copy of the data because we might need it again to calculate accuracy, and the iterator will remove elements from the array
        inputs_bak = copy.deepcopy(inputs)
        if calculateAccuracy:
            targets_bak = copy.deepcopy(targets)
            valid_frames_bak = copy.deepcopy(valid_frames)

        logger_evaluate.info("* Evaluating: pass over Evaluation Set")

        if calculateAccuracy:  # if .phn files are provided, we can check our predictions
            logger_evaluate.info(
                "Getting predictions and calculating accuracy...")
            avg_error, avg_acc, predictions = self.RNN_network.run_epoch(X=inputs, y=targets, valid_frames=valid_frames, \
                                                                         get_predictions=True, batch_size=batch_size)

            logger_evaluate.info("All batches, avg Accuracy: %s", avg_acc)
            inputs = inputs_bak
            targets = targets_bak
            valid_frames = valid_frames_bak

            #uncomment if you want to save everything in one place (takes quite a lot of storage space)
            #general_tools.saveToPkl(predictions_path, [inputs, predictions, targets, valid_frames, avg_Acc])

        else:
            # TODO fix this
            predictions = []
            for batch_inputs, masks, seq_lengths in tqdm(
                    iterate_minibatches_noTargets(inputs,
                                                  batch_size=batch_size,
                                                  shuffle=False),
                    total=len(inputs)):
                # get predictions for this batch
                nb_inputs = len(batch_inputs)  # usually batch_size, but the last batch can be smaller
                prediction = self.RNN_network.predictions_fn(batch_inputs, masks)
                prediction = np.reshape(prediction, (nb_inputs, -1))
                predictions = predictions + list(prediction)

            inputs = inputs_bak
            #general_tools.saveToPkl(predictions_path, [inputs, predictions])

        # Print information about the predictions
        logger_evaluate.info("* Done")
        end_evaluation_time = time.time()
        eval_duration = end_evaluation_time - program_start_time
        logger_evaluate.info('Total time: {:.3f}'.format(eval_duration))
        # Print the results
        try:
            printEvaluation(wav_filenames,
                            inputs,
                            predictions,
                            targets,
                            valid_frames,
                            avg_acc,
                            range(len(inputs)),
                            logger=logger_evaluate,
                            only_final_accuracy=True)
        except:
            pdb.set_trace()
        logger_evaluate.info(
            'Evaluation duration: {:.3f}'.format(eval_duration))
        logger_evaluate.info(
            'Printing duration: {:.3f}'.format(time.time() -
                                               end_evaluation_time))

        # close the log handler
        fh.close()
        logger_evaluate.removeHandler(fh)
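
# Hedged usage sketch for evaluateModel (the enclosing class and its constructor
# are not shown in this excerpt; every value below is a placeholder):
# evaluator.evaluateModel(BIDIRECTIONAL=True, N_HIDDEN_LIST=[256, 256],
#                         batch_size=8, dataName="TIMIT/test", wavDir=wavDir,
#                         data_store_dir=data_store_dir, meanStd_path=meanStd_path,
#                         model_load=model_load, nbMFCCs=39, store_dir=store_dir)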
# Example #2
# print some more to check that cast succeeded
logger.debug('X train')
logger.debug('  %s %s', type(X_train), len(X_train))
logger.debug('  %s %s', type(X_train[0]), X_train[0].shape)
logger.debug('  %s %s', type(X_train[0][0]), X_train[0][0].shape)
logger.debug('  %s', type(X_train[0][0][0]))
logger.debug('y train')
logger.debug('  %s %s', type(y_train), len(y_train))
logger.debug('  %s %s', type(y_train[0]), y_train[0].shape)
logger.debug('  %s %s', type(y_train[0][0]), y_train[0][0].shape)

### STORE DATA ###
logger.info('Saving data to %s', target_path)
dataList = [
    X_train, y_train, valid_frames_train, X_val, y_val, valid_frames_val,
    X_test, y_test, valid_frames_test
]
general_tools.saveToPkl(target_path, dataList)

# these can be used to evaluate new data, so you don't have to load the whole dataset just to normalize
meanStd_path = os.path.dirname(outputDir) + os.sep + os.path.basename(
    dataRootDir) + "MeanStd.pkl"
logger.info('Saving Mean and Std_val to %s', meanStd_path)
dataList = [mean_val, std_val]
general_tools.saveToPkl(meanStd_path, dataList)

logger.info('Preprocessing complete!')
logger.info('Total time: {:.3f}'.format(timeit.default_timer() -
                                        program_start_time))
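
# The MeanStd.pkl stored above is what evaluateModel loads to normalize new
# data before evaluation. A minimal sketch of that reuse (same helpers as above;
# standard mean/std normalization is assumed):
# mean_val, std_val = general_tools.unpickle(meanStd_path)
# X_new = [(x - mean_val) / std_val for x in X_new]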
def prepLip_one(speakerFile=None, trainFraction=0.70, validFraction=0.10,
                sourceDataDir=None, storeProcessed=False, processedDir=None,
                verbose=False, loadData=True, viseme=False):
    # from https://www.cs.toronto.edu/~kriz/cifar.html
    # also see http://stackoverflow.com/questions/35032675/how-to-create-dataset-similar-to-cifar-10

    store_path = None
    if processedDir is not None:
        store_path = ''.join([processedDir, ("_viseme" if viseme else "_phoneme"),
                              "_train", str(trainFraction).replace("0.", ""),
                              "valid", str(validFraction).replace("0.", ""),
                              os.sep, os.path.basename(speakerFile)])
        logger_prepLip.debug(store_path)
        # if already processed, just load it from disk
        if os.path.exists(store_path):
            if loadData:  # if loadData is False we only check existence (and generate otherwise), without loading
                logger_prepLip.info("loading stored files X's...")
                return unpickle(store_path)
            return
    logger_prepLip.info("%s processed data doesn't exist yet; generating...", speakerFile)

    dtype = 'uint8'
    memAvailableMB = 6000
    memAvailable = memAvailableMB * 1024
    img_shape = (1, 120, 120)
    img_size = np.prod(img_shape)

    # load the images
    # first initialize the matrices
    X_train = []
    y_train = []
    X_val = []
    y_val = []
    X_test = []
    y_test = []

    #logger_prepLip.info('loading file %s', speakerFile)
    data = unpickle(sourceDataDir + os.sep + speakerFile)
    # convert phonemes to viseme labels if needed
    if viseme:
        from phoneme_set import phoneme_set_39, classToPhoneme39, phonemeToViseme, viseme_set, classToViseme
        for i in range(len(data['labels'])):
            phoneme = classToPhoneme39(data['labels'][i])
            data['labels'][i] = viseme_set[phonemeToViseme[phoneme]]


    thisN = data['data'].shape[0]
    thisTrain = int(trainFraction * thisN)
    thisValid = int(validFraction * thisN)
    thisTest = thisN - thisTrain - thisValid  # compensates for rounding
    if trainFraction + validFraction == 1.0:
        thisValid = thisN - thisTrain
        thisTest = 0

    if verbose:
        logger_prepLip.info("This dataset contains %s images", thisN)
        logger_prepLip.info("now loading : nbTrain, nbValid, nbTest")
        logger_prepLip.info("\t\t\t %s %s %s", thisTrain, thisValid, thisTest)


    X_train = X_train + list(data['data'][0:thisTrain])
    X_val   = X_val   + list(data['data'][thisTrain:thisTrain + thisValid])
    X_test  = X_test  + list(data['data'][thisTrain + thisValid:thisN])

    y_train = y_train + list(data['labels'][0:thisTrain])
    y_val   = y_val   + list(data['labels'][thisTrain:thisTrain + thisValid])
    y_test  = y_test  + list(data['labels'][thisTrain + thisValid:thisN])
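
    # Split arithmetic example: with thisN = 100, trainFraction = 0.70 and
    # validFraction = 0.10, the slices above give 70 train, 10 valid and
    # 20 test images; thisTest absorbs the int() rounding loss.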

    if verbose:
        logger_prepLip.info("nbTrainLoaded: ", len(X_train))
        logger_prepLip.info("nbValidLoaded: ", len(X_val))
        logger_prepLip.info("nbTestLoaded: ",  len(X_test))
        logger_prepLip.info("Total loaded: ", len(X_train) + len(X_val) + len(X_test))

    # rough estimate: float32 takes 4x the memory of uint8
    memEstimate = 4 * (sys.getsizeof(X_train) + sys.getsizeof(X_val) + sys.getsizeof(X_test) + \
                       sys.getsizeof(y_train) + sys.getsizeof(y_val) + sys.getsizeof(y_test))
    if verbose: logger_prepLip.info("memory estimate: %s MB", memEstimate / 1e6)
    # if memEstimate > 0.6 * memAvailable:
    #     logger_prepLip.info("loaded too many for memory, stopping loading...")
    #     break

    # cast to numpy arrays with the correct datatype
    dtypeX = 'float32'
    dtypeY = 'int32'  # labels must be integers for the loss computation
    if isinstance(X_train, list): X_train = np.asarray(X_train).astype(dtypeX)
    if isinstance(y_train, list): y_train = np.asarray(y_train).astype(dtypeY)
    if isinstance(X_val, list):   X_val = np.asarray(X_val).astype(dtypeX)
    if isinstance(y_val, list):   y_val = np.asarray(y_val).astype(dtypeY)
    if isinstance(X_test, list):  X_test = np.asarray(X_test).astype(dtypeX)
    if isinstance(y_test, list):  y_test = np.asarray(y_test).astype(dtypeY)

    if verbose:
        logger_prepLip.info("TRAIN: %s %s", X_train.shape, X_train[0][0].dtype)
        logger_prepLip.info("%s %s",y_train.shape, y_train[0].dtype)
        logger_prepLip.info("VALID: %s", X_val.shape)
        logger_prepLip.info("%s", y_val.shape)
        logger_prepLip.info("TEST: %s", X_test.shape)
        logger_prepLip.info("%s",y_test.shape)

    memTot = X_train.nbytes + X_val.nbytes + X_test.nbytes + y_train.nbytes + y_val.nbytes + y_test.nbytes
    if verbose: logger_prepLip.info("Total memory size required as float32: %s MB", memTot / 1000000)

    # fix labels (labels start at 1, but the library expects them to start at 0)
    # y_train = y_train - 1
    # y_val = y_val - 1
    # y_test = y_test - 1

    # rescale to interval [-1,1], cast to float32 for GPU use
    X_train = np.multiply(2. / 255., X_train, dtype='float32')
    X_train = np.subtract(X_train, 1., dtype='float32')
    X_val = np.multiply(2. / 255., X_val, dtype='float32')
    X_val = np.subtract(X_val, 1., dtype='float32')
    X_test = np.multiply(2. / 255., X_test, dtype='float32')
    X_test = np.subtract(X_test, 1., dtype='float32')

    if verbose:
        logger_prepLip.info("Train: %s %s", X_train.shape, X_train[0][0].dtype)
        logger_prepLip.info("Valid: %s %s", X_val.shape, X_val[0][0].dtype)
        logger_prepLip.info("Test:  %s %s", X_test.shape, X_test[0][0].dtype)

    # reshape to get one image per row
    X_train = np.reshape(X_train, (-1, 1, 120, 120))
    X_val = np.reshape(X_val, (-1, 1, 120, 120))
    X_test = np.reshape(X_test, (-1, 1, 120, 120))

    # cast to the correct datatype, just to be sure; everything needs to be float32 for GPU processing
    dtypeX = 'float32'
    dtypeY = 'int32'
    X_train = X_train.astype(dtypeX)
    y_train = y_train.astype(dtypeY)
    X_val = X_val.astype(dtypeX)
    y_val = y_val.astype(dtypeY)
    X_test = X_test.astype(dtypeX)
    y_test = y_test.astype(dtypeY)

    if verbose:
        logger_prepLip.info("TRAIN: %s %s", X_train.shape, X_train[0][0].dtype)
        logger_prepLip.info("%s %s", y_train.shape, y_train[0].dtype)
        logger_prepLip.info("VALID: %s", X_val.shape)
        logger_prepLip.info("%s", y_val.shape)
        logger_prepLip.info("TEST: %s", X_test.shape)
        logger_prepLip.info("%s", y_test.shape)

    ### STORE DATA ###
    if storeProcessed and store_path is not None:
        general_tools.saveToPkl(store_path, [X_train, y_train, X_val, y_val, X_test, y_test])

    return X_train, y_train, X_val, y_val, X_test, y_test
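
# Hedged usage sketch for prepLip_one (paths are placeholders; "Lipspkr1.pkl"
# follows the file naming used in prepLip_all below):
# X_train, y_train, X_val, y_val, X_test, y_test = prepLip_one(
#     speakerFile="Lipspkr1.pkl",
#     sourceDataDir=os.path.expanduser("~/TCDTIMIT/lipreading/database_binary"),
#     storeProcessed=True,
#     processedDir=os.path.expanduser("~/TCDTIMIT/lipreading/processed"))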
def prepLip_all(data_path=os.path.join(os.path.expanduser('~/TCDTIMIT/lipreading/database_binary/')),
                store_path=os.path.join(
                        os.path.expanduser('~/TCDTIMIT/lipreading/database_binaryprocessed/dataset.pkl')),
                type="all", nbLip=3, nbVol=54, trainFraction=0.8, validFraction=0.1, testFraction=0.1,
                nbClasses=39, onehot=False, verbose=False):
    # from https://www.cs.toronto.edu/~kriz/cifar.html
    # also see http://stackoverflow.com/questions/35032675/how-to-create-dataset-similar-to-cifar-10

    # Lipspeaker 1:                  14627 phonemes,    14617 extracted and usable
    # Lipspeaker 2:  28363 - 14627 = 13736 phonemes     13707 extracted
    # Lipspeaker 3:  42535 - 28363 = 14172 phonemes     14153 extracted
    # total Lipspeakers:  14617 + 13707 + 14153 = 42477 extracted

    dtype = 'uint8'
    memAvailableMB = 6000
    memAvailable = memAvailableMB * 1024
    img_shape = (1, 120, 120)
    img_size = np.prod(img_shape)

    # prepare data to load
    fnamesLipspkrs = ['Lipspkr%i.pkl' % i for i in range(1, nbLip + 1)]  # all 3 lipspeakers
    fnamesVolunteers = ['Volunteer%i.pkl' % i for i in range(1, nbVol + 1)]  # some volunteers
    if type == "lipspeakers":
        fnames = fnamesLipspkrs
    elif type == "volunteers":
        fnames = fnamesVolunteers
    elif type == "all":
        fnames = fnamesLipspkrs + fnamesVolunteers
    else:
        raise ValueError("wrong type of dataset entered")

    datasets = {}
    for name in fnames:
        fname = os.path.join(data_path, name)
        if not os.path.exists(fname):
            raise IOError(fname + " was not found.")
        datasets[name] = cache.datasetCache.cache_file(fname)

    # load the images
    # first initialize the matrices
    X_train = []
    y_train = []
    X_val = []
    y_val = []
    X_test = []
    y_test = []

    # now load train data
    trainLoaded = 0
    validLoaded = 0
    testLoaded = 0

    for i, fname in enumerate(fnames):

        if verbose:
            logger_prepLip.info("Total loaded till now: %s ", trainLoaded + validLoaded + testLoaded)
            logger_prepLip.info("nbTrainLoaded:  %s", trainLoaded)
            logger_prepLip.info("nbValidLoaded:  %s", validLoaded)
            logger_prepLip.info("nbTestLoaded:   %s", testLoaded)

        logger_prepLip.info('loading file %s', datasets[fname])
        data = unpickle(datasets[fname])
        thisN = data['data'].shape[0]
        thisTrain = int(trainFraction * thisN)
        thisValid = int(validFraction * thisN)
        thisTest = thisN - thisTrain - thisValid  # compensates for rounding
        if verbose:
            logger_prepLip.info("This dataset contains %s images", thisN)
            logger_prepLip.info("now loading : nbTrain, nbValid, nbTest")
            logger_prepLip.info("\t\t\t %s %s %s ", thisTrain, thisValid, thisTest)

        X_train = X_train + list(data['data'][0:thisTrain])
        X_val = X_val + list(data['data'][thisTrain:thisTrain + thisValid])
        X_test = X_test + list(data['data'][thisTrain + thisValid:thisN])

        y_train = y_train + list(data['labels'][0:thisTrain])
        y_val = y_val + list(data['labels'][thisTrain:thisTrain + thisValid])
        y_test = y_test + list(data['labels'][thisTrain + thisValid:thisN])

        trainLoaded += thisTrain
        validLoaded += thisValid
        testLoaded += thisTest
        if verbose:
            logger_prepLip.info("nbTrainLoaded:  %s", trainLoaded)
            logger_prepLip.info("nbValidLoaded:  %s", validLoaded)
            logger_prepLip.info("nbTestLoaded:   %s", testLoaded)
            logger_prepLip.info("Total loaded till now: %s", trainLoaded + validLoaded + testLoaded)

        # rough estimate: float32 takes 4x the memory of uint8
        memEstimate = 4 * (sys.getsizeof(X_train) + sys.getsizeof(X_val) + sys.getsizeof(X_test) + \
                           sys.getsizeof(y_train) + sys.getsizeof(y_val) + sys.getsizeof(y_test))
        if verbose: logger_prepLip.info("memory estimate: %s MB", memEstimate / 1e6)
        # if memEstimate > 0.6 * memAvailable:
        #     logger_prepLip.info("loaded too many for memory, stopping loading...")
        #     break

    # cast to numpy arrays with the correct datatype
    dtypeX = 'float32'
    dtypeY = 'int32'  # labels must be integers for the loss computation
    if isinstance(X_train, list): X_train = np.asarray(X_train).astype(dtypeX)
    if isinstance(y_train, list): y_train = np.asarray(y_train).astype(dtypeY)
    if isinstance(X_val, list):   X_val = np.asarray(X_val).astype(dtypeX)
    if isinstance(y_val, list):   y_val = np.asarray(y_val).astype(dtypeY)
    if isinstance(X_test, list):  X_test = np.asarray(X_test).astype(dtypeX)
    if isinstance(y_test, list):  y_test = np.asarray(y_test).astype(dtypeY)

    if verbose:
        logger_prepLip.info("TRAIN: %s %s", X_train.shape, X_train[0][0].dtype)
        logger_prepLip.info("%s %s", y_train.shape, y_train[0].dtype)
        logger_prepLip.info("VALID: %s", X_val.shape)
        logger_prepLip.info("%s",y_val.shape)
        logger_prepLip.info("TEST: %s", X_test.shape)
        logger_prepLip.info("%s",y_test.shape)

    memTot = X_train.nbytes + X_val.nbytes + X_test.nbytes + y_train.nbytes + y_val.nbytes + y_test.nbytes
    logger_prepLip.info("Total memory size required as float32: %s MB", memTot / 1000000)

    # rescale to interval [-1,1], cast to float32 for GPU use
    X_train = np.multiply(2. / 255., X_train, dtype='float32')
    X_train = np.subtract(X_train, 1., dtype='float32')
    X_val = np.multiply(2. / 255., X_val, dtype='float32')
    X_val = np.subtract(X_val, 1., dtype='float32')
    X_test = np.multiply(2. / 255., X_test, dtype='float32')
    X_test = np.subtract(X_test, 1., dtype='float32')

    if verbose:
        logger_prepLip.info("Train: %s %s", X_train.shape, X_train[0][0].dtype)
        logger_prepLip.info("Valid: %s %s", X_val.shape, X_val[0][0].dtype)
        logger_prepLip.info("Test: %s %s", X_test.shape, X_test[0][0].dtype)

    # reshape to get one image per row
    X_train = np.reshape(X_train, (-1, 1, 120, 120))
    X_val = np.reshape(X_val, (-1, 1, 120, 120))
    X_test = np.reshape(X_test, (-1, 1, 120, 120))

    # also flatten targets to get one target per row
    # y_train = np.hstack(y_train)
    # y_val = np.hstack(y_val)
    # y_test = np.hstack(y_test)

    # One-hot the targets, then map {0,1} -> {-1,+1} for the hinge loss
    if onehot:
        y_train = np.float32(np.eye(nbClasses)[y_train])
        y_val = np.float32(np.eye(nbClasses)[y_val])
        y_test = np.float32(np.eye(nbClasses)[y_test])

        # for hinge loss
        y_train = 2 * y_train - 1.
        y_val = 2 * y_val - 1.
        y_test = 2 * y_test - 1.

    # cast to the correct datatype, just to be sure; everything needs to be float32 for GPU processing
    dtypeX = 'float32'
    dtypeY = 'int32'
    X_train = X_train.astype(dtypeX)
    y_train = y_train.astype(dtypeY)
    X_val = X_val.astype(dtypeX)
    y_val = y_val.astype(dtypeY)
    X_test = X_test.astype(dtypeX)
    y_test = y_test.astype(dtypeY)
    if verbose:
        logger_prepLip.info("\n Final datatype: ")
        logger_prepLip.info("TRAIN: %s %s ", X_train.shape, X_train[0][0].dtype)
        logger_prepLip.info("%s %s", y_train.shape, y_train[0].dtype)
        logger_prepLip.info("VALID: %s", X_val.shape)
        logger_prepLip.info("%s", y_val.shape)
        logger_prepLip.info("TEST: %s", X_test.shape)
        logger_prepLip.info("%s", y_test.shape)

    ### STORE DATA ###
    dataList = [X_train, y_train, X_val, y_val, X_test, y_test]
    general_tools.saveToPkl(store_path, dataList)

    return X_train, y_train, X_val, y_val, X_test, y_test
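
# Self-contained illustration of the one-hot trick used above: indexing an
# identity matrix with an integer label vector yields one row per label, and
# 2*y - 1 maps {0,1} to {-1,+1} for the hinge loss.
import numpy as np

_labels = np.asarray([0, 2, 1], dtype='int32')
_onehot = np.float32(np.eye(3)[_labels])  # shape (3, 3); row i is one-hot for _labels[i]
_hinge = 2 * _onehot - 1.                 # {0,1} -> {-1,+1}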
# Example #5
def train(train_fn,
          val_fn,
          model,
          batch_size,
          LR_start,
          LR_decay,
          num_epochs,
          X_train,
          y_train,
          X_val,
          y_val,
          X_test,
          y_test,
          save_name=None,
          shuffle_parts=1,
          justTest=False):
    # A function which shuffles a dataset in chunks, X and y with the same permutation
    def shuffle(X, y):

        chunk_size = len(X) // shuffle_parts
        shuffled_range = list(range(chunk_size))

        X_buffer = np.copy(X[0:chunk_size])
        y_buffer = np.copy(y[0:chunk_size])

        for k in range(shuffle_parts):

            np.random.shuffle(shuffled_range)

            for i in range(chunk_size):
                X_buffer[i] = X[k * chunk_size + shuffled_range[i]]
                y_buffer[i] = y[k * chunk_size + shuffled_range[i]]

            X[k * chunk_size:(k + 1) * chunk_size] = X_buffer
            y[k * chunk_size:(k + 1) * chunk_size] = y_buffer

        return X, y

        # shuffled_range = range(len(X))
        # np.random.shuffle(shuffled_range)

        # new_X = np.copy(X)
        # new_y = np.copy(y)

        # for i in range(len(X)):

        # new_X[i] = X[shuffled_range[i]]
        # new_y[i] = y[shuffled_range[i]]

        # return new_X,new_y
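
    # Chunked shuffle in numbers: with len(X) = 10 and shuffle_parts = 2,
    # chunk_size = 5 and each half of the dataset is permuted independently,
    # with X and y reordered by the same permutation; only one chunk is
    # copied into the buffers at a time, which bounds peak memory use.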

    # This function trains the model a full epoch (on the whole dataset)
    def train_epoch(X, y, LR):

        loss = 0
        batches = len(X) // batch_size

        for i in tqdm(range(batches)):
            loss += train_fn(X[i * batch_size:(i + 1) * batch_size],
                             y[i * batch_size:(i + 1) * batch_size], LR)

        loss /= batches

        return loss

    # This function tests the model a full epoch (on the whole dataset)
    def val_epoch(X, y):

        err = 0
        loss = 0
        batches = len(X) // batch_size

        for i in tqdm(range(batches)):
            new_loss, new_err = val_fn(X[i * batch_size:(i + 1) * batch_size],
                                       y[i * batch_size:(i + 1) * batch_size])
            err += new_err
            loss += new_loss

        err = err / batches * 100
        loss /= batches

        return err, loss

    if save_name is None:
        save_name = "./bestModel_binary"
    save_path = save_name + ".npz"

    # shuffle the train set
    X_train, y_train = shuffle(X_train, y_train)
    best_val_err = 100
    best_epoch = 1
    LR = LR_start
    network_train_info = {
        'train_cost': [],
        'val_cost': [],
        'val_acc': [],
        'test_cost': [],
        'test_acc': [],
    }

    test_err, test_loss = val_epoch(X_test, y_test)

    print("Initial results: ")
    print("  test loss:                     " + str(test_loss))
    print("  test error rate:               " + str(test_err) + "%")

    if justTest: return 0

    # load old train info
    import general_tools
    if os.path.exists(save_name + ".npz") and os.path.exists(save_name +
                                                             "_trainInfo.pkl"):
        old_train_info = general_tools.unpickle(save_name + '_trainInfo.pkl')

        best_val_err = 1 - max(old_train_info['val_acc'])
        test_cost = min(old_train_info['test_cost'])
        test_err = 1 - max(old_train_info['test_acc'])
        network_train_info = old_train_info

    # We iterate over epochs:
    for epoch in tqdm(range(num_epochs)):

        start_time = time.time()

        train_loss = train_epoch(X_train, y_train, LR)
        X_train, y_train = shuffle(X_train, y_train)

        val_err, val_loss = val_epoch(X_val, y_val)

        print("  previous best validation error rate:    " +
              str(best_val_err) + "%")
        print("  LR:                            " + str(LR))
        print("  training loss:                 " + str(train_loss))
        print("  validation loss:               " + str(val_loss))
        print("  validation error rate:         " + str(val_err) + "%")
        print(" \n best epoch:                    " + str(best_epoch))

        # test if validation error went down
        if val_err <= best_val_err:
            print("Best ever validation score; evaluating test...")
            best_val_err = val_err
            best_epoch = epoch + 1

            test_err, test_loss = val_epoch(X_test, y_test)
            print("  test loss:                     " + str(test_loss))
            print("  test error rate:               " + str(test_err) + "%")

            np.savez(save_name, *lasagne.layers.get_all_param_values(model))  # unpack: one arr_%d entry per parameter, matching the loader below
        # else:
        #     print(save_path)
        #     if os.path.exists(save_path):
        #         with np.load(save_path) as f:
        #             param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        #             try: lasagne.layers.set_all_param_values(model, *param_values)
        #             except:
        #                 lasagne.layers.set_all_param_values(model, param_values)
        #             print("Not improved, load best model " + save_path)

        epoch_duration = time.time() - start_time

        # save the training info
        network_train_info['train_cost'].append(train_loss)
        network_train_info['val_cost'].append(val_loss)
        network_train_info['val_acc'].append(1 - val_err)
        network_train_info['test_cost'].append(test_loss)
        network_train_info['test_acc'].append(1 - test_err)

        import general_tools
        general_tools.saveToPkl(save_name + '_trainInfo.pkl',
                                network_train_info)

        # Then we print the results for this epoch:
        print("Epoch " + str(epoch + 1) + " of " + str(num_epochs) + " took " +
              str(epoch_duration) + "s" + "\n\n")

        # decay the LR
        LR *= LR_decay
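
# Restoring the weights saved with np.savez above is the mirror operation
# (a sketch of the commented-out reload logic, assuming `model` is the same
# Lasagne layer graph the parameters came from):
# with np.load(save_path) as f:
#     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
# lasagne.layers.set_all_param_values(model, param_values)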
# Example #6
def train(train_fn,
          val_fn,
          out_fn,
          topk_acc_fn,
          k,
          network_output_layer,
          batch_size,
          LR_start,
          LR_decay,
          num_epochs,
          dataset,
          database_binaryDir,
          storeProcessed,
          processedDir,
          loadPerSpeaker=False,
          save_name=None,
          shuffleEnabled=True):

    if loadPerSpeaker:
        trainingSpeakerFiles, testSpeakerFiles = dataset
        logger_train.info("train files: \n%s", [
            os.path.basename(speakerFile)
            for speakerFile in trainingSpeakerFiles
        ])
        logger_train.info("test files:  \n %s", [
            os.path.basename(speakerFile) for speakerFile in testSpeakerFiles
        ])
    else:
        X_train, y_train, X_val, y_val, X_test, y_test = dataset
        logger_train.info("the number of training examples is: %s",
                          len(X_train))
        logger_train.info("the number of valid examples is:    %s", len(X_val))
        logger_train.info("the number of test examples is:     %s",
                          len(X_test))

    #import pdb; pdb.set_trace()

    # A function which shuffles a dataset, X and y with the same permutation
    def shuffle(X, y):
        shuffle_parts = 1
        chunk_size = len(X) // shuffle_parts
        shuffled_range = list(range(chunk_size))

        X_buffer = np.copy(X[0:chunk_size])
        y_buffer = np.copy(y[0:chunk_size])

        for part in range(shuffle_parts):  # renamed from k so we don't shadow the top-k parameter

            np.random.shuffle(shuffled_range)

            for i in range(chunk_size):
                X_buffer[i] = X[part * chunk_size + shuffled_range[i]]
                y_buffer[i] = y[part * chunk_size + shuffled_range[i]]

            X[part * chunk_size:(part + 1) * chunk_size] = X_buffer
            y[part * chunk_size:(part + 1) * chunk_size] = y_buffer

        return X, y

    # This function trains the model a full epoch (on the whole dataset)
    def train_epoch(X, y, LR):
        cost = 0
        nb_batches = len(X) // batch_size

        for i in tqdm(range(nb_batches), total=nb_batches):
            batch_X = X[i * batch_size:(i + 1) * batch_size]
            batch_y = y[i * batch_size:(i + 1) * batch_size]
            # print("batch_X.shape: ", batch_X.shape)
            # print("batch_y.shape: ", batch_y.shape)
            cost += train_fn(batch_X, batch_y, LR)

            # if i==0:
            #     out = out_fn(batch_X)
            #     import pdb;pdb.set_trace()

        return cost, nb_batches

    # This function tests the model a full epoch (on the whole dataset)
    def val_epoch(X, y):
        acc = 0
        cost = 0
        topk_acc = 0
        nb_batches = len(X) // batch_size

        for i in tqdm(range(nb_batches)):
            batch_X = X[i * batch_size:(i + 1) * batch_size]
            batch_y = y[i * batch_size:(i + 1) * batch_size]
            new_cost, new_acc, new_topk_acc = val_fn(batch_X, batch_y)
            acc += new_acc
            cost += new_cost
            topk_acc += new_topk_acc

        return cost, acc, topk_acc, nb_batches

    # evaluate many TRAINING speaker files -> train loss, val loss and val acc. Load them in one by one (so they fit in memory)
    def evalTRAINING(trainingSpeakerFiles,
                     LR,
                     shuffleEnabled,
                     verbose=False,
                     sourceDataDir=None,
                     storeProcessed=False,
                     processedDir=None):
        train_cost = 0
        val_acc = 0
        val_cost = 0
        val_topk_acc = 0
        nb_train_batches = 0
        nb_val_batches = 0
        # for each speaker, pass over the train set, then val set. (test is other files). save the results.
        for speakerFile in tqdm(trainingSpeakerFiles,
                                total=len(trainingSpeakerFiles)):
            # TODO: parallelize this with the GPU evaluation to eliminate waiting
            logger_train.debug("processing %s", speakerFile)
            X_train, y_train, X_val, y_val, X_test, y_test = preprocessLipreading.prepLip_one(
                speakerFile=speakerFile,
                trainFraction=0.8,
                validFraction=0.2,
                sourceDataDir=sourceDataDir,
                storeProcessed=storeProcessed,
                processedDir=processedDir)
            if verbose:
                logger_train.debug("the number of training examples is: %s",
                                   len(X_train))
                logger_train.debug("the number of valid examples is:    %s",
                                   len(X_val))
                logger_train.debug("the number of test examples is:     %s",
                                   len(X_test))

            if shuffleEnabled: X_train, y_train = shuffle(X_train, y_train)
            train_cost_one, train_batches_one = train_epoch(X=X_train,
                                                            y=y_train,
                                                            LR=LR)
            train_cost += train_cost_one
            nb_train_batches += train_batches_one

            # get results for validation  set
            val_cost_one, val_acc_one, val_topk_acc_one, val_batches_one = val_epoch(
                X=X_val, y=y_val)
            val_cost += val_cost_one
            val_acc += val_acc_one
            val_topk_acc += val_topk_acc_one
            nb_val_batches += val_batches_one

            if verbose:
                logger_train.debug("  this speaker results: ")
                logger_train.debug("\ttraining cost:     %s",
                                   train_cost_one / train_batches_one)
                logger_train.debug("\tvalidation cost:   %s",
                                   val_cost_one / val_batches_one)
                logger_train.debug("\vvalidation acc rate:  %s %%",
                                   val_acc_one / val_batches_one * 100)
                logger_train.debug("\vvalidation top %s acc rate:  %s %%", k,
                                   val_topk_acc_one / val_batches_one * 100)

        # get the average over all speakers
        train_cost /= nb_train_batches
        val_cost /= nb_val_batches
        val_acc = val_acc / nb_val_batches * 100  # convert to %
        val_topk_acc = val_topk_acc / nb_val_batches * 100  # convert to %

        return train_cost, val_cost, val_acc, val_topk_acc

    # evaluate many TEST speaker files. Load them in one by one (so they fit in memory)
    def evalTEST(testSpeakerFiles,
                 verbose=False,
                 sourceDataDir=None,
                 storeProcessed=False,
                 processedDir=None):
        test_acc = 0
        test_cost = 0
        test_topk_acc = 0
        nb_test_batches = 0
        # for each speaker, pass over its whole data as the test set (trainFraction and validFraction are 0) and save the results
        for speakerFile in tqdm(testSpeakerFiles, total=len(testSpeakerFiles)):
            logger_train.debug("processing %s", speakerFile)
            X_train, y_train, X_val, y_val, X_test, y_test = preprocessLipreading.prepLip_one(
                speakerFile=speakerFile,
                trainFraction=0.0,
                validFraction=0.0,
                sourceDataDir=sourceDataDir,
                storeProcessed=storeProcessed,
                processedDir=processedDir)

            if verbose:
                logger_train.debug("the number of training examples is: %s",
                                   len(X_train))
                logger_train.debug("the number of valid examples is:    %s",
                                   len(X_val))
                logger_train.debug("the number of test examples is:     %s",
                                   len(X_test))

            # get results for the test set
            test_cost_one, test_acc_one, test_topk_acc_one, test_batches_one = val_epoch(
                X=X_test, y=y_test)
            test_acc += test_acc_one
            test_cost += test_cost_one
            test_topk_acc += test_topk_acc_one
            nb_test_batches += test_batches_one

            if verbose:
                logger_train.debug("  this speaker results: ")
                logger_train.debug("\ttest cost:   %s",
                                   test_cost_one / test_batches_one)
                logger_train.debug("\vtest acc rate:  %s %%",
                                   test_acc_one / test_batches_one * 100)
                logger_train.debug("\vtest  top %s acc rate:  %s %%", k,
                                   test_topk_acc_one / test_batches_one * 100)

        # get the average over all speakers
        test_acc = test_acc / nb_test_batches * 100
        test_cost /= nb_test_batches
        test_topk_acc = test_topk_acc / nb_test_batches * 100
        return test_cost, test_acc, test_topk_acc

    def updateLR(LR, LR_decay, network_train_info, epochsNotImproved):
        this_cost = network_train_info['val_cost'][-1]  # validation cost
        try:
            last_cost = network_train_info['val_cost'][-2]
        except IndexError:
            last_cost = 10 * this_cost  # first epoch: only one result stored so far

        # only reduce the LR if there is not much improvement anymore
        if this_cost / float(last_cost) >= 0.99:
            logger_train.info(
                " Error not much reduced: %s vs %s. Reducing LR: %s",
                this_cost, last_cost, LR * LR_decay)
            epochsNotImproved += 1
            return LR * LR_decay, epochsNotImproved
        else:
            epochsNotImproved = max(epochsNotImproved - 1,
                                    0)  # reduce by 1, minimum 0
            return LR, epochsNotImproved
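
    # updateLR in numbers: with LR_decay = 0.5, a val cost going
    # 1.00 -> 0.995 (ratio 0.995 >= 0.99) halves the LR and increments
    # epochsNotImproved, while 1.00 -> 0.90 (ratio 0.90 < 0.99) keeps the
    # LR and decrements the counter (floored at 0).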

    best_val_acc = 0
    test_topk_acc = 0
    test_cost = 0
    test_acc = 0
    # try to load performance metrics of a previously stored model
    if os.path.exists(save_name + ".npz") and os.path.exists(save_name +
                                                             "_trainInfo.pkl"):
        old_train_info = preprocessLipreading.unpickle(save_name +
                                                       '_trainInfo.pkl')
        # backward compatibility
        if type(old_train_info) == list:
            old_train_info = old_train_info[0]
            best_val_acc = max(old_train_info[2])
            test_cost = min(old_train_info[3])
            test_acc = min(old_train_info[3])
        elif type(old_train_info) == dict:  # normal case
            # best stored value: max for accuracies, min for costs
            best_val_acc = max(old_train_info['val_acc'])
            test_cost = min(old_train_info['test_cost'])
            test_acc = max(old_train_info['test_acc'])
            try:
                test_topk_acc = max(old_train_info['test_topk_acc'])
            except KeyError:
                test_topk_acc = 0
        else:
            best_val_acc = 0
            test_topk_acc = 0
            test_cost = 0
            test_acc = 0

    logger_train.info("previous training session results: ")
    logger_train.info("\t  test cost:        %s", test_cost)
    logger_train.info("\t  test acc rate:  %s %%", test_acc)
    logger_train.info("\t  val acc:  %s %%", best_val_acc)

    best_epoch = 1
    LR = LR_start
    # for storage of training info
    network_train_info = {
        'train_cost': [],
        'val_cost': [],
        'val_acc': [],
        'val_topk_acc': [],
        'test_cost': [],
        'test_acc': [],
        'test_topk_acc': []
    }  #used to be list of lists
    epochsNotImproved = 0

    logger_train.info("starting training for %s epochs...", num_epochs)
    # now run through the epochs

    # TODO: remove this
    if not loadPerSpeaker:  # all at once
        test_cost, test_acc, test_topk_acc, nb_test_batches = val_epoch(
            X_test, y_test)
        test_acc = test_acc / nb_test_batches * 100
        test_cost /= nb_test_batches
        test_topk_acc = test_topk_acc / nb_test_batches * 100

    else:  # process each speaker separately
        test_cost, test_acc, test_topk_acc = evalTEST(
            testSpeakerFiles,
            sourceDataDir=database_binaryDir,
            storeProcessed=storeProcessed,
            processedDir=processedDir)
    logger_train.info("TEST results: ")
    logger_train.info("\t  test cost:        %s", test_cost)
    logger_train.info("\t  test acc rate:  %s %%", test_acc)
    logger_train.info("\t  test top %s acc:  %s %%", k, test_topk_acc)
    # # TODO: end remove

    for epoch in range(num_epochs):
        logger_train.info("\n\n\n Epoch %s started", epoch + 1)
        start_time = time.time()

        if not loadPerSpeaker:
            total_train_cost, nb_train_batches = train_epoch(X=X_train,
                                                             y=y_train,
                                                             LR=LR)
            train_cost = total_train_cost / nb_train_batches
            X_train, y_train = shuffle(X_train, y_train)

            val_cost, val_acc, val_topk_acc, nb_val_batches = val_epoch(
                X=X_val, y=y_val)
            val_acc = val_acc / nb_val_batches * 100
            val_cost /= nb_val_batches
            val_topk_acc = val_topk_acc / nb_val_batches * 100

        else:
            train_cost, val_cost, val_acc, val_topk_acc = evalTRAINING(
                trainingSpeakerFiles,
                LR,
                shuffleEnabled,
                sourceDataDir=database_binaryDir,
                storeProcessed=storeProcessed,
                processedDir=processedDir)

        # test if validation acc went down
        printTest = False
        if val_acc > best_val_acc:
            printTest = True
            best_val_acc = val_acc
            best_epoch = epoch + 1

            logger_train.info(
                "\n\nBest ever validation score; evaluating TEST set...")

            if not loadPerSpeaker:  # all at once
                test_cost, test_acc, test_topk_acc, nb_test_batches = val_epoch(
                    X_test, y_test)
                test_acc = test_acc / nb_test_batches * 100
                test_cost /= nb_test_batches
                test_topk_acc = test_topk_acc / nb_test_batches * 100

            else:  # process each speaker separately
                test_cost, test_acc, test_topk_acc = evalTEST(
                    testSpeakerFiles,
                    sourceDataDir=database_binaryDir,
                    storeProcessed=storeProcessed,
                    processedDir=processedDir)
            logger_train.info("TEST results: ")
            logger_train.info("\t  test cost:        %s", test_cost)
            logger_train.info("\t  test acc rate:  %s %%", test_acc)
            logger_train.info("\t  test top %s acc:  %s %%", k, test_topk_acc)

            if save_name is None:
                save_name = "./bestModel"
            if not os.path.exists(os.path.dirname(save_name)):
                os.makedirs(os.path.dirname(save_name))
            logger_train.info("saving model to %s", save_name)
            np.savez(
                save_name,
                *lasagne.layers.get_all_param_values(network_output_layer))

        epoch_duration = time.time() - start_time

        # Then we log the results for this epoch:
        logger_train.info("Epoch %s of %s took %s seconds", epoch + 1,
                          num_epochs, epoch_duration)
        logger_train.info("  LR:                            %s", LR)
        logger_train.info("  training cost:                 %s", train_cost)
        logger_train.info("  validation cost:               %s", val_cost)
        logger_train.info("  validation acc rate:         %s %%", val_acc)
        logger_train.info("  validation top %s acc rate:         %s %%", k,
                          val_topk_acc)
        logger_train.info("  best epoch:                    %s", best_epoch)
        logger_train.info("  best validation acc rate:    %s %%", best_val_acc)
        if printTest:
            logger_train.info("  test cost:                 %s", test_cost)
            logger_train.info("  test acc rate:           %s %%", test_acc)
            logger_train.info("  test top %s acc rate:    %s %%", k,
                              test_topk_acc)

        # save the training info
        network_train_info['train_cost'].append(train_cost)
        network_train_info['val_cost'].append(val_cost)
        network_train_info['val_acc'].append(val_acc)
        network_train_info['val_topk_acc'].append(val_topk_acc)
        network_train_info['test_cost'].append(test_cost)
        network_train_info['test_acc'].append(test_acc)
        network_train_info['test_topk_acc'].append(test_topk_acc)

        store_path = save_name + '_trainInfo.pkl'
        general_tools.saveToPkl(store_path, network_train_info)
        logger_train.info("Train info written to:\t %s", store_path)

        # decay the LR
        #LR *= LR_decay
        LR, epochsNotImproved = updateLR(LR, LR_decay, network_train_info,
                                         epochsNotImproved)

        if epochsNotImproved > 8:
            logger_train.warning("\n\n NO MORE IMPROVEMENTS -> stop training")
            if not loadPerSpeaker:  # all at once; testSpeakerFiles only exists when loading per speaker
                test_cost, test_acc, test_topk_acc, nb_test_batches = val_epoch(
                    X_test, y_test)
                test_acc = test_acc / nb_test_batches * 100
                test_cost /= nb_test_batches
                test_topk_acc = test_topk_acc / nb_test_batches * 100
            else:
                test_cost, test_acc, test_topk_acc = evalTEST(
                    testSpeakerFiles,
                    sourceDataDir=database_binaryDir,
                    storeProcessed=storeProcessed,
                    processedDir=processedDir)

            logger_train.info("FINAL TEST results: ")
            logger_train.info("\t  test cost:        %s", test_cost)
            logger_train.info("\t  test acc rate:  %s %%", test_acc)
            logger_train.info("\t  test top %s acc:  %s %%", k, test_topk_acc)
            break

    logger_train.info("Done.")