def evaluateModel(self, BIDIRECTIONAL, N_HIDDEN_LIST, batch_size, dataName, wavDir, data_store_dir, meanStd_path,
                  model_load, nbMFCCs, store_dir, force_overwrite=False):
    """Run the network over the wav files of ``wavDir`` and log the results.

    Preprocessed data is cached under the prefix
    ``data_store_dir + dataName + "_nbMFCC" + nbMFCCs``: ``<prefix>.pkl`` holds
    ``[inputs, targets, valid_frames]`` and ``<prefix>_noTargets.pkl`` holds
    ``[inputs]``. When neither file exists the wavs are preprocessed,
    normalized with the mean/std stored at ``meanStd_path``, trimmed to a
    multiple of ``batch_size``, padded, and written to the cache.

    Args:
        BIDIRECTIONAL: not used by this method.
        N_HIDDEN_LIST: not used by this method.
        batch_size: number of sequences per batch.
        dataName: dataset name; ``/`` is replaced by ``_`` in file names.
        wavDir: directory searched for wav (and optional label) files.
        data_store_dir: string prefix of the preprocessed-data cache. No path
            separator is inserted between it and the dataset name.
        meanStd_path: pickle file containing ``[mean_val, std_val]``.
        model_load: only written to the log.
        nbMFCCs: number of MFCCs, forwarded to the preprocessing functions.
        store_dir: directory receiving the evaluation log file.
        force_overwrite: when true, an existing log file is overwritten
            without asking.

    Returns:
        ``0`` when a log file already exists and the user declines to
        overwrite it, otherwise ``None``.
    """

    def short_wav_name(wav_file):
        # Last four path components: three parent directories plus file name.
        parent = os.path.dirname(wav_file)
        grandparent = os.path.dirname(parent)
        great_grandparent = os.path.dirname(grandparent)
        return str(os.path.basename(great_grandparent) + os.sep + os.path.basename(grandparent) + os.sep +
                   os.path.basename(parent) + os.sep + os.path.basename(wav_file))

    logger_evaluate.info("\n\n\n")

    ####### THE DATA you want to evaluate ##########
    data_store_path = data_store_dir + dataName.replace('/', '_') + "_nbMFCC" + str(nbMFCCs)
    if not os.path.exists(data_store_dir):
        os.makedirs(data_store_dir)
    predictions_path = store_dir + os.sep + dataName.replace('/', '_') + "_predictions.pkl"

    # log file
    logFile = store_dir + os.sep + "Evaluation" + dataName.replace('/', '_') + '.log'
    if os.path.exists(logFile) and not force_overwrite:
        from general_tools import query_yes_no
        # The path has to be interpolated here; query_yes_no only receives the finished question.
        question = "Log file already exists at %s\n Do you want to evaluate again and overwrite?" % logFile
        if not query_yes_no(question, "y"):
            logger_evaluate.info("Log file already exists, not re-evaluating.... ")
            return 0

    fh = logging.FileHandler(logFile, 'w')  # create new logFile
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger_evaluate.addHandler(fh)

    # try/finally so the handler is detached even when evaluation fails; otherwise
    # every failed call would leave one more open file handler on the logger.
    try:
        logger_evaluate.info("\n MODEL: %s", model_load)
        logger_evaluate.info("\n WAV_DIR: %s", wavDir)
        logger_evaluate.info("\n PREDICTS: %s", predictions_path)
        logger_evaluate.info("\n LOG: %s", logFile)
        logger_evaluate.info("\n")

        # These stay None when the dataset has no labels.
        targets = None
        valid_frames = None
        avg_acc = None

        # GATHERING DATA
        logger_evaluate.info("* Gathering Data ...")
        if os.path.exists(data_store_path + ".pkl"):
            [inputs, targets, valid_frames] = unpickle(data_store_path + ".pkl")
            calculateAccuracy = True
            logger_evaluate.info("Successfully loaded preprocessed data, with targets")

        elif os.path.exists(data_store_path + "_noTargets.pkl"):
            # TODO: make it work for unlabeled datasets. see RNN_tools_lstm.py, eg iterate_minibatch_noTargets.
            [inputs] = unpickle(data_store_path + "_noTargets.pkl")
            calculateAccuracy = False  # we can't as we don't know the correct labels
            logger_evaluate.info("Successfully loaded preprocessed data, no targets")

        else:
            logger_evaluate.info("Data not found, preprocessing...")

            # load wavs and labels
            wav_files = transform.loadWavs(wavDir)
            wav_filenames = [short_wav_name(wav_file) for wav_file in wav_files]
            logger_evaluate.info("Found %s files to evaluate \n Example: %s", len(wav_filenames),
                                 wav_filenames[0] if wav_filenames else None)
            label_files = transform.loadPhns(wavDir)

            # if source dir doesn't contain a label file per wav, we can't calculate accuracy
            calculateAccuracy = len(wav_files) == len(label_files)
            if calculateAccuracy:
                # From WAVS, generate X, y and valid_frames (fixWavs is supposed to be done already)
                inputs, targets, valid_frames = preprocessWavs.preprocess_dataset(
                    source_path=wavDir, nbMFCCs=nbMFCCs, logger=logger_evaluate)
                inputs = preprocessWavs.set_type(inputs, 'float32')
                targets = preprocessWavs.set_type(targets, 'int32')
                valid_frames = preprocessWavs.set_type(valid_frames, 'int32')
            else:
                inputs = preprocessWavs.preprocess_unlabeled_dataset(
                    source_path=wavDir, nbMFCCs=nbMFCCs, logger=logger_evaluate)
                inputs = preprocessWavs.set_type(inputs, 'float32')

            # normalize inputs using dataset Mean and Std_dev; convert to float32 for GPU evaluation
            # NOTE: pickle must only be loaded from trusted files.
            with open(meanStd_path, 'rb') as cPickle_file:
                [mean_val, std_val] = cPickle.load(cPickle_file)
            inputs = preprocessWavs.normalize(inputs, mean_val, std_val)

            # just to be sure
            inputs = preprocessWavs.set_type(inputs, 'float32')

            # Print some information
            logger_evaluate.debug("* Data information")
            logger_evaluate.debug(' inputs')
            logger_evaluate.debug('%s %s', type(inputs), len(inputs))
            logger_evaluate.debug('%s %s', type(inputs[0]), inputs[0].shape)
            logger_evaluate.debug('%s %s', type(inputs[0][0]), inputs[0][0].shape)
            logger_evaluate.debug('%s', type(inputs[0][0][0]))
            if calculateAccuracy:  # targets only exist for labeled data
                logger_evaluate.debug('y train')
                logger_evaluate.debug(' %s %s', type(targets), len(targets))
                logger_evaluate.debug(' %s %s', type(targets[0]), targets[0].shape)
                logger_evaluate.debug(' %s %s', type(targets[0][0]), targets[0][0].shape)

            # slice to have a number of inputs that is a multiple of batch size
            logger_evaluate.info("Not evaluating %s last files (batch size mismatch)", len(inputs) % batch_size)
            inputs = inputs[:len(inputs) - len(inputs) % batch_size]
            if calculateAccuracy:
                targets = targets[:len(targets) - len(targets) % batch_size]
                valid_frames = valid_frames[:len(valid_frames) - len(valid_frames) % batch_size]

            # pad the inputs to process batches easily
            inputs = pad_sequences_X(inputs)
            if calculateAccuracy:
                targets = pad_sequences_y(targets)

            # save the preprocessed data
            logger_evaluate.info("storing preprocessed data to: %s", data_store_path)
            if calculateAccuracy:
                general_tools.saveToPkl(data_store_path + '.pkl', [inputs, targets, valid_frames])
            else:
                general_tools.saveToPkl(data_store_path + '_noTargets.pkl', [inputs])

        # Gather filenames; for debugging
        wav_files = transform.loadWavs(wavDir)
        wav_filenames = [short_wav_name(wav_file) for wav_file in wav_files]
        logger_evaluate.debug(" # inputs: %s, # wav files: %s", len(inputs), len(wav_files))

        # Keep a copy of the data: it is needed again for reporting, and the
        # batch iterator is said to remove elements from the arrays it is given.
        inputs_bak = copy.deepcopy(inputs)
        if calculateAccuracy:
            targets_bak = copy.deepcopy(targets)
            valid_frames_bak = copy.deepcopy(valid_frames)

        logger_evaluate.info("* Evaluating: pass over Evaluation Set")

        if calculateAccuracy:  # if .phn files are provided, we can check our predictions
            logger_evaluate.info("Getting predictions and calculating accuracy...")
            avg_error, avg_acc, predictions = self.RNN_network.run_epoch(
                X=inputs, y=targets, valid_frames=valid_frames, get_predictions=True, batch_size=batch_size)

            logger_evaluate.info("All batches, avg Accuracy: %s", avg_acc)
            inputs = inputs_bak
            targets = targets_bak
            valid_frames = valid_frames_bak
        else:
            # TODO: the unlabeled path is marked as unfinished upstream.
            predictions = []
            # A separate loop variable keeps the full `inputs` array intact.
            for batch_inputs, masks, seq_lengths in tqdm(
                    iterate_minibatches_noTargets(inputs, batch_size=batch_size, shuffle=False),
                    total=len(inputs)):
                nb_inputs = len(batch_inputs)  # usually batch size, but could be lower
                prediction = self.RNN_network.predictions_fn(batch_inputs, masks)
                prediction = np.reshape(prediction, (nb_inputs, -1))
                predictions.extend(prediction)
            inputs = inputs_bak

        # Print information about the predictions
        logger_evaluate.info("* Done")
        end_evaluation_time = time.time()
        # NOTE(review): measured from program_start_time, i.e. since program start — confirm this is intended.
        eval_duration = end_evaluation_time - program_start_time
        logger_evaluate.info('Total time: {:.3f}'.format(eval_duration))

        # Print the results; a failure here must not prevent the log handler cleanup.
        if calculateAccuracy:
            try:
                printEvaluation(wav_filenames, inputs, predictions, targets, valid_frames, avg_acc,
                                range(len(inputs)), logger=logger_evaluate, only_final_accuracy=True)
            except Exception:
                logger_evaluate.exception("Printing the evaluation results failed")
        else:
            logger_evaluate.info("No targets available, skipping accuracy report (%s predictions made)",
                                 len(predictions))
        logger_evaluate.info('Evaluation duration: {:.3f}'.format(eval_duration))
        logger_evaluate.info('Printing duration: {:.3f}'.format(time.time() - end_evaluation_time))
    finally:
        # close the log handler
        fh.close()
        logger_evaluate.removeHandler(fh)
# print some more to check that cast succeeded logger.debug('X train') logger.debug(' %s %s', type(X_train), len(X_train)) logger.debug(' %s %s', type(X_train[0]), X_train[0].shape) logger.debug(' %s %s', type(X_train[0][0]), X_train[0][0].shape) logger.debug(' %s %s', type(X_train[0][0][0]), X_train[0][0].shape) logger.debug('y train') logger.debug(' %s %s', type(y_train), len(y_train)) logger.debug(' %s %s', type(y_train[0]), y_train[0].shape) logger.debug(' %s %s', type(y_train[0][0]), y_train[0][0].shape) ### STORE DATA ### logger.info('Saving data to %s', target_path) dataList = [ X_train, y_train, valid_frames_train, X_val, y_val, valid_frames_val, X_test, y_test, valid_frames_test ] general_tools.saveToPkl(target_path, dataList) # these can be used to evaluate new data, so you don't have to load the whole dataset just to normalize meanStd_path = os.path.dirname(outputDir) + os.sep + os.path.basename( dataRootDir) + "MeanStd.pkl" logger.info('Saving Mean and Std_val to %s', meanStd_path) dataList = [mean_val, std_val] general_tools.saveToPkl(meanStd_path, dataList) logger.info('Preprocessing complete!') logger.info('Total time: {:.3f}'.format(timeit.default_timer() - program_start_time))
def prepLip_one(speakerFile=None, trainFraction=0.70, validFraction=0.10, sourceDataDir=None, storeProcessed=False,
                processedDir=None, verbose=False, loadData=True, viseme=False):
    """Load one speaker file and split it into train/validation/test arrays.

    The speaker file is unpickled from ``sourceDataDir`` and must provide
    ``data['data']`` (an array with one image per row) and ``data['labels']``.
    Rows are split in order: the first ``trainFraction`` part for training,
    the next ``validFraction`` part for validation, the rest for testing.
    When the two fractions sum to 1 the validation set takes every remaining
    row and the test set is empty.

    Images are converted to float32, mapped through ``2/255 * x - 1`` and
    reshaped to ``(-1, 1, 120, 120)``; labels are converted to int32.

    Args:
        speakerFile: file name of the speaker pickle inside ``sourceDataDir``.
        trainFraction: fraction of the rows used for training.
        validFraction: fraction of the rows used for validation.
        sourceDataDir: directory containing ``speakerFile``.
        storeProcessed: when true (and ``processedDir`` is given), the result
            is pickled to the processed-data path.
        processedDir: prefix of the processed-data path; ``None`` disables
            both the cache lookup and storing.
        verbose: log split sizes, shapes and dtypes.
        loadData: when the processed file already exists, return its content
            if true, or just return ``None`` if false (existence check only).
        viseme: map phoneme labels to viseme labels before splitting.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``, the unpickled
        content of the processed file when it already exists and ``loadData``
        is true, or ``None`` when it exists and ``loadData`` is false.
    """
    # from https://www.cs.toronto.edu/~kriz/cifar.html
    # also see http://stackoverflow.com/questions/35032675/how-to-create-dataset-similar-to-cifar-10
    dtypeX = 'float32'  # everything needs to be float32 for GPU processing
    dtypeY = 'int32'

    def to_network_input(images):
        # rescale with 2/255 * x - 1 (maps [0, 255] to [-1, 1]), then one (1, 120, 120) image per row
        scaled = np.multiply(2. / 255., images, dtype='float32')
        scaled = np.subtract(scaled, 1., dtype='float32')
        return np.reshape(scaled, (-1, 1, 120, 120)).astype(dtypeX)

    # Bound up front so the final "store" check also works without a processedDir.
    store_path = None
    if processedDir is not None:
        store_path = ''.join([processedDir, ("_viseme" if viseme else "_phoneme"), "_train",
                              str(trainFraction).replace("0.", ""), "valid", str(validFraction).replace("0.", ""),
                              os.sep, os.path.basename(speakerFile)])
        print(store_path)
        # if already processed, just load it from disk
        if os.path.exists(store_path):
            if loadData:
                logger_prepLip.info("loading stored files X's...")
                return unpickle(store_path)
            # before starting training we only check that the file exists; no need to load the data
            return
        logger_prepLip.info(" %s processed data doesn't exist yet; generating...", speakerFile)

    data = unpickle(sourceDataDir + os.sep + speakerFile)

    # convert phonemes to viseme labels if needed
    if viseme:
        from phoneme_set import classToPhoneme39, phonemeToViseme, viseme_set
        for i in range(len(data['labels'])):
            # NOTE(review): classToPhoneme39 is called like a function while the other
            # two names are indexed like mappings — confirm against phoneme_set.
            phoneme = classToPhoneme39(data['labels'][i])
            data['labels'][i] = viseme_set[phonemeToViseme[phoneme]]

    thisN = data['data'].shape[0]
    thisTrain = int(trainFraction * thisN)
    thisValid = int(validFraction * thisN)
    thisTest = thisN - thisTrain - thisValid  # compensates for rounding
    # Tolerant comparison: e.g. 0.7 + 0.3 is not exactly 1.0 in floating point.
    if abs(trainFraction + validFraction - 1.0) < 1e-9:
        thisValid = thisN - thisTrain
        thisTest = 0

    if verbose:
        logger_prepLip.info("This dataset contains %s images", thisN)
        logger_prepLip.info("now loading : nbTrain, nbValid, nbTest")
        logger_prepLip.info("\t\t\t %s %s %s", thisTrain, thisValid, thisTest)

    trainEnd = thisTrain
    validEnd = thisTrain + thisValid

    # cast to numpy array, correct datatype
    X_train = np.asarray(data['data'][0:trainEnd]).astype(dtypeX)
    X_val = np.asarray(data['data'][trainEnd:validEnd]).astype(dtypeX)
    X_test = np.asarray(data['data'][validEnd:thisN]).astype(dtypeX)
    y_train = np.asarray(data['labels'][0:trainEnd]).astype(dtypeY)
    y_val = np.asarray(data['labels'][trainEnd:validEnd]).astype(dtypeY)
    y_test = np.asarray(data['labels'][validEnd:thisN]).astype(dtypeY)

    if verbose:
        logger_prepLip.info("nbTrainLoaded: %s", len(X_train))
        logger_prepLip.info("nbValidLoaded: %s", len(X_val))
        logger_prepLip.info("nbTestLoaded: %s", len(X_test))
        logger_prepLip.info("Total loaded: %s", len(X_train) + len(X_val) + len(X_test))

    # fix labels (labels start at 1, but the library expects them to start at 0)
    # is intentionally NOT applied here: labels are passed through unchanged.

    X_train = to_network_input(X_train)
    X_val = to_network_input(X_val)
    X_test = to_network_input(X_test)

    if verbose:
        memTot = X_train.nbytes + X_val.nbytes + X_test.nbytes + y_train.nbytes + y_val.nbytes + y_test.nbytes
        logger_prepLip.info("Total memory size required as float32: %s MB", memTot / 1000000)
        # Use the array dtype instead of indexing an element: a split may be empty.
        logger_prepLip.info("TRAIN: %s %s", X_train.shape, X_train.dtype)
        logger_prepLip.info("%s %s", y_train.shape, y_train.dtype)
        logger_prepLip.info("VALID: %s", X_val.shape)
        logger_prepLip.info("%s", y_val.shape)
        logger_prepLip.info("TEST: %s", X_test.shape)
        logger_prepLip.info("%s", y_test.shape)

    ### STORE DATA ###
    if storeProcessed and store_path is not None:
        general_tools.saveToPkl(store_path, [X_train, y_train, X_val, y_val, X_test, y_test])

    return X_train, y_train, X_val, y_val, X_test, y_test
def prepLip_all(data_path=os.path.join(os.path.expanduser('~/TCDTIMIT/lipreading/database_binary/')),
                store_path=os.path.join(
                    os.path.expanduser('~/TCDTIMIT/lipreading/database_binaryprocessed/dataset.pkl')),
                type="all", nbLip=3, nbVol=54, trainFraction=0.8, validFraction=0.1, testFraction=0.1, nbClasses=39,
                onehot=False, verbose=False):
    """Load several speaker files and merge them into one train/val/test set.

    Every selected ``Lipspkr<i>.pkl`` / ``Volunteer<i>.pkl`` file under
    ``data_path`` is split in row order into a training, validation and test
    part; the parts of all files are concatenated. Images are converted to
    float32, mapped through ``2/255 * x - 1`` and reshaped to
    ``(-1, 1, 120, 120)``; labels end up as int32. The result is pickled to
    ``store_path`` and returned.

    Args:
        data_path: directory containing the speaker pickle files.
        store_path: file the processed dataset is written to.
        type: ``"lipspeakers"``, ``"volunteers"`` or ``"all"``.
        nbLip: number of lipspeaker files (``Lipspkr1`` .. ``Lipspkr<nbLip>``).
        nbVol: number of volunteer files (``Volunteer1`` .. ``Volunteer<nbVol>``).
        trainFraction: fraction of each file used for training.
        validFraction: fraction of each file used for validation.
        testFraction: not used; the test part is whatever remains after the
            training and validation parts.
        nbClasses: number of classes, used for the one-hot encoding.
        onehot: encode targets as one-hot vectors rescaled to -1/+1
            (for hinge loss).
        verbose: log progress, shapes and dtypes.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``.

    Raises:
        ValueError: if ``type`` is not one of the supported values.
        IOError: if one of the expected speaker files does not exist.
    """
    # from https://www.cs.toronto.edu/~kriz/cifar.html
    # also see http://stackoverflow.com/questions/35032675/how-to-create-dataset-similar-to-cifar-10

    # Lipspeaker 1: 14627 phonemes, 14617 extacted and useable
    # Lipspeaker 2: 28363 - 14627 = 13736 phonemes 13707 extracted
    # Lipspeaker 3: 42535 - 28363 = 14172 phonemes 14153 extracted
    # total Lipspeakers: 14500 + 13000 + 14000 = 42477
    dtypeX = 'float32'  # everything needs to be float32 for GPU processing
    dtypeY = 'int32'

    def to_network_input(images):
        # rescale with 2/255 * x - 1 (maps [0, 255] to [-1, 1]), then one (1, 120, 120) image per row
        scaled = np.multiply(2. / 255., images, dtype='float32')
        scaled = np.subtract(scaled, 1., dtype='float32')
        return np.reshape(scaled, (-1, 1, 120, 120)).astype(dtypeX)

    # prepare data to load
    fnamesLipspkrs = ['Lipspkr%i.pkl' % i for i in range(1, nbLip + 1)]  # all lipspeakers
    fnamesVolunteers = ['Volunteer%i.pkl' % i for i in range(1, nbVol + 1)]  # some volunteers
    if type == "lipspeakers":
        fnames = fnamesLipspkrs
    elif type == "volunteers":
        fnames = fnamesVolunteers
    elif type == "all":
        fnames = fnamesLipspkrs + fnamesVolunteers
    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError("wrong type of dataset entered")

    # Check that every file exists before any of them is loaded.
    datasets = {}
    for name in fnames:
        fname = os.path.join(data_path, name)
        if not os.path.exists(fname):
            raise IOError(fname + " was not found.")
        datasets[name] = cache.datasetCache.cache_file(fname)

    # load the images
    X_train, y_train = [], []
    X_val, y_val = [], []
    X_test, y_test = [], []

    trainLoaded = 0
    validLoaded = 0
    testLoaded = 0
    for fname in fnames:
        if verbose:
            logger_prepLip.info("Total loaded till now: %s ", trainLoaded + validLoaded + testLoaded)
            logger_prepLip.info("nbTrainLoaded: %s", trainLoaded)
            logger_prepLip.info("nbValidLoaded: %s", validLoaded)
            logger_prepLip.info("nbTestLoaded: %s", testLoaded)
            logger_prepLip.info('loading file %s', datasets[fname])
        data = unpickle(datasets[fname])

        thisN = data['data'].shape[0]
        thisTrain = int(trainFraction * thisN)
        thisValid = int(validFraction * thisN)
        thisTest = thisN - thisTrain - thisValid  # compensates for rounding
        if verbose:
            logger_prepLip.info("This dataset contains %s images", thisN)
            logger_prepLip.info("now loading : nbTrain, nbValid, nbTest")
            logger_prepLip.info("\t\t\t %s %s %s ", thisTrain, thisValid, thisTest)

        trainEnd = thisTrain
        validEnd = thisTrain + thisValid

        # extend() appends in place; `X = X + list(...)` would re-copy everything
        # loaded so far for every file.
        X_train.extend(data['data'][0:trainEnd])
        X_val.extend(data['data'][trainEnd:validEnd])
        X_test.extend(data['data'][validEnd:thisN])

        y_train.extend(data['labels'][0:trainEnd])
        y_val.extend(data['labels'][trainEnd:validEnd])
        y_test.extend(data['labels'][validEnd:thisN])

        trainLoaded += thisTrain
        validLoaded += thisValid
        testLoaded += thisTest

    if verbose:
        logger_prepLip.info("nbTrainLoaded: %s", trainLoaded)
        logger_prepLip.info("nbValidLoaded: %s", validLoaded)
        logger_prepLip.info("nbTestLoaded: %s", testLoaded)
        logger_prepLip.info("Total loaded till now: %s", trainLoaded + validLoaded + testLoaded)

    # cast to numpy array, correct datatype
    X_train = np.asarray(X_train).astype(dtypeX)
    y_train = np.asarray(y_train).astype(dtypeY)
    X_val = np.asarray(X_val).astype(dtypeX)
    y_val = np.asarray(y_val).astype(dtypeY)
    X_test = np.asarray(X_test).astype(dtypeX)
    y_test = np.asarray(y_test).astype(dtypeY)

    memTot = X_train.nbytes + X_val.nbytes + X_test.nbytes + y_train.nbytes + y_val.nbytes + y_test.nbytes
    logger_prepLip.info("Total memory size required as float32: %s MB", memTot / 1000000)

    X_train = to_network_input(X_train)
    X_val = to_network_input(X_val)
    X_test = to_network_input(X_test)

    # Onehot the targets, then rescale {0, 1} to {-1, +1} for hinge loss
    if onehot:
        y_train = 2 * np.float32(np.eye(nbClasses)[y_train]) - 1.
        y_val = 2 * np.float32(np.eye(nbClasses)[y_val]) - 1.
        y_test = 2 * np.float32(np.eye(nbClasses)[y_test]) - 1.

    # cast to correct datatype, just to be sure
    y_train = y_train.astype(dtypeY)
    y_val = y_val.astype(dtypeY)
    y_test = y_test.astype(dtypeY)

    if verbose:
        logger_prepLip.info("\n Final datatype: ")
        # Use the array dtype instead of indexing an element: a split may be empty.
        logger_prepLip.info("TRAIN: %s %s ", X_train.shape, X_train.dtype)
        logger_prepLip.info("%s %s", y_train.shape, y_train.dtype)
        logger_prepLip.info("VALID: %s", X_val.shape)
        logger_prepLip.info("%s", y_val.shape)
        logger_prepLip.info("TEST: %s", X_test.shape)
        logger_prepLip.info("%s", y_test.shape)

    ### STORE DATA ###
    dataList = [X_train, y_train, X_val, y_val, X_test, y_test]
    general_tools.saveToPkl(store_path, dataList)

    return X_train, y_train, X_val, y_val, X_test, y_test
def train(train_fn, val_fn, model, batch_size, LR_start, LR_decay, num_epochs, X_train, y_train, X_val, y_val, X_test,
          y_test, save_name=None, shuffle_parts=1, justTest=False):
    """Train ``model`` for ``num_epochs`` epochs, keeping the best parameters.

    The test set is evaluated once before training. After every epoch the
    validation set is evaluated; when the validation error does not get worse
    the test set is evaluated too and the parameters are saved with
    ``np.savez``. Per-epoch costs are appended to
    ``<save_name>_trainInfo.pkl``; if that file and ``<save_name>.npz``
    already exist, their history is continued.

    Only full batches are processed: trailing samples that do not fill a
    batch of ``batch_size`` are ignored by both training and evaluation.

    Args:
        train_fn: called as ``train_fn(X_batch, y_batch, LR)``; returns the loss.
        val_fn: called as ``val_fn(X_batch, y_batch)``; returns ``(loss, err)``.
        model: lasagne layer(s) whose parameter values are saved.
        batch_size: number of samples per batch.
        LR_start: learning rate of the first epoch.
        LR_decay: factor the learning rate is multiplied by after each epoch.
        num_epochs: number of epochs to train.
        X_train, y_train: training data; shuffled in place.
        X_val, y_val: validation data.
        X_test, y_test: test data.
        save_name: path prefix for the saved files; defaults to
            ``"./bestModel_binary"``.
        shuffle_parts: number of equally sized chunks the training set is
            shuffled in (samples never move between chunks).
        justTest: only run the initial test evaluation and return ``0``.

    Returns:
        ``0`` when ``justTest`` is true, otherwise ``None``.
    """

    # A function which shuffles a dataset in place, chunk by chunk
    def shuffle(X, y):
        # Floor division: the chunk size is used as a range bound and slice index.
        chunk_size = len(X) // shuffle_parts
        # A real list is required; np.random.shuffle cannot shuffle a Python 3 range.
        shuffled_range = list(range(chunk_size))

        X_buffer = np.copy(X[0:chunk_size])
        y_buffer = np.copy(y[0:chunk_size])

        for k in range(shuffle_parts):
            np.random.shuffle(shuffled_range)
            offset = k * chunk_size
            for i, source in enumerate(shuffled_range):
                X_buffer[i] = X[offset + source]
                y_buffer[i] = y[offset + source]

            X[offset:offset + chunk_size] = X_buffer
            y[offset:offset + chunk_size] = y_buffer

        return X, y

    # This function trains the model a full epoch (on the whole dataset)
    def train_epoch(X, y, LR):
        loss = 0
        batches = len(X) // batch_size

        for i in tqdm(range(batches)):
            loss += train_fn(X[i * batch_size:(i + 1) * batch_size], y[i * batch_size:(i + 1) * batch_size], LR)

        loss /= batches
        return loss

    # This function tests the model a full epoch (on the whole dataset)
    def val_epoch(X, y):
        err = 0
        loss = 0
        batches = len(X) // batch_size

        for i in tqdm(range(batches)):
            new_loss, new_err = val_fn(X[i * batch_size:(i + 1) * batch_size],
                                       y[i * batch_size:(i + 1) * batch_size])
            err += new_err
            loss += new_loss

        err = err / batches * 100  # mean error over the batches, in percent
        loss /= batches
        return err, loss

    if save_name is None:
        save_name = "./bestModel_binary"

    # shuffle the train set
    X_train, y_train = shuffle(X_train, y_train)
    best_val_err = 100
    best_epoch = 1
    LR = LR_start

    network_train_info = {
        'train_cost': [],
        'val_cost': [], 'val_acc': [],
        'test_cost': [], 'test_acc': [],
    }

    test_err, test_loss = val_epoch(X_test, y_test)
    print("Initial results: ")
    print(" test loss: " + str(test_loss))
    print(" test error rate: " + str(test_err) + "%")

    if justTest:
        return 0

    import general_tools

    # load old train info
    if os.path.exists(save_name + ".npz") and os.path.exists(save_name + "_trainInfo.pkl"):
        old_train_info = general_tools.unpickle(save_name + '_trainInfo.pkl')
        # The '*_acc' entries are stored as `1 - error`, so this recovers the lowest
        # error seen so far. Guard against an empty history (max() of [] raises).
        if old_train_info['val_acc']:
            best_val_err = 1 - max(old_train_info['val_acc'])
        if old_train_info['test_acc']:
            test_err = 1 - max(old_train_info['test_acc'])
        # NOTE(review): test_loss is not restored from the history, only test_err — confirm this is intended.
        network_train_info = old_train_info

    # We iterate over epochs:
    for epoch in tqdm(range(num_epochs)):
        start_time = time.time()

        train_loss = train_epoch(X_train, y_train, LR)
        X_train, y_train = shuffle(X_train, y_train)

        val_err, val_loss = val_epoch(X_val, y_val)

        print(" previous best validation error rate: " + str(best_val_err) + "%")
        print(" LR: " + str(LR))
        print(" training loss: " + str(train_loss))
        print(" validation loss: " + str(val_loss))
        print(" validation error rate: " + str(val_err) + "%")
        print(" \n best epoch: " + str(best_epoch))

        # test if validation error went down
        if val_err <= best_val_err:
            print("Best ever validation score; evaluating test...")
            best_val_err = val_err
            best_epoch = epoch + 1

            test_err, test_loss = val_epoch(X_test, y_test)
            print(" test loss: " + str(test_loss))
            print(" test error rate: " + str(test_err) + "%")

            np.savez(save_name, lasagne.layers.get_all_param_values(model))

        epoch_duration = time.time() - start_time

        # save the training info; test values are those of the most recent test evaluation
        network_train_info['train_cost'].append(train_loss)
        network_train_info['val_cost'].append(val_loss)
        network_train_info['val_acc'].append(1 - val_err)
        network_train_info['test_cost'].append(test_loss)
        network_train_info['test_acc'].append(1 - test_err)
        general_tools.saveToPkl(save_name + '_trainInfo.pkl', network_train_info)

        # Then we print the results for this epoch:
        print("Epoch " + str(epoch + 1) + " of " + str(num_epochs) + " took " + str(epoch_duration) + "s" + "\n\n")

        # decay the LR
        LR *= LR_decay
def train(train_fn,
          val_fn,
          out_fn,
          topk_acc_fn,
          k,
          network_output_layer,
          batch_size,
          LR_start,
          LR_decay,
          num_epochs,
          dataset,
          database_binaryDir,
          storeProcessed,
          processedDir,
          loadPerSpeaker=False,
          save_name=None,
          shuffleEnabled=True):
    """Train a network for up to `num_epochs` epochs, keeping the best model.

    After every epoch the validation accuracy is compared with the best one
    seen so far (including the best one recorded by a previous session stored
    under `save_name`). On improvement the test set is evaluated and the
    parameters of `network_output_layer` are written to `save_name` with
    `np.savez`. The per-epoch metrics are pickled to
    `save_name + '_trainInfo.pkl'` after every epoch.

    Args:
        train_fn: callable `(batch_X, batch_y, LR) -> cost`.
        val_fn: callable `(batch_X, batch_y) -> (cost, acc, topk_acc)`.
        out_fn: not used by this function.
        topk_acc_fn: not used by this function.
        k: the "k" of the top-k accuracy; only used in log messages.
        network_output_layer: layer handed to
            `lasagne.layers.get_all_param_values` when saving the model.
        batch_size: number of examples per minibatch. A trailing incomplete
            batch is dropped.
        LR_start: initial learning rate.
        LR_decay: factor the learning rate is multiplied with whenever the
            validation cost stops improving (see `updateLR`).
        num_epochs: maximum number of epochs.
        dataset: if `loadPerSpeaker` is true, a pair
            `(trainingSpeakerFiles, testSpeakerFiles)`; otherwise a 6-tuple
            `(X_train, y_train, X_val, y_val, X_test, y_test)`.
        database_binaryDir: passed as `sourceDataDir` to
            `preprocessLipreading.prepLip_one` (per-speaker mode only).
        storeProcessed: forwarded to `prepLip_one` (per-speaker mode only).
        processedDir: forwarded to `prepLip_one` (per-speaker mode only).
        loadPerSpeaker: load and process one speaker file at a time instead
            of holding the whole dataset in memory.
        save_name: path prefix for the stored model and training info.
            Defaults to "./bestModel".
        shuffleEnabled: shuffle the training data between passes.

    Returns:
        None. Results are logged through `logger_train` and written to disk.
    """
    # Resolve the default up front: save_name is already needed below to look
    # for a previous training session, not only when the model is saved.
    if save_name is None:
        save_name = "./bestModel"

    # Training stops once more than this many epochs (net) failed to improve.
    max_epochs_not_improved = 8

    if loadPerSpeaker:
        trainingSpeakerFiles, testSpeakerFiles = dataset
        logger_train.info("train files: \n%s", [
            os.path.basename(speakerFile)
            for speakerFile in trainingSpeakerFiles
        ])
        logger_train.info("test files: \n %s", [
            os.path.basename(speakerFile) for speakerFile in testSpeakerFiles
        ])
    else:
        X_train, y_train, X_val, y_val, X_test, y_test = dataset
        logger_train.info("the number of training examples is: %s",
                          len(X_train))
        logger_train.info("the number of valid examples is: %s", len(X_val))
        logger_train.info("the number of test examples is: %s", len(X_test))

    def shuffle(X, y):
        """Apply the same random permutation to X and y, in place.

        The containers are modified through slice assignment and are also
        returned for convenience.
        """
        shuffle_parts = 1
        # Floor division: chunk_size is used as a range() bound and as a
        # slice index, so it has to be an int on Python 3 as well.
        chunk_size = len(X) // shuffle_parts
        # np.random.shuffle needs a mutable sequence; a Python 3 range is not.
        shuffled_range = list(range(chunk_size))

        X_buffer = np.copy(X[0:chunk_size])
        y_buffer = np.copy(y[0:chunk_size])

        for part in range(shuffle_parts):
            np.random.shuffle(shuffled_range)
            offset = part * chunk_size
            for i in range(chunk_size):
                X_buffer[i] = X[offset + shuffled_range[i]]
                y_buffer[i] = y[offset + shuffled_range[i]]

            X[offset:offset + chunk_size] = X_buffer
            y[offset:offset + chunk_size] = y_buffer

        return X, y

    def train_epoch(X, y, LR):
        """Run `train_fn` over all full minibatches of (X, y).

        Returns the summed cost and the number of batches processed.
        """
        cost = 0
        nb_batches = len(X) // batch_size

        for i in tqdm(range(nb_batches), total=nb_batches):
            batch_X = X[i * batch_size:(i + 1) * batch_size]
            batch_y = y[i * batch_size:(i + 1) * batch_size]
            cost += train_fn(batch_X, batch_y, LR)

        return cost, nb_batches

    def val_epoch(X, y):
        """Run `val_fn` over all full minibatches of (X, y).

        Returns the summed cost, summed accuracy, summed top-k accuracy and
        the number of batches processed (sums, not averages).
        """
        acc = 0
        cost = 0
        topk_acc = 0
        nb_batches = len(X) // batch_size

        for i in tqdm(range(nb_batches)):
            batch_X = X[i * batch_size:(i + 1) * batch_size]
            batch_y = y[i * batch_size:(i + 1) * batch_size]
            new_cost, new_acc, new_topk_acc = val_fn(batch_X, batch_y)
            acc += new_acc
            cost += new_cost
            topk_acc += new_topk_acc

        return cost, acc, topk_acc, nb_batches

    def evalTRAINING(trainingSpeakerFiles,
                     LR,
                     shuffleEnabled,
                     verbose=False,
                     sourceDataDir=None,
                     storeProcessed=False,
                     processedDir=None):
        """Train and validate on the speaker files, one file at a time.

        Each file is loaded on its own (so it fits in memory), split 80/20
        into train/validation, trained on and then validated on.

        Returns the batch-averaged training cost, validation cost,
        validation accuracy (%) and validation top-k accuracy (%).
        """
        train_cost = 0
        val_acc = 0
        val_cost = 0
        val_topk_acc = 0
        nb_train_batches = 0
        nb_val_batches = 0

        # TODO: parallelize the loading with the GPU evaluation to eliminate
        # waiting.
        for speakerFile in tqdm(trainingSpeakerFiles,
                                total=len(trainingSpeakerFiles)):
            logger_train.debug("processing %s", speakerFile)
            X_train, y_train, X_val, y_val, X_test, y_test = preprocessLipreading.prepLip_one(
                speakerFile=speakerFile,
                trainFraction=0.8,
                validFraction=0.2,
                sourceDataDir=sourceDataDir,
                storeProcessed=storeProcessed,
                processedDir=processedDir)
            if verbose:
                logger_train.debug("the number of training examples is: %s",
                                   len(X_train))
                logger_train.debug("the number of valid examples is: %s",
                                   len(X_val))
                logger_train.debug("the number of test examples is: %s",
                                   len(X_test))

            if shuffleEnabled:
                X_train, y_train = shuffle(X_train, y_train)
            train_cost_one, train_batches_one = train_epoch(X=X_train,
                                                            y=y_train,
                                                            LR=LR)
            train_cost += train_cost_one
            nb_train_batches += train_batches_one

            # get results for validation set
            val_cost_one, val_acc_one, val_topk_acc_one, val_batches_one = val_epoch(
                X=X_val, y=y_val)
            val_cost += val_cost_one
            val_acc += val_acc_one
            val_topk_acc += val_topk_acc_one
            nb_val_batches += val_batches_one

            if verbose:
                # A speaker with fewer examples than one batch yields zero
                # batches (and zero sums); avoid dividing by zero just for
                # the sake of a debug message.
                train_div = max(train_batches_one, 1)
                val_div = max(val_batches_one, 1)
                logger_train.debug(" this speaker results: ")
                logger_train.debug("\ttraining cost: %s",
                                   train_cost_one / train_div)
                logger_train.debug("\tvalidation cost: %s",
                                   val_cost_one / val_div)
                logger_train.debug("\tvalidation acc rate: %s %%",
                                   val_acc_one / val_div * 100)
                logger_train.debug("\tvalidation top %s acc rate: %s %%", k,
                                   val_topk_acc_one / val_div * 100)

        # get the average over all speakers
        train_cost /= nb_train_batches
        val_cost /= nb_val_batches
        val_acc = val_acc / nb_val_batches * 100  # convert to %
        val_topk_acc = val_topk_acc / nb_val_batches * 100  # convert to %

        return train_cost, val_cost, val_acc, val_topk_acc

    def evalTEST(testSpeakerFiles,
                 verbose=False,
                 sourceDataDir=None,
                 storeProcessed=False,
                 processedDir=None):
        """Evaluate the test speaker files, one file at a time.

        Each file is loaded on its own with train and validation fractions
        of 0, and only its test split is evaluated.

        Returns the batch-averaged test cost, test accuracy (%) and test
        top-k accuracy (%).
        """
        test_acc = 0
        test_cost = 0
        test_topk_acc = 0
        nb_test_batches = 0

        for speakerFile in tqdm(testSpeakerFiles, total=len(testSpeakerFiles)):
            logger_train.debug("processing %s", speakerFile)
            X_train, y_train, X_val, y_val, X_test, y_test = preprocessLipreading.prepLip_one(
                speakerFile=speakerFile,
                trainFraction=0.0,
                validFraction=0.0,
                sourceDataDir=sourceDataDir,
                storeProcessed=storeProcessed,
                processedDir=processedDir)

            if verbose:
                logger_train.debug("the number of training examples is: %s",
                                   len(X_train))
                logger_train.debug("the number of valid examples is: %s",
                                   len(X_val))
                logger_train.debug("the number of test examples is: %s",
                                   len(X_test))

            # get results for the test set
            test_cost_one, test_acc_one, test_topk_acc_one, test_batches_one = val_epoch(
                X=X_test, y=y_test)
            test_acc += test_acc_one
            test_cost += test_cost_one
            test_topk_acc += test_topk_acc_one
            nb_test_batches += test_batches_one

            if verbose:
                # See evalTRAINING: keep the debug output from crashing on a
                # speaker that produced zero batches.
                test_div = max(test_batches_one, 1)
                logger_train.debug(" this speaker results: ")
                logger_train.debug("\ttest cost: %s",
                                   test_cost_one / test_div)
                logger_train.debug("\ttest acc rate: %s %%",
                                   test_acc_one / test_div * 100)
                logger_train.debug("\ttest top %s acc rate: %s %%", k,
                                   test_topk_acc_one / test_div * 100)

        # get the average over all speakers
        test_acc = test_acc / nb_test_batches * 100
        test_cost /= nb_test_batches
        test_topk_acc = test_topk_acc / nb_test_batches * 100

        return test_cost, test_acc, test_topk_acc

    def updateLR(LR, LR_decay, network_train_info, epochsNotImproved):
        """Decay the LR when the validation cost improved by less than 1%.

        Returns the (possibly reduced) learning rate and the updated count
        of epochs without improvement. The count goes up by one on a
        stagnating epoch and down by one (never below 0) on an improving one.
        """
        this_cost = network_train_info['val_cost'][-1]  # validation cost
        try:
            last_cost = network_train_info['val_cost'][-2]
        except IndexError:
            # First epoch: there is only one result stored, so pretend the
            # previous cost was much higher.
            last_cost = 10 * this_cost

        # only reduce LR if not much improvement anymore
        if this_cost / float(last_cost) >= 0.99:
            logger_train.info(
                " Error not much reduced: %s vs %s. Reducing LR: %s",
                this_cost, last_cost, LR * LR_decay)
            epochsNotImproved += 1
            return LR * LR_decay, epochsNotImproved

        epochsNotImproved = max(epochsNotImproved - 1, 0)
        return LR, epochsNotImproved

    def evaluate_test_set():
        """Return (test_cost, test_acc %, test_topk_acc %) for the active mode."""
        if loadPerSpeaker:
            # process each speaker separately
            return evalTEST(testSpeakerFiles,
                            sourceDataDir=database_binaryDir,
                            storeProcessed=storeProcessed,
                            processedDir=processedDir)
        # all at once
        cost, acc, topk_acc, nb_batches = val_epoch(X_test, y_test)
        return cost / nb_batches, acc / nb_batches * 100, topk_acc / nb_batches * 100

    def log_test_results(header, cost, acc, topk_acc):
        """Log one block of test-set metrics under the given header line."""
        logger_train.info(header)
        logger_train.info("\t test cost: %s", cost)
        logger_train.info("\t test acc rate: %s %%", acc)
        logger_train.info("\t test top %s acc: %s %%", k, topk_acc)

    best_val_acc = 0
    test_topk_acc = 0
    test_cost = 0
    test_acc = 0

    # try to load performance metrics of stored model
    if os.path.exists(save_name + ".npz") and os.path.exists(
            save_name + "_trainInfo.pkl"):
        old_train_info = preprocessLipreading.unpickle(save_name +
                                                       '_trainInfo.pkl')
        if isinstance(old_train_info, list):
            # Backward compatibility with the old list-of-lists format.
            # NOTE(review): the layout of this legacy format is not visible
            # here. test_cost and test_acc both read index 3, and min() is
            # used for an accuracy; left as-is, confirm against old files.
            old_train_info = old_train_info[0]
            best_val_acc = min(old_train_info[2])
            test_cost = min(old_train_info[3])
            test_acc = min(old_train_info[3])
        elif isinstance(old_train_info, dict):  # normal case
            # Accuracies: higher is better, so the previous best is the max.
            # Taking the min would let any mediocre epoch overwrite a better
            # stored model.
            best_val_acc = max(old_train_info['val_acc'])
            test_cost = min(old_train_info['test_cost'])
            test_acc = max(old_train_info['test_acc'])
            try:
                test_topk_acc = max(old_train_info['test_topk_acc'])
            except KeyError:
                # older info files have no top-k entry
                test_topk_acc = 0
        # any other type: keep the zero defaults set above

    logger_train.info("previous training session results: ")
    logger_train.info("\t test cost: %s", test_cost)
    logger_train.info("\t test acc rate: %s %%", test_acc)
    logger_train.info("\t val acc: %s %%", best_val_acc)

    best_epoch = 1
    LR = LR_start
    # for storage of training info
    network_train_info = {
        'train_cost': [],
        'val_cost': [],
        'val_acc': [],
        'val_topk_acc': [],
        'test_cost': [],
        'test_acc': [],
        'test_topk_acc': []
    }  # used to be list of lists
    epochsNotImproved = 0

    logger_train.info("starting training for %s epochs...", num_epochs)

    # TODO: remove this initial test evaluation
    test_cost, test_acc, test_topk_acc = evaluate_test_set()
    log_test_results("TEST results: ", test_cost, test_acc, test_topk_acc)
    # TODO: end remove

    # now run through the epochs
    for epoch in range(num_epochs):
        logger_train.info("\n\n\n Epoch %s started", epoch + 1)
        start_time = time.time()

        if not loadPerSpeaker:
            total_train_cost, nb_train_batches = train_epoch(X=X_train,
                                                             y=y_train,
                                                             LR=LR)
            train_cost = total_train_cost / nb_train_batches
            # Honour shuffleEnabled here too, as the per-speaker path does.
            if shuffleEnabled:
                X_train, y_train = shuffle(X_train, y_train)
            val_cost, val_acc, val_topk_acc, nb_val_batches = val_epoch(
                X=X_val, y=y_val)
            val_acc = val_acc / nb_val_batches * 100
            val_cost /= nb_val_batches
            val_topk_acc = val_topk_acc / nb_val_batches * 100
        else:
            train_cost, val_cost, val_acc, val_topk_acc = evalTRAINING(
                trainingSpeakerFiles,
                LR,
                shuffleEnabled,
                sourceDataDir=database_binaryDir,
                storeProcessed=storeProcessed,
                processedDir=processedDir)

        # test if validation acc went up
        printTest = False
        if val_acc > best_val_acc:
            printTest = True
            best_val_acc = val_acc
            best_epoch = epoch + 1

            logger_train.info(
                "\n\nBest ever validation score; evaluating TEST set...")
            test_cost, test_acc, test_topk_acc = evaluate_test_set()
            log_test_results("TEST results: ", test_cost, test_acc,
                             test_topk_acc)

            # dirname is '' for a bare file name; os.makedirs('') would fail.
            save_dir = os.path.dirname(save_name)
            if save_dir and not os.path.exists(save_dir):
                os.makedirs(save_dir)
            logger_train.info("saving model to %s", save_name)
            np.savez(
                save_name,
                *lasagne.layers.get_all_param_values(network_output_layer))

        epoch_duration = time.time() - start_time

        # Then we log the results for this epoch:
        logger_train.info("Epoch %s of %s took %s seconds", epoch + 1,
                          num_epochs, epoch_duration)
        logger_train.info(" LR: %s", LR)
        logger_train.info(" training cost: %s", train_cost)
        logger_train.info(" validation cost: %s", val_cost)
        logger_train.info(" validation acc rate: %s %%", val_acc)
        logger_train.info(" validation top %s acc rate: %s %%", k,
                          val_topk_acc)
        logger_train.info(" best epoch: %s", best_epoch)
        logger_train.info(" best validation acc rate: %s %%", best_val_acc)
        if printTest:
            logger_train.info(" test cost: %s", test_cost)
            logger_train.info(" test acc rate: %s %%", test_acc)
            logger_train.info(" test top %s acc rate: %s %%", k,
                              test_topk_acc)

        # Save the training info. The test_* entries repeat the most recent
        # test evaluation on epochs where the test set was not re-evaluated.
        network_train_info['train_cost'].append(train_cost)
        network_train_info['val_cost'].append(val_cost)
        network_train_info['val_acc'].append(val_acc)
        network_train_info['val_topk_acc'].append(val_topk_acc)
        network_train_info['test_cost'].append(test_cost)
        network_train_info['test_acc'].append(test_acc)
        network_train_info['test_topk_acc'].append(test_topk_acc)
        store_path = save_name + '_trainInfo.pkl'
        general_tools.saveToPkl(store_path, network_train_info)
        logger_train.info("Train info written to:\t %s", store_path)

        # decay the LR (only when the validation cost stagnates)
        LR, epochsNotImproved = updateLR(LR, LR_decay, network_train_info,
                                         epochsNotImproved)

        if epochsNotImproved > max_epochs_not_improved:
            logger_train.warning("\n\n NO MORE IMPROVEMENTS -> stop training")
            # Uses the mode-aware helper: testSpeakerFiles only exists in
            # per-speaker mode.
            test_cost, test_acc, test_topk_acc = evaluate_test_set()
            log_test_results("FINAL TEST results: ", test_cost, test_acc,
                             test_topk_acc)
            break

    logger_train.info("Done.")