def preprocessLabeledWavs(wavDir, store_dir, name):
    """Build network inputs, targets and valid-frame indices from labeled wavs.

    The wavs under ``wavDir`` are assumed to be fixed already (fixWavs step done).
    ``store_dir`` and ``name`` are accepted for interface compatibility but are
    not used here.

    Returns:
        tuple ``(X, y, valid_frames)`` as produced by
        ``preprocessWavs.preprocess_dataset`` and then passed through
        ``preprocessWavs.set_type`` with 'float32', 'int32' and 'int32'
        respectively.
    """
    features, labels, frames = preprocessWavs.preprocess_dataset(
        source_path=wavDir, nbMFCCs=nbMFCCs, logger=logger_evaluate)

    # Cast in the same order as before: inputs, then targets, then valid frames.
    cast = preprocessWavs.set_type
    features = cast(features, 'float32')
    labels = cast(labels, 'int32')
    frames = cast(frames, 'int32')
    return features, labels, frames
def preprocessUnlabeledWavs(wavDir, store_dir, name):
    """Build network inputs from wavs that have no label files.

    The wavs under ``wavDir`` are assumed to be fixed already (fixWavs step done).
    ``store_dir`` and ``name`` are accepted for interface compatibility but are
    not used here.

    Returns:
        The result of ``preprocessWavs.preprocess_unlabeled_dataset`` passed
        through ``preprocessWavs.set_type`` with 'float32'.
    """
    # TODO: marked as unfinished by the original author.
    features = preprocessWavs.preprocess_unlabeled_dataset(
        source_path=wavDir, nbMFCCs=nbMFCCs, logger=logger_evaluate)
    return preprocessWavs.set_type(features, 'float32')
def oneTypeToPkl(noiseType, ratio_dB):
    """Preprocess the TEST wavs of one noise type / ratio and store them as a pkl.

    Reads the wavs under ``<root><dataset>/fixed<nbPhonemes>_<noiseType>/ratio<ratio_dB>/TEST``,
    normalizes them with the mean/std unpickled from ``normalizePkl_path``,
    casts inputs to float32 and targets / valid frames to int32, and saves
    ``[X_test, y_test, valid_frames_test]`` to the target pkl.

    Relies on module-level names: ``root``, ``dataset``, ``nbPhonemes``,
    ``nbMFCCs``, ``forceOverwrite``, ``normalizePkl_path``, ``logger`` and
    ``program_start_time``. Sets the module globals ``target_path`` and
    ``dataList`` as a side effect.

    Args:
        noiseType: name of the noise type; used to build the directory names.
        ratio_dB: noise ratio; used (via ``str``) to build the directory names.

    Returns:
        0 when the target pkl already exists and ``forceOverwrite`` is falsy
        (nothing is done); otherwise None.
    """
    global target_path, dataList

    dataRootDir = root + dataset + "/fixed" + str(nbPhonemes) + "_" + noiseType + \
                  os.sep + "ratio" + str(ratio_dB) + os.sep + 'TEST'
    outputDir = root + dataset + "/binary" + str(nbPhonemes) + "_" + noiseType + \
                os.sep + "ratio" + str(ratio_dB) + os.sep + dataset
    FRAC_TRAINING = 0.0  # TOTAL = TRAINING + TEST = TRAIN + VALIDATION + TEST

    ### store path
    target = os.path.join(outputDir, dataset + '_' + str(nbMFCCs) + '_ch')
    target_path = target + '.pkl'
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)

    # Already exists: skip unless overwriting is forced
    if os.path.exists(target_path) and not forceOverwrite:
        # The path needs its own %s placeholder; without it logging reports a
        # formatting error instead of the message.
        logger.info("This file already exists, skipping %s", target_path)
        return 0

    ##### The PREPROCESSING itself #####
    logger.info('Preprocessing data ...')

    # FIRST, gather the WAV and PHN files, generate MFCCs, extract labels to make
    # inputs and targets for the network.
    # For a dataset containing no TRAIN/TEST subdivision, just a bunch of wavs
    # -> choose training set yourself
    def processDataset(FRAC_TRAINING, data_source_path, logger=None):
        # FRAC_TRAINING is accepted but not used: everything is treated as test data.
        logger.info(' Data: %s ', data_source_path)
        X_test, y_test, valid_frames_test = preprocessWavs.preprocess_dataset(
            source_path=data_source_path, nbMFCCs=nbMFCCs, logger=logger, debug=None)
        assert len(X_test) == len(y_test) == len(valid_frames_test)

        logger.info(' Loading data complete.')
        logger.debug('Type and shape/len of X_test')
        logger.debug('type(X_test): {}'.format(type(X_test)))
        logger.debug('type(X_test[0]): {}'.format(type(X_test[0])))
        logger.debug('type(X_test[0][0]): {}'.format(type(X_test[0][0])))
        logger.debug('type(X_test[0][0][0]): {}'.format(type(X_test[0][0][0])))
        return X_test, y_test, valid_frames_test

    X_test, y_test, valid_frames_test = processDataset(FRAC_TRAINING, dataRootDir, logger)
    logger.info(" test X: %s", len(X_test))
    logger.info(" test y: %s", len(y_test))
    logger.info(" test valid_frames: %s", len(valid_frames_test))

    ### NORMALIZE data ###
    logger.info('Normalizing data ...')
    logger.info(' Each channel mean=0, sd=1 ...')
    mean_val, std_val = unpickle(normalizePkl_path)
    X_test = preprocessWavs.normalize(X_test, mean_val, std_val)

    # make sure we're working with float32 inputs and int32 targets / valid frames
    X_test = preprocessWavs.set_type(X_test, 'float32')
    y_test = preprocessWavs.set_type(y_test, 'int32')
    valid_frames_test = preprocessWavs.set_type(valid_frames_test, 'int32')

    # print some more to check that cast succeeded
    logger.debug('X test')
    logger.debug(' %s %s', type(X_test), len(X_test))
    logger.debug(' %s %s', type(X_test[0]), X_test[0].shape)
    logger.debug(' %s %s', type(X_test[0][0]), X_test[0][0].shape)
    # Innermost element: log only its type (the previous line already shows the
    # shape of X_test[0][0]).
    logger.debug(' %s', type(X_test[0][0][0]))
    logger.debug('y test')
    logger.debug(' %s %s', type(y_test), len(y_test))
    logger.debug(' %s %s', type(y_test[0]), y_test[0].shape)
    logger.debug(' %s %s', type(y_test[0][0]), y_test[0][0].shape)

    ### STORE DATA ###
    logger.info('Saving data to %s', target_path)
    dataList = [X_test, y_test, valid_frames_test]
    saveToPkl(target_path, dataList)

    logger.info('Preprocessing complete!')
    logger.info('Total time: {:.3f}'.format(timeit.default_timer() - program_start_time))
def evaluateModel(self, BIDIRECTIONAL, N_HIDDEN_LIST, batch_size, dataName, wavDir, data_store_dir, meanStd_path,
                  model_load, nbMFCCs, store_dir, force_overwrite=False):
    """Run the network over an evaluation set and log the results.

    Preprocessed data is loaded from ``data_store_dir`` when a pkl for
    ``dataName`` / ``nbMFCCs`` exists; otherwise the wavs in ``wavDir`` are
    preprocessed, normalized, truncated to a multiple of ``batch_size``,
    padded and stored there. Everything is logged to ``logger_evaluate`` and
    to ``<store_dir>/Evaluation<dataName>.log``.

    Args:
        BIDIRECTIONAL: not used in this method.
        N_HIDDEN_LIST: not used in this method.
        batch_size: number of files per batch; trailing files that do not fill
            a whole batch are not evaluated.
        dataName: name of the dataset; '/' is replaced by '_' in file names.
        wavDir: directory containing the wavs (and label files, if any).
        data_store_dir: prefix under which preprocessed data is cached. It is
            concatenated directly with the file name, without a separator.
        meanStd_path: pickle file holding ``[mean_val, std_val]`` used for
            normalization.
        model_load: identifier of the loaded model; only logged.
        nbMFCCs: number of MFCCs passed to the preprocessing functions.
        store_dir: directory for the log file.
        force_overwrite: when True, an existing log file is overwritten
            without asking.

    Returns:
        0 when a log file already exists and the user declines to re-evaluate;
        otherwise None.
    """
    logger_evaluate.info("\n\n\n")

    ####### THE DATA you want to evaluate ##########
    safe_name = dataName.replace('/', '_')
    data_store_path = data_store_dir + safe_name + "_nbMFCC" + str(nbMFCCs)
    if not os.path.exists(data_store_dir):
        os.makedirs(data_store_dir)
    predictions_path = store_dir + os.sep + safe_name + "_predictions.pkl"

    # log file
    logFile = store_dir + os.sep + "Evaluation" + safe_name + '.log'
    if os.path.exists(logFile) and not force_overwrite:
        from general_tools import query_yes_no
        # Fill in the log file path; the prompt used to show a literal '%s'.
        question = "Log file already exists at %s\n Do you want to evaluate again and overwrite?" % logFile
        if not query_yes_no(question, "y"):
            logger_evaluate.info("Log file already exists, not re-evaluating.... ")
            return 0

    def _short_wav_name(wav_file):
        # Last four path components (three parent directories + file name),
        # joined with os.sep; used for display only.
        parts = []
        path = wav_file
        for _ in range(4):
            parts.append(os.path.basename(path))
            path = os.path.dirname(path)
        parts.reverse()
        return str(os.sep.join(parts))

    fh = logging.FileHandler(logFile, 'w')  # create new logFile
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    logger_evaluate.addHandler(fh)

    # try/finally so the file handler is always closed and detached, even when
    # evaluation fails; otherwise handlers pile up on logger_evaluate.
    try:
        logger_evaluate.info("\n MODEL: %s", model_load)
        logger_evaluate.info("\n WAV_DIR: %s", wavDir)
        logger_evaluate.info("\n PREDICTS: %s", predictions_path)
        logger_evaluate.info("\n LOG: %s", logFile)
        logger_evaluate.info("\n")

        # GATHERING DATA
        logger_evaluate.info("* Gathering Data ...")

        # Gather filenames once; used for the log output and when printing results.
        wav_files = transform.loadWavs(wavDir)
        wav_filenames = [_short_wav_name(wav_file) for wav_file in wav_files]

        # Only filled in when labels are available.
        targets = None
        valid_frames = None

        if os.path.exists(data_store_path + ".pkl"):
            [inputs, targets, valid_frames] = unpickle(data_store_path + ".pkl")
            calculateAccuracy = True
            logger_evaluate.info("Successfully loaded preprocessed data, with targets")

        elif os.path.exists(data_store_path + "_noTargets.pkl"):
            # TODO: make it work for unlabeled datasets. see RNN_tools_lstm.py, eg iterate_minibatch_noTargets.
            [inputs] = unpickle(data_store_path + "_noTargets.pkl")
            calculateAccuracy = False  # we can't as we don't know the correct labels
            logger_evaluate.info("Successfully loaded preprocessed data, no targets")

        else:
            logger_evaluate.info("Data not found, preprocessing...")

            logger_evaluate.info("Found %s files to evaluate \n Example: %s", len(wav_filenames),
                                 wav_filenames[0] if wav_filenames else None)
            label_files = transform.loadPhns(wavDir)

            # From WAVS, generate X (and y, valid_frames when labels exist).
            # fixWavs -> suppose this is done.
            # If the source dir doesn't contain a label per wav, we can't calculate accuracy.
            calculateAccuracy = (len(wav_files) == len(label_files))
            if calculateAccuracy:
                inputs, targets, valid_frames = preprocessWavs.preprocess_dataset(
                    source_path=wavDir, nbMFCCs=nbMFCCs, logger=logger_evaluate)
                inputs = preprocessWavs.set_type(inputs, 'float32')
                targets = preprocessWavs.set_type(targets, 'int32')
                valid_frames = preprocessWavs.set_type(valid_frames, 'int32')
            else:
                inputs = preprocessWavs.preprocess_unlabeled_dataset(
                    source_path=wavDir, nbMFCCs=nbMFCCs, logger=logger_evaluate)
                inputs = preprocessWavs.set_type(inputs, 'float32')

            # normalize inputs using dataset Mean and Std_dev; convert to float32 for GPU evaluation
            # NOTE: unpickling can execute arbitrary code; meanStd_path must be a trusted file.
            with open(meanStd_path, 'rb') as cPickle_file:
                [mean_val, std_val] = cPickle.load(cPickle_file)
            inputs = preprocessWavs.normalize(inputs, mean_val, std_val)

            # just to be sure
            inputs = preprocessWavs.set_type(inputs, 'float32')

            # Print some information
            logger_evaluate.debug("* Data information")
            logger_evaluate.debug(' inputs')
            logger_evaluate.debug('%s %s', type(inputs), len(inputs))
            logger_evaluate.debug('%s %s', type(inputs[0]), inputs[0].shape)
            logger_evaluate.debug('%s %s', type(inputs[0][0]), inputs[0][0].shape)
            logger_evaluate.debug('%s', type(inputs[0][0][0]))
            if calculateAccuracy:
                # targets only exist for labeled data
                logger_evaluate.debug('y train')
                logger_evaluate.debug(' %s %s', type(targets), len(targets))
                logger_evaluate.debug(' %s %s', type(targets[0]), targets[0].shape)
                logger_evaluate.debug(' %s %s', type(targets[0][0]), targets[0][0].shape)

            # slice to have a number of inputs that is a multiple of batch size
            # (`or None` keeps everything when the remainder is 0)
            logger_evaluate.info("Not evaluating %s last files (batch size mismatch)", len(inputs) % batch_size)
            inputs = inputs[:-(len(inputs) % batch_size) or None]
            if calculateAccuracy:
                targets = targets[:-(len(targets) % batch_size) or None]
                valid_frames = valid_frames[:-(len(valid_frames) % batch_size) or None]

            # pad the inputs to process batches easily
            inputs = pad_sequences_X(inputs)
            if calculateAccuracy:
                targets = pad_sequences_y(targets)

            # save the preprocessed data
            logger_evaluate.info("storing preprocessed data to: %s", data_store_path)
            if calculateAccuracy:
                general_tools.saveToPkl(data_store_path + '.pkl', [inputs, targets, valid_frames])
            else:
                general_tools.saveToPkl(data_store_path + '_noTargets.pkl', [inputs])

        logger_evaluate.debug(" # inputs: %s, # wav files: %s", len(inputs), len(wav_files))

        # Keep copies of the data: per the original author's note, the iterator
        # may remove elements from the arrays and we need them again afterwards.
        inputs_bak = copy.deepcopy(inputs)
        if calculateAccuracy:
            targets_bak = copy.deepcopy(targets)
            valid_frames_bak = copy.deepcopy(valid_frames)

        logger_evaluate.info("* Evaluating: pass over Evaluation Set")

        avg_acc = None  # stays None when there are no labels to compare against
        predictions = []
        if calculateAccuracy:  # if .phn files are provided, we can check our predictions
            logger_evaluate.info("Getting predictions and calculating accuracy...")
            avg_error, avg_acc, predictions = self.RNN_network.run_epoch(
                X=inputs, y=targets, valid_frames=valid_frames,
                get_predictions=True, batch_size=batch_size)

            logger_evaluate.info("All batches, avg Accuracy: %s", avg_acc)
            inputs = inputs_bak
            targets = targets_bak
            valid_frames = valid_frames_bak

            # uncomment if you want to save everything in one place (takes quite a lot of storage space)
            # general_tools.saveToPkl(predictions_path, [inputs, predictions, targets, valid_frames, avg_Acc])

        else:
            # TODO fix this (unlabeled evaluation was marked unfinished by the original author)
            # Loop variables get their own names so the full `inputs` is not
            # rebound to a single batch.
            for batch_inputs, masks, seq_lengths in tqdm(
                    iterate_minibatches_noTargets(inputs, batch_size=batch_size, shuffle=False),
                    total=len(inputs)):
                # get predictions
                nb_inputs = len(batch_inputs)  # usually batch size, but could be lower
                prediction = self.RNN_network.predictions_fn(batch_inputs, masks)
                prediction = np.reshape(prediction, (nb_inputs, -1))
                predictions.extend(list(prediction))

            inputs = inputs_bak
            # general_tools.saveToPkl(predictions_path, [inputs, predictions])

        # Print information about the predictions
        logger_evaluate.info("* Done")
        end_evaluation_time = time.time()
        # NOTE: measured from the module-level program_start_time, not from the
        # start of this call.
        eval_duration = end_evaluation_time - program_start_time
        logger_evaluate.info('Total time: {:.3f}'.format(eval_duration))

        # Print the results. Printing is best-effort: a failure is logged with
        # its traceback and the remaining timing information is still written.
        try:
            printEvaluation(wav_filenames, inputs, predictions, targets, valid_frames, avg_acc,
                            range(len(inputs)), logger=logger_evaluate, only_final_accuracy=True)
        except Exception:
            logger_evaluate.exception("Printing the evaluation results failed")

        logger_evaluate.info('Evaluation duration: {:.3f}'.format(eval_duration))
        logger_evaluate.info('Printing duration: {:.3f}'.format(time.time() - end_evaluation_time))
    finally:
        # close the log handler
        fh.close()
        logger_evaluate.removeHandler(fh)
X_val = preprocessWavs.normalize(X_val, mean_val, std_val) X_test = preprocessWavs.normalize(X_test, mean_val, std_val) logger.debug('X train') logger.debug(' %s %s', type(X_train), len(X_train)) logger.debug(' %s %s', type(X_train[0]), X_train[0].shape) logger.debug(' %s %s', type(X_train[0][0]), X_train[0][0].shape) logger.debug(' %s %s', type(X_train[0][0][0]), X_train[0][0].shape) logger.debug('y train') logger.debug(' %s %s', type(y_train), len(y_train)) logger.debug(' %s %s', type(y_train[0]), y_train[0].shape) logger.debug(' %s %s', type(y_train[0][0]), y_train[0][0].shape) # make sure we're working with float32 X_data_type = 'float32' X_train = preprocessWavs.set_type(X_train, X_data_type) X_val = preprocessWavs.set_type(X_val, X_data_type) X_test = preprocessWavs.set_type(X_test, X_data_type) y_data_type = 'int32' y_train = preprocessWavs.set_type(y_train, y_data_type) y_val = preprocessWavs.set_type(y_val, y_data_type) y_test = preprocessWavs.set_type(y_test, y_data_type) valid_frames_data_type = 'int32' valid_frames_train = preprocessWavs.set_type(valid_frames_train, valid_frames_data_type) valid_frames_val = preprocessWavs.set_type(valid_frames_val, valid_frames_data_type) valid_frames_test = preprocessWavs.set_type(valid_frames_test, valid_frames_data_type)