コード例 #1
0
            def preprocessLabeledWavs(wavDir, store_dir, name):
                """Preprocess the labeled wavs under ``wavDir``.

                Calls ``preprocessWavs.preprocess_dataset`` and casts its three
                results with ``preprocessWavs.set_type``: inputs to 'float32',
                targets and valid frames to 'int32'.

                ``store_dir`` and ``name`` are accepted but not used here.

                Returns:
                    The tuple ``(X, y, valid_frames)`` after casting.
                """
                # The wav files are assumed to have been fixed beforehand.
                features, labels, frames = preprocessWavs.preprocess_dataset(
                    source_path=wavDir,
                    nbMFCCs=nbMFCCs,
                    logger=logger_evaluate)

                features = preprocessWavs.set_type(features, 'float32')
                labels = preprocessWavs.set_type(labels, 'int32')
                frames = preprocessWavs.set_type(frames, 'int32')
                return features, labels, frames
コード例 #2
0
            def preprocessUnlabeledWavs(wavDir, store_dir, name):  #TODO
                """Preprocess the wavs under ``wavDir`` when no labels exist.

                Calls ``preprocessWavs.preprocess_unlabeled_dataset`` and casts
                the result to 'float32' with ``preprocessWavs.set_type``.

                ``store_dir`` and ``name`` are accepted but not used here.

                Returns:
                    The casted inputs.
                """
                # The wav files are assumed to have been fixed beforehand.
                features = preprocessWavs.preprocess_unlabeled_dataset(
                    source_path=wavDir,
                    nbMFCCs=nbMFCCs,
                    logger=logger_evaluate)
                return preprocessWavs.set_type(features, 'float32')
コード例 #3
0
def oneTypeToPkl(noiseType, ratio_dB):
    """Preprocess the TEST wavs of one noise type / ratio and pickle them.

    Input and output directories are built from the module-level ``root``,
    ``dataset`` and ``nbPhonemes`` together with the two arguments. The data
    is preprocessed with ``preprocessWavs.preprocess_dataset``, normalized
    with the mean/std unpickled from ``normalizePkl_path``, cast to
    float32/int32 and saved as ``[X_test, y_test, valid_frames_test]``.

    Side effects: rebinds the module globals ``target_path`` and (when the
    data is actually processed) ``dataList``; creates the output directory.

    Args:
        noiseType: noise name, concatenated into the directory names (must be
            a string).
        ratio_dB: ratio used in the ``ratio<ratio_dB>`` directory component.

    Returns:
        0 when the target pickle already exists and ``forceOverwrite`` is
        falsy (nothing is processed); otherwise None.
    """
    global target_path, dataList

    # Both directories share the "_<noiseType>/ratio<ratio_dB>/" component.
    noise_subdir = "_" + noiseType + os.sep + "ratio" + str(ratio_dB) + os.sep
    dataRootDir = root + dataset + "/fixed" + str(nbPhonemes) + noise_subdir + 'TEST'
    outputDir = root + dataset + "/binary" + str(nbPhonemes) + noise_subdir + dataset

    ### store path
    target = os.path.join(outputDir, dataset + '_' + str(nbMFCCs) + '_ch')
    target_path = target + '.pkl'
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)

    # Already exists: skip unless overwriting was requested.
    if os.path.exists(target_path) and not forceOverwrite:
        # The placeholder is required: without it logging cannot format the
        # record and the message is dropped with a formatting error.
        logger.info("This file already exists, skipping %s", target_path)
        return 0

    ##### The PREPROCESSING itself #####
    logger.info('Preprocessing data ...')

    # Gather the WAV and PHN files, generate MFCCs and extract labels to make
    # inputs and targets for the network.
    logger.info('  Data: %s ', dataRootDir)
    X_test, y_test, valid_frames_test = preprocessWavs.preprocess_dataset(
        source_path=dataRootDir,
        nbMFCCs=nbMFCCs,
        logger=logger,
        debug=None)
    # Sanity check: one target and one valid-frames entry per input.
    assert len(X_test) == len(y_test) == len(valid_frames_test)

    logger.info(' Loading data complete.')
    logger.debug('Type and shape/len of X_test')
    logger.debug('type(X_test): {}'.format(type(X_test)))
    logger.debug('type(X_test[0]): {}'.format(type(X_test[0])))
    logger.debug('type(X_test[0][0]): {}'.format(type(X_test[0][0])))
    logger.debug('type(X_test[0][0][0]): {}'.format(type(X_test[0][0][0])))

    logger.info("  test X: %s", len(X_test))
    logger.info("  test y: %s", len(y_test))
    logger.info("  test valid_frames: %s", len(valid_frames_test))

    ### NORMALIZE data ###
    logger.info('Normalizing data ...')
    logger.info('    Each channel mean=0, sd=1 ...')
    mean_val, std_val = unpickle(normalizePkl_path)
    X_test = preprocessWavs.normalize(X_test, mean_val, std_val)

    # make sure we're working with float32 inputs and int32 targets/frames
    X_data_type = 'float32'
    X_test = preprocessWavs.set_type(X_test, X_data_type)
    y_data_type = 'int32'
    y_test = preprocessWavs.set_type(y_test, y_data_type)
    valid_frames_data_type = 'int32'
    valid_frames_test = preprocessWavs.set_type(valid_frames_test,
                                                valid_frames_data_type)

    # print some more to check that cast succeeded
    logger.debug('X test')
    logger.debug('  %s %s', type(X_test), len(X_test))
    logger.debug('  %s %s', type(X_test[0]), X_test[0].shape)
    logger.debug('  %s %s', type(X_test[0][0]), X_test[0][0].shape)
    # NOTE(review): the shape logged below is that of X_test[0][0], not of the
    # scalar element whose type is logged - possibly a copy-paste leftover.
    logger.debug('  %s %s', type(X_test[0][0][0]), X_test[0][0].shape)
    logger.debug('y test')
    logger.debug('  %s %s', type(y_test), len(y_test))
    logger.debug('  %s %s', type(y_test[0]), y_test[0].shape)
    logger.debug('  %s %s', type(y_test[0][0]), y_test[0][0].shape)

    ### STORE DATA ###
    logger.info('Saving data to %s', target_path)
    dataList = [X_test, y_test, valid_frames_test]
    saveToPkl(target_path, dataList)

    logger.info('Preprocessing complete!')
    logger.info('Total time: {:.3f}'.format(timeit.default_timer() -
                                            program_start_time))
コード例 #4
0
    def evaluateModel(self,
                      BIDIRECTIONAL,
                      N_HIDDEN_LIST,
                      batch_size,
                      dataName,
                      wavDir,
                      data_store_dir,
                      meanStd_path,
                      model_load,
                      nbMFCCs,
                      store_dir,
                      force_overwrite=False):
        """Run the network over an evaluation set and log the outcome.

        Preprocessed data is loaded from a pickle under ``data_store_dir``
        when one exists. Otherwise the wavs under ``wavDir`` are preprocessed,
        normalized with the mean/std from ``meanStd_path``, truncated to a
        multiple of ``batch_size``, padded, and stored for the next run.
        A per-dataset log file is written to ``store_dir`` for the duration of
        the call.

        Args:
            BIDIRECTIONAL: not used by this method.
            N_HIDDEN_LIST: not used by this method.
            batch_size: batch size for evaluation; inputs that do not fill a
                whole batch are dropped when data is freshly preprocessed.
            dataName: label of the data set; '/' is replaced by '_' when it is
                used in file names.
            wavDir: directory handed to the wav/phn loaders and preprocessors.
            data_store_dir: location of the preprocessed-data pickles. It is
                joined to the file name by plain concatenation (no separator
                is inserted). Created if missing.
            meanStd_path: pickle file containing ``[mean_val, std_val]``.
            model_load: only written to the log.
            nbMFCCs: passed to the preprocessors and part of the pickle name.
            store_dir: directory of the log file.
            force_overwrite: when falsy and the log file already exists, the
                user is asked whether to evaluate again.

        Returns:
            0 when the user declines to re-evaluate, otherwise None.
        """
        logger_evaluate.info("\n\n\n")

        def relative_wav_names(paths):
            # Keep the last three directory levels plus the file name.
            names = []
            for wav_file in paths:
                parent = os.path.dirname(wav_file)
                grandparent = os.path.dirname(parent)
                great_grandparent = os.path.dirname(grandparent)
                names.append(
                    str(
                        os.sep.join([
                            os.path.basename(great_grandparent),
                            os.path.basename(grandparent),
                            os.path.basename(parent),
                            os.path.basename(wav_file)
                        ])))
            return names

        ####### THE DATA you want to evaluate ##########
        safe_name = dataName.replace('/', '_')
        data_store_path = data_store_dir + safe_name + "_nbMFCC" + str(nbMFCCs)
        if not os.path.exists(data_store_dir):
            os.makedirs(data_store_dir)
        predictions_path = store_dir + os.sep + safe_name + "_predictions.pkl"

        # log file
        logFile = store_dir + os.sep + "Evaluation" + safe_name + '.log'
        if os.path.exists(logFile) and not force_overwrite:
            from general_tools import query_yes_no
            # Fill in the path; previously the literal "%s" was shown.
            question = (
                "Log file already exists at %s\n Do you want to evaluate again and overwrite?"
                % logFile)
            if not query_yes_no(question, "y"):
                logger_evaluate.info(
                    "Log file already exists, not re-evaluating.... ")
                return 0

        fh = logging.FileHandler(logFile, 'w')  # create new logFile
        try:
            fh.setLevel(logging.INFO)
            fh.setFormatter(formatter)
            logger_evaluate.addHandler(fh)
            logger_evaluate.info("\n  MODEL:    %s", model_load)
            logger_evaluate.info("\n  WAV_DIR:  %s", wavDir)
            logger_evaluate.info("\n  PREDICTS: %s", predictions_path)
            logger_evaluate.info("\n  LOG:      %s", logFile)
            logger_evaluate.info("\n")

            # Bound up front so the unlabeled path does not hit unbound names
            # when reporting.
            # NOTE(review): whether printEvaluation accepts None for these is
            # not visible here - confirm.
            targets = None
            valid_frames = None
            avg_acc = None

            # GATHERING DATA
            logger_evaluate.info("* Gathering Data ...")
            if os.path.exists(data_store_path + ".pkl"):
                [inputs, targets,
                 valid_frames] = unpickle(data_store_path + ".pkl")
                calculateAccuracy = True
                logger_evaluate.info(
                    "Successfully loaded preprocessed data, with targets")

            elif os.path.exists(
                    data_store_path + "_noTargets.pkl"
            ):  # TODO: make it work for unlabeled datasets. see RNN_tools_lstm.py, eg iterate_minibatch_noTargets.
                [inputs] = unpickle(data_store_path + "_noTargets.pkl")
                calculateAccuracy = False  # we can't as we don't know the correct labels
                logger_evaluate.info(
                    "Successfully loaded preprocessed data, no targets")

            else:
                logger_evaluate.info("Data not found, preprocessing...")

                # From WAVS, generate X, y and valid_frames
                def preprocessLabeledWavs(wavDir, store_dir, name):
                    # fixWavs -> suppose this is done
                    X, y, valid_frames = preprocessWavs.preprocess_dataset(
                        source_path=wavDir,
                        nbMFCCs=nbMFCCs,
                        logger=logger_evaluate)

                    X = preprocessWavs.set_type(X, 'float32')
                    y = preprocessWavs.set_type(y, 'int32')
                    valid_frames = preprocessWavs.set_type(
                        valid_frames, 'int32')

                    return X, y, valid_frames

                def preprocessUnlabeledWavs(wavDir, store_dir, name):  #TODO
                    # fixWavs -> suppose this is done
                    X = preprocessWavs.preprocess_unlabeled_dataset(
                        source_path=wavDir,
                        nbMFCCs=nbMFCCs,
                        logger=logger_evaluate)

                    return preprocessWavs.set_type(X, 'float32')

                # load wavs and labels
                wav_files = transform.loadWavs(wavDir)
                wav_filenames = relative_wav_names(wav_files)
                logger_evaluate.info(
                    "Found %s files to evaluate \n Example: %s",
                    len(wav_filenames), wav_filenames[0])
                label_files = transform.loadPhns(wavDir)

                # if source dir doesn't contain labels, we can't calculate accuracy
                calculateAccuracy = len(wav_files) == len(label_files)
                if calculateAccuracy:
                    inputs, targets, valid_frames = preprocessLabeledWavs(
                        wavDir=wavDir, store_dir=store_dir, name=dataName)
                else:
                    inputs = preprocessUnlabeledWavs(wavDir=wavDir,
                                                     store_dir=store_dir,
                                                     name=dataName)

                # normalize inputs using dataset Mean and Std_dev;  convert to float32 for GPU evaluation
                # NOTE: unpickling executes arbitrary code; meanStd_path must
                # point to a trusted file.
                with open(meanStd_path, 'rb') as cPickle_file:
                    [mean_val, std_val] = cPickle.load(cPickle_file)
                inputs = preprocessWavs.normalize(inputs, mean_val, std_val)

                # just to be sure
                X_data_type = 'float32'
                inputs = preprocessWavs.set_type(inputs, X_data_type)

                # Print some information
                logger_evaluate.debug("* Data information")
                logger_evaluate.debug('  inputs')
                logger_evaluate.debug('%s %s', type(inputs), len(inputs))
                logger_evaluate.debug('%s %s', type(inputs[0]),
                                      inputs[0].shape)
                logger_evaluate.debug('%s %s', type(inputs[0][0]),
                                      inputs[0][0].shape)
                logger_evaluate.debug('%s', type(inputs[0][0][0]))
                # Targets only exist for labeled data; logging them
                # unconditionally failed on the unlabeled path.
                if calculateAccuracy:
                    logger_evaluate.debug('y train')
                    logger_evaluate.debug('  %s %s', type(targets),
                                          len(targets))
                    logger_evaluate.debug('  %s %s', type(targets[0]),
                                          targets[0].shape)
                    logger_evaluate.debug('  %s %s', type(targets[0][0]),
                                          targets[0][0].shape)

                # slice to have a number of inputs that is a multiple of batch size
                logger_evaluate.info(
                    "Not evaluating %s last files (batch size mismatch)",
                    len(inputs) % batch_size)
                # "-remainder or None": a remainder of 0 yields None, which
                # keeps the whole sequence instead of slicing it to empty.
                inputs = inputs[:-(len(inputs) % batch_size) or None]
                if calculateAccuracy:
                    targets = targets[:-(len(targets) % batch_size) or None]
                    valid_frames = valid_frames[:-(len(valid_frames) %
                                                   batch_size) or None]

                # pad the inputs to process batches easily
                inputs = pad_sequences_X(inputs)
                if calculateAccuracy:
                    targets = pad_sequences_y(targets)

                # save the preprocessed data
                logger_evaluate.info("storing preprocessed data to: %s",
                                     data_store_path)
                if calculateAccuracy:
                    general_tools.saveToPkl(data_store_path + '.pkl',
                                            [inputs, targets, valid_frames])
                else:
                    general_tools.saveToPkl(
                        data_store_path + '_noTargets.pkl', [inputs])

            # Gather filenames; for debugging
            wav_files = transform.loadWavs(wavDir)
            wav_filenames = relative_wav_names(wav_files)
            logger_evaluate.debug(" # inputs: %s, # wav files: %s",
                                  len(inputs), len(wav_files))

            # make copy of data because we might need to use it again for calculating accuracy, and the iterator will remove elements from the array
            inputs_bak = copy.deepcopy(inputs)
            if calculateAccuracy:
                targets_bak = copy.deepcopy(targets)
                valid_frames_bak = copy.deepcopy(valid_frames)

            logger_evaluate.info("* Evaluating: pass over Evaluation Set")

            if calculateAccuracy:  # if .phn files are provided, we can check our predictions
                logger_evaluate.info(
                    "Getting predictions and calculating accuracy...")
                avg_error, avg_acc, predictions = self.RNN_network.run_epoch(
                    X=inputs,
                    y=targets,
                    valid_frames=valid_frames,
                    get_predictions=True,
                    batch_size=batch_size)

                logger_evaluate.info("All batches, avg Accuracy: %s", avg_acc)
                inputs = inputs_bak
                targets = targets_bak
                valid_frames = valid_frames_bak

                #uncomment if you want to save everything in one place (takes quite a lot of storage space)
                #general_tools.saveToPkl(predictions_path, [inputs, predictions, targets, valid_frames, avg_Acc])

            else:
                # TODO fix this
                # Start from an empty list; the accumulator was previously
                # used before being assigned.
                predictions = []
                for batch_inputs, masks, seq_lengths in tqdm(
                        iterate_minibatches_noTargets(inputs,
                                                      batch_size=batch_size,
                                                      shuffle=False),
                        total=len(inputs)):
                    # get predictions
                    nb_inputs = len(
                        batch_inputs)  # usually batch size, but could be lower
                    prediction = self.RNN_network.predictions_fn(
                        batch_inputs, masks)
                    prediction = np.reshape(prediction, (nb_inputs, -1))
                    predictions.extend(list(prediction))

                inputs = inputs_bak
                #general_tools.saveToPkl(predictions_path, [inputs, predictions])

            # Print information about the predictions
            logger_evaluate.info("* Done")
            end_evaluation_time = time.time()
            eval_duration = end_evaluation_time - program_start_time
            logger_evaluate.info('Total time: {:.3f}'.format(eval_duration))
            # Print the results; a failure here is logged with its traceback
            # instead of dropping into an interactive debugger.
            try:
                printEvaluation(wav_filenames,
                                inputs,
                                predictions,
                                targets,
                                valid_frames,
                                avg_acc,
                                range(len(inputs)),
                                logger=logger_evaluate,
                                only_final_accuracy=True)
            except Exception:
                logger_evaluate.exception("Printing the evaluation failed")
            logger_evaluate.info(
                'Evaluation duration: {:.3f}'.format(eval_duration))
            logger_evaluate.info(
                'Printing duration: {:.3f}'.format(time.time() -
                                                   end_evaluation_time))
        finally:
            # close the log handler, also when evaluation raised, so that it
            # is not left attached to the logger
            fh.close()
            logger_evaluate.removeHandler(fh)
コード例 #5
0
# Normalize the validation and test inputs with the same mean/std values.
X_val, X_test = (preprocessWavs.normalize(split, mean_val, std_val)
                 for split in (X_val, X_test))

# Log container/element types and shapes of the training data.
logger.debug('X train')
logger.debug('  %s %s', type(X_train), len(X_train))
logger.debug('  %s %s', type(X_train[0]), X_train[0].shape)
logger.debug('  %s %s', type(X_train[0][0]), X_train[0][0].shape)
logger.debug('  %s %s', type(X_train[0][0][0]), X_train[0][0].shape)
logger.debug('y train')
logger.debug('  %s %s', type(y_train), len(y_train))
logger.debug('  %s %s', type(y_train[0]), y_train[0].shape)
logger.debug('  %s %s', type(y_train[0][0]), y_train[0][0].shape)

# Cast every split: inputs to float32, targets and valid frames to int32.
# Generator expressions are used so no loop variable is left at module level.
X_data_type = 'float32'
X_train, X_val, X_test = (preprocessWavs.set_type(split, X_data_type)
                          for split in (X_train, X_val, X_test))

y_data_type = 'int32'
y_train, y_val, y_test = (preprocessWavs.set_type(split, y_data_type)
                          for split in (y_train, y_val, y_test))

valid_frames_data_type = 'int32'
valid_frames_train, valid_frames_val, valid_frames_test = (
    preprocessWavs.set_type(split, valid_frames_data_type)
    for split in (valid_frames_train, valid_frames_val, valid_frames_test))