Example #1
def create_tfrecords(params, data_dir, debug=False):
    """Create a dataset of TFRecords for VAD.

    Args:
        params (dict): dataset parameters
        data_dir (str): path to data directory
        debug (bool, optional): debug with a small amount of data. Defaults to False.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    np.random.seed(0)

    output_path = os.path.join(data_dir, "tfrecords/")
    if not tf.gfile.IsDirectory(output_path):
        tf.gfile.MakeDirs(output_path)

    input_size = params["input_size"]
    data_split = params["data_split"]
    num_shards = params["num_shards"]
    data_type = params["data_type"]

    # Data & label directories
    label_dir = os.path.join(data_dir, "labels/")
    data_dir = os.path.join(data_dir, "test-clean/")

    # Split data at the file level
    train, val, test = split_data(label_dir, data_split, random_seed=0)

    tot_files = len(train) + len(val) + len(test)
    logger.info(f"Total files: {tot_files}")
    logger.info(f"Train/val/test split: {len(train)}/{len(val)}/{len(test)}")
    train_it = slice_iter(data_dir, label_dir, train, input_size)
    val_it = slice_iter(data_dir, label_dir, val, input_size)
    test_it = slice_iter(data_dir, label_dir, test, input_size)

    # Write data in TFRecord format
    nmax = 100 if debug else -1
    if "train" in data_type:
        logger.info("Writing train tfrecords ...")
        train_path = os.path.join(output_path, "train/")
        if not tf.gfile.IsDirectory(train_path):
            tf.gfile.MakeDirs(train_path)
        write_tfrecords(train_path, train_it, num_shards, nmax=nmax)

    if "val" in data_type:
        logger.info("Writing val tfrecords ...")
        val_path = os.path.join(output_path, "val/")
        if not tf.gfile.IsDirectory(val_path):
            tf.gfile.MakeDirs(val_path)
        write_tfrecords(val_path, val_it, num_shards, nmax=nmax)

    if "test" in data_type:
        logger.info("Writing test tfrecords ...")
        test_path = os.path.join(output_path, "test/")
        if not tf.gfile.IsDirectory(test_path):
            tf.gfile.MakeDirs(test_path)
        write_tfrecords(test_path, test_it, num_shards, nmax=nmax)
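A hypothetical invocation of the variant above, with illustrative values only; the params keys mirror the reads at the top of the function (the default values come from Examples #2 and #3 below), and the directory layout (labels/, test-clean/) matches the paths it joins:

# Hypothetical usage sketch; keys mirror create_tfrecords, values are
# illustrative and not taken from the original project configuration.
params = {
    "input_size": 1024,           # samples per training slice
    "data_split": "0.7/0.15",     # train/val fractions; the remainder is test
    "num_shards": 256,            # TFRecord shards per split
    "data_type": "trainvaltest",  # substring match selects which splits to write
}
create_tfrecords(params, data_dir="/path/to/dataset", debug=True)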
Example #2
def create_tfrecords(
    data_dir,
    seq_len=1024,
    split="0.7/0.15",
    num_shards=256,
    debug=False,
    data_type="trainval",
):
    np.random.seed(0)

    output_path = os.path.join(data_dir, "tfrecords/")
    if not tf.gfile.IsDirectory(output_path):
        tf.gfile.MakeDirs(output_path)

    # Data & label directories
    label_dir = os.path.join(data_dir, "labels/")
    data_dir = os.path.join(data_dir, "test-clean/")

    # Split data at the file level
    train, val, test = split_data(label_dir, split, random_seed=0)

    tot_files = len(train) + len(val) + len(test)
    logger.info(f"Total files: {tot_files}")
    logger.info(f"Train/val/test split: {len(train)}/{len(val)}/{len(test)}")
    train_it = slice_iter(data_dir, label_dir, train, seq_len)
    val_it = slice_iter(data_dir, label_dir, val, seq_len)
    test_it = slice_iter(data_dir, label_dir, test, seq_len)

    # Write data in TFRecord format
    nmax = 100 if debug else -1
    if "train" in data_type:
        logger.info("Writing train tfrecords ...")
        train_path = os.path.join(output_path, "train/")
        if not tf.gfile.IsDirectory(train_path):
            tf.gfile.MakeDirs(train_path)
        write_tfrecords(train_path, train_it, num_shards, nmax=nmax)

    if "val" in data_type:
        logger.info("Writing val tfrecords ...")
        val_path = os.path.join(output_path, "val/")
        if not tf.gfile.IsDirectory(val_path):
            tf.gfile.MakeDirs(val_path)
        write_tfrecords(val_path, val_it, num_shards, nmax=nmax)

    if "test" in data_type:
        logger.info("Writing test tfrecords ...")
        test_path = os.path.join(output_path, "test/")
        if not tf.gfile.IsDirectory(test_path):
            tf.gfile.MakeDirs(test_path)
        write_tfrecords(test_path, test_it, num_shards, nmax=nmax)
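All of these variants delegate serialization to a write_tfrecords helper that is not shown on this page. A minimal sketch of what such a sharded writer could look like under the TF 1.x API, assuming the slice iterators yield (signal, label) pairs; the feature names and schema are hypothetical, not the project's actual format:

import os
import numpy as np
import tensorflow as tf

def write_tfrecords(path, it, num_shards, nmax=-1):
    # Hypothetical sharded writer: distributes (signal, label) pairs from
    # the iterator round-robin across num_shards TFRecord files in `path`.
    writers = [
        tf.python_io.TFRecordWriter(os.path.join(path, "data_%04d.tfrecord" % i))
        for i in range(num_shards)
    ]
    for n, (signal, label) in enumerate(it):
        if 0 < nmax <= n:  # debug mode caps the number of written examples
            break
        example = tf.train.Example(
            features=tf.train.Features(
                feature={
                    "signal": tf.train.Feature(
                        float_list=tf.train.FloatList(value=np.ravel(signal))
                    ),
                    "label": tf.train.Feature(
                        int64_list=tf.train.Int64List(value=[int(label)])
                    ),
                }
            )
        )
        writers[n % num_shards].write(example.SerializeToString())
    for writer in writers:
        writer.close()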
Example #3
def create_tfrecords(data_dir,
                     seq_len=1024,
                     split='0.7/0.15',
                     num_shards=256,
                     debug=False,
                     data_type='trainval'):
    np.random.seed(0)

    output_path = os.path.join(data_dir, 'tfrecords/')
    if not tf.gfile.IsDirectory(output_path):
        tf.gfile.MakeDirs(output_path)

    # Data & label directories
    label_dir = os.path.join(data_dir, 'labels/')
    data_dir = os.path.join(data_dir, 'test-clean/')

    # Split data at the file level
    train, val, test = split_data(label_dir, split, random_seed=0)

    print('\nTotal files: {}'.format(len(train) + len(val) + len(test)))
    print('Train/val/test split: {}/{}/{}'.format(len(train), len(val),
                                                  len(test)))
    train_it = slice_iter(data_dir, label_dir, train, seq_len)
    val_it = slice_iter(data_dir, label_dir, val, seq_len)
    test_it = slice_iter(data_dir, label_dir, test, seq_len)

    # Write data in TFRecord format
    nmax = 100 if debug else -1
    if 'train' in data_type:
        print('\nWriting train tfrecords ...')
        train_path = os.path.join(output_path, 'train/')
        if not tf.gfile.IsDirectory(train_path):
            tf.gfile.MakeDirs(train_path)
        write_tfrecords(train_path, train_it, num_shards, nmax=nmax)

    if 'val' in data_type:
        print('\nWriting val tfrecords ...')
        val_path = os.path.join(output_path, 'val/')
        if not tf.gfile.IsDirectory(val_path):
            tf.gfile.MakeDirs(val_path)
        write_tfrecords(val_path, val_it, num_shards, nmax=nmax)

    if 'test' in data_type:
        print('\nWriting test tfrecords ...')
        test_path = os.path.join(output_path, 'test/')
        if not tf.gfile.IsDirectory(test_path):
            tf.gfile.MakeDirs(test_path)
        write_tfrecords(test_path, test_it, num_shards, nmax=nmax)
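split_data is also external to these snippets. Judging only from its call sites (a label directory, a split string such as '0.7/0.15', and a fixed random seed), a plausible sketch is a seeded shuffle of the label files followed by two fractional cuts, with the remainder becoming the test set:

import os
import numpy as np

def split_data(label_dir, split="0.7/0.15", random_seed=0):
    # Hypothetical splitter consistent with the call sites above: shuffle
    # the label files deterministically, then cut train/val by the two
    # fractions in the split string; whatever remains is the test set.
    files = sorted(os.listdir(label_dir))
    rng = np.random.RandomState(random_seed)
    rng.shuffle(files)
    train_frac, val_frac = (float(x) for x in split.split("/"))
    n_train = int(train_frac * len(files))
    n_val = int(val_frac * len(files))
    return (
        files[:n_train],
        files[n_train:n_train + n_val],
        files[n_train + n_val:],
    )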
Example #4
def main(_):
    np.random.seed(0)

    # Directories
    data_dir = os.path.join(FLAGS.data_dir, 'test-clean/')
    label_dir = os.path.join(FLAGS.data_dir, 'labels/')

    _, _, test = split_data(label_dir, split='0.7/0.15', random_seed=0)
    file_it = file_iter(data_dir, label_dir, files=test)

    # TensorFlow inputs
    features_input_ph = tf.placeholder(shape=FEAT_SIZE, dtype=tf.float32)
    features_input_op = tf.transpose(features_input_ph, perm=[1, 0])
    features_input_op = tf.expand_dims(features_input_op, axis=0)

    # TensorFlow exported model
    speech_predictor = tf.contrib.predictor.from_saved_model(export_dir=FLAGS.exported_model)
    init = tf.initializers.global_variables()
    classes = ['Noise', 'Speech']

    # Iterate through test data
    with tf.Session() as sess:
        for signal, labels, fn in file_it:
            sess.run(init)
            print('\nPrediction on file {} ...'.format(fn))
            signal_input = deque(signal[:FLAGS.seq_len].tolist(), maxlen=FLAGS.seq_len)

            preds, pred_time = [], []
            pointer = FLAGS.seq_len
            while pointer < len(signal):
                start = time()
                # Preprocess signal & extract features
                signal_to_process = np.copy(signal_input)
                signal_to_process = np.float32(signal_to_process)
                features = extract_features(signal_to_process, freq=16000, n_mfcc=5, size=512, step=16)

                # Prediction
                features_input = sess.run(features_input_op, feed_dict={features_input_ph: features})
                speech_prob = speech_predictor({'features_input': features_input})['speech'][0]
                speech_pred = classes[int(np.round(speech_prob))]

                # Time prediction & processing
                end = time()
                dt = end - start
                pred_time.append(dt)
                print('Prediction = {} | proba = {:.2f} | time = {:.2f} s'.format(speech_pred, speech_prob[0], dt))

                # For visualization
                preds.append([pointer - FLAGS.seq_len, pointer, np.round(speech_prob)])

                # Update signal segment
                signal_input.extend(signal[pointer + FLAGS.stride:pointer + FLAGS.stride + FLAGS.seq_len])
                pointer += FLAGS.seq_len + FLAGS.stride

            print('Average prediction time = {:.2f} ms'.format(np.mean(pred_time) * 1e3))

            # Smoothing & hangover
            if FLAGS.smoothing:
                preds = smooth_predictions(preds)

            # Visualization
            visualize_predictions(signal, fn, preds)
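Both inference variants in this example call extract_features with freq=16000, n_mfcc=5, size=512 and step=16, and feed the result into a placeholder of shape FEAT_SIZE. The helper itself is not shown; a plausible MFCC front end matching that signature, assuming librosa is available:

import librosa
import numpy as np

def extract_features(signal, freq=16000, n_mfcc=5, size=512, step=16):
    # Hypothetical MFCC front end consistent with the call sites: n_mfcc
    # coefficients per frame, FFT windows of `size` samples, hop `step`.
    mfcc = librosa.feature.mfcc(
        y=signal, sr=freq, n_mfcc=n_mfcc, n_fft=size, hop_length=step
    )
    # Shape is (n_mfcc, n_frames); the caller transposes to (frames, mfcc)
    # and adds a batch dimension before feeding the exported model.
    return mfcc.astype(np.float32)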
def run_inference(params, data_dir, exported_model):
    """Run Voice Activity Detection CNN inference over raw audio signals.

    Args:
        params (dict): dictionary of inference parameters
        data_dir (str): path to raw dataset directory
        exported_model (str): path to exported pre-trained TF model directory
    """
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
    tf.logging.set_verbosity(tf.logging.INFO)
    np.random.seed(0)

    input_size = params["input_size"]
    stride = params["stride"]
    smoothing = params["smoothing"]

    # Directories
    test_data_dir = os.path.join(data_dir, "test-clean/")
    label_dir = os.path.join(data_dir, "labels/")

    _, _, test = split_data(label_dir, split="0.7/0.15", random_seed=0)
    file_it = file_iter(test_data_dir, label_dir, files=test)

    # TensorFlow inputs
    features_input_ph = tf.placeholder(shape=FEAT_SIZE, dtype=tf.float32)
    features_input_op = tf.transpose(features_input_ph, perm=[1, 0])
    features_input_op = tf.expand_dims(features_input_op, axis=0)

    # TensorFlow exported model
    speech_predictor = tf.contrib.predictor.from_saved_model(export_dir=exported_model)
    init = tf.initializers.global_variables()
    classes = ["Noise", "Speech"]

    # Iterate through test data
    with tf.Session() as sess:
        for signal, labels, fn in file_it:
            sess.run(init)
            logger.info(f"Prediction on file {fn} ...")
            signal_input = deque(signal[:input_size].tolist(), maxlen=input_size)

            preds, pred_time = [], []
            pointer = input_size
            while pointer < len(signal):
                start = time.time()
                # Preprocess signal & extract features
                signal_to_process = np.copy(signal_input)
                signal_to_process = np.float32(signal_to_process)
                features = extract_features(
                    signal_to_process, freq=16000, n_mfcc=5, size=512, step=16
                )

                # Prediction
                features_input = sess.run(
                    features_input_op, feed_dict={features_input_ph: features}
                )
                speech_prob = speech_predictor({"features_input": features_input})[
                    "speech"
                ][0]
                speech_pred = classes[int(np.round(speech_prob))]

                # Time prediction & processing
                end = time.time()
                dt = end - start
                pred_time.append(dt)
                logger.info(
                    f"Prediction = {speech_pred} | proba = {speech_prob[0]:.2f} | time = {dt:.2f} s"
                )

                # For visualization
                preds.append([pointer - input_size, pointer, np.round(speech_prob)])

                # Update signal segment
                signal_input.extend(
                    signal[pointer + stride : pointer + stride + input_size]
                )
                pointer += input_size + stride

            logger.info(f"Average prediction time = {np.mean(pred_time) * 1e3:.2f} ms")

            # Smoothing & hangover
            if smoothing:
                preds = smooth_predictions(preds)

            # Visualization
            visualize_predictions(signal, fn, preds)
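A hypothetical call to run_inference, with illustrative values only; the params keys mirror the reads at the top of the function, and exported_model should point at a SavedModel directory compatible with tf.contrib.predictor.from_saved_model:

# Hypothetical usage sketch; paths and values are illustrative only.
params = {
    "input_size": 1024,  # samples per sliding inference window
    "stride": 16,        # extra samples skipped between consecutive windows
    "smoothing": True,   # apply smoothing & hangover to the raw predictions
}
run_inference(
    params,
    data_dir="/path/to/dataset",
    exported_model="/path/to/exported_model",
)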