Example #1
0
def create_data_stream(args):
    """Return the data stream selected by ``args``, staging the data locally first.

    Unless ``args.no_copy`` is set, ``args.data_path`` is synced into
    ``args.tmpdir`` through ``Rsync`` and ``args.data_path`` is then
    overwritten (on the passed-in ``args`` object) with the path of the
    copy, i.e. ``args.tmpdir`` joined with the basename of the original path.

    Args:
        args: Options object providing ``no_copy``, ``tmpdir``,
            ``data_path``, ``dataset`` and ``batch_size`` attributes
            (presumably an ``argparse`` namespace -- confirm against caller).

    Returns:
        Whatever ``fuel_utils.get_datastream`` returns for the resolved
        path, the ``args.dataset`` split and ``args.batch_size``.
    """
    print(args)
    timer = StopWatch()

    stage_locally = not args.no_copy
    if stage_locally:
        # The copy runs inside the StopWatch context (presumably so it is
        # timed -- confirm against the StopWatch implementation).
        with timer:
            print('Copying data to local machine...')
            Rsync(args.tmpdir).sync(args.data_path)

        # From here on, read from the local copy instead of the source path.
        staging_dir = args.tmpdir
        file_name = os.path.basename(args.data_path)
        args.data_path = os.path.join(staging_dir, file_name)

    stream_options = dict(path=args.data_path,
                          which_set=args.dataset,
                          batch_size=args.batch_size)
    return fuel_utils.get_datastream(**stream_options)
Example #2
0
    sw = StopWatch()

    if not args.no_copy:
        print('Loading data streams from {}'.format(args.data_path))
        print('Copying data to local machine...')
        rsync = Rsync(args.tmpdir)
        rsync.sync(args.data_path)
        args.data_path = os.path.join(args.tmpdir,
                                      os.path.basename(args.data_path))
        sw.print_elapsed()

    ####################
    # load data stream #
    ####################
    train_datastream = get_datastream(path=args.data_path,
                                      which_set=args.train_dataset,
                                      batch_size=args.batch_size)
    valid_datastream = get_datastream(path=args.data_path,
                                      which_set=args.valid_dataset,
                                      batch_size=args.batch_size)
    test_datastream = get_datastream(path=args.data_path,
                                     which_set=args.test_dataset,
                                     batch_size=args.batch_size)

    #################
    # build network #
    #################
    print('Building and compiling network')
    input_data = T.ftensor3('input_data')
    input_mask = T.fmatrix('input_mask')
    target_data = T.imatrix('target_data')
Example #3
0
    sw = StopWatch()

    if not args.no_copy:
        print('Loading data streams from {}'.format(args.data_path))
        print('Copying data to local machine...')
        rsync = Rsync(args.tmpdir)
        rsync.sync(args.data_path)
        args.data_path = os.path.join(args.tmpdir,
                                      os.path.basename(args.data_path))
        sw.print_elapsed()

    ####################
    # load data stream #
    ####################
    train_datastream = get_datastream(path=args.data_path,
                                      which_set=args.train_dataset,
                                      batch_size=args.batch_size,
                                      use_ivectors=True)
    valid_datastream = get_datastream(path=args.data_path,
                                      which_set=args.valid_dataset,
                                      batch_size=args.batch_size,
                                      use_ivectors=True)
    test_datastream = get_datastream(path=args.data_path,
                                     which_set=args.test_dataset,
                                     batch_size=args.batch_size,
                                     use_ivectors=True)

    #################
    # build network #
    #################
    print('Building and compiling network')
    input_data = T.ftensor3('input_data')
Example #4
0
    sw = StopWatch()

    if not args.no_copy:
        print('Loading data streams from {}'.format(args.data_path))
        print('Copying data to local machine...')
        rsync = Rsync(args.tmpdir)
        rsync.sync(args.data_path)
        args.data_path = os.path.join(args.tmpdir,
                                      os.path.basename(args.data_path))
        sw.print_elapsed()

    ####################
    # load data stream #
    ####################
    data_stream = get_datastream(path=args.data_path,
                                 which_set=args.test_dataset,
                                 batch_size=args.batch_size,
                                 shuffled=False)
    id_stream = get_spkid_stream(path=args.data_path,
                                 which_set=args.test_dataset,
                                 batch_size=args.batch_size)

    spk_list = get_spk_list(id_stream)
    num_speakers = len(spk_list)
    print('List of speakers: {}'.format(spk_list))

    #################
    # build network #
    #################
    print('Building and compiling network')
    input_data = T.ftensor3('input_data')
    input_mask = T.fmatrix('input_mask')
Example #5
0
    print('Loading Parameters...', file=sys.stderr)
    if args.model:
        with open(args.model, 'rb') as f:
            [
                pretrain_network_params_val, pretrain_update_params_val,
                pretrain_total_epoch_cnt
            ] = pickle.load(f)
        set_model_param_value(network_params, pretrain_network_params_val)
    else:
        print('Must specfiy network to load', file=sys.stderr)
        sys.exit(1)

    ff_fn = ff(network, input_data, input_mask)
    test_datastream = get_datastream(path=args.data_path,
                                     which_set=args.dataset,
                                     batch_size=args.batch_size)
    uttid_datastream = get_uttid_stream(path=args.data_path,
                                        which_set=args.dataset,
                                        batch_size=args.batch_size)

    writer = kaldi_io.BaseFloatMatrixWriter(args.wxfilename)

    for batch_idx, (feat_batch, uttid_batch) in enumerate(
            zip(test_datastream.get_epoch_iterator(),
                uttid_datastream.get_epoch_iterator())):
        input_data = feat_batch[0].astype(floatX)
        input_mask = feat_batch[1].astype(floatX)

        target_data = feat_batch[2]
        target_mask = feat_batch[3].astype(floatX)
Example #6
0
    args.save_path = '{}_lhuc'.format(args.save_path)

    if args.use_ivectors:
        args.input_dim = args.input_dim + args.ivector_dim

    print(args)

    print('Load data stream {} from {}'.format(args.valid_dataset,
                                               args.data_path))
    if args.norm_path:
        print('Use normalization data from {}'.format(args.norm_path))

    valid_ds = fuel_utils.get_datastream(
        path=args.data_path,
        which_set=args.valid_dataset,
        batch_size=args.batch_size,
        norm_path=args.norm_path,
        use_ivectors=args.use_ivectors,
        truncate_ivectors=args.truncate_ivectors,
        ivector_dim=args.ivector_dim)
    valid_spkid_ds = fuel_utils.get_spkid_stream(path=args.data_path,
                                                 which_set=args.valid_dataset,
                                                 batch_size=args.batch_size)

    spk_list = fuel_utils.get_spk_list(valid_spkid_ds)
    num_speakers = len(spk_list)
    print('List of speakers: {}'.format(spk_list))

    print('Build and compile network')
    input_data = T.ftensor3('input_data')
    input_mask = T.fmatrix('input_mask')
    target_data = T.imatrix('target_data')