def create_data_stream(args):
    """Build and return the Fuel datastream described by ``args``.

    When ``args.no_copy`` is false, the dataset file is first rsynced to
    ``args.tmpdir`` (timed by ``StopWatch`` used as a context manager) and
    ``args.data_path`` is rewritten to point at the local copy.

    NOTE(review): reconstructed from a whitespace-mangled source line; the
    statement nesting shown here is inferred from statement order.
    """
    print(args)
    stopwatch = StopWatch()
    if not args.no_copy:
        with stopwatch:
            print('Copying data to local machine...')
            syncer = Rsync(args.tmpdir)
            syncer.sync(args.data_path)
            # Point at the freshly synced local copy for all later reads.
            args.data_path = os.path.join(args.tmpdir,
                                          os.path.basename(args.data_path))
    return fuel_utils.get_datastream(path=args.data_path,
                                     which_set=args.dataset,
                                     batch_size=args.batch_size)
# Optionally stage the dataset on the local machine, then open the three
# Fuel datastreams (train/valid/test) and declare the Theano input variables.
# NOTE(review): reconstructed from a whitespace-mangled source line; the
# placement of the two prints and sw.print_elapsed() inside the copy branch
# is inferred from statement order — confirm against the original file.
sw = StopWatch()
if not args.no_copy:
    print('Loading data streams from {}'.format(args.data_path))
    print('Copying data to local machine...')
    rsync = Rsync(args.tmpdir)
    rsync.sync(args.data_path)
    # All subsequent reads go through the local copy.
    args.data_path = os.path.join(args.tmpdir,
                                  os.path.basename(args.data_path))
    sw.print_elapsed()

####################
# load data stream #
####################
train_datastream = get_datastream(path=args.data_path,
                                  which_set=args.train_dataset,
                                  batch_size=args.batch_size)
valid_datastream = get_datastream(path=args.data_path,
                                  which_set=args.valid_dataset,
                                  batch_size=args.batch_size)
test_datastream = get_datastream(path=args.data_path,
                                 which_set=args.test_dataset,
                                 batch_size=args.batch_size)

#################
# build network #
#################
print('Building and compiling network')
# Symbolic network inputs: (time, batch, feat) float features, a float
# sequence mask, and integer targets.
input_data = T.ftensor3('input_data')
input_mask = T.fmatrix('input_mask')
target_data = T.imatrix('target_data')
# Same staging-and-streams preamble as the plain setup, but every stream is
# opened with use_ivectors=True so speaker i-vectors are appended to the
# acoustic features.
# NOTE(review): reconstructed from a whitespace-mangled source line; the
# nesting of the prints and sw.print_elapsed() inside the copy branch is
# inferred from statement order — confirm against the original file.
sw = StopWatch()
if not args.no_copy:
    print('Loading data streams from {}'.format(args.data_path))
    print('Copying data to local machine...')
    rsync = Rsync(args.tmpdir)
    rsync.sync(args.data_path)
    args.data_path = os.path.join(args.tmpdir,
                                  os.path.basename(args.data_path))
    sw.print_elapsed()

####################
# load data stream #
####################
train_datastream = get_datastream(path=args.data_path,
                                  which_set=args.train_dataset,
                                  batch_size=args.batch_size,
                                  use_ivectors=True)
valid_datastream = get_datastream(path=args.data_path,
                                  which_set=args.valid_dataset,
                                  batch_size=args.batch_size,
                                  use_ivectors=True)
test_datastream = get_datastream(path=args.data_path,
                                 which_set=args.test_dataset,
                                 batch_size=args.batch_size,
                                 use_ivectors=True)

#################
# build network #
#################
print('Building and compiling network')
# Symbolic feature input; further inputs are declared past this fragment.
input_data = T.ftensor3('input_data')
# Stage the dataset locally if requested, then open an unshuffled test
# stream plus a parallel speaker-id stream and enumerate the speakers.
# NOTE(review): reconstructed from a whitespace-mangled source line; the
# nesting of the prints and sw.print_elapsed() inside the copy branch is
# inferred from statement order — confirm against the original file.
sw = StopWatch()
if not args.no_copy:
    print('Loading data streams from {}'.format(args.data_path))
    print('Copying data to local machine...')
    rsync = Rsync(args.tmpdir)
    rsync.sync(args.data_path)
    args.data_path = os.path.join(args.tmpdir,
                                  os.path.basename(args.data_path))
    sw.print_elapsed()

####################
# load data stream #
####################
# shuffled=False keeps utterance order aligned with the speaker-id stream.
data_stream = get_datastream(path=args.data_path,
                             which_set=args.test_dataset,
                             batch_size=args.batch_size,
                             shuffled=False)
id_stream = get_spkid_stream(path=args.data_path,
                             which_set=args.test_dataset,
                             batch_size=args.batch_size)
spk_list = get_spk_list(id_stream)
num_speakers = len(spk_list)
print('List of speakers: {}'.format(spk_list))

#################
# build network #
#################
print('Building and compiling network')
# Symbolic inputs: (time, batch, feat) float features and a float mask.
input_data = T.ftensor3('input_data')
input_mask = T.fmatrix('input_mask')
# Load pretrained network parameters, build the feed-forward function, and
# stream test batches (with aligned utterance ids) into a Kaldi matrix writer.
# Fix: error message typo 'specfiy' -> 'specify'.
# NOTE(review): reconstructed from a whitespace-mangled source line; the
# statement nesting is inferred from statement order — confirm against the
# original file. The batch loop body continues beyond this fragment.
print('Loading Parameters...', file=sys.stderr)
if args.model:
    with open(args.model, 'rb') as f:
        # Pickled triple saved by the training script: parameter values,
        # optimizer state, and the epoch counter reached so far.
        [pretrain_network_params_val,
         pretrain_update_params_val,
         pretrain_total_epoch_cnt] = pickle.load(f)
    set_model_param_value(network_params, pretrain_network_params_val)
else:
    print('Must specify network to load', file=sys.stderr)
    sys.exit(1)

# Compile the forward pass over the symbolic inputs.
ff_fn = ff(network, input_data, input_mask)

test_datastream = get_datastream(path=args.data_path,
                                 which_set=args.dataset,
                                 batch_size=args.batch_size)
uttid_datastream = get_uttid_stream(path=args.data_path,
                                    which_set=args.dataset,
                                    batch_size=args.batch_size)

writer = kaldi_io.BaseFloatMatrixWriter(args.wxfilename)

for batch_idx, (feat_batch, uttid_batch) in enumerate(
        zip(test_datastream.get_epoch_iterator(),
            uttid_datastream.get_epoch_iterator())):
    # NOTE(review): these assignments shadow the symbolic Theano variables
    # of the same names with concrete numpy batches — intentional in the
    # original, but fragile; verify nothing later needs the symbolic vars.
    input_data = feat_batch[0].astype(floatX)
    input_mask = feat_batch[1].astype(floatX)
    target_data = feat_batch[2]
    target_mask = feat_batch[3].astype(floatX)
# LHUC adaptation setup: tag the save path, widen the input dimension when
# i-vectors are appended, open the validation datastream and the speaker-id
# stream, and declare the symbolic network inputs.
# NOTE(review): reconstructed from a whitespace-mangled source line; the
# valid_ds assignment is placed outside the norm_path check because it is
# consumed unconditionally below — confirm against the original file.
args.save_path = '{}_lhuc'.format(args.save_path)
if args.use_ivectors:
    # Features are concatenated with per-speaker i-vectors downstream.
    args.input_dim = args.input_dim + args.ivector_dim
print(args)

print('Load data stream {} from {}'.format(args.valid_dataset,
                                           args.data_path))
if args.norm_path:
    print('Use normalization data from {}'.format(args.norm_path))
valid_ds = fuel_utils.get_datastream(
    path=args.data_path,
    which_set=args.valid_dataset,
    batch_size=args.batch_size,
    norm_path=args.norm_path,
    use_ivectors=args.use_ivectors,
    truncate_ivectors=args.truncate_ivectors,
    ivector_dim=args.ivector_dim)
valid_spkid_ds = fuel_utils.get_spkid_stream(path=args.data_path,
                                             which_set=args.valid_dataset,
                                             batch_size=args.batch_size)
spk_list = fuel_utils.get_spk_list(valid_spkid_ds)
num_speakers = len(spk_list)
print('List of speakers: {}'.format(spk_list))

print('Build and compile network')
# Symbolic inputs: (time, batch, feat) float features, float mask,
# integer targets.
input_data = T.ftensor3('input_data')
input_mask = T.fmatrix('input_mask')
target_data = T.imatrix('target_data')