def infer(args):
    """Gets one batch of feature data and predicts labels for each sample.
    """
    if not os.path.exists(args.infer_model_path):
        raise IOError("Invalid inference model path!")

    place = fluid.CUDAPlace(0) if args.device == 'GPU' else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # load model
    [infer_program, feed_dict,
     fetch_targets] = fluid.io.load_inference_model(args.infer_model_path, exe)

    ltrans = [
        trans_add_delta.TransAddDelta(2, 2),
        trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
        trans_splice.TransSplice()
    ]

    infer_data_reader = reader.AsyncDataReader(args.infer_feature_lst,
                                               args.infer_label_lst)
    infer_data_reader.set_transformers(ltrans)

    feature_t = fluid.LoDTensor()
    # fetch a single batch; next() works on both Python 2 and 3, unlike .next()
    one_batch = next(infer_data_reader.batch_iterator(args.batch_size, 1))

    (features, labels, lod) = one_batch
    feature_t.set(features, place)
    feature_t.set_lod([lod])

    results = exe.run(infer_program,
                      feed={feed_dict[0]: feature_t},
                      fetch_list=fetch_targets,
                      return_numpy=False)

    probs, lod = lodtensor_to_ndarray(results[0])
    preds = probs.argmax(axis=1)

    infer_batch = split_infer_result(preds, lod)
    for index, sample in enumerate(infer_batch):
        print("result %d: " % index, sample, '\n')
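# lodtensor_to_ndarray() and split_infer_result() are defined elsewhere in
# the repo; the sketches below show only the assumed interfaces (relying on
# the module's existing np import), not the actual implementations.
def lodtensor_to_ndarray(lod_tensor):
    """Sketch: convert a LoDTensor into (ndarray, lod)."""
    return np.array(lod_tensor), lod_tensor.lod()

def split_infer_result(infer_seq, lod):
    """Sketch: slice the flat result array into per-utterance segments
    using the level-0 LoD offsets."""
    infer_batch = []
    for i in range(len(lod[0]) - 1):
        infer_batch.append(infer_seq[lod[0][i]:lod[0][i + 1]])
    return infer_batch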
def test_perform(self):
    """TransSplice() splices each 10-dim frame with 5 left and 5 right
    context frames (11 * 10 = 110 dims); boundary frames are padded by
    replicating the first/last row, which the loop below verifies.
    """
    feature = np.zeros((8, 10), dtype="float32")
    for i in xrange(feature.shape[0]):
        feature[i, :].fill(i)
    trans = trans_splice.TransSplice()
    (feature, label, name) = trans.perform_trans((feature, None, None))
    self.assertEqual(feature.shape[1], 110)
    for i in xrange(8):
        nzero_num = 5 - i  # number of leading blocks clamped to row 0
        cur_val = 0.0
        if nzero_num < 0:
            cur_val = i - 5 - 1
        for j in xrange(11):
            if j <= nzero_num:
                for k in xrange(10):
                    self.assertAlmostEqual(feature[i][j * 10 + k], cur_val)
            else:
                if cur_val < 7:  # values saturate at the last row index
                    cur_val += 1.0
                for k in xrange(10):
                    self.assertAlmostEqual(feature[i][j * 10 + k], cur_val)
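# An equivalent way to state the expectation the nested loops encode: with
# replicated boundary frames, row i of the spliced output concatenates the
# input rows clamped to [0, n-1] over the window i-5 .. i+5. The helper and
# the `original_feature` name below are illustrative only.
import numpy as np

def expected_splice(feat, left=5, right=5):
    n, dim = feat.shape
    width = left + 1 + right
    out = np.empty((n, width * dim), dtype=feat.dtype)
    for i in range(n):
        for j in range(width):
            src = min(max(i - left + j, 0), n - 1)  # clamp to valid rows
            out[i, j * dim:(j + 1) * dim] = feat[src]
    return out

# e.g.: np.testing.assert_allclose(spliced, expected_splice(original_feature))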
def infer_from_ckpt(args):
    """Inference by using checkpoint."""
    if not os.path.exists(args.checkpoint):
        raise IOError("Invalid checkpoint!")

    feature = fluid.data(name='feature',
                         shape=[None, 3, 11, args.frame_dim],
                         dtype='float32',
                         lod_level=1)
    label = fluid.data(name='label',
                       shape=[None, 1],
                       dtype='int64',
                       lod_level=1)

    prediction, avg_cost, accuracy = stacked_lstmp_model(
        feature=feature,
        label=label,
        hidden_dim=args.hidden_dim,
        proj_dim=args.proj_dim,
        stacked_num=args.stacked_num,
        class_num=args.class_num,
        parallel=args.parallel)

    infer_program = fluid.default_main_program().clone()

    # optimizer, placeholder
    optimizer = fluid.optimizer.Adam(
        learning_rate=fluid.layers.exponential_decay(learning_rate=0.0001,
                                                     decay_steps=1879,
                                                     decay_rate=1 / 1.2,
                                                     staircase=True))
    optimizer.minimize(avg_cost)

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load checkpoint.
    fluid.io.load_persistables(exe, args.checkpoint)

    # init decoder
    decoder = Decoder(args.trans_model, args.vocabulary, args.graphs,
                      args.log_prior, args.beam_size, args.acoustic_scale)

    ltrans = [
        trans_add_delta.TransAddDelta(2, 2),
        trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
        trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5)
    ]

    feature_t = fluid.LoDTensor()
    label_t = fluid.LoDTensor()

    # infer data reader
    infer_data_reader = reader.AsyncDataReader(args.infer_feature_lst,
                                               drop_frame_len=-1,
                                               split_sentence_threshold=-1)
    infer_data_reader.set_transformers(ltrans)

    decoding_result_writer = DecodingResultWriter(args.decode_to_path)
    post_matrix_writer = None if args.post_matrix_path is None \
        else PostMatrixWriter(args.post_matrix_path)

    for batch_id, batch_data in enumerate(
            infer_data_reader.batch_iterator(args.batch_size,
                                             args.minimum_batch_size)):
        # load_data
        (features, labels, lod, name_lst) = batch_data
        features = np.reshape(features, (-1, 11, 3, args.frame_dim))
        features = np.transpose(features, (0, 2, 1, 3))
        feature_t.set(features, place)
        feature_t.set_lod([lod])
        label_t.set(labels, place)
        label_t.set_lod([lod])

        results = exe.run(infer_program,
                          feed={"feature": feature_t,
                                "label": label_t},
                          fetch_list=[prediction, avg_cost, accuracy],
                          return_numpy=False)

        probs, lod = lodtensor_to_ndarray(results[0])
        infer_batch = split_infer_result(probs, lod)

        print("Decoding batch %d ..." % batch_id)
        decoded = decoder.decode_batch(name_lst, infer_batch,
                                       args.num_threads)

        decoding_result_writer.write(decoded)

        if args.post_matrix_path is not None:
            post_matrix_writer.write(name_lst, infer_batch)
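# The reshape + transpose pair above rearranges the reader's flat per-frame
# vectors into the (batch, channel, context, frame_dim) layout the model
# consumes: after TransAddDelta each frame holds 3 channels (static, delta,
# delta-delta), and after TransSplice(5, 5) each sample holds 11 context
# frames. A standalone sketch (the function name and frame_dim value are
# illustrative only):
def _demo_feature_layout(frame_dim=80, num_frames=2):
    import numpy as np
    flat = np.zeros((num_frames, 11 * 3 * frame_dim), dtype='float32')
    x = np.reshape(flat, (-1, 11, 3, frame_dim))  # (batch, context, channel, dim)
    x = np.transpose(x, (0, 2, 1, 3))             # (batch, channel, context, dim)
    assert x.shape == (num_frames, 3, 11, frame_dim)
    return x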
def train(args):
    """train in loop.
    """

    # paths check
    if args.init_model_path is not None and \
            not os.path.exists(args.init_model_path):
        raise IOError("Invalid initial model path!")
    if args.checkpoints != '' and not os.path.exists(args.checkpoints):
        os.mkdir(args.checkpoints)
    if args.infer_models != '' and not os.path.exists(args.infer_models):
        os.mkdir(args.infer_models)

    train_program = fluid.Program()
    train_startup = fluid.Program()

    input_fields = {
        'names': ['feature', 'label'],
        'shapes': [[None, 3, 11, args.frame_dim], [None, 1]],
        'dtypes': ['float32', 'int64'],
        'lod_levels': [1, 1]
    }

    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            inputs = [
                fluid.data(name=input_fields['names'][i],
                           shape=input_fields['shapes'][i],
                           dtype=input_fields['dtypes'][i],
                           lod_level=input_fields['lod_levels'][i])
                for i in range(len(input_fields['names']))
            ]
            train_reader = fluid.io.DataLoader.from_generator(
                feed_list=inputs,
                capacity=64,
                iterable=False,
                use_double_buffer=True)
            (feature, label) = inputs
            prediction, avg_cost, accuracy = stacked_lstmp_model(
                feature=feature,
                label=label,
                hidden_dim=args.hidden_dim,
                proj_dim=args.proj_dim,
                stacked_num=args.stacked_num,
                class_num=args.class_num)
            # optimizer = fluid.optimizer.Momentum(
            #     learning_rate=args.learning_rate, momentum=0.9)
            optimizer = fluid.optimizer.Adam(
                learning_rate=fluid.layers.exponential_decay(
                    learning_rate=args.learning_rate,
                    decay_steps=1879,
                    decay_rate=1 / 1.2,
                    staircase=True))
            optimizer.minimize(avg_cost)

    test_program = fluid.Program()
    test_startup = fluid.Program()
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            inputs = [
                fluid.data(name=input_fields['names'][i],
                           shape=input_fields['shapes'][i],
                           dtype=input_fields['dtypes'][i],
                           lod_level=input_fields['lod_levels'][i])
                for i in range(len(input_fields['names']))
            ]
            test_reader = fluid.io.DataLoader.from_generator(
                feed_list=inputs,
                capacity=64,
                iterable=False,
                use_double_buffer=True)
            (feature, label) = inputs
            prediction, avg_cost, accuracy = stacked_lstmp_model(
                feature=feature,
                label=label,
                hidden_dim=args.hidden_dim,
                proj_dim=args.proj_dim,
                stacked_num=args.stacked_num,
                class_num=args.class_num)
    test_program = test_program.clone(for_test=True)

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(train_startup)
    exe.run(test_startup)

    if args.parallel:
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_iteration_per_drop_scope = 10
        train_exe = fluid.ParallelExecutor(use_cuda=(args.device == 'GPU'),
                                           loss_name=avg_cost.name,
                                           exec_strategy=exec_strategy,
                                           main_program=train_program)
        test_exe = fluid.ParallelExecutor(use_cuda=(args.device == 'GPU'),
                                          main_program=test_program,
                                          exec_strategy=exec_strategy,
                                          share_vars_from=train_exe)

    # resume training if initial model provided.
    if args.init_model_path is not None:
        fluid.io.load_persistables(exe, args.init_model_path)

    ltrans = [
        trans_add_delta.TransAddDelta(2, 2),
        trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
        trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5)
    ]

    # bind train_reader (batch_data_to_lod_tensors is sketched after this
    # function)
    train_data_reader = reader.AsyncDataReader(args.train_feature_lst,
                                               args.train_label_lst,
                                               -1,
                                               split_sentence_threshold=1024)
    train_data_reader.set_transformers(ltrans)

    def train_data_provider():
        for data in train_data_reader.batch_iterator(args.batch_size,
                                                     args.minimum_batch_size):
            yield batch_data_to_lod_tensors(args, data, fluid.CPUPlace())

    train_reader.set_batch_generator(train_data_provider)

    if (os.path.exists(args.val_feature_lst)
            and os.path.exists(args.val_label_lst)):
        # test data reader
        test_data_reader = reader.AsyncDataReader(
            args.val_feature_lst,
            args.val_label_lst,
            -1,
            split_sentence_threshold=1024)
        test_data_reader.set_transformers(ltrans)

        def test_data_provider():
            for data in test_data_reader.batch_iterator(
                    args.batch_size, args.minimum_batch_size):
                yield batch_data_to_lod_tensors(args, data, fluid.CPUPlace())

        test_reader.set_batch_generator(test_data_provider)

    # validation
    def test(exe):
        # If test data not found, return invalid cost and accuracy
        if not (os.path.exists(args.val_feature_lst)
                and os.path.exists(args.val_label_lst)):
            return -1.0, -1.0
        batch_id = 0
        test_costs = []
        test_accs = []
        while True:
            if batch_id == 0:
                test_reader.start()
            try:
                if args.parallel:
                    cost, acc = exe.run(
                        fetch_list=[avg_cost.name, accuracy.name],
                        return_numpy=False)
                else:
                    cost, acc = exe.run(program=test_program,
                                        fetch_list=[avg_cost, accuracy],
                                        return_numpy=False)
                sys.stdout.write('.')
                sys.stdout.flush()
                test_costs.append(np.array(cost)[0])
                test_accs.append(np.array(acc)[0])
                batch_id += 1
            except fluid.core.EOFException:
                test_reader.reset()
                break
        return np.mean(test_costs), np.mean(test_accs)

    # train
    for pass_id in xrange(args.pass_num):
        pass_start_time = time.time()
        batch_id = 0
        while True:
            if batch_id == 0:
                train_reader.start()
            to_print = batch_id > 0 and (batch_id % args.print_per_batches
                                         == 0)
            try:
                if args.parallel:
                    outs = train_exe.run(
                        fetch_list=[avg_cost.name, accuracy.name]
                        if to_print else [],
                        return_numpy=False)
                else:
                    outs = exe.run(program=train_program,
                                   fetch_list=[avg_cost, accuracy]
                                   if to_print else [],
                                   return_numpy=False)
            except fluid.core.EOFException:
                train_reader.reset()
                break

            if to_print:
                if args.parallel:
                    print("\nBatch %d, train cost: %f, train acc: %f" %
                          (batch_id, np.mean(outs[0]), np.mean(outs[1])))
                else:
                    print("\nBatch %d, train cost: %f, train acc: %f" %
                          (batch_id, np.array(outs[0])[0],
                           np.array(outs[1])[0]))
                # save the latest checkpoint
                if args.checkpoints != '':
                    model_path = os.path.join(args.checkpoints,
                                              "deep_asr.latest.checkpoint")
                    fluid.io.save_persistables(exe, model_path, train_program)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()

            batch_id += 1
        # run test
        val_cost, val_acc = test(test_exe if args.parallel else exe)

        # save checkpoint per pass
        if args.checkpoints != '':
            model_path = os.path.join(
                args.checkpoints,
                "deep_asr.pass_" + str(pass_id) + ".checkpoint")
            fluid.io.save_persistables(exe, model_path, train_program)
        # save inference model
        if args.infer_models != '':
            model_path = os.path.join(
                args.infer_models,
                "deep_asr.pass_" + str(pass_id) + ".infer.model")
            fluid.io.save_inference_model(model_path, ["feature"],
                                          [prediction], exe, train_program)
        # cal pass time
        pass_end_time = time.time()
        time_consumed = pass_end_time - pass_start_time
        # print info at pass end
        print("\nPass %d, time consumed: %f s, val cost: %f, val acc: %f\n" %
              (pass_id, time_consumed, val_cost, val_acc))
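# batch_data_to_lod_tensors() is called by both data providers above but is
# defined elsewhere in the repo; below is a minimal sketch of the assumed
# behavior (using the module's np/fluid imports), mirroring the feeding
# logic of the checkpoint-based inference path.
def batch_data_to_lod_tensors(args, batch_data, place):
    """Sketch: pack one reader batch into (feature, label) LoDTensors."""
    (features, labels, lod, name_lst) = batch_data
    features = np.reshape(features, (-1, 11, 3, args.frame_dim))
    features = np.transpose(features, (0, 2, 1, 3))
    feature_t = fluid.LoDTensor()
    label_t = fluid.LoDTensor()
    feature_t.set(features, place)
    feature_t.set_lod([lod])
    label_t.set(labels, place)
    label_t.set_lod([lod])
    return feature_t, label_t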
def profile(args):
    """profile the training process.
    """

    if not args.first_batches_to_skip < args.max_batch_num:
        raise ValueError("arg 'first_batches_to_skip' must be smaller than "
                         "'max_batch_num'.")
    if not args.first_batches_to_skip >= 0:
        raise ValueError(
            "arg 'first_batches_to_skip' must not be smaller than 0.")

    _, avg_cost, accuracy = stacked_lstmp_model(frame_dim=args.frame_dim,
                                                hidden_dim=args.hidden_dim,
                                                proj_dim=args.proj_dim,
                                                stacked_num=args.stacked_num,
                                                class_num=args.class_num,
                                                parallel=args.parallel)

    optimizer = fluid.optimizer.Adam(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=args.learning_rate,
            decay_steps=1879,
            decay_rate=1 / 1.2,
            staircase=True))
    optimizer.minimize(avg_cost)

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    ltrans = [
        trans_add_delta.TransAddDelta(2, 2),
        trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
        trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5)
    ]

    data_reader = reader.AsyncDataReader(args.feature_lst,
                                         args.label_lst,
                                         -1,
                                         split_sentence_threshold=1024)
    data_reader.set_transformers(ltrans)

    feature_t = fluid.LoDTensor()
    label_t = fluid.LoDTensor()

    # compare with == rather than `is`: identity of string literals is an
    # implementation detail
    sorted_key = None if args.sorted_key == 'None' else args.sorted_key
    with profiler.profiler(args.device, sorted_key) as prof:
        frames_seen, start_time = 0, 0.0
        for batch_id, batch_data in enumerate(
                data_reader.batch_iterator(args.batch_size,
                                           args.minimum_batch_size)):
            if batch_id >= args.max_batch_num:
                break
            if args.first_batches_to_skip == batch_id:
                profiler.reset_profiler()
                start_time = time.time()
                frames_seen = 0
            # load_data
            (features, labels, lod, _) = batch_data
            features = np.reshape(features, (-1, 11, 3, args.frame_dim))
            features = np.transpose(features, (0, 2, 1, 3))
            feature_t.set(features, place)
            feature_t.set_lod([lod])
            label_t.set(labels, place)
            label_t.set_lod([lod])

            frames_seen += lod[-1]

            outs = exe.run(fluid.default_main_program(),
                           feed={"feature": feature_t,
                                 "label": label_t},
                           fetch_list=[avg_cost, accuracy]
                           if args.print_train_acc else [],
                           return_numpy=False)

            if args.print_train_acc:
                print("Batch %d acc: %f" %
                      (batch_id, lodtensor_to_ndarray(outs[1])[0]))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()

        time_consumed = time.time() - start_time
        frames_per_sec = frames_seen / time_consumed
        print("\nTime consumed: %f s, performance: %f frames/s." %
              (time_consumed, frames_per_sec))
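# Example invocation (a sketch: the script path, data files, and flag
# spellings are assumed to mirror the argparse attributes used above):
#
#   python tools/profile.py --device GPU \
#       --feature_lst data/train_feature.lst \
#       --label_lst data/train_label.lst \
#       --mean_var data/global_mean_var \
#       --first_batches_to_skip 5 --max_batch_num 50 --sorted_key total
#
# Skipping the first few batches before profiler.reset_profiler() keeps
# one-off warm-up costs (allocator growth, kernel autotuning) out of the
# reported frames/s figure.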
def train(args):
    """train in loop.
    """

    # paths check
    if args.init_model_path is not None and \
            not os.path.exists(args.init_model_path):
        raise IOError("Invalid initial model path!")
    if args.checkpoints != '' and not os.path.exists(args.checkpoints):
        os.mkdir(args.checkpoints)
    if args.infer_models != '' and not os.path.exists(args.infer_models):
        os.mkdir(args.infer_models)

    prediction, avg_cost, accuracy = stacked_lstmp_model(
        frame_dim=args.frame_dim,
        hidden_dim=args.hidden_dim,
        proj_dim=args.proj_dim,
        stacked_num=args.stacked_num,
        class_num=args.class_num,
        parallel=args.parallel)

    # program for test
    test_program = fluid.default_main_program().clone()

    # optimizer = fluid.optimizer.Momentum(
    #     learning_rate=args.learning_rate, momentum=0.9)
    optimizer = fluid.optimizer.Adam(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=args.learning_rate,
            decay_steps=1879,
            decay_rate=1 / 1.2,
            staircase=True))
    optimizer.minimize(avg_cost)

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # resume training if initial model provided.
    if args.init_model_path is not None:
        fluid.io.load_persistables(exe, args.init_model_path)

    ltrans = [
        trans_add_delta.TransAddDelta(2, 2),
        trans_mean_variance_norm.TransMeanVarianceNorm(args.mean_var),
        trans_splice.TransSplice(5, 5), trans_delay.TransDelay(5)
    ]

    feature_t = fluid.LoDTensor()
    label_t = fluid.LoDTensor()

    # validation
    def test(exe):
        # If test data not found, return invalid cost and accuracy
        if not (os.path.exists(args.val_feature_lst)
                and os.path.exists(args.val_label_lst)):
            return -1.0, -1.0
        # test data reader
        test_data_reader = reader.AsyncDataReader(
            args.val_feature_lst,
            args.val_label_lst,
            -1,
            split_sentence_threshold=1024)
        test_data_reader.set_transformers(ltrans)
        test_costs, test_accs = [], []
        for batch_id, batch_data in enumerate(
                test_data_reader.batch_iterator(args.batch_size,
                                                args.minimum_batch_size)):
            # load_data
            (features, labels, lod, _) = batch_data
            features = np.reshape(features, (-1, 11, 3, args.frame_dim))
            features = np.transpose(features, (0, 2, 1, 3))
            feature_t.set(features, place)
            feature_t.set_lod([lod])
            label_t.set(labels, place)
            label_t.set_lod([lod])

            cost, acc = exe.run(test_program,
                                feed={"feature": feature_t,
                                      "label": label_t},
                                fetch_list=[avg_cost, accuracy],
                                return_numpy=False)
            test_costs.append(lodtensor_to_ndarray(cost)[0])
            test_accs.append(lodtensor_to_ndarray(acc)[0])
        return np.mean(test_costs), np.mean(test_accs)

    # train data reader
    train_data_reader = reader.AsyncDataReader(args.train_feature_lst,
                                               args.train_label_lst,
                                               -1,
                                               split_sentence_threshold=1024)
    train_data_reader.set_transformers(ltrans)

    # train
    for pass_id in xrange(args.pass_num):
        pass_start_time = time.time()
        for batch_id, batch_data in enumerate(
                train_data_reader.batch_iterator(args.batch_size,
                                                 args.minimum_batch_size)):
            # load_data
            (features, labels, lod, name_lst) = batch_data
            features = np.reshape(features, (-1, 11, 3, args.frame_dim))
            features = np.transpose(features, (0, 2, 1, 3))
            feature_t.set(features, place)
            feature_t.set_lod([lod])
            label_t.set(labels, place)
            label_t.set_lod([lod])

            to_print = batch_id > 0 and (batch_id % args.print_per_batches
                                         == 0)
            outs = exe.run(fluid.default_main_program(),
                           feed={"feature": feature_t,
                                 "label": label_t},
                           fetch_list=[avg_cost, accuracy]
                           if to_print else [],
                           return_numpy=False)

            if to_print:
                print("\nBatch %d, train cost: %f, train acc: %f" %
                      (batch_id, lodtensor_to_ndarray(outs[0])[0],
                       lodtensor_to_ndarray(outs[1])[0]))
                # save the latest checkpoint
                if args.checkpoints != '':
                    model_path = os.path.join(args.checkpoints,
                                              "deep_asr.latest.checkpoint")
                    fluid.io.save_persistables(exe, model_path)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        # run test
        val_cost, val_acc = test(exe)

        # save checkpoint per pass
        if args.checkpoints != '':
            model_path = os.path.join(
                args.checkpoints,
                "deep_asr.pass_" + str(pass_id) + ".checkpoint")
            fluid.io.save_persistables(exe, model_path)
        # save inference model
        if args.infer_models != '':
            model_path = os.path.join(
                args.infer_models,
                "deep_asr.pass_" + str(pass_id) + ".infer.model")
            fluid.io.save_inference_model(model_path, ["feature"],
                                          [prediction], exe)
        # cal pass time
        pass_end_time = time.time()
        time_consumed = pass_end_time - pass_start_time
        # print info at pass end
        print("\nPass %d, time consumed: %f s, val cost: %f, val acc: %f\n" %
              (pass_id, time_consumed, val_cost, val_acc))