def infer(test_reader, use_cuda, model_path):
    """Run a saved inference model over the test set and print recall@20.

    Args:
        test_reader: Callable returning an iterable of batches. Every sample
            in a batch is indexable; element 0 is fed as ``src_wordseq`` and
            element 1 as ``dst_wordseq``.
        use_cuda: Use ``fluid.CUDAPlace(0)`` when truthy, otherwise the CPU.
        model_path: Directory handed to ``fluid.io.load_inference_model``.

    The running recall is printed after every batch and a summary line with
    the elapsed time is printed at the end. While no label has been counted
    the recall is reported as ``nan`` instead of raising
    ``ZeroDivisionError``.
    """
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # The model is loaded and executed inside a freshly created scope.
    with fluid.scope_guard(fluid.core.Scope()):
        infer_program, feed_target_names, fetch_vars = (
            fluid.io.load_inference_model(model_path, exe))
        accum_num_recall = 0.0
        accum_num_sum = 0.0
        recall = float("nan")
        t0 = time.time()
        step_id = 0
        for data in test_reader():
            step_id += 1
            src_wordseq = utils.to_lodtensor([dat[0] for dat in data], place)
            label_data = [dat[1] for dat in data]
            dst_wordseq = utils.to_lodtensor(label_data, place)
            para = exe.run(
                infer_program,
                feed={"src_wordseq": src_wordseq,
                      "dst_wordseq": dst_wordseq},
                fetch_list=fetch_vars,
                return_numpy=False)

            # The second fetched variable is read as the batch accuracy.
            acc_ = para[1]._get_float_element(0)
            # Number of labels in this batch; weights the batch accuracy.
            data_length = len(
                np.concatenate(label_data, axis=0).astype("int64"))
            accum_num_sum += data_length
            accum_num_recall += data_length * acc_
            if accum_num_sum:
                recall = accum_num_recall / accum_num_sum
            # Progress is reported after every batch.
            print("step:%d " % (step_id), recall)
        t1 = time.time()
        print("model:%s recall@20:%.3f time_cost(s):%.2f" %
              (model_path, recall, t1 - t0))
def infer(test_reader, use_cuda, model_path):
    """Run a saved inference model over the test set and print perplexity.

    Args:
        test_reader: Callable returning an iterable of batches. Every sample
            in a batch is indexable; element 0 is fed as ``src_wordseq`` and
            element 1 as ``dst_wordseq``.
        use_cuda: Use ``fluid.CUDAPlace(0)`` when truthy, otherwise the CPU.
        model_path: Directory handed to ``fluid.io.load_inference_model``.

    The fetched average cost of each batch is weighted by the number of
    source words in that batch; perplexity is ``exp(total_cost / words)``.
    If no word was processed, perplexity is reported as ``nan`` rather than
    raising ``ZeroDivisionError``.
    """
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # The model is loaded and executed inside a freshly created scope.
    with fluid.scope_guard(fluid.core.Scope()):
        infer_program, feed_target_names, fetch_vars = (
            fluid.io.load_inference_model(model_path, exe))
        accum_cost = 0.0
        accum_words = 0
        t0 = time.time()
        for data in test_reader():
            # Build real lists: on Python 3 ``map`` yields a one-shot
            # iterator, which is unsafe if the helper walks the data twice.
            src_wordseq = utils.to_lodtensor([dat[0] for dat in data], place)
            dst_wordseq = utils.to_lodtensor([dat[1] for dat in data], place)
            avg_cost = exe.run(
                infer_program,
                feed={"src_wordseq": src_wordseq,
                      "dst_wordseq": dst_wordseq},
                fetch_list=fetch_vars)

            # Last entry of the first LoD level is the total word count.
            nwords = src_wordseq.lod()[0][-1]
            cost = np.array(avg_cost) * nwords
            accum_cost += cost
            accum_words += nwords

        if accum_words:
            ppl = math.exp(accum_cost / accum_words)
        else:
            ppl = float("nan")
        t1 = time.time()
        print("model:%s ppl:%.3f time_cost(s):%.2f" %
              (model_path, ppl, t1 - t0))
def train_loop(main_program):
    """Train ``main_program`` for ``args.pass_num`` epochs.

    Relies on names from the enclosing scope: ``args``, ``use_cuda``,
    ``train_reader``, ``avg_cost`` and ``acc``. After every epoch the
    inference model is saved under ``<model_dir>/epoch_<n>``, but only by
    the process whose ``args.trainer_id`` is 0.
    """
    num_epochs = args.pass_num
    out_dir = args.model_dir
    fetch_names = [avg_cost.name]

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Feed slot names, in the same order as the sample columns they read.
    feed_var_names = ["src_wordseq", "dst_wordseq"]
    elapsed = 0.0
    for epoch_idx in six.moves.xrange(1, num_epochs + 1):
        print("epoch_%d start" % epoch_idx)
        started = time.time()
        step = 0
        for step, batch in enumerate(train_reader(), 1):
            feed = {
                name: utils.to_lodtensor(
                    [sample[col] for sample in batch], place)
                for col, name in enumerate(feed_var_names)
            }
            fetched = exe.run(main_program,
                              feed=feed,
                              fetch_list=fetch_names)
            # Perplexity is the exponential of the fetched average cost.
            newest_ppl = np.mean(np.exp(fetched[0]))
            if step % args.print_batch == 0:
                print("step:%d ppl:%.3f" % (step, newest_ppl))

        elapsed += time.time() - started
        print("epoch:%d num_steps:%d time_cost(s):%f" %
              (epoch_idx, step, elapsed / epoch_idx))

        save_dir = "%s/epoch_%d" % (out_dir, epoch_idx)
        fetch_vars = [avg_cost, acc]
        if args.trainer_id == 0:
            fluid.io.save_inference_model(save_dir, feed_var_names,
                                          fetch_vars, exe)
            print("model saved in %s" % save_dir)
    print("finish training")
def _get_feed(self, inputs, is_infer=False):
    """Build the model's feed dict from `inputs`.

    Values that are lists are converted with `to_lodtensor` on
    `self.place`; every other value is passed through unchanged. A new
    dict is returned and `inputs` itself is not modified.
    """
    feed = {}
    for name, value in inputs.items():
        if isinstance(value, list):
            value = to_lodtensor(value, self.place)
        feed[name] = value
    return feed
def train_loop(main_program):
    """Train ``main_program`` for ``args.pass_num`` epochs.

    Relies on names from the enclosing scope: ``args``, ``use_cuda``,
    ``train_reader``, ``batch_size``, ``avg_cost``, ``correct`` and
    ``cos_pos``. The inference model is saved under
    ``<model_dir>/epoch_<n>`` after every epoch.

    Args:
        main_program: The program to execute for each training step. It is
            passed explicitly to ``exe.run`` so that a caller-supplied
            program is honoured instead of silently falling back to the
            default main program.
    """
    # Initialize executor
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    pass_num = args.pass_num
    model_dir = args.model_dir
    exe.run(fluid.default_startup_program())

    total_time = 0.0
    for pass_idx in range(pass_num):
        epoch_idx = pass_idx + 1
        print("epoch_%d start" % epoch_idx)
        t0 = time.time()
        # Counted separately so the epoch summary is correct (and defined)
        # even when the reader yields no batch at all.
        num_steps = 0
        for batch_id, data in enumerate(train_reader()):
            num_steps = batch_id + 1
            lod_text_seq = utils.to_lodtensor([dat[0] for dat in data], place)
            lod_pos_tag = utils.to_lodtensor([dat[1] for dat in data], place)
            lod_neg_tag = utils.to_lodtensor([dat[2] for dat in data], place)
            loss_val, correct_val = exe.run(
                main_program,
                feed={
                    "text": lod_text_seq,
                    "pos_tag": lod_pos_tag,
                    "neg_tag": lod_neg_tag
                },
                fetch_list=[avg_cost.name, correct.name])
            if batch_id % args.print_batch == 0:
                # NOTE(review): the "+ 10" offset in batch_num is kept from
                # the original log line; its intent is unclear - confirm.
                print(
                    "TRAIN --> pass: {} batch_num: {} avg_cost: {}, acc: {}"
                    .format(pass_idx, (batch_id + 10) * batch_size,
                            np.mean(loss_val),
                            float(np.sum(correct_val)) / batch_size))
        t1 = time.time()
        total_time += t1 - t0
        print("epoch:%d num_steps:%d time_cost(s):%f" %
              (epoch_idx, num_steps, total_time / epoch_idx))

        save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
        feed_var_names = ["text", "pos_tag"]
        fetch_vars = [cos_pos]
        fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars,
                                      exe)
    print("finish training")
def _get_feed(self, inputs, is_infer=False):
    """Turn `inputs` into the feed format expected by the model.

    A list is handed back untouched (the original comment notes this path
    is used by `get_data_loader`). Otherwise `inputs` is treated as a
    mapping: each list value is replaced in place by
    `to_lodtensor(value, self.place)` and the same mapping is returned.
    """
    if not isinstance(inputs, list):
        for name, value in inputs.items():
            if isinstance(value, list):
                # Overwriting an existing key is safe while iterating.
                inputs[name] = to_lodtensor(value, self.place)
    return inputs
def infer(test_reader, vocab_tag, use_cuda, model_path, epoch):
    """Evaluate a saved inference model and print its accuracy.

    Args:
        test_reader: Callable returning an iterable of batches. Every sample
            is indexable; element 0 is fed as ``text``, element 1 as
            ``pos_tag`` and element 2 holds the expected index.
        vocab_tag: Number of consecutive batches that form one evaluation
            group.
        use_cuda: Use ``fluid.CUDAPlace(0)`` when truthy, otherwise the CPU.
        model_path: Directory handed to ``fluid.io.load_inference_model``.
        epoch: Only used to label the printed output.

    One score (first element of the first fetched variable) is collected per
    batch. After every ``vocab_tag`` batches the position of the highest
    score is compared with ``data[0][2][0]`` of the group's last batch.
    If no complete group was seen, the accuracy is printed as ``nan``
    instead of raising ``ZeroDivisionError``.
    """
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # The model is loaded and executed inside a freshly created scope.
    with fluid.scope_guard(fluid.Scope()):
        infer_program, feed_target_names, fetch_vars = (
            fluid.io.load_inference_model(model_path, exe))
        step_id = 0
        true_num = 0
        all_num = 0
        size = vocab_tag
        value = []
        print("epoch " + str(epoch) + " start")
        for data in test_reader():
            step_id += 1
            lod_text_seq = utils.to_lodtensor([dat[0] for dat in data], place)
            lod_tag = utils.to_lodtensor([dat[1] for dat in data], place)
            para = exe.run(infer_program,
                           feed={"text": lod_text_seq,
                                 "pos_tag": lod_tag},
                           fetch_list=fetch_vars,
                           return_numpy=False)
            value.append(para[0]._get_float_element(0))
            if step_id % size == 0 and step_id > 1:
                # A full group has been scored: check the arg-max.
                all_num += 1
                true_pos = data[0][2][0]
                if value.index(max(value)) == int(true_pos):
                    true_num += 1
                value = []
        acc = 1.0 * true_num / all_num if all_num else float("nan")
        print("epoch:" + str(epoch) + "\tacc:" + str(acc))
def train(train_reader,
          vocab,
          network,
          hid_size,
          base_lr,
          batch_size,
          pass_num,
          use_cuda,
          parallel,
          model_dir,
          init_low_bound=-0.04,
          init_high_bound=0.04):
    """Build the network, train it and save an inference model per epoch.

    Args:
        train_reader: Callable returning an iterable of batches; element 0
            of a sample is fed as ``src_wordseq``, element 1 as
            ``dst_wordseq``.
        vocab: Sized container; only ``len(vocab)`` is used.
        network: Callable building the cost from
            ``(src, dst, vocab_size, hid_size, low_bound, high_bound)``.
        hid_size: Forwarded to ``network``.
        base_lr: Initial learning rate of the exponentially decayed SGD.
        batch_size: Not used inside this function.
        pass_num: Number of epochs.
        use_cuda: Selects the device for both the plain executor and the
            ``ParallelExecutor``.
        parallel: Not used inside this function; training always runs
            through ``ParallelExecutor``.
        model_dir: Root directory for the ``epoch_<n>`` model snapshots.
        init_low_bound: Forwarded to ``network``.
        init_high_bound: Forwarded to ``network``.
    """
    args = parse_args()
    if args.enable_ce:
        # random seed must set before configuring the network.
        fluid.default_startup_program().random_seed = SEED
    vocab_size = len(vocab)

    # Input data
    src_wordseq = fluid.layers.data(
        name="src_wordseq", shape=[1], dtype="int64", lod_level=1)
    dst_wordseq = fluid.layers.data(
        name="dst_wordseq", shape=[1], dtype="int64", lod_level=1)

    # Train program
    cost = network(src_wordseq, dst_wordseq, vocab_size, hid_size,
                   init_low_bound, init_high_bound)
    avg_cost = fluid.layers.mean(x=cost)

    # Optimization to minimize loss
    sgd_optimizer = fluid.optimizer.SGD(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=base_lr,
            decay_steps=2100 * 4,
            decay_rate=0.5,
            staircase=True))
    sgd_optimizer.minimize(avg_cost)

    # Initialize executor
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Honour the caller's device choice; a hard-coded use_cuda=True would
    # contradict the CPU place selected above when use_cuda is falsy.
    train_exe = fluid.ParallelExecutor(
        use_cuda=use_cuda, loss_name=avg_cost.name)

    total_time = 0.0
    fetch_list = [avg_cost.name]
    for pass_idx in six.moves.xrange(pass_num):
        epoch_idx = pass_idx + 1
        print("epoch_%d start" % epoch_idx)

        t0 = time.time()
        i = 0
        newest_ppl = 0
        for data in train_reader():
            i += 1
            lod_src_wordseq = utils.to_lodtensor([dat[0] for dat in data],
                                                 place)
            lod_dst_wordseq = utils.to_lodtensor([dat[1] for dat in data],
                                                 place)
            ret_avg_cost = train_exe.run(
                feed={
                    "src_wordseq": lod_src_wordseq,
                    "dst_wordseq": lod_dst_wordseq
                },
                fetch_list=fetch_list)
            # Perplexity is the exponential of the fetched average cost.
            avg_ppl = np.exp(ret_avg_cost[0])
            newest_ppl = np.mean(avg_ppl)
            if i % 100 == 0:
                print("step:%d ppl:%.3f" % (i, newest_ppl))

        t1 = time.time()
        total_time += t1 - t0
        print("epoch:%d num_steps:%d time_cost(s):%f" %
              (epoch_idx, i, total_time / epoch_idx))

        if pass_idx == pass_num - 1 and args.enable_ce:
            # Note: The following logs are special for CE monitoring.
            # Other situations do not need to care about these logs.
            gpu_num = get_cards(args)
            if gpu_num == 1:
                print("kpis imikolov_20_pass_duration %s" %
                      (total_time / epoch_idx))
                print("kpis imikolov_20_avg_ppl %s" % newest_ppl)
            else:
                print("kpis imikolov_20_pass_duration_card%s %s" %
                      (gpu_num, total_time / epoch_idx))
                print("kpis imikolov_20_avg_ppl_card%s %s" %
                      (gpu_num, newest_ppl))

        save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
        feed_var_names = ["src_wordseq", "dst_wordseq"]
        fetch_vars = [avg_cost]
        fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars,
                                      exe)
        print("model saved in %s" % save_dir)

    print("finish training")
def train(train_reader,
          vocab,
          network,
          hid_size,
          base_lr,
          batch_size,
          pass_num,
          use_cuda,
          parallel,
          model_dir,
          init_low_bound=-0.04,
          init_high_bound=0.04):
    """Build the network, train it and save an inference model per epoch.

    Written so that it runs on both Python 2 and Python 3: every ``print``
    is a single-argument call, ``range`` replaces ``xrange`` and the feed
    data is materialised as lists rather than lazy ``map`` objects.

    Args:
        train_reader: Callable returning an iterable of batches; element 0
            of a sample is fed as ``src_wordseq``, element 1 as
            ``dst_wordseq``.
        vocab: Sized container; only ``len(vocab)`` is used.
        network: Callable building the cost from
            ``(src, dst, vocab_size, hid_size, low_bound, high_bound)``.
        hid_size: Forwarded to ``network``.
        base_lr: Initial learning rate of the exponentially decayed SGD.
        batch_size: Not used inside this function.
        pass_num: Number of epochs.
        use_cuda: Use ``fluid.CUDAPlace(0)`` when truthy, otherwise the CPU.
        parallel: When truthy the network is built inside a ``ParallelDo``
            block over ``fluid.layers.device.get_places()``.
        model_dir: Root directory for the ``epoch_<n>`` model snapshots.
        init_low_bound: Forwarded to ``network``.
        init_high_bound: Forwarded to ``network``.
    """
    args = parse_args()
    vocab_size = len(vocab)

    src_wordseq = fluid.layers.data(
        name="src_wordseq", shape=[1], dtype="int64", lod_level=1)
    dst_wordseq = fluid.layers.data(
        name="dst_wordseq", shape=[1], dtype="int64", lod_level=1)

    if not parallel:
        cost = network(src_wordseq, dst_wordseq, vocab_size, hid_size,
                       init_low_bound, init_high_bound)
    else:
        places = fluid.layers.device.get_places()
        pd = fluid.layers.ParallelDo(places)
        with pd.do():
            cost = network(
                pd.read_input(src_wordseq),
                pd.read_input(dst_wordseq), vocab_size, hid_size,
                init_low_bound, init_high_bound)
            pd.write_output(cost)
        cost = pd()
    avg_cost = fluid.layers.mean(x=cost)

    sgd_optimizer = fluid.optimizer.SGD(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=base_lr,
            decay_steps=2100 * 4,
            decay_rate=0.5,
            staircase=True))
    sgd_optimizer.minimize(avg_cost)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    total_time = 0.0
    for pass_idx in range(pass_num):
        epoch_idx = pass_idx + 1
        print("epoch_%d start" % epoch_idx)

        t0 = time.time()
        i = 0
        newest_ppl = 0
        for data in train_reader():
            i += 1
            lod_src_wordseq = utils.to_lodtensor([dat[0] for dat in data],
                                                 place)
            lod_dst_wordseq = utils.to_lodtensor([dat[1] for dat in data],
                                                 place)
            ret_avg_cost = exe.run(fluid.default_main_program(),
                                   feed={
                                       "src_wordseq": lod_src_wordseq,
                                       "dst_wordseq": lod_dst_wordseq
                                   },
                                   fetch_list=[avg_cost],
                                   use_program_cache=True)
            # Perplexity is the exponential of the fetched average cost.
            avg_ppl = math.exp(ret_avg_cost[0])
            newest_ppl = avg_ppl
            if i % 100 == 0:
                print("step:%d ppl:%.3f" % (i, avg_ppl))

        t1 = time.time()
        total_time += t1 - t0
        print("epoch:%d num_steps:%d time_cost(s):%f" %
              (epoch_idx, i, total_time / epoch_idx))

        # KPI records are only taken for the final epoch.
        if pass_idx == pass_num - 1:
            if args.gpu_card_num == 1:
                imikolov_20_pass_duration_kpi.add_record(total_time /
                                                         epoch_idx)
                imikolov_20_avg_ppl_kpi.add_record(newest_ppl)
            else:
                imikolov_20_pass_duration_kpi_card4.add_record(total_time /
                                                               epoch_idx)
                imikolov_20_avg_ppl_kpi_card4.add_record(newest_ppl)

        save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
        feed_var_names = ["src_wordseq", "dst_wordseq"]
        fetch_vars = [avg_cost]
        fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars,
                                      exe)
        print("model saved in %s" % save_dir)

    if args.gpu_card_num == 1:
        imikolov_20_pass_duration_kpi.persist()
        imikolov_20_avg_ppl_kpi.persist()
    else:
        imikolov_20_pass_duration_kpi_card4.persist()
        imikolov_20_avg_ppl_kpi_card4.persist()
    print("finish training")
def train():
    """Parse the command line, build the network and run training.

    Configuration comes from ``parse_args()``. An inference model with
    ``avg_cost`` and ``acc`` as fetch targets is saved under
    ``<model_dir>/epoch_<n>`` after every epoch. With ``args.enable_ce``
    set, the random seeds are fixed, each epoch stops once the step counter
    exceeds ``args.step_num``, and ``kpis`` lines are printed at the end.
    """
    args = parse_args()
    if args.enable_ce:
        fluid.default_startup_program().random_seed = SEED
        fluid.default_main_program().random_seed = SEED
    hid_size = args.hid_size
    train_dir = args.train_dir
    vocab_path = args.vocab_path
    use_cuda = bool(args.use_cuda)
    parallel = bool(args.parallel)
    print("use_cuda:", use_cuda, "parallel:", parallel)
    batch_size = args.batch_size
    vocab_size, train_reader = utils.prepare_data(
        train_dir,
        vocab_path,
        batch_size=batch_size * get_cards(args),
        buffer_size=1000,
        word_freq_threshold=0,
        is_train=True)

    # Train program
    src_wordseq, dst_wordseq, avg_cost, acc = net.all_vocab_network(
        vocab_size=vocab_size, hid_size=hid_size)

    # Optimization to minimize loss
    sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.base_lr)
    sgd_optimizer.minimize(avg_cost)

    # Initialize executor
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    if parallel:
        train_exe = fluid.ParallelExecutor(
            use_cuda=use_cuda, loss_name=avg_cost.name)
    else:
        train_exe = exe

    pass_num = args.pass_num
    model_dir = args.model_dir
    fetch_list = [avg_cost.name]

    # Per-step perplexities; the CE report reads the second-to-last one.
    ce_info = []
    total_time = 0.0
    for pass_idx in six.moves.xrange(pass_num):
        epoch_idx = pass_idx + 1
        print("epoch_%d start" % epoch_idx)

        t0 = time.time()
        i = 0
        newest_ppl = 0
        for data in train_reader():
            i += 1
            lod_src_wordseq = utils.to_lodtensor([dat[0] for dat in data],
                                                 place)
            lod_dst_wordseq = utils.to_lodtensor([dat[1] for dat in data],
                                                 place)
            ret_avg_cost = train_exe.run(
                feed={
                    "src_wordseq": lod_src_wordseq,
                    "dst_wordseq": lod_dst_wordseq
                },
                fetch_list=fetch_list)
            # Perplexity is the exponential of the fetched average cost.
            avg_ppl = np.exp(ret_avg_cost[0])
            newest_ppl = np.mean(avg_ppl)
            ce_info.append(newest_ppl)
            if i % args.print_batch == 0:
                print("step:%d ppl:%.3f" % (i, newest_ppl))
            if args.enable_ce and i > args.step_num:
                break

        t1 = time.time()
        total_time += t1 - t0
        print("epoch:%d num_steps:%d time_cost(s):%f" %
              (epoch_idx, i, total_time / epoch_idx))
        save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
        feed_var_names = ["src_wordseq", "dst_wordseq"]
        fetch_vars = [avg_cost, acc]
        fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars,
                                      exe)
        print("model saved in %s" % save_dir)

    # only for ce
    if args.enable_ce:
        ce_ppl = 0
        try:
            ce_ppl = ce_info[-2]
        except IndexError:
            # Fewer than two steps were recorded; keep the default of 0.
            # Only IndexError is caught so that KeyboardInterrupt and real
            # bugs are no longer swallowed.
            print("ce info error")
        epoch_idx = args.pass_num
        device = get_device(args)
        if args.use_cuda:
            gpu_num = device[1]
            print("kpis\teach_pass_duration_gpu%s\t%s" %
                  (gpu_num, total_time / epoch_idx))
            print("kpis\ttrain_ppl_gpu%s\t%s" % (gpu_num, ce_ppl))
        else:
            cpu_num = device[1]
            threads_num = device[2]
            print("kpis\teach_pass_duration_cpu%s_thread%s\t%s" %
                  (cpu_num, threads_num, total_time / epoch_idx))
            print("kpis\ttrain_ppl_cpu%s_thread%s\t%s" %
                  (cpu_num, threads_num, ce_ppl))

    print("finish training")
def train():
    """Parse the command line, build the network and run training.

    Configuration comes from ``parse_args()``. An inference model with
    ``cos_pos`` as fetch target and ``text`` / ``pos_tag`` as feeds is saved
    under ``<model_dir>/epoch_<n>`` after every epoch. With
    ``args.enable_ce`` set, the random seeds are fixed and ``kpis`` lines
    are printed once training is done.
    """
    args = parse_args()
    if args.enable_ce:
        fluid.default_startup_program().random_seed = SEED
        fluid.default_main_program().random_seed = SEED
    train_dir = args.train_dir
    vocab_text_path = args.vocab_text_path
    vocab_tag_path = args.vocab_tag_path
    use_cuda = bool(args.use_cuda)
    parallel = bool(args.parallel)
    batch_size = args.batch_size
    neg_size = args.neg_size
    print("use_cuda: {}, parallel: {}, batch_size: {}, neg_size: {} ".format(
        use_cuda, parallel, batch_size, neg_size))
    vocab_text_size, vocab_tag_size, train_reader = utils.prepare_data(
        file_dir=train_dir,
        vocab_text_path=vocab_text_path,
        vocab_tag_path=vocab_tag_path,
        neg_size=neg_size,
        batch_size=batch_size * get_cards(args),
        buffer_size=batch_size * 100,
        is_train=True)

    # Train program
    avg_cost, correct, cos_pos = net.network(
        vocab_text_size, vocab_tag_size, neg_size=neg_size)

    # Optimization to minimize loss
    sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.base_lr)
    sgd_optimizer.minimize(avg_cost)

    # Initialize executor
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    if parallel:
        train_exe = fluid.ParallelExecutor(
            use_cuda=use_cuda, loss_name=avg_cost.name)
    else:
        train_exe = exe

    pass_num = args.pass_num
    model_dir = args.model_dir
    total_time = 0.0
    # Per-step accuracies; the CE report reads the second-to-last one.
    ce_info = []
    for pass_idx in range(pass_num):
        epoch_idx = pass_idx + 1
        print("epoch_%d start" % epoch_idx)
        t0 = time.time()
        # Counted separately so the epoch summary is correct (and defined)
        # even when the reader yields no batch at all.
        num_steps = 0
        for batch_id, data in enumerate(train_reader()):
            num_steps = batch_id + 1
            lod_text_seq = utils.to_lodtensor([dat[0] for dat in data], place)
            lod_pos_tag = utils.to_lodtensor([dat[1] for dat in data], place)
            lod_neg_tag = utils.to_lodtensor([dat[2] for dat in data], place)
            loss_val, correct_val = train_exe.run(
                feed={
                    "text": lod_text_seq,
                    "pos_tag": lod_pos_tag,
                    "neg_tag": lod_neg_tag
                },
                fetch_list=[avg_cost.name, correct.name])
            step_acc = float(np.sum(correct_val)) / (
                args.num_devices * batch_size)
            ce_info.append(step_acc)
            if batch_id % args.print_batch == 0:
                # NOTE(review): the "+ 10" offset in batch_num is kept from
                # the original log line; its intent is unclear - confirm.
                print("TRAIN --> pass: {} batch_num: {} avg_cost: {}, acc: {}".
                      format(pass_idx, (batch_id + 10) * batch_size,
                             np.mean(loss_val), step_acc))
        t1 = time.time()
        total_time += t1 - t0
        print("epoch:%d num_steps:%d time_cost(s):%f" %
              (epoch_idx, num_steps, total_time / epoch_idx))
        save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
        feed_var_names = ["text", "pos_tag"]
        fetch_vars = [cos_pos]
        fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars,
                                      exe)

    # only for ce
    if args.enable_ce:
        ce_acc = 0
        try:
            ce_acc = ce_info[-2]
        except IndexError:
            # Fewer than two steps were recorded; keep the default of 0.
            # Only IndexError is caught so that KeyboardInterrupt and real
            # bugs are no longer swallowed.
            logger.error("ce info error")
        epoch_idx = args.pass_num
        device = get_device(args)
        if args.use_cuda:
            gpu_num = device[1]
            print("kpis\teach_pass_duration_gpu%s\t%s" %
                  (gpu_num, total_time / epoch_idx))
            print("kpis\ttrain_acc_gpu%s\t%s" % (gpu_num, ce_acc))
        else:
            cpu_num = device[1]
            threads_num = device[2]
            print("kpis\teach_pass_duration_cpu%s_thread%s\t%s" %
                  (cpu_num, threads_num, total_time / epoch_idx))
            print("kpis\ttrain_acc_cpu%s_thread%s\t%s" %
                  (cpu_num, threads_num, ce_acc))

    print("finish training")