def infer(args):
    """Debug inference entry point: load a saved inference model and run a
    single batch through it, printing the raw fetch results.

    Args:
        args: parsed CLI namespace; uses batch_size, config_path, test_path,
            use_cuda, hidden_size, step and model_path.

    Note: exploratory code — the network is rebuilt only to recover the feed
    variable names and the ``logits`` variable; the loaded inference program
    is then run for exactly one batch (the ``break`` below).
    """
    batch_size = args.batch_size
    items_num = reader.read_config(args.config_path)
    test_data = reader.Data(args.test_path, False)
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # Rebuild the graph solely for feed names and the logits variable.
    loss, acc, py_reader, feed_datas, logits = network.network(
        items_num, args.hidden_size, args.step, args.batch_size)
    exe.run(fluid.default_startup_program())
    [infer_program, feeded_var_names, target_var] = fluid.io.load_inference_model(
        dirname=args.model_path, executor=exe)
    feed_list = [e.name for e in feed_datas]
    print(feed_list, type(target_var[0]), type(logits))
    infer_reader = test_data.reader(batch_size, batch_size * 20, False)
    feeder = fluid.DataFeeder(place=place, feed_list=feed_list)
    # `batch_id` (was `iter`): avoid shadowing the builtin `iter`.
    for batch_id, data in enumerate(infer_reader()):
        res = exe.run(infer_program,
                      feed=feeder.feed(data),
                      fetch_list=[logits])
        print("@@@, ", res)
        print("!!!,", logits)
        if batch_id == 0:
            break
def infer(epoch_num):
    """Evaluate the inference model saved for one epoch checkpoint.

    Loads the saved inference program for ``epoch_num``, streams the test
    set through it one batch at a time, and logs the mean loss and
    Recall@20. If no model exists at the derived path, logs an error
    message instead of raising.
    """
    args = parse_args()
    batch_size = args.batch_size
    dataset = reader.Data(args.test_path, False)
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # Checkpoint directory name embeds the epoch number.
    model_path = args.model_path + "epoch_" + str(epoch_num)
    try:
        infer_program, feed_names, fetch_targets = fluid.io.load_inference_model(
            model_path, exe)
        feeder = fluid.DataFeeder(
            feed_list=feed_names, place=place, program=infer_program)

        total_loss = 0.0
        total_acc = 0.0
        num_batches = 0
        for batch in dataset.reader(batch_size, batch_size, False):
            outs = exe.run(infer_program,
                           feed=feeder.feed(batch),
                           fetch_list=fetch_targets)
            total_loss += outs[0]
            total_acc += outs[1]
            num_batches += 1
        logger.info("TEST --> loss: %.4lf, Recall@20: %.4lf" %
                    (total_loss / num_batches, total_acc / num_batches))
    except ValueError:
        # load_inference_model raises ValueError for a missing model dir.
        logger.info("TEST --> error: there is no model in " + model_path)
def infer_wrong(args):
    """Evaluate a range of epoch checkpoints saved via save_persistables.

    Rebuilds the training network, clones it for test, then for each epoch in
    [start_index, last_index] loads the persisted variables and streams the
    test set through a py_reader, logging mean loss and Recall@20.

    NOTE(review): the function name suggests this path is known-broken or
    deprecated — confirm before relying on it.
    """
    batch_size = args.batch_size
    items_num = reader.read_config(args.config_path)
    test_data = reader.Data(args.test_path, False)
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # NOTE(review): network.network(...) is unpacked into 4 values here but
    # 5 values elsewhere in this file — verify which signature is current.
    loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size,
                                                       args.step, batch_size)
    exe.run(fluid.default_startup_program())
    infer_program = fluid.default_main_program().clone(for_test=True)
    for epoch_num in range(args.start_index, args.last_index + 1):
        model_path = os.path.join(args.model_path, "epoch_" + str(epoch_num))
        try:
            # A missing checkpoint dir is deliberately routed to the shared
            # ValueError handler below (control-flow by exception).
            if not os.path.exists(model_path):
                raise ValueError()
            fluid.io.load_persistables(executor=exe, dirname=model_path,
                                       main_program=infer_program)
            loss_sum = 0.0
            acc_sum = 0.0
            count = 0
            py_reader.set_sample_list_generator(
                test_data.reader(batch_size, batch_size * 20, False))
            py_reader.start()
            try:
                # Drain the reader until it signals end-of-data via EOFException.
                while True:
                    res = exe.run(infer_program,
                                  fetch_list=[loss.name, acc.name],
                                  use_program_cache=True)
                    loss_sum += res[0]
                    acc_sum += res[1]
                    count += 1
            except fluid.core.EOFException:
                # Reader exhausted: reset so the next epoch's pass can start.
                py_reader.reset()
            logger.info("TEST --> loss: %.4lf, Recall@20: %.4lf" %
                        (loss_sum / count, acc_sum / count))
        except ValueError as e:
            logger.info("TEST --> error: there is no model in " + model_path)
# data = reader.Data().load_raw_data(reader.ptb_raw_dir(), add_beg_token='<s>', # add_end_token='</s>') # text_list = data.seqs_to_text(data.datas[0], skip_beg_token=True, skip_end_token=True) # out_list, err_list = dependency_parser(text_list, word_to_id=data.word_to_id) # # outdir = 'debug' # wb.mkdir(outdir) # with open(os.path.join(outdir, 'ptb.parser.res'), 'wt') as f: # for out, err in zip(out_list, err_list): # f.write(err + '\n') # for info in out: # f.write(str(info) + '\n') data = reader.Data().load_raw_data(['ptb_demo/ptb.train.txt'], add_beg_token='<s>', add_end_token='</s>') text_list = data.seqs_to_text(data.datas[0], skip_beg_token=True, skip_end_token=True)[0:100] print('begin parser....') tbeg = time.time() out_list, err_list = dependency_parser(text_list, word_to_id=data.word_to_id) print('end, time={}s'.format(time.time() - tbeg)) outdir = 'debug' wb.mkdir(outdir) with open(os.path.join(outdir, 'ptb.parser.res'), 'wt') as f: for out, err in zip(out_list, err_list):
def train():
    """Train the session-based GNN recommender.

    Builds the network, trains for ``args.epoch_num`` epochs driven by a
    py_reader, logs loss/accuracy every PRINT_STEP global steps, and saves
    an inference model per epoch under ``args.model_path/epoch_<i>``.
    When ``args.enable_ce`` is set, seeds are fixed and KPI lines are
    printed for continuous evaluation.
    """
    args = parse_args()
    if args.enable_ce:
        # Fixed seeds for reproducible continuous-evaluation runs.
        SEED = 102
        fluid.default_main_program().random_seed = SEED
        fluid.default_startup_program().random_seed = SEED

    batch_size = args.batch_size
    items_num = reader.read_config(args.config_path)
    loss, acc, py_reader, feed_datas, _ = network.network(
        items_num, args.hidden_size, args.step, batch_size)
    data_reader = reader.Data(args.train_path, True)
    logger.info("load data complete")

    use_cuda = True if args.use_cuda else False
    use_parallel = True if args.use_parallel else False
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Exponentially decayed LR with L2 regularization.
    step_per_epoch = data_reader.length // batch_size
    optimizer = fluid.optimizer.Adam(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=args.lr,
            decay_steps=step_per_epoch * args.lr_dc_step,
            decay_rate=args.lr_dc),
        regularization=fluid.regularizer.L2DecayRegularizer(
            regularization_coeff=args.l2))
    optimizer.minimize(loss)
    exe.run(fluid.default_startup_program())

    # Fill the global item-id vocabulary tensor (ids 1..items_num-1).
    all_vocab = fluid.global_scope().var("all_vocab").get_tensor()
    all_vocab.set(
        np.arange(1, items_num).astype("int64").reshape((-1)), place)

    feed_list = [e.name for e in feed_datas]
    if use_parallel:
        train_exe = fluid.ParallelExecutor(
            use_cuda=use_cuda, loss_name=loss.name)
    else:
        train_exe = exe

    logger.info("begin train")
    total_time = []
    ce_info = []
    start_time = time.time()
    loss_sum = 0.0
    acc_sum = 0.0
    global_step = 0
    PRINT_STEP = 500
    py_reader.set_sample_list_generator(
        data_reader.reader(batch_size, batch_size * 20, True))
    for i in range(args.epoch_num):
        epoch_sum = []
        py_reader.start()
        try:
            # Run until the reader raises EOFException at end of epoch.
            while True:
                res = train_exe.run(fetch_list=[loss.name, acc.name])
                loss_sum += res[0].mean()
                acc_sum += res[1].mean()
                epoch_sum.append(res[0].mean())
                global_step += 1
                if global_step % PRINT_STEP == 0:
                    ce_info.append(
                        [loss_sum / PRINT_STEP, acc_sum / PRINT_STEP])
                    total_time.append(time.time() - start_time)
                    logger.info("global_step: %d, loss: %.4lf, train_acc: %.4lf" % (
                        global_step, loss_sum / PRINT_STEP,
                        acc_sum / PRINT_STEP))
                    loss_sum = 0.0
                    acc_sum = 0.0
                    start_time = time.time()
        except fluid.core.EOFException:
            py_reader.reset()
        logger.info("epoch loss: %.4lf" % (np.mean(epoch_sum)))
        save_dir = os.path.join(args.model_path, "epoch_" + str(i))
        fetch_vars = [loss, acc]
        fluid.io.save_inference_model(save_dir, feed_list, fetch_vars, exe)
        logger.info("model saved in " + save_dir)

    # only for ce
    if args.enable_ce:
        gpu_num = get_cards(args)
        ce_loss = 0
        ce_acc = 0
        ce_time = 0
        try:
            ce_loss = ce_info[-1][0]
            ce_acc = ce_info[-1][1]
            ce_time = total_time[-1]
        except IndexError:
            # Was a bare `except:` — narrow to the only expected failure
            # (empty ce_info/total_time when no step completed).
            print("ce info error")
        print("kpis\teach_pass_duration_card%s\t%s" % (gpu_num, ce_time))
        print("kpis\ttrain_loss_card%s\t%f" % (gpu_num, ce_loss))
        print("kpis\ttrain_acc_card%s\t%f" % (gpu_num, ce_acc))
# NOTE(review): the two methods below belong to class TwoLayerNet (see the
# super() call); the `class` statement is outside this chunk.
def __init__(self, D_in, H, D_out):
    """Two-layer MLP: D_in -> H -> D_out, log-probability outputs."""
    super(TwoLayerNet, self).__init__()
    self.linear1 = nn.Linear(D_in, H)
    self.linear2 = nn.Linear(H, D_out)
    # NOTE(review): LogSoftmax() without dim= relies on a deprecated
    # default in modern torch — confirm intended axis.
    self.softmax = nn.LogSoftmax()

def forward(self, x):
    """Return log-probabilities for input batch x."""
    h_relu = self.linear1(x).clamp(min=0)  # clamp(min=0) acts as ReLU
    y_pred = self.linear2(h_relu)
    y_prob = self.softmax(y_pred)
    return y_prob

# --- module-level training script ---
model = TwoLayerNet(23, 10, 2)
loss_fn = nn.NLLLoss()  # expects log-probabilities, matching LogSoftmax above
optim = RMSprop(model.parameters())
# NOTE(review): rebinding `reader` here shadows the imported reader module.
reader = reader.Data(32)
# NOTE(review): `/` is true division on Python 3, giving a float step count;
# `xrange` below is Python 2 only — this script appears to target Python 2.
num_steps_per_epoch = len(reader)/32
max_steps = num_steps_per_epoch*5
# viz = visdom.Visdom()
for step in xrange(max_steps):
    step_data = reader.next()
    y_pred = model(Variable(step_data[0]))
    loss = loss_fn(y_pred, Variable(step_data[1]))
    optim.zero_grad()
    loss.backward()
    optim.step()
    if step%20==0:
        # NOTE(review): body of this `if` continues beyond this chunk.