def main(data_args, use_cuda, num_passes, lr):
    """Build a VGG-style regression network, load pretrained conv weights,
    and export an inference model to ./pre_weight/.

    :param data_args: dataset settings (unused in this function body).
    :param use_cuda: run on GPU 0 when True, otherwise CPU.
    :param num_passes: number of passes (unused here).
    :param lr: Adam learning rate.

    NOTE(review): `pretrained_npy` is not defined in this function -- it is
    presumably a module-level path to a .npy weight file; confirm.
    """
    input_shape = [3, 224, 224]
    image = fluid.layers.data(name="image", shape=input_shape, dtype='float32')
    network = fvnet.Net()
    out = network.inference(input=image)
    label = fluid.layers.data(name="label", shape=[2], dtype='float32')
    squared_err = fluid.layers.square_error_cost(input=out, label=label)
    avg_cost = fluid.layers.mean(x=squared_err)

    # Adam optimizer drives the regression loss.
    optimizer = fluid.optimizer.Adam(learning_rate=lr)
    optimizer.minimize(avg_cost)
    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Only the first ten VGG conv layers are initialized from the npy file.
    conv_layers = [
        "conv1_1", "conv1_2", "conv2_1", "conv2_2", "conv3_1", "conv3_2",
        "conv3_3", "conv4_1", "conv4_2", "conv4_3"
    ]
    network.load_weights(pretrained_npy, exe, place, conv_layers)

    fluid.io.save_inference_model("./pre_weight/", ['image'], [out], exe)
def infer(args):
    """Run top-1 image classification inference, or (with --save_inference)
    just export the inference model and exit.

    :param args: parsed CLI namespace (model, pretrained_model, class_dim,
        image_shape, with_mem_opt, save_inference, use_gpu, ...).
    """
    # Unpack the arguments we use more than once.
    class_dim = args.class_dim
    model_name = args.model
    save_inference = args.save_inference
    pretrained_model = args.pretrained_model
    with_memory_optimization = args.with_mem_opt
    image_shape = [int(m) for m in args.image_shape.split(",")]

    model_list = [m for m in dir(models) if "__" not in m]
    assert model_name in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')

    # model definition
    model = models.__dict__[model_name]()
    if model_name == "GoogleNet":
        # GoogleNet returns auxiliary heads as well; keep only the main one.
        out, _, _ = model.net(input=image, class_dim=class_dim)
    else:
        out = model.net(input=image, class_dim=class_dim)
        out = fluid.layers.softmax(out)

    test_program = fluid.default_main_program().clone(for_test=True)

    fetch_list = [out.name]
    if with_memory_optimization and not save_inference:
        fluid.memory_optimize(
            fluid.default_main_program(), skip_opt_set=set(fetch_list))

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    fluid.io.load_persistables(exe, pretrained_model)
    if save_inference:
        fluid.io.save_inference_model(
            dirname=model_name,
            feeded_var_names=['image'],
            main_program=test_program,
            target_vars=out,
            executor=exe,
            model_filename='model',
            params_filename='params')
        print("model: ", model_name, " is already saved")
        exit(0)

    test_batch_size = 1
    test_reader = reader.test(settings=args, batch_size=test_batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])

    TOPK = 1
    for batch_id, data in enumerate(test_reader()):
        result = exe.run(test_program,
                         fetch_list=fetch_list,
                         feed=feeder.feed(data))
        result = result[0][0]
        # argsort ascending, reverse, take the best TOPK class ids.
        pred_label = np.argsort(result)[::-1][:TOPK]
        print("Test-{0}-score: {1}, class {2}".format(
            batch_id, result[pred_label], pred_label))
        sys.stdout.flush()
def check_decay_with_place(self, place, python_decay_fn, fluid_decay_fn,
                           kwargs):
    """Check a fluid learning-rate decay op against its pure-Python reference
    over the first 10 global steps.

    NOTE(review): the `place` parameter is immediately overwritten with
    CPUPlace below, so the argument is effectively ignored -- confirm
    whether GPU coverage was intended.
    """
    main_prog = fluid.Program()
    startup_prog = fluid.Program()

    # Build the decayed-LR graph inside its own program pair.
    with fluid.program_guard(main_prog, startup_prog):
        decayed_lr = fluid_decay_fn(**kwargs)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    fluid.memory_optimize(main_prog)

    for step in range(10):
        lr_val, = exe.run(main_prog, feed={}, fetch_list=[decayed_lr])
        python_decayed_lr = python_decay_fn(global_step=float(step), **kwargs)
        self.assertAlmostEqual(
            python_decayed_lr,
            lr_val[0],
            msg='Failed fn is {0}, Python result is {1}, Fluid result is {2}'
            .format(python_decay_fn.__name__,
                    str(python_decayed_lr), str(lr_val[0])))
def infer():
    """Classify one batch of indoor images with a pretrained ResNet50 and
    return the predicted top-1 label of the last processed batch.

    Returns:
        numpy array of the top-1 predicted class index.
    """
    # Hard-coded parameters (formerly CLI arguments).
    use_gpu = False
    class_dim = 5
    model_name = "ResNet50"
    pretrained_model = "./output_indoor/ResNet50/61"
    with_memory_optimization = True
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')

    # model definition
    model = mo.__dict__[model_name]()
    # BUG FIX: the original compared strings with `is`, which tests object
    # identity and is not guaranteed for equal string values -- use `==`.
    if model_name == "GoogleNet":
        out, _, _ = model.net(input=image, class_dim=class_dim)
    else:
        out = model.net(input=image, class_dim=class_dim)

    test_program = fluid.default_main_program().clone(for_test=True)

    if with_memory_optimization:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:

        def if_exist(var):
            # Only load variables that actually exist in the checkpoint dir.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    test_batch_size = 1
    test_reader = paddle.batch(reader.test(), batch_size=test_batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])

    fetch_list = [out.name]

    TOPK = 1
    for batch_id, data in enumerate(test_reader()):
        result = exe.run(test_program,
                         fetch_list=fetch_list,
                         feed=feeder.feed(data))
        result = result[0][0]
        pred_label = np.argsort(result)[::-1][:TOPK]
        result = pred_label
        sys.stdout.flush()
    return result
def main():
    """Entry point for the benchmark driver: build the model, apply the
    optimizer, then dispatch to local / pserver / nccl2 training."""
    args = parse_args()
    print_arguments(args)
    print_paddle_envs()

    if args.no_random:
        fluid.default_startup_program().random_seed = 1

    # the unique trainer id, starting from 0, needed by trainer
    # only
    nccl_id_var, num_trainers, trainer_id = (
        None, 1, int(os.getenv("PADDLE_TRAINER_ID", "0")))

    if args.use_cprof:
        pr = cProfile.Profile()
        pr.enable()

    model_def = __import__("%s" % args.model, fromlist=["models"])
    train_args = list(model_def.get_model(args))
    train_args.append(args)

    # Run optimizer.minimize(avg_loss)
    train_args[2].minimize(train_args[0])
    if args.memory_optimize:
        fluid.memory_optimize(fluid.default_main_program())

    if args.update_method == "pserver":
        train_prog, startup_prog = dist_transpile(trainer_id, args)
        if not train_prog:
            raise Exception(
                "Must configure correct environments to run dist train.")
        train_args.extend([train_prog, startup_prog])
        if args.gpus > 1 and os.getenv("TRAINING_ROLE") == "TRAINER":
            train_args.extend([nccl_id_var, num_trainers, trainer_id])
            train_parallel(*train_args)
            exit(0)
        train(*train_args)
        exit(0)

    # for other update methods, use default programs
    train_args.append(fluid.default_main_program())
    train_args.append(fluid.default_startup_program())

    if args.update_method == "nccl2":
        nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare(
            trainer_id)

    if args.gpus == 1:
        # NOTE: parallel executor use profiler interanlly
        if args.use_nvprof and args.device == 'GPU':
            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
                train(*train_args)
        else:
            train(*train_args)
    else:
        if args.device == "CPU":
            raise Exception("Only support GPU perf with parallel exe")
        train_args.extend([nccl_id_var, num_trainers, trainer_id])
        train_parallel(*train_args)
def main(data_args, use_cuda, num_passes, lr, json_path1, json_path2):
    """Train a VGG-based regressor on 28*28-dim targets and save a loss
    history plus an inference model per pass.

    :param data_args: dataset settings forwarded to the provider reader.
    :param use_cuda: run on GPU 0 when True.
    :param num_passes: number of training passes.
    :param lr: Adam learning rate.
    :param json_path1: first annotation JSON path for the pandas provider.
    :param json_path2: second annotation JSON path for the pandas provider.

    NOTE(review): `pretrained_model` and `if_exist` are not defined in this
    function -- presumably module-level; confirm before running.
    """
    input_shape = [3, 224, 224]
    image = fluid.layers.data(name="image", shape=input_shape, dtype='float32')
    network = fvnet.vgg_fluid()
    out = network.net(input=image)
    label = fluid.layers.data(name="label", shape=[28 * 28], dtype='float32')
    squared_err = fluid.layers.square_error_cost(input=out, label=label)
    avg_cost = fluid.layers.mean(x=squared_err)

    optimizer = fluid.optimizer.Adam(learning_rate=lr)  # Adam
    optimizer.minimize(avg_cost)
    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:
        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    panda_cl = provider.Pandas_fm(json_path1, json_path2)
    train_reader = paddle.batch(
        provider.read_train_for(data_args, panda_cl), batch_size=1)

    Loss = []
    elapsed = 0
    for pass_id in range(num_passes):
        pass_losses = []
        for batch_id, data in enumerate(train_reader()):
            start = datetime.datetime.now()
            batch_images = np.array(data[0][0])
            batch_labels = np.array(data[0][1])
            loss = exe.run(fluid.default_main_program(),
                           feed={"image": batch_images, "label": batch_labels},
                           fetch_list=[avg_cost])
            loss = np.mean(np.array(loss))
            pass_losses.append(loss)
            end = datetime.datetime.now()
            elapsed += (end - start).total_seconds()
            if batch_id % 100 == 0:
                # Report accumulated wall time every 100 batches, then reset.
                print("Pass {0}, trainbatch {1}, loss {2}, time {3}".format(
                    pass_id, batch_id, loss, elapsed))
                sys.stdout.flush()
                elapsed = 0

        Loss.append(np.mean(pass_losses))
        np.save('./models/loss/' + str(pass_id) + '_loss.npy', np.array(Loss))
        model_path = os.path.join("./models/", str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_inference_model(model_path, ['image'], [out], exe)

    np.save('./models/loss.npy', np.array(Loss))
def train(config):
    """
    model training
    """
    # BUG FIX: the vocab file was opened without ever being closed; use a
    # context manager so the handle is released deterministically.
    with open(config.vocab_path) as vocab_file:
        config.vocab_size = len(vocab_file.readlines())

    bow_loss, kl_loss, nll_loss, final_loss = knowledge_seq2seq(config)

    # Mark the losses persistable so memory_optimize will not reuse them.
    bow_loss.persistable = True
    kl_loss.persistable = True
    nll_loss.persistable = True
    final_loss.persistable = True

    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=config.grad_clip))
    optimizer = fluid.optimizer.Adam(learning_rate=config.lr)

    if config.stage == 0:
        # Stage 0 pre-trains only the bag-of-words loss.
        print("stage 0")
        optimizer.minimize(bow_loss)
    else:
        print("stage 1")
        optimizer.minimize(final_loss)

    fluid.memory_optimize(main_program)
    opt_var_name_list = optimizer.get_opti_var_name_list()

    if config.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    param_list = main_program.block(0).all_parameters()
    param_name_list = [p.name for p in param_list]

    init_model(config, param_name_list, place)

    processors = KnowledgeCorpus(
        data_dir=config.data_dir,
        data_prefix=config.data_prefix,
        vocab_path=config.vocab_path,
        min_len=config.min_len,
        max_len=config.max_len)
    train_generator = processors.data_generator(
        batch_size=config.batch_size, phase="train", shuffle=True)
    valid_generator = processors.data_generator(
        batch_size=config.batch_size, phase="dev", shuffle=False)

    model_handle = [exe, place, bow_loss, kl_loss, nll_loss, final_loss]

    train_loop(config, train_generator, valid_generator, main_program,
               inference_program, model_handle, param_name_list,
               opt_var_name_list)
def eval(args):
    """Extract embeddings for the test set and report top-1 recall.

    NOTE(review): the name shadows the builtin `eval`; kept because callers
    depend on it.
    """
    # parameters from arguments
    model_name = args.model
    pretrained_model = args.pretrained_model
    with_memory_optimization = args.with_mem_opt
    image_shape = [int(m) for m in args.image_shape.split(",")]

    assert model_name in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # model definition
    model = models.__dict__[model_name]()
    out = model.net(input=image, embedding_size=args.embedding_size)

    test_program = fluid.default_main_program().clone(for_test=True)

    if with_memory_optimization:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:

        def if_exist(var):
            # Restore only variables present in the checkpoint directory.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    test_reader = paddle.batch(
        reader.test(args), batch_size=args.batch_size, drop_last=False)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    fetch_list = [out.name]

    features, labels = [], []
    for batch_id, data in enumerate(test_reader()):
        t1 = time.time()
        [feas] = exe.run(test_program,
                         fetch_list=fetch_list,
                         feed=feeder.feed(data))
        label = np.asarray([x[1] for x in data])
        features.append(feas)
        labels.append(label)

        period = time.time() - t1
        if batch_id % 20 == 0:
            print("[%s] testbatch %d, time %2.2f sec" % \
                  (fmt_time(), batch_id, period))

    f = np.vstack(features)
    l = np.hstack(labels)
    recall = recall_topk(f, l, k=1)
    print("[%s] End test %d, test_recall %.5f" % (fmt_time(), len(f), recall))
    sys.stdout.flush()
def infer(args):
    """Run TSN-ResNet video inference, printing the top-1 class per sample.

    :param args: CLI namespace with seg_num, class_dim, num_layers,
        test_model, image_shape, with_mem_opt.
    """
    # parameters from arguments
    seg_num = args.seg_num
    class_dim = args.class_dim
    num_layers = args.num_layers
    test_model = args.test_model
    # BUG FIX: comparing to None with `==` relies on __eq__; use `is None`.
    if test_model is None:
        print('Please specify the test model ...')
        return

    image_shape = [int(m) for m in args.image_shape.split(",")]
    # Prepend the segment count: input is [seg_num, C, H, W].
    image_shape = [seg_num] + image_shape

    # model definition
    model = TSN_ResNet(layers=num_layers, seg_num=seg_num)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    out = model.net(input=image, class_dim=class_dim)

    # for test
    inference_program = fluid.default_main_program().clone(for_test=True)

    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    def is_parameter(var):
        # BUG FIX: the original returned the isinstance result only in the
        # true branch (implicitly None otherwise); return it directly.
        return isinstance(var, Parameter)

    vars = filter(is_parameter, inference_program.list_vars())
    fluid.io.load_vars(exe, test_model, vars=vars)

    # reader
    test_reader = paddle.batch(reader.infer(seg_num), batch_size=1)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])

    fetch_list = [out.name]

    # test
    TOPK = 1
    for batch_id, data in enumerate(test_reader()):
        data, vid = data[0]
        data = [[data]]
        result = exe.run(inference_program,
                         fetch_list=fetch_list,
                         feed=feeder.feed(data))
        result = result[0][0]
        pred_label = np.argsort(result)[::-1][:TOPK]
        print("Test sample: {0}, score: {1}, class {2}".format(
            vid, result[pred_label], pred_label))
        sys.stdout.flush()
def infer(model):
    """Live webcam facial-landmark demo: detect faces with a Haar cascade,
    run the landmark model on each cropped face, and draw the points.

    Press 'q' in any OpenCV window to quit.

    NOTE(review): `cap`, `face_cascade` and `draw_landmark_point` are
    defined outside this function -- presumably module-level; confirm.
    """
    predict = create_model(model=model)
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    fluid.memory_optimize(fluid.default_main_program())
    load_model(exe, fluid.default_main_program(), model=model)
    print("load model succeed")

    while True:
        ret, frame = cap.read()
        cv2.imshow('frame', frame)  # window showing the original video
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect faces in the frame.
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.5,
            minNeighbors=5,
            minSize=(5, 5))

        for (x, y, w, h) in faces:
            # Slightly enlarge the detected box before cropping.
            w = int(w * 1.1)
            h = int(h * 1.3)
            frame = cv2.rectangle(frame, (x, y), (x + w, y + h),
                                  (255, 0, 0), 2)
            xmin, xmax = x, x + w
            ymin, ymax = y, y + h
            face_crop = frame[ymin:ymax, xmin:xmax]

            # Model expects a 224x224 CHW float image scaled to [0, 1].
            face_224 = cv2.resize(face_crop, (224, 224),
                                  interpolation=cv2.INTER_CUBIC)
            face_224 = face_224.transpose((2, 0, 1))
            batch = np.array([face_224]).astype(np.float32)
            batch /= 255.0

            result = exe.run(fluid.default_main_program(),
                             feed={'img': batch},
                             fetch_list=[predict])
            points = result[0].reshape(-1, 2)
            draw_landmark_point(face_crop, points)
            cv2.imshow("image!", face_crop)

        cv2.imshow('origin image', frame)  # window showing the original video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
def add_optimizer(args, avg_cost):
    """Attach a piecewise-decay Momentum optimizer to `avg_cost` and
    optionally run fluid's memory optimization pass.

    :param args: namespace with `use_mem_opt`.
    :param avg_cost: scalar loss variable to minimize.
    """
    #optimizer = fluid.optimizer.SGD(learning_rate=0.002)
    lr_schedule = fluid.layers.piecewise_decay(
        boundaries=[100], values=[0.1, 0.2])
    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr_schedule,
        momentum=0.9,
        regularization=fluid.regularizer.L2Decay(1e-4))
    optimizer.minimize(avg_cost)

    if args.use_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())
def main():
    """Short CE-style training run of the WMT14 encoder-decoder: trains for
    at most 3 batches, printing the average cross-entropy cost."""
    rnn_out = encoder_decoder()
    label = layers.data(
        name="target_language_next_word",
        shape=[1],
        dtype='int64',
        lod_level=1)
    cost = layers.cross_entropy(input=rnn_out, label=label)
    avg_cost = fluid.layers.mean(cost)

    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
    optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())
    # fluid.release_memory(fluid.default_main_program())

    # fix the order of training data
    train_data = paddle.batch(
        paddle.dataset.wmt14.train(dict_size), batch_size=batch_size)

    # train_data = paddle.batch(
    #     paddle.reader.shuffle(
    #         paddle.dataset.wmt14.train(dict_size), buf_size=1000),
    #     batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    feed_order = [
        'src_word_id', 'target_language_word', 'target_language_next_word'
    ]
    feed_list = [
        fluid.default_main_program().global_block().var(var_name)
        for var_name in feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    batch_id = 0
    for pass_id in range(10):
        for data in train_data():
            outs = exe.run(fluid.default_main_program(),
                           feed=feeder.feed(data),
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])
            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                  " avg_cost=" + str(avg_cost_val))
            # Smoke test only: bail out after a few batches.
            if batch_id > 2:
                exit(0)
            if math.isnan(float(avg_cost_val)):
                sys.exit("got NaN loss, training failed.")
            batch_id += 1
def main():
    """Benchmark entry point: build model + optimizer, then run local,
    pserver-distributed or nccl2 training.

    NOTE(review): PADDLE_TRAINER_ID defaults to "-1" here (a sibling driver
    uses "0") -- confirm which default is intended.
    """
    args = parse_args()
    print_arguments(args)

    # the unique trainer id, starting from 0, needed by trainer
    # only
    nccl_id_var, num_trainers, trainer_id = (
        None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1")))

    if args.use_cprof:
        pr = cProfile.Profile()
        pr.enable()

    model_def = __import__("models.%s" % args.model, fromlist=["models"])
    train_args = list(model_def.get_model(args))
    train_args.append(args)

    # Run optimizer.minimize(avg_loss)
    train_args[2].minimize(train_args[0])
    if args.memory_optimize:
        fluid.memory_optimize(fluid.default_main_program())

    if args.update_method == "pserver":
        train_prog, startup_prog = dist_transpile(trainer_id)
        if not train_prog:
            raise Exception(
                "Must configure correct environments to run dist train.")
        train_args.extend([train_prog, startup_prog])
        if args.gpus > 1 and os.getenv("PADDLE_TRAINING_ROLE") == "TRAINER":
            train_args.extend([nccl_id_var, num_trainers, trainer_id])
            train_parallel(*train_args)
        train(*train_args)
        exit(0)

    # for other update methods, use default programs
    train_args.append(fluid.default_main_program())
    train_args.append(fluid.default_startup_program())

    if args.update_method == "nccl2":
        nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare(
            trainer_id)

    if args.gpus == 1:
        # NOTE: parallel executor use profiler interanlly
        if args.use_nvprof and args.device == 'GPU':
            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
                train(*train_args)
        else:
            train(*train_args)
    else:
        if args.device == "CPU":
            raise Exception("Only support GPU perf with parallel exe")
        train_args.extend([nccl_id_var, num_trainers, trainer_id])
        train_parallel(*train_args)
def get_model(args, is_train, main_prog, startup_prog):
    """Build the MNIST CNN benchmark graph inside (main_prog, startup_prog).

    Returns (avg_cost, optimizer, [batch_acc], batched_reader,
    data_file_handle); optimizer is None when is_train is False.
    """
    # NOTE: mnist is small, we don't implement data sharding yet.
    opt = None
    data_file_handle = None
    with fluid.program_guard(main_prog, startup_prog):
        if args.use_reader_op:
            filelist = [
                os.path.join(args.data_path, f)
                for f in os.listdir(args.data_path)
            ]
            data_file_handle = fluid.layers.open_files(
                filenames=filelist,
                shapes=[[-1, 1, 28, 28], (-1, 1)],
                lod_levels=[0, 0],
                dtypes=["float32", "int64"],
                thread_num=1,
                pass_num=1)
            data_file = fluid.layers.double_buffer(
                fluid.layers.batch(
                    data_file_handle, batch_size=args.batch_size))
        with fluid.unique_name.guard():
            if args.use_reader_op:
                # BUG FIX: the original unpacked into `input, label`, leaving
                # `images` unbound for cnn_model() below -- a NameError on the
                # reader-op path. Unpack into `images, label` instead.
                images, label = fluid.layers.read_file(data_file)
            else:
                images = fluid.layers.data(
                    name='pixel', shape=[1, 28, 28], dtype='float32')
                label = fluid.layers.data(
                    name='label', shape=[1], dtype='int64')

            predict = cnn_model(images)
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(x=cost)
            # Evaluator
            batch_acc = fluid.layers.accuracy(input=predict, label=label)
            # Optimization
            if is_train:
                opt = fluid.optimizer.AdamOptimizer(
                    learning_rate=0.001, beta1=0.9, beta2=0.999)
                opt.minimize(avg_cost)
                if args.memory_optimize:
                    fluid.memory_optimize(main_prog)

    # Reader
    if is_train:
        reader = paddle.dataset.mnist.train()
    else:
        reader = paddle.dataset.mnist.test()
    batched_reader = paddle.batch(
        reader, batch_size=args.batch_size * args.gpus)
    return avg_cost, opt, [batch_acc], batched_reader, data_file_handle
def infer(args):
    """Extract and print embedding features for each inference batch.

    :param args: CLI namespace with model, pretrained_model, with_mem_opt,
        image_shape, embedding_size, use_gpu, batch_size.
    """
    # parameters from arguments
    model_name = args.model
    pretrained_model = args.pretrained_model
    with_memory_optimization = args.with_mem_opt
    image_shape = [int(m) for m in args.image_shape.split(",")]

    assert model_name in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')

    # model definition
    model = models.__dict__[model_name]()
    out = model.net(input=image, embedding_size=args.embedding_size)

    test_program = fluid.default_main_program().clone(for_test=True)

    if with_memory_optimization:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:

        def if_exist(var):
            # Load only variables that exist in the checkpoint directory.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    infer_reader = paddle.batch(
        reader.infer(args), batch_size=args.batch_size, drop_last=False)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])

    fetch_list = [out.name]

    for batch_id, data in enumerate(infer_reader()):
        result = exe.run(test_program,
                         fetch_list=fetch_list,
                         feed=feeder.feed(data))
        # Flatten the first sample's embedding and preview five values.
        result = result[0][0].reshape(-1)
        print("Test-{0}-feature: {1}".format(batch_id, result[:5]))
        sys.stdout.flush()
def construct_resnet(self, depth, learning_rate, momentum):
    """Build the ResNet training/inference programs on this instance.

    Sets self.avg_cost, self.accuracy, self.inference_program and
    self.train_program.

    :param depth: ResNet depth passed to the model builder.
    :param learning_rate: Momentum optimizer learning rate.
    :param momentum: Momentum coefficient.
    """
    input = fluid.layers.data(
        name='data', shape=self.data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    predict = self.model_dict[self.data_set](input, self.class_dim, depth)

    cost = fluid.layers.cross_entropy(input=predict, label=label)
    self.avg_cost = fluid.layers.mean(x=cost)
    self.accuracy = fluid.layers.accuracy(input=predict, label=label)

    # inference program
    self.inference_program = fluid.default_main_program().clone(
        for_test=True)

    optimizer = fluid.optimizer.Momentum(
        learning_rate=learning_rate, momentum=momentum)
    opts = optimizer.minimize(self.avg_cost)
    fluid.memory_optimize(fluid.default_main_program())

    self.train_program = fluid.default_main_program().clone()
def test_main(use_cuda, use_py_func_op, use_parallel_executor):
    """Train a small FC net for two epochs and return the per-step losses
    as a numpy array (None when CUDA is requested but unavailable)."""
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return None

    with fluid.program_guard(fluid.Program(), fluid.Program()):
        with fluid.scope_guard(fluid.core.Scope()):
            # Fixed seeds so runs are comparable across configurations.
            fluid.default_main_program().random_seed = 1
            fluid.default_startup_program().random_seed = 1
            np.random.seed(1)

            img = fluid.layers.data(
                name='image', shape=[784], dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            loss = simple_fc_net(img, label, use_py_func_op)
            optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
            optimizer.minimize(loss)

            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
            feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
            r = paddle.batch(reader, batch_size=10)

            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())

            #FIXME force use old memory optimzie strategy here to pass the unittest
            #since open the new strategy will crash the unittest
            fluid.memory_optimize(fluid.default_main_program())

            train_cp = compiler.CompiledProgram(fluid.default_main_program())
            if use_parallel_executor:
                train_cp = train_cp.with_data_parallel(loss_name=loss.name)
                fetch_list = [loss.name]
            else:
                fetch_list = [loss]

            ret = []
            for epoch_id in six.moves.range(2):
                for d in r():
                    L, = exe.run(train_cp,
                                 feed=feeder.feed(d),
                                 fetch_list=fetch_list)
                    ret.append(L)
            return np.array(ret)
def construct_vgg16_net(self, learning_rate):
    """Build the VGG16 training/inference programs on this instance.

    Sets self.avg_cost, self.accuracy, self.inference_program and
    self.train_program.

    :param learning_rate: Adam learning rate.
    """
    images = fluid.layers.data(
        name='pixel', shape=self.data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    net = self.vgg16_bn_drop(images)
    predict = fluid.layers.fc(input=net, size=self.class_dim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    self.avg_cost = fluid.layers.mean(x=cost)
    self.accuracy = fluid.layers.accuracy(input=predict, label=label)

    # Clone before optimization so inference has no backward ops.
    self.inference_program = fluid.default_main_program().clone(
        for_test=True)

    optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
    opts = optimizer.minimize(self.avg_cost)
    fluid.memory_optimize(fluid.default_main_program())

    self.train_program = fluid.default_main_program().clone()
# NOTE(review): this is a top-level script fragment; `tp`, `sp`, `pred`,
# `loss_mean`, `use_gpu`, `recover_path`, `parallel_flag` and `load_model`
# are presumably defined earlier in the file -- confirm.
# lr = fluid.layers.polynomial_decay(base_lr, total_step, end_learning_rate=0, power=0.9)
# area = fluid.layers.elementwise_max(
#     fluid.layers.reduce_mean(mask),
#     fluid.layers.assign(np.array([0.1], dtype=np.float32)))
# loss_mean = fluid.layers.reduce_mean(loss) / area
# opt = fluid.optimizer.Adam(learning_rate=2e-4)
# opt = fluid.optimizer.Momentum(
#     lr,
#     momentum=0.9,
#     regularization=fluid.regularizer.L2DecayRegularizer(
#         regularization_coeff=weight_decay), )
# retv = opt.minimize(loss_mean, startup_program=sp, no_grad_set=no_grad_set)
# tp = tp.clone(True)
fluid.memory_optimize(tp, print_log=False, skip_opt_set=[pred.name], level=1)

# Select device, run startup, and restore the checkpoint.
place = fluid.CPUPlace()
if use_gpu:
    place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(sp)
load_model(recover_path)

if parallel_flag:
    exe_p = fluid.ParallelExecutor(
        use_cuda=True, loss_name=loss_mean.name, main_program=tp)

early_stopcount = 0
def train(args):
    """Train
    """
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    net = Network(args.vocab_size, args.emb_size, args.hidden_size)

    train_program = fluid.Program()
    train_startup = fluid.Program()
    if "CE_MODE_X" in os.environ:
        # Deterministic seeds for continuous-evaluation runs.
        train_program.random_seed = 110
        train_startup.random_seed = 110
    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            logits, loss = net.network(args.loss_type)
            loss.persistable = True
            logits.persistable = True
            # gradient clipping
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByValue(max=1.0, min=-1.0))
            optimizer = fluid.optimizer.Adam(
                learning_rate=args.learning_rate)
            optimizer.minimize(loss)
            print("begin memory optimization ...")
            fluid.memory_optimize(train_program)
            print("end memory optimization ...")

    test_program = fluid.Program()
    test_startup = fluid.Program()
    if "CE_MODE_X" in os.environ:
        test_program.random_seed = 110
        test_startup.random_seed = 110
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            logits, loss = net.network(args.loss_type)
            loss.persistable = True
            logits.persistable = True
    test_program = test_program.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(
            os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    print("device count %d" % dev_count)
    print("theoretical memory usage: ")
    print(
        fluid.contrib.memory_usage(
            program=train_program, batch_size=args.batch_size))

    exe = fluid.Executor(place)
    exe.run(train_startup)
    exe.run(test_startup)

    train_exe = fluid.ParallelExecutor(
        use_cuda=args.use_cuda,
        loss_name=loss.name,
        main_program=train_program)
    test_exe = fluid.ParallelExecutor(
        use_cuda=args.use_cuda,
        main_program=test_program,
        share_vars_from=train_exe)

    if args.word_emb_init is not None:
        print("start loading word embedding init ...")
        if six.PY2:
            word_emb = np.array(
                pickle.load(open(args.word_emb_init, 'rb'))).astype('float32')
        else:
            word_emb = np.array(
                pickle.load(
                    open(args.word_emb_init, 'rb'),
                    encoding="bytes")).astype('float32')
        net.set_word_embedding(word_emb, place)
        print("finish init word embedding  ...")

    print("start loading data ...")

    def train_with_feed(batch_data):
        """
        Train on one batch
        """
        #to do get_feed_names
        feed_dict = dict(zip(net.get_feed_names(), batch_data))
        cost = train_exe.run(feed=feed_dict, fetch_list=[loss.name])
        return cost[0]

    def test_with_feed(batch_data):
        """
        Test on one batch
        """
        feed_dict = dict(zip(net.get_feed_names(), batch_data))
        score = test_exe.run(feed=feed_dict, fetch_list=[logits.name])
        return score[0]

    def evaluate():
        """
        Evaluate to choose model
        """
        val_batches = reader.batch_reader(args.val_path, args.batch_size,
                                          place, args.max_len, 1)
        scores = []
        labels = []
        for batch in val_batches:
            scores.extend(test_with_feed(batch))
            labels.extend([x[0] for x in batch[2]])
        return eva.evaluate_Recall(zip(scores, labels))

    def save_exe(step, best_recall):
        """
        Save exe conditional
        """
        recall_dict = evaluate()
        print('evaluation recall result:')
        print('1_in_2: %s\t1_in_10: %s\t2_in_10: %s\t5_in_10: %s' %
              (recall_dict['1_in_2'], recall_dict['1_in_10'],
               recall_dict['2_in_10'], recall_dict['5_in_10']))
        if recall_dict['1_in_10'] > best_recall and step != 0:
            fluid.io.save_inference_model(
                args.save_path,
                net.get_feed_inference_names(),
                logits,
                exe,
                main_program=train_program)
            print("Save model at step %d ... " % step)
            print(
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(time.time())))
            best_recall = recall_dict['1_in_10']
        return best_recall

    # train over different epoches
    global_step, train_time = 0, 0.0
    best_recall = 0
    for epoch in six.moves.xrange(args.num_scan_data):
        train_batches = reader.batch_reader(args.train_path, args.batch_size,
                                            place, args.max_len,
                                            args.sample_pro)
        begin_time = time.time()
        sum_cost = 0
        ce_cost = 0
        for batch in train_batches:
            # Checkpoint on the save-step schedule before consuming the batch.
            if (args.save_path is not None) and (
                    global_step % args.save_step == 0):
                best_recall = save_exe(global_step, best_recall)

            cost = train_with_feed(batch)
            global_step += 1
            sum_cost += cost.mean()
            ce_cost = cost.mean()

            if global_step % args.print_step == 0:
                print('training step %s avg loss %s' %
                      (global_step, sum_cost / args.print_step))
                sum_cost = 0

        pass_time_cost = time.time() - begin_time
        train_time += pass_time_cost
        print("Pass {0}, pass_time_cost {1}".format(
            epoch, "%2.2f sec" % pass_time_cost))
        if "CE_MODE_X" in os.environ and epoch == args.num_scan_data - 1:
            card_num = get_cards()
            print("kpis\ttrain_duration_card%s\t%s" %
                  (card_num, pass_time_cost))
            print("kpis\ttrain_loss_card%s\t%s" % (card_num, ce_cost))
def finetune(args):
    """
    Finetune
    """
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    net = Network(args.vocab_size, args.emb_size, args.hidden_size)

    train_program = fluid.Program()
    train_startup = fluid.Program()
    if "CE_MODE_X" in os.environ:
        # Deterministic seeds for continuous-evaluation runs.
        train_program.random_seed = 110
        train_startup.random_seed = 110
    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            logits, loss = net.network(args.loss_type)
            loss.persistable = True
            logits.persistable = True
            # gradient clipping
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByValue(max=1.0, min=-1.0))
            # Exponentially decaying LR for the finetune stage.
            optimizer = fluid.optimizer.Adam(
                learning_rate=fluid.layers.exponential_decay(
                    learning_rate=args.learning_rate,
                    decay_steps=400,
                    decay_rate=0.9,
                    staircase=True))
            optimizer.minimize(loss)
            print("begin memory optimization ...")
            fluid.memory_optimize(train_program)
            print("end memory optimization ...")

    test_program = fluid.Program()
    test_startup = fluid.Program()
    if "CE_MODE_X" in os.environ:
        test_program.random_seed = 110
        test_startup.random_seed = 110
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            logits, loss = net.network(args.loss_type)
            loss.persistable = True
            logits.persistable = True
    test_program = test_program.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(
            os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    print("device count %d" % dev_count)
    print("theoretical memory usage: ")
    print(
        fluid.contrib.memory_usage(
            program=train_program, batch_size=args.batch_size))

    exe = fluid.Executor(place)
    exe.run(train_startup)
    exe.run(test_startup)

    train_exe = fluid.ParallelExecutor(
        use_cuda=args.use_cuda,
        loss_name=loss.name,
        main_program=train_program)
    test_exe = fluid.ParallelExecutor(
        use_cuda=args.use_cuda,
        main_program=test_program,
        share_vars_from=train_exe)

    if args.init_model:
        init.init_pretraining_params(
            exe, args.init_model, main_program=train_startup)
        print('sccuess init %s' % args.init_model)

    print("start loading data ...")

    def train_with_feed(batch_data):
        """
        Train on one batch
        """
        #to do get_feed_names
        feed_dict = dict(zip(net.get_feed_names(), batch_data))
        cost = train_exe.run(feed=feed_dict, fetch_list=[loss.name])
        return cost[0]

    def test_with_feed(batch_data):
        """
        Test on one batch
        """
        feed_dict = dict(zip(net.get_feed_names(), batch_data))
        score = test_exe.run(feed=feed_dict, fetch_list=[logits.name])
        return score[0]

    def evaluate():
        """
        Evaluate to choose model
        """
        val_batches = reader.batch_reader(args.val_path, args.batch_size,
                                          place, args.max_len, 1)
        scores = []
        labels = []
        for batch in val_batches:
            scores.extend(test_with_feed(batch))
            labels.extend([x[0] for x in batch[2]])
        scores = [x[0] for x in scores]
        return eva.evaluate_cor(scores, labels)

    def save_exe(step, best_cor):
        """
        Save exe conditional
        """
        cor = evaluate()
        print('evaluation cor relevance %s' % cor)
        if cor > best_cor and step != 0:
            fluid.io.save_inference_model(
                args.save_path,
                net.get_feed_inference_names(),
                logits,
                exe,
                main_program=train_program)
            print("Save model at step %d ... " % step)
            print(
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(time.time())))
            best_cor = cor
        return best_cor

    # train over different epoches
    global_step, train_time = 0, 0.0
    best_cor = 0.0
    pre_index = -1
    for epoch in six.moves.xrange(args.num_scan_data):
        train_batches = reader.batch_reader(args.train_path, args.batch_size,
                                            place, args.max_len,
                                            args.sample_pro)
        begin_time = time.time()
        sum_cost = 0
        for batch in train_batches:
            # Checkpoint on the save-step schedule before consuming the batch.
            if (args.save_path is not None) and (
                    global_step % args.save_step == 0):
                best_cor = save_exe(global_step, best_cor)

            cost = train_with_feed(batch)
            global_step += 1
            sum_cost += cost.mean()

            if global_step % args.print_step == 0:
                print('training step %s avg loss %s' %
                      (global_step, sum_cost / args.print_step))
                sum_cost = 0

        pass_time_cost = time.time() - begin_time
        train_time += pass_time_cost
        print("Pass {0}, pass_time_cost {1}".format(
            epoch, "%2.2f sec" % pass_time_cost))
def main():
    """Build and train a hand-rolled LSTM sentiment classifier on IMDB.

    Constructs the network with fluid.layers.DynamicRNN, trains with Adam,
    and records throughput into `train_speed_kpi`.
    Relies on module-level names: parse_args, word_dict, batch, crop_sentence,
    imdb, tracking_kpis, to_lodtensor, paddle, fluid, numpy, np, time.
    """
    args = parse_args()
    lstm_size = args.hidden_dim

    # Input: a LoD (variable-length) sequence of word ids.
    data = fluid.layers.data(name="words", shape=[1], lod_level=1, dtype='int64')
    sentence = fluid.layers.embedding(input=data, size=[len(word_dict), args.emb_dim])
    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    # Manual LSTM cell, one time step per DynamicRNN iteration.
    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size, ):
            # Gate pre-activation: FC(input, with bias) + FC(hidden, no bias).
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            gate = fluid.layers.sums(input=[gate0, gate1])
            return gate

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        # c_t = f * c_{t-1} + i * g ; h_t = o * tanh(c_t)
        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(x=forget_gate, y=prev_cell),
            fluid.layers.elementwise_mul(x=input_gate, y=cell_gate)
        ])
        hidden = fluid.layers.elementwise_mul(x=output_gate,
                                              y=fluid.layers.tanh(x=cell))
        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    # Classify from the last hidden state of each sequence.
    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    # NOTE(review): a second data layer named 'label' is created again below
    # for the accuracy op — both refer to the same feed key "label".
    loss = fluid.layers.cross_entropy(
        input=logit,
        label=fluid.layers.data(name='label', shape=[1], dtype='int64'))
    loss = fluid.layers.mean(x=loss)

    # add acc
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(input=logit, label=fluid.layers.data(name='label', \
                shape=[1], dtype='int64'), total=batch_size_tensor)

    adam = fluid.optimizer.Adam()
    adam.minimize(loss)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    train_reader = batch(paddle.reader.shuffle(crop_sentence(
        imdb.train(word_dict), args.crop_size), buf_size=25000),
        batch_size=args.batch_size)

    # Locate KPI trackers for this batch size (None if not registered).
    train_acc_kpi = None
    for kpi in tracking_kpis:
        if kpi.name == 'imdb_%s_train_acc' % (args.batch_size):
            train_acc_kpi = kpi
    train_speed_kpi = None
    for kpi in tracking_kpis:
        if kpi.name == 'imdb_%s_train_speed' % (args.batch_size):
            train_speed_kpi = kpi

    iters, num_samples, start_time = 0, 0, time.time()
    for pass_id in range(args.pass_num):
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_reader()):
            # Restart the clock after the warm-up batches so timing
            # excludes startup cost.
            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            tensor_words = to_lodtensor([x[0] for x in data], place)
            label = numpy.array([x[1] for x in data]).astype("int64")
            label = label.reshape((-1, 1))
            loss_np, acc, weight = exe.run(
                fluid.default_main_program(),
                feed={
                    "words": tensor_words,
                    "label": label
                },
                fetch_list=[loss, batch_acc, batch_size_tensor])
            iters += 1
            # Count words (not sentences) as samples for the speed metric.
            for x in data:
                num_samples += len(x[0])
            print(
                "Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                (pass_id, iters, loss_np, acc)
            )  # The accuracy is the accumulation of batches, but not the current batch.

        train_elapsed = time.time() - start_time
        examples_per_sec = num_samples / train_elapsed
        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
              (num_samples, train_elapsed, examples_per_sec))
        train_speed_kpi.add_record(np.array(examples_per_sec, dtype='float32'))
        # Only the first pass is benchmarked.
        break
    train_speed_kpi.persist()
def run_benchmark(model, args):
    """Benchmark training of an image-classification `model` with ParallelExecutor.

    Builds the graph into the default main program, trains for
    args.pass_num passes (at most args.iterations batches each), evaluates on
    the test set after every pass, and prints throughput statistics.
    NOTE(review): uses list-returning `map` semantics — Python 2 code.
    Relies on module-level helpers: get_data_shape, get_parallel_executor,
    init_reader, record_kpi, plus fluid/np/time imports.
    """
    dshape, class_dim = get_data_shape(args)
    input = fluid.layers.data(name='data', shape=dshape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    predict = model(input, class_dim)
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Fix seeds for reproducible benchmark runs.
    fluid.default_main_program().seed = 1
    fluid.default_startup_program().seed = 1

    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(input=predict, label=label,
                                      total=batch_size_tensor)

    # Clone for inference BEFORE adding optimizer ops.
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
    opts = optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    # Init ParallelExecutor
    train_exe, test_exe = get_parallel_executor(args, avg_cost,
                                                fluid.default_main_program(),
                                                inference_program)

    # Prepare reader
    train_reader, test_reader = init_reader(args)

    def test(test_exe):
        # Weighted average of per-batch accuracy over the test set.
        test_accuracy = fluid.average.WeightedAverage()
        for batch_id, data in enumerate(test_reader()):
            if batch_id == args.iterations:
                break
            img_data = np.array(map(lambda x: x[0].reshape(dshape),
                                    data)).astype("float32")
            y_data = np.array(map(lambda x: x[1],
                                  data)).astype("int64").reshape([-1, 1])
            acc, weight = test_exe.run(
                fetch_list=[batch_acc.name, batch_size_tensor.name],
                feed={
                    "data": img_data,
                    "label": y_data
                })
            # ParallelExecutor returns per-device values; reduce them here.
            acc = float((acc * weight).sum() / weight.sum())
            weight = int(weight.sum())
            test_accuracy.add(value=acc, weight=weight)
        return test_accuracy.eval()

    im_num, total_train_time, total_iters = 0, 0.0, 0
    accuracy = fluid.average.WeightedAverage()
    fetch_list = [avg_cost.name, batch_acc.name, batch_size_tensor.name]
    for pass_id in range(args.pass_num):
        every_pass_loss = []
        accuracy.reset()
        iter, pass_duration = 0, 0.0
        for batch_id, data in enumerate(train_reader()):
            batch_start = time.time()
            if iter == args.iterations:
                break
            image = np.array(map(lambda x: x[0].reshape(dshape),
                                 data)).astype('float32')
            label = np.array(map(lambda x: x[1],
                                 data)).astype('int64').reshape([-1, 1])
            loss, acc, weight = train_exe.run(fetch_list=fetch_list,
                                              feed={
                                                  'data': image,
                                                  'label': label
                                              })
            # Reduce the per-device fetches to scalars.
            acc = float((acc * weight).sum() / weight.sum())
            loss = (loss * weight).sum() / weight.sum()
            weight = int(weight.sum())
            accuracy.add(value=acc, weight=weight)
            # Skip warm-up batches of the first pass when timing.
            if iter >= args.skip_batch_num or pass_id != 0:
                batch_duration = time.time() - batch_start
                pass_duration += batch_duration
                im_num += label.shape[0]
            every_pass_loss.append(loss)
            # print("Pass: %d, Iter: %d, loss: %s, acc: %s" %
            #       (pass_id, iter, str(loss), str(acc)))
            iter += 1
            total_iters += 1
        total_train_time += pass_duration
        pass_train_acc = accuracy.eval()
        pass_test_acc = test(test_exe)
        print(
            "Pass:%d, Loss:%f, Train Accuray:%f, Test Accuray:%f, Handle Images Duration: %f\n"
            % (pass_id, np.mean(every_pass_loss), pass_train_acc,
               pass_test_acc, pass_duration))
        record_kpi(pass_id, iter, pass_train_acc, total_train_time, im_num)

    examples_per_sec = im_num / total_train_time
    sec_per_batch = total_train_time / \
        (iter * args.pass_num - args.skip_batch_num)
    print('\nTotal examples: %d, total time: %.5f' %
          (im_num, total_train_time))
    print('%.5f examples/sec, %.5f sec/batch \n' %
          (examples_per_sec, sec_per_batch))
def check_network_convergence(self,
                              method,
                              memory_opt=True,
                              iter=50,
                              batch_size=None,
                              allow_op_delay=False,
                              feed_dict=None,
                              seed=None,
                              use_parallel_executor=True,
                              balance_parameter_opt_between_cards=False):
    """Build a network via `method`, run `iter` training steps, and return
    (first_loss, last_loss) as numpy arrays.

    `method` is a callable taking use_feed=bool and returning the loss var.
    NOTE(review): Python 2 code (print statements, xrange). The convergence
    assertion at the end is commented out, so this only measures.
    """
    def run_executor(exe, feed, fetch_list, program=None):
        # Dispatch on executor type; ParallelExecutor takes no program arg.
        if isinstance(exe, fluid.ParallelExecutor):
            res = exe.run(fetch_list=fetch_list, feed=feed)
        elif isinstance(exe, fluid.Executor):
            if program is None:
                program = fluid.default_main_program()
            res = exe.run(program=program, feed=feed, fetch_list=fetch_list)
        else:
            raise ValueError('Unkown type exe')
        return res

    main = fluid.Program()
    startup = fluid.Program()
    startup.random_seed = 1  # Fix random seed
    with fluid.program_guard(main, startup):
        if seed is not None:
            startup.random_seed = seed
        loss = method(use_feed=feed_dict is not None)
        adam = fluid.optimizer.Adam()
        adam.minimize(loss)
        if memory_opt:
            fluid.memory_optimize(main)
        place = fluid.CUDAPlace(0)
        startup_exe = fluid.Executor(place)
        startup_exe.run(startup)
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.allow_op_delay = allow_op_delay
        build_strategy = fluid.BuildStrategy()
        # Reduce = shard parameter updates across cards; AllReduce = replicate.
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce if balance_parameter_opt_between_cards else fluid.BuildStrategy.ReduceStrategy.AllReduce
        if use_parallel_executor:
            exe = fluid.ParallelExecutor(
                True,
                loss_name=loss.name,
                exec_strategy=exec_strategy,
                build_strategy=build_strategy)
        else:
            exe = fluid.Executor(place=place)

        if batch_size is not None:
            # Effective batch is per-card batch times card count.
            batch_size *= fluid.core.get_cuda_device_count()
        begin = time.time()
        first_loss, = run_executor(
            exe=exe, feed=feed_dict, fetch_list=[loss.name])
        first_loss = np.array(first_loss)

        for i in xrange(iter):
            run_executor(exe=exe, feed=feed_dict, fetch_list=[])

        last_loss, = run_executor(
            exe=exe, feed=feed_dict, fetch_list=[loss.name])
        end = time.time()

        if batch_size is not None:
            # "+ 2" accounts for the two extra loss-fetching runs above.
            print "%.4f Instance per second" % (
                (batch_size * iter + 2) / (end - begin))

        last_loss = np.array(last_loss)

        print first_loss, last_loss
        # self.assertGreater(first_loss[0], last_loss[0])
        return first_loss, last_loss
def train(args):
    """Train a video-classification model chosen by args.model_name.

    Builds separate train/valid programs sharing one startup program, loads
    resume or pretrain weights, then delegates the loop to
    train_with_pyreader / train_without_pyreader.
    Relies on module helpers: parse_config, merge_configs, models, get_reader,
    get_metrics, train_with_pyreader, train_without_pyreader.
    """
    # parse config
    config = parse_config(args.config)
    train_config = merge_configs(config, 'train', vars(args))
    valid_config = merge_configs(config, 'valid', vars(args))
    train_model = models.get_model(args.model_name, train_config, mode='train')
    valid_model = models.get_model(args.model_name, valid_config, mode='valid')

    # build model
    startup = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup):
        with fluid.unique_name.guard():
            train_model.build_input(not args.no_use_pyreader)
            train_model.build_model()
            # for the input, has the form [data1, data2,..., label], so train_feeds[-1] is label
            train_feeds = train_model.feeds()
            train_feeds[-1].persistable = True
            # for the output of classification model, has the form [pred]
            train_outputs = train_model.outputs()
            for output in train_outputs:
                output.persistable = True
            train_loss = train_model.loss()
            train_loss.persistable = True
            # outputs, loss, label should be fetched, so set persistable to be true
            optimizer = train_model.optimizer()
            optimizer.minimize(train_loss)
            train_pyreader = train_model.pyreader()

    if not args.no_memory_optimize:
        fluid.memory_optimize(train_prog)

    # Validation program shares the startup program (and hence parameters).
    valid_prog = fluid.Program()
    with fluid.program_guard(valid_prog, startup):
        with fluid.unique_name.guard():
            valid_model.build_input(not args.no_use_pyreader)
            valid_model.build_model()
            valid_feeds = valid_model.feeds()
            valid_outputs = valid_model.outputs()
            valid_loss = valid_model.loss()
            valid_pyreader = valid_model.pyreader()

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup)

    if args.resume:
        # if resume weights is given, load resume weights directly
        assert os.path.exists(args.resume), \
                "Given resume weight dir {} not exist.".format(args.resume)

        def if_exist(var):
            # Only load variables that have a matching file in the resume dir.
            return os.path.exists(os.path.join(args.resume, var.name))

        fluid.io.load_vars(exe, args.resume, predicate=if_exist,
                           main_program=train_prog)
    else:
        # if not in resume mode, load pretrain weights
        if args.pretrain:
            assert os.path.exists(args.pretrain), \
                    "Given pretrain weight dir {} not exist.".format(args.pretrain)
        pretrain = args.pretrain or train_model.get_pretrain_weights()
        if pretrain:
            train_model.load_pretrain_params(exe, pretrain, train_prog, place)

    train_exe = fluid.ParallelExecutor(
        use_cuda=args.use_gpu,
        loss_name=train_loss.name,
        main_program=train_prog)
    valid_exe = fluid.ParallelExecutor(
        use_cuda=args.use_gpu,
        share_vars_from=train_exe,
        main_program=valid_prog)

    # get reader
    # With pyreader on GPU the configured batch size is the global one;
    # divide it by the GPU count to get the per-device batch size.
    bs_denominator = 1
    if (not args.no_use_pyreader) and args.use_gpu:
        bs_denominator = train_config.TRAIN.num_gpus
    train_config.TRAIN.batch_size = int(train_config.TRAIN.batch_size /
                                        bs_denominator)
    valid_config.VALID.batch_size = int(valid_config.VALID.batch_size /
                                        bs_denominator)
    train_reader = get_reader(args.model_name.upper(), 'train', train_config)
    valid_reader = get_reader(args.model_name.upper(), 'valid', valid_config)

    # get metrics
    train_metrics = get_metrics(args.model_name.upper(), 'train', train_config)
    valid_metrics = get_metrics(args.model_name.upper(), 'valid', valid_config)

    train_fetch_list = [train_loss.name] + [x.name for x in train_outputs
                                            ] + [train_feeds[-1].name]
    valid_fetch_list = [valid_loss.name] + [x.name for x in valid_outputs
                                            ] + [valid_feeds[-1].name]

    epochs = args.epoch_num or train_model.epoch_num()

    if args.no_use_pyreader:
        train_feeder = fluid.DataFeeder(place=place, feed_list=train_feeds)
        valid_feeder = fluid.DataFeeder(place=place, feed_list=valid_feeds)
        train_without_pyreader(exe, train_prog, train_exe, train_reader,
                               train_feeder, train_fetch_list, train_metrics,
                               epochs = epochs,
                               log_interval = args.log_interval,
                               valid_interval = args.valid_interval,
                               save_dir = args.save_dir,
                               save_model_name = args.model_name,
                               test_exe = valid_exe,
                               test_reader = valid_reader,
                               test_feeder = valid_feeder,
                               test_fetch_list = valid_fetch_list,
                               test_metrics = valid_metrics)
    else:
        train_pyreader.decorate_paddle_reader(train_reader)
        valid_pyreader.decorate_paddle_reader(valid_reader)
        train_with_pyreader(exe, train_prog, train_exe, train_pyreader,
                            train_fetch_list, train_metrics,
                            epochs = epochs,
                            log_interval = args.log_interval,
                            valid_interval = args.valid_interval,
                            save_dir = args.save_dir,
                            save_model_name = args.model_name,
                            test_exe = valid_exe,
                            test_pyreader = valid_pyreader,
                            test_fetch_list = valid_fetch_list,
                            test_metrics = valid_metrics)
def train(conf_dict):
    """Train a text-matching network configured by `conf_dict`.

    Supports "pairwise" mode (triplet: left / positive right / negative right)
    and pointwise mode (left / right / label). Network, loss and optimizer
    classes are loaded dynamically from the config. After each epoch the
    inference program is saved via save_inference_model.
    """
    # Get data layer
    data = layers.DataLayer()
    # Load network structure dynamically
    net = utils.import_class(
        "nets",
        conf_dict["net"]["module_name"],
        conf_dict["net"]["class_name"])(conf_dict)
    # Load loss function dynamically
    loss = utils.import_class(
        "losses",
        conf_dict["loss"]["module_name"],
        conf_dict["loss"]["class_name"])(conf_dict)
    # Load Optimization method
    optimizer = utils.import_class(
        "optimizers", "paddle_optimizers",
        conf_dict["optimizer"]["class_name"])(conf_dict)
    # Get service
    place = fluid.core.CPUPlace()
    if conf_dict["task_mode"] == "pairwise":
        # Build network: ranking with a shared left tower against a
        # positive and a negative right example.
        left = data.ops(name="left", shape=[1], dtype="int64", lod_level=1)
        pos_right = data.ops(name="right", shape=[
                             1], dtype="int64", lod_level=1)
        neg_right = data.ops(name="neg_right", shape=[
                             1], dtype="int64", lod_level=1)
        left_feat, pos_score = net.predict(left, pos_right)
        _, neg_score = net.predict(left, neg_right)
        avg_cost = loss.compute(pos_score, neg_score)
        # Get Feeder and Reader
        feeder = fluid.DataFeeder(place=place, feed_list=[
                                  left.name, pos_right.name, neg_right.name])
        reader = data_reader.get_reader(conf_dict, False, None)
    else:
        # Build network: pointwise classification with an explicit label.
        left = data.ops(name="left", shape=[1], dtype="int64", lod_level=1)
        right = data.ops(name="right", shape=[1], dtype="int64", lod_level=1)
        label = data.ops(name="label", shape=[1], dtype="int64", lod_level=0)
        left_feat, pred = net.predict(left, right)
        avg_cost = loss.compute(pred, label)
        # Get Feeder and Reader
        feeder = fluid.DataFeeder(place=place, feed_list=[
                                  left.name, right.name, label.name])
        reader = data_reader.get_reader(conf_dict, False, None)
    # Save Infer model — clone BEFORE optimizer ops are appended.
    infer_program = fluid.default_main_program().clone()
    # operate Optimization
    optimizer.ops(avg_cost)
    # optimize memory
    fluid.memory_optimize(fluid.default_main_program())
    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())
    # Get and run executor
    parallel_executor = fluid.ParallelExecutor(
        use_cuda=False,
        loss_name=avg_cost.name,
        main_program=fluid.default_main_program())
    # Get device number
    device_count = parallel_executor.device_count
    logging.info("device count: %d" % device_count)
    # run train
    logging.info("start train process ...")
    for epoch_id in range(conf_dict["epoch_num"]):
        losses = []
        # Get batch data iterator
        batch_data = paddle.batch(reader, conf_dict["batch_size"],
                                  drop_last=False)
        start_time = time.time()
        for iter, data in enumerate(batch_data()):
            # ParallelExecutor needs at least one sample per device.
            if len(data) < device_count:
                continue
            avg_loss = parallel_executor.run(
                [avg_cost.name], feed=feeder.feed(data))
            print("epoch: %d, iter: %d, loss: %f" %
                  (epoch_id, iter, np.mean(avg_loss[0])))
            losses.append(np.mean(avg_loss[0]))
        end_time = time.time()
        print("epoch: %d, loss: %f, used time: %d sec" %
              (epoch_id, np.mean(losses), end_time - start_time))
        model_save_dir = conf_dict["model_path"]
        model_path = os.path.join(model_save_dir, str(epoch_id))
        if not os.path.exists(model_save_dir):
            os.makedirs(model_save_dir)
        # Export the feed/target set matching the task mode.
        if conf_dict["task_mode"] == "pairwise":
            feed_var_names = [left.name, pos_right.name]
            target_vars = [left_feat, pos_score]
        else:
            feed_var_names = [left.name, right.name]
            target_vars = [left_feat, pred]
        fluid.io.save_inference_model(
            model_path, feed_var_names, target_vars, executor, infer_program)
def train():
    """Benchmark a seq-to-seq translation model on the WMT14 dataset.

    Builds the network with seq_to_seq_net into the default main program,
    trains with Adam, and prints throughput (words/sec). Optionally runs
    validation at the end when args.with_test is set.
    NOTE(review): Python 2 code (xrange, list-returning map); reads the
    module-level `args` and helpers to_lodtensor / lodtensor_to_ndarray.
    """
    avg_cost, feeding_list = seq_to_seq_net(args.embedding_dim,
                                            args.encoder_size,
                                            args.decoder_size,
                                            args.dict_size,
                                            args.dict_size,
                                            False,
                                            beam_size=args.beam_size,
                                            max_length=args.max_length)

    # clone from default main program (before optimizer ops are added)
    inference_program = fluid.default_main_program().clone()

    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    train_batch_generator = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.wmt14.train(args.dict_size), buf_size=1000),
        batch_size=args.batch_size)

    test_batch_generator = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.wmt14.test(args.dict_size), buf_size=1000),
        batch_size=args.batch_size)

    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    def do_validation():
        # Average loss over the whole test set using the inference program.
        total_loss = 0.0
        count = 0
        for batch_id, data in enumerate(test_batch_generator()):
            src_seq = to_lodtensor(map(lambda x: x[0], data), place)[0]
            trg_seq = to_lodtensor(map(lambda x: x[1], data), place)[0]
            lbl_seq = to_lodtensor(map(lambda x: x[2], data), place)[0]

            fetch_outs = exe.run(inference_program,
                                 feed={
                                     feeding_list[0]: src_seq,
                                     feeding_list[1]: trg_seq,
                                     feeding_list[2]: lbl_seq
                                 },
                                 fetch_list=[avg_cost],
                                 return_numpy=False)

            total_loss += lodtensor_to_ndarray(fetch_outs[0])[0]
            count += 1

        return total_loss / count

    iters, num_samples, start_time = 0, 0, time.time()
    for pass_id in xrange(args.pass_num):
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_batch_generator()):
            # Restart timing after warm-up batches.
            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            # num_samples counts words on both source and target sides.
            src_seq, word_num = to_lodtensor(map(lambda x: x[0], data), place)
            num_samples += word_num
            trg_seq, word_num = to_lodtensor(map(lambda x: x[1], data), place)
            num_samples += word_num
            lbl_seq, _ = to_lodtensor(map(lambda x: x[2], data), place)

            fetch_outs = exe.run(framework.default_main_program(),
                                 feed={
                                     feeding_list[0]: src_seq,
                                     feeding_list[1]: trg_seq,
                                     feeding_list[2]: lbl_seq
                                 },
                                 fetch_list=[avg_cost])

            iters += 1
            loss = np.array(fetch_outs[0])
            print(
                "Pass = %d, Iter = %d, Loss = %f" % (pass_id, iters, loss)
            )  # The accuracy is the accumulation of batches, but not the current batch.

        train_elapsed = time.time() - start_time
        examples_per_sec = num_samples / train_elapsed
        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
              (num_samples, train_elapsed, examples_per_sec))
        # evaluation
        if args.with_test:
            test_loss = do_validation()
        # NOTE(review): exits after the first pass — only one pass is run.
        exit(0)
def train(args):
    """Run ERNIE masked-LM + next-sentence pretraining.

    Builds train/test programs, optionally sets up NCCL2 multi-trainer
    distribution via the DistributeTranspiler, then loops until
    args.num_train_steps, periodically logging, checkpointing, and
    validating. Relies on module helpers: create_model, optimization,
    init_checkpoint, predict_wrapper, ErnieConfig, ErnieDataReader.
    """
    print("pretraining start")
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    train_program = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_program, startup_prog):
        with fluid.unique_name.guard():
            train_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                pyreader_name='train_reader', ernie_config=ernie_config)
            scheduled_lr = optimization(loss=total_loss,
                                        warmup_steps=args.warmup_steps,
                                        num_train_steps=args.num_train_steps,
                                        learning_rate=args.learning_rate,
                                        train_program=train_program,
                                        startup_prog=startup_prog,
                                        weight_decay=args.weight_decay,
                                        scheduler=args.lr_scheduler,
                                        use_fp16=args.use_fp16,
                                        loss_scaling=args.loss_scaling)

            # Keep the fetched metric variables out of memory reuse.
            fluid.memory_optimize(input_program=train_program,
                                  skip_opt_set=[
                                      next_sent_acc.name, mask_lm_loss.name,
                                      total_loss.name
                                  ])

    test_prog = fluid.Program()
    with fluid.program_guard(test_prog, startup_prog):
        with fluid.unique_name.guard():
            test_pyreader, next_sent_acc, mask_lm_loss, total_loss = create_model(
                pyreader_name='test_reader', ernie_config=ernie_config)

    test_prog = test_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    print("Device count %d" % dev_count)
    print("theoretical memory usage: ")
    if args.in_tokens:
        # In token mode batch_size is a token budget; convert to sentences.
        print(
            fluid.contrib.memory_usage(program=train_program,
                                       batch_size=args.batch_size //
                                       args.max_seq_len))
    else:
        print(
            fluid.contrib.memory_usage(program=train_program,
                                       batch_size=args.batch_size))

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    print("args.is_distributed:", args.is_distributed)
    if args.is_distributed:
        # Endpoints and identity come from the environment.
        worker_endpoints_env = os.getenv("worker_endpoints")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)
        current_endpoint = os.getenv("current_endpoint")
        trainer_id = worker_endpoints.index(current_endpoint)
        if trainer_id == 0:
            # Give the other trainers time to come up before rendezvous.
            print("train_id == 0, sleep 60s")
            time.sleep(60)
        print("worker_endpoints:{} trainers_num:{} current_endpoint:{} \
trainer_id:{}".format(worker_endpoints, trainers_num,
                      current_endpoint, trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        t = fluid.DistributeTranspiler(config=config)
        t.transpile(trainer_id,
                    trainers=worker_endpoints_env,
                    current_endpoint=current_endpoint,
                    program=train_program,
                    startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.init_checkpoint and args.init_checkpoint != "":
        init_checkpoint(exe, args.init_checkpoint, train_program,
                        args.use_fp16)

    data_reader = ErnieDataReader(filelist=args.train_filelist,
                                  batch_size=args.batch_size,
                                  vocab_path=args.vocab_path,
                                  voc_size=ernie_config['vocab_size'],
                                  epoch=args.epoch,
                                  max_seq_len=args.max_seq_len,
                                  generate_neg_sample=args.generate_neg_sample,
                                  in_tokens=args.in_tokens,
                                  is_bidirection=args.is_bidirection)

    exec_strategy = fluid.ExecutionStrategy()
    if args.use_fast_executor:
        exec_strategy.use_experimental_executor = True
    exec_strategy.num_threads = dev_count
    exec_strategy.num_iteration_per_drop_scope = min(10, args.skip_steps)

    build_strategy = fluid.BuildStrategy()
    build_strategy.remove_unnecessary_lock = False

    train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                       loss_name=total_loss.name,
                                       build_strategy=build_strategy,
                                       exec_strategy=exec_strategy,
                                       main_program=train_program,
                                       num_trainers=nccl2_num_trainers,
                                       trainer_id=nccl2_trainer_id)

    if args.valid_filelist and args.valid_filelist != "":
        predict = predict_wrapper(args,
                                  exe,
                                  ernie_config,
                                  test_prog=test_prog,
                                  pyreader=test_pyreader,
                                  fetch_list=[
                                      next_sent_acc.name, mask_lm_loss.name,
                                      total_loss.name
                                  ])

    train_pyreader.decorate_tensor_provider(data_reader.data_generator())
    train_pyreader.start()
    steps = 0
    cost = []
    lm_cost = []
    acc = []
    time_begin = time.time()
    while steps < args.num_train_steps:
        try:
            steps += nccl2_num_trainers
            skip_steps = args.skip_steps * nccl2_num_trainers

            # Only trainer 0 fetches metrics / logs / saves.
            if nccl2_trainer_id != 0:
                train_exe.run(fetch_list=[])
                continue

            if steps % skip_steps != 0:
                train_exe.run(fetch_list=[])
            else:
                each_next_acc, each_mask_lm_cost, each_total_cost, np_lr = train_exe.run(
                    fetch_list=[
                        next_sent_acc.name, mask_lm_loss.name,
                        total_loss.name, scheduled_lr.name
                    ])
                acc.extend(each_next_acc)
                lm_cost.extend(each_mask_lm_cost)
                cost.extend(each_total_cost)

                print("feed_queue size", train_pyreader.queue.size())
                time_end = time.time()
                used_time = time_end - time_begin
                epoch, current_file_index, total_file, current_file, mask_type = data_reader.get_progress(
                )
                print("current learning_rate:%f" % np_lr[0])
                print(
                    "epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                    "ppl: %f, next_sent_acc: %f, speed: %f steps/s, file: %s, mask_type: %s"
                    % (epoch, current_file_index, total_file, steps,
                       np.mean(np.array(cost)),
                       np.mean(np.exp(np.array(lm_cost))),
                       np.mean(np.array(acc)), skip_steps / used_time,
                       current_file, mask_type))
                # Reset the windows after each report.
                cost = []
                lm_cost = []
                acc = []
                time_begin = time.time()

            if steps % args.save_steps == 0:
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)

            if args.valid_filelist and steps % args.validation_steps == 0:
                vali_cost, vali_lm_cost, vali_acc, vali_steps, vali_speed = predict(
                )
                print("[validation_set] epoch: %d, step: %d, "
                      "loss: %f, global ppl: %f, batch-averged ppl: %f, "
                      "next_sent_acc: %f, speed: %f steps/s" %
                      (epoch, steps,
                       np.mean(np.array(vali_cost) / vali_steps),
                       np.exp(np.mean(np.array(vali_lm_cost) / vali_steps)),
                       np.mean(np.exp(np.array(vali_lm_cost) / vali_steps)),
                       np.mean(np.array(vali_acc) / vali_steps), vali_speed))
        except fluid.core.EOFException:
            # Data reader exhausted: stop training.
            train_pyreader.reset()
            break
# Script fragment: fit linear regression on the UCI housing dataset,
# replicating the forward/backward across devices with ParallelDo.
# NOTE(review): `x`, `y`, `place`, `device_type` and `use_nccl` are defined
# outside this chunk — presumably the data layers, device and flags set up
# earlier in the script; confirm against the full file.
places = fluid.layers.get_places(device_count=0, device_type=device_type)
pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)
with pd.do():
    # Per-device replica: read the shard, compute squared error.
    x_ = pd.read_input(x)
    y_ = pd.read_input(y)
    y_predict = fluid.layers.fc(input=x_, size=1, act=None)
    cost = fluid.layers.square_error_cost(input=y_predict, label=y_)
    avg_cost = fluid.layers.mean(x=cost)
    pd.write_output(avg_cost)

# Gather per-device losses and reduce them to one scalar.
cost = pd()
avg_cost = fluid.layers.mean(x=cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
sgd_optimizer.minimize(avg_cost)

fluid.memory_optimize(fluid.default_main_program(), print_log=True)
# fluid.release_memory(fluid.default_main_program())

BATCH_SIZE = 200

# fix the order of training data
train_reader = paddle.batch(
    paddle.dataset.uci_housing.train(), batch_size=BATCH_SIZE)

# train_reader = paddle.batch(
#     paddle.reader.shuffle(
#         paddle.dataset.uci_housing.train(), buf_size=500),
#     batch_size=BATCH_SIZE)

feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)
main_program=program)  # NOTE(review): dangling tail of a call whose opening lies outside this chunk — do not edit in isolation.

if __name__ == '__main__':
    # Segmentation inference script: load a model by name, run it over the
    # test batches one image at a time, and save argmax masks as images.
    parse = argparse.ArgumentParser(description='')
    parse.add_argument('--model', help='model name', nargs='?')
    args = parse.parse_args()
    model = args.model
    DataSet = create_reader(model)
    predict = create_model(model=model)
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    fluid.memory_optimize(fluid.default_main_program())
    load_model(exe, fluid.default_main_program(), model=model)
    # Batch size 1, fixed seed 1234 — presumably (batch_size, seed); confirm
    # against DataSet.get_batch_generator's signature.
    batches = DataSet.get_batch_generator(1, 1234)
    for i, imgs, names in batches:
        result = exe.run(fluid.default_main_program(),
                         feed={'img': imgs},
                         fetch_list=[predict])
        print(i)
        # NOTE(review): `path` is read before any assignment in this chunk and
        # is re-extended on every iteration, so output paths accumulate —
        # looks like a bug; verify intended behavior.
        path = path + 'data/unet/test/ColorImage/' + names[0].split(
            "image/")[1]
        picture = np.argmax(result[0], axis=1)
        picture = picture.reshape((1024, 1024))
        saveImage(picture, path)
def main(args):
    """Fine-tune / evaluate an ERNIE classifier.

    Depending on args.do_train / do_val / do_test: builds the train and/or
    test programs, initializes from a checkpoint or pretraining params,
    runs the training loop with periodic logging, checkpointing and
    validation, then performs final dev/test evaluation.
    Relies on module helpers: create_model, optimization, init_checkpoint,
    init_pretraining_params, evaluate, task_reader, ErnieConfig.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ClassifyReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        # In token mode batch_size is a token budget, so derive the
        # effective sentence batch before computing steps.
        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config)
                scheduled_lr = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    loss_scaling=args.loss_scaling)

                # Keep the fetched graph vars out of memory reuse.
                fluid.memory_optimize(
                    input_program=train_program,
                    skip_opt_set=[
                        graph_vars["loss"].name,
                        graph_vars["probs"].name,
                        graph_vars["accuracy"].name,
                        graph_vars["num_seqs"].name,
                    ])

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            print("Theoretical memory usage in training: %.3f - %.3f %s" %
                  (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config)

        test_prog = test_prog.clone(for_test=True)

    exe.run(startup_prog)

    if args.do_train:
        # init_checkpoint takes precedence over init_pretraining_params.
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(
                exe,
                args.init_checkpoint,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(
                exe,
                args.init_pretraining_params,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(
            exe,
            args.init_checkpoint,
            main_program=startup_prog,
            use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            loss_name=graph_vars["loss"].name,
            exec_strategy=exec_strategy,
            main_program=train_program)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    if args.do_train:
        train_pyreader.start()
        steps = 0
        if warmup_steps > 0:
            # Expose the scheduled LR so evaluate() can report it.
            graph_vars["learning_rate"] = scheduled_lr

        if args.save_log and args.log_path:
            if os.path.exists(args.log_path):
                raise FileExistsError("Logging file already exists!")
            with open(args.log_path, 'w') as logfile:
                logfile.write('%s\n' % time.asctime())
            print('Writing logs into %s' % args.log_path)

        time_begin = time.time()
        while True:
            try:
                steps += 1
                if steps % args.skip_steps != 0:
                    # Fast path: run without fetching anything.
                    train_exe.run(fetch_list=[])
                else:
                    outputs = evaluate(train_exe, train_program,
                                       train_pyreader, graph_vars, "train")

                    if args.verbose:
                        verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                        )
                        verbose += "learning rate: %f" % (
                            outputs["learning_rate"]
                            if warmup_steps > 0 else args.learning_rate)
                        print(verbose)

                    current_example, current_epoch = reader.get_train_progress()
                    time_end = time.time()
                    used_time = time_end - time_begin
                    print("epoch: %d, progress: %d/%d, step: %d, "
                          "ave loss: %.4f, micro_f1: %.4f, micro_p: %.4f, micro_r: %.4f, "
                          "speed: %f steps/s" %
                          (current_epoch, current_example, num_train_examples,
                           steps, outputs["loss"], outputs["micro_f"],
                           outputs["micro_p"], outputs["micro_r"],
                           args.skip_steps / used_time))
                    # Todo: complete logging function
                    # Todo: print more useful metrics: f1/p/r instead of acc
                    if args.save_log and args.log_path:
                        with open(args.log_path, 'a') as logfile:
                            logfile.write(
                                "epoch: %d, progress: %d/%d, step: %d, "
                                "ave loss: %.4f, ave_acc: %.4f, micro_f1: %.4f, micro_p: %.4f, micro_r: %.4f, "
                                "speed: %f steps/s\n" %
                                (current_epoch, current_example,
                                 num_train_examples, steps, outputs["loss"],
                                 outputs["accuracy"], outputs["micro_f"],
                                 outputs["micro_p"], outputs["micro_r"],
                                 args.skip_steps / used_time))
                    time_begin = time.time()

                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)

                if steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        test_pyreader.decorate_tensor_provider(
                            reader.data_generator(
                                args.dev_set,
                                batch_size=args.batch_size,
                                epoch=1,
                                shuffle=False))
                        evaluate(exe, test_prog, test_pyreader, graph_vars,
                                 "dev")
                    # evaluate test set
                    if args.do_test:
                        test_pyreader.decorate_tensor_provider(
                            reader.data_generator(
                                args.test_set,
                                batch_size=args.batch_size,
                                epoch=1,
                                shuffle=False))
                        evaluate(exe, test_prog, test_pyreader, graph_vars,
                                 "test")
            except fluid.core.EOFException:
                # Training data exhausted: save a final checkpoint and stop.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on dev set
    if args.do_val:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                args.dev_set,
                batch_size=args.batch_size,
                epoch=1,
                shuffle=False))
        print("Final validation result:")
        evaluate(exe, test_prog, test_pyreader, graph_vars, "dev")

    # final eval on test set
    if args.do_test:
        test_pyreader.decorate_tensor_provider(
            reader.data_generator(
                args.test_set,
                batch_size=args.batch_size,
                epoch=1,
                shuffle=False))
        print("Final test result:")
        evaluate(exe, test_prog, test_pyreader, graph_vars, "test")
def main():
    """Train a VGG16-BN model on cifar10 or flowers and report throughput.

    Reads all hyperparameters from the module-level ``args`` namespace
    (data_set, data_format, learning_rate, batch_size, pass_num,
    skip_batch_num, iterations, device, with_test). Builds the train,
    inference, and startup programs, runs the training loop, prints
    per-iteration and per-pass metrics, and finally prints examples/sec.
    Exits the process with status 0 when done.
    """
    # Class count and input layout depend on the chosen dataset / format.
    if args.data_set == "cifar10":
        classdim = 10
        if args.data_format == 'NCHW':
            data_shape = [3, 32, 32]
        else:
            data_shape = [32, 32, 3]
    else:
        classdim = 102
        if args.data_format == 'NCHW':
            data_shape = [3, 224, 224]
        else:
            data_shape = [224, 224, 3]

    # Input data
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    net = vgg16_bn_drop(images)
    predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator: batch accuracy weighted by the actual batch size,
    # so partial final batches are averaged correctly.
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    # Inference program: clone before the optimizer ops are appended.
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    # Optimization
    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    # Initialize executor
    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    exe = fluid.Executor(place)

    # Parameter initialization
    exe.run(fluid.default_startup_program())

    # Data readers
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.cifar.train10()
            if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
            buf_size=5120),
        batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.cifar.test10()
        if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
        batch_size=args.batch_size)

    def _to_feed(data):
        """Convert a reader mini-batch into (image, label) numpy arrays.

        Uses list comprehensions instead of ``np.array(map(...))``: under
        Python 3 ``map`` returns an iterator, which numpy would wrap as a
        0-d object array and break ``astype``/``reshape``.
        """
        img_data = np.array(
            [x[0].reshape(data_shape) for x in data]).astype("float32")
        y_data = np.array([x[1] for x in data]).astype("int64")
        y_data = y_data.reshape([-1, 1])
        return img_data, y_data

    # test
    def test(exe):
        test_accuracy = fluid.average.WeightedAverage()
        for batch_id, data in enumerate(test_reader()):
            img_data, y_data = _to_feed(data)
            acc, weight = exe.run(
                inference_program,
                feed={"pixel": img_data,
                      "label": y_data},
                fetch_list=[batch_acc, batch_size_tensor])
            test_accuracy.add(value=acc, weight=weight)
        return test_accuracy.eval()

    iters, num_samples, start_time = 0, 0, time.time()
    accuracy = fluid.average.WeightedAverage()
    for pass_id in range(args.pass_num):
        accuracy.reset()
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_reader()):
            # Restart the throughput clock after the warm-up batches.
            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            img_data, y_data = _to_feed(data)
            loss, acc, weight = exe.run(
                fluid.default_main_program(),
                feed={"pixel": img_data,
                      "label": y_data},
                fetch_list=[avg_cost, batch_acc, batch_size_tensor])
            accuracy.add(value=acc, weight=weight)
            iters += 1
            num_samples += len(y_data)
            print("Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                  (pass_id, iters, loss, acc)
                  )  # The accuracy is the accumulation of batches, but not the current batch.
            # pass_train_acc = accuracy.eval()
            train_losses.append(loss)
            train_accs.append(acc)
        print("Pass: %d, Loss: %f, Train Accuray: %f\n" %
              (pass_id, np.mean(train_losses), np.mean(train_accs)))
    train_elapsed = time.time() - start_time
    examples_per_sec = num_samples / train_elapsed
    print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
          (num_samples, train_elapsed, examples_per_sec))
    # evaluation
    if args.with_test:
        test(exe)
    exit(0)
def _build_env(self):
    """Build (once) the fluid programs for the current phase.

    Clones the base main program, constructs the net (plus labels/loss/
    metrics for train/test phases), optionally rewires inputs through a
    py_reader, applies memory optimization, compiles for data-parallel
    execution when configured, and runs the startup program.
    Idempotent: returns immediately if the environment is already built.
    """
    # NOTE: "is_inititalized" spelling is part of the env state used here.
    if self.env.is_inititalized:
        return

    self._build_env_start_event()
    self.env.is_inititalized = True
    # Fresh, trainable clone of the shared base program for this phase.
    self.env.main_program = clone_program(
        self._base_main_program, for_test=False)

    self.env.startup_program = fluid.Program()
    with fluid.program_guard(self.env.main_program,
                             self._base_startup_program):
        # Unique-name guard keeps parameter names consistent with the
        # base program so phases can share variables.
        with fluid.unique_name.guard(self.env.UNG):
            self.env.outputs = self._build_net()
            if self.is_train_phase or self.is_test_phase:
                self.env.labels = self._add_label()
                self.env.loss = self._add_loss()
                self.env.metrics = self._add_metrics()

    if self.is_predict_phase or self.is_test_phase:
        # Re-clone in test mode and force is_test on all ops
        # (disables dropout/batch-norm training behavior).
        self.env.main_program = clone_program(
            self.env.main_program, for_test=True)
        hub.common.paddle_helper.set_op_attr(
            self.env.main_program, is_test=True)

    if self.config.use_pyreader:
        # Build a new program fronted by a py_reader and splice the
        # existing main program onto it via connect_program.
        t_program = fluid.Program()
        with fluid.program_guard(t_program, self.env.startup_program):
            self.env.py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[var.shape for var in self.feed_var_list],
                dtypes=[dtype_map[var.dtype] for var in self.feed_var_list],
                lod_levels=[var.lod_level for var in self.feed_var_list],
                use_double_buffer=False)

            feed_var_list = self.feed_var_list
            py_vars = fluid.layers.read_file(self.env.py_reader)
            py_vars = to_list(py_vars)
            # Map each original feed var name to the reader's output var.
            input_dict = {
                feed_var_list[index].name: py_var
                for index, py_var in enumerate(py_vars)
            }

            hub.connect_program(
                pre_program=t_program,
                next_program=self.env.main_program,
                input_dict=input_dict,
                need_log=False)

        self.env.main_program = t_program
        # Re-resolve cached vars against the new program's global block,
        # since connect_program produced fresh variable objects.
        if not self.is_predict_phase:
            self.env.loss = self.env.main_program.global_block().vars[
                self.env.loss.name]
            metrics_name = [var.name for var in self.env.metrics]
            self.env.metrics = [
                self.env.main_program.global_block().vars[name]
                for name in metrics_name
            ]

        # NOTE(review): the collapsed source is ambiguous on whether this
        # outputs re-resolution sits inside the predict-phase check above;
        # placed at the use_pyreader level here — confirm against upstream.
        outputs_name = [var.name for var in self.env.outputs]
        self.env.outputs = [
            self.env.main_program.global_block().vars[name]
            for name in outputs_name
        ]

    if self.config.enable_memory_optim:
        # Fetched vars must be persistable or memory_optimize may reuse
        # their buffers.
        for var_name in self.fetch_list:
            var = self.env.main_program.global_block().vars[var_name]
            var.persistable = True

    if self.is_train_phase:
        # Let the fine-tuning strategy append optimizer ops / schedules.
        with fluid.program_guard(self.env.main_program,
                                 self._base_startup_program):
            with fluid.unique_name.guard(self.env.UNG):
                self.config.strategy.execute(
                    self.loss, self._base_data_reader, self.config)

    if self.is_train_phase:
        loss_name = self.env.loss.name
        share_vars_from = None
    else:
        loss_name = None
        # Non-train phases share parameters with the first compiled program.
        if self._base_compiled_program is None:
            share_vars_from = None
        else:
            share_vars_from = self._base_compiled_program

    if not self.config.use_data_parallel:
        if self.config.enable_memory_optim:
            fluid.memory_optimize(self.env.main_program)
        self.env.main_program_compiled = None
    else:
        self.env.main_program_compiled = fluid.CompiledProgram(
            self.env.main_program).with_data_parallel(
                loss_name=loss_name,
                share_vars_from=share_vars_from,
                build_strategy=self.build_strategy)
        # First compiled program becomes the var-sharing source for later
        # phases.
        if self._base_compiled_program is None:
            self._base_compiled_program = self.env.main_program_compiled

    self.exe.run(self.env.startup_program)
    self._build_env_end_event()