def append_nccl2_prepare(trainer_id):
    """Append a gen_nccl_id op to the default startup program.

    Reads the trainer topology from PADDLE_* environment variables and
    creates the NCCLID variable plus the op that generates/broadcasts it.

    Args:
        trainer_id: non-negative trainer index; a negative value signals
            that the required environment was not configured.

    Returns:
        (nccl_id_var, num_trainers, trainer_id)

    Raises:
        Exception: if trainer_id is negative.
    """
    if trainer_id >= 0:
        # append gen_nccl_id at the end of startup program
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
        port = os.getenv("PADDLE_PSERVER_PORT")
        worker_ips = os.getenv("PADDLE_TRAINER_IPS")
        worker_endpoints = []
        for ip in worker_ips.split(","):
            worker_endpoints.append(':'.join([ip, port]))
        num_trainers = len(worker_endpoints)
        current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port
        # endpoint_list passed to the op excludes the local endpoint
        worker_endpoints.remove(current_endpoint)

        nccl_id_var = fluid.default_startup_program().global_block().create_var(
            name="NCCLID",
            persistable=True,
            type=fluid.core.VarDesc.VarType.RAW)
        fluid.default_startup_program().global_block().append_op(
            type="gen_nccl_id",
            inputs={},
            outputs={"NCCLID": nccl_id_var},
            attrs={
                "endpoint": current_endpoint,
                "endpoint_list": worker_endpoints,
                "trainer_id": trainer_id
            })
        return nccl_id_var, num_trainers, trainer_id
    else:
        # BUG FIX: trainer_id == 0 is accepted above, so the requirement is
        # "non-negative", not "positive" as the original message claimed.
        raise Exception("must set non-negative PADDLE_TRAINER_ID env "
                        "variables for nccl-based dist train.")
def test_main(self):
    """Check that the in-graph Preprocessor (img/2, lbl+1) matches the
    same transform computed manually in numpy.
    """
    N = 10

    # Pass 1: read raw batches and compute the expected results in numpy.
    img_expected_res = []
    lbl_expected_res = []
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data_file = fluid.layers.io.open_recordio_file(
            './mnist_for_preprocessor_test.recordio',
            shapes=[[-1, 784], [-1, 1]],
            lod_levels=[0, 0],
            dtypes=['float32', 'int64'])
        img, lbl = fluid.layers.io.read_file(data_file)

        if fluid.core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        for _ in range(N):
            img_v, lbl_v = exe.run(fetch_list=[img, lbl])
            img_expected_res.append(img_v / 2)
            lbl_expected_res.append(lbl_v + 1)

    # Pass 2: apply the same transform via the Preprocessor block.
    img_actual_res = []
    lbl_actual_res = []
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data_file = fluid.layers.io.open_recordio_file(
            './mnist_for_preprocessor_test.recordio',
            shapes=[[-1, 784], [-1, 1]],
            lod_levels=[0, 0],
            dtypes=['float32', 'int64'])
        preprocessor = fluid.layers.io.Preprocessor(reader=data_file)
        with preprocessor.block():
            img, lbl = preprocessor.inputs()
            img_out = img / 2
            lbl_out = lbl + 1
            preprocessor.outputs(img_out, lbl_out)

        data_file = fluid.layers.io.double_buffer(preprocessor())
        img, lbl = fluid.layers.io.read_file(data_file)

        if fluid.core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        for _ in range(N):
            img_v, lbl_v = exe.run(fetch_list=[img, lbl])
            img_actual_res.append(img_v)
            lbl_actual_res.append(lbl_v)

    # BUG FIX: np.allclose returns a bool that the original discarded, so
    # this test could never fail. Assert on the result instead.
    for idx in range(N):
        self.assertTrue(
            np.allclose(img_expected_res[idx], img_actual_res[idx]))
        self.assertTrue(
            np.allclose(lbl_expected_res[idx], lbl_actual_res[idx]))
def train_loop(main_program):
    """Train the SRL model until average cost drops under the CI threshold.

    Relies on outer-scope names: exe, place, feeder, train_data, avg_cost,
    feature_out, save_dirname, embedding_name, word_dict_len, word_dim,
    PASS_NUM.
    """
    exe.run(fluid.default_startup_program())

    # Load the pre-trained word embedding into the embedding parameter.
    embedding_param = fluid.global_scope().find_var(
        embedding_name).get_tensor()
    embedding_param.set(
        load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
        place)

    feed_names = [
        'word_data', 'verb_data', 'ctx_n2_data', 'ctx_n1_data', 'ctx_0_data',
        'ctx_p1_data', 'ctx_p2_data', 'mark_data'
    ]
    start_time = time.time()
    batch_id = 0
    for pass_id in xrange(PASS_NUM):
        for data in train_data():
            fetched = exe.run(main_program,
                              feed=feeder.feed(data),
                              fetch_list=[avg_cost])
            cost = fetched[0]

            if batch_id % 10 == 0:
                print("avg_cost:" + str(cost))
                if batch_id != 0:
                    elapsed = time.time() - start_time
                    print("second per batch: " + str(elapsed / batch_id))
                # Set the threshold low to speed up the CI test
                if float(cost) < 60.0:
                    if save_dirname is not None:
                        # TODO(liuyiqun): Change the target to crf_decode
                        fluid.io.save_inference_model(
                            save_dirname, feed_names, [feature_out], exe)
                    return

            batch_id = batch_id + 1
def dist_transpile(trainer_id):
    """Transpile the default program for parameter-server dist training.

    Args:
        trainer_id: non-negative trainer index; a negative value disables
            distributed transpiling.

    Returns:
        (program, startup_program) for the current role, or (None, None)
        when trainer_id is negative.

    Raises:
        ValueError: if PADDLE_TRAINING_ROLE is neither PSERVER nor TRAINER.
    """
    if trainer_id < 0:
        return None, None

    # the port of all pservers, needed by both trainer and pserver
    port = os.getenv("PADDLE_PSERVER_PORT", "6174")
    # comma separated ips of all pservers, needed by trainer and
    # pserver
    pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
    eplist = []
    for ip in pserver_ips.split(","):
        eplist.append(':'.join([ip, port]))
    pserver_endpoints = ",".join(eplist)
    # total number of workers/trainers in the job, needed by
    # trainer and pserver
    trainers = int(os.getenv("PADDLE_TRAINERS"))
    # the IP of the local machine, needed by pserver only
    current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
    # the role, should be either PSERVER or TRAINER
    training_role = os.getenv("PADDLE_TRAINING_ROLE")

    t = distribute_transpiler.DistributeTranspiler()
    t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
    if training_role == "PSERVER":
        pserver_program = t.get_pserver_program(current_endpoint)
        pserver_startup_program = t.get_startup_program(current_endpoint,
                                                        pserver_program)
        return pserver_program, pserver_startup_program
    elif training_role == "TRAINER":
        train_program = t.get_trainer_program()
        return train_program, fluid.default_startup_program()
    else:
        # BUG FIX: the message referred to TRAINING_ROLE, but the variable
        # actually read above is PADDLE_TRAINING_ROLE.
        raise ValueError(
            'PADDLE_TRAINING_ROLE environment variable must be either '
            'TRAINER or PSERVER')
def main(self, thread_num):
    """Read three recordio files with `thread_num` reader threads and
    verify the total batch count and per-batch size bounds.
    """
    file_list = [
        './mnist_0.recordio', './mnist_1.recordio', './mnist_2.recordio'
    ]
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data_files = fluid.layers.open_files(
            filenames=file_list,
            thread_num=thread_num,
            shapes=[(-1, 784), (-1, 1)],
            lod_levels=[0, 0],
            dtypes=['float32', 'int64'])
        img, label = fluid.layers.read_file(data_files)

        place = (fluid.CUDAPlace(0)
                 if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace())
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        batch_count = 0
        while True:
            try:
                img_val, = exe.run(fetch_list=[img])
            except fluid.core.EnforceNotMet as ex:
                # The reader signals exhaustion by raising; stop here.
                self.assertIn("There is no next data.", ex.message)
                break
            batch_count += 1
            self.assertLessEqual(img_val.shape[0], self.batch_size)

        self.assertEqual(batch_count, self.num_batch * 3)
def train_loop(main_program):
    """Train until test accuracy exceeds the deliberately low CI threshold.

    Relies on outer-scope names: exe, feeder, train_reader, test_reader,
    test_program, avg_cost, acc, predict, save_dirname, PASS_NUM.
    """
    exe.run(fluid.default_startup_program())
    loss = 0.0
    for pass_id in range(PASS_NUM):
        for batch_id, data in enumerate(train_reader()):
            exe.run(main_program, feed=feeder.feed(data))
            # Evaluate only every 10th batch.
            if (batch_id % 10) != 0:
                continue

            test_accs = []
            test_losses = []
            for tid, test_data in enumerate(test_reader()):
                loss_t, acc_t = exe.run(program=test_program,
                                        feed=feeder.feed(test_data),
                                        fetch_list=[avg_cost, acc])
                if math.isnan(float(loss_t)):
                    sys.exit("got NaN loss, training failed.")
                test_accs.append(float(acc_t))
                test_losses.append(float(loss_t))
                break  # Use 1 segment for speeding up CI

            acc_value = numpy.array(test_accs).mean()
            avg_loss_value = numpy.array(test_losses).mean()
            print(
                'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
                format(pass_id, batch_id + 1, float(avg_loss_value),
                       float(acc_value)))

            if acc_value > 0.01:  # Low threshold for speeding up CI
                fluid.io.save_inference_model(save_dirname, ["pixel"],
                                              [predict], exe)
                return
def test_calc_gradient(self):
    """Smoke-test calc_gradient of mean(mul(x, y)) w.r.t. an intermediate
    op output and w.r.t. an input parameter.
    """
    x = layers.create_parameter(dtype="float32", shape=[5, 10])
    y = layers.create_parameter(dtype="float32", shape=[10, 8])
    mul_out = layers.mul(x=x, y=y)
    mean_out = layers.mean(mul_out)

    # Gradients w.r.t. the mul output and w.r.t. x respectively.
    grad_wrt_mul = calc_gradient(mean_out, mul_out)
    grad_wrt_x = calc_gradient(mean_out, x)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    exe.run(fluid.default_main_program(),
            feed={},
            fetch_list=[grad_wrt_mul, grad_wrt_x])
def main():
    """Benchmark entry point: build the model, then dispatch to local,
    parameter-server, or nccl2 training depending on args.update_method.
    """
    args = parse_args()
    print_arguments(args)

    # the unique trainer id, starting from 0, needed by trainer
    # only
    nccl_id_var, num_trainers, trainer_id = (
        None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1")))

    if args.use_cprof:
        # enable Python-level profiling for the whole run
        pr = cProfile.Profile()
        pr.enable()
    # dynamically import models/<args.model>.py and build the network
    model_def = __import__("models.%s" % args.model, fromlist=["models"])
    train_args = list(model_def.get_model(args))
    train_args.append(args)
    # Run optimizer.minimize(avg_loss)
    # assumes get_model returns (loss, ..., optimizer, ...) so that
    # train_args[0] is the loss and train_args[2] the optimizer — TODO confirm
    train_args[2].minimize(train_args[0])
    if args.memory_optimize:
        fluid.memory_optimize(fluid.default_main_program())

    if args.update_method == "pserver":
        train_prog, startup_prog = dist_transpile(trainer_id)
        if not train_prog:
            raise Exception(
                "Must configure correct environments to run dist train.")
        train_args.extend([train_prog, startup_prog])
        if args.gpus > 1 and os.getenv("PADDLE_TRAINING_ROLE") == "TRAINER":
            train_args.extend([nccl_id_var, num_trainers, trainer_id])
            # NOTE(review): after train_parallel() returns, control falls
            # through to train() below as well — confirm this is intended
            # (an early exit here may be missing).
            train_parallel(*train_args)
        train(*train_args)
        exit(0)

    # for other update methods, use default programs
    train_args.append(fluid.default_main_program())
    train_args.append(fluid.default_startup_program())

    if args.update_method == "nccl2":
        nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare(trainer_id)
    if args.gpus == 1:
        # NOTE: the parallel executor uses the profiler internally
        if args.use_nvprof and args.device == 'GPU':
            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
                train(*train_args)
        else:
            train(*train_args)
    else:
        if args.device == "CPU":
            raise Exception("Only support GPU perf with parallel exe")
        train_args.extend([nccl_id_var, num_trainers, trainer_id])
        train_parallel(*train_args)
def run_program(self):
    """Execute the default program on every available place and collect
    the fetched (non-numpy) outputs into self.actual_outputs.
    """
    outputs = []
    available_places = [core.CPUPlace()]
    if core.is_compiled_with_cuda():
        available_places.append(core.CUDAPlace(0))

    for place in available_places:
        self.set_inputs(place)
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        result = exe.run(fluid.default_main_program(),
                         feed=self.inputs,
                         fetch_list=self.fetch_list,
                         return_numpy=False)
        outputs.append(result)

    self.actual_outputs = outputs
def run_program(self):
    """Run the test program.

    Executes the default main program on CPU (and GPU when available);
    the numpy output of the last place that ran is kept in self.op_output.
    """
    candidate_places = [core.CPUPlace()]
    if core.is_compiled_with_cuda():
        candidate_places.append(core.CUDAPlace(0))

    for place in candidate_places:
        self.set_inputs(place)
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        fetched = exe.run(fluid.default_main_program(),
                          feed=self.inputs,
                          fetch_list=self.fetch_list,
                          return_numpy=True)
        self.op_output = fetched
def train_loop(main_program):
    """Train until cost < 0.4 and acc > 0.8; abort on NaN; raise if the
    target is never reached within PASS_NUM passes.

    Relies on outer-scope names: exe, feeder, train_data, cost, acc_out,
    prediction, save_dirname, net_method, PASS_NUM.
    """
    exe.run(fluid.default_startup_program())
    for pass_id in xrange(PASS_NUM):
        for data in train_data():
            fetched = exe.run(main_program,
                              feed=feeder.feed(data),
                              fetch_list=[cost, acc_out])
            cost_val, acc_val = fetched
            print("cost=" + str(cost_val) + " acc=" + str(acc_val))
            if cost_val < 0.4 and acc_val > 0.8:
                if save_dirname is not None:
                    fluid.io.save_inference_model(save_dirname, ["words"],
                                                  prediction, exe)
                return
            if math.isnan(float(cost_val)):
                sys.exit("got NaN loss, training failed.")
    raise AssertionError("Cost is too large for {0}".format(
        net_method.__name__))
def train_loop(main_program):
    """Train word2vec until the average cost drops below 5.0; abort on
    NaN; raise if the threshold is never reached.

    Relies on outer-scope names: exe, feeder, train_reader, avg_cost,
    predict_word, save_dirname, PASS_NUM.
    """
    exe.run(fluid.default_startup_program())
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            avg_cost_np = exe.run(main_program,
                                  feed=feeder.feed(data),
                                  fetch_list=[avg_cost])
            current_cost = avg_cost_np[0]
            if current_cost < 5.0:
                if save_dirname is not None:
                    # NOTE: 'forthw' is the feed var name used at save time.
                    fluid.io.save_inference_model(
                        save_dirname,
                        ['firstw', 'secondw', 'thirdw', 'forthw'],
                        [predict_word], exe)
                return
            if math.isnan(float(current_cost)):
                sys.exit("got NaN loss, training failed.")
    raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0]))
def test_nvprof(self):
    """Run a small conv net under the CUDA profiler, then delete the
    generated trace file. Skipped when CUDA is unavailable.
    """
    if not fluid.core.is_compiled_with_cuda():
        return

    epoc = 8
    dshape = [4, 3, 28, 28]
    data = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
    conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    output_file = 'cuda_profiler.txt'
    with profiler.cuda_profiler(output_file, 'csv') as nvprof:
        for _ in range(epoc):
            batch = np.random.random(dshape).astype('float32')
            exe.run(fluid.default_main_program(), feed={'data': batch})
    os.remove(output_file)
def train_loop(main_program):
    """Fit-a-line training loop: stop (and optionally save the inference
    model) once the average loss falls below 10; abort on NaN.

    Relies on outer-scope names: exe, place, x, y, train_reader, avg_cost,
    y_predict, save_dirname.
    """
    feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
    exe.run(fluid.default_startup_program())

    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            loss_val, = exe.run(main_program,
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost])
            print(loss_val)
            if loss_val[0] < 10.0:
                if save_dirname is not None:
                    fluid.io.save_inference_model(save_dirname, ['x'],
                                                  [y_predict], exe)
                return
            if math.isnan(float(loss_val)):
                sys.exit("got NaN loss, training failed.")
    raise AssertionError("Fit a line cost is too large, {0:2.2}".format(
        loss_val[0]))
def test_main(self, decorator_callback=None):
    """Train a small MLP from a recordio reader for one pass and check
    that the batch count matches and the loss decreased.
    """
    # use new program
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data_file = fluid.layers.open_recordio_file(
            './mnist.recordio',
            shapes=[[-1, 784], [-1, 1]],
            lod_levels=[0, 0],
            dtypes=['float32', 'int64'])
        if decorator_callback is not None:
            data_file = decorator_callback(data_file)
        img, label = fluid.layers.read_file(data_file)

        hidden = fluid.layers.fc(input=img, size=100, act='tanh')
        prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
        loss = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_loss = fluid.layers.mean(loss)
        fluid.optimizer.Adam(learning_rate=1e-3).minimize(avg_loss)

        place = (fluid.CUDAPlace(0)
                 if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace())
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        # train a pass
        avg_loss_np = []
        batch_id = 0
        while True:
            try:
                tmp, = exe.run(fetch_list=[avg_loss])
            except fluid.core.EnforceNotMet as ex:
                # the reader raises when exhausted — end of the pass
                self.assertIn("There is no next data.", ex.message)
                break
            avg_loss_np.append(tmp)
            batch_id += 1

        self.assertEqual(batch_id, self.num_batches)
        self.assertLess(avg_loss_np[-1], avg_loss_np[0])
def check_result(self, fn, place, dtype):
    """Check that `fn` applied to fluid variables gives the same result
    as `fn` applied directly to equivalent numpy arrays.
    """
    shape = [9, 10]
    x_data = np.random.random(size=shape).astype(dtype)
    y_data = np.random.random(size=shape).astype(dtype)
    expected = fn(x_data, y_data)

    # Build the same computation on graph variables named 'x' and 'y'.
    def make_var(name):
        return layers.create_global_var(
            name=name, shape=shape, value=0.0, dtype=dtype, persistable=True)

    out = fn(make_var('x'), make_var('y'))

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    actual = exe.run(fluid.default_main_program(),
                     feed={'x': x_data,
                           'y': y_data},
                     fetch_list=[out])
    np.testing.assert_array_equal(expected, actual[0])
def train_loop(main_program):
    """Train recognize-digits; every 10th batch evaluate on the full test
    set and stop once test accuracy passes the (low) CI threshold.

    Args:
        main_program: the fluid Program holding the training graph.

    Relies on outer-scope names: exe, feeder, train_reader, test_reader,
    test_program, acc, avg_loss, prediction, save_dirname,
    model_filename, params_filename.
    """
    exe.run(fluid.default_startup_program())

    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for batch_id, data in enumerate(train_reader()):
            # train a mini-batch, fetch nothing
            exe.run(main_program, feed=feeder.feed(data))
            if (batch_id + 1) % 10 == 0:
                acc_set = []
                avg_loss_set = []
                for test_data in test_reader():
                    acc_np, avg_loss_np = exe.run(
                        program=test_program,
                        feed=feeder.feed(test_data),
                        fetch_list=[acc, avg_loss])
                    acc_set.append(float(acc_np))
                    avg_loss_set.append(float(avg_loss_np))
                # get test acc and loss
                acc_val = numpy.array(acc_set).mean()
                avg_loss_val = numpy.array(avg_loss_set).mean()
                if float(acc_val) > 0.2:  # Smaller value to increase CI speed
                    if save_dirname is not None:
                        fluid.io.save_inference_model(
                            save_dirname, ["img"], [prediction],
                            exe,
                            model_filename=model_filename,
                            params_filename=params_filename)
                    return
                else:
                    print(
                        'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
                        format(pass_id, batch_id + 1,
                               float(avg_loss_val), float(acc_val)))
                    if math.isnan(float(avg_loss_val)):
                        sys.exit("got NaN loss, training failed.")
    raise AssertionError("Loss of recognize digits is too large")
def train_loop(main_program):
    """Run one full training epoch per pass, then evaluate on the whole
    validation set; stop early when validation accuracy (percent)
    reaches target_val_acc.

    Relies on outer-scope names: exe, feeder, train_reader, test_reader,
    test_program, cost, acc_out, PASS_NUM, target_val_acc.
    """
    exe.run(fluid.default_startup_program())
    for pass_id in xrange(PASS_NUM):
        train_loss_set = []
        train_acc_set = []
        # Calculate average training loss and accuracy
        # across all mini-batches in the training set
        for batch_id, data in enumerate(train_reader()):
            cost_val, acc_val = exe.run(main_program,
                                        feed=feeder.feed(data),
                                        fetch_list=[cost, acc_out])
            train_loss_set.append(float(cost_val))
            train_acc_set.append(float(acc_val))
        train_loss = np.array(train_loss_set).mean()
        train_acc = np.array(train_acc_set).mean() * 100

        # Calculate average validation loss and accuracy
        # across all mini-batches in the validation set
        acc_set = []
        avg_loss_set = []
        for tid, test_data in enumerate(test_reader()):
            avg_loss_np, acc_np = exe.run(
                program=test_program,
                feed=feeder.feed(test_data),
                fetch_list=[cost, acc_out])
            acc_set.append(float(acc_np))
            avg_loss_set.append(float(avg_loss_np))
        acc_val = np.array(acc_set).mean() * 100
        avg_loss_val = np.array(avg_loss_set).mean()

        print("Epoch =", pass_id, ", train-accuracy =", train_acc,
              ", train-loss =", train_loss, ", validation-accuracy =",
              acc_val, ", validation-loss =", avg_loss_val)
        if acc_val > target_val_acc:
            ## Exit the program on reaching desired accuracy value
            break
def build_model(self):
    """Build and train the conditional GAN.

    Per batch: one discriminator update on the real batch and one on a
    generated batch, then cfg.num_generator_time generator updates.
    Every cfg.print_freq batches a sample grid (from fixed noise const_n)
    is written to <cfg.output>/images; checkpoints are saved per epoch
    when cfg.save_checkpoints is set.
    """
    img = fluid.layers.data(name='img', shape=[784], dtype='float32')
    condition = fluid.layers.data(
        name='condition', shape=[1], dtype='float32')
    noise = fluid.layers.data(
        name='noise', shape=[self.cfg.noise_size], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='float32')

    g_trainer = GTrainer(noise, condition, self.cfg)
    d_trainer = DTrainer(img, condition, label, self.cfg)

    # prepare environment
    place = fluid.CUDAPlace(0) if self.cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # fixed noise, reused for the periodic sample images
    const_n = np.random.uniform(
        low=-1.0,
        high=1.0,
        size=[self.cfg.batch_size, self.cfg.noise_size]).astype('float32')

    if self.cfg.init_model:
        utility.init_checkpoints(self.cfg, exe, g_trainer, "net_G")
        utility.init_checkpoints(self.cfg, exe, d_trainer, "net_D")

    ### memory optim
    build_strategy = fluid.BuildStrategy()
    build_strategy.enable_inplace = True
    build_strategy.memory_optimize = False

    g_trainer_program = fluid.CompiledProgram(
        g_trainer.program).with_data_parallel(
            loss_name=g_trainer.g_loss.name, build_strategy=build_strategy)
    d_trainer_program = fluid.CompiledProgram(
        d_trainer.program).with_data_parallel(
            loss_name=d_trainer.d_loss.name, build_strategy=build_strategy)

    t_time = 0
    # losses[0] accumulates generator losses, losses[1] discriminator losses
    losses = [[], []]
    for epoch_id in range(self.cfg.epoch):
        for batch_id, data in enumerate(self.train_reader()):
            # skip ragged final batches so feed shapes stay fixed
            if len(data) != self.cfg.batch_size:
                continue
            noise_data = np.random.uniform(
                low=-1.0,
                high=1.0,
                size=[self.cfg.batch_size, self.cfg.noise_size]).astype(
                    'float32')
            real_image = np.array(list(map(lambda x: x[0], data))).reshape(
                [-1, 784]).astype('float32')
            condition_data = np.array([x[1] for x in data
                                       ]).reshape([-1, 1]).astype('float32')
            real_label = np.ones(
                shape=[real_image.shape[0], 1], dtype='float32')
            fake_label = np.zeros(
                shape=[real_image.shape[0], 1], dtype='float32')
            s_time = time.time()

            # generate a fake batch from fresh noise
            generate_image = exe.run(g_trainer.infer_program,
                                     feed={
                                         'noise': noise_data,
                                         'condition': condition_data
                                     },
                                     fetch_list=[g_trainer.fake])

            # discriminator step: real batch then generated batch
            d_real_loss = exe.run(d_trainer_program,
                                  feed={
                                      'img': real_image,
                                      'condition': condition_data,
                                      'label': real_label
                                  },
                                  fetch_list=[d_trainer.d_loss])[0]
            d_fake_loss = exe.run(d_trainer_program,
                                  feed={
                                      'img': generate_image,
                                      'condition': condition_data,
                                      'label': fake_label
                                  },
                                  fetch_list=[d_trainer.d_loss])[0]
            d_loss = d_real_loss + d_fake_loss
            losses[1].append(d_loss)

            # generator steps (several per discriminator step)
            for _ in six.moves.xrange(self.cfg.num_generator_time):
                g_loss = exe.run(g_trainer_program,
                                 feed={
                                     'noise': noise_data,
                                     'condition': condition_data
                                 },
                                 fetch_list=[g_trainer.g_loss])[0]
                losses[0].append(g_loss)

            batch_time = time.time() - s_time
            t_time += batch_time

            if batch_id % self.cfg.print_freq == 0:
                image_path = self.cfg.output + '/images'
                if not os.path.exists(image_path):
                    os.makedirs(image_path)
                # NOTE(review): fetch_list here is a set literal, unlike the
                # list used everywhere else — confirm this is intentional.
                generate_const_image = exe.run(g_trainer.infer_program,
                                               feed={
                                                   'noise': const_n,
                                                   'condition': condition_data
                                               },
                                               fetch_list={g_trainer.fake
                                                           })[0]
                generate_image_reshape = np.reshape(
                    generate_const_image, (self.cfg.batch_size, -1))
                total_images = np.concatenate(
                    [real_image, generate_image_reshape])
                fig = utility.plot(total_images)
                print(
                    'Epoch ID={} Batch ID={} D_loss={} G_loss={} Batch_time_cost={:.2f}'
                    .format(epoch_id, batch_id, d_loss[0], g_loss[0],
                            batch_time))
                plt.title('Epoch ID={}, Batch ID={}'.format(
                    epoch_id, batch_id))
                plt.savefig(
                    '{}/{:04d}_{:04d}.png'.format(image_path, epoch_id,
                                                  batch_id),
                    bbox_inches='tight')
                plt.close(fig)

        if self.cfg.save_checkpoints:
            utility.checkpoints(epoch_id, self.cfg, exe, g_trainer, "net_G")
            utility.checkpoints(epoch_id, self.cfg, exe, d_trainer, "net_D")
def _init_train(self):
    """Build the multi-task training graph.

    Creates a reader and task paradigm per task instance, checks i/o
    compatibility across reader/backbone/task layers, merges input attrs,
    builds a switch_case over per-task losses (selected by the `branch`
    input), sets up the optimizer (plus optional warmup and EMA), compiles
    the data-parallel train program, and runs the startup program.
    """
    instances = self.instances
    Backbone = self.Backbone
    bb_conf = self.bb_conf
    bb_name = self.bb_name
    dev_count = self.dev_count
    num_instances = len(instances)
    mrs = self.mrs
    # scalar input selecting which task branch to run on a given step
    branch = fluid.data(name="branch", shape=[1], dtype='int64')

    # set first_target/main task instance
    main_inst = None
    for inst in instances:
        if inst.is_target:
            main_inst = inst
            inst.is_first_target = True
            break
    main_conf = main_inst.config
    if not os.path.exists(main_conf['save_path']):
        os.makedirs(main_conf['save_path'])
        os.makedirs(os.path.join(main_conf['save_path'], 'ckpt'))

    # prepare backbone
    train_backbone = Backbone(bb_conf, phase='train')
    pred_backbone = Backbone(bb_conf, phase='pred')

    # create reader, task
    # then check i/o across reader, backbone and task_layer
    # check_fns = {}
    task_attrs = {}
    pred_task_attrs = []
    joint_input_names = {}
    joint_shape_and_dtypes = {}
    name_to_position = {}
    for i in range(num_instances):
        # def check_tasks():
        #     i = s
        #     def checkeach():
        train_reader = instances[i].Reader(instances[i].config, phase='train')
        instances[i].reader['train'] = train_reader
        train_parad = instances[i].Paradigm(
            instances[i].config, phase='train', backbone_config=bb_conf)
        instances[i].task_layer['train'] = train_parad
        # prefix reader attrs with the instance name to keep them unique
        task_attr_from_reader = _encode_inputs(
            train_parad.inputs_attrs['reader'], instances[i].name)
        task_attrs[i] = task_attr_from_reader

        # validate that reader outputs satisfy backbone and paradigm inputs
        _check_io(train_backbone.inputs_attr, train_reader.outputs_attr,
                  in_name=bb_name + '_backbone', out_name='reader.train')
        _check_io(train_parad.inputs_attrs['reader'],
                  train_reader.outputs_attr,
                  in_name='task_paradigm.train.reader',
                  out_name='reader.train')
        _check_io(train_parad.inputs_attrs['backbone'],
                  train_backbone.outputs_attr,
                  in_name='task_paradigm.train.backbone',
                  out_name=bb_name + '_backbone')
        # merge reader input attrs from backbone and task_instances
        # pred_joint_input_names = []
        # pred_joint_shape_and_dtypes = []
        if instances[i].is_target:
            if 'pred_file' not in instances[i].config:
                instances[i].config['pred_file'] = ''
            pred_reader = instances[i].Reader(
                instances[i].config, phase='pred')
            pred_parad = instances[i].Paradigm(
                instances[i].config, phase='pred', backbone_config=bb_conf)
            instances[i].task_layer['pred'] = pred_parad
            task_attr_from_reader = _encode_inputs(
                pred_parad.inputs_attrs['reader'], instances[i].name)
            pred_task_attrs.append(task_attr_from_reader)
            _check_io(pred_backbone.inputs_attr, pred_reader.outputs_attr,
                      in_name=bb_name + '_backbone', out_name='reader.pred')
            _check_io(pred_parad.inputs_attrs['reader'],
                      pred_reader.outputs_attr,
                      in_name='task_paradigm.pred.reader',
                      out_name='reader.pred')
            _check_io(pred_parad.inputs_attrs['backbone'],
                      pred_backbone.outputs_attr,
                      in_name='task_paradigm.pred.backbone',
                      out_name=bb_name + '_backbone')
            # pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(pred_backbone.inputs_attr, pred_task_attrs, insert_taskid=False, insert_batchsize=False, insert_seqlen=False, insert_batchsize_x_seqlen=False)
        # return joint_input_names[i], joint_shape_and_dtypes[i], name_to_position[i], pred_joint_input_names, pred_joint_shape_and_dtypes
        # return checkeach
        # check_fns[i] = check_tasks()
        joint_input_names[i], joint_shape_and_dtypes[i], name_to_position[
            i] = merge_input_attrs(train_backbone.inputs_attr, task_attrs[i])

    pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(
        pred_backbone.inputs_attr,
        pred_task_attrs,
        insert_taskid=False,
        insert_batchsize=False,
        insert_seqlen=False,
        insert_batchsize_x_seqlen=False)
    # shapes: [task_id, shapes_of_backbone, shapes_of_inst1, ..., shapes_of_instN]

    if DEBUG:
        print('----- for debug -----')
        print('joint input names:')
        print(joint_input_names)
        print('joint input shape and dtypes:')
        print(joint_shape_and_dtypes)

    # load data
    data_fns = {}
    for i in range(num_instances):
        print(instances[i].name + ": preparing data...", end='')
        instances[i].reader['train'].load_data()
        print('ok!')

    # merge dataset iterators and create net input vars
    iterators = []
    prefixes = []
    # NOTE(review): this rebinds the local `mrs` read from self.mrs above —
    # from here on the per-instance mix ratios are used instead; confirm.
    mrs = []
    for inst in instances:
        iterators.append(inst.reader['train'].iterator())
        prefixes.append(inst.name)
        mrs.append(inst.mix_ratio)

    joint_iterator_fn = create_joint_iterator_fn(
        iterators,
        prefixes,
        joint_shape_and_dtypes,
        mrs,
        name_to_position,
        dev_count=dev_count,
        verbose=VERBOSE,
        return_type='dict')
    self._joint_iterator_fn = joint_iterator_fn

    input_attrs = {}
    net_inputs = {}
    bb_output_vars = {}
    bb_output_fns = {}

    # prepare predict vars for saving inference model
    pred_input_attrs = [[i, j, k] for i, (
        j, k) in zip(pred_joint_input_names, pred_joint_shape_and_dtypes)]
    pred_prog = fluid.Program()
    pred_init_prog = fluid.Program()
    self._pred_prog = pred_prog
    with fluid.program_guard(main_program=pred_prog,
                             startup_program=pred_init_prog):
        pred_net_inputs = create_net_inputs(pred_input_attrs)
        pred_bb_output_vars = pred_backbone.build(
            pred_net_inputs, scope_name='__paddlepalm_')

    task_inputs = {}
    task_output_vars = {}
    task_fns = {}

    def get_loss(i):
        # Build the train-side graph for task i and return its loss var.
        input_attrs[i] = [[m, j, k] for m, (
            j, k) in zip(joint_input_names[i], joint_shape_and_dtypes[i])]
        # NOTE(review): `async` is a reserved word in Python >= 3.7, so this
        # call only parses on older interpreters — confirm target version.
        net_inputs[i] = create_net_inputs(input_attrs[i], async=False)
        # net_inputs = create_net_inputs(input_attrs, async=True, iterator_fn=joint_iterator_fn, dev_count=dev_count, n_prefetch=3)
        bb_output_vars[i] = train_backbone.build(
            net_inputs[i], scope_name='__paddlepalm_')
        assert sorted(bb_output_vars[i].keys()) == sorted(
            train_backbone.outputs_attr.keys())

        # build backbone and task layers
        task_inputs[i] = {'backbone': bb_output_vars[i]}
        task_inputs_from_reader = _decode_inputs(net_inputs[i],
                                                 instances[i].name)
        task_inputs[i]['reader'] = task_inputs_from_reader

        scope = instances[i].task_reuse_scope + '/'
        with fluid.unique_name.guard(scope):
            output_vars = instances[i].build_task_layer(
                task_inputs[i], phase='train', scope=scope)
            # namespace outputs by instance name; the loss is looked up below
            output_vars = {
                instances[i].name + '/' + key: val
                for key, val in output_vars.items()
            }
            loss_var = output_vars[instances[i].name + '/loss']
            task_output_vars[i] = output_vars

        if instances[i].is_target:
            # also build the pred-side task layer inside the pred program
            with fluid.program_guard(pred_prog, pred_init_prog):
                cur_inputs = _decode_inputs(pred_net_inputs,
                                            instances[i].name)
                instances[i].pred_input = cur_inputs
                pred_task_inputs = {
                    'backbone': pred_bb_output_vars,
                    'reader': cur_inputs
                }
                scope = instances[i].task_reuse_scope + '/'
                with fluid.unique_name.guard(scope):
                    instances[i].build_task_layer(
                        pred_task_inputs, phase='pred', scope=scope)
        return loss_var

    for i in range(num_instances):
        # bind the current i into the branch closure (avoids late binding)
        def task_loss():
            task_id = i
            return lambda: get_loss(task_id)

        task_fns[i] = task_loss()

    # one loss per task, selected at runtime by the `branch` input
    loss = layers.switch_case(branch_index=branch, branch_fns=task_fns)
    self._switched_loss = loss.name
    main_reader = main_inst.reader['train']

    num_examples = main_reader.num_examples
    for inst in instances:
        max_train_steps = int(main_conf['num_epochs'] * inst.mix_ratio *
                              (num_examples // main_conf['batch_size'] //
                               dev_count))
        if inst.is_target:
            print('{}: expected train steps {}.'.format(
                inst.name, max_train_steps))
        inst.steps_pur_epoch = inst.reader[
            'train'].num_examples // main_conf['batch_size'] // dev_count
        inst.expected_train_steps = max_train_steps

    global_max_train_steps = int(main_conf['num_epochs'] * sum(mrs) *
                                 (num_examples // main_conf['batch_size'] //
                                  dev_count))
    print(
        'Estimated overall train steps {}.'.format(global_max_train_steps))

    if 'warmup_proportion' in main_conf and main_conf[
            'warmup_proportion'] > 0:
        warmup_steps = int(global_max_train_steps *
                           main_conf['warmup_proportion'])
        print('Warmup steps: ' + str(warmup_steps))
    else:
        warmup_steps = 0

    # build optimizer
    if 'optimizer' in main_conf:
        optim_mod = importlib.import_module(OPTIMIZER_DIR + '.' +
                                            main_conf['optimizer'])
        optimize = getattr(optim_mod, OPTIMIZE_METHOD)
        # NOTE(review): max_train_steps holds the value from the LAST loop
        # iteration above — confirm that is the intended schedule length.
        optimize(loss, main_conf, max_train_steps, warmup_steps,
                 fluid.default_main_program())

    loss.persistable = True
    if main_conf.get('use_ema', False):
        assert 'ema_decay' in main_conf, \
            "ema_decay should be set when use_ema is enabled."
        ema = fluid.optimizer.ExponentialMovingAverage(
            main_conf['ema_decay'])
        ema.update()

    # prepare for train
    self.train_backbone = train_backbone
    self.train_program = fluid.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            loss_name=loss.name)
    self.saver_program = fluid.default_main_program()

    self.main_inst = main_inst
    self.has_init_train = True
    self.has_init_pred = True
    self._net_inputs = net_inputs

    self.exe.run(fluid.default_startup_program())
    print("\nRandomly initialize parameters...\n")
def train(place, save_dirname):
    """Train VGG/ResNet on cifar10 or imagenet (flowers) and save the
    inference model once test accuracy exceeds args.threshold.

    Args:
        place: fluid place (CPUPlace or CUDAPlace) to run on.
        save_dirname: directory the inference model is written to.

    Raises:
        ValueError: for an unsupported dataset or network name.
    """
    if args.data_set == "cifar10":
        class_dim = 10
        data_shape = [3, 32, 32]
    elif args.data_set == "imagenet":
        class_dim = 102
        data_shape = [3, 224, 224]
    else:
        # BUG FIX: the original referenced the undefined name `data_set`
        # here, which raised NameError instead of the intended ValueError.
        raise ValueError("%s dataset is not supported" % args.data_set)

    images = fluid.layers.data(
        name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if args.model == "vgg":
        print("train vgg")
        net = vgg16(images)
    elif args.model == "resnet":
        print("train resnet")
        if args.data_set == "cifar10":
            net = resnet_cifar10(images)
        elif args.data_set == "imagenet":
            net = resnet_imagenet(images)
        else:
            raise ValueError("%s dataset is not supported" % args.data_set)
    else:
        raise ValueError("%s network is not supported" % args.model)

    predict = fluid.layers.fc(input=net, size=class_dim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc = fluid.layers.accuracy(input=predict, label=label)

    # Test program
    test_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    optimizer.minimize(avg_cost)

    BATCH_SIZE = args.train_batch_size
    PASS_NUM = 100

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.flowers.train()
            if args.data_set == 'imagenet' else paddle.dataset.cifar.train10(),
            buf_size=128 * 10),
        batch_size=args.train_batch_size)

    test_reader = paddle.batch(
        paddle.dataset.flowers.test()
        if args.data_set == 'imagenet' else paddle.dataset.cifar.test10(),
        batch_size=args.inf_batch_size)

    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
    exe.run(fluid.default_startup_program())
    main_program = fluid.default_main_program()

    for pass_id in range(PASS_NUM):
        for batch_id, data in enumerate(train_reader()):
            # BUG FIX (Py3 compatibility): map() returns an iterator on
            # Python 3; materialize with list() so np.array gets a sequence
            # (a no-op on Python 2).
            train_image = np.array(
                list(map(lambda x: x[0].reshape(data_shape),
                         data))).astype("float32")
            train_label = np.array(list(map(lambda x: x[1],
                                            data))).astype("int64")
            train_label = train_label.reshape([-1, 1])
            exe.run(main_program,
                    feed={'pixel': train_image,
                          'label': train_label})

            if (batch_id % 100) == 0:
                acc_list = []
                avg_loss_list = []
                for tid, test_data in enumerate(test_reader()):
                    test_image = np.array(
                        list(map(lambda x: x[0].reshape(data_shape),
                                 test_data))).astype("float32")
                    test_label = np.array(
                        list(map(lambda x: x[1],
                                 test_data))).astype("int64")
                    test_label = test_label.reshape([-1, 1])
                    loss_t, acc_t = exe.run(
                        program=test_program,
                        feed={"pixel": test_image,
                              "label": test_label},
                        fetch_list=[avg_cost, acc])
                    if math.isnan(float(loss_t)):
                        sys.exit("got NaN loss, training failed.")
                    acc_list.append(float(acc_t))
                    avg_loss_list.append(float(loss_t))

                acc_value = np.array(acc_list).mean()
                avg_loss_value = np.array(avg_loss_list).mean()
                print(
                    'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Accuracy {3:2.2}'.
                    format(pass_id, batch_id + 1, float(avg_loss_value),
                           float(acc_value)))

                if acc_value > args.threshold:
                    print(
                        'Save inference model with test accuracy of {0} at {1}'.
                        format(float(acc_value), save_dirname))
                    fluid.io.save_inference_model(save_dirname, ["pixel"],
                                                  [predict], exe)
                    return
def build_model(self):
    """Build the CycleGAN graphs (generator + two discriminators) and run
    the full training loop: alternating G / D_A / D_B updates, periodic
    logging, optional test-image dumping, checkpointing, and CE-kpi output.
    """
    # NCHW input layout; crop_size comes from config — assumes square crops.
    data_shape = [None, 3, self.cfg.crop_size, self.cfg.crop_size]
    input_A = fluid.data(name='input_A', shape=data_shape, dtype='float32')
    input_B = fluid.data(name='input_B', shape=data_shape, dtype='float32')
    # Placeholders fed from the image pools when training the discriminators.
    fake_pool_A = fluid.data(
        name='fake_pool_A', shape=data_shape, dtype='float32')
    fake_pool_B = fluid.data(
        name='fake_pool_B', shape=data_shape, dtype='float32')
    # used for continuous evaluation: fixed seed for reproducibility
    if self.cfg.enable_ce:
        fluid.default_startup_program().random_seed = 90

    A_loader = fluid.io.DataLoader.from_generator(
        feed_list=[input_A], capacity=4, iterable=True, use_double_buffer=True)
    B_loader = fluid.io.DataLoader.from_generator(
        feed_list=[input_B], capacity=4, iterable=True, use_double_buffer=True)

    # Three trainers, each owning its own program: generator pair, D_A, D_B.
    gen_trainer = GTrainer(input_A, input_B, self.cfg, self.batch_num)
    d_A_trainer = DATrainer(input_B, fake_pool_B, self.cfg, self.batch_num)
    d_B_trainer = DBTrainer(input_A, fake_pool_A, self.cfg, self.batch_num)

    # prepare environment
    place = fluid.CUDAPlace(0) if self.cfg.use_gpu else fluid.CPUPlace()
    A_loader.set_batch_generator(
        self.A_reader,
        places=fluid.cuda_places() if self.cfg.use_gpu else fluid.cpu_places())
    B_loader.set_batch_generator(
        self.B_reader,
        places=fluid.cuda_places() if self.cfg.use_gpu else fluid.cpu_places())

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # History pools of generated images used to stabilize D training.
    A_pool = utility.ImagePool()
    B_pool = utility.ImagePool()

    # Optionally warm-start all three networks from checkpoints.
    if self.cfg.init_model:
        utility.init_checkpoints(self.cfg, gen_trainer, "net_G")
        utility.init_checkpoints(self.cfg, d_A_trainer, "net_DA")
        utility.init_checkpoints(self.cfg, d_B_trainer, "net_DB")

    ### memory optim
    build_strategy = fluid.BuildStrategy()
    build_strategy.enable_inplace = True

    gen_trainer_program = fluid.CompiledProgram(
        gen_trainer.program).with_data_parallel(
            loss_name=gen_trainer.g_loss.name, build_strategy=build_strategy)
    d_A_trainer_program = fluid.CompiledProgram(
        d_A_trainer.program).with_data_parallel(
            loss_name=d_A_trainer.d_loss_A.name, build_strategy=build_strategy)
    d_B_trainer_program = fluid.CompiledProgram(
        d_B_trainer.program).with_data_parallel(
            loss_name=d_B_trainer.d_loss_B.name, build_strategy=build_strategy)

    t_time = 0
    total_train_batch = 0  # NOTE :used for benchmark

    for epoch_id in range(self.cfg.epoch):
        batch_id = 0
        # Iterate A and B domains in lockstep; the shorter loader ends the epoch.
        for data_A, data_B in zip(A_loader(), B_loader()):
            if self.cfg.max_iter and total_train_batch == self.cfg.max_iter:  # used for benchmark
                return
            s_time = time.time()
            tensor_A, tensor_B = data_A[0]['input_A'], data_B[0]['input_B']
            ## optimize the g_A network
            g_A_loss, g_A_cyc_loss, g_A_idt_loss, g_B_loss, g_B_cyc_loss,\
            g_B_idt_loss, fake_A_tmp, fake_B_tmp = exe.run(
                gen_trainer_program,
                fetch_list=[
                    gen_trainer.G_A, gen_trainer.cyc_A_loss,
                    gen_trainer.idt_loss_A, gen_trainer.G_B,
                    gen_trainer.cyc_B_loss, gen_trainer.idt_loss_B,
                    gen_trainer.fake_A, gen_trainer.fake_B
                ],
                feed={"input_A": tensor_A,
                      "input_B": tensor_B})

            # Sample from the pools so D sees a history of generated images.
            fake_pool_B = B_pool.pool_image(fake_B_tmp)
            fake_pool_A = A_pool.pool_image(fake_A_tmp)

            # For CE runs, bypass the (random) pools to keep results deterministic.
            if self.cfg.enable_ce:
                fake_pool_B = fake_B_tmp
                fake_pool_A = fake_A_tmp

            # optimize the d_A network
            d_A_loss = exe.run(d_A_trainer_program,
                               fetch_list=[d_A_trainer.d_loss_A],
                               feed={
                                   "input_B": tensor_B,
                                   "fake_pool_B": fake_pool_B
                               })[0]

            # optimize the d_B network
            d_B_loss = exe.run(d_B_trainer_program,
                               fetch_list=[d_B_trainer.d_loss_B],
                               feed={
                                   "input_A": tensor_A,
                                   "fake_pool_A": fake_pool_A
                               })[0]

            batch_time = time.time() - s_time
            t_time += batch_time
            if batch_id % self.cfg.print_freq == 0:
                print("epoch{}: batch{}: \n\
d_A_loss: {}; g_A_loss: {}; g_A_cyc_loss: {}; g_A_idt_loss: {}; \n\
d_B_loss: {}; g_B_loss: {}; g_B_cyc_loss: {}; g_B_idt_loss: {}; \n\
Batch_time_cost: {}".format(epoch_id, batch_id, d_A_loss[0], g_A_loss[
                    0], g_A_cyc_loss[0], g_A_idt_loss[0], d_B_loss[0],
                                            g_B_loss[0], g_B_cyc_loss[0],
                                            g_B_idt_loss[0], batch_time))
                sys.stdout.flush()
            batch_id += 1  #NOTE: used for benchmark
            total_train_batch += 1  # used for benchmark
            # profiler tools: reset stats after warmup, stop 5 batches later
            if self.cfg.profile and epoch_id == 0 and batch_id == self.cfg.print_freq:
                profiler.reset_profiler()
            elif self.cfg.profile and epoch_id == 0 and batch_id == self.cfg.print_freq + 5:
                return
            # used for continuous evaluation: short epoch
            if self.cfg.enable_ce and batch_id == 10:
                break

        if self.cfg.run_test:
            # Image-name inputs let the test dump name its output files.
            A_image_name = fluid.data(
                name='A_image_name', shape=[None, 1], dtype='int32')
            B_image_name = fluid.data(
                name='B_image_name', shape=[None, 1], dtype='int32')
            A_test_loader = fluid.io.DataLoader.from_generator(
                feed_list=[input_A, A_image_name],
                capacity=4,
                iterable=True,
                use_double_buffer=True)
            B_test_loader = fluid.io.DataLoader.from_generator(
                feed_list=[input_B, B_image_name],
                capacity=4,
                iterable=True,
                use_double_buffer=True)
            A_test_loader.set_batch_generator(
                self.A_test_reader,
                places=fluid.cuda_places()
                if self.cfg.use_gpu else fluid.cpu_places())
            B_test_loader.set_batch_generator(
                self.B_test_reader,
                places=fluid.cuda_places()
                if self.cfg.use_gpu else fluid.cpu_places())
            test_program = gen_trainer.infer_program
            utility.save_test_image(
                epoch_id,
                self.cfg,
                exe,
                place,
                test_program,
                gen_trainer,
                A_test_loader,
                B_test_loader,
                A_id2name=self.A_id2name,
                B_id2name=self.B_id2name)

        if self.cfg.save_checkpoints:
            utility.checkpoints(epoch_id, self.cfg, gen_trainer, "net_G")
            utility.checkpoints(epoch_id, self.cfg, d_A_trainer, "net_DA")
            utility.checkpoints(epoch_id, self.cfg, d_B_trainer, "net_DB")

    # used for continuous evaluation: emit kpi lines from the LAST batch's losses
    if self.cfg.enable_ce:
        device_num = fluid.core.get_cuda_device_count(
        ) if self.cfg.use_gpu else 1
        print("kpis\tcyclegan_g_A_loss_card{}\t{}".format(device_num,
                                                          g_A_loss[0]))
        print("kpis\tcyclegan_g_A_cyc_loss_card{}\t{}".format(device_num,
                                                              g_A_cyc_loss[0]))
        print("kpis\tcyclegan_g_A_idt_loss_card{}\t{}".format(device_num,
                                                              g_A_idt_loss[0]))
        print("kpis\tcyclegan_d_A_loss_card{}\t{}".format(device_num,
                                                          d_A_loss[0]))
        print("kpis\tcyclegan_g_B_loss_card{}\t{}".format(device_num,
                                                          g_B_loss[0]))
        print("kpis\tcyclegan_g_B_cyc_loss_card{}\t{}".format(device_num,
                                                              g_B_cyc_loss[0]))
        print("kpis\tcyclegan_g_B_idt_loss_card{}\t{}".format(device_num,
                                                              g_B_idt_loss[0]))
        print("kpis\tcyclegan_d_B_loss_card{}\t{}".format(device_num,
                                                          d_B_loss[0]))
        print("kpis\tcyclegan_Batch_time_cost_card{}\t{}".format(device_num,
                                                                 batch_time))
def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
               weight):
    """Run the CPU ParallelExecutor training loop fed by ``py_reader``.

    Args:
        args: parsed CLI namespace (num_passes, batch_size, print_batch,
            with_speed, save_step, model_output_dir).
        train_program: main program containing the loss to minimize.
        reader: data source whose ``train()`` yields raw samples.
        py_reader: fluid PyReader decorated below with a tensor provider.
        loss: loss variable to fetch each step.
        trainer_id: only trainer 0 saves model parameters.
        weight: passed through to convert_python_to_tensor — presumably
            sampling weights; TODO confirm against its definition.
    """
    py_reader.decorate_tensor_provider(
        convert_python_to_tensor(weight, args.batch_size, reader.train()))

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.use_experimental_executor = True

    # NOTE(review): crashes with TypeError if CPU_NUM is unset — the env
    # variable is assumed to be exported by the launcher.
    print("CPU_NUM:" + str(os.getenv("CPU_NUM")))
    exec_strategy.num_threads = int(os.getenv("CPU_NUM"))

    build_strategy = fluid.BuildStrategy()
    if int(os.getenv("CPU_NUM")) > 1:
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce

    train_exe = fluid.ParallelExecutor(
        use_cuda=False,
        loss_name=loss.name,
        main_program=train_program,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    for pass_id in range(args.num_passes):
        py_reader.start()
        # Give the reader queue time to fill before the first run().
        time.sleep(10)
        epoch_start = time.time()
        batch_id = 0
        start = time.time()
        try:
            # Loop until the reader raises EOFException at end of pass.
            while True:
                loss_val = train_exe.run(fetch_list=[loss.name])
                loss_val = np.mean(loss_val)

                if batch_id % args.print_batch == 0:
                    logger.info(
                        "TRAIN --> pass: {} batch: {} loss: {} reader queue:{}".
                        format(pass_id, batch_id,
                               loss_val.mean(), py_reader.queue.size()))
                if args.with_speed:
                    if batch_id % 500 == 0 and batch_id != 0:
                        elapsed = (time.time() - start)
                        start = time.time()
                        # 1001 ≈ batches per 500-step window; TODO confirm
                        samples = 1001 * args.batch_size * int(
                            os.getenv("CPU_NUM"))
                        logger.info("Time used: {}, Samples/Sec: {}".format(
                            elapsed, samples / elapsed))

                # Periodic mid-pass checkpoint (trainer 0 only).
                if batch_id % args.save_step == 0 and batch_id != 0:
                    model_dir = args.model_output_dir + '/pass-' + str(
                        pass_id) + ('/batch-' + str(batch_id))
                    if trainer_id == 0:
                        fluid.io.save_params(executor=exe, dirname=model_dir)
                        print("model saved in %s" % model_dir)
                batch_id += 1

        except fluid.core.EOFException:
            # End of pass: reset reader, log timing, save end-of-pass params.
            py_reader.reset()
            epoch_end = time.time()
            logger.info("Epoch: {0}, Train total expend: {1} ".format(
                pass_id, epoch_end - epoch_start))
            model_dir = args.model_output_dir + '/pass-' + str(pass_id)
            if trainer_id == 0:
                fluid.io.save_params(executor=exe, dirname=model_dir)
                print("model saved in %s" % model_dir)
input_x = fluid.layers.data(name="x", shape=[32], dtype='float32') input_y = fluid.layers.data(name="y", shape=[1], dtype='int64') cost = mlp(input_x, input_y) optimizer = fluid.optimizer.SGD(learning_rate=0.01) dist_algorithm = KSDistributedFactory.instantiation(flag=1) role = RoleMaker.PaddleCloudRoleMaker(is_collective=True) dist_algorithm.init(role) # algorithm + local optimizer optimizer = GPUStrategy(exec_config=[NumThreadsConfig(32)], dist_config=[CollectiveMode(), GradAllreduce()]).setup_optimizer( dist_algorithm, optimizer) optimizer.minimize(cost, fluid.default_startup_program()) train_prog = dist_algorithm.main_program gpu_id = int(os.getenv("FLAGS_selected_gpus", "0")) place = fluid.CUDAPlace(gpu_id) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) step = 1001 for i in range(step): cost_val = exe.run(program=train_prog, feed=gen_data(), fetch_list=[cost.name]) print("worker_index: %d, step%d cost = %f" %
def _test_slice(self, place):
    """Exercise Variable slicing: first statically on a block-created var
    (checking shapes only), then end-to-end by executing a program and
    comparing every sliced fetch against the equivalent numpy slice.
    """
    b = default_main_program().current_block()
    w = b.create_var(dtype="float64", shape=[784, 100, 100], lod_level=0)

    # Static shape checks — no execution, just inferred shapes.
    for i in range(3):
        nw = w[i]
        self.assertEqual((100, 100), nw.shape)

    nw = w[:]
    self.assertEqual((784, 100, 100), nw.shape)

    nw = w[:, :]
    self.assertEqual((784, 100, 100), nw.shape)

    nw = w[:, :, -1]
    self.assertEqual((784, 100), nw.shape)

    # Full-index slicing keeps a rank-1 shape of size 1.
    nw = w[1, 1, 1]
    self.assertEqual(len(nw.shape), 1)
    self.assertEqual(nw.shape[0], 1)

    nw = w[:, :, :-1]
    self.assertEqual((784, 100, 99), nw.shape)

    # Slicing must preserve the (zero) LoD level.
    self.assertEqual(0, nw.lod_level)

    main = fluid.Program()
    with fluid.program_guard(main):
        exe = fluid.Executor(place)
        # 3x3x3 reference tensor; every var_i below slices it differently.
        tensor_array = np.array(
            [[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
             [[10, 11, 12], [13, 14, 15], [16, 17, 18]],
             [[19, 20, 21], [22, 23, 24], [25, 26, 27]]]).astype('float32')
        var = fluid.layers.assign(tensor_array)
        var1 = var[0, 1, 1]
        var2 = var[1:]
        var3 = var[0:1]
        var4 = var[::-1]
        var5 = var[1, 1:, 1:]
        var_reshape = fluid.layers.reshape(var, [3, -1, 3])
        var6 = var_reshape[:, :, -1]
        var7 = var[:, :, :-1]
        var8 = var[:1, :1, :1]
        var9 = var[:-1, :-1, :-1]
        var10 = var[::-1, :1, :-1]
        var11 = var[:-1, ::-1, -1:]
        var12 = var[1:2, 2:, ::-1]
        var13 = var[2:10, 2:, -2:-1]
        var14 = var[1:-1, 0:2, ::-1]
        var15 = var[::-1, ::-1, ::-1]

        # A small fc net plus a feed var so the program has real inputs too.
        x = fluid.layers.data(name='x', shape=[13], dtype='float32')
        y = fluid.layers.fc(input=x, size=1, act=None)
        y_1 = y[:, 0]
        feeder = fluid.DataFeeder(place=place, feed_list=[x])
        data = []
        data.append((np.random.randint(10, size=[13]).astype('float32')))
        exe.run(fluid.default_startup_program())

        local_out = exe.run(main,
                            feed=feeder.feed([data]),
                            fetch_list=[
                                var, var1, var2, var3, var4, var5, var6,
                                var7, var8, var9, var10, var11, var12,
                                var13, var14, var15
                            ])

        # Each fetched slice must equal numpy's slicing of the same tensor.
        # (var1 used scalar indices, so compare against the 1-element slice.)
        self.assertTrue(
            np.array_equal(local_out[1], tensor_array[0, 1, 1:2]))
        self.assertTrue(np.array_equal(local_out[2], tensor_array[1:]))
        self.assertTrue(np.array_equal(local_out[3], tensor_array[0:1]))
        self.assertTrue(np.array_equal(local_out[4], tensor_array[::-1]))
        self.assertTrue(
            np.array_equal(local_out[5], tensor_array[1, 1:, 1:]))
        self.assertTrue(
            np.array_equal(local_out[6],
                           tensor_array.reshape((3, -1, 3))[:, :, -1]))
        self.assertTrue(
            np.array_equal(local_out[7], tensor_array[:, :, :-1]))
        self.assertTrue(
            np.array_equal(local_out[8], tensor_array[:1, :1, :1]))
        self.assertTrue(
            np.array_equal(local_out[9], tensor_array[:-1, :-1, :-1]))
        self.assertTrue(
            np.array_equal(local_out[10], tensor_array[::-1, :1, :-1]))
        self.assertTrue(
            np.array_equal(local_out[11], tensor_array[:-1, ::-1, -1:]))
        self.assertTrue(
            np.array_equal(local_out[12], tensor_array[1:2, 2:, ::-1]))
        self.assertTrue(
            np.array_equal(local_out[13], tensor_array[2:10, 2:, -2:-1]))
        self.assertTrue(
            np.array_equal(local_out[14], tensor_array[1:-1, 0:2, ::-1]))
        self.assertTrue(
            np.array_equal(local_out[15], tensor_array[::-1, ::-1, ::-1]))
def test_case(self):
    """Check bicubic `interpolate` in static graph mode with every way of
    specifying the output size (ints, Variable dim, shape tensor, scale
    tensor, float/list scale), then check dygraph mode; all outputs must
    match the numpy reference `bicubic_interp_np`.
    """
    np.random.seed(200)
    x_data = np.random.random((2, 3, 6, 6)).astype("float32")
    dim_data = np.array([12]).astype("int32")
    shape_data = np.array([12, 12]).astype("int32")
    actual_size_data = np.array([12, 12]).astype("int32")
    scale_data = np.array([2.0]).astype("float32")

    prog = fluid.Program()
    startup_prog = fluid.Program()
    place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda(
    ) else fluid.CPUPlace()

    with fluid.program_guard(prog, startup_prog):
        x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32")
        dim = fluid.data(name="dim", shape=[1], dtype="int32")
        shape_tensor = fluid.data(
            name="shape_tensor", shape=[2], dtype="int32")
        actual_size = fluid.data(
            name="actual_size", shape=[2], dtype="int32")
        scale_tensor = fluid.data(
            name="scale_tensor", shape=[1], dtype="float32")

        # size as a plain int list
        out1 = interpolate(
            x, size=[12, 12], mode='bicubic', align_corners=False)
        # size partly given by a Variable
        out2 = interpolate(
            x, size=[12, dim], mode='bicubic', align_corners=False)
        # size as a 1-D shape tensor
        out3 = interpolate(
            x, size=shape_tensor, mode='bicubic', align_corners=False)
        out4 = interpolate(
            x, size=[12, 12], mode='bicubic', align_corners=False)
        # scale as a tensor / float / per-axis list
        out5 = interpolate(
            x,
            scale_factor=scale_tensor,
            mode='bicubic',
            align_corners=False)
        out6 = interpolate(
            x, scale_factor=2.0, mode='bicubic', align_corners=False)
        out7 = interpolate(
            x, scale_factor=[2.0, 2.0], mode='bicubic', align_corners=False)

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        results = exe.run(
            fluid.default_main_program(),
            feed={
                "x": x_data,
                "dim": dim_data,
                "shape_tensor": shape_data,
                "actual_size": actual_size_data,
                "scale_tensor": scale_data
            },
            fetch_list=[out1, out2, out3, out4, out5, out6, out7],
            return_numpy=True)

        # Every variant upsamples 6x6 -> 12x12, so one reference suffices.
        expect_res = bicubic_interp_np(
            x_data, out_h=12, out_w=12, align_corners=False)
        for res in results:
            self.assertTrue(np.allclose(res, expect_res))

    # Dygraph path must agree with the same numpy reference.
    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(x_data)
        interp = interpolate(
            x, size=[12, 12], mode='bicubic', align_corners=False)
        dy_result = interp.numpy()
        expect = bicubic_interp_np(
            x_data, out_h=12, out_w=12, align_corners=False)
        self.assertTrue(np.allclose(dy_result, expect))
#def req_one_data(): # 注释了原文的此条语句 for i in range(10): data_X = [i] data_Y = [i * 10 + 3] data_X = np.array(data_X).reshape(1, 1).astype("float32") data_Y = np.array(data_Y).reshape(1, 1).astype("float32") yield data_X, data_Y # 使用yield来返回单条数据 #return req_one_data # 返回 req_one_data 这个变量名!可不是req_one_data() # 注释了原文的此条语句 # 初始化项目环境 # fluid.Program 默认有 default_startup_program 和 default_main_program # 将 start_program 和 main_program 分开定义后,就可以用 program_guard 设置两个不同的程序空间 main_program = fluid.Program() # 空白程序框架 start_test = fluid.Program() # 空白的初始化程序,用于测试 start_train = fluid.default_startup_program() # 默认的初始化程序,用于训练。 # 定义 main_program 程序空间的变量,使用startup_program 进行初始化,此处因使用的空白的初始化程序,说明在此程序空间不需要初始化变量 with fluid.program_guard(main_program=main_program, startup_program=start_test ): # startup_program 默认为 default_startup_program # 定义张量格式 x = fluid.data(name="x", shape=[-1, 1], dtype="float32") # 第一个参数-1表示每批可以喂任意多的题目。第二个参数1表示每题只有一个已知条件。 y = fluid.data( name="y", shape=[-1, 1], dtype="float32") # 第一个参数-1表示每批可以喂任意多的题目。第二个参数1表示每题只有一个数字表示的答案。 # 定义神经网络 out = fluid.layers.fc(input=x, size=1) # 定义损失函数
def train():
    """Train the digit-recognition network for PASS_NUM epochs, evaluate on
    the test set after every epoch, save an inference model each epoch, and
    finally report the epoch with the lowest test cost.

    Relies on module-level helpers defined elsewhere in this file:
    ``train_program``, ``optimizer_program``, ``train_reader``,
    ``test_reader`` and ``event_handler``.
    """
    use_cuda = False
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    prediction, [avg_cost, acc] = train_program()

    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)

    optimizer = optimizer_program()
    optimizer.minimize(avg_cost)

    PASS_NUM = 5
    epochs = [epoch_id for epoch_id in range(PASS_NUM)]
    save_dirname = 'recognize_digits.inference.model'

    def train_test(train_test_program, train_test_feed, train_test_reader):
        # Run one full pass over the test reader; return (mean loss, mean acc).
        acc_set = []
        avg_loss_set = []
        for test_data in train_test_reader():
            acc_np, avg_loss_np = exe.run(program=train_test_program,
                                          feed=train_test_feed.feed(test_data),
                                          fetch_list=[acc, avg_cost])
            acc_set.append(float(acc_np))
            avg_loss_set.append(float(avg_loss_np))
        acc_val_mean = numpy.array(acc_set).mean()
        avg_loss_val_mean = numpy.array(avg_loss_set).mean()
        return avg_loss_val_mean, acc_val_mean

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    main_program = fluid.default_main_program()
    test_program = main_program.clone(for_test=True)

    result_lists = []
    step = 0
    for epoch_id in epochs:
        for step_id, data in enumerate(train_reader()):
            metrics = exe.run(main_program,
                              feed=feeder.feed(data),
                              fetch_list=[avg_cost, acc])
            if step % 100 == 0:
                event_handler(step, epoch_id, metrics[0])
            step += 1

        avg_cost_val, acc_val = train_test(train_test_program=test_program,
                                           train_test_reader=test_reader,
                                           train_test_feed=feeder)
        print('\nTest with Epoch %d, avg_cost: %s, acc: %s\n\n' %
              (epoch_id, avg_cost_val, acc_val))
        # Test with Epoch 4, avg_cost: 0.01788416613656345, acc: 0.9940286624203821
        result_lists.append((epoch_id, avg_cost_val, acc_val))

        if save_dirname is not None:
            fluid.io.save_inference_model(save_dirname, ['img'], [prediction],
                                          exe,
                                          model_filename=None,
                                          params_filename=None)

    # IMPROVED: pick the best epoch with min() instead of sorting the whole
    # list, and stop shadowing the builtin name `list` in the key lambda.
    best = min(result_lists, key=lambda rec: float(rec[1]))
    print('Best pass is %s, testing Avgcost is %s' % (best[0], best[1]))
    print('The classification accuracy is %.2f%%' % (float(best[2]) * 100))
def train():
    """Train the session-based recommendation model (GRU4Rec-style).

    Parses CLI args, builds either the BPR-loss or cross-entropy network,
    optimizes with Adagrad, runs ``pass_num`` epochs over the prepared
    reader, logs perplexity, and saves a model checkpoint per epoch.
    """
    args = parse_args()
    hid_size = args.hid_size
    train_dir = args.train_dir
    vocab_path = args.vocab_path
    use_cuda = True if args.use_cuda else False
    parallel = True if args.parallel else False
    print("use_cuda:", use_cuda, "parallel:", parallel)
    batch_size = args.batch_size
    # Reader batch size is scaled by card count so each card gets batch_size.
    vocab_size, train_reader = utils.prepare_data(
        train_dir, vocab_path, batch_size=batch_size * get_cards(args),\
        buffer_size=1000, word_freq_threshold=0, is_train=True)

    # Train program
    if args.loss == 'bpr':
        print('bpr loss')
        src, pos_label, label, avg_cost = net.train_bpr_network(
            neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)
    else:
        print('cross-entory loss')
        src, pos_label, label, avg_cost = net.train_cross_entropy_network(
            neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)

    # Optimization to minimize lost
    sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.base_lr)
    sgd_optimizer.minimize(avg_cost)

    # Initialize executor
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    if parallel:
        train_exe = fluid.ParallelExecutor(
            use_cuda=use_cuda, loss_name=avg_cost.name)
    else:
        train_exe = exe

    pass_num = args.pass_num
    model_dir = args.model_dir
    fetch_list = [avg_cost.name]

    total_time = 0.0
    for pass_idx in six.moves.xrange(pass_num):
        epoch_idx = pass_idx + 1
        print("epoch_%d start" % epoch_idx)
        t0 = time.time()
        i = 0
        newest_ppl = 0
        for data in train_reader():
            i += 1
            # Convert the python batch into LoDTensors (src/pos_label/label).
            ls, lp, ll = utils.to_lodtensor_bpr(data, args.neg_size,
                                                vocab_size, place)
            ret_avg_cost = train_exe.run(feed={
                "src": ls,
                "label": ll,
                "pos_label": lp
            },
                                         fetch_list=fetch_list)
            # Perplexity = exp(mean cross-entropy).
            avg_ppl = np.exp(ret_avg_cost[0])
            newest_ppl = np.mean(avg_ppl)
            if i % args.print_batch == 0:
                print("step:%d ppl:%.3f" % (i, newest_ppl))

        t1 = time.time()
        total_time += t1 - t0
        print("epoch:%d num_steps:%d time_cost(s):%f" %
              (epoch_idx, i, total_time / epoch_idx))
        # Checkpoint once per epoch.
        save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
        fluid.save(fluid.default_main_program(), model_path=save_dir)
        print("model saved in %s" % save_dir)
    print("finish training")
def validate(fluid_model_filename,
             golden_data_filename,
             model_func_name='inference',
             decimal=3,
             save_inference_model=False):
    """
    inferece the converted Paddle fluid model, validate with given golden data

    Args:
        fluid_model_filename: either a saved `__model__` desc file or a
            `.py` file defining `model_func_name` that builds the network.
        golden_data_filename: .npz/.npy file with 'inputs' and 'outputs'
            dicts of reference tensors.
        model_func_name: function to import from the python model file.
        decimal: decimal precision for numpy's almost-equal comparison.
        save_inference_model: if True, re-save and re-load the model as an
            extra round-trip check (python-code models only).

    Returns:
        bool: True if every output matched the golden data.

    Raises:
        ValueError: if the model filename is neither `__model__` nor `.py`.
    """
    import numpy as np
    import paddle.fluid as fluid

    logger = logging.getLogger('validate')

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # load model
    fluid_model_dir, basename = os.path.split(fluid_model_filename)
    if basename == '__model__':  # is desc program
        logger.debug('using desc file %s', basename)
        prog, _, var_outs = fluid.io.load_inference_model(fluid_model_dir, exe)
        out_names = var_outs  # HINT: pass var if fetch ops already created
        logger.info('model load passed')
    elif basename.endswith('.py'):  # is python code
        logger.debug('using python code file %s', basename)
        module_name, _ = os.path.splitext(basename)
        # NOTE(review): sys.path is only restored on the success path; an
        # unexpected exception in import would leave it modified — a
        # try/finally would be safer.
        sys_path = sys.path.copy()
        sys.path.append(fluid_model_dir)
        try:
            module = importlib.import_module(module_name)
            func = getattr(module, model_func_name)
        except AttributeError:
            # Fall back to a package-style path `name.name`.
            module_name = module_name + '.' + module_name
            module = importlib.import_module(module_name)
            func = getattr(module, model_func_name)
        sys.path = sys_path
        logger.debug('from %s imported %s: %s', module_name, model_func_name,
                     func)

        var_outs = func()
        var_outs = _ensure_list(var_outs)
        out_names = [var.name for var in var_outs
                     ]  # HINT: pass string to create fetch ops
        logger.info('import passed')

        prog = fluid.default_main_program()
        fluid.io.load_persistables(executor=exe,
                                   dirname=fluid_model_dir,
                                   main_program=prog)
        logger.info('weight load passed')
    else:
        raise ValueError('unsupported Paddle fluid model filename')

    # load data
    logger.info('using golden data %s', golden_data_filename)
    if golden_data_filename.endswith('.npz'):
        test_data = np.load(golden_data_filename, encoding='bytes')
        input_data = test_data['inputs'].tolist()
        output_data = test_data['outputs'].tolist()
    else:
        test_data = np.load(golden_data_filename, encoding='bytes').tolist()
        input_data = test_data['inputs']
        output_data = test_data['outputs']
    input_data = _flatten_dict(input_data)
    output_data = _flatten_dict(output_data)
    logger.info('found %d I/O golden data, starting test ...',
                len(input_data) + len(output_data))

    # DEBUG: reload test for python code
    if basename.endswith('.py') and save_inference_model:
        fluid.io.save_inference_model(fluid_model_dir,
                                      input_data.keys(),
                                      var_outs,
                                      exe,
                                      main_program=prog,
                                      export_for_deployment=True)
        logger.info('model re-save passed')
        fluid.io.load_inference_model(fluid_model_dir, exe)
        logger.info('model re-load passed')

    # execute
    outputs = exe.run(prog, feed=input_data, fetch_list=out_names)
    logger.info('execution passed')

    # validate: compare each fetched output against its golden tensor.
    # NOTE(review): zip pairs outputs with output_data.items() positionally —
    # assumes matching order; verify for multi-output models.
    passed = True
    for (name, truth), output in zip(output_data.items(), outputs):
        logger.info('testing output {} ...'.format(name))
        try:
            np.testing.assert_almost_equal(output, truth, decimal=decimal)
        except AssertionError as e:
            passed = False
            logger.error('failed: %s\n', e)
    if passed:
        logger.info('accuracy passed')
    else:
        logger.info('accuracy not passed')

    # globals().update(locals())
    return passed
def train(place):
    """Train a small PTB language model in dygraph mode on a fixed synthetic
    batch for one epoch, logging loss (and speed) every PRINT_STEP steps.

    Args:
        place: fluid place to run the dygraph guard on.

    Returns:
        tuple: (last loss ndarray, last hidden state ndarray,
        last cell state ndarray).
    """
    num_layers = 1
    batch_size = 4
    hidden_size = 10
    num_steps = 3
    init_scale = 0.1
    max_epoch = 1
    dropout = 0.0
    vocab_size = 1000
    batch_num = 200

    with fluid.dygraph.guard(place):
        # Fixed seeds so runs are comparable (e.g. against static graph).
        fluid.default_startup_program().random_seed = SEED
        fluid.default_main_program().random_seed = SEED
        ptb_model = PtbModel(hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale,
                             dropout=dropout)
        sgd = SGDOptimizer(learning_rate=1e-3,
                           parameter_list=ptb_model.parameters())

        for epoch_id in range(max_epoch):
            total_loss = 0.0
            iters = 0.0
            total_sample = 0

            # LSTM state starts at zeros each epoch.
            init_hidden_data = np.zeros((num_layers, batch_size, hidden_size),
                                        dtype='float32')
            init_cell_data = np.zeros((num_layers, batch_size, hidden_size),
                                      dtype='float32')
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)

            for step_id in range(batch_num):
                # Deterministic synthetic data: y is x shifted by one.
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))

                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, num_steps, 1))

                x = to_variable(x_data)
                y = to_variable(y_data)

                dy_loss, last_hidden, last_cell = ptb_model(
                    x, y, init_hidden, init_cell)

                out_loss = dy_loss.numpy()

                dy_loss.backward()
                sgd.minimize(dy_loss)
                ptb_model.clear_gradients()

                total_loss += out_loss
                iters += num_steps
                total_sample += 1
                if step_id % PRINT_STEP == 0:
                    if step_id == 0:
                        logging.info(
                            "epoch %d | step %d, loss %0.3f" %
                            (epoch_id, step_id, total_loss / total_sample))
                        avg_batch_time = time.time()
                    else:
                        # Speed over the last PRINT_STEP steps.
                        speed = PRINT_STEP / (time.time() - avg_batch_time)
                        logging.info(
                            "epoch %d | step %d, loss %0.3f, speed %.3f steps/s"
                            % (epoch_id, step_id, total_loss / total_sample,
                               speed))
                        avg_batch_time = time.time()

        return out_loss, last_hidden.numpy(), last_cell.numpy()
def train(args):
    """Train the VAE on MNIST with RMSProp, evaluating reconstruction/KL/VAE
    losses on the test set after every epoch.

    Args:
        args: CLI namespace; uses `use_cuda` and `enable_ce` (continuous
            evaluation: fixed seeds and no shuffling).
    """
    # Bail out when CUDA was requested but this build has no CUDA support.
    if args.use_cuda and not fluid.core.is_compiled_with_cuda():
        return

    startup_program = fluid.default_startup_program()
    main_program = fluid.default_main_program()

    if args.enable_ce:
        # CE mode: deterministic readers and fixed seeds.
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=BATCH_SIZE)
        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=BATCH_SIZE)
        startup_program.random_seed = 90
        main_program.random_seed = 90
    else:
        train_reader = paddle.batch(paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
                                    batch_size=BATCH_SIZE)
        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=BATCH_SIZE)

    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    # NOTE(review): label is declared and fed but the VAE net below only
    # consumes img — presumably kept for the DataFeeder; confirm.
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    net_conf = vae_neural_network
    _, reconstruction_loss, kl_loss, vae_loss = net_conf(img, args)

    # Clone for evaluation BEFORE optimizer ops are added.
    test_program = main_program.clone(for_test=True)

    optimizer = fluid.optimizer.RMSProp(learning_rate=0.001)
    optimizer.minimize(vae_loss)

    def train_test(train_test_program, train_test_feed, train_test_reader):
        # One full pass over the test reader; returns the three mean losses.
        reconstruction_loss_set = []
        kl_loss_set = []
        vae_loss_set = []
        for test_data in train_test_reader():
            reconstruction_loss_np, kl_loss_np, vae_loss_np = exe.run(
                program=train_test_program,
                feed=train_test_feed.feed(test_data),
                fetch_list=[reconstruction_loss, kl_loss, vae_loss])
            reconstruction_loss_set.append(float(reconstruction_loss_np))
            kl_loss_set.append(float(kl_loss_np))
            vae_loss_set.append(float(vae_loss_np))
        # get test acc and loss
        reconstruction_loss_mean = np.array(reconstruction_loss_set).mean()
        kl_loss_mean = np.array(kl_loss_set).mean()
        vae_loss_mean = np.array(vae_loss_set).mean()
        return reconstruction_loss_mean, kl_loss_mean, vae_loss_mean

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
    exe.run(startup_program)

    epochs = [epoch_id for epoch_id in range(50)]

    lists = []
    step = 0
    for epoch_id in epochs:
        for step_id, data in enumerate(train_reader()):
            metrics = exe.run(
                main_program,
                feed=feeder.feed(data),
                fetch_list=[reconstruction_loss, kl_loss, vae_loss])
            if step % 100 == 0:
                print(
                    "Pass %d, Epoch %d, reconstruction_loss %f, kl_loss %f, vae_loss %f"
                    % (step, epoch_id, metrics[0], metrics[1], metrics[2]))
            step += 1
        # test for epoch
        reconstruction_loss_val, kl_loss_val, vae_loss_val = train_test(
            train_test_program=test_program,
            train_test_reader=test_reader,
            train_test_feed=feeder)
        print(
            "Test with Epoch %d, reconstruction_loss_val: %s, kl_loss_val: %s, vae_loss_val: %s"
            % (epoch_id, reconstruction_loss_val, kl_loss_val, vae_loss_val))
        lists.append(
            (epoch_id, reconstruction_loss_val, kl_loss_val, vae_loss_val))
def train(model):
    """Train the segmentation model with Adam, logging IoU, periodically
    saving checkpoints and dumping prediction/label images for inspection.

    Args:
        model: model identifier passed through to create_model /
            load_model / save_model.
    """
    predict, loss, iou = create_model(model=model)
    optimizer = fluid.optimizer.Adam(learning_rate=1e-4)
    optimizer.minimize(loss)
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    # Keep loss/predict out of memory optimization so they stay fetchable.
    fluid.memory_optimize(fluid.default_main_program(),
                          print_log=False,
                          skip_opt_set=set([loss.name, predict.name]))
    if pretrain_model:
        load_model(exe, fluid.default_main_program(), model=model)
        print("load model succeed")
    else:
        # NOTE(review): this message prints when NO model is loaded —
        # likely meant something like "train from scratch"; confirm intent.
        print("load succeed")

    def trainLoop():
        # Run `total_step` batches, tracking a running mean IoU that resets
        # every 1000 batches.
        batches = DataSet.get_batch_generator(1, total_step)
        iou_count = 0
        mean_iou = 0
        iou_sum = 0
        for i, imgs, labels, names in batches:
            preTime = time.time()
            result = exe.run(fluid.default_main_program(),
                             feed={
                                 'img': imgs,
                                 'label': labels
                             },
                             fetch_list=[loss, predict, iou])
            nowTime = time.time()
            iou_sum += result[2]
            iou_count += 1
            mean_iou = iou_sum / iou_count
            print(' iou = ', result[2], 'mean_iou = ', mean_iou)
            # Reset the running IoU window every 1000 batches.
            if iou_count % 1000 == 0:
                iou_count = 0
                iou_sum = 0
            if i % 1000 == 0 and i != 0:
                print("Model saved")
                save_model(exe, fluid.default_main_program(), model=model)
            if i % 10 == 0:
                # Dump current prediction and its ground-truth label as PNGs.
                # assumes 1024x1024 output resolution — TODO confirm
                train_path = path + '/train.png'
                picture = result[1]
                picture = np.argmax(picture, axis=-1)
                picture = picture.reshape((1024, 1024))
                saveImage(picture, train_path)
                label_path = path + '/trainlabel.png'
                train_lab = np.argmax(labels[0], axis=2)
                saveImage(train_lab, label_path)
            if i % 20 == 0:
                # Class-distribution histogram of the prediction (debugging).
                argmax = np.argmax(result[1], axis=1)
                abc = Counter(argmax)
                print(' ', abc)
            if i % 2 == 0:
                print("step {:d},loss {:.6f},step_time: {:.3f}".format(
                    i, result[0][0], nowTime - preTime))

    trainLoop()
def do_train(args):
    # Train a dygraph Transformer for sequence-to-sequence translation,
    # optionally with one-process-per-GPU data parallelism, periodic
    # validation, and checkpointing.
    if args.use_cuda:
        # nranks > 1 means launched under paddle's multi-process runner;
        # each process pins its own device.
        trainer_count = fluid.dygraph.parallel.Env().nranks
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id
                                ) if trainer_count > 1 else fluid.CUDAPlace(0)
    else:
        trainer_count = 1
        place = fluid.CPUPlace()

    # define the data generator
    processor = reader.DataProcessor(fpattern=args.training_file,
                                     src_vocab_fpath=args.src_vocab_fpath,
                                     trg_vocab_fpath=args.trg_vocab_fpath,
                                     token_delimiter=args.token_delimiter,
                                     use_token_batch=args.use_token_batch,
                                     batch_size=args.batch_size,
                                     device_count=trainer_count,
                                     pool_size=args.pool_size,
                                     sort_type=args.sort_type,
                                     shuffle=args.shuffle,
                                     shuffle_batch=args.shuffle_batch,
                                     start_mark=args.special_token[0],
                                     end_mark=args.special_token[1],
                                     unk_mark=args.special_token[2],
                                     max_length=args.max_length,
                                     n_head=args.n_head)
    batch_generator = processor.data_generator(phase="train")
    if args.validation_file:
        # Validation pipeline: same vocab/settings, no shuffling.
        val_processor = reader.DataProcessor(
            fpattern=args.validation_file,
            src_vocab_fpath=args.src_vocab_fpath,
            trg_vocab_fpath=args.trg_vocab_fpath,
            token_delimiter=args.token_delimiter,
            use_token_batch=args.use_token_batch,
            batch_size=args.batch_size,
            device_count=trainer_count,
            pool_size=args.pool_size,
            sort_type=args.sort_type,
            shuffle=False,
            shuffle_batch=False,
            start_mark=args.special_token[0],
            end_mark=args.special_token[1],
            unk_mark=args.special_token[2],
            max_length=args.max_length,
            n_head=args.n_head)
        # NOTE(review): phase="train" on the validation generator looks
        # suspicious — presumably should be a dev/test phase; confirm
        # against reader.DataProcessor.data_generator.
        val_batch_generator = val_processor.data_generator(phase="train")
    if trainer_count > 1:  # for multi-process gpu training
        batch_generator = fluid.contrib.reader.distributed_batch_reader(
            batch_generator)
    # Vocabulary sizes / special ids come from the processor and are written
    # back onto args for the model constructor below.
    args.src_vocab_size, args.trg_vocab_size, args.bos_idx, args.eos_idx, \
        args.unk_idx = processor.get_vocab_summary()
    with fluid.dygraph.guard(place):
        # set seed for CE
        # NOTE(review): eval(str(...)) is used to turn "None"/"1234" strings
        # into None/int — eval on a CLI value is fragile; an explicit parse
        # would be safer.
        random_seed = eval(str(args.random_seed))
        if random_seed is not None:
            fluid.default_main_program().random_seed = random_seed
            fluid.default_startup_program().random_seed = random_seed

        # define data loader
        train_loader = fluid.io.DataLoader.from_generator(capacity=10)
        train_loader.set_batch_generator(batch_generator, places=place)
        if args.validation_file:
            val_loader = fluid.io.DataLoader.from_generator(capacity=10)
            val_loader.set_batch_generator(val_batch_generator, places=place)

        # define model
        transformer = Transformer(
            args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
            args.n_layer, args.n_head, args.d_key, args.d_value, args.d_model,
            args.d_inner_hid, args.prepostprocess_dropout,
            args.attention_dropout, args.relu_dropout, args.preprocess_cmd,
            args.postprocess_cmd, args.weight_sharing, args.bos_idx,
            args.eos_idx)

        # define loss
        criterion = CrossEntropyCriterion(args.label_smooth_eps)

        # define optimizer
        optimizer = fluid.optimizer.Adam(
            learning_rate=NoamDecay(args.d_model, args.warmup_steps,
                                    args.learning_rate),
            beta1=args.beta1,
            beta2=args.beta2,
            epsilon=float(args.eps),
            parameter_list=transformer.parameters())

        ## init from some checkpoint, to resume the previous training
        if args.init_from_checkpoint:
            model_dict, opt_dict = fluid.load_dygraph(
                os.path.join(args.init_from_checkpoint, "transformer"))
            transformer.load_dict(model_dict)
            optimizer.set_dict(opt_dict)
        ## init from some pretrain models, to better solve the current task
        if args.init_from_pretrain_model:
            model_dict, _ = fluid.load_dygraph(
                os.path.join(args.init_from_pretrain_model, "transformer"))
            transformer.load_dict(model_dict)

        if trainer_count > 1:
            strategy = fluid.dygraph.parallel.prepare_context()
            transformer = fluid.dygraph.parallel.DataParallel(
                transformer, strategy)

        # the best cross-entropy value with label smoothing
        loss_normalizer = -(
            (1. - args.label_smooth_eps) * np.log(
                (1. - args.label_smooth_eps)) + args.label_smooth_eps *
            np.log(args.label_smooth_eps / (args.trg_vocab_size - 1) + 1e-20))

        ce_time = []
        # NOTE(review): ce_ppl is never appended to in this function, so the
        # enable_ce path at the bottom always hits the except branch.
        ce_ppl = []
        step_idx = 0

        #NOTE: used for benchmark
        total_batch_num = 0

        # train loop
        for pass_id in range(args.epoch):
            pass_start_time = time.time()
            batch_id = 0
            for input_data in train_loader():
                if args.max_iter and total_batch_num == args.max_iter:  #NOTE: used for benchmark
                    return
                batch_start = time.time()
                (src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
                 trg_slf_attn_bias, trg_src_attn_bias, lbl_word,
                 lbl_weight) = input_data
                logits = transformer(src_word, src_pos, src_slf_attn_bias,
                                     trg_word, trg_pos, trg_slf_attn_bias,
                                     trg_src_attn_bias)
                sum_cost, avg_cost, token_num = criterion(
                    logits, lbl_word, lbl_weight)
                if trainer_count > 1:
                    # Scale + allreduce the loss gradients across processes.
                    avg_cost = transformer.scale_loss(avg_cost)
                    avg_cost.backward()
                    transformer.apply_collective_grads()
                else:
                    avg_cost.backward()
                optimizer.minimize(avg_cost)
                transformer.clear_gradients()
                if step_idx % args.print_step == 0:
                    # Multiplying by trainer_count undoes scale_loss for
                    # logging purposes.
                    total_avg_cost = avg_cost.numpy() * trainer_count
                    if step_idx == 0:
                        logging.info(
                            "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
                            "normalized loss: %f, ppl: %f" %
                            (step_idx, pass_id, batch_id, total_avg_cost,
                             total_avg_cost - loss_normalizer,
                             np.exp([min(total_avg_cost, 100)])))
                        avg_batch_time = time.time()
                    else:
                        logging.info(
                            "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
                            "normalized loss: %f, ppl: %f, speed: %.2f step/s" %
                            (step_idx, pass_id, batch_id, total_avg_cost,
                             total_avg_cost - loss_normalizer,
                             np.exp([min(total_avg_cost, 100)]),
                             args.print_step / (time.time() - avg_batch_time)))
                        avg_batch_time = time.time()
                if step_idx % args.save_step == 0 and step_idx != 0:
                    # validation
                    if args.validation_file:
                        transformer.eval()
                        total_sum_cost = 0
                        total_token_num = 0
                        for input_data in val_loader():
                            (src_word, src_pos, src_slf_attn_bias, trg_word,
                             trg_pos, trg_slf_attn_bias, trg_src_attn_bias,
                             lbl_word, lbl_weight) = input_data
                            logits = transformer(src_word, src_pos,
                                                 src_slf_attn_bias, trg_word,
                                                 trg_pos, trg_slf_attn_bias,
                                                 trg_src_attn_bias)
                            sum_cost, avg_cost, token_num = criterion(
                                logits, lbl_word, lbl_weight)
                            # Token-weighted average over the whole val set.
                            total_sum_cost += sum_cost.numpy()
                            total_token_num += token_num.numpy()
                        total_avg_cost = total_sum_cost / total_token_num
                        logging.info("validation, step_idx: %d, avg loss: %f, "
                                     "normalized loss: %f, ppl: %f" %
                                     (step_idx, total_avg_cost,
                                      total_avg_cost - loss_normalizer,
                                      np.exp([min(total_avg_cost, 100)])))
                        transformer.train()
                    # Only rank 0 (or single-process runs) writes checkpoints.
                    if args.save_model and (
                            trainer_count == 1 or
                            fluid.dygraph.parallel.Env().dev_id == 0):
                        model_dir = os.path.join(args.save_model,
                                                 "step_" + str(step_idx))
                        if not os.path.exists(model_dir):
                            os.makedirs(model_dir)
                        fluid.save_dygraph(
                            transformer.state_dict(),
                            os.path.join(model_dir, "transformer"))
                        fluid.save_dygraph(
                            optimizer.state_dict(),
                            os.path.join(model_dir, "transformer"))
                batch_id += 1
                total_batch_num = total_batch_num + 1
                step_idx += 1
            time_consumed = time.time() - pass_start_time
            ce_time.append(time_consumed)

        if args.save_model:
            model_dir = os.path.join(args.save_model, "step_final")
            if not os.path.exists(model_dir):
                os.makedirs(model_dir)
            fluid.save_dygraph(transformer.state_dict(),
                               os.path.join(model_dir, "transformer"))
            fluid.save_dygraph(optimizer.state_dict(),
                               os.path.join(model_dir, "transformer"))

        if args.enable_ce:
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            # NOTE(review): bare except hides real errors; ce_ppl is always
            # empty here (see above), so this branch always fires.
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (trainer_count, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (trainer_count, _ppl))
def train_dygraph(args, batch_generator):
    # Dygraph-mode Transformer training used by a benchmark/CE harness.
    # Relies on module-level globals: `place`, `SEED`, `trainer_count`,
    # `STEP_NUM` — confirm they are defined at module scope before calling.
    # Returns the collected average losses as a numpy array.
    with fluid.dygraph.guard(place):
        if SEED is not None:
            fluid.default_main_program().random_seed = SEED
            fluid.default_startup_program().random_seed = SEED

        # define data loader
        train_loader = fluid.io.DataLoader.from_generator(capacity=10)
        train_loader.set_batch_generator(batch_generator, places=place)

        # define model
        transformer = Transformer(
            args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
            args.n_layer, args.n_head, args.d_key, args.d_value, args.d_model,
            args.d_inner_hid, args.prepostprocess_dropout,
            args.attention_dropout, args.relu_dropout, args.preprocess_cmd,
            args.postprocess_cmd, args.weight_sharing, args.bos_idx,
            args.eos_idx)

        # define loss
        criterion = CrossEntropyCriterion(args.label_smooth_eps)

        # define optimizer
        learning_rate = fluid.layers.learning_rate_scheduler.noam_decay(
            args.d_model, args.warmup_steps, args.learning_rate)
        # define optimizer
        optimizer = fluid.optimizer.Adam(
            learning_rate=learning_rate,
            beta1=args.beta1,
            beta2=args.beta2,
            epsilon=float(args.eps),
            parameter_list=transformer.parameters())

        # the best cross-entropy value with label smoothing
        loss_normalizer = -(
            (1. - args.label_smooth_eps) * np.log(
                (1. - args.label_smooth_eps)) + args.label_smooth_eps *
            np.log(args.label_smooth_eps / (args.trg_vocab_size - 1) + 1e-20))

        ce_time = []
        ce_ppl = []
        avg_loss = []
        step_idx = 0
        for pass_id in range(args.epoch):
            pass_start_time = time.time()
            batch_id = 0
            for input_data in train_loader():
                (src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
                 trg_slf_attn_bias, trg_src_attn_bias, lbl_word,
                 lbl_weight) = input_data
                logits = transformer(src_word, src_pos, src_slf_attn_bias,
                                     trg_word, trg_pos, trg_slf_attn_bias,
                                     trg_src_attn_bias)
                sum_cost, avg_cost, token_num = criterion(logits, lbl_word,
                                                          lbl_weight)
                avg_cost.backward()
                optimizer.minimize(avg_cost)
                transformer.clear_gradients()
                if step_idx % args.print_step == 0:
                    # `trainer_count` is a module-level global here —
                    # presumably 1 in this single-process path; verify.
                    total_avg_cost = avg_cost.numpy() * trainer_count
                    avg_loss.append(total_avg_cost[0])
                    if step_idx == 0:
                        logging.info(
                            "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
                            "normalized loss: %f, ppl: %f" %
                            (step_idx, pass_id, batch_id, total_avg_cost,
                             total_avg_cost - loss_normalizer,
                             np.exp([min(total_avg_cost, 100)])))
                        avg_batch_time = time.time()
                    else:
                        logging.info(
                            "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
                            "normalized loss: %f, ppl: %f, speed: %.2f steps/s"
                            % (step_idx, pass_id, batch_id, total_avg_cost,
                               total_avg_cost - loss_normalizer,
                               np.exp([min(total_avg_cost, 100)]),
                               args.print_step /
                               (time.time() - avg_batch_time)))
                        ce_ppl.append(np.exp([min(total_avg_cost, 100)]))
                        avg_batch_time = time.time()
                batch_id += 1
                step_idx += 1
                # Stop early after STEP_NUM optimizer steps (benchmark mode),
                # saving the model first when a save path is configured.
                if step_idx == STEP_NUM:
                    if args.save_dygraph_model_path:
                        model_dir = os.path.join(args.save_dygraph_model_path)
                        if not os.path.exists(model_dir):
                            os.makedirs(model_dir)
                        fluid.save_dygraph(
                            transformer.state_dict(),
                            os.path.join(model_dir, "transformer"))
                        fluid.save_dygraph(
                            optimizer.state_dict(),
                            os.path.join(model_dir, "transformer"))
                    break
            time_consumed = time.time() - pass_start_time
            ce_time.append(time_consumed)
    return np.array(avg_loss)
def eval(args):
    """Evaluate a saved image-classification model on its validation split.

    Builds the network named by ``args.model``, loads parameters from
    ``args.model_path``, runs one pass over the validation reader, and logs
    per-batch and final top-1/top-5 accuracy.

    NOTE(review): the function name shadows the builtin ``eval``; renaming
    would change the public interface, so it is kept but flagged.

    Args:
        args: parsed CLI namespace providing ``data`` ("mnist" or
            "imagenet"), ``model``, ``model_path``, ``use_gpu``,
            ``batch_size`` and ``log_period``.

    Raises:
        ValueError: if ``args.data`` names an unsupported dataset.
    """
    # Only the validation reader is needed; the original also created (and
    # never used) a train reader, which forced an unnecessary dataset fetch.
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    # Clone a forward-only program before anything mutates the default one.
    val_program = fluid.default_main_program().clone(for_test=True)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    # (was `val_feeder = feeder = ...` — the `feeder` alias was never used)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    load_model(exe, val_program, args.model_path)

    batch_id = 0
    acc_top1_ns = []
    acc_top5_ns = []
    for data in val_reader():
        start_time = time.time()
        acc_top1_n, acc_top5_n = exe.run(
            val_program,
            feed=val_feeder.feed(data),
            fetch_list=[acc_top1.name, acc_top5.name])
        end_time = time.time()
        if batch_id % args.log_period == 0:
            _logger.info(
                "Eval batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".format(
                    batch_id, np.mean(acc_top1_n), np.mean(acc_top5_n),
                    end_time - start_time))
        acc_top1_ns.append(np.mean(acc_top1_n))
        acc_top5_ns.append(np.mean(acc_top5_n))
        batch_id += 1

    _logger.info("Final eval - acc_top1: {}; acc_top5: {}".format(
        np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns))))
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import unittest import paddle import paddle.fluid.core as core import paddle.fluid as fluid from paddle.fluid.backward import append_backward import paddle.fluid.framework as framework from paddle.fluid.framework import Program, switch_main_program import bisect import numpy as np fluid.default_startup_program().random_seed = 1 class TestDyRnnStaticInput(unittest.TestCase): def setUp(self): self._delta = 0.005 self._max_sequence_len = 3 self._program = Program() switch_main_program(self._program) self.output_dim = 10 self.place = core.CPUPlace() self.prepare_x_tensor() self.prepare_static_input_tensor() self.exe = fluid.Executor(self.place) def prepare_x_tensor(self):
def test_mnist_float32(self):
    # Trains the same MNIST model twice — once in dygraph (imperative) mode,
    # once as a static graph — with identical seeds/batching, then asserts
    # that initial parameters, final loss, and final parameters agree.
    seed = 90
    epoch_num = 1
    batch_size = 128
    batch_num = 50  # cap batches per epoch to keep the test fast

    # --- dygraph run ---
    with fluid.dygraph.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

        mnist = MNIST("mnist")
        sgd = SGDOptimizer(learning_rate=1e-3)

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(self.reader_decorator(paddle.dataset.mnist.train()),
                         batch_size=batch_size,
                         drop_last=True),
            places=fluid.CPUPlace())

        mnist.train()
        dy_param_init_value = {}
        for epoch in range(epoch_num):
            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= batch_num:
                    break
                img = data[0]
                dy_x_data = img.numpy()
                label = data[1]
                label.stop_gradient = True

                cost = mnist(img)
                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                dy_out = avg_loss.numpy()

                # Snapshot parameters before the very first update so they
                # can be compared to the static program's init values.
                if epoch == 0 and batch_id == 0:
                    for param in mnist.parameters():
                        dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()
                sgd.minimize(avg_loss)
                mnist.clear_gradients()

                # Re-snapshotted every batch; after the loop this holds the
                # final parameter values.
                dy_param_value = {}
                for param in mnist.parameters():
                    dy_param_value[param.name] = param.numpy()

    # --- equivalent static-graph run ---
    with new_program_scope():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

        exe = fluid.Executor(fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))

        mnist = MNIST("mnist")
        sgd = SGDOptimizer(learning_rate=1e-3)
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=batch_size,
                                    drop_last=True)

        img = fluid.layers.data(name='pixel',
                                shape=[1, 28, 28],
                                dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        cost = mnist(img)
        loss = fluid.layers.cross_entropy(cost, label)
        avg_loss = fluid.layers.mean(loss)
        sgd.minimize(avg_loss)

        # initialize params and fetch them
        static_param_init_value = {}
        static_param_name_list = []
        for param in mnist.parameters():
            static_param_name_list.append(param.name)

        out = exe.run(fluid.default_startup_program(),
                      fetch_list=static_param_name_list)

        for i in range(len(static_param_name_list)):
            static_param_init_value[static_param_name_list[i]] = out[i]

        for epoch in range(epoch_num):
            for batch_id, data in enumerate(train_reader()):
                if batch_id >= batch_num:
                    break
                static_x_data = np.array([
                    x[0].reshape(1, 28, 28) for x in data
                ]).astype('float32')
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                    [batch_size, 1])

                # Fetch loss plus every parameter in one run call; out[0] is
                # the loss, the rest line up with static_param_name_list.
                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)

                static_param_value = {}
                static_out = out[0]
                for i in range(1, len(out)):
                    static_param_value[static_param_name_list[i - 1]] = out[i]

    # NOTE(review): `.all()` reduces each array to a single bool before
    # allclose, so this line compares two booleans rather than the data —
    # a much weaker check than probably intended.
    self.assertTrue(np.allclose(dy_x_data.all(), static_x_data.all()))

    for key, value in six.iteritems(static_param_init_value):
        self.assertTrue(np.allclose(value, dy_param_init_value[key]))

    self.assertTrue(np.allclose(static_out, dy_out))

    for key, value in six.iteritems(static_param_value):
        self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5))
def train(nn_type,
          use_cuda,
          save_dirname=None,
          model_filename=None,
          params_filename=None):
    """Train an MNIST classifier and optionally export an inference model.

    Args:
        nn_type: 'softmax_regression', 'multilayer_perceptron', or anything
            else for the convolutional network.
        use_cuda: train on GPU 0; silently returns if CUDA was requested but
            the framework was built without it.
        save_dirname: directory for the exported inference model after each
            epoch (skipped when None).
        model_filename: forwarded to fluid.io.save_inference_model.
        params_filename: forwarded to fluid.io.save_inference_model.
    """
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return
    startup_program = fluid.default_startup_program()
    main_program = fluid.default_main_program()

    if args.enable_ce:
        # CE (continuous evaluation) mode: deterministic readers and seeds.
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=BATCH_SIZE)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
        startup_program.random_seed = 90
        main_program.random_seed = 90
    else:
        train_reader = paddle.batch(
            paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
            batch_size=BATCH_SIZE)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)

    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if nn_type == 'softmax_regression':
        net_conf = softmax_regression
    elif nn_type == 'multilayer_perceptron':
        net_conf = multilayer_perceptron
    else:
        net_conf = convolutional_neural_network

    prediction, avg_loss, acc = net_conf(img, label)

    # Clone before the optimizer adds backward ops so the test program stays
    # forward-only.
    test_program = main_program.clone(for_test=True)
    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer.minimize(avg_loss)

    def train_test(train_test_program, train_test_feed, train_test_reader):
        # One full pass over a reader; returns (mean loss, mean accuracy).
        acc_set = []
        avg_loss_set = []
        for test_data in train_test_reader():
            acc_np, avg_loss_np = exe.run(program=train_test_program,
                                          feed=train_test_feed.feed(test_data),
                                          fetch_list=[acc, avg_loss])
            acc_set.append(float(acc_np))
            avg_loss_set.append(float(avg_loss_np))
        # get test acc and loss
        acc_val_mean = numpy.array(acc_set).mean()
        avg_loss_val_mean = numpy.array(avg_loss_set).mean()
        return avg_loss_val_mean, acc_val_mean

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
    exe.run(startup_program)

    lists = []
    step = 0
    # (was: a pointless `epochs = [epoch_id for epoch_id in range(PASS_NUM)]`
    # list and an enumerate() whose index was never used)
    for epoch_id in range(PASS_NUM):
        for data in train_reader():
            metrics = exe.run(main_program,
                              feed=feeder.feed(data),
                              fetch_list=[avg_loss, acc])
            if step % 100 == 0:
                print("Pass %d, Epoch %d, Cost %f" %
                      (step, epoch_id, metrics[0]))
            step += 1
        # test for epoch
        avg_loss_val, acc_val = train_test(train_test_program=test_program,
                                           train_test_reader=test_reader,
                                           train_test_feed=feeder)
        print("Test with Epoch %d, avg_cost: %s, acc: %s" %
              (epoch_id, avg_loss_val, acc_val))
        lists.append((epoch_id, avg_loss_val, acc_val))
        if save_dirname is not None:
            fluid.io.save_inference_model(save_dirname, ["img"], [prediction],
                                          exe,
                                          model_filename=model_filename,
                                          params_filename=params_filename)

    if args.enable_ce:
        print("kpis\ttrain_cost\t%f" % metrics[0])
        print("kpis\ttest_cost\t%s" % avg_loss_val)
        print("kpis\ttest_acc\t%s" % acc_val)

    # find the best pass: lowest test loss.
    # (was `key=lambda list: ...` — the parameter shadowed builtin `list`)
    best = sorted(lists, key=lambda rec: float(rec[1]))[0]
    print('Best pass is %s, testing Avgcost is %s' % (best[0], best[1]))
    print('The classification accuracy is %.2f%%' % (float(best[2]) * 100))
accuracy = fluid.layers.accuracy(input=predict, label=words[-1]) auc_var, batch_auc_var, auc_states = \ fluid.layers.auc(input=predict, label=words[-1], num_thresholds=2 ** 12, slide_steps=20) dataset = fluid.DatasetFactory().create_dataset() dataset.set_use_var(self.sparse_input_ids + [label]) pipe_command = "python dataset_generator.py" dataset.set_pipe_command(pipe_command) dataset.set_batch_size(128) dataset.set_thread(10) dataset.set_hdfs_config("afs:xxx.xxx.xx.xx", "xxxx,xxxx") optimizer = fluid.optimizer.SGD(0.01) optimizer.minimize(loss) exe = fluid.Executor(fluid.CPUPlace()) train_folder = ["afs:/app/fs/20191020", "afs:/app/fs/20191021"] train_filelists = [["afs:/app/fs/20191020/0.txt", "afs:/app/fs/20191020/1.txt"], ["afs:/app/fs/20191021/0.txt", "afs:/app/fs/20191021/1.txt"]] exe.run(fluid.default_startup_program()) for filelist in train_filelists: dataset.set_filelist(filelist) exe.train_from_dataset( program=fluid.default_main_program(), dataset=dataset, fetch_list=[auc_var], fetch_info=["auc"], debug=False) # save model here
def do_train(args):
    """Train the context/response matching model.

    Builds the static program (data layers, net, gradient clipping, Adam),
    optionally restores a checkpoint / pretrained params / word embeddings,
    then runs epoch loops over a PyReader-fed compiled program with periodic
    logging and checkpointing.
    """
    train_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(train_prog, startup_prog):
        train_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():
            # LoD (variable-length) token-id sequences for the dialogue
            # context and the candidate response, plus binary match labels.
            context_wordseq = fluid.data(
                name='context_wordseq',
                shape=[-1, 1],
                dtype='int64',
                lod_level=1)
            response_wordseq = fluid.data(
                name='response_wordseq',
                shape=[-1, 1],
                dtype='int64',
                lod_level=1)
            labels = fluid.data(name='labels', shape=[-1, 1], dtype='int64')

            input_inst = [context_wordseq, response_wordseq, labels]
            input_field = InputField(input_inst)
            data_reader = fluid.io.PyReader(
                feed_list=input_inst, capacity=4, iterable=False)

            loss = create_net(
                is_training=True, model_input=input_field, args=args)
            loss.persistable = True

            # gradient clipping — clip each gradient value into [-1, 1]
            fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
                max=1.0, min=-1.0))

            optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
            optimizer.minimize(loss)

    if args.use_cuda:
        dev_count = fluid.core.get_cuda_device_count()
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
    else:
        dev_count = int(os.environ.get('CPU_NUM', 1))
        place = fluid.CPUPlace()

    processor = reader.DataProcessor(
        data_path=args.training_file,
        max_seq_length=args.max_seq_len,
        batch_size=args.batch_size)

    batch_generator = processor.data_generator(
        place=place,
        phase="train",
        shuffle=True,
        sample_pro=args.sample_pro)
    num_train_examples = processor.get_num_examples(phase='train')
    max_train_steps = args.epoch * num_train_examples // dev_count // args.batch_size

    print("Num train examples: %d" % num_train_examples)
    print("Max train steps: %d" % max_train_steps)

    data_reader.decorate_batch_generator(batch_generator)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # At most one of checkpoint / pretrain-model init may be requested.
    assert (args.init_from_checkpoint == "") or (
        args.init_from_pretrain_model == "")

    #init from some checkpoint, to resume the previous training
    if args.init_from_checkpoint:
        save_load_io.init_from_checkpoint(args, exe, train_prog)
    #init from some pretrain models, to better solve the current task
    if args.init_from_pretrain_model:
        save_load_io.init_from_pretrain_model(args, exe, train_prog)

    if args.word_emb_init:
        print("start loading word embedding init ...")
        # Pickle protocol differs between py2/py3 for the embedding file.
        if six.PY2:
            word_emb = np.array(
                pickle.load(io.open(args.word_emb_init, 'rb'))).astype(
                    'float32')
        else:
            word_emb = np.array(
                pickle.load(
                    io.open(args.word_emb_init, 'rb'),
                    encoding="bytes")).astype('float32')
        set_word_embedding(word_emb, place)
        print("finish init word embedding ...")

    build_strategy = fluid.compiler.BuildStrategy()
    build_strategy.enable_inplace = True
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)

    steps = 0
    begin_time = time.time()
    time_begin = time.time()
    for epoch_step in range(args.epoch):
        data_reader.start()
        sum_loss = 0.0
        ce_loss = 0.0
        # Drain the reader until it signals end-of-epoch via EOFException.
        while True:
            try:
                fetch_list = [loss.name]
                outputs = exe.run(compiled_train_prog, fetch_list=fetch_list)
                # NOTE(review): `outputs` is the whole fetched list; the
                # np.array(...).mean() below averages across devices as well.
                np_loss = outputs
                sum_loss += np.array(np_loss).mean()
                ce_loss = np.array(np_loss).mean()

                if steps % args.print_steps == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    current_time = time.strftime('%Y-%m-%d %H:%M:%S',
                                                 time.localtime(time.time()))
                    print(
                        '%s epoch: %d, step: %s, avg loss %s, speed: %f steps/s'
                        % (current_time, epoch_step, steps,
                           sum_loss / args.print_steps,
                           args.print_steps / used_time))
                    sum_loss = 0.0
                    time_begin = time.time()

                # NOTE(review): this also saves at steps == 0 (no
                # `steps != 0` guard, unlike similar loops elsewhere).
                if steps % args.save_steps == 0:
                    if args.save_checkpoint:
                        save_load_io.save_checkpoint(args, exe, train_prog,
                                                     "step_" + str(steps))
                    if args.save_param:
                        save_load_io.save_param(args, exe, train_prog,
                                                "step_" + str(steps))
                steps += 1
            except fluid.core.EOFException:
                data_reader.reset()
                break

    if args.save_checkpoint:
        save_load_io.save_checkpoint(args, exe, train_prog, "step_final")
    if args.save_param:
        save_load_io.save_param(args, exe, train_prog, "step_final")

    def get_cards():
        # Number of GPUs implied by CUDA_VISIBLE_DEVICES (0 when unset/empty).
        num = 0
        cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
        if cards != '':
            num = len(cards.split(","))
        return num

    if args.enable_ce:
        card_num = get_cards()
        pass_time_cost = time.time() - begin_time
        print("test_card_num", card_num)
        print("kpis\ttrain_duration_card%s\t%s" % (card_num, pass_time_cost))
        print("kpis\ttrain_loss_card%s\t%f" % (card_num, ce_loss))
def test_accuracy(self):
    # End-to-end check of quantization-aware training: train a float
    # MobileNet on MNIST for one pass, record its accuracy, then apply
    # quant_aware + convert and verify the quantized accuracy is reported
    # (values are printed, not asserted, in this block).
    image = fluid.layers.data(
        name='image', shape=[1, 28, 28], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    model = MobileNet()
    out = model.net(input=image, class_dim=10)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    optimizer = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=0.01,
        regularization=fluid.regularizer.L2Decay(4e-5))
    optimizer.minimize(avg_cost)
    main_prog = fluid.default_main_program()
    # Forward-only clone for evaluation/quantized-eval.
    val_prog = main_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda(
    ) else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    feeder = fluid.DataFeeder([image, label], place, program=main_prog)
    train_reader = paddle.fluid.io.batch(
        paddle.dataset.mnist.train(), batch_size=64)
    eval_reader = paddle.fluid.io.batch(
        paddle.dataset.mnist.test(), batch_size=64)

    def train(program):
        # One pass over the train reader against the given program.
        # NOTE(review): local `iter` shadows the builtin of the same name.
        iter = 0
        for data in train_reader():
            cost, top1, top5 = exe.run(
                program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            iter += 1
            if iter % 100 == 0:
                print(
                    'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                    format(iter, cost, top1, top5))

    def test(program):
        # One pass over the eval reader; returns (mean top1, mean top5).
        iter = 0
        result = [[], [], []]
        for data in eval_reader():
            cost, top1, top5 = exe.run(
                program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            iter += 1
            if iter % 100 == 0:
                print('eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                      format(iter, cost, top1, top5))
            result[0].append(cost)
            result[1].append(top1)
            result[2].append(top5)
        print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
            np.mean(result[0]), np.mean(result[1]), np.mean(result[2])))
        return np.mean(result[1]), np.mean(result[2])

    # Baseline float training + evaluation.
    train(main_prog)
    top1_1, top5_1 = test(main_prog)

    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
    }
    # Insert fake-quant ops, fine-tune, then convert the eval program
    # (also emitting an int8 program).
    quant_train_prog = quant_aware(main_prog, place, config, for_test=False)
    quant_eval_prog = quant_aware(val_prog, place, config, for_test=True)
    train(quant_train_prog)
    quant_eval_prog, int8_prog = convert(
        quant_eval_prog, place, config, save_int8=True)
    top1_2, top5_2 = test(quant_eval_prog)
    # values before quantization and after quantization should be close
    print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
    print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
def train(cfg):
    """Train a segmentation student model with knowledge distillation from a
    pretrained teacher (PaddleSeg-style script).

    Steps: build the student graph in the default main program, build the
    teacher in a separate program and load its checkpoint, merge the teacher
    (with a ``teacher_`` prefix) into the student program, add an L2 distill
    loss between paired feature maps, optimize the combined loss, then run
    the epoch loop with logging / checkpointing / optional evaluation.

    Args:
        cfg: global config object (attribute-style access, e.g.
            ``cfg.DATASET.TRAIN_FILE_LIST``).  Also mutated via
            ``cfg.update_from_file`` for teacher/student config files.

    NOTE(review): relies on module-level ``args`` and many project helpers
    (``build_model``, ``merge``, ``l2_loss``, ``solver``, ``SegDataset``,
    checkpoint utilities) defined outside this view.
    """
    # startup_prog = fluid.Program()
    # train_prog = fluid.Program()
    drop_last = True

    dataset = SegDataset(
        file_list=cfg.DATASET.TRAIN_FILE_LIST,
        mode=ModelPhase.TRAIN,
        shuffle=True,
        data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # Yields one sample at a time, but batches are accumulated first so
        # that incomplete trailing batches can be dropped under sync-BN.
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If use sync batch norm strategy, drop last batch if number of samples
        # in batch_data is less then cfg.BATCH_SIZE to avoid NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    # place = places[0]
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPU
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE can divided by GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # If use multi-gpu training mode, batch data will allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    # Student graph is built into the *default* main/startup programs.
    data_loader, loss, lr, pred, grts, masks, image = build_model(
        phase=ModelPhase.TRAIN)
    data_loader.set_sample_generator(
        data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)

    exe = fluid.Executor(place)

    # Switch cfg to the teacher's config before building the teacher graph.
    cfg.update_from_file(args.teacher_cfg_file)
    # teacher_arch = teacher_cfg.architecture
    teacher_program = fluid.Program()
    teacher_startup_program = fluid.Program()

    with fluid.program_guard(teacher_program, teacher_startup_program):
        with fluid.unique_name.guard():
            # Teacher shares the student's input variables (image/label/mask).
            _, teacher_loss, _, _, _, _, _ = build_model(
                teacher_program,
                teacher_startup_program,
                phase=ModelPhase.TRAIN,
                image=image,
                label=grts,
                mask=masks)

    exe.run(teacher_startup_program)

    # Freeze the teacher: clone for test, then load its checkpoint.
    teacher_program = teacher_program.clone(for_test=True)
    ckpt_dir = cfg.SLIM.KNOWLEDGE_DISTILL_TEACHER_MODEL_DIR
    assert ckpt_dir is not None
    print('load teacher model:', ckpt_dir)
    fluid.io.load_params(exe, ckpt_dir, main_program=teacher_program)

    # cfg = load_config(FLAGS.config)
    # Restore the student config.
    cfg.update_from_file(args.cfg_file)
    data_name_map = {
        'image': 'image',
        'label': 'label',
        'mask': 'mask',
    }
    # Merge teacher ops/vars into the student's default main program
    # (teacher variables get a "teacher_" name prefix).
    merge(teacher_program, fluid.default_main_program(), data_name_map, place)
    distill_pairs = [[
        'teacher_bilinear_interp_2.tmp_0', 'bilinear_interp_0.tmp_0'
    ]]

    def distill(pairs, weight):
        """
        Add 3 pairs of distillation losses, each pair of feature maps
        is the input of teacher and student's yolov3_loss respectively
        """
        # NOTE(review): despite the docstring, only the first pair is used.
        loss = l2_loss(pairs[0][0], pairs[0][1])
        weighted_loss = loss * weight
        return weighted_loss

    distill_loss = distill(distill_pairs, 0.1)
    cfg.update_from_file(args.cfg_file)
    optimizer = solver.Solver(None, None)
    all_loss = loss + distill_loss
    lr = optimizer.optimise(all_loss)

    exe.run(fluid.default_startup_program())

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iteration
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy,
                                             fluid.default_main_program())
        # NCCL-based multi-trainer mode requires a single exec thread.
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            loss_name=all_loss.name,
            exec_strategy=exec_strategy,
            build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, fluid.default_main_program())
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
        load_vars = []
        load_fail_vars = []

        def var_shape_matched(var, shape):
            """
            Check whether a persistable variable's saved shape matches the
            shape expected by the current network.
            """
            var_exist = os.path.exists(
                os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
            if var_exist:
                var_shape = parse_shape_from_file(
                    os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
                return var_shape == shape
            return False

        # Only load parameters whose on-disk shape matches the graph;
        # mismatches are reported and skipped instead of failing hard.
        for x in fluid.default_main_program().list_vars():
            if isinstance(x, fluid.framework.Parameter):
                shape = tuple(fluid.global_scope().find_var(
                    x.name).get_tensor().shape())
                if var_shape_matched(x, shape):
                    load_vars.append(x)
                else:
                    load_fail_vars.append(x)

        fluid.io.load_vars(
            exe, dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR, vars=load_vars)
        for var in load_vars:
            print_info("Parameter[{}] loaded sucessfully!".format(var.name))
        for var in load_fail_vars:
            print_info(
                "Parameter[{}] don't exist or shape does not match current network, skip"
                " to load it.".format(var.name))
        print_info("{}/{} pretrained parameters loaded successfully!".format(
            len(load_vars),
            len(load_vars) + len(load_fail_vars)))
    else:
        print_info(
            'Pretrained model dir {} not exists, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    #fetch_list = [avg_loss.name, lr.name]
    fetch_list = [
        loss.name, 'teacher_' + teacher_loss.name, distill_loss.name, lr.name
    ]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(
            precision=4, suppress=True, linewidth=160, floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_tb:
        if not args.tb_log_dir:
            print_info("Please specify the log directory by --tb_log_dir.")
            exit(1)

        from tb_paddle import SummaryWriter
        log_writer = SummaryWriter(args.tb_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    global_step = 0
    # Total number of steps across all remaining epochs (for ETA display).
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    avg_loss = 0.0
    avg_t_loss = 0.0
    avg_d_loss = 0.0
    best_mIoU = 0.0

    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # traning process is corresponed to expectation
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_tb:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  global_step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  global_step)
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/step/sec', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnessary log and calculate
                    loss, t_loss, d_loss, lr = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    avg_t_loss += np.mean(np.array(t_loss))
                    avg_d_loss += np.mean(np.array(d_loss))
                    global_step += 1

                    if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        avg_t_loss /= args.log_steps
                        avg_d_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} teacher loss={:.4f} distill loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss,
                                 avg_t_loss, avg_d_loss, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        if args.use_tb:
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/speed', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        avg_t_loss = 0.0
                        avg_d_loss = 0.0
                        timer.restart()

            except fluid.core.EOFException:
                # Reader exhausted: end of epoch.
                data_loader.reset()
                break
            except Exception as e:
                # NOTE(review): broad catch keeps training alive on transient
                # errors but can mask real bugs — intentional in the original.
                print(e)

        if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
                or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(exe, fluid.default_main_program(), epoch)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(
                    cfg=cfg,
                    ckpt_dir=ckpt_dir,
                    use_gpu=args.use_gpu,
                    use_mpio=args.use_mpio)
                if args.use_tb:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
                                          global_step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
                                          global_step)
                if mean_iou > best_mIoU:
                    best_mIoU = mean_iou
                    update_best_model(ckpt_dir)
                    print_info("Save best model {} to {}, mIoU = {:.4f}".format(
                        ckpt_dir,
                        os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, 'best_model'),
                        mean_iou))

            # Use Tensorboard to visualize results
            if args.use_tb and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(
                    cfg=cfg,
                    use_gpu=args.use_gpu,
                    vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                    vis_dir="visual",
                    ckpt_dir=ckpt_dir,
                    log_writer=log_writer)
        if cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(exe, fluid.default_main_program(), epoch)

    # save final model
    if cfg.TRAINER_ID == 0:
        save_checkpoint(exe, fluid.default_main_program(), 'final')
def do_train(args):
    """Run the training process (sequence-labeling style model).

    Args:
        args: a DefaultArgs object (defined in utils.py) holding all model
            training parameters.

    Returns:
        The test program produced by training and the model's output
        variable (``train_ret['crf_decode']``).
    """
    # Graph is built into the default main/startup programs under a guard
    # so the seeds and unique-name scope apply to exactly this model.
    train_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    dataset = Dataset(args)

    with fluid.program_guard(train_program, startup_program):
        train_program.random_seed = args.random_seed
        startup_program.random_seed = args.random_seed

        with fluid.unique_name.guard():
            train_ret = create_model(args, dataset.vocab_size,
                                     dataset.num_labels, mode='train')
            # Test clone must be taken before optimizer ops are appended.
            test_program = train_program.clone(for_test=True)

            optimizer = fluid.optimizer.Adam(
                learning_rate=args.base_learning_rate)
            optimizer.minimize(train_ret["avg_cost"])

    # init executor
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        dev_count = min(multiprocessing.cpu_count(), args.cpu_num)
        os.environ['CPU_NUM'] = str(dev_count)
        place = fluid.CPUPlace()

    train_reader = create_pyreader(args,
                                   file_name=args.train_data,
                                   feed_list=train_ret['feed_list'],
                                   place=place,
                                   reader=dataset)

    test_reader = create_pyreader(args,
                                  file_name=args.test_data,
                                  feed_list=train_ret['feed_list'],
                                  place=place,
                                  reader=dataset,
                                  iterable=True)

    exe = fluid.Executor(place)
    exe.run(startup_program)

    if args.init_checkpoint:
        # Warm-start from a checkpoint and report its pre-training metrics.
        utils.init_pretraining_params(exe, args.init_checkpoint, train_program)
        test_process(exe, test_program, test_reader, train_ret)

    if dev_count > 1:
        # multi cpu/gpu config
        exec_strategy = fluid.ExecutionStrategy()
        build_strategy = fluid.compiler.BuildStrategy()
        compiled_prog = fluid.compiler.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_ret['avg_cost'].name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)
    else:
        compiled_prog = fluid.compiler.CompiledProgram(train_program)

    step = 0
    # NOTE(review): empty fetch_list — training runs for side effects only;
    # nothing is pulled back per step.
    fetch_list = []
    for epoch_id in range(args.epoch):
        for data in train_reader():
            outputs = exe.run(
                compiled_prog,
                fetch_list=fetch_list,
                feed=data[0], )
            step += 1
    # Final evaluation after all epochs.
    test_process(exe, test_program, test_reader, train_ret)
    return test_program, train_ret['crf_decode']
def train(conf_dict): """ train process """ # Get data layer data = layers.DataLayer() # Load network structure dynamically net = utils.import_class("nets", conf_dict["net"]["module_name"], conf_dict["net"]["class_name"])(conf_dict) # Load loss function dynamically loss = utils.import_class("losses", conf_dict["loss"]["module_name"], conf_dict["loss"]["class_name"])(conf_dict) # Load Optimization method optimizer = utils.import_class( "optimizers", "paddle_optimizers", conf_dict["optimizer"]["class_name"])(conf_dict) # Get service if "use_cuda" in conf_dict and conf_dict["use_cuda"] == 1: place = fluid.core.CUDAPlace(0) else: place = fluid.core.CPUPlace() if conf_dict["task_mode"] == "pairwise": # Build network left = data.ops(name="left", shape=[1], dtype="int64", lod_level=1) pos_right = data.ops(name="right", shape=[1], dtype="int64", lod_level=1) neg_right = data.ops(name="neg_right", shape=[1], dtype="int64", lod_level=1) left_feat, pos_score = net.predict(left, pos_right) _, neg_score = net.predict(left, neg_right) avg_cost = loss.compute(pos_score, neg_score) # Get Feeder and Reader feeder = fluid.DataFeeder( place=place, feed_list=[left.name, pos_right.name, neg_right.name]) reader = data_reader.get_reader(conf_dict, False, None) else: # Build network left = data.ops(name="left", shape=[1], dtype="int64", lod_level=1) right = data.ops(name="right", shape=[1], dtype="int64", lod_level=1) label = data.ops(name="label", shape=[1], dtype="int64", lod_level=0) left_feat, pred = net.predict(left, right) avg_cost = loss.compute(pred, label) avg_cost.persistable = True # Get Feeder and Reader feeder = fluid.DataFeeder( place=place, feed_list=[left.name, right.name, label.name]) reader = data_reader.get_reader(conf_dict, False, None) # Save Infer model infer_program = fluid.default_main_program().clone() # operate Optimization optimizer.ops(avg_cost) # optimize memory # fluid.memory_optimize(fluid.default_main_program()) executor = fluid.Executor(place) 
executor.run(fluid.default_startup_program()) # Get and run executor parallel_executor = fluid.ParallelExecutor( use_cuda="use_cuda" in conf_dict and conf_dict["use_cuda"] == 1, loss_name=avg_cost.name, main_program=fluid.default_main_program()) # Get device number device_count = parallel_executor.device_count logging.info("device count: %d" % device_count) # run train logging.info("start train process ...") for epoch_id in range(conf_dict["epoch_num"]): losses = [] # Get batch data iterator batch_data = paddle.batch(reader, conf_dict["batch_size"], drop_last=False) start_time = time.time() total_loss = 0.0 for iter, data in enumerate(batch_data()): if len(data) < device_count: continue avg_loss = parallel_executor.run([avg_cost.name], feed=feeder.feed(data)) total_loss += np.mean(avg_loss[0]) if (iter + 1) % 100 == 0: print("epoch: %d, iter: %d, loss: %f" % (epoch_id, iter, total_loss / 100)) total_loss = 0.0 losses.append(np.mean(avg_loss[0])) end_time = time.time() print("epoch: %d, loss: %f, used time: %f" % (epoch_id, np.mean(losses), end_time - start_time)) model_save_dir = conf_dict["model_path"] model_path = os.path.join(model_save_dir, str(epoch_id)) if not os.path.exists(model_save_dir): os.makedirs(model_save_dir) if conf_dict["task_mode"] == "pairwise": feed_var_names = [left.name, pos_right.name] target_vars = [left_feat, pos_score] else: feed_var_names = [left.name, right.name] target_vars = [left_feat, pred] fluid.io.save_inference_model(model_path, feed_var_names, target_vars, executor, infer_program)
# Gradient-clipping check fragment: compares gradients of a plain program
# against a clone with GradientClipByGlobalNorm applied, over a few MNIST
# batches.
# NOTE(review): this span is the tail of a larger test — `p_g`, `p_g_clip`,
# `prog`, `prog_clip`, `image`, `label`, `CLIP` and `BATCH_SIZE` are defined
# above the visible region; presumably `prog_clip` is the program that
# received the clip ops — confirm against the full file.
fluid.clip.set_gradient_clip(
    fluid.clip.GradientClipByGlobalNorm(clip_norm=CLIP))
# Append clip ops to the (param, grad) list of the clipped program.
p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)

# Gradient variables to fetch from each program.
grad_list = [elem[1] for elem in p_g]
grad_clip_list = [elem[1] for elem in p_g_clip]

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
exe.run(fluid.default_startup_program())

# Only a handful of batches are needed to compare clipped vs. unclipped
# gradients.
count = 0
for data in train_reader():
    count += 1
    if count > 5:
        break
    out = exe.run(prog, feed=feeder.feed(data), fetch_list=grad_list)
    out_clip = exe.run(prog_clip,
                       feed=feeder.feed(data),
                       fetch_list=grad_clip_list)
    # Recompute the global L2 norm of the unclipped gradients
    # (skipping out[0]) for comparison against CLIP.
    global_norm = 0
    for v in out[1:]:
        global_norm += np.sum(np.power(v, 2))
    global_norm = np.sqrt(global_norm)
def run_boxps_preload(self, is_cpu=True):
    """Exercise the BoxPS dataset load/preload pipeline end to end.

    Builds a tiny sparse-embedding + fc graph, writes two small slot-format
    data files, creates two BoxPSDataset instances, and overlaps training on
    dataset[0] with asynchronous preloading of dataset[1].  Cleans up the
    temp files at the end.

    Args:
        is_cpu: run on CPU when True (or when CUDA is unavailable),
            otherwise on GPU 0.
    """
    x = fluid.layers.data(name='x', shape=[1], dtype='int64', lod_level=0)
    y = fluid.layers.data(name='y', shape=[1], dtype='int64', lod_level=0)
    emb_x, emb_y = _pull_box_sparse([x, y], size=2)
    emb_xp = _pull_box_sparse(x, size=2)
    concat = layers.concat([emb_x, emb_y], axis=1)
    fc = layers.fc(input=concat,
                   name="fc",
                   size=1,
                   num_flatten_dims=1,
                   bias_attr=False)
    loss = layers.reduce_mean(fc)
    layers.Print(loss)
    place = fluid.CPUPlace(
    ) if is_cpu or not core.is_compiled_with_cuda() else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    batch_size = 2

    def binary_print(slot, fout):
        # Slot text format: "<len> v1 v2 ... " (trailing space intended).
        fout.write(str(len(slot)) + " ")
        for e in slot:
            fout.write(str(e) + " ")

    batch1 = np.ones(
        (batch_size, 2, 1)).astype("int64").reshape(batch_size, 2, 1)
    filelist = []
    place_str = "cpu" if is_cpu else "gpu"
    for i in range(2):
        filelist.append("test_hdfs_" + place_str + "_" + str(i))
    # Materialize the synthetic dataset files.
    for f in filelist:
        with open(f, "w") as fout:
            for ins in batch1:
                for slot in ins:
                    binary_print(slot, fout)
                fout.write("\n")

    def create_dataset():
        # Fresh BoxPSDataset bound to the same filelist/vars each time.
        dataset = fluid.DatasetFactory().create_dataset("BoxPSDataset")
        dataset.set_date("20190930")
        dataset.set_use_var([x, y])
        dataset.set_batch_size(2)
        dataset.set_thread(1)
        dataset.set_filelist(filelist)
        return dataset

    datasets = []
    datasets.append(create_dataset())
    datasets.append(create_dataset())
    optimizer = fluid.optimizer.SGD(learning_rate=0.5)
    optimizer = fluid.optimizer.PipelineOptimizer(optimizer,
                                                  cut_list=[],
                                                  place_list=[place],
                                                  concurrency_list=[1],
                                                  queue_size=1,
                                                  sync_steps=-1)
    optimizer.minimize(loss)
    exe.run(fluid.default_startup_program())
    # Synchronous load for pass 0 while pass 1's data preloads in the
    # background; wait_preload_done() joins before the second pass.
    datasets[0].load_into_memory()
    datasets[0].begin_pass()
    datasets[1].preload_into_memory()
    exe.train_from_dataset(program=fluid.default_main_program(),
                           dataset=datasets[0],
                           print_period=1)
    datasets[0].end_pass(True)
    datasets[1].wait_preload_done()
    datasets[1].begin_pass()
    exe.train_from_dataset(program=fluid.default_main_program(),
                           dataset=datasets[1],
                           print_period=1,
                           debug=True)
    datasets[1].end_pass(False)
    # Remove the temporary data files.
    for f in filelist:
        os.remove(f)
def train():
    """Train a crowd-counting FPN model from RecordIO files.

    The network regresses a density map (``predict1``) and a group-count
    distribution (``predict0``); the loss combines a density L1 term, a
    total-count regression term and a soft-label group cross-entropy,
    weighted by delta factors.  Evaluates on the held-out files every 1000
    steps and saves the best model (after step 10000).

    NOTE(review): ``create_group`` and ``FPN_and_groupout`` are project
    helpers defined outside this view.
    """
    def save_model(postfix):
        # Persist current parameters under ./work/<postfix>.
        model_path = os.path.join('./work', postfix)
        print('save models to %s' % (model_path))
        fluid.io.save_params(exe, model_path)

    def network(is_train):
        # Train reads all train*.recordio except train00 (held out);
        # test reads test.recordio plus the held-out train00.
        record_file = glob.glob('train*.recordio')
        print(record_file)
        test_file = 'train00.recordio'
        record_file.remove(test_file)
        test_file = ['test.recordio', 'train00.recordio']
        file_obj = fluid.layers.open_files(
            filenames=record_file if is_train else test_file,
            shapes=[[-1, 3, 540, 960], [-1, 1, 540, 960], [-1, 1], [-1, 1]],
            dtypes=['float32', 'float32', 'int64', 'int64'],
            lod_levels=[0, 0, 0, 0],
            pass_num=1000)
        file_obj = fluid.layers.shuffle(file_obj, 500)
        file_obj = fluid.layers.batch(file_obj,
                                      batch_size=6 if is_train else 100)
        img, des_im, total_num, group_num = fluid.layers.read_file(file_obj)
        print('read over')
        # here is the data
        total_num = fluid.layers.cast(total_num, dtype="float32")
        group_num = create_group(group_num)
        predict1, predict0 = FPN_and_groupout(img)
        # build our network
        delta0 = 100
        delta1 = 100
        # loss0: L1 distance between predicted and target density maps.
        loss0 = fluid.layers.elementwise_sub(predict1, des_im)
        loss0 = fluid.layers.reduce_mean(fluid.layers.abs(loss0))
        # loss1: squared error between summed density and the true count.
        loss1 = fluid.layers.reduce_mean(
            fluid.layers.square_error_cost(input=fluid.layers.reduce_sum(
                predict1, dim=[2, 3]),
                                           label=total_num))
        # loss2: soft-label cross-entropy over group-count buckets.
        loss2 = fluid.layers.cross_entropy(input=predict0,
                                           label=group_num,
                                           soft_label=True)
        loss2 = fluid.layers.reduce_mean(loss2)
        loss = fluid.layers.reduce_mean(
            fluid.layers.elementwise_add(
                fluid.layers.elementwise_add(loss0 * delta0, loss1),
                delta1 * loss2))
        # here if we only use loss0, then the final loss is about e-05,
        # and the error is about e+01, so we add a delta in the loss
        return loss, predict1, total_num

    # Train graph goes into the default main program; test graph into its
    # own program sharing parameters via the unique-name guard.
    with fluid.unique_name.guard():
        train_loss, pre_train, tr_num = network(is_train=True)
        optimizer = fluid.optimizer.AdamOptimizer(
            learning_rate=fluid.layers.exponential_decay(0.0001, 4000, 0.9))
        optimizer.minimize(train_loss)

    test_program = fluid.Program()
    with fluid.unique_name.guard():
        with fluid.program_guard(test_program, fluid.Program()):
            loss, pre, true_num = network(is_train=False)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    min_error = 1000
    for i in range(200000):
        loss_n, pretrain, tr_train = exe.run(
            program=fluid.default_main_program(),
            fetch_list=[train_loss.name, pre_train.name, tr_num.name])
        # Relative counting error on the training batch.
        acc0 = np.abs(
            np.rint(np.sum(np.sum(pretrain, axis=-1), axis=-1)) -
            np.rint(tr_train)) / tr_train
        av_acc0 = np.sum(acc0) / np.shape(acc0)[0]
        print("step {} train loss is {}, train error is {}".format(
            i, loss_n, av_acc0))
        if i % 1000 == 0:
            #save_model(str(i))
            pre_map, tr_nums = exe.run(program=test_program,
                                       fetch_list=[pre.name, true_num.name])
            acc = np.abs(
                np.rint(np.sum(np.sum(pre_map, axis=-1), axis=-1)) -
                np.rint(tr_nums)) / tr_nums
            acc_mae = np.sum(
                np.abs(
                    np.rint(np.sum(np.sum(pre_map, axis=-1), axis=-1)) -
                    np.rint(tr_nums))) / np.shape(acc)[0]
            av_acc = np.sum(acc) / np.shape(acc)[0]
            # Keep only the best model, and only after warm-up (step>10000).
            if av_acc < min_error:
                min_error = av_acc
                if i > 10000:
                    save_model(str(i))
            print("MAE is {}".format(acc_mae))
            print("min erro is {}".format(min_error))
            print("average error is {}".format(av_acc))
def train(args):
    """Train ICNet on Cityscapes with the cascade (sub4/sub24/sub124) loss.

    Builds the three-branch ICNet graph, combines the per-branch losses with
    the LAMBDA* weights, optimizes with Momentum + poly-decayed LR, and runs
    an open-ended loop until TOTAL_STEP iterations, logging and
    checkpointing periodically.

    Args:
        args: parsed CLI options (use_gpu, batch_size, random_mirror,
            random_scaling, init_model, checkpoint_path).

    NOTE(review): LAMBDA1/2/3, TOTAL_STEP, LOG_PERIOD, CHECKPOINT_PERIOD,
    ``no_grad_set``, ``poly_decay`` and ``create_loss`` are module-level
    names defined outside this view.
    """
    data_shape = cityscape.train_data_shape()
    num_classes = cityscape.num_classes()
    # define network
    images = fluid.layers.data(name='image', shape=data_shape, dtype='float32')
    label_sub1 = fluid.layers.data(name='label_sub1', shape=[1], dtype='int32')
    label_sub2 = fluid.layers.data(name='label_sub2', shape=[1], dtype='int32')
    label_sub4 = fluid.layers.data(name='label_sub4', shape=[1], dtype='int32')
    mask_sub1 = fluid.layers.data(name='mask_sub1', shape=[-1], dtype='int32')
    mask_sub2 = fluid.layers.data(name='mask_sub2', shape=[-1], dtype='int32')
    mask_sub4 = fluid.layers.data(name='mask_sub4', shape=[-1], dtype='int32')

    sub4_out, sub24_out, sub124_out = icnet(
        images, num_classes, np.array(data_shape[1:]).astype("float32"))
    loss_sub4 = create_loss(sub4_out, label_sub4, mask_sub4, num_classes)
    loss_sub24 = create_loss(sub24_out, label_sub2, mask_sub2, num_classes)
    loss_sub124 = create_loss(sub124_out, label_sub1, mask_sub1, num_classes)
    # Weighted cascade loss over the three resolution branches.
    reduced_loss = LAMBDA1 * loss_sub4 + LAMBDA2 * loss_sub24 + LAMBDA3 * loss_sub124

    regularizer = fluid.regularizer.L2Decay(0.0001)
    optimizer = fluid.optimizer.Momentum(learning_rate=poly_decay(),
                                         momentum=0.9,
                                         regularization=regularizer)
    _, params_grads = optimizer.minimize(reduced_loss, no_grad_set=no_grad_set)

    # prepare environment
    place = fluid.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    exe.run(fluid.default_startup_program())

    if args.init_model is not None:
        print("load model from: %s" % args.init_model)
        sys.stdout.flush()
        fluid.io.load_params(exe, args.init_model)

    iter_id = 0
    t_loss = 0.
    sub4_loss = 0.
    sub24_loss = 0.
    sub124_loss = 0.
    train_reader = cityscape.train(args.batch_size,
                                   flip=args.random_mirror,
                                   scaling=args.random_scaling)
    start_time = time.time()
    while True:
        # train a pass
        for data in train_reader():
            if iter_id > TOTAL_STEP:
                # Done: emit the KPI line and return from the whole function.
                end_time = time.time()
                print("kpis train_duration %f" % (end_time - start_time))
                return
            iter_id += 1

            results = exe.run(
                feed=get_feeder_data(data, place),
                fetch_list=[reduced_loss, loss_sub4, loss_sub24, loss_sub124])
            t_loss += results[0]
            sub4_loss += results[1]
            sub24_loss += results[2]
            sub124_loss += results[3]
            # training log
            if iter_id % LOG_PERIOD == 0:
                print(
                    "Iter[%d]; train loss: %.3f; sub4_loss: %.3f; sub24_loss: %.3f; sub124_loss: %.3f"
                    % (iter_id, t_loss / LOG_PERIOD, sub4_loss / LOG_PERIOD,
                       sub24_loss / LOG_PERIOD, sub124_loss / LOG_PERIOD))
                print("kpis train_cost %f" % (t_loss / LOG_PERIOD))
                # Reset running sums for the next logging window.
                t_loss = 0.
                sub4_loss = 0.
                sub24_loss = 0.
                sub124_loss = 0.
                sys.stdout.flush()

            if iter_id % CHECKPOINT_PERIOD == 0 and args.checkpoint_path is not None:
                dir_name = args.checkpoint_path + "/" + str(iter_id)
                fluid.io.save_persistables(exe, dirname=dir_name)
                print("Saved checkpoint: %s" % (dir_name))
def train(word_dict, net_method, use_cuda, seed, quality, save_dirname=None):
    """Train an IMDB sentiment model until validation accuracy reaches
    ``quality``.

    Args:
        word_dict: vocabulary mapping for the IMDB dataset.
        net_method: callable building the network; returns
            (cost, accuracy, prediction).
        use_cuda: run on GPU 0 when True, else CPU.
        seed: seed for both the batch shuffler and weight initialization.
        quality: target validation accuracy (percent) that stops training.
        save_dirname: unused here — presumably reserved for model saving
            by callers; confirm against the full file.

    NOTE(review): uses ``xrange`` — this file targets Python 2.
    """
    BATCH_SIZE = 128
    PASS_NUM = 100
    dict_dim = len(word_dict)
    class_dim = 2
    target_val_acc = quality

    # Seed for batch producer
    random.seed(seed)

    # Seed for weight initialization
    fluid.default_startup_program().random_seed = seed

    # Setup input features and label as data layers
    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")

    cost, acc_out, prediction = net_method(
        data, label, input_dim=dict_dim, class_dim=class_dim)

    # Initialize a test program for obtaining test accuracy and cost
    # (cloned before the optimizer ops are appended).
    test_program = fluid.default_main_program().clone(for_test=True)

    # Setup Adam optimizer
    adam = fluid.optimizer.Adam(learning_rate=0.0005)
    #Learning rate of 5e-4 works for conv models and 2e-3 for LSTM model
    optimize_ops, params_grads = adam.minimize(cost)

    # Create reader to iterate over training set
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=25000),
        batch_size=BATCH_SIZE)

    # Setup place and executor for runtime
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    # Create reader to iterate over validation set
    test_reader = paddle.batch(
        paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)

    def train_loop(main_program):
        # Runs up to PASS_NUM epochs; stops early once validation accuracy
        # exceeds the requested quality threshold.
        exe.run(fluid.default_startup_program())

        for pass_id in xrange(PASS_NUM):
            train_loss_set = []
            train_acc_set = []
            # Calculate average training loss and accuracy
            # across all mini-batches in the training set
            for batch_id, data in enumerate(train_reader()):
                cost_val, acc_val = exe.run(main_program,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                train_loss_set.append(float(cost_val))
                train_acc_set.append(float(acc_val))

            train_loss = np.array(train_loss_set).mean()
            train_acc = np.array(train_acc_set).mean() * 100

            # Calculate average validation loss and accuracy
            # across all mini-batches in the validation set
            acc_set = []
            avg_loss_set = []
            for tid, test_data in enumerate(test_reader()):
                avg_loss_np, acc_np = exe.run(
                    program=test_program,
                    feed=feeder.feed(test_data),
                    fetch_list=[cost, acc_out])
                acc_set.append(float(acc_np))
                avg_loss_set.append(float(avg_loss_np))

            acc_val = np.array(acc_set).mean() * 100
            avg_loss_val = np.array(avg_loss_set).mean()

            print("Epoch =", pass_id, ", train-accuracy =", train_acc,
                  ", train-loss =", train_loss, ", validation-accuracy =",
                  acc_val, ", validation-loss =", avg_loss_val)

            if acc_val > target_val_acc:
                ## Exit the program on reaching desired accuracy value
                break

    train_loop(fluid.default_main_program())
def test_ptb_rnn_cpu_float32(self):
    """Run the same PTB RNN language model once in dygraph (imperative)
    mode and once as a static graph, then assert that the final loss, the
    final hidden/cell states, and every parameter value (both the initial
    random values and the values after training) match exactly between the
    two execution modes."""
    # Fixed hyper-parameters so both runs build identical models.
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4
    batch_num = 200

    # ---- Dygraph (imperative) run ----
    with fluid.dygraph.guard():
        # Same seed on both programs so weight init matches the static run.
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel(
            "ptb_model",
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            num_layers=num_layers,
            num_steps=num_steps,
            init_scale=init_scale)

        sgd = SGDOptimizer(learning_rate=1e-3)
        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None

        for i in range(batch_num):
            # Deterministic synthetic batch: tokens 0..11 as inputs,
            # 1..12 as labels, reshaped to (batch, num_steps, 1) and
            # (batch * num_steps, 1) respectively.
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            x_data = x_data.reshape((-1, num_steps, 1))
            y_data = y_data.reshape((-1, 1))
            # Zero-initialized recurrent state for every batch.
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            x = to_variable(x_data)
            y = to_variable(y_data)
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)
            dy_loss, last_hidden, last_cell = ptb_model(
                x, y, init_hidden, init_cell)
            if i == 0:
                # Snapshot parameters right after initialization.
                for param in ptb_model.parameters():
                    dy_param_init[param.name] = param.numpy()
            dy_loss.backward()
            sgd.minimize(dy_loss)
            ptb_model.clear_gradients()
            if i == batch_num - 1:
                # Snapshot parameters after the final update.
                for param in ptb_model.parameters():
                    dy_param_updated[param.name] = param.numpy()
        # Keep the last-iteration outputs for the comparisons below.
        dy_loss_value = dy_loss.numpy()
        dy_last_cell_value = last_cell.numpy()
        dy_last_hidden_value = last_hidden.numpy()

    # ---- Static-graph run, in fresh programs/scope ----
    with new_program_scope():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        ptb_model = PtbModel(
            "ptb_model",
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            num_layers=num_layers,
            num_steps=num_steps,
            init_scale=init_scale)

        exe = fluid.Executor(fluid.CPUPlace(
        ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
        sgd = SGDOptimizer(learning_rate=1e-3)
        # Graph inputs mirroring the dygraph feeds.
        # NOTE(review): y is declared float32 here although int64 data is
        # fed below — presumably relies on implicit conversion; confirm.
        x = fluid.layers.data(
            name="x", shape=[-1, num_steps, 1], dtype='int64')
        y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
        init_hidden = fluid.layers.data(
            name="init_hidden", shape=[1], dtype='float32')
        init_cell = fluid.layers.data(
            name="init_cell", shape=[1], dtype='float32')

        static_loss, static_last_hidden, static_last_cell = ptb_model(
            x, y, init_hidden, init_cell)
        sgd.minimize(static_loss)
        static_param_updated = dict()
        static_param_init = dict()
        static_param_name_list = list()
        for param in ptb_model.parameters():
            static_param_name_list.append(param.name)

        # Running the startup program initializes the weights; fetching
        # them captures the same "initial parameters" snapshot as dygraph.
        out = exe.run(framework.default_startup_program(),
                      fetch_list=static_param_name_list)

        for i in range(len(static_param_name_list)):
            static_param_init[static_param_name_list[i]] = out[i]
        static_loss_value = None
        static_last_cell_value = None
        static_last_hidden_value = None
        for i in range(batch_num):
            # Identical synthetic batch to the dygraph loop above.
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            x_data = x_data.reshape((-1, num_steps, 1))
            y_data = y_data.reshape((-1, 1))
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            # Fetch the three outputs first, then every parameter, so the
            # final-iteration snapshot can be read from indices >= 3.
            fetch_list = [
                static_loss, static_last_hidden, static_last_cell
            ]
            fetch_list.extend(static_param_name_list)
            out = exe.run(fluid.default_main_program(),
                          feed={
                              "x": x_data,
                              "y": y_data,
                              "init_hidden": init_hidden_data,
                              "init_cell": init_cell_data
                          },
                          fetch_list=fetch_list)
            static_loss_value = out[0]
            static_last_hidden_value = out[1]
            static_last_cell_value = out[2]
            if i == batch_num - 1:
                # Parameters start at index 3 of the fetch results.
                for k in range(3, len(out)):
                    static_param_updated[static_param_name_list[
                        k - 3]] = out[k]

    # Dygraph and static-graph executions must agree exactly.
    self.assertTrue(np.array_equal(static_loss_value, dy_loss_value))
    self.assertTrue(
        np.array_equal(static_last_cell_value, dy_last_cell_value))
    self.assertTrue(
        np.array_equal(static_last_hidden_value, dy_last_hidden_value))
    for key, value in six.iteritems(static_param_init):
        self.assertTrue(np.array_equal(value, dy_param_init[key]))
    for key, value in six.iteritems(static_param_updated):
        self.assertTrue(np.array_equal(value, dy_param_updated[key]))
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import unittest import paddle import paddle.fluid.core as core import paddle.fluid as fluid from paddle.fluid.backward import append_backward import paddle.fluid.framework as framework from paddle.fluid.framework import Program, switch_main_program import bisect import numpy as np fluid.default_startup_program().random_seed = 1 class TestDyRnnStaticInput(unittest.TestCase): def setUp(self): self._delta = 0.005 self._max_sequence_len = 3 self._program = Program() switch_main_program(self._program) self.output_dim = 10 self.place = core.CPUPlace() self.prepare_x_tensor() self.prepare_static_input_tensor() self.exe = fluid.Executor(self.place) def prepare_x_tensor(self):
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import paddle import paddle.fluid as fluid import math import sys # need to fix random seed and training data to compare the loss # value accurately calculated by the default and the memory optimization # version. fluid.default_startup_program().random_seed = 111 x = fluid.layers.data(name='x', shape=[13], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32') device_type = 'CPU' use_nccl = False place = fluid.CPUPlace() if fluid.core.is_compiled_with_cuda(): device_type = 'CUDA' use_nccl = False place = fluid.CUDAPlace(0) places = fluid.layers.get_places(device_count=0, device_type=device_type) pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl) with pd.do():