def main():
    """Parse the CLI arguments, build the selected model and launch training.

    The model module is imported dynamically as ``models.<args.model>`` and
    its ``get_model(args)`` result becomes the head of ``train_args``; the
    remaining entries are appended here, depending on ``args.update_method``
    and ``args.gpus``, before ``train`` / ``train_parallel`` is called.

    Raises:
        Exception: if ``dist_transpile`` yields no trainer program in
            "pserver" mode, or if multi-GPU training is requested while
            ``args.device`` is "CPU".

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source -- confirm against the original file.
    """
    args = parse_args()
    print_arguments(args)

    # the unique trainer id, starting from 0, needed by trainer
    # only
    nccl_id_var, num_trainers, trainer_id = (
        None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1")))

    if args.use_cprof:
        # The profiler is only enabled here; no disable/report call is
        # visible in this function.
        pr = cProfile.Profile()
        pr.enable()
    model_def = __import__("models.%s" % args.model, fromlist=["models"])
    train_args = list(model_def.get_model(args))
    train_args.append(args)
    # Run optimizer.minimize(avg_loss)
    # (train_args[2] is the optimizer and train_args[0] the loss.)
    train_args[2].minimize(train_args[0])
    if args.memory_optimize:
        fluid.memory_optimize(fluid.default_main_program())

    if args.update_method == "pserver":
        train_prog, startup_prog = dist_transpile(trainer_id)
        if not train_prog:
            raise Exception(
                "Must configure correct environments to run dist train.")
        train_args.extend([train_prog, startup_prog])
        if args.gpus > 1 and os.getenv("PADDLE_TRAINING_ROLE") == "TRAINER":
            train_args.extend([nccl_id_var, num_trainers, trainer_id])
            train_parallel(*train_args)
        # NOTE(review): as reconstructed, train() also runs after
        # train_parallel() on the multi-GPU trainer path, with the extended
        # argument list -- confirm this is intended.
        train(*train_args)
        exit(0)

    # for other update methods, use default programs
    train_args.append(fluid.default_main_program())
    train_args.append(fluid.default_startup_program())

    if args.update_method == "nccl2":
        nccl_id_var, num_trainers, trainer_id = append_nccl2_prepare(trainer_id)
    if args.gpus == 1:
        # NOTE: parallel executor use profiler internally
        if args.use_nvprof and args.device == 'GPU':
            with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
                train(*train_args)
        else:
            train(*train_args)
    else:
        if args.device == "CPU":
            raise Exception("Only support GPU perf with parallel exe")
        train_args.extend([nccl_id_var, num_trainers, trainer_id])
        train_parallel(*train_args)
def main():
    """Train the encoder-decoder on WMT14 for a few batches on CPU.

    Builds the cross-entropy loss on top of ``encoder_decoder()``, applies
    Adagrad, calls ``fluid.release_memory`` on the main program and then
    feeds unshuffled batches. The process exits with status 0 once
    ``batch_id`` exceeds 2, or aborts with a message if the loss is NaN.
    """
    decoder_out = encoder_decoder()
    next_word = layers.data(
        name="target_language_next_word",
        shape=[1],
        dtype='int64',
        lod_level=1)
    avg_cost = fluid.layers.mean(
        layers.cross_entropy(input=decoder_out, label=next_word))

    fluid.optimizer.Adagrad(learning_rate=1e-4).minimize(avg_cost)

    # fluid.memory_optimize(fluid.default_main_program())
    fluid.release_memory(fluid.default_main_program())

    # Keep the training data order fixed (no shuffling).
    batched_reader = paddle.batch(
        paddle.dataset.wmt14.train(dict_size), batch_size=batch_size)

    place = core.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    def _column(samples, idx):
        # Gather field `idx` of every sample into one LoDTensor.
        return to_lodtensor([sample[idx] for sample in samples], place)

    batch_id = 0
    for pass_id in xrange(10):
        for samples in batched_reader():
            feed_dict = {
                'src_word_id': _column(samples, 0),
                'target_language_word': _column(samples, 1),
                'target_language_next_word': _column(samples, 2),
            }
            fetched = exe.run(fluid.default_main_program(),
                              feed=feed_dict,
                              fetch_list=[avg_cost])
            avg_cost_val = np.array(fetched[0])
            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
                  " avg_cost=" + str(avg_cost_val))
            # Stop early: only a handful of batches are needed.
            if batch_id > 2:
                exit(0)
            if math.isnan(float(avg_cost_val)):
                sys.exit("got NaN loss, training failed.")
            batch_id += 1
def get_model(args):
    """Build the MNIST CNN training graph and its batched readers.

    Args:
        args: parsed options; only ``args.batch_size`` is read here.

    Returns:
        tuple: ``(avg_cost, inference_program, opt, train_reader,
        test_reader, batch_acc)``. ``opt.minimize`` is not called here.
    """
    # Feed placeholders.
    pixel_var = fluid.layers.data(
        name='pixel', shape=[1, 28, 28], dtype=DTYPE)
    label_var = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Forward pass and mean cross-entropy loss.
    prediction = cnn_model(pixel_var)
    avg_cost = fluid.layers.mean(
        x=fluid.layers.cross_entropy(input=prediction, label=label_var))

    # Per-batch accuracy; the `total` output lands in a separate tensor.
    sample_count = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=prediction, label=label_var, total=sample_count)

    # Clone the program before any optimizer is applied to it.
    inference_program = fluid.default_main_program().clone()

    opt = fluid.optimizer.AdamOptimizer(
        learning_rate=0.001, beta1=0.9, beta2=0.999)

    def _batched(dataset):
        return paddle.batch(dataset, batch_size=args.batch_size)

    train_reader = _batched(paddle.dataset.mnist.train())
    test_reader = _batched(paddle.dataset.mnist.test())
    return (avg_cost, inference_program, opt, train_reader, test_reader,
            batch_acc)
def run_executor(exe, feed, fetch_list, program=None):
    """Run one step on either a ParallelExecutor or a plain Executor.

    Args:
        exe: a ``fluid.ParallelExecutor`` or ``fluid.Executor``.
        feed: feed data passed through to ``exe.run``.
        fetch_list: variables to fetch.
        program: program for the plain ``Executor`` path; defaults to
            ``fluid.default_main_program()``. Ignored for ParallelExecutor.

    Returns:
        Whatever ``exe.run`` returns.

    Raises:
        ValueError: if ``exe`` is neither executor type.
    """
    if isinstance(exe, fluid.ParallelExecutor):
        return exe.run(fetch_list=fetch_list, feed=feed)
    if not isinstance(exe, fluid.Executor):
        raise ValueError('Unkown type exe')
    target = fluid.default_main_program() if program is None else program
    return exe.run(program=target, feed=feed, fetch_list=fetch_list)
def test_calc_gradient(self):
    """Smoke test: gradients of mean(x * y) w.r.t. the product and x run."""
    lhs = layers.create_parameter(dtype="float32", shape=[5, 10])
    rhs = layers.create_parameter(dtype="float32", shape=[10, 8])
    product = layers.mul(x=lhs, y=rhs)
    loss = layers.mean(product)

    grad_wrt_product = calc_gradient(loss, product)
    grad_wrt_lhs = calc_gradient(loss, lhs)

    executor = fluid.Executor(fluid.CPUPlace())
    executor.run(fluid.default_startup_program())
    # Nothing is asserted; the test passes if both gradients can be fetched.
    executor.run(fluid.default_main_program(),
                 feed={},
                 fetch_list=[grad_wrt_product, grad_wrt_lhs])
def test_fetch_var(self):
    """A persistable tensor named "x" is readable through fluid.fetch_var."""
    expected = numpy.array([1, 3, 5]).astype(numpy.int32)
    tensor = layers.create_tensor(dtype="int32", persistable=True, name="x")
    layers.assign(input=expected, output=tensor)

    executor = fluid.Executor(fluid.CPUPlace())
    executor.run(fluid.default_main_program(), feed={}, fetch_list=[])

    fetched_x = fluid.fetch_var("x")
    failure_msg = "fetch_x=%s val=%s" % (fetched_x, expected)
    self.assertTrue(numpy.array_equal(fetched_x, expected), failure_msg)
    self.assertEqual(fetched_x.dtype, expected.dtype)
def test_assign(self):
    """Assigning a random int32 array to a tensor round-trips value and dtype."""
    random_part = 200 * numpy.random.random(size=(2, 5))
    expected = (-100 + random_part).astype(numpy.int32)

    tensor = layers.create_tensor(dtype="float32")
    layers.assign(input=expected, output=tensor)

    executor = fluid.Executor(fluid.CPUPlace())
    results = executor.run(fluid.default_main_program(),
                           feed={},
                           fetch_list=[tensor])
    fetched_x = results[0]

    failure_msg = "fetch_x=%s val=%s" % (fetched_x, expected)
    self.assertTrue(numpy.array_equal(fetched_x, expected), failure_msg)
    self.assertEqual(fetched_x.dtype, expected.dtype)
def get_model(args):
    """Build the VGG16 training graph, inference program and readers.

    Args:
        args: parsed options; reads ``data_set``, ``data_format``,
            ``learning_rate`` and ``batch_size``.

    Returns:
        tuple: ``(avg_cost, inference_program, optimizer, train_reader,
        test_reader, batch_acc)``. ``optimizer.minimize`` is not called here.
    """
    use_cifar = args.data_set == "cifar10"
    # cifar10: 10 classes of 32x32 images; otherwise 102 classes of 224x224.
    classdim, side = (10, 32) if use_cifar else (102, 224)
    if args.data_format == 'NCHW':
        data_shape = [3, side, side]
    else:
        data_shape = [side, side, 3]

    # Feed placeholders.
    pixel_var = fluid.layers.data(
        name='pixel', shape=data_shape, dtype='float32')
    label_var = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Forward pass and mean cross-entropy loss.
    features = vgg16_bn_drop(pixel_var)
    prediction = fluid.layers.fc(input=features, size=classdim, act='softmax')
    avg_cost = fluid.layers.mean(
        x=fluid.layers.cross_entropy(input=prediction, label=label_var))

    # Per-batch accuracy; the `total` output lands in a separate tensor.
    sample_count = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=prediction, label=label_var, total=sample_count)

    # Clone the program, then derive the inference program for the
    # accuracy targets inside a guard on that clone.
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, sample_count])

    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)

    # Readers: shuffled for training, in order for testing.
    if args.data_set == 'cifar10':
        train_source = paddle.dataset.cifar.train10()
    else:
        train_source = paddle.dataset.flowers.train()
    train_reader = paddle.batch(
        paddle.reader.shuffle(train_source, buf_size=5120),
        batch_size=args.batch_size)

    if args.data_set == 'cifar10':
        test_source = paddle.dataset.cifar.test10()
    else:
        test_source = paddle.dataset.flowers.test()
    test_reader = paddle.batch(test_source, batch_size=args.batch_size)

    return (avg_cost, inference_program, optimizer, train_reader,
            test_reader, batch_acc)
def run_program(self):
    """Run the default program on every available place.

    CPU is always used; CUDA device 0 is added when Paddle was compiled
    with CUDA. The raw (non-numpy) fetch results, one entry per place, are
    stored in ``self.actual_outputs``.
    """
    target_places = [core.CPUPlace()]
    if core.is_compiled_with_cuda():
        target_places.append(core.CUDAPlace(0))

    collected = []
    for device in target_places:
        self.set_inputs(device)
        executor = fluid.Executor(device)
        executor.run(fluid.default_startup_program())
        fetched = executor.run(fluid.default_main_program(),
                               feed=self.inputs,
                               fetch_list=self.fetch_list,
                               return_numpy=False)
        collected.append(fetched)
    self.actual_outputs = collected
def run_program(self):
    """Run the test program on each available place.

    CPU is always used; CUDA device 0 is added when Paddle was compiled
    with CUDA. ``self.op_output`` holds the numpy results of the run on the
    last place visited.
    """
    target_places = [core.CPUPlace()]
    if core.is_compiled_with_cuda():
        target_places.append(core.CUDAPlace(0))

    for device in target_places:
        self.set_inputs(device)
        executor = fluid.Executor(device)
        # Overwritten on every iteration: only the last place's result stays.
        self.op_output = executor.run(fluid.default_main_program(),
                                      feed=self.inputs,
                                      fetch_list=self.fetch_list,
                                      return_numpy=True)
def test_nvprof(self):
    """Run a small conv2d under the CUDA profiler and remove its output file.

    Returns immediately when Paddle was not compiled with CUDA.
    """
    if not fluid.core.is_compiled_with_cuda():
        return

    num_iters = 8
    batch_shape = [4, 3, 28, 28]
    profile_path = 'cuda_profiler.txt'

    image = layers.data(name='data', shape=[3, 28, 28], dtype='float32')
    layers.conv2d(image, 20, 3, stride=[1, 1], padding=[1, 1])

    executor = fluid.Executor(fluid.CUDAPlace(0))
    executor.run(fluid.default_startup_program())

    with profiler.cuda_profiler(profile_path, 'csv'):
        for _ in range(num_iters):
            batch = np.random.random(batch_shape).astype('float32')
            executor.run(fluid.default_main_program(), feed={'data': batch})
    os.remove(profile_path)
def check_result(self, fn, place, dtype):
    """Check that ``fn`` gives the same result on numpy arrays and fluid vars.

    Args:
        fn: binary callable applied first to two numpy arrays and then to
            two fluid global variables.
        place: device the executor runs on.
        dtype: element type for both the numpy data and the fluid variables.
    """
    dims = [9, 10]
    x_data = np.random.random(size=dims).astype(dtype)
    y_data = np.random.random(size=dims).astype(dtype)
    expected = fn(x_data, y_data)

    def _zero_global(var_name):
        # Persistable zero-filled global variable, fed by name at run time.
        return layers.create_global_var(
            name=var_name,
            shape=dims,
            value=0.0,
            dtype=dtype,
            persistable=True)

    x_var = _zero_global('x')
    y_var = _zero_global('y')
    result_var = fn(x_var, y_var)

    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())
    fetched = executor.run(fluid.default_main_program(),
                           feed={'x': x_data,
                                 'y': y_data},
                           fetch_list=[result_var])
    np.testing.assert_array_equal(expected, fetched[0])
def train(use_cuda, save_dirname=None, is_local=True):
    """Build and train the db_lstm + linear-chain CRF tagger on CoNLL05.

    Args:
        use_cuda: run on ``CUDAPlace(0)`` when true, otherwise on CPU.
        save_dirname: if not None, an inference model is saved there once
            the cost threshold is reached.
        is_local: train in-process when true; otherwise transpile for
            parameter-server training using environment variables.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source -- confirm against the original file.
    """
    # define network topology
    word = fluid.layers.data(
        name='word_data', shape=[1], dtype='int64', lod_level=1)
    predicate = fluid.layers.data(
        name='verb_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n2 = fluid.layers.data(
        name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n1 = fluid.layers.data(
        name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_0 = fluid.layers.data(
        name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p1 = fluid.layers.data(
        name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p2 = fluid.layers.data(
        name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
    mark = fluid.layers.data(
        name='mark_data', shape=[1], dtype='int64', lod_level=1)
    # WARNING: every local defined so far (including the function
    # arguments) is forwarded by name, so renaming, adding or reordering
    # locals above this line changes what db_lstm receives.
    feature_out = db_lstm(**locals())
    target = fluid.layers.data(
        name='target', shape=[1], dtype='int64', lod_level=1)
    crf_cost = fluid.layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=mix_hidden_lr))
    avg_cost = fluid.layers.mean(crf_cost)

    # TODO(qiao)
    # check other optimizers and check why out will be NAN
    sgd_optimizer = fluid.optimizer.SGD(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=0.01,
            decay_steps=100000,
            decay_rate=0.5,
            staircase=True))
    sgd_optimizer.minimize(avg_cost)

    # TODO(qiao)
    # add dependency track and move this config before optimizer
    # The decoder uses the same parameter name ('crfw') as the CRF cost.
    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    # NOTE(review): the conll05 *test* split is used as training data here.
    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
        batch_size=BATCH_SIZE)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    feeder = fluid.DataFeeder(
        feed_list=[
            word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target
        ],
        place=place)
    exe = fluid.Executor(place)

    def train_loop(main_program):
        """Initialise parameters, load the embedding and run training."""
        exe.run(fluid.default_startup_program())
        # Overwrite the embedding table with the embedding loaded from
        # conll05.get_embedding().
        embedding_param = fluid.global_scope().find_var(
            embedding_name).get_tensor()
        embedding_param.set(
            load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
            place)

        start_time = time.time()
        batch_id = 0
        for pass_id in xrange(PASS_NUM):
            for data in train_data():
                cost = exe.run(main_program,
                               feed=feeder.feed(data),
                               fetch_list=[avg_cost])
                cost = cost[0]

                # Progress and the stop condition are evaluated only on
                # every 10th batch.
                if batch_id % 10 == 0:
                    print("avg_cost:" + str(cost))
                    if batch_id != 0:
                        print("second per batch: " + str((time.time(
                        ) - start_time) / batch_id))
                    # Set the threshold low to speed up the CI test
                    if float(cost) < 60.0:
                        if save_dirname is not None:
                            # TODO(liuyiqun): Change the target to crf_decode
                            fluid.io.save_inference_model(save_dirname, [
                                'word_data', 'verb_data', 'ctx_n2_data',
                                'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
                                'ctx_p2_data', 'mark_data'
                            ], [feature_out], exe)
                        return

                batch_id = batch_id + 1

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        # Distributed mode: the cluster layout comes from the environment.
        # PADDLE_INIT_PSERVERS, TRAINERS, POD_IP and PADDLE_INIT_TRAINER_ID
        # have no defaults, so each must be set.
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
def train(net_type, use_cuda, save_dirname, is_local):
    """Train a VGG or ResNet classifier on CIFAR-10.

    Args:
        net_type: "vgg" or "resnet".
        use_cuda: run on ``CUDAPlace(0)`` when true, otherwise on CPU.
        save_dirname: directory the inference model is saved to once the
            accuracy threshold is reached.
        is_local: train in-process when true; otherwise transpile for
            parameter-server training using environment variables.

    Raises:
        ValueError: if ``net_type`` is not one of the supported names.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source -- confirm against the original file.
    """
    classdim = 10
    data_shape = [3, 32, 32]

    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if net_type == "vgg":
        print("train vgg net")
        net = vgg16_bn_drop(images)
    elif net_type == "resnet":
        print("train resnet")
        net = resnet_cifar10(images, 32)
    else:
        raise ValueError("%s network is not supported" % net_type)

    predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(cost)
    acc = fluid.layers.accuracy(input=predict, label=label)

    # Test program
    # Cloned before minimize() is called on the main program.
    test_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer.minimize(avg_cost)

    BATCH_SIZE = 128
    PASS_NUM = 1

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.cifar.train10(), buf_size=128 * 10),
        batch_size=BATCH_SIZE)

    test_reader = paddle.batch(
        paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])

    def train_loop(main_program):
        """Train, evaluating one test batch after every 10th training batch."""
        exe.run(fluid.default_startup_program())
        # NOTE(review): `loss` is assigned but never read in this function.
        loss = 0.0
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                exe.run(main_program, feed=feeder.feed(data))

                if (batch_id % 10) == 0:
                    acc_list = []
                    avg_loss_list = []
                    for tid, test_data in enumerate(test_reader()):
                        loss_t, acc_t = exe.run(program=test_program,
                                                feed=feeder.feed(test_data),
                                                fetch_list=[avg_cost, acc])
                        if math.isnan(float(loss_t)):
                            sys.exit("got NaN loss, training failed.")
                        acc_list.append(float(acc_t))
                        avg_loss_list.append(float(loss_t))
                        break  # Use 1 segment for speeding up CI

                    acc_value = numpy.array(acc_list).mean()
                    avg_loss_value = numpy.array(avg_loss_list).mean()

                    print(
                        'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
                        format(pass_id, batch_id + 1,
                               float(avg_loss_value), float(acc_value)))

                    if acc_value > 0.01:  # Low threshold for speeding up CI
                        fluid.io.save_inference_model(save_dirname, ["pixel"],
                                                      [predict], exe)
                        return

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        # Distributed mode: the cluster layout comes from the environment.
        # PADDLE_INIT_PSERVERS, TRAINERS, POD_IP and PADDLE_INIT_TRAINER_ID
        # have no defaults, so each must be set.
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
def train(word_dict,
          net_method,
          use_cuda,
          parallel=False,
          save_dirname=None,
          is_local=True):
    """Train a two-class IMDB sentiment classifier built by ``net_method``.

    Args:
        word_dict: vocabulary; its length is the network's input dimension.
        net_method: callable building the network; called as
            ``net_method(data, label, input_dim=..., class_dim=...)`` and
            unpacked into (cost, accuracy, prediction).
        use_cuda: run on ``CUDAPlace(0)`` when true, otherwise on CPU.
        parallel: build the network inside a ``ParallelDo`` block. No
            prediction is kept in that mode, so ``save_dirname`` must be
            None.
        save_dirname: if not None, an inference model is saved there once
            the cost/accuracy thresholds are met.
        is_local: train in-process when true; otherwise transpile for
            parameter-server training using environment variables.

    Raises:
        AssertionError: if all passes finish without reaching the
            thresholds, or if ``parallel`` is combined with
            ``save_dirname``.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source -- confirm against the original file.
    """
    BATCH_SIZE = 128
    PASS_NUM = 5
    dict_dim = len(word_dict)
    class_dim = 2

    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")

    if not parallel:
        cost, acc_out, prediction = net_method(
            data, label, input_dim=dict_dim, class_dim=class_dim)
    else:
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places)
        with pd.do():
            cost, acc, _ = net_method(
                pd.read_input(data),
                pd.read_input(label),
                input_dim=dict_dim,
                class_dim=class_dim)
            pd.write_output(cost)
            pd.write_output(acc)

        # Take the mean of the cost and accuracy outputs of the parallel block.
        cost, acc = pd()
        cost = fluid.layers.mean(cost)
        acc_out = fluid.layers.mean(acc)
        prediction = None
        assert save_dirname is None

    adagrad = fluid.optimizer.Adagrad(learning_rate=0.002)
    adagrad.minimize(cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=BATCH_SIZE)
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    def train_loop(main_program):
        """Train until cost < 0.4 and acc > 0.8; fail if that never happens."""
        exe.run(fluid.default_startup_program())

        for pass_id in xrange(PASS_NUM):
            for data in train_data():
                cost_val, acc_val = exe.run(main_program,
                                            feed=feeder.feed(data),
                                            fetch_list=[cost, acc_out])
                print("cost=" + str(cost_val) + " acc=" + str(acc_val))
                if cost_val < 0.4 and acc_val > 0.8:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(save_dirname, ["words"],
                                                      prediction, exe)
                    return
                if math.isnan(float(cost_val)):
                    sys.exit("got NaN loss, training failed.")
        # Reached only when no batch satisfied the thresholds.
        raise AssertionError("Cost is too large for {0}".format(
            net_method.__name__))

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        # Distributed mode: the cluster layout comes from the environment.
        # PADDLE_INIT_PSERVERS, TRAINERS, POD_IP and PADDLE_INIT_TRAINER_ID
        # have no defaults, so each must be set.
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = []
        for ip in pserver_ips.split(","):
            eplist.append(':'.join([ip, port]))
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
padding=(0, 0), dilation=(1, 1), groups=1, bias_attr=False, name="conv2dX2") add1 = fluid.layers.elementwise_add(conv2d1, conv2d2, name="add1") relu2a = fluid.layers.relu(add1, name="relu2a") relu2b = fluid.layers.relu(add1, name="relu2b") add2 = fluid.layers.elementwise_add(relu2a, relu2b, name="add2") relu3a = fluid.layers.relu(add2, name="relu3a") relu3b = fluid.layers.relu(add2, name="relu3b") exe = fluid.Executor(fluid.CPUPlace()) exe.run(fluid.default_startup_program()) inp_dict = {'inputX1': inp_blob1, 'inputX2': inp_blob2} var = [relu3a, relu3b] res_paddle = exe.run(fluid.default_main_program(), fetch_list=var, feed=inp_dict) fluid.io.save_inference_model(os.path.join(sys.argv[1], "2in_2out_dynbatch"), list(inp_dict.keys()), var, exe, model_filename="2in_2out_dynbatch.pdmodel", params_filename="2in_2out_dynbatch.pdiparams")
def train():
    """Train the LSTM language model on PTB and report perplexities.

    Hyper-parameters are selected by ``args.model_type`` ("test", "small",
    "medium" or "large"); any other value prints a message and returns.
    After every epoch the persistables are saved under ``model_new/<epoch>``
    and the validation perplexity is printed; the test perplexity is printed
    once after the last epoch.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source -- confirm against the original file.
    """
    args = parse_args()
    model_type = args.model_type
    rnn_model = args.rnn_model
    logger = logging.getLogger("lm")
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    if args.enable_ce:
        fluid.default_startup_program().random_seed = SEED
    # Log to a file when a path is given, otherwise to the console.
    if args.log_path:
        file_handler = logging.FileHandler(args.log_path)
        file_handler.setLevel(logging.INFO)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    else:
        console_handler = logging.StreamHandler()
        console_handler.setLevel(logging.INFO)
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

    logger.info('Running with args : {}'.format(args))

    vocab_size = 10000
    if model_type == "test":
        num_layers = 1
        batch_size = 2
        hidden_size = 10
        num_steps = 3
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 1
        max_epoch = 1
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "small":
        num_layers = 2
        batch_size = 20
        hidden_size = 200
        num_steps = 20
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 4
        max_epoch = 13
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "medium":
        num_layers = 2
        batch_size = 20
        hidden_size = 650
        num_steps = 35
        init_scale = 0.05
        max_grad_norm = 5.0
        epoch_start_decay = 6
        max_epoch = 39
        dropout = 0.5
        lr_decay = 0.8
        base_learning_rate = 1.0
    elif model_type == "large":
        num_layers = 2
        batch_size = 20
        hidden_size = 1500
        num_steps = 35
        init_scale = 0.04
        max_grad_norm = 10.0
        epoch_start_decay = 14
        max_epoch = 55
        dropout = 0.65
        lr_decay = 1.0 / 1.15
        base_learning_rate = 1.0
    else:
        print("model type not support")
        return

    # Training process
    loss, last_hidden, last_cell, feed_order = lm_model.lm_model(
        hidden_size,
        vocab_size,
        batch_size,
        num_layers=num_layers,
        num_steps=num_steps,
        init_scale=init_scale,
        dropout=dropout,
        rnn_model=rnn_model)
    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
        clip_norm=max_grad_norm))

    # The learning rate is a persistable global variable so that a new value
    # can be fed under the name "learning_rate" on every training step.
    learning_rate = fluid.layers.create_global_var(name="learning_rate",
                                                   shape=[1],
                                                   value=1.0,
                                                   dtype='float32',
                                                   persistable=True)

    optimizer = fluid.optimizer.SGD(learning_rate=learning_rate)

    optimizer.minimize(loss)

    place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    data_path = args.data_path
    print("begin to load data")
    raw_data = reader.ptb_raw_data(data_path)
    print("finished load data")
    train_data, valid_data, test_data, _ = raw_data

    def prepare_input(batch, init_hidden, init_cell, epoch_id=0, with_lr=True):
        """Build the feed dict for one batch.

        The learning rate is ``base_learning_rate`` multiplied by
        ``lr_decay`` raised to ``max(epoch_id + 1 - epoch_start_decay, 0)``;
        it is included in the feed only when ``with_lr`` is true.
        """
        x, y = batch
        new_lr = base_learning_rate * (lr_decay**max(
            epoch_id + 1 - epoch_start_decay, 0.0))
        lr = np.ones((1), dtype='float32') * new_lr
        res = {}
        x = x.reshape((-1, num_steps, 1))
        y = y.reshape((-1, 1))

        res['x'] = x
        res['y'] = y
        res['init_hidden'] = init_hidden
        res['init_cell'] = init_cell
        if with_lr:
            res['learning_rate'] = lr

        return res

    # NOTE(review): this local function shadows the builtin `eval`.
    def eval(data):
        """Return the perplexity of ``inference_program`` over ``data``.

        Reads ``epoch_id`` from the enclosing scope, so it may only be
        called once the epoch loop below has started.
        """
        # NOTE(review): an older comment here said batch_size is set to 1
        # for evaluation, but the training batch_size is what is passed.
        eval_data_iter = reader.get_data_iter(data, batch_size, num_steps)
        total_loss = 0.0
        iters = 0
        init_hidden = np.zeros((num_layers, batch_size, hidden_size),
                               dtype='float32')
        init_cell = np.zeros((num_layers, batch_size, hidden_size),
                             dtype='float32')
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden,
                                            init_cell,
                                            epoch_id,
                                            with_lr=False)
            fetch_outs = exe.run(
                inference_program,
                feed=input_data_feed,
                fetch_list=[loss.name, last_hidden.name, last_cell.name],
                use_program_cache=True)

            cost_train = np.array(fetch_outs[0])
            # Carry the recurrent state over to the next batch.
            init_hidden = np.array(fetch_outs[1])
            init_cell = np.array(fetch_outs[2])

            total_loss += cost_train
            iters += num_steps

        ppl = np.exp(total_loss / iters)
        return ppl

    # get train epoch size
    batch_len = len(train_data) // batch_size
    epoch_size = (batch_len - 1) // num_steps
    # NOTE(review): log_interval is 0 when epoch_size < 10, which would make
    # the modulo in the logging condition below fail -- confirm the data
    # is always large enough.
    log_interval = epoch_size // 10
    total_time = 0.0
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)
        train_data_iter = reader.get_data_iter(train_data, batch_size,
                                               num_steps)

        # The first three assignments are superseded a few lines below.
        total_loss = 0

        init_hidden = None
        init_cell = None
        #debug_para(fluid.framework.default_main_program(), parallel_executor)
        total_loss = 0
        iters = 0
        init_hidden = np.zeros((num_layers, batch_size, hidden_size),
                               dtype='float32')
        init_cell = np.zeros((num_layers, batch_size, hidden_size),
                             dtype='float32')
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed = prepare_input(batch,
                                            init_hidden,
                                            init_cell,
                                            epoch_id=epoch_id)
            # No program is passed here, unlike the evaluation run.
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[
                                     loss.name, last_hidden.name,
                                     last_cell.name, 'learning_rate'
                                 ],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])
            # Carry the recurrent state over to the next batch.
            init_hidden = np.array(fetch_outs[1])
            init_cell = np.array(fetch_outs[2])

            lr = np.array(fetch_outs[3])

            total_loss += cost_train
            iters += num_steps
            if batch_id > 0 and batch_id % log_interval == 0:
                ppl = np.exp(total_loss / iters)
                print("ppl ", batch_id, ppl[0], lr[0])

        ppl = np.exp(total_loss / iters)
        if epoch_id == 0 and ppl[0] > 1000:
            # for bad init, after first epoch, the loss is over 1000
            # no more need to continue
            return
        end_time = time.time()
        total_time += end_time - start_time
        print("train ppl", ppl[0])

        if epoch_id == max_epoch - 1 and args.enable_ce:
            print("ptblm\tlstm_language_model_duration\t%s" %
                  (total_time / max_epoch))
            print("ptblm\tlstm_language_model_loss\t%s" % ppl[0])

        model_path = os.path.join("model_new/", str(epoch_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(executor=exe,
                                   dirname=model_path,
                                   main_program=main_program)
        valid_ppl = eval(valid_data)
        print("valid ppl", valid_ppl[0])
    test_ppl = eval(test_data)
    print("test ppl", test_ppl[0])
# Reader for training train_reader = paddle.batch( paddle.reader.shuffle(paddle.dataset.cifar.train10(), buf_size=50000), batch_size=BATCH_SIZE) # Reader for testing. A separated data set for testing. test_reader = paddle.batch( paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE) use_cuda = False place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() feed_order = ['pixel', 'label'] main_program = fluid.default_main_program() star_program = fluid.default_startup_program() predict,[avg_cost, acc] = train_program() # Test program test_program = main_program.clone(for_test=True) optimizer = optimizer_program() optimizer.minimize(avg_cost) exe = fluid.Executor(place) EPOCH_NUM = 2 # For training test cost def train_test(program, reader):
def forward(self, x): x = fluid.layers.reshape(x, shape=[1, 4]) x = self.affine1(x) x = fluid.layers.dropout(x, self.dropout_ratio) x = fluid.layers.relu(x) action_scores = self.affine2(x) self._x_for_debug = x return fluid.layers.softmax(action_scores, axis=1) with fluid.dygraph.guard(): fluid.default_startup_program().random_seed = args.seed fluid.default_main_program().random_seed = args.seed np.random.seed(args.seed) policy = Policy() eps = np.finfo(np.float32).eps.item() optimizer = fluid.optimizer.AdamOptimizer( learning_rate=1e-2, parameter_list=policy.parameters()) def get_mean_and_std(values=[]): n = 0. s = 0. for val in values: s += val n += 1 mean = s / n
def train(conf_dict, data_reader, use_cuda=False):
    """
    Train the p classification model.

    Args:
        conf_dict: configuration; reads 'batch_size', 'pass_num' and
            'p_model_save_dir' here and is also passed to
            ``p_model.db_lstm``.
        data_reader: supplies the label dictionary size and the training
            reader.
        use_cuda: run on ``CUDAPlace(0)`` when true, otherwise on CPU.

    NOTE(review): nesting was reconstructed from a whitespace-collapsed
    source -- confirm against the original file.
    """
    label_dict_len = data_reader.get_dict_size('label_dict')
    # input layer
    word = fluid.layers.data(name='word_data',
                             shape=[1],
                             dtype='int64',
                             lod_level=1)
    postag = fluid.layers.data(name='token_pos',
                               shape=[1],
                               dtype='int64',
                               lod_level=1)
    # label
    target = fluid.layers.data(name='target',
                               shape=[label_dict_len],
                               dtype='float32',
                               lod_level=0)
    # NN: embedding + lstm + pooling
    feature_out = p_model.db_lstm(data_reader, word, postag, conf_dict)
    # loss function for multi-label classification
    class_cost = fluid.layers.sigmoid_cross_entropy_with_logits(x=feature_out, \
        label=target)
    avg_cost = fluid.layers.mean(class_cost)
    # optimization method
    # (The variable is named sgd_optimizer but the optimizer is Adam.)
    sgd_optimizer = fluid.optimizer.AdamOptimizer(learning_rate=2e-3, )

    sgd_optimizer.minimize(avg_cost)

    train_batch_reader = paddle.batch(paddle.reader.shuffle(
        data_reader.get_train_reader(), buf_size=8192),
                                      batch_size=conf_dict['batch_size'])

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    feeder = fluid.DataFeeder(feed_list=[word, postag, target], place=place)
    exe = fluid.Executor(place)

    save_dirname = conf_dict['p_model_save_dir']

    def train_loop(main_program, trainer_id=0):
        """Run training, saving a model after each pass and at the end.

        Stops early, saving to '<save_dirname>/final', as soon as a batch
        cost falls below 0.01. ``trainer_id`` is not used in this body.
        """
        exe.run(fluid.default_startup_program())

        start_time = time.time()
        batch_id = 0
        for pass_id in six.moves.xrange(conf_dict['pass_num']):
            pass_start_time = time.time()
            cost_sum, cost_counter = 0, 0
            for data in train_batch_reader():
                cost = exe.run(main_program,
                               feed=feeder.feed(data),
                               fetch_list=[avg_cost])
                cost = cost[0]
                cost_sum += cost
                cost_counter += 1
                if batch_id % 10 == 0 and batch_id != 0:
                    sys.stderr.write(
                        "batch %d finished, second per batch: %02f\n" %
                        (batch_id, (time.time() - start_time) / batch_id))

                # cost expected, training over
                if float(cost) < 0.01:
                    pass_avg_cost = cost_sum / cost_counter if cost_counter > 0 else 0.0
                    sys.stderr.write(
                        "%d pass end, cost time: %02f, avg_cost: %f\n" %
                        (pass_id, time.time() - pass_start_time,
                         pass_avg_cost))
                    save_path = os.path.join(save_dirname, 'final')
                    fluid.io.save_inference_model(save_path,
                                                  ['word_data', 'token_pos'],
                                                  [feature_out],
                                                  exe,
                                                  params_filename='params')
                    return
                batch_id = batch_id + 1

            # save the model once each pass ends
            pass_avg_cost = cost_sum / cost_counter if cost_counter > 0 else 0.0
            sys.stderr.write(
                "%d pass end, cost time: %02f, avg_cost: %f\n" %
                (pass_id, time.time() - pass_start_time, pass_avg_cost))
            save_path = os.path.join(save_dirname,
                                     'pass_%04d-%f' % (pass_id, pass_avg_cost))
            fluid.io.save_inference_model(save_path,
                                          ['word_data', 'token_pos'],
                                          [feature_out],
                                          exe,
                                          params_filename='params')

        else:
            # pass times complete and the training is over
            # NOTE(review): this `else` is assumed to belong to the pass
            # loop (for/else), running once all passes complete without an
            # early return -- confirm against the original indentation.
            save_path = os.path.join(save_dirname, 'final')
            fluid.io.save_inference_model(save_path,
                                          ['word_data', 'token_pos'],
                                          [feature_out],
                                          exe,
                                          params_filename='params')
        return

    train_loop(fluid.default_main_program())
def get_pretraining_output(self, mask_label, mask_pos, labels):
    """Build the two pretraining heads and return their loss and accuracy.

    Args:
        mask_label: labels for the masked positions.
        mask_pos: indices of the masked tokens in the flattened encoder
            output; cast to int32 before use.
        labels: labels for the next-sentence classifier.

    Returns:
        tuple: ``(next_sent_acc, mean_mask_lm_loss, loss)`` where ``loss``
        is the mean next-sentence loss plus the mean masked-LM loss.
    """
    prefix = self.model_name
    mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')

    # Pooled feature used by the next-sentence head.
    pooled_feat = self.get_pooled_output()

    # Flatten the encoder output and pick out the masked tokens' features.
    flat_enc_out = fluid.layers.reshape(
        x=self._enc_out, shape=[-1, self._emb_size])
    masked_feat = fluid.layers.gather(input=flat_enc_out, index=mask_pos)

    # Transform: fc followed by layer norm.
    trans_w_attr = fluid.ParamAttr(
        name=prefix + 'mask_lm_trans_fc.w_0',
        initializer=self._param_initializer)
    trans_b_attr = fluid.ParamAttr(name=prefix + 'mask_lm_trans_fc.b_0')
    masked_trans = fluid.layers.fc(input=masked_feat,
                                   size=self._emb_size,
                                   act=self._hidden_act,
                                   param_attr=trans_w_attr,
                                   bias_attr=trans_b_attr)
    masked_trans = pre_process_layer(
        masked_trans, 'n', name=prefix + 'mask_lm_trans')

    out_bias_attr = fluid.ParamAttr(
        name=prefix + "mask_lm_out_fc.b_0",
        initializer=fluid.initializer.Constant(value=0.0))

    if not self._weight_sharing:
        # Independent output projection.
        out_w_attr = fluid.ParamAttr(
            name=prefix + "mask_lm_out_fc.w_0",
            initializer=self._param_initializer)
        vocab_logits = fluid.layers.fc(input=masked_trans,
                                       size=self._voc_size,
                                       param_attr=out_w_attr,
                                       bias_attr=out_bias_attr)
    else:
        # Reuse the word-embedding matrix (transposed) as output weights,
        # then add a separately created bias.
        word_emb = fluid.default_main_program().global_block().var(
            self._word_emb_name)
        vocab_logits = fluid.layers.matmul(
            x=masked_trans, y=word_emb, transpose_y=True)
        vocab_logits += fluid.layers.create_parameter(
            shape=[self._voc_size],
            dtype=self._dtype,
            attr=out_bias_attr,
            is_bias=True)

    mean_mask_lm_loss = fluid.layers.mean(
        fluid.layers.softmax_with_cross_entropy(
            logits=vocab_logits, label=mask_label))

    # Next-sentence head: 2-way classifier on the pooled feature.
    sent_w_attr = fluid.ParamAttr(
        name=prefix + "next_sent_fc.w_0",
        initializer=self._param_initializer)
    sent_logits = fluid.layers.fc(input=pooled_feat,
                                  size=2,
                                  param_attr=sent_w_attr,
                                  bias_attr=prefix + "next_sent_fc.b_0")

    sent_loss, sent_softmax = fluid.layers.softmax_with_cross_entropy(
        logits=sent_logits, label=labels, return_softmax=True)

    next_sent_acc = fluid.layers.accuracy(input=sent_softmax, label=labels)
    mean_next_sent_loss = fluid.layers.mean(sent_loss)

    loss = mean_next_sent_loss + mean_mask_lm_loss
    return next_sent_acc, mean_mask_lm_loss, loss
def _check_mlp(self, place=None):
    """Train the same MLP in dygraph and static-graph mode and compare them.

    Both runs use the same seed and MNIST training batches. After at most
    ``self.batch_num`` batches the test asserts that the initial parameters,
    the last loss and the last parameter values agree between the two modes.

    Args:
        place: device to run on. When ``None``, ``CUDAPlace(0)`` is used if
            Paddle is compiled with CUDA, otherwise ``CPUPlace()``.
    """
    seed = 90
    batch_size = 128

    # Identity check: does not depend on how the place type implements `==`.
    if place is None:
        place = (fluid.CUDAPlace(0)
                 if core.is_compiled_with_cuda() else fluid.CPUPlace())

    # ---- dygraph run ----
    with fluid.dygraph.guard(place):
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        mlp = MLP()
        optimizer = self.get_optimizer_dygraph(
            parameter_list=mlp.parameters())

        batch_py_reader = fluid.io.PyReader(capacity=1)
        batch_py_reader.decorate_sample_list_generator(
            paddle.batch(self.reader_decorator(paddle.dataset.mnist.train()),
                         batch_size=batch_size,
                         drop_last=True),
            places=fluid.CPUPlace())

        dy_param_init_value = {}
        for batch_id, data in enumerate(batch_py_reader()):
            if batch_id >= self.batch_num:
                break

            img, label = data[0], data[1]
            label.stop_gradient = True

            img = fluid.layers.reshape(img, shape=[batch_size, -1])
            cost = mlp(img)
            avg_loss = fluid.layers.reduce_mean(cost)
            dy_out = avg_loss.numpy()

            if batch_id == 0:
                # Snapshot the parameters before the first update.
                for param in mlp.parameters():
                    dy_param_init_value[param.name] = param.numpy()

            avg_loss.backward()
            optimizer.minimize(avg_loss)
            mlp.clear_gradients()

            # Overwritten every batch; the last iteration's values are
            # the ones compared below.
            dy_param_value = {
                param.name: param.numpy() for param in mlp.parameters()
            }

    # ---- static-graph run ----
    with new_program_scope():
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        # `place` was already resolved above, so no second None check.
        exe = fluid.Executor(place)

        mlp = MLP()
        optimizer = self.get_optimizer()
        # Use the shared batch_size so both runs cannot drift apart.
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=batch_size,
                                    drop_last=True)

        img = fluid.layers.data(name='pixel',
                                shape=[1, 28, 28],
                                dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        img = fluid.layers.reshape(img, shape=[batch_size, 784])
        cost = mlp(img)
        avg_loss = fluid.layers.reduce_mean(cost)
        optimizer.minimize(avg_loss)

        # Initialize params and fetch their initial values.
        static_param_name_list = [param.name for param in mlp.parameters()]
        out = exe.run(fluid.default_startup_program(),
                      fetch_list=static_param_name_list)
        static_param_init_value = dict(zip(static_param_name_list, out))

        # Loss first, then every parameter; the same list for every batch.
        fetch_list = [avg_loss.name] + static_param_name_list

        for batch_id, data in enumerate(train_reader()):
            if batch_id >= self.batch_num:
                break

            static_x_data = np.array(
                [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data
                               ]).astype('int64').reshape([batch_size, 1])

            out = exe.run(fluid.default_main_program(),
                          feed={
                              "pixel": static_x_data,
                              "label": y_data
                          },
                          fetch_list=fetch_list)

            static_out = out[0]
            static_param_value = dict(zip(static_param_name_list, out[1:]))

    # ---- comparisons ----
    for key, value in static_param_init_value.items():
        self.assertTrue(np.allclose(value, dy_param_init_value[key]))

    # ROCm builds get a looser absolute tolerance; everything else keeps
    # numpy's default.
    tolerance = {'atol': 1e-3} if core.is_compiled_with_rocm() else {}

    self.assertTrue(np.allclose(static_out, dy_out, **tolerance))

    for key, value in static_param_value.items():
        self.assertTrue(
            np.allclose(value, dy_param_value[key], **tolerance))
def main(use_cuda):
    """
    Advbox demo: run a static FGSM attack on an MNIST model, first without
    any defence and then against a label-smoothing defence model.

    Args:
        use_cuda: when truthy the executor runs on ``CUDAPlace(0)``,
            otherwise on ``CPUPlace()``.
    """
    TOTAL_NUM = 500
    IMG_NAME = 'img'
    LABEL_NAME = 'label'
    BATCH_SIZE = 1

    img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
    # gradient should flow
    img.stop_gradient = False
    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
    logits = mnist_cnn_model(img)
    cost = fluid.layers.cross_entropy(input=logits, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Choose CPU or GPU according to the configuration.
    if use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    shuffled_test = paddle.reader.shuffle(paddle.dataset.mnist.test(),
                                          buf_size=128 * 10)
    test_reader = paddle.batch(shuffled_test, batch_size=BATCH_SIZE)

    fluid.io.load_params(exe,
                         "./mnist/",
                         main_program=fluid.default_main_program())

    def run_attack(attack_fn, pass_label, log_success):
        """Attack up to TOTAL_NUM test samples and print the fooling rate.

        pass_label: give the true label to Adversary (otherwise None).
        log_success: also log every successful attack.
        """
        attack_config = {"epsilon": 0.01}
        seen = 0
        fooled = 0
        for batch in test_reader():
            seen += 1
            sample = batch[0]
            # When no label is given, it is obtained automatically.
            target = sample[1] if pass_label else None

            # FGSM non-targeted attack
            adversary = attack_fn(Adversary(sample[0], target),
                                  **attack_config)

            if adversary.is_successful():
                fooled += 1
                if log_success:
                    logger.info(
                        'attack success, original_label=%d, adversarial_label=%d, count=%d'
                        % (sample[1], adversary.adversarial_label, seen))
            else:
                logger.info('attack failed, original_label=%d, count=%d' %
                            (sample[1], seen))

            if seen >= TOTAL_NUM:
                print(
                    "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
                    % (fooled, seen, float(fooled) / seen))
                break

    # advbox demo
    plain_model = PaddleModel(fluid.default_main_program(),
                              IMG_NAME,
                              LABEL_NAME,
                              logits.name,
                              avg_cost.name, (-1, 1),
                              channel_axis=1)
    # Static FGSM: epsilon is fixed.
    # use test data to generate adversarial examples
    run_attack(FGSM_static(plain_model), pass_label=True, log_success=False)
    print("fgsm attack done without any defence")

    # advbox label-smoothing defence demo
    defended_model = PaddleLabelSmoothingDefenceModel(
        fluid.default_main_program(),
        IMG_NAME,
        LABEL_NAME,
        logits.name,
        avg_cost.name, (-1, 1),
        channel_axis=1,
        preprocess=None,
        smoothing=0.1)
    run_attack(FGSM_static(defended_model),
               pass_label=False,
               log_success=True)
    print("fgsm attack done with LabelSmoothingDefence")
def testSetVariableBeforeTrain(self):
    """Load saved state into a fresh model/optimizer, then train one step.

    The optimizer uses ``learning_rate=0.0``. After the step the test
    asserts that ``global_step`` grew by one, that the beta power
    accumulators were multiplied by their beta, and that every model
    parameter still equals the baseline in ``self.model_base``.
    """
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4

    with fluid.dygraph.guard():
        for program in (fluid.default_startup_program(),
                        fluid.default_main_program()):
            program.random_seed = seed
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel(hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale)

        # Not used below; kept as in the original test.
        if core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()

        adam = Adam(learning_rate=0.0,
                    beta1=0.8,
                    beta2=0.6,
                    parameter_list=ptb_model.parameters())

        # Restore the state held on the test case before training.
        adam.set_dict(self.opti_dict)
        ptb_model.set_dict(self.state_dict)

        state_shape = (num_layers, batch_size, hidden_size)
        for _ in range(1):
            tokens = np.arange(12).reshape(4, 3).astype('int64')
            targets = np.arange(1, 13).reshape(4, 3).astype('int64')
            targets = targets.reshape((-1, 1))

            x = to_variable(tokens)
            y = to_variable(targets)
            init_hidden = to_variable(np.zeros(state_shape, dtype='float32'))
            init_cell = to_variable(np.zeros(state_shape, dtype='float32'))

            dy_loss, last_hidden, last_cell = ptb_model(
                x, y, init_hidden, init_cell)

            dy_loss.backward()
            adam.minimize(dy_loss)
            ptb_model.clear_gradients()

        # Accumulator-name marker -> factor it should have been scaled by.
        pow_acc_checks = (("beta1_pow_acc_0", adam._beta1),
                          ("beta2_pow_acc_0", adam._beta2))
        for key, var in adam.state_dict().items():
            if key == "global_step":
                self.assertTrue(
                    np.array_equal(var.numpy(),
                                   self.base_opti[var.name] + 1))
            for marker, beta in pow_acc_checks:
                # Matches only when the marker is not at position 0.
                if key.find(marker) > 0:
                    self.assertTrue(
                        np.array_equal(var.numpy(),
                                       self.base_opti[var.name] * beta))

        for key, var in ptb_model.state_dict().items():
            current = var.numpy()
            expected = self.model_base[key]
            self.assertTrue(np.array_equal(current, expected))
def setUp(self):
    """Train a PtbModel for 200 batches and record the resulting state.

    Stores on the test case:
      * ``self.opti_dict`` / ``self.base_opti``: the optimizer state dict
        and a numpy copy keyed by variable name,
      * ``self.state_dict`` / ``self.model_base``: the model state dict and
        a numpy copy keyed by state-dict key.
    Both state dicts are also written with ``fluid.save_dygraph`` to
    ``"./test_dy"``.
    """
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4
    batch_num = 200

    with fluid.dygraph.guard():
        for program in (fluid.default_startup_program(),
                        fluid.default_main_program()):
            program.random_seed = seed
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel(hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale)

        # This is a fake lr decay strategy: every piece has the same value.
        boundaries = [100 * step for step in range(1, 10)]
        lr_values = [1.0] * (len(boundaries) + 1)

        # Not used below; kept as in the original test.
        if core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()

        schedule = fluid.layers.piecewise_decay(boundaries=boundaries,
                                                values=lr_values)
        adam = Adam(learning_rate=schedule,
                    parameter_list=ptb_model.parameters())

        dy_param_init = {}
        dy_param_updated = {}
        state_shape = (num_layers, batch_size, hidden_size)
        final_step = batch_num - 1

        for step in range(batch_num):
            tokens = np.arange(12).reshape(4, 3).astype('int64')
            targets = np.arange(1, 13).reshape(4, 3).astype('int64')
            targets = targets.reshape((-1, 1))

            x = to_variable(tokens)
            y = to_variable(targets)
            init_hidden = to_variable(np.zeros(state_shape, dtype='float32'))
            init_cell = to_variable(np.zeros(state_shape, dtype='float32'))

            dy_loss, last_hidden, last_cell = ptb_model(
                x, y, init_hidden, init_cell)

            if step == 0:
                # Parameters before the first update.
                for param in ptb_model.parameters():
                    dy_param_init[param.name] = param.numpy()

            dy_loss.backward()
            adam.minimize(dy_loss)
            ptb_model.clear_gradients()

            if step == final_step:
                # Parameters after the last update.
                for param in ptb_model.parameters():
                    dy_param_updated[param.name] = param.numpy()

        # check optimizer
        self.opti_dict = adam.state_dict()
        self.base_opti = {}
        for _, var in self.opti_dict.items():
            snapshot = var.numpy()
            self.base_opti[var.name] = snapshot
            # Each optimizer variable must contain a non-zero entry.
            self.assertTrue(np.sum(np.abs(snapshot)) != 0)

        fluid.save_dygraph(self.opti_dict, "./test_dy")

        self.state_dict = ptb_model.state_dict()
        self.model_base = {
            key: var.numpy() for key, var in self.state_dict.items()
        }

        fluid.save_dygraph(self.state_dict, "./test_dy")
def train():
    """Build the RCNN training program and time a fixed number of batches.

    Everything is driven by the module-level ``cfg``. After two warm-up
    batches, ``cfg.max_iter`` batches are run (inside the profiler when
    ``cfg.use_profile`` is set) and total time, reader time, run time and
    images per second are printed.
    """
    base_lr = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]
    num_iterations = cfg.max_iter

    # An unset/empty CUDA_VISIBLE_DEVICES still counts as one device,
    # because "".split(",") has one element.
    visible = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(visible.split(","))
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
        use_random=False)
    model.build_model(image_shape)
    losses, keys = model.loss()
    loss = losses[0]
    fetch_list = [loss]

    # One learning-rate value per piece: base_lr * gamma**i.
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    values = [base_lr * (gamma**i) for i in range(len(cfg.lr_steps) + 1)]

    lr_schedule = exponential_with_warmup_decay(learning_rate=base_lr,
                                                boundaries=boundaries,
                                                values=values,
                                                warmup_iter=500,
                                                warmup_factor=1.0 / 3.0)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr_schedule,
        regularization=fluid.regularizer.L2Decay(0.0001),
        momentum=0.9)
    optimizer.minimize(loss)
    fluid.memory_optimize(fluid.default_main_program())

    if cfg.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:

        def if_exist(var):
            # Load only variables that have a file in the pretrained dir.
            return os.path.exists(
                os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        train_exe = fluid.ParallelExecutor(use_cuda=bool(cfg.use_gpu),
                                           loss_name=loss.name)

    if not cfg.use_pyreader:
        train_reader = reader.train(batch_size=total_batch_size,
                                    shuffle=False)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())
    else:
        train_reader = reader.train(
            batch_size=cfg.TRAIN.im_per_batch,
            total_batch_size=total_batch_size,
            padding_total=cfg.TRAIN.padding_minibatch,
            shuffle=False)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)

    fetch_names = [v.name for v in fetch_list]

    def run(iterations):
        """Feed batches through a DataFeeder; time reading and running."""
        reader_time = []
        run_time = []
        total_images = 0

        for batch_id in range(iterations):
            tic = time.time()
            # NOTE(review): train_reader() is called on every iteration, so
            # next() is always taken from a newly created generator --
            # confirm that is intended.
            data = next(train_reader())
            reader_time.append(time.time() - tic)

            tic = time.time()
            if cfg.parallel:
                outs = train_exe.run(fetch_list=fetch_names,
                                     feed=feeder.feed(data))
            else:
                outs = exe.run(fluid.default_main_program(),
                               fetch_list=fetch_names,
                               feed=feeder.feed(data))
            run_time.append(time.time() - tic)

            total_images += len(data)
            print("Batch {:d}, loss {:.6f} ".format(batch_id,
                                                     np.mean(outs[0])))
        return reader_time, run_time, total_images

    def run_pyreader(iterations):
        """Same as run() but fed by py_reader; reader time is not timed."""
        reader_time = [0]
        run_time = []
        total_images = 0

        py_reader.start()
        try:
            for batch_id in range(iterations):
                tic = time.time()
                if cfg.parallel:
                    outs = train_exe.run(fetch_list=fetch_names)
                else:
                    outs = exe.run(fluid.default_main_program(),
                                   fetch_list=fetch_names)
                run_time.append(time.time() - tic)

                total_images += devices_num
                print("Batch {:d}, loss {:.6f} ".format(
                    batch_id, np.mean(outs[0])))
        except fluid.core.EOFException:
            py_reader.reset()

        return reader_time, run_time, total_images

    run_func = run_pyreader if cfg.use_pyreader else run

    # warm-up
    run_func(2)

    # profiling
    start = time.time()
    if cfg.use_profile:
        with profiler.profiler('GPU', 'total', '/tmp/profile_file'):
            reader_time, run_time, total_images = run_func(num_iterations)
    else:
        reader_time, run_time, total_images = run_func(num_iterations)
    total_time = time.time() - start

    print(
        "Total time: {0}, reader time: {1} s, run time: {2} s, images/s: {3}".
        format(total_time, np.sum(reader_time), np.sum(run_time),
               total_images / total_time))
def test_accuracy(self):
    """Train MobileNet on MNIST, post-quantize it, and report accuracy.

    Steps: build and train the network for one pass over the training
    set, evaluate the test-mode clone, save it as an inference model, run
    ``quant_post`` on the saved model, reload the quantized model and
    evaluate it again. Top-1/top-5 accuracy before and after quantization
    are printed; nothing is asserted.
    """
    image = fluid.layers.data(name='image',
                              shape=[1, 28, 28],
                              dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    model = MobileNet()
    out = model.net(input=image, class_dim=10)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    optimizer = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=0.01,
        regularization=fluid.regularizer.L2Decay(4e-5))
    optimizer.minimize(avg_cost)
    main_prog = fluid.default_main_program()
    # NOTE(review): the clone is taken after minimize(); Paddle's docs
    # appear to recommend cloning for test before minimize() -- confirm
    # this order is intended.
    val_prog = main_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda(
    ) else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    feeder = fluid.DataFeeder([image, label], place, program=main_prog)
    train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=64)
    eval_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=64)

    def train(program):
        """Run one pass over the training set, logging every 100 steps."""
        for step, data in enumerate(train_reader(), start=1):
            cost, top1, top5 = exe.run(
                program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            if step % 100 == 0:
                print(
                    'train iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                    format(step, cost, top1, top5))

    def test(program, outputs=None):
        """Evaluate `program` on the test set; return mean top-1/top-5.

        `outputs` is the fetch list; it defaults to the loss and the two
        accuracy variables of the training graph.
        """
        if outputs is None:
            outputs = [avg_cost, acc_top1, acc_top5]
        losses, top1s, top5s = [], [], []
        # Evaluate on held-out data: the original looped over train_reader
        # and left eval_reader unused.
        for step, data in enumerate(eval_reader(), start=1):
            cost, top1, top5 = exe.run(program,
                                       feed=feeder.feed(data),
                                       fetch_list=outputs)
            if step % 100 == 0:
                print(
                    'eval iter={}, avg loss {}, acc_top1 {}, acc_top5 {}'.
                    format(step, cost, top1, top5))
            losses.append(cost)
            top1s.append(top1)
            top5s.append(top5)
        print(' avg loss {}, acc_top1 {}, acc_top5 {}'.format(
            np.mean(losses), np.mean(top1s), np.mean(top5s)))
        return np.mean(top1s), np.mean(top5s)

    train(main_prog)
    top1_1, top5_1 = test(val_prog)

    fluid.io.save_inference_model(
        dirname='./test_quant_post',
        feeded_var_names=[image.name, label.name],
        target_vars=[avg_cost, acc_top1, acc_top5],
        main_program=val_prog,
        executor=exe,
        model_filename='model',
        params_filename='params')

    quant_post(exe,
               './test_quant_post',
               './test_quant_post_inference',
               paddle.dataset.mnist.test(),
               model_filename='model',
               params_filename='params',
               batch_nums=10)

    quant_post_prog, feed_target_names, fetch_targets = \
        fluid.io.load_inference_model(
            dirname='./test_quant_post_inference', executor=exe)
    top1_2, top5_2 = test(quant_post_prog, fetch_targets)

    print("before quantization: top1: {}, top5: {}".format(top1_1, top5_1))
    print("after quantization: top1: {}, top5: {}".format(top1_2, top5_2))
def train(use_cuda, params_dirname):
    """Train the IMDB sentiment model and optionally save it for inference.

    Args:
        use_cuda: run on ``CUDAPlace(0)`` when truthy, else ``CPUPlace()``.
        params_dirname: directory passed to ``save_inference_model`` after
            every epoch; nothing is saved when it is ``None``.

    The number of epochs and the CE (fixed seed, unshuffled data, KPI
    output) switch are read from the module-level ``args``.
    """
    if use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    print("Loading IMDB word dict....")
    word_dict = paddle.dataset.imdb.word_dict()

    print("Reading training data....")
    train_samples = paddle.dataset.imdb.train(word_dict)
    if not args.enable_ce:
        # Outside CE mode the training data is shuffled.
        train_samples = paddle.reader.shuffle(train_samples, buf_size=25000)
    train_reader = paddle.batch(train_samples, batch_size=BATCH_SIZE)

    print("Reading testing data....")
    test_reader = paddle.batch(paddle.dataset.imdb.test(word_dict),
                               batch_size=BATCH_SIZE)

    feed_order = ['words', 'label']
    pass_num = args.num_epochs

    main_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    if args.enable_ce:
        # Fixed seeds for CE runs.
        main_program.random_seed = 90
        startup_program.random_seed = 90

    prediction = inference_program(word_dict)
    train_func_outputs = train_program(prediction)
    avg_cost = train_func_outputs[0]

    # The test program is cloned before the optimizer ops are added.
    test_program = main_program.clone(for_test=True)

    sgd_optimizer = optimizer_func()
    sgd_optimizer.minimize(avg_cost)
    exe = fluid.Executor(place)

    def feed_vars_of(program):
        # Look up the feed variables of `program` in feed order.
        block = program.global_block()
        return [block.var(name) for name in feed_order]

    def train_test(program, reader):
        """Return every train output averaged over the batches of `reader`."""
        feeder_test = fluid.DataFeeder(feed_list=feed_vars_of(program),
                                       place=place)
        test_exe = fluid.Executor(place)
        totals = [0] * len(train_func_outputs)
        batches = 0
        for test_data in reader():
            fetched = test_exe.run(program=program,
                                   feed=feeder_test.feed(test_data),
                                   fetch_list=train_func_outputs)
            totals = [
                running + value[0] for running, value in zip(totals, fetched)
            ]
            batches += 1
        return [total / batches for total in totals]

    def train_loop():
        feeder = fluid.DataFeeder(feed_list=feed_vars_of(main_program),
                                  place=place)
        exe.run(fluid.default_startup_program())

        fetch_names = [var.name for var in train_func_outputs]
        last_epoch = pass_num - 1

        for epoch_id in range(pass_num):
            for step_id, data in enumerate(train_reader()):
                metrics = exe.run(main_program,
                                  feed=feeder.feed(data),
                                  fetch_list=fetch_names)

                # Every 10th step: evaluate on the full test set.
                if (step_id + 1) % 10 == 0:
                    avg_cost_test, acc_test = train_test(
                        test_program, test_reader)
                    print('Step {0}, Test Loss {1:0.2}, Acc {2:0.2}'.format(
                        step_id, avg_cost_test, acc_test))

                    print("Step {0}, Epoch {1} Metrics {2}".format(
                        step_id, epoch_id, list(map(np.array, metrics))))

                if math.isnan(float(metrics[0])):
                    sys.exit("got NaN loss, training failed.")

            if params_dirname is not None:
                fluid.io.save_inference_model(params_dirname, ["words"],
                                              prediction, exe)

            if args.enable_ce and epoch_id == last_epoch:
                print("kpis\trnn_train_cost\t%f" % metrics[0])
                print("kpis\trnn_train_acc\t%f" % metrics[1])
                print("kpis\trnn_test_cost\t%f" % avg_cost_test)
                print("kpis\trnn_test_acc\t%f" % acc_test)

    train_loop()
def testSetNumpyBeforeTrain(self):
    """Load state given as numpy arrays, then train one step.

    The saved optimizer and model state are converted to numpy arrays keyed
    by variable name and passed to ``set_dict``. After one step the test
    asserts that ``global_step`` grew by one, that the beta power
    accumulators were multiplied by their beta, and that every model
    parameter still equals the baseline in ``self.model_base``.
    """
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4

    with fluid.dygraph.guard():
        for program in (fluid.default_startup_program(),
                        fluid.default_main_program()):
            program.random_seed = seed
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel("ptb_model",
                             hidden_size=hidden_size,
                             vocab_size=vocab_size,
                             num_layers=num_layers,
                             num_steps=num_steps,
                             init_scale=init_scale)

        # This is a fake lr decay strategy: the first piece is 1.0 and
        # every later piece is 0.0 (set lr to 0.0, not update parameter).
        boundaries = [100 * step for step in range(1, 10)]
        lr_values = [1.0] + [0.0] * len(boundaries)

        # Not used below; kept as in the original test.
        if core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()

        schedule = fluid.layers.piecewise_decay(boundaries=boundaries,
                                                values=lr_values)
        adam = Adam(learning_rate=schedule, beta1=0.8, beta2=0.6)

        # Numpy copies of the saved state, keyed by variable name.
        np_opti_dict = {}
        for _, var in self.opti_dict.items():
            np_opti_dict[var.name] = var.numpy()
        np_state_dict = {}
        for _, var in self.state_dict.items():
            np_state_dict[var.name] = var.numpy()

        adam.set_dict(np_opti_dict)
        ptb_model.set_dict(np_state_dict)

        state_shape = (num_layers, batch_size, hidden_size)
        for _ in range(1):
            tokens = np.arange(12).reshape(4, 3).astype('int64')
            targets = np.arange(1, 13).reshape(4, 3).astype('int64')
            tokens = tokens.reshape((-1, num_steps, 1))
            targets = targets.reshape((-1, 1))

            x = to_variable(tokens)
            y = to_variable(targets)
            init_hidden = to_variable(np.zeros(state_shape, dtype='float32'))
            init_cell = to_variable(np.zeros(state_shape, dtype='float32'))

            dy_loss, last_hidden, last_cell = ptb_model(
                x, y, init_hidden, init_cell)

            dy_loss.backward()
            adam.minimize(dy_loss)
            ptb_model.clear_gradients()

        # Accumulator-name marker -> factor it should have been scaled by.
        pow_acc_checks = (("beta1_pow_acc_0", adam._beta1),
                          ("beta2_pow_acc_0", adam._beta2))
        for key, var in adam.state_dict().items():
            if key == "global_step":
                self.assertTrue(
                    np.array_equal(var.numpy(),
                                   self.base_opti[var.name] + 1))
            for marker, beta in pow_acc_checks:
                # Matches only when the marker is not at position 0.
                if key.find(marker) > 0:
                    self.assertTrue(
                        np.array_equal(var.numpy(),
                                       self.base_opti[var.name] * beta))

        # check parameter
        for _, var in ptb_model.state_dict().items():
            current = var.numpy()
            expected = self.model_base[var.name]
            self.assertTrue(np.array_equal(current, expected))
def main():
    """Train a student detector with distillation from a teacher model.

    Reads ``FLAGS.config`` (student) and ``FLAGS.teacher_config`` (teacher),
    builds the student train program, an eval program and the teacher
    program, merges the teacher into the default main program, adds a
    distillation loss to the student loss, and runs the training loop with
    periodic checkpointing and evaluation.

    Raises:
        AssertionError: if ``FLAGS.teacher_pretrained`` is not set.
    """
    env = os.environ
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    check_version()

    main_arch = cfg.architecture

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))

    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    def collect_var_shapes(program):
        """Return (name, shape) for every variable of `program` that has one."""
        collected = []
        for v in program.list_vars():
            try:
                collected.append((v.name, v.shape))
            except Exception:
                # Best effort: skip variables whose name/shape cannot be
                # read. Narrowed from a bare `except:` so KeyboardInterrupt
                # and SystemExit are no longer swallowed.
                pass
        return collected

    # build program
    model = create(main_arch)
    inputs_def = cfg['TrainReader']['inputs_def']
    train_feed_vars, train_loader = model.build_inputs(**inputs_def)
    train_fetches = model.train(train_feed_vars)
    loss = train_fetches['loss']

    start_iter = 0
    train_reader = create_reader(cfg.TrainReader,
                                 (cfg.max_iters - start_iter) * devices_num,
                                 cfg)
    # When iterable mode, set set_sample_list_generator(train_reader, place)
    train_loader.set_sample_list_generator(train_reader)

    # get all student variables
    student_vars = collect_var_shapes(fluid.default_main_program())
    # uncomment the following lines to print all student variables
    # print("="*50 + "student_model_vars" + "="*50)
    # print(student_vars)

    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            model = create(main_arch)
            inputs_def = cfg['EvalReader']['inputs_def']
            test_feed_vars, eval_loader = model.build_inputs(**inputs_def)
            fetches = model.eval(test_feed_vars)
    eval_prog = eval_prog.clone(True)

    eval_reader = create_reader(cfg.EvalReader)
    # When iterable mode, set set_sample_list_generator(eval_reader, place)
    eval_loader.set_sample_list_generator(eval_reader)

    # parse eval fetches
    extra_keys = []
    if cfg.metric == 'COCO':
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg.metric == 'VOC':
        extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
    eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
                                                     extra_keys)

    # ---- teacher program ----
    teacher_cfg = load_config(FLAGS.teacher_config)
    merge_config(FLAGS.opt)
    teacher_arch = teacher_cfg.architecture
    teacher_program = fluid.Program()
    teacher_startup_program = fluid.Program()

    with fluid.program_guard(teacher_program, teacher_startup_program):
        with fluid.unique_name.guard():
            # The teacher gets clones of the student's feed variables.
            teacher_feed_vars = OrderedDict()
            for name, var in train_feed_vars.items():
                teacher_feed_vars[name] = teacher_program.global_block(
                )._clone_variable(var, force_persistable=False)
            # NOTE(review): `model` and `train_fetches` are rebound to the
            # teacher here, so the later `model.backbone` /
            # `model.is_bbox_normalized` lookups use the teacher model --
            # confirm that is intended.
            model = create(teacher_arch)
            train_fetches = model.train(teacher_feed_vars)
            teacher_loss = train_fetches['loss']

    # get all teacher variables
    teacher_vars = collect_var_shapes(teacher_program)
    # uncomment the following lines to print all teacher variables
    # print("="*50 + "teacher_model_vars" + "="*50)
    # print(teacher_vars)

    exe.run(teacher_startup_program)
    assert FLAGS.teacher_pretrained, "teacher_pretrained should be set"
    checkpoint.load_params(exe, teacher_program, FLAGS.teacher_pretrained)
    teacher_program = teacher_program.clone(for_test=True)

    # Reload the student config after the teacher config was loaded.
    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    data_name_map = {
        'target0': 'target0',
        'target1': 'target1',
        'target2': 'target2',
        'image': 'image',
        'gt_bbox': 'gt_bbox',
        'gt_class': 'gt_class',
        'gt_score': 'gt_score'
    }
    merge(teacher_program, fluid.default_main_program(), data_name_map, place)

    yolo_output_names = [
        'strided_slice_0.tmp_0', 'strided_slice_1.tmp_0',
        'strided_slice_2.tmp_0', 'strided_slice_3.tmp_0',
        'strided_slice_4.tmp_0', 'transpose_0.tmp_0',
        'strided_slice_5.tmp_0', 'strided_slice_6.tmp_0',
        'strided_slice_7.tmp_0', 'strided_slice_8.tmp_0',
        'strided_slice_9.tmp_0', 'transpose_2.tmp_0',
        'strided_slice_10.tmp_0', 'strided_slice_11.tmp_0',
        'strided_slice_12.tmp_0', 'strided_slice_13.tmp_0',
        'strided_slice_14.tmp_0', 'transpose_4.tmp_0'
    ]

    distill_pairs = [['teacher_conv2d_6.tmp_1', 'conv2d_20.tmp_1'],
                     ['teacher_conv2d_14.tmp_1', 'conv2d_28.tmp_1'],
                     ['teacher_conv2d_22.tmp_1', 'conv2d_36.tmp_1']]

    # Fine-grained (split) distillation when the config asks for it,
    # plain L2 feature distillation otherwise.
    if cfg.use_fine_grained_loss:
        distill_loss = split_distill(yolo_output_names, 1000)
    else:
        distill_loss = l2_distill(distill_pairs, 100)
    loss = distill_loss + loss

    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')
    lr = lr_builder()
    opt = optim_builder(lr)
    opt.minimize(loss)

    exe.run(fluid.default_startup_program())

    fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'
    ignore_params = cfg.finetune_exclude_pretrained_params \
        if 'finetune_exclude_pretrained_params' in cfg else []

    if FLAGS.resume_checkpoint:
        checkpoint.load_checkpoint(exe, fluid.default_main_program(),
                                   FLAGS.resume_checkpoint)
        start_iter = checkpoint.global_step()
    elif cfg.pretrain_weights and fuse_bn and not ignore_params:
        checkpoint.load_and_fusebn(exe, fluid.default_main_program(),
                                   cfg.pretrain_weights)
    elif cfg.pretrain_weights:
        checkpoint.load_params(exe,
                               fluid.default_main_program(),
                               cfg.pretrain_weights,
                               ignore_params=ignore_params)

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.fuse_all_optimizer_ops = False
    # only enable sync_bn in multi GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu

    exec_strategy = fluid.ExecutionStrategy()
    # iteration number when CompiledProgram tries to drop local execution scopes.
    # Set it to be 1 to save memory usages, so that unused variables in
    # local execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1

    parallel_main = fluid.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            loss_name=loss.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)

    compiled_eval_prog = fluid.CompiledProgram(eval_prog)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()
    map_type = cfg.map_type if 'map_type' in cfg else '11point'
    best_box_ap_list = [0.0, 0]  # [map, iter]
    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)

    def save_inference():
        """Export the eval program as an inference model when requested."""
        if FLAGS.save_inference:
            feeded_var_names = ['image', 'im_size']
            targets = list(fetches.values())
            fluid.io.save_inference_model(save_dir + '/infer',
                                          feeded_var_names, targets, exe,
                                          eval_prog)

    last_step = cfg.max_iters - 1
    train_loader.start()
    for step_id in range(start_iter, cfg.max_iters):
        # Fetch the merged teacher loss under its 'teacher_'-prefixed name.
        teacher_loss_np, distill_loss_np, loss_np, lr_np = exe.run(
            parallel_main,
            fetch_list=[
                'teacher_' + teacher_loss.name, distill_loss.name, loss.name,
                lr.name
            ])
        if step_id % cfg.log_iter == 0:
            logger.info(
                "step {} lr {:.6f}, loss {:.6f}, distill_loss {:.6f}, teacher_loss {:.6f}"
                .format(step_id, lr_np[0], loss_np[0], distill_loss_np[0],
                        teacher_loss_np[0]))

        # Snapshot on every snapshot_iter-th step (except step 0) and on
        # the final step. Parentheses spell out the and/or precedence the
        # original expression relied on.
        is_snapshot_step = (step_id % cfg.snapshot_iter == 0
                            and step_id != 0)
        if is_snapshot_step or step_id == last_step:
            save_name = str(step_id) if step_id != last_step \
                else "model_final"
            checkpoint.save(exe, fluid.default_main_program(),
                            os.path.join(save_dir, save_name))
            save_inference()

            # eval
            results = eval_run(exe, compiled_eval_prog, eval_loader,
                               eval_keys, eval_values, eval_cls, cfg)
            resolution = None
            box_ap_stats = eval_results(results, cfg.metric, cfg.num_classes,
                                        resolution, is_bbox_normalized,
                                        FLAGS.output_eval, map_type,
                                        cfg['EvalReader']['dataset'])

            # Keep a separate checkpoint for the best box AP seen so far.
            if box_ap_stats[0] > best_box_ap_list[0]:
                best_box_ap_list[0] = box_ap_stats[0]
                best_box_ap_list[1] = step_id
                checkpoint.save(exe, fluid.default_main_program(),
                                os.path.join(save_dir, "best_model"))
                save_inference()
            logger.info("Best test box ap: {}, in step: {}".format(
                best_box_ap_list[0], best_box_ap_list[1]))
    train_loader.reset()
def do_train(args):
    """Build the Transformer from `args` and train it with ``fit``.

    Args:
        args: configuration object; the attributes read here include
            device/eager flags, the random seed, model sizes, optimizer
            hyper-parameters, checkpoint paths, the epoch count and the
            save directory.
    """
    device = set_device("gpu" if args.use_cuda else "cpu")
    if args.eager_run:
        fluid.enable_dygraph(device)

    # set seed for CE
    # NOTE(review): eval() runs whatever expression the argument string
    # holds (presumably so a "None" string becomes None); consider
    # ast.literal_eval if the value may come from untrusted input.
    random_seed = eval(str(args.random_seed))
    if random_seed is not None:
        fluid.default_main_program().random_seed = random_seed
        fluid.default_startup_program().random_seed = random_seed

    def attn_bias(name):
        # All three attention-bias inputs share this shape and dtype.
        return Input([None, args.n_head, None, None], "float32", name=name)

    def token_ids(name):
        # Word and position inputs share this shape and dtype.
        return Input([None, None], "int64", name=name)

    # define inputs
    inputs = [
        token_ids("src_word"),
        token_ids("src_pos"),
        attn_bias("src_slf_attn_bias"),
        token_ids("trg_word"),
        token_ids("trg_pos"),
        attn_bias("trg_slf_attn_bias"),
        attn_bias("trg_src_attn_bias"),
    ]
    labels = [
        Input([None, 1], "int64", name="label"),
        Input([None, 1], "float32", name="weight"),
    ]

    # def dataloader
    train_loader, eval_loader = create_data_loader(args, device)

    # define model
    transformer = Transformer(
        args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
        args.n_layer, args.n_head, args.d_key, args.d_value, args.d_model,
        args.d_inner_hid, args.prepostprocess_dropout,
        args.attention_dropout, args.relu_dropout, args.preprocess_cmd,
        args.postprocess_cmd, args.weight_sharing, args.bos_idx,
        args.eos_idx)

    lr_schedule = fluid.layers.noam_decay(args.d_model,
                                          args.warmup_steps,
                                          learning_rate=args.learning_rate)
    optimizer = fluid.optimizer.Adam(learning_rate=lr_schedule,
                                     beta1=args.beta1,
                                     beta2=args.beta2,
                                     epsilon=float(args.eps),
                                     parameter_list=transformer.parameters())
    criterion = CrossEntropyCriterion(args.label_smooth_eps)
    transformer.prepare(optimizer, criterion, inputs=inputs, labels=labels)

    ## init from some checkpoint, to resume the previous training
    if args.init_from_checkpoint:
        transformer.load(args.init_from_checkpoint)
    ## init from some pretrain models, to better solve the current task
    if args.init_from_pretrain_model:
        transformer.load(args.init_from_pretrain_model, reset_optimizer=True)

    # model train
    transformer.fit(train_data=train_loader,
                    eval_data=eval_loader,
                    epochs=args.epoch,
                    eval_freq=1,
                    save_freq=1,
                    save_dir=args.save_model,
                    callbacks=[TrainCallback(args)])
def split_distill(split_output_names, weight):
    """Add fine-grained distillation losses between student and teacher.

    The total is the sum of a regression loss, a classification loss and an
    objectness loss, each accumulated over three groups of six variables
    (x, y, w, h, obj, cls), multiplied by ``weight``.

    Args:
        split_output_names: Names of the student variables in the default
            main program, six per group for three groups. The teacher
            variables are looked up under the same names prefixed with
            ``'teacher_'``.
        weight: Factor applied to the summed loss.

    Returns:
        The weighted distillation loss variable.

    Raises:
        ValueError: If any of the three groups does not hold exactly six
            variables.
    """
    layers = fluid.layers
    global_block = fluid.default_main_program().global_block()

    def unpack_scales(variables):
        # Split the flat list into three (x, y, w, h, obj, cls) tuples; the
        # strict unpacking rejects a short list.
        scales = []
        for first in (0, 6, 12):
            x, y, w, h, obj, cls = variables[first:first + 6]
            scales.append((x, y, w, h, obj, cls))
        return scales

    student_scales = unpack_scales(
        [global_block.var(name) for name in split_output_names])
    teacher_scales = unpack_scales(
        [global_block.var('teacher_' + name) for name in split_output_names])

    def obj_weighted_reg(sx, sy, sw, sh, tx, ty, tw, th, tobj):
        # Box regression loss weighted by the teacher's objectness.
        loss_x = layers.sigmoid_cross_entropy_with_logits(
            sx, layers.sigmoid(tx))
        loss_y = layers.sigmoid_cross_entropy_with_logits(
            sy, layers.sigmoid(ty))
        loss_w = layers.abs(sw - tw)
        loss_h = layers.abs(sh - th)
        loss = layers.sum([loss_x, loss_y, loss_w, loss_h])
        return layers.reduce_mean(loss * layers.sigmoid(tobj))

    def obj_weighted_cls(scls, tcls, tobj):
        # Classification loss weighted by the teacher's objectness.
        loss = layers.sigmoid_cross_entropy_with_logits(
            scls, layers.sigmoid(tcls))
        weighted = layers.elementwise_mul(loss, layers.sigmoid(tobj), axis=0)
        return layers.reduce_mean(weighted)

    def obj_loss(sobj, tobj):
        # Objectness loss against a fixed 0/1 mask taken from the teacher.
        obj_mask = layers.cast(tobj > 0., dtype="float32")
        obj_mask.stop_gradient = True
        return layers.reduce_mean(
            layers.sigmoid_cross_entropy_with_logits(sobj, obj_mask))

    # The three loss families are built one after another (all groups of one
    # family before the next) so ops are appended in the original order.
    reg_terms = []
    for student, teacher in zip(student_scales, teacher_scales):
        s_x, s_y, s_w, s_h = student[0:4]
        t_x, t_y, t_w, t_h, t_obj = teacher[0:5]
        reg_terms.append(
            obj_weighted_reg(s_x, s_y, s_w, s_h, t_x, t_y, t_w, t_h, t_obj))
    distill_reg_loss = layers.sum(reg_terms)

    cls_terms = []
    for student, teacher in zip(student_scales, teacher_scales):
        cls_terms.append(obj_weighted_cls(student[5], teacher[5], teacher[4]))
    distill_cls_loss = layers.sum(cls_terms)

    obj_terms = []
    for student, teacher in zip(student_scales, teacher_scales):
        obj_terms.append(obj_loss(student[4], teacher[4]))
    distill_obj_loss = layers.sum(obj_terms)

    return (distill_reg_loss + distill_cls_loss + distill_obj_loss) * weight
def test_deefcf(self):
    """Check that DeepCF ends on the same loss in every execution mode.

    The same model is trained four times from the same seed: as a static
    program, in dygraph mode, in dygraph mode with
    ``FLAGS_sort_sum_gradient`` enabled, and in dygraph mode under
    ``_test_eager_guard``. The last loss of each run must equal the last
    loss of the static run.
    """
    seed = 90
    if DATA_PATH:
        dataset = load_data(DATA_PATH)
    else:
        dataset = get_data()
    users_np, items_np, labels_np, num_users, num_items, matrix = dataset
    paddle.seed(seed)
    paddle.framework.random._manual_program_seed(seed)

    def batch_starts():
        # Start offsets of the mini-batches of one epoch; the epoch ends as
        # soon as a batch would reach or pass the end of the data.
        for start in range(0, BATCH_SIZE * NUM_BATCHES, BATCH_SIZE):
            if start + BATCH_SIZE >= users_np.shape[0]:
                break
            yield start

    def dygraph_step(model, optimizer, start):
        # One forward/backward/update step; returns the loss as numpy.
        stop = start + BATCH_SIZE
        prediction = model(
            to_variable(users_np[start:stop]),
            to_variable(items_np[start:stop]))
        step_loss = fluid.layers.reduce_sum(
            fluid.layers.log_loss(
                prediction, to_variable(labels_np[start:stop])))
        step_loss.backward()
        optimizer.minimize(step_loss)
        model.clear_gradients()
        return step_loss.numpy()

    # Static-graph run.
    startup = fluid.Program()
    main = fluid.Program()
    scope = fluid.core.Scope()
    with new_program_scope(main=main, startup=startup, scope=scope):
        users = fluid.layers.data('users', [1], dtype='int32')
        items = fluid.layers.data('items', [1], dtype='int32')
        labels = fluid.layers.data('labels', [1], dtype='float32')

        deepcf = DeepCF(num_users, num_items, matrix)
        prediction = deepcf(users, items)
        loss = fluid.layers.reduce_sum(
            fluid.layers.log_loss(prediction, labels))
        adam = fluid.optimizer.AdamOptimizer(0.01)
        adam.minimize(loss)

        if core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup)
        for epoch in range(NUM_EPOCHES):
            sys.stderr.write('epoch %d\n' % epoch)
            for start in batch_starts():
                stop = start + BATCH_SIZE
                feed = {
                    users.name: users_np[start:stop],
                    items.name: items_np[start:stop],
                    labels.name: labels_np[start:stop]
                }
                static_loss = exe.run(main, feed=feed, fetch_list=[loss])[0]
                sys.stderr.write('static loss %s\n' % static_loss)

    # Plain dygraph run.
    with fluid.dygraph.guard():
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        deepcf = DeepCF(num_users, num_items, matrix)
        adam = fluid.optimizer.AdamOptimizer(
            0.01, parameter_list=deepcf.parameters())
        for epoch in range(NUM_EPOCHES):
            sys.stderr.write('epoch %d\n' % epoch)
            for start in batch_starts():
                dy_loss = dygraph_step(deepcf, adam, start)
                sys.stderr.write('dynamic loss: %s %s\n' % (start, dy_loss))

    # Dygraph run with sorted gradient summation.
    with fluid.dygraph.guard():
        paddle.seed(seed)
        paddle.framework.random._manual_program_seed(seed)

        deepcf2 = DeepCF(num_users, num_items, matrix)
        adam2 = fluid.optimizer.AdamOptimizer(
            0.01, parameter_list=deepcf2.parameters())
        fluid.set_flags({'FLAGS_sort_sum_gradient': True})
        for epoch in range(NUM_EPOCHES):
            sys.stderr.write('epoch %d\n' % epoch)
            for start in batch_starts():
                dy_loss2 = dygraph_step(deepcf2, adam2, start)
                sys.stderr.write('dynamic loss: %s %s\n' % (start, dy_loss2))

    # Dygraph run under the eager guard.
    with fluid.dygraph.guard():
        with _test_eager_guard():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            deepcf = DeepCF(num_users, num_items, matrix)
            adam = fluid.optimizer.AdamOptimizer(
                0.01, parameter_list=deepcf.parameters())
            for epoch in range(NUM_EPOCHES):
                sys.stderr.write('epoch %d\n' % epoch)
                for start in batch_starts():
                    eager_loss = dygraph_step(deepcf, adam, start)
                    sys.stderr.write('eager loss: %s %s\n' %
                                     (start, eager_loss))

    self.assertEqual(static_loss, dy_loss)
    self.assertEqual(static_loss, dy_loss2)
    self.assertEqual(static_loss, eager_loss)
def train(word_dict, net_method, use_cuda, seed, quality, save_dirname=None):
    """Train a sentiment classifier on IMDB until a validation accuracy is met.

    Each pass trains on the shuffled training set, then evaluates on the
    whole test set and prints the averaged metrics. Training stops after the
    first pass whose validation accuracy (in percent) exceeds ``quality``, or
    after 100 passes.

    Args:
        word_dict: Vocabulary; its length is the input dimension and it is
            handed to the IMDB readers.
        net_method: Callable ``net_method(data, label, input_dim=...,
            class_dim=...)`` returning ``(cost, accuracy, prediction)``.
        use_cuda: Run on ``CUDAPlace(0)`` when true, otherwise on the CPU.
        seed: Seed for Python's ``random`` module and for the startup program.
        quality: Validation accuracy threshold, compared against accuracy
            multiplied by 100.
        save_dirname: Accepted for interface compatibility; not used here.
    """
    BATCH_SIZE = 128
    PASS_NUM = 100
    dict_dim = len(word_dict)
    class_dim = 2
    target_val_acc = quality

    # Seed for batch producer
    random.seed(seed)

    # Seed for weight initialization
    fluid.default_startup_program().random_seed = seed

    # Setup input features and label as data layers
    data = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    cost, acc_out, prediction = net_method(
        data, label, input_dim=dict_dim, class_dim=class_dim)

    # Clone the test program before the optimizer adds its ops, so evaluation
    # runs the forward pass only.
    test_program = fluid.default_main_program().clone(for_test=True)

    # Learning rate of 5e-4 works for conv models and 2e-3 for LSTM model
    adam = fluid.optimizer.Adam(learning_rate=0.0005)
    adam.minimize(cost)

    # Create reader to iterate over training set
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=25000),
        batch_size=BATCH_SIZE)

    # Setup place and executor for runtime
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    # Create reader to iterate over validation set
    test_reader = paddle.batch(
        paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        # range() instead of xrange(): identical iteration on Python 2 and
        # not a NameError on Python 3.
        for pass_id in range(PASS_NUM):
            train_loss_set = []
            train_acc_set = []

            # Calculate average training loss and accuracy
            # across all mini-batches in the training set
            for train_data in train_reader():
                cost_val, acc_val = exe.run(main_program,
                                            feed=feeder.feed(train_data),
                                            fetch_list=[cost, acc_out])
                train_loss_set.append(float(cost_val))
                train_acc_set.append(float(acc_val))
            train_loss = np.array(train_loss_set).mean()
            train_acc = np.array(train_acc_set).mean() * 100

            # Calculate average validation loss and accuracy
            # across all mini-batches in the validation set
            acc_set = []
            avg_loss_set = []
            for test_data in test_reader():
                avg_loss_np, acc_np = exe.run(
                    program=test_program,
                    feed=feeder.feed(test_data),
                    fetch_list=[cost, acc_out])
                acc_set.append(float(acc_np))
                avg_loss_set.append(float(avg_loss_np))
            acc_val = np.array(acc_set).mean() * 100
            avg_loss_val = np.array(avg_loss_set).mean()
            print("Epoch =", pass_id, ", train-accuracy =", train_acc,
                  ", train-loss =", train_loss, ", validation-accuracy =",
                  acc_val, ", validation-loss =", avg_loss_val)

            if acc_val > target_val_acc:
                # Stop training once the desired accuracy is reached.
                break

    train_loop(fluid.default_main_program())
def train(place, save_dirname):
    """Train an image classifier and save it once it is accurate enough.

    The dataset and network are selected by the module-level ``args``
    (``data_set``, ``model``). Every 100 training batches the model is
    evaluated on the whole test set; when the mean accuracy exceeds
    ``args.threshold`` the inference model is saved and the function returns.

    Args:
        place: Device place handed to ``fluid.Executor``.
        save_dirname: Directory passed to ``fluid.io.save_inference_model``.

    Raises:
        ValueError: If ``args.data_set`` or ``args.model`` is not supported.
        SystemExit: If a test loss is NaN.
    """
    if args.data_set == "cifar10":
        class_dim = 10
        data_shape = [3, 32, 32]
    elif args.data_set == "imagenet":
        class_dim = 102
        data_shape = [3, 224, 224]
    else:
        # The bare name `data_set` was undefined here and raised NameError
        # instead of the intended ValueError.
        raise ValueError("%s dataset is not supported" % args.data_set)

    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if args.model == "vgg":
        print("train vgg")
        net = vgg16(images)
    elif args.model == "resnet":
        print("train resnet")
        if args.data_set == "cifar10":
            net = resnet_cifar10(images)
        elif args.data_set == "imagenet":
            net = resnet_imagenet(images)
        else:
            raise ValueError("%s dataset is not supported" % args.data_set)
    else:
        raise ValueError("%s network is not supported" % args.model)

    predict = fluid.layers.fc(input=net, size=class_dim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc = fluid.layers.accuracy(input=predict, label=label)

    # Clone the test program before the optimizer adds its ops.
    test_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    optimizer.minimize(avg_cost)

    PASS_NUM = 100

    use_flowers = args.data_set == 'imagenet'
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.flowers.train()
            if use_flowers else paddle.dataset.cifar.train10(),
            buf_size=128 * 10),
        batch_size=args.train_batch_size)

    test_reader = paddle.batch(
        paddle.dataset.flowers.test()
        if use_flowers else paddle.dataset.cifar.test10(),
        batch_size=args.inf_batch_size)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    main_program = fluid.default_main_program()

    def to_feed(samples):
        # Build the feed dict for a batch of (image, label) samples. Lists
        # are materialised explicitly: on Python 3, np.array(map(...)) wraps
        # the iterator in a 0-d object array and the astype() call fails.
        pixels = np.array(
            [sample[0].reshape(data_shape) for sample in samples])
        targets = np.array([sample[1] for sample in samples])
        return {
            'pixel': pixels.astype("float32"),
            'label': targets.astype("int64").reshape([-1, 1])
        }

    for pass_id in range(PASS_NUM):
        for batch_id, data in enumerate(train_reader()):
            exe.run(main_program, feed=to_feed(data))

            if (batch_id % 100) != 0:
                continue

            # Periodic evaluation over the whole test set.
            acc_list = []
            avg_loss_list = []
            for test_data in test_reader():
                loss_t, acc_t = exe.run(program=test_program,
                                        feed=to_feed(test_data),
                                        fetch_list=[avg_cost, acc])
                if math.isnan(float(loss_t)):
                    sys.exit("got NaN loss, training failed.")
                acc_list.append(float(acc_t))
                avg_loss_list.append(float(loss_t))

            acc_value = np.array(acc_list).mean()
            avg_loss_value = np.array(avg_loss_list).mean()

            print(
                'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Accuracy {3:2.2}'.
                format(pass_id, batch_id + 1,
                       float(avg_loss_value), float(acc_value)))

            if acc_value > args.threshold:
                print(
                    'Save inference model with test accuracy of {0} at {1}'.
                    format(float(acc_value), save_dirname))
                fluid.io.save_inference_model(save_dirname, ["pixel"],
                                              [predict], exe)
                return
def test_transformer_sort_gradient_float32(self):
    """Compare dygraph (sorted gradient sum) and static Transformer training.

    The model is trained for ``batch_num`` steps in dygraph mode with
    ``sort_sum_gradient`` enabled, then again as a static program with the
    same seed. The final costs, predictions and token counts, and the
    initial and updated parameters, must be exactly equal.
    """
    seed = 90

    def build_transformer():
        # Both runs construct the model with identical hyper-parameters.
        return TransFormer(
            'transformer',
            ModelHyperParams.src_vocab_size,
            ModelHyperParams.trg_vocab_size,
            ModelHyperParams.max_length + 1,
            ModelHyperParams.n_layer,
            ModelHyperParams.n_head,
            ModelHyperParams.d_key,
            ModelHyperParams.d_value,
            ModelHyperParams.d_model,
            ModelHyperParams.d_inner_hid,
            ModelHyperParams.prepostprocess_dropout,
            ModelHyperParams.attention_dropout,
            ModelHyperParams.relu_dropout,
            ModelHyperParams.preprocess_cmd,
            ModelHyperParams.postprocess_cmd,
            ModelHyperParams.weight_sharing,
            TrainTaskConfig.label_smooth_eps,
            use_py_reader=use_py_reader,
            is_test=False)

    # Dygraph run.
    with guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        backward_strategy = fluid.dygraph.BackwardStrategy()
        backward_strategy.sort_sum_gradient = True
        transformer = build_transformer()
        if sync:
            lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
                ModelHyperParams.d_model, TrainTaskConfig.warmup_steps)
            with fluid.default_main_program()._lr_schedule_guard():
                learning_rate = lr_decay * TrainTaskConfig.learning_rate
            optimizer = fluid.optimizer.Adam(
                learning_rate=learning_rate,
                beta1=TrainTaskConfig.beta1,
                beta2=TrainTaskConfig.beta2,
                epsilon=TrainTaskConfig.eps)
        else:
            optimizer = fluid.optimizer.SGD(learning_rate=0.003)

        dy_param_init = dict()
        dy_param_updated = dict()

        def snapshot_into(target):
            # Record the current value of every parameter by name.
            for param in transformer.parameters():
                target[param.name] = param.numpy()

        for step in range(batch_num):
            enc_inputs, dec_inputs, label, weights = create_data()
            dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer(
                enc_inputs, dec_inputs, label, weights)

            if step == 0:
                snapshot_into(dy_param_init)

            dy_avg_cost.backward(backward_strategy)
            optimizer.minimize(dy_avg_cost)
            transformer.clear_gradients()

            if step == batch_num - 1:
                snapshot_into(dy_param_updated)

        dy_avg_cost_value = dy_avg_cost.numpy()
        dy_sum_cost_value = dy_sum_cost.numpy()
        dy_predict_value = dy_predict.numpy()
        dy_token_num_value = dy_token_num.numpy()

    # Static-graph run.
    with new_program_scope():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        transformer = build_transformer()
        if core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        optimizer = fluid.optimizer.SGD(learning_rate=0.003)

        decoder_fields = decoder_data_input_fields[:-1]
        data_input_names = (encoder_data_input_fields + decoder_fields +
                            label_data_input_fields)
        all_inputs = make_all_inputs(data_input_names)
        enc_inputs_len = len(encoder_data_input_fields)
        dec_inputs_len = len(decoder_fields)
        enc_inputs = all_inputs[0:enc_inputs_len]
        dec_inputs = all_inputs[enc_inputs_len:enc_inputs_len +
                                dec_inputs_len]
        label = all_inputs[-2]
        weights = all_inputs[-1]

        static_param_updated = dict()
        static_param_init = dict()
        static_param_name_list = list()
        (static_sum_cost, static_avg_cost, static_predict,
         static_token_num) = transformer(enc_inputs, dec_inputs, label,
                                         weights)
        optimizer.minimize(static_avg_cost)
        for param in transformer.parameters():
            static_param_name_list.append(param.name)

        # Running the startup program yields the initial parameter values.
        out = exe.run(fluid.default_startup_program(),
                      fetch_list=static_param_name_list)
        for index, param_name in enumerate(static_param_name_list):
            static_param_init[param_name] = out[index]

        static_sum_cost_value = None
        static_avg_cost_value = None
        static_predict_value = None
        static_token_num_value = None
        for step in range(batch_num):
            feed_dict = create_feed_dict_list(create_data(True))
            # The four outputs come first, followed by every parameter.
            fetch_list = [
                static_sum_cost, static_avg_cost, static_predict,
                static_token_num
            ]
            fetch_list.extend(static_param_name_list)

            out = exe.run(fluid.default_main_program(),
                          feed=feed_dict,
                          fetch_list=fetch_list)
            static_sum_cost_value = out[0]
            static_avg_cost_value = out[1]
            static_predict_value = out[2]
            static_token_num_value = out[3]
            if step == batch_num - 1:
                for offset, value in enumerate(out[4:]):
                    static_param_updated[static_param_name_list[
                        offset]] = value

    self.assertTrue(
        np.array_equal(static_avg_cost_value, dy_avg_cost_value))
    self.assertTrue(
        np.array_equal(static_sum_cost_value, dy_sum_cost_value))
    self.assertTrue(np.array_equal(static_predict_value, dy_predict_value))
    self.assertTrue(
        np.array_equal(static_token_num_value, dy_token_num_value))

    for key, value in six.iteritems(static_param_init):
        self.assertTrue(np.array_equal(value, dy_param_init[key]))
    for key, value in six.iteritems(static_param_updated):
        self.assertTrue(np.array_equal(value, dy_param_updated[key]))
def train(nn_type,
          use_cuda,
          parallel,
          save_dirname=None,
          model_filename=None,
          params_filename=None,
          is_local=True):
    """Train an MNIST digit recognizer until test accuracy exceeds 0.2.

    Args:
        nn_type: ``'mlp'`` selects ``mlp``; any other value selects
            ``conv_net``.
        use_cuda: Run on ``CUDAPlace(0)``; the function returns immediately
            when CUDA is requested but Paddle was compiled without it.
        parallel: Build the network inside a ``ParallelDo`` block.
        save_dirname: If not ``None``, the inference model is saved there
            once the accuracy target is met.
        model_filename: Forwarded to ``save_inference_model``.
        params_filename: Forwarded to ``save_inference_model``.
        is_local: Train locally when true; otherwise transpile for
            parameter-server training configured through environment
            variables.

    Raises:
        AssertionError: If all passes finish without reaching the target.
        SystemExit: If the test loss becomes NaN.
    """
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return

    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    net_conf = mlp if nn_type == 'mlp' else conv_net

    if not parallel:
        prediction, avg_loss, acc = net_conf(img, label)
    else:
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places)
        with pd.do():
            img_ = pd.read_input(img)
            label_ = pd.read_input(label)
            prediction, avg_loss, acc = net_conf(img_, label_)
            pd.write_output(avg_loss)
            pd.write_output(acc)

        avg_loss, acc = pd()
        # Average loss and accuracy over all devices.
        avg_loss = fluid.layers.mean(avg_loss)
        acc = fluid.layers.mean(acc)

    # Clone the test program before the optimizer adds its ops.
    test_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer.minimize(avg_loss)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    train_reader = paddle.batch(
        paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)

    def evaluate():
        # Mean accuracy and mean loss over the whole test set.
        accuracies = []
        losses = []
        for test_data in test_reader():
            acc_np, avg_loss_np = exe.run(program=test_program,
                                          feed=feeder.feed(test_data),
                                          fetch_list=[acc, avg_loss])
            accuracies.append(float(acc_np))
            losses.append(float(avg_loss_np))
        return numpy.array(accuracies).mean(), numpy.array(losses).mean()

    def train_loop(main_program):
        exe.run(fluid.default_startup_program())

        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                # Train one mini-batch, fetching nothing.
                exe.run(main_program, feed=feeder.feed(data))
                if (batch_id + 1) % 10 != 0:
                    continue

                acc_val, avg_loss_val = evaluate()
                if float(acc_val) > 0.2:  # Smaller value to increase CI speed
                    if save_dirname is not None:
                        fluid.io.save_inference_model(
                            save_dirname, ["img"], [prediction],
                            exe,
                            model_filename=model_filename,
                            params_filename=params_filename)
                    return

                print(
                    'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'.
                    format(pass_id, batch_id + 1,
                           float(avg_loss_val), float(acc_val)))
                if math.isnan(float(avg_loss_val)):
                    sys.exit("got NaN loss, training failed.")
        raise AssertionError("Loss of recognize digits is too large")

    if is_local:
        train_loop(fluid.default_main_program())
        return

    # Distributed training: every setting comes from the environment.
    port = os.getenv("PADDLE_INIT_PORT", "6174")
    pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
    # ip:port,ip:port...
    pserver_endpoints = ",".join(
        [':'.join([ip, port]) for ip in pserver_ips.split(",")])
    trainers = int(os.getenv("TRAINERS"))
    current_endpoint = os.getenv("POD_IP") + ":" + port
    trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
    training_role = os.getenv("TRAINING_ROLE", "TRAINER")

    t = fluid.DistributeTranspiler()
    t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
    if training_role == "PSERVER":
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint,
                                                pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        train_loop(t.get_trainer_program())
def compress(args):
    """Prune a classification model and fine-tune the pruned network.

    The function builds the model on MNIST or ImageNet, evaluates it once,
    prunes the training and evaluation programs with ``Pruner``, and then
    trains the pruned program for ``args.num_epochs`` epochs, evaluating and
    saving as configured.

    Args:
        args: Configuration object. Attributes read here: ``data``,
            ``model``, ``use_gpu``, ``pretrained_model``, ``batch_size``,
            ``log_period``, ``criterion``, ``pruned_ratio``, ``num_epochs``,
            ``test_period``, ``model_path`` and ``save_inference``; it is
            also passed to ``create_optimizer`` and ``get_pruned_params``.

    Raises:
        ValueError: If ``args.data`` is neither ``"mnist"`` nor
            ``"imagenet"``.
        AssertionError: If ``args.model`` is not in ``model_list``.
    """
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Model definition.
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    # Clone the evaluation program before the optimizer adds its ops.
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            # Load only the variables that have a file in the directory.
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))

        _logger.info("Load pretrained model from {}".format(
            args.pretrained_model))
        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)

    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(epoch, program):
        """Evaluate ``program`` on the validation set and log the accuracy."""
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(val_reader()):
            start_time = time.time()
            # Evaluation batches go through the validation feeder, which
            # was previously built but never used.
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id,
                            np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch,
                np.mean(np.array(acc_top1_ns)), np.mean(
                    np.array(acc_top5_ns))))

    def train(epoch, program):
        """Run one data-parallel training epoch of ``program``."""
        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(
                loss_name=avg_cost.name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)

        for batch_id, data in enumerate(train_reader()):
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))

    # Baseline accuracy before pruning.
    test(0, val_program)

    params = get_pruned_params(args, fluid.default_main_program())
    _logger.info("FLOPs before pruning: {}".format(
        flops(fluid.default_main_program())))
    pruner = Pruner(args.criterion)
    ratios = [args.pruned_ratio] * len(params)
    # The evaluation program is pruned with only_graph=True; the training
    # program is pruned without it.
    pruned_val_program, _, _ = pruner.prune(
        val_program,
        fluid.global_scope(),
        params=params,
        ratios=ratios,
        place=place,
        only_graph=True)

    pruned_program, _, _ = pruner.prune(
        fluid.default_main_program(),
        fluid.global_scope(),
        params=params,
        ratios=ratios,
        place=place)
    _logger.info("FLOPs after pruning: {}".format(flops(pruned_program)))
    for i in range(args.num_epochs):
        train(i, pruned_program)
        if i % args.test_period == 0:
            test(i, pruned_val_program)
            save_model(exe, pruned_val_program,
                       os.path.join(args.model_path, str(i)))
        if args.save_inference:
            infer_model_path = os.path.join(args.model_path, "infer_models",
                                            str(i))
            fluid.io.save_inference_model(infer_model_path, ["image"], [out],
                                          exe, pruned_val_program)
            _logger.info(
                "Saved inference model into [{}]".format(infer_model_path))
def get_model(args):
    """Build a dynamic-RNN LSTM sentiment classifier for IMDB.

    Args:
        args: Configuration object; only ``args.batch_size`` is read here.

    Returns:
        A tuple ``(loss, inference_program, adam, train_reader, test_reader,
        batch_acc)``: the mean cross-entropy loss, the inference program
        targeting the accuracy outputs, an Adam optimizer (not yet applied),
        the batched training and test readers, and the batch accuracy
        variable.
    """
    lstm_size = 512
    emb_dim = 512
    crop_size = 1500

    words = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    embedded = fluid.layers.embedding(
        input=words, size=[len(word_dict), emb_dim])
    sentence = fluid.layers.fc(input=embedded, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            # Pre-activation gate: biased projection of the input plus an
            # unbiased projection of the previous hidden state.
            from_input = fluid.layers.fc(
                input=ipt, size=size, bias_attr=True)
            from_hidden = fluid.layers.fc(
                input=hidden, size=size, bias_attr=False)
            return fluid.layers.sums(input=[from_input, from_hidden])

        # Gates are created in this fixed order: forget, input, output, cell.
        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        kept_cell = fluid.layers.elementwise_mul(x=forget_gate, y=prev_cell)
        new_cell = fluid.layers.elementwise_mul(x=input_gate, y=cell_gate)
        cell = fluid.layers.sums(input=[kept_cell, new_cell])

        hidden = fluid.layers.elementwise_mul(
            x=output_gate, y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    # The 'label' data layer is requested twice (here and for the accuracy
    # below), exactly as the graph was originally constructed.
    loss_label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    loss = fluid.layers.cross_entropy(input=logit, label=loss_label)
    loss = fluid.layers.mean(x=loss)

    # Accuracy, with the batch size written to a separate tensor.
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    acc_label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=logit, label=acc_label, total=batch_size_tensor)

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()

    def make_reader(samples):
        # Crop the sentences, shuffle them and group them into batches.
        return batch(
            paddle.reader.shuffle(
                crop_sentence(samples, crop_size), buf_size=25000),
            batch_size=args.batch_size)

    train_reader = make_reader(imdb.train(word_dict))
    test_reader = make_reader(imdb.test(word_dict))

    return loss, inference_program, adam, train_reader, test_reader, batch_acc
def train(use_cuda, save_dirname, is_local=True):
    """Train the recommender model until the test cost drops below 6.0.

    Args:
        use_cuda: Run on ``CUDAPlace(0)`` when true, otherwise on the CPU.
        save_dirname: If not ``None``, the inference model is saved there
            once the cost target is met.
        is_local: Train locally when true; otherwise transpile for
            parameter-server training configured through environment
            variables.

    Raises:
        SystemExit: If the training loss becomes NaN.
    """
    scale_infer, avg_cost = model()

    # Clone the test program before the optimizer adds its ops.
    test_program = fluid.default_main_program().clone(for_test=True)

    sgd_optimizer = SGDOptimizer(learning_rate=0.2)
    sgd_optimizer.minimize(avg_cost)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = Executor(place)

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.movielens.train(), buf_size=8192),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)

    feed_order = [
        'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id',
        'movie_title', 'score'
    ]
    # The saved inference model takes every feed except the trailing 'score'.
    inference_feed_names = feed_order[:-1]

    def train_loop(main_program):
        exe.run(framework.default_startup_program())

        block = main_program.global_block()
        feeder = fluid.DataFeeder(
            [block.var(var_name) for var_name in feed_order], place)

        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                # Train one mini-batch.
                outs = exe.run(program=main_program,
                               feed=feeder.feed(data),
                               fetch_list=[avg_cost])
                out = np.array(outs[0])

                if (batch_id + 1) % 10 == 0:
                    avg_cost_set = []
                    for test_data in test_reader():
                        test_outs = exe.run(program=test_program,
                                            feed=feeder.feed(test_data),
                                            fetch_list=[avg_cost])
                        avg_cost_set.append(test_outs[0])
                        break  # test only 1 segment for speeding up CI

                    test_avg_cost = np.array(avg_cost_set).mean()
                    # A test cost under 6.0 is taken as success.
                    if test_avg_cost < 6.0:
                        if save_dirname is not None:
                            fluid.io.save_inference_model(
                                save_dirname, inference_feed_names,
                                [scale_infer], exe)
                        return

                if math.isnan(float(out[0])):
                    sys.exit("got NaN loss, training failed.")

    if is_local:
        train_loop(fluid.default_main_program())
        return

    # Distributed training: every setting comes from the environment.
    port = os.getenv("PADDLE_INIT_PORT", "6174")
    pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
    # ip:port,ip:port...
    pserver_endpoints = ",".join(
        [':'.join([ip, port]) for ip in pserver_ips.split(",")])
    trainers = int(os.getenv("TRAINERS"))
    current_endpoint = os.getenv("POD_IP") + ":" + port
    trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
    training_role = os.getenv("TRAINING_ROLE", "TRAINER")

    t = fluid.DistributeTranspiler()
    t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
    if training_role == "PSERVER":
        pserver_prog = t.get_pserver_program(current_endpoint)
        pserver_startup = t.get_startup_program(current_endpoint,
                                                pserver_prog)
        exe.run(pserver_startup)
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        train_loop(t.get_trainer_program())
def train(learning_rate,
          batch_size,
          num_epochs,
          init_model=None,
          model_save_dir='model',
          parallel=True,
          use_nccl=True,
          lr_strategy=None,
          class_dim=10000,
          layers=50):
    """Train SE_ResNeXt and evaluate it after every pass.

    After each pass the mean train/test loss and top-1/top-5 accuracy are
    printed and the persistable variables are saved under
    ``<model_save_dir>/<pass_id>``.

    Args:
        learning_rate: base learning rate; used directly unless
            ``lr_strategy`` selects ``piecewise_decay``.
        batch_size: batch size for both the train and test readers.
        num_epochs: pass index at which training stops (exclusive).
        init_model: name of a previously saved pass directory under
            ``model_save_dir`` to resume from; training then restarts at
            ``int(init_model) + 1``. ``None`` starts from pass 0.
        model_save_dir: root directory for saved persistables.
        parallel: build the network inside ``fluid.layers.ParallelDo``.
        use_nccl: forwarded to ``ParallelDo`` when ``parallel`` is true.
        lr_strategy: optional mapping with a ``"piecewise_decay"`` entry
            (keys ``bd``, ``lr``) or a ``"cosine_decay"`` entry (keys
            ``step_each_epoch``, ``epochs``). ``None`` or a mapping with
            neither key means a constant ``learning_rate``.
        class_dim: number of output classes passed to ``SE_ResNeXt``.
        layers: depth passed to ``SE_ResNeXt``.
    """
    # The default used to be passed straight to `in`, which raises
    # "TypeError: argument of type 'NoneType' is not iterable".
    if lr_strategy is None:
        lr_strategy = {}

    image_shape = [3, 224, 224]
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if parallel:
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)

        with pd.do():
            image_ = pd.read_input(image)
            label_ = pd.read_input(label)
            out = SE_ResNeXt(input=image_, class_dim=class_dim, layers=layers)
            cost = fluid.layers.cross_entropy(input=out, label=label_)
            avg_cost = fluid.layers.mean(x=cost)
            acc_top1 = fluid.layers.accuracy(input=out, label=label_, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label_, k=5)
            pd.write_output(avg_cost)
            pd.write_output(acc_top1)
            pd.write_output(acc_top5)

        # Average the ParallelDo outputs into scalar metrics.
        avg_cost, acc_top1, acc_top5 = pd()
        avg_cost = fluid.layers.mean(x=avg_cost)
        acc_top1 = fluid.layers.mean(x=acc_top1)
        acc_top5 = fluid.layers.mean(x=acc_top5)
    else:
        out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    # Clone for evaluation before the optimizer is applied.
    inference_program = fluid.default_main_program().clone(for_test=True)

    if "piecewise_decay" in lr_strategy:
        piecewise = lr_strategy["piecewise_decay"]
        lr_schedule = fluid.layers.piecewise_decay(
            boundaries=piecewise["bd"], values=piecewise["lr"])
    elif "cosine_decay" in lr_strategy:
        cosine = lr_strategy["cosine_decay"]
        lr_schedule = cosine_decay(
            learning_rate=learning_rate,
            step_each_epoch=cosine["step_each_epoch"],
            epochs=cosine["epochs"])
    else:
        lr_schedule = learning_rate

    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr_schedule,
        momentum=0.9,
        regularization=fluid.regularizer.L2Decay(1e-4))
    optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    start_epoch = 0
    if init_model is not None:
        load_model = os.path.join(model_save_dir, str(init_model))
        fluid.io.load_persistables(exe, load_model)
        start_epoch = int(init_model) + 1

    train_reader = paddle.batch(reader.train(), batch_size=batch_size)
    test_reader = paddle.batch(reader.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    fetch_list = [avg_cost, acc_top1, acc_top5]

    def run_batches(program, batch_reader, tag, pass_id):
        """Run ``program`` over every batch; return mean loss, acc1, acc5."""
        losses, acc1s, acc5s = [], [], []
        for batch_id, data in enumerate(batch_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(program,
                                       feed=feeder.feed(data),
                                       fetch_list=fetch_list)
            period = time.time() - t1
            losses.append(loss[0])
            acc1s.append(acc1[0])
            acc5s.append(acc5[0])
            if batch_id % 10 == 0:
                print("Pass {0}, {1}batch {2}, loss {3}, acc1 {4},"
                      "acc5 {5} time {6}".format(
                          pass_id, tag, batch_id, loss[0], acc1[0], acc5[0],
                          "%2.2f sec" % period))
                sys.stdout.flush()
        return (np.array(losses).mean(), np.array(acc1s).mean(),
                np.array(acc5s).mean())

    for pass_id in range(start_epoch, num_epochs):
        train_loss, train_acc1, train_acc5 = run_batches(
            fluid.default_main_program(), train_reader, "train", pass_id)
        test_loss, test_acc1, test_acc5 = run_batches(
            inference_program, test_reader, "test", pass_id)

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3},"
              "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(
                  pass_id, train_loss, train_acc1, train_acc5, test_loss,
                  test_acc1, test_acc5))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir, str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path)
def testSetNumpy(self):
    """Check that ``set_dict`` accepts plain numpy arrays.

    Trains a small PtbModel with Adam, snapshots the optimizer and model
    state as numpy arrays, zeroes the live tensors, restores them through
    ``set_dict`` and compares the result with ``self.base_opti`` and
    ``self.model_base``.
    """
    seed = 90
    hidden_size = 10
    vocab_size = 1000
    num_layers = 1
    num_steps = 3
    init_scale = 0.1
    batch_size = 4
    batch_num = 200

    with fluid.dygraph.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed
        # TODO: marsyang1993 Change seed to
        ptb_model = PtbModel(
            "ptb_model",
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            num_layers=num_layers,
            num_steps=num_steps,
            init_scale=init_scale)

        # A fake lr decay strategy: nine boundaries, every value is 1.0.
        boundaries = [100 * step for step in range(1, 10)]
        lr_values = [1.0] * (len(boundaries) + 1)

        if core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()

        adam = Adam(learning_rate=fluid.layers.piecewise_decay(
            boundaries=boundaries, values=lr_values))

        initial_params = {}
        final_params = {}
        last_batch = batch_num - 1
        state_shape = (num_layers, batch_size, hidden_size)

        for step in range(batch_num):
            x_data = np.arange(12).reshape(4, 3).astype('int64')
            y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
            x_data = x_data.reshape((-1, num_steps, 1))
            y_data = y_data.reshape((-1, 1))
            init_hidden_data = np.zeros(state_shape, dtype='float32')
            init_cell_data = np.zeros(state_shape, dtype='float32')

            x = to_variable(x_data)
            y = to_variable(y_data)
            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)

            loss, _, _ = ptb_model(x, y, init_hidden, init_cell)

            if step == 0:
                for param in ptb_model.parameters():
                    initial_params[param.name] = param.numpy()

            loss.backward()
            adam.minimize(loss)
            ptb_model.clear_gradients()

            if step == last_batch:
                for param in ptb_model.parameters():
                    final_params[param.name] = param.numpy()

        # --- optimizer state: snapshot, zero out, restore, compare ---
        saved_opti = {}
        for opti_var in adam.state_dict().values():
            snapshot = opti_var.numpy()
            saved_opti[opti_var.name] = snapshot
            tensor = opti_var._ivar.value().get_tensor()
            tensor.set(np.zeros_like(snapshot), place)
            self.assertTrue(np.sum(np.abs(opti_var.numpy())) == 0)

        if isinstance(adam._learning_rate, LearningRateDecay):
            adam._learning_rate.step_num = 0

        adam.set_dict(saved_opti)

        for opti_var in adam.state_dict().values():
            self.assertTrue(
                np.array_equal(opti_var.numpy(),
                               self.base_opti[opti_var.name]))

        # --- model parameters: snapshot, zero out, restore, compare ---
        saved_params = {}
        for model_var in ptb_model.state_dict().values():
            snapshot = model_var.numpy()
            saved_params[model_var.name] = snapshot
            tensor = model_var._ivar.value().get_tensor()
            tensor.set(np.zeros_like(snapshot), place)

        ptb_model.set_dict(saved_params)

        for model_var in ptb_model.state_dict().values():
            restored = model_var.numpy()
            expected = self.model_base[model_var.name]
            self.assertTrue(np.array_equal(restored, expected))
def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
    """Train an N-gram word-embedding model on the imikolov dataset.

    Four context words are embedded with one shared table, concatenated and
    fed through two fully connected layers to predict the next word.
    Training returns as soon as a batch cost drops below 5.0 (saving the
    inference model if ``save_dirname`` is not ``None``) and raises
    ``AssertionError`` if that never happens within ``PASS_NUM`` passes.

    Args:
        use_cuda: run on ``fluid.CUDAPlace(0)`` when true, else on CPU.
        is_sparse: forwarded as ``is_sparse`` to every embedding layer.
        is_parallel: build the network inside ``fluid.layers.ParallelDo``.
        save_dirname: directory for the saved inference model, or ``None``.
        is_local: when true, train in-process; otherwise transpile the
            program using the ``PADDLE_INIT_*``, ``TRAINERS``, ``POD_IP``
            and ``TRAINING_ROLE`` environment variables.

    Raises:
        AssertionError: the cost never dropped below 5.0.
    """
    PASS_NUM = 100
    EMBED_SIZE = 32
    HIDDEN_SIZE = 256
    N = 5
    BATCH_SIZE = 32
    IS_SPARSE = is_sparse

    def __network__(words):
        """Build the model from ``[w1, w2, w3, w4, next_word]`` variables."""
        # All four context words share one embedding table ('shared_w').
        context_embeddings = [
            fluid.layers.embedding(
                input=word,
                size=[dict_size, EMBED_SIZE],
                dtype='float32',
                is_sparse=IS_SPARSE,
                param_attr='shared_w') for word in words[:4]
        ]
        concat_embed = fluid.layers.concat(input=context_embeddings, axis=1)
        hidden1 = fluid.layers.fc(input=concat_embed,
                                  size=HIDDEN_SIZE,
                                  act='sigmoid')
        predict_word = fluid.layers.fc(input=hidden1,
                                       size=dict_size,
                                       act='softmax')
        cost = fluid.layers.cross_entropy(input=predict_word, label=words[4])
        avg_cost = fluid.layers.mean(cost)
        return avg_cost, predict_word

    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)

    first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
    second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
    third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
    forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64')
    next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
    word_vars = [first_word, second_word, third_word, forth_word, next_word]

    if not is_parallel:
        avg_cost, predict_word = __network__(word_vars)
    else:
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places)
        with pd.do():
            # __network__ indexes its argument, so it must be a real list:
            # on Python 3 `map(...)` is a one-shot iterator and `words[0]`
            # raises TypeError.
            avg_cost, predict_word = __network__(
                [pd.read_input(word) for word in word_vars])
            pd.write_output(avg_cost)

        avg_cost = fluid.layers.mean(pd())

    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_cost)

    train_reader = paddle.batch(
        paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=word_vars, place=place)

    def train_loop(main_program):
        """Train ``main_program`` until the cost is below 5.0."""
        exe.run(fluid.default_startup_program())

        for pass_id in range(PASS_NUM):
            for data in train_reader():
                avg_cost_np = exe.run(main_program,
                                      feed=feeder.feed(data),
                                      fetch_list=[avg_cost])
                if avg_cost_np[0] < 5.0:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(save_dirname, [
                            'firstw', 'secondw', 'thirdw', 'forthw'
                        ], [predict_word], exe)
                    return
                if math.isnan(float(avg_cost_np[0])):
                    sys.exit("got NaN loss, training failed.")

        raise AssertionError("Cost is too large {0:2.2}".format(avg_cost_np[0]))

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = [':'.join([ip, port]) for ip in pserver_ips.split(",")]
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
batch_labels1 = dic['batch_labels1'] batch_reg_target1 = dic['batch_reg_target1'] batch_centerness1 = dic['batch_centerness1'] batch_labels2 = dic['batch_labels2'] batch_reg_target2 = dic['batch_reg_target2'] batch_centerness2 = dic['batch_centerness2'] batch_labels3 = dic['batch_labels3'] batch_reg_target3 = dic['batch_reg_target3'] batch_centerness3 = dic['batch_centerness3'] batch_labels4 = dic['batch_labels4'] batch_reg_target4 = dic['batch_reg_target4'] batch_centerness4 = dic['batch_centerness4'] print() results = exe.run(fluid.default_main_program(), feed={ 'cls_logits0': cls_logits0_ndarray, 'cls_logits1': cls_logits1_ndarray, 'cls_logits2': cls_logits2_ndarray, 'cls_logits3': cls_logits3_ndarray, 'cls_logits4': cls_logits4_ndarray, 'bboxes_reg0': bboxes_reg0_ndarray, 'bboxes_reg1': bboxes_reg1_ndarray, 'bboxes_reg2': bboxes_reg2_ndarray, 'bboxes_reg3': bboxes_reg3_ndarray, 'bboxes_reg4': bboxes_reg4_ndarray, 'centerness0': centerness0_ndarray, 'centerness1': centerness1_ndarray, 'centerness2': centerness2_ndarray, 'centerness3': centerness3_ndarray,
def main():
    """Train a pruned student detector with distillation from a teacher.

    Steps, in order: load the student config and build its train and eval
    programs; build the teacher program from ``FLAGS.teacher_config`` and
    load ``FLAGS.teacher_pretrained``; merge the teacher into the default
    main program and add the distillation loss; prune the parameters named
    in ``FLAGS.pruned_params`` by ``FLAGS.pruned_ratios``; then train for
    ``cfg.max_iters`` steps, checkpointing and evaluating every
    ``cfg.snapshot_iter`` steps and on the final step.

    Raises:
        ValueError: the config file has no ``architecture`` entry.
        AssertionError: a required flag is missing, the pruned parameter
            and ratio lists differ in length, or a ratio lies outside the
            open interval (0, 1).
    """
    env = os.environ
    cfg = load_config(FLAGS.config)
    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")

    merge_config(FLAGS.opt)
    if 'log_iter' not in cfg:
        cfg.log_iter = 20

    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))

    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # build program
    model = create(main_arch)
    inputs_def = cfg['TrainReader']['inputs_def']
    train_feed_vars, train_loader = model.build_inputs(**inputs_def)
    train_fetches = model.train(train_feed_vars)
    loss = train_fetches['loss']

    start_iter = 0
    train_reader = create_reader(cfg.TrainReader,
                                 (cfg.max_iters - start_iter) * devices_num,
                                 cfg)
    train_loader.set_sample_list_generator(train_reader, place)

    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, fluid.default_startup_program()):
        with fluid.unique_name.guard():
            model = create(main_arch)
            inputs_def = cfg['EvalReader']['inputs_def']
            test_feed_vars, eval_loader = model.build_inputs(**inputs_def)
            fetches = model.eval(test_feed_vars)
    eval_prog = eval_prog.clone(True)

    eval_reader = create_reader(cfg.EvalReader)
    eval_loader.set_sample_list_generator(eval_reader, place)

    teacher_cfg = load_config(FLAGS.teacher_config)
    merge_config(FLAGS.opt)
    teacher_arch = teacher_cfg.architecture
    teacher_program = fluid.Program()
    teacher_startup_program = fluid.Program()

    with fluid.program_guard(teacher_program, teacher_startup_program):
        with fluid.unique_name.guard():
            # The teacher reads clones of the student's input variables.
            teacher_feed_vars = OrderedDict()
            for name, var in train_feed_vars.items():
                teacher_feed_vars[name] = teacher_program.global_block(
                )._clone_variable(
                    var, force_persistable=False)
            model = create(teacher_arch)
            train_fetches = model.train(teacher_feed_vars)
            teacher_loss = train_fetches['loss']

    exe.run(teacher_startup_program)
    assert FLAGS.teacher_pretrained, "teacher_pretrained should be set"
    checkpoint.load_params(exe, teacher_program, FLAGS.teacher_pretrained)
    teacher_program = teacher_program.clone(for_test=True)

    data_name_map = {
        'target0': 'target0',
        'target1': 'target1',
        'target2': 'target2',
        'image': 'image',
        'gt_bbox': 'gt_bbox',
        'gt_class': 'gt_class',
        'gt_score': 'gt_score'
    }
    merge(teacher_program, fluid.default_main_program(), data_name_map, place)

    yolo_output_names = [
        'strided_slice_0.tmp_0', 'strided_slice_1.tmp_0',
        'strided_slice_2.tmp_0', 'strided_slice_3.tmp_0',
        'strided_slice_4.tmp_0', 'transpose_0.tmp_0', 'strided_slice_5.tmp_0',
        'strided_slice_6.tmp_0', 'strided_slice_7.tmp_0',
        'strided_slice_8.tmp_0', 'strided_slice_9.tmp_0', 'transpose_2.tmp_0',
        'strided_slice_10.tmp_0', 'strided_slice_11.tmp_0',
        'strided_slice_12.tmp_0', 'strided_slice_13.tmp_0',
        'strided_slice_14.tmp_0', 'transpose_4.tmp_0'
    ]

    assert cfg.use_fine_grained_loss, \
        "Only support use_fine_grained_loss=True, Please set it in config file or '-o use_fine_grained_loss=true'"
    distill_loss = split_distill(yolo_output_names, 1000)
    loss = distill_loss + loss
    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')
    lr = lr_builder()
    opt = optim_builder(lr)
    opt.minimize(loss)

    exe.run(fluid.default_startup_program())
    checkpoint.load_params(exe,
                           fluid.default_main_program(), cfg.pretrain_weights)

    assert FLAGS.pruned_params is not None, \
        "FLAGS.pruned_params is empty!!! Please set it by '--pruned_params' option."
    pruned_params = FLAGS.pruned_params.strip().split(",")
    logger.info("pruned params: {}".format(pruned_params))
    # Without this check a missing flag surfaced as an opaque
    # AttributeError on `None.strip()`.
    assert FLAGS.pruned_ratios is not None, \
        "FLAGS.pruned_ratios is empty!!! Please set it by '--pruned_ratios' option."
    pruned_ratios = [float(n) for n in FLAGS.pruned_ratios.strip().split(",")]
    logger.info("pruned ratios: {}".format(pruned_ratios))
    assert len(pruned_params) == len(pruned_ratios), \
        "The length of pruned params and pruned ratios should be equal."
    # Check every ratio individually. Comparing the list against [0]*n and
    # [1]*n is lexicographic, so only the first differing element decided
    # the outcome and e.g. [0.5, 7.0] was accepted.
    assert all(0 < ratio < 1 for ratio in pruned_ratios), \
        "The elements of pruned ratios should be in range (0, 1)."

    pruner = Pruner()
    distill_prog = pruner.prune(
        fluid.default_main_program(),
        fluid.global_scope(),
        params=pruned_params,
        ratios=pruned_ratios,
        place=place,
        only_graph=False)[0]

    base_flops = flops(eval_prog)
    eval_prog = pruner.prune(
        eval_prog,
        fluid.global_scope(),
        params=pruned_params,
        ratios=pruned_ratios,
        place=place,
        only_graph=True)[0]
    pruned_flops = flops(eval_prog)
    logger.info("FLOPs -{}; total FLOPs: {}; pruned FLOPs: {}".format(
        float(base_flops - pruned_flops) / base_flops, base_flops,
        pruned_flops))

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    # only enable sync_bn in multi GPU devices
    # NOTE(review): `model` was last assigned to the teacher model above, so
    # this reads the teacher's backbone - confirm that is intended.
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu

    exec_strategy = fluid.ExecutionStrategy()
    # iteration number when CompiledProgram tries to drop local execution scopes.
    # Set it to be 1 to save memory usages, so that unused variables in
    # local execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1

    parallel_main = fluid.CompiledProgram(distill_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)
    compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog)

    # parse eval fetches
    extra_keys = []
    if cfg.metric == 'COCO':
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg.metric == 'VOC':
        extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
    eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
                                                     extra_keys)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()
    map_type = cfg.map_type if 'map_type' in cfg else '11point'
    best_box_ap_list = [0.0, 0]  # [map, iter]
    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)

    train_loader.start()
    for step_id in range(start_iter, cfg.max_iters):
        teacher_loss_np, distill_loss_np, loss_np, lr_np = exe.run(
            parallel_main,
            fetch_list=[
                'teacher_' + teacher_loss.name, distill_loss.name, loss.name,
                lr.name
            ])
        if step_id % cfg.log_iter == 0:
            logger.info(
                "step {} lr {:.6f}, loss {:.6f}, distill_loss {:.6f}, teacher_loss {:.6f}"
                .format(step_id, lr_np[0], loss_np[0], distill_loss_np[0],
                        teacher_loss_np[0]))

        # Snapshot on every snapshot_iter-th step (except step 0) and on the
        # final step.
        is_last_step = step_id == cfg.max_iters - 1
        if step_id % cfg.snapshot_iter == 0 and step_id != 0 or is_last_step:
            save_name = "model_final" if is_last_step else str(step_id)
            checkpoint.save(exe, distill_prog,
                            os.path.join(save_dir, save_name))
            # eval
            results = eval_run(exe, compiled_eval_prog, eval_loader, eval_keys,
                               eval_values, eval_cls)
            resolution = None
            box_ap_stats = eval_results(results, cfg.metric, cfg.num_classes,
                                        resolution, is_bbox_normalized,
                                        FLAGS.output_eval, map_type,
                                        cfg['EvalReader']['dataset'])

            if box_ap_stats[0] > best_box_ap_list[0]:
                best_box_ap_list[0] = box_ap_stats[0]
                best_box_ap_list[1] = step_id
                checkpoint.save(exe, distill_prog,
                                os.path.join("./", "best_model"))
            logger.info("Best test box ap: {}, in step: {}".format(
                best_box_ap_list[0], best_box_ap_list[1]))
    train_loader.reset()
# In[15]: # 获取分类器,用cnn进行分类 predict = resnet(images, [2, 2, 2, 2]) # In[16]: # 获取损失函数和准确率 cost = fluid.layers.cross_entropy(input=predict, label=label) # 交叉熵 avg_cost = fluid.layers.mean(cost) # 计算cost中所有元素的平均值 acc = fluid.layers.accuracy(input=predict, label=label) #使用输入和标签计算准确率 # In[17]: # 获取测试程序 test_program = fluid.default_main_program().clone(for_test=True) # 定义优化方法Adam boundaries = [150, 250] values = [0.1, 0.01, 0.001] optimizer = fluid.optimizer.Adam(learning_rate=0.001) # optimizer = fluid.optimizer.SGD(learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries,values=values), # regularization=fluid.regularizer.L2Decay(regularization_coeff=5e-4)) optimizer.minimize(avg_cost) print("完成") # In[18]: # 定义使用CPU还是GPU,使用CPU时use_cuda = False,使用GPU时use_cuda = True use_cuda = True place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
def test_compression(self):
    """Filter-pruning smoke test for MobileNet v1 on MNIST.

    Planned steps: train one epoch, prune by FLOPs, fine-tune one epoch,
    then check that the final top-1 accuracy is within 2% (relative) of
    0.969. Returns without asserting when Paddle lacks CUDA support.
    """
    if not fluid.core.is_compiled_with_cuda():
        return

    num_classes = 10
    input_shape = [1, 28, 28]

    image = fluid.layers.data(
        name='image', shape=input_shape, dtype='float32')
    image.stop_gradient = False
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    logits = MobileNet().net(input=image, class_dim=num_classes)
    acc_top1 = fluid.layers.accuracy(input=logits, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=logits, label=label, k=5)

    # Evaluation program: cloned before the loss ops are added.
    val_program = fluid.default_main_program().clone(for_test=False)

    cost = fluid.layers.cross_entropy(input=logits, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    optimizer = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=0.01,
        regularization=fluid.regularizer.L2Decay(4e-5))

    place = fluid.CUDAPlace(0)
    executor = fluid.Executor(place)
    executor.run(fluid.default_startup_program())

    # Evaluation side.
    val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
    val_feed_list = [('img', image.name), ('label', label.name)]
    val_fetch_list = [('acc_top1', acc_top1.name),
                      ('acc_top5', acc_top5.name)]

    # Training side.
    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=128)
    train_feed_list = [('img', image.name), ('label', label.name)]
    train_fetch_list = [('loss', avg_cost.name)]

    compressor = Compressor(
        place,
        fluid.global_scope(),
        fluid.default_main_program(),
        train_reader=train_reader,
        train_feed_list=train_feed_list,
        train_fetch_list=train_fetch_list,
        eval_program=val_program,
        eval_reader=val_reader,
        eval_feed_list=val_feed_list,
        eval_fetch_list=val_fetch_list,
        train_optimizer=optimizer)
    compressor.config('./filter_pruning/compress.yaml')
    compressor.run()

    final_top1 = compressor.context.eval_results['acc_top1'][-1]
    relative_error = abs((final_top1 - 0.969) / 0.969)
    self.assertTrue(relative_error < 0.02)
def test_run(self):
    """Compare ``basic_lstm`` against the numpy reference ``lstm_np``.

    The gate weights and biases of every LSTM layer (and of the reverse
    layers when ``self.is_bidirect`` is set) are overwritten with uniform
    random values in [-0.1, 0.1); the same arrays are handed to
    ``lstm_np``. Output, last hidden state and last cell state must agree
    within ``rtol=1e-4``.
    """
    x = layers.data(
        name='x',
        shape=[-1, self.batch_size, self.hidden_size],
        dtype='float32')
    sequence_length = layers.data(
        name="sequence_length", shape=[-1], dtype='float32')

    rnn_out, last_hidden, last_cell = basic_lstm(
        x,
        None,
        None,
        self.hidden_size,
        num_layers=self.num_layers,
        batch_first=self.batch_first,
        bidirectional=self.is_bidirect,
        sequence_length=sequence_length,
        forget_bias=self.forget_bias)

    # Was spelled `persisbale`, which only set an unused attribute and
    # never marked the variables persistable.
    last_hidden.persistable = True
    rnn_out.persistable = True

    if core.is_compiled_with_cuda():
        place = core.CUDAPlace(0)
    else:
        place = core.CPUPlace()

    exe = Executor(place)
    exe.run(framework.default_startup_program())

    scope = fluid.global_scope()

    def randomize(var_name):
        """Overwrite a scope tensor with uniform noise; return the array."""
        tensor = scope.find_var(var_name).get_tensor()
        # The current value is read only to learn the tensor's shape.
        shape = np.array(tensor).shape
        values = np.random.uniform(-0.1, 0.1, size=shape).astype('float32')
        tensor.set(values, place)
        return values

    # Forward layers first, then (if bidirectional) the reverse layers, so
    # the lists are ordered the way they are passed to lstm_np.
    layer_prefixes = ["basic_lstm_layers_"]
    if self.is_bidirect:
        layer_prefixes.append("basic_lstm_reverse_layers_")

    gate_weight = []
    gate_bias = []
    for prefix in layer_prefixes:
        for i in range(self.num_layers):
            unit_name = prefix + str(i) + "/BasicLSTMUnit_0"
            gate_weight.append(randomize(unit_name + ".w_0"))
            gate_bias.append(randomize(unit_name + ".b_0"))

    step_input_np = np.random.uniform(-0.1, 0.1, (
        self.seq_len, self.batch_size, self.hidden_size)).astype('float32')
    sequence_length_np = np.random.randint(
        self.seq_len // 2, self.seq_len,
        size=(self.batch_size)).astype('int64')

    out = exe.run(
        feed={'x': step_input_np,
              'sequence_length': sequence_length_np},
        fetch_list=[rnn_out, last_hidden, last_cell])

    api_rnn_out = out[0]
    api_last_hidden = out[1]
    api_last_cell = out[2]

    np_out = lstm_np(
        step_input_np,
        None,
        None,
        self.hidden_size,
        gate_weight,
        gate_bias,
        num_layers=self.num_layers,
        batch_first=self.batch_first,
        is_bidirect=self.is_bidirect,
        sequence_length=sequence_length_np)

    self.assertTrue(np.allclose(api_rnn_out, np_out[0], rtol=1e-4, atol=0))
    self.assertTrue(
        np.allclose(
            api_last_hidden, np_out[1], rtol=1e-4, atol=0))
    self.assertTrue(
        np.allclose(
            api_last_cell, np_out[2], rtol=1e-4, atol=0))
def test_get_places(self):
    """``get_places`` yields a PLACE_LIST variable and the program runs on CPU."""
    place_list = fluid.layers.get_places()

    executor = fluid.Executor(fluid.CPUPlace())
    executor.run(fluid.default_main_program())

    expected_type = fluid.core.VarDesc.VarType.PLACE_LIST
    self.assertEqual(place_list.type, expected_type)
def main():
    """Train a seq2seq model in dygraph mode and report perplexity.

    Builds ``AttentionModel`` or ``BaseModel`` depending on
    ``args.attention``, trains for ``args.max_epoch`` epochs with global-norm
    gradient clipping, saves the model state after each epoch under
    ``<args.model_path>/epoch_<id>`` and prints perplexity on the validation
    and test sets.

    Raises:
        Exception: ``args.optimizer`` is neither ``"sgd"`` nor ``"adam"``.
    """
    args = parse_args()
    print(args)
    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # NOTE(review): this overrides whatever was passed on the command
        # line, so the seeds are always fixed and the non-CE reader branch
        # below is never taken - confirm this is intentional.
        args.enable_ce = True
        if args.enable_ce:
            fluid.default_startup_program().random_seed = 102
            fluid.default_main_program().random_seed = 102
            np.random.seed(102)
            random.seed(102)

        # Training process
        model_cls = AttentionModel if args.attention else BaseModel
        model = model_cls(
            hidden_size,
            src_vocab_size,
            tar_vocab_size,
            batch_size,
            num_layers=num_layers,
            init_scale=init_scale,
            dropout=dropout)

        global_norm_clip = GradClipByGlobalNorm(max_grad_norm)
        lr = args.learning_rate
        opt_type = args.optimizer
        if opt_type == "sgd":
            optimizer = fluid.optimizer.SGD(lr,
                                            parameter_list=model.parameters())
        elif opt_type == "adam":
            optimizer = fluid.optimizer.Adam(
                lr, parameter_list=model.parameters())
        else:
            print("only support [sgd|adam]")
            raise Exception("opt type not support")

        train_data_prefix = args.train_data_prefix
        eval_data_prefix = args.eval_data_prefix
        test_data_prefix = args.test_data_prefix
        vocab_prefix = args.vocab_prefix
        src_lang = args.src_lang
        tar_lang = args.tar_lang
        print("begin to load data")
        raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                                   train_data_prefix, eval_data_prefix,
                                   test_data_prefix, args.max_len)
        print("finished load data")
        train_data, valid_data, test_data, _ = raw_data

        def prepare_input(batch, epoch_id=0):
            """Turn a reader batch into model inputs plus its word count.

            The target ids are split into decoder input (all but the last
            step) and label (all but the first step).
            """
            src_ids, src_mask, tar_ids, tar_mask = batch
            src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
            in_tar = tar_ids[:, :-1]
            label_tar = tar_ids[:, 1:]

            in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
            label_tar = label_tar.reshape(
                (label_tar.shape[0], label_tar.shape[1], 1))
            inputs = [src_ids, in_tar, label_tar, src_mask, tar_mask]
            return inputs, np.sum(tar_mask)

        # Renamed from `eval` so the builtin is no longer shadowed.
        def evaluate(data, epoch_id=0):
            """Return the perplexity of ``model`` on ``data``."""
            model.eval()
            eval_data_iter = reader.get_data_iter(
                data, batch_size, mode='eval')
            total_loss = 0.0
            word_count = 0.0
            for batch in eval_data_iter:
                input_data_feed, word_num = prepare_input(batch, epoch_id)
                loss = model(input_data_feed)

                total_loss += loss * batch_size
                word_count += word_num
            ppl = np.exp(total_loss.numpy() / word_count)
            model.train()
            return ppl

        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            model.train()
            start_time = time.time()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(
                    train_data, batch_size, enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            for batch_id, batch in enumerate(train_data_iter):
                batch_start_time = time.time()
                input_data_feed, word_num = prepare_input(
                    batch, epoch_id=epoch_id)
                word_count += word_num
                loss = model(input_data_feed)
                loss.backward()
                optimizer.minimize(loss, grad_clip=global_norm_clip)
                model.clear_gradients()
                total_loss += loss * batch_size
                batch_time = time.time() - batch_start_time
                batch_times.append(batch_time)

                if batch_id > 0 and batch_id % 100 == 0:
                    print("-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f"
                          % (epoch_id, batch_id, batch_time,
                             np.exp(total_loss.numpy() / word_count)))
                    # Perplexity is reported per 100-batch window.
                    total_loss = 0.0
                    word_count = 0.0

            epoch_time = time.time() - start_time
            # An epoch with no batches used to raise ZeroDivisionError here.
            avg_batch_time = (sum(batch_times) / len(batch_times)
                              if batch_times else 0.0)
            print(
                "\nTrain epoch:[%d]; Epoch Time: %.5f; avg_time: %.5f s/step\n"
                % (epoch_id, epoch_time, avg_batch_time))

            dir_name = os.path.join(args.model_path, "epoch_" + str(epoch_id))
            print("begin to save", dir_name)
            paddle.fluid.save_dygraph(model.state_dict(), dir_name)
            print("save finished")
            dev_ppl = evaluate(valid_data)
            print("dev ppl", dev_ppl)
            test_ppl = evaluate(test_data)
            print("test ppl", test_ppl)
def train(use_cuda, save_dirname, is_local):
    """Fit a single-layer linear regressor on the UCI housing dataset.

    Training returns as soon as a batch loss drops below 10.0 (saving the
    inference model if ``save_dirname`` is not ``None``) and raises
    ``AssertionError`` if that never happens within 100 passes.

    Args:
        use_cuda: run on ``fluid.CUDAPlace(0)`` when true, else on CPU.
        save_dirname: directory for the saved inference model, or ``None``.
        is_local: when true, train in-process; otherwise transpile the
            program using the ``PADDLE_INIT_*``, ``TRAINERS``, ``POD_IP``
            and ``TRAINING_ROLE`` environment variables.

    Raises:
        AssertionError: the loss never dropped below 10.0.
    """
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y_predict = fluid.layers.fc(input=x, size=1, act=None)
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')

    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_cost = fluid.layers.mean(cost)

    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_cost)

    BATCH_SIZE = 20

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.uci_housing.train(), buf_size=500),
        batch_size=BATCH_SIZE)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    def train_loop(main_program):
        """Train ``main_program`` until the loss is below 10.0."""
        feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
        exe.run(fluid.default_startup_program())

        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                avg_loss_value, = exe.run(main_program,
                                          feed=feeder.feed(data),
                                          fetch_list=[avg_cost])
                print(avg_loss_value)
                if avg_loss_value[0] < 10.0:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(save_dirname, ['x'],
                                                      [y_predict], exe)
                    return
                # Index the element explicitly, as the threshold check above
                # does; float() on a whole array is deprecated in recent
                # NumPy releases.
                if math.isnan(float(avg_loss_value[0])):
                    sys.exit("got NaN loss, training failed.")
        raise AssertionError("Fit a line cost is too large, {0:2.2}".format(
            avg_loss_value[0]))

    if is_local:
        train_loop(fluid.default_main_program())
    else:
        port = os.getenv("PADDLE_INIT_PORT", "6174")
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # ip,ip...
        eplist = [':'.join([ip, port]) for ip in pserver_ips.split(",")]
        pserver_endpoints = ",".join(eplist)  # ip:port,ip:port...
        trainers = int(os.getenv("TRAINERS"))
        current_endpoint = os.getenv("POD_IP") + ":" + port
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        training_role = os.getenv("TRAINING_ROLE", "TRAINER")
        t = fluid.DistributeTranspiler()
        t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
        if training_role == "PSERVER":
            pserver_prog = t.get_pserver_program(current_endpoint)
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            exe.run(pserver_startup)
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            train_loop(t.get_trainer_program())
def __call__(self, bboxes, scores):
    """Append a soft-NMS post-processing step to the default main program.

    The suppression itself runs in NumPy through ``fluid.layers.py_func``.

    Args:
        bboxes: variable passed to the py_func; inside it is converted with
            ``np.array`` and indexed as ``bboxes[inds, j, :]`` (one box per
            candidate and class).
        scores: variable passed to the py_func; indexed as ``scores[:, j]``
            (one column per class).

    Returns:
        The ``softnms_pred_result`` variable written by the py_func. Each
        row is ``[class_id, score, x1, y1, x2, y2]``; when nothing is
        detected the tensor holds the single placeholder value ``[[1]]``.
    """

    def create_tmp_var(program, name, dtype, shape, lod_level):
        # The keyword must be spelled ``lod_level``; the former misspelling
        # meant the requested LoD level was never applied to the variable.
        return program.current_block().create_var(
            name=name, dtype=dtype, shape=shape, lod_level=lod_level)

    def _soft_nms_for_cls(dets, sigma, thres):
        """Gaussian soft-NMS for one class.

        ``dets`` rows are ``[score, x1, y1, x2, y2]``. Returns the selected
        rows as an array of shape (-1, 5).
        """
        dets_final = []
        # Non-normalized boxes get +1 added to width and height.
        eta = 0 if self.normalized else 1
        while len(dets) > 0:
            maxpos = np.argmax(dets[:, 0])
            dets_final.append(dets[maxpos].copy())
            _, tx1, ty1, tx2, ty2 = dets[maxpos]
            x1 = dets[:, 1]
            y1 = dets[:, 2]
            x2 = dets[:, 3]
            y2 = dets[:, 4]
            areas = (x2 - x1 + eta) * (y2 - y1 + eta)
            xx1 = np.maximum(tx1, x1)
            yy1 = np.maximum(ty1, y1)
            xx2 = np.minimum(tx2, x2)
            yy2 = np.minimum(ty2, y2)
            w = np.maximum(0.0, xx2 - xx1 + eta)
            h = np.maximum(0.0, yy2 - yy1 + eta)
            inter = w * h
            ovr = inter / (areas + areas[maxpos] - inter)
            weight = np.exp(-(ovr * ovr) / sigma)
            decayed = dets[:, 0] * weight
            keep = decayed >= thres
            # The selected box overlaps itself completely, so decay alone
            # may leave it above the threshold; drop it explicitly so it is
            # not emitted again as a duplicate detection.
            keep[maxpos] = False
            dets[:, 0] = decayed
            dets = dets[keep]
        return np.array(dets_final).reshape(-1, 5)

    def _soft_nms(bboxes, scores):
        """Run per-class soft-NMS and pack the result into a LoDTensor."""
        bboxes = np.array(bboxes)
        scores = np.array(scores)
        class_nums = scores.shape[-1]

        softnms_thres = self.score_threshold
        softnms_sigma = self.softnms_sigma
        keep_top_k = self.keep_top_k

        cls_boxes = [[] for _ in range(class_nums)]
        cls_ids = [[] for _ in range(class_nums)]

        # Class 0 is skipped when it is the background label.
        start_idx = 1 if self.background_label == 0 else 0
        for j in range(start_idx, class_nums):
            inds = np.where(scores[:, j] >= softnms_thres)[0]
            scores_j = scores[inds, j]
            rois_j = bboxes[inds, j, :]
            dets_j = np.hstack(
                (scores_j[:, np.newaxis], rois_j)).astype(
                    np.float32, copy=False)
            cls_rank = np.argsort(-dets_j[:, 0])
            dets_j = dets_j[cls_rank]

            cls_boxes[j] = _soft_nms_for_cls(
                dets_j, sigma=softnms_sigma, thres=softnms_thres)
            cls_ids[j] = np.array(
                [j] * cls_boxes[j].shape[0]).reshape(-1, 1)

        cls_boxes = np.vstack(cls_boxes[start_idx:])
        cls_ids = np.vstack(cls_ids[start_idx:])
        pred_result = np.hstack([cls_ids, cls_boxes])

        # Limit to keep_top_k detections **over all classes**. A
        # non-positive keep_top_k disables the limit; without the guard a
        # negative value would index the sorted scores from the wrong end.
        image_scores = cls_boxes[:, 0]
        if keep_top_k > 0 and len(image_scores) > keep_top_k:
            image_thresh = np.sort(image_scores)[-keep_top_k]
            keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
            pred_result = pred_result[keep, :]

        res = fluid.LoDTensor()
        res.set_lod([[0, pred_result.shape[0]]])
        if pred_result.shape[0] == 0:
            # Placeholder payload for the "no detections" case.
            pred_result = np.array([[1]], dtype=np.float32)
        res.set(pred_result, fluid.CPUPlace())

        return res

    pred_result = create_tmp_var(
        fluid.default_main_program(),
        name='softnms_pred_result',
        dtype='float32',
        shape=[6],
        lod_level=1)
    fluid.layers.py_func(
        func=_soft_nms, x=[bboxes, scores], out=pred_result)
    return pred_result
def FullyConnected(input, size, act=None, name=None, use_bias=True):
    """
    A wrapper around `fluid.layers.fc` that gives the parameters explicit
    names derived from ``name``.

    Args:
        input: input variable, forwarded to ``fluid.layers.fc``.
        size: forwarded as the ``size`` argument of the fc layer.
        act: forwarded as the ``act`` argument of the fc layer.
        name: prefix used to build the parameter names below.
        use_bias: when False the layer is created without a bias
            (``bias_attr=False``).

    Returns:
        The fc output variable, with an extra ``variables`` attribute: a
        ``VariableHolder`` whose ``W`` is the weight parameter name.

    Variable Names:

    * ``{name}_W``: weights
    * ``{name}_b``: bias (only when ``use_bias`` is true)
    """
    param_attr = ParamAttr(name='{}_W'.format(name))
    # fluid.layers.fc interprets bias_attr=False as "no bias"; previously
    # use_bias was accepted but ignored and a bias was always created.
    bias_attr = ParamAttr(name='{}_b'.format(name)) if use_bias else False
    ret = fluid.layers.fc(input=input,
                          size=size,
                          act=act,
                          param_attr=param_attr,
                          bias_attr=bias_attr)
    ret.variables = VariableHolder(W=param_attr.name)
    return ret


if __name__ == '__main__':
    import paddle.fluid as fluid
    x = fluid.layers.data(name='state', shape=[3], dtype='float32')
    # The input variable is the first positional argument and the layer name
    # is a keyword; passing the name first supplied ``size`` twice and
    # raised TypeError.
    fc1 = FullyConnected(x, size=256, act='relu', name='policy/fc1')
    logger.info("fc1:{}".format(fc1))
    vars = fluid.default_main_program().list_vars()
# Build the fit-a-line network inside a ParallelDo block, attach an SGD
# optimizer, run the memory optimizer on the program and prepare the data
# reader, feeder and executor.
# NOTE(review): x, y, place, device_type and use_nccl are not defined in
# this excerpt; they are expected to be set earlier in the script.
places = fluid.layers.get_places(device_count=0, device_type=device_type)
pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)
with pd.do():
    # Read the inputs inside the parallel block, compute the squared-error
    # cost of a single-output fc layer, and write its mean as block output.
    x_ = pd.read_input(x)
    y_ = pd.read_input(y)
    y_predict = fluid.layers.fc(input=x_, size=1, act=None)
    cost = fluid.layers.square_error_cost(input=y_predict, label=y_)
    avg_cost = fluid.layers.mean(x=cost)
    pd.write_output(avg_cost)

# Fetch what the block wrote and reduce it to the loss that is minimized.
# (Presumably one value per place -- confirm against ParallelDo semantics.)
cost = pd()
avg_cost = fluid.layers.mean(x=cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
sgd_optimizer.minimize(avg_cost)

# Apply the memory optimization pass to the default main program, with
# logging enabled; the release_memory alternative is left disabled.
fluid.memory_optimize(fluid.default_main_program(), print_log=True)
# fluid.release_memory(fluid.default_main_program())

BATCH_SIZE = 200

# fix the order of training data (the reader is batched without shuffling;
# the shuffled variant is kept below, disabled)
train_reader = paddle.batch(
    paddle.dataset.uci_housing.train(), batch_size=BATCH_SIZE)

# train_reader = paddle.batch(
#     paddle.reader.shuffle(
#         paddle.dataset.uci_housing.train(), buf_size=500),
#     batch_size=BATCH_SIZE)

feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)