def train(conf_dict, to_static):
    """
    train process
    """
    program_translator = ProgramTranslator()
    program_translator.enable(to_static)

    # Get device
    if fluid.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    with fluid.dygraph.guard(place):
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

        conf_dict['dict_size'] = len(vocab)
        conf_dict['seq_len'] = args.seq_len

        net = BOW(conf_dict)
        loss = HingeLoss(conf_dict)
        optimizer = fluid.optimizer.AdamOptimizer(
            learning_rate=0.001,
            beta1=0.9,
            beta2=0.999,
            epsilon=1e-08,
            parameter_list=net.parameters())

        metric = fluid.metrics.Auc(name="auc")

        global_step = 0
        losses = []

        train_loader = fluid.io.DataLoader.from_generator(
            capacity=16,
            return_list=True,
            iterable=True,
            use_double_buffer=True)
        get_train_examples = simnet_process.get_reader(
            "train", epoch=args.epoch)
        train_loader.set_sample_list_generator(
            paddle.batch(get_train_examples, batch_size=args.batch_size),
            place)

        for left, pos_right, neg_right in train_loader():
            left = fluid.layers.reshape(left, shape=[-1, 1])
            pos_right = fluid.layers.reshape(pos_right, shape=[-1, 1])
            neg_right = fluid.layers.reshape(neg_right, shape=[-1, 1])
            net.train()
            global_step += 1
            left_feat, pos_score = net(left, pos_right)
            pred = pos_score
            _, neg_score = net(left, neg_right)
            avg_cost = loss.compute(pos_score, neg_score)
            losses.append(np.mean(avg_cost.numpy()))
            avg_cost.backward()
            optimizer.minimize(avg_cost)
            net.clear_gradients()
    return losses

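# A hedged driver sketch, not part of the original snippet: these
# dygraph-to-static tests typically call the train function above once per
# mode and assert that the loss curves agree. `conf_dict` is assumed to
# come from the surrounding test harness.
def check_simnet_train_equivalence(conf_dict):
    dygraph_losses = train(conf_dict, to_static=False)
    static_losses = train(conf_dict, to_static=True)
    np.testing.assert_allclose(dygraph_losses, static_losses, rtol=1e-05)
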
class TestReturnBase(unittest.TestCase):
    def setUp(self):
        self.input = np.ones((1)).astype('int32')
        self.place = fluid.CUDAPlace(
            0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
        self.init_dygraph_func()
        self.program_translator = ProgramTranslator()

    def init_dygraph_func(self):
        self.dygraph_func = test_return_base

    def _run(self, to_static):
        self.program_translator.enable(to_static)
        with fluid.dygraph.guard():
            res = self.dygraph_func(self.input)
            if isinstance(res, tuple):
                return tuple(r.numpy() for r in res)
            elif isinstance(res, core.VarBase):
                return res.numpy()
            return res

    def run_dygraph_mode(self):
        return self._run(to_static=False)

    def run_static_mode(self):
        return self._run(to_static=True)

    def test_transformed_static_result(self):
        dygraph_res = self.run_dygraph_mode()
        static_res = self.run_static_mode()
        if isinstance(dygraph_res, tuple):
            self.assertIsInstance(static_res, tuple)
            self.assertEqual(len(dygraph_res), len(static_res))
            for i in range(len(dygraph_res)):
                self.assertTrue(
                    np.allclose(dygraph_res[i], static_res[i]),
                    msg='dygraph res is {}\nstatic_res is {}'.format(
                        dygraph_res[i], static_res[i]))
        elif isinstance(dygraph_res, np.ndarray):
            self.assertTrue(
                np.allclose(dygraph_res, static_res),
                msg='dygraph res is {}\nstatic_res is {}'.format(
                    dygraph_res, static_res))
        else:
            self.assertEqual(dygraph_res, static_res)

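# Hedged sketch of the `test_return_base` function bound in
# init_dygraph_func above, modeled on the surrounding test file (an
# assumption, not code from this snippet): a declarative function with a
# plain `return`, which the dygraph-to-static transformer must rewrite.
@declarative
def test_return_base(x):
    x = fluid.dygraph.to_variable(x)
    return x
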
def predict_dygraph(data):
    program_translator = ProgramTranslator()
    program_translator.enable(False)
    with fluid.dygraph.guard(place):
        se_resnext = SeResNeXt()

        model_dict, _ = fluid.dygraph.load_dygraph(DY_STATE_DICT_SAVE_PATH)
        se_resnext.set_dict(model_dict)
        se_resnext.eval()

        label = np.random.random([1, 1]).astype("int64")
        img = fluid.dygraph.to_variable(data)
        label = fluid.dygraph.to_variable(label)

        pred_res, _, _, _ = se_resnext(img, label)

        return pred_res.numpy()

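# Hedged usage sketch (not from the original file): compare the dygraph
# prediction above against its static-mode counterpart. `predict_static` is
# a hypothetical companion; a sketch of it follows the SE-ResNeXt train
# function further below.
def check_predict_equivalence(data):
    pred_dy = predict_dygraph(data)
    pred_st = predict_static(data)
    np.testing.assert_allclose(pred_dy, pred_st, rtol=1e-05)
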
def test_switch_eval_and_train(self):
    program_translator = ProgramTranslator()
    with fluid.dygraph.guard():
        linear_net = Linear()
        x_data = np.random.random((4, 10)).astype('float32')
        x = fluid.dygraph.to_variable(x_data)
        linear_net(x)

        _, partial_layer = linear_net.forward.program_cache.last()[-1]
        # check default mode is for training
        self.assertEqual(partial_layer.program,
                         partial_layer._train_program)

        # switch to run test program after `eval()`
        linear_net.eval()
        linear_net(x)
        self.assertEqual(partial_layer.program,
                         partial_layer._infer_program)

        # switch back into training
        linear_net.train()
        linear_net(x)
        self.assertEqual(partial_layer.program,
                         partial_layer._train_program)

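# For context, a minimal hedged sketch of the `Linear` net the test above
# assumes (not the test file's actual definition): a Layer whose forward is
# decorated with `@declarative`, so each call goes through the program
# cache inspected above.
class Linear(fluid.dygraph.Layer):
    def __init__(self):
        super(Linear, self).__init__()
        self.fc = fluid.dygraph.Linear(10, 3)

    @declarative
    def forward(self, x):
        return self.fc(x)
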
def train(to_static):
    program_translator = ProgramTranslator()
    program_translator.enable(to_static)

    random.seed(0)
    np.random.seed(0)

    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
        else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000

        skip_gram_model = SkipGram("skip_gram_model", vocab_size,
                                   embedding_size)
        adam = fluid.optimizer.AdamOptimizer(
            learning_rate=learning_rate,
            parameter_list=skip_gram_model.parameters())

        step = 0
        ret = []
        for center_words, target_words, label, eval_words in build_batch(
                dataset, batch_size, epoch_num):
            center_words_var = fluid.dygraph.to_variable(center_words)
            target_words_var = fluid.dygraph.to_variable(target_words)
            label_var = fluid.dygraph.to_variable(label)
            pred, loss = skip_gram_model(center_words_var, target_words_var,
                                         label_var)

            loss.backward()
            adam.minimize(loss)
            skip_gram_model.clear_gradients()

            step += 1
            mean_loss = np.mean(loss.numpy())
            print("step %d / %d, loss %f" % (step, total_steps, mean_loss))
            ret.append(mean_loss)
        return np.array(ret)

def test_save_load(self):
    # train and save model
    train_layer = self.train_and_save_model()

    # load model
    program_translator = ProgramTranslator()
    program_translator.enable(False)
    loaded_layer = fluid.dygraph.jit.load(self.model_path)
    self.load_and_inference(train_layer, loaded_layer)
    self.load_dygraph_state_dict(train_layer)
    self.load_and_finetune(train_layer, loaded_layer)
    program_translator.enable(True)

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import unittest

import paddle
import paddle.fluid as fluid
from paddle.static import InputSpec
from paddle.fluid.dygraph import to_variable, declarative, ProgramTranslator, Layer, jit
from paddle.fluid.dygraph.dygraph_to_static.program_translator import ConcreteProgram, StaticFunction

from test_basic_api_transformation import dyfunc_to_variable

program_trans = ProgramTranslator()


class SimpleNet(Layer):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.linear = fluid.dygraph.Linear(10, 3)

    @declarative(input_spec=[InputSpec(shape=[None, 10], dtype='float32')])
    def forward(self, x, a=1, b=2):
        y = self.inner_function(x)
        return y

    # `declarative` is not essential, add it to test for robustness.
    @declarative
    def inner_function(self, x):
        # assumed completion: apply the linear layer defined in __init__
        return self.linear(x)

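# A hedged usage sketch for SimpleNet (not from the original file): the
# InputSpec above fixes the dtype and feature width but leaves the batch
# dimension as None, so any batch size satisfies the declared signature.
def run_simple_net(batch_size=4):
    with fluid.dygraph.guard():
        net = SimpleNet()
        x = to_variable(np.ones([batch_size, 10]).astype('float32'))
        return net(x).numpy()  # shape: [batch_size, 3]
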
def train(to_static):
    program_translator = ProgramTranslator()
    program_translator.enable(to_static)

    random.seed(0)
    np.random.seed(0)

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        model = YOLOv3(3, is_train=True)

        boundaries = cfg.lr_steps
        gamma = cfg.lr_gamma
        step_num = len(cfg.lr_steps)
        learning_rate = cfg.learning_rate
        values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

        lr = fluid.dygraph.PiecewiseDecay(
            boundaries=boundaries, values=values, begin=0)
        lr = fluid.layers.linear_lr_warmup(
            learning_rate=lr,
            warmup_steps=cfg.warm_up_iter,
            start_lr=0.0,
            end_lr=cfg.learning_rate,
        )

        optimizer = fluid.optimizer.Momentum(
            learning_rate=lr,
            regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
            momentum=cfg.momentum,
            parameter_list=model.parameters())

        start_time = time.time()
        snapshot_loss = 0
        snapshot_time = 0
        total_sample = 0

        input_size = cfg.input_size
        shuffle = True
        shuffle_seed = None
        total_iter = cfg.max_iter
        mixup_iter = total_iter - cfg.no_mixup_iter

        train_reader = FakeDataReader().reader()

        smoothed_loss = SmoothedValue()
        ret = []
        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()

            img = np.array([x[0] for x in data]).astype('float32')
            img = to_variable(img)
            gt_box = np.array([x[1] for x in data]).astype('float32')
            gt_box = to_variable(gt_box)
            gt_label = np.array([x[2] for x in data]).astype('int32')
            gt_label = to_variable(gt_label)
            gt_score = np.array([x[3] for x in data]).astype('float32')
            gt_score = to_variable(gt_score)

            loss = model(img, gt_box, gt_label, gt_score, None, None)
            smoothed_loss.add_value(np.mean(loss.numpy()))
            snapshot_loss += loss.numpy()
            snapshot_time += start_time - prev_start_time
            total_sample += 1

            print("Iter {:d}, loss {:.6f}, time {:.5f}".format(
                iter_id, smoothed_loss.get_mean_value(),
                start_time - prev_start_time))
            ret.append(smoothed_loss.get_mean_value())

            loss.backward()
            optimizer.minimize(loss)
            model.clear_gradients()

        return np.array(ret)

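# Worked example of the piecewise-decay construction above, with
# illustrative numbers rather than values from `cfg`: `step_num` boundaries
# yield `step_num + 1` learning-rate values, each a gamma-fold decay of the
# previous one.
def piecewise_lr_values(base_lr=0.001, gamma=0.1, step_num=3):
    return [base_lr * (gamma**i) for i in range(step_num + 1)]

# piecewise_lr_values() ~= [1e-3, 1e-4, 1e-5, 1e-6]
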
def train(args, fake_data_reader, to_static):
    program_translator = ProgramTranslator()
    program_translator.enable(to_static)

    config = parse_config(args.config)
    train_config = merge_configs(config, 'train', vars(args))
    valid_config = merge_configs(config, 'valid', vars(args))
    print_configs(train_config, 'Train')

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()

    random.seed(0)
    np.random.seed(0)
    with fluid.dygraph.guard(place):
        paddle.seed(1000)
        paddle.framework.random._manual_program_seed(1000)

        video_model = TSM_ResNet("TSM", train_config, 'Train')
        optimizer = create_optimizer(train_config.TRAIN,
                                     video_model.parameters())

        train_reader = fake_data_reader.create_reader()

        ret = []
        for epoch in range(train_config.TRAIN.epoch):
            video_model.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            for batch_id, data in enumerate(train_reader()):
                x_data = np.array([item[0] for item in data])
                y_data = np.array([item[1] for item in data]).reshape([-1, 1])

                imgs = to_variable(x_data)
                labels = to_variable(y_data)
                labels.stop_gradient = True

                outputs = video_model(imgs)
                loss = fluid.layers.cross_entropy(
                    input=outputs, label=labels, ignore_index=-1)
                avg_loss = fluid.layers.mean(loss)
                acc_top1 = fluid.layers.accuracy(
                    input=outputs, label=labels, k=1)
                acc_top5 = fluid.layers.accuracy(
                    input=outputs, label=labels, k=5)

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                video_model.clear_gradients()

                total_loss += avg_loss.numpy()[0]
                total_acc1 += acc_top1.numpy()[0]
                total_acc5 += acc_top5.numpy()[0]
                total_sample += 1

                print('TRAIN Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'.
                      format(epoch, batch_id,
                             avg_loss.numpy()[0],
                             acc_top1.numpy()[0], acc_top5.numpy()[0]))
                ret.extend([
                    avg_loss.numpy()[0],
                    acc_top1.numpy()[0],
                    acc_top5.numpy()[0]
                ])

            print(
                'TRAIN End, Epoch {}, avg_loss= {}, avg_acc1= {}, avg_acc5= {}'
                .format(epoch, total_loss / total_sample,
                        total_acc1 / total_sample,
                        total_acc5 / total_sample))
        return ret

def train(train_reader, to_static):
    program_translator = ProgramTranslator()
    program_translator.enable(to_static)

    np.random.seed(SEED)

    with fluid.dygraph.guard(place):
        paddle.seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

        se_resnext = SeResNeXt()
        optimizer = optimizer_setting(train_parameters,
                                      se_resnext.parameters())

        for epoch_id in range(EPOCH_NUM):
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            step_idx = 0
            speed_list = []
            for step_id, data in enumerate(train_reader()):
                dy_x_data = np.array(
                    [x[0].reshape(3, 224, 224)
                     for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(
                        BATCH_SIZE, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                pred, avg_loss, acc_top1, acc_top5 = se_resnext(img, label)

                dy_out = avg_loss.numpy()
                avg_loss.backward()
                optimizer.minimize(avg_loss)
                se_resnext.clear_gradients()

                lr = optimizer._global_learning_rate().numpy()
                total_loss += dy_out
                total_acc1 += acc_top1.numpy()
                total_acc5 += acc_top5.numpy()
                total_sample += 1

                if step_id % PRINT_STEP == 0:
                    if step_id == 0:
                        logging.info(
                            "epoch %d | step %d, loss %0.3f, acc1 %0.3f, acc5 %0.3f"
                            % (epoch_id, step_id, total_loss / total_sample,
                               total_acc1 / total_sample,
                               total_acc5 / total_sample))
                        avg_batch_time = time.time()
                    else:
                        speed = PRINT_STEP / (time.time() - avg_batch_time)
                        speed_list.append(speed)
                        logging.info(
                            "epoch %d | step %d, loss %0.3f, acc1 %0.3f, acc5 %0.3f, speed %.3f steps/s"
                            % (epoch_id, step_id, total_loss / total_sample,
                               total_acc1 / total_sample,
                               total_acc5 / total_sample, speed))
                        avg_batch_time = time.time()

                step_idx += 1
                if step_idx == STEP_NUM:
                    if to_static:
                        fluid.dygraph.jit.save(
                            se_resnext,
                            MODEL_SAVE_PREFIX, [img],
                            output_spec=[pred])
                    else:
                        fluid.dygraph.save_dygraph(
                            se_resnext.state_dict(),
                            DY_STATE_DICT_SAVE_PATH)
                    break
        return pred.numpy(), avg_loss.numpy(), \
            acc_top1.numpy(), acc_top5.numpy()

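# Hedged sketch of the static-mode counterpart to `predict_dygraph` above
# (an assumption modeled on this family of tests, not the file's actual
# implementation): reload the jit-saved model written by the `to_static`
# branch of the train function and run inference with it.
def predict_static(data):
    with fluid.dygraph.guard(place):
        se_resnext = fluid.dygraph.jit.load(MODEL_SAVE_PREFIX)
        se_resnext.eval()
        pred_res = se_resnext(fluid.dygraph.to_variable(data))
        return pred_res.numpy()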