def test_main(self):
    """Check that a Preprocessor block (img/2, lbl+1) matches the same
    transformation applied manually in numpy.

    Reads N batches through a plain recordio reader and transforms them
    in numpy, then reads the same file through a fluid Preprocessor and
    compares the two result sequences batch by batch.
    """
    N = 10
    img_expected_res = []
    lbl_expected_res = []
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data_file = fluid.layers.io.open_recordio_file(
            './mnist_for_preprocessor_test.recordio',
            shapes=[[-1, 784], [-1, 1]],
            lod_levels=[0, 0],
            dtypes=['float32', 'int64'])
        img, lbl = fluid.layers.io.read_file(data_file)

        if fluid.core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        for _ in range(N):
            img_v, lbl_v = exe.run(fetch_list=[img, lbl])
            img_expected_res.append(img_v / 2)
            lbl_expected_res.append(lbl_v + 1)

    img_actual_res = []
    lbl_actual_res = []
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data_file = fluid.layers.io.open_recordio_file(
            './mnist_for_preprocessor_test.recordio',
            shapes=[[-1, 784], [-1, 1]],
            lod_levels=[0, 0],
            dtypes=['float32', 'int64'])
        preprocessor = fluid.layers.io.Preprocessor(reader=data_file)
        with preprocessor.block():
            img, lbl = preprocessor.inputs()
            img_out = img / 2
            lbl_out = lbl + 1
            preprocessor.outputs(img_out, lbl_out)

        data_file = fluid.layers.io.double_buffer(preprocessor())
        img, lbl = fluid.layers.io.read_file(data_file)

        if fluid.core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        for _ in range(N):
            img_v, lbl_v = exe.run(fetch_list=[img, lbl])
            img_actual_res.append(img_v)
            lbl_actual_res.append(lbl_v)

    for idx in range(N):
        # BUG FIX: np.allclose returns a bool; the original discarded it,
        # so a mismatch could never fail the test. Assert on the result.
        self.assertTrue(
            np.allclose(img_expected_res[idx], img_actual_res[idx]))
        self.assertTrue(
            np.allclose(lbl_expected_res[idx], lbl_actual_res[idx]))
def program_scope_guard(self):
    """Yield inside a fresh variable scope and fresh main/startup programs."""
    main_prog = fluid.Program()
    start_prog = fluid.Program()
    local_scope = fluid.core.Scope()
    with fluid.scope_guard(local_scope):
        with fluid.program_guard(main_prog, start_prog):
            yield
def __fn__(*args, **kwargs):
    """Run the wrapped `fn` inside fresh programs and a fresh scope."""
    main_prog = fluid.Program()
    start_prog = fluid.Program()
    new_scope = fluid.core.Scope()
    with fluid.scope_guard(new_scope):
        with fluid.program_guard(main_prog, start_prog):
            fn(*args, **kwargs)
def __impl__(self):
    """Call `main` with the captured flags inside isolated programs/scope."""
    training_prog = fluid.Program()
    init_prog = fluid.Program()
    tmp_scope = fluid.core.Scope()
    with fluid.scope_guard(tmp_scope):
        with fluid.program_guard(training_prog, init_prog):
            main(use_cuda, parallel, nn_type, combine)
def get_main_program(self):
    """Build the network into a new Program and return that Program."""
    built = fluid.Program()
    with fluid.program_guard(built):
        self.net_conf()
    return built
def main(self, thread_num):
    """Read three recordio copies with `thread_num` reader threads and
    verify every batch is consumed exactly once.

    Stops when the reader raises EnforceNotMet with the end-of-data
    message, then checks the batch count equals num_batch * 3.
    """
    file_list = [
        './mnist_0.recordio', './mnist_1.recordio', './mnist_2.recordio'
    ]
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data_files = fluid.layers.open_files(
            filenames=file_list,
            thread_num=thread_num,
            shapes=[(-1, 784), (-1, 1)],
            lod_levels=[0, 0],
            dtypes=['float32', 'int64'])
        img, label = fluid.layers.read_file(data_files)

        if fluid.core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())

        batch_count = 0
        while True:
            try:
                img_val, = exe.run(fetch_list=[img])
            except fluid.core.EnforceNotMet as ex:
                # BUG FIX: `ex.message` exists only on Python 2;
                # str(ex) is portable across Python 2 and 3.
                self.assertIn("There is no next data.", str(ex))
                break
            batch_count += 1
            self.assertLessEqual(img_val.shape[0], self.batch_size)
        self.assertEqual(batch_count, self.num_batch * 3)
def test_main(self):
    """Smoke test: train Lenet on the flowers dataset with
    ParallelExecutor for three mini-batches on GPU."""
    main = fluid.Program()
    startup = fluid.Program()
    startup.random_seed = 1
    with fluid.scope_guard(fluid.core.Scope()):
        with fluid.program_guard(main, startup):
            data = fluid.layers.data(
                name='image', shape=[3, 224, 224], dtype='float32')
            label = fluid.layers.data(
                name='label', shape=[1], dtype='int64')
            out = Lenet(data, class_dim=102)
            loss = fluid.layers.cross_entropy(input=out, label=label)
            loss = fluid.layers.mean(loss)

            opt = fluid.optimizer.Momentum(
                learning_rate=0.1,
                momentum=0.9,
                regularization=fluid.regularizer.L2Decay(1e-4))
            opt.minimize(loss)

            place = fluid.CUDAPlace(0)
            feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
            reader = feeder.decorate_reader(
                paddle.batch(
                    flowers.train(), batch_size=16),
                multi_devices=True)

            exe = fluid.Executor(place)
            exe.run(startup)

            pe = fluid.ParallelExecutor(
                use_cuda=True, loss_name=loss.name, main_program=main)

            for batch_id, data in enumerate(reader()):
                loss_np = np.array(
                    pe.run(feed=data, fetch_list=[loss.name])[0])
                # BUG FIX (py3 compat): `print a, b` is a SyntaxError on
                # Python 3; use the print function as other blocks in this
                # codebase already do.
                print(batch_id, loss_np)
                if batch_id == 2:
                    break
def test_with_place(place):
    """Build elementwise_add plus its hand-generated grad op and check
    the computed gradients against numpy references."""
    out_grad = np.random.random_sample(self.x.shape).astype(np.float32)
    # dL/dX equals dL/dOut for elementwise add.
    x_grad = out_grad
    # BUG FIX: on Python 3 `range` objects are immutable, so
    # `del sum_axis[...]` raises TypeError; materialize a list first.
    sum_axis = list(range(0, len(self.x.shape)))
    del sum_axis[self.axis]
    # dL/dY reduces the broadcasted axes away.
    y_grad = np.sum(out_grad, axis=tuple(sum_axis))

    var_dict = locals()
    var_dict['y'] = self.y
    var_dict['x'] = self.x
    var_dict['out'] = self.out
    var_dict['y@GRAD'] = y_grad
    var_dict['x@GRAD'] = x_grad
    var_dict['out@GRAD'] = out_grad

    var_names = ['x', 'y', 'out', 'y@GRAD', 'x@GRAD', 'out@GRAD']
    ground_truth = {name: var_dict[name] for name in var_names}

    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        for name in ground_truth:
            block.create_var(
                name=name, dtype='float32', shape=ground_truth[name].shape)
        elementwise_add_op = block.append_op(
            type="elementwise_add",
            inputs={
                "X": block.var('x'),
                "Y": block.var('y'),
            },
            outputs={"Out": block.var('out'), },
            attrs={"axis": self.axis, })

        # generate backward op_desc
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            elementwise_add_op.desc, set(), [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode("ascii"))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode("ascii"))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)

        exe = fluid.Executor(place)
        out = exe.run(program,
                      feed={
                          name: var_dict[name]
                          for name in ['x', 'y', 'out@GRAD']
                      },
                      fetch_list=['x@GRAD', 'y@GRAD'])
        self.__assert_close(x_grad, out[0], "x@GRAD")
        self.__assert_close(y_grad, out[1], "y@GRAD", atol=1.4)
def test_dropout_layer(self):
    """Compile-only check: a dropout layer builds without error."""
    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        images = fluid.layers.data(
            name='pixel', shape=[3, 48, 48], dtype='float32')
        fluid.layers.dropout(x=images, dropout_prob=0.5)

    # BUG FIX (py3 compat): `print str(x)` is a Python 2 print
    # statement; use the print function.
    print(str(main_program))
def net_profiler(self, state, profile_path='/tmp/profile'):
    # Build a small MNIST-style network that contains a While loop, train
    # it for 10 mini-batches of random data under the profiler, and dump
    # the profile to `profile_path`.  `state` selects the profiler target:
    # 'CPU', 'GPU' or 'All'.
    enable_if_gpu = state == 'GPU' or state == "All"
    if enable_if_gpu and not core.is_compiled_with_cuda():
        # GPU profiling requested but this build has no CUDA support: skip.
        return
    startup_program = fluid.Program()
    main_program = fluid.Program()

    with fluid.program_guard(main_program, startup_program):
        image = fluid.layers.data(name='x', shape=[784], dtype='float32')
        hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
        i = layers.zeros(shape=[1], dtype='int64')
        # `counter` lives on CPU so the While condition can read it.
        counter = fluid.layers.zeros(
            shape=[1], dtype='int64', force_cpu=True)
        until = layers.fill_constant([1], dtype='int64', value=10)
        data_arr = layers.array_write(hidden1, i)
        cond = fluid.layers.less_than(x=counter, y=until)
        while_op = fluid.layers.While(cond=cond)
        with while_op.block():
            hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
            layers.array_write(hidden_n, i, data_arr)
            fluid.layers.increment(x=counter, value=1, in_place=True)
            # Recompute the loop condition in place.
            layers.less_than(x=counter, y=until, cond=cond)

        hidden_n = layers.array_read(data_arr, i)
        hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
        predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
        label = fluid.layers.data(name='y', shape=[1], dtype='int64')
        cost = fluid.layers.cross_entropy(input=predict, label=label)
        avg_cost = fluid.layers.mean(cost)
        batch_size = fluid.layers.create_tensor(dtype='int64')
        batch_acc = fluid.layers.accuracy(
            input=predict, label=label, total=batch_size)

        optimizer = fluid.optimizer.Momentum(learning_rate=0.001,
                                             momentum=0.9)
        opts = optimizer.minimize(avg_cost,
                                  startup_program=startup_program)

    place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)

    pass_acc_calculator = fluid.average.WeightedAverage()
    with profiler.profiler(state, 'total', profile_path) as prof:
        for iter in range(10):
            if iter == 2:
                # Drop statistics from the warm-up iterations.
                profiler.reset_profiler()
            x = np.random.random((32, 784)).astype("float32")
            y = np.random.randint(0, 10, (32, 1)).astype("int64")

            outs = exe.run(main_program,
                           feed={'x': x,
                                 'y': y},
                           fetch_list=[avg_cost, batch_acc, batch_size])
            acc = np.array(outs[1])
            b_size = np.array(outs[2])
            pass_acc_calculator.add(value=acc, weight=b_size)
            pass_acc = pass_acc_calculator.eval()
def test_elementwise_add_with_act(self):
    """Compile-only check: elementwise_add with a fused relu activation."""
    prog = Program()
    init_prog = Program()
    with fluid.program_guard(prog, init_prog):
        lhs = fluid.layers.data(
            name='pixel1', shape=[3, 48, 48], dtype='float32')
        rhs = fluid.layers.data(
            name='pixel2', shape=[3, 48, 48], dtype='float32')
        fluid.layers.elementwise_add(x=lhs, y=rhs, act='relu')
    print(prog)
def __impl__(*args, **kwargs):
    """Invoke `main` with the captured flags inside fresh programs/scope."""
    training_prog = fluid.Program()
    init_prog = fluid.Program()
    tmp_scope = fluid.core.Scope()
    with fluid.scope_guard(tmp_scope):
        with fluid.program_guard(training_prog, init_prog):
            main(
                use_cuda=use_cuda,
                is_sparse=is_sparse,
                is_parallel=is_parallel)
def main(): train = fluid.Program() startup = fluid.Program() with fluid.program_guard(train, startup): train_args = network_cfg(is_train=True) test = fluid.Program() with fluid.program_guard(test, fluid.Program()): test_args = network_cfg(is_train=False) # startup place = fluid.CUDAPlace(0) exe = fluid.Executor(place=place) exe.run(startup) train_exe = fluid.ParallelExecutor( use_cuda=True, loss_name=train_args['loss'].name, main_program=train) fetch_var_list = [var.name for var in train_args['log']] for i in xrange(sys.maxint): result = map(numpy.array, train_exe.run(fetch_list=fetch_var_list if i % 1000 == 0 else [])) if len(result) != 0: print 'Train: ', result if i % 1000 == 0: test_exe = fluid.ParallelExecutor( use_cuda=True, main_program=test, share_vars_from=train_exe) loss = [] acc = [] try: while True: loss_np, acc_np = map( numpy.array, test_exe.run(fetch_list=fetch_var_list)) loss.append(loss_np[0]) acc.append(acc_np[0]) except: test_args['file'].reset() print 'TEST: ', numpy.mean(loss), numpy.mean(acc)
def test_img_conv_group(self):
    """Compile-only check: stacked img_conv_group blocks build correctly."""
    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        images = fluid.layers.data(
            name='pixel', shape=[3, 48, 48], dtype='float32')
        conv1 = conv_block(images, 64, 2, [0.3, 0])
        conv_block(conv1, 256, 3, [0.4, 0.4, 0])

    # BUG FIX (py3 compat): print statement -> print function.
    print(str(main_program))
def test_batch_norm_layer(self):
    """Compile-only check: batch_norm before and after an fc layer."""
    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        images = fluid.layers.data(
            name='pixel', shape=[3, 48, 48], dtype='float32')
        hidden1 = fluid.layers.batch_norm(input=images)
        hidden2 = fluid.layers.fc(input=hidden1, size=128, act='relu')
        fluid.layers.batch_norm(input=hidden2)

    # BUG FIX (py3 compat): print statement -> print function.
    print(str(main_program))
def run_local(self, place):
    """Compute scale(x, 10) locally and stash the result for later
    comparison against the distributed run."""
    prog = fluid.Program()
    with fluid.program_guard(prog):
        x = layers.data(
            shape=[32, 32],
            dtype='float32',
            name='X',
            append_batch_size=False)
        fluid.initializer.Constant(value=2.3)(x, prog.global_block())
        scaled = layers.scale(x=x, scale=10.0)
    executor = fluid.Executor(place)
    self.local_out = executor.run(prog, fetch_list=[scaled])
def get_expect_trainer_ops(self):
    """Return the op-type list a transpiled trainer program is expected
    to contain: the net's ops minus optimize ops, plus send/recv ops."""
    trainer_prog = fluid.Program()
    with fluid.program_guard(trainer_prog):
        optimize_ops, params_grads = self.net_conf()

    delete_ops(trainer_prog.global_block(), optimize_ops)
    expected = [op.type for op in trainer_prog.global_block().ops]
    expected.extend([
        "split_byref", "send_vars", "send_barrier", "recv", "recv",
        "fetch_barrier", "concat"
    ])
    # A send_vars op is inserted right after the gradient of the add op.
    expected.insert(expected.index("elementwise_add_grad") + 1, "send_vars")
    return expected
def setUp(self):
    """Convert the MNIST train set into the recordio file used by the
    preprocessor tests, remembering how many batches were written."""
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        batched_reader = paddle.batch(mnist.train(), batch_size=32)
        data_feeder = fluid.DataFeeder(
            feed_list=[  # order is image and label
                fluid.layers.data(
                    name='image', shape=[784]),
                fluid.layers.data(
                    name='label', shape=[1], dtype='int64'),
            ],
            place=fluid.CPUPlace())
        self.num_batches = fluid.recordio_writer.convert_reader_to_recordio_file(
            './mnist_for_preprocessor_test.recordio', batched_reader,
            data_feeder)
def get_model(args):
    # Build a VGG16 (with batch-norm and dropout) classifier for either
    # cifar10 (10 classes) or flowers (102 classes), and return everything
    # the benchmark driver needs:
    # (avg_cost, inference_program, optimizer, train_reader, test_reader,
    #  batch_acc).
    # NOTE(review): layers are added to fluid.default_main_program(),
    # so this presumably must be called inside the caller's program
    # context -- confirm against the call site.
    if args.data_set == "cifar10":
        classdim = 10
        if args.data_format == 'NCHW':
            data_shape = [3, 32, 32]
        else:
            data_shape = [32, 32, 3]
    else:
        classdim = 102
        if args.data_format == 'NCHW':
            data_shape = [3, 224, 224]
        else:
            data_shape = [224, 224, 3]

    # Input data
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    net = vgg16_bn_drop(images)
    predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    # inference program: clone before the optimizer adds backward ops,
    # then prune it down to what the accuracy targets need.
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    # Optimization
    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)

    # data reader
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.cifar.train10()
            if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
            buf_size=5120),
        batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.cifar.test10()
        if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
        batch_size=args.batch_size)

    return avg_cost, inference_program, optimizer, train_reader, test_reader, batch_acc
def setUpClass(cls):
    """Write the MNIST train set into MNIST_RECORDIO_FILE once for the
    whole test class."""
    # Convert mnist to recordio file
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        batched_reader = paddle.batch(mnist.train(), batch_size=4)
        mnist_feeder = fluid.DataFeeder(
            feed_list=[  # order is image and label
                fluid.layers.data(
                    name='image', shape=[784]),
                fluid.layers.data(
                    name='label', shape=[1], dtype='int64'),
            ],
            place=fluid.CPUPlace())
        fluid.recordio_writer.convert_reader_to_recordio_file(
            MNIST_RECORDIO_FILE, batched_reader, mnist_feeder)
def init_client(self, place, port):
    """Send a constant tensor to the listen-and-serve server on `port`
    and record the scaled tensor it returns."""
    prog = fluid.Program()
    with fluid.program_guard(prog):
        x = layers.data(
            shape=[32, 32],
            dtype='float32',
            name='X',
            append_batch_size=False)
        fluid.initializer.Constant(value=2.3)(x, prog.global_block())
        get_var = prog.global_block().create_var(
            name="scale_0.tmp_0",  # server side var
            dtype="float32",
            persistable=False,
            shape=[32, 32])
        result = layers.Send("127.0.0.1:%d" % port, [x], [get_var])
    executor = fluid.Executor(place)
    # `result` is a list of output variables.
    self.dist_out = executor.run(prog, fetch_list=result)
def setUp(self):
    """Write the MNIST train set to mnist_0.recordio and duplicate the
    file twice so the multi-file reader has three sources."""
    self.batch_size = 64
    # Convert mnist to recordio file
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        batched_reader = paddle.batch(mnist.train(),
                                      batch_size=self.batch_size)
        data_feeder = fluid.DataFeeder(
            feed_list=[  # order is image and label
                fluid.layers.data(
                    name='image', shape=[784]),
                fluid.layers.data(
                    name='label', shape=[1], dtype='int64'),
            ],
            place=fluid.CPUPlace())
        self.num_batch = fluid.recordio_writer.convert_reader_to_recordio_file(
            './mnist_0.recordio', batched_reader, data_feeder)
    copyfile('./mnist_0.recordio', './mnist_1.recordio')
    copyfile('./mnist_0.recordio', './mnist_2.recordio')
def parallel_exe(self, train_inputs, seed):
    """Train Lenet with ParallelExecutor and assert that every fetched
    variable stays finite (no NaN / Inf) across the given batches."""
    main = fluid.Program()
    startup = fluid.Program()
    startup.random_seed = seed
    with fluid.program_guard(main, startup):
        data = fluid.layers.data(
            name='image', shape=[3, 224, 224], dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        out = Lenet(data, class_dim=102)
        loss = fluid.layers.cross_entropy(input=out, label=label)
        loss = fluid.layers.mean(loss)

        opt = fluid.optimizer.Momentum(
            learning_rate=0.1,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
        opt.minimize(loss)

        # TODO(zcd): I found that once the memory optimizer is open,
        # parallel_exe doesn't fetch some variable, such as conv2d_0.b_0@GRAD,
        # conv2d_1.b_0@GRAD. Those variables should not be pruned.
        # fluid.memory_optimize(main)

        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup)
        feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
        pe = fluid.ParallelExecutor(
            use_cuda=True, loss_name=loss.name, main_program=main)

        fetch_list = []
        all_vars = main.global_block().vars
        # BUG FIX: the original used `k[0] is not '_'`, a string-identity
        # comparison (implementation-dependent; a SyntaxWarning on
        # Python 3.8+) -- use `!=`. Also use `.items()` instead of the
        # Python-2-only `.iteritems()`. Parentheses keep the original
        # `and`/`or` precedence: (non-tmp and non-underscore) or persistable.
        for k, v in all_vars.items():
            if ('tmp' not in k and k[0] != '_') or v.persistable:
                fetch_list.append(k)

        for data in train_inputs:
            ret = pe.run(fetch_list, feed=feeder.feed(data))
            for i in range(len(fetch_list)):
                assert not math.isnan(np.sum(ret[i])) and \
                    not math.isinf(np.sum(ret[i]))
def check_network_convergence(self, build_strategy=None):
    """Run a train and a test ParallelExecutor that share variables on
    the same fixed batch and assert the two losses coincide."""
    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        loss = simple_fc_net()
        # Clone BEFORE the optimizer adds backward/optimize ops.
        test_program = main.clone(for_test=True)

        opt = fluid.optimizer.SGD(learning_rate=0.001)
        opt.minimize(loss)

        batch_size = 32
        image = np.random.normal(size=(batch_size, 784)).astype('float32')
        label = np.random.randint(0, 10, (batch_size, 1), dtype="int64")

        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup)
        feed_dict = {'image': image, 'label': label}

        train_exe = fluid.ParallelExecutor(
            use_cuda=True,
            loss_name=loss.name,
            main_program=main,
            build_strategy=build_strategy)
        test_exe = fluid.ParallelExecutor(
            use_cuda=True,
            main_program=test_program,
            share_vars_from=train_exe,
            build_strategy=build_strategy)

        # BUG FIX (py3 compat): `xrange` does not exist on Python 3;
        # `range` is equivalent here.
        for i in range(5):
            test_loss, = test_exe.run([loss.name], feed=feed_dict)
            test_loss = np.array(test_loss)

            train_loss, = train_exe.run([loss.name], feed=feed_dict)
            train_loss = np.array(train_loss)
            self.assertTrue(
                np.allclose(
                    train_loss, test_loss, atol=1e-8),
                "Train loss: " + str(train_loss) + "\n Test loss:" +
                str(test_loss))
def test_main(self, decorator_callback=None):
    """Train a small MNIST net fed from a recordio reader until the
    reader is exhausted; the final loss must be below the first.

    `decorator_callback` optionally wraps the reader (e.g. with
    shuffle or double-buffering) before it is consumed.
    """
    # use new program
    with fluid.program_guard(fluid.Program(), fluid.Program()):
        data_file = fluid.layers.open_recordio_file(
            './mnist.recordio',
            shapes=[[-1, 784], [-1, 1]],
            lod_levels=[0, 0],
            dtypes=['float32', 'int64'])
        if decorator_callback is not None:
            data_file = decorator_callback(data_file)
        img, label = fluid.layers.read_file(data_file)

        hidden = fluid.layers.fc(input=img, size=100, act='tanh')
        prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
        loss = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_loss = fluid.layers.mean(loss)

        fluid.optimizer.Adam(learning_rate=1e-3).minimize(avg_loss)

        if fluid.core.is_compiled_with_cuda():
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        avg_loss_np = []

        # train a pass
        batch_id = 0
        while True:
            try:
                tmp, = exe.run(fetch_list=[avg_loss])
            except fluid.core.EnforceNotMet as ex:
                # BUG FIX: `ex.message` is Python-2-only; str(ex) works
                # on both Python 2 and 3.
                self.assertIn("There is no next data.", str(ex))
                break
            avg_loss_np.append(tmp)
            batch_id += 1

        self.assertEqual(batch_id, self.num_batches)
        self.assertLess(avg_loss_np[-1], avg_loss_np[0])
def test_train_dyn_rnn(self):
    """Overfit the DynamicRNN sentiment model on one batch for 100
    steps; the loss after training must be below the initial loss."""
    main_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(main_program, startup_program):
        sentence = fluid.layers.data(
            name='word', shape=[1], dtype='int64', lod_level=1)
        sent_emb = fluid.layers.embedding(
            input=sentence, size=[len(self.word_dict), 32], dtype='float32')

        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            in_ = rnn.step_input(sent_emb)
            mem = rnn.memory(shape=[100], dtype='float32')
            out_ = fluid.layers.fc(input=[in_, mem], size=100, act='tanh')
            rnn.update_memory(mem, out_)
            rnn.output(out_)

        last = fluid.layers.sequence_last_step(input=rnn())
        logits = fluid.layers.fc(input=last, size=1, act=None)
        label = fluid.layers.data(name='label', shape=[1], dtype='float32')
        loss = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=logits, label=label)
        loss = fluid.layers.mean(loss)
        sgd = fluid.optimizer.Adam(1e-3)
        sgd.minimize(loss=loss)

    cpu = fluid.CPUPlace()
    exe = fluid.Executor(cpu)
    exe.run(startup_program)
    feeder = fluid.DataFeeder(feed_list=[sentence, label], place=cpu)
    data = next(self.train_data())
    loss_0 = exe.run(main_program,
                     feed=feeder.feed(data),
                     fetch_list=[loss])[0]
    # BUG FIX (py3 compat): `xrange` is Python-2-only; `range` is
    # equivalent here.
    for _ in range(100):
        val = exe.run(main_program,
                      feed=feeder.feed(data),
                      fetch_list=[loss])[0]
    # loss should be small after 100 mini-batch
    self.assertLess(val[0], loss_0[0])
def main():
    # Run each test module named on the command line inside its own fresh
    # Program / Scope / unique_name guard, so tests cannot leak graph or
    # naming state into each other. Test output is captured per module and
    # only printed on failure; exits with status 1 if any module fails.
    # NOTE: Python-2-only code (cStringIO, `print >>` syntax).
    sys.path.append(os.getcwd())
    some_test_failed = False
    for module_name in sys.argv[1:]:
        # Capture the test runner's output so passing modules stay quiet.
        buffer = cStringIO.StringIO()
        main = fluid.Program()
        startup = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.program_guard(main, startup):
            with fluid.scope_guard(scope):
                with fluid.unique_name.guard():
                    test_loader = unittest.TestLoader()
                    module = importlib.import_module(module_name)
                    tests = test_loader.loadTestsFromModule(module)
                    res = unittest.TextTestRunner(stream=buffer).run(tests)
                    if not res.wasSuccessful():
                        some_test_failed = True
                        print >> sys.stderr, module_name, 'failed\n', buffer.getvalue(
                        )

    if some_test_failed:
        exit(1)
def init_serv(self, place):
    """Start a ListenAndServ program that receives tensor "X" and writes
    X * 10 into the persistable output var the client fetches."""
    main = fluid.Program()
    with fluid.program_guard(main):
        serv = layers.ListenAndServ(
            "127.0.0.1:0", ["X"], optimizer_mode=False)
        with serv.do():
            out_var = main.global_block().create_var(
                name="scale_0.tmp_0",
                # BUG FIX: the keyword was misspelled `psersistable`, so
                # it was silently ignored and the server-side output var
                # was never actually marked persistable.
                persistable=True,
                dtype="float32",
                shape=[32, 32])
            x = layers.data(
                shape=[32, 32],
                dtype='float32',
                name="X",
                append_batch_size=False)
            fluid.initializer.Constant(value=1.0)(x, main.global_block())
            layers.scale(x=x, scale=10.0, out=out_var)
    self.server_exe = fluid.Executor(place)
    self.server_exe.run(main)
def main(args):
    """
    Call the configuration function of the model, build the model and load data, then start training.

    model_config:
        a json file with the model configurations, such as dropout rate,
        learning rate, num tasks and so on;
    context_pooling:
        the pooling type of context prediction;
    PreGNNContextpredModel:
        an unsupervised pretraining model which uses subgraphs to predict
        their surrounding graph structures. The goal is to pre-train a GNN
        so that it maps nodes appearing in similar structural contexts to
        nearby embeddings.
    """
    model_config = json.load(open(args.model_config, 'r'))
    if not args.dropout_rate is None:
        model_config['dropout_rate'] = args.dropout_rate
    model_config['context_pooling'] = args.context_pooling

    ### build model
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = PreGNNContextpredModel(model_config)
            model.forward()
            opt = fluid.optimizer.Adam(learning_rate=args.lr)
            if args.distributed:
                opt = get_distributed_optimizer(opt)
            opt.minimize(model.loss)
    with fluid.program_guard(test_prog, fluid.Program()):
        with fluid.unique_name.guard():
            model = PreGNNContextpredModel(model_config)
            model.forward(is_test=True)

    # Use CUDAPlace for GPU training, or use CPUPlace for CPU training.
    place = fluid.CUDAPlace(int(os.environ.get('FLAGS_selected_gpus', 0))) \
            if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if not args.init_model is None and not args.init_model == "":
        load_partial_params(exe, args.init_model, train_prog)

    ### load data
    # PreGNNContextPredFeaturizer: used along with PreGNNContextpredModel;
    # `gen_features` converts a single raw smiles to a graph, `collate_fn`
    # aggregates a sublist of graph data into a batch. k is the number of
    # layers; l1 and l2 are the context sizes, usually l1 < l2.
    # splitter: RandomSplitter here; ScaffoldSplitter / RandomScaffoldSplitter
    # / IndexSplitter are also available and better evaluate generalization
    # on out-of-distribution samples.
    k = model_config['layer_num']
    l1 = k - 1
    l2 = l1 + args.context_size
    featurizer = PreGNNContextPredFeaturizer(
        model.substruct_graph_wrapper, model.context_graph_wrapper, k, l1, l2)
    dataset = load_zinc_dataset(args.data_path, featurizer=featurizer)
    splitter = RandomSplitter()
    train_dataset, _, test_dataset = splitter.split(
        dataset, frac_train=0.9, frac_valid=0, frac_test=0.1)
    if args.distributed:
        # Shard the training set across workers.
        indices = list(
            range(fleet.worker_index(), len(train_dataset),
                  fleet.worker_num()))
        train_dataset = train_dataset[indices]
    print("Train/Test num: %s/%s" % (len(train_dataset), len(test_dataset)))

    ### start train
    # Train for max_epoch epochs, save per-epoch params (worker 0 only),
    # then reload and re-save the epoch with the lowest test loss.
    list_test_loss = []
    for epoch_id in range(args.max_epoch):
        train_loss = train(args, exe, train_prog, model, train_dataset,
                           featurizer)
        test_loss = evaluate(args, exe, test_prog, model, test_dataset,
                             featurizer)
        if not args.distributed or fleet.worker_index() == 0:
            fluid.io.save_params(exe,
                                 '%s/epoch%s' % (args.model_dir, epoch_id),
                                 train_prog)
        list_test_loss.append(test_loss)
        print("epoch:%d train/loss:%s" % (epoch_id, train_loss))
        print("epoch:%d test/loss:%s" % (epoch_id, test_loss))

    # BUG FIX: best_epoch_id was previously assigned only inside the
    # worker-0 guard, so on a non-zero distributed worker the final
    # `return list_test_loss[best_epoch_id]` raised NameError.
    # Compute it unconditionally; only worker 0 touches the filesystem.
    best_epoch_id = np.argmin(list_test_loss)
    if not args.distributed or fleet.worker_index() == 0:
        fluid.io.load_params(exe,
                             '%s/epoch%d' % (args.model_dir, best_epoch_id),
                             train_prog)
        fluid.io.save_params(exe, '%s/epoch_best' % (args.model_dir),
                             train_prog)
    return list_test_loss[best_epoch_id]
def main(args):
    """Train and evaluate a precomputed message-passing (SGC-style) model.

    First runs the graph message passing once in `precompute_program` to
    cache node features for the train/val/test splits, then trains a
    classifier on the cached features, logging per-epoch validation
    metrics and the final test accuracy.
    """
    dataset = load(args.dataset)

    # normalize: symmetric degree normalization D^{-1/2} per node.
    indegree = dataset.graph.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    dataset.graph.node_feat["norm"] = np.expand_dims(norm, -1)

    data = expand_data_dim(dataset)

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    precompute_program = fluid.Program()
    startup_program = fluid.Program()
    train_program = fluid.Program()
    val_program = train_program.clone(for_test=True)
    test_program = train_program.clone(for_test=True)

    # precompute message passing and gather
    initializer = []
    with fluid.program_guard(precompute_program, startup_program):
        gw = pgl.graph_wrapper.StaticGraphWrapper(
            name="graph", place=place, graph=dataset.graph)
        cached_h = MessagePassing(
            gw,
            gw.node_feat["words"],
            num_layers=args.num_layers,
            norm=gw.node_feat['norm'])
        train_cached_h, init = pre_gather(cached_h, 'train',
                                          data['train_index'])
        initializer.append(init)
        val_cached_h, init = pre_gather(cached_h, 'val', data['val_index'])
        initializer.append(init)
        test_cached_h, init = pre_gather(cached_h, 'test',
                                         data['test_index'])
        initializer.append(init)

    exe = fluid.Executor(place)
    gw.initialize(place)
    for init in initializer:
        init(place)

    # get train features, val features and test features
    np_train_cached_h, np_val_cached_h, np_test_cached_h = exe.run(
        precompute_program,
        feed={},
        fetch_list=[train_cached_h, val_cached_h, test_cached_h],
        return_numpy=True)

    # Build the classifier programs on top of the cached features.
    initializer = []
    with fluid.program_guard(train_program, startup_program):
        with fluid.unique_name.guard():
            train_handle = calculate_loss('train', np_train_cached_h,
                                          data['train_label'],
                                          dataset.num_classes, args)
            initializer += train_handle['initializer']
            adam = fluid.optimizer.Adam(
                learning_rate=args.lr,
                regularization=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=args.weight_decay))
            adam.minimize(train_handle['loss'])
    with fluid.program_guard(val_program, startup_program):
        with fluid.unique_name.guard():
            val_handle = calculate_loss('val', np_val_cached_h,
                                        data['val_label'],
                                        dataset.num_classes, args)
            initializer += val_handle['initializer']
    with fluid.program_guard(test_program, startup_program):
        with fluid.unique_name.guard():
            test_handle = calculate_loss('test', np_test_cached_h,
                                         data['test_label'],
                                         dataset.num_classes, args)
            initializer += test_handle['initializer']

    exe.run(startup_program)
    for init in initializer:
        init(place)

    dur = []
    for epoch in range(args.epochs):
        # Only time epochs after a 3-epoch warm-up.
        if epoch >= 3:
            t0 = time.time()
        train_loss_t = exe.run(train_program,
                               feed={},
                               fetch_list=[train_handle['loss']],
                               return_numpy=True)[0]
        if epoch >= 3:
            time_per_epoch = 1.0 * (time.time() - t0)
            dur.append(time_per_epoch)

        val_loss_t, val_acc_t = exe.run(
            val_program,
            feed={},
            fetch_list=[val_handle['loss'], val_handle['acc']],
            return_numpy=True)

        log.info("Epoch %d " % epoch + "(%.5lf sec) " % np.mean(dur) +
                 "Train Loss: %f " % train_loss_t +
                 "Val Loss: %f " % val_loss_t + "Val Acc: %f " % val_acc_t)

    test_loss_t, test_acc_t = exe.run(
        test_program,
        feed={},
        fetch_list=[test_handle['loss'], test_handle['acc']],
        return_numpy=True)
    log.info("Test Accuracy: %f" % test_acc_t)
def check_network_convergence(self, is_sparse, build_strategy=None,
                              use_cuda=True):
    # Train the CoNLL-05 semantic-role-labeling LSTM + CRF network with
    # ParallelExecutor for 10 mini-batches and print the loss; used to
    # compare behavior under different build strategies.
    # NOTE(review): `is_sparse` is captured via **locals() into db_lstm
    # below rather than used directly here.
    os.environ['CPU_NUM'] = str(4)
    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        word = fluid.layers.data(
            name='word_data', shape=[1], dtype='int64', lod_level=1)
        predicate = fluid.layers.data(
            name='verb_data', shape=[1], dtype='int64', lod_level=1)
        ctx_n2 = fluid.layers.data(
            name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
        ctx_n1 = fluid.layers.data(
            name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
        ctx_0 = fluid.layers.data(
            name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
        ctx_p1 = fluid.layers.data(
            name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
        ctx_p2 = fluid.layers.data(
            name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
        mark = fluid.layers.data(
            name='mark_data', shape=[1], dtype='int64', lod_level=1)

        # db_lstm picks the inputs it needs out of the local namespace.
        feature_out = db_lstm(**locals())
        target = fluid.layers.data(
            name='target', shape=[1], dtype='int64', lod_level=1)
        crf_cost = fluid.layers.linear_chain_crf(
            input=feature_out,
            label=target,
            param_attr=fluid.ParamAttr(
                name='crfw', learning_rate=1e-1))
        avg_cost = fluid.layers.mean(crf_cost)

        sgd_optimizer = fluid.optimizer.SGD(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=0.01,
                decay_steps=100000,
                decay_rate=0.5,
                staircase=True))
        sgd_optimizer.minimize(avg_cost)

        train_data = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.conll05.test(), buf_size=8192),
            batch_size=16)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup)

        pe = fluid.ParallelExecutor(
            use_cuda=use_cuda,
            loss_name=avg_cost.name,
            build_strategy=build_strategy)

        feeder = fluid.DataFeeder(
            feed_list=[
                word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate,
                mark, target
            ],
            place=fluid.CPUPlace())

        data = train_data()
        for i in range(10):
            cur_batch = next(data)
            print(
                pe.run(feed=feeder.feed(cur_batch),
                       fetch_list=[avg_cost.name])[0])
def test_errors(self):
    """npair_loss must raise TypeError for non-Variable inputs and for
    unsupported (non-float) dtypes."""
    with program_guard(Program(), Program()):
        anchor_np = np.random.random((2, 4)).astype("float32")
        positive_np = np.random.random((2, 4)).astype("float32")
        labels_np = np.random.random((2)).astype("float32")
        anchor_data = fluid.data(
            name='anchor', shape=[2, 4], dtype='float32')
        positive_data = fluid.data(
            name='positive', shape=[2, 4], dtype='float32')
        labels_data = fluid.data(name='labels', shape=[2], dtype='float32')

        def test_anchor_Variable():
            # the anchor type must be Variable
            fluid.layers.npair_loss(
                anchor=anchor_np, positive=positive_data,
                labels=labels_data)

        def test_positive_Variable():
            # the positive type must be Variable
            fluid.layers.npair_loss(
                anchor=anchor_data, positive=positive_np,
                labels=labels_data)

        def test_labels_Variable():
            # the labels type must be Variable
            fluid.layers.npair_loss(
                anchor=anchor_data, positive=positive_data,
                labels=labels_np)

        self.assertRaises(TypeError, test_anchor_Variable)
        self.assertRaises(TypeError, test_positive_Variable)
        self.assertRaises(TypeError, test_labels_Variable)

        def test_anchor_type():
            # dtype must be float32 or float64
            # BUG FIX: the int32 variable was created but never passed,
            # and the numpy `labels_np` was passed instead -- so the
            # TypeError came from the wrong argument and the dtype check
            # was never exercised. Pass the int32 anchor and a proper
            # labels Variable.
            anchor_data1 = fluid.data(
                name='anchor1', shape=[2, 4], dtype='int32')
            fluid.layers.npair_loss(
                anchor=anchor_data1, positive=positive_data,
                labels=labels_data)

        def test_positive_type():
            # dtype must be float32 or float64
            # BUG FIX: same issue as above -- use the int32 positive and
            # a proper labels Variable.
            positive_data1 = fluid.data(
                name='positive1', shape=[2, 4], dtype='int32')
            fluid.layers.npair_loss(
                anchor=anchor_data, positive=positive_data1,
                labels=labels_data)

        def test_labels_type():
            # dtype must be float32 or float64
            labels_data1 = fluid.data(
                name='labels1', shape=[2], dtype='int32')
            fluid.layers.npair_loss(
                anchor=anchor_data, positive=positive_data,
                labels=labels_data1)

        self.assertRaises(TypeError, test_anchor_type)
        self.assertRaises(TypeError, test_positive_type)
        self.assertRaises(TypeError, test_labels_type)
def main():
    """Run detection inference over a set of images and save visualizations.

    Reads the model architecture and weights from the YAML config referenced
    by FLAGS.config, builds an inference program, runs it image-by-image, and
    writes visualized results (optionally also to a tb-paddle SummaryWriter).
    """
    cfg = load_config(FLAGS.config)
    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")
    merge_config(FLAGS.opt)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()
    # Fall back to the architecture's default TestFeed when none is configured.
    if 'test_feed' not in cfg:
        test_feed = create(main_arch + 'TestFeed')
    else:
        test_feed = create(cfg.test_feed)
    test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
    test_feed.dataset.add_images(test_images)
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    model = create(main_arch)
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    # Build the inference graph inside fresh program/name scopes, then clone
    # it for test so train-only ops (e.g. dropout) are stripped.
    with fluid.program_guard(infer_prog, startup_prog):
        with fluid.unique_name.guard():
            loader, feed_vars = create_feed(test_feed, iterable=True)
            test_fetches = model.test(feed_vars)
    infer_prog = infer_prog.clone(True)
    reader = create_reader(test_feed)
    loader.set_sample_list_generator(reader, place)
    exe.run(startup_prog)
    if cfg.weights:
        checkpoint.load_params(exe, infer_prog, cfg.weights)
    # parse infer fetches
    assert cfg.metric in ['COCO', 'VOC', 'OID', 'WIDERFACE'], \
            "unknown metric type {}".format(cfg.metric)
    extra_keys = []
    if cfg['metric'] in ['COCO', 'OID']:
        extra_keys = ['im_info', 'im_id', 'im_shape']
    if cfg['metric'] == 'VOC' or cfg['metric'] == 'WIDERFACE':
        extra_keys = ['im_id', 'im_shape']
    keys, values, _ = parse_fetches(test_fetches, infer_prog, extra_keys)
    # parse dataset category: pick the metric-specific conversion helpers.
    if cfg.metric == 'COCO':
        from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
    if cfg.metric == 'OID':
        from ppdet.utils.oid_eval import bbox2out, get_category_info
    if cfg.metric == "VOC":
        from ppdet.utils.voc_eval import bbox2out, get_category_info
    if cfg.metric == "WIDERFACE":
        from ppdet.utils.widerface_eval_utils import bbox2out, get_category_info
    anno_file = getattr(test_feed.dataset, 'annotation', None)
    with_background = getattr(test_feed, 'with_background', True)
    use_default_label = getattr(test_feed, 'use_default_label', False)
    clsid2catid, catid2name = get_category_info(anno_file, with_background,
                                                use_default_label)
    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()
    # use tb-paddle to log image
    if FLAGS.use_tb:
        from tb_paddle import SummaryWriter
        tb_writer = SummaryWriter(FLAGS.tb_log_dir)
        tb_image_step = 0
        tb_image_frame = 0  # each frame can display ten pictures at most.
    imid2path = reader.imid2path
    for iter_id, data in enumerate(loader()):
        outs = exe.run(infer_prog,
                       feed=data,
                       fetch_list=values,
                       return_numpy=False)
        # Keep LoD info alongside each fetched tensor for bbox/mask decoding.
        res = {
            k: (np.array(v), v.recursive_sequence_lengths())
            for k, v in zip(keys, outs)
        }
        logger.info('Infer iter {}'.format(iter_id))
        bbox_results = None
        mask_results = None
        if 'bbox' in res:
            bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
        if 'mask' in res:
            mask_results = mask2out([res], clsid2catid,
                                    model.mask_head.resolution)
        # visualize result
        im_ids = res['im_id'][0]
        for im_id in im_ids:
            image_path = imid2path[int(im_id)]
            image = Image.open(image_path).convert('RGB')
            # use tb-paddle to log original image
            if FLAGS.use_tb:
                original_image_np = np.array(image)
                tb_writer.add_image(
                    "original/frame_{}".format(tb_image_frame),
                    original_image_np,
                    tb_image_step,
                    dataformats='HWC')
            image = visualize_results(image, int(im_id), catid2name,
                                      FLAGS.draw_threshold, bbox_results,
                                      mask_results)
            # use tb-paddle to log image with bbox
            if FLAGS.use_tb:
                infer_image_np = np.array(image)
                tb_writer.add_image(
                    "bbox/frame_{}".format(tb_image_frame),
                    infer_image_np,
                    tb_image_step,
                    dataformats='HWC')
                # Roll over to a new frame after every 10 logged images.
                tb_image_step += 1
                if tb_image_step % 10 == 0:
                    tb_image_step = 0
                    tb_image_frame += 1
            save_name = get_save_image_name(FLAGS.output_dir, image_path)
            logger.info("Detection bbox results save in {}".format(save_name))
            image.save(save_name, quality=95)
def graph_apis(self, use_cuda=False, for_ci=True):
    """Exercise the IrGraph API end-to-end on a small conv network.

    Covers: clone, compile/run of both original and cloned graph,
    persistable-node save/load round-trip, draw, cycle check, topology
    sort, adjacency list, and safe node removal.

    Args:
        use_cuda (bool): run on GPU 0 when True, otherwise CPU.
        for_ci (bool): suppress printing and graph drawing in CI runs.
    """
    main = fluid.Program()
    startup = fluid.Program()
    with fluid.unique_name.guard():
        with fluid.program_guard(main, startup):
            feeds, loss = conv_block()
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
    graph = IrGraph(core.Graph(main.desc), for_test=False)
    backup_graph = graph.clone()
    # A clone must preserve the full node set.
    self.assertEqual(len(graph.all_nodes()), len(backup_graph.all_nodes()))
    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    origin_binary = fluid.CompiledProgram(graph.graph).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)
    backup_binary = fluid.CompiledProgram(
        backup_graph.graph).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup)
    iters = 5
    batch_size = 8
    train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                batch_size=batch_size)
    feeder = fluid.DataFeeder(feed_list=feeds, place=place)

    def _train(binary):
        # Run a few mini-batches to confirm the compiled graph executes.
        for _ in range(iters):
            data = next(train_reader())
            loss_v = exe.run(binary,
                             feed=feeder.feed(data),
                             fetch_list=[loss.name])
            if not for_ci:
                print('{}: {}'.format('loss', loss_v))

    _train(origin_binary)
    _train(backup_binary)

    # NOTE(review): "checkponit_dir" is a typo of "checkpoint" — local name
    # only, behavior unaffected.
    checkponit_dir = "checkpoint_gpu" if use_cuda else "checkpoint_cpu"

    def _set_zero(var_name, scope, place):
        # Overwrite the named variable's tensor with zeros in-place.
        var = scope.find_var(var_name).get_tensor()
        var_array = np.zeros(var._get_dims()).astype("float32")
        var.set(var_array, place)

    # Save persistables, zero one weight, then reload and check the
    # round-trip restored the original values.
    sum_before = np.sum(
        np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()))
    fluid.io._save_persistable_nodes(exe, checkponit_dir, graph)
    _set_zero('conv2d_1.w_0', fluid.global_scope(), place)
    set_after = np.sum(
        np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()))
    self.assertEqual(set_after, 0)
    fluid.io._load_persistable_nodes(exe, checkponit_dir, graph)
    sum_after = np.sum(
        np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()))
    self.assertEqual(sum_before, sum_after)
    # Collect all conv2d-related op nodes for drawing and later removal.
    marked_nodes = set()
    for op in graph.all_op_nodes():
        if op.name().find('conv2d') > -1:
            marked_nodes.add(op)
    if not for_ci:
        graph.draw('.', 'residual', marked_nodes)
        backup_marked_nodes = set()
        for op in backup_graph.all_op_nodes():
            if op.name().find('conv2d') > -1:
                backup_marked_nodes.add(op)
        backup_graph.draw('.', 'backup', backup_marked_nodes)
    self.assertFalse(graph.has_circle())
    self.assertEqual(graph.graph_num(), 1)
    nodes = graph.topology_sort()
    self.assertEqual(len(nodes), len(graph.all_op_nodes()))
    nodes_map = graph.build_adjacency_list()
    self.assertEqual(len(nodes_map), len(graph.all_op_nodes()))
    # safe_remove_nodes must remove exactly the marked nodes.
    nodes_num = len(graph.all_nodes())
    graph.safe_remove_nodes(marked_nodes)
    self.assertEqual(len(graph.all_nodes()), nodes_num - len(marked_nodes))
def test_errors(self):
    """cast must reject inputs that are not Variables with a TypeError."""
    with program_guard(Program(), Program()):
        # A raw LoDTensor is not a graph Variable, so cast should refuse it.
        non_variable_input = fluid.create_lod_tensor(
            np.array([[-1]]), [[1]], fluid.MLUPlace(0))
        self.assertRaises(TypeError, fluid.layers.cast, non_variable_input,
                          'int32')
def context(self, trainable=True, pretrained=True):
    """context for transfer learning.

    Builds a MobileNetV2 classification graph in its own program, renames all
    of its variables with a '@HUB_<name>@' prefix (so it can be merged into a
    user's program without clashes), and optionally loads pretrained weights.

    Args:
        trainable (bool): Set parameters in program to be trainable.
        pretrained (bool) : Whether to load pretrained model.

    Returns:
        inputs (dict): key is 'image', corresponding vaule is image tensor.
        outputs (dict): key is :
            'classification', corresponding value is the result of classification.
            'feature_map', corresponding value is the result of the layer before the fully connected layer.
        context_prog (fluid.Program): program for transfer learning.
    """
    context_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(context_prog, startup_prog):
        with fluid.unique_name.guard():
            image = fluid.layers.data(
                name="image", shape=[3, 224, 224], dtype="float32")
            mobile_net = MobileNetV2()
            output, feature_map = mobile_net.net(
                input=image, class_dim=len(self.label_list))
            name_prefix = '@HUB_{}@'.format(self.name)
            # Record input/output variable names BEFORE prefixing; the
            # prefixed names are used to look the variables up afterwards.
            inputs = {'image': name_prefix + image.name}
            outputs = {
                'classification': name_prefix + output.name,
                'feature_map': name_prefix + feature_map.name
            }
            add_vars_prefix(context_prog, name_prefix)
            add_vars_prefix(startup_prog, name_prefix)
            global_vars = context_prog.global_block().vars
            inputs = {
                key: global_vars[value]
                for key, value in inputs.items()
            }
            outputs = {
                key: global_vars[value]
                for key, value in outputs.items()
            }
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            # pretrained: load only the weight files that actually exist on
            # disk; otherwise run the startup program to random-initialize.
            if pretrained:

                def _if_exist(var):
                    b = os.path.exists(
                        os.path.join(self.default_pretrained_model_path,
                                     var.name))
                    return b

                fluid.io.load_vars(exe,
                                   self.default_pretrained_model_path,
                                   context_prog,
                                   predicate=_if_exist)
            else:
                exe.run(startup_prog)
            # trainable: toggle gradient updates for every parameter.
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
    return inputs, outputs, context_prog
def main(args):
    """Train and evaluate a 2-layer GCN on a citation-network dataset.

    Builds the model once in a train program, clones it for test, trains for
    200 epochs logging validation metrics, then reports test accuracy.
    """
    dataset = load(args.dataset)
    # normalize: symmetric GCN normalization D^{-1/2} per node (0 for
    # isolated nodes), stored as an extra node feature.
    indegree = dataset.graph.indegree()
    norm = np.zeros_like(indegree, dtype="float32")
    norm[indegree > 0] = np.power(indegree[indegree > 0], -0.5)
    dataset.graph.node_feat["norm"] = np.expand_dims(norm, -1)
    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    train_program = fluid.Program()
    startup_program = fluid.Program()
    test_program = fluid.Program()
    hidden_size = 16
    with fluid.program_guard(train_program, startup_program):
        gw = pgl.graph_wrapper.GraphWrapper(
            name="graph",
            place=place,
            node_feat=dataset.graph.node_feat_info())
        output = pgl.layers.gcn(gw,
                                gw.node_feat["words"],
                                hidden_size,
                                activation="relu",
                                norm=gw.node_feat['norm'],
                                name="gcn_layer_1")
        output = fluid.layers.dropout(
            output, 0.5, dropout_implementation='upscale_in_train')
        output = pgl.layers.gcn(gw,
                                output,
                                dataset.num_classes,
                                activation=None,
                                norm=gw.node_feat['norm'],
                                name="gcn_layer_2")
        # Loss/accuracy are computed only on the nodes selected by node_index.
        node_index = fluid.layers.data(
            "node_index",
            shape=[None, 1],
            dtype="int64",
            append_batch_size=False)
        node_label = fluid.layers.data(
            "node_label",
            shape=[None, 1],
            dtype="int64",
            append_batch_size=False)
        pred = fluid.layers.gather(output, node_index)
        loss, pred = fluid.layers.softmax_with_cross_entropy(
            logits=pred, label=node_label, return_softmax=True)
        acc = fluid.layers.accuracy(input=pred, label=node_label, k=1)
        loss = fluid.layers.mean(loss)
    # Clone BEFORE adding the optimizer so the test program has no
    # backward/update ops (and dropout behaves in inference mode).
    test_program = train_program.clone(for_test=True)
    with fluid.program_guard(train_program, startup_program):
        adam = fluid.optimizer.Adam(
            learning_rate=1e-2,
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=0.0005))
        adam.minimize(loss)
    exe = fluid.Executor(place)
    exe.run(startup_program)
    # The whole graph is fed once; only the index/label entries change per run.
    feed_dict = gw.to_feed(dataset.graph)
    train_index = dataset.train_index
    train_label = np.expand_dims(dataset.y[train_index], -1)
    train_index = np.expand_dims(train_index, -1)
    val_index = dataset.val_index
    val_label = np.expand_dims(dataset.y[val_index], -1)
    val_index = np.expand_dims(val_index, -1)
    test_index = dataset.test_index
    test_label = np.expand_dims(dataset.y[test_index], -1)
    test_index = np.expand_dims(test_index, -1)
    dur = []
    for epoch in range(200):
        # Skip the first 3 epochs when timing to avoid warm-up noise.
        if epoch >= 3:
            t0 = time.time()
        feed_dict["node_index"] = np.array(train_index, dtype="int64")
        feed_dict["node_label"] = np.array(train_label, dtype="int64")
        train_loss, train_acc = exe.run(train_program,
                                        feed=feed_dict,
                                        fetch_list=[loss, acc],
                                        return_numpy=True)
        if epoch >= 3:
            time_per_epoch = 1.0 * (time.time() - t0)
            dur.append(time_per_epoch)
        feed_dict["node_index"] = np.array(val_index, dtype="int64")
        feed_dict["node_label"] = np.array(val_label, dtype="int64")
        val_loss, val_acc = exe.run(test_program,
                                    feed=feed_dict,
                                    fetch_list=[loss, acc],
                                    return_numpy=True)
        log.info("Epoch %d " % epoch + "(%.5lf sec) " % np.mean(dur) +
                 "Train Loss: %f " % train_loss + "Train Acc: %f " % train_acc
                 + "Val Loss: %f " % val_loss + "Val Acc: %f " % val_acc)
    # Final test-set evaluation after training completes.
    feed_dict["node_index"] = np.array(test_index, dtype="int64")
    feed_dict["node_label"] = np.array(test_label, dtype="int64")
    test_loss, test_acc = exe.run(test_program,
                                  feed=feed_dict,
                                  fetch_list=[loss, acc],
                                  return_numpy=True)
    log.info("Accuracy: %f" % test_acc)
def main():
    """Train a detection model from the YAML config in FLAGS.config.

    Builds the train (and optional eval) programs, restores/loads weights,
    runs the training loop with periodic logging, snapshots, and evaluation,
    and tracks the best box AP seen so far.
    """
    env = os.environ
    # Distributed mode is inferred from the launcher-provided env vars.
    FLAGS.dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
    if FLAGS.dist:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        local_seed = (99 + trainer_id)
        random.seed(local_seed)
        np.random.seed(local_seed)
    if FLAGS.enable_ce:
        random.seed(0)
        np.random.seed(0)
    cfg = load_config(FLAGS.config)
    if 'architecture' in cfg:
        main_arch = cfg.architecture
    else:
        raise ValueError("'architecture' not specified in config file.")
    merge_config(FLAGS.opt)
    if 'log_iter' not in cfg:
        cfg.log_iter = 20
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()
    if not FLAGS.dist or trainer_id == 0:
        print_total_cfg(cfg)
    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))
    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')
    # build program
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    if FLAGS.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = create(main_arch)
            if FLAGS.fp16:
                assert (getattr(model.backbone, 'norm_type', None) != 'affine_channel'), \
                    '--fp16 currently does not support affine channel, ' \
                    ' please modify backbone settings to use batch norm'
            with mixed_precision_context(FLAGS.loss_scale, FLAGS.fp16) as ctx:
                inputs_def = cfg['TrainReader']['inputs_def']
                feed_vars, train_loader = model.build_inputs(**inputs_def)
                train_fetches = model.train(feed_vars)
                loss = train_fetches['loss']
                # Scale the loss for fp16 gradients, then unscale for logging.
                if FLAGS.fp16:
                    loss *= ctx.get_loss_scale_var()
                lr = lr_builder()
                optimizer = optim_builder(lr)
                optimizer.minimize(loss)
                if FLAGS.fp16:
                    loss /= ctx.get_loss_scale_var()
    # parse train fetches
    train_keys, train_values, _ = parse_fetches(train_fetches)
    train_values.append(lr)
    if FLAGS.eval:
        eval_prog = fluid.Program()
        with fluid.program_guard(eval_prog, startup_prog):
            with fluid.unique_name.guard():
                model = create(main_arch)
                inputs_def = cfg['EvalReader']['inputs_def']
                feed_vars, eval_loader = model.build_inputs(**inputs_def)
                fetches = model.eval(feed_vars)
        eval_prog = eval_prog.clone(True)
        eval_reader = create_reader(cfg.EvalReader)
        eval_loader.set_sample_list_generator(eval_reader, place)
        # parse eval fetches
        extra_keys = []
        if cfg.metric == 'COCO':
            extra_keys = ['im_info', 'im_id', 'im_shape']
        if cfg.metric == 'VOC':
            extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
        if cfg.metric == 'WIDERFACE':
            extra_keys = ['im_id', 'im_shape', 'gt_bbox']
        eval_keys, eval_values, eval_cls = parse_fetches(fetches, eval_prog,
                                                         extra_keys)
    # compile program for multi-devices
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    # only enable sync_bn in multi GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu
    exec_strategy = fluid.ExecutionStrategy()
    # iteration number when CompiledProgram tries to drop local execution scopes.
    # Set it to be 1 to save memory usages, so that unused variables in
    # local execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1
    if FLAGS.dist:
        dist_utils.prepare_for_multi_process(exe, build_strategy,
                                             startup_prog, train_prog)
        exec_strategy.num_threads = 1
    exe.run(startup_prog)
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)
    if FLAGS.eval:
        compiled_eval_prog = fluid.compiler.CompiledProgram(eval_prog)
    # Weight loading priority: resume checkpoint > fused-BN pretrain >
    # plain pretrain (with optional excluded params).
    fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'
    ignore_params = cfg.finetune_exclude_pretrained_params \
        if 'finetune_exclude_pretrained_params' in cfg else []
    start_iter = 0
    if FLAGS.resume_checkpoint:
        checkpoint.load_checkpoint(exe, train_prog, FLAGS.resume_checkpoint)
        start_iter = checkpoint.global_step()
    elif cfg.pretrain_weights and fuse_bn and not ignore_params:
        checkpoint.load_and_fusebn(exe, train_prog, cfg.pretrain_weights)
    elif cfg.pretrain_weights:
        checkpoint.load_params(
            exe, train_prog, cfg.pretrain_weights, ignore_params=ignore_params)
    train_reader = create_reader(cfg.TrainReader,
                                 (cfg.max_iters - start_iter) * devices_num,
                                 cfg)
    train_loader.set_sample_list_generator(train_reader, place)
    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()
    # if map_type not set, use default 11point, only use in VOC eval
    map_type = cfg.map_type if 'map_type' in cfg else '11point'
    train_stats = TrainingStats(cfg.log_smooth_window, train_keys)
    train_loader.start()
    start_time = time.time()
    end_time = time.time()
    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)
    time_stat = deque(maxlen=cfg.log_smooth_window)
    best_box_ap_list = [0.0, 0]  #[map, iter]
    # use tb-paddle to log data
    if FLAGS.use_tb:
        from tb_paddle import SummaryWriter
        tb_writer = SummaryWriter(FLAGS.tb_log_dir)
        tb_loss_step = 0
        tb_mAP_step = 0
    for it in range(start_iter, cfg.max_iters):
        start_time = end_time
        end_time = time.time()
        time_stat.append(end_time - start_time)
        time_cost = np.mean(time_stat)
        eta_sec = (cfg.max_iters - it) * time_cost
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        outs = exe.run(compiled_train_prog, fetch_list=train_values)
        # Last fetch is the learning rate; everything before it is a metric.
        stats = {k: np.array(v).mean() for k, v in zip(train_keys, outs[:-1])}
        # use tb-paddle to log loss
        if FLAGS.use_tb:
            if it % cfg.log_iter == 0:
                for loss_name, loss_value in stats.items():
                    tb_writer.add_scalar(loss_name, loss_value, tb_loss_step)
                tb_loss_step += 1
        train_stats.update(stats)
        logs = train_stats.log()
        if it % cfg.log_iter == 0 and (not FLAGS.dist or trainer_id == 0):
            strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format(
                it, np.mean(outs[-1]), logs, time_cost, eta)
            logger.info(strs)
        # Snapshot (and optionally evaluate) on trainer 0 only.
        if (it > 0 and it % cfg.snapshot_iter == 0 or it == cfg.max_iters - 1) \
                and (not FLAGS.dist or trainer_id == 0):
            save_name = str(it) if it != cfg.max_iters - 1 else "model_final"
            checkpoint.save(exe, train_prog, os.path.join(save_dir, save_name))
            if FLAGS.eval:
                # evaluation
                results = eval_run(exe, compiled_eval_prog, eval_loader,
                                   eval_keys, eval_values, eval_cls)
                resolution = None
                if 'mask' in results[0]:
                    resolution = model.mask_head.resolution
                box_ap_stats = eval_results(
                    results, cfg.metric, cfg.num_classes, resolution,
                    is_bbox_normalized, FLAGS.output_eval, map_type,
                    cfg['EvalReader']['dataset'])
                # use tb_paddle to log mAP
                if FLAGS.use_tb:
                    tb_writer.add_scalar("mAP", box_ap_stats[0], tb_mAP_step)
                    tb_mAP_step += 1
                if box_ap_stats[0] > best_box_ap_list[0]:
                    best_box_ap_list[0] = box_ap_stats[0]
                    best_box_ap_list[1] = it
                    checkpoint.save(exe, train_prog,
                                    os.path.join(save_dir, "best_model"))
                logger.info("Best test box ap: {}, in iter: {}".format(
                    best_box_ap_list[0], best_box_ap_list[1]))
    train_loader.reset()
def context(self, trainable=True, pretrained=True, get_prediction=False):
    """Distill the Head Features, so as to perform transfer learning.

    Args:
        trainable (bool): whether to set parameters trainable.
        pretrained (bool): whether to load default pretrained model.
        get_prediction (bool): whether to get prediction,
            i.e. append the multi-box head and decoded detection output
            instead of exposing raw backbone features.

    Returns:
        inputs (dict): the input variables.
        outputs (dict): the output variables.
        context_prog (Program): the program to execute transfer learning.
    """
    context_prog = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(context_prog, startup_program):
        with fluid.unique_name.guard():
            # image
            image = fluid.layers.data(
                name='image', shape=[3, 300, 300], dtype='float32')
            # backbone
            backbone = MobileNet(**self.mobilenet_config)
            # body_feats
            body_feats = backbone(image)
            # im_size
            im_size = fluid.layers.data(
                name='im_size', shape=[2], dtype='int32')
            # var_prefix: namespacing so the graph can be merged into a
            # user's program without variable-name clashes.
            var_prefix = '@HUB_{}@'.format(self.name)
            # names of inputs (recorded before prefixing; looked up after)
            inputs = {
                'image': var_prefix + image.name,
                'im_size': var_prefix + im_size.name
            }
            # names of outputs
            if get_prediction:
                locs, confs, box, box_var = fluid.layers.multi_box_head(
                    inputs=body_feats,
                    image=image,
                    num_classes=21,
                    **self.multi_box_head_config)
                pred = fluid.layers.detection_output(
                    loc=locs,
                    scores=confs,
                    prior_box=box,
                    prior_box_var=box_var,
                    **self.output_decoder_config)
                outputs = {'bbox_out': [var_prefix + pred.name]}
            else:
                outputs = {
                    'body_features':
                    [var_prefix + var.name for var in body_feats]
                }
            # add_vars_prefix
            add_vars_prefix(context_prog, var_prefix)
            # FIX(consistency): prefix the local startup_program explicitly
            # instead of fluid.default_startup_program(). Inside this
            # program_guard the two are the same object, but the explicit
            # form matches the sibling context() implementations and no
            # longer depends on guard scope.
            add_vars_prefix(startup_program, var_prefix)
            # inputs
            inputs = {
                key: context_prog.global_block().vars[value]
                for key, value in inputs.items()
            }
            outputs = {
                out_key: [
                    context_prog.global_block().vars[varname]
                    for varname in out_value
                ]
                for out_key, out_value in outputs.items()
            }
            # trainable
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            # pretrained: load only weight files present on disk; otherwise
            # random-initialize via the startup program.
            if pretrained:

                def _if_exist(var):
                    return os.path.exists(
                        os.path.join(self.default_pretrained_model_path,
                                     var.name))

                # FIX(consistency): pass context_prog explicitly rather than
                # relying on the guarded default main program.
                fluid.io.load_vars(exe,
                                   self.default_pretrained_model_path,
                                   context_prog,
                                   predicate=_if_exist)
            else:
                exe.run(startup_program)
            return inputs, outputs, context_prog
loss = fluid.layers.reduce_mean(loss) optimizer = fluid.optimizer.AdamOptimizer(learning_rate = 0.01) optimizer.minimize(loss) return loss else: return input_text_hidden startup_program = fluid.Program() train_program = fluid.Program() test_program = fluid.Program() with fluid.program_guard(train_program, startup_program): with fluid.unique_name.guard(): loss = build_model(is_training = True) exe = fluid.Executor(fluid.CPUPlace()) exe.run(startup_program) step = 0 for in_text, in_re_text, in_label, in_len in build_batch(batch_size, max_len, epochs, train_reader): out = exe.run(program = train_program, feed = {"text": in_text, "label": in_label, "text_len": in_len, "re_text": in_re_text}, fetch_list = [loss.name]) print("step %d, loss %.5f" % (step, out[0][0])) step += 1
def test_errors(self):
    """Check error handling of the three hsigmoid entry points.

    Covers static-graph type/dtype validation for
    paddle.nn.functional.hsigmoid_loss, constructor validation for
    paddle.nn.HSigmoidLoss, dynamic-mode checks on empty tensors, and the
    legacy fluid.layers.hsigmoid API.
    """
    with paddle.static.program_guard(paddle.static.Program(),
                                     paddle.static.Program()):
        # test paddle.nn.HSigmoidLoss
        # num_classes must be >= 2, so (6, 1) should raise ValueError.
        self.assertRaises(ValueError, paddle.nn.HSigmoidLoss, 6, 1)
        # test paddle.nn.functional.hsigmoid_loss
        x = paddle.static.data('x', [4, 6])
        label = paddle.static.data('label', [4, 1], 'int64')
        weight = paddle.static.data('weight', [7, 6])
        bias = paddle.static.data('bias', [7])
        # Each wrongly-typed argument below must raise TypeError.
        x_int32 = paddle.static.data('x_int32', [4, 6], 'int32')
        self.assertRaises(TypeError, F.hsigmoid_loss, x_int32, label, 8,
                          weight)
        label_float32 = paddle.static.data('label_float32', [4, 1], 'float32')
        self.assertRaises(TypeError, F.hsigmoid_loss, x, label_float32, 8,
                          weight)
        weight_int32 = paddle.static.data('weight_int32', [7, 6], 'int32')
        self.assertRaises(TypeError, F.hsigmoid_loss, x, label, 8,
                          weight_int32)
        bias_int32 = paddle.static.data('bias_int32', [7], 'int32')
        self.assertRaises(TypeError, F.hsigmoid_loss, x, label, 8, weight,
                          bias=bias_int32)
        path_table_int32 = paddle.static.data('path_table_int32', [7],
                                              'int32')
        self.assertRaises(TypeError, F.hsigmoid_loss, x, label, 8, weight,
                          path_table=path_table_int32)
        path_code_int32 = paddle.static.data('path_code_int32', [7], 'int32')
        self.assertRaises(TypeError, F.hsigmoid_loss, x, label, 8, weight,
                          path_code=path_code_int32)
    # test paddle.nn.HSigmoidLoss
    # Switch to dynamic mode for the remaining checks.
    paddle.disable_static(self.place)
    x_arr = np.array([], dtype=np.float32)
    x = paddle.to_tensor(np.reshape(x_arr, (100000, 0)))
    label = paddle.to_tensor(0, dtype='int64')
    # NOTE(review): tensors are passed where (feature_size, num_classes)
    # ints are expected; the constructor's validation raises ValueError.
    self.assertRaises(ValueError, paddle.nn.HSigmoidLoss, x, label)
    # test paddle.nn.functional.hsigmoid_loss
    # Empty inputs / num_classes=0 must be rejected with ValueError.
    x = paddle.to_tensor(np.reshape(x_arr, (10, 0)), dtype='float32')
    label = paddle.to_tensor([], dtype='int64')
    weight = paddle.to_tensor([], dtype='float32')
    self.assertRaises(ValueError, F.hsigmoid_loss, x, label, 0, weight)
    # Restore static mode before the legacy-API checks below.
    paddle.enable_static()
    # test paddle.fluid.layers.hsigmoid
    with program_guard(Program()):
        label = fluid.data('label', [4, 1], 'int64')
        # The input type must be Variable.
        self.assertRaises(TypeError, fluid.layers.hsigmoid, 1, label, 2)
        # The input dtype must be float16, float32, float64.
        x_int32 = fluid.data(name='x_int32', shape=[4, 3], dtype='int32')
        self.assertRaises(TypeError, fluid.layers.hsigmoid, x_int32, label, 2)
        # support the input dtype is float32
        x_fp32 = fluid.data(name='x_fp32', shape=[4, 3], dtype='float32')
        fluid.layers.hsigmoid(x_fp32, label, 2)
        # The label type must be Variable.
        self.assertRaises(TypeError, fluid.layers.hsigmoid, x_fp32, 1, 2)
        # The label dtype must be int64.
        label_int32 = fluid.data('label_int32', [4, 1], 'int32')
        self.assertRaises(TypeError, fluid.layers.hsigmoid, x_fp32,
                          label_int32, 2)
def context(self, trainable=True, pretrained=True, override_params=None, phase='train'):
    """context for transfer learning.

    Builds an EfficientNetB4 classification graph in its own program (in
    train or inference mode depending on ``phase``), prefixes all variables
    with '@HUB_<name>@' to avoid clashes when merged into a user program,
    and optionally loads pretrained weights.

    Args:
        trainable (bool): Set parameters in program to be trainable.
        pretrained (bool) : Whether to load pretrained model.
        override_params: forwarded to EfficientNetB4 to override its
            default hyper-parameters (structure not validated here).
        phase (str): one of train/dev/test/eval/predict; anything but
            'train' builds the network in inference (is_test) mode.

    Returns:
        inputs (dict): key is 'image', corresponding vaule is image tensor.
        outputs (dict): key is :
            'classification', corresponding value is the result of classification.
            'feature_map', corresponding value is the result of the layer before the fully connected layer.
        context_prog (fluid.Program): program for transfer learning.

    Raises:
        ValueError: if ``phase`` is not a recognized phase name.
    """
    if phase in ["dev", "test", "predict", "eval"]:
        is_test = True
    elif phase in ["train"]:
        is_test = False
    else:
        raise ValueError(
            "Phase %s is error, which must be one of train, dev, test, eval and predict."
            % phase)
    context_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(context_prog, startup_prog):
        with fluid.unique_name.guard():
            image = fluid.layers.data(
                name="image", shape=[3, 224, 224], dtype="float32")
            efficientnet_b4 = EfficientNetB4(
                override_params=override_params)
            output, feature_map = efficientnet_b4.net(
                input=image,
                class_dim=len(self.label_list),
                is_test=is_test)
            name_prefix = '@HUB_{}@'.format(self.name)
            # Record variable names BEFORE prefixing; the prefixed names
            # are used to look the variables up afterwards.
            inputs = {'image': name_prefix + image.name}
            outputs = {
                'classification': name_prefix + output.name,
                'feature_map': name_prefix + feature_map.name
            }
            add_vars_prefix(context_prog, name_prefix)
            add_vars_prefix(startup_prog, name_prefix)
            global_vars = context_prog.global_block().vars
            inputs = {
                key: global_vars[value]
                for key, value in inputs.items()
            }
            outputs = {
                key: global_vars[value]
                for key, value in outputs.items()
            }
            place = fluid.CPUPlace()
            exe = fluid.Executor(place)
            # pretrained: load only the weight files that exist on disk;
            # otherwise random-initialize via the startup program.
            if pretrained:

                def _if_exist(var):
                    b = os.path.exists(
                        os.path.join(self.default_pretrained_model_path,
                                     var.name))
                    return b

                fluid.io.load_vars(exe,
                                   self.default_pretrained_model_path,
                                   context_prog,
                                   predicate=_if_exist)
            else:
                exe.run(startup_prog)
            # trainable: toggle gradient updates for every parameter.
            for param in context_prog.global_block().iter_parameters():
                param.trainable = trainable
    return inputs, outputs, context_prog
def main(args):
    """Fine-tune/evaluate a BERT classifier on a GLUE-style task.

    Depending on args.do_train / do_val / do_test, builds the corresponding
    programs, initializes from a checkpoint or pretrained params, runs the
    training loop with periodic logging/saving/evaluation, and finally
    evaluates on dev/test.

    NOTE(review): relies on module-level globals not defined here —
    ``num_trainers``, ``reader``, ``dist_utils``, ``profiler`` — presumably
    set up at file scope; verify before moving this function.
    """
    bert_config = BertConfig(args.bert_config_path)
    bert_config.print_config()
    if args.use_xpu:
        paddle.enable_static()
    # Select device and the number of parallel devices.
    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = get_device_num()
    elif args.use_xpu:
        xpu_id = int(os.getenv('FLAGS_selected_xpus', '0'))
        place = fluid.XPUPlace(xpu_id)
        dev_count = len([place])
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)
    task_name = args.task_name.lower()
    # Task name -> dataset processor class.
    processors = {
        'xnli': reader.XnliProcessor,
        'cola': reader.ColaProcessor,
        'mrpc': reader.MrpcProcessor,
        'mnli': reader.MnliProcessor,
    }
    processor = processors[task_name](data_dir=args.data_dir,
                                      vocab_path=args.vocab_path,
                                      max_seq_len=args.max_seq_len,
                                      do_lower_case=args.do_lower_case,
                                      in_tokens=args.in_tokens,
                                      random_seed=args.random_seed)
    num_labels = len(processor.get_labels())
    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")
    train_program = fluid.Program()
    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed
        train_program.random_seed = args.random_seed
    if args.do_train:
        # NOTE: If num_trainers > 1, the shuffle_seed must be set, because
        # the order of batch data generated by reader
        # must be the same in the respective processes.
        shuffle_seed = 1 if num_trainers > 1 else None
        train_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='train',
            epoch=args.epoch,
            dev_count=dev_count,
            shuffle=args.shuffle,
            shuffle_seed=shuffle_seed)
        num_train_examples = processor.get_num_examples(phase='train')
        # In in_tokens mode batch_size counts tokens, so divide by seq len.
        if args.in_tokens:
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count
        warmup_steps = int(max_train_steps * args.warmup_proportion)
        print("Device count: %d" % dev_count)
        print("Num train examples: %d" % num_train_examples)
        print("Max train steps: %d" % max_train_steps)
        print("Num warmup steps: %d" % warmup_steps)
        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_data_loader, loss, probs, accuracy, num_seqs = create_model(
                    args, bert_config=bert_config, num_labels=num_labels)
                scheduled_lr, loss_scaling = optimization(
                    loss=loss,
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio)
    if args.do_val:
        # Dev program shares parameters via the common startup_prog and the
        # unique_name guard; cloned for test to strip training-only ops.
        dev_prog = fluid.Program()
        with fluid.program_guard(dev_prog, startup_prog):
            with fluid.unique_name.guard():
                dev_data_loader, loss, probs, accuracy, num_seqs = create_model(
                    args, bert_config=bert_config, num_labels=num_labels)
        dev_prog = dev_prog.clone(for_test=True)
        dev_data_loader.set_batch_generator(
            processor.data_generator(
                batch_size=args.batch_size,
                phase='dev',
                epoch=1,
                dev_count=1,
                shuffle=False), place)
    if args.do_test:
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_data_loader, loss, probs, accuracy, num_seqs = create_model(
                    args, bert_config=bert_config, num_labels=num_labels)
        test_prog = test_prog.clone(for_test=True)
        test_data_loader.set_batch_generator(
            processor.data_generator(
                batch_size=args.batch_size,
                phase='test',
                epoch=1,
                dev_count=1,
                shuffle=False), place)
    exe.run(startup_prog)
    # Parameter initialization: checkpoint takes priority over pretrained
    # params; eval/test-only runs require a checkpoint.
    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            print(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        if args.init_checkpoint:
            init_checkpoint(
                exe,
                args.init_checkpoint,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(
                exe,
                args.init_pretraining_params,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(
            exe,
            args.init_checkpoint,
            main_program=startup_prog,
            use_fp16=args.use_fp16)
    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.use_experimental_executor = args.use_fast_executor
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope
        build_strategy = fluid.BuildStrategy()
        if args.use_cuda and num_trainers > 1:
            assert shuffle_seed is not None
            dist_utils.prepare_for_multi_process(exe, build_strategy,
                                                 train_program)
            train_data_generator = fluid.contrib.reader.distributed_batch_reader(
                train_data_generator)
        # XPU runs the plain program; GPU/CPU use data-parallel compilation.
        if args.use_xpu:
            train_compiled_program = train_program
        else:
            train_compiled_program = fluid.CompiledProgram(
                train_program).with_data_parallel(
                    loss_name=loss.name, build_strategy=build_strategy)
        train_data_loader.set_batch_generator(train_data_generator, place)
    if args.do_train:
        train_data_loader.start()
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        time_begin = time.time()
        throughput = []
        ce_info = []
        total_batch_num = 0  # used for benchmark
        interval_seq_num = 0
        while True:
            try:
                steps += 1
                total_batch_num += 1  # used for benchmark
                if args.max_iter and total_batch_num == args.max_iter:  # used for benchmark
                    return
                if args.use_fp16:
                    fetch_list = [
                        loss.name, accuracy.name, scheduled_lr.name,
                        num_seqs.name, loss_scaling.name
                    ]
                else:
                    fetch_list = [
                        loss.name, accuracy.name, scheduled_lr.name,
                        num_seqs.name
                    ]
                outputs = exe.run(train_compiled_program,
                                  fetch_list=fetch_list)
                interval_seq_num += np.sum(
                    outputs[3])  # get the sequence number
                # Log every skip_steps steps, weighting metrics by the
                # number of sequences in each batch.
                if steps % args.skip_steps == 0:
                    if args.use_fp16:
                        np_loss, np_acc, np_lr, np_num_seqs, np_scaling = outputs
                    else:
                        np_loss, np_acc, np_lr, np_num_seqs = outputs
                    total_cost.extend(np_loss * np_num_seqs)
                    total_acc.extend(np_acc * np_num_seqs)
                    total_num_seqs.extend(np_num_seqs)
                    if args.verbose:
                        verbose = "train data_loader queue size: %d, " % train_data_loader.queue.size(
                        )
                        verbose += "learning rate: %f" % np_lr[0]
                        if args.use_fp16:
                            verbose += ", loss scaling: %f" % np_scaling[0]
                        print(verbose)
                    current_example, current_epoch = processor.get_train_progress(
                    )
                    time_end = time.time()
                    used_time = time_end - time_begin
                    # profiler tools
                    if args.is_profiler and current_epoch == 0 and steps == args.skip_steps:
                        profiler.start_profiler("All")
                    elif args.is_profiler and current_epoch == 0 and steps == args.skip_steps * 2:
                        profiler.stop_profiler("total", args.profiler_path)
                        return
                    log_record = "epoch: {}, progress: {}/{}, step: {}, ave loss: {}, ave acc: {}".format(
                        current_epoch, current_example, num_train_examples,
                        steps,
                        np.sum(total_cost) / np.sum(total_num_seqs),
                        np.sum(total_acc) / np.sum(total_num_seqs))
                    ce_info.append([
                        np.sum(total_cost) / np.sum(total_num_seqs),
                        np.sum(total_acc) / np.sum(total_num_seqs), used_time
                    ])
                    if steps > 0:
                        throughput.append(args.skip_steps / used_time)
                        log_record = log_record + ", speed: %f steps/s" % (
                            args.skip_steps / used_time) + ", ips: %f sequence/s" % (
                                interval_seq_num / used_time)
                        print(log_record)
                    else:
                        print(log_record)
                    total_cost, total_acc, total_num_seqs = [], [], []
                    interval_seq_num = 0
                    time_begin = time.time()
                if steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.save(program=train_program, model_path=save_path)
                if steps % args.validation_steps == 0:
                    print("Average throughtput: %s" % (np.average(throughput)))
                    throughput = []
                    # evaluate dev set
                    if args.do_val:
                        evaluate(exe, dev_prog, dev_data_loader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "dev")
                    # evaluate test set
                    if args.do_test:
                        evaluate(exe, test_prog, test_data_loader,
                                 [loss.name, accuracy.name, num_seqs.name],
                                 "test")
            except fluid.core.EOFException:
                # Data exhausted: save a final checkpoint and stop training.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.save(program=train_program, model_path=save_path)
                train_data_loader.reset()
                break
        if args.enable_ce:
            card_num = get_cards()
            ce_cost = 0
            ce_acc = 0
            ce_time = 0
            # NOTE(review): bare except deliberately tolerates short runs
            # where ce_info has < 2 entries; zeros are reported instead.
            try:
                ce_cost = ce_info[-2][0]
                ce_acc = ce_info[-2][1]
                ce_time = ce_info[-2][2]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_%s_card%s\t%s" %
                  (args.task_name, card_num, ce_time))
            print("kpis\ttrain_cost_%s_card%s\t%f" %
                  (args.task_name, card_num, ce_cost))
            print("kpis\ttrain_acc_%s_card%s\t%f" %
                  (args.task_name, card_num, ce_acc))
    # final eval on dev set
    if args.do_val:
        print("Final validation result:")
        evaluate(exe, dev_prog, dev_data_loader,
                 [loss.name, accuracy.name, num_seqs.name], "dev")
    # final eval on test set
    if args.do_test:
        print("Final test result:")
        evaluate(exe, test_prog, test_data_loader,
                 [loss.name, accuracy.name, num_seqs.name], "test")
def test_type_error(self):
    """Verify that ``paddle.assign`` rejects a plain Python list of Variables."""
    paddle.enable_static()
    main_prog, startup_prog = Program(), Program()
    with program_guard(main_prog, startup_prog):
        tensors = [paddle.randn([3, 3]), paddle.randn([3, 3])]
        # Assigning a list(Variable) is unsupported; a TypeError must be raised.
        self.assertRaises(TypeError, paddle.assign, tensors)
def freeze_graph(self,
                 use_cuda,
                 seed,
                 activation_quant_type,
                 weight_quant_type='abs_max',
                 for_ci=False):
    """End-to-end quantization pipeline test.

    Builds a small MNIST conv net, applies QuantizationTransformPass, trains a
    few mini-batches, then freezes the graph (QuantizationFreezePass), converts
    weights to int8 (ConvertToInt8Pass), and finally runs the mobile transform
    (TransformForMobilePass), saving inference models along the way.

    Args:
        use_cuda: run on CUDAPlace(0) if True, else CPUPlace.
        seed: random seed applied to both main and startup programs.
        activation_quant_type: activation quantization scheme name.
        weight_quant_type: weight quantization scheme name (default 'abs_max').
        for_ci: if True, suppress debug prints and graph drawings.
    """

    def build_program(main, startup, is_test):
        # Build the conv net inside the given programs; attach Adam only for
        # the training graph.
        main.random_seed = seed
        startup.random_seed = seed
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                img = fluid.layers.data(
                    name='image', shape=[1, 28, 28], dtype='float32')
                label = fluid.layers.data(
                    name='label', shape=[1], dtype='int64')
                loss = conv_net(img, label)
                if not is_test:
                    opt = fluid.optimizer.Adam(learning_rate=0.001)
                    opt.minimize(loss)
        return [img, label], loss

    # Make data order and parameter init reproducible across runs.
    random.seed(0)
    np.random.seed(0)

    main = fluid.Program()
    startup = fluid.Program()
    test_program = fluid.Program()
    feeds, loss = build_program(main, startup, False)
    build_program(test_program, startup, True)
    test_program = test_program.clone(for_test=True)
    # Work on IR graphs so the quantization passes can be applied.
    main_graph = IrGraph(core.Graph(main.desc), for_test=False)
    test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.Scope()
    # Run startup inside the private scope so all parameters live there.
    with fluid.scope_guard(scope):
        exe.run(startup)
    # Insert fake-quant / dequant ops into both train and test graphs.
    transform_pass = QuantizationTransformPass(
        scope=scope,
        place=place,
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quant_type)
    transform_pass.apply(main_graph)
    transform_pass.apply(test_graph)
    dev_name = '_gpu_' if use_cuda else '_cpu_'
    if not for_ci:
        # Dump graph visualizations with quantize-related ops highlighted.
        marked_nodes = set()
        for op in main_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        main_graph.draw('.', 'main' + dev_name + activation_quant_type + '_' +
                        weight_quant_type, marked_nodes)
        marked_nodes = set()
        for op in test_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test' + dev_name + activation_quant_type + '_' +
                        weight_quant_type, marked_nodes)

    build_strategy = fluid.BuildStrategy()
    # Disable graph optimizations that could rewrite the quantized graph.
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)
    quantized_test_program = test_graph.to_program()
    iters = 5
    batch_size = 8

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
        batch_size=batch_size)
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(feed_list=feeds, place=place)
    # Short training run so the quantization scales get calibrated.
    with fluid.scope_guard(scope):
        for _ in range(iters):
            data = next(train_reader())
            loss_v = exe.run(binary,
                             feed=feeder.feed(data),
                             fetch_list=[loss])
            if not for_ci:
                print('{}: {}'.format('loss' + dev_name +
                                      activation_quant_type + '_' +
                                      weight_quant_type, loss_v))

    test_data = next(test_reader())
    # Fetch the fake-quantized conv weight variable from the test program.
    with fluid.program_guard(quantized_test_program):
        w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                         quantized_test_program)
    # Testing: reference loss/weight from the (not yet frozen) quantized graph.
    with fluid.scope_guard(scope):
        test_loss1, w_quant = exe.run(program=quantized_test_program,
                                      feed=feeder.feed(test_data),
                                      fetch_list=[loss, w_var])

    # Freeze graph for inference, but the weight of fc/conv is still float type.
    freeze_pass = QuantizationFreezePass(
        scope=scope, place=place, weight_quantize_type=weight_quant_type)
    freeze_pass.apply(test_graph)
    if not for_ci:
        marked_nodes = set()
        for op in test_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test_freeze' + dev_name + activation_quant_type
                        + '_' + weight_quant_type, marked_nodes)

    server_program = test_graph.to_program()
    with fluid.scope_guard(scope):
        test_loss2, = exe.run(program=server_program,
                              feed=feeder.feed(test_data),
                              fetch_list=[loss])
    # Frozen graph must reproduce the pre-freeze loss (small numeric slack).
    self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
    if not for_ci:
        print(
            '{}: {}'.format('test_loss1' + dev_name + activation_quant_type +
                            '_' + weight_quant_type, test_loss1))
        print(
            '{}: {}'.format('test_loss2' + dev_name + activation_quant_type +
                            '_' + weight_quant_type, test_loss2))
    w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
    # Maybe failed, this is due to the calculation precision
    # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
    if not for_ci:
        print('{}: {}'.format('w_freeze' + dev_name + activation_quant_type +
                              '_' + weight_quant_type, np.sum(w_freeze)))
        print('{}: {}'.format('w_quant' + dev_name + activation_quant_type +
                              '_' + weight_quant_type, np.sum(w_quant)))

    # Convert parameter to 8-bit.
    convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
    convert_int8_pass.apply(test_graph)
    if not for_ci:
        marked_nodes = set()
        for op in test_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test_int8' + dev_name + activation_quant_type +
                        '_' + weight_quant_type, marked_nodes)
    server_program_int8 = test_graph.to_program()
    # Save the 8-bit parameter and model file.
    with fluid.scope_guard(scope):
        fluid.io.save_inference_model(
            'server_int8' + dev_name + activation_quant_type + '_' +
            weight_quant_type, ['image', 'label'], [loss], exe,
            server_program_int8)
        # Test whether the 8-bit parameter and model file can be loaded successfully.
        [infer, feed, fetch] = fluid.io.load_inference_model(
            'server_int8' + dev_name + activation_quant_type + '_' +
            weight_quant_type, exe)
    # Check the loaded 8-bit weight.
    w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
    self.assertEqual(w_8bit.dtype, np.int8)
    self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
    if not for_ci:
        print('{}: {}'.format('w_8bit' + dev_name + activation_quant_type +
                              '_' + weight_quant_type, np.sum(w_8bit)))
        print('{}: {}'.format('w_freeze' + dev_name + activation_quant_type +
                              '_' + weight_quant_type, np.sum(w_freeze)))

    # Rewrite the graph for mobile deployment and save that variant too.
    mobile_pass = TransformForMobilePass()
    mobile_pass.apply(test_graph)
    if not for_ci:
        marked_nodes = set()
        for op in test_graph.all_op_nodes():
            if op.name().find('quantize') > -1:
                marked_nodes.add(op)
        test_graph.draw('.', 'test_mobile' + dev_name + activation_quant_type
                        + '_' + weight_quant_type, marked_nodes)

    mobile_program = test_graph.to_program()
    with fluid.scope_guard(scope):
        fluid.io.save_inference_model(
            'mobile_int8' + dev_name + activation_quant_type + '_' +
            weight_quant_type, ['image', 'label'], [loss], exe,
            mobile_program)
def main(args):
    """Train/evaluate an ERNIE sequence-labeling model (static-graph fluid API).

    Depending on ``args.do_train`` / ``args.do_val`` / ``args.do_test``,
    builds the train and/or test programs, optionally prepares NCCL2
    distributed training, restores checkpoints or pretrained params, runs the
    training loop with periodic logging/saving/validation, and performs a
    final evaluation.

    Raises:
        ValueError: if none of do_train/do_val/do_test is set, if batch_size
            is inconsistent with in_tokens mode, or if init_checkpoint is
            missing while only validating/testing.
    """
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        dev_list = fluid.cuda_places()
        place = dev_list[0]
        dev_count = len(dev_list)
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    reader = task_reader.SequenceLabelReader(
        vocab_path=args.vocab_path,
        label_map_config=args.label_map_config,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case,
        in_tokens=args.in_tokens,
        random_seed=args.random_seed,
        task_id=args.task_id)

    if not (args.do_train or args.do_val or args.do_test):
        raise ValueError("For args `do_train`, `do_val` and `do_test`, at "
                         "least one of them must be True.")

    startup_prog = fluid.Program()
    if args.random_seed is not None:
        startup_prog.random_seed = args.random_seed

    if args.do_train:
        train_data_generator = reader.data_generator(
            input_file=args.train_set,
            batch_size=args.batch_size,
            epoch=args.epoch,
            shuffle=True,
            phase="train")

        num_train_examples = reader.get_num_examples(args.train_set)

        # In in_tokens mode, batch_size counts tokens, so steps are computed
        # from batch_size // max_seq_len examples per batch.
        if args.in_tokens:
            if args.batch_size < args.max_seq_len:
                raise ValueError(
                    'if in_tokens=True, batch_size should greater than max_sqelen, got batch_size:%d seqlen:%d'
                    % (args.batch_size, args.max_seq_len))
            max_train_steps = args.epoch * num_train_examples // (
                args.batch_size // args.max_seq_len) // dev_count
        else:
            max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        warmup_steps = int(max_train_steps * args.warmup_proportion)
        log.info("Device count: %d" % dev_count)
        log.info("Num train examples: %d" % num_train_examples)
        log.info("Max train steps: %d" % max_train_steps)
        log.info("Num warmup steps: %d" % warmup_steps)

        train_program = fluid.Program()

        with fluid.program_guard(train_program, startup_prog):
            with fluid.unique_name.guard():
                train_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='train_reader',
                    ernie_config=ernie_config)
                # Attaches LR schedule, weight decay and optional fp16 loss
                # scaling to the training program.
                scheduled_lr, loss_scaling = optimization(
                    loss=graph_vars["loss"],
                    warmup_steps=warmup_steps,
                    num_train_steps=max_train_steps,
                    learning_rate=args.learning_rate,
                    train_program=train_program,
                    startup_prog=startup_prog,
                    weight_decay=args.weight_decay,
                    scheduler=args.lr_scheduler,
                    use_fp16=args.use_fp16,
                    use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
                    init_loss_scaling=args.init_loss_scaling,
                    incr_every_n_steps=args.incr_every_n_steps,
                    decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
                    incr_ratio=args.incr_ratio,
                    decr_ratio=args.decr_ratio)

        if args.verbose:
            if args.in_tokens:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program,
                    batch_size=args.batch_size // args.max_seq_len)
            else:
                lower_mem, upper_mem, unit = fluid.contrib.memory_usage(
                    program=train_program, batch_size=args.batch_size)
            log.info("Theoretical memory usage in training: %.3f - %.3f %s" %
                     (lower_mem, upper_mem, unit))

    if args.do_val or args.do_test:
        # One shared test program serves both validation and test phases.
        test_prog = fluid.Program()
        with fluid.program_guard(test_prog, startup_prog):
            with fluid.unique_name.guard():
                test_pyreader, graph_vars = create_model(
                    args,
                    pyreader_name='test_reader',
                    ernie_config=ernie_config)

        test_prog = test_prog.clone(for_test=True)

    nccl2_num_trainers = 1
    nccl2_trainer_id = 0
    if args.is_distributed:
        trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
        worker_endpoints_env = os.getenv("PADDLE_TRAINER_ENDPOINTS")
        current_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT")
        worker_endpoints = worker_endpoints_env.split(",")
        trainers_num = len(worker_endpoints)

        log.info("worker_endpoints:{} trainers_num:{} current_endpoint:{} \
trainer_id:{}".format(worker_endpoints, trainers_num, current_endpoint,
                      trainer_id))

        # prepare nccl2 env.
        config = fluid.DistributeTranspilerConfig()
        config.mode = "nccl2"
        t = fluid.DistributeTranspiler(config=config)
        t.transpile(
            trainer_id,
            trainers=worker_endpoints_env,
            current_endpoint=current_endpoint,
            program=train_program if args.do_train else test_prog,
            startup_program=startup_prog)
        nccl2_num_trainers = trainers_num
        nccl2_trainer_id = trainer_id

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.do_train:
        if args.init_checkpoint and args.init_pretraining_params:
            log.info(
                "WARNING: args 'init_checkpoint' and 'init_pretraining_params' "
                "both are set! Only arg 'init_checkpoint' is made valid.")
        # init_checkpoint takes precedence over init_pretraining_params.
        if args.init_checkpoint:
            init_checkpoint(
                exe,
                args.init_checkpoint,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
        elif args.init_pretraining_params:
            init_pretraining_params(
                exe,
                args.init_pretraining_params,
                main_program=startup_prog,
                use_fp16=args.use_fp16)
    elif args.do_val or args.do_test:
        if not args.init_checkpoint:
            raise ValueError("args 'init_checkpoint' should be set if"
                             "only doing validation or testing!")
        init_checkpoint(
            exe,
            args.init_checkpoint,
            main_program=startup_prog,
            use_fp16=args.use_fp16)

    if args.do_train:
        exec_strategy = fluid.ExecutionStrategy()
        if args.use_fast_executor:
            exec_strategy.use_experimental_executor = True
        exec_strategy.num_threads = dev_count
        exec_strategy.num_iteration_per_drop_scope = args.num_iteration_per_drop_scope

        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            loss_name=graph_vars["loss"].name,
            exec_strategy=exec_strategy,
            main_program=train_program,
            num_trainers=nccl2_num_trainers,
            trainer_id=nccl2_trainer_id)

        train_pyreader.decorate_tensor_provider(train_data_generator)
    else:
        train_exe = None

    if args.do_val or args.do_test:
        # Shares parameters with the training executor when it exists.
        test_exe = fluid.ParallelExecutor(
            use_cuda=args.use_cuda,
            main_program=test_prog,
            share_vars_from=train_exe)

    if args.do_train:
        train_pyreader.start()
        steps = 0
        graph_vars["learning_rate"] = scheduled_lr

        time_begin = time.time()
        while True:
            try:
                steps += 1
                # Only fetch metrics every skip_steps to keep training fast.
                if steps % args.skip_steps != 0:
                    train_exe.run(fetch_list=[])
                else:
                    fetch_list = [
                        graph_vars["num_infer"].name,
                        graph_vars["num_label"].name,
                        graph_vars["num_correct"].name,
                        graph_vars["loss"].name,
                        graph_vars['learning_rate'].name,
                    ]

                    out = train_exe.run(fetch_list=fetch_list)
                    num_infer, num_label, num_correct, np_loss, np_lr = out
                    lr = float(np_lr[0])
                    loss = np_loss.mean()
                    precision, recall, f1 = calculate_f1(num_label, num_infer,
                                                         num_correct)
                    if args.verbose:
                        log.info(
                            "train pyreader queue size: %d, learning rate: %f"
                            % (train_pyreader.queue.size(), lr
                               if warmup_steps > 0 else args.learning_rate))

                    current_example, current_epoch = reader.get_train_progress(
                    )
                    time_end = time.time()
                    used_time = time_end - time_begin
                    log.info(
                        "epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                        "f1: %f, precision: %f, recall: %f, speed: %f steps/s"
                        % (current_epoch, current_example, num_train_examples,
                           steps, loss, f1, precision, recall,
                           args.skip_steps / used_time))
                    time_begin = time.time()

                # Only trainer 0 saves checkpoints and runs evaluation.
                if nccl2_trainer_id == 0 and steps % args.save_steps == 0:
                    save_path = os.path.join(args.checkpoints,
                                             "step_" + str(steps))
                    fluid.io.save_persistables(exe, save_path, train_program)

                if nccl2_trainer_id == 0 and steps % args.validation_steps == 0:
                    # evaluate dev set
                    if args.do_val:
                        evaluate_wrapper(reader, exe, test_prog, test_pyreader,
                                         graph_vars, current_epoch, steps)
                    # evaluate test set
                    if args.do_test:
                        predict_wrapper(reader, exe, test_prog, test_pyreader,
                                        graph_vars, current_epoch, steps)

            except fluid.core.EOFException:
                # Data exhausted: save a final checkpoint and stop training.
                save_path = os.path.join(args.checkpoints,
                                         "step_" + str(steps))
                fluid.io.save_persistables(exe, save_path, train_program)
                train_pyreader.reset()
                break

    # final eval on dev set
    if nccl2_trainer_id == 0 and args.do_val:
        evaluate_wrapper(reader, exe, test_prog, test_pyreader, graph_vars,
                         current_epoch, 'final')

    if nccl2_trainer_id == 0 and args.do_test:
        predict_wrapper(reader, exe, test_prog, test_pyreader, graph_vars,
                        current_epoch, 'final')
def test_with_place(place, shape, begin_norm_axis):
    # Compares the layer_norm op's forward and backward outputs against
    # NumPy reference implementations, building the op and its grad op
    # manually at the block-desc level.
    # NOTE(review): this is a closure — `has_scale`, `has_bias`,
    # `y_grad_scale` and `self` come from the enclosing scope (not visible
    # here); confirm against the outer test method.

    # attr
    epsilon = 0.00001
    x_shape = shape
    # D = product of the normalized trailing dims; scale/bias are length-D.
    D = reduce(mul, x_shape[begin_norm_axis:len(x_shape)], 1)
    scale_shape = [D]

    np.random.seed(123)
    x = np.random.random_sample(x_shape).astype(np.float32)
    scale = np.random.random_sample(scale_shape).astype(
        np.float32) if has_scale else None
    bias = np.random.random_sample(scale_shape).astype(
        np.float32) if has_bias else None
    y_grad = (np.random.random_sample(x_shape) * y_grad_scale).astype(
        np.float32)

    # reference forward & backward
    y, mean, variance = _reference_layer_norm_naive(
        x, scale, bias, epsilon, begin_norm_axis)
    x_grad, scale_grad, bias_grad = _reference_layer_norm_grad(
        x, y_grad, scale, bias, mean, variance, begin_norm_axis)

    # locals() gives name -> ndarray lookup for feeding/ground truth below.
    var_dict = locals()
    var_dict['y@GRAD'] = y_grad
    var_names = ['x', 'mean', 'variance', 'y', 'y@GRAD']
    if has_scale:
        var_names += ['scale']
    if has_bias:
        var_names += ['bias']
    ground_truth = {name: var_dict[name] for name in var_names}

    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        # Declare one block variable per reference array, matching shapes.
        for name in ground_truth:
            block.create_var(
                name=name, dtype='float32', shape=ground_truth[name].shape)
        inputs = {"X": block.var('x')}
        fetch_list = [
            'y',
            'mean',
            'variance',
            'x@GRAD',
        ]
        if has_scale:
            inputs["Scale"] = block.var('scale')
            fetch_list += ['scale@GRAD']
        if has_bias:
            inputs["Bias"] = block.var('bias')
            fetch_list += ['bias@GRAD']
        layer_norm_op = block.append_op(
            type="layer_norm",
            inputs=inputs,
            outputs={
                "Y": block.var('y'),
                "Mean": block.var('mean'),  # share the same memory
                "Variance": block.var('variance'),  # share the same memory
            },
            attrs={
                "epsilon": epsilon,
                "begin_norm_axis": begin_norm_axis
            })
        # generate backward op_desc
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            layer_norm_op.desc, set(), [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        # Create the grad output vars in the block desc before shape/type
        # inference, then force them to FP32.
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode("ascii"))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode("ascii"))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)

        # Sync the hand-edited C++ desc back into the Python Program.
        program._sync_with_cpp()
        exe = fluid.Executor(place)
        out = exe.run(program,
                      feed={
                          name: var_dict[name]
                          for name in ['x', 'scale', 'bias', 'y@GRAD']
                      },
                      fetch_list=fetch_list)
        # Variance and scale_grad get a looser tolerance (1e-3).
        self.__assert_close(y, out[0], "y")
        self.__assert_close(mean, out[1], "mean")
        self.__assert_close(variance, out[2], "variance", 1e-3)
        self.__assert_close(x_grad, out[3], "x_grad")
        if has_scale:
            self.__assert_close(scale_grad,
                                out[fetch_list.index('scale@GRAD')],
                                "scale_grad", 1e-3)
        if has_bias:
            self.__assert_close(bias_grad,
                                out[fetch_list.index('bias@GRAD')],
                                "bias_grad")
def check_network_convergence(self,
                              method,
                              use_cuda=True,
                              memory_opt=True,
                              iter=50,
                              batch_size=None,
                              allow_op_delay=False,
                              feed_dict=None,
                              seed=None,
                              use_parallel_executor=True,
                              use_reduce=False,
                              use_ir_memory_optimize=True,
                              enable_inplace=True,
                              fuse_elewise_add_act_ops=False,
                              fuse_relu_depthwise_conv=False,
                              optimizer=fluid.optimizer.Adam,
                              use_fast_executor=False,
                              enable_sequential_execution=False):
    """Build the network from `method`, train it for `iter` steps and return
    the (first_loss, last_loss) pair fetched from the executor.

    Exits the process if either loss is NaN.
    """

    def run_executor(exe, binary, feed, fetch_list):
        # Thin wrapper so every run goes through a single call site.
        return exe.run(binary, feed=feed, fetch_list=fetch_list)

    train_prog = fluid.Program()
    startup_prog = fluid.Program()
    # Fix random seed (may be overridden by `seed` below).
    startup_prog.random_seed = 1
    train_prog.random_seed = 1
    with fluid.program_guard(train_prog, startup_prog):
        if seed is not None:
            startup_prog.random_seed = seed
            train_prog.random_seed = seed

        loss = method(use_feed=feed_dict is not None)

        if optimizer:
            optimizer().minimize(loss)

        if memory_opt:
            fluid.memory_optimize(train_prog)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.allow_op_delay = allow_op_delay
    if use_fast_executor:
        exec_strategy.use_experimental_executor = True

    build_strategy = fluid.BuildStrategy()
    if use_reduce:
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
    else:
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
    build_strategy.fuse_elewise_add_act_ops = fuse_elewise_add_act_ops
    build_strategy.fuse_relu_depthwise_conv = fuse_relu_depthwise_conv
    # python memory optimization is conflict with inplace pass.
    # Use ir graph memory optimization after inplace pass is the correct way.
    build_strategy.memory_optimize = False if memory_opt else use_ir_memory_optimize
    build_strategy.enable_inplace = False if memory_opt else enable_inplace
    build_strategy.enable_sequential_execution = enable_sequential_execution
    if use_cuda and core.is_compiled_with_cuda():
        build_strategy.remove_unnecessary_lock = True

    if use_parallel_executor:
        compiled = compiler.CompiledProgram(train_prog).with_data_parallel(
            loss_name=loss.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    else:
        compiled = compiler.CompiledProgram(train_prog)

    if batch_size is not None:
        # Scale the nominal batch size by the number of devices in use.
        if use_cuda:
            batch_size *= fluid.core.get_cuda_device_count()
        else:
            batch_size *= int(
                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    t_start = time.time()
    first_loss, = run_executor(
        exe=exe, binary=compiled, feed=feed_dict, fetch_list=[loss.name])
    for _ in range(iter):
        run_executor(exe=exe, binary=compiled, feed=feed_dict, fetch_list=[])
    last_loss, = run_executor(
        exe=exe, binary=compiled, feed=feed_dict, fetch_list=[loss.name])
    t_stop = time.time()

    if batch_size is not None:
        print("%.4f Instance per second" % (
            (batch_size * iter + 2) / (t_stop - t_start)))

    avg_last_loss_val = np.array(last_loss).mean()
    avg_first_loss_val = np.array(first_loss).mean()
    if math.isnan(float(avg_last_loss_val)) or math.isnan(
            float(avg_first_loss_val)):
        sys.exit("got NaN loss, training failed.")

    print(first_loss, last_loss)
    # self.assertGreater(first_loss[0], last_loss[0])
    return first_loss, last_loss
def train(args):
    """
    Train Program

    Builds the DAM train/test networks, optionally initializes word
    embeddings, then trains for ``args.num_scan_data`` epochs, periodically
    saving checkpoints and evaluating on the validation split.
    """
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # data data_config
    data_conf = {
        "batch_size": args.batch_size,
        "max_turn_num": args.max_turn_num,
        "max_turn_len": args.max_turn_len,
        "_EOS_": args._EOS_,
    }

    dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size,
              args.emb_size, args.stack_num, args.channel1_num,
              args.channel2_num)

    train_program = fluid.Program()
    train_startup = fluid.Program()
    # CE_MODE_X: continuous-evaluation mode — fix seeds for reproducibility.
    if "CE_MODE_X" in os.environ:
        train_program.random_seed = 110
        train_startup.random_seed = 110
    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            if args.use_pyreader:
                train_pyreader = dam.create_py_reader(
                    capacity=10, name='train_reader')
            else:
                dam.create_data_layers()
            loss, logits = dam.create_network()
            # Keep loss/logits alive across executor iterations.
            loss.persistable = True
            logits.persistable = True
            # gradient clipping
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByValue(max=1.0, min=-1.0))

            optimizer = fluid.optimizer.Adam(
                learning_rate=fluid.layers.exponential_decay(
                    learning_rate=args.learning_rate,
                    decay_steps=400,
                    decay_rate=0.9,
                    staircase=True))
            optimizer.minimize(loss)
            print("begin memory optimization ...")
            print(
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(time.time())))
            fluid.memory_optimize(train_program)
            print("end memory optimization ...")
            print(
                time.strftime('%Y-%m-%d %H:%M:%S',
                              time.localtime(time.time())))

    test_program = fluid.Program()
    test_startup = fluid.Program()
    if "CE_MODE_X" in os.environ:
        test_program.random_seed = 110
        test_startup.random_seed = 110
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            if args.use_pyreader:
                test_pyreader = dam.create_py_reader(
                    capacity=10, name='test_reader')
            else:
                dam.create_data_layers()
            loss, logits = dam.create_network()
            loss.persistable = True
            logits.persistable = True

    test_program = test_program.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    print("device count %d" % dev_count)
    print("theoretical memory usage: ")
    print(
        fluid.contrib.memory_usage(
            program=train_program, batch_size=args.batch_size))

    exe = fluid.Executor(place)
    exe.run(train_startup)
    exe.run(test_startup)

    # Test executor shares parameters with the training executor.
    train_exe = fluid.ParallelExecutor(
        use_cuda=args.use_cuda, loss_name=loss.name,
        main_program=train_program)
    test_exe = fluid.ParallelExecutor(
        use_cuda=args.use_cuda,
        main_program=test_program,
        share_vars_from=train_exe)

    if args.word_emb_init is not None:
        print("start loading word embedding init ...")
        # py2/py3 pickles differ: py3 needs encoding="bytes" for py2 pickles.
        if six.PY2:
            word_emb = np.array(pickle.load(open(args.word_emb_init,
                                                 'rb'))).astype('float32')
        else:
            word_emb = np.array(
                pickle.load(
                    open(args.word_emb_init, 'rb'),
                    encoding="bytes")).astype('float32')
        dam.set_word_embedding(word_emb, place)
        print("finish init word embedding ...")

    print("start loading data ...")
    with open(args.data_path, 'rb') as f:
        if six.PY2:
            train_data, val_data, test_data = pickle.load(f)
        else:
            train_data, val_data, test_data = pickle.load(f, encoding="bytes")
    print("finish loading data ...")

    val_batches = reader.build_batches(val_data, data_conf)

    batch_num = len(train_data[six.b('y')]) // args.batch_size
    val_batch_num = len(val_batches["response"])

    # Log roughly every 1% and save/eval every 10% of an epoch (per device).
    print_step = max(1, batch_num // (dev_count * 100))
    save_step = max(1, batch_num // (dev_count * 10))

    print("begin model training ...")
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))

    def train_with_feed(step):
        """
        Train on one epoch data by feeding

        Returns the updated global step and the last batch's mean cost.
        NOTE(review): reads `train_batches` from the enclosing scope; it is
        rebound each epoch by the loop below.
        """
        ave_cost = 0.0
        for it in six.moves.xrange(batch_num // dev_count):
            # One feed dict per device for the ParallelExecutor.
            feed_list = []
            for dev in six.moves.xrange(dev_count):
                index = it * dev_count + dev
                batch_data = reader.make_one_batch_input(train_batches, index)
                feed_dict = dict(zip(dam.get_feed_names(), batch_data))
                feed_list.append(feed_dict)

            cost = train_exe.run(feed=feed_list, fetch_list=[loss.name])

            ave_cost += np.array(cost[0]).mean()
            step = step + 1
            if step % print_step == 0:
                print("processed: [" + str(step * dev_count * 1.0 / batch_num)
                      + "] ave loss: [" + str(ave_cost / print_step) + "]")
                ave_cost = 0.0

            if (args.save_path is not None) and (step % save_step == 0):
                save_path = os.path.join(args.save_path, "step_" + str(step))
                print("Save model at step %d ... " % step)
                print(
                    time.strftime('%Y-%m-%d %H:%M:%S',
                                  time.localtime(time.time())))
                fluid.io.save_persistables(exe, save_path, train_program)

                score_path = os.path.join(args.save_path, 'score.' + str(step))
                test_with_feed(test_exe, test_program, dam.get_feed_names(),
                               [logits.name], score_path, val_batches,
                               val_batch_num, dev_count)

                result_file_path = os.path.join(args.save_path,
                                                'result.' + str(step))
                evaluate(score_path, result_file_path)
        return step, np.array(cost[0]).mean()

    def train_with_pyreader(step):
        """
        Train on one epoch with pyreader

        Same loop as train_with_feed but data arrives through the py_reader
        until it raises EOFException.
        """

        def data_provider():
            """
            Data reader
            """
            for index in six.moves.xrange(batch_num):
                yield reader.make_one_batch_input(train_batches, index)

        train_pyreader.decorate_tensor_provider(data_provider)

        ave_cost = 0.0
        train_pyreader.start()
        while True:
            try:
                cost = train_exe.run(fetch_list=[loss.name])

                ave_cost += np.array(cost[0]).mean()
                step = step + 1
                if step % print_step == 0:
                    print("processed: [" + str(step * dev_count * 1.0 /
                                               batch_num) + "] ave loss: [" +
                          str(ave_cost / print_step) + "]")
                    ave_cost = 0.0

                if (args.save_path is not None) and (step % save_step == 0):
                    save_path = os.path.join(args.save_path,
                                             "step_" + str(step))
                    print("Save model at step %d ... " % step)
                    print(
                        time.strftime('%Y-%m-%d %H:%M:%S',
                                      time.localtime(time.time())))
                    fluid.io.save_persistables(exe, save_path, train_program)

                    score_path = os.path.join(args.save_path,
                                              'score.' + str(step))
                    test_with_pyreader(test_exe, test_program, test_pyreader,
                                       [logits.name], score_path, val_batches,
                                       val_batch_num, dev_count)

                    result_file_path = os.path.join(args.save_path,
                                                    'result.' + str(step))
                    evaluate(score_path, result_file_path)

            except fluid.core.EOFException:
                # Epoch exhausted: reset the reader and return.
                train_pyreader.reset()
                break
        return step, np.array(cost[0]).mean()

    # train over different epoches
    global_step, train_time = 0, 0.0
    for epoch in six.moves.xrange(args.num_scan_data):
        shuffle_train = reader.unison_shuffle(
            train_data, seed=110 if ("CE_MODE_X" in os.environ) else None)
        train_batches = reader.build_batches(shuffle_train, data_conf)

        begin_time = time.time()
        if args.use_pyreader:
            global_step, last_cost = train_with_pyreader(global_step)
        else:
            global_step, last_cost = train_with_feed(global_step)

        pass_time_cost = time.time() - begin_time
        train_time += pass_time_cost
        print("Pass {0}, pass_time_cost {1}".format(
            epoch, "%2.2f sec" % pass_time_cost))

    # For internal continuous evaluation
    if "CE_MODE_X" in os.environ:
        card_num = get_cards()
        print("kpis\ttrain_cost_card%d\t%f" % (card_num, last_cost))
        print("kpis\ttrain_duration_card%d\t%f" % (card_num, train_time))
def main(args):
    """Extract embeddings with a pretrained ERNIE model and save them as .npy.

    Runs the model defined by ``create_model`` over ``args.data_set`` once
    (epoch=1, no shuffle), collects the CLS embedding and the top-layer token
    embeddings for every batch, concatenates them and writes
    ``word2id_cls_emb.npy`` / ``word2id_top_layer_emb.npy`` into
    ``args.output_dir``.

    NOTE(review): the ``args`` parameter is immediately overwritten by
    ``parser.parse_args()`` (reads sys.argv) — confirm this is intended.
    """
    args = parser.parse_args()
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    if args.use_cuda:
        place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)

    reader = task_reader.ExtractEmbeddingReader(
        vocab_path=args.vocab_path,
        max_seq_len=args.max_seq_len,
        do_lower_case=args.do_lower_case)

    startup_prog = fluid.Program()

    # Single pass over the data set, order preserved (shuffle=False).
    data_generator = reader.data_generator(
        input_file=args.data_set,
        batch_size=args.batch_size,
        epoch=1,
        shuffle=False)

    total_examples = reader.get_num_examples(args.data_set)

    print("Device count: %d" % dev_count)
    print("Total num examples: %d" % total_examples)

    infer_program = fluid.Program()
    with fluid.program_guard(infer_program, startup_prog):
        with fluid.unique_name.guard():
            pyreader, graph_vars = create_model(
                args, pyreader_name='reader', ernie_config=ernie_config)

    infer_program = infer_program.clone(for_test=True)

    exe.run(startup_prog)

    # Pretrained weights are mandatory for extraction.
    if args.init_pretraining_params:
        init_pretraining_params(
            exe, args.init_pretraining_params, main_program=startup_prog)
    else:
        raise ValueError(
            "WARNING: args 'init_pretraining_params' must be specified")

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = dev_count

    pyreader.decorate_tensor_provider(data_generator)
    pyreader.start()

    total_cls_emb = []
    total_top_layer_emb = []
    # Drain the pyreader until EOF, accumulating per-batch embeddings.
    while True:
        try:
            cls_emb, unpad_top_layer_emb = exe.run(
                program=infer_program,
                fetch_list=[
                    graph_vars["cls_embeddings"].name,
                    graph_vars["top_layer_embeddings"].name
                ],
                return_numpy=False)
            # batch_size * embedding_size
            total_cls_emb.append(np.array(cls_emb))
            total_top_layer_emb.append(np.array(unpad_top_layer_emb))
        except fluid.core.EOFException:
            break

    print(len(total_cls_emb))
    print(np.array(total_cls_emb).shape)
    total_cls_emb = np.concatenate(total_cls_emb)
    print('total_cls_emb=============', total_cls_emb.shape)
    print('total_top_layer_emb=============',
          np.array(total_top_layer_emb).shape)
    total_top_layer_emb = np.concatenate(total_top_layer_emb)
    print('total_top_layer_emb=============', total_top_layer_emb.shape)

    with open(os.path.join(args.output_dir, "word2id_cls_emb.npy"),
              "wb") as cls_emb_file:
        np.save(cls_emb_file, total_cls_emb)
    with open(os.path.join(args.output_dir, "word2id_top_layer_emb.npy"),
              "wb") as top_layer_emb_file:
        np.save(top_layer_emb_file, total_top_layer_emb)
def main():
    """Quantization-aware training entry point.

    Builds train/eval programs for the configured detection architecture,
    inserts fake-quantization ops via ``quant_aware``, then runs the training
    loop with periodic evaluation and best-model checkpointing.
    """
    # This trainer hard-requires evaluation during quantization training.
    if FLAGS.eval is False:
        raise ValueError(
            "Currently only supports `--eval==True` while training in `quantization`."
        )
    env = os.environ
    # Distributed mode only when launched with >1 trainers.
    FLAGS.dist = 'PADDLE_TRAINER_ID' in env \
        and 'PADDLE_TRAINERS_NUM' in env \
        and int(env['PADDLE_TRAINERS_NUM']) > 1
    num_trainers = int(env.get('PADDLE_TRAINERS_NUM', 1))
    if FLAGS.dist:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        import random
        # Per-trainer seed so trainers do not draw identical random streams.
        local_seed = (99 + trainer_id)
        random.seed(local_seed)
        np.random.seed(local_seed)

    cfg = load_config(FLAGS.config)
    merge_config(FLAGS.opt)
    check_config(cfg)
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)
    # check if paddlepaddle version is satisfied
    check_version()

    main_arch = cfg.architecture

    if cfg.use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM', 1))

    if 'FLAGS_selected_gpus' in env:
        device_id = int(env['FLAGS_selected_gpus'])
    else:
        device_id = 0
    place = fluid.CUDAPlace(device_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    lr_builder = create('LearningRate')
    optim_builder = create('OptimizerBuilder')

    # build program
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = create(main_arch)
            inputs_def = cfg['TrainReader']['inputs_def']
            feed_vars, train_loader = model.build_inputs(**inputs_def)
            # PACT needs gradients w.r.t. the input image.
            if FLAGS.use_pact:
                feed_vars['image'].stop_gradient = False
            train_fetches = model.train(feed_vars)
            loss = train_fetches['loss']
            lr = lr_builder()
            optimizer = optim_builder(lr)
            optimizer.minimize(loss)

    # parse train fetches
    train_keys, train_values, _ = parse_fetches(train_fetches)
    train_values.append(lr)

    if FLAGS.eval:
        eval_prog = fluid.Program()
        with fluid.program_guard(eval_prog, startup_prog):
            with fluid.unique_name.guard():
                model = create(main_arch)
                inputs_def = cfg['EvalReader']['inputs_def']
                feed_vars, eval_loader = model.build_inputs(**inputs_def)
                fetches = model.eval(feed_vars)
        eval_prog = eval_prog.clone(True)

        eval_reader = create_reader(cfg.EvalReader)
        # When iterable mode, set set_sample_list_generator(eval_reader, place)
        eval_loader.set_sample_list_generator(eval_reader)

        # parse eval fetches
        extra_keys = []
        if cfg.metric == 'COCO':
            extra_keys = ['im_info', 'im_id', 'im_shape']
        if cfg.metric == 'VOC':
            extra_keys = ['gt_bbox', 'gt_class', 'is_difficult']
        if cfg.metric == 'WIDERFACE':
            extra_keys = ['im_id', 'im_shape', 'gt_bbox']
        eval_keys, eval_values, eval_cls = parse_fetches(
            fetches, eval_prog, extra_keys)

    # compile program for multi-devices
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    build_strategy.fuse_all_reduce_ops = False
    # only enable sync_bn in multi GPU devices
    sync_bn = getattr(model.backbone, 'norm_type', None) == 'sync_bn'
    # NOTE(review): the line below unconditionally disables sync_bn,
    # making the getattr check above dead code — confirm intent.
    sync_bn = False
    build_strategy.sync_batch_norm = sync_bn and devices_num > 1 \
        and cfg.use_gpu

    exec_strategy = fluid.ExecutionStrategy()
    # iteration number when CompiledProgram tries to drop local execution scopes.
    # Set it to be 1 to save memory usages, so that unused variables in
    # local execution scopes can be deleted after each iteration.
    exec_strategy.num_iteration_per_drop_scope = 1
    if FLAGS.dist:
        dist_utils.prepare_for_multi_process(exe, build_strategy, startup_prog,
                                             train_prog)
        exec_strategy.num_threads = 1

    exe.run(startup_prog)

    not_quant_pattern = []
    if FLAGS.not_quant_pattern:
        not_quant_pattern = FLAGS.not_quant_pattern
    # Quantization configuration handed to quant_aware below.
    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern
    }

    ignore_params = cfg.finetune_exclude_pretrained_params \
        if 'finetune_exclude_pretrained_params' in cfg else []

    fuse_bn = getattr(model.backbone, 'norm_type', None) == 'affine_channel'

    # Load pretrained weights, fusing BN into affine_channel when possible.
    if cfg.pretrain_weights and fuse_bn and not ignore_params:
        checkpoint.load_and_fusebn(exe, train_prog, cfg.pretrain_weights)
    elif cfg.pretrain_weights:
        checkpoint.load_params(
            exe, train_prog, cfg.pretrain_weights, ignore_params=ignore_params)

    if FLAGS.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None
    # insert quantize op in train_prog, return type is CompiledProgram
    train_prog_quant = quant_aware(
        train_prog,
        place,
        config,
        scope=None,
        act_preprocess_func=act_preprocess_func,
        optimizer_func=optimizer_func,
        executor=executor,
        for_test=False)

    compiled_train_prog = train_prog_quant.with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    if FLAGS.eval:
        # insert quantize op in eval_prog
        eval_prog = quant_aware(
            eval_prog,
            place,
            config,
            scope=None,
            act_preprocess_func=act_preprocess_func,
            optimizer_func=optimizer_func,
            executor=executor,
            for_test=True)
        compiled_eval_prog = fluid.CompiledProgram(eval_prog)

    start_iter = 0

    train_reader = create_reader(
        cfg.TrainReader, (cfg.max_iters - start_iter) * devices_num,
        cfg,
        devices_num=devices_num,
        num_trainers=num_trainers)
    # When iterable mode, set set_sample_list_generator(train_reader, place)
    train_loader.set_sample_list_generator(train_reader)

    # whether output bbox is normalized in model output layer
    is_bbox_normalized = False
    if hasattr(model, 'is_bbox_normalized') and \
            callable(model.is_bbox_normalized):
        is_bbox_normalized = model.is_bbox_normalized()

    # if map_type not set, use default 11point, only use in VOC eval
    map_type = cfg.map_type if 'map_type' in cfg else '11point'

    train_stats = TrainingStats(cfg.log_iter, train_keys)
    train_loader.start()
    start_time = time.time()
    end_time = time.time()

    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
    save_dir = os.path.join(cfg.save_dir, cfg_name)
    time_stat = deque(maxlen=cfg.log_iter)
    best_box_ap_list = [0.0, 0]  #[map, iter]

    for it in range(start_iter, cfg.max_iters):
        start_time = end_time
        end_time = time.time()
        time_stat.append(end_time - start_time)
        time_cost = np.mean(time_stat)
        eta_sec = (cfg.max_iters - it) * time_cost
        eta = str(datetime.timedelta(seconds=int(eta_sec)))
        outs = exe.run(compiled_train_prog, fetch_list=train_values)
        # Last fetched value is the learning rate (appended above).
        stats = {k: np.array(v).mean() for k, v in zip(train_keys, outs[:-1])}

        train_stats.update(stats)
        logs = train_stats.log()
        # trainer_id is only bound when FLAGS.dist is True; the short-circuit
        # below keeps this safe in single-trainer runs.
        if it % cfg.log_iter == 0 and (not FLAGS.dist or trainer_id == 0):
            strs = 'iter: {}, lr: {:.6f}, {}, time: {:.3f}, eta: {}'.format(
                it, np.mean(outs[-1]), logs, time_cost, eta)
            logger.info(strs)

        if (it > 0 and it % cfg.snapshot_iter == 0 or it == cfg.max_iters - 1) \
                and (not FLAGS.dist or trainer_id == 0):
            # NOTE(review): save_name is assigned but never used in this block.
            save_name = str(it) if it != cfg.max_iters - 1 else "model_final"

            if FLAGS.eval:
                # evaluation
                results = eval_run(
                    exe,
                    compiled_eval_prog,
                    eval_loader,
                    eval_keys,
                    eval_values,
                    eval_cls,
                    cfg=cfg)
                resolution = None
                if 'mask' in results[0]:
                    resolution = model.mask_head.resolution
                box_ap_stats = eval_results(
                    results, cfg.metric, cfg.num_classes, resolution,
                    is_bbox_normalized, FLAGS.output_eval, map_type,
                    cfg['EvalReader']['dataset'])

                # Keep only the checkpoint with the best box AP so far.
                if box_ap_stats[0] > best_box_ap_list[0]:
                    best_box_ap_list[0] = box_ap_stats[0]
                    best_box_ap_list[1] = it
                    save_checkpoint(exe, eval_prog,
                                    os.path.join(save_dir, "best_model"),
                                    train_prog)
                logger.info("Best test box ap: {}, in iter: {}".format(
                    best_box_ap_list[0], best_box_ap_list[1]))

    train_loader.reset()
def test_dtype1():
    """Request a float32-typed sum over a float64 input inside scratch programs."""
    scratch_main = fluid.Program()
    scratch_startup = fluid.Program()
    with fluid.program_guard(scratch_main, scratch_startup):
        x = fluid.data(name="data", shape=[10], dtype="float64")
        paddle.sum(x, dtype="float32")
def test_with_place(place, data_layout, shape):
    """Check the batch_norm op (forward + backward) against the NumPy reference.

    Builds a program containing a single ``batch_norm`` op plus its generated
    gradient op, runs it on ``place`` with random inputs, and compares every
    output/gradient with ``self.ref_forward_backward``.

    Fix: the trailing Python-2 ``print`` statement was a SyntaxError under
    Python 3; converted to the ``print()`` function.
    """
    # attr
    epsilon = 0.00001
    momentum = 0.9

    if data_layout == "NCHW":
        n, c, h, w = shape[0], shape[1], shape[2], shape[3]
    else:
        n, h, w, c = shape[0], shape[1], shape[2], shape[3]

    scale_shape = [c]

    # Fixed seed keeps the test deterministic.
    np.random.seed(123)
    x = np.random.random_sample(shape).astype(np.float32)
    scale = np.random.random_sample(scale_shape).astype(np.float32)
    bias = np.random.random_sample(scale_shape).astype(np.float32)
    mean = np.zeros(scale_shape).astype(np.float32)
    variance = np.ones(scale_shape).astype(np.float32)
    y_grad = np.random.random_sample(shape).astype(np.float32)

    # Reference forward/backward computed in NumPy.
    y, mean_out, variance_out, saved_mean, saved_variance, x_grad, scale_grad, bias_grad = self.ref_forward_backward(
        x, y_grad, scale, bias, mean, variance, epsilon, momentum, shape,
        data_layout)

    # Snapshot locals so feeds can be looked up by variable name,
    # including the gradient input under its op-level name 'y@GRAD'.
    var_dict = locals()
    var_dict['y@GRAD'] = y_grad

    var_names = [
        'x', 'scale', 'bias', 'mean', 'variance', 'y', 'saved_mean',
        'saved_variance'
    ]
    ground_truth = {name: var_dict[name] for name in var_names}

    program = fluid.Program()
    with fluid.program_guard(program):
        block = program.global_block()
        for name in ground_truth:
            block.create_var(
                name=name, dtype='float32', shape=ground_truth[name].shape)
        bn_op = block.append_op(
            type="batch_norm",
            inputs={
                "X": block.var('x'),
                "Scale": block.var('scale'),
                "Bias": block.var('bias'),
                "Mean": block.var('mean'),
                "Variance": block.var('variance')
            },
            outputs={
                "Y": block.var('y'),
                "MeanOut": block.var('mean'),  # share the same memory
                "VarianceOut": block.var('variance'),  # share the same memory
                "SavedMean": block.var('saved_mean'),
                "SavedVariance": block.var('saved_variance')
            },
            attrs={
                "momentum": momentum,
                "epsilon": epsilon,
                "is_test": False,
                "data_layout": data_layout,
                "use_mkldnn": self.use_mkldnn
            })
        block.create_var(name='y@GRAD', dtype='float32', shape=y.shape)

        # generate backward op_desc
        grad_op_desc_list, op_grad_to_var = core.get_grad_op_desc(
            bn_op.desc, set(), [])
        grad_op_desc = grad_op_desc_list[0]
        new_op_desc = block.desc.append_op()
        new_op_desc.copy_from(grad_op_desc)
        # Declare the gradient outputs and force float32 on each.
        for var_name in grad_op_desc.output_arg_names():
            block.desc.var(var_name.encode("ascii"))
        grad_op_desc.infer_var_type(block.desc)
        grad_op_desc.infer_shape(block.desc)
        for arg in grad_op_desc.output_arg_names():
            grad_var = block.desc.find_var(arg.encode("ascii"))
            grad_var.set_dtype(core.VarDesc.VarType.FP32)

    exe = fluid.Executor(place)
    out = exe.run(
        program,
        feed={
            name: var_dict[name]
            for name in ['x', 'scale', 'bias', 'mean', 'variance', 'y@GRAD']
        },
        fetch_list=[
            'y', 'mean', 'variance', 'saved_mean', 'saved_variance',
            'x@GRAD', 'scale@GRAD', 'bias@GRAD'
        ])

    self.__assert_close(y, out[0], "y")
    self.__assert_close(mean_out, out[1], "mean")
    self.__assert_close(variance_out, out[2], "variance", 1e-3)
    self.__assert_close(saved_mean, out[3], "saved_mean")
    self.__assert_close(saved_variance, out[4], "saved_variance", 1e-3)
    self.__assert_close(x_grad, out[5], "x_grad")
    self.__assert_close(scale_grad, out[6], "scale_grad")
    self.__assert_close(bias_grad, out[7], "bias_grad")
    print("op test forward passed: ", str(place), data_layout)
def test_type():
    """Ask for a bool-typed sum over an int32 input inside scratch programs."""
    scratch_main, scratch_startup = fluid.Program(), fluid.Program()
    with fluid.program_guard(scratch_main, scratch_startup):
        x = fluid.data(name="data", shape=[10], dtype="int32")
        paddle.sum(x, dtype="bool")
def test_nested_net_with_backward_and_lodtensor(self):
    """Nested while_loop with backward: sums three array entries plus x three
    times, then checks the forward result and x's gradient numerically."""

    def external_cond(i, j, x, mem_array):
        # Outer loop runs while i < array_len.
        return layers.less_than(i, array_len)

    def external_body(i, j, x, mem_array):
        def internal_cond(j, x, mem_array):
            # Inner loop runs while j < array_len2.
            return layers.less_than(j, array_len2)

        def internal_body(j, x, mem_array):
            # mem_array[j+1] = data_array[j] + mem_array[j] + x
            inner_data = layers.array_read(array=data_array, i=j)
            inner_prev = layers.array_read(array=mem_array, i=j)
            inner_sum_0 = layers.elementwise_add(x=inner_data, y=inner_prev)
            inner_sum_1 = layers.elementwise_add(x=x, y=inner_sum_0)
            j = layers.increment(x=j, in_place=True)
            layers.array_write(inner_sum_1, i=j, array=mem_array)
            return [j, x, mem_array]

        # mem_array[i+1] = data_array[i] + mem_array[i] + x, then run the
        # inner loop over the remaining entries.
        outer_data = layers.array_read(array=data_array, i=i)
        outer_prev = layers.array_read(array=mem_array, i=i)
        outer_sum_0 = layers.elementwise_add(x=outer_data, y=outer_prev)
        outer_sum_1 = layers.elementwise_add(x=x, y=outer_sum_0)
        i = layers.increment(x=i, in_place=True)
        layers.array_write(outer_sum_1, i=i, array=mem_array)
        j, x, mem_array = layers.while_loop(internal_cond, internal_body,
                                            [j, x, mem_array])
        return [i, j, x, mem_array]

    main_program = Program()
    startup_program = Program()
    with fluid.program_guard(main_program, startup_program):
        d0 = fluid.data(name='d0', shape=[10], dtype='float32')
        d1 = fluid.data(name='d1', shape=[10], dtype='float32')
        d2 = fluid.data(name='d2', shape=[10], dtype='float32')
        x = fluid.data(name='x', shape=[10], dtype='float32')
        x.stop_gradient = False  # gradient of x is fetched below
        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True
        init = layers.zeros(shape=[10], dtype='float32')
        mem_array = layers.array_write(x=init, i=i)
        # data_array = [d0, d1, d2]
        data_array = layers.array_write(x=d0, i=i)
        i = layers.increment(i)
        layers.array_write(d1, i, array=data_array)
        i = layers.increment(i)
        layers.array_write(d2, i, array=data_array)
        # Reset the loop counter to 0 after filling the array.
        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True
        # Outer loop bound 1, inner loop bound 3.
        array_len = layers.fill_constant(shape=[1], dtype='int64', value=1)
        j = layers.fill_constant(shape=[1], dtype='int64', value=1)
        j.stop_gradient = True
        array_len2 = layers.fill_constant(shape=[1], dtype='int64', value=3)

        out = layers.while_loop(external_cond, external_body,
                                [i, j, x, mem_array])

        sum_result = layers.array_read(array=mem_array, i=j)
        mean = layers.mean(sum_result)
        append_backward(mean)

    place = fluid.CUDAPlace(
        0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
    exe = fluid.Executor(place)

    d = []
    for i in range(3):
        d.append(np.random.random(size=[10]).astype('float32'))
    feed_x = np.ones(10).astype('float32')
    # Expected: every data entry plus x added three times in total.
    data_sum = d[0] + d[1] + d[2] + 3 * feed_x
    # Expected gradient of mean(sum) w.r.t. x: 3 / 10 per element.
    x_grad = [0.3] * 10
    res = exe.run(main_program,
                  feed={
                      'd0': d[0],
                      'd1': d[1],
                      'd2': d[2],
                      'x': feed_x
                  },
                  fetch_list=[sum_result.name, x.grad_name])
    self.assertTrue(np.allclose(res[0], data_sum))
    self.assertTrue(np.allclose(res[1], x_grad))
train_lr_img_list = sorted( load_file_list(im_path=train_lr_img_path, im_format='*.png')) valid_hr_img_list = sorted( load_file_list(im_path=valid_hr_img_path, im_format='*.png')) valid_lr_img_list = sorted( load_file_list(im_path=valid_lr_img_path, im_format='*.png')) # load im data train_hr_imgs = im_read(train_hr_img_list) train_lr_imgs = im_read(train_lr_img_list) valid_hr_imgs = im_read(valid_hr_img_list) valid_lr_imgs = im_read(valid_lr_img_list) # LOAD VGG vgg19_program = fluid.Program() with fluid.program_guard(vgg19_program): vgg19_input = fluid.layers.data(name='vgg19_input', shape=[224, 224, 3], dtype='float32') vgg19_input_transpose = fluid.layers.transpose(vgg19_input, perm=[0, 3, 1, 2]) # define vgg19 _, vgg_target_emb = vgg19(vgg19_input_transpose) # DEFINE MODEL ==> SRGAN_g SRGAN_d SRGAN_g_program = fluid.Program() with fluid.program_guard(SRGAN_g_program): # Low resolution image t_image = fluid.layers.data(name='t_image', shape=[96, 96, 3], dtype='float32')
def build_normal_program(self, test_program, batch_size, dims, loc_float,
                         scale_float, other_loc_float, other_scale_float,
                         scale_np, other_scale_np, loc_np, other_loc_np,
                         values_np):
    """Populate ``test_program`` with Normal-distribution ops.

    Builds Normal distributions parameterized four ways — Python floats,
    float/ndarray broadcast mix, ndarrays, and graph variables — then wires
    up sample/entropy/log_prob/kl_divergence ops for each.

    Returns:
        (feed_vars, fetch_list): the feed dict for the data layers and the
        15 result variables to fetch, in a fixed order the caller relies on.
    """
    with fluid.program_guard(test_program):
        # Data layers fed from the *_np arrays at run time.
        loc = layers.data(name='loc', shape=[dims], dtype='float32')
        scale = layers.data(name='scale', shape=[dims], dtype='float32')
        other_loc = layers.data(
            name='other_loc', shape=[dims], dtype='float32')
        other_scale = layers.data(
            name='other_scale', shape=[dims], dtype='float32')
        values = layers.data(name='values', shape=[dims], dtype='float32')

        # Four parameterizations of the same distribution pair.
        normal_float = Normal(loc_float, scale_float)
        other_normal_float = Normal(other_loc_float, other_scale_float)

        normal_float_np_broadcast = Normal(loc_float, scale_np)
        other_normal_float_np_broadcast = Normal(other_loc_float,
                                                 other_scale_np)

        normal_np = Normal(loc_np, scale_np)
        other_normal_np = Normal(other_loc_np, other_scale_np)

        normal_variable = Normal(loc, scale)
        other_normal_variable = Normal(other_loc, other_scale)

        sample_float = normal_float.sample([batch_size, dims])
        sample_float_np_broadcast = normal_float_np_broadcast.sample(
            [batch_size, dims])
        sample_np = normal_np.sample([batch_size, dims])
        sample_variable = normal_variable.sample([batch_size, dims])

        entropy_float = normal_float.entropy()
        entropy_float_np_broadcast = normal_float_np_broadcast.entropy()
        entropy_np = normal_np.entropy()
        entropy_variable = normal_variable.entropy()

        lp_float_np_broadcast = normal_float_np_broadcast.log_prob(values)
        lp_np = normal_np.log_prob(values)
        lp_variable = normal_variable.log_prob(values)

        kl_float = normal_float.kl_divergence(other_normal_float)
        kl_float_np_broadcast = normal_float_np_broadcast.kl_divergence(
            other_normal_float_np_broadcast)
        kl_np = normal_np.kl_divergence(other_normal_np)
        kl_variable = normal_variable.kl_divergence(other_normal_variable)

    # Order matters: the caller indexes into this list positionally.
    fetch_list = [
        sample_float, sample_float_np_broadcast, sample_np, sample_variable,
        entropy_float, entropy_float_np_broadcast, entropy_np,
        entropy_variable, lp_float_np_broadcast, lp_np, lp_variable, kl_float,
        kl_float_np_broadcast, kl_np, kl_variable
    ]
    feed_vars = {
        'loc': loc_np,
        'scale': scale_np,
        'other_loc': other_loc_np,
        'other_scale': other_scale_np,
        'values': values_np
    }
    return feed_vars, fetch_list
def train():
    """Train a PointNet++ classifier (MSG or SSG variant).

    Builds train/test programs, optionally resumes from a checkpoint, then
    runs the epoch loop: train until the loader raises EOF, checkpoint,
    evaluate (unless in CE mode), and finally emit CE KPI lines if enabled.
    """
    args = parse_args()
    print_arguments(args)
    # check whether the installed paddle is compiled with GPU
    check_gpu(args.use_gpu)

    if not os.path.isdir(args.save_dir):
        os.makedirs(args.save_dir)

    assert args.model in ['MSG', 'SSG'], \
        "--model can only be 'MSG' or 'SSG'"

    # build model
    if args.enable_ce:
        # Fixed seeds for continuous-evaluation reproducibility.
        SEED = 102
        fluid.default_main_program().random_seed = SEED
        framework.default_startup_program().random_seed = SEED

    startup = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup):
        with fluid.unique_name.guard():
            train_model = PointNet2ClsMSG(args.num_classes, args.num_points) \
                if args.model == "MSG" else \
                PointNet2ClsSSG(args.num_classes, args.num_points)
            train_model.build_model(bn_momentum=args.bn_momentum)
            train_feeds = train_model.get_feeds()
            train_loader = train_model.get_loader()
            train_outputs = train_model.get_outputs()
            train_loss = train_outputs['loss']
            # Exponentially decayed LR, clipped to [1e-5, args.lr].
            lr = fluid.layers.exponential_decay(
                learning_rate=args.lr,
                decay_steps=args.decay_steps,
                decay_rate=args.lr_decay,
                staircase=True)
            lr = fluid.layers.clip(lr, 1e-5, args.lr)
            params = []
            for var in train_prog.list_vars():
                if fluid.io.is_parameter(var):
                    params.append(var.name)
            optimizer = fluid.optimizer.Adam(
                learning_rate=lr,
                regularization=fluid.regularizer.L2Decay(args.weight_decay))
            optimizer.minimize(train_loss, parameter_list=params)
    train_keys, train_values = parse_outputs(train_outputs)

    test_prog = fluid.Program()
    with fluid.program_guard(test_prog, startup):
        with fluid.unique_name.guard():
            test_model = PointNet2ClsMSG(args.num_classes, args.num_points) \
                if args.model == "MSG" else \
                PointNet2ClsSSG(args.num_classes, args.num_points)
            test_model.build_model()
            test_feeds = test_model.get_feeds()
            test_outputs = test_model.get_outputs()
            test_loader = test_model.get_loader()
    test_prog = test_prog.clone(True)
    test_keys, test_values = parse_outputs(test_outputs)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup)

    if args.resume:
        # A file-prefix resume target must have both .pdparams and .pdopt.
        if not os.path.isdir(args.resume):
            assert os.path.exists("{}.pdparams".format(args.resume)), \
                "Given resume weight {}.pdparams not exist.".format(args.resume)
            assert os.path.exists("{}.pdopt".format(args.resume)), \
                "Given resume optimizer state {}.pdopt not exist.".format(args.resume)
        fluid.load(train_prog, args.resume, exe)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_optimizer_ops = False
    train_compile_prog = fluid.compiler.CompiledProgram(
        train_prog).with_data_parallel(
            loss_name=train_loss.name, build_strategy=build_strategy)
    test_compile_prog = fluid.compiler.CompiledProgram(test_prog)

    def save_model(exe, prog, path):
        # Replace any existing checkpoint directory at `path`.
        if os.path.isdir(path):
            shutil.rmtree(path)
        logger.info("Save model to {}".format(path))
        fluid.save(prog, path)

    # get reader
    trans_list = [
        PointcloudScale(),
        PointcloudRotate(),
        PointcloudRotatePerturbation(),
        PointcloudTranslate(),
        PointcloudJitter(),
        PointcloudRandomInputDropout(),
    ]
    modelnet_reader = ModelNet40ClsReader(
        args.data_dir, mode='train', transforms=trans_list)
    train_reader = modelnet_reader.get_reader(args.batch_size, args.num_points)
    train_loader.set_sample_list_generator(train_reader, place)
    modelnet_reader = ModelNet40ClsReader(
        args.data_dir, mode='test', transforms=None)
    test_reader = modelnet_reader.get_reader(args.batch_size, args.num_points)
    test_loader.set_sample_list_generator(test_reader, place)

    train_stat = Stat()
    test_stat = Stat()

    ce_time = 0
    ce_loss = []

    for epoch_id in range(args.epoch):
        try:
            train_loader.start()
            train_iter = 0
            train_periods = []
            # Train until the loader signals end-of-epoch via EOFException.
            while True:
                cur_time = time.time()
                train_outs = exe.run(train_compile_prog,
                                     fetch_list=train_values + [lr.name])
                period = time.time() - cur_time
                train_periods.append(period)
                # Last fetched value is the learning rate.
                train_stat.update(train_keys, train_outs[:-1])
                if train_iter % args.log_interval == 0:
                    log_str = ""
                    for name, values in zip(train_keys + ['learning_rate'],
                                            train_outs):
                        log_str += "{}: {:.5f}, ".format(name, np.mean(values))
                        if name == 'loss':
                            ce_loss.append(np.mean(values))
                    logger.info("[TRAIN] Epoch {}, batch {}: {}time: {:.2f}".
                                format(epoch_id, train_iter, log_str, period))
                train_iter += 1
        except fluid.core.EOFException:
            # First iteration excluded from timing (warm-up).
            logger.info("[TRAIN] Epoch {} finished, {}average time: {:.2f}".
                        format(epoch_id, train_stat.get_mean_log(),
                               np.mean(train_periods[1:])))
            ce_time = np.mean(train_periods[1:])
            save_model(exe, train_prog,
                       os.path.join(args.save_dir, str(epoch_id)))

            # evaluation
            if not args.enable_ce:
                try:
                    test_loader.start()
                    test_iter = 0
                    test_periods = []
                    while True:
                        cur_time = time.time()
                        test_outs = exe.run(test_compile_prog,
                                            fetch_list=test_values)
                        period = time.time() - cur_time
                        test_periods.append(period)
                        test_stat.update(test_keys, test_outs)
                        if test_iter % args.log_interval == 0:
                            log_str = ""
                            for name, value in zip(test_keys, test_outs):
                                log_str += "{}: {:.4f}, ".format(
                                    name, np.mean(value))
                            logger.info(
                                "[TEST] Epoch {}, batch {}: {}time: {:.2f}".
                                format(epoch_id, test_iter, log_str, period))
                        test_iter += 1
                except fluid.core.EOFException:
                    logger.info(
                        "[TEST] Epoch {} finished, {}average time: {:.2f}".
                        format(epoch_id, test_stat.get_mean_log(),
                               np.mean(test_periods[1:])))
                finally:
                    test_loader.reset()
                    test_stat.reset()
                    test_periods = []
        finally:
            train_loader.reset()
            train_stat.reset()
            train_periods = []

    # only for ce
    if args.enable_ce:
        card_num = get_cards()
        _loss = 0
        _time = 0
        # NOTE(review): bare except silently swallows any error while
        # computing CE stats — consider narrowing.
        try:
            _time = ce_time
            _loss = np.mean(ce_loss[1:])
        except:
            print("ce info error")
        print("kpis\ttrain_cls_%s_duration_card%s\t%s" %
              (args.model, card_num, _time))
        print("kpis\ttrain_cls_%s_loss_card%s\t%f" %
              (args.model, card_num, _loss))
def check_network_convergence(self,
                              method,
                              memory_opt=True,
                              iter=50,
                              batch_size=None,
                              allow_op_delay=False,
                              feed_dict=None,
                              seed=None,
                              use_parallel_executor=True,
                              balance_parameter_opt_between_cards=False):
    """Build the network from ``method``, train ``iter`` steps and return
    (first_loss, last_loss) as numpy arrays.

    Fixes: Python-2-only ``print`` statements and ``xrange`` (SyntaxError /
    NameError on Python 3) converted to ``print()`` and ``range``; typo in
    the 'Unknown type exe' error message corrected.
    """

    def run_executor(exe, feed, fetch_list, program=None):
        # Dispatch on executor type; plain Executor needs an explicit program.
        if isinstance(exe, fluid.ParallelExecutor):
            res = exe.run(fetch_list=fetch_list, feed=feed)
        elif isinstance(exe, fluid.Executor):
            if program is None:
                program = fluid.default_main_program()
            res = exe.run(program=program, feed=feed, fetch_list=fetch_list)
        else:
            raise ValueError('Unknown type exe')
        return res

    main = fluid.Program()
    startup = fluid.Program()
    startup.random_seed = 1  # Fix random seed
    with fluid.program_guard(main, startup):
        if seed is not None:
            startup.random_seed = seed
        loss = method(use_feed=feed_dict is not None)
        adam = fluid.optimizer.Adam()
        adam.minimize(loss)
        if memory_opt:
            fluid.memory_optimize(main)
        place = fluid.CUDAPlace(0)
        startup_exe = fluid.Executor(place)
        startup_exe.run(startup)

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.allow_op_delay = allow_op_delay

        build_strategy = fluid.BuildStrategy()
        build_strategy.reduce_strategy = \
            fluid.BuildStrategy.ReduceStrategy.Reduce \
            if balance_parameter_opt_between_cards \
            else fluid.BuildStrategy.ReduceStrategy.AllReduce

        if use_parallel_executor:
            exe = fluid.ParallelExecutor(
                True,
                loss_name=loss.name,
                exec_strategy=exec_strategy,
                build_strategy=build_strategy)
        else:
            exe = fluid.Executor(place=place)

        if batch_size is not None:
            # Effective batch size spans all visible CUDA devices.
            batch_size *= fluid.core.get_cuda_device_count()
        begin = time.time()
        first_loss, = run_executor(
            exe=exe, feed=feed_dict, fetch_list=[loss.name])
        first_loss = np.array(first_loss)

        for i in range(iter):
            run_executor(exe=exe, feed=feed_dict, fetch_list=[])

        last_loss, = run_executor(
            exe=exe, feed=feed_dict, fetch_list=[loss.name])
        end = time.time()

        if batch_size is not None:
            print("%.4f Instance per second" % (
                (batch_size * iter + 2) / (end - begin)))

        last_loss = np.array(last_loss)

        print(first_loss, last_loss)
        # NOTE(review): convergence assertion was disabled in the original;
        # kept disabled to preserve behavior.
        # self.assertGreater(first_loss[0], last_loss[0])
        return first_loss, last_loss
def test_multivariateNormalDiag_distribution(self,
                                             batch_size=2,
                                             tolerance=1e-6):
    """Compare MultivariateNormalDiag entropy and KL divergence computed by
    the framework against the NumPy reference implementation."""
    test_program = fluid.Program()

    # Random loc vectors and diagonal scale matrices for both distributions.
    loc_np = np.random.random(batch_size, ).astype('float32')
    scale_np = np.diag(np.random.random(batch_size, )).astype('float32')
    other_loc_np = np.random.random(batch_size, ).astype('float32')
    other_scale_np = np.diag(np.random.random(
        batch_size, )).astype('float32')

    with fluid.program_guard(test_program):
        # append_batch_size=False: shapes are used exactly as given.
        loc = layers.data(
            name='loc',
            shape=[batch_size, ],
            dtype='float32',
            append_batch_size=False)
        scale = layers.data(
            name='scale',
            shape=[batch_size, batch_size],
            dtype='float32',
            append_batch_size=False)
        other_loc = layers.data(
            name='other_loc',
            shape=[batch_size, ],
            dtype='float32',
            append_batch_size=False)
        other_scale = layers.data(
            name='other_scale',
            shape=[batch_size, batch_size],
            dtype='float32',
            append_batch_size=False)

        multivariate_np = MultivariateNormalDiag(loc, scale)
        other_multivariate_np = MultivariateNormalDiag(other_loc, other_scale)

        entropy_np = multivariate_np.entropy()
        other_entropy_np = other_multivariate_np.entropy()
        kl_np = multivariate_np.kl_divergence(other_multivariate_np)

    self.executor.run(fluid.default_main_program())

    # Ground truth from the NumPy reference implementation.
    np_multivariate = MultivariateNormalDiagNumpy(loc_np, scale_np)
    np_other_multivariate = MultivariateNormalDiagNumpy(
        other_loc_np, other_scale_np)
    gt_entropy_np = np_multivariate.entropy()
    gt_kl_np = np_multivariate.kl_divergence(np_other_multivariate)

    # result calculated by paddle
    [output_entropy_np, output_kl_np] = self.executor.run(
        program=test_program,
        feed={
            'loc': loc_np,
            'scale': scale_np,
            'other_loc': other_loc_np,
            'other_scale': other_scale_np
        },
        fetch_list=[entropy_np, kl_np])
    np.testing.assert_allclose(
        output_entropy_np, gt_entropy_np, rtol=tolerance, atol=tolerance)
    np.testing.assert_allclose(
        output_kl_np, gt_kl_np, rtol=tolerance, atol=tolerance)