def not_test_raw_api(self):
    """Train the raw split/merge-LoD-tensor MNIST network and return as soon
    as the average loss drops below 1.0; fail if it never converges within
    100 passes over the data.
    """
    main_program = Program()
    startup_program = Program()
    with program_guard(main_program, startup_program):
        # Inputs: flattened MNIST image and its integer class label.
        image = layers.data(name='x', shape=[784], dtype='float32')
        label = layers.data(name='y', shape=[1], dtype='int64')

        # Per-sample routing condition: label < 5.
        limit = layers.fill_constant(shape=[1], dtype='int64', value=5)
        cond = layers.less_than(x=label, y=limit)
        true_image, false_image = split_lod_tensor(input=image, mask=cond)

        # True branch: 100-unit hidden layer feeding a 10-way softmax.
        true_out = layers.create_tensor(dtype='float32')
        true_cond = ConditionalBlock([cond])
        with true_cond.block():
            branch_hidden = layers.fc(input=true_image, size=100, act='tanh')
            branch_prob = layers.fc(input=branch_hidden, size=10, act='softmax')
            layers.assign(input=branch_prob, output=true_out)

        # False branch: 200-unit hidden layer feeding a 10-way softmax.
        false_out = layers.create_tensor(dtype='float32')
        false_cond = ConditionalBlock([cond])
        with false_cond.block():
            branch_hidden = layers.fc(input=false_image, size=200, act='tanh')
            branch_prob = layers.fc(input=branch_hidden, size=10, act='softmax')
            layers.assign(input=branch_prob, output=false_out)

        # Stitch both branch outputs back into a single LoD tensor.
        prob = merge_lod_tensor(
            in_true=true_out, in_false=false_out, mask=cond, x=image)
        loss = layers.cross_entropy(input=prob, label=label)
        avg_loss = layers.mean(loss)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, startup_program)

    train_reader = paddle.batch(
        paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=8192),
        batch_size=10)

    place = core.CPUPlace()
    exe = Executor(place)
    exe.run(startup_program)

    PASS_NUM = 100
    for _ in range(PASS_NUM):
        for batch in train_reader():
            x_data = np.array([sample[0] for sample in batch]).astype("float32")
            y_data = np.array([sample[1] for sample in batch]).astype("int64")
            y_data = np.expand_dims(y_data, axis=1)

            outs = exe.run(main_program,
                           feed={'x': x_data, 'y': y_data},
                           fetch_list=[avg_loss])
            print(outs[0])
            if outs[0] < 1.0:
                # Converged — test passes.
                return
    self.assertFalse(True)
def train_mnist():
    """Train the dygraph MNIST model with momentum SGD, reporting per-batch
    time/loss meters and running the evaluation readers after every epoch.
    """
    epoch_num = 1 if args.benchmark else 10
    BATCH_SIZE = 32

    with fluid.dygraph.guard():
        mnist = MNIST("mnist")
        adam = MomentumOptimizer(learning_rate=0.01, momentum=0.5)

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)
        eval_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=10, drop_last=True)

        for epoch in range(epoch_num):
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            # NOTE: materialising the reader just to count batches walks the
            # whole training set once per epoch.
            progress = ProgressMeter(
                len(list(train_reader())) - 1,
                batch_time, data_time, losses,
                prefix="epoch: [{}]".format(epoch))

            end = Tools.time()
            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)

                # Build a normalised NCHW image batch and a label column.
                images = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                images = normalize(images, 0.1307, 0.3081)
                labels = np.array(
                    [x[1] for x in data]).astype('int64').reshape(BATCH_SIZE, 1)

                img = to_variable(images)
                label = to_variable(labels)
                label.stop_gradient = True

                cost, acc = mnist(img, label)
                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()
                adam.minimize(avg_loss)
                mnist.clear_gradients()

                batch_time.update(Tools.time() - end)
                losses.update(avg_loss.numpy()[0], BATCH_SIZE)
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                end = Tools.time()

            # Per-epoch evaluation on the held-out readers.
            mnist.eval()
            test_cost, test_acc = test_train(test_reader, mnist, BATCH_SIZE)
            test_p(eval_reader, mnist, 10)
            mnist.train()
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))
def test_ifelse(self):
    """Train the IfElse MNIST network until avg loss < 1.0; otherwise fail.

    Fixes:
    - ``exe.run(kwargs['startup_program'])`` / ``kwargs['main_program']``
      referenced an undefined ``kwargs``; the locally built programs
      (``startup_prog`` / ``prog``) are used instead.
    - ``np.array(map(...))`` and the bare ``print`` statement are
      Python-2-only (under Python 3 ``map`` is an iterator, which NumPy
      wraps as a 0-d object array); replaced with list comprehensions
      and ``print()``.
    """
    prog = Program()
    startup_prog = Program()
    with program_guard(prog, startup_prog):
        image = layers.data(name='x', shape=[784], dtype='float32')
        label = layers.data(name='y', shape=[1], dtype='int64')

        # Per-sample routing condition: label < 5.
        limit = layers.fill_constant_batch_size_like(
            input=label, dtype='int64', shape=[1], value=5.0)
        cond = layers.less_than(x=label, y=limit)

        ie = layers.IfElse(cond)
        with ie.true_block():
            true_image = ie.input(image)
            hidden = layers.fc(input=true_image, size=100, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            ie.output(prob)
        with ie.false_block():
            false_image = ie.input(image)
            hidden = layers.fc(input=false_image, size=200, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            ie.output(prob)

        prob = ie()
        loss = layers.cross_entropy(input=prob[0], label=label)
        avg_loss = layers.mean(loss)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, startup_prog)

    train_reader = paddle.batch(
        paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=8192),
        batch_size=200)

    place = core.CPUPlace()
    exe = Executor(place)
    exe.run(startup_prog)  # was: exe.run(kwargs['startup_program'])

    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            x_data = np.array([x[0] for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = y_data.reshape((y_data.shape[0], 1))

            outs = exe.run(prog,  # was: kwargs['main_program']
                           feed={'x': x_data, 'y': y_data},
                           fetch_list=[avg_loss])
            print(outs[0])
            if outs[0] < 1.0:
                # Converged — test passes.
                return
    self.assertFalse(True)
def test_ifelse(self):
    """Train the IfElse MNIST network until avg loss < 1.0; otherwise fail.

    Fixes:
    - ``exe.run(kwargs['startup_program'])`` / ``kwargs['main_program']``
      referenced an undefined ``kwargs``; the locally built programs
      (``startup_prog`` / ``prog``) are used instead.
    - ``np.array(map(...))`` and the bare ``print`` statement are
      Python-2-only (under Python 3 ``map`` is an iterator, which NumPy
      wraps as a 0-d object array); replaced with list comprehensions
      and ``print()``.
    """
    prog = Program()
    startup_prog = Program()
    with program_guard(prog, startup_prog):
        image = layers.data(name='x', shape=[784], dtype='float32')
        label = layers.data(name='y', shape=[1], dtype='int64')

        # Per-sample routing condition: label < 5.
        limit = layers.fill_constant_batch_size_like(
            input=label, dtype='int64', shape=[1], value=5.0)
        cond = layers.less_than(x=label, y=limit)

        ie = layers.IfElse(cond)
        with ie.true_block():
            true_image = ie.input(image)
            hidden = layers.fc(input=true_image, size=100, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            ie.output(prob)
        with ie.false_block():
            false_image = ie.input(image)
            hidden = layers.fc(input=false_image, size=200, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            ie.output(prob)

        prob = ie()
        loss = layers.cross_entropy(input=prob[0], label=label)
        avg_loss = layers.mean(loss)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, startup_prog)

    train_reader = paddle.batch(
        paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=8192),
        batch_size=200)

    place = core.CPUPlace()
    exe = Executor(place)
    exe.run(startup_prog)  # was: exe.run(kwargs['startup_program'])

    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            x_data = np.array([x[0] for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = y_data.reshape((y_data.shape[0], 1))

            outs = exe.run(prog,  # was: kwargs['main_program']
                           feed={'x': x_data, 'y': y_data},
                           fetch_list=[avg_loss])
            print(outs[0])
            if outs[0] < 1.0:
                # Converged — test passes.
                return
    self.assertFalse(True)
def get_optimizer(self):
    """Return the momentum optimizer used by this test (lr=0.001, momentum=0.9)."""
    return MomentumOptimizer(learning_rate=0.001, momentum=0.9)
def get_optimizer_dygraph(self, parameter_list):
    """Return the dygraph momentum optimizer (lr=0.001, momentum=0.9) bound
    to the given ``parameter_list``.
    """
    opt = MomentumOptimizer(
        learning_rate=0.001, momentum=0.9, parameter_list=parameter_list)
    return opt
# dev_ds.data_types = types place = F.CUDAPlace(1) with FD.guard(place): model = moco.builder.MoCo(args.moco_dim, args.moco_k, args.moco_m, args.moco_t, args.mlp) # if args.init_checkpoint is not None: # log.info('loading checkpoint from %s' % args.init_checkpoint) # sd, _ = FD.load_dygraph(args.init_checkpoint) # model.set_dict(sd) g_clip = F.clip.GradientClipByGlobalNorm(1.0) # experimental l2 = F.regularizer.L2Decay(regularization_coeff=args.weight_decay) opt = Mome(args.lr, parameter_list=model.parameters(), momentum=args.momentum, regularization=l2) num = (args.moco_k + 1) // args.batch_size for epoch in range(args.epochs): losses = [] for step, d in enumerate( tqdm(train_ds.start(place), desc='training')): sen_q, seg_q, sen_k, seg_k = d #print(sen_q.shape) #print(seg_q.shape) loss = model(sen_q, seg_q, sen_k, seg_k) loss.backward() opt.minimize(loss) model.clear_gradients() losses.append(loss.numpy()) print('[', epoch, ']', step, '/', num, 'loss:',
def test_raw_api(self):
    """Train the split/merge-LoD-tensor MNIST network until the average loss
    drops below 1.0; fail if it never converges within 100 passes.

    Fix: ``np.array(map(...))`` and the bare ``print`` statement are
    Python-2-only — under Python 3 ``map`` returns an iterator, which NumPy
    wraps as a 0-d object array. Replaced with list comprehensions and
    ``print()``.
    """
    prog = Program()
    startup_prog = Program()
    with program_guard(prog, startup_prog):
        image = layers.data(name='x', shape=[784], dtype='float32')
        label = layers.data(name='y', shape=[1], dtype='int64')

        # Per-sample routing condition: label < 5.
        limit = layers.fill_constant_batch_size_like(
            input=label, dtype='int64', shape=[1], value=5.0)
        cond = layers.less_than(x=label, y=limit)
        true_image, false_image = layers.split_lod_tensor(
            input=image, mask=cond)

        # True branch: 100-unit hidden layer feeding a 10-way softmax.
        true_out = layers.create_tensor(dtype='float32')
        true_cond = layers.ConditionalBlock([true_image])
        with true_cond.block():
            hidden = layers.fc(input=true_image, size=100, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=true_out)

        # False branch: 200-unit hidden layer feeding a 10-way softmax.
        false_out = layers.create_tensor(dtype='float32')
        false_cond = layers.ConditionalBlock([false_image])
        with false_cond.block():
            hidden = layers.fc(input=false_image, size=200, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            layers.assign(input=prob, output=false_out)

        # Stitch both branch outputs back into a single LoD tensor.
        prob = layers.merge_lod_tensor(
            in_true=true_out, in_false=false_out, mask=cond, x=image)
        loss = layers.cross_entropy(input=prob, label=label)
        avg_loss = layers.mean(loss)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, startup_prog)

    train_reader = paddle.batch(
        paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=8192),
        batch_size=200)

    place = core.CPUPlace()
    exe = Executor(place)
    exe.run(startup_prog)

    PASS_NUM = 100
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            # was: np.array(map(lambda x: x[0], data)) — Python-2-only
            x_data = np.array([x[0] for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = np.expand_dims(y_data, axis=1)

            outs = exe.run(prog,
                           feed={'x': x_data, 'y': y_data},
                           fetch_list=[avg_loss])
            print(outs[0])  # was: print outs[0] — Python-2-only
            if outs[0] < 1.0:
                # Converged — test passes.
                return
    self.assertFalse(True)
def check_network_convergence(self, use_cuda=True, use_mem_opt=False, iter_num=5):
    """Run ``iter_num`` training iterations of the IfElse MNIST network and
    return the list of fetched average losses, one entry per iteration.

    ``use_mem_opt`` toggles the build strategy's memory optimisation so
    callers can compare the loss trajectories of both configurations.
    """
    prog = Program()
    startup_prog = Program()
    # Fixed seeds so runs with and without memory optimisation are comparable.
    prog.random_seed = 100
    startup_prog.random_seed = 100

    with program_guard(prog, startup_prog):
        image = layers.data(name='x', shape=[784], dtype='float32')
        label = layers.data(name='y', shape=[1], dtype='int64')

        # Per-sample routing condition: label < 5.
        limit = layers.fill_constant(shape=[1], dtype='int64', value=5)
        cond = layers.less_than(x=label, y=limit)

        ie = layers.IfElse(cond)
        with ie.true_block():
            true_image = ie.input(image)
            hidden = layers.fc(input=true_image, size=100, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            ie.output(prob)
        with ie.false_block():
            false_image = ie.input(image)
            hidden = layers.fc(input=false_image, size=200, act='tanh')
            prob = layers.fc(input=hidden, size=10, act='softmax')
            ie.output(prob)

        prob = ie()
        loss = layers.cross_entropy(input=prob[0], label=label)
        avg_loss = layers.mean(loss)

        optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
        optimizer.minimize(avg_loss, startup_prog)

    train_reader = paddle.batch(paddle.dataset.mnist.train(), batch_size=200)

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = Executor(place)

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.use_cuda = use_cuda
    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = use_mem_opt

    # NOTE(review): this compiles fluid.default_main_program(), not the
    # locally built `prog` — presumably they are the same program in the
    # callers' setup; confirm before changing.
    train_cp = compiler.CompiledProgram(fluid.default_main_program())
    train_cp = train_cp.with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)
    fetch_list = [avg_loss.name]

    exe.run(startup_prog)

    PASS_NUM = 100
    loop = 0
    ret = []
    for pass_id in range(PASS_NUM):
        for data in train_reader():
            x_data = np.array([x[0] for x in data]).astype("float32")
            y_data = np.array([x[1] for x in data]).astype("int64")
            y_data = y_data.reshape((y_data.shape[0], 1))

            outs = exe.run(train_cp,
                           feed={'x': x_data, 'y': y_data},
                           fetch_list=[avg_loss])
            loop += 1
            ret.append(outs[0])
            if iter_num == loop:
                # Collected the requested number of iterations — stop early.
                return ret
    return ret