def create_rnn_op(self): x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot = layers.data(shape=[self.input_dim], dtype='float32', name='h_boot', **self.p_info) h_boot.stop_gradient = False rnn = layers.StaticRNN(main_program=self.main_program) with rnn.step(): h_pre = rnn.memory(init=h_boot) x_t = rnn.step_input(x) h = layers.scale(x=layers.elementwise_add(x=h_pre, y=x_t, **self.p_info), scale=self.py_rnn.scale, **self.p_info) rnn.update_memory(h_pre, h) rnn.output(h) return rnn()
def test_grad(self): place = core.CPUPlace() program = Program() x = layers.data( name='x', shape=[1], dtype='float32', main_program=program, stop_gradient=False) y = layers.data( name='y', shape=[1], dtype='bool', main_program=program, stop_gradient=False) level = 0 out_true, out_false = layers.split_lod_tensor( input=x, mask=y, level=level, main_program=program) out = layers.merge_lod_tensor( in_true=out_true, in_false=out_false, mask=y, x=x, level=level, main_program=program) mean = layers.mean(x=out, main_program=program) append_backward_ops(mean) tensor = core.LoDTensor() tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place) tensor.set_lod([[0, 3, 9, 10]]) mask_np = np.array([0, 1, 0]).astype('bool') mask_np = np.expand_dims(mask_np, axis=1) mask = core.LoDTensor() mask.set(mask_np, place) exe = Executor(place) scope = core.Scope() g_vars = program.global_block().var(x.name + "@GRAD") g_out = [ item.sum() for item in map(np.array, exe.run(program, feed={'x': tensor, 'y': mask}, fetch_list=[g_vars], scope=scope, return_numpy=False)) ] g_out_sum = np.array(g_out).sum() self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot = layers.data( shape=[self.input_dim], dtype='float32', name='h_boot', **self.p_info) h_boot.stop_gradient = False rnn = layers.StaticRNN(main_program=self.main_program) with rnn.step(): h_pre = rnn.memory(init=h_boot) x_t = rnn.step_input(x) h = layers.scale( x=layers.elementwise_add( x=h_pre, y=x_t, **self.p_info), scale=self.py_rnn.scale, **self.p_info) rnn.update_memory(h_pre, h) rnn.output(h) return rnn()
def create_rnn_op(self): x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot1 = layers.data(shape=[self.batch_size, self.input_dim], dtype='float32', name='h_boot1', append_batch_size=False, **self.p_info) h_boot1.stop_gradient = False h_boot2 = layers.data(shape=[self.batch_size, self.input_dim], dtype='float32', name='h_boot2', append_batch_size=False, **self.p_info) h_boot2.stop_gradient = False rnn = layers.StaticRNN(main_program=self.main_program) with rnn.step(): h_pre1 = rnn.memory(init=h_boot1) h_pre2 = rnn.memory(init=h_boot2) x_t = rnn.step_input(x) mem1 = layers.scale(x=h_pre1, scale=1.0, **self.p_info) mem2 = layers.scale(x=h_pre2, scale=1.0, **self.p_info) out = layers.sums(input=[mem1, x_t, mem2], **self.p_info) rnn.update_memory(h_pre1, mem1) rnn.update_memory(h_pre2, mem2) rnn.output(out) return rnn()
def test_recognize_digits_conv(self): program = Program() with program_guard(program, startup_program=Program()): images = layers.data(name='pixel', shape=[1, 28, 28], dtype='float32') label = layers.data(name='label', shape=[1], dtype='int32') conv_pool_1 = nets.simple_img_conv_pool(input=images, filter_size=5, num_filters=2, pool_size=2, pool_stride=2, act="relu") conv_pool_2 = nets.simple_img_conv_pool(input=conv_pool_1, filter_size=5, num_filters=4, pool_size=2, pool_stride=2, act="relu") predict = layers.fc(input=conv_pool_2, size=10, act="softmax") cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) program.append_backward(avg_cost) print(str(program))
def test_recognize_digits_conv(self): program = Program() with program_guard(program, startup_program=Program()): images = layers.data( name='pixel', shape=[1, 28, 28], dtype='float32') label = layers.data(name='label', shape=[1], dtype='int32') conv_pool_1 = nets.simple_img_conv_pool( input=images, filter_size=5, num_filters=2, pool_size=2, pool_stride=2, act="relu") conv_pool_2 = nets.simple_img_conv_pool( input=conv_pool_1, filter_size=5, num_filters=4, pool_size=2, pool_stride=2, act="relu") predict = layers.fc(input=conv_pool_2, size=10, act="softmax") cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) program.append_backward(avg_cost) print(str(program))
def test_sigmoid_cross_entropy(self): program = Program() with program_guard(program): dat = layers.data(name='data', shape=[10], dtype='float32') lbl = layers.data(name='label', shape=[10], dtype='float32') self.assertIsNotNone( layers.sigmoid_cross_entropy_with_logits(x=dat, label=lbl)) print(str(program))
def test_ifelse(self): kwargs = {'startup_program': Program(), 'main_program': Program()} image = layers.data(name='x', shape=[784], dtype='float32', **kwargs) label = layers.data(name='y', shape=[1], dtype='int64', **kwargs) limit = layers.fill_constant_batch_size_like( input=label, dtype='int64', shape=[1], value=5.0, **kwargs) cond = layers.less_than(x=label, y=limit, **kwargs) ie = layers.IfElse(cond, **kwargs) with ie.true_block(): true_image = ie.input(image) hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs) prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) ie.output(prob) with ie.false_block(): false_image = ie.input(image) hidden = layers.fc(input=false_image, size=200, act='tanh', **kwargs) prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) ie.output(prob) prob = ie() loss = layers.cross_entropy(input=prob[0], label=label, **kwargs) avg_loss = layers.mean(x=loss, **kwargs) optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer.minimize(avg_loss, kwargs['startup_program']) train_reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), batch_size=200) place = core.CPUPlace() exe = Executor(place) exe.run(kwargs['startup_program']) PASS_NUM = 100 for pass_id in range(PASS_NUM): for data in train_reader(): x_data = np.array(map(lambda x: x[0], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = y_data.reshape((y_data.shape[0], 1)) outs = exe.run(kwargs['main_program'], feed={'x': x_data, 'y': y_data}, fetch_list=[avg_loss]) print outs[0] if outs[0] < 1.0: return self.assertFalse(True)
def test_sigmoid_cross_entropy(self): program = Program() with program_guard(program): dat = layers.data(name='data', shape=[10], dtype='float32') lbl = layers.data(name='label', shape=[10], dtype='float32') self.assertIsNotNone( layers.sigmoid_cross_entropy_with_logits( x=dat, label=lbl)) print(str(program))
def get_usr_combined_features(): # FIXME(dzh) : old API integer_value(10) may has range check. # currently we don't have user configurated check. USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 uid = layers.data(name='user_id', shape=[1], dtype='int64') usr_emb = layers.embedding( input=uid, dtype='float32', size=[USR_DICT_SIZE, 32], param_attr='user_table', is_sparse=IS_SPARSE) usr_fc = layers.fc(input=usr_emb, size=32) USR_GENDER_DICT_SIZE = 2 usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') usr_gender_emb = layers.embedding( input=usr_gender_id, size=[USR_GENDER_DICT_SIZE, 16], param_attr='gender_table', is_sparse=IS_SPARSE) usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") usr_age_emb = layers.embedding( input=usr_age_id, size=[USR_AGE_DICT_SIZE, 16], is_sparse=IS_SPARSE, param_attr='age_table') usr_age_fc = layers.fc(input=usr_age_emb, size=16) USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") usr_job_emb = layers.embedding( input=usr_job_id, size=[USR_JOB_DICT_SIZE, 16], param_attr='job_table', is_sparse=IS_SPARSE) usr_job_fc = layers.fc(input=usr_job_emb, size=16) concat_embed = layers.concat( input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1) usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh") return usr_combined_features
def get_mov_combined_features(): MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') mov_emb = layers.embedding(input=mov_id, dtype='float32', size=[MOV_DICT_SIZE, 32], param_attr=smart_attr(MOV_DICT_SIZE, name='movie_table'), is_sparse=IS_SPARSE) mov_fc = layers.fc(input=mov_emb, size=32, param_attr=smart_attr(32)) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) # category_id = layers.data(name='category_id', shape=[1], dtype='int64') # mov_categories_emb = layers.embedding( # input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE, # param_attr=smart_attr(CATEGORY_DICT_SIZE)) # mov_categories_hidden = layers.sequence_pool( # input=mov_categories_emb, pool_type="sum") MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) mov_title_id = layers.data(name='movie_title', shape=[1], dtype='int64') mov_title_emb = layers.embedding( input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE, param_attr=smart_attr(MOV_TITLE_DICT_SIZE)) mov_title_conv = nets.sequence_conv_pool(input=mov_title_emb, num_filters=32, filter_size=3, act="tanh", pool_type="sum") concat_embed = layers.concat( # input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1) input=[mov_fc, mov_title_conv], axis=1) # FIXME(dzh) : need tanh operator mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh", param_attr=smart_attr(200)) return mov_combined_features
def get_usr_combined_features(): # FIXME(dzh) : old API integer_value(10) may has range check. # currently we don't have user configurated check. USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1 uid = layers.data(name='user_id', shape=[1], dtype='int64') usr_emb = layers.embedding(input=uid, dtype='float32', size=[USR_DICT_SIZE, 32], param_attr='user_table', is_sparse=IS_SPARSE) usr_fc = layers.fc(input=usr_emb, size=32) USR_GENDER_DICT_SIZE = 2 usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64') usr_gender_emb = layers.embedding(input=usr_gender_id, size=[USR_GENDER_DICT_SIZE, 16], param_attr='gender_table', is_sparse=IS_SPARSE) usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table) usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64") usr_age_emb = layers.embedding(input=usr_age_id, size=[USR_AGE_DICT_SIZE, 16], is_sparse=IS_SPARSE, param_attr='age_table') usr_age_fc = layers.fc(input=usr_age_emb, size=16) USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1 usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64") usr_job_emb = layers.embedding(input=usr_job_id, size=[USR_JOB_DICT_SIZE, 16], param_attr='job_table', is_sparse=IS_SPARSE) usr_job_fc = layers.fc(input=usr_job_emb, size=16) concat_embed = layers.concat( input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc], axis=1) usr_combined_features = layers.fc(input=concat_embed, size=200, act="tanh") return usr_combined_features
def test_fit_a_line(self): program = Program() with program_guard(program, startup_program=Program()): x = layers.data(name='x', shape=[13], dtype='float32') y_predict = layers.fc(input=x, size=1, act=None) y = layers.data(name='y', shape=[1], dtype='float32') cost = layers.square_error_cost(input=y_predict, label=y) avg_cost = layers.mean(x=cost) self.assertIsNotNone(avg_cost) program.append_backward(avg_cost) print(str(program))
def test_simple_forward(self): d0 = layers.data( "d0", shape=[10], append_batch_size=False, dtype='float32') d1 = layers.data( "d1", shape=[10], append_batch_size=False, dtype='float32') d2 = layers.data( "d2", shape=[10], append_batch_size=False, dtype='float32') i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = True init = layers.zeros(shape=[10], dtype='float32') mem_array = layers.array_write(x=init, i=i) data_array = layers.array_write(x=d0, i=i) i = layers.increment(i) layers.array_write(d1, i, array=data_array) i = layers.increment(i) layers.array_write(d2, i, array=data_array) i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = True array_len = layers.fill_constant(shape=[1], dtype='int64', value=3) array_len.stop_gradient = True cond = layers.less_than(x=i, y=array_len) while_op = layers.While(cond=cond) with while_op.block(): d = layers.array_read(array=data_array, i=i) prev = layers.array_read(array=mem_array, i=i) result = layers.sums(input=[d, prev]) i = layers.increment(x=i, in_place=True) layers.array_write(result, i=i, array=mem_array) layers.less_than(x=i, y=array_len, cond=cond) sum_result = layers.array_read(array=mem_array, i=i) loss = layers.mean(x=sum_result) append_backward_ops(loss) cpu = core.CPUPlace() exe = Executor(cpu) d = [] for i in xrange(3): d.append(numpy.random.random(size=[10]).astype('float32')) outs = exe.run(feed={'d0': d[0], 'd1': d[1], 'd2': d[2]}, fetch_list=[sum_result]) self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01)
def test_recognize_digits_mlp(self): program = Program() with program_guard(program, startup_program=Program()): # Change g_program, so the rest layers use `g_program` images = layers.data(name='pixel', shape=[784], dtype='float32') label = layers.data(name='label', shape=[1], dtype='int32') hidden1 = layers.fc(input=images, size=128, act='relu') hidden2 = layers.fc(input=hidden1, size=64, act='relu') predict = layers.fc(input=hidden2, size=10, act='softmax') cost = layers.cross_entropy(input=predict, label=label) avg_cost = layers.mean(x=cost) self.assertIsNotNone(avg_cost) print(str(program))
def main(self, tensor, mask, expect_true, expect_false, expect_out, level=0): place = self.place() program = Program() x = layers.data(name='x', shape=[1], main_program=program) x.persistable = True y = layers.data(name='y', shape=[1], main_program=program) y.persistable = True out_true, out_false = layers.split_lod_tensor(input=x, mask=y, level=level, main_program=program) out_true.persistable = True out_false.persistable = True out = layers.merge_lod_tensor(in_true=out_true, in_false=out_false, mask=y, x=x, level=level, main_program=program) out.persistable = True exe = Executor(place) scope = core.Scope() exe.run(program, feed={ 'x': tensor, 'y': mask }, scope=scope, return_numpy=False) var_true = scope.find_var(out_true.name).get_tensor() var_false = scope.find_var(out_false.name).get_tensor() var_out = scope.find_var(out.name).get_tensor() self.check_tensor_same(var_true, expect_true) self.check_tensor_same(var_false, expect_false) self.check_tensor_same(var_out, expect_out)
def main(self, tensor, expect_array, expect_lod, expect_max_len, level=0): place = self.place() program = Program() x = layers.data(name='x', shape=[10], main_program=program) x.persistable = True table = layers.lod_rank_table(x, level=level, main_program=program) max_len = layers.max_sequence_len(table, main_program=program) max_len.persistable = True array = layers.lod_tensor_to_array(x, table, main_program=program) array.persistable = True result = layers.array_to_lod_tensor(array, table, main_program=program) result.persistable = True exe = Executor(place) scope = core.Scope() exe.run(program, feed={'x': tensor}, scope=scope) var = scope.find_var(array.name) array = var.get_lod_tensor_array() if expect_array is not None and expect_lod is not None: self.check_array_same(array, expect_array, expect_lod) self.check_tensor_same( scope.find_var(result.name).get_tensor(), tensor) self.assertEqual( numpy.array(scope.find_var(max_len.name).get_tensor())[0], expect_max_len)
def main(self, tensor, expect_array, expect_lod, expect_max_len, level=0): place = self.place() program = Program() x = layers.data(name='x', shape=[10], main_program=program) x.persistable = True table = layers.lod_rank_table(x, level=level, main_program=program) max_len = layers.max_sequence_len(table, main_program=program) max_len.persistable = True array = layers.lod_tensor_to_array(x, table, main_program=program) array.persistable = True result = layers.array_to_lod_tensor(array, table, main_program=program) result.persistable = True exe = Executor(place) scope = core.Scope() exe.run(program, feed={'x': tensor}, scope=scope) var = scope.find_var(array.name) array = var.get_lod_tensor_array() if expect_array is not None and expect_lod is not None: self.check_array_same(array, expect_array, expect_lod) self.check_tensor_same(scope.find_var(result.name).get_tensor(), tensor) self.assertEqual( numpy.array(scope.find_var(max_len.name).get_tensor())[0], expect_max_len)
def test_simple_conv2d(self): program = Program() with program_guard(program, startup_program=Program()): images = layers.data(name='pixel', shape=[3, 48, 48], dtype='int32') layers.conv2d(input=images, num_filters=3, filter_size=[4, 4]) print(str(program))
def test_grad(self): place = core.CPUPlace() program = Program() x = layers.data( name='x', shape=[1], dtype='float32', main_program=program, stop_gradient=False) table = layers.lod_rank_table(x, level=0, main_program=program) array = layers.lod_tensor_to_array(x, table, main_program=program) result = layers.array_to_lod_tensor(array, table, main_program=program) mean = layers.mean(x=result, main_program=program) append_backward_ops(mean) tensor = core.LoDTensor() tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place) tensor.set_lod([[0, 3, 9, 10]]) g_vars = program.global_block().var(x.name + "@GRAD") exe = Executor(place) g_out = [ numpy.array(item).sum() for item in exe.run(program, feed={'x': tensor}, fetch_list=[g_vars], return_numpy=False) ] g_out_sum = numpy.array(g_out).sum() self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
def test_grad(self): place = core.CPUPlace() program = Program() x = layers.data(name='x', shape=[1], dtype='float32', main_program=program, stop_gradient=False) table = layers.lod_rank_table(x, level=0, main_program=program) array = layers.lod_tensor_to_array(x, table, main_program=program) result = layers.array_to_lod_tensor(array, table, main_program=program) mean = layers.mean(x=result, main_program=program) append_backward_ops(mean) tensor = core.LoDTensor() tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place) tensor.set_lod([[0, 3, 9, 10]]) g_vars = program.global_block().var(x.name + "@GRAD") exe = Executor(place) g_out = [ numpy.array(item).sum() for item in exe.run(program, feed={'x': tensor}, fetch_list=[g_vars], return_numpy=False) ] g_out_sum = numpy.array(g_out).sum() self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
def test_forward(self): data = layers.data(name='X', shape=[1], dtype='float32') data.stop_gradient = False cond = layers.ConditionalBlock(inputs=[data]) out = layers.create_tensor(dtype='float32') with cond.block(): hidden = layers.fc(input=data, size=10) layers.assign(hidden, out) cpu = core.CPUPlace() exe = Executor(cpu) exe.run(default_startup_program()) x = numpy.random.random(size=(10, 1)).astype('float32') outs = exe.run(feed={'X': x}, fetch_list=[out])[0] print outs loss = layers.mean(x=out) append_backward_ops(loss=loss) outs = exe.run(feed={'X': x}, fetch_list=[ default_main_program().block(0).var(data.name + "@GRAD") ])[0] print outs
def test_forward(self): data = layers.data(name='X', shape=[1], dtype='float32') data.stop_gradient = False cond = layers.ConditionalBlock(inputs=[data]) out = layers.create_tensor(dtype='float32') with cond.block(): hidden = layers.fc(input=data, size=10) layers.assign(hidden, out) cpu = core.CPUPlace() exe = Executor(cpu) exe.run(default_startup_program()) x = numpy.random.random(size=(10, 1)).astype('float32') outs = exe.run(feed={'X': x}, fetch_list=[out])[0] print outs loss = layers.mean(x=out) append_backward_ops(loss=loss) outs = exe.run( feed={'X': x}, fetch_list=[ default_main_program().block(0).var(data.name + "@GRAD") ])[0] print outs
def test_mul(self): a = data(name='a', shape=[784], dtype='float32') b = data( name='b', shape=[784, 100], dtype='float32', append_batch_size=False) out = mul(x=a, y=b) place = core.CPUPlace() a_np = numpy.random.random((100, 784)).astype('float32') b_np = numpy.random.random((784, 100)).astype('float32') exe = Executor(place) outs = exe.run(feed={'a': a_np, 'b': b_np}, fetch_list=[out]) out = outs[0] self.assertEqual((100, 100), out.shape) self.assertTrue(numpy.allclose(out, numpy.dot(a_np, b_np)))
def test_shrink_rnn_memory(self): x = layers.data('x', shape=[100], dtype='float32') x.stop_gradient = False table = layers.lod_rank_table(x=x) i = layers.zeros(dtype='int64', shape=[1]) mem1 = layers.shrink_memory(x=x, i=i, table=table) i = layers.increment(x=i) i.stop_gradient = True mem2 = layers.shrink_memory(x=mem1, i=i, table=table) i = layers.increment(x=i) i.stop_gradient = True mem3 = layers.shrink_memory(x=mem2, i=i, table=table) cpu = core.CPUPlace() tensor = core.LoDTensor() tensor.set_lod([[0, 2, 5, 6]]) tensor_np = numpy.random.random(size=(3, 100)).astype('float32') tensor.set(tensor_np, cpu) exe = Executor(cpu) outs = exe.run(feed={'x': tensor}, fetch_list=[mem1, mem2, mem3]) self.assertTrue(numpy.allclose(tensor_np[0:3], outs[0])) self.assertTrue(numpy.allclose(tensor_np[0:2], outs[1])) self.assertTrue(numpy.allclose(tensor_np[0:1], outs[2])) mem3_mean = layers.mean(x=mem3) append_backward_ops(loss=mem3_mean) x_grad = exe.run( feed={'x': tensor}, fetch_list=[main_program.global_block().var('x@GRAD')])[0] self.assertAlmostEqual(1.0, x_grad.sum(), delta=0.1)
def test_word_embedding(self): program = Program() with program_guard(program, startup_program=Program()): dict_size = 10000 embed_size = 32 first_word = layers.data(name='firstw', shape=[1], dtype='int64') second_word = layers.data(name='secondw', shape=[1], dtype='int64') third_word = layers.data(name='thirdw', shape=[1], dtype='int64') forth_word = layers.data(name='forthw', shape=[1], dtype='int64') next_word = layers.data(name='nextw', shape=[1], dtype='int64') embed_first = layers.embedding( input=first_word, size=[dict_size, embed_size], dtype='float32', param_attr='shared_w') embed_second = layers.embedding( input=second_word, size=[dict_size, embed_size], dtype='float32', param_attr='shared_w') embed_third = layers.embedding( input=third_word, size=[dict_size, embed_size], dtype='float32', param_attr='shared_w') embed_forth = layers.embedding( input=forth_word, size=[dict_size, embed_size], dtype='float32', param_attr='shared_w') concat_embed = layers.concat( input=[embed_first, embed_second, embed_third, embed_forth], axis=1) hidden1 = layers.fc(input=concat_embed, size=256, act='sigmoid') predict_word = layers.fc(input=hidden1, size=dict_size, act='softmax') cost = layers.cross_entropy(input=predict_word, label=next_word) avg_cost = layers.mean(x=cost) self.assertIsNotNone(avg_cost) print(str(program))
def get_mov_combined_features(): MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1 mov_id = layers.data(name='movie_id', shape=[1], dtype='int64') mov_emb = layers.embedding( input=mov_id, dtype='float32', size=[MOV_DICT_SIZE, 32], param_attr='movie_table', is_sparse=IS_SPARSE) mov_fc = layers.fc(input=mov_emb, size=32) CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories()) category_id = layers.data(name='category_id', shape=[1], dtype='int64') mov_categories_emb = layers.embedding( input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE) mov_categories_hidden = layers.sequence_pool( input=mov_categories_emb, pool_type="sum") MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict()) mov_title_id = layers.data(name='movie_title', shape=[1], dtype='int64') mov_title_emb = layers.embedding( input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE) mov_title_conv = nets.sequence_conv_pool( input=mov_title_emb, num_filters=32, filter_size=3, act="tanh", pool_type="sum") concat_embed = layers.concat( input=[mov_fc, mov_categories_hidden, mov_title_conv], axis=1) # FIXME(dzh) : need tanh operator mov_combined_features = layers.fc(input=concat_embed, size=200, act="tanh") return mov_combined_features
def main(self, tensor, mask, expect_true, expect_false, expect_out, level=0): place = self.place() program = Program() x = layers.data(name='x', shape=[1], main_program=program) x.persistable = True y = layers.data(name='y', shape=[1], main_program=program) y.persistable = True out_true, out_false = layers.split_lod_tensor( input=x, mask=y, level=level, main_program=program) out_true.persistable = True out_false.persistable = True out = layers.merge_lod_tensor( in_true=out_true, in_false=out_false, mask=y, x=x, level=level, main_program=program) out.persistable = True exe = Executor(place) scope = core.Scope() exe.run(program, feed={'x': tensor, 'y': mask}, scope=scope, return_numpy=False) var_true = scope.find_var(out_true.name).get_tensor() var_false = scope.find_var(out_false.name).get_tensor() var_out = scope.find_var(out.name).get_tensor() self.check_tensor_same(var_true, expect_true) self.check_tensor_same(var_false, expect_false) self.check_tensor_same(var_out, expect_out)
def test_word_embedding(self): program = Program() with program_guard(program, startup_program=Program()): dict_size = 10000 embed_size = 32 first_word = layers.data(name='firstw', shape=[1], dtype='int64') second_word = layers.data(name='secondw', shape=[1], dtype='int64') third_word = layers.data(name='thirdw', shape=[1], dtype='int64') forth_word = layers.data(name='forthw', shape=[1], dtype='int64') next_word = layers.data(name='nextw', shape=[1], dtype='int64') embed_first = layers.embedding(input=first_word, size=[dict_size, embed_size], dtype='float32', param_attr='shared_w') embed_second = layers.embedding(input=second_word, size=[dict_size, embed_size], dtype='float32', param_attr='shared_w') embed_third = layers.embedding(input=third_word, size=[dict_size, embed_size], dtype='float32', param_attr='shared_w') embed_forth = layers.embedding(input=forth_word, size=[dict_size, embed_size], dtype='float32', param_attr='shared_w') concat_embed = layers.concat( input=[embed_first, embed_second, embed_third, embed_forth], axis=1) hidden1 = layers.fc(input=concat_embed, size=256, act='sigmoid') predict_word = layers.fc(input=hidden1, size=dict_size, act='softmax') cost = layers.cross_entropy(input=predict_word, label=next_word) avg_cost = layers.mean(x=cost) self.assertIsNotNone(avg_cost) print(str(program))
def test_linear_chain_crf(self): program = Program() with program_guard(program, startup_program=Program()): label_dict_len = 10 images = layers.data(name='pixel', shape=[784], dtype='float32') label = layers.data(name='label', shape=[1], dtype='int32') hidden = layers.fc(input=images, size=128) crf = layers.linear_chain_crf( input=hidden, label=label, param_attr=ParamAttr(name="crfw")) crf_decode = layers.crf_decoding( input=hidden, param_attr=ParamAttr(name="crfw")) layers.chunk_eval( input=crf_decode, label=label, chunk_scheme="IOB", num_chunk_types=(label_dict_len - 1) / 2) self.assertNotEqual(crf, None) self.assertNotEqual(crf_decode, None) print(str(program))
def test_linear_chain_crf(self): program = Program() with program_guard(program, startup_program=Program()): label_dict_len = 10 images = layers.data(name='pixel', shape=[784], dtype='float32') label = layers.data(name='label', shape=[1], dtype='int32') hidden = layers.fc(input=images, size=128) crf = layers.linear_chain_crf(input=hidden, label=label, param_attr=ParamAttr(name="crfw")) crf_decode = layers.crf_decoding(input=hidden, param_attr=ParamAttr(name="crfw")) layers.chunk_eval(input=crf_decode, label=label, chunk_scheme="IOB", num_chunk_types=(label_dict_len - 1) / 2) self.assertNotEqual(crf, None) self.assertNotEqual(crf_decode, None) print(str(program))
def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot = layers.data( shape=[self.input_dim], dtype='float32', name='h_boot', **self.p_info) h_boot.stop_gradient = False rnn = layers.StaticRNN(main_program=self.main_program) with rnn.step(): h_pre = rnn.memory(init=h_boot) x_t = rnn.step_input(x) temp_l = layers.fc(input=x_t, size=self.input_dim, param_attr='W', bias_attr=False, **self.p_info) temp_r = layers.fc(input=h_pre, size=self.input_dim, param_attr='U', bias_attr=False, **self.p_info) h = layers.sigmoid( x=layers.elementwise_add( x=temp_l, y=temp_r, **self.p_info), **self.p_info) rnn.update_memory(h_pre, h) rnn.output(h) return rnn()
def test_program_clone_with_parameter(self): main_program = Program() startup_program = Program() kwargs = { 'main_program': main_program, 'startup_program': startup_program } d = layers.data(name='x', shape=[784], dtype='float32', **kwargs) hidden = layers.fc(input=d, size=100, **kwargs) layers.fc(input=hidden, size=100, **kwargs) new_program = main_program.clone() self.assertNotEqual(0, len(new_program.blocks[0].all_parameters()))
def encoder_decoder(): # encoder src_word_id = layers.data(name="src_word_id", shape=[1], dtype='int64', lod_level=1) src_embedding = layers.embedding(input=src_word_id, size=[dict_size, word_dim], dtype='float32', is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr(name='vemb')) fc1 = fluid.layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh') lstm_hidden0, lstm_0 = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4) encoder_out = layers.sequence_pool(input=lstm_hidden0, pool_type="last") # decoder trg_language_word = layers.data(name="target_language_word", shape=[1], dtype='int64', lod_level=1) trg_embedding = layers.embedding(input=trg_language_word, size=[dict_size, word_dim], dtype='float32', is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr(name='vemb')) rnn = fluid.layers.DynamicRNN() with rnn.block(): current_word = rnn.step_input(trg_embedding) mem = rnn.memory(init=encoder_out) fc1 = fluid.layers.fc(input=[current_word, mem], size=decoder_size, act='tanh') out = fluid.layers.fc(input=fc1, size=target_dict_dim, act='softmax') rnn.update_memory(mem, fc1) rnn.output(out) return rnn()
def create_rnn_op(self): x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot = layers.data(shape=[self.input_dim], dtype='float32', name='h_boot', **self.p_info) h_boot.stop_gradient = False rnn = layers.StaticRNN(main_program=self.main_program) with rnn.step(): h_pre = rnn.memory(init=h_boot) x_t = rnn.step_input(x) temp_l = layers.fc(input=x_t, size=self.input_dim, param_attr='W', bias_attr=False, **self.p_info) temp_r = layers.fc(input=h_pre, size=self.input_dim, param_attr='U', bias_attr=False, **self.p_info) h = layers.sigmoid(x=layers.elementwise_add(x=temp_l, y=temp_r, **self.p_info), **self.p_info) rnn.update_memory(h_pre, h) rnn.output(h) return rnn()
def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False h_boot1 = layers.data( shape=[self.batch_size, self.input_dim], dtype='float32', name='h_boot1', append_batch_size=False, **self.p_info) h_boot1.stop_gradient = False h_boot2 = layers.data( shape=[self.batch_size, self.input_dim], dtype='float32', name='h_boot2', append_batch_size=False, **self.p_info) h_boot2.stop_gradient = False rnn = layers.StaticRNN(main_program=self.main_program) with rnn.step(): h_pre1 = rnn.memory(init=h_boot1) h_pre2 = rnn.memory(init=h_boot2) x_t = rnn.step_input(x) mem1 = layers.scale(x=h_pre1, scale=1.0, **self.p_info) mem2 = layers.scale(x=h_pre2, scale=1.0, **self.p_info) out = layers.sums(input=[mem1, x_t, mem2], **self.p_info) rnn.update_memory(h_pre1, mem1) rnn.update_memory(h_pre2, mem2) rnn.output(out) return rnn()
def model(): usr_combined_features = get_usr_combined_features() mov_combined_features = get_mov_combined_features() # need cos sim inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features) label = layers.data(name='score', shape=[1], dtype='float32') square_cost = layers.square_error_cost(input=inference, label=label) avg_cost = layers.mean(x=square_cost) return avg_cost
def encoder_decoder(): # encoder src_word_id = layers.data( name="src_word_id", shape=[1], dtype='int64', lod_level=1) src_embedding = layers.embedding( input=src_word_id, size=[dict_size, word_dim], dtype='float32', is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr(name='vemb')) fc1 = fluid.layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh') lstm_hidden0, lstm_0 = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4) encoder_out = layers.sequence_pool(input=lstm_hidden0, pool_type="last") # decoder trg_language_word = layers.data( name="target_language_word", shape=[1], dtype='int64', lod_level=1) trg_embedding = layers.embedding( input=trg_language_word, size=[dict_size, word_dim], dtype='float32', is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr(name='vemb')) rnn = fluid.layers.DynamicRNN() with rnn.block(): current_word = rnn.step_input(trg_embedding) mem = rnn.memory(init=encoder_out) fc1 = fluid.layers.fc(input=[current_word, mem], size=decoder_size, act='tanh') out = fluid.layers.fc(input=fc1, size=target_dict_dim, act='softmax') rnn.update_memory(mem, fc1) rnn.output(out) return rnn()
def test_lod_rank_table(self): x = data(name='x', shape=[100]) cpu = core.CPUPlace() rank_table = lod_rank_table(x=x, level=1) rank_table.persistable = True exe = Executor(cpu) scope = core.Scope() tensor = core.LoDTensor() tensor.set(numpy.random.random(size=(17, 100)), cpu) tensor.set_lod([[0, 1, 3], [0, 5, 6, 7], [0, 3, 4, 9, 10, 13, 16, 17]]) exe.run(scope=scope, feed={'x': tensor}) var = scope.find_var(rank_table.name) table = var.get_lod_rank_table() self.assertEqual([(0, 5), (1, 1), (2, 1)], table.items())
def test_nvprof(self): if not fluid.core.is_compile_gpu(): return epoc = 8 dshape = [4, 3, 28, 28] data = layers.data(name='data', shape=[3, 28, 28], dtype='float32') conv = layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1]) place = fluid.GPUPlace(0) exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof: for i in range(epoc): input = np.random.random(dshape).astype('float32') exe.run(fluid.default_main_program(), feed={'data': input})
def create_rnn_op(self): x = layers.data(shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False rnn = layers.StaticRNN(main_program=self.main_program) with rnn.step(): mem_pre = rnn.memory(shape=[-1, self.input_dim], batch_ref=x) x_t = rnn.step_input(x) mem = layers.elementwise_add(x=mem_pre, y=x_t, **self.p_info) rnn.update_memory(mem_pre, mem) rnn.output(mem) return rnn()
def create_rnn_op(self): x = layers.data( shape=[self.sent_len, self.batch_size, self.input_dim], dtype='float32', name='x', append_batch_size=False, **self.p_info) x.stop_gradient = False rnn = layers.StaticRNN(main_program=self.main_program) with rnn.step(): mem_pre = rnn.memory(shape=[-1, self.input_dim], batch_ref=x) x_t = rnn.step_input(x) mem = layers.elementwise_add(x=mem_pre, y=x_t, **self.p_info) rnn.update_memory(mem_pre, mem) rnn.output(mem) return rnn()
def main(): rnn_out = encoder_decoder() label = layers.data(name="target_language_next_word", shape=[1], dtype='int64', lod_level=1) cost = layers.cross_entropy(input=rnn_out, label=label) avg_cost = fluid.layers.mean(x=cost) optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4) optimizer.minimize(avg_cost) train_data = paddle.batch(paddle.reader.shuffle( paddle.dataset.wmt14.train(dict_size), buf_size=1000), batch_size=batch_size) place = core.CPUPlace() exe = Executor(place) exe.run(framework.default_startup_program()) batch_id = 0 for pass_id in xrange(2): for data in train_data(): word_data = to_lodtensor(map(lambda x: x[0], data), place) trg_word = to_lodtensor(map(lambda x: x[1], data), place) trg_word_next = to_lodtensor(map(lambda x: x[2], data), place) outs = exe.run(framework.default_main_program(), feed={ 'src_word_id': word_data, 'target_language_word': trg_word, 'target_language_next_word': trg_word_next }, fetch_list=[avg_cost]) avg_cost_val = np.array(outs[0]) print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) + " avg_cost=" + str(avg_cost_val)) if batch_id > 3: exit(0) batch_id += 1
def main(): rnn_out = encoder_decoder() label = layers.data( name="target_language_next_word", shape=[1], dtype='int64', lod_level=1) cost = layers.cross_entropy(input=rnn_out, label=label) avg_cost = fluid.layers.mean(x=cost) optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4) optimizer.minimize(avg_cost) train_data = paddle.batch( paddle.reader.shuffle( paddle.dataset.wmt14.train(dict_size), buf_size=1000), batch_size=batch_size) place = core.CPUPlace() exe = Executor(place) exe.run(framework.default_startup_program()) batch_id = 0 for pass_id in xrange(2): for data in train_data(): word_data = to_lodtensor(map(lambda x: x[0], data), place) trg_word = to_lodtensor(map(lambda x: x[1], data), place) trg_word_next = to_lodtensor(map(lambda x: x[2], data), place) outs = exe.run(framework.default_main_program(), feed={ 'src_word_id': word_data, 'target_language_word': trg_word, 'target_language_next_word': trg_word_next }, fetch_list=[avg_cost]) avg_cost_val = np.array(outs[0]) print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) + " avg_cost=" + str(avg_cost_val)) if batch_id > 3: exit(0) batch_id += 1
def test_fit_line_inference_model(self): MODEL_DIR = "./tmp/inference_model" init_program = Program() program = Program() x = layers.data( name='x', shape=[2], dtype='float32', main_program=program, startup_program=init_program) y = layers.data( name='y', shape=[1], dtype='float32', main_program=program, startup_program=init_program) y_predict = layers.fc(input=x, size=1, act=None, main_program=program, startup_program=init_program) cost = layers.square_error_cost( input=y_predict, label=y, main_program=program, startup_program=init_program) avg_cost = layers.mean( x=cost, main_program=program, startup_program=init_program) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer.minimize(avg_cost, init_program) place = core.CPUPlace() exe = executor.Executor(place) exe.run(init_program, feed={}, fetch_list=[]) for i in xrange(100): tensor_x = np.array( [[1, 1], [1, 2], [3, 4], [5, 2]]).astype("float32") tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32") exe.run(program, feed={'x': tensor_x, 'y': tensor_y}, fetch_list=[avg_cost]) save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program) expected = exe.run(program, feed={'x': tensor_x, 'y': tensor_y}, fetch_list=[avg_cost])[0] reload(executor) # reload to build a new scope exe = executor.Executor(place) [infer_prog, feed_var_names, fetch_vars] = load_inference_model( MODEL_DIR, exe) outs = exe.run( infer_prog, feed={feed_var_names[0]: tensor_x, feed_var_names[1]: tensor_y}, fetch_list=fetch_vars) actual = outs[0] self.assertEqual(feed_var_names, ["x", "y"]) self.assertEqual(len(fetch_vars), 1) self.assertEqual(str(fetch_vars[0]), str(avg_cost)) self.assertEqual(expected, actual)
def test_read_write(self): x = [ layers.data(name='x0', shape=[100]), layers.data(name='x1', shape=[100]), layers.data(name='x2', shape=[100]) ] for each_x in x: each_x.stop_gradient = False i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = False arr = layers.array_write(x=x[0], i=i) i = layers.increment(x=i) arr = layers.array_write(x=x[1], i=i, array=arr) i = layers.increment(x=i) arr = layers.array_write(x=x[2], i=i, array=arr) i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = False a0 = layers.array_read(array=arr, i=i) i = layers.increment(x=i) a1 = layers.array_read(array=arr, i=i) i = layers.increment(x=i) a2 = layers.array_read(array=arr, i=i) mean_a0 = layers.mean(x=a0) mean_a1 = layers.mean(x=a1) mean_a2 = layers.mean(x=a2) a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2]) mean_x0 = layers.mean(x=x[0]) mean_x1 = layers.mean(x=x[1]) mean_x2 = layers.mean(x=x[2]) x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2]) scope = core.Scope() cpu = core.CPUPlace() exe = Executor(cpu) tensor = numpy.random.random(size=(100, 100)).astype('float32') outs = exe.run(feed={ 'x0': tensor, 'x1': tensor, 'x2': tensor }, fetch_list=[a_sum, x_sum], scope=scope) self.assertEqual(outs[0], outs[1]) total_sum = layers.sums(input=[a_sum, x_sum]) total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0) append_backward_ops(total_sum_scaled) g_vars = map(default_main_program().global_block().var, [each_x.name + "@GRAD" for each_x in x]) g_out = [ item.sum() for item in exe.run(feed={ 'x0': tensor, 'x1': tensor, 'x2': tensor }, fetch_list=g_vars) ] g_out_sum = numpy.array(g_out).sum() # since our final gradient is 1 and the neural network are all linear # with mean_op. # the input gradient should also be 1 self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
def test_raw_api(self): kwargs = {'startup_program': Program(), 'main_program': Program()} image = layers.data(name='x', shape=[784], dtype='float32', **kwargs) label = layers.data(name='y', shape=[1], dtype='int64', **kwargs) limit = layers.fill_constant_batch_size_like(input=label, dtype='int64', shape=[1], value=5.0, **kwargs) cond = layers.less_than(x=label, y=limit, **kwargs) true_image, false_image = layers.split_lod_tensor(input=image, mask=cond, **kwargs) true_out = layers.create_tensor(dtype='float32', **kwargs) true_cond = layers.ConditionalBlock([true_image], **kwargs) with true_cond.block(): hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs) prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) layers.assign(input=prob, output=true_out, **kwargs) false_out = layers.create_tensor(dtype='float32', **kwargs) false_cond = layers.ConditionalBlock([false_image], **kwargs) with false_cond.block(): hidden = layers.fc(input=false_image, size=200, act='tanh', **kwargs) prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) layers.assign(input=prob, output=false_out, **kwargs) prob = layers.merge_lod_tensor(in_true=true_out, in_false=false_out, mask=cond, x=image, **kwargs) loss = layers.cross_entropy(input=prob, label=label, **kwargs) avg_loss = layers.mean(x=loss, **kwargs) optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer.minimize(avg_loss, kwargs['startup_program']) train_reader = paddle.batch(paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), batch_size=200) place = core.CPUPlace() exe = Executor(place) exe.run(kwargs['startup_program']) PASS_NUM = 100 for pass_id in range(PASS_NUM): for data in train_reader(): x_data = np.array(map(lambda x: x[0], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.expand_dims(y_data, axis=1) outs = exe.run(kwargs['main_program'], feed={ 'x': x_data, 'y': y_data }, fetch_list=[avg_loss]) print outs[0] if outs[0] < 1.0: return self.assertFalse(True)
def test_conv2d_transpose(self): program = Program() with program_guard(program): img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32') layers.conv2d_transpose(input=img, num_filters=10, output_size=28) print(str(program))
def test_read_write(self): x = [ layers.data( name='x0', shape=[100]), layers.data( name='x1', shape=[100]), layers.data( name='x2', shape=[100]) ] for each_x in x: each_x.stop_gradient = False i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = False arr = layers.array_write(x=x[0], i=i) i = layers.increment(x=i) arr = layers.array_write(x=x[1], i=i, array=arr) i = layers.increment(x=i) arr = layers.array_write(x=x[2], i=i, array=arr) i = layers.zeros(shape=[1], dtype='int64') i.stop_gradient = False a0 = layers.array_read(array=arr, i=i) i = layers.increment(x=i) a1 = layers.array_read(array=arr, i=i) i = layers.increment(x=i) a2 = layers.array_read(array=arr, i=i) mean_a0 = layers.mean(x=a0) mean_a1 = layers.mean(x=a1) mean_a2 = layers.mean(x=a2) a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2]) mean_x0 = layers.mean(x=x[0]) mean_x1 = layers.mean(x=x[1]) mean_x2 = layers.mean(x=x[2]) x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2]) scope = core.Scope() cpu = core.CPUPlace() exe = Executor(cpu) tensor = numpy.random.random(size=(100, 100)).astype('float32') outs = exe.run(feed={'x0': tensor, 'x1': tensor, 'x2': tensor}, fetch_list=[a_sum, x_sum], scope=scope) self.assertEqual(outs[0], outs[1]) total_sum = layers.sums(input=[a_sum, x_sum]) total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0) append_backward_ops(total_sum_scaled) g_vars = map(default_main_program().global_block().var, [each_x.name + "@GRAD" for each_x in x]) g_out = [ item.sum() for item in exe.run( feed={'x0': tensor, 'x1': tensor, 'x2': tensor}, fetch_list=g_vars) ] g_out_sum = numpy.array(g_out).sum() # since our final gradient is 1 and the neural network are all linear # with mean_op. # the input gradient should also be 1 self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
import paddle.v2.fluid.evaluator as evaluator import paddle.v2.fluid.framework as framework import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.optimizer import AdamOptimizer from paddle.v2.fluid.initializer import NormalInitializer import numpy as np import time BATCH_SIZE = 128 PASS_NUM = 5 SEED = 1 DTYPE = "float32" images = layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) label = layers.data(name='label', shape=[1], dtype='int64') conv_pool_1 = nets.simple_img_conv_pool(input=images, filter_size=5, num_filters=20, pool_size=2, pool_stride=2, act="relu") conv_pool_2 = nets.simple_img_conv_pool(input=conv_pool_1, filter_size=5, num_filters=50, pool_size=2, pool_stride=2, act="relu") # TODO(dzhwinter) : refine the initializer and random seed settting
def test_raw_api(self): kwargs = {'startup_program': Program(), 'main_program': Program()} image = layers.data(name='x', shape=[784], dtype='float32', **kwargs) label = layers.data(name='y', shape=[1], dtype='int64', **kwargs) limit = layers.fill_constant_batch_size_like( input=label, dtype='int64', shape=[1], value=5.0, **kwargs) cond = layers.less_than(x=label, y=limit, **kwargs) true_image, false_image = layers.split_lod_tensor( input=image, mask=cond, **kwargs) true_out = layers.create_tensor(dtype='float32', **kwargs) true_cond = layers.ConditionalBlock([true_image], **kwargs) with true_cond.block(): hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs) prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) layers.assign(input=prob, output=true_out, **kwargs) false_out = layers.create_tensor(dtype='float32', **kwargs) false_cond = layers.ConditionalBlock([false_image], **kwargs) with false_cond.block(): hidden = layers.fc(input=false_image, size=200, act='tanh', **kwargs) prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) layers.assign(input=prob, output=false_out, **kwargs) prob = layers.merge_lod_tensor( in_true=true_out, in_false=false_out, mask=cond, x=image, **kwargs) loss = layers.cross_entropy(input=prob, label=label, **kwargs) avg_loss = layers.mean(x=loss, **kwargs) optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer.minimize(avg_loss, kwargs['startup_program']) train_reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), batch_size=200) place = core.CPUPlace() exe = Executor(place) exe.run(kwargs['startup_program']) PASS_NUM = 100 for pass_id in range(PASS_NUM): for data in train_reader(): x_data = np.array(map(lambda x: x[0], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.expand_dims(y_data, axis=1) outs = exe.run(kwargs['main_program'], feed={'x': x_data, 'y': y_data}, fetch_list=[avg_loss]) print outs[0] if outs[0] < 1.0: return self.assertFalse(True)
def test_ifelse(self): kwargs = {'startup_program': Program(), 'main_program': Program()} image = layers.data(name='x', shape=[784], dtype='float32', **kwargs) label = layers.data(name='y', shape=[1], dtype='int64', **kwargs) limit = layers.fill_constant_batch_size_like(input=label, dtype='int64', shape=[1], value=5.0, **kwargs) cond = layers.less_than(x=label, y=limit, **kwargs) ie = layers.IfElse(cond, **kwargs) with ie.true_block(): true_image = ie.input(image) hidden = layers.fc(input=true_image, size=100, act='tanh', **kwargs) prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) ie.output(prob) with ie.false_block(): false_image = ie.input(image) hidden = layers.fc(input=false_image, size=200, act='tanh', **kwargs) prob = layers.fc(input=hidden, size=10, act='softmax', **kwargs) ie.output(prob) prob = ie() loss = layers.cross_entropy(input=prob[0], label=label, **kwargs) avg_loss = layers.mean(x=loss, **kwargs) optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer.minimize(avg_loss, kwargs['startup_program']) train_reader = paddle.batch(paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), batch_size=200) place = core.CPUPlace() exe = Executor(place) exe.run(kwargs['startup_program']) PASS_NUM = 100 for pass_id in range(PASS_NUM): for data in train_reader(): x_data = np.array(map(lambda x: x[0], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = y_data.reshape((y_data.shape[0], 1)) outs = exe.run(kwargs['main_program'], feed={ 'x': x_data, 'y': y_data }, fetch_list=[avg_loss]) print outs[0] if outs[0] < 1.0: return self.assertFalse(True)