Example #1
def fast_preprocess_layer(img, input_size, normalize, subtract_means, to_float, mean=MEANS, std=STD):
    ''' Preprocess the image. Uses paddle instead of numpy for extra speed. Used at inference time. '''

    # NHWC -> NCHW
    img = P.transpose(img, perm=[0, 3, 1, 2])
    img = P.image_resize(img, out_shape=[input_size, input_size], resample="BILINEAR")

    if normalize:
        m = P.create_tensor(dtype='float32')
        P.assign(np.array(mean).astype(np.float32), m)
        m = P.reshape(m, (1, 3, 1, 1))
        m = P.expand_as(m, target_tensor=img)
        v = P.create_tensor(dtype='float32')
        P.assign(np.array(std).astype(np.float32), v)
        v = P.reshape(v, (1, 3, 1, 1))
        v = P.expand_as(v, target_tensor=img)
        img = (img - m) / v
    elif subtract_means:
        m = P.create_tensor(dtype='float32')
        P.assign(np.array(mean).astype(np.float32), m)
        m = P.reshape(m, (1, 3, 1, 1))
        m = P.expand_as(m, target_tensor=img)
        img = (img - m)
    elif to_float:  # just normalize to [0, 1]
        img = img / 255

    # convert to RGB format
    img_rgb = P.concat([img[:, 2:3, :, :], img[:, 1:2, :, :], img[:, 0:1, :, :]], axis=1)

    # Return value is in channel order [n, c, h, w] and RGB
    return img_rgb
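
A minimal usage sketch for the layer above. It assumes `P` aliases `paddle.fluid.layers` (as the body implies); the `MEANS`/`STD` values and the 550-pixel input size are illustrative stand-ins for the module's own constants:

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as P

# Illustrative per-channel BGR statistics; the real module defines its own.
MEANS = (103.94, 116.78, 123.68)
STD = (57.38, 57.12, 58.40)

# NHWC float32 batch, e.g. decoded BGR frames.
img = P.data(name='img', shape=[550, 550, 3], dtype='float32')
out = fast_preprocess_layer(img, input_size=550, normalize=True,
                            subtract_means=False, to_float=False,
                            mean=MEANS, std=STD)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
frame = np.random.randint(0, 256, (1, 550, 550, 3)).astype('float32')
result = exe.run(fluid.default_main_program(),
                 feed={'img': frame}, fetch_list=[out])[0]
print(result.shape)  # (1, 3, 550, 550): NCHW, RGB, normalized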
Example #2
    def not_test_raw_api(self):
        prog = Program()
        startup_prog = Program()
        with program_guard(prog, startup_prog):
            image = layers.data(name='x', shape=[784], dtype='float32')

            label = layers.data(name='y', shape=[1], dtype='int64')

            limit = layers.fill_constant(shape=[1], dtype='int64', value=5)
            cond = layers.less_than(x=label, y=limit)
            true_image, false_image = split_lod_tensor(input=image, mask=cond)

            true_out = layers.create_tensor(dtype='float32')
            true_cond = ConditionalBlock([cond])

            with true_cond.block():
                hidden = layers.fc(input=true_image, size=100, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                layers.assign(input=prob, output=true_out)

            false_out = layers.create_tensor(dtype='float32')
            false_cond = ConditionalBlock([cond])

            with false_cond.block():
                hidden = layers.fc(input=false_image, size=200, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                layers.assign(input=prob, output=false_out)

            prob = merge_lod_tensor(
                in_true=true_out, in_false=false_out, mask=cond, x=image)
            loss = layers.cross_entropy(input=prob, label=label)
            avg_loss = layers.mean(loss)

            optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
            optimizer.minimize(avg_loss, startup_prog)

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=10)

        place = core.CPUPlace()
        exe = Executor(place)

        exe.run(startup_prog)
        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                x_data = np.array([x[0] for x in data]).astype("float32")
                y_data = np.array([x[1] for x in data]).astype("int64")
                y_data = np.expand_dims(y_data, axis=1)

                outs = exe.run(prog,
                               feed={'x': x_data,
                                     'y': y_data},
                               fetch_list=[avg_loss])
                print(outs[0])
                if outs[0] < 1.0:
                    return
        self.assertFalse(True)
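
For reference, newer Fluid releases (1.8+) expose the same if/else branching through `layers.cond`, which replaces the raw `split_lod_tensor`/`ConditionalBlock`/`merge_lod_tensor` plumbing above; a minimal sketch of that API (not part of this test):

import paddle.fluid as fluid
import paddle.fluid.layers as layers

x = layers.fill_constant(shape=[1], dtype='float32', value=0.1)
y = layers.fill_constant(shape=[1], dtype='float32', value=0.23)
pred = layers.less_than(x=x, y=y)
# true_fn / false_fn are traced into the two branches of a conditional block
out = layers.cond(pred, lambda: x + y, lambda: x - y)

exe = fluid.Executor(fluid.CPUPlace())
print(exe.run(fetch_list=[out])[0])  # [0.33], since 0.1 < 0.23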
Example #3
    def test_square_error_cost(self):
        input_val = np.random.uniform(0.1, 0.5, (2, 3)).astype("float32")
        label_val = np.random.uniform(0.1, 0.5, (2, 3)).astype("float32")

        sub = input_val - label_val
        np_result = sub * sub

        input_var = layers.create_tensor(dtype="float32", name="input")
        label_var = layers.create_tensor(dtype="float32", name="label")

        layers.assign(input=input_val, output=input_var)
        layers.assign(input=label_val, output=label_var)
        output = layers.square_error_cost(input=input_var, label=label_var)

        for use_cuda in ([False, True]
                         if core.is_compiled_with_cuda() else [False]):

            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
            exe = Executor(place)
            result = exe.run(fluid.default_main_program(),
                             feed={
                                 "input": input_val,
                                 "label": label_val
                             },
                             fetch_list=[output])

            self.assertTrue(np.isclose(np_result, result).all())
Example #4
    def __call__(self, batch_C_prime, I_r_size):
        C = self.build_C()
        P = self.build_P(I_r_size)
        inv_delta_C = self.build_inv_delta_C(C).astype('float32')
        P_hat = self.build_P_hat(C, P).astype('float32')

        inv_delta_C_tensor = layers.create_tensor(dtype='float32')
        layers.assign(inv_delta_C, inv_delta_C_tensor)
        inv_delta_C_tensor.stop_gradient = True
        P_hat_tensor = layers.create_tensor(dtype='float32')
        layers.assign(P_hat, P_hat_tensor)
        P_hat_tensor.stop_gradient = True

        batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
        #         batch_C_ex_part_tensor = create_tmp_var(
        #             fluid.default_main_program(),
        #             name='batch_C_ex_part_tensor',
        #             dtype='float32', shape=[-1, 3, 2])
        #         layers.py_func(func=get_batch_C_expand,
        #             x=[batch_C_prime], out=[batch_C_ex_part_tensor])

        batch_C_ex_part_tensor.stop_gradient = True

        batch_C_prime_with_zeros = layers.concat(
            [batch_C_prime, batch_C_ex_part_tensor], axis=1)
        batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
        batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
        return batch_P_prime
Example #5
    def test_forward(self):
        data = layers.data(name='X', shape=[1], dtype='float32')
        data.stop_gradient = False
        cond = layers.ConditionalBlock(inputs=[data])
        out = layers.create_tensor(dtype='float32')
        with cond.block():
            hidden = layers.fc(input=data, size=10)
            layers.assign(hidden, out)

        cpu = core.CPUPlace()
        exe = Executor(cpu)
        exe.run(default_startup_program())

        x = numpy.random.random(size=(10, 1)).astype('float32')

        outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
        print(outs)
        loss = layers.mean(out)
        append_backward(loss=loss)
        outs = exe.run(
            feed={'X': x},
            fetch_list=[
                default_main_program().block(0).var(data.name + "@GRAD")
            ])[0]
        print(outs)
Example #6
    def test_forward(self):
        data = layers.data(name='X', shape=[1], dtype='float32')
        data.stop_gradient = False
        cond = ConditionalBlock(inputs=[data])
        out = layers.create_tensor(dtype='float32')
        with cond.block():
            hidden = layers.fc(input=data, size=10)
            layers.assign(hidden, out)

        cpu = core.CPUPlace()
        exe = Executor(cpu)
        exe.run(default_startup_program())

        x = numpy.random.random(size=(10, 1)).astype('float32')

        outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
        print(outs)
        loss = layers.mean(out)
        append_backward(loss=loss)
        outs = exe.run(feed={'X': x},
                       fetch_list=[
                           default_main_program().block(0).var(data.name +
                                                               "@GRAD")
                       ])[0]
        print(outs)
Example #7
    def test_uniform_name(self):
        name = 'test_uniform'
        uniform1 = Uniform(0.0, 1.0, name=name)
        self.assertEqual(uniform1.name, name)

        uniform2 = Uniform(0.0, 1.0)
        self.assertEqual(uniform2.name, 'Uniform')

        paddle.enable_static()

        sample = uniform1.sample([2])
        self.assertEqual(self.get_prefix(sample.name), name + '_sample')

        entropy = uniform1.entropy()
        self.assertEqual(self.get_prefix(entropy.name), name + '_entropy')

        value_npdata = np.array([0.8], dtype="float32")
        value_tensor = layers.create_tensor(dtype="float32")
        layers.assign(value_npdata, value_tensor)

        lp = uniform1.log_prob(value_tensor)
        self.assertEqual(self.get_prefix(lp.name), name + '_log_prob')

        p = uniform1.probs(value_tensor)
        self.assertEqual(self.get_prefix(p.name), name + '_probs')
Example #8
    def test_normal_name(self):
        name = 'test_normal'
        normal1 = Normal(0.0, 1.0, name=name)
        self.assertEqual(normal1.name, name)

        normal2 = Normal(0.0, 1.0)
        self.assertEqual(normal2.name, 'Normal')

        paddle.enable_static()

        sample = normal1.sample([2])
        self.assertEqual(self.get_prefix(sample.name), name + '_sample')

        entropy = normal1.entropy()
        self.assertEqual(self.get_prefix(entropy.name), name + '_entropy')

        value_npdata = np.array([0.8], dtype="float32")
        value_tensor = layers.create_tensor(dtype="float32")
        layers.assign(value_npdata, value_tensor)

        lp = normal1.log_prob(value_tensor)
        self.assertEqual(self.get_prefix(lp.name), name + '_log_prob')

        p = normal1.probs(value_tensor)
        self.assertEqual(self.get_prefix(p.name), name + '_probs')

        kl = normal1.kl_divergence(normal2)
        self.assertEqual(self.get_prefix(kl.name), name + '_kl_divergence')
Example #9
    def test_categorical_name(self):
        name = 'test_categorical'
        categorical1 = Categorical([0.4, 0.6], name=name)
        self.assertEqual(categorical1.name, name)

        categorical2 = Categorical([0.5, 0.5])
        self.assertEqual(categorical2.name, 'Categorical')

        paddle.enable_static()

        sample = categorical1.sample([2])
        self.assertEqual(self.get_prefix(sample.name), name + '_sample')

        entropy = categorical1.entropy()
        self.assertEqual(self.get_prefix(entropy.name), name + '_entropy')

        kl = categorical1.kl_divergence(categorical2)
        self.assertEqual(self.get_prefix(kl.name), name + '_kl_divergence')

        value_npdata = np.array([0], dtype="int64")
        value_tensor = layers.create_tensor(dtype="int64")
        layers.assign(value_npdata, value_tensor)

        p = categorical1.probs(value_tensor)
        self.assertEqual(self.get_prefix(p.name), name + '_probs')

        lp = categorical1.log_prob(value_tensor)
        self.assertEqual(self.get_prefix(lp.name), name + '_log_prob')
Example #10
 def test_fetch_var(self):
     self.set_input()
     x = layers.create_tensor(dtype="int32", persistable=True, name="x")
     layers.assign(input=self.val, output=x)
     exe = fluid.Executor(fluid.CPUPlace())
     exe.run(fluid.default_main_program(), feed={}, fetch_list=[])
     fetched_x = fluid.executor._fetch_var("x")
     self.assertTrue(numpy.array_equal(fetched_x, self.val),
                     "fetch_x=%s val=%s" % (fetched_x, self.val))
     self.assertEqual(fetched_x.dtype, self.val.dtype)
Example #11
    def test_assign(self):
        main_program = fluid.Program()
        with fluid.program_guard(main_program):
            x = layers.create_tensor(dtype=self.dtype)
            layers.assign(input=self.value, output=x)

        exe = fluid.Executor(self.place)
        [fetched_x] = exe.run(main_program, feed={}, fetch_list=[x])
        self.assertTrue(numpy.array_equal(fetched_x, self.value),
                        "fetch_x=%s val=%s" % (fetched_x, self.value))
        self.assertEqual(fetched_x.dtype, self.value.dtype)
Example #12
    def test_load(self):
        main_prog = fluid.Program()
        start_prog = fluid.Program()
        with fluid.program_guard(main_prog, start_prog):
            var = layers.create_tensor(dtype='float32')
            layers.load(var, file_path='./model/w')

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(start_prog)
        ret = exe.run(main_prog, fetch_list=[var.name])
        self.assertTrue(np.array_equal(self.ones, ret[0]))
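
The test above presupposes that a tensor was saved to `./model/w` beforehand (and that `self.ones` matches it). A plausible setup sketch, mirroring the raw `save` op pattern from Example #23 below; the `(2, 2)` shape is an assumption:

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

ones = np.ones((2, 2)).astype('float32')  # assumed shape of self.ones
main_prog = fluid.Program()
start_prog = fluid.Program()
with fluid.program_guard(main_prog, start_prog):
    w = layers.assign(ones)
    # raw 'save' op, as in Example #23
    main_prog.current_block().append_op(type='save',
                                        inputs={'X': [w]},
                                        outputs={},
                                        attrs={'file_path': './model/w'})

exe = fluid.Executor(fluid.CPUPlace())
exe.run(start_prog)
exe.run(main_prog)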
Example #13
 def test_fetch_var(self):
     val = numpy.array([1, 3, 5]).astype(numpy.int32)
     x = layers.create_tensor(dtype="int32", persistable=True, name="x")
     layers.assign(input=val, output=x)
     exe = fluid.Executor(fluid.CPUPlace())
     exe.run(fluid.default_main_program(), feed={}, fetch_list=[])
     fetched_x = fluid.fetch_var("x")
     self.assertTrue(
         numpy.array_equal(fetched_x, val),
         "fetch_x=%s val=%s" % (fetched_x, val))
     self.assertEqual(fetched_x.dtype, val.dtype)
Example #14
 def test_assign(self):
     val = (-100 + 200 * numpy.random.random(size=(2, 5))).astype(
         numpy.int32)
     x = layers.create_tensor(dtype="float32")
     layers.assign(input=val, output=x)
     exe = fluid.Executor(fluid.CPUPlace())
     fetched_x = exe.run(fluid.default_main_program(),
                         feed={},
                         fetch_list=[x])[0]
     self.assertTrue(numpy.array_equal(fetched_x, val),
                     "fetch_x=%s val=%s" % (fetched_x, val))
     self.assertEqual(fetched_x.dtype, val.dtype)
Example #15
 def test_assign(self):
     val = (
         -100 + 200 * numpy.random.random(size=(2, 5))).astype(numpy.int32)
     x = layers.create_tensor(dtype="float32")
     layers.assign(input=val, output=x)
     exe = fluid.Executor(fluid.CPUPlace())
     fetched_x = exe.run(fluid.default_main_program(),
                         feed={},
                         fetch_list=[x])[0]
     self.assertTrue(
         numpy.array_equal(fetched_x, val),
         "fetch_x=%s val=%s" % (fetched_x, val))
     self.assertEqual(fetched_x.dtype, val.dtype)
Example #16
File: tps.py Project: Felix1120/PaddleOCR
    def __call__(self, batch_C_prime, I_r_size):
        """
        Generate the grid for the grid_sampler.
        Args:
            batch_C_prime: the matrix of the geometric transformation
            I_r_size: the shape of the input image
        Return: 
            batch_P_prime: the grid for the grid_sampler 
        """
        C = self.build_C()
        P = self.build_P(I_r_size)
        inv_delta_C = self.build_inv_delta_C(C).astype('float32')
        P_hat = self.build_P_hat(C, P).astype('float32')

        inv_delta_C_tensor = layers.create_tensor(dtype='float32')
        layers.assign(inv_delta_C, inv_delta_C_tensor)
        inv_delta_C_tensor.stop_gradient = True
        P_hat_tensor = layers.create_tensor(dtype='float32')
        layers.assign(P_hat, P_hat_tensor)
        P_hat_tensor.stop_gradient = True

        batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
        #         batch_C_ex_part_tensor = create_tmp_var(
        #             fluid.default_main_program(),
        #             name='batch_C_ex_part_tensor',
        #             dtype='float32', shape=[-1, 3, 2])
        #         layers.py_func(func=get_batch_C_expand,
        #             x=[batch_C_prime], out=[batch_C_ex_part_tensor])

        batch_C_ex_part_tensor.stop_gradient = True

        batch_C_prime_with_zeros = layers.concat(
            [batch_C_prime, batch_C_ex_part_tensor], axis=1)
        batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
        batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
        return batch_P_prime
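
A shape-level sketch of the final two matmuls in NumPy, with illustrative sizes (F fiducial points, an N-point sampling grid, batch B; none of these values come from the source):

import numpy as np

F, N, B = 20, 32 * 100, 4  # fiducials, grid points, batch (illustrative)
inv_delta_C = np.zeros((F + 3, F + 3), dtype='float32')  # shared across batch
P_hat = np.zeros((N, F + 3), dtype='float32')            # shared across batch
batch_C_prime_with_zeros = np.zeros((B, F + 3, 2), dtype='float32')

# batch_T: (B, F+3, 2) - TPS coefficients, one set per batch item
batch_T = inv_delta_C @ batch_C_prime_with_zeros
# batch_P_prime: (B, N, 2) - one (x, y) source coordinate per grid point
batch_P_prime = P_hat @ batch_T
print(batch_T.shape, batch_P_prime.shape)  # (4, 23, 2) (4, 3200, 2)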
Example #17
    def test_masked_select(self):

        mask_shape = [4, 1]
        shape = [4, 4]
        data = np.random.random(mask_shape).astype("float32")
        input_data = np.random.random(shape).astype("float32")
        mask_data = data > 0.5
        mask_data_b = np.broadcast_to(mask_data, shape)
        npresult = input_data[np.where(mask_data_b)]

        input_var = layers.create_tensor(dtype="float32", name="input")
        mask_var = layers.create_tensor(dtype="bool", name="mask")

        output = layers.masked_select(input=input_var, mask=mask_var)
        for use_cuda in ([False, True]
                         if core.is_compiled_with_cuda() else [False]):
            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
            exe = Executor(place)
            result = exe.run(fluid.default_main_program(),
                             feed={"input": input_data,
                                   "mask": mask_data},
                             fetch_list=[output])

            self.assertTrue(np.isclose(npresult, result).all())
Example #18
    def test_distribution_error(self):
        distribution = Distribution()

        self.assertRaises(NotImplementedError, distribution.sample)
        self.assertRaises(NotImplementedError, distribution.entropy)

        normal = Normal(0.0, 1.0)
        self.assertRaises(NotImplementedError, distribution.kl_divergence,
                          normal)

        value_npdata = np.array([0.8], dtype="float32")
        value_tensor = layers.create_tensor(dtype="float32")
        self.assertRaises(NotImplementedError, distribution.log_prob,
                          value_tensor)
        self.assertRaises(NotImplementedError, distribution.probs,
                          value_tensor)
Example #19
    def beam_search():
        max_len = layers.fill_constant(shape=[1],
                                       dtype=start_tokens.dtype,
                                       value=max_out_len,
                                       force_cpu=True)
        step_idx = layers.fill_constant(shape=[1],
                                        dtype=start_tokens.dtype,
                                        value=0,
                                        force_cpu=True)
        cond = layers.less_than(x=step_idx,
                                y=max_len)  # default force_cpu=True
        while_op = layers.While(cond)
        # array states will be stored for each step.
        ids = layers.array_write(layers.reshape(start_tokens, (-1, 1)),
                                 step_idx)
        scores = layers.array_write(init_scores, step_idx)
        # cell states will be overwritten at each step.
        # caches contain the states of previous steps for decoder self-attention,
        # plus static encoder output projections for encoder-decoder attention,
        # to avoid redundant computation.
        caches = [
            {
                "k":  # for self attention
                layers.fill_constant_batch_size_like(
                    input=start_tokens,
                    shape=[-1, n_head, 0, d_key],
                    dtype=enc_output.dtype,
                    value=0),
                "v":  # for self attention
                layers.fill_constant_batch_size_like(
                    input=start_tokens,
                    shape=[-1, n_head, 0, d_value],
                    dtype=enc_output.dtype,
                    value=0),
                "static_k":  # for encoder-decoder attention
                layers.create_tensor(dtype=enc_output.dtype),
                "static_v":  # for encoder-decoder attention
                layers.create_tensor(dtype=enc_output.dtype)
            } for i in range(n_layer)
        ]

        with while_op.block():
            pre_ids = layers.array_read(array=ids, i=step_idx)
            # Since beam_search_op doesn't enforce pre_ids' shape, we can do an
            # inplace reshape here, which actually changes the shape of pre_ids.
            pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
            pre_scores = layers.array_read(array=scores, i=step_idx)
            # gather cell states corresponding to selected parent
            pre_src_attn_bias = layers.gather(trg_src_attn_bias,
                                              index=parent_idx)
            pre_pos = layers.elementwise_mul(
                x=layers.fill_constant_batch_size_like(
                    input=pre_src_attn_bias,  # can't use a lod tensor here
                    value=1,
                    shape=[-1, 1, 1],
                    dtype=pre_ids.dtype),
                y=step_idx,
                axis=0)
            logits = wrap_decoder(trg_vocab_size,
                                  max_in_len,
                                  n_layer,
                                  n_head,
                                  d_key,
                                  d_value,
                                  d_model,
                                  d_inner_hid,
                                  prepostprocess_dropout,
                                  attention_dropout,
                                  relu_dropout,
                                  preprocess_cmd,
                                  postprocess_cmd,
                                  weight_sharing,
                                  dec_inputs=(pre_ids, pre_pos, None,
                                              pre_src_attn_bias),
                                  enc_output=enc_output,
                                  caches=caches,
                                  gather_idx=parent_idx,
                                  bos_idx=bos_idx)
            # intra-beam topK
            topk_scores, topk_indices = layers.topk(
                input=layers.softmax(logits), k=beam_size)
            accu_scores = layers.elementwise_add(x=layers.log(topk_scores),
                                                 y=pre_scores,
                                                 axis=0)
            # beam_search op uses lod to differentiate branches.
            accu_scores = layers.lod_reset(accu_scores, pre_ids)
            # topK reduction across beams; also contains special handling of
            # finished beams and finished sentences (batch reduction)
            selected_ids, selected_scores, gather_idx = layers.beam_search(
                pre_ids=pre_ids,
                pre_scores=pre_scores,
                ids=topk_indices,
                scores=accu_scores,
                beam_size=beam_size,
                end_id=eos_idx,
                return_parent_idx=True)
            layers.increment(x=step_idx, value=1.0, in_place=True)
            # cell states (caches) have been updated in wrap_decoder;
            # we only need to update the beam search states here.
            layers.array_write(selected_ids, i=step_idx, array=ids)
            layers.array_write(selected_scores, i=step_idx, array=scores)
            layers.assign(gather_idx, parent_idx)
            layers.assign(pre_src_attn_bias, trg_src_attn_bias)
            length_cond = layers.less_than(x=step_idx, y=max_len)
            finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)

        finished_ids, finished_scores = layers.beam_search_decode(
            ids, scores, beam_size=beam_size, end_id=eos_idx)
        return finished_ids, finished_scores
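
The loop's last three statements recompute the condition and write it back into `cond` in place, which is the standard `While` termination idiom in old Fluid; a stripped-down sketch of just that idiom (mirroring the documented `layers.While` example):

import paddle.fluid as fluid
import paddle.fluid.layers as layers

i = layers.fill_constant(shape=[1], dtype='int64', value=0)
ten = layers.fill_constant(shape=[1], dtype='int64', value=10)
cond = layers.less_than(x=i, y=ten)
while_op = layers.While(cond=cond)
with while_op.block():
    i = layers.increment(x=i, in_place=True)
    # writing the fresh comparison into `cond` is what ends the loop
    layers.less_than(x=i, y=ten, cond=cond)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
print(exe.run(fluid.default_main_program(), fetch_list=[i])[0])  # [10]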
Example #20
    def test_raw_api(self):
        prog = Program()
        startup_prog = Program()
        with program_guard(prog, startup_prog):
            image = layers.data(name='x', shape=[784], dtype='float32')

            label = layers.data(name='y', shape=[1], dtype='int64')

            limit = layers.fill_constant_batch_size_like(
                input=label, dtype='int64', shape=[1], value=5.0)
            cond = layers.less_than(x=label, y=limit)
            true_image, false_image = layers.split_lod_tensor(
                input=image, mask=cond)

            true_out = layers.create_tensor(dtype='float32')
            true_cond = layers.ConditionalBlock([true_image])

            with true_cond.block():
                hidden = layers.fc(input=true_image, size=100, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                layers.assign(input=prob, output=true_out)

            false_out = layers.create_tensor(dtype='float32')
            false_cond = layers.ConditionalBlock([false_image])

            with false_cond.block():
                hidden = layers.fc(input=false_image, size=200, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                layers.assign(input=prob, output=false_out)

            prob = layers.merge_lod_tensor(
                in_true=true_out, in_false=false_out, mask=cond, x=image)
            loss = layers.cross_entropy(input=prob, label=label)
            avg_loss = layers.mean(loss)

            optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
            optimizer.minimize(avg_loss, startup_prog)

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=200)

        place = core.CPUPlace()
        exe = Executor(place)

        exe.run(startup_prog)
        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                x_data = np.array([x[0] for x in data]).astype("float32")
                y_data = np.array([x[1] for x in data]).astype("int64")
                y_data = np.expand_dims(y_data, axis=1)

                outs = exe.run(prog,
                               feed={'x': x_data,
                                     'y': y_data},
                               fetch_list=[avg_loss])
                print(outs[0])
                if outs[0] < 1.0:
                    return
        self.assertFalse(True)
Example #21
        def beam_search():
            """Beam search function"""

            max_len = layers.fill_constant(shape=[1],
                                           dtype=start_tokens.dtype,
                                           value=self.max_out_len,
                                           force_cpu=True)
            min_len = layers.fill_constant(shape=[1],
                                           dtype=start_tokens.dtype,
                                           value=self.min_out_len)
            neg_inf = layers.fill_constant(shape=[1],
                                           dtype='float32',
                                           value=-INF)
            step_idx = layers.fill_constant(shape=[1],
                                            dtype=start_tokens.dtype,
                                            value=0,
                                            force_cpu=True)
            step_next_idx = layers.fill_constant(shape=[1],
                                                 dtype=start_tokens.dtype,
                                                 value=1,
                                                 force_cpu=True)
            cond = layers.less_than(x=step_idx,
                                    y=max_len)  # default force_cpu=True
            while_op = layers.While(cond)
            # array states will be stored for each step.
            ids = layers.array_write(layers.reshape(start_tokens, (-1, 1)),
                                     step_idx)
            scores = layers.array_write(init_scores, step_idx)
            # cell states will be overwritten at each step.
            # caches contain the states of previous steps for decoder self-attention,
            # plus static encoder output projections for encoder-decoder attention,
            # to avoid redundant computation.
            caches = [
                {
                    "k":  # for self attention
                        layers.fill_constant_batch_size_like(
                            input=start_tokens,
                            shape=[-1, self._n_head, 0, self._emb_size // self._n_head],
                            dtype=enc_words_output.dtype,
                            value=0),
                    "v":  # for self attention
                        layers.fill_constant_batch_size_like(
                            input=start_tokens,
                            shape=[-1, self._n_head, 0, self._emb_size // self._n_head],
                            dtype=enc_words_output.dtype,
                            value=0),
                    "static_k_word":  # for encoder-decoder attention
                        layers.create_tensor(dtype=enc_words_output.dtype),
                    "static_v_word":  # for encoder-decoder attention
                        layers.create_tensor(dtype=enc_words_output.dtype),
                    "static_k_sent":  # for encoder-decoder attention
                        layers.create_tensor(dtype=enc_sents_output.dtype),
                    "static_v_sent":  # for encoder-decoder attention
                        layers.create_tensor(dtype=enc_sents_output.dtype)
                } for i in range(self._dec_n_layer)
            ]

            trigram_blocking = TrigramBlocking(start_tokens,
                                               self.tokenizer,
                                               use_fp16=self._use_fp16,
                                               beam_size=self.beam_size)

            with while_op.block():
                pre_ids = layers.array_read(array=ids, i=step_idx)
                pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
                # Since beam_search_op doesn't enforce pre_ids' shape, we can do an
                # inplace reshape here, which actually changes the shape of pre_ids.
                # pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
                pre_scores = layers.array_read(array=scores, i=step_idx)
                # gather cell states corresponding to selected parent
                pre_src_words_attn_bias = layers.gather(
                    tgt_src_words_attn_bias, index=parent_idx)
                pre_src_sents_attn_bias = layers.gather(
                    tgt_src_sents_attn_bias, index=parent_idx)
                pre_graph_attn_bias = layers.gather(graph_attn_bias,
                                                    index=parent_idx)
                pre_pos = layers.elementwise_mul(
                    x=layers.fill_constant_batch_size_like(
                        input=
                        pre_src_sents_attn_bias,  # can't use a lod tensor here
                        value=1,
                        shape=[-1, 1, 1],
                        dtype=pre_ids.dtype),
                    y=step_idx,
                    axis=0)

                logits = self.decode(
                    dec_input=(pre_ids, pre_pos, None, pre_src_words_attn_bias,
                               pre_src_sents_attn_bias, pre_graph_attn_bias),
                    enc_words_output=enc_words_output,
                    enc_sents_output=enc_sents_output,
                    caches=caches,
                    gather_idx=parent_idx)

                # prevent generating the end token while the length is less than min_out_len
                eos_index = layers.fill_constant(
                    shape=[layers.shape(logits)[0]],
                    dtype='int64',
                    value=self.eos_idx)
                eos_index = fluid.one_hot(eos_index, depth=self.voc_size)
                less_cond = layers.cast(layers.less_than(x=step_idx,
                                                         y=min_len),
                                        dtype='float32')
                less_val = layers.elementwise_mul(less_cond, neg_inf)
                eos_val = layers.elementwise_mul(eos_index, less_val, axis=0)
                revised_logits = layers.elementwise_add(logits,
                                                        eos_val,
                                                        axis=0)

                # topK reduction across beams; also contains special handling of
                # finished beams and finished sentences (batch reduction)
                topk_scores, topk_indices = layers.topk(
                    input=layers.softmax(revised_logits), k=self.beam_size)

                # Roll back the length penalty on the previous scores.
                # The previous scores were already length-penalized, so before applying
                # this timestep's penalty we must undo the old one. Consequently we
                # store the penalized score in `scores` but compute with the
                # un-penalized score.
                # -> safe for step_idx == 0 (initialization), because the previous score is 0
                pre_timestep_length_penalty = fluid.layers.pow(
                    ((5.0 + fluid.layers.cast(step_idx, pre_scores.dtype)) /
                     6.0), self.len_penalty)
                pre_scores_wo_len_penalty = fluid.layers.elementwise_mul(
                    pre_scores, pre_timestep_length_penalty)

                # calc trigram-blocking delta scores for current alive sequence
                if self.block_trigram:
                    trigram_blocking.update_seq(pre_ids, parent_idx)
                    trigram_blocking.expand_cand_seq(topk_indices)
                    fluid.layers.py_func(
                        func=trigram_blocking.blocking_forward,
                        x=[
                            trigram_blocking.cand_seq,
                            trigram_blocking.id2is_full_token
                        ],
                        out=trigram_blocking.delta_score_out,
                        backward_func=None)
                    layers.Print(trigram_blocking.delta_score_out,
                                 summarize=100,
                                 message="trigram_blocking.delta_score_out")
                    pre_scores_wo_len_penalty = fluid.layers.elementwise_add(
                        x=trigram_blocking.delta_score_out,
                        y=pre_scores_wo_len_penalty,
                        axis=0)
                # => [N, topk]

                accu_scores = layers.elementwise_add(
                    x=layers.log(topk_scores),
                    y=pre_scores_wo_len_penalty,
                    axis=0)

                cur_timestep_length_penalty = layers.pow(
                    ((5.0 + layers.cast(step_next_idx, accu_scores.dtype)) /
                     6.0), self.len_penalty)
                curr_scores = layers.elementwise_div(
                    accu_scores, cur_timestep_length_penalty)

                # beam_search op uses lod to differentiate branches.
                curr_scores = layers.lod_reset(curr_scores, pre_ids)
                topk_indices = layers.lod_reset(topk_indices, pre_ids)
                selected_ids, selected_scores, gather_idx = layers.beam_search(
                    pre_ids=pre_ids,
                    pre_scores=pre_scores,
                    ids=topk_indices,
                    scores=curr_scores,
                    beam_size=self.beam_size,
                    end_id=self.eos_idx,
                    return_parent_idx=True)

                layers.increment(x=step_idx, value=1.0, in_place=True)
                layers.increment(x=step_next_idx, value=1.0, in_place=True)
                # cell states (caches) have been updated in wrap_decoder;
                # we only need to update the beam search states here.
                layers.array_write(selected_ids, i=step_idx, array=ids)
                layers.array_write(selected_scores, i=step_idx, array=scores)
                layers.assign(gather_idx, parent_idx)
                layers.assign(pre_src_words_attn_bias, tgt_src_words_attn_bias)
                layers.assign(pre_src_sents_attn_bias, tgt_src_sents_attn_bias)
                layers.assign(pre_graph_attn_bias, graph_attn_bias)

                length_cond = layers.less_than(x=step_idx, y=max_len)
                finish_cond = layers.logical_not(
                    layers.is_empty(x=selected_ids))
                layers.logical_and(x=length_cond, y=finish_cond, out=cond)

            finished_ids, finished_scores = layers.beam_search_decode(
                ids, scores, beam_size=self.beam_size, end_id=self.eos_idx)

            return finished_ids, finished_scores
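
The roll-back above can be checked with plain numbers: with the GNMT-style penalty lp(t) = ((5 + t) / 6) ** alpha, multiplying the stored (penalized) score by lp(t) recovers the raw log-prob sum, and dividing by lp(t + 1) re-applies the penalty at the new length. A small sketch with made-up values:

alpha = 0.6  # illustrative len_penalty

def lp(t):
    return ((5.0 + t) / 6.0) ** alpha

raw = -4.2            # accumulated log-prob after step 3 (made up)
stored = raw / lp(3)  # what `scores` holds: the length-penalized score

# one decode step later: roll back, add the new token's log-prob, re-penalize
restored = stored * lp(3)        # == raw, the un-penalized score
new_raw = restored + (-1.3)      # log(topk_score) of the chosen token
new_stored = new_raw / lp(4)
print(abs(restored - raw) < 1e-9)  # True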
Example #22
def fast_decode(src_vocab_size,
                trg_vocab_size,
                max_in_len,
                n_layer,
                n_head,
                d_key,
                d_value,
                d_model,
                d_inner_hid,
                prepostprocess_dropout,
                attention_dropout,
                relu_dropout,
                preprocess_cmd,
                postprocess_cmd,
                weight_sharing,
                beam_size,
                max_out_len,
                bos_idx,
                eos_idx,
                model_input=None):
    """
    Use beam search to decode. Caches will be used to store states of history
    steps which can make the decoding faster.
    """
    enc_inputs = (model_input.src_word, model_input.src_pos,
                  model_input.src_slf_attn_bias)
    dec_inputs = (model_input.trg_word, model_input.init_score,
                  model_input.init_idx, model_input.trg_src_attn_bias)

    enc_output = wrap_encoder(src_vocab_size,
                              max_in_len,
                              n_layer,
                              n_head,
                              d_key,
                              d_value,
                              d_model,
                              d_inner_hid,
                              prepostprocess_dropout,
                              attention_dropout,
                              relu_dropout,
                              preprocess_cmd,
                              postprocess_cmd,
                              weight_sharing,
                              enc_inputs,
                              bos_idx=bos_idx)
    start_tokens, init_scores, parent_idx, trg_src_attn_bias = dec_inputs
    max_len = layers.fill_constant(shape=[1],
                                   dtype=start_tokens.dtype,
                                   value=max_out_len,
                                   force_cpu=True)
    step_idx = layers.fill_constant(shape=[1],
                                    dtype=start_tokens.dtype,
                                    value=0,
                                    force_cpu=True)
    # array states will be stored for each step.
    ids = layers.array_write(layers.reshape(start_tokens, (-1, 1)), step_idx)
    scores = layers.array_write(init_scores, step_idx)
    # cell states will be overwritten at each step.
    # caches contain the states of previous steps for decoder self-attention,
    # plus static encoder output projections for encoder-decoder attention,
    # to avoid redundant computation.
    caches = []
    for i in range(n_layer):
        caches.append({
            "k":  # for self attention
            layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, n_head, 0, d_key],
                dtype=enc_output.dtype,
                value=0),
            "v":  # for self attention
            layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, n_head, 0, d_value],
                dtype=enc_output.dtype,
                value=0),
            "static_k":  # for encoder-decoder attention
            layers.create_tensor(dtype=enc_output.dtype),
            "static_v":  # for encoder-decoder attention
            layers.create_tensor(dtype=enc_output.dtype)
        })

    _do_beam_search(trg_vocab_size, max_in_len, n_layer, n_head, d_key,
                    d_value, d_model, d_inner_hid, prepostprocess_dropout,
                    attention_dropout, relu_dropout, preprocess_cmd,
                    postprocess_cmd, weight_sharing, beam_size, max_len,
                    bos_idx, eos_idx, ids, scores, parent_idx,
                    trg_src_attn_bias, caches, enc_output, step_idx)
    finished_ids, finished_scores = layers.beam_search_decode(
        ids, scores, beam_size=beam_size, end_id=eos_idx)
    return finished_ids, finished_scores
Example #23
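
This snippet begins mid-script: `exe`, `main_prog`, `start_prog`, and `ones` come from an earlier prelude. A plausible reconstruction (the `(4, 8)` shape of the original weight is an assumption consistent with the comments below; the parameter name `weight` is implied by the save/load paths):

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

ones = np.ones((4, 8)).astype('float32')
main_prog = fluid.Program()
start_prog = fluid.Program()
with fluid.program_guard(main_prog, start_prog):
    # the original 'weight' parameter, initialized to all ones
    w = layers.create_parameter(
        shape=[4, 8],
        dtype='float32',
        name='weight',
        default_initializer=fluid.initializer.Constant(1.0))

exe = fluid.Executor(fluid.CPUPlace())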
exe.run(start_prog)

# simulate saving model
fluid.io.save_persistables(exe, dirname="old", main_program=main_prog)

#############################################################################
# The following section illustrates how the user should adjust the parameter #
#############################################################################

# The target 'weight' is the concatenation of the original 'weight' and a
# supplementary (4, 8) weight filled with zeros.
zeros = np.zeros((4, 8)).astype('float32')
main_prog = fluid.Program()
start_prog = fluid.Program()
with fluid.program_guard(main_prog, start_prog):
    w_part1 = layers.create_tensor(dtype='float32')
    layers.load(w_part1, file_path='old/weight')
    w_part2 = layers.assign(zeros)

    new_w = layers.concat([w_part1, w_part2], axis=0)
    main_prog.current_block().append_op(type='save',
                                        inputs={'X': [new_w]},
                                        outputs={},
                                        attrs={'file_path': 'new/weight'})

exe = fluid.Executor(fluid.CPUPlace())
exe.run(start_prog)
ret = exe.run(main_prog, fetch_list=[new_w.name])
target = np.concatenate((ones, zeros), axis=0)
assert np.array_equal(ret[0], target)