Example #1
    def net_profiler(self, state, profile_path='/tmp/profile'):
        enable_if_gpu = state == 'GPU' or state == "All"
        if enable_if_gpu and not core.is_compiled_with_cuda():
            return
        startup_program = fluid.Program()
        main_program = fluid.Program()

        with fluid.program_guard(main_program, startup_program):
            image = fluid.layers.data(name='x', shape=[784], dtype='float32')
            hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
            i = layers.zeros(shape=[1], dtype='int64')
            counter = fluid.layers.zeros(shape=[1],
                                         dtype='int64',
                                         force_cpu=True)
            until = layers.fill_constant([1], dtype='int64', value=10)
            data_arr = layers.array_write(hidden1, i)
            cond = fluid.layers.less_than(x=counter, y=until)
            while_op = fluid.layers.While(cond=cond)
            with while_op.block():
                hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
                layers.array_write(hidden_n, i, data_arr)
                fluid.layers.increment(x=counter, value=1, in_place=True)
                layers.less_than(x=counter, y=until, cond=cond)

            hidden_n = layers.array_read(data_arr, i)
            hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
            predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
            label = fluid.layers.data(name='y', shape=[1], dtype='int64')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(cost)
            batch_size = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(input=predict,
                                              label=label,
                                              total=batch_size)

        optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
        opts = optimizer.minimize(avg_cost, startup_program=startup_program)

        place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup_program)

        pass_acc_calculator = fluid.average.WeightedAverage()
        with profiler.profiler(state, 'total', profile_path) as prof:
            for iter in range(10):
                if iter == 2:
                    profiler.reset_profiler()
                x = np.random.random((32, 784)).astype("float32")
                y = np.random.randint(0, 10, (32, 1)).astype("int64")

                outs = exe.run(main_program,
                               feed={
                                   'x': x,
                                   'y': y
                               },
                               fetch_list=[avg_cost, batch_acc, batch_size])
                acc = np.array(outs[1])
                b_size = np.array(outs[2])
                pass_acc_calculator.add(value=acc, weight=b_size)
                pass_acc = pass_acc_calculator.eval()
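Stripped of the profiler scaffolding, the control-flow core of this example is a less_than-driven While loop. A minimal, self-contained sketch of the same pattern, assuming the paddle.fluid 1.x static-graph API used throughout these examples:

import paddle.fluid as fluid

main, startup = fluid.Program(), fluid.Program()
with fluid.program_guard(main, startup):
    counter = fluid.layers.zeros(shape=[1], dtype='int64', force_cpu=True)
    until = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
    cond = fluid.layers.less_than(x=counter, y=until)
    while_op = fluid.layers.While(cond=cond)
    with while_op.block():
        fluid.layers.increment(x=counter, value=1, in_place=True)
        # less_than writes the fresh comparison back into `cond`,
        # which is what eventually terminates the loop
        fluid.layers.less_than(x=counter, y=until, cond=cond)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup)
print(exe.run(main, fetch_list=[counter])[0])  # expect [10]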
Example #2
    def check_switch(self, value):
        x = layers.fill_constant(shape=[1], dtype='float32', value=value)
        zero_var = layers.fill_constant(shape=[1], dtype='float32', value=0.0)
        one_var = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
        two_var = layers.fill_constant(shape=[1], dtype='float32', value=2.0)
        three_var = layers.fill_constant(shape=[1], dtype='float32', value=3.0)

        result = layers.create_global_var(shape=[1],
                                          value=-1.0,
                                          dtype='float32',
                                          persistable=True)

        with layers.Switch() as switch:
            with switch.case(layers.less_than(x, zero_var)):
                layers.assign(zero_var, result)
            with switch.case(layers.less_than(x, one_var)):
                layers.assign(one_var, result)
            with switch.case(layers.less_than(x, two_var)):
                layers.assign(two_var, result)
            with switch.default():
                layers.assign(three_var, result)

        cpu = core.CPUPlace()
        exe = Executor(cpu)
        exe.run(default_startup_program())

        out = exe.run(feed={}, fetch_list=[result])[0][0]
        return out
Example #3
                def is_finished(alive_log_prob, finished_scores,
                                finished_in_finished):

                    max_out_len = 200
                    max_length_penalty = layers.pow(
                        layers.fill_constant([1],
                                             dtype='float32',
                                             value=((5.0 + max_out_len) /
                                                    6.0)), alpha)

                    lower_bound_alive_score = layers.slice(
                        alive_log_prob, starts=[0], ends=[1],
                        axes=[0]) / max_length_penalty

                    lowest_score_of_finished_in_finished = finished_scores * finished_in_finished
                    lowest_score_of_finished_in_finished += (
                        1.0 - finished_in_finished) * -INF
                    lowest_score_of_finished_in_finished = layers.reduce_min(
                        lowest_score_of_finished_in_finished)

                    met = layers.less_than(
                        lower_bound_alive_score,
                        lowest_score_of_finished_in_finished)
                    met = layers.cast(met, 'float32')
                    bound_is_met = layers.reduce_sum(met)

                    finished_eos_num = layers.reduce_sum(finished_in_finished)

                    finish_cond = layers.less_than(
                        finished_eos_num,
                        layers.fill_constant([1],
                                             dtype='float32',
                                             value=beam_size))

                    return finish_cond
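For reference, max_length_penalty above is the GNMT-style length penalty ((5 + length) / 6) ** alpha, where alpha is a closure variable of the surrounding function. A plain-Python sketch with an arbitrary stand-in value:

max_out_len, alpha = 200, 0.6   # alpha = 0.6 is a made-up stand-in
max_length_penalty = ((5.0 + max_out_len) / 6.0) ** alpha
# dividing a log-probability by this penalty favors longer finished hypotheses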
Example #4
    def check_switch(self, value):
        x = layers.fill_constant(shape=[1], dtype='float32', value=value)

        zero_var = layers.fill_constant(shape=[1], dtype='float32', value=0.0)
        one_var = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
        two_var = layers.fill_constant(shape=[1], dtype='float32', value=2.0)
        three_var = layers.fill_constant(shape=[1], dtype='float32', value=3.0)

        result = layers.create_global_var(
            shape=[1], value=-1.0, dtype='float32', persistable=True)

        with layers.Switch() as switch:
            with switch.case(layers.less_than(x, zero_var)):
                layers.assign(zero_var, result)
            with switch.case(layers.less_than(x, one_var)):
                layers.assign(one_var, result)
            with switch.case(layers.less_than(x, two_var)):
                layers.assign(two_var, result)
            with switch.default():
                layers.assign(three_var, result)

        cpu = core.CPUPlace()
        exe = Executor(cpu)
        exe.run(default_startup_program())

        out = exe.run(feed={}, fetch_list=[result])[0][0]
        return out
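Read as an if/elif/else chain, the cases above map check_switch's input to a result like this:

    check_switch(-1.0) -> 0.0   # x < 0
    check_switch(0.5)  -> 1.0   # 0 <= x < 1
    check_switch(1.5)  -> 2.0   # 1 <= x < 2
    check_switch(3.0)  -> 3.0   # default branch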
Example #5
    def simple_net(self):
        d0 = layers.data(
            "d0", shape=[10], append_batch_size=False, dtype='float32')
        d1 = layers.data(
            "d1", shape=[10], append_batch_size=False, dtype='float32')
        d2 = layers.data(
            "d2", shape=[10], append_batch_size=False, dtype='float32')
        # fill_constant npu op doesn't support int64
        i = layers.zeros(shape=[1], dtype='int32')
        i = layers.cast(i, 'int64')
        i.stop_gradient = True
        init = layers.zeros(shape=[10], dtype='float32')
        mem_array = layers.array_write(x=init, i=i)
        data_array = layers.array_write(x=d0, i=i)
        i = layers.increment(i)
        layers.array_write(d1, i, array=data_array)
        i = layers.increment(i)
        layers.array_write(d2, i, array=data_array)
        i = layers.zeros(shape=[1], dtype='int32')
        i = layers.cast(i, 'int64')
        i.stop_gradient = True
        array_len = layers.fill_constant(shape=[1], dtype='int32', value=5)
        array_len = layers.cast(array_len, 'int64')
        array_len.stop_gradient = True
        cond = layers.ones(shape=[1], dtype='int32')
        cond = layers.cast(cond, 'bool')
        j = layers.fill_constant(shape=[1], dtype='int32', value=1)
        j = layers.cast(j, 'int64')
        j.stop_gradient = True
        array_len2 = layers.fill_constant(shape=[1], dtype='int32', value=3)
        array_len2 = layers.cast(array_len2, 'int64')
        array_len2.stop_gradient = True
        cond2 = layers.ones(shape=[1], dtype='int32')
        cond2 = layers.cast(cond2, 'bool')
        while_op = layers.While(cond=cond)
        while_op2 = layers.While(cond=cond2)
        with while_op.block():
            d = layers.array_read(array=data_array, i=i)
            prev = layers.array_read(array=mem_array, i=i)
            result = layers.sums(input=[d, prev])

            i = layers.increment(x=i, in_place=True)
            layers.array_write(result, i=i, array=mem_array)
            layers.less_than(x=i, y=array_len, cond=cond)

            with while_op2.block():
                d2 = layers.array_read(array=data_array, i=j)
                prev2 = layers.array_read(array=mem_array, i=j)
                result2 = layers.sums(input=[d2, prev2])

                j = layers.increment(x=j, in_place=True)
                layers.array_write(result2, i=j, array=mem_array)
                layers.less_than(x=j, y=array_len2, cond=cond2)
        sum_result = layers.array_read(array=mem_array, i=j)
        loss = layers.mean(sum_result)
        return loss, sum_result
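Each While owns its condition variable: the outer block refreshes cond via less_than(x=i, y=array_len, cond=cond) and the nested block refreshes cond2 via less_than(x=j, y=array_len2, cond=cond2), so the two loops terminate independently even though one is nested inside the other.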
Example #6
    def net_profiler(self, state, profile_path='/tmp/profile'):
        enable_if_gpu = state == 'GPU' or state == "All"
        if enable_if_gpu and not core.is_compiled_with_cuda():
            return
        startup_program = fluid.Program()
        main_program = fluid.Program()

        with fluid.program_guard(main_program, startup_program):
            image = fluid.layers.data(name='x', shape=[784], dtype='float32')
            hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
            i = layers.zeros(shape=[1], dtype='int64')
            counter = fluid.layers.zeros(
                shape=[1], dtype='int64', force_cpu=True)
            until = layers.fill_constant([1], dtype='int64', value=10)
            data_arr = layers.array_write(hidden1, i)
            cond = fluid.layers.less_than(x=counter, y=until)
            while_op = fluid.layers.While(cond=cond)
            with while_op.block():
                hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
                layers.array_write(hidden_n, i, data_arr)
                fluid.layers.increment(x=counter, value=1, in_place=True)
                layers.less_than(x=counter, y=until, cond=cond)

            hidden_n = layers.array_read(data_arr, i)
            hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
            predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
            label = fluid.layers.data(name='y', shape=[1], dtype='int64')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(cost)
            batch_size = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(
                input=predict, label=label, total=batch_size)

        optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
        opts = optimizer.minimize(avg_cost, startup_program=startup_program)

        place = fluid.CPUPlace() if state == 'CPU' else fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup_program)

        pass_acc_calculator = fluid.average.WeightedAverage()
        with profiler.profiler(state, 'total', profile_path) as prof:
            for iter in range(10):
                if iter == 2:
                    profiler.reset_profiler()
                x = np.random.random((32, 784)).astype("float32")
                y = np.random.randint(0, 10, (32, 1)).astype("int64")

                outs = exe.run(main_program,
                               feed={'x': x,
                                     'y': y},
                               fetch_list=[avg_cost, batch_acc, batch_size])
                acc = np.array(outs[1])
                b_size = np.array(outs[2])
                pass_acc_calculator.add(value=acc, weight=b_size)
                pass_acc = pass_acc_calculator.eval()
Example #7
    def test_simple_forward(self):
        d0 = layers.data(
            "d0", shape=[10], append_batch_size=False, dtype='float32')
        d1 = layers.data(
            "d1", shape=[10], append_batch_size=False, dtype='float32')
        d2 = layers.data(
            "d2", shape=[10], append_batch_size=False, dtype='float32')
        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True
        init = layers.zeros(shape=[10], dtype='float32')
        mem_array = layers.array_write(x=init, i=i)
        data_array = layers.array_write(x=d0, i=i)

        i = layers.increment(i)
        layers.array_write(d1, i, array=data_array)

        i = layers.increment(i)
        layers.array_write(d2, i, array=data_array)

        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True

        array_len = layers.fill_constant(shape=[1], dtype='int64', value=3)
        array_len.stop_gradient = True
        cond = layers.less_than(x=i, y=array_len)

        while_op = layers.While(cond=cond)
        with while_op.block():
            d = layers.array_read(array=data_array, i=i)
            prev = layers.array_read(array=mem_array, i=i)
            result = layers.sums(input=[d, prev])

            i = layers.increment(x=i, in_place=True)
            layers.array_write(result, i=i, array=mem_array)
            layers.less_than(x=i, y=array_len, cond=cond)

        sum_result = layers.array_read(array=mem_array, i=i)
        loss = layers.mean(sum_result)

        append_backward(loss)

        cpu = core.CPUPlace()
        exe = Executor(cpu)
        d = []

        for i in range(3):
            d.append(numpy.random.random(size=[10]).astype('float32'))

        outs = exe.run(feed={'d0': d[0],
                             'd1': d[1],
                             'd2': d[2]},
                       fetch_list=[sum_result])
        self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01)
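The loop folds d[0], d[1] and d[2] into mem_array one step at a time, so the fetched sum_result equals d[0] + d[1] + d[2] elementwise; comparing the two sums is exactly what the final assertAlmostEqual checks.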
Example #8
    def test_return_single_var(self):
        def fn_1():
            return layers.fill_constant(shape=[4, 2], dtype='int32', value=1)

        def fn_2():
            return layers.fill_constant(shape=[4, 2], dtype='int32', value=2)

        def fn_3():
            return layers.fill_constant(shape=[4, 3], dtype='int32', value=3)

        main_program = Program()
        startup_program = Program()
        with program_guard(main_program, startup_program):
            x = layers.fill_constant(shape=[1], dtype='float32', value=0.3)
            y = layers.fill_constant(shape=[1], dtype='float32', value=0.1)
            z = layers.fill_constant(shape=[1], dtype='float32', value=0.2)
            pred_2 = layers.less_than(x, y)  # false: 0.3 < 0.1
            pred_1 = layers.less_than(z, x)  # true: 0.2 < 0.3

            # call fn_1
            out_0 = layers.case(pred_fn_pairs=[(pred_1, fn_1), (pred_1, fn_2)],
                                default=fn_3)

            # call fn_2
            out_1 = layers.case(pred_fn_pairs=[(pred_2, fn_1), (pred_1, fn_2)],
                                default=fn_3)

            # call default fn_3
            out_2 = layers.case(pred_fn_pairs=((pred_2, fn_1), (pred_2, fn_2)),
                                default=fn_3)

            # no default, call fn_2
            out_3 = layers.case(pred_fn_pairs=[(pred_1, fn_2)])

            # no default and pred_2 is false: the last fn (fn_2) acts as the implicit default
            out_4 = layers.case(pred_fn_pairs=[(pred_2, fn_2)])

            place = fluid.CUDAPlace(
                0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
            exe = fluid.Executor(place)

            res = exe.run(main_program,
                          fetch_list=[out_0, out_1, out_2, out_3, out_4])

            self.assertTrue(np.allclose(res[0], 1))
            self.assertTrue(np.allclose(res[1], 2))
            self.assertTrue(np.allclose(res[2], 3))
            self.assertTrue(np.allclose(res[3], 2))
            self.assertTrue(np.allclose(res[4], 2))
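layers.case evaluates pred_fn_pairs in order and runs the first fn whose predicate is true; when no predicate is true and default is None (out_4 above), the last fn in the list serves as the implicit default, which is why the test still expects fn_2's value of 2.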
Example #9
    def test_return_single_var(self):
        """
        pseudocode:

        if 0.23 < 0.1:
            return 2
        else:
            return -1
        """
        def true_func():
            return layers.fill_constant(shape=[2, 3], dtype='int32', value=2)

        def false_func():
            return layers.fill_constant(shape=[3, 2], dtype='int32', value=-1)

        main_program = Program()
        startup_program = Program()
        with program_guard(main_program, startup_program):
            x = layers.fill_constant(shape=[1], dtype='float32', value=0.1)
            y = layers.fill_constant(shape=[1], dtype='float32', value=0.23)
            pred = layers.less_than(y, x)
            out = layers.cond(pred, true_func, false_func)
            # out is one tensor

        place = fluid.CUDAPlace(
            0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
        exe = fluid.Executor(place)
        ret = exe.run(main_program, fetch_list=[out.name])
        self.assertTrue(
            np.allclose(np.asarray(ret), np.full((3, 2), -1, np.int32)))
Example #10
    def is_finished(self, step_idx, source_length, alive_log_probs, finished_scores, finished_in_finished):
        """
            is_finished
        """
        # GNMT-style length penalty: ((5 + max decode length) / 6) ** alpha,
        # with the max decode length fixed at source_length + 50 (see decode_length below)
        base_1 = layers.cast(source_length, 'float32') + 55.0
        base_1 /= 6.0
        max_length_penalty = layers.pow(base_1, self.alpha)

        flat_alive_log_probs = layers.reshape(alive_log_probs, [-1])
        lower_bound_alive_scores_1 = layers.gather(flat_alive_log_probs, [self.get_alive_index])
        
        lower_bound_alive_scores = lower_bound_alive_scores_1 / max_length_penalty
        
        lowest_score_of_finished_in_finished = layers.reduce_min(finished_scores * finished_in_finished, dim=1)

        finished_in_finished = layers.cast(finished_in_finished, 'bool')
        lowest_score_of_finished_in_finished += \
                        ((1.0 - layers.cast(layers.reduce_any(finished_in_finished, 1), 'float32')) * -INF)

        bound_is_met = layers.reduce_all(layers.greater_than(lowest_score_of_finished_in_finished,
                                                             lower_bound_alive_scores))

        decode_length = source_length + 50
        length_cond = layers.less_than(x=step_idx, y=decode_length)

        return layers.logical_and(x=layers.logical_not(bound_is_met), y=length_cond)
Example #11
    def not_test_raw_api(self):
        prog = Program()
        startup_prog = Program()
        with program_guard(prog, startup_prog):
            image = layers.data(name='x', shape=[784], dtype='float32')

            label = layers.data(name='y', shape=[1], dtype='int64')

            limit = layers.fill_constant(shape=[1], dtype='int64', value=5)
            cond = layers.less_than(x=label, y=limit)
            true_image, false_image = split_lod_tensor(input=image, mask=cond)

            true_out = layers.create_tensor(dtype='float32')
            true_cond = ConditionalBlock([cond])

            with true_cond.block():
                hidden = layers.fc(input=true_image, size=100, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                layers.assign(input=prob, output=true_out)

            false_out = layers.create_tensor(dtype='float32')
            false_cond = ConditionalBlock([cond])

            with false_cond.block():
                hidden = layers.fc(input=false_image, size=200, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                layers.assign(input=prob, output=false_out)

            prob = merge_lod_tensor(
                in_true=true_out, in_false=false_out, mask=cond, x=image)
            loss = layers.cross_entropy(input=prob, label=label)
            avg_loss = layers.mean(loss)

            optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
            optimizer.minimize(avg_loss, startup_prog)

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=10)

        place = core.CPUPlace()
        exe = Executor(place)

        exe.run(startup_prog)
        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                x_data = np.array([x[0] for x in data]).astype("float32")
                y_data = np.array([x[1] for x in data]).astype("int64")
                y_data = np.expand_dims(y_data, axis=1)

                outs = exe.run(prog,
                               feed={'x': x_data,
                                     'y': y_data},
                               fetch_list=[avg_loss])
                print(outs[0])
                if outs[0] < 1.0:
                    return
        self.assertFalse(True)
Example #12
    def compare_ifelse_op_and_numpy(self, place):
        self.set_test_case()

        prog = Program()
        startup_prog = Program()
        with program_guard(prog, startup_prog):
            src = layers.data(name='data', shape=[1], dtype='float32')
            cond = layers.fill_constant(
                [1], dtype='float32', value=self.cond_value)
            ifcond = layers.less_than(x=src, y=cond)
            ie = layers.IfElse(ifcond)
            with ie.true_block():
                true_target = ie.input(src)
                true_target = fluid.layers.exp(true_target)
                ie.output(true_target)

            with ie.false_block():
                false_target = ie.input(src)
                false_target = fluid.layers.tanh(false_target)
                ie.output(false_target)
            if_out = ie()
            out = layers.reduce_sum(if_out)

            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            fetch_list = [out]
            o1, = exe.run(fluid.default_main_program(),
                          feed={'data': self.data},
                          fetch_list=[out])
            o2 = self.numpy_cal()

            self.assertTrue(
                np.allclose(
                    o1, o2, atol=1e-8),
                "IfElse result : " + str(o1) + "\n Numpy result :" + str(o2))
Example #13
    def test_ifelse(self):
        prog = Program()
        startup_prog = Program()
        with program_guard(prog, startup_prog):
            image = layers.data(name='x', shape=[784], dtype='float32')

            label = layers.data(name='y', shape=[1], dtype='int64')

            limit = layers.fill_constant_batch_size_like(input=label,
                                                         dtype='int64',
                                                         shape=[1],
                                                         value=5.0)
            cond = layers.less_than(x=label, y=limit)
            ie = layers.IfElse(cond)

            with ie.true_block():
                true_image = ie.input(image)
                hidden = layers.fc(input=true_image, size=100, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                ie.output(prob)

            with ie.false_block():
                false_image = ie.input(image)
                hidden = layers.fc(input=false_image, size=200, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                ie.output(prob)

            prob = ie()
            loss = layers.cross_entropy(input=prob[0], label=label)
            avg_loss = layers.mean(loss)

            optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
            optimizer.minimize(avg_loss, startup_prog)
        train_reader = paddle.batch(paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=8192),
                                    batch_size=200)

        place = core.CPUPlace()
        exe = Executor(place)

        exe.run(startup_prog)
        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                x_data = np.array([x[0] for x in data]).astype("float32")
                y_data = np.array([x[1] for x in data]).astype("int64")
                y_data = y_data.reshape((y_data.shape[0], 1))

                outs = exe.run(prog,
                               feed={
                                   'x': x_data,
                                   'y': y_data
                               },
                               fetch_list=[avg_loss])
                print(outs[0])
                if outs[0] < 1.0:
                    return
        self.assertFalse(True)
Example #14
    def build_program(self, compile_program=True):
        startup_program = fluid.Program()
        main_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            image = fluid.layers.data(name='x', shape=[784], dtype='float32')
            hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
            i = layers.zeros(shape=[1], dtype='int64')
            counter = fluid.layers.zeros(shape=[1],
                                         dtype='int64',
                                         force_cpu=True)
            until = layers.fill_constant([1], dtype='int64', value=10)
            data_arr = layers.array_write(hidden1, i)
            cond = fluid.layers.less_than(x=counter, y=until)
            while_op = fluid.layers.While(cond=cond)
            with while_op.block():
                hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
                layers.array_write(hidden_n, i, data_arr)
                fluid.layers.increment(x=counter, value=1, in_place=True)
                layers.less_than(x=counter, y=until, cond=cond)

            hidden_n = layers.array_read(data_arr, i)
            hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
            predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
            label = fluid.layers.data(name='y', shape=[1], dtype='int64')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(cost)
            batch_size = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(input=predict,
                                              label=label,
                                              total=batch_size)

        optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
        opts = optimizer.minimize(avg_cost, startup_program=startup_program)

        if compile_program:
            # TODO(luotao): the profiler tool may have a bug with the multi-thread parallel executor.
            # https://github.com/PaddlePaddle/Paddle/pull/25200#issuecomment-650483092
            exec_strategy = fluid.ExecutionStrategy()
            exec_strategy.num_threads = 1
            train_program = fluid.compiler.CompiledProgram(
                main_program).with_data_parallel(loss_name=avg_cost.name,
                                                 exec_strategy=exec_strategy)
        else:
            train_program = main_program
        return train_program, startup_program, avg_cost, batch_size, batch_acc
Example #15
 def test_exceptions(self):
     i = layers.zeros(shape=[2], dtype='int64')
     array_len = layers.fill_constant(shape=[2], dtype='int64', value=1)
     cond = layers.less_than(x=i, y=array_len)
     with self.assertRaises(TypeError):
         layers.While(cond=cond)
     cond = layers.cast(cond, dtype='float64')
     with self.assertRaises(TypeError):
         layers.While(cond=cond)
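Both failures come from While's validation of its condition: cond must be a bool tensor of shape [1]. The first construction fails on shape ([2]), the second on dtype (float64). For contrast, a valid condition looks like this (a sketch using the same layers module):

i = layers.zeros(shape=[1], dtype='int64')
n = layers.fill_constant(shape=[1], dtype='int64', value=1)
cond = layers.less_than(x=i, y=n)  # bool tensor of shape [1]
layers.While(cond=cond)            # accepted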
Example #16
    def test_ifelse(self):
        prog = Program()
        startup_prog = Program()
        with program_guard(prog, startup_prog):
            image = layers.data(name='x', shape=[784], dtype='float32')

            label = layers.data(name='y', shape=[1], dtype='int64')

            limit = layers.fill_constant_batch_size_like(
                input=label, dtype='int64', shape=[1], value=5.0)
            cond = layers.less_than(x=label, y=limit)
            ie = layers.IfElse(cond)

            with ie.true_block():
                true_image = ie.input(image)
                hidden = layers.fc(input=true_image, size=100, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                ie.output(prob)

            with ie.false_block():
                false_image = ie.input(image)
                hidden = layers.fc(input=false_image, size=200, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                ie.output(prob)

            prob = ie()
            loss = layers.cross_entropy(input=prob[0], label=label)
            avg_loss = layers.mean(loss)

            optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
            optimizer.minimize(avg_loss, startup_prog)
        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=200)

        place = core.CPUPlace()
        exe = Executor(place)

        exe.run(startup_prog)
        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                x_data = np.array([x[0] for x in data]).astype("float32")
                y_data = np.array([x[1] for x in data]).astype("int64")
                y_data = y_data.reshape((y_data.shape[0], 1))

                outs = exe.run(prog,
                               feed={'x': x_data,
                                     'y': y_data},
                               fetch_list=[avg_loss])
                print(outs[0])
                if outs[0] < 1.0:
                    return
        self.assertFalse(True)
Example #17
def build_and_run_program(place, batch_size, beam_size, stop_gradient=False):
    fluid.default_startup_program().random_seed = 1
    fluid.default_main_program().random_seed = 1
    np.random.seed(2)

    x = layers.assign(
        np.random.rand(batch_size, beam_size, 32).astype("float32"))
    indices = fluid.data(shape=[None, beam_size], dtype="int64", name="indices")
    step_idx = layers.fill_constant(
        shape=[1], dtype="int64", value=0, force_cpu=True)
    max_len = layers.fill_constant(
        shape=[1], dtype="int64", value=10, force_cpu=True)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)
    scores = layers.array_write(x, step_idx)
    with while_op.block():
        bs = layers.cast(layers.shape(x)[0], "int64")
        for _ in range(20):
            bs = layers.cast(bs, 'int64')
        bs.stop_gradient = stop_gradient
        batch_pos = layers.expand(
            layers.unsqueeze(
                layers.range(
                    0, bs, 1, dtype=bs.dtype), [1]), [1, beam_size])
        topk_coordinates = layers.stack([batch_pos, indices], axis=2)
        topk_coordinates.stop_gradient = stop_gradient
        score = layers.gather_nd(x, topk_coordinates)
        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.array_write(score, i=step_idx, array=scores)
        length_cond = layers.less_than(x=step_idx, y=max_len)
        layers.assign(length_cond, cond)

    out = layers.tensor_array_to_tensor(scores, axis=0, use_stack=True)[0]
    loss = layers.reduce_mean(out)
    opt = fluid.optimizer.Adam(0.01)
    opt.minimize(loss)
    exe = fluid.Executor(place)
    data = np.random.randint(
        low=0, high=beam_size, size=(batch_size, beam_size)).astype("int64")
    loss_val, = exe.run(feed={"indices": data}, fetch_list=[loss])

    return loss_val
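Here the loop condition is refreshed with layers.assign(length_cond, cond) rather than less_than(..., cond=cond) as in the earlier examples; both idioms overwrite cond in place, so they are interchangeable ways of driving the While loop.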
Example #18
    def decrement(self):
        new_scale = self.scale / self.factor
        one = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
        less_than_one = layers.less_than(new_scale, one)
        with layers.Switch() as switch:
            with switch.case(less_than_one):
                layers.assign(one, self.scale)
            with switch.default():
                layers.assign(new_scale, self.scale)

        layers.assign(layers.zeros_like(self.good_steps), self.good_steps)
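decrement() divides the loss scale by self.factor but clamps it from below at 1.0, and good_steps is reset to zero regardless of which branch ran. Compare Examples #21 and #25, which implement the mirror-image increment() with Switch and with layers.cond respectively.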
Example #19
    def append_cond_op(self, program):
        def true_func():
            return layers.fill_constant(shape=[2, 3], dtype='int32', value=2)

        def false_func():
            return layers.fill_constant(shape=[3, 2], dtype='int32', value=-1)

        with fluid.program_guard(program):
            x = layers.fill_constant(shape=[1], dtype='float32', value=0.1)
            y = layers.fill_constant(shape=[1], dtype='float32', value=0.23)
            pred = layers.less_than(y, x)
            out = layers.cond(pred, true_func, false_func)
Example #20
    def build_program(self, compile_program=True):
        startup_program = fluid.Program()
        main_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            image = fluid.layers.data(name='x', shape=[784], dtype='float32')
            hidden1 = fluid.layers.fc(input=image, size=64, act='relu')
            i = layers.zeros(shape=[1], dtype='int64')
            counter = fluid.layers.zeros(
                shape=[1], dtype='int64', force_cpu=True)
            until = layers.fill_constant([1], dtype='int64', value=10)
            data_arr = layers.array_write(hidden1, i)
            cond = fluid.layers.less_than(x=counter, y=until)
            while_op = fluid.layers.While(cond=cond)
            with while_op.block():
                hidden_n = fluid.layers.fc(input=hidden1, size=64, act='relu')
                layers.array_write(hidden_n, i, data_arr)
                fluid.layers.increment(x=counter, value=1, in_place=True)
                layers.less_than(x=counter, y=until, cond=cond)

            hidden_n = layers.array_read(data_arr, i)
            hidden2 = fluid.layers.fc(input=hidden_n, size=64, act='relu')
            predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
            label = fluid.layers.data(name='y', shape=[1], dtype='int64')
            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(cost)
            batch_size = fluid.layers.create_tensor(dtype='int64')
            batch_acc = fluid.layers.accuracy(
                input=predict, label=label, total=batch_size)

        optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
        opts = optimizer.minimize(avg_cost, startup_program=startup_program)

        if compile_program:
            train_program = fluid.compiler.CompiledProgram(
                main_program).with_data_parallel(loss_name=avg_cost.name)
        else:
            train_program = main_program
        return train_program, startup_program, avg_cost, batch_size, batch_acc
Example #21
 def increment(self):
     enough_steps = layers.less_than(self.increment_every,
                                     self.good_steps + 1)
     with layers.Switch() as switch:
         with switch.case(enough_steps):
             new_scale = self.scale * self.factor
             scale_valid = layers.isfinite(new_scale)
             with layers.Switch() as switch2:
                 with switch2.case(scale_valid):
                     layers.assign(new_scale, self.scale)
                     layers.assign(layers.zeros_like(self.good_steps),
                                   self.good_steps)
                 with switch2.default():
                     layers.increment(self.good_steps)
         with switch.default():
             layers.increment(self.good_steps)
Example #22
    def test_error(self):
        def fn_1():
            return layers.fill_constant(shape=[4, 2], dtype='int32', value=1)

        main_program = Program()
        startup_program = Program()
        with program_guard(main_program, startup_program):
            x = layers.fill_constant(shape=[1], dtype='float32', value=0.23)
            z = layers.fill_constant(shape=[1], dtype='float32', value=0.2)
            pred_1 = layers.less_than(z, x)  # true

            # The type of 'pred_fn_pairs' in case must be list or  tuple
            def type_error_pred_fn_pairs():
                layers.case(pred_fn_pairs=1, default=fn_1)

            self.assertRaises(TypeError, type_error_pred_fn_pairs)

            # The elements' type of 'pred_fn_pairs' in Op(case) must be tuple
            def type_error_pred_fn_1():
                layers.case(pred_fn_pairs=[1], default=fn_1)

            self.assertRaises(TypeError, type_error_pred_fn_1)

            # The tuple's size of 'pred_fn_pairs' in Op(case) must be 2
            def type_error_pred_fn_2():
                layers.case(pred_fn_pairs=[(1, 2, 3)], default=fn_1)

            self.assertRaises(TypeError, type_error_pred_fn_2)

            # The pred's type of 'pred_fn_pairs' in Op(case) must be bool Variable
            def type_error_pred():
                layers.case(pred_fn_pairs=[(1, fn_1)], default=fn_1)

            self.assertRaises(TypeError, type_error_pred)

            # The function of pred_fn_pairs in case must be callable
            def type_error_fn():
                layers.case(pred_fn_pairs=[(pred_1, 2)], default=fn_1)

            self.assertRaises(TypeError, type_error_fn)

            # The default in Op(case) must be callable
            def type_error_default():
                layers.case(pred_fn_pairs=[(pred_1, fn_1)], default=fn_1())

            self.assertRaises(TypeError, type_error_default)
Example #23
    def pairwise_hinge(self):
        """pairwise model"""
        poi_repr = L.split(self.poi_repr, 2, dim=0)
        pos_repr, neg_repr = poi_repr
        pos_pred = L.cos_sim(self.query_repr, pos_repr)
        neg_pred = L.cos_sim(self.query_repr, neg_repr)

        mode = 'hinge_loss'
        # logistic: log(1 + exp(-z)); hinge: max(0, 1 - z), with z = pos_pred - neg_pred
        if 'hinge_loss' == mode:
            theta_z = L.relu(1 + neg_pred - pos_pred)
        elif 'logistic_loss' == mode:
            theta_z = L.log(1 + L.exp(neg_pred - pos_pred))
        self.loss = L.reduce_mean(theta_z)
        pos_cnt = L.reduce_sum(L.cast(L.greater_than(pos_pred, neg_pred), dtype="float32"))
        neg_cnt = L.reduce_sum(L.cast(L.less_than(pos_pred, neg_pred), dtype="float32"))
        self.order = pos_cnt / (1e-5 + neg_cnt)
        self.metrics = [self.loss, self.order]
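The order metric counts correctly ordered (pos, neg) pairs against inverted ones via greater_than/less_than. A plain-numpy sketch of the same computation, with made-up scores:

import numpy as np

pos_pred = np.array([0.9, 0.2, 0.7], dtype="float32")
neg_pred = np.array([0.1, 0.4, 0.6], dtype="float32")
pos_cnt = np.sum(pos_pred > neg_pred)   # 2 correctly ordered pairs
neg_cnt = np.sum(pos_pred < neg_pred)   # 1 inverted pair
order = pos_cnt / (1e-5 + neg_cnt)      # ~2.0; higher is better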
Example #24
    def test_input_type_error(self):
        main_program = Program()
        startup_program = Program()
        with program_guard(main_program, startup_program):
            src = layers.data(name='data', shape=[1], dtype='float32')
            const_value = layers.fill_constant(
                [1], dtype='float32', value=123.0)
            ifcond = layers.less_than(x=src, y=const_value)
            with self.assertRaises(TypeError):
                ie = layers.IfElse(set())
            with self.assertRaises(TypeError):
                ie = layers.IfElse(ifcond, set())

            with self.assertRaises(TypeError):
                ie = layers.IfElse(ifcond)
                with ie.true_block():
                    true_target = ie.input(src)
                    true_target = fluid.layers.exp(true_target)
                    ie.output([])
Example #25
    def increment(self):
        enough_steps = layers.less_than(self.increment_every,
                                        self.good_steps + 1)

        def increment_step():
            layers.increment(self.good_steps)

        def maybe_update():
            new_scale = self.scale * self.factor
            scale_valid = layers.isfinite(new_scale)

            def update_scale_and_step():
                layers.assign(new_scale, self.scale)
                layers.assign(
                    layers.zeros_like(self.good_steps), self.good_steps)

            layers.cond(scale_valid, update_scale_and_step)

        layers.cond(enough_steps, maybe_update, increment_step)
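This is the same logic as the Switch-based increment() in Example #21, rewritten with layers.cond: the branches become callables, and the one-armed layers.cond(scale_valid, update_scale_and_step) is a no-op when the predicate is false.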
Example #26
 def pairwise_loss(self):
     """pairwise model"""
     # TODO: for neg_num neg poi, split num should be (neg_num + 1) on dim 0
     poi_repr = L.split(self.poi_repr, [1 * self.batch_size, self.neg_num * self.batch_size], dim=0)
     pos_repr, neg_repr = poi_repr
     # size [-1 x emb_size]
     # size [-1*n x emb_size]
     prefix_expand = L.reshape(L.expand(self.query_repr, [1, self.neg_num]), [-1, self.hidden_size])
     # size [-1*n x 1]
     neg_pred_n = self.safe_cosine_sim(neg_repr, prefix_expand)
     # size [-1 x 1]
     pos_pred = self.safe_cosine_sim(pos_repr, self.query_repr)
     cost = self.loss_neg_log_of_pos(pos_pred, L.reshape(neg_pred_n, [-1, self.neg_num]), 15)
     self.loss = L.mean(x=cost)
     # size [-1 x 1]
     neg_avg = L.reduce_mean(L.reshape(neg_pred_n, [-1, self.neg_num]), dim=1, keep_dim=True)
     pos_cnt = L.reduce_sum(L.cast(L.greater_than(pos_pred, neg_avg), dtype="float32"))
     neg_cnt = L.reduce_sum(L.cast(L.less_than(pos_pred, neg_avg), dtype="float32"))
     # equal to positive and negative order
     self.order = pos_cnt / (1e-5 + neg_cnt)
     self.metrics = [self.loss, self.order]
Example #27
    def build_graph_with_sub_graph(self):
        def linear_fc(num):
            data = fluid.layers.data(name='image',
                                     shape=[1, 32, 32],
                                     dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            hidden = data
            for _ in six.moves.xrange(num):
                hidden = fluid.layers.fc(hidden, size=128, act='relu')
            loss = fluid.layers.cross_entropy(input=hidden, label=label)
            loss = fluid.layers.mean(loss)
            return loss

        main_program = Program()
        startup_program = Program()

        def true_func():
            return linear_fc(3)

        def false_func():
            return linear_fc(5)

        with program_guard(main_program, startup_program):
            x = layers.fill_constant(shape=[1], dtype='float32', value=0.1)
            y = layers.fill_constant(shape=[1], dtype='float32', value=0.23)
            pred = layers.less_than(y, x)
            out = layers.cond(pred, true_func, false_func)

        core_graph = core.Graph(main_program.desc)
        # We should create the graph for test; otherwise it will throw an
        # error that it cannot find the node of "STEP_COUNTER"
        graph = IrGraph(core_graph, for_test=True)
        sub_graph = graph.get_sub_graph(0)
        all_sub_graphs = graph.all_sub_graphs(
            for_test=True)  # same reason for subgraph
        # Return the graph and sub_graphs together; if only the sub_graphs were
        # returned, the graph would be destructed and the sub_graphs left empty.
        return graph, all_sub_graphs
Example #28
    def build_program(self):
        def true_func():
            return layers.fill_constant(shape=[1, 2], dtype='int32',
                                        value=1), layers.fill_constant(
                                            shape=[2, 3],
                                            dtype='bool',
                                            value=True)

        def false_func():
            return layers.fill_constant(shape=[3, 4], dtype='float32',
                                        value=3), layers.fill_constant(
                                            shape=[4, 5],
                                            dtype='int64',
                                            value=2)

        main_program = Program()
        startup_program = Program()
        with program_guard(main_program, startup_program):
            x = layers.fill_constant(shape=[1], dtype='float32', value=0.1)
            y = layers.fill_constant(shape=[1], dtype='float32', value=0.23)
            pred = layers.less_than(x, y)
            out = layers.cond(pred, true_func, false_func)
            # out is a tuple containing 2 tensors
            return main_program, startup_program, out
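Note that the two branches may return tensors of different shapes and dtypes, but they must return the same number of outputs; layers.cond packs them into a tuple mirroring the branches' return structure.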
Example #29
def decoder_decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(name="init_scores",
                          shape=[1],
                          dtype="float32",
                          lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)

    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the lod of pre_state to be the same as pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(input=pre_ids,
                                   size=[dict_size, word_dim],
                                   dtype='float32',
                                   is_sparse=is_sparse)

        # use rnn unit to update rnn
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=50)
        selected_ids, selected_scores = pd.beam_search(pre_ids,
                                                       topk_indices,
                                                       topk_scores,
                                                       beam_size,
                                                       end_id=10,
                                                       level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        pd.less_than(x=counter, y=array_len, cond=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array)

    # return init_ids, init_scores

    return translation_ids, translation_scores
Example #30
def decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)

    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the lod of pre_state to be the same as pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(
            input=pre_ids,
            size=[dict_size, word_dim],
            dtype='float32',
            is_sparse=is_sparse)

        # use rnn unit to update rnn
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
        selected_ids, selected_scores = pd.beam_search(
            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        pd.less_than(x=counter, y=array_len, cond=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array)

    # return init_ids, init_scores

    return translation_ids, translation_scores
Example #31
    def test_raw_api(self):
        prog = Program()
        startup_prog = Program()
        with program_guard(prog, startup_prog):
            image = layers.data(name='x', shape=[784], dtype='float32')

            label = layers.data(name='y', shape=[1], dtype='int64')

            limit = layers.fill_constant_batch_size_like(
                input=label, dtype='int64', shape=[1], value=5.0)
            cond = layers.less_than(x=label, y=limit)
            true_image, false_image = layers.split_lod_tensor(
                input=image, mask=cond)

            true_out = layers.create_tensor(dtype='float32')
            true_cond = layers.ConditionalBlock([true_image])

            with true_cond.block():
                hidden = layers.fc(input=true_image, size=100, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                layers.assign(input=prob, output=true_out)

            false_out = layers.create_tensor(dtype='float32')
            false_cond = layers.ConditionalBlock([false_image])

            with false_cond.block():
                hidden = layers.fc(input=false_image, size=200, act='tanh')
                prob = layers.fc(input=hidden, size=10, act='softmax')
                layers.assign(input=prob, output=false_out)

            prob = layers.merge_lod_tensor(
                in_true=true_out, in_false=false_out, mask=cond, x=image)
            loss = layers.cross_entropy(input=prob, label=label)
            avg_loss = layers.mean(loss)

            optimizer = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
            optimizer.minimize(avg_loss, startup_prog)

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=200)

        place = core.CPUPlace()
        exe = Executor(place)

        exe.run(startup_prog)
        PASS_NUM = 100
        for pass_id in range(PASS_NUM):
            for data in train_reader():
                x_data = np.array([x[0] for x in data]).astype("float32")
                y_data = np.array([x[1] for x in data]).astype("int64")
                y_data = np.expand_dims(y_data, axis=1)

                outs = exe.run(prog,
                               feed={'x': x_data,
                                     'y': y_data},
                               fetch_list=[avg_loss])
                print(outs[0])
                if outs[0] < 1.0:
                    return
        self.assertFalse(True)
Example #32
    def infilling_decode(self):
        if self.task_type == "dialog":
            emb_num = 4
        else:
            emb_num = 3
        input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                       [[-1, self.max_seq_len, self.max_seq_len]]
        input_dtypes = ['int64'] * emb_num + ['float32']
        input_lod_levels = [0] * emb_num + [0]

        shapes = input_shapes + [[-1, self.max_seq_len, 1],
                                 [-1, self.max_seq_len, 1], [-1, 1], [-1],
                                 [-1, 1, self.max_seq_len], [-1, 1]]
        dtypes = input_dtypes + [
            'int64', 'int64', 'float32', 'int32', 'float32', 'int64'
        ]
        lod_levels = input_lod_levels + [2, 2, 2, 0, 0, 0]

        inputs = self.to_ternsor(shapes, dtypes, lod_levels)
        pyreader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                      capacity=50,
                                                      iterable=False)

        emb_ids = {}
        for key, value in zip(self.emb_keys, inputs[:emb_num]):
            emb_ids[key] = value

        input_mask = inputs[emb_num]
        tgt_ids, tgt_pos, init_scores, parent_idx, tgt_input_mask, data_ids = inputs[
            -6:]

        ernie = ErnieModel(emb_ids=emb_ids,
                           input_mask=input_mask,
                           config=self.ernie_config,
                           use_fp16=self.use_fp16,
                           task_type=self.task_type,
                           decoding=True,
                           gather_idx=parent_idx)

        max_len = layers.fill_constant(shape=[1],
                                       dtype=tgt_ids.dtype,
                                       value=self.max_dec_len,
                                       force_cpu=True)
        step_idx = layers.fill_constant(shape=[1],
                                        dtype=tgt_ids.dtype,
                                        value=0,
                                        force_cpu=True)
        pos_idx = layers.fill_constant(shape=[1],
                                       dtype=tgt_ids.dtype,
                                       value=1,
                                       force_cpu=True)
        cond = layers.less_than(x=step_idx, y=max_len)
        while_op = layers.While(cond)

        ids = layers.array_write(layers.reshape(tgt_ids, (-1, 1)), step_idx)
        pos_biases = layers.array_write(layers.reshape(tgt_pos, (-1, 1)),
                                        step_idx)
        scores = layers.array_write(init_scores, step_idx)
        tgt_masks = layers.array_write(tgt_input_mask, step_idx)

        with while_op.block():
            pre_ids = layers.array_read(array=ids, i=step_idx)
            pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
            pre_scores = layers.array_read(array=scores, i=step_idx)
            pos_bias = layers.array_read(array=pos_biases, i=step_idx)
            pos_bias = layers.gather(input=pos_bias, index=parent_idx)
            tmp_mask = layers.array_read(tgt_masks, i=step_idx)

            def gen_batch_like(value,
                               dtype="int64",
                               shape=[-1, 1, 1],
                               is_scalar=True):
                if is_scalar:
                    return layers.fill_constant_batch_size_like(
                        input=parent_idx,
                        value=value,
                        shape=shape,
                        dtype=dtype)
                else:
                    return layers.elementwise_mul(
                        x=layers.fill_constant_batch_size_like(
                            input=parent_idx,
                            value=1,
                            shape=shape,
                            dtype=dtype),
                        y=value,
                        axis=0)

            tmp_mask = layers.gather(input=tmp_mask, index=parent_idx)
            append_0_mask = gen_batch_like(0.0, dtype=tmp_mask.dtype)
            append_1_mask = gen_batch_like(1.0, dtype=tmp_mask.dtype)
            tmp_mask = layers.concat([tmp_mask, append_1_mask], axis=2)
            pre_mask = layers.concat([tmp_mask, append_0_mask], axis=2)
            cur_mask = layers.concat([tmp_mask, append_1_mask], axis=2)

            cur_ids = gen_batch_like(self.attn_id)
            pre_pos = gen_batch_like(step_idx, is_scalar=False)
            cur_pos = gen_batch_like(pos_idx, is_scalar=False)
            if self.continuous_position:
                pre_pos = pre_pos + pos_bias
                cur_pos = cur_pos + pos_bias

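            # two-position decoder input: the previous token plus the attention
            # placeholder (attn_id) that queries the next token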
            dec_emb_ids = {
                "word_embedding": layers.concat([pre_ids, cur_ids], axis=1),
                "pos_embedding": layers.concat([pre_pos, cur_pos], axis=1)
            }
            if self.task_type == "dialog":
                role_ids = gen_batch_like(0)
                turn_ids = gen_batch_like(0)
                dec_emb_ids["role_embedding"] = layers.concat(
                    [role_ids, role_ids], axis=1)
                dec_emb_ids["turn_embedding"] = layers.concat(
                    [turn_ids, turn_ids], axis=1)
            else:
                sent_ids = gen_batch_like(self.tgt_type_id)
                dec_emb_ids["sent_embedding"] = layers.concat(
                    [sent_ids, sent_ids], axis=1)
            dec_mask = layers.concat([pre_mask, cur_mask], axis=1)

            dec_out = ernie.encode(dec_emb_ids,
                                   dec_mask,
                                   parent_idx,
                                   remove_query=True)
            fc_out = self.cal_logit(dec_out[:, 1:, :], None)
            topk_scores, topk_indices = layers.topk(
                input=layers.softmax(fc_out), k=self.beam_size)
            pre_lenpen = layers.pow(
                (5.0 + layers.cast(step_idx, pre_scores.dtype)) / 6.0,
                self.length_penalty)
            cur_lenpen = layers.pow(
                (5.0 + layers.cast(pos_idx, pre_scores.dtype)) / 6.0,
                self.length_penalty)
            accu_scores = layers.elementwise_add(x=layers.log(topk_scores),
                                                 y=pre_scores * pre_lenpen,
                                                 axis=0) / cur_lenpen
            topk_indices = layers.lod_reset(topk_indices, pre_ids)
            accu_scores = layers.lod_reset(accu_scores, pre_ids)
            selected_ids, selected_scores, gather_idx = layers.beam_search(
                pre_ids=pre_ids,
                pre_scores=pre_scores,
                ids=topk_indices,
                scores=accu_scores,
                beam_size=self.beam_size,
                end_id=self.eos_idx,
                return_parent_idx=True)

            layers.increment(x=step_idx, value=1.0, in_place=True)
            layers.increment(x=pos_idx, value=1.0, in_place=True)
            layers.array_write(selected_ids, i=step_idx, array=ids)
            layers.array_write(selected_scores, i=step_idx, array=scores)
            layers.array_write(tmp_mask, i=step_idx, array=tgt_masks)
            layers.array_write(pos_bias, i=step_idx, array=pos_biases)

            layers.assign(gather_idx, parent_idx)
            length_cond = layers.less_than(x=step_idx, y=max_len)
            finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)

        finished_ids, finished_scores = layers.beam_search_decode(
            ids, scores, beam_size=self.beam_size, end_id=self.eos_idx)

        graph_vars = {
            "finished_ids": finished_ids,
            "finished_scores": finished_scores,
            "data_ids": data_ids
        }

        for k, v in graph_vars.items():
            v.persistable = True

        return pyreader, graph_vars
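
The accumulated beam scores in this example are normalized with the GNMT-style length penalty lp(n) = ((5 + n) / 6) ** alpha. A minimal numpy sketch of the accu_scores update above (illustrative names; `alpha` stands in for self.length_penalty):

import numpy as np

def update_scores(topk_probs, pre_scores, step, alpha):
    """topk_probs: (beam, k) softmax probs; pre_scores: (beam,) accumulated scores."""
    pre_lp = ((5.0 + step) / 6.0) ** alpha      # penalty at the previous length
    cur_lp = ((5.0 + step + 1) / 6.0) ** alpha  # penalty at the current length
    # undo the old normalization, add the new log-prob, re-normalize
    return (np.log(topk_probs) + pre_scores[:, None] * pre_lp) / cur_lp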
Example #33
0
    def test_simple_forward(self):
        d0 = layers.data("d0",
                         shape=[10],
                         append_batch_size=False,
                         dtype='float32')
        d1 = layers.data("d1",
                         shape=[10],
                         append_batch_size=False,
                         dtype='float32')
        d2 = layers.data("d2",
                         shape=[10],
                         append_batch_size=False,
                         dtype='float32')

        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True

        init = layers.zeros(shape=[10], dtype='float32')
        mem_array = layers.array_write(x=init, i=i)
        data_array = layers.array_write(x=d0, i=i)

        i = layers.increment(i)
        layers.array_write(d1, i, array=data_array)

        i = layers.increment(i)
        layers.array_write(d2, i, array=data_array)

        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True

        array_len = layers.fill_constant(shape=[1], dtype='int64', value=1)
        array_len.stop_gradient = True
        cond = layers.less_than(x=i, y=array_len)

        j = layers.fill_constant(shape=[1], dtype='int64', value=1)
        j.stop_gradient = True

        array_len2 = layers.fill_constant(shape=[1], dtype='int64', value=3)
        array_len2.stop_gradient = True
        cond2 = layers.less_than(x=j, y=array_len2)

        while_op = layers.While(cond=cond)
        while_op2 = layers.While(cond=cond2)
        with while_op.block():
            d = layers.array_read(array=data_array, i=i)
            prev = layers.array_read(array=mem_array, i=i)
            result = layers.sums(input=[d, prev])

            i = layers.increment(x=i, in_place=True)
            layers.array_write(result, i=i, array=mem_array)
            layers.less_than(x=i, y=array_len, cond=cond)

            with while_op2.block():
                d2 = layers.array_read(array=data_array, i=j)
                prev2 = layers.array_read(array=mem_array, i=j)
                result2 = layers.sums(input=[d2, prev2])

                j = layers.increment(x=j, in_place=True)
                layers.array_write(result2, i=j, array=mem_array)
                layers.less_than(x=j, y=array_len2, cond=cond2)

        sum_result = layers.array_read(array=mem_array, i=j)
        loss = layers.mean(sum_result)

        append_backward(loss)

        cpu = core.CPUPlace()
        exe = Executor(cpu)
        d = []

        for i in range(3):
            d.append(numpy.random.random(size=[10]).astype('float32'))

        outs = exe.run(feed={
            'd0': d[0],
            'd1': d[1],
            'd2': d[2]
        },
                       fetch_list=[sum_result])
        self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01)
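
Tracing the nested loops above: the outer body folds d0 into mem_array, the inner body then folds in d1 and d2, so sum_result ends up as d0 + d1 + d2 and the assertion compares its sum against numpy.sum(d). A minimal numpy restatement of that expectation:

import numpy
d = [numpy.random.random(size=[10]).astype('float32') for _ in range(3)]
expected = d[0] + d[1] + d[2]   # what the nested While loops accumulate
assert abs(numpy.sum(d) - numpy.sum(expected)) < 0.01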
Example #34
0
    def inference(self, model, inputs, outputs):
        """
        Run inference.

        Args:
            inputs(dict): Its key is input name(str) and its value is a Variable.
            model(object): A generate model. Need to implement `_generation_network` and `_calc_logits`.

        Returns:
            dict(str:Variable): Its key is output name(str) and its value is a Variable.
        """
        # prepare while loop
        max_len = layers.fill_constant(
            shape=[1], dtype="int64", value=self.max_dec_len, force_cpu=True)
        min_len = layers.fill_constant(
            shape=[1], dtype="int64", value=self.min_dec_len, force_cpu=True)
        step_idx = layers.fill_constant(
            shape=[1], dtype="int64", value=0, force_cpu=True)

        ids = layers.array_write(layers.reshape(inputs["tgt_ids"], (-1, 1)), step_idx)
        pos_biases = layers.array_write(layers.reshape(inputs["tgt_pos"], (-1, 1)), step_idx)
        scores = layers.array_write(inputs["init_score"], step_idx)
        tgt_generation_mask = layers.array_write(inputs["tgt_generation_mask"], step_idx)
        parent_idx = inputs["parent_idx"]

        if self.decoding_strategy == "beam_search":
            beam_size = self.beam_size
        else:
            beam_size = 1

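        # additive logit penalties: adding -1e9 effectively masks eos (while below
        # min_len) and the unk/mask tokens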
        eos_penalty = np.zeros(self.vocab_size, dtype="float32")
        eos_penalty[self.eos_id] = -1e9
        eos_penalty = layers.assign(eos_penalty)

        token_penalty = np.zeros(self.vocab_size, dtype="float32")
        token_penalty[self.unk_id] = -1e9
        if self.mask_id >= 0:
            token_penalty[self.mask_id] = -1e9
        token_penalty = layers.assign(token_penalty)

        # start while loop
        cond = layers.less_than(x=step_idx, y=max_len)
        while_op = layers.While(cond)
        with while_op.block():
            pre_ids = layers.array_read(array=ids, i=step_idx)
            pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
            pre_scores = layers.array_read(array=scores, i=step_idx)
            pos_bias = layers.array_read(array=pos_biases, i=step_idx)
            pos_bias = layers.gather(input=pos_bias, index=parent_idx)

            tmp_tgt_generation_mask = layers.array_read(tgt_generation_mask, i=step_idx)
            dtype = tmp_tgt_generation_mask.dtype

            append_mask = layers.fill_constant_batch_size_like(
                    input=pre_ids,
                    value=1.0,
                    shape=[-1, 1, 1],
                    dtype=dtype)
            tmp_tgt_generation_mask = layers.concat([tmp_tgt_generation_mask, append_mask], axis=2)
            pre_mask = tmp_tgt_generation_mask = layers.gather(input=tmp_tgt_generation_mask, index=parent_idx)

            pre_sent = layers.fill_constant_batch_size_like(
                    input=pre_mask,
                    value=1,
                    shape=[-1, 1, 1],
                    dtype=pre_ids.dtype)

            if self.continuous_position:
                pre_pos = layers.elementwise_mul(
                    x=layers.fill_constant_batch_size_like(
                        input=pre_mask,
                        value=1,
                        shape=[-1, 1, 1],
                        dtype=pre_ids.dtype), y=step_idx, axis=0) + pos_bias
            else:
                pre_pos = layers.elementwise_mul(
                    x=layers.fill_constant_batch_size_like(
                        input=pre_mask,
                        value=1,
                        shape=[-1, 1, 1],
                        dtype=pre_ids.dtype), y=step_idx, axis=0)

            if self.use_role:
                pre_role = layers.fill_constant_batch_size_like(
                        input=pre_mask,
                        value=0,
                        shape=[-1, 1, 1],
                        dtype=pre_ids.dtype)
            else:
                pre_role = None

            dec_out, _ = model._generation_network(
                token_ids=pre_ids,
                type_ids=pre_sent,
                pos_ids=pre_pos,
                role_ids=pre_role,
                generation_mask=tmp_tgt_generation_mask,
                gather_idx=parent_idx)
            logits = model._calc_logits(dec_out)

            # ignore unk and mask token
            if self.ignore_unk:
                logits = layers.elementwise_add(logits, token_penalty, axis=1)

            # enforce the minimum decoding length by penalizing eos
            min_len_cond = layers.less_than(x=step_idx, y=min_len)

            def min_len_penalty():
                """Add the eos penalty to the logits while below min_len."""
                return layers.elementwise_add(logits, eos_penalty, axis=1)

            def no_penalty():
                """Return the logits unchanged."""
                return logits

            logits = layers.case([(min_len_cond, min_len_penalty)], default=no_penalty)

            # get probs
            probs = layers.softmax(logits / self.temperature)

            if self.decoding_strategy == "beam_search":
                topk_scores, topk_indices = layers.topk(
                    input=probs, k=beam_size)
            else:
                if self.decoding_strategy.startswith("sampling"):
                    sampling_ids = layers.sampling_id(probs, dtype="int")
                elif self.decoding_strategy.startswith("topk_sampling"):
                    topk_probs, _ = layers.topk(input=probs, k=self.topk)
                    ge_cond = layers.cast(
                        layers.greater_equal(
                            probs,
                            layers.unsqueeze(topk_probs[:, -1], [1])),
                        "float32")
                    old_probs = probs
                    probs = probs * ge_cond / layers.reduce_sum(topk_probs, dim=-1, keep_dim=True)
                    sampling_ids = layers.sampling_id(probs, dtype="int")
                    probs = old_probs
                else:
                    raise ValueError(self.decoding_strategy)

                sampling_scores = layers.one_hot(
                    layers.unsqueeze(sampling_ids, [1]), probs.shape[1]
                )
                sampling_scores = sampling_scores * probs - (1 - sampling_scores) * 1e3
                topk_scores, topk_indices = layers.topk(
                    input=sampling_scores, k=1)

            pre_len = layers.cast(step_idx, "float32")
            layers.increment(x=step_idx, value=1.0, in_place=True)
            cur_len = layers.cast(step_idx, "float32")

            # update scores
            if self.length_average:
                accu_scores = layers.elementwise_add(
                    x=layers.log(topk_scores), y=pre_scores * pre_len, axis=0) / cur_len
            elif self.length_penalty > 0:
                pre_lp = layers.pow((5 + pre_len) / 6, self.length_penalty)
                cur_lp = layers.pow((5 + cur_len) / 6, self.length_penalty)
                accu_scores = layers.elementwise_add(
                    x=layers.log(topk_scores), y=pre_scores * pre_lp, axis=0) / cur_lp
            else:
                accu_scores = layers.elementwise_add(
                    x=layers.log(topk_scores), y=pre_scores, axis=0)
            topk_indices = layers.lod_reset(topk_indices, pre_ids)
            accu_scores = layers.lod_reset(accu_scores, pre_ids)
            selected_ids, selected_scores, gather_idx = layers.beam_search(
                pre_ids=pre_ids,
                pre_scores=pre_scores,
                ids=topk_indices,
                scores=accu_scores,
                beam_size=beam_size,
                end_id=self.eos_id,
                return_parent_idx=True)

            layers.array_write(selected_ids, i=step_idx, array=ids)
            layers.array_write(selected_scores, i=step_idx, array=scores)
            layers.array_write(pre_mask, i=step_idx, array=tgt_generation_mask)
            layers.array_write(pos_bias, i=step_idx, array=pos_biases)

            layers.assign(gather_idx, parent_idx)

            length_cond = layers.less_than(x=step_idx, y=max_len)
            finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)

        finished_ids, finished_scores = layers.beam_search_decode(
            ids, scores, beam_size=beam_size, end_id=self.eos_id)

        predictions = {
            "finished_ids": finished_ids,
            "finished_scores": finished_scores,
            "token_ids": inputs["token_ids"],
            "data_id": inputs["data_id"]
        }
        return predictions
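
The topk_sampling branch above zeroes every probability below the k-th largest, renormalizes by the top-k mass, samples from the renormalized distribution, and then recovers the sampled token's original probability through the one-hot/topk trick. A minimal numpy sketch of the same logic (illustrative names; batch/LoD handling omitted):

import numpy as np

def topk_sample(probs, k, rng=np.random):
    """probs: (batch, vocab) softmax outputs -> sampled ids and their original probs."""
    kth = np.sort(probs, axis=-1)[:, -k][:, None]          # k-th largest per row
    masked = np.where(probs >= kth, probs, 0.0)            # keep only the top-k
    renorm = masked / masked.sum(axis=-1, keepdims=True)   # renormalize their mass
    ids = np.array([rng.choice(len(p), p=p) for p in renorm])
    return ids, probs[np.arange(len(ids)), ids]            # score with original probs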
Example #35
0
def decoder_decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)

    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the recursive_sequence_lengths of pre_state to match pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(
            input=pre_ids,
            size=[dict_size, word_dim],
            dtype='float32',
            is_sparse=is_sparse)

        # use rnn unit to update rnn
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
        # calculate accumulated scores after topk to reduce computation cost
        accu_scores = pd.elementwise_add(
            x=pd.log(topk_scores), y=pd.reshape(
                pre_score, shape=[-1]), axis=0)
        selected_ids, selected_scores = pd.beam_search(
            pre_ids,
            pre_score,
            topk_indices,
            accu_scores,
            beam_size,
            end_id=10,
            level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        # update the loop condition: stop once the max length is reached or all
        # candidates of the source sentences have ended.
        length_cond = pd.less_than(x=counter, y=array_len)
        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

    # return init_ids, init_scores

    return translation_ids, translation_scores
Example #36
0
    def beam_search():
        max_len = layers.fill_constant(
            shape=[1], dtype=start_tokens.dtype, value=max_out_len)
        step_idx = layers.fill_constant(
            shape=[1], dtype=start_tokens.dtype, value=0)
        cond = layers.less_than(x=step_idx, y=max_len)
        while_op = layers.While(cond)
        # array states will be stored for each step.
        ids = layers.array_write(start_tokens, step_idx)
        scores = layers.array_write(init_scores, step_idx)
        # cell states will be overwritten at each step.
        # caches contain the states of previous steps to avoid redundant
        # computation in the decoder.
        caches = [{
            "k": layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, 0, d_model],
                dtype=enc_output.dtype,
                value=0),
            "v": layers.fill_constant_batch_size_like(
                input=start_tokens,
                shape=[-1, 0, d_model],
                dtype=enc_output.dtype,
                value=0)
        } for i in range(n_layer)]
        with while_op.block():
            pre_ids = layers.array_read(array=ids, i=step_idx)
            pre_scores = layers.array_read(array=scores, i=step_idx)
            # sequence_expand can gather sequences according to lod, so it can be
            # used in beam search to select the states of the chosen candidate ids.
            pre_src_attn_bias = layers.sequence_expand(
                x=trg_src_attn_bias, y=pre_scores)
            pre_enc_output = layers.sequence_expand(x=enc_output, y=pre_scores)
            pre_caches = [{
                "k": layers.sequence_expand(
                    x=cache["k"], y=pre_scores),
                "v": layers.sequence_expand(
                    x=cache["v"], y=pre_scores),
            } for cache in caches]
            pre_pos = layers.elementwise_mul(
                x=layers.fill_constant_batch_size_like(
                    input=pre_enc_output,  # can't use pre_ids here since it has lod
                    value=1,
                    shape=[-1, 1],
                    dtype=pre_ids.dtype),
                y=layers.increment(
                    x=step_idx, value=1.0, in_place=False),
                axis=0)
            logits = wrap_decoder(
                trg_vocab_size,
                max_in_len,
                n_layer,
                n_head,
                d_key,
                d_value,
                d_model,
                d_inner_hid,
                dropout_rate,
                weight_sharing,
                dec_inputs=(
                    pre_ids, pre_pos, None, pre_src_attn_bias, trg_data_shape,
                    slf_attn_pre_softmax_shape, slf_attn_post_softmax_shape,
                    src_attn_pre_softmax_shape, src_attn_post_softmax_shape),
                enc_output=pre_enc_output,
                caches=pre_caches)
            topk_scores, topk_indices = layers.topk(
                input=layers.softmax(logits), k=beam_size)
            accu_scores = layers.elementwise_add(
                x=layers.log(topk_scores),
                y=layers.reshape(
                    pre_scores, shape=[-1]),
                axis=0)
            # beam_search op uses lod to distinguish branches.
            topk_indices = layers.lod_reset(topk_indices, pre_ids)
            selected_ids, selected_scores = layers.beam_search(
                pre_ids=pre_ids,
                pre_scores=pre_scores,
                ids=topk_indices,
                scores=accu_scores,
                beam_size=beam_size,
                end_id=eos_idx)
            layers.increment(x=step_idx, value=1.0, in_place=True)
            # update states
            layers.array_write(selected_ids, i=step_idx, array=ids)
            layers.array_write(selected_scores, i=step_idx, array=scores)
            layers.assign(pre_src_attn_bias, trg_src_attn_bias)
            layers.assign(pre_enc_output, enc_output)
            for i in range(n_layer):
                layers.assign(pre_caches[i]["k"], caches[i]["k"])
                layers.assign(pre_caches[i]["v"], caches[i]["v"])
            layers.assign(
                layers.elementwise_add(
                    x=slf_attn_pre_softmax_shape,
                    y=attn_pre_softmax_shape_delta),
                slf_attn_pre_softmax_shape)
            layers.assign(
                layers.elementwise_add(
                    x=slf_attn_post_softmax_shape,
                    y=attn_post_softmax_shape_delta),
                slf_attn_post_softmax_shape)

            length_cond = layers.less_than(x=step_idx, y=max_len)
            finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)

        finished_ids, finished_scores = layers.beam_search_decode(
            ids, scores, beam_size=beam_size, end_id=eos_idx)
        return finished_ids, finished_scores
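
All of these decoders end with layers.beam_search_decode, which returns LoD tensors with a two-level LoD: level 0 separates the source sequences and level 1 separates each source's candidate hypotheses. A minimal post-processing sketch, assuming finished_ids was fetched with return_numpy=False:

import numpy as np

def extract_hypotheses(seq_ids):
    """seq_ids: a LoDTensor holding finished_ids from beam_search_decode."""
    lod, data = seq_ids.lod(), np.array(seq_ids)
    results = []
    for i in range(len(lod[0]) - 1):                    # each source sequence
        hyps = [data[lod[1][j]:lod[1][j + 1]]           # token ids of one hypothesis
                for j in range(lod[0][i], lod[0][i + 1])]
        results.append(hyps)
    return results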