Example 1
    def test_cond_op_in_condition(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()

        with fluid.program_guard(main_program, startup_program):
            a = fluid.layers.fill_constant(shape=[1],
                                           dtype='float32',
                                           value=1.23)
            a.stop_gradient = False
            b = fluid.layers.fill_constant(shape=[1],
                                           dtype='float32',
                                           value=1.24)
            b.stop_gradient = False
            out = fluid.layers.cond(
                a < b, lambda: fluid.layers.cond(
                    a - b < -1.0, lambda: fluid.layers.elementwise_add(a, b),
                    lambda: fluid.layers.elementwise_mul(a, b)), lambda: fluid
                .layers.cond(a == b, lambda: fluid.layers.elementwise_sub(
                    a, b), lambda: fluid.layers.elementwise_pow(a, b)))
            append_backward(out)

        place = fluid.CUDAPlace(
            0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
        exe = fluid.Executor(place)
        ret = exe.run(main_program, fetch_list=[out, a.grad_name, b.grad_name])
        # Note: fill_constant loses some precision, so we use assertAlmostEqual.
        self.assertAlmostEqual(ret[0][0], 1.5252)
        self.assertAlmostEqual(ret[1][0], 1.24)
        self.assertAlmostEqual(ret[2][0], 1.23)
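Every snippet in this listing follows the same static-graph recipe: build a program, reduce the network output to a scalar loss, call append_backward on it, and fetch gradients through each variable's grad_name. The sketch below is a minimal illustration of that pattern, assuming the legacy paddle.fluid API these tests use; it is not taken from any single example.

import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.backward import append_backward

paddle.enable_static()

main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
    x = fluid.data(name='x', shape=[1], dtype='float32')
    x.stop_gradient = False            # gradients are only recorded for non-stop-gradient vars
    loss = fluid.layers.mean(2.0 * x)  # any scalar works as the backward root
    append_backward(loss)              # appends the gradient ops to main_program

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_program)
loss_val, x_grad = exe.run(main_program,
                           feed={'x': np.array([3.0], dtype='float32')},
                           fetch_list=[loss, x.grad_name])  # gradient fetched by name
# d mean(2 * x) / d x == 2.0 for a single-element x

In Example 1 itself, a < b selects the inner cond, a - b = -0.01 is not < -1.0, so the multiply branch runs: out = a * b = 1.5252, with d(a*b)/da = b = 1.24 and d(a*b)/db = a = 1.23, which is exactly what the assertions check.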
Example 2
    def test_forward(self):
        data = layers.data(name='X', shape=[1], dtype='float32')
        data.stop_gradient = False
        cond = layers.ConditionalBlock(inputs=[data])
        out = layers.create_tensor(dtype='float32')
        with cond.block():
            hidden = layers.fc(input=data, size=10)
            layers.assign(hidden, out)

        cpu = core.CPUPlace()
        exe = Executor(cpu)
        exe.run(default_startup_program())

        x = numpy.random.random(size=(10, 1)).astype('float32')

        outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
        print(outs)
        loss = layers.mean(out)
        append_backward(loss=loss)
        outs = exe.run(
            feed={'X': x},
            fetch_list=[
                default_main_program().block(0).var(data.name + "@GRAD")
            ])[0]
        print(outs)
Example 3
    def test_grad(self):
        place = core.CPUPlace()
        program = Program()

        with program_guard(program):
            x = layers.data(
                name='x', shape=[1], dtype='float32', stop_gradient=False)
            table = layers.lod_rank_table(x, level=0)
            array = layers.lod_tensor_to_array(x, table)
            result = layers.array_to_lod_tensor(array, table)

            mean = layers.mean(result)

            append_backward(mean)

        tensor = core.LoDTensor()
        tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
        tensor.set_lod([[0, 3, 9, 10]])

        g_vars = program.global_block().var(x.name + "@GRAD")

        exe = Executor(place)
        g_out = [
            numpy.array(item).sum()
            for item in exe.run(program,
                                feed={'x': tensor},
                                fetch_list=[g_vars],
                                return_numpy=False)
        ]
        g_out_sum = numpy.array(g_out).sum()

        self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
Example 4
    def test_grad(self):
        place = core.CPUPlace()
        program = Program()

        with program_guard(program):
            x = layers.data(name='x',
                            shape=[1],
                            dtype='float32',
                            stop_gradient=False)
            table = layers.lod_rank_table(x, level=0)
            array = layers.lod_tensor_to_array(x, table)
            result = layers.array_to_lod_tensor(array, table)

            mean = layers.mean(result)

            append_backward(mean)

        tensor = core.LoDTensor()
        tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
        tensor.set_recursive_sequence_lengths([[3, 6, 1]])

        g_vars = program.global_block().var(x.name + "@GRAD")

        exe = Executor(place)
        g_out = [
            numpy.array(item).sum() for item in exe.run(program,
                                                        feed={'x': tensor},
                                                        fetch_list=[g_vars],
                                                        return_numpy=False)
        ]
        g_out_sum = numpy.array(g_out).sum()

        self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
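Examples 3 and 4 are the same test written against two API generations: the older set_lod takes cumulative offsets, while set_recursive_sequence_lengths takes per-sequence lengths. The helper below (hypothetical, not part of either test) shows that [[0, 3, 9, 10]] and [[3, 6, 1]] describe the same segmentation of the 10 rows.

def offsets_to_lengths(offsets):
    # cumulative offsets [0, 3, 9, 10] -> per-sequence lengths [3, 6, 1]
    return [b - a for a, b in zip(offsets[:-1], offsets[1:])]

def lengths_to_offsets(lengths):
    # per-sequence lengths [3, 6, 1] -> cumulative offsets [0, 3, 9, 10]
    offsets = [0]
    for n in lengths:
        offsets.append(offsets[-1] + n)
    return offsets

assert offsets_to_lengths([0, 3, 9, 10]) == [3, 6, 1]
assert lengths_to_offsets([3, 6, 1]) == [0, 3, 9, 10]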
Example 5
 def build_network(self, only_forward, **kargs):
     x = layers.data('x', shape=[3], dtype='float32', lod_level=1)
     x.stop_gradient = False
     layers.Print(input=x, **kargs)
     loss = layers.mean(x)
     append_backward(loss=loss)
     return loss
Example 6
    def test_extremely_simple_net_with_op_in_condition(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            a = fluid.layers.fill_constant(shape=[1],
                                           dtype='float32',
                                           value=1.23)
            a.stop_gradient = False
            b = fluid.layers.fill_constant(shape=[1],
                                           dtype='float32',
                                           value=1.25)
            b.stop_gradient = False
            out = layers.cond(a - b < -1.0, lambda: a, lambda: b)
        append_backward(out)

        place = fluid.CUDAPlace(
            0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
        exe = fluid.Executor(place)
        ret = exe.run(main_program,
                      fetch_list=[out, b, a.grad_name, b.grad_name])
        # Note: fill_constant loses some precision, so assertEqual is only used
        # here with values that are exactly representable in floating point.
        self.assertEqual(ret[0][0], ret[1][0])
        self.assertEqual(ret[2][0], 0.0)
        self.assertEqual(ret[3][0], 1.0)
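The gradients asserted above follow directly from which branch the cond selects; the note below is not part of the original test and just restates the arithmetic.

# a = 1.23 and b = 1.25, so a - b = -0.02, which is not < -1.0: the false
# branch runs and out is simply b. Hence d out / d a = 0.0, d out / d b = 1.0,
# and ret[0][0] == ret[1][0] because out carries b's value.
a, b = 1.23, 1.25
assert not (a - b < -1.0)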
Example 7
    def test_assign_LoDTensorArray(self):
        main_program = Program()
        startup_program = Program()
        with program_guard(main_program):
            x = fluid.data(name='x', shape=[100, 10], dtype='float32')
            x.stop_gradient = False
            y = fluid.layers.fill_constant(shape=[100, 10],
                                           dtype='float32',
                                           value=1)
            z = fluid.layers.elementwise_add(x=x, y=y)
            i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
            init_array = fluid.layers.array_write(x=z, i=i)
            array = fluid.layers.assign(init_array)
            sums = fluid.layers.array_read(array=init_array, i=i)
            mean = fluid.layers.mean(sums)
            append_backward(mean)

        place = fluid.CUDAPlace(
            0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
        exe = fluid.Executor(place)
        feed_x = np.random.random(size=(100, 10)).astype('float32')
        ones = np.ones((100, 10)).astype('float32')
        feed_add = feed_x + ones
        res = exe.run(main_program,
                      feed={'x': feed_x},
                      fetch_list=[sums.name, x.grad_name])
        self.assertTrue(np.allclose(res[0], feed_add))
        self.assertTrue(np.allclose(res[1], ones / 1000.0))
Example 8
    def test_forward_backward_single_tensor_output(self):
        program = Program()
        with program_guard(program):
            x = layers.data(name='x', shape=[2], dtype='float32')
            x.stop_gradient = False  # needed for the gradient check
            mask = layers.data(name='mask', shape=[1], dtype='int32')

            out = program.current_block().create_var(
                dtype='float32', type=core.VarDesc.VarType.LOD_TENSOR)

            select_output(x, out, mask)
            y = select_input(out, mask)
            mean = layers.mean(y)
            append_backward(mean)

        place = fluid.CUDAPlace(
            0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
        exe = Executor(place)

        feed_x = np.asarray([1.3, -1.4]).astype(np.float32)
        feed_mask = np.asarray([0]).astype(np.int32)
        ret = exe.run(program,
                      feed={
                          'x': feed_x,
                          'mask': feed_mask
                      },
                      fetch_list=[y.name, x.grad_name])
        x_grad = np.asarray([0.5, 0.5]).astype(np.float32)
        self.assertTrue(np.allclose(np.asarray(ret[0]), feed_x))
        self.assertTrue(np.allclose(np.asarray(ret[1]), x_grad))
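The expected gradients in Examples 7 and 8 both come from differentiating a mean reduction; the check below is not part of the original tests and only spells out the arithmetic.

import numpy as np

# Example 7: mean() averages a [100, 10] tensor, so every element of x
# receives gradient 1 / (100 * 10) = 1e-3, i.e. `ones / 1000.0`.
assert np.allclose(np.full((100, 10), 1.0 / (100 * 10)), np.ones((100, 10)) / 1000.0)

# Example 8: mean() averages a length-2 vector, so each element of x
# receives gradient 1 / 2 = 0.5, i.e. `x_grad = [0.5, 0.5]`.
assert np.allclose(np.full(2, 0.5), np.asarray([0.5, 0.5], dtype=np.float32))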
Example 9
    def test_forward(self):
        data = layers.data(name='X', shape=[1], dtype='float32')
        data.stop_gradient = False
        cond = ConditionalBlock(inputs=[data])
        out = layers.create_tensor(dtype='float32')
        with cond.block():
            hidden = layers.fc(input=data, size=10)
            layers.assign(hidden, out)

        cpu = core.CPUPlace()
        exe = Executor(cpu)
        exe.run(default_startup_program())

        x = numpy.random.random(size=(10, 1)).astype('float32')

        outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
        print(outs)
        loss = layers.mean(out)
        append_backward(loss=loss)
        outs = exe.run(feed={'X': x},
                       fetch_list=[
                           default_main_program().block(0).var(data.name +
                                                               "@GRAD")
                       ])[0]
        print(outs)
Example 10
    def build_graph(self, only_forward=False):
        x_tensor = fluid.layers.data(
            name='x_tensor',
            shape=[self.x_tensor_dim],
            dtype='float32',
            lod_level=1)
        x_tensor.stop_gradient = False

        static_input_tensor = fluid.layers.data(
            name='static_input_tensor',
            shape=[self.static_input_tensor_dim],
            dtype='float32',
            lod_level=1)
        static_input_tensor.stop_gradient = False

        if only_forward:
            static_input_out_array = self._program.global_block().create_var(
                name='static_input_out_array',
                type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
                dtype='float32')
            static_input_out_array.stop_gradient = True

        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            step_x = rnn.step_input(x_tensor)
            step_static_input = rnn.static_input(static_input_tensor)
            if only_forward:
                fluid.layers.array_write(
                    x=step_static_input,
                    i=rnn.step_idx,
                    array=static_input_out_array)
            last = fluid.layers.sequence_pool(
                input=step_static_input, pool_type='last')
            projected = fluid.layers.fc(input=[step_x, last],
                                        size=self.output_dim)
            rnn.output(projected)

        if only_forward:
            static_input_step_outs = []
            step_idx = fluid.layers.fill_constant(
                shape=[1], dtype='int64', value=0)
            step_idx.stop_gradient = True

            for i in range(self._max_sequence_len):
                step_out = fluid.layers.array_read(static_input_out_array,
                                                   step_idx)
                step_out.stop_gradient = True
                static_input_step_outs.append(step_out)
                fluid.layers.increment(x=step_idx, value=1.0, in_place=True)

        if only_forward:
            return static_input_step_outs

        last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
        loss = fluid.layers.mean(last)
        append_backward(loss)
        static_input_grad = self._program.global_block().var(
            framework.grad_var_name('static_input_tensor'))
        return static_input_grad, loss
Example 11
 def build_network(self, only_forward, **kargs):
     x = layers.data('x', shape=[3], dtype='float32', lod_level=1)
     x.stop_gradient = False
     printed = layers.Print(input=x, **kargs)
     if only_forward: return printed
     loss = layers.mean(printed)
     append_backward(loss=loss)
     return loss
Example 12
    def build_graph(self, only_forward=False):
        x_tensor = fluid.layers.data(name='x_tensor',
                                     shape=[self.x_tensor_dim],
                                     dtype='float32',
                                     lod_level=1)
        x_tensor.stop_gradient = False

        static_input_tensor = fluid.layers.data(
            name='static_input_tensor',
            shape=[self.static_input_tensor_dim],
            dtype='float32',
            lod_level=1)
        static_input_tensor.stop_gradient = False

        if only_forward:
            static_input_out_array = self._program.global_block().create_var(
                name='static_input_out_array',
                type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
                dtype='float32')
            static_input_out_array.stop_gradient = True

        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            step_x = rnn.step_input(x_tensor)
            step_static_input = rnn.static_input(static_input_tensor)
            if only_forward:
                fluid.layers.array_write(x=step_static_input,
                                         i=rnn.step_idx,
                                         array=static_input_out_array)
            last = fluid.layers.sequence_pool(input=step_static_input,
                                              pool_type='last')
            projected = fluid.layers.fc(input=[step_x, last],
                                        size=self.output_dim)
            rnn.output(projected)

        if only_forward:
            static_input_step_outs = []
            step_idx = fluid.layers.fill_constant(shape=[1],
                                                  dtype='int64',
                                                  value=0)
            step_idx.stop_gradient = True

            for i in range(self._max_sequence_len):
                step_out = fluid.layers.array_read(static_input_out_array,
                                                   step_idx)
                step_out.stop_gradient = True
                static_input_step_outs.append(step_out)
                fluid.layers.increment(x=step_idx, value=1.0, in_place=True)

        if only_forward:
            return static_input_step_outs

        last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
        loss = fluid.layers.mean(last)
        append_backward(loss)
        static_input_grad = self._program.global_block().var(
            framework.grad_var_name('static_input_tensor'))
        return static_input_grad, loss
Example 13
    def test_grad(self):
        place = core.CPUPlace()
        program = Program()
        with program_guard(program):
            x = layers.data(name='x',
                            shape=[1],
                            dtype='float32',
                            stop_gradient=False)
            y = layers.data(name='y',
                            shape=[1],
                            dtype='bool',
                            stop_gradient=False)

            level = 0

            out_true, out_false = split_lod_tensor(input=x,
                                                   mask=y,
                                                   level=level)
            out = merge_lod_tensor(in_true=out_true,
                                   in_false=out_false,
                                   mask=y,
                                   x=x,
                                   level=level)
            mean = layers.mean(out)

            append_backward(mean)

        tensor = core.LoDTensor()
        tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place)
        tensor.set_recursive_sequence_lengths([[3, 6, 1]])

        mask_np = np.array([0, 1, 0]).astype('bool')
        mask_np = np.expand_dims(mask_np, axis=1)

        mask = core.LoDTensor()
        mask.set(mask_np, place)

        exe = Executor(place)
        scope = core.Scope()

        g_vars = program.global_block().var(x.name + "@GRAD")
        g_out = [
            item.sum() for item in map(
                np.array,
                exe.run(program,
                        feed={
                            'x': tensor,
                            'y': mask
                        },
                        fetch_list=[g_vars],
                        scope=scope,
                        return_numpy=False))
        ]

        g_out_sum = np.array(g_out).sum()

        self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)
Example 14
    def test_backward(self):
        self.check_forward()

        append_backward(self.output)

        ana_grad = [np.array(x) for x in self.backward()]

        num_grad = self.get_numerical_gradient()
        for idx, name in enumerate(self.data_field):
            self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape)
            self.assertTrue(
                np.isclose(num_grad[idx], ana_grad[idx], rtol=0.1).all())
Example 15
    def test_simple_forward(self):
        d0 = layers.data(
            "d0", shape=[10], append_batch_size=False, dtype='float32')
        d1 = layers.data(
            "d1", shape=[10], append_batch_size=False, dtype='float32')
        d2 = layers.data(
            "d2", shape=[10], append_batch_size=False, dtype='float32')
        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True
        init = layers.zeros(shape=[10], dtype='float32')
        mem_array = layers.array_write(x=init, i=i)
        data_array = layers.array_write(x=d0, i=i)

        i = layers.increment(i)
        layers.array_write(d1, i, array=data_array)

        i = layers.increment(i)
        layers.array_write(d2, i, array=data_array)

        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = True

        array_len = layers.fill_constant(shape=[1], dtype='int64', value=3)
        array_len.stop_gradient = True
        cond = layers.less_than(x=i, y=array_len)

        while_op = layers.While(cond=cond)
        with while_op.block():
            d = layers.array_read(array=data_array, i=i)
            prev = layers.array_read(array=mem_array, i=i)
            result = layers.sums(input=[d, prev])

            i = layers.increment(x=i, in_place=True)
            layers.array_write(result, i=i, array=mem_array)
            layers.less_than(x=i, y=array_len, cond=cond)

        sum_result = layers.array_read(array=mem_array, i=i)
        loss = layers.mean(sum_result)

        append_backward(loss)

        cpu = core.CPUPlace()
        exe = Executor(cpu)
        d = []

        for i in range(3):
            d.append(numpy.random.random(size=[10]).astype('float32'))

        outs = exe.run(feed={'d0': d[0],
                             'd1': d[1],
                             'd2': d[2]},
                       fetch_list=[sum_result])
        self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01)
Example 16
    def test_backward(self):
        self.check_forward()

        append_backward(self.output)

        ana_grad = [np.array(x) for x in self.backward()]

        num_grad = self.get_numerical_gradient()
        for idx, name in enumerate(self.data_field):
            self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape)
            self.assertTrue(
                np.isclose(
                    num_grad[idx], ana_grad[idx], rtol=0.1).all())
Example 17
    def test_cond_inside_cond(self):
        """
        pseudocode:
        for i in range(1, 10):
            a = 2 * i
            if i < 5:
                if i >= 3:
                    return a + a 
                else:
                    return a - a
            else:
                if i < 8:
                    return a * a
                else:
                    return a / a
        """

        def less_than_branch(i, a):
            return layers.cond(i >= 3.0, lambda: layers.elementwise_add(a, a),
                               lambda: layers.elementwise_sub(a, a))

        def greater_equal_branch(i, a):
            return layers.cond(i < 8.0, lambda: layers.elementwise_mul(a, a),
                               lambda: layers.elementwise_div(a, a))

        main_program = Program()
        startup_program = Program()
        with program_guard(main_program, startup_program):
            i = fluid.data(name="i", shape=[1], dtype='float32')
            a = 2.0 * i
            out = layers.cond(i < 5.0, lambda: less_than_branch(i, a),
                              lambda: greater_equal_branch(i, a))
            mean = layers.mean(out)
            append_backward(mean)

        place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda(
        ) else fluid.CPUPlace()
        exe = fluid.Executor(place)
        for feed_i in range(0, 10):
            expected_a = 2.0 * feed_i
            if feed_i < 5:
                expected_ret = expected_a + expected_a if feed_i >= 3 else 0.0
                expected_a_grad = 2.0 if feed_i >= 3 else 0.0
            else:
                expected_ret = expected_a * expected_a if feed_i < 8 else 1.0
                expected_a_grad = 2.0 * expected_a if feed_i < 8 else 0.0
            ret = exe.run(main_program,
                          feed={'i': np.full((1), feed_i, np.float32)},
                          fetch_list=[out.name, a.grad_name])
            self.assertEqual(ret[0][0], expected_ret)
            self.assertEqual(ret[1][0], expected_a_grad)
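The values filled into expected_ret and expected_a_grad above can be derived by hand; the sketch below is not part of the original test and restates the per-branch gradient of out with respect to a = 2 * i.

# Branch selected by the nested cond, and its gradient w.r.t. a = 2 * i:
#   i in [0, 3):  out = a - a  ->  d out / d a = 0
#   i in [3, 5):  out = a + a  ->  d out / d a = 2
#   i in [5, 8):  out = a * a  ->  d out / d a = 2 * a
#   i in [8, 10): out = a / a  ->  d out / d a = 0
def expected_a_grad(i):
    a = 2.0 * i
    if i < 5:
        return 2.0 if i >= 3 else 0.0
    return 2.0 * a if i < 8 else 0.0

assert [expected_a_grad(i) for i in range(10)] == \
    [0.0, 0.0, 0.0, 2.0, 2.0, 20.0, 24.0, 28.0, 0.0, 0.0]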
Example 18
    def test_backward(self, numeric_grad_delta=1e-5, max_relative_error=1e-7):
        self.check_forward()

        with fluid.program_guard(self.main_program, self.startup_program):
            append_backward(self.loss)

        ana_grad = [np.array(x) for x in self.backward()]

        num_grad = self.get_numerical_gradient(delta=numeric_grad_delta)
        self.assert_is_close(num_grad,
                             ana_grad,
                             'x',
                             max_relative_error=max_relative_error,
                             msg_prefix="Gradient Check On %s" %
                             str(self.place))
Example 19
    def test_while_loop_backward(self):
        def cond(i, x):
            return layers.less_than(i, eleven)

        def body(j, x):
            # TODO: if a variable created in the parent block participates in
            # the gradient calculation inside the while block, the computed
            # gradient is incorrect because each step scope always returns the
            # same value generated by the last step. The `assign` op is called
            # here as a workaround; a fix is planned in a follow-up PR.
            i = layers.assign(j)
            x = layers.elementwise_mul(x=i, y=i)
            j = layers.increment(j)
            return [j, x]

        main_program = Program()
        startup_program = Program()
        with fluid.program_guard(main_program, startup_program):
            i = fluid.data(name='i', shape=[1], dtype='float32')
            i.stop_gradient = False
            eleven = layers.fill_constant(shape=[1], dtype='float32', value=11)
            one = layers.fill_constant(shape=[1], dtype='float32', value=1)
            x = fluid.data(name='x', shape=[1], dtype='float32')
            x.stop_gradient = False

            out = layers.while_loop(cond, body, [i, x])
            mean = layers.mean(out[1])
            append_backward(mean)

        place = fluid.CUDAPlace(
            0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
        exe = fluid.Executor(place)

        feed_i = np.ones(1).astype('float32')
        feed_x = np.ones(1).astype('float32')
        data = np.asarray([100]).astype('float32')
        i_grad = np.asarray([110]).astype('float32')

        res = exe.run(main_program,
                      feed={
                          'i': feed_i,
                          'x': feed_x
                      },
                      fetch_list=[mean.name, i.grad_name])
        self.assertTrue(np.allclose(np.asarray(res[0]), data))
        self.assertTrue(np.allclose(np.asarray(res[1]), i_grad),
                        msg=" \nres = \n{} \n\n ans = \n{}".format(
                            res[1], i_grad))
Example 20
    def test_adam_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(dtype="float32",
                                       shape=[5, 10],
                                       lod_level=0,
                                       name="mul.x",
                                       optimize_attr={'learning_rate': 1.1})
        mul_y = block.create_var(dtype="float32",
                                 shape=[10, 8],
                                 lod_level=0,
                                 name="mul.y")
        mul_out = block.create_var(dtype="float32",
                                   shape=[5, 8],
                                   lod_level=0,
                                   name="mul.out")
        block.append_op(type="mul",
                        inputs={
                            "X": mul_x,
                            "Y": mul_y
                        },
                        outputs={"Out": mul_out},
                        attrs={"x_num_col_dims": 1})
        mean_out = block.create_var(dtype="float32",
                                    shape=[1],
                                    lod_level=0,
                                    name="mean.out")
        block.append_op(type="mean",
                        inputs={"X": mul_out},
                        outputs={"Out": mean_out})
        learning_rate = 0.01
        adam_optimizer = self.MockAdam(learning_rate=learning_rate,
                                       beta1=0.9,
                                       beta2=0.999)
        params_grads = append_backward(mean_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
        with framework.program_guard(program, init_program):
            opts = adam_optimizer.apply_gradients(params_grads)
        self.assertEqual(len(opts), 2)
        self.assertEqual([op.type for op in opts], ["scale", "adam"])

        # Check accumulators
        accumulators = adam_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 4)
        self.assertTrue(adam_optimizer.get_moment1_str() in accumulators)
        self.assertTrue(adam_optimizer.get_moment2_str() in accumulators)
        moment1_acc = accumulators[adam_optimizer.get_moment1_str()]
        moment2_acc = accumulators[adam_optimizer.get_moment2_str()]
        self.assertEqual(len(moment1_acc), 1)
        self.assertEqual(len(moment2_acc), 1)
        self.assertTrue(mul_x.name in moment1_acc)
        self.assertTrue(mul_x.name in moment2_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 5)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
Example 21
 def test_l1decay_regularizer(self):
     program = framework.Program()
     block = program.global_block()
     mul_x = block.create_parameter(
         dtype="float32",
         shape=[5, 10],
         lod_level=0,
         name="mul.x",
         regularizer=regularizer.L1DecayRegularizer(0.5))
     self.assertTrue(mul_x.regularizer is not None)
     self.assertTrue(
         isinstance(mul_x.regularizer, regularizer.L1DecayRegularizer))
     mul_y = block.create_var(
         dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
     mul_out = block.create_var(
         dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
     block.append_op(
         type="mul",
         inputs={"X": mul_x,
                 "Y": mul_y},
         outputs={"Out": mul_out},
         attrs={"x_num_col_dims": 1})
     mean_out = block.create_var(
         dtype="float32", shape=[1], lod_level=0, name="mean.out")
     block.append_op(
         type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
     params_grads = append_backward(mean_out)
     self.assertEqual(len(params_grads), 1)
     count_ops = len(block.ops)
     params_grads = optimizer.append_regularization_ops(params_grads)
     self.assertEqual(len(params_grads), 1)
     self.assertEqual(len(block.ops), count_ops + 3)
     self.assertEqual(block.ops[-1].type, 'elementwise_add')
     self.assertEqual(block.ops[-2].type, 'scale')
     self.assertEqual(block.ops[-3].type, 'sign')
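The three ops appended by append_regularization_ops implement L1 weight decay: the gradient is augmented with coeff * sign(param), hence the trailing sign, scale, and elementwise_add ops (the newer variant in Example 27 ends in sum instead). A small numpy restatement, not part of the test:

import numpy as np

# L1 decay with coefficient 0.5, as configured via L1DecayRegularizer(0.5):
# regularized_grad = grad + 0.5 * sign(param)    (sign -> scale -> add)
coeff = 0.5
param = np.array([-2.0, 0.0, 3.0], dtype=np.float32)
grad = np.zeros_like(param)
regularized_grad = grad + coeff * np.sign(param)
assert np.allclose(regularized_grad, [-0.5, 0.0, 1.5])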
Example 22
 def _generate_backward(self, main_program, startup_program, loss):
     with program_guard(main_program, startup_program):
         params_grads = append_backward(
             loss, distop_context=self._dist_context.dist_op_context)
     self._completer.complete_backward_annotation(main_program)
     self._dist_context.block_state.parse_backward_blocks(main_program)
     return params_grads
Example 23
    def test_backward(self, rtol=0.01):
        self.check_forward()
        num_grad = self.get_numerical_gradient()

        with fluid.program_guard(self.main_program, self.startup_program):
            append_backward(self.output)

        ana_grad = [np.array(x) for x in self.backward()]

        for idx, name in enumerate(self.data_field):
            self.assertEqual(num_grad[idx].shape, ana_grad[idx].shape)
            self.assertTrue(
                np.isclose(num_grad[idx], ana_grad[idx], rtol=rtol).all(),
                "num_grad (" + name + ") has diff at " + str(self.place) +
                "\nExpect " + str(num_grad[idx]) + "\n" + "But Got" +
                str(ana_grad[idx]) + " in class " + self.__class__.__name__)
Example 24
    def test_vanilla_momentum_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(dtype="float32",
                                       shape=[5, 10],
                                       lod_level=0,
                                       name="mul.x",
                                       optimize_attr={'learning_rate': 1.1})
        mul_y = block.create_var(dtype="float32",
                                 shape=[10, 8],
                                 lod_level=0,
                                 name="mul.y")
        mul_out = block.create_var(dtype="float32",
                                   shape=[5, 8],
                                   lod_level=0,
                                   name="mul.out")
        block.append_op(type="mul",
                        inputs={
                            "X": mul_x,
                            "Y": mul_y
                        },
                        outputs={"Out": mul_out},
                        attrs={"x_num_col_dims": 1})
        learning_rate = 0.01
        momentum_optimizer = self.MockMomentum(learning_rate=learning_rate,
                                               momentum=0.2)
        mean_out = block.create_var(dtype="float32",
                                    shape=[1],
                                    lod_level=0,
                                    name="mean.out")
        block.append_op(type="mean",
                        inputs={"X": mul_out},
                        outputs={"Out": mean_out})
        params_grads = append_backward(mean_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
        opts = momentum_optimizer.create_optimization_pass(
            params_grads, mul_out, init_program)
        self.assertEqual(len(opts), 3)
        sgd_op = opts[-1]
        self.assertEqual([op.type for op in opts],
                         ["fill_constant", "elementwise_mul", "momentum"])
        self.assertFalse(sgd_op.attr('use_nesterov'))

        # Check accumulators
        accumulators = momentum_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 1)
        self.assertTrue(momentum_optimizer.get_velocity_str() in accumulators)
        velocity_acc = accumulators[momentum_optimizer.get_velocity_str()]
        self.assertEqual(len(velocity_acc), 1)
        self.assertTrue(mul_x.name in velocity_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 2)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
        self.assertEqual(init_ops[1].type, "fill_constant")
        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
Example 25
    def test_decayed_adagrad_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(dtype="float32",
                                       shape=[5, 10],
                                       lod_level=0,
                                       name="mul.x",
                                       optimize_attr={'learning_rate': 1.1})
        mul_y = block.create_var(dtype="float32",
                                 shape=[10, 8],
                                 lod_level=0,
                                 name="mul.y")
        mul_out = block.create_var(dtype="float32",
                                   shape=[5, 8],
                                   lod_level=0,
                                   name="mul.out")
        block.append_op(type="mul",
                        inputs={
                            "X": mul_x,
                            "Y": mul_y
                        },
                        outputs={"Out": mul_out},
                        attrs={"x_num_col_dims": 1})
        mean_out = block.create_var(dtype="float32",
                                    shape=[1],
                                    lod_level=0,
                                    name="mean.out")
        block.append_op(type="mean",
                        inputs={"X": mul_out},
                        outputs={"Out": mean_out})
        learning_rate = 0.01
        decayed_adagrad_optimizer = self.MockDecayedAdagrad(
            learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
        params_grads = append_backward(mean_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
        opts = decayed_adagrad_optimizer.create_optimization_pass(
            params_grads, mul_out, init_program)
        self.assertEqual(len(opts), 3)
        self.assertEqual(
            [op.type for op in opts],
            ["fill_constant", "elementwise_mul", "decayed_adagrad"])

        # Check accumulators
        accumulators = decayed_adagrad_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 1)
        self.assertTrue(
            decayed_adagrad_optimizer.get_moment_str() in accumulators)
        moment_acc = accumulators[decayed_adagrad_optimizer.get_moment_str()]
        self.assertEqual(len(moment_acc), 1)
        self.assertTrue(mul_x.name in moment_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 2)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
        self.assertEqual(init_ops[1].type, "fill_constant")
        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
Example 26
    def _get_gradient(self,
                      input_to_check,
                      place,
                      output_names,
                      no_grad_set,
                      parallel=False):
        prog = Program()
        block = prog.global_block()
        self._append_ops(block)
        loss = append_loss_ops(block, output_names)
        param_grad_list = append_backward(loss=loss,
                                          parameter_list=input_to_check,
                                          no_grad_set=no_grad_set)

        inputs = self._get_inputs(block)
        feed_dict = self.feed_var(inputs, place)

        fetch_list = [g for p, g in param_grad_list]
        if parallel:
            use_cuda = False
            if isinstance(place, fluid.CUDAPlace):
                use_cuda = True
            executor = fluid.ParallelExecutor(use_cuda=use_cuda,
                                              loss_name=loss.name,
                                              main_program=prog)
        else:
            executor = Executor(place)
        return list(
            map(np.array,
                executor.run(prog, feed_dict, fetch_list, return_numpy=False)))
Example 27
 def test_l1decay_regularizer(self):
     program = framework.Program()
     block = program.global_block()
     mul_x = block.create_parameter(
         dtype="float32",
         shape=[5, 10],
         lod_level=0,
         name="mul.x",
         regularizer=regularizer.L1DecayRegularizer(0.5))
     self.assertTrue(mul_x.regularizer is not None)
     self.assertTrue(
         isinstance(mul_x.regularizer, regularizer.L1DecayRegularizer))
     mul_y = block.create_var(
         dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
     mul_out = block.create_var(
         dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
     block.append_op(
         type="mul",
         inputs={"X": mul_x,
                 "Y": mul_y},
         outputs={"Out": mul_out},
         attrs={"x_num_col_dims": 1})
     mean_out = block.create_var(
         dtype="float32", shape=[1], lod_level=0, name="mean.out")
     block.append_op(
         type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
     params_grads = append_backward(mean_out)
     self.assertEqual(len(params_grads), 1)
     count_ops = len(block.ops)
     params_grads = optimizer.append_regularization_ops(params_grads)
     self.assertEqual(len(params_grads), 1)
     self.assertEqual(len(block.ops), count_ops + 3)
     self.assertEqual(block.ops[-1].type, 'sum')
     self.assertEqual(block.ops[-2].type, 'scale')
     self.assertEqual(block.ops[-3].type, 'sign')
Example 28
 def test_api(self, use_cuda=False):
     for x_stop_gradient in [False, True]:
         for y_stop_gradient in [False, True]:
             with fluid.program_guard(Program(), Program()):
                 cond = fluid.layers.data(name='cond',
                                          shape=self.shape,
                                          dtype='bool')
                 x = fluid.layers.data(name='x',
                                       shape=self.shape,
                                       dtype='float32')
                 y = fluid.layers.data(name='y',
                                       shape=self.shape,
                                       dtype='float32')
                 x.stop_gradient = x_stop_gradient
                 y.stop_gradient = y_stop_gradient
                 result = paddle.where(cond, x, y)
                 append_backward(layers.mean(result))
                 for use_cuda in [False, True]:
                     if (use_cuda
                             and (not fluid.core.is_compiled_with_cuda())):
                         break
                     place = (fluid.CUDAPlace(0)
                              if use_cuda else fluid.CPUPlace())
                     exe = fluid.Executor(place)
                     fetch_list = [result, result.grad_name]
                     if (x_stop_gradient is False):
                         fetch_list.append(x.grad_name)
                     if (y_stop_gradient is False):
                         fetch_list.append(y.grad_name)
                     out = exe.run(fluid.default_main_program(),
                                   feed={
                                       'cond': self.cond,
                                       'x': self.x,
                                       'y': self.y
                                   },
                                   fetch_list=fetch_list)
                     assert np.array_equal(out[0], self.out)
                     if (x_stop_gradient is False):
                         assert np.array_equal(out[2],
                                               self.ref_x_backward(out[1]))
                         if (y.stop_gradient is False):
                             assert np.array_equal(
                                 out[3], self.ref_y_backward(out[1]))
                     elif (y.stop_gradient is False):
                         assert np.array_equal(out[2],
                                               self.ref_y_backward(out[1]))
Example 29
    def test_while_loop_backward2(self):
        def cond(i, x):
            return i < 3

        def body(i, x):
            x = x * i
            i = i + 1
            return [i, x]

        main_program = Program()
        startup_program = Program()
        with fluid.program_guard(main_program, startup_program):
            i = fluid.data(name='i', shape=[1], dtype='float32')
            i.stop_gradient = False
            x = fluid.data(name='x', shape=[1], dtype='float32')
            x.stop_gradient = False

            out = layers.while_loop(cond, body, [i, x])
            mean = layers.mean(out[1])
            append_backward(mean)

        place = fluid.CUDAPlace(
            0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
        exe = fluid.Executor(place)

        feed_i = np.ones(1).astype('float32')
        feed_x = np.ones(1).astype('float32')
        data = np.asarray([2]).astype('float32')
        i_grad = np.asarray([3]).astype('float32')
        x_grad = np.asarray([2]).astype('float32')

        res = exe.run(main_program,
                      feed={
                          'i': feed_i,
                          'x': feed_x
                      },
                      fetch_list=[mean.name, i.grad_name, x.grad_name])
        self.assertTrue(np.allclose(np.asarray(res[0]), data))
        self.assertTrue(np.allclose(np.asarray(res[1]), i_grad),
                        msg=" \nres = \n{} \n\n ans = \n{}".format(
                            res[1], i_grad))
        self.assertTrue(np.allclose(np.asarray(res[2]), x_grad),
                        msg=" \nres = \n{} \n\n ans = \n{}".format(
                            res[2], x_grad))
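The constants data, i_grad and x_grad above follow from unrolling the two loop iterations by hand; the derivation below is not part of the original test.

# With i0 = x0 = 1 and the loop running while i < 3, the body executes twice:
#   iteration 1: x = x0 * i0,              i = i0 + 1
#   iteration 2: x = x0 * i0 * (i0 + 1),   i = i0 + 2
# so out = x0 * i0 * (i0 + 1), and mean(out) = out for a single element, giving
#   d out / d x0 = i0 * (i0 + 1)      -> 2.0  (x_grad)
#   d out / d i0 = x0 * (2 * i0 + 1)  -> 3.0  (i_grad)
i0, x0 = 1.0, 1.0
assert x0 * i0 * (i0 + 1) == 2.0       # matches `data`
assert i0 * (i0 + 1) == 2.0            # matches `x_grad`
assert x0 * (2 * i0 + 1) == 3.0        # matches `i_grad`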
Example 30
    def test_api(self):
        for x_stop_gradient in [False, True]:
            for y_stop_gradient in [False, True]:
                train_prog = fluid.Program()
                startup = fluid.Program()
                with fluid.program_guard(train_prog, startup):
                    cond = fluid.data(name='cond',
                                      shape=self.shape,
                                      dtype='bool')
                    x = fluid.data(name='x', shape=self.shape, dtype='float32')
                    y = fluid.data(name='y', shape=self.shape, dtype='float32')

                    x.stop_gradient = x_stop_gradient
                    y.stop_gradient = y_stop_gradient

                    result = paddle.where(cond, x, y)
                    append_backward(fluid.layers.mean(result))

                    exe = fluid.Executor(self.place)
                    exe.run(startup)

                    fetch_list = [result, result.grad_name]
                    if x_stop_gradient is False:
                        fetch_list.append(x.grad_name)
                    if y_stop_gradient is False:
                        fetch_list.append(y.grad_name)
                    out = exe.run(train_prog,
                                  feed={
                                      'cond': self.cond,
                                      'x': self.x,
                                      'y': self.y
                                  },
                                  fetch_list=fetch_list)
                    assert np.array_equal(out[0], self.out)

                    if x_stop_gradient is False:
                        assert np.array_equal(out[2],
                                              self.ref_x_backward(out[1]))
                        if y.stop_gradient is False:
                            assert np.array_equal(out[3],
                                                  self.ref_y_backward(out[1]))
                    elif y.stop_gradient is False:
                        assert np.array_equal(out[2],
                                              self.ref_y_backward(out[1]))
Example 31
 def setUp(self):
     self.main_program = Program()
     switch_main_program(self.main_program)
     x = layers.data('x', shape=[100], dtype='float32')
     x.stop_gradient = False
     rank_table_tensor = layers.data(
         'rank_table_tensor', shape=[1], dtype='float32', lod_level=1)
     table = layers.lod_rank_table(x=rank_table_tensor)
     i = layers.zeros(dtype='int64', shape=[1])
     self.mem1 = layers.shrink_memory(x=x, i=i, table=table)
     i = layers.increment(x=i)
     i.stop_gradient = True
     self.mem2 = layers.shrink_memory(x=self.mem1, i=i, table=table)
     i = layers.increment(x=i)
     i.stop_gradient = True
     self.mem3 = layers.shrink_memory(x=self.mem2, i=i, table=table)
     mem3_mean = layers.mean(self.mem3)
     append_backward(loss=mem3_mean)
     self.x_grad = self.main_program.global_block().var('x@GRAD')
Example 32
 def setUp(self):
     self.main_program = Program()
     switch_main_program(self.main_program)
     x = layers.data('x', shape=[100], dtype='float32')
     x.stop_gradient = False
     rank_table_tensor = layers.data(
         'rank_table_tensor', shape=[1], dtype='float32', lod_level=1)
     table = layers.lod_rank_table(x=rank_table_tensor)
     i = layers.zeros(dtype='int64', shape=[1])
     self.mem1 = layers.shrink_memory(x=x, i=i, table=table)
     i = layers.increment(x=i)
     i.stop_gradient = True
     self.mem2 = layers.shrink_memory(x=self.mem1, i=i, table=table)
     i = layers.increment(x=i)
     i.stop_gradient = True
     self.mem3 = layers.shrink_memory(x=self.mem2, i=i, table=table)
     mem3_mean = layers.mean(self.mem3)
     append_backward(loss=mem3_mean)
     self.x_grad = self.main_program.global_block().var('x@GRAD')
Example 33
    def test_while_loop_backward(self):
        def cond(i, x):
            return layers.less_than(i, eleven)

        def body(i, x):
            x = layers.elementwise_mul(x=i, y=i)
            i = layers.increment(i)
            return [i, x]

        main_program = Program()
        startup_program = Program()
        with fluid.program_guard(main_program, startup_program):
            i = fluid.data(name='i', shape=[1], dtype='float32')
            i.stop_gradient = False
            eleven = layers.fill_constant(shape=[1], dtype='float32', value=11)
            one = layers.fill_constant(shape=[1], dtype='float32', value=1)
            x = fluid.data(name='x', shape=[1], dtype='float32')
            x.stop_gradient = False

            out = layers.while_loop(cond, body, [i, x])
            mean = layers.mean(out[1])
            append_backward(mean)

        place = fluid.CUDAPlace(
            0) if core.is_compiled_with_cuda() else fluid.CPUPlace()
        exe = fluid.Executor(place)

        feed_i = np.ones(1).astype('float32')
        feed_x = np.ones(1).astype('float32')
        data = np.asarray([100]).astype('float32')
        i_grad = np.asarray([110]).astype('float32')

        res = exe.run(main_program,
                      feed={
                          'i': feed_i,
                          'x': feed_x
                      },
                      fetch_list=[mean.name, i.grad_name])
        self.assertTrue(np.allclose(np.asarray(res[0]), data))
        self.assertTrue(np.allclose(np.asarray(res[1]), i_grad),
                        msg=" \nres = \n{} \n\n ans = \n{}".format(
                            res[1], i_grad))
Example 34
    def test_adamax_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            optimize_attr={'learning_rate': 1.1})
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name="mean.out")
        block.append_op(
            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
        learning_rate = 0.01
        adamax_optimizer = self.MockAdamax(
            learning_rate=learning_rate, beta1=0.9, beta2=0.999)
        params_grads = append_backward(mean_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
        opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out,
                                                         init_program)
        self.assertEqual(len(opts), 4)
        self.assertEqual(
            [op.type for op in opts],
            ["fill_constant", "elementwise_mul", "adamax", "scale"])

        # Check accumulators
        accumulators = adamax_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 2)
        self.assertTrue(adamax_optimizer.get_moment_str() in accumulators)
        self.assertTrue(adamax_optimizer.get_inf_norm_str() in accumulators)
        moment_acc = accumulators[adamax_optimizer.get_moment_str()]
        inf_norm_acc = accumulators[adamax_optimizer.get_inf_norm_str()]
        self.assertEqual(len(moment_acc), 1)
        self.assertEqual(len(inf_norm_acc), 1)
        self.assertTrue(mul_x.name in moment_acc)
        self.assertTrue(mul_x.name in inf_norm_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 4)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
Example 35
    def test_all_parameters(self):
        x = layers.data('x', shape=[3], dtype='float32', lod_level=1)
        x.stop_gradient = False

        for print_tensor_name in [True, False]:
            for print_tensor_type in [True, False]:
                for print_tensor_shape in [True, False]:
                    for print_tensor_lod in [True, False]:
                        layers.Print(
                            input=x,
                            print_tensor_name=print_tensor_name,
                            print_tensor_type=print_tensor_type,
                            print_tensor_shape=print_tensor_shape,
                            print_tensor_lod=print_tensor_lod,
                        )
        loss = layers.mean(x)
        append_backward(loss=loss)
        exe = Executor(self.place)
        outs = exe.run(feed={'x': self.x_tensor},
                       fetch_list=[loss],
                       return_numpy=False)
Example 36
    def test_simple_net(self):
        paddle.enable_static()
        main_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(main_program, startup_program):
            loss, sum_result = self.simple_net()

            append_backward(loss)

            npu_place = paddle.NPUPlace(0)
            exe = Executor(npu_place)
            d = []

            for i in range(3):
                d.append(numpy.random.random(size=[10]).astype('float32'))

            outs = exe.run(feed={'d0': d[0],
                                 'd1': d[1],
                                 'd2': d[2]},
                           fetch_list=[sum_result])
            self.assertAlmostEqual(numpy.sum(d), numpy.sum(outs[0]), delta=0.01)
Example 37
    def test_vanilla_momentum_optimizer(self):
        init_program = framework.Program()
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            optimize_attr={'learning_rate': 1.1})
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        learning_rate = 0.01
        momentum_optimizer = self.MockMomentum(
            learning_rate=learning_rate, momentum=0.2)
        mean_out = block.create_var(
            dtype="float32", shape=[1], lod_level=0, name="mean.out")
        block.append_op(
            type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
        params_grads = append_backward(mean_out)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
        opts = momentum_optimizer.create_optimization_pass(
            params_grads, mul_out, init_program)
        self.assertEqual(len(opts), 3)
        sgd_op = opts[-1]
        self.assertEqual([op.type for op in opts],
                         ["fill_constant", "elementwise_mul", "momentum"])
        self.assertFalse(sgd_op.attr('use_nesterov'))

        # Check accumulators
        accumulators = momentum_optimizer.get_accumulators()
        self.assertEqual(len(accumulators), 1)
        self.assertTrue(momentum_optimizer.get_velocity_str() in accumulators)
        velocity_acc = accumulators[momentum_optimizer.get_velocity_str()]
        self.assertEqual(len(velocity_acc), 1)
        self.assertTrue(mul_x.name in velocity_acc)

        # Check init_program
        init_ops = init_program.global_block().ops
        self.assertEqual(len(init_ops), 2)
        self.assertEqual(init_ops[0].type, "fill_constant")
        self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate)
        self.assertEqual(init_ops[1].type, "fill_constant")
        self.assertAlmostEqual(init_ops[1].attr('value'), 0.0)
Example 38
    def _generate_backward(self, main_program, startup_program, loss,
                           parameter_list, no_grad_set, callbacks):

        with program_guard(main_program, startup_program):
            params_grads = append_backward(
                loss,
                parameter_list,
                no_grad_set,
                callbacks,
                distop_context=self._dist_context.dist_op_context)
        complete_backward_annotation(
            main_program, dist_context=self._dist_context)

        return params_grads
Example 39
def parallelizer(program_func, rank):
    from paddle.distributed.auto_parallel.completion import Completer
    from paddle.distributed.auto_parallel.partitioner import Partitioner
    from paddle.distributed.auto_parallel.dist_context import DistributedContext

    main_program, start_program, loss = program_func()

    dist_context = DistributedContext()
    completer = Completer(dist_context)
    completer.complete_forward_annotation(main_program)
    dist_context.block_state.parse_forward_blocks(main_program)

    with program_guard(main_program, start_program):
        params_grads = append_backward(
            loss, distop_context=dist_context.dist_op_context)
    completer.complete_backward_annotation(main_program)

    dist_context.block_state.parse_backward_blocks(main_program)
    partitioner = Partitioner(dist_context, rank)
    dist_main_prog, _, _ = partitioner.partition(main_program, start_program,
                                                 [])

    return dist_main_prog, dist_context
Example no. 40
    def test_read_write(self):
        x = [
            layers.data(name='x0', shape=[100]),
            layers.data(name='x1', shape=[100]),
            layers.data(name='x2', shape=[100])
        ]
        for each_x in x:
            each_x.stop_gradient = False

        tensor = numpy.random.random(size=(100, 100)).astype('float32')
        a_sum, x_sum = _test_read_write(x)

        place = core.CPUPlace()
        exe = Executor(place)
        outs = exe.run(feed={
            'x0': tensor,
            'x1': tensor,
            'x2': tensor
        },
                       fetch_list=[a_sum, x_sum],
                       scope=core.Scope())
        self.assertEqual(outs[0], outs[1])

        total_sum = layers.sums(input=[a_sum, x_sum])
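        # total_sum adds six mean terms (three from the array reads, three from
        # the raw inputs), so the 1/6 scale below turns it into an overall average.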
        total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0)

        append_backward(total_sum_scaled)

        g_vars = list(
            map(default_main_program().global_block().var,
                [each_x.name + "@GRAD" for each_x in x]))
        g_out = [
            item.sum() for item in exe.run(feed={
                'x0': tensor,
                'x1': tensor,
                'x2': tensor
            },
                                           fetch_list=g_vars)
        ]
        g_out_sum = numpy.array(g_out).sum()

        # Since the upstream gradient is 1 and the graph is purely linear
        # (means, sums and a scale), the input gradients should sum to 1.
        self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)

        with fluid.dygraph.guard(place):
            tensor1 = fluid.dygraph.to_variable(tensor)
            tensor2 = fluid.dygraph.to_variable(tensor)
            tensor3 = fluid.dygraph.to_variable(tensor)
            x_dygraph = [tensor1, tensor2, tensor3]
            for each_x in x_dygraph:
                each_x.stop_gradient = False
            a_sum_dygraph, x_sum_dygraph = _test_read_write(x_dygraph)
            self.assertEqual(a_sum_dygraph, x_sum_dygraph)

            total_sum_dygraph = layers.sums(
                input=[a_sum_dygraph, x_sum_dygraph])
            total_sum_scaled_dygraph = layers.scale(x=total_sum_dygraph,
                                                    scale=1 / 6.0)
            total_sum_scaled_dygraph.backward()
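            # backward() fills in the gradients, read below through _grad_ivar().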
            g_out_dygraph = [
                item._grad_ivar().numpy().sum() for item in x_dygraph
            ]
            g_out_sum_dygraph = numpy.array(g_out_dygraph).sum()

            self.assertAlmostEqual(1.0, g_out_sum_dygraph, delta=0.1)
Example no. 41
    def _get_gradient(self, input_to_check, place, output_names, no_grad_set):
        prog = Program()
        block = prog.global_block()
        inputs_with_np = {
            key: value
            for (key, value) in OpTest._create_var_descs_(
                block, getattr(self, 'inputs', {}))
        }
        outputs_with_np = {
            key: val
            for (key, val) in OpTest._create_var_descs_(
                block, getattr(self, 'outputs', {}))
        }
        inputs = {
            k: [item[0] for item in inputs_with_np[k]]
            for k in inputs_with_np
        }
        outputs = {
            k: [item[0] for item in outputs_with_np[k]]
            for k in outputs_with_np
        }

        op = block.append_op(
            type=self.op_type,
            inputs=inputs,
            outputs=outputs,
            attrs=getattr(self, 'attrs', {}))

        # infer variable type and infer shape in compile-time
        op.desc.infer_var_type(block.desc)
        op.desc.infer_shape(block.desc)

        mean_inputs = list(map(block.var, output_names))

        if len(mean_inputs) == 1:
            loss = block.create_var(dtype=mean_inputs[0].dtype, shape=[1])
            op = block.append_op(
                inputs={"X": mean_inputs}, outputs={"Out": loss}, type='mean')
            op.desc.infer_var_type(block.desc)
            op.desc.infer_shape(block.desc)
        else:
            avg_sum = []
            for cur_loss in mean_inputs:
                cur_avg_loss = block.create_var(dtype=cur_loss.dtype, shape=[1])
                op = block.append_op(
                    inputs={"X": [cur_loss]},
                    outputs={"Out": [cur_avg_loss]},
                    type="mean")
                op.desc.infer_var_type(block.desc)
                op.desc.infer_shape(block.desc)
                avg_sum.append(cur_avg_loss)

            loss_sum = block.create_var(dtype=avg_sum[0].dtype, shape=[1])
            op_sum = block.append_op(
                inputs={"X": avg_sum}, outputs={"Out": loss_sum}, type='sum')
            op_sum.desc.infer_var_type(block.desc)
            op_sum.desc.infer_shape(block.desc)

            loss = block.create_var(dtype=loss_sum.dtype, shape=[1])
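            # Scale by 1/len(avg_sum) so the combined loss is the average of the
            # per-output means, matching the scale of the single-output branch.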
            op_loss = block.append_op(
                inputs={"X": loss_sum},
                outputs={"Out": loss},
                type='scale',
                attrs={'scale': 1.0 / float(len(avg_sum))})
            op_loss.desc.infer_var_type(block.desc)
            op_loss.desc.infer_shape(block.desc)

        param_grad_list = append_backward(
            loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set)

        feed_dict = {
            item[0].name: OpTest._numpy_to_lod_tensor(item[1], item[2], place)
            for p_name in inputs_with_np for item in inputs_with_np[p_name]
        }

        fetch_list = [g for p, g in param_grad_list]
        executor = Executor(place)
        return list(
            map(np.array,
                executor.run(prog, feed_dict, fetch_list,
                             return_numpy=False)))
Example no. 42
    def test_read_write(self):
        x = [
            layers.data(name='x0', shape=[100]),
            layers.data(name='x1', shape=[100]),
            layers.data(name='x2', shape=[100])
        ]

        for each_x in x:
            each_x.stop_gradient = False

        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = False
        arr = layers.array_write(x=x[0], i=i)
        i = layers.increment(x=i)
        arr = layers.array_write(x=x[1], i=i, array=arr)
        i = layers.increment(x=i)
        arr = layers.array_write(x=x[2], i=i, array=arr)

        i = layers.zeros(shape=[1], dtype='int64')
        i.stop_gradient = False
        a0 = layers.array_read(array=arr, i=i)
        i = layers.increment(x=i)
        a1 = layers.array_read(array=arr, i=i)
        i = layers.increment(x=i)
        a2 = layers.array_read(array=arr, i=i)
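        # a0/a1/a2 are the tensors written above, read back in the same order,
        # so the two mean-sums compared below should be equal.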

        mean_a0 = layers.mean(a0)
        mean_a1 = layers.mean(a1)
        mean_a2 = layers.mean(a2)

        a_sum = layers.sums(input=[mean_a0, mean_a1, mean_a2])

        mean_x0 = layers.mean(x[0])
        mean_x1 = layers.mean(x[1])
        mean_x2 = layers.mean(x[2])

        x_sum = layers.sums(input=[mean_x0, mean_x1, mean_x2])

        scope = core.Scope()
        cpu = core.CPUPlace()

        exe = Executor(cpu)

        tensor = numpy.random.random(size=(100, 100)).astype('float32')

        outs = exe.run(feed={'x0': tensor,
                             'x1': tensor,
                             'x2': tensor},
                       fetch_list=[a_sum, x_sum],
                       scope=scope)
        self.assertEqual(outs[0], outs[1])

        total_sum = layers.sums(input=[a_sum, x_sum])
        total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0)

        append_backward(total_sum_scaled)

        g_vars = list(
            map(default_main_program().global_block().var,
                [each_x.name + "@GRAD" for each_x in x]))
        g_out = [
            item.sum()
            for item in exe.run(
                feed={'x0': tensor,
                      'x1': tensor,
                      'x2': tensor},
                fetch_list=g_vars)
        ]
        g_out_sum = numpy.array(g_out).sum()

        # Since the upstream gradient is 1 and the graph is purely linear
        # (means, sums and a scale), the input gradients should sum to 1.
        self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)