Example #1
    def add(self, name, seed):
        if seed in self.seeds_:
            raise ValueError('seed {} already exists'.format(seed))
        self.seeds_.add(seed)
        if name in self.states_:
            raise ValueError('state {} already exists'.format(name))
        # seed a fresh CUDA RNG state under `name`, then restore the global state
        orig_rng_state = paddle.get_cuda_rng_state()
        paddle.seed(seed)
        self.states_[name] = paddle.get_cuda_rng_state()
        paddle.set_cuda_rng_state(orig_rng_state)
Example #2
    @contextlib.contextmanager  # the method yields, so it is meant to run as a context manager
    def rng_state(self, name=MODEL_PARALLEL_RNG):
        if name not in self.states_:
            raise ValueError('state {} does not exist'.format(name))
        orig_cuda_rng_state = paddle.get_cuda_rng_state()
        paddle.set_cuda_rng_state(self.states_[name])
        try:
            yield
        finally:
            # persist any RNG progress made inside the block, then restore
            self.states_[name] = paddle.get_cuda_rng_state()
            paddle.set_cuda_rng_state(orig_cuda_rng_state)
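Examples #1 and #2 belong to an RNG-state tracker used for model-parallel training: `add` registers a named CUDA RNG state derived from a seed, and `rng_state` temporarily activates that named state for a block of code. A minimal usage sketch on a CUDA device, assuming a hypothetical `RNGStatesTracker` class carrying the two methods above:

import paddle

tracker = RNGStatesTracker()  # hypothetical container for the two methods above
tracker.add('model_parallel_rng', 1234)

with tracker.rng_state('model_parallel_rng'):
    # draws from the named CUDA RNG stream; the previous
    # global state is restored when the block exits
    noise = paddle.rand([2, 3])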
Example #3
@contextlib.contextmanager
def swith_rng_state(rng_state):
    # temporarily install `rng_state` as the global CUDA RNG state
    orig_cuda_rng_state = paddle.get_cuda_rng_state()
    paddle.set_cuda_rng_state(rng_state)
    try:
        yield
    finally:
        paddle.set_cuda_rng_state(orig_cuda_rng_state)
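Because `swith_rng_state` saves the current global state, installs the given one, and restores on exit, it can replay a random draw exactly. A short sketch on a CUDA device, assuming the `@contextlib.contextmanager` decoration shown above:

import paddle

saved = paddle.get_cuda_rng_state()
x = paddle.rand([4])               # advances the global CUDA RNG
with swith_rng_state(saved):       # rewind to the saved state
    x_replay = paddle.rand([4])    # element-wise identical to x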
Example #4
@contextlib.contextmanager
def _swith_rng_state_tracker(rng_state, tracker):
    # swap in both the global CUDA RNG state and the tracker's named states
    orig_cuda_rng_state = paddle.get_cuda_rng_state()
    orig_cuda_rng_tracker = get_rng_state_tracker().get_states_tracker()

    paddle.set_cuda_rng_state(rng_state)
    get_rng_state_tracker().set_states_tracker(tracker)
    try:
        yield
    finally:
        paddle.set_cuda_rng_state(orig_cuda_rng_state)
        get_rng_state_tracker().set_states_tracker(orig_cuda_rng_tracker)
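A typical consumer of this switch is the backward pass of activation recompute: the global CUDA RNG state and the tracker states captured at forward time (see Example #10) are reinstalled so dropout masks match the first pass. A hedged sketch, where `ctx` and `detached_inputs` are assumed to come from a forward like Example #10's:

# replaying forward-time randomness while recomputing activations
with _swith_rng_state_tracker(ctx.fwd_cuda_rng_state,
                              ctx.fwd_cuda_rng_state_tracker):
    outputs = ctx.run_function(*detached_inputs)  # dropout masks match pass one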
Example #5
    def forward(ctx, run_function, preserve_rng_state, *args):
        if framework._dygraph_tracer()._has_grad:
            check_recompute_necessary(args)

        # store for recomputing 
        ctx.run_function = run_function
        ctx.preserve_rng_state = preserve_rng_state

        # NOTE: the number of outputs of backward() should equal the number of
        # tensors in forward()'s input, and their order should match the order
        # of forward()'s input tensors. None (non-tensor) inputs are filtered
        # out of backward's inputs.

        # save input for backward
        ctx.inputs = []
        ctx.tensor_indices = []
        tensor_inputs = []
        for i, arg in enumerate(args):
            if paddle.is_tensor(arg):
                tensor_inputs.append(arg)
                ctx.tensor_indices.append(i)
                ctx.inputs.append(None)
            else:
                ctx.inputs.append(arg)
        ctx.save_for_backward(*tensor_inputs)

        # NOTE: recompute with RNG restore only supports the scenario of one
        # process per CUDA GPU; one process driving multiple GPUs and mixed
        # GPU/CPU scenarios are not supported.
        if ctx.preserve_rng_state:
            cur_device = paddle.get_device()
            if 'gpu:' not in cur_device:
                raise RuntimeError(
                    "Recompute with RNG preserve is not supported on current "
                    "device: {}.".format(cur_device))
            ctx.fw_cuda_rng_state = paddle.get_cuda_rng_state()

        # TODO support AMP
        tracer = framework._dygraph_tracer()
        ctx.is_fw_autocast = False if tracer._amp_level == core.AmpLevel.O0 else True
        if tracer._amp_level == core.AmpLevel.O2:
            ctx.amp_level = 'O2'
        elif tracer._amp_level in (core.AmpLevel.O1, core.AmpLevel.O0):
            ctx.amp_level = 'O1'
        else:
            raise ValueError("unsupported amp level: {}".format(
                tracer._amp_level))
        ctx.amp_white_list, ctx.amp_black_list = tracer._get_amp_op_list()

        with paddle.no_grad():
            outputs = run_function(*args)
        return outputs
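Example #5 is the forward half of a recompute PyLayer: it records the inputs, the AMP settings, and the CUDA RNG state so backward can re-run `run_function` with identical randomness. A hedged usage sketch of the public wrapper, assuming `recompute` from `paddle.distributed.fleet.utils` and a GPU (RNG preservation requires one):

import paddle
from paddle.distributed.fleet.utils import recompute

block = paddle.nn.Sequential(paddle.nn.Linear(16, 16), paddle.nn.Dropout(0.5))
x = paddle.randn([4, 16])
x.stop_gradient = False   # recompute expects at least one grad-tracking input
y = recompute(block, x)   # activations are rebuilt during backward,
y.sum().backward()        # with the saved CUDA RNG state restored for Dropout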
Example #6
    def test_gen_dropout_dygraph(self):
        gen = paddle.seed(12343)

        fluid.enable_dygraph()

        gen.manual_seed(111111111)
        st = paddle.get_cuda_rng_state()

        x = fluid.layers.uniform_random([2, 10],
                                        dtype="float32",
                                        min=0.0,
                                        max=1.0)
        x_again = fluid.layers.uniform_random([2, 10],
                                              dtype="float32",
                                              min=0.0,
                                              max=1.0)
        x_third = fluid.layers.uniform_random([2, 10],
                                              dtype="float32",
                                              min=0.0,
                                              max=1.0)
        print("x: {}".format(x.numpy()))
        print("x_again: {}".format(x_again.numpy()))
        x = x + x_again + x_third
        y = fluid.layers.dropout(x, 0.5)

        paddle.set_cuda_rng_state(st)

        x1 = fluid.layers.uniform_random([2, 10],
                                         dtype="float32",
                                         min=0.0,
                                         max=1.0)
        x1_again = fluid.layers.uniform_random([2, 10],
                                               dtype="float32",
                                               min=0.0,
                                               max=1.0)
        x1_third = fluid.layers.uniform_random([2, 10],
                                               dtype="float32",
                                               min=0.0,
                                               max=1.0)
        x1 = x1 + x1_again + x1_third
        y1 = fluid.layers.dropout(x1, 0.5)
        y_np = y.numpy()
        y1_np = y1.numpy()

        if core.is_compiled_with_cuda():
            print(">>>>>>> dropout dygraph >>>>>>>")
            self.assertTrue(np.allclose(y_np, y1_np))
Example #7
    def test_gen_TruncatedNormal_initializer(self):
        fluid.disable_dygraph()

        gen = paddle.seed(123123143)
        cur_state = paddle.get_cuda_rng_state()

        startup_program = fluid.Program()
        train_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program):
            # example 1:
            # attr shape is a list which doesn't contain tensor Variable.
            x = fluid.layers.uniform_random(shape=[2, 10])
            result_1 = fluid.layers.fc(
                input=x,
                size=10,
                param_attr=fluid.initializer.TruncatedNormal(loc=0.0,
                                                             scale=2.0))
            result_2 = fluid.layers.fc(
                input=x,
                size=10,
                param_attr=fluid.initializer.TruncatedNormal(loc=0.0,
                                                             scale=2.0))

            exe = fluid.Executor(fluid.CPUPlace())
            exe.run(startup_program)
            out1 = exe.run(train_program,
                           feed={},
                           fetch_list=[result_1, result_2])

        paddle.seed(123123143)
        with fluid.program_guard(train_program, startup_program):
            exe.run(startup_program)
            out2 = exe.run(train_program,
                           feed={},
                           fetch_list=[result_1, result_2])

        out1_res1 = np.array(out1[0])
        out1_res2 = np.array(out1[1])
        out2_res1 = np.array(out2[0])
        out2_res2 = np.array(out2[1])

        if core.is_compiled_with_cuda():
            print(">>>>>>> truncated normal static >>>>>>>")
            self.assertTrue(np.allclose(out1_res1, out2_res1))
            self.assertTrue(np.allclose(out1_res2, out2_res2))
            self.assertTrue(not np.allclose(out1_res2, out1_res1))
Example #8
def main():
    args = parser.parse_args()
    os.makedirs(args.save, exist_ok=True)

    # save the configurations
    t = time.localtime()
    timestamp = time.strftime('%b-%d-%Y_%H%M', t)
    with open(os.path.join(args.save, 'args-{}.txt'.format(timestamp)),
              'w') as fh:
        json.dump(args.__dict__, fh, indent=2)

    print('Start at : {}'.format(timestamp))

    # show non-default args
    default_args = parser.parse_args([args.data, args.save])
    for key in args.__dict__:
        if args.__dict__[key] != default_args.__dict__[key]:
            print('{}: {} | default ({})'.format(key, args.__dict__[key],
                                                 default_args.__dict__[key]))

    if args.seed is not None:
        random.seed(args.seed)
        paddle.seed(args.seed)
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    ngpus_per_node = len(paddle.get_cuda_rng_state())
    print('ngpus per node is {}'.format(ngpus_per_node))
    if args.distributed:
        dist.spawn(main_worker,
                   nprocs=ngpus_per_node,
                   args=(args.gpu, ngpus_per_node, args),
                   started_port=6671)
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
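In Example #8, `len(paddle.get_cuda_rng_state())` doubles as a GPU count: the call returns a list with one generator state per visible CUDA device. The idiom in isolation, as a minimal sketch:

import paddle

if paddle.is_compiled_with_cuda():
    ngpus = len(paddle.get_cuda_rng_state())  # one state entry per visible GPU
    print('visible CUDA devices: {}'.format(ngpus))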
Example #9
    def test_generator_gaussian_random_dygraph(self):
        """Test Generator seed."""
        fluid.enable_dygraph()

        paddle.seed(12312321111)
        x = fluid.layers.gaussian_random([120], dtype="float32")
        st1 = paddle.get_cuda_rng_state()
        x1 = fluid.layers.gaussian_random([120], dtype="float32")
        paddle.set_cuda_rng_state(st1)
        x2 = fluid.layers.gaussian_random([120], dtype="float32")
        paddle.seed(12312321111)
        x3 = fluid.layers.gaussian_random([120], dtype="float32")
        x_np = x.numpy()
        x1_np = x1.numpy()
        x2_np = x2.numpy()
        x3_np = x3.numpy()

        if core.is_compiled_with_cuda():
            print(">>>>>>> gaussian random dygraph >>>>>>>")
            self.assertTrue(np.allclose(x1_np, x2_np))
            self.assertTrue(np.allclose(x_np, x3_np))
Example #10
    def forward(ctx, run_function, all_outputs, *args):
        check_recompute_necessary(args)

        # store for recomputing
        ctx.run_function = run_function

        # store the rng states
        ctx.fwd_cuda_rng_state = paddle.get_cuda_rng_state()
        ctx.fwd_cuda_rng_state_tracker = get_rng_state_tracker().get_states_tracker()

        # save input for backward
        ctx.inputs = []
        ctx.tensor_indices = []
        ctx.tensor_shapes = []
        tensor_inputs = []

        cur_device = paddle.get_device()
        assert 'gpu:' in cur_device, (
            "Recompute with RNG is not supported on current device: {}.".format(
                cur_device))

        # TODO support AMP
        tracer = framework._dygraph_tracer()
        ctx.is_fw_autocast = False if tracer._amp_level == core.AmpLevel.O0 else True
        if tracer._amp_level == core.AmpLevel.O2:
            ctx.amp_level = 'O2'
        elif tracer._amp_level in (core.AmpLevel.O1, core.AmpLevel.O0):
            ctx.amp_level = 'O1'
        else:
            raise ValueError("unsupported amp level: {}".format(
                tracer._amp_level))
        ctx.amp_white_list, ctx.amp_black_list = tracer._get_amp_op_list()

        with paddle.no_grad():
            outputs = run_function(*args)

        for i, arg in enumerate(args):
            if paddle.is_tensor(arg):
                state = arg.stop_gradient
                if _recompute_partition:
                    ctx.tensor_shapes.append(arg.shape)
                    partition = _split_activation(arg.detach()).clone()
                    # TODO(shenliang03): avoid using the compute stream for the
                    # D2H copy, to speed this up
                    arg = partition.cpu() if _recompute_offload else partition
                else:
                    arg = arg.cpu() if _recompute_offload else arg
                arg.stop_gradient = state
                tensor_inputs.append(arg)
                ctx.tensor_indices.append(i)
                ctx.inputs.append(None)
            else:
                ctx.inputs.append(arg)

        ctx.save_for_backward(*tensor_inputs)

        if paddle.is_tensor(outputs):
            all_outputs += [outputs]
            return outputs
        else:
            all_outputs += outputs
            return tuple(outputs)
Example #11
def get_rng_state():
    return get_cuda_rng_state()
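Example #11 hides the CUDA-specific getter behind a backend-neutral name. A matching setter and a save/replay round trip, as a hedged sketch (`set_rng_state` is hypothetical, mirroring the alias above):

import paddle

def set_rng_state(state):
    # hypothetical mirror of get_rng_state(): reinstall a captured state
    paddle.set_cuda_rng_state(state)

state = get_rng_state()  # from Example #11
a = paddle.rand([3])
set_rng_state(state)     # rewind the generator
b = paddle.rand([3])     # identical to a on a CUDA device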