    def train(self, place, iters, feed, use_cinn=False, seed=1234):
        # Seed every RNG and force deterministic cuDNN kernels so that
        # runs with and without CINN can be compared loss-by-loss.
        np.random.seed(seed)
        paddle.seed(seed)
        if paddle.is_compiled_with_cuda():
            paddle.set_flags({'FLAGS_cudnn_deterministic': 1})
        set_cinn_flag(use_cinn)

        startup_program = paddle.static.Program()
        main_program = paddle.static.Program()

        loss = self.build_program(main_program, startup_program)
        exe = paddle.static.Executor(place)

        compiled_prog = paddle.static.CompiledProgram(
            main_program).with_data_parallel(loss_name=loss.name)
        loss_vals = []
        scope = paddle.static.Scope()

        with paddle.static.scope_guard(scope):
            exe.run(startup_program)
            for step in range(iters):
                loss_v = exe.run(compiled_prog,
                                 feed=feed[step],
                                 fetch_list=[loss],
                                 return_numpy=True)
                loss_vals.append(loss_v[0][0])
        return loss_vals

def train(dot_save_dir, prefix, seed=1234):
    np.random.seed(seed)
    paddle.seed(seed)
    if paddle.is_compiled_with_cuda():
        paddle.set_flags({'FLAGS_cudnn_deterministic': 1})

    startup_program = paddle.static.Program()
    main_program = paddle.static.Program()
    img, label, loss = build_program(main_program, startup_program)

    place = (paddle.CUDAPlace(0)
             if paddle.is_compiled_with_cuda() else paddle.CPUPlace())
    exe = paddle.static.Executor(place)
    exe.run(startup_program)

    # Dump the compiled graphs as Graphviz .dot files for inspection.
    build_strategy = paddle.static.BuildStrategy()
    build_strategy.debug_graphviz_path = os.path.join(dot_save_dir, prefix)
    compiled_program = paddle.static.CompiledProgram(
        main_program, build_strategy).with_data_parallel(loss_name=loss.name)

    iters = 100
    feed = rand_data(img.name, label.name, iters)
    loss_values = []
    for step in range(iters):
        loss_v = exe.run(compiled_program,
                         feed=feed[step],
                         fetch_list=[loss],
                         return_merged=False)
        loss_values.append(loss_v[0][0][0])
    return loss_values
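The dumped graphs can then be rendered offline with Graphviz; a minimal sketch, assuming the files were written under ./dots and that a local Graphviz install provides the dot binary:

import glob
import subprocess

# Render every dumped .dot graph to SVG for inspection.
for dot_file in glob.glob('./dots/*.dot'):
    subprocess.run(['dot', '-Tsvg', dot_file, '-o', dot_file + '.svg'],
                   check=True)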

def set_cinn_flag(val):
    # FLAGS_use_cinn is only registered when Paddle is built with CINN,
    # so set_flags raises ValueError on a plain build; report that back
    # instead of failing the test.
    cinn_compiled = False
    try:
        paddle.set_flags({'FLAGS_use_cinn': val})
        cinn_compiled = True
    except ValueError:
        logger.warning("The used paddle is not compiled with CINN.")
    return cinn_compiled
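A plausible way these helpers fit together is a test that trains the same program twice and compares the loss curves; a minimal sketch, where rand_data, the feed names, and the 1e-5 tolerance are assumptions rather than part of the snippets above:

    def test_cinn_matches_baseline(self):
        if not set_cinn_flag(True):
            return  # this Paddle build was compiled without CINN
        place = (paddle.CUDAPlace(0)
                 if paddle.is_compiled_with_cuda() else paddle.CPUPlace())
        iters = 10
        feed = rand_data('image', 'label', iters)
        loss_cinn = self.train(place, iters, feed, use_cinn=True)
        loss_base = self.train(place, iters, feed, use_cinn=False)
        np.testing.assert_allclose(loss_cinn, loss_base, atol=1e-5)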
Example #4
    def setUpClass(cls):
        if not paddle.is_compiled_with_cuda():
            return  # nothing to configure on a CPU-only build

        paddle.enable_static()
        paddle.set_flags({'FLAGS_cudnn_deterministic': True})
        _clip_by_global_norm_using_mp_type(True)
        fleet.init(role_maker=get_role_maker())
Example #5
    def setUp(self):
        if can_use_cuda_graph():
            # CUDA graph capture needs the auto_growth allocator and is
            # incompatible with the stream-safe allocator, so pin the
            # relevant flags before any allocation happens.
            paddle.set_flags({
                'FLAGS_allocator_strategy': 'auto_growth',
                'FLAGS_sync_nccl_allreduce': False,
                'FLAGS_cudnn_deterministic': True,
                'FLAGS_use_stream_safe_cuda_allocator': False,
            })
Example #6
    def init(self):
        if paddle.is_compiled_with_cuda():
            paddle.set_flags({'FLAGS_cudnn_deterministic': 1})
        self.rtol = 1e-6
        self.atol = 1e-8

        # Offset the seeds by the rank so every process gets a distinct
        # but reproducible random stream.
        rank = paddle.distributed.get_rank()
        paddle.seed(rank + 2021)
        random.seed(rank + 2021)
        np.random.seed(rank + 2021)
Example #7
    def setUp(self):
        paddle.enable_static()
        if paddle.is_compiled_with_cuda():
            paddle.set_flags({'FLAGS_cudnn_deterministic': 1})

        # Reuse SEED from the environment when it is set; otherwise draw
        # one at random and export it so spawned processes share it.
        seed = int(os.environ.get('SEED', -1))
        if seed <= 0:
            seed = np.random.randint(low=1, high=1000000, size=[1])[0]
            os.environ['SEED'] = str(seed)
        self.seed = seed
        paddle.seed(self.seed)

        self.rtol = 1e-5
        self.atol = 1e-8
        self.equal_nan = False

        self.init()
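Subclasses would then override the init() hook to adjust tolerances or seeds; a hypothetical override, with values chosen only for illustration:

    def init(self):
        # Loosen the tolerances for a low-precision variant of the test.
        self.rtol = 1e-3
        self.atol = 1e-5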
Example #8
    def test_compute_type_fp16_nan(self):
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
            if core.is_float16_supported(place):
                with fluid.dygraph.guard(place):
                    paddle.set_flags(
                        {'FLAGS_gemm_use_half_precision_compute_type': True})
                    input_x = np.random.random([2, 8, 16]).astype("float16")
                    input_y = np.random.random([2, 16, 8]).astype("float16")
                    # The ±60000 entries cancel exactly, but each partial
                    # product (60000 * 1.5 = 90000) overflows float16's
                    # maximum (~65504) when the GEMM accumulates in half
                    # precision, producing inf/nan.
                    for i in range(0, 16, 2):
                        input_x[:, :, i] += 60000
                        input_x[:, :, i + 1] -= 60000
                    input_y[:, :, :] = 1.5

                    x = paddle.to_tensor(input_x)
                    y = paddle.to_tensor(input_y)
                    result = paddle.matmul(x, y)
                    result_np = np.matmul(input_x, input_y)
                    self.assertFalse(
                        paddle.isfinite(result)[0, 0, 0])  # contains nan/inf
                    self.assertTrue(np.isfinite(result_np)[0, 0, 0])
                    paddle.set_flags(
                        {'FLAGS_gemm_use_half_precision_compute_type': False})
Example #9
def use_fast_math(enabled):
    paddle.set_flags({'FLAGS_use_fast_math': enabled})
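Because set_flags changes process-global state, one hedged pattern is to scope the flag with a context manager; a sketch built on paddle.get_flags, where the fast_math helper name is hypothetical:

import contextlib
import paddle

@contextlib.contextmanager
def fast_math(enabled=True):
    # Flip FLAGS_use_fast_math for the duration of a `with` block,
    # then restore whatever value was set before.
    old = paddle.get_flags(['FLAGS_use_fast_math'])['FLAGS_use_fast_math']
    paddle.set_flags({'FLAGS_use_fast_math': enabled})
    try:
        yield
    finally:
        paddle.set_flags({'FLAGS_use_fast_math': old})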
Example #10
    def set_flags(self, enable_autotune):
        if paddle.is_compiled_with_cuda():
            if enable_autotune:
                # No workspace cap while searching for the fastest
                # cuDNN convolution algorithm.
                paddle.set_flags({'FLAGS_conv_workspace_size_limit': -1})
            else:
                paddle.set_flags({'FLAGS_conv_workspace_size_limit': 512})
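A minimal sketch of how such a toggle might be driven, where run_step and the warm-up count of 3 are assumptions:

    def run_with_autotune(self):
        # Search conv algorithms with an uncapped workspace during
        # warm-up, then restore the 512 MB cap for the measured steps.
        self.set_flags(enable_autotune=True)
        for _ in range(3):
            self.run_step()  # hypothetical per-iteration helper
        self.set_flags(enable_autotune=False)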
Example #11
    def setUp(self):
        paddle.enable_static()
        if paddle.is_compiled_with_cuda():
            paddle.set_flags({'FLAGS_cudnn_deterministic': 1})
Example #12
    def setUp(self):
        # A threshold of 0.0 GB makes the garbage collector free
        # intermediate tensors as soon as they are no longer needed.
        paddle.set_flags({'FLAGS_eager_delete_tensor_gb': 0.0})
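As a round-trip check (a sketch; paddle.get_flags reads back current values by name):

import paddle

paddle.set_flags({'FLAGS_eager_delete_tensor_gb': 0.0})
print(paddle.get_flags(['FLAGS_eager_delete_tensor_gb']))
# -> {'FLAGS_eager_delete_tensor_gb': 0.0}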