예제 #1
0
def custom_relu_static(func,
                       device,
                       dtype,
                       np_x,
                       use_func=True,
                       test_infer=False):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())
            # in static mode, x data has been covered by out
            out_v = exe.run(static.default_main_program(),
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
예제 #2
0
    def check_static_result(self, place):
        from paddle.distributed.fleet.meta_parallel.parallel_layers.random import dropout
        with static.program_guard(static.Program(), static.Program()):
            input = static.data(name="input", shape=[40, 40], dtype="float32")
            res1 = dropout(
                input,
                p=0.3,
                training=True,
                mode='upscale_in_train',
                rng_name='seed0')
            res2 = dropout(
                input,
                p=0.3,
                training=True,
                mode='upscale_in_train',
                rng_name='seed1')
            res3 = dropout(input, p=0.3)

            in_np = np.random.random([40, 40]).astype("float32")

            exe = static.Executor(place)
            res_list = [res1, res2]
            for i in range(2):
                out1, out2 = exe.run(static.default_main_program(),
                                     feed={"input": in_np},
                                     fetch_list=res_list)
                self.assertTrue(np.allclose(out1, out2))
예제 #3
0
def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
    paddle.enable_static()
    paddle.set_device(device)

    places = static.cpu_places() if device is 'cpu' else static.cuda_places()
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            # in static mode, x data has been covered by out
            compiled_prog = static.CompiledProgram(
                static.default_main_program()).with_data_parallel(
                    loss_name=out.name, places=places)
            out_v = exe.run(compiled_prog,
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
예제 #4
0
def linear_static(func, device, dtype, np_x, np_weight, np_bias):
    paddle.enable_static()
    paddle.set_device(device)
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=[None, np_x.shape[1]], dtype=dtype)
            weight = static.data(name="weight",
                                 shape=np_weight.shape,
                                 dtype=dtype)
            bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
            x.stop_gradient = False
            weight.stop_gradient = False
            bias.stop_gradient = False
            out = func(x, weight, bias)
            mean_out = paddle.mean(out)
            static.append_backward(mean_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, x_grad_v, weight_grad_v, bias_grad_v = exe.run(
                static.default_main_program(),
                feed={
                    "x": np_x.astype(dtype),
                    "weight": np_weight.astype(dtype),
                    "bias": np_bias.astype(dtype)
                },
                fetch_list=[
                    out.name, x.name + "@GRAD", weight.name + "@GRAD",
                    bias.name + "@GRAD"
                ])
    paddle.disable_static()
    return out_v, x_grad_v, weight_grad_v, bias_grad_v
예제 #5
0
    def run_prog(self, a, b):
        main_program, out = self.get_prog()
        place = paddle.set_device('npu')

        exe = static.Executor(place)
        out_ = exe.run(main_program, feed={"a": a, "b": b}, fetch_list=[out])
        return out_
예제 #6
0
    def test_static(self):
        mp, sp = static.Program(), static.Program()
        with static.program_guard(mp, sp):
            x = static.data("x", shape=[10, 10, 2], dtype="float64")
            out = paddle.as_complex(x)

        exe = static.Executor()
        exe.run(sp)
        [out_np] = exe.run(mp, feed={"x": self.x}, fetch_list=[out])
        self.assertTrue(np.allclose(self.out, out_np))
예제 #7
0
    def check_static_result(self, place):
        import paddle.distributed.fleet.meta_parallel.parallel_layers.random as random
        with static.program_guard(static.Program(), static.Program()):
            res1 = random.determinate_seed('seed0')

            exe = static.Executor(place)
            res_list = [res1]
            for i in range(2):
                out1, = exe.run(static.default_main_program(),
                                fetch_list=res_list)
                self.assertEqual(out1, np.cast['int32'](self.rng1.random()))
예제 #8
0
def test_static_assert_true(self, x_list, p_list):
    for p in p_list:
        for x in x_list:
            with static.program_guard(static.Program(), static.Program()):
                input_data = static.data("X", shape=x.shape, dtype=x.dtype)
                output = paddle.linalg.cond(input_data, p)
                exe = static.Executor()
                result = exe.run(feed={"X": x}, fetch_list=[output])
                expected_output = np.linalg.cond(x, p)
                np.testing.assert_allclose(result[0],
                                           expected_output,
                                           rtol=5e-5)
예제 #9
0
def run_inference(drop_last):
    loader = paddle.io.DataLoader.from_generator(feed_list=[x],
            capacity=8, drop_last=drop_last)
    loader.set_batch_generator(batch_generator, static.cpu_places())

    exe = static.Executor(paddle.CPUPlace())
    prog = static.CompiledProgram(static.default_main_program())
    prog = prog.with_data_parallel()

    result = []
    for data in loader():
        each_ret, = exe.run(prog, feed=data, fetch_list=[y])
        result.extend(each_ret)
    return result
예제 #10
0
    def test_in_static_mode(self):
        def init_input_output(dtype):
            input = np.random.random(self._shape).astype(
                dtype) + 1j * np.random.random(self._shape).astype(dtype)
            return {'x': input}, numpy_apis[self.api](input)

        for dtype in self.dtypes:
            input_dict, np_res = init_input_output(dtype)
            for place in self.places:
                with static.program_guard(static.Program()):
                    x = static.data(name="x", shape=self._shape, dtype=dtype)
                    out = paddle_apis[self.api](x)

                    exe = static.Executor(place)
                    out_value = exe.run(feed=input_dict, fetch_list=[out.name])
                    self.assertTrue(np.array_equal(np_res, out_value[0]))
예제 #11
0
    def test_in_static_mode(self):
        def init_input_output(dtype):
            input = np.random.random(self._shape).astype(dtype)
            return {'x': input}, psi(input)

        for dtype in self.dtypes:
            input_dict, sc_res = init_input_output(dtype)
            for place in self.places:
                with static.program_guard(static.Program()):
                    x = static.data(name="x", shape=self._shape, dtype=dtype)
                    out = paddle.digamma(x)

                    exe = static.Executor(place)
                    out_value = exe.run(feed=input_dict, fetch_list=[out.name])
                    self.assertEqual(
                        np.allclose(out_value[0], sc_res, rtol=1e-5), True)
예제 #12
0
def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            # simple module
            data = static.data(name='data',
                               shape=[None, 1, 28, 28],
                               dtype='float32')
            label = static.data(name='label', shape=[None, 1], dtype='int64')

            hidden = static.nn.fc(data, size=128)
            hidden = func(hidden)
            hidden = static.nn.fc(hidden, size=128)
            predict = static.nn.fc(hidden, size=10, activation='softmax')
            loss = paddle.nn.functional.cross_entropy(input=hidden,
                                                      label=label)
            avg_loss = paddle.mean(loss)

            opt = paddle.optimizer.SGD(learning_rate=0.1)
            opt.minimize(avg_loss)

            # run start up model
            exe = static.Executor()
            exe.run(static.default_startup_program())

            # train
            for i in range(4):
                avg_loss_v = exe.run(static.default_main_program(),
                                     feed={
                                         'data': np_data,
                                         'label': np_label
                                     },
                                     fetch_list=[avg_loss])

            # save inference model
            static.save_inference_model(path_prefix, [data], [predict], exe)

            # get train predict value
            predict_v = exe.run(static.default_main_program(),
                                feed={
                                    'data': np_data,
                                    'label': np_label
                                },
                                fetch_list=[predict])

    return predict_v
예제 #13
0
def concat_static(func, dtype, np_inputs, axis_v, with_attr=False):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x1 = static.data(name="x1", shape=[2, 3], dtype=dtype)
            x2 = static.data(name="x2", shape=[2, 3], dtype=dtype)
            if with_attr:
                axis = axis_v
            else:
                axis = paddle.full(shape=[1], dtype='int64', fill_value=axis_v)
            x1.stop_gradient = False
            x2.stop_gradient = False

            total_time = 0
            for i in range(TEST_TIME):
                start = time.time()
                out = func([x1, x2], axis)
                total_time += time.time() - start
            print("- static mode concat time cost: {} s".format(total_time /
                                                                TEST_TIME))

            # mean only support float, so here use sum
            sum_out = paddle.sum(out)
            static.append_backward(sum_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            if with_attr:
                feed_dict = {
                    "x1": np_inputs[0].astype(dtype),
                    "x2": np_inputs[1].astype(dtype)
                }
            else:
                feed_dict = {
                    "x1": np_inputs[0].astype(dtype),
                    "x2": np_inputs[1].astype(dtype),
                    "axis": axis
                }
            out_v, x1_grad_v, x2_grad_v = exe.run(
                static.default_main_program(),
                feed=feed_dict,
                fetch_list=[out.name, x1.name + "@GRAD", x2.name + "@GRAD"])
    paddle.disable_static()
    return out_v, x1_grad_v, x2_grad_v
예제 #14
0
    def test_conj_static_mode(self):
        def init_input_output(dtype):
            input = rand([2, 20, 2, 3]).astype(dtype) + 1j * rand(
                [2, 20, 2, 3]).astype(dtype)
            return {'x': input}, np.conj(input)

        for dtype in self._dtypes:
            input_dict, np_res = init_input_output(dtype)
            for place in self._places:
                with static.program_guard(static.Program()):
                    x_dtype = np.complex64 if dtype == "float32" else np.complex128
                    x = static.data(
                        name="x", shape=[2, 20, 2, 3], dtype=x_dtype)
                    out = paddle.conj(x)

                    exe = static.Executor(place)
                    out_value = exe.run(feed=input_dict, fetch_list=[out.name])
                    self.assertTrue(np.array_equal(np_res, out_value[0]))
예제 #15
0
def conj_static(func, shape, dtype, np_input):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=shape, dtype=dtype)
            x.stop_gradient = False
            out = func(x)
            sum_out = paddle.sum(out)
            static.append_backward(sum_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, x_grad_v = exe.run(static.default_main_program(),
                                      feed={"x": np_input},
                                      fetch_list=[out.name, x.name + "@GRAD"])
    paddle.disable_static()
    return out_v, x_grad_v
예제 #16
0
    def train(self, place, restore):
        exe = static.Executor(place)
        exe.run(self._startup_prog)

        params = []
        for pass_id in range(2):
            for batch_id in range(3):
                exe.run(program=self._train_program, feed={'x': gen_data()})
                tmp_param = np.array(static.global_scope().find_var(
                    self._param_name).get_tensor())
                params.append(tmp_param)

            with self._ema.apply(exe, restore):
                final_ema = np.array(static.global_scope().find_var(
                    self._param_name).get_tensor())
                exe.run(program=self._test_program, feed={'x': gen_data()})
            if not restore:
                self._ema.restore(exe)

        return params, final_ema
예제 #17
0
def test_relu2_static(device, dtype):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = librelu2_op.relu2(x)
            static.append_backward(out)
            print(static.default_main_program())

            exe = static.Executor()
            exe.run(static.default_startup_program())

            x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
            out, = exe.run(static.default_main_program(),
                           feed={'X': x},
                           fetch_list=[out.name])
            print(out)
예제 #18
0
 def test_static_save_and_load_inference_model(self):
     paddle.enable_static()
     np_data = np.random.random((1, 1, 28, 28)).astype("float32")
     np_label = np.random.random((1, 1)).astype("int64")
     path_prefix = "custom_op_inference/custom_relu"
     for device in self.devices:
         predict = custom_relu_static_inference(
             self.custom_ops[0], device, np_data, np_label, path_prefix)
         # load inference model
         with static.scope_guard(static.Scope()):
             exe = static.Executor()
             [inference_program, feed_target_names,
              fetch_targets] = static.load_inference_model(path_prefix, exe)
             predict_infer = exe.run(inference_program,
                                     feed={feed_target_names[0]: np_data},
                                     fetch_list=fetch_targets)
             self.assertTrue(
                 np.array_equal(predict, predict_infer),
                 "custom op predict: {},\n custom op infer predict: {}".
                 format(predict, predict_infer))
     paddle.disable_static()
예제 #19
0
def test_relu2_static(device, dtype, use_custom=True):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = custom_relu_op_rf.relu2(
                x) if use_custom else paddle.nn.functional.relu(x)
            static.append_backward(out)
            print(static.default_main_program())

            places = static.cuda_places()
            print(places)
            exe = static.Executor()
            compiled_prog = static.CompiledProgram(
                static.default_main_program()).with_data_parallel(
                    loss_name=out.name, places=static.cuda_places())

            x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
            out, = exe.run(compiled_prog, feed={'X': x}, fetch_list=[out.name])
            print(out)
예제 #20
0
def linear_static(func, dtype, np_x, np_weight, np_bias):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=np_x.shape, dtype=dtype)
            weight = static.data(
                name="weight", shape=np_weight.shape, dtype=dtype)
            bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
            out = func(x, weight, bias)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, = exe.run(static.default_main_program(),
                             feed={
                                 "x": np_x.astype(dtype),
                                 "weight": np_weight.astype(dtype),
                                 "bias": np_bias.astype(dtype)
                             },
                             fetch_list=[out.name])
    paddle.disable_static()
    return out_v
def search_mobilenetv2_block(config, args, image_size):
    image_shape = [3, image_size, image_size]
    transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
    if args.data == 'cifar10':
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       transform=transform,
                                                       backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     transform=transform,
                                                     backend='cv2')

    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')

    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    if args.is_server:
        sa_nas = SANAS(config,
                       server_addr=(args.server_address, args.port),
                       search_steps=args.search_steps,
                       is_server=True)
    else:
        sa_nas = SANAS(config,
                       server_addr=(args.server_address, args.port),
                       search_steps=args.search_steps,
                       is_server=False)

    for step in range(args.search_steps):
        archs = sa_nas.next_archs()[0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        with static.program_guard(train_program, startup_program):
            data_shape = [None] + image_shape
            data = static.data(name='data', shape=data_shape, dtype='float32')
            label = static.data(name='label', shape=[None, 1], dtype='int64')
            if args.data == 'cifar10':
                paddle.assign(paddle.reshape(label, [-1, 1]), label)
            train_loader = paddle.io.DataLoader(train_dataset,
                                                places=places,
                                                feed_list=[data, label],
                                                drop_last=True,
                                                batch_size=args.batch_size,
                                                return_list=False,
                                                shuffle=True,
                                                use_shared_memory=True,
                                                num_workers=4)
            val_loader = paddle.io.DataLoader(val_dataset,
                                              places=place,
                                              feed_list=[data, label],
                                              drop_last=False,
                                              batch_size=args.batch_size,
                                              return_list=False,
                                              shuffle=False)
            data = conv_bn_layer(input=data,
                                 num_filters=32,
                                 filter_size=3,
                                 stride=2,
                                 padding='SAME',
                                 act='relu6',
                                 name='mobilenetv2_conv1')
            data = archs(data)[0]
            data = conv_bn_layer(input=data,
                                 num_filters=1280,
                                 filter_size=1,
                                 stride=1,
                                 padding='SAME',
                                 act='relu6',
                                 name='mobilenetv2_last_conv')
            data = F.adaptive_avg_pool2d(data,
                                         output_size=[1, 1],
                                         name='mobilenetv2_last_pool')
            output = static.nn.fc(
                x=data,
                size=args.class_dim,
                weight_attr=ParamAttr(name='mobilenetv2_fc_weights'),
                bias_attr=ParamAttr(name='mobilenetv2_fc_offset'))

            softmax_out = F.softmax(output)
            cost = F.cross_entropy(softmax_out, label=label)
            avg_cost = paddle.mean(cost)
            acc_top1 = paddle.metric.accuracy(input=softmax_out,
                                              label=label,
                                              k=1)
            acc_top5 = paddle.metric.accuracy(input=softmax_out,
                                              label=label,
                                              k=5)
            test_program = train_program.clone(for_test=True)

            optimizer = paddle.optimizer.Momentum(
                learning_rate=0.1,
                momentum=0.9,
                weight_decay=paddle.regularizer.L2Decay(1e-4))
            optimizer.minimize(avg_cost)

        current_flops = flops(train_program)
        print('step: {}, current_flops: {}'.format(step, current_flops))
        if current_flops > int(321208544):
            continue

        exe = static.Executor(place)
        exe.run(startup_program)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                batch_time = time.time() - s_time
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, data in enumerate(val_loader()):
            test_fetches = [avg_cost.name, acc_top1.name, acc_top5.name]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)

            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        sa_nas.reward(float(finally_reward[1]))
예제 #22
0
def search(config, args, image_size, is_server=True):
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    if is_server:
        ### start a server and a client
        sa_nas = SANAS(config,
                       server_addr=(args.server_address, args.port),
                       search_steps=args.search_steps,
                       is_server=True)
    else:
        ### start a client
        sa_nas = SANAS(config,
                       server_addr=(args.server_address, args.port),
                       init_temperature=init_temperature,
                       is_server=False)

    image_shape = [3, image_size, image_size]
    for step in range(args.search_steps):
        archs = sa_nas.next_archs()[0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        train_fetch_list, _, train_loader = build_program(train_program,
                                                          startup_program,
                                                          image_shape,
                                                          archs,
                                                          args,
                                                          is_train=True)

        current_params = count_parameters_in_MB(
            train_program.global_block().all_parameters(), 'cifar10')
        _logger.info('step: {}, current_params: {}M'.format(
            step, current_params))
        if current_params > float(3.77):
            continue

        test_fetch_list, _, test_loader = build_program(test_program,
                                                        startup_program,
                                                        image_shape,
                                                        archs,
                                                        args,
                                                        is_train=False)
        test_program = test_program.clone(for_test=True)

        exe = static.Executor(place)
        exe.run(startup_program)

        train_reader = reader.train_valid(batch_size=args.batch_size,
                                          is_train=True,
                                          is_shuffle=True)
        test_reader = reader.train_valid(batch_size=args.batch_size,
                                         is_train=False,
                                         is_shuffle=False)

        train_loader.set_batch_generator(train_reader, places=place)
        test_loader.set_batch_generator(test_reader, places=place)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=train_fetch_list[0].name,
                build_strategy=build_strategy)

        valid_top1_list = []
        for epoch_id in range(args.retain_epoch):
            train_top1 = train(train_compiled_program, exe, epoch_id,
                               train_loader, train_fetch_list, args)
            _logger.info("TRAIN: step: {}, Epoch {}, train_acc {:.6f}".format(
                step, epoch_id, train_top1))
            valid_top1 = valid(test_program, exe, epoch_id, test_loader,
                               test_fetch_list, args)
            _logger.info("TEST: Epoch {}, valid_acc {:.6f}".format(
                epoch_id, valid_top1))
            valid_top1_list.append(valid_top1)
        sa_nas.reward(float(valid_top1_list[-1] + valid_top1_list[-2]) / 2)
예제 #23
0
def final_test(config, args, image_size, token=None):
    assert token != None, "If you want to start a final experiment, you must input a token."
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    sa_nas = SANAS(config,
                   server_addr=(args.server_address, args.port),
                   is_server=True)

    image_shape = [3, image_size, image_size]
    archs = sa_nas.tokens2arch(token)[0]

    train_program = static.Program()
    test_program = static.Program()
    startup_program = static.Program()
    train_fetch_list, (data,
                       label), train_loader = build_program(train_program,
                                                            startup_program,
                                                            image_shape,
                                                            archs,
                                                            args,
                                                            is_train=True)

    current_params = count_parameters_in_MB(
        train_program.global_block().all_parameters(), 'cifar10')
    _logger.info('current_params: {}M'.format(current_params))
    test_fetch_list, _, test_loader = build_program(test_program,
                                                    startup_program,
                                                    image_shape,
                                                    archs,
                                                    args,
                                                    is_train=False)
    test_program = test_program.clone(for_test=True)

    exe = static.Executor(place)
    exe.run(startup_program)

    train_reader = reader.train_valid(batch_size=args.batch_size,
                                      is_train=True,
                                      is_shuffle=True)
    test_reader = reader.train_valid(batch_size=args.batch_size,
                                     is_train=False,
                                     is_shuffle=False)

    train_loader.set_batch_generator(train_reader, places=place)
    test_loader.set_batch_generator(test_reader, places=place)

    build_strategy = static.BuildStrategy()
    train_compiled_program = static.CompiledProgram(
        train_program).with_data_parallel(loss_name=train_fetch_list[0].name,
                                          build_strategy=build_strategy)

    valid_top1_list = []
    for epoch_id in range(args.retain_epoch):
        train_top1 = train(train_compiled_program, exe, epoch_id, train_loader,
                           train_fetch_list, args)
        _logger.info("TRAIN: Epoch {}, train_acc {:.6f}".format(
            epoch_id, train_top1))
        valid_top1 = valid(test_program, exe, epoch_id, test_loader,
                           test_fetch_list, args)
        _logger.info("TEST: Epoch {}, valid_acc {:.6f}".format(
            epoch_id, valid_top1))
        valid_top1_list.append(valid_top1)

        output_dir = os.path.join('darts_output', str(epoch_id))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        static.save_inference_model(output_dir, [data], test_fetch_list, exe)
예제 #24
0
def test_search_result(tokens, image_size, args, config):
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]

    sa_nas = SANAS(config,
                   server_addr=(args.server_address, args.port),
                   search_steps=args.search_steps,
                   is_server=True)

    image_shape = [3, image_size, image_size]
    if args.data == 'cifar10':
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       transform=transform,
                                                       backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     transform=transform,
                                                     backend='cv2')

    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')

    archs = sa_nas.tokens2arch(tokens)[0]

    train_program = static.Program()
    test_program = static.Program()
    startup_program = static.Program()
    train_loader, avg_cost, acc_top1, acc_top5 = build_program(
        train_program, startup_program, image_shape, train_dataset, archs,
        args, places)

    current_flops = flops(train_program)
    print('current_flops: {}'.format(current_flops))
    test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
        test_program,
        startup_program,
        image_shape,
        val_dataset,
        archs,
        args,
        place,
        is_test=True)

    test_program = test_program.clone(for_test=True)

    exe = static.Executor(place)
    exe.run(startup_program)

    build_strategy = static.BuildStrategy()
    train_compiled_program = static.CompiledProgram(
        train_program).with_data_parallel(loss_name=avg_cost.name,
                                          build_strategy=build_strategy)
    for epoch_id in range(args.retain_epoch):
        for batch_id, data in enumerate(train_loader()):
            fetches = [avg_cost.name]
            s_time = time.time()
            outs = exe.run(train_compiled_program,
                           feed=data,
                           fetch_list=fetches)[0]
            batch_time = time.time() - s_time
            if batch_id % 10 == 0:
                _logger.info(
                    'TRAIN: epoch: {}, batch: {}, cost: {}, batch_time: {}ms'.
                    format(epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)

            _logger.info(
                'TEST: batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
                format(batch_id, batch_reward[0], batch_reward[1],
                       batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))
예제 #25
0
image = static.data(name='image', shape=[None, 784], dtype='float32')
label = static.data(name='label', shape=[None, 1], dtype='int64')

# Define DataLoader
loader = paddle.io.DataLoader.from_generator(feed_list=[image, label],
                                             capacity=16,
                                             iterable=ITERABLE)

# Define network
loss = simple_net(image, label)

# Set data source of DataLoader
#
# If DataLoader is iterable, places must be given and the number of places must be the same with device number.
#  - If you are using GPU, call `paddle.static.cuda_places()` to get all GPU places.
#  - If you are using CPU, call `paddle.static.cpu_places()` to get all CPU places.
#
# If DataLoader is not iterable, places can be None.
places = static.cuda_places() if USE_GPU else static.cpu_places()
set_data_source(loader, places)

exe = static.Executor(places[0])
exe.run(static.default_startup_program())

prog = static.CompiledProgram(
    static.default_main_program()).with_data_parallel(loss_name=loss.name)

if loader.iterable:
    train_iterable(exe, prog, loss, loader)
else:
    train_non_iterable(exe, prog, loss, loader)
예제 #26
0
def search_mobilenetv2(config, args, image_size, is_server=True):
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    if is_server:
        ### start a server and a client
        rl_nas = RLNAS(key='lstm',
                       configs=config,
                       is_sync=False,
                       server_addr=(args.server_address, args.port),
                       controller_batch_size=1,
                       controller_decay_steps=1000,
                       controller_decay_rate=0.8,
                       lstm_num_layers=1,
                       hidden_size=10,
                       temperature=1.0)
    else:
        ### start a client
        rl_nas = RLNAS(key='lstm',
                       configs=config,
                       is_sync=False,
                       server_addr=(args.server_address, args.port),
                       lstm_num_layers=1,
                       hidden_size=10,
                       temperature=1.0,
                       controller_batch_size=1,
                       controller_decay_steps=1000,
                       controller_decay_rate=0.8,
                       is_server=False)

    image_shape = [3, image_size, image_size]
    if args.data == 'cifar10':
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       transform=transform,
                                                       backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     transform=transform,
                                                     backend='cv2')

    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')

    for step in range(args.search_steps):
        archs = rl_nas.next_archs(1)[0][0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, train_dataset, archs,
            args, places)

        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            val_dataset,
            archs,
            args,
            place,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        exe = static.Executor(place)
        exe.run(startup_program)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                batch_time = time.time() - s_time
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)

            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        rl_nas.reward(np.float32(finally_reward[1]))
예제 #27
0
# coding=utf-8
import numpy
import paddle
import paddle.static as static
import paddle.nn.functional as F

# 开启静态图模式
paddle.enable_static()
paddle.set_device('cpu')

# 网络结构定义
x = static.data(name='X', shape=[None, 13], dtype='float32')
y = static.data(name='Y', shape=[None, 1], dtype='float32')
predict = static.nn.fc(x=x, size=1)
loss = F.square_error_cost(input=predict, label=y)
avg_loss = paddle.mean(loss)

# 执行环境准备
exe = static.Executor(paddle.CPUPlace())
exe.run(static.default_startup_program())

# 执行网络
x = numpy.random.random(size=(7, 13)).astype('float32')
y = numpy.random.random(size=(8, 1)).astype('float32')
loss_data, = exe.run(static.default_main_program(),
                     feed={
                         'X': x,
                         'Y': y
                     },
                     fetch_list=[avg_loss.name])
예제 #28
0
import paddle
import paddle.static as static

paddle.enable_static()

x = static.data(name="x", shape=[10, 10], dtype='float32')
y = static.nn.fc(x, 10)
z = static.nn.fc(y, 10)

place = paddle.CPUPlace()
exe = static.Executor(place)
exe.run(static.default_startup_program())
prog = static.default_main_program()

static.save(prog, "./temp")
static.load(prog, "./temp")
예제 #29
0
opt = paddle.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)

# data loader
transform = Compose([Normalize(mean=[127.5], std=[127.5], data_format='CHW')])
train_dataset = paddle.vision.datasets.MNIST(mode='train', transform=transform)
train_loader = paddle.io.DataLoader(train_dataset,
                                    feed_list=[image, label],
                                    batch_size=BATCH_SIZE,
                                    shuffle=True,
                                    drop_last=True,
                                    num_workers=2)

# prepare
exe = static.Executor()
exe.run(static.default_startup_program())

places = paddle.static.cuda_places()
compiled_program = static.CompiledProgram(
    static.default_main_program()).with_data_parallel(loss_name=loss.name,
                                                      places=places)

# train
for epoch_id in range(EPOCH_NUM):
    for batch_id, (image_data, label_data) in enumerate(train_loader()):
        loss_data = exe.run(compiled_program,
                            feed={
                                'image': image_data,
                                'label': label_data
                            },
예제 #30
0
def search_mobilenetv2(config, args, image_size, is_server=True):
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    if is_server:
        ### start a server and a client
        rl_nas = RLNAS(
            key='ddpg',
            configs=config,
            is_sync=False,
            obs_dim=26,  ### step + length_of_token
            server_addr=(args.server_address, args.port))
    else:
        ### start a client
        rl_nas = RLNAS(key='ddpg',
                       configs=config,
                       is_sync=False,
                       obs_dim=26,
                       server_addr=(args.server_address, args.port),
                       is_server=False)

    image_shape = [3, image_size, image_size]
    if args.data == 'cifar10':
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       transform=transform,
                                                       backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     transform=transform,
                                                     backend='cv2')

    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')

    for step in range(args.search_steps):
        if step == 0:
            action_prev = [1. for _ in rl_nas.range_tables]
        else:
            action_prev = rl_nas.tokens[0]
        obs = [step]
        obs.extend(action_prev)
        archs = rl_nas.next_archs(obs=obs)[0][0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, train_dataset, archs,
            args, places)

        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            val_dataset,
            archs,
            args,
            place,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        exe = static.Executor(place)
        exe.run(startup_program)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                batch_time = time.time() - s_time
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)

            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        obs = np.expand_dims(obs, axis=0).astype('float32')
        actions = rl_nas.tokens
        obs_next = [step + 1]
        obs_next.extend(actions[0])
        obs_next = np.expand_dims(obs_next, axis=0).astype('float32')

        if step == args.search_steps - 1:
            terminal = np.expand_dims([True], axis=0).astype(np.bool)
        else:
            terminal = np.expand_dims([False], axis=0).astype(np.bool)
        rl_nas.reward(np.expand_dims(np.float32(finally_reward[1]), axis=0),
                      obs=obs,
                      actions=actions.astype('float32'),
                      obs_next=obs_next,
                      terminal=terminal)

        if step == 2:
            sys.exit(0)