Example #1
    def deploy(self):
        model_list = glob(
            os.path.join(self.result_dir, self.dataset, 'model', '*.pdparams'))
        if model_list:
            model_list.sort()
            it = int(model_list[-1].split('_')[-1].split('.')[0])
            self.load(os.path.join(self.result_dir, self.dataset, 'model'), it)
            print(" [*] Load SUCCESS")
        else:
            print(" [*] Load FAILURE")
            return
        self.genA2B.eval()
        self.genB2A.eval()
        real_A, _ = next(iter(self.testA_loader))

        class Output(fluid.dygraph.Layer):
            def __init__(self, model, i):
                super().__init__()
                self.model = model
                self.i = i

            def forward(self, x):
                y = self.model(x)
                return y[self.i]

        in_var = real_A
        model = Output(self.genA2B, 0)
        out_dygraph, static_layer = TracedLayer.trace(model, inputs=[in_var])

        out_static_graph = static_layer([in_var])
        print(len(out_static_graph))
        print(out_static_graph[0].shape)

        dirname = './save_infer_model'
        static_layer.save_inference_model(dirname=dirname)
        print(f"Save static layer in the directory: `{dirname}`")
Example #2
def train_dygraph(dev_id, epoch_num, use_multi_gpu):
    strategy = (fluid.dygraph.parallel.prepare_context()
                if use_multi_gpu else None)

    resnet = ResNet()
    optimizer = optimizer_setting(train_parameters,
                                  parameter_list=resnet.parameters())

    if use_multi_gpu:
        resnet = fluid.dygraph.parallel.DataParallel(resnet, strategy)

    traced_layer = None
    reader = create_reader()

    if use_multi_gpu:
        reader = fluid.contrib.distributed_batch_reader(reader)

    image_shape = train_parameters['input_size']

    for epoch_id in six.moves.range(epoch_num):
        for i, data in enumerate(reader()):
            image_np = np.array([np.reshape(x[0], image_shape) for x in data])
            label_np = np.array([x[1] for x in data])

            image = to_variable(image_np, zero_copy=False)
            label = to_variable(label_np, zero_copy=False)
            label.stop_gradient = True

            if i == 0 and epoch_id == 0:
                out, traced_layer = TracedLayer.trace(resnet, image)
            else:
                out = resnet(image)

            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)
            avg_loss_val = avg_loss.numpy()

            if use_multi_gpu:
                avg_loss = resnet.scale_loss(avg_loss)

            avg_loss.backward()

            if use_multi_gpu:
                resnet.apply_collective_grads()

            optimizer.minimize(avg_loss)

            resnet.clear_gradients()

            if i % 10 == 0:
                print('Epoch {}, batch {}, avg_loss {}'.format(
                    epoch_id, i, avg_loss_val))

    if dev_id == 0:
        traced_layer.save_inference_model('./infer_dygraph')
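The `TracedLayer` returned by `TracedLayer.trace` is itself callable and runs the captured static program; a minimal sketch of checking it against the dygraph output, reusing `resnet`, `traced_layer`, and `image` from the loop above:

if traced_layer is not None:
    out_static = traced_layer([image])  # a list of numpy arrays from the static program
    out_dygraph = resnet(image)         # the regular dygraph forward
    print(np.allclose(out_static[0], out_dygraph.numpy()))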
Example #3
    def _parse_model(self, model, input_shape):
        _logger.debug("Parsing model with input: {}".format(input_shape))
        data = np.ones(tuple(input_shape)).astype("float32")
        in_var = paddle.to_tensor(data)
        out_dygraph, static_layer = TracedLayer.trace(model, inputs=[in_var])
        graph = GraphWrapper(static_layer.program)

        visited = {}
        for name, param in model.named_parameters():
            group = collect_convs([param.name], graph,
                                  visited)[0]  # [(name, axis, pruned_idx)]
            if len(group) > 0:
                self.groups.append(group)
        _logger.debug("Found {} groups.".format(len(self.groups)))
Example #4
def infer(files, save_static_path=None):
    result_list = []
    place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
    print('infer with {}'.format(place))
    with fluid.dygraph.guard(place):
        params, _ = fluid.load_dygraph(
            '{}/crnn_best'.format('output/baidu_model'))  # or train_parameters['save_model_dir']
        # crnn = CRNN(train_parameters["class_dim"] + 1, 1)
        crnn = CRNN(3828, 1)
        crnn.load_dict(params)
        crnn.eval()
        for file in tqdm(files):
            img = precess_img(file)
            img = fluid.dygraph.to_variable(img).astype('float32')
            if save_static_path is not None:
                out_dygraph, static_layer = TracedLayer.trace(crnn, inputs=[img])
                # save the converted static model
                static_layer.save_inference_model(save_static_path, feed=[0], fetch=[0])
            pred = crnn(img)
            output = utils.greedy_decode(pred.numpy(), blank=train_parameters["class_dim"])
            p_s = "".join([train_parameters['r_label_dict'][c] for c in output[0]])
            result_list.append('{0}\t{1}'.format(os.path.basename(file), p_s))
            break  # NOTE: only the first file is processed
    return result_list
Example #5
    def ptb_rnn_cpu_float32(self, is_sparse):
        seed = 90
        hidden_size = 10
        vocab_size = 1000
        num_layers = 1
        num_steps = 3
        init_scale = 0.1
        batch_size = 4
        batch_num = 200
        traced_layer = None

        with fluid.dygraph.guard():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            # TODO: marsyang1993 Change seed to
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale,
                                 is_sparse=is_sparse)

            sgd = SGDOptimizer(learning_rate=1e-3,
                               parameter_list=ptb_model.parameters())
            dy_param_updated = dict()
            dy_param_init = dict()
            dy_loss = None
            last_hidden = None
            last_cell = None

            helper = DyGraphProgramDescTracerTestHelper(self)
            program = None

            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                if i % 5 == 0 and _in_legacy_dygraph():
                    outs, traced_layer = TracedLayer.trace(
                        ptb_model, [x, y, init_hidden, init_cell])
                    outs_static = traced_layer([x, y, init_hidden, init_cell])
                    helper.assertEachVar(outs, outs_static)

                    if program is not None:
                        self.assertTrue(
                            is_equal_program(traced_layer.program, program))

                    program = traced_layer.program

                    traced_layer.save_inference_model(
                        './infe_imperative_ptb_rnn', feed=list(range(4)))
                else:
                    outs = ptb_model(x, y, init_hidden, init_cell)

                dy_loss, last_hidden, last_cell = outs

                if i == 0:
                    for param in ptb_model.parameters():
                        dy_param_init[param.name] = param.numpy()
                dy_loss.backward()
                sgd.minimize(dy_loss)
                ptb_model.clear_gradients()
                if i == batch_num - 1:
                    for param in ptb_model.parameters():
                        dy_param_updated[param.name] = param.numpy()

            dy_loss_value = dy_loss.numpy()
            dy_last_cell_value = last_cell.numpy()
            dy_last_hidden_value = last_hidden.numpy()

        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            ptb_model = PtbModel(hidden_size=hidden_size,
                                 vocab_size=vocab_size,
                                 num_layers=num_layers,
                                 num_steps=num_steps,
                                 init_scale=init_scale,
                                 is_sparse=is_sparse)

            exe = fluid.Executor(
                fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace())
            sgd = SGDOptimizer(learning_rate=1e-3)
            x = fluid.layers.data(name="x",
                                  shape=[-1, num_steps],
                                  dtype='int64')
            y = fluid.layers.data(name="y", shape=[-1, 1], dtype='float32')
            init_hidden = fluid.layers.data(name="init_hidden",
                                            shape=[1],
                                            dtype='float32')
            init_cell = fluid.layers.data(name="init_cell",
                                          shape=[1],
                                          dtype='float32')

            static_loss, static_last_hidden, static_last_cell = ptb_model(
                x, y, init_hidden, init_cell)
            sgd.minimize(static_loss)
            static_param_updated = dict()
            static_param_init = dict()
            static_param_name_list = list()
            for param in ptb_model.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(framework.default_startup_program(),
                          fetch_list=static_param_name_list)
            for i in range(len(static_param_name_list)):
                static_param_init[static_param_name_list[i]] = out[i]
            static_loss_value = None
            static_last_cell_value = None
            static_last_hidden_value = None
            for i in range(batch_num):
                x_data = np.arange(12).reshape(4, 3).astype('int64')
                y_data = np.arange(1, 13).reshape(4, 3).astype('int64')
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                fetch_list = [
                    static_loss, static_last_hidden, static_last_cell
                ]
                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "x": x_data,
                                  "y": y_data,
                                  "init_hidden": init_hidden_data,
                                  "init_cell": init_cell_data
                              },
                              fetch_list=fetch_list)
                static_loss_value = out[0]
                static_last_hidden_value = out[1]
                static_last_cell_value = out[2]

                if i == batch_num - 1:
                    for k in range(3, len(out)):
                        static_param_updated[static_param_name_list[
                            k - 3]] = out[k]

        self.assertTrue(np.array_equal(static_loss_value, dy_loss_value))
        self.assertTrue(
            np.array_equal(static_last_cell_value, dy_last_cell_value))
        self.assertTrue(
            np.array_equal(static_last_hidden_value, dy_last_hidden_value))
        for key, value in six.iteritems(static_param_init):
            self.assertTrue(np.array_equal(value, dy_param_init[key]))
        for key, value in six.iteritems(static_param_updated):
            self.assertTrue(np.array_equal(value, dy_param_updated[key]))
Example #6
    def test_resnet_float32(self):
        seed = 90

        batch_size = train_parameters["batch_size"]
        batch_num = 10

        traced_layer = None

        with fluid.dygraph.guard():
            paddle.manual_seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            resnet = ResNet()
            optimizer = optimizer_setting(train_parameters,
                                          parameter_list=resnet.parameters())
            np.random.seed(seed)

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(self.reader_decorator(
                    paddle.dataset.flowers.train(use_xmap=False)),
                             batch_size=batch_size,
                             drop_last=True),
                places=fluid.CPUPlace())

            dy_param_init_value = {}
            for param in resnet.parameters():
                dy_param_init_value[param.name] = param.numpy()

            helper = DyGraphProgramDescTracerTestHelper(self)
            program = None

            for batch_id, data in enumerate(batch_py_reader()):
                if batch_id >= batch_num:
                    break

                img = data[0]
                label = data[1]
                label.stop_gradient = True

                out = None
                if batch_id % 5 == 0:
                    out, traced_layer = TracedLayer.trace(resnet, img)
                    if program is not None:
                        self.assertTrue(
                            is_equal_program(program, traced_layer.program))

                    traced_layer.save_inference_model(
                        './infer_imperative_resnet')

                    program = traced_layer.program
                else:
                    out = resnet(img)

                if traced_layer is not None:
                    resnet.eval()
                    traced_layer._switch(is_test=True)
                    out_dygraph = resnet(img)
                    out_static = traced_layer([img])
                    traced_layer._switch(is_test=False)
                    helper.assertEachVar(out_dygraph, out_static)
                    resnet.train()

                loss = fluid.layers.cross_entropy(input=out, label=label)
                avg_loss = fluid.layers.mean(x=loss)

                dy_out = avg_loss.numpy()

                if batch_id == 0:
                    for param in resnet.parameters():
                        if param.name not in dy_param_init_value:
                            dy_param_init_value[param.name] = param.numpy()

                avg_loss.backward()

                dy_grad_value = {}
                for param in resnet.parameters():
                    if param.trainable:
                        np_array = np.array(
                            param._grad_ivar().value().get_tensor())
                        dy_grad_value[param.name +
                                      core.grad_var_suffix()] = np_array

                optimizer.minimize(avg_loss)
                resnet.clear_gradients()

                dy_param_value = {}
                for param in resnet.parameters():
                    dy_param_value[param.name] = param.numpy()

        with new_program_scope():
            paddle.manual_seed(seed)
            paddle.framework.random._manual_program_seed(seed)

            exe = fluid.Executor(
                fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace())

            resnet = ResNet()
            optimizer = optimizer_setting(train_parameters)

            np.random.seed(seed)
            train_reader = paddle.batch(
                paddle.dataset.flowers.train(use_xmap=False),
                batch_size=batch_size)

            img = fluid.layers.data(name='pixel',
                                    shape=[3, 224, 224],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            out = resnet(img)
            loss = fluid.layers.cross_entropy(input=out, label=label)
            avg_loss = fluid.layers.mean(x=loss)
            optimizer.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            static_grad_name_list = []
            for param in resnet.parameters():
                static_param_name_list.append(param.name)
            for param in resnet.parameters():
                if param.trainable:
                    static_grad_name_list.append(param.name +
                                                 core.grad_var_suffix())

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for batch_id, data in enumerate(train_reader()):
                if batch_id >= batch_num:
                    break

                static_x_data = np.array([
                    x[0].reshape(3, 224, 224) for x in data
                ]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([batch_size, 1])

                if traced_layer is not None:
                    traced_layer([static_x_data])

                fetch_list = [avg_loss.name]
                fetch_list.extend(static_param_name_list)
                fetch_list.extend(static_grad_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)

                static_param_value = {}
                static_grad_value = {}
                static_out = out[0]
                param_start_pos = 1
                grad_start_pos = len(static_param_name_list) + param_start_pos
                for i in range(param_start_pos,
                               len(static_param_name_list) + param_start_pos):
                    static_param_value[static_param_name_list[
                        i - param_start_pos]] = out[i]
                for i in range(grad_start_pos,
                               len(static_grad_name_list) + grad_start_pos):
                    static_grad_value[static_grad_name_list[
                        i - grad_start_pos]] = out[i]

        print("static", static_out)
        print("dygraph", dy_out)
        self.assertTrue(np.allclose(static_out, dy_out))

        self.assertEqual(len(dy_param_init_value),
                         len(static_param_init_value))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())

        self.assertEqual(len(dy_grad_value), len(static_grad_value))
        for key, value in six.iteritems(static_grad_value):
            self.assertTrue(np.allclose(value, dy_grad_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())

        self.assertEqual(len(dy_param_value), len(static_param_value))
        for key, value in six.iteritems(static_param_value):
            self.assertTrue(np.allclose(value, dy_param_value[key]))
            self.assertTrue(np.isfinite(value).all())
            self.assertFalse(np.isnan(value).any())
Example #7
    def transformer_sort_gradient_float32(self, is_sparse):
        seed = 90

        with guard():
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            transformer = TransFormer(ModelHyperParams.src_vocab_size,
                                      ModelHyperParams.trg_vocab_size,
                                      ModelHyperParams.max_length + 1,
                                      ModelHyperParams.n_layer,
                                      ModelHyperParams.n_head,
                                      ModelHyperParams.d_key,
                                      ModelHyperParams.d_value,
                                      ModelHyperParams.d_model,
                                      ModelHyperParams.d_inner_hid,
                                      ModelHyperParams.prepostprocess_dropout,
                                      ModelHyperParams.attention_dropout,
                                      ModelHyperParams.relu_dropout,
                                      ModelHyperParams.preprocess_cmd,
                                      ModelHyperParams.postprocess_cmd,
                                      ModelHyperParams.weight_sharing,
                                      TrainTaskConfig.label_smooth_eps,
                                      use_py_reader=use_py_reader,
                                      is_test=False,
                                      is_sparse=is_sparse)
            if sync:
                lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
                    ModelHyperParams.d_model, TrainTaskConfig.warmup_steps)
                with fluid.default_main_program()._lr_schedule_guard():
                    learning_rate = lr_decay * TrainTaskConfig.learning_rate
                optimizer = fluid.optimizer.Adam(
                    learning_rate=learning_rate,
                    beta1=TrainTaskConfig.beta1,
                    beta2=TrainTaskConfig.beta2,
                    epsilon=TrainTaskConfig.eps,
                    parameter_list=transformer.parameters())
            else:
                optimizer = fluid.optimizer.SGD(
                    learning_rate=0.003,
                    parameter_list=transformer.parameters())
            dy_param_init = dict()
            dy_param_updated = dict()

            helper = DyGraphProgramDescTracerTestHelper(self)
            program = None

            for i in range(batch_num):
                enc_inputs, dec_inputs, label, weights = create_data()
                if i % 2 == 0:
                    outs, traced_layer = TracedLayer.trace(
                        transformer, [enc_inputs, dec_inputs, label, weights])

                    ins_static = enc_inputs + dec_inputs + [label, weights]
                    outs_static = traced_layer(ins_static)
                    helper.assertEachVar(outs, outs_static)
                    if program is not None:
                        self.assertTrue(
                            is_equal_program(program, traced_layer.program))

                    program = traced_layer.program
                    traced_layer.save_inference_model(
                        './infer_imperative_transformer',
                        feed=list(range(len(ins_static))),
                        fetch=list(range(len(outs_static))))
                else:
                    outs = transformer(enc_inputs, dec_inputs, label, weights)

                dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = outs

                if i == 0:
                    for param in transformer.parameters():
                        dy_param_init[param.name] = param.numpy()

                dy_avg_cost.backward()
                optimizer.minimize(dy_avg_cost)
                transformer.clear_gradients()

                if i == batch_num - 1:
                    for param in transformer.parameters():
                        dy_param_updated[param.name] = param.numpy()

            dy_avg_cost_value = dy_avg_cost.numpy()
            dy_sum_cost_value = dy_sum_cost.numpy()
            dy_predict_value = dy_predict.numpy()
            dy_token_num_value = dy_token_num.numpy()

        with new_program_scope():
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            transformer = TransFormer(ModelHyperParams.src_vocab_size,
                                      ModelHyperParams.trg_vocab_size,
                                      ModelHyperParams.max_length + 1,
                                      ModelHyperParams.n_layer,
                                      ModelHyperParams.n_head,
                                      ModelHyperParams.d_key,
                                      ModelHyperParams.d_value,
                                      ModelHyperParams.d_model,
                                      ModelHyperParams.d_inner_hid,
                                      ModelHyperParams.prepostprocess_dropout,
                                      ModelHyperParams.attention_dropout,
                                      ModelHyperParams.relu_dropout,
                                      ModelHyperParams.preprocess_cmd,
                                      ModelHyperParams.postprocess_cmd,
                                      ModelHyperParams.weight_sharing,
                                      TrainTaskConfig.label_smooth_eps,
                                      use_py_reader=use_py_reader,
                                      is_test=False,
                                      is_sparse=is_sparse)
            exe = fluid.Executor(
                fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace())
            optimizer = fluid.optimizer.SGD(learning_rate=0.003)

            data_input_names = (encoder_data_input_fields +
                                decoder_data_input_fields[:-1] +
                                label_data_input_fields)
            all_inputs = make_all_inputs(data_input_names)
            enc_inputs_len = len(encoder_data_input_fields)
            dec_inputs_len = len(decoder_data_input_fields[:-1])
            enc_inputs = all_inputs[0:enc_inputs_len]
            dec_inputs = all_inputs[enc_inputs_len:enc_inputs_len +
                                    dec_inputs_len]
            label = all_inputs[-2]
            weights = all_inputs[-1]
            static_param_updated = dict()
            static_param_init = dict()
            static_param_name_list = list()
            static_sum_cost, static_avg_cost, static_predict, static_token_num = transformer(
                enc_inputs, dec_inputs, label, weights)
            optimizer.minimize(static_avg_cost)
            for param in transformer.parameters():
                static_param_name_list.append(param.name)
            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)
            for i in range(len(static_param_name_list)):
                static_param_init[static_param_name_list[i]] = out[i]
            static_sum_cost_value = None
            static_avg_cost_value = None
            static_predict_value = None
            static_token_num_value = None
            for i in range(batch_num):
                feed_dict = create_feed_dict_list(create_data(True))
                fetch_list = [
                    static_sum_cost, static_avg_cost, static_predict,
                    static_token_num
                ]

                fetch_list.extend(static_param_name_list)
                out = exe.run(fluid.default_main_program(),
                              feed=feed_dict,
                              fetch_list=fetch_list)
                static_sum_cost_value = out[0]
                static_avg_cost_value = out[1]
                static_predict_value = out[2]
                static_token_num_value = out[3]
                if i == batch_num - 1:
                    for k in range(4, len(out)):
                        static_param_updated[static_param_name_list[
                            k - 4]] = out[k]

        self.assertTrue(
            np.array_equal(static_avg_cost_value, dy_avg_cost_value))
        self.assertTrue(
            np.array_equal(static_sum_cost_value, dy_sum_cost_value))
        self.assertTrue(np.array_equal(static_predict_value, dy_predict_value))
        self.assertTrue(
            np.array_equal(static_token_num_value, dy_token_num_value))

        for key, value in six.iteritems(static_param_init):
            self.assertTrue(np.array_equal(value, dy_param_init[key]))
        for key, value in six.iteritems(static_param_updated):
            self.assertTrue(np.array_equal(value, dy_param_updated[key]))
Example #8
        def run_dygraph():
            # NOTE(xiongkun03): In the new executor, the inplace strategy is on by
            # default, which can make the results of the sum op differ slightly,
            # so we disable inplace here.
            fluid.set_flags({'FLAGS_new_executor_use_inplace': False})
            paddle.seed(seed)
            paddle.framework.random._manual_program_seed(seed)
            transformer = TransFormer(ModelHyperParams.src_vocab_size,
                                      ModelHyperParams.trg_vocab_size,
                                      ModelHyperParams.max_length + 1,
                                      ModelHyperParams.n_layer,
                                      ModelHyperParams.n_head,
                                      ModelHyperParams.d_key,
                                      ModelHyperParams.d_value,
                                      ModelHyperParams.d_model,
                                      ModelHyperParams.d_inner_hid,
                                      ModelHyperParams.prepostprocess_dropout,
                                      ModelHyperParams.attention_dropout,
                                      ModelHyperParams.relu_dropout,
                                      ModelHyperParams.preprocess_cmd,
                                      ModelHyperParams.postprocess_cmd,
                                      ModelHyperParams.weight_sharing,
                                      TrainTaskConfig.label_smooth_eps,
                                      use_py_reader=use_py_reader,
                                      is_test=False,
                                      is_sparse=is_sparse)
            if sync:
                lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
                    ModelHyperParams.d_model, TrainTaskConfig.warmup_steps)
                with fluid.default_main_program()._lr_schedule_guard():
                    learning_rate = lr_decay * TrainTaskConfig.learning_rate
                optimizer = fluid.optimizer.Adam(
                    learning_rate=learning_rate,
                    beta1=TrainTaskConfig.beta1,
                    beta2=TrainTaskConfig.beta2,
                    epsilon=TrainTaskConfig.eps,
                    parameter_list=transformer.parameters())
            else:
                optimizer = fluid.optimizer.SGD(
                    learning_rate=0.003,
                    parameter_list=transformer.parameters())
            dy_param_init = dict()
            dy_param_updated = dict()

            helper = DyGraphProgramDescTracerTestHelper(self)
            program = None

            for i in range(batch_num):
                enc_inputs, dec_inputs, label, weights = create_data()
                if False:  # the TracedLayer branch is intentionally disabled in this variant
                    outs, traced_layer = TracedLayer.trace(
                        transformer, [enc_inputs, dec_inputs, label, weights])

                    ins_static = enc_inputs + dec_inputs + [label, weights]
                    outs_static = traced_layer(ins_static)
                    helper.assertEachVar(outs, outs_static)
                    if program is not None:
                        self.assertTrue(
                            is_equal_program(program, traced_layer.program))

                    program = traced_layer.program
                    traced_layer.save_inference_model(
                        './infer_imperative_transformer',
                        feed=list(range(len(ins_static))),
                        fetch=list(range(len(outs_static))))
                else:
                    outs = transformer(enc_inputs, dec_inputs, label, weights)

                dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = outs

                if i == 0:
                    for param in transformer.parameters():
                        dy_param_init[param.name] = param.numpy()

                dy_avg_cost.backward()
                optimizer.minimize(dy_avg_cost)
                transformer.clear_gradients()

                if i == batch_num - 1:
                    for param in transformer.parameters():
                        dy_param_updated[param.name] = param.numpy()

            dy_avg_cost_value = dy_avg_cost.numpy()
            dy_sum_cost_value = dy_sum_cost.numpy()
            dy_predict_value = dy_predict.numpy()
            dy_token_num_value = dy_token_num.numpy()

            return dy_avg_cost_value, dy_sum_cost_value, dy_predict_value, dy_token_num_value, \
                dy_param_init, dy_param_updated
Example #9
    def test_mnist_float32(self):
        seed = 90
        epoch_num = 1
        batch_size = 128
        batch_num = 50

        traced_layer = None

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            mnist = MNIST()
            sgd = SGDOptimizer(learning_rate=1e-3,
                               parameter_list=mnist.parameters())

            batch_py_reader = fluid.io.PyReader(capacity=1)
            batch_py_reader.decorate_sample_list_generator(
                paddle.batch(self.reader_decorator(
                    paddle.dataset.mnist.train()),
                             batch_size=batch_size,
                             drop_last=True),
                places=fluid.CPUPlace())

            mnist.train()
            dy_param_init_value = {}

            helper = DyGraphProgramDescTracerTestHelper(self)
            program = None
            for epoch in range(epoch_num):
                for batch_id, data in enumerate(batch_py_reader()):
                    if batch_id >= batch_num:
                        break
                    img = data[0]
                    dy_x_data = img.numpy()
                    label = data[1]
                    label.stop_gradient = True

                    if batch_id % 10 == 0:
                        cost, traced_layer = TracedLayer.trace(mnist,
                                                               inputs=img)
                        if program is not None:
                            self.assertTrue(
                                is_equal_program(program, traced_layer.program))
                        program = traced_layer.program
                        traced_layer.save_inference_model(
                            './infer_imperative_mnist')
                    else:
                        cost = mnist(img)

                    if traced_layer is not None:
                        cost_static = traced_layer([img])
                        helper.assertEachVar(cost, cost_static)

                    loss = fluid.layers.cross_entropy(cost, label)
                    avg_loss = fluid.layers.mean(loss)

                    dy_out = avg_loss.numpy()

                    if epoch == 0 and batch_id == 0:
                        for param in mnist.parameters():
                            dy_param_init_value[param.name] = param.numpy()

                    avg_loss.backward()
                    sgd.minimize(avg_loss)
                    mnist.clear_gradients()

                    dy_param_value = {}
                    for param in mnist.parameters():
                        dy_param_value[param.name] = param.numpy()

        with new_program_scope():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            exe = fluid.Executor(
                fluid.CUDAPlace(0) if core.is_compiled_with_cuda() else fluid.CPUPlace())

            mnist = MNIST()
            sgd = SGDOptimizer(learning_rate=1e-3)
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=batch_size,
                                        drop_last=True)

            img = fluid.layers.data(name='pixel',
                                    shape=[1, 28, 28],
                                    dtype='float32')
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            cost = mnist(img)
            loss = fluid.layers.cross_entropy(cost, label)
            avg_loss = fluid.layers.mean(loss)
            sgd.minimize(avg_loss)

            # initialize params and fetch them
            static_param_init_value = {}
            static_param_name_list = []
            for param in mnist.parameters():
                static_param_name_list.append(param.name)

            out = exe.run(fluid.default_startup_program(),
                          fetch_list=static_param_name_list)

            for i in range(len(static_param_name_list)):
                static_param_init_value[static_param_name_list[i]] = out[i]

            for epoch in range(epoch_num):
                for batch_id, data in enumerate(train_reader()):
                    if batch_id >= batch_num:
                        break
                    static_x_data = np.array([
                        x[0].reshape(1, 28, 28) for x in data
                    ]).astype('float32')
                    y_data = np.array([x[1]
                                       for x in data]).astype('int64').reshape(
                                           [batch_size, 1])

                    fetch_list = [avg_loss.name]
                    fetch_list.extend(static_param_name_list)

                    if traced_layer is not None:
                        traced_layer([static_x_data])

                    out = exe.run(fluid.default_main_program(),
                                  feed={
                                      "pixel": static_x_data,
                                      "label": y_data
                                  },
                                  fetch_list=fetch_list)

                    static_param_value = {}
                    static_out = out[0]
                    for i in range(1, len(out)):
                        static_param_value[static_param_name_list[i -
                                                                  1]] = out[i]

        self.assertTrue(np.allclose(dy_x_data, static_x_data))

        for key, value in six.iteritems(static_param_init_value):
            self.assertTrue(np.allclose(value, dy_param_init_value[key]))

        self.assertTrue(np.allclose(static_out, dy_out))

        for key, value in six.iteritems(static_param_value):
            self.assertTrue(np.allclose(value, dy_param_value[key], atol=1e-5))
Example #10
if __name__ == '__main__':

    exe = fluid.Executor(fluid.CPUPlace())
    # set up the inference pass
    with fluid.dygraph.guard():
        model = HarFcn()
        model.eval()
        # save the dygraph model
        # fluid.save_dygraph(model.state_dict(), 'lstmfcn')

        # save the static-graph model
        image = np.random.rand(1, 1, 3, 150).astype(np.float32)
        image = fluid.dygraph.to_variable(image)

        # class paddle.fluid.dygraph.TracedLayer(program, parameters, feed_names, fetch_names)
        out_dygraph, static_layer = TracedLayer.trace(model, inputs=[image])

        # run the static-graph model (an Executor is used internally)
        out_static_graph = static_layer([image])
        print(out_static_graph[0].shape)  # (2, 10)

        # save the static-graph model as an inference model
        static_layer.save_inference_model(dirname='lite')
        print("Saved")

Example #11
    state_dict = F.load_dygraph('./model')[0]
    for each in state_dict.keys():
        print(each)
    for key in list(state_dict.keys()):
        if 'encoder_q' in key:
            print(key[10:])
            new_key = key[10:]
            state_dict[new_key] = state_dict[key]
        del state_dict[key]  # every original key is dropped; only the renamed copies survive
    for key in list(state_dict.keys()):
        if key == 'classifier.0.weight':
            new_key = 'classifier.weight'
            state_dict[new_key] = state_dict[key]
            del state_dict[key]
        if key == 'classifier.0.bias':
            new_key = 'classifier.bias'
            state_dict[new_key] = state_dict[key]
            del state_dict[key]
        if key == 'classifier.2.weight' or key == 'classifier.2.bias':
            del state_dict[key]
    state_dict['classifier.weight'] = state_dict['classifier.weight'][:1024, :]
    state_dict['classifier.bias'] = state_dict['classifier.bias'][:1024]
    model.load_dict(state_dict)
    sen = np.random.random([16, 64]).astype('int64')
    in_sen = to_variable(sen)
    mask = np.random.random([16, 64]).astype('int64')
    in_mask = to_variable(mask)
    out_dygraph, static_layer = TracedLayer.trace(model,
                                                  inputs=[in_sen, in_mask])
    static_layer.save_inference_model(dirname='./rte50')
Example #12
class ExampleLayer(fluid.dygraph.Layer):
    def __init__(self):
        super(ExampleLayer, self).__init__()
        self._fc = Linear(3, 10)

    def forward(self, input):
        return self._fc(input)

save_dirname = './saved_infer_model'
in_np = np.random.random([2, 3]).astype('float32')

with fluid.dygraph.guard():
    layer = ExampleLayer()
    in_var = to_variable(in_np)
    out_dygraph, static_layer = TracedLayer.trace(layer, inputs=[in_var])
    print(static_layer.program)

    for i in range(10):
        in_var = to_variable(in_np)
        # print(in_var.name)
        out_var = static_layer([in_var])
        # print(in_var.name)
        print(out_var[0].name)

    static_layer.save_inference_model(save_dirname, feed=[0], fetch=[0])

place = fluid.CPUPlace()
exe = fluid.Executor(place)
program, feed_vars, fetch_vars = fluid.io.load_inference_model(save_dirname,
                                    exe)
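The loaded program can then be run directly with the same executor; a minimal continuation, reusing `in_np` from above (note that `feed_vars` holds the feed variable names):

out_np, = exe.run(program,
                  feed={feed_vars[0]: in_np},
                  fetch_list=fetch_vars)
print(out_np.shape)  # expected (2, 10): Linear(3, 10) applied to a [2, 3] input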
Example #13
    def train_dygraph():
        with fluid.dygraph.guard(get_place()):
            ptb_model = build_model()

            sgd = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr_arr),
                               parameter_list=ptb_model.parameters())

            grad_clip = gradient_clip()

            backward_strategy = fluid.dygraph.BackwardStrategy()
            backward_strategy.sort_sum_gradient = True

            traced_layer = None

            for epoch_id in range(max_epoch):
                total_loss = 0.0
                iters = 0.0

                init_hidden_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')
                init_cell_data = np.zeros(
                    (num_layers, batch_size, hidden_size), dtype='float32')

                train_data_iter = reader.get_data_iter(train_data, batch_size,
                                                       num_steps)

                for batch_id, batch in enumerate(train_data_iter):
                    x_data, y_data = batch
                    x_data = x_data.reshape((-1, num_steps))
                    y_data = y_data.reshape((-1, 1))

                    # x_data = generate_unique_ids()

                    x = to_variable(x_data)
                    y = to_variable(y_data)

                    init_hidden = to_variable(init_hidden_data)
                    init_cell = to_variable(init_cell_data)

                    if traced_layer is None:
                        outs, traced_layer = TracedLayer.trace(
                            ptb_model, [x, y, init_hidden, init_cell])
                    else:
                        outs = ptb_model(x, y, init_hidden, init_cell)

                    dy_loss, last_hidden, last_cell = outs

                    out_loss = dy_loss.numpy()

                    init_hidden_data = last_hidden.numpy()
                    init_cell_data = last_cell.numpy()

                    dy_loss.backward(backward_strategy)
                    sgd.minimize(dy_loss, grad_clip=grad_clip)
                    ptb_model.clear_gradients()

                    total_loss += out_loss
                    iters += num_steps

                    if batch_id > 0 and batch_id % log_interval == 0:
                        ppl = np.exp(total_loss / iters)
                        print(
                            "-- Epoch:[%d]; Batch:[%d]; loss: %.6f; ppl: %.5f"
                            % (epoch_id, batch_id, out_loss, ppl[0]))

                print("one epoch finished", epoch_id)

            traced_layer.save_inference_model(dirname='./infer_dygraph',
                                              fetch=[0])
Example #14
            image = fluid.dygraph.to_variable(image)
            label = fluid.dygraph.to_variable(label)
            # forward pass
            test_acc, test_loss = DeepSortNet(image, label)
            total_test_loss += np.mean(test_loss.numpy())
            test_accuracy_manager.update(test_acc.numpy(), BATCH_SIZE)

        print("test accuracy: %.6f , loss %.2f" %
              (test_accuracy_manager.eval(), total_test_loss))
        # plot the training/test curves
        draw_curve(epoch_idx, total_train_loss, accuracy_manager.eval(),
                   total_test_loss, test_accuracy_manager.eval())

    # save the feature-extraction model
    image = np.random.random([1, 3, 128, 64]).astype('float32')
    image = fluid.dygraph.to_variable(image)
    out_dygraph, static_layer = TracedLayer.trace(DeepSortNet, inputs=[image])
    static_layer.save_inference_model('infer_model_to_feature')

    # save the inference (evaluation) model
    for data in train_loader():
        # take one batch
        image, label = data
        break
    label = label.astype(np.int64)
    image = fluid.dygraph.to_variable(image)
    label = fluid.dygraph.to_variable(label)
    out_dygraph, static_layer = TracedLayer.trace(DeepSortNet,
                                                  inputs=[image, label])
    static_layer.save_inference_model('infer_model_to_eval')