Code example #1
def train_mnist(args):
    epoch_num = args.epoch
    BATCH_SIZE = 64
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        seed = 33
        np.random.seed(seed)
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)
        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=BATCH_SIZE,
                                    drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=BATCH_SIZE,
                                   drop_last=True)

        for epoch in range(epoch_num):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                cost, acc = mnist(img, label)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

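                # in data-parallel mode, scale the loss and all-reduce gradients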
                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()

                adam.minimize(avg_loss)
                # clear gradients for the next batch
                mnist.clear_gradients()
                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {:}".format(
                        epoch, batch_id, avg_loss.numpy()))
        print("checkpoint saved")
Code example #2
def train_mnist(args, model, tokens=None):
    epoch_num = args.epoch
    BATCH_SIZE = 64

    adam = AdamOptimizer(learning_rate=0.001,
                         parameter_list=model.parameters())

    train_reader = paddle.fluid.io.batch(paddle.dataset.mnist.train(),
                                         batch_size=BATCH_SIZE,
                                         drop_last=True)
    if args.use_data_parallel:
        train_reader = fluid.contrib.reader.distributed_batch_reader(
            train_reader)

    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_reader()):
            dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                  for x in data]).astype('float32')
            y_data = np.array([x[1]
                               for x in data]).astype('int64').reshape(-1, 1)

            img = to_variable(dy_x_data)
            label = to_variable(y_data)
            label.stop_gradient = True

            cost, acc = model.forward(img, label, tokens=tokens)

            loss = fluid.layers.cross_entropy(cost, label)
            avg_loss = fluid.layers.mean(loss)

            if args.use_data_parallel:
                avg_loss = model.scale_loss(avg_loss)
                avg_loss.backward()
                model.apply_collective_grads()
            else:
                avg_loss.backward()

            adam.minimize(avg_loss)
            # clear gradients for the next batch
            model.clear_gradients()
            if batch_id % 1 == 0:  # i.e. log every batch
                print("Loss at epoch {} step {}: {:}".format(
                    epoch, batch_id, avg_loss.numpy()))

        model.eval()
        test_acc = test_mnist(model, tokens=tokens)
        model.train()
        print("Loss at epoch {} , acc is: {}".format(epoch, test_acc))

    save_parameters = (not args.use_data_parallel) or (
        fluid.dygraph.parallel.Env().local_rank == 0)
    if save_parameters:
        fluid.save_dygraph(model.state_dict(), "save_temp")
        print("checkpoint saved")
Code example #3
def finetune(args):
    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        dataset = hub.dataset.ChnSentiCorp()
        tc = TransformerClassifier(num_classes=dataset.num_labels,
                                   transformer=ernie)
        adam = AdamOptimizer(learning_rate=1e-5,
                             parameter_list=tc.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            tc.load_dict(state_dict)

        reader = hub.reader.ClassifyReader(
            dataset=dataset,
            vocab_path=ernie.get_vocab_path(),
            max_seq_len=args.max_seq_len,
            sp_model_path=ernie.get_spm_path(),
            word_dict_path=ernie.get_word_dict_path())
        train_reader = reader.data_generator(batch_size=args.batch_size,
                                             phase='train')

        loss_sum = acc_sum = cnt = 0
        # Train for args.num_epoch epochs
        for epoch in range(args.num_epoch):
            # Iterate over the training data
            for batch_id, data in enumerate(train_reader()):
                input_ids = np.array(data[0][0]).astype(np.int64)
                position_ids = np.array(data[0][1]).astype(np.int64)
                segment_ids = np.array(data[0][2]).astype(np.int64)
                input_mask = np.array(data[0][3]).astype(np.float32)
                labels = np.array(data[0][4]).astype(np.int64)
                pred = tc(input_ids, position_ids, segment_ids, input_mask)

                acc = fluid.layers.accuracy(pred, to_variable(labels))
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Update parameters
                adam.minimize(avg_loss)

                loss_sum += avg_loss.numpy() * labels.shape[0]
                acc_sum += acc.numpy() * labels.shape[0]
                cnt += labels.shape[0]
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    loss_sum = acc_sum = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = tc.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
Code example #4
def finetune(args):
    with fluid.dygraph.guard():
        resnet50_vd_10w = hub.Module(name="resnet50_vd_10w")
        dataset = hub.dataset.Flowers()
        resnet = ResNet50(num_classes=dataset.num_labels,
                          backbone=resnet50_vd_10w)
        adam = AdamOptimizer(learning_rate=0.001,
                             parameter_list=resnet.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            resnet.load_dict(state_dict)

        reader = hub.reader.ImageClassificationReader(
            image_width=resnet50_vd_10w.get_expected_image_width(),
            image_height=resnet50_vd_10w.get_expected_image_height(),
            images_mean=resnet50_vd_10w.get_pretrained_images_mean(),
            images_std=resnet50_vd_10w.get_pretrained_images_std(),
            dataset=dataset)
        train_reader = reader.data_generator(batch_size=args.batch_size,
                                             phase='train')

        loss_sum = acc_sum = cnt = 0
        # Train for args.num_epoch epochs
        for epoch in range(args.num_epoch):
            # Iterate over the training data
            for batch_id, data in enumerate(train_reader()):
                imgs = np.array(data[0][0])
                labels = np.array(data[0][1])

                pred = resnet(imgs)
                acc = fluid.layers.accuracy(pred, to_variable(labels))
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Update parameters
                adam.minimize(avg_loss)

                loss_sum += avg_loss.numpy() * imgs.shape[0]
                acc_sum += acc.numpy() * imgs.shape[0]
                cnt += imgs.shape[0]
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    loss_sum = acc_sum = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = resnet.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
Code example #5
    def test_train(self):

        main_prog = fluid.Program()
        with fluid.program_guard(main_prog):
            mnist = MNIST()
            adam = AdamOptimizer(learning_rate=0.001,
                                 parameter_list=mnist.parameters())

            exe = fluid.Executor(self.place)
            start = time()

            img = fluid.data(name='img',
                             shape=[None, 1, 28, 28],
                             dtype='float32')
            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
            label.stop_gradient = True

            prediction, acc, avg_loss = mnist(img, label)
            adam.minimize(avg_loss)
        exe.run(fluid.default_startup_program())

        for epoch in range(self.epoch_num):
            for batch_id, data in enumerate(self.train_reader()):
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                out = exe.run(main_prog,
                              fetch_list=[avg_loss, acc],
                              feed={
                                  'img': dy_x_data,
                                  'label': y_data
                              })
                if batch_id % 100 == 0:
                    print(
                        "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}"
                        .format(epoch, batch_id, np.array(out[0]),
                                np.array(out[1]),
                                time() - start))
                    if batch_id == 300:
                        # The accuracy of mnist should exceed 0.9 after 300 batches.
                        accuracy = np.array(out[1])
                        self.assertGreater(
                            accuracy,
                            0.9,
                            msg="The accuracy {} of mnist should exceed 0.9 after 300 batches.".format(accuracy))
                        break
Code example #6
    def train(self, to_static=False):
        prog_trans = ProgramTranslator()
        prog_trans.enable(to_static)

        loss_data = []
        with fluid.dygraph.guard(self.place):
            fluid.default_main_program().random_seed = SEED
            fluid.default_startup_program().random_seed = SEED
            mnist = MNIST()
            adam = AdamOptimizer(learning_rate=0.001,
                                 parameter_list=mnist.parameters())

            for epoch in range(self.epoch_num):
                start = time()
                for batch_id, data in enumerate(self.train_reader()):
                    dy_x_data = np.array([
                        x[0].reshape(1, 28, 28) for x in data
                    ]).astype('float32')
                    y_data = np.array([x[1] for x in data
                                       ]).astype('int64').reshape(-1, 1)

                    img = to_variable(dy_x_data)
                    label = to_variable(y_data)

                    label.stop_gradient = True
                    prediction, acc, avg_loss = mnist(img, label=label)
                    avg_loss.backward()

                    adam.minimize(avg_loss)
                    loss_data.append(avg_loss.numpy()[0])
                    # clear gradients for the next batch
                    mnist.clear_gradients()
                    if batch_id % 10 == 0:
                        print(
                            "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}"
                            .format(epoch, batch_id, avg_loss.numpy(),
                                    acc.numpy(),
                                    time() - start))
                        start = time()
                    if batch_id == 50:
                        mnist.eval()
                        prediction, acc, avg_loss = mnist(img, label)
                        loss_data.append(avg_loss.numpy()[0])
                        # new save load check
                        self.check_jit_save_load(mnist, [dy_x_data], [img],
                                                 to_static, prediction)
                        break
        return loss_data
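A usage note: since `ProgramTranslator` is toggled per call, running the method twice compares dygraph and dynamic-to-static training. A sketch, assuming the surrounding test class:

# Hypothetical assertion elsewhere in the same test class:
dygraph_loss = self.train(to_static=False)
static_loss = self.train(to_static=True)
np.testing.assert_allclose(dygraph_loss, static_loss, rtol=1e-05)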
Code example #7
def train_mnist(args):
    epoch_num = args.epoch
    BATCH_SIZE = 64
    seed = 33
    np.random.seed(seed)
    start_prog = fluid.Program()
    main_prog = fluid.Program()
    start_prog.random_seed = seed
    main_prog.random_seed = seed
    with fluid.program_guard(main_prog, start_prog):
        exe = fluid.Executor(fluid.CPUPlace())
        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=BATCH_SIZE,
                                    drop_last=True)
        img = fluid.layers.data(name='pixel',
                                shape=[1, 28, 28],
                                dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        cost = mnist(img)
        loss = fluid.layers.cross_entropy(cost, label)
        avg_loss = fluid.layers.mean(loss)
        adam.minimize(avg_loss)
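        # within program_guard, the default startup/main programs resolve to start_prog/main_prog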
        out = exe.run(fluid.default_startup_program())
        for epoch in range(epoch_num):
            for batch_id, data in enumerate(train_reader()):
                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([BATCH_SIZE, 1])

                fetch_list = [avg_loss.name]
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)

                static_out = out[0]

                if batch_id % 100 == 0:
                    print("epoch: {}, batch_id: {}, loss: {}".format(
                        epoch, batch_id, static_out))
Code example #8
File: test_save_load.py  Project: sandyhouse/Paddle
    def test_save_load_same_result(self):
        program_translator = ProgramTranslator()
        x_data = np.random.randn(30, 10, 32).astype('float32')
        batch_num = 3

        with fluid.dygraph.guard(place):

            program_translator.enable(True)
            x = fluid.dygraph.to_variable(x_data)
            net = Linear(32, 64)
            adam = AdamOptimizer(learning_rate=0.1,
                                 parameter_list=net.parameters())

            for i in range(batch_num):
                static_out, static_loss = net(x)
                # Update parameters
                static_loss.backward()
                adam.minimize(static_loss)
                net.clear_gradients()
            # Save parameters

            fluid.save_dygraph(net.state_dict(), self.model_path)
            # minimize() updates the parameters, so call net() again for the post-update output and loss.
            # Switch into eval mode.
            net.eval()
            static_out, static_loss = net(x)

        # load parameters into dygraph
        with fluid.dygraph.guard(place):
            dygraph_net = Linear(32, 64)

            # Load parameters
            model_dict, _ = fluid.load_dygraph(self.model_path)
            dygraph_net.set_dict(model_dict)
            # Switch into eval mode.
            dygraph_net.eval()

            x = fluid.dygraph.to_variable(x_data)
            # predict output
            program_translator.enable(False)
            dygraph_out, dygraph_loss = dygraph_net(x)

        self.assertTrue(np.allclose(dygraph_out.numpy(), static_out.numpy()))
        self.assertTrue(np.allclose(dygraph_loss.numpy(), static_loss.numpy()))
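`place` and `self.model_path` come from the test fixture, which is not shown; a typical setup, offered as an assumption only:

import os
import tempfile
import unittest

place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()

class TestDy2statSaveLoad(unittest.TestCase):
    def setUp(self):
        # Write checkpoints under a throwaway directory.
        self.model_path = os.path.join(tempfile.mkdtemp(), "test_dy2stat_save_load")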
Code example #9
    def func_qat(self):
        self.set_vars()

        imperative_qat = ImperativeQuantAware(
            weight_quantize_type=self.weight_quantize_type,
            activation_quantize_type=self.activation_quantize_type,
            fuse_conv_bn=self.fuse_conv_bn)

        with fluid.dygraph.guard():
            # For CI coverage
            conv1 = Conv2D(
                in_channels=3,
                out_channels=2,
                kernel_size=3,
                stride=1,
                padding=1,
                padding_mode='replicate')
            quant_conv1 = QuantizedConv2D(conv1)
            data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
            quant_conv1(fluid.dygraph.to_variable(data))

            conv_transpose = Conv2DTranspose(4, 6, (3, 3))
            quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose)
            x_var = paddle.uniform(
                (2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0)
            quant_conv_transpose(x_var)

            seed = 1
            np.random.seed(seed)
            fluid.default_main_program().random_seed = seed
            fluid.default_startup_program().random_seed = seed

            lenet = ImperativeLenet()
            lenet = fix_model_dict(lenet)
            imperative_qat.quantize(lenet)
            adam = AdamOptimizer(
                learning_rate=0.001, parameter_list=lenet.parameters())

            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
            test_reader = paddle.batch(
                paddle.dataset.mnist.test(), batch_size=32)

            epoch_num = 1
            for epoch in range(epoch_num):
                lenet.train()
                for batch_id, data in enumerate(train_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = fluid.dygraph.to_variable(x_data)
                    label = fluid.dygraph.to_variable(y_data)
                    out = lenet(img)
                    acc = fluid.layers.accuracy(out, label)
                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    lenet.clear_gradients()
                    if batch_id % 100 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}".
                            format(epoch, batch_id,
                                   avg_loss.numpy(), acc.numpy()))
                    if batch_id == 500:  # For shortening CI time
                        break

                lenet.eval()
                eval_acc_top1_list = []
                for batch_id, data in enumerate(test_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = fluid.dygraph.to_variable(x_data)
                    label = fluid.dygraph.to_variable(y_data)

                    out = lenet(img)
                    acc_top1 = fluid.layers.accuracy(
                        input=out, label=label, k=1)
                    acc_top5 = fluid.layers.accuracy(
                        input=out, label=label, k=5)

                    if batch_id % 100 == 0:
                        eval_acc_top1_list.append(float(acc_top1.numpy()))
                        _logger.info(
                            "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
                            format(epoch, batch_id,
                                   acc_top1.numpy(), acc_top5.numpy()))

                # check eval acc
                eval_acc_top1 = sum(eval_acc_top1_list) / len(
                    eval_acc_top1_list)
                print('eval_acc_top1', eval_acc_top1)
                self.assertTrue(
                    eval_acc_top1 > 0.9,
                    msg="The test acc {%f} is less than 0.9." % eval_acc_top1)

            # test the correctness of `paddle.jit.save`
            data = next(test_reader())
            test_data = np.array([x[0].reshape(1, 28, 28)
                                  for x in data]).astype('float32')
            y_data = np.array(
                [x[1] for x in data]).astype('int64').reshape(-1, 1)
            test_img = fluid.dygraph.to_variable(test_data)
            label = fluid.dygraph.to_variable(y_data)
            lenet.eval()
            fp32_out = lenet(test_img)
            fp32_acc = fluid.layers.accuracy(fp32_out, label).numpy()

        with tempfile.TemporaryDirectory(prefix="qat_save_path_") as tmpdir:
            # save inference quantized model
            imperative_qat.save_quantized_model(
                layer=lenet,
                path=os.path.join(tmpdir, "lenet"),
                input_spec=[
                    paddle.static.InputSpec(
                        shape=[None, 1, 28, 28], dtype='float32')
                ],
                onnx_format=self.onnx_format)
            print('Quantized model saved in %s' % tmpdir)

            if core.is_compiled_with_cuda():
                place = core.CUDAPlace(0)
            else:
                place = core.CPUPlace()
            exe = fluid.Executor(place)
            [inference_program, feed_target_names,
             fetch_targets] = fluid.io.load_inference_model(
                 dirname=tmpdir,
                 executor=exe,
                 model_filename="lenet" + INFER_MODEL_SUFFIX,
                 params_filename="lenet" + INFER_PARAMS_SUFFIX)
            quant_out, = exe.run(inference_program,
                                 feed={feed_target_names[0]: test_data},
                                 fetch_list=fetch_targets)
            paddle.disable_static()
            quant_out = fluid.dygraph.to_variable(quant_out)
            quant_acc = fluid.layers.accuracy(quant_out, label).numpy()
            paddle.enable_static()
            delta_value = fp32_acc - quant_acc
            self.assertLess(delta_value, self.diff_threshold)
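`self.set_vars()` supplies the attributes this test reads (`weight_quantize_type`, `activation_quantize_type`, `fuse_conv_bn`, `onnx_format`, `diff_threshold`); a plausible definition with assumed values:

    def set_vars(self):
        # Assumed defaults; the real test may parameterize these differently.
        self.weight_quantize_type = 'abs_max'
        self.activation_quantize_type = 'moving_average_abs_max'
        self.fuse_conv_bn = False
        self.onnx_format = False
        self.diff_threshold = 0.01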
Code example #10
def finetune(args):
    module = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    # Use the appropriate tokenizer to preprocess the data set
    # For ernie_tiny, it will do word segmentation to get subword. More details: https://www.jiqizhixin.com/articles/2019-11-06-9
    if module.name == "ernie_tiny":
        tokenizer = hub.ErnieTinyTokenizer(
            vocab_file=module.get_vocab_path(),
            spm_path=module.get_spm_path(),
            word_dict_path=module.get_word_dict_path(),
        )
    else:
        tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path())
    dataset = hub.dataset.ChnSentiCorp(tokenizer=tokenizer,
                                       max_seq_len=args.max_seq_len)

    with fluid.dygraph.guard():
        tc = TransformerClassifier(num_classes=dataset.num_labels,
                                   transformer=module)
        adam = AdamOptimizer(learning_rate=1e-5,
                             parameter_list=tc.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            tc.load_dict(state_dict)

        loss_sum = acc_sum = cnt = 0
        for epoch in range(args.num_epoch):
            for batch_id, data in enumerate(
                    dataset.batch_records_generator(
                        phase="train",
                        batch_size=args.batch_size,
                        shuffle=True,
                        pad_to_batch_max_seq_len=False)):
                batch_size = len(data["input_ids"])
                input_ids = np.array(data["input_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                position_ids = np.array(data["position_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                segment_ids = np.array(data["segment_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                input_mask = np.array(data["input_mask"]).astype(
                    np.float32).reshape([batch_size, -1, 1])
                labels = np.array(data["label"]).astype(np.int64).reshape(
                    [batch_size, 1])
                pred = tc(input_ids, position_ids, segment_ids, input_mask)

                acc = fluid.layers.accuracy(pred, to_variable(labels))
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)

                loss_sum += avg_loss.numpy() * labels.shape[0]
                acc_sum += acc.numpy() * labels.shape[0]
                cnt += labels.shape[0]
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    loss_sum = acc_sum = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = tc.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
Code example #11
    def test_out_scale_acc(self):
        def _build_static_lenet(main, startup, is_test=False, seed=1000):
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    main.random_seed = seed
                    startup.random_seed = seed
                    img = fluid.layers.data(name='image',
                                            shape=[1, 28, 28],
                                            dtype='float32')
                    label = fluid.layers.data(name='label',
                                              shape=[1],
                                              dtype='int64')
                    prediction = StaticLenet(img)
                    if not is_test:
                        loss = fluid.layers.cross_entropy(input=prediction,
                                                          label=label)
                        avg_loss = fluid.layers.mean(loss)
                    else:
                        avg_loss = prediction
            return img, label, avg_loss

        reader = paddle.batch(paddle.dataset.mnist.test(),
                              batch_size=32,
                              drop_last=True)
        weight_quantize_type = 'abs_max'
        activation_quant_type = 'moving_average_abs_max'
        param_init_map = {}
        seed = 1000
        lr = 0.1
        dynamic_out_scale_list = []
        static_out_scale_list = []

        # imperative train
        _logger.info(
            "--------------------------dynamic graph qat--------------------------"
        )
        imperative_out_scale = ImperativeQuantAware()

        with fluid.dygraph.guard():
            np.random.seed(seed)
            fluid.default_main_program().random_seed = seed
            fluid.default_startup_program().random_seed = seed
            lenet = ImperativeLenet()
            fixed_state = {}
            for name, param in lenet.named_parameters():
                p_shape = param.numpy().shape
                p_value = param.numpy()
                if name.endswith("bias"):
                    value = np.zeros_like(p_value).astype('float32')
                else:
                    value = np.random.normal(loc=0.0,
                                             scale=0.01,
                                             size=np.prod(p_shape)).reshape(
                                                 p_shape).astype('float32')
                fixed_state[name] = value
                param_init_map[param.name] = value
            lenet.set_dict(fixed_state)
            imperative_out_scale.quantize(lenet)
            adam = AdamOptimizer(learning_rate=lr,
                                 parameter_list=lenet.parameters())
            dynamic_loss_rec = []
            lenet.train()
            for batch_id, data in enumerate(reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = fluid.dygraph.to_variable(x_data)
                label = fluid.dygraph.to_variable(y_data)

                out = lenet(img)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                lenet.clear_gradients()
                dynamic_loss_rec.append(avg_loss.numpy()[0])
                if batch_id % 100 == 0:
                    _logger.info('{}: {}'.format('loss', avg_loss.numpy()))

            lenet.eval()

        path = "./dynamic_outscale_infer_model/lenet"
        dynamic_save_dir = "./dynamic_outscale_infer_model"

        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                        dtype='float32')
            ])

        _logger.info(
            "--------------------------static graph qat--------------------------"
        )
        static_loss_rec = []
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)

        main = fluid.Program()
        infer = fluid.Program()
        startup = fluid.Program()
        static_img, static_label, static_loss = _build_static_lenet(
            main, startup, False, seed)
        infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
                                                      seed)
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                opt = AdamOptimizer(learning_rate=lr)
                opt.minimize(static_loss)

        scope = core.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
        for param in main.all_parameters():
            param_tensor = scope.var(param.name).get_tensor()
            param_tensor.set(param_init_map[param.name], place)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type,
            quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
        transform_pass.apply(main_graph)
        transform_pass.apply(infer_graph)
        outscale_pass = OutScaleForTrainingPass(scope=scope, place=place)
        outscale_pass.apply(main_graph)
        build_strategy = fluid.BuildStrategy()
        build_strategy.fuse_all_reduce_ops = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=static_loss.name, build_strategy=build_strategy)

        feeder = fluid.DataFeeder(feed_list=[static_img, static_label],
                                  place=place)
        with fluid.scope_guard(scope):
            for batch_id, data in enumerate(reader()):
                loss_v, = exe.run(binary,
                                  feed=feeder.feed(data),
                                  fetch_list=[static_loss])
                static_loss_rec.append(loss_v[0])
                if batch_id % 100 == 0:
                    _logger.info('{}: {}'.format('loss', loss_v))
        scale_inference_pass = OutScaleForInferencePass(scope=scope)
        scale_inference_pass.apply(infer_graph)

        save_program = infer_graph.to_program()
        static_save_dir = "./static_outscale_infer_model"
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(
                dirname=static_save_dir,
                feeded_var_names=[infer_img.name],
                target_vars=[infer_pre],
                executor=exe,
                main_program=save_program,
                model_filename="lenet" + INFER_MODEL_SUFFIX,
                params_filename="lenet" + INFER_PARAMS_SUFFIX)

        rtol = 1e-05
        atol = 1e-08
        for i, (loss_d,
                loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
            diff = np.abs(loss_d - loss_s)
            if diff > (atol + rtol * np.abs(loss_s)):
                _logger.info(
                    "diff({}) at {}, dynamic loss = {}, static loss = {}".
                    format(diff, i, loss_d, loss_s))
                break

        self.assertTrue(np.allclose(np.array(dynamic_loss_rec),
                                    np.array(static_loss_rec),
                                    rtol=rtol,
                                    atol=atol,
                                    equal_nan=True),
                        msg='Failed to do the imperative qat.')

        # load dynamic model
        [dynamic_inference_program, feed_target_names,
         fetch_targets] = (fluid.io.load_inference_model(
             dirname=dynamic_save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX))
        # load static model
        [static_inference_program, feed_target_names,
         fetch_targets] = (fluid.io.load_inference_model(
             dirname=static_save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX))

        dynamic_ops = dynamic_inference_program.global_block().ops
        static_ops = static_inference_program.global_block().ops

        for op in dynamic_ops[:]:
            if op.type == "flatten2" or 'fake' in op.type:
                dynamic_ops.remove(op)

        for op in static_ops[:]:
            if 'fake' in op.type:
                static_ops.remove(op)

        for i in range(len(dynamic_ops)):
            if dynamic_ops[i].has_attr("out_threshold"):
                self.assertTrue(dynamic_ops[i].type == static_ops[i].type)
                self.assertTrue(dynamic_ops[i].attr("out_threshold") ==
                                static_ops[i].attr("out_threshold"))
Code example #12
def finetune(args):
    module = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    # Use the appropriate tokenizer to preprocess the data set
    tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path())
    dataset = hub.dataset.MSRA_NER(tokenizer=tokenizer,
                                   max_seq_len=args.max_seq_len)

    with fluid.dygraph.guard():
        ts = TransformerSeqLabeling(num_classes=dataset.num_labels,
                                    transformer=module)
        adam = AdamOptimizer(learning_rate=1e-5,
                             parameter_list=ts.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            ts.load_dict(state_dict)

        loss_sum = total_infer = total_label = total_correct = cnt = 0
        for epoch in range(args.num_epoch):
            for batch_id, data in enumerate(
                    dataset.batch_records_generator(
                        phase="train",
                        batch_size=args.batch_size,
                        shuffle=True,
                        pad_to_batch_max_seq_len=False)):
                batch_size = len(data["input_ids"])
                input_ids = np.array(data["input_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                position_ids = np.array(data["position_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                segment_ids = np.array(data["segment_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                input_mask = np.array(data["input_mask"]).astype(
                    np.float32).reshape([batch_size, -1, 1])
                labels = np.array(data["label"]).astype(np.int64).reshape(
                    -1, 1)
                seq_len = np.array(data["seq_len"]).astype(np.int64).reshape(
                    -1, 1)
                pred, ret_infers = ts(input_ids, position_ids, segment_ids,
                                      input_mask)

                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)

                loss_sum += avg_loss.numpy() * labels.shape[0]
                label_num, infer_num, correct_num = chunk_eval(
                    labels, ret_infers.numpy(), seq_len, dataset.num_labels, 1)
                cnt += labels.shape[0]

                total_infer += infer_num
                total_label += label_num
                total_correct += correct_num

                if batch_id % args.log_interval == 0:
                    precision, recall, f1 = calculate_f1(
                        total_label, total_infer, total_correct)
                    print('epoch {}: loss {}, f1 {} recall {} precision {}'.
                          format(epoch, loss_sum / cnt, f1, recall, precision))
                    loss_sum = total_infer = total_label = total_correct = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = ts.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
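`chunk_eval` and `calculate_f1` are project helpers that are not shown. For orientation, a standard `calculate_f1` over chunk counts, consistent with the call above (an assumption):

def calculate_f1(num_label, num_infer, num_correct):
    # Precision/recall/F1 from chunk counts, guarding empty denominators.
    precision = num_correct / num_infer if num_infer else 0.0
    recall = num_correct / num_label if num_label else 0.0
    f1 = 2 * precision * recall / (precision + recall) if num_correct else 0.0
    return precision, recall, f1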
Code example #13
def train_mnist(args):
    epoch_num = 5
    BATCH_SIZE = 256

    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)
        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_sampler(
                paddle.dataset.mnist.train(), batch_size=BATCH_SIZE)
        else:
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=BATCH_SIZE,
                                        drop_last=True)

        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=BATCH_SIZE,
                                   drop_last=True)

        for epoch in range(epoch_num):
            # per-epoch timing and loss meters
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            progress = ProgressMeter(len(list(train_reader())) - 1,
                                     batch_time,
                                     data_time,
                                     losses,
                                     prefix="epoch: [{}]".format(epoch))
            end = Tools.time()

            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                cost, acc = mnist(img, label)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()

                adam.minimize(avg_loss)
                # clear gradients for the next batch
                mnist.clear_gradients()
                batch_time.update(Tools.time() - end)
                dy_out = avg_loss.numpy()[0]
                losses.update(dy_out, BATCH_SIZE)
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                end = Tools.time()

            mnist.eval()
            test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
            mnist.train()
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))

        fluid.dygraph.save_persistables(mnist.state_dict(), "save_dir")
        print("checkpoint saved")

        inference_mnist()
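`AverageMeter`, `ProgressMeter`, `Tools`, and `inference_mnist` are helpers defined elsewhere in the project. A minimal `AverageMeter` consistent with the calls above (a sketch, not the project's implementation):

class AverageMeter(object):
    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        # Track the latest value and a running average over n samples.
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count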
Code example #14
    train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                batch_size=BATCH_SIZE,
                                drop_last=True)
    train_reader = fluid.contrib.reader.distributed_batch_reader(train_reader)

    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_reader()):
            dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                  for x in data]).astype('float32')
            y_data = np.array([x[1]
                               for x in data]).astype('int64').reshape(-1, 1)

            img = to_variable(dy_x_data)
            label = to_variable(y_data)
            label.stop_gradient = True

            cost, acc = mnist(img, label)

            loss = fluid.layers.cross_entropy(cost, label)
            avg_loss = fluid.layers.mean(loss)

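            # data-parallel: scale the loss, then all-reduce gradients across trainers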
            avg_loss = mnist.scale_loss(avg_loss)
            avg_loss.backward()
            mnist.apply_collective_grads()

            sgd.minimize(avg_loss)
            mnist.clear_gradients()
            if batch_id % 100 == 0 and batch_id != 0:
                print("epoch: {}, batch_id: {}, loss is: {}".format(
                    epoch, batch_id, avg_loss.numpy()))
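This snippet is a fragment: `mnist`, `sgd`, `epoch_num`, and `BATCH_SIZE` must come from the enclosing function. By analogy with example #1, a plausible preamble (assumed, not from the source):

with fluid.dygraph.guard(fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)):
    epoch_num, BATCH_SIZE = 5, 64
    strategy = fluid.dygraph.parallel.prepare_context()
    mnist = fluid.dygraph.parallel.DataParallel(MNIST("mnist"), strategy)
    sgd = fluid.optimizer.SGDOptimizer(learning_rate=0.001)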
Code example #15
def train():
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):

        pix2pix_gan = build_pix2pix_gan('pix2pix_gan')

        discriminator_optimizer = AdamOptimizer(learning_rate=2e-4, beta1=0.5)
        generator_optimizer = AdamOptimizer(learning_rate=2e-4, beta1=0.5)

        real_dataset, input_dataset = prepare_dataset(data_dir, is_train=True)
        real_test, input_test = prepare_dataset(data_dir, is_train=False)

        epoch = 0

        if os.path.exists('./model'):
            print('load prev checkpoint...')
            model, _ = fluid.dygraph.load_persistables('./model')
            pix2pix_gan.load_dict(model)
            checkpoint = open("./checkpoint.txt", "r")
            epoch = int(checkpoint.read()) + 1
            checkpoint.close()

        while epoch < num_epochs:

            print("Epoch id: ", epoch)

            total_loss_gen = 0
            total_loss_disc = 0  

            seed = np.random.randint(1000)
            np.random.seed(seed)
            np.random.shuffle(real_dataset)
            np.random.seed(seed)
            np.random.shuffle(input_dataset)

            for tar, inpt in batch_generator(real_dataset, input_dataset, batch_size): 

                target = to_variable(tar)
                input_image = to_variable(inpt)

                gen_loss, disc_generated = pix2pix_gan(input_image, target, None, True)
                gen_loss.backward()
                vars_G = []
                for parm in pix2pix_gan.parameters():
                    if parm.name[:43] == 'pix2pix_gan/build_pix2pix_gan_0/generator_0':
                        vars_G.append(parm)
                generator_optimizer.minimize(gen_loss, parameter_list=vars_G)
                pix2pix_gan.clear_gradients()

                disc_loss = pix2pix_gan(input_image, target, disc_generated, False)
                disc_loss.backward()
                vars_D = []
                for parm in pix2pix_gan.parameters():
                    if parm.name[:47] == 'pix2pix_gan/build_pix2pix_gan_0/discriminator_0':
                        vars_D.append(parm)
                discriminator_optimizer.minimize(disc_loss, parameter_list=vars_D)
                pix2pix_gan.clear_gradients()

                total_loss_gen += gen_loss.numpy()[0]
                total_loss_disc += disc_loss.numpy()[0]

            print("Total generator loss: ", total_loss_gen)
            print("Total discriminator loss: ", total_loss_disc)

            if epoch % 10 == 0:
                # save checkpoint
                fluid.dygraph.save_persistables(pix2pix_gan.state_dict(), "./model")
                checkpoint = open("./checkpoint.txt", "w")
                checkpoint.write(str(epoch))
                checkpoint.close()

                input_image = to_variable(input_test)
                generate_and_save_images(pix2pix_gan, input_image, epoch)

            epoch += 1
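The seed-then-shuffle dance above keeps `real_dataset` and `input_dataset` aligned by replaying the same RNG state for both shuffles. If the datasets are NumPy arrays, a single permutation index is a more direct equivalent (a suggestion, not the author's code):

perm = np.random.permutation(len(real_dataset))
real_dataset = real_dataset[perm]      # fancy indexing keeps the pairs aligned
input_dataset = input_dataset[perm]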
Code example #16
def finetune(args):
    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        dataset = hub.dataset.MSRA_NER()
        ts = TransformerSequenceLabelLayer(
            num_classes=dataset.num_labels, transformer=ernie)
        adam = AdamOptimizer(learning_rate=1e-5, parameter_list=ts.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            ts.load_dict(state_dict)

        reader = hub.reader.SequenceLabelReader(
            dataset=dataset,
            vocab_path=ernie.get_vocab_path(),
            max_seq_len=args.max_seq_len,
            sp_model_path=ernie.get_spm_path(),
            word_dict_path=ernie.get_word_dict_path())
        train_reader = reader.data_generator(
            batch_size=args.batch_size, phase='train')

        loss_sum = total_infer = total_label = total_correct = cnt = 0
        # Train for args.num_epoch epochs
        for epoch in range(args.num_epoch):
            # Iterate over the training data
            for batch_id, data in enumerate(train_reader()):
                input_ids = np.array(data[0][0]).astype(np.int64)
                position_ids = np.array(data[0][1]).astype(np.int64)
                segment_ids = np.array(data[0][2]).astype(np.int64)
                input_mask = np.array(data[0][3]).astype(np.float32)
                labels = np.array(data[0][4]).astype(np.int64).reshape(-1, 1)
                seq_len = np.squeeze(
                    np.array(data[0][5]).astype(np.int64), axis=1)
                pred, ret_infers = ts(input_ids, position_ids, segment_ids,
                                      input_mask)

                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Update parameters
                adam.minimize(avg_loss)

                loss_sum += avg_loss.numpy() * labels.shape[0]
                label_num, infer_num, correct_num = chunk_eval(
                    labels, ret_infers.numpy(), seq_len, dataset.num_labels, 1)
                cnt += labels.shape[0]

                total_infer += infer_num
                total_label += label_num
                total_correct += correct_num

                if batch_id % args.log_interval == 0:
                    precision, recall, f1 = calculate_f1(
                        total_label, total_infer, total_correct)
                    print('epoch {}: loss {}, f1 {} recall {} precision {}'.
                          format(epoch, loss_sum / cnt, f1, recall, precision))
                    loss_sum = total_infer = total_label = total_correct = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = ts.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
Code example #17
    def test_qat_acc(self):
        def _build_static_lenet(main, startup, is_test=False, seed=1000):
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    main.random_seed = seed
                    startup.random_seed = seed
                    img = fluid.layers.data(
                        name='image', shape=[1, 28, 28], dtype='float32')
                    label = fluid.layers.data(
                        name='label', shape=[1], dtype='int64')
                    prediction = StaticLenet(img)
                    if not is_test:
                        loss = fluid.layers.cross_entropy(
                            input=prediction, label=label)
                        avg_loss = fluid.layers.mean(loss)
                    else:
                        avg_loss = prediction
            return img, label, avg_loss

        reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
        weight_quantize_type = 'abs_max'
        activation_quant_type = 'moving_average_abs_max'
        param_init_map = {}
        seed = 1000
        lr = 0.001

        # imperative train
        _logger.info(
            "--------------------------dynamic graph qat--------------------------"
        )
        imperative_qat = ImperativeQuantAware(
            weight_quantize_type=weight_quantize_type,
            activation_quantize_type=activation_quant_type,
            quantizable_layer_type=[
                'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh',
                'Swish'
            ])

        with fluid.dygraph.guard():
            np.random.seed(seed)
            fluid.default_main_program().random_seed = seed
            fluid.default_startup_program().random_seed = seed
            lenet = ImperativeLenet()
            fixed_state = {}
            for name, param in lenet.named_parameters():
                p_shape = param.numpy().shape
                p_value = param.numpy()
                if name.endswith("bias"):
                    value = np.zeros_like(p_value).astype('float32')
                else:
                    value = np.random.normal(
                        loc=0.0, scale=0.01, size=np.prod(p_shape)).reshape(
                            p_shape).astype('float32')
                fixed_state[name] = value
                param_init_map[param.name] = value
            lenet.set_dict(fixed_state)

            imperative_qat.quantize(lenet)
            adam = AdamOptimizer(
                learning_rate=lr, parameter_list=lenet.parameters())
            dynamic_loss_rec = []
            lenet.train()
            for batch_id, data in enumerate(reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)

                img = fluid.dygraph.to_variable(x_data)
                label = fluid.dygraph.to_variable(y_data)

                out = lenet(img)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                lenet.clear_gradients()
                dynamic_loss_rec.append(avg_loss.numpy()[0])
                if batch_id % 100 == 0:
                    _logger.info('{}: {}'.format('loss', avg_loss.numpy()))
                if batch_id > 500:
                    break
            lenet.eval()
        paddle.jit.save(
            layer=lenet,
            path="./dynamic_mnist/model",
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None, 1, 28, 28], dtype='float32')
            ])

        # static graph train
        _logger.info(
            "--------------------------static graph qat--------------------------"
        )
        static_loss_rec = []
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)

        main = fluid.Program()
        infer = fluid.Program()
        startup = fluid.Program()
        static_img, static_label, static_loss = _build_static_lenet(
            main, startup, False, seed)
        infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
                                                      seed)
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                opt = AdamOptimizer(learning_rate=lr)
                opt.minimize(static_loss)

        scope = core.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
        for param in main.all_parameters():
            param_tensor = scope.var(param.name).get_tensor()
            param_tensor.set(param_init_map[param.name], place)

        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type,
            quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
        add_quant_dequant_pass = AddQuantDequantPass(
            scope=scope,
            place=place,
            quantizable_op_type=[
                'relu', 'leaky_relu', 'relu6', 'tanh', 'swish'
            ])
        transform_pass.apply(main_graph)
        transform_pass.apply(infer_graph)
        add_quant_dequant_pass.apply(main_graph)
        add_quant_dequant_pass.apply(infer_graph)
        build_strategy = fluid.BuildStrategy()
        build_strategy.fuse_all_reduce_ops = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=static_loss.name, build_strategy=build_strategy)

        feeder = fluid.DataFeeder(
            feed_list=[static_img, static_label], place=place)
        with fluid.scope_guard(scope):
            for batch_id, data in enumerate(reader()):
                loss_v, = exe.run(binary,
                                  feed=feeder.feed(data),
                                  fetch_list=[static_loss])
                static_loss_rec.append(loss_v[0])
                if batch_id % 100 == 0:
                    _logger.info('{}: {}'.format('loss', loss_v))

        save_program = infer_graph.to_program()
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model("./static_mnist", [infer_img.name],
                                          [infer_pre], exe, save_program)
        rtol = 1e-08
        atol = 1e-10
        for i, (loss_d,
                loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
            diff = np.abs(loss_d - loss_s)
            if diff > (atol + rtol * np.abs(loss_s)):
                _logger.info(
                    "diff({}) at {}, dynamic loss = {}, static loss = {}".
                    format(diff, i, loss_d, loss_s))
                break

        self.assertTrue(
            np.allclose(
                np.array(dynamic_loss_rec),
                np.array(static_loss_rec),
                rtol=rtol,
                atol=atol,
                equal_nan=True),
            msg='Failed to do the imperative qat.')
Code Example #18
def train_mnist(args):
    epoch_num = args.epoch
    BATCH_SIZE = 32

    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)

        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)

        for epoch in range(epoch_num):

            total_loss = 0.0
            total_acc = 0.0
            total_sample = 0

            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            progress = ProgressMeter(len(list(train_reader())) - 1, batch_time, data_time,
                                     losses, prefix="epoch: [{}]".format(epoch))
            end = Tools.time()
            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                cost, acc = mnist(img, label)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()

                adam.minimize(avg_loss)
                # save checkpoint
                mnist.clear_gradients()
                batch_time.update(Tools.time() - end)

                total_loss += avg_loss.numpy()
                total_acc += acc.numpy()
                total_sample += 1

                dy_out = avg_loss.numpy()[0]
                losses.update(dy_out, BATCH_SIZE)
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                    print("epoch %d | batch step %d, loss %0.3f acc %0.3f" % \
                          (epoch, batch_id, total_loss / total_sample, total_acc / total_sample))

                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {:}".format(
                        epoch, batch_id, avg_loss.numpy()))
                end = Tools.time()
            mnist.eval()
            test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
            mnist.train()
            if args.ce:
                print("kpis\ttest_acc\t%s" % test_acc)
                print("kpis\ttest_cost\t%s" % test_cost)
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))
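
The `args` namespace consumed by `train_mnist` above is an ordinary `argparse` result. As a reference, here is a minimal, hypothetical parser covering only the flags this example actually reads (`--epoch`, `--use_data_parallel`, `--ce`); the function name and defaults are assumptions, not part of the original project:

import argparse

def parse_args():
    # Hypothetical CLI covering exactly the attributes train_mnist() accesses.
    parser = argparse.ArgumentParser("train_mnist")
    parser.add_argument("--epoch", type=int, default=5,
                        help="number of training epochs (args.epoch)")
    parser.add_argument("--use_data_parallel", action="store_true",
                        help="enable fluid.dygraph.parallel data parallelism")
    parser.add_argument("--ce", action="store_true",
                        help="continuous-evaluation mode: fix the random seeds")
    return parser.parse_args()

if __name__ == "__main__":
    train_mnist(parse_args())
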
Code Example #19
File: train.py  Project: jay9z/GAN-pp
def main():
    # Step 0: preparation
    writer = LogWriter(logdir="./log/scalar")
    place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        # Step 1: Define training dataloader
        image_folder = ""
        image_list_file = "dummy_data/fabric_list.txt"
        transform = Transform()  # Normalize2() is an alternative that maps [0, 255] -> [0, 1]
        x_data = DataLoader(image_folder, image_list_file, transform=transform)
        x_dataloader = fluid.io.DataLoader.from_generator(capacity=2,
                                                          return_list=True)
        x_dataloader.set_sample_generator(x_data, args.batch_size)

        total_batch = len(x_data) // args.batch_size

        # Step 2: Create model
        if args.net == "basic":
            D = Discriminator()
            G = Generator()
            E = Invertor()
        else:
            raise NotImplementedError(
                f"args.net: {args.net} is not supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_Loss
        D_optim = AdamOptimizer(learning_rate=args.lr,
                                parameter_list=D.parameters())
        G_optim = AdamOptimizer(learning_rate=args.lr,
                                parameter_list=G.parameters())
        E_optim = AdamOptimizer(learning_rate=args.lr,
                                parameter_list=E.parameters())

        G_loss_meter = AverageMeter()
        D_loss_meter = AverageMeter()
        E_loss_meter = AverageMeter()

        D.train()
        G.train()
        E.train()

        # Step 4: warm-up ("slight") training
        iteration = -1
        is_slight_Train = True
        for epoch in range(1, args.epoch_num + 1):
            # optimize the Discriminator
            for (x, x_labels) in x_dataloader():
                n = x.shape[0]
                if is_slight_Train:
                    iteration += 1
                    x = fluid.layers.cast(x, dtype="float32")
                    x = fluid.layers.transpose(x, perm=[0, 3, 1, 2])
                    preds_x = D(x)
                    preds_x_array = preds_x.numpy()
                    #print("D(x),1",preds_array.shape, np.mean(preds_array))
                    writer.add_scalar(tag="D(x)=1",
                                      step=iteration,
                                      value=np.mean(preds_x_array))
                    if np.mean(preds_x_array) >= 0.98:
                        is_slight_Train = False

                    z = np.random.rand(n, 64)
                    zeros = np.zeros((n, 1))
                    z = to_variable(z)
                    zeros = to_variable(zeros)
                    z = fluid.layers.cast(z, dtype="float32")
                    zeros = fluid.layers.cast(zeros, dtype="int64")
                    preds_fx = D(G(z))
                    preds_fx_array = preds_fx.numpy()
                    writer.add_scalar(tag="D(G(z))=0",
                                      step=iteration,
                                      value=np.mean(preds_fx_array))
                    D_loss = criterion(preds_x, x_labels) + criterion(
                        preds_fx, zeros)
                    D_loss.backward()
                    D_optim.minimize(D_loss)
                    D.clear_gradients()
                    D_loss_meter.update(D_loss.numpy()[0], n)
                    writer.add_scalar(tag="D_loss",
                                      step=iteration,
                                      value=D_loss_meter.avg)
                    print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                          f"STEP{iteration}, " +
                          f"Average D Loss: {D_loss_meter.avg:4f}, ")

                    z = np.random.rand(n, 64)
                    ones = np.ones((n, 1))
                    z = to_variable(z)
                    ones = to_variable(ones)
                    z = fluid.layers.cast(z, dtype="float32")
                    ones = fluid.layers.cast(ones, dtype="int64")
                    preds = D(G(z))
                    preds_array = preds.numpy()
                    writer.add_scalar(tag="D(G(z))=1",
                                      step=iteration,
                                      value=np.mean(preds_array))
                    G_loss = criterion(preds, ones)
                    G_loss.backward()
                    G_optim.minimize(G_loss)
                    G.clear_gradients()
                    G_loss_meter.update(G_loss.numpy()[0], n)
                    writer.add_scalar(tag="G_loss",
                                      step=iteration,
                                      value=G_loss_meter.avg)
                    print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                          f"STEP{iteration}, " +
                          f"Average G Loss: {G_loss_meter.avg:4f}")

            if epoch % args.save_freq == 0 or epoch == args.epoch_num or not is_slight_Train:
                D_model_path = os.path.join(args.checkpoint_folder,
                                            f"D_{args.net}-Epoch-{epoch}")
                G_model_path = os.path.join(args.checkpoint_folder,
                                            f"G_{args.net}-Epoch-{epoch}")

                # save model and optimizer states
                model_dict = D.state_dict()
                fluid.save_dygraph(model_dict, D_model_path)
                optim_dict = D_optim.state_dict()
                fluid.save_dygraph(optim_dict, D_model_path)

                model_dict = G.state_dict()
                fluid.save_dygraph(model_dict, G_model_path)
                optim_dict = G_optim.state_dict()
                fluid.save_dygraph(optim_dict, G_model_path)

                print(
                    f'----- Save model: {D_model_path}.pdparams, {G_model_path}.pdparams'
                )
                if not is_slight_Train:
                    break

        # Step 5:  full training for Generator and Discriminator
        D_optim = AdamOptimizer(learning_rate=args.lr * 10,
                                parameter_list=D.parameters())
        G_optim = AdamOptimizer(learning_rate=args.lr * 10,
                                parameter_list=G.parameters())
        G_loss_meter = AverageMeter()
        D_loss_meter = AverageMeter()

        for epoch in range(1, args.epoch_num + 1):
            for (x, x_labels) in x_dataloader():
                n = x.shape[0]
                iteration += 1
                x = fluid.layers.cast(x, dtype="float32")
                x = fluid.layers.transpose(x, perm=[0, 3, 1, 2])
                preds1 = D(x)
                preds_array = preds1.numpy()
                writer.add_scalar(tag="D(x)=1",
                                  step=iteration,
                                  value=np.mean(preds_array))
                z = np.random.rand(n, 64)
                zeros = np.zeros((n, 1))
                z = to_variable(z)
                zeros = to_variable(zeros)
                z = fluid.layers.cast(z, dtype="float32")
                zeros = fluid.layers.cast(zeros, dtype="int64")
                preds2 = D(G(z))
                preds_array = preds2.numpy()
                #print("DG(z),0:",preds_array.shape, np.mean(preds_array))
                writer.add_scalar(tag="D(G(z))=0",
                                  step=iteration,
                                  value=np.mean(preds_array))
                D_loss = criterion(preds1, x_labels) + criterion(preds2, zeros)
                D_loss.backward()
                D_optim.minimize(D_loss)
                D.clear_gradients()
                D_loss_meter.update(D_loss.numpy()[0], n)
                writer.add_scalar(tag="D_loss",
                                  step=iteration,
                                  value=D_loss_meter.avg)
                print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                      f"STEP{iteration}, " +
                      f"Average D Loss: {D_loss_meter.avg:4f} ")
                z = np.random.rand(n, 64)
                ones = np.ones((n, 1))
                z = to_variable(z)
                ones = to_variable(ones)
                z = fluid.layers.cast(z, dtype="float32")
                ones = fluid.layers.cast(ones, dtype="int64")
                preds = D(G(z))
                preds_array = preds.numpy()
                #print("DG(z),1:",preds_array.shape, np.mean(preds_array))
                writer.add_scalar(tag="D(G(z))=1",
                                  step=iteration,
                                  value=np.mean(preds_array))
                G_loss = criterion(preds, ones)
                G_loss.backward()
                G_optim.minimize(G_loss)
                G.clear_gradients()
                G_loss_meter.update(G_loss.numpy()[0], n)
                writer.add_scalar(tag="G_loss",
                                  step=iteration,
                                  value=G_loss_meter.avg)
                print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                      f"STEP{iteration}, " +
                      f"Average G Loss: {G_loss_meter.avg:4f}")

            if epoch % args.save_freq == 0 or epoch == args.epoch_num:
                D_model_path = os.path.join(args.checkpoint_folder,
                                            f"D_{args.net}-Epoch-{epoch}")
                G_model_path = os.path.join(args.checkpoint_folder,
                                            f"G_{args.net}-Epoch-{epoch}")

                # save model and optimizer states
                model_dict = D.state_dict()
                fluid.save_dygraph(model_dict, D_model_path)
                optim_dict = D_optim.state_dict()
                fluid.save_dygraph(optim_dict, D_model_path)

                model_dict = G.state_dict()
                fluid.save_dygraph(model_dict, G_model_path)
                optim_dict = G_optim.state_dict()
                fluid.save_dygraph(optim_dict, G_model_path)
                print(
                    f'----- Save model: {D_model_path}.pdparams, {G_model_path}.pdparams'
                )

        # Step 6: full training for Inverter
        E_optim = AdamOptimizer(learning_rate=args.lr * 10,
                                parameter_list=E.parameters())
        E_loss_meter = AverageMeter()

        for epoch in range(1, args.epoch_num + 1):
            for (x, x_labels) in x_dataloader():
                n = x.shape[0]
                iteration += 1
                x = fluid.layers.cast(x, dtype="float32")
                image = x.numpy()[0] * 255
                writer.add_image(tag="x", step=iteration, img=image)
                x = fluid.layers.transpose(x, perm=[0, 3, 1, 2])
                invert_x = G(E(x))
                invert_image = fluid.layers.transpose(invert_x,
                                                      perm=[0, 2, 3, 1])
                invert_image = invert_image.numpy()[0] * 255
                #print("D(x),1",preds_array.shape, np.mean(preds_array))
                writer.add_image(tag="invert_x",
                                 step=iteration,
                                 img=invert_image)
                print(np.max(invert_image), np.min(invert_image))
                E_loss = fluid.layers.mse_loss(invert_x, x)
                print("E_loss shape:", E_loss.numpy().shape)
                E_loss.backward()
                E_optim.minimize(E_loss)
                E.clear_gradients()
                E_loss_meter.update(E_loss.numpy()[0], n)
                writer.add_scalar(tag="E_loss",
                                  step=iteration,
                                  value=E_loss_meter.avg)
                print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                      f"STEP{iteration}, " +
                      f"Average E Loss: {E_loss_meter.avg:4f}, ")

            if epoch % args.save_freq == 0 or epoch == args.epoch_num:
                E_model_path = os.path.join(args.checkpoint_folder,
                                            f"E_{args.net}-Epoch-{epoch}")
                # save model and optimizer states
                model_dict = E.state_dict()
                fluid.save_dygraph(model_dict, E_model_path)
                optim_dict = E_optim.state_dict()
                fluid.save_dygraph(optim_dict, E_model_path)
                print(
                    f'----- Save model: {E_model_path}.pdparams'
                )
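
`Basic_Loss` is referenced above (`criterion = Basic_Loss`) but not shown. Assuming `D` returns per-class probabilities and the labels are int64 class ids (as the `cast(..., "int64")` calls suggest), a plausible sketch is a mean cross-entropy; this is an assumption, not the project's actual definition:

import paddle.fluid as fluid

def Basic_Loss(preds, labels):
    # Assumed criterion: mean cross-entropy between predicted class
    # probabilities and int64 labels, matching how criterion(...) is called above.
    loss = fluid.layers.cross_entropy(preds, labels)
    return fluid.layers.mean(loss)
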
Code Example #20
def train(use_cuda, save_dirname, is_local, is_increment):
    """
    train
    """
    old_model = None  # NOTE: set to a checkpoint path before calling with is_increment=True
    model_args = model()
    predict = model_args['predict']
    avg_cost = model_args['avg_cost']
    feed_order = model_args['feed_order']
    loader = model_args['loader']
    auc_batch = model_args['auc'][1]

    # (For the distributed branch below) fleet.distributed_optimizer applies the
    # distributed strategy and multi-node optimization.
    sgd_optimizer = AdamOptimizer(learning_rate=2e-4)

    if is_local:
        sgd_optimizer.minimize(avg_cost)
        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

        exe = Executor(place)
        readers = []
        for i in range(16):
            readers.append(data_reader(cluster_train_dir))
        multi_readers = paddle.reader.multiprocess_reader(readers)
        loader.set_sample_generator(
            multi_readers, batch_size=BATCH_SIZE, places=fluid.cpu_places(CPU_NUM))

        start_program = fluid.default_startup_program()
        exe.run(start_program)
        main_prog = fluid.default_main_program()

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = CPU_NUM * 2
        build_strategy = fluid.BuildStrategy()
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce  # CPU reduce is faster
        build_strategy.fuse_broadcast_ops = True
        main_program = fluid.CompiledProgram(main_prog).with_data_parallel(
            loss_name=avg_cost.name, exec_strategy=exec_strategy, build_strategy=build_strategy)

        if is_increment:  # load model to fine-tune
            fluid.io.load_params(exe, old_model, main_program)
            for auc_state in model_args['auc'][2]:
                set_zero(place, fluid.global_scope(), auc_state.name)


        cost_list = []
        auc_list = []
        import time
        pass_s_time = time.time()
        for pass_id in range(PASS_NUM):
            s_time = time.time()
            for batch_id, data in enumerate(loader()):
                r_time = time.time() - s_time
                st_time = time.time()
                cost_value, auc_value = exe.run(
                    program=main_program,
                    feed=data,
                    fetch_list=[avg_cost.name, auc_batch.name])
                t_time = time.time() - st_time
                cost_list.append(np.array(cost_value))
                auc_list.append(np.array(auc_value))

                if batch_id % 10 == 0 and batch_id != 0:
                    print("Pass %d, batch %d, cost %s auc %s readtime %f traintime %f" %
                          (pass_id, batch_id, np.array(cost_list).mean(),
                           np.array(auc_list).mean(), r_time, t_time))
                    cost_list = []
                    auc_list = []
                if batch_id % 1000 == 0:
                    if save_dirname is not None:
                        fluid.io.save_inference_model(
                            save_dirname,
                            feed_order,
                            [predict, avg_cost, auc_batch], exe
                        )
                        fluid.io.save_persistables(exe, save_dirname)
                        infer(cluster_test_dir, save_dirname, feed_order)
                s_time = time.time()
        pass_time = time.time() - pass_s_time
        print("Pass train time: %f" % pass_time)

    else:
        role = role_maker.PaddleCloudRoleMaker()
        # fully asynchronous training
        config = DistributeTranspilerConfig()
        config.sync_mode = False
        config.runtime_split_send_recv = True
        # fleet.init initializes the distributed environment
        fleet.init(role)

        optimizer = fleet.distributed_optimizer(sgd_optimizer, config)
        optimizer.minimize(avg_cost)

        if fleet.is_server():
            fleet.init_server()
            fleet.run_server()
        # start the workers
        if fleet.is_worker():
            # initialize worker configuration
            fleet.init_worker()

            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
            exe = Executor(place)

            feeder = fluid.DataFeeder(feed_order, place)
            train_reader = feeder.decorate_reader(
                paddle.batch(paddle.reader.shuffle(
                    data_reader(cluster_train_dir), buf_size=8192), batch_size=BATCH_SIZE),
                multi_devices=False, drop_last=True)

            exe.run(fleet.startup_program)

            exec_strategy = fluid.ExecutionStrategy()
            exec_strategy.num_threads = CPU_NUM
            build_strategy = fluid.BuildStrategy()
            build_strategy.async_mode = True

            if CPU_NUM > 1:
                build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce

            compiled_prog = fluid.compiler.CompiledProgram(
                fleet.main_program).with_data_parallel(
                loss_name=avg_cost.name, build_strategy=build_strategy, exec_strategy=exec_strategy)

            for pass_id in range(PASS_NUM):
                cost_list = []
                auc_list = []
                import time
                s_time = time.time()
                for batch_id, data in enumerate(train_reader()):
                    r_time = time.time() - s_time
                    st_time = time.time()
                    cost_value, auc_value = exe.run(
                        program=compiled_prog, feed=data,
                        fetch_list=[avg_cost.name, auc_batch.name])
                    t_time = time.time() - st_time
                    cost_list.append(np.array(cost_value))
                    auc_list.append(np.array(auc_value))

                    if batch_id % 10 == 0 and batch_id != 0:
                        print("Pass %d, batch %d, cost %s auc %s readtime %f traintime %f" %
                              (pass_id, batch_id, np.array(cost_list).mean(),
                               np.array(auc_list).mean(), r_time, t_time))
                        cost_list = []
                        auc_list = []
                    if batch_id % 1000 == 0 and fleet.is_first_worker():
                        if save_dirname is not None:
                            fleet.save_inference_model(
                                exe,
                                save_dirname,
                                feed_order,
                                [predict, avg_cost, auc_batch]
                            )
                            fleet.save_persistables(exe, save_dirname)
                            infer(cluster_test_dir, save_dirname, feed_order)
                    s_time = time.time()
        fleet.stop_worker()
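
`set_zero`, used above to reset the AUC state variables before fine-tuning, is not shown. A common implementation, sketched here under the assumption that each state lives in the given scope as a tensor (Paddle's AUC statistics are int64):

import numpy as np

def set_zero(place, scope, var_name):
    # Overwrite the named variable in `scope` with zeros of the
    # same shape and dtype.
    tensor = scope.var(var_name).get_tensor()
    arr = np.array(tensor)
    tensor.set(np.zeros_like(arr), place)
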
Code Example #21
def train_mnist(args):
    epoch_num = args.epoch
    BATCH_SIZE = 64

    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        mnist = MNIST()
        adam = AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters())
        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)

        for epoch in range(epoch_num):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                cost, acc = mnist(img, label)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()

                adam.minimize(avg_loss)
                # save checkpoint
                mnist.clear_gradients()
                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {:}".format(
                        epoch, batch_id, avg_loss.numpy()))

            mnist.eval()
            test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
            mnist.train()
            if args.ce:
                print("kpis\ttest_acc\t%s" % test_acc)
                print("kpis\ttest_cost\t%s" % test_cost)
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))

        save_parameters = (not args.use_data_parallel) or (
            args.use_data_parallel and
            fluid.dygraph.parallel.Env().local_rank == 0)
        if save_parameters:
            fluid.save_dygraph(mnist.state_dict(), "save_temp")
            
            print("checkpoint saved")

            inference_mnist()
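
`test_mnist` and `inference_mnist` are defined elsewhere in the same script. For reference, here is a sketch of `test_mnist` consistent with how it is called above (it must return the average test loss and accuracy); the body mirrors the training loop's data preparation and is an assumption:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable

def test_mnist(reader, model, batch_size):
    # Average cross-entropy loss and accuracy over the test reader.
    acc_set, avg_loss_set = [], []
    for batch_id, data in enumerate(reader()):
        dy_x_data = np.array([x[0].reshape(1, 28, 28)
                              for x in data]).astype('float32')
        y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
        img = to_variable(dy_x_data)
        label = to_variable(y_data)
        label.stop_gradient = True
        prediction, acc = model(img, label)
        loss = fluid.layers.cross_entropy(prediction, label)
        avg_loss = fluid.layers.mean(loss)
        acc_set.append(float(acc.numpy()))
        avg_loss_set.append(float(avg_loss.numpy()))
    return np.array(avg_loss_set).mean(), np.array(acc_set).mean()
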
Code Example #22
    def test_out_scale_acc(self):
        seed = 1000
        lr = 0.1

        imperative_out_scale = ImperativeQuantAware()

        np.random.seed(seed)
        reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
        lenet = ImperativeLenet()
        fixed_state = {}
        for name, param in lenet.named_parameters():
            p_shape = param.numpy().shape
            p_value = param.numpy()
            if name.endswith("bias"):
                value = np.zeros_like(p_value).astype('float32')
            else:
                value = np.random.normal(
                    loc=0.0, scale=0.01,
                    size=np.prod(p_shape)).reshape(p_shape).astype('float32')
            fixed_state[name] = value
        lenet.set_dict(fixed_state)
        imperative_out_scale.quantize(lenet)
        adam = AdamOptimizer(
            learning_rate=lr, parameter_list=lenet.parameters())
        dynamic_loss_rec = []
        lenet.train()
        for batch_id, data in enumerate(reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array(
                [x[1] for x in data]).astype('int64').reshape(-1, 1)

            img = fluid.dygraph.to_variable(x_data)
            label = fluid.dygraph.to_variable(y_data)

            out = lenet(img)
            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)
            avg_loss.backward()
            adam.minimize(avg_loss)
            lenet.clear_gradients()
            dynamic_loss_rec.append(avg_loss.numpy()[0])
            if batch_id % 100 == 0:
                _logger.info('{}: {}'.format('loss', avg_loss.numpy()))

        lenet.eval()

        path = "./save_dynamic_quant_infer_model/lenet"
        save_dir = "./save_dynamic_quant_infer_model"

        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None, 1, 28, 28], dtype='float32')
            ])

        paddle.enable_static()

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)

        [inference_program, feed_target_names, fetch_targets] = (
            fluid.io.load_inference_model(
                dirname=save_dir,
                executor=exe,
                model_filename="lenet" + INFER_MODEL_SUFFIX,
                params_filename="lenet" + INFER_PARAMS_SUFFIX))
        model_ops = inference_program.global_block().ops

        conv2d_count, mul_count = 0, 0
        for i, op in enumerate(model_ops):
            if op.type == 'conv2d':
                if conv2d_count > 0:
                    self.assertTrue(
                        'fake_quantize_dequantize' in model_ops[i - 1].type)
                else:
                    self.assertTrue(
                        'fake_quantize_dequantize' not in model_ops[i - 1].type)
                conv2d_count += 1

            if op.type == 'mul':
                if mul_count > 0:
                    self.assertTrue(
                        'fake_quantize_dequantize' in model_ops[i - 1].type)
                else:
                    self.assertTrue(
                        'fake_quantize_dequantize' not in model_ops[i - 1].type)
                mul_count += 1
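
`INFER_MODEL_SUFFIX` and `INFER_PARAMS_SUFFIX` are imported from Paddle's dygraph save/load module in the full test file. In the Paddle releases this test targets they are believed to be the following; verify against the installed version before relying on them:

# Assumed values (paddle.fluid.dygraph.io in 1.8/2.x-era Paddle).
INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
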
Code Example #23
    def test_qat_save(self):
        imperative_qat = ImperativeQuantAware(
            weight_quantize_type='abs_max',
            activation_quantize_type='moving_average_abs_max',
            quantizable_layer_type=[
                'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh',
                'Swish'
            ])

        with fluid.dygraph.guard():
            lenet = ImperativeLenet()
            imperative_qat.quantize(lenet)
            adam = AdamOptimizer(
                learning_rate=0.001, parameter_list=lenet.parameters())
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
            test_reader = paddle.batch(
                paddle.dataset.mnist.test(), batch_size=32)

            epoch_num = 1
            for epoch in range(epoch_num):
                lenet.train()
                for batch_id, data in enumerate(train_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = fluid.dygraph.to_variable(x_data)
                    label = fluid.dygraph.to_variable(y_data)
                    out = lenet(img)
                    acc = fluid.layers.accuracy(out, label)
                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    lenet.clear_gradients()
                    if batch_id % 100 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}".
                            format(epoch, batch_id,
                                   avg_loss.numpy(), acc.numpy()))

                lenet.eval()
                for batch_id, data in enumerate(test_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = fluid.dygraph.to_variable(x_data)
                    label = fluid.dygraph.to_variable(y_data)

                    out = lenet(img)
                    acc_top1 = fluid.layers.accuracy(
                        input=out, label=label, k=1)
                    acc_top5 = fluid.layers.accuracy(
                        input=out, label=label, k=5)

                    if batch_id % 100 == 0:
                        _logger.info(
                            "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
                            format(epoch, batch_id,
                                   acc_top1.numpy(), acc_top5.numpy()))

            # save weights
            model_dict = lenet.state_dict()
            fluid.save_dygraph(model_dict, "save_temp")

            # test the correctness of `paddle.jit.save`
            data = next(test_reader())
            test_data = np.array([x[0].reshape(1, 28, 28)
                                  for x in data]).astype('float32')
            test_img = fluid.dygraph.to_variable(test_data)
            lenet.eval()
            before_save = lenet(test_img)

        # save inference quantized model
        path = "./qat_infer_model/lenet"
        save_dir = "./qat_infer_model"
        paddle.jit.save(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None, 1, 28, 28], dtype='float32')
            ])
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)
        [inference_program, feed_target_names,
         fetch_targets] = fluid.io.load_inference_model(
             dirname=save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX)
        after_save, = exe.run(inference_program,
                              feed={feed_target_names[0]: test_data},
                              fetch_list=fetch_targets)

        self.assertTrue(
            np.allclose(after_save, before_save.numpy()),
            msg='Failed to save the inference quantized model.')
Code Example #24
    def test_gnn_float32(self):
        seed = 90

        startup = fluid.Program()
        startup.random_seed = seed
        main = fluid.Program()
        main.random_seed = seed

        scope = fluid.core.Scope()
        with new_program_scope(main=main, startup=startup, scope=scope):
            features = fluid.layers.data(name='features',
                                         shape=[1, 100, 50],
                                         dtype='float32',
                                         append_batch_size=False)
            # Use selected rows when it's supported.
            adj = fluid.layers.data(name='adj',
                                    shape=[1, 100, 100],
                                    dtype='float32',
                                    append_batch_size=False)
            labels = fluid.layers.data(name='labels',
                                       shape=[100, 1],
                                       dtype='int64',
                                       append_batch_size=False)

            model = GCN('test_gcn', 50)
            logits = model(features, adj)
            logits = fluid.layers.reshape(logits, logits.shape[1:])
            # In other examples this would be NLL with log_softmax; however,
            # paddle's log_loss only supports binary classification for now.
            loss = fluid.layers.softmax_with_cross_entropy(logits, labels)
            loss = fluid.layers.reduce_sum(loss)

            adam = AdamOptimizer(learning_rate=1e-3)
            adam.minimize(loss)
            exe = fluid.Executor(fluid.CPUPlace(
            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
            exe.run(startup)
            static_loss = exe.run(feed={
                'features':
                np.ones([1, 100, 50], dtype=np.float32),
                'adj':
                np.ones([1, 100, 100], dtype=np.float32),
                'labels':
                np.ones([100, 1], dtype=np.int64)
            },
                                  fetch_list=[loss])[0]

            static_weight = np.array(
                scope.find_var(model.gc.weight.name).get_tensor())

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            features = np.ones([1, 100, 50], dtype=np.float32)
            # Use selected rows when it's supported.
            adj = np.ones([1, 100, 100], dtype=np.float32)
            labels = np.ones([100, 1], dtype=np.int64)

            model = GCN('test_gcn', 50)
            logits = model(to_variable(features), to_variable(adj))
            logits = fluid.layers.reshape(logits, logits.shape[1:])
            # In other examples this would be NLL with log_softmax; however,
            # paddle's log_loss only supports binary classification for now.
            loss = fluid.layers.softmax_with_cross_entropy(
                logits, to_variable(labels))
            loss = fluid.layers.reduce_sum(loss)
            loss.backward()
            adam = AdamOptimizer(learning_rate=1e-3)

            adam.minimize(loss)
            model.clear_gradients()
            loss_value = loss.numpy()
            model_gc_weight_value = model.gc.weight.numpy()

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            features2 = np.ones([1, 100, 50], dtype=np.float32)
            # Use selected rows when it's supported.
            adj2 = np.ones([1, 100, 100], dtype=np.float32)
            labels2 = np.ones([100, 1], dtype=np.int64)

            model2 = GCN('test_gcn', 50)
            logits2 = model2(to_variable(features2), to_variable(adj2))
            logits2 = fluid.layers.reshape(logits2, logits2.shape[1:])
            # In other examples this would be NLL with log_softmax; however,
            # paddle's log_loss only supports binary classification for now.
            loss2 = fluid.layers.softmax_with_cross_entropy(
                logits2, to_variable(labels2))
            loss2 = fluid.layers.reduce_sum(loss2)
            loss2.backward()
            adam2 = AdamOptimizer(learning_rate=1e-3)
            adam2.minimize(loss2)
            model2.clear_gradients()
            loss2_value = loss2.numpy()
            model2_gc_weight_value = model2.gc.weight.numpy()

        self.assertEqual(static_loss, loss_value)
        self.assertTrue(np.allclose(static_weight, model_gc_weight_value))
        self.assertEqual(static_loss, loss2_value)
        self.assertTrue(np.allclose(static_weight, model2_gc_weight_value))
        sys.stderr.write('%s %s\n' % (static_loss, loss_value))
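
The `GCN` model is defined elsewhere in the test file. Below is a minimal sketch consistent with the usage above, where the first graph-convolution layer is exposed as `model.gc` and the forward pass takes `(features, adj)`; the layer sizes and the second layer are assumptions:

import paddle.fluid as fluid

class GraphConv(fluid.dygraph.Layer):
    def __init__(self, name_scope, in_features, out_features):
        super(GraphConv, self).__init__(name_scope)
        self.weight = self.create_parameter(
            attr=None, dtype='float32', shape=[in_features, out_features])
        self.bias = self.create_parameter(
            attr=None, dtype='float32', shape=[out_features])

    def forward(self, features, adj):
        # support = features . W + b; output = adj . support
        support = fluid.layers.matmul(features, self.weight) + self.bias
        return fluid.layers.matmul(adj, support)

class GCN(fluid.dygraph.Layer):
    def __init__(self, name_scope, num_hidden):
        super(GCN, self).__init__(name_scope)
        self.gc = GraphConv(self.full_name(), num_hidden, 32)
        self.gc2 = GraphConv(self.full_name(), 32, 10)

    def forward(self, x, adj):
        x = fluid.layers.relu(self.gc(x, adj))
        return self.gc2(x, adj)
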
Code Example #25
def train_mnist(args):
    epoch_num = args.epoch
    BATCH_SIZE = 64

    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        mnist = MNIST()
        adam = AdamOptimizer(learning_rate=0.001,
                             parameter_list=mnist.parameters())
        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

        train_reader = paddle.batch(reader_decorator(
            paddle.dataset.mnist.train()),
                                    batch_size=BATCH_SIZE,
                                    drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        test_reader = paddle.batch(reader_decorator(
            paddle.dataset.mnist.test()),
                                   batch_size=BATCH_SIZE,
                                   drop_last=True)

        train_loader = fluid.io.DataLoader.from_generator(
            capacity=10, use_multiprocess=True)
        train_loader.set_sample_list_generator(train_reader, places=place)

        test_loader = fluid.io.DataLoader.from_generator(capacity=10,
                                                         use_multiprocess=True)
        test_loader.set_sample_list_generator(test_reader, places=place)

        total_train_time = 0
        for epoch in range(epoch_num):
            stime = time.time()
            for batch_id, data in enumerate(train_loader()):
                img, label = data
                label.stop_gradient = True

                cost, acc = mnist(img, label)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()

                adam.minimize(avg_loss)
                # save checkpoint
                mnist.clear_gradients()
                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {:}".format(
                        epoch, batch_id, avg_loss.numpy()))
            total_train_time += (time.time() - stime)

            mnist.eval()
            test_cost, test_acc = test_mnist(test_loader, mnist, BATCH_SIZE)
            mnist.train()
            if args.ce:
                print("kpis\ttest_acc\t%s" % test_acc)
                print("kpis\ttest_cost\t%s" % test_cost)
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))

        save_parameters = (not args.use_data_parallel) or (
            args.use_data_parallel
            and fluid.dygraph.parallel.Env().local_rank == 0)
        if save_parameters:
            fluid.save_dygraph(mnist.state_dict(), "save_temp")

            print("checkpoint saved")

            inference_mnist()
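
`reader_decorator`, used above to wrap the raw MNIST readers, adapts each flat sample into the `(image, label)` numpy pair that `DataLoader.set_sample_list_generator` expects. A sketch in line with Paddle's official dygraph MNIST example; treat the exact shapes as assumptions:

import numpy as np

def reader_decorator(reader):
    # Yield (1, 28, 28) float32 images and shape-(1,) int64 labels so the
    # DataLoader can assemble batches directly.
    def _reader():
        for item in reader():
            img = np.array(item[0]).astype('float32').reshape(1, 28, 28)
            label = np.array(item[1]).astype('int64').reshape(1)
            yield img, label
    return _reader
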
Code Example #26
def train(use_cuda, train_sample_dir, test_sample_dir, old_model, output_model,
          is_local, is_increment):
    """
    train
    """
    model_args = model()
    navi_predict = model_args['predict'][0]
    voice_navi_predict = model_args['predict'][1]
    speed_navi_predict = model_args['predict'][2]
    avg_cost = model_args['avg_cost']
    feed_order = model_args['feed_order']

    role = role_maker.PaddleCloudRoleMaker()
    # fully asynchronous training
    config = DistributeTranspilerConfig()
    config.sync_mode = False
    config.runtime_split_send_recv = True

    sgd_optimizer = AdamOptimizer(learning_rate=2e-4)

    if is_local:
        sgd_optimizer.minimize(avg_cost)
        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

        exe = Executor(place)

        feeder = fluid.DataFeeder(feed_order, place)
        train_reader = feeder.decorate_reader(paddle.batch(
            paddle.reader.shuffle(streaming_data_reader(), buf_size=8192),
            batch_size=BATCH_SIZE),
                                              multi_devices=False,
                                              drop_last=True)
        start_program = fluid.default_startup_program()
        exe.run(start_program)
        main_program = fluid.default_main_program()
        if is_increment:  # load model to fine-tune
            fluid.io.load_params(exe, old_model, main_program)

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = CPU_NUM
        build_strategy = fluid.BuildStrategy()
        build_strategy.async_mode = True

        # parallel training for higher throughput
        train_pe = fluid.ParallelExecutor(use_cuda=use_cuda,
                                          main_program=main_program,
                                          loss_name=avg_cost.name)

        cost_list = []
        for pass_id in range(PASS_NUM):
            for batch_id, data in enumerate(train_reader()):
                cost_value = train_pe.run(feed=data,
                                          fetch_list=[avg_cost.name])
                cost_list.append(np.array(cost_value))

                if batch_id % 100 == 0 and batch_id != 0:
                    print("Pass %d, batch %d, cost %s" %
                          (pass_id, batch_id, np.array(cost_list).mean()))
                    cost_list = []
                if batch_id % 2000 == 0:
                    if output_model is not None:
                        fluid.io.save_inference_model(
                            output_model, feed_order, [
                                navi_predict, voice_navi_predict,
                                speed_navi_predict, avg_cost
                            ], exe)
                        fluid.io.save_persistables(exe, output_model)
                        infer(test_sample_dir, output_model, feed_order)

    else:
        # fleet.init initializes the distributed environment
        fleet.init(role)
        # fleet.distributed_optimizer applies the distributed strategy and multi-node optimization
        optimizer = fleet.distributed_optimizer(sgd_optimizer, config)
        optimizer.minimize(avg_cost)

        if fleet.is_server():
            if is_increment:
                fleet.init_server(old_model)
            else:
                fleet.init_server()
            fleet.run_server()
        # start the workers
        if fleet.is_worker():
            # initialize worker configuration
            fleet.init_worker()

            place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

            exe = Executor(place)

            feeder = fluid.DataFeeder(feed_order, place)
            train_reader = feeder.decorate_reader(paddle.batch(
                paddle.reader.shuffle(data_reader(train_sample_dir),
                                      buf_size=8192),
                batch_size=BATCH_SIZE),
                                                  multi_devices=False,
                                                  drop_last=True)
            exe.run(fleet.startup_program)

            exec_strategy = fluid.ExecutionStrategy()
            exec_strategy.num_threads = CPU_NUM
            build_strategy = fluid.BuildStrategy()
            build_strategy.async_mode = True

            if CPU_NUM > 1:
                build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce

            compiled_prog = fluid.compiler.CompiledProgram(
                fleet.main_program).with_data_parallel(
                    loss_name=avg_cost.name,
                    build_strategy=build_strategy,
                    exec_strategy=exec_strategy)

            cost_list = []
            for pass_id in range(PASS_NUM):
                for batch_id, data in enumerate(train_reader()):
                    cost_value = exe.run(program=compiled_prog,
                                         feed=data,
                                         fetch_list=[avg_cost.name])
                    cost_list.append(np.array(cost_value))

                    if batch_id % 100 == 0 and batch_id != 0:
                        print("Pass %d, batch %d, cost %s" %
                              (pass_id, batch_id, np.array(cost_list).mean()))
                        cost_list = []
                    if batch_id % 1000 == 0 and fleet.is_first_worker():
                        if output_model is not None:
                            fleet.save_inference_model(
                                exe, output_model, feed_order, [
                                    navi_predict, voice_navi_predict,
                                    speed_navi_predict, avg_cost
                                ])
                            fleet.save_persistables(exe, output_model)
                            infer(test_sample_dir, output_model, feed_order)
        fleet.stop_worker()