Example no. 1
def train_mnist(args):
    epoch_num = args.epoch
    BATCH_SIZE = 64
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        seed = 33
        np.random.seed(seed)
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

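        # prepare the parallel context that DataParallel uses to synchronize gradients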
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)
        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=BATCH_SIZE,
                                    drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=BATCH_SIZE,
                                   drop_last=True)

        for epoch in range(epoch_num):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                cost, acc = mnist(img, label)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

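                # in data-parallel mode, scale the loss and all-reduce gradients across trainers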
                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()

                adam.minimize(avg_loss)
                # clear gradients for the next batch
                mnist.clear_gradients()
                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {:}".format(
                        epoch, batch_id, avg_loss.numpy()))
        print("checkpoint saved")
Example no. 2
def train_mnist(args, model, tokens=None):
    epoch_num = args.epoch
    BATCH_SIZE = 64

    adam = AdamOptimizer(learning_rate=0.001,
                         parameter_list=model.parameters())

    train_reader = paddle.fluid.io.batch(paddle.dataset.mnist.train(),
                                         batch_size=BATCH_SIZE,
                                         drop_last=True)
    if args.use_data_parallel:
        train_reader = fluid.contrib.reader.distributed_batch_reader(
            train_reader)

    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_reader()):
            dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                  for x in data]).astype('float32')
            y_data = np.array([x[1]
                               for x in data]).astype('int64').reshape(-1, 1)

            img = to_variable(dy_x_data)
            label = to_variable(y_data)
            label.stop_gradient = True

            cost, acc = model.forward(img, label, tokens=tokens)

            loss = fluid.layers.cross_entropy(cost, label)
            avg_loss = fluid.layers.mean(loss)

            if args.use_data_parallel:
                avg_loss = model.scale_loss(avg_loss)
                avg_loss.backward()
                model.apply_collective_grads()
            else:
                avg_loss.backward()

            adam.minimize(avg_loss)
            # clear gradients for the next batch
            model.clear_gradients()
            if batch_id % 1 == 0:
                print("Loss at epoch {} step {}: {:}".format(
                    epoch, batch_id, avg_loss.numpy()))

        model.eval()
        test_acc = test_mnist(model, tokens=tokens)
        model.train()
        print("Loss at epoch {} , acc is: {}".format(epoch, test_acc))

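    # only the rank-0 trainer writes the checkpoint when data parallelism is enabled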
    save_parameters = (not args.use_data_parallel) or (
        args.use_data_parallel
        and fluid.dygraph.parallel.Env().local_rank == 0)
    if save_parameters:
        fluid.save_dygraph(model.state_dict(), "save_temp")
        print("checkpoint saved")
Example no. 3
def finetune(args):
    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        dataset = hub.dataset.ChnSentiCorp()
        tc = TransformerClassifier(num_classes=dataset.num_labels,
                                   transformer=ernie)
        adam = AdamOptimizer(learning_rate=1e-5,
                             parameter_list=tc.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            tc.load_dict(state_dict)

        reader = hub.reader.ClassifyReader(
            dataset=dataset,
            vocab_path=ernie.get_vocab_path(),
            max_seq_len=args.max_seq_len,
            sp_model_path=ernie.get_spm_path(),
            word_dict_path=ernie.get_word_dict_path())
        train_reader = reader.data_generator(batch_size=args.batch_size,
                                             phase='train')

        loss_sum = acc_sum = cnt = 0
        # train for num_epoch epochs
        for epoch in range(args.num_epoch):
            # read training batches and train
            for batch_id, data in enumerate(train_reader()):
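                # data[0] packs [input_ids, position_ids, segment_ids, input_mask, labels] for the batch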
                input_ids = np.array(data[0][0]).astype(np.int64)
                position_ids = np.array(data[0][1]).astype(np.int64)
                segment_ids = np.array(data[0][2]).astype(np.int64)
                input_mask = np.array(data[0][3]).astype(np.float32)
                labels = np.array(data[0][4]).astype(np.int64)
                pred = tc(input_ids, position_ids, segment_ids, input_mask)

                acc = fluid.layers.accuracy(pred, to_variable(labels))
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # update parameters
                adam.minimize(avg_loss)

                loss_sum += avg_loss.numpy() * labels.shape[0]
                acc_sum += acc.numpy() * labels.shape[0]
                cnt += labels.shape[0]
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    loss_sum = acc_sum = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = tc.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
Example no. 4
    def test_train(self):

        main_prog = fluid.Program()
        with fluid.program_guard(main_prog):
            mnist = MNIST()
            adam = AdamOptimizer(learning_rate=0.001,
                                 parameter_list=mnist.parameters())

            exe = fluid.Executor(self.place)
            start = time()

            img = fluid.data(name='img',
                             shape=[None, 1, 28, 28],
                             dtype='float32')
            label = fluid.data(name='label', shape=[None, 1], dtype='int64')
            label.stop_gradient = True

            prediction, acc, avg_loss = mnist(img, label)
            adam.minimize(avg_loss)
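        # run the startup program once to initialize the parameters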
        exe.run(fluid.default_startup_program())

        for epoch in range(self.epoch_num):
            for batch_id, data in enumerate(self.train_reader()):
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                out = exe.run(main_prog,
                              fetch_list=[avg_loss, acc],
                              feed={
                                  'img': dy_x_data,
                                  'label': y_data
                              })
                if batch_id % 100 == 0:
                    print(
                        "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}"
                        .format(epoch, batch_id, np.array(out[0]),
                                np.array(out[1]),
                                time() - start))
                    if batch_id == 300:
                        # The accuracy of mnist should converge above 0.9 after 300 batches.
                        accuracy = np.array(out[1])
                        self.assertGreater(
                            accuracy,
                            0.9,
                            msg=
                            "The accuracy {} of mnist should converge over 0.9 after 300 batch."
                            .format(accuracy))
                        break
Example no. 5
def finetune(args):
    with fluid.dygraph.guard():
        resnet50_vd_10w = hub.Module(name="resnet50_vd_10w")
        dataset = hub.dataset.Flowers()
        resnet = ResNet50(num_classes=dataset.num_labels,
                          backbone=resnet50_vd_10w)
        adam = AdamOptimizer(learning_rate=0.001,
                             parameter_list=resnet.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            resnet.load_dict(state_dict)

        reader = hub.reader.ImageClassificationReader(
            image_width=resnet50_vd_10w.get_expected_image_width(),
            image_height=resnet50_vd_10w.get_expected_image_height(),
            images_mean=resnet50_vd_10w.get_pretrained_images_mean(),
            images_std=resnet50_vd_10w.get_pretrained_images_std(),
            dataset=dataset)
        train_reader = reader.data_generator(batch_size=args.batch_size,
                                             phase='train')

        loss_sum = acc_sum = cnt = 0
        # train for num_epoch epochs
        for epoch in range(args.num_epoch):
            # read training batches and train
            for batch_id, data in enumerate(train_reader()):
                imgs = np.array(data[0][0])
                labels = np.array(data[0][1])

                pred = resnet(imgs)
                acc = fluid.layers.accuracy(pred, to_variable(labels))
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # update parameters
                adam.minimize(avg_loss)

                loss_sum += avg_loss.numpy() * imgs.shape[0]
                acc_sum += acc.numpy() * imgs.shape[0]
                cnt += imgs.shape[0]
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    loss_sum = acc_sum = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = resnet.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
Example no. 6
    def train(self, to_static=False):
        prog_trans = ProgramTranslator()
        prog_trans.enable(to_static)

        loss_data = []
        with fluid.dygraph.guard(self.place):
            fluid.default_main_program().random_seed = SEED
            fluid.default_startup_program().random_seed = SEED
            mnist = MNIST()
            adam = AdamOptimizer(learning_rate=0.001,
                                 parameter_list=mnist.parameters())

            for epoch in range(self.epoch_num):
                start = time()
                for batch_id, data in enumerate(self.train_reader()):
                    dy_x_data = np.array([
                        x[0].reshape(1, 28, 28) for x in data
                    ]).astype('float32')
                    y_data = np.array([x[1] for x in data
                                       ]).astype('int64').reshape(-1, 1)

                    img = to_variable(dy_x_data)
                    label = to_variable(y_data)

                    label.stop_gradient = True
                    prediction, acc, avg_loss = mnist(img, label=label)
                    avg_loss.backward()

                    adam.minimize(avg_loss)
                    loss_data.append(avg_loss.numpy()[0])
                    # clear gradients for the next batch
                    mnist.clear_gradients()
                    if batch_id % 10 == 0:
                        print(
                            "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}"
                            .format(epoch, batch_id, avg_loss.numpy(),
                                    acc.numpy(),
                                    time() - start))
                        start = time()
                    if batch_id == 50:
                        mnist.eval()
                        prediction, acc, avg_loss = mnist(img, label)
                        loss_data.append(avg_loss.numpy()[0])
                        # new save load check
                        self.check_jit_save_load(mnist, [dy_x_data], [img],
                                                 to_static, prediction)
                        break
        return loss_data
Example no. 7
def main(args):
    place = set_device(args.device)
    fluid.enable_dygraph(place) if args.dynamic else None

    inputs = [
        Input([None, args.max_seq_len], 'int64', name='words'),
        Input([None, args.max_seq_len], 'int64', name='target'),
        Input([None], 'int64', name='length')
    ]
    labels = [Input([None, args.max_seq_len], 'int64', name='labels')]

    feed_list = None if args.dynamic else [
        x.forward() for x in inputs + labels
    ]
    dataset = LacDataset(args)
    train_path = os.path.join(args.data, "train.tsv")
    test_path = os.path.join(args.data, "test.tsv")

    train_generator = create_lexnet_data_generator(args,
                                                   reader=dataset,
                                                   file_name=train_path,
                                                   place=place,
                                                   mode="train")
    test_generator = create_lexnet_data_generator(args,
                                                  reader=dataset,
                                                  file_name=test_path,
                                                  place=place,
                                                  mode="test")

    train_dataset = create_dataloader(train_generator,
                                      place,
                                      feed_list=feed_list)
    test_dataset = create_dataloader(test_generator,
                                     place,
                                     feed_list=feed_list)

    vocab_size = dataset.vocab_size
    num_labels = dataset.num_labels
    model = SeqTagging(args, vocab_size, num_labels)

    optim = AdamOptimizer(learning_rate=args.base_learning_rate,
                          parameter_list=model.parameters())

    model.prepare(optim,
                  LacLoss(),
                  ChunkEval(num_labels),
                  inputs=inputs,
                  labels=labels,
                  device=args.device)

    if args.resume is not None:
        model.load(args.resume)

    model.fit(train_dataset,
              test_dataset,
              epochs=args.epoch,
              batch_size=args.batch_size,
              eval_freq=args.eval_freq,
              save_freq=args.save_freq,
              save_dir=args.save_dir)
Example no. 8
def main():
    # Step 0: preparation
    #place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard():
        # Step 1: Define training dataloader
        image_folder="work/dummy_data"
        image_list_file="work/dummy_data/list.txt"
        transform = TrainAugmentation(224)
        data = BasicDataLoader(image_folder,image_list_file,transform=transform)
        #TODO: create dataloader
        train_dataloader = fluid.io.DataLoader.from_generator(capacity=2,return_list=True)
        train_dataloader.set_sample_generator(data,args.batch_size)
        total_batch = len(data)//args.batch_size
        # Step 2: Create model
        if args.net == "basic":
            #TODO: create basicmodel
            model = PSPNet()
        else:
            raise NotImplementedError(f"args.net: {args.net} is not Supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_SegLoss

        # create optimizer
        optimizer = AdamOptimizer(learning_rate=args.lr,
                                  parameter_list=model.parameters())
        # Step 4: Training
        for epoch in range(1, args.num_epochs+1):
            train_loss = train(train_dataloader,
                               model,
                               criterion,
                               optimizer,
                               epoch,
                               total_batch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")

            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}")

                # TODO: save model and optimizer states
                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict, model_path)
                optim_dict = optimizer.state_dict()
                fluid.save_dygraph(optim_dict, model_path)

                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')
Example no. 9
def main():
    # Step 0: preparation
    place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        # Step 1: Define training dataloader
        basic_augmentation = TrainAugmentation(image_size=256)
        basic_dataloader = BasicDataLoader(image_folder=args.image_folder,
                                            image_list_file=args.image_list_file,
                                            transform=basic_augmentation,
                                            shuffle=True)
        train_dataloader = fluid.io.DataLoader.from_generator(capacity=10, use_multiprocess=True)
        train_dataloader.set_sample_generator(basic_dataloader, batch_size=args.batch_size, places=place)
        total_batch = int(len(basic_dataloader) / args.batch_size)

        
        # Step 2: Create model
        if args.net == "basic":
            model = BasicModel()
        else:
            raise NotImplementedError(f"args.net: {args.net} is not Supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_SegLoss
        optimizer = AdamOptimizer(learning_rate=args.lr, parameter_list=model.parameters())
        # create optimizer
        
        # Step 4: Training
        for epoch in range(1, args.num_epochs+1):
            train_loss = train(train_dataloader,
                               model,
                               criterion,
                               optimizer,
                               epoch,
                               total_batch)
            print(f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss:.4f}")

            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(args.checkpoint_folder, f"{args.net}-Epoch-{epoch}-Loss-{train_loss}")

                # TODO: save model and optimizer states
                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict, model_path)
                optimizer_dict = optimizer.state_dict()
                fluid.save_dygraph(optimizer_dict, model_path)
                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')
Example no. 10
def train_mnist(args):
    epoch_num = args.epoch
    BATCH_SIZE = 64
    seed = 33
    np.random.seed(seed)
    start_prog = fluid.Program()
    main_prog = fluid.Program()
    start_prog.random_seed = seed
    main_prog.random_seed = seed
    with fluid.program_guard(main_prog, start_prog):
        exe = fluid.Executor(fluid.CPUPlace())
        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=BATCH_SIZE,
                                    drop_last=True)
        img = fluid.layers.data(name='pixel',
                                shape=[1, 28, 28],
                                dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        cost = mnist(img)
        loss = fluid.layers.cross_entropy(cost, label)
        avg_loss = fluid.layers.mean(loss)
        adam.minimize(avg_loss)
        out = exe.run(fluid.default_startup_program())
        for epoch in range(epoch_num):
            for batch_id, data in enumerate(train_reader()):
                static_x_data = np.array(
                    [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape([BATCH_SIZE, 1])

                fetch_list = [avg_loss.name]
                out = exe.run(fluid.default_main_program(),
                              feed={
                                  "pixel": static_x_data,
                                  "label": y_data
                              },
                              fetch_list=fetch_list)

                static_out = out[0]

                if batch_id % 100 == 0:
                    print("epoch: {}, batch_id: {}, loss: {}".format(
                        epoch, batch_id, static_out))
Example no. 11
    def test_save_load_same_result(self):
        program_translator = ProgramTranslator()
        x_data = np.random.randn(30, 10, 32).astype('float32')
        batch_num = 3

        with fluid.dygraph.guard(place):

            program_translator.enable(True)
            x = fluid.dygraph.to_variable(x_data)
            net = Linear(32, 64)
            adam = AdamOptimizer(learning_rate=0.1,
                                 parameter_list=net.parameters())

            for i in range(batch_num):
                static_out, static_loss = net(x)
                # Update parameters
                static_loss.backward()
                adam.minimize(static_loss)
                net.clear_gradients()
            # Save parameters

            fluid.save_dygraph(net.state_dict(), self.model_path)
            # minimize() will update parameter, call net() to get output and avg_loss.
            # Switch into eval mode.
            net.eval()
            static_out, static_loss = net(x)

        # load parameters into dygraph
        with fluid.dygraph.guard(place):
            dygraph_net = Linear(32, 64)

            # Load parameters
            model_dict, _ = fluid.load_dygraph(self.model_path)
            dygraph_net.set_dict(model_dict)
            # Switch into eval mode.
            dygraph_net.eval()

            x = fluid.dygraph.to_variable(x_data)
            # predict output
            program_translator.enable(False)
            dygraph_out, dygraph_loss = dygraph_net(x)

        self.assertTrue(np.allclose(dygraph_out.numpy(), static_out.numpy()))
        self.assertTrue(np.allclose(dygraph_loss.numpy(), static_loss.numpy()))
Example no. 12
def main(args):
    place = set_device(args.device)
    fluid.enable_dygraph(place) if args.dynamic else None

    inputs = [
        Input(
            [None, None], 'int64', name='words'), Input(
                [None], 'int64', name='length'), Input(
                    [None, None], 'int64', name='target')
    ]

    labels = [Input([None, None], 'int64', name='labels')]

    feed_list = None if args.dynamic else [
        x.forward() for x in inputs + labels
    ]

    dataset = LacDataset(args)
    train_dataset = LacDataLoader(args, place, phase="train")

    vocab_size = dataset.vocab_size
    num_labels = dataset.num_labels
    model = SeqTagging(args, vocab_size, num_labels, mode="train")

    optim = AdamOptimizer(
        learning_rate=args.base_learning_rate,
        parameter_list=model.parameters())

    model.prepare(
        optim,
        LacLoss(),
        ChunkEval(num_labels),
        inputs=inputs,
        labels=labels,
        device=args.device)

    if args.init_from_checkpoint:
        model.load(args.init_from_checkpoint)

    if args.init_from_pretrain_model:
        model.load(args.init_from_pretrain_model, reset_optimizer=True)

    model.fit(train_dataset.dataloader,
              epochs=args.epoch,
              batch_size=args.batch_size,
              eval_freq=args.eval_freq,
              save_freq=args.save_freq,
              save_dir=args.save_dir)
Example no. 13
    def train(self, to_static=False):
        paddle.seed(SEED)
        mnist = MNIST()

        if to_static:
            print("Successfully to apply @to_static.")
            mnist = paddle.jit.to_static(mnist)

        adam = AdamOptimizer(learning_rate=0.001,
                             parameter_list=mnist.parameters())

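        # GradScaler scales the loss to avoid float16 gradient underflow during AMP training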
        scaler = paddle.amp.GradScaler(init_loss_scaling=1024)

        loss_data = []
        for epoch in range(self.epoch_num):
            start = time()
            for batch_id, data in enumerate(self.train_reader()):
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = paddle.to_tensor(dy_x_data)
                label = paddle.to_tensor(y_data)
                label.stop_gradient = True

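                # run the forward pass under auto_cast so eligible ops use float16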
                with paddle.amp.auto_cast():
                    prediction, acc, avg_loss = mnist(img, label=label)

                scaled = scaler.scale(avg_loss)
                scaled.backward()
                scaler.minimize(adam, scaled)

                loss_data.append(avg_loss.numpy()[0])
                # clear gradients for the next batch
                mnist.clear_gradients()
                if batch_id % 10 == 0:
                    print(
                        "Loss at epoch {} step {}: loss: {:}, acc: {}, cost: {}"
                        .format(epoch, batch_id, avg_loss.numpy(), acc.numpy(),
                                time() - start))
                    start = time()
                if batch_id == 50:
                    break
        return loss_data
Example no. 14
    def func_out_scale_acc(self):
        seed = 1000
        lr = 0.001

        weight_quantize_type = 'abs_max'
        activation_quantize_type = 'moving_average_abs_max'
        imperative_out_scale = ImperativeQuantAware(
            weight_quantize_type=weight_quantize_type,
            activation_quantize_type=activation_quantize_type)

        with fluid.dygraph.guard():
            np.random.seed(seed)
            fluid.default_main_program().random_seed = seed
            fluid.default_startup_program().random_seed = seed

            lenet = ImperativeLenet()
            lenet = fix_model_dict(lenet)
            imperative_out_scale.quantize(lenet)

            reader = paddle.batch(paddle.dataset.mnist.test(),
                                  batch_size=32,
                                  drop_last=True)
            adam = AdamOptimizer(learning_rate=lr,
                                 parameter_list=lenet.parameters())
            loss_list = train_lenet(lenet, reader, adam)
            lenet.eval()

        param_save_path = "test_save_quantized_model/lenet.pdparams"
        save_dict = lenet.state_dict()
        paddle.save(save_dict, param_save_path)

        save_path = "./dynamic_outscale_infer_model/lenet"
        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=save_path,
            input_spec=[
                paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                        dtype='float32')
            ])

        for i in range(len(loss_list) - 1):
            self.assertTrue(loss_list[i] > loss_list[i + 1],
                            msg='Failed to do the imperative qat.')
Example no. 15
def main():
    #Place = paddle.fluid.CPUPlace()
    Place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(Place):
        transform = Transform(256)
        dataload = Dataloader(args.image_folder, args.image_list_file,
                              transform, True)
        train_load = fluid.io.DataLoader.from_generator(capacity=1,
                                                        use_multiprocess=False)
        train_load.set_sample_generator(dataload,
                                        batch_size=args.batch_size,
                                        places=Place)
        total_batch = int(len(dataload) / args.batch_size)

        if args.net == 'deeplab':
            model = DeepLab(59)
        else:
            print("Other model haven't finished....")

        costFunc = SegLoss
        adam = AdamOptimizer(learning_rate=args.lr,
                             parameter_list=model.parameters())

        for epoch in range(1, args.num_epochs + 1):
            train_loss = train(train_load, model, costFunc, adam, epoch,
                               total_batch)
            print(
                f"----- Epoch[{epoch}/{args.num_epochs}] Train Loss: {train_loss}"
            )

            if epoch % args.save_freq == 0 or epoch == args.num_epochs:
                model_path = os.path.join(
                    args.checkpoint_folder,
                    f"{args.net}-Epoch-{epoch}-Loss-{train_loss}")

                model_dict = model.state_dict()
                fluid.save_dygraph(model_dict, model_path)
                optimizer_dict = adam.state_dict()
                fluid.save_dygraph(optimizer_dict, model_path)
                print(f'----- Save model: {model_path}.pdparams')
                print(f'----- Save optimizer: {model_path}.pdopt')
Example no. 16
    def test_save_quantized_model(self):
        lr = 0.001

        load_param_path = "test_save_quantized_model/lenet.pdparams"
        save_path = "./dynamic_outscale_infer_model_from_checkpoint/lenet"

        weight_quantize_type = 'abs_max'
        activation_quantize_type = 'moving_average_abs_max'
        imperative_out_scale = ImperativeQuantAware(
            weight_quantize_type=weight_quantize_type,
            activation_quantize_type=activation_quantize_type)

        with fluid.dygraph.guard():
            lenet = ImperativeLenet()
            load_dict = paddle.load(load_param_path)
            imperative_out_scale.quantize(lenet)
            lenet.set_dict(load_dict)

            reader = paddle.batch(
                paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
            adam = AdamOptimizer(
                learning_rate=lr, parameter_list=lenet.parameters())
            loss_list = train_lenet(lenet, reader, adam)
            lenet.eval()

        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=save_path,
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None, 1, 28, 28], dtype='float32')
            ])

        for i in range(len(loss_list) - 1):
            self.assertTrue(
                loss_list[i] > loss_list[i + 1],
                msg='Failed to do the imperative qat.')
Example no. 17
def train_mnist(args):
    epoch_num = 5
    BATCH_SIZE = 256

    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)
        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_sampler(
                paddle.dataset.mnist.train(), batch_size=BATCH_SIZE)
        else:
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=BATCH_SIZE,
                                        drop_last=True)

        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=BATCH_SIZE,
                                   drop_last=True)

        for epoch in range(epoch_num):
            # define eval
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            progress = ProgressMeter(len(list(train_reader())) - 1,
                                     batch_time,
                                     data_time,
                                     losses,
                                     prefix="epoch: [{}]".format(epoch))
            end = Tools.time()

            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                cost, acc = mnist(img, label)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()

                adam.minimize(avg_loss)
                # clear gradients for the next batch
                mnist.clear_gradients()
                batch_time.update(Tools.time() - end)
                dy_out = avg_loss.numpy()[0]
                losses.update(dy_out, BATCH_SIZE)
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                end = Tools.time()

            mnist.eval()
            test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
            mnist.train()
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))

        fluid.dygraph.save_persistables(mnist.state_dict(), "save_dir")
        print("checkpoint saved")

        inference_mnist()
Example no. 18
    def func_out_scale_acc(self):
        paddle.disable_static()
        seed = 1000
        lr = 0.1

        qat = ImperativeQuantAware()

        np.random.seed(seed)
        reader = paddle.batch(paddle.dataset.mnist.test(),
                              batch_size=512,
                              drop_last=True)

        lenet = ImperativeLenetWithSkipQuant()
        lenet = fix_model_dict(lenet)
        qat.quantize(lenet)

        adam = AdamOptimizer(learning_rate=lr,
                             parameter_list=lenet.parameters())
        dynamic_loss_rec = []
        lenet.train()
        loss_list = train_lenet(lenet, reader, adam)

        lenet.eval()

        path = "./save_dynamic_quant_infer_model/lenet"
        save_dir = "./save_dynamic_quant_infer_model"

        qat.save_quantized_model(layer=lenet,
                                 path=path,
                                 input_spec=[
                                     paddle.static.InputSpec(
                                         shape=[None, 1, 28, 28],
                                         dtype='float32')
                                 ])

        paddle.enable_static()

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)

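        # load the saved quantized inference model and inspect its operators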
        [inference_program, feed_target_names,
         fetch_targets] = (fluid.io.load_inference_model(
             dirname=save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX))
        model_ops = inference_program.global_block().ops

        conv2d_count, matmul_count = 0, 0
        conv2d_skip_count, matmul_skip_count = 0, 0
        find_conv2d = False
        find_matmul = False
        for i, op in enumerate(model_ops):
            if op.type == 'conv2d':
                find_conv2d = True
                if op.has_attr("skip_quant"):
                    conv2d_skip_count += 1
                if conv2d_count > 0:
                    self.assertTrue(
                        'fake_quantize_dequantize' in model_ops[i - 1].type)
                else:
                    self.assertTrue(
                        'fake_quantize_dequantize' not in model_ops[i -
                                                                    1].type)
                conv2d_count += 1

            if op.type == 'matmul':
                find_matmul = True
                if op.has_attr("skip_quant"):
                    matmul_skip_count += 1
                if matmul_count > 0:
                    self.assertTrue(
                        'fake_quantize_dequantize' in model_ops[i - 1].type)
                else:
                    self.assertTrue(
                        'fake_quantize_dequantize' not in model_ops[i -
                                                                    1].type)
                matmul_count += 1

        if find_conv2d:
            self.assertTrue(conv2d_skip_count == 1)
        if find_matmul:
            self.assertTrue(matmul_skip_count == 1)
Example no. 19
    def func_qat(self):
        self.set_vars()

        imperative_qat = ImperativeQuantAware(
            weight_quantize_type=self.weight_quantize_type,
            activation_quantize_type=self.activation_quantize_type,
            fuse_conv_bn=self.fuse_conv_bn)

        with fluid.dygraph.guard():
            # For CI coverage
            conv1 = Conv2D(
                in_channels=3,
                out_channels=2,
                kernel_size=3,
                stride=1,
                padding=1,
                padding_mode='replicate')
            quant_conv1 = QuantizedConv2D(conv1)
            data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
            quant_conv1(fluid.dygraph.to_variable(data))

            conv_transpose = Conv2DTranspose(4, 6, (3, 3))
            quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose)
            x_var = paddle.uniform(
                (2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0)
            quant_conv_transpose(x_var)

            seed = 1
            np.random.seed(seed)
            fluid.default_main_program().random_seed = seed
            fluid.default_startup_program().random_seed = seed

            lenet = ImperativeLenet()
            lenet = fix_model_dict(lenet)
            imperative_qat.quantize(lenet)
            adam = AdamOptimizer(
                learning_rate=0.001, parameter_list=lenet.parameters())

            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
            test_reader = paddle.batch(
                paddle.dataset.mnist.test(), batch_size=32)

            epoch_num = 1
            for epoch in range(epoch_num):
                lenet.train()
                for batch_id, data in enumerate(train_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = fluid.dygraph.to_variable(x_data)
                    label = fluid.dygraph.to_variable(y_data)
                    out = lenet(img)
                    acc = fluid.layers.accuracy(out, label)
                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    lenet.clear_gradients()
                    if batch_id % 100 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}".
                            format(epoch, batch_id,
                                   avg_loss.numpy(), acc.numpy()))
                    if batch_id == 500:  # For shortening CI time
                        break

                lenet.eval()
                eval_acc_top1_list = []
                for batch_id, data in enumerate(test_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = fluid.dygraph.to_variable(x_data)
                    label = fluid.dygraph.to_variable(y_data)

                    out = lenet(img)
                    acc_top1 = fluid.layers.accuracy(
                        input=out, label=label, k=1)
                    acc_top5 = fluid.layers.accuracy(
                        input=out, label=label, k=5)

                    if batch_id % 100 == 0:
                        eval_acc_top1_list.append(float(acc_top1.numpy()))
                        _logger.info(
                            "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
                            format(epoch, batch_id,
                                   acc_top1.numpy(), acc_top5.numpy()))

                # check eval acc
                eval_acc_top1 = sum(eval_acc_top1_list) / len(
                    eval_acc_top1_list)
                print('eval_acc_top1', eval_acc_top1)
                self.assertTrue(
                    eval_acc_top1 > 0.9,
                    msg="The test acc {%f} is less than 0.9." % eval_acc_top1)

            # test the correctness of `paddle.jit.save`
            data = next(test_reader())
            test_data = np.array([x[0].reshape(1, 28, 28)
                                  for x in data]).astype('float32')
            y_data = np.array(
                [x[1] for x in data]).astype('int64').reshape(-1, 1)
            test_img = fluid.dygraph.to_variable(test_data)
            label = fluid.dygraph.to_variable(y_data)
            lenet.eval()
            fp32_out = lenet(test_img)
            fp32_acc = fluid.layers.accuracy(fp32_out, label).numpy()

        with tempfile.TemporaryDirectory(prefix="qat_save_path_") as tmpdir:
            # save inference quantized model
            imperative_qat.save_quantized_model(
                layer=lenet,
                path=os.path.join(tmpdir, "lenet"),
                input_spec=[
                    paddle.static.InputSpec(
                        shape=[None, 1, 28, 28], dtype='float32')
                ],
                onnx_format=self.onnx_format)
            print('Quantized model saved in %s' % tmpdir)

            if core.is_compiled_with_cuda():
                place = core.CUDAPlace(0)
            else:
                place = core.CPUPlace()
            exe = fluid.Executor(place)
            [inference_program, feed_target_names,
             fetch_targets] = fluid.io.load_inference_model(
                 dirname=tmpdir,
                 executor=exe,
                 model_filename="lenet" + INFER_MODEL_SUFFIX,
                 params_filename="lenet" + INFER_PARAMS_SUFFIX)
            quant_out, = exe.run(inference_program,
                                 feed={feed_target_names[0]: test_data},
                                 fetch_list=fetch_targets)
            paddle.disable_static()
            quant_out = fluid.dygraph.to_variable(quant_out)
            quant_acc = fluid.layers.accuracy(quant_out, label).numpy()
            paddle.enable_static()
            delta_value = fp32_acc - quant_acc
            self.assertLess(delta_value, self.diff_threshold)
Example no. 20
def finetune(args):
    module = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    # Use the appropriate tokenizer to preprocess the data set
    # For ernie_tiny, it will do word segmentation to get subword. More details: https://www.jiqizhixin.com/articles/2019-11-06-9
    if module.name == "ernie_tiny":
        tokenizer = hub.ErnieTinyTokenizer(
            vocab_file=module.get_vocab_path(),
            spm_path=module.get_spm_path(),
            word_dict_path=module.get_word_dict_path(),
        )
    else:
        tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path())
    dataset = hub.dataset.ChnSentiCorp(tokenizer=tokenizer,
                                       max_seq_len=args.max_seq_len)

    with fluid.dygraph.guard():
        tc = TransformerClassifier(num_classes=dataset.num_labels,
                                   transformer=module)
        adam = AdamOptimizer(learning_rate=1e-5,
                             parameter_list=tc.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            tc.load_dict(state_dict)

        loss_sum = acc_sum = cnt = 0
        for epoch in range(args.num_epoch):
            for batch_id, data in enumerate(
                    dataset.batch_records_generator(
                        phase="train",
                        batch_size=args.batch_size,
                        shuffle=True,
                        pad_to_batch_max_seq_len=False)):
                batch_size = len(data["input_ids"])
                input_ids = np.array(data["input_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                position_ids = np.array(data["position_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                segment_ids = np.array(data["segment_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                input_mask = np.array(data["input_mask"]).astype(
                    np.float32).reshape([batch_size, -1, 1])
                labels = np.array(data["label"]).astype(np.int64).reshape(
                    [batch_size, 1])
                pred = tc(input_ids, position_ids, segment_ids, input_mask)

                acc = fluid.layers.accuracy(pred, to_variable(labels))
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)

                loss_sum += avg_loss.numpy() * labels.shape[0]
                acc_sum += acc.numpy() * labels.shape[0]
                cnt += labels.shape[0]
                if batch_id % args.log_interval == 0:
                    print('epoch {}: loss {}, acc {}'.format(
                        epoch, loss_sum / cnt, acc_sum / cnt))
                    loss_sum = acc_sum = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = tc.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
Example no. 21
def train_mnist(args):
    epoch_num = args.epoch
    BATCH_SIZE = 32

    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)

        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)



        for epoch in range(epoch_num):

            total_loss = 0.0
            total_acc = 0.0
            total_sample = 0

            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            progress = ProgressMeter(len(list(train_reader())) - 1, batch_time, data_time,
                                     losses, prefix="epoch: [{}]".format(epoch))
            end = Tools.time()
            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                cost, acc = mnist(img, label)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()

                adam.minimize(avg_loss)
                # clear gradients for the next batch
                mnist.clear_gradients()
                batch_time.update(Tools.time() - end)

                total_loss += avg_loss.numpy()
                total_acc += acc.numpy()
                total_sample += 1

                dy_out = avg_loss.numpy()[0]
                losses.update(dy_out, BATCH_SIZE)
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                    print("epoch %d | batch step %d, loss %0.3f acc %0.3f" % \
                          (epoch, batch_id, total_loss / total_sample, total_acc / total_sample))


                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {:}".format(
                        epoch, batch_id, avg_loss.numpy()))
                end = Tools.time()
            mnist.eval()
            test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
            mnist.train()
            if args.ce:
                print("kpis\ttest_acc\t%s" % test_acc)
                print("kpis\ttest_cost\t%s" % test_cost)
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))
Example no. 22
    def test_qat_acc(self):
        def _build_static_lenet(main, startup, is_test=False, seed=1000):
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    main.random_seed = seed
                    startup.random_seed = seed
                    img = fluid.layers.data(
                        name='image', shape=[1, 28, 28], dtype='float32')
                    label = fluid.layers.data(
                        name='label', shape=[1], dtype='int64')
                    prediction = StaticLenet(img)
                    if not is_test:
                        loss = fluid.layers.cross_entropy(
                            input=prediction, label=label)
                        avg_loss = fluid.layers.mean(loss)
                    else:
                        avg_loss = prediction
            return img, label, avg_loss

        reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
        weight_quantize_type = 'abs_max'
        activation_quant_type = 'moving_average_abs_max'
        param_init_map = {}
        seed = 1000
        lr = 0.001

        # imperative train
        _logger.info(
            "--------------------------dynamic graph qat--------------------------"
        )
        imperative_qat = ImperativeQuantAware(
            weight_quantize_type=weight_quantize_type,
            activation_quantize_type=activation_quant_type,
            quantizable_layer_type=[
                'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh',
                'Swish'
            ])

        with fluid.dygraph.guard():
            np.random.seed(seed)
            fluid.default_main_program().random_seed = seed
            fluid.default_startup_program().random_seed = seed
            lenet = ImperativeLenet()
            fixed_state = {}
            for name, param in lenet.named_parameters():
                p_shape = param.numpy().shape
                p_value = param.numpy()
                if name.endswith("bias"):
                    value = np.zeros_like(p_value).astype('float32')
                else:
                    value = np.random.normal(
                        loc=0.0, scale=0.01, size=np.product(p_shape)).reshape(
                            p_shape).astype('float32')
                fixed_state[name] = value
                param_init_map[param.name] = value
            lenet.set_dict(fixed_state)

            imperative_qat.quantize(lenet)
            adam = AdamOptimizer(
                learning_rate=lr, parameter_list=lenet.parameters())
            dynamic_loss_rec = []
            lenet.train()
            for batch_id, data in enumerate(reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)

                img = fluid.dygraph.to_variable(x_data)
                label = fluid.dygraph.to_variable(y_data)

                out = lenet(img)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                lenet.clear_gradients()
                dynamic_loss_rec.append(avg_loss.numpy()[0])
                if batch_id % 100 == 0:
                    _logger.info('{}: {}'.format('loss', avg_loss.numpy()))
                if batch_id > 500:
                    break
            lenet.eval()
        paddle.jit.save(
            layer=lenet,
            path="./dynamic_mnist/model",
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None, 1, 28, 28], dtype='float32')
            ])

        # static graph train
        _logger.info(
            "--------------------------static graph qat--------------------------"
        )
        static_loss_rec = []
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)

        main = fluid.Program()
        infer = fluid.Program()
        startup = fluid.Program()
        static_img, static_label, static_loss = _build_static_lenet(
            main, startup, False, seed)
        infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
                                                      seed)
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                opt = AdamOptimizer(learning_rate=lr)
                opt.minimize(static_loss)

        scope = core.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
        for param in main.all_parameters():
            param_tensor = scope.var(param.name).get_tensor()
            param_tensor.set(param_init_map[param.name], place)

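        # Build IR graphs for the static programs and apply the equivalent
        # quantization passes (weight/activation transform plus quant-dequant).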
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type,
            quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
        add_quant_dequant_pass = AddQuantDequantPass(
            scope=scope,
            place=place,
            quantizable_op_type=[
                'relu', 'leaky_relu', 'relu6', 'tanh', 'swish'
            ])
        transform_pass.apply(main_graph)
        transform_pass.apply(infer_graph)
        add_quant_dequant_pass.apply(main_graph)
        add_quant_dequant_pass.apply(infer_graph)
        build_strategy = fluid.BuildStrategy()
        build_strategy.fuse_all_reduce_ops = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=static_loss.name, build_strategy=build_strategy)

        feeder = fluid.DataFeeder(
            feed_list=[static_img, static_label], place=place)
        with fluid.scope_guard(scope):
            for batch_id, data in enumerate(reader()):
                loss_v, = exe.run(binary,
                                  feed=feeder.feed(data),
                                  fetch_list=[static_loss])
                static_loss_rec.append(loss_v[0])
                if batch_id % 100 == 0:
                    _logger.info('{}: {}'.format('loss', loss_v))

        save_program = infer_graph.to_program()
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model("./static_mnist", [infer_img.name],
                                          [infer_pre], exe, save_program)
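        # The dynamic and static losses should match batch by batch within a
        # tight tolerance; log the first divergence, then assert overall closeness.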
        rtol = 1e-08
        atol = 1e-10
        for i, (loss_d,
                loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
            diff = np.abs(loss_d - loss_s)
            if diff > (atol + rtol * np.abs(loss_s)):
                _logger.info(
                    "diff({}) at {}, dynamic loss = {}, static loss = {}".
                    format(diff, i, loss_d, loss_s))
                break

        self.assertTrue(
            np.allclose(
                np.array(dynamic_loss_rec),
                np.array(static_loss_rec),
                rtol=rtol,
                atol=atol,
                equal_nan=True),
            msg='Failed to do the imperative qat.')
Example no. 23
0
        layer.clear_gradients()

        if batch_id % 200 == 0:
            print("Loss at step {}: {:}".format(batch_id, avg_loss.numpy()))
    return avg_loss


'''
Part 3. Train & Save
'''
# enable dygraph mode
place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace()
fluid.enable_dygraph(place)
# create network
mnist = MNIST()
adam = AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters())
# create train data loader
train_reader = paddle.batch(reader_decorator(paddle.dataset.mnist.train()),
                            batch_size=BATCH_SIZE,
                            drop_last=True)
train_loader = fluid.io.DataLoader.from_generator(capacity=5)
train_loader.set_sample_list_generator(train_reader, places=place)
# train
for epoch in range(EPOCH_NUM):
    train_one_epoch(mnist, train_loader)
# save
fluid.dygraph.jit.save(layer=mnist, model_path=MODEL_PATH)
'''
Part 4. Load & Inference
'''
# load model by jit.load & inference
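The excerpt stops before the Part 4 code. Below is a minimal sketch of what the load-and-inference step could look like, assuming fluid.dygraph.jit.load is available in this Fluid version and that the saved MNIST layer accepts a [N, 1, 28, 28] float32 input (matching the reader above); the names introduced here are illustrative only.
# sketch: load the saved model as a TranslatedLayer (dygraph mode is still enabled above)
mnist_loaded = fluid.dygraph.jit.load(MODEL_PATH)
mnist_loaded.eval()
# run one random input through the loaded model
x = fluid.dygraph.to_variable(
    np.random.random((1, 1, 28, 28)).astype('float32'))
pred = mnist_loaded(x)
print(pred.numpy())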
Example no. 24
0
        if label is not None:
            acc = fluid.layers.accuracy(input=x, label=label)
            return x, acc
        else:
            return x


place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
with fluid.dygraph.guard(place):
    epoch_num = 5
    BATCH_SIZE = 64

    strategy = fluid.dygraph.parallel.prepare_context()
    mnist = MNIST("mnist")
    # sgd = SGDOptimizer(learning_rate=0.001)
    sgd = AdamOptimizer(learning_rate=0.001)
    # sgd = MomentumOptimizer(learning_rate=0.001, momentum=0.9)
    mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

    train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                batch_size=BATCH_SIZE,
                                drop_last=True)
    train_reader = fluid.contrib.reader.distributed_batch_reader(train_reader)

    for epoch in range(epoch_num):
        for batch_id, data in enumerate(train_reader()):
            dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                  for x in data]).astype('float32')
            y_data = np.array([x[1]
                               for x in data]).astype('int64').reshape(-1, 1)
Example no. 25
0
def train():
    place = fluid.CPUPlace()
    with fluid.dygraph.guard(place):

        pix2pix_gan = build_pix2pix_gan('pix2pix_gan')

        discriminator_optimizer = AdamOptimizer(learning_rate=2e-4, beta1=0.5)
        generator_optimizer = AdamOptimizer(learning_rate=2e-4, beta1=0.5)

        real_dataset, input_dataset = prepare_dataset(data_dir, is_train=True)
        real_test, input_test = prepare_dataset(data_dir, is_train=False)

        epoch = 0

        if os.path.exists('./model'):
            print('load prev checkpoint...')
            model, _ = fluid.dygraph.load_persistables('./model')
            pix2pix_gan.load_dict(model)
            with open("./checkpoint.txt", "r") as checkpoint:
                epoch = int(checkpoint.read()) + 1

        while epoch < num_epochs:

            print("Epoch id: ", epoch)

            total_loss_gen = 0
            total_loss_disc = 0  

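            # Shuffle targets and inputs with the same seed so the real/input
            # image pairs stay aligned.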
            seed = np.random.randint(1000)
            np.random.seed(seed)
            np.random.shuffle(real_dataset)
            np.random.seed(seed)
            np.random.shuffle(input_dataset)

            for tar, inpt in batch_generator(real_dataset, input_dataset, batch_size): 

                target = to_variable(tar)
                input_image = to_variable(inpt)

                gen_loss, disc_generated = pix2pix_gan(input_image, target, None, True)
                gen_loss.backward()
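                # Step the generator only: select its parameters by name prefix
                # so the discriminator is left untouched by this update.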
                vars_G = []
                for parm in pix2pix_gan.parameters():
                    if parm.name.startswith('pix2pix_gan/build_pix2pix_gan_0/generator_0'):
                        vars_G.append(parm)
                generator_optimizer.minimize(gen_loss, parameter_list=vars_G)
                pix2pix_gan.clear_gradients()

                disc_loss = pix2pix_gan(input_image, target, disc_generated, False)
                disc_loss.backward()
                vars_D = []
                for parm in pix2pix_gan.parameters():
                    if parm.name.startswith('pix2pix_gan/build_pix2pix_gan_0/discriminator_0'):
                        vars_D.append(parm)
                discriminator_optimizer.minimize(disc_loss, parameter_list=vars_D)
                pix2pix_gan.clear_gradients()

                total_loss_gen += gen_loss.numpy()[0]
                total_loss_disc += disc_loss.numpy()[0]

            print("Total generator loss: ", total_loss_gen)
            print("Total discriminator loss: ", total_loss_disc)

            if epoch % 10 == 0:
                # save checkpoint
                fluid.dygraph.save_persistables(pix2pix_gan.state_dict(), "./model")
                with open("./checkpoint.txt", "w") as checkpoint:
                    checkpoint.write(str(epoch))

                input_image = to_variable(input_test)
                generate_and_save_images(pix2pix_gan, input_image, epoch)

            epoch += 1
Example no. 26
0
    def test_gnn_float32(self):
        seed = 90

        startup = fluid.Program()
        startup.random_seed = seed
        main = fluid.Program()
        main.random_seed = seed

        scope = fluid.core.Scope()
        with new_program_scope(main=main, startup=startup, scope=scope):
            features = fluid.layers.data(name='features',
                                         shape=[1, 100, 50],
                                         dtype='float32',
                                         append_batch_size=False)
            # Use selected rows when it's supported.
            adj = fluid.layers.data(name='adj',
                                    shape=[1, 100, 100],
                                    dtype='float32',
                                    append_batch_size=False)
            labels = fluid.layers.data(name='labels',
                                       shape=[100, 1],
                                       dtype='int64',
                                       append_batch_size=False)

            model = GCN('test_gcn', 50)
            logits = model(features, adj)
            logits = fluid.layers.reshape(logits, logits.shape[1:])
            # In other example, it's nll with log_softmax. However, paddle's
            # log_loss only supports binary classification now.
            loss = fluid.layers.softmax_with_cross_entropy(logits, labels)
            loss = fluid.layers.reduce_sum(loss)

            adam = AdamOptimizer(learning_rate=1e-3)
            adam.minimize(loss)
            exe = fluid.Executor(fluid.CUDAPlace(0)
                                 if core.is_compiled_with_cuda()
                                 else fluid.CPUPlace())
            exe.run(startup)
            static_loss = exe.run(
                feed={
                    'features': np.ones([1, 100, 50], dtype=np.float32),
                    'adj': np.ones([1, 100, 100], dtype=np.float32),
                    'labels': np.ones([100, 1], dtype=np.int64)
                },
                fetch_list=[loss])[0]

            static_weight = np.array(
                scope.find_var(model.gc.weight.name).get_tensor())

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            features = np.ones([1, 100, 50], dtype=np.float32)
            # Use selected rows when it's supported.
            adj = np.ones([1, 100, 100], dtype=np.float32)
            labels = np.ones([100, 1], dtype=np.int64)

            model = GCN('test_gcn', 50)
            logits = model(to_variable(features), to_variable(adj))
            logits = fluid.layers.reshape(logits, logits.shape[1:])
            # In other example, it's nll with log_softmax. However, paddle's
            # log_loss only supports binary classification now.
            loss = fluid.layers.softmax_with_cross_entropy(
                logits, to_variable(labels))
            loss = fluid.layers.reduce_sum(loss)
            loss.backward()
            adam = AdamOptimizer(learning_rate=1e-3)

            adam.minimize(loss)
            model.clear_gradients()
            loss_value = loss.numpy()
            model_gc_weight_value = model.gc.weight.numpy()

        with fluid.dygraph.guard():
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

            features2 = np.ones([1, 100, 50], dtype=np.float32)
            # Use selected rows when it's supported.
            adj2 = np.ones([1, 100, 100], dtype=np.float32)
            labels2 = np.ones([100, 1], dtype=np.int64)

            model2 = GCN('test_gcn', 50)
            logits2 = model2(to_variable(features2), to_variable(adj2))
            logits2 = fluid.layers.reshape(logits2, logits2.shape[1:])
            # In other example, it's nll with log_softmax. However, paddle's
            # log_loss only supports binary classification now.
            loss2 = fluid.layers.softmax_with_cross_entropy(
                logits2, to_variable(labels2))
            loss2 = fluid.layers.reduce_sum(loss2)
            loss2.backward()
            adam2 = AdamOptimizer(learning_rate=1e-3)
            adam2.minimize(loss2)
            model2.clear_gradients()
            loss2_value = loss2.numpy()
            model2_gc_weight_value = model2.gc.weight.numpy()

        self.assertEqual(static_loss, loss_value)
        self.assertTrue(np.allclose(static_weight, model_gc_weight_value))
        self.assertEqual(static_loss, loss2_value)
        self.assertTrue(np.allclose(static_weight, model2_gc_weight_value))
        sys.stderr.write('%s %s\n' % (static_loss, loss_value))
Example no. 27
0
    def test_qat_save(self):

        imperative_qat = ImperativeQuantAware(
            weight_quantize_type='abs_max',
            activation_quantize_type='moving_average_abs_max',
            quantizable_layer_type=[
                'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh',
                'Swish'
            ])

        with fluid.dygraph.guard():
            lenet = ImperativeLenet()
            imperative_qat.quantize(lenet)
            adam = AdamOptimizer(
                learning_rate=0.001, parameter_list=lenet.parameters())
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
            test_reader = paddle.batch(
                paddle.dataset.mnist.test(), batch_size=32)

            epoch_num = 1
            for epoch in range(epoch_num):
                lenet.train()
                for batch_id, data in enumerate(train_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = fluid.dygraph.to_variable(x_data)
                    label = fluid.dygraph.to_variable(y_data)
                    out = lenet(img)
                    acc = fluid.layers.accuracy(out, label)
                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    lenet.clear_gradients()
                    if batch_id % 100 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}".
                            format(epoch, batch_id,
                                   avg_loss.numpy(), acc.numpy()))

                lenet.eval()
                for batch_id, data in enumerate(test_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = fluid.dygraph.to_variable(x_data)
                    label = fluid.dygraph.to_variable(y_data)

                    out = lenet(img)
                    acc_top1 = fluid.layers.accuracy(
                        input=out, label=label, k=1)
                    acc_top5 = fluid.layers.accuracy(
                        input=out, label=label, k=5)

                    if batch_id % 100 == 0:
                        _logger.info(
                            "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
                            format(epoch, batch_id,
                                   acc_top1.numpy(), acc_top5.numpy()))

            # save weights
            model_dict = lenet.state_dict()
            fluid.save_dygraph(model_dict, "save_temp")

            # test the correctness of `paddle.jit.save`
            data = next(test_reader())
            test_data = np.array([x[0].reshape(1, 28, 28)
                                  for x in data]).astype('float32')
            test_img = fluid.dygraph.to_variable(test_data)
            lenet.eval()
            before_save = lenet(test_img)

        # save inference quantized model
        path = "./qat_infer_model/lenet"
        save_dir = "./qat_infer_model"
        paddle.jit.save(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None, 1, 28, 28], dtype='float32')
            ])
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)
        [inference_program, feed_target_names,
         fetch_targets] = fluid.io.load_inference_model(
             dirname=save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX)
        after_save, = exe.run(inference_program,
                              feed={feed_target_names[0]: test_data},
                              fetch_list=fetch_targets)

        self.assertTrue(
            np.allclose(after_save, before_save.numpy()),
            msg='Failed to save the inference quantized model.')
Example no. 28
0
def finetune(args):
    module = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    # Use the appropriate tokenizer to preprocess the data set
    tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path())
    dataset = hub.dataset.MSRA_NER(tokenizer=tokenizer,
                                   max_seq_len=args.max_seq_len)

    with fluid.dygraph.guard():
        ts = TransformerSeqLabeling(num_classes=dataset.num_labels,
                                    transformer=module)
        adam = AdamOptimizer(learning_rate=1e-5,
                             parameter_list=ts.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            ts.load_dict(state_dict)

        loss_sum = total_infer = total_label = total_correct = cnt = 0
        for epoch in range(args.num_epoch):
            for batch_id, data in enumerate(
                    dataset.batch_records_generator(
                        phase="train",
                        batch_size=args.batch_size,
                        shuffle=True,
                        pad_to_batch_max_seq_len=False)):
                batch_size = len(data["input_ids"])
                input_ids = np.array(data["input_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                position_ids = np.array(data["position_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                segment_ids = np.array(data["segment_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                input_mask = np.array(data["input_mask"]).astype(
                    np.float32).reshape([batch_size, -1, 1])
                labels = np.array(data["label"]).astype(np.int64).reshape(
                    -1, 1)
                seq_len = np.array(data["seq_len"]).astype(np.int64).reshape(
                    -1, 1)
                pred, ret_infers = ts(input_ids, position_ids, segment_ids,
                                      input_mask)

                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)

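                # Accumulate chunk-level counts across batches for the
                # precision/recall/F1 report below.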
                loss_sum += avg_loss.numpy() * labels.shape[0]
                label_num, infer_num, correct_num = chunk_eval(
                    labels, ret_infers.numpy(), seq_len, dataset.num_labels, 1)
                cnt += labels.shape[0]

                total_infer += infer_num
                total_label += label_num
                total_correct += correct_num

                if batch_id % args.log_interval == 0:
                    precision, recall, f1 = calculate_f1(
                        total_label, total_infer, total_correct)
                    print('epoch {}: loss {}, f1 {} recall {} precision {}'.
                          format(epoch, loss_sum / cnt, f1, recall, precision))
                    loss_sum = total_infer = total_label = total_correct = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = ts.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
Example no. 29
0
def train_mnist(args):
    epoch_num = args.epoch
    BATCH_SIZE = 64

    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        mnist = MNIST()
        adam = AdamOptimizer(learning_rate=0.001,
                             parameter_list=mnist.parameters())
        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

        train_reader = paddle.batch(reader_decorator(
            paddle.dataset.mnist.train()),
                                    batch_size=BATCH_SIZE,
                                    drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        test_reader = paddle.batch(reader_decorator(
            paddle.dataset.mnist.test()),
                                   batch_size=BATCH_SIZE,
                                   drop_last=True)

        train_loader = fluid.io.DataLoader.from_generator(
            capacity=10, use_multiprocess=True)
        train_loader.set_sample_list_generator(train_reader, places=place)

        test_loader = fluid.io.DataLoader.from_generator(capacity=10,
                                                         use_multiprocess=True)
        test_loader.set_sample_list_generator(test_reader, places=place)

        total_train_time = 0
        for epoch in range(epoch_num):
            stime = time.time()
            for batch_id, data in enumerate(train_loader()):
                img, label = data
                label.stop_gradient = True

                cost, acc = mnist(img, label)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()

                adam.minimize(avg_loss)
                # save checkpoint
                mnist.clear_gradients()
                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {:}".format(
                        epoch, batch_id, avg_loss.numpy()))
            total_train_time += (time.time() - stime)

            mnist.eval()
            test_cost, test_acc = test_mnist(test_loader, mnist, BATCH_SIZE)
            mnist.train()
            if args.ce:
                print("kpis\ttest_acc\t%s" % test_acc)
                print("kpis\ttest_cost\t%s" % test_cost)
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))

        save_parameters = (not args.use_data_parallel) or (
            args.use_data_parallel
            and fluid.dygraph.parallel.Env().local_rank == 0)
        if save_parameters:
            fluid.save_dygraph(mnist.state_dict(), "save_temp")

            print("checkpoint saved")

            inference_mnist()
Example no. 30
0
def main():
    # Step 0: preparation
    writer = LogWriter(logdir="./log/scalar")
    place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        # Step 1: Define training dataloader
        image_folder = ""
        image_list_file = "dummy_data/fabric_list.txt"
        transform = Transform()  #Normalize2()  # [0,255]-->[0,1]
        x_data = DataLoader(image_folder, image_list_file, transform=transform)
        x_dataloader = fluid.io.DataLoader.from_generator(capacity=2,
                                                          return_list=True)
        x_dataloader.set_sample_generator(x_data, args.batch_size)

        total_batch = len(x_data) // args.batch_size

        # Step 2: Create model
        if args.net == "basic":
            D = Discriminator()
            G = Generator()
            E = Invertor()
        else:
            raise NotImplementedError(
                f"args.net: {args.net} is not Supported!")

        # Step 3: Define criterion and optimizer
        criterion = Basic_Loss
        D_optim = AdamOptimizer(learning_rate=args.lr,
                                parameter_list=D.parameters())
        G_optim = AdamOptimizer(learning_rate=args.lr,
                                parameter_list=G.parameters())
        E_optim = AdamOptimizer(learning_rate=args.lr,
                                parameter_list=E.parameters())

        G_loss_meter = AverageMeter()
        D_loss_meter = AverageMeter()
        E_loss_meter = AverageMeter()

        D.train()
        G.train()
        E.train()

        # Step 4: Slight Training
        iteration = -1
        is_slight_Train = True
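        # Warm-up ("slight") phase: train until the discriminator scores real
        # samples above 0.98 on average, then break out to the full training
        # in Step 5.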
        for epoch in range(1, args.epoch_num + 1):
            # alternately update the discriminator and the generator
            for (x, x_labels) in x_dataloader():
                n = x.shape[0]
                if is_slight_Train:
                    iteration += 1
                    x = fluid.layers.cast(x, dtype="float32")
                    x = fluid.layers.transpose(x, perm=[0, 3, 1, 2])
                    preds_x = D(x)
                    preds_x_array = preds_x.numpy()
                    #print("D(x),1",preds_array.shape, np.mean(preds_array))
                    writer.add_scalar(tag="D(x)=1",
                                      step=iteration,
                                      value=np.mean(preds_x_array))
                    if np.mean(preds_x_array) >= 0.98:
                        is_slight_Train = False

                    z = np.random.rand(n, 64)
                    zeros = np.zeros((n, 1))
                    z = to_variable(z)
                    zeros = to_variable(zeros)
                    z = fluid.layers.cast(z, dtype="float32")
                    zeros = fluid.layers.cast(zeros, dtype="int64")
                    preds_fx = D(G(z))
                    preds_fx_array = preds_fx.numpy()
                    writer.add_scalar(tag="D(G(z))=0",
                                      step=iteration,
                                      value=np.mean(preds_fx_array))
                    D_loss = criterion(preds_x, x_labels) + criterion(
                        preds_fx, zeros)
                    D_loss.backward()
                    D_optim.minimize(D_loss)
                    D.clear_gradients()
                    D_loss_meter.update(D_loss.numpy()[0], n)
                    writer.add_scalar(tag="D_loss",
                                      step=iteration,
                                      value=D_loss_meter.avg)
                    print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                          f"STEP{iteration}, " +
                          f"Average D Loss: {D_loss_meter.avg:4f}, ")

                    z = np.random.rand(n, 64)
                    ones = np.ones((n, 1))
                    z = to_variable(z)
                    ones = to_variable(ones)
                    z = fluid.layers.cast(z, dtype="float32")
                    ones = fluid.layers.cast(ones, dtype="int64")
                    preds = D(G(z))
                    preds_array = preds.numpy()
                    writer.add_scalar(tag="D(G(z))=1",
                                      step=iteration,
                                      value=np.mean(preds_array))
                    G_loss = criterion(preds, ones)
                    G_loss.backward()
                    G_optim.minimize(G_loss)
                    G.clear_gradients()
                    G_loss_meter.update(G_loss.numpy()[0], n)
                    writer.add_scalar(tag="G_loss",
                                      step=iteration,
                                      value=G_loss_meter.avg)
                    print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                          f"STEP{iteration}, " +
                          f"Average G Loss: {G_loss_meter.avg:4f}")

            if epoch % args.save_freq == 0 or epoch == args.epoch_num or not is_slight_Train:
                D_model_path = os.path.join(args.checkpoint_folder,
                                            f"D_{args.net}-Epoch-{epoch}")
                G_model_path = os.path.join(args.checkpoint_folder,
                                            f"G_{args.net}-Epoch-{epoch}")

                # save model and optimizer states
                model_dict = D.state_dict()
                fluid.save_dygraph(model_dict, D_model_path)
                optim_dict = D_optim.state_dict()
                fluid.save_dygraph(optim_dict, D_model_path)

                model_dict = G.state_dict()
                fluid.save_dygraph(model_dict, G_model_path)
                optim_dict = G_optim.state_dict()
                fluid.save_dygraph(optim_dict, G_model_path)

                print(
                    f'----- Save model: {D_model_path}.pdparams, {G_model_path}.pdparams'
                )
                if not is_slight_Train:
                    break

        # Step 5:  full training for Generator and Discriminator
        D_optim = AdamOptimizer(learning_rate=args.lr * 10,
                                parameter_list=D.parameters())
        G_optim = AdamOptimizer(learning_rate=args.lr * 10,
                                parameter_list=G.parameters())
        G_loss_meter = AverageMeter()
        D_loss_meter = AverageMeter()

        for epoch in range(1, args.epoch_num + 1):
            for (x, x_labels) in x_dataloader():
                n = x.shape[0]
                iteration += 1
                x = fluid.layers.cast(x, dtype="float32")
                x = fluid.layers.transpose(x, perm=[0, 3, 1, 2])
                preds1 = D(x)
                preds_array = preds1.numpy()
                writer.add_scalar(tag="D(x)=1",
                                  step=iteration,
                                  value=np.mean(preds_array))
                z = np.random.rand(n, 64)
                zeros = np.zeros((n, 1))
                z = to_variable(z)
                zeros = to_variable(zeros)
                z = fluid.layers.cast(z, dtype="float32")
                zeros = fluid.layers.cast(zeros, dtype="int64")
                preds2 = D(G(z))
                preds_array = preds2.numpy()
                #print("DG(z),0:",preds_array.shape, np.mean(preds_array))
                writer.add_scalar(tag="D(G(z))=0",
                                  step=iteration,
                                  value=np.mean(preds_array))
                D_loss = criterion(preds1, x_labels) + criterion(preds2, zeros)
                D_loss.backward()
                D_optim.minimize(D_loss)
                D.clear_gradients()
                D_loss_meter.update(D_loss.numpy()[0], n)
                writer.add_scalar(tag="D_loss",
                                  step=iteration,
                                  value=D_loss_meter.avg)
                print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                      f"STEP{iteration}, " +
                      f"Average D Loss: {D_loss_meter.avg:4f} ")
                z = np.random.rand(n, 64)
                ones = np.ones((n, 1))
                z = to_variable(z)
                ones = to_variable(ones)
                z = fluid.layers.cast(z, dtype="float32")
                ones = fluid.layers.cast(ones, dtype="int64")
                preds = D(G(z))
                preds_array = preds.numpy()
                #print("DG(z),1:",preds_array.shape, np.mean(preds_array))
                writer.add_scalar(tag="D(G(z))=1",
                                  step=iteration,
                                  value=np.mean(preds_array))
                G_loss = criterion(preds, ones)
                G_loss.backward()
                G_optim.minimize(G_loss)
                G.clear_gradients()
                G_loss_meter.update(G_loss.numpy()[0], n)
                writer.add_scalar(tag="G_loss",
                                  step=iteration,
                                  value=G_loss_meter.avg)
                print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                      f"STEP{iteration}, " +
                      f"Average G Loss: {G_loss_meter.avg:4f}")

            if epoch % args.save_freq == 0 or epoch == args.epoch_num:
                D_model_path = os.path.join(args.checkpoint_folder,
                                            f"D_{args.net}-Epoch-{epoch}")
                G_model_path = os.path.join(args.checkpoint_folder,
                                            f"G_{args.net}-Epoch-{epoch}")

                # save model and optimizer states
                model_dict = D.state_dict()
                fluid.save_dygraph(model_dict, D_model_path)
                optim_dict = D_optim.state_dict()
                fluid.save_dygraph(optim_dict, D_model_path)

                model_dict = G.state_dict()
                fluid.save_dygraph(model_dict, G_model_path)
                optim_dict = G_optim.state_dict()
                fluid.save_dygraph(optim_dict, G_model_path)
                print(
                    f'----- Save model: {D_model_path}.pdparams, {G_model_path}.pdparams'
                )

        # Step 6: full training for Inverter
        E_optim = AdamOptimizer(learning_rate=args.lr * 10,
                                parameter_list=E.parameters())
        E_loss_meter = AverageMeter()

        for epoch in range(1, args.epoch_num + 1):
            for (x, x_labels) in x_dataloader():
                n = x.shape[0]
                iteration += 1
                x = fluid.layers.cast(x, dtype="float32")
                image = x.numpy()[0] * 255
                writer.add_image(tag="x", step=iteration, img=image)
                x = fluid.layers.transpose(x, perm=[0, 3, 1, 2])
                invert_x = G(E(x))
                invert_image = fluid.layers.transpose(invert_x,
                                                      perm=[0, 2, 3, 1])
                invert_image = invert_image.numpy()[0] * 255
                #print("D(x),1",preds_array.shape, np.mean(preds_array))
                writer.add_image(tag="invert_x",
                                 step=iteration,
                                 img=invert_image)
                print(np.max(invert_image), np.min(invert_image))
                E_loss = fluid.layers.mse_loss(invert_x, x)
                print("E_loss shape:", E_loss.numpy().shape)
                E_loss.backward()
                E_optim.minimize(E_loss)
                E.clear_gradients()
                E_loss_meter.update(E_loss.numpy()[0], n)
                writer.add_scalar(tag="E_loss",
                                  step=iteration,
                                  value=E_loss_meter.avg)
                print(f"EPOCH[{epoch:03d}/{args.epoch_num:03d}], " +
                      f"STEP{iteration}, " +
                      f"Average E Loss: {E_loss_meter.avg:4f}, ")

            if epoch % args.save_freq == 0 or epoch == args.epoch_num:
                E_model_path = os.path.join(args.checkpoint_folder,
                                            f"E_{args.net}-Epoch-{epoch}")
                # save model and optimizer states
                model_dict = E.state_dict()
                fluid.save_dygraph(model_dict, E_model_path)
                optim_dict = E_optim.state_dict()
                fluid.save_dygraph(optim_dict, E_model_path)
                print(
                    f'----- Save model: {E_model_path}.pdparams, {E_model_path}.pdopt'
                )