Example #1
def train_mnist():
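    # Train a small MNIST classifier in dygraph mode with a Momentum optimizer,
    # timing data loading and per-batch work with AverageMeter/ProgressMeter
    # and running test_train()/test_p() on the test set after every epoch;
    # args.benchmark caps training at a single epoch.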
    epoch_num = 10
    if args.benchmark:
        epoch_num = 1
    BATCH_SIZE = 32
    with fluid.dygraph.guard():
        mnist = MNIST("mnist")
        #adam = AdamOptimizer(learning_rate=0.001)
        adam = MomentumOptimizer(learning_rate=0.01, momentum=0.5)
        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=BATCH_SIZE,
                                    drop_last=True)
        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=BATCH_SIZE,
                                   drop_last=True)
        eval_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=10,
                                   drop_last=True)
        for epoch in range(epoch_num):
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            progress = ProgressMeter(len(list(train_reader())) - 1,
                                     batch_time,
                                     data_time,
                                     losses,
                                     prefix="epoch: [{}]".format(epoch))
            end = Tools.time()
            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                dy_x_data = normalize(dy_x_data, 0.1307, 0.3081)
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(BATCH_SIZE, 1)
                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True
                cost, acc = mnist(img, label)
                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                mnist.clear_gradients()
                batch_time.update(Tools.time() - end)
                dy_out = avg_loss.numpy()[0]
                losses.update(dy_out, BATCH_SIZE)
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                end = Tools.time()
                #if batch_id % 100 == 0:
                #    print("Loss at epoch {} step {}: {:}".format(epoch, batch_id, avg_loss.numpy()))
            mnist.eval()
            test_cost, test_acc = test_train(test_reader, mnist, BATCH_SIZE)
            test_p(eval_reader, mnist, 10)
            mnist.train()
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))
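
Every snippet in this listing relies on a few helpers that are never shown: AverageMeter, ProgressMeter, Tools.time() and, in Example #1, normalize(). The sketch below is only a guess at what they might look like, inferred from how they are called here and from the familiar PyTorch-style meter pattern; treat every name and signature as an assumption rather than as the original code.

import time


class AverageMeter:
    """Track the latest value and the running average of one metric."""

    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.val = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val, n=1):
        # val is the latest measurement, n is how many samples it covers
        self.val = val
        self.sum += val * n
        self.count += n

    @property
    def avg(self):
        return self.sum / max(self.count, 1)

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(name=self.name, val=self.val, avg=self.avg)


class ProgressMeter:
    """Print a prefix, the current batch index and every tracked meter."""

    def __init__(self, num_batches, *meters, prefix=''):
        self.num_batches = num_batches
        self.meters = meters
        self.prefix = prefix

    def print(self, batch):
        entries = ['{} [{}/{}]'.format(self.prefix, batch, self.num_batches)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))


class Tools:
    """Single clock shared by the benchmark timing code."""

    @staticmethod
    def time():
        return time.time()


def normalize(data, mean, std):
    # element-wise (x - mean) / std, as used on the MNIST batch in Example #1
    return (data - mean) / std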
Example #2
def train():
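    # Train the Senta sentiment CNN with Adagrad: documents are padded to
    # args.padding_size using the vocabulary size as the pad index, progress is
    # printed every args.skip_steps batches, and a full pass over the dev set
    # runs every args.validation_steps batches.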
    with fluid.dygraph.guard(place):
        if args.benchmark:
            args.epoch = 1
        processor = reader.SentaProcessor(data_dir=args.data_dir,
                                          vocab_path=args.vocab_path,
                                          random_seed=args.random_seed)
        num_labels = len(processor.get_labels())

        num_train_examples = processor.get_num_examples(phase="train")

        max_train_steps = args.epoch * num_train_examples // args.batch_size // dev_count

        train_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='train',
            epoch=args.epoch,
            shuffle=True)

        eval_data_generator = processor.data_generator(
            batch_size=args.batch_size,
            phase='dev',
            epoch=args.epoch,
            shuffle=False)

        cnn_net = nets.CNN("cnn_net", args.vocab_size, args.batch_size,
                           args.padding_size)

        sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.lr)
        steps = 0
        total_cost, total_acc, total_num_seqs = [], [], []
        length = len(list(enumerate(train_data_generator())))
        for eop in range(args.epoch):
            time_begin = time.time()
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(length,
                                     batch_time,
                                     data_time,
                                     prefix="epoch: [{}]".format(eop))
            end = Tools.time()
            for batch_id, data in enumerate(train_data_generator()):
                data_time.update(Tools.time() - end)
                steps += 1
                doc = to_variable(
                    np.array([
                        np.pad(x[0][0:args.padding_size],
                               (0, args.padding_size -
                                len(x[0][0:args.padding_size])),
                               'constant',
                               constant_values=(args.vocab_size)) for x in data
                    ]).astype('int64').reshape(-1, 1))

                label = to_variable(
                    np.array([x[1] for x in data
                              ]).astype('int64').reshape(args.batch_size, 1))

                cnn_net.train()
                avg_cost, prediction, acc = cnn_net(doc, label)
                avg_cost.backward()
                batch_time.update(Tools.time() - end)
                np_mask = (doc.numpy() != args.vocab_size).astype('int32')
                word_num = np.sum(np_mask)
                sgd_optimizer.minimize(avg_cost)
                cnn_net.clear_gradients()
                total_cost.append(avg_cost.numpy() * word_num)
                total_acc.append(acc.numpy() * word_num)
                total_num_seqs.append(word_num)

                if steps % args.skip_steps == 0:
                    time_end = time.time()
                    used_time = time_end - time_begin
                    progress.print(batch_id + 1)
                    #print("step: %d, ave loss: %f, "
                    #      "ave acc: %f, speed: %f steps/s" %
                    #      (steps, np.sum(total_cost) / np.sum(total_num_seqs),
                    #       np.sum(total_acc) / np.sum(total_num_seqs),
                    #       args.skip_steps / used_time))
                    total_cost, total_acc, total_num_seqs = [], [], []
                    time_begin = time.time()

                if steps % args.validation_steps == 0:
                    total_eval_cost, total_eval_acc, total_eval_num_seqs = [], [], []
                    cnn_net.eval()
                    eval_steps = 0
                    for eval_batch_id, eval_data in enumerate(
                            eval_data_generator()):
                        eval_np_doc = np.array([
                            np.pad(x[0][0:args.padding_size],
                                   (0, args.padding_size -
                                    len(x[0][0:args.padding_size])),
                                   'constant',
                                   constant_values=(args.vocab_size))
                            for x in eval_data
                        ]).astype('int64').reshape(1, -1)
                        eval_label = to_variable(
                            np.array([x[1] for x in eval_data
                                      ]).astype('int64').reshape(
                                          args.batch_size, 1))
                        eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
                        eval_avg_cost, eval_prediction, eval_acc = cnn_net(
                            eval_doc, eval_label)

                        eval_np_mask = (eval_np_doc !=
                                        args.vocab_size).astype('int32')
                        eval_word_num = np.sum(eval_np_mask)
                        total_eval_cost.append(eval_avg_cost.numpy() *
                                               eval_word_num)
                        total_eval_acc.append(eval_acc.numpy() * eval_word_num)
                        total_eval_num_seqs.append(eval_word_num)

                        eval_steps += 1

                    time_end = time.time()
                    used_time = time_end - time_begin
                    print(
                        "Final validation result: step: %d, ave loss: %f, "
                        "ave acc: %f, speed: %f steps/s" %
                        (steps, np.sum(total_eval_cost) /
                         np.sum(total_eval_num_seqs), np.sum(total_eval_acc) /
                         np.sum(total_eval_num_seqs), eval_steps / used_time))
                    time_begin = time.time()

                # if steps % args.save_steps == 0:
                #     save_path = "save_dir_" + str(steps)
                #     print('save model to: ' + save_path)
                #     fluid.dygraph.save_persistables(cnn_net.state_dict(),
                #                                     save_path)
                end = Tools.time()
Example #3
def train():
    """
    train models
    :return:
    """

    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        transformer = TransFormer(
            'transformer', ModelHyperParams.src_vocab_size,
            ModelHyperParams.trg_vocab_size, ModelHyperParams.max_length + 1,
            ModelHyperParams.n_layer, ModelHyperParams.n_head,
            ModelHyperParams.d_key, ModelHyperParams.d_value,
            ModelHyperParams.d_model, ModelHyperParams.d_inner_hid,
            ModelHyperParams.prepostprocess_dropout,
            ModelHyperParams.attention_dropout, ModelHyperParams.relu_dropout,
            ModelHyperParams.preprocess_cmd, ModelHyperParams.postprocess_cmd,
            ModelHyperParams.weight_sharing, TrainTaskConfig.label_smooth_eps)

        optimizer = fluid.optimizer.SGD(learning_rate=0.003)

        if args.use_data_parallel:
            transformer = fluid.dygraph.parallel.DataParallel(
                transformer, strategy)

        reader = paddle.batch(wmt16.train(ModelHyperParams.src_vocab_size,
                                          ModelHyperParams.trg_vocab_size),
                              batch_size=TrainTaskConfig.batch_size)
        if args.use_data_parallel:
            reader = fluid.contrib.reader.distributed_batch_reader(reader)

        for i in range(200):
            dy_step = 0
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(len(list(reader())) - 1,
                                     batch_time,
                                     data_time,
                                     prefix="epoch: [{}]".format(i))
            end = Tools.time()
            for batch in reader():
                data_time.update(Tools.time() - end)
                np_values = prepare_batch_input(batch,
                                                ModelHyperParams.src_pad_idx,
                                                ModelHyperParams.trg_pad_idx,
                                                ModelHyperParams.n_head)

                enc_inputs, dec_inputs, label, weights = create_data(np_values)
                dy_sum_cost, dy_avg_cost, dy_predict, dy_token_num = transformer(
                    enc_inputs, dec_inputs, label, weights)

                if args.use_data_parallel:
                    dy_avg_cost = transformer.scale_loss(dy_avg_cost)
                    dy_avg_cost.backward()
                    transformer.apply_collective_grads()
                else:
                    dy_avg_cost.backward()

                optimizer.minimize(dy_avg_cost)
                transformer.clear_gradients()
                batch_time.update(Tools.time() - end)
                dy_step = dy_step + 1
                if dy_step % 1 == 0:
                    progress.print(dy_step)
                    print("pass num : {}, batch_id: {}, dy_graph avg loss: {}".
                          format(i, dy_step, dy_avg_cost.numpy()))
                end = Tools.time()
            print("pass : {} finished".format(i))
Example #4
def train_mnist(args):
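    # Train MNIST with Adam for five epochs, optionally under DataParallel with
    # a distributed sampler; after training, the parameters are saved to
    # "save_dir" and inference_mnist() is run.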
    epoch_num = 5
    BATCH_SIZE = 256

    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)
        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_sampler(
                paddle.dataset.mnist.train(), batch_size=BATCH_SIZE)
        else:
            train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                        batch_size=BATCH_SIZE,
                                        drop_last=True)

        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=BATCH_SIZE,
                                   drop_last=True)

        for epoch in range(epoch_num):
            # define eval
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            progress = ProgressMeter(len(list(train_reader())) - 1,
                                     batch_time,
                                     data_time,
                                     losses,
                                     prefix="epoch: [{}]".format(epoch))
            end = Tools.time()

            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                cost, acc = mnist(img, label)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()

                adam.minimize(avg_loss)
                # save checkpoint
                mnist.clear_gradients()
                batch_time.update(Tools.time() - end)
                dy_out = avg_loss.numpy()[0]
                losses.update(dy_out, BATCH_SIZE)
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                end = Tools.time()

            mnist.eval()
            test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
            mnist.train()
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))

        fluid.dygraph.save_persistables(mnist.state_dict(), "save_dir")
        print("checkpoint saved")

        inference_mnist()
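
Examples #4 and #7 call a test_mnist() helper that is not part of the listing. The following is a minimal sketch of what it might do, assuming the same MNIST model interface used above (the forward pass returns a prediction plus an accuracy); the name matches the call sites, but the body is an assumption rather than the original code.

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable


def test_mnist(reader, model, batch_size):
    # Average cross-entropy loss and accuracy over the whole test reader;
    # the caller is expected to have switched the model to eval() already.
    acc_set = []
    avg_loss_set = []
    for batch_id, data in enumerate(reader()):
        dy_x_data = np.array([x[0].reshape(1, 28, 28)
                              for x in data]).astype('float32')
        y_data = np.array([x[1] for x in data]).astype('int64').reshape(
            batch_size, 1)
        img = to_variable(dy_x_data)
        label = to_variable(y_data)
        label.stop_gradient = True
        prediction, acc = model(img, label)
        loss = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_loss = fluid.layers.mean(loss)
        acc_set.append(float(acc.numpy()))
        avg_loss_set.append(float(avg_loss.numpy()))
    return np.mean(avg_loss_set), np.mean(acc_set)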
Example #5
def train_resnet():
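    # Train ResNet on the flowers dataset, optionally under DataParallel,
    # reporting loss and top-1/top-5 accuracy every batch; batches that do not
    # fill batch_size are skipped.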
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        resnet = ResNet("resnet")
        optimizer = optimizer_setting()

        if args.use_data_parallel:
            resnet = fluid.dygraph.parallel.DataParallel(resnet, strategy)

        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_sampler(
                paddle.dataset.flowers.train(use_xmap=False),
                batch_size=batch_size * trainer_count)
        else:
            train_reader = paddle.batch(
                paddle.dataset.flowers.train(use_xmap=False),
                batch_size=batch_size)

        test_reader = paddle.batch(
            paddle.dataset.flowers.test(use_xmap=False), batch_size=batch_size)

        #file_name = './model/epoch_0.npz'
        #model_data = np.load( file_name )
        total_pass = len(list(train_reader())) - 1
        #total_batch_size = sum(1 for _ in train_reader())
        #total_batch_size = 10000
        for eop in range(epoch):

            resnet.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0

            #dict_state = resnet.state_dict()

            #resnet.load_dict( model_data )
            #print("load finished")
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(total_pass, batch_time, data_time,
                                     prefix="epoch: [{}]".format(eop))
            end = Tools.time()

            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array(
                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                if len(np.array([x[1]
                                 for x in data]).astype('int64')) != batch_size:
                    continue
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                    -1, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label._stop_gradient = True

                out = resnet(img)
                loss = fluid.layers.cross_entropy(input=out, label=label)
                avg_loss = fluid.layers.mean(x=loss)

                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

                dy_out = avg_loss.numpy()

                if args.use_data_parallel:
                    avg_loss = resnet.scale_loss(avg_loss)
                    avg_loss.backward()
                    resnet.apply_collective_grads()
                else:
                    avg_loss.backward()

                optimizer.minimize(avg_loss)
                resnet.clear_gradients()
                batch_time.update(Tools.time() - end)

                total_loss += dy_out
                total_acc1 += acc_top1.numpy()
                total_acc5 += acc_top5.numpy()
                total_sample += 1

                #print("epoch id: %d, batch step: %d, loss: %f" % (eop, batch_id, dy_out))
                if batch_id % 1 == 0:
                    progress.print(batch_id)
                    print( "epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
                           ( eop, batch_id, total_loss / total_sample, \
                             total_acc1 / total_sample, total_acc5 / total_sample))
                end = Tools.time()

            print("epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
                  (eop, batch_id, total_loss / total_sample, \
                   total_acc1 / total_sample, total_acc5 / total_sample))
Example #6
def train_ptb_lm():
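    # Train the PTB LSTM language model with SGD, piecewise learning-rate decay
    # and global-norm gradient clipping, using one of four size presets; the
    # validation set is evaluated after every epoch and the test set at the end.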

    args = parse_args()
    model_type = args.model_type

    vocab_size = 10000
    if model_type == "test":
        num_layers = 1
        batch_size = 2
        hidden_size = 10
        num_steps = 3
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 1
        max_epoch = 1
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "small":
        num_layers = 2
        batch_size = 20
        hidden_size = 200
        num_steps = 20
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 4
        max_epoch = 13
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "medium":
        num_layers = 2
        batch_size = 20
        hidden_size = 650
        num_steps = 35
        init_scale = 0.05
        max_grad_norm = 5.0
        epoch_start_decay = 6
        max_epoch = 39
        dropout = 0.5
        lr_decay = 0.8
        base_learning_rate = 1.0
    elif model_type == "large":
        num_layers = 2
        batch_size = 20
        hidden_size = 1500
        num_steps = 35
        init_scale = 0.04
        max_grad_norm = 10.0
        epoch_start_decay = 14
        max_epoch = 55
        dropout = 0.65
        lr_decay = 1.0 / 1.15
        base_learning_rate = 1.0
    else:
        print("model type not support")
        return

    with fluid.dygraph.guard(core.CUDAPlace(0)):
        fluid.default_main_program().random_seed = 33
        fluid.default_startup_program().random_seed = 33
        np.random.seed(33)
        ptb_model = PtbModel(
            "ptb_model",
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            num_layers=num_layers,
            num_steps=num_steps,
            init_scale=init_scale,
            dropout=dropout)

        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None

        data_path = args.data_path
        print("begin to load data")
        raw_data = reader.ptb_raw_data(data_path)
        print("finished load data")
        train_data, valid_data, test_data, _ = raw_data

        batch_len = len(train_data) // batch_size
        total_batch_size = (batch_len - 1) // num_steps
        log_interval = total_batch_size // 100

        bd = []
        lr_arr = [1.0]
        for i in range(1, max_epoch):
            bd.append(total_batch_size * i)
            new_lr = base_learning_rate * (lr_decay**
                                           max(i + 1 - epoch_start_decay, 0.0))
            lr_arr.append(new_lr)

        sgd = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=lr_arr))

        def eval(model, data):
            print("begion to eval")
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')

            model.eval()
            train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
            for batch_id, batch in enumerate(train_data_iter):
                x_data, y_data = batch
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                            init_cell)

                out_loss = dy_loss.numpy()

                init_hidden_data = last_hidden.numpy()
                init_cell_data = last_cell.numpy()

                total_loss += out_loss
                iters += num_steps

            print("eval finished")
            ppl = np.exp(total_loss / iters)
            print("ppl ", batch_id, ppl[0])

        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(max_grad_norm)
        for epoch_id in range(max_epoch):
            ptb_model.train()
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')

            train_data_iter = reader.get_data_iter(train_data, batch_size,
                                                   num_steps)

            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            progress = ProgressMeter(total_batch_size, batch_time, data_time,
                                     prefix="epoch: [{}]".format(epoch_id))
            start_time = time.time()
            end = Tools.time()
            for batch_id, batch in enumerate(train_data_iter):
                data_time.update(Tools.time() - end)
                x_data, y_data = batch
                x_data = x_data.reshape((-1, num_steps, 1))
                y_data = y_data.reshape((-1, 1))
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                            init_cell)

                out_loss = dy_loss.numpy()
                init_hidden_data = last_hidden.numpy()
                init_cell_data = last_cell.numpy()
                dy_loss.backward()
                sgd.minimize(dy_loss, grad_clip=grad_clip)
                
                ptb_model.clear_gradients()
                batch_time.update(Tools.time() - end)
                #losses.update(out_loss, batch_size)
                total_loss += out_loss
                iters += num_steps

                if batch_id > 0 and batch_id % log_interval == 0:
                    progress.print(batch_id)
                    ppl = np.exp(total_loss / iters)
                    print(epoch_id, "ppl ", batch_id, ppl[0],
                          sgd._global_learning_rate().numpy())
                end = Tools.time()

            print("one ecpoh finished", epoch_id)
            print("time cost ", time.time() - start_time)
            ppl = np.exp(total_loss / iters)
            print("ppl ", epoch_id, ppl[0])

            eval(ptb_model, valid_data)
        eval(ptb_model, test_data)
Example #7
def train_mnist(args):
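    # Train MNIST with Adam, optionally under DataParallel with a distributed
    # batch reader; args.ce (presumably continuous-evaluation mode) fixes the
    # random seeds and prints kpi lines for the test loss and accuracy.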
    epoch_num = args.epoch
    BATCH_SIZE = 32

    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)

        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)



        for epoch in range(epoch_num):

            total_loss = 0.0
            total_acc = 0.0
            total_sample = 0

            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            losses = AverageMeter('Loss', ':.4e')
            progress = ProgressMeter(len(list(train_reader())) - 1, batch_time, data_time,
                                     losses, prefix="epoch: [{}]".format(epoch))
            end = Tools.time()
            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array([x[0].reshape(1, 28, 28)
                                      for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                cost, acc = mnist(img, label)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                if args.use_data_parallel:
                    avg_loss = mnist.scale_loss(avg_loss)
                    avg_loss.backward()
                    mnist.apply_collective_grads()
                else:
                    avg_loss.backward()

                adam.minimize(avg_loss)
                # save checkpoint
                mnist.clear_gradients()
                batch_time.update(Tools.time() - end)

                total_loss += avg_loss.numpy()
                total_acc += acc.numpy()
                total_sample += 1

                dy_out = avg_loss.numpy()[0]
                losses.update(dy_out, BATCH_SIZE)
                if batch_id % 10 == 0:
                    progress.print(batch_id)
                    print("epoch %d | batch step %d, loss %0.3f acc %0.3f" % \
                          (epoch, batch_id, total_loss / total_sample, total_acc / total_sample))


                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {:}".format(
                        epoch, batch_id, avg_loss.numpy()))
                end = Tools.time()
            mnist.eval()
            test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
            mnist.train()
            if args.ce:
                print("kpis\ttest_acc\t%s" % test_acc)
                print("kpis\ttest_cost\t%s" % test_cost)
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))
Example #8
def train(args):
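    # Train the OCR attention model with Adam and global-norm gradient
    # clipping; the preliminary loop only counts the reader's batches, progress
    # is printed every 50 batches, and the local eval() runs every 2000 steps.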

    with fluid.dygraph.guard():
        backward_strategy = fluid.dygraph.BackwardStrategy()
        backward_strategy.sort_sum_gradient = True
        ocr_attention = OCRAttention("ocr_attention")

        if Config.learning_rate_decay == "piecewise_decay":
            learning_rate = fluid.layers.piecewise_decay(
                [50000], [Config.LR, Config.LR * 0.01])
        else:
            learning_rate = Config.LR
        optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
        dy_param_init_value = {}

        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(5.0)

        train_reader = data_reader.data_reader(
            Config.batch_size,
            cycle=args.total_step > 0,
            shuffle=True,
            data_type='train')

        infer_image = './data/data/test_images/'
        infer_files = './data/data/test.list'
        test_reader = data_reader.data_reader(
                Config.batch_size,
                cycle=False,
                data_type="test")
        def eval():
            ocr_attention.eval()
            total_loss = 0.0
            total_step = 0.0
            equal_size = 0
            for data in test_reader():
                data_dict = get_attention_feeder_data(data)

                label_in = to_variable(data_dict["label_in"])
                label_out = to_variable(data_dict["label_out"])

                label_out._stop_gradient = True
                label_out.trainable = False

                img = to_variable(data_dict["pixel"])

                prediction = ocr_attention(img, label_in)
                prediction = fluid.layers.reshape(
                    prediction,
                    [label_out.shape[0] * label_out.shape[1], -1],
                    inplace=False)

                score, topk = layers.topk(prediction, 1)

                seq = topk.numpy()
                seq = seq.reshape((args.batch_size, -1))

                mask = data_dict['mask'].reshape((args.batch_size, -1))
                seq_len = np.sum(mask, -1)

                trans_ref = data_dict["label_out"].reshape((args.batch_size, -1))
                for i in range(args.batch_size):
                    length = int(seq_len[i] - 1)
                    trans = seq[i][:length - 1]
                    ref = trans_ref[i][:length - 1]
                    if np.array_equal(trans, ref):
                        equal_size += 1

                total_step += args.batch_size
            # equal_size / total_step is the fraction of exactly matched
            # sequences, i.e. an accuracy rather than a cost
            print("eval acc", equal_size / total_step)

        total_step = 0
        epoch_num = 20
        if args.benchmark:
            epoch_num = 1
        j = 0
        for i in train_reader():
            j += 1
            if j % 100 == 0:
                print(j)
        print(j)
        #total_pass = len(list(train_reader()))
        #print(total_pass)
        for epoch in range(epoch_num):
            batch_id = 0

            total_loss = 0.0
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(399425, batch_time, data_time,
                                     prefix="epoch: [{}]".format(epoch))
            end = Tools.time()
            for data in train_reader():
                data_time.update(Tools.time() - end)
                total_step += 1
                data_dict = get_attention_feeder_data(data)

                label_in = to_variable(data_dict["label_in"])
                label_out = to_variable(data_dict["label_out"])

                label_out._stop_gradient = True
                label_out.trainable = False

                img = to_variable(data_dict["pixel"])

                prediction = ocr_attention(img, label_in)
                prediction = fluid.layers.reshape(
                    prediction,
                    [label_out.shape[0] * label_out.shape[1], -1],
                    inplace=False)
                label_out = fluid.layers.reshape(label_out, [-1, 1],
                                                 inplace=False)
                loss = fluid.layers.cross_entropy(
                    input=prediction, label=label_out)

                mask = to_variable(data_dict["mask"])

                loss = layers.elementwise_mul(loss, mask, axis=0)
                avg_loss = fluid.layers.reduce_sum(loss)

                total_loss += avg_loss.numpy()
                avg_loss.backward()
                optimizer.minimize(avg_loss, grad_clip=grad_clip)
                ocr_attention.clear_gradients()
                batch_time.update(Tools.time() - end)
                framework._dygraph_tracer()._clear_ops()

                if batch_id > 0 and batch_id % 50 == 0:
                    progress.print(batch_id)
                    print("epoch: {}, batch_id: {}, loss {}".format(epoch, batch_id, total_loss / args.batch_size / 50))

                    total_loss = 0.0

                if total_step > 0 and total_step % 2000 == 0:
                    ocr_attention.eval()
                    eval()
                    ocr_attention.train()

                batch_id += 1
                end = Tools.time()
Example #9
def train():
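    # Train SE-ResNeXt on the flowers dataset, optionally under DataParallel,
    # reporting softmax cross-entropy loss, top-1/top-5 accuracy and the
    # current learning rate every batch, with an eval pass after each epoch.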
    
    epoch_num = train_parameters["num_epochs"]
    if args.ce:
        epoch_num = args.epoch
    batch_size = train_parameters["batch_size"]

    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if args.use_data_parallel else fluid.CUDAPlace(0)
    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 90
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context() 
        
        se_resnext = SeResNeXt("se_resnext")
        optimizer = optimizer_setting(train_parameters)
        if args.use_data_parallel:
            se_resnext = fluid.dygraph.parallel.DataParallel(se_resnext, strategy)
        train_reader = paddle.batch(
            paddle.dataset.flowers.train(use_xmap=False),
            batch_size=batch_size,
            drop_last=True
            )
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)
        test_reader = paddle.batch(
            paddle.dataset.flowers.test(use_xmap=False), batch_size=32)       

        for epoch_id in range(epoch_num):
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            batch_time = AverageMeter('Time', ':6.3f')
            data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(len(list(train_reader())) - 1, batch_time, data_time,
                                      prefix="epoch: [{}]".format(epoch_id))
            end = Tools.time()
            for batch_id, data in enumerate(train_reader()):
                data_time.update(Tools.time() - end)
                dy_x_data = np.array(
                    [x[0].reshape(3, 224, 224)
                    for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(
                        batch_size, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                out = se_resnext(img)
                softmax_out = fluid.layers.softmax(out, use_cudnn=False)
                loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
                avg_loss = fluid.layers.mean(x=loss)
                
                acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5)

                dy_out = avg_loss.numpy()
                if args.use_data_parallel:
                    avg_loss = se_resnext.scale_loss(avg_loss)
                    avg_loss.backward()
                    se_resnext.apply_collective_grads()
                else:
                    avg_loss.backward()

                optimizer.minimize(avg_loss)
                se_resnext.clear_gradients()
                batch_time.update(Tools.time() - end)
                lr = optimizer._global_learning_rate().numpy()
                total_loss += dy_out
                total_acc1 += acc_top1.numpy()
                total_acc5 += acc_top5.numpy()
                total_sample += 1
                if batch_id % 1 == 0:
                    progress.print(batch_id)
                    print( "epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f lr %0.5f" % \
                           ( epoch_id, batch_id, total_loss / total_sample, \
                             total_acc1 / total_sample, total_acc5 / total_sample, lr))
                end = Tools.time()

            if args.ce:
                print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
                print("kpis\ttrain_acc5\t%0.3f" % (total_acc5 / total_sample))
                print("kpis\ttrain_loss\t%0.3f" % (total_loss / total_sample))
            print("epoch %d | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \
                  (epoch_id, batch_id, total_loss / total_sample, \
                   total_acc1 / total_sample, total_acc5 / total_sample))
            se_resnext.eval()
            eval(se_resnext, test_reader)
            se_resnext.train()
Example #10
def train(args):
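    # Train CycleGAN: every step updates the generators with optimizer1, then
    # discriminator A and discriminator B with their own optimizers, drawing
    # fake samples from the two image pools; per-batch losses are appended to
    # logging_train.txt.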
    with fluid.dygraph.guard():
        max_images_num = data_reader.max_images_num()
        shuffle = True
        data_shape = [-1] + data_reader.image_shape()
        #print(data_shape)

        A_pool = ImagePool()
        B_pool = ImagePool()

        A_reader = paddle.batch(data_reader.a_reader(shuffle=shuffle),
                                args.batch_size)()
        B_reader = paddle.batch(data_reader.b_reader(shuffle=shuffle),
                                args.batch_size)()
        A_test_reader = data_reader.a_test_reader()
        B_test_reader = data_reader.b_test_reader()

        cycle_gan = Cycle_Gan("cycle_gan", istrain=True)

        losses = [[], []]
        t_time = 0
        optimizer1 = optimizer_setting()
        optimizer2 = optimizer_setting()
        optimizer3 = optimizer_setting()

        for epoch in range(args.epoch):

            pro_batch_time = AverageMeter('Time', ':6.3f')
            pro_data_time = AverageMeter('Data', ':6.3f')
            progress = ProgressMeter(max_images_num,
                                     pro_batch_time,
                                     pro_data_time,
                                     prefix="epoch: [{}]".format(epoch))
            end = Tools.time()

            batch_id = 0
            for i in range(max_images_num):

                data_A = next(A_reader)
                data_B = next(B_reader)
                pro_data_time.update(Tools.time() - end)

                s_time = time.time()
                data_A = np.array([data_A[0].reshape(3, 256,
                                                     256)]).astype("float32")
                data_B = np.array([data_B[0].reshape(3, 256,
                                                     256)]).astype("float32")
                data_A = to_variable(data_A)
                data_B = to_variable(data_B)

                # optimize the g_A network
                fake_A, fake_B, cyc_A, cyc_B, g_A_loss, g_B_loss, idt_loss_A, idt_loss_B, cyc_A_loss, cyc_B_loss, g_loss = cycle_gan(
                    data_A, data_B, True, False, False)

                g_loss_out = g_loss.numpy()

                g_loss.backward()
                vars_G = []
                for param in cycle_gan.parameters():
                    if param.name[:
                                  52] == "cycle_gan/Cycle_Gan_0/build_generator_resnet_9blocks":
                        vars_G.append(param)

                optimizer1.minimize(g_loss, parameter_list=vars_G)
                cycle_gan.clear_gradients()

                fake_pool_B = B_pool.pool_image(fake_B).numpy()
                fake_pool_B = np.array([fake_pool_B[0].reshape(3, 256, 256)
                                        ]).astype("float32")
                fake_pool_B = to_variable(fake_pool_B)

                fake_pool_A = A_pool.pool_image(fake_A).numpy()
                fake_pool_A = np.array([fake_pool_A[0].reshape(3, 256, 256)
                                        ]).astype("float32")
                fake_pool_A = to_variable(fake_pool_A)

                # optimize the d_A network
                rec_B, fake_pool_rec_B = cycle_gan(data_B, fake_pool_B, False,
                                                   True, False)
                d_loss_A = (fluid.layers.square(fake_pool_rec_B) +
                            fluid.layers.square(rec_B - 1)) / 2.0
                d_loss_A = fluid.layers.reduce_mean(d_loss_A)

                d_loss_A.backward()
                vars_da = []
                for param in cycle_gan.parameters():
                    if param.name[:
                                  47] == "cycle_gan/Cycle_Gan_0/build_gen_discriminator_0":
                        vars_da.append(param)
                optimizer2.minimize(d_loss_A, parameter_list=vars_da)
                cycle_gan.clear_gradients()

                # optimize the d_B network

                rec_A, fake_pool_rec_A = cycle_gan(data_A, fake_pool_A, False,
                                                   False, True)
                d_loss_B = (fluid.layers.square(fake_pool_rec_A) +
                            fluid.layers.square(rec_A - 1)) / 2.0
                d_loss_B = fluid.layers.reduce_mean(d_loss_B)

                d_loss_B.backward()
                vars_db = []
                for param in cycle_gan.parameters():
                    if param.name[:
                                  47] == "cycle_gan/Cycle_Gan_0/build_gen_discriminator_1":
                        vars_db.append(param)
                optimizer3.minimize(d_loss_B, parameter_list=vars_db)

                cycle_gan.clear_gradients()

                batch_time = time.time() - s_time
                t_time += batch_time
                pro_batch_time.update(Tools.time() - end)
                # print(
                #     "epoch{}; batch{}; g_loss:{}; d_A_loss: {}; d_B_loss:{} \
                #     ; \n g_A_loss: {}; g_A_cyc_loss: {}; g_A_idt_loss: {}\
                #     ; g_B_loss: {}; g_B_cyc_loss:  {}; g_B_idt_loss: {}\
                #     ;Batch_time_cost: {:.2f}"\
                #         .format(epoch, batch_id, g_loss_out[0],\
                #                                      d_loss_A.numpy()[0], \
                #                                      d_loss_B.numpy()[0],\
                #                                      g_A_loss.numpy()[0],\
                #                                      cyc_A_loss.numpy()[0], \
                #                                      idt_loss_A.numpy()[0],  \
                #                                      g_B_loss.numpy()[0],\
                #                                      cyc_B_loss.numpy()[0],\
                #                                      idt_loss_B.numpy()[0], \
                #                                      batch_time))
                with open('logging_train.txt', 'a') as log_file:
                    now = time.strftime("%c")
                    log_file.write(
                        "time: {}; epoch{}; batch{}; d_A_loss: {}; g_A_loss: {}; \
                    g_A_cyc_loss: {}; g_A_idt_loss: {}; d_B_loss: {}; \
                    g_B_loss: {}; g_B_cyc_loss: {}; g_B_idt_loss: {}; \
                    Batch_time_cost: {:.2f}\n".format(
                            now, epoch, batch_id, d_loss_A[0], g_A_loss[0],
                            cyc_A_loss[0], idt_loss_A[0], d_loss_B[0],
                            g_B_loss[0], cyc_B_loss[0], idt_loss_B[0],
                            batch_time))
                losses[0].append(g_A_loss[0])
                losses[1].append(d_loss_A[0])
                sys.stdout.flush()
                batch_id += 1

                if batch_id % 10 == 0:
                    progress.print(batch_id)
                    print("epoch{}; | batch step{}; g_A_loss:{}; d_A_loss:{}" \
                        .format(epoch, batch_id, g_A_loss.numpy()[0], d_loss_A.numpy()[0]))

                end = Tools.time()

            if args.save_checkpoints:
                fluid.dygraph.save_persistables(
                    cycle_gan.state_dict(),
                    args.output + "/checkpoints/{}".format(epoch))