    def test_main(self):
        main = fluid.Program()
        startup = fluid.Program()
        startup.random_seed = 1
        with fluid.scope_guard(fluid.core.Scope()):
            with fluid.program_guard(main, startup):
                data = fluid.layers.data(
                    name='image', shape=[3, 224, 224], dtype='float32')
                label = fluid.layers.data(
                    name='label', shape=[1], dtype='int64')
                out = Lenet(data, class_dim=102)
                loss = fluid.layers.cross_entropy(input=out, label=label)
                loss = fluid.layers.mean(loss)
                opt = fluid.optimizer.Momentum(
                    learning_rate=0.1,
                    momentum=0.9,
                    regularization=fluid.regularizer.L2Decay(1e-4))

                opt.minimize(loss)
        place = fluid.CUDAPlace(0)
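        # With multi_devices=True, decorate_reader yields one feed dict per
        # device so the ParallelExecutor can split each batch across GPUs.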
        feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
        reader = feeder.decorate_reader(
            paddle.batch(
                flowers.train(), batch_size=16), multi_devices=True)
        exe = fluid.Executor(place)
        exe.run(startup)
        pe = fluid.ParallelExecutor(
            use_cuda=True, loss_name=loss.name, main_program=main)

        for batch_id, data in enumerate(reader()):
            loss_np = np.array(pe.run(feed=data, fetch_list=[loss.name])[0])
            print(batch_id, loss_np)
            if batch_id == 2:
                break

    def parallel_exe(self, use_cuda, seed):
        main = fluid.Program()
        startup = fluid.Program()
        startup.random_seed = seed
        with fluid.scope_guard(fluid.core.Scope()):
            with fluid.program_guard(main, startup):
                data = fluid.layers.data(name='image',
                                         shape=[3, 224, 224],
                                         dtype='float32')
                label = fluid.layers.data(name='label',
                                          shape=[1],
                                          dtype='int64')
                out = Lenet(data, class_dim=102)
                loss = fluid.layers.cross_entropy(input=out, label=label)
                loss = fluid.layers.mean(loss)
                opt = fluid.optimizer.Momentum(
                    learning_rate=0.1,
                    momentum=0.9,
                    regularization=fluid.regularizer.L2Decay(1e-4))

                opt.minimize(loss)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
        reader = feeder.decorate_reader(paddle.batch(flowers.train(),
                                                     batch_size=16),
                                        multi_devices=True)

        exe = fluid.Executor(place)
        exe.run(startup)

        pe = fluid.ParallelExecutor(use_cuda=use_cuda,
                                    loss_name=loss.name,
                                    main_program=main)

        for batch_id, data in enumerate(reader()):
            loss_np = pe.run(feed=data, fetch_list=[loss.name])[0]
            print(batch_id, loss_np)
            if batch_id == 2:
                break
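
    def test_parallel_exe_cuda(self):
        # Hedged sketch: how the helper above is presumably driven in the
        # original test class (the method name here is an assumption).
        self.parallel_exe(use_cuda=True, seed=1)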
Example No. 3
base_model_program = fluid.default_main_program().clone()

# Reload the network's classifier here, sized to this project's number of classes
model = fluid.layers.fc(input=pool, size=102, act='softmax')

# Get the loss function and the accuracy function
cost = fluid.layers.cross_entropy(input=model, label=label)
avg_cost = fluid.layers.mean(cost)
acc = fluid.layers.accuracy(input=model, label=label)

# Define the optimization method
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-3)
opts = optimizer.minimize(avg_cost)

# Load the flowers data
train_reader = paddle.batch(flowers.train(), batch_size=16)

# Define an executor that uses the GPU
place = fluid.CUDAPlace(0)
# place = fluid.CPUPlace()
exe = fluid.Executor(place)
# Initialize the parameters
exe.run(fluid.default_startup_program())

# The original pretrained model provided by the official release
src_pretrain_model_path = 'models/ResNet50_pretrained/'


# This function checks whether a model file exists
def if_exist(var):
    path = os.path.join(src_pretrain_model_path, var.name)
    return os.path.exists(path)
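
# The original snippet is cut off mid-function above; the return statement and
# the call below are a hedged reconstruction following the identical
# load_vars(predicate=if_exist) pattern in the later examples, not the
# original code.
fluid.io.load_vars(exe, src_pretrain_model_path, predicate=if_exist)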
Example No. 4
def train_parallel_exe(args):

    class_dim = 1000
    image_shape = [3, 224, 224]

    if args.use_recordio:
        reader = fluid.layers.open_recordio_file(
            filename='./flowers_bs_12_3_224_224.recordio',
            shapes=[[-1, 3, 224, 224], [-1, 1]],
            lod_levels=[0, 0],
            dtypes=['float32', 'int64'])
        image, label = fluid.layers.read_file(reader)
    else:
        image = fluid.layers.data(
            name='image', shape=image_shape, dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')

        place = fluid.CUDAPlace(0)
        feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
        train_reader = feeder.decorate_reader(
            paddle.batch(
                train() if args.use_fake_reader else flowers.train(),
                batch_size=args.batch_size_per_gpu),
            multi_devices=True)

        train_reader_iter = train_reader()
        if args.fix_data_in_gpu:
            # Read one batch up front and reuse it every iteration, so the
            # benchmark measures compute without reader/feed overhead.
            data = next(train_reader_iter)
            feed_data = data

    prediction, avg_cost, accuracy, accuracy5 = net_conf(image, label,
                                                         class_dim)

    add_optimizer(args, avg_cost)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.allow_op_delay = True
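    # allow_op_delay lets the executor delay and batch certain ops during
    # scheduling, which can improve throughput.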

    build_strategy = fluid.BuildStrategy()
    if args.balance_parameter_opt_between_cards:
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
    else:
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
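    # Reduce shards parameter optimization so each card owns a slice of the
    # parameters; AllReduce keeps a full, synchronized replica on every card.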

    exe = fluid.ParallelExecutor(
        loss_name=avg_cost.name,
        use_cuda=True,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    time_record = []
    train_start = time.time()
    img_count = 0
    for batch_id in range(args.number_iteration):
        if args.do_profile and batch_id >= 5 and batch_id < 8:
            with profiler.profiler('All', 'total',
                                   '/tmp/profile_parallel_exe') as prof:
                if args.use_recordio:
                    exe.run([])
                else:
                    exe.run([],
                            feed=feed_data if args.fix_data_in_gpu else
                            next(train_reader_iter))
            continue

        if args.use_recordio:
            cost_val = exe.run([avg_cost.name] if (batch_id + 1) %
                               args.display_step == 0 else [])
        else:
            cost_val = exe.run(
                [avg_cost.name]
                if (batch_id + 1) % args.display_step == 0 else [],
                feed=feed_data
                if args.fix_data_in_gpu else next(train_reader_iter))

        img_count += args.batch_size

        if (batch_id + 1) % args.display_step == 0:
            train_stop = time.time()
            step_time = train_stop - train_start
            time_record.append(step_time)

            print("iter=%d, cost=%s, elapse=%f, img/sec=%f" %
                  ((batch_id + 1), np.array(cost_val[0]), step_time,
                   img_count / step_time))

            img_count = 0
            train_start = time.time()

    skip_time_record = args.skip_first_steps // args.display_step
    time_record[0:skip_time_record] = []

    if args.show_record_time:
        for i, ele in enumerate(time_record):
            print("iter:{0}, time consume:{1}".format(i, ele))

    img_count = (
        args.number_iteration - args.skip_first_steps) * args.batch_size

    print("average time:{0}, img/sec:{1}".format(
        np.mean(time_record), img_count / np.sum(time_record)))
Example No. 5
def train_parallel_do(args):

    class_dim = 1000
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    places = fluid.layers.get_places()
    pd = fluid.layers.ParallelDo(places, use_nccl=True)
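    # ParallelDo replicates the do-block below on every place returned by
    # get_places(); use_nccl=True aggregates gradients across GPUs via NCCL.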

    with pd.do():
        image_ = pd.read_input(image)
        label_ = pd.read_input(label)
        out = SE_ResNeXt(input=image_, class_dim=class_dim)
        cost = fluid.layers.cross_entropy(input=out, label=label_)
        avg_cost = fluid.layers.mean(x=cost)
        accuracy = fluid.layers.accuracy(input=out, label=label_)
        pd.write_output(avg_cost)
        pd.write_output(accuracy)

    avg_cost, accuracy = pd()
    avg_cost = fluid.layers.mean(x=avg_cost)
    # accuracy = fluid.layers.mean(x=accuracy)

    add_optimizer(args, avg_cost)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    train_reader = paddle.batch(
        train() if args.use_fake_reader else flowers.train(),
        batch_size=args.batch_size)

    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    train_reader_iter = train_reader()
    if args.fix_data_in_gpu:
        # Read and feed one batch up front, then reuse it every iteration so
        # the benchmark excludes reader/feed overhead.
        data = next(train_reader_iter)
        feed_data = feeder.feed(data)

    time_record = []
    img_count = 0
    train_start = time.time()

    for batch_id in range(args.number_iteration):
        if args.do_profile and batch_id >= 5 and batch_id < 8:
            with profiler.profiler('All', 'total',
                                   '/tmp/profile_parallel_do') as prof:
                exe.run(fluid.default_main_program(),
                        feed=feed_data if args.fix_data_in_gpu else
                        feeder.feed(next(train_reader_iter)),
                        fetch_list=[],
                        use_program_cache=True)
            continue

        cost_val = exe.run(fluid.default_main_program(),
                           feed=feed_data if args.fix_data_in_gpu else
                           feeder.feed(next(train_reader_iter)),
                           fetch_list=[avg_cost.name]
                           if (batch_id + 1) % args.display_step == 0 else [],
                           use_program_cache=True)

        img_count += args.batch_size

        if (batch_id + 1) % args.display_step == 0:
            train_stop = time.time()
            step_time = train_stop - train_start
            time_record.append(step_time)

            print("iter=%d, cost=%s, elapse=%f, img/sec=%f" %
                  ((batch_id + 1), np.array(cost_val[0]), step_time,
                   img_count / step_time))

            img_count = 0
            train_start = time.time()

    skip_time_record = args.skip_first_steps // args.display_step
    time_record[0:skip_time_record] = []

    if args.show_record_time:
        for i, ele in enumerate(time_record):
            print("iter:{0}, time consume:{1}".format(i, ele))

    img_count = (
        args.number_iteration - args.skip_first_steps) * args.batch_size

    print("average time:{0}, img/sec:{1}".format(
        np.mean(time_record), img_count / np.sum(time_record)))
Example No. 6
def train_parallel_exe(args,
                       learning_rate,
                       batch_size,
                       num_passes,
                       init_model=None,
                       pretrained_model=None,
                       model_save_dir='model',
                       parallel=True,
                       use_nccl=True,
                       lr_strategy=None,
                       layers=50):
    class_dim = 1000
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    if args.model == 'se_resnext':
        out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
    else:
        out = mobile_net(img=image, class_dim=class_dim)

    cost = fluid.layers.cross_entropy(input=out, label=label)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    avg_cost = fluid.layers.mean(x=cost)

    test_program = fluid.default_main_program().clone(for_test=True)
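    # for_test=True prunes backward and optimizer ops from the clone, leaving
    # an inference-only program that shares parameters with training.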

    if "piecewise_decay" in lr_strategy:
        bd = lr_strategy["piecewise_decay"]["bd"]
        lr = lr_strategy["piecewise_decay"]["lr"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(boundaries=bd,
                                                       values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    elif "cosine_decay" in lr_strategy:
        step_each_epoch = lr_strategy["cosine_decay"]["step_each_epoch"]
        epochs = lr_strategy["cosine_decay"]["epochs"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=cosine_decay(learning_rate=learning_rate,
                                       step_each_epoch=step_each_epoch,
                                       epochs=epochs),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
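        # cosine_decay is defined elsewhere in the original script; it
        # presumably anneals the rate as
        #     learning_rate * (cos(pi * epoch / epochs) + 1) / 2,
        # with epoch derived from the global step and step_each_epoch.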
    else:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))

    opts = optimizer.minimize(avg_cost)

    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    fluid.default_startup_program().random_seed = 1000
    exe.run(fluid.default_startup_program())

    if init_model is not None:
        fluid.io.load_persistables(exe, init_model)

    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    train_reader = paddle.batch(flowers.train(), batch_size=batch_size)
    test_reader = paddle.batch(flowers.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
    test_exe = fluid.ParallelExecutor(use_cuda=True,
                                      main_program=test_program,
                                      share_vars_from=train_exe)

    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
    train_speed = []
    for pass_id in range(num_passes):
        train_info = [[], [], []]
        test_info = [[], [], []]
        pass_time = 0
        pass_num = 0
        pass_speed = 0.0
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = train_exe.run(fetch_list,
                                             feed=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            pass_time += period
            pass_num += len(data)
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            train_info[0].append(loss)
            train_info[1].append(acc1)
            train_info[2].append(acc5)
            if batch_id % 10 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, \
                       acc1 {3}, acc5 {4} time {5}"
                                                   .format(pass_id, \
                       batch_id, loss, acc1, acc5, \
                       "%2.2f sec" % period))
                sys.stdout.flush()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        pass_speed = pass_num / pass_time
        train_speed.append(pass_speed)
        if pass_id == num_passes - 1:
            train_acc_top1_kpi.add_record(train_acc1)
            train_acc_top5_kpi.add_record(train_acc5)
            train_cost_kpi.add_record(train_loss)
            mean_pass_speed = np.array(train_speed).mean()
            train_speed_kpi.add_record(mean_pass_speed)
        for batch_id, data in enumerate(test_reader()):
            t1 = time.time()
            loss, acc1, acc5 = test_exe.run(fetch_list, feed=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            test_info[0].append(loss)
            test_info[1].append(acc1)
            test_info[2].append(acc5)
            if batch_id % 10 == 0:
                print("Pass {0},testbatch {1},loss {2}, \
                       acc1 {3},acc5 {4},time {5}"
                                                  .format(pass_id, \
                       batch_id, loss, acc1, acc5, \
                       "%2.2f sec" % period))
                sys.stdout.flush()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \
               test_loss {4}, test_acc1 {5}, test_acc5 {6}, pass_time {7}, train_speed {8}"
                                                           .format(pass_id, \
              train_loss, train_acc1, train_acc5, test_loss, test_acc1, \
              test_acc5, pass_time, pass_num / pass_time))
        sys.stdout.flush()
    train_acc_top1_kpi.persist()
    train_acc_top5_kpi.persist()
    train_cost_kpi.persist()
    train_speed_kpi.persist()
Example No. 7
def train(args):
    # parameters from arguments
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    with_memory_optimization = args.with_mem_opt
    model_save_dir = args.model_save_dir
    use_ngraph = os.getenv('FLAGS_use_ngraph')

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000

    train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_py_reader, test_cost, test_acc1, test_acc5 = build_program(
        is_train=False,
        main_prog=test_prog,
        startup_prog=startup_prog,
        args=args)
    test_prog = test_prog.clone(for_test=True)

    if with_memory_optimization and use_ngraph:
        fluid.memory_optimize(train_prog)
        fluid.memory_optimize(test_prog)

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)


    if checkpoint is not None:
        fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)

    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(
            exe, pretrained_model, main_program=train_prog, predicate=if_exist)

    if args.use_gpu:
        device_num = get_device_num()
    else:
        device_num = 1
    train_batch_size = args.batch_size // device_num

    test_batch_size = 16
    if not args.enable_ce:
        # NOTE: the order of batch data generated by batch_reader
        # must be the same in the respective processes.
        shuffle_seed = 1 if num_trainers > 1 else None
        train_reader = reader.train(batch_size=train_batch_size, shuffle_seed=shuffle_seed)
        test_reader = reader.val(batch_size=test_batch_size)
    else:
        # use flowers dataset for CE and set use_xmap False to avoid disorder data
        # but it is time consuming. For faster speed, need another dataset.
        import random
        random.seed(0)
        np.random.seed(0)
        train_reader = paddle.batch(
            flowers.train(use_xmap=False),
            batch_size=train_batch_size,
            drop_last=True)
        test_reader = paddle.batch(
            flowers.test(use_xmap=False), batch_size=test_batch_size)

    train_py_reader.decorate_paddle_reader(train_reader)
    test_py_reader.decorate_paddle_reader(test_reader)

    if not use_ngraph:
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = args.with_mem_opt
        build_strategy.enable_inplace = args.with_inplace
        build_strategy.fuse_all_reduce_ops = True
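        # Fusing all-reduce ops batches gradient communication into fewer
        # kernels, reducing per-op launch overhead.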
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = device_num
        exec_strategy.num_iteration_per_drop_scope = 10
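        # num_iteration_per_drop_scope sets how many iterations run before
        # local execution scopes are cleared, trading memory for speed.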

        if num_trainers > 1 and args.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
            # NOTE: the process is fast when num_threads is 1
            # for multi-process training.
            exec_strategy.num_threads = 1

        train_exe = fluid.ParallelExecutor(
            main_program=train_prog,
            use_cuda=bool(args.use_gpu),
            loss_name=train_cost.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    else:
        train_exe = exe

    train_fetch_vars = [train_cost, train_acc1, train_acc5, global_lr]
    train_fetch_list = []
    for var in train_fetch_vars:
        var.persistable = True
        train_fetch_list.append(var.name)

    test_fetch_vars = [test_cost, test_acc1, test_acc5]
    test_fetch_list = []
    for var in test_fetch_vars:
        var.persistable = True
        test_fetch_list.append(var.name)

    params = models.__dict__[args.model]().params
    for pass_id in range(params["num_epochs"]):
        train_py_reader.start()

        train_info = [[], [], []]
        test_info = [[], [], []]
        train_time = []
        batch_id = 0
        time_record = []
        try:
            while True:
                t1 = time.time()

                if use_ngraph:
                    loss, acc1, acc5, lr = train_exe.run(
                        train_prog, fetch_list=train_fetch_list)
                else:
                    loss, acc1, acc5, lr = train_exe.run(
                        fetch_list=train_fetch_list)
                t2 = time.time()
                time_record.append(t2 - t1)
                loss = np.mean(np.array(loss))
                acc1 = np.mean(np.array(acc1))
                acc5 = np.mean(np.array(acc5))
                train_info[0].append(loss)
                train_info[1].append(acc1)
                train_info[2].append(acc5)
                lr = np.mean(np.array(lr))
                train_time.append(t2-t1)

                if batch_id % 10 == 0:
                    period = np.mean(time_record)
                    time_record = []
                    print("Pass {0}, trainbatch {1}, loss {2}, "
                          "acc1 {3}, acc5 {4}, lr {5}, time {6}".format(
                              pass_id, batch_id, "%.5f" % loss,
                              "%.5f" % acc1, "%.5f" % acc5,
                              "%.5f" % lr, "%2.2f sec" % period))
                    sys.stdout.flush()
                batch_id += 1
        except fluid.core.EOFException:
            train_py_reader.reset()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        train_speed = np.array(train_time).mean() / (train_batch_size *
                                                     device_num)

        test_py_reader.start()

        test_batch_id = 0
        try:
            while True:
                t1 = time.time()
                loss, acc1, acc5 = exe.run(program=test_prog,
                                           fetch_list=test_fetch_list)
                t2 = time.time()
                period = t2 - t1
                loss = np.mean(loss)
                acc1 = np.mean(acc1)
                acc5 = np.mean(acc5)
                test_info[0].append(loss)
                test_info[1].append(acc1)
                test_info[2].append(acc5)
                if test_batch_id % 10 == 0:
                    print("Pass {0},testbatch {1},loss {2}, \
                        acc1 {3},acc5 {4},time {5}"
                          .format(pass_id, test_batch_id, "%.5f"%loss,"%.5f"%acc1, "%.5f"%acc5,
                                  "%2.2f sec" % period))
                    sys.stdout.flush()
                test_batch_id += 1
        except fluid.core.EOFException:
            test_py_reader.reset()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, "
              "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(
                  pass_id, "%.5f"%train_loss, "%.5f"%train_acc1, "%.5f"%train_acc5, "%.5f"%test_loss,
                  "%.5f"%test_acc1, "%.5f"%test_acc5))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir, model_name, str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path, main_program=train_prog)

        # This is for continuous evaluation only
        if args.enable_ce and pass_id == args.num_epochs - 1:
            if device_num == 1:
                # Use the mean cost/acc for training
                print("kpis	train_cost	%s" % train_loss)
                print("kpis	train_acc_top1	%s" % train_acc1)
                print("kpis	train_acc_top5	%s" % train_acc5)
                # Use the mean cost/acc for testing
                print("kpis	test_cost	%s" % test_loss)
                print("kpis	test_acc_top1	%s" % test_acc1)
                print("kpis	test_acc_top5	%s" % test_acc5)
                print("kpis	train_speed	%s" % train_speed)
            else:
                # Use the mean cost/acc for training
                print("kpis	train_cost_card%s	%s" % (device_num, train_loss))
                print("kpis	train_acc_top1_card%s	%s" %
                      (device_num, train_acc1))
                print("kpis	train_acc_top5_card%s	%s" %
                      (device_num, train_acc5))
                # Use the mean cost/acc for testing
                print("kpis	test_cost_card%s	%s" % (device_num, test_loss))
                print("kpis	test_acc_top1_card%s	%s" % (device_num, test_acc1))
                print("kpis	test_acc_top5_card%s	%s" % (device_num, test_acc5))
                print("kpis	train_speed_card%s	%s" % (device_num, train_speed))
Example No. 8
def train(args):
    # parameters from arguments
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    with_memory_optimization = args.with_mem_opt
    model_save_dir = args.model_save_dir

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000

    train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_py_reader, test_cost, test_acc1, test_acc5 = build_program(
        is_train=False,
        main_prog=test_prog,
        startup_prog=startup_prog,
        args=args)
    test_prog = test_prog.clone(for_test=True)

    if with_memory_optimization:
        fluid.memory_optimize(train_prog)
        fluid.memory_optimize(test_prog)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if checkpoint is not None:
        fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)

    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(
            exe, pretrained_model, main_program=train_prog, predicate=if_exist)

    if args.use_gpu:
        device_num = get_device_num()
    else:
        device_num = 1
    train_batch_size = args.batch_size // device_num

    test_batch_size = 16
    if not args.enable_ce:
        train_reader = paddle.batch(
            reader.train(), batch_size=train_batch_size, drop_last=True)
        test_reader = paddle.batch(reader.val(), batch_size=test_batch_size)
    else:
        # use flowers dataset for CE and set use_xmap False to avoid disorder data
        # but it is time consuming. For faster speed, need another dataset.
        import random
        random.seed(0)
        np.random.seed(0)
        train_reader = paddle.batch(
            flowers.train(use_xmap=False),
            batch_size=train_batch_size,
            drop_last=True)
        test_reader = paddle.batch(
            flowers.test(use_xmap=False), batch_size=test_batch_size)

    train_py_reader.decorate_paddle_reader(train_reader)
    test_py_reader.decorate_paddle_reader(test_reader)

    # use_ngraph is for CPU only, please refer to README_ngraph.md for details
    use_ngraph = os.getenv('FLAGS_use_ngraph')
    if not use_ngraph:
        train_exe = fluid.ParallelExecutor(
            main_program=train_prog,
            use_cuda=bool(args.use_gpu),
            loss_name=train_cost.name)
    else:
        train_exe = exe

    train_fetch_list = [
        train_cost.name, train_acc1.name, train_acc5.name, global_lr.name
    ]
    test_fetch_list = [test_cost.name, test_acc1.name, test_acc5.name]

    params = models.__dict__[args.model]().params
    for pass_id in range(params["num_epochs"]):

        train_py_reader.start()

        train_info = [[], [], []]
        test_info = [[], [], []]
        train_time = []
        batch_id = 0
        try:
            while True:
                t1 = time.time()

                if use_ngraph:
                    loss, acc1, acc5, lr = train_exe.run(
                        train_prog, fetch_list=train_fetch_list)
                else:
                    loss, acc1, acc5, lr = train_exe.run(
                        fetch_list=train_fetch_list)
                t2 = time.time()
                period = t2 - t1
                loss = np.mean(np.array(loss))
                acc1 = np.mean(np.array(acc1))
                acc5 = np.mean(np.array(acc5))
                train_info[0].append(loss)
                train_info[1].append(acc1)
                train_info[2].append(acc5)
                lr = np.mean(np.array(lr))
                train_time.append(period)

                if batch_id % 10 == 0:
                    print("Pass {0}, trainbatch {1}, loss {2}, \
                        acc1 {3}, acc5 {4}, lr {5}, time {6}"
                          .format(pass_id, batch_id, "%.5f"%loss, "%.5f"%acc1, "%.5f"%acc5, "%.5f" %
                                  lr, "%2.2f sec" % period))
                    sys.stdout.flush()
                batch_id += 1
        except fluid.core.EOFException:
            train_py_reader.reset()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        train_speed = np.array(train_time).mean() / (train_batch_size *
                                                     device_num)

        test_py_reader.start()

        test_batch_id = 0
        try:
            while True:
                t1 = time.time()
                loss, acc1, acc5 = exe.run(program=test_prog,
                                           fetch_list=test_fetch_list)
                t2 = time.time()
                period = t2 - t1
                loss = np.mean(loss)
                acc1 = np.mean(acc1)
                acc5 = np.mean(acc5)
                test_info[0].append(loss)
                test_info[1].append(acc1)
                test_info[2].append(acc5)
                if test_batch_id % 10 == 0:
                    print("Pass {0},testbatch {1},loss {2}, \
                        acc1 {3},acc5 {4},time {5}"
                          .format(pass_id, test_batch_id, "%.5f"%loss,"%.5f"%acc1, "%.5f"%acc5,
                                  "%2.2f sec" % period))
                    sys.stdout.flush()
                test_batch_id += 1
        except fluid.core.EOFException:
            test_py_reader.reset()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, "
              "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(
                  pass_id, "%.5f"%train_loss, "%.5f"%train_acc1, "%.5f"%train_acc5, "%.5f"%test_loss,
                  "%.5f"%test_acc1, "%.5f"%test_acc5))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir, model_name, str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path, main_program=train_prog)

        # This is for continuous evaluation only
        if args.enable_ce and pass_id == args.num_epochs - 1:
            if device_num == 1:
                # Use the mean cost/acc for training
                print("kpis	train_cost	%s" % train_loss)
                print("kpis	train_acc_top1	%s" % train_acc1)
                print("kpis	train_acc_top5	%s" % train_acc5)
                # Use the mean cost/acc for testing
                print("kpis	test_cost	%s" % test_loss)
                print("kpis	test_acc_top1	%s" % test_acc1)
                print("kpis	test_acc_top5	%s" % test_acc5)
                print("kpis	train_speed	%s" % train_speed)
            else:
                # Use the mean cost/acc for training
                print("kpis	train_cost_card%s	%s" % (device_num, train_loss))
                print("kpis	train_acc_top1_card%s	%s" %
                      (device_num, train_acc1))
                print("kpis	train_acc_top5_card%s	%s" %
                      (device_num, train_acc5))
                # Use the mean cost/acc for testing
                print("kpis	test_cost_card%s	%s" % (device_num, test_loss))
                print("kpis	test_acc_top1_card%s	%s" % (device_num, test_acc1))
                print("kpis	test_acc_top5_card%s	%s" % (device_num, test_acc5))
                print("kpis	train_speed_card%s	%s" % (device_num, train_speed))
Example No. 9
def train(args, model_path, place):
    # parameters from arguments
    with_memory_optimization = args.with_mem_opt

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000

    train_py_reader, data_name, label_name, train_cost, train_acc1, train_acc5, global_lr = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)

    if with_memory_optimization:
        fluid.memory_optimize(train_prog)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    """
    visible_device = os.getenv('CUDA_VISIBLE_DEVICES')
    if visible_device:
        device_num = len(visible_device.split(','))
    else:
        device_num = subprocess.check_output(
            ['nvidia-smi', '-L']).decode().count('\n')
    """
    device_num = 1

    train_batch_size = args.batch_size // device_num
    if not args.enable_ce:
        train_reader = paddle.batch(
            reader.train(), batch_size=train_batch_size, drop_last=True)
    else:
        # use flowers dataset for CE and set use_xmap False to avoid disorder data
        # but it is time consuming. For faster speed, need another dataset.
        import random
        random.seed(0)
        np.random.seed(0)
        train_reader = paddle.batch(
            flowers.train(use_xmap=False),
            batch_size=train_batch_size,
            drop_last=True)

    train_py_reader.decorate_paddle_reader(train_reader)
    train_exe = fluid.ParallelExecutor(
        main_program=train_prog,
        use_cuda=bool(args.use_gpu),
        loss_name=train_cost.name)

    save_target_vars = [train_cost, train_acc1, train_acc5, global_lr]
    train_fetch_list = [
        train_cost.name, train_acc1.name, train_acc5.name, global_lr.name
    ]

    params = models.__dict__[args.model]().params
    for pass_id in range(params["num_epochs"]):
        train_py_reader.start()
        train_info = [[], [], []]
        train_time = []
        batch_id = 0
        try:
            while True:
                t1 = time.time()
                loss, acc1, acc5, lr = train_exe.run(
                    fetch_list=train_fetch_list)

                t2 = time.time()
                period = t2 - t1
                loss = np.mean(np.array(loss))
                acc1 = np.mean(np.array(acc1))
                acc5 = np.mean(np.array(acc5))
                train_info[0].append(loss)
                train_info[1].append(acc1)
                train_info[2].append(acc5)
                lr = np.mean(np.array(lr))
                train_time.append(period)

                if batch_id % 10 == 0:
                    print("Pass {0}, trainbatch {1}, loss {2}, \
                        acc1 {3}, acc5 {4}, lr{5}, time {6}"
                          .format(pass_id, batch_id, loss, acc1, acc5, "%.5f" %
                                  lr, "%2.2f sec" % period))
                    sys.stdout.flush()
                batch_id += 1
        except fluid.core.EOFException:
            train_py_reader.reset()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        train_speed = np.array(train_time).mean() / (train_batch_size *
                                                     device_num)

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, ".format(
                  pass_id, train_loss, train_acc1, train_acc5))
        sys.stdout.flush()

        if not os.path.isdir(model_path):
            os.makedirs(model_path)
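        # save_inference_model prunes the program to the given feed/fetch
        # targets; params_filename="__params__" packs all parameters into a
        # single file alongside the serialized "__model__".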
        fluid.io.save_inference_model(model_path,
                                      [data_name, label_name],
                                      save_target_vars,
                                      exe,
                                      main_program=train_prog,
                                      model_filename="__model__",
                                      params_filename="__params__")
        print("save to", model_path)
Example No. 10
def train(args):
    # parameters from arguments
    class_dim = args.class_dim
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    with_memory_optimization = args.with_mem_opt
    model_save_dir = args.model_save_dir
    image_shape = [int(m) for m in args.image_shape.split(",")]

    assert model_name in model_list, "{} is not in lists: {}".format(args.model,
                                                                     model_list)
    #pyreader = vis_reader("imagenet", shuffle_size=10000)
    #image, label = fluid.layers.read_file(pyreader)
    
    #imagenet = dataset.create(name='imagenet100')

    #train_reader = imagenet.train()
    #val_reader = imagenet.val()
    '''
    train_reader = imagenet.train(name="imagenet", part_id=0, part_num=500,
                                  cache='./cached', accelerate=True, infinite=True)
    train_reader = reader_fast.create_imagenet_afs_rawdatareader("train", "imagenet", 
                                                                 cachefolder="./data_cache")
    train_reader = reader_fast.transform_reader("train", train_reader)
    train_reader = reader_fast.create_threaded_reader(train_reader)
    '''

    gpu = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    gpu_nums = len(gpu.split(","))
        
    py_reader = fluid.layers.py_reader(
        capacity=args.batch_size,
        shapes=[[-1] + image_shape, [-1, 1]],
        lod_levels=[0, 0],
        dtypes=["float32", "int64"],
        use_double_buffer=True)
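    # use_double_buffer=True lets the reader prefetch the next batch while
    # the current one is being consumed by training.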

    image, label = fluid.layers.read_file(py_reader)
    #py_reader.decorate_paddle_reader(paddle.batch(train_reader, batch_size=args.batch_size))
    py_reader.decorate_paddle_reader(paddle.batch(reader.train(), batch_size=args.batch_size))

    #image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    #label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    
    # model definition
    model = models.__dict__[model_name]()
    
    if args.enable_ce:
        assert model_name == "SE_ResNeXt50_32x4d"
        fluid.default_startup_program().random_seed = 1000
        model.params["dropout_seed"] = 100
        class_dim = 102
        
    if model_name == "GoogleNet":
        out0, out1, out2 = model.net(input=image, class_dim=class_dim)
        cost0 = fluid.layers.cross_entropy(input=out0, label=label)
        cost1 = fluid.layers.cross_entropy(input=out1, label=label)
        cost2 = fluid.layers.cross_entropy(input=out2, label=label)
        avg_cost0 = fluid.layers.mean(x=cost0)
        avg_cost1 = fluid.layers.mean(x=cost1)
        avg_cost2 = fluid.layers.mean(x=cost2)

        avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2
        acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5)
    else:
        out = model.net(input=image, class_dim=class_dim)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.sum(x=cost)
        
        #avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    test_program = fluid.default_main_program().clone(for_test=True)

    # parameters from model and arguments
    params = model.params
    params["total_images"] = args.total_images
    params["lr"] = args.lr
    params["num_epochs"] = args.num_epochs
    params["learning_strategy"]["batch_size"] = args.batch_size
    params["learning_strategy"]["name"] = args.lr_strategy

    # initialize optimizer
    optimizer = optimizer_setting(params)
    opts = optimizer.minimize(avg_cost)

    if with_memory_optimization:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if checkpoint is not None:
        fluid.io.load_persistables(exe, checkpoint)

    if pretrained_model:
        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
        
    train_batch_size = args.batch_size
    test_batch_size = 16
    
    if not args.enable_ce:
        #train_reader = paddle.batch(reader.train(), batch_size=train_batch_size)
        #test_reader = paddle.batch(reader.val(), batch_size=test_batch_size)
        pass
    else:
        # use flowers dataset for CE and set use_xmap False to avoid disorder data
        # but it is time consuming. For faster speed, need another dataset.
        import random
        random.seed(0)
        np.random.seed(0)
        train_reader = paddle.batch(
            flowers.train(use_xmap=False), batch_size=train_batch_size)
        test_reader = paddle.batch(
            flowers.test(use_xmap=False), batch_size=test_batch_size)

    #feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    build_strategy = fluid.BuildStrategy()
    # Assuming the intended flag is enable_sequential_execution, the
    # BuildStrategy option that forces ops to run in program order.
    build_strategy.enable_sequential_execution = True

    train_exe = fluid.ParallelExecutor(
        use_cuda=bool(args.use_gpu), loss_name=avg_cost.name,
        build_strategy=build_strategy)

    fout = open("origin.startup_program.txt", "w")
    fout.write(str(fluid.default_startup_program()))
    fout.close()
    fout = open("origin.main_program.txt", "w")
    fout.write(str(fluid.default_main_program()))
    fout.close()

    '''
    train_exe = fluid.ParallelExecutor(
        use_cuda=True if args.use_gpu else False, loss_name=avg_cost.name)
    '''
    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]

    gpu = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    gpu_nums = len(gpu.split(","))
    for pass_id in range(params["num_epochs"]):
        train_info = [[], [], []]
        test_info = [[], [], []]
        train_time = []
        io_time = []
        t0 = time.time()
        py_reader.start()
        #for batch_id, data in enumerate(train_reader()):
        batch_id = 0
        try:
            while True:
                t1 = time.time()
                loss, acc1, acc5 = train_exe.run(fetch_list)
                t2 = time.time()
                period = t2 - t1
                io_period = t1 - t0
                loss = np.mean(np.array(loss))
                acc1 = np.mean(np.array(acc1))
                acc5 = np.mean(np.array(acc5))
                train_info[0].append(loss)
                train_info[1].append(acc1)
                train_info[2].append(acc5)
                train_time.append(period)
                io_time.append(io_period)
                if batch_id % 10 == 0 and batch_id > 0:
                    print("Pass {0}, trainbatch {1}, loss {2}, "
                          "acc1 {3}, acc5 {4}, train_time {5}, io_time {6}, "
                          "queue_size {7}".format(
                              pass_id, batch_id, loss, acc1, acc5,
                              "%2.2f sec" % np.array(train_time[1:]).mean(),
                              "%2.2f sec" % np.array(io_time[1:]).mean(),
                              py_reader.queue.size()))
                    sys.stdout.flush()
                t0 = time.time()
                batch_id += 1
        except fluid.core.EOFException:
            # The reader raises EOFException when the pass is exhausted;
            # reset it so the next pass can restart cleanly.
            py_reader.reset()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        train_speed = np.array(train_time).mean() / train_batch_size
        cnt = 0
        '''
        for test_batch_id, data in enumerate(test_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(test_program,
                                       fetch_list=fetch_list,
                                       feed=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            loss = np.mean(loss)
            acc1 = np.mean(acc1)
            acc5 = np.mean(acc5)
            test_info[0].append(loss * len(data))
            test_info[1].append(acc1 * len(data))
            test_info[2].append(acc5 * len(data))
            cnt += len(data)
            if test_batch_id % 10 == 0:
                print("Pass {0},testbatch {1},loss {2}, \
                       acc1 {3},acc5 {4},time {5}"
                                                  .format(pass_id, \
                       test_batch_id, loss, acc1, acc5, \
                       "%2.2f sec" % period))
                sys.stdout.flush()

        test_loss = np.sum(test_info[0]) / cnt
        test_acc1 = np.sum(test_info[1]) / cnt
        test_acc5 = np.sum(test_info[2]) / cnt

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, "
              "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(pass_id, \
              train_loss, train_acc1, train_acc5, test_loss, test_acc1, \
              test_acc5))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir + '/' + model_name,
                                  str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path)
        '''
        # This is for continuous evaluation only
        if args.enable_ce and pass_id == args.num_epochs - 1:
            if gpu_nums == 1:
                # Use the mean cost/acc for training
                print("kpis	train_cost	%s" % train_loss)
                print("kpis	train_acc_top1	%s" % train_acc1)
                print("kpis	train_acc_top5	%s" % train_acc5)
                # Use the mean cost/acc for testing
                print("kpis	test_cost	%s" % test_loss)
                print("kpis	test_acc_top1	%s" % test_acc1)
                print("kpis	test_acc_top5	%s" % test_acc5)
                print("kpis	train_speed	%s" % train_speed)
            else:
                # Use the mean cost/acc for training
                print("kpis	train_cost_card%s	%s" % (gpu_nums, train_loss))
                print("kpis	train_acc_top1_card%s	%s" % (gpu_nums, train_acc1))
                print("kpis	train_acc_top5_card%s	%s" % (gpu_nums, train_acc5))
                # Use the mean cost/acc for testing
                print("kpis	test_cost_card%s	%s" % (gpu_nums, test_loss))
                print("kpis	test_acc_top1_card%s	%s" % (gpu_nums, test_acc1))
                print("kpis	test_acc_top5_card%s	%s" % (gpu_nums, test_acc5))
                print("kpis	train_speed_card%s	%s" % (gpu_nums, train_speed))