コード例 #1
0
def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
               weight, lr):
    """Run every training pass on CPU, logging progress and saving checkpoints.

    Args:
        args: Options object. Reads ``batch_size``, ``num_passes``,
            ``print_batch``, ``with_speed``, ``save_step`` and
            ``model_output_dir``.
        train_program: Program handed to ``paddle.static.save``.
        reader: Object whose ``train()`` result is passed to
            ``convert_python_to_tensor``.
        py_reader: Reader that is started at the beginning of each pass and
            reset when ``EOFException`` ends the pass.
        loss: Variable whose ``name`` is fetched on every step.
        trainer_id: Only trainer ``0`` writes checkpoints.
        weight: Forwarded unchanged to ``convert_python_to_tensor``.
        lr: Object whose ``step()`` is called once per batch.
    """
    py_reader.decorate_tensor_provider(
        convert_python_to_tensor(weight, args.batch_size, reader.train()))

    place = paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    print("CPU_NUM:" + str(os.getenv("CPU_NUM")))

    for pass_id in range(args.num_passes):
        py_reader.start()
        # NOTE(review): presumably gives the reader time to pre-fill its
        # queue before the first step -- confirm whether 10s is still needed.
        time.sleep(10)
        epoch_start = time.time()
        batch_id = 0
        start = time.time()
        # Number of batches executed since `start` was last reset; used so
        # the throughput figure matches the measured interval exactly.
        batches_timed = 0
        try:
            while True:
                # No program is passed, so the executor's default is used.
                loss_val = np.mean(exe.run(fetch_list=[loss.name]))
                batches_timed += 1

                if batch_id % args.print_batch == 0:
                    logger.info(
                        "TRAIN --> pass: {} batch: {} loss: {} reader queue:{}"
                        .format(pass_id, batch_id, loss_val,
                                py_reader.queue.size()))
                if args.with_speed and batch_id % 500 == 0 and batch_id != 0:
                    elapsed = time.time() - start
                    start = time.time()
                    # CPU_NUM may be unset; count a single device in that
                    # case instead of failing on int(None).
                    cpu_num = int(os.getenv("CPU_NUM") or 1)
                    samples = batches_timed * args.batch_size * cpu_num
                    batches_timed = 0
                    logger.info("Time used: {}, Samples/Sec: {}".format(
                        elapsed, samples / elapsed))
                lr.step()

                if batch_id % args.save_step == 0 and batch_id != 0:
                    model_dir = args.model_output_dir + '/pass-' + str(
                        pass_id) + ('/batch-' + str(batch_id))
                    if trainer_id == 0:
                        paddle.static.save(exe, model_dir, train_program)
                        print("model saved in %s" % model_dir)
                batch_id += 1

        except paddle.fluid.core.EOFException:
            # The reader signals the end of the pass by raising EOFException.
            py_reader.reset()
            epoch_end = time.time()
            logger.info("Epoch: {0}, Train total expend: {1} ".format(
                pass_id, epoch_end - epoch_start))
            model_dir = args.model_output_dir + '/pass-' + str(pass_id)
            if trainer_id == 0:
                paddle.static.save(exe, model_dir, train_program)
                print("model saved in %s" % model_dir)
コード例 #2
0
def prepare_reader(is_train, pyreader, args, pass_id=0):
    """Attach a batched training or validation reader to ``pyreader``.

    Args:
        is_train: When true, ``train(...)`` is used and the batch size is
            ``args.batch_size`` split across ``get_device_num()`` devices;
            otherwise ``val(...)`` is used with a batch size of 16.
        pyreader: Object that receives the batched reader through
            ``decorate_paddle_reader``.
        args: Options object; reads ``data_dir`` and, for training,
            ``batch_size``.
        pass_id: Passed to ``train`` as ``pass_id_as_seed``.
    """
    if is_train:
        reader = train(data_dir=args.data_dir, pass_id_as_seed=pass_id)
        # Floor division keeps the per-device batch size an int on Python 3
        # (true division would produce a float).
        bs = args.batch_size // get_device_num()
    else:
        reader = val(data_dir=args.data_dir)
        bs = 16
    pyreader.decorate_paddle_reader(paddle.batch(reader, batch_size=bs))
コード例 #3
0
def prepare_reader(is_train, pyreader, args, pass_id=1):
    """Attach a batched training or validation reader to ``pyreader``.

    Args:
        is_train: When true, an infinite ``train(...)`` reader is used and the
            batch size is ``args.batch_size`` split across
            ``get_device_num()`` devices; otherwise ``val(...)`` is used with
            a batch size of 16.
        pyreader: Object that receives the batched reader through
            ``decorate_paddle_reader``.
        args: Options object; reads ``data_dir`` and, for training,
            ``batch_size``.
        pass_id: Passed to ``train`` as ``pass_id_as_seed``.
    """
    if is_train:
        # NOTE: always use infinite reader for dist training
        reader = train(
            data_dir=args.data_dir, pass_id_as_seed=pass_id, infinite=True)
        # Floor division keeps the per-device batch size an int on Python 3
        # (true division would produce a float).
        bs = args.batch_size // get_device_num()
    else:
        reader = val(data_dir=args.data_dir)
        bs = 16
    pyreader.decorate_paddle_reader(paddle.batch(reader, batch_size=bs))
コード例 #4
0
ファイル: retrain.py プロジェクト: zhaoxiaoze/PaddleHub
def retrain(modelpath):
    """Train a two-class softmax head on a hub module's feature map.

    The module is loaded from ``args.hub_module_path`` (a module-level
    ``args``), a fully connected layer is trained for 50 epochs on
    ``reader.train(args.data_dir)``, and one validation sweep over
    ``reader.val(args.data_dir)`` is printed afterwards.

    Args:
        modelpath: Not used by this function.
    """
    model = module.Module(module_dir=args.hub_module_path)

    feed_list, fetch_list, program, generator = model(
        sign_name="feature_map", trainable=False)
    # get the dog cat dataset
    train_reader = paddle.batch(reader.train(args.data_dir), batch_size=32)
    val_reader = paddle.batch(reader.val(args.data_dir), batch_size=32)

    with fluid.program_guard(main_program=program):
        with fluid.unique_name.guard(generator):
            img = feed_list[0]
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")
            feature_map = fetch_list[0]
            fc = fluid.layers.fc(input=feature_map, size=2, act="softmax")
            cost = fluid.layers.cross_entropy(input=fc, label=label)
            avg_cost = fluid.layers.mean(cost)
            acc = fluid.layers.accuracy(input=fc, label=label)

            # Clone for evaluation after the loss/accuracy exist but before
            # the optimizer adds its ops, so that validation below cannot
            # update the weights.
            test_program = program.clone(for_test=True)

            # define the loss
            optimizer = fluid.optimizer.Adam(learning_rate=0.001)
            optimizer.minimize(avg_cost)

            # running on gpu
            place = fluid.CUDAPlace(0)
            feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
            exe = fluid.Executor(place)

            # init all param
            exe.run(fluid.default_startup_program())
            step = 0
            epochs = 50
            # start to train
            for i in range(epochs):
                for batch in train_reader():
                    # No program is passed: inside program_guard the default
                    # main program is `program`, which includes the optimizer.
                    cost_val, accuracy = exe.run(
                        feed=feeder.feed(batch),
                        fetch_list=[avg_cost.name, acc.name])
                    step += 1
                    print(
                        "epoch %d and step %d: train cost is %.2f, train acc is %.2f%%"
                        % (i, step, cost_val, accuracy * 100))

            for batch_id, batch in enumerate(val_reader()):
                # Evaluate on the optimizer-free clone.
                cost_val, accuracy = exe.run(
                    test_program,
                    feed=feeder.feed(batch),
                    fetch_list=[avg_cost.name, acc.name])
                print("batch %d: val cost is %.2f, val acc is %.2f%%" %
                      (batch_id, cost_val, accuracy * 100))
コード例 #5
0
def quantize(args):
    """Call ``quant_post`` on the model directory ``args.model_path``.

    Samples are drawn from ``reader.train()`` and ``args.save_path`` is passed
    as ``quantize_model_path``. Runs on GPU 0 when ``args.use_gpu`` is truthy,
    otherwise on CPU.

    Raises:
        AssertionError: If ``args.model_path`` is missing or not a directory.
    """
    sample_reader = reader.train()

    if args.use_gpu:
        device = fluid.CUDAPlace(0)
    else:
        device = fluid.CPUPlace()

    model_dir = args.model_path
    assert os.path.exists(model_dir), "args.model_path doesn't exist"
    assert os.path.isdir(model_dir), "args.model_path must be a dir"

    quant_post(
        executor=fluid.Executor(device),
        model_dir=model_dir,
        quantize_model_path=args.save_path,
        sample_generator=sample_reader,
        model_filename=args.model_filename,
        params_filename=args.params_filename,
        batch_size=args.batch_size,
        batch_nums=args.batch_num)
コード例 #6
0
ファイル: train.py プロジェクト: jiangjiajun/sz-demo
def prepare_reader(epoch_id, train_py_reader, train_bs, val_bs, trn_dir,
                   img_dim, min_scale, rect_val, args):
    """Wire up the training reader and return a batched validation reader.

    Args:
        epoch_id: ``epoch_id + 1`` is used as the training shuffle seed.
        train_py_reader: Receives the batched training reader through
            ``decorate_paddle_reader``.
        train_bs: Batch size for the training reader.
        val_bs: Validation batch size, multiplied by ``DEVICE_NUM``.
        trn_dir: Prefix inserted before ``train`` / ``validation`` in the
            data paths.
        img_dim: Passed to both readers as ``sz``.
        min_scale: Passed to ``reader.train``.
        rect_val: Passed to ``reader.test``.
        args: Options object; reads ``data_dir``.

    Returns:
        The validation reader wrapped by ``paddle.batch``.
    """
    train_path = "%s/%strain" % (args.data_dir, trn_dir)
    epoch_train_reader = reader.train(
        traindir=train_path,
        sz=img_dim,
        min_scale=min_scale,
        shuffle_seed=epoch_id + 1)
    batched_train = paddle.batch(epoch_train_reader, batch_size=train_bs)
    train_py_reader.decorate_paddle_reader(batched_train)

    val_path = "%s/%svalidation" % (args.data_dir, trn_dir)
    total_val_bs = val_bs * DEVICE_NUM
    val_reader = reader.test(
        valdir=val_path, bs=total_val_bs, sz=img_dim, rect_val=rect_val)
    return paddle.batch(val_reader, batch_size=total_val_bs)
コード例 #7
0
ファイル: light_nas_space.py プロジェクト: walloollaw/models
    def create_net(self, tokens=None):
        """Create a network for training by tokens.

        Args:
            tokens: Tokens describing the network; when ``None``,
                ``self.init_tokens()`` is used.

        Returns:
            A tuple ``(startup_prog, train_prog, test_prog, train_metrics,
            test_metrics, train_py_reader, test_py_reader)`` where
            ``train_metrics`` is ``(train_cost, train_acc1, train_acc5,
            global_lr)`` and ``test_metrics`` is ``(test_cost, test_acc1,
            test_acc5)``.
        """
        if tokens is None:
            tokens = self.init_tokens()

        bottleneck_params_list = get_bottleneck_params_list(tokens)

        startup_prog = fluid.Program()
        train_prog = fluid.Program()
        test_prog = fluid.Program()
        train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
            is_train=True,
            main_prog=train_prog,
            startup_prog=startup_prog,
            bottleneck_params_list=bottleneck_params_list)
        test_py_reader, test_cost, test_acc1, test_acc5 = build_program(
            is_train=False,
            main_prog=test_prog,
            startup_prog=startup_prog,
            bottleneck_params_list=bottleneck_params_list)
        test_prog = test_prog.clone(for_test=True)
        # Floor division keeps the batch size an int on Python 3 (true
        # division would produce a float). `batch_size` is a module-level
        # name not defined in this method.
        train_batch_size = batch_size // 4
        test_batch_size = batch_size
        train_reader = paddle.batch(reader.train(),
                                    batch_size=train_batch_size,
                                    drop_last=True)
        test_reader = paddle.batch(reader.val(), batch_size=test_batch_size)

        with fluid.program_guard(train_prog, startup_prog):
            train_py_reader.decorate_paddle_reader(train_reader)

        with fluid.program_guard(test_prog, startup_prog):
            test_py_reader.decorate_paddle_reader(test_reader)
        return startup_prog, train_prog, test_prog, (
            train_cost, train_acc1, train_acc5,
            global_lr), (test_cost, test_acc1,
                         test_acc5), train_py_reader, test_py_reader
コード例 #8
0
def prepare_reader(epoch_id, train_py_reader, train_bs, val_bs, trn_dir,
                   img_dim, min_scale, rect_val, args=None):
    """Wire up the (optionally sharded) training reader and return a batched
    validation reader.

    Args:
        epoch_id: ``epoch_id + 1`` is used as the training shuffle seed.
        train_py_reader: Receives the batched training reader through
            ``decorate_paddle_reader``.
        train_bs: Batch size for the training reader.
        val_bs: Validation batch size, multiplied by ``DEVICE_NUM``.
        trn_dir: Prefix inserted before ``train`` / ``validation`` in the
            data paths.
        img_dim: Passed to both readers as ``sz``.
        min_scale: Passed to ``reader.train``.
        rect_val: Passed to ``reader.test``.
        args: Options object; reads ``data_dir``, ``update_method`` and, when
            ``update_method`` is not ``'local'``, ``dist_env``. Required
            despite the ``None`` default.

    Returns:
        The validation reader wrapped by ``paddle.batch``.

    Raises:
        ValueError: If ``args`` is ``None``.
    """
    if args is None:
        # The default exists only in the signature; every code path below
        # dereferences `args`, so fail with a clear message up front.
        raise ValueError("prepare_reader() requires `args`")

    if args.update_method != 'local':
        num_trainers = args.dist_env["num_trainers"]
        trainer_id = args.dist_env["trainer_id"]
    else:
        num_trainers = 1
        trainer_id = 0

    train_reader = reader.train(
        traindir="%s/%strain" % (args.data_dir, trn_dir),
        sz=img_dim,
        min_scale=min_scale,
        shuffle_seed=epoch_id + 1,
        rank_id=trainer_id,
        size=num_trainers)
    train_py_reader.decorate_paddle_reader(
        paddle.batch(
            train_reader, batch_size=train_bs))

    test_reader = reader.test(
        valdir="%s/%svalidation" % (args.data_dir, trn_dir),
        bs=val_bs * DEVICE_NUM,
        sz=img_dim,
        rect_val=rect_val)
    test_batched_reader = paddle.batch(
        test_reader, batch_size=val_bs * DEVICE_NUM)

    return test_batched_reader
コード例 #9
0
def train_async(args):
    """Train for a fixed number of iterations with periodic recall evaluation.

    Builds a training program and a feature-extraction test program, restores
    weights from ``args.checkpoint`` / ``args.pretrained_model`` when given,
    then runs ``args.total_iter_num + 1`` training iterations. Every
    ``args.display_iter_step`` iterations the running averages are printed,
    every ``args.test_iter_step`` iterations top-1 recall is computed with
    ``recall_topk``, and every ``args.save_iter_step`` iterations the
    persistables are saved under ``<model_save_dir>/<model>/<iter_no>``.

    Args:
        args: Options object. Reads ``model``, ``checkpoint``,
            ``pretrained_model``, ``model_save_dir``, ``enable_ce``,
            ``loss_name``, ``use_gpu``, ``train_batch_size``,
            ``test_batch_size``, ``total_iter_num``, ``display_iter_step``,
            ``test_iter_step`` and ``save_iter_step``; it is also forwarded to
            ``build_program`` and the readers.

    Raises:
        AssertionError: If ``enable_ce`` is set with a model/loss other than
            ``ResNet50``/``arcmargin``, or if ``train_batch_size`` is not
            divisible by the device count.
    """
    # parameters from arguments

    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()

    if args.enable_ce:
        # Fixed seeds for continuous evaluation.
        assert args.model == "ResNet50"
        assert args.loss_name == "arcmargin"
        np.random.seed(0)
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
        tmp_prog.random_seed = 1000

    train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_feas, image, label = build_program(is_train=False,
                                            main_prog=tmp_prog,
                                            startup_prog=startup_prog,
                                            args=args)
    test_prog = tmp_prog.clone(for_test=True)

    train_fetch_list = [
        global_lr.name, train_cost.name, train_acc1.name, train_acc5.name
    ]
    test_fetch_list = [test_feas.name]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    exe.run(startup_prog)

    logging.debug('after run startup program')

    if checkpoint is not None:
        fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)

    if pretrained_model:

        def if_exist(var):
            # Load only variables that have a file in the pretrained dir.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe,
                           pretrained_model,
                           main_program=train_prog,
                           predicate=if_exist)

    if args.use_gpu:
        devicenum = get_gpu_num()
    else:
        devicenum = int(os.environ.get('CPU_NUM', 1))
    assert (args.train_batch_size % devicenum) == 0
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size

    train_reader = paddle.batch(reader.train(args),
                                batch_size=train_batch_size,
                                drop_last=True)
    test_reader = paddle.batch(reader.test(args),
                               batch_size=test_batch_size,
                               drop_last=False)
    test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    train_py_reader.decorate_paddle_reader(train_reader)

    train_exe = fluid.ParallelExecutor(main_program=train_prog,
                                       use_cuda=args.use_gpu,
                                       loss_name=train_cost.name)

    totalruntime = 0
    train_py_reader.start()
    iter_no = 0
    # Running sums: [loss, acc1, acc5, number of iterations accumulated].
    train_info = [0, 0, 0, 0]
    # Pre-bind the values reported at the end so the CE block cannot hit an
    # unbound name when no display/test step ran.
    avg_loss = None
    recall = None
    while iter_no <= args.total_iter_num:
        t1 = time.time()
        lr, loss, acc1, acc5 = train_exe.run(fetch_list=train_fetch_list)
        t2 = time.time()
        period = t2 - t1
        lr = np.mean(np.array(lr))
        train_info[0] += np.mean(np.array(loss))
        train_info[1] += np.mean(np.array(acc1))
        train_info[2] += np.mean(np.array(acc5))
        train_info[3] += 1
        if iter_no % args.display_iter_step == 0:
            avgruntime = totalruntime / args.display_iter_step
            avg_loss = train_info[0] / train_info[3]
            avg_acc1 = train_info[1] / train_info[3]
            avg_acc5 = train_info[2] / train_info[3]
            print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
                    "acc1 %.4f, acc5 %.4f, time %2.2f sec" % \
                    (fmt_time(), iter_no, lr, avg_loss, avg_acc1, avg_acc5, avgruntime))
            sys.stdout.flush()
            totalruntime = 0
        if iter_no % 1000 == 0:
            # Restart the running averages every 1000 iterations.
            train_info = [0, 0, 0, 0]

        totalruntime += period

        if iter_no % args.test_iter_step == 0 and iter_no != 0:
            feats, labels = [], []
            for batch_id, data in enumerate(test_reader()):
                t1 = time.time()
                [feas] = exe.run(test_prog,
                                 fetch_list=test_fetch_list,
                                 feed=test_feeder.feed(data))
                # Separate name so the fed `label` variable is not rebound.
                batch_labels = np.asarray([x[1] for x in data])
                feats.append(feas)
                labels.append(batch_labels)

                t2 = time.time()
                period = t2 - t1
                if batch_id % 20 == 0:
                    print("[%s] testbatch %d, time %2.2f sec" % \
                            (fmt_time(), batch_id, period))

            feats = np.vstack(feats)
            labels = np.hstack(labels)
            recall = recall_topk(feats, labels, k=1)
            print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
                    (fmt_time(), len(feats), iter_no, recall))
            sys.stdout.flush()

        if iter_no % args.save_iter_step == 0 and iter_no != 0:
            model_path = os.path.join(model_save_dir + '/' + model_name,
                                      str(iter_no))
            if not os.path.isdir(model_path):
                os.makedirs(model_path)
            fluid.io.save_persistables(exe,
                                       model_path,
                                       main_program=train_prog)

        iter_no += 1

    # This is for continuous evaluation only
    if args.enable_ce:
        # Use the mean cost/acc for training; a KPI is skipped when its value
        # was never produced (e.g. no test step ran).
        if avg_loss is not None:
            print("kpis\ttrain_cost\t{}".format(avg_loss))
        if recall is not None:
            print("kpis\ttest_recall\t{}".format(recall))
コード例 #10
0
def train(args):
    """Train and evaluate a classifier, or export it as a hub module.

    When ``args.create_module`` is set, the function creates a hub module
    from the pretrained weights and exits the process. Otherwise it runs
    ``params["num_epochs"]`` passes (``params`` comes from the selected model
    class in ``nets``), each consisting of one training sweep, one test sweep
    and a save of the persistables under ``<model_save_dir>/<model>/<pass_id>``.

    Args:
        args: Options object. Reads ``model``, ``pretrained_model``,
            ``model_save_dir``, ``use_gpu``, ``create_module`` and
            ``batch_size``; it is also forwarded to ``build_program``.

    Raises:
        AssertionError: If ``create_module`` is set without a pretrained model.
    """
    # parameters from arguments
    model_name = args.model
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()

    train_py_reader, train_cost, train_acc, image, predition, feature_map = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    # NOTE(review): this rebinds image/predition/feature_map to the values
    # returned for the test program; the signatures below use those together
    # with program=train_prog -- confirm this is intended.
    test_py_reader, test_cost, test_acc, image, predition, feature_map = build_program(
        is_train=False,
        main_prog=test_prog,
        startup_prog=startup_prog,
        args=args)
    test_prog = test_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if pretrained_model:

        def if_exist(var):
            # Load only variables that have a file in the pretrained dir.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe,
                           pretrained_model,
                           main_program=train_prog,
                           predicate=if_exist)

    if args.create_module:
        assert pretrained_model, "need a pretrained module to create a hub module"
        sign1 = hub.create_signature("classification",
                                     inputs=[image],
                                     outputs=[predition])
        sign2 = hub.create_signature("feature_map",
                                     inputs=[image],
                                     outputs=[feature_map])
        sign3 = hub.create_signature(inputs=[image], outputs=[predition])
        hub.create_module(sign_arr=[sign1, sign2, sign3],
                          program=train_prog,
                          module_dir="hub_module_" + args.model)
        # sys.exit() instead of the site-provided exit(), which is not
        # guaranteed to exist in every interpreter setup.
        sys.exit()

    visible_device = os.getenv('CUDA_VISIBLE_DEVICES')
    if visible_device:
        device_num = len(visible_device.split(','))
    else:
        # One line of `nvidia-smi -L` output is counted as one device.
        device_num = subprocess.check_output(['nvidia-smi',
                                              '-L']).decode().count('\n')

    # Floor division keeps the per-device batch size an int on Python 3.
    train_batch_size = args.batch_size // device_num
    test_batch_size = 16
    train_reader = paddle.batch(reader.train(),
                                batch_size=train_batch_size,
                                drop_last=True)
    test_reader = paddle.batch(reader.val(), batch_size=test_batch_size)

    train_py_reader.decorate_paddle_reader(train_reader)
    test_py_reader.decorate_paddle_reader(test_reader)
    train_exe = fluid.ParallelExecutor(main_program=train_prog,
                                       use_cuda=bool(args.use_gpu),
                                       loss_name=train_cost.name)

    train_fetch_list = [train_cost.name, train_acc.name]
    test_fetch_list = [test_cost.name, test_acc.name]

    params = nets.__dict__[args.model]().params

    for pass_id in range(params["num_epochs"]):

        train_py_reader.start()

        # Per-batch [losses, accuracies, (unused)] for this pass.
        train_info = [[], [], []]
        test_info = [[], [], []]
        batch_id = 0
        try:
            while True:
                t1 = time.time()
                loss, acc = train_exe.run(fetch_list=train_fetch_list)
                t2 = time.time()
                period = t2 - t1
                loss = np.mean(np.array(loss))
                acc = np.mean(np.array(acc))
                train_info[0].append(loss)
                train_info[1].append(acc)
                if batch_id % 10 == 0:
                    print("Pass {0}, trainbatch {1}, loss {2}, \
                        acc {3}, time {4}".format(pass_id, batch_id, loss, acc,
                                                  "%2.2f sec" % period))
                    sys.stdout.flush()
                batch_id += 1
        except fluid.core.EOFException:
            # The reader signals the end of the sweep by raising EOFException.
            train_py_reader.reset()

        # Distinct names so the fetched variables train_acc/test_acc are not
        # rebound to floats.
        mean_train_loss = np.array(train_info[0]).mean()
        mean_train_acc = np.array(train_info[1]).mean()

        test_py_reader.start()

        test_batch_id = 0
        try:
            while True:
                t1 = time.time()
                loss, acc = exe.run(program=test_prog,
                                    fetch_list=test_fetch_list)
                t2 = time.time()
                period = t2 - t1
                loss = np.mean(loss)
                acc = np.mean(acc)
                test_info[0].append(loss)
                test_info[1].append(acc)
                if test_batch_id % 10 == 0:
                    print("Pass {0},testbatch {1},loss {2}, \
                        acc {3},time {4}".format(pass_id, test_batch_id, loss,
                                                 acc, "%2.2f sec" % period))
                    sys.stdout.flush()
                test_batch_id += 1
        except fluid.core.EOFException:
            test_py_reader.reset()

        mean_test_loss = np.array(test_info[0]).mean()
        mean_test_acc = np.array(test_info[1]).mean()

        print("End pass {0}, train_loss {1}, train_acc {2}, "
              "test_loss {3}, test_acc {4}".format(pass_id, mean_train_loss,
                                                   mean_train_acc,
                                                   mean_test_loss,
                                                   mean_test_acc))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir + '/' + model_name,
                                  str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path, main_program=train_prog)
コード例 #11
0
ファイル: train.py プロジェクト: zhaoyuchen2018/benchmark
def train(args):
    """Train and evaluate an image classifier, saving persistables per pass.

    Runs ``params["num_epochs"]`` passes (``params`` comes from the selected
    model class in ``models``). Each pass performs one training sweep and one
    test sweep, prints the mean metrics, and saves the persistables under
    ``<model_save_dir>/<model>/<pass_id>``. When ``args.enable_ce`` is set,
    KPI lines are printed after the pass whose id is ``args.num_epochs - 1``.

    The ``FLAGS_use_ngraph`` environment variable selects between the plain
    executor and a ``ParallelExecutor``; ``num_trainers`` is a module-level
    name not defined in this function.

    Args:
        args: Options object. Reads ``model``, ``checkpoint``,
            ``pretrained_model``, ``with_mem_opt``, ``with_inplace``,
            ``model_save_dir``, ``enable_ce``, ``use_gpu``, ``batch_size``
            and ``num_epochs``; it is also forwarded to ``build_program``.
    """
    # parameters from arguments
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    with_memory_optimization = args.with_mem_opt
    model_save_dir = args.model_save_dir
    # Raw environment string: any non-empty value is treated as enabled.
    use_ngraph = os.getenv('FLAGS_use_ngraph')

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000

    train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_py_reader, test_cost, test_acc1, test_acc5 = build_program(
        is_train=False,
        main_prog=test_prog,
        startup_prog=startup_prog,
        args=args)
    test_prog = test_prog.clone(for_test=True)

    # Without ngraph, memory optimisation is configured through the
    # BuildStrategy further below instead.
    if with_memory_optimization and use_ngraph:
        fluid.memory_optimize(train_prog)
        fluid.memory_optimize(test_prog)

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if checkpoint is not None:
        fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)

    if pretrained_model:

        def if_exist(var):
            # Load only variables that have a file in the pretrained dir.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(
            exe, pretrained_model, main_program=train_prog, predicate=if_exist)

    if args.use_gpu:
        device_num = get_device_num()
    else:
        device_num = 1
    # Floor division keeps the per-device batch size an int on Python 3
    # (true division would hand a float to the readers).
    train_batch_size = args.batch_size // device_num

    test_batch_size = 16
    if not args.enable_ce:
        # NOTE: the order of batch data generated by batch_reader
        # must be the same in the respective processes.
        shuffle_seed = 1 if num_trainers > 1 else None
        train_reader = reader.train(batch_size=train_batch_size, shuffle_seed=shuffle_seed)
        test_reader = reader.val(batch_size=test_batch_size)
    else:
        # use flowers dataset for CE and set use_xmap False to avoid disorder data
        # but it is time consuming. For faster speed, need another dataset.
        import random
        random.seed(0)
        np.random.seed(0)
        train_reader = paddle.batch(
            flowers.train(use_xmap=False),
            batch_size=train_batch_size,
            drop_last=True)
        test_reader = paddle.batch(
            flowers.test(use_xmap=False), batch_size=test_batch_size)

    train_py_reader.decorate_paddle_reader(train_reader)
    test_py_reader.decorate_paddle_reader(test_reader)

    if not use_ngraph:
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = args.with_mem_opt
        build_strategy.enable_inplace = args.with_inplace
        build_strategy.fuse_all_reduce_ops=1
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = device_num
        exec_strategy.num_iteration_per_drop_scope = 10

        if num_trainers > 1 and args.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
            # NOTE: the process is fast when num_threads is 1
            # for multi-process training.
            exec_strategy.num_threads = 1

        train_exe = fluid.ParallelExecutor(
            main_program=train_prog,
            use_cuda=bool(args.use_gpu),
            loss_name=train_cost.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    else:
        train_exe = exe

    # Every fetched variable is marked persistable before its name is used.
    train_fetch_vars = [train_cost, train_acc1, train_acc5, global_lr]
    train_fetch_list = []
    for var in train_fetch_vars:
        var.persistable=True
        train_fetch_list.append(var.name)

    test_fetch_vars = [test_cost, test_acc1, test_acc5]
    test_fetch_list = []
    for var in test_fetch_vars:
        var.persistable=True
        test_fetch_list.append(var.name)

    params = models.__dict__[args.model]().params
    for pass_id in range(params["num_epochs"]):
        train_py_reader.start()

        # Per-batch [losses, top-1 accuracies, top-5 accuracies].
        train_info = [[], [], []]
        test_info = [[], [], []]
        train_time = []
        batch_id = 0
        # Step times since the last progress line.
        time_record=[]
        try:
            while True:
                t1 = time.time()

                if use_ngraph:
                    # Plain executor: the program must be passed explicitly.
                    loss, acc1, acc5, lr = train_exe.run(
                        train_prog, fetch_list=train_fetch_list)
                else:
                    loss, acc1, acc5, lr = train_exe.run(
                        fetch_list=train_fetch_list)
                t2 = time.time()
                time_record.append(t2 - t1)
                loss = np.mean(np.array(loss))
                acc1 = np.mean(np.array(acc1))
                acc5 = np.mean(np.array(acc5))
                train_info[0].append(loss)
                train_info[1].append(acc1)
                train_info[2].append(acc5)
                lr = np.mean(np.array(lr))
                train_time.append(t2-t1)

                if batch_id % 10 == 0:
                    period = np.mean(time_record)
                    time_record=[]
                    print("Pass {0}, trainbatch {1}, loss {2}, \
                        acc1 {3}, acc5 {4}, lr {5}, time {6}"
                          .format(pass_id, batch_id, "%.5f"%loss, "%.5f"%acc1, "%.5f"%acc5, "%.5f" %
                                  lr, "%2.2f sec" % period))
                    sys.stdout.flush()
                batch_id += 1
        except fluid.core.EOFException:
            # The reader signals the end of the sweep by raising EOFException.
            train_py_reader.reset()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        # Mean step time divided by the number of samples per step.
        train_speed = np.array(train_time).mean() / (train_batch_size *
                                                     device_num)

        test_py_reader.start()

        test_batch_id = 0
        try:
            while True:
                t1 = time.time()
                loss, acc1, acc5 = exe.run(program=test_prog,
                                           fetch_list=test_fetch_list)
                t2 = time.time()
                period = t2 - t1
                loss = np.mean(loss)
                acc1 = np.mean(acc1)
                acc5 = np.mean(acc5)
                test_info[0].append(loss)
                test_info[1].append(acc1)
                test_info[2].append(acc5)
                if test_batch_id % 10 == 0:
                    print("Pass {0},testbatch {1},loss {2}, \
                        acc1 {3},acc5 {4},time {5}"
                          .format(pass_id, test_batch_id, "%.5f"%loss,"%.5f"%acc1, "%.5f"%acc5,
                                  "%2.2f sec" % period))
                    sys.stdout.flush()
                test_batch_id += 1
        except fluid.core.EOFException:
            test_py_reader.reset()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, "
              "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(
                  pass_id, "%.5f"%train_loss, "%.5f"%train_acc1, "%.5f"%train_acc5, "%.5f"%test_loss,
                  "%.5f"%test_acc1, "%.5f"%test_acc5))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir + '/' + model_name,
                                  str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path, main_program=train_prog)

        # This is for continuous evaluation only
        if args.enable_ce and pass_id == args.num_epochs - 1:
            if device_num == 1:
                # Use the mean cost/acc for training
                print("kpis\ttrain_cost\t%s" % train_loss)
                print("kpis\ttrain_acc_top1\t%s" % train_acc1)
                print("kpis\ttrain_acc_top5\t%s" % train_acc5)
                # Use the mean cost/acc for testing
                print("kpis\ttest_cost\t%s" % test_loss)
                print("kpis\ttest_acc_top1\t%s" % test_acc1)
                print("kpis\ttest_acc_top5\t%s" % test_acc5)
                print("kpis\ttrain_speed\t%s" % train_speed)
            else:
                # Use the mean cost/acc for training
                print("kpis\ttrain_cost_card%s\t%s" % (device_num, train_loss))
                print("kpis\ttrain_acc_top1_card%s\t%s" %
                      (device_num, train_acc1))
                print("kpis\ttrain_acc_top5_card%s\t%s" %
                      (device_num, train_acc5))
                # Use the mean cost/acc for testing
                print("kpis\ttest_cost_card%s\t%s" % (device_num, test_loss))
                print("kpis\ttest_acc_top1_card%s\t%s" % (device_num, test_acc1))
                print("kpis\ttest_acc_top5_card%s\t%s" % (device_num, test_acc5))
                print("kpis\ttrain_speed_card%s\t%s" % (device_num, train_speed))
コード例 #12
0
def parallel_exe(args,
                 train_file_list,
                 val_file_list,
                 data_args,
                 learning_rate,
                 batch_size,
                 num_passes,
                 model_save_dir='model',
                 pretrained_model=None):
    """Train the SSD detection network and record loss/throughput KPIs.

    The network is built on the default main program, optimized with RMSProp
    under a piecewise learning-rate schedule, and trained for ``num_passes``
    passes. On the final pass the mean loss and the examples-per-second rate
    are recorded on the ``train_cost_kpi``, ``train_speed_kpi`` and
    ``four_card_speed_kpi`` objects, which are defined outside this function.

    Args:
        args: Options object. Reads ``ap_version``, ``use_gpu``, ``parallel``,
            ``iterations``, ``skip_batch_num`` and ``gpu_card_num``.
        train_file_list: Training list handed to ``reader.train``; for the
            ``'coco'`` dataset it must contain ``'2014'`` or ``'2017'``.
        val_file_list: Validation list handed to ``reader.test``.
        data_args: Data settings. Reads ``resize_h``, ``resize_w`` and
            ``dataset`` (``'coco'`` or ``'pascalvoc'``).
        learning_rate: Base learning rate of the schedule.
        batch_size: Number of samples per batch.
        num_passes: Number of passes over the training data.
        model_save_dir: Directory under which ``save_model`` writes.
        pretrained_model: Optional directory whose variables are loaded
            before training starts.
    """
    image_shape = [3, data_args.resize_h, data_args.resize_w]
    if data_args.dataset == 'coco':
        num_classes = 81
    elif data_args.dataset == 'pascalvoc':
        num_classes = 21

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(name='gt_box',
                               shape=[4],
                               dtype='float32',
                               lod_level=1)
    gt_label = fluid.layers.data(name='gt_label',
                                 shape=[1],
                                 dtype='int32',
                                 lod_level=1)
    difficult = fluid.layers.data(name='gt_difficult',
                                  shape=[1],
                                  dtype='int32',
                                  lod_level=1)

    locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
    nmsed_out = fluid.layers.detection_output(locs,
                                              confs,
                                              box,
                                              box_var,
                                              nms_threshold=0.45)
    loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var)
    loss = fluid.layers.reduce_sum(loss)

    # Clone before the optimizer is attached so the test program carries no
    # backward/update ops.
    test_program = fluid.default_main_program().clone(for_test=True)
    with fluid.program_guard(test_program):
        map_eval = fluid.evaluator.DetectionMAP(nmsed_out,
                                                gt_label,
                                                gt_box,
                                                difficult,
                                                num_classes,
                                                overlap_threshold=0.5,
                                                evaluate_difficult=False,
                                                ap_version=args.ap_version)

    # `//` keeps the boundaries integral on both Python 2 and Python 3.
    if data_args.dataset == 'coco':
        # learning rate decay in 12, 19 pass, respectively
        if '2014' in train_file_list:
            epocs = 82783 // batch_size
            boundaries = [epocs * 12, epocs * 19]
        elif '2017' in train_file_list:
            epocs = 118287 // batch_size
            boundaries = [epocs * 12, epocs * 19]
    elif data_args.dataset == 'pascalvoc':
        epocs = 19200 // batch_size
        boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
    values = [
        learning_rate, learning_rate * 0.5, learning_rate * 0.25,
        learning_rate * 0.1, learning_rate * 0.01
    ]
    # piecewise_decay needs exactly one more value than boundaries; the COCO
    # schedules only define two boundaries, so drop the surplus values.
    values = values[:len(boundaries) + 1]
    optimizer = fluid.optimizer.RMSProp(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
        regularization=fluid.regularizer.L2Decay(0.00005),
    )

    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # The seed must be set on the program object, not on the accessor function.
    fluid.default_startup_program().random_seed = 1000
    exe.run(fluid.default_startup_program())

    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    if args.parallel:
        train_exe = fluid.ParallelExecutor(use_cuda=args.use_gpu,
                                           loss_name=loss.name)

    train_reader = paddle.batch(reader.train(data_args, train_file_list),
                                batch_size=batch_size)
    test_reader = paddle.batch(reader.test(data_args, val_file_list),
                               batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place,
                              feed_list=[image, gt_box, gt_label, difficult])

    def save_model(postfix):
        """Replace ``model_save_dir/postfix`` with the current persistables."""
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        print('save models to %s' % (model_path))
        fluid.io.save_persistables(exe, model_path)

    best_map = 0.

    def test(pass_id, best_map):
        """Evaluate on the test reader and return the (possibly new) best mAP."""
        _, accum_map = map_eval.get_map_var()
        map_eval.reset(exe)
        test_map = None
        for data in test_reader():
            test_map = exe.run(test_program,
                               feed=feeder.feed(data),
                               fetch_list=[accum_map])
        if test_map[0] > best_map:
            best_map = test_map[0]
            save_model('best_model')
        print("Test {0}, map {1}".format(pass_id, test_map[0]))
        return best_map

    train_num = 0
    total_train_time = 0.0
    total_iters = 0
    for pass_id in range(num_passes):
        every_pass_loss = []
        iter_in_pass = 0
        pass_duration = 0.0
        for batch_id, data in enumerate(train_reader()):
            batch_start = time.time()
            if iter_in_pass == args.iterations:
                break
            # Skip batches too small to be split across the visible devices.
            if len(data) < devices_num:
                continue
            if args.parallel:
                loss_v, = train_exe.run(fetch_list=[loss.name],
                                        feed=feeder.feed(data))
            else:
                loss_v, = exe.run(fluid.default_main_program(),
                                  feed=feeder.feed(data),
                                  fetch_list=[loss])
            loss_v = np.mean(np.array(loss_v))
            if batch_id % 20 == 0:
                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
                    pass_id, batch_id, loss_v,
                    time.time() - batch_start))
            # The first `skip_batch_num` batches of pass 0 are warm-up and are
            # excluded from the throughput measurement.
            if iter_in_pass >= args.skip_batch_num or pass_id != 0:
                batch_duration = time.time() - batch_start
                pass_duration += batch_duration
                train_num += len(data)
            every_pass_loss.append(loss_v)
            iter_in_pass += 1
            total_iters += 1
        # Per-pass evaluation is currently disabled:
        # best_map = test(pass_id, best_map)
        total_train_time += pass_duration
        print("Pass:%d, Loss:%f, Handle Images Duration: %f\n" %
              (pass_id, np.mean(every_pass_loss), pass_duration))
        if pass_id == num_passes - 1:
            examples_per_sec = train_num / total_train_time
            train_cost_kpi.add_record(np.mean(every_pass_loss))
            train_speed_kpi.add_record(
                np.array(examples_per_sec, dtype='float'))
            four_card_speed_kpi.add_record(
                np.array(examples_per_sec, dtype='float'))
    if args.gpu_card_num == 1:
        train_cost_kpi.persist()
        train_speed_kpi.persist()
    else:
        four_card_speed_kpi.persist()
    print("Best test map {0}".format(best_map))
コード例 #13
0
def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
               weight):
    """Train ``train_program`` on CPU from ``py_reader`` and save parameters.

    Batches come from ``convert_python_to_tensor`` wrapped around
    ``reader.train()``. Training runs through a ``ParallelExecutor`` whose
    thread count is taken from the ``CPU_NUM`` environment variable. Only the
    trainer with ``trainer_id == 0`` writes parameters: every
    ``args.save_step`` batches and once more when a pass ends.

    Args:
        args: Options object. Reads ``batch_size``, ``num_passes``,
            ``print_batch``, ``with_speed``, ``save_step`` and
            ``model_output_dir``.
        train_program: Program handed to the ``ParallelExecutor``.
        reader: Object whose ``train()`` result feeds the tensor provider.
        py_reader: Reader that is started and reset once per pass.
        loss: Loss variable; fetched by name on every step.
        trainer_id: Id of this trainer; only trainer 0 saves.
        weight: Forwarded to ``convert_python_to_tensor``.
    """
    tensor_provider = convert_python_to_tensor(weight, args.batch_size,
                                               reader.train())
    py_reader.decorate_tensor_provider(tensor_provider)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    def cpu_num():
        # Read the environment on every use, exactly like the inline calls.
        return int(os.getenv("CPU_NUM"))

    def save_params(model_dir):
        # Only the first trainer persists parameters.
        if trainer_id != 0:
            return
        fluid.io.save_params(executor=exe, dirname=model_dir)
        print("model saved in %s" % model_dir)

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.use_experimental_executor = True

    print("CPU_NUM:" + str(os.getenv("CPU_NUM")))
    exec_strategy.num_threads = cpu_num()

    build_strategy = fluid.BuildStrategy()
    if cpu_num() > 1:
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce

    train_exe = fluid.ParallelExecutor(use_cuda=False,
                                       loss_name=loss.name,
                                       main_program=train_program,
                                       build_strategy=build_strategy,
                                       exec_strategy=exec_strategy)

    for pass_id in range(args.num_passes):
        py_reader.start()
        time.sleep(10)
        epoch_start = time.time()
        batch_id = 0
        start = time.time()
        pass_dir = args.model_output_dir + '/pass-' + str(pass_id)
        try:
            # Runs until the reader is exhausted and raises EOFException.
            while True:
                fetched = train_exe.run(fetch_list=[loss.name])
                loss_val = np.mean(fetched)

                if batch_id % args.print_batch == 0:
                    message = (
                        "TRAIN --> pass: {} batch: {} loss: {} reader queue:{}"
                        .format(pass_id, batch_id, loss_val.mean(),
                                py_reader.queue.size()))
                    logger.info(message)

                if args.with_speed and batch_id % 500 == 0 and batch_id != 0:
                    elapsed = (time.time() - start)
                    start = time.time()
                    samples = 1001 * args.batch_size * cpu_num()
                    logger.info("Time used: {}, Samples/Sec: {}".format(
                        elapsed, samples / elapsed))

                if batch_id % args.save_step == 0 and batch_id != 0:
                    save_params(pass_dir + ('/batch-' + str(batch_id)))
                batch_id += 1

        except fluid.core.EOFException:
            py_reader.reset()
            epoch_end = time.time()
            logger.info("Epoch: {0}, Train total expend: {1} ".format(
                pass_id, epoch_end - epoch_start))
            save_params(pass_dir)
コード例 #14
0
def train():
    """Train YOLOv3 in dygraph mode, optionally with data parallelism.

    Reads its settings from the ``cfg`` and ``args`` objects defined outside
    this function. Builds the model, a piecewise learning-rate schedule with
    linear warm-up and a Momentum optimizer, then iterates over
    ``reader.train`` batches. The model state is saved every
    ``cfg.snapshot_iter`` iterations (by rank 0 only when running
    data-parallel).
    """
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)

    devices_num = get_device_num() if cfg.use_gpu else 1
    print("Found {} CUDA/CPU devices.".format(devices_num))

    # Fixed seeds make debug and continuous-evaluation runs reproducible.
    if cfg.debug or args.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
        np.random.seed(0)

    if not os.path.exists(cfg.model_save_dir):
        os.makedirs(cfg.model_save_dir)

    # NOTE(review): this function reads use_data_parallel from both `cfg` and
    # `args`; confirm they refer to the same settings.
    if cfg.use_data_parallel:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
    else:
        place = fluid.CUDAPlace(0)

    with fluid.dygraph.guard(place):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        model = YOLOv3(3, is_train=True)

        if cfg.pretrain:
            restore, _ = fluid.load_dygraph(cfg.pretrain)
            model.block.set_dict(restore)

        if cfg.finetune:
            restore, _ = fluid.load_dygraph(cfg.finetune)
            model.set_dict(restore, use_structured_name=True)

        if args.use_data_parallel:
            model = fluid.dygraph.parallel.DataParallel(model, strategy)

        boundaries = cfg.lr_steps
        gamma = cfg.lr_gamma
        step_num = len(cfg.lr_steps)
        learning_rate = cfg.learning_rate
        # One value per interval: the rate is scaled by `gamma` at each boundary.
        values = [learning_rate * (gamma ** i) for i in range(step_num + 1)]

        lr = fluid.dygraph.PiecewiseDecay(
            boundaries=boundaries,
            values=values,
            begin=args.start_iter)

        lr = fluid.layers.linear_lr_warmup(
                learning_rate=lr,
                warmup_steps=cfg.warm_up_iter,
                start_lr=0.0,
                end_lr=cfg.learning_rate,
        )

        optimizer = fluid.optimizer.Momentum(
            learning_rate=lr,
            regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
            momentum=cfg.momentum,
            parameter_list=model.parameters()
        )

        start_time = time.time()
        snapshot_loss = 0
        snapshot_time = 0
        total_sample = 0

        input_size = cfg.input_size
        shuffle = True
        shuffle_seed = None
        total_iter = cfg.max_iter - cfg.start_iter
        mixup_iter = total_iter - cfg.no_mixup_iter

        random_sizes = [cfg.input_size]
        if cfg.random_shape:
            random_sizes = [32 * i for i in range(10,20)]

        train_reader = reader.train(
            input_size,
            batch_size=cfg.batch_size,
            shuffle=shuffle,
            shuffle_seed=shuffle_seed,
            total_iter=total_iter * devices_num,
            mixup_iter=mixup_iter * devices_num,
            random_sizes=random_sizes,
            use_multiprocess_reader=cfg.use_multiprocess_reader,
            num_workers=cfg.worker_num)

        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(train_reader)
        smoothed_loss = SmoothedValue()

        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()

            # Each sample is indexed as (image, gt_box, gt_label, gt_score).
            img = np.array([x[0] for x in data]).astype('float32')
            img = to_variable(img)

            gt_box = np.array([x[1] for x in data]).astype('float32')
            gt_box = to_variable(gt_box)

            gt_label = np.array([x[2] for x in data]).astype('int32')
            gt_label = to_variable(gt_label)

            gt_score = np.array([x[3] for x in data]).astype('float32')
            gt_score = to_variable(gt_score)

            loss = model(img, gt_box, gt_label, gt_score, None, None)
            smoothed_loss.add_value(np.mean(loss.numpy()))
            snapshot_loss += loss.numpy()
            snapshot_time += start_time - prev_start_time
            total_sample += 1

            print("Iter {:d}, loss {:.6f}, time {:.5f}".format(
                iter_id,
                smoothed_loss.get_mean_value(),
                start_time-prev_start_time))

            # Run backward exactly once per step; in the data-parallel case
            # the loss is scaled first and gradients are all-reduced after.
            if args.use_data_parallel:
                loss = model.scale_loss(loss)
                loss.backward()
                model.apply_collective_grads()
            else:
                loss.backward()

            optimizer.minimize(loss)
            model.clear_gradients()

            # With data parallelism only rank 0 writes snapshots.
            save_parameters = (
                not args.use_data_parallel or
                fluid.dygraph.parallel.Env().local_rank == 0)
            if save_parameters and iter_id > 1 and iter_id % cfg.snapshot_iter == 0:
                fluid.save_dygraph(model.state_dict(), args.model_save_dir + "/yolov3_{}".format(iter_id))
コード例 #15
0
def get_model(args, is_train, main_prog, startup_prog):
    """Build the classification network and its py_reader in ``main_prog``.

    Args:
        args: Options object. Reads ``data_format``, ``data_dir``,
            ``batch_size``, ``gpus``, ``model``, ``learning_rate`` and
            ``multi_batch_repeat``.
        is_train: When true, use the training reader and attach a Momentum
            optimizer with warm-up plus piecewise decay.
        main_prog: Program that receives the network.
        startup_prog: Program that receives the initialization ops.

    Returns:
        A tuple ``(avg_cost, optimizer, [batch_acc1, batch_acc5],
        batched_reader, pyreader)``. ``optimizer`` is ``None`` when
        ``is_train`` is false and ``batched_reader`` is always ``None``.
    """
    class_dim = 1000
    dshape = [3, 224, 224] if args.data_format == 'NCHW' else [224, 224, 3]

    if is_train:
        reader = train(data_dir=args.data_dir)
    else:
        reader = val(data_dir=args.data_dir)

    trainer_count = int(os.getenv("PADDLE_TRAINERS", "1"))
    optimizer = None

    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            reader_name = "train_reader" if is_train else "test_reader"
            pyreader = fluid.layers.py_reader(
                capacity=args.batch_size * args.gpus,
                shapes=([-1] + dshape, (-1, 1)),
                dtypes=('float32', 'int64'),
                name=reader_name,
                use_double_buffer=True)
            image, label = fluid.layers.read_file(pyreader)

            network = models.__dict__[args.model](layers=50,
                                                  is_train=is_train)
            predict = network.net(image, class_dim=class_dim)

            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(x=cost)

            batch_acc1 = fluid.layers.accuracy(input=predict, label=label, k=1)
            batch_acc5 = fluid.layers.accuracy(input=predict, label=label, k=5)

            if is_train:
                start_lr = args.learning_rate
                # n * worker * repeat
                end_lr = args.learning_rate * trainer_count * args.multi_batch_repeat
                total_images = 1281167 / trainer_count
                images_per_step = (args.batch_size * args.gpus *
                                   args.multi_batch_repeat)
                step = int(total_images / images_per_step + 1)
                warmup_steps = step * 5  # warmup 5 passes

                # Decay by 10x at the start of passes 30, 60 and 80.
                bd = [step * epoch for epoch in [30, 60, 80]]
                lr_values = [end_lr * (0.1**i) for i in range(len(bd) + 1)]

                # NOTE: we put weight decay in layers config, and remove
                # weight decay on bn layers, so don't add weight decay in
                # optimizer config.
                decayed_lr = fluid.layers.piecewise_decay(boundaries=bd,
                                                          values=lr_values)
                optimizer = fluid.optimizer.Momentum(
                    learning_rate=utils.learning_rate.lr_warmup(
                        decayed_lr, warmup_steps, start_lr, end_lr),
                    momentum=0.9)
                optimizer.minimize(avg_cost)

    # Data is supplied through the py_reader, so no separate batched reader
    # is handed back to the caller.
    batched_reader = None
    pyreader.decorate_paddle_reader(
        paddle.batch(reader, batch_size=args.batch_size))

    accuracies = [batch_acc1, batch_acc5]
    return avg_cost, optimizer, accuracies, batched_reader, pyreader
コード例 #16
0
def train(args,
          data_args,
          train_params,
          train_file_list,
          val_file_list):
    """Train the detector through py_readers, evaluate and save periodically.

    Separate train and test programs are built with ``build_program`` on a
    shared startup program. Every tenth epoch and on the final epoch the
    model is evaluated and saved. With ``args.enable_ce`` set, seeds are
    fixed, shuffling is disabled and KPI lines are printed at the end.

    Args:
        args: Options object. Reads ``model_save_dir``, ``pretrained_model``,
            ``use_gpu``, ``parallel`` and ``enable_ce``.
        data_args: Data settings forwarded to ``reader.train``/``reader.test``.
        train_params: Mapping that provides ``'batch_size'`` and
            ``'epoc_num'``; also forwarded to ``build_program``.
        train_file_list: Training list forwarded to ``reader.train``.
        val_file_list: Validation list forwarded to ``reader.test``.
    """
    model_save_dir = args.model_save_dir
    pretrained_model = args.pretrained_model
    use_gpu = args.use_gpu
    parallel = args.parallel
    enable_ce = args.enable_ce
    is_shuffle = True

    if use_gpu:
        devices_num = fluid.core.get_cuda_device_count()
    else:
        devices_num = int(os.environ.get('CPU_NUM',
                          multiprocessing.cpu_count()))

    batch_size = train_params['batch_size']
    epoc_num = train_params['epoc_num']
    batch_size_per_device = batch_size // devices_num
    num_workers = 8

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()

    if enable_ce:
        # Continuous evaluation needs deterministic runs.
        import random
        random.seed(0)
        np.random.seed(0)
        is_shuffle = False
        for program in (startup_prog, train_prog, test_prog):
            program.random_seed = 111

    train_py_reader, loss = build_program(
        main_prog=train_prog,
        startup_prog=startup_prog,
        train_params=train_params,
        is_train=True)
    test_py_reader, map_eval, _, _ = build_program(
        main_prog=test_prog,
        startup_prog=startup_prog,
        train_params=train_params,
        is_train=False)

    test_prog = test_prog.clone(for_test=True)
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if pretrained_model:
        def if_exist(var):
            # Load only the variables that have a file in the directory.
            return os.path.exists(os.path.join(pretrained_model, var.name))
        fluid.io.load_vars(exe, pretrained_model, main_program=train_prog,
                           predicate=if_exist)

    if parallel:
        loss.persistable = True
        build_strategy = fluid.BuildStrategy()
        build_strategy.enable_inplace = True
        build_strategy.memory_optimize = True
        train_exe = fluid.ParallelExecutor(main_program=train_prog,
            use_cuda=use_gpu, loss_name=loss.name, build_strategy=build_strategy)

    def make_train_reader():
        # A fresh training reader is built up front and again for each epoch.
        return reader.train(data_args,
                            train_file_list,
                            batch_size_per_device,
                            shuffle=is_shuffle,
                            num_workers=num_workers,
                            enable_ce=enable_ce)

    train_reader = make_train_reader()
    test_reader = reader.test(data_args, val_file_list, batch_size)
    train_py_reader.decorate_paddle_reader(train_reader)
    test_py_reader.decorate_paddle_reader(test_reader)

    def save_model(postfix, main_prog):
        """Replace ``model_save_dir/postfix`` with ``main_prog``'s persistables."""
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        print('save models to %s' % (model_path))
        fluid.io.save_persistables(exe, model_path, main_program=main_prog)

    best_map = 0.

    def test(epoc_id, best_map):
        """Evaluate once; return ``(best_map, mean of the sampled maps)``."""
        _, accum_map = map_eval.get_map_var()
        map_eval.reset(exe)
        sampled_maps = []  # for CE
        test_py_reader.start()
        try:
            batch_id = 0
            # Runs until the reader is exhausted and raises EOFException.
            while True:
                test_map, = exe.run(test_prog, fetch_list=[accum_map])
                if batch_id % 10 == 0:
                    sampled_maps.append(test_map)
                    print("Batch {0}, map {1}".format(batch_id, test_map))
                batch_id += 1
        except fluid.core.EOFException:
            test_py_reader.reset()
        mean_map = np.mean(sampled_maps)
        final_map = test_map[0]
        print("Epoc {0}, test map {1}".format(epoc_id, final_map))
        if final_map > best_map:
            best_map = final_map
            save_model('best_model', test_prog)
        return best_map, mean_map

    total_time = 0.0
    for epoc_id in range(epoc_num):
        train_reader = make_train_reader()
        train_py_reader.decorate_paddle_reader(train_reader)
        epoch_idx = epoc_id + 1
        start_time = time.time()
        prev_start_time = start_time
        every_epoc_loss = []
        batch_id = 0
        train_py_reader.start()
        while True:
            try:
                prev_start_time = start_time
                start_time = time.time()
                if parallel:
                    fetched = train_exe.run(fetch_list=[loss.name])
                else:
                    fetched = exe.run(train_prog, fetch_list=[loss])
                loss_v, = fetched
                loss_v = np.mean(np.array(loss_v))
                every_epoc_loss.append(loss_v)
                if batch_id % 10 == 0:
                    print("Epoc {:d}, batch {:d}, loss {:.6f}, time {:.5f}".format(
                        epoc_id, batch_id, loss_v, start_time - prev_start_time))
                batch_id += 1
            except (fluid.core.EOFException, StopIteration):
                train_reader().close()
                train_py_reader.reset()
                break

        end_time = time.time()
        total_time += end_time - start_time
        is_last_epoch = epoc_id == epoc_num - 1
        if epoc_id % 10 == 0 or is_last_epoch:
            best_map, mean_map = test(epoc_id, best_map)
            print("Best test map {0}".format(best_map))
            # save model
            save_model(str(epoc_id), train_prog)

    if not enable_ce:
        return

    train_avg_loss = np.mean(every_epoc_loss)
    if devices_num == 1:
        print("kpis\ttrain_cost\t%s" % train_avg_loss)
        print("kpis\ttest_acc\t%s" % mean_map)
        print("kpis\ttrain_speed\t%s" % (total_time / epoch_idx))
    else:
        print("kpis\ttrain_cost_card%s\t%s" %
               (devices_num, train_avg_loss))
        print("kpis\ttest_acc_card%s\t%s" %
               (devices_num, mean_map))
        print("kpis\ttrain_speed_card%s\t%f" %
               (devices_num, total_time / epoch_idx))
コード例 #17
0
def train(train_file_list,
          data_args,
          init_model_path,
          save_dir,
          dev_file_list=None):
    """Train the VGG-SSD network with the ``paddle.trainer.SGD`` API.

    Args:
        train_file_list: Training list forwarded to ``reader.train``.
        data_args: Data settings forwarded to the readers.
        init_model_path: Optional gzip-compressed parameter tar used to
            initialize the parameters; ``None`` skips initialization.
        save_dir: Directory that receives ``params_pass_XXXXX.tar.gz``
            every 20 passes.
        dev_file_list: Optional evaluation list; when given, the model is
            tested at the end of every pass.
    """
    optimizer = paddle.optimizer.Momentum(
        momentum=cfg.TRAIN.MOMENTUM,
        learning_rate=cfg.TRAIN.LEARNING_RATE,
        regularization=paddle.optimizer.L2Regularization(
            rate=cfg.TRAIN.L2REGULARIZATION),
        learning_rate_decay_a=cfg.TRAIN.LEARNING_RATE_DECAY_A,
        learning_rate_decay_b=cfg.TRAIN.LEARNING_RATE_DECAY_B,
        learning_rate_schedule=cfg.TRAIN.LEARNING_RATE_SCHEDULE)

    cost, detect_out = vgg_ssd_net.net_conf("train")

    parameters = paddle.parameters.create(cost)
    if init_model_path is not None:
        assert os.path.isfile(init_model_path), "Invalid model."
        # Close the archive once the parameters have been read from it.
        with gzip.open(init_model_path) as init_file:
            parameters.init_from_tar(init_file)

    trainer = paddle.trainer.SGD(cost=cost,
                                 parameters=parameters,
                                 extra_layers=[detect_out],
                                 update_equation=optimizer)

    # Maps each data-layer name to its position in a reader sample.
    feeding = {"image": 0, "bbox": 1}

    train_reader = paddle.batch(
        reader.train(data_args, train_file_list),
        batch_size=cfg.TRAIN.BATCH_SIZE)  # generate a batch image each time

    if dev_file_list is not None:
        dev_reader = paddle.batch(reader.test(data_args, dev_file_list),
                                  batch_size=cfg.TRAIN.BATCH_SIZE)

    def event_handler(event):
        """Log each iteration; checkpoint and evaluate at the end of passes."""
        if isinstance(event, paddle.event.EndIteration):
            # Log interval is 1, i.e. every batch is reported.
            if (event.batch_id + 1) % 1 == 0:
                print("Pass %d, Batch %d, TrainCost %f, Detection mAP=%f" %
                      (event.pass_id, event.batch_id + 1, event.cost,
                       event.metrics["detection_evaluator"]))
                sys.stdout.flush()

        if isinstance(event, paddle.event.EndPass):
            # Save a checkpoint after every 20th pass.
            if not (event.pass_id + 1) % 20:
                with gzip.open(
                        os.path.join(save_dir, "params_pass_%05d.tar.gz" %
                                     event.pass_id), "w") as f:
                    trainer.save_parameter_to_tar(f)

            if dev_file_list is not None:
                result = trainer.test(reader=dev_reader, feeding=feeding)
                print("Test with Pass %d, TestCost: %f, Detection mAP=%g" %
                      (event.pass_id, result.cost,
                       result.metrics["detection_evaluator"]))

    trainer.train(reader=train_reader,
                  event_handler=event_handler,
                  num_passes=cfg.TRAIN.NUM_PASS,
                  feeding=feeding)
コード例 #18
0
def train(args, data_args, train_params, train_file_list, val_file_list):
    """Build the train/test programs and run them through a ``Compressor``.

    The compression run is configured from ``./compress.yaml``.

    Args:
        args: Options object. Reads ``model_save_dir``, ``pretrained_model``,
            ``use_gpu``, ``parallel``, ``enable_ce`` and ``use_multiprocess``.
        data_args: Data settings forwarded to ``reader.train``/``reader.test``.
        train_params: Mapping that provides ``'batch_size'`` and
            ``'epoc_num'``; also forwarded to ``build_program``.
        train_file_list: Training list forwarded to ``reader.train``.
        val_file_list: Validation list forwarded to ``reader.test``.
    """
    model_save_dir = args.model_save_dir
    pretrained_model = args.pretrained_model
    use_gpu = args.use_gpu
    parallel = args.parallel
    enable_ce = args.enable_ce
    is_shuffle = True

    if not use_gpu:
        devices_num = int(
            os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    else:
        devices_num = fluid.core.get_cuda_device_count()

    batch_size = train_params['batch_size']
    epoc_num = train_params['epoc_num']
    batch_size_per_device = batch_size // devices_num
    num_workers = 8

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()

    train_py_reader, loss = build_program(main_prog=train_prog,
                                          startup_prog=startup_prog,
                                          train_params=train_params,
                                          is_train=True)
    test_py_reader, map_var, _, _ = build_program(main_prog=test_prog,
                                                  startup_prog=startup_prog,
                                                  train_params=train_params,
                                                  is_train=False)

    test_prog = test_prog.clone(for_test=True)

    # List the convolution parameters and their shapes. The formatted form
    # prints the same text on Python 2 and Python 3.
    for param in train_prog.global_block().all_parameters():
        if 'conv' in param.name:
            print("%s %s" % (param.name, param.shape))
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if pretrained_model:

        def if_exist(var):
            # Load only the variables that have a file in the directory.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe,
                           pretrained_model,
                           main_program=train_prog,
                           predicate=if_exist)

    test_reader = reader.test(data_args, val_file_list, batch_size)
    test_py_reader.decorate_paddle_reader(test_reader)
    train_reader = reader.train(data_args,
                                train_file_list,
                                batch_size_per_device,
                                shuffle=is_shuffle,
                                use_multiprocess=args.use_multiprocess,
                                num_workers=num_workers,
                                enable_ce=enable_ce)
    train_py_reader.decorate_paddle_reader(train_reader)

    # Fetch lists are (label, variable name) pairs.
    train_fetch_list = [("loss", loss.name)]
    val_fetch_list = [("map", map_var.name)]
    # Feed lists are None because both programs read from py_readers.
    compressor = Compressor(place,
                            fluid.global_scope(),
                            train_prog,
                            train_reader=train_py_reader,
                            train_feed_list=None,
                            train_fetch_list=train_fetch_list,
                            eval_program=test_prog,
                            eval_reader=test_py_reader,
                            eval_feed_list=None,
                            eval_fetch_list=val_fetch_list,
                            train_optimizer=None)
    compressor.config('./compress.yaml')
    compressor.run()
コード例 #19
0
def train(args,
          data_args,
          train_file_list,
          learning_rate,
          batch_size,
          num_passes,
          model_save_dir,
          pretrained_model=None,
          with_memory_optimization=None):
    """Train ``net.CSRNet`` with SGD and save the model periodically.

    The model is saved every tenth pass and after the final pass. When
    ``args.for_model_ce`` is set, throughput and mean loss are written to
    ``train_speed_factor.txt`` and ``train_cost_factor.txt`` after the last
    pass.

    Args:
        args: Options object. Reads ``use_gpu``, ``parallel``,
            ``for_model_ce``, ``iterations`` and ``skip_batch_num``.
        data_args: Data settings. Reads ``resize_h`` and ``resize_w``; also
            forwarded to ``reader.train``.
        train_file_list: Training list forwarded to ``reader.train``.
        learning_rate: Accepted for interface compatibility; the optimizer
            uses a fixed rate of ``1e-6``.
        batch_size: Number of samples per batch.
        num_passes: Number of passes over the training data.
        model_save_dir: Directory under which ``save_model`` writes.
        pretrained_model: Optional directory whose variables are loaded
            before training starts.
        with_memory_optimization: When truthy, run ``fluid.memory_optimize``
            on the default main program.
    """
    image_shape = [3, data_args.resize_h, data_args.resize_w]
    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))

    image = fluid.layers.data(name='image',shape=[3,1920,1080], dtype='float32')
    size=[1920,1080]
    ground_truth = fluid.layers.data(name='ground_truth',shape=[1,size[1],size[0]], dtype='float32')
    csr_net = net.CSRNet(image,size)
    cost = fluid.layers.cos_sim(csr_net,ground_truth)
    avg_cost = fluid.layers.mean(x=cost)
    epocs = 2859 / batch_size
    optimizer = fluid.optimizer.SGD(1e-6)
    optimizer.minimize(avg_cost)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:
        def if_exist(var):
            # Load only the variables that have a file in the directory.
            return os.path.exists(os.path.join(pretrained_model, var.name))
        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)
    if with_memory_optimization:
        fluid.memory_optimize(fluid.default_main_program())
    if args.parallel:
        train_exe = fluid.ParallelExecutor(
            use_cuda=args.use_gpu, loss_name=avg_cost.name)

    train_reader = paddle.batch(
        reader.train(data_args, train_file_list), batch_size=batch_size)
    feeder = fluid.DataFeeder(
        place=place, feed_list=[image, ground_truth])

    def save_model(postfix):
        """Replace ``model_save_dir/postfix`` with the current persistables."""
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        print ('save models to %s' % (model_path))
        fluid.io.save_persistables(exe, model_path)

    best_map = 0.
    train_num = 0
    total_train_time = 0.0
    for pass_id in range(num_passes):
        start_time = time.time()
        prev_start_time = start_time
        every_pass_loss = []
        iter_in_pass = 0
        pass_duration = 0.0
        for batch_id, data in enumerate(train_reader()):
            # Debug output of the first sample's second field; the
            # single-argument call form behaves the same on Python 2 and 3.
            print(data[0][1])
            print(np.array(data[0][1]))
            prev_start_time = start_time
            start_time = time.time()
            if args.for_model_ce and iter_in_pass == args.iterations:
                break
            if len(data) < (devices_num):
                print("There are too few data to train on all devices.")
                continue
            if args.parallel:
                loss_v, = train_exe.run(fetch_list=[avg_cost.name],
                                        feed=feeder.feed(data))
            else:
                loss_v, = exe.run(fluid.default_main_program(),
                                  feed=feeder.feed(data),
                                  fetch_list=[avg_cost])
            loss_v = np.mean(np.array(loss_v))
            if batch_id % 20 == 0:
                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
                    pass_id, batch_id, loss_v, start_time - prev_start_time))

            # Measure every batch after pass 0; in pass 0 measure only once
            # the warm-up batches have been skipped (CE mode).
            past_warmup = (args.for_model_ce and
                           iter_in_pass >= args.skip_batch_num)
            if past_warmup or pass_id != 0:
                batch_duration = time.time() - start_time
                pass_duration += batch_duration
                train_num += len(data)
                every_pass_loss.append(loss_v)
            # Count every trained batch; if the counter only advanced inside
            # the branch above, the warm-up in pass 0 could never finish.
            iter_in_pass += 1
        total_train_time += pass_duration

        if args.for_model_ce and pass_id == num_passes - 1:
            examples_per_sec = train_num / total_train_time
            mean_cost = np.mean(every_pass_loss)
            with open("train_speed_factor.txt", 'w') as f:
                f.write('{:f}\n'.format(examples_per_sec))
            with open("train_cost_factor.txt", 'a+') as f:
                f.write('{:f}\n'.format(mean_cost))

        if pass_id % 10 == 0 or pass_id == num_passes - 1:
            save_model(str(pass_id))
コード例 #20
0
def train():
    """Build the RCNN training program from ``cfg`` and run training.

    The function takes no arguments; every setting is read from the
    module-level ``cfg``.  It builds the model and its losses, attaches a
    Momentum optimizer driven by ``exponential_with_warmup_decay``,
    optionally loads pre-trained variables, and then runs either the
    py_reader loop or the feeder loop depending on ``cfg.use_pyreader``.
    Persistables are saved every ``cfg.TRAIN.snapshot_iter`` iterations and
    once more under ``model_final`` when the loop returns.
    """
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]

    if cfg.enable_ce:
        # Pin every seed so that CE runs are repeatable.
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    # An unset or empty CUDA_VISIBLE_DEVICES still counts as one device,
    # because "".split(",") yields a single element.
    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    if cfg.enable_ce:
        use_random = False
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
        use_random=use_random)
    model.build_model(image_shape)
    losses, keys = model.loss()
    # The first loss is the one handed to the optimizer; all of them are
    # fetched for logging.
    loss = losses[0]
    fetch_list = losses

    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    lr = exponential_with_warmup_decay(learning_rate=learning_rate,
                                       boundaries=boundaries,
                                       values=values,
                                       warmup_iter=cfg.warm_up_iter,
                                       warmup_factor=cfg.warm_up_factor)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr,
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)
    # The learning rate is appended last; the loops below rely on it being
    # the final fetched value (outs[-1]).
    fetch_list = fetch_list + [lr]

    for var in fetch_list:
        var.persistable = True

    #fluid.memory_optimize(fluid.default_main_program(), skip_opt_set=set(fetch_list))

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:

        def if_exist(var):
            # Only load variables that have a file in the pre-trained dir.
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.use_experimental_executor = True
        train_exe = fluid.ParallelExecutor(use_cuda=bool(cfg.use_gpu),
                                           loss_name=loss.name,
                                           build_strategy=build_strategy,
                                           exec_strategy=exec_strategy)
    else:
        train_exe = exe

    shuffle = True
    if cfg.enable_ce:
        shuffle = False
    if cfg.use_pyreader:
        train_reader = reader.train(batch_size=cfg.TRAIN.im_per_batch,
                                    total_batch_size=total_batch_size,
                                    padding_total=cfg.TRAIN.padding_minibatch,
                                    shuffle=shuffle)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        train_reader = reader.train(batch_size=total_batch_size,
                                    shuffle=shuffle)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def save_model(postfix):
        """Save persistables to ``cfg.model_save_dir/postfix``, replacing any old copy."""
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    def log_iteration(train_stats, iter_id, outs, iter_time):
        """Update the smoothed statistics with ``outs`` and print one log line."""
        stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
        train_stats.update(stats)
        logs = train_stats.log()
        strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
            now_time(), iter_id, np.mean(outs[-1]), logs, iter_time)
        print(strs)
        sys.stdout.flush()

    def report_ce_kpis(num_iters, total_time, last_loss):
        """Print the CE KPI lines: mean time per iteration and the last loss."""
        print("kpis\teach_pass_duration_card%s\t%s" %
              (devices_num, total_time / num_iters))
        print("kpis\ttrain_loss_card%s\t%s" % (devices_num, last_loss))

    def train_loop_pyreader():
        """Run up to ``cfg.max_iter`` iterations pulling data from ``py_reader``.

        If the reader is exhausted first, it is reset and the loop ends
        without printing the CE KPIs.
        """
        py_reader.start()
        train_stats = TrainingStats(cfg.log_window, keys)
        outs = None
        iter_id = -1
        try:
            # loop_start is kept separately: start_time is overwritten on
            # every iteration and cannot be used for the total duration.
            loop_start = time.time()
            start_time = loop_start
            for iter_id in range(cfg.max_iter):
                prev_start_time = start_time
                start_time = time.time()
                outs = train_exe.run(fetch_list=[v.name for v in fetch_list])
                log_iteration(train_stats, iter_id, outs,
                              start_time - prev_start_time)
                if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                    save_model("model_iter{}".format(iter_id))
            total_time = time.time() - loop_start
            # outs stays None when cfg.max_iter is 0; nothing to report then.
            if cfg.enable_ce and outs is not None:
                report_ce_kpis(iter_id + 1, total_time,
                               np.array(outs[0]).mean())
        except (StopIteration, fluid.core.EOFException):
            py_reader.reset()

    def train_loop():
        """Run the feeder-based loop for at most ``cfg.max_iter`` iterations.

        Returns:
            The mean of the first fetched loss from the last iteration, or
            ``None`` when the reader produced no batch at all.
        """
        loop_start = time.time()
        start_time = loop_start
        train_stats = TrainingStats(cfg.log_window, keys)
        outs = None
        iter_id = -1
        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            outs = train_exe.run(fetch_list=[v.name for v in fetch_list],
                                 feed=feeder.feed(data))
            log_iteration(train_stats, iter_id, outs,
                          start_time - prev_start_time)
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
            if (iter_id + 1) == cfg.max_iter:
                break
        total_time = time.time() - loop_start
        if outs is None:
            # The reader yielded nothing, so there is no loss to report.
            return None
        last_loss = np.array(outs[0]).mean()
        # only for ce
        if cfg.enable_ce:
            report_ce_kpis(iter_id + 1, total_time, last_loss)

        return last_loss

    if cfg.use_pyreader:
        train_loop_pyreader()
    else:
        train_loop()
    save_model('model_final')
コード例 #21
0
ファイル: train.py プロジェクト: fty8788/PaddleDNN
def train(data_path=None,
          model_type=ModelType.create_classification(),
          batch_size=100,
          num_passes=50,
          class_num=None,
          num_workers=1,
          use_gpu=False):
    '''
    Train the DNN.

    The network is built on top of ``create_dnn`` and is topped with either
    a softmax classifier or a linear regressor depending on ``model_type``.
    After every pass the model is evaluated on ``reader.test`` and its
    parameters are written to a tar file whose name starts with
    ``args.model_output_prefix``.

    :param data_path: path handed to ``reader.train`` / ``reader.test``.
    :param model_type: object answering ``is_classification()`` and
        ``is_regression()``; selects the output layer and the cost.
    :param batch_size: mini-batch size; the shuffle buffer is ten times it.
    :param num_passes: number of passes over the training data.
    :param class_num: number of classes (classification only).
    :param num_workers: forwarded to ``paddle.init`` as ``trainer_count``.
    :param use_gpu: forwarded to ``paddle.init``.
    :raises ValueError: if ``model_type`` is neither classification nor
        regression.
    '''
    paddle.init(use_gpu=use_gpu, trainer_count=num_workers)

    # The model type comes from the parameter, not from the global ``args``,
    # so the function honours what the caller passes in.
    is_classification = model_type.is_classification()

    # network config
    input_layer = paddle.layer.data(
        name='input_layer', type=paddle.data_type.dense_vector(feature_dim))
    dnn = create_dnn(input_layer)
    if is_classification:
        prediction = paddle.layer.fc(
            input=dnn, size=class_num, act=paddle.activation.Softmax())
        label = paddle.layer.data(
            name='label', type=paddle.data_type.integer_value(class_num))
        cost = paddle.layer.classification_cost(input=prediction, label=label)
    elif model_type.is_regression():
        prediction = paddle.layer.fc(
            input=dnn, size=1, act=paddle.activation.Linear())
        label = paddle.layer.data(
            name='label', type=paddle.data_type.dense_vector(1))
        cost = paddle.layer.mse_cost(input=prediction, label=label)
    else:
        # Fail here instead of passing cost=None further down.
        raise ValueError("unsupported model type: %s" % str(model_type))

    # create parameters
    parameters = paddle.parameters.create(cost)

    # create optimizer
    optimizer = paddle.optimizer.Momentum(momentum=0)

    trainer = paddle.trainer.SGD(
        cost=cost,
        extra_layers=paddle.evaluator.auc(input=prediction, label=label),
        parameters=parameters, update_equation=optimizer)

    feeding = {'input_layer': 0, 'label': 1}

    # event_handler to print training and testing info
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print("Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics))

        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                reader=paddle.batch(reader.test(data_path,
                                                feature_dim + 1,
                                                is_classification),
                                    batch_size=batch_size),
                feeding=feeding)
            print("Test %d, Cost %f, %s" % (
                event.pass_id, result.cost, result.metrics))

            model_desc = "{type}".format(type=str(model_type))
            # Binary mode: the tar archive is not text.
            with open("%sdnn_%s_pass_%05d.tar" %
                      (args.model_output_prefix, model_desc,
                       event.pass_id), "wb") as f:
                parameters.to_tar(f)

    # training
    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(reader.train(data_path,
                                               feature_dim + 1,
                                               is_classification),
                                  buf_size=batch_size * 10),
            batch_size=batch_size),
        feeding=feeding,
        event_handler=event_handler,
        num_passes=num_passes)
コード例 #22
0
def train_async(args):
    """Train the feature-embedding model and periodically evaluate recall.

    Builds a training program and a test program with ``build_program``,
    optionally restores a checkpoint and/or pre-trained parameters, then
    iterates over ``train_loader`` while ``iter_no <= args.total_iter_num``.
    Training statistics are printed every ``args.display_iter_step``
    iterations, recall@1 on the test set is computed every
    ``args.test_iter_step`` iterations, and the program is saved every
    ``args.save_iter_step`` iterations.

    Args:
        args: parsed command-line arguments; the attributes read here are
            ``model``, ``checkpoint``, ``pretrained_model``,
            ``model_save_dir``, ``use_gpu``, ``train_batch_size``,
            ``test_batch_size``, ``total_iter_num``, ``display_iter_step``,
            ``test_iter_step`` and ``save_iter_step``.

    Raises:
        AssertionError: if ``args.train_batch_size`` is not divisible by the
            number of devices.
    """
    # parameters from arguments

    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir
    if not os.path.exists(model_save_dir):
        # makedirs also copes with a nested, not-yet-existing save path.
        os.makedirs(model_save_dir)
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()

    train_loader, train_cost, global_lr, train_feas, train_label = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_loader, test_feas = build_program(is_train=False,
                                           main_prog=tmp_prog,
                                           startup_prog=startup_prog,
                                           args=args)
    test_prog = tmp_prog.clone(for_test=True)

    train_fetch_list = [
        global_lr.name, train_cost.name, train_feas.name, train_label.name
    ]
    test_fetch_list = [test_feas.name]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    if num_trainers <= 1 and args.use_gpu:
        places = fluid.framework.cuda_places()
    else:
        places = place

    exe.run(startup_prog)

    if checkpoint is not None:
        fluid.load(program=train_prog, model_path=checkpoint, executor=exe)

    if pretrained_model:
        load_params(exe, train_prog, pretrained_model)

    if args.use_gpu:
        devicenum = get_gpu_num()
    else:
        devicenum = int(os.environ.get('CPU_NUM', 1))
    assert (args.train_batch_size % devicenum) == 0
    # Floor division keeps the per-device batch size an int on Python 3;
    # the assert above guarantees the division is exact.
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size

    train_loader.set_sample_generator(reader.train(args),
                                      batch_size=train_batch_size,
                                      drop_last=True,
                                      places=places)

    test_loader.set_sample_generator(reader.test(args),
                                     batch_size=test_batch_size,
                                     drop_last=False,
                                     places=place)

    train_exe = fluid.ParallelExecutor(main_program=train_prog,
                                       use_cuda=args.use_gpu,
                                       loss_name=train_cost.name)

    totalruntime = 0
    iter_no = 0
    # Running sums: [loss, recall@1, number of batches accumulated].
    train_info = [0, 0, 0]
    while iter_no <= args.total_iter_num:
        for train_batch in train_loader():
            t1 = time.time()
            lr, loss, feas, label = train_exe.run(feed=train_batch,
                                                  fetch_list=train_fetch_list)
            t2 = time.time()
            period = t2 - t1
            lr = np.mean(np.array(lr))
            train_info[0] += np.mean(np.array(loss))
            train_info[1] += recall_topk(feas, label, k=1)
            train_info[2] += 1
            if iter_no % args.display_iter_step == 0:
                avgruntime = totalruntime / args.display_iter_step
                avg_loss = train_info[0] / train_info[2]
                avg_recall = train_info[1] / train_info[2]
                print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
                    "recall %.4f, time %2.2f sec" % \
                    (fmt_time(), iter_no, lr, avg_loss, avg_recall, avgruntime))
                sys.stdout.flush()
                totalruntime = 0
            if iter_no % 1000 == 0:
                train_info = [0, 0, 0]

            # The current batch is counted towards the next display window.
            totalruntime += period

            if iter_no % args.test_iter_step == 0 and iter_no != 0:
                f, l = [], []
                for batch_id, test_batch in enumerate(test_loader()):
                    test_t1 = time.time()
                    [test_out] = exe.run(test_prog,
                                         feed=test_batch,
                                         fetch_list=test_fetch_list)

                    test_label = np.asarray(test_batch[0]['label'])
                    test_label = np.squeeze(test_label)
                    f.append(test_out)
                    l.append(test_label)

                    test_period = time.time() - test_t1
                    if batch_id % 20 == 0:
                        print("[%s] testbatch %d, time %2.2f sec" % \
                            (fmt_time(), batch_id, test_period))

                f = np.vstack(f)
                l = np.hstack(l)
                recall = recall_topk(f, l, k=1)
                print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
                    (fmt_time(), len(f), iter_no, recall))
                sys.stdout.flush()

            if iter_no % args.save_iter_step == 0 and iter_no != 0:
                model_path = os.path.join(model_save_dir, model_name,
                                          str(iter_no))
                fluid.save(program=train_prog, model_path=model_path)

            iter_no += 1
コード例 #23
0
def compress(args):
    """Build a classification network and run the ``Compressor`` on it.

    The student network named by ``args.model`` is built on the default
    main program, optionally initialised from ``args.pretrained_model``,
    and handed to ``Compressor`` together with train/eval readers.  When
    ``args.teacher_model`` is set, a teacher program is built, loaded from
    ``args.teacher_pretrained_model`` and passed along for distillation.

    Args:
        args: parsed command-line arguments; the attributes read here are
            ``image_shape``, ``model``, ``class_dim``, ``quant_only``,
            ``total_images``, ``batch_size``, ``use_gpu``,
            ``pretrained_model``, ``teacher_model``,
            ``teacher_pretrained_model`` and ``compress_config``.

    Raises:
        AssertionError: if ``args.model`` is not in ``model_list``, or if a
            teacher model is requested without an existing
            ``teacher_pretrained_model`` path.
    """
    image_shape = [int(m) for m in args.image_shape.split(",")]

    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()

    # Compare by value: identity (`is`) against a string literal is not
    # guaranteed to hold for a string parsed from the command line.
    if args.model == "GoogleNet":
        out0, out1, out2 = model.net(input=image, class_dim=args.class_dim)
        cost0 = fluid.layers.cross_entropy(input=out0, label=label)
        cost1 = fluid.layers.cross_entropy(input=out1, label=label)
        cost2 = fluid.layers.cross_entropy(input=out2, label=label)
        avg_cost0 = fluid.layers.mean(x=cost0)
        avg_cost1 = fluid.layers.mean(x=cost1)
        avg_cost2 = fluid.layers.mean(x=cost2)
        # The two auxiliary heads contribute with weight 0.3 each.
        avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2
        acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5)
    else:
        out = model.net(input=image, class_dim=args.class_dim)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    # Snapshot the program before any optimizer ops are added to it.
    val_program = fluid.default_main_program().clone()
    if args.quant_only:
        boundaries = [
            args.total_images / args.batch_size * 10,
            args.total_images / args.batch_size * 16
        ]
        values = [1e-4, 1e-5, 1e-6]
    else:
        boundaries = [
            args.total_images / args.batch_size * 30,
            args.total_images / args.batch_size * 60,
            args.total_images / args.batch_size * 90
        ]
        values = [0.1, 0.01, 0.001, 0.0001]
    opt = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries,
                                                   values=values),
        regularization=fluid.regularizer.L2Decay(4e-5))

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            # Only load variables that have a file in the pre-trained dir.
            return os.path.exists(os.path.join(args.pretrained_model,
                                               var.name))

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(reader.val(), batch_size=args.batch_size)
    val_feed_list = [('image', image.name), ('label', label.name)]
    val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5', acc_top5.name)]

    train_reader = paddle.batch(reader.train(),
                                batch_size=args.batch_size,
                                drop_last=True)
    train_feed_list = [('image', image.name), ('label', label.name)]
    train_fetch_list = [('loss', avg_cost.name)]

    teacher_programs = []
    distiller_optimizer = None
    if args.teacher_model:
        teacher_model = models.__dict__[args.teacher_model]()
        # define teacher program
        teacher_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(teacher_program, startup_program):
            img = teacher_program.global_block()._clone_variable(
                image, force_persistable=False)
            predict = teacher_model.net(img,
                                        class_dim=args.class_dim,
                                        conv1_name='res_conv1',
                                        fc_name='res_fc')
        exe.run(startup_program)
        assert args.teacher_pretrained_model and os.path.exists(
            args.teacher_pretrained_model
        ), "teacher_pretrained_model should be set when teacher_model is not None."

        def if_exist(var):
            # Only load variables that have a file in the teacher dir.
            return os.path.exists(
                os.path.join(args.teacher_pretrained_model, var.name))

        fluid.io.load_vars(exe,
                           args.teacher_pretrained_model,
                           main_program=teacher_program,
                           predicate=if_exist)

        distiller_optimizer = opt
        teacher_programs.append(teacher_program.clone(for_test=True))

    com_pass = Compressor(place,
                          fluid.global_scope(),
                          fluid.default_main_program(),
                          train_reader=train_reader,
                          train_feed_list=train_feed_list,
                          train_fetch_list=train_fetch_list,
                          eval_program=val_program,
                          eval_reader=val_reader,
                          eval_feed_list=val_feed_list,
                          eval_fetch_list=val_fetch_list,
                          teacher_programs=teacher_programs,
                          train_optimizer=opt,
                          distiller_optimizer=distiller_optimizer)
    com_pass.config(args.compress_config)
    com_pass.run()
コード例 #24
0
def train(args, config, train_params, train_file_list):
    """Train the face-detection model and save it after every epoch.

    Args:
        args: parsed command-line arguments; the attributes read here are
            ``use_gpu``, ``model_save_dir``, ``pretrained_model``,
            ``with_mem_opt``, ``enable_ce``, ``batch_num``, ``parallel``
            and ``use_pyramidbox``.
        config: passed through unchanged to ``reader.train``.
        train_params: dict providing ``batch_size``, ``epoc_num``,
            ``optimizer_method``, ``use_pyramidbox`` and ``train_images``;
            it is also forwarded to ``build_program``.
        train_file_list: passed through unchanged to ``reader.train``.

    Raises:
        ValueError: if a pre-trained model path is given but does not exist.
    """
    batch_size = train_params["batch_size"]
    epoc_num = train_params["epoc_num"]
    # NOTE(review): the next two values are read but never used below; the
    # logging branch consults args.use_pyramidbox instead. Confirm which
    # source is intended before removing them.
    optimizer_method = train_params["optimizer_method"]
    use_pyramidbox = train_params["use_pyramidbox"]

    use_gpu = args.use_gpu
    model_save_dir = args.model_save_dir
    pretrained_model = args.pretrained_model
    with_memory_optimization = args.with_mem_opt

    # An unset or empty CUDA_VISIBLE_DEVICES still counts as one device.
    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    batch_size_per_device = batch_size // devices_num
    iters_per_epoc = train_params["train_images"] // batch_size
    num_workers = 8
    is_shuffle = True

    startup_prog = fluid.Program()
    train_prog = fluid.Program()

    #only for ce
    if args.enable_ce:
        SEED = 102
        startup_prog.random_seed = SEED
        train_prog.random_seed = SEED
        num_workers = 1
        pretrained_model = ""
        if args.batch_num is not None:
            iters_per_epoc = args.batch_num

    train_py_reader, fetches, loss = build_program(train_params=train_params,
                                                   main_prog=train_prog,
                                                   startup_prog=startup_prog,
                                                   args=args)

    if with_memory_optimization:
        fluid.memory_optimize(train_prog)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    start_epoc = 0
    if pretrained_model:
        # A purely numeric value names a previously saved epoch directory
        # under model_save_dir; training resumes from the following epoch.
        if pretrained_model.isdigit():
            start_epoc = int(pretrained_model) + 1
            pretrained_model = os.path.join(model_save_dir, pretrained_model)
            print("Resume from %s " % (pretrained_model))

        if not os.path.exists(pretrained_model):
            raise ValueError(
                "The pre-trained model path [%s] does not exist." %
                (pretrained_model))

        def if_exist(var):
            # Only load variables that have a file in the pre-trained dir.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe,
                           pretrained_model,
                           main_program=train_prog,
                           predicate=if_exist)
    train_reader = reader.train(config,
                                train_file_list,
                                batch_size_per_device,
                                shuffle=is_shuffle,
                                num_workers=num_workers)
    train_py_reader.decorate_paddle_reader(train_reader)

    if args.parallel:
        train_exe = fluid.ParallelExecutor(main_program=train_prog,
                                           use_cuda=use_gpu,
                                           loss_name=loss.name)

    def save_model(postfix, program):
        """Save ``program``'s persistables to ``model_save_dir/postfix``."""
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)

        print('save models to %s' % (model_path))
        fluid.io.save_persistables(exe, model_path, main_program=program)

    total_time = 0.0
    epoch_idx = 0
    face_loss = 0
    head_loss = 0
    for pass_id in range(start_epoc, epoc_num):
        epoch_idx += 1
        # Kept separately from start_time, which is overwritten per batch
        # and therefore cannot be used to measure the whole epoch.
        epoch_start_time = time.time()
        start_time = epoch_start_time
        batch_id = 0
        train_py_reader.start()
        while True:
            try:
                prev_start_time = start_time
                start_time = time.time()
                if args.parallel:
                    fetch_vars = train_exe.run(
                        fetch_list=[v.name for v in fetches])
                else:
                    fetch_vars = exe.run(train_prog, fetch_list=fetches)
                fetch_vars = [np.mean(np.array(v)) for v in fetch_vars]
                face_loss = fetch_vars[0]
                head_loss = fetch_vars[1]
                if batch_id % 10 == 0:
                    if not args.use_pyramidbox:
                        print(
                            "Pass {:d}, batch {:d}, loss {:.6f}, time {:.5f}".
                            format(pass_id, batch_id, face_loss,
                                   start_time - prev_start_time))
                    else:
                        print("Pass {:d}, batch {:d}, face loss {:.6f}, " \
                              "head loss {:.6f}, " \
                              "time {:.5f}".format(pass_id,
                               batch_id, face_loss, head_loss,
                               start_time - prev_start_time))
                batch_id += 1
            except (fluid.core.EOFException, StopIteration):
                # The reader is exhausted: this epoch is done.
                train_py_reader.reset()
                break
        epoch_end_time = time.time()
        total_time += epoch_end_time - epoch_start_time
        save_model(str(pass_id), train_prog)

    # only for ce
    if args.enable_ce:
        gpu_num = get_cards(args)
        print("kpis\teach_pass_duration_card%s\t%s" %
              (gpu_num, total_time / epoch_idx))
        print("kpis\ttrain_face_loss_card%s\t%s" % (gpu_num, face_loss))
        print("kpis\ttrain_head_loss_card%s\t%s" % (gpu_num, head_loss))
コード例 #25
0
def train_async(args):
    """Train the classification program produced by ``build_program``.

    Optionally restores ``args.checkpoint`` and ``args.pretrained_model``,
    then iterates over the train loader while
    ``iter_no <= args.total_iter_num``.  Averaged loss / acc1 / acc5 are
    printed (and their accumulators cleared) every
    ``args.display_iter_step`` iterations, and the program is saved every
    ``args.save_iter_step`` iterations under
    ``<model_save_dir>/<model>/<iter_no>``.

    Args:
        args: parsed command-line arguments.

    Raises:
        AssertionError: if ``args.train_batch_size`` is not divisible by the
            number of devices.
    """
    logging.debug('enter train')

    # parameters from arguments
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()  # created here but not referenced again

    built = build_program(
        main_prog=train_prog, startup_prog=startup_prog, args=args)
    train_loader, train_cost, train_acc1, train_acc5, global_lr = built

    fetch_names = [
        var.name for var in (global_lr, train_cost, train_acc1, train_acc5)
    ]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    # A single GPU trainer feeds every visible CUDA place; any other setup
    # feeds only the executor's own place.
    single_gpu_trainer = num_trainers <= 1 and args.use_gpu
    places = fluid.framework.cuda_places() if single_gpu_trainer else place
    exe.run(startup_prog)

    logging.debug('after run startup program')

    if checkpoint is not None:
        fluid.load(program=train_prog, model_path=checkpoint, executor=exe)

    if pretrained_model:
        load_pretrain(train_prog, pretrained_model)

    devicenum = get_gpu_num() if args.use_gpu else 1
    assert (args.train_batch_size % devicenum) == 0
    per_device_batch = args.train_batch_size // devicenum

    train_loader.set_sample_generator(
        reader.train(args),
        batch_size=per_device_batch,
        drop_last=True,
        places=places)

    train_exe = fluid.ParallelExecutor(
        main_program=train_prog,
        use_cuda=args.use_gpu,
        loss_name=train_cost.name)

    window_runtime = 0
    iter_no = 0
    # Running sums over the current display window.
    loss_sum, acc1_sum, acc5_sum, batches_seen = 0, 0, 0, 0
    while iter_no <= args.total_iter_num:
        for train_batch in train_loader():
            tic = time.time()
            lr, loss, acc1, acc5 = train_exe.run(feed=train_batch,
                                                 fetch_list=fetch_names)
            toc = time.time()
            period = toc - tic

            lr = np.mean(np.array(lr))
            loss_sum += np.mean(np.array(loss))
            acc1_sum += np.mean(np.array(acc1))
            acc5_sum += np.mean(np.array(acc5))
            batches_seen += 1

            if iter_no % args.display_iter_step == 0:
                avgruntime = window_runtime / args.display_iter_step
                avg_loss = loss_sum / batches_seen
                avg_acc1 = acc1_sum / batches_seen
                avg_acc5 = acc5_sum / batches_seen
                print("[%s] trainbatch %d, lr %.6f, loss %.6f, "
                      "acc1 %.4f, acc5 %.4f, time %2.2f sec" %
                      (fmt_time(), iter_no, lr, avg_loss, avg_acc1,
                       avg_acc5, avgruntime))
                sys.stdout.flush()
                # Start a fresh window for both timing and metrics.
                window_runtime = 0
                loss_sum, acc1_sum, acc5_sum, batches_seen = 0, 0, 0, 0

            # The current batch is counted towards the next window.
            window_runtime += period

            if iter_no % args.save_iter_step == 0 and iter_no != 0:
                model_path = os.path.join(model_save_dir + '/' + model_name,
                                          str(iter_no))
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                fluid.save(program=train_prog, model_path=model_path)

            iter_no += 1
コード例 #26
0
def train(args):
    """Run quantization-aware training, evaluate every pass, and export models.

    The function builds a training and a test program with ``build_program``,
    rewrites both as IR graphs with ``QuantizationTransformPass``, and then,
    for each epoch, drains the training reader, drains the test reader, prints
    the averaged metrics and saves a checkpoint of the training graph. After
    the last epoch the test graph is frozen and saved as an inference model
    three times under ``<model_save_dir>/<model>/<act_quant_type>/``: in the
    ``float``, ``int8`` and ``mobile`` sub-directories.

    Args:
        args: Parsed options. Attributes read directly in this function are
            ``model``, ``pretrained_fp32_model``, ``checkpoint``,
            ``model_save_dir``, ``data_dir``, ``act_quant_type``,
            ``wt_quant_type``, ``use_gpu`` and ``batch_size``. The object is
            also passed through to ``build_program``.
    """
    # parameters from arguments
    model_name = args.model
    pretrained_fp32_model = args.pretrained_fp32_model
    checkpoint = args.checkpoint
    model_save_dir = args.model_save_dir
    data_dir = args.data_dir
    activation_quant_type = args.act_quant_type
    weight_quant_type = args.wt_quant_type
    print("Using %s as the actiavtion quantize type." % activation_quant_type)
    print("Using %s as the weight quantize type." % weight_quant_type)

    # Train and test use separate main programs but share one startup program.
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()

    _, _, train_py_reader, train_cost, train_acc1, train_acc5, global_lr = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    image, out, test_py_reader, test_cost, test_acc1, test_acc5 = build_program(
        is_train=False,
        main_prog=test_prog,
        startup_prog=startup_prog,
        args=args)
    test_prog = test_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    # Wrap both programs as IR graphs; the quantization passes below operate
    # on these graphs rather than on the programs themselves.
    main_graph = IrGraph(core.Graph(train_prog.desc), for_test=False)
    test_graph = IrGraph(core.Graph(test_prog.desc), for_test=True)

    if pretrained_fp32_model:
        # Only load variables that have a same-named file in the directory.
        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_fp32_model, var.name))
        fluid.io.load_vars(
            exe, pretrained_fp32_model, main_program=train_prog, predicate=if_exist)

    # Device count: entries of CUDA_VISIBLE_DEVICES when it is set, otherwise
    # the number of newlines in the output of `nvidia-smi -L`; 1 on CPU.
    if args.use_gpu:
        visible_device = os.getenv('CUDA_VISIBLE_DEVICES')
        if visible_device:
            device_num = len(visible_device.split(','))
        else:
            device_num = subprocess.check_output(
                ['nvidia-smi', '-L']).decode().count('\n')
    else:
        device_num = 1

    # NOTE(review): `/` is true division on Python 3, so train_batch_size is a
    # float there -- confirm paddle.batch accepts that, or switch to `//`.
    train_batch_size = args.batch_size / device_num
    # Test batch size is 1 for 'abs_max' activation quantization, 8 otherwise.
    test_batch_size = 1 if activation_quant_type == 'abs_max' else 8
    train_reader = paddle.batch(
        reader.train(data_dir=data_dir), batch_size=train_batch_size, drop_last=True)
    test_reader = paddle.batch(reader.val(data_dir=data_dir), batch_size=test_batch_size)

    train_py_reader.decorate_paddle_reader(train_reader)
    test_py_reader.decorate_paddle_reader(test_reader)

    # Fetch by variable *name*: the Python names train_acc1, train_acc5,
    # test_acc1 and test_acc5 are rebound to plain averages inside the epoch
    # loop below, so the Variable objects are not kept around under them.
    train_fetch_list = [train_cost.name, train_acc1.name, train_acc5.name, global_lr.name]
    test_fetch_list = [test_cost.name, test_acc1.name, test_acc5.name]

    # 1. Make some quantization transforms in the graph before training and testing.
    # According to the weight and activation quantization type, fake quantize
    # operators and fake dequantize operators are added to the graph.
    transform_pass = QuantizationTransformPass(
        scope=fluid.global_scope(), place=place,
        activation_quantize_type=activation_quant_type,
        weight_quantize_type=weight_quant_type)
    transform_pass.apply(main_graph)
    transform_pass.apply(test_graph)

    # The checkpoint is loaded after the transform pass, i.e. into the
    # already-quantized training graph.
    if checkpoint:
        load_persistable_nodes(exe, checkpoint, main_graph)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    # Training runs the transformed graph data-parallel; testing runs the
    # program converted back from the transformed test graph.
    binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
        loss_name=train_cost.name, build_strategy=build_strategy)
    test_prog = test_graph.to_program()
    # The epoch count comes from the selected model class's `params` dict.
    params = models.__dict__[args.model]().params
    for pass_id in range(params["num_epochs"]):

        train_py_reader.start()

        # Per-batch [loss, acc1, acc5] histories for this pass.
        train_info = [[], [], []]
        test_info = [[], [], []]
        train_time = []
        batch_id = 0
        try:
            # Loop until the reader signals end of data with EOFException.
            while True:
                t1 = time.time()
                # Results come back in train_fetch_list order.
                loss, acc1, acc5, lr = exe.run(binary, fetch_list=train_fetch_list)
                t2 = time.time()
                period = t2 - t1
                loss = np.mean(np.array(loss))
                acc1 = np.mean(np.array(acc1))
                acc5 = np.mean(np.array(acc5))
                train_info[0].append(loss)
                train_info[1].append(acc1)
                train_info[2].append(acc5)
                lr = np.mean(np.array(lr))
                train_time.append(period)
                if batch_id % 10 == 0:
                    # The backslash continuation is inside the string literal,
                    # so the next line's leading spaces are printed as well.
                    print("Pass {0}, trainbatch {1}, loss {2}, \
                        acc1 {3}, acc5 {4}, lr {5}, time {6}"
                          .format(pass_id, batch_id, loss, acc1, acc5, "%.6f" %
                                  lr, "%2.2f sec" % period))
                    sys.stdout.flush()
                batch_id += 1
        except fluid.core.EOFException:
            train_py_reader.reset()

        # From here on train_acc1/train_acc5 are pass averages, not Variables.
        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()

        test_py_reader.start()

        test_batch_id = 0
        try:
            # Same drain-until-EOFException pattern as the training loop.
            while True:
                t1 = time.time()
                loss, acc1, acc5 = exe.run(program=test_prog,
                                           fetch_list=test_fetch_list)
                t2 = time.time()
                period = t2 - t1
                loss = np.mean(loss)
                acc1 = np.mean(acc1)
                acc5 = np.mean(acc5)
                test_info[0].append(loss)
                test_info[1].append(acc1)
                test_info[2].append(acc5)
                if test_batch_id % 10 == 0:
                    # As above, the continuation whitespace is part of the text.
                    print("Pass {0},testbatch {1},loss {2}, \
                        acc1 {3},acc5 {4},time {5}"
                          .format(pass_id, test_batch_id, loss, acc1, acc5,
                                  "%2.2f sec" % period))
                    sys.stdout.flush()
                test_batch_id += 1
        except fluid.core.EOFException:
            test_py_reader.reset()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, "
              "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(
                  pass_id, train_loss, train_acc1, train_acc5, test_loss,
                  test_acc1, test_acc5))
        sys.stdout.flush()

        # One checkpoint directory per pass: <model_save_dir>/<model>/<pass_id>.
        save_checkpoint_path = os.path.join(model_save_dir,  model_name, str(pass_id))
        if not os.path.isdir(save_checkpoint_path):
            os.makedirs(save_checkpoint_path)
        save_persistable_nodes(exe, save_checkpoint_path, main_graph)

    model_path = os.path.join(model_save_dir, model_name, args.act_quant_type)
    float_path = os.path.join(model_path, 'float')
    int8_path = os.path.join(model_path, 'int8')
    mobile_path = os.path.join(model_path, 'mobile')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    # Steps 2-4 are applied cumulatively to the same test_graph object; each
    # step converts the graph to a program and saves it before the next pass.

    # 2. Freeze the graph after training by adjusting the quantize
    # operators' order for the inference.
    freeze_pass = QuantizationFreezePass(
        scope=fluid.global_scope(),
        place=place,
        weight_quantize_type=weight_quant_type)
    freeze_pass.apply(test_graph)
    server_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=float_path,
        feeded_var_names=[image.name],
        target_vars=[out], executor=exe,
        main_program=server_program)

    # 3. Convert the weights into int8_t type.
    # (This step is optional.)
    convert_int8_pass = ConvertToInt8Pass(scope=fluid.global_scope(), place=place)
    convert_int8_pass.apply(test_graph)
    server_int8_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=int8_path,
        feeded_var_names=[image.name],
        target_vars=[out], executor=exe,
        main_program=server_int8_program)

    # 4. Convert the frozen graph for paddle-mobile execution.
    # (This step is optional.)
    mobile_pass = TransformForMobilePass()
    mobile_pass.apply(test_graph)
    mobile_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=mobile_path,
        feeded_var_names=[image.name],
        target_vars=[out], executor=exe,
        main_program=mobile_program)
コード例 #27
0
def train():
    """Train the RCNN detector described by ``cfg`` and save snapshots.

    Builds the ResNet50-based RCNN graph, attaches a momentum optimizer with
    a warm-up / step learning-rate schedule, optionally loads pretrained
    weights, and then trains for at most ``cfg.max_iter`` iterations. Data
    comes either from the model's py_reader or from a plain feed reader,
    depending on ``cfg.use_pyreader``. Persistable variables are saved every
    ``cfg.TRAIN.snapshot_iter`` iterations and once more as ``model_final``
    after training.

    ``cfg`` and ``num_trainers`` are read from the enclosing scope; neither
    is defined in this function (presumably both are module-level).
    """
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]

    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,  # res4: [-1, 1024, 84, 84]
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,  # res5: [-1, 2048, 7, 7]
        use_pyreader=cfg.use_pyreader,
        use_random=use_random)
    model.build_model(image_shape)
    losses, keys = model.loss()
    # The first returned loss is the one that is minimized; all of them are
    # fetched for logging.
    loss = losses[0]
    fetch_list = losses

    # One learning-rate value per interval delimited by cfg.lr_steps, each a
    # further factor of gamma smaller than the previous one.
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
    lr = exponential_with_warmup_decay(learning_rate=learning_rate,
                                       boundaries=boundaries,
                                       values=values,
                                       warmup_iter=cfg.warm_up_iter,
                                       warmup_factor=cfg.warm_up_factor)

    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr,
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)

    # The learning rate is fetched last, after the losses; the training loops
    # rely on that position (outs[-1]).
    fetch_list = fetch_list + [lr]
    for var in fetch_list:
        var.persistable = True

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:

        # Only load variables that have a same-named file in the directory.
        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = True
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_iteration_per_drop_scope = 10

        if num_trainers > 1 and cfg.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy,
                                                 fluid.default_main_program())
            # the process is fast when num_threads is 1 for multi-process training
            exec_strategy.num_threads = 1

        train_exe = fluid.ParallelExecutor(use_cuda=bool(cfg.use_gpu),
                                           loss_name=loss.name,
                                           build_strategy=build_strategy,
                                           exec_strategy=exec_strategy)
    else:
        train_exe = exe

    shuffle = True
    # NOTE: with several trainers the py_reader path shuffles with a fixed
    # seed, and the feed-reader path does not shuffle at all.
    shuffle_seed = None
    if num_trainers > 1:
        shuffle_seed = 1

    if cfg.use_pyreader:
        train_reader = reader.train(batch_size=cfg.TRAIN.im_per_batch,
                                    total_batch_size=total_batch_size,
                                    padding_total=cfg.TRAIN.padding_minibatch,
                                    shuffle=shuffle,
                                    shuffle_seed=shuffle_seed)
        if num_trainers > 1:
            assert shuffle_seed is not None, "If num_trainers > 1, the shuffle_seed must be set, because the order of batch data generated by reader must be the same in the respective processes"
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        if num_trainers > 1:
            shuffle = False
        train_reader = reader.train(batch_size=total_batch_size,
                                    shuffle=shuffle)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def save_model(postfix):
        """Save all persistables to ``cfg.model_save_dir/postfix``, replacing any old copy."""
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    def train_loop_pyreader():
        """Train for up to ``cfg.max_iter`` iterations, pulling data from the py_reader."""
        py_reader.start()
        train_stats = TrainingStats(cfg.log_window, keys)
        # The fetch targets never change, so resolve their names once.
        fetch_names = [v.name for v in fetch_list]
        try:
            start_time = time.time()
            for iter_id in range(cfg.max_iter):
                prev_start_time = start_time
                start_time = time.time()
                outs = train_exe.run(fetch_list=fetch_names)
                # outs[:-1] are the losses (paired with keys); outs[-1] is lr.
                stats = {
                    k: np.array(v).mean()
                    for k, v in zip(keys, outs[:-1])
                }
                train_stats.update(stats)
                logs = train_stats.log()
                # The logged time is the gap between the starts of the
                # previous and the current iteration.
                strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                    now_time(), iter_id, np.mean(outs[-1]), logs,
                    start_time - prev_start_time)
                print(strs)
                sys.stdout.flush()
                if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                    save_model("model_iter{}".format(iter_id))
            # The unused total-time / last-loss bookkeeping that used to sit
            # here read an undefined name (`first_start_time`) and raised
            # NameError once the loop finished, so the final save was skipped.
        except (StopIteration, fluid.core.EOFException):
            # The reader ran dry before cfg.max_iter iterations.
            py_reader.reset()

    def train_loop():
        """Train for up to ``cfg.max_iter`` iterations, feeding batches explicitly."""
        train_stats = TrainingStats(cfg.log_window, keys)
        fetch_names = [v.name for v in fetch_list]
        start_time = time.time()
        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            outs = train_exe.run(fetch_list=fetch_names,
                                 feed=feeder.feed(data))
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id, np.mean(outs[-1]), logs,
                start_time - prev_start_time)
            print(strs)
            sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
            if (iter_id + 1) == cfg.max_iter:
                break
        # No trailing total-time / last-loss bookkeeping: the values were
        # never used, and reading `outs` here failed when the reader yielded
        # no batch at all.

    if cfg.use_pyreader:
        train_loop_pyreader()
    else:
        train_loop()
    save_model('model_final')
コード例 #28
0
ファイル: train.py プロジェクト: ZC119/count
def train(args,
          train_file_list,
          val_file_list,
          data_args,
          learning_rate,
          batch_size,
          num_passes,
          model_save_dir,
          pretrained_model=None):
    """Train a two-class MobileNet-SSD detector and evaluate it after every pass.

    Args:
        args: Parsed options. Attributes read here: ``nms_threshold``,
            ``ap_version``, ``use_gpu``, ``parallel``, ``for_model_ce``,
            ``iterations`` and ``skip_batch_num``.
        train_file_list: Passed to ``reader.train`` to select training data.
        val_file_list: Passed to ``reader.test`` to select evaluation data.
        data_args: Reader settings; ``resize_h`` / ``resize_w`` also define
            the network input shape.
        learning_rate: Base learning rate of the piecewise-decay schedule.
        batch_size: Batch size for both the training and the test reader.
        num_passes: Number of passes over the training reader.
        model_save_dir: Directory under which model snapshots are written.
        pretrained_model: Optional directory of pretrained variables; only
            variables with a same-named file in it are loaded.
    """
    image_shape = [3, data_args.resize_h, data_args.resize_w]

    num_classes = 2

    # Number of comma-separated entries in CUDA_VISIBLE_DEVICES; an unset or
    # empty variable counts as one device.
    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(name='gt_box',
                               shape=[4],
                               dtype='float32',
                               lod_level=1)
    gt_label = fluid.layers.data(name='gt_label',
                                 shape=[1],
                                 dtype='int32',
                                 lod_level=1)
    difficult = fluid.layers.data(name='gt_difficult',
                                  shape=[1],
                                  dtype='int32',
                                  lod_level=1)

    locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
    nmsed_out = fluid.layers.detection_output(locs,
                                              confs,
                                              box,
                                              box_var,
                                              nms_threshold=args.nms_threshold)
    loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var)
    loss = fluid.layers.reduce_sum(loss)

    # Clone the test program before the optimizer is attached below.
    test_program = fluid.default_main_program().clone(for_test=True)
    with fluid.program_guard(test_program):
        map_eval = fluid.evaluator.DetectionMAP(nmsed_out,
                                                gt_label,
                                                gt_box,
                                                difficult,
                                                num_classes,
                                                overlap_threshold=0.5,
                                                evaluate_difficult=False,
                                                ap_version=args.ap_version)

    # Learning-rate boundaries are expressed in batches; 4800 is presumably
    # the number of training samples -- TODO confirm.
    epocs = 4800 / batch_size
    boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
    values = [
        learning_rate, learning_rate * 0.5, learning_rate * 0.25,
        learning_rate * 0.1, learning_rate * 0.01
    ]

    optimizer = fluid.optimizer.RMSProp(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
        regularization=fluid.regularizer.L2Decay(0.00005),
    )

    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    # train_exe only exists in parallel mode; it is only used under the same
    # args.parallel check in the training loop.
    if args.parallel:
        train_exe = fluid.ParallelExecutor(use_cuda=args.use_gpu,
                                           loss_name=loss.name)

    train_reader = paddle.batch(reader.train(data_args, train_file_list),
                                batch_size=batch_size)
    test_reader = paddle.batch(reader.test(data_args, val_file_list),
                               batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place,
                              feed_list=[image, gt_box, gt_label, difficult])

    def save_model(postfix):
        """Save all persistables to ``model_save_dir/postfix``, replacing any old copy."""
        model_path = os.path.join(model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        print('save models to %s' % (model_path))
        fluid.io.save_persistables(exe, model_path)

    best_map = 0.

    def test(pass_id, best_map):
        """Evaluate on the test reader; save 'best_model' on improvement and return the best mAP."""
        _, accum_map = map_eval.get_map_var()
        map_eval.reset(exe)
        for batch_id, data in enumerate(test_reader()):
            test_map, = exe.run(test_program,
                                feed=feeder.feed(data),
                                fetch_list=[accum_map])
            if batch_id % 20 == 0:
                print("Batch {0}, map {1}".format(batch_id, test_map))
        # test_map holds the accumulated mAP fetched for the last batch.
        if test_map[0] > best_map:
            best_map = test_map[0]
            save_model('best_model')
        print("Pass {0}, test map {1}".format(pass_id, test_map))
        return best_map

    train_num = 0
    total_train_time = 0.0
    for pass_id in range(num_passes):
        start_time = time.time()
        prev_start_time = start_time
        every_pass_loss = []
        # Number of batches trained in this pass (not named `iter`, which
        # would shadow the builtin).
        iter_num = 0
        pass_duration = 0.0
        for batch_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            if args.for_model_ce and iter_num == args.iterations:
                break
            if len(data) < (devices_num * 2):
                print("There are too few data to train on all devices.")
                continue
            if args.parallel:
                loss_v, = train_exe.run(fetch_list=[loss.name],
                                        feed=feeder.feed(data))
            else:
                loss_v, = exe.run(fluid.default_main_program(),
                                  feed=feeder.feed(data),
                                  fetch_list=[loss])
            loss_v = np.mean(np.array(loss_v))
            if batch_id % 20 == 0:
                # The printed time is the gap between the starts of the
                # previous and the current batch.
                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
                    pass_id, batch_id, loss_v, start_time - prev_start_time))

            # Collect speed/loss statistics once the warm-up batches of a CE
            # run are over, and for every batch of any pass after the first.
            if (args.for_model_ce and
                    iter_num >= args.skip_batch_num) or pass_id != 0:
                batch_duration = time.time() - start_time
                pass_duration += batch_duration
                train_num += len(data)
                every_pass_loss.append(loss_v)
            # Count every trained batch. The counter used to advance only
            # inside the branch above, so with skip_batch_num > 0 it stayed at
            # 0 during pass 0: warm-up never ended and args.iterations was
            # never reached.
            iter_num += 1
        total_train_time += pass_duration

        if args.for_model_ce and pass_id == num_passes - 1:
            examples_per_sec = train_num / total_train_time
            cost = np.mean(every_pass_loss)
            with open("train_speed_factor.txt", 'w') as f:
                f.write('{:f}\n'.format(examples_per_sec))
            with open("train_cost_factor.txt", 'a+') as f:
                f.write('{:f}\n'.format(cost))

        best_map = test(pass_id, best_map)
        if pass_id % 10 == 0 or pass_id == num_passes - 1:
            save_model(str(pass_id))
    print("Best test map {0}".format(best_map))
コード例 #29
0
def train():
    """Train YOLOv3 from ``cfg.start_iter`` to ``cfg.max_iter`` and save snapshots.

    Builds the YOLOv3 graph, attaches a momentum optimizer with a warm-up /
    step learning-rate schedule, optionally loads pretrained weights, and
    runs the data-parallel compiled program while the model's py_reader
    supplies batches. Persistables are saved every ``cfg.snapshot_iter``
    iterations and once more as ``model_final`` at the end.

    ``cfg``, ``args`` and ``num_trainers`` are read from the enclosing scope;
    none of them is defined in this function (presumably all module-level).
    """

    # Fix every random seed used here for debug / CE runs.
    if cfg.debug or args.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
        np.random.seed(0)

    if not os.path.exists(cfg.model_save_dir):
        os.makedirs(cfg.model_save_dir)

    model = YOLOv3()
    model.build_model()
    input_size = cfg.input_size
    loss = model.loss()
    loss.persistable = True

    devices_num = get_device_num()
    print("Found {} CUDA devices.".format(devices_num))

    # One learning-rate value per interval delimited by cfg.lr_steps, each a
    # further factor of gamma smaller than the previous one.
    learning_rate = cfg.learning_rate
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
            learning_rate=learning_rate,
            boundaries=boundaries,
            values=values,
            warmup_iter=cfg.warm_up_iter,
            warmup_factor=cfg.warm_up_factor),
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrain:
        # A missing path is only reported; load_vars below still runs, and
        # if_exist then rejects every variable.
        if not os.path.exists(cfg.pretrain):
            print("Pretrain weights not found: {}".format(cfg.pretrain))

        # Load variables that have a same-named file in the directory, except
        # those whose name contains 'yolo_output'.
        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrain, var.name)) \
                and var.name.find('yolo_output') < 0
        fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False  # gc and memory optimize may conflict
    # Sync batch norm is switched off when it is requested with at most one
    # device, and always when there is more than one trainer.
    syncbn = cfg.syncbn
    if (syncbn and devices_num <= 1) or num_trainers > 1:
        print("Disable syncbn in single device")
        syncbn = False
    build_strategy.sync_batch_norm = syncbn

    exec_strategy = fluid.ExecutionStrategy()
    if cfg.use_gpu and num_trainers > 1:
        dist_utils.prepare_for_multi_process(exe, build_strategy,
                                             fluid.default_main_program())
        exec_strategy.num_threads = 1

    compile_program = fluid.compiler.CompiledProgram(fluid.default_main_program(
    )).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # With cfg.random_shape the reader may pick any multiple of 32 from 320 to
    # 608 as input size; otherwise only cfg.input_size is used.
    random_sizes = [cfg.input_size]
    if cfg.random_shape:
        random_sizes = [32 * i for i in range(10, 20)]

    total_iter = cfg.max_iter - cfg.start_iter
    mixup_iter = total_iter - cfg.no_mixup_iter

    shuffle = True
    if args.enable_ce:
        shuffle = False
    shuffle_seed = None
    # NOTE: yolov3 is a special model, if num_trainers > 1, each process
    # trains on the complete dataset.
    # if num_trainers > 1: shuffle_seed  = 1
    # The reader's iteration budgets are scaled by the device count.
    train_reader = reader.train(
        input_size,
        batch_size=cfg.batch_size,
        shuffle=shuffle,
        shuffle_seed=shuffle_seed,
        total_iter=total_iter * devices_num,
        mixup_iter=mixup_iter * devices_num,
        random_sizes=random_sizes,
        use_multiprocess_reader=cfg.use_multiprocess_reader)
    py_reader = model.py_reader
    py_reader.decorate_paddle_reader(train_reader)

    # Save all persistables to cfg.model_save_dir/<postfix>, replacing any
    # existing directory of that name.
    def save_model(postfix):
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    fetch_list = [loss]

    py_reader.start()
    smoothed_loss = SmoothedValue()
    try:
        start_time = time.time()
        prev_start_time = start_time
        # Loss and time accumulated since the last snapshot.
        snapshot_loss = 0
        snapshot_time = 0
        for iter_id in range(cfg.start_iter, cfg.max_iter):
            # start_time - prev_start_time is the gap between the starts of
            # the previous and the current iteration.
            prev_start_time = start_time
            start_time = time.time()
            losses = exe.run(compile_program,
                             fetch_list=[v.name for v in fetch_list])
            smoothed_loss.add_value(np.mean(np.array(losses[0])))
            snapshot_loss += np.mean(np.array(losses[0]))
            snapshot_time += start_time - prev_start_time
            # The learning rate is read from the global scope variable that
            # is literally named 'learning_rate'.
            lr = np.array(fluid.global_scope().find_var('learning_rate')
                          .get_tensor())
            print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
                iter_id, lr[0],
                smoothed_loss.get_mean_value(), start_time - prev_start_time))
            sys.stdout.flush()
            if (iter_id + 1) % cfg.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
                # The backslash continuation is inside the string literal, so
                # the next line's leading spaces are printed as well.
                print("Snapshot {} saved, average loss: {}, \
                      average time: {}".format(
                    iter_id + 1, snapshot_loss / float(cfg.snapshot_iter),
                    snapshot_time / float(cfg.snapshot_iter)))
                # KPI lines are only printed on the last iteration of a CE
                # run; any device count other than 1 uses the '8card' labels.
                if args.enable_ce and iter_id == cfg.max_iter - 1:
                    if devices_num == 1:
                        print("kpis\ttrain_cost_1card\t%f" %
                              (snapshot_loss / float(cfg.snapshot_iter)))
                        print("kpis\ttrain_duration_1card\t%f" %
                              (snapshot_time / float(cfg.snapshot_iter)))
                    else:
                        print("kpis\ttrain_cost_8card\t%f" %
                              (snapshot_loss / float(cfg.snapshot_iter)))
                        print("kpis\ttrain_duration_8card\t%f" %
                              (snapshot_time / float(cfg.snapshot_iter)))

                snapshot_loss = 0
                snapshot_time = 0
    except fluid.core.EOFException:
        # The reader ran out of data before cfg.max_iter was reached.
        py_reader.reset()

    save_model('model_final')
コード例 #30
0
def parallel_do(args,
                train_file_list,
                val_file_list,
                data_args,
                learning_rate,
                batch_size,
                num_passes,
                model_save_dir,
                pretrained_model=None):
    """Build the `mobile_net` detector with SSD loss, then train, evaluate and
    checkpoint it.

    Args:
        args: Option object. This function reads `args.parallel`,
            `args.use_nccl`, `args.ap_version` and `args.use_gpu`.
        train_file_list: Training list forwarded to `reader.train`. For the
            COCO dataset it must contain '2014' or '2017' so the learning-rate
            schedule can be sized.
        val_file_list: Validation list forwarded to `reader.test`.
        data_args: Data settings. This function reads `data_args.dataset`
            ('coco' or 'pascalvoc'), `data_args.resize_h` and
            `data_args.resize_w`, and forwards the object to the readers.
        learning_rate: Initial learning rate; it is scaled by 0.5 and 0.25 at
            the two decay boundaries.
        batch_size: Number of samples per batch for both readers.
        num_passes: Number of passes over the training reader.
        model_save_dir: Directory under which one sub-directory per saved
            pass is written.
        pretrained_model: Optional directory of pretrained variables. Only
            variables that have a file of the same name in that directory are
            loaded.

    Raises:
        ValueError: If `data_args.dataset` is not supported, or if the dataset
            is 'coco' and `train_file_list` names neither the 2014 nor the
            2017 split.
    """
    image_shape = [3, data_args.resize_h, data_args.resize_w]

    # Resolve every dataset-dependent setting before any graph is built so an
    # unsupported configuration fails immediately with a clear message instead
    # of an unbound-variable error half-way through graph construction.
    if data_args.dataset == 'coco':
        num_classes = 81
        if '2014' in train_file_list:
            num_train_images = 82783
        elif '2017' in train_file_list:
            num_train_images = 118287
        else:
            raise ValueError(
                "cannot derive the learning rate schedule for COCO: "
                "train_file_list %r contains neither '2014' nor '2017'" %
                (train_file_list, ))
        # learning rate decay in 12, 19 pass, respectively
        # Floor division keeps the boundaries integral on Python 3 and gives
        # the same result `/` gave on Python 2 for an integer batch size.
        iters_per_pass = num_train_images // batch_size
        boundaries = [iters_per_pass * 12, iters_per_pass * 19]
    elif data_args.dataset == 'pascalvoc':
        num_classes = 21
        boundaries = [40000, 60000]
    else:
        raise ValueError(
            "unsupported dataset %r, expected 'coco' or 'pascalvoc'" %
            (data_args.dataset, ))

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(name='gt_box',
                               shape=[4],
                               dtype='float32',
                               lod_level=1)
    gt_label = fluid.layers.data(name='gt_label',
                                 shape=[1],
                                 dtype='int32',
                                 lod_level=1)
    difficult = fluid.layers.data(name='gt_difficult',
                                  shape=[1],
                                  dtype='int32',
                                  lod_level=1)

    if args.parallel:
        # Build the network inside a ParallelDo block; each place reads its
        # own inputs and writes back its loss and detection output.
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places, use_nccl=args.use_nccl)
        with pd.do():
            image_ = pd.read_input(image)
            gt_box_ = pd.read_input(gt_box)
            gt_label_ = pd.read_input(gt_label)
            difficult_ = pd.read_input(difficult)
            locs, confs, box, box_var = mobile_net(num_classes, image_,
                                                   image_shape)
            loss = fluid.layers.ssd_loss(locs, confs, gt_box_, gt_label_, box,
                                         box_var)
            nmsed_out = fluid.layers.detection_output(locs,
                                                      confs,
                                                      box,
                                                      box_var,
                                                      nms_threshold=0.45)
            loss = fluid.layers.reduce_sum(loss)
            pd.write_output(loss)
            pd.write_output(nmsed_out)

        loss, nmsed_out = pd()
        loss = fluid.layers.mean(loss)
    else:
        locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
        nmsed_out = fluid.layers.detection_output(locs,
                                                  confs,
                                                  box,
                                                  box_var,
                                                  nms_threshold=0.45)
        loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box,
                                     box_var)
        loss = fluid.layers.reduce_sum(loss)

    # Clone the program for evaluation before the optimizer ops are appended
    # to the main program by `minimize` below.
    test_program = fluid.default_main_program().clone(for_test=True)
    with fluid.program_guard(test_program):
        map_eval = fluid.evaluator.DetectionMAP(nmsed_out,
                                                gt_label,
                                                gt_box,
                                                difficult,
                                                num_classes,
                                                overlap_threshold=0.5,
                                                evaluate_difficult=False,
                                                ap_version=args.ap_version)

    values = [learning_rate, learning_rate * 0.5, learning_rate * 0.25]
    optimizer = fluid.optimizer.RMSProp(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
        regularization=fluid.regularizer.L2Decay(0.00005),
    )

    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:

        def if_exist(var):
            # Load only the variables that actually have a saved file.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    train_reader = paddle.batch(reader.train(data_args, train_file_list),
                                batch_size=batch_size)
    test_reader = paddle.batch(reader.test(data_args, val_file_list),
                               batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place,
                              feed_list=[image, gt_box, gt_label, difficult])

    def test(pass_id):
        """Run the test reader through `test_program` and print the mAP
        fetched for the last batch."""
        _, accum_map = map_eval.get_map_var()
        map_eval.reset(exe)
        test_map = None
        for data in test_reader():
            test_map = exe.run(test_program,
                               feed=feeder.feed(data),
                               fetch_list=[accum_map])
        if test_map is None:
            # The reader yielded no batch, so there is nothing to index.
            print("Test {0}, no test data".format(pass_id))
            return
        print("Test {0}, map {1}".format(pass_id, test_map[0]))

    for pass_id in range(num_passes):
        start_time = time.time()
        prev_start_time = start_time
        for batch_id, data in enumerate(train_reader()):
            # The reported time is the gap between the previous timestamp and
            # the start of this batch, i.e. it lags one batch behind.
            prev_start_time = start_time
            start_time = time.time()
            loss_v = exe.run(fluid.default_main_program(),
                             feed=feeder.feed(data),
                             fetch_list=[loss])
            if batch_id % 20 == 0:
                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
                    pass_id, batch_id, loss_v[0],
                    start_time - prev_start_time))
        test(pass_id)

        # Checkpoint on every 10th pass (including pass 0) and the last pass.
        if pass_id % 10 == 0 or pass_id == num_passes - 1:
            model_path = os.path.join(model_save_dir, str(pass_id))
            print('save models to %s' % (model_path))
            fluid.io.save_persistables(exe, model_path)