Example #1
def optimizer_setting(lr):
	batch_size = 1024
	iters = 150000 // batch_size
	boundaries = [i * iters  for i in [60,100,150]]
	values = [ i * lr for i in [1,0.5,0.1,0.05]]
    
	optimizer = fluid.optimizer.Adam(
	    #momentum=0.9,
		learning_rate=exponential_with_warmup_decay(
			learning_rate=lr,
			boundaries=boundaries,
			values=values,
			warmup_iter=200,
			warmup_factor=0.),
		regularization=fluid.regularizer.L2Decay(0.00001), )

	return optimizer
def train():

    if cfg.debug or args.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
        np.random.seed(0)

    if not os.path.exists(cfg.model_save_dir):
        os.makedirs(cfg.model_save_dir)

    model = YOLOv3()
    model.build_model()
    input_size = cfg.input_size
    loss = model.loss()
    loss.persistable = True

    devices_num = get_device_num()
    print("Found {} CUDA devices.".format(devices_num))

    learning_rate = cfg.learning_rate
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
            learning_rate=learning_rate,
            boundaries=boundaries,
            values=values,
            warmup_iter=cfg.warm_up_iter,
            warmup_factor=cfg.warm_up_factor),
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrain:
        if not os.path.exists(cfg.pretrain):
            print("Pretrain weights not found: {}".format(cfg.pretrain))

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrain, var.name)) \
                and var.name.find('yolo_output') < 0
        fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False  #gc and memory optimize may conflict
    syncbn = cfg.syncbn
    if (syncbn and devices_num <= 1) or num_trainers > 1:
        print("Disable syncbn in single device")
        syncbn = False
    build_strategy.sync_batch_norm = syncbn

    exec_strategy = fluid.ExecutionStrategy()
    if cfg.use_gpu and num_trainers > 1:
        dist_utils.prepare_for_multi_process(exe, build_strategy,
                                             fluid.default_main_program())
        exec_strategy.num_threads = 1

    compile_program = fluid.compiler.CompiledProgram(fluid.default_main_program(
    )).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    random_sizes = [cfg.input_size]
    if cfg.random_shape:
        random_sizes = [32 * i for i in range(10, 20)]

    total_iter = cfg.max_iter - cfg.start_iter
    mixup_iter = total_iter - cfg.no_mixup_iter

    shuffle = True
    if args.enable_ce:
        shuffle = False
    shuffle_seed = None
    # NOTE: yolov3 is a special model; if num_trainers > 1, each process
    # trains on the complete dataset.
    # if num_trainers > 1: shuffle_seed  = 1
    train_reader = reader.train(
        input_size,
        batch_size=cfg.batch_size,
        shuffle=shuffle,
        shuffle_seed=shuffle_seed,
        total_iter=total_iter * devices_num,
        mixup_iter=mixup_iter * devices_num,
        random_sizes=random_sizes,
        use_multiprocess_reader=cfg.use_multiprocess_reader)
    py_reader = model.py_reader
    py_reader.decorate_paddle_reader(train_reader)

    def save_model(postfix):
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    fetch_list = [loss]

    py_reader.start()
    smoothed_loss = SmoothedValue()
    try:
        start_time = time.time()
        prev_start_time = start_time
        snapshot_loss = 0
        snapshot_time = 0
        for iter_id in range(cfg.start_iter, cfg.max_iter):
            prev_start_time = start_time
            start_time = time.time()
            losses = exe.run(compile_program,
                             fetch_list=[v.name for v in fetch_list])
            smoothed_loss.add_value(np.mean(np.array(losses[0])))
            snapshot_loss += np.mean(np.array(losses[0]))
            snapshot_time += start_time - prev_start_time
            lr = np.array(fluid.global_scope().find_var('learning_rate')
                          .get_tensor())
            print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
                iter_id, lr[0],
                smoothed_loss.get_mean_value(), start_time - prev_start_time))
            sys.stdout.flush()
            if (iter_id + 1) % cfg.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
                print("Snapshot {} saved, average loss: {}, \
                      average time: {}".format(
                    iter_id + 1, snapshot_loss / float(cfg.snapshot_iter),
                    snapshot_time / float(cfg.snapshot_iter)))
                if args.enable_ce and iter_id == cfg.max_iter - 1:
                    if devices_num == 1:
                        print("kpis\ttrain_cost_1card\t%f" %
                              (snapshot_loss / float(cfg.snapshot_iter)))
                        print("kpis\ttrain_duration_1card\t%f" %
                              (snapshot_time / float(cfg.snapshot_iter)))
                    else:
                        print("kpis\ttrain_cost_8card\t%f" %
                              (snapshot_loss / float(cfg.snapshot_iter)))
                        print("kpis\ttrain_duration_8card\t%f" %
                              (snapshot_time / float(cfg.snapshot_iter)))

                snapshot_loss = 0
                snapshot_time = 0
    except fluid.core.EOFException:
        py_reader.reset()

    save_model('model_final')
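
The examples on this page all build values = [learning_rate * (gamma**i) for i in range(step_num + 1)] and pass them, together with boundaries and the warm-up settings, to exponential_with_warmup_decay. That helper is defined elsewhere in each repository; the following is only a hypothetical sketch of the schedule it is assumed to produce (linear warm-up followed by piecewise-constant decay), written as a plain Python function for illustration:

def warmup_piecewise_lr(iter_id, base_lr, boundaries, values,
                        warmup_iter, warmup_factor):
    # Hypothetical sketch, not the repository helper.
    if iter_id < warmup_iter:
        # linear warm-up from warmup_factor * base_lr up to base_lr
        alpha = iter_id / float(warmup_iter)
        return base_lr * (warmup_factor * (1.0 - alpha) + alpha)
    # piecewise-constant decay: values[i] applies while iter_id < boundaries[i]
    for boundary, value in zip(boundaries, values):
        if iter_id < boundary:
            return value
    return values[-1]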
Example #3
def train():
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]

    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,  # res4: [-1, 1024, 84, 84]
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,  # res5: [-1, 2048, 7, 7]
        use_pyreader=cfg.use_pyreader,
        use_random=use_random)
    model.build_model(image_shape)
    losses, keys = model.loss()
    loss = losses[0]
    fetch_list = losses

    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
    lr = exponential_with_warmup_decay(learning_rate=learning_rate,
                                       boundaries=boundaries,
                                       values=values,
                                       warmup_iter=cfg.warm_up_iter,
                                       warmup_factor=cfg.warm_up_factor)

    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr,
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)

    fetch_list = fetch_list + [lr]
    for var in fetch_list:
        var.persistable = True

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = True
        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_iteration_per_drop_scope = 10

        if num_trainers > 1 and cfg.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy,
                                                 fluid.default_main_program())
            # the process is fast when num_threads is 1 for multi-process training
            exec_strategy.num_threads = 1

        train_exe = fluid.ParallelExecutor(use_cuda=bool(cfg.use_gpu),
                                           loss_name=loss.name,
                                           build_strategy=build_strategy,
                                           exec_strategy=exec_strategy)
    else:
        train_exe = exe

    shuffle = True
    # NOTE: when using multi-process training, fix the shuffle seed so that
    # every trainer shuffles the dataset in the same order
    shuffle_seed = None
    if num_trainers > 1:
        shuffle_seed = 1

    if cfg.use_pyreader:
        train_reader = reader.train(batch_size=cfg.TRAIN.im_per_batch,
                                    total_batch_size=total_batch_size,
                                    padding_total=cfg.TRAIN.padding_minibatch,
                                    shuffle=shuffle,
                                    shuffle_seed=shuffle_seed)
        if num_trainers > 1:
            assert shuffle_seed is not None, "If num_trainers > 1, the shuffle_seed must be set, because the order of batch data generated by reader must be the same in the respective processes"
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        if num_trainers > 1:
            shuffle = False
        train_reader = reader.train(batch_size=total_batch_size,
                                    shuffle=shuffle)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def save_model(postfix):
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    def train_loop_pyreader():
        py_reader.start()
        train_stats = TrainingStats(cfg.log_window, keys)
        try:
            first_start_time = time.time()
            start_time = first_start_time
            for iter_id in range(cfg.max_iter):
                prev_start_time = start_time
                start_time = time.time()
                outs = train_exe.run(fetch_list=[v.name for v in fetch_list])
                stats = {
                    k: np.array(v).mean()
                    for k, v in zip(keys, outs[:-1])
                }
                train_stats.update(stats)
                logs = train_stats.log()
                strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                    now_time(), iter_id, np.mean(outs[-1]), logs,
                    start_time - prev_start_time)
                print(strs)
                sys.stdout.flush()
                if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                    save_model("model_iter{}".format(iter_id))
            end_time = time.time()
            total_time = end_time - first_start_time
            last_loss = np.array(outs[0]).mean()
        except (StopIteration, fluid.core.EOFException):
            py_reader.reset()

    def train_loop():
        train_stats = TrainingStats(cfg.log_window, keys)
        start_time = time.time()
        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            outs = train_exe.run(fetch_list=[v.name for v in fetch_list],
                                 feed=feeder.feed(data))
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            stats = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id, np.mean(outs[-1]), logs,
                start_time - prev_start_time)
            print(stats)
            sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        total_time = end_time - start_time
        last_loss = np.array(outs[0]).mean()

    if cfg.use_pyreader:
        train_loop_pyreader()
    else:
        train_loop()
    save_model('model_final')
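
The RCNN examples log through TrainingStats(cfg.log_window, keys), calling update() with the per-iteration loss values and log() for the printed string. The class is defined elsewhere in the repository; a minimal sketch of what it is assumed to do (a sliding-window mean per loss key), with hypothetical naming:

import collections
import numpy as np

class TrainingStatsSketch(object):
    # Hypothetical stand-in for the repository's TrainingStats helper.
    def __init__(self, window_size, stats_keys):
        self.deques = {k: collections.deque(maxlen=window_size)
                       for k in stats_keys}

    def update(self, stats):
        # stats is a dict {key: scalar}, as built in the training loops above
        for k, v in stats.items():
            self.deques[k].append(v)

    def log(self):
        # windowed mean per key, e.g. "loss_cls: 0.1234, loss_bbox: 0.0567"
        return ', '.join('{}: {:.4f}'.format(k, np.mean(d))
                         for k, d in self.deques.items())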
Example #4
def train():
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]

    if cfg.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    if cfg.enable_ce:
        use_random = False
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
        use_random=use_random)
    model.build_model(image_shape)
    losses, keys = model.loss()
    loss = losses[0]
    fetch_list = losses

    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    lr = exponential_with_warmup_decay(learning_rate=learning_rate,
                                       boundaries=boundaries,
                                       values=values,
                                       warmup_iter=cfg.warm_up_iter,
                                       warmup_factor=cfg.warm_up_factor)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr,
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)
    fetch_list = fetch_list + [lr]

    for var in fetch_list:
        var.persistable = True

    #fluid.memory_optimize(fluid.default_main_program(), skip_opt_set=set(fetch_list))

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.use_experimental_executor = True
        train_exe = fluid.ParallelExecutor(use_cuda=bool(cfg.use_gpu),
                                           loss_name=loss.name,
                                           build_strategy=build_strategy,
                                           exec_strategy=exec_strategy)
    else:
        train_exe = exe

    shuffle = True
    if cfg.enable_ce:
        shuffle = False
    if cfg.use_pyreader:
        train_reader = reader.train(batch_size=cfg.TRAIN.im_per_batch,
                                    total_batch_size=total_batch_size,
                                    padding_total=cfg.TRAIN.padding_minibatch,
                                    shuffle=shuffle)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        train_reader = reader.train(batch_size=total_batch_size,
                                    shuffle=shuffle)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def save_model(postfix):
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    def train_loop_pyreader():
        py_reader.start()
        train_stats = TrainingStats(cfg.log_window, keys)
        try:
            start_time = time.time()
            prev_start_time = start_time
            for iter_id in range(cfg.max_iter):
                prev_start_time = start_time
                start_time = time.time()
                outs = train_exe.run(fetch_list=[v.name for v in fetch_list])
                stats = {
                    k: np.array(v).mean()
                    for k, v in zip(keys, outs[:-1])
                }
                train_stats.update(stats)
                logs = train_stats.log()
                strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                    now_time(), iter_id, np.mean(outs[-1]), logs,
                    start_time - prev_start_time)
                print(strs)
                sys.stdout.flush()
                if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                    save_model("model_iter{}".format(iter_id))
            end_time = time.time()
            total_time = end_time - start_time
            last_loss = np.array(outs[0]).mean()
            if cfg.enable_ce:
                gpu_num = devices_num
                epoch_idx = iter_id + 1
                loss = last_loss
                print("kpis\teach_pass_duration_card%s\t%s" %
                      (gpu_num, total_time / epoch_idx))
                print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, loss))
        except (StopIteration, fluid.core.EOFException):
            py_reader.reset()

    def train_loop():
        start_time = time.time()
        prev_start_time = start_time
        start = start_time
        train_stats = TrainingStats(cfg.log_window, keys)
        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            outs = train_exe.run(fetch_list=[v.name for v in fetch_list],
                                 feed=feeder.feed(data))
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id, np.mean(outs[-1]), logs,
                start_time - prev_start_time)
            print(strs)
            sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        total_time = end_time - start_time
        last_loss = np.array(outs[0]).mean()
        # only for ce
        if cfg.enable_ce:
            gpu_num = devices_num
            epoch_idx = iter_id + 1
            loss = last_loss
            print("kpis\teach_pass_duration_card%s\t%s" %
                  (gpu_num, total_time / epoch_idx))
            print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, loss))

        return last_loss

    if cfg.use_pyreader:
        train_loop_pyreader()
    else:
        train_loop()
    save_model('model_final')
def train():

    if cfg.debug:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
        np.random.seed(0)

    if not os.path.exists(cfg.model_save_dir):
        os.makedirs(cfg.model_save_dir)

    model = YOLOv3()
    model.build_model()
    input_size = cfg.input_size
    loss = model.loss()
    loss.persistable = True

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    print("Found {} CUDA devices.".format(devices_num))

    learning_rate = cfg.learning_rate
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
            learning_rate=learning_rate,
            boundaries=boundaries,
            values=values,
            warmup_iter=cfg.warm_up_iter,
            warmup_factor=cfg.warm_up_factor),
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)

    if cfg.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrain:
        if not os.path.exists(cfg.pretrain):
            print("Pretrain weights not found: {}".format(cfg.pretrain))

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrain, var.name))

        fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = True
    build_strategy.sync_batch_norm = cfg.syncbn
    compile_program = fluid.compiler.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)

    random_sizes = [cfg.input_size]
    if cfg.random_shape:
        random_sizes = [32 * i for i in range(10, 20)]

    total_iter = cfg.max_iter - cfg.start_iter
    mixup_iter = total_iter - cfg.no_mixup_iter
    train_reader = reader.train(input_size,
                                batch_size=cfg.batch_size,
                                shuffle=True,
                                total_iter=total_iter * devices_num,
                                mixup_iter=mixup_iter * devices_num,
                                random_sizes=random_sizes,
                                use_multiprocessing=cfg.use_multiprocess)
    py_reader = model.py_reader
    py_reader.decorate_paddle_reader(train_reader)

    def save_model(postfix):
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    fetch_list = [loss]

    py_reader.start()
    smoothed_loss = SmoothedValue()
    try:
        start_time = time.time()
        prev_start_time = start_time
        snapshot_loss = 0
        snapshot_time = 0
        for iter_id in range(cfg.start_iter, cfg.max_iter):
            prev_start_time = start_time
            start_time = time.time()
            losses = exe.run(compile_program,
                             fetch_list=[v.name for v in fetch_list])
            smoothed_loss.add_value(np.mean(np.array(losses[0])))
            snapshot_loss += np.mean(np.array(losses[0]))
            snapshot_time += start_time - prev_start_time
            lr = np.array(
                fluid.global_scope().find_var('learning_rate').get_tensor())
            print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
                iter_id, lr[0], smoothed_loss.get_mean_value(),
                start_time - prev_start_time))
            sys.stdout.flush()
            if (iter_id + 1) % cfg.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
                print("Snapshot {} saved, average loss: {}, \
                      average time: {}".format(
                    iter_id + 1, snapshot_loss / float(cfg.snapshot_iter),
                    snapshot_time / float(cfg.snapshot_iter)))
                snapshot_loss = 0
                snapshot_time = 0
    except fluid.core.EOFException:
        py_reader.reset()

    save_model('model_final')
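
Both YOLOv3 training loops above smooth the printed loss with SmoothedValue(), calling add_value() each iteration and get_mean_value() when logging. The helper itself is not shown; a minimal sketch, assuming it simply keeps a running mean (the real class may use a fixed window instead):

class SmoothedValueSketch(object):
    # Hypothetical stand-in for the SmoothedValue helper used above.
    def __init__(self):
        self.total = 0.0
        self.count = 0

    def add_value(self, value):
        self.total += float(value)
        self.count += 1

    def get_mean_value(self):
        return self.total / max(self.count, 1)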
Example #6
def train():
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]
    num_iterations = cfg.max_iter

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
        use_random=False)
    model.build_model(image_shape)
    losses, keys = model.loss()
    loss = losses[0]
    fetch_list = [loss]

    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
            learning_rate=learning_rate,
            boundaries=boundaries,
            values=values,
            warmup_iter=500,
            warmup_factor=1.0 / 3.0),
        regularization=fluid.regularizer.L2Decay(0.0001),
        momentum=0.9)
    optimizer.minimize(loss)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        train_exe = fluid.ParallelExecutor(use_cuda=bool(cfg.use_gpu),
                                           loss_name=loss.name)

    if cfg.use_pyreader:
        train_reader = reader.train(batch_size=cfg.TRAIN.im_per_batch,
                                    total_batch_size=total_batch_size,
                                    padding_total=cfg.TRAIN.padding_minibatch,
                                    shuffle=False)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        train_reader = reader.train(batch_size=total_batch_size, shuffle=False)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def run(iterations):
        reader_time = []
        run_time = []
        total_images = 0

        for batch_id in range(iterations):
            start_time = time.time()
            data = next(train_reader())
            end_time = time.time()
            reader_time.append(end_time - start_time)
            start_time = time.time()
            if cfg.parallel:
                outs = train_exe.run(fetch_list=[v.name for v in fetch_list],
                                     feed=feeder.feed(data))
            else:
                outs = exe.run(fluid.default_main_program(),
                               fetch_list=[v.name for v in fetch_list],
                               feed=feeder.feed(data))
            end_time = time.time()
            run_time.append(end_time - start_time)
            total_images += len(data)
            print("Batch {:d}, loss {:.6f} ".format(batch_id,
                                                    np.mean(outs[0])))
        return reader_time, run_time, total_images

    def run_pyreader(iterations):
        reader_time = [0]
        run_time = []
        total_images = 0

        py_reader.start()
        try:
            for batch_id in range(iterations):
                start_time = time.time()
                if cfg.parallel:
                    outs = train_exe.run(
                        fetch_list=[v.name for v in fetch_list])
                else:
                    outs = exe.run(fluid.default_main_program(),
                                   fetch_list=[v.name for v in fetch_list])
                end_time = time.time()
                run_time.append(end_time - start_time)
                total_images += devices_num
                print("Batch {:d}, loss {:.6f} ".format(
                    batch_id, np.mean(outs[0])))
        except fluid.core.EOFException:
            py_reader.reset()

        return reader_time, run_time, total_images

    run_func = run if not cfg.use_pyreader else run_pyreader

    # warm-up
    run_func(2)
    # profiling
    start = time.time()
    if cfg.use_profile:
        with profiler.profiler('GPU', 'total', '/tmp/profile_file'):
            reader_time, run_time, total_images = run_func(num_iterations)
    else:
        reader_time, run_time, total_images = run_func(num_iterations)

    end = time.time()
    total_time = end - start
    print(
        "Total time: {0}, reader time: {1} s, run time: {2} s, images/s: {3}".
        format(total_time, np.sum(reader_time), np.sum(run_time),
               total_images / total_time))
Example #7
File: train.py  Project: cjt222/EAST_PADDLE
def train():
    learning_rate = cfg.learning_rate
    image_shape = [3, 512, 512]

    if cfg.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = model_builder.EAST(
                add_conv_body_func=resnet.ResNet(),
                use_random=use_random)
            model.build_model(image_shape)
            losses, keys = model.loss()
            loss = losses[0]
            fetch_list = losses

            boundaries = cfg.lr_steps
            gamma = cfg.lr_gamma
            step_num = len(cfg.lr_steps)
            values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

            lr = exponential_with_warmup_decay(
                learning_rate=learning_rate,
                boundaries=boundaries,
                values=values,
                warmup_iter=cfg.warm_up_iter,
                warmup_factor=cfg.warm_up_factor)
            optimizer = fluid.optimizer.AdamOptimizer(
                learning_rate=lr,
                regularization=fluid.regularizer.L2Decay(cfg.weight_decay))
            optimizer.minimize(loss)
            fetch_list = fetch_list + [lr]

            for var in fetch_list:
                var.persistable = True

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    build_strategy.sync_batch_norm = True
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_iteration_per_drop_scope = 1
    exe.run(startup_prog)

    if cfg.pretrained_model:
        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)


    dataset = icdar.ICDAR2015Dataset()
    data_generator = dataset.get_batch(num_workers=24,
                                       input_size=512,
                                       batch_size=14)

    def train_loop():
        start_time = time.time()
        prev_start_time = start_time
        start = start_time
        train_stats = TrainingStats(cfg.log_window, keys)
        #for iter_id, data in enumerate(next(data_generator)):
        for iter_id in range(100000):
            data = next(data_generator)
            #for data in data_list:
            prev_start_time = start_time
            start_time = time.time()
            outs = exe.run(compiled_train_prog,
                           fetch_list=[v.name for v in fetch_list],
                           feed={"input_images": data[0],
                                 "input_score_maps": data[2],
                                 "input_geo_maps": data[3],
                                 "input_training_masks": data[4]})
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            strs = '{}, batch: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id,
                np.mean(outs[-1]), logs, start_time - prev_start_time)
            if iter_id % 10 == 0:
                print(strs)
            sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model(exe, "model_iter{}".format(iter_id), train_prog)
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        total_time = end_time - start_time
        last_loss = np.array(outs[0]).mean()
    train_loop()
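
Example #7 calls save_model(exe, "model_iter{}".format(iter_id), train_prog) but does not include that helper. A minimal sketch, assuming it mirrors the save_model helpers in the other examples and saves the persistable variables of the given program (cfg.model_save_dir is assumed to exist in that project's config):

import os
import shutil
import paddle.fluid as fluid

def save_model(exe, postfix, program):
    # Hypothetical sketch of the helper not shown in Example #7.
    model_path = os.path.join(cfg.model_save_dir, postfix)
    if os.path.isdir(model_path):
        shutil.rmtree(model_path)
    fluid.io.save_persistables(exe, model_path, main_program=program)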