def optimizer_setting(lr):
    batch_size = 1024
    iters = 150000 // batch_size
    # epoch milestones converted to iteration boundaries
    boundaries = [i * iters for i in [60, 100, 150]]
    values = [i * lr for i in [1, 0.5, 0.1, 0.05]]
    optimizer = fluid.optimizer.Adam(
        # momentum=0.9,
        learning_rate=exponential_with_warmup_decay(
            learning_rate=lr,
            boundaries=boundaries,
            values=values,
            warmup_iter=200,
            warmup_factor=0.),
        regularization=fluid.regularizer.L2Decay(0.00001))
    return optimizer
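# `exponential_with_warmup_decay` is used by every train() below but is not
# defined in this file. The following is a minimal reconstruction, assuming
# the usual pattern from the PaddlePaddle models repo: a linear warmup from
# warmup_factor * learning_rate up to learning_rate over warmup_iter steps,
# followed by piecewise-constant decay over `boundaries`/`values`.
import paddle.fluid as fluid
from paddle.fluid.layers import control_flow
from paddle.fluid.layers import learning_rate_scheduler as lr_scheduler


def exponential_with_warmup_decay(learning_rate, boundaries, values,
                                  warmup_iter, warmup_factor):
    global_step = lr_scheduler._decay_step_counter()
    lr = fluid.layers.create_global_var(
        shape=[1], value=0.0, dtype='float32', persistable=True,
        name="learning_rate")
    warmup_iter_var = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(warmup_iter), force_cpu=True)
    with control_flow.Switch() as switch:
        # Linear warmup: the factor ramps from warmup_factor to 1.
        with switch.case(global_step < warmup_iter_var):
            alpha = global_step / warmup_iter_var
            factor = warmup_factor * (1 - alpha) + alpha
            decayed_lr = learning_rate * factor
            fluid.layers.assign(decayed_lr, lr)
        # Piecewise-constant decay after warmup.
        for i in range(len(boundaries)):
            boundary_val = fluid.layers.fill_constant(
                shape=[1], dtype='float32', value=float(boundaries[i]),
                force_cpu=True)
            value_var = fluid.layers.fill_constant(
                shape=[1], dtype='float32', value=float(values[i]))
            with switch.case(global_step < boundary_val):
                fluid.layers.assign(value_var, lr)
        last_value_var = fluid.layers.fill_constant(
            shape=[1], dtype='float32', value=float(values[-1]))
        with switch.default():
            fluid.layers.assign(last_value_var, lr)
    return lr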
def train():
    if cfg.debug or args.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
        np.random.seed(0)

    if not os.path.exists(cfg.model_save_dir):
        os.makedirs(cfg.model_save_dir)

    model = YOLOv3()
    model.build_model()
    input_size = cfg.input_size
    loss = model.loss()
    loss.persistable = True

    devices_num = get_device_num()
    print("Found {} CUDA devices.".format(devices_num))

    learning_rate = cfg.learning_rate
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
            learning_rate=learning_rate,
            boundaries=boundaries,
            values=values,
            warmup_iter=cfg.warm_up_iter,
            warmup_factor=cfg.warm_up_factor),
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrain:
        if not os.path.exists(cfg.pretrain):
            print("Pretrain weights not found: {}".format(cfg.pretrain))

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrain, var.name)) \
                and var.name.find('yolo_output') < 0

        fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False  # gc and memory_optimize may conflict

    syncbn = cfg.syncbn
    if (syncbn and devices_num <= 1) or num_trainers > 1:
        print("Disable syncbn for single-device or multi-process training")
        syncbn = False
    build_strategy.sync_batch_norm = syncbn

    exec_strategy = fluid.ExecutionStrategy()
    if cfg.use_gpu and num_trainers > 1:
        dist_utils.prepare_for_multi_process(exe, build_strategy,
                                             fluid.default_main_program())
        exec_strategy.num_threads = 1

    compile_program = fluid.compiler.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            loss_name=loss.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)

    random_sizes = [cfg.input_size]
    if cfg.random_shape:
        random_sizes = [32 * i for i in range(10, 20)]

    total_iter = cfg.max_iter - cfg.start_iter
    mixup_iter = total_iter - cfg.no_mixup_iter

    shuffle = True
    if args.enable_ce:
        shuffle = False
    shuffle_seed = None
    # NOTE: yolov3 is a special model: even when num_trainers > 1, each
    # process trains on the complete dataset.
    # if num_trainers > 1: shuffle_seed = 1
    train_reader = reader.train(
        input_size,
        batch_size=cfg.batch_size,
        shuffle=shuffle,
        shuffle_seed=shuffle_seed,
        total_iter=total_iter * devices_num,
        mixup_iter=mixup_iter * devices_num,
        random_sizes=random_sizes,
        use_multiprocess_reader=cfg.use_multiprocess_reader)
    py_reader = model.py_reader
    py_reader.decorate_paddle_reader(train_reader)

    def save_model(postfix):
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    fetch_list = [loss]

    py_reader.start()
    smoothed_loss = SmoothedValue()
    try:
        start_time = time.time()
        prev_start_time = start_time
        snapshot_loss = 0
        snapshot_time = 0
        for iter_id in range(cfg.start_iter, cfg.max_iter):
            prev_start_time = start_time
            start_time = time.time()
            losses = exe.run(compile_program,
                             fetch_list=[v.name for v in fetch_list])
            smoothed_loss.add_value(np.mean(np.array(losses[0])))
            snapshot_loss += np.mean(np.array(losses[0]))
            snapshot_time += start_time - prev_start_time
            lr = np.array(
                fluid.global_scope().find_var('learning_rate').get_tensor())
            print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
                iter_id, lr[0], smoothed_loss.get_mean_value(),
                start_time - prev_start_time))
            sys.stdout.flush()
            if (iter_id + 1) % cfg.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
                print("Snapshot {} saved, average loss: {}, "
                      "average time: {}".format(
                          iter_id + 1,
                          snapshot_loss / float(cfg.snapshot_iter),
                          snapshot_time / float(cfg.snapshot_iter)))
                if args.enable_ce and iter_id == cfg.max_iter - 1:
                    if devices_num == 1:
                        print("kpis\ttrain_cost_1card\t%f" %
                              (snapshot_loss / float(cfg.snapshot_iter)))
                        print("kpis\ttrain_duration_1card\t%f" %
                              (snapshot_time / float(cfg.snapshot_iter)))
                    else:
                        print("kpis\ttrain_cost_8card\t%f" %
                              (snapshot_loss / float(cfg.snapshot_iter)))
                        print("kpis\ttrain_duration_8card\t%f" %
                              (snapshot_time / float(cfg.snapshot_iter)))
                snapshot_loss = 0
                snapshot_time = 0
    except fluid.core.EOFException:
        py_reader.reset()

    save_model('model_final')
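# `SmoothedValue` is referenced above but not defined in this snippet. A
# minimal sketch, assuming it tracks the running mean of the training loss
# (the real helper may smooth over a bounded window instead):
import numpy as np


class SmoothedValue(object):
    """Track a series of values and expose their running mean."""

    def __init__(self):
        self.loss_sum = 0.0
        self.iter_cnt = 0

    def add_value(self, value):
        self.loss_sum += np.mean(value)
        self.iter_cnt += 1

    def get_mean_value(self):
        return self.loss_sum / self.iter_cnt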
def train():
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]

    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,  # res4: [-1, 1024, 84, 84]
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,  # res5: [-1, 2048, 7, 7]
        use_pyreader=cfg.use_pyreader,
        use_random=use_random)
    model.build_model(image_shape)
    losses, keys = model.loss()
    loss = losses[0]
    fetch_list = losses

    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    lr = exponential_with_warmup_decay(
        learning_rate=learning_rate,
        boundaries=boundaries,
        values=values,
        warmup_iter=cfg.warm_up_iter,
        warmup_factor=cfg.warm_up_factor)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr,
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)
    fetch_list = fetch_list + [lr]

    for var in fetch_list:
        var.persistable = True

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = True

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_iteration_per_drop_scope = 10
        if num_trainers > 1 and cfg.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy,
                                                 fluid.default_main_program())
            # training is faster with num_threads = 1 for multi-process runs
            exec_strategy.num_threads = 1
        train_exe = fluid.ParallelExecutor(
            use_cuda=bool(cfg.use_gpu),
            loss_name=loss.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    else:
        train_exe = exe

    shuffle = True
    # NOTE: for multi-process training, fix the shuffle seed so every process
    # sees batches in the same order; without pyreader, shuffling is disabled
    # instead.
    shuffle_seed = None
    if num_trainers > 1:
        shuffle_seed = 1
    if cfg.use_pyreader:
        train_reader = reader.train(
            batch_size=cfg.TRAIN.im_per_batch,
            total_batch_size=total_batch_size,
            padding_total=cfg.TRAIN.padding_minibatch,
            shuffle=shuffle,
            shuffle_seed=shuffle_seed)
        if num_trainers > 1:
            assert shuffle_seed is not None, \
                "If num_trainers > 1, the shuffle_seed must be set, because " \
                "the order of batch data generated by the reader must be the " \
                "same in the respective processes."
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        if num_trainers > 1:
            shuffle = False
        train_reader = reader.train(batch_size=total_batch_size,
                                    shuffle=shuffle)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def save_model(postfix):
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    def train_loop_pyreader():
        py_reader.start()
        train_stats = TrainingStats(cfg.log_window, keys)
        try:
            start_time = time.time()
            first_start_time = start_time
            for iter_id in range(cfg.max_iter):
                prev_start_time = start_time
                start_time = time.time()
                outs = train_exe.run(fetch_list=[v.name for v in fetch_list])
                stats = {
                    k: np.array(v).mean()
                    for k, v in zip(keys, outs[:-1])
                }
                train_stats.update(stats)
                logs = train_stats.log()
                strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                    now_time(), iter_id,
                    np.mean(outs[-1]), logs, start_time - prev_start_time)
                print(strs)
                sys.stdout.flush()
                if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                    save_model("model_iter{}".format(iter_id))
            end_time = time.time()
            total_time = end_time - first_start_time
            last_loss = np.array(outs[0]).mean()
        except (StopIteration, fluid.core.EOFException):
            py_reader.reset()

    def train_loop():
        train_stats = TrainingStats(cfg.log_window, keys)
        start_time = time.time()
        first_start_time = start_time
        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            outs = train_exe.run(fetch_list=[v.name for v in fetch_list],
                                 feed=feeder.feed(data))
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id,
                np.mean(outs[-1]), logs, start_time - prev_start_time)
            print(strs)
            sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        total_time = end_time - first_start_time
        last_loss = np.array(outs[0]).mean()

    if cfg.use_pyreader:
        train_loop_pyreader()
    else:
        train_loop()
    save_model('model_final')
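# `TrainingStats` is used by both training loops above without being defined
# in this snippet. A hypothetical sketch, assuming it keeps a fixed window of
# recent values per loss key (cfg.log_window) and logs their medians:
import collections
import numpy as np


class TrainingStats(object):
    def __init__(self, window_size, stats_keys):
        self.smoothed_losses = {
            key: collections.deque(maxlen=window_size)
            for key in stats_keys
        }

    def update(self, stats):
        for k, v in stats.items():
            self.smoothed_losses[k].append(float(v))

    def log(self):
        return ', '.join('{}: {:.6f}'.format(k, np.median(dq))
                         for k, dq in self.smoothed_losses.items())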
def train():
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]

    if cfg.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    if cfg.enable_ce:
        use_random = False
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
        use_random=use_random)
    model.build_model(image_shape)
    losses, keys = model.loss()
    loss = losses[0]
    fetch_list = losses

    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    lr = exponential_with_warmup_decay(
        learning_rate=learning_rate,
        boundaries=boundaries,
        values=values,
        warmup_iter=cfg.warm_up_iter,
        warmup_factor=cfg.warm_up_factor)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr,
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)
    fetch_list = fetch_list + [lr]

    for var in fetch_list:
        var.persistable = True
    #fluid.memory_optimize(fluid.default_main_program(), skip_opt_set=set(fetch_list))

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.use_experimental_executor = True
        train_exe = fluid.ParallelExecutor(
            use_cuda=bool(cfg.use_gpu),
            loss_name=loss.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)
    else:
        train_exe = exe

    shuffle = True
    if cfg.enable_ce:
        shuffle = False
    if cfg.use_pyreader:
        train_reader = reader.train(
            batch_size=cfg.TRAIN.im_per_batch,
            total_batch_size=total_batch_size,
            padding_total=cfg.TRAIN.padding_minibatch,
            shuffle=shuffle)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        train_reader = reader.train(batch_size=total_batch_size,
                                    shuffle=shuffle)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def save_model(postfix):
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    def train_loop_pyreader():
        py_reader.start()
        train_stats = TrainingStats(cfg.log_window, keys)
        try:
            start_time = time.time()
            prev_start_time = start_time
            start = start_time
            for iter_id in range(cfg.max_iter):
                prev_start_time = start_time
                start_time = time.time()
                outs = train_exe.run(fetch_list=[v.name for v in fetch_list])
                stats = {
                    k: np.array(v).mean()
                    for k, v in zip(keys, outs[:-1])
                }
                train_stats.update(stats)
                logs = train_stats.log()
                strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                    now_time(), iter_id,
                    np.mean(outs[-1]), logs, start_time - prev_start_time)
                print(strs)
                sys.stdout.flush()
                if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                    save_model("model_iter{}".format(iter_id))
            end_time = time.time()
            # measure total time from before the loop, not the last iteration
            total_time = end_time - start
            last_loss = np.array(outs[0]).mean()
            if cfg.enable_ce:
                gpu_num = devices_num
                epoch_idx = iter_id + 1
                loss = last_loss
                print("kpis\teach_pass_duration_card%s\t%s" %
                      (gpu_num, total_time / epoch_idx))
                print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, loss))
        except (StopIteration, fluid.core.EOFException):
            py_reader.reset()

    def train_loop():
        start_time = time.time()
        prev_start_time = start_time
        start = start_time
        train_stats = TrainingStats(cfg.log_window, keys)
        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            outs = train_exe.run(fetch_list=[v.name for v in fetch_list],
                                 feed=feeder.feed(data))
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id,
                np.mean(outs[-1]), logs, start_time - prev_start_time)
            print(strs)
            sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        # measure total time from before the loop, not the last iteration
        total_time = end_time - start
        last_loss = np.array(outs[0]).mean()
        # only for ce
        if cfg.enable_ce:
            gpu_num = devices_num
            epoch_idx = iter_id + 1
            loss = last_loss
            print("kpis\teach_pass_duration_card%s\t%s" %
                  (gpu_num, total_time / epoch_idx))
            print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, loss))

        return last_loss

    if cfg.use_pyreader:
        train_loop_pyreader()
    else:
        train_loop()
    save_model('model_final')
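# A worked example of the `values` computation these train() functions share.
# With hypothetical settings learning_rate=0.01, lr_gamma=0.1, and
# lr_steps=[120000, 160000], the schedule holds 0.01 until iteration 120000,
# 0.001 until 160000, and 0.0001 afterwards:
learning_rate, gamma, lr_steps = 0.01, 0.1, [120000, 160000]
values = [learning_rate * gamma**i for i in range(len(lr_steps) + 1)]
print(values)  # ~[0.01, 0.001, 0.0001], up to float rounding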
def train():
    if cfg.debug:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        random.seed(0)
        np.random.seed(0)

    if not os.path.exists(cfg.model_save_dir):
        os.makedirs(cfg.model_save_dir)

    model = YOLOv3()
    model.build_model()
    input_size = cfg.input_size
    loss = model.loss()
    loss.persistable = True

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    print("Found {} CUDA devices.".format(devices_num))

    learning_rate = cfg.learning_rate
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
            learning_rate=learning_rate,
            boundaries=boundaries,
            values=values,
            warmup_iter=cfg.warm_up_iter,
            warmup_factor=cfg.warm_up_factor),
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)

    if cfg.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrain:
        if not os.path.exists(cfg.pretrain):
            print("Pretrain weights not found: {}".format(cfg.pretrain))

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrain, var.name))

        fluid.io.load_vars(exe, cfg.pretrain, predicate=if_exist)

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = True
    build_strategy.sync_batch_norm = cfg.syncbn
    compile_program = fluid.compiler.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)

    random_sizes = [cfg.input_size]
    if cfg.random_shape:
        random_sizes = [32 * i for i in range(10, 20)]

    total_iter = cfg.max_iter - cfg.start_iter
    mixup_iter = total_iter - cfg.no_mixup_iter
    train_reader = reader.train(
        input_size,
        batch_size=cfg.batch_size,
        shuffle=True,
        total_iter=total_iter * devices_num,
        mixup_iter=mixup_iter * devices_num,
        random_sizes=random_sizes,
        use_multiprocessing=cfg.use_multiprocess)
    py_reader = model.py_reader
    py_reader.decorate_paddle_reader(train_reader)

    def save_model(postfix):
        model_path = os.path.join(cfg.model_save_dir, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    fetch_list = [loss]

    py_reader.start()
    smoothed_loss = SmoothedValue()
    try:
        start_time = time.time()
        prev_start_time = start_time
        snapshot_loss = 0
        snapshot_time = 0
        for iter_id in range(cfg.start_iter, cfg.max_iter):
            prev_start_time = start_time
            start_time = time.time()
            losses = exe.run(compile_program,
                             fetch_list=[v.name for v in fetch_list])
            smoothed_loss.add_value(np.mean(np.array(losses[0])))
            snapshot_loss += np.mean(np.array(losses[0]))
            snapshot_time += start_time - prev_start_time
            lr = np.array(
                fluid.global_scope().find_var('learning_rate').get_tensor())
            print("Iter {:d}, lr {:.6f}, loss {:.6f}, time {:.5f}".format(
                iter_id, lr[0], smoothed_loss.get_mean_value(),
                start_time - prev_start_time))
            sys.stdout.flush()
            if (iter_id + 1) % cfg.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
                print("Snapshot {} saved, average loss: {}, "
                      "average time: {}".format(
                          iter_id + 1,
                          snapshot_loss / float(cfg.snapshot_iter),
                          snapshot_time / float(cfg.snapshot_iter)))
                snapshot_loss = 0
                snapshot_time = 0
    except fluid.core.EOFException:
        py_reader.reset()

    save_model('model_final')
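# The snapshots written by save_model() above are plain directories of
# persistable variables, so a run restarted with cfg.start_iter can reload
# them with fluid.io.load_persistables. A minimal sketch; the checkpoint
# directory name below is hypothetical:
import paddle.fluid as fluid


def resume_from_snapshot(exe, checkpoint_dir):
    # Loads every persistable variable of the default main program from
    # checkpoint_dir, e.g. "checkpoints/model_iter19999".
    fluid.io.load_persistables(exe, checkpoint_dir,
                               main_program=fluid.default_main_program())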
def train():
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]
    num_iterations = cfg.max_iter

    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
        use_random=False)
    model.build_model(image_shape)
    losses, keys = model.loss()
    loss = losses[0]
    fetch_list = [loss]

    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    optimizer = fluid.optimizer.Momentum(
        learning_rate=exponential_with_warmup_decay(
            learning_rate=learning_rate,
            boundaries=boundaries,
            values=values,
            warmup_iter=500,
            warmup_factor=1.0 / 3.0),
        regularization=fluid.regularizer.L2Decay(0.0001),
        momentum=0.9)
    optimizer.minimize(loss)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        train_exe = fluid.ParallelExecutor(use_cuda=bool(cfg.use_gpu),
                                           loss_name=loss.name)

    if cfg.use_pyreader:
        train_reader = reader.train(
            batch_size=cfg.TRAIN.im_per_batch,
            total_batch_size=total_batch_size,
            padding_total=cfg.TRAIN.padding_minibatch,
            shuffle=False)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        train_reader = reader.train(batch_size=total_batch_size,
                                    shuffle=False)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def run(iterations):
        reader_time = []
        run_time = []
        total_images = 0
        # Create the generator once; calling train_reader() inside the loop
        # would restart the reader and time only its first batch.
        train_data = train_reader()
        for batch_id in range(iterations):
            start_time = time.time()
            data = next(train_data)
            end_time = time.time()
            reader_time.append(end_time - start_time)
            start_time = time.time()
            if cfg.parallel:
                outs = train_exe.run(fetch_list=[v.name for v in fetch_list],
                                     feed=feeder.feed(data))
            else:
                outs = exe.run(fluid.default_main_program(),
                               fetch_list=[v.name for v in fetch_list],
                               feed=feeder.feed(data))
            end_time = time.time()
            run_time.append(end_time - start_time)
            total_images += len(data)
            print("Batch {:d}, loss {:.6f} ".format(batch_id,
                                                    np.mean(outs[0])))
        return reader_time, run_time, total_images

    def run_pyreader(iterations):
        # Reading overlaps with execution under py_reader, so reader time
        # cannot be measured separately here.
        reader_time = [0]
        run_time = []
        total_images = 0
        py_reader.start()
        try:
            for batch_id in range(iterations):
                start_time = time.time()
                if cfg.parallel:
                    outs = train_exe.run(
                        fetch_list=[v.name for v in fetch_list])
                else:
                    outs = exe.run(fluid.default_main_program(),
                                   fetch_list=[v.name for v in fetch_list])
                end_time = time.time()
                run_time.append(end_time - start_time)
                total_images += devices_num
                print("Batch {:d}, loss {:.6f} ".format(batch_id,
                                                        np.mean(outs[0])))
        except fluid.core.EOFException:
            py_reader.reset()
        return reader_time, run_time, total_images

    run_func = run if not cfg.use_pyreader else run_pyreader

    # warm-up
    run_func(2)
    # profiling
    start = time.time()
    if cfg.use_profile:
        with profiler.profiler('GPU', 'total', '/tmp/profile_file'):
            reader_time, run_time, total_images = run_func(num_iterations)
    else:
        reader_time, run_time, total_images = run_func(num_iterations)
    end = time.time()
    total_time = end - start
    print("Total time: {0}, reader time: {1} s, run time: {2} s, "
          "images/s: {3}".format(total_time, np.sum(reader_time),
                                 np.sum(run_time),
                                 total_images / total_time))
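# A quick sanity check of the throughput arithmetic above, with made-up
# numbers: 100 batches of 8 images in 50 s of wall-clock time gives
# 800 / 50 = 16 images/s. The figure is computed against total wall-clock
# time, so slow data reading lowers it even when executor run time is short.
total_images, total_time = 100 * 8, 50.0
print("images/s: {}".format(total_images / total_time))  # 16.0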
def train():
    learning_rate = cfg.learning_rate
    image_shape = [3, 512, 512]

    if cfg.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = model_builder.EAST(
                add_conv_body_func=resnet.ResNet(),
                use_random=use_random)
            model.build_model(image_shape)
            losses, keys = model.loss()
            loss = losses[0]
            fetch_list = losses

            boundaries = cfg.lr_steps
            gamma = cfg.lr_gamma
            step_num = len(cfg.lr_steps)
            values = [
                learning_rate * (gamma**i) for i in range(step_num + 1)
            ]

            lr = exponential_with_warmup_decay(
                learning_rate=learning_rate,
                boundaries=boundaries,
                values=values,
                warmup_iter=cfg.warm_up_iter,
                warmup_factor=cfg.warm_up_factor)
            optimizer = fluid.optimizer.AdamOptimizer(
                learning_rate=lr,
                regularization=fluid.regularizer.L2Decay(cfg.weight_decay))
            optimizer.minimize(loss)
            fetch_list = fetch_list + [lr]

            for var in fetch_list:
                var.persistable = True

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    build_strategy.sync_batch_norm = True
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_iteration_per_drop_scope = 1
    exe.run(startup_prog)

    if cfg.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))

        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    dataset = icdar.ICDAR2015Dataset()
    data_generator = dataset.get_batch(num_workers=24,
                                       input_size=512,
                                       batch_size=14)

    def train_loop():
        start_time = time.time()
        prev_start_time = start_time
        start = start_time
        train_stats = TrainingStats(cfg.log_window, keys)
        for iter_id in range(100000):
            data = next(data_generator)
            prev_start_time = start_time
            start_time = time.time()
            outs = exe.run(compiled_train_prog,
                           fetch_list=[v.name for v in fetch_list],
                           feed={
                               "input_images": data[0],
                               "input_score_maps": data[2],
                               "input_geo_maps": data[3],
                               "input_training_masks": data[4]
                           })
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            strs = '{}, batch: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id,
                np.mean(outs[-1]), logs, start_time - prev_start_time)
            if iter_id % 10 == 0:
                print(strs)
                sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model(exe, "model_iter{}".format(iter_id), train_prog)
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        # measure total time from before the loop, not the last iteration
        total_time = end_time - start
        last_loss = np.array(outs[0]).mean()

    train_loop()
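# The EAST loop above calls a module-level save_model(exe, postfix, prog)
# that is not shown in this snippet. A sketch consistent with the nested
# save_model helpers in the earlier train() functions, extended with an
# explicit program argument (cfg.model_save_dir is assumed, as elsewhere):
import os
import shutil

import paddle.fluid as fluid


def save_model(exe, postfix, prog):
    # Replace any stale snapshot directory, then save every persistable
    # variable of `prog` under cfg.model_save_dir/<postfix>.
    model_path = os.path.join(cfg.model_save_dir, postfix)
    if os.path.isdir(model_path):
        shutil.rmtree(model_path)
    fluid.io.save_persistables(exe, model_path, main_program=prog)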