def build_program(args, is_train, main_prog, startup_prog,
                  py_reader_startup_prog, sz, trn_dir, bs, min_scale,
                  rect_val=False):
    dshape = [3, sz, sz]
    class_dim = 1000
    pyreader = None
    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            if is_train:
                with fluid.program_guard(main_prog, py_reader_startup_prog):
                    with fluid.unique_name.guard():
                        pyreader = fluid.layers.py_reader(
                            capacity=bs * DEVICE_NUM,
                            shapes=([-1] + dshape, (-1, 1)),
                            dtypes=('uint8', 'int64'),
                            name="train_reader_" + str(sz)
                            if is_train else "test_reader_" + str(sz),
                            use_double_buffer=True)
                input, label = fluid.layers.read_file(pyreader)
            else:
                input = fluid.data(
                    name="image", shape=[None, 3, 244, 244], dtype="uint8")
                label = fluid.data(
                    name="label", shape=[None, 1], dtype="int64")

            cast_img_type = "float16" if args.fp16 else "float32"
            cast = fluid.layers.cast(input, cast_img_type)
            img_mean = fluid.layers.create_global_var(
                [3, 1, 1], 0.0, cast_img_type, name="img_mean",
                persistable=True)
            img_std = fluid.layers.create_global_var(
                [3, 1, 1], 0.0, cast_img_type, name="img_std",
                persistable=True)
            # image = (image - (mean * 255.0)) / (std * 255.0)
            t1 = fluid.layers.elementwise_sub(cast, img_mean, axis=1)
            t2 = fluid.layers.elementwise_div(t1, img_std, axis=1)

            model = FastImageNet(is_train=is_train)
            predict = model.net(t2, class_dim=class_dim, img_size=sz)
            cost, pred = fluid.layers.softmax_with_cross_entropy(
                predict, label, return_softmax=True)
            if args.scale_loss > 1:
                avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss)
            else:
                avg_cost = fluid.layers.mean(x=cost)

            batch_acc1 = fluid.layers.accuracy(input=pred, label=label, k=1)
            batch_acc5 = fluid.layers.accuracy(input=pred, label=label, k=5)

            # configure optimizer
            optimizer = None
            if is_train:
                total_images = args.total_images
                lr = args.lr

                # (start_epoch, end_epoch) of each training phase
                epochs = [(0, 7), (7, 13), (13, 22), (22, 25), (25, 28)]
                # global batch size per phase (per-device bs * device count);
                # use `b` here so the comprehension cannot shadow the `bs`
                # argument
                bs_epoch = [b * DEVICE_NUM for b in [224, 224, 96, 96, 50]]
                bs_scale = [b * 1.0 / bs_epoch[0] for b in bs_epoch]
                # (start_lr, end_lr) per phase, rescaled by the batch-size
                # ratio when the batch size changes; the trailing scalar is
                # the constant LR used after the last boundary
                lrs = [(lr, lr * 2), (lr * 2, lr / 4),
                       (lr * bs_scale[2], lr / 10 * bs_scale[2]),
                       (lr / 10 * bs_scale[2], lr / 100 * bs_scale[2]),
                       (lr / 100 * bs_scale[4], lr / 1000 * bs_scale[4]),
                       lr / 1000 * bs_scale[4]]
                boundaries, values = lr_decay(lrs, epochs, bs_epoch,
                                              total_images)

                optimizer = fluid.optimizer.Momentum(
                    learning_rate=fluid.layers.piecewise_decay(
                        boundaries=boundaries, values=values),
                    momentum=0.9)
                if args.fp16:
                    params_grads = optimizer.backward(avg_cost)
                    master_params_grads = utils.create_master_params_grads(
                        params_grads, main_prog, startup_prog, args.scale_loss)
                    optimizer.apply_gradients(master_params_grads)
                    utils.master_param_to_train_param(master_params_grads,
                                                      params_grads, main_prog)
                else:
                    optimizer.minimize(avg_cost)

    return avg_cost, optimizer, [batch_acc1, batch_acc5], pyreader
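# The training branch above calls an lr_decay helper that is defined
# elsewhere in this repo. The sketch below is an assumption, not the real
# implementation (hence the _sketch suffix): it shows one plausible way to
# expand the per-phase (start_lr, end_lr) pairs, plus the trailing scalar
# final LR, into the boundaries/values that fluid.layers.piecewise_decay
# expects, stepping the LR linearly once per epoch and counting steps per
# epoch with each phase's own global batch size.
import math


def lr_decay_sketch(lrs, epochs, bs_epoch, total_images):
    boundaries, values = [], []
    for phase, (start_epoch, end_epoch) in enumerate(epochs):
        steps_per_epoch = int(math.ceil(total_images * 1.0 / bs_epoch[phase]))
        start_lr, end_lr = lrs[phase]
        num_epochs = end_epoch - start_epoch
        for i in range(num_epochs):
            # one linearly interpolated LR value per epoch of this phase
            values.append(start_lr +
                          (end_lr - start_lr) * i * 1.0 / num_epochs)
            boundaries.append((boundaries[-1] if boundaries else 0) +
                              steps_per_epoch)
    # trailing scalar in lrs: constant LR after the last boundary, keeping
    # len(values) == len(boundaries) + 1 as piecewise_decay requires
    values.append(lrs[-1])
    return boundaries, values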
def build_program(is_train, main_prog, startup_prog, args):
    pyreader = None
    class_dim = args.class_dim
    image_shape = [int(m) for m in args.image_shape.split(",")]
    trainer_count = args.dist_env["num_trainers"]
    device_num_per_worker = get_device_num()
    with fluid.program_guard(main_prog, startup_prog):
        pyreader = fluid.layers.py_reader(
            capacity=16,
            shapes=([-1] + image_shape, (-1, 1)),
            dtypes=('float32', 'int64'),
            name="train_reader" if is_train else "test_reader",
            use_double_buffer=True)
        with fluid.unique_name.guard():
            image, label = fluid.layers.read_file(pyreader)
            if args.fp16:
                image = fluid.layers.cast(image, "float16")
            model_def = models.__dict__[args.model](layers=50,
                                                    is_train=is_train)
            predict = model_def.net(image, class_dim=class_dim)
            cost, pred = fluid.layers.softmax_with_cross_entropy(
                predict, label, return_softmax=True)
            if args.scale_loss > 1:
                avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss)
            else:
                avg_cost = fluid.layers.mean(x=cost)

            batch_acc1 = fluid.layers.accuracy(input=pred, label=label, k=1)
            batch_acc5 = fluid.layers.accuracy(input=pred, label=label, k=5)

            optimizer = None
            if is_train:
                start_lr = args.lr
                end_lr = args.lr * trainer_count * args.multi_batch_repeat
                if os.getenv("FLAGS_selected_gpus"):
                    # in multi-process mode there is one process per device,
                    # so "trainer_count" is the total device count of the
                    # whole cluster; scale the target LR back down by the
                    # device count per node
                    end_lr /= device_num_per_worker

                total_images = args.total_images / trainer_count
                step = int(total_images /
                           (args.batch_size * args.multi_batch_repeat) + 1)
                warmup_steps = step * 5  # warmup 5 passes
                epochs = [30, 60, 80]
                bd = [step * e for e in epochs]
                base_lr = end_lr
                lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
                print("start lr: %s, end lr: %s, decay boundaries: %s" %
                      (start_lr, end_lr, bd))

                # NOTE: we put weight decay in the layers config and remove
                # weight decay on bn layers, so don't add weight decay in
                # the optimizer config.
                optimizer = fluid.optimizer.Momentum(
                    learning_rate=utils.learning_rate.lr_warmup(
                        fluid.layers.piecewise_decay(boundaries=bd, values=lr),
                        warmup_steps, start_lr, end_lr),
                    momentum=0.9)
                if args.fp16:
                    params_grads = optimizer.backward(avg_cost)
                    master_params_grads = utils.create_master_params_grads(
                        params_grads, main_prog, startup_prog, args.scale_loss,
                        reduce_master_grad=args.reduce_master_grad)
                    optimizer.apply_gradients(master_params_grads)
                    utils.master_param_to_train_param(master_params_grads,
                                                      params_grads, main_prog)
                else:
                    optimizer.minimize(avg_cost)

    # prepare reader for current program
    prepare_reader(is_train, pyreader, args)
    return pyreader, avg_cost, batch_acc1, batch_acc5
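# build_program above wraps the piecewise decay in
# utils.learning_rate.lr_warmup, which is not shown here. The sketch below
# assumes the common linear-warmup pattern: ramp the LR from start_lr to
# end_lr over warmup_steps, then hand over to the wrapped schedule. It is
# an illustration of the idea, not necessarily the repo's exact code.
def lr_warmup_sketch(learning_rate, warmup_steps, start_lr, end_lr):
    linear_step = end_lr - start_lr
    # persistent scalar that always holds the effective learning rate
    lr = fluid.layers.create_global_var(
        shape=[1], value=0.0, dtype='float32',
        persistable=True, name="learning_rate_warmup")
    global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()
    with fluid.layers.control_flow.Switch() as switch:
        with switch.case(global_step < warmup_steps):
            # still warming up: interpolate between start_lr and end_lr
            warmed_lr = start_lr + linear_step * (global_step / warmup_steps)
            fluid.layers.assign(warmed_lr, lr)
        with switch.default():
            # past warmup: use the wrapped (decayed) learning rate
            fluid.layers.assign(learning_rate, lr)
    return lr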
def get_model(args, is_train, main_prog, startup_prog,
              py_reader_startup_prog, sz, trn_dir, bs, min_scale,
              rect_val=False):
    _, reader, dshape, class_dim = _model_reader_dshape_classdim(
        args, is_train, val_bs=bs * args.gpus, sz=sz, trn_dir=trn_dir,
        min_scale=min_scale, rect_val=rect_val)

    pyreader = None
    batched_reader = None
    trainer_count = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            if is_train:
                with fluid.program_guard(main_prog, py_reader_startup_prog):
                    with fluid.unique_name.guard():
                        pyreader = fluid.layers.py_reader(
                            capacity=bs * args.gpus,
                            shapes=([-1] + dshape, (-1, 1)),
                            dtypes=('uint8', 'int64'),
                            name="train_reader_" + str(sz)
                            if is_train else "test_reader_" + str(sz),
                            use_double_buffer=True)
                input, label = fluid.layers.read_file(pyreader)
                pyreader.decorate_paddle_reader(
                    paddle.batch(reader, batch_size=bs))
            else:
                input = fluid.layers.data(
                    name="image", shape=[3, 244, 244], dtype="uint8")
                label = fluid.layers.data(
                    name="label", shape=[1], dtype="int64")
                batched_reader = paddle.batch(
                    reader, batch_size=bs * args.gpus)

            cast_img_type = "float16" if args.fp16 else "float32"
            cast = fluid.layers.cast(input, cast_img_type)
            img_mean = fluid.layers.create_global_var(
                [3, 1, 1], 0.0, cast_img_type, name="img_mean",
                persistable=True)
            img_std = fluid.layers.create_global_var(
                [3, 1, 1], 0.0, cast_img_type, name="img_std",
                persistable=True)
            # image = (image - (mean * 255.0)) / (std * 255.0)
            t1 = fluid.layers.elementwise_sub(cast, img_mean, axis=1)
            t2 = fluid.layers.elementwise_div(t1, img_std, axis=1)

            model = ResNet(is_train=is_train)
            predict = model.net(t2, class_dim=class_dim, img_size=sz)
            cost, pred = fluid.layers.softmax_with_cross_entropy(
                predict, label, return_softmax=True)
            if args.scale_loss > 1:
                avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss)
            else:
                avg_cost = fluid.layers.mean(x=cost)

            batch_acc1 = fluid.layers.accuracy(input=pred, label=label, k=1)
            batch_acc5 = fluid.layers.accuracy(input=pred, label=label, k=5)

            # configure optimizer
            optimizer = None
            if is_train:
                total_images = 1281167 / trainer_count
                # same phase schedule as build_program, with the LR values
                # pre-computed as literals
                epochs = [(0, 7), (7, 13), (13, 22), (22, 25), (25, 28)]
                bs_epoch = [224, 224, 96, 96, 50]
                lrs = [(1.0, 2.0), (2.0, 0.25),
                       (0.42857142857142855, 0.04285714285714286),
                       (0.04285714285714286, 0.004285714285714286),
                       (0.0022321428571428575, 0.00022321428571428573),
                       0.00022321428571428573]
                boundaries, values = lr_decay(lrs, epochs, bs_epoch,
                                              total_images)
                print("lr linear decay boundaries: ", boundaries,
                      " \nvalues: ", values)

                optimizer = fluid.optimizer.Momentum(
                    learning_rate=fluid.layers.piecewise_decay(
                        boundaries=boundaries, values=values),
                    momentum=0.9,
                    regularization=fluid.regularizer.L2Decay(1e-4))
                if args.fp16:
                    params_grads = optimizer.backward(avg_cost)
                    master_params_grads = utils.create_master_params_grads(
                        params_grads, main_prog, startup_prog, args.scale_loss)
                    optimizer.apply_gradients(master_params_grads)
                    utils.master_param_to_train_param(master_params_grads,
                                                      params_grads, main_prog)
                else:
                    optimizer.minimize(avg_cost)

    if args.memory_optimize:
        fluid.memory_optimize(main_prog, skip_grads=True)

    return avg_cost, optimizer, [batch_acc1, batch_acc5], \
        batched_reader, pyreader, py_reader_startup_prog
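# A hypothetical wiring example for get_model above. The argument values
# (sz, bs, min_scale) and the data directory are made up, and `args` is
# assumed to come from the repo's argument parser; only the fluid calls
# shown (Program, Executor, CUDAPlace, py_reader start/reset, EOFException)
# are standard API. This is a sketch of the intended usage, not code from
# the repo.
train_prog = fluid.Program()
startup_prog = fluid.Program()
py_reader_startup_prog = fluid.Program()

avg_cost, optimizer, accs, _, train_pyreader, _ = get_model(
    args, True, train_prog, startup_prog, py_reader_startup_prog,
    sz=128, trn_dir="/path/to/imagenet/", bs=224, min_scale=0.08)

exe = fluid.Executor(fluid.CUDAPlace(0))
exe.run(py_reader_startup_prog)  # initialize the reader's startup ops
exe.run(startup_prog)            # initialize model parameters

train_pyreader.start()
try:
    while True:
        loss, = exe.run(train_prog, fetch_list=[avg_cost.name])
except fluid.core.EOFException:
    # the reader is exhausted for this pass; reset before the next one
    train_pyreader.reset()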