def get_model(args, is_train, main_prog, startup_prog):
    """Build the SE-ResNeXt-50 train or eval program.

    Args:
        args: parsed CLI args (use_reader_op, batch_size, gpus,
            learning_rate, memory_optimize, ...).
        is_train: True builds the training program (optimizer attached),
            False builds the evaluation program.
        main_prog: fluid.Program that receives the model ops.
        startup_prog: fluid.Program that receives parameter initializers.

    Returns:
        Tuple ``(avg_cost, optimizer, [acc_top1, acc_top5],
        batched_reader, pyreader)``. ``optimizer`` is None when
        ``is_train`` is False; exactly one of ``batched_reader`` /
        ``pyreader`` is non-None depending on ``args.use_reader_op``.
    """
    model = SE_ResNeXt(layers=50)
    batched_reader = None
    pyreader = None
    # NOTE(review): crashes with TypeError if PADDLE_TRAINERS is unset —
    # presumably always exported by the launcher; confirm.
    trainer_count = int(os.getenv("PADDLE_TRAINERS"))
    dshape = train_parameters["input_size"]

    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            if args.use_reader_op:
                pyreader = fluid.layers.py_reader(
                    capacity=10,
                    shapes=([-1] + dshape, (-1, 1)),
                    dtypes=('float32', 'int64'),
                    name="train_reader" if is_train else "test_reader",
                    use_double_buffer=True)
                input, label = fluid.layers.read_file(pyreader)
            else:
                input = fluid.layers.data(
                    name='data', shape=dshape, dtype='float32')
                label = fluid.layers.data(
                    name='label', shape=[1], dtype='int64')

            out = model.net(input=input)
            cost = fluid.layers.cross_entropy(input=out, label=label)
            avg_cost = fluid.layers.mean(x=cost)
            acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

            optimizer = None
            if is_train:
                # Each trainer sees an equal shard of the ImageNet train
                # set; use integer division (Python 2 legacy `/`).
                total_images = 1281167 // trainer_count
                step = int(total_images / args.batch_size + 1)
                epochs = [40, 80, 100]
                bd = [step * e for e in epochs]
                base_lr = args.learning_rate
                # Decay the LR by 10x at each epoch boundary.
                lr = [base_lr * (0.1 ** i) for i in range(len(bd) + 1)]
                optimizer = fluid.optimizer.Momentum(
                    learning_rate=fluid.layers.piecewise_decay(
                        boundaries=bd, values=lr),
                    momentum=0.9,
                    regularization=fluid.regularizer.L2Decay(1e-4))
                optimizer.minimize(avg_cost)

                if args.memory_optimize:
                    fluid.memory_optimize(main_prog)

    # Configure the data readers.
    reader = train() if is_train else val()
    if not args.use_reader_op:
        batched_reader = paddle.batch(
            reader, batch_size=args.batch_size * args.gpus, drop_last=True)
    else:
        pyreader.decorate_paddle_reader(
            paddle.batch(reader, batch_size=args.batch_size))

    return avg_cost, optimizer, [acc_top1, acc_top5], batched_reader, pyreader
def search_mobilenetv2(config, args, image_size, is_server=True):
    """Run an RL-NAS search (LSTM controller) over MobileNetV2-like archs.

    Each search step samples one architecture, trains it for
    ``args.retain_epoch`` epochs, evaluates it, and reports the mean
    top-1 accuracy back to the controller as the reward.

    Args:
        config: search-space configuration passed to RLNAS.
        args: parsed CLI args (server_address, port, data, batch_size,
            use_gpu, search_steps, retain_epoch, ...).
        image_size: input image height/width (channels fixed at 3).
        is_server: True starts both the controller server and a client;
            False connects to an existing server as a client only.
    """
    if is_server:
        ### start a server and a client
        rl_nas = RLNAS(key='lstm',
                       configs=config,
                       is_sync=False,
                       server_addr=(args.server_address, args.port),
                       controller_batch_size=1,
                       controller_decay_steps=1000,
                       controller_decay_rate=0.8,
                       lstm_num_layers=1,
                       hidden_size=10,
                       temperature=1.0)
    else:
        ### start a client
        rl_nas = RLNAS(key='lstm',
                       configs=config,
                       is_sync=False,
                       server_addr=(args.server_address, args.port),
                       lstm_num_layers=1,
                       hidden_size=10,
                       temperature=1.0,
                       controller_batch_size=1,
                       controller_decay_steps=1000,
                       controller_decay_rate=0.8,
                       is_server=False)

    image_shape = [3, image_size, image_size]
    for step in range(args.search_steps):
        # Sample one candidate architecture from the controller.
        archs = rl_nas.next_archs(1)[0][0]
        train_program = fluid.Program()
        test_program = fluid.Program()
        startup_program = fluid.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, archs, args)
        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            archs,
            args,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_program)

        if args.data == 'cifar10':
            train_reader = paddle.fluid.io.batch(paddle.reader.shuffle(
                paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
                                                 batch_size=args.batch_size,
                                                 drop_last=True)
            test_reader = paddle.fluid.io.batch(
                paddle.dataset.cifar.test10(cycle=False),
                batch_size=args.batch_size,
                drop_last=False)
        elif args.data == 'imagenet':
            train_reader = paddle.fluid.io.batch(imagenet_reader.train(),
                                                 batch_size=args.batch_size,
                                                 drop_last=True)
            test_reader = paddle.fluid.io.batch(imagenet_reader.val(),
                                                batch_size=args.batch_size,
                                                drop_last=False)

        train_loader.set_sample_list_generator(
            train_reader,
            places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
        test_loader.set_sample_list_generator(test_reader, places=place)

        build_strategy = fluid.BuildStrategy()
        train_compiled_program = fluid.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)
        # Short retraining of the sampled architecture.
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                batch_time = time.time() - s_time
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        # Evaluate; each entry is the per-batch mean of
        # [avg_cost, acc_top1, acc_top5].
        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)
            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        # Mean top-1 accuracy is the reward fed back to the controller.
        rl_nas.reward(np.float32(finally_reward[1]))
def search_mobilenetv2_block(config, args, image_size):
    """SA-NAS search over the middle block of a MobileNetV2-style net.

    The searched block is sandwiched between a fixed stem conv and a fixed
    head (1x1 conv, global pooling, FC). Candidates whose FLOPs exceed the
    MobileNetV2 budget are skipped; surviving candidates are retrained for
    ``args.retain_epoch`` epochs and scored by mean top-1 accuracy.

    Args:
        config: SA-NAS search-space configuration.
        args: parsed CLI args (is_server, server_address, port,
            search_steps, class_dim, data, batch_size, use_gpu,
            retain_epoch, ...).
        image_size: input image height/width (channels fixed at 3).
    """
    image_shape = [3, image_size, image_size]
    # Both roles differ only in the is_server flag, so build SANAS once.
    sa_nas = SANAS(config,
                   server_addr=(args.server_address, args.port),
                   search_steps=args.search_steps,
                   is_server=args.is_server)

    for step in range(args.search_steps):
        archs = sa_nas.next_archs()[0]

        train_program = fluid.Program()
        test_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program):
            train_loader, data, label = create_data_loader(image_shape)
            # Fixed stem.
            data = conv_bn_layer(input=data,
                                 num_filters=32,
                                 filter_size=3,
                                 stride=2,
                                 padding='SAME',
                                 act='relu6',
                                 name='mobilenetv2_conv1')
            # Searched middle block.
            data = archs(data)[0]
            # Fixed head.
            data = conv_bn_layer(input=data,
                                 num_filters=1280,
                                 filter_size=1,
                                 stride=1,
                                 padding='SAME',
                                 act='relu6',
                                 name='mobilenetv2_last_conv')
            data = fluid.layers.pool2d(input=data,
                                       pool_size=7,
                                       pool_stride=1,
                                       pool_type='avg',
                                       global_pooling=True,
                                       name='mobilenetv2_last_pool')
            output = fluid.layers.fc(
                input=data,
                size=args.class_dim,
                param_attr=ParamAttr(name='mobilenetv2_fc_weights'),
                bias_attr=ParamAttr(name='mobilenetv2_fc_offset'))
            softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
            cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
            avg_cost = fluid.layers.mean(cost)
            acc_top1 = fluid.layers.accuracy(
                input=softmax_out, label=label, k=1)
            acc_top5 = fluid.layers.accuracy(
                input=softmax_out, label=label, k=5)
            # Clone for eval before optimizer ops are appended.
            test_program = train_program.clone(for_test=True)

            optimizer = fluid.optimizer.Momentum(
                learning_rate=0.1,
                momentum=0.9,
                regularization=fluid.regularizer.L2Decay(1e-4))
            optimizer.minimize(avg_cost)

        current_flops = flops(train_program)
        print('step: {}, current_flops: {}'.format(step, current_flops))
        # Skip candidates above the MobileNetV2 FLOPs budget.
        if current_flops > 321208544:
            continue

        place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_program)

        if args.data == 'cifar10':
            train_reader = paddle.batch(paddle.reader.shuffle(
                paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
                                        batch_size=args.batch_size,
                                        drop_last=True)
            test_reader = paddle.batch(
                paddle.dataset.cifar.test10(cycle=False),
                batch_size=args.batch_size,
                drop_last=False)
        elif args.data == 'imagenet':
            train_reader = paddle.batch(imagenet_reader.train(),
                                        batch_size=args.batch_size,
                                        drop_last=True)
            test_reader = paddle.batch(imagenet_reader.val(),
                                       batch_size=args.batch_size,
                                       drop_last=False)

        test_loader, _, _ = create_data_loader(image_shape)
        train_loader.set_sample_list_generator(
            train_reader,
            places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
        test_loader.set_sample_list_generator(test_reader, places=place)

        build_strategy = fluid.BuildStrategy()
        train_compiled_program = fluid.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                batch_time = time.time() - s_time
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [avg_cost.name, acc_top1.name, acc_top5.name]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)
            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        # Mean top-1 accuracy drives the simulated-annealing controller.
        sa_nas.reward(float(finally_reward[1]))
def eval(args):
    """Evaluate a quantization-aware-trained inference model, then freeze it.

    Computes top-1/top-5 accuracy on the validation set, applies the
    QuantizationFreezePass and saves a float inference model, then applies
    ConvertToInt8Pass and saves the int8 inference model.

    Args:
        args: parsed CLI args (use_gpu, model_path, weight_quant_type,
            save_path, ...).
    """
    # parameters from arguments
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    val_program, feed_names, fetch_targets = fluid.io.load_inference_model(
        args.model_path,
        exe,
        model_filename="__model__.infer",
        params_filename="__params__")
    val_reader = paddle.batch(reader.val(), batch_size=128)
    feeder = fluid.DataFeeder(
        place=place, feed_list=feed_names, program=val_program)

    results = []
    for batch_id, data in enumerate(val_reader()):
        # Each sample is (image, label); only the image is fed — the
        # inference model's feed list presumably contains just the image.
        image = [[d[0]] for d in data]
        label = [[d[1]] for d in data]
        feed_data = feeder.feed(image)
        pred = exe.run(val_program, feed=feed_data, fetch_list=fetch_targets)
        pred = np.array(pred[0])
        label = np.array(label)
        # argsort ascending: the last columns hold the highest scores.
        sort_array = pred.argsort(axis=1)
        top_1_pred = sort_array[:, -1:][:, ::-1]
        top_1 = np.mean(label == top_1_pred)
        top_5_pred = sort_array[:, -5:][:, ::-1]
        acc_num = 0
        for i in range(len(label)):
            if label[i][0] in top_5_pred[i]:
                acc_num += 1
        top_5 = acc_num / len(label)
        results.append([top_1, top_5])
    result = np.mean(np.array(results), axis=0)
    print("top1_acc/top5_acc= {}".format(result))
    sys.stdout.flush()

    _logger.info("freeze the graph for inference")
    # Freeze fake-quant ops into real quantized weights on the IR graph.
    test_graph = IrGraph(core.Graph(val_program.desc), for_test=True)
    freeze_pass = QuantizationFreezePass(
        scope=fluid.global_scope(),
        place=place,
        weight_quantize_type=args.weight_quant_type)
    freeze_pass.apply(test_graph)
    server_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=os.path.join(args.save_path, 'float'),
        feeded_var_names=feed_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=server_program,
        model_filename='model',
        params_filename='weights')

    _logger.info("convert the weights into int8 type")
    convert_int8_pass = ConvertToInt8Pass(
        scope=fluid.global_scope(), place=place)
    convert_int8_pass.apply(test_graph)
    server_int8_program = test_graph.to_program()
    fluid.io.save_inference_model(
        dirname=os.path.join(args.save_path, 'int8'),
        feeded_var_names=feed_names,
        target_vars=fetch_targets,
        executor=exe,
        main_program=server_int8_program,
        model_filename='model',
        params_filename='weights')
def search_mobilenetv2(config, args, image_size, is_server=True):
    """Run an RL-NAS search (DDPG controller) over MobileNetV2-like archs.

    The DDPG controller consumes an observation vector of
    ``[step] + previous_action`` (obs_dim=26), so each step builds the
    observation, samples an architecture, trains/evaluates it, and feeds
    the transition (obs, action, reward, obs_next, terminal) back.

    Args:
        config: search-space configuration passed to RLNAS.
        args: parsed CLI args (server_address, port, data, batch_size,
            use_gpu, search_steps, retain_epoch, ...).
        image_size: input image height/width (channels fixed at 3).
        is_server: True starts both the controller server and a client;
            False connects to an existing server as a client only.
    """
    if is_server:
        ### start a server and a client
        rl_nas = RLNAS(
            key='ddpg',
            configs=config,
            is_sync=False,
            obs_dim=26,  ### step + length_of_token
            server_addr=(args.server_address, args.port))
    else:
        ### start a client
        rl_nas = RLNAS(key='ddpg',
                       configs=config,
                       is_sync=False,
                       obs_dim=26,
                       server_addr=(args.server_address, args.port),
                       is_server=False)

    image_shape = [3, image_size, image_size]
    for step in range(args.search_steps):
        # Observation = current step index + previous action vector.
        if step == 0:
            action_prev = [1. for _ in rl_nas.range_tables]
        else:
            action_prev = rl_nas.tokens[0]
        obs = [step]
        obs.extend(action_prev)
        archs = rl_nas.next_archs(obs=obs)[0][0]

        train_program = fluid.Program()
        test_program = fluid.Program()
        startup_program = fluid.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, archs, args)
        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            archs,
            args,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_program)

        if args.data == 'cifar10':
            train_reader = paddle.fluid.io.batch(paddle.reader.shuffle(
                paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
                                                 batch_size=args.batch_size,
                                                 drop_last=True)
            test_reader = paddle.fluid.io.batch(
                paddle.dataset.cifar.test10(cycle=False),
                batch_size=args.batch_size,
                drop_last=False)
        elif args.data == 'imagenet':
            train_reader = paddle.fluid.io.batch(imagenet_reader.train(),
                                                 batch_size=args.batch_size,
                                                 drop_last=True)
            test_reader = paddle.fluid.io.batch(imagenet_reader.val(),
                                                batch_size=args.batch_size,
                                                drop_last=False)

        train_loader.set_sample_list_generator(
            train_reader,
            places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
        test_loader.set_sample_list_generator(test_reader, places=place)

        build_strategy = fluid.BuildStrategy()
        train_compiled_program = fluid.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                batch_time = time.time() - s_time
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)
            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        # Build the DDPG transition tuple; all arrays carry a leading
        # batch dimension of 1.
        obs = np.expand_dims(obs, axis=0).astype('float32')
        actions = rl_nas.tokens
        obs_next = [step + 1]
        obs_next.extend(actions[0])
        obs_next = np.expand_dims(obs_next, axis=0).astype('float32')

        # FIX: np.bool was removed in NumPy 1.24; builtin bool is the
        # drop-in equivalent dtype.
        if step == args.search_steps - 1:
            terminal = np.expand_dims([True], axis=0).astype(bool)
        else:
            terminal = np.expand_dims([False], axis=0).astype(bool)

        rl_nas.reward(np.expand_dims(np.float32(finally_reward[1]), axis=0),
                      obs=obs,
                      actions=actions.astype('float32'),
                      obs_next=obs_next,
                      terminal=terminal)

        # NOTE(review): debug/test scaffolding — aborts the search after
        # 3 steps regardless of args.search_steps; confirm intent.
        if step == 2:
            sys.exit(0)
def test_search_result(tokens, image_size, args, config):
    """Rebuild a searched architecture from its tokens and evaluate it.

    Decodes ``tokens`` with SA-NAS, retrains the resulting network for
    ``args.retain_epoch`` epochs, and logs the final avg_cost / top-1 /
    top-5 on the test set.

    Args:
        tokens: token vector produced by a previous search.
        image_size: input image height/width (channels fixed at 3).
        args: parsed CLI args (init_temperature, reduce_rate,
            search_steps, data, batch_size, use_gpu, retain_epoch, ...).
        config: SA-NAS search-space configuration.
    """
    sa_nas = SANAS(config,
                   server_addr=("", 8887),
                   init_temperature=args.init_temperature,
                   reduce_rate=args.reduce_rate,
                   search_steps=args.search_steps,
                   is_server=True)

    image_shape = [3, image_size, image_size]
    archs = sa_nas.tokens2arch(tokens)

    train_program = fluid.Program()
    test_program = fluid.Program()
    startup_program = fluid.Program()
    train_loader, avg_cost, acc_top1, acc_top5 = build_program(
        train_program, startup_program, image_shape, archs, args)
    current_flops = flops(train_program)
    print('current_flops: {}'.format(current_flops))
    test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
        test_program, startup_program, image_shape, archs, args, is_test=True)
    test_program = test_program.clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)

    if args.data == 'cifar10':
        train_reader = paddle.batch(paddle.reader.shuffle(
            paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
                                    batch_size=args.batch_size,
                                    drop_last=True)
        test_reader = paddle.batch(paddle.dataset.cifar.test10(cycle=False),
                                   batch_size=args.batch_size,
                                   drop_last=False)
    elif args.data == 'imagenet':
        train_reader = paddle.batch(imagenet_reader.train(),
                                    batch_size=args.batch_size,
                                    drop_last=True)
        test_reader = paddle.batch(imagenet_reader.val(),
                                   batch_size=args.batch_size,
                                   drop_last=False)

    train_loader.set_sample_list_generator(
        train_reader,
        places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
    test_loader.set_sample_list_generator(test_reader, places=place)

    build_strategy = fluid.BuildStrategy()
    train_compiled_program = fluid.CompiledProgram(
        train_program).with_data_parallel(loss_name=avg_cost.name,
                                          build_strategy=build_strategy)
    # FIX: was a bare `retain_epoch` (NameError); every sibling function
    # reads this setting from args.
    for epoch_id in range(args.retain_epoch):
        for batch_id, data in enumerate(train_loader()):
            fetches = [avg_cost.name]
            s_time = time.time()
            outs = exe.run(train_compiled_program,
                           feed=data,
                           fetch_list=fetches)[0]
            batch_time = time.time() - s_time
            if batch_id % 10 == 0:
                _logger.info(
                    'TRAIN: epoch: {}, batch: {}, cost: {}, batch_time: {}ms'.
                    format(epoch_id, batch_id, outs[0], batch_time))

    reward = []
    for batch_id, data in enumerate(test_loader()):
        test_fetches = [
            test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
        ]
        batch_reward = exe.run(test_program, feed=data,
                               fetch_list=test_fetches)
        reward_avg = np.mean(np.array(batch_reward), axis=1)
        reward.append(reward_avg)
        _logger.info(
            'TEST: batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
            format(batch_id, batch_reward[0], batch_reward[1],
                   batch_reward[2]))

    finally_reward = np.mean(np.array(reward), axis=0)
    _logger.info(
        'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
            finally_reward[0], finally_reward[1], finally_reward[2]))
def compress(args):
    """Iteratively prune a classification model and finetune it.

    Builds the model, prunes the ``*_sep_weights`` parameters according to
    each ratio set in the module-level ``ratiolist``, reloads pretrained
    weights into the pruned program, then alternates train/eval/save for
    ``args.num_epochs`` epochs.

    Args:
        args: parsed CLI args (model, use_gpu, pretrained_model,
            batch_size, num_epochs, ...). ``pretrained_model`` is
            effectively required: the post-pruning weight load below
            reads from that directory unconditionally.
    """
    class_dim = 1000
    image_shape = "3,224,224"
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # FIX: defined at function scope (was nested under
    # `if args.pretrained_model:`) because the load_vars call after
    # pruning references it unconditionally.
    def if_exist(var):
        exist = os.path.exists(os.path.join(args.pretrained_model, var.name))
        print("exist", exist)
        return exist

    # NOTE(review): the pre-pruning weight load was deliberately disabled;
    # weights are loaded once into the pruned program further down.
    #fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(reader.val(), batch_size=args.batch_size)
    train_reader = paddle.batch(reader.train(),
                                batch_size=args.batch_size,
                                drop_last=True)
    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(epoch, program):
        # Evaluate `program` over the whole validation set.
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=train_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            print(
                "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                .format(epoch, batch_id, np.mean(acc_top1_n),
                        np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1
        print("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
            epoch, np.mean(np.array(acc_top1_ns)),
            np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
        # One training epoch over `program`, data-parallel compiled.
        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n, lr_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[
                    avg_cost.name, acc_top1.name, acc_top5.name,
                    "learning_rate"
                ])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            lr_n = np.mean(lr_n)
            print(
                "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {};lrn: {}; time: {}"
                .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n, lr_n,
                        end_time - start_time))
            batch_id += 1

    # Only the depthwise-separable conv weights are pruned.
    params = []
    for param in fluid.default_main_program().global_block().all_parameters():
        #if "_weights" in param.name and "conv1_weights" not in param.name:
        if "_sep_weights" in param.name:
            params.append(param.name)
    print("fops before pruning: {}".format(
        flops(fluid.default_main_program())))

    # Apply each ratio set in turn; the val program is pruned graph-only
    # (no scope tensors touched), the train program is pruned for real.
    pruned_program_iter = fluid.default_main_program()
    pruned_val_program_iter = val_program
    for ratios in ratiolist:
        pruner = Pruner()
        pruned_val_program_iter = pruner.prune(pruned_val_program_iter,
                                               fluid.global_scope(),
                                               params=params,
                                               ratios=ratios,
                                               place=place,
                                               only_graph=True)
        pruned_program_iter = pruner.prune(pruned_program_iter,
                                           fluid.global_scope(),
                                           params=params,
                                           ratios=ratios,
                                           place=place)
        print("fops after pruning: {}".format(flops(pruned_program_iter)))

    """ do not inherit learning rate """
    if (os.path.exists(args.pretrained_model + "/learning_rate")):
        os.remove(args.pretrained_model + "/learning_rate")
    if (os.path.exists(args.pretrained_model + "/@LR_DECAY_COUNTER@")):
        os.remove(args.pretrained_model + "/@LR_DECAY_COUNTER@")
    fluid.io.load_vars(exe,
                       args.pretrained_model,
                       main_program=pruned_program_iter,
                       predicate=if_exist)

    pruned_program = pruned_program_iter
    pruned_val_program = pruned_val_program_iter
    for i in range(args.num_epochs):
        train(i, pruned_program)
        test(i, pruned_val_program)
        save_model(args, exe, pruned_program, pruned_val_program, i)
def compress(args):
    """Distill a student model from a ResNet50 teacher via Compressor.

    Builds the student, a frozen teacher program loaded from
    ``args.teacher_pretrained_model``, and hands both to the PaddleSlim
    Compressor configured by ``args.compress_config``.

    Args:
        args: parsed CLI args (enable_ce, image_shape, model, class_dim,
            total_images, batch_size, use_gpu, pretrained_model,
            teacher_model, teacher_pretrained_model, compress_config, ...).
    """
    # add ce: fixed seeds for continuous-evaluation reproducibility.
    if args.enable_ce:
        SEED = 1
        fluid.default_main_program().random_seed = SEED
        fluid.default_startup_program().random_seed = SEED

    image_shape = [int(m) for m in args.image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    if args.model == 'ResNet34':
        # Prefix avoids name clashes with the 'res50' teacher parameters.
        model.prefix_name = 'res34'
        out = model.net(input=image, class_dim=args.class_dim, fc_name='fc_0')
    else:
        out = model.net(input=image, class_dim=args.class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone()

    # Step boundaries at epochs 30/60/90. FIX: `/` yields floats under
    # Python 3 — cast to int for step-count boundaries.
    boundaries = [
        int(args.total_images / args.batch_size * 30),
        int(args.total_images / args.batch_size * 60),
        int(args.total_images / args.batch_size * 90)
    ]
    values = [0.1, 0.01, 0.001, 0.0001]
    opt = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=fluid.layers.piecewise_decay(boundaries=boundaries,
                                                   values=values),
        regularization=fluid.regularizer.L2Decay(4e-5))

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(reader.val(), batch_size=args.batch_size)
    val_feed_list = [('image', image.name), ('label', label.name)]
    val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5', acc_top5.name)]
    train_reader = paddle.batch(reader.train(),
                                batch_size=args.batch_size,
                                drop_last=True)
    train_feed_list = [('image', image.name), ('label', label.name)]
    train_fetch_list = [('loss', avg_cost.name)]

    teacher_programs = []
    distiller_optimizer = None
    teacher_model = models.__dict__[args.teacher_model](prefix_name='res50')
    # define teacher program
    teacher_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(teacher_program, startup_program):
        # Share the student's input variable inside the teacher program.
        img = teacher_program.global_block()._clone_variable(
            image, force_persistable=False)
        predict = teacher_model.net(img,
                                    class_dim=args.class_dim,
                                    fc_name='fc_0')
    exe.run(startup_program)
    assert args.teacher_pretrained_model and os.path.exists(
        args.teacher_pretrained_model
    ), "teacher_pretrained_model should be set when teacher_model is not None."

    # Renamed from a second `if_exist` to avoid shadowing the student's
    # predicate above.
    def teacher_var_exists(var):
        return os.path.exists(
            os.path.join(args.teacher_pretrained_model, var.name))

    fluid.io.load_vars(exe,
                       args.teacher_pretrained_model,
                       main_program=teacher_program,
                       predicate=teacher_var_exists)

    distiller_optimizer = opt
    teacher_programs.append(teacher_program.clone(for_test=True))

    com_pass = Compressor(place,
                          fluid.global_scope(),
                          fluid.default_main_program(),
                          train_reader=train_reader,
                          train_feed_list=train_feed_list,
                          train_fetch_list=train_fetch_list,
                          eval_program=val_program,
                          eval_reader=val_reader,
                          eval_feed_list=val_feed_list,
                          eval_fetch_list=val_fetch_list,
                          teacher_programs=teacher_programs,
                          save_eval_model=True,
                          prune_infer_model=[[image.name], [out.name]],
                          train_optimizer=opt,
                          distiller_optimizer=distiller_optimizer)
    com_pass.config(args.compress_config)
    com_pass.run()
def compress(args):
    """Quantization-aware training of a classification model via Compressor.

    Builds the float model, hands it to the PaddleSlim Compressor
    configured by ``args.config_file`` (presumably a quantization
    strategy), then counts conv2d and fake_quantize ops in the resulting
    eval graph as a sanity check.

    Args:
        args: parsed CLI args (model, use_gpu, pretrained_model,
            batch_size, config_file, ...).
    """
    image_shape = "3,224,224"
    image_shape = [int(m) for m in image_shape.split(",")]
    image = fluid.data(name='image',
                       shape=[None] + image_shape,
                       dtype='float32')
    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=1000)
    # print(out)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone()

    # quantization usually use small learning rate
    values = [1e-4, 1e-5]
    opt = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=fluid.layers.piecewise_decay(boundaries=[5000 * 12],
                                                   values=values),
        regularization=fluid.regularizer.L2Decay(1e-4))

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:
        assert os.path.exists(
            args.pretrained_model), "pretrained_model path doesn't exist"

        # Only load parameters that have a matching file on disk.
        def if_exist(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(reader.val(), batch_size=args.batch_size)
    val_feed_list = [('image', image.name), ('label', label.name)]
    val_fetch_list = [('acc_top1', acc_top1.name), ('acc_top5', acc_top5.name)]
    train_reader = paddle.batch(reader.train(),
                                batch_size=args.batch_size,
                                drop_last=True)
    train_feed_list = [('image', image.name), ('label', label.name)]
    train_fetch_list = [('loss', avg_cost.name)]

    com_pass = Compressor(place,
                          fluid.global_scope(),
                          fluid.default_main_program(),
                          train_reader=train_reader,
                          train_feed_list=train_feed_list,
                          train_fetch_list=train_fetch_list,
                          eval_program=val_program,
                          eval_reader=val_reader,
                          eval_feed_list=val_feed_list,
                          eval_fetch_list=val_fetch_list,
                          teacher_programs=[],
                          train_optimizer=opt,
                          prune_infer_model=[[image.name], [out.name]],
                          distiller_optimizer=None)
    com_pass.config(args.config_file)
    com_pass.run()

    # Sanity check: count conv ops and the fake-quant ops the strategy
    # inserted into the eval graph.
    conv_op_num = 0
    fake_quant_op_num = 0
    for op in com_pass.context.eval_graph.ops():
        if op._op.type == 'conv2d':
            conv_op_num += 1
        elif op._op.type.startswith('fake_quantize'):
            fake_quant_op_num += 1
    print('conv op num {}'.format(conv_op_num))
    print('fake quant op num {}'.format(fake_quant_op_num))
def compress(args):
    """Prune a classification model with the static-graph API and finetune.

    Builds the model on mnist or imagenet, evaluates the baseline, prunes
    the parameters selected by ``get_pruned_params`` at
    ``args.pruned_ratio``, then alternates train/eval/save for
    ``args.num_epochs`` epochs, optionally exporting inference models.

    Args:
        args: parsed CLI args (data, model, use_gpu, pretrained_model,
            batch_size, log_period, criterion, pruned_ratio, num_epochs,
            test_period, model_path, save_inference, ...).

    Raises:
        ValueError: if ``args.data`` is neither "mnist" nor "imagenet".
    """
    train_reader = None
    test_reader = None
    if args.data == "mnist":
        train_reader = paddle.dataset.mnist.train()
        val_reader = paddle.dataset.mnist.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = paddle.static.data(name='image',
                               shape=[None] + image_shape,
                               dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
    val_program = paddle.static.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    place = places[0]
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))

        _logger.info("Load pretrained model from {}".format(
            args.pretrained_model))
        paddle.fluid.io.load_vars(exe,
                                  args.pretrained_model,
                                  predicate=if_exist)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.batch(train_reader,
                                batch_size=args.batch_size,
                                drop_last=True)
    train_loader = paddle.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=64,
        use_double_buffer=True,
        iterable=True)
    valid_loader = paddle.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=64,
        use_double_buffer=True,
        iterable=True)
    train_loader.set_sample_list_generator(train_reader, places)
    valid_loader.set_sample_list_generator(val_reader, place)

    def test(epoch, program):
        # Evaluate `program` over the whole validation set.
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program, feed=data, fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
        # One training epoch over `program`, data-parallel compiled.
        build_strategy = paddle.static.BuildStrategy()
        exec_strategy = paddle.static.ExecutionStrategy()
        train_program = paddle.static.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)
        for batch_id, data in enumerate(train_loader):
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            # FIX: removed a dead `batch_id += 1` — enumerate() already
            # rebinds batch_id on every iteration.

    # Baseline accuracy before pruning.
    test(0, val_program)

    params = get_pruned_params(args, paddle.static.default_main_program())
    _logger.info("FLOPs before pruning: {}".format(
        flops(paddle.static.default_main_program())))
    pruner = Pruner(args.criterion)
    # Graph-only prune for the eval program; real prune (scope tensors
    # resized) for the train program.
    pruned_val_program, _, _ = pruner.prune(
        val_program,
        paddle.static.global_scope(),
        params=params,
        ratios=[args.pruned_ratio] * len(params),
        place=place,
        only_graph=True)
    pruned_program, _, _ = pruner.prune(
        paddle.static.default_main_program(),
        paddle.static.global_scope(),
        params=params,
        ratios=[args.pruned_ratio] * len(params),
        place=place)
    _logger.info("FLOPs after pruning: {}".format(flops(pruned_program)))

    for i in range(args.num_epochs):
        train(i, pruned_program)
        if i % args.test_period == 0:
            test(i, pruned_val_program)
            save_model(exe, pruned_val_program,
                       os.path.join(args.model_path, str(i)))
        if args.save_inference:
            infer_model_path = os.path.join(args.model_path, "infer_models",
                                            str(i))
            paddle.fluid.io.save_inference_model(infer_model_path, ["image"],
                                                 [out], exe,
                                                 pruned_val_program)
            _logger.info(
                "Saved inference model into [{}]".format(infer_model_path))