def _model_reader_dshape_classdim(args, is_train):
    """Select the dataset reader, input shape and class count.

    Args:
        args: Parsed options. Reads ``data_set`` and ``data_format``; for
            imagenet also ``data_path`` and ``use_reader_op``.
        is_train: Truthy to get the training split, otherwise the
            test/validation split.

    Returns:
        A ``(reader, dshape, class_dim)`` tuple.

    Raises:
        ValueError: ``args.data_set`` is neither "flowers" nor "imagenet".
        Exception: imagenet was requested without ``args.data_path``.
    """
    if args.data_set not in ("flowers", "imagenet"):
        # Previously this fell through to the return statement and died with
        # an UnboundLocalError on `dshape`.
        raise ValueError("{} is not supported.".format(args.data_set))

    # Both datasets use the same 224x224 3-channel shape; only the channel
    # position depends on the data format.
    if args.data_format == 'NCHW':
        dshape = [3, 224, 224]
    else:
        dshape = [224, 224, 3]

    if args.data_set == "flowers":
        class_dim = 102
        if is_train:
            reader = paddle.dataset.flowers.train()
        else:
            reader = paddle.dataset.flowers.test()
    else:  # imagenet
        class_dim = 1000
        if not args.data_path:
            raise Exception(
                "Must specify --data_path when training with imagenet")
        make_reader = train if is_train else val
        if args.use_reader_op:
            # The reader op path asks for the non-xmap variant.
            reader = make_reader(xmap=False)
        else:
            reader = make_reader()

    return reader, dshape, class_dim
def quantize(args):
    """Run ``quant_post_hpo`` on the model stored in ``args.model_path``.

    Reads ``use_gpu``, ``model_path``, ``save_path``, ``model_filename``,
    ``params_filename`` and ``max_model_quant_count`` from ``args``. The
    train and eval sample generators are built from ``reader.train()`` and
    ``reader.val()`` and yield images only (labels are dropped).
    """
    if args.use_gpu:
        place = paddle.CUDAPlace(0)
    else:
        place = paddle.CPUPlace()
    assert os.path.exists(args.model_path), "args.model_path doesn't exist"
    assert os.path.isdir(args.model_path), "args.model_path must be a dir"

    def images_only(sample_reader):
        """Wrap a (image, label) reader into a generator of batched images."""

        def gen():
            for image, _label in sample_reader():
                # Add a leading batch axis of size one.
                yield np.expand_dims(image, axis=0)

        return gen

    exe = paddle.static.Executor(place)
    train_samples = images_only(reader.train())
    eval_samples = images_only(reader.val())
    quant_post_hpo(
        exe,
        place,
        args.model_path,
        args.save_path,
        train_sample_generator=train_samples,
        eval_sample_generator=eval_samples,
        model_filename=args.model_filename,
        params_filename=args.params_filename,
        save_model_filename='__model__',
        save_params_filename='__params__',
        quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"],
        weight_quantize_type='channel_wise_abs_max',
        runcount_limit=args.max_model_quant_count)
def compress(args):
    """Build a 1000-class classifier and run it through ``Compressor``.

    Reads ``enable_ce``, ``model``, ``use_gpu``, ``pretrained_model``,
    ``batch_size`` and ``config_file`` from ``args``.
    """
    # add ce
    if args.enable_ce:
        seed = 1
        fluid.default_main_program().random_seed = seed
        fluid.default_startup_program().random_seed = seed

    num_classes = 1000
    input_shape = [3, 224, 224]

    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=input_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # model definition
    net = models.__dict__[args.model]()
    out = net.net(input=image, class_dim=num_classes)
    avg_cost = fluid.layers.mean(
        x=fluid.layers.cross_entropy(input=out, label=label))
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    # Snapshot the program for evaluation before the optimizer is created.
    val_program = fluid.default_main_program().clone()
    opt = create_optimizer(args)

    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def var_file_exists(var):
            # Only load variables that have a file in the checkpoint dir.
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))

        fluid.io.load_vars(exe, args.pretrained_model,
                           predicate=var_file_exists)

    val_reader = paddle.batch(reader.val(), batch_size=args.batch_size)
    val_feed_list = [('image', image.name), ('label', label.name)]
    val_fetch_list = [('acc_top1', acc_top1.name),
                      ('acc_top5', acc_top5.name)]

    train_reader = paddle.batch(
        reader.train(), batch_size=args.batch_size, drop_last=True)
    train_feed_list = [('image', image.name), ('label', label.name)]
    train_fetch_list = [('loss', avg_cost.name)]

    compressor_kwargs = dict(
        train_reader=train_reader,
        train_feed_list=train_feed_list,
        train_fetch_list=train_fetch_list,
        eval_program=val_program,
        eval_reader=val_reader,
        eval_feed_list=val_feed_list,
        eval_fetch_list=val_fetch_list,
        save_eval_model=True,
        prune_infer_model=[[image.name], [out.name]],
        train_optimizer=opt)
    com_pass = Compressor(place, fluid.global_scope(),
                          fluid.default_main_program(), **compressor_kwargs)
    com_pass.config(args.config_file)
    com_pass.run()
def eval(args):
    """Evaluate a saved model and log top-1 / top-5 accuracy.

    Args:
        args: Parsed options. Reads ``data`` ("mnist" or "imagenet"),
            ``model``, ``use_gpu``, ``batch_size``, ``model_path`` and
            ``log_period``.

    Raises:
        ValueError: ``args.data`` is not a supported dataset.
    """
    # Only the validation split is needed here; the training reader that
    # used to be built alongside it was never consumed.
    if args.data == "mnist":
        val_reader = paddle.dataset.mnist.test()
        class_dim = 10
        image_shape = [1, 28, 28]
    elif args.data == "imagenet":
        import imagenet_reader as reader
        val_reader = reader.val()
        class_dim = 1000
        image_shape = [3, 224, 224]
    else:
        raise ValueError("{} is not supported.".format(args.data))

    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = paddle.static.data(
        name='image', shape=[None] + image_shape, dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')

    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
    val_program = paddle.static.default_main_program().clone(for_test=True)

    place = paddle.CUDAPlace(0) if args.use_gpu else paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    valid_loader = paddle.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=64,
        use_double_buffer=True,
        iterable=True)
    valid_loader.set_sample_list_generator(val_reader, place)

    load_model(exe, val_program, args.model_path)

    acc_top1_ns = []
    acc_top5_ns = []
    for batch_id, data in enumerate(valid_loader):
        start_time = time.time()
        acc_top1_n, acc_top5_n = exe.run(
            val_program,
            feed=data,
            fetch_list=[acc_top1.name, acc_top5.name])
        end_time = time.time()

        # Compute each batch mean once and reuse it for logging and totals.
        top1_mean = np.mean(acc_top1_n)
        top5_mean = np.mean(acc_top5_n)
        if batch_id % args.log_period == 0:
            _logger.info(
                "Eval batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".format(
                    batch_id, top1_mean, top5_mean, end_time - start_time))
        acc_top1_ns.append(top1_mean)
        acc_top5_ns.append(top5_mean)

    _logger.info("Final eval - acc_top1: {}; acc_top5: {}".format(
        np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns))))
def quantize(args):
    """Post-training quantize the model in ``args.model_path`` via ``quant_post``.

    Calibration samples come from ``reader.train()``. The quantized model is
    written to ``args.save_path``.
    """
    calib_samples = reader.train()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    assert os.path.exists(args.model_path), "args.model_path doesn't exist"
    assert os.path.isdir(args.model_path), "args.model_path must be a dir"

    quant_kwargs = dict(
        executor=fluid.Executor(place),
        model_dir=args.model_path,
        quantize_model_path=args.save_path,
        sample_generator=calib_samples,
        model_filename=args.model_filename,
        params_filename=args.params_filename,
        batch_size=args.batch_size,
        batch_nums=args.batch_num)
    quant_post(**quant_kwargs)
def quantize(args):
    """Run ``quant_aware_with_infermodel`` with settings taken from ``args``.

    The training reader yields feed dicts of the form ``{"x": images}``.
    Each checkpoint is scored through the module-level ``eval`` helper and
    the result is printed.
    """
    if args.use_gpu:
        place = paddle.CUDAPlace(0)
    else:
        place = paddle.CPUPlace()
    exe = paddle.static.Executor(place)

    quant_config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'not_quant_pattern': ['skip_quant'],
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
    }

    train_config = {
        "num_epoch": args.num_epoch,  # training epoch num
        "max_iter": -1,
        "save_iter_step": args.save_iter_step,
        "learning_rate": args.learning_rate,
        "weight_decay": args.weight_decay,
        "use_pact": args.use_pact,
        "quant_model_ckpt_path": args.checkpoint_path,
        "teacher_model_path_prefix": args.teacher_model_path_prefix,
        "model_path_prefix": args.model_path_prefix,
        "distill_node_pair": args.distill_node_name_list,
    }

    def test_callback(compiled_test_program, feed_names, fetch_list,
                      checkpoint_name):
        """Evaluate one checkpoint and print its accuracy."""
        score = eval(exe, place, compiled_test_program, feed_names,
                     fetch_list)
        print("{0} top1_acc/top5_acc= {1}".format(checkpoint_name, score))

    train_reader = paddle.batch(reader.train(), batch_size=args.batch_size)

    def train_reader_wrapper():
        """Return a generator function yielding image-only feed dicts."""

        def gen():
            for batch in train_reader():
                # Keep only the image (first field) of every sample.
                images = np.float32([sample[0] for sample in batch])
                yield {"x": images}

        return gen

    quant_aware_with_infermodel(
        exe,
        place,
        scope=None,
        train_reader=train_reader_wrapper(),
        quant_config=quant_config,
        train_config=train_config,
        test_callback=test_callback)
def quantize(args):
    """Statically post-training quantize the model in ``args.model_path``.

    Calibration samples come from ``reader.train()``. ``algo``,
    ``hist_percent`` and ``bias_correction`` are passed through unchanged to
    ``quant_post_static``.
    """
    calib_samples = reader.train()
    if args.use_gpu:
        place = paddle.CUDAPlace(0)
    else:
        place = paddle.CPUPlace()

    assert os.path.exists(args.model_path), "args.model_path doesn't exist"
    assert os.path.isdir(args.model_path), "args.model_path must be a dir"

    quant_kwargs = dict(
        executor=paddle.static.Executor(place),
        model_dir=args.model_path,
        quantize_model_path=args.save_path,
        sample_generator=calib_samples,
        model_filename=args.model_filename,
        params_filename=args.params_filename,
        batch_size=args.batch_size,
        batch_nums=args.batch_num,
        algo=args.algo,
        hist_percent=args.hist_percent,
        bias_correction=args.bias_correction)
    quant_post_static(**quant_kwargs)
result = [np.mean(r) for r in result] results.append(result) if batch_id % 5000 == 0: print('Eval iter: ', batch_id) result = np.mean(np.array(results), axis=0) return result[0] if __name__ == '__main__': args = parser.parse_args() print_arguments(args) paddle.enable_static() compress_config, train_config = load_config(args.config_path) data_dir = args.data_dir train_reader = paddle.batch(reader.train(data_dir=data_dir), batch_size=args.batch_size) train_dataloader = reader_wrapper(train_reader) ac = AutoCompression( model_dir=args.model_dir, model_filename=args.model_filename, params_filename=args.params_filename, save_dir=args.save_dir, strategy_config=compress_config, train_config=train_config, train_dataloader=train_dataloader, eval_callback=eval_function if 'HyperParameterOptimization' not in compress_config else reader_wrapper(eval_reader(data_dir, 64)), devices=args.devices)
def search_mobilenetv2_block(config, args, image_size):
    """Search MobileNetV2 blocks with SANAS, using top-1 accuracy as reward.

    For every search step the sampled architecture is wrapped between a stem
    conv and a classification head, trained for ``args.retain_epoch`` epochs
    and evaluated. Candidates above the FLOPs budget are skipped.

    Args:
        config: Search-space configuration handed to ``SANAS``.
        args: Parsed options. Reads ``is_server``, ``server_address``,
            ``port``, ``search_steps``, ``class_dim``, ``use_gpu``, ``data``
            ("cifar10" or "imagenet"), ``batch_size`` and ``retain_epoch``.
        image_size: Height and width of the square input images.

    Raises:
        ValueError: ``args.data`` is not a supported dataset.
    """
    # Fail before starting a controller or building a network; previously an
    # unknown dataset only surfaced later as an unbound `train_reader`.
    if args.data not in ('cifar10', 'imagenet'):
        raise ValueError("{} is not supported.".format(args.data))

    max_flops = 321208544  # candidates above this budget are not trained
    image_shape = [3, image_size, image_size]
    sa_nas = SANAS(config,
                   server_addr=(args.server_address, args.port),
                   search_steps=args.search_steps,
                   is_server=bool(args.is_server))

    for step in range(args.search_steps):
        archs = sa_nas.next_archs()[0]

        train_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program):
            train_loader, data, label = create_data_loader(image_shape)
            data = conv_bn_layer(input=data,
                                 num_filters=32,
                                 filter_size=3,
                                 stride=2,
                                 padding='SAME',
                                 act='relu6',
                                 name='mobilenetv2_conv1')
            data = archs(data)[0]
            data = conv_bn_layer(input=data,
                                 num_filters=1280,
                                 filter_size=1,
                                 stride=1,
                                 padding='SAME',
                                 act='relu6',
                                 name='mobilenetv2_last_conv')
            data = fluid.layers.pool2d(input=data,
                                       pool_size=7,
                                       pool_stride=1,
                                       pool_type='avg',
                                       global_pooling=True,
                                       name='mobilenetv2_last_pool')
            output = fluid.layers.fc(
                input=data,
                size=args.class_dim,
                param_attr=ParamAttr(name='mobilenetv2_fc_weights'),
                bias_attr=ParamAttr(name='mobilenetv2_fc_offset'))

            softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
            cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
            avg_cost = fluid.layers.mean(cost)
            acc_top1 = fluid.layers.accuracy(input=softmax_out,
                                             label=label,
                                             k=1)
            acc_top5 = fluid.layers.accuracy(input=softmax_out,
                                             label=label,
                                             k=5)
            # Clone before the optimizer ops are appended.
            test_program = train_program.clone(for_test=True)

            optimizer = fluid.optimizer.Momentum(
                learning_rate=0.1,
                momentum=0.9,
                regularization=fluid.regularizer.L2Decay(1e-4))
            optimizer.minimize(avg_cost)

        current_flops = flops(train_program)
        print('step: {}, current_flops: {}'.format(step, current_flops))
        if current_flops > max_flops:
            continue

        place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_program)

        if args.data == 'cifar10':
            train_reader = paddle.fluid.io.batch(
                paddle.reader.shuffle(
                    paddle.dataset.cifar.train10(cycle=False),
                    buf_size=1024),
                batch_size=args.batch_size,
                drop_last=True)
            test_reader = paddle.fluid.io.batch(
                paddle.dataset.cifar.test10(cycle=False),
                batch_size=args.batch_size,
                drop_last=False)
        else:  # imagenet
            train_reader = paddle.fluid.io.batch(imagenet_reader.train(),
                                                 batch_size=args.batch_size,
                                                 drop_last=True)
            test_reader = paddle.fluid.io.batch(imagenet_reader.val(),
                                                batch_size=args.batch_size,
                                                drop_last=False)

        test_loader, _, _ = create_data_loader(image_shape)
        train_loader.set_sample_list_generator(
            train_reader,
            places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
        test_loader.set_sample_list_generator(test_reader, places=place)

        build_strategy = fluid.BuildStrategy()
        train_compiled_program = fluid.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)

        for epoch_id in range(args.retain_epoch):
            for batch_id, batch in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=batch,
                               fetch_list=fetches)[0]
                # time.time() is in seconds; the log line reports ms.
                batch_time = (time.time() - s_time) * 1000
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, batch in enumerate(test_loader()):
            test_fetches = [avg_cost.name, acc_top1.name, acc_top5.name]
            batch_reward = exe.run(test_program,
                                   feed=batch,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)

            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        # Index 1 is the mean top-1 accuracy.
        sa_nas.reward(float(finally_reward[1]))
def search_mobilenetv2(config, args, image_size, is_server=True):
    """Search MobileNetV2 architectures with a DDPG-driven ``RLNAS`` controller.

    Each step samples an architecture, trains it for ``args.retain_epoch``
    epochs, evaluates it and reports the mean top-1 accuracy together with
    the observation/action transition back to the controller.

    Args:
        config: Search-space configuration handed to ``RLNAS``.
        args: Parsed options. Reads ``server_address``, ``port``,
            ``search_steps``, ``use_gpu``, ``data`` ("cifar10" or
            "imagenet"), ``batch_size`` and ``retain_epoch``.
        image_size: Height and width of the square input images.
        is_server: Truthy to start a server and a client, falsy to start
            only a client.

    Raises:
        ValueError: ``args.data`` is not a supported dataset.
    """
    # Fail before starting a controller; previously an unknown dataset only
    # surfaced later as an unbound `train_reader`.
    if args.data not in ('cifar10', 'imagenet'):
        raise ValueError("{} is not supported.".format(args.data))

    nas_kwargs = dict(
        key='ddpg',
        configs=config,
        is_sync=False,
        obs_dim=26,  ### step + length_of_token
        server_addr=(args.server_address, args.port))
    if not is_server:
        ### start a client only; the server case keeps RLNAS's own default
        nas_kwargs['is_server'] = False
    rl_nas = RLNAS(**nas_kwargs)

    image_shape = [3, image_size, image_size]
    for step in range(args.search_steps):
        if step == 0:
            action_prev = [1. for _ in rl_nas.range_tables]
        else:
            action_prev = rl_nas.tokens[0]
        obs = [step]
        obs.extend(action_prev)
        archs = rl_nas.next_archs(obs=obs)[0][0]

        train_program = fluid.Program()
        test_program = fluid.Program()
        startup_program = fluid.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, archs, args)

        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            archs,
            args,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_program)

        if args.data == 'cifar10':
            train_reader = paddle.batch(
                paddle.reader.shuffle(
                    paddle.dataset.cifar.train10(cycle=False),
                    buf_size=1024),
                batch_size=args.batch_size,
                drop_last=True)
            test_reader = paddle.batch(
                paddle.dataset.cifar.test10(cycle=False),
                batch_size=args.batch_size,
                drop_last=False)
        else:  # imagenet
            train_reader = paddle.batch(imagenet_reader.train(),
                                        batch_size=args.batch_size,
                                        drop_last=True)
            test_reader = paddle.batch(imagenet_reader.val(),
                                       batch_size=args.batch_size,
                                       drop_last=False)

        train_loader.set_sample_list_generator(
            train_reader,
            places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
        test_loader.set_sample_list_generator(test_reader, places=place)

        build_strategy = fluid.BuildStrategy()
        train_compiled_program = fluid.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)

        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                # time.time() is in seconds; the log line reports ms.
                batch_time = (time.time() - s_time) * 1000
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)

            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        obs = np.expand_dims(obs, axis=0).astype('float32')
        actions = rl_nas.tokens
        obs_next = [step + 1]
        obs_next.extend(actions[0])
        obs_next = np.expand_dims(obs_next, axis=0).astype('float32')

        # Use the builtin `bool`: the `np.bool` alias no longer exists in
        # current NumPy releases.
        is_last_step = step == args.search_steps - 1
        terminal = np.expand_dims([is_last_step], axis=0).astype(bool)

        # Index 1 of the averaged fetches is the top-1 accuracy.
        rl_nas.reward(np.expand_dims(np.float32(finally_reward[1]), axis=0),
                      obs=obs,
                      actions=actions.astype('float32'),
                      obs_next=obs_next,
                      terminal=terminal)

        # NOTE(review): this stops the whole process after the third step
        # regardless of args.search_steps. It looks like a demo/debug
        # cut-off; kept because callers may rely on it. Confirm and remove.
        if step == 2:
            sys.exit(0)
def compress(args):
    """Prune a 1000-class classifier with the ratios in ``ratiolist``, then fine-tune.

    The training and evaluation programs are pruned once per entry of the
    module-level ``ratiolist``. Pretrained weights (if any) are loaded into
    the pruned training program. The model is then trained, evaluated and
    saved once per epoch.

    Args:
        args: Parsed options. Reads ``model``, ``use_gpu``,
            ``pretrained_model``, ``batch_size`` and ``num_epochs``; also
            passed to ``create_optimizer`` and ``save_model``.
    """
    class_dim = 1000
    image_shape = [3, 224, 224]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)

    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Defined unconditionally: it used to exist only when a pretrained model
    # was given but was referenced regardless, causing a NameError.
    def if_exist(var):
        exist = os.path.exists(os.path.join(args.pretrained_model, var.name))
        print("exist", exist)
        return exist

    val_reader = paddle.fluid.io.batch(reader.val(),
                                       batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(reader.train(),
                                         batch_size=args.batch_size,
                                         drop_last=True)

    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(epoch, program):
        """Evaluate ``program`` on the validation reader and print accuracy."""
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            print(
                "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                .format(epoch, batch_id, np.mean(acc_top1_n),
                        np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        print("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
            epoch, np.mean(np.array(acc_top1_ns)),
            np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
        """Run one training epoch of ``program`` and print per-batch stats."""
        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)

        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n, lr_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[
                    avg_cost.name, acc_top1.name, acc_top5.name,
                    "learning_rate"
                ])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            lr_n = np.mean(lr_n)
            print(
                "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {};lrn: {}; time: {}"
                .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n, lr_n,
                        end_time - start_time))
            batch_id += 1

    # Only parameters whose name contains "_sep_weights" are pruned.
    params = [
        param.name
        for param in fluid.default_main_program().global_block().all_parameters()
        if "_sep_weights" in param.name
    ]
    print("fops before pruning: {}".format(flops(
        fluid.default_main_program())))

    pruned_program_iter = fluid.default_main_program()
    pruned_val_program_iter = val_program
    for ratios in ratiolist:
        pruner = Pruner()
        # The eval program is pruned with only_graph=True; the training
        # program is pruned without it.
        pruned_val_program_iter = pruner.prune(pruned_val_program_iter,
                                               fluid.global_scope(),
                                               params=params,
                                               ratios=ratios,
                                               place=place,
                                               only_graph=True)
        pruned_program_iter = pruner.prune(pruned_program_iter,
                                           fluid.global_scope(),
                                           params=params,
                                           ratios=ratios,
                                           place=place)
        print("fops after pruning: {}".format(flops(pruned_program_iter)))

    if args.pretrained_model:
        # Do not inherit learning rate: drop the saved LR state before
        # loading so fine-tuning starts from a fresh schedule.
        for stale_name in ("learning_rate", "@LR_DECAY_COUNTER@"):
            stale_path = os.path.join(args.pretrained_model, stale_name)
            if os.path.exists(stale_path):
                os.remove(stale_path)

        fluid.io.load_vars(exe,
                           args.pretrained_model,
                           main_program=pruned_program_iter,
                           predicate=if_exist)

    pruned_program = pruned_program_iter
    pruned_val_program = pruned_val_program_iter
    for i in range(args.num_epochs):
        train(i, pruned_program)
        test(i, pruned_val_program)
        save_model(args, exe, pruned_program, pruned_val_program, i)
def search_mobilenetv2(config, args, image_size, is_server=True):
    """Search MobileNetV2 architectures with SANAS, using top-1 accuracy as reward.

    Each step samples an architecture, skips it if it exceeds the FLOPs
    budget, otherwise trains it for ``args.retain_epoch`` epochs, evaluates
    it and reports the mean top-1 accuracy to the controller.

    Args:
        config: Search-space configuration handed to ``SANAS``.
        args: Parsed options. Reads ``server_address``, ``port``,
            ``search_steps``, ``use_gpu``, ``data`` ("cifar10" or
            "imagenet"), ``batch_size`` and ``retain_epoch``.
        image_size: Height and width of the square input images.
        is_server: Truthy to start a server and a client, falsy to start
            only a client.

    Raises:
        ValueError: ``args.data`` is not a supported dataset.
    """
    # Fail before starting a controller; previously an unknown dataset only
    # surfaced later as an unbound `train_reader`.
    if args.data not in ('cifar10', 'imagenet'):
        raise ValueError("{} is not supported.".format(args.data))

    max_flops = 321208544  # candidates above this budget are not trained
    sa_nas = SANAS(config,
                   server_addr=(args.server_address, args.port),
                   search_steps=args.search_steps,
                   is_server=bool(is_server))

    image_shape = [3, image_size, image_size]
    for step in range(args.search_steps):
        archs = sa_nas.next_archs()[0]

        train_program = fluid.Program()
        test_program = fluid.Program()
        startup_program = fluid.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, archs, args)

        current_flops = flops(train_program)
        print('step: {}, current_flops: {}'.format(step, current_flops))
        if current_flops > max_flops:
            continue

        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            archs,
            args,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_program)

        if args.data == 'cifar10':
            train_reader = paddle.batch(
                paddle.reader.shuffle(
                    paddle.dataset.cifar.train10(cycle=False),
                    buf_size=1024),
                batch_size=args.batch_size,
                drop_last=True)
            test_reader = paddle.batch(
                paddle.dataset.cifar.test10(cycle=False),
                batch_size=args.batch_size,
                drop_last=False)
        else:  # imagenet
            train_reader = paddle.batch(imagenet_reader.train(),
                                        batch_size=args.batch_size,
                                        drop_last=True)
            test_reader = paddle.batch(imagenet_reader.val(),
                                       batch_size=args.batch_size,
                                       drop_last=False)

        train_loader.set_sample_list_generator(
            train_reader,
            places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
        test_loader.set_sample_list_generator(test_reader, places=place)

        build_strategy = fluid.BuildStrategy()
        train_compiled_program = fluid.CompiledProgram(
            train_program).with_data_parallel(loss_name=avg_cost.name,
                                              build_strategy=build_strategy)

        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                # time.time() is in seconds; the log line reports ms.
                batch_time = (time.time() - s_time) * 1000
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'
                        .format(step, epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)

            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'
                .format(step, batch_id, batch_reward[0], batch_reward[1],
                        batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        # Index 1 is the mean top-1 accuracy.
        sa_nas.reward(float(finally_reward[1]))
def get_model(args, is_train, main_prog, startup_prog):
    """Build the SE-ResNeXt-50 graph plus its data reader.

    Args:
        args: Parsed options. Reads ``use_reader_op``, ``learning_rate``,
            ``batch_size``, ``memory_optimize`` and ``gpus``.
        is_train: Truthy to add the optimizer and use the training reader;
            otherwise the validation reader is used and no optimizer is
            created.
        main_prog: Program that receives the network ops.
        startup_prog: Program that receives the initialisation ops.

    Returns:
        ``(avg_cost, optimizer, [acc_top1, acc_top5], batched_reader,
        pyreader)``. ``optimizer`` is ``None`` when not training. Exactly
        one of ``batched_reader`` / ``pyreader`` is set, depending on
        ``args.use_reader_op``.
    """
    model = SE_ResNeXt(layers=50)
    batched_reader = None
    pyreader = None
    # Default to a single trainer when PADDLE_TRAINERS is not exported;
    # int(None) used to raise a TypeError here.
    trainer_count = int(os.getenv("PADDLE_TRAINERS", "1"))
    dshape = train_parameters["input_size"]

    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            if args.use_reader_op:
                pyreader = fluid.layers.py_reader(
                    capacity=10,
                    shapes=([-1] + dshape, (-1, 1)),
                    dtypes=('float32', 'int64'),
                    name="train_reader" if is_train else "test_reader",
                    use_double_buffer=True)
                image, label = fluid.layers.read_file(pyreader)
            else:
                image = fluid.layers.data(name='data',
                                          shape=dshape,
                                          dtype='float32')
                label = fluid.layers.data(name='label',
                                          shape=[1],
                                          dtype='int64')

            out = model.net(input=image)
            cost = fluid.layers.cross_entropy(input=out, label=label)
            avg_cost = fluid.layers.mean(x=cost)
            acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

            optimizer = None
            if is_train:
                # Piecewise decay: divide the LR by 10 at epochs 40/80/100,
                # with the per-trainer step count derived from 1281167 images.
                total_images = 1281167 / trainer_count
                step = int(total_images / args.batch_size + 1)
                epochs = [40, 80, 100]
                bd = [step * e for e in epochs]
                base_lr = args.learning_rate
                lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
                optimizer = fluid.optimizer.Momentum(
                    learning_rate=fluid.layers.piecewise_decay(boundaries=bd,
                                                               values=lr),
                    momentum=0.9,
                    regularization=fluid.regularizer.L2Decay(1e-4))
                optimizer.minimize(avg_cost)

                if args.memory_optimize:
                    fluid.memory_optimize(main_prog)

    # config readers
    reader = train() if is_train else val()

    if not args.use_reader_op:
        batched_reader = paddle.batch(reader,
                                      batch_size=args.batch_size * args.gpus,
                                      drop_last=True)
    else:
        pyreader.decorate_paddle_reader(
            paddle.batch(reader, batch_size=args.batch_size))

    return avg_cost, optimizer, [acc_top1, acc_top5], batched_reader, pyreader
test_feed_names[0]: image, test_feed_names[1]: label }, fetch_list=test_fetch_list) result = [np.mean(r) for r in result] results.append(result) result = np.mean(np.array(results), axis=0) return result[0] if __name__ == '__main__': args = parser.parse_args() print_arguments(args) paddle.enable_static() compress_config, train_config = load_config(args.config_path) train_reader = paddle.batch(reader.train(), batch_size=64) train_dataloader = reader_wrapper(train_reader) ac = AutoCompression(model_dir=args.model_dir, model_filename=args.model_filename, params_filename=args.params_filename, save_dir=args.save_dir, strategy_config=compress_config, train_config=train_config, train_dataloader=train_dataloader, eval_callback=eval_function, devices=args.devices) ac.compress()
def compress(args):
    """Distil a student classifier from a teacher network through ``Compressor``.

    Reads ``enable_ce``, ``image_shape``, ``model``, ``class_dim``,
    ``total_images``, ``batch_size``, ``use_gpu``, ``pretrained_model``,
    ``teacher_model``, ``teacher_pretrained_model`` and ``compress_config``
    from ``args``.
    """
    # add ce
    if args.enable_ce:
        seed = 1
        fluid.default_main_program().random_seed = seed
        fluid.default_startup_program().random_seed = seed

    input_shape = [int(dim) for dim in args.image_shape.split(",")]

    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=input_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # model definition
    student = models.__dict__[args.model]()
    net_kwargs = dict(input=image, class_dim=args.class_dim)
    if args.model == 'ResNet34':
        student.prefix_name = 'res34'
        net_kwargs['fc_name'] = 'fc_0'
    out = student.net(**net_kwargs)

    avg_cost = fluid.layers.mean(
        x=fluid.layers.cross_entropy(input=out, label=label))
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone()

    # Learning rate steps down at epochs 30, 60 and 90.
    steps_per_epoch = args.total_images / args.batch_size
    boundaries = [steps_per_epoch * epoch for epoch in (30, 60, 90)]
    values = [0.1, 0.01, 0.001, 0.0001]
    lr_schedule = fluid.layers.piecewise_decay(boundaries=boundaries,
                                               values=values)
    opt = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=lr_schedule,
        regularization=fluid.regularizer.L2Decay(4e-5))

    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def student_var_exists(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))

        fluid.io.load_vars(exe, args.pretrained_model,
                           predicate=student_var_exists)

    val_reader = paddle.batch(reader.val(), batch_size=args.batch_size)
    val_feed_list = [('image', image.name), ('label', label.name)]
    val_fetch_list = [('acc_top1', acc_top1.name),
                      ('acc_top5', acc_top5.name)]

    train_reader = paddle.batch(reader.train(),
                                batch_size=args.batch_size,
                                drop_last=True)
    train_feed_list = [('image', image.name), ('label', label.name)]
    train_fetch_list = [('loss', avg_cost.name)]

    teacher = models.__dict__[args.teacher_model](prefix_name='res50')

    # define teacher program
    teacher_program = fluid.Program()
    teacher_startup = fluid.Program()
    with fluid.program_guard(teacher_program, teacher_startup):
        # The teacher reads from a clone of the student's input variable.
        img = teacher_program.global_block()._clone_variable(
            image, force_persistable=False)
        teacher.net(img, class_dim=args.class_dim, fc_name='fc_0')

    exe.run(teacher_startup)
    assert args.teacher_pretrained_model and os.path.exists(
        args.teacher_pretrained_model
    ), "teacher_pretrained_model should be set when teacher_model is not None."

    def teacher_var_exists(var):
        return os.path.exists(
            os.path.join(args.teacher_pretrained_model, var.name))

    fluid.io.load_vars(exe,
                       args.teacher_pretrained_model,
                       main_program=teacher_program,
                       predicate=teacher_var_exists)

    # The same optimizer drives both training and distillation.
    distiller_optimizer = opt
    teacher_programs = [teacher_program.clone(for_test=True)]

    com_pass = Compressor(place,
                          fluid.global_scope(),
                          fluid.default_main_program(),
                          train_reader=train_reader,
                          train_feed_list=train_feed_list,
                          train_fetch_list=train_fetch_list,
                          eval_program=val_program,
                          eval_reader=val_reader,
                          eval_feed_list=val_feed_list,
                          eval_fetch_list=val_fetch_list,
                          teacher_programs=teacher_programs,
                          save_eval_model=True,
                          prune_infer_model=[[image.name], [out.name]],
                          train_optimizer=opt,
                          distiller_optimizer=distiller_optimizer)
    com_pass.config(args.compress_config)
    com_pass.run()
def compress(args):
    """Run the quantization ``Compressor`` pass on an image classifier.

    Builds ``args.model`` for 3x224x224 inputs and 1000 classes, optionally
    loads pretrained weights, runs ``Compressor`` configured from
    ``args.config_file``, and finally prints how many ``conv2d`` and
    ``fake_quantize*`` ops the resulting eval graph contains.

    Args:
        args: Parsed options. Attributes read here: ``model``, ``use_gpu``,
            ``pretrained_model``, ``batch_size`` and ``config_file``.

    Raises:
        AssertionError: If ``args.pretrained_model`` is set but the path
            does not exist.
    """
    input_dims = [3, 224, 224]
    image = fluid.data(name='image',
                       shape=[None] + input_dims,
                       dtype='float32')
    label = fluid.data(name='label', shape=[None, 1], dtype='int64')

    # Network, loss and metrics.
    net = models.__dict__[args.model]()
    out = net.net(input=image, class_dim=1000)
    per_sample_loss = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=per_sample_loss)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    val_program = fluid.default_main_program().clone()

    # Quantization-aware training uses a small learning rate.
    learning_rate = fluid.layers.piecewise_decay(boundaries=[5000 * 12],
                                                 values=[1e-4, 1e-5])
    opt = fluid.optimizer.Momentum(
        momentum=0.9,
        learning_rate=learning_rate,
        regularization=fluid.regularizer.L2Decay(1e-4))

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:
        assert os.path.exists(
            args.pretrained_model), "pretrained_model path doesn't exist"
        # Load only the variables that have a file in the checkpoint dir.
        fluid.io.load_vars(
            exe,
            args.pretrained_model,
            predicate=lambda var: os.path.exists(
                os.path.join(args.pretrained_model, var.name)))

    val_reader = paddle.batch(reader.val(), batch_size=args.batch_size)
    val_feed_list = [('image', image.name), ('label', label.name)]
    val_fetch_list = [('acc_top1', acc_top1.name),
                      ('acc_top5', acc_top5.name)]

    train_reader = paddle.batch(reader.train(),
                                batch_size=args.batch_size,
                                drop_last=True)
    train_feed_list = [('image', image.name), ('label', label.name)]
    train_fetch_list = [('loss', avg_cost.name)]

    com_pass = Compressor(place,
                          fluid.global_scope(),
                          fluid.default_main_program(),
                          train_reader=train_reader,
                          train_feed_list=train_feed_list,
                          train_fetch_list=train_fetch_list,
                          eval_program=val_program,
                          eval_reader=val_reader,
                          eval_feed_list=val_feed_list,
                          eval_fetch_list=val_fetch_list,
                          teacher_programs=[],
                          train_optimizer=opt,
                          prune_infer_model=[[image.name], [out.name]],
                          distiller_optimizer=None)
    com_pass.config(args.config_file)
    com_pass.run()

    # Report how many conv and fake-quantize ops ended up in the eval graph.
    op_types = [op._op.type for op in com_pass.context.eval_graph.ops()]
    conv_op_num = op_types.count('conv2d')
    fake_quant_op_num = sum(
        1 for op_type in op_types if op_type.startswith('fake_quantize'))
    print('conv op num {}'.format(conv_op_num))
    print('fake quant op num {}'.format(fake_quant_op_num))
def compress(args):
    """Prune a classifier, then fine-tune, evaluate and save it per epoch.

    Steps, in order:
      1. Pick the dataset readers, class count and input shape from
         ``args.data``.
      2. Build the network, loss and accuracy metrics, clone a test program,
         and attach the optimizer returned by ``create_optimizer``.
      3. Optionally load pretrained weights, then evaluate the unpruned
         model once.
      4. Prune both the eval program and the training program with
         ``Pruner`` and log FLOPs before and after.
      5. For each epoch: train; every ``args.test_period`` epochs evaluate
         and checkpoint; optionally export an inference model.

    Args:
        args: Parsed options. Attributes read here: ``data``, ``model``,
            ``use_gpu``, ``pretrained_model``, ``batch_size``,
            ``log_period``, ``criterion``, ``pruned_ratio``, ``num_epochs``,
            ``test_period``, ``model_path`` and ``save_inference``.
            ``create_optimizer`` and ``get_pruned_params`` also receive
            ``args`` and may read further attributes.

    Raises:
        ValueError: If ``args.data`` is neither ``"mnist"`` nor
            ``"imagenet"``.
        AssertionError: If ``args.model`` is not in ``model_list``.
    """
    if args.data == "mnist":
        train_reader = paddle.dataset.mnist.train()
        val_reader = paddle.dataset.mnist.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        # Imported lazily so the MNIST path does not need this module.
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    image = paddle.static.data(name='image',
                               shape=[None] + image_shape,
                               dtype='float32')
    label = paddle.static.data(name='label', shape=[None, 1], dtype='int64')

    # Model definition, loss and metrics.
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = paddle.nn.functional.loss.cross_entropy(input=out, label=label)
    avg_cost = paddle.mean(x=cost)
    acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
    acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)

    # Clone the eval program before minimize() adds optimizer ops.
    val_program = paddle.static.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    place = places[0]
    exe = paddle.static.Executor(place)
    exe.run(paddle.static.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            """Return True if the checkpoint dir has a file for ``var``."""
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))

        _logger.info("Load pretrained model from {}".format(
            args.pretrained_model))
        paddle.fluid.io.load_vars(exe,
                                  args.pretrained_model,
                                  predicate=if_exist)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.batch(train_reader,
                                batch_size=args.batch_size,
                                drop_last=True)

    train_loader = paddle.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=64,
        use_double_buffer=True,
        iterable=True)
    valid_loader = paddle.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=64,
        use_double_buffer=True,
        iterable=True)
    # Training feeds all places; evaluation runs on the first place only.
    train_loader.set_sample_list_generator(train_reader, places)
    valid_loader.set_sample_list_generator(val_reader, place)

    def test(epoch, program):
        """Evaluate ``program`` on the validation loader and log accuracy."""
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=data,
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
        """Run one training epoch of ``program`` in data-parallel mode."""
        build_strategy = paddle.static.BuildStrategy()
        exec_strategy = paddle.static.ExecutionStrategy()
        train_program = paddle.static.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)
        for batch_id, data in enumerate(train_loader):
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=data,
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            # Fetched values are arrays; reduce each to a scalar for logging.
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))

    # Baseline accuracy of the unpruned model.
    test(0, val_program)

    params = get_pruned_params(args, paddle.static.default_main_program())
    _logger.info("FLOPs before pruning: {}".format(
        flops(paddle.static.default_main_program())))
    pruner = Pruner(args.criterion)
    # The eval program is pruned with only_graph=True, the training program
    # without it. NOTE(review): presumably so the shared weights in the
    # scope are pruned only once -- confirm against the Pruner docs.
    pruned_val_program, _, _ = pruner.prune(
        val_program,
        paddle.static.global_scope(),
        params=params,
        ratios=[args.pruned_ratio] * len(params),
        place=place,
        only_graph=True)
    pruned_program, _, _ = pruner.prune(
        paddle.static.default_main_program(),
        paddle.static.global_scope(),
        params=params,
        ratios=[args.pruned_ratio] * len(params),
        place=place)
    _logger.info("FLOPs after pruning: {}".format(flops(pruned_program)))

    for i in range(args.num_epochs):
        train(i, pruned_program)
        if i % args.test_period == 0:
            test(i, pruned_val_program)
            save_model(exe, pruned_val_program,
                       os.path.join(args.model_path, str(i)))
        if args.save_inference:
            infer_model_path = os.path.join(args.model_path, "infer_models",
                                            str(i))
            paddle.fluid.io.save_inference_model(infer_model_path, ["image"],
                                                 [out], exe,
                                                 pruned_val_program)
            _logger.info(
                "Saved inference model into [{}]".format(infer_model_path))