def main():
    """Train or evaluate an ImageNet classifier selected by ``FLAGS.arch``.

    In ``--eval_only`` mode the model is evaluated on the validation set
    and the function returns; otherwise it trains for ``FLAGS.epoch``
    epochs, checkpointing into a timestamped output directory.
    """
    # Enable static-graph mode only when explicitly requested
    # (clearer than the original `expr if cond else None` statement).
    if FLAGS.static:
        paddle.enable_static()
    device = paddle.set_device(FLAGS.device)

    model_list = list(models.__dict__["__all__"])
    assert FLAGS.arch in model_list, \
        "Expected FLAGS.arch in {}, but received {}".format(
            model_list, FLAGS.arch)

    # Fetch pretrained weights only for pure evaluation runs that do not
    # resume from a local checkpoint.
    net = models.__dict__[FLAGS.arch](
        pretrained=FLAGS.eval_only and not FLAGS.resume)

    inputs = [Input([None, 3, 224, 224], 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]

    model = paddle.Model(net, inputs, labels)
    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

    train_dataset = ImageNetDataset(
        os.path.join(FLAGS.data, 'train'),
        mode='train',
        image_size=FLAGS.image_size,
        resize_short_size=FLAGS.resize_short_size)
    val_dataset = ImageNetDataset(
        os.path.join(FLAGS.data, 'val'),
        mode='val',
        image_size=FLAGS.image_size,
        resize_short_size=FLAGS.resize_short_size)

    # Per-trainer steps per epoch; presumably drives the LR schedule
    # inside make_optimizer — confirm against its definition.
    optim = make_optimizer(
        np.ceil(
            len(train_dataset) * 1. / FLAGS.batch_size / ParallelEnv().nranks),
        parameter_list=model.parameters())

    model.prepare(optim, paddle.nn.CrossEntropyLoss(),
                  paddle.metric.Accuracy(topk=(1, 5)))

    if FLAGS.eval_only:
        model.evaluate(val_dataset,
                       batch_size=FLAGS.batch_size,
                       num_workers=FLAGS.num_workers)
        return

    output_dir = os.path.join(
        FLAGS.output_dir, FLAGS.arch,
        time.strftime('%Y-%m-%d-%H-%M', time.localtime()))
    # Only rank 0 creates the directory to avoid a race across trainers.
    if ParallelEnv().local_rank == 0 and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    model.fit(train_dataset,
              val_dataset,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.epoch,
              save_dir=output_dir,
              num_workers=FLAGS.num_workers)
def main():
    """Train or evaluate a model with the legacy hapi ``Model`` API.

    Runs in dygraph mode when ``FLAGS.dynamic`` is set (default static
    mode otherwise).  ``--eval_only`` evaluates on the validation set and
    returns; otherwise the model is trained for ``FLAGS.epoch`` epochs
    with per-epoch checkpoints under a timestamped directory.
    """
    device = set_device(FLAGS.device)
    # Enable dygraph only when explicitly requested
    # (clearer than the original `expr if cond else None` statement).
    if FLAGS.dynamic:
        fluid.enable_dygraph(device)

    # Pretrained weights are only fetched for pure evaluation runs that
    # do not resume from a local checkpoint.
    model = models.__dict__[FLAGS.arch](
        pretrained=FLAGS.eval_only and not FLAGS.resume)

    if FLAGS.resume is not None:
        model.load(FLAGS.resume)

    inputs = [Input([None, 3, 224, 224], 'float32', name='image')]
    labels = [Input([None, 1], 'int64', name='label')]

    train_dataset = ImageNetDataset(
        os.path.join(FLAGS.data, 'train'), mode='train')
    val_dataset = ImageNetDataset(os.path.join(FLAGS.data, 'val'), mode='val')

    # Per-trainer steps per epoch; presumably drives the LR schedule
    # inside make_optimizer — confirm against its definition.
    optim = make_optimizer(
        np.ceil(
            len(train_dataset) * 1. / FLAGS.batch_size / ParallelEnv().nranks),
        parameter_list=model.parameters())

    model.prepare(optim, CrossEntropy(), Accuracy(topk=(1, 5)), inputs, labels)

    if FLAGS.eval_only:
        model.evaluate(
            val_dataset,
            batch_size=FLAGS.batch_size,
            num_workers=FLAGS.num_workers)
        return

    output_dir = os.path.join(
        FLAGS.output_dir, FLAGS.arch,
        time.strftime('%Y-%m-%d-%H-%M', time.localtime()))
    # Only rank 0 creates the directory to avoid a race across trainers.
    if ParallelEnv().local_rank == 0 and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    model.fit(train_dataset,
              val_dataset,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.epoch,
              save_dir=output_dir,
              num_workers=FLAGS.num_workers)
def main():
    """Post-training-quantize a pretrained ImageNet model and export it.

    Loads ``FLAGS.arch`` with pretrained weights, calibrates an INT8 PTQ
    copy on the validation set, then saves both the quantized and the
    original FP32 inference models under ``FLAGS.output_dir``.
    """
    # 1. load model
    model_list = list(models.__dict__["__all__"])
    assert FLAGS.arch in model_list, \
        "Expected FLAGS.arch in {}, but received {}".format(
            model_list, FLAGS.arch)
    fp32_model = models.__dict__[FLAGS.arch](pretrained=True)
    fp32_model.eval()
    for name, layer in fp32_model.named_sublayers():
        print(name, layer)

    # Pair every Conv2D with the BatchNorm2D that follows it, so the two
    # can be fused before quantization.  Appending to the most recently
    # added pair (fuse_list[-1]) keeps conv/bn pairs aligned even when a
    # conv has no trailing bn; the previous count-based index silently
    # attached such a bn to an earlier conv (and raised IndexError if a
    # bn appeared before any conv).
    fuse_list = []
    for name, layer in fp32_model.named_sublayers():
        if isinstance(layer, nn.Conv2D):
            fuse_list.append([name])
        if isinstance(layer, nn.BatchNorm2D) and fuse_list:
            fuse_list[-1].append(name)
    if FLAGS.arch == 'resnet50':
        # For resnet50 let PTQ decide fusion itself.
        fuse_list = None

    val_dataset = ImageNetDataset(FLAGS.data, mode='val')

    # 2. quantization
    ptq = PTQ()
    quant_model = ptq.quantize(fp32_model, fuse=FLAGS.fuse,
                               fuse_list=fuse_list)
    print("Calibrate")
    calibrate(quant_model, val_dataset, FLAGS.quant_batch_num,
              FLAGS.quant_batch_size)

    # 3. save the INT8 inference model ...
    quant_output_dir = os.path.join(FLAGS.output_dir, FLAGS.arch,
                                    "int8_infer", "model")
    input_spec = paddle.static.InputSpec(shape=[None, 3, 224, 224],
                                         dtype='float32')
    ptq.save_quantized_model(quant_model, quant_output_dir, [input_spec])

    # ... and the original FP32 inference model for comparison.
    fp32_output_dir = os.path.join(FLAGS.output_dir, FLAGS.arch,
                                   "fp32_infer", "model")
    paddle.jit.save(fp32_model, fp32_output_dir, [input_spec])
def train_mobilenet():
    """Train MobileNetV1/V2 on ImageNet in fluid dygraph mode.

    All configuration is read from the module-level ``args`` namespace:
    device placement, data-parallel mode, model choice, checkpoint
    resume, data paths, batch size and epoch count.  A checkpoint is
    saved after every epoch and a final one when training completes.
    """
    # Pick the execution place: CPU, single GPU, or the per-trainer GPU
    # assigned by the parallel environment.
    if not args.use_gpu:
        place = fluid.CPUPlace()
    elif not args.use_data_parallel:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
    with fluid.dygraph.guard(place):
        # 1. init net and optimizer
        place_num = paddle.fluid.core.get_cuda_device_count(
        ) if args.use_gpu else int(os.environ.get('CPU_NUM', 1))
        if args.ce:
            # Continuous-evaluation mode: fix all seeds for reproducibility.
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed

        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        if args.model == "MobileNetV1":
            net = MobileNetV1(class_dim=args.class_dim, scale=1.0)
            model_path_pre = 'mobilenet_v1'
        elif args.model == "MobileNetV2":
            net = MobileNetV2(class_dim=args.class_dim, scale=1.0)
            model_path_pre = 'mobilenet_v2'
        else:
            print(
                "wrong model name, please try model = MobileNetV1 or MobileNetV2"
            )
            exit()

        optimizer = create_optimizer(args=args,
                                     parameter_list=net.parameters())
        if args.use_data_parallel:
            net = fluid.dygraph.parallel.DataParallel(net, strategy)

        # 2. load checkpoint (parameters + optimizer state)
        if args.checkpoint:
            assert os.path.exists(args.checkpoint + ".pdparams"), \
                "Given dir {}.pdparams not exist.".format(args.checkpoint)
            assert os.path.exists(args.checkpoint + ".pdopt"), \
                "Given dir {}.pdopt not exist.".format(args.checkpoint)
            para_dict, opti_dict = fluid.dygraph.load_dygraph(args.checkpoint)
            net.set_dict(para_dict)
            optimizer.set_dict(opti_dict)

        # 3. reader / data loaders
        # NOTE(review): this loader is immediately overwritten by the
        # DataLoader built below — looks like dead code; confirm before
        # removing.
        test_data_loader = utility.create_data_loader(is_train=False,
                                                      args=args)
        num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
        # NOTE(review): imagenet_reader is only used for
        # set_shuffle_seed below; the actual batches come from the
        # DataLoaders.
        imagenet_reader = reader.ImageNetReader(seed=0, place_num=place_num)
        train_dataset = ImageNetDataset(os.path.join(args.data_dir, "train"),
                                        mode='train')
        train_data_loader = DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       places=place,
                                       shuffle=True,
                                       drop_last=True,
                                       num_workers=10)
        test_dataset = ImageNetDataset(os.path.join(args.data_dir, "val"),
                                       mode='val')
        test_data_loader = DataLoader(test_dataset,
                                      batch_size=args.batch_size,
                                      places=place,
                                      shuffle=True,
                                      drop_last=True,
                                      num_workers=1)

        # 4. train loop
        total_batch_num = 0  # this is for benchmark
        for eop in range(args.num_epochs):
            epoch_start = time.time()
            if num_trainers > 1:
                # Keep the shuffle order in sync across trainers,
                # varying it per epoch.
                imagenet_reader.set_shuffle_seed(
                    eop + (args.random_seed if args.random_seed else 0))
            net.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            batch_id = 0
            t_last = 0  # NOTE(review): unused — candidate for removal.
            # 4.1 for each batch, call net() , backward(), and minimize()
            batch_cost_avg = TimeCostAverage()
            batch_reader_avg = TimeCostAverage()
            batch_net_avg = TimeCostAverage()
            batch_backward_avg = TimeCostAverage()
            batch_start = time.time()
            for img, label in train_data_loader():
                # Benchmark mode: stop after a fixed number of batches.
                if args.max_iter and total_batch_num == args.max_iter:
                    return
                batch_reader_end = time.time()

                # 4.1.1 call net()
                out = net(img)
                softmax_out = fluid.layers.softmax(out, use_cudnn=False)
                loss = fluid.layers.cross_entropy(input=softmax_out,
                                                  label=label)
                avg_loss = fluid.layers.mean(x=loss)
                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
                batch_net_end = time.time()

                # 4.1.2 call backward() — in data-parallel mode the loss
                # is scaled and gradients are all-reduced across trainers.
                if args.use_data_parallel:
                    avg_loss = net.scale_loss(avg_loss)
                    avg_loss.backward()
                    net.apply_collective_grads()
                else:
                    avg_loss.backward()
                batch_backward_end = time.time()

                # 4.1.3 call minimize()
                optimizer.minimize(avg_loss)
                net.clear_gradients()
                t2 = time.time()  # NOTE(review): unused — candidate for removal.
                avg_loss_value = avg_loss.numpy()
                acc_top1_value = acc_top1.numpy()
                acc_top5_value = acc_top5.numpy()
                total_loss += avg_loss_value
                total_acc1 += acc_top1_value
                total_acc5 += acc_top5_value
                total_sample += 1
                batch_id += 1

                # NOTE: used for benchmark — accumulate per-phase timings.
                train_batch_cost = time.time() - batch_start
                batch_cost_avg.record(train_batch_cost)
                batch_reader_avg.record(batch_reader_end - batch_start)
                batch_net_avg.record(batch_net_end - batch_reader_end)
                batch_backward_avg.record(batch_backward_end - batch_net_end)
                total_batch_num = total_batch_num + 1

                if batch_id % args.print_step == 0:
                    ips = float(args.batch_size) / batch_cost_avg.get_average()
                    print(
                        "[Epoch %d, batch %d], avg_loss %.5f, acc_top1 %.5f, acc_top5 %.5f, batch_cost: %.5f sec, net_cost: %.5f sec, backward_cost: %.5f sec, reader_cost: %.5f sec, ips: %.5f images/sec"
                        % (eop, batch_id, avg_loss_value, acc_top1_value,
                           acc_top5_value, batch_cost_avg.get_average(),
                           batch_net_avg.get_average(),
                           batch_backward_avg.get_average(),
                           batch_reader_avg.get_average(), ips))
                    sys.stdout.flush()
                    # Reset the rolling averages for the next window.
                    batch_cost_avg.reset()
                    batch_net_avg.reset()
                    batch_backward_avg.reset()
                    batch_reader_avg.reset()
                batch_start = time.time()

            if args.ce:
                # Emit KPI lines consumed by the CE tracking system.
                print("kpis\ttrain_acc1\t%0.3f" % (total_acc1 / total_sample))
                print("kpis\ttrain_acc5\t%0.3f" % (total_acc5 / total_sample))
                print("kpis\ttrain_loss\t%0.3f" % (total_loss / total_sample))
            train_epoch_cost = time.time() - epoch_start
            print(
                "[Epoch %d], loss %.5f, acc1 %.5f, acc5 %.5f, epoch_cost: %.5f s"
                % (eop, total_loss / total_sample, total_acc1 / total_sample,
                   total_acc5 / total_sample, train_epoch_cost))

            # 4.2 save checkpoint — only one rank writes in parallel mode.
            save_parameters = (not args.use_data_parallel) or (
                args.use_data_parallel and
                fluid.dygraph.parallel.Env().local_rank == 0)
            if save_parameters:
                if not os.path.isdir(args.model_save_dir):
                    os.makedirs(args.model_save_dir)
                model_path = os.path.join(
                    args.model_save_dir,
                    "_" + model_path_pre + "_epoch{}".format(eop))
                fluid.dygraph.save_dygraph(net.state_dict(), model_path)
                fluid.dygraph.save_dygraph(optimizer.state_dict(), model_path)

            # 4.3 validation
            # NOTE(review): `eval` here is a project-level evaluation
            # helper that shadows the builtin — confirm its import.
            net.eval()
            eval(net, test_data_loader, eop)

        # 5. save final results — again only on rank 0 in parallel mode.
        save_parameters = (not args.use_data_parallel) or (
            args.use_data_parallel and
            fluid.dygraph.parallel.Env().local_rank == 0)
        if save_parameters:
            model_path = os.path.join(args.model_save_dir,
                                      "_" + model_path_pre + "_final")
            fluid.dygraph.save_dygraph(net.state_dict(), model_path)
def main():
    """Quantization-aware training / evaluation entry point.

    Optionally wraps the model for QAT (slim API) or with
    ImperativeQuantAware (naive API), then trains or evaluates it, and
    finally exports the quantized inference model.
    """
    # create model
    model_list = list(models.__dict__["__all__"])
    assert FLAGS.arch in model_list, \
        "Expected FLAGS.arch in {}, but received {}".format(
            model_list, FLAGS.arch)
    # Pretrained weights only when not resuming from a checkpoint.
    model = models.__dict__[FLAGS.arch](pretrained=not FLAGS.resume)

    # quantize model
    if FLAGS.enable_quant:
        if not FLAGS.use_naive_api:
            print("use slim api")
            quant_config = {
                'weight_quantize_type': FLAGS.weight_quantize_type,
            }
            dygraph_qat = QAT(quant_config)
        else:
            # Fixed message typo: was "use navie api".
            print("use naive api")
            dygraph_qat = ImperativeQuantAware(
                weight_quantize_type=FLAGS.weight_quantize_type, )
        dygraph_qat.quantize(model)

    # prepare
    model = paddle.Model(model)
    if FLAGS.resume is not None:
        print("Resume from " + FLAGS.resume)
        model.load(FLAGS.resume)

    train_dataset = ImageNetDataset(FLAGS.data, mode='train')
    val_dataset = ImageNetDataset(FLAGS.data, mode='val')

    # Per-trainer steps per epoch; presumably drives the LR schedule
    # inside make_optimizer — confirm against its definition.
    optim = make_optimizer(
        np.ceil(
            float(len(train_dataset)) / FLAGS.batch_size /
            ParallelEnv().nranks),
        parameter_list=model.parameters())
    model.prepare(optim, paddle.nn.CrossEntropyLoss(), Accuracy(topk=(1, 5)))

    # test
    if FLAGS.eval_only:
        model.evaluate(
            val_dataset,
            batch_size=FLAGS.batch_size,
            num_workers=FLAGS.num_workers)
        return

    # train
    output_dir = os.path.join(FLAGS.output_dir, "checkpoint",
                              FLAGS.arch + "_checkpoint",
                              time.strftime('%Y-%m-%d-%H-%M',
                                            time.localtime()))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    model.fit(train_dataset,
              val_dataset,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.epoch,
              save_dir=output_dir,
              num_workers=FLAGS.num_workers)

    # save
    print("save all checkpoints in " + output_dir)
    if FLAGS.enable_quant:
        quant_output_dir = os.path.join(FLAGS.output_dir, FLAGS.arch, "model")
        input_spec = paddle.static.InputSpec(
            shape=[None, 3, 224, 224], dtype='float32')
        dygraph_qat.save_quantized_model(model.network, quant_output_dir,
                                         [input_spec])
        # quant_output_dir only exists inside this branch; referencing it
        # unconditionally (as before) raised NameError on non-quant runs.
        print("save quantized inference model in " + quant_output_dir)