def build_dataloader(rank, world_size, data_root, ann_file): val_dataset = COCOJoints(data_root, ann_file, image_set="val2017", order=("image", "boxes", "info")) val_sampler = SequentialSampler(val_dataset, 1, world_size=world_size, rank=rank) val_dataloader = DataLoader( val_dataset, sampler=val_sampler, num_workers=4, transform=T.Compose( transforms=[ T.Normalize(mean=cfg.img_mean, std=cfg.img_std), ExtendBoxes( cfg.test_x_ext, cfg.test_y_ext, cfg.input_shape[1] / cfg.input_shape[0], random_extend_prob=0, ), RandomBoxAffine( degrees=(0, 0), scale=(1, 1), output_shape=cfg.input_shape, rotate_prob=0, scale_prob=0, ), T.ToMode(), ], order=("image", "boxes", "info"), ), ) return val_dataloader
def build_dataloader(batch_size, dataset_dir, cfg): if cfg.dataset == "VOC2012": train_dataset = dataset.PascalVOC(dataset_dir, cfg.data_type, order=["image", "mask"]) elif cfg.dataset == "Cityscapes": train_dataset = dataset.Cityscapes(dataset_dir, "train", mode='gtFine', order=["image", "mask"]) else: raise ValueError("Unsupported dataset {}".format(cfg.dataset)) train_sampler = Infinite( RandomSampler(train_dataset, batch_size, drop_last=True)) train_dataloader = DataLoader( train_dataset, sampler=train_sampler, transform=T.Compose( transforms=[ T.RandomHorizontalFlip(0.5), T.RandomResize(scale_range=(0.5, 2)), T.RandomCrop( output_size=(cfg.img_height, cfg.img_width), padding_value=[0, 0, 0], padding_maskvalue=255, ), T.Normalize(mean=cfg.img_mean, std=cfg.img_std), T.ToMode(), ], order=["image", "mask"], ), num_workers=2, ) return train_dataloader
def build_dataloader(dataset_dir, cfg): if cfg.dataset == "VOC2012": val_dataset = EvalPascalVOC( dataset_dir, "val", order=["image", "mask", "info"] ) elif cfg.dataset == "Cityscapes": val_dataset = dataset.Cityscapes( dataset_dir, "val", mode="gtFine", order=["image", "mask", "info"] ) else: raise ValueError("Unsupported dataset {}".format(cfg.dataset)) val_sampler = InferenceSampler(val_dataset, 1) val_dataloader = DataLoader( val_dataset, sampler=val_sampler, transform=T.Normalize( mean=cfg.img_mean, std=cfg.img_std, order=["image", "mask"] ), num_workers=2, ) return val_dataloader
def build_dataloader(batch_size, dataset_dir): train_dataset = dataset.PascalVOC(dataset_dir, cfg.DATA_TYPE, order=["image", "mask"]) train_sampler = data.RandomSampler(train_dataset, batch_size, drop_last=True) train_dataloader = data.DataLoader( train_dataset, sampler=train_sampler, transform=T.Compose( transforms=[ T.RandomHorizontalFlip(0.5), T.RandomResize(scale_range=(0.5, 2)), T.RandomCrop( output_size=(cfg.IMG_SIZE, cfg.IMG_SIZE), padding_value=[0, 0, 0], padding_maskvalue=255, ), T.Normalize(mean=cfg.IMG_MEAN, std=cfg.IMG_STD), T.ToMode(), ], order=["image", "mask"], ), num_workers=0, ) return train_dataloader, train_dataset.__len__()
def worker(rank, world_size, args): if world_size > 1: # Initialize distributed process group logger.info("init distributed process group {} / {}".format( rank, world_size)) dist.init_process_group( master_ip="localhost", master_port=23456, world_size=world_size, rank=rank, dev=rank, ) model = getattr(M, args.arch)(pretrained=(args.model is None)) if args.model: logger.info("load weights from %s", args.model) model.load_state_dict(mge.load(args.model), strict=False) if args.quantized: quantize(model) @jit.trace(symbolic=True) def valid_func(image, label): model.eval() logits = model(image) loss = F.cross_entropy_with_softmax(logits, label) acc1, acc5 = F.accuracy(logits, label, (1, 5)) if dist.is_distributed(): # all_reduce_mean loss = dist.all_reduce_sum(loss, "valid_loss") / dist.get_world_size() acc1 = dist.all_reduce_sum(acc1, "valid_acc1") / dist.get_world_size() acc5 = dist.all_reduce_sum(acc5, "valid_acc5") / dist.get_world_size() return loss, acc1, acc5 logger.info("preparing dataset..") valid_dataset = data.dataset.ImageNet(args.data, train=False) valid_sampler = data.SequentialSampler(valid_dataset, batch_size=args.batch_size, drop_last=False) valid_queue = data.DataLoader( valid_dataset, sampler=valid_sampler, transform=T.Compose([ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=[128.0, 128.0, 128.0], std=[1.0, 1.0, 1.0]), # BGR T.ToMode("CHW"), ]), num_workers=args.workers, ) _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args) logger.info("Valid %.3f / %.3f", valid_acc, valid_acc5)
def worker(world_size, args): # pylint: disable=too-many-statements rank = dist.get_rank() if world_size > 1: # Initialize distributed process group logger.info("init distributed process group {} / {}".format( rank, world_size)) model = models.__dict__[args.arch]() if args.mode != "normal": quantize_qat(model, qconfig=Q.ema_fakequant_qconfig) if args.checkpoint: logger.info("Load pretrained weights from %s", args.checkpoint) ckpt = mge.load(args.checkpoint) ckpt = ckpt["state_dict"] if "state_dict" in ckpt else ckpt model.load_state_dict(ckpt, strict=False) if args.mode == "quantized": quantize(model) # Define valid graph def valid_func(image, label): model.eval() logits = model(image) loss = F.loss.cross_entropy(logits, label, label_smooth=0.1) acc1, acc5 = F.topk_accuracy(logits, label, (1, 5)) if dist.is_distributed(): # all_reduce_mean loss = dist.functional.all_reduce_sum(loss) / dist.get_world_size() acc1 = dist.functional.all_reduce_sum(acc1) / dist.get_world_size() acc5 = dist.functional.all_reduce_sum(acc5) / dist.get_world_size() return loss, acc1, acc5 # Build valid datasets logger.info("preparing dataset..") valid_dataset = data.dataset.ImageNet(args.data, train=False) valid_sampler = data.SequentialSampler(valid_dataset, batch_size=100, drop_last=False) valid_queue = data.DataLoader( valid_dataset, sampler=valid_sampler, transform=T.Compose([ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW") ]), num_workers=args.workers, ) _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args) if rank == 0: logger.info("TEST %f, %f", valid_acc, valid_acc5)
def build_dataset(args): train_dataset = data.dataset.ImageNet(args.data, train=True) train_sampler = data.Infinite( data.RandomSampler(train_dataset, batch_size=args.batch_size, drop_last=True)) train_dataloader = data.DataLoader( train_dataset, sampler=train_sampler, transform=T.Compose([ # Baseline Augmentation for small models T.RandomResizedCrop(224), T.RandomHorizontalFlip(), T.Normalize(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395]), # BGR T.ToMode("CHW"), ]) if args.arch in ("resnet18", "resnet34") else T.Compose( [ # Facebook Augmentation for large models T.RandomResizedCrop(224), T.RandomHorizontalFlip(), T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4), T.Normalize(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395]), # BGR T.ToMode("CHW"), ]), num_workers=args.workers, ) valid_dataset = data.dataset.ImageNet(args.data, train=False) valid_sampler = data.SequentialSampler(valid_dataset, batch_size=100, drop_last=False) valid_dataloader = data.DataLoader( valid_dataset, sampler=valid_sampler, transform=T.Compose([ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395]), # BGR T.ToMode("CHW"), ]), num_workers=args.workers, ) return train_dataloader, valid_dataloader
def main(): parser = argparse.ArgumentParser() parser.add_argument("-a", "--arch", default="shufflenet_v1_x0_5_g3", type=str) parser.add_argument("-m", "--model", default=None, type=str) parser.add_argument("-i", "--image", default=None, type=str) parser.add_argument("--quantized", action="store_true", help="inference by quantized model, cpu only") args = parser.parse_args() model = getattr(M, args.arch)(pretrained=(args.model is None)) if args.model: state_dict = mge.load(args.model) model.load_state_dict(state_dict, strict=False) if args.quantized: quantize(model) if args.image is None: path = "../../../assets/cat.jpg" else: path = args.image image = cv2.imread(path, cv2.IMREAD_COLOR) transform = T.Compose([ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=[128.0, 128.0, 128.0], std=[1.0, 1.0, 1.0]), # BGR T.ToMode("CHW"), ]) @jit.trace(symbolic=False) def infer_func(processed_img): model.eval() logits = model(processed_img) probs = F.softmax(logits) return probs processed_img = transform.apply(image)[np.newaxis, :] probs = infer_func(processed_img) top_probs, classes = F.top_k(probs, k=5, descending=True) with open("../../../assets/imagenet_class_info.json") as fp: imagenet_class_index = json.load(fp) for rank, (prob, classid) in enumerate( zip(top_probs.numpy().reshape(-1), classes.numpy().reshape(-1))): print("{}: class = {:20s} with probability = {:4.1f} %".format( rank, imagenet_class_index[str(classid)][1], 100 * prob))
def build_dataloader(dataset_dir): val_dataset = dataset.PascalVOC(dataset_dir, "val", order=["image", "mask"]) val_sampler = data.SequentialSampler(val_dataset, cfg.VAL_BATCHES) val_dataloader = data.DataLoader( val_dataset, sampler=val_sampler, transform=T.Normalize(mean=cfg.IMG_MEAN, std=cfg.IMG_STD, order=["image", "mask"]), num_workers=cfg.DATA_WORKERS, ) return val_dataloader, val_dataset.__len__()
def prepare_dataset(name): """prepare dataset Args: name (str): name of the dataset, should be one of {facescrub, megaface} Returns: dataset (data.Dataset): required dataset queue (data.DataLoader): corresponding dataloader """ preprocess = T.Compose([T.Normalize(mean=127.5, std=128), T.ToMode("CHW")]) dataset = get_eval_dataset(name, dataset_dir=configs["dataset_dir"]) sampler = data.SequentialSampler(dataset, batch_size=configs["batch_size"]) queue = data.DataLoader(dataset, sampler=sampler, transform=preprocess) return dataset, queue
def main(): parser = argparse.ArgumentParser() parser.add_argument("-a", "--arch", default="shufflenet_v2_x1_0", type=str) parser.add_argument("-m", "--model", default=None, type=str) parser.add_argument("-i", "--image", default=None, type=str) args = parser.parse_args() model = snet_model.__dict__[args.arch](pretrained=(args.model is None)) if args.model is not None: logging.info("load from checkpoint %s", args.model) checkpoint = megengine.load(args.model) if "state_dict" in checkpoint: state_dict = checkpoint["state_dict"] model.load_state_dict(state_dict) if args.image is None: path = "../../../assets/cat.jpg" else: path = args.image image = cv2.imread(path, cv2.IMREAD_COLOR) transform = T.Compose([ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395]), # BGR T.ToMode("CHW"), ]) def infer_func(processed_img): model.eval() logits = model(processed_img) probs = F.softmax(logits) return probs processed_img = transform.apply(image)[np.newaxis, :] probs = infer_func(processed_img) top_probs, classes = F.topk(probs, k=5, descending=True) with open("../../../assets/imagenet_class_info.json") as fp: imagenet_class_index = json.load(fp) for rank, (prob, classid) in enumerate( zip(top_probs.numpy().reshape(-1), classes.numpy().reshape(-1))): print("{}: class = {:20s} with probability = {:4.1f} %".format( rank, imagenet_class_index[str(classid)][1], 100 * prob))
def main(): parser = argparse.ArgumentParser() parser.add_argument("-a", "--arch", default="resnet50_frelu", type=str) parser.add_argument("-m", "--model", default=None, type=str) parser.add_argument("-i", "--image", default=None, type=str) args = parser.parse_args() model = getattr(M, args.arch)(pretrained=(args.model is None)) if args.model: state_dict = mge.load(args.model) model.load_state_dict(state_dict) if args.image is None: path = "../../../assets/cat.jpg" # please find the files in https://github.com/MegEngine/Models/tree/master/official/assets else: path = args.image image = cv2.imread(path, cv2.IMREAD_COLOR) transform = T.Compose([ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395]), # BGR T.ToMode("CHW"), ]) @jit.trace(symbolic=True) def infer_func(processed_img): model.eval() logits = model(processed_img) probs = F.softmax(logits) return probs processed_img = transform.apply(image)[np.newaxis, :] probs = infer_func(processed_img) top_probs, classes = F.top_k(probs, k=5, descending=True) with open( "../../../assets/imagenet_class_info.json" ) as fp: # please find the files in https://github.com/MegEngine/Models/tree/master/official/assets imagenet_class_index = json.load(fp) for rank, (prob, classid) in enumerate( zip(top_probs.numpy().reshape(-1), classes.numpy().reshape(-1))): print("{}: class = {:20s} with probability = {:4.1f} %".format( rank, imagenet_class_index[str(classid)][1], 100 * prob))
def build_dataset(args): train_dataloader = None valid_dataset = data.dataset.ImageNet(args.data, train=False) valid_sampler = data.SequentialSampler(valid_dataset, batch_size=100, drop_last=False) valid_dataloader = data.DataLoader( valid_dataset, sampler=valid_sampler, transform=T.Compose([ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395]), # BGR T.ToMode("CHW"), ]), num_workers=args.workers, ) return train_dataloader, valid_dataloader
def build_dataloader(dataset_dir, cfg): if cfg.DATASET == "VOC2012": val_dataset = EvalPascalVOC(dataset_dir, "val", order=["image", "mask", "info"]) elif cfg.DATASET == "Cityscapes": val_dataset = dataset.Cityscapes(dataset_dir, "val", mode='gtFine', order=["image", "mask", "info"]) else: raise ValueError("Unsupported dataset {}".format(cfg.DATASET)) val_sampler = data.SequentialSampler(val_dataset, cfg.VAL_BATCHES) val_dataloader = data.DataLoader( val_dataset, sampler=val_sampler, transform=T.Normalize(mean=cfg.IMG_MEAN, std=cfg.IMG_STD, order=["image", "mask"]), num_workers=cfg.DATA_WORKERS, ) return val_dataloader, val_dataset.__len__()
def build_dataloader(batch_size, dataset_dir, cfg): if cfg.DATASET == "VOC2012": train_dataset = dataset.PascalVOC( dataset_dir, cfg.DATA_TYPE, order=["image", "mask"] ) elif cfg.DATASET == "Cityscapes": train_dataset = dataset.Cityscapes( dataset_dir, "train", mode='gtFine', order=["image", "mask"] ) else: raise ValueError("Unsupported dataset {}".format(cfg.DATASET)) train_sampler = data.RandomSampler(train_dataset, batch_size, drop_last=True) train_dataloader = data.DataLoader( train_dataset, sampler=train_sampler, transform=T.Compose( transforms=[ T.RandomHorizontalFlip(0.5), T.RandomResize(scale_range=(0.5, 2)), T.RandomCrop( output_size=(cfg.IMG_HEIGHT, cfg.IMG_WIDTH), padding_value=[0, 0, 0], padding_maskvalue=255, ), T.Normalize(mean=cfg.IMG_MEAN, std=cfg.IMG_STD), T.ToMode(), ], order=["image", "mask"], ), num_workers=0, ) return train_dataloader, train_dataset.__len__()
def worker(rank, world_size, args): # pylint: disable=too-many-statements if world_size > 1: # Initialize distributed process group logger.info("init distributed process group {} / {}".format( rank, world_size)) dist.init_process_group( master_ip="localhost", master_port=23456, world_size=world_size, rank=rank, dev=rank, ) model = models.__dict__[args.arch]() if args.mode != "normal": Q.quantize_qat(model, Q.ema_fakequant_qconfig) if args.checkpoint: logger.info("Load pretrained weights from %s", args.checkpoint) ckpt = mge.load(args.checkpoint) ckpt = ckpt["state_dict"] if "state_dict" in ckpt else ckpt model.load_state_dict(ckpt, strict=False) if args.mode == "quantized": Q.quantize(model) # Define valid graph @jit.trace(symbolic=True) def valid_func(image, label): model.eval() logits = model(image) loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1) acc1, acc5 = F.accuracy(logits, label, (1, 5)) if dist.is_distributed(): # all_reduce_mean loss = dist.all_reduce_sum(loss, "valid_loss") / dist.get_world_size() acc1 = dist.all_reduce_sum(acc1, "valid_acc1") / dist.get_world_size() acc5 = dist.all_reduce_sum(acc5, "valid_acc5") / dist.get_world_size() return loss, acc1, acc5 # Build valid datasets logger.info("preparing dataset..") valid_dataset = data.dataset.ImageNet(args.data, train=False) valid_sampler = data.SequentialSampler(valid_dataset, batch_size=100, drop_last=False) valid_queue = data.DataLoader( valid_dataset, sampler=valid_sampler, transform=T.Compose([ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW"), ]), num_workers=args.workers, ) _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args) logger.info("TEST %f, %f", valid_acc, valid_acc5)
def worker(rank, world_size, args): if world_size > 1: # Initialize distributed process group logger.info("init distributed process group {} / {}".format( rank, world_size)) dist.init_process_group( master_ip="localhost", master_port=23456, world_size=world_size, rank=rank, dev=rank, ) save_dir = os.path.join(args.save, args.arch) model = getattr(M, args.arch)() optimizer = optim.SGD( get_parameters(model), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay, ) # Define train and valid graph @jit.trace(symbolic=True) def train_func(image, label): model.train() logits = model(image) loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1) acc1, acc5 = F.accuracy(logits, label, (1, 5)) optimizer.backward(loss) # compute gradients if dist.is_distributed(): # all_reduce_mean loss = dist.all_reduce_sum(loss, "train_loss") / dist.get_world_size() acc1 = dist.all_reduce_sum(acc1, "train_acc1") / dist.get_world_size() acc5 = dist.all_reduce_sum(acc5, "train_acc5") / dist.get_world_size() return loss, acc1, acc5 @jit.trace(symbolic=True) def valid_func(image, label): model.eval() logits = model(image) loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1) acc1, acc5 = F.accuracy(logits, label, (1, 5)) if dist.is_distributed(): # all_reduce_mean loss = dist.all_reduce_sum(loss, "valid_loss") / dist.get_world_size() acc1 = dist.all_reduce_sum(acc1, "valid_acc1") / dist.get_world_size() acc5 = dist.all_reduce_sum(acc5, "valid_acc5") / dist.get_world_size() return loss, acc1, acc5 # Build train and valid datasets logger.info("preparing dataset..") train_dataset = data.dataset.ImageNet(args.data, train=True) train_sampler = data.RandomSampler(train_dataset, batch_size=args.batch_size, drop_last=True) train_queue = data.DataLoader( train_dataset, sampler=train_sampler, transform=T.Compose([ T.RandomResizedCrop(224), T.RandomHorizontalFlip(), T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4), T.Normalize(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395]), # BGR T.ToMode("CHW"), ]), num_workers=args.workers, ) train_queue = infinite_iter(train_queue) valid_dataset = data.dataset.ImageNet(args.data, train=False) valid_sampler = data.SequentialSampler(valid_dataset, batch_size=100, drop_last=False) valid_queue = data.DataLoader( valid_dataset, sampler=valid_sampler, transform=T.Compose([ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395]), # BGR T.ToMode("CHW"), ]), num_workers=args.workers, ) # Start training objs = AverageMeter("Loss") top1 = AverageMeter("Acc@1") top5 = AverageMeter("Acc@5") total_time = AverageMeter("Time") t = time.time() for step in range(0, args.steps + 1250 + 1): # Linear learning rate decay decay = 1.0 decay = 1 - float(step) / args.steps if step < args.steps else 0.0 for param_group in optimizer.param_groups: param_group["lr"] = args.learning_rate * decay image, label = next(train_queue) image = image.astype("float32") label = label.astype("int32") n = image.shape[0] optimizer.zero_grad() loss, acc1, acc5 = train_func(image, label) optimizer.step() top1.update(100 * acc1.numpy()[0], n) top5.update(100 * acc5.numpy()[0], n) objs.update(loss.numpy()[0], n) total_time.update(time.time() - t) t = time.time() if step % args.report_freq == 0 and rank == 0: logger.info( "TRAIN %06d %f %s %s %s %s", step, args.learning_rate * decay, objs, top1, top5, total_time, ) objs.reset() top1.reset() top5.reset() total_time.reset() if step % 10000 == 0 and rank == 0: logger.info("SAVING %06d", step) mge.save( model.state_dict(), os.path.join(save_dir, "checkpoint-{:06d}.pkl".format(step)), ) if step % 10000 == 0 and step != 0: _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args) logger.info("TEST %06d %f, %f", step, valid_acc, valid_acc5) mge.save(model.state_dict(), os.path.join(save_dir, "checkpoint-{:06d}.pkl".format(step))) _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args) logger.info("TEST %06d %f, %f", step, valid_acc, valid_acc5)
def main(): parser = argparse.ArgumentParser() parser.add_argument("-a", "--arch", default="resnet18", type=str) parser.add_argument("-c", "--checkpoint", default=None, type=str) parser.add_argument("-i", "--image", default=None, type=str) parser.add_argument( "-m", "--mode", default="quantized", type=str, choices=["normal", "qat", "quantized"], help="Quantization Mode\n" "normal: no quantization, using float32\n" "qat: quantization aware training, simulate int8\n" "quantized: convert mode to int8 quantized, inference only", ) parser.add_argument("--dump", action="store_true", help="Dump quantized model") args = parser.parse_args() model = models.__dict__[args.arch]() if args.mode != "normal": quantize_qat(model, qconfig=Q.ema_fakequant_qconfig) if args.mode == "quantized": quantize(model) if args.checkpoint: logger.info("Load pretrained weights from %s", args.checkpoint) ckpt = mge.load(args.checkpoint) ckpt = ckpt["state_dict"] if "state_dict" in ckpt else ckpt model.load_state_dict(ckpt, strict=False) rpath = os.path.realpath(__file__ + "/../../") if args.image is None: path = rpath + "/assets/cat.jpg" else: path = args.image image = cv2.imread(path, cv2.IMREAD_COLOR) transform = T.Compose( [T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW")] ) @trace(symbolic=True, capture_as_const=True) def infer_func(processed_img): model.eval() logits = model(processed_img) probs = F.softmax(logits) return probs processed_img = transform.apply(image)[np.newaxis, :] processed_img = mge.tensor(processed_img, dtype="float32") probs = infer_func(processed_img) top_probs, classes = F.topk(probs, k=5, descending=True) if args.dump: output_file = ".".join([args.arch, args.mode, "megengine"]) logger.info("Dump to {}".format(output_file)) infer_func.dump(output_file, arg_names=["data"]) mge.save(model.state_dict(), output_file.replace("megengine", "pkl")) with open(rpath + "/assets/imagenet_class_info.json") as fp: imagenet_class_index = json.load(fp) for rank, (prob, classid) in enumerate( zip(top_probs.numpy().reshape(-1), classes.numpy().reshape(-1)) ): print( "{}: class = {:20s} with probability = {:4.1f} %".format( rank, imagenet_class_index[str(classid)][1], 100 * prob ) )
def worker(master_ip, port, rank, world_size, args): if world_size > 1: # Initialize distributed process group logger.info("init distributed process group {} / {}".format(rank, world_size)) dist.init_process_group( master_ip=master_ip, port=port, world_size=world_size, rank=rank, device=rank, ) model_name = "{}_{}x{}".format(args.arch, cfg.input_shape[0], cfg.input_shape[1]) save_dir = os.path.join(args.save, model_name) model = getattr(kpm, args.arch)() model.train() start_epoch = 0 if args.resume is not None: file = mge.load(args.resume) model.load_state_dict(file["state_dict"]) start_epoch = file["epoch"] optimizer = optim.Adam( model.parameters(), lr=cfg.initial_lr, weight_decay=cfg.weight_decay ) gm = GradManager() if dist.get_world_size() > 1: gm.attach( model.parameters(), callbacks=[dist.make_allreduce_cb("SUM", dist.WORLD)], ) else: gm.attach(model.parameters()) if dist.get_world_size() > 1: dist.bcast_list_(model.parameters(), dist.WORLD) # sync parameters # Build train datasets logger.info("preparing dataset..") ann_file = os.path.join( cfg.data_root, "annotations", "person_keypoints_train2017.json" ) train_dataset = COCOJoints( cfg.data_root, ann_file, image_set="train2017", order=("image", "keypoints", "boxes", "info"), ) logger.info("Num of Samples: {}".format(len(train_dataset))) train_sampler = data.RandomSampler( train_dataset, batch_size=cfg.batch_size, drop_last=True ) transforms = [ T.Normalize(mean=cfg.img_mean, std=cfg.img_std), RandomHorizontalFlip(0.5, keypoint_flip_order=cfg.keypoint_flip_order) ] if cfg.half_body_transform: transforms.append( HalfBodyTransform( cfg.upper_body_ids, cfg.lower_body_ids, cfg.prob_half_body ) ) if cfg.extend_boxes: transforms.append( ExtendBoxes(cfg.x_ext, cfg.y_ext, cfg.input_shape[1] / cfg.input_shape[0]) ) transforms += [ RandomBoxAffine( degrees=cfg.rotate_range, scale=cfg.scale_range, output_shape=cfg.input_shape, rotate_prob=cfg.rotation_prob, scale_prob=cfg.scale_prob, ) ] transforms += [T.ToMode()] train_queue = data.DataLoader( train_dataset, sampler=train_sampler, num_workers=args.workers, transform=T.Compose(transforms=transforms, order=train_dataset.order,), collator=HeatmapCollator( cfg.input_shape, cfg.output_shape, cfg.keypoint_num, cfg.heat_thr, cfg.heat_kernels if args.multi_scale_supervision else cfg.heat_kernels[-1:], cfg.heat_range, ), ) # Start training for epoch in range(start_epoch, cfg.epochs): loss = train(model, train_queue, optimizer, gm, epoch=epoch) logger.info("Epoch %d Train %.6f ", epoch, loss) if rank == 0 and epoch % cfg.save_freq == 0: # save checkpoint mge.save( {"epoch": epoch + 1, "state_dict": model.state_dict()}, os.path.join(save_dir, "epoch_{}.pkl".format(epoch)), )
def worker(rank, world_size, args): if world_size > 1: # Initialize distributed process group logger.info("init distributed process group {} / {}".format( rank, world_size)) dist.init_process_group( master_ip="localhost", master_port=23456, world_size=world_size, rank=rank, dev=rank, ) model_name = "{}_{}x{}".format(args.arch, cfg.input_shape[0], cfg.input_shape[1]) save_dir = os.path.join(args.save, model_name) model = getattr(M, args.arch)(pretrained=args.pretrained) model.train() start_epoch = 0 if args.c is not None: file = mge.load(args.c) model.load_state_dict(file["state_dict"]) start_epoch = file["epoch"] optimizer = optim.Adam( model.parameters(requires_grad=True), lr=args.lr, weight_decay=cfg.weight_decay, ) # Build train datasets logger.info("preparing dataset..") train_dataset = COCOJoints( args.data_root, args.ann_file, image_set="train", order=("image", "keypoints", "boxes", "info"), ) train_sampler = data.RandomSampler(train_dataset, batch_size=args.batch_size, drop_last=True) transforms = [T.Normalize(mean=cfg.IMG_MEAN, std=cfg.IMG_STD)] if cfg.half_body_transform: transforms.append( HalfBodyTransform(cfg.upper_body_ids, cfg.lower_body_ids, cfg.prob_half_body)) if cfg.extend_boxes: transforms.append( ExtendBoxes(cfg.x_ext, cfg.y_ext, cfg.input_shape[1] / cfg.input_shape[0])) transforms += [ RandomHorizontalFlip(0.5, keypoint_flip_order=cfg.keypoint_flip_order) ] transforms += [ RandomBoxAffine( degrees=cfg.rotate_range, scale=cfg.scale_range, output_shape=cfg.input_shape, rotate_prob=cfg.rotation_prob, scale_prob=cfg.scale_prob, ) ] transforms += [T.ToMode()] train_queue = data.DataLoader( train_dataset, sampler=train_sampler, num_workers=args.workers, transform=T.Compose( transforms=transforms, order=train_dataset.order, ), collator=HeatmapCollator( cfg.input_shape, cfg.output_shape, cfg.keypoint_num, cfg.heat_thre, cfg.heat_kernel if args.multi_scale_supervision else cfg.heat_kernel[-1:], cfg.heat_range, ), ) # Start training for epoch in range(start_epoch, args.epochs): loss = train(model, train_queue, optimizer, args, epoch=epoch) logger.info("Epoch %d Train %.6f ", epoch, loss) if rank == 0: # save checkpoint mge.save( { "epoch": epoch + 1, "state_dict": model.state_dict(), }, os.path.join(save_dir, "epoch_{}.pkl".format(epoch)), )
def worker(rank, world_size, args): # pylint: disable=too-many-statements if world_size > 1: # Initialize distributed process group logger.info("init distributed process group {} / {}".format( rank, world_size)) dist.init_process_group( master_ip="localhost", master_port=23456, world_size=world_size, rank=rank, dev=rank, ) save_dir = os.path.join(args.save, args.arch + "." + args.mode) if not os.path.exists(save_dir): os.makedirs(save_dir, exist_ok=True) mge.set_log_file(os.path.join(save_dir, "log.txt")) model = models.__dict__[args.arch]() cfg = config.get_finetune_config(args.arch) cfg.LEARNING_RATE *= world_size # scale learning rate in distributed training total_batch_size = cfg.BATCH_SIZE * world_size steps_per_epoch = 1280000 // total_batch_size total_steps = steps_per_epoch * cfg.EPOCHS if args.mode != "normal": Q.quantize_qat(model, Q.ema_fakequant_qconfig) if args.checkpoint: logger.info("Load pretrained weights from %s", args.checkpoint) ckpt = mge.load(args.checkpoint) ckpt = ckpt["state_dict"] if "state_dict" in ckpt else ckpt model.load_state_dict(ckpt, strict=False) if args.mode == "quantized": raise ValueError("mode = quantized only used during inference") Q.quantize(model) optimizer = optim.SGD( get_parameters(model, cfg), lr=cfg.LEARNING_RATE, momentum=cfg.MOMENTUM, ) # Define train and valid graph @jit.trace(symbolic=True) def train_func(image, label): model.train() logits = model(image) loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1) acc1, acc5 = F.accuracy(logits, label, (1, 5)) optimizer.backward(loss) # compute gradients if dist.is_distributed(): # all_reduce_mean loss = dist.all_reduce_sum(loss, "train_loss") / dist.get_world_size() acc1 = dist.all_reduce_sum(acc1, "train_acc1") / dist.get_world_size() acc5 = dist.all_reduce_sum(acc5, "train_acc5") / dist.get_world_size() return loss, acc1, acc5 @jit.trace(symbolic=True) def valid_func(image, label): model.eval() logits = model(image) loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1) acc1, acc5 = F.accuracy(logits, label, (1, 5)) if dist.is_distributed(): # all_reduce_mean loss = dist.all_reduce_sum(loss, "valid_loss") / dist.get_world_size() acc1 = dist.all_reduce_sum(acc1, "valid_acc1") / dist.get_world_size() acc5 = dist.all_reduce_sum(acc5, "valid_acc5") / dist.get_world_size() return loss, acc1, acc5 # Build train and valid datasets logger.info("preparing dataset..") train_dataset = data.dataset.ImageNet(args.data, train=True) train_sampler = data.Infinite( data.RandomSampler(train_dataset, batch_size=cfg.BATCH_SIZE, drop_last=True)) train_queue = data.DataLoader( train_dataset, sampler=train_sampler, transform=T.Compose([ T.RandomResizedCrop(224), T.RandomHorizontalFlip(), cfg.COLOR_JITTOR, T.Normalize(mean=128), T.ToMode("CHW"), ]), num_workers=args.workers, ) train_queue = iter(train_queue) valid_dataset = data.dataset.ImageNet(args.data, train=False) valid_sampler = data.SequentialSampler(valid_dataset, batch_size=100, drop_last=False) valid_queue = data.DataLoader( valid_dataset, sampler=valid_sampler, transform=T.Compose([ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW"), ]), num_workers=args.workers, ) def adjust_learning_rate(step, epoch): learning_rate = cfg.LEARNING_RATE if cfg.SCHEDULER == "Linear": learning_rate *= 1 - float(step) / total_steps elif cfg.SCHEDULER == "Multistep": learning_rate *= cfg.SCHEDULER_GAMMA**bisect.bisect_right( cfg.SCHEDULER_STEPS, epoch) else: raise ValueError(cfg.SCHEDULER) for param_group in optimizer.param_groups: param_group["lr"] = learning_rate return learning_rate # Start training objs = AverageMeter("Loss") top1 = AverageMeter("Acc@1") top5 = AverageMeter("Acc@5") total_time = AverageMeter("Time") t = time.time() for step in range(0, total_steps): # Linear learning rate decay epoch = step // steps_per_epoch learning_rate = adjust_learning_rate(step, epoch) image, label = next(train_queue) image = image.astype("float32") label = label.astype("int32") n = image.shape[0] optimizer.zero_grad() loss, acc1, acc5 = train_func(image, label) optimizer.step() top1.update(100 * acc1.numpy()[0], n) top5.update(100 * acc5.numpy()[0], n) objs.update(loss.numpy()[0], n) total_time.update(time.time() - t) t = time.time() if step % args.report_freq == 0 and rank == 0: logger.info("TRAIN e%d %06d %f %s %s %s %s", epoch, step, learning_rate, objs, top1, top5, total_time) objs.reset() top1.reset() top5.reset() total_time.reset() if step % 10000 == 0 and rank == 0: logger.info("SAVING %06d", step) mge.save( { "step": step, "state_dict": model.state_dict() }, os.path.join(save_dir, "checkpoint.pkl"), ) if step % 10000 == 0 and step != 0: _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args) logger.info("TEST %06d %f, %f", step, valid_acc, valid_acc5) mge.save({ "step": step, "state_dict": model.state_dict() }, os.path.join(save_dir, "checkpoint-final.pkl")) _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args) logger.info("TEST %06d %f, %f", step, valid_acc, valid_acc5)
def worker(master_ip, port, world_size, rank, configs): if world_size > 1: dist.init_process_group( master_ip=master_ip, port=port, world_size=world_size, rank=rank, device=rank, ) logger.info("init process group for gpu{} done".format(rank)) # set up logger os.makedirs(configs["base_dir"], exist_ok=True) worklog_path = os.path.join(configs["base_dir"], "worklog.txt") mge.set_log_file(worklog_path) # prepare model-related components model = FaceRecognitionModel(configs) # prepare data-related components preprocess = T.Compose([T.Normalize(mean=127.5, std=128), T.ToMode("CHW")]) augment = T.Compose([T.RandomHorizontalFlip()]) train_dataset = get_train_dataset(configs["dataset"], dataset_dir=configs["dataset_dir"]) train_sampler = data.RandomSampler(train_dataset, batch_size=configs["batch_size"], drop_last=True) train_queue = data.DataLoader(train_dataset, sampler=train_sampler, transform=T.Compose([augment, preprocess])) # prepare optimize-related components configs["learning_rate"] = configs["learning_rate"] * dist.get_world_size() if dist.get_world_size() > 1: dist.bcast_list_(model.parameters()) gm = ad.GradManager().attach( model.parameters(), callbacks=[dist.make_allreduce_cb("mean")]) else: gm = ad.GradManager().attach(model.parameters()) opt = optim.SGD( model.parameters(), lr=configs["learning_rate"], momentum=configs["momentum"], weight_decay=configs["weight_decay"], ) # try to load checkpoint model, start_epoch = try_load_latest_checkpoint(model, configs["base_dir"]) # do training def train_one_epoch(): def train_func(images, labels): opt.clear_grad() with gm: loss, accuracy, _ = model(images, labels) gm.backward(loss) if dist.is_distributed(): # all_reduce_mean loss = dist.functional.all_reduce_sum( loss) / dist.get_world_size() accuracy = dist.functional.all_reduce_sum( accuracy) / dist.get_world_size() opt.step() return loss, accuracy model.train() average_loss = AverageMeter("loss") average_accuracy = AverageMeter("accuracy") data_time = AverageMeter("data_time") train_time = AverageMeter("train_time") total_step = len(train_queue) data_iter = iter(train_queue) for step in range(total_step): # get next batch of data data_tic = time.time() images, labels = next(data_iter) data_toc = time.time() # forward pass & backward pass train_tic = time.time() images = mge.tensor(images, dtype="float32") labels = mge.tensor(labels, dtype="int32") loss, accuracy = train_func(images, labels) train_toc = time.time() # do the statistics and logging n = images.shape[0] average_loss.update(loss.item(), n) average_accuracy.update(accuracy.item() * 100, n) data_time.update(data_toc - data_tic) train_time.update(train_toc - train_tic) if step % configs["log_interval"] == 0 and dist.get_rank() == 0: logger.info( "epoch: %d, step: %d, %s, %s, %s, %s", epoch, step, average_loss, average_accuracy, data_time, train_time, ) for epoch in range(start_epoch, configs["num_epoch"]): adjust_learning_rate(opt, epoch, configs) train_one_epoch() if dist.get_rank() == 0: checkpoint_path = os.path.join(configs["base_dir"], f"epoch-{epoch+1}-checkpoint.pkl") mge.save( { "epoch": epoch + 1, "state_dict": model.state_dict() }, checkpoint_path, )
def worker(world_size, args): # pylint: disable=too-many-statements rank = dist.get_rank() if world_size > 1: # Initialize distributed process group logger.info("init distributed process group {} / {}".format(rank, world_size)) save_dir = os.path.join(args.save, args.arch + "." + "calibration") if not os.path.exists(save_dir): os.makedirs(save_dir, exist_ok=True) mge.set_log_file(os.path.join(save_dir, "log.txt")) model = models.__dict__[args.arch]() # load calibration model assert args.checkpoint logger.info("Load pretrained weights from %s", args.checkpoint) ckpt = mge.load(args.checkpoint) ckpt = ckpt["state_dict"] if "state_dict" in ckpt else ckpt model.load_state_dict(ckpt, strict=False) # Build valid datasets valid_dataset = data.dataset.ImageNet(args.data, train=False) valid_sampler = data.SequentialSampler( valid_dataset, batch_size=100, drop_last=False ) valid_queue = data.DataLoader( valid_dataset, sampler=valid_sampler, transform=T.Compose( [T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW")] ), num_workers=args.workers, ) # calibration model.fc.disable_quantize() model = quantize_qat(model, qconfig=Q.calibration_qconfig) # calculate scale def calculate_scale(image, label): model.eval() enable_observer(model) logits = model(image) loss = F.loss.cross_entropy(logits, label, label_smooth=0.1) acc1, acc5 = F.topk_accuracy(logits, label, (1, 5)) if dist.is_distributed(): # all_reduce_mean loss = dist.functional.all_reduce_sum(loss) / dist.get_world_size() acc1 = dist.functional.all_reduce_sum(acc1) / dist.get_world_size() acc5 = dist.functional.all_reduce_sum(acc5) / dist.get_world_size() return loss, acc1, acc5 infer(calculate_scale, valid_queue, args) # quantized model = quantize(model) # eval quantized model def eval_func(image, label): model.eval() logits = model(image) loss = F.loss.cross_entropy(logits, label, label_smooth=0.1) acc1, acc5 = F.topk_accuracy(logits, label, (1, 5)) if dist.is_distributed(): # all_reduce_mean loss = dist.functional.all_reduce_sum(loss) / dist.get_world_size() acc1 = dist.functional.all_reduce_sum(acc1) / dist.get_world_size() acc5 = dist.functional.all_reduce_sum(acc5) / dist.get_world_size() return loss, acc1, acc5 _, valid_acc, valid_acc5 = infer(eval_func, valid_queue, args) logger.info("TEST %f, %f", valid_acc, valid_acc5) # save quantized model mge.save( {"step": -1, "state_dict": model.state_dict()}, os.path.join(save_dir, "checkpoint-calibration.pkl"), ) logger.info( "save in {}".format(os.path.join(save_dir, "checkpoint-calibration.pkl")) )
def worker(rank, world_size, args): if world_size > 1: # Initialize distributed process group logger.info("init distributed process group {} / {}".format( rank, world_size)) dist.init_process_group( master_ip="localhost", master_port=23456, world_size=world_size, rank=rank, dev=rank, ) save_dir = os.path.join(args.save, args.arch) model = getattr(M, args.arch)() optimizer = optim.SGD( model.parameters(requires_grad=True), lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay, ) scheduler = optim.MultiStepLR(optimizer, [30, 60, 80]) # Define train and valid graph @jit.trace(symbolic=True) def train_func(image, label): model.train() logits = model(image) loss = F.cross_entropy_with_softmax(logits, label) acc1, acc5 = F.accuracy(logits, label, (1, 5)) optimizer.backward(loss) # compute gradients if dist.is_distributed(): # all_reduce_mean loss = dist.all_reduce_sum(loss, "train_loss") / dist.get_world_size() acc1 = dist.all_reduce_sum(acc1, "train_acc1") / dist.get_world_size() acc5 = dist.all_reduce_sum(acc5, "train_acc5") / dist.get_world_size() return loss, acc1, acc5 @jit.trace(symbolic=True) def valid_func(image, label): model.eval() logits = model(image) loss = F.cross_entropy_with_softmax(logits, label) acc1, acc5 = F.accuracy(logits, label, (1, 5)) if dist.is_distributed(): # all_reduce_mean loss = dist.all_reduce_sum(loss, "valid_loss") / dist.get_world_size() acc1 = dist.all_reduce_sum(acc1, "valid_acc1") / dist.get_world_size() acc5 = dist.all_reduce_sum(acc5, "valid_acc5") / dist.get_world_size() return loss, acc1, acc5 # Build train and valid datasets logger.info("preparing dataset..") train_dataset = data.dataset.ImageNet(args.data, train=True) train_sampler = data.RandomSampler(train_dataset, batch_size=args.batch_size, drop_last=True) train_queue = data.DataLoader( train_dataset, sampler=train_sampler, transform=T.Compose([ # Baseline Augmentation for small models T.RandomResizedCrop(224), T.RandomHorizontalFlip(), T.Normalize(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395]), # BGR T.ToMode("CHW"), ]) if args.arch in ("resnet18", "resnet34") else T.Compose( [ # Facebook Augmentation for large models T.RandomResizedCrop(224), T.RandomHorizontalFlip(), T.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4), T.Lighting(0.1), T.Normalize(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395]), # BGR T.ToMode("CHW"), ]), num_workers=args.workers, ) valid_dataset = data.dataset.ImageNet(args.data, train=False) valid_sampler = data.SequentialSampler(valid_dataset, batch_size=100, drop_last=False) valid_queue = data.DataLoader( valid_dataset, sampler=valid_sampler, transform=T.Compose([ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395]), # BGR T.ToMode("CHW"), ]), num_workers=args.workers, ) # Start training top1_acc = 0 for epoch in range(0, args.epochs): logger.info("Epoch %d LR %.3e", epoch, scheduler.get_lr()[0]) _, train_acc, train_acc5 = train(train_func, train_queue, optimizer, args, epoch=epoch) logger.info("Epoch %d Train %.3f / %.3f", epoch, train_acc, train_acc5) _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args, epoch=epoch) logger.info("Epoch %d Valid %.3f / %.3f", epoch, valid_acc, valid_acc5) scheduler.step() if rank == 0: # save checkpoint mge.save( { "epoch": epoch + 1, "state_dict": model.state_dict(), "accuracy": valid_acc, }, os.path.join(save_dir, "checkpoint.pkl"), ) if valid_acc > top1_acc: top1_acc = valid_acc shutil.copy( os.path.join(save_dir, "checkpoint.pkl"), os.path.join(save_dir, "model_best.pkl"), )
def worker(world_size, args): # pylint: disable=too-many-statements rank = dist.get_rank() if world_size > 1: logger.info("init distributed process group {} / {}".format( rank, world_size)) save_dir = os.path.join(args.save, args.arch + "." + args.mode) if not os.path.exists(save_dir): os.makedirs(save_dir, exist_ok=True) mge.set_log_file(os.path.join(save_dir, "log.txt")) model = models.__dict__[args.arch]() cfg = config.get_config(args.arch) cfg.LEARNING_RATE *= world_size # scale learning rate in distributed training total_batch_size = cfg.BATCH_SIZE * world_size steps_per_epoch = 1280000 // total_batch_size total_steps = steps_per_epoch * cfg.EPOCHS if args.mode != "normal": quantize_qat(model, qconfig=Q.ema_fakequant_qconfig) if world_size > 1: # Sync parameters dist.bcast_list_(model.parameters(), dist.WORLD) # Autodiff gradient manager gm = autodiff.GradManager().attach( model.parameters(), callbacks=dist.make_allreduce_cb("MEAN") if world_size > 1 else None, ) optimizer = optim.SGD( get_parameters(model, cfg), lr=cfg.LEARNING_RATE, momentum=cfg.MOMENTUM, ) # Define train and valid graph def train_func(image, label): with gm: model.train() logits = model(image) loss = F.loss.cross_entropy(logits, label, label_smooth=0.1) acc1, acc5 = F.topk_accuracy(logits, label, (1, 5)) gm.backward(loss) optimizer.step().clear_grad() return loss, acc1, acc5 def valid_func(image, label): model.eval() logits = model(image) loss = F.loss.cross_entropy(logits, label, label_smooth=0.1) acc1, acc5 = F.topk_accuracy(logits, label, (1, 5)) return loss, acc1, acc5 # Build train and valid datasets logger.info("preparing dataset..") train_dataset = data.dataset.ImageNet(args.data, train=True) train_sampler = data.Infinite( data.RandomSampler(train_dataset, batch_size=cfg.BATCH_SIZE, drop_last=True)) train_queue = data.DataLoader( train_dataset, sampler=train_sampler, transform=T.Compose([ T.RandomResizedCrop(224), T.RandomHorizontalFlip(), cfg.COLOR_JITTOR, T.Normalize(mean=128), T.ToMode("CHW"), ]), num_workers=args.workers, ) train_queue = iter(train_queue) valid_dataset = data.dataset.ImageNet(args.data, train=False) valid_sampler = data.SequentialSampler(valid_dataset, batch_size=100, drop_last=False) valid_queue = data.DataLoader( valid_dataset, sampler=valid_sampler, transform=T.Compose([ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW") ]), num_workers=args.workers, ) def adjust_learning_rate(step, epoch): learning_rate = cfg.LEARNING_RATE if cfg.SCHEDULER == "Linear": learning_rate *= 1 - float(step) / total_steps elif cfg.SCHEDULER == "Multistep": learning_rate *= cfg.SCHEDULER_GAMMA**bisect.bisect_right( cfg.SCHEDULER_STEPS, epoch) else: raise ValueError(cfg.SCHEDULER) for param_group in optimizer.param_groups: param_group["lr"] = learning_rate return learning_rate # Start training objs = AverageMeter("Loss") top1 = AverageMeter("Acc@1") top5 = AverageMeter("Acc@5") total_time = AverageMeter("Time") t = time.time() for step in range(0, total_steps): # Linear learning rate decay epoch = step // steps_per_epoch learning_rate = adjust_learning_rate(step, epoch) image, label = next(train_queue) image = mge.tensor(image, dtype="float32") label = mge.tensor(label, dtype="int32") n = image.shape[0] loss, acc1, acc5 = train_func(image, label) top1.update(100 * acc1.numpy()[0], n) top5.update(100 * acc5.numpy()[0], n) objs.update(loss.numpy()[0], n) total_time.update(time.time() - t) t = time.time() if step % args.report_freq == 0 and rank == 0: logger.info( "TRAIN e%d %06d %f %s %s %s %s", epoch, step, learning_rate, objs, top1, top5, total_time, ) objs.reset() top1.reset() top5.reset() total_time.reset() if step != 0 and step % 10000 == 0 and rank == 0: logger.info("SAVING %06d", step) mge.save( { "step": step, "state_dict": model.state_dict() }, os.path.join(save_dir, "checkpoint.pkl"), ) if step % 10000 == 0 and step != 0: _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args) logger.info("TEST %06d %f, %f", step, valid_acc, valid_acc5) mge.save( { "step": step, "state_dict": model.state_dict() }, os.path.join(save_dir, "checkpoint-final.pkl"), ) _, valid_acc, valid_acc5 = infer(valid_func, valid_queue, args) logger.info("TEST %06d %f, %f", step, valid_acc, valid_acc5)
# ------------------------------------------------------------------------------ import megengine.data as data import megengine.data.transform as T import megengine.optimizer as optim import megengine_mimicry as mmc import megengine_mimicry.nets.dcgan.dcgan_cifar as dcgan dataset = mmc.datasets.load_dataset(root=None, name='cifar10') dataloader = data.DataLoader(dataset, sampler=data.Infinite( data.RandomSampler(dataset, batch_size=64, drop_last=True)), transform=T.Compose( [T.Normalize(std=255), T.ToMode("CHW")]), num_workers=4) netG = dcgan.DCGANGeneratorCIFAR() netD = dcgan.DCGANDiscriminatorCIFAR() optD = optim.Adam(netD.parameters(), 2e-4, betas=(0.0, 0.9)) optG = optim.Adam(netG.parameters(), 2e-4, betas=(0.0, 0.9)) LOG_DIR = "./log/dcgan_example" trainer = mmc.training.Trainer(netD=netD, netG=netG, optD=optD, optG=optG, n_dis=5,
# This file has been modified by Megvii ("Megvii Modifications"). # All Megvii Modifications are Copyright (C) 2014-2019 Megvii Inc. All rights reserved. # ------------------------------------------------------------------------------ import megengine.data as data import megengine.data.transform as T import megengine.optimizer as optim import megengine_mimicry as mmc import megengine_mimicry.nets.wgan.wgan_cifar as wgan dataset = mmc.datasets.load_dataset(root=None, name='cifar10') dataloader = data.DataLoader( dataset, sampler=data.Infinite( data.RandomSampler(dataset, batch_size=64, drop_last=True)), transform=T.Compose([T.Normalize(mean=127, std=127), T.ToMode("CHW")]), num_workers=4) netG = wgan.WGANGeneratorCIFAR() netD = wgan.WGANDiscriminatorCIFAR() optD = optim.Adam(netD.parameters(), 2e-4, betas=(0.0, 0.9)) optG = optim.Adam(netG.parameters(), 2e-4, betas=(0.0, 0.9)) LOG_DIR = "./log/wgan_example" trainer = mmc.training.Trainer(netD=netD, netG=netG, optD=optD, optG=optG, n_dis=5,
def worker(rank, world_size, args): # pylint: disable=too-many-statements if world_size > 1: # Initialize distributed process group logger.info("init distributed process group {} / {}".format(rank, world_size)) dist.init_process_group( master_ip="localhost", master_port=23456, world_size=world_size, rank=rank, dev=rank, ) save_dir = os.path.join(args.save, args.arch + "." + args.mode) if not os.path.exists(save_dir): os.makedirs(save_dir, exist_ok=True) mge.set_log_file(os.path.join(save_dir, "log.txt")) model = models.__dict__[args.arch]() cfg = config.get_finetune_config(args.arch) cfg.LEARNING_RATE *= world_size # scale learning rate in distributed training total_batch_size = cfg.BATCH_SIZE * world_size steps_per_epoch = 1280000 // total_batch_size total_steps = steps_per_epoch * cfg.EPOCHS # load calibration model assert args.checkpoint logger.info("Load pretrained weights from %s", args.checkpoint) ckpt = mge.load(args.checkpoint) ckpt = ckpt["state_dict"] if "state_dict" in ckpt else ckpt model.load_state_dict(ckpt, strict=False) # Build valid datasets valid_dataset = data.dataset.ImageNet(args.data, train=False) # valid_dataset = ImageNetNoriDataset(args.data) valid_sampler = data.SequentialSampler( valid_dataset, batch_size=100, drop_last=False ) valid_queue = data.DataLoader( valid_dataset, sampler=valid_sampler, transform=T.Compose( [ T.Resize(256), T.CenterCrop(224), T.Normalize(mean=128), T.ToMode("CHW"), ] ), num_workers=args.workers, ) # calibration model.fc.disable_quantize() model = quantize_qat(model, qconfig=Q.calibration_qconfig) # calculate scale @jit.trace(symbolic=True) def calculate_scale(image, label): model.eval() enable_observer(model) logits = model(image) loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1) acc1, acc5 = F.accuracy(logits, label, (1, 5)) if dist.is_distributed(): # all_reduce_mean loss = dist.all_reduce_sum(loss, "valid_loss") / dist.get_world_size() acc1 = dist.all_reduce_sum(acc1, "valid_acc1") / dist.get_world_size() acc5 = dist.all_reduce_sum(acc5, "valid_acc5") / dist.get_world_size() return loss, acc1, acc5 # model.fc.disable_quantize() infer(calculate_scale, valid_queue, args) # quantized model = quantize(model) # eval quantized model @jit.trace(symbolic=True) def eval_func(image, label): model.eval() logits = model(image) loss = F.cross_entropy_with_softmax(logits, label, label_smooth=0.1) acc1, acc5 = F.accuracy(logits, label, (1, 5)) if dist.is_distributed(): # all_reduce_mean loss = dist.all_reduce_sum(loss, "valid_loss") / dist.get_world_size() acc1 = dist.all_reduce_sum(acc1, "valid_acc1") / dist.get_world_size() acc5 = dist.all_reduce_sum(acc5, "valid_acc5") / dist.get_world_size() return loss, acc1, acc5 _, valid_acc, valid_acc5 = infer(eval_func, valid_queue, args) logger.info("TEST %f, %f", valid_acc, valid_acc5) # save quantized model mge.save( {"step": -1, "state_dict": model.state_dict()}, os.path.join(save_dir, "checkpoint-calibration.pkl") ) logger.info("save in {}".format(os.path.join(save_dir, "checkpoint-calibration.pkl")))
def fetch_input_transform(): transformer = T.Compose([T.ToMode(), T.Normalize(mean=0, std=255.0)]) return transformer