import numpy as np

import megengine as meg
import megengine.functional as F
import megengine.optimizer as optim

# build_dataset, OmniglotFC and MAML are provided by the accompanying project files.


def main():
    nway = 5
    batch_size = 32
    train_loader, val_loader = build_dataset(nway=nway, batch_size=batch_size)

    model = OmniglotFC(28 * 28, nway)
    model.train()
    maml = MAML(model)

    loss_fn = F.cross_entropy_with_softmax
    opt = optim.Adam(maml.trainable_params, lr=0.003)
    accuracy = F.accuracy

    # Placeholder tensors reused across iterations
    adapt_data = meg.tensor(dtype="float32")
    adapt_label = meg.tensor(dtype="int32")
    eval_data = meg.tensor(dtype="float32")
    eval_label = meg.tensor(dtype="int32")

    iteration = 0
    for ep in range(500):
        for images_support, labels_support, images_query, labels_query in train_loader:
            opt.zero_grad()
            meta_train_error = 0.0
            meta_train_accuracy = 0.0
            for i in range(batch_size):
                image_support, label_support, image_query, label_query = (
                    images_support[i],
                    labels_support[i],
                    images_query[i],
                    labels_query[i],
                )

                # Inner loop: adapt on the support set with one gradient step
                adapt_data.set_value(np.squeeze(image_support, 1))
                adapt_label.set_value(np.squeeze(label_support, 1))
                loss = loss_fn(model.forward(adapt_data), adapt_label)
                gradients = F.grad(
                    loss,
                    maml.trainable_params,
                    use_virtual_grad=False,
                    return_zero_for_nodep=False,
                )
                fast_weights = [
                    p - 0.5 * g for p, g in zip(maml.trainable_params, gradients)
                ]
                maml.replace_fast_parameter(fast_weights)

                # Outer loop: evaluate the adapted model on the query set
                eval_data.set_value(np.squeeze(image_query, 1))
                eval_label.set_value(np.squeeze(label_query, 1))
                predictions = model.forward(eval_data)
                valid_error = loss_fn(predictions, eval_label)
                valid_accuracy = accuracy(predictions, eval_label)
                opt.backward(valid_error)

                meta_train_error += valid_error.numpy().item()
                meta_train_accuracy += valid_accuracy.numpy().item()

            # for p in maml.trainable_params:
            #     p.grad = p.grad * (1.0 / batch_size)
            opt.step()

            print("Iteration", iteration)
            print("Meta Train Error", meta_train_error / batch_size)
            print("Meta Train Accuracy", meta_train_accuracy / batch_size)
            iteration += 1
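# Framework-agnostic toy illustration (NumPy only, not part of the original script)
# of the inner/outer pattern used above: adapt a copy of the weights on a "support"
# loss with one SGD step (the fast weights, mirroring fast_weights = p - 0.5 * g),
# then evaluate on the "query" loss and update the meta-parameter. The quadratic
# per-task loss and the first-order outer update are assumptions for illustration.
import numpy as np

w = np.array([0.0])                 # meta-parameter
inner_lr, outer_lr = 0.4, 0.1

for task_target in [1.0, 2.0, 3.0]:             # three toy "tasks"
    support_grad = 2 * (w - task_target)        # d/dw of the support loss (w - target)^2
    fast_w = w - inner_lr * support_grad        # inner-loop "fast weights"
    query_grad = 2 * (fast_w - task_target)     # gradient of the query loss at the fast weights
    w = w - outer_lr * query_grad               # first-order meta-update

print(w)  # w drifts toward the tasks' targets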
        total_steps += 1

    # Report averaged training metrics at the end of train()
    result = {
        "train_loss": sum_loss / total_steps,
        "train_accuracy": sum_accuracy / total_examples,
    }
    logger.info("***** Train results *****")
    for key in sorted(result.keys()):
        logger.info("%s = %s", key, str(result[key]))


if __name__ == "__main__":
    bert, config, vocab_file = create_hub_bert(args.pretrained_bert, pretrained=True)
    args.vocab_file = vocab_file
    model = BertForSequenceClassification(config, num_labels=2, bert=bert)
    mrpc_dataset = MRPCDataset(args)

    optimizer = optim.Adam(
        model.parameters(requires_grad=True),
        lr=args.learning_rate,
    )

    train_dataloader, train_size = mrpc_dataset.get_train_dataloader()
    eval_dataloader, eval_size = mrpc_dataset.get_eval_dataloader()

    for epoch in range(args.num_train_epochs):
        logger.info("***** Epoch {} *****".format(epoch + 1))
        train(train_dataloader, model, optimizer)
        mge.save(model.state_dict(), args.save_model_path)
        eval(eval_dataloader, model)
    # Pad(2),
    # 'CHW' means converting the image from (height, width, channel) format
    # to (channel, height, width) format
    # ToMode('CHW'),
]))

mnist_test_dataloader = DataLoader(
    dataset=mnist_test_dataset,
    sampler=sequential_sampler,
)

# model
from model import get_net

net = get_net()
optimizer = optim.Adam(
    net.parameters(),
    lr=0.01,
)


def get_kl_divergence(mean, var):
    # Closed-form KL(N(mean, var) || N(0, 1)), summed over the latent dimension
    # and averaged over the batch.
    return 1 / 2 * (mean ** 2 + var - F.log(var) - 1).sum(axis=1).mean()


data = mge.tensor()
label = mge.tensor(dtype="float32")
code = mge.tensor(dtype="float32")
onehot = mge.tensor(dtype="int32")

total_epochs = 256
for epoch in range(total_epochs):
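# Quick numerical check (NumPy only, independent of the script above) that the
# closed-form expression used in get_kl_divergence, 0.5 * (mu^2 + var - log(var) - 1),
# matches a Monte Carlo estimate of KL(N(mu, var) || N(0, 1)). The concrete values
# of mu and var are arbitrary examples.
import numpy as np

mu, var = 0.7, 1.8
closed_form = 0.5 * (mu ** 2 + var - np.log(var) - 1)

samples = np.random.normal(mu, np.sqrt(var), size=1_000_000)
log_q = -0.5 * (np.log(2 * np.pi * var) + (samples - mu) ** 2 / var)  # log density of N(mu, var)
log_p = -0.5 * (np.log(2 * np.pi) + samples ** 2)                     # log density of N(0, 1)
monte_carlo = np.mean(log_q - log_p)

print(closed_form, monte_carlo)  # the two values should agree closely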
def worker(master_ip, port, rank, world_size, args):
    if world_size > 1:
        # Initialize distributed process group
        logger.info("init distributed process group {} / {}".format(rank, world_size))
        dist.init_process_group(
            master_ip=master_ip,
            port=port,
            world_size=world_size,
            rank=rank,
            device=rank,
        )

    model_name = "{}_{}x{}".format(args.arch, cfg.input_shape[0], cfg.input_shape[1])
    save_dir = os.path.join(args.save, model_name)

    model = getattr(kpm, args.arch)()
    model.train()
    start_epoch = 0
    if args.resume is not None:
        file = mge.load(args.resume)
        model.load_state_dict(file["state_dict"])
        start_epoch = file["epoch"]

    optimizer = optim.Adam(
        model.parameters(), lr=cfg.initial_lr, weight_decay=cfg.weight_decay
    )

    gm = GradManager()
    if dist.get_world_size() > 1:
        gm.attach(
            model.parameters(),
            callbacks=[dist.make_allreduce_cb("SUM", dist.WORLD)],
        )
    else:
        gm.attach(model.parameters())

    if dist.get_world_size() > 1:
        dist.bcast_list_(model.parameters(), dist.WORLD)  # sync parameters

    # Build train datasets
    logger.info("preparing dataset..")
    ann_file = os.path.join(
        cfg.data_root, "annotations", "person_keypoints_train2017.json"
    )
    train_dataset = COCOJoints(
        cfg.data_root,
        ann_file,
        image_set="train2017",
        order=("image", "keypoints", "boxes", "info"),
    )
    logger.info("Num of Samples: {}".format(len(train_dataset)))
    train_sampler = data.RandomSampler(
        train_dataset, batch_size=cfg.batch_size, drop_last=True
    )

    transforms = [
        T.Normalize(mean=cfg.img_mean, std=cfg.img_std),
        RandomHorizontalFlip(0.5, keypoint_flip_order=cfg.keypoint_flip_order),
    ]
    if cfg.half_body_transform:
        transforms.append(
            HalfBodyTransform(
                cfg.upper_body_ids, cfg.lower_body_ids, cfg.prob_half_body
            )
        )
    if cfg.extend_boxes:
        transforms.append(
            ExtendBoxes(cfg.x_ext, cfg.y_ext, cfg.input_shape[1] / cfg.input_shape[0])
        )
    transforms += [
        RandomBoxAffine(
            degrees=cfg.rotate_range,
            scale=cfg.scale_range,
            output_shape=cfg.input_shape,
            rotate_prob=cfg.rotation_prob,
            scale_prob=cfg.scale_prob,
        )
    ]
    transforms += [T.ToMode()]

    train_queue = data.DataLoader(
        train_dataset,
        sampler=train_sampler,
        num_workers=args.workers,
        transform=T.Compose(transforms=transforms, order=train_dataset.order),
        collator=HeatmapCollator(
            cfg.input_shape,
            cfg.output_shape,
            cfg.keypoint_num,
            cfg.heat_thr,
            cfg.heat_kernels if args.multi_scale_supervision else cfg.heat_kernels[-1:],
            cfg.heat_range,
        ),
    )

    # Start training
    for epoch in range(start_epoch, cfg.epochs):
        loss = train(model, train_queue, optimizer, gm, epoch=epoch)
        logger.info("Epoch %d Train %.6f ", epoch, loss)

        if rank == 0 and epoch % cfg.save_freq == 0:  # save checkpoint
            mge.save(
                {"epoch": epoch + 1, "state_dict": model.state_dict()},
                os.path.join(save_dir, "epoch_{}.pkl".format(epoch)),
            )
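# train() lives elsewhere in the project; below is a minimal sketch of the per-epoch
# loop worker() expects, assuming the collator yields dicts with "data" and "heatmap"
# entries and that a plain mean-squared heatmap loss is used. Both are assumptions
# for illustration, not the repository's actual implementation.
import megengine as mge
import megengine.functional as F


def train(model, train_queue, optimizer, gm, epoch=0):
    total_loss, num_steps = 0.0, 0
    for mini_batch in train_queue:
        image = mge.tensor(mini_batch["data"])        # assumed collator key
        heatmap = mge.tensor(mini_batch["heatmap"])   # assumed collator key
        with gm:  # record the forward pass so gm.backward can compute gradients
            pred = model(image)
            loss = F.nn.square_loss(pred, heatmap)
            gm.backward(loss)
            optimizer.step().clear_grad()
        total_loss += loss.item()
        num_steps += 1
    return total_loss / max(num_steps, 1)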
import megengine.data as data
import megengine.data.transform as T
import megengine.optimizer as optim

import megengine_mimicry as mmc
import megengine_mimicry.nets.dcgan.dcgan_cifar as dcgan

# Data handling objects
dataset = mmc.datasets.load_dataset(root=None, name="cifar10")
dataloader = data.DataLoader(
    dataset,
    sampler=data.Infinite(data.RandomSampler(dataset, batch_size=64, drop_last=True)),
    transform=T.Compose([T.Normalize(std=255), T.ToMode("CHW")]),
    num_workers=4,
)

# Define models and optimizers
netG = dcgan.DCGANGeneratorCIFAR()
netD = dcgan.DCGANDiscriminatorCIFAR()
optD = optim.Adam(netD.parameters(), 2e-4, betas=(0.0, 0.9))
optG = optim.Adam(netG.parameters(), 2e-4, betas=(0.0, 0.9))

# Build the trainer
LOG_DIR = "./log/dcgan_example"
trainer = mmc.training.Trainer(
    netD=netD,
    netG=netG,
    optD=optD,
    optG=optG,
    n_dis=5,
    num_steps=100000,
    lr_decay="linear",
    dataloader=dataloader,
    log_dir=LOG_DIR,
    device=0,
)
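# The excerpt stops after constructing the Trainer. In pytorch-mimicry, training is
# launched with trainer.train(); assuming the MegEngine port keeps the same API,
# the next step would be:
trainer.train()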
def worker(args):
    # pylint: disable=too-many-statements
    rank = dist.get_rank()
    world_size = dist.get_world_size()
    if rank == 0:
        os.makedirs(os.path.join(args.save, args.arch), exist_ok=True)
        megengine.logger.set_log_file(os.path.join(args.save, args.arch, "log.txt"))

    # build dataset
    train_dataloader, valid_dataloader = build_dataset(args)
    train_queue = iter(train_dataloader)  # infinite
    steps_per_epoch = args.steps_per_epoch

    # build model
    model = UNetD(3)

    # Sync parameters
    if world_size > 1:
        dist.bcast_list_(model.parameters(), dist.WORLD)

    # Autodiff gradient manager
    gm = autodiff.GradManager().attach(
        model.parameters(),
        callbacks=dist.make_allreduce_cb("SUM") if world_size > 1 else None,
    )

    # Optimizer
    opt = optim.Adam(
        model.parameters(),
        lr=args.lr,
        weight_decay=args.weight_decay * world_size,  # scale weight decay in "SUM" mode
    )

    # mixup
    def preprocess(image, label):
        if args.dnd:
            image, label = MixUp_AUG(image, label)
        return image, label

    # train and valid func
    def train_step(image, label):
        with gm:
            logits = model(image)
            logits = image - logits
            loss = F.nn.l1_loss(logits, label)
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    def valid_step(image, label):
        pred = model(image)
        pred = image - pred
        mae_iter = F.nn.l1_loss(pred, label)
        psnr_it = batch_PSNR(pred, label)
        # print(psnr_it.item())
        if world_size > 1:
            mae_iter = F.distributed.all_reduce_sum(mae_iter) / world_size
            psnr_it = F.distributed.all_reduce_sum(psnr_it) / world_size
        return mae_iter, psnr_it

    # cosine learning rate schedule
    def adjust_learning_rate(step):
        # lr = 1e-6 + 0.5 * (args.lr - 1e-6) * (1 + np.cos(step / (args.epochs * steps_per_epoch) * np.pi))
        lr = args.lr * (np.cos(step / (steps_per_epoch * args.epochs) * np.pi) + 1) / 2
        for param_group in opt.param_groups:
            param_group["lr"] = lr
        return lr

    # start training
    for step in range(0, int(args.epochs * steps_per_epoch)):
        lr = adjust_learning_rate(step)

        t_step = time.time()
        image, label = next(train_queue)
        if step > steps_per_epoch:
            image, label = preprocess(image, label)
        image = megengine.tensor(image)
        label = megengine.tensor(label)
        t_data = time.time() - t_step

        loss = train_step(image, label)
        t_train = time.time() - t_step
        speed = 1.0 / t_train

        if step % args.print_freq == 0 and dist.get_rank() == 0:
            logging.info(
                "Epoch {} Step {}, Speed={:.2g} mb/s, dp_cost={:.2g}, Loss={:5.2e}, lr={:.2e}".format(
                    step // int(steps_per_epoch), step, speed, t_data / t_train, loss.item(), lr
                )
            )

        if (step + 1) % steps_per_epoch == 0:
            model.eval()
            loss, psnr_v = valid(valid_step, valid_dataloader)
            model.train()
            logging.info(
                "Epoch {} Test mae {:.3f}, psnr {:.3f}".format(
                    (step + 1) // steps_per_epoch, loss.item(), psnr_v.item(),
                )
            )
            if rank == 0:
                megengine.save(
                    {
                        "epoch": (step + 1) // steps_per_epoch,
                        "state_dict": model.state_dict(),
                    },
                    os.path.join(args.save, args.arch, "checkpoint.pkl"),
                )
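# valid() is defined elsewhere in the project; below is a minimal sketch consistent
# with how it is called above (an assumption for illustration, not the actual
# implementation): average the per-batch MAE and PSNR returned by valid_step over
# the validation set, keeping the results as tensors so .item() works on them.
import megengine


def valid(step_fn, loader):
    maes, psnrs = [], []
    for image, label in loader:
        image = megengine.tensor(image)
        label = megengine.tensor(label)
        mae, psnr = step_fn(image, label)
        maes.append(mae)
        psnrs.append(psnr)
    return sum(maes) / len(maes), sum(psnrs) / len(psnrs)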
def worker(rank, world_size, args):
    if world_size > 1:
        # Initialize distributed process group
        logger.info("init distributed process group {} / {}".format(rank, world_size))
        dist.init_process_group(
            master_ip="localhost",
            master_port=23456,
            world_size=world_size,
            rank=rank,
            dev=rank,
        )

    model_name = "{}_{}x{}".format(args.arch, cfg.input_shape[0], cfg.input_shape[1])
    save_dir = os.path.join(args.save, model_name)

    model = getattr(M, args.arch)(pretrained=args.pretrained)
    model.train()
    start_epoch = 0
    if args.c is not None:
        file = mge.load(args.c)
        model.load_state_dict(file["state_dict"])
        start_epoch = file["epoch"]

    optimizer = optim.Adam(
        model.parameters(requires_grad=True),
        lr=args.lr,
        weight_decay=cfg.weight_decay,
    )

    # Build train datasets
    logger.info("preparing dataset..")
    train_dataset = COCOJoints(
        args.data_root,
        args.ann_file,
        image_set="train",
        order=("image", "keypoints", "boxes", "info"),
    )
    train_sampler = data.RandomSampler(
        train_dataset, batch_size=args.batch_size, drop_last=True
    )

    transforms = [T.Normalize(mean=cfg.IMG_MEAN, std=cfg.IMG_STD)]
    if cfg.half_body_transform:
        transforms.append(
            HalfBodyTransform(
                cfg.upper_body_ids, cfg.lower_body_ids, cfg.prob_half_body
            )
        )
    if cfg.extend_boxes:
        transforms.append(
            ExtendBoxes(cfg.x_ext, cfg.y_ext, cfg.input_shape[1] / cfg.input_shape[0])
        )
    transforms += [
        RandomHorizontalFlip(0.5, keypoint_flip_order=cfg.keypoint_flip_order)
    ]
    transforms += [
        RandomBoxAffine(
            degrees=cfg.rotate_range,
            scale=cfg.scale_range,
            output_shape=cfg.input_shape,
            rotate_prob=cfg.rotation_prob,
            scale_prob=cfg.scale_prob,
        )
    ]
    transforms += [T.ToMode()]

    train_queue = data.DataLoader(
        train_dataset,
        sampler=train_sampler,
        num_workers=args.workers,
        transform=T.Compose(transforms=transforms, order=train_dataset.order),
        collator=HeatmapCollator(
            cfg.input_shape,
            cfg.output_shape,
            cfg.keypoint_num,
            cfg.heat_thre,
            cfg.heat_kernel if args.multi_scale_supervision else cfg.heat_kernel[-1:],
            cfg.heat_range,
        ),
    )

    # Start training
    for epoch in range(start_epoch, args.epochs):
        loss = train(model, train_queue, optimizer, args, epoch=epoch)
        logger.info("Epoch %d Train %.6f ", epoch, loss)

        if rank == 0:  # save checkpoint
            mge.save(
                {
                    "epoch": epoch + 1,
                    "state_dict": model.state_dict(),
                },
                os.path.join(save_dir, "epoch_{}.pkl".format(epoch)),
            )