Example #1
def main():
    nway = 5
    batch_size = 32
    train_loader, val_loader = build_dataset(nway=nway, batch_size=batch_size)
    model = OmniglotFC(28 * 28, nway)
    model.train()
    maml = MAML(model)

    loss_fn = F.cross_entropy_with_softmax
    opt = optim.Adam(maml.trainable_params, lr=0.003)
    accuracy = F.accuracy
    adapt_data = meg.tensor(dtype='float32')
    adapt_label = meg.tensor(dtype='int32')
    eval_data = meg.tensor(dtype='float32')
    eval_label = meg.tensor(dtype='int32')
    iteration = 0
    for ep in range(500):
        for (images_support, labels_support, images_query,
             labels_query) in train_loader:
            opt.zero_grad()
            meta_train_error = 0.0
            meta_train_accuracy = 0.0
            for i in range(batch_size):
                (image_support, label_support, image_query,
                 label_query) = (images_support[i], labels_support[i],
                                 images_query[i], labels_query[i])
                adapt_data.set_value(np.squeeze(image_support, 1))
                adapt_label.set_value(np.squeeze(label_support, 1))

                loss = loss_fn(model.forward(adapt_data), adapt_label)
                gradients = F.grad(loss,
                                   maml.trainable_params,
                                   use_virtual_grad=False,
                                   return_zero_for_nodep=False)

                # Inner-loop update: one SGD step with a fixed inner lr of 0.5
                fast_weights = [
                    p - 0.5 * g
                    for p, g in zip(maml.trainable_params, gradients)
                ]

                maml.replace_fast_parameter(fast_weights)
                # Evaluate the adapted model
                eval_data.set_value(np.squeeze(image_query, 1))
                eval_label.set_value(np.squeeze(label_query, 1))

                predictions = model.forward(eval_data)
                valid_error = loss_fn(predictions, eval_label)
                valid_accuracy = accuracy(predictions, eval_label)
                opt.backward(valid_error)  # accumulates meta-gradients across tasks
                meta_train_error += valid_error.numpy().item()
                meta_train_accuracy += valid_accuracy.numpy().item()

            # for p in maml.trainable_params:
            #   p.grad = p.grad * (1.0 / batch_size)
            opt.step()
            print('Iteration', iteration)
            print('Meta Train Error', meta_train_error / batch_size)
            print('Meta Train Accuracy', meta_train_accuracy / batch_size)
            iteration += 1
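
The inner loop above is first-order MAML: each task takes a single SGD step on its support set (new weights p - 0.5 * g for every parameter), and the query-set loss of the adapted weights is what opt.backward accumulates for the meta-update. As a reading aid, here is the same adaptation step factored into a helper; a minimal sketch that follows the names in the snippet, with tensors passed directly instead of via set_value (an assumption):

def fast_adapt(maml, model, loss_fn, support_x, support_y, inner_lr=0.5):
    # One task-specific SGD step on copies of the parameters; the original
    # meta-parameters are left untouched until opt.step() runs.
    loss = loss_fn(model.forward(support_x), support_y)
    gradients = F.grad(loss, maml.trainable_params,
                       use_virtual_grad=False, return_zero_for_nodep=False)
    fast_weights = [p - inner_lr * g
                    for p, g in zip(maml.trainable_params, gradients)]
    maml.replace_fast_parameter(fast_weights)
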
Example #2
        total_steps += 1

    result = {
        "train_loss": sum_loss / total_steps,
        "train_accuracy": sum_accuracy / total_examples,
    }

    logger.info("***** Train results *****")
    for key in sorted(result.keys()):
        logger.info("%s = %s", key, str(result[key]))


if __name__ == "__main__":
    bert, config, vocab_file = create_hub_bert(args.pretrained_bert,
                                               pretrained=True)
    args.vocab_file = vocab_file
    model = BertForSequenceClassification(config, num_labels=2, bert=bert)
    mrpc_dataset = MRPCDataset(args)
    optimizer = optim.Adam(
        model.parameters(requires_grad=True),
        lr=args.learning_rate,
    )
    train_dataloader, train_size = mrpc_dataset.get_train_dataloader()
    eval_dataloader, eval_size = mrpc_dataset.get_eval_dataloader()
    for epoch in range(args.num_train_epochs):
        logger.info("***** Epoch {} *****".format(epoch + 1))
        train(train_dataloader, model, optimizer)
        mge.save(model.state_dict(), args.save_model_path)
        eval(eval_dataloader, model)
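
The excerpt opens midway through a train function: sum_loss, sum_accuracy, total_steps, and total_examples have already been accumulated batch by batch before the averaging shown at the top. A hedged reconstruction of that surrounding loop, purely as a reading aid (the batch handling and the train_step helper are hypothetical, not the original code):

def train(dataloader, model, optimizer):
    sum_loss, sum_accuracy = 0.0, 0.0
    total_steps, total_examples = 0, 0
    for batch in dataloader:
        # hypothetical helper returning (batch loss, #correct, batch size)
        loss, correct, batch_size = train_step(model, optimizer, batch)
        sum_loss += loss
        sum_accuracy += correct
        total_examples += batch_size
        total_steps += 1
    # ...the excerpt above resumes here, averaging and logging the results
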
Example #3
        #Pad(2),
        # 'CHW' converts an image from (height, width, channel) layout to (channel, height, width)
        #ToMode('CHW'),
    ]))
mnist_test_dataloader = DataLoader(
    dataset=mnist_test_dataset,
    sampler=sequential_sampler,
)

# model
from model import get_net

net = get_net()

optimizer = optim.Adam(
    net.parameters(),
    lr=0.01,
)


def get_kl_divergence(mean, var):
    # Closed-form KL divergence between the diagonal Gaussian N(mean, var)
    # and the standard normal: 0.5 * sum(mean^2 + var - log(var) - 1),
    # averaged over the batch.
    return 1 / 2 * (mean**2 + var - F.log(var) - 1).sum(axis=1).mean()


data = mge.tensor()
label = mge.tensor(dtype="float32")
code = mge.tensor(dtype="float32")
onehot = mge.tensor(dtype="int32")

total_epochs = 256
for epoch in range(total_epochs):
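
The snippet is cut off at the top of the epoch loop. Given the tensors prepared above (data, label, code, onehot) and the KL helper, the body is presumably a VAE-style step; a minimal sketch under those assumptions, in which the loader name, the net call signature, and the reconstruction term are all guesses rather than the original code:

    for batch_data, _ in mnist_train_dataloader:   # hypothetical train loader
        data.set_value(batch_data)
        optimizer.zero_grad()
        recon, mean, var = net(data)                # assumed model outputs
        recon_loss = ((recon - data) ** 2).mean()   # assumed MSE reconstruction
        loss = recon_loss + get_kl_divergence(mean, var)
        optimizer.backward(loss)                    # old-style optimizer API, as in Example #1
        optimizer.step()
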
Example #4
def worker(master_ip, port, rank, world_size, args):
    if world_size > 1:
        # Initialize distributed process group
        logger.info("init distributed process group {} / {}".format(rank, world_size))
        dist.init_process_group(
            master_ip=master_ip,
            port=port,
            world_size=world_size,
            rank=rank,
            device=rank,
        )

    model_name = "{}_{}x{}".format(args.arch, cfg.input_shape[0], cfg.input_shape[1])
    save_dir = os.path.join(args.save, model_name)

    model = getattr(kpm, args.arch)()
    model.train()
    start_epoch = 0
    if args.resume is not None:
        file = mge.load(args.resume)
        model.load_state_dict(file["state_dict"])
        start_epoch = file["epoch"]

    optimizer = optim.Adam(
        model.parameters(), lr=cfg.initial_lr, weight_decay=cfg.weight_decay
    )

    gm = GradManager()
    if dist.get_world_size() > 1:
        gm.attach(
            model.parameters(), callbacks=[dist.make_allreduce_cb("SUM", dist.WORLD)],
        )
    else:
        gm.attach(model.parameters())

    if dist.get_world_size() > 1:
        dist.bcast_list_(model.parameters(), dist.WORLD)  # sync parameters

    # Build train datasets
    logger.info("preparing dataset..")
    ann_file = os.path.join(
        cfg.data_root, "annotations", "person_keypoints_train2017.json"
    )
    train_dataset = COCOJoints(
        cfg.data_root,
        ann_file,
        image_set="train2017",
        order=("image", "keypoints", "boxes", "info"),
    )
    logger.info("Num of Samples: {}".format(len(train_dataset)))
    train_sampler = data.RandomSampler(
        train_dataset, batch_size=cfg.batch_size, drop_last=True
    )

    transforms = [
        T.Normalize(mean=cfg.img_mean, std=cfg.img_std),
        RandomHorizontalFlip(0.5, keypoint_flip_order=cfg.keypoint_flip_order)
    ]

    if cfg.half_body_transform:
        transforms.append(
            HalfBodyTransform(
                cfg.upper_body_ids, cfg.lower_body_ids, cfg.prob_half_body
            )
        )
    if cfg.extend_boxes:
        transforms.append(
            ExtendBoxes(cfg.x_ext, cfg.y_ext, cfg.input_shape[1] / cfg.input_shape[0])
        )

    transforms += [
        RandomBoxAffine(
            degrees=cfg.rotate_range,
            scale=cfg.scale_range,
            output_shape=cfg.input_shape,
            rotate_prob=cfg.rotation_prob,
            scale_prob=cfg.scale_prob,
        )
    ]
    transforms += [T.ToMode()]

    train_queue = data.DataLoader(
        train_dataset,
        sampler=train_sampler,
        num_workers=args.workers,
        transform=T.Compose(transforms=transforms, order=train_dataset.order),
        collator=HeatmapCollator(
            cfg.input_shape,
            cfg.output_shape,
            cfg.keypoint_num,
            cfg.heat_thr,
            cfg.heat_kernels if args.multi_scale_supervision else cfg.heat_kernels[-1:],
            cfg.heat_range,
        ),
    )

    # Start training
    for epoch in range(start_epoch, cfg.epochs):
        loss = train(model, train_queue, optimizer, gm, epoch=epoch)
        logger.info("Epoch %d Train %.6f ", epoch, loss)

        if rank == 0 and epoch % cfg.save_freq == 0:  # save checkpoint
            mge.save(
                {"epoch": epoch + 1, "state_dict": model.state_dict()},
                os.path.join(save_dir, "epoch_{}.pkl".format(epoch)),
            )
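
The train function called in the epoch loop is not part of the excerpt. Under the GradManager setup this script builds (the same idiom Example #6 uses inline), one epoch would look roughly like the sketch below; the minibatch layout and the loss term are assumptions:

def train(model, train_queue, optimizer, gm, epoch):
    total_loss, steps = 0.0, 0
    for images, heatmaps in train_queue:            # hypothetical batch layout
        images = mge.tensor(images)
        heatmaps = mge.tensor(heatmaps)
        with gm:                                    # record the forward pass for autodiff
            pred = model(images)
            loss = ((pred - heatmaps) ** 2).mean()  # placeholder heatmap loss
            gm.backward(loss)
        optimizer.step().clear_grad()
        total_loss += loss.item()
        steps += 1
    return total_loss / max(steps, 1)
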
Example #5
# Imports inferred from the usage below; the mmc alias is implied by the
# mmc.datasets / mmc.training calls further down.
import megengine.data as data
import megengine.data.transform as T
import megengine.optimizer as optim
import megengine_mimicry as mmc
import megengine_mimicry.nets.dcgan.dcgan_cifar as dcgan

dataset = mmc.datasets.load_dataset(root=None, name='cifar10')
dataloader = data.DataLoader(dataset,
                             sampler=data.Infinite(
                                 data.RandomSampler(dataset,
                                                    batch_size=64,
                                                    drop_last=True)),
                             transform=T.Compose(
                                 [T.Normalize(std=255),
                                  T.ToMode("CHW")]),
                             num_workers=4)

netG = dcgan.DCGANGeneratorCIFAR()
netD = dcgan.DCGANDiscriminatorCIFAR()
optD = optim.Adam(netD.parameters(), 2e-4, betas=(0.0, 0.9))
optG = optim.Adam(netG.parameters(), 2e-4, betas=(0.0, 0.9))

LOG_DIR = "./log/dcgan_example"

trainer = mmc.training.Trainer(netD=netD,
                               netG=netG,
                               optD=optD,
                               optG=optG,
                               n_dis=5,
                               num_steps=100000,
                               lr_decay="linear",
                               dataloader=dataloader,
                               log_dir=LOG_DIR,
                               device=0)
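
The snippet stops right after the Trainer is built. Assuming the MegEngine port keeps upstream mimicry's interface, training is then launched with a single call:

# Runs n_dis discriminator updates per generator update for num_steps
# iterations, writing checkpoints and logs under LOG_DIR.
trainer.train()
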
Example #6
def worker(args):
    # pylint: disable=too-many-statements
    rank = dist.get_rank()
    world_size = dist.get_world_size()
    if rank == 0:
        os.makedirs(os.path.join(args.save, args.arch), exist_ok=True)
        megengine.logger.set_log_file(os.path.join(args.save, args.arch, "log.txt"))
    # the distributed process group is assumed to be initialized by the launcher

    # build dataset
    train_dataloader, valid_dataloader = build_dataset(args)
    train_queue = iter(train_dataloader)  # infinite
    steps_per_epoch = args.steps_per_epoch

    # build model
    model = UNetD(3)
    # Sync parameters
    if world_size > 1:
        dist.bcast_list_(model.parameters(), dist.WORLD)

    # Autodiff gradient manager
    gm = autodiff.GradManager().attach(
        model.parameters(),
        callbacks=dist.make_allreduce_cb("SUM") if world_size > 1 else None,
    )

    # Optimizer
    opt = optim.Adam(
        model.parameters(),
        lr=args.lr,
        weight_decay=args.weight_decay * world_size,  # scale weight decay in "SUM" mode
    )

    # mixup
    def preprocess(image, label):
        if args.dnd:
            image, label = MixUp_AUG(image, label)
        return image, label

    # train and valid func
    def train_step(image, label):
        with gm:
            logits = model(image)
            logits = image - logits
            loss = F.nn.l1_loss(logits, label)
            gm.backward(loss)
            opt.step().clear_grad()
        return loss

    def valid_step(image, label):
        pred = model(image)
        pred = image - pred
        mae_iter = F.nn.l1_loss(pred, label)
        psnr_it = batch_PSNR(pred, label)
        #print(psnr_it.item())
        if world_size > 1:
            mae_iter = F.distributed.all_reduce_sum(mae_iter) / world_size
            psnr_it = F.distributed.all_reduce_sum(psnr_it) / world_size

        return mae_iter, psnr_it

    # cosine learning rate schedule (no warmup; decays from args.lr to 0)
    def adjust_learning_rate(step):
        #lr = 1e-6 + 0.5 * (args.lr - 1e-6)*(1 + np.cos(step/(args.epochs*steps_per_epoch) * np.pi))
        lr = args.lr * (np.cos(step / (steps_per_epoch * args.epochs) * np.pi) + 1) / 2
        for param_group in opt.param_groups:
            param_group["lr"] = lr
        return lr

    # start training
    for step in range(0, int(args.epochs * steps_per_epoch)):
        #print(step)
        lr = adjust_learning_rate(step)

        t_step = time.time()

        image, label = next(train_queue)
        if step > steps_per_epoch:  # apply mixup only after the first epoch
            image, label = preprocess(image, label)
        image = megengine.tensor(image)
        label = megengine.tensor(label)
        t_data = time.time() - t_step
        loss = train_step(image, label)
        t_train = time.time() - t_step
        speed = 1. / t_train
        if step % args.print_freq == 0 and dist.get_rank() == 0:
            logging.info(
                "Epoch {} Step {}, Speed={:.2g} mb/s, dp_cost={:.2g}, "
                "Loss={:5.2e}, lr={:.2e}".format(
                    step // int(steps_per_epoch),
                    step,
                    speed,
                    t_data / t_train,
                    loss.item(),
                    lr,
                )
            )
        #print(steps_per_epoch)
        if (step + 1) % steps_per_epoch == 0:
            model.eval()
            loss, psnr_v = valid(valid_step, valid_dataloader)
            model.train()
            logging.info(
                "Epoch {} Test mae {:.3f}, psnr {:.3f}".format(
                    (step + 1) // steps_per_epoch,
                    loss.item(),
                    psnr_v.item(),
                )
            )
            if rank == 0:
                megengine.save(
                    {
                        "epoch": (step + 1) // steps_per_epoch,
                        "state_dict": model.state_dict(),
                    },
                    os.path.join(args.save, args.arch, "checkpoint.pkl"),
                )
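
The valid helper called after each epoch is not shown. Since valid_step already returns per-batch MAE and PSNR (all-reduced across workers), a minimal hedged sketch of the aggregation, assuming the validation loader yields numpy (image, label) pairs like the train queue:

def valid(valid_step, valid_dataloader):
    # Average per-batch MAE and PSNR over the whole validation set.
    mae_sum, psnr_sum, batches = 0.0, 0.0, 0
    for image, label in valid_dataloader:
        image = megengine.tensor(image)
        label = megengine.tensor(label)
        mae_it, psnr_it = valid_step(image, label)
        mae_sum += mae_it
        psnr_sum += psnr_it
        batches += 1
    return mae_sum / batches, psnr_sum / batches
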
Example #7
def worker(rank, world_size, args):
    if world_size > 1:
        # Initialize distributed process group
        logger.info("init distributed process group {} / {}".format(
            rank, world_size))
        dist.init_process_group(
            master_ip="localhost",
            master_port=23456,
            world_size=world_size,
            rank=rank,
            dev=rank,
        )

    model_name = "{}_{}x{}".format(args.arch, cfg.input_shape[0],
                                   cfg.input_shape[1])
    save_dir = os.path.join(args.save, model_name)

    model = getattr(M, args.arch)(pretrained=args.pretrained)
    model.train()
    start_epoch = 0
    if args.c is not None:  # resume from a checkpoint
        file = mge.load(args.c)
        model.load_state_dict(file["state_dict"])
        start_epoch = file["epoch"]

    optimizer = optim.Adam(
        model.parameters(requires_grad=True),
        lr=args.lr,
        weight_decay=cfg.weight_decay,
    )
    # Build train datasets
    logger.info("preparing dataset..")
    train_dataset = COCOJoints(
        args.data_root,
        args.ann_file,
        image_set="train",
        order=("image", "keypoints", "boxes", "info"),
    )
    train_sampler = data.RandomSampler(train_dataset,
                                       batch_size=args.batch_size,
                                       drop_last=True)

    transforms = [T.Normalize(mean=cfg.IMG_MEAN, std=cfg.IMG_STD)]
    if cfg.half_body_transform:
        transforms.append(
            HalfBodyTransform(cfg.upper_body_ids, cfg.lower_body_ids,
                              cfg.prob_half_body))
    if cfg.extend_boxes:
        transforms.append(
            ExtendBoxes(cfg.x_ext, cfg.y_ext,
                        cfg.input_shape[1] / cfg.input_shape[0]))
    transforms += [
        RandomHorizontalFlip(0.5, keypoint_flip_order=cfg.keypoint_flip_order)
    ]
    transforms += [
        RandomBoxAffine(
            degrees=cfg.rotate_range,
            scale=cfg.scale_range,
            output_shape=cfg.input_shape,
            rotate_prob=cfg.rotation_prob,
            scale_prob=cfg.scale_prob,
        )
    ]
    transforms += [T.ToMode()]

    train_queue = data.DataLoader(
        train_dataset,
        sampler=train_sampler,
        num_workers=args.workers,
        transform=T.Compose(
            transforms=transforms,
            order=train_dataset.order,
        ),
        collator=HeatmapCollator(
            cfg.input_shape,
            cfg.output_shape,
            cfg.keypoint_num,
            cfg.heat_thre,
            cfg.heat_kernel
            if args.multi_scale_supervision else cfg.heat_kernel[-1:],
            cfg.heat_range,
        ),
    )

    # Start training
    for epoch in range(start_epoch, args.epochs):
        loss = train(model, train_queue, optimizer, args, epoch=epoch)
        logger.info("Epoch %d Train %.6f ", epoch, loss)

        if rank == 0:  # save checkpoint
            mge.save(
                {
                    "epoch": epoch + 1,
                    "state_dict": model.state_dict(),
                },
                os.path.join(save_dir, "epoch_{}.pkl".format(epoch)),
            )
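
This is an earlier revision of the pipeline in Example #4, written against the MegEngine 0.x API: dist.init_process_group takes master_port= and dev= rather than port= and device=, trainable parameters are selected with model.parameters(requires_grad=True), a checkpoint is saved every epoch rather than every cfg.save_freq epochs, and no GradManager is built, so gradients presumably flow through the optimizer.backward path seen in Example #1. A hedged sketch of one training step in that style (the batch names and the loss term are assumptions):

def train_step_old_api(model, optimizer, images, heatmaps):
    optimizer.zero_grad()
    loss = ((model(images) - heatmaps) ** 2).mean()  # placeholder loss
    optimizer.backward(loss)  # 0.x-era API: backward lives on the optimizer
    optimizer.step()
    return loss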