Code Example #1
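Example #1 is an evaluation-only worker: rank 0 creates the log directory and a TensorBoard writer, the model is built and deployed (presumably moved to GPU and, if configured, wrapped for DDP), optionally restored from a checkpoint, and then run once over the validation set.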
# Assumed imports: torch / tensorboard come from PyTorch; the project helpers
# used below (build_log_dir, get_model, deploy_model, build_dataloader_val,
# load_model, validation_classification) come from the surrounding repository.
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

def main_worker(cfg):
    # create tensorboard and logs
    if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0:
        tb_logdir = build_log_dir(cfg)
        writer = SummaryWriter(log_dir=tb_logdir)
    else:
        writer = None
    cfg.freeze()

    # create model
    model = get_model(cfg)
    model = deploy_model(model, cfg)

    # create dataset and dataloader
    val_loader, _ = build_dataloader_val(cfg)

    if cfg.CONFIG.MODEL.LOAD:
        model, _ = load_model(model, cfg, load_fc=True)

    criterion = nn.CrossEntropyLoss().cuda()

    # adversarial_classification(model, val_loader, -1, criterion, cfg, writer)
    validation_classification(model, val_loader, -1, criterion, cfg, writer)

    if writer is not None:
        writer.close()
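For context, a worker like this is usually launched with one process per GPU. The sketch below is an assumed launcher, not part of the original code: get_cfg_defaults, GPU_WORLD_SIZE, and _dist_worker are hypothetical names chosen to match the config fields the snippet reads.

import torch.multiprocessing as mp

def _dist_worker(local_rank, cfg):
    # record this process's rank; rank 0 is the one that owns the writer
    cfg.DDP_CONFIG.GPU_WORLD_RANK = local_rank  # hypothetical assignment
    main_worker(cfg)

if __name__ == '__main__':
    cfg = get_cfg_defaults()  # hypothetical config constructor
    if cfg.DDP_CONFIG.DISTRIBUTED:
        # one worker per GPU; mp.spawn passes the process index as the first arg
        mp.spawn(_dist_worker, nprocs=cfg.DDP_CONFIG.GPU_WORLD_SIZE, args=(cfg,))
    else:
        main_worker(cfg)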
Code Example #2
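Example #2 extends the worker into a full training loop: it builds train/val dataloaders, an SGD optimizer, and a MultiStepLR schedule, then alternates per-epoch training, periodic validation, and periodic checkpointing. Imports are the same as in Example #1, with the additional project helpers build_dataloader, train_classification, and save_model.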
def main_worker(cfg):
    # create tensorboard and logs
    if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0:
        tb_logdir = build_log_dir(cfg)
        writer = SummaryWriter(log_dir=tb_logdir)
    else:
        writer = None
    cfg.freeze()

    # create model
    model = get_model(cfg)
    model = deploy_model(model, cfg)

    # create dataset and dataloader
    train_loader, val_loader, train_sampler, val_sampler, mg_sampler = build_dataloader(
        cfg)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=cfg.CONFIG.TRAIN.LR,
                                momentum=cfg.CONFIG.TRAIN.MOMENTUM,
                                weight_decay=cfg.CONFIG.TRAIN.W_DECAY)
    if cfg.CONFIG.MODEL.LOAD:
        model, _ = load_model(model, optimizer, cfg, load_fc=True)

    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=cfg.CONFIG.TRAIN.LR_MILESTONE,
        gamma=cfg.CONFIG.TRAIN.STEP)
    criterion = nn.CrossEntropyLoss().cuda()

    base_iter = 0
    for epoch in range(cfg.CONFIG.TRAIN.EPOCH_NUM):
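        # DistributedSampler derives its shuffle from the epoch number, so it
        # must be told the current epoch to produce a fresh data order each pass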
        if cfg.DDP_CONFIG.DISTRIBUTED:
            train_sampler.set_epoch(epoch)

        base_iter = train_classification(base_iter,
                                         model,
                                         train_loader,
                                         epoch,
                                         criterion,
                                         optimizer,
                                         cfg,
                                         writer=writer)
        scheduler.step()
        if epoch % cfg.CONFIG.VAL.FREQ == 0 or epoch == cfg.CONFIG.TRAIN.EPOCH_NUM - 1:
            validation_classification(model, val_loader, epoch, criterion, cfg,
                                      writer)

        if epoch % cfg.CONFIG.LOG.SAVE_FREQ == 0:
            if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0 or not cfg.DDP_CONFIG.DISTRIBUTED:
                save_model(model, optimizer, epoch, cfg)
    if writer is not None:
        writer.close()
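The MultiStepLR above multiplies the learning rate by the factor stored under cfg.CONFIG.TRAIN.STEP (despite the name, it is used as the decay gamma here) each time an epoch in LR_MILESTONE is reached. A tiny self-contained demo with made-up values standing in for the config:

import torch

param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.SGD([param], lr=0.1)
sched = torch.optim.lr_scheduler.MultiStepLR(opt, milestones=[30, 60], gamma=0.1)

for epoch in range(90):
    opt.step()    # stands in for one epoch of train_classification
    sched.step()  # mirrors the per-epoch scheduler.step() above
# lr is 0.1 for epochs 0-29, 0.01 for 30-59, and 0.001 from epoch 60 on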
Code Example #3
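Example #3 generalizes the schedule handling: the LR policy is chosen from the config (Step or Cosine), an optional gradual warmup wraps whichever scheduler was built (GradualWarmupScheduler matches the interface of the pytorch-gradual-warmup-lr package, though it may be a project-local copy), and optional multigrid long-cycle steps adjust the sampler mid-training.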
def main_worker(cfg):
    # create tensorboard and logs
    if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0:
        tb_logdir = build_log_dir(cfg)
        writer = SummaryWriter(log_dir=tb_logdir)
    else:
        writer = None
    cfg.freeze()

    # create model
    model = get_model(cfg)
    model = deploy_model(model, cfg)

    # create dataset and dataloader
    train_loader, val_loader, train_sampler, val_sampler, mg_sampler = build_dataloader(
        cfg)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=cfg.CONFIG.TRAIN.LR,
                                momentum=cfg.CONFIG.TRAIN.MOMENTUM,
                                weight_decay=cfg.CONFIG.TRAIN.W_DECAY)
    if cfg.CONFIG.MODEL.LOAD:
        model, _ = load_model(model, optimizer, cfg, load_fc=True)

    if cfg.CONFIG.TRAIN.LR_POLICY == 'Step':
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=cfg.CONFIG.TRAIN.LR_MILESTONE,
            gamma=cfg.CONFIG.TRAIN.STEP)
    elif cfg.CONFIG.TRAIN.LR_POLICY == 'Cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=cfg.CONFIG.TRAIN.EPOCH_NUM - cfg.CONFIG.TRAIN.WARMUP_EPOCHS,
            eta_min=0,
            last_epoch=cfg.CONFIG.TRAIN.RESUME_EPOCH)
    else:
        # fail fast: otherwise `scheduler` would be undefined when stepped below
        raise ValueError(
            'Learning rate schedule %s is not supported yet. Please use Step or Cosine.'
            % cfg.CONFIG.TRAIN.LR_POLICY)

    if cfg.CONFIG.TRAIN.USE_WARMUP:
        scheduler_warmup = GradualWarmupScheduler(
            optimizer,
            multiplier=(cfg.CONFIG.TRAIN.WARMUP_END_LR / cfg.CONFIG.TRAIN.LR),
            total_epoch=cfg.CONFIG.TRAIN.WARMUP_EPOCHS,
            after_scheduler=scheduler)

    criterion = nn.CrossEntropyLoss().cuda()

    base_iter = 0
    for epoch in range(cfg.CONFIG.TRAIN.EPOCH_NUM):
        if cfg.DDP_CONFIG.DISTRIBUTED:
            train_sampler.set_epoch(epoch)

        base_iter = train_classification(base_iter,
                                         model,
                                         train_loader,
                                         epoch,
                                         criterion,
                                         optimizer,
                                         cfg,
                                         writer=writer)
        if cfg.CONFIG.TRAIN.USE_WARMUP:
            scheduler_warmup.step()
        else:
            scheduler.step()

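        # multigrid long cycle: at the configured epochs, step the sampler so
        # subsequent epochs use a different clip/batch configuration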
        if cfg.CONFIG.TRAIN.MULTIGRID.USE_LONG_CYCLE:
            if epoch in cfg.CONFIG.TRAIN.MULTIGRID.LONG_CYCLE_EPOCH:
                mg_sampler.step_long_cycle()

        if epoch % cfg.CONFIG.VAL.FREQ == 0 or epoch == cfg.CONFIG.TRAIN.EPOCH_NUM - 1:
            validation_classification(model, val_loader, epoch, criterion, cfg,
                                      writer)

        if epoch % cfg.CONFIG.LOG.SAVE_FREQ == 0:
            if cfg.DDP_CONFIG.GPU_WORLD_RANK == 0 or not cfg.DDP_CONFIG.DISTRIBUTED:
                save_model(model, optimizer, epoch, cfg)
    if writer is not None:
        writer.close()
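Put together, warmup plus cosine yields the curve sketched below: a linear ramp from TRAIN.LR up to TRAIN.WARMUP_END_LR over the warmup epochs (that is what the multiplier works out to), then cosine annealing down to eta_min=0 over the remaining epochs. This is an illustration with made-up numbers, not the project's scheduler:

import math

base_lr, warmup_end_lr = 0.001, 0.1  # stand-ins for TRAIN.LR / TRAIN.WARMUP_END_LR
warmup_epochs, total_epochs = 5, 50  # stand-ins for WARMUP_EPOCHS / EPOCH_NUM

def lr_at(epoch):
    if epoch < warmup_epochs:
        # linear ramp from base_lr up to warmup_end_lr
        return base_lr + (warmup_end_lr - base_lr) * epoch / warmup_epochs
    # cosine annealing from warmup_end_lr down to 0 over the rest of training
    t = (epoch - warmup_epochs) / (total_epochs - warmup_epochs)
    return 0.5 * warmup_end_lr * (1 + math.cos(math.pi * t))

for e in (0, 4, 5, 25, 49):
    print(e, round(lr_at(e), 5))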