Code example #1
def train_stage_one(args, model, train_loader, valid_loader, criterion):
    optimizer = WeightDecayOptimizerWrapper(
        torch.optim.Adam(model.parameters(), lr=2e-3), 0.1)
    freeze_layers(model, [True, True, False])

    # stage 1
    n_steps = len(train_loader) // 2
    bot = ImageClassificationBot(model=model,
                                 train_loader=train_loader,
                                 val_loader=valid_loader,
                                 clip_grad=10.,
                                 optimizer=optimizer,
                                 echo=not ON_KAGGLE,
                                 criterion=criterion,
                                 avg_window=len(train_loader) // 10,
                                 callbacks=[
                                     LearningRateSchedulerCallback(
                                         TriangularLR(optimizer,
                                                      100,
                                                      ratio=3,
                                                      steps_per_cycle=n_steps))
                                 ],
                                 pbar=not ON_KAGGLE,
                                 use_tensorboard=False)
    bot.logger.info(bot.criterion)
    bot.train(n_steps,
              log_interval=len(train_loader) // 10,
              snapshot_interval=len(train_loader) // 4)
    bot.load_model(bot.best_performers[0][1])
    torch.save(bot.model.state_dict(),
               str(CACHE_DIR / f"stage1_{args.fold}.pth"))
    bot.remove_checkpoints(keep=0)
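
Every example in this listing wraps its base optimizer in WeightDecayOptimizerWrapper to apply decoupled weight decay, i.e. the AdamW idea of shrinking the weights directly at step time instead of folding an L2 penalty into the gradients. The wrapper's own source is not part of this listing; the sketch below only illustrates the idea and is not the actual implementation.

class DecoupledWeightDecaySketch:
    """Minimal sketch of decoupled weight decay (not the wrapper used above)."""

    def __init__(self, optimizer, weight_decay):
        self.optimizer = optimizer
        self.weight_decay = weight_decay

    def step(self):
        # Shrink each parameter in proportion to its group's learning rate,
        # then take the normal optimizer step.
        for group in self.optimizer.param_groups:
            for param in group["params"]:
                if param.grad is not None:
                    param.data.mul_(1 - group["lr"] * self.weight_decay)
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()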
Code example #2
def train():
    train_dl, valid_dl = get_cifar10_dataset(batch_size=1024)
    steps_per_epoch = len(train_dl)

    model = get_wide_resnet()

    # optimizer = WeightDecayOptimizerWrapper(optim.SGD(
    #     model.parameters(), lr=0.1,
    #     momentum=0.9, weight_decay=0), 0.05)
    # optimizer = WeightDecayOptimizerWrapper(AdaBound(
    #     model.parameters(), lr=1e-3, final_lr=0.1, gamma=1/steps_per_epoch/2.5, weight_decay=0
    # ), 0.05)
    optimizer = WeightDecayOptimizerWrapper(optim.Adam(
        model.parameters(), lr=1.5e-3), 0.1)
    model, optimizer = amp.initialize(
        model, optimizer, opt_level="O2", keep_batchnorm_fp32=True,
        loss_scale="dynamic"
    )

    n_epochs = 50
    n_steps = n_epochs * steps_per_epoch
    bot = CifarBot(
        model=model, train_loader=train_dl, val_loader=valid_dl,
        optimizer=optimizer, echo=True,
        avg_window=steps_per_epoch // 5,
        criterion=nn.CrossEntropyLoss(),
        device=DEVICE, clip_grad=1.,
        callbacks=[
            LearningRateSchedulerCallback(
                TriangularLR(
                    optimizer, 100, ratio=4, steps_per_cycle=n_steps
                )
            )
        ],
        metrics=[SoftmaxAccuracy()],
        pbar=True,
        use_amp=True
    )
    bot.train(
        n_steps,
        snapshot_interval=steps_per_epoch,
        log_interval=steps_per_epoch // 5,
        keep_n_snapshots=1
    )
    print(f"GPU Memory Used: {get_gpu_memory_map()} MB")
    bot.load_model(bot.best_performers[0][1])
    torch.save(bot.model.state_dict(), "cache/baseline.pth")
    bot.remove_checkpoints(keep=0)
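
Example #2 also initializes NVIDIA apex mixed precision (opt_level="O2") and passes use_amp=True to the bot, which presumably scales the loss before the backward pass internally. For reference, a sketch of the standard apex pattern is shown below; the training-step function and its arguments are placeholders, not part of the example above.

from apex import amp

def mixed_precision_step(model, inputs, targets, criterion, optimizer):
    # Sketch of one apex AMP training step: scale the loss to avoid fp16
    # gradient underflow, back-propagate, then step the (wrapped) optimizer.
    loss = criterion(model(inputs), targets)
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return loss.item()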
Code example #3
File: train_video.py Project: hisiter97/yt8m-2019
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('config', type=str)
    arg('--from-checkpoint', type=str, default='')
    args = parser.parse_args()
    with open(args.config) as fin:
        config = yaml.safe_load(fin)
    train_loader, valid_loader = get_loaders(config["video"])

    model_config = config["video"]["model"]
    training_config = config["video"]["training"]
    model = create_video_model(model_config)
    print(model)

    optimizer_grouped_parameters = [
        {
            'params': [p for n, p in model.named_parameters()
                       if not any(nd in n for nd in NO_DECAY)],
        },
        {
            'params': [p for n, p in model.named_parameters()
                       if any(nd in n for nd in NO_DECAY)],
        }
    ]
    optimizer = WeightDecayOptimizerWrapper(
        torch.optim.Adam(
            optimizer_grouped_parameters,
            lr=float(training_config['lr']),
            eps=float(training_config['eps'])
        ),
        [float(training_config['weight_decay']), 0]
    )

    if args.from_checkpoint:
        bot = resume_training(
            training_config, args.from_checkpoint, model,
            optimizer, train_loader, valid_loader)
    else:
        bot = train_from_start(
            training_config, model, optimizer,
            train_loader, valid_loader)
    target_dir = (MODEL_DIR / datetime.now().strftime("%Y%m%d_%H%M"))
    target_dir.mkdir(parents=True)
    torch.save(bot.model.state_dict(), target_dir / "model.pth")
    with open(target_dir / "config.yaml", "w") as fout:
        fout.write(yaml.dump(config, default_flow_style=False))
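
Examples #3 through #7 split the model parameters into two groups, one that receives weight decay and one matched by NO_DECAY, and hand the wrapper one decay value per group ([weight_decay, 0] above). NO_DECAY itself is not shown in this listing; the definition below is only an assumed, typical convention that exempts biases and normalization parameters.

# Assumed for illustration; the project's actual NO_DECAY list is not shown.
NO_DECAY = ["bias", "LayerNorm.weight", "BatchNorm"]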
Code example #4
File: main.py Project: hisiter97/yt8m-2019
def get_optimizer(model, lr):
    return WeightDecayOptimizerWrapper(
        torch.optim.Adam(
            [
                {
                    'params': [
                        p for n, p in model.named_parameters()
                        if not any(nd in n for nd in NO_DECAY)
                    ],
                },
                {
                    'params': [
                        p for n, p in model.named_parameters()
                        if any(nd in n for nd in NO_DECAY)
                    ],
                },
            ],
            weight_decay=0,
            lr=lr),
        weight_decay=[1e-1, 0],
        change_with_lr=True)
Code example #5
def train_from_scratch(args, model, train_loader, valid_loader, criterion):
    n_steps = len(train_loader) * args.epochs
    optimizer = WeightDecayOptimizerWrapper(
        torch.optim.Adam(
            [
                {
                    'params': [
                        p for n, p in model.named_parameters()
                        if not any(nd in n for nd in NO_DECAY)
                    ],
                },
                {
                    'params': [
                        p for n, p in model.named_parameters()
                        if any(nd in n for nd in NO_DECAY)
                    ],
                },
            ],
            weight_decay=0,
            lr=args.lr),
        weight_decay=[1e-1, 0],
        change_with_lr=True)
    if args.debug:
        print("No decay:", [
            n for n, p in model.named_parameters() if any(nd in n
                                                          for nd in NO_DECAY)
        ])
    if args.amp:
        if not APEX_AVAILABLE:
            raise ValueError("Apex is not installed!")
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.amp)

    callbacks = [
        LearningRateSchedulerCallback(
            # TriangularLR(
            #     optimizer, 100, ratio=4, steps_per_cycle=n_steps
            # )
            GradualWarmupScheduler(optimizer,
                                   100,
                                   len(train_loader),
                                   after_scheduler=CosineAnnealingLR(
                                       optimizer,
                                       n_steps - len(train_loader))))
    ]
    if args.mixup_alpha:
        callbacks.append(
            MixUpCallback(alpha=args.mixup_alpha, softmax_target=True))
    bot = ImageClassificationBot(model=model,
                                 train_loader=train_loader,
                                 val_loader=valid_loader,
                                 clip_grad=10.,
                                 optimizer=optimizer,
                                 echo=True,
                                 criterion=criterion,
                                 avg_window=len(train_loader) // 5,
                                 callbacks=callbacks,
                                 pbar=True,
                                 use_tensorboard=True,
                                 use_amp=(args.amp != ''))
    bot.train(
        n_steps,
        log_interval=len(train_loader) // 6,
        snapshot_interval=len(train_loader) // 2,
        # early_stopping_cnt=8,
        min_improv=1e-2,
        keep_n_snapshots=1)
    bot.remove_checkpoints(keep=1)
    bot.load_model(bot.best_performers[0][1])
    torch.save(bot.model.state_dict(), CACHE_DIR / "final_weights.pth")
    bot.remove_checkpoints(keep=0)
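
Example #5 warms the learning rate up with GradualWarmupScheduler over one epoch and then hands control to CosineAnnealingLR. For readers without that helper, a roughly equivalent warm-up-then-cosine schedule can be assembled from stock PyTorch schedulers; the sketch below is an alternative, not the code used above (and torch's LinearLR is unrelated to the LinearLR helper that appears in example #7).

from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR, SequentialLR

# Alternative sketch with stock PyTorch (>=1.10): linear warm-up for one
# epoch, then cosine annealing for the remaining steps.
warmup_steps = len(train_loader)
scheduler = SequentialLR(
    optimizer,
    schedulers=[
        LinearLR(optimizer, start_factor=0.01, total_iters=warmup_steps),
        CosineAnnealingLR(optimizer, T_max=n_steps - warmup_steps),
    ],
    milestones=[warmup_steps],
)
# Call scheduler.step() once per training step.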
Code example #6
def train_stage_two(args, model, train_loader, valid_loader, criterion):
    n_steps = len(train_loader) * args.epochs
    optimizer = WeightDecayOptimizerWrapper(
        setup_differential_learning_rates(
            partial(
                torch.optim.Adam,
                weight_decay=0
                # AdaBound, weight_decay=0, gamma=1/5000, betas=(.8, .999)
                # torch.optim.SGD, momentum=0.9
            ),
            model,
            [1e-5, 8e-5, 5e-4],
            [1., 1., 1.]),
        weight_decay=5e-2,
        change_with_lr=True)
    freeze_layers(model, [False, False, False])
    bot = ImageClassificationBot(
        model=model,
        train_loader=train_loader,
        val_loader=valid_loader,
        clip_grad=10.,
        optimizer=optimizer,
        echo=not ON_KAGGLE,
        criterion=criterion,
        avg_window=len(train_loader) // 15,
        callbacks=[
            LearningRateSchedulerCallback(
                TriangularLR(optimizer, 100, ratio=4, steps_per_cycle=n_steps)
                # GradualWarmupScheduler(
                # optimizer, 100, len(train_loader),
                # after_scheduler=CosineAnnealingLR(
                #     optimizer, n_steps - len(train_loader)
                # )
            ),
            MixUpCallback(alpha=0.2)
        ],
        pbar=not ON_KAGGLE,
        use_tensorboard=not ON_KAGGLE)
    bot.logger.info(bot.criterion)
    bot.model.load_state_dict(torch.load(CACHE_DIR /
                                         f"stage1_{args.fold}.pth"))

    # def snapshot_or_not(step):
    #     if step < 4000:
    #         if step % 2000 == 0:
    #             return True
    #     elif (step - 4000) % 1000 == 0:
    #         return True
    #     return False

    bot.train(
        n_steps,
        log_interval=len(train_loader) // 20,
        snapshot_interval=len(train_loader) // 2,
        # snapshot_interval=snapshot_or_not,
        early_stopping_cnt=args.early_stop,
        min_improv=1e-4,
        keep_n_snapshots=1)
    bot.load_model(bot.best_performers[0][1])
    bot.remove_checkpoints(keep=0)

    # Final model
    torch.save(bot.model, MODEL_DIR / f"final_{args.fold}.pth")
    # Failover (args + state dict)
    torch.save([args.arch, bot.model.state_dict()],
               MODEL_DIR / f"failover_{args.arch}_{args.fold}.pth")
Code example #7
def main():
    parser = argparse.ArgumentParser()
    arg = parser.add_argument
    arg('config')
    arg('context_model_dir', type=str)
    arg('segment_model_dir', type=str)
    arg('--steps', type=int, default=-1)
    arg('--fold', type=int, default=0)
    arg('--name', type=str, default="context_model")
    args = parser.parse_args()
    with open(args.config) as fin:
        config = yaml.safe_load(fin)
    training_config = config["segment_w_context"]["training"]
    train_loader, valid_loader = get_loaders(
        training_config["batch_size"],
        fold=args.fold,
        seed=int(os.environ.get("SEED", "9293")),
        offset=training_config["offset"])

    if args.steps > 0:
        # override
        training_config["steps"] = args.steps

    context_model_dir = Path(args.context_model_dir)
    with open(context_model_dir / "config.yaml") as fin:
        context_config = yaml.safe_load(fin)
    config["context_base"] = context_config["video"]
    context_state_dict = torch.load(str(context_model_dir / "model.pth"))
    segment_model_dir = Path(args.segment_model_dir)
    with open(segment_model_dir / "config.yaml") as fin:
        segment_config = yaml.safe_load(fin)
    config["segment_base"] = segment_config["video"]
    segment_state_dict = torch.load(str(segment_model_dir / "model.pth"))
    model = prepare_model(config,
                          context_state_dict=context_state_dict,
                          segment_state_dict=segment_state_dict)
    print(model)

    # optimizer_grouped_parameters = []
    lr = float(training_config["lr"])
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in model.segment_model.named_parameters()
            if not any(nd in n for nd in NO_DECAY)
        ],
        'lr': lr / 2,
    }, {
        'params': [
            p for n, p in model.segment_model.named_parameters()
            if any(nd in n for nd in NO_DECAY)
        ],
        'lr': lr / 2,
    }]
    if config["segment_w_context"]["model"]["finetune_context"]:
        optimizer_grouped_parameters += [{
            'params': [
                p for n, p in model.context_model.named_parameters()
                if not any(nd in n for nd in NO_DECAY)
            ],
            'lr': lr / 4,
        }, {
            'params': [
                p for n, p in model.context_model.named_parameters()
                if any(nd in n for nd in NO_DECAY)
            ],
            'lr': lr / 4,
        }]
    for module in (model.expert_fc, model.gating_fc, model.intermediate_fc):
        optimizer_grouped_parameters += [{
            'params': [
                p for n, p in module.named_parameters()
                if not any(nd in n for nd in NO_DECAY)
            ],
            'lr': lr,
        }, {
            'params': [
                p for n, p in module.named_parameters()
                if any(nd in n for nd in NO_DECAY)
            ],
            'lr': lr,
        }]
    optimizer = WeightDecayOptimizerWrapper(
        torch.optim.Adam(optimizer_grouped_parameters,
                         lr=lr,
                         eps=float(training_config["eps"])),
        [training_config["weight_decay"], 0] *
        (len(optimizer_grouped_parameters) // 2))

    n_steps = training_config["steps"]
    checkpoints = CheckpointCallback(keep_n_checkpoints=1,
                                     checkpoint_dir=CACHE_DIR / "model_cache/",
                                     monitor_metric="roc_auc")
    break_points = [0, int(n_steps * 0.25)]
    lr_durations = np.diff(break_points + [n_steps])
    bot = YoutubeBot(
        model=model,
        train_loader=train_loader,
        valid_loader=valid_loader,
        clip_grad=10.,
        optimizer=optimizer,
        echo=True,
        criterion=SampledCrossEntropyLoss(),
        callbacks=[
            LearningRateSchedulerCallback(
                MultiStageScheduler(
                    [
                        LinearLR(optimizer, 0.01, lr_durations[0]),
                        LinearLR(
                            optimizer, 0.001, lr_durations[1], upward=False)
                        # CosineAnnealingLR(optimizer, lr_durations[1])
                    ],
                    start_at_epochs=break_points)),
            MovingAverageStatsTrackerCallback(
                avg_window=1200,
                log_interval=1000,
            ),
            checkpoints,
        ],
        pbar=True,
        use_tensorboard=False)
    bot.train(total_steps=n_steps,
              checkpoint_interval=training_config["ckpt_interval"])
    bot.load_model(checkpoints.best_performers[0][1])
    checkpoints.remove_checkpoints(keep=0)

    # save the model
    target_dir = (
        MODEL_DIR /
        f"{args.name}_{args.fold}_{datetime.now().strftime('%Y%m%d-%H%M')}")
    target_dir.mkdir(parents=True)
    torch.save(bot.model.state_dict(), target_dir / "model.pth")
    with open(target_dir / "config.yaml", "w") as fout:
        fout.write(yaml.dump(config, default_flow_style=False))
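
Example #7 builds its schedule by hand: a linear ramp up over the first 25% of the steps, then a linear ramp down, joined by MultiStageScheduler. A similar linear-up/linear-down shape is available in stock PyTorch as OneCycleLR; the sketch below is an alternative rather than the schedulers used above, and whether it composes cleanly with the wrapped optimizer is not guaranteed.

from torch.optim.lr_scheduler import OneCycleLR

# Alternative sketch with stock PyTorch: linear warm-up over the first 25%
# of steps, linear decay afterwards.
scheduler = OneCycleLR(
    optimizer,
    max_lr=lr,
    total_steps=n_steps,
    pct_start=0.25,
    anneal_strategy="linear",
    cycle_momentum=False,  # keep Adam's betas fixed
)
# Call scheduler.step() once per training step.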