import os

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from tqdm import tqdm, trange
from warmup_scheduler import GradualWarmupScheduler  # ildoonet/pytorch-gradual-warmup-lr

# logger, set_seed and evaluate are assumed to be provided by the surrounding project


def train(model, train_loader, validate_loader, loss_fct, config, vocab):
    optimizer = optim.Adam(model.parameters(), lr=config.lr)

    # total number of optimization steps (batches per epoch * num_epochs)
    t_total = len(train_loader) * config.num_epochs
    config.warmup_step = int(config.warmup_percent * t_total)

    # cosine learning-rate decay, preceded by a linear warmup over warmup_step steps
    scheduler_cosine = CosineAnnealingLR(optimizer,
                                         T_max=config.num_epochs,
                                         eta_min=0,
                                         last_epoch=-1)
    scheduler = GradualWarmupScheduler(optimizer,
                                       multiplier=1,
                                       total_epoch=config.warmup_step,
                                       after_scheduler=scheduler_cosine)

    # Train!
    logger.info("***** Running model *****")
    logger.info("  Num Epochs = %d", config.num_epochs)
    logger.info("  Total optimization steps = %d", t_total)
    logger.info("  Warmup Steps = %d", config.warmup_step)

    global_step = 0
    steps_trained_in_current_epoch = 0
    # cumulative training loss, and its value at the last logging step
    tr_loss, logging_loss = 0.0, 0.0
    best_val_loss = 1e8

    model.zero_grad()

    train_iterator = trange(
        0,
        int(config.num_epochs),
        desc="Epoch",
    )

    set_seed(config)

    config.model_path = 'CNNmodel'

    if not os.path.isdir(config.model_path):
        os.makedirs(config.model_path)

    for _ in train_iterator:
        epoch_iterator = tqdm(train_loader, desc="Iteration")

        for step, (file_name, mfcc, target_index) in enumerate(epoch_iterator):

            model.train()

            audios = mfcc.to(config.device)
            texts = target_index.squeeze(-1).long().to(config.device)

            logit, feature = model(audios)

            loss = loss_fct(logit, texts.view(-1))

            loss.backward()

            tr_loss += loss.item()

            torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip)

            optimizer.step()
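            # the warmup (and, after warmup, the cosine) schedule is advanced once per batch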
            scheduler.step()

            model.zero_grad()

            global_step += 1

            if global_step % config.logging_steps == 0:
                logger.info("  train loss : %.3f",
                            (tr_loss - logging_loss) / config.logging_steps)
                logging_loss = tr_loss

        val_loss, info = evaluate(validate_loader, loss_fct, model, vocab,
                                  config)
        logger.info(info)

        if val_loss < best_val_loss:
            best_val_loss = val_loss

            torch.save(model, f"{config.model_path}/model.pt")
            torch.save(config, f"{config.model_path}/config.pt")
            logger.info(f"  Saved {config.model_path}")

        logger.info("  val loss : %.3f", val_loss)
        logger.info("  best_val loss : %.3f", best_val_loss)
Example #2
with timer('create model'):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = CnnModel(num_classes=186,
                     encoder="se_resnext50_32x4d",
                     pretrained="imagenet",
                     pool_type="concat")
    # model = convert_model_ReLU2Swish(model)
    model = model.to(device)
    # model.load_state_dict(torch.load("models/exp19_custom_fc_mixup_cutmix_45epoch_4e-4_fold0.pth"))
    # LOGGER.info("exp19 model loaded")

    criterion = nn.CrossEntropyLoss(reduction='mean').to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, eps=1e-4)
    scheduler = GradualWarmupScheduler(optimizer,
                                       multiplier=1.1,
                                       total_epoch=5,
                                       after_scheduler=None)
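    # No after_scheduler here: assuming the ildoonet warmup_scheduler implementation,
    # the lr ramps linearly from 1e-4 toward 1.1e-4 over the first 5 scheduler steps
    # and then stays at that value.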

with timer('training loop'):
    best_score = -999
    best_epoch = 0
    for epoch in range(1, epochs + 1):

        LOGGER.info("Starting {} epoch...".format(epoch))
        tr_loss = train_one_epoch_mixup_cutmix_for_single_output(
            model, train_loader, criterion, optimizer, device)
        LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

        val_pred, y_true, val_loss = validate_for_single_output(
            model, val_loader, criterion, device)
        score = macro_recall(y_true, val_pred)
    logs = []

G = Generator()
F = Generator()
Da = Discriminator()
Db = Discriminator()
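
# Two generators and two discriminators (a CycleGAN-style setup). Below, the two
# generators share one Adam optimizer, and each optimizer gets a linear warmup
# over WARMUP_EPOCHS followed by cosine annealing with warm restarts.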

GF_optimizer = optim.Adam(itertools.chain(G.parameters(), F.parameters()),
                          lr=G_MAX_LR,
                          betas=(0.5, 0.999))
GF_scheduler_cos = CosineAnnealingWarmRestarts(GF_optimizer,
                                               T_0=TOTAL_EPOCHS,
                                               T_mult=1,
                                               eta_min=G_MIN_LR)
GF_scheduler = GradualWarmupScheduler(GF_optimizer,
                                      multiplier=1,
                                      total_epoch=WARMUP_EPOCHS,
                                      after_scheduler=GF_scheduler_cos)

Da_optimizer = optim.Adam(Da.parameters(), lr=D_MAX_LR, betas=(0.5, 0.999))
Da_scheduler_cos = CosineAnnealingWarmRestarts(Da_optimizer,
                                               T_0=TOTAL_EPOCHS,
                                               T_mult=1,
                                               eta_min=D_MIN_LR)
Da_scheduler = GradualWarmupScheduler(Da_optimizer,
                                      multiplier=1,
                                      total_epoch=WARMUP_EPOCHS,
                                      after_scheduler=Da_scheduler_cos)

Db_optimizer = optim.Adam(Db.parameters(), lr=D_MAX_LR, betas=(0.5, 0.999))
Db_scheduler_cos = CosineAnnealingWarmRestarts(Db_optimizer,
                                               T_0=TOTAL_EPOCHS,