Exemple #1
0
def main():
    """Restore the trained sub-models and pass them to ``make_image``.

    The directory that holds the config file is used as the result
    directory. Every entry of the model dict is restored from
    ``final_model_<key>.prm`` in that directory, moved to the device and
    switched to eval mode. ``make_image`` is then called with the
    training data loader.
    """
    cli_args = get_arguments()
    config = get_config(cli_args.config)

    # Weights are read from the folder that contains the config file.
    result_path = os.path.dirname(cli_args.config)

    device = get_device(allow_only_gpu=True)

    model = get_model(
        name=config.model, z_dim=config.z_dim, image_size=config.size
    )
    for key, net in model.items():
        weights_file = os.path.join(result_path, "final_model_%s.prm" % key)
        weights = torch.load(weights_file)

        net.load_state_dict(weights)
        net.to(device)
        net.eval()

    loader_options = dict(
        csv_file=config.train_csv,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.num_workers,
        pin_memory=True,
        drop_last=True,
        transform=ImageTransform(mean=get_mean(), std=get_std()),
    )
    train_loader = get_dataloader(**loader_options)

    make_image(
        train_loader, model, config.model, result_path, config.z_dim, device
    )

    print("Done")
Exemple #2
0
def main() -> None:
    """Evaluate a trained classifier on the validation or the test split.

    The directory that holds the config file is used as the result
    directory. The training arrays are split 90/10 (stratified by id) into
    train/validation parts; ``args.mode`` selects whether the validation
    part or the separate test arrays are evaluated. Weights come from
    ``args.model`` when given, otherwise from ``best_model.prm`` in the
    result directory.

    Writes ``<mode>_log.csv`` (loss, acc@1, F1 score) and
    ``<mode>_c_matrix.csv`` (the confusion matrix rows returned by
    ``evaluate``) into the result directory.
    """
    args = get_arguments()

    # configuration
    config = get_config(args.config)

    # outputs are stored next to the config file
    result_path = os.path.dirname(args.config)

    # cpu or cuda
    device = get_device(allow_only_gpu=True)

    # Dataloader
    assert args.mode in ["validation", "test"], (
        f"mode must be 'validation' or 'test', got {args.mode!r}")

    transform = Compose([
        Resize(config.size),
        ToTensor(),
        Normalize(mean=get_mean(), std=get_std())
    ])

    # the stored arrays are reshaped to one 28x28 image per row
    imgs = np.load(config.train_imgs)["arr_0"]
    imgs = imgs.reshape(-1, 28, 28)
    ids = np.load(config.train_ids)["arr_0"]
    # train_ids is still needed below for the criterion, so the split is
    # computed even when the test set is evaluated
    train_imgs, val_imgs, train_ids, val_ids = train_test_split(
        imgs, ids, test_size=0.1, random_state=random_seed, stratify=ids)
    test_imgs = np.load(config.test_imgs)["arr_0"]
    test_imgs = test_imgs.reshape(-1, 28, 28)
    test_ids = np.load(config.test_ids)["arr_0"]

    loader = get_dataloader(
        imgs=val_imgs if args.mode == "validation" else test_imgs,
        ids=val_ids if args.mode == "validation" else test_ids,
        batch_size=1,
        shuffle=False,
        num_workers=config.num_workers,
        pin_memory=True,
        transform=transform,
    )

    # the number of classes
    n_classes = len(get_cls2id_map())

    model = get_model(config.model, n_classes, pretrained=config.pretrained)

    # send the model to cuda/cpu
    model.to(device)

    # load the state dict of the model
    if args.model is not None:
        state_dict = torch.load(args.model)
    else:
        state_dict = torch.load(os.path.join(result_path, "best_model.prm"))

    model.load_state_dict(state_dict)

    # criterion for loss
    criterion = get_criterion(config.use_class_weight, train_ids, device)

    print(f"---------- Start evaluation for {args.mode} data ----------")

    # evaluation
    loss, acc1, f1s, c_matrix = evaluate(loader, model, criterion, device)

    print("loss: {:.5f}\tacc1: {:.2f}\tF1 Score: {:.2f}".format(
        loss, acc1, f1s))

    df = pd.DataFrame(
        {
            "loss": [loss],
            "acc@1": [acc1],
            "f1score": [f1s]
        },
        columns=["loss", "acc@1", "f1score"],
        index=None,
    )

    # Format only the file name: formatting the whole joined path would
    # misbehave when result_path itself contains "{" or "}".
    log_path = os.path.join(result_path, f"{args.mode}_log.csv")
    df.to_csv(log_path, index=False)

    c_matrix_path = os.path.join(result_path, f"{args.mode}_c_matrix.csv")
    # newline="" lets the csv writer control line endings itself, so the
    # "\n" terminator is not translated by the text layer.
    with open(c_matrix_path, "w", newline="") as file:
        writer = csv.writer(file, lineterminator="\n")
        writer.writerows(c_matrix)

    print("Done.")
Exemple #3
0
def main() -> None:
    """Train a classifier and validate it after every epoch.

    The directory that holds the config file is used as the result
    directory, and its base name is used as the experiment name. Per epoch
    the function:

    * runs ``train`` and ``evaluate`` and times both,
    * saves ``best_model.prm`` whenever the validation loss improves,
    * saves a checkpoint via ``save_checkpoint``,
    * appends a row to ``log.csv`` and, unless ``args.no_wandb`` is set,
      logs the same metrics to Weights & Biases.

    After the last epoch ``final_model.prm`` is written and the checkpoint
    file is removed. With ``args.resume`` the run continues from
    ``checkpoint.pth`` and the existing ``log.csv``.
    """
    args = get_arguments()

    # configuration
    config = get_config(args.config)

    # save log files in the directory which contains config file.
    result_path = os.path.dirname(args.config)
    experiment_name = os.path.basename(result_path)

    # cpu or cuda
    device = get_device(allow_only_gpu=True)

    # Dataloader
    train_transform = Compose([
        RandomResizedCrop(size=(config.height, config.width)),
        RandomHorizontalFlip(),
        ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        ToTensor(),
        Normalize(mean=get_mean(), std=get_std()),
    ])

    val_transform = Compose(
        [ToTensor(), Normalize(mean=get_mean(), std=get_std())])

    train_loader = get_dataloader(
        config.train_csv,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.num_workers,
        pin_memory=True,
        drop_last=True,
        transform=train_transform,
    )

    val_loader = get_dataloader(
        config.val_csv,
        batch_size=1,
        shuffle=False,
        num_workers=config.num_workers,
        pin_memory=True,
        transform=val_transform,
    )

    # the number of classes
    n_classes = len(get_cls2id_map())

    # define a model
    model = get_model(config.model, n_classes, pretrained=config.pretrained)

    # send the model to cuda/cpu
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    # keep training and validation log
    begin_epoch = 0
    best_loss = float("inf")
    log = pd.DataFrame(columns=[
        "epoch",
        "lr",
        "train_time[sec]",
        "train_loss",
        "train_acc@1",
        "train_f1s",
        "val_time[sec]",
        "val_loss",
        "val_acc@1",
        "val_f1s",
    ])

    # resume if you want
    if args.resume:
        resume_path = os.path.join(result_path, "checkpoint.pth")
        begin_epoch, model, optimizer, best_loss = resume(
            resume_path, model, optimizer)

        # the previous log is continued, so it must exist as well
        log_path = os.path.join(result_path, "log.csv")
        assert os.path.exists(
            log_path), "there is no checkpoint at the result folder"
        log = pd.read_csv(log_path)

    # criterion for loss
    criterion = get_criterion(config.use_class_weight, config.train_csv,
                              device)

    # Weights and biases
    if not args.no_wandb:
        wandb.init(
            name=experiment_name,
            config=config,
            project="image_classification_template",
            job_type="training",
            # wandb.init names its output-directory argument ``dir``;
            # ``dirs`` is not an accepted keyword.
            dir="./wandb_result/",
        )
        # Magic
        wandb.watch(model, log="all")

    # train and validate model
    print("---------- Start training ----------")

    for epoch in range(begin_epoch, config.max_epoch):
        # training
        start = time.time()
        train_loss, train_acc1, train_f1s = train(train_loader, model,
                                                  criterion, optimizer, epoch,
                                                  device)
        train_time = int(time.time() - start)

        # validation (the confusion matrix is not used during training)
        start = time.time()
        val_loss, val_acc1, val_f1s, _ = evaluate(
            val_loader, model, criterion, device)
        val_time = int(time.time() - start)

        # save a model if the validation loss is lower than ever
        if best_loss > val_loss:
            best_loss = val_loss
            torch.save(
                model.state_dict(),
                os.path.join(result_path, "best_model.prm"),
            )

        # save checkpoint every epoch
        save_checkpoint(result_path, epoch, model, optimizer, best_loss)

        # write logs to dataframe and csv file
        tmp = pd.Series(
            [
                epoch,
                optimizer.param_groups[0]["lr"],
                train_time,
                train_loss,
                train_acc1,
                train_f1s,
                val_time,
                val_loss,
                val_acc1,
                val_f1s,
            ],
            index=log.columns,
        )

        # DataFrame.append was removed in pandas 2.0; adding the row by
        # label enlargement works on old and new pandas alike.
        log.loc[len(log)] = tmp
        log.to_csv(os.path.join(result_path, "log.csv"), index=False)

        # save logs to wandb
        if not args.no_wandb:
            wandb.log(
                {
                    "lr": optimizer.param_groups[0]["lr"],
                    "train_time[sec]": train_time,
                    "train_loss": train_loss,
                    "train_acc@1": train_acc1,
                    "train_f1s": train_f1s,
                    "val_time[sec]": val_time,
                    "val_loss": val_loss,
                    "val_acc@1": val_acc1,
                    "val_f1s": val_f1s,
                },
                step=epoch,
            )

        print("""epoch: {}\tepoch time[sec]: {}\tlr: {}\ttrain loss: {:.4f}\t\
            val loss: {:.4f} val_acc1: {:.5f}\tval_f1s: {:.5f}
            """.format(
            epoch,
            train_time + val_time,
            optimizer.param_groups[0]["lr"],
            train_loss,
            val_loss,
            val_acc1,
            val_f1s,
        ))

    # save models
    torch.save(model.state_dict(), os.path.join(result_path,
                                                "final_model.prm"))

    # delete checkpoint; it does not exist when no epoch was run in a
    # fresh (non-resumed) session, so guard the removal
    checkpoint_path = os.path.join(result_path, "checkpoint.pth")
    if os.path.exists(checkpoint_path):
        os.remove(checkpoint_path)

    print("Done")
Exemple #4
0
def main():
    """Train a detection model and validate it after every epoch.

    The directory that holds the config file is used as the result
    directory. If ``final_model.prm`` already exists there, the function
    returns immediately. Per epoch it:

    * runs ``train`` and ``evaluate`` and times both,
    * saves ``best_model.prm`` whenever the validation loss improves,
    * saves a checkpoint via ``save_checkpoint``,
    * appends a row to ``log.csv`` and regenerates graphs with
      ``make_graphs``.

    After the last epoch ``final_model.prm`` is written and the checkpoint
    file is removed. With ``args.resume`` the run continues from
    ``checkpoint.pth`` and the existing ``log.csv``.
    """
    args = get_arguments()
    config = get_config(args.config)

    result_path = os.path.dirname(args.config)

    # a finished run leaves final_model.prm behind; do not train again
    if os.path.exists(os.path.join(result_path, "final_model.prm")):
        print("Already done.")
        return

    device = get_device(allow_only_gpu=True)

    transform = DataTransform(config.size, get_mean())
    voc_classes = list(get_cls2id_map().keys())

    train_loader = get_dataloader(
        config.train_csv,
        phase="train",
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.num_workers,
        pin_memory=True,
        drop_last=True,
        transform=transform,
        transform_anno=Anno_xml2list(voc_classes),
    )

    # NOTE(review): shuffle/drop_last are enabled for the validation loader
    # too; kept as-is — confirm this is intended.
    val_loader = get_dataloader(
        config.val_csv,
        phase="val",
        batch_size=1,
        shuffle=True,
        num_workers=config.num_workers,
        pin_memory=True,
        drop_last=True,
        transform=transform,
        transform_anno=Anno_xml2list(voc_classes),
    )

    # one extra class on top of the mapped ones
    # (presumably the background class — TODO confirm)
    n_classes = len(voc_classes) + 1
    model = get_model(
        input_size=config.size,
        n_classes=n_classes,
        phase="train",
        pretrained=config.pretrained,
    )
    model.to(device)

    optimizer = optim.SGD(
        model.parameters(),
        lr=config.learning_rate,
        momentum=0.9,
        weight_decay=5e-4
    )

    begin_epoch = 0
    best_loss = float("inf")
    # TODO: consider which evaluation metrics to use
    log = pd.DataFrame(
        columns=[
            "epoch",
            "lr",
            "train_time[sec]",
            "train_loss",
            "val_time[sec]",
            "val_loss",
        ]
    )

    if args.resume:
        resume_path = os.path.join(result_path, "checkpoint.pth")
        begin_epoch, model, optimizer, best_loss = resume(resume_path, model, optimizer)

        # the previous log is continued, so it must exist as well
        log_path = os.path.join(result_path, "log.csv")
        assert os.path.exists(log_path), "there is no checkpoint at the result folder"
        log = pd.read_csv(log_path)

    criterion = get_criterion(device=device)

    print("---------- Start training ----------")

    for epoch in range(begin_epoch, config.max_epoch):
        start = time.time()
        train_loss = train(
            train_loader,
            model,
            criterion,
            optimizer,
            epoch,
            device,
            interval_of_progress=10,
        )
        train_time = int(time.time() - start)

        start = time.time()
        val_loss = evaluate(
            val_loader,
            model,
            criterion,
            device,
        )
        val_time = int(time.time() - start)

        # keep the weights with the lowest validation loss so far
        if best_loss > val_loss:
            best_loss = val_loss
            torch.save(
                model.state_dict(),
                os.path.join(result_path, "best_model.prm"),
            )

        save_checkpoint(result_path, epoch, model, optimizer, best_loss)

        tmp = pd.Series(
            [
                epoch,
                optimizer.param_groups[0]["lr"],
                train_time,
                train_loss,
                val_time,
                val_loss,
            ],
            index=log.columns,
        )

        # DataFrame.append was removed in pandas 2.0; adding the row by
        # label enlargement works on old and new pandas alike.
        log.loc[len(log)] = tmp
        log.to_csv(os.path.join(result_path, "log.csv"), index=False)
        make_graphs(os.path.join(result_path, "log.csv"))

        print(
            """epoch: {}\tepoch time[sec]: {}\tlr: {}\ttrain loss: {:.4f}\t\
            val loss: {:.4f}
            """.format(
                epoch,
                train_time + val_time,
                optimizer.param_groups[0]["lr"],
                train_loss,
                val_loss,
            )
        )

    torch.save(model.state_dict(), os.path.join(result_path, "final_model.prm"))

    # the checkpoint does not exist when no epoch was run in a fresh
    # (non-resumed) session, so guard the removal
    checkpoint_path = os.path.join(result_path, "checkpoint.pth")
    if os.path.exists(checkpoint_path):
        os.remove(checkpoint_path)

    print("Done")
Exemple #5
0
def main():
    """Train the generator/discriminator pair of a GAN.

    The directory that holds the config file is used as the result
    directory. If ``final_model_G.prm`` already exists there, the function
    returns immediately. ``get_model`` returns a dict of sub-models; the
    entries ``"G"`` and ``"D"`` each get their own Adam optimizer. Per
    epoch the function:

    * runs ``train`` and times it,
    * saves ``best_model_<key>.prm`` for every sub-model whenever the sum
      of discriminator and generator loss improves,
    * saves a checkpoint via ``save_checkpoint``,
    * appends a row to ``log.csv`` and regenerates graphs with
      ``make_graphs``.

    After the last epoch ``final_model_<key>.prm`` is written for every
    sub-model and the per-model checkpoint files are removed.
    """
    args = get_arguments()
    config = get_config(args.config)

    result_path = os.path.dirname(args.config)

    # a finished run leaves final_model_G.prm behind; do not train again
    if os.path.exists(os.path.join(result_path, "final_model_G.prm")):
        print("Already done.")
        return

    device = get_device(allow_only_gpu=True)

    train_loader = get_dataloader(
        csv_file=config.train_csv,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.num_workers,
        pin_memory=True,
        drop_last=True,
        transform=ImageTransform(mean=get_mean(), std=get_std()),
    )

    model = get_model(config.model, z_dim=config.z_dim, image_size=config.size)
    for v in model.values():
        v.to(device)

    # generator and discriminator have separate learning rates but share
    # the same Adam betas
    g_optimizer = torch.optim.Adam(
        model["G"].parameters(),
        config.g_lr,
        [config.beta1, config.beta2],
    )
    d_optimizer = torch.optim.Adam(
        model["D"].parameters(),
        config.d_lr,
        [config.beta1, config.beta2],
    )
    optimizer = {
        "G": g_optimizer,
        "D": d_optimizer,
    }

    begin_epoch = 0
    best_loss = float("inf")
    # TODO: consider which evaluation metrics to use
    log = pd.DataFrame(
        columns=[
            "epoch",
            "d_lr",
            "g_lr",
            "train_time[sec]",
            "train_loss",
            "train_d_loss",
            "train_g_loss",
        ]
    )

    if args.resume:
        # the "%s" placeholder is passed on unformatted; presumably resume()
        # fills it in per sub-model key — TODO confirm
        resume_path = os.path.join(result_path, "checkpoint_%s.pth")
        begin_epoch, model, optimizer, best_loss = resume(resume_path, model, optimizer)

        # the previous log is continued, so it must exist as well
        log_path = os.path.join(result_path, "log.csv")
        assert os.path.exists(log_path), "there is no checkpoint at the result folder"
        log = pd.read_csv(log_path)

    criterion = nn.BCEWithLogitsLoss(reduction="mean")

    print("---------- Start training ----------")

    for epoch in range(begin_epoch, config.max_epoch):
        start = time.time()
        train_d_loss, train_g_loss = train(
            train_loader,
            model,
            config.model,
            criterion,
            optimizer,
            epoch,
            config.z_dim,
            device,
            interval_of_progress=1,
        )
        train_time = int(time.time() - start)

        # "best" is judged on the combined discriminator + generator loss
        if best_loss > train_d_loss + train_g_loss:
            best_loss = train_d_loss + train_g_loss
            for k in model.keys():
                torch.save(
                    model[k].state_dict(),
                    os.path.join(result_path, "best_model_%s.prm" % k),
                )

        save_checkpoint(result_path, epoch, model, optimizer, best_loss)

        tmp = pd.Series(
            [
                epoch,
                optimizer["D"].param_groups[0]["lr"],
                optimizer["G"].param_groups[0]["lr"],
                train_time,
                train_d_loss + train_g_loss,
                train_d_loss,
                train_g_loss,
            ],
            index=log.columns,
        )

        # DataFrame.append was removed in pandas 2.0; adding the row by
        # label enlargement works on old and new pandas alike.
        log.loc[len(log)] = tmp
        log.to_csv(os.path.join(result_path, "log.csv"), index=False)
        make_graphs(os.path.join(result_path, "log.csv"))

        print(
            "epoch: {}\tepoch time[sec]: {}\tD_lr: {}\tG_lr: {}\ttrain loss: {:.4f}\ttrain d_loss: {:.4f}\ttrain g_loss: {:.4f}".format(
                epoch,
                train_time,
                optimizer["D"].param_groups[0]["lr"],
                optimizer["G"].param_groups[0]["lr"],
                train_d_loss + train_g_loss,
                train_d_loss,
                train_g_loss,
            )
        )

    for k in model.keys():
        torch.save(
            model[k].state_dict(),
            os.path.join(result_path, "final_model_%s.prm" % k),
        )

    # a checkpoint file may be missing when no epoch was run in a fresh
    # (non-resumed) session, so guard each removal
    for k in model.keys():
        checkpoint_path = os.path.join(result_path, "checkpoint_%s.pth" % k)
        if os.path.exists(checkpoint_path):
            os.remove(checkpoint_path)

    print("Done")