Example #1
0
def train(args):
    """Train a Net model on the dataset described by *args*.

    Sets up the device, a timestamped save folder, data loaders, the model
    (optionally resumed from ``args.resume``), an Adam optimizer and a
    cross-entropy loss, then runs the training loop for ``args.max_epoch``
    epochs, logging the running average loss every ``args.show_interval``
    iterations.
    """
    # CONFIGS = yaml.load(open(args.config)) # deprecated, please set the configs in parse_args()

    # Set device
    # NOTE(review): CUDA_VISIBLE_DEVICES is only reliable if set before the
    # first CUDA call; torch.cuda.is_available() may already have touched the
    # driver here — confirm the intended GPU selection actually takes effect.
    if torch.cuda.is_available():
        os.environ["CUDA_VISIBLE_DEVICES"] = args.device.strip()
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")  # Not suggested

    # Set save folder & logging config
    subfolder = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime(time.time()))
    if not args.save_folder or (not os.path.isdir(args.save_folder)):
        # Fixed the original double-negative wording ("Not invalid value").
        print(
            "Warning: Invalid value of 'save_folder', set as default value: './save_folder'.."
        )
        save_folder = "./save_folder"
    else:
        save_folder = args.save_folder
    save_folder = os.path.join(save_folder, subfolder)
    # makedirs replaces the racy exists()+mkdir() pair and creates the
    # parent folder and the timestamped subfolder in one call.
    os.makedirs(save_folder, exist_ok=True)
    #TODO:logging

    # Load Dataset
    trainloader = get_loader(args.train_gtfile,
                             batch_size=args.batch_size,
                             num_thread=args.num_workers)
    valloader = get_loader(args.val_gtfile,
                           batch_size=args.batch_size,
                           num_thread=args.num_workers)

    # Init Net
    model = Net(numAngle=args.num_angle,
                numRho=args.num_rho,
                backbone=args.backbone)
    if args.resume:
        model.load_state_dict(torch.load(args.resume))
    model = torch.nn.DataParallel(model).to(device)

    # Optimizer
    optimizer = optim.Adam(model.parameters())

    # Loss
    criterion = torch.nn.CrossEntropyLoss()
    losses = AverageMeter()

    # Start Training
    model.train()
    step = 0  # global iteration counter; renamed from `iter` (shadowed the builtin)
    for epoch in range(args.max_epoch):

        for batch in trainloader:
            start = time.time()
            step += 1  # first processed batch is step 1
            img_tensor, gt_tensor = batch
            # Move both input and target to the model's device; the original
            # left them on CPU, which makes CrossEntropyLoss fail when the
            # model output lives on the GPU.
            img_tensor = img_tensor.to(device)
            gt_tensor = gt_tensor.to(device)
            optimizer.zero_grad()

            # Forwarding
            preds = model(img_tensor)

            # Calculate Loss
            loss = criterion(preds, gt_tensor)
            loss.backward()
            optimizer.step()
            losses.update(loss.item(), args.batch_size)

            if step % args.show_interval == 0:
                logging.info(
                    f"Training [{epoch}/{args.max_epoch}][{step}] Loss:{losses.avg} Time:{time.time()-start:.1f}s"
                )

            if step % args.val_interval == 0:
                pass  # TODO: run validation on valloader
Example #2
0
def main():
    """Entry point: build the model, optimizer and loaders from CONFIGS,
    optionally resume from a checkpoint, then train/validate for the
    configured number of epochs, checkpointing the best-accuracy model."""

    logger.info(args)
    assert os.path.isdir(CONFIGS["DATA"]["DIR"])

    # Optional determinism: seed python and torch RNGs, pin cudnn kernels.
    if CONFIGS['TRAIN']['SEED'] is not None:
        random.seed(CONFIGS['TRAIN']['SEED'])
        torch.manual_seed(CONFIGS['TRAIN']['SEED'])
        cudnn.deterministic = True

    model = Net(numAngle=CONFIGS["MODEL"]["NUMANGLE"],
                numRho=CONFIGS["MODEL"]["NUMRHO"],
                backbone=CONFIGS["MODEL"]["BACKBONE"])

    if CONFIGS["TRAIN"]["DATA_PARALLEL"]:
        logger.info("Model Data Parallel")
        model = nn.DataParallel(model).cuda()
    else:
        model = model.cuda(device=CONFIGS["TRAIN"]["GPU_ID"])

    # optimizer
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=CONFIGS["OPTIMIZER"]["LR"],
        weight_decay=CONFIGS["OPTIMIZER"]["WEIGHT_DECAY"])

    # learning rate scheduler
    scheduler = lr_scheduler.MultiStepLR(
        optimizer,
        milestones=CONFIGS["OPTIMIZER"]["STEPS"],
        gamma=CONFIGS["OPTIMIZER"]["GAMMA"])
    best_acc1 = 0
    if args.resume:
        if isfile(args.resume):
            logger.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): optimizer state is intentionally not restored here;
            # scheduler is also not fast-forwarded to the resumed epoch — confirm.
            # optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            logger.info("=> no checkpoint found at '{}'".format(args.resume))

    # dataloader
    train_loader = get_loader(CONFIGS["DATA"]["DIR"],
                              CONFIGS["DATA"]["LABEL_FILE"],
                              batch_size=CONFIGS["DATA"]["BATCH_SIZE"],
                              num_thread=CONFIGS["DATA"]["WORKERS"],
                              split='train')
    val_loader = get_loader(CONFIGS["DATA"]["VAL_DIR"],
                            CONFIGS["DATA"]["VAL_LABEL_FILE"],
                            batch_size=1,
                            num_thread=CONFIGS["DATA"]["WORKERS"],
                            split='val')

    logger.info("Data loading done.")

    # Tensorboard summary

    writer = SummaryWriter(log_dir=os.path.join(CONFIGS["MISC"]["TMP"]))

    # Honor the epoch restored from the checkpoint; the original hard-coded
    # start_epoch = 0, silently restarting the epoch count on resume.
    start_epoch = getattr(args, 'start_epoch', 0) or 0
    best_acc = best_acc1
    is_best = False
    start_time = time.time()

    if CONFIGS["TRAIN"]["RESUME"] is not None:
        raise NotImplementedError(
            "TRAIN.RESUME is not supported; use --resume instead.")

    if CONFIGS["TRAIN"]["TEST"]:
        validate(val_loader, model, 0, writer, args)
        return

    logger.info("Start training.")

    for epoch in range(start_epoch, CONFIGS["TRAIN"]["EPOCHS"]):

        train(train_loader, model, optimizer, epoch, writer, args)
        acc = validate(val_loader, model, epoch, writer, args)
        scheduler.step()

        # Track the best validation accuracy seen so far.
        if best_acc < acc:
            is_best = True
            best_acc = acc
        else:
            is_best = False

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc,
                'optimizer': optimizer.state_dict()
            },
            is_best,
            path=CONFIGS["MISC"]["TMP"])

        # ETA estimate: average seconds/epoch so far times remaining epochs.
        t = time.time() - start_time
        elapsed = DayHourMinute(t)
        t /= (epoch + 1) - start_epoch  # seconds per epoch
        t = (CONFIGS["TRAIN"]["EPOCHS"] - epoch - 1) * t
        remaining = DayHourMinute(t)

        logger.info(
            "Epoch {0}/{1} finished, auxiliaries saved to {2} .\t"
            "Elapsed {elapsed.days:d} days {elapsed.hours:d} hours {elapsed.minutes:d} minutes.\t"
            "Remaining {remaining.days:d} days {remaining.hours:d} hours {remaining.minutes:d} minutes."
            .format(epoch,
                    CONFIGS["TRAIN"]["EPOCHS"],
                    CONFIGS["MISC"]["TMP"],
                    elapsed=elapsed,
                    remaining=remaining))

    logger.info("Optimization done, ALL results saved to %s." %
                CONFIGS["MISC"]["TMP"])
Example #3
0
# Validation loader: shuffle=False keeps evaluation order deterministic.
valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                           batch_size=batch_size,
                                           num_workers=num_workers,
                                           shuffle=False)
# NOTE(review): device is hard-coded; this script assumes a CUDA GPU is
# available and will fail on a CPU-only machine — confirm intended.
device = 'cuda'

if torch.cuda.device_count() > 1 and device == 'cuda':
    print("Let's use", torch.cuda.device_count(), "GPUs!")
model = Net(num_classes=n_classes)
# pretrained model in my pc. now i will train on all images for 2 epochs
# model.load_state_dict(torch.load('./epoch_5_val_loss_7.03_auc_0.844.pth'))
# DataParallel is wrapped unconditionally (harmless with a single GPU).
model = nn.DataParallel(model).to(device)


optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = torch.nn.CrossEntropyLoss()

# Loss histories; not appended to anywhere in the visible code.
train_loss, val_loss = [], []

for epoch in range(epochs):
    print('Epoch {}/{}'.format(epoch, epochs - 1))
    print('-' * 10)
    model.train()
    running_loss = 0
    # Progress bar over the training batches.
    tk0 = tqdm(train_loader, total=int(len(train_loader)))
    for im, labels in tk0:
        # assumes each batch item `im` is a dict holding the tensor under
        # the "image" key — TODO confirm against the Dataset implementation
        inputs = im["image"].to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.long)
        optimizer.zero_grad()
        outputs = model(inputs)
Example #4
0
                        shuffle=False,
                        num_workers=2)

    #    dataiter = iter(loader)
    #    images, labels = dataiter.next()
    #    print (images)
    #    images=tensor_to_img(images)
    #    print (labels)
    #    print (images)

    # Build the network; the constructor argument scales with batch_size —
    # presumably an input/feature dimension, verify against Net's definition.
    net = Net(14 * batch_size)
    # LSTM mapping 7x7 grid features to 14x14 grid predictions with
    # (num_class + 5 * 2) channels per cell — looks like a YOLO-style box
    # encoding (2 boxes x 5 values), TODO confirm.
    lstm = LSTMLayer(7 * 7 * (16 + 5 * 2), 64, 14 * 14 * (num_class + 5 * 2),
                     2, batch_size)
    lossfunction = Loss(batch_size)
    # NOTE(review): this two-group optimizer (net frozen with lr=0, lstm at
    # 1e-4) is discarded below — `optimizer` is rebound to net.parameters()
    # only, so the lstm is never updated. Confirm which setup is intended.
    optimizer = optim.Adam([{
        'params': net.parameters()
    }, {
        'params': lstm.parameters(),
        'lr': 0.0001
    }],
                           lr=0,
                           weight_decay=0)
    if load_checkpoint:
        net.load_state_dict(torch.load(SAVE_PATH))

    net.cuda()

    # Overwrites the optimizer constructed above (see NOTE).
    optimizer = optim.Adam(net.parameters(), lr=0.0001)
    for epoch in range(2000):
        for i, data in enumerate(loader, 0):
            # get the inputs