Ejemplo n.º 1
0
def main():
    """Train YOLOv1 on the VOC training CSV, then optionally plot test detections.

    Every epoch prints the mAP of the current model on the training loader
    and then runs one training pass via ``train_fn``.  Once training is over,
    and only if more than 100 epochs were run, the test loader is walked and
    the NMS-filtered predictions for up to 16 images per batch are plotted.
    """
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss()
    train_dataset = VOCDataset("data/train.csv", transform=transform, img_dir=IMG_DIR, label_dir=LABEL_DIR)
    test_dataset = VOCDataset("data/test.csv", transform=transform, img_dir=IMG_DIR, label_dir=LABEL_DIR)
    train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, num_workers=1, pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, num_workers=1, pin_memory=PIN_MEMORY, shuffle=True, drop_last=True)

    for epoch in range(EPOCHS):
        pred_boxes, target_boxes = get_bboxes(train_loader, model, iou_threshold=0.5, threshold=0.4)
        mAP = mean_average_precision(pred_boxes, target_boxes, iou_threshold=0.5)
        print(f"Train mAP:{mAP}")
        train_fn(train_loader, model, optimizer, loss_fn)

    # The original tested `epoch > 99` after the loop, i.e. "the last epoch
    # index was at least 100".  `EPOCHS > 100` is the same condition but does
    # not raise NameError when EPOCHS is 0 and the loop never ran.
    if EPOCHS > 100:
        for x, _ in test_loader:
            x = x.to(DEVICE)
            # Run the network once per batch instead of once per plotted image.
            batch_bboxes = cellboxes_to_boxes(model(x))
            # Never index past the end of the batch when BATCH_SIZE < 16.
            for idx in range(min(16, x.shape[0])):
                bboxes = non_max_suppression(batch_bboxes[idx], iou_threshold=0.5, threshold=0.4)
                plot_image(x[idx].permute(1, 2, 0).to("cpu"), bboxes)
        
        

if __name__ == "__main__":
    # Start training only when this file is executed as a script.
    main()
Ejemplo n.º 2
0
def _setup_dataloaders(root_dir, return_dataset=False):
    """Create the train and val dataloaders for the VOC data under *root_dir*.

    Both splits receive the very same preprocessing list (one shared list
    object).  The train loader is built with ``num_workers=4``; the val
    loader uses ``get_dataloader``'s default.

    Args:
        root_dir: Passed to ``VOCDataset`` as the dataset location.
        return_dataset: When true, the two dataset objects are returned too.

    Returns:
        ``(dl_train, dl_val)`` or, if *return_dataset* is true,
        ``(dl_train, dl_val, ds_train, ds_val)``.
    """
    pipeline = [
        aug.NormalizeBboxes(cfg.grid_size),
        aug.Bboxes2Matrices(cfg.grid_size, cfg.num_classes),
        aug.Resize(cfg.target_size),
        aug.Normalize(cfg.mean, cfg.std, 1. / 255),
        aug.ToTensor(),
    ]

    train_set = VOCDataset(root_dir, image_set="train")
    train_dl = get_dataloader(train_set, pipeline, cfg.batch_size, num_workers=4)

    val_set = VOCDataset(root_dir, image_set="val")
    val_dl = get_dataloader(val_set, pipeline, cfg.batch_size)

    if not return_dataset:
        return train_dl, val_dl
    return train_dl, val_dl, train_set, val_set
Ejemplo n.º 3
0
def main():
    """Train YOLOv1 on ``data/100examples.csv``, printing the train mAP per epoch.

    Optionally restores model and optimizer state from ``LOAD_MODEL_FILE``
    when ``LOAD_MODEL`` is set.  A test loader over ``data/test.csv`` is
    built as well but is not consumed inside this function.
    """
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss()

    if LOAD_MODEL:
        checkpoint = torch.load(LOAD_MODEL_FILE)
        load_checkpoint(checkpoint, model, optimizer)

    # Both datasets share everything except the CSV listing their samples.
    dataset_options = dict(transform=transform, img_dir=IMG_DIR, label_dir=LABEL_DIR)
    train_dataset = VOCDataset("data/100examples.csv", **dataset_options)
    test_dataset = VOCDataset("data/test.csv", **dataset_options)

    # Both loaders are configured identically.
    loader_options = dict(
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        shuffle=True,
        drop_last=True,
    )
    train_loader = DataLoader(dataset=train_dataset, **loader_options)
    test_loader = DataLoader(dataset=test_dataset, **loader_options)

    for epoch in range(EPOCHS):
        # Score the model as it stands before this epoch's update.
        predictions, targets = get_bboxes(train_loader, model, iou_threshold=0.5, threshold=0.4)
        mean_avg_prec = mean_average_precision(
            predictions, targets, iou_threshold=0.5, box_format="midpoint"
        )
        print(f"Train mAP in {epoch}: {mean_avg_prec}")

        train_fn(train_loader, model, optimizer, loss_fn)
Ejemplo n.º 4
0
def _load_annotations(data_dir, csv_spec):
    """Return one annotation DataFrame for a CSV file name or an iterable of names."""
    if isinstance(csv_spec, str):
        return pd.read_csv('{}/{}'.format(data_dir, csv_spec))
    # DataFrame.append was removed in pandas 2.0; a single concat is the
    # supported equivalent.  The empty seed frame keeps the 'image' and
    # 'annotation' columns present even when csv_spec is empty.
    frames = [pd.DataFrame(columns=['image', 'annotation'])]
    frames.extend(
        pd.read_csv('{}/{}'.format(data_dir, name)) for name in csv_spec)
    return pd.concat(frames, ignore_index=True)


def get_dataloader(params, transform):
    """Build the train and val DataLoaders from CSV annotation listings.

    Args:
        params: Six-element sequence
            ``(BATCH_SIZE, NUM_WORKERS, PIN_MEMORY, DATA_DIR, CSV_TRAIN,
            CSV_VAL)``.  Each CSV entry is either a single file name or an
            iterable of file names, all relative to ``DATA_DIR``; multiple
            files are concatenated row-wise.
        transform: Mapping with ``'train'`` and ``'val'`` entries, forwarded
            to the corresponding ``VOCDataset``.

    Returns:
        Dict with keys ``'train'`` and ``'val'`` holding the DataLoaders.
        Both shuffle and both drop the last incomplete batch.
    """
    BATCH_SIZE, NUM_WORKERS, PIN_MEMORY, DATA_DIR, CSV_TRAIN, CSV_VAL = params
    print('***:', type(CSV_TRAIN), type(CSV_VAL))
    img_dir = '{}/images'.format(DATA_DIR)
    label_dir = '{}/labels'.format(DATA_DIR)

    csv_t = _load_annotations(DATA_DIR, CSV_TRAIN)
    csv_v = _load_annotations(DATA_DIR, CSV_VAL)

    data_train = VOCDataset(dataset_csv=csv_t,
                            img_dir=img_dir,
                            label_dir=label_dir,
                            transform=transform['train'])
    data_val = VOCDataset(dataset_csv=csv_v,
                          img_dir=img_dir,
                          label_dir=label_dir,
                          transform=transform['val'])
    data_loader = {
        'train':
        DataLoader(data_train,
                   batch_size=BATCH_SIZE,
                   shuffle=True,
                   num_workers=NUM_WORKERS,
                   drop_last=True,
                   pin_memory=PIN_MEMORY),
        'val':
        DataLoader(data_val,
                   batch_size=BATCH_SIZE,
                   shuffle=True,
                   num_workers=NUM_WORKERS,
                   drop_last=True,
                   pin_memory=PIN_MEMORY)
    }
    return data_loader
Ejemplo n.º 5
0
def main(model_name=None):
    """Run one validation pass of a saved ResNet-34 and pickle its raw outputs.

    The checkpoint ``<model_name>.pt`` is loaded into a ResNet-34 whose final
    layer has 20 outputs, the 'val' split is evaluated with a class-weighted
    ``BCEWithLogitsLoss`` (weights are ``min_count / class_count``), and the
    stacked predictions and targets are written to
    ``<model_name>_validation.pkl``.

    Args:
        model_name: Checkpoint path without the ``.pt`` suffix.  Required
            despite the ``None`` default.

    Raises:
        TypeError: If *model_name* is ``None``.
    """
    if model_name is None:
        # The original only failed later on `None + '.pt'`, after building the
        # dataset and downloading pretrained weights; fail early and clearly,
        # keeping the same exception type.
        raise TypeError(
            "model_name is required: pass the checkpoint path without '.pt'")

    tr = transforms.Compose([
        transforms.RandomResizedCrop(300),
        transforms.ToTensor(),
        transforms.Normalize([0.4589, 0.4355, 0.4032],
                             [0.2239, 0.2186, 0.2206])
    ])

    val_set = VOCDataset(directory, 'val', transforms=tr)
    val_loader = DataLoader(val_set,
                            batch_size=batch_size,
                            collate_fn=collate_wrapper,
                            shuffle=False,
                            num_workers=16)

    model = models.resnet34(pretrained=True)
    model.fc = nn.Linear(512, 20)
    model.load_state_dict(torch.load(model_name + '.pt'))
    model.to(device)

    # Rarer classes get proportionally larger loss weights (the rarest gets 1).
    classwise_frequencies = np.array(list(val_set.classes_count.values()))
    minimum_frequency = np.min(classwise_frequencies)
    loss_weights = minimum_frequency / classwise_frequencies
    loss_weights = torch.Tensor(loss_weights).to(device)
    loss_function = nn.BCEWithLogitsLoss(weight=loss_weights)

    val_loss, predictions, targets = validate(model, device, val_loader,
                                              loss_function)

    print("Saving raw predictions for validation pass...")
    # The `with` block closes the file; no explicit close() is needed.
    with open("{}_validation.pkl".format(model_name), 'wb') as f:
        pred_targets = torch.cat(
            (predictions.unsqueeze(0), targets.unsqueeze(0)))
        pickle.dump(pred_targets, f)
Ejemplo n.º 6
0
def main():
    """Train an SSD detector on the VOC training set for 100 epochs.

    Uses batches of 64, Adam at lr 1e-2 and a cosine-annealing schedule whose
    cycle length is the number of batches per epoch.  Anchors and grid sizes
    are created once, moved to ``DEVICE`` and captured by the loss closure.
    """
    batch_size = 64
    n_anchors = 4
    dataset = VOCDataset(TRAIN_JSON, TRAIN_JPEG, device=DEVICE)
    loader = VOCDataLoader(dataset, batch_size=batch_size, num_workers=0)

    # Debug aid kept from the original: plot a single batch to verify loading.
    # plotter = VOCPlotter(id2cat=dataset.id2cat, figsize=(12, 10))
    # for images, (boxes, classes) in iter(loader):
    #     with plotter:
    #         plotter.plot_boxes(*to_np(images, boxes, classes))
    #         break

    n_classes = len(dataset.id2cat)
    batches_per_epoch = math.ceil(len(dataset) / batch_size)

    model = SSD(n_classes=n_classes, bias=-3.)
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    scheduler = CosineAnnealingLR(optimizer, t_max=batches_per_epoch)
    loop = Loop(model, optimizer, scheduler, device=DEVICE)

    anchors = t(make_grid(n_anchors), requires_grad=False).float().to(DEVICE)
    grid_sizes = t([1 / n_anchors], requires_grad=False).unsqueeze(1).to(DEVICE)

    bce_loss = BinaryCrossEntropyLoss(n_classes)

    def loss_fn(x, y):
        """Bind the fixed anchors, grid sizes and class loss into ``ssd_loss``."""
        return ssd_loss(x, y, anchors, grid_sizes, bce_loss, n_classes)

    loop.run(train_data=loader, epochs=100, loss_fn=loss_fn, callbacks=[Logger()])
Ejemplo n.º 7
0
def main(args):
    """Run inference and/or training on the dataset stored under ``'data'``.

    Args:
        args: Object with boolean-like ``infer`` and ``train`` attributes.
            Both may be set; inference then runs before training.
    """
    voc = VOCDataset('data')
    # Debug shortcut from the original: return test(voc)
    if args.infer:
        infer(voc)
    if args.train:
        train(voc)
Ejemplo n.º 8
0
def evaluate():
    """Evaluate a saved SSD300 checkpoint and print per-class AP and mAP.

    Reads the module-level ``args``, ``device`` and ``label_map``.  The
    checkpoint's ``'model'`` entry is loaded into a fresh ``SSD300``; every
    batch of the selected image set is run through the model and
    ``detect_objects`` (min_score 0.01, max_overlap 0.45, top_k 200); the
    accumulated detections and ground truth are passed to ``calculate_mAP``.
    """
    weights_file = os.path.join(args.model_root, args.model_name)
    checkpoint = torch.load(weights_file, map_location=device)
    model = SSD300(n_classes=len(label_map), device=device).to(device)
    model.load_state_dict(checkpoint['model'])

    test_dataset = VOCDataset(root=args.data_root,
                              image_set=args.image_set,
                              transform=Transform(size=(300, 300), train=False),
                              keep_difficult=True)
    test_loader = DataLoader(dataset=test_dataset,
                             collate_fn=collate_fn,
                             batch_size=args.batch_size,
                             num_workers=args.num_workers,
                             shuffle=False,
                             pin_memory=True)

    # Per-image results, accumulated over the whole loader.
    det_boxes, det_labels, det_scores = [], [], []
    gt_boxes, gt_labels, gt_difficulties = [], [], []

    model.eval()
    with torch.no_grad():
        progress = tqdm(test_loader, desc='Evaluate the model')
        for images, bboxes, labels, difficulties in progress:
            images = images.to(device)
            bboxes = [box.to(device) for box in bboxes]
            labels = [lab.to(device) for lab in labels]
            difficulties = [diff.to(device) for diff in difficulties]

            predicted_bboxes, predicted_scores = model(images)
            batch_boxes, batch_labels, batch_scores = model.detect_objects(
                predicted_bboxes,
                predicted_scores,
                min_score=0.01,
                max_overlap=0.45,
                top_k=200)

            det_boxes.extend(batch_boxes)
            det_labels.extend(batch_labels)
            det_scores.extend(batch_scores)
            gt_boxes.extend(bboxes)
            gt_labels.extend(labels)
            gt_difficulties.extend(difficulties)

        all_ap, mean_ap = calculate_mAP(det_boxes,
                                        det_labels,
                                        det_scores,
                                        gt_boxes,
                                        gt_labels,
                                        gt_difficulties,
                                        device=device)

    PrettyPrinter().pprint(all_ap)
    print('Mean Average Precision (mAP): %.4f' % mean_ap)
Ejemplo n.º 9
0
def run():
    """Evaluate a saved Pascal VOC model on the 'val' split with five-crop inputs.

    Parses the command line, loads the model stored at
    ``./results/pascalvoc_A.pt``, runs ``test`` over the validation loader
    with ``binary_cross_entropy_with_logits`` as the loss, and saves the
    returned outputs to ``val_set_results.pt``.

    Several parsed options (epochs, lr, momentum, log-interval, mode) are not
    read in this function; the whole ``args`` namespace is forwarded to
    ``test``.
    """
    # The description used to say "MNIST", a leftover from the template this
    # script was derived from; the script evaluates a Pascal VOC model.
    parser = argparse.ArgumentParser(description='PyTorch Pascal VOC Example')
    parser.add_argument('--batch-size', type=int, default=32, metavar='N',
                        help='input batch size for training (default: 32)')

    # Help text previously claimed a default of 1000; the real default is 32.
    parser.add_argument('--test-batch-size', type=int, default=32, metavar='N',
                        help='input batch size for testing (default: 32)')

    parser.add_argument('--epochs', type=int, default=15, metavar='N',
                        help='number of epochs to train (default: 15)')

    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')

    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')

    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')

    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')

    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')

    parser.add_argument('--mode', type=str, default='A', metavar='M',
                        help='Mode of model')

    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    root = './'
    model_path = './results/pascalvoc_A.pt'

    # Build the per-crop transforms once instead of once per crop.
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # FiveCrop yields several crops per image; each is converted and
    # normalized separately, then stacked into a single tensor.
    test_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.FiveCrop(224),
        transforms.Lambda(lambda crops: torch.stack([to_tensor(crop) for crop in crops])),
        transforms.Lambda(lambda crops: torch.stack([normalize(crop) for crop in crops])),
    ])

    # Get dataset and input into Dataloader
    test_loader = torch.utils.data.DataLoader(
        VOCDataset(root, 'val', transform=test_transform),
        batch_size=args.test_batch_size, shuffle=False)

    test_loss_function = F.binary_cross_entropy_with_logits

    # Define Model
    model = load_model(model_path)
    model = model.to(device)

    val_loss, val_acc, output = test(args, model, device, test_loader, test_loss_function)

    torch.save(output, 'val_set_results.pt')
Ejemplo n.º 10
0
def main():
    """Train YOLOv1 for ``EPOCHS`` epochs on the images under ``IMG_DIR``.

    Train and test datasets both resize images to 448x448 and convert them
    to tensors.  The test loader is constructed but not consumed here.
    """
    model = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    loss_fn = YoloLoss()

    if LOAD_MODEL:
        # Placeholder: checkpoint loading is not implemented in this script.
        pass

    def make_transform():
        """Return a fresh resize-to-448x448 + to-tensor pipeline."""
        return transforms.Compose([
            transforms.Resize(size=(448, 448)),
            transforms.ToTensor(),
        ])

    train_dataset = VOCDataset(csv_file='', img_root=IMG_DIR, S=7, B=2, C=20,
                               transform=make_transform())
    test_dataset = VOCDataset(csv_file='', img_root=IMG_DIR,
                              transform=make_transform())

    # Train and test loaders share the exact same settings.
    loader_options = dict(
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        pin_memory=PIN_MEMORY,
        shuffle=True,
        drop_last=True,
    )
    train_loader = DataLoader(dataset=train_dataset, **loader_options)
    test_loader = DataLoader(dataset=test_dataset, **loader_options)

    for _ in range(EPOCHS):
        train_fn(train_loader, model, optimizer, loss_fn)
Ejemplo n.º 11
0
def get_dataloader(params):
    """Create the 'train' and 'val' DataLoaders described by *params*.

    Args:
        params: Six-element sequence
            ``(BATCH_SIZE, NUM_WORKERS, PIN_MEMORY, DATA_DIR, CSV_TRAIN,
            CSV_VAL)``.  Images are read from ``DATA_DIR/images``, labels
            from ``DATA_DIR/labels``, and the CSV names are relative to
            ``DATA_DIR``.

    Returns:
        Dict mapping ``'train'`` and ``'val'`` to their DataLoader.  Both
        splits resize to 448x448, shuffle, and drop the last partial batch.
    """
    batch_size, num_workers, pin_memory, data_dir, csv_train, csv_val = params
    img_dir = f'{data_dir}/images'
    label_dir = f'{data_dir}/labels'
    csv_paths = {
        'train': f'{data_dir}/{csv_train}',
        'val': f'{data_dir}/{csv_val}',
    }

    def build_transform():
        """Return a fresh resize + to-tensor pipeline."""
        return transforms.Compose([
            transforms.Resize((448, 448)),
            transforms.ToTensor(),
        ])

    split_transforms = {'train': build_transform(), 'val': build_transform()}

    # Build both datasets first, then both loaders, in train -> val order.
    datasets = {
        split: VOCDataset(dataset_csv=csv_path,
                          img_dir=img_dir,
                          label_dir=label_dir,
                          transform=split_transforms[split])
        for split, csv_path in csv_paths.items()
    }
    return {
        split: DataLoader(split_data,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=num_workers,
                          drop_last=True,
                          pin_memory=pin_memory)
        for split, split_data in datasets.items()
    }
Ejemplo n.º 12
0
def main():
    """Train YOLOv1 on ``data/8examples.csv``, printing the train mAP per epoch.

    Model and optimizer state are restored from ``load_model_file`` when
    ``load_model`` is set.  The train loader keeps its last partial batch
    (``drop_last=False``) while the test loader drops it; the test loader is
    built but not consumed here.
    """
    model = YOLOv1(split_size=7, num_boxes=2, num_classes=20).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=wd)
    loss_fn = YoloLoss()

    if load_model:
        checkpoint = torch.load(load_model_file)
        load_checkpoint(checkpoint, model, optimizer)

    dataset_options = dict(transform=transform, img_dir=img_dir, label_dir=label_dir)
    train_dataset = VOCDataset("data/8examples.csv", **dataset_options)
    test_dataset = VOCDataset("data/test.csv", **dataset_options)

    # Settings common to both loaders; only drop_last differs.
    shared_loader_options = dict(
        batch_size=bs,
        num_workers=num_workers,
        pin_memory=pin_mem,
        shuffle=True,
    )
    train_loader = DataLoader(dataset=train_dataset, drop_last=False, **shared_loader_options)
    test_loader = DataLoader(dataset=test_dataset, drop_last=True, **shared_loader_options)

    for _ in range(epochs):
        # Score the model as it stands before this epoch's update.
        predictions, targets = get_bboxes(train_loader, model, iou_threshold=0.5, threshold=0.4)
        mean_avg_prec = mean_average_precision(
            predictions, targets, iou_threshold=0.5, box_format="midpoint"
        )
        print(f"Train mAP: {mean_avg_prec}")

        train_fn(train_loader, model, optimizer, loss_fn)
Ejemplo n.º 13
0
def train():
    """Train FCN-8s end to end on the VOC2012 data found under ``./VOC2012/``.

    Opens a TF1-style interactive session with GPU memory growth enabled,
    loads the training and validation datasets, builds the FCN from VGG16
    weights, and fits it for ``N_EPOCHS`` epochs with an exponentially
    decaying learning rate.  Checkpoints go to ``<dataset>/saved_model``.
    """
    ROOT = './'
    VGG16_WEIGHT_PATH = './vgg/vgg16_weights.npz'
    DATASET_PATH = os.path.join(ROOT, 'VOC2012/')
    CHECKPOINT_DIR = os.path.join(DATASET_PATH, 'saved_model')

    IMAGE_SHAPE = (512, 512)
    N_CLASSES = 21
    N_EPOCHS = 100
    BATCH_SIZE = 1

    LEARNING_RATE = 1e-5
    DECAY_RATE = 0.95
    DECAY_EPOCH = 10
    DROPOUT_RATE = 0.5

    print('Starting end-to-end training FCN-8s')
    # Use the compat.v1 names throughout: the original mixed `tf.ConfigProto`
    # and `tf.GPUOptions` (absent from the TF 2.x top-level namespace) with
    # `tf.compat.v1.InteractiveSession`, so it could not run on TF 2.x.
    session_config = tf.compat.v1.ConfigProto(
        gpu_options=tf.compat.v1.GPUOptions(allow_growth=True))
    session = tf.compat.v1.InteractiveSession(config=session_config)
    # NOTE(review): as_default() returns a context manager that is never
    # entered here, so this call looks like a no-op; InteractiveSession
    # presumably installs itself as the default session -- confirm.
    session.as_default()

    # ------------- Load VOC from TFRecord ---------------
    dataset = VOCDataset()
    dataset_train = dataset.load_dataset(DATASET_PATH,
                                         BATCH_SIZE,
                                         is_training=True)
    dataset_val = dataset.load_dataset(DATASET_PATH,
                                       BATCH_SIZE,
                                       is_training=False)

    # ------------- Build fcn model ------------
    fcn = FCN(IMAGE_SHAPE, N_CLASSES, VGG16_WEIGHT_PATH)
    fcn.build_from_vgg()

    learning_rate_fn = learning_rate_with_exp_decay(BATCH_SIZE,
                                                    dataset.n_images['train'],
                                                    DECAY_EPOCH, DECAY_RATE,
                                                    LEARNING_RATE)
    compile_model(fcn, learning_rate_fn)
    fit_model(fcn, N_EPOCHS, BATCH_SIZE, dataset_train, dataset_val,
              CHECKPOINT_DIR, DROPOUT_RATE)
Ejemplo n.º 14
0
def main(args):
    """Save a crop of every annotated object in the dataset via ``save_object``.

    Args:
        args: Object providing ``images_dir``, ``labels_dir``,
            ``action_types`` and ``save_dir``.
    """
    voc = VOCDataset(args.images_dir, args.labels_dir)

    # Lazily flatten "images -> objects" into one stream of objects.
    every_object = (found for image in voc for found in image.objects)
    for found in every_object:
        save_object(found,
                    args.action_types,
                    args.save_dir,
                    must_include_all_actions=False,
                    save_negatives=True)
Ejemplo n.º 15
0
def get_detection_dataset(data_type, subset, root):
    """Instantiate the detection dataset matching *data_type*.

    Args:
        data_type: A member of ``bam_media_classes``, ``'clipart'`` or
            ``'voc'``.
        subset: Forwarded to the dataset constructor as its second argument.
        root: Forwarded to the dataset constructor as its first argument.

    Returns:
        The constructed dataset, asserted to be a
        ``chainer.dataset.DatasetMixin`` subclass instance.

    Raises:
        NotImplementedError: For any other *data_type*.
    """
    # Pick the class first; all three share the (root, subset) constructor.
    if data_type in bam_media_classes:
        dataset_cls = BAMDataset
    elif data_type == 'clipart':
        dataset_cls = ClipArtDataset
    elif data_type == 'voc':
        dataset_cls = VOCDataset
    else:
        raise NotImplementedError

    dataset = dataset_cls(root, subset)
    assert (issubclass(type(dataset), chainer.dataset.DatasetMixin))
    return dataset
Ejemplo n.º 16
0
def test():
    """Visual smoke test for the dataset's target encoding and decoding.

    With ``--func encode`` (the default) every sample is shown: the image
    with its ground-truth boxes on the left and one channel of the encoded
    target on the right.  With ``--func decode`` only the first sample is
    processed: the target is decoded back, the decoded and original values
    are printed, and the decoded boxes are drawn instead.
    """
    import numpy as np
    import argparse
    import matplotlib.pyplot as plt

    from visual import draw_rect
    from dataset import VOCDataset

    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--phase', default='train',
                        help='载入哪一部分的数据,默认是train,还可以是valid、test')
    parser.add_argument('-c', '--channle', default=4, type=int,
                        help='可视化preds的哪个维度,默认是4,即第一个B的confidence')
    parser.add_argument('-f', '--func', default='encode',
                        choices=['encode', 'decode'],
                        help='测试的方法,默认是encode,也可以是decode')
    args = parser.parse_args()

    dat = VOCDataset('G:/dataset/VOC2012/VOCdevkit/VOC2012/',
                     phase=args.phase,
                     drop_diff=False,
                     return_tensor=True,
                     out='all')

    for img, labels, locs, preds in dat:
        decoding = args.func != 'encode'
        if decoding:
            res_c, res_s, res_l = dat.y_encoder.decode(preds, img.size)
            for value in (res_c, res_s, res_l, labels, locs):
                print(value)
            img = draw_rect(img, res_l)
        else:
            img = draw_rect(img, locs, labels=labels)

        # Same two-panel figure for both modes: annotated image | target channel.
        fig, axes = plt.subplots(ncols=2, figsize=(10, 5))
        axes[0].imshow(np.asarray(img))
        axes[1].imshow(preds[..., args.channle])
        plt.show()

        if decoding:
            # Decode mode inspects a single sample only.
            break
Ejemplo n.º 17
0
def test():
    """Visual smoke test for the augmentation pipeline.

    Builds the full augmentation chain, applies it through ``VOCDataset``
    and displays every augmented sample with its boxes and labels drawn.
    """
    import numpy as np
    import argparse
    import matplotlib.pyplot as plt
    from torchvision import transforms

    from visual import draw_rect
    from dataset import VOCDataset

    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--phase', default='train',
                        help='载入哪一部分的数据,默认是train,还可以是valid、test')
    args = parser.parse_args()

    # This differs slightly from the GitHub version: there, each colour
    # transform independently has a 0.5 chance of being skipped, whereas
    # here, whenever the jitter fires, all three changes (brightness,
    # saturation, hue) are applied together.
    jitter = transforms.ColorJitter(
        brightness=(0.5, 1.5), saturation=(0.5, 1.5), hue=(-0.2, 0.2))
    color_transfers = transforms.RandomApply([jitter], 0.5)
    img_transfers = OnlyImage([RandomBlur(), color_transfers])

    augmentation_steps = [
        RandomHorizontalFlip(),
        RandomResize(),
        img_transfers,
        RandomShift(),
        RandomCrop(),
    ]
    all_transfers = Compose(augmentation_steps)

    dat = VOCDataset('G:/dataset/VOC2012/VOCdevkit/VOC2012/',
                     phase=args.phase,
                     drop_diff=False,
                     return_tensor=True,
                     transfers=all_transfers)

    for img, labels, locs in dat:
        annotated = draw_rect(img, locs, labels=labels)
        plt.imshow(np.asarray(annotated))
        plt.show()
Ejemplo n.º 18
0
def train():
    """Train ``net`` with SGD, logging to TensorBoard and saving per-epoch weights.

    Reads ``net`` and ``args`` from the enclosing (module) scope.  For every
    full-size batch it runs a forward/backward/optimizer step, logs the four
    loss terms and their total to TensorBoard and the console, and on every
    10th step writes a sample image with label and detection boxes to
    ``args.output``.  At the end of each epoch the state dict is saved under
    ``args.model_dir``.
    """

    net.train()

    # define optimizer
    optimizer = optim.SGD(net.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.decay)

    # create data batch generator
    training_set = VOCDataset("D:/dataset/VOC/VOCdevkit/",
                              "2012",
                              "train",
                              image_size=net.IMAGE_W)
    dataloader = DataLoader(training_set,
                            shuffle=True,
                            batch_size=net.BATCH_SIZE)

    N_ITERS_PER_EPOCH = len(dataloader)

    writer = SummaryWriter()

    # Record the model graph with a dummy 4x3x416x416 input.
    # NOTE(review): net.cpu() presumably moves the module to the CPU in place
    # (torch.nn.Module semantics) and nothing below moves it back, so training
    # appears to run on the CPU even when CUDA is available -- confirm.
    if torch.cuda.is_available():
        writer.add_graph(net.cpu(), torch.rand(4, 3, 416, 416))
    else:
        writer.add_graph(net, torch.rand(4, 3, 416, 416))

    for epoch in range(args.epoch):
        for step, (images, labels) in enumerate(dataloader):

            # Skip the final, smaller batch (no drop_last on the loader).
            if images.shape[0] != net.BATCH_SIZE:
                continue

            print("")
            print("========== Epoch: {}, step: {}/{} ==========".format(
                epoch, step, N_ITERS_PER_EPOCH))

            time_start = time.time()

            # NOTE(review): `image` is assigned here but never read; the
            # forward pass below receives `images`, not this (possibly CUDA)
            # copy.
            if torch.cuda.is_available():
                image = Variable(images.cuda(), requires_grad=True)
            else:
                image = Variable(images, requires_grad=True)

            optimizer.zero_grad()
            output = net.forward(images)

            # Total loss is the plain sum of the four terms.
            loss_xy, loss_wh, loss_conf, loss_cls = net.loss(output, labels)
            loss_coord = loss_xy + loss_wh
            total_loss = loss_coord + loss_conf + loss_cls

            total_loss.backward()
            optimizer.step()

            # Convert to Python numbers for logging.
            total_loss, loss_xy, loss_wh, loss_conf, loss_cls = [
                l.item()
                for l in [total_loss, loss_xy, loss_wh, loss_conf, loss_cls]
            ]

            ### logs to tensorboard
            # Global step index: batches seen so far across all epochs.
            writer.add_scalar('Train/Total_loss', total_loss,
                              epoch * N_ITERS_PER_EPOCH + step)
            writer.add_scalar('Train/Coordination_xy_loss', loss_xy,
                              epoch * N_ITERS_PER_EPOCH + step)
            writer.add_scalar('Train/Coordination_wh_loss', loss_wh,
                              epoch * N_ITERS_PER_EPOCH + step)
            writer.add_scalar('Train/Confidence_loss', loss_conf,
                              epoch * N_ITERS_PER_EPOCH + step)
            writer.add_scalar('Train/Class_loss', loss_cls,
                              epoch * N_ITERS_PER_EPOCH + step)

            ### log to console
            print('- Train step time: {} seconds'.format(time.time() -
                                                         time_start))
            print('- Train/Coordination_xy_loss: ', loss_xy)
            print('- Train/Coordination_wh_loss: ', loss_wh)
            print('- Train/Confidence_loss: ', loss_conf)
            print('- Train/Class_loss: ', loss_cls)
            print('- Train/Total_loss: ', total_loss)

            # Every 10th step, dump a visual sample of the first image.
            if step % 10 == 0:
                boxes = get_detection_result(output,
                                             net.ANCHORS,
                                             net.CLASS,
                                             conf_thres=0.5,
                                             nms_thres=0.4)

                # draw detected boxes and save sample
                # Channel-first tensor -> channel-last uint8 array for drawing.
                im = images[0].data.numpy().astype('uint8')
                im = im.transpose(1, 2, 0)
                im = im.copy()
                # presumably BGR tuples, since the image is written with cv2 -- confirm
                color_red = (0, 0, 255)
                color_green = (0, 255, 0)
                # Labels are drawn in color_green, detections in color_red.
                im = draw_boxes(im, labels[0], net.LABELS, color=color_green)
                im = draw_boxes(im, boxes[0], net.LABELS, color=color_red)

                file_path = os.path.join(
                    args.output,
                    "result_epoch_{}_iter_{}.jpg".format(epoch, step))
                cv2.imwrite(file_path, im)

        ### save model
        # One state-dict file per epoch.
        model_path = os.path.join(args.model_dir,
                                  "yolov2_epoch_{}.weights".format(epoch))
        torch.save(net.state_dict(), model_path)
        print("Saved model: ", model_path)

    writer.close()
Ejemplo n.º 19
0
from torchvision import transforms
from torch.utils.data import DataLoader
from dataset import VOCDataset, collate_wrapper

# Dataset location handed to VOCDataset.
directory = 'VOC2012'

# Crop the central 224x224 region, then convert the image to a tensor.
tr = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])

# 'train' split; multi_instance=True is forwarded to the dataset as-is.
train = VOCDataset(directory, 'train', transforms=tr, multi_instance=True)

# Shuffled batches of 16, collated by the project's collate_wrapper.
train_loader = DataLoader(
    train,
    batch_size=16,
    collate_fn=collate_wrapper,
    shuffle=True,
    num_workers=4,
)
"""
How to enumerate across the DataLoader:

for _, batch in enumerate(train_loader):
    batch_of_image_tensors = batch.image
    batch_of_label_lists = batch.labels
"""
Ejemplo n.º 20
0
def main(mode, num_epochs, num_workers, lr, sc, model_name=None):
    """Train, or resume training of, a ResNet-34 multi-label classifier on VOC.

    Each epoch runs ``train`` and ``validate``, pickles the raw validation
    predictions and targets, saves the weights whenever the validation loss
    beats every earlier epoch, and refreshes ``temp_model.pt``.  On normal
    completion a ``stop_...`` checkpoint is written; on Ctrl-C the weights
    from the last completed epoch are written as ``pause_...``.  The loss
    histories are saved as ``.npy`` files in both cases.

    Args:
        mode: ``'BCE'`` for a class-frequency-weighted ``BCEWithLogitsLoss``
            or ``'NB'`` for ``MultiLabelNBLoss`` built from the naive-Bayes
            matrix.
        num_epochs: Number of epochs to run in this call.
        num_workers: Worker count for both DataLoaders.
        lr: SGD learning rate (momentum is fixed at 0.9).
        sc: Passed to ``MultiLabelNBLoss`` as ``scaling_c``; also embedded in
            checkpoint file names.
        model_name: Checkpoint path without ``.pt`` to resume from, or
            ``None`` to start fresh.  When resuming, the epoch counter is
            parsed from the second-to-last ``_``-separated field.

    Raises:
        ValueError: If *mode* is neither ``'BCE'`` nor ``'NB'``.
    """
    # Fail fast: the original left `loss_function` unbound for an unknown
    # mode and only crashed with a NameError once training started.
    if mode not in ('BCE', 'NB'):
        raise ValueError("mode must be 'BCE' or 'NB', got {!r}".format(mode))

    tr = transforms.Compose([transforms.RandomResizedCrop(300),
                             transforms.ToTensor(),
                             transforms.Normalize([0.4589, 0.4355, 0.4032], [0.2239, 0.2186, 0.2206])])

    augs = transforms.Compose([transforms.RandomResizedCrop(300),
                               transforms.RandomRotation(20),
                               transforms.ToTensor(),
                               transforms.Normalize([0.4589, 0.4355, 0.4032], [0.2239, 0.2186, 0.2206])])

    # Get the NB matrix from the dataset,
    # counting multiple instances of labels.
    nb_dataset = VOCDataset(directory, 'train', transforms=tr, multi_instance=True)
    nb = NaiveBayes(nb_dataset, 1)
    mat = nb.get_nb_matrix()
    print_nb_matrix(nb_dataset, mat)
    mat = torch.Tensor(mat).to(device)

    # Define the training dataset, removing
    # multiple instances for the training problem.
    train_set = VOCDataset(directory, 'train', transforms=augs, multi_instance=False)
    train_loader = DataLoader(train_set, batch_size=batch_size, collate_fn=collate_wrapper, shuffle=True, num_workers=num_workers)

    val_set = VOCDataset(directory, 'val', transforms=tr)
    val_loader = DataLoader(val_set, batch_size=batch_size, collate_fn=collate_wrapper, shuffle=True, num_workers=num_workers)

    model = models.resnet34(pretrained=True)
    model.fc = nn.Linear(512, 20)

    if model_name is None:
        train_losses = []
        val_losses = []
        curr_epoch = 0
    else:
        model.load_state_dict(torch.load(model_name + '.pt'))
        print('Loading history')
        train_losses = np.load('train_history_{}_{}.npy'.format(mode, model_name)).tolist()
        val_losses = np.load('val_history_{}_{}.npy'.format(mode, model_name)).tolist()
        curr_epoch = int(model_name.split('_')[-2])

    model.to(device)
    print('Starting optimizer with LR={}'.format(lr))
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    if mode == 'BCE':
        # Rarer classes get proportionally larger weights (the rarest gets 1).
        classwise_frequencies = np.array(list(train_set.classes_count.values()))
        minimum_frequency = np.min(classwise_frequencies)
        loss_weights = minimum_frequency / classwise_frequencies
        loss_weights = torch.Tensor(loss_weights).to(device)
        loss_function = nn.BCEWithLogitsLoss(weight=loss_weights)
    else:  # mode == 'NB', validated above
        loss_function = MultiLabelNBLoss(mat, scaling_c=sc)

    epochs_completed = 0  # epochs finished during THIS call
    try:
        for _ in range(num_epochs):
            this_epoch = curr_epoch + 1
            train_loss = train(model, device, train_loader, optimizer, this_epoch, loss_function)
            val_loss, predictions, targets = validate(model, device, val_loader, loss_function)

            print("Saving raw predictions for epoch {}...".format(this_epoch))
            with open("pred_{}_{}.pkl".format(mode, this_epoch), 'wb') as f:
                pred_targets = torch.cat((predictions.unsqueeze(0), targets.unsqueeze(0)))
                pickle.dump(pred_targets, f)

            if val_losses and val_loss < min(val_losses):
                torch.save(model.state_dict(), "lr{}_sc{}_model_{}_{}_{:.4f}.pt".format(lr, sc, mode, this_epoch, val_loss))
                # Report the same epoch number that is used in the file name
                # (the original printed the per-call loop index instead).
                print("Saving model (epoch {}) with lowest validation loss: {}"
                    .format(this_epoch, val_loss))

            train_losses.append(train_loss)
            val_losses.append(val_loss)
            torch.save(model.state_dict(), 'temp_model.pt')
            curr_epoch += 1
            epochs_completed += 1

        model_save_name = "stop_lr{}_sc{}_model_{}_{}_{:.4f}.pt".format(lr, sc, mode, curr_epoch, val_losses[-1])
        torch.save(model.state_dict(), model_save_name)

    except KeyboardInterrupt:
        if epochs_completed == 0:
            # Nothing new to save: temp_model.pt is missing or left over from
            # an earlier run, and val_losses may be empty.
            print("Interrupted before the first epoch finished; nothing to save.")
            return
        # Discard the partially trained epoch; keep the last completed one.
        model.load_state_dict(torch.load('temp_model.pt'))
        model_save_name = "pause_lr{}_sc{}_model_{}_{}_{:.4f}.pt".format(lr, sc, mode, curr_epoch, val_losses[-1])
        torch.save(model.state_dict(), model_save_name)
        print("Saving model (epoch {}) with current validation loss: {}".format(curr_epoch, val_losses[-1]))

    train_history = np.array(train_losses)
    val_history = np.array(val_losses)

    print('Saving history')
    # Drop the "stop_"/"pause_" prefix and the ".pt" suffix.  The original
    # fixed slice [5:-3] assumed a 5-character prefix and so left a stray
    # leading underscore for "pause_" names.
    history_tag = model_save_name.split('_', 1)[1][:-len('.pt')]
    np.save("train_history_{}_{}".format(mode, history_tag), train_history)
    np.save("val_history_{}_{}".format(mode, history_tag), val_history)
Ejemplo n.º 21
0
def main():
    """Train a YOLOv1 detector, printing the training-set mAP ahead of each epoch.

    The model, Adam optimizer and YOLO loss are built from the module-level
    hyper-parameters. When ``LOAD_MODEL`` is truthy a checkpoint is restored
    from ``LOAD_MODEL_FILE`` first. Every epoch then scores the training
    loader (mAP at IoU 0.5, midpoint box format) and runs one ``train_fn``
    pass over the same loader.

    NOTE(review): the dataset is created with the literals ``S=3`` and
    ``C=2`` while the model and the loss use the module-level ``S``/``C`` --
    confirm that the two are meant to agree.
    """
    net = Yolov1(split_size=S, num_boxes=B, num_classes=C).to(DEVICE)
    adam = optim.Adam(
        net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )
    criterion = YoloLoss(S=S, B=B, C=C)

    if LOAD_MODEL:
        load_checkpoint(torch.load(LOAD_MODEL_FILE), net, adam)

    samples = VOCDataset(
        training_path='/home/mt/Desktop/For_github/computer_vision_projects/face_recognition/data',
        S=3,
        C=2,
        transform=transform,
    )

    # Only the training split is used here: no test loader is built and no
    # checkpoint is written by this function.
    loader_options = {
        "batch_size": BATCH_SIZE,
        "num_workers": NUM_WORKERS,
        "pin_memory": PIN_MEMORY,
        "shuffle": True,
        "drop_last": True,
    }
    batches = DataLoader(dataset=samples, **loader_options)

    for _ in range(EPOCHS):
        # The metric is taken *before* the weights are updated for this epoch.
        predicted, expected = get_bboxes(
            batches, net, iou_threshold=0.5, threshold=0.4
        )
        train_map = mean_average_precision(
            predicted, expected, iou_threshold=0.5, box_format="midpoint"
        )
        print(f"Train mAP: {train_map}")

        train_fn(batches, net, adam, criterion)
Ejemplo n.º 22
0
from fcn import FCN

# Killing optional CPU driver warnings
# (done through both the TF_CPP_MIN_LOG_LEVEL environment variable and the
# Python-side TensorFlow logger, which is limited to ERROR).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.logging.set_verbosity(tf.logging.ERROR)

# Script entry point: builds an FCN from VGG16 weights, prepares the
# validation input pipeline and opens a session with variables and the
# iterator initialised. No evaluation step is visible in this part of the file.
if __name__ == '__main__':
    # Model configuration handed to the FCN constructor.
    image_shape = (512, 512)
    n_classes = 21
    vgg16_weights_path = './vgg/vgg16_weights.npz'
    model = FCN(image_shape, n_classes, vgg16_weights_path)
    model.build_from_vgg()

    # The dataset is expected under ./VOC2012/ relative to the working directory.
    root_path = './'
    dataset_path = os.path.join(root_path, 'VOC2012/')
    dataset = VOCDataset(augmentation_params=None)

    # Validation pipeline: batches of 8, loaded with is_training=False.
    dataset_val = dataset.load_dataset(dataset_path,
                                       batch_size=8,
                                       is_training=False)
    # The iterator is described only by element types/shapes; it is attached
    # to dataset_val later by running val_init_op.
    iterator = tf.data.Iterator.from_structure(dataset_val.output_types,
                                               dataset_val.output_shapes)

    next_batch = iterator.get_next()
    val_init_op = iterator.make_initializer(dataset_val)

    # Request on-demand GPU memory growth for the session.
    session_config = tf.ConfigProto(gpu_options=tf.GPUOptions(
        allow_growth=True))
    with tf.Session(config=session_config) as session:
        # Initialise all graph variables, then point the iterator at the
        # validation dataset.
        session.run(tf.global_variables_initializer())
        session.run(val_init_op)
Ejemplo n.º 23
0
def main():
    """Train YOLOv1 on a small VOC subset, checkpointing once mAP exceeds 0.9.

    A 7x7-grid, 2-box, 20-class model is built together with an Adam
    optimizer and the YOLO loss; a checkpoint is restored first when
    ``LOAD_MODEL`` is truthy. Each epoch measures the mAP on the training
    loader, saves a checkpoint to ``LOAD_MODEL_FILE`` (followed by a
    10-second pause) if that mAP is above 0.9, prints the mAP and finally
    runs one ``train_fn`` pass.

    The test dataset and loader are constructed but not consumed anywhere in
    this function.
    """
    import time

    def build_loader(source):
        # Both splits share identical loader settings.
        return DataLoader(
            dataset=source,
            batch_size=BATCH_SIZE,
            num_workers=NUM_WORKERS,
            pin_memory=PIN_MEMORY,
            shuffle=True,
            drop_last=False,
        )

    net = Yolov1(split_size=7, num_boxes=2, num_classes=20).to(DEVICE)
    adam = optim.Adam(
        net.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )
    criterion = YoloLoss()

    if LOAD_MODEL:
        load_checkpoint(torch.load(LOAD_MODEL_FILE), net, adam)

    # Alternative CSV files mentioned by the author: test.csv, 8examples.csv,
    # 100examples.csv.
    train_dataset = VOCDataset(
        "data/8examples.csv",
        transform=transform,
        img_dir=IMG_DIR,
        label_dir=LABEL_DIR,
    )
    test_dataset = VOCDataset(
        "data/test.csv",
        transform=transform,
        img_dir=IMG_DIR,
        label_dir=LABEL_DIR,
    )

    train_loader = build_loader(train_dataset)
    test_loader = build_loader(test_dataset)

    for _ in range(EPOCHS):
        predicted, expected = get_bboxes(
            train_loader,
            net,
            iou_threshold=0.5,
            threshold=0.4,
            device=DEVICE,
        )
        train_map = mean_average_precision(
            predicted, expected, iou_threshold=0.5, box_format="midpoint"
        )

        if train_map > 0.9:
            snapshot = {
                "state_dict": net.state_dict(),
                "optimizer": adam.state_dict(),
            }
            save_checkpoint(snapshot, filename=LOAD_MODEL_FILE)
            time.sleep(10)

        print(f"Train mAP: {train_map}")

        train_fn(train_loader, net, adam, criterion)
Ejemplo n.º 24
0
def _draw_label_boxes(img, labels):
    """Return ``img`` with one rectangle drawn for every row of ``labels``.

    Columns 1-2 of a label row are used as the box centre and columns 3-4 as
    the box size; both are multiplied by the trailing image dimensions to get
    integer pixel corners.

    NOTE(review): ``w, h`` are taken from ``img.shape[1:]`` exactly as in the
    original code. If the image is laid out as (C, H, W) this makes ``w`` the
    height -- confirm against the convention ``rectangle`` expects.
    """
    w, h = img.shape[1:]
    for j in range(labels.shape[0]):
        half_w = labels[j, 3] / 2
        half_h = labels[j, 4] / 2
        x1 = int((labels[j, 1] - half_w) * w)
        x2 = int((labels[j, 1] + half_w) * w)
        y1 = int((labels[j, 2] - half_h) * h)
        y2 = int((labels[j, 2] + half_h) * h)
        img = rectangle(img, x1, x2, y1, y2)
    return img


def _save_annotated(data, target_dir, suffix):
    """Draw the boxes of every sample in ``data`` and save it as ``<id>_<suffix>.jpeg``."""
    for img_id, (img, labels) in enumerate(data):
        save_image(_draw_label_boxes(img, labels),
                   '{}/{}_{}.jpeg'.format(target_dir, img_id, suffix))


def test2(target_dir='augmented2'):
    """Dump annotated dataset samples for visual inspection of augmentations.

    Three passes are written into ``target_dir``:

    * the untransformed samples, as ``<id>_bb.jpeg``;
    * five passes with ``RTranslation``, as ``<id>_transl<pass>.jpeg``;
    * five passes with ``RScaling``, as ``<id>_scale<pass>.jpeg``.

    Args:
        target_dir: Output directory; it is created if it does not exist.
    """
    import os

    data_dir = '/home/alex/datasets/PascalVOC'
    img_dir = '{}/images'.format(data_dir)
    label_dir = '{}/labels'.format(data_dir)
    csv_dir = '{}/8examples.csv'.format(data_dir)
    # csv_dir = '{}/1example.csv'.format(data_dir)

    # Previously a missing output directory made every save fail.
    os.makedirs(target_dir, exist_ok=True)

    def load(transform):
        # All passes read the same files; only the transform differs.
        return VOCDataset(dataset_csv=csv_dir,
                          img_dir=img_dir,
                          label_dir=label_dir,
                          transform=transform,
                          test=True)

    # Reference pass without any augmentation.
    _save_annotated(load(Compose([])), target_dir, 'bb')

    # Each augmentation is sampled five times; the dataset is re-iterated on
    # every pass so each pass goes through the transform again.
    for tag, augmentation in (('transl', RTranslation), ('scale', RScaling)):
        data = load(Compose([
            augmentation(),
        ]))
        for epoch in range(5):
            _save_annotated(data, target_dir, '{}{}'.format(tag, epoch))
Ejemplo n.º 25
0
# Figure styling: regular math text and extra-large fonts everywhere.
rc('mathtext', default='regular')
params = {
    'legend.fontsize': 'x-large',
    'figure.figsize': (10, 10),
    'axes.labelsize': 'x-large',
    'axes.titlesize': 'x-large',
    'xtick.labelsize': 'x-large',
    'ytick.labelsize': 'x-large',
}
pylab.rcParams.update(params)

# One plot with two y-axes sharing the x-axis: `ax` is labelled "Count" and
# `ax2` is labelled "Weights" below.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(25, 18))
ax2 = ax.twinx()

from dataset import VOCDataset

# Per-class sample counts of the training split and the derived weights
# (smallest count divided by each count).
dataset = VOCDataset('VOC2012', 'train')
keys = list(dataset.classes_count.keys())
values = np.array(list(dataset.classes_count.values()))
minv = values.min()
maxv = values.max()
newvalues = minv / values

for axis in (ax, ax2):
    axis.tick_params(labelsize=20)
ax2.set_ylim(top=maxv)
plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

ax.set_title("Training set distribution", fontsize=30)
ax.set_xlabel("Classes", fontsize=30)
ax.set_ylabel("Count", fontsize=30)
ax2.set_ylabel("Weights", fontsize=30)
Ejemplo n.º 26
0
from dataset import VOCDataset, collate_wrapper
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# Estimate per-channel mean/std of the training images: statistics are taken
# per image over all pixels, summed over the dataset and divided by the number
# of images seen.
tr = transforms.Compose([
    transforms.RandomResizedCrop(300),
    transforms.ToTensor(),
])
dataset = VOCDataset('VOC2012', 'train', transforms=tr)
loader = DataLoader(
    dataset,
    batch_size=48,
    collate_fn=collate_wrapper,
    shuffle=False,
    num_workers=16,
)

# Running sums; they become tensors after the first batch is added.
mean = 0.
std = 0.
nb_samples = 0.

for _, batch in enumerate(loader):
    images = batch.image
    # Flatten the spatial dimensions: (batch, channels, pixels).
    data = images.view(images.size(0), images.size(1), -1)
    mean = mean + data.mean(2).sum(0)
    std = std + data.std(2).sum(0)
    nb_samples = nb_samples + data.size(0)

mean = mean / nb_samples
std = std / nb_samples

print(mean,std)
Ejemplo n.º 27
0
def run():
    """Train the Pascal VOC 2012 classifier and save the best weights and history.

    Command-line options select batch sizes, epoch count, learning rate,
    CUDA usage, seed and a free-form ``--mode`` tag used in output file names.
    After every epoch the validation loss is compared with the best one so
    far; a snapshot of the weights is kept whenever it improves. At the end
    the best snapshot and the per-epoch train/validation history are written
    into ``./results``.

    Outputs:
        ``./results/pascalvoc_<mode>.pt``: state dict with the lowest
            validation loss.
        ``./results/pascalvoc_<mode>_results.pt``: dict with ``train_loss``,
            ``val_loss`` and ``val_acc`` lists.
    """
    import copy
    import os

    parser = argparse.ArgumentParser(description='Pascal VOC 2012 Classifier')
    parser.add_argument('--batch-size',
                        type=int,
                        default=32,
                        metavar='N',
                        help='input batch size for training (default: 32)')

    parser.add_argument('--test-batch-size',
                        type=int,
                        default=32,
                        metavar='N',
                        help='input batch size for testing (default: 32)')

    parser.add_argument('--epochs',
                        type=int,
                        default=15,
                        metavar='N',
                        help='number of epochs to train (default: 15)')

    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        metavar='LR',
                        help='learning rate (default: 0.001)')

    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')

    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')

    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')

    parser.add_argument('--mode',
                        type=str,
                        default='A',
                        metavar='M',
                        help='Mode of model')

    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    root = './'

    train_transform, test_transform = initialise_transforms()

    # Get dataset and input into Dataloader
    train_loader = torch.utils.data.DataLoader(
        VOCDataset(root, 'train', transform=train_transform),
        batch_size=args.batch_size,
        shuffle=True)

    test_loader = torch.utils.data.DataLoader(
        VOCDataset(root, 'val', transform=test_transform),
        batch_size=args.test_batch_size,
        shuffle=True)

    # Define Loss function
    train_loss_function = nn.modules.BCEWithLogitsLoss()
    test_loss_function = F.binary_cross_entropy_with_logits

    # Define Model
    model, params = load_model()
    model = model.to(device)

    # Define Optimizer
    optimizer = torch.optim.Adam(
        params,
        lr=args.lr,
    )

    best_loss = -1  # negative sentinel: no validation loss recorded yet
    best_param = None
    train_loss_epoch = []
    val_loss_epoch = []
    val_acc_epoch = []

    for epoch in range(1, (args.epochs + 1)):
        train_loss = train(args, model, device, train_loader, optimizer, epoch,
                           train_loss_function)
        val_loss, val_acc = test(args, model, device, test_loader,
                                 test_loss_function)

        train_loss_epoch.append(train_loss.item())
        val_acc_epoch.append(val_acc)
        val_loss_epoch.append(val_loss)

        if best_loss < 0 or val_loss < best_loss:
            best_loss = val_loss
            # state_dict() returns references to the live parameters, so it
            # must be copied; otherwise later epochs overwrite the "best"
            # weights in place.
            best_param = copy.deepcopy(model.state_dict())
            print("FOUND BETTER MODEL, SAVING WEIGHTS...\n")

    if best_param is None:
        # No epoch ran (e.g. --epochs 0): fall back to the current weights
        # instead of failing on an unbound variable.
        best_param = model.state_dict()

    results = {
        "train_loss": train_loss_epoch,
        "val_loss": val_loss_epoch,
        "val_acc": val_acc_epoch
    }

    print('Saving model...')
    save_dir = './results'
    os.makedirs(save_dir, exist_ok=True)
    # Join with a separator; plain concatenation produced
    # './resultspascalvoc_<mode>.pt' in the working directory.
    model_path = os.path.join(save_dir, 'pascalvoc_' + args.mode + '.pt')
    torch.save(best_param, model_path)
    print('Model saved as : {}\n'.format(model_path))

    print('Saving results...')
    results_path = os.path.join(
        save_dir, 'pascalvoc_' + args.mode + '_results' + '.pt')
    torch.save(results, results_path)
    print('Results saved as : {}'.format(results_path))
Ejemplo n.º 28
0
from torch.utils.data import DataLoader
from torchvision import transforms
import torch

from dataset import VOCDataset

# Compute the per-channel mean and variance of the images so that they can be
# used for normalization later on.

valid_dataset = VOCDataset(train=False,
                           transform=transforms.ToTensor(),
                           label_transform=transforms.ToTensor())
# batch_size=1 so that img[0] below is the single image of each batch.
valid_loader = DataLoader(dataset=valid_dataset,
                          batch_size=1,
                          shuffle=True,
                          num_workers=8)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if __name__ == "__main__":
    # Running sums of the per-image channel means / stds (channels 0, 1, 2).
    mean_0 = 0
    mean_1 = 0
    mean_2 = 0
    std_0 = 0
    std_1 = 0
    std_2 = 0

    for i, (img, label) in enumerate(valid_loader):
        # Tensor.to() returns a new tensor rather than moving it in place, so
        # the result has to be rebound or the transfer is silently discarded.
        img = img.to(device)
        mean_0 += img[0][0].mean()
        mean_1 += img[0][1].mean()
        mean_2 += img[0][2].mean()
Ejemplo n.º 29
0
def train():
    """Train SSD300 with SGD and write a checkpoint after every epoch.

    Configuration comes from the module-level ``args``. Bias parameters get
    twice the base learning rate. When ``args.resume`` is set, the model,
    optimizer and epoch counter are restored from that checkpoint. The
    learning rate is scaled by 0.1 at epochs 120, 140 and 160. The latest
    state is always stored as ``ssd300.pth`` in ``args.save_root`` and copied
    to ``ssd300_epochs_<n>.pth`` every ``args.save_freq`` epochs.
    """
    set_seed(seed=10)
    os.makedirs(args.save_root, exist_ok=True)

    # create model, optimizer and criterion
    model = SSD300(n_classes=len(label_map), device=device)
    trainable = [(name, param)
                 for name, param in model.named_parameters()
                 if param.requires_grad]
    biases = [param for name, param in trainable if name.endswith('.bias')]
    not_biases = [param for name, param in trainable
                  if not name.endswith('.bias')]
    model = model.to(device)

    param_groups = [
        {'params': biases, 'lr': 2 * args.lr},
        {'params': not_biases},
    ]
    optimizer = torch.optim.SGD(params=param_groups,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    start_epoch = 0
    if args.resume is not None:
        checkpoint = torch.load(args.resume, map_location=device)
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
    print(f'Training will start at epoch {start_epoch}.')

    criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy,
                             device=device,
                             alpha=args.alpha).to(device)
    # A StepLR scheduler (step_size=20, gamma=0.5,
    # last_epoch=start_epoch - 1, verbose=True) is intentionally not used;
    # the learning rate is decayed manually inside the epoch loop instead.

    # load data
    train_dataset = VOCDataset(root=args.data_root,
                               image_set=args.image_set,
                               transform=Transform(size=(300, 300),
                                                   train=True),
                               keep_difficult=True)
    train_loader = DataLoader(dataset=train_dataset,
                              collate_fn=collate_fn,
                              batch_size=args.batch_size,
                              num_workers=args.num_workers,
                              shuffle=True,
                              pin_memory=True)

    latest_path = os.path.join(args.save_root, 'ssd300.pth')
    decay_epochs = [120, 140, 160]

    losses = AverageMeter()
    for epoch in range(start_epoch, args.num_epochs):
        # decay learning rate at particular epochs
        if epoch in decay_epochs:
            adjust_learning_rate(optimizer, 0.1)

        # train model
        model.train()
        losses.reset()
        progress = tqdm(train_loader, desc='Train the model')
        for step, (images, bboxes, labels, _) in enumerate(progress):
            images = images.to(device)
            bboxes = [box.to(device) for box in bboxes]
            labels = [label.to(device) for label in labels]

            # (N, 8732, 4), (N, 8732, num_classes)
            predicted_bboxes, predicted_scores = model(images)
            loss = criterion(predicted_bboxes, predicted_scores, bboxes,
                             labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            losses.update(loss.item(), images.size(0))

            # Report the running average on the last step of every
            # print_freq-sized window.
            if step % args.print_freq == args.print_freq - 1:
                progress.write(f'Average Loss: {losses.avg:.4f}')

        progress.write(f'Epoch: [{epoch + 1}|{args.num_epochs}] '
                       f'Average Loss: {losses.avg:.4f}')

        # save model
        snapshot = {
            'epoch': epoch,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        torch.save(snapshot, latest_path)

        # Keep a numbered copy of the latest checkpoint every save_freq epochs.
        if epoch % args.save_freq == args.save_freq - 1:
            numbered_path = os.path.join(args.save_root,
                                         f'ssd300_epochs_{epoch + 1}.pth')
            shutil.copyfile(latest_path, numbered_path)
Ejemplo n.º 30
0
def run():
    """Run the Pascal VOC 2012 classifier demo.

    Loads the weights stored in ``pascalvoc_<mode>.pt`` and then, depending on
    ``--demo_mode``:

    * ``single``: classifies the image at ``--image_path`` and hands the
      sigmoid scores to ``display_prediction``;
    * ``gui``: scores the whole validation split, keeps the prediction scores
      masked by the ground-truth labels for every example, and prints them
      together with the elapsed time.

    Raises:
        ValueError: if ``--demo_mode`` is neither ``'single'`` nor ``'gui'``.
    """
    parser = argparse.ArgumentParser(description='Pascal VOC 2012 Classifier')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=32,
                        metavar='N',
                        help='input batch size for testing (default: 32)')
    parser.add_argument('--mode',
                        type=str,
                        default='A',
                        metavar='M',
                        help='Mode of model')
    parser.add_argument('--demo_mode',
                        type=str,
                        default='single',
                        metavar='M',
                        help='Mode of demo')
    parser.add_argument('--image_path',
                        type=str,
                        default='./test.jpg',
                        metavar='M',
                        help='Path of the image used in single mode')
    # parser.add_argument('--class_name', type=str, default='aeroplane', metavar='M',
    #                     help='Mode of demo')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    # Get transform
    _, test_transform = initialise_transforms()

    # Initialise model
    model, _ = load_model()
    model = model.to(device)
    model.eval()
    model_name = 'pascalvoc_' + args.mode + '.pt'
    print('Loading model...')
    # map_location lets weights saved on a GPU be loaded on a CPU-only run
    # (e.g. with --no-cuda).
    model.load_state_dict(torch.load(model_name, map_location=device))

    # Convert jpg to tensor
    if args.demo_mode == 'single':
        image = Image.open(args.image_path).convert('RGB')
        image_tensor = test_transform(image).unsqueeze(0).to(device)
        # Get model prediction; no gradients are needed for inference.
        with torch.no_grad():
            pred = torch.sigmoid(model(image_tensor))
        display_prediction(pred, image)

    elif args.demo_mode == 'gui':
        # NOTE: class_to_index and the two transforms below are not used
        # further in this function.
        class_to_index = utils.class_to_index()
        # index = class_to_index[args.class_name]

        # 2-part transform to preserve image after first_transform
        first_transform = transforms.Compose([transforms.Resize(224)])
        second_transform = transforms.Compose([
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        # `root` is never assigned in this function. Use a module-level
        # `root` when one exists, otherwise fall back to the current
        # directory instead of failing with a NameError.
        data_root = globals().get('root', './')

        # Validation set
        test_loader = torch.utils.data.DataLoader(
            VOCDataset(data_root, 'val', transform=test_transform),
            batch_size=args.test_batch_size,
            shuffle=True)

        # Get predictions on validation set (model is already in eval mode)
        all_predictions = []
        start = time.time()
        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(device), target.to(device)
                output = torch.sigmoid(model(data))
                target = target.float()
                # Precision for each class in each example
                for i in range(output.shape[0]):
                    # Ground truth as mask for predictions
                    scores = target[i] * output[i]
                    all_predictions.append(scores)

        end = time.time()
        print("Time lapsed: {:.2f}s".format((end - start)))
        print(all_predictions)

    else:
        raise ValueError("Please enter demo_mode as 'single' or 'gui'")