# ===== Example 1 =====
        for cluster in range(k):
            clusters[cluster] = dist(boxes[nearest_clusters == cluster],
                                     axis=0)

        last_clusters = nearest_clusters

    return clusters


if __name__ == "__main__":
    import os
    import sys

    # BUG FIX: the parent directory must be on sys.path BEFORE importing
    # project modules; the original appended to sys.path only after
    # `from datasets.voc import ...`, which fails unless the path was
    # already configured elsewhere. (The unused `import torchvision`
    # was also dropped.)
    root_path = os.path.abspath("..")
    sys.path.append(root_path)

    from datasets.voc import handle_voc_target, VOCDetection

    # Collect ground-truth boxes from the training split; columns 1:5 are
    # assumed to be the box coordinates — TODO confirm against
    # handle_voc_target's output layout.
    sets = VOCDetection(os.path.join(root_path, "data"),
                        image_set="train",
                        target_transform=handle_voc_target)
    boxes = []
    for _, target in sets:
        boxes.append(target[:, 1:5])
    boxes = np.concatenate(boxes, 0)
    boxes = handle_boxes(boxes)

    # Cluster all boxes into k=9 anchor shapes and report them.
    out = kmeans(boxes, k=9)
    print("Boxes: \n {}".format(out))

    # Aspect ratios (col 0 / col 1) of the resulting anchors, 3 decimals.
    ratios = np.around(out[:, 0] / out[:, 1], decimals=3).tolist()
    print("Ratios: \n {}".format(sorted(ratios)))
# ===== Example 2 =====
# =====================================
# ImageNet mean/std normalization; applied to the image tensor only.
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])  # apply to image data only

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])  # apply to image data only

trainset = VOCDetection(root='./datasets',
                        year='2007',
                        image_set='train',
                        download=False,
                        transform=transform_train)

# BUG FIX: the validation set previously used transform_train, leaving
# transform_test defined but never referenced. The two pipelines are
# currently identical, so this is behavior-neutral today, but it prevents
# train-time augmentations from silently leaking into evaluation later.
testset = VOCDetection(root='./datasets',
                       year='2007',
                       image_set='val',
                       download=False,
                       transform=transform_test)

trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=4)

testloader = torch.utils.data.DataLoader(testset,
# ===== Example 3 =====
def init_dataloader_train(opt):
    """Create the training DataLoader for the dataset named by ``opt.dataset``.

    Supported names: 'voc07' / 'voc12' / 'voc0712', 'wider', 'fddb',
    'v_caption_detection', 'v_caption_patch', 'v_caption',
    'v_caption_patch_num', 'v_caption_patch_alp', 'v_caption_patch_alp2',
    'v_caption_patch_sym', 'v_caption_patch_type'.

    Side effects: mutates ``opt.num_classes`` in the voc (21), wider (2),
    fddb (2) and v_caption_patch_type (4) branches, and for fddb rewrites
    the train list file on disk.

    Args:
        opt: options namespace; reads ``batch_size_train``, ``dataset``,
            ``data_root_dir`` and ``num_workers`` (plus ``project_root``
            and ``fold`` for fddb).

    Returns:
        A ``DataLoader`` over the selected dataset.

    Raises:
        ValueError: if ``opt.dataset`` is not one of the names above.
    """
    batch_size = opt.batch_size_train
    dataset_name = opt.dataset
    transform = init_transforms(opt, 'train')

    def _make_loader(dataset, shuffle, pin_memory=False):
        # Every branch builds its loader the same way; only shuffle and
        # pin_memory vary. pin_memory=False matches the DataLoader default,
        # so omitting it in the original branches was equivalent.
        return dataloader.DataLoader(dataset=dataset,
                                     batch_size=batch_size,
                                     shuffle=shuffle,
                                     num_workers=opt.num_workers,
                                     pin_memory=pin_memory)

    if dataset_name.startswith('voc'):
        opt.num_classes = 21
        from datasets.voc import VOCDetection
        voc_root = os.path.join(opt.data_root_dir, 'VOCdevkit')

        if dataset_name == 'voc07':
            image_root = os.path.join(voc_root, 'VOC2007/JPEGImages')
            list_file = 'data/voc/voc07_trainval.txt'
        elif dataset_name == 'voc12':
            image_root = os.path.join(voc_root, 'VOC2012/JPEGImages')
            list_file = 'data/voc/voc12_trainval.txt'
        elif dataset_name == 'voc0712':
            # Combined 07+12 training reads both trainval lists.
            image_root = os.path.join(voc_root, 'VOC0712/JPEGImages')
            list_file = ['data/voc/voc07_trainval.txt',
                         'data/voc/voc12_trainval.txt']
        else:
            raise ValueError('Not a valid dataset name')

        dataset = VOCDetection(root=image_root,
                               list_file=list_file,
                               transform=transform)
        loader = _make_loader(dataset, shuffle=True)

    elif dataset_name == 'wider':
        opt.num_classes = 2
        from datasets.wider import WiderDetection
        wider_root = os.path.join(opt.data_root_dir, 'WIDER/WIDER_train/images')
        dataset = WiderDetection(root=wider_root,
                                 list_file='data/wider/wider_train.txt',
                                 transform=transform)
        # NOTE(review): shuffle=False on a *training* loader looks unusual;
        # kept as-is to preserve behavior — confirm it is intentional.
        loader = _make_loader(dataset, shuffle=False)

    elif dataset_name == 'fddb':
        opt.num_classes = 2
        from datasets.fddb import FDDBDetection
        from preprocess.fddb.preprocess import parse_fddb_annotation

        fddb_root = os.path.join(opt.data_root_dir, 'FDDB/imgs')
        annotation_dir = os.path.join(opt.data_root_dir, 'FDDB/anno')
        list_file = os.path.join(opt.project_root, 'data/fddb/fddb_train.txt')
        # Remove any stale list file before regenerating it. The original
        # shelled out via os.system('rm ...'), which is non-portable and
        # breaks on paths containing spaces.
        if os.path.exists(list_file):
            os.remove(list_file)
        parse_fddb_annotation(fddb_root, annotation_dir, 'train', list_file,
                              opt.fold)

        dataset = FDDBDetection(root=fddb_root,
                                list_file=list_file,
                                mode='train',
                                transform=transform)
        loader = _make_loader(dataset, shuffle=True)

    elif dataset_name == 'v_caption_detection':
        from datasets.v_caption_detection import V_Caption_Detection
        data_root = os.path.join(opt.data_root_dir,
                                 'V.DO/caption/background_result')
        dataset = V_Caption_Detection(
            root=data_root,
            list_file='data/v_caption_detection/caption_BG_train.txt',
            transform=transform)
        loader = _make_loader(dataset, shuffle=True)

    elif dataset_name == 'v_caption_patch':
        from datasets.v_caption_patch import V_Caption_Patch
        dataset = V_Caption_Patch(
            root=os.path.join(opt.data_root_dir, ''),
            list_file='data/v_caption_patch_hangul/patch_train.txt',
            transform=transform)
        loader = _make_loader(dataset, shuffle=True, pin_memory=True)

    elif dataset_name == 'v_caption':
        from datasets.v_caption import V_Caption
        dataset = V_Caption(
            root=os.path.join(opt.data_root_dir, ''),
            list_file=['data/v_caption/hangul_patch_train.txt'],
            transform=transform)
        loader = _make_loader(dataset, shuffle=True, pin_memory=True)

    elif dataset_name == 'v_caption_patch_num':
        from datasets.v_caption_patch_num import V_Caption_Patch_Num
        dataset = V_Caption_Patch_Num(
            root=os.path.join(opt.data_root_dir, ''),
            list_file='data/v_caption_detection/bgnumber_patch_train.txt',
            transform=transform)
        loader = _make_loader(dataset, shuffle=True, pin_memory=True)

    elif dataset_name == 'v_caption_patch_alp':
        from datasets.v_caption_patch_alp import V_Caption_Patch_Alp
        dataset = V_Caption_Patch_Alp(
            root=os.path.join(opt.data_root_dir, ''),
            list_file='data/v_caption_detection/bgalphabet_patch_train.txt',
            transform=transform)
        loader = _make_loader(dataset, shuffle=True, pin_memory=True)

    elif dataset_name == 'v_caption_patch_alp2':
        from datasets.v_caption_patch_alp2 import V_Caption_Patch_Alp2
        # Alphabet patches: background train + background val + plain train.
        list_file = ['data/v_caption_detection/bgalphabet_patch_train.txt',
                     'data/v_caption_detection/bgalphabet_patch_val.txt',
                     'data/v_caption_detection/alphabet_patch_train.txt']
        dataset = V_Caption_Patch_Alp2(root=os.path.join(opt.data_root_dir, ''),
                                       list_file=list_file,
                                       transform=transform)
        loader = _make_loader(dataset, shuffle=True, pin_memory=True)

    elif dataset_name == 'v_caption_patch_sym':
        from datasets.v_caption_patch_sym import V_Caption_Patch_Sym
        dataset = V_Caption_Patch_Sym(
            root=os.path.join(opt.data_root_dir, ''),
            list_file='data/v_caption_detection/bgsymbol_patch_train.txt',
            transform=transform)
        loader = _make_loader(dataset, shuffle=True, pin_memory=True)

    elif dataset_name == 'v_caption_patch_type':
        # presumably 4 = hangul / alphabet / number / symbol — verify
        opt.num_classes = 4
        from datasets.v_caption_patch_type import V_Caption_Resnet_Type
        list_file = ['data/v_caption_detection/bgalphabet_patch_train.txt',
                     'data/v_caption_detection/bghangul_patch_train.txt',
                     'data/v_caption_detection/bgnumber_patch_train.txt',
                     'data/v_caption_detection/bgsymbol_patch_train.txt']
        dataset = V_Caption_Resnet_Type(root=os.path.join(opt.data_root_dir, ''),
                                        list_file=list_file,
                                        transform=transform)
        loader = _make_loader(dataset, shuffle=True, pin_memory=True)

    else:
        raise ValueError('Not a valid dataset')

    return loader
# ===== Example 4 =====
            orders = orders[iou <= threshold]
            boxes = boxes[iou <= threshold,:]

        keeps.append(keep)

    return keeps

# Define the recall and total dictionary for calculating mAP
#recall = {}
#total = {}
#for i in range(21):
#    recall[i] = 0
#    total[i] = 0

# Define the test loader: Only use toTensor to keep the original image
# NOTE(review): image_set='train' — evaluation appears to run over the
# training split here; confirm this is intentional.
testset = VOCDetection(root='./datasets', year='2007', image_set='train', download=False, transform=transforms.ToTensor())
# batch_size=1 so each iteration yields one image with its annotations;
# shuffle=False keeps a deterministic order for resumable partial runs.
testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=1)

# Load the model
# map_location=cpu lets the checkpoint load on machines without a GPU.
checkpoint = torch.load('rcnn.pth', map_location=torch.device('cpu'))
model = FasterRCNN()
model.eval() # Turn to eval mode
model.load_state_dict(checkpoint['state_dict'])
print("Start eval...")
for batch_idx, (images, gt_boxes, gt_classes, gt_boxes_, gt_classes_) in enumerate(testloader):

    # Use for partial training
    if batch_idx <= 147:
        continue
    # Add the normalization step
# ===== Example 5 =====
def init_dataloader_valid(opt):
    """Create the validation/test DataLoader for ``opt.dataset``.

    Supported names: 'voc07' / 'voc12' / 'voc0712', 'wider', 'fddb',
    'v_caption'.

    Side effects: mutates ``opt.num_classes`` in the voc (21), wider (2)
    and fddb (2) branches, and for fddb rewrites the validation list file
    on disk.

    Args:
        opt: options namespace; reads ``batch_size_valid``, ``dataset``,
            ``data_root_dir`` and ``num_workers`` (plus ``project_root``
            and ``fold`` for fddb).

    Returns:
        A ``DataLoader`` over the selected dataset.

    Raises:
        ValueError: if ``opt.dataset`` is not one of the names above.
    """
    batch_size = opt.batch_size_valid
    dataset_name = opt.dataset
    transform = init_transforms(opt, 'valid')

    def _make_loader(dataset, pin_memory=False):
        # Validation loaders never shuffle; only pin_memory varies.
        # pin_memory=False matches the DataLoader default, so omitting it
        # in the original branches was equivalent.
        return dataloader.DataLoader(dataset=dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     num_workers=opt.num_workers,
                                     pin_memory=pin_memory)

    if dataset_name.startswith('voc'):
        opt.num_classes = 21
        from datasets.voc import VOCDetection
        voc_root = os.path.join(opt.data_root_dir, 'VOCdevkit')

        if dataset_name == 'voc07':
            image_root = os.path.join(voc_root, 'VOC2007/JPEGImages')
            list_file = 'data/voc/voc07_test.txt'
        elif dataset_name == 'voc12':
            image_root = os.path.join(voc_root, 'VOC2012/JPEGImages')
            list_file = 'data/voc/voc12_test.txt'
        elif dataset_name == 'voc0712':
            # Models trained on 07+12 are evaluated on the VOC2007 test list.
            image_root = os.path.join(voc_root, 'VOC0712/JPEGImages')
            list_file = 'data/voc/voc07_test.txt'
        else:
            raise ValueError('Not a valid dataset name')

        dataset = VOCDetection(root=image_root,
                               list_file=list_file,
                               transform=transform)
        loader = _make_loader(dataset)

    elif dataset_name == 'wider':
        opt.num_classes = 2
        from datasets.wider import WiderDetection
        wider_root = os.path.join(opt.data_root_dir, 'WIDER/WIDER_val/images')
        dataset = WiderDetection(root=wider_root,
                                 list_file='data/wider/wider_val.txt',
                                 transform=transform)
        loader = _make_loader(dataset)

    elif dataset_name == 'fddb':
        opt.num_classes = 2
        from datasets.fddb import FDDBDetection
        from preprocess.fddb.preprocess import parse_fddb_annotation
        fddb_root = os.path.join(opt.data_root_dir, 'FDDB/imgs')
        annotation_dir = os.path.join(opt.data_root_dir, 'FDDB/anno')
        list_file = os.path.join(opt.project_root, 'data/fddb/fddb_val.txt')
        # Remove any stale list file before regenerating it. The original
        # used os.system('rm ...'), which is non-portable and unsafe for
        # paths containing spaces.
        if os.path.exists(list_file):
            os.remove(list_file)
        parse_fddb_annotation(fddb_root, annotation_dir, 'valid', list_file,
                              opt.fold)

        dataset = FDDBDetection(root=fddb_root,
                                list_file=list_file,
                                mode='valid',
                                transform=transform)
        loader = _make_loader(dataset)

    elif dataset_name == 'v_caption':
        from datasets.v_caption import V_Caption
        dataset = V_Caption(root=os.path.join(opt.data_root_dir, ''),
                            list_file=['data/v_caption/bghangul_patch_val.txt'],
                            transform=transform)
        loader = _make_loader(dataset, pin_memory=True)

    else:
        raise ValueError('Not a valid dataset')

    return loader