        # Recompute each cluster centroid from the boxes currently assigned to it.
        for cluster in range(k):
            clusters[cluster] = dist(boxes[nearest_clusters == cluster], axis=0)

        last_clusters = nearest_clusters

    return clusters


if __name__ == "__main__":
    import sys
    import torchvision
    import os

    # Make the project root importable before pulling in the dataset helpers.
    sys.path.append(os.path.abspath(".."))
    from datasets.voc import handle_voc_target, VOCDetection

    root_path = os.path.abspath("..")
    sets = VOCDetection(os.path.join(root_path, "data"), image_set="train",
                        target_transform=handle_voc_target)

    # Collect all ground-truth boxes (columns 1:5 hold the box coordinates).
    boxes = []
    for _, target in sets:
        boxes.append(target[:, 1:5])
    boxes = np.concatenate(boxes, 0)
    boxes = handle_boxes(boxes)

    out = kmeans(boxes, k=9)
    print("Boxes: \n {}".format(out))

    ratios = np.around(out[:, 0] / out[:, 1], decimals=3).tolist()
    print("Ratios: \n {}".format(sorted(ratios)))
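
# ---------------------------------------------------------------------------
# For reference, a minimal sketch of the IoU-based distance that anchor
# k-means implementations like the fragment above typically use. The helper
# name `iou` and the (w, h) layout of `boxes` are assumptions, not taken from
# this file: each row of `boxes` and `clusters` is treated as a width/height
# pair anchored at the origin, and the assignment step picks the cluster with
# the highest IoU (i.e. the smallest 1 - IoU).
import numpy as np

def iou(boxes, clusters):
    """IoU between n (w, h) boxes and k (w, h) cluster centroids -> (n, k)."""
    w = np.minimum(boxes[:, None, 0], clusters[None, :, 0])
    h = np.minimum(boxes[:, None, 1], clusters[None, :, 1])
    inter = w * h
    box_area = boxes[:, 0] * boxes[:, 1]
    cluster_area = clusters[:, 0] * clusters[:, 1]
    return inter / (box_area[:, None] + cluster_area[None, :] - inter)

# Inside the k-means loop this would be used as:
#   nearest_clusters = np.argmin(1 - iou(boxes, clusters), axis=1)
# ---------------------------------------------------------------------------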
# =====================================
transform_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])  # apply to image data only

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])  # apply to image data only

trainset = VOCDetection(root='./datasets', year='2007', image_set='train',
                        download=False, transform=transform_train)
testset = VOCDetection(root='./datasets', year='2007', image_set='val',
                       download=False, transform=transform_test)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=4)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=4)
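
# A hedged sketch (not from the original file): torchvision's VOCDetection
# yields a variable number of objects per image, so batching with
# batch_size > 1 generally needs a custom collate_fn, since the default
# collate cannot stack targets of different sizes. One minimal option:
def detection_collate(batch):
    # Keep images and targets as parallel tuples instead of stacking them.
    return tuple(zip(*batch))

# Usage: pass collate_fn=detection_collate to the DataLoader calls above.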
def init_dataloader_train(opt):
    batch_size = opt.batch_size_train
    dataset_name = opt.dataset
    transform = init_transforms(opt, 'train')

    if dataset_name.startswith('voc'):
        opt.num_classes = 21
        from datasets.voc import VOCDetection
        voc_root = os.path.join(opt.data_root_dir, 'VOCdevkit')
        # voc 2007
        if dataset_name == 'voc07':
            image_root = os.path.join(voc_root, 'VOC2007/JPEGImages')
            list_file = os.path.join('data/voc/voc07_trainval.txt')
        # voc 2012
        elif dataset_name == 'voc12':
            image_root = os.path.join(voc_root, 'VOC2012/JPEGImages')
            list_file = os.path.join('data/voc/voc12_trainval.txt')
        # voc 0712
        elif dataset_name == 'voc0712':
            image_root = os.path.join(voc_root, 'VOC0712/JPEGImages')
            list_file = [os.path.join('data/voc/voc07_trainval.txt'),
                         os.path.join('data/voc/voc12_trainval.txt')]
        else:
            raise ValueError('Not a valid dataset name')
        dataset = VOCDetection(root=image_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=opt.num_workers)

    elif dataset_name == 'wider':
        opt.num_classes = 2
        from datasets.wider import WiderDetection
        wider_root = os.path.join(opt.data_root_dir, 'WIDER/WIDER_train/images')
        list_file = 'data/wider/wider_train.txt'
        dataset = WiderDetection(root=wider_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=False, num_workers=opt.num_workers)

    elif dataset_name == 'fddb':
        opt.num_classes = 2
        from datasets.fddb import FDDBDetection
        from preprocess.fddb.preprocess import parse_fddb_annotation
        wider_root = os.path.join(opt.data_root_dir, 'FDDB/imgs')
        annotation_dir = os.path.join(opt.data_root_dir, 'FDDB/anno')
        list_file = os.path.join(opt.project_root, 'data/fddb/fddb_train.txt')
        os.system('rm %s' % list_file)
        parse_fddb_annotation(wider_root, annotation_dir, 'train', list_file, opt.fold)
        dataset = FDDBDetection(root=wider_root, list_file=list_file, mode='train', transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=opt.num_workers)

    elif dataset_name == 'v_caption_detection':
        from datasets.v_caption_detection import V_Caption_Detection
        data_root = os.path.join(opt.data_root_dir, 'V.DO/caption/background_result')
        list_file = 'data/v_caption_detection/caption_BG_train.txt'
        dataset = V_Caption_Detection(root=data_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=opt.num_workers)

    elif dataset_name == 'v_caption_patch':
        from datasets.v_caption_patch import V_Caption_Patch
        data_root = os.path.join(opt.data_root_dir, '')
        list_file = 'data/v_caption_patch_hangul/patch_train.txt'
        dataset = V_Caption_Patch(root=data_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=opt.num_workers, pin_memory=True)

    elif dataset_name == 'v_caption':
        from datasets.v_caption import V_Caption
        data_root = os.path.join(opt.data_root_dir, '')
        list_file = ['data/v_caption/hangul_patch_train.txt']
        dataset = V_Caption(root=data_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=opt.num_workers, pin_memory=True)

    elif dataset_name == 'v_caption_patch_num':
        from datasets.v_caption_patch_num import V_Caption_Patch_Num
        data_root = os.path.join(opt.data_root_dir, '')
        list_file = 'data/v_caption_detection/bgnumber_patch_train.txt'
        dataset = V_Caption_Patch_Num(root=data_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=opt.num_workers, pin_memory=True)

    elif dataset_name == 'v_caption_patch_alp':
        from datasets.v_caption_patch_alp import V_Caption_Patch_Alp
        data_root = os.path.join(opt.data_root_dir, '')
        list_file = 'data/v_caption_detection/bgalphabet_patch_train.txt'
        dataset = V_Caption_Patch_Alp(root=data_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=opt.num_workers, pin_memory=True)

    elif dataset_name == 'v_caption_patch_alp2':
        from datasets.v_caption_patch_alp2 import V_Caption_Patch_Alp2
        data_root = os.path.join(opt.data_root_dir, '')
        list_file = ['data/v_caption_detection/bgalphabet_patch_train.txt',
                     'data/v_caption_detection/bgalphabet_patch_val.txt',
                     'data/v_caption_detection/alphabet_patch_train.txt']
        dataset = V_Caption_Patch_Alp2(root=data_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=opt.num_workers, pin_memory=True)

    elif dataset_name == 'v_caption_patch_sym':
        from datasets.v_caption_patch_sym import V_Caption_Patch_Sym
        data_root = os.path.join(opt.data_root_dir, '')
        list_file = 'data/v_caption_detection/bgsymbol_patch_train.txt'
        dataset = V_Caption_Patch_Sym(root=data_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=opt.num_workers, pin_memory=True)

    elif dataset_name == 'v_caption_patch_type':
        opt.num_classes = 4
        from datasets.v_caption_patch_type import V_Caption_Resnet_Type
        data_root = os.path.join(opt.data_root_dir, '')
        list_file = ['data/v_caption_detection/bgalphabet_patch_train.txt',
                     'data/v_caption_detection/bghangul_patch_train.txt',
                     'data/v_caption_detection/bgnumber_patch_train.txt',
                     'data/v_caption_detection/bgsymbol_patch_train.txt']
        dataset = V_Caption_Resnet_Type(root=data_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=True, num_workers=opt.num_workers, pin_memory=True)

    else:
        raise ValueError('Not a valid dataset')

    return loader
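
# ---------------------------------------------------------------------------
# A hedged usage sketch (the exact option fields are defined in the project's
# argument parser; any field beyond the ones read inside the function, and
# those consumed by init_transforms, is an assumption). For the VOC2007
# branch, something like:
#
#   from argparse import Namespace
#   opt = Namespace(dataset='voc07', data_root_dir='/path/to/data',
#                   batch_size_train=16, num_workers=4)
#   train_loader = init_dataloader_train(opt)   # also sets opt.num_classes = 21
# ---------------------------------------------------------------------------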
            # Drop every remaining box whose IoU with the kept box exceeds the threshold.
            orders = orders[iou <= threshold]
            boxes = boxes[iou <= threshold, :]

        keeps.append(keep)

    return keeps


# Define the recall and total dictionaries for calculating mAP
# recall = {}
# total = {}
# for i in range(21):
#     recall[i] = 0
#     total[i] = 0

# Define the test loader: only use ToTensor to keep the original image
testset = VOCDetection(root='./datasets', year='2007', image_set='train',
                       download=False, transform=transforms.ToTensor())
testloader = torch.utils.data.DataLoader(testset, batch_size=1,
                                         shuffle=False, num_workers=1)

# Load the model
checkpoint = torch.load('rcnn.pth', map_location=torch.device('cpu'))
model = FasterRCNN()
model.eval()  # switch to eval mode
model.load_state_dict(checkpoint['state_dict'])

print("Start eval...")
for batch_idx, (images, gt_boxes, gt_classes, gt_boxes_, gt_classes_) in enumerate(testloader):
    # Skip batches already processed in a previous partial run.
    if batch_idx <= 147:
        continue
    # Add the normalization step
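
# ---------------------------------------------------------------------------
# For reference, a minimal sketch of the greedy NMS routine whose suppression
# step appears as the fragment at the top of this file. The function name
# `nms_boxes` and the (x1, y1, x2, y2) box layout are assumptions, not taken
# from the original code.
import numpy as np

def nms_boxes(boxes, scores, threshold=0.5):
    """Greedy NMS: keep the highest-scoring box, drop boxes whose IoU with it
    exceeds `threshold`, and repeat. Returns indices of kept boxes."""
    orders = scores.argsort()[::-1]
    keep = []
    while orders.size > 0:
        i = orders[0]
        keep.append(i)
        # IoU of the current best box against all remaining boxes.
        x1 = np.maximum(boxes[i, 0], boxes[orders[1:], 0])
        y1 = np.maximum(boxes[i, 1], boxes[orders[1:], 1])
        x2 = np.minimum(boxes[i, 2], boxes[orders[1:], 2])
        y2 = np.minimum(boxes[i, 3], boxes[orders[1:], 3])
        inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_r = (boxes[orders[1:], 2] - boxes[orders[1:], 0]) * \
                 (boxes[orders[1:], 3] - boxes[orders[1:], 1])
        iou = inter / (area_i + area_r - inter)
        # Keep only boxes that overlap the current best box by at most `threshold`.
        orders = orders[1:][iou <= threshold]
    return keep
# ---------------------------------------------------------------------------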
def init_dataloader_valid(opt):
    batch_size = opt.batch_size_valid
    dataset_name = opt.dataset
    transform = init_transforms(opt, 'valid')

    if dataset_name.startswith('voc'):
        opt.num_classes = 21
        from datasets.voc import VOCDetection
        voc_root = os.path.join(opt.data_root_dir, 'VOCdevkit')
        # voc 2007
        if dataset_name == 'voc07':
            image_root = os.path.join(voc_root, 'VOC2007/JPEGImages')
            list_file = os.path.join('data/voc/voc07_test.txt')
        # voc 2012
        elif dataset_name == 'voc12':
            image_root = os.path.join(voc_root, 'VOC2012/JPEGImages')
            list_file = os.path.join('data/voc/voc12_test.txt')
        # voc 0712
        elif dataset_name == 'voc0712':
            image_root = os.path.join(voc_root, 'VOC0712/JPEGImages')
            list_file = os.path.join('data/voc/voc07_test.txt')
        else:
            raise ValueError('Not a valid dataset name')
        dataset = VOCDetection(root=image_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=False, num_workers=opt.num_workers)

    elif dataset_name == 'wider':
        opt.num_classes = 2
        from datasets.wider import WiderDetection
        wider_root = os.path.join(opt.data_root_dir, 'WIDER/WIDER_val/images')
        list_file = 'data/wider/wider_val.txt'
        dataset = WiderDetection(root=wider_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=False, num_workers=opt.num_workers)

    elif dataset_name == 'fddb':
        opt.num_classes = 2
        from datasets.fddb import FDDBDetection
        from preprocess.fddb.preprocess import parse_fddb_annotation
        wider_root = os.path.join(opt.data_root_dir, 'FDDB/imgs')
        annotation_dir = os.path.join(opt.data_root_dir, 'FDDB/anno')
        list_file = os.path.join(opt.project_root, 'data/fddb/fddb_val.txt')
        os.system('rm %s' % list_file)
        parse_fddb_annotation(wider_root, annotation_dir, 'valid', list_file, opt.fold)
        dataset = FDDBDetection(root=wider_root, list_file=list_file, mode='valid', transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=False, num_workers=opt.num_workers)

    elif dataset_name == 'v_caption':
        from datasets.v_caption import V_Caption
        data_root = os.path.join(opt.data_root_dir, '')
        list_file = ['data/v_caption/bghangul_patch_val.txt']
        dataset = V_Caption(root=data_root, list_file=list_file, transform=transform)
        loader = dataloader.DataLoader(dataset=dataset, batch_size=batch_size,
                                       shuffle=False, num_workers=opt.num_workers, pin_memory=True)

    else:
        raise ValueError('Not a valid dataset')

    return loader
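
# ---------------------------------------------------------------------------
# A hedged sketch of how the two factories are typically paired in a training
# script (the helper names below are hypothetical, not from this project):
#
#   train_loader = init_dataloader_train(opt)
#   valid_loader = init_dataloader_valid(opt)
#   for epoch in range(num_epochs):
#       train_one_epoch(model, train_loader)
#       evaluate(model, valid_loader)
# ---------------------------------------------------------------------------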