def __call__(self, fname):
    """
    Load an image together with the ground-truth boxes of its annotation file.

    The annotation path is derived from ``fname`` by fixed-width slicing, so
    ``fname`` must end in a 6-character stem plus a 4-character extension
    (e.g. ``~/VOC/JPEGImages/002222.jpg`` for the VOC layout).

    Args:
        fname: path of the image file to load.

    Returns:
        A tuple ``(img, labels, sample)``: ``img`` is the image converted to
        RGB, ``labels`` is the label index of the last object that was parsed
        (``None`` when no object was parsed) and ``sample`` is
        ``Sample(fname, boxes, img_size)``.

    Raises:
        ValueError: if ``self.dataset`` is neither ``'VOC'`` nor ``'KITTI'``.
    """
    with Image.open(fname) as img:
        img = img.convert('RGB')
    img_size = Size(img.size[0], img.size[1])

    # NOTE(review): the second return value has always been the label index
    # of the *last* object only; that is kept for backward compatibility, but
    # it looks unintended - confirm with the callers. It is pre-initialised
    # so that images without objects (and the KITTI branch) no longer fail
    # with an unbound local at the return statement.
    labels = None
    boxes = []

    if self.dataset == 'VOC':
        # fname : '~/VOC/JPEGImages/002222.jpg'
        # Dropping the last 21 characters ('JPEGImages/002222.jpg') leaves
        # the dataset root; fname[-10:-4] is the 6-character file stem.
        annot_path = os.path.join(fname[:-21], 'Annotations',
                                  fname[-10:-4] + '.xml')
        with open(annot_path, 'r') as f:
            doc = lxml.etree.parse(f)

        for obj in doc.xpath('/annotation/object'):
            label = obj.xpath('name')[0].text
            xmin = float(obj.xpath('bndbox/xmin')[0].text)
            xmax = float(obj.xpath('bndbox/xmax')[0].text)
            ymin = float(obj.xpath('bndbox/ymin')[0].text)
            ymax = float(obj.xpath('bndbox/ymax')[0].text)
            labels = self.label2idx[label]
            # Ground truth must be expressed in the 0~1 range.
            center, size = abs2prop(xmin, xmax, ymin, ymax, img_size)
            # The first Box field is the label name (as in the KITTI branch),
            # not the XML element.
            boxes.append(Box(label, labels, center, size))

    elif self.dataset == 'KITTI':
        # Dropping the last 18 characters leaves the split root
        # (presumably 'image_2/NNNNNN.png' - TODO confirm).
        annot_path = os.path.join(fname[:-18], 'label_2',
                                  fname[-10:-4] + '.txt')
        with open(annot_path, 'r') as fp:
            # Blank lines carry no object and would otherwise raise below.
            objs = [line.split(' ') for line in fp if line.strip()]

        for obj in objs:
            if obj[0] == 'DontCare':
                continue
            xmin = float(obj[4])
            ymin = float(obj[5])
            xmax = float(obj[6])
            ymax = float(obj[7])
            labels = self.label2idx[obj[0]]
            # Convert the absolute min-max box bounds to proportional
            # center/size bounds in the 0~1 range.
            center, size = abs2prop(xmin, xmax, ymin, ymax, img_size)
            boxes.append(Box(obj[0], labels, center, size))

    else:
        raise ValueError(
            'Unsupported dataset: {!r}'.format(self.dataset))

    sample = Sample(fname, boxes, img_size)
    return img, labels, sample
def transform_box(box, orig_size, new_size, h_off, w_off):
    """
    Shift a proportional box by a pixel offset and re-express it against a
    new image size.

    The box is turned into absolute bounds with ``prop2abs`` using
    ``orig_size``, moved by ``w_off`` horizontally and ``h_off`` vertically,
    and converted back with ``abs2prop`` using ``new_size``.

    Returns:
        A new ``Box`` with the same ``label`` and ``labelid``, or ``None``
        when the center of the shifted box lies outside
        ``[0, new_size.w) x [0, new_size.h)``.
    """
    #---------------------------------------------------------------------------
    # Absolute bounds after applying the offsets
    #---------------------------------------------------------------------------
    left, right, top, bottom = prop2abs(box.center, box.size, orig_size)
    left, right = left + w_off, right + w_off
    top, bottom = top + h_off, bottom + h_off

    #---------------------------------------------------------------------------
    # Reject the box when its center is not inside the new image
    #---------------------------------------------------------------------------
    half_w = int((right - left) / 2)
    half_h = int((bottom - top) / 2)
    center_x = left + half_w
    center_y = top + half_h

    if not 0 <= center_x < new_size.w:
        return None
    if not 0 <= center_y < new_size.h:
        return None

    new_center, new_extent = abs2prop(left, right, top, bottom, new_size)
    return Box(box.label, box.labelid, new_center, new_extent)
def __build_sample_list(self, root, annot_files, dataset_name):
    """
    Build the list of samples described by the given XML annotation files,
    keeping only the objects whose name equals the module-level ``myObject``.

    Args:
        root: dataset root; images are read from ``root + '/JPEGImages/'``.
        annot_files: iterable of annotation file paths.
        dataset_name: text shown as the progress-bar description.

    Returns:
        A list of ``Sample(filename, boxes, imgsize)``. Annotations whose
        image is missing or cannot be decoded, and annotations without any
        matching object, are skipped.
    """
    image_root = root + '/JPEGImages/'
    samples = []

    for fn in tqdm(annot_files, desc=dataset_name, unit='samples'):
        with open(fn, 'r') as f:
            doc = lxml.etree.parse(f)
        filename = image_root + doc.xpath('/annotation/filename')[0].text

        #---------------------------------------------------------------
        # Get the file dimensions
        #---------------------------------------------------------------
        if not os.path.exists(filename):
            continue
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread returns None for files it cannot decode; treat
            # them like missing images instead of failing on img.shape.
            continue
        imgsize = Size(img.shape[1], img.shape[0])

        #---------------------------------------------------------------
        # Get boxes for the objects of interest
        #---------------------------------------------------------------
        boxes = []
        for obj in doc.xpath('/annotation/object'):
            label = obj.xpath('name')[0].text
            if label != myObject:
                continue
            xmin = int(float(obj.xpath('bndbox/xmin')[0].text))
            xmax = int(float(obj.xpath('bndbox/xmax')[0].text))
            ymin = int(float(obj.xpath('bndbox/ymin')[0].text))
            ymax = int(float(obj.xpath('bndbox/ymax')[0].text))
            center, size = abs2prop(xmin, xmax, ymin, ymax, imgsize)
            boxes.append(Box(label, self.lname2id[label], center, size))

        if not boxes:
            continue
        samples.append(Sample(filename, boxes, imgsize))

    return samples
def __build_sample_list(self, root, annot_files):
    """
    Build a list of samples from per-frame text annotation files (the
    progress bar labels them 'ucf_24_frame').

    Each non-blank annotation line is read as
    ``<label> <xmin> <ymin> <xmax> <ymax>``; the label is decremented by one
    before being used as the label id and looked up in ``self.lid2name``.

    Args:
        root: dataset root; images are looked up under ``root/rgb-images``.
        annot_files: iterable of annotation file paths.

    Returns:
        A list of ``Sample(image_path, boxes, imgsize)``. Frames whose image
        is missing or cannot be decoded, and frames without boxes, are
        skipped.
    """
    image_root = os.path.join(root, 'rgb-images')
    samples = []

    #-----------------------------------------------------------------------
    # Process each annotated sample
    #-----------------------------------------------------------------------
    for fn in tqdm(annot_files, desc='ucf_24_frame', unit='samples'):
        # NOTE(review): components 4 and 5 of the '/'-split path are taken
        # as the action and video names, so the result depends on how deep
        # the annotation directory is - confirm against the caller's layout.
        parts = fn.split('/')
        act = parts[4]
        video = parts[5]
        frame_id = parts[-1][:-4]  # drop the 4-character extension
        image_path = os.path.join(image_root, act, video,
                                  '{}.jpg'.format(frame_id))

        #---------------------------------------------------------------
        # Get the file dimensions
        #---------------------------------------------------------------
        if not os.path.exists(image_path):
            continue
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            continue
        imgsize = Size(img.shape[1], img.shape[0])

        #---------------------------------------------------------------
        # Get boxes for all the objects
        #---------------------------------------------------------------
        boxes = []
        with open(fn, 'r') as fin:
            for line in fin:
                # split() discards the line terminator without chopping the
                # last digit of a final line that has no trailing newline.
                obj = line.split()
                if not obj:
                    continue

                #-------------------------------------------------------
                # Get the properties of the box and convert them to the
                # proportional terms
                #-------------------------------------------------------
                label = int(obj[0]) - 1
                xmin = int(float(obj[1]))
                ymin = int(float(obj[2]))
                xmax = int(float(obj[3]))
                ymax = int(float(obj[4]))
                center, size = abs2prop(xmin, xmax, ymin, ymax, imgsize)
                boxes.append(Box(self.lid2name[label], label, center, size))

        if not boxes:
            continue
        samples.append(Sample(image_path, boxes, imgsize))

    return samples
def _build_sample_list(self, root, annot_files):
    """
    Build a list of samples, each paired with the path of its segmentation
    ground-truth image.

    Args:
        root: dataset root (used as a plain string prefix); images are read
            from ``root + 'VOC_Jpeg/'`` and segmentation paths point into
            ``root + 'VOC_Segmentation/'``.
        annot_files: iterable of XML annotation file paths.

    Returns:
        A list of ``Sample(filename, boxes, imgsize, seg_gt,
        seg_gt_to_compare)`` where both segmentation entries hold the same
        path. Annotations whose image is missing or cannot be decoded, and
        annotations without objects, are skipped.
    """
    image_root = root + 'VOC_Jpeg/'
    image_seg_root = root + 'VOC_Segmentation/'
    samples = []

    for fn in tqdm(annot_files, unit='samples'):
        # One parse is enough: the image and segmentation names both come
        # from the same <filename> element.
        with open(fn, 'r') as f:
            doc = lxml.etree.parse(f)
        image_name = doc.xpath('/annotation/filename')[0].text

        filename = image_root + image_name
        # Swap 'jpg' for 'png' in the file name only, so a root directory
        # that happens to contain 'jpg' is left untouched.
        seg_gt = image_seg_root + image_name.replace('jpg', 'png')
        seg_gt_to_compare = seg_gt

        #---------------------------------------------------------------
        # Get the file dimensions
        #---------------------------------------------------------------
        if not os.path.exists(filename):
            continue
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            continue
        imgsize = Size(img.shape[1], img.shape[0])

        #---------------------------------------------------------------
        # Get boxes for all the objects
        #---------------------------------------------------------------
        boxes = []
        for obj in doc.xpath('/annotation/object'):
            #-----------------------------------------------------------
            # Get the properties of the box and convert them to the
            # proportional terms
            #-----------------------------------------------------------
            label = obj.xpath('name')[0].text
            xmin = int(float(obj.xpath('bndbox/xmin')[0].text))
            xmax = int(float(obj.xpath('bndbox/xmax')[0].text))
            ymin = int(float(obj.xpath('bndbox/ymin')[0].text))
            ymax = int(float(obj.xpath('bndbox/ymax')[0].text))
            center, size = abs2prop(xmin, xmax, ymin, ymax, imgsize)
            boxes.append(Box(label, self.lname2id[label], center, size))

        if not boxes:
            continue
        samples.append(
            Sample(filename, boxes, imgsize, seg_gt, seg_gt_to_compare))

    return samples
def __build_sample_list(self, root, dataset_name):
    """
    Build a list of samples from every annotation file matched by
    ``root + '/Annotations/' + '/*xml'``.

    Args:
        root: dataset root; images are read from ``root + '/JPEGImages/'``.
        dataset_name: text shown as the progress-bar description.

    Returns:
        A list of ``Sample(filename, boxes, imgsize)``. Annotations whose
        image is missing or cannot be decoded, and annotations without
        objects, are skipped.
    """
    image_root = root + '/JPEGImages/'
    annot_root = root + '/Annotations/'
    annot_files = glob(annot_root + '/*xml')
    samples = []

    #-----------------------------------------------------------------------
    # Process each annotated sample
    #-----------------------------------------------------------------------
    for fn in tqdm(annot_files, desc=dataset_name, unit='samples'):
        with open(fn, 'r') as f:
            doc = lxml.etree.parse(f)
        filename = image_root + doc.xpath('/annotation/filename')[0].text

        #---------------------------------------------------------------
        # Get the file dimensions
        #---------------------------------------------------------------
        if not os.path.exists(filename):
            continue
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread returns None for files it cannot decode; treat
            # them like missing images instead of failing on img.shape.
            continue
        imgsize = Size(img.shape[1], img.shape[0])

        #---------------------------------------------------------------
        # Get boxes for all the objects
        #---------------------------------------------------------------
        boxes = []
        for obj in doc.xpath('/annotation/object'):
            #-----------------------------------------------------------
            # Get the properties of the box and convert them to the
            # proportional terms
            #-----------------------------------------------------------
            label = obj.xpath('name')[0].text
            xmin = int(float(obj.xpath('bndbox/xmin')[0].text))
            xmax = int(float(obj.xpath('bndbox/xmax')[0].text))
            ymin = int(float(obj.xpath('bndbox/ymin')[0].text))
            ymax = int(float(obj.xpath('bndbox/ymax')[0].text))
            center, size = abs2prop(xmin, xmax, ymin, ymax, imgsize)
            boxes.append(Box(label, self.lname2id[label], center, size))

        if not boxes:
            continue
        samples.append(Sample(filename, boxes, imgsize))

    return samples
def __build_sample_list(self, root, image_dir, info_dir):
    """
    Build a list of 'face' samples from a plain-text listing file.

    The listing at ``root + info_dir`` is read as consecutive records: an
    image path (relative to ``root + image_dir``), a line with the number of
    objects, then one line per object whose first four whitespace-separated
    integers are used as ``x y w h``. A record with zero objects is assumed
    to be followed by one placeholder line, which is skipped.

    Args:
        root: string prefix for both the listing file and the image folder.
        image_dir: image folder, appended to ``root``.
        info_dir: listing file, appended to ``root``.

    Returns:
        A list of ``Sample(filename, boxes, imgsize)``. Records without
        objects or whose image cannot be decoded are skipped.
    """
    with open(root + info_dir, 'r') as info_file:
        lines = info_file.read().split('\n')
    print(len(lines))

    # NOTE(review): only records that start within the first tenth of the
    # listing are processed - this looks like deliberate subsampling; confirm.
    limit = (len(lines) - 1) / 10

    samples = []
    label = 'face'

    #-----------------------------------------------------------------------
    # Process each annotated sample
    #-----------------------------------------------------------------------
    i = 0
    while i < limit:
        filename = root + image_dir + lines[i]
        num_objects = int(lines[i + 1])
        if not num_objects:
            # Skip the file name, the count and the placeholder line.
            i += 3
            continue

        first_box = i + 2
        # Advance past this record up front so every `continue` below
        # resumes at the next record.
        i = first_box + num_objects

        #---------------------------------------------------------------
        # Get the file dimensions
        #---------------------------------------------------------------
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread returns None for missing or undecodable files.
            continue
        imgsize = Size(img.shape[1], img.shape[0])

        #---------------------------------------------------------------
        # Get boxes for all the objects
        #---------------------------------------------------------------
        boxes = []
        for row in range(first_box, i):
            #-----------------------------------------------------------
            # Get the properties of the box and convert them to the
            # proportional terms
            #-----------------------------------------------------------
            fields = lines[row].split()
            x, y = int(fields[0]), int(fields[1])
            half_w, half_h = int(fields[2]) / 2, int(fields[3]) / 2
            # NOTE(review): x/y are treated as the box center here; verify
            # that the listing does not store the top-left corner instead.
            center, size = abs2prop(x - half_w, x + half_w,
                                    y - half_h, y + half_h, imgsize)
            boxes.append(Box(label, self.lname2id[label], center, size))

        samples.append(Sample(filename, boxes, imgsize))

    return samples