def main(label_type, dataset_path='/home/detection/ILSVRC/'):
    """Build the ImageNet DET label files for one dataset split.

    Annotation XML files are globbed from
    ``<dataset_path>Annotations/DET/<label_type>``, sorted, and paired with
    the image obtained by swapping ``Annotations`` for ``Data`` in the path
    and the ``.xml`` extension for ``.JPEG``.

    Unless the module-level ``DEBUG`` flag is set, two files are written
    under ``labels/<label_type>/``:

    * ``image_names.txt`` - one image path per line, in sorted order.
    * ``labels.npy`` - one row per kept box:
      ``[xmin, ymin, xmax, ymax, image_index]`` where ``image_index`` is the
      position of the image in ``image_names.txt``.

    Boxes whose area is smaller than 1% of the image area are dropped.

    When ``DEBUG`` is set nothing is written; each image is instead shown in
    an OpenCV window with the kept boxes drawn on it, waiting for a key press
    per image.

    Args:
        label_type: Name of the split directory. ``'train'`` is searched two
            directory levels deep, any other value directly inside the split
            directory.
        dataset_path: Dataset root, including the trailing slash. Defaults to
            the location that used to be hard-coded here.
    """
    wildcard = '/*/*/' if label_type == 'train' else '/'
    annotation_path = dataset_path + 'Annotations/'

    label_dir = os.path.join('labels', label_type)
    if not os.path.exists(label_dir):
        os.makedirs(label_dir)

    labels = glob.glob(annotation_path + 'DET/' + label_type + wildcard + '*.xml')
    print('len labels = ', len(labels))
    labels.sort()
    print('label = ', len(labels))

    # Only the extension is swapped: a blanket replace of 'xml' would also
    # rewrite any directory name that happens to contain those letters.
    images = [
        os.path.splitext(label.replace('Annotations', 'Data'))[0] + '.JPEG'
        for label in labels
    ]
    print(len(images))

    # The name list is only opened when it is going to be written, so a DEBUG
    # run no longer truncates an existing image_names.txt.
    image_name_file = None
    if not DEBUG:
        image_name_file = open('labels/' + label_type + '/image_names.txt', 'w')

    bboxes = []
    try:
        for ii, image_name in enumerate(images):
            if ii % 100 == 0:
                print('iter %d of %d = %.2f%%' %
                      (ii, len(images), ii * 1.0 / len(images) * 100))
            if not DEBUG:
                image_name_file.write(image_name + '\n')

            label = labels[ii]
            label_tree = ET.parse(label)
            img_size = get_image_size(image_name)
            # Boxes covering less than 1% of the image are ignored.
            area_cutoff = img_size[0] * img_size[1] * 0.01

            if DEBUG:
                print('\nimage name\n\n%s\n' % image_name)
                image = cv2.imread(image_name)
                print('image size', image.shape)
                print(label)
                print(label_tree)
                print(label_tree.findall('object'))

            for obj in label_tree.findall('object'):
                bndbox = obj.find('bndbox')
                bbox = [
                    int(bndbox.find('xmin').text),
                    int(bndbox.find('ymin').text),
                    int(bndbox.find('xmax').text),
                    int(bndbox.find('ymax').text),
                    ii,
                ]
                if (bbox[3] - bbox[1]) * (bbox[2] - bbox[0]) < area_cutoff:
                    continue
                if DEBUG:
                    print('name', obj.find('name').text, '\n')
                    print(bbox)
                    # Grayscale images are expanded to three channels so the
                    # coloured rectangle can be drawn.
                    image = image.squeeze()
                    if len(image.shape) < 3:
                        image = np.tile(image[:, :, np.newaxis], (1, 1, 3))
                    drawing.drawRect(image, bbox[:-1], 3, [0, 0, 255])
                bboxes.append(bbox)

            if DEBUG:
                if len(image.shape) == 2:
                    image = np.tile(image[:, :, np.newaxis], (1, 1, 3))
                cv2.imshow('image', image)
                cv2.waitKey(0)
    finally:
        # Release the handle even if parsing one of the files fails.
        if image_name_file is not None:
            image_name_file.close()

    bboxes = np.array(bboxes)
    if not DEBUG:
        np.save('labels/' + label_type + '/labels.npy', bboxes)
def draw_state(self):
    """Render the graph, the goal(s) and the current pose onto ``self.board``.

    The board is a ``uint8`` image created on first use, ``scale`` pixels per
    graph unit, with the graph's (xMin, yMin) corner mapped to pixel (0, 0).
    Marks are stamped through ``drawing.drawRect`` with these values:
    4 for graph points, 5 for a goal, 6 for the goal's heading tip, 2 for the
    current position and 3 for the current heading tip.

    Returns:
        ``self.board`` flipped vertically (``np.flipud``).
    """
    from utils import drawing
    scale = 8
    # Poses are (x, y, rotation); x and y scale with the board while the third
    # component is multiplied by 90 (presumably quarter turns -> degrees;
    # confirm against the caller).
    pose_scale = np.array([scale, scale, 90])
    origin = np.array([self.graph.xMin, self.graph.yMin, 0]) * scale

    def to_board(pose):
        """Convert an (x, y, rotation) pose to integer board coordinates."""
        return (np.array(pose) * pose_scale - origin).astype(int)

    def heading_tip(loc):
        """Return the point half a cell away from ``loc`` along its heading."""
        return [
            loc[0] + scale / 2 * (loc[2] == 90) - scale / 2 * (loc[2] == 270),
            loc[1] + scale / 2 * (loc[2] == 0) - scale / 2 * (loc[2] == 180),
        ]

    def stamp(x, y, padding, value):
        """Draw a mark of ``value`` centred on board pixel (x, y)."""
        drawing.drawRect(self.board, [x, y, x, y], padding, value)

    if self.board is None:
        # First call: allocate the canvas and mark every graph point.
        locs = self.graph.points * scale
        self.board = np.zeros(
            ((self.graph.yMax - self.graph.yMin) * scale,
             (self.graph.xMax - self.graph.xMin) * scale),
            dtype=np.uint8)
        locs -= np.array([self.graph.xMin, self.graph.yMin]) * scale
        for loc in locs:
            stamp(loc[0], loc[1], scale / 2, 4)

    # NOTE(review): the goal is re-stamped on every call so it stays visible
    # after the agent has walked over it; confirm this matches the intended
    # nesting (the alternative is drawing it only when the board is created).
    if isinstance(self.end_point, list):
        # Several goals: position marks only, no heading tip.
        for end_point in self.end_point:
            goal_loc = to_board(end_point)
            stamp(goal_loc[0], goal_loc[1], scale / 2, 5)
    else:
        goal_loc = to_board(self.end_point)
        goal_arrow = heading_tip(goal_loc)
        stamp(goal_loc[0], goal_loc[1], scale / 2, 5)
        stamp(goal_arrow[0], goal_arrow[1], scale / 4, 6)

    # Turn the previous call's agent marks (2 and 3) back into plain points.
    self.board[np.logical_or(self.board == 2, self.board == 3)] = 4

    curr_loc = to_board(self.pose[:3])
    curr_arrow = heading_tip(curr_loc)
    stamp(curr_loc[0], curr_loc[1], scale / 2, 2)
    stamp(curr_arrow[0], curr_arrow[1], scale / 4, 3)

    # Presumably pins the largest value into the image so that display
    # scaling stays constant between frames - TODO confirm.
    self.board[0, 0] = 6
    return np.flipud(self.board)
# NOTE(review): the first statements look like the tail of a loop that fills
# Images/Labels one sequence at a time; the loop header and the definitions of
# Images, Labels, num_seq, tImage, xyxyLabels and dataset are not visible in
# this part of the file - confirm the indentation against the full script.
Images[num_seq, ...] = tImage.copy()
Labels[num_seq, ...] = xyxyLabels.copy()
num_seq += 1
print('current seq # = ', num_seq)
# np.save('Images.npy', Images)
# np.save('Labels.npy', Labels)
print('final seq idx = ', dataset.seq_idx)
print('done!')

# Visual sanity check: dump every image of one stored sequence to ./test/
# with its bounding box drawn on top.
print('Checking images... ')
path = './test/'
idx = 15  # random
image = Images[idx, ...].copy()
labels = Labels[idx, ...].copy()
for i in range(image.shape[0]):
    # print(i)
    # Move axis 0 to the end (presumably channels-first -> channels-last so
    # OpenCV can write it - confirm).
    im = image[i, ...].transpose(1, 2, 0).copy()
    # Two consecutive images share one label row (i // 2). The 227 / 10 factor
    # presumably rescales labels stored in a 0-10 range to a 227-pixel crop -
    # TODO confirm against the dataset code.
    bbox = 227 * labels[i // 2, ...].copy() / 10
    print('bbox = ', bbox)
    patch = drawing.drawRect(im, bbox, 1, (255, 255, 0))
    # print(im.shape)
    # NOTE(review): './test/' is not created here; verify it exists before
    # running.
    cv2.imwrite(path + str(i) + '.png', patch)
# Images_load = np.load('Images.npy')
# Labels_load = np.load('Labels.npy')
# print(Images_load.shape, Labels_load.shape)
# print('images load = ', np.sum(Images_load))
# print('label load = ', Labels_load[0,0:5,:])
def get_data_sequence(self):
    """Assemble one training sequence of object crops and normalised boxes.

    A key ``(data_idx, video_idx, track_id, image_seq)`` and its frames are
    fetched with ``self.getData()``. For each unroll step ``u`` the memory
    slot ``m`` (with ``m <= u``) holds the crop and box of frame ``u - m``;
    slots with ``m > u`` keep the crop and box of the first frame. The label
    of unroll ``u`` is the crop and box of frame ``u + 1``.

    Crops are resized to ``IMG_SIZE x IMG_SIZE`` and divided by 255. Boxes
    are ``[x1, y1, x2, y2]`` divided by the frame width/height.

    With ``self.debug`` set, details of every frame are printed and the
    memory and label crops of each unroll are shown in OpenCV windows,
    waiting for a key press.

    Returns:
        ``((dataImage, dataMotion), (labelImage, labelMotion))`` where

        * ``dataImage`` is float32 of shape
          ``(num_unrolls * memory_size, IMG_SIZE, IMG_SIZE, 3)``,
        * ``dataMotion`` is float32 of shape ``(num_unrolls * memory_size, 4)``,
        * ``labelImage`` is float32 of shape
          ``(num_unrolls, IMG_SIZE, IMG_SIZE, 3)``,
        * ``labelMotion`` is float32 of shape ``(num_unrolls, 4)``.

        If anything raises, the traceback is printed, a ``pdb`` session is
        opened and the method then returns ``None``.
    """
    try:
        # Preallocate the space for the data and labels.
        dataImage = np.zeros((self.num_unrolls, self.memory_size, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32)
        dataMotion = np.zeros((self.num_unrolls, self.memory_size, 4), dtype=np.float32)
        labelImage = np.zeros((self.num_unrolls, IMG_SIZE, IMG_SIZE, 3), dtype=np.float32)
        labelMotion = np.zeros((self.num_unrolls, 4), dtype=np.float32)

        # Read a new data sequence from the batch cache.
        # key = [data_idx, video_idx, track_id, image_seq]
        (gtKey, images) = self.getData()
        if self.debug:
            print('Inital gtKey: ', gtKey)
            print('')

        def load_frame(seq_offset):
            """Return key, row index, pixel box, frame, crop and normalised box
            for the frame ``seq_offset`` steps after the first one."""
            key = list(gtKey)
            key[3] += seq_offset
            key = tuple(key)
            image_index = self.key_lookup[key]
            bbox = self.datasets[key[0]][image_index, :4].copy()
            x1, y1, x2, y2 = bbox

            image = images[seq_offset]
            crop = image[int(y1):int(y2), int(x1):int(x2)]
            crop = cv2.resize(crop, (IMG_SIZE, IMG_SIZE)) / 255.

            height, width, _ = image.shape
            motion = [
                float(x1 / width),
                float(y1 / height),
                float(x2 / width),
                float(y2 / height),
            ]
            return key, image_index, bbox, image, crop, motion

        # Every slot starts out as the first frame; slots that refer to frames
        # before the start of the sequence keep this value.
        _, _, _, _, crop, motion = load_frame(0)
        dataImage[:] = crop
        dataMotion[:] = motion

        for unroll in range(self.num_unrolls):
            if self.debug:
                print('Unroll : ', unroll)
                print('')

            for memory_idx in range(self.memory_size):
                if unroll < memory_idx:
                    continue
                key, image_index, bbox, image, crop, motion = load_frame(unroll - memory_idx)
                dataImage[unroll, memory_idx] = crop
                # Write only this slot. Assigning to dataMotion[:] here used
                # to overwrite the boxes of every unroll and memory slot.
                dataMotion[unroll, memory_idx] = motion

                if self.debug:
                    # Drawn with the pixel-space box; the normalised box is
                    # not meaningful on the full-resolution frame.
                    debug_image1 = image.copy()
                    drawing.drawRect(debug_image1, bbox, 2, [255, 0, 0])
                    #cv2.imshow('debug bbox', debug_image1)
                    path = self.image_paths[key[0]][key[-1]]
                    print('Memory idx : ', memory_idx)
                    print('gtKey : ', key)
                    print('bbox : ', bbox)
                    print('bbox(float) : ', motion)
                    print('Image idx : ', image_index)
                    print('Image path : ', path)
                    print('')
                    #cv2.waitKey(0)

            # The label is the frame right after the newest memory entry.
            key, image_index, bbox, _, crop, motion = load_frame(unroll + 1)
            labelImage[unroll] = crop
            # Write only this unroll's row. Assigning to labelMotion[:] used
            # to leave every row equal to the last unroll's box.
            labelMotion[unroll] = motion

            if self.debug:
                path = self.image_paths[key[0]][key[-1]]
                print('[label]')
                print('gtKey : ', key)
                print('bbox : ', bbox)
                print('bbox(float) : ', motion)
                print('Image idx : ', image_index)
                print('Image path : ', path)
                print('')

                # Stack the memory crops vertically next to the label crop.
                plots = [dataImage[unroll, idx] for idx in range(self.memory_size)]
                subplot = np.zeros((IMG_SIZE * self.memory_size, IMG_SIZE, 3), dtype=np.float32)
                cv2.vconcat(tuple(plots), subplot)
                cv2.imshow('external memory', subplot)
                cv2.imshow('prediction', labelImage[unroll])
                cv2.waitKey(0)

        # Merge the unroll and memory axes into a single leading axis.
        dataImage = dataImage.reshape(([self.num_unrolls * self.memory_size] + list(dataImage.shape[2:])))
        dataMotion = dataMotion.reshape(([self.num_unrolls * self.memory_size] + list(dataMotion.shape[2:])))
        return (dataImage, dataMotion), (labelImage, labelMotion)
    except Exception:
        # Deliberate debugging hook: show the failure and stop in the debugger
        # instead of propagating. Falls through and returns None afterwards.
        import traceback
        traceback.print_exc()
        import pdb
        pdb.set_trace()
        print('exception')
def main(label_type, dataset_path='/home/ILSVRC/'):
    """Build the ImageNet VID label files for one dataset split.

    For each of the three training folder patterns, every annotation XML
    under ``<dataset_path>Annotations/VID/<label_type>`` is parsed and paired
    with the image obtained by swapping ``Annotations`` for ``Data`` in the
    path and the ``.xml`` extension for ``.JPEG``.

    Unless the module-level ``DEBUG`` flag is set, two files per folder are
    written under ``labels/<label_type>/`` (``N`` is the folder's digit):

    * ``image_namesN.txt`` - one full image path per line, in visiting order.
    * ``labelsN.npy`` - one row per annotated object:
      ``[xmin, ymin, xmax, ymax, video_index, track_id, image_number,
      class_index, occluded]``, sorted by video, then track, then image
      number.

    When ``DEBUG`` is set nothing is written; frames are shown in an OpenCV
    window with one random colour per track.

    Args:
        label_type: Name of the split directory. For ``'train'`` each folder
            pattern is used as is; for any other value ``'/*/'`` is used.
        dataset_path: Dataset root, including the trailing slash. Defaults to
            the location that used to be hard-coded here.

    Raises:
        AssertionError: If an object's class name is not one of the 30 known
            synset ids.
    """
    folders = [
        '/ILSVRC2015_VID_train_0001/*/',
        '/ILSVRC2015_VID_train_0002/*/',
        '/ILSVRC2015_VID_train_0003/*/',
    ]
    # Synset id -> class index; identical for every folder, so built once.
    classes = {
        'n01674464': 1,
        'n01662784': 2,
        'n02342885': 3,
        'n04468005': 4,
        'n02509815': 5,
        'n02084071': 6,
        'n01503061': 7,
        'n02324045': 8,
        'n02402425': 9,
        'n02834778': 10,
        'n02419796': 11,
        'n02374451': 12,
        'n04530566': 13,
        'n02118333': 14,
        'n02958343': 15,
        'n02510455': 16,
        'n03790512': 17,
        'n02391049': 18,
        'n02121808': 19,
        'n01726692': 20,
        'n02062744': 21,
        'n02503517': 22,
        'n02691156': 23,
        'n02129165': 24,
        'n02129604': 25,
        'n02355227': 26,
        'n02484322': 27,
        'n02411705': 28,
        'n02924116': 29,
        'n02131653': 30,
    }
    annotation_path = dataset_path + 'Annotations/'
    label_dir = os.path.join('labels', label_type)

    for fol in folders:
        # NOTE(review): for non-train splits the same '/*/' pattern is
        # processed once per folder entry, producing identical output files.
        wildcard = fol if label_type == 'train' else '/*/'
        # Fourth character from the end of the pattern: the folder's digit.
        suffix = str(fol[-4])
        video_glob = annotation_path + 'VID/' + label_type + wildcard

        image_name_file = None
        if not DEBUG:
            if not os.path.exists(label_dir):
                os.makedirs(label_dir)
            image_name_file = open(
                'labels/' + label_type + '/image_names' + suffix + '.txt', 'w')

        bboxes = []
        imNum = 0
        try:
            videos = sorted(glob.glob(video_glob))
            totalImages = len(glob.glob(video_glob + '*.xml'))
            print('totalImages', totalImages)

            for vv, video in enumerate(videos):
                labels = sorted(glob.glob(video + '*.xml'))
                # Only the extension is swapped: a blanket replace of 'xml'
                # would also rewrite directory names containing those letters.
                images = [
                    os.path.splitext(label.replace('Annotations', 'Data'))[0] + '.JPEG'
                    for label in labels
                ]
                trackColor = dict()
                for ii, imageName in enumerate(images):
                    if imNum % 100 == 0:
                        print('imNum %d of %d = %.2f%%' %
                              (imNum, totalImages, imNum * 100.0 / totalImages))
                    if not DEBUG:
                        # The full image path is stored, one per line; the
                        # line number equals the image_number label column.
                        image_name_file.write(imageName + '\n')

                    labelTree = ET.parse(labels[ii])
                    imgSize = get_image_size(imageName)
                    area = imgSize[0] * imgSize[1]
                    if DEBUG:
                        print('\n%s' % imageName)
                        image = cv2.imread(imageName)
                        print('video', vv, 'image', ii)

                    for obj in labelTree.findall('object'):
                        cls = obj.find('name').text
                        assert cls in classes
                        classInd = classes[cls]
                        occl = int(obj.find('occluded').text)
                        trackId = int(obj.find('trackid').text)
                        bndbox = obj.find('bndbox')
                        bbox = [
                            int(bndbox.find('xmin').text),
                            int(bndbox.find('ymin').text),
                            int(bndbox.find('xmax').text),
                            int(bndbox.find('ymax').text),
                            vv,
                            trackId,
                            imNum,
                            classInd,
                            occl,
                        ]
                        if DEBUG:
                            print('name', obj.find('name').text, '\n')
                            print(bbox)
                            if trackId not in trackColor:
                                trackColor[trackId] = [
                                    random.random() * 255 for _ in range(3)
                                ]
                            drawing.drawRect(image, bbox[:4], 3, trackColor[trackId])
                        bboxes.append(bbox)

                    if DEBUG:
                        cv2.imshow('image', image)
                        cv2.waitKey(1)
                    imNum += 1
        finally:
            # Release the handle even if parsing one of the files fails.
            if image_name_file is not None:
                image_name_file.close()

        # Force a 2-D (N, 9) array so that a folder without any boxes yields
        # an empty table instead of failing on the column indexing below.
        bboxes = np.array(bboxes, dtype=int).reshape(-1, 9)
        # Reorder by video_id, then track_id, then video image number so all
        # labels for a single track are next to each other. This only matters
        # if a single image could have multiple tracks.
        order = np.lexsort((bboxes[:, 6], bboxes[:, 5], bboxes[:, 4]))
        bboxes = bboxes[order, :]
        if not DEBUG:
            np.save('labels/' + label_type + '/labels' + suffix + '.npy', bboxes)