def get_test_tags(bags):
    """Collect every image tag whose name starts with one of *bags*.

    Parameters
    ----------
    bags : iterable of str
        Bag-name prefixes, e.g. '1/15'.

    Returns
    -------
    list of str
        Tags (e.g. '1/15/00000') drawn from Image().get_tags() that begin
        with one of the given bag names.
    """
    raw_img = Image()
    tags_all = raw_img.get_tags()
    all_tags = []
    for bag in bags:
        # get all tags that start with the bag string.
        # FIX: the original pattern `bag + "*"` bound `*` to the LAST
        # character of the bag name (making it optional/repeatable), so it
        # matched unrelated tags. Anchor a plain prefix match instead, as the
        # corrected copy of this function elsewhere in this file does.
        r = re.compile('^' + bag)
        bag_tag_list = list(filter(r.match, tags_all))
        all_tags += bag_tag_list
    return all_tags
def get_test_tags(bags):
    """Return all tags from Image().get_tags() beginning with one of *bags*."""
    candidates = Image().get_tags()
    matched = []
    for prefix in bags:
        # Anchored pattern: the tag must start with the bag name.
        pattern = re.compile('^' + prefix)
        matched.extend(tag for tag in candidates if pattern.match(tag))
    return matched
def __init__(self, bags, split_rate=0.7):
    """Prepare a training/validation split over the tags of *bags*.

    bags : list of bag names like '1/15'
    split_rate : target fraction of tags assigned to the training set
    """
    self.bags = bags
    self.raw_img = Image()
    # All frame tags that carry tracklet annotations.
    self.raw_tracklet_tag_list = list(Tracklet().frames_object.keys())
    # get all tags
    self.tags_all = self.raw_img.get_tags()
    self.size = len(self.tags_all)
    # for holding bags, like '1/15'
    self.training_bags = []
    # for holding tags, like '1/15/00000'
    self.training_tags = []
    self.val_bags = []
    self.val_tags = []
    self.split_rate = split_rate
    # Achieved split rate; stays -1 until split_bags_by_tag_name() runs.
    self.real_split_rate = -1
    # get training_bags, training_tags, val_bags, val_tags.
    self.split_bags_by_tag_name()
def __init__(self, tags, queue_size=20, require_shuffle=False, require_log=False, is_testset=False, n_skip_frames=0, random_num=666, is_flip=False):
    """Set up a batch loader over *tags* and start a background loader.

    Depending on the module-level `use_thread` flag, preprocessed frames are
    cached either in an in-process list (thread mode) or in shared-memory
    blocks fed through a queue (process mode).

    tags : list of frame tags to serve
    queue_size : cache capacity (frames)
    require_shuffle : shuffle the tag order (seeded by random_num)
    require_log : enable debug printing/drawing
    is_testset : skip ground-truth loading when True
    n_skip_frames : keep only every (n_skip_frames + 1)-th tag
    is_flip : enable frame flipping augmentation
    """
    self.is_testset = is_testset
    self.shuffled = require_shuffle
    self.random_num = random_num
    self.preprocess = data.Preprocess()
    self.raw_img = Image()
    self.raw_tracklet = Tracklet()
    self.raw_lidar = Lidar()
    # skip some frames
    self.tags = [tag for i, tag in enumerate(tags) if i % (n_skip_frames + 1) == 0]
    self.is_flip = is_flip
    if self.shuffled:
        self.tags = shuffle(self.tags, random_state=self.random_num)
    self.tag_index = 0
    self.size = len(self.tags)
    self.require_log = require_log
    self.flip_axis = 1  # if axis=1, flip from y=0. If axis=0, flip from x=0
    self.flip_rate = 2  # if flip_rate is 2, means every two frames
    self.cache_size = queue_size
    # Shared stop flag for the loader thread/process (0 = keep running).
    self.loader_need_exit = Value('i', 0)
    if use_thread:
        self.prepr_data = []
        self.lodaer_processing = threading.Thread(target=self.loader)
    else:
        self.preproc_data_queue = Queue()
        # Fixed-size shared-memory blocks holding pickled preprocessed frames.
        self.buffer_blocks = [Array('h', 41246691) for i in range(queue_size)]
        self.blocks_usage = Array('i', range(queue_size))
        self.lodaer_processing = Process(target=self.loader)
    # NOTE(review): start() placed after the if/else (both branches need the
    # loader started) — confirm against the original, un-collapsed layout.
    self.lodaer_processing.start()
class BatchLoading2:
    """Background batch loader for preprocessed frames (rgb / lidar top view).

    Frames are produced by a loader running either on a thread (in-process
    list cache) or on a worker process (shared-memory blocks + queue),
    depending on the module-level `use_thread` flag. Consumers call `load()`.
    """

    def __init__(self, tags, queue_size=20, require_shuffle=False,
                 require_log=False, is_testset=False, n_skip_frames=0,
                 random_num=666, is_flip=False):
        """Configure the loader and start the background thread/process.

        tags : list of frame tags to serve
        queue_size : cache capacity (frames)
        require_shuffle : shuffle tag order (seeded by random_num)
        require_log : enable debug printing/drawing
        is_testset : skip ground-truth loading when True
        n_skip_frames : keep only every (n_skip_frames + 1)-th tag
        is_flip : enable frame flipping augmentation
        """
        self.is_testset = is_testset
        self.shuffled = require_shuffle
        self.random_num = random_num
        self.preprocess = data.Preprocess()
        self.raw_img = Image()
        self.raw_tracklet = Tracklet()
        self.raw_lidar = Lidar()
        # skip some frames
        self.tags = [tag for i, tag in enumerate(tags)
                     if i % (n_skip_frames + 1) == 0]
        self.is_flip = is_flip
        if self.shuffled:
            self.tags = shuffle(self.tags, random_state=self.random_num)
        self.tag_index = 0
        self.size = len(self.tags)
        self.require_log = require_log
        self.flip_axis = 1  # if axis=1, flip from y=0. If axis=0, flip from x=0
        self.flip_rate = 2  # if flip_rate is 2, means every two frames
        self.cache_size = queue_size
        # Shared stop flag for the loader (0 = keep running).
        self.loader_need_exit = Value('i', 0)
        if use_thread:
            self.prepr_data = []
            self.lodaer_processing = threading.Thread(target=self.loader)
        else:
            self.preproc_data_queue = Queue()
            # Fixed-size shared-memory blocks holding pickled frames.
            self.buffer_blocks = [Array('h', 41246691) for i in range(queue_size)]
            self.blocks_usage = Array('i', range(queue_size))
            self.lodaer_processing = Process(target=self.loader)
        self.lodaer_processing.start()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Ask the loader loop to finish, then wait for it.
        self.loader_need_exit.value = True
        if self.require_log:
            print('set loader_need_exit True')
        self.lodaer_processing.join()
        if self.require_log:
            print('exit lodaer_processing')

    def keep_gt_inside_range(self, train_gt_labels, train_gt_boxes3d):
        """Keep only ground-truth labels/boxes visible in the top view.

        Returns (ok, labels, boxes3d); ok is False (with None payloads) when
        the input is empty or no box survives the top-view range check.
        """
        # FIX: removed leftover `import pdb; pdb.set_trace()` debugging trap.
        train_gt_labels = np.array(train_gt_labels, dtype=np.int32)
        train_gt_boxes3d = np.array(train_gt_boxes3d, dtype=np.float32)
        if train_gt_labels.shape[0] == 0:
            return False, None, None
        assert train_gt_labels.shape[0] == train_gt_boxes3d.shape[0]
        # get limited train_gt_boxes3d and train_gt_labels.
        keep = np.zeros((len(train_gt_labels)), dtype=bool)
        for i in range(len(train_gt_labels)):
            if box.box3d_in_top_view(train_gt_boxes3d[i]):
                keep[i] = 1
        # if all targets are out of range in the selected top view, return
        # False (original comment said "True" but the code returns False).
        if np.sum(keep) == 0:
            return False, None, None
        train_gt_labels = train_gt_labels[keep]
        train_gt_boxes3d = train_gt_boxes3d[keep]
        return True, train_gt_labels, train_gt_boxes3d

    def load_from_one_tag(self, one_frame_tag):
        """Load (obstacles, rgb, lidar) for one frame tag.

        obstacles is None on a test set (no tracklet ground truth).
        """
        if self.is_testset:
            obstacles = None
        else:
            obstacles = self.raw_tracklet.load(one_frame_tag)
        rgb = self.raw_img.load(one_frame_tag)
        lidar = self.raw_lidar.load(one_frame_tag)
        return obstacles, rgb, lidar

    def preprocess_one_frame(self, rgb, lidar, obstacles):
        """Preprocess one frame into (rgb, top, boxes3d, labels).

        boxes3d/labels are None on a test set.
        """
        # FIX: removed leftover `import pdb; pdb.set_trace()` debugging trap.
        rgb = self.preprocess.rgb(rgb)
        top = self.preprocess.lidar_to_top(lidar)
        if self.is_testset:
            return rgb, top, None, None
        boxes3d = [self.preprocess.bbox3d(obs) for obs in obstacles]
        labels = [self.preprocess.label(obs) for obs in obstacles]
        # flip in y axis.
        if self.is_flip and len(boxes3d) > 0:
            if self.tag_index % self.flip_rate == 1:
                top, rgb, boxes3d = self.preprocess.flip(rgb, top, boxes3d, axis=1)
            elif self.tag_index % self.flip_rate == 2:
                # NOTE(review): with flip_rate == 2 this branch is unreachable
                # (x % 2 is never 2). Was the intent `== 0`, or flip_rate == 3?
                # Behavior preserved pending confirmation.
                top, rgb, boxes3d = self.preprocess.flip(rgb, top, boxes3d, axis=0)
        return rgb, top, boxes3d, labels

    def get_shape(self):
        """Pull one batch just to report (top, front, rgb) shapes."""
        # todo for tracking, it means wasted a frame which will cause offset.
        train_rgbs, train_tops, train_fronts, train_gt_labels, train_gt_boxes3d, _ = self.load()
        top_shape = train_tops[0].shape
        front_shape = train_fronts[0].shape
        rgb_shape = train_rgbs[0].shape
        return top_shape, front_shape, rgb_shape

    def data_preprocessed(self):
        """Produce one preprocessed batch-of-one.

        During training, frames whose ground truth falls entirely outside the
        defined top-view range are skipped (the nets would break otherwise).
        """
        skip_frames = True
        while skip_frames:
            fronts = []
            frame_tag = self.tags[self.tag_index]
            obstacles, rgb, lidar = self.load_from_one_tag(frame_tag)
            rgb, top, boxes3d, labels = self.preprocess_one_frame(rgb, lidar, obstacles)
            if self.require_log and not self.is_testset:
                draw_bbox_on_rgb(rgb, boxes3d, frame_tag)
                draw_bbox_on_lidar_top(top, boxes3d, frame_tag)
            self.tag_index += 1
            # reset self tag_index to 0 and shuffle tag list
            if self.tag_index >= self.size:
                self.tag_index = 0
                if self.shuffled:
                    self.tags = shuffle(self.tags, random_state=self.random_num)
            skip_frames = False
            # only feed in frames with ground truth labels and bboxes during
            # training, or the training nets will break.
            if not self.is_testset:
                is_gt_inside_range, batch_gt_labels_in_range, batch_gt_boxes3d_in_range = \
                    self.keep_gt_inside_range(labels, boxes3d)
                labels = batch_gt_labels_in_range
                boxes3d = batch_gt_boxes3d_in_range
                # if no gt labels inside defined range, discard this training frame.
                if not is_gt_inside_range:
                    skip_frames = True
        return np.array([rgb]), np.array([top]), np.array([fronts]), np.array([labels]), \
            np.array([boxes3d]), frame_tag

    def find_empty_block(self):
        """Return the index of the first shared block not marked in-use (1),
        or -1 when every block is busy."""
        idx = -1
        for i in range(self.cache_size):
            if self.blocks_usage[i] == 1:
                continue
            else:
                idx = i
                break
        return idx

    def loader(self):
        """Producer loop: keep the cache full until loader_need_exit is set."""
        if use_thread:
            while self.loader_need_exit.value == 0:
                if len(self.prepr_data) >= self.cache_size:
                    time.sleep(1)  # cache full; back off
                else:
                    self.prepr_data = [(self.data_preprocessed())] + self.prepr_data
        else:
            while self.loader_need_exit.value == 0:
                empty_idx = self.find_empty_block()
                if empty_idx == -1:
                    time.sleep(1)  # no free block; back off
                else:
                    prepr_data = (self.data_preprocessed())
                    # Serialize the frame into the shared block; the consumer
                    # learns the block index and payload length via the queue.
                    dumps = pickle.dumps(prepr_data)
                    length = len(dumps)
                    self.buffer_blocks[empty_idx][0:length] = dumps[0:length]
                    self.preproc_data_queue.put({
                        'index': empty_idx,
                        'length': length
                    })
        if self.require_log:
            print('loader exit')

    def load(self):
        """Consumer side: block until one preprocessed frame is available."""
        if use_thread:
            while len(self.prepr_data) == 0:
                time.sleep(1)
            data_ori = self.prepr_data.pop()
        else:
            info = self.preproc_data_queue.get(block=True)
            length = info['length']
            block_index = info['index']
            dumps = self.buffer_blocks[block_index][0:length]
            # set flag: the block may be reused by the producer now.
            self.blocks_usage[block_index] = 0
            # convert to bytes string
            # FIX: array.array.tostring() was removed in Python 3.9;
            # tobytes() is the exact drop-in replacement.
            dumps = array.array('B', dumps).tobytes()
            data_ori = pickle.loads(dumps)
        return data_ori

    def get_frame_info(self):
        """Return the tag the loader will serve next."""
        return self.tags[self.tag_index]
class TrainingValDataSplitter:
    """Split dataset bags and their frame tags into training/validation sets,
    cutting at a bag boundary near `split_rate`."""

    def __init__(self, bags, split_rate=0.7):
        """bags: list of bag names like '1/15'; split_rate: training fraction."""
        self.bags = bags
        self.raw_img = Image()
        # All frame tags that carry tracklet annotations.
        self.raw_tracklet_tag_list = list(Tracklet().frames_object.keys())
        # get all tags
        self.tags_all = self.raw_img.get_tags()
        self.size = len(self.tags_all)
        # for holding bags, like '1/15'
        self.training_bags = []
        # for holding tags, like '1/15/00000'
        self.training_tags = []
        self.val_bags = []
        self.val_tags = []
        self.split_rate = split_rate
        self.real_split_rate = -1
        # get training_bags, training_tags, val_bags, val_tags.
        self.split_bags_by_tag_name()

    def check_frames_integrity(self, bag):
        """Return True when *bag* has equal numbers of images, lidar scans
        and tracklet poses."""
        # get number of images belong to this bag
        name = bag.split('/')
        bag_dir = os.path.join(cfg.RAW_DATA_SETS_DIR, name[0], name[1])
        img_path = os.path.join(bag_dir, 'image_02', 'data', '*')
        lidar_path = os.path.join(bag_dir, 'velodyne_points', 'data', '*')
        img_num = len(glob.glob(img_path))
        lidar_num = len(glob.glob(lidar_path))
        # FIX: `bag + '*'` bound the regex `*` to the last character of the
        # bag name (making it optional), matching unrelated tags. Use an
        # anchored prefix match, as the other copy of this class in this
        # file already does.
        r = re.compile('^' + bag)
        tracklet_tag_list = filter(r.match, self.raw_tracklet_tag_list)
        tracklet_num = len(list(tracklet_tag_list))
        if not img_num == lidar_num == tracklet_num:
            return False
        return True

    def split_bags_by_tag_name(self):
        """Partition all tags (and bags) at the bag boundary closest after
        split_rate, recording training/val bags and tags."""
        # input tags, split rate,
        # record: training_bags(a name list), training_tags(list),
        #         val_bags(a name list), val_tags(list)
        self.bags = shuffle(self.bags, random_state=0)
        # make sure all bags have same number of lidar, images and tracklet poses.
        problematic_bags = []
        for bag in self.bags:
            if_same = self.check_frames_integrity(bag)
            if not if_same:
                problematic_bags.append(bag)
        if len(problematic_bags) != 0:
            raise ValueError(
                'Number of images, lidar and tracklet of these bags are not the same ',
                problematic_bags)
        # shuffle bags
        all_tags = []
        for bag in self.bags:
            # get all tags start from bag string.
            # FIX: anchored prefix match instead of the buggy `bag + "*"` pattern.
            r = re.compile('^' + bag)
            tag_list = filter(r.match, self.tags_all)
            bag_tag_list = list(tag_list)
            all_tags += bag_tag_list
        tag_size = len(all_tags)
        split_point = round(tag_size * self.split_rate)
        # Advance the split point to the next bag boundary so no bag is torn
        # between training and validation.
        # FIX: iterate only to tag_size - 1 so all_tags[i + 1] cannot raise
        # IndexError on the final tag.
        for i in range(split_point, tag_size - 1):
            first_frame = all_tags[i]
            sec_frame = all_tags[i + 1]
            if ('/').join(first_frame.split('/')[:2]) != ('/').join(
                    sec_frame.split('/')[:2]):
                split_point = i
                break
        self.training_tags = all_tags[:split_point + 1]
        self.val_tags = all_tags[split_point + 1:]
        self.real_split_rate = 1. * split_point / tag_size
        print('real split rate is here: ', self.real_split_rate)
        # FIX: derive the first validation bag from split_point + 1 (the
        # first validation tag) instead of the leaked loop variable `i + 1`,
        # matching the other copy of this class in this file.
        split_bag = ('/').join(all_tags[split_point + 1].split('/')[:2])
        # Bags before the split bag (in shuffled order) are training bags;
        # the split bag and everything after it are validation bags.
        in_training_bag = True
        for b in self.bags:
            if b == split_bag:
                in_training_bag = False
            if in_training_bag:
                self.training_bags += [b]
            else:
                self.val_bags += [b]
class TrainingValDataSplitter:
    """Split dataset bags and their frame tags into training/validation sets,
    cutting at a bag boundary near `split_rate` (single-bag case handled
    separately by `handle_one_bag`)."""

    def __init__(self, bags, split_rate=0.7):
        """bags: list of bag names like '1/15'; split_rate: training fraction."""
        self.bags = bags
        self.raw_img = Image()
        # All frame tags that carry tracklet annotations.
        self.raw_tracklet_tag_list = list(Tracklet().frames_object.keys())
        # get all tags
        self.tags_all = self.raw_img.get_tags()
        self.size = len(self.tags_all)
        # for holding bags, like '1/15'
        self.training_bags = []
        # for holding tags, like '1/15/00000'
        self.training_tags = []
        self.val_bags = []
        self.val_tags = []
        self.split_rate = split_rate
        self.real_split_rate = -1
        # get training_bags, training_tags, val_bags, val_tags.
        self.split_bags_by_tag_name()

    def check_frames_integrity(self, bag):
        """Return True when *bag* has equal numbers of images, lidar scans
        and tracklet poses."""
        # get number of images belong to this bag
        name = bag.split('/')
        bag_dir = os.path.join(cfg.RAW_DATA_SETS_DIR, name[0], name[1])
        img_path = os.path.join(bag_dir, 'image_02', 'data', '*')
        lidar_path = os.path.join(bag_dir, 'velodyne_points', 'data', '*')
        img_num = len(glob.glob(img_path))
        lidar_num = len(glob.glob(lidar_path))
        r = re.compile('^' + bag)
        tracklet_tag_list = filter(r.match, self.raw_tracklet_tag_list)
        tracklet_num = len(list(tracklet_tag_list))
        if not img_num == lidar_num == tracklet_num:
            return False
        return True

    def handle_one_bag(self):
        """Degenerate split for a single bag: cut its tags at split_rate and
        list the bag in BOTH training_bags and val_bags."""
        all_tags = []
        # FIX: record each bag inside the loop instead of relying on the loop
        # variable leaking past the end of the loop (identical for the only
        # call path, len(self.bags) == 1, but no longer fragile).
        for bag in self.bags:
            r = re.compile('^' + bag)
            tag_list = filter(r.match, self.tags_all)
            bag_tag_list = list(tag_list)
            all_tags += bag_tag_list
            self.training_bags += [bag]
            self.val_bags += [bag]
        tag_size = len(all_tags)
        split_point = round(tag_size * self.split_rate)
        self.training_tags = all_tags[:split_point + 1]
        self.val_tags = all_tags[split_point + 1:]

    def split_bags_by_tag_name(self):
        """Partition all tags (and bags) at the bag boundary closest after
        split_rate, recording training/val bags and tags."""
        if len(self.bags) == 1:
            self.handle_one_bag()
            return
        # input tags, split rate,
        # record: training_bags(a name list), training_tags(list),
        #         val_bags(a name list), val_tags(list)
        self.bags = shuffle(self.bags, random_state=0)
        # make sure all bags have same number of lidar, images and tracklet poses.
        problematic_bags = []
        for bag in self.bags:
            if_same = self.check_frames_integrity(bag)
            if not if_same:
                problematic_bags.append(bag)
        if len(problematic_bags) != 0:
            raise ValueError(
                'Number of images, lidar and tracklet of these bags are not the same ',
                problematic_bags)
        # shuffle bags
        all_tags = []
        for bag in self.bags:
            # get all tags start from bag string.
            r = re.compile('^' + bag)
            tag_list = filter(r.match, self.tags_all)
            bag_tag_list = list(tag_list)
            all_tags += bag_tag_list
        tag_size = len(all_tags)
        split_point = round(tag_size * self.split_rate)
        # Advance the split point to the next bag boundary so no bag is torn
        # between training and validation.
        # FIX: iterate only to tag_size - 1 so all_tags[i + 1] cannot raise
        # IndexError on the final tag.
        for i in range(split_point, tag_size - 1):
            first_frame = all_tags[i]
            sec_frame = all_tags[i + 1]
            if ('/').join(first_frame.split('/')[:2]) != ('/').join(sec_frame.split('/')[:2]):
                split_point = i
                break
        self.training_tags = all_tags[:split_point + 1]
        self.val_tags = all_tags[split_point + 1:]
        self.real_split_rate = 1. * split_point / tag_size
        print('real split rate is here: ', self.real_split_rate)
        # get all bags: the first validation tag identifies the first
        # validation bag; bags before it (in shuffled order) are training.
        split_bag = ('/').join(all_tags[split_point + 1].split('/')[:2])
        in_training_bag = True
        for i in self.bags:
            if i == split_bag:
                in_training_bag = False
            if in_training_bag:
                self.training_bags += [i]
            else:
                self.val_bags += [i]