def augment_images():
    """Augment every listed training image four times and save new labels.

    Each annotation line is expected to look like
    ``<image path> <n0,n1,n2,n3,cls> ...``: an image path followed by
    comma-separated labels made of four box numbers and an integer class.
    Augmented images are written to ``generate_image2/`` and the transformed
    labels to ``labels/gen/gen_annots.txt`` using the same line layout.
    An augmented image that already exists on disk is skipped, so it does
    not appear in the regenerated label file.
    """
    # root = 'labels/test/'
    # data_list = ['elevator_20181230_convert_test.txt']
    root = 'labels/train/'
    data_list = [
        'elevator_20181230_convert_train.txt',
        'elevator_20181231_convert_train.txt',
        'elevator_20190106_convert_train.txt',
        'person_7421_train.txt'
    ]
    dataset = read_files_to_list(root, data_list)
    transfer = DataAugmentation()
    save_path = 'generate_image2/'
    cvtools.makedirs(save_path)
    # Collect output lines in a list and join once at the end instead of
    # growing one big string with "+=".
    annts_lines = []

    def augment_one_image(line):
        """Augment the image described by one annotation line."""
        fields = line.strip().split()
        if not fields:
            return  # blank line: nothing to augment
        file_path = fields[0]
        boxes = []
        classes = []
        for label_str in fields[1:]:
            bbox_cls_str = label_str.split(',')
            boxes.append([float(i) for i in bbox_cls_str[0:4]])
            classes.append(int(bbox_cls_str[4]))
        boxes = np.array(boxes)
        classes = np.array(classes)
        new_name = osp.splitext(file_path.split(os.sep)[-1])[0]
        # The file name is passed as a format argument, never used as the
        # template, so braces inside a name cannot break formatting.
        stem = save_path + new_name.replace('.jpg', '')
        for im_index in range(1, 5):  # four augmented copies per image
            new_image_name = '{}_{}.jpg'.format(stem, im_index)
            if os.path.isfile(new_image_name):
                continue
            # Re-read for every copy: the augmenter may modify its input.
            im = cvtools.imread(file_path)
            if im is None:
                print('{} is None.'.format(file_path))
                return
            img, boxes_trans, classes_trans = transfer(im, boxes, classes)
            boxes_trans = boxes_trans.astype(np.int32)
            classes_trans = classes_trans.astype(np.int32)
            # print('save %s...' % new_image_name)
            cv2.imwrite(new_image_name, img)
            labels = ''.join(
                ','.join(map(str, box)) + ',' + str(cls) + ' '
                for box, cls in zip(boxes_trans, classes_trans))
            annts_lines.append(new_image_name + ' ' + labels + '\n')

    # A process pool was tried here previously
    # (pool.apply_async(augment_one_image, args=(line,))); images are
    # currently processed sequentially.
    for line in tqdm(dataset):
        augment_one_image(line)

    new_annots = 'labels/gen/gen_annots.txt'
    print('save %s...' % new_annots)
    # Make sure the output folder exists before opening the file.
    cvtools.makedirs(new_annots)
    with open(new_annots, 'w') as f:
        f.write(''.join(annts_lines))
    print('draw boxes in images...')
    draw_rect_test_labels(new_annots, 'temp/')
def _cluster_by_cat(self, save_root, name_clusters=('bbox', ), n_clusters=(3, )):
    """Run k-means separately for each category on the requested statistics.

    Supported statistic names are ``'area'`` (annotation ``area``) and
    ``'w-vs-h'`` (``bbox[2] / bbox[3]``); other names are ignored. One
    ``cluster_<category>.json`` file is written under ``save_root`` for
    every category that collected at least one value.

    Args:
        save_root (str): output folder for the JSON files.
        name_clusters (tuple): names of the statistics to cluster.
        n_clusters (tuple): cluster count for each entry of
            ``name_clusters`` (same length, same order).
    """
    assert len(name_clusters) == len(n_clusters)
    # Look the cluster count up by statistic name; the position of a key in
    # the collected dict does not have to match its position in
    # name_clusters.
    clusters_for = dict(zip(name_clusters, n_clusters))

    cluster_dict = defaultdict(lambda: defaultdict(list))
    for ann in self.COCO.anns.values():
        cat_name = self.COCO.cats[ann['category_id']]['name']
        if 'area' in clusters_for:
            cluster_dict[cat_name]['area'].append(ann['area'])
        if 'w-vs-h' in clusters_for:
            height = float(ann['bbox'][3])
            if height:  # a zero-height box has no defined aspect ratio
                cluster_dict[cat_name]['w-vs-h'].append(
                    ann['bbox'][2] / height)

    cvtools.makedirs(save_root)
    for cat_name, cluster_values in cluster_dict.items():
        cluster_results = defaultdict(lambda: defaultdict(list))
        for cluster_name, values in cluster_values.items():
            k = clusters_for[cluster_name]
            # Cluster this statistic's own values (not the whole
            # per-category dict) and skip it when there are too few.
            if len(values) < k:
                continue
            centers = cvtools.k_means_cluster(
                np.array(values).reshape(-1, 1), n_clusters=k)
            cluster_results[cluster_name][cat_name].append(
                list(centers.reshape(-1)))
        cvtools.dump_json(
            cluster_results,
            osp.join(save_root, 'cluster_{}.json'.format(cat_name)))
def __init__(self, voc_root, mode, classes=get_classes('voc'),
             use_xml_name=True, read_test=False):
    """Record the dataset settings and build the image/annotation path lists.

    Args:
        voc_root (str): dataset root; ``ImageSets/Main/<mode>.txt`` and
            ``Annotations/<name>.xml`` are resolved against it.
        mode (str): name of the split file to read.
        classes: class names, or a string path that is read with
            ``cvtools.read_files_to_list``.
        use_xml_name (bool): stored as-is on the instance.
        read_test (bool): stored as-is on the instance.
    """
    self.voc_root = voc_root
    self.mode = mode
    self.use_xml_name = use_xml_name
    self.read_test = read_test

    # A string is treated as a file to read the class names from.
    self.classes = (cvtools.read_files_to_list(classes)
                    if isinstance(classes, str) else classes)

    self.label_path = osp.join(self.voc_root, 'labels')
    cvtools.makedirs(self.label_path)

    split_file = osp.join(voc_root, 'ImageSets/Main/{}.txt'.format(mode))
    self.imgs = cvtools.read_files_to_list(split_file)

    self.img_paths = []  # relative to the dataset root
    self.xml_paths = []  # joined with voc_root
    for img_name in self.imgs:
        self.img_paths.append('JPEGImages/{}.jpg'.format(img_name))
        self.xml_paths.append(
            osp.join(voc_root, 'Annotations/{}.xml'.format(img_name)))
def save_crop_labeltxt(self, save_root):
    """Write one label text file for every non-empty crop of every image.

    For each image ``i`` the mapping ``self.img_to_objs[i]`` is walked; its
    keys are crop coordinates and its values the objects assigned to that
    crop. Each object becomes one line: the first segmentation polygon
    shifted by the crop's first two coordinates, followed by the category
    name and the ``difficult`` flag. Files are written to
    ``<save_root>/labelTxt+crop/<image>_<crop index>_<coords>.txt``.

    Args:
        save_root (str): root folder for the output.
    """
    cvtools.makedirs(osp.join(save_root, 'images'))
    cvtools.makedirs(osp.join(save_root, 'labelTxt+crop'))
    for i, image_name in enumerate(self.imgs):
        img_name_no_suffix = osp.splitext(image_name)[0]
        img_to_objs = self.img_to_objs[i]
        for crop_i, img_coor in enumerate(img_to_objs):
            crop_objs = img_to_objs[img_coor]
            if len(crop_objs) == 0:
                continue
            # write crop results to txt
            txt_name = '_'.join(
                [img_name_no_suffix, str(crop_i)]
                + list(map(str, img_coor))) + '.txt'
            offset = np.array(img_coor[:2]).reshape(-1, 2)
            lines = []
            for crop_obj in crop_objs:
                if len(crop_obj) == 0:
                    continue
                polygon = np.array(
                    crop_obj['segmentation'][0]).reshape(-1, 2) - offset
                fields = list(map(str, polygon.reshape(-1)))
                fields.append(self.COCO.cats[crop_obj['category_id']]['name'])
                fields.append(str(crop_obj['difficult']))
                lines.append(' '.join(fields) + '\n')
            cvtools.strwrite(
                ''.join(lines),
                osp.join(save_root, 'labelTxt+crop', txt_name))
        # NOTE(review): the previous version ended with a branch that drew
        # the crop rectangles into '<save_root>/images/<name>.jpg'. It could
        # never run (its crop list was never filled) and referenced an
        # image that was never loaded, so it has been removed. Re-add it
        # together with the image loading if the preview is still wanted.
def draw_dataframe(data, dst, col, im_size=(20 * 4, 5), show_value=None):
    """Plot the columns of a DataFrame selected by label and save the figure.

    Args:
        data (pd.DataFrame): data to plot; its index is used as the x axis.
        dst (str): path of the image file to save.
        col: column label, or list of labels, selected with
            ``data.loc[:, col]``.
        im_size (tuple): figure size forwarded to ``plot``.
        show_value: when not None, ``str(show_value)`` is drawn at the
            smallest index value and the maximum of the first selected
            column.

    Returns:
        None. Unsupported ``data`` types are reported and nothing is drawn.
    """
    if not isinstance(data, pd.DataFrame):
        print('!not supported data type: {}'.format(type(data)))
        return

    # ".ix" no longer exists in current pandas; columns are chosen by label.
    selected = data.loc[:, col]
    ax = selected.plot(sharey=False, figsize=im_size, grid=True)
    fig = ax.get_figure()

    # Pick a coarser tick spacing for a wider index range.
    index_values = data.index.values
    index_span = np.max(index_values) - np.min(index_values)
    if index_span < 200:
        tick_spacing = 5
    elif index_span < 1000:
        tick_spacing = 10
    else:
        tick_spacing = 100
    ax.xaxis.set_major_locator(ticker.MultipleLocator(tick_spacing))

    if show_value is not None:
        x = np.min(index_values)
        # Works for a single label (scalar max) and a list of labels
        # (per-column max, first column used).
        y = np.ravel(selected.max())[0]
        ax.text(x, y, str(show_value))

    # save draw
    cvtools.makedirs(dst)
    fig.savefig(dst)
    plt.close(fig)
def crop_with_label(self, save_root='./', iof_th=0.5):
    """Crop every image, assign its annotations to the crops and save stats.

    Images are cropped with ``self.crop`` and annotations are distributed
    with ``self.deal_edged_boxes``. Only counters are collected; the call
    that saved the per-crop label files is currently disabled. The counters
    are written to ``stats.json`` (a relative path, not under
    ``save_root``).

    Args:
        save_root (str): root under which the ``images`` and
            ``labelTxt+crop`` paths are prepared.
        iof_th (float): threshold forwarded to ``self.deal_edged_boxes``.
    """
    image_ids = self.COCO.getImgIds()
    image_ids.sort()
    img_infos = self.COCO.loadImgs(image_ids)
    if cvtools._DEBUG:
        img_infos = img_infos[:10]  # slice first so less is deep-copied
    roidb = copy.deepcopy(img_infos)
    print('{} images.'.format(len(roidb)))

    cvtools.makedirs(save_root + '/images')
    cvtools.makedirs(save_root + '/labelTxt+crop')

    stats = dict(crop_objs=0, total_objs=0,
                 missing_objs=0, total_croped_images=0)
    for entry in tqdm(roidb):
        if cvtools._DEBUG:
            print('crop {}'.format(entry['file_name']))

        # read image
        image_name = entry['file_name']
        image_file = osp.join(self.img_prefix, image_name)
        img = cvtools.imread(image_file)
        if img is None:
            print('{} is None.'.format(image_file))
            continue

        # crop image
        crop_imgs, starts = self.crop(img)

        # handling the box at the edge of the cropped image
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        img_to_objs, obj_to_num = self.deal_edged_boxes(
            ann_ids, crop_imgs, starts, iof_th=iof_th)
        if img_to_objs is None:
            continue

        # stats
        stats['crop_objs'] += sum(obj_to_num.values())
        stats['total_objs'] += len(ann_ids)
        stats['missing_objs'] += len(set(ann_ids) - set(obj_to_num.keys()))
        stats['total_croped_images'] += sum(
            1 for objs in img_to_objs.values() if len(objs) > 0)

        # save results
        # self.save_crop_labeltxt(image_name, img_to_objs, save_root)

    # save stats values
    total_images = len(roidb)
    stats['total_images'] = total_images
    # With no images every counter is 0; dividing by 1 keeps the ratios at
    # 0.0 instead of raising ZeroDivisionError.
    denominator = float(total_images) if total_images else 1.0
    # NOTE(review): despite its key, this value is the number of non-empty
    # crops per source image.
    stats['objs_per_croped_image'] = stats['total_croped_images'] / denominator
    stats['objs_per_image'] = stats['total_objs'] / denominator
    cvtools.save_json(stats, to_file='stats.json')
def crop_in_order_with_label(self, save_root, w=1920, h=1080, overlap=0.):
    """Crop every annotated image in order and inspect the crop polygons.

    NOTE(review): this function looks unfinished. It builds the crops, the
    label file name and a filtered polygon array, but nothing is written to
    disk apart from the directories prepared at the start.

    Args:
        save_root (str): root under which the ``images`` and
            ``labelTxt+crop`` paths are prepared.
        w (int): crop width, asserted to be within [800, 1920].
        h (int): crop height, asserted to be within [800, 1080].
        overlap (float): crop overlap, asserted to be within [0, 0.5].
    """
    assert 1920 >= w >= 800 and 1080 >= h >= 800 and 0.5 >= overlap >= 0.
    crop = CropInOder(width_size=w, height_size=h, overlap=overlap)
    image_ids = self.COCO.getImgIds()
    image_ids.sort()
    # In debug mode only the first 10 images are processed.
    if cvtools._DEBUG:
        roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))[:10]
    else:
        roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))
    print('{} images.'.format(len(roidb)))
    cvtools.makedirs(save_root + '/images')
    cvtools.makedirs(save_root + '/labelTxt+crop')
    for entry in tqdm(roidb):
        if cvtools._DEBUG:
            print('crop {}'.format(entry['file_name']))
        image_name = entry['file_name']
        image_file = osp.join(self.img_prefix, image_name)
        img_name_no_suffix, img_suffix = osp.splitext(image_name)
        # Decode from raw bytes so paths with Chinese characters work.
        img = cv2.imdecode(np.fromfile(image_file, dtype=np.uint8), cv2.IMREAD_COLOR)
        # img = cv2.imread(image_file)  # does not support Chinese paths
        if img is None:
            print('{} is None.'.format(image_file))
            continue
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        boxes = [obj['bbox'] for obj in objs]
        # labels = [obj['category_id'] for obj in objs]
        if len(boxes) == 0:
            continue
        # The cropper returns the crop images, their start offsets and the
        # annotation ids belonging to each crop (indexed per crop below).
        crop_imgs, starts, new_ann_ids = crop(img, np.array(boxes), np.array(ann_ids))
        # crops = []
        for crop_i, crop_img in enumerate(crop_imgs):
            # new_img_name = img_name + '_' + str(crop_i) + img_suffix
            # cv2.imwrite(os.path.join(save_root, 'images', new_img_name), crop_img)
            sx, sy = starts[crop_i]
            # NOTE(review): this rebinds the "w"/"h" parameters to the size
            # of the current crop.
            h, w, _ = crop_img.shape
            ex, ey = sx + w, sy + h
            # crops.append([sx+3, sy+3, ex-3, ey-3])
            txt_name = '_'.join([img_name_no_suffix] + [str(crop_i)] + list(map(str, [sx, sy, ex, ey]))) + '.txt'
            txt_content = ''
            crop_objs = self.COCO.loadAnns(new_ann_ids[crop_i])
            if len(crop_objs) == 0:
                continue
            # One row per object: its first segmentation polygon.
            # assumes all polygons have the same length, otherwise the
            # array is ragged and axis=1 fails -- TODO confirm
            crop_segs = np.array(
                [crop_obj['segmentation'][0] for crop_obj in crop_objs])
            # filter_segs = []
            # Rows with any negative coordinate (result currently unused).
            temp1 = np.any(crop_segs < 0., axis=1)
            # Rows with any coordinate larger than the crop width.
            filter_segs = crop_segs[np.any(crop_segs > w, axis=1)]
            # filter_segs.append(crop_segs[np.any(crop_segs > w, axis=1)])
            if len(filter_segs) > 0:
                pass
def write_str(data, to_file):
    """Write a string to a file, replacing any existing content.

    Args:
        data (str): text to write.
        to_file (str): path of the file to save.
    """
    cvtools.makedirs(to_file)
    with open(to_file, mode='w') as out:
        out.write(data)
def dump_pkl(data, to_file='data.pkl'):
    """Serialize an object to a file with pickle.

    Args:
        data: object to serialize.
        to_file: path of the file to save.
    """
    cvtools.makedirs(to_file)
    # Use the highest available protocol (what a negative protocol
    # number selects).
    with open(to_file, mode='wb') as out:
        pickle.dump(data, out, protocol=pickle.HIGHEST_PROTOCOL)
def dump_json(data, to_file='data.json'):
    """Write an object to a JSON file and report completion.

    Args:
        data: object to store in JSON format.
        to_file: path of the file to save.
    """
    cvtools.makedirs(to_file)
    # Compact output; passing indent=4 would give a friendlier layout.
    with open(to_file, mode='w') as out:
        json.dump(data, out)
    print('!save {} finished'.format(to_file))
def run_model(model_obj, port=5000, logfile=None):
    """Publish the model in module globals and start the web application.

    Args:
        model_obj: object stored in the module-level ``model``.
        port (int): stored in ``PORT`` and passed to ``app.run``.
        logfile (str): optional log file; when given, ``log_save_root`` is
            set to the first component returned by ``cvtools.splitpath``
            and ``logger`` is replaced by one with a file handler opened
            in ``'w'`` mode.
    """
    global model, PORT, log_save_root, logger

    model = model_obj
    PORT = port

    if logfile is not None:
        log_save_root = cvtools.splitpath(logfile)[0]
        cvtools.makedirs(logfile)
        logger = cvtools.logger_file_handler(logger, logfile, mode='w')

    # Listens on all interfaces.
    app.run(host='0.0.0.0', port=port)
def vis_instances(self, save_root, vis='bbox', box_format='x1y1wh', by_cat=False):
    """Draw the annotations of every image and save the results as JPEG.

    Args:
        save_root (str): folder for the rendered images.
        vis (str): annotation key whose value is drawn (e.g. ``'bbox'``).
        box_format (str): forwarded to ``cvtools.draw_boxes_texts``.
        by_cat (bool): when true, ``self._vis_instances_by_cat`` is run
            first.
    """
    if by_cat:
        self._vis_instances_by_cat(save_root, vis, box_format)
        # NOTE(review): execution continues with the all-category pass
        # below; confirm whether an early return is wanted here.

    image_ids = self.COCO.getImgIds()
    image_ids.sort()
    img_infos = self.COCO.loadImgs(image_ids)
    if cvtools._DEBUG:
        img_infos = img_infos[:10]  # slice first so less is deep-copied
    roidb = copy.deepcopy(img_infos)
    print('{} images.'.format(len(roidb)))

    cvtools.makedirs(save_root)
    for i, entry in enumerate(roidb):
        print('Visualize image %d of %d: %s' %
              (i, len(roidb), entry['file_name']))
        image_name = entry['file_name']
        image_file = osp.join(self.img_prefix, image_name)
        img = cvtools.imread(image_file)
        # Check for a failed read before slicing: indexing None raises.
        if img is None:
            print('{} is None.'.format(image_file))
            continue

        image_name = osp.splitext(image_name)[0]
        if 'crop' in entry:
            crop = entry['crop']
            img = img[crop[1]:crop[3], crop[0]:crop[2]]
            image_name = '_'.join([image_name] + list(map(str, crop)))

        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        if len(objs) == 0:
            continue

        # Sanitize bboxes -- some are invalid
        for obj in objs:
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            vis_obj = obj[vis] if vis in obj else []
            class_name = self.COCO.cats[
                obj['category_id']]['name'] if 'category_id' in obj else ''
            img = cvtools.draw_boxes_texts(img, vis_obj, class_name,
                                           box_format=box_format)

        # save in jpg format for saving storage
        cvtools.imwrite(img, osp.join(save_root, image_name + '.jpg'))
def vis_instances(self,
                  save_root,
                  vis='bbox',  # or segm
                  vis_cats=None,
                  output_by_cat=False,
                  box_format='x1y1wh'):
    """Render annotated boxes or polygons onto the images.

    When an image contains a selected category, all categories on that
    image are drawn.

    Args:
        save_root (str): folder where the rendered images are saved.
        vis (str): 'bbox' or 'segmentation'
        vis_cats (list): categories to be visualized
        output_by_cat (bool): write the rendered images per category.
        box_format (str): 'x1y1wh' or 'polygon'
    """
    assert vis in ('bbox', 'segmentation')
    assert box_format in ('x1y1wh', 'polygon')

    # Category-based output is handled entirely by the helper.
    by_category = output_by_cat or vis_cats is not None
    if by_category:
        self._vis_instances_by_cat(save_root,
                                   vis=vis,
                                   vis_cats=vis_cats,
                                   box_format=box_format)
        return

    image_ids = self.COCO.getImgIds()
    image_ids.sort()
    roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))
    if cvtools._DEBUG:
        roidb = roidb[:10]
    total = len(roidb)
    print('{} images.'.format(total))

    cvtools.makedirs(save_root)
    for idx, entry in enumerate(roidb):
        print('Visualize image %d of %d: %s' %
              (idx, total, entry['file_name']))
        img, image_name = self.read_img_or_crop(entry)
        if img is None:
            print('{} is None.'.format(image_name))
            continue

        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        if len(objs) == 0:
            continue

        # Sanitize bboxes -- some are invalid
        drawn = self.vis_objs(img, objs, vis=vis, box_format=box_format)
        out_file = osp.join(save_root, image_name + '.jpg')
        cvtools.imwrite(drawn, out_file)
def cluster_analysis(self, save_root, name_clusters=('bbox', ), n_clusters=(3, ), by_cat=False):
    """Draw histograms and cluster the requested annotation statistics.

    Supported statistic names are ``'area'`` (annotation ``area``) and
    ``'w-vs-h'`` (``bbox[2] / bbox[3]``); other names are ignored.
    Annotations with ``ignore == 1`` are skipped. For each collected
    statistic the values at or above the 99th percentile are dropped, a
    histogram is saved as ``<name>.png`` and the rounded centres returned
    by ``cvtools.DBSCAN_cluster`` are saved as ``<name>.txt``.

    Args:
        save_root (str): output folder.
        name_clusters (tuple): names of the statistics to analyse.
        n_clusters (tuple): minimum number of samples required for each
            entry of ``name_clusters`` (same length, same order).
        by_cat (bool): when true, ``self._cluster_by_cat`` is run first.
    """
    if by_cat:
        self._cluster_by_cat(save_root, name_clusters, n_clusters)
        # NOTE(review): execution continues with the global analysis
        # below; confirm whether an early return is wanted here.
    assert len(name_clusters) == len(n_clusters)
    # Look the required count up by statistic name; the order in which
    # statistics are collected need not match name_clusters.
    clusters_for = dict(zip(name_clusters, n_clusters))

    image_ids = self.COCO.getImgIds()
    image_ids.sort()
    roidb = copy.deepcopy(self.COCO.loadImgs(image_ids))
    print('roidb: {}'.format(len(roidb)))

    cluster_dict = defaultdict(list)
    for entry in roidb:
        ann_ids = self.COCO.getAnnIds(imgIds=entry['id'], iscrowd=None)
        objs = self.COCO.loadAnns(ann_ids)
        # Sanitize bboxes -- some are invalid
        for obj in objs:
            if 'ignore' in obj and obj['ignore'] == 1:
                continue
            if 'area' in clusters_for:
                cluster_dict['area'].append(obj['area'])
            if 'w-vs-h' in clusters_for:
                height = float(obj['bbox'][3])
                if height:  # a zero-height box has no aspect ratio
                    cluster_dict['w-vs-h'].append(obj['bbox'][2] / height)

    cvtools.makedirs(save_root)
    print('start cluster analysis...')
    for cluster_name, cluster_value in cluster_dict.items():
        assert len(cluster_value) >= clusters_for[cluster_name]
        value_arr = np.array(cluster_value)
        percent = np.percentile(value_arr, [1, 50, 99])
        # Drop the largest values so outliers do not stretch the histogram.
        value_arr = value_arr[percent[2] > value_arr]
        cvtools.draw_hist(value_arr, bins=1000, x_label=cluster_name,
                          y_label="Quantity", title=cluster_name,
                          density=False,
                          save_name=osp.join(save_root, cluster_name + '.png'))
        samples = np.array(value_arr).reshape(-1, 1)
        cluster_value_centers = cvtools.DBSCAN_cluster(samples,
                                                       metric='manhattan')
        np.savetxt(osp.join(save_root, cluster_name + '.txt'),
                   np.around(cluster_value_centers, decimals=0))
    print('cluster analysis finished!')
def extract_feats(self, file):
    """Return the min-max scaled ``ppg_t`` feature of one recording.

    Features are loaded from the cache when an entry exists for the file's
    base name; otherwise they are computed from the ``'PPG'`` entry of the
    file, dumped as ``<cache_path>/<file id>.pkl`` and registered in the
    cache.

    Args:
        file (str): path of the recording to load.

    Returns:
        The ``ppg_t`` values scaled to the range (0, 1) by
        ``MinMaxScaler.fit_transform``.
    """
    file_id = osp.splitext(osp.basename(file))[0]
    cached_path = self._cache.get(file_id)

    if cached_path is None:
        ppg = list(mmcv.load(file)['PPG'])
        feats = self._extract_feats(ppg)
        if file_id is not None:
            cache_file = osp.join(self._cache.cache_path, file_id + '.pkl')
            cvtools.makedirs(cache_file)
            mmcv.dump(feats, cache_file)
            self._cache.put(file_id, cache_file)
    else:
        feats = mmcv.load(cached_path)

    ppg_t = feats['ppg_t'].to_frame()
    return MinMaxScaler(feature_range=(0, 1)).fit_transform(ppg_t.values)
def write_key_value(data, to_file):
    """Write a dict to a text file (plain text, not serialized).

    Every entry becomes one line with the key and the value separated
    by ``': '``. Anything that is not a dict is ignored silently.

    Args:
        data (dict): entries to write; keys and values should be basic types.
        to_file: path of the file to save.

    Returns:
        None
    """
    if isinstance(data, dict):
        cvtools.makedirs(to_file)
        with open(to_file, 'w', encoding='utf8') as out:
            out.writelines(
                '{}: {}\n'.format(key, value) for key, value in data.items())
def test_makedirs():
    """Check the result of cvtools.makedirs for several kinds of path.

    The test starts from an empty ``<current_path>/out`` folder, which is
    created when missing so the test also runs on a fresh checkout.
    """
    out_dir = current_path + '/out'
    # rmtree raises when the folder does not exist yet, so remove it only
    # when it is there.
    if os.path.isdir(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)

    # Empty or missing paths give a falsy result.
    assert not cvtools.makedirs('')
    assert not cvtools.makedirs(None)
    # Without a trailing separator the result is falsy; with one it is
    # truthy.
    assert not cvtools.makedirs(current_path + '/out/dir')
    assert cvtools.makedirs(current_path + '/out/dir/')
    # File paths under new folders, including a backslash and '..'.
    assert cvtools.makedirs(current_path + '/out/dir/test1/test.txt')
    assert cvtools.makedirs(current_path + '/out/dir\\test2/test.txt')
    assert cvtools.makedirs(current_path + '/out/dir/test3/test.txt')
    assert cvtools.makedirs(current_path + '/out/dir/test3/../test4/test.txt')
def test_makedirs():
    """Check the truthiness of cvtools.makedirs for several relative paths.

    The cases run in order because earlier calls may create folders that
    later ones depend on.
    """
    cases = [
        ('', False),
        (None, False),
        ('test.txt', False),
        ('test1/test.txt', True),
        ('test2\\test.txt', True),
        ('test3/test3/test.txt', True),
        ('../tests/test.txt', False),
    ]
    for path, expected in cases:
        assert bool(cvtools.makedirs(path)) == expected
def write_list_to_file(data, dst, line_break=True):
    """Save the elements of a list to a text file.

    Args:
        data (list): elements of basic types; each one is converted with
            ``str`` before it is written, so numbers are accepted too.
        dst (str): path of the file to save.
        line_break: whether to append a newline after every element.

    Returns:
        list: always an empty list (kept for backward compatibility).
    """
    images_list = []
    cvtools.makedirs(dst)
    suffix = '\n' if line_break else ''
    with open(dst, 'w') as f:
        f.writelines(str(line) + suffix for line in data)
    return images_list
def imwrite(img, file_path, params=None, auto_mkdir=True):
    """Write image to file

    The image is encoded in memory and the bytes are then written with
    ``ndarray.tofile``, which also works for paths containing Chinese
    characters.

    Args:
        img (ndarray): Image array to be written.
        file_path (str): Image file path; its extension selects the
            encoder.
        params (None or list): Same as opencv's :func:`imwrite` interface.
        auto_mkdir (bool): If the parent folder of `file_path` does not
            exist, whether to create it automatically.

    Returns:
        bool: Successful or not.

    Raises:
        TypeError: if `img` is not a numpy array.
    """
    if not isinstance(img, np.ndarray):
        raise TypeError('"img" must be a numpy array!')
    if auto_mkdir:
        cvtools.makedirs(file_path)
    # return cv.imwrite(file_path, img, params)
    # imencode returns (success flag, encoded buffer); tofile itself
    # returns None, so the flag is what gets reported to the caller.
    encoded_ok, buffer = cv.imencode(osp.splitext(file_path)[-1], img, params)
    if not encoded_ok:
        return False
    buffer.tofile(file_path)
    return True
def draw_dataframe_list(data_list, color_list, dst, col, im_size=None, tick_spacing=None):
    """Plot the same column(s) of several DataFrames into one figure.

    Args:
        data_list (list): DataFrames to plot; each index is the x axis.
        color_list (list): for every DataFrame, an index into the built-in
            palette ``['green', 'red', 'skyblue', 'blue']``.
        dst (str): path of the image file to save.
        col: column label(s) selected with ``data.loc[:, col]``.
        im_size (tuple): optional figure size.
        tick_spacing: optional spacing of the major x ticks.
    """
    colors = ['green', 'red', 'skyblue', 'blue']
    if im_size is not None:
        plt.figure(figsize=im_size)
    ax = plt.subplot(111)
    if tick_spacing is not None:
        ax.xaxis.set_major_locator(ticker.MultipleLocator(tick_spacing))
    plt.grid()  # show grid lines
    for index, data in enumerate(data_list):
        x = data.index.values
        # ".ix" no longer exists in current pandas; select by label.
        y = data.loc[:, col].values
        plt.plot(x, y, color=colors[color_list[index]])
    # save draw
    cvtools.makedirs(dst)
    plt.savefig(dst)
    plt.close()
def split_channels(self, file, file_info):
    """Parse a raw recording into channel columns and cache the result.

    When the cache has no entry for ``file_info['id']`` the text file is
    parsed, reduced to the channels named in ``self.channel_map``, dumped
    to ``<cfg.CLEAR_SRC>/<id>.pkl`` and registered in the cache. Any error
    during this step is printed and otherwise ignored.

    Args:
        file (str): path of the raw text file.
        file_info (dict): must contain ``'id'``; ``'filename'`` is set to
            the cached path, or to whatever the cache returns when parsing
            failed.

    Returns:
        dict: the same ``file_info`` object.
    """
    file_id = file_info['id']
    if self._cache.get(file_id) is None:
        try:
            with open(file, 'r') as fp:
                raw_data = fp.readlines()
            # file_info['original_name'] = raw_data[0].strip()
            # Line 45 holds the column names and data rows start at line
            # 46 (0-based). Original size note: 10 minutes is about 1.5M
            # rows, 4 s about 10k rows.
            data = pd.DataFrame(
                [item.split() for item in raw_data[46:]],
                columns=[item.strip() for item in raw_data[45].split()])
            # self.channel_map[data.columns.tolist()[0]] = 'time'
            # Keep only the mapped channels and drop the first data row.
            data = data[self.channel_map.keys()][1:].astype(float)
            # Original note: assigning data.columns = [] did not work,
            # hence rename().
            data.rename(columns=self.channel_map, inplace=True)
            # save clear data
            filename = osp.join(cfg.CLEAR_SRC, file_id + '.pkl')
            cvtools.makedirs(filename)
            cvtools.dump(data, filename)
            self._cache.put(file_id, filename)
        except Exception as e:
            # Best effort: report the failing file and carry on.
            print(file, e)
    file_info['filename'] = self._cache.get(file_id)
    return file_info