def imwrite_tf_worker_answer(self, is_correct: bool, img_fname: str, row: WorkerHITResult, category: str):
    """ Copy a sentinel image into a folder tree keyed by whether the worker answered correctly.

    Destination layout:
        dump_root/task_nickname/split/WorkerId/HITId/{True,False}/category/img_fname

    Path joins now use pathlib.Path for Windows compatibility
    (resolves the old "switch to os.path" TODO).

    Args:
    -   is_correct: boolean representing if correct answer choice selected
    -   img_fname: image file name (no directory components)
    -   row: one worker's HIT result (provides WorkerId and HITId)
    -   category: answer category; forward slashes are sanitized since
            they would otherwise be treated as path separators

    Returns:
    -   None
    """
    if '/' in category:
        # extra forward slash will screw up the filename
        category = category.replace('/', '_')
    if self.save_tf_worker_answers:
        src = Path(_ROOT) / 'temp_files' / self.hit_info.sentinel_img_dir / img_fname
        # save under True/False distinction
        new_dirpath = (
            Path(self.dump_root)
            / self.hit_info.task_nickname
            / self.split
            / row.WorkerId
            / row.HITId
            / str(is_correct)
            / category
        )
        check_mkdir(str(new_dirpath))
        dst = new_dirpath / img_fname
        copyfile(src, dst)
def dump_relpath_txt(jpg_dir: str, txt_output_dir: str) -> str:
    """ Write a sorted txt file listing image file names found in a directory.

    Searches the top level of `jpg_dir` for common image extensions
    (jpg/jpeg/png, upper- and lower-case) and records each file's last
    path element, one per line.

    Args:
    -   jpg_dir: directory to scan for image files
    -   txt_output_dir: directory in which to save the txt file

    Returns:
    -   txt_save_fpath: path to the written txt file
    """
    dirname = Path(jpg_dir).stem
    fpaths = []
    for suffix in ['jpg', 'JPG', 'jpeg', 'JPEG', 'png', 'PNG']:
        fpaths += glob.glob(f'{jpg_dir}/*.{suffix}')

    txt_lines = sorted(
        get_last_n_path_elements_as_str(fpath, n=1) for fpath in fpaths
    )
    check_mkdir(txt_output_dir)
    txt_save_fpath = f'{txt_output_dir}/{dirname}_relative_paths.txt'
    write_txt_lines(txt_save_fpath, txt_lines)
    return txt_save_fpath
def dump_relpath_txt(jpg_dir: str, txt_output_dir: str) -> str:
    """ Write a sorted txt file of image paths relative to a root directory.

    Recursively walks `jpg_dir`; each matching image file is recorded as
    its path with the `jpg_dir` prefix sliced off (entries keep a leading
    separator when `jpg_dir` has no trailing slash).

    Args:
    -   jpg_dir: root directory to walk for image files
    -   txt_output_dir: directory in which to save the txt file

    Returns:
    -   txt_save_fpath: path to the written txt file
    """
    dirname = Path(jpg_dir).stem
    # str.endswith accepts a tuple of candidate suffixes directly
    suffixes = ('jpg', 'JPG', 'jpeg', 'JPEG', 'png', 'PNG')
    fpaths = []
    for root, _, fnames in os.walk(jpg_dir):
        fpaths += [
            os.path.join(root, fname)
            for fname in fnames
            if fname.endswith(suffixes)
        ]
    # strip the root prefix to obtain relative paths
    txt_lines = sorted(fpath[len(jpg_dir):] for fpath in fpaths)
    check_mkdir(txt_output_dir)
    txt_save_fpath = f'{txt_output_dir}/{dirname}_relative_paths.txt'
    write_txt_lines(txt_save_fpath, txt_lines)
    return txt_save_fpath
def execute_on_dataloader(
        self, test_loader: torch.utils.data.dataloader.DataLoader):
    """ Run inference image-by-image over a dataloader, saving grayscale label maps.

    Each prediction is written as a PNG under `self.gray_folder`;
    images whose output file already exists are skipped.

    Args:
    -   test_loader: dataloader yielding (image_tensor, label) pairs
            with batch size 1

    Returns:
    -   None
    """
    if self.args.save_folder == 'default':
        self.args.save_folder = f'{_ROOT}/temp_files/{self.args.model_name}_{self.args.dataset}_universal_{self.scales_str}/{self.args.base_size}'

    os.makedirs(self.args.save_folder, exist_ok=True)
    self.gray_folder = os.path.join(self.args.save_folder, 'gray')
    check_mkdir(self.gray_folder)

    data_time = AverageMeter()
    batch_time = AverageMeter()
    end = time.time()

    for i, (img_tensor, _) in enumerate(test_loader):
        logger.info(f'On image {i}')
        data_time.update(time.time() - end)

        # determine path for grayscale label map
        image_path, _ = self.data_list[i]
        if self.args.img_name_unique:
            image_name = Path(image_path).stem
        else:
            image_name = get_unique_stem_from_last_k_strs(image_path)
        gray_path = os.path.join(self.gray_folder, image_name + '.png')
        if Path(gray_path).exists():
            continue

        # convert Pytorch tensor (1,C,H,W) -> Numpy (H,W,C), then feedforward
        image = np.transpose(np.squeeze(img_tensor.numpy(), axis=0), (1, 2, 0))
        gray_img = self.execute_on_img(image)
        batch_time.update(time.time() - end)
        end = time.time()
        cv2.imwrite(gray_path, gray_img)

        # todo: update to time remaining.
        is_final = (i + 1) == len(test_loader)
        if is_final or (i + 1) % self.args.print_freq == 0:
            logger.info(
                'Test: [{}/{}] '
                'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}).'.format(
                    i + 1,
                    len(test_loader),
                    data_time=data_time,
                    batch_time=batch_time))
def execute_on_dataloader(
        self, test_loader: torch.utils.data.dataloader.DataLoader):
    """ Run single-image inference over a dataloader and dump all results to one pickle.

    Predictions are keyed by image path relative to `self.input_file`
    and written together as `label_maps.pkl` inside the gray folder.

    Args:
    -   test_loader: dataloader yielding (image_tensor, label) pairs
            with batch size 1

    Returns:
    -   None
    """
    if self.args.save_folder == 'default':
        self.args.save_folder = f'{_ROOT}/temp_files/{self.args.model_name}_{self.args.dataset}_universal_{self.scales_str}/{self.args.base_size}'

    os.makedirs(self.args.save_folder, exist_ok=True)
    gray_folder = os.path.join(self.args.save_folder, 'gray')
    self.gray_folder = gray_folder
    check_mkdir(self.gray_folder)

    data_time = AverageMeter()
    batch_time = AverageMeter()
    end = time.time()

    results = dict()  # path: label_map
    for i, (input, _) in enumerate(tqdm.tqdm(test_loader)):
        data_time.update(time.time() - end)

        # convert Pytorch tensor (1,C,H,W) -> Numpy (H,W,C)
        input = np.squeeze(input.numpy(), axis=0)
        image = np.transpose(input, (1, 2, 0))
        gray_img = self.execute_on_img_single(image)

        batch_time.update(time.time() - end)
        end = time.time()
        image_name, _ = self.data_list[i]
        img_id = image_name[len(self.input_file):]
        results[img_id] = gray_img

        # todo: update to time remaining.
        # BUGFIX(clarity): previous condition `0 and (A) or (B)` parsed as
        # `(0 and A) or B` (`and` binds tighter than `or`), i.e. just B —
        # the `0 and` only disabled the print-freq clause. Made explicit;
        # behavior unchanged: log only on the final iteration.
        if (i + 1) == len(test_loader):
            logger.info(
                'Test: [{}/{}] '
                'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}).'.
                format(i + 1,
                       len(test_loader),
                       data_time=data_time,
                       batch_time=batch_time))
    mmcv.dump(results, os.path.join(gray_folder, 'label_maps.pkl'))
def get_dummy_datalist():
    """ Write dummy camvid-style predictions and ground truth to disk.

    Three 2x2 label maps are written for both ground truth and
    predictions (255 marks ignored pixels in img3's ground truth).

    Expected totals over all 3 images:
        intersection [4, 2, 1], union [7, 5, 1] -> IoUs 4/7, 2/5, 1/1

    Returns:
    -   data_list: list of (gt_fpath, gt_fpath) tuples, the first entry
        standing in for a dummy RGB filepath
    """
    # per-image: inter [2, 1, 0], union [3, 2, 0]
    pred1 = np.array([[0, 0], [1, 0]]).astype(np.uint8)
    target1 = np.array([[0, 0], [1, 1]]).astype(np.uint8)

    # per-image: intersection [1, 1, 1], union [2, 2, 1]
    pred2 = np.array([[2, 0], [1, 0]]).astype(np.uint8)
    target2 = np.array([[2, 0], [1, 1]]).astype(np.uint8)

    # per-image: intersection [1, 0, 0], union [2, 1, 0]
    pred3 = np.array([[1, 0], [1, 0]]).astype(np.uint8)
    target3 = np.array([[255, 0], [255, 1]]).astype(np.uint8)

    # (removed unused local `num_classes = 3`)
    check_mkdir(f'{_ROOT}/accuracy_calculator_data/ground_truth')
    gt_fpaths = []
    for idx, target in enumerate([target1, target2, target3], start=1):
        gt_fpath = f'{_ROOT}/accuracy_calculator_data/ground_truth/img{idx}.png'
        imageio.imwrite(gt_fpath, target)
        gt_fpaths.append(gt_fpath)

    check_mkdir(f'{_ROOT}/accuracy_calculator_data/gray')
    for idx, pred in enumerate([pred1, pred2, pred3], start=1):
        imageio.imwrite(f'{_ROOT}/accuracy_calculator_data/gray/img{idx}.png', pred)

    # dummy RGB filepaths: reuse the GT path as the "rgb" path
    data_list = [(gt_fpath, gt_fpath) for gt_fpath in gt_fpaths]
    return data_list
def execute_on_dataloader_batched(
        self, test_loader: torch.utils.data.dataloader.DataLoader):
    """ Optimize throughput through the network by batched inference,
    instead of single image inference.

    Each predicted grayscale label map in a batch is written to
    `self.gray_folder` as a PNG named after its source image.

    Args:
    -   test_loader: dataloader yielding batched (image_tensor, label) pairs

    Returns:
    -   None
    """
    if self.args.save_folder == 'default':
        self.args.save_folder = f'{_ROOT}/temp_files/{self.args.model_name}_{self.args.dataset}_universal_{self.scales_str}/{self.args.base_size}'

    os.makedirs(self.args.save_folder, exist_ok=True)
    gray_folder = os.path.join(self.args.save_folder, 'gray')
    self.gray_folder = gray_folder

    data_time = AverageMeter()
    batch_time = AverageMeter()
    end = time.time()

    check_mkdir(self.gray_folder)

    for i, (input, _) in enumerate(test_loader):
        logger.info(f"On batch {i}")
        data_time.update(time.time() - end)

        gray_batch = self.execute_on_batch(input)
        batch_sz = input.shape[0]
        # dump results to disk
        for j in range(batch_sz):
            # determine path for grayscale label map; global index is
            # batch offset plus position within the batch
            image_path, _ = self.data_list[i * self.args.batch_size_val + j]
            if self.args.img_name_unique:
                image_name = Path(image_path).stem
            else:
                image_name = get_unique_stem_from_last_k_strs(image_path)
            gray_path = os.path.join(self.gray_folder, image_name + '.png')
            cv2.imwrite(gray_path, gray_batch[j])

        batch_time.update(time.time() - end)
        end = time.time()

        if ((i + 1) % self.args.print_freq == 0) or (i + 1 == len(test_loader)):
            # BUGFIX: added the missing space before 'Batch' — the two
            # adjacent f-strings previously concatenated as ")Batch".
            logger.info(
                f'Test: [{i+1}/{len(test_loader)}] '
                f'Data {data_time.val:.3f} (avg={data_time.avg:.3f}) '
                f'Batch {batch_time.val:.3f} (avg={batch_time.avg:.3f})')
def dump_pascalcontext_mat_files(pcontext_dst_dir: str) -> None:
    """ Convert PASCAL Context annotations from .mat files to .png

    Each trainval .mat file's 'LabelMap' array is written as a uint16
    PNG and round-trip verified against what was written.

    Args:
    -   pcontext_dst_dir: string represent absolute path
        to PASCAL Context destination directory

    Returns:
    -   None
    """
    dataset_name = 'pascal-context-460'
    # NOTE(review): this mapping is computed but never read below —
    # presumably retained for its lookup/validation side effect; confirm
    # before removing.
    id_to_class_name_map = get_dataloader_id_to_classname_map(
        dataset_name, include_ignore_idx_cls=False)

    png_save_dir = f'{pcontext_dst_dir}/Segmentation_GT_460cls'
    check_mkdir(png_save_dir)

    # annotation files, stored as .mat files
    mat_fpaths = glob.glob(f'{pcontext_dst_dir}/trainval/*.mat')
    num_files = len(mat_fpaths)
    for i, mat_fpath in enumerate(mat_fpaths):
        if i % 500 == 0:
            print(f'On {i}/{num_files}')
        fname_stem = Path(mat_fpath).stem
        label_img = scipy.io.loadmat(mat_fpath)['LabelMap']

        label_save_fpath = f'{png_save_dir}/{fname_stem}.png'
        # Need uint16 to be able to exceed 256 value range —
        # there are up to 460 classes present.
        imageio.imwrite(label_save_fpath, label_img.astype(np.uint16))
        # round-trip check: what we load back must equal what we wrote
        assert np.allclose(imageio.imread(label_save_fpath), label_img)
def write_semantic_from_panoptic(
    cse: COCOSemanticExtractor,
    split: str,
    instance_img_fpath: str,
    ignore_idx: int = 255
) -> None:
    """ Derive a semantic label map from a panoptic instance image and write it.

    Starts from an all-ignore canvas; for each annotated segment, the
    pixels covered by that instance id are swapped to the segment's
    category id. The result is saved under the 201-class-taxonomy
    folder mirroring the panoptic annotation layout.

    Args:
    -   cse: extractor providing instance-id images and per-image annotations
    -   split: dataset split name, e.g. 'train' or 'val'
    -   instance_img_fpath: path to the panoptic instance annotation image
    -   ignore_idx: label value assigned to unannotated pixels

    Returns:
    -   None
    """
    fname_stem = Path(instance_img_fpath).stem
    instance_id_img = cse.instance_api.get_instance_id_img(split, fname_stem)
    img_annot = cse.semantic_api.get_img_annotation(split, fname_stem)

    # default pixel value is unlabeled
    semantic_img = np.full_like(instance_id_img, ignore_idx, dtype=np.uint8)

    for segment in img_annot['segments_info']:
        segment_mask = (instance_id_img == segment['id']).astype(np.uint8)
        semantic_img = swap_px_inside_mask(
            semantic_img,
            segment_mask,
            old_val=ignore_idx,
            new_val=segment['category_id'],
            require_strict_boundaries=True
        )

    semantic_fpath = instance_img_fpath.replace(
        f'annotations/panoptic_{split}2017',
        f'semantic_annotations201/{split}2017'  # in 201-class taxonomy
    )
    check_mkdir(Path(semantic_fpath).parent)
    imageio.imwrite(semantic_fpath, semantic_img)
def render_each_worker_annotations(
    dir_savename: str,
    batch_csv_fname: str,
    img_dirpath: str,
    folder_per_hit: bool = True
):
    """ Copy each worker's annotated images into per-worker/per-category folders.

    Reads an MTurk batch CSV; for every answered choice column, copies
    the referenced image to
    temp_files/dir_savename/worker_id/category[/hit_id]/fname.

    In train set, had to discard:
        worker_id = 'A2R2YZTSME1K3F', hit_id = '3SSN80MU8CPEVNHI370TAPO2EATKXE'

    Args:
    -   dir_savename: name of the output folder under temp_files
    -   batch_csv_fname: MTurk batch CSV file name (in the downloads dir)
    -   img_dirpath: image dir template containing the literal 'SPLIT'
    -   folder_per_hit: whether to nest images in a per-HIT subfolder

    Returns:
    -   None
    """
    csv_dirpath = '/Users/johnlamb/Downloads'
    csv_fpath = f'{csv_dirpath}/{batch_csv_fname}'
    dump_root = 'temp_files'
    with open(csv_fpath, 'r') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        for row in csv_reader:
            worker_id = row['WorkerId']
            hit_id = row['HITId']
            for i in range(100):
                # PERF: test the answer key directly instead of scanning
                # every column per index (was O(100 * num_columns) per row)
                answer_key = f'Answer.choice_{i}'
                if answer_key not in row:
                    continue
                category = row[answer_key]
                image_url = row[f'Input.image_url_{i}']
                split = 'val' if 'val' in image_url else 'train'
                fname = Path(image_url).name
                dump_dirname = category
                split_img_dirpath = img_dirpath.replace('SPLIT', f'{split}')
                src = f'temp_files/{split_img_dirpath}/{fname}'
                new_dirpath = f'{dump_root}/{dir_savename}/{worker_id}/{dump_dirname}'
                if folder_per_hit:
                    new_dirpath += f'/{hit_id}'
                dst = f'{new_dirpath}/{fname}'
                check_mkdir(new_dirpath)
                copyfile(src, dst)
def save_img_classification(self, img_fname: str, category: str):
    """ Copy an image into its assigned category folder.

    Destination: dump_root/task_nickname/split/category/img_fname.
    If the destination file already exists, nothing is copied.

    Args:
    -   img_fname: image file name
    -   category: classification label, used as destination folder name

    Returns:
    -   None
    """
    src = f'{_ROOT}/temp_files/{self.img_dir}/{img_fname}'
    path_parts = [
        self.dump_root,
        self.hit_info.task_nickname,
        self.split,
        category,
    ]
    new_dirpath = '/'.join(f'{part}' for part in path_parts)
    check_mkdir(new_dirpath)
    dst = f'{new_dirpath}/{img_fname}'
    if not Path(dst).exists():
        copyfile(src, dst)
def analyze_multinomial_worker_agreement(self):
    """ Analyze multinomial worker agreement.

    For those HITs that were approved, make a list of assigned labels
    per URL. Take the mode of the approved votes as the relabeled
    category. Sentinel images are classified from ground truth rather
    than consensus. Per-category file lists are written to disk only
    once every split image has been relabeled.

    Args:
    -   None

    Returns:
    -   None
    """
    imgurl_label_dict, times_seen_dict = self.count_multinomial_votes_csv()

    # Show how many annotations each image received (blocks until closed).
    plt.title('Num Repeats Per Img')
    plt.hist(list(times_seen_dict.values()), bins=6)
    plt.show()

    category_lists = defaultdict(list)
    print('Classifying re-labeled images by mode...')
    for i, (imgurl, classname_votes) in enumerate(imgurl_label_dict.items()):
        if i % 100 == 0:
            print(f'On img {i}')
        classname_mode, percent = most_frequent(classname_votes)
        if percent < 80:
            # Below the consensus threshold; flagged, but the mode is
            # still assigned below.
            print(f'\tLOW CONSENSUS {percent}')
        fname = Path(imgurl).name
        if fname in self.split_img_fnames:
            classname_mode = strip_forward_slash(classname_mode)
            self.save_img_classification(fname, classname_mode)
            if self.is_sentinel(fname):
                # We don't need consensus on these, since we have ground truth.
                continue
            category_lists[classname_mode] += [fname]

    # Also write the Sentinel classification to disk
    for (sentinel_fname, sentinel_classname) in self.hit_info.sentinels:
        sentinel_classname = strip_forward_slash(sentinel_classname)
        if sentinel_fname in self.split_img_fnames:
            assert self.hit_info.sentinel_img_dir == self.img_dir
            category_lists[sentinel_classname] += [sentinel_fname]

    # Sanity check -- ensure sum of total split imgs cardinality is correct.
    num_relabeled_split_imgs = sum(
        len(cat_list) for cat_list in category_lists.values())
    if num_relabeled_split_imgs != self.num_split_imgs:
        print(f'Found {self.num_split_imgs} imgs in split.')
        print(f'Found {num_relabeled_split_imgs} relabeled imgs.')
        print(
            'Not all images relabeled yet. Cannot write final classifications yet.'
        )
        return

    num_written_imgs = 0
    for classname, img_fnames in category_lists.items():
        # BUGFIX: was bare `hit_info`, which is not defined in this method's
        # scope (would raise NameError); use the instance attribute.
        save_fname = f'{self.hit_info.dataset_name}_{self.split}_{self.hit_info.task_nickname}_to_{classname}.txt'
        dirname = f'{self.hit_info.dataset_name}_{self.hit_info.task_nickname}'
        save_dir = f'mturk/verified_reclassification_files/{dirname}'
        check_mkdir(save_dir)
        save_fpath = f'{save_dir}/{save_fname}'
        write_txt_lines(save_fpath, img_fnames)
        num_written_imgs += len(read_txtfile_as_np(save_fpath))
    # every written fname must be readable back, covering the whole split
    assert num_written_imgs == self.num_split_imgs
    print('# Written == # Read. Success.')
def find_relabeled_taxonomy(dname: str, update_records):
    """ For any given dataset, compare relabeled classes (in universal
    taxonomy) with universal classes that correspond with original classes.

    We populate 3 separate spreadsheets:
    (1) map from [original->relabeled] taxonomies (`remap_rows`)
    (2) _names.txt file for relabeled taxonomy (`new_tax_rows`)
    (3) column of master Google spreadsheet with correspondence to all
        universal classes; should contain everything from (2), and more
        (i.e. blank entries for the complement) (`new_googlesheet_rows`)
    """
    tsv_data = pd.read_csv(tsv_fpath, sep='\t', keep_default_na=False)
    remap_rows = []
    new_googlesheet_rows = []
    all_u_classes = []
    featured_u_classes = []
    relabeled_classes = get_new_classes(update_records)

    for _, row in tsv_data.iterrows():
        u_name = parse_uentry(row['universal'])
        all_u_classes.append(u_name)

        # this specific dataset's classes that map to u_name
        d_classes = parse_entry(row[dname])
        has_existing_mapping = len(d_classes) != 0
        if has_existing_mapping:
            # pre-existing corresponding labels, before re-labeling
            featured_u_classes.append(u_name)

        if has_existing_mapping or u_name in relabeled_classes:
            # pre-existing correspondence, or new correspondence will exist
            for d_class in d_classes:
                remap_rows.append(
                    {dname: d_class, f'{dname}-relabeled': u_name})
            new_googlesheet_rows.append({
                f'{dname}-relabeled': u_name,
                'universal': u_name
            })
        else:
            # leave blank, will be no such u_name label
            new_googlesheet_rows.append({
                f'{dname}-relabeled': '',
                'universal': u_name
            })

    # ensure no typos in update records
    assert all(
        relabeled_class in all_u_classes
        for relabeled_class in relabeled_classes)

    new_classes = relabeled_classes - set(featured_u_classes)
    print(f'To {dname}, we added {new_classes}')

    new_taxonomy = relabeled_classes | set(featured_u_classes)
    new_tax_rows = [
        {f'{dname}-relabeled': new_tax_class}
        for new_tax_class in new_taxonomy
    ]
    save_new_taxonomy_csv = f'names/{dname}-relabeled_names.tsv'
    check_mkdir('names')
    write_csv(save_new_taxonomy_csv, new_tax_rows)

    remap_rows.append({dname: 'unlabeled', f'{dname}-relabeled': 'unlabeled'})
    save_remap_csv = f'{REPO_ROOT}/mseg/class_remapping_files/{dname}_to_{dname}-relabeled.tsv'
    write_csv(save_remap_csv, remap_rows)

    new_googlesheet_csv = f'{REPO_ROOT}/{dname}_to_relabeled_universal.tsv'
    write_csv(new_googlesheet_csv, new_googlesheet_rows)