def imwrite_tf_worker_answer(self, is_correct: bool, img_fname: str,
                                 row: WorkerHITResult, category: str):
        """
		TODO: Switch to using os.path() for windows compatibility.

			Args:
			-	is_correct: boolean representing if correct answer choice selected
			-	img_fname
			-	row
			-	category

			Returns:
		"""
        if '/' in category:
            # extra forward slash will screw up the filename
            category = category.replace('/', '_')
        if self.save_tf_worker_answers:
            # save under True/False distinction
            src = f'{_ROOT}/temp_files/{self.hit_info.sentinel_img_dir}/{img_fname}'

            new_dirpath = f'{self.dump_root}'
            new_dirpath += f'/{self.hit_info.task_nickname}'
            new_dirpath += f'/{self.split}'
            new_dirpath += f'/{row.WorkerId}'
            new_dirpath += f'/{row.HITId}'
            new_dirpath += f'/{str(is_correct)}'
            new_dirpath += f'/{category}'

            check_mkdir(new_dirpath)
            dst = f'{new_dirpath}/{img_fname}'
            copyfile(src, dst)
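
# A minimal sketch of the `check_mkdir` helper that these snippets rely on;
# its real definition is not shown here, so this is an assumption about its
# behavior (create the directory tree if it does not already exist).
import os

def check_mkdir(dirpath: str) -> None:
    """Create `dirpath` (including parents) if it does not already exist."""
    os.makedirs(dirpath, exist_ok=True)
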
def dump_relpath_txt(jpg_dir: str, txt_output_dir: str) -> str:
    """
	Dump relative paths.

		Args:
		-	jpg_dir:
		-	txt_output_dir:

		Returns:
		-	txt_save_fpath:
	"""
    fpaths = []
    dirname = Path(jpg_dir).stem
    for suffix in ['jpg', 'JPG', 'jpeg', 'JPEG', 'png', 'PNG']:
        suffix_fpaths = glob.glob(f'{jpg_dir}/*.{suffix}')
        fpaths.extend(suffix_fpaths)

    txt_lines = [
        get_last_n_path_elements_as_str(fpath, n=1) for fpath in fpaths
    ]
    txt_lines.sort()
    check_mkdir(txt_output_dir)
    txt_save_fpath = f'{txt_output_dir}/{dirname}_relative_paths.txt'
    write_txt_lines(txt_save_fpath, txt_lines)
    return txt_save_fpath
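
# A hypothetical sketch of the `write_txt_lines` helper referenced above (its
# actual implementation is not shown): write one string per line to a file.
from typing import List

def write_txt_lines(save_fpath: str, txt_lines: List[str]) -> None:
    """Write each element of `txt_lines` to `save_fpath`, one per line."""
    with open(save_fpath, 'w') as f:
        f.write('\n'.join(txt_lines) + '\n')
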
def dump_relpath_txt(jpg_dir: str, txt_output_dir: str) -> str:
    """
	Dump relative paths.

		Args:
		-	jpg_dir:
		-	txt_output_dir:

		Returns:
		-	txt_save_fpath:
	"""
    fpaths = []
    dirname = Path(jpg_dir).stem
    # for suffix in ['jpg','JPG','jpeg','JPEG','png','PNG']:
    # 	suffix_fpaths = glob.glob(f'{jpg_dir}/*.{suffix}')
    # 	fpaths.extend(suffix_fpaths)
    for root, dirs, fs in os.walk(jpg_dir):
        for f in fs:
            if any(
                    f.endswith(_)
                    for _ in ['jpg', 'JPG', 'jpeg', 'JPEG', 'png', 'PNG']):
                fpaths.append(os.path.join(root, f))
    #txt_lines = [get_last_n_path_elements_as_str(fpath, n=-1) for fpath in fpaths]
    # strip the jpg_dir prefix to obtain paths relative to that root
    # (if jpg_dir lacks a trailing slash, each line will begin with '/')
    txt_lines = [fpath[len(jpg_dir):] for fpath in fpaths]
    txt_lines.sort()
    check_mkdir(txt_output_dir)
    txt_save_fpath = f'{txt_output_dir}/{dirname}_relative_paths.txt'
    write_txt_lines(txt_save_fpath, txt_lines)
    return txt_save_fpath
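
# Example usage with hypothetical paths (placeholders, not paths from the
# original repo): index every image under a dataset root and write one
# relative path per line.
#   txt_fpath = dump_relpath_txt('/data/camvid/images', '/data/camvid/lists')
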
    def execute_on_dataloader(
            self, test_loader: torch.utils.data.dataloader.DataLoader):
        """
			Args:
			-   test_loader: 

			Returns:
			-   None
		"""
        if self.args.save_folder == 'default':
            self.args.save_folder = f'{_ROOT}/temp_files/{self.args.model_name}_{self.args.dataset}_universal_{self.scales_str}/{self.args.base_size}'

        os.makedirs(self.args.save_folder, exist_ok=True)
        gray_folder = os.path.join(self.args.save_folder, 'gray')
        self.gray_folder = gray_folder

        data_time = AverageMeter()
        batch_time = AverageMeter()
        end = time.time()

        check_mkdir(self.gray_folder)

        for i, (input, _) in enumerate(test_loader):
            logger.info(f'On image {i}')
            data_time.update(time.time() - end)

            # determine path for grayscale label map
            image_path, _ = self.data_list[i]
            if self.args.img_name_unique:
                image_name = Path(image_path).stem
            else:
                image_name = get_unique_stem_from_last_k_strs(image_path)
            gray_path = os.path.join(self.gray_folder, image_name + '.png')
            if Path(gray_path).exists():
                continue

            # convert Pytorch tensor -> Numpy, then feedforward
            input = np.squeeze(input.numpy(), axis=0)
            image = np.transpose(input, (1, 2, 0))
            gray_img = self.execute_on_img(image)

            batch_time.update(time.time() - end)
            end = time.time()
            cv2.imwrite(gray_path, gray_img)

            # todo: update to time remaining.
            if ((i + 1) % self.args.print_freq == 0) or (i + 1
                                                         == len(test_loader)):
                logger.info(
                    'Test: [{}/{}] '
                    'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                    'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}).'.
                    format(i + 1,
                           len(test_loader),
                           data_time=data_time,
                           batch_time=batch_time))
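
# A minimal sketch of the `AverageMeter` timing utility used above, assuming
# the common PyTorch-examples definition (most recent value plus a running
# average); the project's actual class may differ.
class AverageMeter:
    """Tracks the most recent value and the running average."""

    def __init__(self):
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, val: float, n: int = 1) -> None:
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
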
    def execute_on_dataloader(
            self, test_loader: torch.utils.data.dataloader.DataLoader):
        """
            Args:
            -   test_loader: 

            Returns:
            -   None
        """
        if self.args.save_folder == 'default':
            self.args.save_folder = f'{_ROOT}/temp_files/{self.args.model_name}_{self.args.dataset}_universal_{self.scales_str}/{self.args.base_size}'

        os.makedirs(self.args.save_folder, exist_ok=True)
        gray_folder = os.path.join(self.args.save_folder, 'gray')
        self.gray_folder = gray_folder
        check_mkdir(self.gray_folder)

        data_time = AverageMeter()
        batch_time = AverageMeter()
        end = time.time()
        results = dict()  # path: label_map

        for i, (input, _) in enumerate(tqdm.tqdm(test_loader)):
            data_time.update(time.time() - end)
            # convert Pytorch tensor -> Numpy
            input = np.squeeze(input.numpy(), axis=0)
            image = np.transpose(input, (1, 2, 0))
            gray_img = self.execute_on_img_single(image)
            batch_time.update(time.time() - end)
            end = time.time()
            image_name, _ = self.data_list[i]
            img_id = image_name[len(self.input_file):]
            results[img_id] = gray_img

            # todo: update to time remaining.
            # periodic logging is disabled here; log only after the final batch
            if (i + 1) == len(test_loader):
                logger.info(
                    'Test: [{}/{}] '
                    'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                    'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}).'.
                    format(i + 1,
                           len(test_loader),
                           data_time=data_time,
                           batch_time=batch_time))
        mmcv.dump(results, os.path.join(gray_folder, 'label_maps.pkl'))
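
# The pickle written above can later be reloaded for evaluation, e.g.
# (assuming mmcv is installed and `gray_folder` points at the same directory):
#   results = mmcv.load(os.path.join(gray_folder, 'label_maps.pkl'))
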
def get_dummy_datalist():
    """
	Write dummy camvid data.
	Expect inter [4,2,1]
	Expect union [7,5,1]
	IoUs: 4/7, 2/5, 1/1
	"""
    pred1 = np.array([[0, 0], [1, 0]]).astype(np.uint8)
    target1 = np.array([[0, 0], [1, 1]]).astype(np.uint8)
    # inter [2, 1, 0]
    # union [3, 2, 0]

    pred2 = np.array([[2, 0], [1, 0]]).astype(np.uint8)
    target2 = np.array([[2, 0], [1, 1]]).astype(np.uint8)
    num_classes = 3
    # intersection, [1,1,1]
    # union, [2,2,1]

    pred3 = np.array([[1, 0], [1, 0]]).astype(np.uint8)
    target3 = np.array([[255, 0], [255, 1]]).astype(np.uint8)
    # intersection, [1, 0, 0])
    # union, [2, 1, 0]

    check_mkdir(f'{_ROOT}/accuracy_calculator_data/ground_truth')
    gt_fpath1 = f'{_ROOT}/accuracy_calculator_data/ground_truth/img1.png'
    gt_fpath2 = f'{_ROOT}/accuracy_calculator_data/ground_truth/img2.png'
    gt_fpath3 = f'{_ROOT}/accuracy_calculator_data/ground_truth/img3.png'

    imageio.imwrite(gt_fpath1, target1)
    imageio.imwrite(gt_fpath2, target2)
    imageio.imwrite(gt_fpath3, target3)

    check_mkdir(f'{_ROOT}/accuracy_calculator_data/gray')
    imageio.imwrite(f'{_ROOT}/accuracy_calculator_data/gray/img1.png', pred1)
    imageio.imwrite(f'{_ROOT}/accuracy_calculator_data/gray/img2.png', pred2)
    imageio.imwrite(f'{_ROOT}/accuracy_calculator_data/gray/img3.png', pred3)

    # dummy RGB filepaths
    data_list = [
        (gt_fpath1, gt_fpath1),
        (gt_fpath2, gt_fpath2),
        (gt_fpath3, gt_fpath3),
    ]
    return data_list
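
# A small self-check (not part of the original snippet) that reproduces the
# intersection/union totals documented in the docstring above, using the same
# dummy arrays and treating 255 as the ignored "unlabeled" value.
# Assumes numpy is imported as `np`, as elsewhere in these snippets.
def _check_dummy_inter_union() -> None:
    preds = [
        np.array([[0, 0], [1, 0]]),
        np.array([[2, 0], [1, 0]]),
        np.array([[1, 0], [1, 0]]),
    ]
    targets = [
        np.array([[0, 0], [1, 1]]),
        np.array([[2, 0], [1, 1]]),
        np.array([[255, 0], [255, 1]]),
    ]
    num_classes = 3
    inter = np.zeros(num_classes, dtype=np.int64)
    union = np.zeros(num_classes, dtype=np.int64)
    for pred, target in zip(preds, targets):
        valid = target != 255
        for c in range(num_classes):
            p = (pred == c) & valid
            t = (target == c) & valid
            inter[c] += np.logical_and(p, t).sum()
            union[c] += np.logical_or(p, t).sum()
    assert inter.tolist() == [4, 2, 1]
    assert union.tolist() == [7, 5, 1]
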
    def execute_on_dataloader_batched(
            self, test_loader: torch.utils.data.dataloader.DataLoader):
        """ Optimize throughput through the network by batched inference, instead of single image inference
		"""
        if self.args.save_folder == 'default':
            self.args.save_folder = f'{_ROOT}/temp_files/{self.args.model_name}_{self.args.dataset}_universal_{self.scales_str}/{self.args.base_size}'

        os.makedirs(self.args.save_folder, exist_ok=True)
        gray_folder = os.path.join(self.args.save_folder, 'gray')
        self.gray_folder = gray_folder

        data_time = AverageMeter()
        batch_time = AverageMeter()
        end = time.time()

        check_mkdir(self.gray_folder)

        for i, (input, _) in enumerate(test_loader):
            logger.info(f"On batch {i}")
            data_time.update(time.time() - end)

            gray_batch = self.execute_on_batch(input)
            batch_sz = input.shape[0]
            # dump results to disk
            for j in range(batch_sz):
                # determine path for grayscale label map
                image_path, _ = self.data_list[i * self.args.batch_size_val +
                                               j]
                if self.args.img_name_unique:
                    image_name = Path(image_path).stem
                else:
                    image_name = get_unique_stem_from_last_k_strs(image_path)
                gray_path = os.path.join(self.gray_folder, image_name + '.png')
                cv2.imwrite(gray_path, gray_batch[j])

            batch_time.update(time.time() - end)
            end = time.time()

            if ((i + 1) % self.args.print_freq == 0) or (i + 1
                                                         == len(test_loader)):
                logger.info(
                    f'Test: [{i+1}/{len(test_loader)}] '
                    f'Data {data_time.val:.3f} (avg={data_time.avg:.3f})'
                    f'Batch {batch_time.val:.3f} (avg={batch_time.avg:.3f})')
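
# Note on the indexing above (an observation, not from the original source):
# `i * self.args.batch_size_val + j` assumes the dataloader preserves the
# order of `self.data_list` and that `batch_size_val` equals the loader's
# batch size, so only the final batch may be smaller.
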
def dump_pascalcontext_mat_files(pcontext_dst_dir: str) -> None:
    """
	Convert PASCAL Context annotations from .mat files to .png

        Args:
        -   pcontext_dst_dir: string representing the absolute path to the
                PASCAL Context destination directory

		Returns:
		-	None
	"""
    dataset_name = 'pascal-context-460'
    id_to_class_name_map = get_dataloader_id_to_classname_map(
        dataset_name, include_ignore_idx_cls=False)

    save_dirname = 'Segmentation_GT_460cls'
    png_save_dir = f'{pcontext_dst_dir}/{save_dirname}'
    check_mkdir(png_save_dir)

    # annotation files, stored as .mat files
    mat_files_dir = f'{pcontext_dst_dir}/trainval'
    mat_fpaths = glob.glob(f'{mat_files_dir}/*.mat')

    for i, mat_fpath in enumerate(mat_fpaths):
        if i % 500 == 0:
            print(f'On {i}/{len(mat_fpaths)}')
        fname_stem = Path(mat_fpath).stem

        label_data = scipy.io.loadmat(mat_fpath)
        label_img = label_data['LabelMap']

        label_save_fpath = f'{png_save_dir}/{fname_stem}.png'

        # Need uint16 since label values exceed the uint8 range (max 255);
        # there are up to 460 classes present.
        label_img_uint16 = label_img.astype(np.uint16)

        imageio.imwrite(label_save_fpath, label_img_uint16)
        loaded_label_img = imageio.imread(label_save_fpath)
        assert np.allclose(loaded_label_img, label_img)
def write_semantic_from_panoptic(
	cse: COCOSemanticExtractor,
	split: str,
	instance_img_fpath: str,
	ignore_idx: int = 255
) -> None:
	"""
        Write a semantic label image derived from COCO panoptic annotations.

        Args:
        -   cse: COCO semantic extractor wrapping the instance and semantic annotation APIs
        -   split: dataset split, e.g. 'train' or 'val'
        -   instance_img_fpath: file path to the panoptic instance-ID image
        -   ignore_idx: label value assigned to unlabeled pixels

        Returns:
        -   None
	"""
	fname_stem = Path(instance_img_fpath).stem
	instance_id_img = cse.instance_api.get_instance_id_img(split, fname_stem)
	img_annot = cse.semantic_api.get_img_annotation(split, fname_stem)
	
	# default pixel value is unlabeled
	semantic_img = np.ones_like(instance_id_img, dtype=np.uint8) * ignore_idx
	for segment in img_annot['segments_info']:
		segmentid = segment['id']
		categoryid = segment['category_id']
		segment_mask = (instance_id_img == segmentid).astype(np.uint8)

		semantic_img = swap_px_inside_mask(
			semantic_img, 
			segment_mask,
			old_val=ignore_idx,
			new_val=categoryid,
			require_strict_boundaries=True
		)
	semantic_fpath = instance_img_fpath.replace(
		f'annotations/panoptic_{split}2017',
		f'semantic_annotations201/{split}2017' # in 201-class taxonomy
		)
	check_mkdir(Path(semantic_fpath).parent)
	imageio.imwrite(semantic_fpath, semantic_img)
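
# A hypothetical sketch of `swap_px_inside_mask` as used above (the real
# helper is not shown): replace `old_val` pixels inside `segment_mask` with
# `new_val`, optionally checking that the mask covers only `old_val` pixels.
def swap_px_inside_mask(
    label_img: np.ndarray,
    segment_mask: np.ndarray,
    old_val: int,
    new_val: int,
    require_strict_boundaries: bool = True
) -> np.ndarray:
    if require_strict_boundaries:
        # every pixel inside the mask should still hold the old value,
        # i.e. segments must not overlap previously written ones
        assert np.all(label_img[segment_mask == 1] == old_val)
    label_img[segment_mask == 1] = new_val
    return label_img
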
def render_each_worker_annotations(
	dir_savename: str,
	batch_csv_fname: str,
	img_dirpath: str,
	folder_per_hit: bool = True
):
	"""
		Render each worker's MTurk annotations by copying the annotated images
		into per-worker (and optionally per-HIT) category folders.

		Note: in the train set, the following assignment had to be discarded:
			worker_id = 'A2R2YZTSME1K3F', hit_id = '3SSN80MU8CPEVNHI370TAPO2EATKXE'

	"""
	csv_dirpath = '/Users/johnlamb/Downloads'
	csv_fpath = f'{csv_dirpath}/{batch_csv_fname}'
	dump_root = 'temp_files'

	with open(csv_fpath, 'r') as csv_file:
		csv_reader = csv.DictReader(csv_file)
		for row in csv_reader:
			worker_id = row['WorkerId']

			hit_id = row['HITId']

			for i in range(100):
				for key in row.keys():
					if key == f'Answer.choice_{i}':
						category = row[key]
						image_url = row[f'Input.image_url_{i}']
						split = 'val' if 'val' in image_url else 'train'
						fname = Path(image_url).name
						dump_dirname = category

						split_img_dirpath = img_dirpath.replace('SPLIT', f'{split}')
						src = f'temp_files/{split_img_dirpath}/{fname}'
						new_dirpath = f'{dump_root}/{dir_savename}/{worker_id}/{dump_dirname}'
						if folder_per_hit:
							new_dirpath += f'/{hit_id}'
						dst = f'{new_dirpath}/{fname}'
						check_mkdir(new_dirpath)
						copyfile(src, dst)
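
# Example invocation with hypothetical arguments (the CSV and image-folder
# names below are placeholders, not values from the original project):
#   render_each_worker_annotations(
#       dir_savename='worker_renderings',
#       batch_csv_fname='Batch_123456_batch_results.csv',
#       img_dirpath='batch_images_SPLIT',
#       folder_per_hit=True,
#   )
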
    def save_img_classification(self, img_fname: str, category: str):
        """
			Args:
			-	img_fname
			-	category

			Returns:
			-	
		"""
        src = f'{_ROOT}/temp_files/{self.img_dir}/{img_fname}'

        new_dirpath = f'{self.dump_root}'
        new_dirpath += f'/{self.hit_info.task_nickname}'
        new_dirpath += f'/{self.split}'
        new_dirpath += f'/{category}'

        check_mkdir(new_dirpath)
        dst = f'{new_dirpath}/{img_fname}'

        if Path(dst).exists():
            return

        copyfile(src, dst)
    def analyze_multinomial_worker_agreement(self):
        """
			Analyze multinomial worker agreement. For those HITs that were approved, 
			make a list of assigned labels per URL. Also record the number of approved
			observations per 
			Take mode from approved, consider this the relabeled category.
			Record relabeled list for each (dataset, original_classname) tuple.

			Args:
			-	None

			Returns:
			-	None
		"""
        imgurl_label_dict, times_seen_dict = self.count_multinomial_votes_csv()

        plt.title('Num Repeats Per Img')
        plt.hist(list(times_seen_dict.values()), bins=6)
        plt.show()

        # Print out textual version of "times_seen" histogram stats
        # times_seen_arr = np.array(list(times_seen_dict.values()))
        # for i in range(MAX_NUM_WORKER_VOTES):
        # 	count = (times_seen_arr == i).sum()
        # 	if count != 0:
        # 		print(f'Saw {count} exactly {i} times')

        category_lists = defaultdict(list)

        print('Classifying re-labeled images by mode...')
        for i, (imgurl,
                classname_votes) in enumerate(imgurl_label_dict.items()):

            if i % 100 == 0:
                print(f'On img {i}')

            times_seen = times_seen_dict[imgurl]
            classname_mode, percent = most_frequent(classname_votes)

            if percent >= 80:
                dump_dirname = classname_mode + '_80percent_conf'
            else:
                print(f'\tLOW CONSENSUS {percent}')

            # elif percent >= 60 and percent < 80:
            # 	dump_dirname = classname_mode + '_60to80percent_conf'
            # 	print('LOW')
            # elif percent < 60:
            # 	dump_dirname = classname_mode + '_lessthan60percent_conf'
            # else:
            # 	print('Unknown error, quitting...')
            # 	quit()

            fname = Path(imgurl).name

            if fname in self.split_img_fnames:
                classname_mode = strip_forward_slash(classname_mode)
                self.save_img_classification(fname, classname_mode)

            if self.is_sentinel(fname):
                # We don't need consensus on these, since we have ground truth.
                continue
            category_lists[classname_mode] += [fname]

        # Also write the Sentinel classification to disk
        for (sentinel_fname, sentinel_classname) in self.hit_info.sentinels:
            sentinel_classname = strip_forward_slash(sentinel_classname)
            if sentinel_fname in self.split_img_fnames:
                assert self.hit_info.sentinel_img_dir == self.img_dir
                category_lists[sentinel_classname] += [sentinel_fname]

        # Sanity check: ensure the per-category counts sum to the total number of split images.
        num_relabeled_split_imgs = sum(
            [len(cat_list) for classname, cat_list in category_lists.items()])

        if num_relabeled_split_imgs != self.num_split_imgs:
            print(f'Found {self.num_split_imgs} imgs in split.')
            print(f'Found {num_relabeled_split_imgs} relabeled imgs.')
            print(
                'Not all images relabeled yet. Cannot write final classifications yet.'
            )
            return

        num_written_imgs = 0
        for classname, img_fnames in category_lists.items():
            save_fname = f'{self.hit_info.dataset_name}_{self.split}_{self.hit_info.task_nickname}_to_{classname}.txt'
            dirname = f'{self.hit_info.dataset_name}_{self.hit_info.task_nickname}'
            save_dir = f'mturk/verified_reclassification_files/{dirname}'
            check_mkdir(save_dir)
            save_fpath = f'{save_dir}/{save_fname}'
            write_txt_lines(save_fpath, img_fnames)

            num_written_imgs += len(read_txtfile_as_np(save_fpath))

        assert num_written_imgs == self.num_split_imgs
        print('# Written == # Read. Success.')
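
# A minimal sketch of the `most_frequent` helper used above (its actual
# definition is not shown): return the modal classname among a list of votes,
# together with the percentage of votes that classname received.
from collections import Counter
from typing import List, Tuple

def most_frequent(classname_votes: List[str]) -> Tuple[str, float]:
    classname, count = Counter(classname_votes).most_common(1)[0]
    percent = 100.0 * count / len(classname_votes)
    return classname, percent
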
def find_relabeled_taxonomy(dname: str, update_records):
    """
	For any given dataset, compare relabeled classes (in universal taxonomy) with
	universal classes that correspond with original classes.

	We populate 3 separate spreadsheets:
	(1) map from [original->relabeled] taxonomies
		`remap_rows` is only for 
	(2) _names.txt file for relabeled taxonomy
		`new_tax_rows`
	(3) column of master Google spreadsheet with correspondence to all universal classes
		should contain everything from (2), and more (i.e. blank entries for complement)
		`new_googlesheet_rows`
	"""
    tsv_data = pd.read_csv(tsv_fpath, sep='\t', keep_default_na=False)

    remap_rows = []
    new_googlesheet_rows = []
    all_u_classes = []
    featured_u_classes = []

    relabeled_classes = get_new_classes(update_records)

    for _, row in tsv_data.iterrows():
        u_name = parse_uentry(row['universal'])
        all_u_classes += [u_name]
        # specific dataset's classes that map to this u_name
        d_classes = parse_entry(row[dname])
        if len(d_classes) != 0:
            # pre-existing corresponding labels, before re-labeling
            featured_u_classes += [u_name]
        # if pre-existing correspondence, or new correspondence will exist
        if len(d_classes) != 0 or u_name in relabeled_classes:
            for d_class in d_classes:
                remap_rows += [{dname: d_class, f'{dname}-relabeled': u_name}]
            new_googlesheet_rows += [{
                f'{dname}-relabeled': u_name,
                'universal': u_name
            }]
        else:
            # leave blank, will be no such u_name label
            new_googlesheet_rows += [{
                f'{dname}-relabeled': '',
                'universal': u_name
            }]

    # ensure no typos in update records
    assert all([
        relabeled_class in all_u_classes
        for relabeled_class in relabeled_classes
    ])

    new_classes = relabeled_classes - set(featured_u_classes)
    print(f'To {dname}, we added {new_classes}')

    new_taxonomy = relabeled_classes | set(featured_u_classes)
    new_tax_rows = [{
        f'{dname}-relabeled': new_tax_class
    } for new_tax_class in new_taxonomy]
    save_new_taxonomy_csv = f'names/{dname}-relabeled_names.tsv'
    check_mkdir('names')
    write_csv(save_new_taxonomy_csv, new_tax_rows)

    remap_rows += [{dname: 'unlabeled', f'{dname}-relabeled': 'unlabeled'}]
    save_remap_csv = f'{REPO_ROOT}/mseg/class_remapping_files/{dname}_to_{dname}-relabeled.tsv'
    write_csv(save_remap_csv, remap_rows)

    new_googlesheet_csv = f'{REPO_ROOT}/{dname}_to_relabeled_universal.tsv'
    write_csv(new_googlesheet_csv, new_googlesheet_rows)
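
# A hypothetical sketch of the `write_csv` helper used above (its real
# definition is not shown), assuming tab-separated output to match the .tsv
# file names passed in.
import pandas as pd

def write_csv(save_fpath: str, rows: list) -> None:
    """Write a list of dicts to `save_fpath` as a tab-separated file."""
    pd.DataFrame(rows).to_csv(save_fpath, sep='\t', index=False)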