def load_predefined_train_test(task):
    """
    The v1-annotations dataset has a predefined train / test split.
    The training set can be used however you want (cross-validation wise).
    The test set may only be used for final evaluation.
    """
    import glob
    import ubelt as ub
    from os.path import expanduser, join
    # TODO: generalize path spec
    # HACK: return a hard-coded split for now
    train_scenes = sorted(['0000', '0002', '0101', '0102', '0401', '0503'])
    test_scenes = sorted(['0001', '0100', '0400', '0500', '0501', '0502'])
    return train_scenes, test_scenes

    # FIXME: everything below is unreachable until the hack above is removed
    dpath = expanduser(
        '~/code/baseline-algorithms/DIVA/splits/VIRAT/videos/new_v1_annotation')

    def parse_scene(fname):
        """
        ~~All~~ (most of) the filenames are formatted as follows:
        VIRAT_S_XXYYZZ_KK_SSSSSS_TTTTTT.mp4
        """
        import parse
        # virat_format = 'VIRAT_S_{group:DD}{scene:DD}{seq:DD}_{segmentid}_{start}_{stop}.mp4'
        virat_format = 'VIRAT_S_{group:DD}{scene:DD}{therest}.mp4'
        extra_types = {'DD': parse.with_pattern(r'\d\d')(lambda x: x)}
        result = parse.parse(virat_format, fname, extra_types)
        if result:
            return result.named

    train_scenes = set()
    test_scenes = set()
    for fpath in glob.glob(join(dpath, 'Validation_*')):
        paths = [p for p in ub.readfrom(fpath).split('\n') if p]
        info = [parse_scene(p) for p in paths]
        info = [p for p in info if p]
        scenes = {p['group'] + p['scene'] for p in info}
        train_scenes.update(scenes)

    for fpath in glob.glob(join(dpath, 'test_*')):
        paths = [p for p in ub.readfrom(fpath).split('\n') if p]
        info = [parse_scene(p) for p in paths]
        info = [p for p in info if p]
        scenes = {p['group'] + p['scene'] for p in info}
        test_scenes.update(scenes)

    # Ensure determinism
    train_scenes = sorted(train_scenes)
    test_scenes = sorted(test_scenes)
    return train_scenes, test_scenes

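# A minimal sketch of what the nested parse_scene helper returns on a
# well-formed VIRAT filename (the filename below is a hypothetical example):
#   >>> parse_scene('VIRAT_S_000001_00_000000_000010.mp4')
#   {'group': '00', 'scene': '00', 'therest': '01_00_000000_000010'}
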
def get_bibtex_dict():
    import ubelt as ub
    from os.path import exists
    # HACK: custom current bibtex file
    possible_bib_fpaths = [
        ub.truepath('./My_Library_clean.bib'),
        # ub.truepath('~/latex/crall-thesis-2017/My_Library_clean.bib'),
    ]

    bib_fpath = None
    for bib_fpath_ in possible_bib_fpaths:
        if exists(bib_fpath_):
            bib_fpath = bib_fpath_
            break

    if bib_fpath is None:
        raise Exception('cannot find bibtex file')

    # import bibtexparser
    from bibtexparser import bparser
    parser = bparser.BibTexParser()
    parser.ignore_nonstandard_types = True

    bib_text = ub.readfrom(bib_fpath)
    bibtex_db = parser.parse(bib_text)
    bibtex_dict = bibtex_db.get_entry_dict()
    return bibtex_dict

def _read_split_paths(devkit_dpath, split, year):
    """
    Example values:
        split = 'train'
        self = VOCDataset('test')
        year = 2007
        year = 2012
    """
    import glob
    import re
    import ubelt as ub
    from os.path import join
    split_idstrs = []
    data_dpath = join(devkit_dpath, 'VOC{}'.format(year))
    split_dpath = join(data_dpath, 'ImageSets', 'Main')
    pattern = join(split_dpath, '*_' + split + '.txt')
    for p in sorted(glob.glob(pattern)):
        rows = [list(re.split(' +', t))
                for t in ub.readfrom(p).split('\n') if t]
        # code = -1 if the image does not contain the object
        # code = 1 if the image contains at least one instance
        # code = 0 if the image contains only hard instances of the object
        idstrs = [idstr for idstr, code in rows if int(code) == 1]
        split_idstrs.extend(idstrs)
    split_idstrs = sorted(set(split_idstrs))

    image_dpath = join(data_dpath, 'JPEGImages')
    annot_dpath = join(data_dpath, 'Annotations')
    gpaths = [join(image_dpath, '{}.jpg'.format(idstr))
              for idstr in split_idstrs]
    apaths = [join(annot_dpath, '{}.xml'.format(idstr))
              for idstr in split_idstrs]
    return gpaths, apaths

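# For reference, each VOC '<class>_<split>.txt' file holds one image id and
# one code per line, e.g. (hypothetical values):
#   000005 -1
#   000007  1
# Only rows with code == 1 are kept above.
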
def _read_kresimir_results():
    import cv2
    import pandas as pd
    import scipy.io
    import ubelt as ub
    from os.path import expanduser
    # Note: ctalgo is a project-local camtrawl module imported elsewhere
    # Load downloaded matlab csv results
    mat = scipy.io.loadmat(expanduser(
        '~/data/camtrawl_stereo_sample_data/Haul_83/Haul_083_qcresult.mat'))
    header = ub.readfrom(expanduser(
        '~/data/camtrawl_stereo_sample_data/Haul_83/mat_file_header.csv')
    ).strip().split(',')
    data = mat['lengthsqc']

    mat_df = pd.DataFrame(data, columns=header)
    # (np.int is deprecated; the builtin int is equivalent here)
    mat_df['current_frame'] = mat_df['current_frame'].astype(int)
    mat_df['Species'] = mat_df['Species'].astype(int)
    mat_df['QC'] = mat_df['QC'].astype(int)

    # Transform so each row corresponds to one set of (x, y) points per detection
    bbox_cols1 = ['LX1', 'LX2', 'LX3', 'LX4', 'LY1', 'LY2', 'LY3', 'LY4',
                  'Lar', 'LboxL', 'WboxL', 'aveL']
    bbox_pts1 = mat_df[bbox_cols1[0:8]]  # NOQA
    bbox_pts1_ = bbox_pts1.values
    bbox_pts1_ = bbox_pts1_.reshape(len(bbox_pts1_), 2, 4).transpose((0, 2, 1))

    bbox_cols2 = ['RX1', 'RX2', 'RX3', 'RX4', 'RY1', 'RY2', 'RY3', 'RY4',
                  'Rar', 'LboxR', 'WboxR', 'aveW']
    bbox_pts2 = mat_df[bbox_cols2[0:8]]  # NOQA
    bbox_pts2_ = bbox_pts2.values
    bbox_pts2_ = bbox_pts2_.reshape(len(bbox_pts2_), 2, 4).transpose((0, 2, 1))

    # Convert matlab bboxes into python-style bboxes
    mat_df['obox1'] = [
        ctalgo.OrientedBBox(*cv2.minAreaRect(pts[:, None, :].astype(int)))
        for pts in bbox_pts1_]
    mat_df['obox2'] = [
        ctalgo.OrientedBBox(*cv2.minAreaRect(pts[:, None, :].astype(int)))
        for pts in bbox_pts2_]
    mat_df.drop(bbox_cols2, axis=1, inplace=True)
    mat_df.drop(bbox_cols1, axis=1, inplace=True)
    return mat_df

def clean_lprof_file(self, input_fname, output_fname=None):
    """ Reads a .lprof file and cleans it """
    import ubelt as ub
    # Read the raw .lprof text dump
    text = ub.readfrom(input_fname)

    # Sort and clean the text
    output_text = self.clean_line_profile_text(text)
    if output_fname is not None:
        # Write the cleaned text if an output path was given (the original
        # version accepted this parameter but never used it)
        ub.writeto(output_fname, output_text)
    return output_text

def test_modify_directory_symlinks():
    import ubelt as ub
    from os.path import exists, join
    dpath = ub.ensure_app_cache_dir('ubelt', 'test_modify_symlinks')
    ub.delete(dpath, verbose=2)
    ub.ensuredir(dpath, verbose=2)

    happy_dpath = join(dpath, 'happy_dpath')
    happy_dlink = join(dpath, 'happy_dlink')
    ub.ensuredir(happy_dpath, verbose=2)

    ub.symlink(happy_dpath, happy_dlink, verbose=2)

    # Test file inside directory symlink
    file_path1 = join(happy_dpath, 'file.txt')
    file_path2 = join(happy_dlink, 'file.txt')

    ub.touch(file_path1, verbose=2)
    assert exists(file_path1)
    assert exists(file_path2)

    ub.writeto(file_path1, 'foo')
    assert ub.readfrom(file_path1) == 'foo'
    assert ub.readfrom(file_path2) == 'foo'

    ub.writeto(file_path2, 'bar')
    assert ub.readfrom(file_path1) == 'bar'
    assert ub.readfrom(file_path2) == 'bar'

    ub.delete(file_path2, verbose=2)
    assert not exists(file_path1)
    assert not exists(file_path2)

    # Test directory inside directory symlink
    dir_path1 = join(happy_dpath, 'dir')
    dir_path2 = join(happy_dlink, 'dir')

    ub.ensuredir(dir_path1, verbose=2)
    assert exists(dir_path1)
    assert exists(dir_path2)

    subfile_path1 = join(dir_path1, 'subfile.txt')
    subfile_path2 = join(dir_path2, 'subfile.txt')

    ub.writeto(subfile_path2, 'foo')
    assert ub.readfrom(subfile_path1) == 'foo'
    assert ub.readfrom(subfile_path2) == 'foo'

    ub.writeto(subfile_path1, 'bar')
    assert ub.readfrom(subfile_path1) == 'bar'
    assert ub.readfrom(subfile_path2) == 'bar'

    ub.delete(dir_path1, verbose=2)
    assert not exists(dir_path1)
    assert not exists(dir_path2)

def test_grabdata():
    # xdoctest: +REQUIRES(--network)
    import ubelt as ub
    # fname = 'foo.bar'
    # url = 'http://i.imgur.com/rqwaDag.png'
    # prefix1 = '944389a39dfb8fa9'
    fname = 'foo2.bar'
    url = _demo_url(128 * 11)
    prefix1 = 'b7fa848cd088ae842a89ef'
    fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
    stamp_fpath = fpath + '.sha512.hash'
    assert ub.readfrom(stamp_fpath) == prefix1
    # Check that the download doesn't happen again
    fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
    # todo: check file timestamps have not changed
    #
    # Check redo works with hash
    fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1, redo=True)
    # todo: check file timestamps have changed
    #
    # Check that a redownload occurs when the stamp is changed
    with open(stamp_fpath, 'w') as file:
        file.write('corrupt-stamp')
    fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
    assert ub.readfrom(stamp_fpath) == prefix1
    #
    # Check that a redownload occurs when the stamp is removed
    ub.delete(stamp_fpath)
    with open(fpath, 'w') as file:
        file.write('corrupt-data')
    assert not ub.hash_file(fpath, base='hex',
                            hasher='sha512').startswith(prefix1)
    fpath = ub.grabdata(url, fname=fname, hash_prefix=prefix1)
    assert ub.hash_file(fpath, base='hex',
                        hasher='sha512').startswith(prefix1)
    #
    # Check that requesting new data causes redownload
    # url2 = 'https://data.kitware.com/api/v1/item/5b4039308d777f2e6225994c/download'
    # prefix2 = 'c98a46cb31205cf'
    # hack SSL
    # url2 = 'http://i.imgur.com/rqwaDag.png'
    # prefix2 = '944389a39dfb8fa9'
    url2, prefix2 = url, prefix1
    fpath = ub.grabdata(url2, fname=fname, hash_prefix=prefix2)
    assert ub.readfrom(stamp_fpath) == prefix2

def test_readwrite():
    import os
    import ubelt as ub
    from os.path import exists
    dpath = ub.ensure_app_cache_dir('ubelt')
    fpath = dpath + '/' + 'testwrite.txt'
    if exists(fpath):
        os.remove(fpath)
    to_write = 'utf-8 symbols Δ, Й, ק, م, ๗, あ, 叶, 葉, and 말.'
    ub.writeto(fpath, to_write, verbose=True)
    read_ = ub.readfrom(fpath, verbose=True)
    assert read_ == to_write

def test_modify_file_symlinks():
    """
    CommandLine:
        python -m ubelt.tests.test_links test_modify_file_symlinks
    """
    # TODO: test that we handle broken links
    import ubelt as ub
    from os.path import join
    dpath = ub.ensure_app_cache_dir('ubelt', 'test_modify_symlinks')
    happy_fpath = join(dpath, 'happy_fpath.txt')
    happy_flink = join(dpath, 'happy_flink.txt')
    ub.touch(happy_fpath, verbose=2)

    ub.symlink(happy_fpath, happy_flink, verbose=2)

    # Test file symlink
    ub.writeto(happy_fpath, 'foo')
    assert ub.readfrom(happy_fpath) == 'foo'
    assert ub.readfrom(happy_flink) == 'foo'

    ub.writeto(happy_flink, 'bar')
    assert ub.readfrom(happy_fpath) == 'bar'
    assert ub.readfrom(happy_flink) == 'bar'

def research(r, start_line_str=None, rate='3', sentence_mode=True,
             open_file=False):
    import os
    import re
    import ubelt as ub
    from os.path import join, split
    fname = join(split(__file__)[0], 'to_speak.txt')
    if start_line_str == 'prep':
        os.system(fname)
        return
    if open_file is True:
        os.system(fname)
    input_str = preprocess_research(ub.readfrom(fname))
    if sentence_mode:
        input_str = input_str.replace('\n', ' ').replace('. ', '.\n')
        # Collapse runs of spaces (the original pattern ' *' also matched the
        # empty string between every pair of characters)
        input_str = re.sub(' +', ' ', input_str)

    line_count = 0
    page = 0
    page_re = re.compile(' *--- Page [0-9]* *--- *')
    # Default to the beginning so start_line is always defined, even when
    # the interactive prompt below fails
    start_page = 0
    start_line = 0
    if start_line_str is None:
        try:
            start_line = int(input('Did you forget the start line?'))
        except Exception:
            pass
    elif start_line_str.find('page') != -1:
        start_page = int(start_line_str.replace('page', ''))
    else:
        start_line = int(start_line_str)
    print('Starting on line: %d' % (start_line))
    print('Starting on page: %d' % (start_page))

    for line in input_str.split('\n'):
        print('____')
        # Check for page marker
        if page_re.findall(line) != []:
            page = int(re.sub(' *--- Page ', '', line).replace('---', ''))
        # Print out what is being read
        line_count += 1
        print('%d, %d > %s' % (page, line_count, line))
        if start_line > line_count or start_page > page:
            continue
        # Preprocess the line
        line = process_research_line(line)
        if line == '':
            continue
        print('--')
        robos.speak(r, line, rate)

def generate():
    import ubelt as ub
    content = ub.readfrom('base_diff.py') + '\n\n'
    xdoc_version = content + ub.codeblock(
        '''
        if __name__ == '__main__':
            import xdoctest
            xdoctest.doctest_module(__file__)
        ''') + '\n'
    doc_version = content + ub.codeblock(
        '''
        if __name__ == '__main__':
            import doctest
            doctest.testmod()
        ''') + '\n'
    ub.writeto('_doc_version.py', doc_version)
    ub.writeto('_xdoc_version.py', xdoc_version)

def evaluate_all(harn):
    """ Performs testing on trained snapshots. """
    import ubelt as ub
    from os.path import exists, join
    harn.prepare_test_model(force=False)
    test_weight_dpaths = harn.find_test_weights_dpaths()
    for test_weights_dpath in test_weight_dpaths:
        harn.test_weights_dpath = test_weights_dpath
        harn.test_dump_dpath = test_weights_dpath
        link_fpath = join(test_weights_dpath, 'test_weights.caffemodel.lnk')
        harn.test_weights_fpath = ub.readfrom(link_fpath)
        # if not exists(join(harn.test_weights_dpath, 'pred')):
        if not exists(join(harn.test_dump_dpath, 'results.json')):
            print('Need to evaluate: harn.test_weights_fpath = {!r}'.format(
                harn.test_weights_fpath))
            harn.evaluate()

def modify_conf():
    """
    pip install redbaron
    """
    import redbaron
    import ubelt as ub
    conf_path = 'docs/conf.py'

    source = ub.readfrom(conf_path)
    red = redbaron.RedBaron(source)

    # Insert custom extensions
    extra_extensions = ['"sphinxcontrib.napoleon"']
    ext_node = red.find('name', value='extensions').parent
    ext_node.value.value.extend(extra_extensions)

    # Overwrite theme to read-the-docs
    theme_node = red.find('name', value='html_theme').parent
    theme_node.value.value = '"sphinx_rtd_theme"'

    ub.writeto(conf_path, red.dumps())

def _read_snapshot_results(self):
    import glob
    import json
    import pandas as pd
    import ubelt as ub
    from os.path import basename, dirname, join
    # Parse the output of the test dir
    results_fpaths = glob.glob(join(self.test_dir, 'weights_*/results.json'))

    result_infos = []
    for fpath in results_fpaths:
        dname = basename(dirname(fpath))
        parts = dname.split('_')
        # Weird parsing because arch may contain an underscore.
        item = {}
        item['train_input_id'] = parts[1]
        item['init_id'], item['hyper_id'], item['n_iters'] = parts[-3:]
        item['arch_id'] = '_'.join(parts[2:-3])
        item['n_iters'] = int(item['n_iters'])
        item['fpath'] = fpath
        result_infos.append(item)

    snap_paths_df = pd.DataFrame(result_infos)

    # Group items by their overall train id
    train_id_keys = ('arch_id', 'hyper_id', 'init_id', 'train_input_id')
    groups = list(snap_paths_df.groupby(train_id_keys))
    assert len(groups) == 1, 'TODO, support train_id comparisons'
    for train_id, group in groups:
        # Sort by number of iterations
        group = group.sort_values('n_iters')

        # Read the results data into a dictionary
        group_datas = []
        for n_iters, fpath in zip(group.n_iters, group.fpath):
            item = json.loads(ub.readfrom(fpath))
            item['n_iters'] = n_iters
            item['fpath'] = fpath
            group_datas.append(item)
        yield train_id, group_datas

            new_lines.append(line)
        elif state == mode:
            new_lines.append(line)
        # elif state == 'THEIRS':
        #     pass
        # elif state == 'ANCESTORS':
        #     pass
        # elif state == 'OURS':
        #     new_lines.append(line)
    return ''.join(new_lines[::-1])


if __name__ == '__main__':
    r"""
    CommandLine:
        export PYTHONPATH=$PYTHONPATH:/home/joncrall/misc
        python ~/misc/fix_merge_3.py
    """
    import sys
    argv = sys.argv[1:]
    import ubelt as ub
    fpaths = ub.cmd('git diff --name-only --diff-filter=U')['out'].splitlines()
    print('fpaths = {!r}'.format(fpaths))
    for fpath in fpaths:
        text = ub.readfrom(fpath)
        text = resolve_diff3_conflict(text)
        # print(text)
        ub.writeto(fpath, text)

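# For reference, the diff3-style conflict regions resolved above look like
# the following (branch labels are illustrative):
#   <<<<<<< HEAD                         (OURS)
#   our version of the lines
#   ||||||| merged common ancestors      (ANCESTORS)
#   the original lines
#   =======
#   their version of the lines
#   >>>>>>> other-branch                 (THEIRS)
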
def _autojit_cython(pyx_fpath, verbose=1, recompile=False, annotate=False):
    """
    The idea is that given a pyx file, we try to compile it. We write a stamp
    file so subsequent calls should be very fast as long as the source pyx has
    not changed.

    Parameters
    ----------
    pyx_fpath : str
        path to the pyx file

    verbose : int
        higher is more verbose.
    """
    import os
    import shutil
    from os.path import basename, dirname, join, splitext

    if verbose > 3:
        print('_autojit_cython')

    # TODO: move necessary ubelt utilities to nx.utils?
    # Separate this into its own util?
    if shutil.which("cythonize"):
        pyx_dpath = dirname(pyx_fpath)

        if verbose > 3:
            print('pyx_dpath = {!r}'.format(pyx_dpath))

        # Check if the compiled library exists
        pyx_base = splitext(basename(pyx_fpath))[0]

        SO_EXTS = _platform_pylib_exts()
        so_fname = False
        for fname in os.listdir(pyx_dpath):
            if fname.startswith(pyx_base) and fname.endswith(SO_EXTS):
                so_fname = fname
                break

        if verbose > 3:
            print('so_fname = {!r}'.format(so_fname))

        try:
            # Currently this functionality depends on ubelt.
            # We could replace ub.cmd with subprocess.check_call and ub.augpath
            # with os.path operations, but hash_file and CacheStamp are harder
            # to replace. We can use "liberator" to statically extract these
            # and add them to nx.utils though.
            import ubelt as ub
        except Exception:
            if verbose > 3:
                print('return false, no ubelt')
            return False
        else:
            if so_fname is False:
                # We can compute what the so_fname will be if it doesnt exist
                so_fname = pyx_base + SO_EXTS[0]

            so_fpath = join(pyx_dpath, so_fname)
            content = ub.readfrom(pyx_fpath)
            mtime = os.stat(pyx_fpath).st_mtime

            depends = [ub.hash_data(content, hasher="sha1"), mtime]
            stamp_fname = ub.augpath(so_fname, ext=".jit.stamp")
            stamp = ub.CacheStamp(
                stamp_fname,
                dpath=pyx_dpath,
                product=so_fpath,
                depends=depends,
                verbose=verbose,
            )
            if verbose > 3:
                print('stamp = {!r}'.format(stamp))
            if recompile or stamp.expired():
                # Heuristic to try and grab the numpy include dir or not
                cythonize_args = ['cythonize']
                cythonize_env = os.environ.copy()
                needs_numpy = 'numpy' in content
                if needs_numpy:
                    import pathlib
                    import numpy as np
                    numpy_include_dpath = pathlib.Path(np.get_include())
                    numpy_dpath = (numpy_include_dpath / '../..').resolve()
                    # cythonize_env['CPATH'] = numpy_include_dpath + ':' + cythonize_env.get('CPATH', '')
                    cythonize_env['CFLAGS'] = ' '.join([
                        '-I{}'.format(numpy_include_dpath),
                    ]) + cythonize_env.get('CFLAGS', '')

                    cythonize_env['LDFLAGS'] = ' '.join([
                        '-L{} -lnpyrandom'.format(numpy_dpath / 'random/lib'),
                        '-L{} -lnpymath'.format(numpy_dpath / 'core/lib'),
                    ]) + cythonize_env.get('LDFLAGS', '')
                if annotate:
                    cythonize_args.append('-a')
                cythonize_args.append('-i {}'.format(pyx_fpath))
                cythonize_cmd = ' '.join(cythonize_args)
                if needs_numpy:
                    print('CFLAGS="{}" '.format(cythonize_env['CFLAGS']) +
                          'LDFLAGS="{}" '.format(cythonize_env['LDFLAGS']) +
                          cythonize_cmd)
                ub.cmd(cythonize_cmd, verbose=verbose, check=True,
                       env=cythonize_env)
                stamp.renew()
            return True
    else:
        if verbose > 2:
            print('Cythonize not found!')

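# A minimal, self-contained sketch of the CacheStamp pattern used above,
# assuming only ubelt; the paths and the "expensive work" step are
# hypothetical stand-ins:
def _cache_stamp_demo():
    from os.path import join
    import ubelt as ub
    dpath = ub.ensure_app_cache_dir('cache_stamp_demo')
    product = join(dpath, 'output.txt')
    stamp = ub.CacheStamp('demo.stamp', dpath=dpath, product=product,
                          depends=['input-hash-or-mtime'])
    if stamp.expired():
        # Stand-in for the expensive step (e.g. the cythonize call above)
        ub.writeto(product, 'expensive result')
        stamp.renew()
    return product
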
def convert_file_docstrings(path_to_convert, dry=True):
    """
    path_to_convert = ub.expandpath(
        '~/code/networkx/networkx/algorithms/isomorphism/_embeddinghelpers/balanced_sequence.py')
    """
    import ubelt as ub
    from xdoctest.core import package_calldefs
    pkg_calldefs = list(package_calldefs(path_to_convert))

    def recnone(val, default):
        return default if val is None else val

    for calldefs, modpath in pkg_calldefs:
        to_insert = []
        old_text = ub.readfrom(modpath)
        old_lines = old_text.split('\n')
        sortnames = ub.argsort(calldefs,
                               key=lambda node: recnone(node.doclineno, -1))
        for name in sortnames:
            node = calldefs[name]
            if node.docstr is not None:
                google_docstr = node.docstr
                numpy_docstr = google_to_numpy_docstr(google_docstr)
                body_lines = numpy_docstr.split('\n')
                start = node.doclineno
                stop = node.doclineno_end
                to_insert.append((start, stop, body_lines))

        # Apply replacements in reverse order so earlier line numbers
        # remain valid as the file grows or shrinks
        to_insert = sorted(to_insert)[::-1]
        new_lines = old_lines.copy()
        for start, stop, body_lines in to_insert:
            old_middle = old_lines[start - 1:stop]
            print('old_middle = {}'.format(ub.repr2(old_middle, nl=1)))
            print('start = {!r}'.format(start))
            startline = new_lines[start - 1]
            print('startline = {!r}'.format(startline))
            ssline = startline.strip(' ')
            sq = ssline[0]
            tq = sq * 3
            n_indent = len(startline) - len(ssline)
            indent = ' ' * n_indent
            print('n_indent = {!r}'.format(n_indent))

            body_lines = [indent + line for line in body_lines]
            body_lines = [indent + tq] + body_lines + [indent + tq]
            prefix = new_lines[:start - 1]
            suffix = new_lines[stop:]
            mid = body_lines
            new_lines = prefix + mid + suffix

        new_text = '\n'.join(new_lines)
        # print(new_text)
        if dry:
            import xdev
            print(xdev.misc.difftext(old_text, new_text,
                                     context_lines=10, colored=True))
            print('^^^ modpath = {!r}'.format(modpath))
        else:
            ub.writeto(modpath, new_text, verbose=3)

def grab_tiny_imagenet_as_coco():
    import glob
    import ubelt as ub
    from os.path import exists, join
    url = 'http://cs231n.stanford.edu/tiny-imagenet-200.zip'
    dpath = ub.ensure_app_cache_dir('netharn', 'tiny-imagenet-200')
    dset_root = join(dpath, 'tiny-imagenet-200')

    zip_fpath = ub.grabdata(url, dpath=dpath)

    if not exists(dset_root):
        import zipfile
        zip_ref = zipfile.ZipFile(zip_fpath, 'r')
        zip_ref.extractall(dpath)
        zip_ref.close()

    tiny_imgnet_info = {
        'train': join(dset_root, 'train'),
        'test': join(dset_root, 'test'),
        'vali': join(dset_root, 'val'),
        'wnids': join(dset_root, 'wnids.txt'),
        'words': join(dset_root, 'words.txt'),
    }

    train_annots = list(glob.glob(join(tiny_imgnet_info['train'],
                                       '*/*boxes.txt')))
    vali_annots = list(glob.glob(join(tiny_imgnet_info['vali'],
                                      'val_annotations.txt')))

    import ndsampler
    img_root = {
        'train': join(tiny_imgnet_info['train']),
        'vali': join(tiny_imgnet_info['vali'], 'images'),
        'test': join(tiny_imgnet_info['test'], 'images'),
    }
    gpaths = {
        'train': list(glob.glob(join(tiny_imgnet_info['train'],
                                     '*/images/*.JPEG'))),
        'vali': list(glob.glob(join(tiny_imgnet_info['vali'],
                                    'images/*.JPEG'))),
        'test': list(glob.glob(join(tiny_imgnet_info['test'],
                                    'images/*.JPEG'))),
    }
    annots_text = {
        'train': ''.join(ub.readfrom(fpath) for fpath in train_annots),
        'vali': ''.join(ub.readfrom(fpath) for fpath in vali_annots),
    }

    coco_datasets = {
        'train': ndsampler.CocoDataset(tag='tiny-imagenet-train'),
        'vali': ndsampler.CocoDataset(tag='tiny-imagenet-vali'),
    }

    for catname in (_ for _ in
                    ub.readfrom(tiny_imgnet_info['wnids']).split('\n') if _):
        for dset in coco_datasets.values():
            dset.add_category(name=catname)

    for tag in ['train', 'vali']:
        gpaths_ = gpaths[tag]
        annots_ = annots_text[tag]
        dset = coco_datasets[tag]
        dset.img_root = img_root[tag]

        for gpath in gpaths_:
            dset.add_image(file_name=gpath)

        for line in (_ for _ in annots_.split('\n') if _):
            parts = line.split('\t')
            if tag == 'train':
                gname = parts[0]
                catname = gname.split('_')[0]
                x, y, w, h = list(map(float, parts[1:]))
                gpath = join(img_root[tag], catname, 'images', gname)
            else:
                gname, catname = parts[0:2]
                x, y, w, h = list(map(float, parts[2:]))
                gpath = join(img_root[tag], gname)

            bbox = (x, y, w + 1, h + 1)
            cat = dset.name_to_cat[catname]
            img = dset.index.file_name_to_img[gpath]
            dset.add_annotation(image_id=img['id'], bbox=bbox,
                                category_id=cat['id'])

        dset._ensure_imgsize()
        dset._build_hashid()
        print('dset.hashid = {!r}'.format(dset.hashid))

    return coco_datasets

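# For reference, the tab-separated annotation rows parsed above look like the
# following (the values shown are hypothetical):
#   train ('*_boxes.txt'):        n01443537_0.JPEG    0   0   62  62
#   vali ('val_annotations.txt'): val_0.JPEG  n01443537   0   0   62  62
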
def convert_camvid_raw_to_coco(camvid_raw_info):
    """
    Converts the raw camvid format to an MSCOCO based format (which lets us
    use kwcoco's COCO backend).

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> camvid_raw_info = grab_raw_camvid()
        >>> # test with a reduced set of data
        >>> del camvid_raw_info['img_paths'][2:]
        >>> del camvid_raw_info['mask_paths'][2:]
        >>> dset = convert_camvid_raw_to_coco(camvid_raw_info)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> plt = kwplot.autoplt()
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 1))
        >>> dset.show_image(gid=1)
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 2))
        >>> dset.show_image(gid=2)
    """
    import re
    import kwcoco
    import kwimage
    import ubelt as ub
    from os.path import join
    print('Converting CamVid to MS-COCO format')

    dset_root, img_paths, label_path, mask_paths = ub.take(
        camvid_raw_info,
        'dset_root, img_paths, label_path, mask_paths'.split(', '))

    img_infos = {
        'img_fname': img_paths,
        'mask_fname': mask_paths,
    }
    keys = list(img_infos.keys())
    next_vals = list(zip(*img_infos.values()))
    image_items = [{k: v for k, v in zip(keys, vals)} for vals in next_vals]

    dataset = {
        'img_root': dset_root,
        'images': [],
        'categories': [],
        'annotations': [],
    }

    lines = ub.readfrom(label_path).split('\n')
    lines = [line for line in lines if line]
    for line in lines:
        color_text, name = re.split('\t+', line)
        r, g, b = map(int, color_text.split(' '))
        color = (r, g, b)

        # Parse the special camvid format
        cid = (r << 16) + (g << 8) + (b << 0)
        cat = {
            'id': cid,
            'name': name,
            'color': color,
        }
        dataset['categories'].append(cat)

    for gid, img_item in enumerate(image_items, start=1):
        img = {
            'id': gid,
            'file_name': img_item['img_fname'],
            # nonstandard image field
            'segmentation': img_item['mask_fname'],
        }
        dataset['images'].append(img)

    dset = kwcoco.CocoDataset(dataset)
    dset.rename_categories({'Void': 'background'})

    assert dset.name_to_cat['background']['id'] == 0
    dset.name_to_cat['background'].setdefault('alias', []).append('Void')

    if False:
        _define_camvid_class_hierarcy(dset)

    if 1:
        # TODO: Binarize CCs (and efficiently encode if possible)
        import numpy as np

        bad_info = []
        once = False

        # Add images
        dset.remove_annotations(list(dset.index.anns.keys()))
        for gid, img in ub.ProgIter(dset.imgs.items(),
                                    desc='parse label masks'):
            mask_fpath = join(dset_root, img['segmentation'])
            rgb_mask = kwimage.imread(mask_fpath, space='rgb')
            r, g, b = rgb_mask.T.astype(np.int64)
            cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T)

            cids = set(np.unique(cid_mask)) - {0}

            for cid in cids:
                if cid not in dset.cats:
                    if gid == 618:
                        # Handle a known issue with image 618
                        c_mask = (cid == cid_mask).astype(np.uint8)
                        total_bad = c_mask.sum()
                        if total_bad < 32:
                            if not once:
                                print('gid 618 has a few known bad pixels, '
                                      'ignoring them')
                                once = True
                            continue
                        else:
                            raise Exception('more bad pixels than expected')
                    else:
                        raise Exception(
                            'UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid))

                    # bad_rgb = cid_to_rgb(cid)
                    # print('bad_rgb = {!r}'.format(bad_rgb))
                    # print('WARNING UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid))
                    # bad_info.append({
                    #     'gid': gid,
                    #     'cid': cid,
                    # })
                else:
                    ann = {
                        'category_id': cid,
                        'image_id': gid,
                        # 'segmentation': mask.to_coco()
                    }
                    assert cid in dset.cats
                    c_mask = (cid == cid_mask).astype(np.uint8)
                    mask = kwimage.Mask(c_mask, 'c_mask')

                    box = kwimage.Boxes([mask.get_xywh()], 'xywh')
                    # box = mask.to_boxes()
                    ann['bbox'] = ub.peek(box.to_coco())
                    ann['segmentation'] = mask.to_coco()
                    dset.add_annotation(**ann)

        if 0:
            bad_cids = [i['cid'] for i in bad_info]
            print(sorted([c['color'] for c in dataset['categories']]))
            print(sorted(set([cid_to_rgb(i['cid']) for i in bad_info])))
            gid = 618
            img = dset.imgs[gid]
            mask_fpath = join(dset_root, img['segmentation'])
            rgb_mask = kwimage.imread(mask_fpath, space='rgb')
            r, g, b = rgb_mask.T.astype(np.int64)
            cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T)
            cid_hist = ub.dict_hist(cid_mask.ravel())

            bad_cid_hist = {}
            for cid in bad_cids:
                bad_cid_hist[cid] = cid_hist.pop(cid)

            import kwplot
            kwplot.autompl()
            kwplot.imshow(rgb_mask)

    if 0:
        import kwplot
        plt = kwplot.autoplt()
        plt.clf()
        dset.show_image(1)

        import xdev
        gid_list = list(dset.imgs)
        for gid in xdev.InteractiveIter(gid_list):
            dset.show_image(gid)
            xdev.InteractiveIter.draw()

    dset._build_index()
    dset._build_hashid()
    return dset

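# The rgb_to_cid / cid_to_rgb helpers referenced above are defined elsewhere;
# assuming the packing used when building the categories
# (cid = (r << 16) + (g << 8) + b), a plausible inverse looks like this
# (a sketch, not the actual helper):
def _cid_to_rgb_sketch(cid):
    r = (cid >> 16) & 0xFF
    g = (cid >> 8) & 0xFF
    b = cid & 0xFF
    return (r, g, b)
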
def count_usage(cmdline=True):
    config = UsageConfig(cmdline=cmdline)

    import glob
    import re
    import ubelt as ub
    from os.path import join
    names = [
        'netharn', 'ndsampler', 'kwimage', 'kwplot',
    ] + config['extra_modnames']

    all_fpaths = []
    for name in names:
        if name:
            repo_fpath = ub.expandpath(join('~/code', name))
            fpaths = glob.glob(join(repo_fpath, '**', '*.py'), recursive=True)
            for fpath in fpaths:
                all_fpaths.append((name, fpath))

    print('names = {}'.format(ub.repr2(names)))

    modname = 'kwarray'
    module = ub.import_module_from_name(modname)
    package_name = module.__name__
    package_allvar = module.__all__

    pat = re.compile(
        r'\b' + package_name + r'\.(?P<attr>[a-zA-Z_][A-Za-z_0-9]*)\b')

    pkg_to_hist = ub.ddict(lambda: ub.ddict(int))
    for name, fpath in ub.ProgIter(all_fpaths):
        # print('fpath = {!r}'.format(fpath))
        text = ub.readfrom(fpath, verbose=0)
        # text = open(fpath, 'r').read()
        for match in pat.finditer(text):
            attr = match.groupdict()['attr']
            if attr in package_allvar:
                pkg_to_hist[name][attr] += 1

    hist_iter = iter(pkg_to_hist.values())
    usage = next(hist_iter).copy()
    for other in hist_iter:
        for k, v in other.items():
            usage[k] += v
    for attr in package_allvar:
        usage[attr] += 0

    for name in pkg_to_hist.keys():
        pkg_to_hist[name] = ub.odict(sorted(
            pkg_to_hist[name].items(), key=lambda t: t[1])[::-1])
    usage = ub.odict(sorted(usage.items(), key=lambda t: t[1])[::-1])

    if config['print_packages']:
        print(ub.repr2(pkg_to_hist, nl=2))

    if config['remove_zeros']:
        for k, v in list(usage.items()):
            if v == 0:
                usage.pop(k)

    # if config['hardcoded_ubelt_hack']:
    #     for k in list(usage):
    #         if k.startswith('util_'):
    #             usage.pop(k)
    #         if k.startswith('_util_'):
    #             usage.pop(k)
    #         # ub._util_deprecated
    #         from ubelt import _util_deprecated
    #         if k in dir(_util_deprecated):
    #             usage.pop(k)

    print(ub.repr2(usage, nl=1))
    return usage

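# A small sketch of what the usage-counting regex matches, assuming the
# package under analysis is 'kwarray' (the sample text is hypothetical):
#   >>> import re
#   >>> pat = re.compile(r'\bkwarray\.(?P<attr>[a-zA-Z_][A-Za-z_0-9]*)\b')
#   >>> [m.group('attr') for m in pat.finditer('x = kwarray.ArrayAPI.coerce(np)')]
#   ['ArrayAPI']
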
def readlines(fpath):
    return ub.readfrom(fpath, aslines=True)