def update_registry(drive):
    print('Updating registered files in %r' % (drive,))
    # Update existing files
    fpath_exists_list = list(map(exists, ut.ProgIter(
        drive.fpath_list, 'checkexist fpath', freq=1000)))
    dpath_exists_list = list(map(exists, ut.ProgIter(
        drive.dpath_list, 'checkexist dpath', freq=1000)))
    if all(fpath_exists_list):
        print('No change in file structure')
    else:
        print('%d/%d files no longer exist' % (
            len(drive.fpath_list) - sum(fpath_exists_list),
            len(drive.fpath_list)))
        removed_fpaths = ut.compress(drive.fpath_list,
                                     ut.not_list(fpath_exists_list))
        print('removed_fpaths = %s' % (ut.list_str(removed_fpaths),))
    if all(dpath_exists_list):
        print('No change in dpath structure')
    else:
        print('%d/%d dirs no longer exist' % (
            len(drive.dpath_list) - sum(dpath_exists_list),
            len(drive.dpath_list)))
        removed_dpaths = ut.compress(
            drive.dpath_list, ut.not_list(dpath_exists_list))
        print('removed_dpaths = %s' % (ut.list_str(removed_dpaths),))
    drive.fpath_list = ut.compress(drive.fpath_list, fpath_exists_list)
    drive.dpath_list = ut.compress(drive.dpath_list, dpath_exists_list)
    drive.cache.save('fpath_list', drive.fpath_list)
    drive.cache.save('dpath_list', drive.dpath_list)
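A minimal sketch of the filtering idiom used above, assuming ut.compress behaves like
itertools.compress and ut.not_list negates a list of boolean flags (both assumptions
about the utool API; the standard-library stand-ins below are illustrative only):

import itertools
from os.path import exists

def keep_existing(paths):
    # Flag which registered paths still exist on disk
    exists_flags = [exists(p) for p in paths]
    # Paths whose flag is False have been removed from disk
    removed = list(itertools.compress(paths, [not f for f in exists_flags]))
    # Paths whose flag is True stay in the registry
    kept = list(itertools.compress(paths, exists_flags))
    return kept, removed

# Example: kept, removed = keep_existing(['/tmp', '/no/such/dir'])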
def get_cards_in_hand(player, valid_types=None, invert=False):
    card_list = player.hand
    if valid_types is None:
        valid_cards = card_list
    else:
        flags = [ut.is_superset(c.types, valid_types) for c in card_list]
        if invert:
            flags = ut.not_list(flags)
        valid_cards = ut.compress(card_list, flags)
    return valid_cards
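A hedged usage sketch for the helper above; Player and Card here are hypothetical
stand-ins (not part of the original code) used only to show the valid_types/invert
arguments:

from collections import namedtuple

Card = namedtuple('Card', ['name', 'types'])    # hypothetical card structure
Player = namedtuple('Player', ['hand'])         # hypothetical player structure

player = Player(hand=[Card('copper', {'treasure'}),
                      Card('militia', {'action', 'attack'})])
# treasures = get_cards_in_hand(player, valid_types=['treasure'])
# non_treasures = get_cards_in_hand(player, valid_types=['treasure'], invert=True)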
def hack_remove_pystuff(self):
    import utool as ut
    # Hack of a method
    new_lines = []
    for lines in self.found_lines_list:
        # remove comment results
        flags = [not line.strip().startswith('# ') for line in lines]
        lines = ut.compress(lines, flags)
        # remove doctest results
        flags = [not line.strip().startswith('>>> ') for line in lines]
        lines = ut.compress(lines, flags)
        # remove cmdline tests
        import re
        flags = [not re.search('--test-' + self.extended_regex_list[0], line)
                 for line in lines]
        lines = ut.compress(lines, flags)
        flags = [not re.search('--exec-' + self.extended_regex_list[0], line)
                 for line in lines]
        lines = ut.compress(lines, flags)
        flags = [not re.search(r'--exec-[a-zA-Z]*\.' + self.extended_regex_list[0], line)
                 for line in lines]
        lines = ut.compress(lines, flags)
        flags = [not re.search(r'--test-[a-zA-Z]*\.' + self.extended_regex_list[0], line)
                 for line in lines]
        lines = ut.compress(lines, flags)
        # remove func defs
        flags = [not re.search('def ' + self.extended_regex_list[0], line)
                 for line in lines]
        lines = ut.compress(lines, flags)
        new_lines += [lines]
    self.found_lines_list = new_lines
    # compress self
    flags = [len(lines_) > 0 for lines_ in self.found_lines_list]
    idxs = ut.list_where(ut.not_list(flags))
    del self[idxs]
def download_image_urls(image_url_info_list):
    # Find ones that we already have
    print('Requested %d downloaded images' % (len(image_url_info_list)))
    full_gpath_list = [join(image_dir, basename(gpath))
                       for gpath in image_url_info_list]
    exists_list = [ut.checkpath(gpath) for gpath in full_gpath_list]
    image_url_info_list_ = ut.compress(image_url_info_list,
                                       ut.not_list(exists_list))
    print('Already have %d/%d downloaded images' % (
        len(image_url_info_list) - len(image_url_info_list_),
        len(image_url_info_list)))
    print('Need to download %d images' % (len(image_url_info_list_)))
    #import sys
    #sys.exit(0)
    # Download the rest
    imgurl_prefix = 'https://snapshotserengeti.s3.msi.umn.edu/'
    image_url_list = [imgurl_prefix + suffix for suffix in image_url_info_list_]
    for img_url in ut.ProgressIter(image_url_list, lbl='Downloading image'):
        ut.grab_file_url(img_url, download_dir=image_dir)
    return full_gpath_list
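The function above skips URLs whose target file already exists. A minimal
standard-library sketch of the same lazy-download pattern, under the assumption that
the goal is simply "download only what is missing" (urllib is used here in place of
ut.grab_file_url):

import os
import urllib.request

def download_missing(urls, out_dir):
    os.makedirs(out_dir, exist_ok=True)
    for url in urls:
        dst = os.path.join(out_dir, os.path.basename(url))
        if os.path.exists(dst):
            continue  # already downloaded; skip it
        urllib.request.urlretrieve(url, dst)  # fetch only the missing file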
def convert_hsdb_to_ibeis(hsdir, dbdir=None, **kwargs): r""" Args hsdir (str): Directory to folder *containing* _hsdb dbdir (str): Output directory (defaults to same as hsdb) CommandLine: python -m ibeis convert_hsdb_to_ibeis --dbdir ~/work/Frogs python -m ibeis convert_hsdb_to_ibeis --hsdir "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016" Ignore: from ibeis.dbio.ingest_hsdb import * # NOQA hsdir = "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016" dbdir = "~/work/RotanTurtles" Example: >>> # SCRIPT >>> from ibeis.dbio.ingest_hsdb import * # NOQA >>> dbdir = ut.get_argval('--dbdir', type_=str, default=None) >>> hsdir = ut.get_argval('--hsdir', type_=str, default=dbdir) >>> result = convert_hsdb_to_ibeis(hsdir) >>> print(result) """ from ibeis.control import IBEISControl import utool as ut if dbdir is None: dbdir = hsdir print('[ingest] Ingesting hsdb: %r -> %r' % (hsdir, dbdir)) assert is_hsdb( hsdir ), 'not a hotspotter database. cannot even force convert: hsdir=%r' % ( hsdir, ) assert not is_succesful_convert(dbdir), 'hsdir=%r is already converted' % ( hsdir, ) #print('FORCE DELETE: %r' % (hsdir,)) #ibsfuncs.delete_ibeis_database(hsdir) imgdir = join(hsdir, 'images') internal_dir = get_hsinternal(hsdir) nametbl_fpath = join(internal_dir, 'name_table.csv') imgtbl_fpath = join(internal_dir, 'image_table.csv') chiptbl_fpath = join(internal_dir, 'chip_table.csv') # READ NAME TABLE name_text_list = ['____'] name_hs_nid_list = [0] with open(nametbl_fpath, 'r') as nametbl_file: name_reader = csv.reader(nametbl_file) for ix, row in enumerate(name_reader): #if ix >= 3: if len(row) == 0 or row[0].strip().startswith('#'): continue else: hs_nid = int(row[0]) name = row[1].strip() name_text_list.append(name) name_hs_nid_list.append(hs_nid) # READ IMAGE TABLE iamge_hs_gid_list = [] image_gname_list = [] image_reviewed_list = [] with open(imgtbl_fpath, 'r') as imgtb_file: image_reader = csv.reader(imgtb_file) for ix, row in enumerate(image_reader): if len(row) == 0 or row[0].strip().startswith('#'): continue else: hs_gid = int(row[0]) gname_ = row[1].strip() # aif in hotspotter is equivilant to reviewed in IBEIS reviewed = bool(row[2]) iamge_hs_gid_list.append(hs_gid) image_gname_list.append(gname_) image_reviewed_list.append(reviewed) image_gpath_list = [join(imgdir, gname) for gname in image_gname_list] ut.debug_duplicate_items(image_gpath_list) #print(image_gpath_list) image_exist_flags = list(map(exists, image_gpath_list)) missing_images = [] for image_gpath, flag in zip(image_gpath_list, image_exist_flags): if not flag: missing_images.append(image_gpath) print('Image does not exist: %s' % image_gpath) if not all(image_exist_flags): print('Only %d / %d image exist' % (sum(image_exist_flags), len(image_exist_flags))) SEARCH_FOR_IMAGES = False if SEARCH_FOR_IMAGES: # Hack to try and find the missing images from os.path import basename subfiles = ut.glob(hsdir, '*', recursive=True, fullpath=True, with_files=True) basename_to_existing = ut.group_items(subfiles, ut.lmap(basename, subfiles)) can_copy_list = [] for gpath in missing_images: gname = basename(gpath) if gname not in basename_to_existing: print('gname = %r' % (gname, )) pass else: existing = basename_to_existing[gname] can_choose = True if len(existing) > 1: if not ut.allsame(ut.lmap(ut.get_file_uuid, existing)): can_choose = False if can_choose: found = existing[0] can_copy_list.append((found, gpath)) else: print(existing) src, dst = ut.listT(can_copy_list) ut.copy_list(src, dst) # READ CHIP TABLE chip_bbox_list = [] chip_theta_list 
= [] chip_hs_nid_list = [] chip_hs_gid_list = [] chip_note_list = [] with open(chiptbl_fpath, 'r') as chiptbl_file: chip_reader = csv.reader(chiptbl_file) for ix, row in enumerate(chip_reader): if len(row) == 0 or row[0].strip().startswith('#'): continue else: hs_gid = int(row[1]) hs_nid = int(row[2]) bbox_text = row[3] theta = float(row[4]) notes = '<COMMA>'.join([item.strip() for item in row[5:]]) bbox_text = bbox_text.replace('[', '').replace(']', '').strip() bbox_text = re.sub(' *', ' ', bbox_text) bbox_strlist = bbox_text.split(' ') bbox = tuple(map(int, bbox_strlist)) #bbox = [int(item) for item in bbox_strlist] chip_hs_nid_list.append(hs_nid) chip_hs_gid_list.append(hs_gid) chip_bbox_list.append(bbox) chip_theta_list.append(theta) chip_note_list.append(notes) names = ut.ColumnLists({ 'hs_nid': name_hs_nid_list, 'text': name_text_list, }) images = ut.ColumnLists({ 'hs_gid': iamge_hs_gid_list, 'gpath': image_gpath_list, 'reviewed': image_reviewed_list, 'exists': image_exist_flags, }) chips = ut.ColumnLists({ 'hs_gid': chip_hs_gid_list, 'hs_nid': chip_hs_nid_list, 'bbox': chip_bbox_list, 'theta': chip_theta_list, 'note': chip_note_list, }) IGNORE_MISSING_IMAGES = True if IGNORE_MISSING_IMAGES: # Ignore missing information print('pre') print('chips = %r' % (chips, )) print('images = %r' % (images, )) print('names = %r' % (names, )) missing_gxs = ut.where(ut.not_list(images['exists'])) missing_gids = ut.take(images['hs_gid'], missing_gxs) gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid')) missing_cxs = ut.flatten(ut.take(gid_to_cxs, missing_gids)) # Remove missing images and dependant chips images = images.remove(missing_gxs) chips = chips.remove(missing_cxs) valid_nids = set(chips['hs_nid'] + [0]) isvalid = [nid in valid_nids for nid in names['hs_nid']] names = names.compress(isvalid) print('post') print('chips = %r' % (chips, )) print('images = %r' % (images, )) print('names = %r' % (names, )) assert all(images['exists']), 'some images dont exist' # if gid is None: # print('Not adding the ix=%r-th Chip. Its image is corrupted image.' 
% (ix,)) # # continue # # Build mappings to new indexes # names_nid_to_nid = {names_nid: nid for (names_nid, nid) in zip(hs_nid_list, nid_list)} # names_nid_to_nid[1] = names_nid_to_nid[0] # hsdb unknknown is 0 or 1 # images_gid_to_gid = {images_gid: gid for (images_gid, gid) in zip(hs_gid_list, gid_list)} ibs = IBEISControl.request_IBEISController(dbdir=dbdir, check_hsdb=False, **kwargs) assert len(ibs.get_valid_gids()) == 0, 'target database is not empty' # Add names, images, and annotations names['ibs_nid'] = ibs.add_names(names['text']) images['ibs_gid'] = ibs.add_images( images['gpath']) # any failed gids will be None if True: # Remove corrupted images print('pre') print('chips = %r' % (chips, )) print('images = %r' % (images, )) print('names = %r' % (names, )) missing_gxs = ut.where(ut.flag_None_items(images['ibs_gid'])) missing_gids = ut.take(images['hs_gid'], missing_gxs) gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid')) missing_cxs = ut.flatten(ut.take(gid_to_cxs, missing_gids)) # Remove missing images and dependant chips chips = chips.remove(missing_cxs) images = images.remove(missing_gxs) print('post') print('chips = %r' % (chips, )) print('images = %r' % (images, )) print('names = %r' % (names, )) # Index chips using new ibs rowids ibs_gid_lookup = ut.dzip(images['hs_gid'], images['ibs_gid']) ibs_nid_lookup = ut.dzip(names['hs_nid'], names['ibs_nid']) try: chips['ibs_gid'] = ut.take(ibs_gid_lookup, chips['hs_gid']) except KeyError: chips['ibs_gid'] = [ ibs_gid_lookup.get(index, None) for index in chips['hs_gid'] ] try: chips['ibs_nid'] = ut.take(ibs_nid_lookup, chips['hs_nid']) except KeyError: chips['ibs_nid'] = [ ibs_nid_lookup.get(index, None) for index in chips['hs_nid'] ] ibs.add_annots(chips['ibs_gid'], bbox_list=chips['bbox'], theta_list=chips['theta'], nid_list=chips['ibs_nid'], notes_list=chips['note']) # aid_list = ibs.get_valid_aids() # flag_list = [True] * len(aid_list) # ibs.set_annot_exemplar_flags(aid_list, flag_list) # assert(all(ibs.get_annot_exemplar_flags(aid_list))), 'exemplars not set correctly' # Write file flagging successful conversion with open(join(ibs.get_ibsdir(), SUCCESS_FLAG_FNAME), 'w') as file_: file_.write('Successfully converted hsdir=%r' % (hsdir, )) print('finished ingest') return ibs
def parse_column_tuples(self, col_name_list, col_types_dict, col_getter_dict,
                        col_bgrole_dict, col_ider_dict, col_setter_dict,
                        editable_colnames, sortby, sort_reverse=True,
                        strict=False, **kwargs):
    """
    parses simple lists into information suitable for making guitool headers
    """
    # Unpack the column tuples into names, getters, and types
    if not strict:
        # sloppy colname definitions
        flag_list = [colname in col_getter_dict for colname in col_name_list]
        if not all(flag_list):
            invalid_colnames = ut.compress(col_name_list, ut.not_list(flag_list))
            print('[api_item_widget] Warning: colnames=%r have no getters' %
                  (invalid_colnames,))
            col_name_list = ut.compress(col_name_list, flag_list)
        # sloppy type inference
        for colname in col_name_list:
            getter_ = col_getter_dict[colname]
            if colname not in col_types_dict:
                type_ = ut.get_homogenous_list_type(getter_)
                if type_ is not None:
                    col_types_dict[colname] = type_
    # sloppy kwargs.
    # FIXME: explicitly list col_nice_dict
    col_nice_dict = kwargs.get('col_nice_dict', {})
    self.col_nice_list = [col_nice_dict.get(name, name) for name in col_name_list]
    self.col_name_list = col_name_list
    self.col_type_list = [col_types_dict.get(colname, str)
                          for colname in col_name_list]
    self.col_getter_list = [col_getter_dict.get(colname, str)
                            for colname in col_name_list]  # First col is always a getter
    # Get number of rows / columns
    self.nCols = len(self.col_getter_list)
    self.nRows = 0 if self.nCols == 0 else len(self.col_getter_list[0])  # FIXME
    # Init iders to default and then overwrite based on dict inputs
    self.col_ider_list = ut.alloc_nones(self.nCols)
    for colname, ider_colnames in six.iteritems(col_ider_dict):
        try:
            col = self.col_name_list.index(colname)
            # Col iders might have tuple input
            ider_cols = ut.uinput_1to1(self.col_name_list.index, ider_colnames)
            col_ider = ut.uinput_1to1(lambda c: partial(self.get, c), ider_cols)
            self.col_ider_list[col] = col_ider
            del col_ider
            del ider_cols
            del col
            del colname
        except Exception as ex:
            ut.printex(ex, keys=['colname', 'ider_colnames', 'col', 'col_ider',
                                 'ider_cols'])
            raise
    # Init setters to data, and then overwrite based on dict inputs
    self.col_setter_list = list(self.col_getter_list)
    for colname, col_setter in six.iteritems(col_setter_dict):
        col = self.col_name_list.index(colname)
        self.col_setter_list[col] = col_setter
    # Init bgrole_getters to None, and then overwrite based on dict inputs
    self.col_bgrole_getter_list = [col_bgrole_dict.get(colname, None)
                                   for colname in self.col_name_list]
    # Mark editable columns
    self.col_edit_list = [name in editable_colnames for name in col_name_list]
    # Mark the sort column index
    if ut.is_str(sortby):
        self.col_sort_index = self.col_name_list.index(sortby)
    else:
        self.col_sort_index = sortby
    self.col_sort_reverse = sort_reverse
def compute_and_write_probchip(ibs, aid_list, config2_=None, lazy=True): """ Computes probability chips using pyrf CommandLine: python -m ibeis.algo.preproc.preproc_probchip --test-compute_and_write_probchip:0 --show python -m ibeis.algo.preproc.preproc_probchip --test-compute_and_write_probchip:1 python -m ibeis.algo.preproc.preproc_probchip --test-compute_and_write_probchip:2 --show --cnn Example0: >>> # ENABLE_DOCTEST >>> from ibeis.algo.preproc.preproc_probchip import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb('PZ_MTEST') >>> config2_ = None >>> lazy = True >>> aid_list = ibs.get_valid_aids(species=ibeis.const.TEST_SPECIES.ZEB_PLAIN)[0:4] >>> probchip_fpath_list_ = compute_and_write_probchip(ibs, aid_list, config2_, lazy=lazy) >>> result = ut.list_str(probchip_fpath_list_) >>> print(result) >>> ut.quit_if_noshow() >>> import plottool as pt >>> iteract_obj = pt.interact_multi_image.MultiImageInteraction(probchip_fpath_list_, nPerPage=4) >>> ut.show_if_requested() Example1: >>> # SLOW_DOCTEST >>> from ibeis.algo.preproc.preproc_probchip import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb('testdb1') >>> config2_ = None >>> lazy = False >>> aid_list = ibs.get_valid_aids(species=ibeis.const.TEST_SPECIES.ZEB_PLAIN) >>> probchip_fpath_list_ = compute_and_write_probchip(ibs, aid_list, config2_, lazy=lazy) >>> result = ut.list_str(probchip_fpath_list_) >>> print(result) Example2: >>> # DISABLE_DOCTEST >>> from ibeis.algo.preproc.preproc_probchip import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb('testdb1') >>> config2_ = ibs.new_query_params({'featweight_detector': 'cnn'}) >>> lazy = True >>> aid_list = ibs.get_valid_aids() >>> probchip_fpath_list_ = compute_and_write_probchip(ibs, aid_list, config2_, lazy=lazy) >>> result = ut.list_str(probchip_fpath_list_) >>> print(result) >>> ut.quit_if_noshow() >>> import plottool as pt >>> iteract_obj = pt.interact_multi_image.MultiImageInteraction(probchip_fpath_list_, nPerPage=4) >>> ut.show_if_requested() Dev:: #ibs.delete_annot_chips(aid_list) #probchip_fpath_list = get_annot_probchip_fpath_list(ibs, aid_list) """ # Get probchip dest information (output path) # TODO; properly ungroup output grouped_aids, unique_species, groupxs = group_aids_by_featweight_species( ibs, aid_list, config2_) nSpecies = len(unique_species) nTasks = len(aid_list) print(('[preproc_probchip.compute_and_write_probchip] ' 'Preparing to compute %d probchips of %d species') % (nTasks, nSpecies)) cachedir = ibs.get_probchip_dir() ut.ensuredir(cachedir) grouped_probchip_fpath_list = [] if ut.VERBOSE: print('[preproc_probchip] +--------------------') for aids, species in zip(grouped_aids, unique_species): if ut.VERBOSE: print('[preproc_probchip] Computing probchips for species=%r' % species) print('[preproc_probchip] |--------------------') if len(aids) == 0: continue probchip_fpaths = get_annot_probchip_fpath_list(ibs, aids, config2_=config2_, species=species) if lazy: # Filter out probchips that are already on disk # pyrf used to do this, now we need to do it # caching should be implicit due to using the visual_annot_uuid in # the filename isdirty_list = ut.not_list(map(exists, probchip_fpaths)) dirty_aids = ut.compress(aids, isdirty_list) dirty_probchip_fpath_list = ut.compress(probchip_fpaths, isdirty_list) print(('[preproc_probchip.compute_and_write_probchip]' ' Lazy compute of to compute %d/%d of species=%s') % (len(dirty_aids), len(aids), species)) else: # No filtering dirty_aids = aids dirty_probchip_fpath_list = probchip_fpaths if len(dirty_aids) > 0: 
write_dirty_aids(ibs, dirty_probchip_fpath_list, dirty_aids, config2_, species) grouped_probchip_fpath_list.append(probchip_fpaths) if ut.VERBOSE: print('[preproc_probchip] Done computing probability images') print('[preproc_probchip] L_______________________') probchip_fpath_list = vt.invert_apply_grouping2( grouped_probchip_fpath_list, groupxs, dtype=object) return probchip_fpath_list
def add_rows_from_parent(table, parent_rowids, config=None, verbose=True,
                         return_num_dirty=False):
    """
    Lazy addition
    """
    try:
        # Get requested configuration id
        config_rowid = table.get_config_rowid(config)
        # Find leaf rowids that need to be computed
        initial_rowid_list = table._get_rowid_from_superkey(parent_rowids,
                                                            config=config)
        # Get corresponding "dirty" parent rowids
        isdirty_list = ut.flag_None_items(initial_rowid_list)
        dirty_parent_rowids = ut.compress(parent_rowids, isdirty_list)
        num_dirty = len(dirty_parent_rowids)
        num_total = len(parent_rowids)
        if num_dirty > 0:
            if verbose:
                fmtstr = 'adding %d / %d new props to %r for config_rowid=%r'
                print(fmtstr % (num_dirty, num_total, table.tablename,
                                config_rowid))
            args = zip(*dirty_parent_rowids)
            if table._asobject:
                # Convenience
                args = [table.depc.get_obj(parent, rowids)
                        for parent, rowids in zip(table.parents, args)]
            # CALL EXTERNAL PREPROCESSING / GENERATION FUNCTION
            proptup_gen = table.preproc_func(table.depc, *args, config=config)
            #proptup_gen = list(proptup_gen)
            if len(table._nested_idxs) > 0:
                # TODO: rewrite
                nested_nCols = len(table.data_colnames)
                idxs1 = table._nested_idxs
                mask1 = ut.index_to_boolmask(idxs1, nested_nCols)
                mask2 = ut.not_list(mask1)
                idxs2 = ut.where(mask2)

                def unnest_data(data):
                    unnested_cols = list(zip(ut.take(data, idxs2)))
                    nested_cols = ut.take(data, idxs1)
                    grouped_items = [nested_cols, unnested_cols]
                    groupxs = [idxs1, idxs2]
                    unflat = ut.ungroup(grouped_items, groupxs, nested_nCols - 1)
                    return tuple(ut.flatten(unflat))
                # Hack when a sql schema has tuples defined in it
                proptup_gen = (unnest_data(data) for data in proptup_gen)
                #proptup_gen = list(proptup_gen)
            dirty_params_iter = (
                parent_rowids + (config_rowid,) + data_cols
                for parent_rowids, data_cols in zip(dirty_parent_rowids,
                                                    proptup_gen))
            #dirty_params_iter = list(dirty_params_iter)
            #print('dirty_params_iter = %s' % (ut.repr2(dirty_params_iter, nl=1),))
            CHUNKED_ADD = table.chunksize is not None
            if CHUNKED_ADD:
                for dirty_params_chunk in ut.ichunks(dirty_params_iter,
                                                     chunksize=table.chunksize):
                    table.db._add(table.tablename, table._table_colnames,
                                  dirty_params_chunk,
                                  nInput=len(dirty_params_chunk))
            else:
                nInput = num_dirty
                table.db._add(table.tablename, table._table_colnames,
                              dirty_params_iter, nInput=nInput)
            # Now that the dirty params are added get the correct order of rowids
            rowid_list = table._get_rowid_from_superkey(parent_rowids,
                                                        config=config)
        else:
            rowid_list = initial_rowid_list
        if return_num_dirty:
            return rowid_list, num_dirty
        else:
            return rowid_list
    except Exception as ex:
        ut.printex(ex, 'error in add_rowids', keys=[
            'table', 'parent_rowids', 'config', 'args',
            'dirty_parent_rowids', 'table.preproc_func'])
        raise
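The core of the lazy addition above is a "dirty rows" pattern: look up existing rows,
compute only the ones that come back missing, insert them, and look up again. A
self-contained sketch with a plain dict standing in for the SQL table (illustrative
only, not the depcache API):

def lazy_add(cache, keys, compute_fn):
    # Which keys have no cached value yet?
    dirty = [k for k in keys if k not in cache]
    if dirty:
        # Compute and store only the missing rows
        for key, value in zip(dirty, compute_fn(dirty)):
            cache[key] = value
    # Return values in the originally requested order
    return [cache[k] for k in keys]

# Example: lazy_add({}, [1, 2, 3], lambda ks: [k * 10 for k in ks])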
def fix_empty_dirs(drive):
    """
    # --- FIND EMPTY DIRECTORIES ---
    """
    print('Fixing Empty Dirs in %r' % (drive,))
    fidxs_list = ut.dict_take(drive.dpath_to_fidx, drive.dpath_list)
    isempty_flags = [len(fidxs) == 0 for fidxs in fidxs_list]
    empty_dpaths = ut.compress(drive.dpath_list, isempty_flags)

    def is_cplat_link(path_):
        try:
            if islink(path_):
                return True
            os.listdir(path_)
            return False
        except SystemErrors:
            return True
    valid_flags = [not is_cplat_link(d) for d in empty_dpaths]
    if not all(valid_flags):
        print('Filtered windows links %r / %r' % (
            len(empty_dpaths) - sum(valid_flags), len(empty_dpaths)))
        #print(ut.list_str(empty_dpaths[0:10]))
        empty_dpaths = ut.compress(empty_dpaths, valid_flags)
    print('Found %r / %r empty_dpaths' % (len(empty_dpaths), len(drive.dpath_list)))
    print(ut.list_str(empty_dpaths[0:10]))

    # Ensure actually still empty
    current_contents = [ut.glob(d, with_dirs=False)
                        for d in ut.ProgIter(empty_dpaths, 'checking empty status')]
    current_lens = list(map(len, current_contents))
    assert not any(current_lens), 'some dirs are not empty'

    # n ** 2 check to get only the base directories
    isbase_dir = [
        not any([d.startswith(dpath_) and d != dpath_
                 for dpath_ in empty_dpaths])
        for d in ut.ProgIter(empty_dpaths, 'finding base dirs')
    ]
    base_empty_dirs = ut.compress(empty_dpaths, isbase_dir)

    def list_only_files(dpath):
        # glob is too slow
        for root, dirs, fpaths in os.walk(dpath):
            for fpath in fpaths:
                yield fpath
    base_current_contents = [
        list(list_only_files(d))
        for d in ut.ProgIter(base_empty_dirs, 'checking emptyness', freq=10)]
    is_actually_empty = [len(fs) == 0 for fs in base_current_contents]
    not_really_empty = ut.compress(base_empty_dirs, ut.not_list(is_actually_empty))
    print('%d dirs are not actually empty' % (len(not_really_empty),))
    print('not_really_empty = %s' % (ut.list_str(not_really_empty[0:10]),))
    truly_empty_dirs = ut.compress(base_empty_dirs, is_actually_empty)

    def list_all(dpath):
        # glob is too slow
        for root, dirs, fpaths in os.walk(dpath):
            for dir_ in dirs:
                yield dir_
            for fpath in fpaths:
                yield fpath

    exclude_base_dirs = [join(drive.root_dpath, 'AppData')]
    exclude_end_dirs = ['__pycache__']
    truly_empty_dirs1 = truly_empty_dirs
    for ed in exclude_base_dirs:
        truly_empty_dirs1 = [
            d for d in truly_empty_dirs1
            if (not any(d.startswith(ed) for ed in exclude_base_dirs) and
                not any(d.endswith(ed) for ed in exclude_end_dirs))
        ]
    # Ensure actually still empty (with recursive checks for hidden files)
    print('truly_empty_dirs1[::5] = %s' % (
        ut.list_str(truly_empty_dirs1[0::5], strvals=True),))
    #print('truly_empty_dirs1 = %s' % (ut.list_str(truly_empty_dirs1, strvals=True),))

    if not dryrun:
        # FIX PART
        #from os.path import normpath
        #for d in ut.ProgIter(truly_empty_dirs):
        #    break
        #    if ut.WIN32:
        #        # http://www.sevenforums.com/system-security/53095-file-folder-read-only-attribute-wont-disable.html
        #        ut.cmd('attrib', '-r', '-s', normpath(d), verbose=False)
        #x = ut.remove_fpaths(truly_empty_dirs, strict=False)
        print('Deleting %d truly_empty_dirs1' % (len(truly_empty_dirs1),))
        for d in ut.ProgIter(truly_empty_dirs1, 'DELETE empty dirs', freq=1000):  # NOQA
            ut.delete(d, quiet=True)

        if ut.WIN32 and False:
            # remove file that failed removing
            flags = list(map(exists, truly_empty_dirs1))
            truly_empty_dirs1 = ut.compress(truly_empty_dirs1, flags)
            for d in ut.ProgIter(truly_empty_dirs1, 'rming', freq=1000):
                ut.cmd('rmdir', d)
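A simpler way to locate empty directories, shown as a hedged standard-library sketch
rather than the drive-registry approach above (it does not handle the link filtering
or exclusion lists of the original):

import os

def find_empty_dirs(root):
    empty = []
    # topdown=False yields leaf directories before their parents; a directory
    # counts as empty only if it currently has neither files nor subdirectories
    for dpath, dnames, fnames in os.walk(root, topdown=False):
        if not dnames and not fnames:
            empty.append(dpath)
    return empty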
def compute_and_write_probchip(ibs, aid_list, config2_=None, lazy=True): """ Computes probability chips using pyrf CommandLine: python -m ibeis.algo.preproc.preproc_probchip --test-compute_and_write_probchip:0 --show python -m ibeis.algo.preproc.preproc_probchip --test-compute_and_write_probchip:1 python -m ibeis.algo.preproc.preproc_probchip --test-compute_and_write_probchip:2 --show --cnn Example0: >>> # ENABLE_DOCTEST >>> from ibeis.algo.preproc.preproc_probchip import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb('PZ_MTEST') >>> config2_ = None >>> lazy = True >>> aid_list = ibs.get_valid_aids(species=ibeis.const.TEST_SPECIES.ZEB_PLAIN)[0:4] >>> probchip_fpath_list_ = compute_and_write_probchip(ibs, aid_list, config2_, lazy=lazy) >>> result = ut.list_str(probchip_fpath_list_) >>> print(result) >>> ut.quit_if_noshow() >>> import plottool as pt >>> iteract_obj = pt.interact_multi_image.MultiImageInteraction(probchip_fpath_list_, nPerPage=4) >>> ut.show_if_requested() Example1: >>> # SLOW_DOCTEST >>> from ibeis.algo.preproc.preproc_probchip import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb('testdb1') >>> config2_ = None >>> lazy = False >>> aid_list = ibs.get_valid_aids(species=ibeis.const.TEST_SPECIES.ZEB_PLAIN) >>> probchip_fpath_list_ = compute_and_write_probchip(ibs, aid_list, config2_, lazy=lazy) >>> result = ut.list_str(probchip_fpath_list_) >>> print(result) Example2: >>> # DISABLE_DOCTEST >>> from ibeis.algo.preproc.preproc_probchip import * # NOQA >>> import ibeis >>> ibs = ibeis.opendb('testdb1') >>> config2_ = ibs.new_query_params({'fw_detector': 'cnn'}) >>> lazy = True >>> aid_list = ibs.get_valid_aids() >>> probchip_fpath_list_ = compute_and_write_probchip(ibs, aid_list, config2_, lazy=lazy) >>> result = ut.list_str(probchip_fpath_list_) >>> print(result) >>> ut.quit_if_noshow() >>> import plottool as pt >>> iteract_obj = pt.interact_multi_image.MultiImageInteraction(probchip_fpath_list_, nPerPage=4) >>> ut.show_if_requested() Dev:: #ibs.delete_annot_chips(aid_list) #probchip_fpath_list = get_annot_probchip_fpath_list(ibs, aid_list) """ # Get probchip dest information (output path) # TODO; properly ungroup output grouped_aids, unique_species, groupxs = group_aids_by_featweight_species( ibs, aid_list, config2_) nSpecies = len(unique_species) nTasks = len(aid_list) print(('[preproc_probchip.compute_and_write_probchip] ' 'Preparing to compute %d probchips of %d species') % (nTasks, nSpecies)) cachedir = ibs.get_probchip_dir() ut.ensuredir(cachedir) grouped_probchip_fpath_list = [] if ut.VERBOSE: print('[preproc_probchip] +--------------------') for aids, species in zip(grouped_aids, unique_species): if ut.VERBOSE: print('[preproc_probchip] Computing probchips for species=%r' % species) print('[preproc_probchip] |--------------------') if len(aids) == 0: continue probchip_fpaths = get_annot_probchip_fpath_list(ibs, aids, config2_=config2_, species=species) if lazy: # Filter out probchips that are already on disk # pyrf used to do this, now we need to do it # caching should be implicit due to using the visual_annot_uuid in # the filename isdirty_list = ut.not_list(map(exists, probchip_fpaths)) dirty_aids = ut.compress(aids, isdirty_list) dirty_probchip_fpath_list = ut.compress(probchip_fpaths, isdirty_list) print(('[preproc_probchip.compute_and_write_probchip]' ' Lazy compute of to compute %d/%d of species=%s') % (len(dirty_aids), len(aids), species)) else: # No filtering dirty_aids = aids dirty_probchip_fpath_list = probchip_fpaths if len(dirty_aids) > 0: 
write_dirty_aids(ibs, dirty_probchip_fpath_list, dirty_aids, config2_, species) grouped_probchip_fpath_list.append(probchip_fpaths) if ut.VERBOSE: print('[preproc_probchip] Done computing probability images') print('[preproc_probchip] L_______________________') probchip_fpath_list = vt.invert_apply_grouping2( grouped_probchip_fpath_list, groupxs, dtype=object) return probchip_fpath_list
def get_injured_sharks(): """ >>> from wbia.scripts.getshark import * # NOQA """ import requests url = 'http://www.whaleshark.org/getKeywordImages.jsp' resp = requests.get(url) assert resp.status_code == 200 keywords = resp.json()['keywords'] key_list = ut.take_column(keywords, 'indexName') key_to_nice = {k['indexName']: k['readableName'] for k in keywords} injury_patterns = [ 'injury', 'net', 'hook', 'trunc', 'damage', 'scar', 'nicks', 'bite', ] injury_keys = [ key for key in key_list if any([pat in key for pat in injury_patterns]) ] noninjury_keys = ut.setdiff(key_list, injury_keys) injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys) # NOQA noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys) # NOQA key_list = injury_keys keyed_images = {} for key in ut.ProgIter(key_list, lbl='reading index', bs=True): key_url = url + '?indexName={indexName}'.format(indexName=key) key_resp = requests.get(key_url) assert key_resp.status_code == 200 key_imgs = key_resp.json()['images'] keyed_images[key] = key_imgs key_hist = {key: len(imgs) for key, imgs in keyed_images.items()} key_hist = ut.sort_dict(key_hist, 'vals') logger.info(ut.repr3(key_hist)) nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist) nice_key_hist = ut.sort_dict(nice_key_hist, 'vals') logger.info(ut.repr3(nice_key_hist)) key_to_urls = { key: ut.take_column(vals, 'url') for key, vals in keyed_images.items() } overlaps = {} import itertools overlap_img_list = [] for k1, k2 in itertools.combinations(key_to_urls.keys(), 2): overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2]) num_overlap = len(overlap_imgs) overlaps[(k1, k2)] = num_overlap overlaps[(k1, k1)] = len(key_to_urls[k1]) if num_overlap > 0: # logger.info('[%s][%s], overlap=%r' % (k1, k2, num_overlap)) overlap_img_list.extend(overlap_imgs) all_img_urls = list(set(ut.flatten(key_to_urls.values()))) num_all = len(all_img_urls) # NOQA logger.info('num_all = %r' % (num_all, )) # Determine super-categories categories = ['nicks', 'scar', 'trunc'] # Force these keys into these categories key_to_cat = {'scarbite': 'other_injury'} cat_to_keys = ut.ddict(list) for key in key_to_urls.keys(): flag = 1 if key in key_to_cat: cat = key_to_cat[key] cat_to_keys[cat].append(key) continue for cat in categories: if cat in key: cat_to_keys[cat].append(key) flag = 0 if flag: cat = 'other_injury' cat_to_keys[cat].append(key) cat_urls = ut.ddict(list) for cat, keys in cat_to_keys.items(): for key in keys: cat_urls[cat].extend(key_to_urls[key]) cat_hist = {} for cat in list(cat_urls.keys()): cat_urls[cat] = list(set(cat_urls[cat])) cat_hist[cat] = len(cat_urls[cat]) logger.info(ut.repr3(cat_to_keys)) logger.info(ut.repr3(cat_hist)) key_to_cat = dict([(val, key) for key, vals in cat_to_keys.items() for val in vals]) # ingestset = { # '__class__': 'ImageSet', # 'images': ut.ddict(dict) # } # for key, key_imgs in keyed_images.items(): # for imgdict in key_imgs: # url = imgdict['url'] # encid = imgdict['correspondingEncounterNumber'] # # Make structure # encdict = encounters[encid] # encdict['__class__'] = 'Encounter' # imgdict = ut.delete_keys(imgdict.copy(), ['correspondingEncounterNumber']) # imgdict['__class__'] = 'Image' # cat = key_to_cat[key] # annotdict = {'relative_bbox': [.01, .01, .98, .98], 'tags': [cat, key]} # annotdict['__class__'] = 'Annotation' # # Ensure structures exist # encdict['images'] = encdict.get('images', []) # imgdict['annots'] = imgdict.get('annots', []) # # Add an image to this encounter # encdict['images'].append(imgdict) # # Add an 
annotation to this image # imgdict['annots'].append(annotdict) # # http://springbreak.wildbook.org/rest/org.ecocean.Encounter/1111 # get_enc_url = 'http://www.whaleshark.org/rest/org.ecocean.Encounter/%s' % (encid,) # resp = requests.get(get_enc_url) # logger.info(ut.repr3(encdict)) # logger.info(ut.repr3(encounters)) # Download the files to the local disk # fpath_list = all_urls = ut.unique( ut.take_column( ut.flatten( ut.dict_subset(keyed_images, ut.flatten(cat_to_keys.values())).values()), 'url', )) dldir = ut.truepath('~/tmpsharks') from os.path import commonprefix, basename # NOQA prefix = commonprefix(all_urls) suffix_list = [url_[len(prefix):] for url_ in all_urls] fname_list = [suffix.replace('/', '--') for suffix in suffix_list] fpath_list = [] for url, fname in ut.ProgIter(zip(all_urls, fname_list), lbl='downloading imgs', freq=1): fpath = ut.grab_file_url(url, download_dir=dldir, fname=fname, verbose=False) fpath_list.append(fpath) # Make sure we keep orig info # url_to_keys = ut.ddict(list) url_to_info = ut.ddict(dict) for key, imgdict_list in keyed_images.items(): for imgdict in imgdict_list: url = imgdict['url'] info = url_to_info[url] for k, v in imgdict.items(): info[k] = info.get(k, []) info[k].append(v) info['keys'] = info.get('keys', []) info['keys'].append(key) # url_to_keys[url].append(key) info_list = ut.take(url_to_info, all_urls) for info in info_list: if len(set(info['correspondingEncounterNumber'])) > 1: assert False, 'url with two different encounter nums' # Combine duplicate tags hashid_list = [ ut.get_file_uuid(fpath_, stride=8) for fpath_ in ut.ProgIter(fpath_list, bs=True) ] groupxs = ut.group_indices(hashid_list)[1] # Group properties by duplicate images # groupxs = [g for g in groupxs if len(g) > 1] fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0) url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0) info_list_ = [ ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_)) for info_ in ut.apply_grouping(info_list, groupxs) ] encid_list_ = [ ut.unique(info_['correspondingEncounterNumber'])[0] for info_ in info_list_ ] keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_] cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_] clist = ut.ColumnLists({ 'gpath': fpath_list_, 'url': url_list_, 'encid': encid_list_, 'key': keys_list_, 'cat': cats_list_, }) # for info_ in ut.apply_grouping(info_list, groupxs): # info = ut.dict_accum(*info_) # info = ut.map_dict_vals(ut.flatten, info) # x = ut.unique(ut.flatten(ut.dict_accum(*info_)['correspondingEncounterNumber'])) # if len(x) > 1: # info = info.copy() # del info['keys'] # logger.info(ut.repr3(info)) flags = ut.lmap(ut.fpath_has_imgext, clist['gpath']) clist = clist.compress(flags) import wbia ibs = wbia.opendb('WS_Injury', allow_newdir=True) gid_list = ibs.add_images(clist['gpath']) clist['gid'] = gid_list failed_flags = ut.flag_None_items(clist['gid']) logger.info('# failed %s' % (sum(failed_flags), )) passed_flags = ut.not_list(failed_flags) clist = clist.compress(passed_flags) ut.assert_all_not_None(clist['gid']) # ibs.get_image_uris_original(clist['gid']) ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True) # ut.zipflat(clist['cat'], clist['key']) if False: # Can run detection instead clist['tags'] = ut.zipflat(clist['cat']) aid_list = ibs.use_images_as_annotations(clist['gid'], adjust_percent=0.01, tags_list=clist['tags']) aid_list import wbia.plottool as pt from wbia import core_annots pt.qt4ensure() # annots = ibs.annots() # aids 
= [1, 2] # ibs.depc_annot.get('hog', aids , 'hog') # ibs.depc_annot.get('chip', aids, 'img') for aid in ut.InteractiveIter(ibs.get_valid_aids()): hogs = ibs.depc_annot.d.get_hog_hog([aid]) chips = ibs.depc_annot.d.get_chips_img([aid]) chip = chips[0] hogimg = core_annots.make_hog_block_image(hogs[0]) pt.clf() pt.imshow(hogimg, pnum=(1, 2, 1)) pt.imshow(chip, pnum=(1, 2, 2)) fig = pt.gcf() fig.show() fig.canvas.draw() # logger.info(len(groupxs)) # if False: # groupxs = ut.find_duplicate_items(ut.lmap(basename, suffix_list)).values() # logger.info(ut.repr3(ut.apply_grouping(all_urls, groupxs))) # # FIX # for fpath, fname in zip(fpath_list, fname_list): # if ut.checkpath(fpath): # ut.move(fpath, join(dirname(fpath), fname)) # logger.info('fpath = %r' % (fpath,)) # import wbia # from wbia.dbio import ingest_dataset # dbdir = wbia.sysres.lookup_dbdir('WS_ALL') # self = ingest_dataset.Ingestable2(dbdir) if False: # Show overlap matrix import wbia.plottool as pt import pandas as pd import numpy as np dict_ = overlaps s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps)) df = s.unstack() lhs, rhs = df.align(df.T) df = lhs.add(rhs, fill_value=0).fillna(0) label_texts = df.columns.values def label_ticks(label_texts): import wbia.plottool as pt truncated_labels = [repr(lbl[0:100]) for lbl in label_texts] ax = pt.gca() ax.set_xticks(list(range(len(label_texts)))) ax.set_xticklabels(truncated_labels) [lbl.set_rotation(-55) for lbl in ax.get_xticklabels()] [ lbl.set_horizontalalignment('left') for lbl in ax.get_xticklabels() ] # xgrid, ygrid = np.meshgrid(range(len(label_texts)), range(len(label_texts))) # pt.plot_surface3d(xgrid, ygrid, disjoint_mat) ax.set_yticks(list(range(len(label_texts)))) ax.set_yticklabels(truncated_labels) [ lbl.set_horizontalalignment('right') for lbl in ax.get_yticklabels() ] [ lbl.set_verticalalignment('center') for lbl in ax.get_yticklabels() ] # [lbl.set_rotation(20) for lbl in ax.get_yticklabels()] # df = df.sort(axis=0) # df = df.sort(axis=1) sortx = np.argsort(df.sum(axis=1).values)[::-1] df = df.take(sortx, axis=0) df = df.take(sortx, axis=1) fig = pt.figure(fnum=1) fig.clf() mat = df.values.astype(np.int32) mat[np.diag_indices(len(mat))] = 0 vmax = mat[(1 - np.eye(len(mat))).astype(np.bool)].max() import matplotlib.colors norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True) pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none') pt.plt.colorbar() pt.plt.grid('off') label_ticks(label_texts) fig.tight_layout() # overlap_df = pd.DataFrame.from_dict(overlap_img_list) class TmpImage(ut.NiceRepr): pass from skimage.feature import hog from skimage import data, color, exposure import wbia.plottool as pt image2 = color.rgb2gray(data.astronaut()) # NOQA fpath = './GOPR1120.JPG' import vtool as vt for fpath in [fpath]: """ http://scikit-image.org/docs/dev/auto_examples/plot_hog.html """ image = vt.imread(fpath, grayscale=True) image = pt.color_funcs.to_base01(image) fig = pt.figure(fnum=2) fd, hog_image = hog( image, orientations=8, pixels_per_cell=(16, 16), cells_per_block=(1, 1), visualise=True, ) fig, (ax1, ax2) = pt.plt.subplots(1, 2, figsize=(8, 4), sharex=True, sharey=True) ax1.axis('off') ax1.imshow(image, cmap=pt.plt.cm.gray) ax1.set_title('Input image') ax1.set_adjustable('box-forced') # Rescale histogram for better display hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 0.02)) ax2.axis('off') ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray) ax2.set_title('Histogram of Oriented Gradients') 
ax1.set_adjustable('box-forced') pt.plt.show()
def split_analysis(ibs): """ CommandLine: python -m ibeis.other.dbinfo split_analysis --show python -m ibeis split_analysis --show python -m ibeis split_analysis --show --good Ignore: # mount sshfs -o idmap=user lev:/ ~/lev # unmount fusermount -u ~/lev Example: >>> # DISABLE_DOCTEST GGR >>> from ibeis.other.dbinfo import * # NOQA >>> import ibeis >>> dbdir = '/media/danger/GGR/GGR-IBEIS' >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS') >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False) >>> import guitool_ibeis as gt >>> gt.ensure_qtapp() >>> win = split_analysis(ibs) >>> ut.quit_if_noshow() >>> import plottool_ibeis as pt >>> gt.qtapp_loop(qwin=win) >>> #ut.show_if_requested() """ #nid_list = ibs.get_valid_nids(filter_empty=True) import datetime day1 = datetime.date(2016, 1, 30) day2 = datetime.date(2016, 1, 31) filter_kw = { 'multiple': None, #'view': ['right'], #'minqual': 'good', 'is_known': True, 'min_pername': 1, } aids1 = ibs.filter_annots_general(filter_kw=ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day1, 1.0)), }) ) aids2 = ibs.filter_annots_general(filter_kw=ut.dict_union( filter_kw, { 'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 0.0)), 'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day2, 1.0)), }) ) all_aids = aids1 + aids2 all_annots = ibs.annots(all_aids) print('%d annots on day 1' % (len(aids1)) ) print('%d annots on day 2' % (len(aids2)) ) print('%d annots overall' % (len(all_annots)) ) print('%d names overall' % (len(ut.unique(all_annots.nids))) ) nid_list, annots_list = all_annots.group(all_annots.nids) REVIEWED_EDGES = True if REVIEWED_EDGES: aids_list = [annots.aids for annots in annots_list] #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list] # Slower aid_pairs = ibs.get_unflat_am_aidpairs(aids_list) # Faster else: # ALL EDGES aid_pairs = [annots.get_aidpairs() for annots in annots_list] speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs) import vtool_ibeis as vt max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list]) nan_idx = np.where(np.isnan(max_speeds))[0] inf_idx = np.where(np.isinf(max_speeds))[0] bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx]))) ok_idx = ut.index_complement(bad_idx, len(max_speeds)) print('#nan_idx = %r' % (len(nan_idx),)) print('#inf_idx = %r' % (len(inf_idx),)) print('#ok_idx = %r' % (len(ok_idx),)) ok_speeds = max_speeds[ok_idx] ok_nids = ut.take(nid_list, ok_idx) ok_annots = ut.take(annots_list, ok_idx) sortx = np.argsort(ok_speeds)[::-1] sorted_speeds = np.array(ut.take(ok_speeds, sortx)) sorted_annots = np.array(ut.take(ok_annots, sortx)) sorted_nids = np.array(ut.take(ok_nids, sortx)) # NOQA sorted_speeds = np.clip(sorted_speeds, 0, 100) #idx = vt.find_elbow_point(sorted_speeds) #EXCESSIVE_SPEED = sorted_speeds[idx] # http://www.infoplease.com/ipa/A0004737.html # http://www.speedofanimals.com/animals/zebra #ZEBRA_SPEED_MAX = 64 # km/h #ZEBRA_SPEED_RUN = 50 # km/h ZEBRA_SPEED_SLOW_RUN = 20 # km/h #ZEBRA_SPEED_FAST_WALK = 10 # km/h #ZEBRA_SPEED_WALK = 7 # km/h MAX_SPEED = ZEBRA_SPEED_SLOW_RUN #MAX_SPEED = ZEBRA_SPEED_WALK #MAX_SPEED = EXCESSIVE_SPEED flags = sorted_speeds > MAX_SPEED flagged_ok_annots = ut.compress(sorted_annots, flags) inf_annots = ut.take(annots_list, inf_idx) flagged_annots = inf_annots + flagged_ok_annots print('MAX_SPEED = %r km/h' % (MAX_SPEED,)) print('%d annots 
with infinite speed' % (len(inf_annots),)) print('%d annots with large speed' % (len(flagged_ok_annots),)) print('Marking all pairs of annots above the threshold as non-matching') from ibeis.algo.graph import graph_iden import networkx as nx progkw = dict(freq=1, bs=True, est_window=len(flagged_annots)) bad_edges_list = [] good_edges_list = [] for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw): edge_to_speeds = annots.get_speeds() bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED] good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED] bad_edges_list.append(bad_edges) good_edges_list.append(good_edges) all_bad_edges = ut.flatten(bad_edges_list) good_edges_list = ut.flatten(good_edges_list) print('num_bad_edges = %r' % (len(ut.flatten(bad_edges_list)),)) print('num_bad_edges = %r' % (len(ut.flatten(good_edges_list)),)) if 1: from ibeis.viz import viz_graph2 import guitool_ibeis as gt gt.ensure_qtapp() if ut.get_argflag('--good'): print('Looking at GOOD (no speed problems) edges') aid_pairs = good_edges_list else: print('Looking at BAD (speed problems) edges') aid_pairs = all_bad_edges aids = sorted(list(set(ut.flatten(aid_pairs)))) infr = graph_iden.AnnotInference(ibs, aids, verbose=False) infr.initialize_graph() # Use random scores to randomize sort order rng = np.random.RandomState(0) scores = (-rng.rand(len(aid_pairs)) * 10).tolist() infr.graph.add_edges_from(aid_pairs) if True: edge_sample_size = 250 pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs)))) sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size] sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0)) sample_size = len(ut.unique(sorted_nids)) am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs)) flags = ut.not_list(ut.flag_None_items(am_rowids)) #am_rowids = ut.compress(am_rowids, flags) positive_tags = ['SplitCase', 'Photobomb'] flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0) for tag in positive_tags] print('edge_case_hist: ' + ut.repr3( ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)])) is_positive = ut.or_lists(*flags_list) num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values())) pop = len(pop_nids) print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),)) print('--- Sampling wrt edges ---') print('edge_sample_size = %r' % (edge_sample_size,)) print('edge_population_size = %r' % (len(aid_pairs),)) print('num_positive_edges = %r' % (sum(is_positive))) print('--- Sampling wrt names ---') print('name_population_size = %r' % (pop,)) vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95) nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores))) win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False, init_mode=None) win.populate_edge_model() win.show() return win # Make review interface for only bad edges infr_list = [] iter_ = list(zip(flagged_annots, bad_edges_list)) for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw): aids = annots.aids nids = [1] * len(aids) infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False) infr.initialize_graph() infr.reset_feedback() infr_list.append(infr) # Check which ones are user defined as incorrect #num_positive = 0 #for infr in infr_list: # flag = np.any(infr.get_feedback_probs()[0] == 0) # num_positive += flag 
#print('num_positive = %r' % (num_positive,)) #pop = len(infr_list) #print('pop = %r' % (pop,)) iter_ = list(zip(infr_list, bad_edges_list)) for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw): flipped_edges = [] for aid1, aid2 in bad_edges: if infr.graph.has_edge(aid1, aid2): flipped_edges.append((aid1, aid2)) infr.add_feedback((aid1, aid2), NEGTV) nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig') nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges}) nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges}) #for infr in ut.ProgIter(infr_list, lbl='flagging speeding edges', **progkw): # annots = ibs.annots(infr.aids) # edge_to_speeds = annots.get_speeds() # bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED] def inference_stats(infr_list_): relabel_stats = [] for infr in infr_list_: num_ccs, num_inconsistent = infr.relabel_using_reviews() state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values()) if POSTV not in state_hist: state_hist[POSTV] = 0 hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values()) subgraphs = infr.positive_connected_compoments() subgraph_sizes = [len(g) for g in subgraphs] info = ut.odict([ ('num_nonmatch_edges', state_hist[NEGTV]), ('num_match_edges', state_hist[POSTV]), ('frac_nonmatch_edges', state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])), ('num_inconsistent', num_inconsistent), ('num_ccs', num_ccs), ('edges_flipped', hist.get('flip', 0)), ('edges_unchanged', hist.get('orig', 0)), ('bad_unreviewed_edges', hist.get('new', 0)), ('orig_size', len(infr.graph)), ('new_sizes', subgraph_sizes), ]) relabel_stats.append(info) return relabel_stats relabel_stats = inference_stats(infr_list) print('\nAll Split Info:') lines = [] for key in relabel_stats[0].keys(): data = ut.take_column(relabel_stats, key) if key == 'new_sizes': data = ut.flatten(data) lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2))) print('\n'.join(ut.align_lines(lines, '='))) num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent')) can_split_flags = num_incon_list == 0 print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags))) splittable_infrs = ut.compress(infr_list, can_split_flags) relabel_stats = inference_stats(splittable_infrs) print('\nTrival Split Info:') lines = [] for key in relabel_stats[0].keys(): if key in ['num_inconsistent']: continue data = ut.take_column(relabel_stats, key) if key == 'new_sizes': data = ut.flatten(data) lines.append('stats(%s) = %s' % ( key, ut.repr2(ut.get_stats(data, use_median=True), precision=2))) print('\n'.join(ut.align_lines(lines, '='))) num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges')) num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges')) flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3) reasonable_infr = ut.compress(splittable_infrs, flags1) new_sizes_list = ut.take_column(relabel_stats, 'new_sizes') flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3 for sizes in new_sizes_list] reasonable_infr = ut.compress(splittable_infrs, flags2) print('#reasonable_infr = %r' % (len(reasonable_infr),)) for infr in ut.InteractiveIter(reasonable_infr): annots = ibs.annots(infr.aids) edge_to_speeds = annots.get_speeds() print('max_speed = %r' 
% (max(edge_to_speeds.values())),) infr.initialize_visual_node_attrs() infr.show_graph(use_image=True, only_reviewed=True) rest = ~np.logical_or(flags1, flags2) nonreasonable_infr = ut.compress(splittable_infrs, rest) rng = np.random.RandomState(0) random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng) random_infr = ut.take(nonreasonable_infr, random_idx) for infr in ut.InteractiveIter(random_infr): annots = ibs.annots(infr.aids) edge_to_speeds = annots.get_speeds() print('max_speed = %r' % (max(edge_to_speeds.values())),) infr.initialize_visual_node_attrs() infr.show_graph(use_image=True, only_reviewed=True) #import scipy.stats as st #conf_interval = .95 #st.norm.cdf(conf_interval) # view-source:http://www.surveysystem.com/sscalc.htm #zval = 1.96 # 95 percent confidence #zValC = 3.8416 # #zValC = 6.6564 #import statsmodels.stats.api as sms #es = sms.proportion_effectsize(0.5, 0.75) #sms.NormalIndPower().solve_power(es, power=0.9, alpha=0.05, ratio=1) pop = 279 num_positive = 3 sample_size = 15 conf_level = .95 #conf_level = .99 vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level) print('---') vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95) vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95) pop = 279 #err_frac = .05 # 5% err_frac = .10 # 10% conf_level = .95 vt.calc_sample_from_error_bars(err_frac, pop, conf_level) pop = 675 vt.calc_sample_from_error_bars(err_frac, pop, conf_level) vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1) vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2) vt.calc_sample_from_error_bars(.10, pop, conf_level=.68) vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95) vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
def parse_column_tuples(
    self,
    col_name_list,
    col_types_dict,
    col_getter_dict,
    col_bgrole_dict,
    col_ider_dict,
    col_setter_dict,
    editable_colnames,
    sortby,
    sort_reverse=True,
    strict=False,
    **kwargs,
):
    """
    parses simple lists into information suitable for making guitool headers
    """
    # Unpack the column tuples into names, getters, and types
    if not strict:
        # sloppy colname definitions
        flag_list = [colname in col_getter_dict for colname in col_name_list]
        if not all(flag_list):
            invalid_colnames = ut.compress(col_name_list, ut.not_list(flag_list))
            logger.info(
                '[api_item_widget] Warning: colnames=%r have no getters'
                % (invalid_colnames,)
            )
            col_name_list = ut.compress(col_name_list, flag_list)
        # sloppy type inference
        for colname in col_name_list:
            getter_ = col_getter_dict[colname]
            if colname not in col_types_dict:
                type_ = ut.get_homogenous_list_type(getter_)
                if type_ is not None:
                    col_types_dict[colname] = type_
    # sloppy kwargs.
    # FIXME: explicitly list col_nice_dict
    col_nice_dict = kwargs.get('col_nice_dict', {})
    self.col_nice_list = [col_nice_dict.get(name, name) for name in col_name_list]
    self.col_name_list = col_name_list
    self.col_type_list = [
        col_types_dict.get(colname, str) for colname in col_name_list
    ]
    # First col is always a getter
    self.col_getter_list = [
        col_getter_dict.get(colname, str) for colname in col_name_list
    ]
    # Get number of rows / columns
    self.nCols = len(self.col_getter_list)
    if self.nCols == 0:
        self.nRows = 0
    else:
        for getter in self.col_getter_list:
            if ut.isiterable(getter):
                break
            getter = None  # FIXME
        assert getter is not None, 'at least one getter must be an array/list'
        self.nRows = len(getter)
    # self.nRows = 0 if self.nCols == 0 else len(self.col_getter_list[0])  # FIXME
    # Init iders to default and then overwrite based on dict inputs
    self.col_ider_list = [None] * self.nCols  # ut.alloc_nones(self.nCols)
    # for colname, ider_colnames in six.iteritems(col_ider_dict):
    #     import utool
    #     utool.embed()
    colname2_colx = ut.make_index_lookup(self.col_name_list)
    for colname, ider_colnames in six.iteritems(col_ider_dict):
        if colname not in colname2_colx:
            continue
        # for colname in self.col_name_list:
        ider_colnames = col_ider_dict[colname]
        try:
            colx = colname2_colx[colname]
            # Col iders might have tuple input
            ider_cols = self._uinput_1to1(self.col_name_list.index, ider_colnames)
            col_ider = self._uinput_1to1(lambda c: ut.partial(self.get, c), ider_cols)
            self.col_ider_list[colx] = col_ider
            del col_ider
            del ider_cols
            del colx
            del colname
        except Exception as ex:
            ut.printex(
                ex,
                keys=['colname', 'ider_colnames', 'colx', 'col_ider', 'ider_cols'],
            )
            raise
    # Init setters to data, and then overwrite based on dict inputs
    self.col_setter_list = list(self.col_getter_list)
    for colname, col_setter in six.iteritems(col_setter_dict):
        colx = colname2_colx[colname]
        self.col_setter_list[colx] = col_setter
    # Init bgrole_getters to None, and then overwrite based on dict inputs
    self.col_bgrole_getter_list = [
        col_bgrole_dict.get(colname, None) for colname in self.col_name_list
    ]
    # Mark editable columns
    self.col_edit_list = [name in editable_colnames for name in col_name_list]
    # Mark the sort column index
    if sortby is None:
        self.col_sort_index = 0
    elif ut.is_str(sortby):
        self.col_sort_index = self.col_name_list.index(sortby)
    else:
        self.col_sort_index = sortby
    self.col_sort_reverse = sort_reverse

    # Hacks for tree widget
    self._iders = kwargs.get('iders', None)
    col_level_dict = kwargs.get('col_level_dict', None)
    if col_level_dict is None:
        self.col_level_list = None
    else:
        self.col_level_list = ut.take(col_level_dict, col_name_list)
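# A small self-contained illustration of the non-strict column filtering step
# above: column names without a getter are reported and dropped with the same
# ut.compress / ut.not_list pairing. The column names and getters here are
# made up for the example.
import utool as ut

col_name_list = ['index', 'score', 'missing']
col_getter_dict = {'index': [0, 1, 2], 'score': [0.9, 0.5, 0.1]}
flag_list = [colname in col_getter_dict for colname in col_name_list]
invalid_colnames = ut.compress(col_name_list, ut.not_list(flag_list))
col_name_list = ut.compress(col_name_list, flag_list)
print(invalid_colnames)  # ['missing']
print(col_name_list)     # ['index', 'score']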
def fix_empty_dirs(drive, dryrun=True):
    """
    # --- FIND EMPTY DIRECTORIES ---
    """
    print('Fixing Empty Dirs in %r' % (drive,))
    fidxs_list = ut.dict_take(drive.dpath_to_fidx, drive.dpath_list)
    isempty_flags = [len(fidxs) == 0 for fidxs in fidxs_list]
    empty_dpaths = ut.compress(drive.dpath_list, isempty_flags)

    def is_cplat_link(path_):
        try:
            if islink(path_):
                return True
            os.listdir(path_)
            return False
        except OSError:
            return True

    valid_flags = [not is_cplat_link(d) for d in empty_dpaths]
    if not all(valid_flags):
        print('Filtered windows links %r / %r' % (
            len(empty_dpaths) - sum(valid_flags), len(empty_dpaths)))
        #print(ut.repr2(empty_dpaths[0:10]))
        empty_dpaths = ut.compress(empty_dpaths, valid_flags)

    print('Found %r / %r empty_dpaths' % (len(empty_dpaths), len(drive.dpath_list)))
    print(ut.repr2(empty_dpaths[0:10]))

    # Ensure actually still empty
    current_contents = [
        ut.glob(d, with_dirs=False)
        for d in ut.ProgIter(empty_dpaths, 'checking empty status')
    ]
    current_lens = list(map(len, current_contents))
    assert not any(current_lens), 'some dirs are not empty'

    # n ** 2 check to get only the base directories
    isbase_dir = [
        not any([d.startswith(dpath_) and d != dpath_ for dpath_ in empty_dpaths])
        for d in ut.ProgIter(empty_dpaths, 'finding base dirs')
    ]
    base_empty_dirs = ut.compress(empty_dpaths, isbase_dir)

    def list_only_files(dpath):
        # glob is too slow
        for root, dirs, fpaths in os.walk(dpath):
            for fpath in fpaths:
                yield fpath

    base_current_contents = [
        list(list_only_files(d))
        for d in ut.ProgIter(base_empty_dirs, 'checking emptyness', freq=10)
    ]
    is_actually_empty = [len(fs) == 0 for fs in base_current_contents]
    not_really_empty = ut.compress(base_empty_dirs, ut.not_list(is_actually_empty))
    print('%d dirs are not actually empty' % (len(not_really_empty),))
    print('not_really_empty = %s' % (ut.repr2(not_really_empty[0:10]),))
    truly_empty_dirs = ut.compress(base_empty_dirs, is_actually_empty)

    def list_all(dpath):
        # glob is too slow
        for root, dirs, fpaths in os.walk(dpath):
            for dir_ in dirs:
                yield dir_
            for fpath in fpaths:
                yield fpath

    exclude_base_dirs = [join(drive.root_dpath, 'AppData')]
    exclude_end_dirs = ['__pycache__']
    truly_empty_dirs1 = [
        d for d in truly_empty_dirs
        if (not any(d.startswith(ed) for ed in exclude_base_dirs) and
            not any(d.endswith(ed) for ed in exclude_end_dirs))
    ]
    # Ensure actually still empty (with recursive checks for hidden files)
    print('truly_empty_dirs1[::5] = %s' % (
        ut.repr2(truly_empty_dirs1[0::5], strvals=True),))
    #print('truly_empty_dirs1 = %s' % (ut.repr2(truly_empty_dirs1, strvals=True),))

    if not dryrun:
        # FIX PART (destructive; only runs when dryrun is disabled)
        #from os.path import normpath
        #for d in ut.ProgIter(truly_empty_dirs):
        #    break
        #    if ut.WIN32:
        #        # http://www.sevenforums.com/system-security/53095-file-folder-read-only-attribute-wont-disable.html
        #        ut.cmd('attrib', '-r', '-s', normpath(d), verbose=False)
        #x = ut.remove_fpaths(truly_empty_dirs, strict=False)
        print('Deleting %d truly_empty_dirs1' % (len(truly_empty_dirs1),))
        for d in ut.ProgIter(truly_empty_dirs1, 'DELETE empty dirs', freq=1000):  # NOQA
            ut.delete(d, quiet=True)

        if ut.WIN32 and False:
            # remove file that failed removing
            flags = list(map(exists, truly_empty_dirs1))
            truly_empty_dirs1 = ut.compress(truly_empty_dirs1, flags)
            for d in ut.ProgIter(truly_empty_dirs1, 'rming', freq=1000):
                ut.cmd('rmdir', d)
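# Tiny illustration of the n**2 "base directory" reduction above, with made-up
# paths: nested empty directories collapse to their topmost empty ancestor.
empty_dpaths = ['/data/a', '/data/a/b', '/data/a/b/c', '/data/x']
isbase_dir = [
    not any(d.startswith(dpath_) and d != dpath_ for dpath_ in empty_dpaths)
    for d in empty_dpaths
]
print([d for d, flag in zip(empty_dpaths, isbase_dir) if flag])
# -> ['/data/a', '/data/x']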
def fix_chktex():
    """
    ./texfix.py --fixcite --fix-chktex
    """
    import re
    import parse
    fpaths = testdata_fpaths()
    print('Running chktex')
    output_list = [
        ut.cmd('chktex', fpath, verbose=False)[0] for fpath in fpaths
    ]

    fixcite = ut.get_argflag('--fixcite')
    fixlbl = ut.get_argflag('--fixlbl')
    fixcmdterm = ut.get_argflag('--fixcmdterm')

    for fpath, output in zip(fpaths, output_list):
        text = ut.readfrom(fpath)
        buffer = text.split('\n')

        pat = '\n' + ut.positive_lookahead('Warning')
        warn_list = list(
            filter(lambda x: x.startswith('Warning'), re.split(pat, output)))
        delete_linenos = []

        if not (fixcmdterm or fixlbl or fixcite):
            print(' CHOOSE A FIX ')

        modified_lines = []
        for warn in warn_list:
            warnlines = warn.split('\n')
            pres = parse.parse(
                'Warning {num} in {fpath} line {lineno}: {warnmsg}',
                warnlines[0])
            if pres is not None:
                fpath_ = pres['fpath']
                lineno = int(pres['lineno']) - 1
                warnmsg = pres['warnmsg']
                try:
                    assert fpath == fpath_, ('%r != %r' % (fpath, fpath_))
                except AssertionError:
                    continue
                if 'No errors printed' in warn:
                    #print('Cannot fix')
                    continue
                if lineno in modified_lines:
                    print('Skipping modified line')
                    continue
                if fixcmdterm and warnmsg == 'Command terminated with space.':
                    print('Fix command termination')
                    errorline = warnlines[1]  # NOQA
                    carrotline = warnlines[2]
                    pos = carrotline.find('^')
                    if 0:
                        print('pos = %r' % (pos,))
                        print('lineno = %r' % (lineno,))
                        print('errorline = %r' % (errorline,))
                    modified_lines.append(lineno)
                    line = buffer[lineno]
                    pre_, post_ = line[:pos], line[pos + 1:]
                    newline = (pre_ + '{} ' + post_).rstrip(' ')
                    #print('newline = %r' % (newline,))
                    buffer[lineno] = newline
                elif fixlbl and warnmsg == 'Delete this space to maintain correct pagereferences.':
                    print('Fix label newline')
                    fpath_ = pres['fpath']
                    errorline = warnlines[1]  # NOQA
                    new_prevline = buffer[lineno - 1].rstrip() + errorline.lstrip(' ')
                    buffer[lineno - 1] = new_prevline
                    modified_lines.append(lineno)
                    delete_linenos.append(lineno)
                elif fixcite and re.match(
                        'Non-breaking space \\(.~.\\) should have been used', warnmsg):
                    #print(warnmsg)
                    #print('\n'.join(warnlines))
                    print('Fix citation space')
                    carrotline = warnlines[2]
                    pos = carrotline.find('^')
                    modified_lines.append(lineno)
                    line = buffer[lineno]
                    if line[pos] == ' ':
                        pre_, post_ = line[:pos], line[pos + 1:]
                        newline = (pre_ + '~' + post_).rstrip(' ')
                    else:
                        pre_, post_ = line[:pos + 1], line[pos + 1:]
                        newline = (pre_ + '~' + post_).rstrip(' ')
                        print(warn)
                        print(line[pos])
                        assert False
                        #assert line[pos] == ' ', '%r' % line[pos]
                        break
                    if len(pre_.strip()) == 0:
                        new_prevline = buffer[lineno - 1].rstrip() + newline.lstrip(' ')
                        buffer[lineno - 1] = new_prevline
                        delete_linenos.append(lineno)
                    else:
                        #print('newline = %r' % (newline,))
                        buffer[lineno] = newline
            #print(warn)

        if len(delete_linenos) > 0:
            mask = ut.index_to_boolmask(delete_linenos, len(buffer))
            buffer = ut.compress(buffer, ut.not_list(mask))
        newtext = '\n'.join(buffer)

        #ut.dump_autogen_code(fpath, newtext, 'tex', fullprint=False)
        ut.print_difftext(
            ut.get_textdiff(text, newtext, num_context_lines=4))
        if ut.get_argflag('-w'):
            ut.writeto(fpath, newtext)
        else:
            print('Specify -w to finalize the change')
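# A quick check of the parse pattern used above on a representative chktex
# warning header (the file name and message here are fabricated for
# illustration):
import parse

line = 'Warning 1 in main.tex line 42: Command terminated with space.'
pres = parse.parse('Warning {num} in {fpath} line {lineno}: {warnmsg}', line)
if pres is not None:
    print(pres['fpath'], int(pres['lineno']) - 1, pres['warnmsg'])
    # -> main.tex 41 Command terminated with space.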
def get_imageset_isoccurrence(ibs, imgsetid_list):
    flags = ut.not_list(ibs.is_special_imageset(imgsetid_list))
    #imgset_texts = ibs.get_imageset_text(imgsetid_list)
    #flags = [text.lower().startswith('occurrence') for text in imgset_texts]
    return flags
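# Hypothetical usage sketch: keep only the occurrence (non-special) imagesets,
# using the same compress-on-flags idiom as the rest of this file. The ibs
# handle and the imageset-id accessor are assumed to come from an open IBEIS
# database.
imgsetid_list = ibs.get_valid_imgsetids()
flags = get_imageset_isoccurrence(ibs, imgsetid_list)
occurrence_imgsetids = ut.compress(imgsetid_list, flags)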
def mark_unreviewed_above_score_as_correct(qres_wgt):
    selected_qtindex_list = qres_wgt.selectedRows()
    if len(selected_qtindex_list) == 1:
        qtindex = selected_qtindex_list[0]
        # aid1, aid2 = qres_wgt.get_aidpair_from_qtindex(qtindex)
        thresh = qtindex.model().get_header_data('score', qtindex)
        logger.info('thresh = %r' % (thresh,))

        rows = qres_wgt.review_api.ider()
        scores_ = qres_wgt.review_api.get(
            qres_wgt.review_api.col_name_list.index('score'), rows)
        valid_rows = ut.compress(rows, scores_ >= thresh)
        aids1 = qres_wgt.review_api.get(
            qres_wgt.review_api.col_name_list.index('qaid'), valid_rows)
        aids2 = qres_wgt.review_api.get(
            qres_wgt.review_api.col_name_list.index('aid'), valid_rows)
        # ibs = qres_wgt.ibs
        ibs = qres_wgt.ibs
        am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(aids1, aids2)
        reviewed = ibs.get_annotmatch_reviewed(am_rowids)
        unreviewed = ut.not_list(reviewed)

        valid_rows = ut.compress(valid_rows, unreviewed)
        aids1 = ut.compress(aids1, unreviewed)
        aids2 = ut.compress(aids2, unreviewed)

        import networkx as nx
        graph = nx.Graph()
        graph.add_edges_from(list(zip(aids1, aids2)), user_thresh_match=True)
        review_groups = list(nx.connected_component_subgraphs(graph))

        changing_aids = list(graph.nodes())
        nids = ibs.get_annot_nids(changing_aids)
        nid2_aids = ut.group_items(changing_aids, nids)
        for nid, aids in nid2_aids.items():
            # Connect all original names in the database to denote merges
            for u, v in ut.itertwo(aids):
                graph.add_edge(u, v)
        dbside_groups = list(nx.connected_component_subgraphs(graph))

        options = [
            'Accept',
            # 'Review More'
        ]
        msg = (ut.codeblock("""
            There are %d names and %d annotations in this mass review set.
            Mass review has discovered %d internal groups.
            Accepting will induce a database grouping of %d names.
            """) % (
                len(nid2_aids),
                len(changing_aids),
                len(review_groups),
                len(dbside_groups),
            ))
        reply = gt.user_option(msg=msg, options=options)

        if reply == options[0]:
            # This is not the smartest way to group names.
            # Ideally what will happen here, is that reviewed edges will go into
            # the new graph name inference algorithm. Then the chosen point will
            # be used as the threshold. Then the graph cut algorithm will be
            # applied.
            logger_ = qres_wgt.logger
            logger_.debug(msg)
            logger_.info('START MASS_THRESHOLD_MERGE')
            logger_.info('num_groups=%d thresh=%r' % (len(dbside_groups), thresh))
            for count, subgraph in enumerate(dbside_groups):
                thresh_aid_pairs = [
                    edge
                    for edge, flag in nx.get_edge_attributes(
                        graph, 'user_thresh_match').items()
                    if flag
                ]
                thresh_uuid_pairs = ibs.unflat_map(ibs.get_annot_uuids, thresh_aid_pairs)

                aids = list(subgraph.nodes())
                nids = ibs.get_annot_name_rowids(aids)
                flags = ut.not_list(ibs.is_aid_unknown(aids))
                previous_names = ibs.get_name_texts(nids)
                valid_nids = ut.compress(nids, flags)
                if len(valid_nids) == 0:
                    merge_nid = ibs.make_next_nids(num=1)[0]
                    type_ = 'new'
                else:
                    merge_nid = min(valid_nids)
                    type_ = 'existing'

                # Need to find other non-exemplar / query names that may
                # need merging
                other_aids = ibs.get_name_aids(valid_nids)
                other_aids = set(ut.flatten(other_aids)) - set(aids)
                other_auuids = ibs.get_annot_uuids(other_aids)
                other_previous_names = ibs.get_annot_names(other_aids)

                merge_name = ibs.get_name_texts(merge_nid)
                annot_uuids = ibs.get_annot_uuids(aids)
                ###
                # Set as reviewed (so we dont see them again), but mark it
                # with a different code to denote that it was a MASS review
                aid1_list = ut.take_column(thresh_aid_pairs, 0)
                aid2_list = ut.take_column(thresh_aid_pairs, 1)
                am_rowids = ibs.add_annotmatch_undirected(aid1_list, aid2_list)
                ibs.set_annotmatch_reviewer(
                    am_rowids, ['algo:lnbnn_thresh'] * len(am_rowids))

                logger_.info('START GROUP %d' % (count,))
                logger_.info(
                    'GROUP BASED ON %d ANNOT_PAIRS WITH SCORE ABOVE (thresh=%r)'
                    % (len(thresh_uuid_pairs), thresh))
                logger_.debug('(uuid_pairs=%r)' % (thresh_uuid_pairs,))
                logger_.debug('(merge_name=%r)' % (merge_name,))
                logger_.debug(
                    'CHANGE NAME OF %d (annot_uuids=%r) WITH (previous_names=%r) TO (%s) (merge_name=%r)'
                    % (len(annot_uuids), annot_uuids, previous_names, type_, merge_name))
                logger_.debug(
                    'ADDITIONAL CHANGE NAME OF %d (annot_uuids=%r) WITH (previous_names=%r) TO (%s) (merge_name=%r)'
                    % (len(other_auuids), other_auuids, other_previous_names, type_, merge_name))
                logger_.info('END GROUP %d' % (count,))
                new_nids = [merge_nid] * len(aids)
                ibs.set_annot_name_rowids(aids, new_nids)
            logger_.info('END MASS_THRESHOLD_MERGE')
    else:
        logger.info('[context] Multiple %d selection' % (len(selected_qtindex_list),))
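# Minimal sketch (with made-up annotation ids) of how the above-threshold
# matches are grouped: each pair above the score threshold becomes an edge and
# every connected component becomes one merge group. nx.connected_components
# is used here in place of the deprecated connected_component_subgraphs.
import networkx as nx

aids1 = [1, 2, 5]
aids2 = [2, 3, 6]
graph = nx.Graph()
graph.add_edges_from(list(zip(aids1, aids2)), user_thresh_match=True)
groups = [sorted(c) for c in nx.connected_components(graph)]
print(groups)  # [[1, 2, 3], [5, 6]]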
def get_imageset_isoccurrence(ibs, imgsetid_list):
    flags = ut.not_list(ibs.is_special_imageset(imgsetid_list))
    # imgset_texts = ibs.get_imageset_text(imgsetid_list)
    # flags = [text.lower().startswith('occurrence') for text in imgset_texts]
    return flags