def select_row_from_id(view, _id, scroll=False, collapse=True):
    """
    _id is from the iders function (i.e. a wbia rowid)
    selects the row in that view if it exists
    """
    with ut.Timer('[api_item_view] select_row_from_id(id=%r, scroll=%r, collapse=%r)' %
                  (_id, scroll, collapse)):
        qtindex, row = view.get_row_and_qtindex_from_id(_id)
        if row is not None:
            if isinstance(view, QtWidgets.QTreeView):
                if collapse:
                    view.collapseAll()
                select_model = view.selectionModel()
                select_flag = QtCore.QItemSelectionModel.ClearAndSelect
                # select_flag = QtCore.QItemSelectionModel.Select
                # select_flag = QtCore.QItemSelectionModel.NoUpdate
                with ut.Timer('[api_item_view] selecting name. qtindex=%r' % (qtindex,)):
                    select_model.select(qtindex, select_flag)
                with ut.Timer('[api_item_view] expanding'):
                    view.setExpanded(qtindex, True)
            else:
                # For Table Views
                view.selectRow(row)
            # Scroll to selection
            if scroll:
                with ut.Timer('scrolling'):
                    view.scrollTo(qtindex)
            return row
    return None
def load_oxford_wbia():
    import wbia
    ibs = wbia.opendb('Oxford')
    dim_size = None
    _dannots = ibs.annots(ibs.filter_annots_general(has_none='query'),
                          config=dict(dim_size=dim_size))
    _qannots = ibs.annots(ibs.filter_annots_general(has_any='query'),
                          config=dict(dim_size=dim_size))
    with ut.Timer('reading info'):
        vecs_list = _dannots.vecs
        kpts_list = _dannots.kpts
        nfeats_list = np.array(_dannots.num_feats)
    with ut.Timer('stacking info'):
        all_vecs = np.vstack(vecs_list)
        all_kpts = np.vstack(kpts_list)
        offset_list = np.hstack(([0], nfeats_list.cumsum())).astype(np.int64)
        # data_annots = reorder_annots(_dannots, data_uri_order)
    data_uri_order = get_annots_imgid(_dannots)
    query_uri_order = get_annots_imgid(_qannots)
    data = {
        'offset_list': offset_list,
        'all_kpts': all_kpts,
        'all_vecs': all_vecs,
        'data_uri_order': data_uri_order,
        'query_uri_order': query_uri_order,
    }
    return data
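
# The offset_list built above delimits each annotation's rows inside the
# stacked all_vecs / all_kpts arrays. A minimal, self-contained sketch of that
# slicing pattern on made-up data (the counts below are hypothetical, not the
# Oxford annotations used by the loader):
import numpy as np

nfeats_list = np.array([3, 5, 2])  # per-annotation feature counts
all_vecs = np.random.rand(nfeats_list.sum(), 128).astype(np.float32)
offset_list = np.hstack(([0], nfeats_list.cumsum())).astype(np.int64)
# Consecutive offsets recover per-annotation slices from the stacked array
vecs_list = [all_vecs[left:right]
             for left, right in zip(offset_list[:-1], offset_list[1:])]
assert [len(v) for v in vecs_list] == nfeats_list.tolist()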
def set_part_verts(ibs, part_rowid_list, verts_list, delete_thumbs=True,
                   notify_root=True):
    r"""
    Sets the vertices [(x, y), ...] of a list of part_rowid_list

    RESTful:
        Method: PUT
        URL:    /api/part/vert/
    """
    from vtool import geometry
    nInput = len(part_rowid_list)
    # Compute data to set
    if isinstance(verts_list, np.ndarray):
        verts_list = verts_list.tolist()
    for index, vert_list in enumerate(verts_list):
        if isinstance(vert_list, np.ndarray):
            verts_list[index] = vert_list.tolist()
    num_verts_list = list(map(len, verts_list))
    verts_as_strings = list(map(six.text_type, verts_list))
    id_iter1 = ((part_rowid,) for part_rowid in part_rowid_list)
    # also need to set the internal number of vertices
    val_iter1 = ((num_verts, verts) for (num_verts, verts) in
                 zip(num_verts_list, verts_as_strings))
    colnames = (PART_NUM_VERTS, PART_VERTS,)
    # SET VERTS in PART_TABLE
    ibs.db.set(const.PART_TABLE, colnames, val_iter1, id_iter1, nInput=nInput)
    # changing the vertices also changes the bounding boxes
    bbox_list = geometry.bboxes_from_vert_list(verts_list)  # new bboxes
    xtl_list, ytl_list, width_list, height_list = list(zip(*bbox_list))
    val_iter2 = zip(xtl_list, ytl_list, width_list, height_list)
    id_iter2 = ((part_rowid,) for part_rowid in part_rowid_list)
    colnames = ('part_xtl', 'part_ytl', 'part_width', 'part_height',)
    # SET BBOX in PART_TABLE
    ibs.db.set(const.PART_TABLE, colnames, val_iter2, id_iter2, nInput=nInput)
    with ut.Timer('set_annot_verts...thumbs'):
        if delete_thumbs:
            ibs.delete_part_chips(part_rowid_list)  # INVALIDATE THUMBNAILS
    with ut.Timer('set_annot_verts...roots'):
        if notify_root:
            ibs.depc_part.notify_root_changed(part_rowid_list, 'verts',
                                              force_delete=True)
def test_simple_parallel():
    r"""
    CommandLine:
        python -m pyhesaff.tests.test_pyhesaff_simple_parallel --test-test_simple_parallel --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from pyhesaff.tests.test_pyhesaff_simple_parallel import *  # NOQA
        >>> import matplotlib as mpl
        >>> from matplotlib import pyplot as plt
        >>> img_fpaths, kpts_array, desc_array = test_simple_parallel()
        >>> ut.quit_if_noshow()
        >>> # Do not plot by default
        >>> fig = plt.figure()
        >>> for count, (img_fpath, kpts, desc) in enumerate(zip(img_fpaths, kpts_array,
        >>>                                                     desc_array)):
        >>>     if count > 3:
        >>>         break
        >>>     ax = fig.add_subplot(2, 2, count + 1)
        >>>     img = mpl.image.imread(img_fpath)
        >>>     plt.imshow(img)
        >>>     _xs, _ys = kpts.T[0:2]
        >>>     ax.plot(_xs, _ys, 'ro', alpha=.5)
        >>> ut.show_if_requested()
    """
    import pyhesaff
    test_fnames = ['carl.jpg', 'lena.png', 'zebra.png', 'ada.jpg', 'star.png']
    img_fpaths = list(map(ut.grab_test_imgpath, test_fnames)) * 2
    # Time parallel computation
    with ut.Timer('Timing Parallel'):
        kpts_array, desc_array = pyhesaff.detect_feats_list(img_fpaths)
    # Time serial computation
    kpts_list2 = []
    desc_list2 = []
    with ut.Timer('Timing Iterative'):
        for img_fpath in img_fpaths:
            kpts_, desc_ = pyhesaff.detect_feats(img_fpath)
            kpts_list2.append(kpts_)
            desc_list2.append(desc_)
    print('Checking for errors')
    for (kpts_, desc_, kpts, desc) in zip(kpts_list2, desc_list2,
                                          kpts_array, desc_array):
        print('shape(kpts, kpts_, desc, desc_) = %9r, %9r, %11r, %11r' %
              (kpts.shape, kpts_.shape, desc.shape, desc_.shape))
        try:
            assert np.all(kpts_ == kpts), 'parallel computation inconsistent'
            assert np.all(desc_ == desc), 'parallel computation inconsistent'
            assert len(kpts_) > 0, 'no kpts detected'
            #assert False, 'deliberate triggering to see printouts'
        except Exception as ex:
            ut.printex(ex)
            raise
    print('Keypoints seem consistent')
    return img_fpaths, kpts_array, desc_array
def OLD_compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_aids, wx2_idf,
                           wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """ """
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    with ut.Timer('timer_orig1'):
        wx_sublist = np.array(wx2_drvecs.keys())
        if not ut.QUIET:
            print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' %
                  (smk_alpha, smk_thresh))
        # Get list of aids and rvecs w.r.t. words
        aids_list = [wx2_aids[wx] for wx in wx_sublist]
        rvecs_list1 = [wx2_drvecs[wx] for wx in wx_sublist]
        maws_list = [wx2_dmaws[wx] for wx in wx_sublist]
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.assert_single_assigned_maws(maws_list)
        # Group by daids first and then by word index
        daid2_wx2_drvecs = clustertool.double_group(wx_sublist, aids_list, rvecs_list1)
        # For every daid, compute its sccw using pregrouped rvecs
        # Summation over words for each aid
        if ut.VERBOSE or verbose:
            print('[smk_index.sccw] SCCW Sum (over daid): ')
        # Get lists w.r.t daids
        aid_list = list(daid2_wx2_drvecs.keys())
        # list of mappings from words to rvecs foreach daid
        # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
        _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
        _aidwxs_iter = (list(wx2_aidrvecs.keys())
                        for wx2_aidrvecs in _wx2_aidrvecs_list)
        aidrvecs_list = [list(wx2_aidrvecs.values())
                         for wx2_aidrvecs in _wx2_aidrvecs_list]
        aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    with ut.Timer('timer_orig2'):
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.check_data_smksumm(aididf_list, aidrvecs_list)
        # TODO: implement database side soft-assign
        sccw_list = [smk_scoring.sccw_summation(rvecs_list, None, idf_list, None,
                                                smk_alpha, smk_thresh)
                     for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]
        daid2_sccw = dict(zip(aid_list, sccw_list))
    if ut.VERBOSE or verbose:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
def TIME_GEN_PREPROC_IMG(ibs):
    from ibeis.algo.preproc.preproc_image import add_images_params_gen
    print('[TIME_GEN_PREPROC_IMG]')
    gid_list = ibs.get_valid_gids()
    gpath_list = ibs.get_image_paths(gid_list)

    # STABILITY
    if not utool.get_argflag('--nostable'):
        # TEST 1
        with utool.Timer('parallel chunksize=1'):
            output1 = list(add_images_params_gen(gpath_list, chunksize=1))
        print(utool.truncate_str(str(output1), 80))
        assert len(output1) == len(gpath_list), 'chunksize changes output'

        # TEST 2
        with utool.Timer('parallel chunksize=2'):
            output2 = list(add_images_params_gen(gpath_list, chunksize=2))
        print(utool.truncate_str(str(output2), 80))
        assert output1 == output2, 'chunksize changes output'

        # TEST N
        with utool.Timer('parallel chunksize=None'):
            outputN = list(add_images_params_gen(gpath_list, chunksize=None))
        print(utool.truncate_str(str(outputN), 80))
        assert outputN == output2, 'chunksize changes output'

    # BENCHMARK
    setup = utool.unindent(
        '''
        from ibeis.algo.preproc.preproc_image import add_images_params_gen
        genkw = dict(prog=False, verbose=True)
        gpath_list = %r
        ''' % (gpath_list,))
    print(utool.truncate_str(str(gpath_list), 80))
    print('Processing %d images' % (len(gpath_list),))
    timeit3 = partial(timeit2, setup=setup, number=3)
    timeit3('list(add_images_params_gen(gpath_list, chunksize=None, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=None, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=1, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=2, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=4, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=8, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=16, **genkw))')
    timeit3('list(add_images_params_gen(gpath_list, chunksize=32, **genkw))')
    print('[/TIME_GEN_PREPROC_IMG]')
    return locals()
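
# The benchmark above binds `timeit2` (defined elsewhere in that test module)
# with functools.partial. A minimal sketch of an equivalent helper built on the
# standard `timeit` module; the name and printout format here are assumptions,
# not the original implementation:
import timeit
from functools import partial

def timeit2(stmt, setup='pass', number=3):
    # Run `stmt` `number` times under `setup` and report the mean wall time
    total = timeit.timeit(stmt, setup=setup, number=number)
    print('%.4fs (avg of %d) for: %s' % (total / number, number, stmt))

# Mirrors the pattern above: fix the setup snippet and repetition count once
timeit3 = partial(timeit2, setup='gpath_list = []', number=3)
timeit3('list(gpath_list)')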
def reindex_step(count, count_list, time_list_reindex):
    daids = all_randomize_daids_[0:count]
    vecs = np.vstack(ibs.get_annot_vecs(daids))
    with ut.Timer(verbose=False) as t:
        flann = make_flann_index(vecs, flann_params)  # NOQA
    count_list.append(count)
    time_list_reindex.append(t.ellapsed)
def subindexer_time_experiment():
    """
    builds plot of number of annotations vs indexer build time.

    TODO: time experiment
    """
    import ibeis
    import utool as ut
    import pyflann
    import plottool as pt
    ibs = ibeis.opendb(db='PZ_Master0')
    daid_list = ibs.get_valid_aids()
    count_list = []
    time_list = []
    flann_params = ibs.cfg.query_cfg.flann_cfg.get_flann_params()
    for count in ut.ProgressIter(range(1, 301)):
        daids_ = daid_list[:]
        np.random.shuffle(daids_)
        daids = daids_[0:count]
        vecs = np.vstack(ibs.get_annot_vecs(daids))
        with ut.Timer(verbose=False) as t:
            flann = pyflann.FLANN()
            flann.build_index(vecs, **flann_params)
        count_list.append(count)
        time_list.append(t.ellapsed)
    count_arr = np.array(count_list)
    time_arr = np.array(time_list)
    pt.plot2(count_arr, time_arr, marker='-', equal_aspect=False,
             x_label='num_annotations', y_label='FLANN build time')
def get_buildtime_data(**kwargs):
    flann_params = vt.get_flann_params(**kwargs)
    print('flann_params = %r' % (ut.dict_str(flann_params),))
    data_list = []
    num = 1000
    print('-----')
    for count in ut.ProgressIter(itertools.count(), nTotal=-1, freq=1,
                                 autoadjust=False):
        num = int(num * 1.2)
        print('num = %r' % (num,))
        #if num > 1E6:
        #    break
        data = pool.get_testdata(num)
        print('object size ' + ut.get_object_size_str(data, 'data'))
        flann = pyflann.FLANN(**flann_params)
        with ut.Timer(verbose=False) as t:
            flann.build_index(data)
        print('t.ellapsed = %r' % (t.ellapsed,))
        if t.ellapsed > 5 or count > 1000:
            break
        data_list.append((count, num, t.ellapsed))
        print('-----')
    return data_list, flann_params
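
# Several functions in this file rely on utool's silent-timer pattern:
# `with ut.Timer(verbose=False) as t: ...` followed by reading `t.ellapsed`.
# A minimal stand-in using only the standard library, for readers without
# utool installed (the class name is an assumption, not part of utool):
import time

class SimpleTimer(object):
    # Context manager that records elapsed wall time, like ut.Timer(verbose=False)
    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, *exc):
        # Match utool's (misspelled) attribute name so call sites read the same
        self.ellapsed = time.perf_counter() - self.start

with SimpleTimer() as t:
    sum(range(10 ** 6))
print('build took %.4fs' % (t.ellapsed,))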
def addition_step(count, flann, count_list2, time_list_addition):
    daids = all_randomize_daids_[count:count + 1]
    vecs = np.vstack(ibs.get_annot_vecs(daids))
    with ut.Timer(verbose=False) as t:
        flann.add_points(vecs)
    count_list2.append(count)
    time_list_addition.append(t.ellapsed)
def gridsearch_coverage_grid():
    """
    CommandLine:
        python -m vtool.coverage_grid --test-gridsearch_coverage_grid --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.coverage_grid import *  # NOQA
        >>> import plottool as pt
        >>> gridsearch_coverage_grid()
        >>> pt.show_if_requested()
    """
    import plottool as pt
    fname = None  # 'easy1.png'
    kpts, chipsize, weights = coverage_kpts.testdata_coverage(fname)
    if len(kpts) > 100:
        kpts = kpts[::100]
        weights = weights[::100]
    cfgdict_list, cfglbl_list = get_coverage_grid_gridsearch_configs()
    coverage_gridtup_list = [
        sparse_grid_coverage(kpts, chipsize, weights, **cfgdict)
        for cfgdict in ut.ProgressIter(cfgdict_list, lbl='coverage grid')
    ]
    fnum = 1
    with ut.Timer('plotting gridsearch'):
        ut.interact_gridsearch_result_images(
            show_coverage_grid, cfgdict_list, cfglbl_list,
            coverage_gridtup_list, fnum=fnum,
            figtitle='coverage grid', unpack=True, max_plots=25)
    pt.iup()
def test_pyflann_io():
    """
    CommandLine:
        python -m vtool.tests.test_pyflann --test-test_pyflann_io

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.tests.test_pyflann import *  # NOQA
        >>> result = test_pyflann_io()
        >>> print(result)
    """
    # Create qpts and database data
    print('Create random qpts and database data')
    num_neighbors = 3
    nPts = 1009
    nQPts = 31
    qpts = testdata_points(nPts=nQPts)
    pts = testdata_points(nPts=nPts)
    # Create flann object
    print('Create flann object')
    flann = pyflann.FLANN()
    # Build kd-tree index over the data
    print('Build the kd tree')
    with utool.Timer('Building the kd-tree with %d pts' % (len(pts),)):
        _build_params = flann.build_index(pts)  # noqa
    # Find the closest few points to num_neighbors
    print('Find nn_index nearest neighbors')
    indices1, dists1 = flann.nn_index(qpts, num_neighbors=num_neighbors)
    # Save the data to disk
    print('Save the data to the disk')
    np.savez('test_pyflann_ptsdata.npz', pts)
    npload_pts = np.load('test_pyflann_ptsdata.npz')
    pts2 = npload_pts['arr_0']
    print('Save and delete the FLANN index')
    flann.save_index('test_pyflann_index.flann')
    flann.delete_index()
    print('Reload the data')
    flann2 = pyflann.FLANN()
    flann2.load_index('test_pyflann_index.flann', pts2)
    indices2, dists2 = flann2.nn_index(qpts, num_neighbors=num_neighbors)
    #print(utool.hz_str('indices2, dists2 = ', indices2, dists2))
    print('Find the same nearest neighbors?')
    if np.all(indices1 == indices2) and np.all(dists1 == dists2):
        print('...data is the same! SUCCESS!')
    else:
        raise AssertionError('...data is different! FAILURE!')
def _test_build_internal_structure(_module, lang):
    import utool as ut
    # Test data
    N = 6
    # N = 2000

    def ider_level0():
        return range(N)

    def ider_level1(input_):
        def _single(x):
            return [y for y in range(x ** 2, x ** 2 + max(0, ((N // 1) - x - 1)))]
        if isinstance(input_, list):
            return [_single(x) for x in input_]
        else:
            x = input_
            return _single(x)

    # Build Structure
    ider_list = [ider_level0, ider_level1]
    num_levels = len(ider_list)

    # TEST RECURSIVE
    print('================')
    with ut.Timer(lang + ' recursive:'):
        if num_levels == 0:
            root_id_list = []
        else:
            root_id_list = ider_list[0]()
        root_node1 = _module.TreeNode(-1, None, -1)
        level = 0
        _module._populate_tree_recursive(
            root_node1, root_id_list, num_levels, ider_list, level)
    if N < 10:
        print('')
        print(api_tree_node.tree_node_string(root_node1, indent=' * '))
    print('================')
    # with ut.Timer(lang + ' iterative:'):
    #     # TEST ITERATIVE
    #     # TODO: Vet this code a bit more.
    #     root_node2 = _module.TreeNode(-1, None, -1)
    #     _module._populate_tree_iterative(
    #         root_node2, num_levels, ider_list)
    # if N < 10:
    #     print('')
    #     print(api_tree_node.tree_node_string(root_node2, indent=' * '))
    print('================')
    print('finished %s test' % lang)
def run_parallel_task(num_procs=None):
    print('run_parallel_task. num_procs=%r' % (num_procs,))
    if num_procs is not None:
        util_parallel.close_pool()
        util_parallel.init_pool(num_procs)
    else:
        num_procs = util_parallel.get_default_numprocs()
    msg = 'processing tasks in %s' % ('serial' if num_procs == 1
                                      else str(num_procs) + '-parallel')
    with utool.Timer(msg):
        result_list = util_parallel.process(pyhesaff.detect_feats,
                                            args_list, hesaff_kwargs)
    print_test_results(result_list)
    return result_list
def TIME_QUERY(ibs):
    print('[TIME_QUERY]')
    #valid_aids = ibs.get_valid_aids()  # [0:20]
    valid_aids = ibs.get_valid_aids()[0:10]  # [0:20]
    qaid_list = valid_aids
    daid_list = valid_aids
    # Query without using the query cache
    querykw = {
        'use_bigcache': False,
        'use_cache': False,
    }
    with utool.Timer('timing all vs all query'):
        qres_list = ibs.query_chips(qaid_list, daid_list, **querykw)
    print('[/TIME_QUERY]')
    return locals()
def TEST_PARALLEL():
    gpath_list = grabdata.get_test_gpaths(ndata=10, names=['zebra', 'lena', 'jeff'])
    args_list = [(gpath,) for gpath in gpath_list]

    @utool.argv_flag_dec
    def print_test_results(result_list):
        for kpts, desc in result_list:
            print('[test] kpts.shape=(%4d, %d), desc.sum=%8d' %
                  (kpts.shape[0], kpts.shape[1], desc.sum()))

    hesaff_kwargs = {'scale_min': -1, 'scale_max': -1, 'nogravity_hack': False}

    with utool.Timer('c++ parallel'):
        kpts_list, desc_list = pyhesaff.detect_feats_list(
            gpath_list, **hesaff_kwargs)

    # Run parallel tasks
    @utool.indent_func('[test_task]')
    def run_parallel_task(num_procs=None):
        print('run_parallel_task. num_procs=%r' % (num_procs,))
        if num_procs is not None:
            util_parallel.close_pool()
            util_parallel.init_pool(num_procs)
        else:
            num_procs = util_parallel.get_default_numprocs()
        msg = 'processing tasks in %s' % ('serial' if num_procs == 1
                                          else str(num_procs) + '-parallel')
        with utool.Timer(msg):
            result_list = util_parallel.process(pyhesaff.detect_feats,
                                                args_list, hesaff_kwargs)
        print_test_results(result_list)
        return result_list
    run_parallel_task()

    # Compare to serial if needed
    @utool.argv_flag_dec
    def compare_serial():
        print('compare_serial')
        run_parallel_task(1)
    compare_serial()
    return locals()
def test_kmeans_plus_plus_speed(n_clusters=2000, n_features=128, per_cluster=10,
                                asint=False, fix=True):
    """
    from speedup_kmeans import *
    from sklearn.cluster.k_means_ import *
    """
    rng = np.random.RandomState(42)

    # Make random cluster centers on a ball
    centers = rng.rand(n_clusters, n_features)
    centers /= np.linalg.norm(centers, axis=0)[None, :]
    centers = (centers * 512).astype(np.uint8) / 512
    centers /= np.linalg.norm(centers, axis=0)[None, :]

    n_samples = int(n_clusters * per_cluster)
    n_clusters, n_features = centers.shape
    X, true_labels = make_blobs(n_samples=n_samples, centers=centers,
                                cluster_std=1., random_state=42)

    if asint:
        X = (X * 512).astype(np.int32)

    x_squared_norms = row_norms(X, squared=True)

    if fix:
        _k_init = sklearn.cluster.k_means_._k_init
    else:
        _k_init = sklearn_master.cluster.k_means_._k_init
    random_state = np.random.RandomState(42)
    n_local_trials = None  # NOQA

    with ut.Timer('testing kmeans init') as t:
        centers = _k_init(X, n_clusters, random_state=random_state,
                          x_squared_norms=x_squared_norms)
    return centers, t.ellapsed
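
# Hedged usage sketch for the benchmark above: compare the patched and
# unpatched kmeans++ seeding times. It assumes both sklearn variants
# (sklearn and sklearn_master) are importable exactly as in the function;
# the cluster count below is arbitrary.
centers_fix, t_fix = test_kmeans_plus_plus_speed(n_clusters=200, fix=True)
centers_old, t_old = test_kmeans_plus_plus_speed(n_clusters=200, fix=False)
print('patched init: %.3fs, master init: %.3fs' % (t_fix, t_old))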
def grabcut_from_probchip(chip_img, label_mask):
    # grabCut needs a rect covering the whole chip
    h, w = chip_img.shape[0:2]
    rect = (0, 0, w, h)
    bgd_model = np.zeros((1, 13 * 5), np.float64)
    fgd_model = np.zeros((1, 13 * 5), np.float64)
    num_iters = 5
    mode = cv2.GC_INIT_WITH_MASK
    # label_mask is an outvar
    label_mask_ = label_mask.copy()
    print(label_values)
    print(np.unique(label_mask_))
    with ut.Timer('grabcut'):
        cv2.grabCut(chip_img, label_mask_, rect, bgd_model, fgd_model,
                    num_iters, mode=mode)
    #is_foreground = (label_mask == cv2.GC_FGD) + (label_mask == cv2.GC_PR_FGD)
    #is_foreground = (label_mask_ == cv2.GC_FGD)  # + (label_mask == cv2.GC_PR_FGD)
    return label_mask_
def segment(img_fpath, bbox_, new_size=None):
    """ Runs grabcut """
    printDBG('[segm] segment(img_fpath=%r, bbox=%r)>' % (img_fpath, bbox_))
    num_iters = 5
    bgd_model = np.zeros((1, 13 * 5), np.float64)
    fgd_model = np.zeros((1, 13 * 5), np.float64)
    mode = cv2.GC_INIT_WITH_MASK
    # Initialize
    # !!! CV2 READS (H,W) !!!
    # WH Unsafe
    img_resz, bbox_resz = resize_img_and_bbox(img_fpath, bbox_, new_size=new_size)
    # WH Unsafe
    (img_h, img_w) = img_resz.shape[:2]  # Image Shape
    printDBG(' * img_resz.shape=%r' % ((img_h, img_w),))
    # WH Safe
    tlbr = ut.xywh_to_tlbr(bbox_resz, (img_w, img_h))  # Rectangle ANNOTATION
    (x1, y1, x2, y2) = tlbr
    rect = tuple(bbox_resz)  # Initialize: rect
    printDBG(' * rect=%r' % (rect,))
    printDBG(' * tlbr=%r' % (tlbr,))
    # WH Unsafe
    _mask = np.zeros((img_h, img_w), dtype=np.uint8)  # Initialize: mask
    _mask[y1:y2, x1:x2] = cv2.GC_PR_FGD  # Set ANNOTATION to cv2.GC_PR_FGD
    # Grab Cut
    tt = ut.Timer(' * cv2.grabCut()', verbose=DEBUG_SEGM)
    cv2.grabCut(img_resz, _mask, rect, bgd_model, fgd_model, num_iters, mode=mode)
    tt.toc()
    img_mask = np.where((_mask == cv2.GC_FGD) + (_mask == cv2.GC_PR_FGD),
                        255, 0).astype('uint8')
    # Crop
    chip = img_resz[y1:y2, x1:x2]
    chip_mask = img_mask[y1:y2, x1:x2]
    chip_mask = clean_mask(chip_mask)
    chip_mask = np.array(chip_mask, np.float) / 255.0
    # Mask the value of HSV
    chip_hsv = cv2.cvtColor(chip, cv2.COLOR_RGB2HSV)
    chip_hsv = np.array(chip_hsv, dtype=np.float) / 255.0
    chip_hsv[:, :, 2] *= chip_mask
    chip_hsv = np.array(np.round(chip_hsv * 255.0), dtype=np.uint8)
    seg_chip = cv2.cvtColor(chip_hsv, cv2.COLOR_HSV2RGB)
    return seg_chip, img_mask
def wait_for_job_result(jobiface, jobid, timeout=10, freq=.1):
    t = ut.Timer(verbose=False)
    t.tic()
    while True:
        reply = jobiface.get_job_status(jobid)
        if reply['jobstatus'] == 'completed':
            return
        elif reply['jobstatus'] == 'exception':
            result = jobiface.get_unpacked_result(jobid)
            #raise Exception(result)
            print('Exception occurred in engine')
            return result
        elif reply['jobstatus'] == 'working':
            pass
        elif reply['jobstatus'] == 'unknown':
            pass
        else:
            raise Exception('Unknown jobstatus=%r' % (reply['jobstatus'],))
        time.sleep(freq)
        if timeout is not None and t.toc() > timeout:
            raise Exception('Timeout')
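
# The function above is a poll-with-timeout loop specialized to the job engine.
# A minimal, self-contained sketch of the same pattern in generic form
# (the helper name and its callback argument are assumptions for illustration):
import time

def poll_until(check, timeout=10, freq=0.1):
    # Call `check` repeatedly until it returns something truthy
    # or the timeout expires, sleeping `freq` seconds between polls.
    start = time.time()
    while True:
        result = check()
        if result:
            return result
        if timeout is not None and time.time() - start > timeout:
            raise Exception('Timeout')
        time.sleep(freq)

print(poll_until(lambda: True))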
def compute_word_weights(inva, method='idf'):
    """
    Compute a per-word weight like idf

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.smk.inverted_index import *  # NOQA
        >>> qreq_, inva = testdata_inva()
        >>> wx_to_weight = inva.compute_word_weights()
        >>> print('wx_to_weight = %r' % (wx_to_weight,))
    """
    wx_list = sorted(inva.wx_to_aids.keys())
    with ut.Timer('Computing %s weights' % (method,)):
        if method == 'idf':
            ndocs_total = len(inva.aids)
            # Unweighted documents
            ndocs_per_word = np.array(
                [len(set(inva.wx_to_aids[wx])) for wx in wx_list])
            weight_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word)
        elif method == 'idf-maw':
            # idf denom (the num of docs containing a word for each word)
            # The max(maws) denote the prob that this word indexes an annot
            ndocs_total = len(inva.aids)
            # Weighted documents
            wx_to_ndocs = {wx: 0.0 for wx in wx_list}
            for wx, maws in zip(ut.iflatten(inva.wx_lists),
                                ut.iflatten(inva.maws_lists)):
                wx_to_ndocs[wx] += min(1.0, max(maws))
            ndocs_per_word = ut.take(wx_to_ndocs, wx_list)
            weight_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word)
        elif method == 'uniform':
            weight_per_word = np.ones(len(wx_list))
        wx_to_weight = dict(zip(wx_list, weight_per_word))
        wx_to_weight = ut.DefaultValueDict(0, wx_to_weight)
    return wx_to_weight
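
# smk_funcs.inv_doc_freq is not shown in this file. A minimal sketch of the
# standard idf weighting it presumably computes, on made-up counts; the exact
# smoothing used by smk_funcs may differ:
import numpy as np

ndocs_total = 1000                         # number of annotations in the index
ndocs_per_word = np.array([1, 10, 1000])   # annotations containing each word
# Rare words get large weights, ubiquitous words get weights near zero
idf = np.log(ndocs_total / ndocs_per_word)
print(idf)  # approximately [6.91, 4.61, 0.0]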
def compute_inverted_list(inva):
    with ut.Timer('Building inverted list'):
        wx_to_aids = smk_funcs.invert_lists(inva.aids, inva.wx_lists)
    return wx_to_aids
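
# A minimal sketch of the inversion smk_funcs.invert_lists presumably performs,
# on made-up data (the real helper also accepts an all_wxs argument, as used
# later in run_asmk_script):
from collections import defaultdict

aids = [1, 2, 3]                  # annotation ids
wx_lists = [[0, 5], [5, 7], [0]]  # word indexes assigned to each annotation

wx_to_aids = defaultdict(list)
for aid, wxs in zip(aids, wx_lists):
    for wx in wxs:
        wx_to_aids[wx].append(aid)

print(dict(wx_to_aids))  # {0: [1, 3], 5: [1, 2], 7: [2]}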
def run_asmk_script(): with ut.embed_on_exception_context: # NOQA """ >>> from wbia.algo.smk.script_smk import * """ # NOQA # ============================================== # PREPROCESSING CONFIGURATION # ============================================== config = { # 'data_year': 2013, 'data_year': None, 'dtype': 'float32', # 'root_sift': True, 'root_sift': False, # 'centering': True, 'centering': False, 'num_words': 2**16, # 'num_words': 1E6 # 'num_words': 8000, 'kmeans_impl': 'sklearn.mini', 'extern_words': False, 'extern_assign': False, 'assign_algo': 'kdtree', 'checks': 1024, 'int_rvec': True, 'only_xy': False, } # Define which params are relevant for which operations relevance = {} relevance['feats'] = ['dtype', 'root_sift', 'centering', 'data_year'] relevance['words'] = relevance['feats'] + [ 'num_words', 'extern_words', 'kmeans_impl', ] relevance['assign'] = relevance['words'] + [ 'checks', 'extern_assign', 'assign_algo', ] # relevance['ydata'] = relevance['assign'] + ['int_rvec'] # relevance['xdata'] = relevance['assign'] + ['only_xy', 'int_rvec'] nAssign = 1 class SMKCacher(ut.Cacher): def __init__(self, fname, ext='.cPkl'): relevant_params = relevance[fname] relevant_cfg = ut.dict_subset(config, relevant_params) cfgstr = ut.get_cfg_lbl(relevant_cfg) dbdir = ut.truepath('/raid/work/Oxford/') super(SMKCacher, self).__init__(fname, cfgstr, cache_dir=dbdir, ext=ext) # ============================================== # LOAD DATASET, EXTRACT AND POSTPROCESS FEATURES # ============================================== if config['data_year'] == 2007: data = load_oxford_2007() elif config['data_year'] == 2013: data = load_oxford_2013() elif config['data_year'] is None: data = load_oxford_wbia() offset_list = data['offset_list'] all_kpts = data['all_kpts'] raw_vecs = data['all_vecs'] query_uri_order = data['query_uri_order'] data_uri_order = data['data_uri_order'] # del data # ================ # PRE-PROCESS # ================ import vtool as vt # Alias names to avoid errors in interactive sessions proc_vecs = raw_vecs del raw_vecs feats_cacher = SMKCacher('feats', ext='.npy') all_vecs = feats_cacher.tryload() if all_vecs is None: if config['dtype'] == 'float32': logger.info('Converting vecs to float32') proc_vecs = proc_vecs.astype(np.float32) else: proc_vecs = proc_vecs raise NotImplementedError('other dtype') if config['root_sift']: with ut.Timer('Apply root sift'): np.sqrt(proc_vecs, out=proc_vecs) vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs) if config['centering']: with ut.Timer('Apply centering'): mean_vec = np.mean(proc_vecs, axis=0) # Center and then re-normalize np.subtract(proc_vecs, mean_vec[None, :], out=proc_vecs) vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs) if config['dtype'] == 'int8': smk_funcs all_vecs = proc_vecs feats_cacher.save(all_vecs) del proc_vecs # ===================================== # BUILD VISUAL VOCABULARY # ===================================== if config['extern_words']: words = data['words'] assert config['num_words'] is None or len( words) == config['num_words'] else: word_cacher = SMKCacher('words') words = word_cacher.tryload() if words is None: with ut.embed_on_exception_context: if config['kmeans_impl'] == 'sklearn.mini': import sklearn.cluster rng = np.random.RandomState(13421421) # init_size = int(config['num_words'] * 8) init_size = int(config['num_words'] * 4) # converged after 26043 iterations clusterer = sklearn.cluster.MiniBatchKMeans( config['num_words'], init_size=init_size, batch_size=1000, compute_labels=False, max_iter=20, 
random_state=rng, n_init=1, verbose=1, ) clusterer.fit(all_vecs) words = clusterer.cluster_centers_ elif config['kmeans_impl'] == 'yael': from yael import ynumpy centroids, qerr, dis, assign, nassign = ynumpy.kmeans( all_vecs, config['num_words'], init='kmeans++', verbose=True, output='all', ) words = centroids word_cacher.save(words) # ===================================== # ASSIGN EACH VECTOR TO ITS NEAREST WORD # ===================================== if config['extern_assign']: assert config[ 'extern_words'], 'need extern cluster to extern assign' idx_to_wxs = vt.atleast_nd(data['idx_to_wx'], 2) idx_to_maws = np.ones(idx_to_wxs.shape, dtype=np.float32) idx_to_wxs = np.ma.array(idx_to_wxs) idx_to_maws = np.ma.array(idx_to_maws) else: from wbia.algo.smk import vocab_indexer vocab = vocab_indexer.VisualVocab(words) dassign_cacher = SMKCacher('assign') assign_tup = dassign_cacher.tryload() if assign_tup is None: vocab.flann_params['algorithm'] = config['assign_algo'] vocab.build() # Takes 12 minutes to assign jegous vecs to 2**16 vocab with ut.Timer('assign vocab neighbors'): _idx_to_wx, _idx_to_wdist = vocab.nn_index( all_vecs, nAssign, checks=config['checks']) if nAssign > 1: idx_to_wxs, idx_to_maws = smk_funcs.weight_multi_assigns( _idx_to_wx, _idx_to_wdist, massign_alpha=1.2, massign_sigma=80.0, massign_equal_weights=True, ) else: idx_to_wxs = np.ma.masked_array(_idx_to_wx, fill_value=-1) idx_to_maws = np.ma.ones(idx_to_wxs.shape, fill_value=-1, dtype=np.float32) idx_to_maws.mask = idx_to_wxs.mask assign_tup = (idx_to_wxs, idx_to_maws) dassign_cacher.save(assign_tup) idx_to_wxs, idx_to_maws = assign_tup # Breakup vectors, keypoints, and word assignments by annotation wx_lists = [ idx_to_wxs[left:right] for left, right in ut.itertwo(offset_list) ] maw_lists = [ idx_to_maws[left:right] for left, right in ut.itertwo(offset_list) ] vecs_list = [ all_vecs[left:right] for left, right in ut.itertwo(offset_list) ] kpts_list = [ all_kpts[left:right] for left, right in ut.itertwo(offset_list) ] # ======================= # FIND QUERY SUBREGIONS # ======================= ibs, query_annots, data_annots, qx_to_dx = load_ordered_annots( data_uri_order, query_uri_order) daids = data_annots.aids qaids = query_annots.aids query_super_kpts = ut.take(kpts_list, qx_to_dx) query_super_vecs = ut.take(vecs_list, qx_to_dx) query_super_wxs = ut.take(wx_lists, qx_to_dx) query_super_maws = ut.take(maw_lists, qx_to_dx) # Mark which keypoints are within the bbox of the query query_flags_list = [] only_xy = config['only_xy'] for kpts_, bbox in zip(query_super_kpts, query_annots.bboxes): flags = kpts_inside_bbox(kpts_, bbox, only_xy=only_xy) query_flags_list.append(flags) logger.info('Queries are crops of existing database images.') logger.info('Looking at average percents') percent_list = [ flags_.sum() / flags_.shape[0] for flags_ in query_flags_list ] percent_stats = ut.get_stats(percent_list) logger.info('percent_stats = %s' % (ut.repr4(percent_stats), )) import vtool as vt query_kpts = vt.zipcompress(query_super_kpts, query_flags_list, axis=0) query_vecs = vt.zipcompress(query_super_vecs, query_flags_list, axis=0) query_wxs = vt.zipcompress(query_super_wxs, query_flags_list, axis=0) query_maws = vt.zipcompress(query_super_maws, query_flags_list, axis=0) # ======================= # CONSTRUCT QUERY / DATABASE REPR # ======================= # int_rvec = not config['dtype'].startswith('float') int_rvec = config['int_rvec'] X_list = [] _prog = ut.ProgPartial(length=len(qaids), label='new X', bs=True, adjust=True) for 
aid, fx_to_wxs, fx_to_maws in _prog( zip(qaids, query_wxs, query_maws)): X = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec) X_list.append(X) # ydata_cacher = SMKCacher('ydata') # Y_list = ydata_cacher.tryload() # if Y_list is None: Y_list = [] _prog = ut.ProgPartial(length=len(daids), label='new Y', bs=True, adjust=True) for aid, fx_to_wxs, fx_to_maws in _prog(zip(daids, wx_lists, maw_lists)): Y = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec) Y_list.append(Y) # ydata_cacher.save(Y_list) # ====================== # Add in some groundtruth logger.info('Add in some groundtruth') for Y, nid in zip(Y_list, ibs.get_annot_nids(daids)): Y.nid = nid for X, nid in zip(X_list, ibs.get_annot_nids(qaids)): X.nid = nid for Y, qual in zip(Y_list, ibs.get_annot_quality_texts(daids)): Y.qual = qual # ====================== # Add in other properties for Y, vecs, kpts in zip(Y_list, vecs_list, kpts_list): Y.vecs = vecs Y.kpts = kpts imgdir = ut.truepath('/raid/work/Oxford/oxbuild_images') for Y, imgid in zip(Y_list, data_uri_order): gpath = ut.unixjoin(imgdir, imgid + '.jpg') Y.gpath = gpath for X, vecs, kpts in zip(X_list, query_vecs, query_kpts): X.kpts = kpts X.vecs = vecs # ====================== logger.info('Building inverted list') daids = [Y.aid for Y in Y_list] # wx_list = sorted(ut.list_union(*[Y.wx_list for Y in Y_list])) wx_list = sorted(set.union(*[Y.wx_set for Y in Y_list])) assert daids == data_annots.aids assert len(wx_list) <= config['num_words'] wx_to_aids = smk_funcs.invert_lists(daids, [Y.wx_list for Y in Y_list], all_wxs=wx_list) # Compute IDF weights logger.info('Compute IDF weights') ndocs_total = len(daids) # Use only the unique number of words ndocs_per_word = np.array([len(set(wx_to_aids[wx])) for wx in wx_list]) logger.info('ndocs_perword stats: ' + ut.repr4(ut.get_stats(ndocs_per_word))) idf_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word) wx_to_weight = dict(zip(wx_list, idf_per_word)) logger.info('idf stats: ' + ut.repr4(ut.get_stats(wx_to_weight.values()))) # Filter junk Y_list_ = [Y for Y in Y_list if Y.qual != 'junk'] # ======================= # CHOOSE QUERY KERNEL # ======================= params = { 'asmk': dict(alpha=3.0, thresh=0.0), 'bow': dict(), 'bow2': dict(), } # method = 'bow' method = 'bow2' method = 'asmk' smk = SMK(wx_to_weight, method=method, **params[method]) # Specific info for the type of query if method == 'asmk': # Make residual vectors if True: # The stacked way is 50x faster # TODO: extend for multi-assignment and record fxs flat_query_vecs = np.vstack(query_vecs) flat_query_wxs = np.vstack(query_wxs) flat_query_offsets = np.array( [0] + ut.cumsum(ut.lmap(len, query_wxs))) flat_wxs_assign = flat_query_wxs flat_offsets = flat_query_offsets flat_vecs = flat_query_vecs tup = smk_funcs.compute_stacked_agg_rvecs( words, flat_wxs_assign, flat_vecs, flat_offsets) all_agg_vecs, all_error_flags, agg_offset_list = tup if int_rvec: all_agg_vecs = smk_funcs.cast_residual_integer( all_agg_vecs) agg_rvecs_list = [ all_agg_vecs[left:right] for left, right in ut.itertwo(agg_offset_list) ] agg_flags_list = [ all_error_flags[left:right] for left, right in ut.itertwo(agg_offset_list) ] for X, agg_rvecs, agg_flags in zip(X_list, agg_rvecs_list, agg_flags_list): X.agg_rvecs = agg_rvecs X.agg_flags = agg_flags[:, None] flat_wxs_assign = idx_to_wxs flat_offsets = offset_list flat_vecs = all_vecs tup = smk_funcs.compute_stacked_agg_rvecs( words, flat_wxs_assign, flat_vecs, flat_offsets) all_agg_vecs, all_error_flags, agg_offset_list = tup if 
int_rvec: all_agg_vecs = smk_funcs.cast_residual_integer( all_agg_vecs) agg_rvecs_list = [ all_agg_vecs[left:right] for left, right in ut.itertwo(agg_offset_list) ] agg_flags_list = [ all_error_flags[left:right] for left, right in ut.itertwo(agg_offset_list) ] for Y, agg_rvecs, agg_flags in zip(Y_list, agg_rvecs_list, agg_flags_list): Y.agg_rvecs = agg_rvecs Y.agg_flags = agg_flags[:, None] else: # This non-stacked way is about 500x slower _prog = ut.ProgPartial(label='agg Y rvecs', bs=True, adjust=True) for Y in _prog(Y_list_): make_agg_vecs(Y, words, Y.vecs) _prog = ut.ProgPartial(label='agg X rvecs', bs=True, adjust=True) for X in _prog(X_list): make_agg_vecs(X, words, X.vecs) elif method == 'bow2': # Hack for orig tf-idf bow vector nwords = len(words) for X in ut.ProgIter(X_list, label='make bow vector'): ensure_tf(X) bow_vector(X, wx_to_weight, nwords) for Y in ut.ProgIter(Y_list_, label='make bow vector'): ensure_tf(Y) bow_vector(Y, wx_to_weight, nwords) if method != 'bow2': for X in ut.ProgIter(X_list, 'compute X gamma'): X.gamma = smk.gamma(X) for Y in ut.ProgIter(Y_list_, 'compute Y gamma'): Y.gamma = smk.gamma(Y) # Execute matches (could go faster by enumerating candidates) scores_list = [] for X in ut.ProgIter(X_list, label='query %s' % (smk, )): scores = [smk.kernel(X, Y) for Y in Y_list_] scores = np.array(scores) scores = np.nan_to_num(scores) scores_list.append(scores) import sklearn.metrics avep_list = [] _iter = list(zip(scores_list, X_list)) _iter = ut.ProgIter(_iter, label='evaluate %s' % (smk, )) for scores, X in _iter: truth = [X.nid == Y.nid for Y in Y_list_] avep = sklearn.metrics.average_precision_score(truth, scores) avep_list.append(avep) avep_list = np.array(avep_list) mAP = np.mean(avep_list) logger.info('mAP = %r' % (mAP, ))
def prometheus_update(ibs, *args, **kwargs): try: with ut.Timer(verbose=False) as timer: if ibs.containerized: container_name = const.CONTAINER_NAME else: container_name = ibs.dbname global PROMETHEUS_COUNTER PROMETHEUS_COUNTER = PROMETHEUS_COUNTER + 1 # NOQA # logger.info('PROMETHEUS LIMIT %d / %d' % (PROMETHEUS_COUNTER, PROMETHEUS_LIMIT, )) if PROMETHEUS_COUNTER >= PROMETHEUS_LIMIT: PROMETHEUS_COUNTER = 0 try: PROMETHEUS_DATA['info'].info( { 'uuid': str(ibs.get_db_init_uuid()), 'dbname': ibs.dbname, 'hostname': ut.get_computer_name(), 'container': container_name, 'version': ibs.db.get_db_version(), 'containerized': str(int(ibs.containerized)), 'production': str(int(ibs.production)), } ) except Exception: pass try: if ibs.production: num_imageset_rowids = 0 num_gids = 0 num_aids = 0 num_pids = 0 num_nids = 0 num_species = 0 else: num_imageset_rowids = len(ibs._get_all_imageset_rowids()) num_gids = len(ibs._get_all_gids()) num_aids = len(ibs._get_all_aids()) num_pids = len(ibs._get_all_part_rowids()) num_nids = len(ibs._get_all_name_rowids()) num_species = len(ibs._get_all_species_rowids()) PROMETHEUS_DATA['imagesets'].labels(name=container_name).set( num_imageset_rowids ) PROMETHEUS_DATA['images'].labels(name=container_name).set(num_gids) PROMETHEUS_DATA['annotations'].labels(name=container_name).set( num_aids ) PROMETHEUS_DATA['parts'].labels(name=container_name).set(num_pids) PROMETHEUS_DATA['names'].labels(name=container_name).set(num_nids) PROMETHEUS_DATA['species'].labels(name=container_name).set( num_species ) except Exception: pass try: job_status_dict = ibs.get_job_status()['json_result'] except Exception: pass try: job_uuid_list = list(job_status_dict.keys()) status_dict_template = { 'received': 0, 'accepted': 0, 'queued': 0, 'working': 0, 'publishing': 0, 'completed': 0, 'exception': 0, 'suppressed': 0, 'corrupted': 0, '_error': 0, } status_dict = { '*': status_dict_template.copy(), 'max': status_dict_template.copy(), } endpoints = set([]) working_endpoint = None except Exception: pass for job_uuid in job_uuid_list: try: job_status = job_status_dict[job_uuid] status = job_status['status'] endpoint = job_status['endpoint'] jobcounter = job_status['jobcounter'] status = '%s' % (status,) endpoint = '%s' % (endpoint,) if status not in status_dict_template.keys(): status = '_error' if endpoint not in status_dict: status_dict[endpoint] = status_dict_template.copy() endpoints.add(endpoint) except Exception: pass try: if status in ['working']: from wbia.web.job_engine import ( calculate_timedelta, _timestamp, ) started = job_status['time_started'] now = _timestamp() ( hours, minutes, seconds, total_seconds, ) = calculate_timedelta(started, now) logger.info( 'ELAPSED (%s): %d seconds...' 
% (job_uuid, total_seconds) ) PROMETHEUS_DATA['elapsed'].labels( name=container_name, endpoint=endpoint ).set(total_seconds) PROMETHEUS_DATA['elapsed'].labels( name=container_name, endpoint='*' ).set(total_seconds) working_endpoint = endpoint except Exception: pass try: if status not in status_dict_template: logger.info('UNRECOGNIZED STATUS %r' % (status,)) status_dict[endpoint][status] += 1 status_dict['*'][status] += 1 current_max = status_dict['max'][status] status_dict['max'][status] = max(current_max, jobcounter) if job_uuid not in PROMETHUS_JOB_CACHE_DICT: PROMETHUS_JOB_CACHE_DICT[job_uuid] = {} except Exception: pass try: runtime_sec = job_status.get('time_runtime_sec', None) if ( runtime_sec is not None and 'runtime' not in PROMETHUS_JOB_CACHE_DICT[job_uuid] ): PROMETHUS_JOB_CACHE_DICT[job_uuid]['runtime'] = runtime_sec PROMETHEUS_DATA['runtime'].labels( name=container_name, endpoint=endpoint ).set(runtime_sec) PROMETHEUS_DATA['runtime'].labels( name=container_name, endpoint='*' ).set(runtime_sec) except Exception: pass try: turnaround_sec = job_status.get('time_turnaround_sec', None) if ( turnaround_sec is not None and 'turnaround' not in PROMETHUS_JOB_CACHE_DICT[job_uuid] ): PROMETHUS_JOB_CACHE_DICT[job_uuid][ 'turnaround' ] = turnaround_sec PROMETHEUS_DATA['turnaround'].labels( name=container_name, endpoint=endpoint ).set(turnaround_sec) PROMETHEUS_DATA['turnaround'].labels( name=container_name, endpoint='*' ).set(turnaround_sec) except Exception: pass try: if working_endpoint is None: PROMETHEUS_DATA['elapsed'].labels( name=container_name, endpoint='*' ).set(0.0) for endpoint in endpoints: if endpoint == working_endpoint: continue PROMETHEUS_DATA['elapsed'].labels( name=container_name, endpoint=endpoint ).set(0.0) except Exception: pass try: # logger.info(ut.repr3(status_dict)) for endpoint in status_dict: for status in status_dict[endpoint]: number = status_dict[endpoint][status] PROMETHEUS_DATA['engine'].labels( status=status, name=container_name, endpoint=endpoint ).set(number) except Exception: pass try: # logger.info(ut.repr3(status_dict)) process_status_dict = ibs.get_process_alive_status() for process in process_status_dict: number = 0 if process_status_dict.get(process, False) else 1 PROMETHEUS_DATA['process'].labels( process=process, name=container_name ).set(number) except Exception: pass try: PROMETHEUS_DATA['update'].labels(name=container_name).set(timer.ellapsed) except Exception: pass except Exception: pass
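
# prometheus_update above drives gauges through prometheus_client's
# labels()/set() pattern. A minimal sketch of registering and updating such a
# gauge; the metric name and label value below are placeholders, not the
# metrics wbia actually registers:
from prometheus_client import Gauge

# Placeholder metric keyed by container name, like PROMETHEUS_DATA['images']
images_gauge = Gauge('example_images', 'Number of images in the database', ['name'])
images_gauge.labels(name='my_container').set(42)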
def load_oxford_2007(): """ Loads data from http://www.robots.ox.ac.uk:5000/~vgg/publications/2007/Philbin07/philbin07.pdf >>> from wbia.algo.smk.script_smk import * # NOQA """ from os.path import join, basename, splitext import pandas as pd import vtool as vt dbdir = ut.truepath('/raid/work/Oxford/') data_fpath0 = join(dbdir, 'data_2007.pkl') if ut.checkpath(data_fpath0): data = ut.load_data(data_fpath0) return data else: word_dpath = join(dbdir, 'word_oxc1_hesaff_sift_16M_1M') _word_fpath_list = ut.ls(word_dpath) imgid_to_word_fpath = { splitext(basename(word_fpath))[0]: word_fpath for word_fpath in _word_fpath_list } readme_fpath = join(dbdir, 'README2.txt') imgid_order = ut.readfrom(readme_fpath).split('\n')[20:-1] imgid_order = imgid_order data_uri_order = [x.replace('oxc1_', '') for x in imgid_order] imgid_to_df = {} for imgid in ut.ProgIter(imgid_order, label='reading kpts'): word_fpath = imgid_to_word_fpath[imgid] row_gen = (map(float, line.strip('\n').split(' ')) for line in ut.read_lines_from(word_fpath)[2:]) rows = [(int(word_id), x, y, e11, e12, e22) for (word_id, x, y, e11, e12, e22) in row_gen] df = pd.DataFrame( rows, columns=['word_id', 'x', 'y', 'e11', 'e12', 'e22']) imgid_to_df[imgid] = df df_list = ut.take(imgid_to_df, imgid_order) nfeat_list = [len(df_) for df_ in df_list] offset_list = [0] + ut.cumsum(nfeat_list) shape = (offset_list[-1], 128) # shape = (16334970, 128) sift_fpath = join(dbdir, 'OxfordSIFTDescriptors', 'feat_oxc1_hesaff_sift.bin') try: file_ = open(sift_fpath, 'rb') with ut.Timer('Reading SIFT binary file'): nbytes = np.prod(shape) all_vecs = np.fromstring(file_.read(nbytes), dtype=np.uint8) all_vecs = all_vecs.reshape(shape) finally: file_.close() kpts_list = [ df_.loc[:, ('x', 'y', 'e11', 'e12', 'e22')].values for df_ in df_list ] wordid_list = [df_.loc[:, 'word_id'].values for df_ in df_list] kpts_Z = np.vstack(kpts_list) idx_to_wx = np.hstack(wordid_list) # assert len(np.unique(idx_to_wx)) == 1E6 # Reqd standard query order query_files = sorted( ut.glob(dbdir + '/oxford_groundtruth', '*_query.txt')) query_uri_order = [] for qpath in query_files: text = ut.readfrom(qpath, verbose=0) query_uri = text.split(' ')[0].replace('oxc1_', '') query_uri_order.append(query_uri) logger.info('converting to invV') all_kpts = vt.convert_kptsZ_to_kpts(kpts_Z) data = { 'offset_list': offset_list, 'all_kpts': all_kpts, 'all_vecs': all_vecs, 'idx_to_wx': idx_to_wx, 'data_uri_order': data_uri_order, 'query_uri_order': query_uri_order, } ut.save_data(data_fpath0, data) return data
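
# load_oxford_2007 above reads the packed uint8 SIFT descriptors with
# np.fromstring, which newer NumPy versions deprecate for binary input.
# A minimal sketch of the same read using np.frombuffer, on made-up bytes
# rather than the Oxford descriptor file:
import numpy as np

n_feats, dim = 4, 128
packed = np.random.randint(0, 256, size=(n_feats, dim), dtype=np.uint8).tobytes()
# Equivalent of the fromstring call above, without the deprecation warning
all_vecs = np.frombuffer(packed, dtype=np.uint8).reshape(n_feats, dim)
assert all_vecs.shape == (n_feats, dim)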
def get_extern_distinctiveness(qreq_, cm, **kwargs): r""" Uses distinctivness normalizer class (which uses predownloaded models) to normalize the distinctivness of a keypoint for query points. IDEA: because we have database points as well we can use the distance between normalizer of the query point and the normalizer of the database point. They should have a similar normalizer if they are a correct match AND nondistinctive. Args: qreq_ (QueryRequest): query request object with hyper-parameters cm (QueryResult): object of feature correspondences and scores Returns: tuple: (new_fsv_list, daid_list) CommandLine: python -m ibeis.algo.hots.special_query --test-get_extern_distinctiveness Example: >>> # SLOW_DOCTEST >>> from ibeis.algo.hots.special_query import * # NOQA >>> import ibeis >>> # build test data >>> ibs = ibeis.opendb('testdb1') >>> daids = ibs.get_valid_aids(species=ibeis.const.TEST_SPECIES.ZEB_PLAIN) >>> qaids = daids[0:1] >>> cfgdict = dict(codename='vsone_unnorm_dist_ratio_extern_distinctiveness') >>> qreq_ = ibs.new_query_request(qaids, daids, cfgdict=cfgdict) >>> #qreq_.lazy_load() >>> cm = ibs.query_chips(qreq_=qreq_, use_cache=False, save_qcache=False)[0] >>> # execute function >>> (new_fsv_list, daid_list) = get_extern_distinctiveness(qreq_, cm) >>> # verify results >>> assert all([fsv.shape[1] == 1 + len(cm.filtkey_list) for fsv in new_fsv_list]) >>> assert all([np.all(fsv.T[-1] >= 0) for fsv in new_fsv_list]) >>> assert all([np.all(fsv.T[-1] <= 1) for fsv in new_fsv_list]) """ dstcnvs_normer = qreq_.dstcnvs_normer assert dstcnvs_normer is not None, 'must have loaded normalizer' filtkey = hstypes.FiltKeys.DISTINCTIVENESS # make sure filter does not already exist scorex_vsone = ut.listfind(cm.filtkey_list, filtkey) assert scorex_vsone is None, 'already applied distinctivness' daid_list = list(six.iterkeys(cm.aid2_fsv)) # Find subset of features to get distinctivness of qfxs_list = [cm.aid2_fm[daid].T[0] for daid in daid_list] query_vecs = qreq_.ibs.get_annot_vecs(cm.qaid, config2_=qreq_.qparams) # there might be duplicate feature indexes in the list of feature index # lists. We can use to perform neighbor lookup more efficiently by only # performing a single query per feature index. 
Utool does the mapping for us def rowid_distinctivness(unique_flat_qfx_list, dstcnvs_normer=None, query_vecs=None, **kwargs): # Take only the unique vectors unique_flat_subvecs = query_vecs.take(unique_flat_qfx_list, axis=0) unique_flat_dstcvns = dstcnvs_normer.get_distinctiveness( unique_flat_subvecs, **kwargs) return unique_flat_dstcvns[:, None] aug_fsv_list = ut.unflat_unique_rowid_map(rowid_distinctivness, qfxs_list, dstcnvs_normer=dstcnvs_normer, query_vecs=query_vecs, **kwargs) if False: with ut.Timer('time1'): aug_fsv_list = ut.unflat_unique_rowid_map( rowid_distinctivness, qfxs_list, dstcnvs_normer=dstcnvs_normer, query_vecs=query_vecs) with ut.Timer('time2'): # Less efficient way to do this _vecs_list = [query_vecs.take(qfxs, axis=0) for qfxs in qfxs_list] _aug_fsv_list = [ dstcnvs_normer.get_distinctiveness(_vecs)[:, None] for _vecs in _vecs_list ] isequal_list = [ np.all(np.equal(*tup)) for tup in zip(aug_fsv_list, _aug_fsv_list) ] assert all(isequal_list), 'utool is broken' # Compute the distinctiveness as the augmenting score # ensure the shape is (X, 1) # Stack the new and augmenting scores old_fsv_list = [cm.aid2_fsv[daid] for daid in daid_list] new_fsv_list = list(map(np.hstack, zip(old_fsv_list, aug_fsv_list))) # FURTHER HACKS TO SCORING #if 'fg_power' in kwargs: for filtkey in hstypes.WEIGHT_FILTERS: key = filtkey + '_power' if key in kwargs: _power = kwargs[key] _index = ut.listfind(cm.filtkey_list, filtkey) for fsv in new_fsv_list: fsv.T[_index] **= _power #new_aid2_fsv = dict(zip(daid_list, new_fsv_list)) return new_fsv_list, daid_list
def post(self): global NETWORK_MODEL_TAG global NETWORK global NETWORK_VALUES response = {'success': False} # ut.embed() try: with ut.Timer('Pre'): parser = reqparse.RequestParser() parser.add_argument('image', type=str) parser.add_argument('config', type=dict) args = parser.parse_args() image_base64_str = args['image'] image = get_image_from_base64_str(image_base64_str) config = args['config'] model_tag = config.get('model_tag', None) num_returns = config.get('topk', 100) model_url = model_url_dict.get(model_tag, None) assert model_url is not None, 'Model tag %r is not recognized' % ( model_tag, ) if model_tag != NETWORK_MODEL_TAG: with ut.Timer('Loading network'): print('Loading network from weights %r' % (model_tag, )) values_url = model_url.replace('.pth', '.values.pth') # Download files model_filepath = ut.grab_file_url(model_url, appname='kaggle7', check_hash=True) values_filepath = ut.grab_file_url(values_url, appname='kaggle7', check_hash=True) model_values = torch.load(values_filepath) classes = model_values['classes'] num_classes = len(classes) model_weights = torch.load(model_filepath, map_location=get_device()) network_model, mutliple = make_new_network( num_classes, RING_HEADS, GEM_CONST, pretrained=False) if mutliple: pass if torch.cuda.is_available(): network_model = network_model.cuda() # model_weights = model_weights['model'] network_model.load_state_dict(model_weights) network_model.eval() NETWORK_MODEL_TAG = model_tag NETWORK = network_model NETWORK_VALUES = model_values print('Using network %r' % (NETWORK_MODEL_TAG, )) with ut.Timer('Loading input tensor'): input_image = image.convert(CMODE).convert('LA').convert(CMODE) input_image = TFRM_RESIZE(input_image) input_image = pil2tensor(input_image, np.float32) input_image = input_image.div_(255) input_image = TFRM_WHITEN(input_image) size = input_image.size() input_tensor = input_image.view(-1, size[0], size[1], size[2]) input_tensor = input_tensor.to(get_device()) # Run inference with ut.Timer('Inference'): print('Running inference on input tensor %r' % (input_tensor.size(), )) output = NETWORK(input_tensor) print('...done') preds_list, feats_list = output with ut.Timer('Post1'): print('Performing post-processing') prediction_raw = preds_list[-1][0] features_raw = TFRM_L2NORM(torch.cat(feats_list, dim=1))[0] with ut.Timer('Post2'): print('...classifier') # Post Process classification classifier_temp = NETWORK_VALUES['thresholds'][ 'classifier_softmax_temp'] classifier_prediction = torch.softmax(prediction_raw / classifier_temp, dim=0) with ut.Timer('Post3'): # Post process features print('...features') train_feats = NETWORK_VALUES['train_feats'] train_gt = NETWORK_VALUES['train_gt'] size = features_raw.size() features = features_raw.view(-1, size[0]) distance_matrix_imgs = batched_dmv(features, train_feats) distance_matrix_classes = dm2cm(distance_matrix_imgs, train_gt) features_sim = (2.0 - distance_matrix_classes) * 0.5 features_sim = features_sim[0] features_temp = NETWORK_VALUES['thresholds'][ 'feature_softmax_temp'] features_prediction = torch.softmax(features_sim / features_temp, dim=0) with ut.Timer('Post4'): print('...mixing') p = NETWORK_VALUES['thresholds']['mixing_value'] classifier_prediction = classifier_prediction.to('cpu') final_prediction = (p * classifier_prediction + (1.0 - p) * features_prediction) with ut.Timer('Collection'): print('Collecting prediction') top_k_score_list, top_k_index_list = final_prediction.topk( num_returns, 0) top_k_score_list = top_k_score_list.detach().tolist() classes = 
NETWORK_VALUES['classes'] top_k_class_list = ut.take(classes, top_k_index_list) response['scores'] = {} for top_k_class, top_k_score in zip(top_k_class_list, top_k_score_list): response['scores'][top_k_class] = top_k_score response['success'] = True print('...done') except Exception as ex: message = str(ex) response['message'] = message print('!!!ERROR!!!') print(response) # if torch.cuda.is_available(): # torch.cuda.empty_cache() return response
def dev_train_distinctiveness(species=None): r""" Args: ibs (IBEISController): wbia controller object species (None): CommandLine: python -m wbia.algo.hots.distinctiveness_normalizer --test-dev_train_distinctiveness alias dev_train_distinctiveness='python -m wbia.algo.hots.distinctiveness_normalizer --test-dev_train_distinctiveness' # Publishing (uses cached normalizers if available) dev_train_distinctiveness --species GZ --publish dev_train_distinctiveness --species PZ --publish dev_train_distinctiveness --species PZ --retrain Example: >>> # DISABLE_DOCTEST >>> from wbia.algo.hots.distinctiveness_normalizer import * # NOQA >>> import wbia >>> species = ut.get_argval('--species', str, 'zebra_grevys') >>> dev_train_distinctiveness(species) """ import wbia # if 'species' not in vars() or species is None: # species = 'zebra_grevys' if species == 'zebra_grevys': dbname = 'GZ_ALL' elif species == 'zebra_plains': dbname = 'PZ_Master0' ibs = wbia.opendb(dbname) global_distinctdir = ibs.get_global_distinctiveness_modeldir() cachedir = global_distinctdir dstcnvs_normer = DistinctivnessNormalizer(species, cachedir=cachedir) try: if ut.get_argflag('--retrain'): raise IOError('force cache miss') with ut.Timer('loading distinctiveness'): dstcnvs_normer.load(cachedir) # Cache hit logger.info('distinctivness model cache hit') except IOError: logger.info('distinctivness model cache miss') with ut.Timer('training distinctiveness'): # Need to train # Add one example from each name # TODO: add one exemplar per viewpoint for each name # max_vecs = 1E6 # max_annots = 975 max_annots = 975 # ibs.fix_and_clean_database() nid_list = ibs.get_valid_nids() aids_list = ibs.get_name_aids(nid_list) # remove junk aids_list = ibs.unflat_map(ibs.filter_junk_annotations, aids_list) # remove empty aids_list = [aids for aids in aids_list if len(aids) > 0] num_annots_list = list(map(len, aids_list)) aids_list = ut.sortedby(aids_list, num_annots_list, reverse=True) # take only one annot per name aid_list = ut.get_list_column(aids_list, 0) # Keep only a certain number of annots for distinctiveness mapping aid_list_ = ut.listclip(aid_list, max_annots) logger.info('total num named annots = %r' % (sum(num_annots_list))) logger.info( 'training distinctiveness using %d/%d singleton annots' % (len(aid_list_), len(aid_list))) # vec # FIXME: qreq_ params for config rowid vecs_list = ibs.get_annot_vecs(aid_list_) num_vecs = sum(list(map(len, vecs_list))) logger.info('num_vecs = %r' % (num_vecs, )) vecs = np.vstack(vecs_list) logger.info('vecs size = %r' % (ut.get_object_size_str(vecs), )) dstcnvs_normer.init_support(vecs) dstcnvs_normer.save(global_distinctdir) if ut.get_argflag('--publish'): dstcnvs_normer.publish()
def tst_single_annot_distinctiveness_params(ibs, aid): r""" CommandLine: python -m wbia.algo.hots.distinctiveness_normalizer --test-test_single_annot_distinctiveness_params --show python -m wbia.algo.hots.distinctiveness_normalizer --test-test_single_annot_distinctiveness_params --show --db GZ_ALL Example: >>> # DISABLE_DOCTEST >>> from wbia.algo.hots.distinctiveness_normalizer import * # NOQA >>> import wbia.plottool as pt >>> import wbia >>> # build test data >>> ibs = wbia.opendb(ut.get_argval('--db', type_=str, default='PZ_MTEST')) >>> aid = ut.get_argval('--aid', type_=int, default=1) >>> # execute function >>> test_single_annot_distinctiveness_params(ibs, aid) >>> pt.show_if_requested() """ #### # TODO: Also paramatarize the downweighting based on the keypoint size #### # HACK IN ABILITY TO SET CONFIG from wbia.init.main_commands import postload_commands from wbia.algo import Config postload_commands(ibs, None) import wbia.plottool as pt # cfglbl_list = cfgdict_list # ut.all_dict_combinations_lbls(varied_dict) # Get info to find distinctivness of species_text = ibs.get_annot_species(aid) # FIXME; qreq_ params for config rowid vecs = ibs.get_annot_vecs(aid) kpts = ibs.get_annot_kpts(aid) chip = ibs.get_annot_chips(aid) # Paramater space to search # TODO: use slicing to control the params being varied # Use GridSearch class to modify paramaters as you go. varied_dict = Config.DCVS_DEFAULT.get_varydict() logger.info('Varied Dict: ') logger.info(ut.repr2(varied_dict)) cfgdict_list, cfglbl_list = ut.make_constrained_cfg_and_lbl_list( varied_dict) # Get groundtruthish distinctivness map # for objective function # Load distinctivness normalizer with ut.Timer('Loading Distinctivness Normalizer for %s' % (species_text)): dstcvnss_normer = request_species_distinctiveness_normalizer( species_text) # Get distinctivness over all params dstncvs_list = [ dstcvnss_normer.get_distinctiveness(vecs, **cfgdict) for cfgdict in ut.ProgIter(cfgdict_list, lbl='get dstcvns') ] # fgweights = ibs.get_annot_fgweights([aid])[0] # dstncvs_list = [x * fgweights for x in dstncvs_list] fnum = 1 import functools show_func = functools.partial(show_chip_distinctiveness_plot, chip, kpts) ut.interact_gridsearch_result_images( show_func, cfgdict_list, cfglbl_list, dstncvs_list, score_list=None, fnum=fnum, figtitle='dstncvs gridsearch', ) pt.present()
def vsone_( qreq_, query_aids, data_aids, qannot_cfg, dannot_cfg, configured_obj_annots, hyper_params, ): # Do vectorized preload before constructing lazy dicts # Then make sure the lazy dicts point to this subset unique_obj_annots = list(configured_obj_annots.values()) for annots in ut.ProgIter(unique_obj_annots, 'vectorized preload'): annots.set_caching(True) annots.chip_size annots.vecs annots.kpts annots.yaw annots.qual annots.gps annots.time if qreq_.qparams.featweight_enabled: annots.fgweights # annots._internal_attrs.clear() # Make convinient lazy dict representations (after loading pre info) configured_lazy_annots = ut.ddict(dict) for config, annots in configured_obj_annots.items(): annot_dict = configured_lazy_annots[config] for _annot in ut.ProgIter(annots.scalars(), label='make lazy dict'): annot = _annot._make_lazy_dict() annot_dict[_annot.aid] = annot unique_lazy_annots = ut.flatten([x.values() for x in configured_lazy_annots.values()]) flann_params = {'algorithm': 'kdtree', 'trees': 4} for annot in ut.ProgIter(unique_lazy_annots, label='lazy flann'): vt.matching.ensure_metadata_flann(annot, flann_params) vt.matching.ensure_metadata_normxy(annot) for annot in ut.ProgIter(unique_lazy_annots, 'preload kpts'): annot['kpts'] for annot in ut.ProgIter(unique_lazy_annots, 'preload normxy'): annot['norm_xys'] for annot in ut.ProgIter(unique_lazy_annots, 'preload vecs'): annot['vecs'] # Extract pairs of annot objects (with shared caches) lazy_annots1 = ut.take(configured_lazy_annots[qannot_cfg], query_aids) lazy_annots2 = ut.take(configured_lazy_annots[dannot_cfg], data_aids) # TODO: param search over grid # 'use_sv': [0, 1], # 'use_fg': [0, 1], # 'use_ratio_test': [0, 1], matches_RAT = [ vt.PairwiseMatch(annot1, annot2) for annot1, annot2 in zip(lazy_annots1, lazy_annots2) ] # Construct global measurements global_keys = ['yaw', 'qual', 'gps', 'time'] for match in ut.ProgIter(matches_RAT, label='setup globals'): match.add_global_measures(global_keys) # Preload flann for only specific annots for match in ut.ProgIter(matches_RAT, label='preload FLANN'): match.annot1['flann'] cfgdict = hyper_params.vsone_assign # Find one-vs-one matches # cfgdict = {'checks': 20, 'symmetric': False} for match in ut.ProgIter(matches_RAT, label='assign vsone'): match.assign(cfgdict=cfgdict) # gridsearch_ratio_thresh() # vt.matching.gridsearch_match_operation(matches_RAT, 'apply_ratio_test', { # 'ratio_thresh': np.linspace(.6, .7, 50) # }) for match in ut.ProgIter(matches_RAT, label='apply ratio thresh'): match.apply_ratio_test({'ratio_thresh': 0.638}, inplace=True) # TODO gridsearch over sv params # vt.matching.gridsearch_match_operation(matches_RAT, 'apply_sver', { # 'xy_thresh': np.linspace(0, 1, 3) # }) matches_RAT_SV = [ match.apply_sver(inplace=True) for match in ut.ProgIter(matches_RAT, label='sver') ] # Add keypoint spatial information to local features for match in matches_RAT_SV: match.add_local_measures() # key_ = 'norm_xys' # norm_xy1 = match.annot1[key_].take(match.fm.T[0], axis=1) # norm_xy2 = match.annot2[key_].take(match.fm.T[1], axis=1) # match.local_measures['norm_x1'] = norm_xy1[0] # match.local_measures['norm_y1'] = norm_xy1[1] # match.local_measures['norm_x2'] = norm_xy2[0] # match.local_measures['norm_y2'] = norm_xy2[1] # match.local_measures['scale1'] = vt.get_scales( # match.annot1['kpts'].take(match.fm.T[0], axis=0)) # match.local_measures['scale2'] = vt.get_scales( # match.annot2['kpts'].take(match.fm.T[1], axis=0)) # Create another version where we find global normalizers for the 
data # qreq_.load_indexer() # matches_SV_LNBNN = batch_apply_lnbnn(matches_RAT_SV, qreq_, inplace=True) # if 'weight' in cfgdict: # for match in matches_SV_LNBNN[::-1]: # lnbnn_dist = match.local_measures['lnbnn'] # ndist = match.local_measures['lnbnn_norm_dist'] # weights = match.local_measures[cfgdict['weight']] # match.local_measures['weighted_lnbnn'] = weights * lnbnn_dist # match.local_measures['weighted_lnbnn_norm_dist'] = weights * ndist # match.fs = match.local_measures['weighted_lnbnn'] cached_data = { # 'RAT': matches_RAT, 'RAT_SV': matches_RAT_SV, # 'SV_LNBNN': matches_SV_LNBNN, } return cached_data from sklearn.metrics.classification import coo_matrix def quick_cm(y_true, y_pred, labels, sample_weight): n_labels = len(labels) C = coo_matrix( (sample_weight, (y_true, y_pred)), shape=(n_labels, n_labels) ).toarray() return C def quick_mcc(C): """ assumes y_true and y_pred are in index/encoded format """ t_sum = C.sum(axis=1) p_sum = C.sum(axis=0) n_correct = np.diag(C).sum() n_samples = p_sum.sum() cov_ytyp = n_correct * n_samples - np.dot(t_sum, p_sum) cov_ypyp = n_samples ** 2 - np.dot(p_sum, p_sum) cov_ytyt = n_samples ** 2 - np.dot(t_sum, t_sum) mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp) return mcc def mcc_hack(): sample_weight = np.ones(len(self.samples), dtype=np.int) task_mccs = ut.ddict(dict) # Determine threshold levels per score type score_to_order = {} for scoretype in score_dict.keys(): y_score = score_dict[scoretype].values sortx = np.argsort(y_score, kind='mergesort')[::-1] y_score = y_score[sortx] distinct_value_indices = np.where(np.diff(y_score))[0] threshold_idxs = np.r_[distinct_value_indices, y_score.size - 1] thresh = y_score[threshold_idxs] score_to_order[scoretype] = (sortx, y_score, thresh) classes_ = np.array([0, 1], dtype=np.int) for task in task_list: labels = self.samples.subtasks[task] for sublabels in labels.gen_one_vs_rest_labels(): for scoretype in score_dict.keys(): sortx, y_score, thresh = score_to_order[scoretype] y_true = sublabels.y_enc[sortx] mcc = -np.inf for t in thresh: y_pred = (y_score > t).astype(np.int) C1 = quick_cm(y_true, y_pred, classes_, sample_weight) mcc1 = quick_mcc(C1) if mcc1 < 0: C2 = quick_cm(y_true, 1 - y_pred, classes_, sample_weight) mcc1 = quick_mcc(C2) mcc = max(mcc1, mcc) # logger.info('mcc = %r' % (mcc,)) task_mccs[sublabels.task_name][scoretype] = mcc return task_mccs if 0: with ut.Timer('mcc'): task_mccs = mcc_hack() logger.info('\nMCC of simple scoring measures:') df = pd.DataFrame.from_dict(task_mccs, orient='index') from utool.experimental.pandas_highlight import to_string_monkey logger.info(to_string_monkey(df, highlight_cols=np.arange(len(df.columns))))
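
# Sanity-check sketch for the quick_mcc helper defined above: on a small
# labeled example its value should agree with sklearn's reference
# matthews_corrcoef. The labels below are made up; it assumes quick_mcc is
# importable from this module and that sklearn is installed.
import numpy as np
from sklearn.metrics import confusion_matrix, matthews_corrcoef

y_true = np.array([0, 0, 1, 1, 1, 0])
y_pred = np.array([0, 1, 1, 1, 0, 0])
C = confusion_matrix(y_true, y_pred, labels=[0, 1]).astype(float)
print(quick_mcc(C))                       # fast path used in mcc_hack above
print(matthews_corrcoef(y_true, y_pred))  # reference value, should agree (~0.333)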