def _setup_links(self, cfg_prefix, config=None):
    """
    Called only when setting up an experiment to make a measurement.

    Creates symlinks such that all data is written to a directory that
    depends on the computer name, cfg_prefix, and an arbitrary
    configuration dict. Then forces the link in the base link directory
    to point to abs_dpath.
    """
    # Setup directory
    from os.path import expanduser
    assert self.dname is not None

    computer_id = ut.get_argval('--comp', default=ut.get_computer_name())

    conf_dpath = ut.ensuredir((expanduser(self.base_dpath), 'configured'))
    comp_dpath = ut.ensuredir((join(conf_dpath, computer_id)))
    link_dpath = ut.ensuredir((self.base_dpath, 'link'))

    # if True:
    #     # move to new system
    #     old_dpath = join(conf_dpath, self.dbname + '_' + computer_id)
    #     if exists(old_dpath):
    #         ut.move(old_dpath, join(comp_dpath, self.dbname))

    try:
        cfgstr = ut.repr3(config.getstate_todict_recursive())
    except AttributeError:
        cfgstr = ut.repr3(config)

    hashid = ut.hash_data(cfgstr)[0:6]
    suffix = '_'.join([cfg_prefix, hashid])
    dbcode = self.dbname + '_' + suffix

    abs_dpath = ut.ensuredir(join(comp_dpath, dbcode))

    self.dname = dbcode
    self.dpath = abs_dpath
    self.abs_dpath = abs_dpath

    # Place a basic link in the base link directory
    links = []
    links.append(expanduser(join(link_dpath, self.dbname)))
    # # Make a configured but computer agnostic link
    # links.append(expanduser(join(conf_dpath, self.dbname)))

    for link in links:
        try:
            # Overwrite any existing link so the most recently used is
            # the default
            self.link = ut.symlink(abs_dpath, link, overwrite=True)
        except Exception:
            if exists(abs_dpath):
                newpath = ut.non_existing_path(abs_dpath, suffix='_old')
                ut.move(link, newpath)
                self.link = ut.symlink(abs_dpath, link)

    ut.writeto(join(abs_dpath, 'info.txt'), cfgstr)
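To make the directory naming above concrete, here is a small standalone sketch (not part of the original class) of how the configured directory code is derived from the config hash. The dbname, cfg_prefix, and config values are made up for illustration; it only assumes utool is importable as ut.

import utool as ut

# Hypothetical values standing in for self.dbname / cfg_prefix / config
dbname = 'PZ_MTEST'
cfg_prefix = 'ranking'
config = {'K': 4, 'ratio_thresh': 0.8}

cfgstr = ut.repr3(config)           # config rendered as text
hashid = ut.hash_data(cfgstr)[0:6]  # short hash identifying the config
dbcode = dbname + '_' + '_'.join([cfg_prefix, hashid])

# e.g. 'PZ_MTEST_ranking_<hashid>'; the symlink placed in base_dpath/link
# then points at configured/<computer_id>/<dbcode>
print(dbcode)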
def _make_deploy_metadata(self, task_key=None):
    pblm = self.pblm
    if pblm.samples is None:
        pblm.setup()

    if task_key is None:
        task_key = pblm.primary_task_key

    # task_keys = list(pblm.samples.supported_tasks())
    clf_key = pblm.default_clf_key
    data_key = pblm.default_data_key

    # Save the classifier
    data_info = pblm.feat_extract_info[data_key]
    feat_extract_config, feat_dims = data_info

    samples = pblm.samples
    labels = samples.subtasks[task_key]

    edge_hashid = samples.edge_set_hashid()
    label_hashid = samples.task_label_hashid(task_key)
    tasksamp_hashid = samples.task_sample_hashid(task_key)

    annot_hashid = ut.hashid_arr(samples._unique_annots.visual_uuids, 'annots')

    # species = pblm.infr.ibs.get_primary_database_species(
    #     samples._unique_annots.aid)
    species = '+'.join(sorted(set(samples._unique_annots.species)))

    metadata = {
        'tasksamp_hashid': tasksamp_hashid,
        'edge_hashid': edge_hashid,
        'label_hashid': label_hashid,
        'annot_hashid': annot_hashid,
        'class_hist': labels.make_histogram(),
        'class_names': labels.class_names,
        'data_info': data_info,
        'task_key': task_key,
        'species': species,
        'data_key': data_key,
        'clf_key': clf_key,
        'n_dims': len(feat_dims),
        # 'aid_pairs': samples.aid_pairs,
    }

    meta_cfgstr = ut.repr2(metadata, kvsep=':', itemsep='', si=True)
    hashid = ut.hash_data(meta_cfgstr)[0:16]

    deploy_fname = self.fname_fmtstr.format(hashid=hashid, **metadata) + '.cPkl'

    deploy_metadata = metadata.copy()
    deploy_metadata['hashid'] = hashid
    deploy_metadata['fname'] = deploy_fname

    return deploy_metadata, deploy_fname
def vim_grep(pat, mode='normal', hashid=None):
    import vim
    import utool as ut
    import os
    ut.ENABLE_COLORS = False
    ut.util_str.ENABLE_COLORS = False
    if hashid is None:
        hashid = ut.hash_data(pat)
    print('Grepping for pattern = %r' % (pat,))

    def _grep_dpath(dpath):
        grep_tup = ut.grep([pat], dpath_list=[dpath],
                           exclude_patterns=['*.pyc'], verbose=False)
        reflags = 0
        (found_fpath_list, found_lines_list, found_lxs_list) = grep_tup
        regex_list = [pat]
        _exprs_flags = [ut.util_regex.extend_regex2(expr, reflags)
                        for expr in regex_list]
        extended_regex_list = ut.take_column(_exprs_flags, 0)
        grep_result = ut.GrepResult(found_fpath_list, found_lines_list,
                                    found_lxs_list, extended_regex_list,
                                    reflags=reflags)
        text = '\n'.join([
            'Grepping Directory "{}"'.format(dpath),
            'tofind_list={}'.format(ut.repr2(extended_regex_list)),
            grep_result.make_resultstr(colored=False),
            '=============',
            'found_fpath_list = {}'.format(ut.repr2(found_fpath_list, nl=1))
        ])
        return text

    if mode == 'normal':
        text = _grep_dpath(os.getcwd())
    elif mode == 'repo':
        for path in ut.ancestor_paths(limit={'~/code', '~'}):
            if exists(join(path, '.git')):
                break
        text = _grep_dpath(path)
    elif mode == 'project':
        msg_list = ut.grep_projects([pat], verbose=False, colored=False)
        text = '\n'.join(msg_list)
    else:
        raise KeyError('unknown pyvim_funcs.vim_grep mode={}'.format(mode))

    fname = 'tmp_grep_' + hashid + '.txt'
    dpath = ut.ensure_app_cache_dir('pyvim_funcs')
    fpath = join(dpath, fname)

    # Display the text in a new vim split
    open_fpath(fpath=fpath, mode='new')
    overwrite_text(text)
    vim.command(":exec ':w'")
def predict_k_neigh(db_emb, db_lbls, test_emb, k=5, f=None,
                    nearest_neighbors_cache_path=None):
    """Predict k nearest solutions for test embeddings based on labelled
    database embeddings.

    Input:
        db_emb: 2D float array (num_emb, emb_size): database embeddings
        db_lbls: 1D array, strings or floats: database labels
        test_emb: 2D float array: test embeddings
        k: integer, number of predictions.

    Returns:
        neigh_lbl_un - 2D array of shape [len(test_emb), k]:
            labels of the k nearest predictions
        neigh_ind_un - 2D int array of shape [len(test_emb), k]:
            indices of the nearest points in db_emb
        neigh_dist_un - 2D float array of shape [len(test_emb), k]:
            distances of the predictions
    """
    # Set number of nearest points (with duplicated labels)
    k_w_dupl = min(50, len(db_emb))

    if nearest_neighbors_cache_path is None:
        cache_filepath = None
    else:
        import utool as ut
        from six.moves import cPickle as pickle  # NOQA

        assert os.path.exists(nearest_neighbors_cache_path)

        db_emb_hash = list(map(ut.hash_data, db_emb))
        db_data = list(zip(db_emb_hash, db_lbls))
        db_data = sorted(db_data)
        db_data = '%s' % (db_data, )
        db_data_hash = ut.hash_data(db_data)

        args = (len(db_emb), db_data_hash, k_w_dupl, )
        cache_filename = 'pie-kneigh-num-%d-hash-%s-k-%d.cPkl' % args
        cache_filepath = os.path.join(nearest_neighbors_cache_path, cache_filename)

    nn_classifier = None

    if cache_filepath is not None and os.path.exists(cache_filepath):
        print('[pie] Found existing K Nearest Neighbors cache at: %r' % (cache_filepath, ))
        try:
            with open(cache_filepath, 'rb') as pickle_file:
                nn_classifier = pickle.load(pickle_file)
                print('[pie] pie cache loaded!')
        except Exception:
            print('[pie] pie cache failed to load!')
            nn_classifier = None

    if nn_classifier is None:
        nn_classifier = NearestNeighbors(n_neighbors=k_w_dupl, metric='euclidean')
        nn_classifier.fit(db_emb, db_lbls)

    if cache_filepath is not None and not os.path.exists(cache_filepath):
        assert nn_classifier is not None
        print('[pie] Creating new K Nearest Neighbors cache at: %r' % (cache_filepath, ))
        with open(cache_filepath, 'wb') as pickle_file:
            pickle.dump(nn_classifier, pickle_file)
            print('[pie] pie cache saved!')

    # Predict nearest neighbors and distances for test embeddings
    neigh_dist, neigh_ind = nn_classifier.kneighbors(test_emb)

    # Get labels of nearest neighbors
    neigh_lbl = np.zeros(shape=neigh_ind.shape, dtype=db_lbls.dtype)
    for i, preds in enumerate(neigh_ind):
        for j, pred in enumerate(preds):
            neigh_lbl[i, j] = db_lbls[pred]

    # Remove duplicates
    neigh_lbl_un = []
    neigh_ind_un = []
    neigh_dist_un = []

    for j in range(neigh_lbl.shape[0]):
        indices = np.arange(0, len(neigh_lbl[j]))
        a, b = rem_dupl(neigh_lbl[j], indices)
        neigh_lbl_un.append(a[:k])
        neigh_ind_un.append(neigh_ind[j][b][:k].tolist())
        neigh_dist_un.append(neigh_dist[j][b][:k].tolist())

    return neigh_lbl_un, neigh_ind_un, neigh_dist_un
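As a quick illustration of the call signature documented above, the following is a minimal sketch (not from the original plugin) that builds a tiny labelled database and queries it without the cache path. The data is synthetic, and it assumes numpy is installed and that predict_k_neigh is importable from the surrounding module.

import numpy as np

# Hypothetical toy data: six database embeddings covering three labels
db_emb = np.random.rand(6, 128).astype(np.float32)
db_lbls = np.array(['a', 'a', 'b', 'b', 'c', 'c'])
test_emb = np.random.rand(2, 128).astype(np.float32)

# No cache path, so the NearestNeighbors index is rebuilt on this call
lbls, inds, dists = predict_k_neigh(db_emb, db_lbls, test_emb, k=2)

# For each test embedding: the 2 nearest distinct labels, the indices of
# those neighbors in db_emb, and the corresponding distances
print(lbls)
print(inds)
print(dists)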