Example #1
    def _setup_links(self, cfg_prefix, config=None):
        """
        Called only when setting up an experiment to make a measurement.

        Creates symlinks such that all data is written to a directory that
        depends on a computer name, cfg_prefix and an arbitrary configuration
        dict.

        Then force the link in the basic directory to point to abs_dpath.
        """
        # Setup directory
        from os.path import expanduser, join, exists
        assert self.dname is not None

        computer_id = ut.get_argval('--comp', default=ut.get_computer_name())

        conf_dpath = ut.ensuredir((expanduser(self.base_dpath), 'configured'))
        comp_dpath = ut.ensuredir(join(conf_dpath, computer_id))

        link_dpath = ut.ensuredir((self.base_dpath, 'link'))

        try:
            # config may be a proper config object or a plain dict
            cfgstr = ut.repr3(config.getstate_todict_recursive())
        except AttributeError:
            cfgstr = ut.repr3(config)

        hashid = ut.hash_data(cfgstr)[0:6]
        suffix = '_'.join([cfg_prefix, hashid])
        dbcode = self.dbname + '_' + suffix

        abs_dpath = ut.ensuredir(join(comp_dpath, dbcode))

        self.dname = dbcode
        self.dpath = abs_dpath
        self.abs_dpath = abs_dpath

        # Place a basic link in the base link directory
        links = []
        links.append(expanduser(join(link_dpath, self.dbname)))

        for link in links:
            try:
                # Overwrite any existing link so the most recently used is
                # the default
                self.link = ut.symlink(abs_dpath, link, overwrite=True)
            except Exception:
                # The link path may be occupied by a real file or directory;
                # move the blocker aside and retry the symlink
                if exists(abs_dpath):
                    newpath = ut.non_existing_path(abs_dpath, suffix='_old')
                    ut.move(link, newpath)
                    self.link = ut.symlink(abs_dpath, link)

        ut.writeto(join(abs_dpath, 'info.txt'), cfgstr)
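
A minimal sketch of the naming scheme above, assuming utool is importable as
ut; the config dict, prefix, and database name are hypothetical stand-ins:

import utool as ut

config = {'dim_size': 700, 'resize_dim': 'width'}   # hypothetical config
cfg_prefix = 'pipeline'                             # hypothetical prefix
dbname = 'testdb1'                                  # hypothetical db name

cfgstr = ut.repr3(config)               # stable text form of the config
hashid = ut.hash_data(cfgstr)[0:6]      # short deterministic hash
dbcode = dbname + '_' + '_'.join([cfg_prefix, hashid])
print(dbcode)  # testdb1_pipeline_<6-char hash>

Because the hash is derived from the config's text representation, re-running
with an identical configuration reuses the same directory, while any change
produces a fresh one.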
Example #2
    def _make_deploy_metadata(self, task_key=None):
        pblm = self.pblm
        if pblm.samples is None:
            pblm.setup()

        if task_key is None:
            task_key = pblm.primary_task_key

        clf_key = pblm.default_clf_key
        data_key = pblm.default_data_key

        # Gather feature-extraction info for the classifier
        data_info = pblm.feat_extract_info[data_key]
        feat_extract_config, feat_dims = data_info

        samples = pblm.samples
        labels = samples.subtasks[task_key]

        edge_hashid = samples.edge_set_hashid()
        label_hashid = samples.task_label_hashid(task_key)
        tasksamp_hashid = samples.task_sample_hashid(task_key)

        annot_hashid = ut.hashid_arr(samples._unique_annots.visual_uuids,
                                     'annots')

        species = '+'.join(sorted(set(samples._unique_annots.species)))

        metadata = {
            'tasksamp_hashid': tasksamp_hashid,
            'edge_hashid': edge_hashid,
            'label_hashid': label_hashid,
            'annot_hashid': annot_hashid,
            'class_hist': labels.make_histogram(),
            'class_names': labels.class_names,
            'data_info': data_info,
            'task_key': task_key,
            'species': species,
            'data_key': data_key,
            'clf_key': clf_key,
            'n_dims': len(feat_dims),
        }

        meta_cfgstr = ut.repr2(metadata, kvsep=':', itemsep='', si=True)
        hashid = ut.hash_data(meta_cfgstr)[0:16]

        deploy_fname = self.fname_fmtstr.format(hashid=hashid,
                                                **metadata) + '.cPkl'

        deploy_metadata = metadata.copy()
        deploy_metadata['hashid'] = hashid
        deploy_metadata['fname'] = deploy_fname
        return deploy_metadata, deploy_fname
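
The deploy filename is fully determined by the metadata, so retraining with
identical data and settings reproduces the same name. A minimal sketch of the
construction, assuming utool as ut; the template below is a hypothetical
stand-in for the class's real fname_fmtstr:

import utool as ut

metadata = {'species': 'zebra', 'task_key': 'match_state'}  # trimmed example
fname_fmtstr = 'clf.{species}.{task_key}.{hashid}'          # hypothetical

meta_cfgstr = ut.repr2(metadata, kvsep=':', itemsep='', si=True)
hashid = ut.hash_data(meta_cfgstr)[0:16]
deploy_fname = fname_fmtstr.format(hashid=hashid, **metadata) + '.cPkl'
print(deploy_fname)  # clf.zebra.match_state.<16-char hash>.cPkl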
Example #3
def vim_grep(pat, mode='normal', hashid=None):
    import vim
    import utool as ut
    ut.ENABLE_COLORS = False
    ut.util_str.ENABLE_COLORS = False
    if hashid is None:
        hashid = ut.hash_data(pat)
    print('Grepping for pattern = %r' % (pat,))
    import os
    from os.path import exists, join

    def _grep_dpath(dpath):
        grep_tup = ut.grep([pat], dpath_list=[dpath],
                           exclude_patterns=['*.pyc'], verbose=False)
        reflags = 0
        (found_fpath_list, found_lines_list, found_lxs_list) = grep_tup
        regex_list = [pat]
        _exprs_flags = [ut.util_regex.extend_regex2(expr, reflags)
                        for expr in regex_list]
        extended_regex_list = ut.take_column(_exprs_flags, 0)
        grep_result = ut.GrepResult(found_fpath_list, found_lines_list,
                                    found_lxs_list, extended_regex_list,
                                    reflags=reflags)
        text = '\n'.join([
            'Grepping Directory "{}"'.format(dpath),
            'tofind_list={}'.format(ut.repr2(extended_regex_list)),
            grep_result.make_resultstr(colored=False),
            '=============',
            'found_fpath_list = {}'.format(ut.repr2(found_fpath_list, nl=1))
        ])
        return text

    if mode == 'normal':
        text = _grep_dpath(os.getcwd())
    elif mode == 'repo':
        # Walk up the ancestor directories until one contains a .git folder
        for path in ut.ancestor_paths(limit={'~/code', '~'}):
            if exists(join(path, '.git')):
                break
        text = _grep_dpath(path)
    elif mode == 'project':
        msg_list = ut.grep_projects([pat], verbose=False, colored=False)
        text = '\n'.join(msg_list)
    else:
        raise KeyError('unknown pyvim_funcs.vim_grep mode={}'.format(mode))

    fname = 'tmp_grep_' + hashid + '.txt'
    dpath = ut.ensure_app_cache_dir('pyvim_funcs')
    fpath = join(dpath, fname)

    # Display the text in a new vim split
    open_fpath(fpath=fpath, mode='new')
    overwrite_text(text)
    vim.command(":exec ':w'")
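
The mode='repo' branch can be exercised outside of Vim; below is a standalone
sketch of the git-root discovery it performs, assuming utool as ut:

from os.path import exists, join
import utool as ut

repo_root = None
for path in ut.ancestor_paths(limit={'~/code', '~'}):
    if exists(join(path, '.git')):
        repo_root = path
        break
print('repo root: %r' % (repo_root,))
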
import os

import numpy as np
from sklearn.neighbors import NearestNeighbors


def predict_k_neigh(db_emb,
                    db_lbls,
                    test_emb,
                    k=5,
                    f=None,
                    nearest_neighbors_cache_path=None):
    """Predict k nearest solutions for test embeddings based on labelled database embeddings.
    Input:
    db_emb: 2D float array (num_emb, emb_size): database embeddings
    db_lbls: 1D array, string or floats: database labels
    test_emb: 2D float array: test embeddings
    k: integer, number of predictions.

    Returns:
    neigh_lbl_un - 2d int array of shape [len(test_emb), k] labels of predictions
    neigh_ind_un - 2d int array of shape [len(test_emb), k] labels of indices of nearest points
    neigh_dist_un - 2d float array of shape [len(test_emb), k] distances of predictions
    """
    # Set the number of nearest points to retrieve (before removing
    # duplicate labels)
    k_w_dupl = min(50, len(db_emb))

    if nearest_neighbors_cache_path is None:
        cache_filepath = None
    else:
        import utool as ut
        from six.moves import cPickle as pickle  # NOQA

        assert os.path.exists(nearest_neighbors_cache_path)

        # Hash the database contents so the cache key changes whenever the
        # embeddings or labels change
        db_emb_hash = list(map(ut.hash_data, db_emb))
        db_data = list(zip(db_emb_hash, db_lbls))
        db_data = sorted(db_data)
        db_data = '%s' % (db_data, )
        db_data_hash = ut.hash_data(db_data)
        args = (
            len(db_emb),
            db_data_hash,
            k_w_dupl,
        )
        cache_filename = 'pie-kneigh-num-%d-hash-%s-k-%d.cPkl' % args
        cache_filepath = os.path.join(nearest_neighbors_cache_path,
                                      cache_filename)

    nn_classifier = None
    if cache_filepath is not None and os.path.exists(cache_filepath):
        print('[pie] Found existing K Nearest Neighbors cache at: %r' %
              (cache_filepath, ))
        try:
            with open(cache_filepath, 'rb') as pickle_file:
                nn_classifier = pickle.load(pickle_file)
            print('[pie] pie cache loaded!')
        except Exception:
            print('[pie] pie cache failed to load!')
            nn_classifier = None

    if nn_classifier is None:
        # NearestNeighbors is unsupervised; labels are recovered from
        # db_lbls after querying, so only the embeddings are fitted
        nn_classifier = NearestNeighbors(n_neighbors=k_w_dupl,
                                         metric='euclidean')
        nn_classifier.fit(db_emb)

    if cache_filepath is not None and not os.path.exists(cache_filepath):
        assert nn_classifier is not None
        print('[pie] Creating new K Nearest Neighbors cache at: %r' %
              (cache_filepath, ))
        with open(cache_filepath, 'wb') as pickle_file:
            pickle.dump(nn_classifier, pickle_file)
        print('[pie] pie cache saved!')

    # Predict nearest neighbors and distances for test embeddings
    neigh_dist, neigh_ind = nn_classifier.kneighbors(test_emb)

    # Get labels of nearest neighbors
    neigh_lbl = np.zeros(shape=neigh_ind.shape, dtype=db_lbls.dtype)
    for i, preds in enumerate(neigh_ind):
        for j, pred in enumerate(preds):
            neigh_lbl[i, j] = db_lbls[pred]

    # Remove duplicates
    neigh_lbl_un = []
    neigh_ind_un = []
    neigh_dist_un = []

    for j in range(neigh_lbl.shape[0]):
        indices = np.arange(0, len(neigh_lbl[j]))
        # rem_dupl (a helper defined elsewhere in this module) drops repeated
        # labels, keeping the first occurrence and the surviving positions
        a, b = rem_dupl(neigh_lbl[j], indices)
        neigh_lbl_un.append(a[:k])
        neigh_ind_un.append(neigh_ind[j][b][:k].tolist())
        neigh_dist_un.append(neigh_dist[j][b][:k].tolist())

    return neigh_lbl_un, neigh_ind_un, neigh_dist_un
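
A minimal usage sketch with synthetic data, assuming numpy and scikit-learn
are installed; shapes follow the docstring and the cache path is omitted:

import numpy as np

rng = np.random.RandomState(0)
db_emb = rng.rand(100, 32)                                     # database embeddings
db_lbls = np.array(['name%d' % (i % 10) for i in range(100)])  # 10 identities
test_emb = rng.rand(5, 32)                                     # 5 queries

lbls, inds, dists = predict_k_neigh(db_emb, db_lbls, test_emb, k=3)
print(lbls[0])   # 3 distinct predicted labels for the first query
print(dists[0])  # matching distances, ascending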