Example #1
    def forest(rf, tree_path_list, **kwargs):
        """
            Create the forest object by loading a list of tree paths.

            Args:
                tree_path_list (list of str): list of tree paths as strings
                serial (bool, optional): flag indicating whether to load the forest serially;
                    defaults to False
                verbose (bool, optional): verbose flag; defaults to object's verbose or
                    selectively enabled for this function

            Returns:
                forest (object): the forest object of the loaded trees
        """
        # Default values
        params = odict([
            ('serial',                       False),
            ('verbose',                      rf.verbose),
            ('quiet',                        rf.quiet),
        ])
        #params.update(kwargs)
        ut.update_existing(params, kwargs)

        # Data integrity
        assert len(tree_path_list) > 0, \
            'Must specify at least one tree path to load'
        assert all( [ exists(tree_path) for tree_path in tree_path_list ] ), \
            'At least one specified tree path does not exist'

        params_list = [
            _cast_list_to_c(ensure_bytes_strings(tree_path_list), C_CHAR),
            len(tree_path_list),
        ] + list(params.values())
        return RF_CLIB.forest(rf.detector_c_obj, *params_list)
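
All of these examples revolve around `ut.update_existing`, which merges caller kwargs into a dict of defaults without introducing new keys. Below is a minimal, self-contained sketch approximating that behavior (the real utool function also supports the `copy` and `assert_exists` flags seen in later examples); `update_existing_sketch` is a hypothetical stand-in, not the utool implementation:

    from collections import OrderedDict

    def update_existing_sketch(target, other, copy=False, assert_exists=False):
        # Copy values from `other` into `target`, but only for keys that
        # `target` already defines; unknown keys are ignored unless
        # assert_exists is set, in which case they raise a KeyError.
        if copy:
            target = target.copy()
        for key, val in other.items():
            if key in target:
                target[key] = val
            elif assert_exists:
                raise KeyError('unexpected key: %r' % (key,))
        return target

    params = OrderedDict([('serial', False), ('verbose', True), ('quiet', False)])
    update_existing_sketch(params, {'serial': True, 'bogus': 1})
    assert params['serial'] is True and 'bogus' not in params
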
Example #2
    def train(dark, voc_path, weight_path, **kwargs):
        """
            Train a new darknet (YOLO) model with the given VOC-formatted data.

            Args:
                voc_path (str): path to the training data (VOC format)
                weight_path (str): directory where the newly trained weights are to be saved

            Kwargs:
                verbose (bool, optional): verbose flag; defaults to object's verbose or
                    selectively enabled for this function
                quiet (bool, optional): quiet flag; defaults to object's quiet

            Returns:
                tuple of str: (weight_filepath, config_filepath, class_filepath)
        """
        # Default values
        params = odict(
            [
                ("weight_filepath", None),  # This value always gets overwritten
                ("verbose", dark.verbose),
                ("quiet", dark.quiet),
            ]
        )
        # params.update(kwargs)
        ut.update_existing(params, kwargs)

        # Make the weight path absolute
        weight_path = abspath(weight_path)
        ut.ensuredir(weight_path)

        # Setup training files and folder structures
        results = dark._train_setup(voc_path, weight_path)
        manifest_filename, num_images, config_filepath, class_filepath = results

        # Run training algorithm
        params_list = [dark.net, manifest_filename, weight_path, num_images] + list(params.values())
        DARKNET_CLIB.train(*params_list)
        weight_filepath = params["weight_filepath"]

        if not params["quiet"]:
            print("\n\n[pydarknet py] *************************************")
            print("[pydarknet py] Training Completed")
            print("[pydarknet py] Weight file saved to: %s" % (weight_filepath,))
        return weight_filepath, config_filepath, class_filepath
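
A hedged usage sketch of the wrapper above; the `dark` detector object and both paths are placeholders, not values from the source:

    # Hypothetical call; `dark` is an already-constructed pydarknet detector
    # and the paths below are placeholders.
    weight_fpath, config_fpath, class_fpath = dark.train(
        '/data/voc_dataset', '/data/models/yolo_run1', verbose=True)
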
Example #3
    def get_cfgstr(nnindexer, noquery=False):
        r""" returns string which uniquely identified configuration and support data

        Args:
            noquery (bool): if True cfgstr is only relevant to building the
                index. No search params are returned (default = False)

        Returns:
            str: flann_cfgstr

        CommandLine:
            python -m wbia.algo.hots.neighbor_index --test-get_cfgstr

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> import wbia
            >>> cfgdict = dict(fg_on=False)
            >>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False')
            >>> qreq_.load_indexer()
            >>> nnindexer = qreq_.indexer
            >>> noquery = True
            >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
            >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
            >>> print(result)
            flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((11260,128)gj5nea@ni0%f3aja)
        """
        flann_cfgstr_list = []
        use_params_hash = True
        use_data_hash = True
        if use_params_hash:
            flann_defaults = vt.get_flann_params(
                nnindexer.flann_params['algorithm'])
            # flann_params_clean = flann_defaults.copy()
            flann_params_clean = ut.sort_dict(flann_defaults)
            ut.update_existing(flann_params_clean, nnindexer.flann_params)
            if noquery:
                ut.delete_dict_keys(flann_params_clean, ['checks'])
            shortnames = dict(algorithm='algo',
                              checks='chks',
                              random_seed='seed',
                              trees='t')
            short_params = ut.odict([
                (shortnames.get(key, key), str(val)[0:7])
                for key, val in six.iteritems(flann_params_clean)
            ])
            flann_valsig_ = ut.repr2(short_params,
                                     nl=False,
                                     explicit=True,
                                     strvals=True)
            flann_valsig_ = flann_valsig_.lstrip('dict').replace(' ', '')
            # flann_valsig_ = str(list(flann_params.values()))
            # flann_valsig = ut.remove_chars(flann_valsig_, ', \'[]')
            flann_cfgstr_list.append('_FLANN(' + flann_valsig_ + ')')
        if use_data_hash:
            vecs_hashstr = ut.hashstr_arr(nnindexer.idx2_vec, '_VECS')
            flann_cfgstr_list.append(vecs_hashstr)
        flann_cfgstr = ''.join(flann_cfgstr_list)
        return flann_cfgstr
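
The signature string produced above can be approximated with a small self-contained sketch of the shortname/truncation step (standing in for `ut.repr2`); the parameter values are illustrative only:

    # Sketch of how the compact parameter signature is assembled: keys are
    # shortened, values truncated to 7 characters, and everything is joined.
    shortnames = dict(algorithm='algo', checks='chks', random_seed='seed', trees='t')
    flann_params_clean = {'algorithm': 'kdtree', 'random_seed': 42, 'trees': 8}
    short_params = [
        '%s=%s' % (shortnames.get(key, key), str(val)[0:7])
        for key, val in sorted(flann_params_clean.items())
    ]
    flann_valsig = '(' + ','.join(short_params) + ',)'
    print('_FLANN(' + flann_valsig + ')')  # _FLANN((algo=kdtree,seed=42,t=8,))
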
Example #4
def generate_and_make(repo_dpath, **kwargs):
    import os
    import sys
    import utool as ut

    cmake_vars = {
        # build with
        'fletch_BUILD_WITH_PYTHON': True,
        'fletch_BUILD_WITH_MATLAB': False,
        'fletch_BUILD_WITH_CUDA': False,
        'fletch_BUILD_WITH_CUDNN': False,
        # select version
        'OpenCV_SELECT_VERSION': '3.1.0',
        'VTK_SELECT_VERSION': '6.2.0',
        'fletch_PYTHON_VERSION': sys.version[0:3],
        'PYTHON_EXECUTABLE': sys.executable,
    }
    ut.update_existing(cmake_vars, kwargs)

    DISABLED_LIBS = [  # NOQA
        'ITK',
    ]

    VTK_LIBS = [
        'VTK',
        'TinyXML',
        'libxml2',
        'Qt',
    ]

    ENABLED_LIBS = [
        'Boost', 'Caffe', 'Ceres', 'Eigen', 'FFmpeg', 'GeographicLib',
        'GFlags', 'GLog', 'HDF5', 'jom', 'LevelDB', 'libjpeg-turbo', 'libjson',
        'libkml', 'libtiff',  'LMDB', 'log4cplus', 'OpenBLAS', 'OpenCV',
        'OpenCV_contrib', 'PNG', 'PROJ4', 'Protobuf', 'shapelib', 'Snappy',
        'SuiteSparse', 'VXL', 'yasm', 'ZLib',
    ] + VTK_LIBS

    lines = ['cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=RELEASE']
    lines += ['-D fletch_ENABLE_{}=True'.format(lib) for lib in ENABLED_LIBS]
    lines += ['-D {}={}'.format(key, val) for key, val in cmake_vars.items()]
    lines += [repo_dpath]

    command = ' '.join(lines)
    print(command)

    if False:
        # import utool as ut
        # cmake_retcode = ut.cmd2(command, verbose=True)['ret']
        cmake_retcode = os.system(command)

        if cmake_retcode == 0:
            os.system('make -j9')
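
A hedged usage sketch; the checkout path is a placeholder and the keyword overrides only work because those keys already exist in `cmake_vars` above:

    # Hypothetical invocation; '/path/to/fletch' is a placeholder checkout.
    generate_and_make('/path/to/fletch',
                      OpenCV_SELECT_VERSION='3.2.0',
                      fletch_BUILD_WITH_CUDA=True)
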
Example #5
 def parse_cfgstr_list2(cfgstr_list, named_dcfgs_dict, cfgtype=None, alias_keys=None):
     """
     Parse a generic cfgstr --flag name1:custom_args1 name2:custom_args2
     """
     cfg_list = []
     for cfgstr in cfgstr_list:
         cfgstr_split = cfgstr.split(':')
         cfgname = cfgstr_split[0]
         cfg = named_dcfgs_dict[cfgname].copy()
         # Parse dict out of a string
         if len(cfgstr_split) > 1:
             cfgstr_options =  ':'.join(cfgstr_split[1:]).split(',')
             cfg_options = ut.parse_cfgstr_list(cfgstr_options, smartcast=True, oldmode=False)
         else:
             cfg_options = {}
         # Hack for q/d specific configs
         if cfgtype is not None:
             for key in list(cfg_options.keys()):
                 # check if key is nonstandard
                 if not (key in cfg or key in alias_keys):
                      # does removing prefix make it standard?
                     prefix = cfgtype[0]
                     if key.startswith(prefix):
                         key_ = key[len(prefix):]
                         if key_ in cfg or key_ in alias_keys:
                             # remove prefix
                             cfg_options[key_] = cfg_options[key]
                     try:
                         assert key[1:] in cfg or key[1:] in alias_keys, 'key=%r, key[1:] =%r' % (key, key[1:] )
                     except AssertionError as ex:
                         ut.printex(ex, 'error', keys=['key', 'cfg', 'alias_keys'])
                         raise
                     del cfg_options[key]
         # Remap keynames based on aliases
         if alias_keys is not None:
             for key in alias_keys.keys():
                 if key in cfg_options:
                     # use standard new key
                     cfg_options[alias_keys[key]] = cfg_options[key]
                      # remove old aliased key
                     del cfg_options[key]
         # Finalize configuration dict
         cfg = ut.update_existing(cfg, cfg_options, copy=True, assert_exists=True)
         cfg['_cfgtype'] = cfgtype
         cfg['_cfgname'] = cfgname
         cfg['_cfgstr'] = cfgstr
         cfg_list.append((cfgname, cfg))
         break  # FIXME: do more than one eventually
     return cfg
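
A self-contained sketch of the cfgstr splitting that the loop above performs; the real code additionally smart-casts values via `ut.parse_cfgstr_list`, which this sketch skips:

    def split_cfgstr(cfgstr):
        # 'name:key1=val1,key2=val2' -> ('name', {'key1': 'val1', 'key2': 'val2'})
        name, _, optstr = cfgstr.partition(':')
        options = {}
        if optstr:
            for item in optstr.split(','):
                key, _, val = item.partition('=')
                options[key] = val
        return name, options

    assert split_cfgstr('default:fg_on=False,ranks_top=5') == (
        'default', {'fg_on': 'False', 'ranks_top': '5'})
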
Example #6
    def __init__(
        qres_wgt,
        ibs,
        cm_list,
        parent=None,
        callback=None,
        qreq_=None,
        query_title='',
        review_cfg={},
    ):
        if ut.VERBOSE:
            logger.info('[qres_wgt] Init QueryResultsWidget')

        assert not isinstance(cm_list, dict)
        assert qreq_ is not None, 'must specify qreq_'

        if USE_FILTER_PROXY:
            super(QueryResultsWidget,
                  qres_wgt).__init__(parent=parent,
                                     model_class=CustomFilterModel)
        else:
            super(QueryResultsWidget, qres_wgt).__init__(parent=parent)

        # if USE_FILTER_PROXY:
        #    APIItemWidget.__init__(qres_wgt, parent=parent,
        #                            model_class=CustomFilterModel)
        # else:
        #    APIItemWidget.__init__(qres_wgt, parent=parent)

        qres_wgt.cm_list = cm_list
        qres_wgt.ibs = ibs
        qres_wgt.qreq_ = qreq_
        qres_wgt.query_title = query_title
        qres_wgt.qaid2_cm = dict([(cm.qaid, cm) for cm in cm_list])

        qres_wgt.review_cfg = id_review_api.REVIEW_CFG_DEFAULTS.copy()
        qres_wgt.review_cfg = ut.update_existing(qres_wgt.review_cfg,
                                                 review_cfg,
                                                 assert_exists=True)

        # qres_wgt.altkey_shortcut =
        # QtWidgets.QShortcut(QtGui.QKeySequence(QtCore.Qt.ALT), qres_wgt,
        #                qres_wgt.on_alt_pressed,
        #                context=QtCore..Qt.WidgetShortcut)
        qres_wgt.button_list = None
        qres_wgt.show_new = True
        qres_wgt.show_join = True
        qres_wgt.show_split = True
        qres_wgt.tt = ut.tic()
        # Set results data
        if USE_FILTER_PROXY:
            qres_wgt.add_checkboxes(qres_wgt.show_new, qres_wgt.show_join,
                                    qres_wgt.show_split)

        lbl = gt.newLineEdit(
            qres_wgt,
            text=
            "'T' marks as correct match. 'F' marks as incorrect match. Alt brings up context menu. Double click a row to inspect matches.",
            editable=False,
            enabled=False,
        )
        qres_wgt.layout().setSpacing(0)
        qres_wgt_layout = qres_wgt.layout()
        if hasattr(qres_wgt_layout, 'setMargin'):
            qres_wgt_layout.setMargin(0)
        else:
            qres_wgt_layout.setContentsMargins(0, 0, 0, 0)
        bottom_bar = gt.newWidget(qres_wgt,
                                  orientation=Qt.Horizontal,
                                  spacing=0,
                                  margin=0)
        bottom_bar.layout().setSpacing(0)
        bottom_bar_layout = bottom_bar.layout()
        if hasattr(bottom_bar_layout, 'setMargin'):
            bottom_bar_layout.setMargin(0)
        else:
            bottom_bar_layout.setContentsMargins(0, 0, 0, 0)
        lbl.setMinimumSize(0, 0)
        lbl.setSizePolicy(QtWidgets.QSizePolicy.Expanding,
                          QtWidgets.QSizePolicy.Ignored)
        # lbl.setSizePolicy(gt.newSizePolicy())

        qres_wgt.layout().addWidget(bottom_bar)
        bottom_bar.addWidget(lbl)
        bottom_bar.addNewButton(
            'Mark unreviewed with higher scores as correct',
            pressed=qres_wgt.mark_unreviewed_above_score_as_correct,
        )
        bottom_bar.addNewButton('Repopulate', pressed=qres_wgt.repopulate)
        bottom_bar.addNewButton('Edit Filters', pressed=qres_wgt.edit_filters)

        qres_wgt.setSizePolicy(gt.newSizePolicy())
        qres_wgt.repopulate()
        qres_wgt.connect_signals_and_slots()
        if callback is None:
            callback = partial(ut.identity, None)
        qres_wgt.callback = callback
        qres_wgt.view.setColumnHidden(0, False)
        qres_wgt.view.setColumnHidden(1, False)
        qres_wgt.view.connect_single_key_to_slot(gt.ALT_KEY,
                                                 qres_wgt.on_alt_pressed)
        qres_wgt.view.connect_keypress_to_slot(qres_wgt.on_special_key_pressed)
        if parent is None:
            # Register parentless QWidgets
            fig_presenter.register_qt4_win(qres_wgt)

        dbdir = qres_wgt.qreq_.ibs.get_dbdir()
        expt_dir = ut.ensuredir(ut.unixjoin(dbdir, 'SPECIAL_GGR_EXPT_LOGS'))
        review_log_dir = ut.ensuredir(ut.unixjoin(expt_dir, 'review_logs'))

        ts = ut.get_timestamp(isutc=True, timezone=True)
        log_fpath = ut.unixjoin(
            review_log_dir,
            'review_log_%s_%s.json' % (qres_wgt.qreq_.ibs.dbname, ts))

        # LOG ALL CHANGES MADE TO NAMES
        import logging

        # ut.vd(review_log_dir)
        # create a logger for the review session
        logger_ = logging.getLogger('query_review')
        logger_.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        # create file handler which logs even debug messages
        fh = logging.FileHandler(log_fpath)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(formatter)
        logger_.addHandler(fh)

        # create console handler with a higher log level
        ch = logging.StreamHandler()
        ch.setLevel(logging.INFO)
        ch.setFormatter(formatter)
        logger_.addHandler(ch)

        qres_wgt.logger = logger
        logger_.info('START QUERY_RESULT_REVIEW')
        logger_.info('NUM CHIP_MATCH OBJECTS (len(cm_list)=%d)' %
                     (len(cm_list), ))
        logger_.info('NUM PAIRS TO EVIDENCE_DECISION (nRows=%d)' %
                     (qres_wgt.review_api.nRows, ))
        logger_.info('PARENT QUERY REQUEST (cfgstr=%s)' %
                     (qres_wgt.qreq_.get_cfgstr(with_input=True), ))
Example #7
    def detect(dark, input_gpath_list, **kwargs):
        """
            Run detection with the loaded YOLO network on a list of images

            Args:
                input_gpath_list (list of str): the list of image paths that you want
                    to test
                config_filepath (str, optional): the network definition for YOLO to use
                weight_filepath (str, optional): the network weights for YOLO to use

            Kwargs:
                sensitivity (float, optional): the sensitivity of the detector, which
                    accepts a value between 0.0 and 1.0; defaults to 0.2
                batch_size (int, optional): the number of images to test at a single
                    time in parallel (if None, the number of CPUs is used); defaults to
                    None
                verbose (bool, optional): verbose flag; defaults to object's verbose or
                    selectively enabled for this function

            Yields:
                (str, (list of dict)): tuple of the input image path and a list
                    of dictionaries specifying the detected bounding boxes

                    The dictionaries returned by this function are of the form:
                        xtl (int): the top left x position of the bounding box
                        ytl (int): the top left y position of the bounding box
                        width (int): the width of the bounding box
                        height (int): the height of the bounding box
                        class (str): the most probable class detected by the network
                        confidence (float): the confidence that this bounding box is of
                            the detected class

        """
        # Default values
        params = odict(
            [
                ("batch_size", None),
                ("class_list", dark.CLASS_LIST),
                ("sensitivity", 0.2),
                ("grid", False),
                ("results_array", None),  # This value always gets overwritten
                ("verbose", dark.verbose),
                ("quiet", dark.quiet),
            ]
        )
        # params.update(kwargs)
        ut.update_existing(params, kwargs)
        class_list = params["class_list"]
        del params["class_list"]  # Remove this value from params

        if params["grid"]:
            _update_globals(grid=10, class_list=class_list)
        else:
            _update_globals(grid=1, class_list=class_list)

        # Try to determine the parallel processing batch size
        if params["batch_size"] is None:
            # try:
            #     cpu_count = multiprocessing.cpu_count()
            #     if not params['quiet']:
            #         print('[pydarknet py] Detecting with %d CPUs' % (cpu_count, ))
            #     params['batch_size'] = cpu_count
            # except:
            #     params['batch_size'] = 128
            params["batch_size"] = 32

        params["verbose"] = int(params["verbose"])
        params["quiet"] = int(params["quiet"])

        # Data integrity
        assert params["sensitivity"] >= 0 and params["sensitivity"] <= 1.0, "Threshold must be in the range [0, 1]."

        # Run training algorithm
        batch_size = params["batch_size"]
        del params["batch_size"]  # Remove this value from params
        batch_num = int(np.ceil(len(input_gpath_list) / float(batch_size)))
        # Detect for each batch
        for batch in ut.ProgressIter(range(batch_num), lbl="[pydarknet py]", freq=1, invert_rate=True):
            begin = time.time()
            start = batch * batch_size
            end = start + batch_size
            if end > len(input_gpath_list):
                end = len(input_gpath_list)
            input_gpath_list_ = input_gpath_list[start:end]
            num_images = len(input_gpath_list_)
            # Final sanity check
            params["results_array"] = np.empty(num_images * RESULT_LENGTH, dtype=C_FLOAT)
            # Make the params_list
            params_list = [
                dark.net,
                _cast_list_to_c(ensure_bytes_strings(input_gpath_list_), C_CHAR),
                num_images,
            ] + list(params.values())
            DARKNET_CLIB.detect(*params_list)
            results_list = params["results_array"]
            conclude = time.time()
            results_list = results_list.reshape((num_images, -1))
            if not params["quiet"]:
                print("[pydarknet py] Took %r seconds to compute %d images" % (conclude - begin, num_images))
            for input_gpath, result_list in zip(input_gpath_list_, results_list):
                probs_list, bbox_list = np.split(result_list, [PROB_RESULT_LENGTH])
                assert probs_list.shape[0] == PROB_RESULT_LENGTH and bbox_list.shape[0] == BBOX_RESULT_LENGTH
                probs_list = probs_list.reshape((-1, len(class_list)))
                bbox_list = bbox_list.reshape((-1, 4))

                result_list_ = []
                for prob_list, bbox in zip(probs_list, bbox_list):
                    class_index = np.argmax(prob_list)
                    class_label = class_list[class_index] if len(class_list) > class_index else DEFAULT_CLASS
                    class_confidence = prob_list[class_index]
                    if class_confidence < params["sensitivity"]:
                        continue
                    result_dict = {
                        "xtl": int(np.around(bbox[0])),
                        "ytl": int(np.around(bbox[1])),
                        "width": int(np.around(bbox[2])),
                        "height": int(np.around(bbox[3])),
                        "class": class_label,
                        "confidence": float(class_confidence),
                    }
                    result_list_.append(result_dict)

                yield (input_gpath, result_list_)
            params["results_array"] = None
Example #8
def get_flann_params(algorithm='kdtree', **kwargs):
    """
    Returns the FLANN params that are relevant to the algorithm

    References:
        http://www.cs.ubc.ca/research/flann/uploads/FLANN/flann_manual-1.8.4.pdf

    Args:
        algorithm (str): (default = 'kdtree')

    Returns:
        dict: flann_params

    CommandLine:
        python -m vtool.nearest_neighbors --test-get_flann_params --algo=kdtree
        python -m vtool.nearest_neighbors --test-get_flann_params --algo=kmeans

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.nearest_neighbors import *  # NOQA
        >>> algorithm = ut.get_argval('--algo', default='kdtree')
        >>> flann_params = get_flann_params(algorithm)
        >>> result = ('flann_params = %s' % (ut.dict_str(flann_params),))
        >>> print(result)
    """
    _algorithm_options = [
        'linear', 'kdtree', 'kmeans', 'composite', 'kdtree_single'
    ]
    _centersinit_options = [
        'random',
        'gonzales',
        'kmeanspp',
    ]
    # Search params (for all algos)
    assert algorithm in _algorithm_options
    flann_params = {'algorithm': algorithm}
    if algorithm != 'linear':
        flann_params.update({'random_seed': -1})
    if algorithm in ['kdtree', 'composite']:
        # kdtree index parameters
        flann_params.update({
            'algorithm': _algorithm_options[1],
            'trees': 4,
            'checks': 32,  # how many leafs (features) to check in one search
        })
    elif algorithm in ['kmeans', 'composite']:
        # Kmeans index parameters
        flann_params.update({
            'branching': 32,
            'iterations': 5,
            'centers_init': _centersinit_options[2],
            'cb_index':
            0.5,  # cluster boundary index for searching the kmeans tree
            'checks': 32,  # how many leafs (features) to check in one search
        })
    elif algorithm == 'autotuned':
        flann_params.update({
            'algorithm': 'autotuned',
            'target_precision':
            .01,  # precision desired (used for autotuning, -1 otherwise)
            'build_weight': 0.01,  # build tree time weighting factor
            'memory_weight': 0.0,  # index memory weighting factor
            'sample_fraction':
            0.001,  # what fraction of the dataset to use for autotuning
        })
    elif algorithm == 'lsh':
        flann_params.update({
            'table_number_': 12,
            'key_size_': 20,
            'multi_probe_level_': 2,
        })

    flann_params = ut.update_existing(flann_params, kwargs, assert_exists=True)
    return flann_params
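
A usage sketch based on the function above (assuming vtool is importable); because `assert_exists=True` is passed to `ut.update_existing`, unknown keyword names raise rather than being silently added:

    # Override known kdtree parameters; a misspelled key would raise.
    flann_params = get_flann_params('kdtree', trees=8, checks=64)
    assert flann_params['trees'] == 8 and flann_params['checks'] == 64
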
Example #9
    def detect(rf, forest, input_gpath_list, **kwargs):
        """
            Run detection with a given loaded forest on a list of images

            Args:
                forest (object): the forest object that you want to use during
                    detection
                input_gpath_list (list of str): the list of image paths that you want
                    to test

            Kwargs:
                output_gpath_list (list of str, optional): the parallel list of output
                    image paths for detection debugging or results; defaults to None

                    When this list is None no images are outputted for any test
                    images, whereas the list can be a parallel list where some values
                    are strings and others are None
                output_scale_gpath_list (list of str, optional): the parallel list of output
                    scale image paths for detection debugging or results; defaults
                    to None

                    When this list is None no images are outputted for any test
                    images, whereas the list can be a parallel list where some values
                    are strings and others are None
                mode (int, optional): the mode that the detector outputs; defaults to 0
                    0 - Hough Voting - the output is a Hough image that predicts the
                        locations of the object centroids
                    1 - Classification Map - the output is a classification probability
                        map across the entire image where no regression information
                        is utilized
                sensitivity (float, optional): the sensitivity of the detector;

                        mode = 0 - defaults to 128.0
                        mode = 1 - defaults to 255.0

                scale_list (list of float, optional): the list of floats that specifies the scales
                    to try during testing;
                    defaults to [1.0, 0.80, 0.65, 0.50, 0.40, 0.30, 0.20, 0.10]

                        scale > 1.0 - Upscale the image
                        scale = 1.0 - Original image size
                        scale < 1.0 - Downscale the image

                    The list of scales highly impacts the performance of the detector and
                    should be carefully chosen

                    The scales are applied to BOTH the width and the height of the image
                    in order to scale the image and an interpolation of OpenCV's
                    CV_INTER_LANCZOS4 is used
                batch_size (int, optional): the number of images to test at a single
                    time in parallel (if None, the number of CPUs is used); defaults to None
                nms_min_area_contour (int, optional): the minimum size of a centroid
                    candidate region; defaults to 100
                nms_min_area_overlap (float, optional, DEPRECATED): the allowable overlap in
                    bounding box predictions; defaults to 0.75
                serial (bool, optional): flag indicating whether to run detection serially;

                        len(input_gpath_list) >= batch_size - defaults to False
                        len(input_gpath_list) <  batch_size - defaults to False

                verbose (bool, optional): verbose flag; defaults to object's verbose or
                    selectively enabled for this function

            Yields:
                (str, (list of dict)): tuple of the input image path and a list
                    of dictionaries specifying the detected bounding boxes

                    The dictionaries returned by this function are of the form:
                        centerx (int): the x position of the object's centroid

                            Note that the center of the bounding box and the location of
                            the object's centroid can be different
                        centery (int): the y position of the object's centroid

                            Note that the center of the bounding box and the location of
                            the object's centroid can be different
                        xtl (int): the top left x position of the bounding box
                        ytl (int): the top left y position of the bounding box
                        width (int): the width of the bounding box
                        height (int): the height of the bounding box
                        confidence (float): the confidence that this bounding box is of
                            the class specified by the trees used during testing
                        suppressed (bool, DEPRECATED): flag indicating whether this
                            bounding box has been marked to be suppressed by the
                            detection algorithm

        """
        # Default values
        params = odict([
            ('output_gpath_list',            None),
            ('output_scale_gpath_list',      None),
            ('mode',                         0),
            ('sensitivity',                  None),
            ('scale_list',                   [1.0, 0.80, 0.65, 0.50, 0.40, 0.30, 0.20, 0.10]),
            ('_scale_num',                   None),  # This value always gets overwritten
            ('batch_size',                   None),
            ('nms_min_area_contour',         100),
            ('nms_min_area_overlap',         0.75),
            ('results_val_array',            None),  # This value always gets overwritten
            ('results_len_array',            None),  # This value always gets overwritten
            ('RESULT_LENGTH',                None),  # This value always gets overwritten
            ('serial',                       False),
            ('verbose',                      rf.verbose),
            ('quiet',                        rf.quiet),
        ])

        ut.update_existing(params, kwargs)
        #print('Unused kwargs %r' % (set(kwargs.keys()) - set(params.keys()),))

        params['RESULT_LENGTH'] = RESULT_LENGTH
        output_gpath_list = params['output_gpath_list']
        output_scale_gpath_list = params['output_scale_gpath_list']
        # We no longer want these parameters in params
        del params['output_gpath_list']
        del params['output_scale_gpath_list']

        if params['sensitivity'] is None:
            assert params['mode'] in [0, 1], 'Invalid mode provided'
            if params['mode'] == 0:
                params['sensitivity'] = 128.0
            elif params['mode'] == 1:
                params['sensitivity'] = 255.0

        # Try to determine the parallel processing batch size
        if params['batch_size'] is None:
            try:
                cpu_count = multiprocessing.cpu_count()
                if not params['quiet']:
                    print('[pyrf py] Detecting with %d CPUs' % (cpu_count, ))
                params['batch_size'] = cpu_count
            except:
                params['batch_size'] = 8

        # To eliminate downtime, add 1 to batch_size
        # params['batch_size'] +=

        # Data integrity
        assert params['mode'] >= 0, \
            'Detection mode must be non-negative'
        assert 0.0 <= params['sensitivity'], \
            'Sensitivity must be non-negative'
        assert len(params['scale_list']) > 0 , \
            'The scale list cannot be empty'
        assert all( [ scale > 0.0 for scale in params['scale_list'] ]), \
            'All scales must be positive'
        assert params['batch_size'] > 0, \
            'Batch size must be positive'
        assert params['nms_min_area_contour'] > 0, \
            'Non-maximum suppression minimum contour area cannot be negative'
        assert 0.0 <= params['nms_min_area_overlap'] and params['nms_min_area_overlap'] <= 1.0, \
            'Non-maximum suppression minimum area overlap percentage must be between 0 and 1 (inclusive)'

        # Convert optional parameters to C-valid default options
        if output_gpath_list is None:
            output_gpath_list = [''] * len(input_gpath_list)
        elif output_gpath_list is not None:
            assert len(output_gpath_list) == len(input_gpath_list), \
                'Output image path list is invalid or is not the same length as the input list'
            for index in range(len(output_gpath_list)):
                if output_gpath_list[index] is None:
                    output_gpath_list[index] = ''
        output_gpath_list = _cast_list_to_c(ensure_bytes_strings(output_gpath_list), C_CHAR)

        if output_scale_gpath_list is None:
            output_scale_gpath_list = [''] * len(input_gpath_list)
        elif output_scale_gpath_list is not None:
            assert len(output_scale_gpath_list) == len(input_gpath_list), \
                'Output scale image path list is invalid or is not the same length as the input list'
            for index in range(len(output_scale_gpath_list)):
                if output_scale_gpath_list[index] is None:
                    output_scale_gpath_list[index] = ''
        output_scale_gpath_list = _cast_list_to_c(ensure_bytes_strings(output_scale_gpath_list), C_CHAR)

        # Prepare for C
        params['_scale_num'] = len(params['scale_list'])
        params['scale_list'] = _cast_list_to_c(params['scale_list'], C_FLOAT)
        if not params['quiet']:
            print('[pyrf py] Detecting over %d scales' % (params['_scale_num'], ))

        # Run training algorithm
        batch_size = params['batch_size']
        del params['batch_size']  # Remove this value from params
        batch_num = int(np.ceil(len(input_gpath_list) / float(batch_size)))
        # Detect for each batch
        for batch in ut.ProgressIter(range(batch_num), lbl="[pyrf py]", freq=1, invert_rate=True):
            begin = time.time()
            start = batch * batch_size
            end   = start + batch_size
            if end > len(input_gpath_list):
                end = len(input_gpath_list)
            input_gpath_list_        = input_gpath_list[start:end]
            output_gpath_list_       = output_gpath_list[start:end]
            output_scale_gpath_list_ = output_scale_gpath_list[start:end]
            num_images = len(input_gpath_list_)
            # Set image detection to be run in serial if less than half a batch to run
            if num_images < min(batch_size / 2, 8):
                params['serial'] = True
            # Final sanity check
            assert len(input_gpath_list_) == len(output_gpath_list_) and len(input_gpath_list_) == len(output_scale_gpath_list_)
            params['results_val_array'] = np.empty(num_images, dtype=NP_ARRAY_FLOAT)
            params['results_len_array'] = np.empty(num_images, dtype=C_INT)
            # Make the params_list
            params_list = [
                forest,
                _cast_list_to_c(ensure_bytes_strings(input_gpath_list_), C_CHAR),
                num_images,
                _cast_list_to_c(ensure_bytes_strings(output_gpath_list_), C_CHAR),
                _cast_list_to_c(ensure_bytes_strings(output_scale_gpath_list_), C_CHAR)
            ] + list(params.values())
            try:
                RF_CLIB.detect(rf.detector_c_obj, *params_list)
            except C.ArgumentError as ex:
                print('ERROR passing arguments to pyrf')
                print(' * params_list = %s' % (ut.repr3(params_list, nl=3),))
                ut.printex(ex)
            results_list = _extract_np_array(params['results_len_array'], params['results_val_array'], NP_ARRAY_FLOAT, NP_FLOAT32, RESULT_LENGTH)
            conclude = time.time()
            if not params['quiet']:
                print('[pyrf py] Took %r seconds to compute %d images' % (conclude - begin, num_images, ))
            for input_gpath, result_list in zip(input_gpath_list_, results_list):
                if params['mode'] == 0:
                    result_list_ = []
                    for result in result_list:
                        # Unpack result into a nice Python dictionary and return
                        temp = {}
                        temp['centerx']    = int(result[0])
                        temp['centery']    = int(result[1])
                        temp['xtl']        = int(result[2])
                        temp['ytl']        = int(result[3])
                        temp['width']      = int(result[4])
                        temp['height']     = int(result[5])
                        temp['confidence'] = float(np.round(result[6], decimals=4))
                        temp['suppressed'] = int(result[7]) == 1
                        result_list_.append(temp)
                    yield (input_gpath, result_list_)
                else:
                    yield (input_gpath, None)
            results_list = None
            params['results_val_array'] = None
            params['results_len_array'] = None
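
The batching arithmetic used in the detection loop above can be checked with a small self-contained sketch (ceil-dividing the image list into fixed-size slices):

    import numpy as np

    input_gpath_list = list(range(10))  # stand-in for 10 image paths
    batch_size = 4
    batch_num = int(np.ceil(len(input_gpath_list) / float(batch_size)))
    slices = [input_gpath_list[b * batch_size:(b + 1) * batch_size]
              for b in range(batch_num)]
    assert slices == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
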
Example #10
    def train(rf, train_pos_cpath_list, train_neg_cpath_list, trees_path, **kwargs):
        """
            Train a new forest with the given positive chips and negative chips.

            Args:
                train_pos_chip_path_list (list of str): list of positive training chips
                train_neg_chip_path_list (list of str): list of negative training chips
                trees_path (str): string path of where the newly trained trees are to be saved

            Kwargs:
                chips_norm_width (int, optional): Chip normalization width for resizing;
                    the chip is resized to have a width of chips_norm_width and
                    whatever resulting height in order to best match the original
                    aspect ratio; defaults to 128

                    If both chips_norm_width and chips_norm_height are specified,
                    the original aspect ratio of the chip is not respected
                chips_norm_height (int, optional): Chip normalization height for resizing;
                    the chip is resized to have a height of chips_norm_height and
                    whatever resulting width in order to best match the original
                    aspect ratio; defaults to None

                    If both chips_norm_width and chips_norm_height are specified,
                    the original aspect ratio of the chip is not respected
                chips_prob_flip_horizontally (float, optional): The probability
                    that a chip is flipped horizontally before training to make
                    the training set invariant to horizontal flips in the image;
                    defaults to 0.5; 0.0 <= chips_prob_flip_horizontally <= 1.0
                chips_prob_flip_vertically (float, optional): The probability
                    that a chip is flipped vertically before training to make
                    the training set invariant to vertical flips in the image;
                    defaults to 0.0; 0.0 <= chips_prob_flip_vertically <= 1.0
                patch_width (int, optional): the width of the patches for extraction
                    in the tree; defaults to 32; patch_width > 0
                patch_height (int, optional): the height of the patches for extraction
                    in the tree; defaults to 32; patch_height > 0
                patch_density (float, optional): the number of patches to extract from
                    each chip as a function of density; the density is calculated as:
                        samples = patch_density * [(chip_width * chip_height) / (patch_width * patch_height)]
                    and specifies how many times a particular pixel is sampled
                    from the chip; defaults to 4.0; patch_density > 0
                trees_num (int, optional): the number of trees to train in parallel;
                    defaults to 10
                trees_offset (int, optional): the tree number that begins the sequence
                    of when a tree is trained; defaults to None

                    If None is specified, the trees_offset value is automatically guessed
                    by using the number of files in trees_path

                    Tree model files are overwritten if the offset has overlap with
                    previously generated trees
                trees_max_depth (int, optional): the maximum depth of the tree during
                    training, this can be used for regularization; defaults to 16
                trees_max_patches (int, optional): the maximum number of patches that
                    should be extracted for training between positives AND negatives
                    (the detector attempts to balance between the number of positive
                    and negative patches to be roughly the same in quantity);
                    defaults to 64000
                trees_leaf_size (int, optional): the number of patches in a node that
                    specifies the threshold for becoming a leaf; defaults to 20

                    A node becomes a leaf under two conditions:
                        1.) The maximum tree depth has been reached (trees_max_depth)
                        2.) The number of patches in the node is less than trees_leaf_size
                            and splitting is stopped prematurely
                trees_pixel_tests (int, optional): the number of pixel tests to perform
                    at each node; defaults to 10000
                trees_prob_optimize_mode (float, optional): The probability of the
                    tree optimizing between classification and regression; defaults to
                    0.5
                serial (bool, optional): flag indicating whether to run training serially;
                    defaults to False
                verbose (bool, optional): verbose flag; defaults to object's verbose or
                    selectively enabled for this function

            Returns:
                None
        """
        # Default values
        params = odict([
            ('chips_norm_width',             128),
            ('chips_norm_height',            None),
            ('chips_prob_flip_horizontally', 0.5),
            ('chips_prob_flip_vertically',   0.0),
            ('patch_width',                  32),
            ('patch_height',                 32),
            ('patch_density',                4.0),
            ('trees_num',                    10),
            ('trees_offset',                 None),
            ('trees_max_depth',              16),
            ('trees_max_patches',            64000),
            ('trees_leaf_size',              20),
            ('trees_pixel_tests',            10000),
            ('trees_prob_optimize_mode',     0.5),
            ('serial',                       False),
            ('verbose',                      rf.verbose),
            ('quiet',                        rf.quiet),
        ])
        #params.update(kwargs)
        ut.update_existing(params, kwargs)
        # Make the tree path absolute
        trees_path = abspath(trees_path)

        # cout << "AIM FOR A SPLIT OF 24k - 32k POSITIVE & NEGATIVE PATCHES EACH FOR GOOD REGULARIZATION AT DEPTH 16" << endl;

        # Ensure the trees_path exists
        ut.ensuredir(trees_path)
        data_path = join(trees_path, 'data')
        if isdir(data_path):
            shutil.rmtree(data_path)
        ut.ensuredir(data_path)
        data_path_pos = join(data_path, 'pos')
        ut.ensuredir(data_path_pos)
        data_path_neg = join(data_path, 'neg')
        ut.ensuredir(data_path_neg)

        # Try to figure out the correct tree offset
        if params['trees_offset'] is None:
            direct = Directory(trees_path, include_file_extensions=['txt'])
            params['trees_offset'] = len(direct.files()) + 1
            if not params['quiet']:
                print('[pyrf py] Auto Tree Offset: %d' % params['trees_offset'])

        # Data integrity
        assert params['chips_norm_width'] is None or params['chips_norm_width'] >= params['patch_width'], \
            'Normalization width too small for patch width'
        assert params['chips_norm_height'] is None or params['chips_norm_height'] >= params['patch_height'], \
            'Normalization height too small for patch height'
        assert params['patch_width'] > 0, \
            'Patch width must be positive'
        assert params['patch_height'] > 0, \
            'Patch height must be positive'
        assert params['patch_density'] > 0.0, \
            'Patch density must be positive'
        assert 0.0 <= params['chips_prob_flip_horizontally'] and params['chips_prob_flip_horizontally'] <= 1.0, \
            'Horizontal flip probability must be between 0 and 1'
        assert 0.0 <= params['chips_prob_flip_vertically'] and params['chips_prob_flip_vertically'] <= 1.0, \
            'Vertical flip probability must be between 0 and 1'
        assert params['trees_num'] > 0, \
            'Number of trees must be positive'
        assert params['trees_offset'] >= 0, \
            'Tree offset must be non-negative'
        assert params['trees_max_depth'] > 1, \
            'Tree depth must be greater than 1'
        assert params['trees_max_patches'] % 2 == 0 and params['trees_max_patches'] > 0, \
            'A tree must have an even (positive) number of patches'
        assert 0.0 <= params['trees_prob_optimize_mode'] and params['trees_prob_optimize_mode'] <= 1.0, \
            'Tree optimization mode probability must be between 0 and 1 (inclusive)'
        assert all( [ exists(train_pos_cpath) for train_pos_cpath in train_pos_cpath_list ] ), \
            'At least one specified positive chip path does not exist'
        assert all( [ exists(train_neg_cpath) for train_neg_cpath in train_neg_cpath_list ] ), \
            'At least one specified negative chip path does not exist'
        # We will let the C++ code perform the patch size checks

        if not params['quiet']:
            print('[pyrf py] Caching positives into %r' % (data_path_pos, ))
        train_pos_chip_filename_list = _cache_data(train_pos_cpath_list, data_path_pos, **params)

        if not params['quiet']:
            print('[pyrf py] Caching negatives into %r' % (data_path_neg, ))
        train_neg_chip_filename_list = _cache_data(train_neg_cpath_list, data_path_neg, **params)

        # We no longer need these parameters (and they should not be transferred to the C++ library)
        del params['chips_norm_width']
        del params['chips_norm_height']
        del params['chips_prob_flip_horizontally']
        del params['chips_prob_flip_vertically']

        # Run training algorithm
        params_list = [
            data_path_pos,
            _cast_list_to_c(ensure_bytes_strings(train_pos_chip_filename_list), C_CHAR),
            len(train_pos_chip_filename_list),
            data_path_neg,
            _cast_list_to_c(ensure_bytes_strings(train_neg_chip_filename_list), C_CHAR),
            len(train_neg_chip_filename_list),
            trees_path,
        ] + list(params.values())
        RF_CLIB.train(rf.detector_c_obj, *params_list)
        if not params['quiet']:
            print('\n\n[pyrf py] *************************************')
            print('[pyrf py] Training Completed')
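
A worked instance of the `patch_density` formula quoted in the docstring above, with illustrative chip and patch sizes:

    # samples = patch_density * (chip_width * chip_height) / (patch_width * patch_height)
    patch_density = 4.0
    chip_width, chip_height = 128, 96
    patch_width, patch_height = 32, 32
    samples = patch_density * (chip_width * chip_height) / (patch_width * patch_height)
    assert samples == 48.0  # each pixel is sampled roughly patch_density times
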
Example #11
def get_review_edges(cm_list, ibs=None, review_cfg={}):
    r"""
    Needs to be moved to a better file. Maybe something to do with
    identification.

    Returns a list of matches that should be inspected
    This function is more lightweight than orgres or allres.
    Used in id_review_api and interact_qres2

    Args:
        cm_list (list): list of chip match objects
        ibs (object, optional): ibeis controller used for name and review lookups
        review_cfg (dict): review options, e.g. ranks_top (int, put all ranks less
            than this number into the graph), directed (bool), filter_reviewed (bool)

    Returns:
        tuple: review_edges = (qaid_arr, daid_arr, score_arr, rank_arr)

    CommandLine:
        python -m ibeis.gui.id_review_api get_review_edges:0

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> qreq_ = ibeis.main_helpers.testdata_qreq_()
        >>> cm_list = qreq_.execute()
        >>> review_cfg = dict(ranks_top=5, directed=True, name_scoring=False,
        >>>                   filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, ibs=ibs, review_cfg=review_cfg)
        >>> print(review_edges)

    Example1:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=5,dsize=20')
        >>> review_cfg = dict(ranks_top=5, directed=True, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs)
        >>> print(review_edges)

    Example3:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=1,dsize=100')
        >>> review_cfg = dict(ranks_top=1, directed=False, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs)
        >>> print(review_edges)

    Example4:
        >>> # UNSTABLE_DOCTEST
        >>> from ibeis.gui.id_review_api import *  # NOQA
        >>> import ibeis
        >>> cm_list, qreq_ = ibeis.testdata_cmlist('PZ_MTEST', a='default:qsize=10,dsize=10')
        >>> ranks_top = 3
        >>> review_cfg = dict(ranks_top=3, directed=False, name_scoring=False,
        >>>                   filter_reviewed=False, filter_true_matches=True)
        >>> review_edges = get_review_edges(cm_list, review_cfg=review_cfg, ibs=ibs)
        >>> print(review_edges)
    """
    import vtool as vt
    from ibeis.algo.hots import chip_match
    automatch_kw = REVIEW_CFG_DEFAULTS.copy()
    automatch_kw = ut.update_existing(automatch_kw, review_cfg)
    print('[resorg] get_review_edges(%s)' % (ut.repr2(automatch_kw)))
    print('[resorg] len(cm_list) = %d' % (len(cm_list)))
    qaids_stack  = []
    daids_stack  = []
    ranks_stack  = []
    scores_stack = []

    # For each QueryResult, Extract inspectable candidate matches
    if isinstance(cm_list, dict):
        cm_list = list(cm_list.values())

    if len(cm_list) == 0:
        return ([], [], [], [])

    for cm in cm_list:
        if isinstance(cm, chip_match.ChipMatch):
            daids  = cm.get_top_aids(ntop=automatch_kw['ranks_top'])
            scores = cm.get_top_scores(ntop=automatch_kw['ranks_top'])
            ranks  = np.arange(len(daids))
            qaids  = np.full(daids.shape, cm.qaid, dtype=daids.dtype)
        else:
            (qaids, daids, scores, ranks) = cm.get_match_tbldata(
                ranks_top=automatch_kw['ranks_top'],
                name_scoring=automatch_kw['name_scoring'],
                ibs=ibs)
        qaids_stack.append(qaids)
        daids_stack.append(daids)
        scores_stack.append(scores)
        ranks_stack.append(ranks)

    # Stack them into a giant array
    qaid_arr  = np.hstack(qaids_stack)
    daid_arr  = np.hstack(daids_stack)
    score_arr = np.hstack(scores_stack)
    rank_arr  = np.hstack(ranks_stack)

    # Sort by scores
    sortx = score_arr.argsort()[::-1]
    qaid_arr  = qaid_arr[sortx]
    daid_arr   = daid_arr[sortx]
    score_arr = score_arr[sortx]
    rank_arr  = rank_arr[sortx]

    # IS_REVIEWED DOES NOT WORK
    if automatch_kw['filter_reviewed']:
        _is_reviewed = ibs.get_annot_pair_is_reviewed(qaid_arr.tolist(),
                                                      daid_arr.tolist())
        is_unreviewed = ~np.array(_is_reviewed, dtype=bool)
        qaid_arr  = qaid_arr.compress(is_unreviewed)
        daid_arr   = daid_arr.compress(is_unreviewed)
        score_arr = score_arr.compress(is_unreviewed)
        rank_arr  = rank_arr.compress(is_unreviewed)

    # Remove directed edges
    if not automatch_kw['directed']:
        #nodes = np.unique(directed_edges.flatten())
        directed_edges = np.vstack((qaid_arr, daid_arr)).T
        #idx1, idx2 = vt.intersect2d_indices(directed_edges, directed_edges[:, ::-1])

        unique_rowx = vt.find_best_undirected_edge_indexes(directed_edges,
                                                           score_arr)

        qaid_arr  = qaid_arr.take(unique_rowx)
        daid_arr  = daid_arr.take(unique_rowx)
        score_arr = score_arr.take(unique_rowx)
        rank_arr  = rank_arr.take(unique_rowx)

    # Filter Double Name Matches
    if automatch_kw['filter_duplicate_true_matches']:
        # filter_dup_namepairs
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        if not automatch_kw['directed']:
            directed_name_edges = np.vstack((qnid_arr, dnid_arr)).T
            unique_rowx2 = vt.find_best_undirected_edge_indexes(
                directed_name_edges, score_arr)
        else:
            namepair_id_list = np.array(vt.compute_unique_data_ids_(
                list(zip(qnid_arr, dnid_arr))))
            unique_namepair_ids, namepair_groupxs = vt.group_indices(namepair_id_list)
            score_namepair_groups = vt.apply_grouping(score_arr, namepair_groupxs)
            unique_rowx2 = np.array(sorted([
                groupx[score_group.argmax()]
                for groupx, score_group in zip(namepair_groupxs, score_namepair_groups)
            ]), dtype=np.int32)
        qaid_arr  = qaid_arr.take(unique_rowx2)
        daid_arr  = daid_arr.take(unique_rowx2)
        score_arr = score_arr.take(unique_rowx2)
        rank_arr  = rank_arr.take(unique_rowx2)

    # Filter all true matches
    if automatch_kw['filter_true_matches']:
        qnid_arr = ibs.get_annot_nids(qaid_arr)
        dnid_arr = ibs.get_annot_nids(daid_arr)
        valid_flags = qnid_arr != dnid_arr
        qaid_arr  = qaid_arr.compress(valid_flags)
        daid_arr   = daid_arr.compress(valid_flags)
        score_arr = score_arr.compress(valid_flags)
        rank_arr  = rank_arr.compress(valid_flags)

    if automatch_kw['filter_photobombs']:
        unique_aids = ut.unique(ut.flatten([qaid_arr, daid_arr]))
        #grouped_aids, unique_nids = ibs.group_annots_by_name(unique_aids)
        invalid_nid_map = get_photobomber_map(ibs, qaid_arr)

        nid2_aids = ut.group_items(unique_aids, ibs.get_annot_nids(unique_aids))

        expanded_aid_map = ut.ddict(set)
        for nid1, other_nids in invalid_nid_map.items():
            for aid1 in nid2_aids[nid1]:
                for nid2 in other_nids:
                    for aid2 in nid2_aids[nid2]:
                        expanded_aid_map[aid1].add(aid2)
                        expanded_aid_map[aid2].add(aid1)

        valid_flags = [daid not in expanded_aid_map[qaid]
                       for qaid, daid in zip(qaid_arr, daid_arr)]
        qaid_arr  = qaid_arr.compress(valid_flags)
        daid_arr   = daid_arr.compress(valid_flags)
        score_arr = score_arr.compress(valid_flags)
        rank_arr  = rank_arr.compress(valid_flags)

    review_edges = (qaid_arr, daid_arr, score_arr, rank_arr)
    return review_edges
Example #12
def get_flann_params(algorithm='kdtree', **kwargs):
    """
    Returns the FLANN params that are relevant to the algorithm

    References:
        http://www.cs.ubc.ca/research/flann/uploads/FLANN/flann_manual-1.8.4.pdf

    Args:
        algorithm (str): (default = 'kdtree')

    Returns:
        dict: flann_params

    CommandLine:
        python -m vtool.nearest_neighbors --test-get_flann_params --algo=kdtree
        python -m vtool.nearest_neighbors --test-get_flann_params --algo=kmeans

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.nearest_neighbors import *  # NOQA
        >>> algorithm = ut.get_argval('--algo', default='kdtree')
        >>> flann_params = get_flann_params(algorithm)
        >>> result = ('flann_params = %s' % (ut.dict_str(flann_params),))
        >>> print(result)
    """
    _algorithm_options = [
        'linear',
        'kdtree',
        'kmeans',
        'composite',
        'kdtree_single'
    ]
    _centersinit_options = [
        'random',
        'gonzales',
        'kmeanspp',
    ]
    # Search params (for all algos)
    assert algorithm in _algorithm_options
    flann_params = {
        'algorithm': algorithm
    }
    if algorithm != 'linear':
        flann_params.update({
            'random_seed': -1
        })
    if algorithm in ['kdtree', 'composite']:
        # kdtree index parameters
        flann_params.update({
            'algorithm': _algorithm_options[1],
            'trees': 4,
            'checks': 32,  # how many leafs (features) to check in one search
        })
    elif algorithm in ['kmeans', 'composite']:
        # Kmeans index parameters
        flann_params.update({
            'branching': 32,
            'iterations': 5,
            'centers_init': _centersinit_options[2],
            'cb_index': 0.5,  # cluster boundary index for searching the kmeans tree
            'checks': 32,  # how many leafs (features) to check in one search
        })
    elif algorithm == 'autotuned':
        flann_params.update({
            'algorithm'        : 'autotuned',
            'target_precision' : .01,    # precision desired (used for autotuning, -1 otherwise)
            'build_weight'     : 0.01,   # build tree time weighting factor
            'memory_weight'    : 0.0,    # index memory weighting factor
            'sample_fraction'  : 0.001,  # what fraction of the dataset to use for autotuning
        })
    elif algorithm == 'lsh':
        flann_params.update({
            'table_number_': 12,
            'key_size_': 20,
            'multi_probe_level_': 2,
        })

    flann_params = ut.update_existing(flann_params, kwargs, assert_exists=True)
    return flann_params