Ejemplo n.º 1
0
def ensure_config_rowid_from_suffix(ibs, cfgsuffix_list):
    """
    Resolve config suffixes to rowids, adding configs only when required.

    The rowids are first looked up with ``ibs.get_config_rowid_from_suffix``.
    The adder is avoided on the common path because adders make the debug
    output large.

    Args:
        ibs: controller providing ``get_config_rowid_from_suffix`` and
            ``add_config``
        cfgsuffix_list: config suffixes to resolve

    Returns:
        the looked-up rowids if ``ut.flag_None_items`` flags none of them,
        otherwise the result of ``ibs.add_config(cfgsuffix_list)``
    """
    existing_rowids = ibs.get_config_rowid_from_suffix(cfgsuffix_list)
    missing_flags = ut.flag_None_items(existing_rowids)
    if not any(missing_flags):
        # Everything was found; skip the noisy adder
        return existing_rowids
    return ibs.add_config(cfgsuffix_list)
Ejemplo n.º 2
0
def ensure_config_rowid_from_suffix(ibs, cfgsuffix_list):
    """
    Get the config rowids for ``cfgsuffix_list``, creating them on a miss.

    Args:
        ibs: controller providing ``get_config_rowid_from_suffix`` and
            ``add_config``
        cfgsuffix_list: config suffixes to resolve

    Returns:
        the rowids from the getter when ``ut.flag_None_items`` flags nothing,
        else whatever ``ibs.add_config(cfgsuffix_list)`` returns
    """
    rowids = ibs.get_config_rowid_from_suffix(cfgsuffix_list)
    # The adder is only used when needed because it makes debug output large
    needs_add = any(ut.flag_None_items(rowids))
    return ibs.add_config(cfgsuffix_list) if needs_add else rowids
Ejemplo n.º 3
0
 def get_prop(self, attrname, idxs=None):
     """
     Return values of ``attrname``, computing and caching the missing ones.

     Values live in ``self.attrs[attrname]`` (one slot per item, initialised
     to None).  Slots flagged by ``ut.flag_None_items`` are filled by calling
     the method named ``'_' + attrname`` on the absolute file paths of the
     missing items.

     Args:
         attrname (str): name of the property; ``self._<attrname>`` must exist
         idxs (list): indices to fetch, or None for every item

     Returns:
         list: the requested values.  When ``idxs`` is None and nothing was
         missing this is the cache list itself, not a copy.
     """
     if attrname not in self.attrs:
         self.attrs[attrname] = [None] * len(self)
     cached = self.attrs[attrname]

     if idxs is None:
         idxs = list(range(len(cached)))
         values = cached
     else:
         values = ut.take(cached, idxs)

     is_missing = ut.flag_None_items(values)
     if not any(is_missing):
         return values

     # Compute only the entries that are not cached yet
     todo_idxs = ut.compress(idxs, is_missing)
     todo_fpaths = self._abs(ut.take(self.rel_fpath_list, todo_idxs))
     compute_func = getattr(self, '_' + attrname)
     computed = ut.ProgIter(compute_func(todo_fpaths),
                            length=len(todo_idxs),
                            label='Compute %s' % (attrname,))
     for slot, value in zip(todo_idxs, computed):
         cached[slot] = value
     return ut.take(cached, idxs)
Ejemplo n.º 4
0
 def get_prop(self, attrname, idxs=None):
     """
     Caching getter for the per-item property ``attrname``.

     The cache is ``self.attrs[attrname]``; it is created with one None slot
     per item on first use.  Any requested slot flagged by
     ``ut.flag_None_items`` is computed through ``self._<attrname>`` and
     written back to the cache before the values are returned.

     Args:
         attrname (str): property name
         idxs (list): indices to fetch (default None means all of them)

     Returns:
         list: values at ``idxs``; the cache list itself is returned when
         ``idxs`` is None and no value had to be computed.
     """
     if attrname not in self.attrs:
         self.attrs[attrname] = [None] * len(self)
     store = self.attrs[attrname]

     want_all = idxs is None
     if want_all:
         idxs = list(range(len(store)))
     result = store if want_all else ut.take(store, idxs)

     need_flags = ut.flag_None_items(result)
     if any(need_flags):
         need_idxs = ut.compress(idxs, need_flags)
         need_fpaths = self._abs(ut.take(self.rel_fpath_list, need_idxs))
         producer = getattr(self, '_' + attrname)(need_fpaths)
         progress = ut.ProgIter(producer, length=len(need_idxs),
                                label='Compute %s' % (attrname,))
         for pos, item in zip(need_idxs, progress):
             store[pos] = item
         # Re-read so the freshly computed values are included
         result = ut.take(store, idxs)
     return result
Ejemplo n.º 5
0
def split_analysis(ibs):
    """
    Review name splits suggested by implausibly fast movement between annots.

    Known annotations from two hard-coded days (2016-01-30 and 2016-01-31)
    are grouped by name.  For every name the maximum speed over its
    annotmatch pairs is computed, and names whose maximum speed is infinite
    or above ``MAX_SPEED`` are flagged.  The edges of the flagged names
    (the "bad" ones by default, the "good" ones with ``--good``) are loaded
    into an ``AnnotGraphWidget`` which is shown and returned.

    Args:
        ibs: controller used for every database query made here

    Returns:
        the ``viz_graph2.AnnotGraphWidget`` window that was shown

    Note:
        The GUI branch always returns, so the per-name inference and the
        sample-size statistics after it are currently unreachable.  They are
        kept so the toggle can be flipped back by hand.

    CommandLine:
        python -m ibeis.other.dbinfo split_analysis --show
        python -m ibeis split_analysis --show
        python -m ibeis split_analysis --show --good

    Ignore:
        # mount
        sshfs -o idmap=user lev:/ ~/lev

        # unmount
        fusermount -u ~/lev

    Example:
        >>> # DISABLE_DOCTEST GGR
        >>> from ibeis.other.dbinfo import *  # NOQA
        >>> import ibeis
        >>> dbdir = '/media/danger/GGR/GGR-IBEIS'
        >>> dbdir = dbdir if ut.checkpath(dbdir) else ut.truepath('~/lev/media/danger/GGR/GGR-IBEIS')
        >>> ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=False)
        >>> import guitool_ibeis as gt
        >>> gt.ensure_qtapp()
        >>> win = split_analysis(ibs)
        >>> ut.quit_if_noshow()
        >>> import plottool_ibeis as pt
        >>> gt.qtapp_loop(qwin=win)
        >>> #ut.show_if_requested()
    """
    import datetime
    day1 = datetime.date(2016, 1, 30)
    day2 = datetime.date(2016, 1, 31)

    filter_kw = {
        'multiple': None,
        #'view': ['right'],
        #'minqual': 'good',
        'is_known': True,
        'min_pername': 1,
    }

    def _aids_on_day(day):
        # Annots passing filter_kw whose unixtime lies between the 0.0 and
        # 1.0 fractions of ``day``
        day_kw = ut.dict_union(filter_kw, {
            'min_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day, 0.0)),
            'max_unixtime': ut.datetime_to_posixtime(ut.date_to_datetime(day, 1.0)),
        })
        return ibs.filter_annots_general(filter_kw=day_kw)

    aids1 = _aids_on_day(day1)
    aids2 = _aids_on_day(day2)
    all_aids = aids1 + aids2
    all_annots = ibs.annots(all_aids)
    print('%d annots on day 1' % (len(aids1),))
    print('%d annots on day 2' % (len(aids2),))
    print('%d annots overall' % (len(all_annots),))
    print('%d names overall' % (len(ut.unique(all_annots.nids)),))

    nid_list, annots_list = all_annots.group(all_annots.nids)

    REVIEWED_EDGES = True
    if REVIEWED_EDGES:
        aids_list = [annots.aids for annots in annots_list]
        #aid_pairs = [annots.get_am_aidpairs() for annots in annots_list]  # Slower
        aid_pairs = ibs.get_unflat_am_aidpairs(aids_list)  # Faster
    else:
        # ALL EDGES
        aid_pairs = [annots.get_aidpairs() for annots in annots_list]

    speeds_list = ibs.unflat_map(ibs.get_annotpair_speeds, aid_pairs)
    import vtool_ibeis as vt
    max_speeds = np.array([vt.safe_max(s, nans=False) for s in speeds_list])

    # Names whose max speed is nan / inf are handled separately from the rest
    nan_idx = np.where(np.isnan(max_speeds))[0]
    inf_idx = np.where(np.isinf(max_speeds))[0]
    bad_idx = sorted(ut.unique(ut.flatten([inf_idx, nan_idx])))
    ok_idx = ut.index_complement(bad_idx, len(max_speeds))

    print('#nan_idx = %r' % (len(nan_idx),))
    print('#inf_idx = %r' % (len(inf_idx),))
    print('#ok_idx = %r' % (len(ok_idx),))

    ok_speeds = max_speeds[ok_idx]
    ok_nids = ut.take(nid_list, ok_idx)
    ok_annots = ut.take(annots_list, ok_idx)
    sortx = np.argsort(ok_speeds)[::-1]  # fastest first

    sorted_speeds = np.array(ut.take(ok_speeds, sortx))
    sorted_annots = np.array(ut.take(ok_annots, sortx))
    sorted_nids = np.array(ut.take(ok_nids, sortx))  # NOQA

    sorted_speeds = np.clip(sorted_speeds, 0, 100)

    #idx = vt.find_elbow_point(sorted_speeds)
    #EXCESSIVE_SPEED = sorted_speeds[idx]
    # http://www.infoplease.com/ipa/A0004737.html
    # http://www.speedofanimals.com/animals/zebra
    #ZEBRA_SPEED_MAX  = 64  # km/h
    #ZEBRA_SPEED_RUN  = 50  # km/h
    ZEBRA_SPEED_SLOW_RUN  = 20  # km/h
    #ZEBRA_SPEED_FAST_WALK = 10  # km/h
    #ZEBRA_SPEED_WALK = 7  # km/h

    MAX_SPEED = ZEBRA_SPEED_SLOW_RUN
    #MAX_SPEED = ZEBRA_SPEED_WALK
    #MAX_SPEED = EXCESSIVE_SPEED

    flags = sorted_speeds > MAX_SPEED
    flagged_ok_annots = ut.compress(sorted_annots, flags)
    inf_annots = ut.take(annots_list, inf_idx)
    flagged_annots = inf_annots + flagged_ok_annots

    print('MAX_SPEED = %r km/h' % (MAX_SPEED,))
    print('%d annots with infinite speed' % (len(inf_annots),))
    print('%d annots with large speed' % (len(flagged_ok_annots),))
    print('Marking all pairs of annots above the threshold as non-matching')

    from ibeis.algo.graph import graph_iden
    import networkx as nx
    progkw = dict(freq=1, bs=True, est_window=len(flagged_annots))

    # Per flagged name: edges above the speed threshold vs. the remaining ones
    bad_edges_list = []
    good_edges_list = []
    for annots in ut.ProgIter(flagged_annots, lbl='flag speeding names', **progkw):
        edge_to_speeds = annots.get_speeds()
        bad_edges = [edge for edge, speed in edge_to_speeds.items() if speed > MAX_SPEED]
        good_edges = [edge for edge, speed in edge_to_speeds.items() if speed <= MAX_SPEED]
        bad_edges_list.append(bad_edges)
        good_edges_list.append(good_edges)
    all_bad_edges = ut.flatten(bad_edges_list)
    all_good_edges = ut.flatten(good_edges_list)
    # Count edges of the once-flattened lists.  Flattening the good edges a
    # second time would count annotation ids instead of edges.
    print('num_bad_edges = %r' % (len(all_bad_edges),))
    print('num_good_edges = %r' % (len(all_good_edges),))

    # Manual toggle: the GUI review path is always taken and returns
    if 1:
        from ibeis.viz import viz_graph2
        import guitool_ibeis as gt
        gt.ensure_qtapp()

        if ut.get_argflag('--good'):
            print('Looking at GOOD (no speed problems) edges')
            aid_pairs = all_good_edges
        else:
            print('Looking at BAD (speed problems) edges')
            aid_pairs = all_bad_edges
        aids = sorted(list(set(ut.flatten(aid_pairs))))
        infr = graph_iden.AnnotInference(ibs, aids, verbose=False)
        infr.initialize_graph()

        # Use random scores to randomize sort order
        rng = np.random.RandomState(0)
        scores = (-rng.rand(len(aid_pairs)) * 10).tolist()
        infr.graph.add_edges_from(aid_pairs)

        if True:
            # Estimate error bars from the tags already present on a sample
            # of the highest-scored edges
            edge_sample_size = 250
            pop_nids = ut.unique(ibs.get_annot_nids(ut.unique(ut.flatten(aid_pairs))))
            sorted_pairs = ut.sortedby(aid_pairs, scores)[::-1][0:edge_sample_size]
            sorted_nids = ibs.get_annot_nids(ut.take_column(sorted_pairs, 0))
            sample_size = len(ut.unique(sorted_nids))
            am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(*zip(*sorted_pairs))
            positive_tags = ['SplitCase', 'Photobomb']
            flags_list = [ut.replace_nones(ibs.get_annotmatch_prop(tag, am_rowids), 0)
                          for tag in positive_tags]
            print('edge_case_hist: ' + ut.repr3(
                ['%s %s' % (txt, sum(flags_)) for flags_, txt in zip(flags_list, positive_tags)]))
            is_positive = ut.or_lists(*flags_list)
            # A name counts as positive if any of its sampled edges is positive
            num_positive = sum(ut.lmap(any, ut.group_items(is_positive, sorted_nids).values()))
            pop = len(pop_nids)
            print('A positive is any edge flagged as a %s' % (ut.conj_phrase(positive_tags, 'or'),))
            print('--- Sampling wrt edges ---')
            print('edge_sample_size  = %r' % (edge_sample_size,))
            print('edge_population_size = %r' % (len(aid_pairs),))
            print('num_positive_edges = %r' % (sum(is_positive),))
            print('--- Sampling wrt names ---')
            print('name_population_size = %r' % (pop,))
            vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level=.95)

        nx.set_edge_attributes(infr.graph, name='score', values=dict(zip(aid_pairs, scores)))

        win = viz_graph2.AnnotGraphWidget(infr=infr, use_image=False,
                                          init_mode=None)
        win.populate_edge_model()
        win.show()
        return win

    # ------------------------------------------------------------------
    # NOTE: everything below is unreachable while the branch above returns.
    # ------------------------------------------------------------------

    # One inference object per flagged name, all annots under a single label
    infr_list = []
    iter_ = list(zip(flagged_annots, bad_edges_list))
    for annots, bad_edges in ut.ProgIter(iter_, lbl='creating inference', **progkw):
        aids = annots.aids
        nids = [1] * len(aids)
        infr = graph_iden.AnnotInference(ibs, aids, nids, verbose=False)
        infr.initialize_graph()
        infr.reset_feedback()
        infr_list.append(infr)

    iter_ = list(zip(infr_list, bad_edges_list))
    for infr, bad_edges in ut.ProgIter(iter_, lbl='adding speed edges', **progkw):
        flipped_edges = []
        for aid1, aid2 in bad_edges:
            # An edge that already exists in the graph gets "flipped"
            if infr.graph.has_edge(aid1, aid2):
                flipped_edges.append((aid1, aid2))
            infr.add_feedback((aid1, aid2), NEGTV)
        # Later calls overwrite earlier ones: orig < new < flip
        nx.set_edge_attributes(infr.graph, name='_speed_split', values='orig')
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'new' for edge in bad_edges})
        nx.set_edge_attributes(infr.graph, name='_speed_split', values={edge: 'flip' for edge in flipped_edges})

    def inference_stats(infr_list_):
        # Relabel every inference object and summarise its edge decisions
        relabel_stats = []
        for infr in infr_list_:
            num_ccs, num_inconsistent = infr.relabel_using_reviews()
            state_hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, 'decision').values())
            if POSTV not in state_hist:
                state_hist[POSTV] = 0
            hist = ut.dict_hist(nx.get_edge_attributes(infr.graph, '_speed_split').values())

            subgraphs = infr.positive_connected_compoments()
            subgraph_sizes = [len(g) for g in subgraphs]

            info = ut.odict([
                ('num_nonmatch_edges', state_hist[NEGTV]),
                ('num_match_edges', state_hist[POSTV]),
                ('frac_nonmatch_edges',  state_hist[NEGTV] / (state_hist[POSTV] + state_hist[NEGTV])),
                ('num_inconsistent', num_inconsistent),
                ('num_ccs', num_ccs),
                ('edges_flipped', hist.get('flip', 0)),
                ('edges_unchanged', hist.get('orig', 0)),
                ('bad_unreviewed_edges', hist.get('new', 0)),
                ('orig_size', len(infr.graph)),
                ('new_sizes', subgraph_sizes),
            ])
            relabel_stats.append(info)
        return relabel_stats

    relabel_stats = inference_stats(infr_list)

    print('\nAll Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_incon_list = np.array(ut.take_column(relabel_stats, 'num_inconsistent'))
    can_split_flags = num_incon_list == 0
    print('Can trivially split %d / %d' % (sum(can_split_flags), len(can_split_flags)))

    splittable_infrs = ut.compress(infr_list, can_split_flags)

    relabel_stats = inference_stats(splittable_infrs)

    print('\nTrival Split Info:')
    lines = []
    for key in relabel_stats[0].keys():
        if key in ['num_inconsistent']:
            continue
        data = ut.take_column(relabel_stats, key)
        if key == 'new_sizes':
            data = ut.flatten(data)
        lines.append('stats(%s) = %s' % (
            key, ut.repr2(ut.get_stats(data, use_median=True), precision=2)))
    print('\n'.join(ut.align_lines(lines, '=')))

    num_match_edges = np.array(ut.take_column(relabel_stats, 'num_match_edges'))
    num_nonmatch_edges = np.array(ut.take_column(relabel_stats, 'num_nonmatch_edges'))
    flags1 = np.logical_and(num_match_edges > num_nonmatch_edges, num_nonmatch_edges < 3)
    reasonable_infr = ut.compress(splittable_infrs, flags1)

    # NOTE: this selection replaces the flags1-based one made just above
    new_sizes_list = ut.take_column(relabel_stats, 'new_sizes')
    flags2 = [len(sizes) == 2 and sum(sizes) > 4 and (min(sizes) / max(sizes)) > .3
              for sizes in new_sizes_list]
    reasonable_infr = ut.compress(splittable_infrs, flags2)
    print('#reasonable_infr = %r' % (len(reasonable_infr),))

    for infr in ut.InteractiveIter(reasonable_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values()),))
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    rest = ~np.logical_or(flags1, flags2)
    nonreasonable_infr = ut.compress(splittable_infrs, rest)
    rng = np.random.RandomState(0)
    random_idx = ut.random_indexes(len(nonreasonable_infr) - 1, 15, rng=rng)
    random_infr = ut.take(nonreasonable_infr, random_idx)
    for infr in ut.InteractiveIter(random_infr):
        annots = ibs.annots(infr.aids)
        edge_to_speeds = annots.get_speeds()
        print('max_speed = %r' % (max(edge_to_speeds.values()),))
        infr.initialize_visual_node_attrs()
        infr.show_graph(use_image=True, only_reviewed=True)

    # Hard-coded scratch calculations of error bars / sample sizes
    # view-source:http://www.surveysystem.com/sscalc.htm
    pop = 279
    num_positive = 3
    sample_size = 15
    conf_level = .95
    #conf_level = .99
    vt.calc_error_bars_from_sample(sample_size, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38, num_positive, pop, conf_level)
    print('---')
    vt.calc_error_bars_from_sample(sample_size + 38 / 3, num_positive, pop, conf_level)
    print('---')

    vt.calc_error_bars_from_sample(15 + 38, num_positive=3, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(15, num_positive=3, pop=675, conf_level=.95)

    pop = 279
    #err_frac = .05  # 5%
    err_frac = .10  # 10%
    conf_level = .95
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)

    pop = 675
    vt.calc_sample_from_error_bars(err_frac, pop, conf_level)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.95, prior=.1)
    vt.calc_sample_from_error_bars(.05, pop, conf_level=.68, prior=.2)
    vt.calc_sample_from_error_bars(.10, pop, conf_level=.68)

    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.95)
    vt.calc_error_bars_from_sample(100, num_positive=5, pop=675, conf_level=.68)
Ejemplo n.º 6
0
    def add_rows_from_parent(table, parent_rowids, config=None, verbose=True,
                             return_num_dirty=False):
        """
        Lazily add rows for the parent rowids that do not have one yet.

        Existing rowids are looked up for ``parent_rowids`` under ``config``.
        Entries flagged by ``ut.flag_None_items`` are treated as "dirty": their
        column data is generated with ``table.preproc_func`` and inserted via
        ``table.db._add``, after which the rowids are looked up again.

        Args:
            parent_rowids: sequence of parent-rowid tuples, one tuple per row
                (each tuple is concatenated with the config rowid and the
                generated data columns to form the insert parameters)
            config: configuration passed to ``get_config_rowid``,
                ``_get_rowid_from_superkey`` and ``preproc_func``
            verbose (bool): print a summary line when there are dirty rows
            return_num_dirty (bool): also return the number of dirty rows

        Returns:
            ``rowid_list``, or ``(rowid_list, num_dirty)`` when
            ``return_num_dirty`` is set

        Raises:
            Exception: anything raised inside is reported with ``ut.printex``
                and then re-raised unchanged
        """
        try:
            # Get requested configuration id
            config_rowid = table.get_config_rowid(config)
            # Find leaf rowids that need to be computed
            initial_rowid_list = table._get_rowid_from_superkey(parent_rowids,
                                                                config=config)
            # Get corresponding "dirty" parent rowids
            isdirty_list = ut.flag_None_items(initial_rowid_list)
            dirty_parent_rowids = ut.compress(parent_rowids, isdirty_list)
            num_dirty = len(dirty_parent_rowids)
            num_total = len(parent_rowids)
            if num_dirty > 0:
                if verbose:
                    fmtstr = 'adding %d / %d new props to %r for config_rowid=%r'
                    print(fmtstr % (num_dirty, num_total, table.tablename,
                                    config_rowid))
                # Transpose: one sequence of rowids per parent instead of one
                # tuple of parent rowids per row
                args = zip(*dirty_parent_rowids)
                if table._asobject:
                    # Convenience: hand the preproc function objects obtained
                    # from the dependency cache instead of raw rowids
                    args = [table.depc.get_obj(parent, rowids)
                            for parent, rowids in zip(table.parents, args)]
                # CALL EXTERNAL PREPROCESSING / GENERATION FUNCTION
                proptup_gen = table.preproc_func(table.depc, *args, config=config)

                #proptup_gen = list(proptup_gen)

                if len(table._nested_idxs) > 0:
                    # TODO: rewrite
                    # Some data columns are nested (listed in _nested_idxs);
                    # splice their contents into one flat tuple per row
                    nested_nCols = len(table.data_colnames)
                    idxs1 = table._nested_idxs
                    mask1 = ut.index_to_boolmask(idxs1, nested_nCols)
                    mask2 = ut.not_list(mask1)
                    idxs2 = ut.where(mask2)
                    def unnest_data(data):
                        # zip() over a single sequence wraps each non-nested
                        # value in a 1-tuple
                        unnested_cols = list(zip(ut.take(data, idxs2)))
                        nested_cols = ut.take(data, idxs1)
                        grouped_items = [nested_cols, unnested_cols]
                        groupxs = [idxs1, idxs2]
                        unflat = ut.ungroup(grouped_items, groupxs, nested_nCols - 1)
                        return tuple(ut.flatten(unflat))
                    # Hack when a sql schema has tuples defined in it
                    proptup_gen = (unnest_data(data) for data in proptup_gen)

                #proptup_gen = list(proptup_gen)

                # Lazily build the insert parameters: parent rowids + config
                # rowid + generated data columns.  The loop variable named
                # parent_rowids is local to the generator expression and does
                # not rebind the function argument.
                dirty_params_iter = (
                    parent_rowids + (config_rowid,) + data_cols
                    for parent_rowids, data_cols in zip(dirty_parent_rowids, proptup_gen))
                #dirty_params_iter = list(dirty_params_iter)
                #print('dirty_params_iter = %s' % (ut.repr2(dirty_params_iter, nl=1),))
                CHUNKED_ADD = table.chunksize is not None
                if CHUNKED_ADD:
                    # Insert in chunks of table.chunksize rows
                    for dirty_params_chunk in ut.ichunks(dirty_params_iter,
                                                         chunksize=table.chunksize):
                        table.db._add(table.tablename, table._table_colnames,
                                      dirty_params_chunk,
                                      nInput=len(dirty_params_chunk))
                else:
                    # Single insert consuming the whole generator
                    nInput = num_dirty
                    table.db._add(table.tablename, table._table_colnames,
                                  dirty_params_iter, nInput=nInput)
                # Now that the dirty params are added get the correct order of rowids
                rowid_list = table._get_rowid_from_superkey(parent_rowids,
                                                            config=config)
            else:
                # Nothing was dirty; the initial lookup is already complete
                rowid_list = initial_rowid_list
            if return_num_dirty:
                return rowid_list, num_dirty
            else:
                return rowid_list
        except Exception as ex:
            # NOTE(review): the keys are names of locals given as strings;
            # presumably ut.printex looks them up in this frame, so renaming
            # a local would need a matching change here -- confirm.
            ut.printex(ex, 'error in add_rowids', keys=[
                'table', 'parent_rowids', 'config', 'args',
                'dirty_parent_rowids', 'table.preproc_func'])
            raise
Ejemplo n.º 7
0
def get_injured_sharks():
    """
    Download injury-tagged images from whaleshark.org into ``WS_Injury``.

    Steps, in order:
        1. Fetch the keyword index and keep keywords containing one of the
           injury patterns.
        2. Fetch the image list of every injury keyword and log histograms.
        3. Assign each keyword to a super-category (``nicks``, ``scar``,
           ``trunc`` or ``other_injury``).
        4. Download every image to ``~/tmpsharks`` and group files by their
           ``ut.get_file_uuid`` hash.
        5. Add the images to the ``WS_Injury`` database and record the
           original url of each one.
        6. Interactively show the HOG image next to the chip of every
           annotation, then run a scikit-image HOG demo on a local file.

    This is an interactive script: it needs network access, a GUI backend
    and the file ``./GOPR1120.JPG``.  It returns nothing.

    >>> from wbia.scripts.getshark import *  # NOQA
    """
    import itertools
    import requests

    url = 'http://www.whaleshark.org/getKeywordImages.jsp'
    resp = requests.get(url)
    assert resp.status_code == 200
    keywords = resp.json()['keywords']
    key_list = ut.take_column(keywords, 'indexName')
    key_to_nice = {k['indexName']: k['readableName'] for k in keywords}

    injury_patterns = [
        'injury',
        'net',
        'hook',
        'trunc',
        'damage',
        'scar',
        'nicks',
        'bite',
    ]

    # A keyword is injury related if it contains any of the patterns
    injury_keys = [
        key for key in key_list if any(pat in key for pat in injury_patterns)
    ]
    noninjury_keys = ut.setdiff(key_list, injury_keys)
    injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys)  # NOQA
    noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys)  # NOQA
    key_list = injury_keys

    keyed_images = {}
    for key in ut.ProgIter(key_list, lbl='reading index', bs=True):
        key_url = url + '?indexName={indexName}'.format(indexName=key)
        key_resp = requests.get(key_url)
        assert key_resp.status_code == 200
        key_imgs = key_resp.json()['images']
        keyed_images[key] = key_imgs

    key_hist = {key: len(imgs) for key, imgs in keyed_images.items()}
    key_hist = ut.sort_dict(key_hist, 'vals')
    logger.info(ut.repr3(key_hist))
    nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist)
    nice_key_hist = ut.sort_dict(nice_key_hist, 'vals')
    logger.info(ut.repr3(nice_key_hist))

    key_to_urls = {
        key: ut.take_column(vals, 'url')
        for key, vals in keyed_images.items()
    }

    # Pairwise number of shared image urls between keywords
    overlaps = {}
    overlap_img_list = []
    for k1, k2 in itertools.combinations(key_to_urls.keys(), 2):
        overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2])
        num_overlap = len(overlap_imgs)
        overlaps[(k1, k2)] = num_overlap
        if num_overlap > 0:
            overlap_img_list.extend(overlap_imgs)
    # Diagonal: number of urls of every keyword.  Done in its own loop so the
    # last keyword, which never appears first in a pair, is included too.
    for k1 in key_to_urls.keys():
        overlaps[(k1, k1)] = len(key_to_urls[k1])

    all_img_urls = list(set(ut.flatten(key_to_urls.values())))
    num_all = len(all_img_urls)  # NOQA
    logger.info('num_all = %r' % (num_all, ))

    # Determine super-categories
    categories = ['nicks', 'scar', 'trunc']

    # Force these keys into these categories
    key_to_cat = {'scarbite': 'other_injury'}

    cat_to_keys = ut.ddict(list)

    for key in key_to_urls.keys():
        if key in key_to_cat:
            cat_to_keys[key_to_cat[key]].append(key)
            continue
        # A key may fall into several categories; none means 'other_injury'
        uncategorized = True
        for cat in categories:
            if cat in key:
                cat_to_keys[cat].append(key)
                uncategorized = False
        if uncategorized:
            cat_to_keys['other_injury'].append(key)

    cat_urls = ut.ddict(list)
    for cat, keys in cat_to_keys.items():
        for key in keys:
            cat_urls[cat].extend(key_to_urls[key])

    cat_hist = {}
    for cat in list(cat_urls.keys()):
        cat_urls[cat] = list(set(cat_urls[cat]))
        cat_hist[cat] = len(cat_urls[cat])

    logger.info(ut.repr3(cat_to_keys))
    logger.info(ut.repr3(cat_hist))

    # Invert cat_to_keys (a key listed under several categories keeps the
    # last one encountered)
    key_to_cat = {
        val: key for key, vals in cat_to_keys.items() for val in vals
    }

    # Download the files to the local disk
    all_urls = ut.unique(
        ut.take_column(
            ut.flatten(
                ut.dict_subset(keyed_images,
                               ut.flatten(cat_to_keys.values())).values()),
            'url',
        ))

    dldir = ut.truepath('~/tmpsharks')
    from os.path import commonprefix, basename  # NOQA

    # File names are the url part after the common prefix, with '/' replaced
    prefix = commonprefix(all_urls)
    suffix_list = [url_[len(prefix):] for url_ in all_urls]
    fname_list = [suffix.replace('/', '--') for suffix in suffix_list]

    fpath_list = []
    for img_url, fname in ut.ProgIter(zip(all_urls, fname_list),
                                      lbl='downloading imgs',
                                      freq=1):
        fpath = ut.grab_file_url(img_url,
                                 download_dir=dldir,
                                 fname=fname,
                                 verbose=False)
        fpath_list.append(fpath)

    # Make sure we keep orig info: accumulate every field of every image
    # dict, plus the keywords it was listed under, per url
    url_to_info = ut.ddict(dict)
    for key, imgdict_list in keyed_images.items():
        for imgdict in imgdict_list:
            info = url_to_info[imgdict['url']]
            for k, v in imgdict.items():
                info[k] = info.get(k, [])
                info[k].append(v)
            info['keys'] = info.get('keys', [])
            info['keys'].append(key)

    info_list = ut.take(url_to_info, all_urls)
    for info in info_list:
        if len(set(info['correspondingEncounterNumber'])) > 1:
            assert False, 'url with two different encounter nums'

    # Combine duplicate tags: files with the same hash are grouped together
    hashid_list = [
        ut.get_file_uuid(fpath_, stride=8)
        for fpath_ in ut.ProgIter(fpath_list, bs=True)
    ]
    groupxs = ut.group_indices(hashid_list)[1]

    # Group properties by duplicate images (first path / url of each group)
    fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0)
    url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0)
    info_list_ = [
        ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_))
        for info_ in ut.apply_grouping(info_list, groupxs)
    ]

    encid_list_ = [
        ut.unique(info_['correspondingEncounterNumber'])[0]
        for info_ in info_list_
    ]
    keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_]
    cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_]

    clist = ut.ColumnLists({
        'gpath': fpath_list_,
        'url': url_list_,
        'encid': encid_list_,
        'key': keys_list_,
        'cat': cats_list_,
    })

    # Keep only rows whose path has an image extension
    flags = ut.lmap(ut.fpath_has_imgext, clist['gpath'])
    clist = clist.compress(flags)

    import wbia

    ibs = wbia.opendb('WS_Injury', allow_newdir=True)

    gid_list = ibs.add_images(clist['gpath'])
    clist['gid'] = gid_list

    # Drop the rows for which add_images produced no gid
    failed_flags = ut.flag_None_items(clist['gid'])
    logger.info('# failed %s' % (sum(failed_flags), ))
    passed_flags = ut.not_list(failed_flags)
    clist = clist.compress(passed_flags)
    ut.assert_all_not_None(clist['gid'])
    # ibs.get_image_uris_original(clist['gid'])
    ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True)

    # ut.zipflat(clist['cat'], clist['key'])
    if False:
        # Can run detection instead
        clist['tags'] = ut.zipflat(clist['cat'])
        aid_list = ibs.use_images_as_annotations(clist['gid'],
                                                 adjust_percent=0.01,
                                                 tags_list=clist['tags'])
        aid_list

    import wbia.plottool as pt
    from wbia import core_annots

    pt.qt4ensure()
    # Step through all annotations showing HOG image and chip side by side
    for aid in ut.InteractiveIter(ibs.get_valid_aids()):
        hogs = ibs.depc_annot.d.get_hog_hog([aid])
        chips = ibs.depc_annot.d.get_chips_img([aid])
        chip = chips[0]
        hogimg = core_annots.make_hog_block_image(hogs[0])
        pt.clf()
        pt.imshow(hogimg, pnum=(1, 2, 1))
        pt.imshow(chip, pnum=(1, 2, 2))
        fig = pt.gcf()
        fig.show()
        fig.canvas.draw()

    if False:
        # Show overlap matrix
        import pandas as pd
        import numpy as np

        dict_ = overlaps
        s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps))
        df = s.unstack()
        # Symmetrize: only one triangle of the matrix was filled in
        lhs, rhs = df.align(df.T)
        df = lhs.add(rhs, fill_value=0).fillna(0)

        label_texts = df.columns.values

        def label_ticks(label_texts):
            # Label both axes of the current axes with the keyword names
            truncated_labels = [repr(lbl[0:100]) for lbl in label_texts]
            ax = pt.gca()
            ax.set_xticks(list(range(len(label_texts))))
            ax.set_xticklabels(truncated_labels)
            for lbl in ax.get_xticklabels():
                lbl.set_rotation(-55)
                lbl.set_horizontalalignment('left')

            ax.set_yticks(list(range(len(label_texts))))
            ax.set_yticklabels(truncated_labels)
            for lbl in ax.get_yticklabels():
                lbl.set_horizontalalignment('right')
                lbl.set_verticalalignment('center')

        # Order rows and columns by decreasing row sum
        sortx = np.argsort(df.sum(axis=1).values)[::-1]
        df = df.take(sortx, axis=0)
        df = df.take(sortx, axis=1)

        fig = pt.figure(fnum=1)
        fig.clf()
        mat = df.values.astype(np.int32)
        mat[np.diag_indices(len(mat))] = 0
        # Builtin bool instead of the np.bool alias, which newer NumPy removed
        vmax = mat[(1 - np.eye(len(mat))).astype(bool)].max()
        import matplotlib.colors

        norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True)
        pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none')
        pt.plt.colorbar()
        pt.plt.grid('off')
        label_ticks(label_texts)
        fig.tight_layout()

    # overlap_df = pd.DataFrame.from_dict(overlap_img_list)

    class TmpImage(ut.NiceRepr):
        pass

    from skimage.feature import hog
    from skimage import data, color, exposure

    image2 = color.rgb2gray(data.astronaut())  # NOQA

    fpath = './GOPR1120.JPG'

    import vtool as vt

    for fpath in [fpath]:
        # Adapted from
        # http://scikit-image.org/docs/dev/auto_examples/plot_hog.html

        image = vt.imread(fpath, grayscale=True)
        image = pt.color_funcs.to_base01(image)

        fig = pt.figure(fnum=2)
        # NOTE(review): newer scikit-image spells this keyword `visualize`
        # -- confirm against the installed version.
        fd, hog_image = hog(
            image,
            orientations=8,
            pixels_per_cell=(16, 16),
            cells_per_block=(1, 1),
            visualise=True,
        )

        fig, (ax1, ax2) = pt.plt.subplots(1,
                                          2,
                                          figsize=(8, 4),
                                          sharex=True,
                                          sharey=True)

        # NOTE(review): 'box-forced' is not accepted by recent matplotlib
        # releases -- confirm against the installed version.
        ax1.axis('off')
        ax1.imshow(image, cmap=pt.plt.cm.gray)
        ax1.set_title('Input image')
        ax1.set_adjustable('box-forced')

        # Rescale histogram for better display
        hog_image_rescaled = exposure.rescale_intensity(hog_image,
                                                        in_range=(0, 0.02))

        ax2.axis('off')
        ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray)
        ax2.set_title('Histogram of Oriented Gradients')
        # Configure the second axes (ax1 was being set a second time)
        ax2.set_adjustable('box-forced')
        pt.plt.show()
Ejemplo n.º 8
0
def convert_hsdb_to_ibeis(hsdir, dbdir=None, **kwargs):
    r"""
    Ingest a HotSpotter database into a fresh IBEIS database.

    Reads ``name_table.csv``, ``image_table.csv`` and ``chip_table.csv`` from
    the directory returned by ``get_hsinternal(hsdir)``.  Images that are
    missing on disk, or that ``ibs.add_images`` fails to add, are discarded
    together with the chips that reference them.  The surviving names, images
    and annotations are added to the controller opened on ``dbdir`` and a
    success-flag file is written into that database directory.

    Args:
        hsdir (str): Directory to folder *containing* _hsdb
        dbdir (str): Output directory (defaults to same as hsdir)
        **kwargs: forwarded to ``IBEISControl.request_IBEISController``

    Returns:
        the controller that the data was ingested into

    Raises:
        AssertionError: if ``hsdir`` is not a HotSpotter database, if
            ``dbdir`` was already converted, or if the target database
            already contains images.

    CommandLine:
        python -m ibeis convert_hsdb_to_ibeis --dbdir ~/work/Frogs
        python -m ibeis convert_hsdb_to_ibeis --hsdir "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016"

    Ignore:
        from ibeis.dbio.ingest_hsdb import *  # NOQA
        hsdir = "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016"
        dbdir = "~/work/RotanTurtles"

    Example:
        >>> # SCRIPT
        >>> from ibeis.dbio.ingest_hsdb import *  # NOQA
        >>> dbdir = ut.get_argval('--dbdir', type_=str, default=None)
        >>> hsdir = ut.get_argval('--hsdir', type_=str, default=dbdir)
        >>> result = convert_hsdb_to_ibeis(hsdir)
        >>> print(result)
    """
    from ibeis.control import IBEISControl
    import utool as ut

    def _parse_flag(text):
        # bool() of any non-empty string is True, so a csv field such as
        # " 0" must be interpreted rather than cast.  Text that is neither
        # an integer nor an obviously false token stays truthy, which is
        # what the plain bool() cast used to produce.
        text = text.strip()
        try:
            return bool(int(text))
        except ValueError:
            return text.lower() not in ('', 'false', 'none')

    if dbdir is None:
        dbdir = hsdir
    print('[ingest] Ingesting hsdb: %r -> %r' % (hsdir, dbdir))

    assert is_hsdb(
        hsdir
    ), 'not a hotspotter database. cannot even force convert: hsdir=%r' % (
        hsdir, )
    assert not is_succesful_convert(dbdir), 'hsdir=%r is already converted' % (
        hsdir, )
    imgdir = join(hsdir, 'images')

    internal_dir = get_hsinternal(hsdir)
    nametbl_fpath = join(internal_dir, 'name_table.csv')
    imgtbl_fpath = join(internal_dir, 'image_table.csv')
    chiptbl_fpath = join(internal_dir, 'chip_table.csv')

    # READ NAME TABLE
    # The tables are seeded with the '____' name under hs_nid 0.
    name_text_list = ['____']
    name_hs_nid_list = [0]
    with open(nametbl_fpath, 'r') as nametbl_file:
        for row in csv.reader(nametbl_file):
            # Skip blank lines and '#' header / comment lines
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            hs_nid = int(row[0])
            name = row[1].strip()
            name_text_list.append(name)
            name_hs_nid_list.append(hs_nid)

    # READ IMAGE TABLE
    image_hs_gid_list = []
    image_gname_list = []
    image_reviewed_list = []
    with open(imgtbl_fpath, 'r') as imgtb_file:
        for row in csv.reader(imgtb_file):
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            hs_gid = int(row[0])
            gname_ = row[1].strip()
            # aif in hotspotter is equivalent to reviewed in IBEIS
            reviewed = _parse_flag(row[2])
            image_hs_gid_list.append(hs_gid)
            image_gname_list.append(gname_)
            image_reviewed_list.append(reviewed)

    image_gpath_list = [join(imgdir, gname) for gname in image_gname_list]

    ut.debug_duplicate_items(image_gpath_list)
    image_exist_flags = list(map(exists, image_gpath_list))
    missing_images = []
    for image_gpath, flag in zip(image_gpath_list, image_exist_flags):
        if not flag:
            missing_images.append(image_gpath)
            print('Image does not exist: %s' % image_gpath)

    if not all(image_exist_flags):
        print('Only %d / %d image exist' %
              (sum(image_exist_flags), len(image_exist_flags)))

    SEARCH_FOR_IMAGES = False
    if SEARCH_FOR_IMAGES:
        # Hack to try and find the missing images elsewhere under hsdir
        from os.path import basename
        subfiles = ut.glob(hsdir,
                           '*',
                           recursive=True,
                           fullpath=True,
                           with_files=True)
        basename_to_existing = ut.group_items(subfiles,
                                              ut.lmap(basename, subfiles))

        can_copy_list = []
        for gpath in missing_images:
            gname = basename(gpath)
            if gname not in basename_to_existing:
                print('gname = %r' % (gname, ))
            else:
                existing = basename_to_existing[gname]
                # Several candidates are only usable if they are all the
                # same file according to ut.get_file_uuid
                can_choose = True
                if len(existing) > 1:
                    if not ut.allsame(ut.lmap(ut.get_file_uuid, existing)):
                        can_choose = False
                if can_choose:
                    found = existing[0]
                    can_copy_list.append((found, gpath))
                else:
                    print(existing)

        # Nothing to transpose / copy when no candidate was found
        if can_copy_list:
            src, dst = ut.listT(can_copy_list)
            ut.copy_list(src, dst)

    # READ CHIP TABLE
    chip_bbox_list = []
    chip_theta_list = []
    chip_hs_nid_list = []
    chip_hs_gid_list = []
    chip_note_list = []
    with open(chiptbl_fpath, 'r') as chiptbl_file:
        for row in csv.reader(chiptbl_file):
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            hs_gid = int(row[1])
            hs_nid = int(row[2])
            bbox_text = row[3]
            theta = float(row[4])
            # Any further columns are the note text that the csv reader
            # split on commas; rejoin them with a placeholder token
            notes = '<COMMA>'.join([item.strip() for item in row[5:]])

            # The bbox is bracketed, space separated integers
            bbox_text = bbox_text.replace('[', '').replace(']', '').strip()
            bbox_text = re.sub('  *', ' ', bbox_text)
            bbox_strlist = bbox_text.split(' ')
            bbox = tuple(map(int, bbox_strlist))
            chip_hs_nid_list.append(hs_nid)
            chip_hs_gid_list.append(hs_gid)
            chip_bbox_list.append(bbox)
            chip_theta_list.append(theta)
            chip_note_list.append(notes)

    names = ut.ColumnLists({
        'hs_nid': name_hs_nid_list,
        'text': name_text_list,
    })

    images = ut.ColumnLists({
        'hs_gid': image_hs_gid_list,
        'gpath': image_gpath_list,
        'reviewed': image_reviewed_list,
        'exists': image_exist_flags,
    })

    chips = ut.ColumnLists({
        'hs_gid': chip_hs_gid_list,
        'hs_nid': chip_hs_nid_list,
        'bbox': chip_bbox_list,
        'theta': chip_theta_list,
        'note': chip_note_list,
    })

    IGNORE_MISSING_IMAGES = True
    if IGNORE_MISSING_IMAGES:
        # Ignore missing information
        print('pre')
        print('chips = %r' % (chips, ))
        print('images = %r' % (images, ))
        print('names = %r' % (names, ))
        missing_gxs = ut.where(ut.not_list(images['exists']))
        missing_gids = ut.take(images['hs_gid'], missing_gxs)
        gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid'))
        missing_cxs = ut.flatten(ut.take(gid_to_cxs, missing_gids))
        # Remove missing images and dependant chips
        images = images.remove(missing_gxs)
        chips = chips.remove(missing_cxs)
        # Keep only names still referenced by a chip (plus hs_nid 0)
        valid_nids = set(chips['hs_nid'] + [0])
        isvalid = [nid in valid_nids for nid in names['hs_nid']]
        names = names.compress(isvalid)
        print('post')
        print('chips = %r' % (chips, ))
        print('images = %r' % (images, ))
        print('names = %r' % (names, ))

    assert all(images['exists']), 'some images dont exist'

    ibs = IBEISControl.request_IBEISController(dbdir=dbdir,
                                               check_hsdb=False,
                                               **kwargs)
    assert len(ibs.get_valid_gids()) == 0, 'target database is not empty'

    # Add names, images, and annotations
    names['ibs_nid'] = ibs.add_names(names['text'])
    images['ibs_gid'] = ibs.add_images(
        images['gpath'])  # any failed gids will be None

    # Remove corrupted images (those that add_images returned None for)
    print('pre')
    print('chips = %r' % (chips, ))
    print('images = %r' % (images, ))
    print('names = %r' % (names, ))
    missing_gxs = ut.where(ut.flag_None_items(images['ibs_gid']))
    missing_gids = ut.take(images['hs_gid'], missing_gxs)
    gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid'))
    missing_cxs = ut.flatten(ut.take(gid_to_cxs, missing_gids))
    # Remove missing images and dependant chips
    chips = chips.remove(missing_cxs)
    images = images.remove(missing_gxs)
    print('post')
    print('chips = %r' % (chips, ))
    print('images = %r' % (images, ))
    print('names = %r' % (names, ))

    # Index chips using new ibs rowids
    ibs_gid_lookup = ut.dzip(images['hs_gid'], images['ibs_gid'])
    ibs_nid_lookup = ut.dzip(names['hs_nid'], names['ibs_nid'])
    try:
        chips['ibs_gid'] = ut.take(ibs_gid_lookup, chips['hs_gid'])
    except KeyError:
        # Chips that reference an unknown image id map to None
        chips['ibs_gid'] = [
            ibs_gid_lookup.get(index, None) for index in chips['hs_gid']
        ]
    try:
        chips['ibs_nid'] = ut.take(ibs_nid_lookup, chips['hs_nid'])
    except KeyError:
        # Chips that reference an unknown name id map to None
        chips['ibs_nid'] = [
            ibs_nid_lookup.get(index, None) for index in chips['hs_nid']
        ]

    ibs.add_annots(chips['ibs_gid'],
                   bbox_list=chips['bbox'],
                   theta_list=chips['theta'],
                   nid_list=chips['ibs_nid'],
                   notes_list=chips['note'])

    # Write file flagging successful conversion
    with open(join(ibs.get_ibsdir(), SUCCESS_FLAG_FNAME), 'w') as file_:
        file_.write('Successfully converted hsdir=%r' % (hsdir, ))
    print('finished ingest')
    return ibs
Ejemplo n.º 9
0
def add_feat_featweights(ibs, feat_rowid_list, config2_=None, verbose=not ut.QUIET, return_num_dirty=False):
    """ feat.featweight.add(feat_rowid_list)

    CRITICAL FUNCTION MUST EXIST FOR ALL DEPENDANTS
    Ensures a featweight row exists for each given feat rowid under the
    requested config, computing only the ones that are not stored yet.

    Args:
        feat_rowid_list: parent feat rowids; must not contain None
        config2_: passed through to the config / rowid getters and to the
            property generator
        verbose (bool): print a summary line when rows have to be added
        return_num_dirty (bool): also return how many rows were computed

    Returns:
        featweight_rowid_list of added (or already existing featweights),
        or the tuple ``(featweight_rowid_list, num_dirty)`` when
        ``return_num_dirty`` is set

    TemplateInfo:
        Tadder_pl_dependant
        parent = feat
        leaf = featweight

    Example0:
        >>> # SLOW_DOCTEST
        >>> from ibeis.control._autogen_featweight_funcs import *  # NOQA
        >>> ibs, config2_ = testdata_ibs()
        >>> from ibeis import constants as const
        >>> aid_list = ibs.get_valid_aids(species=const.TEST_SPECIES.ZEB_PLAIN)[:2]
        >>> if 'annot' != 'feat':
        ...     feat_rowid_list = ibs.get_annot_feat_rowids(aid_list, config2_=config2_, ensure=True)
        >>> featweight_rowid_list = ibs.add_feat_featweights(feat_rowid_list, config2_=config2_)
        >>> assert len(featweight_rowid_list) == len(feat_rowid_list)
        >>> ut.assert_all_not_None(featweight_rowid_list)

    Example1:
        >>> # SLOW_DOCTEST
        >>> from ibeis.control._autogen_featweight_funcs import *  # NOQA
        >>> ibs, config2_ = testdata_ibs('PZ_MTEST')
        >>> from ibeis import constants as const
        >>> aid_list = ibs.get_valid_aids(species=const.TEST_SPECIES.ZEB_PLAIN)[0:7]
        >>> if 'annot' != 'feat':
        ...     feat_rowid_list = ibs.get_annot_feat_rowids(aid_list, config2_=config2_, ensure=True)
        >>> sub_feat_rowid_list1 = feat_rowid_list[0:6]
        >>> sub_feat_rowid_list2 = feat_rowid_list[5:7]
        >>> sub_feat_rowid_list3 = feat_rowid_list[0:7]
        >>> sub_featweight_rowid_list1 = ibs.get_feat_featweight_rowids(sub_feat_rowid_list1, config2_=config2_, ensure=True)
        >>> ibs.get_feat_featweight_rowids(sub_feat_rowid_list1, config2_=config2_, ensure=True)
        >>> sub_featweight_rowid_list1, num_dirty0 = ibs.add_feat_featweights(sub_feat_rowid_list1, config2_=config2_, return_num_dirty=True)
        >>> assert num_dirty0 == 0
        >>> ut.assert_all_not_None(sub_featweight_rowid_list1)
        >>> ibs.delete_feat_featweight(sub_feat_rowid_list2)
        >>> #ibs.delete_feat_featweight(sub_feat_rowid_list2)?
        >>> sub_featweight_rowid_list3 = ibs.get_feat_featweight_rowids(sub_feat_rowid_list3, config2_=config2_, ensure=False)
        >>> # Only the last two should be None
        >>> ut.assert_all_not_None(sub_featweight_rowid_list3[0:5], 'sub_featweight_rowid_list3[0:5])')
        >>> ut.assert_eq(sub_featweight_rowid_list3[5:7], [None, None])
        >>> sub_featweight_rowid_list3_ensured, num_dirty1 = ibs.add_feat_featweights(sub_feat_rowid_list3, config2_=config2_,  return_num_dirty=True)
        >>> ut.assert_eq(num_dirty1, 2, 'Only two params should have been computed here')
        >>> ut.assert_all_not_None(sub_featweight_rowid_list3_ensured)
    """
    from ibeis.algo.preproc import preproc_featweight
    ut.assert_all_not_None(feat_rowid_list, ' feat_rowid_list')
    # Configuration the leaf rows are stored under
    config_rowid = ibs.get_featweight_config_rowid(config2_=config2_)
    # Leaf rowids as currently stored; None marks a row that is missing
    existing_rowids = get_feat_featweight_rowids_(
        ibs, feat_rowid_list, config2_=config2_)
    missing_flags = ut.flag_None_items(existing_rowids)
    pending_feat_rowids = ut.compress(feat_rowid_list, missing_flags)
    num_dirty = len(pending_feat_rowids)
    num_total = len(feat_rowid_list)

    if num_dirty == 0:
        # Everything is already present; reuse the initial lookup
        featweight_rowid_list = existing_rowids
    else:
        if verbose:
            print('[add_feat_featweights] adding %d / %d new featweight for config_rowid=%r'
                  % (num_dirty, num_total, config_rowid))
        # Dependant columns do not need true from_superkey getters, the
        # plain dependant rowid getter is enough to re-read the rows.
        lookup_rowids = ibs.get_feat_featweight_rowids_
        weight_stream = preproc_featweight.generate_featweight_properties(
            ibs, pending_feat_rowids, config2_=config2_)
        paired = zip(pending_feat_rowids, weight_stream)

        def _iter_new_rows():
            # Lazily attach parent and config rowids to each generated weight
            for feat_rowid, (fgweight,) in paired:
                yield (feat_rowid, config_rowid, fgweight)

        column_names = [
            'feature_rowid', 'config_rowid', 'featweight_forground_weight']
        ibs.dbcache._add(
            const.FEATURE_WEIGHT_TABLE, column_names, _iter_new_rows())
        # Re-read all rowids so the result lines up with feat_rowid_list
        featweight_rowid_list = lookup_rowids(
            feat_rowid_list, config2_=config2_)

    return (featweight_rowid_list, num_dirty) if return_num_dirty else featweight_rowid_list
Ejemplo n.º 10
0
def add_annot_chips(ibs, aid_list, qreq_=None):
    """ annot.chip.add(aid_list)

    CRITICAL FUNCTION MUST EXIST FOR ALL DEPENDANTS
    Adds / ensures / computes a dependant property.  Only annotations whose
    chip rowid is currently None are computed and inserted; the full rowid
    list is then re-read for every entry of ``aid_list``.

    Args:
         aid_list: annotation rowids; must not contain None
         qreq_: passed through to the chip config and chip rowid getters

    Returns:
        returns chip_rowid_list of added (or already existing chips)

    TemplateInfo:
        Tadder_pl_dependant
        parent = annot
        leaf = chip

    CommandLine:
        python -m ibeis.control.manual_chip_funcs --test-add_annot_chips

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_chip_funcs import *  # NOQA
        >>> ibs, qreq_ = testdata_ibs()
        >>> aid_list = ibs._get_all_aids()[::3]
        >>> chip_rowid_list = ibs.add_annot_chips(aid_list, qreq_=qreq_)
        >>> assert len(chip_rowid_list) == len(aid_list)
        >>> ut.assert_all_not_None(chip_rowid_list)

    Example2:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_chip_funcs import *  # NOQA
        >>> ibs, qreq_ = testdata_ibs()
        >>> aid_list = ibs._get_all_aids()[0:10]
        >>> sub_aid_list1 = aid_list[0:6]
        >>> sub_aid_list2 = aid_list[5:7]
        >>> sub_aid_list3 = aid_list[0:7]
        >>> sub_cid_list1 = ibs.get_annot_chip_rowids(sub_aid_list1, qreq_=qreq_, ensure=True)
        >>> ut.assert_all_not_None(sub_cid_list1)
        >>> ibs.delete_annot_chips(sub_aid_list2)
        >>> sub_cid_list3 = ibs.get_annot_chip_rowids(sub_aid_list3, qreq_=qreq_, ensure=False)
        >>> # Only the last two should be None
        >>> ut.assert_all_not_None(sub_cid_list3[0:5])
        >>> assert sub_cid_list3[5:7] == [None, None]
        >>> sub_cid_list3_ensured = ibs.get_annot_chip_rowids(sub_aid_list3, qreq_=qreq_, ensure=True)
        >>> # Only two params should have been computed here
        >>> ut.assert_all_not_None(sub_cid_list3_ensured)

    """
    from ibeis.model.preproc import preproc_chip
    ut.assert_all_not_None(aid_list, 'aid_list')
    # Get requested configuration id
    config_rowid = ibs.get_chip_config_rowid(qreq_=qreq_)
    # Find leaf rowids that need to be computed
    initial_chip_rowid_list = get_annot_chip_rowids_(ibs, aid_list, qreq_=qreq_)
    # Get corresponding "dirty" parent rowids
    isdirty_list = ut.flag_None_items(initial_chip_rowid_list)
    dirty_aid_list = ut.filter_items(aid_list, isdirty_list)
    num_dirty = len(dirty_aid_list)
    num_total = len(aid_list)
    if num_dirty > 0:
        print('[add_annot_chips] adding %d / %d new chips' % (num_dirty, num_total))
        # Dependant columns do not need true from_superkey getters.
        # We can use the Tgetter_pl_dependant_rowids_ instead
        get_rowid_from_superkey = functools.partial(
            ibs.get_annot_chip_rowids_, qreq_=qreq_)
        proptup_gen = preproc_chip.generate_chip_properties(ibs, dirty_aid_list)
        # Materialize the rows before handing them to the database layer
        dirty_params_iter = [
            (aid, config_rowid, chip_uri, chip_width, chip_height)
            for aid, (chip_uri, chip_width, chip_height,) in
            zip(dirty_aid_list, proptup_gen)
        ]
        colnames = ['annot_rowid', 'config_rowid',
                    'chip_uri', 'chip_width', 'chip_height']
        ibs.dbcache._add(const.CHIP_TABLE, colnames, dirty_params_iter)
        # Now that the dirty params are added get the correct order of rowids
        chip_rowid_list = get_rowid_from_superkey(aid_list)
    else:
        # Nothing was missing; the initial lookup is already complete
        chip_rowid_list = initial_chip_rowid_list
    return chip_rowid_list
Ejemplo n.º 11
0
def add_chip_feat(ibs, chip_rowid_list, config2_=None, verbose=not ut.QUIET, return_num_dirty=False):
    r""" chip.feat.add(chip_rowid_list)

    CRITICAL FUNCTION MUST EXIST FOR ALL DEPENDANTS
    Adds / ensures / computes a dependant property.  Only chips whose feat
    rowid is currently None are computed; the new rows are inserted in
    chunks and the full rowid list is then re-read for every entry of
    ``chip_rowid_list``.

    Args:
         chip_rowid_list: parent chip rowids; must not contain None
         config2_: passed through to the config / rowid getters and to the
             property generator
         verbose (bool): print progress messages while adding rows
         return_num_dirty (bool): also return how many rows were computed

    Returns:
        returns feat_rowid_list of added (or already existing feats), or the
        tuple ``(feat_rowid_list, num_dirty)`` when ``return_num_dirty`` is
        set

    TemplateInfo:
        python -m ibeis.templates.template_generator --key feat --funcname-filter "\<add_chip_feat\>" --modfname=manual_feat_funcs
        Tadder_pl_dependant
        parent = chip
        leaf = feat

    CommandLine:
        python -m ibeis.control.manual_feat_funcs --test-add_chip_feat

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_feat_funcs import *  # NOQA
        >>> ibs, config2_ = testdata_ibs()
        >>> ibs.get_annot_chip_rowids(ibs.get_valid_aids())  # Ensure chips are computed
        >>> chip_rowid_list = ibs._get_all_chip_rowids()[::3]
        >>> feat_rowid_list = ibs.add_chip_feat(chip_rowid_list, config2_=config2_)
        >>> assert len(feat_rowid_list) == len(chip_rowid_list), 'bad length'
        >>> ut.assert_all_not_None(feat_rowid_list)

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_feat_funcs import *  # NOQA
        >>> ibs, config2_ = testdata_ibs()
        >>> ibs.get_annot_chip_rowids(ibs.get_valid_aids())  # Ensure chips are computed
        >>> chip_rowid_list = ibs._get_all_chip_rowids()[0:10]
        >>> assert len(chip_rowid_list) == 10, 'chips not computed'
        >>> sub_chip_rowid_list1 = chip_rowid_list[0:6]
        >>> sub_chip_rowid_list2 = chip_rowid_list[5:7]
        >>> sub_chip_rowid_list3 = chip_rowid_list[0:7]
        >>> sub_feat_rowid_list1 = ibs.get_chip_feat_rowid(sub_chip_rowid_list1, config2_=config2_, ensure=True)
        >>> ibs.get_chip_feat_rowid(sub_chip_rowid_list1, config2_=config2_, ensure=True)
        >>> sub_feat_rowid_list1, num_dirty0 = ibs.add_chip_feat(sub_chip_rowid_list1, config2_=config2_, return_num_dirty=True)
        >>> assert num_dirty0 == 0, 'num_dirty0=%r' % (num_dirty0,)
        >>> ut.assert_all_not_None(sub_feat_rowid_list1)
        >>> ibs.delete_chip_feats(sub_chip_rowid_list2)
        >>> #ibs.delete_chip_feat(sub_chip_rowid_list2)?
        >>> sub_feat_rowid_list3 = ibs.get_chip_feat_rowid(sub_chip_rowid_list3, config2_=config2_, ensure=False)
        >>> # Only the last two should be None
        >>> ut.assert_all_not_None(sub_feat_rowid_list3[0:5], 'sub_feat_rowid_list3[0:5])')
        >>> assert sub_feat_rowid_list3[5:7] == [None, None], 'sub_feat_rowid_list3=%r' % (sub_feat_rowid_list3,)
        >>> sub_feat_rowid_list3_ensured, num_dirty1 = ibs.add_chip_feat(sub_chip_rowid_list3, config2_=config2_,  return_num_dirty=True)
        >>> assert num_dirty1 == 2, 'Only two params should have been computed here'
        >>> ut.assert_all_not_None(sub_feat_rowid_list3_ensured)
    """
    from ibeis.algo.preproc import preproc_feat
    ut.assert_all_not_None(chip_rowid_list, ' chip_rowid_list')
    # Get requested configuration id
    config_rowid = ibs.get_feat_config_rowid(config2_=config2_)
    # Find leaf rowids that need to be computed
    initial_feat_rowid_list = get_chip_feat_rowids_(
        ibs, chip_rowid_list, config2_=config2_)
    # Get corresponding "dirty" parent rowids
    isdirty_list = ut.flag_None_items(initial_feat_rowid_list)
    dirty_chip_rowid_list = ut.compress(chip_rowid_list, isdirty_list)
    num_dirty = len(dirty_chip_rowid_list)
    num_total = len(chip_rowid_list)
    if num_dirty > 0:
        if verbose:
            fmtstr = '[add_chip_feat] adding %d / %d new feat for config_rowid=%r'
            print(fmtstr % (num_dirty, num_total, config_rowid))
        # Dependant columns do not need true from_superkey getters.
        # We can use the Tgetter_pl_dependant_rowids_ instead
        get_rowid_from_superkey = functools.partial(
            ibs.get_chip_feat_rowids_, config2_=config2_)
        # CALL EXTERNAL PREPROCESSING / GENERATION FUNCTION
        proptup_gen = preproc_feat.generate_feat_properties(
            ibs, dirty_chip_rowid_list, config2_=config2_)
        # Lazily attach parent and config rowids to each generated property
        dirty_params_iter = (
            (chip_rowid, config_rowid, feature_nFeat,
             feature_kpt_arr, feature_vec_arr)
            for chip_rowid, (feature_nFeat, feature_kpt_arr, feature_vec_arr,) in
            zip(dirty_chip_rowid_list, proptup_gen)
        )
        colnames = ['chip_rowid', 'config_rowid',
                    'feature_num_feats', 'feature_keypoints', 'feature_vecs']
        CHUNKED_ADD = True
        if CHUNKED_ADD:
            chunksize = 128
            # Progress messages honour the verbose flag like the summary above
            if verbose:
                print('[add_chip_feat] adding to sql in chunks with chunksize=%r' % (chunksize,))
            for dirty_params_chunk in ut.ichunks(dirty_params_iter, chunksize=chunksize):
                if verbose:
                    print('[add_chip_feat] adding feature chunk to sql')
                nInput = len(dirty_params_chunk)
                ibs.dbcache._add(
                    const.FEATURE_TABLE, colnames, dirty_params_chunk, nInput=nInput)
        else:
            nInput = num_dirty
            ibs.dbcache._add(
                const.FEATURE_TABLE, colnames, dirty_params_iter, nInput=nInput)

        # Now that the dirty params are added get the correct order of rowids
        feat_rowid_list = get_rowid_from_superkey(chip_rowid_list)
    else:
        # Nothing was missing; the initial lookup is already complete
        feat_rowid_list = initial_feat_rowid_list
    if return_num_dirty:
        return feat_rowid_list, num_dirty
    return feat_rowid_list
Ejemplo n.º 12
0
def add_annot_chips(ibs, aid_list, config2_=None, verbose=not ut.QUIET, return_num_dirty=False):
    r"""
    annot.chip.add(aid_list)

    CRITICAL FUNCTION MUST EXIST FOR ALL DEPENDANTS
    Ensures a chip row exists for each given annotation under the requested
    config, computing only the ones that are not stored yet.

    Args:
         aid_list: annotation rowids; must not contain None
         config2_: passed through to the config / rowid getters and to the
             property generator
         verbose (bool): print a summary line when rows have to be added
         return_num_dirty (bool): also return how many rows were computed

    Returns:
        returns chip_rowid_list of added (or already existing chips), or the
        tuple ``(chip_rowid_list, num_dirty)`` when ``return_num_dirty`` is
        set

    TemplateInfo:
        python -m ibeis.templates.template_generator --key chip --funcname-filter "\<add_annot_chips\>" --modfname=manual_chip_funcs
        python -m ibeis.templates.template_generator --key chip --modfname=manual_chip_funcs --funcname-filter "\<add_annot_chip"

        Tadder_pl_dependant
        parent = annot
        leaf = chip

    CommandLine:
        python -m ibeis.control.manual_chip_funcs --test-add_annot_chips

    RESTful:
        Method: POST
        URL:    /api/annot_chip/

    Example0:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_chip_funcs import *  # NOQA
        >>> ibs, config2_ = testdata_ibs()
        >>> aid_list = ibs._get_all_aids()[::3]
        >>> chip_rowid_list = ibs.add_annot_chips(aid_list, config2_=config2_)
        >>> assert len(chip_rowid_list) == len(aid_list)
        >>> ut.assert_all_not_None(chip_rowid_list)

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.control.manual_chip_funcs import *  # NOQA
        >>> ibs, config2_ = testdata_ibs()
        >>> aid_list = ibs._get_all_aids()[0:10]
        >>> sub_aid_list1 = aid_list[0:6]
        >>> sub_aid_list2 = aid_list[5:7]
        >>> sub_aid_list3 = aid_list[0:7]
        >>> sub_chip_rowid_list1 = ibs.get_annot_chip_rowids(sub_aid_list1, config2_=config2_, ensure=True)
        >>> ibs.get_annot_chip_rowids(sub_aid_list1, config2_=config2_, ensure=True)
        >>> sub_chip_rowid_list1, num_dirty0 = ibs.add_annot_chips(sub_aid_list1, config2_=config2_, return_num_dirty=True)
        >>> assert num_dirty0 == 0
        >>> ut.assert_all_not_None(sub_chip_rowid_list1)
        >>> ibs.delete_annot_chips(sub_aid_list2)
        >>> #ibs.delete_annot_chip(sub_aid_list2)?
        >>> sub_chip_rowid_list3 = ibs.get_annot_chip_rowids(sub_aid_list3, config2_=config2_, ensure=False)
        >>> # Only the last two should be None
        >>> ut.assert_all_not_None(sub_chip_rowid_list3[0:5], 'sub_chip_rowid_list3[0:5])')
        >>> assert sub_chip_rowid_list3[5:7] == [None, None]
        >>> sub_chip_rowid_list3_ensured, num_dirty1 = ibs.add_annot_chips(sub_aid_list3, config2_=config2_, return_num_dirty=True)
        >>> assert num_dirty1 == 2, 'Only two params should have been computed here'
        >>> ut.assert_all_not_None(sub_chip_rowid_list3_ensured)
    """
    from ibeis.algo.preproc import preproc_chip
    ut.assert_all_not_None(aid_list, ' annot_rowid_list')
    # Configuration the leaf rows are stored under
    config_rowid = ibs.get_chip_config_rowid(config2_=config2_)
    # Leaf rowids as currently stored; None marks a row that is missing
    existing_rowids = get_annot_chip_rowids_(ibs, aid_list, config2_=config2_)
    missing_flags = ut.flag_None_items(existing_rowids)
    pending_aids = ut.compress(aid_list, missing_flags)
    num_dirty = len(pending_aids)
    num_total = len(aid_list)

    if num_dirty == 0:
        # Everything is already present; reuse the initial lookup
        chip_rowid_list = existing_rowids
    else:
        if verbose:
            print('[add_annot_chips] adding %d / %d new chip for config_rowid=%r'
                  % (num_dirty, num_total, config_rowid))
        # Dependant columns do not need true from_superkey getters, the
        # plain dependant rowid getter is enough to re-read the rows.
        lookup_rowids = ibs.get_annot_chip_rowids_
        prop_stream = preproc_chip.generate_chip_properties(
            ibs, pending_aids, config2_=config2_)
        paired = zip(pending_aids, prop_stream)

        def _iter_new_rows():
            # Lazily attach parent and config rowids to each generated chip
            for aid, (chip_uri, chip_width, chip_height) in paired:
                yield (aid, config_rowid, chip_uri, chip_width, chip_height)

        row_stream = _iter_new_rows()
        column_names = ['annot_rowid', 'config_rowid',
                        'chip_uri', 'chip_width', 'chip_height']
        CHUNKED_ADD = True
        if not CHUNKED_ADD:
            ibs.dbcache._add(
                const.CHIP_TABLE, column_names, row_stream, nInput=num_dirty)
        else:
            batch_size = 32 if ut.WIN32 else 128
            for batch in ut.ichunks(row_stream, chunksize=batch_size):
                batch_len = len(batch)
                ibs.dbcache._add(
                    const.CHIP_TABLE, column_names, batch, nInput=batch_len)
        # Re-read all rowids so the result lines up with aid_list
        chip_rowid_list = lookup_rowids(aid_list, config2_=config2_)

    return (chip_rowid_list, num_dirty) if return_num_dirty else chip_rowid_list