Example #1
0
def on_pick(event, infr=None):
    """
    Log details about the artist carried by a pick event.

    The plot data attached to the artist (looked up with
    ``pt.get_plotdat_dict``) decides what is reported: node attributes,
    edge attributes, or a raw dump when neither key is present. A timestamp
    is always logged last.

    Args:
        event: pick event; only its ``artist`` attribute is read.
        infr: object supplying ``visual_node_attrs``, ``graph``,
            ``pos_graph`` and ``repr_edge_data``. It is dereferenced whenever
            node or edge data is found, so the ``None`` default only works
            for artists that carry neither.
    """
    import wbia.plottool as pt

    logger.info('ON PICK: %r' % (event,))
    picked = event.artist
    meta = pt.get_plotdat_dict(picked)
    has_meta = bool(meta)

    if has_meta and 'node' in meta:
        # Split the node attributes into visual ones and everything else.
        attrs = ut.sort_dict(meta['node_data'].copy())
        visual_attrs = ut.dict_subset(attrs, infr.visual_node_attrs, None)
        other_attrs = ut.delete_dict_keys(attrs, infr.visual_node_attrs)
        picked_node = meta['node']
        other_attrs['degree'] = infr.graph.degree(picked_node)
        pcc_label = infr.pos_graph.node_label(picked_node)
        logger.info('visual_node_data: ' + ut.repr2(visual_attrs, nl=1))
        logger.info('node_data: ' + ut.repr2(other_attrs, nl=1))
        ut.cprint('node: ' + ut.repr2(meta['node']), 'blue')
        logger.info('(pcc) node_label = %r' % (pcc_label,))
        logger.info('artist = %r' % (picked,))
    elif has_meta and 'edge' in meta:
        edge_attrs = ut.sort_dict(meta['edge_data'].copy())
        logger.info(infr.repr_edge_data(edge_attrs))
        ut.cprint('edge: ' + ut.repr2(meta['edge']), 'blue')
        logger.info('artist = %r' % (picked,))
    elif has_meta:
        # Plot data exists but describes neither a node nor an edge.
        logger.info('???: ' + ut.repr2(meta))

    logger.info(ut.get_timestamp())
Example #2
0
def cheetah_stats(ibs):
    """
    Print annotation-per-name statistics for two right-side view filters.

    For each filter the annotations are grouped by name, and two histograms
    of the group sizes are printed: the exact size frequencies and a coarser
    version where sizes are bucketed into ranges.

    Args:
        ibs: controller object providing ``annots`` and
            ``filter_annots_general``.
    """
    def bin_mapper(count):
        # Sizes below 5 get a unit-width bucket of their own.
        if count < 5:
            return (count, count + 1)
        limit = None
        # Buckets of width 5 up to 20, then of width 10 up to 50.
        for limit, width in ((20, 5), (50, 10)):
            if count < limit:
                start = (count // width) * width
                return (start, start + width)
        # Everything at or above the last limit shares one open-ended bucket.
        if count >= limit:
            return (limit, None)
        assert False, str(count)

    right_views = ['right', 'frontright', 'backright']
    filters = [
        {'view': list(right_views), 'minqual': 'good'},
        {'view': list(right_views)},
    ]

    for filtkw in filters:
        annots = ibs.annots(ibs.filter_annots_general(**filtkw))
        unique_nids, grouped_annots = annots.group(annots.nids)
        annots_per_name = ut.lmap(len, grouped_annots)
        annots_per_name_freq = ut.dict_hist(annots_per_name)

        range_counts = ut.ddict(int)
        for group_size in annots_per_name:
            range_counts[bin_mapper(group_size)] += 1
        range_counts = ut.sort_dict(range_counts)

        print('------------')
        print('filters = %s' % ut.repr4(filtkw))
        print('num_annots = %r' % (len(annots),))
        print('num_names = %r' % (len(unique_nids),))
        print('annots_per_name_freq = %s' % (ut.repr4(annots_per_name_freq),))
        print('annots_per_name_freq (ranges) = %s' % (ut.repr4(range_counts),))
        # Every name must have landed in exactly one bucket.
        assert sum(range_counts.values()) == len(unique_nids)
Example #3
0
    def get_cfgstr(nnindexer, noquery=False):
        r"""
        Build a string identifying the FLANN configuration and indexed vectors.

        Args:
            noquery (bool): if True the ``checks`` parameter is left out, so
                the string only depends on what is needed to build the index
                (default = False)

        Returns:
            str: flann_cfgstr

        CommandLine:
            python -m wbia.algo.hots.neighbor_index --test-get_cfgstr

        Example:
            >>> # DISABLE_DOCTEST
            >>> from wbia.algo.hots.neighbor_index import *  # NOQA
            >>> import wbia
            >>> cfgdict = dict(fg_on=False)
            >>> qreq_ = wbia.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False')
            >>> qreq_.load_indexer()
            >>> nnindexer = qreq_.indexer
            >>> noquery = True
            >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
            >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
            >>> print(result)
            flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((11260,128)gj5nea@ni0%f3aja)
        """
        cfgstr_parts = []
        use_params_hash = True
        use_data_hash = True

        if use_params_hash:
            # Start from the defaults of the chosen algorithm and overlay the
            # values this indexer was configured with.
            algorithm = nnindexer.flann_params['algorithm']
            params = ut.sort_dict(vt.get_flann_params(algorithm))
            ut.update_existing(params, nnindexer.flann_params)
            if noquery:
                ut.delete_dict_keys(params, ['checks'])

            abbreviations = {
                'algorithm': 'algo',
                'checks': 'chks',
                'random_seed': 'seed',
                'trees': 't',
            }
            short_items = []
            for name, value in six.iteritems(params):
                # Values are truncated to at most seven characters.
                short_items.append((abbreviations.get(name, name),
                                    str(value)[0:7]))
            short_params = ut.odict(short_items)

            valsig = ut.repr2(short_params, nl=False, explicit=True,
                              strvals=True)
            valsig = valsig.lstrip('dict').replace(' ', '')
            cfgstr_parts.append('_FLANN(' + valsig + ')')

        if use_data_hash:
            cfgstr_parts.append(ut.hashstr_arr(nnindexer.idx2_vec, '_VECS'))

        return ''.join(cfgstr_parts)
Example #4
0
 def print_size_info(inva):
     """Log the sizes reported by ``inva.get_size_info()`` and their total."""
     size_dict = ut.sort_dict(inva.get_size_info(), 'vals', ut.identity)
     total_nbytes = sum(size_dict.values())
     # Render each size as a byte string and align the table on the colons.
     readable = ut.map_dict_vals(ut.byte_str2, size_dict)
     table = ut.repr3(readable, strvals=True)
     logger.info(ut.align(table, ':'))
     logger.info('total_nbytes = %r' % (ut.byte_str2(total_nbytes),))
Example #5
0
    def get_cfgstr(nnindexer, noquery=False):
        r"""
        Return a string describing the FLANN parameters and indexed vectors.

        Args:
            noquery (bool): if True the ``checks`` parameter is dropped, so
                the string is only relevant to building the index
                (default = False)

        Returns:
            str: flann_cfgstr

        CommandLine:
            python -m ibeis.algo.hots.neighbor_index --test-get_cfgstr

        Example:
            >>> # DISABLE_DOCTEST
            >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
            >>> import ibeis
            >>> cfgdict = dict(fg_on=False)
            >>> qreq_ = ibeis.testdata_qreq_(defaultdb='testdb1', p='default:fg_on=False')
            >>> qreq_.load_indexer()
            >>> nnindexer = qreq_.indexer
            >>> noquery = True
            >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
            >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
            >>> print(result)
            flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((11260,128)gj5nea@ni0%f3aja)
        """
        pieces = []
        use_params_hash = True
        use_data_hash = True

        if use_params_hash:
            # Defaults for the selected algorithm, overridden by the values
            # stored on this indexer for keys the defaults already have.
            defaults = vt.get_flann_params(nnindexer.flann_params['algorithm'])
            clean_params = ut.sort_dict(defaults)
            ut.updateif_haskey(clean_params, nnindexer.flann_params)
            if noquery:
                ut.delete_dict_keys(clean_params, ['checks'])

            shortnames = {
                'algorithm': 'algo',
                'checks': 'chks',
                'random_seed': 'seed',
                'trees': 't',
            }
            # Abbreviate known keys and clip every value to seven characters.
            short_params = ut.odict(
                (shortnames.get(key, key), str(val)[0:7])
                for key, val in six.iteritems(clean_params)
            )
            signature = ut.dict_str(short_params, nl=False, explicit=True,
                                    strvals=True)
            signature = signature.lstrip('dict').replace(' ', '')
            pieces.append('_FLANN(' + signature + ')')

        if use_data_hash:
            pieces.append(ut.hashstr_arr(nnindexer.idx2_vec, '_VECS'))

        return ''.join(pieces)
Example #6
0
def graph_info(graph, verbose=False):
    """
    Summarize the structure and attribute usage of a networkx-style graph.

    Args:
        graph: graph exposing the networkx interface used below
            (``nodes``, ``edges``, ``is_directed``, ``is_multigraph``,
            ``number_of_edges``, ``graph`` and ``name``).
        verbose (bool): if True the summary is also printed.

    Returns:
        OrderedDict: directedness, multigraph flag, node/edge counts,
        histograms of node attribute names, edge attribute names and node
        types, plus the graph-level attributes and name.
    """
    import utool as ut
    # ``graph.node`` no longer exists in networkx >= 2.4. Iterating
    # ``nodes(data=True)`` yields (node, attr_dict) pairs on old and new
    # releases alike, so it is used for both nodes and edges.
    node_attrs = [attrs for _, attrs in graph.nodes(data=True)]
    edge_attrs = [edge[2] for edge in graph.edges(data=True)]
    node_attr_hist = ut.dict_hist(ut.flatten([attr.keys() for attr in node_attrs]))
    edge_attr_hist = ut.dict_hist(ut.flatten([attr.keys() for attr in edge_attrs]))
    node_type_hist = ut.dict_hist(list(map(type, graph.nodes())))
    info_dict = ut.odict([
        ('directed', graph.is_directed()),
        ('multi', graph.is_multigraph()),
        ('num_nodes', len(graph)),
        ('num_edges', graph.number_of_edges()),
        ('edge_attr_hist', ut.sort_dict(edge_attr_hist)),
        ('node_attr_hist', ut.sort_dict(node_attr_hist)),
        ('node_type_hist', ut.sort_dict(node_type_hist)),
        ('graph_attrs', graph.graph),
        ('graph_name', graph.name),
    ])
    if verbose:
        print(ut.repr3(info_dict))
    return info_dict
Example #7
0
def estimate_twoday_count(ibs, day1, day2, filter_kw):
    """
    Print and return sight-resight statistics for two visit days.

    Images are grouped by calendar date. For each of the two days the
    annotations are filtered with ``filter_kw`` and grouped by name, and the
    resulting name counts are handed to ``dbinfo.sight_resight_count``.

    Args:
        ibs: controller object providing the image and annotation accessors.
        day1: date of the first visit; looked up directly in the per-date
            image grouping.
        day2: date of the second visit; looked up the same way.
        filter_kw: forwarded to ``ibs.filter_annots_general``.

    Returns:
        tuple: ``(nsight1, nsight2, resight, lp_index, lp_error)``
    """
    every_image = ibs.images()
    image_dates = [stamp.date() for stamp in every_image.datetime]
    images_by_date = ut.sort_dict(every_image.group_items(image_dates))
    verbose = 0

    visit_info_list_ = []
    for day in (day1, day2):
        day_images = images_by_date[day]
        candidate_aids = ut.flatten(day_images.aids)
        day_aids = ibs.filter_annots_general(
            candidate_aids, filter_kw=filter_kw, verbose=verbose)
        day_nids = ibs.get_annot_name_rowids(day_aids)
        grouped_aids = ut.group_items(day_aids, day_nids)
        unique_nids = ut.unique(list(grouped_aids.keys()))

        if False:
            # Disabled: drop names whose annotations span too little time.
            aids_list = ut.take(grouped_aids, unique_nids)
            for aids in aids_list:
                if len(aids) > 30:
                    break
            timedeltas_list = ibs.get_unflat_annots_timedelta_list(aids_list)
            # Do the five second rule
            marked_thresh = 5
            flags = []
            for nid, timedeltas in zip(unique_nids, timedeltas_list):
                flags.append(timedeltas.max() > marked_thresh)
            print('Unmarking %d names' % (len(flags) - sum(flags)))
            unique_nids = ut.compress(unique_nids, flags)
            grouped_aids = ut.dict_subset(grouped_aids, unique_nids)

        visit_info_list_.append({
            'unique_nids': unique_nids,
            'grouped_aids': grouped_aids,
            'unique_aids': ut.flatten(list(grouped_aids.values())),
        })

    # Estimate statistics
    from ibeis.other import dbinfo
    aids_day1, aids_day2 = ut.take_column(visit_info_list_, 'unique_aids')
    nids_day1, nids_day2 = ut.take_column(visit_info_list_, 'unique_nids')
    nsight1 = len(nids_day1)
    nsight2 = len(nids_day2)
    # Names seen on both days.
    resight = len(ut.isect(nids_day1, nids_day2))
    lp_index, lp_error = dbinfo.sight_resight_count(nsight1, nsight2, resight)

    if False:
        from ibeis.other import dbinfo
        print('DAY 1 STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1)  # NOQA
        print('DAY 2 STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day2)  # NOQA
        print('COMBINED STATS:')
        _ = dbinfo.get_dbinfo(ibs, aid_list=aids_day1 + aids_day2)  # NOQA

    print('%d annots on day 1' % (len(aids_day1),))
    print('%d annots on day 2' % (len(aids_day2),))
    print('%d names on day 1' % (nsight1,))
    print('%d names on day 2' % (nsight2,))
    print('resight = %r' % (resight,))
    print('lp_index = %r ± %r' % (lp_index, lp_error))
    return nsight1, nsight2, resight, lp_index, lp_error
def clean_tags():
    """
    Exploratory cleanup of the tags stored in a Zotero database.

    Loads several Zotero SQL tables into pandas frames, checks that the
    titles in SQL agree with the in-memory index, and works out which
    duplicate tag names could be merged into a single tag id. Sections
    guarded by ``if False:`` are disabled scratch code. The function returns
    nothing.

    The Zotero web API key is read from the ``ZOTERO_API_KEY`` environment
    variable instead of being embedded in the source.
    """
    import os

    zotero = get_libzotero()
    # dict of all zotero items
    # items = zotero.index
    # get sql cursor
    cur = zotero.cur
    if False:
        sorted(ut.util_sqlite.get_tablenames(cur))
        ut.print_database_structure(cur)
        # Debug info about tags table in sql

        # The `tags` table stores all tags
        # The itemTags table stores the association between items and tags
        ut.get_table_columninfo_list(cur, 'fields')
        # ut.get_table_columninfo_list(cur, 'relations')
        ut.get_table_columninfo_list(cur, 'fieldsCombined')

        ut.get_table_columninfo_list(cur, 'itemData')
        ut.get_table_columninfo_list(cur, 'itemDataValues')

        ut.get_table_columninfo_list(cur, 'tags')
        ut.get_table_columninfo_list(cur, 'itemTags')

    import pandas as pd
    pd.options.display.max_colwidth = 40
    pd.options.display.max_rows = 20

    def pandas_sql(table, columns):
        # Load the requested columns of one SQL table into a DataFrame.
        return pd.DataFrame(ut.get_table_rows(cur, table, columns),
                            columns=columns)

    item_df = pandas_sql('items', ('itemID', 'itemTypeID', 'libraryID', 'key')).set_index('itemID', drop=False)
    tags_df = pandas_sql('tags', ('tagID', 'name', 'type', 'libraryID', 'key')).set_index('tagID', drop=False)
    itemData_df = pandas_sql('itemData', ('itemID', 'fieldID', 'valueID'))

    itemTag_df = pandas_sql('itemTags', ('itemID', 'tagID'))

    itemDataValues_df = pandas_sql('itemDataValues', ('valueID', 'value')).set_index('valueID')
    field_df = pandas_sql('fields', ('fieldID', 'fieldName', 'fieldFormatID')).set_index('fieldID')

    # Resolve the value and field-name ids of every item-data row.
    itemData_df['value'] = itemDataValues_df['value'].loc[itemData_df['valueID'].values].values
    itemData_df['fieldName'] = field_df['fieldName'].loc[itemData_df['fieldID'].values].values

    titles = itemData_df[itemData_df['fieldName'] == 'title']
    # All items that have a title must have the same number of title rows.
    assert len(ut.unique(ut.map_vals(len, titles.groupby('itemID').indices).values())) == 1

    # itemTag_df.groupby('itemID').count()
    # Find how often each tag is used
    tagid_to_count = itemTag_df.groupby('tagID').count()
    tagid_to_count = tagid_to_count.rename(columns={'itemID': 'nItems'})
    tagid_to_count['name'] = tags_df.loc[tagid_to_count.index]['name']
    tagid_to_count = tagid_to_count.sort_values('nItems')

    # NOTE: this single-use selection is superseded by the length-based
    # selection a few lines below.
    bad_tags = tagid_to_count[tagid_to_count['nItems'] == 1]

    tagid_to_count['tag_ncharsize'] = tagid_to_count['name'].apply(len)
    tagid_to_count = tagid_to_count.sort_values('tag_ncharsize')
    # Tags whose name is longer than 25 characters.
    bad_tags = tagid_to_count[tagid_to_count['tag_ncharsize'] > 25]['name'].values.tolist()

    def clean_tags2():
        # Delete the tags in ``bad_tags`` through the Zotero web API, 50 at a
        # time. Defined for manual use; it is never called from this function.
        api_key = os.environ.get('ZOTERO_API_KEY')
        base_url = 'https://api.zotero.org'
        library_id = '1279414'
        library_type = 'user'
        from pyzotero import zotero
        zot = zotero.Zotero(library_id, library_type, api_key)

        for chunk in ut.ProgChunks(bad_tags, 50):
            zot.delete_tags(*chunk)

    if False:
        # Disabled scratch code that talks to the Zotero web API directly.
        import requests
        api_key = os.environ.get('ZOTERO_API_KEY')
        base_url = 'https://api.zotero.org'
        user_id = '1279414'
        userOrGroupPrefix = '/users/' + user_id
        params = {'v': 3, 'key': api_key}

        items_resp = requests.get(base_url + userOrGroupPrefix + '/items', params=params)
        print(items_resp.content)
        print(items_resp)

        json_tags = []
        get_url = base_url + userOrGroupPrefix + '/tags'
        while True:
            print('get_url = %r' % (get_url,))
            tag_resp = requests.get(get_url, params=params)
            if tag_resp.status_code != 200:
                break
            json_tags.extend(tag_resp.json())
            if 'next' in tag_resp.links:
                get_url = tag_resp.links['next']['url']
            else:
                break

        version_to_tags = ut.ddict(list)
        bad_tags = []
        for tag in ut.ProgIter(json_tags, label='parsing tags'):
            # x = requests.get(tag['links']['self']['href'], params=params)
            if tag['meta']['numItems'] == 1:
                import urllib2
                try:
                    bad_tags.append(urllib2.quote(tag['tag']))
                except Exception as ex:
                    print('cant encode tag=%r' % (tag,))
                    pass

        for chunk in ut.ProgIter(ut.ichunks(bad_tags, 50), length=len(bad_tags) / 50):
            search_url = base_url + userOrGroupPrefix + '/items?tag=' + ' || '.join(chunk)
            r = requests.get(search_url, params=params)
            matching_items = r.json()
            # assert len(matching_items) == 1
            for item in matching_items:
                version = item['version']
            version_to_tags[item['version']].append(tag['tag'])

        # DELETE MULTIPLE TAGS
        for chunk in ut.ichunks(bad_tags['name'], 50):
            import urllib2
            encoded_chunk = []
            for t in chunk:
                try:
                    encoded_chunk.append(urllib2.quote(t))
                except Exception:
                    print(t)
            suffix = ' || '.join(encoded_chunk)
            delete_url = base_url + userOrGroupPrefix + '/tags?' + suffix
            print('delete_url = %r' % (delete_url,))
            resp = requests.delete(delete_url, params=params)

        bad_tags = tagid_to_count[tagid_to_count['nItems'] == 1]
        bad_tags['tagID'] = bad_tags.index
        # Unfinished notes, kept as comments because they were never valid
        # Python and prevented the module from being imported:
        #   for tagid in bad_tags:
        #       delete from itemTags where tagID in (select tagID from tags where type=1);
        #   for name in k['name'].values.tolist()

    item_df['title'] = titles.set_index('itemID')['value']
    # Sanity check: the title in SQL must match the title in the item index.
    for idx, item in zotero.index.items():
        sql_title = item_df.loc[item.id]['title']
        if item.title != sql_title:
            if pd.isnull(sql_title) and item.title is not None:
                print(item.__dict__)
                print(item_df.loc[item.id])
                print('item.title = %r' % (item.title,))
                print('sql_title = %r' % (sql_title,))
                assert False

    # NOTE(review): ``> 2`` only selects names used by three or more tag
    # rows; confirm whether names that occur exactly twice should count too.
    duplicate_tags = [
        (name, idxs) for name, idxs in tags_df.groupby('name', sort=True).indices.items() if len(idxs) > 2
    ]
    tagname_to_tagid = tags_df.groupby('name', sort=True).first()
    new_to_oldtags = {}
    # Determine which tag id to use for each name
    for tagname, idxs in duplicate_tags:
        tags_subdf = tags_df.iloc[idxs]
        mapping = itemTag_df[itemTag_df['tagID'].isin(tags_subdf['tagID'])]
        # Indexed by tagID; the ``itemID`` column holds the number of items.
        tag_hist = mapping.groupby('tagID').count()
        best_tagid = tag_hist['itemID'].idxmax()

        # The tag ids to be replaced live in the index of ``tag_hist``; its
        # ``itemID`` column only holds the per-tag item counts.
        new_to_oldtags[best_tagid] = set(tag_hist.index) - {best_tagid}

        tagname_to_tagid.loc[tagname] = tags_df.loc[best_tagid]
        # for col in tagname_to_tagid.columns:
        #     tagname_to_tagid.loc[tagname][col] = tags_df.loc[best_tagid][col]
        # tags_df.loc[best_tagid]

    if False:
        # Update tagIds
        for newid, oldids in new_to_oldtags.items():
            for oldid in oldids:
                # cur.execute('SELECT itemID, tagID FROM itemTags WHERE tagID=?', (oldid,))
                import sqlite3
                try:
                    cmd = 'UPDATE itemTags SET tagID=? WHERE tagID=?'
                    args = (newid, oldid)
                    print('(%s) args = %r' % (cmd, args,))
                    cur.execute(cmd, args)
                    print(cur.fetchall())
                except sqlite3.IntegrityError:
                    print('error')
                    pass

    # tags_df.groupby('name', sort=True)

    # itemTag_df.groupby('itemID')
    # duptags = tags_df.iloc[tags_df.groupby('name', sort=True).indices['animals']]
    # duptags['tagID']
    # flags = itemTag_df['tagID'].isin(duptags['tagID'])
    # dup_rel = itemTag_df[flags]
    # item_df['title'].loc[dup_rel['itemID']].values
    # tags_df.iloc[tags_df.groupby('name', sort=True).indices['animals']]

    # tags_df[tags_df['type'] == 1]
    # tags_df[tags_df['type'] == 0]
    # tags_df['libraryID'].unique()
    # tags_df['type'].unique()

    '''
    SELECT
    SELECT FROM itemTags WHERE name in (animals)
    '''

    item_tag_pairs = ut.get_table_rows(cur, 'itemTags', ('itemID', 'tagID'))
    # Group tags by item
    itemid_to_tagids = ut.group_pairs(item_tag_pairs)
    # Group items by tags
    tagid_to_itemids = ut.group_pairs(map(tuple, map(reversed, item_tag_pairs)))

    # mapping from tagid to name
    tagid_to_name = dict(ut.get_table_rows(cur, 'tags', ('tagID', 'name')))

    # (tagid, number of items) pairs ordered by the number of items.
    tagid_freq = list(ut.sort_dict(ut.map_vals(len, tagid_to_itemids), 'vals').items())
    ut.sort_dict(ut.map_vals(sum, ut.group_pairs([(freq, tagid_to_name.get(tagid, tagid)) for tagid, freq in tagid_freq])), 'vals')
    # NOTE(review): ``tagid_freq`` is a list of pairs here, not a dict;
    # confirm that ``ut.map_keys`` accepts that.
    tagname_freq = ut.map_keys(lambda k: tagid_to_name.get(k, k), tagid_freq)
Example #9
0
def get_injured_sharks():
    """
    Download injury-tagged whale shark images and add them to a database.

    The keyword index is fetched from whaleshark.org and the keywords that
    contain one of ``injury_patterns`` are kept. Their images are downloaded
    to ``~/tmpsharks``; files that share a ``ut.get_file_uuid`` hash are
    merged into one record, and the records are added to the ``WS_Injury``
    database. Afterwards an interactive loop shows the HOG image next to the
    chip of every annotation, and a HOG demo is run on ``./GOPR1120.JPG``.

    Sections guarded by ``if False:`` are disabled scratch code. The function
    returns nothing.

    >>> from wbia.scripts.getshark import *  # NOQA
    """
    import requests

    url = 'http://www.whaleshark.org/getKeywordImages.jsp'
    resp = requests.get(url)
    assert resp.status_code == 200
    keywords = resp.json()['keywords']
    key_list = ut.take_column(keywords, 'indexName')
    key_to_nice = {k['indexName']: k['readableName'] for k in keywords}

    injury_patterns = [
        'injury',
        'net',
        'hook',
        'trunc',
        'damage',
        'scar',
        'nicks',
        'bite',
    ]

    # Keep only the keywords that mention one of the injury patterns.
    injury_keys = [
        key for key in key_list if any([pat in key for pat in injury_patterns])
    ]
    noninjury_keys = ut.setdiff(key_list, injury_keys)
    injury_nice = ut.lmap(lambda k: key_to_nice[k], injury_keys)  # NOQA
    noninjury_nice = ut.lmap(lambda k: key_to_nice[k], noninjury_keys)  # NOQA
    key_list = injury_keys

    # Fetch the image records listed under every injury keyword.
    keyed_images = {}
    for key in ut.ProgIter(key_list, lbl='reading index', bs=True):
        key_url = url + '?indexName={indexName}'.format(indexName=key)
        key_resp = requests.get(key_url)
        assert key_resp.status_code == 200
        key_imgs = key_resp.json()['images']
        keyed_images[key] = key_imgs

    key_hist = {key: len(imgs) for key, imgs in keyed_images.items()}
    key_hist = ut.sort_dict(key_hist, 'vals')
    logger.info(ut.repr3(key_hist))
    nice_key_hist = ut.map_dict_keys(lambda k: key_to_nice[k], key_hist)
    nice_key_hist = ut.sort_dict(nice_key_hist, 'vals')
    logger.info(ut.repr3(nice_key_hist))

    key_to_urls = {
        key: ut.take_column(vals, 'url')
        for key, vals in keyed_images.items()
    }
    overlaps = {}
    import itertools

    # Count the images shared by every pair of keywords.
    overlap_img_list = []
    for k1, k2 in itertools.combinations(key_to_urls.keys(), 2):
        overlap_imgs = ut.isect(key_to_urls[k1], key_to_urls[k2])
        num_overlap = len(overlap_imgs)
        overlaps[(k1, k2)] = num_overlap
        overlaps[(k1, k1)] = len(key_to_urls[k1])
        if num_overlap > 0:
            # logger.info('[%s][%s], overlap=%r' % (k1, k2, num_overlap))
            overlap_img_list.extend(overlap_imgs)

    all_img_urls = list(set(ut.flatten(key_to_urls.values())))
    num_all = len(all_img_urls)  # NOQA
    logger.info('num_all = %r' % (num_all, ))

    # Determine super-categories
    categories = ['nicks', 'scar', 'trunc']

    # Force these keys into these categories
    key_to_cat = {'scarbite': 'other_injury'}

    cat_to_keys = ut.ddict(list)

    for key in key_to_urls.keys():
        flag = 1
        if key in key_to_cat:
            cat = key_to_cat[key]
            cat_to_keys[cat].append(key)
            continue
        for cat in categories:
            if cat in key:
                cat_to_keys[cat].append(key)
                flag = 0
        # Keys that match no category fall back to 'other_injury'.
        if flag:
            cat = 'other_injury'
            cat_to_keys[cat].append(key)

    cat_urls = ut.ddict(list)
    for cat, keys in cat_to_keys.items():
        for key in keys:
            cat_urls[cat].extend(key_to_urls[key])

    cat_hist = {}
    for cat in list(cat_urls.keys()):
        cat_urls[cat] = list(set(cat_urls[cat]))
        cat_hist[cat] = len(cat_urls[cat])

    logger.info(ut.repr3(cat_to_keys))
    logger.info(ut.repr3(cat_hist))

    # Invert the category grouping: keyword -> category.
    key_to_cat = dict([(val, key) for key, vals in cat_to_keys.items()
                       for val in vals])

    # ingestset = {
    #    '__class__': 'ImageSet',
    #    'images': ut.ddict(dict)
    # }
    # for key, key_imgs in keyed_images.items():
    #    for imgdict in key_imgs:
    #        url = imgdict['url']
    #        encid = imgdict['correspondingEncounterNumber']
    #        # Make structure
    #        encdict = encounters[encid]
    #        encdict['__class__'] = 'Encounter'
    #        imgdict = ut.delete_keys(imgdict.copy(), ['correspondingEncounterNumber'])
    #        imgdict['__class__'] = 'Image'
    #        cat = key_to_cat[key]
    #        annotdict = {'relative_bbox': [.01, .01, .98, .98], 'tags': [cat, key]}
    #        annotdict['__class__'] = 'Annotation'

    #        # Ensure structures exist
    #        encdict['images'] = encdict.get('images', [])
    #        imgdict['annots'] = imgdict.get('annots', [])

    #        # Add an image to this encounter
    #        encdict['images'].append(imgdict)
    #        # Add an annotation to this image
    #        imgdict['annots'].append(annotdict)

    # # http://springbreak.wildbook.org/rest/org.ecocean.Encounter/1111
    # get_enc_url = 'http://www.whaleshark.org/rest/org.ecocean.Encounter/%s' % (encid,)
    # resp = requests.get(get_enc_url)
    # logger.info(ut.repr3(encdict))
    # logger.info(ut.repr3(encounters))

    # Download the files to the local disk
    # fpath_list =

    all_urls = ut.unique(
        ut.take_column(
            ut.flatten(
                ut.dict_subset(keyed_images,
                               ut.flatten(cat_to_keys.values())).values()),
            'url',
        ))

    dldir = ut.truepath('~/tmpsharks')
    from os.path import commonprefix, basename  # NOQA

    # Local file names are the URL remainder after the shared prefix, with
    # path separators replaced.
    prefix = commonprefix(all_urls)
    suffix_list = [url_[len(prefix):] for url_ in all_urls]
    fname_list = [suffix.replace('/', '--') for suffix in suffix_list]

    fpath_list = []
    for url, fname in ut.ProgIter(zip(all_urls, fname_list),
                                  lbl='downloading imgs',
                                  freq=1):
        fpath = ut.grab_file_url(url,
                                 download_dir=dldir,
                                 fname=fname,
                                 verbose=False)
        fpath_list.append(fpath)

    # Make sure we keep orig info
    # url_to_keys = ut.ddict(list)
    url_to_info = ut.ddict(dict)
    for key, imgdict_list in keyed_images.items():
        for imgdict in imgdict_list:
            url = imgdict['url']
            info = url_to_info[url]
            for k, v in imgdict.items():
                info[k] = info.get(k, [])
                info[k].append(v)
            info['keys'] = info.get('keys', [])
            info['keys'].append(key)
            # url_to_keys[url].append(key)

    info_list = ut.take(url_to_info, all_urls)
    for info in info_list:
        if len(set(info['correspondingEncounterNumber'])) > 1:
            assert False, 'url with two different encounter nums'
    # Combine duplicate tags

    hashid_list = [
        ut.get_file_uuid(fpath_, stride=8)
        for fpath_ in ut.ProgIter(fpath_list, bs=True)
    ]
    groupxs = ut.group_indices(hashid_list)[1]

    # Group properties by duplicate images
    # groupxs = [g for g in groupxs if len(g) > 1]
    fpath_list_ = ut.take_column(ut.apply_grouping(fpath_list, groupxs), 0)
    url_list_ = ut.take_column(ut.apply_grouping(all_urls, groupxs), 0)
    info_list_ = [
        ut.map_dict_vals(ut.flatten, ut.dict_accum(*info_))
        for info_ in ut.apply_grouping(info_list, groupxs)
    ]

    encid_list_ = [
        ut.unique(info_['correspondingEncounterNumber'])[0]
        for info_ in info_list_
    ]
    keys_list_ = [ut.unique(info_['keys']) for info_ in info_list_]
    cats_list_ = [ut.unique(ut.take(key_to_cat, keys)) for keys in keys_list_]

    clist = ut.ColumnLists({
        'gpath': fpath_list_,
        'url': url_list_,
        'encid': encid_list_,
        'key': keys_list_,
        'cat': cats_list_,
    })

    # for info_ in ut.apply_grouping(info_list, groupxs):
    #    info = ut.dict_accum(*info_)
    #    info = ut.map_dict_vals(ut.flatten, info)
    #    x = ut.unique(ut.flatten(ut.dict_accum(*info_)['correspondingEncounterNumber']))
    #    if len(x) > 1:
    #        info = info.copy()
    #        del info['keys']
    #        logger.info(ut.repr3(info))

    # Drop downloads that do not have an image file extension.
    flags = ut.lmap(ut.fpath_has_imgext, clist['gpath'])
    clist = clist.compress(flags)

    import wbia

    ibs = wbia.opendb('WS_Injury', allow_newdir=True)

    gid_list = ibs.add_images(clist['gpath'])
    clist['gid'] = gid_list

    # Discard the rows whose image could not be added (gid is None).
    failed_flags = ut.flag_None_items(clist['gid'])
    logger.info('# failed %s' % (sum(failed_flags), ))
    passed_flags = ut.not_list(failed_flags)
    clist = clist.compress(passed_flags)
    ut.assert_all_not_None(clist['gid'])
    # ibs.get_image_uris_original(clist['gid'])
    ibs.set_image_uris_original(clist['gid'], clist['url'], overwrite=True)

    # ut.zipflat(clist['cat'], clist['key'])
    if False:
        # Can run detection instead
        clist['tags'] = ut.zipflat(clist['cat'])
        aid_list = ibs.use_images_as_annotations(clist['gid'],
                                                 adjust_percent=0.01,
                                                 tags_list=clist['tags'])
        aid_list

    import wbia.plottool as pt
    from wbia import core_annots

    pt.qt4ensure()
    # annots = ibs.annots()
    # aids = [1, 2]
    # ibs.depc_annot.get('hog', aids , 'hog')
    # ibs.depc_annot.get('chip', aids, 'img')
    # Show the HOG image next to the chip for each annotation in turn.
    for aid in ut.InteractiveIter(ibs.get_valid_aids()):
        hogs = ibs.depc_annot.d.get_hog_hog([aid])
        chips = ibs.depc_annot.d.get_chips_img([aid])
        chip = chips[0]
        hogimg = core_annots.make_hog_block_image(hogs[0])
        pt.clf()
        pt.imshow(hogimg, pnum=(1, 2, 1))
        pt.imshow(chip, pnum=(1, 2, 2))
        fig = pt.gcf()
        fig.show()
        fig.canvas.draw()

    # logger.info(len(groupxs))

    # if False:
    # groupxs = ut.find_duplicate_items(ut.lmap(basename, suffix_list)).values()
    # logger.info(ut.repr3(ut.apply_grouping(all_urls, groupxs)))
    #    # FIX
    #    for fpath, fname in zip(fpath_list, fname_list):
    #        if ut.checkpath(fpath):
    #            ut.move(fpath, join(dirname(fpath), fname))
    #            logger.info('fpath = %r' % (fpath,))

    # import wbia
    # from wbia.dbio import ingest_dataset
    # dbdir = wbia.sysres.lookup_dbdir('WS_ALL')
    # self = ingest_dataset.Ingestable2(dbdir)

    if False:
        # Show overlap matrix
        import wbia.plottool as pt
        import pandas as pd
        import numpy as np

        dict_ = overlaps
        s = pd.Series(dict_, index=pd.MultiIndex.from_tuples(overlaps))
        df = s.unstack()
        lhs, rhs = df.align(df.T)
        df = lhs.add(rhs, fill_value=0).fillna(0)

        label_texts = df.columns.values

        def label_ticks(label_texts):
            import wbia.plottool as pt

            truncated_labels = [repr(lbl[0:100]) for lbl in label_texts]
            ax = pt.gca()
            ax.set_xticks(list(range(len(label_texts))))
            ax.set_xticklabels(truncated_labels)
            [lbl.set_rotation(-55) for lbl in ax.get_xticklabels()]
            [
                lbl.set_horizontalalignment('left')
                for lbl in ax.get_xticklabels()
            ]

            # xgrid, ygrid = np.meshgrid(range(len(label_texts)), range(len(label_texts)))
            # pt.plot_surface3d(xgrid, ygrid, disjoint_mat)
            ax.set_yticks(list(range(len(label_texts))))
            ax.set_yticklabels(truncated_labels)
            [
                lbl.set_horizontalalignment('right')
                for lbl in ax.get_yticklabels()
            ]
            [
                lbl.set_verticalalignment('center')
                for lbl in ax.get_yticklabels()
            ]
            # [lbl.set_rotation(20) for lbl in ax.get_yticklabels()]

        # df = df.sort(axis=0)
        # df = df.sort(axis=1)

        sortx = np.argsort(df.sum(axis=1).values)[::-1]
        df = df.take(sortx, axis=0)
        df = df.take(sortx, axis=1)

        fig = pt.figure(fnum=1)
        fig.clf()
        mat = df.values.astype(np.int32)
        mat[np.diag_indices(len(mat))] = 0
        # The ``np.bool`` alias no longer exists in current NumPy; the
        # builtin ``bool`` is the equivalent dtype.
        vmax = mat[(1 - np.eye(len(mat))).astype(bool)].max()
        import matplotlib.colors

        norm = matplotlib.colors.Normalize(vmin=0, vmax=vmax, clip=True)
        pt.plt.imshow(mat, cmap='hot', norm=norm, interpolation='none')
        pt.plt.colorbar()
        pt.plt.grid('off')
        label_ticks(label_texts)
        fig.tight_layout()

    # overlap_df = pd.DataFrame.from_dict(overlap_img_list)

    class TmpImage(ut.NiceRepr):
        pass

    from skimage.feature import hog
    from skimage import data, color, exposure
    import wbia.plottool as pt

    image2 = color.rgb2gray(data.astronaut())  # NOQA

    fpath = './GOPR1120.JPG'

    import vtool as vt

    for fpath in [fpath]:
        """
        http://scikit-image.org/docs/dev/auto_examples/plot_hog.html
        """

        image = vt.imread(fpath, grayscale=True)
        image = pt.color_funcs.to_base01(image)

        fig = pt.figure(fnum=2)
        # NOTE(review): newer scikit-image spells this keyword ``visualize``;
        # confirm against the installed version.
        fd, hog_image = hog(
            image,
            orientations=8,
            pixels_per_cell=(16, 16),
            cells_per_block=(1, 1),
            visualise=True,
        )

        fig, (ax1, ax2) = pt.plt.subplots(1,
                                          2,
                                          figsize=(8, 4),
                                          sharex=True,
                                          sharey=True)

        # NOTE(review): 'box-forced' is not accepted by recent matplotlib
        # releases; confirm against the installed version.
        ax1.axis('off')
        ax1.imshow(image, cmap=pt.plt.cm.gray)
        ax1.set_title('Input image')
        ax1.set_adjustable('box-forced')

        # Rescale histogram for better display
        hog_image_rescaled = exposure.rescale_intensity(hog_image,
                                                        in_range=(0, 0.02))

        ax2.axis('off')
        ax2.imshow(hog_image_rescaled, cmap=pt.plt.cm.gray)
        ax2.set_title('Histogram of Oriented Gradients')
        # Configure the second axis; ``ax1`` was previously set twice here,
        # leaving ``ax2`` untouched.
        ax2.set_adjustable('box-forced')
        pt.plt.show()