Example #1
 def permute_columns(self, new_order, inplace=True):
     import utool as ut
     self.header = ut.take(self.header, new_order)
     self.header_tags = ut.take(self.header_tags, new_order)
     self.row_data = ut.listT(ut.take(ut.listT(self.row_data), new_order))
     if self.short_header is not None:
         self.short_header = ut.take(self.short_header, new_order)
     return self
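These examples use ut.listT, which (as used throughout this page) transposes
a list of rows into a list of columns, so a column-wise operation such as
ut.take can be applied before transposing back. A minimal sketch of the idea,
assuming ut.listT is equivalent to a plain zip-based transpose:

def permute_columns_sketch(row_data, new_order):
    # Transpose rows -> columns (the role ut.listT plays above)
    columns = [list(col) for col in zip(*row_data)]
    # Reorder the columns (the role ut.take plays above)
    columns = [columns[i] for i in new_order]
    # Transpose back: columns -> rows
    return [list(row) for row in zip(*columns)]

# permute_columns_sketch([[1, 'a'], [2, 'b']], [1, 0])
# -> [['a', 1], ['b', 2]]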
Example #2
 def fuzzy_filter_columns(self, fuzzy_headers):
     import utool as ut
     col_flags = ut.filterflags_general_tags(
         self.header_tags, logic='or',
         in_any=fuzzy_headers)
     self.header = ut.compress(self.header, col_flags)
     self.header_tags = ut.compress(self.header_tags, col_flags)
     self.row_data = ut.listT(ut.compress(ut.listT(self.row_data), col_flags))
     if self.short_header is not None:
         self.short_header = ut.compress(self.short_header, col_flags)
Example #3
 def fuzzy_filter_columns(self, fuzzy_headers):
     import utool as ut
     col_flags = ut.filterflags_general_tags(self.header_tags,
                                             logic='or',
                                             in_any=fuzzy_headers)
     self.header = ut.compress(self.header, col_flags)
     self.header_tags = ut.compress(self.header_tags, col_flags)
     self.row_data = ut.listT(
         ut.compress(ut.listT(self.row_data), col_flags))
     if self.short_header is not None:
         self.short_header = ut.compress(self.short_header, col_flags)
Example #4
def make_standard_csv(column_list, column_lbls=None):
    from six.moves import cStringIO as StringIO
    import six  # needed for the six.PY2 check below
    import utool as ut
    import csv
    stream = StringIO()
    row_list = ut.listT(column_list)
    if six.PY2:
        row_list = [[ut.ensure_unicode(c).encode('utf-8') for c in r]
                    for r in row_list]
        if column_lbls is not None:
            column_lbls = [
                ut.ensure_unicode(c).encode('utf-8') for c in column_lbls
            ]
    writer = csv.writer(stream, dialect=csv.excel)
    if column_lbls is not None:
        writer.writerow(column_lbls)
    writer.writerows(row_list)
    csv_str = stream.getvalue()
    return csv_str
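A quick usage sketch for the function above, assuming ut.listT transposes the
column list into rows (the csv.excel dialect terminates lines with '\r\n'):

column_list = [[1, 2], ['a', 'b']]  # two equal-length columns
csv_str = make_standard_csv(column_list, column_lbls=['num', 'char'])
# csv_str == 'num,char\r\n1,a\r\n2,b\r\n'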
Example #5
def make_standard_csv(column_list, column_lbls=None):
    from six.moves import cStringIO as StringIO
    import six  # needed for the six.PY2 check below
    import utool as ut
    import csv
    stream = StringIO()
    row_list = ut.listT(column_list)
    if six.PY2:
        row_list = [[ut.ensure_unicode(c).encode('utf-8')
                     for c in r]
                    for r in row_list]
        if column_lbls is not None:
            column_lbls = [ut.ensure_unicode(c).encode('utf-8')
                           for c in column_lbls]
    writer = csv.writer(stream, dialect=csv.excel)
    if column_lbls is not None:
        writer.writerow(column_lbls)
    writer.writerows(row_list)
    csv_str = stream.getvalue()
    return csv_str
Example #6
def get_annotmatch_rowids_between(ibs, aids1, aids2, method=None):
    """

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.annotmatch_funcs import *  # NOQA
        >>> import wbia
        >>> import ubelt as ub
        >>> ibs = wbia.opendb('PZ_MTEST')
        >>> aids1 = aids2 = [1, 2, 3, 4, 5, 6]
        >>> rowids_between = ibs.get_annotmatch_rowids_between
        >>> ams1 = sorted(rowids_between(aids1, aids2, method=1))
        >>> ams2 = sorted(rowids_between(aids1, aids2, method=2))
        >>> assert len(ub.find_duplicates(ams1)) == 0
        >>> assert len(ub.find_duplicates(ams2)) == 0
        >>> assert sorted(ams2) == sorted(ams1)
    """
    if method is None:
        if len(aids1) * len(aids2) > 5000:
            method = 1
        else:
            method = 2
    if method == 1:
        # Strategy 1: get all existing rows and see what intersects
        # This is better when the enumerated set of rows would be larger than
        # the database size
        unflat_rowids1L = ibs.get_annotmatch_rowids_from_aid1(aids1)
        unflat_rowids1R = ibs.get_annotmatch_rowids_from_aid2(aids1)
        unflat_rowids2L = ibs.get_annotmatch_rowids_from_aid1(aids2)
        unflat_rowids2R = ibs.get_annotmatch_rowids_from_aid2(aids2)

        am_rowids1L = {r for r in ut.iflatten(unflat_rowids1L) if r is not None}
        am_rowids1R = {r for r in ut.iflatten(unflat_rowids1R) if r is not None}
        am_rowids2L = {r for r in ut.iflatten(unflat_rowids2L) if r is not None}
        am_rowids2R = {r for r in ut.iflatten(unflat_rowids2R) if r is not None}

        ams12 = am_rowids1L.intersection(am_rowids2R)
        ams21 = am_rowids2L.intersection(am_rowids1R)
        ams = sorted(ams12.union(ams21))
        # ams = sorted(am_rowids1.intersection(am_rowids2))
        # rowids2 = ibs.get_annotmatch_rowids_from_aid2(aid_list)
        # unflat_rowids1 = ibs.get_annotmatch_rowids_from_aid(aids1)
        # unflat_rowids2 = ibs.get_annotmatch_rowids_from_aid(aids2)
        # am_rowids1 = {r for r in ut.iflatten(unflat_rowids1) if r is not None}
        # am_rowids2 = {r for r in ut.iflatten(unflat_rowids2) if r is not None}
        # ams = sorted(am_rowids1.intersection(am_rowids2))
        # ams = ut.isect(am_rowids1, am_rowids2)
    elif method == 2:
        # Strategy 2: enumerate what rows could exist and see what does exist
        # This is better when the enumerated set of rows would be smaller than
        # the database size
        edges = list(ut.product_nonsame(aids1, aids2))
        if len(edges) == 0:
            ams = []
        else:
            aids1_, aids2_ = ut.listT(edges)
            # ams = ibs.get_annotmatch_rowid_from_undirected_superkey(aids1_, aids2_)
            ams = ibs.get_annotmatch_rowid_from_superkey(aids1_, aids2_)
            if ams is None:
                ams = []
            ams = ut.filter_Nones(ams)
    return ams
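Strategy 2 above hinges on ut.product_nonsame, which, judging from its use
here, enumerates the candidate edges: all ordered pairs drawn from aids1 and
aids2 whose two elements differ. A minimal equivalent sketch:

import itertools

def product_nonsame_sketch(items1, items2):
    # All ordered pairs (a, b) from items1 x items2, skipping
    # self-pairs such as (3, 3)
    return ((a, b) for a, b in itertools.product(items1, items2) if a != b)

# list(product_nonsame_sketch([1, 2], [2, 3]))
# -> [(1, 2), (1, 3), (2, 3)]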
Example #7
 def nice_table(self):
     import utool as ut
     # ut.make_csv_table expects column-major data, so transpose the rows first
     return ut.make_csv_table(ut.listT(self.row_data), raw=True)
Example #8
    def make_annot_inference_dict(infr, internal=False):
        #import uuid

        def convert_to_name_uuid(nid):
            #try:
            text = ibs.get_name_texts(nid, apply_fix=False)
            if text is None:
                text = 'NEWNAME_%s' % (str(nid), )
            #uuid_ = uuid.UUID(text)
            #except ValueError:
            #    text = 'NEWNAME_%s' % (str(nid),)
            #    #uuid_ = nid
            return text

        ibs = infr.qreq_.ibs

        if internal:
            get_annot_uuids = ut.identity
        else:
            get_annot_uuids = ibs.get_annot_uuids
            #return uuid_

        # Compile the cluster_dict
        col_list = [
            'aid_list', 'orig_nid_list', 'new_nid_list', 'exemplar_flag_list',
            'error_flag_list'
        ]
        cluster_dict = dict(zip(col_list, ut.listT(infr.cluster_tuples)))
        cluster_dict['annot_uuid_list'] = get_annot_uuids(
            cluster_dict['aid_list'])
        # We store the name's UUID as the name's text
        #cluster_dict['orig_name_uuid_list'] = [convert_to_name_uuid(nid)
        #                                       for nid in cluster_dict['orig_nid_list']]
        #cluster_dict['new_name_uuid_list'] = [convert_to_name_uuid(nid)
        # for nid in cluster_dict['new_nid_list']]
        cluster_dict['orig_name_list'] = [
            convert_to_name_uuid(nid) for nid in cluster_dict['orig_nid_list']
        ]
        cluster_dict['new_name_list'] = [
            convert_to_name_uuid(nid) for nid in cluster_dict['new_nid_list']
        ]
        # Filter out only the keys we want to send back in the dictionary
        #key_list = ['annot_uuid_list', 'orig_name_uuid_list',
        #            'new_name_uuid_list', 'exemplar_flag_list',
        #            'error_flag_list']
        key_list = [
            'annot_uuid_list', 'orig_name_list', 'new_name_list',
            'exemplar_flag_list', 'error_flag_list'
        ]
        cluster_dict = ut.dict_subset(cluster_dict, key_list)

        # Compile the annot_pair_dict
        col_list = [
            'aid_1_list', 'aid_2_list', 'p_same_list', 'confidence_list',
            'raw_score_list'
        ]
        annot_pair_dict = dict(zip(col_list, ut.listT(infr.needs_review_list)))
        annot_pair_dict['annot_uuid_1_list'] = get_annot_uuids(
            annot_pair_dict['aid_1_list'])
        annot_pair_dict['annot_uuid_2_list'] = get_annot_uuids(
            annot_pair_dict['aid_2_list'])
        zipped = zip(annot_pair_dict['annot_uuid_1_list'],
                     annot_pair_dict['annot_uuid_2_list'],
                     annot_pair_dict['p_same_list'])
        annot_pair_dict['review_pair_list'] = [{
            'annot_uuid_key': annot_uuid_1,
            'annot_uuid_1': annot_uuid_1,
            'annot_uuid_2': annot_uuid_2,
            'prior_matching_state': {
                'p_match': p_same,
                'p_nomatch': 1.0 - p_same,
                'p_notcomp': 0.0,
            }
        } for (annot_uuid_1, annot_uuid_2, p_same) in zipped]
        # Filter out only the keys we want to send back in the dictionary
        key_list = ['review_pair_list', 'confidence_list']
        annot_pair_dict = ut.dict_subset(annot_pair_dict, key_list)

        # Compile the inference dict
        inference_dict = ut.odict([
            ('cluster_dict', cluster_dict),
            ('annot_pair_dict', annot_pair_dict),
            ('_internal_state', None),
        ])
        return inference_dict
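For orientation, this is the shape of the dictionary the method returns,
reconstructed from the code above (key names come from the source; the values
are illustrative placeholders only):

inference_dict = {
    'cluster_dict': {
        'annot_uuid_list': ['<annot_uuid_1>', '<annot_uuid_2>'],
        'orig_name_list': ['name_a', 'name_b'],
        'new_name_list': ['name_a', 'name_a'],
        'exemplar_flag_list': [True, False],
        'error_flag_list': [False, False],
    },
    'annot_pair_dict': {
        'review_pair_list': [{
            'annot_uuid_key': '<annot_uuid_1>',
            'annot_uuid_1': '<annot_uuid_1>',
            'annot_uuid_2': '<annot_uuid_2>',
            'prior_matching_state': {
                'p_match': 0.9,
                'p_nomatch': 0.1,
                'p_notcomp': 0.0,
            },
        }],
        'confidence_list': [0.5],
    },
    '_internal_state': None,
}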
Example #9
def fix_annotmatch_pzmaster1():
    """
    PZ_Master1 had annotmatch rowids that did not agree with the current name
    labeling. Looking at the inconsistencies in the graph interface was too
    cumbersome, because over 3000 annots were incorrectly grouped together.

    This function deletes any annotmatch rowid that is not consistent with the
    current labeling so we can go forward with using the new AnnotInference
    object.
    """
    import wbia

    ibs = wbia.opendb('PZ_Master1')
    infr = wbia.AnnotInference(ibs=ibs, aids=ibs.get_valid_aids(), verbose=5)
    infr.initialize_graph()
    annots = ibs.annots()
    aid_to_nid = ut.dzip(annots.aids, annots.nids)

    if False:
        infr.reset_feedback()
        infr.ensure_mst()
        infr.apply_feedback_edges()
        infr.relabel_using_reviews()
        infr.start_qt_interface()

    # Get annotmatch rowids that agree with current labeling
    if False:
        annotmatch = ibs.db.get_table_as_pandas('annotmatch')
        import pandas as pd

        flags1 = pd.isnull(annotmatch['annotmatch_evidence_decision'])
        flags2 = annotmatch['annotmatch_tag_text'] == ''
        bad_part = annotmatch[flags1 & flags2]
        rowids = bad_part.index.tolist()
        ibs.delete_annotmatch(rowids)

    if False:
        # Delete bidirectional annotmatches
        annotmatch = ibs.db.get_table_as_pandas('annotmatch')
        df = annotmatch.set_index(['annot_rowid1', 'annot_rowid2'])

        # Find entries that have both directions
        pairs1 = annotmatch[['annot_rowid1', 'annot_rowid2']].values
        f_edges = {tuple(p) for p in pairs1}
        b_edges = {tuple(p[::-1]) for p in pairs1}
        isect_edges = {tuple(sorted(p)) for p in b_edges.intersection(f_edges)}
        isect_edges1 = list(isect_edges)
        isect_edges2 = [p[::-1] for p in isect_edges]

        # cols = ['annotmatch_evidence_decision', 'annotmatch_tag_text']
        import pandas as pd

        custom_ = {
            (559, 4909): (False, ['photobomb']),
            (7918, 8041): (False, ['photobomb']),
            (6634, 6754): (False, ['photobomb']),
            (3707, 3727): (False, ['photobomb']),
            (86, 103): (False, ['photobomb']),
        }
        extra_ = {}

        fixme_edges = []

        d1 = df.loc[isect_edges1].reset_index(drop=False)
        d2 = df.loc[isect_edges2].reset_index(drop=False)
        flags = (d1['annotmatch_evidence_decision'] !=
                 d2['annotmatch_evidence_decision'])
        from wbia.tag_funcs import _parse_tags

        for f, r1, r2 in zip(flags, d1.iterrows(), d2.iterrows()):
            v1, v2 = r1[1], r2[1]
            aid1 = v1['annot_rowid1']
            aid2 = v1['annot_rowid2']
            truth_real = (ibs.const.EVIDENCE_DECISION.POSITIVE
                          if aid_to_nid[aid1] == aid_to_nid[aid2] else
                          ibs.const.EVIDENCE_DECISION.NEGATIVE)
            truth1 = v1['annotmatch_evidence_decision']
            truth2 = v2['annotmatch_evidence_decision']
            t1 = _parse_tags(v1['annotmatch_tag_text'])
            t2 = _parse_tags(v2['annotmatch_tag_text'])
            newtag = ut.union_ordered(t1, t2)
            if (aid1, aid2) in custom_:
                continue
            fixme_flag = False
            if not pd.isnull(truth1):
                if truth_real != truth1:
                    fixme_flag = True
            if not pd.isnull(truth2):
                if truth_real != truth2:
                    fixme_flag = True
            if fixme_flag:
                logger.info('newtag = %r' % (newtag, ))
                logger.info('truth_real = %r' % (truth_real, ))
                logger.info('truth1 = %r' % (truth1, ))
                logger.info('truth2 = %r' % (truth2, ))
                logger.info('aid1 = %r' % (aid1, ))
                logger.info('aid2 = %r' % (aid2, ))
                fixme_edges.append((aid1, aid2))
            else:
                extra_[(aid1, aid2)] = (truth_real, newtag)

        extra_.update(custom_)
        new_pairs = extra_.keys()
        new_truths = ut.take_column(ut.dict_take(extra_, new_pairs), 0)
        new_tags = ut.take_column(ut.dict_take(extra_, new_pairs), 1)
        new_tag_texts = [';'.join(t) for t in new_tags]
        aids1, aids2 = ut.listT(new_pairs)

        # Delete the old
        ibs.delete_annotmatch((d1['annotmatch_rowid'].values.tolist() +
                               d2['annotmatch_rowid'].values.tolist()))

        # Add the new
        ams = ibs.add_annotmatch_undirected(aids1, aids2)
        ibs.set_annotmatch_evidence_decision(ams, new_truths)
        ibs.set_annotmatch_tag_text(ams, new_tag_texts)

        if False:
            import wbia.guitool as gt

            gt.ensure_qapp()
            ut.qtensure()
            from wbia.gui import inspect_gui

            inspect_gui.show_vsone_tuner(ibs, aid1, aid2)

        # pairs2 = pairs1.T[::-1].T
        # idx1, idx2 = ut.isect_indices(list(map(tuple, pairs1)),
        #                               list(map(tuple, pairs2)))
        # r_edges = list(set(map(tuple, map(sorted, pairs1[idx1]))))
        # unique_pairs = list(set(map(tuple, map(sorted, pairs1[idx1]))))
        # df = annotmatch.set_index(['annot_rowid1', 'annot_rowid2'])

    x = ut.ddict(list)
    annotmatch = ibs.db.get_table_as_pandas('annotmatch')
    import ubelt as ub

    _iter = annotmatch.iterrows()
    prog = ub.ProgIter(_iter, length=len(annotmatch))
    for k, m in prog:
        aid1 = m['annot_rowid1']
        aid2 = m['annot_rowid2']
        if m['annotmatch_evidence_decision'] == ibs.const.EVIDENCE_DECISION.POSITIVE:
            if aid_to_nid[aid1] == aid_to_nid[aid2]:
                x['agree1'].append(k)
            else:
                x['disagree1'].append(k)
        elif m['annotmatch_evidence_decision'] == ibs.const.EVIDENCE_DECISION.NEGATIVE:
            if aid_to_nid[aid1] == aid_to_nid[aid2]:
                x['disagree2'].append(k)
            else:
                x['agree2'].append(k)

    ub.map_vals(len, x)
    ut.dict_hist(annotmatch.loc[x['disagree1']]['annotmatch_tag_text'])

    disagree1 = annotmatch.loc[x['disagree1']]
    pb_disagree1 = disagree1[disagree1['annotmatch_tag_text'] == 'photobomb']
    aids1 = pb_disagree1['annot_rowid1'].values.tolist()
    aids2 = pb_disagree1['annot_rowid2'].values.tolist()
    aid_pairs = list(zip(aids1, aids2))
    infr = wbia.AnnotInference.from_pairs(aid_pairs, ibs=ibs, verbose=5)
    if False:
        feedback = infr.read_wbia_annotmatch_feedback(edges=infr.edges())
        infr.external_feedback = feedback
        infr.apply_feedback_edges()
        infr.start_qt_interface(loop=False)

    # Delete these values
    if False:
        nonpb_disagree1 = disagree1[
            disagree1['annotmatch_tag_text'] != 'photobomb']
        disagree2 = annotmatch.loc[x['disagree2']]
        ibs.delete_annotmatch(nonpb_disagree1['annotmatch_rowid'])
        ibs.delete_annotmatch(disagree2['annotmatch_rowid'])

    # ut.dict_hist(disagree1['annotmatch_tag_text'])
    import networkx as nx

    graph = nx.Graph()
    graph.add_edges_from(
        zip(pb_disagree1['annot_rowid1'], pb_disagree1['annot_rowid2']))
    list(nx.connected_components(graph))

    set(annotmatch.loc[x['disagree2']]['annotmatch_tag_text'])
Example #10
def fix_bidirectional_annotmatch(ibs):
    import wbia

    infr = wbia.AnnotInference(ibs=ibs, aids='all', verbose=5)
    infr.initialize_graph()
    annots = ibs.annots()
    aid_to_nid = ut.dzip(annots.aids, annots.nids)

    # Delete bidirectional annotmatches
    annotmatch = ibs.db.get_table_as_pandas('annotmatch')
    df = annotmatch.set_index(['annot_rowid1', 'annot_rowid2'])

    # Find entries that have both directions
    pairs1 = annotmatch[['annot_rowid1', 'annot_rowid2']].values
    f_edges = {tuple(p) for p in pairs1}
    b_edges = {tuple(p[::-1]) for p in pairs1}
    isect_edges = {tuple(sorted(p)) for p in b_edges.intersection(f_edges)}
    logger.info('Found %d bidirectional edges' % len(isect_edges))
    isect_edges1 = list(isect_edges)
    isect_edges2 = [p[::-1] for p in isect_edges]

    import pandas as pd

    extra_ = {}
    fixme_edges = []
    d1 = df.loc[isect_edges1].reset_index(drop=False)
    d2 = df.loc[isect_edges2].reset_index(drop=False)
    flags = (d1['annotmatch_evidence_decision'] !=
             d2['annotmatch_evidence_decision'])
    from wbia.tag_funcs import _parse_tags

    for f, r1, r2 in zip(flags, d1.iterrows(), d2.iterrows()):
        v1, v2 = r1[1], r2[1]
        aid1 = v1['annot_rowid1']
        aid2 = v1['annot_rowid2']
        truth_real = (ibs.const.EVIDENCE_DECISION.POSITIVE
                      if aid_to_nid[aid1] == aid_to_nid[aid2] else
                      ibs.const.EVIDENCE_DECISION.NEGATIVE)
        truth1 = v1['annotmatch_evidence_decision']
        truth2 = v2['annotmatch_evidence_decision']
        t1 = _parse_tags(v1['annotmatch_tag_text'])
        t2 = _parse_tags(v2['annotmatch_tag_text'])
        newtag = ut.union_ordered(t1, t2)
        fixme_flag = False
        if not pd.isnull(truth1):
            if truth_real != truth1:
                fixme_flag = True
        if not pd.isnull(truth2):
            if truth_real != truth2:
                fixme_flag = True
        if fixme_flag:
            logger.info('--')
            logger.info('t1, t2 = %r, %r' % (t1, t2))
            logger.info('newtag = %r' % (newtag, ))
            logger.info('truth_real, truth1, truth2 = %r, %r, %r' %
                        (truth_real, truth1, truth2))
            logger.info('aid1, aid2 = %r, %r' % (aid1, aid2))
            fixme_edges.append(tuple(sorted((aid1, aid2))))
        else:
            extra_[(aid1, aid2)] = (truth_real, newtag)

    if len(fixme_edges) > 0:
        # need to manually fix these edges
        fix_infr = wbia.AnnotInference.from_pairs(fixme_edges,
                                                  ibs=ibs,
                                                  verbose=5)
        feedback = fix_infr.read_wbia_annotmatch_feedback(
            only_existing_edges=True)
        infr = fix_infr

        fix_infr.external_feedback = feedback
        fix_infr.apply_feedback_edges()
        fix_infr.start_qt_interface(loop=False)
        # DELETE OLD EDGES TWICE
        ams = ibs.get_annotmatch_rowid_from_edges(fixme_edges)
        ibs.delete_annotmatch(ams)
        ams = ibs.get_annotmatch_rowid_from_edges(fixme_edges)
        ibs.delete_annotmatch(ams)

        # MANUALLY CALL THIS ONCE FINISHED
        # TO ONLY CHANGE ANNOTMATCH EDGES
        infr.write_wbia_staging_feedback()
        infr.write_wbia_annotmatch_feedback()

    # extra_.update(custom_)
    new_pairs = extra_.keys()
    new_truths = ut.take_column(ut.dict_take(extra_, new_pairs), 0)
    new_tags = ut.take_column(ut.dict_take(extra_, new_pairs), 1)
    new_tag_texts = [';'.join(t) for t in new_tags]
    aids1, aids2 = ut.listT(new_pairs)

    # Delete the old
    ibs.delete_annotmatch((d1['annotmatch_rowid'].values.tolist() +
                           d2['annotmatch_rowid'].values.tolist()))

    # Add the new
    ams = ibs.add_annotmatch_undirected(aids1, aids2)
    ibs.set_annotmatch_evidence_decision(ams, new_truths)
    ibs.set_annotmatch_tag_text(ams, new_tag_texts)

    if False:
        import wbia.guitool as gt

        gt.ensure_qapp()
        ut.qtensure()
        from wbia.gui import inspect_gui

        inspect_gui.show_vsone_tuner(ibs, aid1, aid2)
Example #11
def convert_hsdb_to_ibeis(hsdir, dbdir=None, **kwargs):
    r"""
    Args:
        hsdir (str): Directory to folder *containing* _hsdb
        dbdir (str): Output directory (defaults to same as hsdb)

    CommandLine:
        python -m ibeis convert_hsdb_to_ibeis --dbdir ~/work/Frogs
        python -m ibeis convert_hsdb_to_ibeis --hsdir "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016"

    Ignore:
        from ibeis.dbio.ingest_hsdb import *  # NOQA
        hsdir = "/raid/raw/RotanTurtles/Roatan HotSpotter Nov_21_2016"
        dbdir = "~/work/RotanTurtles"

    Example:
        >>> # SCRIPT
        >>> from ibeis.dbio.ingest_hsdb import *  # NOQA
        >>> dbdir = ut.get_argval('--dbdir', type_=str, default=None)
        >>> hsdir = ut.get_argval('--hsdir', type_=str, default=dbdir)
        >>> result = convert_hsdb_to_ibeis(hsdir)
        >>> print(result)
    """
    from ibeis.control import IBEISControl
    import utool as ut

    if dbdir is None:
        dbdir = hsdir
    print('[ingest] Ingesting hsdb: %r -> %r' % (hsdir, dbdir))

    assert is_hsdb(
        hsdir
    ), 'not a hotspotter database. cannot even force convert: hsdir=%r' % (
        hsdir, )
    assert not is_succesful_convert(dbdir), 'dbdir=%r is already converted' % (
        dbdir, )
    #print('FORCE DELETE: %r' % (hsdir,))
    #ibsfuncs.delete_ibeis_database(hsdir)
    imgdir = join(hsdir, 'images')

    internal_dir = get_hsinternal(hsdir)
    nametbl_fpath = join(internal_dir, 'name_table.csv')
    imgtbl_fpath = join(internal_dir, 'image_table.csv')
    chiptbl_fpath = join(internal_dir, 'chip_table.csv')

    # READ NAME TABLE
    name_text_list = ['____']
    name_hs_nid_list = [0]
    with open(nametbl_fpath, 'r') as nametbl_file:
        name_reader = csv.reader(nametbl_file)
        for ix, row in enumerate(name_reader):
            #if ix >= 3:
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            else:
                hs_nid = int(row[0])
                name = row[1].strip()
                name_text_list.append(name)
                name_hs_nid_list.append(hs_nid)

    # READ IMAGE TABLE
    image_hs_gid_list = []
    image_gname_list = []
    image_reviewed_list = []
    with open(imgtbl_fpath, 'r') as imgtb_file:
        image_reader = csv.reader(imgtb_file)
        for ix, row in enumerate(image_reader):
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            else:
                hs_gid = int(row[0])
                gname_ = row[1].strip()
                # aif in hotspotter is equivalent to reviewed in IBEIS
                reviewed = bool(row[2])
                image_hs_gid_list.append(hs_gid)
                image_gname_list.append(gname_)
                image_reviewed_list.append(reviewed)

    image_gpath_list = [join(imgdir, gname) for gname in image_gname_list]

    ut.debug_duplicate_items(image_gpath_list)
    #print(image_gpath_list)
    image_exist_flags = list(map(exists, image_gpath_list))
    missing_images = []
    for image_gpath, flag in zip(image_gpath_list, image_exist_flags):
        if not flag:
            missing_images.append(image_gpath)
            print('Image does not exist: %s' % image_gpath)

    if not all(image_exist_flags):
        print('Only %d / %d images exist' %
              (sum(image_exist_flags), len(image_exist_flags)))

    SEARCH_FOR_IMAGES = False
    if SEARCH_FOR_IMAGES:
        # Hack to try and find the missing images
        from os.path import basename
        subfiles = ut.glob(hsdir,
                           '*',
                           recursive=True,
                           fullpath=True,
                           with_files=True)
        basename_to_existing = ut.group_items(subfiles,
                                              ut.lmap(basename, subfiles))

        can_copy_list = []
        for gpath in missing_images:
            gname = basename(gpath)
            if gname not in basename_to_existing:
                print('gname = %r' % (gname, ))
                pass
            else:
                existing = basename_to_existing[gname]
                can_choose = True
                if len(existing) > 1:
                    if not ut.allsame(ut.lmap(ut.get_file_uuid, existing)):
                        can_choose = False
                if can_choose:
                    found = existing[0]
                    can_copy_list.append((found, gpath))
                else:
                    print(existing)

        src, dst = ut.listT(can_copy_list)
        ut.copy_list(src, dst)

    # READ CHIP TABLE
    chip_bbox_list = []
    chip_theta_list = []
    chip_hs_nid_list = []
    chip_hs_gid_list = []
    chip_note_list = []
    with open(chiptbl_fpath, 'r') as chiptbl_file:
        chip_reader = csv.reader(chiptbl_file)
        for ix, row in enumerate(chip_reader):
            if len(row) == 0 or row[0].strip().startswith('#'):
                continue
            else:
                hs_gid = int(row[1])
                hs_nid = int(row[2])
                bbox_text = row[3]
                theta = float(row[4])
                notes = '<COMMA>'.join([item.strip() for item in row[5:]])

                bbox_text = bbox_text.replace('[', '').replace(']', '').strip()
                bbox_text = re.sub('  *', ' ', bbox_text)
                bbox_strlist = bbox_text.split(' ')
                bbox = tuple(map(int, bbox_strlist))
                #bbox = [int(item) for item in bbox_strlist]
                chip_hs_nid_list.append(hs_nid)
                chip_hs_gid_list.append(hs_gid)
                chip_bbox_list.append(bbox)
                chip_theta_list.append(theta)
                chip_note_list.append(notes)

    names = ut.ColumnLists({
        'hs_nid': name_hs_nid_list,
        'text': name_text_list,
    })

    images = ut.ColumnLists({
        'hs_gid': image_hs_gid_list,
        'gpath': image_gpath_list,
        'reviewed': image_reviewed_list,
        'exists': image_exist_flags,
    })

    chips = ut.ColumnLists({
        'hs_gid': chip_hs_gid_list,
        'hs_nid': chip_hs_nid_list,
        'bbox': chip_bbox_list,
        'theta': chip_theta_list,
        'note': chip_note_list,
    })

    IGNORE_MISSING_IMAGES = True
    if IGNORE_MISSING_IMAGES:
        # Ignore missing information
        print('pre')
        print('chips = %r' % (chips, ))
        print('images = %r' % (images, ))
        print('names = %r' % (names, ))
        missing_gxs = ut.where(ut.not_list(images['exists']))
        missing_gids = ut.take(images['hs_gid'], missing_gxs)
        gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid'))
        missing_cxs = ut.flatten(ut.take(gid_to_cxs, missing_gids))
        # Remove missing images and dependent chips
        images = images.remove(missing_gxs)
        chips = chips.remove(missing_cxs)
        valid_nids = set(chips['hs_nid'] + [0])
        isvalid = [nid in valid_nids for nid in names['hs_nid']]
        names = names.compress(isvalid)
        print('post')
        print('chips = %r' % (chips, ))
        print('images = %r' % (images, ))
        print('names = %r' % (names, ))

    assert all(images['exists']), 'some images do not exist'

    # if gid is None:
    #     print('Not adding the ix=%r-th Chip. Its image is corrupted image.' % (ix,))
    #     # continue
    # # Build mappings to new indexes
    # names_nid_to_nid  = {names_nid: nid for (names_nid, nid) in zip(hs_nid_list, nid_list)}
    # names_nid_to_nid[1] = names_nid_to_nid[0]  # hsdb unknown is 0 or 1
    # images_gid_to_gid = {images_gid: gid for (images_gid, gid) in zip(hs_gid_list, gid_list)}

    ibs = IBEISControl.request_IBEISController(dbdir=dbdir,
                                               check_hsdb=False,
                                               **kwargs)
    assert len(ibs.get_valid_gids()) == 0, 'target database is not empty'

    # Add names, images, and annotations
    names['ibs_nid'] = ibs.add_names(names['text'])
    images['ibs_gid'] = ibs.add_images(
        images['gpath'])  # any failed gids will be None

    if True:
        # Remove corrupted images
        print('pre')
        print('chips = %r' % (chips, ))
        print('images = %r' % (images, ))
        print('names = %r' % (names, ))
        missing_gxs = ut.where(ut.flag_None_items(images['ibs_gid']))
        missing_gids = ut.take(images['hs_gid'], missing_gxs)
        gid_to_cxs = ut.dzip(*chips.group_indicies('hs_gid'))
        missing_cxs = ut.flatten(ut.take(gid_to_cxs, missing_gids))
        # Remove missing images and dependent chips
        chips = chips.remove(missing_cxs)
        images = images.remove(missing_gxs)
        print('post')
        print('chips = %r' % (chips, ))
        print('images = %r' % (images, ))
        print('names = %r' % (names, ))

    # Index chips using new ibs rowids
    ibs_gid_lookup = ut.dzip(images['hs_gid'], images['ibs_gid'])
    ibs_nid_lookup = ut.dzip(names['hs_nid'], names['ibs_nid'])
    try:
        chips['ibs_gid'] = ut.take(ibs_gid_lookup, chips['hs_gid'])
    except KeyError:
        chips['ibs_gid'] = [
            ibs_gid_lookup.get(index, None) for index in chips['hs_gid']
        ]
    try:
        chips['ibs_nid'] = ut.take(ibs_nid_lookup, chips['hs_nid'])
    except KeyError:
        chips['ibs_nid'] = [
            ibs_nid_lookup.get(index, None) for index in chips['hs_nid']
        ]

    ibs.add_annots(chips['ibs_gid'],
                   bbox_list=chips['bbox'],
                   theta_list=chips['theta'],
                   nid_list=chips['ibs_nid'],
                   notes_list=chips['note'])

    # aid_list = ibs.get_valid_aids()
    # flag_list = [True] * len(aid_list)
    # ibs.set_annot_exemplar_flags(aid_list, flag_list)
    # assert(all(ibs.get_annot_exemplar_flags(aid_list))), 'exemplars not set correctly'

    # Write file flagging successful conversion
    with open(join(ibs.get_ibsdir(), SUCCESS_FLAG_FNAME), 'w') as file_:
        file_.write('Successfully converted hsdir=%r' % (hsdir, ))
    print('finished ingest')
    return ibs
Example #12
    def make_annot_inference_dict(infr, internal=False):
        #import uuid

        def convert_to_name_uuid(nid):
            #try:
            text = ibs.get_name_texts(nid, apply_fix=False)
            if text is None:
                text = 'NEWNAME_%s' % (str(nid),)
            #uuid_ = uuid.UUID(text)
            #except ValueError:
            #    text = 'NEWNAME_%s' % (str(nid),)
            #    #uuid_ = nid
            return text
        ibs = infr.qreq_.ibs

        if internal:
            get_annot_uuids = ut.identity
        else:
            get_annot_uuids = ibs.get_annot_uuids
            #return uuid_

        # Compile the cluster_dict
        col_list = ['aid_list', 'orig_nid_list', 'new_nid_list',
                    'exemplar_flag_list', 'error_flag_list']
        cluster_dict = dict(zip(col_list, ut.listT(infr.cluster_tuples)))
        cluster_dict['annot_uuid_list'] = get_annot_uuids(cluster_dict['aid_list'])
        # We store the name's UUID as the name's text
        #cluster_dict['orig_name_uuid_list'] = [convert_to_name_uuid(nid)
        #                                       for nid in cluster_dict['orig_nid_list']]
        #cluster_dict['new_name_uuid_list'] = [convert_to_name_uuid(nid)
        # for nid in cluster_dict['new_nid_list']]
        cluster_dict['orig_name_list'] = [convert_to_name_uuid(nid)
                                          for nid in cluster_dict['orig_nid_list']]
        cluster_dict['new_name_list'] = [convert_to_name_uuid(nid)
                                         for nid in cluster_dict['new_nid_list']]
        # Filter out only the keys we want to send back in the dictionary
        #key_list = ['annot_uuid_list', 'orig_name_uuid_list',
        #            'new_name_uuid_list', 'exemplar_flag_list',
        #            'error_flag_list']
        key_list = ['annot_uuid_list', 'orig_name_list', 'new_name_list',
                    'exemplar_flag_list', 'error_flag_list']
        cluster_dict = ut.dict_subset(cluster_dict, key_list)

        # Compile the annot_pair_dict
        col_list = ['aid_1_list', 'aid_2_list', 'p_same_list',
                    'confidence_list', 'raw_score_list']
        annot_pair_dict = dict(zip(col_list, ut.listT(infr.needs_review_list)))
        annot_pair_dict['annot_uuid_1_list'] = get_annot_uuids(annot_pair_dict['aid_1_list'])
        annot_pair_dict['annot_uuid_2_list'] = get_annot_uuids(annot_pair_dict['aid_2_list'])
        zipped = zip(annot_pair_dict['annot_uuid_1_list'],
                     annot_pair_dict['annot_uuid_2_list'],
                     annot_pair_dict['p_same_list'])
        annot_pair_dict['review_pair_list'] = [
            {
                'annot_uuid_key'       : annot_uuid_1,
                'annot_uuid_1'         : annot_uuid_1,
                'annot_uuid_2'         : annot_uuid_2,
                'prior_matching_state' : {
                    'p_match'   : p_same,
                    'p_nomatch' : 1.0 - p_same,
                    'p_notcomp' : 0.0,
                }
            }
            for (annot_uuid_1, annot_uuid_2, p_same) in zipped
        ]
        # Filter out only the keys we want to send back in the dictionary
        key_list = ['review_pair_list', 'confidence_list']
        annot_pair_dict = ut.dict_subset(annot_pair_dict, key_list)

        # Compile the inference dict
        inference_dict = ut.odict([
            ('cluster_dict', cluster_dict),
            ('annot_pair_dict', annot_pair_dict),
            ('_internal_state', None),
        ])
        return inference_dict