Example #1
def demodata_infr2(defaultdb='PZ_MTEST'):
    # Imports assumed to be module-level in the original source
    # (np=numpy, ut=utool, it=itertools, and the POSTV/NEGTV review codes).
    import itertools as it
    import numpy as np
    import utool as ut
    import wbia
    from wbia.algo.graph.state import POSTV, NEGTV  # assumed location of the review codes

    ibs = wbia.opendb(defaultdb=defaultdb)
    annots = ibs.annots()
    names = list(annots.group_items(annots.nids).values())[0:20]

    def dummy_phi(c, n):
        x = np.arange(n)
        phi = c * x / (c * x + 1)
        phi = phi / phi.sum()
        phi = np.diff(phi)
        return phi

    phis = {c: dummy_phi(c, 30) for c in range(1, 4)}
    aids = ut.flatten(names)
    infr = wbia.AnnotInference(ibs, aids, autoinit=True)
    infr.init_termination_criteria(phis)
    infr.init_refresh_criteria()

    # Partially review: the first four names are intentionally left unreviewed
    n1, n2, n3, n4 = names[0:4]  # NOQA - kept from the original, unused below
    for name in names[4:]:
        for a, b in ut.itertwo(name.aids):
            infr.add_feedback((a, b), POSTV)

    for name1, name2 in it.combinations(names[4:], 2):
        infr.add_feedback((name1.aids[0], name2.aids[0]), NEGTV)
    return infr
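Every snippet on this page relies on the same behavior: ut.itertwo(iterable) yields overlapping consecutive pairs of its input. Below is a minimal sketch of that behavior, assumed equivalent for these examples (itertwo_sketch is a hypothetical name, not part of utool):

import itertools as it

def itertwo_sketch(iterable):
    # Yield overlapping consecutive pairs: (x0, x1), (x1, x2), ...
    a, b = it.tee(iterable)
    next(b, None)
    return zip(a, b)

assert list(itertwo_sketch([1, 2, 3, 4])) == [(1, 2), (2, 3), (3, 4)]

In the example above, this turns each name's ordered annotation ids into a chain of positive-review edges.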
Example #2
def demodata_tarjan_bridge():
    """
    CommandLine:
        python -m wbia.algo.graph.nx_utils demodata_tarjan_bridge --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.graph.nx_utils import *  # NOQA
        >>> G = demodata_tarjan_bridge()
        >>> ut.quit_if_noshow()
        >>> import wbia.plottool as pt
        >>> pt.show_nx(G)
        >>> ut.show_if_requested()
    """
    # Define 2-connected components joined by bridges
    cc2 = [
        (1, 2, 4, 3, 1, 4),
        (5, 6, 7, 5),
        (8, 9, 10, 8),
        (17, 18, 16, 15, 17),
        (11, 12, 14, 13, 11, 14),
    ]
    bridges = [(4, 8), (3, 5), (3, 17)]
    G = nx.Graph(ut.flatten(ut.itertwo(path) for path in cc2 + bridges))
    return G
Example #3
def closest_point_on_verts(p, verts):
    # np and ut are assumed module-level imports (numpy and utool) in the source
    import vtool as vt
    # Each consecutive vertex pair is one edge of the polyline
    candidates = [
        closest_point_on_line_segment(p, e1, e2)
        for e1, e2 in ut.itertwo(verts)
    ]
    dists = np.array([vt.L2_sqrd(p, new_pt) for new_pt in candidates])
    new_pts = candidates[dists.argmin()]
    return new_pts
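For readers without vtool installed, here is a self-contained sketch of the same edge-iteration pattern, with a hypothetical closest-point-on-segment helper standing in for closest_point_on_line_segment and vt.L2_sqrd:

import numpy as np

def _closest_on_segment(p, a, b):
    # Project p onto segment ab and clamp to the segment's endpoints
    p, a, b = map(np.asarray, (p, a, b))
    ab = b - a
    t = np.clip(np.dot(p - a, ab) / np.dot(ab, ab), 0.0, 1.0)
    return a + t * ab

def closest_point_on_verts_demo(p, verts):
    # Consecutive vertices form the polyline edges, as with ut.itertwo(verts)
    candidates = [_closest_on_segment(p, e1, e2) for e1, e2 in zip(verts, verts[1:])]
    dists = [np.sum((np.asarray(p) - c) ** 2) for c in candidates]
    return candidates[int(np.argmin(dists))]

print(closest_point_on_verts_demo((2, 0.5), [(0, 0), (1, 0), (1, 1)]))  # -> [1.  0.5]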
Example #4
    def to_networkx(self):
        import networkx as nx
        import utool as ut

        # An in-order traversal of the tour tree yields the Euler-tour order;
        # consecutive tour nodes become the graph edges.
        # (Earlier revisions built the graph from self.tour or the tree values.)
        tour = (n.value for n in self.tour_tree._traverse_nodes())
        graph = nx.Graph(ut.itertwo(tour))
        return graph
Example #5
    def repr_tree(self):
        """
        Reconstruct the represented tree as a DiGraph to
        preserve the current rootedness.
        """
        import utool as ut
        import networkx as nx
        repr_tree = nx.DiGraph()
        for u, v in ut.itertwo(self.values()):
            if not repr_tree.has_edge(v, u):
                repr_tree.add_edge(u, v)
        return repr_tree
Example #6
    def separate_math(line):
        # Break the line into math and English parts
        mathsep = ut.negative_lookbehind(re.escape('\\')) + re.escape('$')
        pos = [0]
        for count, match in enumerate(re.finditer(mathsep, line)):
            pos.append(match.start() if count % 2 == 0 else match.end())
        pos.append(len(line))
        english = []
        math = []
        for count, (l, r) in enumerate(ut.itertwo(pos)):
            if count % 2 == 0 and line[l:r]:
                english.append(line[l:r])
            else:
                math.append(line[l:r])
        return english, math
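The function above pairs the detected $-boundaries with ut.itertwo to get (left, right) slices, alternating English and math. Here is a small self-contained illustration of that idea, using a plain regex in place of ut.negative_lookbehind (separate_math_demo is a hypothetical name):

import re

def separate_math_demo(line):
    mathsep = r'(?<!\\)\$'  # an unescaped dollar sign
    pos = [0]
    for count, match in enumerate(re.finditer(mathsep, line)):
        pos.append(match.start() if count % 2 == 0 else match.end())
    pos.append(len(line))
    english, math = [], []
    # Consecutive positions form (left, right) slices; even-indexed slices are English
    for count, (l, r) in enumerate(zip(pos, pos[1:])):
        if count % 2 == 0 and line[l:r]:
            english.append(line[l:r])
        else:
            math.append(line[l:r])
    return english, math

eng, math = separate_math_demo('we minimize $f(x)$ over $x$ here')
# eng == ['we minimize ', ' over ', ' here'];  math == ['$f(x)$', '$x$']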
Example #7
def get_hacked_pos(netx_graph, name_nodes=None, prog='dot'):
    import pygraphviz
    import networkx as netx

    # Add "invisible" edges to induce an ordering
    # Hack for layout (ordering of top level nodes)
    netx_graph2 = netx_graph.copy()
    if getattr(netx_graph, 'ttype2_cpds', None) is not None:
        grouped_nodes = []
        for ttype in netx_graph.ttype2_cpds.keys():
            ttype_cpds = netx_graph.ttype2_cpds[ttype]
            # use defined ordering
            ttype_nodes = ut.list_getattr(ttype_cpds, 'variable')
            # ttype_nodes = sorted(ttype_nodes)
            invis_edges = list(ut.itertwo(ttype_nodes))
            netx_graph2.add_edges_from(invis_edges)
            grouped_nodes.append(ttype_nodes)

        A = netx.to_agraph(netx_graph2)  # networkx >= 2.0 exposes this as netx.nx_agraph.to_agraph
        for nodes in grouped_nodes:
            A.add_subgraph(nodes, rank='same')
    else:
        A = netx.to_agraph(netx_graph2)

    # if name_nodes is not None:
    #    #netx.set_node_attributes(netx_graph, name='label', values={n: {'label': n} for n in all_nodes})
    #    invis_edges = list(ut.itertwo(name_nodes))
    #    netx_graph2.add_edges_from(invis_edges)
    #    A.add_subgraph(name_nodes, rank='same')
    # else:
    #    A = netx.to_agraph(netx_graph2)
    args = ''
    G = netx_graph
    A.layout(prog=prog, args=args)
    # A.draw('example.png', prog='dot')
    node_pos = {}
    for n in G:
        node_ = pygraphviz.Node(A, n)
        try:
            xx, yy = node_.attr['pos'].split(',')
            node_pos[n] = (float(xx), float(yy))
        except Exception:
            logger.info('no position for node %r', n)
            node_pos[n] = (0.0, 0.0)
    return node_pos
Example #8
def get_hacked_pos(netx_graph, name_nodes=None, prog='dot'):
    import pygraphviz
    import networkx as netx
    # Add "invisible" edges to induce an ordering
    # Hack for layout (ordering of top level nodes)
    netx_graph2 = netx_graph.copy()
    if getattr(netx_graph, 'ttype2_cpds', None) is not None:
        grouped_nodes = []
        for ttype in netx_graph.ttype2_cpds.keys():
            ttype_cpds = netx_graph.ttype2_cpds[ttype]
            # use defined ordering
            ttype_nodes = ut.list_getattr(ttype_cpds, 'variable')
            # ttype_nodes = sorted(ttype_nodes)
            invis_edges = list(ut.itertwo(ttype_nodes))
            netx_graph2.add_edges_from(invis_edges)
            grouped_nodes.append(ttype_nodes)

        A = netx.to_agraph(netx_graph2)
        for nodes in grouped_nodes:
            A.add_subgraph(nodes, rank='same')
    else:
        A = netx.to_agraph(netx_graph2)

    #if name_nodes is not None:
    #    #netx.set_node_attributes(netx_graph, 'label', {n: {'label': n} for n in all_nodes})
    #    invis_edges = list(ut.itertwo(name_nodes))
    #    netx_graph2.add_edges_from(invis_edges)
    #    A.add_subgraph(name_nodes, rank='same')
    #else:
    #    A = netx.to_agraph(netx_graph2)
    args = ''
    G = netx_graph
    A.layout(prog=prog, args=args)
    #A.draw('example.png', prog='dot')
    node_pos = {}
    for n in G:
        node_ = pygraphviz.Node(A, n)
        try:
            xx, yy = node_.attr["pos"].split(',')
            node_pos[n] = (float(xx), float(yy))
        except Exception:
            print("no position for node", n)
            node_pos[n] = (0.0, 0.0)
    return node_pos
Example #9
def get_bayesnet_layout(model, name_nodes=None, prog='dot'):
    """
    Ensures ordering of layers is in order of addition via templates
    """
    import pygraphviz
    import networkx as nx

    # Add "invisible" edges to induce an ordering
    # Hack for layout (ordering of top level nodes)
    netx_graph2 = model.copy()

    if getattr(model, 'ttype2_cpds', None) is not None:
        grouped_nodes = []
        for ttype in model.ttype2_cpds.keys():
            ttype_cpds = model.ttype2_cpds[ttype]
            # use defined ordering
            ttype_nodes = ut.list_getattr(ttype_cpds, 'variable')
            # ttype_nodes = sorted(ttype_nodes)
            invis_edges = list(ut.itertwo(ttype_nodes))
            netx_graph2.add_edges_from(invis_edges)
            grouped_nodes.append(ttype_nodes)

        agraph = nx.nx_agraph.to_agraph(netx_graph2)
        for nodes in grouped_nodes:
            agraph.add_subgraph(nodes, rank='same')
    else:
        agraph = nx.nx_agraph.to_agraph(netx_graph2)
    logger.info(agraph)

    args = ''
    agraph.layout(prog=prog, args=args)
    # agraph.draw('example.png', prog='dot')
    node_pos = {}
    for n in model:
        node_ = pygraphviz.Node(agraph, n)
        try:
            xx, yy = node_.attr['pos'].split(',')
            node_pos[n] = (float(xx), float(yy))
        except Exception:
            logger.info('no position for node %r', n)
            node_pos[n] = (0.0, 0.0)
    return node_pos
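The pattern in the three layout helpers above is: chain the nodes of each template type with "invisible" edges (built with ut.itertwo) and pin them to one rank so dot keeps their insertion order. A stripped-down sketch of that trick, assuming pygraphviz is installed (node names here are arbitrary placeholders):

import networkx as nx

g = nx.DiGraph()
g.add_edges_from([('root', n) for n in ['n1', 'n2', 'n3']])
# Chain the leaves pairwise so dot preserves this left-to-right order
order_edges = list(zip(['n1', 'n2', 'n3'], ['n2', 'n3']))
g.add_edges_from(order_edges, style='invis')
agraph = nx.nx_agraph.to_agraph(g)
agraph.add_subgraph(['n1', 'n2', 'n3'], rank='same')
agraph.layout(prog='dot')
node_pos = {n: tuple(map(float, agraph.get_node(n).attr['pos'].split(','))) for n in g}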
Example #10
def get_bayesnet_layout(model, name_nodes=None, prog='dot'):
    """
    Ensures ordering of layers is in order of addition via templates
    """
    import pygraphviz
    import networkx as nx
    # Add "invisible" edges to induce an ordering
    # Hack for layout (ordering of top level nodes)
    netx_graph2 = model.copy()

    if getattr(model, 'ttype2_cpds', None) is not None:
        grouped_nodes = []
        for ttype in model.ttype2_cpds.keys():
            ttype_cpds = model.ttype2_cpds[ttype]
            # use defined ordering
            ttype_nodes = ut.list_getattr(ttype_cpds, 'variable')
            # ttype_nodes = sorted(ttype_nodes)
            invis_edges = list(ut.itertwo(ttype_nodes))
            netx_graph2.add_edges_from(invis_edges)
            grouped_nodes.append(ttype_nodes)

        agraph = nx.nx_agraph.to_agraph(netx_graph2)
        for nodes in grouped_nodes:
            agraph.add_subgraph(nodes, rank='same')
    else:
        agraph = nx.nx_agraph.to_agraph(netx_graph2)
    print(agraph)

    args = ''
    agraph.layout(prog=prog, args=args)
    #agraph.draw('example.png', prog='dot')
    node_pos = {}
    for n in model:
        node_ = pygraphviz.Node(agraph, n)
        try:
            xx, yy = node_.attr['pos'].split(',')
            node_pos[n] = (float(xx), float(yy))
        except Exception:
            print('no position for node', n)
            node_pos[n] = (0.0, 0.0)
    return node_pos
Example #11
    def get_tier_windows(drive):
        # Descending byte-size boundaries; consecutive pairs form (upper, lower) windows
        nbytes_tiers = [
            np.inf,
            2**32,
            2**30,
            2**29,
            2**28,
            2**27,
            2**26,
            2**25,
            2**24,
            2**23,
            2**22,
            2**21,
            2**20,
            2**10,
            0,
            -np.inf,
        ]
        tier_windows = list(ut.itertwo(nbytes_tiers))
        return tier_windows
Example #12
    def get_tier_windows(drive):
        nbytes_tiers = [
            np.inf,
            2 ** 32,
            2 ** 30,
            2 ** 29,
            2 ** 28,
            2 ** 27,
            2 ** 26,
            2 ** 25,
            2 ** 24,
            2 ** 23,
            2 ** 22,
            2 ** 21,
            2 ** 20,
            2 ** 10,
            0,
            -np.inf,
        ]
        tier_windows = list(ut.itertwo(nbytes_tiers))
        return tier_windows
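Both versions above turn a descending list of byte-size boundaries into (upper, lower) windows; a file's tier is the index of the window containing its size. A hypothetical helper (find_tier is not part of the original code) showing how those windows could be consumed:

import numpy as np

def find_tier(nbytes, tier_windows):
    # tier_windows is a list of (upper, lower) bounds in descending order
    for tier, (upper, lower) in enumerate(tier_windows):
        if lower <= nbytes < upper:
            return tier
    return len(tier_windows) - 1

boundaries = [np.inf, 2**30, 2**20, 0, -np.inf]
windows = list(zip(boundaries, boundaries[1:]))
assert find_tier(5 * 2**20, windows) == 1   # 5 MB falls in the (2**30, 2**20) window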
Example #13
def check_image_sizes(data_uri_order, all_kpts, offset_list):
    """
    Check if any keypoints go out of bounds wrt their associated images
    """
    import vtool as vt
    from os.path import join
    imgdir = ut.truepath('/raid/work/Oxford/oxbuild_images')
    gpath_list = [join(imgdir, imgid + '.jpg') for imgid in data_uri_order]
    imgsize_list = [vt.open_image_size(gpath) for gpath in gpath_list]
    kpts_list = [all_kpts[l:r] for l, r in ut.itertwo(offset_list)]

    kpts_extent = [
        vt.get_kpts_image_extent(kpts, outer=False, only_xy=False)
        for kpts in ut.ProgIter(kpts_list, 'kpts extent')
    ]

    for i, (size, extent) in enumerate(zip(imgsize_list, kpts_extent)):
        w, h = size
        _, maxx, _, maxy = extent
        assert np.isnan(maxx) or maxx < w
        assert np.isnan(maxy) or maxy < h
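The slicing idiom here, [all_kpts[l:r] for l, r in ut.itertwo(offset_list)], recovers per-image chunks from a single stacked array. A tiny self-contained version of the same pattern (illustrative names, not from wbia):

import numpy as np

all_kpts = np.arange(10).reshape(10, 1)   # stacked keypoints for 3 images
offset_list = [0, 3, 7, 10]               # per-image boundaries into the stack
kpts_list = [all_kpts[l:r] for l, r in zip(offset_list, offset_list[1:])]
assert [len(k) for k in kpts_list] == [3, 4, 3]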
Example #14
def run_asmk_script():
    with ut.embed_on_exception_context:  # NOQA
        """
    >>> from wbia.algo.smk.script_smk import *
    """

  # NOQA

        # ==============================================
        # PREPROCESSING CONFIGURATION
        # ==============================================
        config = {
            # 'data_year': 2013,
            'data_year': None,
            'dtype': 'float32',
            # 'root_sift': True,
            'root_sift': False,
            # 'centering': True,
            'centering': False,
            'num_words': 2**16,
            # 'num_words': 1E6
            # 'num_words': 8000,
            'kmeans_impl': 'sklearn.mini',
            'extern_words': False,
            'extern_assign': False,
            'assign_algo': 'kdtree',
            'checks': 1024,
            'int_rvec': True,
            'only_xy': False,
        }
        # Define which params are relevant for which operations
        relevance = {}
        relevance['feats'] = ['dtype', 'root_sift', 'centering', 'data_year']
        relevance['words'] = relevance['feats'] + [
            'num_words',
            'extern_words',
            'kmeans_impl',
        ]
        relevance['assign'] = relevance['words'] + [
            'checks',
            'extern_assign',
            'assign_algo',
        ]
        # relevance['ydata'] = relevance['assign'] + ['int_rvec']
        # relevance['xdata'] = relevance['assign'] + ['only_xy', 'int_rvec']

        nAssign = 1

        class SMKCacher(ut.Cacher):
            def __init__(self, fname, ext='.cPkl'):
                relevant_params = relevance[fname]
                relevant_cfg = ut.dict_subset(config, relevant_params)
                cfgstr = ut.get_cfg_lbl(relevant_cfg)
                dbdir = ut.truepath('/raid/work/Oxford/')
                super(SMKCacher, self).__init__(fname,
                                                cfgstr,
                                                cache_dir=dbdir,
                                                ext=ext)

        # ==============================================
        # LOAD DATASET, EXTRACT AND POSTPROCESS FEATURES
        # ==============================================
        if config['data_year'] == 2007:
            data = load_oxford_2007()
        elif config['data_year'] == 2013:
            data = load_oxford_2013()
        elif config['data_year'] is None:
            data = load_oxford_wbia()
        else:
            raise ValueError('unknown data_year: %r' % (config['data_year'],))

        offset_list = data['offset_list']
        all_kpts = data['all_kpts']
        raw_vecs = data['all_vecs']
        query_uri_order = data['query_uri_order']
        data_uri_order = data['data_uri_order']
        # del data

        # ================
        # PRE-PROCESS
        # ================
        import vtool as vt

        # Alias names to avoid errors in interactive sessions
        proc_vecs = raw_vecs
        del raw_vecs

        feats_cacher = SMKCacher('feats', ext='.npy')
        all_vecs = feats_cacher.tryload()
        if all_vecs is None:
            if config['dtype'] == 'float32':
                logger.info('Converting vecs to float32')
                proc_vecs = proc_vecs.astype(np.float32)
            else:
                proc_vecs = proc_vecs
                raise NotImplementedError('other dtype')

            if config['root_sift']:
                with ut.Timer('Apply root sift'):
                    np.sqrt(proc_vecs, out=proc_vecs)
                    vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs)

            if config['centering']:
                with ut.Timer('Apply centering'):
                    mean_vec = np.mean(proc_vecs, axis=0)
                    # Center and then re-normalize
                    np.subtract(proc_vecs, mean_vec[None, :], out=proc_vecs)
                    vt.normalize(proc_vecs, ord=2, axis=1, out=proc_vecs)

            if config['dtype'] == 'int8':
                # Placeholder in the original: int8 residuals are not implemented here
                smk_funcs  # NOQA

            all_vecs = proc_vecs
            feats_cacher.save(all_vecs)
        del proc_vecs

        # =====================================
        # BUILD VISUAL VOCABULARY
        # =====================================
        if config['extern_words']:
            words = data['words']
            assert config['num_words'] is None or len(
                words) == config['num_words']
        else:
            word_cacher = SMKCacher('words')
            words = word_cacher.tryload()
            if words is None:
                with ut.embed_on_exception_context:
                    if config['kmeans_impl'] == 'sklearn.mini':
                        import sklearn.cluster

                        rng = np.random.RandomState(13421421)
                        # init_size = int(config['num_words'] * 8)
                        init_size = int(config['num_words'] * 4)
                        # converged after 26043 iterations
                        clusterer = sklearn.cluster.MiniBatchKMeans(
                            config['num_words'],
                            init_size=init_size,
                            batch_size=1000,
                            compute_labels=False,
                            max_iter=20,
                            random_state=rng,
                            n_init=1,
                            verbose=1,
                        )
                        clusterer.fit(all_vecs)
                        words = clusterer.cluster_centers_
                    elif config['kmeans_impl'] == 'yael':
                        from yael import ynumpy

                        centroids, qerr, dis, assign, nassign = ynumpy.kmeans(
                            all_vecs,
                            config['num_words'],
                            init='kmeans++',
                            verbose=True,
                            output='all',
                        )
                        words = centroids
                    word_cacher.save(words)

        # =====================================
        # ASSIGN EACH VECTOR TO ITS NEAREST WORD
        # =====================================
        if config['extern_assign']:
            assert config[
                'extern_words'], 'need extern cluster to extern assign'
            idx_to_wxs = vt.atleast_nd(data['idx_to_wx'], 2)
            idx_to_maws = np.ones(idx_to_wxs.shape, dtype=np.float32)
            idx_to_wxs = np.ma.array(idx_to_wxs)
            idx_to_maws = np.ma.array(idx_to_maws)
        else:
            from wbia.algo.smk import vocab_indexer

            vocab = vocab_indexer.VisualVocab(words)
            dassign_cacher = SMKCacher('assign')
            assign_tup = dassign_cacher.tryload()
            if assign_tup is None:
                vocab.flann_params['algorithm'] = config['assign_algo']
                vocab.build()
                # Takes 12 minutes to assign jegous vecs to 2**16 vocab
                with ut.Timer('assign vocab neighbors'):
                    _idx_to_wx, _idx_to_wdist = vocab.nn_index(
                        all_vecs, nAssign, checks=config['checks'])
                    if nAssign > 1:
                        idx_to_wxs, idx_to_maws = smk_funcs.weight_multi_assigns(
                            _idx_to_wx,
                            _idx_to_wdist,
                            massign_alpha=1.2,
                            massign_sigma=80.0,
                            massign_equal_weights=True,
                        )
                    else:
                        idx_to_wxs = np.ma.masked_array(_idx_to_wx,
                                                        fill_value=-1)
                        idx_to_maws = np.ma.ones(idx_to_wxs.shape,
                                                 fill_value=-1,
                                                 dtype=np.float32)
                        idx_to_maws.mask = idx_to_wxs.mask
                assign_tup = (idx_to_wxs, idx_to_maws)
                dassign_cacher.save(assign_tup)

            idx_to_wxs, idx_to_maws = assign_tup  # unpack inside the else so the extern_assign branch is not clobbered

        # Breakup vectors, keypoints, and word assignments by annotation
        wx_lists = [
            idx_to_wxs[left:right] for left, right in ut.itertwo(offset_list)
        ]
        maw_lists = [
            idx_to_maws[left:right] for left, right in ut.itertwo(offset_list)
        ]
        vecs_list = [
            all_vecs[left:right] for left, right in ut.itertwo(offset_list)
        ]
        kpts_list = [
            all_kpts[left:right] for left, right in ut.itertwo(offset_list)
        ]

        # =======================
        # FIND QUERY SUBREGIONS
        # =======================

        ibs, query_annots, data_annots, qx_to_dx = load_ordered_annots(
            data_uri_order, query_uri_order)
        daids = data_annots.aids
        qaids = query_annots.aids

        query_super_kpts = ut.take(kpts_list, qx_to_dx)
        query_super_vecs = ut.take(vecs_list, qx_to_dx)
        query_super_wxs = ut.take(wx_lists, qx_to_dx)
        query_super_maws = ut.take(maw_lists, qx_to_dx)
        # Mark which keypoints are within the bbox of the query
        query_flags_list = []
        only_xy = config['only_xy']
        for kpts_, bbox in zip(query_super_kpts, query_annots.bboxes):
            flags = kpts_inside_bbox(kpts_, bbox, only_xy=only_xy)
            query_flags_list.append(flags)

        logger.info('Queries are crops of existing database images.')
        logger.info('Looking at average percents')
        percent_list = [
            flags_.sum() / flags_.shape[0] for flags_ in query_flags_list
        ]
        percent_stats = ut.get_stats(percent_list)
        logger.info('percent_stats = %s' % (ut.repr4(percent_stats), ))

        import vtool as vt

        query_kpts = vt.zipcompress(query_super_kpts, query_flags_list, axis=0)
        query_vecs = vt.zipcompress(query_super_vecs, query_flags_list, axis=0)
        query_wxs = vt.zipcompress(query_super_wxs, query_flags_list, axis=0)
        query_maws = vt.zipcompress(query_super_maws, query_flags_list, axis=0)

        # =======================
        # CONSTRUCT QUERY / DATABASE REPR
        # =======================

        # int_rvec = not config['dtype'].startswith('float')
        int_rvec = config['int_rvec']

        X_list = []
        _prog = ut.ProgPartial(length=len(qaids),
                               label='new X',
                               bs=True,
                               adjust=True)
        for aid, fx_to_wxs, fx_to_maws in _prog(
                zip(qaids, query_wxs, query_maws)):
            X = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec)
            X_list.append(X)

        # ydata_cacher = SMKCacher('ydata')
        # Y_list = ydata_cacher.tryload()
        # if Y_list is None:
        Y_list = []
        _prog = ut.ProgPartial(length=len(daids),
                               label='new Y',
                               bs=True,
                               adjust=True)
        for aid, fx_to_wxs, fx_to_maws in _prog(zip(daids, wx_lists,
                                                    maw_lists)):
            Y = new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec)
            Y_list.append(Y)
        # ydata_cacher.save(Y_list)

        # ======================
        # Add in some groundtruth

        logger.info('Add in some groundtruth')
        for Y, nid in zip(Y_list, ibs.get_annot_nids(daids)):
            Y.nid = nid

        for X, nid in zip(X_list, ibs.get_annot_nids(qaids)):
            X.nid = nid

        for Y, qual in zip(Y_list, ibs.get_annot_quality_texts(daids)):
            Y.qual = qual

        # ======================
        # Add in other properties
        for Y, vecs, kpts in zip(Y_list, vecs_list, kpts_list):
            Y.vecs = vecs
            Y.kpts = kpts

        imgdir = ut.truepath('/raid/work/Oxford/oxbuild_images')
        for Y, imgid in zip(Y_list, data_uri_order):
            gpath = ut.unixjoin(imgdir, imgid + '.jpg')
            Y.gpath = gpath

        for X, vecs, kpts in zip(X_list, query_vecs, query_kpts):
            X.kpts = kpts
            X.vecs = vecs

        # ======================
        logger.info('Building inverted list')
        daids = [Y.aid for Y in Y_list]
        # wx_list = sorted(ut.list_union(*[Y.wx_list for Y in Y_list]))
        wx_list = sorted(set.union(*[Y.wx_set for Y in Y_list]))
        assert daids == data_annots.aids
        assert len(wx_list) <= config['num_words']

        wx_to_aids = smk_funcs.invert_lists(daids, [Y.wx_list for Y in Y_list],
                                            all_wxs=wx_list)

        # Compute IDF weights
        logger.info('Compute IDF weights')
        ndocs_total = len(daids)
        # Use only the unique number of words
        ndocs_per_word = np.array([len(set(wx_to_aids[wx])) for wx in wx_list])
        logger.info('ndocs_perword stats: ' +
                    ut.repr4(ut.get_stats(ndocs_per_word)))
        idf_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word)
        wx_to_weight = dict(zip(wx_list, idf_per_word))
        logger.info('idf stats: ' +
                    ut.repr4(ut.get_stats(wx_to_weight.values())))

        # Filter junk
        Y_list_ = [Y for Y in Y_list if Y.qual != 'junk']

        # =======================
        # CHOOSE QUERY KERNEL
        # =======================
        params = {
            'asmk': dict(alpha=3.0, thresh=0.0),
            'bow': dict(),
            'bow2': dict(),
        }
        # method = 'bow'
        method = 'bow2'
        method = 'asmk'
        smk = SMK(wx_to_weight, method=method, **params[method])

        # Specific info for the type of query
        if method == 'asmk':
            # Make residual vectors
            if True:
                # The stacked way is 50x faster
                # TODO: extend for multi-assignment and record fxs
                flat_query_vecs = np.vstack(query_vecs)
                flat_query_wxs = np.vstack(query_wxs)
                flat_query_offsets = np.array(
                    [0] + ut.cumsum(ut.lmap(len, query_wxs)))

                flat_wxs_assign = flat_query_wxs
                flat_offsets = flat_query_offsets
                flat_vecs = flat_query_vecs
                tup = smk_funcs.compute_stacked_agg_rvecs(
                    words, flat_wxs_assign, flat_vecs, flat_offsets)
                all_agg_vecs, all_error_flags, agg_offset_list = tup
                if int_rvec:
                    all_agg_vecs = smk_funcs.cast_residual_integer(
                        all_agg_vecs)
                agg_rvecs_list = [
                    all_agg_vecs[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]
                agg_flags_list = [
                    all_error_flags[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]

                for X, agg_rvecs, agg_flags in zip(X_list, agg_rvecs_list,
                                                   agg_flags_list):
                    X.agg_rvecs = agg_rvecs
                    X.agg_flags = agg_flags[:, None]

                flat_wxs_assign = idx_to_wxs
                flat_offsets = offset_list
                flat_vecs = all_vecs
                tup = smk_funcs.compute_stacked_agg_rvecs(
                    words, flat_wxs_assign, flat_vecs, flat_offsets)
                all_agg_vecs, all_error_flags, agg_offset_list = tup
                if int_rvec:
                    all_agg_vecs = smk_funcs.cast_residual_integer(
                        all_agg_vecs)

                agg_rvecs_list = [
                    all_agg_vecs[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]
                agg_flags_list = [
                    all_error_flags[left:right]
                    for left, right in ut.itertwo(agg_offset_list)
                ]

                for Y, agg_rvecs, agg_flags in zip(Y_list, agg_rvecs_list,
                                                   agg_flags_list):
                    Y.agg_rvecs = agg_rvecs
                    Y.agg_flags = agg_flags[:, None]
            else:
                # This non-stacked way is about 500x slower
                _prog = ut.ProgPartial(label='agg Y rvecs',
                                       bs=True,
                                       adjust=True)
                for Y in _prog(Y_list_):
                    make_agg_vecs(Y, words, Y.vecs)

                _prog = ut.ProgPartial(label='agg X rvecs',
                                       bs=True,
                                       adjust=True)
                for X in _prog(X_list):
                    make_agg_vecs(X, words, X.vecs)
        elif method == 'bow2':
            # Hack for orig tf-idf bow vector
            nwords = len(words)
            for X in ut.ProgIter(X_list, label='make bow vector'):
                ensure_tf(X)
                bow_vector(X, wx_to_weight, nwords)

            for Y in ut.ProgIter(Y_list_, label='make bow vector'):
                ensure_tf(Y)
                bow_vector(Y, wx_to_weight, nwords)

        if method != 'bow2':
            for X in ut.ProgIter(X_list, 'compute X gamma'):
                X.gamma = smk.gamma(X)
            for Y in ut.ProgIter(Y_list_, 'compute Y gamma'):
                Y.gamma = smk.gamma(Y)

        # Execute matches (could go faster by enumerating candidates)
        scores_list = []
        for X in ut.ProgIter(X_list, label='query %s' % (smk, )):
            scores = [smk.kernel(X, Y) for Y in Y_list_]
            scores = np.array(scores)
            scores = np.nan_to_num(scores)
            scores_list.append(scores)

        import sklearn.metrics

        avep_list = []
        _iter = list(zip(scores_list, X_list))
        _iter = ut.ProgIter(_iter, label='evaluate %s' % (smk, ))
        for scores, X in _iter:
            truth = [X.nid == Y.nid for Y in Y_list_]
            avep = sklearn.metrics.average_precision_score(truth, scores)
            avep_list.append(avep)
        avep_list = np.array(avep_list)
        mAP = np.mean(avep_list)
        logger.info('mAP  = %r' % (mAP, ))
Example #15
def compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets):
    """
    More efficient version of agg on a stacked structure

    Args:
        words (ndarray): entire vocabulary of words
        flat_wxs_assign (ndarray): maps a stacked index to word index
        flat_vecs (ndarray): stacked SIFT descriptors
        flat_offsets (ndarray): offset positions per annotation

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.smk_funcs import *  # NOQA
        >>> data = testdata_rvecs(dim=2, nvecs=1000, nannots=10)
        >>> words = data['words']
        >>> flat_offsets = data['offset_list']
        >>> flat_wxs_assign, flat_vecs = ut.take(data, ['idx_to_wx', 'vecs'])
        >>> tup = compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets)
        >>> all_agg_vecs, all_error_flags, agg_offset_list = tup
        >>> agg_rvecs_list = [all_agg_vecs[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> agg_flags_list = [all_error_flags[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> assert len(agg_flags_list) == len(flat_offsets) - 1

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.smk.smk_funcs import *  # NOQA
        >>> data = testdata_rvecs(dim=2, nvecs=100, nannots=5)
        >>> words = data['words']
        >>> flat_offsets = data['offset_list']
        >>> flat_wxs_assign, flat_vecs = ut.take(data, ['idx_to_wx', 'vecs'])
        >>> tup = compute_stacked_agg_rvecs(words, flat_wxs_assign, flat_vecs, flat_offsets)
        >>> all_agg_vecs, all_error_flags, agg_offset_list = tup
        >>> agg_rvecs_list = [all_agg_vecs[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> agg_flags_list = [all_error_flags[l:r] for l, r in ut.itertwo(agg_offset_list)]
        >>> assert len(agg_flags_list) == len(flat_offsets) - 1
    """
    grouped_wxs = [
        flat_wxs_assign[left:right] for left, right in ut.itertwo(flat_offsets)
    ]

    # Assume single assignment, aggregate everything
    # across the entire database
    flat_offsets = np.array(flat_offsets)

    # Map each stacked vector index to its annotation index (dx) via the offsets
    idx_to_dx = (
        np.searchsorted(flat_offsets, np.arange(len(flat_wxs_assign)), side='right') - 1
    ).astype(np.int32)

    if isinstance(flat_wxs_assign, np.ma.masked_array):
        wx_list = flat_wxs_assign.T[0].compressed()
    else:
        wx_list = flat_wxs_assign.T[0].ravel()
    unique_wx, groupxs = vt.group_indices(wx_list)

    dim = flat_vecs.shape[1]
    if isinstance(flat_wxs_assign, np.ma.masked_array):
        dx_to_wxs = [np.unique(wxs.compressed()) for wxs in grouped_wxs]
    else:
        dx_to_wxs = [np.unique(wxs.ravel()) for wxs in grouped_wxs]
    dx_to_nagg = [len(wxs) for wxs in dx_to_wxs]
    num_agg_vecs = sum(dx_to_nagg)
    # all_agg_wxs = np.hstack(dx_to_wxs)
    agg_offset_list = np.array([0] + ut.cumsum(dx_to_nagg))
    # Preallocate agg residuals for all dxs
    all_agg_vecs = np.empty((num_agg_vecs, dim), dtype=np.float32)
    all_agg_vecs[:, :] = np.nan

    # precompute agg residual stack
    i_to_dxs = vt.apply_grouping(idx_to_dx, groupxs)
    subgroup = [vt.group_indices(dxs) for dxs in ut.ProgIter(i_to_dxs)]
    i_to_unique_dxs = ut.take_column(subgroup, 0)
    i_to_dx_groupxs = ut.take_column(subgroup, 1)
    num_words = len(unique_wx)

    # Overall this takes 5 minutes and 21 seconds
    # I think the other method takes about 12 minutes
    for i in ut.ProgIter(range(num_words), 'agg'):
        wx = unique_wx[i]
        xs = groupxs[i]
        dxs = i_to_unique_dxs[i]
        dx_groupxs = i_to_dx_groupxs[i]
        word = words[wx:wx + 1]

        offsets1 = agg_offset_list.take(dxs)
        offsets2 = [np.where(dx_to_wxs[dx] == wx)[0][0] for dx in dxs]
        offsets = np.add(offsets1, offsets2, out=offsets1)

        # if __debug__:
        #     assert np.bincount(dxs).max() < 2
        #     offset = agg_offset_list[dxs[0]]
        #     assert np.all(dx_to_wxs[dxs[0]] == all_agg_wxs[offset:offset +
        #                                                    dx_to_nagg[dxs[0]]])

        # Compute residuals
        rvecs = flat_vecs[xs] - word
        vt.normalize(rvecs, axis=1, out=rvecs)
        rvecs[np.all(np.isnan(rvecs), axis=1)] = 0
        # Aggregate across same images
        grouped_rvecs = vt.apply_grouping(rvecs, dx_groupxs, axis=0)
        agg_rvecs_ = [rvec_group.sum(axis=0) for rvec_group in grouped_rvecs]
        # agg_rvecs = np.vstack(agg_rvecs_)
        all_agg_vecs[offsets, :] = agg_rvecs_

    assert not np.any(np.isnan(all_agg_vecs))
    logger.info('Apply normalization')
    vt.normalize(all_agg_vecs, axis=1, out=all_agg_vecs)
    all_error_flags = np.all(np.isnan(all_agg_vecs), axis=1)
    all_agg_vecs[all_error_flags, :] = 0

    # ndocs_per_word1 = np.array(ut.lmap(len, wx_to_unique_dxs))
    # ndocs_total1 = len(flat_offsets) - 1
    # idf1 = smk_funcs.inv_doc_freq(ndocs_total1, ndocs_per_word1)

    tup = all_agg_vecs, all_error_flags, agg_offset_list
    return tup
Example #16
def draw_bayesian_model(model,
                        evidence={},
                        soft_evidence={},
                        fnum=None,
                        pnum=None,
                        **kwargs):

    from pgmpy.models import BayesianModel
    if not isinstance(model, BayesianModel):
        model = model.to_bayesian_model()

    import plottool as pt
    import networkx as nx
    kwargs = kwargs.copy()
    factor_list = kwargs.pop('factor_list', [])

    ttype_colors, ttype_scalars = make_colorcodes(model)

    textprops = {
        'horizontalalignment': 'left',
        'family': 'monospace',
        'size': 8,
    }

    # build graph attrs
    tup = get_node_viz_attrs(model, evidence, soft_evidence, factor_list,
                             ttype_colors, **kwargs)
    node_color, pos_list, pos_dict, takws = tup

    # draw graph
    has_infered = evidence or 'factor_list' in kwargs

    if False:
        fig = pt.figure(fnum=fnum, pnum=pnum, doclf=True)  # NOQA
        ax = pt.gca()
        drawkw = dict(pos=pos_dict,
                      ax=ax,
                      with_labels=True,
                      node_size=1100,
                      node_color=node_color)
        nx.draw(model, **drawkw)
    else:
        # BE VERY CAREFUL
        if 1:
            graph = model.copy()
            graph.__class__ = nx.DiGraph
            graph.graph['groupattrs'] = ut.ddict(dict)
            if getattr(graph, 'ttype2_cpds', None) is not None:
                # Add invis edges and ttype groups
                for ttype in model.ttype2_cpds.keys():
                    ttype_cpds = model.ttype2_cpds[ttype]
                    # use defined ordering
                    ttype_nodes = ut.list_getattr(ttype_cpds, 'variable')
                    # ttype_nodes = sorted(ttype_nodes)
                    invis_edges = list(ut.itertwo(ttype_nodes))
                    graph.add_edges_from(invis_edges)
                    # Use keyword arguments for networkx 1.11/2.x compatibility
                    nx.set_edge_attributes(
                        graph, name='style',
                        values={edge: 'invis' for edge in invis_edges})
                    nx.set_node_attributes(
                        graph, name='groupid',
                        values={node: ttype for node in ttype_nodes})
                    graph.graph['groupattrs'][ttype]['rank'] = 'same'
                    graph.graph['groupattrs'][ttype]['cluster'] = False
        else:
            graph = model
        pt.show_nx(graph,
                   layout_kw={'prog': 'dot'},
                   fnum=fnum,
                   pnum=pnum,
                   verbose=0)
        pt.zoom_factory()
        fig = pt.gcf()
        ax = pt.gca()
        pass
    hacks = [
        pt.draw_text_annotations(textprops=textprops, **takw) for takw in takws
        if takw
    ]

    xmin, ymin = np.array(pos_list).min(axis=0)
    xmax, ymax = np.array(pos_list).max(axis=0)
    if 'name' in model.ttype2_template:
        num_names = len(model.ttype2_template['name'].basis)
        num_annots = len(model.ttype2_cpds['name'])
        if num_annots > 4:
            ax.set_xlim((xmin - 40, xmax + 40))
            ax.set_ylim((ymin - 50, ymax + 50))
            fig.set_size_inches(30, 7)
        else:
            ax.set_xlim((xmin - 42, xmax + 42))
            ax.set_ylim((ymin - 50, ymax + 50))
            fig.set_size_inches(23, 7)
        title = 'num_names=%r, num_annots=%r' % (
            num_names,
            num_annots,
        )
    else:
        title = ''
    map_assign = kwargs.get('map_assign', None)

    def word_insert(text):
        return '' if len(text) == 0 else text + ' '

    top_assignments = kwargs.get('top_assignments', None)
    if top_assignments is not None:
        map_assign, map_prob = top_assignments[0]
        if map_assign is not None:
            title += '\n%sMAP: ' % (word_insert(kwargs.get('method', '')))
            title += map_assign + ' @' + '%.2f%%' % (100 * map_prob, )
    if kwargs.get('show_title', True):
        pt.set_figtitle(title, size=14)

    for hack in hacks:
        hack()

    if has_infered:
        # Hack in colorbars
        # if ut.list_type(basis) is int:
        #     pt.colorbar(scalars, colors, lbl='score', ticklabels=np.array(basis) + 1)
        # else:
        #     pt.colorbar(scalars, colors, lbl='score', ticklabels=basis)
        keys = ['name', 'score']
        locs = ['left', 'right']
        for key, loc in zip(keys, locs):
            if key in ttype_colors:
                basis = model.ttype2_template[key].basis
                # scalars =
                colors = ttype_colors[key]
                scalars = ttype_scalars[key]
                pt.colorbar(scalars,
                            colors,
                            lbl=key,
                            ticklabels=basis,
                            ticklocation=loc)
Example #17
def closest_point_on_verts(p, verts):
    import vtool as vt
    candidates = [closest_point_on_line_segment(p, e1, e2) for e1, e2 in ut.itertwo(verts)]
    dists = np.array([vt.L2_sqrd(p, new_pt) for new_pt in candidates])
    new_pts = candidates[dists.argmin()]
    return new_pts
Example #18
def fix_sentences():
    """
    fixtex --fixsent
    """
    text = ut.read_from('main.tex')
    root = latex_parser.LatexDocPart.parse_text(text, debug=None)
    document = root.find_descendant_type('document')
    chapters = list(document.find_descendant_types('chapter'))

    def separate_math(line):
        # Break line into math and english parts
        mathsep = ut.negative_lookbehind(re.escape('\\')) + re.escape('$')
        pos = [0]
        for count, match in enumerate(re.finditer(mathsep, line)):
            pos.append(match.start() if count % 2 == 0 else match.end())
        pos.append(len(line))
        english = []
        math = []
        for count, (l, r) in enumerate(ut.itertwo(pos)):
            if count % 2 == 0 and line[l:r]:
                english.append(line[l:r])
            else:
                math.append(line[l:r])
        return english, math

    def print_acronymn_def(english_line):
        words = re.split(r'[~\s]', english_line.rstrip('.'))
        words = [w.rstrip(',').rstrip('.') for w in words]
        flag = 0
        for count, word in enumerate(words):
            if re.match('\\([A-Z]+\\)', word):
                ut.cprint(word, 'blue')
                flag = True
        if flag:
            print(re.sub('\\\\cite{[^}]*}', '', line))

    def has_consec_cap_words(words):
        for count, (u, v) in enumerate(ut.itertwo(words)):
            if u[0].isupper() and v[0].isupper():
                if count > 0:
                    return True

    def gen_sentences():
        for chapter in chapters:
            # ut.cprint(chapter.fpath_root(), 'yellow')
            for line in chapter.find_sentences():
                context = {'chapter': chapter}
                yield line, context

    import re
    found = ut.ddict(list)
    for line, context in gen_sentences():
        english, math = separate_math(line)
        english_line = ' '.join(english).replace(',',
                                                 '').rstrip('.').strip(' ')
        words = re.split(r'[~\s]+', english_line)
        words = [w.rstrip(',').rstrip('.') for w in words]

        if has_consec_cap_words(words):
            print(line)

        # print_acronymn_def(english_line)

        if 'locality sensitive' in line:
            print("LSH NEEDS DASH")

        multicap_words = []
        for count, word in enumerate(words):
            word = word.strip(')').strip('(')
            if sum(c.isupper() for c in word) > 1:
                if word.startswith('\\') and word.endswith('{}'):
                    continue
                if word.startswith('\\Cref') and word.endswith('}'):
                    if count != 0:
                        print("FIX CREF UPPER")
                        print(line)
                    continue
                if word.startswith('\\cref') and word.endswith('}'):
                    if count == 0:
                        print("FIX CREF LOWER")
                        print(line)
                    continue
                if not word.isalpha():
                    continue
                multicap_words.append(word)
        if multicap_words:
            found[context['chapter']].append(multicap_words)
        # print(ut.repr4(ut.dict_hist(found)))

    def english_tokens(line):
        # Break line into math and english parts
        mathsep = ut.negative_lookbehind(re.escape('\\')) + re.escape('$')

        def clean_word(word):
            if word.startswith('``'):
                word = word[2:]
            if word.endswith("''"):
                word = word[:-2]
            return word.strip(',').rstrip('.')

        prev = 0
        tokens = []
        for count, match in enumerate(re.finditer(mathsep, line)):
            if count % 2 == 0:
                curr = match.start()
                english = line[prev:curr]
                parts = re.split(r'[~\s]+', english)
                parts = (clean_word(p) for p in parts)
                parts = (p for p in parts if p)
                tokens.extend(parts)
            else:
                curr = match.end()
                math = line[prev:curr]
                tokens.append(math)
            prev = curr
        return tokens

    from fixtex.svn_converter.latexparser import DocParser
    from fixtex.svn_converter.docnodes import CaptionNode, FigureNode
    from fixtex.svn_converter.tokenizer import Tokenizer

    def caption_sentences(fpath):
        text = ut.readfrom(fpath)
        tokenstream = Tokenizer(text).tokenize()
        self = DocParser(tokenstream, fpath)
        tree = self.parse()
        for node in tree.walk():
            if isinstance(node, FigureNode):
                for x in node.walk():
                    if isinstance(x, CaptionNode):
                        for sent in ut.split_sentences2(x.resolve()):
                            yield sent

    def gen_cap():
        fpaths = [
            ut.truepath('~/latex/crall-thesis-2017/figdef1.tex'),
            ut.truepath('~/latex/crall-thesis-2017/figdef2.tex'),
            ut.truepath('~/latex/crall-thesis-2017/figdef3.tex'),
            ut.truepath('~/latex/crall-thesis-2017/figdef4.tex'),
            ut.truepath('~/latex/crall-thesis-2017/figdef5.tex'),
        ]
        for fpath in fpaths:
            context = {'fpath': fpath}
            for sent in caption_sentences(fpath):
                yield sent, context

    # Find A, An grammar errors

    # Define special cases:
    cons_sounds = {
        'unit', 'user', 'unique', 'one', 'uniform', 'unified', 'useful'
    }
    vowel_sounds = {'roc', 'mcc', 'lnbnn', 'l1', 'hour'}

    def startswith_vowel_sound(after):
        # do our best guess
        if after.startswith('$'):
            if after[1] == '8':
                return True
            if after[1] == 'x':
                return True
        if after in vowel_sounds:
            return True
        if after in cons_sounds:
            return False
        return after[0] in 'aeiou'

    cmd_map, cmd_map1 = latex_parser.LatexDocPart.read_static_defs()

    simple_cmd_re = re.compile('\\\\[A-Za-z]*{}')

    print('\nCHECK FOR A / AN ERRORS')
    import itertools as it
    generators = [
        # gen_sentences(),
        gen_cap(),
    ]

    for line, context in it.chain(*generators):
        words = english_tokens(line)
        for u, v in ut.itertwo(words):
            v_orig = v
            if simple_cmd_re.match(v):
                key = v[:-2]
                try:
                    v = cmd_map[key]
                except Exception:
                    print(line)
                    raise

            v = v.split('-')[0]
            article = u.lower()
            if article in {'a', 'an'}:
                after = v.lower()
                # TODO ensure v is a singular countable noun
                is_vowel_sound = startswith_vowel_sound(after)

                flag = False
                if article == 'a' and is_vowel_sound:
                    flag = "next word starts with a vowel sound; article should be 'an'"
                if article == 'an' and not is_vowel_sound:
                    flag = "next word starts with a consonant sound; article should be 'a'"

                if flag:
                    print('---------')
                    print(flag)
                    print(article, after)
                    print('{} {}'.format(u, v_orig))
                    print(line)
Example #19
    def mark_unreviewed_above_score_as_correct(qres_wgt):
        selected_qtindex_list = qres_wgt.selectedRows()
        if len(selected_qtindex_list) == 1:
            qtindex = selected_qtindex_list[0]
            # aid1, aid2 = qres_wgt.get_aidpair_from_qtindex(qtindex)
            thresh = qtindex.model().get_header_data('score', qtindex)
            logger.info('thresh = %r' % (thresh, ))

            rows = qres_wgt.review_api.ider()
            scores_ = qres_wgt.review_api.get(
                qres_wgt.review_api.col_name_list.index('score'), rows)
            valid_rows = ut.compress(rows, scores_ >= thresh)
            aids1 = qres_wgt.review_api.get(
                qres_wgt.review_api.col_name_list.index('qaid'), valid_rows)
            aids2 = qres_wgt.review_api.get(
                qres_wgt.review_api.col_name_list.index('aid'), valid_rows)
            # ibs = qres_wgt.ibs
            ibs = qres_wgt.ibs
            am_rowids = ibs.get_annotmatch_rowid_from_undirected_superkey(
                aids1, aids2)
            reviewed = ibs.get_annotmatch_reviewed(am_rowids)
            unreviewed = ut.not_list(reviewed)

            valid_rows = ut.compress(valid_rows, unreviewed)
            aids1 = ut.compress(aids1, unreviewed)
            aids2 = ut.compress(aids2, unreviewed)

            import networkx as nx

            graph = nx.Graph()
            # Pass the edge attribute as a keyword (portable across networkx versions)
            graph.add_edges_from(list(zip(aids1, aids2)),
                                 user_thresh_match=True)
            review_groups = list(nx.connected_component_subgraphs(graph))

            changing_aids = list(graph.nodes())
            nids = ibs.get_annot_nids(changing_aids)
            nid2_aids = ut.group_items(changing_aids, nids)
            for nid, aids in nid2_aids.items():
                # Connect all original names in the database to denote merges
                for u, v in ut.itertwo(aids):
                    graph.add_edge(u, v)
            dbside_groups = list(nx.connected_component_subgraphs(graph))

            options = [
                'Accept',
                # 'Review More'
            ]
            msg = (ut.codeblock("""
                There are %d names and %d annotations in this mass review set.
                Mass review has discovered %d internal groups.
                Accepting will induce a database grouping of %d names.
                """) % (
                len(nid2_aids),
                len(changing_aids),
                len(review_groups),
                len(dbside_groups),
            ))

            reply = gt.user_option(msg=msg, options=options)

            if reply == options[0]:
                # This is not the smartest way to group names.
                # Ideally what will happen here, is that reviewed edges will go into
                # the new graph name inference algorithm.
                # then the chosen point will be used as the threshold. Then
                # the graph cut algorithm will be applied.
                logger_ = qres_wgt.logger
                logger_.debug(msg)
                logger_.info('START MASS_THRESHOLD_MERGE')
                logger_.info('num_groups=%d thresh=%r' % (
                    len(dbside_groups),
                    thresh,
                ))
                for count, subgraph in enumerate(dbside_groups):
                    thresh_aid_pairs = [
                        edge for edge, flag in nx.get_edge_attributes(
                            graph, 'user_thresh_match').items() if flag
                    ]
                    thresh_uuid_pairs = ibs.unflat_map(ibs.get_annot_uuids,
                                                       thresh_aid_pairs)
                    aids = list(subgraph.nodes())
                    nids = ibs.get_annot_name_rowids(aids)
                    flags = ut.not_list(ibs.is_aid_unknown(aids))
                    previous_names = ibs.get_name_texts(nids)
                    valid_nids = ut.compress(nids, flags)
                    if len(valid_nids) == 0:
                        merge_nid = ibs.make_next_nids(num=1)[0]
                        type_ = 'new'
                    else:
                        merge_nid = min(valid_nids)
                        type_ = 'existing'

                    # Need to find other non-exemplar / query names that may
                    # need merging
                    other_aids = ibs.get_name_aids(valid_nids)
                    other_aids = set(ut.flatten(other_aids)) - set(aids)
                    other_auuids = ibs.get_annot_uuids(other_aids)
                    other_previous_names = ibs.get_annot_names(other_aids)

                    merge_name = ibs.get_name_texts(merge_nid)
                    annot_uuids = ibs.get_annot_uuids(aids)
                    ###
                    # Set as reviewed (so we dont see them again), but mark it
                    # with a different code to denote that it was a MASS review
                    aid1_list = ut.take_column(thresh_aid_pairs, 0)
                    aid2_list = ut.take_column(thresh_aid_pairs, 1)
                    am_rowids = ibs.add_annotmatch_undirected(
                        aid1_list, aid2_list)
                    ibs.set_annotmatch_reviewer(
                        am_rowids, ['algo:lnbnn_thresh'] * len(am_rowids))

                    logger_.info('START GROUP %d' % (count, ))
                    logger_.info(
                        'GROUP BASED ON %d ANNOT_PAIRS WITH SCORE ABOVE (thresh=%r)'
                        % (
                            len(thresh_uuid_pairs),
                            thresh,
                        ))
                    logger_.debug('(uuid_pairs=%r)' % (thresh_uuid_pairs))
                    logger_.debug('(merge_name=%r)' % (merge_name))
                    logger_.debug(
                        'CHANGE NAME OF %d (annot_uuids=%r) WITH (previous_names=%r) TO (%s) (merge_name=%r)'
                        % (
                            len(annot_uuids),
                            annot_uuids,
                            previous_names,
                            type_,
                            merge_name,
                        ))
                    logger_.debug(
                        'ADDITIONAL CHANGE NAME OF %d (annot_uuids=%r) WITH (previous_names=%r) TO (%s) (merge_name=%r)'
                        % (
                            len(other_auuids),
                            other_auuids,
                            other_previous_names,
                            type_,
                            merge_name,
                        ))
                    logger_.info('END GROUP %d' % (count, ))
                    new_nids = [merge_nid] * len(aids)
                    ibs.set_annot_name_rowids(aids, new_nids)
                logger_.info('END MASS_THRESHOLD_MERGE')
        else:
            logger.info('[context] Multiple %d selection' %
                        (len(selected_qtindex_list), ))
Example #20
    def has_consec_cap_words(words):
        for count, (u, v) in enumerate(ut.itertwo(words)):
            if u[0].isupper() and v[0].isupper():
                if count > 0:
                    return True
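A self-contained check of the logic above, with a plain pairwise zip standing in for ut.itertwo (_has_consec_cap_words is a hypothetical stand-in; an explicit return False is added here, where the original implicitly returns None):

def _has_consec_cap_words(words):
    for count, (u, v) in enumerate(zip(words, words[1:])):
        if u[0].isupper() and v[0].isupper() and count > 0:
            return True
    return False

assert _has_consec_cap_words(['The', 'Quick', 'Brown', 'fox'])
assert not _has_consec_cap_words(['The', 'quick', 'brown', 'fox'])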
Example #21
    def to_networkx(self):
        import networkx as nx
        import utool as ut
        return nx.Graph(ut.itertwo(self.tour))
Example #22
def draw_twoday_count(ibs, visit_info_list_):
    import copy
    visit_info_list = copy.deepcopy(visit_info_list_)

    aids_day1, aids_day2 = ut.take_column(visit_info_list_, 'aids')
    nids_day1, nids_day2 = ut.take_column(visit_info_list_, 'unique_nids')
    resight_nids = ut.isect(nids_day1, nids_day2)

    if False:
        # HACK REMOVE DATA TO MAKE THIS FASTER
        num = 20
        for info in visit_info_list:
            non_resight_nids = list(set(info['unique_nids']) - set(resight_nids))
            sample_nids2 = non_resight_nids[0:num] + resight_nids[:num]
            info['grouped_aids'] = ut.dict_subset(info['grouped_aids'], sample_nids2)
            info['unique_nids'] = sample_nids2

    # Build a graph of matches
    if False:

        debug = False

        for info in visit_info_list:
            edges = []
            grouped_aids = info['grouped_aids']

            aids_list = list(grouped_aids.values())
            ams_list = ibs.get_annotmatch_rowids_in_cliques(aids_list)
            aids1_list = ibs.unflat_map(ibs.get_annotmatch_aid1, ams_list)
            aids2_list = ibs.unflat_map(ibs.get_annotmatch_aid2, ams_list)
            for ams, aids, aids1, aids2 in zip(ams_list, aids_list, aids1_list, aids2_list):
                edge_nodes = set(aids1 + aids2)
                # if len(edge_nodes) != len(set(aids)):
                #     print('--')
                #     print('aids = %r' % (aids,))
                #     print('edge_nodes = %r' % (edge_nodes,))
                bad_aids = edge_nodes - set(aids)
                if len(bad_aids) > 0:
                    print('bad_aids = %r' % (bad_aids,))
                unlinked_aids = set(aids) - edge_nodes
                mst_links = list(ut.itertwo(list(unlinked_aids) + list(edge_nodes)[:1]))
                bad_aids.add(None)
                user_links = [(u, v) for (u, v) in zip(aids1, aids2) if u not in bad_aids and v not in bad_aids]
                new_edges = mst_links + user_links
                new_edges = [(int(u), int(v)) for u, v in new_edges if u not in bad_aids and v not in bad_aids]
                edges += new_edges
            info['edges'] = edges

        # Add edges between days
        grouped_aids1, grouped_aids2 = ut.take_column(visit_info_list, 'grouped_aids')
        nids_day1, nids_day2 = ut.take_column(visit_info_list, 'unique_nids')
        resight_nids = ut.isect(nids_day1, nids_day2)

        resight_aids1 = ut.take(grouped_aids1, resight_nids)
        resight_aids2 = ut.take(grouped_aids2, resight_nids)
        #resight_aids3 = [list(aids1) + list(aids2) for aids1, aids2 in zip(resight_aids1, resight_aids2)]

        ams_list = ibs.get_annotmatch_rowids_between_groups(resight_aids1, resight_aids2)
        aids1_list = ibs.unflat_map(ibs.get_annotmatch_aid1, ams_list)
        aids2_list = ibs.unflat_map(ibs.get_annotmatch_aid2, ams_list)

        between_edges = []
        for ams, aids1, aids2, rawaids1, rawaids2 in zip(ams_list, aids1_list, aids2_list, resight_aids1, resight_aids2):
            link_aids = aids1 + aids2
            rawaids3 = rawaids1 + rawaids2
            badaids = ut.setdiff(link_aids, rawaids3)
            assert not badaids
            user_links = [(int(u), int(v)) for (u, v) in zip(aids1, aids2)
                          if u is not None and v is not None]
            # HACK THIS OFF
            user_links = []
            if len(user_links) == 0:
                # Hack in an edge
                between_edges += [(rawaids1[0], rawaids2[0])]
            else:
                between_edges += user_links

        assert np.all(0 == np.diff(np.array(ibs.unflat_map(ibs.get_annot_nids, between_edges)), axis=1))

        import plottool_ibeis as pt
        import networkx as nx
        #pt.qt4ensure()
        #len(list(nx.connected_components(graph1)))
        #print(ut.graph_info(graph1))

        # Layout graph
        layoutkw = dict(
            prog='neato',
            draw_implicit=False, splines='line',
            #splines='curved',
            #splines='spline',
            #sep=10 / 72,
            #prog='dot', rankdir='TB',
        )

        def translate_graph_to_origin(graph):
            x, y, w, h = ut.get_graph_bounding_box(graph)
            ut.translate_graph(graph, (-x, -y))

        def stack_graphs(graph_list, vert=False, pad=None):
            graph_list_ = [g.copy() for g in graph_list]
            for g in graph_list_:
                translate_graph_to_origin(g)
            bbox_list = [ut.get_graph_bounding_box(g) for g in graph_list_]
            if vert:
                dim1 = 3
                dim2 = 2
            else:
                dim1 = 2
                dim2 = 3
            dim1_list = np.array([bbox[dim1] for bbox in bbox_list])
            dim2_list = np.array([bbox[dim2] for bbox in bbox_list])
            if pad is None:
                pad = np.mean(dim1_list) / 2
            offset1_list = ut.cumsum([0] + [d + pad for d in dim1_list[:-1]])
            max_dim2 = max(dim2_list)
            offset2_list = [(max_dim2 - d2) / 2 for d2 in dim2_list]
            if vert:
                t_xy_list = [(d2, d1) for d1, d2 in zip(offset1_list, offset2_list)]
            else:
                t_xy_list = [(d1, d2) for d1, d2 in zip(offset1_list, offset2_list)]

            for g, t_xy in zip(graph_list_, t_xy_list):
                ut.translate_graph(g, t_xy)
                nx.set_node_attributes(g, name='pin', values='true')

            new_graph = nx.compose_all(graph_list_)
            #pt.show_nx(new_graph, layout='custom', node_labels=False, as_directed=False)  # NOQA
            return new_graph

        # Construct graph
        for count, info in enumerate(visit_info_list):
            graph = nx.Graph()
            edges = [(int(u), int(v)) for u, v in info['edges']
                     if u is not None and v is not None]
            graph.add_edges_from(edges, attr_dict={'zorder': 10})
            nx.set_node_attributes(graph, name='zorder', values=20)

            # Layout in neato
            _ = pt.nx_agraph_layout(graph, inplace=True, **layoutkw)  # NOQA

            # Extract components and then flatten in nid ordering
            ccs = list(nx.connected_components(graph))
            root_aids = []
            cc_graphs = []
            for cc_nodes in ccs:
                cc = graph.subgraph(cc_nodes)
                try:
                    root_aids.append(list(ut.nx_source_nodes(cc.to_directed()))[0])
                except nx.NetworkXUnfeasible:
                    root_aids.append(list(cc.nodes())[0])
                cc_graphs.append(cc)

            root_nids = ibs.get_annot_nids(root_aids)
            nid2_graph = dict(zip(root_nids, cc_graphs))

            resight_nids_ = set(resight_nids).intersection(set(root_nids))
            noresight_nids_ = set(root_nids) - resight_nids_

            n_graph_list = ut.take(nid2_graph, sorted(noresight_nids_))
            r_graph_list = ut.take(nid2_graph, sorted(resight_nids_))

            if len(n_graph_list) > 0:
                n_graph = nx.compose_all(n_graph_list)
                _ = pt.nx_agraph_layout(n_graph, inplace=True, **layoutkw)  # NOQA
                n_graphs = [n_graph]
            else:
                n_graphs = []

            r_graphs = [stack_graphs(chunk) for chunk in ut.ichunks(r_graph_list, 100)]
            if count == 0:
                new_graph = stack_graphs(n_graphs + r_graphs, vert=True)
            else:
                new_graph = stack_graphs(r_graphs[::-1] + n_graphs, vert=True)

            #pt.show_nx(new_graph, layout='custom', node_labels=False, as_directed=False)  # NOQA
            info['graph'] = new_graph

        graph1_, graph2_ = ut.take_column(visit_info_list, 'graph')
        if False:
            _ = pt.show_nx(graph1_, layout='custom', node_labels=False, as_directed=False)  # NOQA
            _ = pt.show_nx(graph2_, layout='custom', node_labels=False, as_directed=False)  # NOQA

        graph_list = [graph1_, graph2_]
        twoday_graph = stack_graphs(graph_list, vert=True, pad=None)
        nx.set_node_attributes(twoday_graph, name='pin', values='true')

        if debug:
            ut.nx_delete_None_edge_attr(twoday_graph)
            ut.nx_delete_None_node_attr(twoday_graph)
            print('twoday_graph(pre) info' + ut.repr3(ut.graph_info(twoday_graph), nl=2))

        # Hack: no idea why there are nodes that don't exist here
        between_edges_ = [edge for edge in between_edges
                          if twoday_graph.has_node(edge[0]) and twoday_graph.has_node(edge[1])]

        twoday_graph.add_edges_from(between_edges_, attr_dict={'alpha': .2, 'zorder': 0})
        ut.nx_ensure_agraph_color(twoday_graph)

        layoutkw['splines'] = 'line'
        layoutkw['prog'] = 'neato'
        agraph = pt.nx_agraph_layout(twoday_graph, inplace=True, return_agraph=True, **layoutkw)[-1]  # NOQA
        if False:
            fpath = ut.truepath('~/ggr_graph.png')
            agraph.draw(fpath)
            ut.startfile(fpath)

        if debug:
            print('twoday_graph(post) info' + ut.repr3(ut.graph_info(twoday_graph)))

        _ = pt.show_nx(twoday_graph, layout='custom', node_labels=False, as_directed=False)  # NOQA
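The `stack_graphs` helper above places independently laid-out components by translating each one: cumulative offsets along the stacking axis, centering along the other. A minimal numpy-only sketch of that offset computation on plain `(x, y, w, h)` bounding boxes (hypothetical data, not the wbia API):

import numpy as np

def stacking_offsets(bboxes, vert=False, pad=None):
    """Return one (x, y) translation per bbox that stacks them with padding."""
    bboxes = np.asarray(bboxes, dtype=float)       # rows of (x, y, w, h)
    dim1 = bboxes[:, 3] if vert else bboxes[:, 2]  # extent along the stacking axis
    dim2 = bboxes[:, 2] if vert else bboxes[:, 3]  # extent along the other axis
    if pad is None:
        pad = dim1.mean() / 2
    offset1 = np.concatenate([[0], np.cumsum(dim1[:-1] + pad)])
    offset2 = (dim2.max() - dim2) / 2              # center along the other axis
    return list(zip(offset2, offset1)) if vert else list(zip(offset1, offset2))

# three components of different sizes stacked horizontally
print(stacking_offsets([(0, 0, 4, 2), (0, 0, 2, 4), (0, 0, 6, 1)]))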
Example No. 23
0
def make_expanded_input_graph(graph, target):
    """
    Starting from the `target` property we trace all possible paths in the
    `graph` back to all sources.

    Args:
        graph (nx.DiMultiGraph): the dependency graph with a single source.
        target (str): a single target node in graph

    Notes:
        Each edge in the graph must have a `local_input_id` that defines the
        type of edge it is (e.g., one-to-many, one-to-one, nwise/multi).

        # Step 1: Extracting the Relevant Subgraph
        We start by searching for all sources of the graph (we assume there is
        only one). Then we extract the subgraph defined by all edges between
        the sources and the target.  We augment this graph with a dummy super
        source `s` and super sink `t`. This allows us to associate an edge with
        the real source and sink.

        # Step 2: Trace all paths from `s` to `t`.
        Create a set of all paths from the source to the sink and accumulate
        the `local_input_id` of each edge along the path. This will uniquely
        identify each path. We use a hack to condense the accumulated ids in
        order to display them nicely.

        # Step 3: Create the new `exi_graph`
        Using the traced paths with ids we construct a new graph representing
        expanded inputs. The nodes in the original graph will be copied for each
        unique path that passes through the node. We identify these nodes using
        the accumulated ids built along the edges in our path set.  For each
        path starting from the target we add each node augmented with the
        accumulated ids on its output(?) edge. We also add the edges along
        these paths which results in the final `exi_graph`.

        # Step 4: Identify valid inputs candidates
        The purpose of this graph is to identify which inputs are needed
        to compute dependent properties. One valid set of inputs is all
        sources of the graph. However, sometimes it is preferable to specify
        a model that may have been trained from many inputs. Therefore any
        node with a one-to-many input edge may also be specified as an input.

        # Step 5: Identify root-most inputs
        The user will only specify one possible set of the inputs. We refer to
        this set as the "root-most" inputs. This is a set of candidate nodes
        such that all paths from the sink to the super source are blocked.  We
        default to the set of inputs which results in the fewest dependency
        computations. However, this is arbitrary.

        The last step that is not represented here is to compute the order that
        the branches must be specified in when given to the depcache for a
        computation.

    Returns:
        nx.DiGraph: exi_graph: the expanded input graph

    Notes:
        All * nodes are defined to be distinct.
        TODO: To make a * node non-distinct it must be suffixed with an
        identifier.

    CommandLine:
        python -m dtool.input_helpers make_expanded_input_graph --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from dtool.input_helpers import *  # NOQA
        >>> from dtool.example_depcache2 import * # NOQA
        >>> depc = testdata_depc3()
        >>> table = depc['smk_match']
        >>> table = depc['vsone']
        >>> graph = table.depc.explicit_graph.copy()
        >>> target = table.tablename
        >>> exi_graph = make_expanded_input_graph(graph, target)
        >>> x = list(exi_graph.nodes())[0]
        >>> print('x = %r' % (x,))
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> pt.show_nx(graph, fnum=1, pnum=(1, 2, 1))
        >>> pt.show_nx(exi_graph, fnum=1, pnum=(1, 2, 2))
        >>> ut.show_if_requested()
    """

    # FIXME: this does not work correctly when the nesting of non-1-to-1
    # dependencies is greater than 2 (I think); the algorithm for finding
    # inputs breaks down in that case.

    # FIXME: two vocabs have the same edge id, they should be the same in the
    # Expanded Input Graph as well. Their accum_id needs to be changed.

    def condense_accum_ids(rinput_path_id):
        # Hack to condense and consolidate graph sources
        prev = None
        compressed = []
        for item in rinput_path_id:
            if item == '1' and prev is not None:
                pass  # don't append ones
            elif item != prev:
                compressed.append(item)
            prev = item
        #if len(compressed) > 1 and compressed[0] in ['1', '*']:
        if len(compressed) > 1 and compressed[0] == '1':
            compressed = compressed[1:]
        compressed = tuple(compressed)
        return compressed

    BIG_HACK = True

    #BIG_HACK = False

    def condense_accum_ids_stars(rinput_path_id):
        # Hack to condense and consolidate graph sources
        rcompressed = []
        has_star = False
        # Remove all but the final star (this is a really bad hack)
        for item in reversed(rinput_path_id):
            is_star = '*' in item
            if not (is_star and has_star):
                if not has_star:
                    rcompressed.append(item)
            has_star = has_star or is_star
        compressed = tuple(rcompressed[::-1])
        return compressed

    def accumulate_input_ids(edge_list):
        """
        python -m dtool.example_depcache2 testdata_depc4 --show
        """
        edge_data = ut.take_column(edge_list, 3)
        # We are accumulating local input ids
        toaccum_list_ = ut.dict_take_column(edge_data, 'local_input_id')
        if BIG_HACK and True:
            v_list = ut.take_column(edge_list, 1)
            # show the local_input_ids at the entire level
            pred_ids = ([[
                x['local_input_id']
                for x in list(graph.pred[node].values())[0].values()
            ] if len(graph.pred[node]) else [] for node in v_list])
            toaccum_list = [
                x + ':' + ';'.join(y) for x, y in zip(toaccum_list_, pred_ids)
            ]
        else:
            toaccum_list = toaccum_list_

        # Default dumb accumulation
        accum_ids_ = ut.cumsum(zip(toaccum_list), tuple())
        accum_ids = ut.lmap(condense_accum_ids, accum_ids_)
        if BIG_HACK:
            accum_ids = ut.lmap(condense_accum_ids_stars, accum_ids)
            accum_ids = [('t', ) + x for x in accum_ids]
        ut.dict_set_column(edge_data, 'accum_id', accum_ids)
        return accum_ids

    sources = list(ut.nx_source_nodes(graph))
    print(sources)
    # assert len(sources) == 1, 'expected a unique source'
    source = sources[0]

    graph = graph.subgraph(ut.nx_all_nodes_between(graph, source,
                                                   target)).copy()
    # Remove superfluous data
    ut.nx_delete_edge_attr(
        graph,
        [
            'edge_type',
            'isnwise',
            'nwise_idx',
            # 'parent_colx',
            'ismulti'
        ])

    # Make all '*' edges have distinct local_input_id's.
    # TODO: allow non-distinct suffixes
    count = ord('a')
    for edge in graph.edges(keys=True, data=True):
        dat = edge[3]
        if dat['local_input_id'] == '*':
            dat['local_input_id'] = '*' + chr(count)
            dat['taillabel'] = '*' + chr(count)
            count += 1

    # Augment with dummy super source/sink nodes
    source_input = 'source_input'
    target_output = 'target_output'
    graph.add_edge(source_input, source, local_input_id='s', taillabel='1')
    graph.add_edge(target, target_output, local_input_id='t', taillabel='1')

    # Find all paths from the table to the source.
    paths_to_source = ut.all_multi_paths(graph,
                                         source_input,
                                         target_output,
                                         data=True)

    # Build expanded input graph
    # The inputs to this table can be derived from this graph.
    # The output is a new expanded input graph.
    exi_graph = nx.DiGraph()
    for path in paths_to_source:
        # Accumulate unique identifiers along the reversed path
        edge_list = ut.reverse_path_edges(path)
        accumulate_input_ids(edge_list)

        # A node's output(?) on this path determines its expanded branch id
        exi_nodes = [
            ExiNode(v, BranchId(d['accum_id'], k, d.get('parent_colx', -1)))
            for u, v, k, d in edge_list[:-1]
        ]
        exi_node_to_label = {
            node: node[0] + '[' + ','.join([str(x) for x in node[1]]) + ']'
            for node in exi_nodes
        }
        exi_graph.add_nodes_from(exi_nodes)
        nx.set_node_attributes(exi_graph,
                               name='label',
                               values=exi_node_to_label)

        # Undo any accumulation ordering and remove dummy nodes
        old_edges = ut.reverse_path_edges(edge_list[1:-1])
        new_edges = ut.reverse_path_edges(list(ut.itertwo(exi_nodes)))
        for new_edge, old_edge in zip(new_edges, old_edges):
            u2, v2 = new_edge[:2]
            d = old_edge[3]
            taillabel = d['taillabel']
            parent_colx = d.get('parent_colx', -1)
            if not exi_graph.has_edge(u2, v2):
                exi_graph.add_edge(u2,
                                   v2,
                                   taillabel=taillabel,
                                   parent_colx=parent_colx)

    sink_nodes = list(ut.nx_sink_nodes(exi_graph))
    source_nodes = list(ut.nx_source_nodes(exi_graph))
    assert len(sink_nodes) == 1, 'expected a unique sink'
    sink_node = sink_nodes[0]

    # First identify if a node is root_specifiable
    node_dict = ut.nx_node_dict(exi_graph)
    for node in exi_graph.nodes():
        root_specifiable = False
        # for edge in exi_graph.in_edges(node, keys=True):
        for edge in exi_graph.in_edges(node):
            # key = edge[-1]
            # assert key == 0, 'multi di graph is necessary'
            edata = exi_graph.get_edge_data(*edge)
            if edata.get('taillabel').startswith('*'):
                if node != sink_node:
                    root_specifiable = True
        if exi_graph.in_degree(node) == 0:
            root_specifiable = True
        node_dict[node]['root_specifiable'] = root_specifiable

    # Need to specify any combo of red nodes such that:
    # 1) for each path from a (leaf) to the (root) there is exactly one red
    # node along that path.  This guarantees that all inputs are given.
    path_list = ut.flatten([
        nx.all_simple_paths(exi_graph, source_node, sink_node)
        for source_node in source_nodes
    ])
    rootmost_nodes = set([])
    for path in path_list:
        flags = [node_dict[node]['root_specifiable'] for node in path]
        valid_nodes = ut.compress(path, flags)
        rootmost_nodes.add(valid_nodes[-1])
    # Rootmost nodes are the ones specifiable by default when computing the
    # normal property.
    for node in rootmost_nodes:
        node_dict[node]['rootmost'] = True

    # We actually need to hack away any root-most nodes that have another
    # rootmost node as the parent.  Otherwise, this would cause constraints in
    # what the user could specify as valid input combinations.
    # ie: specify a vocab and an index, but the index depends on the vocab.
    # This forces the user to specify the vocab that was the parent of the
    # index. The user should either just specify the index and have the vocab
    # inferred, or, for now, we just don't allow this to happen.
    nx.get_node_attributes(exi_graph, 'rootmost')

    recolor_exi_graph(exi_graph, rootmost_nodes)
    return exi_graph
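Steps 1 and 2 of the docstring above reduce to: add a dummy super source and sink, enumerate every path between them, and accumulate the per-edge `local_input_id` along each path. A toy sketch of that idea (hypothetical table names; uses `nx.all_simple_edge_paths`, available in networkx >= 2.5):

import networkx as nx

# toy dependency graph where the target 'vsone' consumes the 'feat' table twice
g = nx.MultiDiGraph()
g.add_edge('raw', 'chip', local_input_id='1')
g.add_edge('chip', 'feat', local_input_id='1')
g.add_edge('feat', 'vsone', local_input_id='1')
g.add_edge('feat', 'vsone', local_input_id='2')

# Step 1: augment with a dummy super source and sink
g.add_edge('source_input', 'raw', local_input_id='s')
g.add_edge('vsone', 'target_output', local_input_id='t')

# Step 2: accumulate the edge ids along every source->sink path; the two id
# tuples distinguish the two copies of the 'feat' branch
for path in nx.all_simple_edge_paths(g, 'source_input', 'target_output'):
    print(tuple(g.edges[e]['local_input_id'] for e in path))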
Example No. 24
0
def demodata_bridge():
    # define 2-connected components and bridges
    cc2 = [(1, 2, 4, 3, 1, 4), (8, 9, 10, 8), (11, 12, 13, 11)]
    bridges = [(4, 8), (3, 5), (20, 21), (22, 23, 24)]
    G = nx.Graph(ut.flatten(ut.itertwo(path) for path in cc2 + bridges))
    return G
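Calling the function above and asking networkx for its bridges makes the construction concrete (a sketch assuming `nx.bridges`, present in networkx >= 2.1; the function itself also needs `ut` imported at module level, as in the other examples):

import networkx as nx

G = demodata_bridge()  # the function defined just above
# cycle edges are 2-edge-connected; connector and path edges such as
# (4, 8) and (3, 5) are reported as bridges
print(sorted(nx.bridges(G)))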
Example No. 25
0
    def _enrich_matches_lnbnn(extr, matches, other_aids, other_nids,
                              inplace=False):
        """
        Given a set of one-vs-one matches, searches for LNBNN normalizers in a
        larger database to enrich the matches with database-level
        distinctiveness.
        """
        from ibeis.algo.hots import nn_weights
        import vtool as vt  # for take_col_per_row below
        raise NotImplementedError('have not tested since the re-work. '
                                  'Need to ensure that things work correctly.')
        ibs = extr.ibs
        cfgdict = {
            'can_match_samename': False,
            'can_match_sameimg': True,
            'K': 3,
            'Knorm': 3,
            'prescore_method': 'csum',
            'score_method': 'csum'
        }
        custom_nid_lookup = ut.dzip(other_aids, other_nids)
        aids = [m.annot2['aid'] for m in matches]
        qreq_ = ibs.new_query_request(aids, other_aids, cfgdict=cfgdict,
                                      custom_nid_lookup=custom_nid_lookup,
                                      verbose=extr.verbose >= 2)

        qreq_.load_indexer()
        indexer = qreq_.indexer
        if not inplace:
            matches_ = [match.copy() for match in matches]
        else:
            matches_ = matches
        K = qreq_.qparams.K
        Knorm = qreq_.qparams.Knorm
        normalizer_rule = qreq_.qparams.normalizer_rule

        extr.print('Stacking vecs for batch lnbnn matching')
        offset_list = np.cumsum([0] + [match_.fm.shape[0] for match_ in matches_])
        stacked_vecs = np.vstack([
            match_.matched_vecs2()
            for match_ in ut.ProgIter(matches_, label='stack matched vecs')
        ])

        vecs = stacked_vecs
        num = (K + Knorm)
        idxs, dists = indexer.batch_knn(vecs, num, chunksize=8192,
                                        label='lnbnn scoring')

        idx_list = [idxs[l:r] for l, r in ut.itertwo(offset_list)]
        dist_list = [dists[l:r] for l, r in ut.itertwo(offset_list)]
        iter_ = zip(matches_, idx_list, dist_list)
        prog = ut.ProgIter(iter_, length=len(matches_), label='lnbnn scoring')
        for match_, neighb_idx, neighb_dist in prog:
            qaid = match_.annot2['aid']
            norm_k = nn_weights.get_normk(qreq_, qaid, neighb_idx, Knorm,
                                          normalizer_rule)
            ndist = vt.take_col_per_row(neighb_dist, norm_k)
            vdist = match_.local_measures['match_dist']
            lnbnn_dist = nn_weights.lnbnn_fn(vdist, ndist)
            lnbnn_clip_dist = np.clip(lnbnn_dist, 0, np.inf)
            match_.local_measures['lnbnn_norm_dist'] = ndist
            match_.local_measures['lnbnn'] = lnbnn_dist
            match_.local_measures['lnbnn_clip'] = lnbnn_clip_dist
            match_.fs = lnbnn_dist
        return matches_
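The batching pattern above (stack the per-match vectors, run a single k-NN call, then slice the flat result back apart at consecutive offsets) is worth isolating. A minimal numpy-only sketch of that stack/split bookkeeping (hypothetical sizes; `stacked.sum(axis=1)` stands in for `indexer.batch_knn`):

import numpy as np

# three "matches" with different numbers of matched descriptors
per_match_vecs = [np.random.rand(n, 8) for n in (5, 3, 7)]

# stack once so the expensive batch operation sees a single array
offsets = np.cumsum([0] + [v.shape[0] for v in per_match_vecs])
stacked = np.vstack(per_match_vecs)
batch_result = stacked.sum(axis=1)  # placeholder for the real batch query

# slice the flat result back into per-match chunks using consecutive offsets
chunks = [batch_result[l:r] for l, r in zip(offsets[:-1], offsets[1:])]
assert [len(c) for c in chunks] == [5, 3, 7]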
Example No. 26
0
aug_graph = graph.copy()

# remove cut edges from augmented graph
edge_to_iscut = nx.get_edge_attributes(aug_graph, 'is_cut')
cut_edges = [
    (u, v)
    for (u, v, d) in aug_graph.edges(data=True)
    if not (d.get('is_cut') or d.get('decision', 'unreviewed') in ['nomatch'])
]
# NOTE: the comprehension above is superseded; only edges explicitly flagged
# with 'is_cut' are removed below.
cut_edges = [edge for edge, flag in edge_to_iscut.items() if flag]
aug_graph.remove_edges_from(cut_edges)


# Enumerate cliques inside labels
unflat_edges = [list(ut.itertwo(nodes)) for nodes in label_to_nodes.values()]
node_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]

# Remove candidate MST edges that exist in the original graph
orig_edges = list(aug_graph.edges())
candidate_mst_edges = [edge for edge in node_pairs if not aug_graph.has_edge(*edge)]
# randomness prevents chains and visually looks better
rng = np.random.RandomState(42)


def _randint():
    # NOTE: hard-wired to 0; the randomized variant below is unreachable
    return 0
    return rng.randint(0, 100)


aug_graph.add_edges_from(candidate_mst_edges)
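A compact sketch of the candidate-edge construction above: chain the nodes of each label group into consecutive pairs, then keep only the pairs the graph does not already contain (hypothetical toy labels, networkx only):

import networkx as nx

graph = nx.Graph([(1, 2), (4, 5)])
label_to_nodes = {'name_a': [1, 2, 3], 'name_b': [4, 5, 6]}

# chain each group: (1, 2), (2, 3) and (4, 5), (5, 6)
unflat_edges = [list(zip(nodes, nodes[1:])) for nodes in label_to_nodes.values()]
node_pairs = [tup for edges in unflat_edges for tup in edges if tup[0] != tup[1]]

# only propose edges that the graph does not already have
candidate_mst_edges = [e for e in node_pairs if not graph.has_edge(*e)]
assert sorted(candidate_mst_edges) == [(2, 3), (5, 6)]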
Example No. 27
0
def nx_transitive_reduction(G, mode=1):
    """
    References:
        https://en.wikipedia.org/wiki/Transitive_reduction#Computing_the_reduction_using_the_closure
        http://dept-info.labri.fr/~thibault/tmp/0201008.pdf
        http://stackoverflow.com/questions/17078696/im-trying-to-perform-the-transitive-reduction-of-directed-graph-in-python

    CommandLine:
        python -m utool.util_graph nx_transitive_reduction --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_graph import *  # NOQA
        >>> import utool as ut
        >>> import networkx as nx
        >>> G = nx.DiGraph([('a', 'b'), ('a', 'c'), ('a', 'e'),
        >>>                 ('a', 'd'), ('b', 'd'), ('c', 'e'),
        >>>                 ('d', 'e'), ('c', 'e'), ('c', 'd')])
        >>> G = testdata_graph()[1]
        >>> G_tr = nx_transitive_reduction(G, mode=1)
        >>> G_tr2 = nx_transitive_reduction(G, mode=1)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> G_ = nx.dag.transitive_closure(G)
        >>> pt.show_nx(G    , pnum=(1, 5, 1), fnum=1)
        >>> pt.show_nx(G_tr , pnum=(1, 5, 2), fnum=1)
        >>> pt.show_nx(G_tr2 , pnum=(1, 5, 3), fnum=1)
        >>> pt.show_nx(G_   , pnum=(1, 5, 4), fnum=1)
        >>> pt.show_nx(nx.dag.transitive_closure(G_tr), pnum=(1, 5, 5), fnum=1)
        >>> ut.show_if_requested()
    """

    import utool as ut
    import networkx as nx
    has_cycles = not nx.is_directed_acyclic_graph(G)
    if has_cycles:
        # FIXME: this does not work for cycle graphs.
        # Need to do algorithm on SCCs
        G_orig = G
        G = nx.condensation(G_orig)

    nodes = list(G.nodes())
    node2_idx = ut.make_index_lookup(nodes)

    # For each node u, perform a DFS and consider its set of (non-self) children C.
    # For each descendant v of a node in C, remove any edge from u to v.

    if mode == 1:
        G_tr = G.copy()

        for parent in G_tr.nodes():
            # Remove self loops
            if G_tr.has_edge(parent, parent):
                G_tr.remove_edge(parent, parent)
            # For each child of the parent
            for child in list(G_tr.successors(parent)):
                # Preorder nodes includes its argument (no added complexity)
                for gchild in list(G_tr.successors(child)):
                    # Remove all edges from parent to non-child descendants
                    for descendant in nx.dfs_preorder_nodes(G_tr, gchild):
                        if G_tr.has_edge(parent, descendant):
                            G_tr.remove_edge(parent, descendant)

        if has_cycles:
            # Uncondense graph
            uncondensed_G_tr = G.__class__()
            mapping = G.graph['mapping']
            uncondensed_G_tr.add_nodes_from(mapping.keys())
            inv_mapping = ut.invert_dict(mapping, unique_vals=False)
            for u, v in G_tr.edges():
                u_ = inv_mapping[u][0]
                v_ = inv_mapping[v][0]
                uncondensed_G_tr.add_edge(u_, v_)

            for key, path in inv_mapping.items():
                if len(path) > 1:
                    directed_cycle = list(ut.itertwo(path, wrap=True))
                    uncondensed_G_tr.add_edges_from(directed_cycle)
            G_tr = uncondensed_G_tr

    else:

        def make_adj_matrix(G):
            edges = list(G.edges())
            edge2_idx = ut.partial(ut.dict_take, node2_idx)
            uv_list = ut.lmap(edge2_idx, edges)
            A = np.zeros((len(nodes), len(nodes)))
            A[tuple(np.array(uv_list).T)] = 1
            return A

        G_ = nx.dag.transitive_closure(G)

        A = make_adj_matrix(G)
        B = make_adj_matrix(G_)

        #AB = A * B
        #AB = A.T.dot(B)
        AB = A.dot(B)
        #AB = A.dot(B.T)

        A_and_notAB = np.logical_and(A, np.logical_not(AB))
        tr_uvs = np.where(A_and_notAB)

        #nodes = G.nodes()
        edges = list(zip(*ut.unflat_take(nodes, tr_uvs)))

        G_tr = G.__class__()
        G_tr.add_nodes_from(nodes)
        G_tr.add_edges_from(edges)

        if has_cycles:
            # Uncondense graph
            uncondensed_G_tr = G.__class__()
            mapping = G.graph['mapping']
            uncondensed_G_tr.add_nodes_from(mapping.keys())
            inv_mapping = ut.invert_dict(mapping, unique_vals=False)
            for u, v in G_tr.edges():
                u_ = inv_mapping[u][0]
                v_ = inv_mapping[v][0]
                uncondensed_G_tr.add_edge(u_, v_)

            for key, path in inv_mapping.items():
                if len(path) > 1:
                    directed_cycle = list(ut.itertwo(path, wrap=True))
                    uncondensed_G_tr.add_edges_from(directed_cycle)
            G_tr = uncondensed_G_tr
    return G_tr
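For DAGs the same operation now ships with networkx; a quick comparison sketch (assuming networkx >= 2.1, where `nx.transitive_reduction` exists):

import networkx as nx

G = nx.DiGraph([('a', 'b'), ('b', 'c'), ('a', 'c')])  # (a, c) is implied by a -> b -> c
G_tr = nx.transitive_reduction(G)
assert sorted(G_tr.edges()) == [('a', 'b'), ('b', 'c')]

Unlike the function above, the built-in requires an acyclic input, which is what the SCC condensation step here works around.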
Example No. 28
0
def draw_bayesian_model(model, evidence={}, soft_evidence={}, fnum=None,
                        pnum=None, **kwargs):

    from pgmpy.models import BayesianModel
    if not isinstance(model, BayesianModel):
        model = model.to_bayesian_model()

    import plottool as pt
    import networkx as nx
    kwargs = kwargs.copy()
    factor_list = kwargs.pop('factor_list', [])

    ttype_colors, ttype_scalars = make_colorcodes(model)

    textprops = {
        'horizontalalignment': 'left', 'family': 'monospace', 'size': 8, }

    # build graph attrs
    tup = get_node_viz_attrs(
        model, evidence, soft_evidence, factor_list, ttype_colors, **kwargs)
    node_color, pos_list, pos_dict, takws = tup

    # draw graph
    has_infered = evidence or 'factor_list' in kwargs

    if False:
        fig = pt.figure(fnum=fnum, pnum=pnum, doclf=True)  # NOQA
        ax = pt.gca()
        drawkw = dict(pos=pos_dict, ax=ax, with_labels=True, node_size=1100,
                      node_color=node_color)
        nx.draw(model, **drawkw)
    else:
        # BE VERY CAREFUL
        if 1:
            graph = model.copy()
            graph.__class__ = nx.DiGraph
            graph.graph['groupattrs'] = ut.ddict(dict)
            #graph = model.
            if getattr(graph, 'ttype2_cpds', None) is not None:
                # Add invis edges and ttype groups
                for ttype in model.ttype2_cpds.keys():
                    ttype_cpds = model.ttype2_cpds[ttype]
                    # use defined ordering
                    ttype_nodes = ut.list_getattr(ttype_cpds, 'variable')
                    # ttype_nodes = sorted(ttype_nodes)
                    invis_edges = list(ut.itertwo(ttype_nodes))
                    graph.add_edges_from(invis_edges)
                    nx.set_edge_attributes(graph, 'style', {edge: 'invis' for edge in invis_edges})
                    nx.set_node_attributes(graph, 'groupid', {node: ttype for node in ttype_nodes})
                    graph.graph['groupattrs'][ttype]['rank'] = 'same'
                    graph.graph['groupattrs'][ttype]['cluster'] = False
        else:
            graph = model
        pt.show_nx(graph, layout_kw={'prog': 'dot'}, fnum=fnum, pnum=pnum, verbose=0)
        pt.zoom_factory()
        fig = pt.gcf()
        ax = pt.gca()
        pass
    hacks = [pt.draw_text_annotations(textprops=textprops, **takw)
             for takw in takws if takw]

    xmin, ymin = np.array(pos_list).min(axis=0)
    xmax, ymax = np.array(pos_list).max(axis=0)
    if 'name' in model.ttype2_template:
        num_names = len(model.ttype2_template['name'].basis)
        num_annots = len(model.ttype2_cpds['name'])
        if num_annots > 4:
            ax.set_xlim((xmin - 40, xmax + 40))
            ax.set_ylim((ymin - 50, ymax + 50))
            fig.set_size_inches(30, 7)
        else:
            ax.set_xlim((xmin - 42, xmax + 42))
            ax.set_ylim((ymin - 50, ymax + 50))
            fig.set_size_inches(23, 7)
        title = 'num_names=%r, num_annots=%r' % (num_names, num_annots,)
    else:
        title = ''
    map_assign = kwargs.get('map_assign', None)

    def word_insert(text):
        return '' if len(text) == 0 else text + ' '

    top_assignments = kwargs.get('top_assignments', None)
    if top_assignments is not None:
        map_assign, map_prob = top_assignments[0]
        if map_assign is not None:
            title += '\n%sMAP: ' % (word_insert(kwargs.get('method', '')))
            title += map_assign + ' @' + '%.2f%%' % (100 * map_prob,)
    if kwargs.get('show_title', True):
        pt.set_figtitle(title, size=14)

    for hack in hacks:
        hack()

    if has_infered:
        # Hack in colorbars
        # if ut.list_type(basis) is int:
        #     pt.colorbar(scalars, colors, lbl='score', ticklabels=np.array(basis) + 1)
        # else:
        #     pt.colorbar(scalars, colors, lbl='score', ticklabels=basis)
        keys = ['name', 'score']
        locs = ['left', 'right']
        for key, loc in zip(keys, locs):
            if key in ttype_colors:
                basis = model.ttype2_template[key].basis
                # scalars =
                colors = ttype_colors[key]
                scalars = ttype_scalars[key]
                pt.colorbar(scalars, colors, lbl=key, ticklabels=basis,
                            ticklocation=loc)