예제 #1
0
 def __call__(self, graph, pzero, vcount=None, length=None, **kwargs):
     """
     Run ``self.prox_func`` on `graph` and limit the result with
     ``prox.sortcut``.

     :param graph: a subclass of :class:`.AbstractGraph`
     :param pzero: list of vertex id, or dictionary `{vid: score}`
     :param vcount: limit handed to ``prox.sortcut``
     :param length: third positional argument given to ``self.prox_func``
     :param kwargs: extra keyword arguments forwarded to ``self.prox_func``
     :returns: whatever ``prox.sortcut`` returns
     """
     return prox.sortcut(
         self.prox_func(graph, pzero, length, **kwargs),
         vcount,  # limit
     )
예제 #2
0
def graph_articles(gid, graph, all_articles=True, cut=200, uuids=None, **kwargs):
    """Return the subgraph made of extracted vertices around the articles.

    :param gid: graph id; article vertices are those whose ``nodetype``
        equals ``"_<gid>_article"``
    :param graph: graph exposing ``vs`` and ``subgraph``
    :param all_articles: when true, every article vertex is kept in the
        resulting subgraph in addition to the extracted ones
    :param cut: limit handed to ``sortcut`` for the extracted vertices
    :param uuids: optional uuids of vertices that must always be kept; they
        are added with a score of 1.0 and do not count towards `cut`
    :param kwargs: forwarded to ``extract_articles``
    :returns: ``graph.subgraph`` restricted to the selected vertex ids
    """
    article_type = "_%s_article" % gid
    pz = [(v.index, 1.) for v in graph.vs if v['nodetype'] == article_type]

    # resolve the forced vertices first (same order of calls as before)
    vids = [v.index for v in graph.vs.select(uuid_in=uuids)] if uuids else []

    vs = extract_articles(gid, graph, dict(pz), **kwargs)

    if uuids:
        forced = set(vids)  # O(1) membership while filtering
        # ask for extra room so that `cut` vertices remain once the forced
        # ones have been filtered out of the ranking
        vs = sortcut(vs, cut + len(vids))
        vs = [(v, s) for v, s in vs if v not in forced][:cut]
        vs = vs + [(v, 1.) for v in vids]
    else:
        vs = sortcut(vs, cut)

    if all_articles:
        vs = pz + vs

    # going through a dict removes duplicated vertex ids
    return graph.subgraph(dict(vs).keys())
예제 #3
0
def expand_subgraph(graph, expand, nodes, length=4, cut=100, weightings=None):
    """Rank the vertices of `graph` around a set of seed uuids.

    Every uuid of `nodes` starts with a weight of ``1 / len(nodes)`` and
    every uuid of `expand` with a weight of 1.0 (the latter wins when a uuid
    appears in both). `weightings` is replaced by ``["1"]`` when it is
    ``None`` or ``[]``.

    :returns: ``sortcut`` applied to the ``pure_prox`` result, limited by
        `cut`
    """
    index_of = dict((vtx['uuid'], vtx.index) for vtx in graph.vs)

    pz = {}
    for uid in nodes:
        # len() stays inside the loop: an empty `nodes` must not divide
        pz[index_of[uid]] = 1. / len(nodes)
    for uid in expand:
        pz[index_of[uid]] = 1.

    if weightings in ([], None):
        weightings = ["1"]

    return sortcut(pure_prox(graph, pz, length, _weights(weightings)), cut)
예제 #4
0
 def search(self, p0, nb_results, l):
     """Retrieve `nb_results` vertices by random walk starting from `p0`.

     :param p0: starting point handed to ``prox.prox_markov``
     :param nb_results: limit handed to ``prox.sortcut``
     :param l: passed to ``prox.prox_markov`` as its ``l`` keyword
     :returns: the result of ``prox.sortcut``
     """
     from cello.graphs import prox

     # TODO: choose the extraction method
     # TODO: decide whether reflexivity should be forced or not
     def neighbors_and_self(graph, vtx):
         # the vertex itself is appended to its own neighbor list
         return graph.neighbors(vtx) + [vtx]

     walk = prox.prox_markov(self.graph,
                             p0,
                             neighbors_fct=neighbors_and_self,
                             l=l)
     return prox.sortcut(walk, nb_results)
예제 #5
0
    def __call__(self,
                 pzero,
                 vcount=None,
                 length=None,
                 add_loops=None,
                 mode=None,
                 is_wgt=None,
                 **kwargs):
        """Run ``self.prox_func`` on ``self.global_graph`` and limit the result.

        :param pzero: second positional argument of ``self.prox_func``
        :param vcount: limit handed to ``prox.sortcut``
        :param length: third positional argument of ``self.prox_func``
        :param add_loops: forwarded as the ``add_loops`` keyword
        :param mode: key looked up in ``self._modes["text_to_num"]``; the
            value found is forwarded as the ``mode`` keyword
        :param is_wgt: when equal to ``True`` and ``self._wgt`` is set,
            ``self._wgt`` is forwarded as the ``weight`` keyword
        :param kwargs: extra keyword arguments for ``self.prox_func``
        :returns: the result of ``prox.sortcut``
        """
        kwargs.update(
            add_loops=add_loops,
            loops_weight=self._loops_weight,
            mode=self._modes["text_to_num"][mode],
        )

        # `== True` is kept on purpose: values such as 1 still enable it
        use_weight = self._wgt is not None and is_wgt == True
        if use_weight:
            kwargs["weight"] = self._wgt

        ranked = self.prox_func(self.global_graph, pzero, length, **kwargs)
        return prox.sortcut(ranked, vcount)  # limit
예제 #6
0
def extract(graph, pz, cut=50, weighting=None, length=3, **kwargs):
    """Apply ``pure_prox`` to `graph` from `pz` and limit it with ``sortcut``.

    :param weighting: handed to ``_weights`` to build the neighbor function
    :param length: third positional argument of ``pure_prox``
    :param cut: limit handed to ``sortcut``
    :param kwargs: accepted but not used
    """
    return sortcut(pure_prox(graph, pz, length, _weights(weighting)), cut)
예제 #7
0
def main():
    """ re-Index all the Proxteam corpus """
    # NOTE(review): the docstring above looks stale -- the visible body reads
    # a graph file with igraph and posts its vertices and edges to a
    # Botagraph instance. Confirm and update.
    from pprint import pprint

    parser = argparse.ArgumentParser()

    # NOTE(review): the --host definition below is cut off in this copy (its
    # default URL is masked, and the remaining add_argument calls, the
    # parse_args() call and the beginning of a print statement are missing).
    # The options read further down (args.key, args.gid, args.path, ...) are
    # therefore not visible here.
    parser.add_argument("--host",
                        action='store',
                        help="host",
                        default="http://*****:*****@ %s \n  " % (args.gid, args.host)
    bot = Botagraph(args.host, args.key)
    gid = args.gid

    # authenticate only when both credentials were given
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    # args.infos: print the remote graph description and stop
    if args.infos:
        pprint(bot.get_graph(gid))
        return

    # read / parse graph
    print "\n * Reading %s" % args.path

    graph = igraph.read(args.path)

    # subgraph
    if args.cut > 0:
        print " ** cut %s based on degree()" % args.cut
        # cut method based on degree
        # keep the n vertices with the highest degree
        n = int(args.cut)
        vs = list((v.index, v.degree()) for v in graph.vs)
        vs = sorted(vs, key=lambda x: x[1], reverse=True)
        vs = vs[:n]
        graph = graph.subgraph([v[0] for v in vs])

    elif args.gl > 0:
        # keep the n vertices returned by sortcut() on a prox_markov_dict
        # result computed from all the vertices
        # (presumably a global ranking -- semantics of prox not visible here)
        from cello.graphs.prox import prox_markov_dict, sortcut, ALL
        n = int(args.gl)
        extract = prox_markov_dict(graph,
                                   range(graph.vcount()),
                                   80,
                                   add_loops=True)
        vs = [i for i, v in sortcut(extract, n)]
        print "vs", vs
        graph = graph.subgraph(vs)

    print graph.summary()
    # NOTE(review): this list is sized with vcount() (number of vertices) but
    # is assigned to an edge attribute -- confirm ecount() was not intended.
    graph.es['a'] = [1 for i in xrange(graph.vcount())]

    # create the remote graph when it does not exist yet
    if not bot.has_graph(gid):
        print "\n * Create graph %s" % gid
        bot.create_graph(
            gid, {
                'description': "Dicosyn experiment\n * ",
                'image': "",
                'tags': ['synonymes', 'dictionnaire']
            })

    print "\n * Get schema '%s'" % gid
    schema = bot.get_schema(gid)['schema']
    # index the node types and edge types of the schema by name
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}

    print "\n nodetypes: ", nodetypes.keys()
    print "\n edgetypes: ", edgetypes.keys()

    # create the "word" node type when the schema does not have it
    if not "word" in nodetypes:

        print "\n\n * Creating node type %s" % "word"
        props = {"label": Text(), "lang": Text()}
        bot.post_nodetype(gid, "word", "no description", props)

    # create the "is_syn" edge type when the schema does not have it
    if not "is_syn" in edgetypes:
        print "\n\n * Creating edge type %s" % "is_syn"
        bot.post_edgetype(gid, "is_syn", "no desc", {"a": Text()})

    # read the schema again after the possible type creations above
    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}

    print nodetypes
    print edgetypes

    # vertex label -> uuid of the node posted for that label
    idx = {}

    if args.wait:
        raw_input("press <enter> key to start edges and nodes importation")

    if args.seed:
        # Seed mode: edges are posted one at a time; both end nodes are
        # posted the first time they are met, and every inserted edge (and
        # every vertex left without neighbor) is removed from the local
        # graph, until the local graph is empty.

        def set_node(v):
            # post the node of vertex `v` unless its label is already known
            if v['label'] not in idx:
                node = bot.post_node(
                    gid, node_payload(v, nodetypes['word']['uuid']))
                idx[v['label']] = node['uuid']
                print "inserting %s %s" % (v['label'], node['uuid'])

        # NOTE(review): idx was already initialised to {} above
        idx = {}
        v1 = None

        # seeds grow into beautiful flowers

        while graph.vcount() > 0:

            # start from the first vertex when there is no current vertex
            v1 = graph.vs[0] if v1 is None else v1

            # NOTE(review): `size` is not used in the visible code
            size = graph.vcount()

            nei = v1.neighbors()
            if not len(nei):
                # isolated vertex: drop it and pick a new start
                graph.delete_vertices([v1.index])
                v1 = None
                continue

            # insert at most 5 edges before pausing
            for i in range(min([5, len(nei)])):

                # neighbors are read again: the graph changes at each turn
                nei = v1.neighbors()

                if i >= len(nei):
                    # not enough neighbors left: jump to a random vertex
                    if graph.vcount():
                        r = randint(0, graph.vcount() - 1)
                        v1 = graph.vs[r]
                    break

                # pick a random neighbor of v1
                r = randint(0, len(nei) - 1)
                v2 = nei[r]

                print "inserting edge %s %s" % (v1['label'], v2['label'])

                set_node(v1)
                set_node(v2)

                eid = graph.get_eid(v1.index, v2.index)
                src, tgt = idx[v1['label']], idx[v2['label']]

                # NOTE(review): the returned value `uuid` is not used below
                uuid = bot.post_edge(
                    gid, edge_payload(edgetypes['is_syn']['uuid'], src, tgt,
                                      {}))

                # delete  from graph
                # * inserted edges
                # * nodes with no more edges

                graph.delete_edges([eid])

                delete_nodes = [
                    v.index for v in (v1, v2) if len(graph.neighbors(v)) == 0
                ]

                if len(delete_nodes):
                    graph.delete_vertices(delete_nodes)

                    if graph.vcount():
                        r = randint(0, graph.vcount() - 1)
                        # switch v1
                        v1 = graph.vs[r]

                    else:
                        break

            # wait sometimes
            pause(args.pause)

    else:
        # Bulk mode: post every node first, then every edge.

        print "posting nodes"
        count = 0
        fail = 0
        # a falsy uuid is counted as a failure; otherwise the uuid is stored
        # under the label of the node
        for node, uuid in bot.post_nodes(
                gid, gen_nodes(graph, nodetypes['word']['uuid'])):
            if not uuid:
                fail += 1
            else:
                count += 1
                idx[node['properties']['label']] = uuid

        # NOTE(review): `fail` is counted above but not reported for nodes
        print "%s nodes inserted " % count

        #print "iterate over nodes"
        #for node in bot.find_all_nodes(gid, "word", {}):
        #pass

        # post edges
        print "posting edges"
        count = 0
        fail = 0

        # NOTE(review): `inv_idx` (uuid -> label) is not used in the visible
        # code
        inv_idx = {v: k for k, v in idx.iteritems()}

        for obj, uuid in bot.post_edges(
                gid, gen_edges(graph, edgetypes['is_syn']['uuid'], idx)):
            if not uuid:
                fail += 1
            else:
                count += 1

            # wait sometimes
            pause(args.pause)

        print "%s edges inserted, %s failed " % (count, fail)
예제 #8
0
def main():
    """ re-Index all the Proxteam corpus """
    # NOTE(review): the docstring above looks stale -- the visible body reads
    # a graph file with igraph and posts its vertices and edges to a
    # Botagraph instance. Confirm and update.
    from pprint import pprint
    
    parser = argparse.ArgumentParser()
    
    # NOTE(review): the --host definition below is cut off in this copy (its
    # default URL is masked, and the remaining add_argument calls, the
    # parse_args() call and the beginning of a print statement are missing).
    # The options read further down (args.key, args.gid, args.path, ...) are
    # therefore not visible here.
    parser.add_argument("--host", action='store', help="host", default="http://*****:*****@ %s \n  " % (args.gid, args.host)
    bot = Botagraph(args.host, args.key)
    gid =  args.gid

    # authenticate only when both credentials were given
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    # args.infos: print the remote graph description and stop
    if args.infos:
        pprint( bot.get_graph(gid) )
        return 


    # read / parse graph
    print "\n * Reading %s" % args.path
    
    graph = igraph.read(args.path)

    # subgraph
    if args.cut > 0:
        print " ** cut %s based on degree()" % args.cut
        # cut method based on degree
        # keep the n vertices with the highest degree
        n = int(args.cut)
        vs = list( (v.index, v.degree() ) for v in  graph.vs )
        vs = sorted( vs, key=lambda x: x[1], reverse = True )
        vs = vs[:n]
        graph = graph.subgraph( [  v[0] for v in vs ] )
        
    elif args.gl > 0:
        # keep the n vertices returned by sortcut() on a prox_markov_dict
        # result computed from all the vertices
        # (presumably a global ranking -- semantics of prox not visible here)
        from cello.graphs.prox import prox_markov_dict, sortcut, ALL
        n = int(args.gl)
        extract = prox_markov_dict(graph, range(graph.vcount()), 80, add_loops=True)
        vs =  [ i for i,v in sortcut(extract,n)]
        print "vs", vs
        graph = graph.subgraph( vs )
        
         

    print graph.summary()
    # NOTE(review): this list is sized with vcount() (number of vertices) but
    # is assigned to an edge attribute -- confirm ecount() was not intended.
    graph.es['a'] = [ 1 for i in xrange(graph.vcount() ) ]


    
    # create the remote graph when it does not exist yet
    if not bot.has_graph(gid) :
        print "\n * Create graph %s" % gid
        bot.create_graph(gid, { 'description':"Dicosyn experiment\n * ",
                                'image': "",
                                'tags': ['synonymes', 'dictionnaire']
                              }
                        )
                        
    print "\n * Get schema '%s'" % gid
    schema = bot.get_schema(gid)['schema']
    # index the node types and edge types of the schema by name
    nodetypes = { n['name']:n for n in schema['nodetypes'] }
    edgetypes = { e['name']:e for e in schema['edgetypes'] }

    print "\n nodetypes: ", nodetypes.keys()
    print "\n edgetypes: ", edgetypes.keys()

    # create the "word" node type when the schema does not have it
    if not "word" in nodetypes:
         
        print "\n\n * Creating node type %s" % "word"
        props = { "label" : Text(),
                  "lang"  : Text()
                }
        bot.post_nodetype(gid, "word",  "no description", props)

    # create the "is_syn" edge type when the schema does not have it
    if not "is_syn" in edgetypes:
        print "\n\n * Creating edge type %s" % "is_syn"
        bot.post_edgetype(gid, "is_syn", "no desc", {"a":Text()})

    # read the schema again after the possible type creations above
    schema = bot.get_schema(gid)['schema']
    nodetypes = { n['name']:n for n in schema['nodetypes'] }
    edgetypes = { e['name']:e for e in schema['edgetypes'] }

    print nodetypes
    print edgetypes

    # vertex label -> uuid of the node posted for that label
    idx = {}

    if args.wait :
        raw_input("press <enter> key to start edges and nodes importation") 

    if args.seed: 
        # Seed mode: edges are posted one at a time; both end nodes are
        # posted the first time they are met, and every inserted edge (and
        # every vertex left without neighbor) is removed from the local
        # graph, until the local graph is empty.

        def set_node(v):
            # post the node of vertex `v` unless its label is already known
            if v['label'] not in idx:
               node = bot.post_node(gid, node_payload(v, nodetypes['word']['uuid']))
               idx[ v['label'] ] = node['uuid']
               print "inserting %s %s" % (v['label'] , node['uuid'])

        # NOTE(review): idx was already initialised to {} above
        idx = {}
        v1 = None

        # seeds grow into beautiful flowers 
        
        while graph.vcount() > 0:
            
            # start from the first vertex when there is no current vertex
            v1 = graph.vs[0] if v1 is None else v1
            
            # NOTE(review): `size` is not used in the visible code
            size = graph.vcount()

            nei = v1.neighbors()
            if not len(nei):
                # isolated vertex: drop it and pick a new start
                graph.delete_vertices([v1.index])
                v1 = None
                continue

            # insert at most 5 edges before pausing
            for i in range( min([5,len(nei)]) ):

                # neighbors are read again: the graph changes at each turn
                nei = v1.neighbors()
                
                if i >= len(nei):
                    # not enough neighbors left: jump to a random vertex
                    if graph.vcount():
                        r = randint(0,graph.vcount()-1)
                        v1 = graph.vs[r]
                    break
                
                # pick a random neighbor of v1
                r = randint(0,len(nei)-1)
                v2 = nei[r]
                
                print "inserting edge %s %s" % (v1['label'] , v2['label'])

                set_node(v1)
                set_node(v2)

                eid = graph.get_eid(v1.index, v2.index)
                src, tgt = idx[v1['label']], idx[v2['label']]
                
                # NOTE(review): the returned value `uuid` is not used below
                uuid = bot.post_edge(gid, edge_payload(edgetypes['is_syn']['uuid'], src, tgt, {}))

                # delete  from graph
                # * inserted edges
                # * nodes with no more edges 
                
                graph.delete_edges([eid])

                delete_nodes =  [ v.index for v in (v1, v2) if len(graph.neighbors(v)) == 0 ]

                if len(delete_nodes):
                    graph.delete_vertices(delete_nodes)
                    
                    if graph.vcount():
                        r = randint(0,graph.vcount()-1)
                        # switch v1
                        v1 = graph.vs[r]

                    else: break

            # wait sometimes
            pause(args.pause)

            
            
            
    else :
        # Bulk mode: post every node first, then every edge.

        print "posting nodes"
        count = 0
        fail = 0
        # a falsy uuid is counted as a failure; otherwise the uuid is stored
        # under the label of the node
        for node, uuid in bot.post_nodes( gid, gen_nodes(graph, nodetypes['word']['uuid']) ):
            if not uuid:
                fail += 1
            else :
                count += 1
                idx[node['properties']['label']] = uuid
            
        # NOTE(review): `fail` is counted above but not reported for nodes
        print "%s nodes inserted " % count
        
        #print "iterate over nodes"
        #for node in bot.find_all_nodes(gid, "word", {}):
            #pass

        # post edges
        print "posting edges"
        count = 0
        fail = 0

        # NOTE(review): `inv_idx` (uuid -> label) is not used in the visible
        # code
        inv_idx = { v:k for k,v in idx.iteritems() }
        
        for obj, uuid in bot.post_edges( gid, gen_edges(graph, edgetypes['is_syn']['uuid'], idx) ):
            if not uuid:
                fail += 1
            else :
                count += 1

            # wait sometimes    
            pause(args.pause)
            
        print "%s edges inserted, %s failed " % (count, fail)
예제 #9
0
 def _prox(pzero):
     """Return ``prox.sortcut`` (limit 500) of ``prox.prox_markov_dict``
     run on the enclosing ``graph`` from `pzero`, with 3 as third argument
     and ``add_loops=True``.
     """
     return prox.sortcut(
         prox.prox_markov_dict(graph, pzero, 3, add_loops=True),
         500,
     )
예제 #10
0
def index(es_index, cut_local=500, cut_global=-1, lcc=False, start=0, offset=0, **kwargs):
    """
    Read a graph file and hand one document per vertex to
    ``es_index.add_documents``.

    :param es_index: object whose ``add_documents`` receives the document
        iterator
    :param cut_global: <int> global vector cut -1 to keep all
    :param cut_local: <int> local vector cut -1 to keep all
    :param lcc: when true, only ``graph.clusters().giant()`` is indexed
    :param start: number of leading vertices of the global vector to skip
    :param offset: maximum number of documents to produce; a falsy value
        means no limit
    :param kwargs: must provide `path`, `name`, `lang` and `pos`; may provide
        `completion`, a callable ``(lang, pos, text)`` whose result is used
        as the suggester input (the label itself is used when it is missing
        or ``None``)
    """
    path = kwargs['path']
    name = kwargs['name']
    lang = kwargs['lang']
    pos = kwargs['pos']
    # completion from other resource goes to inputs
    # .get(): a missing key falls back to the default like an explicit None
    completion = kwargs.get('completion')
    if completion is None:
        completion = lambda lang, pos, text: text

    graph = igraph.read(path)

    if lcc:
        graph = graph.clusters().giant()

    print(graph.summary())

    # { idx : (rank, prox) }
    pg = prox.prox_markov_dict(graph, [], 4, add_loops=True)
    pg = prox.sortcut(pg, cut_global)
    pg = {e[0]: (rank + 1, e[1]) for rank, e in enumerate(pg)}

    def _prox(pzero):
        # local vector of a vertex, limited by `cut_local` (this limit used
        # to be hard-coded to 500, which made the parameter ineffective)
        pl = prox.prox_markov_dict(graph, pzero, 3, add_loops=True)
        return prox.sortcut(pl, cut_local)

    def iter_vertices():
        # yields one document per vertex of `pg`, honouring start/offset
        count = 0
        for i, k in enumerate(pg):
            if i < start:
                continue
            if offset and count >= offset:
                break

            count += 1
            vtx = graph.vs[k]
            label = vtx['label']
            neighborhood = graph.neighborhood(vtx)
            body = {
                'gid': k,
                'graph': name,
                'lang': lang,
                'pos': pos,
                'form': label,
                'neighbors': len(neighborhood),
                'neighborhood': neighborhood,
                'prox': _prox([k]),
                'rank': pg[k][0],
                'form_suggest': {
                    "input": completion(lang, pos, label),
                    "output": "/".join((name, lang, pos, label)),
                    "context": {
                        'prefix': ["*", lang, pos, '%s.%s' % (lang, pos)]
                    },
                    "weight": len(neighborhood),
                    "payload": {
                        'graph': name,
                        'lang': lang,
                        'pos': pos,
                        'form': label
                    }
                }
            }

            # progress line
            # NOTE(review): the line is encoded before being printed, which
            # shows a bytes repr on Python 3 -- confirm the target version
            line = "%s %s %s/%s %s %s" % (name, k, i, graph.vcount(), len(neighborhood), label)
            line = line.encode('utf8')
            print(line)

            yield body

    es_index.add_documents(iter_vertices())