def __call__(self, graph, pzero, vcount=None, length=None, **kwargs):
    """Score vertices of *graph* from *pzero* and limit the result.

    :param graph: a subclass of :class:`.AbstractGraph`
    :param pzero: list of vertex id, or dictionary `{vid: score}`
    :param vcount: limit handed to ``prox.sortcut``
    :param length: forwarded as third positional argument of
        ``self.prox_func``
    :param kwargs: extra keyword arguments forwarded to ``self.prox_func``
    :returns: the value returned by ``prox.sortcut``
    """
    scores = self.prox_func(graph, pzero, length, **kwargs)
    # limit the extraction to `vcount`
    return prox.sortcut(scores, vcount)
def graph_articles(gid, graph, all_articles=True, cut=200, uuids=None, **kwargs):
    """Return the subgraph built around the articles of graph `gid`.

    The starting distribution gives a score of 1 to every vertex whose
    ``nodetype`` equals ``"_<gid>_article"``; it is passed to
    ``extract_articles`` and the result is limited with ``sortcut``.

    :param gid: graph id, used to build the article nodetype name and
        forwarded to ``extract_articles``
    :param graph: graph whose ``vs`` carry ``nodetype`` (and ``uuid`` when
        `uuids` is used) attributes
    :param all_articles: when true, every article vertex is kept in the
        returned subgraph in addition to the extracted ones
    :param cut: number of extracted vertices to keep (not counting the
        vertices forced in through `uuids`)
    :param uuids: optional collection of vertex uuids that must be part of
        the result; ``None`` or empty means no forced vertex
    :param kwargs: forwarded to ``extract_articles``
    :returns: ``graph.subgraph(...)`` over the selected vertex ids
    """
    article_type = "_%s_article" % gid
    pz = [(v.index, 1.) for v in graph.vs if v['nodetype'] == article_type]

    vs = extract_articles(gid, graph, dict(pz), **kwargs)

    if uuids:
        vids = [v.index for v in graph.vs.select(uuid_in=uuids)]
        forced = set(vids)  # O(1) membership for the filter below
        # over-fetch by len(vids) so that `cut` entries remain once the
        # forced vertices have been filtered out
        vs = sortcut(vs, cut + len(vids))
        vs = [(v, s) for v, s in vs if v not in forced][:cut]
        vs = vs + [(v, 1.) for v in vids]
    else:
        vs = sortcut(vs, cut)

    if all_articles:
        vs = pz + vs

    # dict() removes duplicated vertex ids before building the subgraph
    return graph.subgraph(list(dict(vs)))
def expand_subgraph(graph, expand, nodes, length=4, cut=100, weightings=None):
    """Extract vertices around `nodes`, with extra weight on `expand`.

    :param graph: graph whose vertices carry a ``uuid`` attribute
    :param expand: uuids of the vertices to expand; each gets a score of 1
    :param nodes: uuids of the current vertices; each gets ``1/len(nodes)``
        unless it is also listed in `expand`
    :param length: forwarded to ``pure_prox``
    :param cut: limit handed to ``sortcut``
    :param weightings: forwarded to ``_weights``; ``None`` or ``[]`` is
        replaced by ``["1"]``
    :returns: the value returned by ``sortcut``
    """
    index_by_uuid = {}
    for vertex in graph.vs:
        index_by_uuid[vertex['uuid']] = vertex.index

    pz = {}
    for uuid in nodes:
        pz[index_by_uuid[uuid]] = 1. / len(nodes)
    # written last so that expanded vertices override the uniform score
    for uuid in expand:
        pz[index_by_uuid[uuid]] = 1.

    if weightings in ([], None):
        weightings = ["1"]
    wneighbors = _weights(weightings)

    scores = pure_prox(graph, pz, length, wneighbors)
    return sortcut(scores, cut)
def search(self, p0, nb_results, l):
    """Retrieve `nb_results` vertices by random walk starting from `p0`.

    The walk runs on ``self.graph`` through ``prox.prox_markov``; each
    vertex is added to its own neighbor list.

    :param p0: starting point(s), passed as is to ``prox.prox_markov``
    :param nb_results: limit handed to ``prox.sortcut``
    :param l: passed as the ``l`` keyword of ``prox.prox_markov``
    :returns: the value returned by ``prox.sortcut``
    """
    from cello.graphs import prox

    # TODO: choose the extraction method
    # TODO: decide whether reflexivity should be forced or not
    def neighbors_with_self(graph, vtx):
        return graph.neighbors(vtx) + [vtx]

    pline = prox.prox_markov(
        self.graph, p0, neighbors_fct=neighbors_with_self, l=l)
    return prox.sortcut(pline, nb_results)
def __call__(self, pzero, vcount=None, length=None, add_loops=None, mode=None, is_wgt=None, **kwargs):
    """Score vertices of ``self.global_graph`` from *pzero* and limit them.

    :param pzero: starting vertices, passed as is to ``self.prox_func``
    :param vcount: limit handed to ``prox.sortcut``
    :param length: forwarded as third positional argument of
        ``self.prox_func``
    :param add_loops: forwarded to ``self.prox_func`` as ``add_loops``
    :param mode: key looked up in ``self._modes["text_to_num"]``; the
        mapped value is forwarded as ``mode``
    :param is_wgt: when equal to ``True`` and ``self._wgt`` is set,
        ``self._wgt`` is forwarded as ``weight``
    :param kwargs: extra keyword arguments forwarded to ``self.prox_func``
    :returns: the value returned by ``prox.sortcut``
    """
    kwargs.update(
        add_loops=add_loops,
        loops_weight=self._loops_weight,
        mode=self._modes["text_to_num"][mode],
    )

    # equality test kept on purpose: only values equal to True enable it
    if self._wgt is not None and is_wgt == True:
        kwargs["weight"] = self._wgt

    scores = self.prox_func(self.global_graph, pzero, length, **kwargs)
    # limit the extraction to `vcount`
    return prox.sortcut(scores, vcount)
def extract(graph, pz, cut=50, weighting=None, length=3, **kwargs):
    """Run ``pure_prox`` from `pz` and limit the result to `cut`.

    :param graph: graph handed to ``pure_prox``
    :param pz: starting distribution handed to ``pure_prox``
    :param cut: limit handed to ``sortcut``
    :param weighting: forwarded to ``_weights``
    :param length: forwarded to ``pure_prox``
    :param kwargs: accepted but not used by this function
    :returns: the value returned by ``sortcut``
    """
    neighbor_weights = _weights(weighting)
    scores = pure_prox(graph, pz, length, neighbor_weights)
    return sortcut(scores, cut)
def main():
    """Import a graph file into a remote graph through a ``Botagraph`` client.

    Steps, as visible in the body:

    * build the client from ``args.host`` / ``args.key`` and authenticate
      when ``args.username`` and ``args.password`` are both set; with
      ``args.infos`` only pretty-print the remote graph and return;
    * read the local graph with ``igraph.read(args.path)``;
    * optionally reduce it: ``args.cut > 0`` keeps the ``n`` vertices of
      highest degree, otherwise ``args.gl > 0`` keeps the vertices returned
      by ``sortcut(prox_markov_dict(...), n)``;
    * create the remote graph ``gid`` when missing, as well as the "word"
      node type and the "is_syn" edge type when absent from its schema;
    * upload: with ``args.seed`` edges are posted one at a time while the
      local graph is consumed, otherwise nodes then edges are posted
      through ``bot.post_nodes`` / ``bot.post_edges``.
    """
    # NOTE(review): block nesting below was reconstructed from
    # whitespace-collapsed text -- confirm against the original layout.
    from pprint import pprint
    parser = argparse.ArgumentParser()
    # NOTE(review): the text from this call up to a later `print` looks
    # truncated/redacted (unbalanced parenthesis, `args` used before being
    # parsed) -- the remaining argument definitions are not visible here.
    parser.add_argument("--host", action='store', help="host", default="http://*****:*****@ %s \n " % (args.gid, args.host)
    bot = Botagraph(args.host, args.key)
    gid = args.gid
    if args.username and args.password:
        bot.authenticate(args.username, args.password)
    if args.infos:
        pprint(bot.get_graph(gid))
        return
    # read / parse graph
    print "\n * Reading %s" % args.path
    graph = igraph.read(args.path)
    # subgraph
    if args.cut > 0:
        print " ** cut %s based on degree()" % args.cut
        # cut method based on degree: keep the n vertices of highest degree
        n = int(args.cut)
        vs = list((v.index, v.degree()) for v in graph.vs)
        vs = sorted(vs, key=lambda x: x[1], reverse=True)
        vs = vs[:n]
        graph = graph.subgraph([v[0] for v in vs])
    elif args.gl > 0:
        # NOTE(review): `ALL` is imported but not used in this function
        from cello.graphs.prox import prox_markov_dict, sortcut, ALL
        n = int(args.gl)
        # every vertex is used as a starting point
        extract = prox_markov_dict(graph, range(graph.vcount()), 80, add_loops=True)
        vs = [i for i, v in sortcut(extract, n)]
        print "vs", vs
        graph = graph.subgraph(vs)
    print graph.summary()
    # NOTE(review): the list is sized with vcount() although it is assigned
    # to an edge attribute -- confirm whether ecount() was intended.
    graph.es['a'] = [1 for i in xrange(graph.vcount())]
    if not bot.has_graph(gid):
        print "\n * Create graph %s" % gid
        bot.create_graph(
            gid, {
                'description': "Dicosyn experiment\n * ",
                'image': "",
                'tags': ['synonymes', 'dictionnaire']
            })
    print "\n * Get schema '%s'" % gid
    schema = bot.get_schema(gid)['schema']
    # index the schema types by name
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}
    print "\n nodetypes: ", nodetypes.keys()
    print "\n edgetypes: ", edgetypes.keys()
    if not "word" in nodetypes:
        print "\n\n * Creating node type %s" % "word"
        props = {"label": Text(), "lang": Text()}
        bot.post_nodetype(gid, "word", "no description", props)
    if not "is_syn" in edgetypes:
        print "\n\n * Creating edge type %s" % "is_syn"
        bot.post_edgetype(gid, "is_syn", "no desc", {"a": Text()})
    # reload the schema so that freshly created types get their uuid
    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}
    print nodetypes
    print edgetypes
    # label -> uuid of the nodes already posted
    idx = {}
    if args.wait:
        raw_input("press <enter> key to start edges and nodes importation")
    if args.seed:
        def set_node(v):
            # post the vertex only once, remembering its remote uuid
            if v['label'] not in idx:
                node = bot.post_node(
                    gid, node_payload(v, nodetypes['word']['uuid']))
                idx[v['label']] = node['uuid']
                print "inserting %s %s" % (v['label'], node['uuid'])
        idx = {}
        v1 = None
        # seeds grow into beautiful flowers
        # the local graph is consumed: the loop ends once it has no vertex
        while graph.vcount() > 0:
            v1 = graph.vs[0] if v1 is None else v1
            # NOTE(review): `size` is never read afterwards
            size = graph.vcount()
            nei = v1.neighbors()
            if not len(nei):
                # isolated vertex: drop it and restart from the first vertex
                graph.delete_vertices([v1.index])
                v1 = None
                continue
            # post at most 5 edges from the current vertex
            for i in range(min([5, len(nei)])):
                nei = v1.neighbors()
                if i >= len(nei):
                    if graph.vcount():
                        r = randint(0, graph.vcount() - 1)
                        v1 = graph.vs[r]
                    break
                # pick a random neighbor
                r = randint(0, len(nei) - 1)
                v2 = nei[r]
                print "inserting edge %s %s" % (v1['label'], v2['label'])
                set_node(v1)
                set_node(v2)
                eid = graph.get_eid(v1.index, v2.index)
                src, tgt = idx[v1['label']], idx[v2['label']]
                # NOTE(review): the returned `uuid` is never read afterwards
                uuid = bot.post_edge(
                    gid,
                    edge_payload(edgetypes['is_syn']['uuid'], src, tgt, {}))
                # delete from graph
                # * inserted edges
                # * nodes with no more edges
                graph.delete_edges([eid])
                delete_nodes = [
                    v.index for v in (v1, v2) if len(graph.neighbors(v)) == 0
                ]
                if len(delete_nodes):
                    graph.delete_vertices(delete_nodes)
                    if graph.vcount():
                        r = randint(0, graph.vcount() - 1)
                        # switch v1
                        v1 = graph.vs[r]
                    else:
                        break
                # wait sometimes
                pause(args.pause)
    else:
        print "posting nodes"
        count = 0
        fail = 0
        for node, uuid in bot.post_nodes(
                gid, gen_nodes(graph, nodetypes['word']['uuid'])):
            # a falsy uuid is counted as a failed insertion
            if not uuid:
                fail += 1
            else:
                count += 1
                idx[node['properties']['label']] = uuid
        print "%s nodes inserted " % count
        #print "iterate over nodes"
        #for node in bot.find_all_nodes(gid, "word", {}):
        #pass
        # post edges
        print "posting edges"
        count = 0
        fail = 0
        # NOTE(review): `inv_idx` is never read afterwards
        inv_idx = {v: k for k, v in idx.iteritems()}
        for obj, uuid in bot.post_edges(
                gid, gen_edges(graph, edgetypes['is_syn']['uuid'], idx)):
            if not uuid:
                fail += 1
            else:
                count += 1
            # wait sometimes
            pause(args.pause)
        print "%s edges inserted, %s failed " % (count, fail)
def main():
    """Import a graph file into a remote graph through a ``Botagraph`` client.

    Steps, as visible in the body:

    * build the client from ``args.host`` / ``args.key`` and authenticate
      when ``args.username`` and ``args.password`` are both set; with
      ``args.infos`` only pretty-print the remote graph and return;
    * read the local graph with ``igraph.read(args.path)``;
    * optionally reduce it: ``args.cut > 0`` keeps the ``n`` vertices of
      highest degree, otherwise ``args.gl > 0`` keeps the vertices returned
      by ``sortcut(prox_markov_dict(...), n)``;
    * create the remote graph ``gid`` when missing, as well as the "word"
      node type and the "is_syn" edge type when absent from its schema;
    * upload: with ``args.seed`` edges are posted one at a time while the
      local graph is consumed, otherwise nodes then edges are posted
      through ``bot.post_nodes`` / ``bot.post_edges``.
    """
    # NOTE(review): block nesting below was reconstructed from
    # whitespace-collapsed text -- confirm against the original layout.
    from pprint import pprint
    parser = argparse.ArgumentParser()
    # NOTE(review): the text from this call up to a later `print` looks
    # truncated/redacted (unbalanced parenthesis, `args` used before being
    # parsed) -- the remaining argument definitions are not visible here.
    parser.add_argument("--host", action='store', help="host", default="http://*****:*****@ %s \n " % (args.gid, args.host)
    bot = Botagraph(args.host, args.key)
    gid = args.gid
    if args.username and args.password:
        bot.authenticate(args.username, args.password)
    if args.infos:
        pprint( bot.get_graph(gid) )
        return
    # read / parse graph
    print "\n * Reading %s" % args.path
    graph = igraph.read(args.path)
    # subgraph
    if args.cut > 0:
        print " ** cut %s based on degree()" % args.cut
        # cut method based on degree: keep the n vertices of highest degree
        n = int(args.cut)
        vs = list( (v.index, v.degree() ) for v in graph.vs )
        vs = sorted( vs, key=lambda x: x[1], reverse = True )
        vs = vs[:n]
        graph = graph.subgraph( [ v[0] for v in vs ] )
    elif args.gl > 0:
        # NOTE(review): `ALL` is imported but not used in this function
        from cello.graphs.prox import prox_markov_dict, sortcut, ALL
        n = int(args.gl)
        # every vertex is used as a starting point
        extract = prox_markov_dict(graph, range(graph.vcount()), 80, add_loops=True)
        vs = [ i for i,v in sortcut(extract,n)]
        print "vs", vs
        graph = graph.subgraph( vs )
    print graph.summary()
    # NOTE(review): the list is sized with vcount() although it is assigned
    # to an edge attribute -- confirm whether ecount() was intended.
    graph.es['a'] = [ 1 for i in xrange(graph.vcount() ) ]
    if not bot.has_graph(gid) :
        print "\n * Create graph %s" % gid
        bot.create_graph(gid, { 'description':"Dicosyn experiment\n * ",
                                'image': "",
                                'tags': ['synonymes', 'dictionnaire']
                              }
                        )
    print "\n * Get schema '%s'" % gid
    schema = bot.get_schema(gid)['schema']
    # index the schema types by name
    nodetypes = { n['name']:n for n in schema['nodetypes'] }
    edgetypes = { e['name']:e for e in schema['edgetypes'] }
    print "\n nodetypes: ", nodetypes.keys()
    print "\n edgetypes: ", edgetypes.keys()
    if not "word" in nodetypes:
        print "\n\n * Creating node type %s" % "word"
        props = { "label" : Text(), "lang" : Text() }
        bot.post_nodetype(gid, "word", "no description", props)
    if not "is_syn" in edgetypes:
        print "\n\n * Creating edge type %s" % "is_syn"
        bot.post_edgetype(gid, "is_syn", "no desc", {"a":Text()})
    # reload the schema so that freshly created types get their uuid
    schema = bot.get_schema(gid)['schema']
    nodetypes = { n['name']:n for n in schema['nodetypes'] }
    edgetypes = { e['name']:e for e in schema['edgetypes'] }
    print nodetypes
    print edgetypes
    # label -> uuid of the nodes already posted
    idx = {}
    if args.wait :
        raw_input("press <enter> key to start edges and nodes importation")
    if args.seed:
        def set_node(v):
            # post the vertex only once, remembering its remote uuid
            if v['label'] not in idx:
                node = bot.post_node(gid, node_payload(v, nodetypes['word']['uuid']))
                idx[ v['label'] ] = node['uuid']
                print "inserting %s %s" % (v['label'] , node['uuid'])
        idx = {}
        v1 = None
        # seeds grow into beautiful flowers
        # the local graph is consumed: the loop ends once it has no vertex
        while graph.vcount() > 0:
            v1 = graph.vs[0] if v1 is None else v1
            # NOTE(review): `size` is never read afterwards
            size = graph.vcount()
            nei = v1.neighbors()
            if not len(nei):
                # isolated vertex: drop it and restart from the first vertex
                graph.delete_vertices([v1.index])
                v1 = None
                continue
            # post at most 5 edges from the current vertex
            for i in range( min([5,len(nei)]) ):
                nei = v1.neighbors()
                if i >= len(nei):
                    if graph.vcount():
                        r = randint(0,graph.vcount()-1)
                        v1 = graph.vs[r]
                    break
                # pick a random neighbor
                r = randint(0,len(nei)-1)
                v2 = nei[r]
                print "inserting edge %s %s" % (v1['label'] , v2['label'])
                set_node(v1)
                set_node(v2)
                eid = graph.get_eid(v1.index, v2.index)
                src, tgt = idx[v1['label']], idx[v2['label']]
                # NOTE(review): the returned `uuid` is never read afterwards
                uuid = bot.post_edge(gid, edge_payload(edgetypes['is_syn']['uuid'], src, tgt, {}))
                # delete from graph
                # * inserted edges
                # * nodes with no more edges
                graph.delete_edges([eid])
                delete_nodes = [ v.index for v in (v1, v2) if len(graph.neighbors(v)) == 0 ]
                if len(delete_nodes):
                    graph.delete_vertices(delete_nodes)
                    if graph.vcount():
                        r = randint(0,graph.vcount()-1)
                        # switch v1
                        v1 = graph.vs[r]
                    else: break
                # wait sometimes
                pause(args.pause)
    else :
        print "posting nodes"
        count = 0
        fail = 0
        for node, uuid in bot.post_nodes( gid, gen_nodes(graph, nodetypes['word']['uuid']) ):
            # a falsy uuid is counted as a failed insertion
            if not uuid:
                fail += 1
            else :
                count += 1
                idx[node['properties']['label']] = uuid
        print "%s nodes inserted " % count
        #print "iterate over nodes"
        #for node in bot.find_all_nodes(gid, "word", {}):
        #pass
        # post edges
        print "posting edges"
        count = 0
        fail = 0
        # NOTE(review): `inv_idx` is never read afterwards
        inv_idx = { v:k for k,v in idx.iteritems() }
        for obj, uuid in bot.post_edges( gid, gen_edges(graph, edgetypes['is_syn']['uuid'], idx) ):
            if not uuid:
                fail += 1
            else :
                count += 1
            # wait sometimes
            pause(args.pause)
        print "%s edges inserted, %s failed " % (count, fail)
def _prox(pzero):
    """Return the local proximity vector of `pzero`, limited to 500.

    Uses the names ``graph`` and ``prox`` from the enclosing scope.

    :param pzero: starting vertices handed to ``prox.prox_markov_dict``
    :returns: the value returned by ``prox.sortcut``
    """
    return prox.sortcut(
        prox.prox_markov_dict(graph, pzero, 3, add_loops=True), 500)
def index(es_index, cut_local=500, cut_global=-1, lcc=False, start=0, offset=0, **kwargs):
    """Index the vertices of a graph file as documents of `es_index`.

    One document is generated per vertex kept by the global cut, holding
    its neighborhood, its local proximity vector and its global rank, then
    the whole stream is handed to ``es_index.add_documents``.

    :param es_index: object exposing ``add_documents(iterable)``
    :param cut_local: <int> local vector cut -1 to keep all
    :param cut_global: <int> global vector cut -1 to keep all
    :param lcc: when true, only ``graph.clusters().giant()`` is indexed
    :param start: number of leading vertices to skip
    :param offset: maximum number of documents to generate; 0 (default)
        means no limit
    :param kwargs: must provide ``path``, ``name``, ``lang`` and ``pos``;
        may provide ``completion``, a callable ``(lang, pos, text)`` whose
        result is used as suggestion input (defaults to the text itself)
    """
    path = kwargs['path']
    name = kwargs['name']
    lang = kwargs['lang']
    pos = kwargs['pos']

    # completion from other resource goes to inputs
    # .get(): the fallback below shows the key is meant to be optional
    completion = kwargs.get('completion')
    if completion is None:
        completion = lambda lang, pos, text: text

    graph = igraph.read(path)
    if lcc:
        graph = graph.clusters().giant()
    print(graph.summary())

    # { idx : (rank, prox) }
    pg = prox.prox_markov_dict(graph, [], 4, add_loops=True)
    pg = prox.sortcut(pg, cut_global)
    pg = {e[0]: (rank + 1, e[1]) for rank, e in enumerate(pg)}

    def _prox(pzero):
        pl = prox.prox_markov_dict(graph, pzero, 3, add_loops=True)
        # honour `cut_local` (it used to be hard-coded to 500, the default)
        return prox.sortcut(pl, cut_local)

    def iter_vertices():
        vcount = graph.vcount()  # the graph is not modified while indexing
        count = 0
        for i, k in enumerate(pg):
            if i < start:
                continue
            if offset and count >= offset:
                break
            count += 1

            vtx = graph.vs[k]
            label = vtx['label']
            neighborhood = graph.neighborhood(vtx)

            body = {
                'gid': k,
                'graph': name,
                'lang': lang,
                'pos': pos,
                'form': label,
                'neighbors': len(neighborhood),
                'neighborhood': neighborhood,
                'prox': _prox([k]),
                'rank': pg[k][0],
                'form_suggest': {
                    "input": completion(lang, pos, label),
                    "output": "/".join((name, lang, pos, label)),
                    "context": {
                        'prefix': ["*", lang, pos, '%s.%s' % (lang, pos)]
                    },
                    "weight": len(neighborhood),
                    "payload": {
                        'graph': name,
                        'lang': lang,
                        'pos': pos,
                        'form': label
                    }
                }
            }

            # progress line
            line = "%s %s %s/%s %s %s" % (name, k, i, vcount, len(neighborhood), label)
            line = line.encode('utf8')
            print(line)
            yield body

    es_index.add_documents(iter_vertices())