def main():
    """Post the Old Chinese phonology graph read from an OCR file.

    Parses the command line (--host, --key, --username, --password, --path,
    --gid), loads the initials, the GSR series and the initial/series matrix
    with ``load_OCR_file(args.path)``, creates the graph ``GID`` together
    with its node and edge types when it does not exist yet, then posts
    every node and every edge.

    Note: ``--gid`` is parsed but never read; the global ``GID`` names the
    graph everywhere below.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--host", action='store', help="host",
                     default="http://localhost:5000")
    optional = (
        ("--key", "key"),
        ("--username", "user"),
        ("--password", "pwd"),
        ("--path", "path"),
        ("--gid", "graph id"),
    )
    for flag, help_text in optional:
        cli.add_argument(flag, action='store', help=help_text, default=None)
    args = cli.parse_args()

    bot = Botagraph(args.host, args.key)
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    initials, series, matrix = load_OCR_file(args.path)

    if not bot.has_graph(GID):
        graph_attrs = {
            'description': 'a graph of Old Chinese phonology',
            'image': "",
            'tags': ['chinese', 'phonology'],
        }
        bot.create_graph(GID, graph_attrs)
        bot.post_nodetype(GID, 'GSR', 'Karlgren', {})
        bot.post_nodetype(GID, 'Initial', '', {})
        bot.post_edgetype(GID, 'Sinograms', '', {})

    print("Get schema '%s'" % GID)
    schema = bot.get_schema(GID)['schema']
    nodetypes = dict((t['name'], t) for t in schema['nodetypes'])
    edgetypes = dict((t['name'], t) for t in schema['edgetypes'])

    def post_labels(type_name, labels):
        """Post one node per label; return a dict label -> posted uuid."""
        payloads = (
            {'nodetype': nodetypes[type_name]['uuid'],
             'properties': {'label': label}}
            for label in labels
        )
        uuid_of = {}
        # Each result is matched to its label by position in the sequence.
        for position, (_, uuid) in enumerate(bot.post_nodes(GID, payloads)):
            uuid_of[labels[position]] = uuid
        return uuid_of

    initial_uuids = post_labels('Initial', initials)
    series_uuids = post_labels('GSR', series)

    # One edge per (initial, series) cell, labelled with its sinograms.
    edges = []
    for (initial, gsr), sinograms in matrix.iteritems():
        edges.append({
            'edgetype': edgetypes['Sinograms']['uuid'],
            'properties': {'label': u",".join(sinograms)},
            'source': series_uuids[gsr],
            'target': initial_uuids[initial],
        })

    # post_edges is iterated only for its side effects; results are ignored.
    for _ in bot.post_edges(GID, iter(edges)):
        pass
def main():
    """Rebuild a graph of alliances between countries from a CSV file.

    Reads the API key from the file named by ``--key``, deletes the graph
    ``--gid`` when it already exists, recreates it with a ``Country`` node
    type and an ``alliance`` edge type, then posts one node per country code
    and one edge per retained row of the CSV file ``--path``.

    ``--username`` and ``--password`` are accepted but not used here.
    """
    import csv

    parser = argparse.ArgumentParser()
    parser.add_argument("--host", action='store', help="host",
                        default="http://localhost:5000")
    parser.add_argument("--key", action='store', help="key", default=None)
    parser.add_argument("--username", action='store', help="user",
                        default=None)
    parser.add_argument("--password", action='store', help="pwd",
                        default=None)
    parser.add_argument("--path", action='store', help="path", default=None)
    parser.add_argument("--gid", action='store', help="graph id",
                        default=None)
    args = parser.parse_args()

    # Bot creation & login; the key file is closed as soon as it is read.
    with open(args.key, 'r') as key_file:
        key = key_file.read().strip()
    print("\n * Connecting to %s \n " % args.host)
    bot = Botagraph(args.host, key)

    # read / parse graph
    print("\n * Reading %s" % args.path)
    g = Graph(args.gid)
    gid = g.gid

    # Always start from a fresh graph: drop any existing one first.
    if bot.has_graph(gid):
        bot.delete_graph(gid)

    if not bot.has_graph(gid):
        print("\n * Create graph %s" % gid)
        bot.create_graph(gid, g.attrs)

        print("\n * Creating node type %s" % "Country")
        props = {"code": Text(), "label": Text()}
        bot.post_nodetype(gid, "Country", "Country ", props)

        print("\n * Creating edge type %s" % "alliance")
        props = {
            'id': Numeric(),
            'label': Text(),
            'starts': Text(),
            'ends': Text(),
            'defense': Numeric(),
            'neutrality': Numeric(),
            'nonaggression': Numeric(),
            'entente': Numeric(),
        }
        bot.post_edgetype(gid, "alliance", "alliance terms", props)

    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}

    alliances = "defense neutrality nonaggression entente".split()

    def country_node(fields):
        """Build the node payload for a ``(code, label)`` pair of columns."""
        return {
            'nodetype': nodetypes["Country"]['uuid'],
            'properties': {'code': fields[0], 'label': fields[1]},
        }

    with open(args.path, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for i, row in enumerate(reader):
            # Row 0 is skipped, and so is every odd-numbered row. The
            # original comment only says "undirected" -- presumably each
            # alliance is listed once per direction; TODO confirm.
            if i == 0 or i % 2 != 0:
                continue

            # Columns 1-2 and 3-4 hold (code, label) of the two countries.
            g.vs[row[1]] = country_node(row[1:3])
            g.vs[row[3]] = country_node(row[3:5])

            # Columns 13-16 are the four alliance flags, in `alliances` order.
            es = dict(zip(alliances, row[13:17]))
            es['starts'] = "/".join(row[5:8])
            es['ends'] = "/".join(row[8:11])
            es['id'] = row[0]
            # The label lists the alliance kinds whose flag is set.
            es['label'] = " ".join(
                [a for a in alliances if es[a] in (1, "1")])

            # Source/target hold country codes for now; they are replaced by
            # node uuids once the nodes have been posted.
            g.es[i] = {
                'edgetype': edgetypes["alliance"]['uuid'],
                'source': row[1],
                'target': row[3],
                'properties': es,
            }

    print("%s %s" % (len(g.vs), len(g.es)))

    # Post the nodes and remember the uuid assigned to each country code.
    idx = {}
    for node, uuid in bot.post_nodes(gid, g.vs.itervalues()):
        idx[node['properties']['code']] = uuid

    bot.star_nodes(gid, idx.values())
    print("%s nodes inserted . " % (len(idx)))

    # Swap country codes for the uuids of the posted nodes.
    for edge in g.es.itervalues():
        edge['source'] = idx[edge['source']]
        edge['target'] = idx[edge['target']]

    fail = count = 0
    for edge, uuid in bot.post_edges(gid, g.es.itervalues()):
        if not uuid:
            fail += 1
        else:
            count += 1

    print("%s edges inserted, %s failed " % (count, fail))
'url': url} yield {'nodetype': pad_nodetype, 'properties': props} def getEdgeIterator(node_idx): for f in glob.glob("./hackpad-backup-g0v/*.html"): page_id = node_idx[f.rsplit("/",1)[-1][:-5]] with open(f) as F: html = open(f).read() bs = BS(html) for url in [x['href'] for x in bs.findAll("a")]: m = re_link.search(url) if m: link = m.group(1) target_id = node_idx.get(link,None) if target_id is not None: yield { 'edgetype': link_edgetype, 'source': page_id, 'target': target_id, 'properties': {}} # Posting Nodes node_idx = {} for node, uuid in bot.post_nodes(GRAPHNAME, getNodeIterator()): nid = node['properties']['id'] node_idx[nid] = uuid # Posting Edges list(bot.post_edges(GRAPHNAME, getEdgeIterator(node_idx)))
def main():
    """Copy the nodes and links described by ``SCHEMA`` into graph ``--gid``.

    Creates the graph and the node/edge types listed in ``SCHEMA`` when the
    graph does not exist yet, then posts the nodes produced by ``gen_nodes``
    and the edges produced by ``gen_edges`` for each schema entry, and
    prints how many insertions succeeded or failed.

    ``--path`` is accepted but not used here.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", action='store', help="host",
                        default="http://localhost:5000")
    parser.add_argument("--key", action='store', help="key", default=None)
    parser.add_argument("--username", action='store', help="user",
                        default=None)
    parser.add_argument("--password", action='store', help="pwd",
                        default=None)
    parser.add_argument("--path", action='store', help="path", default=None)
    parser.add_argument("--gid", action='store', help="graph id",
                        default=None)
    args = parser.parse_args()

    # Bot creation & login
    bot = Botagraph(args.host, args.key)
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    # Source graph handle, built with py2neo's default settings.
    # NOTE(review): presumably this connects to the default local Neo4j
    # instance rather than creating an empty graph -- confirm.
    graph = py2neo.Graph()

    gid = args.gid
    if not bot.has_graph(gid):
        print("create graph %s" % gid)
        bot.create_graph(gid, {
            'description': SCHEMA["description"],
            'image': "",
            'tags': SCHEMA['tags'],
        })

        for infos in SCHEMA["Nodes"]:
            print("create node type %s" % infos["type"])
            bot.post_nodetype(gid, *buildType(infos))

        for infos in SCHEMA["Links"]:
            print("create edge type %s" % infos["type"])
            bot.post_edgetype(gid, *buildType(infos))

    print("Get schema '%s'" % gid)
    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}

    print(nodetypes)
    print(edgetypes)

    # Maps the SCHEMA['key'] property of each posted node to its uuid.
    idx = {}

    print("posting nodes")
    count = fail = 0
    for infos in SCHEMA['Nodes']:
        nodes = gen_nodes(graph, nodetypes[infos['type']]['uuid'], infos)
        for node, uuid in bot.post_nodes(gid, nodes):
            if not uuid:
                fail += 1
                continue
            count += 1
            idx[node['properties'][SCHEMA['key']]] = uuid

    # Failures were counted but silently dropped before; report them too.
    print("%s nodes inserted, %s failed " % (count, fail))

    # post edges
    print("posting edges")
    count = fail = 0
    for infos in SCHEMA['Links']:
        edges = gen_edges(graph, edgetypes[infos['type']]['uuid'], idx, infos)
        for _, uuid in bot.post_edges(gid, edges):
            if not uuid:
                fail += 1
            else:
                count += 1

    print("%s edges inserted, %s failed " % (count, fail))
def main():
    """Copy the nodes and links described by ``SCHEMA`` into graph ``--gid``.

    Creates the graph and the node/edge types listed in ``SCHEMA`` when the
    graph does not exist yet, then posts the nodes produced by ``gen_nodes``
    and the edges produced by ``gen_edges`` for each schema entry, and
    prints how many insertions succeeded or failed.

    ``--path`` is accepted but not used here.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", action='store', help="host",
                        default="http://localhost:5000")
    parser.add_argument("--key", action='store', help="key", default=None)
    parser.add_argument("--username", action='store', help="user",
                        default=None)
    parser.add_argument("--password", action='store', help="pwd",
                        default=None)
    parser.add_argument("--path", action='store', help="path", default=None)
    parser.add_argument("--gid", action='store', help="graph id",
                        default=None)
    args = parser.parse_args()

    # Bot creation & login
    bot = Botagraph(args.host, args.key)
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    # Source graph handle, built with py2neo's default settings.
    # NOTE(review): presumably this connects to the default local Neo4j
    # instance rather than creating an empty graph -- confirm.
    graph = py2neo.Graph()

    gid = args.gid
    if not bot.has_graph(gid):
        print("create graph %s" % gid)
        bot.create_graph(gid, {
            'description': SCHEMA["description"],
            'image': "",
            'tags': SCHEMA['tags'],
        })

        for infos in SCHEMA["Nodes"]:
            print("create node type %s" % infos["type"])
            bot.post_nodetype(gid, *buildType(infos))

        for infos in SCHEMA["Links"]:
            print("create edge type %s" % infos["type"])
            bot.post_edgetype(gid, *buildType(infos))

    print("Get schema '%s'" % gid)
    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}

    print(nodetypes)
    print(edgetypes)

    # Maps the SCHEMA['key'] property of each posted node to its uuid.
    idx = {}

    print("posting nodes")
    count = fail = 0
    for infos in SCHEMA['Nodes']:
        nodes = gen_nodes(graph, nodetypes[infos['type']]['uuid'], infos)
        for node, uuid in bot.post_nodes(gid, nodes):
            if not uuid:
                fail += 1
                continue
            count += 1
            idx[node['properties'][SCHEMA['key']]] = uuid

    # Failures were counted but silently dropped before; report them too.
    print("%s nodes inserted, %s failed " % (count, fail))

    # post edges
    print("posting edges")
    count = fail = 0
    for infos in SCHEMA['Links']:
        edges = gen_edges(graph, edgetypes[infos['type']]['uuid'], idx, infos)
        for _, uuid in bot.post_edges(gid, edges):
            if not uuid:
                fail += 1
            else:
                count += 1

    print("%s edges inserted, %s failed " % (count, fail))
def main():
    """Graph which JavaScript files listen to or trigger which messages.

    Every file given on the command line (except names ending in
    ``min.js``) becomes a ``file`` node. Every message name found in a
    ``listenTo(..."name"`` or ``trigger(..."name"`` call becomes a
    ``message`` node, linked to the file by a ``listen`` or ``trigger``
    edge. Nodes and edges are then posted to the graph ``--gid``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", action='store', help="host",
                        default="http://localhost:5000")
    parser.add_argument("--key", action='store', help="key", default=None)
    parser.add_argument("--username", action='store', help="user",
                        default=None)
    parser.add_argument("--password", action='store', help="pwd",
                        default=None)
    parser.add_argument("--gid", action='store', help="graph id",
                        default=None)
    parser.add_argument("files", action='store', default=None, nargs='+')
    args = parser.parse_args()

    # File names are separated so that the banner stays readable.
    print(" analysing {}".format(", ".join(args.files)))

    # Bot creation & login
    bot = Botagraph(args.host, args.key)
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    gid = args.gid
    if not bot.has_graph(gid):
        print("create graph %s" % gid)
        # NOTE(review): a bare string is passed as the graph attributes;
        # confirm that create_graph accepts it.
        bot.create_graph(gid, "no description")

        props = {"label": Text()}
        for type_name in ("file", "message"):
            print("create node type %s" % type_name)
            bot.post_nodetype(gid, type_name, "no description", props)

        for type_name in ("listen", "trigger"):
            print("create edge type %s" % type_name)
            bot.post_edgetype(gid, type_name, "no desc", {})

    print("Get schema '%s'" % gid)
    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}

    nodetype_file = nodetypes['file']['uuid']
    nodetype_message = nodetypes['message']['uuid']

    # Each pattern captures the first double-quoted lowercase/dash name that
    # follows the call; it is paired with the edge type it produces.
    # Compiled once instead of once per scanned line.
    patterns = (
        (re.compile(r'listenTo\([^"]*"([a-z-]+)"'),
         edgetypes['listen']['uuid']),
        (re.compile(r'trigger\([^"]*"([a-z-]+)"'),
         edgetypes['trigger']['uuid']),
    )

    vs = {}   # label -> node description (files and messages share it)
    es = []   # (file label, edge type uuid, message label)

    for js_file in args.files:
        if js_file.endswith('min.js'):
            # ignore minified js
            continue
        with codecs.open(js_file, 'r', 'utf8') as FILE:
            # Drop any leading run of "." and "/" from the node label.
            js_file = re.sub('^[./]+', '', js_file)
            vs[js_file] = {'label': js_file, 'nodetype': nodetype_file}
            for line in FILE:
                # remove comments: keep what precedes the first "//"
                line = line.strip().split('//', 1)[0]
                for pattern, edgetype in patterns:
                    m = pattern.search(line)
                    if not m:
                        continue
                    message = m.group(1)
                    if message not in vs:
                        vs[message] = {
                            'label': message,
                            'nodetype': nodetype_message,
                        }
                    es.append((js_file, edgetype, message))

    # Maps the label of each posted node to its uuid.
    idx = {}
    print("posting nodes")
    count = fail = 0
    for node, uuid in bot.post_nodes(gid, gen_nodes(vs)):
        if not uuid:
            fail += 1
        else:
            count += 1
            idx[node['properties']['label']] = uuid

    # Failures were counted but silently dropped before; report them too.
    print("%s nodes inserted, %s failed " % (count, fail))

    # post edges
    print("posting edges")
    count = fail = 0
    for _, uuid in bot.post_edges(gid, gen_edges(es, idx)):
        if not uuid:
            fail += 1
        else:
            count += 1

    print("%s edges inserted, %s failed " % (count, fail))
def main():
    """Post the Old Chinese phonology graph read from an OCR file.

    Parses the command line (--host, --key, --username, --password, --path,
    --gid), loads the initials, the GSR series and the initial/series matrix
    with ``load_OCR_file(args.path)``, creates the graph ``GID`` together
    with its node and edge types when it does not exist yet, then posts
    every node and every edge.

    Note: ``--gid`` is parsed but never read; the global ``GID`` names the
    graph everywhere below.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--host", action='store', help="host",
                     default="http://localhost:5000")
    optional = (
        ("--key", "key"),
        ("--username", "user"),
        ("--password", "pwd"),
        ("--path", "path"),
        ("--gid", "graph id"),
    )
    for flag, help_text in optional:
        cli.add_argument(flag, action='store', help=help_text, default=None)
    args = cli.parse_args()

    bot = Botagraph(args.host, args.key)
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    initials, series, matrix = load_OCR_file(args.path)

    if not bot.has_graph(GID):
        graph_attrs = {
            'description': 'a graph of Old Chinese phonology',
            'image': "",
            'tags': ['chinese', 'phonology'],
        }
        bot.create_graph(GID, graph_attrs)
        bot.post_nodetype(GID, 'GSR', 'Karlgren', {})
        bot.post_nodetype(GID, 'Initial', '', {})
        bot.post_edgetype(GID, 'Sinograms', '', {})

    print("Get schema '%s'" % GID)
    schema = bot.get_schema(GID)['schema']
    nodetypes = dict((t['name'], t) for t in schema['nodetypes'])
    edgetypes = dict((t['name'], t) for t in schema['edgetypes'])

    def post_labels(type_name, labels):
        """Post one node per label; return a dict label -> posted uuid."""
        payloads = (
            {'nodetype': nodetypes[type_name]['uuid'],
             'properties': {'label': label}}
            for label in labels
        )
        uuid_of = {}
        # Each result is matched to its label by position in the sequence.
        for position, (_, uuid) in enumerate(bot.post_nodes(GID, payloads)):
            uuid_of[labels[position]] = uuid
        return uuid_of

    initial_uuids = post_labels('Initial', initials)
    series_uuids = post_labels('GSR', series)

    # One edge per (initial, series) cell, labelled with its sinograms.
    edges = []
    for (initial, gsr), sinograms in matrix.iteritems():
        edges.append({
            'edgetype': edgetypes['Sinograms']['uuid'],
            'properties': {'label': u",".join(sinograms)},
            'source': series_uuids[gsr],
            'target': initial_uuids[initial],
        })

    # post_edges is iterated only for its side effects; results are ignored.
    for _ in bot.post_edges(GID, iter(edges)):
        pass
props = {'id': pad_id, 'label': title} yield {'nodetype': pad_nodetype, 'properties': props} names = set([n.strip() for names in data.findAll("name") for n in names.text.split(",")]) for n in names: yield {'nodetype': author_nodetype, 'properties': {'label': n, 'id': n}} def getEdgeIterator(node_idx): for entry in data.findAll("entry"): pad_id = node_idx[entry.findAll("id")[0].text] names = set([n.strip() for n in entry.findAll("name")[0].text.split(",")]) for n in names: nid = node_idx[n] yield { 'edgetype': link_edgetype, 'source': pad_id, 'target': nid, 'properties': {}} # Posting Nodes node_idx = {} for node, uuid in bot.post_nodes(GRAPHNAME, getNodeIterator()): nid = node['properties']['id'] node_idx[nid] = uuid # Posting Edges list(bot.post_edges(GRAPHNAME, getEdgeIterator(node_idx))) bot.star_nodes(GRAPHNAME, node_idx.values())
'edgetype': edgetypes_uuids[EdgeVote.name], 'source': user_id, 'target': com_id, 'properties': {} } if vote_dict[str(i)] == '1': com_id = nodes_uuids['+' + str(i)] yield { 'edgetype': edgetypes_uuids[EdgeVote.name], 'source': user_id, 'target': com_id, 'properties': {} } def getOppositeIterator(): for row in comments['data']: d = {k: v for k, v in zip(comments['select_columns'], row)} c1 = '+' + d['comment-id'] c2 = '-' + d['comment-id'] yield { 'edgetype': edgetypes_uuids[EdgeOppositeComment.name], 'source': c1, 'target': c2, 'properties': {} } list(bot.post_edges(GRAPHNAME, getVoteIterator())) list(bot.post_edges(GRAPHNAME, getOppositeIterator()))
def main(): """ re-Index all the Proxteam corpus """ from pprint import pprint parser = argparse.ArgumentParser() parser.add_argument("--host", action='store', help="host", default="http://*****:*****@ %s \n " % (args.gid, args.host) bot = Botagraph(args.host, args.key) gid = args.gid if args.username and args.password: bot.authenticate(args.username, args.password) if args.infos: pprint(bot.get_graph(gid)) return # read / parse graph print "\n * Reading %s" % args.path graph = igraph.read(args.path) # subgraph if args.cut > 0: print " ** cut %s based on degree()" % args.cut # cut method based on degree n = int(args.cut) vs = list((v.index, v.degree()) for v in graph.vs) vs = sorted(vs, key=lambda x: x[1], reverse=True) vs = vs[:n] graph = graph.subgraph([v[0] for v in vs]) elif args.gl > 0: from cello.graphs.prox import prox_markov_dict, sortcut, ALL n = int(args.gl) extract = prox_markov_dict(graph, range(graph.vcount()), 80, add_loops=True) vs = [i for i, v in sortcut(extract, n)] print "vs", vs graph = graph.subgraph(vs) print graph.summary() graph.es['a'] = [1 for i in xrange(graph.vcount())] if not bot.has_graph(gid): print "\n * Create graph %s" % gid bot.create_graph( gid, { 'description': "Dicosyn experiment\n * ", 'image': "", 'tags': ['synonymes', 'dictionnaire'] }) print "\n * Get schema '%s'" % gid schema = bot.get_schema(gid)['schema'] nodetypes = {n['name']: n for n in schema['nodetypes']} edgetypes = {e['name']: e for e in schema['edgetypes']} print "\n nodetypes: ", nodetypes.keys() print "\n edgetypes: ", edgetypes.keys() if not "word" in nodetypes: print "\n\n * Creating node type %s" % "word" props = {"label": Text(), "lang": Text()} bot.post_nodetype(gid, "word", "no description", props) if not "is_syn" in edgetypes: print "\n\n * Creating edge type %s" % "is_syn" bot.post_edgetype(gid, "is_syn", "no desc", {"a": Text()}) schema = bot.get_schema(gid)['schema'] nodetypes = {n['name']: n for n in schema['nodetypes']} edgetypes = {e['name']: e for 
e in schema['edgetypes']} print nodetypes print edgetypes idx = {} if args.wait: raw_input("press <enter> key to start edges and nodes importation") if args.seed: def set_node(v): if v['label'] not in idx: node = bot.post_node( gid, node_payload(v, nodetypes['word']['uuid'])) idx[v['label']] = node['uuid'] print "inserting %s %s" % (v['label'], node['uuid']) idx = {} v1 = None # seeds grow into beautiful flowers while graph.vcount() > 0: v1 = graph.vs[0] if v1 is None else v1 size = graph.vcount() nei = v1.neighbors() if not len(nei): graph.delete_vertices([v1.index]) v1 = None continue for i in range(min([5, len(nei)])): nei = v1.neighbors() if i >= len(nei): if graph.vcount(): r = randint(0, graph.vcount() - 1) v1 = graph.vs[r] break r = randint(0, len(nei) - 1) v2 = nei[r] print "inserting edge %s %s" % (v1['label'], v2['label']) set_node(v1) set_node(v2) eid = graph.get_eid(v1.index, v2.index) src, tgt = idx[v1['label']], idx[v2['label']] uuid = bot.post_edge( gid, edge_payload(edgetypes['is_syn']['uuid'], src, tgt, {})) # delete from graph # * inserted edges # * nodes with no more edges graph.delete_edges([eid]) delete_nodes = [ v.index for v in (v1, v2) if len(graph.neighbors(v)) == 0 ] if len(delete_nodes): graph.delete_vertices(delete_nodes) if graph.vcount(): r = randint(0, graph.vcount() - 1) # switch v1 v1 = graph.vs[r] else: break # wait sometimes pause(args.pause) else: print "posting nodes" count = 0 fail = 0 for node, uuid in bot.post_nodes( gid, gen_nodes(graph, nodetypes['word']['uuid'])): if not uuid: fail += 1 else: count += 1 idx[node['properties']['label']] = uuid print "%s nodes inserted " % count #print "iterate over nodes" #for node in bot.find_all_nodes(gid, "word", {}): #pass # post edges print "posting edges" count = 0 fail = 0 inv_idx = {v: k for k, v in idx.iteritems()} for obj, uuid in bot.post_edges( gid, gen_edges(graph, edgetypes['is_syn']['uuid'], idx)): if not uuid: fail += 1 else: count += 1 # wait sometimes pause(args.pause) 
print "%s edges inserted, %s failed " % (count, fail)
def to_padagraph(host, key, gid, path):
    """Post the sites and relations parsed from *path* into graph *gid*.

    host: server URL handed to ``Botagraph``.
    key: API key handed to ``Botagraph``.
    gid: graph identifier; the graph, its ``Site`` node type and its
        ``is_related`` edge type are created when the graph is missing.
    path: input handed to ``parse``, which returns ``(nodes, edges)``.

    Edges whose source or target was not posted successfully are skipped.
    """
    from reliure.types import Text, Numeric
    from botapi import Botagraph

    bot = Botagraph(host, key)
    nodes, edges = parse(path)

    if not bot.has_graph(gid):
        # The description is assembled line by line. The former
        # `.replace(" ", "")` on an indented literal removed every space
        # from the text, not just the indentation.
        description = "\n".join([
            "http://utopies-concretes.org/#/fr",
            "Ils ont essayé de nous enterrer, "
            "ils ne savaient pas que nous étions des graines.",
            "Un graphe de près de 3000 sites internet de collectifs, "
            "structures, médias, blogs — positions relatives et "
            "interconnexions",
        ])
        attrs = {
            'description': description,
            'image': "",
            'tags': ['social-network', 'utopies-concretes'],
        }

        print("\n * Creating graph %s" % gid)
        bot.create_graph(gid, attrs)

        print("\n * Creating node type %s" % "Site")
        props = {
            'label': Text(),
            'url': Text(),
            'tags': Text(multi=True, uniq=True),
            'image': Text(),
            'description': Text(),
        }
        bot.post_nodetype(gid, "Site", "Site ", props)

        print("\n * Creating edge type %s" % "is_related")
        props = {
            'score': Numeric(),
        }
        bot.post_edgetype(gid, "is_related", "is_related", props)

    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}

    def gen_nodes():
        """Yield one ``Site`` payload per parsed node."""
        for properties in nodes.itervalues():
            yield {
                'nodetype': nodetypes['Site']['uuid'],
                'properties': properties,
            }

    print("posting nodes")
    count = fail = 0
    # Maps the 'pid' property of each posted node to its uuid.
    idx = {}
    for node, uuid in bot.post_nodes(gid, gen_nodes()):
        if not uuid:
            fail += 1
        else:
            count += 1
            idx[node['properties']['pid']] = uuid

    print("%s nodes inserted " % count)

    def gen_edges():
        """Yield an ``is_related`` payload for each edge with both ends posted."""
        for e in edges:
            src = idx.get(e["source"], None)
            tgt = idx.get(e["target"], None)
            if src and tgt:
                yield {
                    'edgetype': edgetypes['is_related']['uuid'],
                    'source': src,
                    # NOTE(review): 'label' sits beside 'properties' rather
                    # than inside it -- confirm the API reads it there.
                    'label': "is_related",
                    'target': tgt,
                    'properties': {'score': 1},
                }

    print("posting edges")
    count = fail = 0
    for _, uuid in bot.post_edges(gid, gen_edges()):
        if not uuid:
            fail += 1
        else:
            count += 1

    # Failures were counted but never shown before; report them too.
    print("%s edges inserted, %s failed " % (count, fail))
# NOTE(review): ``iterEdges`` takes ``self`` and reads ``self.df`` -- presumably
# it belongs to the edge-importer class whose header is outside this view;
# confirm its indentation against that class.
def iterEdges(self, nodes_uuids, rels_uuids):
    """Yield one edge payload per row of ``self.df``.

    nodes_uuids: mapping from the ``node_1`` / ``node_2`` values of a row
        to node uuids.
    rels_uuids: mapping from the ``rel_type`` value of a row to an edge
        type uuid.

    Raises KeyError when a row refers to a node or relation type that is
    missing from the mappings.
    """
    for _, record in self.df.iterrows():
        source = nodes_uuids[record.node_1]
        target = nodes_uuids[record.node_2]
        yield {
            'edgetype': rels_uuids[record.rel_type],
            'source': source,
            'target': target,
            'properties': {},
        }


# --- import script: create the graph, then post node types, nodes,
# --- edge types and edges, in that order.
bot = Botagraph(PDG_HOST, PDG_KEY)
bot.create_graph(
    GRAPHNAME,
    {'description': "IJIC's data", "tags": ["panama", "leak"]},
)

# Maps the 'node_id' property of every posted node to its uuid.
nodes_uuids = {}
for nodetype in nodetypes:
    nImporter = NodesImporter(nodetype)
    nt = nImporter.buildNodeType()
    type_uuid = bot.post_nodetype(
        GRAPHNAME, nt.name, nt.description, nt.properties)
    posted = bot.post_nodes(GRAPHNAME, nImporter.iterNodes(type_uuid))
    for node, uuid in posted:
        nodes_uuids[node['properties']['node_id']] = uuid

eImporter = AllEdgesImporter()

# Maps each edge type name to the value returned by post_edgetype.
types_uuid = {}
for et in eImporter.buildEdgeTypes():
    types_uuid[et.name] = bot.post_edgetype(
        GRAPHNAME, et.name, et.description, et.properties)

# list() consumes the iterable returned by post_edges; the results are
# discarded.
list(bot.post_edges(GRAPHNAME, eImporter.iterEdges(nodes_uuids, types_uuid)))
def main(): """ re-Index all the Proxteam corpus """ from pprint import pprint parser = argparse.ArgumentParser() parser.add_argument("--host", action='store', help="host", default="http://*****:*****@ %s \n " % (args.gid, args.host) bot = Botagraph(args.host, args.key) gid = args.gid if args.username and args.password: bot.authenticate(args.username, args.password) if args.infos: pprint( bot.get_graph(gid) ) return # read / parse graph print "\n * Reading %s" % args.path graph = igraph.read(args.path) # subgraph if args.cut > 0: print " ** cut %s based on degree()" % args.cut # cut method based on degree n = int(args.cut) vs = list( (v.index, v.degree() ) for v in graph.vs ) vs = sorted( vs, key=lambda x: x[1], reverse = True ) vs = vs[:n] graph = graph.subgraph( [ v[0] for v in vs ] ) elif args.gl > 0: from cello.graphs.prox import prox_markov_dict, sortcut, ALL n = int(args.gl) extract = prox_markov_dict(graph, range(graph.vcount()), 80, add_loops=True) vs = [ i for i,v in sortcut(extract,n)] print "vs", vs graph = graph.subgraph( vs ) print graph.summary() graph.es['a'] = [ 1 for i in xrange(graph.vcount() ) ] if not bot.has_graph(gid) : print "\n * Create graph %s" % gid bot.create_graph(gid, { 'description':"Dicosyn experiment\n * ", 'image': "", 'tags': ['synonymes', 'dictionnaire'] } ) print "\n * Get schema '%s'" % gid schema = bot.get_schema(gid)['schema'] nodetypes = { n['name']:n for n in schema['nodetypes'] } edgetypes = { e['name']:e for e in schema['edgetypes'] } print "\n nodetypes: ", nodetypes.keys() print "\n edgetypes: ", edgetypes.keys() if not "word" in nodetypes: print "\n\n * Creating node type %s" % "word" props = { "label" : Text(), "lang" : Text() } bot.post_nodetype(gid, "word", "no description", props) if not "is_syn" in edgetypes: print "\n\n * Creating edge type %s" % "is_syn" bot.post_edgetype(gid, "is_syn", "no desc", {"a":Text()}) schema = bot.get_schema(gid)['schema'] nodetypes = { n['name']:n for n in schema['nodetypes'] } 
edgetypes = { e['name']:e for e in schema['edgetypes'] } print nodetypes print edgetypes idx = {} if args.wait : raw_input("press <enter> key to start edges and nodes importation") if args.seed: def set_node(v): if v['label'] not in idx: node = bot.post_node(gid, node_payload(v, nodetypes['word']['uuid'])) idx[ v['label'] ] = node['uuid'] print "inserting %s %s" % (v['label'] , node['uuid']) idx = {} v1 = None # seeds grow into beautiful flowers while graph.vcount() > 0: v1 = graph.vs[0] if v1 is None else v1 size = graph.vcount() nei = v1.neighbors() if not len(nei): graph.delete_vertices([v1.index]) v1 = None continue for i in range( min([5,len(nei)]) ): nei = v1.neighbors() if i >= len(nei): if graph.vcount(): r = randint(0,graph.vcount()-1) v1 = graph.vs[r] break r = randint(0,len(nei)-1) v2 = nei[r] print "inserting edge %s %s" % (v1['label'] , v2['label']) set_node(v1) set_node(v2) eid = graph.get_eid(v1.index, v2.index) src, tgt = idx[v1['label']], idx[v2['label']] uuid = bot.post_edge(gid, edge_payload(edgetypes['is_syn']['uuid'], src, tgt, {})) # delete from graph # * inserted edges # * nodes with no more edges graph.delete_edges([eid]) delete_nodes = [ v.index for v in (v1, v2) if len(graph.neighbors(v)) == 0 ] if len(delete_nodes): graph.delete_vertices(delete_nodes) if graph.vcount(): r = randint(0,graph.vcount()-1) # switch v1 v1 = graph.vs[r] else: break # wait sometimes pause(args.pause) else : print "posting nodes" count = 0 fail = 0 for node, uuid in bot.post_nodes( gid, gen_nodes(graph, nodetypes['word']['uuid']) ): if not uuid: fail += 1 else : count += 1 idx[node['properties']['label']] = uuid print "%s nodes inserted " % count #print "iterate over nodes" #for node in bot.find_all_nodes(gid, "word", {}): #pass # post edges print "posting edges" count = 0 fail = 0 inv_idx = { v:k for k,v in idx.iteritems() } for obj, uuid in bot.post_edges( gid, gen_edges(graph, edgetypes['is_syn']['uuid'], idx) ): if not uuid: fail += 1 else : count += 1 # wait 
sometimes pause(args.pause) print "%s edges inserted, %s failed " % (count, fail)
def main():
    """Graph which JavaScript files listen to or trigger which messages.

    Every file given on the command line (except names ending in
    ``min.js``) becomes a ``file`` node. Every message name found in a
    ``listenTo(..."name"`` or ``trigger(..."name"`` call becomes a
    ``message`` node, linked to the file by a ``listen`` or ``trigger``
    edge. Nodes and edges are then posted to the graph ``--gid``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", action='store', help="host",
                        default="http://localhost:5000")
    parser.add_argument("--key", action='store', help="key", default=None)
    parser.add_argument("--username", action='store', help="user",
                        default=None)
    parser.add_argument("--password", action='store', help="pwd",
                        default=None)
    parser.add_argument("--gid", action='store', help="graph id",
                        default=None)
    parser.add_argument("files", action='store', default=None, nargs='+')
    args = parser.parse_args()

    # File names are separated so that the banner stays readable.
    print(" analysing {}".format(", ".join(args.files)))

    # Bot creation & login
    bot = Botagraph(args.host, args.key)
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    gid = args.gid
    if not bot.has_graph(gid):
        print("create graph %s" % gid)
        # NOTE(review): a bare string is passed as the graph attributes;
        # confirm that create_graph accepts it.
        bot.create_graph(gid, "no description")

        props = {"label": Text()}
        for type_name in ("file", "message"):
            print("create node type %s" % type_name)
            bot.post_nodetype(gid, type_name, "no description", props)

        for type_name in ("listen", "trigger"):
            print("create edge type %s" % type_name)
            bot.post_edgetype(gid, type_name, "no desc", {})

    print("Get schema '%s'" % gid)
    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}

    nodetype_file = nodetypes['file']['uuid']
    nodetype_message = nodetypes['message']['uuid']

    # Each pattern captures the first double-quoted lowercase/dash name that
    # follows the call; it is paired with the edge type it produces.
    # Compiled once instead of once per scanned line.
    patterns = (
        (re.compile(r'listenTo\([^"]*"([a-z-]+)"'),
         edgetypes['listen']['uuid']),
        (re.compile(r'trigger\([^"]*"([a-z-]+)"'),
         edgetypes['trigger']['uuid']),
    )

    vs = {}   # label -> node description (files and messages share it)
    es = []   # (file label, edge type uuid, message label)

    for js_file in args.files:
        if js_file.endswith('min.js'):
            # ignore minified js
            continue
        with codecs.open(js_file, 'r', 'utf8') as FILE:
            # Drop any leading run of "." and "/" from the node label.
            js_file = re.sub('^[./]+', '', js_file)
            vs[js_file] = {'label': js_file, 'nodetype': nodetype_file}
            for line in FILE:
                # remove comments: keep what precedes the first "//"
                line = line.strip().split('//', 1)[0]
                for pattern, edgetype in patterns:
                    m = pattern.search(line)
                    if not m:
                        continue
                    message = m.group(1)
                    if message not in vs:
                        vs[message] = {
                            'label': message,
                            'nodetype': nodetype_message,
                        }
                    es.append((js_file, edgetype, message))

    # Maps the label of each posted node to its uuid.
    idx = {}
    print("posting nodes")
    count = fail = 0
    for node, uuid in bot.post_nodes(gid, gen_nodes(vs)):
        if not uuid:
            fail += 1
        else:
            count += 1
            idx[node['properties']['label']] = uuid

    # Failures were counted but silently dropped before; report them too.
    print("%s nodes inserted, %s failed " % (count, fail))

    # post edges
    print("posting edges")
    count = fail = 0
    for _, uuid in bot.post_edges(gid, gen_edges(es, idx)):
        if not uuid:
            fail += 1
        else:
            count += 1

    print("%s edges inserted, %s failed " % (count, fail))