Esempio n. 1
0
def main():
    """Upload the data returned by ``load_OCR_file`` to a padagraph graph.

    The options ``--host``, ``--key``, ``--username``, ``--password``,
    ``--path`` and ``--gid`` are parsed from the command line.  The graph is
    addressed through the free name ``GID`` (``--gid`` is parsed but not
    read here).  One ``Initial`` node is posted per item of ``Initiales``,
    one ``GSR`` node per item of ``GSR`` and one ``Sinograms`` edge per
    ``(initial, gsr)`` key of ``Matrix``.
    """
    parser = argparse.ArgumentParser()
    options = (
        ("--host", "host", "http://localhost:5000"),
        ("--key", "key", None),
        ("--username", "user", None),
        ("--password", "pwd", None),
        ("--path", "path", None),
        ("--gid", "graph id", None),
    )
    for flag, text, fallback in options:
        parser.add_argument(flag, action='store', help=text, default=fallback)
    args = parser.parse_args()

    bot = Botagraph(args.host, args.key)
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    Initiales, GSR, Matrix = load_OCR_file(args.path)

    # The graph and its schema are only created when the graph is missing.
    if not bot.has_graph(GID):
        graph_attrs = {
            'description': 'a graph of Old Chinese phonology',
            'image': "",
            'tags': ['chinese', 'phonology'],
        }
        bot.create_graph(GID, graph_attrs)
        bot.post_nodetype(GID, 'GSR', 'Karlgren', {})
        bot.post_nodetype(GID, 'Initial', '', {})
        bot.post_edgetype(GID, 'Sinograms', '', {})

    print("Get schema '%s'" % GID)
    schema = bot.get_schema(GID)['schema']
    nodetypes = dict((n['name'], n) for n in schema['nodetypes'])
    edgetypes = dict((e['name'], e) for e in schema['edgetypes'])

    def payloads(type_name, labels):
        # Lazily build one node payload per label.
        for label in labels:
            yield {'nodetype': nodetypes[type_name]['uuid'],
                   'properties': {'label': label}}

    def post_labelled(type_name, labels):
        # Results are matched back to their label by position.
        by_label = {}
        posted = bot.post_nodes(GID, payloads(type_name, labels))
        for position, (_, uuid) in enumerate(posted):
            by_label[labels[position]] = uuid
        return by_label

    Inidict = post_labelled('Initial', Initiales)
    GSRdict = post_labelled('GSR', GSR)

    edges = []
    for (initial, gsr), sinos in Matrix.iteritems():
        edges.append({
            'edgetype': edgetypes['Sinograms']['uuid'],
            'properties': {'label': u",".join(sinos)},
            'source': GSRdict[gsr],
            'target': Inidict[initial],
        })

    # post_edges is consumed only for its side effect; results are ignored.
    for _ignored in bot.post_edges(GID, iter(edges)):
        pass
Esempio n. 2
0
def main():
    """Load an alliances CSV file into a freshly created padagraph graph.

    Command-line options: ``--host``, ``--key`` (path of a file whose
    stripped content is the API key), ``--path`` (CSV file) and ``--gid``.
    ``--username`` / ``--password`` are parsed but not used here.

    Any existing graph with the same id is deleted, then the graph is
    recreated with a ``Country`` node type and an ``alliance`` edge type.
    CSV columns as read by the code: ``row[0]`` edge id, ``row[1:3]`` and
    ``row[3:5]`` the (code, label) of the two countries, ``row[5:8]`` and
    ``row[8:11]`` the start / end date parts, ``row[13:17]`` the
    defense / neutrality / nonaggression / entente flags.
    """
    import csv

    parser = argparse.ArgumentParser()

    parser.add_argument("--host",
                        action='store',
                        help="host",
                        default="http://localhost:5000")
    parser.add_argument("--key", action='store', help="key", default=None)
    parser.add_argument("--username",
                        action='store',
                        help="user",
                        default=None)
    parser.add_argument("--password", action='store', help="pwd", default=None)
    parser.add_argument("--path", action='store', help="path", default=None)
    parser.add_argument("--gid", action='store', help="graph id", default=None)

    args = parser.parse_args()

    # Bot creation & login; the key file is closed as soon as it is read.
    with open(args.key, 'r') as key_file:
        key = key_file.read().strip()
    print("\n * Connecting to %s \n  " % args.host)
    bot = Botagraph(args.host, key)

    # read / parse graph
    print("\n * Reading %s" % args.path)
    g = Graph(args.gid)
    gid = g.gid

    # Always start from scratch: drop the graph if it already exists.
    if bot.has_graph(gid):
        bot.delete_graph(gid)

    if not bot.has_graph(gid):
        print("\n * Create graph %s" % gid)
        bot.create_graph(gid, g.attrs)

        print("\n * Creating node type %s" % "Country")
        props = {"code": Text(), "label": Text()}
        bot.post_nodetype(gid, "Country", "Country ", props)

        print("\n * Creating edge type %s" % "alliance")
        props = {
            'id': Numeric(),
            'label': Text(),
            'starts': Text(),
            'ends': Text(),
            'defense': Numeric(),
            'neutrality': Numeric(),
            'nonaggression': Numeric(),
            'entente': Numeric(),
        }
        bot.post_edgetype(gid, "alliance", "alliance terms", props)

    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}

    # Loop invariants, hoisted out of the row loop.
    alliances = "defense neutrality nonaggression entente".split()

    def country_node(pair):
        # pair is the (code, label) slice of a CSV row.
        return {
            'nodetype': nodetypes["Country"]['uuid'],
            'properties': {
                'code': pair[0],
                'label': pair[1]
            }
        }

    with open(args.path, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for i, row in enumerate(reader):
            # Row 0 is skipped, as is every odd row; the original comment
            # says "undirected" -- presumably each alliance appears once per
            # direction in the file (verify against the data).
            if i == 0 or i % 2 != 0:
                continue

            g.vs[row[1]] = country_node(row[1:3])
            g.vs[row[3]] = country_node(row[3:5])

            es = dict(zip(alliances, row[13:17]))
            es['starts'] = "/".join(row[5:8])
            es['ends'] = "/".join(row[8:11])
            es['id'] = row[0]
            # The label lists the alliance kinds whose flag is set.
            es['label'] = " ".join([a for a in alliances if es[a] in (1, "1")])

            g.es[i] = {
                'edgetype': edgetypes["alliance"]['uuid'],
                'source': row[1],
                'target': row[3],
                'properties': es
            }

    print("%s %s" % (len(g.vs), len(g.es)))

    # Post the nodes and remember the uuid assigned to each country code.
    idx = {}
    for node, uuid in bot.post_nodes(gid, g.vs.itervalues()):
        idx[node['properties']['code']] = uuid

    bot.star_nodes(gid, idx.values())

    print("%s nodes inserted . " % (len(idx)))

    # Edges were stored with country codes; replace them by node uuids.
    for edge in g.es.itervalues():
        edge['source'] = idx[edge['source']]
        edge['target'] = idx[edge['target']]

    fail = count = 0
    for edge, uuid in bot.post_edges(gid, g.es.itervalues()):
        if not uuid:
            fail += 1
        else:
            count += 1

    print("%s edges inserted, %s failed " % (count, fail))
Esempio n. 3
0
                     'url': url}
            yield {'nodetype': pad_nodetype,
                    'properties': props}

def getEdgeIterator(node_idx):
    """Yield one link edge per hyperlink between backed-up pages.

    Every ``./hackpad-backup-g0v/*.html`` file is parsed; the file name
    without its ``.html`` suffix is looked up in ``node_idx`` to get the
    source node.  Each anchor whose ``href`` matches ``re_link`` and whose
    captured group is a key of ``node_idx`` produces an edge payload.

    :param node_idx: mapping from page id to posted node uuid
    :raises KeyError: if a backup file has no entry in ``node_idx``
    """
    for f in glob.glob("./hackpad-backup-g0v/*.html"):
        page_id = node_idx[f.rsplit("/",1)[-1][:-5]]
        # Read through the handle managed by ``with`` so it is closed
        # (the file used to be opened a second time and never closed).
        with open(f) as F:
            html = F.read()
        bs = BS(html)
        # href=True keeps only anchors that carry an href attribute, so
        # bare <a name="..."> anchors no longer raise KeyError.
        for url in [x['href'] for x in bs.findAll("a", href=True)]:
            m = re_link.search(url)
            if not m:
                continue
            target_id = node_idx.get(m.group(1), None)
            if target_id is not None:
                yield { 'edgetype': link_edgetype,
                        'source': page_id,
                        'target': target_id,
                        'properties': {}}
# Posting Nodes
# node_idx maps the 'id' property of every posted node to its uuid.
node_idx = {}
posted_nodes = bot.post_nodes(GRAPHNAME, getNodeIterator())
for node, uuid in posted_nodes:
    nid = node['properties']['id']
    node_idx[nid] = uuid

# Posting Edges
# The result iterator is drained only for its side effect.
for _posted_edge in bot.post_edges(GRAPHNAME, getEdgeIterator(node_idx)):
    pass

Esempio n. 4
0
def main():
    """Copy a py2neo graph into a padagraph graph as described by ``SCHEMA``.

    Parses ``--host``, ``--key``, ``--username``, ``--password``, ``--path``
    and ``--gid`` (``--path`` is not read here).  When the target graph does
    not exist it is created together with the node and edge types listed in
    ``SCHEMA``.  Nodes and edges produced by ``gen_nodes`` / ``gen_edges``
    are then posted and the counters printed.
    """
    parser = argparse.ArgumentParser()
    for flag, text, fallback in (
            ("--host", "host", "http://localhost:5000"),
            ("--key", "key", None),
            ("--username", "user", None),
            ("--password", "pwd", None),
            ("--path", "path", None),
            ("--gid", "graph id", None)):
        parser.add_argument(flag, action='store', help=text, default=fallback)
    args = parser.parse_args()

    # Bot creation & login
    bot = Botagraph(args.host, args.key)
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    # Source graph, built with py2neo's default arguments.
    graph = py2neo.Graph()

    gid = args.gid

    # Create the target graph and its schema on first use only.
    if not bot.has_graph(gid):
        print("create graph %s" % gid)
        graph_attrs = {
            'description': SCHEMA["description"],
            'image': "",
            'tags': SCHEMA['tags'],
        }
        bot.create_graph(gid, graph_attrs)
        for infos in SCHEMA["Nodes"]:
            print("create node type %s" % infos["type"])
            bot.post_nodetype(gid, *buildType(infos))
        for infos in SCHEMA["Links"]:
            print("create edge type %s" % infos["type"])
            bot.post_edgetype(gid, *buildType(infos))

    print("Get schema '%s'" % gid)
    schema = bot.get_schema(gid)['schema']
    nodetypes = dict((n['name'], n) for n in schema['nodetypes'])
    edgetypes = dict((e['name'], e) for e in schema['edgetypes'])

    print(nodetypes)
    print(edgetypes)

    # idx maps the SCHEMA['key'] property of each posted node to its uuid.
    idx = {}

    print("posting nodes")
    count = fail = 0
    for infos in SCHEMA['Nodes']:
        type_uuid = nodetypes[infos['type']]['uuid']
        for node, uuid in bot.post_nodes(gid, gen_nodes(graph, type_uuid, infos)):
            if uuid:
                count += 1
                idx[node['properties'][SCHEMA['key']]] = uuid
            else:
                fail += 1

    print("%s nodes inserted " % count)

    # post edges
    print("posting edges")
    count = fail = 0

    # Reverse lookup (uuid -> key); nothing reads it in this function.
    inv_idx = dict((v, k) for k, v in idx.iteritems())
    for infos in SCHEMA['Links']:
        type_uuid = edgetypes[infos['type']]['uuid']
        for obj, uuid in bot.post_edges(gid, gen_edges(graph, type_uuid, idx, infos)):
            if uuid:
                count += 1
            else:
                fail += 1
    print("%s edges inserted, %s failed " % (count, fail))
Esempio n. 5
0
def main():
    """Export a py2neo graph to padagraph following the ``SCHEMA`` mapping.

    Command-line options: ``--host``, ``--key``, ``--username``,
    ``--password``, ``--path`` (not read here) and ``--gid``.  A missing
    graph is created with the node/edge types declared in ``SCHEMA``; the
    nodes and edges yielded by ``gen_nodes`` / ``gen_edges`` are then posted
    and the number of successes (and edge failures) is printed.
    """
    parser = argparse.ArgumentParser()
    cli_options = [
        ("--host", "host", "http://localhost:5000"),
        ("--key", "key", None),
        ("--username", "user", None),
        ("--password", "pwd", None),
        ("--path", "path", None),
        ("--gid", "graph id", None),
    ]
    for name, description, default in cli_options:
        parser.add_argument(name, action='store', help=description, default=default)
    args = parser.parse_args()

    # Bot creation & login
    bot = Botagraph(args.host, args.key)
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    # read / parse graph
    graph = py2neo.Graph()

    gid = args.gid

    # Only a missing graph gets created and given its schema.
    if not bot.has_graph(gid):
        print("create graph %s" % gid)
        bot.create_graph(gid, {
            'description': SCHEMA["description"],
            'image': "",
            'tags': SCHEMA['tags'],
        })
        for infos in SCHEMA["Nodes"]:
            print("create node type %s" % infos["type"])
            bot.post_nodetype(gid, *buildType(infos))
        for infos in SCHEMA["Links"]:
            print("create edge type %s" % infos["type"])
            bot.post_edgetype(gid, *buildType(infos))

    print("Get schema '%s'" % gid)
    schema = bot.get_schema(gid)['schema']
    nodetypes = dict((nt['name'], nt) for nt in schema['nodetypes'])
    edgetypes = dict((et['name'], et) for et in schema['edgetypes'])

    print(nodetypes)
    print(edgetypes)

    # Node key (SCHEMA['key'] property) -> uuid of the posted node.
    idx = {}

    print("posting nodes")
    count, fail = 0, 0
    for infos in SCHEMA['Nodes']:
        node_type_uuid = nodetypes[infos['type']]['uuid']
        posted = bot.post_nodes(gid, gen_nodes(graph, node_type_uuid, infos))
        for node, uuid in posted:
            if uuid:
                count += 1
                idx[node['properties'][SCHEMA['key']]] = uuid
                continue
            fail += 1

    print("%s nodes inserted " % count)

    # post edges
    print("posting edges")
    count, fail = 0, 0

    # uuid -> node key; computed as before although nothing reads it here.
    inv_idx = dict((uuid, key) for key, uuid in idx.iteritems())
    for infos in SCHEMA['Links']:
        edge_type_uuid = edgetypes[infos['type']]['uuid']
        posted = bot.post_edges(gid, gen_edges(graph, edge_type_uuid, idx, infos))
        for obj, uuid in posted:
            if uuid:
                count += 1
                continue
            fail += 1
    print("%s edges inserted, %s failed " % (count, fail))
Esempio n. 6
0
def main():
    """Build a graph linking JavaScript files to the messages they use.

    Command-line options: ``--host``, ``--key``, ``--username``,
    ``--password``, ``--gid`` and one or more positional ``files``.

    Each file (except names ending in ``min.js``) becomes a ``file`` node.
    Every line is scanned, after dropping everything from the first ``//``,
    for ``listenTo(..."name"`` and ``trigger(..."name"``; each captured name
    becomes a ``message`` node linked to the file by a ``listen`` or
    ``trigger`` edge.  Files and messages share one name-keyed dictionary,
    so a message named like a file would reuse that file's entry.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--host",
                        action='store',
                        help="host",
                        default="http://localhost:5000")
    parser.add_argument("--key", action='store', help="key", default=None)
    parser.add_argument("--username",
                        action='store',
                        help="user",
                        default=None)
    parser.add_argument("--password", action='store', help="pwd", default=None)
    parser.add_argument("--gid", action='store', help="graph id", default=None)
    parser.add_argument("files", action='store', default=None, nargs='+')

    args = parser.parse_args()

    # Separate the names: joining on "" printed them run together.
    print(" analysing {}".format(", ".join(args.files)))

    # Bot creation & login
    bot = Botagraph(args.host, args.key)

    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    gid = args.gid
    if not bot.has_graph(gid):
        print("create graph %s" % gid)
        # NOTE(review): a plain string is passed as graph attributes here;
        # confirm that Botagraph.create_graph accepts it.
        bot.create_graph(gid, "no description")
        props = {"label": Text()}
        for type_name in ("file", "message"):
            print("create node type %s" % type_name)
            bot.post_nodetype(gid, type_name, "no description", props)
        # Log the edge types actually created (the message said "is_syn").
        for type_name in ("listen", "trigger"):
            print("create edge type %s" % type_name)
            bot.post_edgetype(gid, type_name, "no desc", {})

    print("Get schema '%s'" % gid)
    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}
    nodetype_file = nodetypes['file']['uuid']
    nodetype_message = nodetypes['message']['uuid']
    edgetype_listen = edgetypes['listen']['uuid']
    edgetype_trigger = edgetypes['trigger']['uuid']

    # (pattern, edge type) pairs, applied to every line in this order.
    scanners = (
        (re.compile(r'listenTo\([^"]*"([a-z-]+)"'), edgetype_listen),
        (re.compile(r'trigger\([^"]*"([a-z-]+)"'), edgetype_trigger),
    )

    vs = {}
    es = []

    for js_file in args.files:
        if js_file.endswith('min.js'):
            # ignore minified js
            continue
        with codecs.open(js_file, 'r', 'utf8') as FILE:
            # Node names drop any leading run of '.' and '/' characters.
            js_file = re.sub('^[./]+', '', js_file)
            vs[js_file] = {'label': js_file, 'nodetype': nodetype_file}

            for l in FILE:
                # remove comments:
                l = l.strip().split('//', 1)[0]
                for pattern, edgetype in scanners:
                    m = pattern.search(l)
                    if not m:
                        continue
                    message = m.group(1)
                    if message not in vs:
                        vs[message] = {
                            'label': message,
                            'nodetype': nodetype_message
                        }
                    es.append((js_file, edgetype, message))

    # Node label -> uuid, filled from the successfully posted nodes.
    idx = {}
    print("posting nodes")
    count = 0
    fail = 0
    for node, uuid in bot.post_nodes(gid, gen_nodes(vs)):
        if not uuid:
            fail += 1
        else:
            count += 1
            idx[node['properties']['label']] = uuid

    print("%s nodes inserted " % count)

    # post edges
    print("posting edges")
    count = 0
    fail = 0

    for _, uuid in bot.post_edges(gid, gen_edges(es, idx)):
        if not uuid:
            fail += 1
        else:
            count += 1
    print("%s edges inserted, %s failed " % (count, fail))
Esempio n. 7
0
def main():
    """Post the content of an OCR file as a padagraph graph.

    ``--host``, ``--key``, ``--username``, ``--password``, ``--path`` and
    ``--gid`` are read from the command line; the graph is identified by the
    free name ``GID`` (``--gid`` is parsed but unused in this function).
    ``load_OCR_file(args.path)`` provides ``Initiales``, ``GSR`` and
    ``Matrix``: one node is posted per initial and per GSR entry, then one
    ``Sinograms`` edge per ``(initial, gsr)`` key of ``Matrix``.
    """
    parser = argparse.ArgumentParser()
    cli_options = [
        ("--host", "host", "http://localhost:5000"),
        ("--key", "key", None),
        ("--username", "user", None),
        ("--password", "pwd", None),
        ("--path", "path", None),
        ("--gid", "graph id", None),
    ]
    for name, description, default in cli_options:
        parser.add_argument(name, action='store', help=description, default=default)
    args = parser.parse_args()

    bot = Botagraph(args.host, args.key)
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    Initiales, GSR, Matrix = load_OCR_file(args.path)

    # Graph and schema are created only when the graph does not exist yet.
    if not bot.has_graph(GID):
        bot.create_graph(GID, {
            'description': 'a graph of Old Chinese phonology',
            'image': "",
            'tags': ['chinese', 'phonology'],
        })
        bot.post_nodetype(GID, 'GSR', 'Karlgren', {})
        bot.post_nodetype(GID, 'Initial', '', {})
        bot.post_edgetype(GID, 'Sinograms', '', {})

    print("Get schema '%s'" % GID)
    schema = bot.get_schema(GID)['schema']
    nodetypes = dict((nt['name'], nt) for nt in schema['nodetypes'])
    edgetypes = dict((et['name'], et) for et in schema['edgetypes'])

    def node_payloads(type_name, labels):
        # One payload per label, built lazily while post_nodes consumes it.
        for label in labels:
            yield {
                'nodetype': nodetypes[type_name]['uuid'],
                'properties': {'label': label},
            }

    # Posted results are paired with their label by position.
    Inidict = {}
    initial_results = bot.post_nodes(GID, node_payloads('Initial', Initiales))
    for position, (_, uuid) in enumerate(initial_results):
        Inidict[Initiales[position]] = uuid

    GSRdict = {}
    gsr_results = bot.post_nodes(GID, node_payloads('GSR', GSR))
    for position, (_, uuid) in enumerate(gsr_results):
        GSRdict[GSR[position]] = uuid

    edges = []
    for (initial, gsr), sinos in Matrix.iteritems():
        edges.append({
            'edgetype': edgetypes['Sinograms']['uuid'],
            'properties': {'label': u",".join(sinos)},
            'source': GSRdict[gsr],
            'target': Inidict[initial],
        })

    # Drain the result iterator; the individual results are not inspected.
    for _ignored in bot.post_edges(GID, iter(edges)):
        pass
        props = {'id': pad_id,
                 'label': title}
        yield {'nodetype': pad_nodetype,
                'properties': props}
    names = set([n.strip() for names in data.findAll("name") for n in names.text.split(",")])
    for n in names:
        yield {'nodetype': author_nodetype,
                'properties': {'label': n,
                                'id': n}}

def getEdgeIterator(node_idx):
    """Yield one edge from each entry's node to each of its name nodes.

    For every ``entry`` element of ``data``, the text of its first ``id``
    element selects the source node in ``node_idx``; the text of its first
    ``name`` element is split on commas, stripped and de-duplicated, and
    each resulting name is looked up in ``node_idx`` as the target.

    :param node_idx: mapping from node id (entry id or name) to node uuid
    """
    for entry in data.findAll("entry"):
        first_id = entry.findAll("id")[0]
        pad_id = node_idx[first_id.text]
        raw_names = entry.findAll("name")[0].text
        unique_names = set(part.strip() for part in raw_names.split(","))
        for name in unique_names:
            yield {
                'edgetype': link_edgetype,
                'source': pad_id,
                'target': node_idx[name],
                'properties': {},
            }
# Posting Nodes
# node_idx maps the 'id' property of every posted node to its uuid.
node_idx = {}
posted_nodes = bot.post_nodes(GRAPHNAME, getNodeIterator())
for node, uuid in posted_nodes:
    nid = node['properties']['id']
    node_idx[nid] = uuid

# Posting Edges
# The result iterator is drained only for its side effect.
for _posted_edge in bot.post_edges(GRAPHNAME, getEdgeIterator(node_idx)):
    pass

# Finally star every node that was posted.
bot.star_nodes(GRAPHNAME, node_idx.values())
Esempio n. 9
0

def getUserNodesIterator():
    """Yield one user node payload per row of ``votes['data']``.

    Each row is paired with ``votes['select_columns']``; the value of its
    ``participant`` column is used both as the node id and as its label.
    """
    columns = votes['select_columns']
    for row in votes['data']:
        record = dict(zip(columns, row))
        participant = record['participant']
        yield {
            'nodetype': nodetypes_uuids[NodeUser.name],
            'properties': {
                'id': participant,
                'label': participant,
                'shape': 'circle',
            },
        }


# Posting Nodes
# nodes_uuids maps a node key to the uuid it was posted under: comment
# nodes are keyed by polarity + id, user nodes by their id alone.
nodes_uuids = {}

comment_results = bot.post_nodes(GRAPHNAME, getCommentNodesIterator())
for node, uuid in comment_results:
    nid = node['properties']['polarity'] + node['properties']['id']
    nodes_uuids[nid] = uuid

user_results = bot.post_nodes(GRAPHNAME, getUserNodesIterator())
for node, uuid in user_results:
    nid = node['properties']['id']
    nodes_uuids[nid] = uuid

# Posting Edges


def getVoteIterator():
    for row in votes['data']:
        vote_dict = {k: v for k, v in zip(votes['select_columns'], row)}
        user_id = nodes_uuids[vote_dict['participant']]
        for i in range(N_COMMENTS):
Esempio n. 10
0
def main():
    """ re-Index all the Proxteam corpus """
    # What the visible code does: read a graph file with igraph, optionally
    # reduce it to a subgraph, make sure the target graph has a "word" node
    # type and an "is_syn" edge type, then post the nodes and edges through
    # a Botagraph client -- either in bulk or, with args.seed, one edge at a
    # time.
    from pprint import pprint

    parser = argparse.ArgumentParser()

    # NOTE(review): the --host default below appears masked/truncated in
    # this copy of the file.  The remaining add_argument calls and the
    # parse_args() call are missing; the code further down reads args.gid,
    # args.host, args.key, args.username, args.password, args.infos,
    # args.path, args.cut, args.gl, args.wait, args.seed and args.pause.
    parser.add_argument("--host",
                        action='store',
                        help="host",
                        default="http://*****:*****@ %s \n  " % (args.gid, args.host)
    bot = Botagraph(args.host, args.key)
    gid = args.gid

    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    # With args.infos set, only pretty-print the graph and stop.
    if args.infos:
        pprint(bot.get_graph(gid))
        return

    # read / parse graph
    print "\n * Reading %s" % args.path

    graph = igraph.read(args.path)

    # subgraph
    if args.cut > 0:
        print " ** cut %s based on degree()" % args.cut
        # cut method based on degree
        # keep the n vertices with the highest degree
        n = int(args.cut)
        vs = list((v.index, v.degree()) for v in graph.vs)
        vs = sorted(vs, key=lambda x: x[1], reverse=True)
        vs = vs[:n]
        graph = graph.subgraph([v[0] for v in vs])

    elif args.gl > 0:
        # Keep the vertices returned by sortcut() over the result of
        # prox_markov_dict(); the semantics of these cello helpers are not
        # visible here.
        from cello.graphs.prox import prox_markov_dict, sortcut, ALL
        n = int(args.gl)
        extract = prox_markov_dict(graph,
                                   range(graph.vcount()),
                                   80,
                                   add_loops=True)
        vs = [i for i, v in sortcut(extract, n)]
        print "vs", vs
        graph = graph.subgraph(vs)

    print graph.summary()
    # NOTE(review): this sets an *edge* attribute from a list sized with
    # vcount() (number of vertices); confirm ecount() was not intended.
    graph.es['a'] = [1 for i in xrange(graph.vcount())]

    if not bot.has_graph(gid):
        print "\n * Create graph %s" % gid
        bot.create_graph(
            gid, {
                'description': "Dicosyn experiment\n * ",
                'image': "",
                'tags': ['synonymes', 'dictionnaire']
            })

    print "\n * Get schema '%s'" % gid
    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}

    print "\n nodetypes: ", nodetypes.keys()
    print "\n edgetypes: ", edgetypes.keys()

    # Create the missing schema types, whether or not the graph is new.
    if not "word" in nodetypes:

        print "\n\n * Creating node type %s" % "word"
        props = {"label": Text(), "lang": Text()}
        bot.post_nodetype(gid, "word", "no description", props)

    if not "is_syn" in edgetypes:
        print "\n\n * Creating edge type %s" % "is_syn"
        bot.post_edgetype(gid, "is_syn", "no desc", {"a": Text()})

    # Fetch the schema again so newly created types have their uuid.
    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}

    print nodetypes
    print edgetypes

    # idx maps a vertex label to the uuid of its posted node.
    idx = {}

    if args.wait:
        raw_input("press <enter> key to start edges and nodes importation")

    if args.seed:
        # Incremental mode: edges are posted one by one; every posted edge
        # is removed from the local graph, as is any endpoint left without
        # neighbours, until the local graph has no vertex left.

        def set_node(v):
            # Post vertex v unless its label is already in idx.
            if v['label'] not in idx:
                node = bot.post_node(
                    gid, node_payload(v, nodetypes['word']['uuid']))
                idx[v['label']] = node['uuid']
                print "inserting %s %s" % (v['label'], node['uuid'])

        idx = {}
        v1 = None

        # seeds grow into beautiful flowers

        while graph.vcount() > 0:

            # Start from the first vertex when there is no current one.
            v1 = graph.vs[0] if v1 is None else v1

            # size is assigned but not read afterwards.
            size = graph.vcount()

            nei = v1.neighbors()
            if not len(nei):
                # Isolated vertex: drop it and pick a new start vertex.
                graph.delete_vertices([v1.index])
                v1 = None
                continue

            # Post at most five edges before pausing.
            for i in range(min([5, len(nei)])):

                nei = v1.neighbors()

                if i >= len(nei):
                    # Fewer neighbours than iterations: jump to a random
                    # vertex and end this batch.
                    if graph.vcount():
                        r = randint(0, graph.vcount() - 1)
                        v1 = graph.vs[r]
                    break

                # Pick a random neighbour of v1 as the other endpoint.
                r = randint(0, len(nei) - 1)
                v2 = nei[r]

                print "inserting edge %s %s" % (v1['label'], v2['label'])

                set_node(v1)
                set_node(v2)

                eid = graph.get_eid(v1.index, v2.index)
                src, tgt = idx[v1['label']], idx[v2['label']]

                # The value returned by post_edge is stored but not used.
                uuid = bot.post_edge(
                    gid, edge_payload(edgetypes['is_syn']['uuid'], src, tgt,
                                      {}))

                # delete  from graph
                # * inserted edges
                # * nodes with no more edges

                graph.delete_edges([eid])

                delete_nodes = [
                    v.index for v in (v1, v2) if len(graph.neighbors(v)) == 0
                ]

                if len(delete_nodes):
                    graph.delete_vertices(delete_nodes)

                    if graph.vcount():
                        r = randint(0, graph.vcount() - 1)
                        # switch v1
                        v1 = graph.vs[r]

                    else:
                        break

            # wait sometimes
            pause(args.pause)

    else:
        # Bulk mode: post all nodes, then all edges.

        print "posting nodes"
        count = 0
        fail = 0
        for node, uuid in bot.post_nodes(
                gid, gen_nodes(graph, nodetypes['word']['uuid'])):
            if not uuid:
                fail += 1
            else:
                count += 1
                idx[node['properties']['label']] = uuid

        print "%s nodes inserted " % count

        #print "iterate over nodes"
        #for node in bot.find_all_nodes(gid, "word", {}):
        #pass

        # post edges
        print "posting edges"
        count = 0
        fail = 0

        # Reverse lookup (uuid -> label); not read in this function.
        inv_idx = {v: k for k, v in idx.iteritems()}

        for obj, uuid in bot.post_edges(
                gid, gen_edges(graph, edgetypes['is_syn']['uuid'], idx)):
            if not uuid:
                fail += 1
            else:
                count += 1

            # wait sometimes
            pause(args.pause)

        print "%s edges inserted, %s failed " % (count, fail)
Esempio n. 11
0
def to_padagraph(host, key, gid, path):
    """Upload the sites and relations parsed from ``path`` to padagraph.

    ``parse(path)`` must return ``(nodes, edges)``: ``nodes`` is a dict
    whose values are node property dicts and ``edges`` an iterable of dicts
    with ``source`` and ``target`` keys.  When the graph ``gid`` does not
    exist it is created with a ``Site`` node type and an ``is_related``
    edge type.  Edges whose endpoints were not both posted are skipped.

    :param host: URL of the padagraph server
    :param key: API key handed to ``Botagraph``
    :param gid: id of the target graph
    :param path: input file handed to ``parse``
    """
    from reliure.types import Text, Numeric 
    from botapi import Botagraph, BotApiError
    
    bot = Botagraph(host, key)

    nodes, edges = parse(path)
    
    if not bot.has_graph(gid) :
        
        print("\n * Create graph %s" % gid)
        # The indentation of the description is removed by dropping every
        # run of four spaces.
        attrs = {
            'description':
            """
            http://utopies-concretes.org/#/fr
            
            Ils ont essayé de nous enterrer, ils ne savaient pas que nous étions des graines.

            Un graphe de près de 3000 sites internet de collectifs, structures, médias, blogs — positions relatives et interconnexions
            """.replace("    ", ""),
    
            'image': "",
            'tags': ['social-network', 'utopies-concretes']
        }

        bot.create_graph(gid, attrs )
                        
        # Log the names of the types actually created (the messages used to
        # print "" and "follows").
        print("\n * Creating node type %s" % "Site")
        props = {
                    'label' : Text(),
                    'url'  : Text(),
                    'tags' : Text(multi=True, uniq=True),
                    'image' : Text(),
                    'description' : Text()
                }
        bot.post_nodetype(gid, "Site",  "Site ", props)

        print("\n * Creating edge type %s" % "is_related")
        props = {
                    'score' : Numeric(),
                }
        bot.post_edgetype(gid, "is_related", "is_related", props )
    

    schema = bot.get_schema(gid)['schema']
    nodetypes = { n['name']:n for n in schema['nodetypes'] }
    edgetypes = { e['name']:e for e in schema['edgetypes'] }

    def gen_nodes():
        # Only the property dicts are needed, not the keys of ``nodes``.
        for properties in nodes.itervalues():
            yield {
                'nodetype': nodetypes['Site']['uuid'],
                'properties': properties
            }
    
    print("posting nodes")
    count = 0
    fail = 0
    # idx maps the 'pid' property of each posted node to its uuid; the
    # property dicts returned by parse() are therefore expected to carry a
    # 'pid' key.
    idx = {}
    for node, uuid in bot.post_nodes( gid, gen_nodes() ):
        if not uuid:
            fail += 1
        else :
            count += 1
            idx[node['properties']['pid']] = uuid
        
    print("%s nodes inserted " % count)

    
    def gen_edges():
        for e in edges: 
            src = idx.get(e["source"], None)
            tgt = idx.get(e["target"], None)
            # Skip edges with an endpoint that was not posted.
            if src and tgt:
                yield {
                    'edgetype': edgetypes['is_related']['uuid'],
                    'source': src,
                    'label' : "is_related",
                    'target': tgt,
                    'properties': {'score':1}
                }

    print("posting edges")
    count = fail = 0

    for obj, uuid in bot.post_edges( gid, gen_edges() ):
        if not uuid:
            fail += 1
        else :
            count += 1
    # Report failures too, as the other importers of this file do.
    print("%s edges inserted, %s failed " % (count, fail))
Esempio n. 12
0
    def iterEdges(self, nodes_uuids, rels_uuids):
        """Yield one edge payload per row of ``self.df``.

        The ``node_1`` and ``node_2`` values of a row are translated through
        ``nodes_uuids`` into the source and target, and its ``rel_type``
        through ``rels_uuids`` into the edge type.  Properties are empty.
        """
        for _index, record in self.df.iterrows():
            source_uuid = nodes_uuids[record.node_1]
            target_uuid = nodes_uuids[record.node_2]
            edgetype_uuid = rels_uuids[record.rel_type]
            payload = {
                'edgetype': edgetype_uuid,
                'source': source_uuid,
                'target': target_uuid,
                'properties': {},
            }
            yield payload

# Create the graph, then post every node type with its nodes, every edge
# type, and finally all the edges.
bot = Botagraph(PDG_HOST, PDG_KEY)
graph_attrs = {'description': "IJIC's data", "tags": ["panama", "leak"]}
bot.create_graph(GRAPHNAME, graph_attrs)

# 'node_id' property of each posted node -> uuid it was posted under.
nodes_uuids = {}

for nodetype in nodetypes:
    nImporter = NodesImporter(nodetype)
    nt = nImporter.buildNodeType()
    type_uuid = bot.post_nodetype(GRAPHNAME, nt.name, nt.description,
                                  nt.properties)
    posted_nodes = bot.post_nodes(GRAPHNAME, nImporter.iterNodes(type_uuid))
    for node, uuid in posted_nodes:
        node_id = node['properties']['node_id']
        nodes_uuids[node_id] = uuid

eImporter = AllEdgesImporter()

# Edge type name -> value returned by post_edgetype for that type.
types_uuid = {}
for et in eImporter.buildEdgeTypes():
    posted_type = bot.post_edgetype(GRAPHNAME, et.name, et.description,
                                    et.properties)
    types_uuid[et.name] = posted_type

# Drain the result iterator; individual results are not inspected.
for _posted_edge in bot.post_edges(GRAPHNAME,
                                   eImporter.iterEdges(nodes_uuids, types_uuid)):
    pass
Esempio n. 13
0
def main():
    """ Import a graph file into a Botagraph graph of 'word' nodes and 'is_syn' edges.

    Steps visible in this function:
      * with args.infos set, pretty-print bot.get_graph(gid) and return;
      * read the graph at args.path with igraph.read;
      * optionally reduce it to a subgraph (the args.cut highest-degree
        vertices, or else a cello prox extraction of size args.gl);
      * create the target graph, the 'word' node type and the 'is_syn' edge
        type when they are missing;
      * with args.seed set, post edges one at a time, creating their end
        nodes on demand and removing uploaded edges / isolated vertices from
        the local graph until it is empty; otherwise post all nodes and then
        all edges through gen_nodes / gen_edges.
    """
    from pprint import pprint
    
    parser = argparse.ArgumentParser()
    
    # NOTE(review): the next line is truncated/garbled in this copy. The
    # remaining add_argument calls, the parse_args() call that binds `args`
    # and the beginning of a print statement appear to be missing; restore
    # them from the original script before running.
    parser.add_argument("--host", action='store', help="host", default="http://*****:*****@ %s \n  " % (args.gid, args.host)
    bot = Botagraph(args.host, args.key)
    gid =  args.gid

    # Authenticate only when both credentials were supplied.
    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    # Info mode: dump the graph description and stop.
    if args.infos:
        pprint( bot.get_graph(gid) )
        return 


    # read / parse graph
    print "\n * Reading %s" % args.path
    
    graph = igraph.read(args.path)

    # subgraph
    if args.cut > 0:
        print " ** cut %s based on degree()" % args.cut
        # cut method based on degree: keep the n vertices of highest degree
        n = int(args.cut)
        vs = list( (v.index, v.degree() ) for v in  graph.vs )
        vs = sorted( vs, key=lambda x: x[1], reverse = True )
        vs = vs[:n]
        graph = graph.subgraph( [  v[0] for v in vs ] )
        
    elif args.gl > 0:
        # Vertex selection is delegated to cello's prox_markov_dict / sortcut;
        # their semantics are defined in cello and not visible here.
        # NOTE(review): ALL is imported but not used in this function.
        from cello.graphs.prox import prox_markov_dict, sortcut, ALL
        n = int(args.gl)
        extract = prox_markov_dict(graph, range(graph.vcount()), 80, add_loops=True)
        vs =  [ i for i,v in sortcut(extract,n)]
        print "vs", vs
        graph = graph.subgraph( vs )
        
         

    print graph.summary()
    # Set edge attribute 'a' to 1.
    # NOTE(review): the list is sized with vcount() although it is assigned
    # to edges; ecount() is probably what was intended -- confirm.
    graph.es['a'] = [ 1 for i in xrange(graph.vcount() ) ]


    
    # Create the target graph on first use.
    if not bot.has_graph(gid) :
        print "\n * Create graph %s" % gid
        bot.create_graph(gid, { 'description':"Dicosyn experiment\n * ",
                                'image': "",
                                'tags': ['synonymes', 'dictionnaire']
                              }
                        )
                        
    print "\n * Get schema '%s'" % gid
    schema = bot.get_schema(gid)['schema']
    # Index the schema's node / edge types by name.
    nodetypes = { n['name']:n for n in schema['nodetypes'] }
    edgetypes = { e['name']:e for e in schema['edgetypes'] }

    print "\n nodetypes: ", nodetypes.keys()
    print "\n edgetypes: ", edgetypes.keys()

    # Create the node / edge types this script relies on when they are absent.
    if not "word" in nodetypes:
         
        print "\n\n * Creating node type %s" % "word"
        props = { "label" : Text(),
                  "lang"  : Text()
                }
        bot.post_nodetype(gid, "word",  "no description", props)

    if not "is_syn" in edgetypes:
        print "\n\n * Creating edge type %s" % "is_syn"
        bot.post_edgetype(gid, "is_syn", "no desc", {"a":Text()})

    # Fetch the schema again after the possible type creation above.
    schema = bot.get_schema(gid)['schema']
    nodetypes = { n['name']:n for n in schema['nodetypes'] }
    edgetypes = { e['name']:e for e in schema['edgetypes'] }

    print nodetypes
    print edgetypes

    # label -> uuid of the nodes posted so far
    idx = {}

    # Optionally block until the user presses <enter>.
    if args.wait :
        raw_input("press <enter> key to start edges and nodes importation") 

    if args.seed: 

        # Post vertex v as a 'word' node unless its label is already in idx,
        # and remember the uuid of the created node.
        def set_node(v):
            if v['label'] not in idx:
               node = bot.post_node(gid, node_payload(v, nodetypes['word']['uuid']))
               idx[ v['label'] ] = node['uuid']
               print "inserting %s %s" % (v['label'] , node['uuid'])

        # NOTE(review): idx was already initialised to {} above; this rebinds
        # it to a new empty dict.
        idx = {}
        v1 = None

        # seeds grow into beautiful flowers 
        
        # Loop until every vertex has been removed from the local graph.
        while graph.vcount() > 0:
            
            # Start from the first vertex when there is no current one.
            v1 = graph.vs[0] if v1 is None else v1
            
            # NOTE(review): size is not used afterwards in this function.
            size = graph.vcount()

            nei = v1.neighbors()
            if not len(nei):
                # A vertex without neighbours is removed without being posted.
                graph.delete_vertices([v1.index])
                v1 = None
                continue

            # Post at most 5 edges starting from the current vertex.
            for i in range( min([5,len(nei)]) ):

                # Recomputed on every pass: edges are deleted below and v1
                # may have been switched.
                nei = v1.neighbors()
                
                # Not enough neighbours left: jump to a random vertex and
                # end this round.
                if i >= len(nei):
                    if graph.vcount():
                        r = randint(0,graph.vcount()-1)
                        v1 = graph.vs[r]
                    break
                
                # Pick a random neighbour as the other end of the edge.
                r = randint(0,len(nei)-1)
                v2 = nei[r]
                
                print "inserting edge %s %s" % (v1['label'] , v2['label'])

                set_node(v1)
                set_node(v2)

                eid = graph.get_eid(v1.index, v2.index)
                src, tgt = idx[v1['label']], idx[v2['label']]
                
                # NOTE(review): the returned value is not used afterwards.
                uuid = bot.post_edge(gid, edge_payload(edgetypes['is_syn']['uuid'], src, tgt, {}))

                # delete  from graph
                # * inserted edges
                # * nodes with no more edges 
                
                graph.delete_edges([eid])

                # Indices of the end points left without any neighbour.
                delete_nodes =  [ v.index for v in (v1, v2) if len(graph.neighbors(v)) == 0 ]

                if len(delete_nodes):
                    graph.delete_vertices(delete_nodes)
                    
                    if graph.vcount():
                        r = randint(0,graph.vcount()-1)
                        # switch v1 to a random remaining vertex
                        v1 = graph.vs[r]

                    # Graph is empty: leave the for loop (the while ends too).
                    else: break

            # wait sometimes (pause is defined elsewhere in the file)
            pause(args.pause)

            
            
            
    else :

        # Bulk mode: post every node first, then every edge.
        print "posting nodes"
        count = 0
        fail = 0
        for node, uuid in bot.post_nodes( gid, gen_nodes(graph, nodetypes['word']['uuid']) ):
            # A falsy uuid is counted as a failed insertion.
            if not uuid:
                fail += 1
            else :
                count += 1
                idx[node['properties']['label']] = uuid
            
        print "%s nodes inserted " % count
        
        #print "iterate over nodes"
        #for node in bot.find_all_nodes(gid, "word", {}):
            #pass

        # post edges
        print "posting edges"
        count = 0
        fail = 0

        # uuid -> label; NOTE(review): not used afterwards in this function.
        inv_idx = { v:k for k,v in idx.iteritems() }
        
        for obj, uuid in bot.post_edges( gid, gen_edges(graph, edgetypes['is_syn']['uuid'], idx) ):
            if not uuid:
                fail += 1
            else :
                count += 1

            # wait sometimes    
            pause(args.pause)
            
        print "%s edges inserted, %s failed " % (count, fail)
Esempio n. 14
0
# Compiled once: they are applied to every line of every scanned file.
_LISTEN_RE = re.compile(r'listenTo\([^"]*"([a-z-]+)"')
_TRIGGER_RE = re.compile(r'trigger\([^"]*"([a-z-]+)"')


def _scan_js_lines(lines, patterns):
    """Yield (edgetype, message) for each message reference found in lines.

    `patterns` is a sequence of (compiled_regex, edgetype) pairs. Every line
    is tested against each pattern in order, and only the first match of a
    pattern on a line is reported.
    """
    for line in lines:
        # Drop everything after the first '//' so that commented-out calls
        # are ignored.
        code = line.strip().split('//', 1)[0]
        for pattern, edgetype in patterns:
            match = pattern.search(code)
            if match:
                yield edgetype, match.group(1)


def main():
    """Post a graph of JS files and the messages they listen to or trigger.

    Each file given on the command line (except those ending in 'min.js') is
    scanned line by line for listenTo("...") and trigger("...") calls. Files
    become 'file' nodes, message names become 'message' nodes, and every call
    becomes a 'listen' or 'trigger' edge from the file to the message. The
    graph, node types and edge types are created first when the graph does
    not exist yet.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--host", action='store', help="host", default="http://localhost:5000")
    parser.add_argument("--key", action='store', help="key", default=None)
    parser.add_argument("--username", action='store', help="user", default=None)
    parser.add_argument("--password", action='store', help="pwd", default=None)
    parser.add_argument("--gid", action='store', help="graph id", default=None)
    parser.add_argument("files", action='store', default=None, nargs='+')

    args = parser.parse_args()

    # Separate the names; joining on '' glued all paths into one word.
    print(" analysing {}".format(", ".join(args.files)))

    # Bot creation & login
    bot = Botagraph(args.host, args.key)

    if args.username and args.password:
        bot.authenticate(args.username, args.password)

    gid = args.gid
    if not bot.has_graph(gid):
        print("create graph %s" % gid)
        # Pass a properties dict, as the other create_graph callers in this
        # file do, instead of a bare description string.
        bot.create_graph(gid, {'description': "no description"})
        props = {"label": Text()}
        for name in ("file", "message"):
            print("create node type %s" % name)
            bot.post_nodetype(gid, name, "no description", props)
        for name in ("listen", "trigger"):
            print("create edge type %s" % name)
            bot.post_edgetype(gid, name, "no desc", {})

    print("Get schema '%s'" % gid)
    schema = bot.get_schema(gid)['schema']
    nodetypes = {n['name']: n for n in schema['nodetypes']}
    edgetypes = {e['name']: e for e in schema['edgetypes']}
    nodetype_file = nodetypes['file']['uuid']
    nodetype_message = nodetypes['message']['uuid']
    patterns = (
        (_LISTEN_RE, edgetypes['listen']['uuid']),
        (_TRIGGER_RE, edgetypes['trigger']['uuid']),
    )

    vs = {}  # label -> node description (files and messages share this dict)
    es = []  # (file label, edgetype uuid, message label)

    for path in args.files:
        if path.endswith('min.js'):
            # ignore minified js
            continue
        # Node label: the path without its leading './' or '/' characters.
        js_file = re.sub('^[./]+', '', path)
        with codecs.open(path, 'r', 'utf8') as stream:
            vs[js_file] = {'label': js_file, 'nodetype': nodetype_file}
            for edgetype, message in _scan_js_lines(stream, patterns):
                if message not in vs:
                    vs[message] = {'label': message, 'nodetype': nodetype_message}
                es.append((js_file, edgetype, message))

    # label -> uuid of the nodes that were inserted
    idx = {}
    print("posting nodes")
    count = 0
    fail = 0
    for node, uuid in bot.post_nodes(gid, gen_nodes(vs)):
        # A falsy uuid is counted as a failed insertion.
        if not uuid:
            fail += 1
        else:
            count += 1
            idx[node['properties']['label']] = uuid

    print("%s nodes inserted, %s failed " % (count, fail))

    # post edges
    print("posting edges")
    count = 0
    fail = 0

    for _, uuid in bot.post_edges(gid, gen_edges(es, idx)):
        if not uuid:
            fail += 1
        else:
            count += 1
    print("%s edges inserted, %s failed " % (count, fail))