Example #1
import json

def save(G, fname):
    # Dump node and edge attributes as plain lists (networkx 1.x attribute API)
    with open(fname, 'w') as fh:
        json.dump(dict(nodes=[[n, G.node[n]] for n in G.nodes()],
                       edges=[[u, v, G.edge[u][v]] for u, v in G.edges()]),
                  fh, indent=2)
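A matching loader is not part of this example, but a minimal sketch could look like the following (the load name is hypothetical; it assumes the nodes/edges layout written by save() above and the networkx 1.x attribute API):

import json
import networkx as nx

def load(fname):
    # Hypothetical inverse of save(): rebuild the graph and its
    # node/edge attribute dicts from the JSON written above.
    with open(fname) as fh:
        data = json.load(fh)
    G = nx.Graph()
    for n, attrs in data['nodes']:
        G.add_node(n, **attrs)
    for u, v, attrs in data['edges']:
        G.add_edge(u, v, **attrs)
    return G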
Example #2
    def wikilinks_graph(self):
        """
        Generate a wikilinks graph using networkx
        :rtype: Graph
        """
        import json
        import os
        import re
        import tempfile
        import networkx as nx
        import requests
        from networkx.readwrite import json_graph

        tmpdir = tempfile.gettempdir()
        graph_object = tmpdir + '/' + str(self.id) + '.wikilinks.json'

        def _get_links(ngram):
            ngram_links = json.loads(requests.get(template_query.format(ngram)).text)
            try:
                # Python 2 idiom: dict.values() is a list, so [0] selects the only page
                ngram_links = ngram_links['query']['pages'].values()[0]['links']
            except KeyError:
                return []
            ngram_links = [re.sub(r' \(.+\)', '', link['title'].lower()) for link in ngram_links]
            ngram_links = set([ngram for ngram in ngram_links if len(ngram.split()) > 1])
            return ngram_links

        if not os.path.exists(graph_object):
            graph = nx.Graph()
            links_dict = {}
            template_query = u'http://en.wikipedia.org/w/api.php?action=query&titles={0}&prop=links&plnamespace=0&pllimit=500&format=json'
            article_ngrams = list(self.articlecollocation_set.values_list('ngram', flat=True))
            for i, ngram1 in enumerate(article_ngrams):
                if ngram1 in links_dict:
                    ngram1_links = links_dict[ngram1]
                else:
                    ngram1_links = _get_links(ngram1)
                    links_dict[ngram1] = ngram1_links
                for j in range(i+1, len(article_ngrams)):
                    ngram2 = article_ngrams[j]
                    if ngram2 in links_dict:
                        ngram2_links = links_dict[ngram2]
                    else:
                        ngram2_links = _get_links(ngram2)
                        links_dict[ngram2] = ngram2_links
                    if ngram1 in ngram2_links or ngram2 in ngram1_links:
                        graph.add_edge(ngram1, ngram2)
            # json_graph.dump/load are legacy networkx 1.x APIs (removed in later releases)
            with open(graph_object, 'w') as fh:
                json_graph.dump(graph, fh)
        else:
            with open(graph_object) as fh:
                graph = json_graph.load(fh)
        return graph
Example #3
def generate_local_instance(edge_type='prior_nodes'):
	
	#massive database call. Iterate through this if the database gets too large
	objs=Objecttype.objects.all()
	G=nx.DiGraph()
	
	#generate subgraphs for each node (along with metadata) and merge them
	for i, each in enumerate(objs):
		sub_G = nbh_subgraph(each, edge_type)
		G.add_edges_from(sub_G.edges())
		G.add_nodes_from(sub_G.nodes(data=True))  # same (node, attrs) pairs as the zip() idiom
		print i

	#serialize data into json format and save locally
	g_json = json_graph.node_link_data(G)  # node-link format to serialize
	with open("static/local_instance.json", 'w') as fh:
		json.dump(g_json, fh)  # g_json is a plain dict, so stdlib json serializes it
Example #4
def consumer(q):
    while not q.empty():
        module = open(q.get())
        try:
            this_module = get_module(module)
            if not filter_on_package('com.nytimes', this_module):
                continue
            graph.add_node(this_module)
            imports = map(get_package_from_line, filter(is_import_line, module))
            for impt in imports:
                if filter_on_package('com.nytimes', impt):
                    graph.add_node(impt)
                    graph.add_edge(this_module, impt)
        except StopIteration:
            print module.name
        finally:
            module.close()

    dc = nx.degree_centrality(graph)
    nx.set_node_attributes(graph, 'degree_cent', dc)
    with open('graph.json', 'w') as fh:
        json_graph.dump(graph, fh)  # legacy networkx API; dump() returns None, so nothing to print
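A hypothetical driver for consumer() might look like the sketch below; the 'src' directory and the .java filter are assumptions, and the call is single-threaded because the q.empty()/q.get() pattern above is not atomic across multiple concurrent workers.

import os
import Queue  # Python 2 stdlib module (renamed queue in Python 3)

q = Queue.Queue()
for root, dirs, files in os.walk('src'):
    for name in files:
        if name.endswith('.java'):
            q.put(os.path.join(root, name))
consumer(q)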
Example #5
        nx.draw_networkx_nodes(g, pos, nodelist=inds, node_color='gray', node_size=map(node_size, inds))
        nx.draw_networkx_edges(g, pos, alpha=0.05)
        plt.show()
    else:
        # TODO: Create a script that compiles all external files into the
        #       govtrack file (see virtualenv for details on how to do this).
        #       With that done, create all of the HTML, CSS, and JS files on
        #       invocation and write them to a temp directory.

        # Serialize the graph to the network.json file
        pwd = os.path.dirname(os.path.realpath(__file__))
        browser_dir = os.path.join(pwd, 'browser')
        network_file = os.path.join(browser_dir, 'js', 'network.json')
        with open(network_file, 'w') as fout:
            if args.resize is not None:
                g.graph['resize'] = args.resize
            json_graph.dump(g, fout)

        # Switch to the browser directory and start up a simple HTTP server
        os.chdir(browser_dir)
        Server = type('Server', (TCPServer, object), {'allow_reuse_address': True})
        httpd = Server(("", 8080), SimpleHTTPRequestHandler)
        p = Process(target=httpd.serve_forever)
        p.start()
        webbrowser.open("http://localhost:8080")
        try:
            print "Press Ctrl-c to quit..."
            p.join()
        except KeyboardInterrupt:
            p.terminate()
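The Server = type(...) one-liner above is just a compact way to subclass TCPServer with allow_reuse_address enabled; the conventional spelling would be:

class Server(TCPServer, object):
    # Allow rebinding to port 8080 right after a previous run exits
    allow_reuse_address = True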
Example #6
def files_to_json(
        chosen_pred,
        clasp_filename,
        clasp_is_timestamped,
        clasp_is_optimizing,
        cost_pred,
        gringo_text_filename,
        is_directed,
        json_data_filename,
        json_time_filename,
        json_soln_filename
    ):

    edge_id_map = None
    costs       = None

    # If "gringo -t" output is given, then we use that.
    if gringo_text_filename and cost_pred:
        costs = parse_costs_from_gringo_text(
            cost_pred, gringo_text_filename
        )
    # Otherwise, if we do not have the graph information, default to
    # using an undirected, fully connected graph (i.e. a complete graph).
    # We parse the node names from the clasp answer file.
    else:
        nodes = parse_nodes_from_solution_file(
            chosen_pred, clasp_filename, clasp_is_timestamped
        )
        nodes, edges, costs = create_complete_graph(nodes)
        is_directed = False

    # Sanity check that nothing went wrong.
    if costs is None or len(costs) == 0:
        return (False, 'Costs cannot be None or length 0')

    # Create a dictionary that, when converted to JSON, is compatible with
    # the vis.js javascript library.
    visjs_json_dict, edge_id_map = create_visjs_dict(
        costs, is_directed
    )

    # Create two assisting dictionaries:
    # - timing the animation: the time differences between answers
    # - edge sets: which edges belong to which answer
    # These dictionaries are also converted to JSON later.
    timing_dict, answer_sets_dict = create_timing_and_answer_set_dicts(
        chosen_pred, clasp_filename,
        clasp_is_timestamped, clasp_is_optimizing, edge_id_map
    )

    # Create directories for the JSON files, if they do not exist.
    mkpath(os.path.dirname(json_data_filename))
    mkpath(os.path.dirname(json_time_filename))
    mkpath(os.path.dirname(json_soln_filename))

    # Save the JSON files.
    # Write the graph to a JSON file.
    with open(json_data_filename, 'w') as fh:
        nxjson.dump(visjs_json_dict, fh, indent=2)
    # Write the timings to a JSON file.
    with open(json_time_filename, 'w') as fh:
        nxjson.dump(timing_dict, fh, indent=2)
    # Write the solutions to a JSON file.
    with open(json_soln_filename, 'w') as fh:
        nxjson.dump(answer_sets_dict, fh, indent=2)

    return (True, 'Success')
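A hypothetical invocation is sketched below; every filename and predicate name is a placeholder rather than something taken from the surrounding project.

ok, msg = files_to_json(
    chosen_pred='chosen',
    clasp_filename='answers.txt',
    clasp_is_timestamped=False,
    clasp_is_optimizing=True,
    cost_pred='cost',
    gringo_text_filename='grounded.txt',
    is_directed=False,
    json_data_filename='out/data.json',
    json_time_filename='out/time.json',
    json_soln_filename='out/soln.json',
)
if not ok:
    print msg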
Example #7
def main():
    LOGGER.setLevel(logging.DEBUG)
    APILOGGER.setLevel(logging.DEBUG)
    parser = argparse.ArgumentParser()
    parser.add_argument('seed', metavar='U', type=str, nargs='+',\
            help='seed users')
    parser.add_argument('--saved', dest='savefile', metavar='SAVED', type=str,\
            help='saved progress .json', default='')
    args = parser.parse_args()
    oauth = {}
    with open('oauth.json') as raw:
        oauth = json.load(raw)
        
    tapi = api.Wrapper(oauth['access'], oauth['accessSecret'],\
            oauth['consumer'], oauth['consumerSecret'])
    if args.savefile:
        with open(args.savefile) as saved:
            graph = nxjson.load(saved)
    else:
        graph = nx.DiGraph()
        seed = set(map(int, args.seed))
        users = seed.union(*[tapi.followers(acct) for acct in seed])
        graph.add_nodes_from(users)
 
    users = graph.nodes()
    progress = 0
    total = 0
    for user in users:
        total += 1
        if graph.node[user].get('username'):
            continue
        info = tapi.info(user)
        if not info.get('screen_name') or info.get('protected'):
            graph.remove_node(user)
            continue
        progress += 1
        LOGGER.debug('Adding user %s aka @%s', user, info.get('screen_name'))
        graph.add_node(user, name=info.get('name',''), location=info.get('location',''),\
                followers=info.get('followers_count',0), lang=info.get('lang',''),\
                following=info.get('friends_count',0), username=info.get('screen_name',''),\
                protected=info.get('protected'))
        if progress == 100:
            progress = 0
            with open('tmp.json', 'w') as garph:
                nxjson.dump(graph, garph)
            LOGGER.info('Saved info for %s/%s users', total, len(users))

    LOGGER.info('Info collected')
    with open('garph.json', 'w') as garph:
        nxjson.dump(graph, garph)
    nodeSet = set(graph.nodes())
    users = graph.nodes() # Removed some nodes earlier, can't try to access them
    progress = 0

    while users: # for some reason no edges to seed, but they are in users because we got their data
        user = users.pop()
        if graph.node[user].get('complete'):
            continue
        graph.node[user]['complete'] = True
        try:
            followers = tapi.followers(user)
        except api.NoDataError:
            continue
        relevant = followers & nodeSet
        graph.node[user]['follower_ids'] = ','.join(map(str, followers))
        LOGGER.debug('Adding edges for user %s aka @%s', user, graph.node[user]['username'])
        graph.add_edges_from([(follower, user) for follower in relevant])
        progress += 1
        if progress == 5:
            progress = 0
            with open('tmp.json', 'w') as garph:
                nxjson.dump(graph, garph)
            LOGGER.info('Saved graph with %s user edgesets remaining', len(users))

    with open('garph.json', 'w') as garph:
        nxjson.dump(graph, garph)
    nx.write_gexf(graph, 'garph.gexf')
    LOGGER.info('Edges collected')
    LOGGER.info('Job complete')
Example #8
    def dbpedia_graph(self, redirects=True):
        """
        Generate a DBpedia category tree using networkx
        :rtype: nx.Graph
        """
        import json
        import os
        import tempfile
        import networkx as nx
        import requests
        from networkx.readwrite import json_graph
        from SPARQLWrapper import SPARQLWrapper, JSON
        tmpdir = tempfile.gettempdir()
        if redirects:
            graph_object = tmpdir + '/' + str(self.id) + '.redirects.dbpedia.json'
        else:
            graph_object = tmpdir + '/' + str(self.id) + '.dbpedia.json'
        if not os.path.exists(graph_object):

            stop_uris_set = open(settings.ABS_PATH('stop_uri.txt')).read().split()
            stop_uris_set = set([x.split('/')[-1] for x in stop_uris_set])

            def recurse_populate_graph(resource, graph, depth):
                if resource in stop_uris_set:
                    return
                if depth == 0:
                    return
                if 'Category' in resource:
                    query = u'SELECT ?broader, ?related, ?broaderof WHERE' \
                            u' {{{{ <http://dbpedia.org/resource/{0}> skos:broader ?broader }}' \
                            u' UNION {{ ?broaderof skos:broader <http://dbpedia.org/resource/{0}> }}' \
                            u' UNION {{ ?related skos:related <http://dbpedia.org/resource/{0}> }}' \
                            u' UNION {{ <http://dbpedia.org/resource/{0}> skos:related ?related }}}}'.format(resource)

                    results = []
                    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
                    sparql.setReturnFormat(JSON)
                    sparql.setQuery(query)
                    results.extend(sparql.query().convert()['results']['bindings'])
                    for result in results:
                        for rel_type, value in result.iteritems():
                            uri = value['value']
                            parent_resource = uri.split('/')[-1]
                            #print '  ' * (3 - depth), resource, '->', parent_resource
                            graph.add_edge(resource, parent_resource, type=rel_type)
                            recurse_populate_graph(parent_resource, graph, depth-1)
                else:
                    if resource == 'cumulative gain':
                        resource = 'Discounted_cumulative_gain'
                    elif resource == 'world wide web conference':
                        resource = 'International_World_Wide_Web_Conference'
                    wiki_cat_query = u'http://en.wikipedia.org/w/api.php?action=query&titles={0}&prop=categories&cllimit=50&clshow=!hidden&format=json&redirects'
                    results = json.loads(requests.get(wiki_cat_query.format(resource)).text)['query']['pages'].values()[0]
                    if 'missing' in results:
                        results = json.loads(requests.get(wiki_cat_query.format(resource.title())).text)['query']['pages'].values()[0]
                        if 'missing' in results:
                            print results, resource
                            results = []
                        else:
                            results = [c['title'].replace(' ', '_') for c in results['categories']]
                    else:
                        results = [c['title'].replace(' ', '_') for c in results['categories']]
                    rel_type = "subject"
                    for parent_resource in results:
                        #print '  ' * (3 - depth), resource, '->', parent_resource
                        graph.add_edge(resource, parent_resource, type=rel_type)
                        recurse_populate_graph(parent_resource, graph, depth-1)

            graph = nx.Graph()
            ngrams = set(self.articlecollocation_set.values_list('ngram', flat=True))
            ngrams = self.CollocationModel.COLLECTION_MODEL.objects.filter(ngram__in=ngrams)
            for ngram in ngrams:
                if 'dbpedia' in ngram.source or (redirects and 'wiki_redirect' in ngram.source):
                    recurse_populate_graph(ngram.ngram, graph, 2)

            with open(graph_object, 'w') as fh:
                json_graph.dump(graph, fh)
        else:
            with open(graph_object) as fh:
                graph = json_graph.load(fh)
        return graph
Example #9
                advisor_id = match.split('id=')[1].split('"')[0]
                current = current.split(match)[1]
                advisor_name = current.split('</a>')[0]

                advisors[advisor_id] = advisor_name

    author_id = name + author_id
    if not G.has_node(author_id):
        G.add_node(author_id, name=name)
    for advisor in advisors:
        advisor_id = advisors[advisor] + advisor
        if not G.has_node(advisor_id):
            G.add_node(advisor_id, name=advisors[advisor])
        if not G.has_edge(advisor_id, author_id):
            G.add_edge(advisor_id, author_id)

    done = (i / float(total_files)) * 100  # float() avoids Python 2 integer division
    if i % 300 == 0:
        print done

print 'Graph created'
print 'Nodes:'
print len(G.nodes())
print 'Edges:'
print len(G.edges())
print 'Writing file...'
nx.write_gexf(G, './data/genealogy.gexf')
with open('./data/genealogy.json', 'w') as outfile:
    json_graph.dump(G, outfile)
nx.write_edgelist(G, './data/genealogy.csv')
print 'done'
Example #10
def save_output_graph(contacts, residues, residues_info, prg, options):
    descriptions_dict = load_json(options.descriptions_dict)
    g = nx.MultiDiGraph()
    for r in residues:
        r_id = ""
        if options.dont_normalize:
            r_id += r.get_parent().get_id()
        r_id += str(r.get_id()[1])
        resname = r.resname.strip()
        kwargs = {'resname': resname}
        if r_id in residues_info:
            kwargs['conf'] = residues_info[r_id]['conf']
        g.add_node(r_id, **kwargs)
    if prg == '--':
        edge_type = 'dist'
    else:
        edge_type = 'contact'
    all_nodes = set(g.nodes())
    for (num1, num2), (r1, r2), desc in contacts:
        tmp_desc = desc
        if prg == '--':
            short_desc = 'close-doublet'
        else:
            if prg == 'MC':
                tmp_desc = re.sub(r"_\d+$", "", tmp_desc)
            elif prg == 'FR':
                if re.match('^n', tmp_desc):
                    tmp_desc = ""
                    desc = ""
            short_desc = descriptions_dict[prg].get(tmp_desc, 'UNK_SHORT_DESC')
        if prg == "RV":
            # RNA-view uppercase all chains, so we should check the lowercase version
            _num1 = num1[0].lower() + num1[1:]
            _num2 = num2[0].lower() + num2[1:]
            if num1 not in all_nodes and _num1 in all_nodes:
                num1 = _num1
            if num2 not in all_nodes and _num2 in all_nodes:
                num2 = _num2
        n_type = r1 + r2
        if desc != "":
            g.add_edge(num1,
                       num2,
                       type=edge_type,
                       prg=prg,
                       desc=short_desc,
                       full_desc=desc,
                       n_type=n_type)
        if prg != 'FR':
            if prg == '--':
                rev_short_desc = 'close-doublet'
            else:
                rev_short_desc = reverse_desc(short_desc)
            g.add_edge(num2,
                       num1,
                       type=edge_type,
                       prg=prg,
                       desc=rev_short_desc,
                       full_desc="REV:" + desc,
                       reverse=True,
                       n_type=n_type[::-1])
    if re.match(r"^.*\.gz$", options.output_graph):
        f = gzip.open(options.output_graph, "w")
    else:
        f = open(options.output_graph, "w")
    json_graph.dump(g, f, indent=2)
    f.close()
Example #11
import json
from networkx.readwrite import json_graph

def save_to_jsonfile(graph, filename):
    # node_link_data() returns a plain dict; serialize it with stdlib json
    g_json = json_graph.node_link_data(graph)
    with open(filename, 'w') as fh:
        json.dump(g_json, fh)
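For completeness, a sketch of the matching loader (the function name is hypothetical; json_graph.node_link_graph is the standard networkx inverse of node_link_data):

import json
from networkx.readwrite import json_graph

def load_from_jsonfile(filename):
    # Rebuild the graph from the node-link dict written by save_to_jsonfile()
    with open(filename) as fh:
        data = json.load(fh)
    return json_graph.node_link_graph(data)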