Exemple #1
0
def dump(nodes, weighted_edges, white, black, flatten_colours,
         flatten_weights, path='uykfe.gexf'):
    """Build a directed graph from nodes and weighted edges and save it as GEXF.

    Args:
        nodes: iterable of objects exposing ``id`` and ``name``; each becomes
            a graph node labelled with its name.
        weighted_edges: collection of ``(weight, edge)`` pairs that can be
            iterated more than once; each edge exposes ``from_.id`` and
            ``to_.id``.
        white: passed to ``rgb`` together with the colour value.
        black: passed to ``rgb`` together with the colour value.
        flatten_colours: when true, the colour value is looked up in the
            result of ``flatten``; otherwise the weight is scaled linearly
            between the minimum and maximum reported by ``log_range``.
        flatten_weights: when true, the stored edge weight is looked up in
            the result of ``flatten``; otherwise the raw weight is stored.
        path: name of the file to write. Must be a file name (not a file
            object) because the file is re-opened for post-processing.
            Defaults to the previously hard-coded ``'uykfe.gexf'``.
    """
    graph = DiGraph()
    for node in nodes:
        graph.add_node(node.id, label=node.name)

    # Hand log_range a real list rather than a lazy map object, so the
    # values stay available however often it walks over them.
    (min_weight, max_weight) = log_range(
        'Weight', [weight for (weight, _edge) in weighted_edges])
    weight_span = max_weight - min_weight

    flattened_weights = None
    if flatten_colours or flatten_weights:
        flattened_weights = flatten(
            dict((weight, weight) for (weight, _edge) in weighted_edges))

    for (weight, edge) in weighted_edges:
        if flatten_colours:
            colour = flattened_weights[weight]
        elif weight_span:
            colour = (weight - min_weight) / weight_span
        else:
            # All weights are identical: the linear scale is undefined, so
            # use the low end instead of raising ZeroDivisionError.
            colour = 0.0
        if flatten_weights:
            weight = flattened_weights[weight]
        graph.add_edge(edge.from_.id,
                       edge.to_.id,
                       weight=weight,
                       viz=rgb(colour, white, black))

    write_gexf(graph, path)

    # fix namespace bug: drop the first viz namespace declaration from the
    # file that was just written
    with open(path) as source:
        xml = source.read()
    xml = xml.replace('xmlns:viz="http://www.gexf.net/1.1draft/viz" ', '', 1)
    with open(path, 'w') as target:
        target.write(xml)
    def __init__(self, graph, outfile):
        """Turn the RDF graph into a network and write it out as GEXF.

        Every triple of every context in ``graph`` is visited. Literal
        objects (when the subject is already in the network) and
        ``rdf:type`` statements are stored as attributes on the subject's
        node; every other triple becomes a labelled, weighted edge. The
        node and edge totals are printed, then the network is written to
        ``outfile``.

        :param graph: RDF graph; must provide ``contexts()``
        :param outfile: destination handed to ``gexf.write_gexf``
        """
        self.outfile = outfile
        self.graph = graph
        self.network = nx.MultiDiGraph()

        # Only collected for the optional report mentioned in the TODO below.
        edge_labels = set()

        # NOTE: rdf sequence links are skipped because treating them as
        # normal triples makes for weird results
        sequence_links = (rdflib.RDF.first, rdflib.RDF.rest)

        # NOTE: could also iterate through the graph as a whole instead of
        # context by context
        for context in self.graph.contexts():
            for triple in context.triples((None, None, None)):
                subj, pred, obj = triple
                if pred in sequence_links:
                    continue

                # make sure subject and object exist as nodes, if appropriate
                self._add_nodes(triple)

                # short-hand name used as property name or edge label
                name = self._edge_label(pred)

                is_type = pred == rdflib.RDF.type
                is_node_literal = (subj in self.network
                                   and isinstance(obj, rdflib.Literal))

                if not (is_node_literal or is_type):
                    # Resource-to-resource relation: add an edge.
                    # NOTE: gephi doesn't support multiple edges, and the
                    # d3/json output probably elides them also. Consider
                    # instead: if an edge already exists, add to the
                    # strength of the existing edge
                    edge_labels.add(name)
                    self.network.add_edge(
                        self._uri_to_node_id(subj),
                        self._uri_to_node_id(obj),
                        label=name,
                        weight=connection_weights.get(name, 1))
                    continue

                # Literal value or rdf:type: store it on the subject's node.
                if is_type:
                    _, val = rdflib.namespace.split_uri(obj)
                    # special case (for now): a Manuscript whose dc:title is
                    # a blank node is recorded as a BelfastGroupSheet
                    title = 'Manuscript'
                    if val == title and isinstance(
                            context.value(subj, rdfns.DC.title), rdflib.BNode):
                        val = 'BelfastGroupSheet'
                else:
                    val = unicode(obj)

                self.network.node[self._uri_to_node_id(subj)][name] = normalize_whitespace(val)

        print('%d nodes, %d edges in full network' % (
            self.network.number_of_nodes(), self.network.number_of_edges()))

        # TODO: useful for verbose output? (also report on relations with
        # no weight?) -- e.g. print the collected edge_labels

        gexf.write_gexf(self.network, self.outfile)
def full_gexf(request):
    """Serve the Belfast Group network data (the same data exposed in
    :meth:`full_js`) as a GEXF download, for use in tools like Gephi."""
    # Serialise into memory so the document can be returned as the body.
    output = StringIO()
    gexf.write_gexf(_network_graph(), output)

    response = HttpResponse(output.getvalue(),
                            content_type="application/gexf+xml")
    # Ask the browser to save the file rather than display it.
    response["Content-Disposition"] = "attachment; filename=belfastgroup.gexf"
    return response
    def __init__(self, files, outfile):
        """Parse RDF files, build a network from the triples and write GEXF.

        All ``files`` are parsed into one ``rdflib.Graph``. Literal objects
        (when the subject is already in the network) and ``rdf:type``
        statements are stored as attributes on the subject's node; every
        other triple becomes a labelled, weighted edge. Triple, node and
        edge totals are printed, then the network is written to ``outfile``.

        :param files: sized collection of inputs accepted by ``Graph.parse``
        :param outfile: destination handed to ``gexf.write_gexf``
        """
        self.outfile = outfile

        self.graph = rdflib.Graph()
        for infile in files:
            self.graph.parse(infile)
        print('%d triples in %d files' % (len(self.graph), len(files)))

        self.network = nx.MultiDiGraph()

        # Only collected for the optional report mentioned in the TODO below.
        edge_labels = set()

        # FIXME: iterating through all triples results in rdf sequences
        # (first/rest) being handled weirdly, so those links are skipped
        sequence_links = (rdflib.RDF.first, rdflib.RDF.rest)

        for triple in self.graph:
            subj, pred, obj = triple
            if pred in sequence_links:
                continue

            # make sure subject and object exist as nodes, if appropriate
            self._add_nodes(triple)

            # short-hand name used as property name or edge label
            name = self._edge_label(pred)

            is_type = pred == rdflib.RDF.type
            is_node_literal = (subj in self.network
                               and isinstance(obj, rdflib.Literal))

            if not (is_node_literal or is_type):
                # Resource-to-resource relation: add an edge.
                edge_labels.add(name)
                self.network.add_edge(subj, obj, label=name,
                                      weight=connection_weights.get(name, 1))
                continue

            # Literal value or rdf:type: store it on the subject's node.
            if is_type:
                _, val = rdflib.namespace.split_uri(obj)
                # special case (for now): a Manuscript whose dc:title is a
                # blank node is recorded as a BelfastGroupSheet
                if val == 'Manuscript' and isinstance(
                        self.graph.value(subj, DC.title), rdflib.BNode):
                    val = 'BelfastGroupSheet'
            else:
                val = unicode(obj)

            self.network.node[subj][name] = val

        print('%d nodes, %d edges' % (self.network.number_of_nodes(),
                                      self.network.number_of_edges()))

        # TODO: useful for verbose output? (also report on relations with
        # no weight?) -- e.g. print the collected edge_labels

        gexf.write_gexf(self.network, self.outfile)
Exemple #5
0
def write_data(maindir, nxgraph):
    """
    Writes data from networkx graph to .json and .gexf file formats for
    rendering by other apps.

    The working directory is changed to the module-level ``start`` and both
    files are created there, named ``<name>_graph.json`` and
    ``<name>_graph.gexf`` where ``<name>`` is the last component of
    ``maindir``.

    :param maindir: Scrape directory, with subdirectories for each domain;
        a trailing path separator is optional
    :param nxgraph: The networkx graph to be exported
    :return: None
    """
    data = json_graph.node_link_data(nxgraph)   # json formatted data

    # Take the directory's own name whether or not maindir ends with a
    # separator. The earlier split('/')[-2] only worked with a trailing
    # slash: without one it returned the parent directory, or raised
    # IndexError for a bare directory name.
    filename = os.path.basename(os.path.normpath(maindir))

    # Return to the starting directory (the original note calls it
    # /Grapher) so the output lands there, not inside the scrape tree.
    os.chdir(start)
    with open('%s_graph.json' % filename, 'w') as w:
        json.dump(data, w)
    gexf.write_gexf(nxgraph, '%s_graph.gexf' % filename)
def gexf_content(request, mode):
    """Make network data available as GEXF files for download and use in
    tools like Gephi.

    :param request: the incoming request (not inspected here)
    :param mode: which data set to export: ``"all"``, ``"group-people"``
        or ``"groupsheets"``
    :return: a GEXF attachment response named ``belfastgroup-<mode>.gexf``,
        or a plain-text 404 response when ``mode`` is not recognised
    """
    if mode == "all":
        graph = network_data()
    elif mode == "group-people":
        # filtered graph of people/places/organizations used for
        # first BG network graph displayed on the site
        # - same data used in :meth:`full_js`
        graph = _network_graph()
    elif mode == "groupsheets":
        graph = gexf.read_gexf(settings.GEXF_DATA["bg1"])
    else:
        # An unknown mode used to fall through with ``graph`` unbound and
        # fail with UnboundLocalError. Answer with a 404 instead; the mode
        # is deliberately not echoed back because it comes from the request.
        return HttpResponse("Unknown network export mode", status=404,
                            content_type="text/plain")

    buf = StringIO()
    gexf.write_gexf(graph, buf)
    response = HttpResponse(buf.getvalue(), content_type="application/gexf+xml")
    response["Content-Disposition"] = "attachment; filename=belfastgroup-%s.gexf" % mode
    return response
Exemple #7
0
def export_graph(graph, save_path=_get_module_root_dir(), no_verbose=False):
    """Write ``graph`` to ``<save_path>/graph.gexf``.

    Failures are reported on stdout (unless ``no_verbose`` is true) rather
    than raised, and the function returns ``None`` in every case.

    Args:
        graph: the graph to export; converted by ``_ensure_right_type``,
            which is expected to return ``None`` for unsupported input
            (per the message below: a dict or a networkx DiGraph).
        save_path: target directory; a trailing ``/`` is added when missing.
            The default is computed once, when the function is defined.
        no_verbose: when true, suppress all diagnostic output.
    """
    if not save_path or not graph:
        if not no_verbose:
            print('Failed to save graph:\nsave_path =', save_path, '\ngraph =',
                  graph)
        return
    if not save_path.endswith('/'):
        save_path += '/'
    graph = _ensure_right_type(graph)
    if graph is None:
        if not no_verbose:
            print(
                'Failed to export graph. graph should be a dict or networkx.classes.digraph.DiGraph',
                graph)
        return
    try:
        gexf.write_gexf(graph, save_path + 'graph.gexf')
    except IOError as e:
        if not no_verbose:
            # IOError has no ``reason`` attribute; reading it raised
            # AttributeError here and hid the real error. Printing the
            # exception itself shows errno, message and file name.
            print('Failed to export graph: ', e)
def draw_graph(previous_samples, results, session):
    """Build a graph of related samples and save it as ``output.gexf``.

    Args:
        previous_samples: mapping of sha256 -> iterable of related-analysis
            dicts; its keys become the graph nodes.
        results: iterable of ``(sha256, analysis_id, file_name)`` triples;
            the code-reuse report of each analysis is fetched.
        session: object whose ``get(url)`` returns a response offering
            ``raise_for_status()`` and ``json()``.
    """
    g = nx.Graph()
    g.add_nodes_from(previous_samples)

    # NOTE(review): these labels are built (one request per result) but
    # never attached to the graph or used afterwards -- confirm the intent.
    labels = {}
    for sha256, analysis_id, file_name in results:
        url = BASE_URL + '/analyses/{}/sub-analyses/root/code-reuse'.format(
            analysis_id)
        response = session.get(url)
        response.raise_for_status()
        labels[sha256] = '{} ({})'.format(file_name,
                                          response.json()['gene_count'])

    # Link each sample to those of its related samples that are nodes too.
    for sha256, related_samples in previous_samples.items():
        for analysis in related_samples:
            related_sha256 = analysis['analysis']['sha256']
            if related_sha256 not in previous_samples:
                continue
            g.add_edge(sha256,
                       related_sha256,
                       gene_count=analysis['reused_genes']['gene_count'])

    gexf.write_gexf(g, 'output.gexf')
    print('graph was saved as output.gexf')
Exemple #9
0
def dump(nodes, weighted_edges, white, black, flatten_colours, flatten_weights,
         path='uykfe.gexf'):
    """Build a directed graph from nodes and weighted edges and save it as GEXF.

    Args:
        nodes: iterable of objects exposing ``id`` and ``name``; each becomes
            a graph node labelled with its name.
        weighted_edges: collection of ``(weight, edge)`` pairs that can be
            iterated more than once; each edge exposes ``from_.id`` and
            ``to_.id``.
        white: passed to ``rgb`` together with the colour value.
        black: passed to ``rgb`` together with the colour value.
        flatten_colours: when true, the colour value is looked up in the
            result of ``flatten``; otherwise the weight is scaled linearly
            between the minimum and maximum reported by ``log_range``.
        flatten_weights: when true, the stored edge weight is looked up in
            the result of ``flatten``; otherwise the raw weight is stored.
        path: name of the file to write. Must be a file name (not a file
            object) because the file is re-opened for post-processing.
            Defaults to the previously hard-coded ``'uykfe.gexf'``.
    """
    graph = DiGraph()
    for node in nodes:
        graph.add_node(node.id, label=node.name)

    # Hand log_range a real list rather than a lazy map object, so the
    # values stay available however often it walks over them.
    (min_weight, max_weight) = log_range(
        'Weight', [weight for (weight, _edge) in weighted_edges])
    weight_span = max_weight - min_weight

    flattened_weights = None
    if flatten_colours or flatten_weights:
        flattened_weights = flatten(
            dict((weight, weight) for (weight, _edge) in weighted_edges))

    for (weight, edge) in weighted_edges:
        if flatten_colours:
            colour = flattened_weights[weight]
        elif weight_span:
            colour = (weight - min_weight) / weight_span
        else:
            # All weights are identical: the linear scale is undefined, so
            # use the low end instead of raising ZeroDivisionError.
            colour = 0.0
        if flatten_weights:
            weight = flattened_weights[weight]
        graph.add_edge(edge.from_.id, edge.to_.id,
                       weight=weight,
                       viz=rgb(colour, white, black))

    write_gexf(graph, path)

    # fix namespace bug: drop the first viz namespace declaration from the
    # file that was just written
    with open(path) as source:
        xml = source.read()
    xml = xml.replace('xmlns:viz="http://www.gexf.net/1.1draft/viz" ', '', 1)
    with open(path, 'w') as target:
        target.write(xml)
def export_gexf(G, path):
    """Write graph ``G`` to ``path`` by delegating to ``gexf.write_gexf``."""
    gexf.write_gexf(G, path)
Exemple #11
0
def write(fh, graph, format=None):
    """Write ``graph`` to ``fh`` in GEXF format.

    ``format`` is accepted but not used by this function.
    """
    # Imported here so networkx is only loaded when this writer is used.
    from networkx.readwrite import gexf
    gexf.write_gexf(graph, fh)
    def __init__(self, graph, outfile):
        """Build the groupsheet-based Belfast Group network and write GEXF.

        For every ``BG.GroupSheet`` in ``graph`` the authors and owners are
        connected to the Belfast Group period named by the sheet's coverage,
        and to each other, by accumulating weights in ``self.edge_weights``.
        The weighted edges are then added to the network, the totals are
        printed and the network is written to ``outfile``.

        Reads ``self.bg_nodes``, ``self.bg_label`` and ``self.edge_weights``,
        none of which is set in this method (presumably class-level
        attributes; ``edge_weights`` must supply a numeric default for
        missing keys -- TODO confirm).

        :param graph: RDF graph holding the groupsheet data
        :param outfile: destination handed to ``gexf.write_gexf``
        """
        self.outfile = outfile
        self.graph = graph

        def person_label(uri):
            # Preferred label when there is one, otherwise schema.org name.
            # preferredLabel returns a list of (label property, value)
            # pairs; if we got any matches, use the first value.
            names = graph.preferredLabel(uri)
            name = names[0][1] if names else None
            if not name:
                name = graph.value(uri, rdfns.SCHEMA_ORG.name)
            return name

        self.network = nx.Graph()
        for bg in self.bg_nodes:
            self.network.add_node(bg, label=bg, type='Organization')
        # assert the two phases are connected to each other:
        # self.edge_weights[(self.bg_nodes[0], self.bg_nodes[1])] += 1

        ms = set(graph.subjects(predicate=rdflib.RDF.type,
                                object=rdfns.BG.GroupSheet))

        for m in ms:
            coverage = graph.value(subject=m, predicate=rdfns.DC.coverage)
            bg_period = '%s, %s' % (self.bg_label, coverage)
            if bg_period not in self.bg_nodes:
                print('Error: coverage %s doesn\'t map to a recognized Belfast Group period' % coverage)
                continue

            authors = list(graph.objects(subject=m, predicate=rdfns.DC.creator))
            for i, a in enumerate(authors):
                author_id = str(a)  # stringify author uri
                # if not in the network, add it
                if author_id not in self.network:
                    self.network.add_node(author_id,
                                          label=person_label(a),
                                          type='Person')
                # each groupsheet adds 0.4 to the author/period connection
                self.edge_weights[(author_id, bg_period)] += 0.4

                # make connection between co-authors; slicing past the end
                # yields nothing, so no length check is needed
                for co_author in authors[i + 1:]:
                    self.edge_weights[(author_id, str(co_author))] += 0.2

            # groupsheet owners are also associated with the group of the
            # same period and with the groupsheet authors
            owners = list(graph.subjects(predicate=rdfns.SCHEMA_ORG.owns, object=m))

            for i, o in enumerate(owners):
                # same basic logic as for authors
                owner_id = str(o)
                if owner_id not in self.network:
                    # The label is looked up afresh for each owner. The
                    # previous code never reset ``name`` here, so an owner
                    # without a preferred label inherited the name of the
                    # person handled before (or hit a NameError).
                    self.network.add_node(owner_id,
                                          label=person_label(o),
                                          type='Person')
                # each groupsheet adds 0.2 to the owner/period connection
                self.edge_weights[(owner_id, bg_period)] += 0.2

                # connected to groupsheet authors
                for auth in authors:
                    self.edge_weights[(owner_id, str(auth))] += 0.2
                # connected to other groupsheet owners
                for co_owner in owners[i + 1:]:
                    self.edge_weights[(owner_id, str(co_owner))] += 0.2

        # convert dict into list of tuples that can be easily added to the
        # network graph; items() works on both Python 2 and 3
        edge_bunch = [(s, t, w) for (s, t), w in self.edge_weights.items()]
        self.network.add_weighted_edges_from(edge_bunch)

        print('%d nodes, %d edges in Belfast Group network based on groupsheets'
              % (self.network.number_of_nodes(), self.network.number_of_edges()))

        gexf.write_gexf(self.network, self.outfile)
def save_graph(G, filepath):
    """Write graph ``G`` to ``filepath`` by delegating to ``gexf.write_gexf``."""
    gexf.write_gexf(G, filepath)
Exemple #14
0
 def export_to_gexf(self, filename=None):
     """Write ``self.dep_graph`` to a GEXF file.

     When ``filename`` is empty or ``None`` the target defaults to
     ``self.filename`` with ``'.gexf'`` appended.
     """
     from networkx.readwrite.gexf import write_gexf

     if not filename:
         filename = self.filename + '.gexf'
     write_gexf(self.dep_graph, filename)
Exemple #15
0
 def export_to_gexf(self,fname):
     """Write ``self.G`` to ``fname`` using ``write_gexf``."""
     write_gexf(self.G,fname)
Exemple #16
0
 def export_to_gexf(self, fname):
     """Write ``self.G`` to ``fname`` using ``write_gexf``."""
     write_gexf(self.G, fname)
Exemple #17
0
 def export_to_gexf(self, filename=None):
     """Write ``self.dep_graph`` to a GEXF file.

     When ``filename`` is empty or ``None`` the target defaults to
     ``self.filename`` with ``'.gexf'`` appended.
     """
     from networkx.readwrite.gexf import write_gexf

     if not filename:
         filename = self.filename + '.gexf'
     write_gexf(self.dep_graph, filename)
Exemple #18
0
    # Merge duplicate author names
# Parse every node name of the co-authorship graph into its name parts.
authors = [(author, HumanName(author)) for author in solar_coauthorship.nodes]

# Group the parsed names by (last name, first initial); names without a
# first name cannot be grouped and are left out.
lnfi = defaultdict(list)  # Last name, first initial
for author, parsed in authors:
    if len(parsed.first) > 0:
        lnfi[parsed.last, parsed.first[0]].append((author, parsed))

for (last_name, first_initial), group in lnfi.items():
    # Keep only entries with a spelled-out first name (not a bare initial)
    filtered_group = [(author, parsed) for author, parsed in group
                      if len(parsed.first.rstrip(".")) > 1]
    if not filtered_group:
        continue

    # Sort by descending degree: the best-connected spelling is canonical
    sorted_group = sorted(
        filtered_group,
        key=lambda g: nx.degree(solar_coauthorship, g[0]),
        reverse=True,
    )
    first_name = sorted_group[0][1].first
    for alt_author, alt_parsed in sorted_group[1:]:
        # Near-identical spellings are folded into the canonical first
        # name; clearly different ones are only reported.
        # NOTE(review): only the parsed name object is changed here; nothing
        # in this span renames the node in the graph -- confirm whether a
        # later step applies the merge.
        if SequenceMatcher(None, first_name,
                           alt_parsed.first).ratio() >= 0.8:
            alt_parsed.first = first_name
        else:
            print(
                f"Warning: Multiple first name candidates for last name "
                f"{last_name} and first initial {first_initial}: {first_name} "
                f"and {alt_parsed.first}")

# Written once, after all groups have been processed. The write used to sit
# inside the loop above, so the same unchanged graph was rewritten for every
# group and never written at all when there were no groups.
write_gexf(solar_coauthorship, "solar/solar_coauthorship.gexf.gz")
Exemple #19
0
# Work on a copy so the source graph keeps all of its nodes.
G_start = G_betweenness_gt_0.copy()

# Log of the decomposition, one row per step. Starts without columns; the
# rows come from make_row (columns noted by the original author: iteration,
# tag, betweeness, nodes, edges, filename).
df_decompose_g = pd.DataFrame({})

# Row 0 describes the untouched starting graph, which is saved as-is.
start_row = make_row(-1, 'start', 0, G_start)
df_decompose_g = df_decompose_g.append(start_row, ignore_index=True)
start_path = df_decompose_g.iloc[0, :]['filename']
gexf.write_gexf(G_start, start_path)
# %%

k = 500
for i in tqdm.tqdm(range(k)):
    # The first row of the betweenness table names the node to remove next.
    temp_betweenness_df = df_from_betweeness(G_start)
    top_row = temp_betweenness_df.iloc[0, :]
    tag_str = top_row.tag
    betweenness_value = top_row.betweenness

    # Log the step before the node is removed, then save the reduced graph
    # under the file name recorded in that new row (row i + 1).
    step_row = make_row(i, tag_str, betweenness_value, G_start)
    df_decompose_g = df_decompose_g.append(step_row, ignore_index=True)
    out_path = df_decompose_g.iloc[i + 1, :]['filename']
    print(f"{i:02d}-{tag_str}: {betweenness_value} >>>> {out_path}")

    G_start.remove_node(tag_str)
    gexf.write_gexf(G_start, out_path)

df_decompose_g.to_csv('3mj/decomposed_3mj.csv')
# %%
df_decompose_g.edges.plot.line()
Exemple #20
0
 def save_gexf(self, pth):
     """Write ``self.g`` to ``pth`` as pretty-printed GEXF."""
     write_gexf(G=self.g, path=pth, prettyprint=True)
import pandas as pd
import networkx as nx
from networkx.readwrite.gexf import write_gexf

# NOTE: read_pickle runs pickle under the hood -- only load trusted files.
df = pd.read_pickle('sp500.pkl')

G = nx.Graph()

# Each row contributes a company node, an officer node and the edge that
# links the two.
for row in df.itertuples():
    company, officer = row.symbol, row.Name
    G.add_node(company, type='company')
    G.add_node(officer, type='officer')
    G.add_edge(company, officer)

write_gexf(G, 'graph.gexf')
Exemple #22
0
 def export_to_gexf(self, filename):
     """Write ``self.graph`` to ``filename`` using ``write_gexf``."""
     write_gexf(self.graph, filename)
Exemple #23
0
 def export_to_gexf(self, filename):
     """Write ``self.graph`` to ``filename`` using ``write_gexf``."""
     write_gexf(self.graph, filename)