Beispiel #1
0
def initialize_Markov_chain(connection, config):
    """
    Build the state space of an (empty) Markov chain over the graph database.

    Every state is a ``(relationship_type, node_label)`` pair, since the graph
    is a multigraph. No transition matrix is allocated here; instead a lookup
    table from pairs of states to matrix coordinates is returned.

    :param connection: ipython-cypher connection string
        (eg: http://username:password@host:7474/db/data)
    :param config: ipython-cypher configuration named tuple
    :return: ``(state_space, quad_to_matrix_index)``: ``state_space`` is a list
        of ``(rel, node)`` tuples, and ``quad_to_matrix_index`` maps the
        4-tuple ``state1 + state2`` to the ``(row, column)`` positions of
        ``state1`` and ``state2`` within ``state_space``
    """
    relationship_rows = cypher.run("MATCH ()-[r]-() RETURN DISTINCT type(r)",
                                   conn=connection,
                                   config=config)
    relationship_types = [row[0] for row in relationship_rows]
    # NOTE(review): labels(n)[1] picks the *second* label of each node (Cypher
    # lists are 0-indexed) -- confirm that this is the intended label.
    label_rows = cypher.run("MATCH (n) RETURN DISTINCT labels(n)[1]",
                            conn=connection,
                            config=config)
    node_labels = [row[0] for row in label_rows]

    # Markov chain will have states = (relationship_label, node_label) since this is a multigraph
    state_space = [(relationship_type, node_label)
                   for relationship_type in relationship_types
                   for node_label in node_labels]

    # Position of the first occurrence of every state. This gives the same
    # answer as state_space.index(state) without rescanning the list for each
    # of the len(state_space)**2 pairs below.
    first_index = {}
    for position, state in enumerate(state_space):
        first_index.setdefault(state, position)

    quad_to_matrix_index = {
        state1 + state2: (first_index[state1], first_index[state2])
        for state1 in state_space
        for state2 in state_space
    }

    return state_space, quad_to_matrix_index
 def EigenVectorCentrality(self):
     """
     Rank User nodes along DOGUMGUNU relationships and print the ranking.

     Two Cypher statements are run through ``cypher.run``. The first, for each
     of 10 rounds, takes the users passing ``rand() < 0.1`` and adds 1 to the
     ``rank`` property of every user reachable from them within 10 directed
     DOGUMGUNU hops. The second reads back ``id`` and ``rank`` of all users
     that have a rank, highest first, and that result is printed.

     NOTE(review): the method name says eigenvector centrality while the
     printed heading says PageRank -- confirm which one this sampling scheme
     is meant to approximate.
     """
     print("DOGUM GUNU PAGERANK CENTRALITY")
     # Executed only for its side effect (SET m.rank ...); the result is not needed.
     cypher.run(
         "UNWIND range(1,10) AS round MATCH (n:User) WHERE rand() < 0.1  MATCH (n:User)-[:DOGUMGUNU*..10]->(m:User) SET m.rank = coalesce(m.rank,0) + 1"
     )
     ranked_users = cypher.run(
         "MATCH (n:User) WHERE n.rank is not null return id(n), n.rank order by n.rank desc"
     )
     print(ranked_users)
     # Other relationship types can be ranked with the same pair of queries by
     # substituting their type for DOGUMGUNU.
 def ClosenessCentrality(self):
     """
     Print closeness-centrality query results for each relationship type.

     For DOGUMGUNU, TESEKKUR and TAKDIR in turn: print a heading, run a Cypher
     query that sums ``1.0/dist`` over the shortest-path lengths from each
     User to every other User, and print what ``cypher.run`` returned.
     """
     headed_queries = (
         ("DOGUM GUNU CLOSENESS CENTRALITY",
          "MATCH (a:User), (b:User) WHERE a<>b WITH length(shortestPath((a)-[:DOGUMGUNU*]-(b))) AS dist, a, b RETURN DISTINCT  id(a), sum(1.0/dist) AS closenessCentrality ORDER BY closenessCentrality DESC"),
         ("TESEKKUR CLOSENESS CENTRALITY",
          "MATCH (a:User), (b:User) WHERE a<>b WITH length(shortestPath((a)-[:TESEKKUR*]-(b))) AS dist, a, b RETURN DISTINCT  id(a), sum(1.0/dist) AS closenessCentrality ORDER BY closenessCentrality DESC"),
         ("TAKDIR CLOSENESS CENTRALITY",
          "MATCH (a:User), (b:User) WHERE a<>b WITH length(shortestPath((a)-[:TAKDIR*]-(b))) AS dist, a, b RETURN DISTINCT  id(a), sum(1.0/dist) AS closenessCentrality ORDER BY closenessCentrality DESC"),
     )
     # Heading first, then query, then its result -- one relationship type at a time.
     for heading, closeness_query in headed_queries:
         print(heading)
         print(cypher.run(closeness_query))
 def DegreeCentrality(self):
     """
     Print degree-centrality query results for each relationship type.

     For DOGUMGUNU, TESEKKUR and TAKDIR in turn: print a heading, run a Cypher
     query that counts the relationships of that type per User (returned with
     name, uID and departmanName, highest count first), and print the result.
     """
     headed_queries = (
         ("DOGUM GUNU DEGREE CENTRALITY",
          "MATCH (n:User)-[r:DOGUMGUNU]-(m:User) return n.name,n.uID,n.departmanName,count(r) as DegreeScore order by DegreeScore desc"),
         ("TESEKKUR DEGREE CENTRALITY",
          "MATCH (n:User)-[r:TESEKKUR]-(m:User) return n.name,n.uID,n.departmanName,count(r) as DegreeScore order by DegreeScore desc"),
         ("TAKDIR DEGREE CENTRALITY",
          "MATCH (n:User)-[r:TAKDIR]-(m:User) return n.name,n.uID,n.departmanName,count(r) as DegreeScore order by DegreeScore desc"),
     )
     # Heading first, then query, then its result -- one relationship type at a time.
     for heading, degree_query in headed_queries:
         print(heading)
         print(cypher.run(degree_query))
 def BetweennessCentrality(self):
     """
     Print betweenness-centrality query results for each relationship type.

     For DOGUMGUNU, TESEKKUR and TAKDIR in turn: print a heading, run a Cypher
     query that counts how often each node appears as an interior node
     (``nodes(p)[1..-1]``) of the shortest paths between Users, and print the
     result.
     """
     headed_queries = (
         ("DOGUM GUNU BETWEENNESS CENTRALITY",
          "MATCH p=allShortestPaths((source:User)-[:DOGUMGUNU*]-(target:User)) UNWIND nodes(p)[1..-1] as n RETURN id(n), count(*) as betweenness order by betweenness desc"),
         ("TESEKKUR BETWEENNESS CENTRALITY",
          "MATCH p=allShortestPaths((source:User)-[:TESEKKUR*]-(target:User)) UNWIND nodes(p)[1..-1] as n RETURN id(n), count(*) as betweenness order by betweenness desc"),
         ("TAKDIR BETWEENNESS CENTRALITY",
          "MATCH p=allShortestPaths((source:User)-[:TAKDIR*]-(target:User)) UNWIND nodes(p)[1..-1] as n RETURN id(n), count(*) as betweenness order by betweenness desc"),
     )
     # Heading first, then query, then its result -- one relationship type at a time.
     for heading, betweenness_query in headed_queries:
         print(heading)
         print(cypher.run(betweenness_query))
 def drawGraph(self):
     """
     Draw the directed DOGUMGUNU relationships between User nodes.

     The matching paths are fetched with ``cypher.run``, turned into a graph
     with the result's ``get_graph()`` and passed to ``nx.draw``.
     """
     print("GRAPH")
     # To draw another relationship type, run one of these queries instead:
     #   "MATCH p= (:User)-[:TESEKKUR]->(:User)  RETURN p"
     #   "MATCH p= (:User)-[:TAKDIR]->(:User)  RETURN p"
     dogumgunu_paths = cypher.run(
         "MATCH p= (:User)-[:DOGUMGUNU]->(:User)  RETURN p")
     nx.draw(dogumgunu_paths.get_graph())
Beispiel #7
0
 def run_cypher_query(self, query):
     """
     Run a Cypher query on ``self.connection`` and return the result as a graph.

     :param query: Cypher query handed to ``cypher.run``
     :return: the value of ``get_graph()`` called on the query's result set
     """
     return cypher.run(query, conn=self.connection).get_graph()
Beispiel #8
0
def test_get_graph():
    """
    Check that get_graph turns a shortest-path query result into a non-empty
    ``nx.classes.MultiDiGraph``.

    Raises a plain ``Exception`` when the returned object has a different type
    or contains no nodes.
    """
    path_query = (
        "MATCH path=allShortestPaths((s:omim_disease)-[*1..%d]-(t:disont_disease)) "
        "WHERE s.name='%s' AND t.name='%s' "
        "RETURN path"
    ) % (4, 'OMIM:137920', 'DOID:11476')
    result_set = cypher.run(path_query, conn=connection, config=config)
    graph = get_graph(result_set, directed=True)

    # Exact type match on purpose: a directed multigraph was requested above.
    returned_type = type(graph)
    if returned_type is not nx.classes.MultiDiGraph:
        raise Exception("A networkx graph was not returned")

    node_count = graph.number_of_nodes()
    if node_count < 1:
        raise Exception("An empty graph was returned")
Beispiel #9
0
def expected_graph_distance(omim, doid, max_path_len=4, directed=True, connection=connection, defaults=defaults, num_terms=15):
    """
    Given a source omim and target doid, extract the subgraph from neo4j consisting of all shortest paths connecting
    these two nodes. Treat this as a uniform Markov chain (all outgoing edges with equal weight) and calculate the
    expected path length. This corresponds to starting a random walker at the source node and calculating how long,
    on average, it takes to reach the target node. The sum is truncated to walks of length 0 .. num_terms - 1.

    :param omim: Input OMIM ID (eg: 'OMIM:1234'), source
    :param doid: Input DOID ID (eg: 'DOID:1234'), target
    :param max_path_len: maximum path length to consider (default=4)
    :param directed: treat the Markov chain as directed or undirected (default=True (directed))
    :param connection: ipython-cypher connection string
    :param defaults: ipython-cypher configurations named tuple
    :param num_terms: number of powers of the transition matrix that are summed (default=15)
    :return: a pair of floats ``(E(source->target), E(target->source))``; a value of ``inf`` means the truncated
        sum was exactly zero, i.e. no such path was found
    :raises ValueError: (from ``list.index``) if omim or doid is not among the nodes of the extracted subgraph
    """
    if directed:
        query = "MATCH path=allShortestPaths((s:omim_disease)-[*1..%d]->(t:disont_disease)) " \
                "WHERE s.name='%s' AND t.name='%s' " \
                "RETURN path" % (max_path_len, omim, doid)
    else:
        query = "MATCH path=allShortestPaths((s:omim_disease)-[*1..%d]-(t:disont_disease)) " \
                "WHERE s.name='%s' AND t.name='%s' " \
                "RETURN path" % (max_path_len, omim, doid)
    res = cypher.run(query, conn=connection, config=defaults)
    graph = get_graph(res, directed=directed)  # Note: a directed graph sometimes means there is no path from OMIM
    mat = nx.to_numpy_matrix(graph)  # adjacency matrix of the extracted subgraph
    basis = [name for _, name in graph.nodes(data='names')]  # basis for the matrix (i.e. list of ID's)
    doid_index = basis.index(doid)  # position of the target
    omim_index = basis.index(omim)  # position of the source
    if directed:  # if directed, then add a sink node just after the target, make sure we can pass over it
        sink_column = np.zeros((mat.shape[0], 1))
        sink_column[doid_index] = 1  # connect doid to sink node
        sink_row = np.zeros((1, mat.shape[0] + 1))
        sink_row[0, -1] = 1  # make sink node go to itself
        mat = np.vstack([np.append(mat, sink_column, 1), sink_row])  # append the sink row and column
        # even after this, some nodes may not have out-going arrows
        zero_indicies = np.where(mat.sum(axis=1) == 0)[0]
        for index in zero_indicies:
            mat[index, index] = 1  # put a self loop in, so we don't get division by zero
    # NOTE(review): in the undirected case a node without edges would still give a zero row sum here.
    mat_norm = mat / mat.sum(axis=1)  # row normalize

    # Each power of the transition matrix is computed once and read for both directions.
    o_to_d_terms = []
    d_to_o_terms = []
    for i in range(num_terms):
        walk_probabilities = LA.matrix_power(mat_norm, i)
        o_to_d_terms.append(float(i) * walk_probabilities[omim_index, doid_index])
        d_to_o_terms.append(float(i) * walk_probabilities[doid_index, omim_index])
    exp_o_to_d = np.sum(o_to_d_terms)
    exp_d_to_o = np.sum(d_to_o_terms)
    if exp_o_to_d == 0:
        exp_o_to_d = float("inf")  # no such path
    if exp_d_to_o == 0:
        exp_d_to_o = float("inf")  # no such path
    return (exp_o_to_d, exp_d_to_o)  # (E(source->target), E(target->source))
Beispiel #10
0
def calculatePageRankCentralityUsingNetworkX(session):
    """
    Compute PageRank with NetworkX over the Employee appreciation graph, print it and chart it.

    :param session: not used by this function; the data is fetched through
        ``cypher.run`` with the connection string hard-coded below
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("page rank using networkx:")
    results = cypher.run(
        "MATCH (a:Employee)-[r:appreciation]-(b:Employee) RETURN id(a), id(b),r",
        conn="http://*****:*****@localhost:7474/db/data")
    g = results.get_graph()
    data = nx.pagerank_numpy(g)
    print(data)
    # give the chart a figure number and draw the chart from the data
    plt.figure(6)
    drawChart(data, "Page Rank using NetworkX")
Beispiel #11
0
def calculateEigenvectorCentrality(session):
    """
    Compute eigenvector centrality with NetworkX over every relationship in the database and chart it.

    :param session: not used by this function; the data is fetched through
        ``cypher.run`` with the connection string hard-coded below
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Eigenvector centrality:")

    # get the data from neo4j
    results = cypher.run("MATCH p = ()-[]-() RETURN p",
                         conn="http://*****:*****@localhost:7474/db/data")
    g = results.get_graph()

    # calculate the eigenvector centrality using networkx
    dictionary_centrality = nx.eigenvector_centrality_numpy(g)

    # give the chart a figure number and draw the chart from the data
    plt.figure(4)
    drawChart(dictionary_centrality, "Eigenvector Centrality")
Beispiel #12
0
def return_subgraph_paths_of_type(session,
                                  omim,
                                  doid,
                                  relationship_list,
                                  debug=False):
    """
    Extract the subgraph made of the paths from a source disease to a target
    disease whose relationships follow, in order, the given relationship types.

    :param session: neo4j session (not used here; the query is executed with
        the module-level ``connection`` and ``defaults``)
    :param omim: source OMIM ID (eg: OMIM:1234)
    :param doid: target disease ID (eg: 'DOID:1235')
    :param relationship_list: list of relationship types (must be valid neo4j
        relationship types). If any element is itself a list, every element is
        treated as its own sequence of relationship types and the paths of all
        sequences are collected together
    :param debug: when true, return the cypher query string instead of running it
    :return: the graph built by ``get_graph`` from the query result, or the
        query string when ``debug`` is set
    """

    def relationship_chain(rel_types):
        # "[:A]-()-[:B]-": consecutive hops are joined through anonymous nodes
        leading_hops = "".join("[:" + rel_type + "]-()-"
                               for rel_type in rel_types[:-1])
        return leading_hops + "[:" + rel_types[-1] + "]-"

    is_list_of_lists = any(isinstance(el, list) for el in relationship_list)
    if is_list_of_lists:
        # One OPTIONAL MATCH per relationship sequence, all anchored at the source
        pieces = ["MATCH (s:disease{name:'%s'}) " % omim]
        for path_number, rel_types in enumerate(relationship_list):
            pieces.append("OPTIONAL MATCH path%d=(s)-" % path_number)
            pieces.append(relationship_chain(rel_types) + "(t:disease)")
            pieces.append(" WHERE t.name='%s' " % doid)
        pieces.append("RETURN ")
        pieces.append("+".join("collect(path%d)" % path_number
                               for path_number in range(len(relationship_list))))
    else:
        # A single sequence of relationship types
        pieces = [
            "MATCH path=(s:disease)-",
            relationship_chain(relationship_list),
            "(t:disease) ",
            "WHERE s.name='%s' and t.name='%s' " % (omim, doid),
            "RETURN path",
        ]
    query = "".join(pieces)

    if debug:
        return query
    return get_graph(cypher.run(query, conn=connection, config=defaults))
Beispiel #13
0
import cypher
import matplotlib.pyplot as plt
# Fetch every pair of nodes joined by a LINK relationship (either direction),
# together with the relationship itself.
data = cypher.run("MATCH (a)-[l:LINK]-(b) RETURN a, b, l", conn="http://*****:*****@localhost:7474/")
# NOTE(review): the value returned by get_graph() is discarded -- confirm
# whether this call is needed before draw().
data.get_graph()
data.draw()
import time
plt.show()
# Sleeps forever in 0.5 s steps; presumably this keeps the process (and the
# figure window) alive when plt.show() does not block -- TODO confirm.
while True:
    time.sleep(0.5)
Beispiel #14
0
def get_neighbor_count():
    """Run a Cypher query that returns each node's username with its neighbor count."""
    neighbor_query = "MATCH (n)--(m) RETURN n.username, count(m) as neighbors"
    neighbor_counts = cypher.run(neighbor_query,
                                 conn="http://*****:*****@localhost:7474")
    return neighbor_counts
Beispiel #15
0
def get_neo4_graph():
    """Fetch every single-relationship path from Neo4j and return the result's ``get_graph()`` value."""
    return cypher.run("MATCH p = ()-[]-() RETURN p",
                      conn="http://*****:*****@localhost:7474").get_graph()
Beispiel #16
0
import sys
import itertools
from py2neo import *
import cypher

# Create a py2neo Graph with default arguments.
# NOTE(review): `graph` is not referenced by the code visible below -- confirm
# whether it is still needed.
graph = Graph()
# Counts the WITH paths that lead from the Product whose id is '1' to another Product.
query = """
MATCH r = (p1:Product)-[:WITH]->(p2:Product)
WHERE p1.id = '1'
RETURN count(r)
"""

# No conn= is passed; presumably ipython-cypher falls back to its default
# connection -- TODO confirm.
results = cypher.run(query)

# df = results.get_dataframe()
# df.head()

# Print each row of the result.
for d in results:
    print(d)