def weightDist(graph):
    """Return histograms of the average in-edge and out-edge weight per node.

    Edges are read as (src, dest, weight) triples from
    ``G.getEdges(graph, weight=True)``.  For every node reported by
    ``G.getNodes(graph)`` the mean weight of its incoming edges and of its
    outgoing edges is computed; a node without edges in a direction has an
    average of 0 for that direction.

    Returns:
        (inDist, outDist): two ``defaultdict(int)`` objects mapping an average
        weight (float) to the number of nodes having that average.
    """
    inDict = defaultdict(int)          # node -> number of incoming edges
    outDict = defaultdict(int)         # node -> number of outgoing edges
    inWeightDict = defaultdict(int)    # node -> total incoming weight
    outWeightDict = defaultdict(int)   # node -> total outgoing weight

    for src, dest, weight in G.getEdges(graph, weight=True):
        inDict[dest] += 1
        inWeightDict[dest] += weight
        outDict[src] += 1
        # The weight belongs in the weight total, not in the edge counter.
        outWeightDict[src] += weight

    # float() keeps the division exact on Python 2, where int / int truncates.
    inAvgWeightDict = defaultdict(float)
    for node, count in inDict.items():
        inAvgWeightDict[node] = float(inWeightDict[node]) / count
    outAvgWeightDict = defaultdict(float)
    for node, count in outDict.items():
        outAvgWeightDict[node] = float(outWeightDict[node]) / count

    inDist = defaultdict(int)
    outDist = defaultdict(int)
    for node in G.getNodes(graph):
        inDist[inAvgWeightDict[node]] += 1
        outDist[outAvgWeightDict[node]] += 1
    return inDist, outDist
def __init__(self, Digraph, r):
    """Prepare a depth-first traversal of ``Digraph`` starting at node ``r``.

    ``Digraph`` must provide ``getNodes()`` and ``getEdges(node)``; the values
    returned by ``getEdges(node)`` are treated as the nodes reachable from
    ``node`` in one step.

    After construction ``path[r]`` is 0 and every other node maps to "";
    ``visited`` is True only for ``r``.
    """
    # Keep the graph and the root so that run() can reach them.
    self.graph = Digraph
    self.root = r
    self.nodes = Digraph.getNodes()
    self.visited = dict.fromkeys(self.nodes, False)
    self.path = dict.fromkeys(self.nodes, "")
    self.stack = []
    self.edges = Digraph.getEdges(r)
    self.path[r] = 0
    self.visited[r] = True


def run(self):
    """Run the traversal and return the predecessor map.

    Returns:
        ``self.path``: for every node reached, the node it was first reached
        from; 0 for the root; "" for nodes that were never reached.
    """
    # Seed the stack with every neighbour of the root (including the last one).
    for neighbour in self.edges:
        self.stack.append((neighbour, self.root))

    while self.stack:
        node, parent = self.stack.pop()
        if self.visited.get(node):
            continue
        self.visited[node] = True
        self.path[node] = parent
        self.edges = self.graph.getEdges(node)
        for neighbour in self.edges:
            self.stack.append((neighbour, node))
    return self.path
def run(self):
    """Run a stack-based depth-first traversal and return ``self.path``.

    Every item on the stack is a ``(node, parent)`` pair.  When a node is
    popped for the first time it is marked visited, ``self.path[node]`` is set
    to its parent, and its neighbours are pushed.

    NOTE(review): ``graph`` and ``r`` are not parameters or locals of this
    method, so they resolve as module-level names.  Presumably they are meant
    to be the graph and the start node given to the constructor -- confirm and
    store them on the instance.
    """
    # Iterate over every neighbour; the previous index loop dropped the last.
    for neighbour in self.edges:
        self.stack.append((neighbour, r))

    while self.stack:
        node, parent = self.stack.pop()
        if self.visited.get(node):
            continue
        self.visited[node] = True
        self.path[node] = parent
        # Expand the node itself, not the (node, parent) pair.
        self.edges = graph.getEdges(node)
        for neighbour in self.edges:
            self.stack.append((neighbour, node))
    return self.path
def randomNodeNeighbor(graph, nodes=1000):
    """Sample randomly chosen source nodes together with their adjacency.

    Keys of ``graph`` are taken in random order and copied, with their
    adjacency entry, into the sample until ``G.getNodes`` reports at least
    ``nodes`` nodes for the sample, or until every key has been used.

    Args:
        graph: mapping from a node to its adjacency entry.
        nodes: wanted number of nodes; capped at the node count of ``graph``.

    Returns:
        A new dict that shares the adjacency entries of ``graph``.
    """
    # Cap by the number of nodes: a cap based on the edge count can exceed
    # what the sample is able to reach, and the loop would never end.
    target = min(nodes, len(G.getNodes(graph)))

    # A shuffled pass over the keys visits each key once, which is equivalent
    # to repeated random picks that ignore repeats, but always terminates.
    keys = list(graph)
    random.shuffle(keys)

    sampleGraph = {}
    for key in keys:
        if len(G.getNodes(sampleGraph)) >= target:
            break
        sampleGraph[key] = graph[key]
    return sampleGraph
def degreeDist(graph):
    """Return the in-degree and out-degree histograms of ``graph``.

    Edges come from ``G.getEdges(graph, weight=False)`` as (src, dest) pairs
    and nodes from ``G.getNodes(graph)``.

    Returns:
        (inDist, outDist): two ``defaultdict(int)`` objects mapping a degree
        to the number of nodes that have it.
    """
    incoming = defaultdict(int)
    outgoing = defaultdict(int)
    for edge in G.getEdges(graph, weight=False):
        source, target = edge
        incoming[target] += 1
        outgoing[source] += 1

    inHistogram = defaultdict(int)
    outHistogram = defaultdict(int)
    for vertex in G.getNodes(graph):
        # Nodes without edges in a direction count as degree 0.
        inHistogram[incoming[vertex]] += 1
        outHistogram[outgoing[vertex]] += 1
    return inHistogram, outHistogram
def randomEdge(graph, nodes=1000):
    """Build a sample graph by adding randomly chosen edges of ``graph``.

    Distinct (src, dest, weight) edges are added in random order through
    ``G.addEdge`` until ``G.getNodes`` reports at least ``nodes`` nodes for
    the sample, or until no edge is left.

    Args:
        graph: the graph to sample from.
        nodes: wanted number of nodes; capped at the node count of ``graph``.

    Returns:
        The sample graph (an empty dict when nothing could be added).
    """
    edges = list(G.getEdges(graph, weight=True))
    # Compare against the node *count*, not the node list itself.
    target = min(nodes, len(G.getNodes(graph)))

    sampleGraph = {}
    if target <= 0:
        return sampleGraph

    # One shuffled pass replaces repeated random picks: same outcome, but it
    # ends once the edges are used up even if the target cannot be reached.
    random.shuffle(edges)
    sampleEdges = set()
    for edge in edges:
        if edge in sampleEdges:
            continue
        sampleEdges.add(edge)
        G.addEdge(sampleGraph, edge[0], edge[1], edge[2])
        # ">=" because a single edge can contribute two new nodes at once.
        if len(G.getNodes(sampleGraph)) >= target:
            break
    return sampleGraph
def deleteRandomEdges(graph, resizeRatio=0.7):
    """Return a copy of ``graph`` with randomly chosen edges removed.

    Edges are removed one at a time, in random order, until the share of
    remaining edges is at most ``resizeRatio``.

    Args:
        graph: the graph to thin out; it is left untouched.
        resizeRatio: wanted upper bound for remaining edges / original edges.

    Returns:
        The thinned copy.  A graph without edges is returned as a plain copy.
    """
    import copy

    # graph.copy() is shallow.  If G.removeEdge edits adjacency containers in
    # place (not visible here), a shallow copy would let the removals leak
    # into the caller's graph, so copy all levels.
    sampleGraph = copy.deepcopy(graph)
    sampleEdges = list(G.getEdges(sampleGraph))
    edges = len(sampleEdges)
    if edges == 0:
        # Nothing to remove; also avoids dividing by zero below.
        return sampleGraph

    # Shuffling once and walking the list replaces choice() + remove(),
    # which cost a linear scan for every removed edge.
    random.shuffle(sampleEdges)
    sampleEdgeCount = edges
    for edge in sampleEdges:
        if 1.0 * sampleEdgeCount / edges <= resizeRatio:
            break
        G.removeEdge(sampleGraph, edge[0], edge[1])
        sampleEdgeCount -= 1
    return sampleGraph
def __init__(self, graph, nodes=1000, minP=0.0, weights=True):
    """Initialise the sampler state and run the sampling loop.

    Args:
        graph: the graph to sample from.
        nodes: wanted sample size; capped at the node count of ``graph``.
        minP: minimum probability required to select an edge.
        weights: stored on the instance as ``self.weights``.

    The loop keeps calling ``self.chooseRandomNeighbour()`` until
    ``self.nodesSample`` holds exactly ``self.nodes`` entries.
    """
    # Inputs and the (initially empty) result.
    self.sampleGraph = {}
    self.graph = graph
    self.weights = weights

    # Snapshot of the original graph and the effective sample size.
    self.nodesOriginal = G.getNodes(graph)
    self.edgesOriginal = G.getEdges(graph, weight=True)
    self.nodes = min(nodes, len(self.nodesOriginal))
    self.nodesSample = set()
    self.minP = minP

    # The walk starts at a uniformly chosen node.
    self.startNode = random.choice(self.nodesOriginal)
    self.currentNode = self.startNode

    # NOTE(review): chooseRandomNeighbour() is defined elsewhere; presumably
    # it moves currentNode and grows nodesSample -- confirm.
    while len(self.nodesSample) != self.nodes:
        self.neighbours = G.getNeighbours(self.graph, self.currentNode)
        self.chooseRandomNeighbour()
def sampleRN(graph, nodes=1000):
    """Sample random nodes and return the subgraph they induce.

    ``min(nodes, number of distinct nodes)`` nodes are drawn without
    replacement from ``G.getNodes(graph)``.  Every edge of ``graph`` whose two
    endpoints were both drawn is added to the sample through ``G.addEdge``;
    drawn nodes that ``G.getNodes`` does not report for the sample afterwards
    are added with an empty adjacency list.

    Args:
        graph: mapping from a node to a list of (dest, weight) pairs.
        nodes: wanted number of nodes.

    Returns:
        The sample graph as a dict.
    """
    sampleGraph = {}
    # Deduplicate first so that the wanted size can always be reached.
    candidates = list(set(G.getNodes(graph)))
    size = max(0, min(nodes, len(candidates)))

    # random.sample draws distinct nodes directly instead of retrying draws
    # with replacement; a set keeps the membership tests below O(1).
    nodesSample = set(random.sample(candidates, size))

    for src in nodesSample:
        if src not in graph:
            continue
        for dest, weight in graph[src]:
            if dest in nodesSample:
                G.addEdge(sampleGraph, src, dest, weight)

    currentNodes = set(G.getNodes(sampleGraph))
    for node in nodesSample:
        if node not in currentNodes:
            sampleGraph[node] = []
    return sampleGraph
def hopDist(graph):
    """Return a histogram of all-pairs shortest path lengths.

    Path lengths are computed with the Floyd-Warshall algorithm over the
    (src, dest, weight) triples from ``G.getEdges(graph)``, using ``weight``
    as the edge length.  Every node is at distance 0 from itself, and those
    pairs are counted too.  Unreachable pairs are left out.

    Returns:
        ``defaultdict(int)`` mapping a path length to the number of ordered
        node pairs at that distance.
    """
    # float("inf") is a sentinel available on Python 2 and 3 (sys.maxint is
    # Python 2 only) and it stays "infinite" under addition.
    unreachable = float("inf")

    nodes = sorted(G.getNodes(graph))
    dist = {}
    for node in nodes:
        dist[node] = dict.fromkeys(nodes, unreachable)
    for src, dest, weight in G.getEdges(graph):
        # Keep the shortest of parallel edges instead of the last one seen.
        if weight < dist[src][dest]:
            dist[src][dest] = weight
    for node in nodes:
        dist[node][node] = 0

    for k in nodes:
        fromK = dist[k]
        for i in nodes:
            row = dist[i]
            toK = row[k]
            if toK == unreachable:
                # No path i -> k, so k cannot shorten any path from i.
                continue
            for j in nodes:
                candidate = toK + fromK[j]
                if candidate < row[j]:
                    row[j] = candidate

    hop = defaultdict(int)
    for i in nodes:
        for j in nodes:
            length = dist[i][j]
            if length != unreachable:
                hop[length] += 1
    return hop
import copy
import random

import graph as G


def deleteRandomEdges(graph, resizeRatio=0.7):
    """Return a copy of ``graph`` with randomly chosen edges removed.

    Edges are removed one at a time, in random order, until the share of
    remaining edges is at most ``resizeRatio``.

    Args:
        graph: the graph to thin out; it is left untouched.
        resizeRatio: wanted upper bound for remaining edges / original edges.

    Returns:
        The thinned copy.  A graph without edges is returned as a plain copy.
    """
    # graph.copy() is shallow.  If G.removeEdge edits adjacency containers in
    # place (not visible here), a shallow copy would let the removals leak
    # into the caller's graph, so copy all levels.
    sampleGraph = copy.deepcopy(graph)
    sampleEdges = list(G.getEdges(sampleGraph))
    edges = len(sampleEdges)
    if edges == 0:
        # Nothing to remove; also avoids dividing by zero below.
        return sampleGraph

    # Shuffling once and walking the list replaces choice() + remove(),
    # which cost a linear scan for every removed edge.
    random.shuffle(sampleEdges)
    sampleEdgeCount = edges
    for edge in sampleEdges:
        if 1.0 * sampleEdgeCount / edges <= resizeRatio:
            break
        G.removeEdge(sampleGraph, edge[0], edge[1])
        sampleEdgeCount -= 1
    return sampleGraph


if __name__ == "__main__":
    # Single-argument print(...) and %-formatting give the same output on
    # Python 2 and Python 3.
    print("fetching data")
    data = G.readGraph("../data/5000x25000.data")
    d = G.readGraph("../data/5000x25000.data")
    l = len(G.getEdges(data))
    print("running deleteRandomEdges")
    sample = deleteRandomEdges(d.copy(), resizeRatio=0.7)
    print("variables available globally: data, sample")
    print("%s %s %s" % (l, len(G.getEdges(sample)), len(G.getEdges(data))))