/
marvel_graph_comparing_shortest_paths.py
158 lines (130 loc) · 4.31 KB
/
marvel_graph_comparing_shortest_paths.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
""" Utility Functions"""
import csv
import time
from random import randint
import heapq
from priority_dictionary import priority_dict
from collections import deque
def make_link(G, node1, node2):
    """Add one undirected link between node1 and node2 and return G.

    G is a dict of dicts. Every call adds 1.0 to the stored value in each
    direction, so the value counts how many times the pair was linked.
    G is modified in place; the same object is returned.
    """
    forward = G.setdefault(node1, {})
    forward[node2] = forward.get(node2, 0.0) + 1.0
    # Looked up only after the forward update, so a self-link
    # (node1 == node2) is counted twice, once per direction.
    backward = G.setdefault(node2, {})
    backward[node1] = backward.get(node1, 0.0) + 1.0
    return G
def weight_graph(G):
    """Replace every edge value in G with its reciprocal and return G.

    G is rewritten in place and the same object is returned, so the caller
    must pass a copy if the original values are still needed.
    """
    for neighbors in G.values():
        for other in neighbors:
            neighbors[other] = 1.0 / neighbors[other]
    return G
def unweighted_graph(G):
    """Set every edge value in G to the integer 1 and return G.

    G is rewritten in place and the same object is returned, so the caller
    must pass a copy if the original values are still needed.
    """
    for neighbors in G.values():
        for other in neighbors:
            neighbors[other] = 1
    return G
def read_graph(filename):
    """Build two graphs from a tab-separated file of (character, comic) rows.

    Returns a tuple (marvelG, charG):
      marvelG -- links each character to each comic it is listed with.
      charG   -- links two characters once for every comic they share, so
                 the edge value is the number of shared comics.

    Blank rows are skipped. A non-blank row that does not have exactly two
    fields raises ValueError.
    """
    marvelG, charG = {}, {}
    characters = set()
    # Use a context manager so the file is closed even if a row is malformed.
    with open(filename) as handle:
        for row in csv.reader(handle, delimiter='\t'):
            if not row:
                continue
            char, comic = row
            characters.add(char)
            marvelG = make_link(marvelG, char, comic)
    # 'done' holds characters already used as char1, so each pair of
    # characters is linked from one side only (make_link is symmetric).
    done = set()
    for char1 in characters:
        done.add(char1)
        for book in marvelG[char1]:
            for char2 in marvelG[book]:
                if char2 not in done:
                    charG = make_link(charG, char1, char2)
    return marvelG, charG
# Raw string: the backslashes in the Windows path are literal characters.
marvelG, charG = read_graph(r'C:\Python27\uniq_edges.tsv')
print('generating graphs, marvelG, charG...')
# weight_graph and unweighted_graph both rewrite their argument in place and
# return that same object. Passing charG to both made w_charG and u_charG
# aliases of one dict whose values all ended up as 1, so each call now gets
# its own two-level copy and charG keeps the raw shared-comic counts.
w_charG = weight_graph(
    {node: dict(links) for node, links in charG.items()})
print('generating weighted graph and unweighted graph')
u_charG = unweighted_graph(
    {node: dict(links) for node, links in charG.items()})
"""Below is the list of characters in question."""
# Start characters for the shortest-path comparison. The names are lookup
# keys into the graph, so trailing spaces are significant.
chars = [
    'SPIDER-MAN/PETER PAR',
    'GREEN GOBLIN/NORMAN ',
    'WOLVERINE/LOGAN ',
    'PROFESSOR X/CHARLES ',
    'CAPTAIN AMERICA',
]
def dijkstra(G,v):
    """Compute weighted shortest distances and all shortest paths from v.

    G is a dict of dicts mapping node -> neighbor -> edge weight.

    Returns (final_dist, shortest_paths):
      final_dist     -- node -> distance from v, for every node reached.
      shortest_paths -- node -> list of paths; each path is a list of nodes
                        starting at v and ending at that node. Paths whose
                        total distance compares exactly equal are all kept.
    """
    # NOTE(review): priority_dict comes from priority_dictionary; this code
    # assumes pop_smallest() removes and returns the key with the lowest
    # assigned value -- confirm against that module.
    frontier = priority_dict()
    frontier[v] = 0
    tentative = {v: 0}
    final_dist = {}
    shortest_paths = {v: [[v]]}
    while len(frontier) != 0:
        w = frontier.pop_smallest()
        # lock it down!
        final_dist[w] = tentative[w]
        for x in G[w]:
            if x in final_dist:
                continue
            candidate = final_dist[w] + G[w][x]
            if x not in tentative or candidate < tentative[x]:
                # First route to x, or a strictly better one: replace
                # whatever paths were recorded before.
                tentative[x] = candidate
                frontier[x] = candidate
                shortest_paths[x] = [path + [x] for path in shortest_paths[w]]
            elif candidate == tentative[x]:
                # Tie: keep the existing paths and add the ones through w.
                shortest_paths[x].extend(
                    [path + [x] for path in shortest_paths[w]])
                frontier[x] = candidate
    return final_dist, shortest_paths
def bfs(G, node):
    """Breadth-first search over G starting at node.

    G maps each node to an iterable (here a dict) of its neighbors.

    Returns a dict mapping every reachable node to a tuple
    (hops, node, parent), where hops is the number of edges from the start
    node and parent is the node it was discovered from. The start node's
    entry is (0, node, None).
    """
    final_dist = {node: (0, node, None)}
    open_list = deque([node])
    while open_list:
        current = open_list.popleft()
        hops = final_dist[current][0]
        for neighbor in G[current]:
            # Skip nodes that were already discovered. The test used to be
            # inverted, which skipped every new node, so nothing beyond the
            # start node was ever recorded.
            if neighbor in final_dist:
                continue
            final_dist[neighbor] = (hops + 1, neighbor, current)
            open_list.append(neighbor)
    return final_dist
def get_parent(pair):
    """Return the element at index 2 of pair (the parent slot of a bfs entry)."""
    return pair[2]
def find_path(dist, target):
    """Follow parent links in dist from target back to the start node.

    dist maps node -> tuple whose element at index 2 is the node's parent
    (None for the start node). The returned list begins with target and
    ends with the start node, i.e. it is in reverse travel order.
    """
    path = [target]
    parent = dist[target][2]
    # A parent of None marks the start node, so the path is complete.
    while parent is not None:
        path.append(parent)
        parent = dist[parent][2]
    return path
# For each start character, count the targets whose weighted shortest path
# uses more nodes than the fewest-hops path to the same target.
total = 0  # not used below; kept so the module-level name still exists
counter = 0
answers = []
print('calculating shortest paths...')
for char1 in chars:
    w_final_dist, w_shortest_paths = dijkstra(w_charG, char1)
    u_final_dist = bfs(u_charG, char1)
    for char2 in w_final_dist:
        if char1 == char2:
            continue
        # Among the equally-weighted shortest paths, take the one with the
        # fewest nodes.
        char_path = min(w_shortest_paths[char2], key=len)
        # The hop path must lead to char2. Asking for char1 (the search
        # root) always produced a one-node path, which made the comparison
        # below true for every target.
        hop_path = find_path(u_final_dist, char2)
        if len(char_path) > len(hop_path):
            answers.append(((char1, char2), (char_path, hop_path)))
    # Per-character count: answers added during this iteration only.
    print('%s %d' % (char1, len(answers) - counter))
    counter = len(answers)
print(len(answers))