# PlaceNetSim_v2.py
# Forked from tnoulas/UrbanEpidemicSim.
import random
from datetime import datetime, timedelta
import matplotlib.pylab as plt
import networkx as nx
import pandas as pd
from Place import Place
# https://stackoverflow.com/questions/10688006/generate-a-list-of-datetimes-between-an-interval
def perdelta(start, end, delta):
    """Yield values from ``start`` up to, but not including, ``end``.

    The first value produced is ``start`` itself; each following value is
    the previous one advanced by ``delta``. Nothing is produced when
    ``start`` is not strictly less than ``end``.
    """
    moment = start
    while True:
        if not moment < end:
            return
        yield moment
        moment += delta
class PlaceNetSim:
    """Epidemic simulation over a network of New York venues.

    Every venue read from the location file becomes a node of a directed
    graph and gets its own ``Place`` object. Recorded transitions between
    venues are replayed in one-day snapshots: for each transition both
    places run their incubation step and population is moved from the
    origin place to the destination place.
    """

    def __init__(self):
        """Set the simulated period and load venue and transition data."""
        self.start_date = datetime(2010, 12, 21, 20, 0, 0)
        self.end_date = datetime(2011, 9, 19, 17, 0, 0)
        self.cur_time = None

        # State of the most recently simulated snapshot. These are kept on
        # the instance because draw_infection_graphs() reads them.
        self.epoch = 0
        self.date2 = None
        # One entry per recorded epoch; filled by run_simulation().
        self.frac_infected_over_time = []

        self.NYC_graph = nx.DiGraph()
        self.places = {}

        self.init_graph()
        self.load_transitions_data()

    def init_graph(self):
        """Read the venue file and create a graph node and a ``Place`` per venue.

        Each line has the form ``<venue_id>*;*<venue_info>``, e.g. a
        venue_info of
        ``(40.760265, -73.989105, 'Italian', '217', '291', 'Ristorante Da Rosina')``.
        """
        # venue id -> (venue_info[1], venue_info[0]), i.e. (longitude, latitude);
        # used to draw the nodes of the graph with geographic topology.
        self.pos_dict = {}

        # The context manager guarantees the file handle is closed.
        with open('./shared_data/newyork_anon_locationData_newcrawl.txt') as venue_file:
            for line in venue_file:
                splits = line.split('*;*')
                venue_id = int(splits[0])
                # SECURITY NOTE(review): eval() executes whatever expression the
                # data file contains. It is only acceptable while the file is
                # trusted; ast.literal_eval() would be the safe replacement if
                # every record is a plain tuple literal -- confirm before switching.
                venue_info = eval(splits[1])

                # add place to graph
                self.NYC_graph.add_node(venue_id)
                self.NYC_graph.nodes[venue_id]['info'] = venue_info

                # initialise place and, within place, population information
                self.places[venue_id] = Place(venue_info)

                # this will be used for drawing the network
                self.pos_dict[venue_id] = (venue_info[1], venue_info[0])

    def load_transitions_data(self):
        """Load the transitions CSV and set the movement total of every place.

        Populates ``self.df_transitions`` (sorted by ``timestamp1``, with both
        timestamp columns parsed) and ``self.total_population_in_data`` (the
        number of transition rows), then passes each place the number of
        transitions that originate at it.
        """
        csv_path = './shared_data/newyork_placenet_transitions.csv'
        try:
            # pandas >= 1.3: warn about malformed rows and skip them. This is
            # what ``error_bad_lines=False`` did with its default warning flag.
            self.df_transitions = pd.read_csv(csv_path, on_bad_lines='warn')
        except TypeError:
            # Older pandas releases do not know ``on_bad_lines``.
            self.df_transitions = pd.read_csv(csv_path, error_bad_lines=False)

        self.df_transitions['timestamp1'] = pd.to_datetime(self.df_transitions.timestamp1)
        self.df_transitions['timestamp2'] = pd.to_datetime(self.df_transitions.timestamp2)
        self.total_population_in_data = len(self.df_transitions)

        # sort transitions by date
        self.df_transitions = self.df_transitions.sort_values(by='timestamp1')

        # Count the transitions originating at each venue in a single pass
        # instead of filtering the whole frame once per place.
        movements_from = self.df_transitions['venue1'].value_counts().to_dict()
        for place_id, place in self.places.items():
            # this initialises also the population of the place -- experimental
            place.set_total_movements(int(movements_from.get(place_id, 0)))

    def _exchange_population(self, venue1, venue2, moving_population_size=1):
        """Move randomly chosen members from place ``venue1`` to place ``venue2``.

        At most ``moving_population_size`` members are moved; fewer (possibly
        none) when the origin place holds fewer members than requested.

        Returns:
            The list of members that were picked to move.
        """
        origin_population = self.places[venue1].get_population()
        destination_population = self.places[venue2].get_population()

        # Never ask for more members than are present: random.sample() raises
        # ValueError when the sample is larger than the population.
        sample_size = min(moving_population_size, len(origin_population))
        # random.sample() requires a sequence; passing a set raises TypeError
        # on Python 3.11 and later.
        movers = random.sample(list(origin_population), sample_size)

        self.places[venue1].set_population(origin_population.difference(movers))
        # The destination (not the origin) receives the movers.
        self.places[venue2].set_population(destination_population.union(movers))
        return movers

    def run_simulation(self):
        """Replay the transitions in one-day snapshots and record infection levels.

        For every snapshot window ``(date1, date2]`` each transition triggers
        an incubation step at both places followed by a population exchange
        of one member from origin to destination. After each window with a
        non-zero population tally, ``total_infected / total_population_in_data``
        is appended to ``self.frac_infected_over_time``.
        """
        # first date in the data: 2010-12-21 20:27:13
        # last date in the data:  2011-09-19 16:08:50
        epoch = 0
        date1 = None
        self.frac_infected_over_time = []  # fraction of infected population per epoch

        for date2 in perdelta(self.start_date, self.end_date, timedelta(days=1)):
            # kick start: the first date only opens the first window
            if date1 is None:
                date1 = date2
                continue

            total_pop_in_epoch = 0
            total_infected = 0

            print('epoch: ' + str(epoch))
            # Expose the current snapshot to draw_infection_graphs().
            self.epoch = epoch
            self.date2 = date2

            # filter snapshot from original dataset
            in_window = (self.df_transitions['timestamp1'] > date1) & (self.df_transitions['timestamp2'] <= date2)
            df_transitions_snap = self.df_transitions.loc[in_window]

            # Each row is a movement from place 1 to place 2. The two venue ids
            # are read by position (first and second column), as before.
            venue_pairs = zip(df_transitions_snap.iloc[:, 0], df_transitions_snap.iloc[:, 1])
            for venue1, venue2 in venue_pairs:
                origin = self.places[venue1]
                destination = self.places[venue2]

                # (1) disease incubation step
                origin.incubate_cycle(date1)
                # NOTE(review): the original marks the use of date2 here with "check".
                destination.incubate_cycle(date2)

                # (2) population exchange step: one randomly chosen member of
                # place 1 moves to place 2 (the size has to be an integer).
                self._exchange_population(venue1, venue2, 1)

                # Record infected and total populations after incubation. A place
                # that appears in several transitions is tallied once per appearance.
                total_infected += origin.get_total_infected()
                total_infected += destination.get_total_infected()
                total_pop_in_epoch += len(origin.get_population())
                total_pop_in_epoch += len(destination.get_population())

            # increment epoch index and reset date
            epoch += 1
            date1 = date2

            if total_pop_in_epoch == 0:
                continue
            self.frac_infected_over_time.append(total_infected / self.total_population_in_data)

    def draw_infection_graphs(self):
        """Save a geographic plot of the venues whose node ``status`` equals 1.

        The figure is written to ``./netgraphs/nyc_net_<epoch>.png`` using the
        epoch and date of the most recently simulated snapshot; before any
        snapshot has been simulated the simulation start date is used.
        """
        # TODO: fix this so it displays geographically infected population fractions vs infected places
        # No code in this class sets 'status', so nodes without the attribute
        # are treated as not infected instead of raising KeyError.
        infected_node_list = [
            venue for venue, data in self.NYC_graph.nodes(data=True) if data.get('status') == 1
        ]
        inf_pos_dict = {venue: self.pos_dict[venue] for venue in infected_node_list}
        infected_graph = self.NYC_graph.subgraph(infected_node_list)

        total_places = self.NYC_graph.order()
        # Percentage of places infected (the title appends a '%' sign).
        if total_places:
            frac_infected = round(infected_graph.order() / total_places * 100, 2)
        else:
            frac_infected = 0.0

        snapshot_date = self.date2 if self.date2 is not None else self.start_date

        plt.xlabel('longitude')
        plt.ylabel('latitude')
        plt.grid(True)
        nx.draw(infected_graph, pos=inf_pos_dict, node_size=0.1, node_color='red', alpha=0.1)
        plt.title(snapshot_date.strftime("%m/%d/%Y") + ' ' + str(frac_infected) + '%' + ' of 85k places infected')
        plt.savefig('./netgraphs/nyc_net_' + str(self.epoch) + '.png')
        plt.close()

    def plot_infected_vs_total(self):
        """Plot the recorded infected fractions and save ``infected_per_epoch.pdf``."""
        ys = self.frac_infected_over_time
        xs = list(range(len(ys)))
        plt.xlabel('epoch')
        plt.ylabel('Fraction Infected')
        plt.plot(xs, ys, 'k.')
        plt.grid(True)
        plt.savefig('infected_per_epoch.pdf')
        plt.close()
if __name__ == "__main__":
    # Script entry point: build the simulator (which loads the venue and
    # transition data), replay the transitions, then plot the recorded
    # infected fraction per epoch.
    simulation = PlaceNetSim()
    simulation.run_simulation()
    simulation.plot_infected_vs_total()