-
Notifications
You must be signed in to change notification settings - Fork 0
/
tracking.py
397 lines (329 loc) · 13 KB
/
tracking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
__author__ = ['Stella', 'phil0']
'''
PROJECT CSE 415
Our program receives real data of the positions of proteins in E.coli cells in each time
frame. It starts with random tracks and then modifies the tracks of the proteins in
each frame with proteins observed in the next frame attempting to create the track.
It then attempts to add splits and merges to the tracks at all the possible places
where that can happen (ends and starts of tracks), and it decides whether that is a
good choice or not according to the cost. It uses simulated annealing to make that
decision, so sometimes worse choices are accepted.
'''
from scipy import misc, io
import numpy as np
from random import *
import matplotlib
import matplotlib.pyplot as plt
import math
from copy import deepcopy
# A detection is loaded only when its score is strictly greater than this
# threshold (see convertMatFile).
MIN_SCORE = 3
# When True, neighbor_merge_split may propose split moves.
ALLOW_SPLITS = True
# Cost charged by cost() for each track that does not start at frame 0 and has
# no split parent recorded.
BIRTH_PENALTY = 10
# Cost charged by cost() for each track that ends before the last frame and has
# no merge parent recorded.
DEATH_PENALTY = 10
# When True, neighbor_merge_split may propose merge moves.
ALLOW_MERGES = True
# NOTE(review): not referenced anywhere in the visible source -- confirm it is
# still needed before relying on or removing it.
MAX_TIME_WINDOW = 5
# Frame-to-frame displacement above which a step is treated as a "big jump"
# (see find_big_jumps / count_big_jumps).
MAX_JUMP = 4
def has_nans(biglist):
    """Return True when *biglist* holds no finite number at all.

    Despite the name this is not an "any NaN" test: the result is True only
    if every value of every inner sequence is non-finite (NaN or infinite).
    An empty ``biglist``, or one made only of empty sequences, also gives
    True.

    biglist -- iterable of iterables of numbers, e.g. one track whose
               entries are ``[x, y, score]`` spots or ``[nan]`` placeholders.
    """
    return not any(np.isfinite(value) for spot in biglist for value in spot)
def good_tracks(state):
    """Return the indexes of the tracks that contain at least one finite value.

    A track is skipped when has_nans() reports that it holds nothing but
    non-finite values (an empty track).  The tracks are taken from ``state``
    itself, so the function no longer needs the module-level ``elements``
    counter to have been initialised first.

    state -- list of tracks; each track is a list of per-frame spots.
    """
    return [index for index, track in enumerate(state) if not has_nans(track)]
def convertMatFile(filename):
    """Load spot detections from a MATLAB file and build the initial state.

    The file must provide the arrays ``xx``, ``yy`` and ``sc``, all indexed
    ``[track][frame]``, plus ``lx``.  A detection is kept only when its
    ``sc`` value is strictly greater than MIN_SCORE.

    Side effects: sets the module globals ``elements`` (number of rows of
    ``xx``), ``lifetime`` (number of frames, taken from the first row of
    ``xx``) and ``maxl`` (``lx[0][lifetime - 1]``, which plot() uses for the
    vertical axis range).

    Returns a tuple ``(state, splits, merges)``:
    state  -- ``state[track][frame]`` is ``[x, y, score]`` for a kept
              detection and ``[nan]`` otherwise; the list is passed through
              neighbor_remove_spots() before being returned.
    splits -- one NaN per track (no split parent assigned yet).
    merges -- one NaN per track (no merge parent assigned yet).
    """
    global lifetime, elements, maxl

    celldata = io.matlab.loadmat(filename)
    xs = celldata['xx']
    ys = celldata['yy']
    scores = celldata['sc']

    elements = len(xs)
    lifetime = len(xs[0])
    maxl = celldata['lx'][0][lifetime - 1]

    splits = [np.nan] * elements
    merges = [np.nan] * elements
    # Every frame starts out as an "empty" placeholder spot.
    state = [[[np.nan] for _ in range(lifetime)] for _ in range(elements)]
    for spot in range(elements):
        for frame in range(lifetime):
            if scores[spot][frame] > MIN_SCORE:
                state[spot][frame] = [xs[spot][frame], ys[spot][frame], scores[spot][frame]]
    return neighbor_remove_spots(state), splits, merges
def run(filename):
    """Load *filename*, show the initial tracks, anneal, and wait for Enter.

    filename -- path of the MATLAB file handed to convertMatFile().

    Returns the tuple produced by sim_anneal():
    ``(final_state, splits, merges, final_cost)``.  Earlier versions returned
    nothing, so callers that ignore the result are unaffected.
    """
    initial_state, splits, merges = convertMatFile(filename)
    plt.ion()
    # plot() only reads the state, so no defensive copy is needed here.
    plot(initial_state, splits, merges)
    final_state, splits, merges, final_cost = sim_anneal(initial_state, splits, merges)
    try:
        input("Press enter to continue.")
    except (SyntaxError, EOFError):
        # SyntaxError: Python 2's input() evaluating an empty line.
        # EOFError: stdin is closed (non-interactive run).
        pass
    print('done')
    return final_state, splits, merges, final_cost
def find_first(track):
    """Return the index of the first frame of *track* holding a detection.

    A frame holds a detection when the first value of its spot is finite.
    When no frame qualifies (empty track, or an empty list) the function
    returns ``np.nan`` instead of an index, so callers must check the result
    before using it to index a list.
    """
    for frame, spot in enumerate(track):
        if np.isfinite(spot[0]):
            return frame
    return np.nan
def find_last(track):
    """Return the index of the last frame of *track* holding a detection.

    A frame holds a detection when the first value of its spot is finite.
    When no frame qualifies (empty track, or an empty list) the function
    returns ``np.nan`` instead of an index, so callers must check the result
    before using it to index a list.
    """
    for frame in range(len(track) - 1, -1, -1):
        if np.isfinite(track[frame][0]):
            return frame
    return np.nan
def sim_anneal(state, splits, merges):
    """Optimise the track assignment by simulated annealing.

    One neighbouring configuration is proposed per temperature step.  The
    first 4/5 of the ``iterations`` budget uses neighbor_switch_jumps(); the
    remainder uses neighbor_merge_split().  The temperature starts at 1.0 and
    is multiplied by ``alpha`` after every step.  The loop stops when the
    budget is used up or the temperature drops to ``T_min``.

    Acceptance rule: an improving proposal is always accepted, an equal-cost
    proposal never, and a worse one with the probability returned by
    acceptance_probability().  Every accepted configuration is redrawn with
    plot().

    NOTE(review): the step counter used to be reset to 1 on every pass, so
    the merge/split phase could never run.  It is now initialised once, which
    caps the run at ``iterations`` proposals.  If a full inner loop of
    ``iterations`` proposals per temperature was intended, this needs
    revisiting.

    Returns ``(state, splits, merges, cost)`` of the last accepted
    configuration; the arguments themselves are not modified.
    """
    old_cost = cost(state, splits, merges)
    T = 1.0
    T_min = 0.0001
    alpha = 0.99
    iterations = 300
    switch_budget = 4 * iterations / 5.0

    i = 1
    while T > T_min and i <= iterations:
        # The neighbour generators may edit their arguments in place, so they
        # get private copies: a rejected proposal must leave the current
        # configuration (and old_cost) valid.
        trial_state = deepcopy(state)
        trial_splits = list(splits)
        trial_merges = list(merges)
        if i < switch_budget:
            new_state, new_splits, new_merges = neighbor_switch_jumps(
                trial_state, trial_splits, trial_merges)
        else:
            new_state, new_splits, new_merges = neighbor_merge_split(
                trial_state, trial_splits, trial_merges)
        new_cost = cost(new_state, new_splits, new_merges)
        print('new cost: ' + str(new_cost) + ' vs old cost: ' + str(old_cost))

        if new_cost < old_cost:
            # Always accepted; skipping exp() also avoids an OverflowError
            # for large improvements at low temperature.
            accepted = True
        elif new_cost > old_cost:
            accepted = acceptance_probability(old_cost, new_cost, T) > random()
        else:
            accepted = False

        if accepted:
            print('accepted')
            state, splits, merges = new_state, new_splits, new_merges
            plot(state, splits, merges)
            old_cost = new_cost
        i += 1
        T = T * alpha
    return state, splits, merges, old_cost
def neighbor_merge_split(state, splits, merges):
    """Propose either a split or a merge, chosen at random.

    Each track that starts after frame 0 contributes one "split" vote (when
    ALLOW_SPLITS is set) and each track that ends before the last frame
    contributes one "merge" vote (when ALLOW_MERGES is set).  One vote is
    drawn uniformly and the matching generator, neighbor_split() or
    neighbor_merge(), is called.

    When no vote is available (e.g. every track spans the whole movie) the
    arguments are returned unchanged instead of failing on an empty choice.

    Returns ``(state, splits, merges)``.
    """
    starts, ends = find_starts_ends(state)

    operatorlist = []
    if ALLOW_SPLITS:
        # Empty tracks report NaN, which fails the comparison and is skipped.
        operatorlist.extend(1 for start in starts if start > 0)
    if ALLOW_MERGES:
        # The last valid frame index is lifetime - 1; comparing against
        # lifetime itself would be true for every non-empty track.
        operatorlist.extend(2 for end in ends if end < lifetime - 1)
    if not operatorlist:
        return state, splits, merges

    operator = choice(operatorlist)
    if operator == 1:
        # connect the start of a track to the middle of another one
        print('Trying a split')
        return neighbor_split(state, splits, merges)
    # connect the end of a track to the middle of another one
    print('Trying a merge')
    return neighbor_merge(state, splits, merges)
def neighbor_merge(state, splits, merges):
    """Propose that one early-ending track merges into another track.

    A track is a candidate when its last detection lies before frame
    ``lifetime - 3``.  One candidate is drawn at random; its possible parents
    are the tracks that end later and hold a detection in the frame right
    after the candidate's last one.  If a parent exists, one is drawn at
    random and recorded as ``merges[candidate]``.

    The caller's ``merges`` list is not modified; an updated copy is
    returned when a merge is recorded.  ``state`` and ``splits`` are returned
    as received.

    Returns ``(state, splits, merges)``.
    """
    ends = find_starts_ends(state)[1]
    # Empty tracks report NaN, which fails the comparison and is skipped.
    possible_merges = [track for track, end in enumerate(ends) if end < lifetime - 3]
    if not possible_merges:
        return state, splits, merges

    mergetrack = choice(possible_merges)
    end_time = ends[mergetrack]
    print(mergetrack)
    parent_tracks = [
        track for track, end in enumerate(ends)
        if end > end_time and not np.isnan(state[track][end_time + 1][0])
    ]
    if not parent_tracks:
        return state, splits, merges

    new_merges = list(merges)
    new_merges[mergetrack] = choice(parent_tracks)
    return state, splits, new_merges
def neighbor_split(state, splits, merges):
    """Propose that one late-starting track split off from another track.

    A track is a candidate when its first detection is after frame 0.  One
    candidate is drawn at random; its possible parents are the tracks that
    start earlier and hold a detection in the candidate's first frame.  If a
    parent exists, one is drawn at random and recorded as
    ``splits[candidate]``.

    When no track starts late the arguments are returned unchanged (this
    used to fail on an empty choice).  The caller's ``splits`` list is not
    modified; an updated copy is returned when a split is recorded.

    NOTE(review): the parent is required to be present in the child's first
    frame, whereas plot() reads the parent one frame earlier -- confirm
    which frame is intended.

    Returns ``(state, splits, merges)``.
    """
    starts = find_starts_ends(state)[0]
    # Empty tracks report NaN, which fails the comparison and is skipped.
    possible_splits = [track for track, start in enumerate(starts) if start > 0]
    if not possible_splits:
        return state, splits, merges

    splittrack = choice(possible_splits)
    start_split = starts[splittrack]
    parent_tracks = [
        track for track, start in enumerate(starts)
        if start < start_split and np.isfinite(state[track][start_split][0])
    ]
    if not parent_tracks:
        return state, splits, merges

    new_splits = list(splits)
    new_splits[splittrack] = choice(parent_tracks)
    return state, new_splits, merges
def neighbor_remove_spots(state):
    """Blank out isolated detections in every track, in place.

    A detection is isolated when it is preceded by at least two empty frames
    and the next two frames (as far as the track extends) are empty as well.
    Such a detection is replaced by the ``[nan]`` placeholder.

    A detection that is followed by another one within two frames is kept.
    Previously the candidate was never cleared, so the first spot of every
    run that started after a gap was deleted once the run ended.

    state -- list of tracks; modified in place and also returned.
    """
    for track in state:
        last_frame = len(track) - 1
        candidate = None   # frame index of a detection that may be isolated
        gap = 0            # number of consecutive empty frames seen so far
        for frame, spot in enumerate(track):
            if np.isnan(spot[0]):
                gap += 1
            else:
                if candidate is not None:
                    # A detection close behind the candidate: not isolated.
                    candidate = None
                elif gap > 1:
                    candidate = frame
                gap = 0
            if candidate is not None and (gap > 1 or frame == last_frame):
                track[candidate] = [np.nan]
                candidate = None
    return state
def neighbor_switch_jumps(state, splits, merges):
    """Propose swapping the tails of two tracks at a big jump.

    A non-empty track is drawn at random.  If it has at least one big jump
    (see find_big_jumps), one of them is drawn and everything from that frame
    onwards is exchanged with a second non-empty track.  The partner is the
    last track in index order that also has a big jump at that frame, or a
    random non-empty track when none does.

    The caller's ``state`` is not modified: the swap is done on a copy.  The
    arguments are returned unchanged when fewer than two non-empty tracks
    exist (this used to fail on an empty choice) or when the drawn track has
    no big jump.

    Returns ``(state, splits, merges)``.
    """
    goodTrks = good_tracks(state)
    if len(goodTrks) < 2:
        return state, splits, merges

    big_jumps = find_big_jumps(state)
    track1 = choice(goodTrks)
    goodTrks.remove(track1)
    if not big_jumps[track1]:
        return state, splits, merges

    time_jump1 = choice(big_jumps[track1])
    partners = [track for track in goodTrks if time_jump1 in big_jumps[track]]
    track2 = partners[-1] if partners else choice(goodTrks)

    # Copying the per-track lists is enough: spots are moved, never edited.
    new_state = [list(track) for track in state]
    new_state[track1][time_jump1:] = state[track2][time_jump1:]
    new_state[track2][time_jump1:] = state[track1][time_jump1:]
    return new_state, splits, merges
def neighbor_onespot(state):
    """Propose exchanging two consecutive frames between two random tracks.

    A frame index ``t`` is drawn from ``0 .. lifetime - 2`` and the spots at
    frames ``t`` and ``t + 1`` are swapped between two distinct non-empty
    tracks drawn at random.

    The caller's ``state`` is not modified: the swap is done on a copy.  The
    state is returned unchanged when there are fewer than two frames or
    fewer than two non-empty tracks (both used to raise).

    NOTE(review): nothing in the visible source calls this function.
    """
    goodTrks = good_tracks(state)
    if lifetime < 2 or len(goodTrks) < 2:
        return state

    timepoint = randint(0, lifetime - 2)
    track1 = choice(goodTrks)
    goodTrks.remove(track1)
    track2 = choice(goodTrks)

    # Copying the per-track lists is enough: spots are moved, never edited.
    new_state = [list(track) for track in state]
    for frame in (timepoint, timepoint + 1):
        new_state[track1][frame], new_state[track2][frame] = (
            new_state[track2][frame], new_state[track1][frame])
    return new_state
def find_starts_ends(state):
    """Return the first and last detection frame of every track.

    Returns ``(starts, ends)``, two lists with one entry per track of
    ``state``, filled by find_first() and find_last().  An empty track gets
    ``np.nan`` in both lists.  The tracks are taken from ``state`` itself
    rather than from the module-level ``elements`` counter.
    """
    starts = [find_first(track) for track in state]
    ends = [find_last(track) for track in state]
    return starts, ends
def cost(state, splits, merges):
    """Return the annealing cost of a configuration (lower is better).

    The cost is the sum of:
    * the total frame-to-frame displacement over all tracks, divided by 10;
    * per track without a split parent: BIRTH_PENALTY unless its first
      detection is at frame 0 (an empty track is charged as well, because
      its start is NaN);
    * per track with a split parent: the distance between its first
      detection and the parent's detection one frame earlier, or 100 when
      that link cannot be measured;
    * per track without a merge parent: DEATH_PENALTY when its last
      detection is before the final frame;
    * per track with a merge parent: the distance between its last detection
      and the parent's detection one frame later, or 100 when the track ends
      at or after frame ``lifetime - 2`` or the link cannot be measured;
    * 10 for every big jump (see count_big_jumps);
    * 10 for every empty frame between a track's first and last detection.

    Fixes over the previous version:
    * The split distance used the leftover loop variable ``time`` (always
      the last frame) as the parent's frame, not the child's start.
    * An empty track with a recorded parent raised TypeError.
    * A start at frame 0 silently wrapped around to the last frame.

    state  -- list of tracks, each a list of per-frame spots.
    splits -- per track, the index of its split parent or NaN.
    merges -- per track, the index of its merge parent or NaN.
    """
    # Path length, accumulated per track and then summed.
    track_lengths = []
    for track in state:
        length = 0
        for frame in range(1, len(track)):
            if np.isfinite(track[frame][0]) and np.isfinite(track[frame - 1][0]):
                length += euclidean_distance(track[frame], track[frame - 1])
        track_lengths.append(length)
    icost = 0
    for length in track_lengths:
        icost = icost + length
    icost = icost / 10

    big_jump_count = count_big_jumps(state)

    # Births and split links.
    splitcost = 0
    for index, track in enumerate(state):
        start = find_first(track)
        parent = splits[index]
        if np.isnan(parent):
            if start != 0:  # penalty for starting in the middle of the timeline
                splitcost += BIRTH_PENALTY
        elif np.isfinite(parent):
            parent_track = state[int(parent)]
            # start must be a real frame index > 0 for "one frame earlier"
            # to exist.
            if (np.isfinite(start) and start > 0
                    and np.isfinite(parent_track[start - 1][0])):
                splitcost += euclidean_distance(track[start], parent_track[start - 1])
            else:
                splitcost += 100

    # Deaths and merge links.
    mergecost = 0
    for index, track in enumerate(state):
        end = find_last(track)
        parent = merges[index]
        if np.isnan(parent):
            if end < len(track) - 1:  # penalty for ending in the middle of the timeline
                mergecost += DEATH_PENALTY
        elif np.isfinite(parent):
            parent_track = state[int(parent)]
            # A NaN end (empty track) fails the comparison and costs 100.
            if end < len(track) - 2 and np.isfinite(parent_track[end + 1][0]):
                mergecost += euclidean_distance(track[end], parent_track[end + 1])
            else:
                mergecost += 100

    # Empty frames inside the lifetime of a track.
    nancount = 0
    for track in state:
        start = find_first(track)
        end = find_last(track)
        if not np.isnan(start):
            for frame in range(start, end):
                if np.isnan(track[frame][0]):
                    nancount += 1

    icost += splitcost
    icost += mergecost
    icost += big_jump_count * 10
    icost += nancount * 10
    return icost
def find_big_jumps(state):
    """Return, per track, the frames at which a big jump arrives.

    Frame ``t`` is listed for a track when both frame ``t - 1`` and frame
    ``t`` hold a detection and the distance between them exceeds MAX_JUMP.
    The result has one (possibly empty) list per track of ``state``.  The
    tracks and their lengths are taken from ``state`` itself rather than from
    the module-level ``elements`` / ``lifetime`` counters.
    """
    total_result = []
    for track in state:
        jumps = []
        for frame in range(1, len(track)):
            previous = track[frame - 1]
            current = track[frame]
            if np.isnan(previous[0]) or np.isnan(current[0]):
                continue
            if euclidean_distance(previous, current) > MAX_JUMP:
                jumps.append(frame)
        total_result.append(jumps)
    return total_result
def count_big_jumps(state):
    """Return the total number of big jumps over all tracks.

    A big jump is a pair of consecutive frames that both hold a detection
    and lie more than MAX_JUMP apart.
    """
    count = 0
    for track in state:
        # Pair every frame with its successor; the last frame has none.
        for current, following in zip(track, track[1:]):
            if np.isnan(current[0]) or np.isnan(following[0]):
                continue
            if euclidean_distance(current, following) > MAX_JUMP:
                count += 1
    return count
def acceptance_probability(old_cost, new_cost, T):
    """Return the probability of accepting a move from old_cost to new_cost.

    A move that does not increase the cost gets probability 1.0.  A move
    that increases it gets ``exp((old_cost - new_cost) / T)``, which lies
    between 0 and 1.

    The previous version evaluated the exponential for improving moves too.
    That returned values above 1 and raised OverflowError once the
    improvement divided by ``T`` grew past about 709, which is easily reached
    at low temperature.  Callers comparing the result with ``random()``
    behave the same, since 1.0 is greater than every value ``random()`` can
    return.

    old_cost -- cost of the current configuration.
    new_cost -- cost of the proposed configuration.
    T        -- current temperature; must be non-zero.
    """
    if new_cost <= old_cost:
        return 1.0
    return math.exp((old_cost - new_cost) / T)
def plot(state, splits, merges):
    """Redraw the current configuration in the active matplotlib figure.

    The horizontal axis is the frame index (0 .. lifetime).  The vertical
    axis shows the first coordinate of each spot, ranging over
    ``-maxl/2 .. maxl/2``.  For every non-empty track, three things are
    drawn, in this order:
    * a dashed link from the split parent's spot one frame before the
      track's first detection to that first detection (only when a split
      parent is recorded and the track starts after frame 0);
    * a dotted line through the detections only, bridging the empty frames;
    * a solid dotted-marker line over the whole span, which matplotlib
      breaks at the empty (NaN) frames.

    Empty tracks are skipped.  Previously an empty track with a recorded
    split parent raised TypeError, and a split recorded for a track starting
    at frame 0 drew a link to the parent's last frame.

    ``merges`` is accepted for symmetry with the other functions but merge
    links are not drawn.

    Relies on the module globals ``elements``, ``lifetime`` and ``maxl`` set
    by convertMatFile().
    """
    plt.clf()
    plt.axis([0, lifetime, -(maxl/2), maxl/2])
    for track in range(elements):
        start = find_first(state[track])
        end = find_last(state[track])
        if not (np.isfinite(start) and np.isfinite(end)):
            continue  # nothing to draw for an empty track

        if np.isfinite(splits[track]) and start > 0:
            parent = splits[track]
            plt.plot([start - 1, start],
                     [state[parent][start - 1][0], state[track][start][0]], '--')

        frames = range(start, end + 1)
        newplot = [state[track][t][0] for t in frames]
        # Detections only, to plot dotted lines across the gaps.
        gapst = [t for t in frames if np.isfinite(state[track][t][0])]
        gaps = [state[track][t][0] for t in gapst]
        plt.plot(gapst, gaps, ':')
        plt.plot(frames, newplot, '.-')
    plt.draw()
    plt.show()
def euclidean_distance(point1, point2):
    """Return the planar Euclidean distance between two points.

    Points are sequences of the form ``[x, y, (intensity)]``; only the first
    two entries are used, so anything after ``y`` is ignored.
    """
    return math.hypot(point1[0] - point2[0], point1[1] - point2[1])
# Run the demo only when executed as a script, so that importing this module
# (e.g. to reuse or test its functions) does not start an annealing run.
if __name__ == '__main__':
    run('simpleTrack.mat')