-
Notifications
You must be signed in to change notification settings - Fork 0
/
STDBSCAN.py
63 lines (48 loc) · 2.51 KB
/
STDBSCAN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import math
from datetime import timedelta
from geopy.distance import great_circle
def ST_DBSCAN(df, spatial_threshold, temporal_threshold, min_neighbors):
    """Label every row of *df* with a spatio-temporal DBSCAN cluster id.

    Each row is treated as one point. Neighbourhoods are obtained from
    ``retrieve_neighbors``, which reads the ``unix``, ``latitude`` and
    ``longitude`` columns and never includes the queried point itself.

    Args:
        df: pandas DataFrame of points. It is modified in place: a
            ``cluster`` column is created (or overwritten).
        spatial_threshold: maximum distance between two neighbours; passed
            through to ``retrieve_neighbors``, which compares it against a
            great-circle distance expressed in metres.
        temporal_threshold: maximum difference between the ``unix`` values
            of two neighbours; passed through to ``retrieve_neighbors``.
        min_neighbors: minimum neighbourhood size (the point itself not
            counted) for a point to be treated as a core point.

    Returns:
        The same ``df`` object. ``cluster`` holds ``-1`` for noise and a
        positive integer (numbered from 1) for cluster members.
    """
    NOISE = -1
    # Sentinel for "not visited yet"; assumes fewer than 777777 clusters so it
    # can never collide with a real label.
    UNMARKED = 777777

    cluster_label = 0
    stack = []

    # Initialise every point as unmarked.
    df['cluster'] = UNMARKED

    # NOTE: DataFrame.set_value was removed in pandas 1.0; ``df.at`` is the
    # supported scalar accessor and has the same effect here.
    for index in df.index:
        if df.at[index, 'cluster'] != UNMARKED:
            continue  # already assigned to a cluster or flagged as noise

        neighborhood = retrieve_neighbors(index, df, spatial_threshold, temporal_threshold)

        if len(neighborhood) < min_neighbors:
            df.at[index, 'cluster'] = NOISE
            continue

        # Found a core point: open a new cluster and label the point.
        cluster_label += 1
        df.at[index, 'cluster'] = cluster_label

        # The direct neighbours of the seed core point take its label
        # unconditionally (whatever label they carried before) and are queued
        # for expansion.
        for neig_index in neighborhood:
            df.at[neig_index, 'cluster'] = cluster_label
            stack.append(neig_index)

        # Grow the cluster through every queued point that is itself a core.
        while stack:
            current_point_index = stack.pop()
            new_neighborhood = retrieve_neighbors(
                current_point_index, df, spatial_threshold, temporal_threshold)

            if len(new_neighborhood) < min_neighbors:
                continue  # not a core point, nothing to expand from here

            for neig_index in new_neighborhood:
                # Only still-unmarked points are absorbed during expansion;
                # points already flagged as noise or already labelled keep
                # their current value.
                if df.at[neig_index, 'cluster'] == UNMARKED:
                    df.at[neig_index, 'cluster'] = cluster_label
                    stack.append(neig_index)

    return df
def retrieve_neighbors(index_center, df, spatial_threshold, temporal_threshold):
    """Return the index labels of the rows that neighbour *index_center*.

    A row is a neighbour when its ``unix`` value lies within
    ``temporal_threshold`` of the centre row's (bounds inclusive) and its
    great-circle distance to the centre, in metres, is at most
    ``spatial_threshold``. The centre row itself is never returned.

    Args:
        index_center: index label of the centre row in ``df``.
        df: DataFrame with ``unix``, ``latitude`` and ``longitude`` columns.
        spatial_threshold: maximum distance in metres.
        temporal_threshold: maximum absolute difference in ``unix``.

    Returns:
        A list of index labels, in the row order of ``df``.
    """
    center = df.loc[index_center]
    center_coords = (center['latitude'], center['longitude'])

    # Temporal filter: keep only the rows inside the inclusive time window.
    earliest = center['unix'] - temporal_threshold
    latest = center['unix'] + temporal_threshold
    timestamps = df['unix']
    in_window = (timestamps >= earliest) & (timestamps <= latest)
    candidates = df[in_window]

    # Spatial filter: measure each remaining row against the centre.
    found = []
    rows = zip(candidates.index, candidates['latitude'], candidates['longitude'])
    for label, lat, lon in rows:
        if label == index_center:
            continue  # a point is not its own neighbour
        if great_circle(center_coords, (lat, lon)).meters <= spatial_threshold:
            found.append(label)
    return found