/
find_clusters.py
executable file
·246 lines (195 loc) · 7.32 KB
/
find_clusters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
#!/usr/bin/env python3
# CatAmount analyzes GPS collar data to find time/space relationships.
# Copyright (C) 2012-2019 Michael Rickard
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# This code was based on a reading of work done by Mike Warren at the
# University of Alberta and Kyle Knopff at The Central East Slopes
# Cougar Study.
# This file provides facilities to identify one cat's time/space groupings, called clusters.
# IMPORT
import os
import sys
import argparse
from csv import reader as csvreader
import catamount.common as catcm
import catamount.find_clusters as catfc
import catamount.sunmetrics as catsm
# BEGIN SCRIPT
# Command-line interface for the cluster finder.
argman = argparse.ArgumentParser(
    prog='FIND_CLUSTERS',
    description='Find clusters in GPS collar data',
    epilog='For this to work, there needs to be a text database of collar data, and a config file',
)

# One entry per option: (option strings, keyword arguments for add_argument).
# No explicit dest/action is given: argparse derives the destination from the
# long option name (e.g. --datafile_path -> args.datafile_path) and stores the
# value by default.
# Defaults come from the catamount.common configuration values (catcm.cfg_*).
option_table = [
    (
        ('-f', '--datafile_path'),
        dict(
            type=catcm.check_file_arg,
            default=catcm.cfg_datafile_path,
            help='Interpret this data file.',
        ),
    ),
    (
        ('-o', '--outdir_path'),
        dict(
            type=catcm.check_dir_arg,
            default=catcm.cfg_outdir_path,
            help='Specify an output directory.',
        ),
    ),
    (
        ('-c', '--catid'),
        dict(
            type=str,
            required=True,
            help='Show clusters for this cat.',
        ),
    ),
    (
        ('-r', '--radius'),
        dict(
            type=int,
            default=catcm.cfg_cluster_radius,
            help='Design radius of a cluster.',
        ),
    ),
    (
        # NOTE(review): the converter name suggests hours in, seconds out.
        ('-t', '--time_cutoff'),
        dict(
            type=catcm.hours_arg_to_seconds,
            default=catcm.cfg_cluster_time_cutoff,
            help='Design time cutoff of a cluster in hours.',
        ),
    ),
    (
        ('-mc', '--minimum_count'),
        dict(
            type=int,
            default=catcm.cfg_cluster_minimum_count,
            help='Minimum number of points to qualify as a cluster.',
        ),
    ),
    (
        ('-ms', '--minimum_stay'),
        dict(
            type=catcm.hours_arg_to_seconds,
            default=catcm.cfg_cluster_minimum_stay,
            help='Minimum elapsed hours of clusters.',
        ),
    ),
    (
        # The parsed date value is indexed as [0] and [1] later in the script.
        ('-d1', '--start_date'),
        dict(
            type=catcm.date_string_to_objects,
            default=catcm.cfg_cluster_start_date,
            help='Limit clusters to ones that start after this date. YYYY-MM-DD',
        ),
    ),
    (
        ('-d2', '--end_date'),
        dict(
            type=catcm.date_string_to_objects,
            default=catcm.cfg_cluster_end_date,
            help='Limit clusters to ones that start before this date. YYYY-MM-DD',
        ),
    ),
    (
        ('-x', '--text_style'),
        dict(
            choices=['csv', 'csv-all', 'descriptive', 'descriptive-all'],
            default='csv',
            help='Text output style: csv, csv-all, descriptive, descriptive-all',
        ),
    ),
    (
        # False (not a string) means "no zoom requested".
        ('-z', '--clusterid'),
        dict(
            type=str,
            default=False,
            help='Zoom in on a specific cluster.',
        ),
    ),
]

for option_flags, option_kwargs in option_table:
    argman.add_argument(*option_flags, **option_kwargs)

# The argument parser does a lot of the work here:
# * command line arguments take precedence
# * config file values are the fallback
# * values are converted to the appropriate types
# * invalid arguments are rejected
args = argman.parse_args()
# Keep the integer arguments inside a reasonable range. Each pair is the
# attribute on args and its upper limit; the lower limit is always 0.
# NOTE(review): the two time limits look like seconds
# (31536000 = 365 * 86400, 8640000 = 100 * 86400) -- confirm.
for option_name, upper_limit in (
    ('radius', 1000),
    ('time_cutoff', 31536000),
    ('minimum_count', 100),
    ('minimum_stay', 8640000),
):
    constrained_value = catcm.constrain_integer(getattr(args, option_name), 0, upper_limit)
    setattr(args, option_name, constrained_value)
# Create a SunMetrics object so any fixes can compute day and night
sun_metrics = catsm.SunMetrics()

# Index of the CSV column that holds the cat id; converted once, not per row.
catid_column = int(catcm.cfg_data_column_catid)

# Open and process the data file.
# newline='' is what the csv module asks for, so that it handles line endings
# (including newlines embedded in quoted fields) itself.
with open(args.datafile_path, 'rt', newline='') as datafile:
    # Limit to just one cat. Rows too short to contain the cat id column
    # (blank lines, truncated records) are skipped instead of raising
    # IndexError and aborting the whole run.
    csvrows = [
        csvrow
        for csvrow in csvreader(datafile)
        if -len(csvrow) <= catid_column < len(csvrow)
        and csvrow[catid_column] == args.catid
    ]

# If no rows were retrieved, warn user that cat is not represented in the current data
if not csvrows:
    sys.exit('No CSV data was found for cat with id {}.'.format(args.catid))

# Create a new Trail object, which is a series of fixes
trail = catfc.FCTrail(args.catid, args.radius, args.time_cutoff, args.minimum_count, args.minimum_stay)

# For every row, create a Fix object and add it to the Trail.
# Rows that cannot be turned into a Fix are reported on stderr and skipped.
for csvrow in csvrows:
    try:
        new_fix = catcm.Fix(csvrow, sun_metrics)
    except IndexError:
        sys.stderr.write('CSV row doesn’t have expected number of columns: {}\n'.format(csvrow))
        continue
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must still be able to stop the script.
        sys.stderr.write('CSV row doesn’t look like data: {}\n'.format(csvrow))
        continue
    # Add the fix to the trail
    trail.fixes.append(new_fix)
# Hand any requested date limits to the trail. Each parsed date argument is
# indexed here: element 0 goes to the *_dateobj attribute, element 1 to *_time.
if args.start_date:
    trail.start_dateobj = args.start_date[0]
    trail.start_time = args.start_date[1]
if args.end_date:
    trail.end_dateobj = args.end_date[0]
    trail.end_time = args.end_date[1]
trail.filter_by_date()

# Filtering by date may have removed everything
if not trail.fixes:
    sys.exit('No data remaining after filtering by date. Try adjusting the date range.')

# Run the remaining processing steps, strictly in this order.
processing_steps = (
    trail.order_by_time,               # put the current collection of fixes in order by time
    trail.remove_duplicates,           # remove any duplicate entries, seen in some data sets
    trail.find_bounds,                 # find the farthest distance in each direction
    trail.find_clusters,               # find clusters
    trail.calculate_cluster_averages,  # calculate averages, after all points have been added
)
for run_step in processing_steps:
    run_step()
# Build the output image path: <outdir>/<generated name>.png
imagename = catfc.create_filename(args.catid, args.start_date, args.end_date, args.clusterid)
imagepath = os.path.join(args.outdir_path, imagename + '.png')

# Prepare the date limit strings used in image legends. A legend attribute is
# only set when the corresponding date limit was given.
for legend_attribute, date_limit in (
    ('legend_start_date', args.start_date),
    ('legend_end_date', args.end_date),
):
    if date_limit:
        setattr(trail, legend_attribute, date_limit[0].strftime(catcm.DATE_FMT_ISO_SHORT))
# Final stage: draw the feedback image and print the text report, either for
# one zoomed-in cluster or for every cluster that survives the filters.
if args.clusterid:
    # Limit to one cluster
    # NOTE(review): what return_cluster_by_id does for an unknown id is not
    # visible here; if it can return None the next line fails -- confirm.
    cluster = trail.return_cluster_by_id(args.clusterid)
    # Create feedback image
    cluster.create_image(imagepath, 'auto')
    # Do a text report of this cluster; show all points by default.
    # Both csv styles select the CSV report, so 'csv-all' no longer falls
    # through to the descriptive report.
    if args.text_style in ('csv', 'csv-all'):
        # The method has to be called; a bare attribute reference prints nothing.
        # NOTE(review): assumes the cluster's csv_report accepts the same
        # all-points flag as cluster.descriptive_report and trail.csv_report
        # -- confirm against its definition.
        cluster.csv_report(True)
    else:
        cluster.descriptive_report(True)
    # Account of what was done
    sys.stderr.write('Cluster {} shown.\n'.format(args.clusterid))
else:
    # Otherwise report on all clusters.
    # Apply limiting filters to collection of clusters
    trail.filter_clusters_by_count()
    trail.filter_clusters_by_stay()
    # Create feedback image
    trail.create_image(imagepath, 50)
    # Do a text report of all clusters found. The '-all' styles pass True to
    # the report method, the plain styles pass False.
    show_all_points = args.text_style.endswith('-all')
    if args.text_style.startswith('descriptive'):
        trail.descriptive_report(show_all_points)
    else:
        trail.csv_report(show_all_points)
    # Account of what was done
    sys.stderr.write('{} clusters found.\n'.format(len(trail.clusters)))