/
filterMetadata.py
executable file
·90 lines (70 loc) · 2.88 KB
/
filterMetadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python
import os
import sys
from itertools import groupby
from operator import itemgetter
from optparse import OptionParser

import yaml

import gyrodata
def _hasValidRecords(path, numKeys, minRecords):
    """Check that the CSV file at *path* holds enough well-formed records.

    Args:
        path: location handed to ``gyrodata.readCsvData``.
        numKeys: number of non-falsy fields the fullest record must have.
        minRecords: minimum number of records the file must contain.

    Returns:
        True if the file has at least ``minRecords`` records (and at least
        one) and its fullest record has exactly ``numKeys`` non-falsy
        fields; False otherwise.
    """
    data = gyrodata.readCsvData(path)
    # An empty file can never be valid; checking it here also keeps max()
    # below from raising ValueError on an empty sequence.
    if len(data) == 0 or len(data) < minRecords:
        return False
    # Count the non-falsy fields of every record and compare the largest
    # count with the expected number of columns.
    fieldCounts = [sum(1 for field in record if field) for record in data]
    return max(fieldCounts) == numKeys


def isValidEntry(entry, config):
    """Decide whether a metadata entry passes the filters in *config*.

    Filters are read from ``config['data-filters']``. Every filter is
    optional: a filter that is absent or has a falsy value is ignored.

    * ``gender`` must be equal to the entry's value.
    * ``activity``, ``position``, ``activityFolder``, ``mount`` and
      ``direction`` must be contained in the entry's value.
    * ``accfile``, ``gyrofile``, ``weight``, ``height`` and ``age``, when
      set, require the entry's value to be non-falsy.
    * ``extreme`` names an entry attribute; entries whose value lies inside
      the closed interval given by the first two items of
      ``config['output']['buckets']['splits']`` are rejected.
    * ``accfile`` / ``gyrofile``, when set, additionally require the
      referenced file to contain at least ``minRecords`` records (0 if the
      filter is not configured) with 4 non-falsy fields in the fullest one.

    Args:
        entry: mapping from attribute name to value for one metadata row.
        config: parsed configuration mapping; must contain
            ``'data-filters'``, and ``'output'`` when ``extreme`` is set.

    Returns:
        True if the entry passes every active filter, False otherwise.

    Raises:
        KeyError: if the entry lacks an attribute an active filter refers to.
    """
    filters = config['data-filters']

    identicalAttributes = ('gender',)
    containsAttributes = ('activity', 'position', 'activityFolder',
                          'mount', 'direction')
    presentAttributes = ('accfile', 'gyrofile', 'weight', 'height', 'age')

    for attr in identicalAttributes:
        expected = filters.get(attr)
        if expected and expected != entry[attr]:
            return False

    for attr in containsAttributes:
        expected = filters.get(attr)
        if expected and expected not in entry[attr]:
            return False

    for attr in presentAttributes:
        if filters.get(attr) and not entry[attr]:
            return False

    # Keep only "extreme" values: reject anything inside the middle bucket.
    variable = filters.get('extreme')
    if variable:
        splits = config['output']['buckets']['splits']
        value = float(entry[variable])
        if float(splits[0]) <= value <= float(splits[1]):
            return False

    # File checks come last because they read the data files from disk.
    minRecords = filters.get('minRecords', 0)
    for attr in ('accfile', 'gyrofile'):
        if filters.get(attr) and not _hasValidRecords(entry[attr], 4, minRecords):
            return False

    return True
def main():
    """Filter a metadata file from the command line and write the result.

    Expects exactly one positional argument, the source metadata file.
    ``-c/--config-file`` names the YAML filter configuration (default
    ``gold.yml``) and ``-o/--output`` the destination file (default
    ``filtered.csv``).

    Entries are read with ``gyrodata.readMetadata``, kept when
    ``isValidEntry`` accepts them, optionally reduced to the first entry per
    ``person`` when the ``unique`` data filter is set, and written with
    ``gyrodata.writeMetadata``.

    Exits with an error message when the argument count is wrong or the
    configuration file cannot be opened.
    """
    parser = OptionParser(usage="usage: %prog [options] source")
    parser.add_option("-c", "--config-file",
                      action="store",
                      dest="config",
                      default="gold.yml",
                      help="Configuration file for filter")
    parser.add_option("-o", "--output",
                      action="store",
                      dest="output",
                      default="filtered.csv",
                      help="Output file")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("wrong number of arguments")

    try:
        with open(options.config, 'r') as configFile:
            # safe_load only builds plain Python objects; yaml.load without
            # an explicit Loader is unsafe and rejected by PyYAML >= 6.
            config = yaml.safe_load(configFile)
    except IOError:
        sys.exit("Unable to find configuration file " + options.config)

    data = gyrodata.readMetadata(args[0])
    filteredData = [entry for entry in data if isValidEntry(entry, config)]

    # Keep only the first entry of each person. A seen-set is used rather
    # than itertools.groupby because groupby only merges *consecutive*
    # equal keys and would let duplicates through on unsorted input.
    if config['data-filters'].get('unique'):
        seenPersons = set()
        uniqueData = []
        for entry in filteredData:
            person = entry['person']
            if person not in seenPersons:
                seenPersons.add(person)
                uniqueData.append(entry)
        filteredData = uniqueData

    print("Loaded %d entries!" % len(filteredData))
    gyrodata.writeMetadata(filteredData, options.output)
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()