/
dataProcess.py
45 lines (36 loc) · 1.04 KB
/
dataProcess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import json
import sys
from DataSet import *
def main():
    """Process every JSON file named on the command line.

    For each filename in ``sys.argv[1:]`` the file is parsed as JSON, the
    parsed data is normalised with ``sanitizeDataDict`` and wrapped in a
    ``DataSet``.  The value returned by ``getHistRepr("difficulty")`` is
    printed, followed by that same value wrapped in ``BinnedDataDict``.

    When no filename is given, a usage line is printed and nothing else
    is done.
    """
    if len(sys.argv) < 2:
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and as the Python 3 print function.
        print('Usage: python dataProcess.py [filenames]')
        return

    for fname in sys.argv[1:]:
        # The context manager closes the file even if JSON parsing fails;
        # the original left the handle open.
        with open(fname) as f:
            dataArr = json.load(f)

        lowerDataArr = sanitizeDataDict(dataArr)
        ds = DataSet(lowerDataArr)

        classHist = ds.getHistRepr("difficulty")
        print(classHist)

        classHistBinned = BinnedDataDict(classHist)
        print(classHistBinned)
def sanitizeDataDict(dataArr):
    """Return a normalised copy of a sequence of dicts.

    Every value is processed in two steps:

    1. If the value has a ``lower`` attribute (e.g. a string) it is
       replaced by ``value.lower()``.
    2. ``int()`` is applied to the result.  If that raises ``TypeError``
       or ``ValueError`` the result of step 1 is kept unchanged.

    NOTE: because ``int()`` is applied to every value, floats are
    truncated (``2.9`` -> ``2``) and booleans become ``1``/``0``.

    Args:
        dataArr: iterable of dicts (as produced by parsing a JSON array of
            objects).

    Returns:
        A new list of new dicts with the same keys; the input is not
        modified.
    """
    outArr = []
    for group in dataArr:
        newGroup = {}
        # .items() is available on both Python 2 and Python 3 dicts,
        # whereas .iteritems() only exists on Python 2.
        for k, v in group.items():
            lowerV = v.lower() if hasattr(v, 'lower') else v
            try:
                finalV = int(lowerV)
            except (TypeError, ValueError):
                # Not convertible to an integer: keep the lower-cased value.
                finalV = lowerV
            newGroup[k] = finalV
        outArr.append(newGroup)
    return outArr
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()