/
getTotals.py
157 lines (139 loc) · 4.2 KB
/
getTotals.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
'''Writes frequencies of adversity measures, tabulated by ER status, to an xlsx workbook'''
from collections import OrderedDict
from datetime import datetime
from manifest import *
from numpy import zeros
import os
from pandas import DataFrame, ExcelWriter
from windowspath import *
class Total():
    '''Holds every recorded value of one measure, split into ER+, ER- and control lists.

    The value -1 is the marker for missing data (NA); a literal -1 in the
    input is therefore indistinguishable from NA.
    '''

    def __init__(self):
        self.pos = []
        self.neg = []
        self.control = []

    def getNA(self):
        '''Returns the percentage of NA (-1) entries as [ER+, ER-, control].

        A group without any entries reports 0.0 instead of raising
        ZeroDivisionError.
        '''
        ret = []
        for group in (self.pos, self.neg, self.control):
            if group:
                ret.append(group.count(-1) / len(group) * 100)
            else:
                # Nothing recorded for this group, so there is nothing missing
                ret.append(0.0)
        return ret

    def add(self, status, val):
        '''Appends val to the list selected by status.

        "P" selects ER+, "N" selects ER-; any other status is stored as control.
        '''
        if status == "P":
            self.pos.append(val)
        elif status == "N":
            self.neg.append(val)
        else:
            self.control.append(val)

    def setKeys(self):
        '''Returns the sorted list of distinct values seen across all three groups.'''
        return sorted(set(self.pos + self.neg + self.control))

    def getDF(self):
        '''Returns a DataFrame of counts: one row per distinct value, one column per group.

        The first row, "Total", holds the column sums.
        '''
        col = ["ER+", "ER-", "Control"]
        groups = (self.pos, self.neg, self.control)
        keys = ["Total"] + self.setKeys()
        ret = DataFrame(zeros((len(keys), len(col)), dtype = int), columns = col, index = keys)
        # The "Total" row starts at zero, so it does not inflate the sums below
        for k in keys[1:]:
            for name, values in zip(col, groups):
                ret.loc[k, name] = values.count(k)
        ret.loc["Total"] = ret.sum()
        return ret
class Counter():
    '''Tallies each measure by ER status from the merged input file.

    Construction reads the whole input file; writeXLSX() then writes one
    worksheet per measure and printComplete() reports the complete-record
    counts.
    '''

    def __init__(self):
        # Columns, getMergedFile and setPath are not defined in this file
        # (presumably supplied by the star imports at the top)
        c = Columns()
        self.infile = getMergedFile(subset = True)
        self.outfile = ("{}adversityTotals.{}.xlsx").format(setPath(), datetime.now().strftime("%Y-%m-%d"))
        # Column name -> column index, filled from the first line of the input
        self.header = {}
        # Measure name -> Total instance
        self.totals = {}
        # Per-status counts of rows flagged "Complete" / "AllMeasures"
        self.complete = {"P":0, "N":0, "C":0}
        self.all = {"P":0, "N":0, "C":0}
        self.columns = c.plot
        self.__setFields__()
        self.__getTotals__()

    def __setFields__(self):
        '''Creates an empty Total for every measure in self.columns.'''
        for i in self.columns:
            self.totals[i] = Total()

    def _cell(self, row, name):
        '''Returns the whitespace-stripped field of row under column name, or None if the row is too short.'''
        idx = self.header[name]
        if idx < len(row):
            return row[idx].strip()
        return None

    def __parentAlive__(self, k, row):
        '''Returns True if k is "AgeMaD"/"AgePaD" and the matching MAlive18/PAlive18 field is "1".'''
        # Not called anywhere in the visible part of this file
        if k == "AgeMaD":
            return row[self.header["MAlive18"]] == "1"
        if k == "AgePaD":
            return row[self.header["PAlive18"]] == "1"
        return False

    def __parseRow__(self, status, row):
        '''Records every measure of row under status and updates the completeness counters.

        Values that are not numeric, are NaN, or are absent because the row is
        too short are recorded as NA (-1).
        '''
        for k in self.columns:
            try:
                val = float(row[self.header[k]])
            except (ValueError, IndexError):
                # Record NAs
                val = -1
            else:
                # float() accepts "nan"; NaN is the only value unequal to itself.
                # Left as-is it would defeat de-duplication and sorting of keys.
                if val != val:
                    val = -1
            self.totals[k].add(status, val)
        if self._cell(row, "Complete") == "1":
            self.complete[status] += 1
        if self._cell(row, "AllMeasures") == "1":
            self.all[status] += 1

    def __getStatus__(self, row):
        '''Returns "P", "N" or "C" for row, or None if the row should be skipped.

        Rows whose Case field is not "1" are controls ("C"). For cases, an ER
        field of "0" yields "P" and "1" yields "N"; any other (or missing) ER
        value yields None. Rows too short to hold the Case field yield None.
        '''
        # NOTE(review): ER "0" -> "P" and "1" -> "N" is kept exactly as found;
        # confirm against the input file's coding of the ER column.
        case = self._cell(row, "Case")
        if case is None:
            return None
        if case != "1":
            return "C"
        er = self._cell(row, "ER")
        if er == "0":
            return "P"
        if er == "1":
            return "N"
        return None

    def __getTotals__(self):
        '''Reads the input file and tallies every data row by ER status.

        The first line is the header: it determines the delimiter and the
        column indices. Blank lines are skipped.
        '''
        first = True
        d = None
        print("\n\tReading input file...")
        with open(self.infile, "r") as f:
            for line in f:
                if first:
                    line = line.strip()
                    d = getDelim(line)
                    self.header = setHeader(line.split(d))
                    first = False
                    continue
                if not line.strip():
                    continue
                # Remove only the line terminator: a bare strip() would also eat
                # leading/trailing tab delimiters and shift or drop empty fields
                row = line.rstrip("\r\n").split(d)
                status = self.__getStatus__(row)
                if status is not None:
                    self.__parseRow__(status, row)

    def writeXLSX(self):
        '''Writes one worksheet per measure to the xlsx file at self.outfile.'''
        print("\tWriting tables to file...")
        with ExcelWriter(self.outfile) as writer:
            for k, total in self.totals.items():
                total.getDF().to_excel(writer, sheet_name = k)

    def printComplete(self):
        '''Prints the number of complete records per status and passes the totals to fileTotals.'''
        print("\n\tNumber of complete records:")
        print("\t\tStatus\tComplete\tAllMeasures")
        print(("\t\tER+\t{}\t\t{}").format(self.complete["P"], self.all["P"]))
        print(("\t\tER-\t{}\t\t{}").format(self.complete["N"], self.all["N"]))
        print(("\t\tControl\t{}\t\t{}\n").format(self.complete["C"], self.all["C"]))
        # fileTotals receives (label, cases, controls); cases are ER+ plus ER-
        fileTotals("Core measures complete", self.complete["P"] + self.complete["N"], self.complete["C"])
        fileTotals("All measures complete", self.all["P"] + self.all["N"], self.all["C"])
def main():
    '''Builds the Counter, writes the workbook, prints the summary and the elapsed time.'''
    began = datetime.now()
    counter = Counter()
    counter.writeXLSX()
    counter.printComplete()
    elapsed = datetime.now() - began
    print("\tTotal runtime: {}\n".format(elapsed))


# Run the tally only when executed as a script, not when imported
if __name__ == "__main__":
    main()