-
Notifications
You must be signed in to change notification settings - Fork 0
/
summary.py
37 lines (34 loc) · 1.2 KB
/
summary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
## quick stats on gaps
import csv
import sys
import numpy as np
import pandas as pd
from joblib import Parallel, delayed
import preproc
import training
def summarize(file_name):
    """Write a per-column summary CSV for one input CSV file.

    Reads the header row of ``<file_name>.csv`` from the ``data/mimic``
    folder, then for every header column writes three rows to
    ``summary_of_<file_name>.csv`` in the ``data/output`` folder:

    1. the column name,
    2. the fixed key row (``black_cnt``, ``gaps``, ``gap_indeces``,
       ``missing_value_proportion``),
    3. whatever ``preproc.check`` returns for that column.

    Args:
        file_name: Base name of the input file without the ``.csv``
            extension. It is converted with ``str``, so integers work.

    Returns:
        None. The result is the file written to the output folder.

    Raises:
        IOError/OSError: if the input cannot be opened or the output
            cannot be created.
        StopIteration: if the input file has no header row.
    """
    file_path = '/Users/aubrey9012/Downloads/medical_ICU-features/data/mimic/'+str(file_name)+'.csv'
    output_path = '/Users/aubrey9012/Downloads/medical_ICU-features/data/output/summary_of_'+str(file_name)+'.csv'

    # Only the header row is needed from the input here; the column data
    # itself is loaded by preproc.get_column from the path. `with` makes
    # sure the handle is released even if reading the header fails.
    with open(file_path, 'r') as f:
        # next(reader) works on Python 2.6+ and 3, unlike reader.next().
        h = next(csv.reader(f))

    # Drop the first and last character of every header cell.
    # NOTE(review): presumably these are surrounding quote characters
    # left in the raw header -- confirm against the data files.
    header = [str(i)[1:-1] for i in h]
    print(header)

    # NOTE(review): 'black_cnt' and 'gap_indeces' look like typos, but
    # they are written to the output file, so they are kept verbatim.
    key = ['black_cnt', 'gaps', 'gap_indeces', 'missing_value_proportion']

    # The output handle is closed on every exit path, including an
    # exception raised by preproc while processing a column.
    with open(output_path, 'wt') as output:
        writer = csv.writer(output)
        for col in header:
            writer.writerow([col])
            writer.writerow(key)
            sig = preproc.get_column(col, file_path)
            # Skip the first two entries of the column.
            # NOTE(review): presumably non-data rows -- TODO confirm.
            sig = sig[2:]
            res = preproc.check(sig, return_series=False)
            writer.writerow(res)
    return
if __name__ == "__main__":
    # Base names (without extension) of the files to summarize.
    file_names = [3642023, 3655233, 3656395, 3668415]
    # Build one deferred summarize() call per file and hand them to a
    # two-worker joblib pool.
    tasks = (delayed(summarize)(name) for name in file_names)
    runner = Parallel(n_jobs=2)
    runner(tasks)