def readData(path):
    """Load a sequence file: one header line followed by data lines.

    The header line must hold exactly two space-separated integers.  The
    still-open file is then passed to ``ocsv.runFunc`` together with a
    callback that, for each line it receives, drops the first
    space-separated field and converts the remaining fields to ``int``.

    Returns:
        A tuple ``(lines, syms, rows)`` where ``lines`` and ``syms`` are the
        two header integers and ``rows`` is the list of parsed int lists.
    """
    rows = []

    def collect(line):
        # Field 0 is skipped; only the fields after it are kept.
        fields = line.strip().split(' ')
        rows.append([int(field) for field in fields[1:]])

    with open(path, 'r') as fin:
        header = fin.readline().strip().split(' ')
        lines, syms = [int(field) for field in header]
        ocsv.runFunc(fin, collect)

    return lines, syms, rows
# list of list of list seqs = [] def func(line): global currentpid, seqs row = line.strip().split(',') #print row[flipCol2] pid = row[col['PID']] if pid == currentpid: seqs[-1][0].append(row[flipCol]) seqs[-1][1].append(row[col['cost\n']]) else: seqs.append([[row[flipCol]], [row[col['cost\n']]]]) currentpid = pid ocsv.runFunc(fin, func) fin.close() #print seqs #print seqs freq = dict() fout = open("C:\\Users\\AadarshSam\\Desktop\\CAPSTONE PROJECT_SCRIPTS\\my_output.csv", 'w') final_list=[] trial_list=[] for seq in seqs: #print seq[1] # skip sequence longer than 100
# NOTE(review): the statements down to the first fout.write(...) read `line`
# and use `return`, so they appear to be the body of the `convert` callback
# that is passed to ocsv.runFunc below; its `def` line is not visible here.
    newline = line.strip()
    row = newline.split(',')
    # When skipLast is set, rows with an empty nextLOS field are not written.
    if skipLast and row[col['nextLOS']] == '':
        return
    # Append six derived fields, in the same order as `newcols` below.
    newline += ',' + bucketLOS(row[col['LOS']])
    newline += ',' + bucketLOS(row[col['nextLOS']])
    newline += ',' + bucketCost(row[col['cost']])
    newline += ',' + bucketCost(row[col['nextCost']])
    # A diag_p / proc_p value missing from the mapping raises KeyError.
    newline += ',' + dxmap[row[col['diag_p']]]
    newline += ',' + prmap[row[col['proc_p']]]
    fout.write(newline + '\n')


# Command line: argv[1] = input CSV, argv[2] = output CSV,
# argv[3] = the literal string 'True' to enable skipLast.
skipLast = sys.argv[3] == 'True'
fin = open(sys.argv[1], 'r')
fout = open(sys.argv[2], 'w')
# True when the input path contains 'chf' (case-insensitive).
# NOTE(review): not used in the visible code -- presumably read by a helper
# defined earlier; confirm.
chf = 'chf' in sys.argv[1].lower()
colline = fin.readline().strip()
col = ocsv.getColumns(colline)
dxmap = parseICD9Mapping('AppendixASingleDX.txt')
prmap = parseICD9Mapping('AppendixBSinglePR.txt')
# add new columns
newcols = ['LOS_b', 'nextLOS_b', 'cost_b', 'nextCost_b', 'diag_p_ccs', 'proc_p_ccs']
for newcol in newcols:
    # Extend the header text and register the new column at the next index.
    colline += ',' + newcol
    col[newcol] = len(col)
fout.write(colline + '\n')
ocsv.runFunc(fin, convert)
fin.close()
fout.close()
# Export one symbol sequence per PID.
#
# Reads the CSV at `path`, collects the `target` column value of every row
# into a per-PID list (PIDs keep first-seen order), then writes to `out`:
#   line 1:  "<number of sequences> <largest symbol + 1>"
#   then:    "<sequence length> <sym> <sym> ..." for each sequence
#
# NOTE(review): `path`, `out` and `target` are not defined in the visible
# code -- presumably supplied by the enclosing scope; confirm.
syms = set()
seqs = []
pidToSeq = dict()

# `with` guarantees the input is closed even if parsing raises.
with open(path, "r") as fin:
    col = ocsv.getColumns(fin.readline().strip())

    def getSeqs(line):
        """Record the target symbol of one CSV line under its PID."""
        row = line.strip().split(",")
        pid = row[col["PID"]]
        sym = row[col[target]]
        syms.add(sym)
        seq = pidToSeq.get(pid)
        if seq is None:
            # First row for this PID: start a new sequence and index it.
            seq = []
            seqs.append(seq)
            pidToSeq[pid] = seq
        seq.append(sym)

    ocsv.runFunc(fin, getSeqs)

# Symbols are expected to be integer strings.  default=-1 makes an input with
# no data rows produce a symbol count of 0 instead of raising ValueError.
symCount = max((int(sym) for sym in syms), default=-1) + 1

with open(out, "w") as fout:
    fout.write(str(len(seqs)) + " " + str(symCount) + "\n")
    for seq in seqs:
        fout.write(str(len(seq)) + " " + " ".join(seq) + "\n")
truepos = truepos + 1 else: falsepos = falsepos + 1 elif row[col['thirtyday']] == '0': trueneg = trueneg + 1 else: falseneg = falseneg + 1 seqRange = range(int(sys.argv[5]), int(sys.argv[6]) + 1) if len(sys.argv) > 4 else [0] fout = open(sys.argv[4], 'w') msg = 'SeqLength,T+,T-,F+,F-,rowcount,accuracy,precision,baseline' fout.write(msg + '\n') print(msg) for seqLength in seqRange: ftest = open(sys.argv[1], 'r') col = ocsv.getColumns(ftest.readline()) currentPID = '' currentSeq = [] truepos = 0 trueneg = 0 falsepos = 0 falseneg = 0 rowcount = 0 ocsv.runFunc(ftest, nb) ftest.close() result = [seqLength, truepos, trueneg, falsepos, falseneg, rowcount, (truepos + trueneg) / rowcount, truepos / (truepos + falsepos), (trueneg + falsepos) / rowcount] msg = ','.join([str(item) for item in result]) fout.write(msg + '\n') print(msg) fout.close()
if rows[i][col['patzip']] in badZips and rows[i - 1][col['patzip']] not in badZips: rows[i][col['patzip']] = rows[i - 1][col['patzip']] # replace from the front for i in range(len(rows) - 1, 0, -1): if rows[i - 1][col['patzip']] in badZips and rows[i][col['patzip']] not in badZips: rows[i - 1][col['patzip']] = rows[i][col['patzip']] elif badZips == patZips: # replace by majority hospital zip hplZipTbl = ocsv.table([row[col['hplzip']] for row in rows]) newZip = sorted(hplZipTbl.items(), key = lambda item: item[1], reverse=True)[0][0] for row in rows: row[col['patzip']] = newZip for row in rows: fout.write(','.join(row) + '\n') ocsv.runFunc(fin, func, True) fin.close() fout.close() #below code is for zip validation #fin = open(inpath, 'r') #col = ocsv.getColumns(fin.readline().strip()) #patZips = dict() #patHplZips = dict() #hplZips = dict() #def func(line): # row = line.strip().split(',') # pid = row[col['PID']] # zp = row[col['patzip']] # hplzp = row[col['hplzip']] # hid = row[col['facility']]
"""Extract every row of the patients that belong to one CCS cohort.

Command line:
    argv[1]  input CSV
    argv[2]  output CSV
    argv[3]  CCS code; the column 'DXCCS_<code>' selects the cohort

The input is read twice: the first pass collects the PID of every row whose
DXCCS_<code> field is '1', the second pass copies every row of those PIDs.
"""
import ocsv
import sys

CCS = sys.argv[3]
pids = set()


def saveCohortPID(line):
    """Remember the PID of a row flagged '1' in the DXCCS_<CCS> column."""
    row = line.strip('\n').split(',')
    if row[col['DXCCS_' + CCS]] == '1':
        pids.add(row[col['PID']])


def outputCohort(line):
    """Copy the line to the output when its PID belongs to the cohort."""
    row = line.strip('\n').split(',')
    if row[col['PID']] in pids:
        fout.write(line)


# The context managers close (and flush) both files even if a pass raises;
# previously the output file and the second input handle were never closed.
with open(sys.argv[2], 'w') as fout:
    with open(sys.argv[1], 'r') as fin:
        line = fin.readline()
        fout.write(line)  # the header row is copied unchanged
        col = ocsv.getColumns(line.strip('\n'))
        print('Finding all PID in this cohort')
        ocsv.runFunc(fin, saveCohortPID)
    print('There are totally ' + str(len(pids)) + ' PIDs in this cohort')

    with open(sys.argv[1], 'r') as fin:
        line = fin.readline()  # skip the header, already written above
        print('Writing cohort to output')
        ocsv.runFunc(fin, outputCohort)
import ocsv
import sys
import random

# argv[1] input file
# argv[2] public output file
# argv[3] private output file

# read input file the first round to load all PIDs
pids = set()
with open(sys.argv[1], 'r') as fin:
    col = ocsv.getColumns(fin.readline())
    ocsv.runFunc(fin, lambda line: pids.add(line.strip().split(',')[col['PID']]))

# sample private PIDs (one tenth of all PIDs, rounded down)
# random.sample() needs a sequence: passing a set was deprecated in
# Python 3.9 and raises TypeError from 3.11 on, so sample from a sorted list.
pripids = set(random.sample(sorted(pids), len(pids) // 10))

# read input file the second round to divide into public and private
fpub = open(sys.argv[2], 'w')
fpri = open(sys.argv[3], 'w')
fin = open(sys.argv[1], 'r')
# the header row goes to both outputs
line = fin.readline()
fpub.write(line)
fpri.write(line)


def write(line):
    """Route one CSV line to the private or the public output by its PID."""
    if line.strip().split(',')[col['PID']] in pripids:
        fpri.write(line)
    else:
        fpub.write(line)