Ejemplo n.º 1
0
import ocsv
import sys

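# NOTE(review): the visible code below hard-codes the input path and the column
# names instead of reading them from argv; the layout listed here is presumably
# the intended command line -- confirm before relying on it.
# argv[1] - input file
# argv[2] - name of the column to be flipped
# argv[3] - output file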

# Input CSV. NOTE(review): the path is hard-coded to one machine rather than
# taken from sys.argv -- confirm this is intentional.
fin = open("C:\\Users\\AadarshSam\\Desktop\\CAPSTONE PROJECT_SCRIPTS\\PID.csv", 'r')


# col maps a header name to the position used to index a split row (see func).
# The header line is passed unstripped, which is presumably why func looks the
# last column up as 'cost\n' -- verify against ocsv.getColumns.
col = ocsv.getColumns(fin.readline())
flipCol = col['daystonext']
# NOTE(review): flipCol2 and flipcol3 are not read anywhere in the visible code.
flipCol2=col['agyradm']
flipcol3=col['Gender']
# PID of the most recently processed row; '' until the first row has been seen.
currentpid = ''
# One entry per run of consecutive rows sharing a PID, each shaped as
# [[daystonext values], [cost values]] (a list of list of list).
seqs = []
def func(line):
  """Fold one CSV data line into the per-patient sequences in `seqs`.

  The row's 'daystonext' and cost values are appended to the last entry of
  `seqs` when the row has the same PID as the previous row; otherwise a new
  [[daystonext], [cost]] entry is started. `currentpid` is then updated.

  line -- one raw data line, comma separated, with or without a newline.
  Raises KeyError / IndexError if a required column is missing from `col`
  or from the row.
  """
  global currentpid, seqs
  row = line.strip().split(',')
  pid = row[col['PID']]
  # The header is parsed without stripping its newline, so the last column is
  # normally keyed 'cost\n'; accept a cleaned 'cost' key as well so this keeps
  # working if the header is ever stripped before parsing.
  costCol = col['cost\n'] if 'cost\n' in col else col['cost']
  # Read both values before touching seqs so a short row cannot leave the two
  # parallel lists with different lengths.
  days = row[flipCol]
  cost = row[costCol]
  # `seqs` must be non-empty before extending its last entry: a first row whose
  # PID is '' equals the initial currentpid and used to hit seqs[-1] on [].
  if seqs and pid == currentpid:
    seqs[-1][0].append(days)
    seqs[-1][1].append(cost)
  else:
    seqs.append([[days], [cost]])

  currentpid = pid
# Hand the rest of fin (the header was already consumed) to func through ocsv;
# presumably runFunc calls func once per remaining line -- verify in ocsv.
ocsv.runFunc(fin, func)
Ejemplo n.º 2
0
  newline = line.strip()
  row = newline.split(',')
  if skipLast and row[col['nextLOS']] == '': return
  newline += ',' + bucketLOS(row[col['LOS']])
  newline += ',' + bucketLOS(row[col['nextLOS']])
  newline += ',' + bucketCost(row[col['cost']])
  newline += ',' + bucketCost(row[col['nextCost']])
  newline += ',' + dxmap[row[col['diag_p']]]
  newline += ',' + prmap[row[col['proc_p']]]
  fout.write(newline + '\n')

# Command line: argv[1] input CSV, argv[2] output CSV, argv[3] the literal
# string 'True' to make convert skip rows whose nextLOS field is empty.
skipLast = sys.argv[3] == 'True'
# Both files are managed by `with` so they are closed even when a mapping file
# is missing or convert raises part-way through the input.
with open(sys.argv[1], 'r') as fin, open(sys.argv[2], 'w') as fout:
  # NOTE(review): chf is not read in the visible part of this script.
  chf = 'chf' in sys.argv[1].lower()
  colline = fin.readline().strip()
  col = ocsv.getColumns(colline)
  dxmap = parseICD9Mapping('AppendixASingleDX.txt')
  prmap = parseICD9Mapping('AppendixBSinglePR.txt')

  # add new columns: each one takes the next free index, in the same order
  # that convert appends the corresponding values to every row
  newcols = ['LOS_b', 'nextLOS_b', 'cost_b', 'nextCost_b', 'diag_p_ccs', 'proc_p_ccs']
  for newcol in newcols:
    col[newcol] = len(col)
  colline = ','.join([colline] + newcols)

  fout.write(colline + '\n')
  ocsv.runFunc(fin, convert)
Ejemplo n.º 3
0
import ocsv
import sys
import random

# argv[1] input file
# argv[2] first output file
# argv[3] second output file

# Outputs are opened (and truncated) first, then the input.
f1 = open(sys.argv[2], 'w')
f2 = open(sys.argv[3], 'w')
fin = open(sys.argv[1], 'r')

# Read the header once, index its columns, then copy it verbatim to both outputs.
line = fin.readline()
col = ocsv.getColumns(line)
for headerSink in (f1, f2):
  headerSink.write(line)

# Latest PID routed to f1 (pid1) and to f2 (pid2); '' until func assigns them.
pid1 = pid2 = ''
def func(line):
  """Route one raw CSV line to f1 or f2 according to the row's gender.

  A PID that matches neither remembered PID is remembered as pid1 when its
  gender field is 0 (female) and as pid2 otherwise. The line is then written
  to f1 if its PID is the remembered pid1, and to f2 in every other case.
  """
  global pid1, pid2
  fields = line.strip().split(',')
  pid = fields[col['PID']]
  if pid not in (pid1, pid2):
    isFemale = int(fields[col['gender']]) == 0  # 0 is female
    if isFemale:
      pid1 = pid
    else:
      pid2 = pid
  target = f1 if pid == pid1 else f2
  target.write(line)
Ejemplo n.º 4
0
import ocsv
import sys

# Both paths are required. Without them `path` and `out` were never bound and
# the script died on a NameError at the first open(); fail with a usage
# message instead.
if len(sys.argv) <= 2:
  sys.exit('usage: ' + sys.argv[0] + ' <input csv> <output csv>')
path = sys.argv[1] # input path
out = sys.argv[2] # output path

fin = open(path, 'r')
fout = open(out, 'w')
# The header is copied through unchanged and also used to index the columns.
line = fin.readline()
fout.write(line)
col = ocsv.getColumns(line.strip())

# Raw lines buffered for the current patient; func writes them out when the
# PID changes unless the patient has been marked to skip.
samePtnLines = []
# PID of the row processed last; '' before the first row.
pid = ''
# True once the current patient should be dropped from the output.
skipPtn = False

def func(line):
  global samePtnLines, pid, skipPtn
  row = line.strip().split(',')
  rowpid = row[col['PID']]
  if pid != '' and pid != rowpid:
    if not skipPtn:
      for samePLine in samePtnLines:
        fout.write(samePLine)
    skipPtn = False
    samePtnLines = []
  pid = rowpid
  if skipPtn:
    return
  if float(row[col['cost']]) <= 1:
Ejemplo n.º 5
0
      truepos = truepos + 1
    else:
      falsepos = falsepos + 1
  elif row[col['thirtyday']] == '0':
    trueneg = trueneg + 1
  else:
    falseneg = falseneg + 1

def _safeRatio(numerator, denominator):
  """Return numerator / denominator as a float, or NaN when denominator is 0."""
  if denominator == 0:
    return float('nan')
  return float(numerator) / denominator


# argv[5] and argv[6] give the inclusive sequence-length range, so both must be
# present (len > 6); the old `> 4` check raised IndexError when only one or
# neither of them was supplied instead of falling back to [0].
seqRange = range(int(sys.argv[5]), int(sys.argv[6]) + 1) if len(sys.argv) > 6 else [0]
msg = 'SeqLength,T+,T-,F+,F-,rowcount,accuracy,precision,baseline'
# `with` closes the report (and each test file) even if a run raises.
with open(sys.argv[4], 'w') as fout:
  fout.write(msg + '\n')
  print(msg)
  for seqLength in seqRange:
    with open(sys.argv[1], 'r') as ftest:
      col = ocsv.getColumns(ftest.readline())
      # Per-run state, reset for every sequence length. These are module-level
      # names; presumably nb updates them via `global` -- its visible tail
      # increments the four counters.
      currentPID = ''
      currentSeq = []
      truepos = 0
      trueneg = 0
      falsepos = 0
      falseneg = 0
      rowcount = 0
      ocsv.runFunc(ftest, nb)
    # An empty test file (rowcount 0) or a run with no positive predictions
    # used to raise ZeroDivisionError; those ratios are now reported as nan.
    result = [
      seqLength, truepos, trueneg, falsepos, falseneg, rowcount,
      _safeRatio(truepos + trueneg, rowcount),
      _safeRatio(truepos, truepos + falsepos),
      _safeRatio(trueneg + falsepos, rowcount),
    ]
    msg = ','.join(str(item) for item in result)
    fout.write(msg + '\n')
    print(msg)
Ejemplo n.º 6
0
import ocsv
import sys

# argv[1]: input CSV, argv[2]: output CSV, argv[3]: CCS code selecting the cohort.
fin = open(sys.argv[1], 'r')
fout = open(sys.argv[2], 'w')
# Suffix of the 'DXCCS_<code>' column whose value '1' marks cohort membership
# (see saveCohortPID).
CCS = sys.argv[3]

# PIDs having at least one row flagged in that column; filled by saveCohortPID.
pids = set()
def saveCohortPID(line):
  """Record the row's PID in `pids` when its 'DXCCS_<CCS>' field equals '1'."""
  fields = line.strip('\n').split(',')
  flagIndex = col['DXCCS_' + CCS]
  if fields[flagIndex] != '1':
    return
  pids.add(fields[col['PID']])

def outputCohort(line):
  """Copy the raw line to `fout` when the row's PID was collected in `pids`."""
  fields = line.strip('\n').split(',')
  pid = fields[col['PID']]
  if pid not in pids:
    return
  fout.write(line)

# Pass 1: copy the header to the output, index the columns from it, then run
# saveCohortPID over the rest of the input to collect the cohort's PIDs.
line = fin.readline()
fout.write(line)
col = ocsv.getColumns(line.strip('\n'))
print('Finding all PID in this cohort')
ocsv.runFunc(fin, saveCohortPID)
print('There are totally ' + str(len(pids)) + ' PIDs in this cohort')

# Pass 2: reopen the input from the start, read past its header (already
# written above), and run outputCohort to emit the rows of collected PIDs.
fin.close()
fin = open(sys.argv[1], 'r')
line = fin.readline()
print('Writing cohort to output')
ocsv.runFunc(fin, outputCohort)