예제 #1
0
파일: ngram.py 프로젝트: darrenhon/projects
def readData(path):
  """Load the two-integer header and the integer rows stored at ``path``.

  The first line of the file must consist of exactly two space-separated
  integers. The still-open file is then passed to ``ocsv.runFunc`` together
  with a callback that, for each line it receives, drops the first
  space-separated token and parses the remaining tokens as integers.

  Returns the tuple ``(lines, syms, rows)``: the two header integers and the
  list of parsed rows collected by the callback.
  """
  rows = []

  def collect(line):
    # The leading token of each data line is discarded, the rest are ints.
    tokens = line.strip().split(' ')
    rows.append([int(token) for token in tokens[1:]])

  with open(path, 'r') as fin:
    header = fin.readline().strip().split(' ')
    lines, syms = [int(token) for token in header]
    ocsv.runFunc(fin, collect)
  return lines, syms, rows
예제 #2
0
# list of list of list
seqs = []
def func(line):
  """Fold one comma-separated record into the module-level ``seqs`` list.

  ``seqs`` holds one ``[flip_values, cost_values]`` pair per run of
  consecutive records sharing the same PID. A record whose PID equals
  ``currentpid`` extends the most recent pair; any other PID starts a new
  pair. ``currentpid`` is updated to this record's PID afterwards.
  """
  global currentpid, seqs
  fields = line.strip().split(',')
  #print fields[flipCol2]
  pid = fields[col['PID']]
  if pid != currentpid:
    # NOTE(review): the key 'cost\n' carries a trailing newline, presumably
    # because the header line was not stripped before col was built - confirm.
    seqs.append([[fields[flipCol]], [fields[col['cost\n']]]])
  else:
    latest = seqs[-1]
    latest[0].append(fields[flipCol])
    latest[1].append(fields[col['cost\n']])

  currentpid = pid
# Hand the open input and the func callback to ocsv.runFunc (presumably one
# call per remaining line - confirm against ocsv), then release the input.
ocsv.runFunc(fin, func)
fin.close()
#print seqs

#print seqs



freq = dict()
# NOTE(review): the output location is a hard-coded absolute Windows path, so
# the script only runs unmodified on the original author's machine.
fout = open("C:\\Users\\AadarshSam\\Desktop\\CAPSTONE PROJECT_SCRIPTS\\my_output.csv", 'w')

final_list=[]
trial_list=[]
for seq in seqs:
  #print seq[1]
  # skip sequence longer than 100
예제 #3
0
  newline = line.strip()
  row = newline.split(',')
  if skipLast and row[col['nextLOS']] == '': return
  newline += ',' + bucketLOS(row[col['LOS']])
  newline += ',' + bucketLOS(row[col['nextLOS']])
  newline += ',' + bucketCost(row[col['cost']])
  newline += ',' + bucketCost(row[col['nextCost']])
  newline += ',' + dxmap[row[col['diag_p']]]
  newline += ',' + prmap[row[col['proc_p']]]
  fout.write(newline + '\n')

# Command line: argv[1] input CSV, argv[2] output CSV, argv[3] 'True' to skip
# rows whose nextLOS field is empty (the flag is read by convert).
skipLast = sys.argv[3] == 'True'
chf = 'chf' in sys.argv[1].lower()

# Both files are managed by a with-block so they are closed even when header
# parsing, mapping loading or the per-row conversion raises.
with open(sys.argv[1], 'r') as fin, open(sys.argv[2], 'w') as fout:
  colline = fin.readline().strip()
  col = ocsv.getColumns(colline)
  dxmap = parseICD9Mapping('AppendixASingleDX.txt')
  prmap = parseICD9Mapping('AppendixBSinglePR.txt')

  # add new columns
  newcols = ['LOS_b', 'nextLOS_b', 'cost_b', 'nextCost_b', 'diag_p_ccs', 'proc_p_ccs']
  for newcol in newcols:
    colline += ',' + newcol
    # len(col) is evaluated before the insert, so each new column receives
    # the next free index.
    col[newcol] = len(col)

  fout.write(colline + '\n')
  ocsv.runFunc(fin, convert)
예제 #4
0
파일: spicify.py 프로젝트: darrenhon/oshpd
# Open the input and build the column-name -> index lookup from its header.
fin = open(path, "r")
col = ocsv.getColumns(fin.readline().strip())
syms = set()  # every distinct symbol seen in the target column
seqs = []  # one symbol list per PID, in order of first appearance
pidToSeq = dict()  # PID -> that PID's list inside seqs


def getSeqs(line):
    """Append the target symbol of one CSV record to its PID's sequence.

    The symbol is also added to ``syms``. The first record seen for a PID
    creates a new list that is registered in both ``seqs`` and ``pidToSeq``.
    """
    fields = line.strip().split(",")
    pid = fields[col["PID"]]
    sym = fields[col[target]]
    syms.add(sym)
    if pid not in pidToSeq:
        # First record for this PID: start an empty sequence for it.
        fresh = []
        seqs.append(fresh)
        pidToSeq[pid] = fresh
    pidToSeq[pid].append(sym)


# Build the per-PID sequences; close the input even if a record fails.
try:
    ocsv.runFunc(fin, getSeqs)
finally:
    fin.close()

# Second header field: the largest integer symbol plus one. With no data rows
# there are no symbols, so default to -1 and report 0 rather than letting
# max() raise ValueError on an empty sequence. Computed before the output is
# opened so a non-integer symbol does not leave a truncated output file.
alphabetSize = max((int(sym) for sym in syms), default=-1) + 1

with open(out, "w") as fout:
    # Header line: "<number of sequences> <largest symbol + 1>".
    fout.write(str(len(seqs)) + " " + str(alphabetSize) + "\n")

    # One line per sequence: its length followed by its symbols.
    for seq in seqs:
        fout.write(str(len(seq)) + " " + " ".join(seq) + "\n")
예제 #5
0
      truepos = truepos + 1
    else:
      falsepos = falsepos + 1
  elif row[col['thirtyday']] == '0':
    trueneg = trueneg + 1
  else:
    falseneg = falseneg + 1

def _ratio(numerator, denominator):
  """Return numerator / denominator, or NaN when the denominator is zero."""
  return numerator / denominator if denominator else float('nan')


# argv[5] and argv[6] give the inclusive range of sequence lengths to sweep.
# Both must be present (len(sys.argv) > 6); otherwise a single run with
# sequence length 0 is performed. The previous guard (> 4) let the indexing
# of argv[5]/argv[6] raise IndexError whenever the range was omitted.
seqRange = range(int(sys.argv[5]), int(sys.argv[6]) + 1) if len(sys.argv) > 6 else [0]

# argv[4] is the results file; the with-block closes it even if a run fails.
with open(sys.argv[4], 'w') as fout:
  msg = 'SeqLength,T+,T-,F+,F-,rowcount,accuracy,precision,baseline'
  fout.write(msg + '\n')
  print(msg)
  for seqLength in seqRange:
    # argv[1] is the test file; it is re-read from the start for every length.
    with open(sys.argv[1], 'r') as ftest:
      col = ocsv.getColumns(ftest.readline())
      # Reset the module-level state before each run (presumably read and
      # updated by the nb callback - confirm against its definition).
      currentPID = ''
      currentSeq = []
      truepos = 0
      trueneg = 0
      falsepos = 0
      falseneg = 0
      rowcount = 0
      ocsv.runFunc(ftest, nb)
    # _ratio yields NaN instead of raising ZeroDivisionError when no rows were
    # counted or no positive predictions were made for this length.
    result = [
      seqLength, truepos, trueneg, falsepos, falseneg, rowcount,
      _ratio(truepos + trueneg, rowcount),
      _ratio(truepos, truepos + falsepos),
      _ratio(trueneg + falsepos, rowcount),
    ]
    msg = ','.join([str(item) for item in result])
    fout.write(msg + '\n')
    print(msg)
예제 #6
0
      if rows[i][col['patzip']] in badZips and rows[i - 1][col['patzip']] not in badZips:
        rows[i][col['patzip']] = rows[i - 1][col['patzip']]
    # replace from the front
    for i in range(len(rows) - 1, 0, -1):
      if rows[i - 1][col['patzip']] in badZips and rows[i][col['patzip']] not in badZips:
        rows[i - 1][col['patzip']] = rows[i][col['patzip']]
  elif badZips == patZips:
    # replace by majority hospital zip
    hplZipTbl = ocsv.table([row[col['hplzip']] for row in rows])
    newZip = sorted(hplZipTbl.items(), key = lambda item: item[1], reverse=True)[0][0]
    for row in rows:
      row[col['patzip']] = newZip
  for row in rows:
    fout.write(','.join(row) + '\n')

# Run func over the input through ocsv.runFunc, then close both files.
# NOTE(review): the meaning of the third argument (True) is defined by
# ocsv.runFunc, which is not visible here - confirm before changing it.
ocsv.runFunc(fin, func, True)
fin.close()
fout.close()

#below code is for zip validation
#fin = open(inpath, 'r')
#col = ocsv.getColumns(fin.readline().strip())
#patZips = dict()
#patHplZips = dict()
#hplZips = dict()
#def func(line):
#  row = line.strip().split(',')
#  pid = row[col['PID']]
#  zp = row[col['patzip']]
#  hplzp = row[col['hplzip']]
#  hid = row[col['facility']]
예제 #7
0
import ocsv
import sys

# argv[1]: input file, argv[2]: output file,
# argv[3]: CCS code; rows are selected through the column 'DXCCS_<code>'.
fin = open(sys.argv[1], 'r')
fout = open(sys.argv[2], 'w')
CCS = sys.argv[3]

# PIDs that have at least one row whose DXCCS_<CCS> column equals '1'.
pids = set()
def saveCohortPID(line):
  """Add the PID of ``line`` to ``pids`` when its DXCCS_<CCS> field is '1'."""
  fields = line.strip('\n').split(',')
  flagColumn = 'DXCCS_' + CCS
  if fields[col[flagColumn]] != '1':
    return
  pids.add(fields[col['PID']])

def outputCohort(line):
  """Write ``line`` unchanged to ``fout`` when its PID belongs to ``pids``."""
  fields = line.strip('\n').split(',')
  if fields[col['PID']] not in pids:
    return
  fout.write(line)

# Copy the header row to the output and build the column lookup from it.
line = fin.readline()
fout.write(line)
col = ocsv.getColumns(line.strip('\n'))
print('Finding all PID in this cohort')
# First pass: saveCohortPID collects the PIDs that belong to the cohort.
ocsv.runFunc(fin, saveCohortPID)
print('There are totally ' + str(len(pids)) + ' PIDs in this cohort')

# Reopen the input for a second pass and consume its header line, which has
# already been written to the output above.
fin.close()
fin = open(sys.argv[1], 'r')
line = fin.readline()
print('Writing cohort to output')
# Second pass: outputCohort writes every row whose PID is in the cohort.
ocsv.runFunc(fin, outputCohort)
예제 #8
0
import ocsv
import sys
import random

# argv[1] input file
# argv[2] public output file
# argv[3] private output file

# read input file the first round to load all PIDs
pids = set()
# The with-block closes the input even if reading the header or a row fails.
with open(sys.argv[1], 'r') as fin:
  col = ocsv.getColumns(fin.readline())
  ocsv.runFunc(fin, lambda line: pids.add(line.strip().split(',')[col['PID']]))

# sample private PIDs: one tenth of all PIDs, rounded down.
# random.sample() needs a sequence; passing a set was deprecated in Python 3.9
# and raises TypeError since 3.11. Sorting also makes the draw reproducible
# for a fixed random seed.
pripids = set(random.sample(sorted(pids), len(pids) // 10))

# read input file the second round to divide into public and private
fpub = open(sys.argv[2], 'w')
fpri = open(sys.argv[3], 'w')
fin = open(sys.argv[1], 'r')
# Copy the header line to both outputs so each one starts with the header.
line = fin.readline()
fpub.write(line)
fpri.write(line)

def write(line):
  """Route ``line`` to ``fpri`` if its PID is in ``pripids``, else to ``fpub``."""
  pid = line.strip().split(',')[col['PID']]
  target = fpri if pid in pripids else fpub
  target.write(line)