def read_elixhauser(fn):
    """Parse a SAS ``Value $RCOMFMT`` format block into {dx: elixhauser label}.

    Codes listed on comma lines are buffered until an ``"dx"="label"`` line
    closes the group; every buffered code then maps to that label.
    """
    fn = rscfn(__name__, fn)
    dx2elix = {}
    pending = []        # dx codes seen so far that await their "=" line
    in_block = False
    with open(fn, "r") as fp:
        for line in fp:
            stripped = line.strip()
            if stripped == "Value $RCOMFMT":
                in_block = True
            if in_block and stripped == ";":
                break   # ";" terminates the format block
            if not in_block:
                continue
            if "=" in line:
                found = re.findall(r"\"(.*)\"=\"(.*)\"", line)
                if found and len(found[0]) == 2:
                    dx, elix = found[0]
                    pending.append(dx)
                    for code in pending:
                        dx2elix[code] = elix
                    pending = []
            elif "," in line:
                found = re.findall(r"\"(.*)\",", line)
                if found:
                    pending.append(found[0])
    return dx2elix
def read_dx2cc(fn):
    """Read a tab-delimited dx-to-CC file into {dx: "HCC<n>"} (one-to-one).

    NOTE(review): another ``read_dx2cc`` (one-to-many) exists later in this
    file — confirm the two definitions live in different modules.
    """
    fn = rscfn(__name__, fn)
    mapping = {}
    with open(fn, "r") as fp:
        for row in csv.reader(fp, delimiter="\t"):
            mapping[row[0].strip()] = "HCC" + row[1].strip()
    return mapping
def read_dx(fn):
    """Load a pipe-delimited two-column dx map, stripping '.' from both sides."""
    fn = rscfn(__name__, fn)
    with open(fn, mode='r') as f:
        return {row[0].replace('.', ''): row[1].replace('.', '')
                for row in csv.reader(f, delimiter='|')}
def read_product(digits=11):
    """Parse data/product.txt (tab-delimited NDC product file) into a dict.

    Returns {product_ndc: {"ndc", "name_proprietary", "name_generic",
    "substance_lst", "pc_lst"}}; "*_lst" fields are comma-split lists with
    empty items removed.  ``digits`` is accepted for interface parity but is
    not used inside this function.
    """
    columns = {
        "ndc": "PRODUCTNDC",
        "name_proprietary": "PROPRIETARYNAME",
        "name_generic": "NONPROPRIETARYNAME",
        "substance_lst": "SUBSTANCENAME",
        "pc_lst": "PHARM_CLASSES",
    }
    fn = rscfn(__name__, "data/product.txt")
    products = {}
    with open(fn, "r", encoding="ISO-8859-1") as fp:
        reader = csv.reader(fp, delimiter="\t")
        header = next(reader)
        # resolve each logical field to its column position once, up front
        idx = {name: header.index(cnm) for name, cnm in columns.items()}
        for row in reader:
            record = {}
            for name, i in idx.items():
                if "_lst" in name:
                    record[name] = [x for x in row[i].split(",") if len(x) > 0]
                else:
                    record[name] = row[i]
            products[row[idx["ndc"]]] = record
    return products
def read_new_zealand(fn):
    """Map column 2 to column 1 of a comma-delimited file, skipping the header."""
    fn = rscfn(__name__, fn)
    with open(fn, mode='r') as f:
        next(f)  # header row
        return {row[2]: row[1] for row in csv.reader(f, delimiter=',')}
def read_cache(fn):
    """Return the JSON cache stored at fn, or an empty dict if it is absent."""
    fn = rscfn(__name__, fn)
    if not os.path.isfile(fn):
        return {}
    with open(fn, "r") as fp:
        return json.load(fp)
def download_cpt():
    """Download HCUP CPT crosswalk files and cache them as JSON.

    Prompts for CPT license acceptance first (via license_cpt); returns 1
    if the license is rejected, 0 on success.  On success it writes
    data/cpt2ccs.json (CPT -> CCS category) and data/cpt2sflag.json
    (CPT -> surgery flag) relative to this package.
    """
    if not license_cpt():
        print("No CPT files downloaded.")
        return 1
    # --- CPT/HCPCS -> CCS (services & procedures) crosswalk, zipped CSV ---
    url = ("https://www.hcup-us.ahrq.gov/toolssoftware/" +
           "ccs_svcsproc/2019_ccs_services_procedures.zip")
    fn = "2019_ccs_services_procedures.csv"
    res = urlopen(url)
    cpt2ccs = {}
    with ZipFile(BytesIO(res.read())) as zp:
        for line in zp.open(fn).readlines():
            row = _clnrw(line.decode("utf-8").split(","))
            # usable rows have exactly 3 fields and a code *range* in col 0
            if len(row) != 3 or "-" not in row[0]:
                continue
            ccs = row[1]
            desc = row[2]
            # expand "nnnnn-mmmmm" into individual CPT codes
            for cpt in _expand_cpt(row[0]):
                cpt2ccs[cpt] = {"ccs": ccs, "ccs_desc": desc}
    fn = rscfn(__name__, "data/cpt2ccs.json")
    with open(fn, "w") as fp:
        json.dump(cpt2ccs, fp=fp, indent=2, sort_keys=True)
    # --- CPT -> surgery flag crosswalk (plain CSV, 2 preamble rows) ---
    cpt2sflag = {}
    desc = {"1": "broad", "2": "narrow"}
    url = ("https://www.hcup-us.ahrq.gov/toolssoftware/surgflags/" +
           "surgery_flags_cpt_2017.csv")
    res = urlopen(url)
    reader = csv.reader(StringIO(res.read().decode("utf-8")))
    meta = next(reader)    # file banner line (unused)
    header = next(reader)  # column names (unused)
    for row in reader:
        row = _clnrw(row)
        if "-" not in row[0]:
            continue
        flag = row[1]
        for cpt in _expand_cpt(row[0]):
            cpt2sflag[cpt] = {"flag": flag, "desc": desc[flag]}
    fn = rscfn(__name__, "data/cpt2sflag.json")
    with open(fn, "w") as fp:
        json.dump(cpt2sflag, fp=fp, indent=2, sort_keys=True)
    return 0
def load(self, fn=""):
    """Populate self.cpts with every code whose third column is "yes".

    Falls back to the bundled CY2019Q2 CMS CPT/HCPCS file when fn is empty.
    """
    if fn == "":
        fn = "data/CY2019Q2_CPTHCPCS_CMS_20190425.csv"
    fn = rscfn(__name__, fn)
    with open(fn, "r", encoding="ISO-8859-1") as fp:
        for row in csv.reader(fp):
            if len(row) == 3 and row[2] == "yes":
                self.cpts[row[0]] = 1
def read_coefn(fn):
    """Read a two-line CSV (names row, values row) into {name: float}."""
    fn = rscfn(__name__, fn)
    with open(fn, "r") as fp:
        reader = csv.reader(fp, delimiter=",")
        names = [k.strip() for k in next(reader)]
        values = [float(x) for x in next(reader)]
    return dict(zip(names, values))
def license_cpt():
    """Print the CPT license text and prompt the user to accept it.

    Returns:
        bool: True only when the user types exactly "accept"; any other
        input (or the pre-set default) counts as a rejection.
    """
    fn = rscfn(__name__, "data/license.txt")
    # BUG FIX: this default was misspelled "aggreement", so it was bound to
    # a dead variable and never acted as the fallback for the value read
    # below.  Spelled correctly, "reject" is the effective default.
    agreement = "reject"
    with open(fn, "r") as fp:
        for line in fp.readlines():
            if line == "\n":
                continue  # skip blank lines of the license text
            print(line)
    agreement = input("(accept/reject):")
    return agreement == "accept"
def read_cci(fn):
    """Load the AHRQ Chronic Condition Indicator file.

    Returns {dx: {"is_chronic", "body_system", "body_system_desc"}} where
    body-system codes are translated via the table below.
    """
    descmap = {
        "1": "Infectious and parasitic disease",
        "2": "Neoplasms",
        "3": ("Endocrine, nutritional, and metabolic diseases " +
              "and immunity disorders"),
        "4": "Diseases of blood and blood-forming organs",
        "5": "Mental disorders",
        "6": "Diseases of the nervous system and sense organs",
        "7": "Diseases of the circulatory system",
        "8": "Diseases of the respiratory system",
        "9": "Diseases of the digestive system",
        "10": "Diseases of the genitourinary system",
        "11": "Complications of pregnancy, childbirth, and the puerperium",
        "12": "Diseases of the skin and subcutaneous tissue",
        "13": "Diseases of the musculoskeletal system",
        "14": "Congenital anomalies",
        "15": "Certain conditions originating in the perinatal period",
        "16": "Symptoms, signs, and ill-defined conditions",
        "17": "Injury and poisoning",
        "18": ("Factors influencing health status " +
               "and contact with health services"),
        "None": "N/A",
    }
    fn = rscfn(__name__, fn)
    dx2cci = {}
    with open(fn, "r") as fp:
        reader = csv.reader(fp, delimiter=",")
        next(reader)  # header row
        for row in reader:
            row = _clnrw(row)
            dx2cci[row[0]] = {
                "is_chronic": row[2] == "1",
                "body_system": row[3],
                "body_system_desc": descmap[row[3]],
            }
    return dx2cci
def read_cpt_sect(fn):
    """Map every CPT code (ranges expanded) to its section code/description."""
    fn = rscfn(__name__, fn)
    cpt2sect = {}
    with open(fn, "r") as fp:
        for row in csv.reader(fp, delimiter=","):
            for cpt in _expand_cpt(row[0]):
                # fresh dict per code, matching the original aliasing
                cpt2sect[cpt] = {"sect": row[2], "desc": row[1]}
    return cpt2sect
def read_utilflag(fn):
    """Map (col3, col4, col5) key triples to the flag value in column 2."""
    fn = rscfn(__name__, fn)
    utilmap = {}
    with open(fn, "r") as fp:
        reader = csv.reader(fp, delimiter=",")
        next(reader)  # header row
        for row in reader:
            row = _clnrw(row)
            utilmap[(row[3], row[4], row[5])] = row[2]
    return utilmap
def read_prcls(fn):
    """Map procedure code -> {"class", "desc"}, skipping two preamble rows."""
    fn = rscfn(__name__, fn)
    pr2cls = {}
    with open(fn, "r") as fp:
        reader = csv.reader(fp, delimiter=",")
        next(reader)  # meta/banner line
        next(reader)  # column header
        for row in reader:
            row = _clnrw(row)
            pr2cls[row[0]] = {"class": row[2], "desc": row[3]}
    return pr2cls
def read_hier(fn):
    """Parse SAS %SET0 macro lines into the HCC hierarchy.

    Returns {"HCC<cc>": ["HCC<x>", ...]} for each matching line.
    """
    fn = rscfn(__name__, fn)
    pattern = r"%SET0\(CC=(\d+).+%STR\((.+)\)\)"
    hiers = {}
    with open(fn, "r") as fp:
        for line in fp:
            found = re.findall(pattern, line)
            if len(found) < 1 or len(found[0]) < 2:
                continue
            parent = "HCC" + str(found[0][0])
            hiers[parent] = ["HCC" + x.strip() for x in found[0][1].split(",")]
    return hiers
def read_label(fn):
    """Parse lines of the form HCCnn ="label" into {"nn": "label"}."""
    fn = rscfn(__name__, fn)
    pattern = r"HCC(\d+)\s+=\"(.+)\""
    labels = {}
    with open(fn, "r") as fp:
        for line in fp:
            found = re.findall(pattern, line)
            if len(found) < 1 or len(found[0]) < 2:
                continue
            labels[str(found[0][0])] = found[0][1].strip()
    return labels
def read_package(digits=11):
    """Map package-level NDCs (normalized to `digits`) to product records.

    Reads data/package.txt and links each package NDC to the matching
    product entry from read_product(); packages whose product is unknown
    are skipped.
    """
    products = read_product(digits)
    fn = rscfn(__name__, "data/package.txt")
    packages = {}
    with open(fn, "r", encoding="ISO-8859-1") as fp:
        reader = csv.reader(fp, delimiter="\t")
        next(reader)  # header row
        for row in reader:
            ndc_product, ndc_package = row[1], row[2]
            if ndc_product in products:
                key = convert_ndc(ndc_package, digits)
                packages[key] = products[ndc_product]
    return packages
def read_ccs(fn):
    """Load an HCUP CCS crosswalk: ICD code -> CCS category and level info."""
    fn = rscfn(__name__, fn)
    icd2ccs = {}
    with open(fn, "r") as fp:
        reader = csv.reader(fp, delimiter=",")
        next(reader)  # header row
        for row in reader:
            row = _clnrw(row)
            icd2ccs[row[0]] = {
                "ccs": row[1],
                "ccs_desc": row[3],
                "ccs_lv1": row[4],
                "ccs_lv1_desc": row[5],
                "ccs_lv2": row[6],
                "ccs_lv2_desc": row[7],
            }
    return icd2ccs
def read_f(fn="data/appendix_F_J.txt"):
    """Collect codes listed under the DRG 989 non-extensive O.R. section.

    Returns {code: 1}; a line starting with ":" ends the section.
    """
    fn = rscfn(__name__, fn)
    uormap = {}
    in_section = False
    with open(fn, "r") as fp:
        for line in fp:
            if "DRG 989 NON-EXTENSIVE O.R. PROCEDURE" in line:
                in_section = True
            elif in_section and len(line) > 0 and line[0] == ":":
                break
            if not in_section or len(line) < 9:
                continue
            code = line[:9].strip()
            if code != "":
                uormap[code] = 1
    return uormap
def read_dx2cc(fn):
    """Read a tab-delimited dx-to-HCC file allowing one-to-many mappings.

    V24+ files map a single dx code to multiple HCCs, so values are lists:
    {dx: ["HCC<n>", ...]}.
    """
    fn = rscfn(__name__, fn)
    dx2cc = {}
    with open(fn, "r") as fp:
        for row in csv.reader(fp, delimiter="\t"):
            dx = row[0].strip()
            dx2cc.setdefault(dx, []).append("HCC" + row[1].strip())
    return dx2cc
def read_e(fn="data/appendix_D_E.txt"):
    """Collect O.R. procedure codes from the appendix D/E listing.

    Codes whose 10th character (index 9) is "*" are non-O.R. and skipped.
    Returns {code: 1}.
    """
    orpcsmap = {}
    is_orpcs_section = False
    fn = rscfn(__name__, fn)
    with open(fn, "r") as fp:
        for line in fp:
            if line.strip() == "CODE MDC MS-DRG SURGICAL CATEGORY":
                is_orpcs_section = True
            elif line.strip() == "Procedure Cluster/MS-DRG Index":
                # end of the orpcs section
                break
            if is_orpcs_section:
                if len(line) < 9:
                    continue
                code = line[:9].strip()
                # BUG FIX: a line of exactly 9 characters (8 chars + newline,
                # or a final line without newline) passed the len(line) < 9
                # guard above but raised IndexError on line[9]; such lines
                # have no marker column and are treated as O.R. codes.
                is_nonorpcs = len(line) > 9 and line[9] == "*"
                if not is_nonorpcs and code != "":
                    orpcsmap[code] = 1
    return orpcsmap
def read_c(fn="data/appendix_C.txt"):
    """Parse appendix C into the CC/MCC level map and PDX exclusion lists.

    Returns:
        ccmap: {dx: {"pdx": exclusion-collection id, "level": CC level,
                     "aowa": bool}}  (aowa = "apply only when alive")
        exmap: {pdx collection id: [excluded dx, ...]}

    The file is read as three consecutive sections, tracked with boolean
    flags: the CC level table, the PDX exclusion collections, and Part 2
    (codes whose level applies only when the patient is discharged alive).
    """
    ccmap = {}
    exmap = {}
    is_cc_section = False
    is_pdx_section = False
    is_part2 = False
    pdx_code = ""  # NOTE(review): appears unused; `pdx` below carries the id
    fn = rscfn(__name__, fn)
    with open(fn, "r") as fp:
        for line in fp:
            if line.strip() == "":
                continue
            # --- section header detection; each header line is consumed ---
            if "I10 Dx Lev PDX Exclusions" in line:
                is_cc_section = True
                continue
            elif "PDX collection " in line:
                is_pdx_section = True
                is_cc_section = False
                # collection id that the following dx lines belong to
                pdx = line.split("collection")[1].strip()
                continue
            elif "Appendix C Part 2:" in line:
                is_part2 = True
                is_pdx_section = False
                continue
            if is_cc_section:
                # fixed-width columns: dx [0:9], level [9:12], pdx id [12:29]
                dx = line[:9].strip()
                cc = line[9:12].strip()
                pdx = line[12:29].strip().split(":")[0]
                ccmap[dx] = {"pdx": pdx, "level": cc, "aowa": False}
                # aowa: apply only when alive
            elif is_pdx_section:
                # `pdx` still holds the id from the last collection header
                dx = line.split()[0]
                if pdx not in exmap:
                    exmap[pdx] = []
                exmap[pdx].append(dx)
            elif is_part2:
                # marks an existing ccmap entry; assumes dx appeared in part 1
                dx = line[:8].strip()
                ccmap[dx]["aowa"] = True
    return ccmap, exmap
def read_a(fn="data/appendix_A.txt"):
    """Parse appendix A into {drg: {drg, mdc, is_medical, is_surgical, desc}}.

    Fixed-width columns: DRG [0:4], MDC [4:8], M/P flag [8:11], description
    [11:].  Lines before the "DRG" header row are skipped.
    """
    fn = rscfn(__name__, fn)
    drgmap = {}
    in_section = False
    with open(fn, "r") as fp:
        for line in fp:
            drg = line[:4].strip()
            if drg == "DRG":
                in_section = True
                continue
            if not in_section:
                continue
            medsurg = line[8:11].strip()
            drgmap[drg] = {
                "drg": drg,
                "mdc": line[4:8].strip(),
                "is_medical": medsurg == "M",
                "is_surgical": medsurg == "P",
                "desc": line[11:].strip(),
            }
    return drgmap
def read_d(fn="data/appendix_D_E.txt"):
    """Rank DRG ranges in listing order from the MDC section.

    Each listed range (e.g. "001-003") or single DRG gets the next integer
    rank; returns {str(drg): rank}.  A blank line ends the section.
    """
    fn = rscfn(__name__, fn)
    rankmap = {}
    rank = 0
    in_section = False
    with open(fn, "r") as fp:
        for line in fp:
            if line[:3] == "MDC":
                in_section = True
                continue
            if in_section and line.strip() == "":
                break  # end of the rank section
            if not in_section:
                continue
            bounds = line[:9].strip().split("-")
            lo = int(bounds[0])
            hi = int(bounds[1]) if len(bounds) > 1 else lo
            for drg in range(lo, hi + 1):
                rankmap[str(drg)] = rank
            rank += 1
    return rankmap
def read_label_short(fn):
    """Load the short-label JSON file and return its contents."""
    path = rscfn(__name__, fn)
    with open(path, "r") as fp:
        return json.load(fp)
def read_cpt2ccs(fn):
    """Load the cached CPT-to-CCS JSON map."""
    path = rscfn(__name__, fn)
    with open(path, "r") as fp:
        data = json.load(fp)
    return data
def read_surgeryflag(fn):
    """Load the cached surgery-flag JSON map."""
    path = rscfn(__name__, fn)
    with open(path, "r") as fp:
        data = json.load(fp)
    return data
def write_cache(cache, fn):
    """Serialize `cache` as indented JSON at the package-relative path fn."""
    path = rscfn(__name__, fn)
    with open(path, "w") as fp:
        json.dump(cache, fp=fp, indent=2)
def read(fn, dxmap, prmap):
    """
    A :?=======...
    A MDC XX...
    A :?=======...
    A MDC contents
    A (blank)
    B +-------...
    B | Abstract Table about DRG...
    B +-------...
    B (blank)
    C DRG XXX...
    C (blank)
    D Details...
    D (blank)
    E Codes...
    *pattern: A -> [B -> [C -> [D -> (E)]]]

    Drive a line-by-line state machine over the file: each line may move the
    cursor to a section (A-E, detected by the is_* predicates), then the
    section's parse_* handler consumes the line, accumulating results into
    dxmap/prmap and the shared `cache`.  `_cursor` trails one line behind so
    handlers can detect section transitions.  Returns (dxmap, prmap),
    mutated in place.
    """
    _cursor = "F"  # previous line's section; "F" = before any section
    cursor = "F"
    # shared scratch state passed to every parse_* handler
    cache = {"A": "", "C": "", "D": "", "E": [],
             "_": defaultdict(dict), "L": {}}
    fn = rscfn(__name__, fn)
    with open(fn, "r") as fp:
        for line in fp:
            line = line.replace("\n","")
            # blank lines keep the current cursor; otherwise the first
            # matching predicate (checked in A..E order) moves it
            if line.strip()=="":
                pass
            elif is_A(line, cursor):
                cursor = "A"
            elif is_B(line, cursor):
                cursor = "B"
            elif is_C(line, cursor):
                cursor = "C"
            elif is_D(line, cursor):
                cursor = "D"
            elif is_E(line, cursor):
                cursor = "E"
            # dispatch the line to the handler for the current section;
            # section B (table decoration) is intentionally ignored
            if cursor == "A":
                parse_A(line, cursor, dxmap, cache)
            elif cursor == "B":
                pass
            elif cursor == "C":
                parse_C(line, cursor, cache, _cursor)
            elif cursor == "D":
                parse_D(line, cursor, cache, _cursor)
            elif cursor == "E":
                parse_E(line, cursor, dxmap, prmap, cache, _cursor)
            _cursor = cursor
    return dxmap, prmap