def runImport(doc_handler=None):
    """ Run the actual linking.

    Fetches every RXNORM concept whose term type (TTY) is one of the drug
    types listed below, precomputes its label, NDCs, ingredients, treatment
    intents, VA drug classes, brand/generic relations, components and
    mechanisms, and passes one JSON-ready dictionary per concept to
    `doc_handler`. Progress is printed to stdout while the loop runs.

    You can provide a :class:`DocHandler` subclass which will handle the JSON
    documents, for example store them to MongoDB for the MongoDocHandler.
    These classes are defined in `rxnorm_link_run.py` for now.

    Side effects: installs a process-wide SIGINT handler that prints an abort
    notice and exits with status 0, and exits with status 1 (after logging
    the error) if preparing the RxNorm databases raises.

    :param doc_handler: Optional handler. `addDocument(d)` is called once per
        concept and `finalize()` once after the loop. When falsy, documents
        are still computed and counted but not handed to anyone.
    """

    # install keyboard interrupt handler; the first parameter is deliberately
    # not called "signal" so it does not shadow the module inside the handler
    def signal_handler(signum, frame):
        print("\nx> Aborted")
        sys.exit(0)
    signal.signal(signal.SIGINT, signal_handler)

    # prepare RxNorm databases
    try:
        RxNorm.check_database()
        rxhandle = RxNormLookup()
        rxhandle.prepare_to_cache_classes()
    except Exception as e:
        logging.error(e)
        sys.exit(1)

    # fetch rxcui's for drug-type concepts (i.e. restrict by TTY)
    drug_types = ('SCD', 'SCDC', 'SBDG', 'SBD', 'SBDC', 'BN', 'SBDF', 'SCDG',
                  'SCDF', 'IN', 'MIN', 'PIN', 'BPCK', 'GPCK')
    # one "?" placeholder per drug type, the values are bound by fetchAll()
    param = ', '.join('?' for _ in drug_types)
    all_sql = "SELECT RXCUI, TTY from RXNCONSO where SAB='RXNORM' and TTY in ({})".format(
        param)

    all_drugs = rxhandle.fetchAll(all_sql, drug_types)
    num_drugs = len(all_drugs)

    # traverse VA classes; starts the VA drug class caching process if needed,
    # which runs a minute or two
    if rxhandle.can_cache():
        initVA(rxhandle)
        traverseVA(rxhandle, rounds=5, expect=num_drugs)

    # loop all concepts
    i = 0
    w_ti = 0
    w_va = 0
    w_either = 0

    def progress_line():
        # An empty result set leaves num_drugs at 0; report 0% in that case
        # instead of raising ZeroDivisionError.
        fraction = i / num_drugs if num_drugs else 0.0
        return '--> {:.1%} n: {}, ti: {}, va: {}, either: {}'.format(
            fraction, i, w_ti, w_va, w_either)

    last_report = datetime.now()
    print('-> Indexing {} items'.format(num_drugs))

    for rxcui, tty in all_drugs:
        label = rxhandle.lookup_rxcui_name(rxcui)   # fast (indexed column)
        ndc = rxhandle.ndc_for_rxcui(rxcui)         # fast (indexed column)
        ndc = RxNorm.ndc_normalize_list(ndc)        # fast (string permutation)

        # find ingredients, drug classes and more
        ingr = toIngredients(rxhandle, [rxcui], tty)      # rather slow
        ti = toTreatmentIntents(rxhandle, ingr, 'IN')     # requires "ingr"
        va = toDrugClasses(rxhandle, rxcui)               # fast, loads from our cached table
        gen = toBrandAndGeneric(rxhandle, [rxcui], tty)   # fast
        comp = toComponents(rxhandle, [rxcui], tty)       # fast
        mech = toMechanism(rxhandle, ingr, 'IN')          # fast

        # create JSON-ready dictionary (save space by not adding empty
        # properties)
        d = {
            'rxcui': rxcui,
            'tty': tty,
            'label': label,
        }
        optional_properties = (
            ('ndc', ndc),
            ('ingredients', ingr),
            ('treatmentIntents', ti),
            ('drugClasses', va),
            ('generics', gen),
            ('components', comp),
            ('mechanisms', mech),
        )
        for key, values in optional_properties:
            if values:
                d[key] = list(values)

        # count
        i += 1
        if ti:
            w_ti += 1
        if va:
            w_va += 1
        if ti or va:
            w_either += 1

        # The dictionary "d" at this point contains all the drug's precomputed
        # properties; to debug, dump it with
        # json.dumps(d, sort_keys=True, indent=2)
        if doc_handler:
            doc_handler.addDocument(d)

        # log progress every 2 seconds or so; total_seconds() is the full
        # elapsed time, whereas ".seconds" is only the seconds component
        if (datetime.now() - last_report).total_seconds() > 2:
            last_report = datetime.now()
            # "\r" makes the next report overwrite this line
            print(progress_line(), end="\r")

    # loop done, finalize
    if doc_handler:
        doc_handler.finalize()

    print(progress_line())
    print('-> Done')
def runImport(doc_handler=None):
    """ Run the actual linking.

    Selects all RXNORM concepts with a drug-type TTY, computes for each one
    the label, normalized NDCs, ingredients, treatment intents, VA drug
    classes, brand/generic relations, components and mechanisms, and feeds
    the resulting JSON-ready dictionary to `doc_handler`. A progress line is
    printed to stdout during the run.

    You can provide a :class:`DocHandler` subclass which will handle the JSON
    documents, for example store them to MongoDB for the MongoDocHandler.
    These classes are defined in `rxnorm_link_run.py` for now.

    Side effects: replaces the process' SIGINT handler with one that prints
    an abort notice and exits with status 0. If preparing the RxNorm
    databases raises, the error is logged and the process exits with
    status 1.

    :param doc_handler: Optional handler receiving `addDocument(d)` for every
        concept and a single `finalize()` call at the end. If falsy, the
        documents are only computed and counted.
    """

    # install keyboard interrupt handler (parameter named "signum" rather than
    # "signal" to avoid shadowing the module)
    def signal_handler(signum, frame):
        print("\nx> Aborted")
        sys.exit(0)
    signal.signal(signal.SIGINT, signal_handler)

    # prepare RxNorm databases
    try:
        RxNorm.check_database()
        rxhandle = RxNormLookup()
        rxhandle.prepare_to_cache_classes()
    except Exception as e:
        logging.error(e)
        sys.exit(1)

    # fetch rxcui's for drug-type concepts (i.e. restrict by TTY)
    drug_types = ('SCD', 'SCDC', 'SBDG', 'SBD', 'SBDC', 'BN', 'SBDF', 'SCDG',
                  'SCDF', 'IN', 'MIN', 'PIN', 'BPCK', 'GPCK')
    # the SQL gets one "?" placeholder per drug type; fetchAll() binds them
    param = ', '.join('?' for _ in drug_types)
    all_sql = "SELECT RXCUI, TTY from RXNCONSO where SAB='RXNORM' and TTY in ({})".format(param)

    all_drugs = rxhandle.fetchAll(all_sql, drug_types)
    num_drugs = len(all_drugs)

    # traverse VA classes; starts the VA drug class caching process if needed,
    # which runs a minute or two
    if rxhandle.can_cache():
        initVA(rxhandle)
        traverseVA(rxhandle, rounds=5, expect=num_drugs)

    # running totals: concepts seen, and how many of them have treatment
    # intents, VA drug classes, or at least one of the two
    i = 0
    w_ti = 0
    w_va = 0
    w_either = 0

    def progress_line():
        # With no matching concepts num_drugs is 0, so avoid dividing by it
        # (the unguarded division raised ZeroDivisionError after finalize()).
        fraction = i / num_drugs if num_drugs else 0.0
        return '-> {:.1%} n: {}, ti: {}, va: {}, either: {}'.format(
            fraction, i, w_ti, w_va, w_either)

    last_report = datetime.now()
    print('-> Indexing {} items'.format(num_drugs))

    # loop all concepts
    for rxcui, tty in all_drugs:
        label = rxhandle.lookup_rxcui_name(rxcui)   # fast (indexed column)
        ndc = rxhandle.ndc_for_rxcui(rxcui)         # fast (indexed column)
        ndc = RxNorm.ndc_normalize_list(ndc)        # fast (string permutation)

        # find ingredients, drug classes and more
        ingr = toIngredients(rxhandle, [rxcui], tty)      # rather slow
        ti = toTreatmentIntents(rxhandle, ingr, 'IN')     # requires "ingr"
        va = toDrugClasses(rxhandle, rxcui)               # fast, loads from our cached table
        gen = toBrandAndGeneric(rxhandle, [rxcui], tty)   # fast
        comp = toComponents(rxhandle, [rxcui], tty)       # fast
        mech = toMechanism(rxhandle, ingr, 'IN')          # fast

        # create JSON-ready dictionary (save space by not adding empty
        # properties)
        d = {
            'rxcui': rxcui,
            'tty': tty,
            'label': label,
        }
        optional_properties = (
            ('ndc', ndc),
            ('ingredients', ingr),
            ('treatmentIntents', ti),
            ('drugClasses', va),
            ('generics', gen),
            ('components', comp),
            ('mechanisms', mech),
        )
        for key, values in optional_properties:
            if values:
                d[key] = list(values)

        # count
        i += 1
        if ti:
            w_ti += 1
        if va:
            w_va += 1
        if ti or va:
            w_either += 1

        # The dictionary "d" at this point contains all the drug's precomputed
        # properties; to debug, dump it with
        # json.dumps(d, sort_keys=True, indent=2)
        if doc_handler:
            doc_handler.addDocument(d)

        # log progress every 5 seconds or so; use total_seconds() because
        # ".seconds" is just the seconds component of the timedelta
        if (datetime.now() - last_report).total_seconds() > 5:
            last_report = datetime.now()
            # "\r" lets the next report overwrite this line
            print(progress_line(), end="\r")

    # loop done, finalize
    if doc_handler:
        doc_handler.finalize()

    print(progress_line())
    print('=> Done')
def test_ndc_normalization(self):
    """ Test NDC normalization.

    Feeds `RxNorm.ndc_normalize` NDC strings in the supported layouts and
    checks each against the expected 11-character result, then checks that
    malformed input yields None.
    """
    # (expected normalized form, raw input), grouped by input layout
    valid_cases = (
        # 6-4-2
        ('00074148614', '000074-1486-14'),
        ('51227615900', '051227-6159-**'),
        ('58734000101', '058734-0001-*1'),
        # 6-4-1
        ('00854684102', '000854-6841-2'),
        # 6-4: treat as 6-4-2 with two trailing zeroes
        ('57982011000', '057982-0110'),
        ('12579005600', '012579-*056'),
        # 6-3-2
        ('57982012312', '057982-123-12'),
        # 6-3-1
        ('57982098709', '057982-987-9'),
        # 5-4-2
        ('17317093201', '17317-0932-01'),
        # 5-4-1
        ('36987315601', '36987-3156-1'),
        # 5-3-2
        ('24730041205', '24730-412-05'),
        # 4-4-2
        ('00268010310', '0268-0103-10'),
        # 12 digit VANDF
        ('03475476541', '003475476541'),
        # normalized already
        ('04458632698', '04458632698'),
    )
    for normalized, raw in valid_cases:
        self.assertEqual(normalized, RxNorm.ndc_normalize(raw))

    # invalid: inputs the normalizer is expected to reject
    invalid_inputs = (
        '0054478962',
        '547668531244',
        '0054478962796',
        '0a79b2-c87-9',
        'si-lly-te-st',
        'just-a-rand-test-string',
    )
    for raw in invalid_inputs:
        self.assertIsNone(RxNorm.ndc_normalize(raw))