Beispiel #1
def runImport(doc_handler=None):
    """Run the actual linking.

    Selects every RXNORM concept whose term type (TTY) is one of the drug
    types listed below and, for each one, looks up its label, NDC codes,
    ingredients, treatment intents, VA drug classes, brand/generic
    relations, components and mechanisms. The results are collected into a
    JSON-ready dictionary per concept; empty properties are left out to
    save space. Progress is printed to stdout roughly every 2 seconds.

    You can provide a :class:`DocHandler` subclass which will handle the
    JSON documents, for example store them to MongoDB for the
    MongoDocHandler.

    NOTE(review): this function was restored from a listing in which
    several lines were truncated. Lines marked "NOTE(review)" below were
    reconstructed and should be confirmed against the project source.

    :param doc_handler: optional handler that receives each document; when
        falsy, the documents are computed and counted but not passed on.
    """
    # Function-scope imports: the module's import block is not visible from
    # this listing, so the names added by the restoration are brought in here.
    import logging
    import sys
    from datetime import datetime

    # install keyboard interrupt handler
    def signal_handler(signum, frame):
        print("\nx>  Aborted")
        # Without exiting here, Ctrl-C would only print and the import
        # would keep running.
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    # prepare RxNorm databases
    # NOTE(review): the try body and the except body were truncated in the
    # listing; additional setup calls may belong here.
    try:
        rxhandle = RxNormLookup()
    except Exception as e:
        # Nothing below can run without a lookup handle, so report and stop.
        logging.error(e)
        sys.exit(1)

    # fetch rxcui's for drug-type concepts (i.e. restrict by TTY)
    drug_types = ('SCD', 'SCDC', 'SBDG', 'SBD', 'SBDC', 'BN', 'SBDF', 'SCDG',
                  'SCDF', 'IN', 'MIN', 'PIN', 'BPCK', 'GPCK')
    # one "?" placeholder per drug type, so the values are bound by the
    # database layer rather than interpolated into the SQL string
    param = ', '.join('?' for _ in drug_types)
    all_sql = "SELECT RXCUI, TTY from RXNCONSO where SAB='RXNORM' and TTY in ({})".format(
        param)

    all_drugs = rxhandle.fetchAll(all_sql, drug_types)
    num_drugs = len(all_drugs)

    # traverse VA classes; starts the VA drug class caching process if needed,
    # which runs a minute or two
    if rxhandle.can_cache():
        traverseVA(rxhandle, rounds=5, expect=num_drugs)

    # loop all concepts
    i = 0
    w_ti = 0
    w_va = 0
    w_either = 0
    report_fmt = '-->  {:.1%}   n: {}, ti: {}, va: {}, either: {}'
    last_report = datetime.now()
    print('->  Indexing {} items'.format(num_drugs))

    for res in all_drugs:
        rxcui = res[0]
        tty = res[1]
        label = rxhandle.lookup_rxcui_name(rxcui)  # fast (indexed column)
        ndc = rxhandle.ndc_for_rxcui(rxcui)  # fast (indexed column)
        ndc = RxNorm.ndc_normalize_list(ndc)  # fast (string permutation)

        # find ingredients, drug classes and more
        ingr = toIngredients(rxhandle, [rxcui], tty)  # rather slow
        ti = toTreatmentIntents(rxhandle, ingr, 'IN')  # requires "ingr"
        va = toDrugClasses(rxhandle, rxcui)  # fast, loads from our cached table
        gen = toBrandAndGeneric(rxhandle, [rxcui], tty)  # fast
        comp = toComponents(rxhandle, [rxcui], tty)  # fast
        mech = toMechanism(rxhandle, ingr, 'IN')  # fast

        # create JSON-ready dictionary (save space by not adding empty properties)
        d = {
            'rxcui': rxcui,
            'tty': tty,
            'label': label,
        }
        optional_properties = (
            ('ndc', ndc),
            ('ingredients', ingr),
            ('treatmentIntents', ti),
            ('drugClasses', va),
            ('generics', gen),
            ('components', comp),
            ('mechanisms', mech),
        )
        for key, values in optional_properties:
            if len(values) > 0:
                d[key] = list(values)

        # count
        i += 1
        has_ti = len(ti) > 0
        has_va = len(va) > 0
        if has_ti:
            w_ti += 1
        if has_va:
            w_va += 1
        if has_ti or has_va:
            w_either += 1

        # The dictionary "d" at this point contains all the drug's precomputed
        # properties, to debug print this:
        #print(json.dumps(d, sort_keys=True, indent=2))
        if doc_handler:
            # NOTE(review): the handler call was truncated in the listing;
            # "addDocument" is the presumed DocHandler method -- confirm.
            doc_handler.addDocument(d)

        # log progress every 2 seconds or so
        now = datetime.now()
        if (now - last_report).total_seconds() > 2:
            last_report = now
            # NOTE(review): the trailing print argument was truncated;
            # end="\r" (rewrite the same console line) is presumed.
            print(report_fmt.format(i / num_drugs, i, w_ti, w_va, w_either),
                  end="\r")

    # loop done, finalize
    if doc_handler:
        # NOTE(review): reconstructed from the "finalize" comment -- confirm.
        doc_handler.finalize()

    # An empty result set counts as fully processed; this also avoids a
    # ZeroDivisionError when the query returned no rows.
    fraction = i / num_drugs if num_drugs else 1.0
    print(report_fmt.format(fraction, i, w_ti, w_va, w_either))
    print('->  Done')
Beispiel #2
def runImport(doc_handler=None):
	"""Run the actual linking.

	Selects every RXNORM concept whose term type (TTY) is one of the drug
	types listed below and, for each one, looks up its label, NDC codes,
	ingredients, treatment intents, VA drug classes, brand/generic
	relations, components and mechanisms. The results are collected into a
	JSON-ready dictionary per concept; empty properties are left out to
	save space. Progress is printed to stdout roughly every 5 seconds.

	You can provide a :class:`DocHandler` subclass which will handle the
	JSON documents, for example store them to MongoDB for the
	MongoDocHandler.

	NOTE(review): this function was restored from a listing in which
	several lines were truncated. Lines marked "NOTE(review)" below were
	reconstructed and should be confirmed against the project source.

	:param doc_handler: optional handler that receives each document; when
		falsy, the documents are computed and counted but not passed on.
	"""
	# Function-scope imports: the module's import block is not visible from
	# this listing, so the names added by the restoration are brought in here.
	import logging
	import sys
	from datetime import datetime

	# install keyboard interrupt handler
	def signal_handler(signum, frame):
		print("\nx>  Aborted")
		# Without exiting here, Ctrl-C would only print and the import
		# would keep running.
		sys.exit(0)

	signal.signal(signal.SIGINT, signal_handler)

	# prepare RxNorm databases
	# NOTE(review): the try body and the except body were truncated in the
	# listing; additional setup calls may belong here.
	try:
		rxhandle = RxNormLookup()
	except Exception as e:
		# Nothing below can run without a lookup handle, so report and stop.
		logging.error(e)
		sys.exit(1)

	# fetch rxcui's for drug-type concepts (i.e. restrict by TTY)
	drug_types = ('SCD', 'SCDC', 'SBDG', 'SBD', 'SBDC', 'BN', 'SBDF', 'SCDG', 'SCDF', 'IN', 'MIN', 'PIN', 'BPCK', 'GPCK')
	# one "?" placeholder per drug type, so the values are bound by the
	# database layer rather than interpolated into the SQL string
	param = ', '.join('?' for _ in drug_types)
	all_sql = "SELECT RXCUI, TTY from RXNCONSO where SAB='RXNORM' and TTY in ({})".format(param)
	all_drugs = rxhandle.fetchAll(all_sql, drug_types)
	num_drugs = len(all_drugs)

	# traverse VA classes; starts the VA drug class caching process if needed,
	# which runs a minute or two
	if rxhandle.can_cache():
		traverseVA(rxhandle, rounds=5, expect=num_drugs)

	# loop all concepts
	i = 0
	w_ti = 0
	w_va = 0
	w_either = 0
	report_fmt = '->  {:.1%}   n: {}, ti: {}, va: {}, either: {}'
	last_report = datetime.now()
	print('->  Indexing {} items'.format(num_drugs))

	for res in all_drugs:
		rxcui = res[0]
		tty = res[1]
		label = rxhandle.lookup_rxcui_name(rxcui)		# fast (indexed column)
		ndc = rxhandle.ndc_for_rxcui(rxcui)				# fast (indexed column)
		ndc = RxNorm.ndc_normalize_list(ndc)			# fast (string permutation)

		# find ingredients, drug classes and more
		ingr = toIngredients(rxhandle, [rxcui], tty)		# rather slow
		ti = toTreatmentIntents(rxhandle, ingr, 'IN')		# requires "ingr"
		va = toDrugClasses(rxhandle, rxcui)					# fast, loads from our cached table
		gen = toBrandAndGeneric(rxhandle, [rxcui], tty)		# fast
		comp = toComponents(rxhandle, [rxcui], tty)			# fast
		mech = toMechanism(rxhandle, ingr, 'IN')			# fast

		# create JSON-ready dictionary (save space by not adding empty properties)
		d = {
			'rxcui': rxcui,
			'tty': tty,
			'label': label,
		}
		optional_properties = (
			('ndc', ndc),
			('ingredients', ingr),
			('treatmentIntents', ti),
			('drugClasses', va),
			('generics', gen),
			('components', comp),
			('mechanisms', mech),
		)
		for key, values in optional_properties:
			if len(values) > 0:
				d[key] = list(values)

		# count
		i += 1
		has_ti = len(ti) > 0
		has_va = len(va) > 0
		if has_ti:
			w_ti += 1
		if has_va:
			w_va += 1
		if has_ti or has_va:
			w_either += 1

		# The dictionary "d" at this point contains all the drug's precomputed
		# properties, to debug print this:
		#print(json.dumps(d, sort_keys=True, indent=2))
		if doc_handler:
			# NOTE(review): the handler call was truncated in the listing;
			# "addDocument" is the presumed DocHandler method -- confirm.
			doc_handler.addDocument(d)

		# log progress every 5 seconds or so; the carriage return makes each
		# report overwrite the previous one on the console
		now = datetime.now()
		if (now - last_report).total_seconds() > 5:
			last_report = now
			print(report_fmt.format(i / num_drugs, i, w_ti, w_va, w_either), end="\r")

	# loop done, finalize
	if doc_handler:
		# NOTE(review): reconstructed from the "finalize" comment -- confirm.
		doc_handler.finalize()

	# An empty result set counts as fully processed; this also avoids a
	# ZeroDivisionError when the query returned no rows.
	fraction = i / num_drugs if num_drugs else 1.0
	print(report_fmt.format(fraction, i, w_ti, w_va, w_either))
	print('=>  Done')
	def test_ndc_normalization(self):
		""" Test NDC normalization.
		# 6-4-2
		self.assertEqual('00074148614', RxNorm.ndc_normalize('000074-1486-14'))
		self.assertEqual('51227615900', RxNorm.ndc_normalize('051227-6159-**'))
		self.assertEqual('58734000101', RxNorm.ndc_normalize('058734-0001-*1'))
		# 6-4-1
		self.assertEqual('00854684102', RxNorm.ndc_normalize('000854-6841-2'))
		# 6-4: treat as 6-4-2 with two trailing zeroes
		self.assertEqual('57982011000', RxNorm.ndc_normalize('057982-0110'))
		self.assertEqual('12579005600', RxNorm.ndc_normalize('012579-*056'))
		# 6-3-2
		self.assertEqual('57982012312', RxNorm.ndc_normalize('057982-123-12'))
		# 6-3-1
		self.assertEqual('57982098709', RxNorm.ndc_normalize('057982-987-9'))
		# 5-4-2
		self.assertEqual('17317093201', RxNorm.ndc_normalize('17317-0932-01'))
		# 5-4-1
		self.assertEqual('36987315601', RxNorm.ndc_normalize('36987-3156-1'))
		# 5-3-2
		self.assertEqual('24730041205', RxNorm.ndc_normalize('24730-412-05'))
		# 4-4-2
		self.assertEqual('00268010310', RxNorm.ndc_normalize('0268-0103-10'))
		# 12 digit VANDF
		self.assertEqual('03475476541', RxNorm.ndc_normalize('003475476541'))
		# normalized already
		self.assertEqual('04458632698', RxNorm.ndc_normalize('04458632698'))
		# invalid