Exemple #1
0
    def run(self):
        '''
        Build key-value tables of selected Open FDA label attributes and
        write one tab-separated file per configured feature.

        Phase 1 feeds every record from self.source.acquire_labels() that
        has openfda/product_ndc entries to each feature's accumulate(),
        once per product NDC entry. Phase 2 writes each feature's data,
        sorted, to ../../data/<feature fields joined by '-'>.txt.
        '''
        print('Acquiring Records')
        for label in self.source.acquire_labels():
            # nothing to key on when the label has no product NDC entries
            if 'openfda' not in label or 'product_ndc' not in label['openfda']:
                continue
            for raw_ndc in label['openfda']['product_ndc']:
                # the parsed-then-formatted NDC text is the table key
                ndc_key = ProductNdc.parse(raw_ndc).format()
                for spec in self.features:
                    spec['feature'].accumulate(ndc_key, label)

        print('Writing Features')
        for spec in self.features:
            accumulator = spec['feature']
            stem = '-'.join(accumulator.fields)
            target = io.relativeToAbsolute('../../data/' + stem + '.txt')

            with open(target, 'w', encoding='utf-8') as out:
                # header row: key column, then this feature's column name
                print('product_ndc', spec['column'], sep='\t', file=out)
                for item in sorted(accumulator.data, key=itemgetter(0, 1)):
                    cell = item[1]
                    # run the value through the configured transforms in order
                    for transform in spec['transform']:
                        cell = transform(cell)
                    print(item[0], cell, sep='\t', file=out)
    def run(self):
        '''
        Make a key-value map of certain attributes in the Open FDA dataset.

        Records without openfda/product_ndc entries are ignored. For the
        rest, every configured feature accumulates the record under each of
        its formatted product NDCs. Each feature is then dumped to its own
        tab-separated file under ../../data/, named after the feature's
        fields.
        '''
        print('Acquiring Records')
        for rec in self.source.acquire_labels():
            has_ndc = 'openfda' in rec and 'product_ndc' in rec['openfda']
            if not has_ndc:
                continue
            for ndc_text in rec['openfda']['product_ndc']:
                parsed = ProductNdc.parse(ndc_text)
                product_key = parsed.format()
                for config in self.features:
                    config['feature'].accumulate(product_key, rec)

        print('Writing Features')
        for config in self.features:
            feat = config['feature']
            name_part = '-'.join(feat.fields)
            out_path = io.relativeToAbsolute('../../data/' + name_part + '.txt')

            with open(out_path, 'w', encoding='utf-8') as handle:
                # first line names the two columns
                print('product_ndc', config['column'], sep='\t', file=handle)
                for entry in sorted(feat.data, key=itemgetter(0, 1)):
                    result = entry[1]
                    # transforms are chained: each receives the previous output
                    for step in config['transform']:
                        result = step(result)
                    print(entry[0], result, sep='\t', file=handle)
 def acquire_fda_ndc(self):
     ''' Loads records from the FDA product list.
     Source: [http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm]

     Generator: reads the tab-delimited file
     '../../data/FDA Product NDC 20150618.txt' and yields
     self.map_fda(row) for each data row, where row is the dict built by
     csv.DictReader from the header line. The file stays open until the
     generator is exhausted or closed.
     '''
     fileName = io.relativeToAbsolute('../../data/FDA Product NDC 20150618.txt')
     # newline='' hands line-ending handling to the csv module, as its
     # documentation requires; without it, newlines embedded in quoted
     # fields may be misinterpreted
     with open(fileName, newline='') as f:
         for row in csv.DictReader(f, dialect=csv.excel_tab):
             yield self.map_fda(row)
 def acquire_nadac(self):
     ''' Loads the records from the NADAC cost spreadsheet (the source
     link below points to medicaid.gov).
     Source: [http://www.medicaid.gov/Medicaid-CHIP-Program-Information/By-Topics/Benefits/Prescription-Drugs/Pharmacy-Pricing.html]

     Generator: reads the tab-delimited file '../../data/NADAC 20150617.txt'
     and yields self.map_nadac(row) for each data row, where row is the
     dict built by csv.DictReader from the header line. The file stays
     open until the generator is exhausted or closed.
     '''
     fileName = io.relativeToAbsolute('../../data/NADAC 20150617.txt')
     # newline='' hands line-ending handling to the csv module, as its
     # documentation requires; without it, newlines embedded in quoted
     # fields may be misinterpreted
     with open(fileName, newline='') as f:
         for row in csv.DictReader(f, dialect=csv.excel_tab):
             yield self.map_nadac(row)
Exemple #5
0
 def acquire_nadac(self):
     ''' Loads the records from the NADAC cost spreadsheet (the source
     link below points to medicaid.gov).
     Source: [http://www.medicaid.gov/Medicaid-CHIP-Program-Information/By-Topics/Benefits/Prescription-Drugs/Pharmacy-Pricing.html]

     Generator: each data row of the tab-delimited file
     '../../data/NADAC 20150617.txt' is read as a dict by csv.DictReader,
     passed through self.map_nadac, and the result is yielded. The file
     is held open while the generator is being consumed.
     '''
     fileName = io.relativeToAbsolute('../../data/NADAC 20150617.txt')
     # the csv documentation asks for newline='' so the reader, not the
     # text layer, decides where a record ends
     with open(fileName, newline='') as f:
         for row in csv.DictReader(f, dialect=csv.excel_tab):
             yield self.map_nadac(row)
Exemple #6
0
 def acquire_fda_ndc(self):
     ''' Loads records from the FDA product list.
     Source: [http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm]

     Generator: each data row of the tab-delimited file
     '../../data/FDA Product NDC 20150618.txt' is read as a dict by
     csv.DictReader, passed through self.map_fda, and the result is
     yielded. The file is held open while the generator is being consumed.
     '''
     fileName = io.relativeToAbsolute(
         '../../data/FDA Product NDC 20150618.txt')
     # the csv documentation asks for newline='' so the reader, not the
     # text layer, decides where a record ends
     with open(fileName, newline='') as f:
         for row in csv.DictReader(f, dialect=csv.excel_tab):
             yield self.map_fda(row)
Exemple #7
0
    def run(self):
        ''' Use the size of a record in the FDA data set to determine which
        package or product NDC is considered _the_ representative for the same
        proprietary name.

        Steps:
          1. Load the whitelist rows from ../../data/product_ndc.txt.
          2. Group the nodes from self._mapLabels() by proprietary name,
             keeping only names and product NDCs present in the whitelist.
          3. Let self._reduceToCanon() pick the canonical
             (proprietary_name, product_ndc) pairs; the selection rule
             itself lives in that helper.
          4. Tag every whitelist row with is_canon = 'true' / 'false' and
             save the result over the original whitelist file.
        '''
        print('Loading White List')
        whiteListFileName = io.relativeToAbsolute('../../data/product_ndc.txt')
        records = []
        # newline='' is what the csv module documentation requires for
        # files passed to a reader
        with open(whiteListFileName, newline='') as f:
            for row in csv.DictReader(f, dialect=csv.excel_tab):
                # DictReader files the surplus fields of an over-long row
                # under the key None; drop that and any empty column name
                records.append({k: v for k, v in row.items() if k})

        partitions = {x['proprietary_name']: [] for x in records}
        products = {x['product_ndc'] for x in records if x['proprietary_name']}

        print('Mapping Labels')
        for node in self._mapLabels():
            nameKey = node['proprietary_name']
            prodKey = node['ndc']
            if nameKey in partitions and prodKey in products:
                partitions[nameKey].append(node)

        print('Reducing to Canon')
        canon = set(self._reduceToCanon(partitions))

        print('Updating NDC Whitelist')
        for row in records:
            canonKey = (row['proprietary_name'], row['product_ndc'])
            if canonKey in canon:
                # consume because multiple package codes map to this key,
                # so only the first matching row is flagged
                canon.remove(canonKey)
                row['is_canon'] = 'true'
            else:
                row['is_canon'] = 'false'

        print('Saving')
        tempName = io.relativeToAbsolute('../../data/product_ndc_canon.txt')
        # NOTE(review): saveAsTabbedText is handed the relative path while
        # the swap below uses the absolute one; presumably the helper
        # resolves the path the same way relativeToAbsolute does - confirm
        io.saveAsTabbedText(records, '../../data/product_ndc_canon.txt')

        # no errors: swap the new file in with a single call, so there is
        # no moment at which the whitelist file is missing
        os.replace(tempName, whiteListFileName)
    def acquire_labels(self):
        """ Retrieve the full set of drug labeling data from the FDA.
        Since the FDA data set is limited to 5000 records, partition on the
        'labeler' code of the product NDC

        Generator: collects the distinct 'labeler' values from
        ../../data/product_ndc.txt (minus a fixed exclusion list), then, in
        sorted order, queries the label endpoint once per labeler through
        a WebServiceCache wrapping an OpenFdaProxy, and yields every record
        the cache returns.
        """
        # labelers left out of the query (the "missing ones")
        skipped = {"49158", "60687", "62107", "62542", "69235"}

        # get the list of labelers (minus some missing ones)
        fileName = io.relativeToAbsolute("../../data/product_ndc.txt")
        # newline='' is what the csv module documentation requires for
        # files passed to a reader
        with open(fileName, newline="") as f:
            labelers = {
                x["labeler"]
                for x in csv.DictReader(f, dialect=csv.excel_tab)
                if x["labeler"] not in skipped
            }

        proxy = OpenFdaProxy()
        cache = WebServiceCache(proxy)
        base = "https://api.fda.gov/drug/label.json?search=openfda.product_ndc:"
        # labelers are strings, so this ordering is lexicographic
        for labeler in sorted(labelers):
            # the code is zero-padded to at least four digits in the query
            url = base + "%04d" % int(labeler)
            yield from cache.get(url)
Exemple #9
0
    def acquire_labels(self):
        ''' Retrieve the full set of drug labeling data from the FDA.
        Since the FDA data set is limited to 5000 records, partition on the
        'labeler' code of the product NDC

        Generator: reads the distinct 'labeler' values from
        ../../data/product_ndc.txt, drops a fixed set of excluded codes,
        and for each remaining labeler (in sorted order) yields every
        record returned by the cached label query for that labeler.
        '''
        # labelers left out of the query (the "missing ones")
        excluded = {'49158', '60687', '62107', '62542', '69235'}

        # get the list of labelers (minus some missing ones)
        fileName = io.relativeToAbsolute('../../data/product_ndc.txt')
        # the csv documentation asks for newline='' on files given to a
        # reader
        with open(fileName, newline='') as f:
            labelers = {
                x['labeler']
                for x in csv.DictReader(f, dialect=csv.excel_tab)
                if x['labeler'] not in excluded
            }

        proxy = OpenFdaProxy()
        cache = WebServiceCache(proxy)
        base = 'https://api.fda.gov/drug/label.json?search=openfda.product_ndc:'
        # labelers are strings, so this ordering is lexicographic
        for labeler in sorted(labelers):
            # the code is zero-padded to at least four digits in the query
            url = base + '%04d' % int(labeler)
            yield from cache.get(url)
Exemple #10
0
 def __init__(self):
     ''' Resolve the input and output file locations and start with an
     empty grouping table.
     '''
     source_relative = '../../data/openfda-manufacturer_name.txt'
     target_relative = '../../data/cleaned-manufacturer_name.txt'
     self.sourceFileName = io.relativeToAbsolute(source_relative)
     self.targetFileName = io.relativeToAbsolute(target_relative)
     # any key looked up for the first time starts out as an empty list
     self.groups = defaultdict(list)
Exemple #11
0
 def _createFileName(self, url):
     ''' Build the absolute path of the cache file for a URL.

     The directory is the URL's network location with dots replaced by
     dashes; the file name is the URL's query string passed through
     self._removeBadFileNameCharacters, plus a '.json' suffix. Both are
     placed under '../cache/' and resolved with io.relativeToAbsolute.
     '''
     parts = urlparse(url)
     host_dir = str(parts.netloc).replace('.', '-')
     json_name = self._removeBadFileNameCharacters(parts.query) + '.json'
     return io.relativeToAbsolute(
         os.path.join('../cache/', host_dir, json_name))
 def __init__(self):
     ''' Set up the source/target file paths and the empty group table. '''
     resolve = io.relativeToAbsolute
     self.sourceFileName = resolve('../../data/openfda-manufacturer_name.txt')
     self.targetFileName = resolve('../../data/cleaned-manufacturer_name.txt')
     # missing keys default to a fresh empty list
     self.groups = defaultdict(list)