Beispiel #1
0
def raw_drugs_begin():
    #csvpath = os.path.join(userhome, 'Documents', 'Programming', 'Python', 'drugPrices', 'NADAC', 'NADAC 20121004.csv')
    csvpath = "test NADAC.csv"
    headers = ["Name", "NDC", "Price", "Effective date", "Pricing Unit", "Pharmacy Type", "OTC or Not",
               "Explanation Code", "Brand or Generic"]
    count = 1
    drug_dict = {}
    with open(csvpath, 'r') as csvfile:
        drug_data = csv.reader(csvfile)
        for row in drug_data:
            if count < 5:
                pass
            else:
                drug = {}
                for i in range(len(headers)):
                    drug[headers[i]] = row[i]
                price = drug["Price"]
                this_drug = Drug(drug["Name"], drug["NDC"], drug["Pricing Unit"], drug["OTC or Not"],
                                 drug["Brand or Generic"], "NADAC")
                Drug.add_price(this_drug, "20141015", price)
                drug_dict[drug["NDC"]] = this_drug
            count += 1
        csvfile.close()
    drug_dict = add_FDA_info(drug_dict)
    return drug_dict
Beispiel #2
0
def add_FDA_info(drug_dict):

    # this function supplies drug and vendor names via NDCs from the FDA's official database.
    # sadly the FDA and NADAC have a slightly different format for NDCs, so it's not perfect.
    # It is very unlikely to assign an incorrect NDC -- rather, it will pass on drugs it cannot find a match for.
    name_file = os.path.join(userhome, 'Documents', 'Programming', 'Python', 'drugPrices', 'FDA-20160118.csv')
    headers = ["PRODUCTID", "PRODUCTNDC", "PRODUCTTYPENAME", "PROPRIETARYNAME", "PROPRIETARYNAMESUFFIX",
               "NONPROPRIETARYNAME", "DOSAGEFORMNAME", "ROUTENAME", "STARTMARKETINGDATE", "ENDMARKETINGDATE",
               "MARKETINGCATEGORYNAME", "APPLICATIONNUMBER", "LABELERNAME", "SUBSTANCENAME",
               "ACTIVE_NUMERATOR_STRENGTH", "ACTIVE_INGRED_UNIT", "PHARM_CLASSES", "DEASCHEDULE"]
    count = 1
    current_list = [key for key in drug_dict]
    with open(name_file, "r") as namecsv:
        csvreader = csv.reader(namecsv)
        for line in csvreader:
            if count == 1:
                pass
            else:
                drug = {}
                for i in range(len(headers)):
                    if headers [i] == "PRODUCTNDC":
                        drug["NDC"] = line[i].translate(None,"-")
                        templist = line[i].split("-")
                        templist[0] = templist[0] + "0"
                        drug["NDC2"] = "".join(templist)
                    else:
                        drug[headers[i]] = line[i]
                findings = [item for item in current_list if ( drug["NDC"] in item ) or ( drug["NDC2"] in item )]
                if len(findings) == 1:
                    this_drug = drug_dict[findings[0]]
                    Drug.add_vendor(this_drug, drug["LABELERNAME"])
                    Drug.add_sci_name(this_drug, drug["NONPROPRIETARYNAME"])
                    Drug.add_desc(this_drug, drug["PHARM_CLASSES"])
                    current_list.remove(findings[0])
                    print "%i drugs left to ID..." % len(current_list)
                elif len(findings) == 0:
                    pass
                else:
                    findings_as_drugs = [drug_dict[key] for key in findings] #converts list of keys into list of Drugs
                    findings = multiple_results(drug, findings_as_drugs) #list of Drugs
                    for finding in findings: #finding is a Drug
                        Drug.add_vendor(finding, drug["LABELERNAME"])
                        Drug.add_sci_name(finding, drug["NONPROPRIETARYNAME"])
                        Drug.add_desc(finding, drug["PHARM_CLASSES"])
                        current_list.remove(finding.id)
                        print "%i drugs left to ID..." % len(current_list)

                            # BECAUSE THE FDA NDCS ARE ONLY EIGHT DIGITS LONG, THIS SEARCH OFTEN RETURNS 2+ RESULTS.
                            # TODO: write a function to check other info to figure out which is the correct one.
                            # or, worst-case scenario, prompt user to choose which looks right, maybe?
            count += 1

    return drug_dict