def run(self, record):
    """Record which known drugs a note mentions and widen their date range.

    Ensures ``self.masterDict`` has an entry for ``record.ruid``, extracts the
    medication sections of ``record.content`` and, for every pattern in
    ``self.drugNames`` found in that text, updates each entry of
    ``self.finalRecords`` that has the same ``ruid``: a drug seen for the
    first time gets a new ``Drug`` whose start and end dates are
    ``record.entry_date``; a known drug has its ``startDate``/``endDate``
    stretched so the range covers ``record.entry_date``.

    Args:
        record: object exposing ``ruid``, ``content`` (the note text) and
            ``entry_date`` (must be orderable against the stored dates).
    """
    if record.ruid not in self.masterDict:
        self.masterDict[record.ruid] = {}

    # TODO: the medications part of this regex needs to get until the end of
    # listed medications, not just 1000 characters after
    prescriptionsRegex = r'Medications\sKnown\sto\sbe\sPrescribed(.*?)--|Medications:.{0,1000}'

    # re.findall() would hand back only group 1, which is empty whenever the
    # "Medications:" alternative is the one that matched, so that text used to
    # be silently dropped. Walk the match objects and fall back to the whole
    # match when the capture group did not take part.
    sections = []
    for found in re.finditer(prescriptionsRegex, record.content, re.IGNORECASE):
        captured = found.group(1)
        sections.append(found.group(0) if captured is None else captured)
    match = "".join(" " + section for section in sections)
    if not match:
        return

    # Only the final records that belong to this ruid can be affected, so
    # select them once instead of once per drug name.
    patientRecords = [fr for fr in self.finalRecords if fr.ruid == record.ruid]

    for drugName in self.drugNames:
        # drugName is used as a regular expression, exactly as before.
        if not re.search(drugName, match, re.IGNORECASE):
            continue
        for fr in patientRecords:
            if drugName not in fr.drugs:
                # First sighting: start and end are both this entry date.
                dr = Drug()
                dr.name = drugName
                dr.startDate = record.entry_date
                dr.endDate = record.entry_date
                fr.drugs[drugName] = dr
                continue
            known = fr.drugs[drugName]
            if record.entry_date < known.startDate:
                known.startDate = record.entry_date
            elif record.entry_date > known.endDate:
                known.endDate = record.entry_date
def run(self, record):
    """Scan one record for prescribed drugs and update per-drug date ranges.

    Makes sure ``self.masterDict`` contains ``record.ruid``, collects the
    medication text from ``record.content`` and searches it for each pattern
    in ``self.drugNames``. For every hit, each item of ``self.finalRecords``
    with a matching ``ruid`` either gains a new ``Drug`` (start and end set to
    ``record.entry_date``) or has the existing drug's ``startDate`` /
    ``endDate`` extended to include ``record.entry_date``.

    Args:
        record: object with ``ruid``, ``content`` and ``entry_date``
            attributes; ``entry_date`` must compare with the stored dates.
    """
    if record.ruid not in self.masterDict:
        self.masterDict[record.ruid] = {}

    # TODO: the medications part of this regex needs to get until the end of
    # listed medications, not just 1000 characters after
    prescriptionsRegex = r'Medications\sKnown\sto\sbe\sPrescribed(.*?)--|Medications:.{0,1000}'

    match = ""
    for hit in re.finditer(prescriptionsRegex, record.content, re.IGNORECASE):
        group = hit.group(1)
        if group is None:
            # The "Medications:" alternative has no capture group. With
            # re.findall() it yielded an empty string and the medication
            # text was lost; use the whole match instead.
            group = hit.group(0)
        match = match + " " + group
    if match == "":
        return

    # The ruid filter does not depend on the drug, so evaluate it only once.
    sameRuid = [fr for fr in self.finalRecords if fr.ruid == record.ruid]

    for drugName in self.drugNames:
        if re.search(drugName, match, re.IGNORECASE) is None:
            continue
        for fr in sameRuid:
            if drugName in fr.drugs:
                tracked = fr.drugs[drugName]
                # Move the start earlier or the end later, whichever applies.
                if record.entry_date < tracked.startDate:
                    tracked.startDate = record.entry_date
                elif record.entry_date > tracked.endDate:
                    tracked.endDate = record.entry_date
            else:
                # Add drug and entry date to the final record's drug dict.
                dr = Drug()
                dr.name = drugName
                dr.startDate = record.entry_date
                dr.endDate = record.entry_date
                fr.drugs[drugName] = dr
def main():
    """Create three sample ``Drug`` objects and print each of them in order."""
    samples = [
        Drug('essentiale'),
        Drug('Karvalol', 'Use before eating', 30, 'transparent', 4, 10),
        Drug('Anaferon', 'Use after eating', 15, 'some active subtance', 8, 20),
    ]
    for sample in samples:
        print(sample)
def search_by_str():
    """Ask for a search term and print every drug that matches it.

    The term is read with ``raw_input``, the drug collection comes from
    ``start()`` and is narrowed by ``return_match``. Matches are enriched by
    ``add_FDA_info`` and printed as a numbered list via ``Drug.printer``;
    when nothing matches, "No results." is printed instead.
    """
    # Single-argument print(...) emits the same text under Python 2 and 3.
    term = raw_input("Enter your search term: ")
    matches = return_match(start(), term)
    if len(matches) <= 0:
        print("No results.")
        return

    matches = add_FDA_info(matches)
    for position, key in enumerate(matches, 1):
        print(str(position) + ".")
        Drug.printer(matches[key])
        print("")
def worker(self):
    '''Aggregate the input CSV per drug, sort the drugs and write them out.

    Reads rows of ``(entry_id, last, first, drug_name, drug_cost)`` from the
    file named by ``sys.argv[1]`` (the first row is treated as a header and
    skipped). For each drug name it keeps one ``Drug`` whose ``total_cost``
    is the sum of the integer-truncated costs and whose ``num_pres`` counts
    the distinct ``(last, first)`` prescribers. The drugs are sorted in
    descending order and handed to ``self.saveOutput`` together with a CSV
    writer on the file named by ``sys.argv[2]``.

    Both files are now closed when the method finishes; previously they were
    opened and never closed.
    '''
    with open(sys.argv[1], "r") as inFile, open(sys.argv[2], "w+") as outFile:
        fileInput = reader(inFile, delimiter=',')
        fileOutput = writer(outFile, delimiter=',')

        self.readInput(fileInput)  # skip the header
        rowItem = self.readInput(fileInput)

        drugList = []
        drugDict = {}            # drug name -> index into drugList
        drugPrescriberDict = {}  # (drug name, last, first) -> index into drugList

        # The loop stops when readInput() returns None.
        while rowItem is not None:
            entry_id, last, first, drug_name, drug_cost = rowItem
            drug_cost = int(float(drug_cost))
            prescriber = (drug_name, last, first)

            if drug_name not in drugDict:
                # First row for this drug: one prescriber, this row's cost.
                drugList.append(Drug(drug_name, 1, drug_cost))
                drugDict[drug_name] = len(drugList) - 1
                drugPrescriberDict[prescriber] = len(drugList) - 1
            else:
                drugindex = drugDict[drug_name]
                drug = drugList[drugindex]
                if prescriber not in drugPrescriberDict:
                    # New prescriber for a drug that is already known.
                    drug.num_pres += 1
                    drugPrescriberDict[prescriber] = drugindex
                drug.total_cost += drug_cost

            rowItem = self.readInput(fileInput)

        drugList.sort(reverse=True)
        # Written inside the with-block so the output file is still open.
        self.saveOutput(drugList, fileOutput)
def search_by_num():
    """Ask for price and percent-increase bounds and print the drugs within them.

    Four values are read with ``raw_input``; an empty answer selects a very
    permissive default. Percent answers are divided by 100. The bounds are
    passed to ``return_highest`` on the collection from ``start()``; results
    are enriched by ``add_FDA_info``, printed as a numbered list through
    ``Drug.printer`` and then passed to ``ask_for_graph``. "No results." is
    printed when nothing qualifies.
    """
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("Enter minimum and maximum current price and percent increase in two years.")
    print("You can just hit enter if you don't want a minimum or maximum.")
    price_floor_text = raw_input("MINIMUM CURRENT PRICE: ")
    percent_floor_text = raw_input("MINIMUM PERCENT INCREASE: ")
    price_cap_text = raw_input("MAXIMUM CURRENT PRICE: ")
    percent_cap_text = raw_input("MAXIMUM PERCENT INCREASE: ")
    print("")

    # Blank answers fall back to bounds wide enough to exclude nothing.
    min_percent = float(remove_stuff(percent_floor_text)) / 100 if percent_floor_text else -10000000
    min_price = float(remove_stuff(price_floor_text)) if price_floor_text else 0
    max_percent = float(remove_stuff(percent_cap_text)) / 100 if percent_cap_text else 1000000
    max_price = float(remove_stuff(price_cap_text)) if price_cap_text else 100000000

    hits = return_highest(start(), min_percent, min_price, max_percent, max_price)
    if len(hits) <= 0:
        print("No results.")
        return

    hits = add_FDA_info(hits)
    print("")
    for position, key in enumerate(hits, 1):
        print(str(position) + ".")
        Drug.printer(hits[key])
        print("")
    ask_for_graph(hits)
def __init__(self, filepath):
    """Load the UTF-8 JSON file at *filepath* and wrap its sections.

    The parsed document is kept in ``self.raw``. Each entry of the
    ``compounds``, ``remedies``, ``enzymes``, ``transporter`` and ``drugs``
    lists is turned into the corresponding object from its ``name`` and
    ``xrefs``. ``self.publication`` is a ``Reference`` built from the
    publication's ``pmid``, optional ``doi`` (``None`` when absent) and
    ``citation``.
    """
    with io.open(filepath, 'r', encoding='utf-8') as handle:
        self.raw = json.load(handle)

    def wrap(factory, section):
        # Build one object per JSON entry of the given section.
        return [factory(entry['name'], entry['xrefs'])
                for entry in self.raw[section]]

    self.compounds = wrap(Compound, 'compounds')
    self.remedies = wrap(Remedy, 'remedies')
    self.enzymes = wrap(Enzyme, 'enzymes')
    self.transporter = wrap(Transporter, 'transporter')
    self.drugs = wrap(Drug, 'drugs')

    publication = self.raw['publication']
    self.publication = Reference(
        publication['pmid'],
        publication.get('doi'),
        publication['citation'],
    )
def GetDrugData(self):
    """Rebuild the ``drugs`` table from the drug ids found in ``drugpairs``.

    For every distinct ``did`` in ``drugpairs`` a ``Drug`` is created and one
    ``(did, name, term)`` row is inserted per associated term, skipping terms
    whose lower-cased text contains the drug's name.

    :return: None
    """
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print('Getting drug data /(>_<)\\}')

    # Drop and re-create the drugs table.
    self.remakeTable("drugs")

    # All drug ids come from the known gene-drug connections table.
    self.sql.execute('SELECT DISTINCT did FROM drugpairs')

    for row in tqdm(self.sql.fetchall()):
        did = row[0]
        drug = Drug(did)
        for entry in drug.terms:
            term = entry['term']
            # Skip terms that merely repeat the chemical name.
            if drug.name in term.lower():
                continue
            self.insertValues("drugs", (did, drug.name, term))
        # NOTE(review): the source was whitespace-collapsed, so how often the
        # commit runs (per drug here) is reconstructed; confirm against the
        # original indentation.
        self.conn.commit()
def main():
    """Scrape the drugs.com alphabetical listings into ``all_drugs.json``.

    For each letter a-z the page ``/alpha/<letter>1.html`` is fetched and its
    first ``boxListPopular`` div is kept. Every link with an ``href`` in the
    collected markup becomes a ``Drug`` keyed by the link text. One JSON
    object ``{name: overview_questions}`` is then written per line.
    """
    base_url = "https://www.drugs.com"
    alpha_url = "/alpha"

    fragments = []
    for letter in "abcdefghijklmnopqrstuvwxyz":
        print(letter)
        response = requests.get(base_url + alpha_url + "/" + letter + "1.html")
        page = BeautifulSoup(response.content, "lxml")
        boxes = page.find_all("div", ["boxListPopular"])
        fragments.append(str(boxes[0]))

    drug_parsed = BeautifulSoup("".join(fragments), "lxml")
    all_drugs = {}
    for link in drug_parsed.find_all("a", href=True):
        all_drugs[link.string] = Drug(link.string, base_url + link["href"])

    with open('all_drugs.json', 'w') as out:
        for name, drug in all_drugs.items():
            json.dump({name: drug.overview_questions}, out)
            out.write('\n')
def new_fda(drug_dict):
    """Add vendor, scientific name and description to drugs from FDA NDC data.

    Reads "FDA-20160118.csv" row by row, skipping the first row. Each row is
    mapped onto ``headers``; its PRODUCTNDC value is stored twice, once with
    the dashes removed ("NDC") and once with a "0" appended to the first
    dash-separated segment ("NDC2"). Keys of ``drug_dict`` that contain
    either form as a substring are treated as matches:

    * exactly one match: the drug is updated through ``Drug.add_vendor``,
      ``Drug.add_sci_name`` and ``Drug.add_desc`` and its key is removed
      from the list of drugs still to identify;
    * several matches: ``multiple_results`` chooses which drugs to update.

    :param drug_dict: mapping of key -> drug object; presumably the keys are
        NADAC NDC strings (verify against caller).
    :return: the same ``drug_dict`` object that was passed in.
    """
    # this function supplies drug and vendor names via NDCs from the FDA's official database.
    # sadly the FDA and NADAC have a slightly different format for NDCs, so it's not perfect.
    # It is very unlikely to assign an incorrect NDC -- rather, it will pass on drugs it cannot find a match for.
    name_file = "FDA-20160118.csv"
    # Column names, in the order the columns are read from each CSV row.
    headers = ["PRODUCTID", "PRODUCTNDC", "PRODUCTTYPENAME", "PROPRIETARYNAME", "PROPRIETARYNAMESUFFIX", "NONPROPRIETARYNAME", "DOSAGEFORMNAME", "ROUTENAME", "STARTMARKETINGDATE", "ENDMARKETINGDATE", "MARKETINGCATEGORYNAME", "APPLICATIONNUMBER", "LABELERNAME", "SUBSTANCENAME", "ACTIVE_NUMERATOR_STRENGTH", "ACTIVE_INGRED_UNIT", "PHARM_CLASSES", "DEASCHEDULE"]
    count = 1  # 1-based row counter; row 1 is skipped below
    results = {}  # not used anywhere in this function
    # Keys of drug_dict that have not been matched to an FDA row yet.
    current_list = [key for key in drug_dict]
    with open(name_file, "r") as namecsv:
        csvreader = csv.reader(namecsv)
        for line in csvreader:
            if count == 1:
                # First row of the file: nothing to do.
                pass
            else:
                drug = {}
                for i in range(len(headers)):
                    if headers [i] == "PRODUCTNDC":
                        # Python 2 str.translate form: delete every "-".
                        drug["NDC"] = line[i].translate(None,"-")
                        # Second candidate: "0" appended to the first segment.
                        templist = line[i].split("-")
                        templist[0] = templist[0] + "0"
                        drug["NDC2"] = "".join(templist)
                    else:
                        drug[headers[i]] = line[i]
                # print "Looking for NDC " + drug["NDC"] + "..."
                # Substring match of either NDC form against the pending keys.
                findings = [item for item in current_list if ( drug["NDC"] in item ) or ( drug["NDC2"] in item )]
                if len(findings) == 1:
                    this_drug = drug_dict[findings[0]]
                    Drug.add_vendor(this_drug, drug["LABELERNAME"])
                    Drug.add_sci_name(this_drug, drug["NONPROPRIETARYNAME"])
                    Drug.add_desc(this_drug, drug["PHARM_CLASSES"])
                    current_list.remove(findings[0])
                    print "%i drugs left to ID..." % len(current_list)
                elif len(findings) == 0:
                    pass
                else:
                    findings_as_drugs = [drug_dict[key] for key in findings] #converts list of keys into list of Drugs
                    findings = multiple_results(drug, findings_as_drugs) #list of Drugs
                    for finding in findings: #finding is a Drug
                        Drug.add_vendor(finding, drug["LABELERNAME"])
                        Drug.add_sci_name(finding, drug["NONPROPRIETARYNAME"])
                        Drug.add_desc(finding, drug["PHARM_CLASSES"])
                        # assumes finding.id equals the drug's key in drug_dict -- TODO confirm
                        current_list.remove(finding.id)
                        # NOTE(review): the source was whitespace-collapsed; whether this
                        # print sits inside or after the loop should be confirmed.
                        print "%i drugs left to ID..." % len(current_list)
                    # BECAUSE THE FDA NDCS ARE ONLY EIGHT DIGITS LONG, THIS SEARCH OFTEN RETURNS 2+ RESULTS.
                    # TODO: write a function to check other info to figure out which is the correct one.
                    # or, worst-case scenario, prompt user to choose which looks right, maybe?
            count += 1
    return drug_dict
def write(drugs):
    """Append every drug, with one price column per date, to FullPrices.csv.

    A header row (the fixed fields followed by each date from
    ``get_date_list()``) is written first, then one '|'-delimited row per
    entry of *drugs*. A date missing from a drug's ``prices`` yields ``None``
    in that column. "Done" is printed at the end.
    """
    dates = get_date_list()
    fieldnames = ["ID", "Name", "Scientific Name", "Unit", "OTC", "Brand/Generic", "Vendor", "Package", "Description"]
    fieldnames.extend(dates)

    with open("FullPrices.csv","a") as prices_csv:
        writer = csv.writer(prices_csv, delimiter='|')
        writer.writerow(fieldnames)
        for entry in drugs:
            drug = drugs[entry]
            drugrow = [drug.id, drug.name, drug.scientific_name, drug.unit, drug.otc, drug.b_or_g, drug.vendor, drug.package, drug.desc]
            for day in dates:
                try:
                    price = drug.prices[day]
                except KeyError:
                    price = None
                drugrow.append(price)
            writer.writerow(drugrow)
    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("Done")


if __name__ == '__main__':
    # Print a numbered listing (starting at 0) of all drugs, then export them.
    drugs = create_list()
    for count, drug in enumerate(drugs):
        print("%i." % count)
        Drug.printer(drugs[drug])
        print("")
    write(drugs)
def main(argv):
    """Compute post-matching PRR counts for the next unprocessed drug.

    ``argv[1]`` is an integer offset into the drug-sorted ``prr_test`` pairs.
    Drugs from that offset onward are visited in order; a drug whose
    ``Post_PRR/<drugID>.feather`` file already exists is printed and skipped.
    For the first remaining drug a ``Drug`` is built and matched. If matching
    is unavailable, an empty result frame is saved and the next drug is
    tried. Otherwise, for every ADR paired with the drug and for each sex,
    the function counts how many matched patients take the drug with the ADR
    (``a_post``) and without it (``c_post``), saves the counts and stops.

    Args:
        argv: command-line style argument list; only ``argv[1]`` is read.
    """
    i = int(argv[1])
    u = Utils()
    db = Database('Mimir from Munnin')
    np.random.seed(u.RANDOM_STATE)
    df_patients = u.load_df('df_patients')

    # NOTE(review): sex_adr, drugs, PID_M and PID_F are not used below; the
    # calls are kept because they touch the database and the data files.
    sex_adr = db.get_list('select meddra_pt_id from gender_terms')
    drugs = db.get_list('select atc_5_id from atc_5_name')

    test = [(x, y) for x, y in u.load_np('prr_test')]
    df_test = pd.DataFrame(test,
                           columns=['drug', 'adr']).sort_values(by='drug')[i:]

    PID_M = set(df_patients.query('Sex=="M"').get('PID').values)
    PID_F = set(df_patients.query('Sex=="F"').get('PID').values)

    result_columns = ['drug', 'adr', 'sex', 'a_post', 'c_post']

    for drugID, data in df_test.groupby('drug'):
        filename = 'Post_PRR/' + str(drugID)
        pth = u.DATA_PATH + filename + '.feather'
        if os.path.exists(pth):
            print(drugID)
            continue

        q = 'select PID from atc_5_patient where atc_5_id = ' + str(drugID)
        takes_drug = set(db.get_list(q))

        # Reset first: if Drug(drugID) itself raises, the name must not keep
        # pointing at the previous iteration's drug (or be unbound).
        drug = None
        try:
            drug = Drug(drugID)
            drug.match()
        except NameError:
            u.save_df(pd.DataFrame(columns=result_columns), filename)

        if drug is None or drug.match_m is None or drug.match_f is None:
            u.save_df(pd.DataFrame(columns=result_columns), filename)
            db = Database('Mimir from Munnin')
            continue

        prr_counts = []
        for adrID in data.adr.values:
            q = 'select PID from pt_patient where meddra_concept_id = ' + str(
                adrID)
            has_adr = set(db.get_list(q))

            # Build the two patient sets once per ADR rather than once per
            # matched patient.
            exposed_with_adr = has_adr & takes_drug
            exposed_without_adr = takes_drug - has_adr

            for sex, PSM in (('M', drug.match_m), ('F', drug.match_f)):
                prr_counts.append({
                    'drug': drugID,
                    'adr': adrID,
                    'sex': sex,
                    'a_post': sum(1 for x in PSM if x in exposed_with_adr),
                    'c_post': sum(1 for x in PSM if x in exposed_without_adr),
                })

        u.save_df(pd.DataFrame(prr_counts), filename)
        # Only one drug is fully processed per invocation.
        break
def main(argv):
    """Run the 25-iteration analysis for the drug at index ``argv[1]``.

    Nothing happens unless the drug's stored status is 'no'. Otherwise the
    status becomes 'working', each iteration runs match / count_adr /
    assign_abcd / do_chi_square / calc_logROR / reset_for_next_itr, and the
    results are saved. The final status is 'yes' when ``save_results``
    returns a truthy value, 'no' when it does not, and 'error <message>' if
    anything raised along the way.
    """
    u = Utils()
    iterations = 25

    idx = int(argv[1])
    drugID = u.load_np('drugs')[idx]
    if u.read_status(drugID) != 'no':
        return

    try:
        u.write_status(drugID, 'working')
        drug = Drug(drugID)
        for itr in range(1, iterations + 1):
            drug.match()
            drug.count_adr()
            drug.assign_abcd(itr)
            drug.do_chi_square()
            drug.calc_logROR()
            drug.reset_for_next_itr()
        saved = drug.save_results(iterations)
        u.write_status(drugID, 'yes' if saved else 'no')
    except:
        # NOTE(review): the bare except also traps KeyboardInterrupt and
        # SystemExit; it is kept so the status always records the failure.
        u.write_status(drugID, 'error ' + str(sys.exc_info()[1]))
def get_drugs(file_path):
    """Load ``Drug`` objects from a comma-separated file with a header row.

    The first row is discarded as a header. Every later row that has exactly
    two fields becomes ``Drug(first_field, second_field)``; rows of any other
    length are ignored.

    Args:
        file_path: path of the CSV file to read.

    Returns:
        A list of ``Drug`` objects in file order; empty when the file has no
        data rows, including when the file is completely empty.
    """
    with open(file_path, 'r') as drugs_file:
        reader = csv.reader(drugs_file, delimiter=",")
        # Discard the header. The default stops an empty file from raising
        # StopIteration here.
        next(reader, None)
        return [Drug(line[0], line[1]) for line in reader if len(line) == 2]
index = self.mapItemtoHeapIndex[item] #print index item.total_cost += amount self.sift_up(index) def __str__(self): ''' print the heap out''' res = '[' for item in self.hq: res += item.__str__() + ', ' res += ']' return res if __name__ == "__main__": drug1 = Drug("drug1", 2, 1000) drug2 = Drug("drug2", 1, 1500) drug3 = Drug("drug3", 4, 500) drug4 = Drug("drug4", 3, 1500) drug5 = Drug("drug5", 1, 1500) drug6 = Drug("drug06", 1, 1500) maxheap = MaxHeap() #print maxheap maxheap.heappush(drug1) #print maxheap maxheap.heappush(drug2) #print maxheap maxheap.heappush(drug3) print maxheap maxheap.heappush(drug4) maxheap.heappush(drug5)