def plotSlopeHists(data):
    """Show two histograms of per-CCG slopes: actual data, then shuffled labels.

    The first figure is built from ``data`` as given.  The second figure is
    built after randomly permuting the 'CCG' column, which breaks any link
    between a row and its CCG.  Each figure is displayed with its own
    ``pyplot.show()`` call.

    The shuffle is applied to a copy, so the caller's ``data`` keeps its
    original CCG labels (previously the labels were overwritten in place,
    which silently corrupted any analysis run on ``data`` afterwards).

    Args:
        data: DataFrame with a 'CCG' column plus whatever columns
            ``getPercents`` / ``findSlopes`` need (not visible here).
    """

    def _plot_slope_hist(frame, title):
        # Aggregate per CCG; getPercents' return value is unused, so it
        # presumably adds its columns to `summed` in place -- TODO confirm.
        summed = util.sumBy(frame, 'CCG')
        getPercents(summed)
        slopes = findSlopes(summed)
        pyplot.hist(slopes, bins=50)
        pyplot.title(title, fontsize=18)
        pyplot.xlabel('rate of change of percent diclofenac prescribed',
                      fontsize=14)
        pyplot.ylabel('number of CCGs', fontsize=14)
        # Same fixed axes for both figures so they can be compared by eye.
        pyplot.axis([-.06, .08, 0, 38])
        pyplot.show()

    _plot_slope_hist(
        data,
        'Actual distribution of rate of change \n of diclofenac prescription')

    # Permute the CCG labels on a private copy of the frame.
    shuffled = data.copy()
    labels = shuffled['CCG'].tolist()
    random.shuffle(labels)
    shuffled['CCG'] = labels

    _plot_slope_hist(
        shuffled,
        'Example distribution of rate of change \n of diclofenac prescription '
        'without \ngovernance or geographic effect')
def plotFinalHists(data):
    """Show two histograms of per-CCG diclofenac share: actual, then shuffled.

    NaN values in the two 'days_prescribed_*' columns of ``data`` are
    replaced by 0 in place (unchanged behaviour).  The first figure uses
    ``data`` as given; the second uses a copy whose 'CCG' column has been
    randomly permuted.

    The shuffle is applied to a copy, so the caller's CCG labels are no
    longer overwritten by this function.

    Args:
        data: DataFrame with 'CCG', 'days_prescribed_diclofenac' and
            'days_prescribed_naproxen' columns.
    """

    def _nan_to_zero(value):
        # NaN is the only value that compares unequal to itself.
        return 0 if value != value else value

    for column in ('days_prescribed_diclofenac', 'days_prescribed_naproxen'):
        data[column] = data[column].map(_nan_to_zero)

    def _plot_share_hist(frame, title):
        summed = util.sumBy(frame, 'CCG')
        diclofenac = summed['days_prescribed_diclofenac']
        naproxen = summed['days_prescribed_naproxen']
        summed['perc'] = diclofenac / (naproxen + diclofenac)
        pyplot.hist(summed['perc'], bins=100)
        pyplot.title(title, fontsize=18)
        pyplot.xlabel('percent diclofenac prescribed', fontsize=14)
        pyplot.ylabel('number of CCGs', fontsize=14)
        # Same fixed axes for both figures so they can be compared by eye.
        pyplot.axis([0, 1, 0, 30])
        pyplot.show()

    _plot_share_hist(
        data, 'Actual distribution of rate of diclofenac prescription')

    # Permute the CCG labels on a private copy of the frame.
    shuffled = data.copy()
    labels = shuffled['CCG'].tolist()
    random.shuffle(labels)
    shuffled['CCG'] = labels

    _plot_share_hist(
        shuffled,
        'Example distribution of rate of diclofenac \nprescription without '
        'governance or geographic effect')
def run(self):
    """Write one brand/generic ratio CSV per (brand, generic, output) triple.

    For each triple of paths the brand and generic frames are summed per
    (practice, 'postal code'), outer-merged on the practice key, and reduced
    to a single 'postal code' column plus 'sum*' and 'ratio*' columns for
    items, quantity and nic.  The ratio is brand / (brand + generic).
    """
    brand_paths = self.Config.append_dir("WriteRatioDatasetBrand")
    generic_paths = self.Config.append_dir("WriteRatioDatasetGeneric")
    out_paths = self.Config.append_dir("WriteRatioDatasetOut")

    def nan_to_zero(value):
        # NaN is the only value that is not equal to itself.
        if value != value:
            return 0
        return value

    def pick_postcode(row):
        # Prefer the brand-side postcode; fall back to the generic side
        # when the brand side is NaN (row absent from the brand frame).
        brand_code = row['postal codebrand']
        if brand_code != brand_code:
            return row['postal codegeneric']
        return brand_code

    for brand_path, generic_path, out_path in zip(brand_paths, generic_paths,
                                                  out_paths):
        brand_df = self.loadDF(brand_path)
        generic_df = self.loadDF(generic_path)

        brand_totals = util.sumBy(
            brand_df, [self.Config.keys['practice'], 'postal code'])
        generic_totals = util.sumBy(
            generic_df, [self.Config.keys['practice'], 'postal code'])

        merged = brand_totals.merge(
            generic_totals,
            on=self.Config.keys['practice'],
            how='outer',
            suffixes=('brand', 'generic'))

        items = self.Config.keys['items']
        quan = self.Config.keys['quantity']
        nic = self.Config.keys['nic']

        # Zero-fill the six measure columns and remember them for removal.
        to_drop = []
        for measure in (items, quan, nic):
            for side in ('brand', 'generic'):
                column = measure + side
                merged[column] = merged[column].map(nan_to_zero)
                to_drop.append(column)
        to_drop.extend(['INCLUDEbrand', 'GENERICbrand', 'INCLUDEgeneric',
                        'GENERICgeneric', 'postal codebrand',
                        'postal codegeneric'])

        merged['postal code'] = merged.apply(pick_postcode, axis=1)

        # All sums are added before any ratio, which fixes the column order.
        labelled = (('items', items), ('quantity', quan), ('nic', nic))
        for label, measure in labelled:
            merged['sum' + label] = (merged[measure + 'brand']
                                     + merged[measure + 'generic'])
        for label, measure in labelled:
            merged['ratio' + label] = (merged[measure + 'brand']
                                       / merged['sum' + label])

        merged = merged.drop(to_drop, axis=1)
        merged.to_csv(out_path, index=False)
def run(self):
    """Aggregate ratio datasets by postcode outcode and write them as CSV.

    For each (input, output) pair: rebuild the totals 'tot<x>' as
    'sum<x>' * 'ratio<x>' for items, quantity and nic, sum everything per
    outcode (the part of 'postal code' before the first space), recompute
    'ratio<x>' from the aggregated totals, left-merge the contents of
    'postcodes.csv' on 'outcode', and write the result.

    Fixes relative to the previous version:
      * ``if not frame`` raised ValueError for any loaded DataFrame, because
        pandas refuses to evaluate a DataFrame as a boolean; the checks are
        now explicit.
      * ``data.drop(...)`` returned a new frame that was thrown away, so the
        helper 'tot<x>' columns leaked into the output file; the result is
        now kept.
    """
    infiles = self.Config.append_dir("OutCodeRatiosIn")
    outfiles = self.Config.append_dir("OutCodeRatiosOut")

    outcodes = self.loadDF('postcodes.csv')
    # Assumes loadDF returns None (or an empty frame) when nothing could be
    # loaded -- TODO confirm against loadDF.
    if outcodes is None or len(outcodes) == 0:
        return

    labels = ['items', 'quantity', 'nic']
    for infile, outfile in zip(infiles, outfiles):
        data = self.loadDF(infile)
        if data is None or len(data) == 0:
            continue

        # Ratios cannot be summed directly; convert them to totals first.
        for label in labels:
            data['tot' + label] = data['sum' + label] * data['ratio' + label]

        # Diagnostic output kept from the original implementation.
        print(data)
        print(data['postal code'])

        data['outcode'] = data['postal code'].map(
            lambda code: code.partition(' ')[0])
        data = util.sumBy(data, 'outcode')

        for label in labels:
            data['ratio' + label] = data['tot' + label] / data['sum' + label]
            data = data.drop('tot' + label, axis=1)

        data = pandas.DataFrame.merge(data, outcodes, on='outcode',
                                      how='left')
        data.to_csv(outfile)
def calc_drug_over_time(Config):
    """Collect per-drug quantity and nic for every file in 'SepGeneric'.

    Each file is summed per BNF code.  The month label is the slice
    ``path[-11:-4]`` of the file path (seven characters before a
    four-character extension, e.g. 'Oct2013' -- presumably; confirm the
    naming scheme).

    Args:
        Config: object providing ``append_dir(folder)`` and a ``keys``
            mapping with 'bnf', 'quantity' and 'nic' entries.

    Returns:
        (quantity, nic): two dicts of the form ``{bnf_code: {month: value}}``.
    """
    folder = 'SepGeneric'
    infiles = Config.append_dir(folder)
    quantity = {}
    nic = {}
    for path in infiles:
        df = loadDF(path)
        month = path[-11:-4]
        grouped = util.sumBy(df, Config.keys['bnf'])

        bnf_col = Config.keys['bnf']
        quantity_col = Config.keys['quantity']
        nic_col = Config.keys['nic']
        for _, row in grouped.iterrows():
            item = row[bnf_col]
            # setdefault replaces the old `item in d.keys()` test, which
            # scanned a freshly built key list for every row on Python 2.
            quantity.setdefault(item, {})[month] = row[quantity_col]
            nic.setdefault(item, {})[month] = row[nic_col]
    return quantity, nic
def calc_drug_over_time(self, grouping = 'practice'):
    """Collect naproxen and diclofenac days prescribed per unit and month.

    Reads every file in the 'NSAIDSummed' folder.  With ``grouping='CCG'``
    each file is first summed per CCG and rows are keyed by the CCG column;
    with any other value rows are keyed by the practice column.  The month
    label is the slice ``path[-11:-4]`` of the file path.

    Args:
        grouping: 'CCG' to aggregate by CCG; anything else keys by practice.

    Returns:
        (naproxen, diclofenac): two dicts of the form
        ``{unit: {month: days_prescribed}}``.
    """
    folder = 'NSAIDSummed'
    infiles = self.Config.append_dir(folder)
    naproxen = {}
    diclofenac = {}
    by_ccg = grouping == 'CCG'
    for path in infiles:
        df = self.loadDF(path)
        if by_ccg:
            df = util.sumBy(df, self.Config.keys['ccg'])
            # NOTE(review): the original source lost its indentation, so it
            # is unclear whether this pause belonged to the CCG branch or to
            # every iteration; it is kept here.  Its purpose is not visible
            # in this file -- confirm whether it is still needed.
            time.sleep(20)
        month = path[-11:-4]

        # The key column depends only on `grouping`, so pick it once per
        # file instead of re-testing it for every row.
        if by_ccg:
            key_col = self.Config.keys['ccg']
        else:
            key_col = self.Config.keys['practice']

        for _, row in df.iterrows():
            item = row[key_col]
            # setdefault replaces the old `item in d.keys()` test, which
            # scanned a freshly built key list for every row on Python 2.
            naproxen.setdefault(item, {})[month] = \
                row['days_prescribed_naproxen']
            diclofenac.setdefault(item, {})[month] = \
                row['days_prescribed_diclofenac']
    return naproxen, diclofenac
def findSlopeMeanVar(base):
    """Return ``ts.MeanVar`` of the slopes computed from ``base`` per CCG.

    ``base`` is summed by its 'CCG' column, passed through ``getPercents``
    (whose return value is not used) and then through ``findSlopes``.
    """
    per_ccg = util.sumBy(base, 'CCG')
    getPercents(per_ccg)
    return ts.MeanVar(findSlopes(per_ccg))
def run(self):
    """Aggregate brand/generic drug pairs per (outcode, ChemID) and write CSV.

    For each (input, output) pair the rows are summed per outcode (the part
    of 'postal code' before the first space) and 'ChemID'.  For items,
    quantity and nic the '<key>_brand' and '<key>_gen' columns are replaced
    by 'sum<key>' and 'percent<key>' (brand share of the sum).  The contents
    of 'postcodes.csv' are then left-merged on 'outcode'.

    Fix relative to the previous version: ``if not frame`` raised ValueError
    for any loaded DataFrame, because pandas refuses to evaluate a DataFrame
    as a boolean; the checks are now explicit.
    """
    infiles = self.Config.append_dir("OutCodeDrugsIn")
    outfiles = self.Config.append_dir("OutCodeDrugsOut")

    outcodes = self.loadDF('postcodes.csv')
    # Assumes loadDF returns None (or an empty frame) when nothing could be
    # loaded -- TODO confirm against loadDF.
    if outcodes is None or len(outcodes) == 0:
        return

    for infile, outfile in zip(infiles, outfiles):
        data = self.loadDF(infile)
        if data is None or len(data) == 0:
            continue

        data['outcode'] = data['postal code'].map(
            lambda code: code.partition(' ')[0])
        data = util.sumBy(data, ['outcode', 'ChemID'])

        items = self.Config.keys['items']
        quan = self.Config.keys['quantity']
        nic = self.Config.keys['nic']
        for col in [items, quan, nic]:
            data['sum' + col] = data[col + '_brand'] + data[col + '_gen']
            data['percent' + col] = data[col + '_brand'] / data['sum' + col]
            data = data.drop([col + '_brand', col + '_gen'], axis=1)

        data = pandas.DataFrame.merge(data, outcodes, on='outcode',
                                      how='left')
        data.to_csv(outfile, index=False)
def plotRegression(self):
    """Plot items against unit cost with the stored fits, one figure per month.

    Fit parameters are read from 'Results/CostQuanCorrs_PPI.csv' and grouped
    by 'Group' and then 'Month'.  For each (group, month) the data file
    '<group>/<month>.csv' is summed per BNF code, scattered as cost
    (nic / quantity) against items, and overlaid with every fit row: a
    straight line for transform 'linear' and an exponential curve for
    transform 'log'.  The y axis is logarithmic.

    Fix relative to the previous version: the cost column was computed with
    the bare name ``Config`` instead of ``self.Config``, which only works if
    a module-level ``Config`` happens to exist.  The removed
    ``to_dict(outtype=...)`` keyword is also no longer used.
    """
    keys = self.Config.keys
    fits_by_group = self.loadDF("Results/CostQuanCorrs_PPI.csv")
    fits_by_group = fits_by_group.groupby("Group")

    # Cost values at which the fitted curves are drawn: 0.00 .. 1.45.
    xs = [x / 20.0 for x in range(30)]

    for name, group in fits_by_group:
        for month, fits in group.groupby("Month"):
            data = self.loadDF(name + "/" + month + ".csv")
            data = util.sumBy(data, keys["bnf"])
            data["cost"] = data[keys["nic"]] / data[keys["quantity"]]
            pp.scatter(data["cost"].tolist(), data[keys["items"]].tolist())

            for _, row in fits.iterrows():
                if row["transform"] == "linear":
                    ys = [x * row["slope"] + row["inter"] for x in xs]
                    pp.plot(xs, ys, "-.", label="linear")
                elif row["transform"] == "log":
                    # The fit was made on log(y), so undo the log here.
                    ys = [math.exp(x * row["slope"] + row["inter"])
                          for x in xs]
                    pp.plot(xs, ys, "-", label="log")

            pp.legend()
            pp.title(name + " " + month)
            pp.ylabel("items")
            pp.xlabel("cost")
            pp.yscale("log")
            pp.show()
def plotAllTime(self):
    """Plot items against unit cost over all months, one figure per group.

    Fit parameters are read from 'Results/CostQuanCorrs_PPI_AllTime.csv' and
    grouped by 'Group'.  For each group every file named in
    ``self.Config.filenames`` is loaded from '<group>/<filename>', summed per
    BNF code, and its cost (nic / quantity) and items values are pooled.
    The pooled points are scattered and overlaid with every fit row: a
    straight line for transform 'linear' and an exponential curve for
    transform 'log'.  Both axes are logarithmic.

    Fix relative to the previous version: the cost column was computed with
    the bare name ``Config`` instead of ``self.Config``, which only works if
    a module-level ``Config`` happens to exist.  The pooled lists are now
    extended in place rather than rebuilt for every file, and the removed
    ``to_dict(outtype=...)`` keyword is no longer used.
    """
    keys = self.Config.keys
    fits_by_group = self.loadDF("Results/CostQuanCorrs_PPI_AllTime.csv")
    fits_by_group = fits_by_group.groupby("Group")

    # Cost values at which the fitted curves are drawn: 0.00 .. 1.45.
    fxs = [x / 20.0 for x in range(30)]

    for name, group in fits_by_group:
        xs = []
        ys = []
        for month in self.Config.filenames:
            data = self.loadDF(name + "/" + month)
            data = util.sumBy(data, keys["bnf"])
            data["cost"] = data[keys["nic"]] / data[keys["quantity"]]
            xs.extend(data["cost"].tolist())
            ys.extend(data[keys["items"]].tolist())
        pp.scatter(xs, ys)

        for _, row in group.iterrows():
            if row["transform"] == "linear":
                fys = [x * row["slope"] + row["inter"] for x in fxs]
                pp.plot(fxs, fys, "-.", label="linear")
            elif row["transform"] == "log":
                # The fit was made on log(y), so undo the log here.
                fys = [math.exp(x * row["slope"] + row["inter"])
                       for x in fxs]
                pp.plot(fxs, fys, "-", label="log")

        pp.legend()
        pp.title(name)
        pp.ylabel("items")
        pp.xlabel("cost")
        pp.yscale("log")
        pp.xscale("log")
        pp.show()
def chemGenBrandComp(Config):
    """Plot brand vs. generic prescription totals over time for each drug.

    Drug names come from 'Criteria/antipsychotics.csv'.  Every file returned
    by ``Config.append_dir("Summed_Chem_Gen", group="psych")`` fills one slot
    (in file order) of the per-drug 'brand' and 'gen' series.  A drug is
    plotted, in its own figure, only if at least one of its two series has a
    positive sum.
    """
    infiles = Config.append_dir("Summed_Chem_Gen", group="psych")
    antipsych = pandas.read_csv("Criteria/antipsychotics.csv",
                                index_col="chem code")

    # One zero-filled slot per input file for each drug and each kind.
    n_files = len(infiles)
    data = {}
    for _, drug in antipsych.iterrows():
        data[drug["name"]] = {"brand": [0] * n_files, "gen": [0] * n_files}

    for mon, infile in enumerate(infiles):
        print("Loading %s" % (infile,))
        df = pandas.read_csv(infile, index_col=False)
        df = util.sumBy(df, ["chem code", "name", "generic"])
        for _, rec in df.iterrows():
            kind = "gen" if rec["generic"] == 1 else "brand"
            data[rec["name"]][kind][mon] = rec[Config.keys["items"]]

    for key, value in data.items():
        has_data = np.sum(value["brand"]) > 0 or np.sum(value["gen"]) > 0
        if not has_data:
            continue
        pp.plot(value["brand"], "r*-", label="brand")
        pp.plot(value["gen"], "bo-", label="generic")
        pp.legend()
        pp.title("Total prescriptions of " + key)
        pp.ylabel("# prescriptions")
        pp.xlabel("months since Jan 2012")
        pp.show()
def run(self):
    """Sum NSAID data per CCG and write the diclofenac share for each file.

    Each input file is summed by the CCG key; the output gets two extra
    columns: 'sum' (naproxen + diclofenac days prescribed) and 'percent'
    (diclofenac days divided by that sum).
    """
    sources = self.Config.append_dir("Sum_by_ccg_in", group='NSAID')
    targets = self.Config.append_dir("Sum_by_ccg_out", group='NSAID')
    for source, target in zip(sources, targets):
        frame = self.loadDF(source)
        totals = util.sumBy(frame, [self.Config.keys['ccg']])
        naproxen = totals['days_prescribed_naproxen']
        diclofenac = totals['days_prescribed_diclofenac']
        totals['sum'] = naproxen + diclofenac
        totals['percent'] = diclofenac / totals['sum']
        totals.to_csv(target, index=False)
def plotFinalCDF(data):
    """Plot CDF and estimated PDF of per-CCG diclofenac share, actual vs. shuffled.

    NaN values in the two 'days_prescribed_*' columns of ``data`` are
    replaced by 0 in place (unchanged behaviour).  The 'Actual' curves use
    ``data`` as given; the 'No CCG correlation' curves use a copy whose
    'CCG' column has been randomly permuted.  Both are drawn in one figure:
    CDF in the left subplot, estimated PDF in the right subplot.

    The shuffle is applied to a copy, so the caller's CCG labels are no
    longer overwritten by this function.

    Args:
        data: DataFrame with 'CCG', 'days_prescribed_diclofenac' and
            'days_prescribed_naproxen' columns.
    """

    def _nan_to_zero(value):
        # NaN is the only value that compares unequal to itself.
        return 0 if value != value else value

    for column in ('days_prescribed_diclofenac', 'days_prescribed_naproxen'):
        data[column] = data[column].map(_nan_to_zero)

    def _distributions(frame):
        # Return (cdf, pmf) of the per-CCG diclofenac share of `frame`.
        summed = util.sumBy(frame, 'CCG')
        diclofenac = summed['days_prescribed_diclofenac']
        naproxen = summed['days_prescribed_naproxen']
        summed['perc'] = diclofenac / (naproxen + diclofenac)
        cdf = ts.MakeCdfFromList(summed['perc'])
        # Evaluate the estimated density on the grid 0.00, 0.02, ..., 1.00.
        grid = [x / 50.0 for x in range(51)]
        pmf = ts.EstimatedPdf(summed['perc']).MakePmf(grid)
        return cdf, pmf

    cdf1, pmf1 = _distributions(data)

    # Permute the CCG labels on a private copy of the frame.
    shuffled = data.copy()
    labels = shuffled['CCG'].tolist()
    random.shuffle(labels)
    shuffled['CCG'] = labels
    cdf2, pmf2 = _distributions(shuffled)

    pyplot.subplot(1, 2, 1)
    xs, ys = cdf1.Render()
    pyplot.plot(xs, ys, 'r-', linewidth=2, label='Actual')
    xs, ys = cdf2.Render()
    pyplot.plot(xs, ys, 'b--', linewidth=2, label='No CCG correlation')
    pyplot.title('CDF')
    pyplot.xlabel('percent diclofenac prescribed', fontsize=14)
    pyplot.ylabel('fraction of CCGs at or below percentage', fontsize=14)
    pyplot.legend(loc=4)
    pyplot.axis([0, 1, 0, 1])

    pyplot.subplot(1, 2, 2)
    xs, ys = pmf1.Render()
    pyplot.plot(xs, ys, 'r-', linewidth=2, label='Actual')
    xs, ys = pmf2.Render()
    pyplot.plot(xs, ys, 'b--', linewidth=2, label='No CCG correlation')
    pyplot.title('PDF')
    pyplot.xlabel('percent diclofenac prescribed', fontsize=14)
    pyplot.ylabel('probability density', fontsize=14)
    pyplot.legend(loc=1)
    pyplot.show()
def run(self):
    """Merge per-practice naproxen and diclofenac sums and write them as CSV.

    For each (naproxen, diclofenac, output) triple both inputs are summed
    per (practice, ccg, pct) and outer-merged on those three keys; columns
    present on both sides get the suffixes '_naproxen' and '_diclofenac'.
    The merged frame is printed and then written without its index.
    """
    cfg = self.Config
    naproxen_paths = cfg.append_dir("Sum_by_practice_in_nap", group='NSAID')
    diclofenac_paths = cfg.append_dir("Sum_by_practice_in_dic", group = 'NSAID')
    out_paths = cfg.append_dir("Sum_by_practice_out", group = 'NSAID')

    for nap_path, dic_path, out_path in zip(naproxen_paths, diclofenac_paths,
                                            out_paths):
        naproxen = self.loadDF(nap_path)
        diclofenac = self.loadDF(dic_path)

        naproxen_totals = util.sumBy(
            naproxen,
            [cfg.keys['practice'], cfg.keys['ccg'], cfg.keys['pct']])
        diclofenac_totals = util.sumBy(
            diclofenac,
            [cfg.keys['practice'], cfg.keys['ccg'], cfg.keys['pct']])

        merged = naproxen_totals.merge(
            diclofenac_totals,
            on=[cfg.keys['practice'], cfg.keys['ccg'], cfg.keys['pct']],
            how='outer',
            suffixes=('_naproxen', '_diclofenac'))
        print(merged)
        merged.to_csv(out_path, index=False)
def plotSlopeCDF(data):
    """Plot CDF and estimated PDF of per-CCG slopes, actual vs. shuffled labels.

    The 'Actual' curves use ``data`` as given; the 'No CCG correlation'
    curves use a copy whose 'CCG' column has been randomly permuted.  Both
    are drawn in one figure: CDF in the left subplot, estimated PDF in the
    right subplot.

    The shuffle is applied to a copy, so the caller's CCG labels are no
    longer overwritten by this function.

    Args:
        data: DataFrame with a 'CCG' column plus whatever columns
            ``getPercents`` / ``findSlopes`` need (not visible here).
    """

    def _distributions(frame):
        # Return (cdf, pmf) of the per-CCG slopes of `frame`.
        summed = util.sumBy(frame, 'CCG')
        # Return value unused, so getPercents presumably updates `summed`
        # in place -- TODO confirm.
        getPercents(summed)
        slopes = findSlopes(summed)
        cdf = ts.MakeCdfFromList(slopes)
        # Evaluate the estimated density on the grid -0.060 .. 0.100
        # in steps of 0.005.
        grid = [x / 200.0 for x in range(-12, 21)]
        pmf = ts.EstimatedPdf(slopes).MakePmf(grid)
        return cdf, pmf

    cdf1, pmf1 = _distributions(data)

    # Permute the CCG labels on a private copy of the frame.
    shuffled = data.copy()
    labels = shuffled['CCG'].tolist()
    random.shuffle(labels)
    shuffled['CCG'] = labels
    cdf2, pmf2 = _distributions(shuffled)

    pyplot.subplot(1, 2, 1)
    xs, ys = cdf1.Render()
    pyplot.plot(xs, ys, 'r-', linewidth=2, label='Actual')
    xs, ys = cdf2.Render()
    pyplot.plot(xs, ys, 'b--', linewidth=2, label='No CCG correlation')
    pyplot.xlabel('rate of change of percent diclofenac prescribed',
                  fontsize=14)
    pyplot.ylabel('fraction of CCGs at or below rate', fontsize=14)
    pyplot.legend(loc=4)

    pyplot.subplot(1, 2, 2)
    xs, ys = pmf1.Render()
    pyplot.plot(xs, ys, 'r-', linewidth=2, label='Actual')
    xs, ys = pmf2.Render()
    pyplot.plot(xs, ys, 'b--', linewidth=2, label='No CCG correlation')
    pyplot.xlabel('rate of change of percent diclofenac prescribed',
                  fontsize=14)
    pyplot.ylabel('probability density', fontsize=14)
    pyplot.legend(loc=1)
    pyplot.show()
def plotCostVQuantity(filenames, Config):
    """Plot log item cost against quantity, with a fitted line, per file.

    Each file is read as CSV and summed per BNF code.  'ITEM COST' is
    nic / quantity.  ``plotTransform`` draws the data with ``np.log``
    applied, ``findCorr`` supplies the intercept and slope, and the fitted
    values are scattered in red over the log of the item cost.

    Fix relative to the previous version: the loop iterated over an
    undefined name ``datafiles`` instead of the ``filenames`` parameter.

    Args:
        filenames: iterable of CSV paths to plot.
        Config: object with a ``keys`` mapping providing 'bnf', 'nic' and
            'quantity'.
    """
    for datafile in filenames:
        print(datafile)
        df = pandas.read_csv(datafile)
        df = util.sumBy(df, Config.keys["bnf"])
        df["ITEM COST"] = df[Config.keys["nic"]] / df[Config.keys["quantity"]]

        plotTransform(df, "ITEM COST", Config.keys["quantity"], np.log, "Log")
        inter, slope = findCorr(df, "ITEM COST", Config.keys["quantity"],
                                np.log)

        # Computed once; it serves both as the x values and as the input
        # of the fitted line.
        log_cost = np.log(df["ITEM COST"])
        ys = [inter + slope * x for x in log_cost]
        pp.scatter(log_cost, ys, color="red")
        pp.show()
def runAllTime(self, items_cutoff=0, cost_cutoff=0):
    """Fit cost-vs-items over all files of each folder and save the results.

    For every folder in ``self.Config.directories["CorrsIn"]`` the files are
    summed per BNF code, cost is nic / quantity, and rows whose items or
    cost do not exceed the given cutoffs are discarded.  The pooled points
    yield a Spearman correlation, a p-value from ``self.PValue`` (n=1000)
    and two regressions from ``self.regress``: plain and with ``ylog=True``.
    Two rows per folder ('linear' and 'log') are written to
    'Results/CostQuanCorrs_PPI_AllTime.csv'.

    Args:
        items_cutoff: rows need items strictly greater than this value.
        cost_cutoff: rows need cost strictly greater than this value.
    """
    table = {}

    def add_row(group, transform, corr, p_value, fit):
        # `fit` is the 4-tuple returned by self.regress.
        sdev, serr, inter, slope = fit
        cells = (("Group", group), ("transform", transform),
                 ("Scorr", corr), ("p", p_value),
                 ("Stan Dev", sdev), ("Stan Err", serr),
                 ("inter", inter), ("slope", slope))
        for column, value in cells:
            table.setdefault(column, []).append(value)

    for folder in self.Config.directories["CorrsIn"]:
        costs = []
        counts = []
        for infile in self.Config.append_dir(folder):
            frame = self.loadDF(infile)
            frame = util.sumBy(frame, self.Config.keys["bnf"])
            frame["cost"] = (frame[self.Config.keys["nic"]]
                             / frame[self.Config.keys["quantity"]])
            frame = frame[frame[self.Config.keys["items"]] > items_cutoff]
            frame = frame[frame["cost"] > cost_cutoff]
            as_lists = frame.to_dict(outtype="list")
            costs.extend(as_lists["cost"])
            counts.extend(as_lists[self.Config.keys["items"]])

        corr = ts.SpearmanCorr(costs, counts)
        p_value = self.PValue(costs, counts, actual=corr, n=1000)

        # The correlation and p-value are shared by both regression rows.
        add_row(folder, "linear", corr, p_value, self.regress(costs, counts))
        add_row(folder, "log", corr, p_value,
                self.regress(costs, counts, ylog=True))

    result = pandas.DataFrame(table, index=None)
    result.to_csv("Results/CostQuanCorrs_PPI_AllTime.csv")
def run(self):
    """Pair generic and brand rows per (practice, ChemID) and write CSV.

    For each (input, output) pair: 'ChemID' is the first nine characters of
    the BNF code; rows are summed per (practice, ChemID, generic flag,
    'postal code'); the generic (flag 1.0) and brand (flag 0.0) halves are
    outer-merged on (practice, ChemID) with suffixes '_gen' / '_brand'.
    For items, quantity and nic, 'sum<key>' and 'percent<key>' (brand share)
    are added, and the intermediate flag and postcode columns are dropped.

    Fixes relative to the previous version:
      * ``if not data`` raised ValueError for any loaded DataFrame, because
        pandas refuses to evaluate a DataFrame as a boolean.
      * NaN was replaced by 0 in every column *before* the postcode was
        chosen, so the "brand postcode is NaN" test never fired and rows
        present only on the generic side got postcode 0.  The postcode is
        now chosen first.
      * the top-level ``pandas.groupby`` function (absent from later pandas
        releases) is replaced by the equivalent ``DataFrame.groupby``.
    """
    infiles = self.Config.append_dir("MakeDrugPairsIn")
    outfiles = self.Config.append_dir("MakeDrugPairsOut")

    def nan_to_zero(value):
        # NaN is the only value that compares unequal to itself.
        return 0 if value != value else value

    def pick_postcode(row):
        # Prefer the brand-side postcode; fall back to the generic side
        # when the brand side is NaN (no brand row for this pair).
        brand_code = row['postal code_brand']
        if brand_code != brand_code:
            return row['postal code_gen']
        return brand_code

    for infile, outfile in zip(infiles, outfiles):
        data = self.loadDF(infile)
        # Assumes loadDF returns None (or an empty frame) when nothing
        # could be loaded -- TODO confirm against loadDF.
        if data is None or len(data) == 0:
            continue

        data['ChemID'] = data[self.Config.keys['bnf']].map(
            lambda code: code[0:9])
        data = util.sumBy(data, [self.Config.keys['practice'], 'ChemID',
                                 self.Config.keys['gen'], 'postal code'])

        grouped = data.groupby(self.Config.keys['gen'])
        data = pandas.merge(grouped.get_group(1.0), grouped.get_group(0.0),
                            on=[self.Config.keys['practice'], 'ChemID'],
                            left_index=False,
                            right_index=False,
                            how='outer',
                            sort=False,
                            suffixes=('_gen', '_brand'))

        # Must run while missing postcodes are still NaN.
        data['postal code'] = data.apply(pick_postcode, axis=1)

        for col in data.columns.values.tolist():
            data[col] = data[col].map(nan_to_zero)

        items = self.Config.keys['items']
        quan = self.Config.keys['quantity']
        nic = self.Config.keys['nic']
        for col in [items, quan, nic]:
            data['sum' + col] = data[col + '_brand'] + data[col + '_gen']
            data['percent' + col] = data[col + '_brand'] / data['sum' + col]

        data = data.drop(['INCLUDE_gen', 'INCLUDE_brand',
                          'GENERIC_gen', 'GENERIC_brand',
                          'postal code_gen', 'postal code_brand'],
                         axis=1)
        data.to_csv(outfile, index=False)
def stackPlot(Config):
    """Draw cumulative prescription curves for the listed antipsychotics.

    Drugs come from 'Criteria/antipsychotics.csv' (indexed by 'chem code').
    Each file from ``Config.append_dir("Ingest_out", group="psych")`` adds
    one items value per drug (0 when the drug is missing or the value is
    NaN).  Drugs are then stacked in ascending order of their mean, each
    curve being the running sum of all series so far.  Only drugs whose own
    mean is not below 1000 get a legend entry.
    """
    infiles = Config.append_dir("Ingest_out", group="psych")
    antipsych = pandas.read_csv("Criteria/antipsychotics.csv",
                                index_col="chem code")

    data = dict((drug["name"], []) for _, drug in antipsych.iterrows())

    for infile in infiles:
        print("Loading %s" % (infile,))
        monthly = pandas.read_csv(infile)
        monthly = util.sumBy(monthly, ["chem code", "name"])
        monthly = monthly.set_index("chem code")
        for code, drug in antipsych.iterrows():
            try:
                # Lookup works because 'chem code' is the index of both
                # frames.
                tot = monthly.loc[code][Config.keys["items"]]
            except KeyError:
                tot = 0
            # NaN is the only value that is not equal to itself.
            if tot != tot:
                tot = 0
            data[drug["name"]].append(tot)

    running = None
    lines = []
    legends = []
    ordered = sorted(data.items(), key=lambda pair: numpy.mean(pair[1]))
    for key, value in ordered:
        running = sumLists(running, value) if running else value
        handles = pp.plot(running)
        if not (numpy.mean(value) < 1000):
            # Prepend so the legend lists the topmost curve first.
            lines = handles + lines
            legends = [key] + legends

    pp.legend(lines, legends)
    pp.title("Cumulative presecriptions of antipsychotics")
    pp.ylabel("# prescriptions")
    pp.xlabel("months since Jan 2012")
    pp.show()
def run(self):
    """Compute summary statistics per file and write one CSV per statistic.

    ``self.inputs`` and ``self.inDirs`` are walked in parallel.  Every file
    of a group is summed per BNF code and passed to each entry of
    ``self.distFuns`` (element 0 is the label, element 1 the callable).  The
    result of each call is tagged with 'Date' (the slice ``path[-11:-4]`` of
    the file path) and collected per label.  After all files of a group are
    processed, each label's results are written to
    '<outDir>/<dirname>_<label>_SummaryStats.csv'.
    """
    for files, dirname in zip(self.inputs, self.inDirs):
        collected = {}
        for datafile in files:
            date = datafile[-11:-4]
            frame = self.loadDF(datafile)
            frame = util.sumBy(frame, self.Config.keys['bnf'])
            for entry in self.distFuns:
                label = entry[0]
                stats = entry[1](frame)
                stats['Date'] = date
                collected.setdefault(label, []).append(stats)

        for entry in self.distFuns:
            label = entry[0]
            outfile = (self.outDir + '/' + dirname + '_' + label
                       + '_SummaryStats.csv')
            summary = pandas.DataFrame(collected[label])
            summary.to_csv(outfile, index=None)
def run(self):
    """Sum NSAID practice data per (CCG, PCT), add PCT names, write CSV.

    Each input file is summed by the ccg and pct keys, gets a 'percent'
    column (diclofenac days / (diclofenac + naproxen days)), and is
    outer-merged with 'Criteria/pctCodeToName.csv' by matching the pct key
    against that file's 'code' column.
    """
    sources = self.Config.append_dir("Sum_by_practice_out", group='NSAID')
    targets = self.Config.append_dir("NSAIDGov")
    pct_names = pandas.read_csv("Criteria/pctCodeToName.csv")

    for source, target in zip(sources, targets):
        frame = self.loadDF(source)
        totals = util.sumBy(
            frame, [self.Config.keys['ccg'], self.Config.keys['pct']])

        diclofenac = totals['days_prescribed_diclofenac']
        naproxen = totals['days_prescribed_naproxen']
        totals['percent'] = diclofenac / (diclofenac + naproxen)

        named = totals.merge(
            pct_names,
            left_on=self.Config.keys['pct'],
            right_on='code',
            how='outer')
        named.to_csv(target, index=False)
def run(self):
    """Aggregate brand/generic drug pairs per ChemID and write them as CSV.

    For each (input, output) pair the rows are summed per 'ChemID'.  For
    items, quantity and nic the '<key>_brand' and '<key>_gen' columns are
    replaced by 'sum<key>' and 'percent<key>' (brand share of the sum).

    Fix relative to the previous version: ``if not data`` raised ValueError
    for any loaded DataFrame, because pandas refuses to evaluate a DataFrame
    as a boolean; the check is now explicit.
    """
    infiles = self.Config.append_dir("AllDrugsIn")
    outfiles = self.Config.append_dir("AllDrugsOut")
    for infile, outfile in zip(infiles, outfiles):
        data = self.loadDF(infile)
        # Assumes loadDF returns None (or an empty frame) when nothing
        # could be loaded -- TODO confirm against loadDF.
        if data is None or len(data) == 0:
            continue

        data = util.sumBy(data, ['ChemID'])

        items = self.Config.keys['items']
        quan = self.Config.keys['quantity']
        nic = self.Config.keys['nic']
        for col in [items, quan, nic]:
            data['sum' + col] = data[col + '_brand'] + data[col + '_gen']
            data['percent' + col] = data[col + '_brand'] / data['sum' + col]
            data = data.drop([col + '_brand', col + '_gen'], axis=1)

        data.to_csv(outfile, index=False)
# data[row['CCG']] = [row['percent']] + data.get(row['CCG'],[]) # for key, value in data.items(): # pyplot.plot(value) # ''' # pyplot.plot(data['mean'], label = 'average') # pyplot.plot(data['dev_up'],'.', label = '1 std dev up') # pyplot.plot(data['dev_down'],'.', label = '1 std dev down') # pyplot.legend() # pyplot.title('Averge percent diclofenac over all CCGs (Jan 2012 to Oct 2013)') # pyplot.ylabel('Percent diclofenac') # pyplot.xlabel('Months since Jan 2012') # pyplot.show() infile = Config.nsaid_directories['Ingest_out_nap']+'/Oct2013.csv' data = pandas.read_csv(infile) data = util.sumBy(data,Config.keys['bnf']) plotLogCDF(data, 'days_prescribed', 'Log(x) CDF of days prescribed for NSAID preparations (Oct 2013)', '','Days prescribed') # infile = Config.directories['Ingest_out']+'/Oct2013.csv' # data = pandas.read_csv(infile) # data = util.sumBy(data,Config.keys['bnf']) # plotLogCDF(data, # Config.keys['items'], # 'Log(x) CDF of items for all drug preparations (Oct 2013)', # '','Prescriptions filled') # data['cost'] = data[Config.keys['nic']]/data[Config.keys['quantity']] # plotLogCDF(data,
def process(self, df):
    """Return ``df`` summed by chem code, name, CCG, PCT and generic flag."""
    group_columns = ["chem code", "name", "CCG", "PCT", "generic"]
    summed = util.sumBy(df, group_columns)
    return summed
mkPlotMedianMean(data2,"blue",name=data2label) def mkPlotVersus(df,x,y,**args): data = df.to_dict(outtype='list') pp.scatter(data[x],data[y],**args) if __name__ =="__main__": Config = config.Config() '''PPI items vs. cost''' for brand, gen in zip(Config.append_dir('SepBrand'),Config.append_dir('SepGeneric')): df_Brand = pandas.read_csv(brand) df_Gen = pandas.read_csv(gen) df_Brand = util.sumBy(df_Brand,Config.keys['bnf']) df_Gen = util.sumBy(df_Gen,Config.keys['bnf']) df_Brand['cost'] = df_Brand[Config.keys['nic']]/df_Brand[Config.keys['quantity']] df_Gen['cost'] = df_Gen[Config.keys['nic']]/df_Gen[Config.keys['quantity']] mkPlotVersus(df_Brand,'cost',Config.keys['items'],marker='x',s=35,color="red",label="Brand") mkPlotVersus(df_Gen,'cost',Config.keys['items'],marker = '*',s=35,color="blue",label="Generic") pp.title(brand) # pp.title('PPIs (Oct 2013)') pp.ylabel('Number of prescriptions for drug') pp.xlabel('Drug Cost') # pp.legend() # pp.yscale('log') pp.show()
def makeDrugSums(filename,Config):
    '''Read the CSV at `filename` and return it summed by the BNF code key.'''
    return util.sumBy(pandas.read_csv(filename), Config.keys['bnf'])
return data if __name__ == "__main__": Config = config.TestConfig() ''' data = makeDrugSums('CompressedData/Oct2013.csv',Config) data['cost'] = data[Config.keys['nic']]/data[Config.keys['quantity']] ''' data = pandas.read_csv('RatioDataset/Oct2013.csv') labels = ['items','quantity','nic'] for a in labels: data['tot'+a] = data['sum'+a]*data['ratio'+a] #data['outcode'] = data['postal code'].map(lambda x: x.partition(' ')[0]) data = util.sumBy(data,Config.keys['practice']) for a in labels: data['ratio'+a] = data['tot'+a]/data['sum'+a] data.drop('tot'+a,axis=1) plotLogNormal(data,'ratioitems',res=4000) plotLogNormal(data,'rationic',res = 4000) ''' print 'All Drugs (Cost)' df = makeDrugSums("CompressedData/Oct2013.csv", Config) df['cost'] = df[Config.keys['nic']]/df[Config.keys['quantity']] util.plotLogNormal(df,'cost')
def findFinalMeanVar(data):
    """Return ``ts.MeanVar`` of the per-CCG diclofenac share of ``data``.

    ``data`` is summed by 'CCG'; the share is diclofenac days prescribed
    divided by naproxen plus diclofenac days prescribed, stored in a 'perc'
    column of the summed frame.
    """
    per_ccg = util.sumBy(data, 'CCG')
    diclofenac = per_ccg['days_prescribed_diclofenac']
    naproxen = per_ccg['days_prescribed_naproxen']
    per_ccg['perc'] = diclofenac / (naproxen + diclofenac)
    return ts.MeanVar(per_ccg['perc'])