def validate_once(true_cf = [pl.ones(3)/3.0, pl.ones(3)/3.0],
                  true_std = 0.01*pl.ones(3),
                  std_bias = [1., 1., 1.],
                  save=False, dir='', i=0):
    """ Run one validation replicate on simulated data.

    Simulated estimates are generated for the provided true cause
    fractions; the bad model and the latent simplex model are each fit
    to that simulated data and quality metrics are calculated for both.

    Parameters
    ----------
    true_cf, true_std, std_bias : forwarded unchanged to
        data.sim_data_for_validation and to calc_quality_metrics.
    save : if true, the two metric tables are written as csv files and
        nothing is returned; otherwise they are returned.
    dir : directory used in the csv paths; also passed on to
        retrieve_estimates.
    i : integer index used in the csv file names; also passed on to
        retrieve_estimates.

    Returns
    -------
    (bad_model_metrics, latent_simplex_metrics) when save is false,
    None when save is true.
    """
    # NOTE(review): the list/array defaults are built once, when the
    # function is defined, and shared by every call that relies on them.

    # simulate the data (the 1000 is presumably a sample count -- TODO confirm)
    sim = data.sim_data_for_validation(1000, true_cf, true_std, std_bias)

    # bad model: fit, score, then hand the fit to retrieve_estimates
    bad_fit = models.bad_model(sim)
    bad_scores = calc_quality_metrics(true_cf, true_std, std_bias, bad_fit)
    # NOTE(review): the second argument is hard-coded to True regardless
    # of `save`; if it is a save flag, estimates are written even when
    # save=False -- confirm against retrieve_estimates.
    retrieve_estimates(bad_fit, True, 'bad_model', dir, i)

    # latent simplex model: same steps (first returned value is not used)
    _, simplex_fit = models.fit_latent_simplex(sim)
    simplex_scores = calc_quality_metrics(true_cf, true_std, std_bias, simplex_fit)
    retrieve_estimates(simplex_fit, True, 'latent_simplex', dir, i)

    # hand the metrics back unless the caller asked for them on disk
    if not save:
        return bad_scores, simplex_scores

    pl.rec2csv(bad_scores, '%s/metrics_bad_model_%i.csv' % (dir, i))
    pl.rec2csv(simplex_scores, '%s/metrics_latent_simplex_%i.csv' % (dir, i))
def validate_once(true_cf=[pl.ones(3) / 3.0, pl.ones(3) / 3.0],
                  true_std=0.01 * pl.ones(3),
                  std_bias=[1., 1., 1.],
                  save=False,
                  dir='',
                  i=0):
    """ Simulate estimates for the given true cause fractions, fit both
    models to them, and compute quality metrics for each fit.

    The bad model is fit first, then the latent simplex model. After
    each fit, calc_quality_metrics is called with (true_cf, true_std,
    std_bias, fit) and the fit is passed to retrieve_estimates together
    with a model label, `dir` and `i`.

    If `save` is true the two metric tables are written to
    '<dir>/metrics_bad_model_<i>.csv' and
    '<dir>/metrics_latent_simplex_<i>.csv' and None is returned;
    otherwise the pair (bad model metrics, latent simplex metrics) is
    returned.
    """
    # NOTE(review): mutable defaults (lists / arrays) are shared across
    # calls; they are only safe as long as nothing downstream mutates them.

    # generate simulation data
    # (first argument 1000 is presumably the number of draws -- TODO confirm)
    simulated = data.sim_data_for_validation(1000, true_cf, true_std, std_bias)

    # bad model
    fitted_bad = models.bad_model(simulated)
    metrics_bad = calc_quality_metrics(true_cf, true_std, std_bias, fitted_bad)
    # NOTE(review): True is passed unconditionally here, independent of
    # `save` -- verify what retrieve_estimates does with it.
    retrieve_estimates(fitted_bad, True, 'bad_model', dir, i)

    # latent simplex model; only the second returned value is used
    fit_result = models.fit_latent_simplex(simulated)
    _unused, fitted_simplex = fit_result
    metrics_simplex = calc_quality_metrics(true_cf, true_std, std_bias, fitted_simplex)
    retrieve_estimates(fitted_simplex, True, 'latent_simplex', dir, i)

    # either write results to disk or return them
    if save:
        bad_csv = '%s/metrics_bad_model_%i.csv' % (dir, i)
        pl.rec2csv(metrics_bad, bad_csv)
        simplex_csv = '%s/metrics_latent_simplex_%i.csv' % (dir, i)
        pl.rec2csv(metrics_simplex, simplex_csv)
        return None

    return metrics_bad, metrics_simplex
import pylab as pl
import scipy.stats.mstats as st

# NOTE(review): sys, os, re, data and models are used below but are not
# imported in the visible part of this script -- presumably imported
# earlier in the file; confirm.


def _quantile_over_draws(draws, q):
    """ Return the q-quantile taken along axis 0 of a 3-d array.

    The result is indexed [t, j], where t and j run over the second and
    third axes of `draws`.
    """
    _, n_t, n_j = draws.shape
    return pl.array([[st.mquantiles(draws[:, t, j], q)[0]
                      for j in range(n_j)]
                     for t in range(n_t)])


# set parameters: age, iso3 and sex are the first three command-line arguments
outdir = '/home/j/Project/Causes of Death/Under Five Deaths/CoD Correct Output'
indir = '/home/j/Project/Causes of Death/Under Five Deaths/CoD Correct Input Data'
age, iso3, sex = sys.argv[1:4]
full_dir = '%s/v02_prep_%s' % (indir, iso3)

# get cause list: the cause is the second '+'-separated piece of each
# file name in full_dir that matches `age` (used as a regular expression)
causes = list(set([fname.split('+')[1]
                   for fname in os.listdir(full_dir)
                   if re.search(age, fname)]))
causes.remove('HIV')  # temporary until Miriam fixes the HIV files

# gather data and fit model
cf = data.get_cod_data(full_dir, causes, age, iso3, sex)
m, pi = models.fit_latent_simplex(cf)

# calculate summary measures: mean and 2.5% / 97.5% quantiles over axis 0
N, T, J = pi.shape
mean = pi.mean(axis=0)
lower = _quantile_over_draws(pi, 0.025)
upper = _quantile_over_draws(pi, 0.975)

# format summary (one mean/lower/upper column per cause) and save
mean_names = ['%s_mean' % c for c in causes]
lower_names = ['%s_lower' % c for c in causes]
upper_names = ['%s_upper' % c for c in causes]
output = pl.np.core.records.fromarrays(mean.T, names=mean_names)
output = pl.rec_append_fields(output, lower_names, lower.T)
output = pl.rec_append_fields(output, upper_names, upper.T)
pl.rec2csv(output, '%s/%s+%s+%s+summary.csv' % (outdir, iso3, age, sex))

# format all sims and save
pi.shape = (N*T, J)
# Fit the latent simplex model one time point at a time for every country
# in a fixed list (visited in random order) and save a plot per country.
#
# Written so that it runs unchanged on Python 2.6+ and Python 3:
# single-argument print(...) calls and `except ... as` behave the same
# on both.
import random

# Project / plotting imports are kept in their original relative order.
import graphics
import data
import models
import pylab as pl

countries = 'ABW,AFG,AGO,ALB,ANT,ARE,ARG,ARM,AUS,AUT,AZE,BDI,BEL,BEN,BFA,BGD,BGR,BHR,BHS,BIH,BLR,BLZ,BOL,BRA,BRB,BRN,BTN,BWA,CAF,CAN,CHE,CHL,CHN,CIV,CMR,COD,COG,COL,COM,CPV,CRI,CUB,CYP,CZE,DEU,DJI,DNK,DOM,DZA,ECU,EGY,ERI,ESH,ESP,EST,ETH,FIN,FJI,FRA,FSM,GAB,GBR,GEO,GHA,GIN,GLP,GMB,GNB,GNQ,GRC,GRD,GTM,GUF,GUM,GUY,HKG,HND,HRV,HTI,HUN,IDN,IND,IRL,IRN,IRQ,ISL,ISR,ITA,JAM,JOR,JPN,KAZ,KEN,KGZ,KHM,KOR,KWT,LAO,LBN,LBR,LBY,LCA,LKA,LSO,LTU,LUX,LVA,MAC,MAR,MDA,MDG,MDV,MEX,MKD,MLI,MLT,MMR,MNE,MNG,MOZ,MRT,MTQ,MUS,MWI,MYS,NAM,NCL,NER,NGA,NIC,NLD,NOR,NPL,NZL,OMN,PAK,PAN,PER,PHL,PNG,POL,PRI,PRK,PRT,PRY,PSE,PYF,QAT,REU,ROU,RUS,RWA,SAU,SDN,SEN,SGP,SLB,SLE,SLV,SOM,SRB,STP,SUR,SVK,SVN,SWE,SWZ,SYR,TCD,TGO,THA,TJK,TKM,TLS,TON,TTO,TUN,TUR,TZA,UGA,UKR,URY,USA,UZB,VCT,VEN,VIR,VNM,VUT,WSM,YEM,ZAF,ZMB,ZWE'.split(',')

# visit the countries in a random order
random.shuffle(countries)

for iso3 in countries:
    print(iso3)
    try:
        F, causes = data.get_cod_data_all_causes(iso3=iso3)
        N, T, J = F.shape

        # one slab of draws per time point
        # (assumes fit_latent_simplex yields 1000 draws -- TODO confirm)
        pi = pl.zeros((1000, T, J))
        for t in range(T):
            print('%s of %s' % (t + 1, T))
            # fit on a single-time-point slice; the slice keeps F three-dimensional
            model, pi_t = models.fit_latent_simplex(F[:, t:(t + 1), :])
            pi[:, t, :] = pi_t[:, 0, :]
    except Exception as e:
        # best effort: report the problem and move on to the next country
        print(e)
        continue

    graphics.plot_F_and_pi(F, pi, causes, iso3)
    pl.savefig('/home/j/Project/Models/cod-correct/%s.png' % iso3)