def main():
    """Simulate states for every network in the school-composition data.

    Loads the pickled sample from ``../Data/ctrfSchoolComposition.data``, the
    parameter setup and the posterior draws, runs ``sim_state`` once per
    network (serially or through a process pool, depending on
    ``DO_PARALLEL``), concatenates the per-network frames and pickles the
    result next to the inputs in ``<scratchdir>/<pyfilename>.data``.
    """
    ## Setup
    [systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir,
     hostname, sysname] = setupdirs()
    filename = scratchdir + '/' + pyfilename
    print(80*'*')
    print(f'hostname={hostname} (OS={sysname})')
    print(f'start={systime0.strftime("%Y-%m-%d-%H:%M:%S")}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}')
    print(f'filename ={filename}')
    print(80*'*')

    ## Load data.
    with open(currentdir+'/../Data/ctrfSchoolComposition.data', 'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Float indicator of grade > 8.1 for each network. The builtin ``float``
    # replaces the ``np.float`` alias, which NumPy removed in 1.24.
    I9 = [(jattr.grade > 8.1).to_numpy(float) for jattr in attr]

    ## Parameter setup.
    theta_setup = pd.read_csv(currentdir+'/../Estimation/priors/parameterSetup.csv')
    estimatesfile = '/../Estimation/estimates/1000draws_posterior.csv'
    thetastar_draws = pd.read_csv(currentdir+estimatesfile)

    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.ones(num_nets, dtype=int)*20000
    numsim = 1000
    mcJump = 0.05  # Probability of large jumps for the MCMC.

    # Seed before any worker process is created, as in the original ordering.
    np.random.seed(2026642028)
    sim_state_args = [num_nets, attr, I9, data_a, data_g, size_nets,
                      vec_numsim_kCD, thetastar_draws, theta_setup,
                      sampleinfo, sample_k, mcJump, numsim, filename]
    sim_state_wrapper = functools.partial(sim_state, *sim_state_args)

    if DO_PARALLEL:
        # The context manager releases the worker processes; map() only
        # returns once every task has finished, so nothing is cut short.
        with multiprocessing.Pool(processes=NUM_WORKERS) as pool:
            ctrfData = pool.map(sim_state_wrapper, range(num_nets))
    else:
        ctrfData = [sim_state_wrapper(jjscid) for jjscid in range(num_nets)]

    # One frame per network -> a single frame covering all networks.
    ctrfData = pd.concat(ctrfData, ignore_index=True)

    with open(filename+'.data', 'wb') as filehandle:
        pickle.dump([ctrfData, numsim, num_nets, size_nets, attr, data_a, data_g],
                    filehandle)
    print(f'{pyfilename} saved {filename}')
def main():
    """Draw a posterior sample of theta with a random-walk Metropolis sampler.

    Loads the estimation sample and the parameter setup, then for
    ``numsim_theta`` iterations proposes ``theta1 = theta0 + N(0, s2_prop)``,
    obtains one log-acceptance term per network from ``run_sim``, adds the
    log ratio of the independent normal prior, and accepts or rejects the
    proposal. The chain is written to ``<scratchdir>/<pyfilename>.csv``
    every ``savefreq`` iterations.
    """
    ## Setup
    [systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir,
     hostname, sysname] = setupdirs()
    posteriorfile = scratchdir + '/' + pyfilename
    kCDstatefile = (scratchdir + '/kCD-states/' + pyfilename + '-'
                    + systime0.strftime("%Y-%m-%d-%H:%M:%S"))
    print(80*'*')
    print(f'hostname={hostname} (OS={sysname})')
    print(f'start={systime0.strftime("%Y-%m-%d-%H:%M:%S")}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}')
    print(f'posterior ={posteriorfile}')
    print(80*'*')

    ## Load data.
    datadir = '../Data/'
    with open(datadir+'/estimation_top8_100plus.data', 'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Float indicator of grade > 8.1 for each network. The builtin ``float``
    # replaces the ``np.float`` alias, which NumPy removed in 1.24.
    I9 = [(jattr.grade > 8.1).to_numpy(float) for jattr in attr]

    ## Parameter setup.
    theta_setup = pd.read_csv('priors/income-parameterSetup.csv')
    included = theta_setup.FlagInclude == 1  # rows that enter the estimation
    theta0 = theta_setup.PriorMean[included].to_numpy()
    nparams = len(theta0)
    theta_labels = list(theta_setup.Label[included])

    ## Prior: independent normal with mean PriorMean and sd PriorSD.
    mu_prior = theta0
    s1_prior = theta_setup.PriorSD[included].to_numpy()
    s2_prior = np.diag(s1_prior*s1_prior)
    inv_s2_prior = np.linalg.inv(s2_prior)

    ## Proposal -- random walk.
    mu_prop = np.zeros([nparams, 1])
    s1_prop = theta_setup.PropSD[included].to_numpy()
    s2_prop = np.diag(s1_prop*s1_prop)

    ## Size MC for state (inner loop): 150 per node, capped at 15000.
    vec_numsim_kCD = np.clip(np.asarray(size_nets, dtype=int)*150,
                             a_min=None, a_max=15000)
    numsim_theta = 100000
    mcJump = 0.05  # 0.02 Probability of large jumps for the MCMC.

    ## Posterior sample; row 0 holds the starting value.
    theta_post = pd.DataFrame(
        data=np.zeros([numsim_theta+1, nparams], dtype=float, order='C'),
        columns=theta_labels)
    theta_post.iloc[0] = theta0

    ## Plots and logs.
    savefreq = 5000

    # Seed before any worker process is created, as in the original ordering.
    np.random.seed(2026642028)
    pool = multiprocessing.Pool(processes=NUM_WORKERS) if DO_PARALLEL else None
    try:
        for js in range(numsim_theta):
            theta1 = theta0 + np.random.multivariate_normal(mu_prop[:, 0], s2_prop)

            run_sim_args = [num_nets, attr, I9, data_a, data_g, size_nets,
                            vec_numsim_kCD, theta0, theta1, theta_setup,
                            sampleinfo, sample_k, mcJump, kCDstatefile, js]
            run_sim_wrapper = functools.partial(run_sim, *run_sim_args)
            if pool is None:
                lnaccept_jnet = [run_sim_wrapper(jjscid)
                                 for jjscid in range(num_nets)]
            else:
                lnaccept_jnet = pool.map(run_sim_wrapper, range(num_nets))

            # Sum of the per-network terms returned by run_sim
            # (presumably log-likelihood-ratio contributions -- confirm).
            lnaccept1 = np.sum(lnaccept_jnet)

            # Log prior ratio log p(theta1) - log p(theta0).
            dev1 = (theta1 - mu_prior).reshape(nparams, 1)
            dev0 = (theta0 - mu_prior).reshape(nparams, 1)
            lnaccept2 = -0.5*np.matmul(dev1.T, np.matmul(inv_s2_prior, dev1))
            lnaccept2 += 0.5*np.matmul(dev0.T, np.matmul(inv_s2_prior, dev0))
            lnaccept2 = lnaccept2[0, 0]

            lnaccept_th = lnaccept1 + lnaccept2
            # Metropolis rule on the log scale: accept with probability
            # min(1, exp(lnaccept_th)). The uniform draw must be logged
            # before it is compared with a log ratio; comparing the raw
            # draw rejected every proposal with a negative log ratio.
            if np.log(np.random.uniform(0, 1)) < lnaccept_th:
                theta0 = theta1.copy()

            theta_post.iloc[js+1] = theta0

            if (js+1) % savefreq == 0:
                print(f'Sim = {js+1:4d}; Saving {posteriorfile}')
                theta_post.iloc[0:js+2].to_csv(posteriorfile+'.csv',
                                               encoding='utf-8', index=False)
    finally:
        # Release the worker processes even if an iteration fails.
        if pool is not None:
            pool.close()
            pool.join()
def main():
    """Simulate the price counterfactuals for every (network, price) cell.

    Loads the estimation sample plus four parameter-setup / posterior-draw
    pairs, builds the grid of price changes, runs
    ``sim_ctrfPrice_jscid_jctrf`` for each of the ``num_nets * nctrf`` job
    indices (serially or through a process pool, depending on
    ``DO_PARALLEL``), concatenates the results and pickles them to
    ``<scratchdir>/<pyfilename>.data``.
    """
    ## Setup
    [systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir,
     hostname, sysname] = setupdirs()
    filename = scratchdir + '/' + pyfilename
    print(80 * '*')
    print(f'hostname={hostname} (OS={sysname})')
    print(
        f'start={systime0.strftime("%Y-%m-%d-%H:%M:%S")}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}'
    )
    print(f'filename ={filename}')
    print(80 * '*')

    ## Load data.
    with open(currentdir + '/../Data/estimation_top8_100plus.data',
              'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Float indicator of grade > 8.1 for each network. The builtin ``float``
    # replaces the ``np.float`` alias, which NumPy removed in 1.24.
    I9 = [(jattr.grade > 8.1).to_numpy(float) for jattr in attr]

    ## Parameter setup. The two lists below are aligned entry by entry:
    #   0. Model
    #   1. Restricted net (true coefficients but agents restricted from
    #      adjusting; uses the FixedNet priors)
    #   2. Fixed net
    #   3. No Net Data (no local PE)
    priors_dir = currentdir + '/../Estimation/priors/'
    estimates_dir = currentdir + '/../Estimation/estimates/'
    setup_files = [
        'parameterSetup',
        'parameterSetupFixedNet',
        'parameterSetupFixedNet',
        'parameterSetupNoNetData',
    ]
    estimates_files = [
        '1000draws_posterior',
        '1000draws_posteriorRestrictNet',
        '1000draws_posteriorFixedNet',
        '1000draws_posteriorNoNetData',
    ]
    theta_setups = [
        pd.read_csv(priors_dir + jfile + '.csv') for jfile in setup_files
    ]
    thetastars = [
        pd.read_csv(estimates_dir + jfile + '.csv')
        for jfile in estimates_files
    ]

    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.ones(num_nets, dtype=int) * 20000
    numsim = 1000
    mcJump = 0.05  # Probability of large jumps for the MCMC.

    # Seed before any worker process is created, as in the original ordering.
    np.random.seed(2026642028)

    # Price changes 0, 20, ..., 160.
    gridDeltaPrice = [float(x * 220 / 11) for x in range(0, 9)]
    nctrf = len(gridDeltaPrice)
    nscid = num_nets
    cpuinfo = [NUM_WORKERS, nscid, nctrf]

    sim_args = [
        num_nets, attr, I9, data_a, data_g, size_nets, vec_numsim_kCD,
        thetastars, theta_setups, sampleinfo, sample_k, gridDeltaPrice,
        mcJump, numsim, filename, cpuinfo
    ]
    sim_wrapper = functools.partial(sim_ctrfPrice_jscid_jctrf, *sim_args)

    njobs = num_nets * nctrf  # one job per (network, price) pair
    if DO_PARALLEL:
        # The context manager releases the worker processes; map() only
        # returns once every task has finished, so nothing is cut short.
        with multiprocessing.Pool(processes=NUM_WORKERS) as pool:
            ctrfPriceData = pool.map(sim_wrapper, range(njobs))
    else:
        ctrfPriceData = [sim_wrapper(jcpu) for jcpu in range(njobs)]

    # All job results stacked into a single frame.
    ctrfPriceData = pd.concat(ctrfPriceData, ignore_index=True)

    with open(filename + '.data', 'wb') as filehandle:
        pickle.dump([
            ctrfPriceData, numsim, gridDeltaPrice, num_nets, size_nets, attr,
            data_a, data_g
        ], filehandle)
    print(
        f'{pyfilename} saved {filename} at {(datetime.now()).strftime("%Y-%m-%d-%H:%M:%S")}'
    )
def main():
    """Simulate spillover counterfactuals on copies of one target network.

    Loads the estimation sample and three parameter-setup / posterior-draw
    pairs (model, fixed net, no net data), builds a synthetic sample made of
    ``len(grid_nnosmoke)`` copies of the network ``target_scid``, runs
    ``sim_state_spills`` for each of the ``num_nets * nctrf`` job indices and
    pickles the concatenated result to
    ``<scratchdir>/<pyfilename>_scid_<target_scid>.data``.
    """
    ## Setup
    [systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir,
     hostname, sysname] = setupdirs()
    filename = scratchdir + '/' + pyfilename
    print(80*'*')
    print(f'hostname={hostname} (OS={sysname})')
    print(f'start={systime0.strftime("%Y-%m-%d-%H:%M:%S")}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}')
    print(f'filename ={filename}')
    print(80*'*')

    ## Load data.
    with open(currentdir+'/../Data/estimation_top8_100plus.data', 'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)

    ## Parameter setup.
    theta_setup = pd.read_csv(currentdir+'/../Estimation/priors/parameterSetup.csv')
    estimatesfile = '/../Estimation/estimates/1000draws_posterior.csv'
    thetastar_draws = pd.read_csv(currentdir+estimatesfile)
    # Fixed net
    theta_setupFixedNet = pd.read_csv(currentdir+'/../Estimation/priors/parameterSetupFixedNet.csv')
    estimatesfile = '/../Estimation/estimates/1000draws_posteriorFixedNet.csv'
    thetastarFixedNet_draws = pd.read_csv(currentdir+estimatesfile)
    # No Net Data (no local PE)
    theta_setupNoNetData = pd.read_csv(currentdir+'/../Estimation/priors/parameterSetupNoNetData.csv')
    estimatesfile = '/../Estimation/estimates/1000draws_posteriorNoNetData.csv'
    thetastarNoNetData_draws = pd.read_csv(currentdir+estimatesfile)
    nctrf = 3  # Model, Fixed net, No net data

    numsim = 1000
    mcJump = 0.05  # Probability of large jumps for the MCMC.

    grid_nnosmoke = [0, 0.03, 0.05, 0.10, 0.20, 0.30, 0.50]
    lengrid = len(grid_nnosmoke)
    # 1,7 medium size/medium-high smoking for representative experiments
    #   netid  prev
    #   1      0.44654088050314467
    #   7      0.4397590361445783
    target_scid = 1

    # Prep synthetic sample: one copy of the target network per grid point.
    # These replace the loaded num_nets/size_nets; the sample-level
    # quantities are derived from the synthetic sample only.
    num_nets = lengrid
    n = size_nets[target_scid]
    size_nets = [n]*num_nets
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    jattr = attr[target_scid]
    jattr.index = jattr.id-1  # to be able to subset on ids
    # Float indicator of grade > 8.1. The builtin ``float`` replaces the
    # ``np.float`` alias, which NumPy removed in 1.24. The same array object
    # is shared by every copy.
    I9 = [(jattr.grade > 8.1).to_numpy(float)]*num_nets
    jA = data_a[target_scid].copy()
    jG = data_g[target_scid].copy()

    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.ones(num_nets, dtype=int)*20000
    print(f'Target scid = {target_scid:3.0f} ({n})')

    # NOTE(review): unlike the sibling scripts, no np.random.seed(...) call
    # is made here, so runs are not reproducible -- confirm this is intended.
    sim_state_args = [num_nets, grid_nnosmoke, jattr, I9, jA, jG, size_nets,
                      vec_numsim_kCD, thetastar_draws,
                      thetastarFixedNet_draws, thetastarNoNetData_draws,
                      theta_setup, theta_setupFixedNet, theta_setupNoNetData,
                      sampleinfo, sample_k, mcJump, numsim]
    sim_state_wrapper = functools.partial(sim_state_spills, *sim_state_args)

    njobs = num_nets*nctrf  # one job per (grid point, scenario) pair
    if DO_PARALLEL:
        # The context manager releases the worker processes; map() only
        # returns once every task has finished, so nothing is cut short.
        with multiprocessing.Pool(processes=NUM_WORKERS) as pool:
            result = pool.map(sim_state_wrapper, range(njobs))
    else:
        result = [sim_state_wrapper(jcpu) for jcpu in range(njobs)]

    ctrfSpills = pd.concat(result, ignore_index=True)

    filename = filename+f'_scid_{target_scid}.data'
    with open(filename, 'wb') as filehandle:
        pickle.dump([ctrfSpills, numsim, grid_nnosmoke, num_nets, size_nets,
                     jattr, jA, jG, target_scid], filehandle)
    print(f'{pyfilename} saved {filename}')
def main():
    """Generate simulated samples for every network from the posterior draws.

    Loads the estimation sample, the parameter setup and the posterior
    draws, runs ``gen_sample`` once per network (serially or through a
    process pool, depending on ``DO_PARALLEL``) and pickles the list of
    per-network results to ``<scratchdir>/<pyfilename>.data``.
    """
    ## Setup
    [systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir,
     hostname, sysname] = setupdirs()
    filename = scratchdir + '/' + pyfilename

    ## Load data.
    with open(currentdir + '/../Data/estimation_top8_100plus.data',
              'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Float indicator of grade > 8.1 for each network. The builtin ``float``
    # replaces the ``np.float`` alias, which NumPy removed in 1.24.
    I9 = [(jattr.grade > 8.1).to_numpy(float) for jattr in attr]

    ## Posterior & parameter setup.
    theta_setup = pd.read_csv(currentdir +
                              '/../Estimation/priors/parameterSetup.csv')
    theta_draws = pd.read_csv(
        currentdir + '/../Estimation/estimates/1000draws_posterior.csv')
    numsim = 1000

    # Run banner, including the import search paths of this interpreter.
    print(80 * '*')
    print(
        f'start={systime0.strftime("%Y-%m-%d-%H:%M:%S")}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}'
    )
    print(f'sim file name={filename}')
    print(80 * '-')
    print(f'hostname={hostname} (OS={sysname}):')
    print(os.environ.get('PYTHONPATH', '').split(os.pathsep))
    print(sys.path)
    print(80 * '*')

    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.ones(num_nets, dtype=int) * 20000
    mcJump = 0.05  # Probability of large jumps for the MCMC.

    # Seed before any worker process is created, as in the original ordering.
    np.random.seed(2026642028)

    gen_sample_args = [
        num_nets, attr, I9, data_a, data_g, size_nets, sampleinfo, numsim,
        vec_numsim_kCD, theta_draws, theta_setup, sample_k, mcJump
    ]
    gen_sample_wrapper = functools.partial(gen_sample, *gen_sample_args)

    if DO_PARALLEL:
        # The context manager releases the worker processes; map() only
        # returns once every task has finished, so nothing is cut short.
        with multiprocessing.Pool(processes=NUM_WORKERS) as pool:
            simdata = pool.map(gen_sample_wrapper, range(num_nets))
    else:
        simdata = [gen_sample_wrapper(jjscid) for jjscid in range(num_nets)]

    with open(filename + '.data', 'wb') as filehandle:
        pickle.dump(simdata, filehandle)
    print(f'Saved simdata in {filename}.data')
    print(f'end={datetime.now().strftime("%Y-%m-%d-%H:%M:%S")}')