예제 #1
0
def main():
    """Simulate the school-composition counterfactual for every network.

    Loads the pickled sample from ``Data/ctrfSchoolComposition.data``, reads
    the parameter setup and the posterior draws, calls ``sim_state`` once per
    network index (in a worker pool when ``DO_PARALLEL`` is set), concatenates
    the per-network results into one DataFrame and pickles it, together with
    the inputs, to ``<scratchdir>/<pyfilename>.data``.
    """
    ## Setup
    [systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir, hostname, sysname] = setupdirs()
    # The output name is deliberately not timestamped, so reruns overwrite it.
    filename = scratchdir + '/' + pyfilename
    print(80*'*')
    print(f'hostname={hostname} (OS={sysname})')
    print(f'start={systime0.strftime("%Y-%m-%d-%H:%M:%S")}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}')
    print(f'filename ={filename}')
    print(80*'*')

    ## Load data.
    with open(currentdir+'/../Data/ctrfSchoolComposition.data', 'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Indicator (as float) for grade > 8.1, one array per network.
    # The builtin ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
    I9 = [(jattr.grade > 8.1).to_numpy(float) for jattr in attr]

    ## Parameter setup.
    theta_setup = pd.read_csv(currentdir+'/../Estimation/priors/parameterSetup.csv')
    estimatesfile = '/../Estimation/estimates/1000draws_posterior.csv'
    thetastar_draws = pd.read_csv(currentdir+estimatesfile)

    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.ones(num_nets, dtype=int)*20000

    numsim = 1000
    mcJump = 0.05  # Probability of large jumps for the MCMC.

    np.random.seed(2026642028)

    # Bind everything except the network index, so the same callable serves
    # both the serial loop and pool.map.
    sim_state_args = [num_nets, attr, I9, data_a, data_g, size_nets, vec_numsim_kCD,
                      thetastar_draws, theta_setup, sampleinfo, sample_k, mcJump, numsim, filename]
    sim_state_wrapper = functools.partial(sim_state, *sim_state_args)
    if DO_PARALLEL:
        # The context manager shuts the workers down once map() has returned.
        with multiprocessing.Pool(processes=NUM_WORKERS) as pool:
            ctrfData = pool.map(sim_state_wrapper, range(num_nets))
    else:
        ctrfData = [sim_state_wrapper(jjscid) for jjscid in range(num_nets)]

    # One DataFrame per network -> a single DataFrame for all networks.
    ctrfData = pd.concat(ctrfData, ignore_index=True)
    with open(filename+'.data', 'wb') as filehandle:
        pickle.dump([ctrfData, numsim, num_nets, size_nets, attr, data_a, data_g], filehandle)
    print(f'{pyfilename} saved {filename}')
예제 #2
0
def main():
    """Draw the posterior sample with a random-walk Metropolis-Hastings chain.

    Loads the estimation sample, reads priors and proposal scales from
    ``priors/income-parameterSetup.csv`` (rows with ``FlagInclude == 1``),
    and runs ``numsim_theta`` iterations.  In each iteration a Gaussian
    random-walk proposal is scored by ``run_sim`` for every network (in a
    worker pool when ``DO_PARALLEL`` is set); the per-network terms are summed
    with the Gaussian log-prior difference to form the log acceptance ratio.
    The chain is written to ``<scratchdir>/<pyfilename>.csv`` every
    ``savefreq`` iterations.
    """
    ## Setup
    [systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir, hostname, sysname] = setupdirs()
    # The posterior file is deliberately not timestamped (reruns overwrite it);
    # the kCD state file keeps the timestamp.
    posteriorfile = scratchdir + '/' + pyfilename
    kCDstatefile = scratchdir+'/kCD-states/'+pyfilename+'-'+systime0.strftime("%Y-%m-%d-%H:%M:%S")
    print(80*'*')
    print(f'hostname={hostname} (OS={sysname})')
    print(f'start={systime0.strftime("%Y-%m-%d-%H:%M:%S")}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}')
    print(f'posterior ={posteriorfile}')
    print(80*'*')

    ## Load data.
    # NOTE(review): this path is relative to the working directory, not to
    # currentdir -- confirm the script is always launched from its own folder.
    datadir = '../Data/'
    with open(datadir+'/estimation_top8_100plus.data', 'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Indicator (as float) for grade > 8.1, one array per network.
    # The builtin ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
    I9 = [(jattr.grade > 8.1).to_numpy(float) for jattr in attr]

    ## Parameter setup.
    theta_setup = pd.read_csv('priors/income-parameterSetup.csv')
    included = theta_setup.FlagInclude == 1
    theta0 = theta_setup.PriorMean[included].to_numpy()
    nparams = len(theta0)
    theta_labels = list(theta_setup.Label[included])

    ## Prior: independent Gaussians centred at the starting value.
    mu_prior = theta0
    s1_prior = theta_setup.PriorSD[included].to_numpy()
    s2_prior = np.diag(s1_prior*s1_prior)
    inv_s2_prior = np.linalg.inv(s2_prior)

    ## Proposal -- random walk.
    mu_prop = np.zeros(nparams)
    s1_prop = theta_setup.PropSD[included].to_numpy()
    s2_prop = np.diag(s1_prop*s1_prop)

    ## Size MC for state (inner loop): 150 per node, capped at 15000.
    vec_numsim_kCD = np.clip(np.asarray(size_nets, dtype=int)*150, a_min=None, a_max=15000)

    numsim_theta = 100000
    mcJump = 0.05  # 0.02 Probability of large jumps for the MCMC.

    ## Posterior sample: row 0 is the starting value, row js+1 the state after step js.
    theta_post = pd.DataFrame(data=np.zeros([numsim_theta+1, nparams], dtype=float, order='C'),
                              columns=theta_labels)
    theta_post.iloc[0] = theta0

    ## Logs.
    savefreq = 5000

    np.random.seed(2026642028)

    pool = multiprocessing.Pool(processes=NUM_WORKERS) if DO_PARALLEL else None
    try:
        for js in range(numsim_theta):
            theta1 = theta0 + np.random.multivariate_normal(mu_prop, s2_prop)

            # theta0, theta1 and js change every iteration, so the partial is
            # rebuilt each time; only the network index is left free.
            run_sim_args = [num_nets, attr, I9, data_a, data_g,
                            size_nets, vec_numsim_kCD, theta0, theta1,
                            theta_setup, sampleinfo, sample_k, mcJump, kCDstatefile, js]
            run_sim_wrapper = functools.partial(run_sim, *run_sim_args)
            if pool is None:
                lnaccept_jnet = [run_sim_wrapper(jjscid) for jjscid in range(num_nets)]
            else:
                lnaccept_jnet = pool.map(run_sim_wrapper, range(num_nets))

            # Per-network contributions (presumably log-scale, per the naming
            # -- verify against run_sim).
            lnaccept1 = np.sum(lnaccept_jnet)
            # Log-prior difference: log p(theta1) - log p(theta0).
            lnaccept2 = -0.5*np.matmul((theta1-mu_prior).reshape(1, nparams),
                                       np.matmul(inv_s2_prior,
                                                 (theta1-mu_prior).reshape(nparams, 1)))
            lnaccept2 += 0.5*np.matmul((theta0-mu_prior).reshape(1, nparams),
                                       np.matmul(inv_s2_prior,
                                                 (theta0-mu_prior).reshape(nparams, 1)))
            lnaccept2 = lnaccept2[0, 0]
            lnaccept_th = lnaccept1 + lnaccept2

            # Accept with probability min(1, exp(lnaccept_th)).  The uniform
            # draw must be compared on the log scale: comparing the raw draw
            # against a log ratio never accepts a downhill move.
            if np.log(np.random.uniform(0, 1)) < lnaccept_th:
                theta0 = theta1.copy()
            theta_post.iloc[js+1] = theta0

            if (js+1) % savefreq == 0:
                print(f'Sim = {js+1:4d}; Saving {posteriorfile}')
                theta_post.iloc[0:js+2].to_csv(posteriorfile+'.csv', encoding='utf-8', index=False)
    finally:
        # Release the worker processes even if an iteration fails.
        if pool is not None:
            pool.close()
            pool.join()
예제 #3
0
def main():
    """Simulate the price counterfactuals over a grid of price changes.

    Loads the estimation sample, reads four parameter-setup / posterior-draw
    pairs, and calls ``sim_ctrfPrice_jscid_jctrf`` once per job index in
    ``range(num_nets * len(gridDeltaPrice))`` (in a worker pool when
    ``DO_PARALLEL`` is set).  The per-job DataFrames are concatenated and
    pickled, together with the inputs, to ``<scratchdir>/<pyfilename>.data``.
    """
    ## Setup
    [
        systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir,
        hostname, sysname
    ] = setupdirs()
    filename = scratchdir + '/' + pyfilename
    print(80 * '*')
    print(f'hostname={hostname} (OS={sysname})')
    print(
        f'start={systime0.strftime("%Y-%m-%d-%H:%M:%S")}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}'
    )
    print(f'filename ={filename}')
    print(80 * '*')

    ## Load data.
    with open(currentdir + '/../Data/estimation_top8_100plus.data',
              'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Indicator (as float) for grade > 8.1, one array per network.
    # The builtin ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
    I9 = [(jattr.grade > 8.1).to_numpy(float) for jattr in attr]

    ## Parameter setup.  The two lists are aligned position by position:
    #   0. Model
    #   1. Restricted net (true coefficients, agents restricted from
    #      adjusting; reuses the FixedNet priors, hence the repeated name)
    #   2. Fixed net
    #   3. No Net Data (no local PE)
    priors_dir = currentdir + '/../Estimation/priors/'
    estimates_dir = currentdir + '/../Estimation/estimates/'
    setup_files = [
        'parameterSetup', 'parameterSetupFixedNet', 'parameterSetupFixedNet',
        'parameterSetupNoNetData'
    ]
    estimates_files = [
        '1000draws_posterior', '1000draws_posteriorRestrictNet',
        '1000draws_posteriorFixedNet', '1000draws_posteriorNoNetData'
    ]
    theta_setups = [
        pd.read_csv(priors_dir + jfile + '.csv') for jfile in setup_files
    ]
    thetastars = [
        pd.read_csv(estimates_dir + jfile + '.csv')
        for jfile in estimates_files
    ]

    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.ones(num_nets, dtype=int) * 20000

    numsim = 1000
    mcJump = 0.05  # Probability of large jumps for the MCMC.
    np.random.seed(2026642028)

    # Nine grid points: 0, 20, ..., 160.
    gridDeltaPrice = [float(x * 220 / 11) for x in range(0, 9)]
    nctrf = len(gridDeltaPrice)
    nscid = num_nets
    cpuinfo = [NUM_WORKERS, nscid, nctrf]

    # Bind everything except the flat job index (one job per network and
    # grid point).
    sim_args = [
        num_nets, attr, I9, data_a, data_g, size_nets, vec_numsim_kCD,
        thetastars, theta_setups, sampleinfo, sample_k, gridDeltaPrice, mcJump,
        numsim, filename, cpuinfo
    ]
    sim_wrapper = functools.partial(sim_ctrfPrice_jscid_jctrf, *sim_args)
    num_jobs = num_nets * nctrf
    if DO_PARALLEL:
        # The context manager shuts the workers down once map() has returned.
        with multiprocessing.Pool(processes=NUM_WORKERS) as pool:
            ctrfPriceData = pool.map(sim_wrapper, range(num_jobs))
    else:
        ctrfPriceData = [sim_wrapper(jcpu) for jcpu in range(num_jobs)]
    # One DataFrame per job -> a single DataFrame for all jobs.
    ctrfPriceData = pd.concat(ctrfPriceData, ignore_index=True)

    with open(filename + '.data', 'wb') as filehandle:
        pickle.dump([
            ctrfPriceData, numsim, gridDeltaPrice, num_nets, size_nets, attr,
            data_a, data_g
        ], filehandle)
    print(
        f'{pyfilename} saved {filename} at {(datetime.now()).strftime("%Y-%m-%d-%H:%M:%S")}'
    )
예제 #4
0
def main():
    """Simulate the spillover counterfactual on copies of one target network.

    Loads the estimation sample, picks network ``target_scid`` and builds a
    synthetic sample holding one copy of it per entry of ``grid_nnosmoke``.
    ``sim_state_spills`` is then called once per job index in
    ``range(num_nets * nctrf)`` (in a worker pool when ``DO_PARALLEL`` is
    set), using three parameter-setup / posterior-draw pairs (model, fixed
    net, no net data).  The concatenated result is pickled to
    ``<scratchdir>/<pyfilename>_scid_<target_scid>.data``.
    """
    ## Setup
    [systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir, hostname, sysname] = setupdirs()
    # The output name is deliberately not timestamped, so reruns overwrite it.
    filename = scratchdir + '/' + pyfilename
    print(80*'*')
    print(f'hostname={hostname} (OS={sysname})')
    print(f'start={systime0.strftime("%Y-%m-%d-%H:%M:%S")}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}')
    print(f'filename ={filename}')
    print(80*'*')

    ## Load data.  Only the target network is used below, so the sample-wide
    ## count is discarded and the sizes are kept under a separate name.
    with open(currentdir+'/../Data/estimation_top8_100plus.data', 'rb') as filehandle:
        [_, size_nets_all, attr, data_a, data_g] = pickle.load(filehandle)

    ## Parameter setup.
    theta_setup = pd.read_csv(currentdir+'/../Estimation/priors/parameterSetup.csv')
    estimatesfile = '/../Estimation/estimates/1000draws_posterior.csv'
    thetastar_draws = pd.read_csv(currentdir+estimatesfile)

    # Fixed net
    theta_setupFixedNet = pd.read_csv(currentdir+'/../Estimation/priors/parameterSetupFixedNet.csv')
    estimatesfile = '/../Estimation/estimates/1000draws_posteriorFixedNet.csv'
    thetastarFixedNet_draws = pd.read_csv(currentdir+estimatesfile)

    # No Net Data (no local PE)
    theta_setupNoNetData = pd.read_csv(currentdir+'/../Estimation/priors/parameterSetupNoNetData.csv')
    estimatesfile = '/../Estimation/estimates/1000draws_posteriorNoNetData.csv'
    thetastarNoNetData_draws = pd.read_csv(currentdir+estimatesfile)

    nctrf = 3  # Model, Fixed net, No net data

    numsim = 1000
    mcJump = 0.05  # Probability of large jumps for the MCMC.

    grid_nnosmoke = [0, 0.03, 0.05, 0.10, 0.20, 0.30, 0.50]
    lengrid = len(grid_nnosmoke)
    target_scid = 1  # 1,7 medium size/medium-high smoking for representative experiments
    # netid  prev
    # 1      0.44654088050314467
    # 7      0.4397590361445783

    # NOTE(review): unlike the sibling drivers, this script never seeds
    # np.random -- confirm whether reproducibility is intended here.

    # Prep synthetic sample: one copy of the target network per grid point.
    num_nets = lengrid
    n = size_nets_all[target_scid]
    size_nets = [n]*num_nets
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    jattr = attr[target_scid]
    jattr.index = jattr.id-1  # to be able to subset on ids
    # The same indicator array is shared by every copy.  The builtin ``float``
    # replaces the ``np.float`` alias removed in NumPy 1.24.
    I9 = [(jattr.grade > 8.1).to_numpy(float)]*num_nets
    jA = data_a[target_scid].copy()
    jG = data_g[target_scid].copy()
    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.ones(num_nets, dtype=int)*20000
    print(f'Target scid = {target_scid:3.0f} ({n})')

    # Bind everything except the flat job index.
    sim_state_args = [num_nets, grid_nnosmoke, jattr, I9, jA, jG, size_nets, vec_numsim_kCD,
                      thetastar_draws, thetastarFixedNet_draws, thetastarNoNetData_draws,
                      theta_setup, theta_setupFixedNet, theta_setupNoNetData, sampleinfo, sample_k,
                      mcJump, numsim]
    sim_state_wrapper = functools.partial(sim_state_spills, *sim_state_args)
    num_jobs = num_nets*nctrf
    if DO_PARALLEL:
        # The context manager shuts the workers down once map() has returned.
        with multiprocessing.Pool(processes=NUM_WORKERS) as pool:
            result = pool.map(sim_state_wrapper, range(num_jobs))
    else:
        result = [sim_state_wrapper(jcpu) for jcpu in range(num_jobs)]

    ctrfSpills = pd.concat(result, ignore_index=True)
    filename = filename+f'_scid_{target_scid}.data'
    with open(filename, 'wb') as filehandle:
        pickle.dump([ctrfSpills, numsim, grid_nnosmoke, num_nets, size_nets, jattr, jA, jG, target_scid], filehandle)
    print(f'{pyfilename} saved {filename}')
예제 #5
0
파일: modelFit.py 프로젝트: antonbadev/NEkS
def main():
    """Generate simulated samples from the posterior draws and pickle them.

    Loads the estimation sample, the parameter setup and the posterior draws,
    then calls ``gen_sample`` once per network index (in a worker pool when
    ``DO_PARALLEL`` is set).  The list of per-network results is pickled to
    ``<scratchdir>/<pyfilename>.data``.
    """
    ## Setup
    [
        systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir,
        hostname, sysname
    ] = setupdirs()
    # The output name is deliberately not timestamped, so reruns overwrite it.
    filename = scratchdir + '/' + pyfilename

    ## Load data.
    with open(currentdir + '/../Data/estimation_top8_100plus.data',
              'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Indicator (as float) for grade > 8.1, one array per network.
    # The builtin ``float`` replaces the ``np.float`` alias removed in NumPy 1.24.
    I9 = [(jattr.grade > 8.1).to_numpy(float) for jattr in attr]

    ## Posterior & parameter setup.
    theta_setup = pd.read_csv(currentdir +
                              '/../Estimation/priors/parameterSetup.csv')
    theta_draws = pd.read_csv(
        currentdir + '/../Estimation/estimates/1000draws_posterior.csv')
    numsim = 1000

    print(80 * '*')
    print(
        f'start={systime0.strftime("%Y-%m-%d-%H:%M:%S")}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}'
    )
    print(f'sim file name={filename}')
    print(80 * '-')
    print(f'hostname={hostname} (OS={sysname}):')
    # Environment diagnostics, printed for the run log.
    print(os.environ.get('PYTHONPATH', '').split(os.pathsep))
    print(sys.path)
    print(80 * '*')

    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.ones(num_nets, dtype=int) * 20000
    mcJump = 0.05  # Probability of large jumps for the MCMC.
    np.random.seed(2026642028)

    # Bind everything except the network index, so the same callable serves
    # both the serial loop and pool.map.
    gen_sample_args = [
        num_nets, attr, I9, data_a, data_g, size_nets, sampleinfo, numsim,
        vec_numsim_kCD, theta_draws, theta_setup, sample_k, mcJump
    ]
    gen_sample_wrapper = functools.partial(gen_sample, *gen_sample_args)
    if DO_PARALLEL:
        # The context manager shuts the workers down once map() has returned.
        with multiprocessing.Pool(processes=NUM_WORKERS) as pool:
            simdata = pool.map(gen_sample_wrapper, range(num_nets))
    else:
        simdata = [gen_sample_wrapper(jjscid) for jjscid in range(num_nets)]

    with open(filename + '.data', 'wb') as filehandle:
        pickle.dump(simdata, filehandle)
    print(f'Saved simdata in {filename}.data')
    print(f'end={datetime.now().strftime("%Y-%m-%d-%H:%M:%S")}')