Esempio n. 1
0
def main():
    """Simulate every network at the posterior draws and pickle the result.

    Loads the pickled sample ``ctrfSchoolComposition.data``, the parameter
    setup and the 1000 posterior draws, calls ``sim_state`` once per network
    index (serially, or through a process pool when ``DO_PARALLEL`` is set),
    concatenates the per-network results with ``pd.concat`` and dumps them,
    together with the input sample, to ``<scratchdir>/<pyfilename>.data``.
    """
    ## Setup
    [systime0,pyfilename,pyfiledir,homedir,currentdir,scratchdir,hostname,sysname]=setupdirs()
    timestamp = systime0.strftime("%Y-%m-%d-%H:%M:%S")
    # The output stem is deliberately not timestamped: reruns overwrite it.
    filename = scratchdir+'/'+pyfilename
    print(80*'*')
    print(f'hostname={hostname} (OS={sysname})')
    print(f'start={timestamp}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}')
    print(f'filename ={filename}')
    print(80*'*')

    ## Load data.
    with open(currentdir+'/../Data/ctrfSchoolComposition.data', 'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Indicator (as floats) for grade > 8.1, one array per network.
    # The builtin ``float`` replaces ``np.float``, which NumPy 1.24 removed.
    I9 = [(jattr.grade>8.1).to_numpy(float) for jattr in attr]

    ## Parameter setup.
    theta_setup     = pd.read_csv(currentdir+'/../Estimation/priors/parameterSetup.csv')
    estimatesfile   = '/../Estimation/estimates/1000draws_posterior.csv'
    thetastar_draws = pd.read_csv(currentdir+estimatesfile)

    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.ones(num_nets,dtype=int)*20000

    numsim = 1000
    mcJump = 0.05 # Probability of large jumps for the MCMC.

    np.random.seed(2026642028)

    # Everything except the network index is bound up front so the same
    # one-argument callable serves both the serial and the parallel path.
    sim_state_args = [num_nets, attr, I9, data_a, data_g, size_nets, vec_numsim_kCD,
                      thetastar_draws, theta_setup, sampleinfo, sample_k, mcJump, numsim, filename]
    sim_state_wrapper = functools.partial(sim_state,*sim_state_args)
    if DO_PARALLEL:
        # The context manager releases the worker processes once map() returns.
        with multiprocessing.Pool(processes=NUM_WORKERS) as pool:
            ctrfData = pool.map(sim_state_wrapper,range(num_nets))
    else:
        ctrfData = [sim_state_wrapper(jjscid) for jjscid in range(num_nets)]

    # One entry per network index; stack them into a single frame.
    ctrfData = pd.concat(ctrfData,ignore_index=True) #all scids in one df
    with open(filename+'.data', 'wb') as filehandle:
        pickle.dump([ctrfData,numsim,num_nets, size_nets, attr, data_a, data_g], filehandle)
    print(f'{pyfilename} saved {filename}')
Esempio n. 2
0
def main():
    """Draw ``ndraws`` parameter vectors from each available posterior file.

    For every name in ``posteriors`` the CSV ``<posteriorsdir><name>.csv`` is
    read, passed to ``posteriorSample(theta_post, 0.2, ndraws)`` and the
    result is written to ``<estimatesdir><ndraws>draws_<name>.csv``.
    Posterior files that do not exist are reported and skipped.  For the
    first posterior only, the columns flagged in the FixedNet parameter setup
    are additionally written to ``...RestrictNet.csv``.
    """
    [
        systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir,
        hostname, sysname
    ] = setupdirs()

    # NOTE(review): no '/' is inserted between these directories and the file
    # names appended below -- confirm setupdirs() returns paths that end in a
    # separator for this script.
    posteriorsdir = scratchdir
    estimatesdir = currentdir + 'estimates/'

    posteriors = [
        'posterior', 'posteriorFixedNet', 'posteriorNoNetData',
        'posteriorNoPE', 'posteriorNoTri', 'posteriorNoCost'
    ]

    ndraws = 1000
    np.random.seed(2026642028)

    for jp, posterior in enumerate(posteriors):
        try:
            theta_post = pd.read_csv(posteriorsdir + posterior + '.csv')
        except FileNotFoundError:
            # Only a missing file is skipped; parse errors or an interrupt
            # must not be reported as "Not found".
            print(f'Not found {posteriorsdir+posterior}')
            continue
        theta_draws = posteriorSample(theta_post, 0.2, ndraws)

        estimates = f'{ndraws}' + 'draws_' + posterior.split('-')[0]
        print(f'posterior={posterior} --> estimates={estimates}')

        theta_draws.to_csv(estimatesdir + estimates + '.csv',
                           encoding='utf-8',
                           index=False)

        if jp == 0:
            # Restrict the draws of the first posterior to the parameters
            # that are switched on (FlagInclude == 1) in the FixedNet setup.
            theta_setupFixedNet = pd.read_csv(
                currentdir + 'priors/parameterSetupFixedNet.csv')
            varlist = theta_setupFixedNet.Label[theta_setupFixedNet.FlagInclude
                                                == 1].tolist()
            theta_draws[varlist].to_csv(estimatesdir + estimates +
                                        'RestrictNet.csv',
                                        encoding='utf-8',
                                        index=False)
Esempio n. 3
0
def main():
    """Run the random-walk Metropolis sampler for the model parameters.

    Loads the estimation sample and the prior/proposal setup from
    ``priors/income-parameterSetup.csv``, then for ``numsim_theta`` iterations
    proposes ``theta1 = theta0 + N(0, s2_prop)``, obtains one log-acceptance
    term per network from ``run_sim``, adds the log prior ratio and accepts or
    rejects the proposal.  The chain is stored in ``theta_post`` and written
    to ``<scratchdir>/<pyfilename>.csv`` every ``savefreq`` iterations.
    """
    ## Setup
    [systime0,pyfilename,pyfiledir,homedir,currentdir,scratchdir,hostname,sysname]=setupdirs()
    timestamp = systime0.strftime("%Y-%m-%d-%H:%M:%S")
    # The posterior file is not timestamped (reruns overwrite it); the kCD
    # state files are.
    posteriorfile = scratchdir+'/'+pyfilename
    kCDstatefile = scratchdir+'/kCD-states/'+pyfilename+'-'+timestamp
    print(80*'*')
    print(f'hostname={hostname} (OS={sysname})')
    print(f'start={timestamp}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}')
    print(f'posterior ={posteriorfile}')
    print(80*'*')

    ## Load data.
    datadir = '../Data/'
    with open(datadir+'/estimation_top8_100plus.data', 'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Builtin ``float`` replaces ``np.float``, which NumPy 1.24 removed.
    I9 = [(jattr.grade>8.1).to_numpy(float) for jattr in attr]

    ## Parameter setup.
    theta_setup = pd.read_csv('priors/income-parameterSetup.csv')
    included    = theta_setup.FlagInclude==1
    theta0      = theta_setup.PriorMean[included].to_numpy()
    nparams     = len(theta0)
    theta_labels= list(theta_setup.Label[included])

    ## Prior.
    mu_prior = theta0
    s1_prior = theta_setup.PriorSD[included].to_numpy()
    s2_prior = np.diag(s1_prior*s1_prior)
    inv_s2_prior = np.linalg.inv(s2_prior)

    ## Proposal -- random walk.
    mu_prop = np.zeros([nparams,1])
    s1_prop = theta_setup.PropSD[included].to_numpy()
    s2_prop = np.diag(s1_prop*s1_prop)

    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.clip(np.asarray(size_nets, dtype=int)*150,a_min=None,a_max=15000)

    numsim_theta = 100000
    mcJump = 0.05 # 0.02 Probability of large jumps for the MCMC.

    ## Posterior sample (row 0 holds the starting value).
    theta_post = pd.DataFrame(data=np.zeros([numsim_theta+1,nparams],dtype=float,order='C'),columns=theta_labels)
    theta_post.iloc[0] = theta0

    ## Plots and logs.
    savefreq = 5000

    np.random.seed(2026642028)

    # One pool is reused across all iterations and released in ``finally``.
    pool = multiprocessing.Pool(processes=NUM_WORKERS) if DO_PARALLEL else None
    try:
        for js in range(numsim_theta):
            theta1 = theta0+np.random.multivariate_normal(mu_prop[:,0],s2_prop)

            run_sim_args = [num_nets, attr, I9, data_a, data_g,
                            size_nets, vec_numsim_kCD, theta0, theta1,
                            theta_setup, sampleinfo, sample_k, mcJump, kCDstatefile, js]
            run_sim_wrapper = functools.partial(run_sim,*run_sim_args)
            if pool is None:
                lnaccept_jnet = [run_sim_wrapper(jjscid) for jjscid in range(num_nets)]
            else:
                lnaccept_jnet = pool.map(run_sim_wrapper,range(num_nets))

            # Sum of the per-network terms returned by run_sim.
            lnaccept1 = np.sum(lnaccept_jnet)
            # Log prior ratio for the Gaussian prior: -0.5*q(theta1)+0.5*q(theta0).
            lnaccept2 = -0.5*np.matmul((theta1-mu_prior).reshape(1,nparams),
                                       np.matmul(inv_s2_prior,
                                                 (theta1-mu_prior).reshape(nparams,1)))
            lnaccept2 += 0.5*np.matmul((theta0-mu_prior).reshape(1,nparams),
                                       np.matmul(inv_s2_prior,
                                                 (theta0-mu_prior).reshape(nparams,1)))
            lnaccept2 = lnaccept2[0,0]
            lnaccept_th = lnaccept1+lnaccept2
            # Accept with probability min(1, exp(lnaccept_th)).  The uniform
            # draw must be compared on the log scale: comparing the raw draw
            # with a log ratio never accepts a move with lnaccept_th <= 0.
            # log(u) <= 0, so proposals with lnaccept_th > 0 always pass.
            if np.log(np.random.uniform(0,1)) < lnaccept_th:
                theta0 = theta1.copy()
            theta_post.iloc[js+1] = theta0
            if (js+1)%savefreq==0:
                print(f'Sim = {js+1:4d}; Saving {posteriorfile}')
                theta_post.iloc[0:js+2].to_csv(posteriorfile+'.csv', encoding='utf-8', index=False)
    finally:
        if pool is not None:
            pool.close()
            pool.join()
Esempio n. 4
0
def main():
    """Turn the pickled counterfactual simulations into two tables and a figure.

    Reads ``<scratchdir>/ctrfIncome.data`` and writes, into ``texdir``:

    * ``table_ctrfIncome.tex`` -- change in the weighted mean of the three
      ``prev-*`` columns relative to the first grid point;
    * ``fig_ctrfPrice_compareDistSmoking.pdf`` -- kernel densities of the
      simulated effects for selected grid points;
    * ``table_ctrfIncome_tests.tex`` -- pairwise p-values (Welch t-test and
      two-sample Kolmogorov-Smirnov) between grid points.
    """

    [
        systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir,
        hostname, sysname
    ] = setupdirs()
    ctrfIncomeDir = scratchdir
    ctrfIncomeData = '/ctrfIncome.data'  #3 SCENARIOS
    with open(ctrfIncomeDir + ctrfIncomeData, 'rb') as filehandle:
        [
            ctrfIncomeData, numsim, gridDeltaIncome, num_nets, size_nets, attr,
            data_a, data_g
        ] = pickle.load(filehandle)

    texdir = currentdir + '/../TeX/'
    texfile = 'table_ctrfIncome.tex'
    texfile2 = 'table_ctrfIncome_tests.tex'

    # The rest of the function uses the "price" names for the income data.
    ctrfPriceData = ctrfIncomeData
    gridDeltaPrice = gridDeltaIncome

    ctrfPrice = pd.concat(ctrfPriceData,
                          ignore_index=True)  #all scids in one df
    ctrfPrice0 = ctrfPrice[(ctrfPrice.sim == 0)].copy(
    )  #sim=0 is data, for all scid, for all ctrf scenarios
    ctrfPrice = ctrfPrice[~(ctrfPrice.sim == 0)].copy()
    ctrfPrice.sim = ctrfPrice.sim - 1

    #weighted & unweighted mean
    meanCtrf = ctrfPrice.groupby(['dprice']).mean().reset_index()
    ctrfPrice['mean_weights'] = float(num_nets) * ctrfPrice.netsize / float(
        sum(size_nets))
    WctrfPrice = ctrfPrice.copy()
    # Scale every column from the fourth up to (not including) the weights.
    for jvar in list(WctrfPrice.columns)[3:-1]:
        print(jvar)
        WctrfPrice[jvar] = WctrfPrice[jvar] * WctrfPrice['mean_weights']
    WmeanCtrf = WctrfPrice.groupby(['dprice']).mean().reset_index()
    # Differences relative to the first grid point.
    ctrfPolicy = WmeanCtrf - WmeanCtrf.iloc[0].values.squeeze()

    table_ctrfPrice = ''
    for ii in range(len(gridDeltaPrice) - 1):
        dp_ii = int(100 - 100 * gridDeltaPrice[ii + 1])
        texline = f'{dp_ii:4.0f}'
        for jscenario in ['prev-Model', 'prev-FixedNet', 'prev-noNetData']:
            deltaPrev = -100 * ctrfPolicy.loc[ii + 1, jscenario]
            texline = texline + f' & {deltaPrev:4.1f}'
        table_ctrfPrice = table_ctrfPrice + texline + r' \\' + ' \n'

    texsignature = f'% tex created by {pyfilename}.py \n'
    texheader = r'''
    \begin{table}[!t]
    \caption{Changes in the price of tobacco}
    \label{table:ctrf-price}
    \begin{center}
    \begin{tabular}{cccccc}
    Price increase & Model  &  Exog net & No net data\\ \hline \hline
    '''

    texfooter = r'''
    \hline
    \end{tabular}
    \end{center}
    \fignotetitle{Note:} The first column shows proposed increases in tobacco prices in cents. 
    The average price of a pack of cigarettes is \$1.67 so that 20 cents is approximately 10\%. 
    The second through fourth columns show the predicted increase in the overall smoking (baseline 41\%) in ppt 
    from the full model, from the model when the friendship network is fixed, and 
    from the model when no social network data is available. 
    In this last scenario the local peer effect term is absent so that $\phi$ is forced to equal zero in the estimation. 
    \end{table}
    '''

    texcontent = texsignature + texheader + table_ctrfPrice + texfooter
    with open(texdir + texfile, 'w') as f:
        f.write(texcontent)

    # -B- Compare the distribution of overall smoking under different price scenarios
    graphdir = texdir
    graphfile = 'fig_ctrfPrice_compareDistSmoking.pdf'
    models_labels = [
        'dP=40', 'Fixed net dP=40', 'dP=80', 'Fixed net dP=80', 'dP=120',
        'Fixed net dP=120', 'dP=160', 'Fixed net dP=160'
    ]

    prev_range = np.linspace(
        0, 30, 100)[:, np.newaxis]  #adds axis to 1d to make it 2d array

    simCtrfPrice = (ctrfPrice.loc[:, :'prev-noNetData']).copy()
    # ``allnetssize`` was never assigned in this function.
    # NOTE(review): taken to be the total size of all networks, matching the
    # denominator of ``mean_weights`` above -- confirm.
    allnetssize = sum(size_nets)
    simCtrfPrice['weights'] = simCtrfPrice['netsize'] / allnetssize
    for j, stat in enumerate(simCtrfPrice.columns):
        if j > 3 and j < 7:
            simCtrfPrice[stat] = simCtrfPrice[stat] * simCtrfPrice['weights']
    drawsCtrfPrice = simCtrfPrice.groupby(['sim',
                                           'dprice']).sum().reset_index()
    drawsCtrfPrice_wide = -100 * drawsCtrfPrice.pivot(
        index='sim',
        columns='dprice',
        values=['prev-Model', 'prev-FixedNet', 'prev-noNetData'])

    fig, axs = plt.subplots()
    linestyles1 = ['g-', 'b-', 'y-', 'r-']
    linestyles2 = ['g:', 'b:', 'y:', 'r:']
    linewidths = [2, 2, 2, 2, 2]

    for j, jj in enumerate([3, 5, 7]):
        print(j, jj)
        # Convert to NumPy before adding the axis: pandas 2.x no longer
        # accepts multi-dimensional indexing on a Series.
        x = (drawsCtrfPrice_wide.iloc[1:, jj] -
             drawsCtrfPrice_wide.iloc[1:, 1]).to_numpy()[:, np.newaxis]
        xx = (drawsCtrfPrice_wide.iloc[1:, jj + 10] -
              drawsCtrfPrice_wide.iloc[1:, 1 + 10]).to_numpy()[:, np.newaxis]

        kde = KernelDensity(kernel='gaussian', bandwidth=1.5).fit(x)
        log_dens = kde.score_samples(prev_range)
        axs.plot(prev_range,
                 np.exp(log_dens),
                 linestyles1[j],
                 linewidth=linewidths[j],
                 label="{0}".format(models_labels[jj - 3]))
        # Second curve: same colour, dotted, no legend entry.
        kde = KernelDensity(kernel='gaussian', bandwidth=1.5).fit(xx)
        log_dens = kde.score_samples(prev_range)
        axs.plot(prev_range,
                 np.exp(log_dens),
                 linestyles2[j],
                 linewidth=linewidths[j])

    axs.legend(loc='upper right')
    axs.set_xlim(np.min(prev_range), np.max(prev_range))
    xmarks = [
        i for i in np.linspace(np.min(prev_range), np.max(prev_range), 5)
    ]
    plt.xticks(xmarks, rotation=45)
    axs.spines['right'].set_visible(False)
    axs.spines['top'].set_visible(False)
    axs.spines['left'].set_visible(False)
    axs.set_yticklabels([])
    axs.set_yticks([])

    plt.savefig(graphdir + graphfile, dpi=300)

    # -C- Compare policy effects for different levels of price change
    models_labels = ['20', '40', '60', '80', '100', '120']
    nmodels = len(models_labels)
    p1 = np.zeros([nmodels, nmodels])
    p2 = np.zeros([nmodels, nmodels])
    for j in range(nmodels):
        for jj in range(nmodels):
            rvs1 = (drawsCtrfPrice_wide.iloc[1:, j + 2] -
                    drawsCtrfPrice_wide.iloc[1:, 1]).to_numpy()
            rvs2 = (drawsCtrfPrice_wide.iloc[1:, jj + 2] -
                    drawsCtrfPrice_wide.iloc[1:, 1]).to_numpy()

            # p1: unequal-variance t-test; p2: two-sample KS test.
            [t, p1[j, jj]] = stats.ttest_ind(rvs1, rvs2, equal_var=False)
            [t, p2[j, jj]] = stats.ks_2samp(rvs1, rvs2)

    texsignature = f'% tex created by {pyfilename}.py \n'
    texheader = r'''
    \begin{table}[!h]
    \caption{Pairwise tests of the policy effects for different levels of price change}
    \label{table:ctrf-posteriorPrice-tests}
    \begin{center}
    \begin{tabular}{lccccccc}
    Policy & \multirow{2}{*}{20} & \multirow{2}{*}{40}&\multirow{2}{*}{60}&\multirow{2}{*}{80}&\multirow{2}{*}{100}&\multirow{2}{*}{120} \\
    level (dP)&  \\ \hline \hline
    '''
    # Only the lower triangle (including the diagonal) is printed.
    table_ctrf = ''
    for r, model in enumerate(models_labels):
        texline = f'{model:12}'
        for c in range(r + 1):
            texline = texline + f' & {p1[r,c]:4.2f} ({p2[r,c]:4.2f})'
        table_ctrf = table_ctrf + texline + r' \\' + ' \n'

    texfooter = r'''
    \hline
    \end{tabular}
    \end{center}
    \fignotetitle{Note:} \fignotetext{
    Each cell compares the policy effects for a pair of price changes.
    The two p-values are from testing a hypothesis of equal means and from testing a hypothesis of equal distributions
    (two-sample Kolmogorov-Smirnov test).}
    \end{table}
    '''

    texcontent = texsignature + texheader + table_ctrf + texfooter
    with open(texdir + texfile2, 'w') as f:
        f.write(texcontent)
Esempio n. 5
0
def main():
    """Simulate the price counterfactuals for every (network, grid point) job.

    Loads the estimation sample plus four parameter setups and their
    posterior draws, then calls ``sim_ctrfPrice_jscid_jctrf`` for each job
    index in ``range(num_nets * nctrf)`` (serially or through a process pool,
    depending on ``DO_PARALLEL``).  The concatenated result and the inputs
    are pickled to ``<scratchdir>/<pyfilename>.data``.
    """
    ## Setup
    [
        systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir,
        hostname, sysname
    ] = setupdirs()
    filename = scratchdir + '/' + pyfilename
    print(80 * '*')
    print(f'hostname={hostname} (OS={sysname})')
    print(
        f'start={systime0.strftime("%Y-%m-%d-%H:%M:%S")}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}'
    )
    print(f'filename ={filename}')
    print(80 * '*')

    ## Load data.
    with open(currentdir + '/../Data/estimation_top8_100plus.data',
              'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Builtin ``float`` replaces ``np.float``, which NumPy 1.24 removed.
    I9 = [(jattr.grade > 8.1).to_numpy(float) for jattr in attr]

    ## Parameter setup.
    # Model
    # Restricted net (true coeff but agents restricted from adjusting, FixedNet priors)
    # Fixed net
    # No Net Data (no local PE)
    priors_dir = currentdir + '/../Estimation/priors/'
    estimates_dir = currentdir + '/../Estimation/estimates/'
    setup_files = [
        'parameterSetup', 'parameterSetupFixedNet', 'parameterSetupFixedNet',
        'parameterSetupNoNetData'
    ]
    estimates_files = [
        '1000draws_posterior', '1000draws_posteriorRestrictNet',
        '1000draws_posteriorFixedNet', '1000draws_posteriorNoNetData'
    ]
    theta_setups = [
        pd.read_csv(priors_dir + jfile + '.csv') for jfile in setup_files
    ]
    thetastars = [
        pd.read_csv(estimates_dir + jfile + '.csv')
        for jfile in estimates_files
    ]

    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.ones(num_nets, dtype=int) * 20000

    numsim = 1000
    mcJump = 0.05  # Probability of large jumps for the MCMC.
    np.random.seed(2026642028)

    # Nine grid points: 0, 20, ..., 160.
    gridDeltaPrice = [float(x * 220 / 11) for x in range(0, 9)]
    nctrf = len(gridDeltaPrice)
    nscid = num_nets
    cpuinfo = [NUM_WORKERS, nscid, nctrf]

    sim_args = [
        num_nets, attr, I9, data_a, data_g, size_nets, vec_numsim_kCD,
        thetastars, theta_setups, sampleinfo, sample_k, gridDeltaPrice, mcJump,
        numsim, filename, cpuinfo
    ]
    sim_wrapper = functools.partial(sim_ctrfPrice_jscid_jctrf, *sim_args)
    njobs = num_nets * nctrf
    if DO_PARALLEL:
        # The context manager releases the worker processes once map() returns.
        with multiprocessing.Pool(processes=NUM_WORKERS) as pool:
            ctrfPriceData = pool.map(sim_wrapper, range(njobs))
    else:
        ctrfPriceData = [sim_wrapper(jcpu) for jcpu in range(njobs)]
    ctrfPriceData = pd.concat(ctrfPriceData,
                              ignore_index=True)  #all scids in one df

    with open(filename + '.data', 'wb') as filehandle:
        pickle.dump([
            ctrfPriceData, numsim, gridDeltaPrice, num_nets, size_nets, attr,
            data_a, data_g
        ], filehandle)
    print(
        f'{pyfilename} saved {filename} at {(datetime.now()).strftime("%Y-%m-%d-%H:%M:%S")}'
    )
Esempio n. 6
0
def main():
    """Plot and test the posterior of ``vPrice`` across estimation scenarios.

    For each posterior CSV in ``estimatesdir`` the first 20% of the non-empty
    draws are discarded, a Gaussian kernel density of the ``vPrice`` column is
    plotted, and the draws are collected in one frame.  Pairwise p-values
    (Welch t-test and two-sample Kolmogorov-Smirnov) between scenarios are
    then written as a LaTeX table; the figure and the table go to ``texdir``.
    """
    [systime0,pyfilename,pyfiledir,homedir,currentdir,scratchdir,hostname,sysname]=setupdirs()
    estimatesdir = scratchdir+'/../../Estimation/'
    texdir   = currentdir + '/../../TeX/'
    texfile  = 'table_posterior_price_tests.tex'
    graphdir = texdir
    graphfile = 'fig_posterior_price.pdf'

    posteriors=[
        'posterior',
        'posteriorFixedNet',
        'posteriorNoNetData',
        'posteriorNoPE',
        'posteriorNoTri',
        'posteriorNoCost'
        ]

    models_labels   =[ 'Model','Fixed net','No net data', 'No PE', 'No tri', 'No cost']
    price_range     = np.linspace(-0.015, 0.005, 100)[:, np.newaxis]#adds axis to 1d to make it 2d array
    posteriorsPrice = pd.DataFrame(columns=models_labels)

    fig, axs = plt.subplots()
    linestyles = ['r-', 'g--', 'b-.', '*k:', ':',':']
    linewidths = [2,2,2,2,2,2]

    for jp, posterior in enumerate(posteriors):

        theta_post=pd.read_csv(estimatesdir+posterior+'.csv')
        # Rows whose first column is positive are counted as filled draws.
        numobs=np.sum((theta_post.iloc[:,0]>0).to_numpy())
        print(numobs)
        burnin=np.floor(0.2*numobs).astype(int)
        # Convert to NumPy before adding the axis: pandas 2.x no longer
        # accepts multi-dimensional indexing on a Series.
        price = theta_post.loc[burnin:numobs,'vPrice'].to_numpy()[:, np.newaxis].copy()
        posteriorsPrice[models_labels[jp]]=price[:,0]
        kde = KernelDensity(kernel='gaussian', bandwidth=0.0003).fit(price)
        log_dens = kde.score_samples(price_range)
        axs.plot(price_range, np.exp(log_dens), linestyles[jp],linewidth=linewidths[jp], label="{0}".format(models_labels[jp]))


    axs.legend(loc='upper left')
    axs.set_xlim(np.min(price_range), np.max(price_range))
    xmarks=[i for i in np.linspace(np.min(price_range),np.max(price_range),5)]
    plt.xticks(xmarks,rotation=45)
    axs.spines['right'].set_visible(False)
    axs.spines['top'].set_visible(False)
    axs.spines['left'].set_visible(False)
    axs.set_yticklabels([])
    axs.set_yticks([])

    plt.savefig(graphdir+graphfile, dpi=300)


    # -B- Test equal distribution/equal mean ---------------------------------
    nmodels=len(models_labels)
    p1=np.zeros([nmodels,nmodels])
    p2=np.zeros([nmodels,nmodels])
    for j in range(nmodels):
        for jj in range(nmodels):
            rvs1=posteriorsPrice.iloc[:,j].to_numpy()
            rvs2=posteriorsPrice.iloc[:,jj].to_numpy()
            # p1: unequal-variance t-test; p2: two-sample KS test.
            [t, p1[j,jj]]=stats.ttest_ind(rvs1,rvs2, equal_var = False)
            [t, p2[j,jj]]=stats.ks_2samp(rvs1,rvs2)


    texsignature=f'% tex created by {pyfilename}.py \n'
    texheader = r'''
    \begin{table}[!h]
    \caption{Pairwise tests of the posteriors for the price parameter under different estimation scenarios}
    \label{table:ctrf-posteriorPrice-tests}
    \begin{center}
    \begin{tabular}{lccccccc}
    Estimation & \multirow{2}{*}{Model} & \multirow{2}{*}{Fixed net}&\multirow{2}{*}{No net data}&\multirow{2}{*}{No PE}&\multirow{2}{*}{No tri}&\multirow{2}{*}{No cost} \\
    scenarios &  \\ \hline \hline
    '''
    # Only the lower triangle (including the diagonal) is printed.
    table_ctrf=''
    for r,model in enumerate(models_labels):
        texline= f'{model:12}'
        for c in range(r+1):
            texline = texline + f' & {p1[r,c]:4.2f} ({p2[r,c]:4.2f})'
        table_ctrf = table_ctrf + texline + r' \\' + ' \n'

    texfooter = r'''
    \hline
    \end{tabular}
    \end{center}
    \fignotetitle{Note:} \fignotetext{
    Each cell compares the posterior distribution of the parameter price between a pair of estimation scenarios.
    The two p-values are from testing a hypothesis of equal means and from testing a hypothesis of equal distributions
    (two-sample Kolmogorov-Smirnov test).}
    \end{table}
    '''

    texcontent = texsignature + texheader + table_ctrf + texfooter
    with open(texdir+texfile,'w') as f:
        f.write(texcontent)
Esempio n. 7
0
def main():
    """Simulate the spillover experiment on copies of one target network.

    Loads the estimation sample and three parameter setups with their
    posterior draws, builds a synthetic sample made of ``len(grid_nnosmoke)``
    copies of network ``target_scid``, and calls ``sim_state_spills`` for
    every job index in ``range(num_nets * nctrf)`` (serially or through a
    process pool, depending on ``DO_PARALLEL``).  The concatenated result is
    pickled to ``<scratchdir>/<pyfilename>_scid_<target_scid>.data``.
    """
    ## Setup
    [systime0,pyfilename,pyfiledir,homedir,currentdir,scratchdir,hostname,sysname]=setupdirs()
    timestamp = systime0.strftime("%Y-%m-%d-%H:%M:%S")
    # Non-timestamped output stem; the scid suffix is appended before saving.
    filename = scratchdir+'/'+pyfilename
    print(80*'*')
    print(f'hostname={hostname} (OS={sysname})')
    print(f'start={timestamp}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}')
    print(f'filename ={filename}')
    print(80*'*')

    ## Load data.
    with open(currentdir+'/../Data/estimation_top8_100plus.data', 'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    # These full-sample quantities are all rebuilt for the synthetic sample
    # further down.
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Builtin ``float`` replaces ``np.float``, which NumPy 1.24 removed.
    I9 = [(jattr.grade>8.1).to_numpy(float) for jattr in attr]

    ## Parameter setup.
    theta_setup     = pd.read_csv(currentdir+'/../Estimation/priors/parameterSetup.csv')
    estimatesfile   = '/../Estimation/estimates/1000draws_posterior.csv'
    thetastar_draws = pd.read_csv(currentdir+estimatesfile)

    #Fixed net
    theta_setupFixedNet = pd.read_csv(currentdir+'/../Estimation/priors/parameterSetupFixedNet.csv')
    estimatesfile = '/../Estimation/estimates/1000draws_posteriorFixedNet.csv'
    thetastarFixedNet_draws = pd.read_csv(currentdir+estimatesfile)

    #No Net Data (no local PE)
    theta_setupNoNetData = pd.read_csv(currentdir+'/../Estimation/priors/parameterSetupNoNetData.csv')
    estimatesfile = '/../Estimation/estimates/1000draws_posteriorNoNetData.csv'
    thetastarNoNetData_draws = pd.read_csv(currentdir+estimatesfile)

    nctrf = 3 # Model, Fixed net, No net data

    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.ones(num_nets,dtype=int)*20000

    numsim = 1000
    mcJump = 0.05 # Probability of large jumps for the MCMC.

    grid_nnosmoke = [0, 0.03, 0.05, 0.10, 0.20, 0.30, 0.50]
    lengrid = len(grid_nnosmoke)
    target_scid = 1 #1,7 medium size/medium-high smoking for representative experiments
    # netid	prev
    # 1 0.44654088050314467
    # 7	0.4397590361445783

    # Prep synthetic sample: one copy of the target network per grid point.
    num_nets  = lengrid
    n         = size_nets[target_scid]
    size_nets = [n]*num_nets
    sampleinfo= [num_nets, size_nets]
    sample_k  = [pmf_k(jn) for jn in size_nets]
    jattr     = attr[target_scid]
    jattr.index = jattr.id-1 #to be able to subset on ids
    I9 = [(jattr.grade>8.1).to_numpy(float)]*num_nets
    jA = data_a[target_scid].copy()
    jG = data_g[target_scid].copy()
    vec_numsim_kCD = np.ones(num_nets,dtype=int)*20000
    print(f'Target scid = {target_scid:3.0f} ({n})')


    sim_state_args = [num_nets, grid_nnosmoke, jattr, I9, jA, jG, size_nets, vec_numsim_kCD,
                      thetastar_draws, thetastarFixedNet_draws, thetastarNoNetData_draws,
                      theta_setup, theta_setupFixedNet, theta_setupNoNetData, sampleinfo, sample_k,
                      mcJump, numsim]
    sim_state_wrapper = functools.partial(sim_state_spills,*sim_state_args)
    njobs = num_nets*nctrf
    if DO_PARALLEL:
        # The context manager releases the worker processes once map() returns.
        with multiprocessing.Pool(processes=NUM_WORKERS) as pool:
            result = pool.map(sim_state_wrapper,range(njobs))
    else:
        result = [sim_state_wrapper(jcpu) for jcpu in range(njobs)]

    ctrfSpills = pd.concat(result,ignore_index=True)
    filename   = filename+f'_scid_{target_scid}.data'
    with open(filename, 'wb') as filehandle:
        pickle.dump([ctrfSpills,numsim,grid_nnosmoke,num_nets,size_nets,jattr,jA,jG,target_scid], filehandle)
    print(f'{pyfilename} saved {filename}')
Esempio n. 8
0
def main():
    """Write the descriptive-statistics LaTeX table for the estimation sample.

    Loads the pickled estimation sample and the extended attribute file
    ``attr2.csv`` (restricted to the networks in ``estimation_sample``),
    computes per-network statistics through ``stateStats2``, ``homophily``
    and ``mixingMat``, and writes overall / min / max summaries of selected
    attributes to ``table_sampleStats.tex`` in ``texdir``.
    """

    [systime0,pyfilename,pyfiledir,homedir,currentdir,scratchdir,hostname,sysname] = setupdirs()
    ## Load data.
    with open(currentdir+'/../../Data/estimation_top8_100plus.data', 'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g]=pickle.load(filehandle)

    attr2=pd.read_csv(currentdir+'/../../Data/attr2.csv') #extended attr
    # 100 plus
    estimation_sample = [7,8,9,12,13,14,15,16]
    # Copy the filtered rows: columns are added below, and assigning into a
    # slice of the original frame triggers pandas' SettingWithCopy warning.
    attr2 = attr2[attr2['netid'].isin(estimation_sample)].copy()

    texdir   = currentdir + '/../../TeX/'
    texfile = 'table_sampleStats.tex'

    # One slot per network for every statistic.
    netid=np.zeros(num_nets)
    prev=np.zeros(num_nets)
    density=np.zeros(num_nets)
    avgDeg=np.zeros(num_nets)
    minDeg=np.zeros(num_nets)
    maxDeg=np.zeros(num_nets)
    AGA=np.zeros(num_nets)
    IAGIA=np.zeros(num_nets)
    tri=np.zeros(num_nets)
    twolinksonly=np.zeros(num_nets)
    twolinksonly2=np.zeros(num_nets)
    HI=np.zeros(num_nets)
    CHI=np.zeros(num_nets)
    FSI=np.zeros(num_nets)
    nn=np.zeros(num_nets)
    ns=np.zeros(num_nets)
    sn=np.zeros(num_nets)
    ss=np.zeros(num_nets)
    for jnet,jattr in enumerate(attr):
        n=size_nets[jnet]
        netid[jnet]=jnet+1
        A=data_a[jnet]
        G=data_g[jnet]
        [prev[jnet],
         density[jnet],
         avgDeg[jnet],
         minDeg[jnet],
         maxDeg[jnet],
         AGA[jnet],
         IAGIA[jnet],
         tri[jnet],
         twolinksonly[jnet],
         twolinksonly2[jnet],
         stateStatsLabels]=stateStats2(G,A,n)
        [HI[jnet],CHI[jnet],FSI[jnet]]=homophily(G,A,n,True)
        # The 2x2 mixing matrix is flattened row by row into nn, ns, sn, ss.
        [nn[jnet],ns[jnet],sn[jnet],ss[jnet]] = list((mixingMat(G,A.astype(int),n,2).ravel('C')).astype(float))

    data_stats=pd.DataFrame(data=np.column_stack([netid,prev,density,avgDeg,minDeg,maxDeg,AGA,IAGIA,np.multiply(tri,size_nets),twolinksonly,twolinksonly2,HI,CHI,FSI,nn,ns,sn,ss]),
                    dtype=float,
                    columns=['netid','prev','density','avgDeg','minDeg','maxDeg','AGA','IAGIA','tri','twolinks','twolinks2','HI','CHI','FSI','nn','ns','sn','ss'])
    allattr=attr2 #pd.concat(attr)

    # 0/1 indicator columns derived from the coded sex and race variables.
    allattr['male']=(allattr['sex']==1).to_numpy(dtype=float)
    allattr['white']=(allattr['race']==1).to_numpy(dtype=float)
    allattr['as-hi-ot']=(allattr['race']==3).to_numpy(dtype=float)

    summary_attr=allattr.groupby('netid').agg(['count','mean'])

    # Each row ends in a LaTeX line break.  It is spelled '\\\\' because the
    # earlier '\\\ ' contained the invalid escape '\ ' (a SyntaxWarning on
    # Python 3.12+); the emitted text is unchanged.
    var=summary_attr.loc[:,[(      'id', 'count')]].to_numpy()
    texline=f'Students    & {np.sum(var):12.0f} &  {np.min(var):12.0f} & {np.max(var):12.0f} \\\\ \n'
    varlabels=['Smoking','Male','Whites','Blacks','As-Hi-Ot','Price','Avg income','Mom edu','HH smokes','Avg friends']
    for j,jvar in enumerate(['tobacco','male','white','black','as-hi-ot','price_level','income_level','mom_ed','hhsmokes','deg']):
        # Overall mean over all students; min and max over the school means.
        jscidvar=summary_attr.loc[:,[(      jvar, 'mean')]].to_numpy()
        texline+=f'{varlabels[j]:12}  & {np.mean(allattr[jvar]):12.2f} &  {np.min(jscidvar):12.2f} & {np.max(jscidvar):12.2f} \\\\ \n'

    texsignature=f'% tex created by {pyfilename}.py \n'
    texheader = r'''
\begin{table}[t]
\label{table:descriptive_stats}
\begin{center}
\caption{Descriptive Statistics for the estimation sample}
\begin{tabular}{lccc}
\hline \hline
              & Overall & Min      & Max   \\ \hline
'''
    texfooter = r'''
\hline
\end{tabular}
\label{table:descriptive_stats}
\end{center}
\fignotetitle{Note:} \fignotetext{The final sample contains students from 8 high schools. Min and max are reported at a school level.}
\end{table}
'''

    texcontent = texsignature + texheader + texline + texfooter
    with open(texdir+texfile,'w') as f:
        f.write(texcontent)
Esempio n. 9
0
Note output in texdir
'''

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from libsetups import state2pickle
from libsetups import setupdirs
from libposteriors import posteriorStats
from libposteriors import posterior_significance

# Run-time locations and host information reported by setupdirs().
(systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir, hostname,
 sysname) = setupdirs()

# Input and output locations.
estimatesdir = f'{scratchdir}/../../Estimation/'
priorsdir = f'{currentdir}/../../Estimation/priors/'
texdir = f'{currentdir}/../../TeX/'
texfile = 'table_estimates.tex'

# Suffixes shared by the parameter-setup names and the posterior names; the
# empty suffix gives the plain 'parameterSetup' / 'posterior' entries.
_MODEL_SUFFIXES = ('', 'FixedNet', 'NoNetData', 'NoPE', 'NoTri', 'NoCost')
paramSetups = ['parameterSetup' + suffix for suffix in _MODEL_SUFFIXES]
posteriors = ['posterior' + suffix for suffix in _MODEL_SUFFIXES]

models_labels = [
Esempio n. 10
0
def main():
    """Simulate every network from the posterior draws and pickle the result.

    Loads the estimation sample, the parameter setup and the posterior draws,
    calls ``gen_sample`` once per network index (serially, or through a
    ``multiprocessing.Pool`` of ``NUM_WORKERS`` processes when ``DO_PARALLEL``
    is true) and dumps the resulting list to ``<scratchdir>/<pyfilename>.data``.
    """
    ## Setup
    [
        systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir,
        hostname, sysname
    ] = setupdirs()
    # The output stem carries no timestamp, so every run writes to the same
    # file.
    filename = scratchdir + '/' + pyfilename

    ## Load data.
    with open(currentdir + '/../Data/estimation_top8_100plus.data',
              'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)
    # NOTE: if data_a entries are 1D they may need reshape(-1, 1) here.
    sampleinfo = [num_nets, size_nets]
    sample_k = [pmf_k(jn) for jn in size_nets]
    # Builtin float replaces the np.float alias, which NumPy 1.24 removed.
    I9 = [(jattr.grade > 8.1).to_numpy(float) for jattr in attr]

    ## Posterior & parameter setup.
    theta_setup = pd.read_csv(currentdir +
                              '/../Estimation/priors/parameterSetup.csv')
    theta_draws = pd.read_csv(
        currentdir + '/../Estimation/estimates/1000draws_posterior.csv')
    numsim = 1000

    print(80 * '*')
    print(
        f'start={systime0.strftime("%Y-%m-%d-%H:%M:%S")}\npyfilename={pyfilename}\ncurrentdir={currentdir}\nscratchdir={scratchdir}'
    )
    print(f'sim file name={filename}')
    print(80 * '-')
    print(f'hostname={hostname} (OS={sysname}):')
    print(os.environ.get('PYTHONPATH', '').split(os.pathsep))
    print(sys.path)
    print(80 * '*')

    ## Size MC for state (inner loop).
    vec_numsim_kCD = np.ones(num_nets, dtype=int) * 20000
    mcJump = 0.05  # Probability of large jumps for the MCMC.
    # NOTE(review): the global NumPy RNG is seeded before any worker process
    # is created; with a fork start method the workers would all start from
    # this same RNG state -- confirm whether gen_sample reseeds per network.
    np.random.seed(2026642028)

    gen_sample_args = [
        num_nets, attr, I9, data_a, data_g, size_nets, sampleinfo, numsim,
        vec_numsim_kCD, theta_draws, theta_setup, sample_k, mcJump
    ]
    gen_sample_wrapper = functools.partial(gen_sample, *gen_sample_args)
    if DO_PARALLEL:
        # pool.map blocks until all results are in, so it is safe for the
        # context manager to tear the pool down on exit.
        with multiprocessing.Pool(processes=NUM_WORKERS) as pool:
            simdata = pool.map(gen_sample_wrapper, range(num_nets))
    else:
        simdata = [gen_sample_wrapper(jjscid) for jjscid in range(num_nets)]

    with open(filename + '.data', 'wb') as filehandle:
        pickle.dump(simdata, filehandle)
    print(f'Saved simdata in {filename}.data')
    print(f'end={datetime.now().strftime("%Y-%m-%d-%H:%M:%S")}')
Esempio n. 11
0
def _simulated_net_stats(jnet, simnet):
    """Return one row of fit statistics per simulation for a single network.

    ``simnet[0]`` and ``simnet[1]`` are indexed by simulation; entry ``s`` of
    each is passed to ``stateStats2``, ``homophily`` and ``mixingMat``.  The
    returned frame also carries ``netid`` (``jnet + 1``), ``netsize`` and
    ``sim`` columns.
    """
    AA = simnet[0]
    GG = simnet[1]
    nsim = len(AA)
    n = len(AA[0])

    prev = np.zeros(nsim)
    density = np.zeros(nsim)
    avgDeg = np.zeros(nsim)
    minDeg = np.zeros(nsim)
    maxDeg = np.zeros(nsim)
    AGA = np.zeros(nsim)
    IAGIA = np.zeros(nsim)
    tri = np.zeros(nsim)
    twolinksonly = np.zeros(nsim)
    twolinksonly2 = np.zeros(nsim)
    HI = np.zeros(nsim)
    CHI = np.zeros(nsim)
    FSI = np.zeros(nsim)
    nn = np.zeros(nsim)
    ns = np.zeros(nsim)
    sn = np.zeros(nsim)
    ss = np.zeros(nsim)

    # The return value is not used; the call is kept because nodeStats may
    # have side effects -- TODO confirm and drop the call if it is pure.
    nodeStats(GG[0], n, True)

    for s in range(nsim):
        A = AA[s]
        G = GG[s]
        [
            prev[s], density[s], avgDeg[s], minDeg[s], maxDeg[s], AGA[s],
            IAGIA[s], tri[s], twolinksonly[s], twolinksonly2[s], _labels
        ] = stateStats2(G, A, len(A))
        [HI[s], CHI[s], FSI[s]] = homophily(G, A, n, True)
        [nn[s], ns[s], sn[s], ss[s]] = list(
            (mixingMat(G, A.astype(int), n, 2).ravel('C')).astype(float))

    jnetstats = pd.DataFrame(
        {
            'prev': prev,
            'density': density,
            'avgDeg': avgDeg,
            'minDeg': minDeg,
            'maxDeg': maxDeg,
            'AGA': AGA,
            'IAGIA': IAGIA,
            'tri': tri,
            'twolinks': twolinksonly,
            'twolinks2': twolinksonly2,
            'HI': HI,
            'CHI': CHI,
            'FSI': FSI,
            'nn': nn,
            'ns': ns,
            'sn': sn,
            'ss': ss,
        },
        dtype=float)
    jnetstats['netid'] = jnet + 1
    jnetstats['netsize'] = n
    jnetstats['sim'] = list(range(nsim))
    return jnetstats


def _fit_table_rows(entries, model_rows, median_model, data_rows):
    """Return the LaTeX rows ``label & model mean (median) & data`` for entries."""
    rows = []
    for var, label in entries:
        model_mean = float(weighted_average(model_rows, var, 'netsize'))
        model_median = float(weighted_average(median_model, var, 'netsize'))
        data_mean = float(weighted_average(data_rows, var, 'netsize'))
        texline = label.rjust(24, ' ')
        texline = f'{texline} & {model_mean:5.3f} ({model_median:5.3f}) & {data_mean:5.3f} '
        rows.append(texline + r' \\' + ' \n')
    return ''.join(rows)


def _mixing_cell(count, row_total):
    r"""Format one mixing-matrix cell as ``<row share>\% (<count>)``."""
    # Raw f-string: '\%' in a normal string literal is an invalid escape.
    return rf'{100 * count / row_total:4.0f}\% ({count:4.1f})'


def main():
    """Write the model-fit and mixing-matrix LaTeX tables.

    Reads the pickled simulations from ``<scratchdir>/../modelFit.data`` and
    the estimation sample, computes per-network, per-simulation statistics,
    and aggregates them with ``weighted_average`` using ``netsize`` as the
    weight column.  Rows with ``sim == 0`` are reported as "Data" and rows
    with ``sim > 0`` as "Model".  Output goes to ``table_fit.tex`` and
    ``table_fit_mixing.tex`` in ``<currentdir>/../../TeX/``.
    """
    [
        systime0, pyfilename, pyfiledir, homedir, currentdir, scratchdir,
        hostname, sysname
    ] = setupdirs()
    modelfitData = '/../modelFit.data'

    with open(scratchdir + modelfitData, 'rb') as filehandle:
        simdata = pickle.load(filehandle)

    texdir = currentdir + '/../../TeX/'
    texfile = 'table_fit.tex'
    texfile2 = 'table_fit_mixing.tex'

    ## Load data.
    with open(currentdir + '/../../Data/estimation_top8_100plus.data',
              'rb') as filehandle:
        [num_nets, size_nets, attr, data_a, data_g] = pickle.load(filehandle)

    # One concat over all networks instead of growing the frame per network.
    stats = pd.concat([
        _simulated_net_stats(jnet, simnet)
        for jnet, simnet in enumerate(simdata)
    ])
    data_rows = stats[stats.sim == 0]
    model_rows = stats[stats.sim > 0]

    median_model = model_rows.groupby('netid').median()
    median_model['netsize'] = np.asarray(size_nets, dtype=int)

    # (column in stats, label printed in the table)
    moments = [
        ('prev', 'Prevalence'),
        ('density', 'Density'),
        ('avgDeg', 'Avg degree'),
        ('minDeg', 'Min degree'),
        ('maxDeg', 'Max degree'),
        ('AGA', '$a_ig_{ij}a_j/n$'),
        ('IAGIA', '$(1-a_i)g_{ij}(1-a_j)/n$'),
        ('twolinks', 'Two-paths$/n$'),
        ('tri', 'Triangles$/n$'),
    ]
    homophily_indices = [('HI', 'HI'), ('CHI', 'CHI'), ('FSI', 'FSI')]

    table_fit = _fit_table_rows(moments, model_rows, median_model, data_rows)
    table_mixing = _fit_table_rows(homophily_indices, model_rows,
                                   median_model, data_rows)

    texsignature = f'% tex created by {pyfilename}.py \n'
    texheader = r'''
\begin{table}[t]
\caption{Model fit}
\label{table:fit}
\begin{center}
\begin{tabular}{lcc}
\hline \hline
\multicolumn{3}{c}{\textit{Selected moments}} \\
    Moment & Model & Data \\ \hline 
'''
    texmid = r'''
\\
\multicolumn{3}{c}{\textit{Mixing patterns}} \\ 
'''
    # NOTE(review): both formulas in the note below reuse g_{il}; the usual
    # two-path / triangle definitions would involve g_{jl}.  Confirm against
    # stateStats2 before editing the text.
    texfooter = r'''
\hline \\
\end{tabular}
\end{center}

\fignotetitle{Note:} 
\fignotetext{Columns Data and Model compare selected moments of the estimation sample with those of synthetic data 
generated by the estimated model. For the latter mean and median are reported (median in parentheses). 
Two-paths is defined as $\sum_{i>j} g_{ij}g_{il}(1-g_{il})$. Triangles is defined as $\sum_{i>j>l}g_{ij}g_{il}g_{il}$
For details on computing homophily indices see \cite{CurrariniJacksonPin2010} Definitions 1 and 2 in the supplemental appendix.}
\end{table} 
'''

    texcontent = texsignature + texheader + table_fit + texmid + table_mixing + texfooter
    with open(texdir + texfile, 'w') as f:
        f.write(texcontent)

    #MIXING MAT
    nn_data, ns_data, sn_data, ss_data = (
        weighted_average(data_rows, cell, 'netsize')
        for cell in ('nn', 'ns', 'sn', 'ss'))
    nn_model, ns_model, sn_model, ss_model = (
        weighted_average(model_rows, cell, 'netsize')
        for cell in ('nn', 'ns', 'sn', 'ss'))

    # Row totals used as the denominator of the percentage in each cell.
    smoker_model = ss_model + sn_model
    smoker_data = ss_data + sn_data
    nonsmoker_model = ns_model + nn_model
    nonsmoker_data = ns_data + nn_data

    # Each row is: model (Smoker, Nonsmoker) then data (Smoker, Nonsmoker);
    # the diagonal cell of each 2x2 matrix is set in bold.
    row1 = (r'& Smoker   & \textbf{' + _mixing_cell(ss_model, smoker_model) + '}'
            + ' & ' + _mixing_cell(sn_model, smoker_model)
            + r' & \textbf{' + _mixing_cell(ss_data, smoker_data) + '}'
            + ' & ' + _mixing_cell(sn_data, smoker_data))
    row2 = (' & Nonsmoker   & ' + _mixing_cell(ns_model, nonsmoker_model)
            + r'& \textbf{ ' + _mixing_cell(nn_model, nonsmoker_model) + '}'
            + ' & ' + _mixing_cell(ns_data, nonsmoker_data)
            + r' & \textbf{ ' + _mixing_cell(nn_data, nonsmoker_data) + '}')

    texheader = r'''
\begin{table}[t]
\caption{Fit mixing matrix (model left, data right)}
\label{table:fit_mixing}
\centering
\begin{footnotesize}
\begin{tabular}{llcccc}
%\cmidrule{2-4} \morecmidrules \cmidrule{2-4}
  &  & \multicolumn{2}{c}{\axislabel{Nominee}}                                 & \multicolumn{2}{c}{\axislabel{Nominee}} \\
  \multirow{5}{*}{\rotatebox[origin=c]{90}{\axislabel{Nominator}}}
  &            & Smoker               & Nonsmoker                      & Smoker               & Nonsmoker           \\\cmidrule{2-6}
'''

    texfooter = r'''
\\
\cmidrule{2-6}
\end{tabular}

\end{footnotesize}
%\fignotetitle{Source:} \fignotetext{The National Longitudinal Study of Adolescent Health (Add Health) - Wave I, 1994-95 school year (Estimation sample: $14$ schools, $1,125$ students, $21\%$ smokers).}
\end{table}
'''

    texcontent = texsignature + texheader + row1 + r'''\\''' + ' \n' + row2 + texfooter
    with open(texdir + texfile2, 'w') as f:
        f.write(texcontent)
Esempio n. 12
0
def main():
    """Write the spillover counterfactual LaTeX tables.

    Loads the pickled counterfactual results listed in ``ctrfFiles``, stacks
    them, scales the simulated rows (``sim != 0``) by a network-size weight,
    averages by ``policy_size`` and ``estimation_scenario`` and writes
    ``table_ctrfSpills.tex`` and ``table_ctrfSpills_extended.tex`` to
    ``<currentdir>/../../TeX/``.
    """
    [systime0,pyfilename,pyfiledir,homedir,currentdir,scratchdir,hostname,sysname]=setupdirs()

    ctrfDir = scratchdir + '/../'
    ctrfFiles=[
        'ctrfSpillovers_scid_1.data',
        'ctrfSpillovers_scid_7.data'
        ]

    texdir  = currentdir + '/../../TeX/'
    texfile = 'table_ctrfSpills.tex'
    texfile_extended = 'table_ctrfSpills_extended.tex'

    # Only the result frame is collected per file; the remaining fields
    # (including grid_nnosmoke, used below) keep the values of the last file.
    jctrfSpills = [None]*len(ctrfFiles)
    for j,ctrfFile in enumerate(ctrfFiles):
        print(f'Opening file: {ctrfFile}')
        with open(ctrfDir+ctrfFile, 'rb') as filehandle:
            [jctrfSpills[j],numsim,grid_nnosmoke,num_nets, size_nets, jattr, jA, jG, target_scid]=pickle.load(filehandle)

    # Estimation scenario: 0,1,2 (Model, Fixed net, No net data)
    ctrfSpills = pd.concat(jctrfSpills,ignore_index=True)
    ctrfSpills['policy_size']         = ctrfSpills['policy_size'].astype(int)
    ctrfSpills['estimation_scenario'] = ctrfSpills['estimation_scenario'].astype(int)
    ctrfSpillsSims = ctrfSpills[ctrfSpills.sim!=0].copy()

    # The weight column is added after the copy above, so it is not itself
    # one of the columns being rescaled; the product aligns on the row index.
    # NOTE(review): columns[3:] is assumed to exclude the grouping keys
    # (policy_size, estimation_scenario) -- confirm against the saved frames.
    ctrfSpills['weight'] = ctrfSpills['netsize']/np.mean(ctrfSpills['netsize'])
    for jcol in ctrfSpillsSims.columns[3:]:
        ctrfSpillsSims.loc[:,jcol]=ctrfSpillsSims.loc[:,jcol]*ctrfSpills['weight']
    meanSims     = ctrfSpillsSims.groupby(['policy_size', 'estimation_scenario']).mean().reset_index()
    meanSimsWide = meanSims.pivot(index='policy_size',columns='estimation_scenario')

    texsignature=f'% tex created by {pyfilename}.py \n'
    texheader = r'''
    \begin{table}[!h]
    \caption{Spillovers}
    \label{table:ctrf-spillovers}
    \begin{center}
    \begin{tabular}{ccccc}
    \multirow{2}{*}{Campaign (\%)}  & \multirow{2}{*}{Smoking}  & \multicolumn{1}{c}{Predicted effect} & Actual & \multirow{2}{*}{Multiplier} \\
    & & proportional & effect \\
     \hline \hline
    '''
    baseline_prev = 100*meanSimsWide.loc[0,('prev',0)]
    table_ctrf= f'- & {baseline_prev:4.1f} & - & - &' + r' \\' + ' \n'
    for j in range(1,len(grid_nnosmoke)):
        mean_smoking_policy_j = 100*meanSimsWide.loc[j,('prev',0)]
        proportional_effect = baseline_prev*grid_nnosmoke[j]
        actual_effect = baseline_prev - mean_smoking_policy_j
        texline = f'{grid_nnosmoke[j]*100:4.0f} & {mean_smoking_policy_j:4.1f}'
        texline = texline + f'& {proportional_effect:4.1f}'
        texline = texline + f'& {actual_effect:4.1f}'
        texline = texline + f'& {actual_effect/proportional_effect:4.1f}'
        table_ctrf = table_ctrf + texline + r' \\' + ' \n'

    texfooter = r'''
    \hline
    \end{tabular}
    \end{center}
    \fignotetitle{Note:} The first column lists the alternative attendance rates. The second and third columns display the smoking rate and the change in smoking rate respectively if the decrease would be proportional to the intervention, i.e. computes a baseline without peer effects. The last column computes the ratio between the percentage change in the number of smokers and the attendance rate. Note that that attendance is random with respect to the smoking status of the students. If the campaign is able to target only students who are currently smokers, the spillover effects will be even larger.
    \end{table}
    '''

    texcontent = texsignature + texheader + table_ctrf + texfooter
    with open(texdir+texfile,'w') as f:
        f.write(texcontent)

    # Extended table
    # NOTE(review): this table reuses the caption and \label of the table
    # above; LaTeX reports a duplicate label if both files are included.
    texheader = r'''
    \begin{table}[!h]
    \caption{Spillovers}
    \label{table:ctrf-spillovers}
    \begin{center}
    \begin{tabular}{ccccccc}
    \multirow{2}{2cm}{Campaign size (\%)}  
    & \multirow{2}{2cm}{Predicted smoking} 
    & \multicolumn{4}{c}{Predicted effect}
    & \multirow{2}{*}{Multiplier} \\
    & & Model
    & Exog net
    & No network
    & Prop    
    \\
     \hline \hline
    '''
    # Drop in prevalence relative to policy size 0, per estimation scenario.
    delta_prev          = 100*meanSimsWide.loc[0,('prev',0)] - 100*meanSimsWide.loc[1:,('prev',0)]
    delta_prev_fixednet = 100*meanSimsWide.loc[0,('prev',1)] - 100*meanSimsWide.loc[1:,('prev',1)]
    delta_prev_nonet    = 100*meanSimsWide.loc[0,('prev',2)] - 100*meanSimsWide.loc[1:,('prev',2)]
    table_ctrf= f'- & {baseline_prev:4.1f} & - & - & - & - &' + r' \\' + ' \n'
    for j in range(1,len(grid_nnosmoke)):
        mean_smoking_policy_j = 100*meanSimsWide.loc[j,('prev',0)]
        # Proportional benchmark is based on the baseline prevalence, matching
        # the first table and the denominator of the multiplier below.
        proportional_effect = baseline_prev*grid_nnosmoke[j]
        texline = f'{grid_nnosmoke[j]*100:4.0f}'
        texline = texline + f'& {mean_smoking_policy_j:4.1f}'
        texline = texline + f'& {delta_prev.loc[j]:4.1f}'
        texline = texline + f'& {delta_prev_fixednet.loc[j]:4.1f}'
        texline = texline + f'& {delta_prev_nonet.loc[j]:4.1f}'
        texline = texline + f'& {proportional_effect:4.1f}'
        texline = texline + f'& {(baseline_prev - mean_smoking_policy_j)/proportional_effect:4.1f}'
        table_ctrf = table_ctrf + texline + r' \\' + ' \n'

    texfooter = r'''
    \hline
    \end{tabular}
    \end{center}
    \fignotetitle{Note:} The first and the second columns list the alternative attendance rates and the simulated smoking prevalences respectively.
    Columns three to six display the simulated decrease in overall smoking for different estimation scenarios:
        the full model, the model with exogenous (fixed) social network, the model with no social network data, and 
        the policy effect if it were (only) proportional to the intervention, i.e. a baseline without peer effects. 
        The last column computes the ratio between the percentage change in the number of smokers and the attendance rate.
        %Note that that attendance is random with respect to the smoking status of the students. 
        %If the campaign is able to target only students who are currently smokers, the spillover effects will be even larger.
    \end{table}
    '''

    texcontent = texsignature + texheader + table_ctrf + texfooter
    with open(texdir+texfile_extended,'w') as f:
        f.write(texcontent)