def _checkdata(X,Y1_level,Y0_level,numAgents):
    ''' Validate the covariate matrix and the two outcome-level vectors.

        The expected number of columns of X is the length of the
        'Y1_beta' entry of the initialization file. Every check is an
        assertion; the function returns True when all of them pass.
    '''
    # The expected column count of X comes from the initialization file.
    numCovarsOut = np.array(grmReader.read()['Y1_beta']).shape[0]

    # X has to be a finite (numAgents x numCovarsOut) array.
    assert isinstance(X, np.ndarray)
    assert X.shape == (numAgents, numCovarsOut)
    assert np.isfinite(X).all()

    # Both level vectors have to be finite and hold one entry per agent.
    for level in (Y1_level, Y0_level):
        assert isinstance(level, np.ndarray)
        assert level.shape == (numAgents, )
        assert np.isfinite(level).all()

    return True
def _getdata():
    ''' Load the dataset named in the initialization file.

        Returns the content of the file stored under 'fileName' in the
        initialization dictionary, parsed as floats by np.genfromtxt.
    '''
    # The initialization file tells us which file holds the data.
    dataFile = grmReader.read()['fileName']

    return np.genfromtxt(dataFile, dtype = 'float')
def simulate():
    ''' Simulate data generation process of the Generalized Roy Model.

        Reads the parametrization from the initialization file, draws the
        observables (X, Z) and the unobservables (U1, U0, V), derives the
        choices D and the observed outcomes Y, and writes the columns
        (Y, D, X, Z) to the file named under 'fileName'.

        Returns None. Raises AssertionError if the simulated sample fails
        one of the quality checks.
    '''
    # Parenthesized form prints the same text under Python 2 and 3.
    print("Simulating Data.")

    # Process initFile.
    initDict = grmReader.read()

    #
    # Distribute parametrization and (limited) type conversions.
    #

    numAgents  = initDict['numAgents']
    fileName   = initDict['fileName']

    Y1_beta    = np.array(initDict['Y1_beta'])
    Y0_beta    = np.array(initDict['Y0_beta'])

    D_gamma    = np.array(initDict['D_gamma'])

    U1_var     = initDict['U1_var']
    U0_var     = initDict['U0_var']
    V_var      = initDict['V_var']

    U1V_rho    = initDict['U1V_rho']
    U0V_rho    = initDict['U0V_rho']

    randomSeed = initDict['randomSeed']

    # Set random seed
    np.random.seed(randomSeed)

    # Construct auxiliary objects.
    numCovarsOut  = Y1_beta.shape[0]
    numCovarsCost = D_gamma.shape[0]

    U1V_cov = U1V_rho*np.sqrt(U1_var)*np.sqrt(V_var)
    U0V_cov = U0V_rho*np.sqrt(U0_var)*np.sqrt(V_var)

    #
    # Simulation
    #

    # Simulate observable agent characteristics. The order of the draws
    # (X, then Z, then U) is kept so that a given seed reproduces the
    # same sample.
    X = np.random.multivariate_normal(np.zeros(numCovarsOut),
            np.identity(numCovarsOut), numAgents)

    # The first outcome covariate is overwritten with a constant.
    X[:,0] = 1.0

    Z = np.random.multivariate_normal(np.zeros(numCovarsCost),
            np.identity(numCovarsCost), numAgents)

    # Construct level indicators for outcomes and choices.
    Y1_level = np.dot(Y1_beta, X.T)
    Y0_level = np.dot(Y0_beta, X.T)
    D_level  = np.dot(D_gamma, Z.T)

    # Simulate unobservables from the model. The matrix is forced to
    # float: with integer variances np.diag would return an integer
    # matrix and the covariances assigned below would be truncated.
    covs = np.diag(np.array([U1_var, U0_var, V_var], dtype = 'float'))

    covs[0,2] = U1V_cov
    covs[2,0] = U1V_cov

    covs[1,2] = U0V_cov
    covs[2,1] = U0V_cov

    U = np.random.multivariate_normal(np.zeros(3), covs, numAgents)

    U1 = U[:,0]
    U0 = U[:,1]
    V  = U[:,2]

    # Decision rule: an agent selects treatment when the expected
    # benefits exceed the cost.
    expectedBenefits = Y1_level - Y0_level
    cost             = D_level + V

    D = (expectedBenefits - cost > 0).astype('float')

    # Potential outcomes.
    Y1 = Y1_level + U1
    Y0 = Y0_level + U0

    # Observed outcomes.
    Y = D*Y1 + (1.0 - D)*Y0

    # Check quality of simulated sample.
    for sample in (Y1, Y0, Y, D):

        assert (np.all(np.isfinite(sample)))
        assert (sample.shape == (numAgents, ))
        assert (sample.dtype == 'float')

    # Every choice has to be either zero or one. (Testing whether
    # D.all() is in [1.0, 0.0] would be true for any D.)
    assert (np.all((D == 1.0) | (D == 0.0)))

    # Export sample to *.txt file for further processing.
    np.savetxt(fileName, np.column_stack((Y, D, X, Z)), fmt= '%8.3f')
def estimate(outputfile = False):
    ''' Public interface to request an estimation of the Generalized
        Roy Model.

        outputfile: path of a file that receives the estimates as JSON;
        the default False skips the export.

        Returns the dictionary built by _distributeEvaluationValues with
        its 'Y1_beta', 'Y0_beta' and 'D_gamma' entries converted to
        lists. Raises AssertionError if 'grmInit.ini' or the data file is
        missing or the dataset fails _checkData.
    '''
    # Parenthesized form prints the same text under Python 2 and 3.
    print("Estimating.")

    # Checks.
    assert (os.path.exists('grmInit.ini'))

    # Process initialization file.
    initDict = grmReader.read()

    # Checks.
    assert (os.path.exists(initDict['fileName']))

    # Process initFile.
    _initializeLogging()

    #
    # Distribute useful covariates.
    #

    numAgents = initDict['numAgents']

    Y1_beta   = np.array(initDict['Y1_beta'])
    Y0_beta   = np.array(initDict['Y0_beta'])

    D_gamma   = np.array(initDict['D_gamma'])

    U1_var    = initDict['U1_var']
    U0_var    = initDict['U0_var']

    U1V_rho   = initDict['U1V_rho']
    U0V_rho   = initDict['U0V_rho']

    maxiter   = initDict['maxiter']

    #
    # Construct auxiliary objects.
    #

    numCovarsOut  = Y1_beta.shape[0]
    numCovarsCost = D_gamma.shape[0]

    #
    # Read and check dataset, distribute entries.
    #

    data = np.genfromtxt(initDict['fileName'], dtype = 'float')

    assert (_checkData(data, numAgents, numCovarsOut, numCovarsCost) == True)

    # Column layout: Y, D, then the outcome covariates, with the cost
    # covariates in the last numCovarsCost columns.
    Y = data[:,0]
    D = data[:,1]

    X = data[:,2:(numCovarsOut + 2)]
    Z = data[:,-numCovarsCost:]

    #
    # Maximization Script.
    #

    # The values from the initialization file serve as starting values.
    startVals = np.concatenate((Y1_beta, Y0_beta, D_gamma,
                    [U1_var], [U0_var], [U1V_rho], [U0V_rho]))

    # Run maximization. The optimizer's console output is appended to the
    # log file; try/finally guarantees that stdout is restored and the
    # log file is closed even if the optimizer raises.
    logFile = open('grmLogging.txt', 'a')

    sys.stdout = logFile

    try:

        rslt = fmin_bfgs(_maxAlgorithmInterface, startVals,
                         args = (Y, D, X, Z), maxiter = maxiter,
                         full_output = False)

    finally:

        sys.stdout = sys.__stdout__

        logFile.close()

    # Output
    rslt = _distributeEvaluationValues(rslt, numCovarsOut, True)

    # Lists instead of arrays, so that the dictionary can be dumped as JSON.
    for key_ in ['Y1_beta', 'Y0_beta', 'D_gamma']:

        rslt[key_] = rslt[key_].tolist()

    if outputfile is not False:

        with open(outputfile, 'w') as file_:

            json.dump(rslt, file_)

        print("Estimates saved to '{}'.".format(outputfile))

    return rslt
def estimate():
    """ Public interface to request an estimation of the Generalized
        Roy Model.

        Reads the parametrization and the dataset named in the
        initialization file, runs the BFGS maximization with the
        optimizer output appended to "grmLogging.txt", and writes
        "grmRslt.json".

        Returns None. Raises AssertionError if "grmInit.ini" or the data
        file is missing or the dataset fails _checkData.
    """
    # Checks.
    assert os.path.exists("grmInit.ini")

    # Process initialization file.
    initDict = grmReader.read()

    # Checks.
    assert os.path.exists(initDict["fileName"])

    # Process initFile.
    _initializeLogging()

    # Distribute useful covariates.
    numAgents = initDict["numAgents"]

    Y1_beta = np.array(initDict["Y1_beta"])
    Y0_beta = np.array(initDict["Y0_beta"])

    D_gamma = np.array(initDict["D_gamma"])

    U1_var = initDict["U1_var"]
    U0_var = initDict["U0_var"]

    U1V_rho = initDict["U1V_rho"]
    U0V_rho = initDict["U0V_rho"]

    maxiter = initDict["maxiter"]

    # Construct auxiliary objects.
    numCovarsOut = Y1_beta.shape[0]
    numCovarsCost = D_gamma.shape[0]

    # Read and check dataset, distribute entries.
    data = np.genfromtxt(initDict["fileName"], dtype="float")

    assert _checkData(data, numAgents, numCovarsOut, numCovarsCost) == True

    # Column layout: Y, D, then the outcome covariates, with the cost
    # covariates in the last numCovarsCost columns.
    Y = data[:, 0]
    D = data[:, 1]

    X = data[:, 2 : (numCovarsOut + 2)]
    Z = data[:, -numCovarsCost:]

    # Maximization Script.

    # The values from the initialization file serve as starting values.
    startVals = np.concatenate((Y1_beta, Y0_beta, D_gamma, [U1_var], [U0_var], [U1V_rho], [U0V_rho]))

    # Run maximization. The optimizer's console output is appended to the
    # log file; try/finally guarantees that stdout is restored and the
    # log file is closed even if the optimizer raises.
    logFile = open("grmLogging.txt", "a")

    sys.stdout = logFile

    try:

        rslt = fmin_bfgs(_maxAlgorihtmInterface, startVals, args=(Y, D, X, Z), maxiter=maxiter, full_output=True)

    finally:

        sys.stdout = sys.__stdout__

        logFile.close()

    # Construct dictionary with results.
    rslt = _distributeEvaluationValues(rslt, numCovarsOut, True)

    #  Write out the *.json file.
    # NOTE(review): this dumps the initialization dictionary, not the
    # estimation results in rslt, which are otherwise unused. Kept as is
    # because it is not visible here whether rslt is JSON serializable;
    # confirm whether rslt was meant to be written.
    with open("grmRslt.json", "w") as file_:

        json.dump(initDict, file_)
def simulate(numAgents):
    ''' Simulate data generation process of the Generalized Roy Model.

        numAgents: number of agents to simulate. It is passed in by the
        caller (ParallelRun, per the original notes) instead of being
        read from the initialization file, and no file is written here.

        Returns a dictionary with two entries: 'variables' holds the
        column stack (Y, D, X, Z) and 'treatments' holds a single row
        with (ATE, ATT, ATUT). Raises AssertionError if the simulated
        sample fails one of the quality checks.
    '''
    # Process initFile.
    initDict = grmReader.read()

    # Distribute parametrization and (limited) type conversions.
    Y1_beta    = np.array(initDict['Y1_beta'])
    Y0_beta    = np.array(initDict['Y0_beta'])

    D_gamma    = np.array(initDict['D_gamma'])

    U1_var     = initDict['U1_var']
    U0_var     = initDict['U0_var']
    V_var      = initDict['V_var']

    U1V_rho    = initDict['U1V_rho']
    U0V_rho    = initDict['U0V_rho']

    randomSeed = initDict['randomSeed']

    # Set random seed
    np.random.seed(randomSeed)

    # Construct auxiliary objects.
    numCovarsOut  = Y1_beta.shape[0]
    numCovarsCost = D_gamma.shape[0]

    U1V_cov = U1V_rho*np.sqrt(U1_var)*np.sqrt(V_var)
    U0V_cov = U0V_rho*np.sqrt(U0_var)*np.sqrt(V_var)

    # Simulate observable agent characteristics. The order of the draws
    # (X, then Z, then U) is kept so that a given seed reproduces the
    # same sample.
    X = np.random.multivariate_normal(np.zeros(numCovarsOut),
            np.identity(numCovarsOut), numAgents)

    # The first outcome covariate is overwritten with a constant.
    X[:,0] = 1.0

    Z = np.random.multivariate_normal(np.zeros(numCovarsCost),
            np.identity(numCovarsCost), numAgents)

    # Construct level indicators for outcomes and choices.
    Y1_level = np.dot(Y1_beta, X.T)
    Y0_level = np.dot(Y0_beta, X.T)
    D_level  = np.dot(D_gamma, Z.T)

    # Simulate unobservables from the model. The matrix is forced to
    # float: with integer variances np.diag would return an integer
    # matrix and the covariances assigned below would be truncated.
    covs = np.diag(np.array([U1_var, U0_var, V_var], dtype = 'float'))

    covs[0,2] = U1V_cov
    covs[2,0] = U1V_cov

    covs[1,2] = U0V_cov
    covs[2,1] = U0V_cov

    U = np.random.multivariate_normal(np.zeros(3), covs, numAgents)

    U1 = U[:,0]
    U0 = U[:,1]
    V  = U[:,2]

    # Decision rule: an agent selects treatment when the expected
    # benefits exceed the cost.
    expectedBenefits = Y1_level - Y0_level
    cost             = D_level + V

    D = (expectedBenefits - cost > 0).astype('float')

    # Potential outcomes.
    Y1 = Y1_level + U1
    Y0 = Y0_level + U0

    # Observed outcomes.
    Y = D*Y1 + (1.0 - D)*Y0

    # Calculate the treatment effects: averages of the individual
    # effects over all, the treated, and the untreated agents. An empty
    # group yields nan.
    effects = Y1 - Y0

    ATE  = np.sum(effects)/np.size(effects)
    ATT  = np.sum(effects[D==1])/np.size(effects[D==1])
    ATUT = np.sum(effects[D==0])/np.size(effects[D==0])

    # Check quality of simulated sample.
    for sample in (Y1, Y0, Y, D):

        assert (np.all(np.isfinite(sample)))
        assert (sample.shape == (numAgents, ))
        assert (sample.dtype == 'float')

    # Every choice has to be either zero or one. (Testing whether
    # D.all() is in [1.0, 0.0] would be true for any D.)
    assert (np.all((D == 1.0) | (D == 0.0)))

    assert (ATE.dtype == 'float')
    assert (ATT.dtype == 'float')
    assert (ATUT.dtype == 'float')

    # The sample is no longer exported to a *.txt file; it is handed
    # back to the caller instead.
    variablesOutput  = np.column_stack((Y, D, X, Z))
    treatmentsOutput = np.column_stack((ATE, ATT, ATUT))

    return {'variables':variablesOutput, 'treatments': treatmentsOutput}
def simulate():
    ''' Simulate data generation process of the Generalized Roy Model.

        Reads the parametrization from the initialization file, draws the
        observables (X, Z) and the unobservables (U1, U0, V), derives the
        choices D and the observed outcomes Y, and writes the columns
        (Y, D, X, Z) to the file named under 'fileName'.

        Returns None. Raises AssertionError if the simulated sample fails
        one of the quality checks.
    '''
    # Process initFile.
    initDict = grmReader.read()

    # Distribute parametrization and (limited) type conversions.
    numAgents  = initDict['numAgents']
    fileName   = initDict['fileName']

    Y1_beta    = np.array(initDict['Y1_beta'])
    Y0_beta    = np.array(initDict['Y0_beta'])

    D_gamma    = np.array(initDict['D_gamma'])

    U1_var     = initDict['U1_var']
    U0_var     = initDict['U0_var']
    V_var      = initDict['V_var']

    U1V_rho    = initDict['U1V_rho']
    U0V_rho    = initDict['U0V_rho']

    randomSeed = initDict['randomSeed']

    # Set random seed
    np.random.seed(randomSeed)

    # Construct auxiliary objects.
    numCovarsOut  = Y1_beta.shape[0]
    numCovarsCost = D_gamma.shape[0]

    U1V_cov = U1V_rho*np.sqrt(U1_var)*np.sqrt(V_var)
    U0V_cov = U0V_rho*np.sqrt(U0_var)*np.sqrt(V_var)

    # Simulate observable agent characteristics. The order of the draws
    # (X, then Z, then U) is kept so that a given seed reproduces the
    # same sample.
    X = np.random.multivariate_normal(np.zeros(numCovarsOut),
            np.identity(numCovarsOut), numAgents)

    # The first outcome covariate is overwritten with a constant.
    X[:,0] = 1.0

    Z = np.random.multivariate_normal(np.zeros(numCovarsCost),
            np.identity(numCovarsCost), numAgents)

    # Construct level indicators for outcomes and choices.
    Y1_level = np.dot(Y1_beta, X.T)
    Y0_level = np.dot(Y0_beta, X.T)
    D_level  = np.dot(D_gamma, Z.T)

    # Simulate unobservables from the model. The matrix is forced to
    # float: with integer variances np.diag would return an integer
    # matrix and the covariances assigned below would be truncated.
    covs = np.diag(np.array([U1_var, U0_var, V_var], dtype = 'float'))

    covs[0,2] = U1V_cov
    covs[2,0] = U1V_cov

    covs[1,2] = U0V_cov
    covs[2,1] = U0V_cov

    U = np.random.multivariate_normal(np.zeros(3), covs, numAgents)

    U1 = U[:,0]
    U0 = U[:,1]
    V  = U[:,2]

    # Decision rule: an agent selects treatment when the expected
    # benefits exceed the cost.
    expectedBenefits = Y1_level - Y0_level
    cost             = D_level + V

    D = (expectedBenefits - cost > 0).astype('float')

    # Potential outcomes.
    Y1 = Y1_level + U1
    Y0 = Y0_level + U0

    # Observed outcomes.
    Y = D*Y1 + (1.0 - D)*Y0

    # Check quality of simulated sample.
    for sample in (Y1, Y0, Y, D):

        assert (np.all(np.isfinite(sample)))
        assert (sample.shape == (numAgents, ))
        assert (sample.dtype == 'float')

    # Every choice has to be either zero or one. (Testing whether
    # D.all() is in [1.0, 0.0] would be true for any D.)
    assert (np.all((D == 1.0) | (D == 0.0)))

    # Export sample to *.txt file for further processing.
    np.savetxt(fileName, np.column_stack((Y, D, X, Z)), fmt= '%8.3f')
'''
grmToolbox.simulate()

''' Estimation.
'''
# Run the estimation and pick the estimated outcome coefficients and
# variances out of the returned results.
rslt=grmToolbox.estimate()
Y1_beta    = (rslt['Y1_beta'])
Y0_beta    = (rslt['Y0_beta'])   
U1_var     = rslt['U1_var'] 
U0_var     = rslt['U0_var']

# Get the X's from the data
# Checks.
assert (os.path.exists('grmInit.ini'))     
# Process initialization file.
initDict = grmReader.read()
data = numpy.genfromtxt(initDict['fileName'], dtype = 'float')
# The length of the 'Y1_beta' entry gives the number of outcome covariates;
# the array itself is only needed for its shape.
trash   = numpy.array(initDict['Y1_beta'])
numCovarsOut  = trash.shape[0]  
# The outcome covariates start in the third column, the second column is D.
X = data[:,2:(numCovarsOut + 2)]
D = data[:,1]
# Sum of the D column; presumably the number of agents with D == 1,
# assuming D only holds zeros and ones -- TODO confirm.
N_type1=sum(D)
# Row positions of the agents with D == 0 and with D == 1.
index_0=numpy.where(D==0)[0]
index_1=numpy.where(D==1)[0]

# Now create Y1 and Y0 without the errors for the agents with D == 1; the
# errors will be generated later using parallel computing.
D1_wo_errors_Y1=numpy.dot(X[index_1,:],Y1_beta)
D1_wo_errors_Y1_avg=numpy.mean(D1_wo_errors_Y1)

D1_wo_errors_Y0=numpy.dot(X[index_1,:],Y0_beta)
def estimate():
    ''' Public interface to request an estimation of the Generalized
        Roy Model.

        Reads the parametrization and the dataset named in the
        initialization file and runs the BFGS maximization with the
        optimizer output appended to 'grmLogging.txt'.

        Returns the dictionary that _distributeEvaluationValues builds
        from the optimizer's parameter vector. Raises AssertionError if
        'grmInit.ini' or the data file is missing or the dataset fails
        _checkData.
    '''
    # Checks.
    assert (os.path.exists('grmInit.ini'))

    # Process initialization file.
    initDict = grmReader.read()

    # Checks.
    assert (os.path.exists(initDict['fileName']))

    # Process initFile.
    _initializeLogging()

    # Distribute useful covariates.
    numAgents = initDict['numAgents']

    Y1_beta   = np.array(initDict['Y1_beta'])
    Y0_beta   = np.array(initDict['Y0_beta'])

    D_gamma   = np.array(initDict['D_gamma'])

    U1_var    = initDict['U1_var']
    U0_var    = initDict['U0_var']

    U1V_rho   = initDict['U1V_rho']
    U0V_rho   = initDict['U0V_rho']

    maxiter   = initDict['maxiter']

    # Construct auxiliary objects.
    numCovarsOut  = Y1_beta.shape[0]
    numCovarsCost = D_gamma.shape[0]

    # Read and check dataset, distribute entries.
    data = np.genfromtxt(initDict['fileName'], dtype = 'float')

    assert (_checkData(data, numAgents, numCovarsOut, numCovarsCost) == True)

    # Column layout: Y, D, then the outcome covariates, with the cost
    # covariates in the last numCovarsCost columns.
    Y = data[:,0]
    D = data[:,1]

    X = data[:,2:(numCovarsOut + 2)]
    Z = data[:,-numCovarsCost:]

    # Maximization Script.

    # The values from the initialization file serve as starting values.
    startVals = np.concatenate((Y1_beta, Y0_beta, D_gamma,
                    [U1_var], [U0_var], [U1V_rho], [U0V_rho]))

    # Run maximization. The optimizer's console output is appended to the
    # log file; try/finally guarantees that stdout is restored and the
    # log file is closed even if the optimizer raises.
    logFile = open('grmLogging.txt', 'a')

    sys.stdout = logFile

    try:

        rslt = fmin_bfgs(_maxAlgorihtmInterface, startVals,
                         args = (Y, D, X, Z), maxiter = maxiter,
                         full_output = True)

    finally:

        sys.stdout = sys.__stdout__

        logFile.close()

    # Construct dictionary with results. With full_output the optimizer
    # returns a tuple whose first element is the parameter vector.
    rslt = _distributeEvaluationValues(rslt[0], numCovarsOut, True)

    return rslt
def evaluate():
    ''' Simulate the treatment effects implied by the estimated
        parameters and report their average across the MPI processes.

        The estimated parameters come from _getpara(), the covariates X
        and Z from _getdata(), and the unobservables are drawn anew from
        the estimated distribution with the variance of V set to one.
        ATE, ATT and ATU are gathered on the root process, which prints
        their means.

        Returns None. Raises AssertionError if one of the _checkdata
        calls fails.
    '''
    # Read grmRslt.json and get the dictionary (per the original note on
    # _getpara).
    para = _getpara()

    # Get the estimated parameters from the dictionary.
    Y1_beta = para['Y1_beta']
    Y0_beta = para['Y0_beta']

    D_gamma = para['D_gamma']

    U1_var  = para['U1_var']
    U0_var  = para['U0_var']

    U1V_rho = para['U1V_rho']
    U0V_rho = para['U0V_rho']

    # Normalization.
    V_var = 1

    # Covariances between the outcome unobservables and V.
    U1V_cov = U1V_rho*np.sqrt(U1_var)*np.sqrt(V_var)
    U0V_cov = U0V_rho*np.sqrt(U0_var)*np.sqrt(V_var)

    # Get the data from the .dat file.
    data_ = _getdata()

    # Get numAgents, numCovarsOut, numCovarsCost and randomSeed from the
    # initialization file.
    initDict = grmReader.read()

    numAgents     = initDict['numAgents']

    numCovarsOut  = np.array(initDict['Y1_beta']).shape[0]
    numCovarsCost = np.array(initDict['D_gamma']).shape[0]

    randomSeed    = initDict['randomSeed']

    # Set random seed.
    # NOTE(review): if every MPI process reads the same randomSeed, all
    # of them draw identical unobservables -- confirm this is intended.
    np.random.seed(randomSeed)

    # Simulated X- and Z-covariates. The outcome covariates start in the
    # third column, the cost covariates fill the last columns.
    X = data_[:,2:(numCovarsOut + 2)]
    Z = data_[:,-numCovarsCost:]

    # Levels of Y1, Y0 and D from the estimated coefficients.
    Y1_level = np.dot(Y1_beta, X.T)
    Y0_level = np.dot(Y0_beta, X.T)
    D_level  = np.dot(D_gamma, Z.T)

    # Simulate the unobservables based on the estimated distributions.
    # The matrix is forced to float: with integer variances np.diag
    # would return an integer matrix and the covariances assigned below
    # would be truncated.
    cov = np.diag(np.array([U1_var, U0_var, V_var], dtype = 'float'))

    cov[0,2] = U1V_cov
    cov[2,0] = U1V_cov

    cov[1,2] = U0V_cov
    cov[2,1] = U0V_cov

    U = np.random.multivariate_normal(np.tile(0.0,3), cov, numAgents)

    U1 = U[:,0]
    U0 = U[:,1]
    V  = U[:,2]

    # Simulate people's decisions.
    D = np.array((Y1_level-Y0_level+U1-U0-D_level-V) > 0)

    # Number of people who are treated (D=1) and untreated (D=0).
    numTreated   = sum(D)
    numUntreated = numAgents-numTreated

    # Checks.
    assert (_checkdata(X,Y1_level,Y0_level,numAgents)==True)

    # Calculate ATE.
    ATE = (sum(Y1_level)-sum(Y0_level)+sum(U1)-sum(U0))/numAgents

    # Calculate ATT.

    # Index of the people who are treated (D=1).
    index_tr = np.where(D==1)[0]

    # X-covariates and unobservables of the treated. U1 and U0 are
    # one-dimensional, so they take a single index; a second index
    # would raise an IndexError.
    X_tr  = X[index_tr,:]

    U1_tr = U1[index_tr]
    U0_tr = U0[index_tr]

    # Levels of Y1 and Y0 for the treated agents (D=1).
    Y1_level_tr = np.dot(Y1_beta,X_tr.T)
    Y0_level_tr = np.dot(Y0_beta,X_tr.T)

    # Checks.
    assert (_checkdata(X_tr,Y1_level_tr,Y0_level_tr,numTreated)==True)

    ATT = (sum(Y1_level_tr)-sum(Y0_level_tr)+sum(U1_tr)-sum(U0_tr))/numTreated

    # Calculate ATU.

    # Index of the people who are untreated (D=0).
    index_utr = np.where(D==0)[0]

    # X-covariates and unobservables of the untreated.
    X_utr  = X[index_utr,:]

    U1_utr = U1[index_utr]
    U0_utr = U0[index_utr]

    # Levels of Y1 and Y0 for the untreated agents (D=0).
    Y1_level_utr = np.dot(Y1_beta,X_utr.T)
    Y0_level_utr = np.dot(Y0_beta,X_utr.T)

    # Checks.
    assert (_checkdata(X_utr,Y1_level_utr,Y0_level_utr,numUntreated)==True)

    ATU = (sum(Y1_level_utr)-sum(Y0_level_utr)+sum(U1_utr)-sum(U0_utr))/numUntreated

    # MPI: collect the effects of all processes on the root process,
    # which averages and prints them.
    TreatmentEffects = np.array(comm.gather([ATE,ATT,ATU],root=0))

    if rank==0:

        TreatmentEffects = np.mean(TreatmentEffects, axis=0)

        Treatments = {}
        Treatments['ATE'] = TreatmentEffects[0]
        Treatments['ATT'] = TreatmentEffects[1]
        Treatments['ATU'] = TreatmentEffects[2]

        # Parenthesized form prints the same text under Python 2 and 3.
        print("ATE = %s" % Treatments['ATE'])
        print("ATT = %s" % Treatments['ATT'])
        print("ATU = %s" % Treatments['ATU'])
# Example #11
# 0
def estimate():
    ''' Public interface to request an estimation of the Generalized
        Roy Model.

        Reads the parametrization and the dataset named in the
        initialization file, runs the BFGS maximization with the
        optimizer output appended to 'grmLogging.txt', and writes
        'grmRslt.json'.

        Returns None. Raises AssertionError if 'grmInit.ini' or the data
        file is missing or the dataset fails _checkData.
    '''
    # Checks.
    assert (os.path.exists('grmInit.ini'))

    # Process initialization file.
    initDict = grmReader.read()

    # Checks.
    assert (os.path.exists(initDict['fileName']))

    # Process initFile.
    _initializeLogging()

    # Distribute useful covariates.
    numAgents = initDict['numAgents']

    Y1_beta = np.array(initDict['Y1_beta'])
    Y0_beta = np.array(initDict['Y0_beta'])

    D_gamma = np.array(initDict['D_gamma'])

    U1_var = initDict['U1_var']
    U0_var = initDict['U0_var']

    U1V_rho = initDict['U1V_rho']
    U0V_rho = initDict['U0V_rho']

    maxiter = initDict['maxiter']

    # Construct auxiliary objects.
    numCovarsOut = Y1_beta.shape[0]
    numCovarsCost = D_gamma.shape[0]

    # Read and check dataset, distribute entries.
    data = np.genfromtxt(initDict['fileName'], dtype='float')

    assert (_checkData(data, numAgents, numCovarsOut, numCovarsCost) == True)

    # Column layout: Y, D, then the outcome covariates, with the cost
    # covariates in the last numCovarsCost columns.
    Y = data[:, 0]
    D = data[:, 1]

    X = data[:, 2:(numCovarsOut + 2)]
    Z = data[:, -numCovarsCost:]

    # Maximization Script.

    # The values from the initialization file serve as starting values.
    startVals = np.concatenate(
        (Y1_beta, Y0_beta, D_gamma, [U1_var], [U0_var], [U1V_rho], [U0V_rho]))

    # Run maximization. The optimizer's console output is appended to the
    # log file; try/finally guarantees that stdout is restored and the
    # log file is closed even if the optimizer raises.
    logFile = open('grmLogging.txt', 'a')

    sys.stdout = logFile

    try:

        rslt = fmin_bfgs(_maxAlgorihtmInterface, startVals,
                         args = (Y, D, X, Z), maxiter = maxiter,
                         full_output = True)

    finally:

        sys.stdout = sys.__stdout__

        logFile.close()

    # Construct dictionary with results.
    rslt = _distributeEvaluationValues(rslt, numCovarsOut, True)

    #  Write out the *.json file.
    # NOTE(review): this dumps the initialization dictionary, not the
    # estimation results in rslt, which are otherwise unused. Kept as is
    # because it is not visible here whether rslt is JSON serializable;
    # confirm whether rslt was meant to be written.
    with open('grmRslt.json', 'w') as file_:

        json.dump(initDict, file_)