Example #1
    def func_grad_lnX_Nthreads(self, params):
        """
        Use multiprocessing to calculate the negative log-likelihood and its
        gradient w.r.t. lnX, plus the terms coming from the regularization.
        """
        n_samples = self.N
        self.lnX = params
        # Nthreads was undefined in the original snippet; assume the thread
        # count is stored on the instance.
        Nthreads = self.Nthreads

        pool = InterruptiblePool(Nthreads)
        mapfn = pool.map
        Nchunk = np.ceil(1. / Nthreads * n_samples).astype(int)

        # one contiguous chunk of samples per thread
        arglist = [None] * Nthreads
        for i in range(Nthreads):
            s = int(i * Nchunk)
            e = int(s + Nchunk)
            arglist[i] = (self.lnX, self.F, self.B, self.fl, self.f, self.g, self.H, s, e)
        result = list(mapfn(fg, arglist))

        # accumulate the per-chunk likelihoods and gradients
        nll, grad = result[0]
        for i in range(1, Nthreads):
            nll += result[i][0]
            grad += result[i][1]
        pool.close()
        pool.terminate()
        pool.join()
        # add the regularization term and its derivative w.r.t. lnX
        reg_func, reg_grad = self.reg_func_grad_lnX()
        return nll + reg_func, grad + reg_grad
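The worker `fg` must be a module-level function so multiprocessing can pickle it, and it is expected to return the chunk's (nll, grad) pair. The following self-contained sketch shows the same chunked map-reduce pattern with the standard-library Pool; the worker and its sum-of-squares objective are illustrative, not from the source project.

import numpy as np
from multiprocessing import Pool

def chunk_worker(args):
    # toy stand-in for fg: objective value and gradient over one chunk of x
    x, s, e = args
    return np.sum(x[s:e] ** 2), 2.0 * x[s:e]

def chunked_map_reduce(x, nthreads):
    n = len(x)
    nchunk = int(np.ceil(float(n) / nthreads))
    arglist = [(x, i * nchunk, min((i + 1) * nchunk, n))
               for i in range(nthreads)]
    pool = Pool(nthreads)
    try:
        # pool.map returns per-chunk results in input order
        results = pool.map(chunk_worker, arglist)
    finally:
        pool.close()
        pool.join()
    total = sum(r[0] for r in results)
    grad = np.concatenate([r[1] for r in results])
    return total, grad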
Example #2
def initial_pool(prior_obj, eps0, N_particles, N_threads=1):
    """ Initial Pool
    """

    args_list = [[i, prior_obj, eps0, N_particles]
                 for i in xrange(N_particles)]
    if N_threads > 1:
        pool = InterruptiblePool(processes=N_threads)
        mapfn = pool.map
        results = mapfn(initial_pool_sampling, args_list)

        pool.close()
        pool.terminate()
        pool.join()
    else:
        results = []
        for arg in args_list:
            results.append(initial_pool_sampling(arg))

    results = np.array(results).T
    theta_t = results[1:prior_obj.n_params + 1, :]
    w_t = results[prior_obj.n_params + 1, :]
    rhos = results[prior_obj.n_params + 2, :]
    sig_t = covariance(theta_t, w_t)

    return theta_t, w_t, rhos, sig_t
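The slicing after the map call assumes each worker returns a row of the form [particle index, theta_1..theta_n, weight, rho]. A toy stand-in for initial_pool_sampling that produces that layout (for self-containment it takes n_params in place of prior_obj, and the prior draw and distance are synthetic placeholders):

import numpy as np

def initial_pool_sampling_stub(args):
    # returns [i, theta_1..theta_n, w, rho], matching the slicing above
    i, n_params, eps0, N_particles = args
    theta = np.random.uniform(-1.0, 1.0, n_params)  # toy prior draw
    rho = np.abs(theta).sum()                       # toy distance
    w = 1.0 / N_particles                           # uniform initial weight
    return np.concatenate(([i], theta, [w, rho]))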
Example #3
def log_multivariate_gaussian_Nthreads(x, mu, V, xcov, Nthreads=1):
    """
    Use multiprocessing to calculate log likelihoods.
    """
    n_samples = x.shape[0]
    pool = InterruptiblePool(Nthreads)
    mapfn = pool.map
    Nchunk = np.ceil(1. / Nthreads * n_samples).astype(int)

    # one contiguous chunk of samples (and their covariances) per thread
    arglist = [None] * Nthreads
    for i in range(Nthreads):
        s = i * Nchunk
        e = s + Nchunk
        arglist[i] = (x[s:e], mu, V, xcov[s:e])

    result = list(mapfn(lmg, arglist))

    # pool.map preserves input order, so stacking keeps rows aligned with x
    logls = result[0]
    for i in range(1, Nthreads):
        logls = np.vstack((logls, result[i]))

    pool.close()
    pool.terminate()
    pool.join()
    return logls
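Note that Nchunk is rounded up, so the final slice x[s:e] may run past n_samples; numpy clamps out-of-range slices, so the code above still works, but an edge-safe way to compute the chunk boundaries is sketched below (illustrative, not the source code).

import numpy as np

def chunk_bounds(n_samples, nthreads):
    # contiguous, clamped (start, end) pairs covering exactly n_samples
    edges = np.linspace(0, n_samples, nthreads + 1).astype(int)
    return [(edges[i], edges[i + 1]) for i in range(nthreads)]

# e.g. chunk_bounds(10, 3) -> [(0, 3), (3, 6), (6, 10)]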
Example #4
def pmc_abc(N_threads=N_threads):

    # initial pool
    theta_t, w_t, rhos, sig_t = initial_pool()
    t = 0  # iteration number

    plot_thetas(theta_t, w_t, t)
    plt.savefig("/home/mj/public_html/scatter_hod_gaussian_t" + str(t) + ".png")
    plt.close()

    while t < N_iter:

        # shrink the distance threshold to the 75th percentile of the
        # previous iteration's distances
        eps_t = np.percentile(rhos, 75)
        print "New Distance Threshold Eps_t = ", eps_t

        theta_t_1 = theta_t.copy()
        w_t_1 = w_t.copy()
        sig_t_1 = sig_t.copy()

        # parallel importance sampling of the new particle pool
        pool = InterruptiblePool(processes=N_threads)
        mapfn = pool.map
        args_list = [[i, theta_t_1, w_t_1, sig_t_1, eps_t] for i in xrange(N_particles)]
        results = mapfn(importance_pool_sampling, args_list)
        pool.close()
        pool.terminate()
        pool.join()

        results = np.array(results).T
        theta_t = results[1:n_params + 1, :]
        w_t = results[n_params + 1, :]
        rhos = results[n_params + 2, :]
        sig_t = np.cov(theta_t)

        t += 1

        plot_thetas(theta_t, w_t, t)
        plt.savefig("/home/mj/public_html/scatter_hod_gaussian_t" + str(t) + ".png")
        plt.close()
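The only stopping rule in this loop is the iteration count N_iter; eps_t shrinks because each iteration keeps the 75th percentile of the newly accepted distances. A toy illustration of that shrinking schedule with synthetic distances (not project output):

import numpy as np

rng = np.random.RandomState(0)
rhos = rng.exponential(scale=20.0, size=1000)  # synthetic distances
for t in range(5):
    eps_t = np.percentile(rhos, 75)
    # crude stand-in for re-sampling: keep only draws below the threshold
    rhos = rhos[rhos < eps_t]
    print(eps_t)  # decreases monotonically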
Example #5
def pmc_abc(prior_dict, N_particles=100, N_iter=30, eps0=20.0, N_threads=1):
    """
    """
    prior_obj = Prior(prior_dict)

    # initial pool
    theta_t, w_t, rhos, sig_t = initial_pool(prior_obj,
                                             eps0,
                                             N_particles,
                                             N_threads=N_threads)
    t = 0  # iteration number

    #plot_thetas(theta_t , w_t, prior_dict, t)

    while t < N_iter:

        eps_t = np.percentile(rhos, 75)
        print 'New Distance Threshold Eps_t = ', eps_t

        theta_t_1 = theta_t.copy()
        w_t_1 = w_t.copy()
        sig_t_1 = sig_t.copy()

        args_list = [[i, prior_obj, theta_t_1, w_t_1, sig_t_1, eps_t]
                     for i in xrange(N_particles)]

        if N_threads > 1:
            pool = InterruptiblePool(processes=N_threads)
            mapfn = pool.map
            results = mapfn(importance_pool_sampling, args_list)
            pool.close()
            pool.terminate()
            pool.join()
        else:
            results = []
            for args in args_list:
                pool_sample = importance_pool_sampling(args)
                results.append(pool_sample)

        results = np.array(results).T
        theta_t = results[1:prior_obj.n_params + 1, :]
        w_t = results[prior_obj.n_params + 1, :]
        rhos = results[prior_obj.n_params + 2, :]

        sig_t = covariance(theta_t, w_t)

        t += 1

        plot_thetas(theta_t, w_t, prior_dict, t)
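importance_pool_sampling is expected to return the same row layout as initial_pool_sampling, with the weight given by the standard PMC importance ratio: the prior density of the proposed theta over the mixture of Gaussian kernels centred on the previous particles. A hedged sketch of that weight update (function and argument names are illustrative, assuming scipy is available; the project's own version may differ):

import numpy as np
from scipy.stats import multivariate_normal

def pmc_weight(theta_new, prior_pdf, theta_prev, w_prev, sig_prev):
    # mixture of Gaussian transition kernels centred on the old particles;
    # theta_prev has shape (n_params, N) as in the examples above
    kernel = np.array([
        multivariate_normal.pdf(theta_new, mean=theta_j, cov=sig_prev)
        for theta_j in theta_prev.T
    ])
    return prior_pdf(theta_new) / np.sum(w_prev * kernel)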
Example #6
def pmc_abc(N_threads=N_threads):

    # initial pool
    theta_t, w_t, rhos, sig_t = initial_pool()
    w_t = w_t / np.sum(w_t)
    t = 0  # iteration number

    plot_thetas(theta_t, w_t, t)

    while t < N_iter:
        # aggressive (20th percentile) threshold for the first few
        # iterations, then relax to the median
        if t < 4:
            eps_t = np.percentile(np.atleast_2d(rhos), 20, axis=1)
        else:
            eps_t = np.percentile(np.atleast_2d(rhos), 50, axis=1)
        print 'New Distance Threshold Eps_t = ', eps_t, "t=", t

        theta_t_1 = theta_t.copy()
        w_t_1 = w_t.copy()
        sig_t_1 = sig_t.copy()

        args_list = [[i, theta_t_1, w_t_1, sig_t_1, eps_t]
                     for i in xrange(N_particles)]
        # parallel importance sampling of the new particle pool
        pool = InterruptiblePool(processes=N_threads)
        mapfn = pool.map
        results = mapfn(importance_pool_sampling, args_list)
        pool.close()
        pool.terminate()
        pool.join()

        results = np.array(results).T
        theta_t = results[1:n_params + 1, :]
        w_t = results[n_params + 1, :]
        w_t = w_t / np.sum(w_t)
        rhos = results[n_params + 2:, :]
        # twice the weighted covariance as the transition kernel
        sig_t = 2. * covariance(theta_t, w_t)
        t += 1

        plot_thetas(theta_t, w_t, t)
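covariance(theta_t, w_t) is the project's weighted sample covariance; doubling it is a common PMC kernel choice. A minimal weighted-covariance implementation consistent with how it is called here, as a sketch only (the project's own version may differ):

import numpy as np

def covariance(theta, w):
    # weighted covariance of particles; theta has shape (n_params, N)
    w = w / np.sum(w)
    mean = np.sum(w * theta, axis=1)
    dev = theta - mean[:, None]
    # unbiased weighted estimator
    return np.dot(w * dev, dev.T) / (1.0 - np.sum(w ** 2))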
Example #7
def initial_pool():

    pool = InterruptiblePool(processes=N_threads)
    mapfn = pool.map
    args_list = [i for i in xrange(N_particles)]
    results = mapfn(initial_pool_sampling, args_list)

    pool.close()
    pool.terminate()
    pool.join()

    results = np.array(results).T
    theta_t = results[1 : n_params + 1, :]
    w_t = results[n_params + 1, :]
    rhos = results[n_params + 2, :]
    sig_t = np.cov(theta_t)

    return theta_t, w_t, rhos, sig_t
Example #8
def parallel_bulkfit(path, num_splits=20, ncores=8, start_pt=0):
    '''
    Run bulk fitting in parallel. Results are written out in chunks to make
    restarting easier.
    '''

    spectra = [f for f in os.listdir(path) if f[-4:] == 'fits']

    split_at = len(spectra) / num_splits

    splits = [split_at * i for i in range(1, num_splits)]
    splits.append(len(spectra))

    splits = splits[start_pt:]

    # resume after the chunks that were already written out
    prev_split = split_at * start_pt

    for i, split in enumerate(splits):

        print("On split " + str(i + 1) + " of " + str(len(splits)))
        print(str(datetime.now()))

        split_spectra = spectra[prev_split:split]

        pool = Pool(processes=ncores)

        output = pool.map(do_specfit, split_spectra)

        pool.close()
        pool.join()

        df = DataFrame(output[0], columns=split_spectra[:1])

        for out, spec in zip(output[1:], split_spectra[1:]):
            df[spec[:-5]] = out

        # number output files by absolute chunk index so a restart does not
        # overwrite earlier chunks
        df.to_csv("spectral_fitting_" + str(start_pt + i + 1) + ".csv")

        prev_split = split
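With the per-chunk CSVs on disk, a restart only needs start_pt set to the number of chunks already completed, and the outputs can be re-read and joined afterwards. A usage sketch (the path and the pandas re-read are illustrative):

from glob import glob
import pandas as pd

# hypothetical restart after the first 12 of 20 chunks completed
# parallel_bulkfit("/data/spectra/", num_splits=20, ncores=8, start_pt=12)

# join all per-chunk outputs into one table afterwards
frames = [pd.read_csv(f, index_col=0)
          for f in sorted(glob("spectral_fitting_*.csv"))]
df_all = pd.concat(frames, axis=1)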
Example #9
def initial_pool():

    args_list = np.arange(N_particles)
    # parallel sampling of the initial particle pool
    pool = InterruptiblePool(processes=N_threads)
    mapfn = pool.map
    results = mapfn(initial_pool_sampling, args_list)
    pool.close()
    pool.terminate()
    pool.join()

    results = np.array(results).T
    theta_t = results[1:n_params + 1, :]
    w_t = results[n_params + 1, :]
    w_t = w_t / np.sum(w_t)
    rhos = results[n_params + 2:, :]
    sig_t = covariance(theta_t, w_t)
    return theta_t, w_t, rhos, sig_t
Example #10
    def pmc_abc(self):
        """ Population Monte Carlo ABC loop.
        """
        self.rhos = self.initial_pool()

        while self.t < self.T:
            # shrink the distance threshold to the 75th percentile of the
            # previous iteration's distances
            self.eps_t = np.percentile(self.rhos, 75)

            print 'Epsilon t', self.eps_t

            self.theta_t_1 = self.theta_t.copy()
            self.w_t_1 = self.w_t.copy()
            self.sig_t_1 = self.sig_t.copy()

            pool = InterruptiblePool(self.Nthreads)
            mapfn = pool.map
            args_list = [i for i in xrange(self.N)]
            results = mapfn(unwrap_self_importance_sampling,
                            zip([self] * len(args_list), args_list))

            pool.close()
            pool.terminate()
            pool.join()

            pars = np.array(results).T
            self.theta_t = pars[1:self.n_params + 1, :].copy()
            self.w_t = pars[self.n_params + 1, :].copy()
            self.rhos = pars[self.n_params + 2, :].copy()

            self.sig_t = 2.0 * np.cov(self.theta_t)
            self.t += 1

            self.writeout()
            self.plotout()

        return None
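The zip([self] * len(args_list), args_list) idiom works around the fact that bound methods cannot be pickled on Python 2: map is given a module-level function that unwraps the (instance, index) pair and calls back into the object. A sketch of the unwrapper this call assumes (the method name importance_sampling is an assumption):

def unwrap_self_importance_sampling(arg):
    # must live at module level so multiprocessing can pickle it
    obj, i = arg
    return obj.importance_sampling(i)  # assumed instance method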
Example #11
        if zstr == 'z': 
            zbool = True
        elif zstr == 'real': 
            zbool = False
        else: 
            raise ValueError
        nthreads = int(Sys.argv[7])

        if nthreads > 1: 
            args_list = [(mneut, ireal, nzbin, zbool) for ireal in np.arange(nreal_i, nreal_f+1)]
            pool = Pewl(processes=nthreads)
            mapfn = pool.map
            results = mapfn(_NeutHalo_pre3PCF, args_list) 
            pool.close()
            pool.terminate()
            pool.join()
        else:
            for ireal in range(nreal_i, nreal_f+1):  
                NeutHalo_pre3PCF(mneut, ireal, nzbin, zspace=zbool) 
    elif arg1 == 'plk': 
        nreal = int(Sys.argv[3])
        nzbin = int(Sys.argv[4])
        zstr = Sys.argv[5]
        if zstr == 'z': 
            zbool = True
        elif zstr == 'real': 
            zbool = False
        else: 
            raise ValueError
        NeutHalo_Plk(mneut, nreal, nzbin, zspace=zbool)
    else: 
Example #12
def build_multipro(type, catalog_name, corr_name, n_mocks, Nthreads=8, ell=2, Ngrid=360, **kwargs): 
    '''
    Calculate dLOS for catalogs in parallel using an interruptible
    pool, i.e. a multiprocessing pool that allows for interruptions.

    Parameters
    ----------
    catalog_name : Name of catalog
    corr_name : Name of correction
    n_mocks : Number of mock catalogs to calculate
    Nthreads : Number of CPUs to use

    '''
    
    if isinstance(n_mocks, list): 
        n_mock_list = n_mocks
    else:
        n_mock_list = range(1, n_mocks + 1)

    corrdict = {} 
    if catalog_name == 'nseries':
        
        if isinstance(corr_name, dict): 
            corrdict = corr_name
        else:
            corrdict['name'] = corr_name

            if 'dlospeak' in corr_name: 
                # hardcoded values for bestfit dlos peak
                # parameters
                corrdict['fit'] = 'gauss'
                corrdict['sigma'] = 3.9
                corrdict['fpeak'] = 0.68

            if 'env' in corr_name: 
                # hardcoded values for galaxy environment
                # parameters
                corrdict['n_NN'] = 5

            if 'photoz' in corr_name: 

                corrdict['d_photoz_tail_cut'] = 15 

            if corr_name == 'fourier_tophat': 
                corrdict['fs'] = 1.0 
                corrdict['rc'] = 0.43 
                corrdict['k_fit'] = 0.7 
                corrdict['k_fixed'] = 0.84
    
    if type == 'bk':
        arglist = [ 
                [{ 
                    'catalog': {'name': catalog_name, 'n_mock': i_mock}, 
                    'spec': {'P0': 20000, 'Lbox': 3600, 'Ngrid': Ngrid} 
                    }, kwargs]
                for i_mock in n_mock_list]
    else:
        arglist = [ [{
                    'catalog': {'name': catalog_name, 'n_mock': i_mock}, 
                    'correction': corrdict, 
                    'spec': {
                        'P0': 20000, #P0 
                        'Lbox': 3600, 
                        'Ngrid': Ngrid, 
                        'ell': ell 
                        }

                    }, ell, kwargs]
                for i_mock in n_mock_list
                ]
    
    if Nthreads > 1: 
        pool = Pewl(processes=Nthreads)
        mapfn = pool.map
    
        if type == 'data':
            mapfn(build_corrdata_wrapper, arglist)
        elif type == 'pk':
            mapfn(build_pk_wrapper, arglist)
        elif type == 'bk':
            mapfn(build_bk_wrapper, arglist)

        pool.close()
        pool.terminate()
        pool.join() 
    else: 
        for arg in arglist: 
            if type == 'data': 
                build_corrdata_wrapper(arg)
            elif type == 'pk': 
                build_pk_wrapper(arg)
            elif type == 'bk': 
                build_bk_wrapper(arg)

    return None
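A typical invocation, assuming the wrapper functions are importable and a power-spectrum run is wanted, might look like the following (all argument values are illustrative):

# hypothetical call: quadrupole power spectra for 10 nseries mocks with
# the 'dlospeak' correction, on 4 processes
build_multipro('pk', 'nseries', 'dlospeak', 10, Nthreads=4, ell=2, Ngrid=360)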