Example #1
0
def init_folder(working_path, sub_folder):
  """Prepare the run folder and open the run log.

  The folder ``working_path/sub_folder`` is created when it is not already
  a directory, the simulation files are copied into it through
  ``anc.copy_simulation_files`` and a ``trades_run.log`` file is opened
  for writing inside it. A short header is emitted through
  ``anc.print_both``.

  Returns the tuple ``(working_folder, run_log, of_run)``; ``of_run`` is
  the still-open log file object and must be closed by the caller.
  """
  working_folder = os.path.join(working_path, sub_folder)
  folder_exists = os.path.isdir(working_folder)
  if not folder_exists:
    os.makedirs(working_folder)

  # copy the simulation input files into the run folder
  anc.copy_simulation_files(working_path, working_folder)

  run_log = os.path.join(working_folder, "trades_run.log")
  of_run = open(run_log, 'w')

  # header lines of the log, emitted in this order
  header_lines = (
    "# pyTRADES LOG FILE",
    "# working_path = %s" %(working_path),
    "# working_folder = %s" %(working_folder),
    "# run_log = %s" %(run_log),
  )
  for header_line in header_lines:
    anc.print_both(header_line, of_run)

  return working_folder, run_log, of_run
Example #2
0
def init_folder(working_path, sub_folder):
  """Create the run sub-folder, copy the simulation files, open the log.

  Returns ``(working_folder, run_log, of_run)`` where ``working_folder``
  is ``working_path`` joined with ``sub_folder``, ``run_log`` is the path
  of ``trades_run.log`` inside it and ``of_run`` is that file opened in
  write mode (left open for the caller).
  """
  working_folder = os.path.join(working_path, sub_folder)
  if os.path.isdir(working_folder):
    pass  # folder already there: nothing to create
  else:
    os.makedirs(working_folder)

  # copy files
  anc.copy_simulation_files(working_path, working_folder)

  run_log = os.path.join(working_folder, "trades_run.log")
  of_run = open(run_log, 'w')

  # log header: title first, then the three paths
  anc.print_both("# pyTRADES LOG FILE", of_run)
  path_lines = [
    "# working_path = %s" %(working_path),
    "# working_folder = %s" %(working_folder),
    "# run_log = %s" %(run_log),
  ]
  for path_line in path_lines:
    anc.print_both(path_line, of_run)

  return working_folder, run_log, of_run
Example #3
0
def init_folder(working_path, sub_folder):
    """Set up the output folder of a run and start its log file.

    ``working_path/sub_folder`` is created if it is not an existing
    directory, ``anc.copy_simulation_files`` is called to populate it and
    ``trades_run.log`` is opened for writing in it, with a four-line
    header emitted through ``anc.print_both``.

    Returns ``(working_folder, run_log, of_run)``; ``of_run`` is the open
    log file handle, which the caller is responsible for closing.
    """
    working_folder = os.path.join(working_path, sub_folder)
    needs_creation = not os.path.isdir(working_folder)
    if needs_creation:
        os.makedirs(working_folder)

    # copy files
    anc.copy_simulation_files(working_path, working_folder)

    run_log = os.path.join(working_folder, "trades_run.log")
    of_run = open(run_log, 'w')

    header = [
        "# pyTRADES LOG FILE",
        "# working_path = %s" % (working_path),
        "# working_folder = %s" % (working_folder),
        "# run_log = %s" % (run_log),
    ]
    for text in header:
        anc.print_both(text, of_run)

    return working_folder, run_log, of_run
Example #4
0
def main():
    """Run the PSO + emcee analysis selected on the command line.

    Sequence of operations, as performed below:

    1. read the command-line options with ``get_args`` and initialise the
       Fortran library through ``pytrades_lib.pytrades.initialize_trades``;
    2. retrieve sizes, parameter names, initial parameters and boundaries
       from ``pytrades_lib.pytrades``;
    3. create the run folder / log file with ``init_folder``;
    4. for each of the ``n_global`` global runs: run PSO
       (``pso_type == 'run'``), reload a previous ``pso_run.hdf5``
       (``'exists'``) or skip PSO (``'skip'``), build the initial walkers
       with ``compute_initial_walkers`` and run emcee, writing
       ``emcee_summary.hdf5`` in the folder of that global run;
    5. close the log file and deallocate the Fortran variables.

    Raises:
        ValueError: if ``cli.pso_type`` is not ``'run'``, ``'exists'`` or
            ``'skip'`` (previously this ended in a ``NameError`` on ``p0``
            only after the library and the pools had been set up).

    NOTE(review): ``delta_sigma`` is passed to ``compute_initial_walkers``
    but is not defined inside this function; it is assumed to be a
    module-level name -- confirm.
    """
    # ln_err_const is assigned below and declared global so that the value
    # is visible at module level.
    global ln_err_const

    # READ COMMAND LINE ARGUMENTS
    cli = get_args()

    # Fail early on an unknown PSO mode: none of the branches in the loop
    # below would define the initial walkers p0.
    if cli.pso_type not in ('run', 'exists', 'skip'):
        raise ValueError(
            "unknown pso_type %r: expected 'run', 'exists' or 'skip'" %
            (cli.pso_type, ))

    # STARTING TIME
    start = time.time()

    # RENAME
    working_path = cli.full_path
    nthreads = cli.nthreads

    # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB -> PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ...
    pytrades_lib.pytrades.initialize_trades(working_path, cli.sub_folder,
                                            nthreads)

    # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE
    n_bodies = pytrades_lib.pytrades.n_bodies  # NUMBER OF TOTAL BODIES OF THE SYSTEM
    n_planets = n_bodies - 1  # NUMBER OF PLANETS IN THE SYSTEM
    ndata = pytrades_lib.pytrades.ndata  # TOTAL NUMBER OF DATA AVAILABLE
    nfit = pytrades_lib.pytrades.nfit  # NUMBER OF PARAMETERS TO FIT
    nfree = pytrades_lib.pytrades.nfree  # NUMBER OF FREE PARAMETERS (ie nrvset)
    dof = pytrades_lib.pytrades.dof  # NUMBER OF DEGREES OF FREEDOM = NDATA - NFIT

    # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT IT TO PYTHON STRINGS
    str_len = pytrades_lib.pytrades.str_len
    temp_names = pytrades_lib.pytrades.get_parameter_names(nfit, str_len)
    parameter_names = anc.convert_fortran_charray2python_strararray(
        temp_names)

    fitting_parameters = pytrades_lib.pytrades.fitting_parameters  # INITIAL PARAMETER SET (NEEDED ONLY TO HAVE THE PROPER ARRAY/VECTOR)
    parameters_minmax = pytrades_lib.pytrades.parameters_minmax  # PARAMETER BOUNDARIES

    # RADIAL VELOCITIES SET
    n_rv = pytrades_lib.pytrades.nrv
    n_set_rv = pytrades_lib.pytrades.nrvset

    # TRANSITS SET: count the planets with at least one transit time
    n_t0 = pytrades_lib.pytrades.nt0
    n_t0_sum = pytrades_lib.pytrades.ntts
    n_set_t0 = sum(1 for i_body in range(0, n_planets) if n_t0[i_body] > 0)

    # global constant for the loglhd, taken from the library
    ln_err_const = pytrades_lib.pytrades.ln_err_const

    # SET EMCEE PARAMETERS:
    nwalkers, nruns, nsave, npost = get_emcee_arguments(cli, nfit)

    # INITIALISE SCRIPT FOLDER/LOG FILE
    working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder)

    anc.print_both('', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both(' pyTRADES', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both('', of_run)
    anc.print_both(' WORKING PATH = %s' % (working_path), of_run)
    anc.print_both(' NUMBER OF THREADS = %d' % (nthreads), of_run)
    anc.print_both(
        ' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d' %
        (ndata, nfit, nfree, dof), of_run)
    anc.print_both(' Total N_RV = %d for %d set(s)' % (n_rv, n_set_rv), of_run)
    anc.print_both(
        ' Total N_T0 = %d for %d out of %d planet(s)' %
        (n_t0_sum, n_set_t0, n_planets), of_run)
    anc.print_both(' %s = %.7f' % ('log constant error = ', ln_err_const),
                   of_run)
    anc.print_both(
        ' %s = %.7f' % ('IN FORTRAN log constant error = ',
                        pytrades_lib.pytrades.ln_err_const), of_run)

    # INITIALISE PSO ARGUMENTS FROM pso.opt FILE
    pytrades_lib.pytrades.init_pso(1, working_path)  # read PSO options
    # PSO VARIABLES
    np_pso = pytrades_lib.pytrades.np_pso
    nit_pso = pytrades_lib.pytrades.nit_pso
    n_global = pytrades_lib.pytrades.n_global
    anc.print_both(
        ' PSO n_global = %d npop = %d ngen = %d' % (n_global, np_pso, nit_pso),
        of_run)

    # RUN PSO+EMCEE n_global TIMES
    for iter_global in range(0, n_global):

        # NOTE(review): a single-worker pool is kept open during the PSO
        # phase and replaced by the nthreads pool before sampling; the
        # reason is not visible in this function -- kept as in the original.
        threads_pool = emcee.interruptible_pool.InterruptiblePool(1)

        # CREATES PROPER WORKING PATH AND NAME
        i_global = iter_global + 1
        # joining with '' leaves a trailing path separator on pso_path
        pso_path = os.path.join(
            os.path.join(working_folder, '%04d_pso2emcee' % (i_global)), '')
        pytrades_lib.pytrades.path_change(pso_path)

        anc.print_both(
            '\n\n GLOBAL RUN %04d INTO PATH: %s\n' % (i_global, pso_path),
            of_run)

        if (cli.pso_type == 'run'):
            # RUN PSO
            anc.print_both(' RUN PSO', of_run)

            pso_start = time.time()
            if (not os.path.exists(pso_path)):
                os.makedirs(pso_path)
            # copy files
            anc.copy_simulation_files(working_path, pso_path)

            # CALL RUN_PSO SUBROUTINE FROM TRADES_LIB: RUNS PSO AND COMPUTES THE BEST SOLUTION, SAVING ALL THE POPULATION EVOLUTION
            pso_parameters, pso_fitness = pytrades_lib.pytrades.pyrun_pso(
                nfit, i_global)
            anc.print_both(' completed run_pso', of_run)

            pso_best_evolution = np.asarray(
                pytrades_lib.pytrades.pso_best_evolution[...],
                dtype=np.float64)
            anc.print_both(' pso_best_evolution retrieved', of_run)

            anc.print_both(' last pso_best_evolution', of_run)
            last_pso_fitness = pso_best_evolution[-1, -1].astype(np.float64)
            anc.print_both(' fitness = %.f' % (last_pso_fitness), of_run)

            # SAVE PSO SIMULATION IN pso_run.hdf5 FILE
            pso_file = os.path.join(pso_path, 'pso_run.hdf5')
            # single-argument print(): same output under Python 2 and 3
            print(' Creating pso hdf5 file: %s' % (pso_file))
            # the with-block closes the file even if a dataset write fails
            with h5py.File(pso_file, 'w') as pso_hdf5:
                pso_hdf5.create_dataset('population',
                                        data=pytrades_lib.pytrades.population,
                                        dtype=np.float64)
                pso_hdf5.create_dataset(
                    'population_fitness',
                    data=pytrades_lib.pytrades.population_fitness,
                    dtype=np.float64)
                pso_hdf5.create_dataset('pso_parameters',
                                        data=pso_parameters,
                                        dtype=np.float64)
                pso_hdf5.create_dataset('pso_fitness',
                                        data=np.array(pso_fitness),
                                        dtype=np.float64)
                pso_hdf5.create_dataset('pso_best_evolution',
                                        data=pso_best_evolution,
                                        dtype=np.float64)
                pso_hdf5.create_dataset('parameters_minmax',
                                        data=parameters_minmax,
                                        dtype=np.float64)
                pso_hdf5.create_dataset('parameter_names',
                                        data=parameter_names,
                                        dtype='S10')
                pso_hdf5['population'].attrs['npop'] = np_pso
                pso_hdf5['population'].attrs['niter'] = nit_pso
                pso_hdf5['population'].attrs['iter_global'] = iter_global + 1
                pso_hdf5['population'].attrs['nfit'] = nfit

            anc.print_both(' ', of_run)
            fitness_iter, lgllhd_iter, check_iter = pytrades_lib.pytrades.write_summary_files(
                i_global, pso_parameters)
            elapsed = time.time() - pso_start
            elapsed_d, elapsed_h, elapsed_m, elapsed_s = anc.computation_time(
                elapsed)
            anc.print_both(' ', of_run)
            anc.print_both(
                ' PSO FINISHED in %2d day %02d hour %02d min %.2f sec - bye bye'
                % (int(elapsed_d), int(elapsed_h), int(elapsed_m), elapsed_s),
                of_run)

            # walkers are started around the PSO best solution
            p0 = compute_initial_walkers(nfit, nwalkers, pso_parameters,
                                         parameters_minmax, parameter_names,
                                         delta_sigma, of_run)

        elif (cli.pso_type == 'exists'):
            # READ PREVIOUS PSO_RUN.HDF5 FILE AND INITIALISE POPULATION FOR EMCEE
            anc.print_both(
                ' READ PREVIOUS PSO_RUN.HDF5 FILE AND INITIALISE POPULATION FOR EMCEE',
                of_run)

            population, population_fitness, pso_parameters, pso_fitness, pso_best_evolution, pso_parameters_minmax, pso_parameter_names, pop_shape = get_pso_data(
                os.path.join(pso_path, 'pso_run.hdf5'))

            fitness_iter, lgllhd_iter, check_iter = pytrades_lib.pytrades.write_summary_files(
                i_global, pso_parameters)

            anc.print_both(
                ' read pso_run.hdf5 file with best pso_fitness = %.7f' %
                (pso_fitness), of_run)

            # walkers are started around the stored PSO best solution
            p0 = compute_initial_walkers(nfit, nwalkers, pso_parameters,
                                         parameters_minmax, parameter_names,
                                         delta_sigma, of_run)

        else:
            # cli.pso_type == 'skip' (validated at the top of the function)
            # DO NOT RUN PSO, ONLY EMCEE
            anc.print_both(' DO NOT RUN PSO, ONLY EMCEE', of_run)

            # walkers are started around the initial fitting parameters
            p0 = compute_initial_walkers(nfit, nwalkers, fitting_parameters,
                                         parameters_minmax, parameter_names,
                                         delta_sigma, of_run)

        anc.print_both(
            ' emcee chain: nwalkers = %d nruns = %d' % (nwalkers, nruns),
            of_run)
        anc.print_both(' sampler ... ', of_run)

        # close the single-worker pool before opening the sampling pool
        threads_pool.close()
        threads_pool.terminate()
        threads_pool.join()

        threads_pool = emcee.interruptible_pool.InterruptiblePool(nthreads)
        sampler = emcee.EnsembleSampler(nwalkers,
                                        nfit,
                                        lnprob,
                                        pool=threads_pool)

        anc.print_both(' ready to go', of_run)
        anc.print_both(' with nsave = %r' % (nsave), of_run)
        sys.stdout.flush()

        summary_file = os.path.join(pso_path, 'emcee_summary.hdf5')

        # nsave is either False or the number of steps between two saves;
        # the comparison with False is kept exactly as in the original.
        if (nsave != False):
            # save temporary sampling during emcee every nsave steps;
            # start from a fresh summary file
            if (os.path.isfile(summary_file)):
                os.remove(summary_file)
            with h5py.File(summary_file, 'a') as f_hdf5:
                f_hdf5.create_dataset('parameter_names',
                                      data=parameter_names,
                                      dtype='S10')
                f_hdf5.create_dataset('boundaries',
                                      data=parameters_minmax,
                                      dtype=np.float64)
                f_hdf5.create_dataset('chains', (nwalkers, nruns, nfit),
                                      dtype=np.float64)
                f_hdf5['chains'].attrs['nwalkers'] = nwalkers
                f_hdf5['chains'].attrs['nruns'] = nruns
                f_hdf5['chains'].attrs['nfit'] = nfit
                f_hdf5['chains'].attrs['nfree'] = nfree
                temp_lnprob = f_hdf5.create_dataset('lnprobability',
                                                    (nwalkers, nruns),
                                                    dtype=np.float64)
                temp_lnprob.attrs['ln_err_const'] = ln_err_const
                # FIX: one acceptance fraction per walker (was sized nfit),
                # the same shape the single-run branch below stores.
                f_hdf5.create_dataset('acceptance_fraction',
                                      data=np.zeros((nwalkers)),
                                      dtype=np.float64)
                f_hdf5.create_dataset('autocor_time',
                                      data=np.zeros((nfit)),
                                      dtype=np.float64)

            pos = p0
            # NOTE(review): when nruns is not a multiple of nsave the
            # trailing nruns - niter_save*nsave steps are never run; the
            # 'completed_steps' attribute records how far the chain got.
            niter_save = int(nruns / nsave)
            state = None
            # FIX: defined even when niter_save == 0, so that the summary
            # message after this branch cannot fail with a NameError.
            mean_acceptance_fraction = np.nan
            anc.print_both(' Running emcee with temporary saving', of_run)
            sys.stdout.flush()

            for i in range(0, niter_save):
                anc.print_both('', of_run)
                anc.print_both(' iter: %6d ' % (i + 1), of_run)
                step_start = i * nsave
                step_end = step_start + nsave
                pos, prob, state = sampler.run_mcmc(pos,
                                                    N=nsave,
                                                    rstate0=state)
                anc.print_both('completed %d steps of %d' % (step_end, nruns),
                               of_run)
                with h5py.File(summary_file, 'a') as f_hdf5:
                    temp_dset = f_hdf5['chains']
                    temp_dset[:, step_start:step_end, :] = sampler.chain[:, step_start:step_end, :]
                    temp_dset.attrs['completed_steps'] = step_end

                    temp_lnprob = f_hdf5['lnprobability']
                    temp_lnprob[:, step_start:step_end] = sampler.lnprobability[:, step_start:step_end]

                    acceptance_fraction = sampler.acceptance_fraction
                    # FIX: write into the dataset; the original rebound the
                    # local name and never stored the values in the file.
                    f_hdf5['acceptance_fraction'][...] = acceptance_fraction
                    mean_acceptance_fraction = np.mean(acceptance_fraction)

                    acor_time = anc.compute_acor_time(sampler,
                                                      steps_done=step_end)
                    f_hdf5['autocor_time'][...] = acor_time
                sys.stdout.flush()

            anc.print_both('', of_run)
            anc.print_both(
                '...done with saving temporary total shape = %s' %
                (str(np.shape(sampler.chain))), of_run)
            anc.print_both('', of_run)
            sys.stdout.flush()

        else:
            # GOOD COMPLETE SINGLE RUNNING OF EMCEE, WITHOUT REMOVING THE BURN-IN
            anc.print_both(' Running full emcee ...', of_run)
            sys.stdout.flush()
            sampler.run_mcmc(p0, nruns)
            anc.print_both('done', of_run)
            anc.print_both('', of_run)
            sys.stdout.flush()
            acceptance_fraction = sampler.acceptance_fraction
            mean_acceptance_fraction = np.mean(acceptance_fraction)
            acor_time = anc.compute_acor_time(sampler)
            lnprobability = sampler.lnprobability
            # save chains with original shape as hdf5 file
            with h5py.File(summary_file, 'w') as f_hdf5:
                f_hdf5.create_dataset('chains',
                                      data=sampler.chain,
                                      dtype=np.float64)
                f_hdf5['chains'].attrs['nwalkers'] = nwalkers
                f_hdf5['chains'].attrs['nruns'] = nruns
                f_hdf5['chains'].attrs['nfit'] = nfit
                f_hdf5['chains'].attrs['nfree'] = nfree
                f_hdf5['chains'].attrs['completed_steps'] = nruns
                f_hdf5.create_dataset('parameter_names',
                                      data=parameter_names,
                                      dtype='S10')
                f_hdf5.create_dataset('boundaries',
                                      data=parameters_minmax,
                                      dtype=np.float64)
                f_hdf5.create_dataset('acceptance_fraction',
                                      data=acceptance_fraction,
                                      dtype=np.float64)
                f_hdf5.create_dataset('autocor_time',
                                      data=acor_time,
                                      dtype=np.float64)
                f_hdf5.create_dataset('lnprobability',
                                      data=lnprobability,
                                      dtype=np.float64)
                f_hdf5['lnprobability'].attrs['ln_err_const'] = ln_err_const

        anc.print_both(
            " Mean_acceptance_fraction should be between [0.25-0.5] = %.6f" %
            (mean_acceptance_fraction), of_run)
        anc.print_both('', of_run)

        # close the pool of threads
        threads_pool.close()
        threads_pool.terminate()
        threads_pool.join()

        anc.print_both('COMPLETED EMCEE', of_run)

        # elapsed time is measured from the start of the whole script
        elapsed = time.time() - start
        elapsed_d, elapsed_h, elapsed_m, elapsed_s = anc.computation_time(
            elapsed)

        anc.print_both('', of_run)
        anc.print_both(
            ' pyTRADES: EMCEE FINISHED in %2d day %02d hour %02d min %.2f sec - bye bye'
            % (int(elapsed_d), int(elapsed_h), int(elapsed_m), elapsed_s),
            of_run)
        anc.print_both('', of_run)

    of_run.close()
    pytrades_lib.pytrades.deallocate_variables()

    return
Example #5
0
def compute_initial_walkers(nfit, nwalkers, fitting_parameters,
                            parameters_minmax, parameter_names, delta_sigma,
                            of_run):
    """Build the list of ``nwalkers`` starting positions for emcee.

    Every walker is drawn as ``fitting_parameters`` plus Gaussian noise
    whose per-parameter scale comes from ``compute_proper_sigma``; a draw
    is kept only when ``lnprob`` of it is not infinite. The last walker is
    then replaced by ``fitting_parameters`` itself.

    When the string form of ``delta_sigma`` contains ``'ran'``, additional
    Gaussian clouds are created: for each of them one randomly selected
    parameter of ``fitting_parameters`` is redrawn uniformly inside its
    boundaries and a group of 30 walkers is overwritten with draws around
    that new starting point. At least 30 walkers stay around the original
    ``fitting_parameters``.

    Args:
        nfit: number of fitted parameters.
        nwalkers: number of walkers to create.
        fitting_parameters: array of the central parameter values (must
            support indexing and ``.copy()``).
        parameters_minmax: boundaries, indexed as ``[i, 0]`` (min) and
            ``[i, 1]`` (max).
        parameter_names: names of the fitted parameters.
        delta_sigma: value convertible to float, or a string (a string
            containing ``'ran'`` enables the random-Gaussian clouds). When
            it cannot be converted to float, ``1.e-4`` is passed to
            ``compute_proper_sigma``.
        of_run: open log file passed to ``anc.print_both``.

    Returns:
        list of ``nwalkers`` parameter arrays.

    NOTE(review): both acceptance loops run until ``lnprob`` is finite and
    therefore never end if no finite draw exists.
    """
    import sys  # local import: only needed for the same-line progress output

    anc.print_both(
        ' Inititializing walkers with delta_sigma = %s' %
        (str(delta_sigma).strip()), of_run)
    p0 = []
    i_p0 = 0

    anc.print_both(' good p0:', of_run)

    # delta_sigma may be a non-numeric option string: fall back to 1.e-4.
    # Only conversion errors are caught (the original used a bare except).
    try:
        d_sigma = np.float64(delta_sigma)
    except (TypeError, ValueError):
        d_sigma = np.float64(1.e-4)
    delta_sigma_out = compute_proper_sigma(nfit, d_sigma, parameter_names)

    def _scatter_around(centre):
        # one Gaussian draw per fitted parameter, centred on `centre`
        return np.array([
            centre[ifit] +
            np.random.normal(loc=0., scale=delta_sigma_out[ifit])
            for ifit in range(0, nfit)
        ],
                        dtype=np.float64)

    # progress: the index of each accepted walker, all on one line
    sys.stdout.write(' ')
    # init all initial walkers
    while True:
        test_p0 = _scatter_around(fitting_parameters)
        test_lg = lnprob(test_p0)
        if (not np.isinf(test_lg)):
            i_p0 += 1
            p0.append(test_p0)
            sys.stdout.write(' %d' % (i_p0))
            if (i_p0 == nwalkers):
                break
    p0[-1] = fitting_parameters  # I want the original fitting parameters in the initial walkers
    sys.stdout.write('\n')

    # if 'random' opt ==> create other Gaussian starting points (<->nwalkers)
    if ('ran' in str(delta_sigma).strip().lower()):
        delta_parameters = np.abs(
            parameters_minmax[:, 1] -
            parameters_minmax[:, 0])  # DELTA BETWEEN MAX AND MIN OF BOUNDARIES
        nw_min = 30
        # a new Gaussian starting point each nw_min walkers, keeping at least
        # nw_min walkers Gaussian to the original fitting parameters
        n_gpts = int((nwalkers - nw_min) / nw_min)
        print(' new gaussian starting points:  %d' % (n_gpts))
        if (n_gpts > 0):
            print(' doing random-gaussian points ... ')
            for i_gpt in range(0, n_gpts):
                # create new starting point, but check if lnL != -inf
                new_start = fitting_parameters.copy()
                # change only one parameter, picked uniformly in 0..nfit-1
                # (FIX: scaling by nfit - 1 could never pick the last one)
                sel_fit = int(np.random.random() * nfit)
                print('gpt  %d' % (i_gpt + 1))
                print('selected sel_fit =  %d  ==>  %s' %
                      (sel_fit, parameter_names[sel_fit]))
                print('val =  %s  with min =  %s  and delta =  %s' %
                      (new_start[sel_fit], parameters_minmax[sel_fit, 0],
                       delta_parameters[sel_fit]))
                while True:
                    new_start[sel_fit] = parameters_minmax[
                        sel_fit,
                        0] + delta_parameters[sel_fit] * np.random.random()
                    test_lg = lnprob(new_start)
                    if (not np.isinf(test_lg)):
                        break
                # overwrite the i_gpt-th group of nw_min walkers
                i_pos = nw_min * i_gpt
                sys.stdout.write('i_pos = ')
                while True:
                    test_p0 = _scatter_around(new_start)
                    test_lg = lnprob(test_p0)
                    if (not np.isinf(test_lg)):
                        p0[i_pos] = test_p0
                        sys.stdout.write(' %d' % (i_pos))
                        i_pos += 1
                        if (i_pos % nw_min == 0):
                            break
                # end the progress line of this group
                sys.stdout.write('\n')
        print('')

    anc.print_both(' done initial walkers.', of_run)

    return p0
Example #6
0
def main():
  """Run the pyTRADES PSO + emcee pipeline driven by the command line.

  Steps, in order:
    1. read the command line (``get_args``) and initialise the TRADES
       Fortran library for ``cli.full_path`` / ``cli.sub_folder``;
    2. create the output folder and the run log (``init_folder``);
    3. read the PSO options and, for each of the ``n_global`` runs:
       - get the starting point according to ``cli.pso_type``
         ('run': run PSO, 'exists': read a previous ``pso_run.hdf5``,
         'skip': use the initial fitting parameters);
       - build the initial walkers and run emcee;
       - save chains, log-probability, acceptance fraction and
         autocorrelation time in ``emcee_summary.hdf5`` in the run folder.

  Returns None: all the results are written to disk and to the run log.

  Raises:
    ValueError: if ``cli.pso_type`` is not 'run', 'exists' or 'skip'.
  """
  # READ COMMAND LINE ARGUMENTS
  cli = get_args()

  # STARTING TIME
  start = time.time()

  # RENAME
  working_path = cli.full_path
  nthreads = cli.nthreads

  # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB -> PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ...
  pytrades_lib.pytrades.initialize_trades(working_path, cli.sub_folder, nthreads)

  # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE
  n_bodies = pytrades_lib.pytrades.n_bodies # NUMBER OF TOTAL BODIES OF THE SYSTEM
  n_planets = n_bodies - 1 # NUMBER OF PLANETS IN THE SYSTEM
  ndata = pytrades_lib.pytrades.ndata # TOTAL NUMBER OF DATA AVAILABLE
  nfit  = pytrades_lib.pytrades.nfit # NUMBER OF PARAMETERS TO FIT
  nfree  = pytrades_lib.pytrades.nfree # NUMBER OF FREE PARAMETERS (ie nrvset)
  dof   = pytrades_lib.pytrades.dof # NUMBER OF DEGREES OF FREEDOM = NDATA - NFIT

  # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT IT TO PYTHON STRINGS
  str_len = pytrades_lib.pytrades.str_len
  temp_names = pytrades_lib.pytrades.get_parameter_names(nfit,str_len)
  parameter_names = anc.convert_fortran_charray2python_strararray(temp_names)

  fitting_parameters = pytrades_lib.pytrades.fitting_parameters # INITIAL PARAMETER SET (NEEDED ONLY TO HAVE THE PROPER ARRAY/VECTOR)
  parameters_minmax = pytrades_lib.pytrades.parameters_minmax # PARAMETER BOUNDARIES

  # RADIAL VELOCITIES SET
  n_rv = pytrades_lib.pytrades.nrv
  n_set_rv = pytrades_lib.pytrades.nrvset

  # TRANSITS SET
  n_t0 = pytrades_lib.pytrades.nt0
  n_t0_sum = pytrades_lib.pytrades.ntts
  n_set_t0 = 0
  for i in range(0, n_bodies-1):
    if (n_t0[i] > 0): n_set_t0 += 1

  # global constant for the loglhd, as computed by the Fortran library
  global ln_err_const
  ln_err_const = pytrades_lib.pytrades.ln_err_const

  # SET EMCEE PARAMETERS:
  nwalkers, nruns, nsave, npost = get_emcee_arguments(cli,nfit)

  # INITIALISE SCRIPT FOLDER/LOG FILE
  working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder)

  anc.print_both('',of_run)
  anc.print_both(' ======== ',of_run)
  anc.print_both(' pyTRADES' ,of_run)
  anc.print_both(' ======== ',of_run)
  anc.print_both('',of_run)
  anc.print_both(' WORKING PATH = %s' %(working_path),of_run)
  anc.print_both(' NUMBER OF THREADS = %d' %(nthreads),of_run)
  anc.print_both(' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d' %(ndata, nfit, nfree, dof),of_run)
  anc.print_both(' Total N_RV = %d for %d set(s)' %(n_rv, n_set_rv),of_run)
  anc.print_both(' Total N_T0 = %d for %d out of %d planet(s)' %(n_t0_sum, n_set_t0, n_planets),of_run)
  anc.print_both(' %s = %.7f' %('log constant error = ', ln_err_const),of_run)
  anc.print_both(' %s = %.7f' %('IN FORTRAN log constant error = ', pytrades_lib.pytrades.ln_err_const),of_run)

  # INITIALISE PSO ARGUMENTS FROM pso.opt FILE
  pytrades_lib.pytrades.init_pso(1,working_path) # read PSO options
  # PSO VARIABLES
  np_pso = pytrades_lib.pytrades.np_pso
  nit_pso = pytrades_lib.pytrades.nit_pso
  n_global = pytrades_lib.pytrades.n_global
  anc.print_both(' PSO n_global = %d npop = %d ngen = %d' %(n_global, np_pso, nit_pso), of_run)

  # RUN PSO+EMCEE n_global TIMES
  for iter_global in range(0,n_global):

    # single-process pool kept alive during the PSO / walkers initialisation,
    # it is replaced by the nthreads pool just before the sampling
    threads_pool = emcee.interruptible_pool.InterruptiblePool(1)

    # CREATES PROPER WORKING PATH AND NAME
    i_global = iter_global + 1
    pso_path = os.path.join(os.path.join(working_folder, '%04d_pso2emcee' %(i_global)), '')
    pytrades_lib.pytrades.path_change(pso_path)

    anc.print_both('\n\n GLOBAL RUN %04d INTO PATH: %s\n' %(i_global, pso_path), of_run)

    # NOTE(review): delta_sigma is not defined inside this function; it is
    # presumably a module-level global -- confirm.
    if (cli.pso_type == 'run'):
      # RUN PSO
      anc.print_both(' RUN PSO', of_run)

      pso_start = time.time()
      if(not os.path.exists(pso_path)): os.makedirs(pso_path)
      # copy files
      anc.copy_simulation_files(working_path, pso_path)

      # CALL RUN_PSO SUBROUTINE FROM TRADES_LIB: RUNS PSO AND COMPUTES THE BEST SOLUTION, SAVING ALL THE POPULATION EVOLUTION
      pso_parameters, pso_fitness = pytrades_lib.pytrades.pyrun_pso(nfit,i_global)
      anc.print_both(' completed run_pso', of_run)

      pso_best_evolution = np.asarray(pytrades_lib.pytrades.pso_best_evolution[...], dtype=np.float64)
      anc.print_both(' pso_best_evolution retrieved', of_run)

      anc.print_both(' last pso_best_evolution', of_run)
      last_pso_fitness = pso_best_evolution[-1,-1].astype(np.float64)
      anc.print_both(' fitness = %.f' %(last_pso_fitness), of_run)

      # SAVE PSO SIMULATION IN pso_run.hdf5 FILE
      pso_file = os.path.join(pso_path, 'pso_run.hdf5')
      print(' Creating pso hdf5 file: %s' %(pso_file))
      # the context manager closes the file even if a dataset creation fails
      with h5py.File(pso_file, 'w') as pso_hdf5:
        pso_hdf5.create_dataset('population', data=pytrades_lib.pytrades.population, dtype=np.float64)
        pso_hdf5.create_dataset('population_fitness', data=pytrades_lib.pytrades.population_fitness, dtype=np.float64)
        pso_hdf5.create_dataset('pso_parameters', data=pso_parameters, dtype=np.float64)
        pso_hdf5.create_dataset('pso_fitness', data=np.array(pso_fitness), dtype=np.float64)
        pso_hdf5.create_dataset('pso_best_evolution', data=pso_best_evolution, dtype=np.float64)
        pso_hdf5.create_dataset('parameters_minmax', data=parameters_minmax, dtype=np.float64)
        pso_hdf5.create_dataset('parameter_names', data=parameter_names, dtype='S10')
        pso_hdf5['population'].attrs['npop'] = np_pso
        pso_hdf5['population'].attrs['niter'] = nit_pso
        pso_hdf5['population'].attrs['iter_global'] = iter_global+1
        pso_hdf5['population'].attrs['nfit'] = nfit

      anc.print_both(' ', of_run)
      fitness_iter, lgllhd_iter, check_iter = pytrades_lib.pytrades.write_summary_files(i_global, pso_parameters)
      elapsed = time.time() - pso_start
      elapsed_d, elapsed_h, elapsed_m, elapsed_s = anc.computation_time(elapsed)
      anc.print_both(' ', of_run)
      anc.print_both(' PSO FINISHED in %2d day %02d hour %02d min %.2f sec - bye bye' %(int(elapsed_d), int(elapsed_h), int(elapsed_m), elapsed_s), of_run)

      p0 = compute_initial_walkers(nfit, nwalkers, pso_parameters, parameters_minmax, parameter_names, delta_sigma, of_run)

    elif (cli.pso_type == 'exists'):
      # READ PREVIOUS PSO_RUN.HDF5 FILE AND INITIALISE POPULATION FOR EMCEE
      anc.print_both(' READ PREVIOUS PSO_RUN.HDF5 FILE AND INITIALISE POPULATION FOR EMCEE', of_run)

      population, population_fitness, pso_parameters, pso_fitness, pso_best_evolution, pso_parameters_minmax, pso_parameter_names, pop_shape = get_pso_data(os.path.join(pso_path, 'pso_run.hdf5'))

      fitness_iter, lgllhd_iter, check_iter = pytrades_lib.pytrades.write_summary_files(i_global, pso_parameters)

      anc.print_both(' read pso_run.hdf5 file with best pso_fitness = %.7f' %(pso_fitness), of_run)

      p0 = compute_initial_walkers(nfit, nwalkers, pso_parameters, parameters_minmax, parameter_names, delta_sigma, of_run)

    elif (cli.pso_type == 'skip'):
      # DO NOT RUN PSO, ONLY EMCEE
      anc.print_both(' DO NOT RUN PSO, ONLY EMCEE', of_run)

      p0 = compute_initial_walkers(nfit, nwalkers, fitting_parameters, parameters_minmax, parameter_names, delta_sigma, of_run)

    else:
      # without this check the failure would be a NameError on p0 further down
      raise ValueError("unknown pso_type %r: expected 'run', 'exists' or 'skip'" %(cli.pso_type,))

    anc.print_both(' emcee chain: nwalkers = %d nruns = %d' %(nwalkers, nruns), of_run)
    anc.print_both(' sampler ... ',of_run)

    # close the single-process pool and open the one used by the sampler
    threads_pool.close()
    threads_pool.terminate()
    threads_pool.join()

    threads_pool = emcee.interruptible_pool.InterruptiblePool(nthreads)
    sampler = emcee.EnsembleSampler(nwalkers, nfit, lnprob, pool=threads_pool)

    anc.print_both(' ready to go', of_run)
    anc.print_both(' with nsave = %r' %(nsave), of_run)
    sys.stdout.flush()

    summary_file = os.path.join(pso_path, 'emcee_summary.hdf5')
    # only the 'run' branch above creates pso_path: be sure it exists before writing
    if(not os.path.isdir(pso_path)): os.makedirs(pso_path)
    # reported as nan if no emcee step is run at all (int(nruns/nsave) == 0)
    mean_acceptance_fraction = np.nan

    if (nsave != False):
      # save the sampling to file every nsave steps, starting from a fresh file
      if(os.path.exists(summary_file) and os.path.isfile(summary_file)):
        os.remove(summary_file)
      with h5py.File(summary_file, 'a') as f_hdf5:
        f_hdf5.create_dataset('parameter_names', data=parameter_names, dtype='S10')
        f_hdf5.create_dataset('boundaries', data=parameters_minmax, dtype=np.float64)
        temp_dset = f_hdf5.create_dataset('chains', (nwalkers, nruns, nfit), dtype=np.float64)
        temp_dset.attrs['nwalkers'] = nwalkers
        temp_dset.attrs['nruns'] = nruns
        temp_dset.attrs['nfit'] = nfit
        temp_dset.attrs['nfree'] = nfree
        temp_lnprob = f_hdf5.create_dataset('lnprobability', (nwalkers, nruns), dtype=np.float64)
        temp_lnprob.attrs['ln_err_const'] = ln_err_const
        # one acceptance fraction per walker, same layout as the single-run branch below
        f_hdf5.create_dataset('acceptance_fraction', data=np.zeros((nwalkers)), dtype=np.float64)
        f_hdf5.create_dataset('autocor_time', data=np.zeros((nfit)), dtype=np.float64)
      pos = p0
      niter_save = int(nruns/nsave)
      state=None
      anc.print_both(' Running emcee with temporary saving', of_run)
      sys.stdout.flush()

      for i in range(0, niter_save):
        anc.print_both('', of_run)
        anc.print_both(' iter: %6d ' %(i+1), of_run)
        # [aaa, bbb) is the slab of steps produced by this iteration
        aaa = i*nsave
        bbb = aaa+nsave
        pos, prob, state = sampler.run_mcmc(pos, N=nsave, rstate0=state)
        anc.print_both('completed %d steps of %d' %(bbb, nruns), of_run)
        with h5py.File(summary_file, 'a') as f_hdf5:
          temp_dset = f_hdf5['chains']
          temp_dset[:,aaa:bbb,:] = sampler.chain[:, aaa:bbb, :]
          temp_dset.attrs['completed_steps'] = bbb

          temp_lnprob = f_hdf5['lnprobability']
          temp_lnprob[:, aaa:bbb] = sampler.lnprobability[:, aaa:bbb]

          acceptance_fraction = sampler.acceptance_fraction
          # write INTO the dataset: a plain assignment would only rebind the name
          f_hdf5['acceptance_fraction'][...] = acceptance_fraction
          mean_acceptance_fraction = np.mean(acceptance_fraction)

          acor_time = anc.compute_acor_time(sampler, steps_done=bbb)
          f_hdf5['autocor_time'][...] = acor_time
        sys.stdout.flush()

      anc.print_both('', of_run)
      anc.print_both('...done with saving temporary total shape = %s' %(str(np.shape(sampler.chain))), of_run)
      anc.print_both('', of_run)
      sys.stdout.flush()

    else:
      # GOOD COMPLETE SINGLE RUNNING OF EMCEE, WITHOUT REMOVING THE BURN-IN
      anc.print_both(' Running full emcee ...', of_run)
      sys.stdout.flush()
      sampler.run_mcmc(p0, nruns)
      anc.print_both('done', of_run)
      anc.print_both('', of_run)
      sys.stdout.flush()
      acceptance_fraction = sampler.acceptance_fraction
      mean_acceptance_fraction = np.mean(acceptance_fraction)
      acor_time = anc.compute_acor_time(sampler)
      lnprobability = sampler.lnprobability
      # save chains with original shape as hdf5 file
      with h5py.File(summary_file, 'w') as f_hdf5:
        f_hdf5.create_dataset('chains', data=sampler.chain, dtype=np.float64)
        f_hdf5['chains'].attrs['nwalkers'] = nwalkers
        f_hdf5['chains'].attrs['nruns'] = nruns
        f_hdf5['chains'].attrs['nfit'] = nfit
        f_hdf5['chains'].attrs['nfree'] = nfree
        f_hdf5['chains'].attrs['completed_steps'] = nruns
        f_hdf5.create_dataset('parameter_names', data=parameter_names, dtype='S10')
        f_hdf5.create_dataset('boundaries', data=parameters_minmax, dtype=np.float64)
        f_hdf5.create_dataset('acceptance_fraction', data=acceptance_fraction, dtype=np.float64)
        f_hdf5.create_dataset('autocor_time', data=acor_time, dtype=np.float64)
        f_hdf5.create_dataset('lnprobability', data=lnprobability, dtype=np.float64)
        f_hdf5['lnprobability'].attrs['ln_err_const'] = ln_err_const

    anc.print_both(" Mean_acceptance_fraction should be between [0.25-0.5] = %.6f" %(mean_acceptance_fraction), of_run)
    anc.print_both('', of_run)

    # close the pool of threads
    threads_pool.close()
    threads_pool.terminate()
    threads_pool.join()

    anc.print_both('COMPLETED EMCEE', of_run)

    # elapsed time is measured from the start of the script, not of this global run
    elapsed = time.time() - start
    elapsed_d, elapsed_h, elapsed_m, elapsed_s = anc.computation_time(elapsed)

    anc.print_both('', of_run)
    anc.print_both(' pyTRADES: EMCEE FINISHED in %2d day %02d hour %02d min %.2f sec - bye bye' %(int(elapsed_d), int(elapsed_h), int(elapsed_m), elapsed_s), of_run)
    anc.print_both('', of_run)


  of_run.close()
  pytrades_lib.pytrades.deallocate_variables()

  return
Example #7
0
def compute_initial_walkers(nfit, nwalkers, fitting_parameters, parameters_minmax, parameter_names, delta_sigma, of_run):
  """Create the initial positions of the emcee walkers.

  Each walker is ``fitting_parameters`` plus a Gaussian perturbation whose
  per-parameter sigma comes from ``compute_proper_sigma``; a candidate is
  kept only if ``lnprob`` of it is not infinite. The last walker is set to
  the unperturbed ``fitting_parameters``.

  If ``delta_sigma`` contains the string 'ran' (case insensitive), the first
  walkers are replaced, in groups of 30, by Gaussian clouds around new
  starting points. Each new starting point is ``fitting_parameters`` with one
  randomly selected parameter redrawn uniformly within its boundaries.

  Parameters:
    nfit: number of fitted parameters.
    nwalkers: number of walkers to create.
    fitting_parameters: array with the reference parameter values.
    parameters_minmax: array indexed as [parameter, 0|1]; column 0 is used as
      the lower value and |column 1 - column 0| as the width.
    parameter_names: names of the fitted parameters.
    delta_sigma: value converted to float and used as sigma; if it cannot be
      converted (e.g. the 'ran...' option) 1.e-4 is used instead.
    of_run: log file handle passed to ``anc.print_both``.

  Returns:
    list of ``nwalkers`` numpy float64 arrays (empty if ``nwalkers <= 0``).
  """
  import sys  # local import: used only for the same-line progress output

  def draw_finite_walker(center):
    # draw Gaussian perturbations of center until lnprob is not infinite
    while True:
      test_p0 = np.array([center[ifit] + np.random.normal(loc=0., scale=delta_sigma_out[ifit]) for ifit in range(0,nfit)], dtype=np.float64)
      if(not np.isinf(lnprob(test_p0))):
        return test_p0

  anc.print_both(' Inititializing walkers with delta_sigma = %s' %(str(delta_sigma).strip()), of_run)
  anc.print_both(' good p0:', of_run)

  # 2017-02-03 LUCA --0--
  # delta_sigma can be a non-numeric option (see the 'ran' check below):
  # in that case fall back to the default sigma
  try:
    d_sigma = np.float64(delta_sigma)
  except (TypeError, ValueError):
    d_sigma = np.float64(1.e-4)
  delta_sigma_out = compute_proper_sigma(nfit, d_sigma, parameter_names)

  # init all initial walkers, printing the running count on a single line
  p0 = []
  sys.stdout.write(' ')
  while len(p0) < nwalkers:
    p0.append(draw_finite_walker(fitting_parameters))
    sys.stdout.write(' %d' %(len(p0)))
  sys.stdout.write('\n')
  if p0:
    # I want the original fitting parameters in the initial walkers
    # (stored as a copy, so the walker does not alias the caller's array)
    p0[-1] = np.array(fitting_parameters, dtype=np.float64)

  # if 'random' opt ==> create other Gaussian starting points (<->nwalkers)
  if('ran' in str(delta_sigma).strip().lower()):
    delta_parameters = np.abs(parameters_minmax[:,1] - parameters_minmax[:,0]) # DELTA BETWEEN MAX AND MIN OF BOUNDARIES
    nw_min = 30
    # a new Gaussian starting point each nw_min walkers, keeping at least nw_min walkers Gaussian to the original fitting parameters
    n_gpts = int((nwalkers-nw_min)//nw_min)
    print(' new gaussian starting points:  %s' %(n_gpts))
    if(n_gpts > 0):
      print(' doing random-gaussian points ... ')
      for i_gpt in range(0, n_gpts):
        # create new starting point, but check if lnL != -inf
        new_start = fitting_parameters.copy()
        # change only one parameter: random() is in [0, 1), so this selects
        # uniformly among all the nfit parameters (the last one included)
        sel_fit = int(np.random.random()*nfit)
        print('gpt  %d' %(i_gpt+1))
        print('selected sel_fit =  %d  ==>  %s' %(sel_fit, parameter_names[sel_fit]))
        print('val =  %s  with min =  %s  and delta =  %s' %(new_start[sel_fit], parameters_minmax[sel_fit,0], delta_parameters[sel_fit]))
        while True:
          new_start[sel_fit] = parameters_minmax[sel_fit,0] + delta_parameters[sel_fit]*np.random.random()
          if(not np.isinf(lnprob(new_start))): break
        # overwrite the i_gpt-th group of nw_min walkers with the new cloud
        first_pos = nw_min * i_gpt
        sys.stdout.write('i_pos = ')
        for i_pos in range(first_pos, first_pos + nw_min):
          p0[i_pos] = draw_finite_walker(new_start)
          sys.stdout.write(' %d' %(i_pos))
        sys.stdout.write('\n')
    print('')

  anc.print_both(' done initial walkers.', of_run)

  return p0
Example #8
0
def main():
  """Run the pyTRADES emcee analysis driven by the command line.

  Steps, in order:
    1. read the command line (``get_args``), seed the numpy random number
       generator with ``cli.seed`` and initialise the TRADES Fortran library;
    2. get the initial parameters (from ``cli.trades_previous`` if given,
       otherwise from the library) and convert parameters and boundaries
       with the ``anc.e_to_sqrte_*`` helpers;
    3. create the output folder and the run log (``init_folder``);
    4. initialise the walkers (from ``cli.emcee_previous`` if given,
       otherwise with ``compute_initial_walkers``);
    5. run a 1000-step pre-emcee, reset the sampler and run emcee, saving
       chains, log-probability, acceptance fraction and autocorrelation time
       in ``emcee_summary.hdf5`` in the working folder.

  Returns None: all the results are written to disk and to the run log.
  The function calls ``sys.exit()`` if the previous TRADES file has a
  different number of parameters or the previous emcee run cannot be used.
  """
  # READ COMMAND LINE ARGUMENTS
  cli = get_args()

  # STARTING TIME
  start = time.time()

  # RENAME
  working_path = cli.full_path
  nthreads = cli.nthreads
  # seed the global numpy generator used by the np.random.* calls:
  # np.random.RandomState(seed) alone builds a new generator and discards it
  np.random.seed(cli.seed)

  # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB -> PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ...
  pytrades_lib.pytrades.initialize_trades(working_path, cli.sub_folder, nthreads)

  # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE
  n_bodies = pytrades_lib.pytrades.n_bodies # NUMBER OF TOTAL BODIES OF THE SYSTEM
  n_planets = n_bodies - 1 # NUMBER OF PLANETS IN THE SYSTEM
  ndata = pytrades_lib.pytrades.ndata # TOTAL NUMBER OF DATA AVAILABLE
  nfit  = pytrades_lib.pytrades.nfit # NUMBER OF PARAMETERS TO FIT
  nfree  = pytrades_lib.pytrades.nfree # NUMBER OF FREE PARAMETERS (ie nrvset)
  dof   = pytrades_lib.pytrades.dof # NUMBER OF DEGREES OF FREEDOM = NDATA - NFIT
  global inv_dof
  inv_dof = pytrades_lib.pytrades.inv_dof

  # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT IT TO PYTHON STRINGS
  str_len = pytrades_lib.pytrades.str_len
  temp_names = pytrades_lib.pytrades.get_parameter_names(nfit,str_len)
  trades_names = anc.convert_fortran_charray2python_strararray(temp_names)
  parameter_names = anc.trades_names_to_emcee(trades_names)

  if(cli.trades_previous is not None):
    temp_names, trades_parameters = anc.read_fitted_file(cli.trades_previous)
    if(nfit != np.shape(trades_parameters)[0]):
      # the log file is not open yet (init_folder is called below),
      # so only the message is passed to print_both
      anc.print_both(' NUMBER OF PARAMETERS (%d) IN TRADES-PREVIOUS FILE DOES NOT ' \
                 'MATCH THE CURRENT CONFIGURATION nfit=%d\nSTOP' \
                 %(np.shape(trades_parameters)[0], nfit)
                )
      sys.exit()
    del temp_names
  else:
    # INITIAL PARAMETER SET (NEEDED ONLY TO HAVE THE PROPER ARRAY/VECTOR)
    trades_parameters = pytrades_lib.pytrades.fitting_parameters
  # save initial_fitting parameters into array
  original_fit_parameters = trades_parameters.copy()
  fitting_parameters = anc.e_to_sqrte_fitting(trades_parameters, trades_names)

  trades_minmax = pytrades_lib.pytrades.parameters_minmax # PARAMETER BOUNDARIES
  parameters_minmax = anc.e_to_sqrte_boundaries(trades_minmax, trades_names)

  # RADIAL VELOCITIES SET
  n_rv = pytrades_lib.pytrades.nrv
  n_set_rv = pytrades_lib.pytrades.nrvset

  # TRANSITS SET
  n_t0 = pytrades_lib.pytrades.nt0
  n_t0_sum = pytrades_lib.pytrades.ntts
  n_set_t0 = 0
  for i in range(0, n_bodies-1):
    if (n_t0[i] > 0): n_set_t0 += 1

  # global constant for the loglhd, as computed by the Fortran library
  global ln_err_const
  ln_err_const = pytrades_lib.pytrades.ln_err_const

  # SET EMCEE PARAMETERS:
  nwalkers, nruns, nsave, npost = get_emcee_arguments(cli,nfit)

  # INITIALISE SCRIPT FOLDER/LOG FILE
  working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder)

  anc.print_both('',of_run)
  anc.print_both(' ======== ',of_run)
  anc.print_both(' pyTRADES' ,of_run)
  anc.print_both(' ======== ',of_run)
  anc.print_both('',of_run)
  anc.print_both(' WORKING PATH = %s' %(working_path),of_run)
  anc.print_both(' NUMBER OF THREADS = %d' %(nthreads),of_run)
  anc.print_both(' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d' %(ndata, nfit, nfree, dof),of_run)
  anc.print_both(' Total N_RV = %d for %d set(s)' %(n_rv, n_set_rv),of_run)
  anc.print_both(' Total N_T0 = %d for %d out of %d planet(s)' %(n_t0_sum, n_set_t0, n_planets),of_run)
  anc.print_both(' %s = %.7f' %('log constant error = ', ln_err_const),of_run)
  anc.print_both(' %s = %.7f' %('IN FORTRAN log constant error = ', pytrades_lib.pytrades.ln_err_const),of_run)
  anc.print_both(' seed = %s' %(str(cli.seed)), of_run)

  if(cli.trades_previous is not None):
    anc.print_both('\n ******\n INITIAL FITTING PARAMETERS FROM PREVIOUS' \
              ' TRADES-EMCEE SIM IN FILE:\n %s\n ******\n' %(cli.trades_previous),
              of_run
              )

  anc.print_both(' ORIGINAL PARAMETER VALUES -> 0000', of_run)
  fitness_0000, lgllhd_0000, check_0000 = pytrades_lib.pytrades.write_summary_files(0, original_fit_parameters)
  anc.print_both(' ', of_run)
  anc.print_both(' TESTING LNPROB_SQ ...', of_run)

  # evaluate both log-probability functions on the same starting point,
  # the values are printed side by side in the table below
  lgllhd_zero = lnprob(trades_parameters)
  lgllhd_sq_zero = lnprob_sq(fitting_parameters, parameter_names)

  anc.print_both(' ', of_run)
  anc.print_both(' %15s %23s %23s %15s %23s' %('trades_names', 'original_trades', 'trades_par', 'emcee_names', 'emcee_par'), of_run)
  for ifit in range(0, nfit):
    anc.print_both(' %15s %23.16e %23.16e %15s %23.16e' %(trades_names[ifit], original_fit_parameters[ifit], trades_parameters[ifit], parameter_names[ifit], fitting_parameters[ifit]), of_run)
  anc.print_both(' ', of_run)
  anc.print_both(' %15s %23.16e %23.16e %15s %23.16e' %('lnprob', lgllhd_0000, lgllhd_zero, 'lnprob_sq', lgllhd_sq_zero), of_run)
  anc.print_both(' ', of_run)

  # INITIALISES THE WALKERS
  if(cli.emcee_previous is not None):
    anc.print_both(' Use a previous emcee simulation: %s' %(cli.emcee_previous), of_run)
    last_p0, old_nwalkers, last_done = anc.get_last_emcee_iteration(cli.emcee_previous, nwalkers)
    if(not last_done):
      anc.print_both('**STOP: USING A DIFFERENT NUMBER OF WALKERS (%d) W.R.T. PREVIOUS EMCEE SIMULATION (%d).' %(nwalkers, old_nwalkers), of_run)
      sys.exit()
    p0 = last_p0
  else:
    p0 = compute_initial_walkers(nfit, nwalkers, fitting_parameters, parameters_minmax, parameter_names, cli.delta_sigma, of_run)

  anc.print_both(' emcee chain: nwalkers = %d nruns = %d' %(nwalkers, nruns), of_run)
  anc.print_both(' sampler ... ',of_run)

  threads_pool = emcee.interruptible_pool.InterruptiblePool(nthreads)
  sampler = emcee.EnsembleSampler(nwalkers, nfit, lnprob_sq,
                                  pool=threads_pool,
                                  args=[parameter_names]
                                  ) # needed to use sqrt(e) in emcee instead of e (in fortran)

  anc.print_both(' TEST A PRE-EMCEE OF 1000 STEPS', of_run)
  p0, prob, state = sampler.run_mcmc(p0, 1000)
  anc.print_both(' TEST A RESET OF THE SAMPLER', of_run)
  sampler.reset()

  anc.print_both(' ready to go', of_run)
  anc.print_both(' with nsave = %s' %(str(nsave)), of_run)
  sys.stdout.flush()

  summary_file = os.path.join(working_folder, 'emcee_summary.hdf5')
  # reported as nan if no emcee step is run at all (int(nruns/nsave) == 0)
  mean_acceptance_fraction = np.nan

  if (nsave != False):
    # save the sampling to file every nsave steps, starting from a fresh file
    if(os.path.exists(summary_file) and os.path.isfile(summary_file)):
      os.remove(summary_file)
    # the context manager closes the file even if a dataset creation fails
    with h5py.File(summary_file, 'a') as f_hdf5:
      f_hdf5.create_dataset('parameter_names', data=parameter_names, dtype='S10')
      f_hdf5.create_dataset('boundaries', data=parameters_minmax, dtype=np.float64)
      f_hdf5.create_dataset('chains', (nwalkers, nruns, nfit), dtype=np.float64)
      temp_lnprob = f_hdf5.create_dataset('lnprobability', (nwalkers, nruns), dtype=np.float64)
      temp_lnprob.attrs['ln_err_const'] = ln_err_const
      # one acceptance fraction per walker, same layout as the single-run branch below
      f_hdf5.create_dataset('acceptance_fraction', data=np.zeros((nwalkers)), dtype=np.float64)
      f_hdf5.create_dataset('autocor_time', data=np.zeros((nfit)), dtype=np.float64)
    pos = p0
    nchains = int(nruns/nsave)
    state=None
    anc.print_both(' Running emcee with temporary saving', of_run)
    sys.stdout.flush()
    for i in range(0, nchains):
      anc.print_both('', of_run)
      anc.print_both(' iter: %6d ' %(i+1), of_run)
      # [aaa, bbb) is the slab of steps produced by this iteration
      aaa = i*nsave
      bbb = aaa+nsave
      pos, prob, state = sampler.run_mcmc(pos, N=nsave, rstate0=state)
      anc.print_both('completed %d steps of %d' %(bbb, nruns), of_run)
      with h5py.File(summary_file, 'a') as f_hdf5:
        temp_dset = f_hdf5['chains']
        temp_dset[:,aaa:bbb,:] = sampler.chain[:, aaa:bbb, :]
        temp_dset.attrs['completed_steps'] = bbb
        temp_lnprob = f_hdf5['lnprobability']
        temp_lnprob[:, aaa:bbb] = sampler.lnprobability[:, aaa:bbb]

        acceptance_fraction = sampler.acceptance_fraction
        # write INTO the dataset: a plain assignment would only rebind the name
        f_hdf5['acceptance_fraction'][...] = acceptance_fraction
        mean_acceptance_fraction = np.mean(acceptance_fraction)

        acor_time = anc.compute_acor_time(sampler, steps_done=bbb)
        f_hdf5['autocor_time'][...] = acor_time
      sys.stdout.flush()
    anc.print_both('', of_run)
    anc.print_both('...done with saving temporary total shape = %s' %(str(np.shape(sampler.chain))), of_run)
    anc.print_both('', of_run)
    sys.stdout.flush()

  else:
    # GOOD COMPLETE SINGLE RUNNING OF EMCEE, WITHOUT REMOVING THE BURN-IN
    anc.print_both(' Running full emcee ...', of_run)
    sys.stdout.flush()
    sampler.run_mcmc(p0, nruns)
    anc.print_both('done', of_run)
    anc.print_both('', of_run)
    sys.stdout.flush()
    acceptance_fraction = sampler.acceptance_fraction
    mean_acceptance_fraction = np.mean(acceptance_fraction)
    acor_time = anc.compute_acor_time(sampler)
    lnprobability = sampler.lnprobability
    # save chains with original shape as hdf5 file
    with h5py.File(summary_file, 'w') as f_hdf5:
      f_hdf5.create_dataset('chains', data=sampler.chain, dtype=np.float64)
      f_hdf5['chains'].attrs['completed_steps'] = nruns
      f_hdf5.create_dataset('parameter_names', data=parameter_names, dtype='S10')
      f_hdf5.create_dataset('boundaries', data=parameters_minmax, dtype=np.float64)
      f_hdf5.create_dataset('acceptance_fraction', data=acceptance_fraction, dtype=np.float64)
      f_hdf5.create_dataset('autocor_time', data=acor_time, dtype=np.float64)
      f_hdf5.create_dataset('lnprobability', data=lnprobability, dtype=np.float64)
      f_hdf5['lnprobability'].attrs['ln_err_const'] = ln_err_const

  anc.print_both(" Mean_acceptance_fraction should be between [0.25-0.5] = %.6f" %(mean_acceptance_fraction), of_run)
  anc.print_both('', of_run)

  # close the pool of threads
  threads_pool.close()
  threads_pool.terminate()
  threads_pool.join()

  anc.print_both('COMPLETED EMCEE', of_run)

  elapsed = time.time() - start
  elapsed_d, elapsed_h, elapsed_m, elapsed_s = anc.computation_time(elapsed)

  anc.print_both('', of_run)
  anc.print_both(' pyTRADES: EMCEE FINISHED in %2d day %02d hour %02d min %.2f sec - bye bye' %(int(elapsed_d), int(elapsed_h), int(elapsed_m), elapsed_s), of_run)
  anc.print_both('', of_run)
  of_run.close()
  pytrades_lib.pytrades.deallocate_variables()

  return
Example #9
0
def main():
  """Run a PyPolyChord nested-sampling fit driven by the TRADES Fortran library.

  Steps, in order:
    1. read the command line arguments;
    2. initialise TRADES and read its sizes, parameter names and boundaries;
    3. create the working folder and log file, and log a short summary;
    4. define the log-likelihood and prior callbacks required by PyPolyChord;
    5. run PolyChord and write the paramnames file;
    6. optionally show a getdist triangle plot of the posterior.

  Sets the module-level globals ``inv_dof`` and ``ln_err_const``.
  """
  
  # READ COMMAND LINE ARGUMENTS
  cli = get_args()


  # STARTING TIME
  start = time.localtime()
  # NOTE(review): pc_output_dir and pc_output_files are built here but never
  # read again in this function; the PolyChord settings below use
  # cli.pc_output_dir / cli.pc_output_files instead -- confirm which is intended.
  pc_output_dir = '%d-%02d-%02dT%02dh%02dm%02ds_' %(start.tm_year, 
                                                    start.tm_mon, 
                                                    start.tm_mday,
                                                    start.tm_hour, 
                                                    start.tm_min, 
                                                    start.tm_sec)
  pc_output_files = 'trades_pc'

  # RENAME 
  working_path = cli.full_path
  nthreads=1

  # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB -> PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ...
  # NOTE(review): this function uses both `pytrades.` and `pytrades_lib.pytrades.`;
  # presumably they refer to the same Fortran module -- verify against the imports.
  pytrades.initialize_trades(working_path, cli.sub_folder, nthreads)

  # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE
  
  #global n_bodies, n_planets, ndata, npar, nfit, dof, inv_dof
  n_bodies = pytrades.n_bodies # NUMBER OF TOTAL BODIES OF THE SYSTEM
  n_planets = n_bodies - 1 # NUMBER OF PLANETS IN THE SYSTEM
  ndata = pytrades.ndata # TOTAL NUMBER OF DATA AVAILABLE
  npar  = pytrades.npar # NUMBER OF TOTAL PARAMETERS ~n_planets X 6
  nfit  = pytrades.nfit # NUMBER OF PARAMETERS TO FIT
  nfree  = pytrades.nfree # NUMBER OF FREE PARAMETERS (ie nrvset)
  dof   = pytrades.dof # NUMBER OF DEGREES OF FREEDOM (logged below as ndata - nfit - nfree)
  global inv_dof
  #inv_dof = np.float64(1.0 / dof)
  inv_dof = pytrades_lib.pytrades.inv_dof
  
  # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT IT TO PYTHON STRINGS
  str_len = pytrades.str_len
  temp_names = pytrades.get_parameter_names(nfit,str_len)
  trades_names = anc.convert_fortran_charray2python_strararray(temp_names)
  fitting_names = anc.trades_names_to_emcee(trades_names)
  
  # save initial_fitting parameters into array
  # NOTE(review): `trades_parameters` is not assigned anywhere in this function;
  # unless it is a module-level name this line raises NameError -- TODO confirm
  # (possibly the library's initial fitting-parameter array was meant).
  # Neither original_fit_parameters nor fitting_parameters is read again below.
  original_fit_parameters = trades_parameters.copy()
  fitting_parameters = anc.e_to_sqrte_fitting(trades_parameters, trades_names)
  
  trades_minmax = pytrades.parameters_minmax # PARAMETER BOUNDARIES
  # boundaries converted with the same e -> sqrt(e) helper family used for the
  # fitting parameters; these are the limits used by prior() below
  parameters_minmax = anc.e_to_sqrte_boundaries(trades_minmax, trades_names)

  # RADIAL VELOCITIES SET
  n_rv = pytrades_lib.pytrades.nrv
  n_set_rv = pytrades_lib.pytrades.nrvset

  # TRANSITS SET
  n_t0 = pytrades_lib.pytrades.nt0
  n_t0_sum = pytrades_lib.pytrades.ntts
  # count the planets that have at least one transit time
  n_set_t0 = 0
  for i in range(0, n_bodies-1):
    if (n_t0[i] > 0): n_set_t0 += 1

  # compute global constant for the loglhd
  global ln_err_const

  #try:
    ## fortran variable RV in python will be rv!!!
    #e_RVo = np.array(pytrades_lib.pytrades.ervobs[:], dtype=np.float64)
  #except:
    #e_RVo = np.array([0.], dtype=np.float64)
  #try:
    #e_T0o = np.array(pytrades_lib.pytrades.et0obs[:,:], dtype=np.float64).reshape((-1))
  #except:
    #e_T0o = np.array([0.], dtype=np.float64)
  #ln_err_const = anc.compute_ln_err_const(dof, e_RVo, e_T0o, True)
  # the constant is now taken directly from the Fortran library
  ln_err_const = pytrades_lib.pytrades.ln_err_const

  # INITIALISE SCRIPT FOLDER/LOG FILE
  # NOTE(review): of_run is never closed in this function -- confirm whether
  # that is intentional.
  working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder)
  anc.print_both('',of_run)
  anc.print_both(' ======== ',of_run)
  anc.print_both(' pyTRADES' ,of_run)
  anc.print_both(' ======== ',of_run)
  anc.print_both('',of_run)
  anc.print_both(' WORKING PATH = %s' %(working_path),of_run)
  anc.print_both(' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d' %(ndata, nfit, nfree, dof),of_run)
  anc.print_both(' Total N_RV = %d for %d set(s)' %(n_rv, n_set_rv),of_run)
  anc.print_both(' Total N_T0 = %d for %d out of %d planet(s)' %(n_t0_sum, n_set_t0, n_planets),of_run)
  anc.print_both(' %s = %.7f' %('log constant error = ', ln_err_const),of_run)
  
  # SET PYPOLYCHORD
  # needed to define number of derived parameters for PyPolyChord
  nder = 0
  
  # define the loglikelihood function for PyPolyChord
  def likelihood(fitting_par):
    """Return (log-likelihood, derived parameters) for one fitting vector.

    The fitting parameters are converted back to the TRADES parametrisation,
    evaluated by the Fortran log-likelihood, and the global constant
    ln_err_const is added. The derived-parameter list has nder (= 0) entries.
    The `check` flag returned by the Fortran routine is not used here.
    """
    
    # derived parameters
    derived_par = [0.0] * nder
    # convert fitting_par to trades_par
    trades_par = anc.sqrte_to_e_fitting(fitting_par, fitting_names)
    loglhd = 0.
    check = 1
    loglhd, check = pytrades.fortran_loglikelihood(np.array(trades_par, dtype=np.float64))
    #print loglhd, ln_err_const
    loglhd = loglhd + ln_err_const # ln_err_const: global variable
    
    return loglhd, derived_par

  # define the prior for the fitting parameters
  def prior(hypercube):
    """Uniform prior: map each hypercube coordinate onto its parameter range.

    Coordinate i is passed through
    UniformPrior(parameters_minmax[i,0], parameters_minmax[i,1]);
    returns the list of nfit fitting parameters.
    """

    fitting_par = [0.0] * nfit
    for i, x in enumerate(hypercube):
        fitting_par[i] = PC_priors.UniformPrior(parameters_minmax[i,0], parameters_minmax[i,1])(x)

    return fitting_par

  # set PyPolyChord: the pc_settings define how to run PC, e.g. nlive, precision_criterion, etc.
  pc_settings = PC_settings.PolyChordSettings(nfit, nder)
  pc_settings.base_dir = cli.pc_output_dir
  pc_settings.file_root = cli.pc_output_files
  pc_settings.do_clustering = True
  # Possible PyPolyChord settings:
  #Keyword arguments
  #-----------------
  #nlive: int
      #(Default: nDims*25)
      #The number of live points.
      #Increasing nlive increases the accuracy of posteriors and evidences,
      #and proportionally increases runtime ~ O(nlive).

  #num_repeats : int
      #(Default: nDims*5)
      #The number of slice slice-sampling steps to generate a new point.
      #Increasing num_repeats increases the reliability of the algorithm.
      #Typically
      #* for reliable evidences need num_repeats ~ O(5*nDims).
      #* for reliable posteriors need num_repeats ~ O(nDims)

  #nprior : int
      #(Default: nlive)
      #The number of prior samples to draw before starting compression.

  #do_clustering : boolean
      #(Default: True)
      #Whether or not to use clustering at run time.

  #feedback : {0,1,2,3}
      #(Default: 1)
      #How much command line feedback to give

  #precision_criterion : float
      #(Default: 0.001)
      #Termination criterion. Nested sampling terminates when the evidence
      #contained in the live points is precision_criterion fraction of the
      #total evidence.

  #max_ndead : int
      #(Default: -1)
      #Alternative termination criterion. Stop after max_ndead iterations.
      #Set negative to ignore (default).

  #boost_posterior : float
      #(Default: 0.0)
      #Increase the number of posterior samples produced.  This can be set
      #arbitrarily high, but you won't be able to boost by more than
      #num_repeats
      #Warning: in high dimensions PolyChord produces _a lot_ of posterior
      #samples. You probably don't need to change this

  #posteriors : boolean
      #(Default: True)
      #Produce (weighted) posterior samples. Stored in <root>.txt.

  #equals : boolean
      #(Default: True)
      #Produce (equally weighted) posterior samples. Stored in
      #<root>_equal_weights.txt

  #cluster_posteriors : boolean
      #(Default: True)
      #Produce posterior files for each cluster?
      #Does nothing if do_clustering=False.

  #write_resume : boolean
      #(Default: True)
      #Create a resume file.

  #read_resume : boolean
      #(Default: True)
      #Read from resume file.

  #write_stats : boolean
      #(Default: True)
      #Write an evidence statistics file.

  #write_live : boolean
      #(Default: True)
      #Write a live points file.

  #write_dead : boolean
      #(Default: True)
      #Write a dead points file.

  #write_prior : boolean
      #(Default: True)
      #Write a prior points file.

  #update_files : int
      #(Default: nlive)
      #How often to update the files in <base_dir>.

  #base_dir : string
      #(Default: 'chains')
      #Where to store output files.

  #file_root : string
      #(Default: 'test')
      #Root name of the files produced.

  #grade_frac : List[float]
      #(Default: 1)
      #The amount of time to spend in each speed.

  #grade_dims : List[int]
      #(Default: 1)
      #The number of parameters within each speed.
  

  # RUN POLYCHORD
  pc_run = PC.run_polychord(likelihood, nfit, nder, pc_settings, prior)
  
  # set label and legend names
  kel_plot_labels = anc.keplerian_legend(fitting_names, cli.m_type)
  # one (name, label) pair per fitted parameter
  pc_paramnames = [('%s' %(fitting_names[i]), r'%s' %(kel_plot_labels[i])) for i in range(nfit)]
  #pc_paramnames += [('r*', 'r')]
  pc_run.make_paramnames_files(pc_paramnames)
  
  if(cli.pc_plot):
    # plotting libraries are imported only when a plot is requested
    import getdist.plots
    import matplotlib.pyplot as plt
    plt.rc('font',**{'family':'serif','serif':['Computer Modern Roman']})
    plt.rc('text', usetex=True)
    posterior = pc_run.posterior
    g = getdist.plots.getSubplotPlotter()
    g.triangle_plot(posterior, filled=True)
    plt.show()
  
  return
Example #10
0
def _derived_overplot_line(ax, name, value, vertical):
    """Mark an overplotted sample value on ``ax`` with dashed line(s).

    Parameters whose name contains 'w', 'lN' or 'mA' are treated as angles:
    the value is drawn twice, wrapped with ``% 360.`` and with ``% -360.``.
    Any other parameter is drawn once at ``value``.

    ax       -- matplotlib axes to draw on
    name     -- derived parameter name (tested with the ``in`` operator)
    value    -- value of the selected sample for that parameter
    vertical -- True draws with ``ax.axvline``, False with ``ax.axhline``
    """
    draw_line = ax.axvline if vertical else ax.axhline
    if ('w' in name or 'lN' in name or 'mA' in name):
        positions = (value % 360., value % -360.)
    else:
        positions = (value,)
    for position in positions:
        draw_line(position, color='C0', ls='--', lw=1.1, alpha=0.7)


def main():
    """Plot the triangle (correlation) plot of the derived posterior.

    Reads ``derived_posterior.hdf5`` from ``cli.full_path``, draws a 2D
    histogram with contours for every pair of derived parameters (lower
    triangle) and a 1D histogram for each parameter (diagonal), optionally
    overplots the sample selected with ``cli.overplot`` (read from
    ``summary_parameters.hdf5``), and saves the figure as
    ``plots/derived_triangle.png`` and ``plots/derived_triangle.pdf``.
    """
    cli = anc.get_args()
    # read derived posterior file; the context manager closes it on error too
    derived_file = os.path.join(cli.full_path, 'derived_posterior.hdf5')
    with h5py.File(derived_file, 'r') as h5f:
        derived_names = np.array(h5f['derived_names'], dtype='S10')
        derived_posterior_in = np.array(h5f['derived_posterior'],
                                        dtype=np.float64)

    n_der = derived_names.shape[0]

    derived_posterior = anc.derived_posterior_check(derived_names,
                                                    derived_posterior_in)

    label_separation = -0.90  # if uses this, comment ax.xyaxis.labelpad = label_pad
    label_pad = 12  # it uses this, comment ax.xyaxis.set_label_coords()...
    label_size = 8
    ticklabel_size = 4

    if (n_der > 2):
        label_separation = -0.15 - (0.125 * (n_der - 2))

    # shrink the labels as the number of panels grows
    label_size = label_size - 1 * int(n_der / 2.5)

    labels_list = anc.derived_labels(derived_names, cli.m_type)

    k = anc.get_bins(derived_posterior, rule='doane')

    do_overplot = cli.overplot is not None
    if do_overplot:
        # OPEN summary_parameters.hdf5 FILE and take only the selected sample
        summary_file = os.path.join(cli.full_path, 'summary_parameters.hdf5')
        with h5py.File(summary_file, 'r') as s_h5f:
            s_overplot = '%04d' % (cli.overplot)
            read_der = s_h5f['parameters/%s/derived/parameters' %
                             (s_overplot)][...]

        overp_der = anc.derived_parameters_check(derived_names, read_der,
                                                 derived_posterior)

    fig = plt.figure(figsize=(6, 6))
    fig.subplots_adjust(hspace=0.05, wspace=0.05)

    for ix in range(0, n_der):
        x_data = derived_posterior[:, ix]
        x_min, x_max = anc.compute_limits(x_data, 0.05)
        if (x_min == x_max):
            # degenerate range: open it up so the axis is drawable
            x_min = x_min - 1.
            x_max = x_max + 1.

        for iy in range(0, n_der):
            y_data = derived_posterior[:, iy]
            y_min, y_max = anc.compute_limits(y_data, 0.05)
            if (y_min == y_max):
                y_min = y_min - 1.
                y_max = y_max + 1.

            if (iy > ix):  # correlation plot
                anc.print_both('correlation %s vs %s' %
                               (derived_names[ix], derived_names[iy]))
                ax = plt.subplot2grid((n_der + 1, n_der), (iy, ix))

                data_range = [[x_data.min(), x_data.max()],
                              [y_data.min(), y_data.max()]]

                # un-normalised counts are the default; the old `normed`
                # keyword is not passed because newer matplotlib rejects it
                hist2d_counts, xedges, yedges, image2d = ax.hist2d(
                    x_data,
                    y_data,
                    bins=k,
                    range=data_range,
                    cmap=cm.gray_r)

                new_k = k
                hist2d_counts_2, xedges_2, yedges_2 = np.histogram2d(
                    x_data,
                    y_data,
                    bins=new_k,
                    range=data_range,
                    density=False)

                # bin centres from the bin edges
                x_bins = 0.5 * (xedges_2[:-1] + xedges_2[1:])
                y_bins = 0.5 * (yedges_2[:-1] + yedges_2[1:])

                # nl is passed as the number of contour levels; matplotlib
                # picks the level values itself
                nl = 5
                ax.contour(
                    x_bins,
                    y_bins,
                    hist2d_counts_2.T,
                    nl,
                    cmap=cm.viridis,
                    linestyles='solid',
                    linewidths=0.5,
                )

                if do_overplot:
                    # plot selected overplot sample
                    _derived_overplot_line(ax, derived_names[ix],
                                           overp_der[ix], vertical=True)
                    _derived_overplot_line(ax, derived_names[iy],
                                           overp_der[iy], vertical=False)

                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                if (iy == n_der - 1):
                    set_xaxis(ax, label_size, label_separation, label_pad,
                              ticklabel_size, labels_list[ix],
                              [xedges[0], xedges[-1], 4])
                if (ix == 0):
                    set_yaxis(ax, label_size, label_separation, label_pad,
                              ticklabel_size, labels_list[iy],
                              [yedges[0], yedges[-1], 5])

                ax.set_ylim([y_min, y_max])
                ax.set_xlim([x_min, x_max])
                plt.draw()

            elif (iy == ix):  # distribution plot
                anc.print_both('%s histogram' % (derived_names[ix]))
                ax = plt.subplot2grid((n_der + 1, n_der), (ix, ix))
                # the last diagonal panel is drawn sideways
                last_panel = (ix == n_der - 1)
                if last_panel:
                    hist_orientation = 'horizontal'
                else:
                    hist_orientation = 'vertical'

                # plain or cumulative histogram, depending on cli.cumulative
                ax.hist(x_data,
                        bins=k,
                        range=[x_data.min(), x_data.max()],
                        histtype='stepfilled',
                        color='darkgrey',
                        edgecolor='None',
                        align='mid',
                        orientation=hist_orientation,
                        density=True,
                        stacked=True,
                        cumulative=bool(cli.cumulative))

                if do_overplot:
                    # horizontal histogram -> horizontal marker, and vice versa
                    _derived_overplot_line(ax, derived_names[ix],
                                           overp_der[ix],
                                           vertical=not last_panel)
                if last_panel:
                    ax.set_ylim([y_min, y_max])
                else:
                    ax.set_xlim([x_min, x_max])
                if do_overplot:
                    # same text the former print statement produced
                    anc.print_both(
                        '%s  overplot val =  %s  min =  %s  max =  %s' %
                        (derived_names[ix], overp_der[ix], x_data.min(),
                         x_data.max()))

                ax.get_xaxis().set_visible(False)
                ax.get_yaxis().set_visible(False)
                ax.set_title(labels_list[ix], fontsize=label_size)
                plt.draw()

    plot_folder = os.path.join(cli.full_path, 'plots')
    if (not os.path.isdir(plot_folder)):
        os.makedirs(plot_folder)
    correlation_file = os.path.join(plot_folder, 'derived_triangle.png')
    fig.savefig(correlation_file, bbox_inches='tight', dpi=300)
    anc.print_both('png done')
    correlation_file = os.path.join(plot_folder, 'derived_triangle.pdf')
    fig.savefig(correlation_file, bbox_inches='tight', dpi=96)
    anc.print_both('pdf done')
    plt.close(fig)

    return
Example #11
0
def main():
  """Summarise the final fitted parameters of a TRADES simulation.

  Reads the fitted parameters (and, when cli.boot is set, the bootstrap
  simulations) from cli.fpath, computes the derived parameters and, with
  bootstrap, the intervals of both sets, then prints everything to screen
  and to a '<parameters file>_<mass unit>.log' file.

  Exits through sys.exit if the bootstrap file cannot be read.
  """
  # =================================================================================
  # MAIN

  # parenthesised single-string prints give the same output on Python 2
  # and are also valid Python 3
  print("")
  print(" --- read_finalpar_v2.py --- ")
  print("")

  cli = get_args()
  fpath, idsim, lmflag, boot, mtype, mgauss, fit_type = cli.fpath, cli.idsim, cli.lmflag, cli.boot, cli.mtype, cli.mgauss, cli.fit_type

  nfit, NB, bodies_file, id_fit, id_all, nfit_list, cols_list, case_list = anc.get_fitted(fpath)

  MR_star = get_MR_start(fpath, bodies_file[0])
  if(len(MR_star.shape)==2):
    Mstar = MR_star[0,0]
  else:
    # rebuild a (2,2) array holding only the stellar mass value;
    # every other entry (including the mass error) is zero
    Mstar = MR_star[0].copy()
    MR_star = np.zeros((2,2))
    MR_star[0,0] = Mstar

  if(boot):
    file_boot = os.path.join(fpath, '%s_bootstrap_sim.dat' %(idsim))
    try:
      bootstrap = np.genfromtxt(file_boot)[:,1:]
    except (IOError, OSError, IndexError, ValueError):
      # missing/unreadable file or unexpected layout; KeyboardInterrupt
      # and SystemExit are deliberately not caught here
      sys.exit(' CANNOT FIND BOOTSTRAP FILE: %s' %(file_boot))
    # NOTE(review): the Normal sampling of the stellar mass is done when
    # mgauss is false -- confirm the polarity of the flag against get_args.
    if(mgauss):
      m_factor, mass_unit = anc.mass_type_factor(Ms=Mstar, mtype=mtype, mscale=True)
      m_factor_boot = m_factor
    else:
      m_factor, mass_unit = anc.mass_type_factor(Ms=1.0, mtype=mtype, mscale=False)
      np.random.seed(seed=cli.seed)
      Ms_gaussian = MR_star[0,0] + np.random.normal(0., 1., size=(np.shape(bootstrap)[0]))*MR_star[0,1] # if exists an error on the mass, it creates a Normal distribution for the values and use it to re-scale mp/Ms to mp.
      m_factor_boot = m_factor * Ms_gaussian # given the factor from Msun to mass_unit it multiply it by the Normal Mstar.
      m_factor = m_factor * MR_star[0,0]
  else:
    bootstrap = None
    m_factor, mass_unit = anc.mass_type_factor(Ms=Mstar, mtype=mtype, mscale=True)
  
  kel_file, kep_elem = anc.elements(fpath, int(idsim), int(lmflag))

  file_par = parameters_file(fpath, idsim, lmflag)
  names_par, par, fitness_s, fitness_x_dof_s, bic, chi2, ndata, dof = read_parameters(file_par, lmflag)

  units_par = anc.get_units(names_par, mass_unit)
  names_derived, derived_par = anc.compute_derived_parameters(names_par, kep_elem, id_fit, case_list, cols_list, par, conv_factor=m_factor)
  units_der = anc.get_units(names_derived, mass_unit)

  if(boot):
    # intervals of fitted and derived parameters from the bootstrap sample
    sigma_par = anc.compute_intervals(bootstrap, par, anc.percentile_val)
    names_derived, der_posterior = anc.compute_derived_posterior(names_par, kep_elem, id_fit, case_list, cols_list, bootstrap, conv_factor=m_factor_boot)
    derived_par, der_posterior = anc.adjust_derived_parameters(names_derived, derived_par, der_posterior)
    sigma_derived = anc.compute_intervals(der_posterior, derived_par, anc.percentile_val)
  else:
    sigma_par = None
    sigma_derived = None
    
  output_file = '%s_%s.log' %(os.path.splitext(file_par)[0], mass_unit)

  top_header, header = anc.get_header(anc.percentile_val)

  # print to screen and into file; the context manager closes the log
  # even if one of the printing helpers raises
  with open(output_file, 'w') as out:
    anc.print_both('', out)
    anc.print_both('# Number of bodies = %d' %(NB), out)
    anc.print_both('# OUTPUT FILE: %s' %(output_file), out)
    anc.print_both('# fitness = %s' %(fitness_s), out)
    anc.print_both('# fitness x dof = %s' %(fitness_x_dof_s), out)
    anc.print_both('# bic = %s' %(bic), out)
    anc.print_both('# chi2 = %s' %(chi2), out)
    anc.print_both('# ndata = %s' %(ndata), out)
    anc.print_both('# dof = %s' %(dof), out)
    anc.print_both('# Mstar = %.4f +/- %.4f M_sun' %(MR_star[0,0], MR_star[0,1]), out)
    anc.print_both('# FITTED PARAMETERS (nfit = %d)' %(nfit), out)

    anc.print_both('# FITTED PARAMETERS', out)
    anc.print_parameters(top_header, header, names_par, units_par, par, sigma_par, out)

    anc.print_both('# DERIVED PARAMETERS', out)
    anc.print_parameters(top_header, header, names_derived, units_der, derived_par, sigma_derived, out)
  
  return
Example #12
0
def main():
    print
    print ' TRADES: EMCEE confidence intervals'
    print

    cli = anc.get_args()

    # init trades
    pytrades_lib.pytrades.initialize_trades(os.path.join(cli.full_path, ''),
                                            '', 1)

    nfit, NB, bodies_file, id_fit, id_all, nfit_list, cols_list, case_list = anc.get_fitted(
        cli.full_path)
    ndata = pytrades_lib.pytrades.ndata
    nfree = pytrades_lib.pytrades.nfree
    dof = pytrades_lib.pytrades.dof

    # read emcee data
    emcee_file, emcee_best, folder_best = anc.get_emcee_file_and_best(
        cli.full_path, cli.temp_status)
    # get data from the hdf5 file
    names_par, parameter_boundaries, chains, acceptance_fraction, autocor_time, lnprobability, ln_err_const, completed_steps = anc.get_data(
        emcee_file, cli.temp_status)
    # print Memory occupation of ...
    anc.print_memory_usage(chains)

    nfit, nwalkers, nruns, nburnin, nruns_sel = anc.get_emcee_parameters(
        chains, cli.temp_status, cli.nburnin, completed_steps)

    #chains_T, parameter_boundaries = anc.select_transpose_convert_chains(nfit, nwalkers, nburnin, nruns, nruns_sel, m_factor, names_par, parameter_boundaries, chains)
    chains_T_full = np.zeros((nruns, nwalkers, nfit))
    for ii in xrange(0, nfit):
        chains_T_full[:, :, ii] = chains[:, :nruns, ii].T  # transpose

    chains_T, flatchain_posterior_0, lnprob_burnin, thin_steps = anc.thin_the_chains(
        cli.use_thin,
        nburnin,
        nruns,
        nruns_sel,
        autocor_time,
        chains_T_full,
        lnprobability,
        burnin_done=False)

    # lambda fix
    flatchain_posterior_0 = anc.fix_lambda(flatchain_posterior_0, names_par)

    # computes mass conversion factor
    #m_factor = anc.mass_conversion_factor(cli.m_type)
    MR_star = pytrades_lib.pytrades.mr_star
    m_factor_0, mass_unit = anc.mass_type_factor(Ms=1.0,
                                                 mtype=cli.m_type,
                                                 mscale=False)
    np.random.seed(seed=cli.seed)
    Ms_gaussian = MR_star[0, 0] + np.random.normal(
        0., 1., size=(np.shape(flatchain_posterior_0)[0])
    ) * MR_star[
        0,
        1]  # if exists an error on the mass, it creates a Normal distribution for the values and use it to re-scale mp/Ms to mp.
    m_factor_boot = m_factor_0 * Ms_gaussian  # given the factor from Msun to mass_unit it multiply it by the Normal Mstar.
    m_factor = m_factor_0 * MR_star[0, 0]

    # set label and legend names
    #kel_legends, labels_list = anc.keplerian_legend(names_par, cli.m_type)

    flatchain_posterior = flatchain_posterior_0.copy()
    for ifit in range(0, nfit):
        if ('Ms' in names_par[ifit]):
            flatchain_posterior[:,
                                ifit] = m_factor_0 * flatchain_posterior[:,
                                                                         ifit]
    posterior_file = os.path.join(cli.full_path, 'posterior.hdf5')
    p_h5f = h5py.File(posterior_file, 'w')
    p_h5f.create_dataset('posterior',
                         data=flatchain_posterior,
                         dtype=np.float64)
    p_h5f.create_dataset('loglikelihood',
                         data=lnprob_burnin.reshape((-1)),
                         dtype=np.float64)
    p_h5f['posterior'].attrs['nfit'] = nfit
    p_h5f['posterior'].attrs['nposterior'] = np.shape(flatchain_posterior)[0]
    p_h5f.create_dataset('parameter_names', data=names_par, dtype='S10')
    p_h5f.close()
    anc.print_both(' Saved posterior file: %s' % (posterior_file))

    top_header, header = anc.get_header(anc.percentile_val)

    # ==============================================================================
    # ==============================================================================
    # 2017-01-26 EMCEE NOW USED sqrt(e)cos(w), sqrt(e)sin(w)
    # GET INTERVALS
    # ==============================================================================
    # ==============================================================================
    def get_intervals(full_path,
                      id_sim,
                      names_par_in,
                      parameters_in,
                      flatchain_posterior_in,
                      derived_type=None,
                      full_output=False,
                      idx_sample=None,
                      summary_file_hdf5=None):

        names_trades = anc.emcee_names_to_trades(
            names_par_in)  # emcee to trades
        parameters_trades = anc.sqrte_to_e_fitting(
            parameters_in, names_par_in)  # emcee to trades

        names_par = names_par_in  # emcee kind
        parameters = parameters_in  # emcee kind
        flatchain_posterior = flatchain_posterior_in  # emcee kind

        loglhdx, checkx = pytrades_lib.pytrades.fortran_loglikelihood(
            np.array(parameters_trades, dtype=np.float64))
        loglhdx = loglhdx + ln_err_const

        out_folder = os.path.join(
            os.path.join(full_path, '%04d_sim' % (id_sim)), '')
        if (not os.path.isdir(out_folder)):
            os.makedirs(out_folder)
        out_file = os.path.join(out_folder, 'parameters_summary.txt')
        out = open(out_file, 'w')
        pytrades_lib.pytrades.path_change(out_folder)

        anc.print_both(' #', out)
        anc.print_both(' # --------------------------------- ', out)
        anc.print_both(' # PARAMETER VALUES -> %d' % (id_sim), out)
        fitness, lgllhd, check = pytrades_lib.pytrades.write_summary_files(
            id_sim, parameters_trades)

        kel_file, kep_elem = anc.elements(out_folder, id_sim, lmf=0)

        #sigma_par = anc.compute_intervals(flatchain_posterior, parameters, anc.percentile_val)
        sigma_par = anc.compute_sigma_hdi(flatchain_posterior,
                                          parameters)  # uses HDI
        sigma_par = sigma_par.T
        units_par = anc.get_units(names_par, mass_unit)

        if (not bool(check)):
            print 'WRTING WARNING FILE: %s' % (os.path.join(
                out_folder, 'WARNING.txt'))
            warn_o = open(os.path.join(out_folder, 'WARNING.txt'), 'w')
            warn_o.write(
                '*******\nWARNING: FITTED PARAMETERS COULD NOT BE PHYSICAL!\nWARNING: BE VERY CAREFUL WITH THIS PARAMETER SET!\n*******'
            )
            warn_o.close()

        nbins = anc.get_auto_bins(flatchain_posterior_0)

        names_derived, der_posterior = anc.compute_derived_posterior(
            names_par,
            kep_elem,
            id_fit,
            case_list,
            cols_list,
            flatchain_posterior,
            conv_factor=m_factor_boot)

        #der_posterior_T = der_posterior
        der_posterior_T = anc.derived_posterior_check(names_derived,
                                                      der_posterior)

        par_type = ''
        descr = ''
        if (str(derived_type).strip().lower() == 'median'):
            # MEDIAN PARAMETERS ID == 1050
            derived_par = np.percentile(der_posterior_T,
                                        50.,
                                        axis=0,
                                        interpolation='midpoint')
            par_type = 'MEDIAN:'
            descr = 'median of posterior and median of derived posterior'
        elif (str(derived_type).strip().lower() == 'mode'):
            # MODE-LIKE PARAMETERS -> id 3050
            #k = anc.get_bins(flatchain_posterior, rule='doane')

            der_bin, derived_par = anc.get_mode_parameters(
                der_posterior_T, nbins)
            par_type = 'MODE'
            descr = 'mode of posterior and mode of derived posterior'
        else:
            # ORIGINAL FITTING PARAMETERS ID == 0
            # or
            # MAX LNPROBABILITY -> id 2050
            names_derived, derived_par = anc.compute_derived_parameters(
                names_par,
                kep_elem,
                id_fit,
                case_list,
                cols_list,
                parameters,
                conv_factor=m_factor)
            derived_par, der_posterior_T = anc.adjust_derived_parameters(
                names_derived, derived_par, der_posterior_T)
            if (id_sim == 0):
                par_type = 'ORIGINAL FIT:'
                descr = 'initial set of parameters'
            elif (id_sim == 1051):
                par_type = 'MEDIAN PARAMETERS TO DERIVED:'
                descr = 'median of posterior and converted to derived parameter'
            elif (id_sim == 2050):
                par_type = 'MAX LNPROB'
            elif (id_sim == 3051):
                par_type = 'MODE PARAMETERS TO DERIVED:'
                descr = 'mode of posterior and converted to derived parameter'
            elif (id_sim == 666):
                par_type = 'SELECTED SAMPLE WITHIN HDI'
                # ***COMMENTED 2017-02-02: TO CHECK IF REALLY NEEDED
                #if(idx_sample is not None):
                #par_type = '%s <-> idx = %d' %(par_type, idx_sample)
                #derived_par = der_posterior_T[idx_sample, :]
                #for ider in range(0,np.shape(derived_par)[0]):
                ##print ider, names_derived[ider], names_derived[ider][0], names_derived[ider][1]
                #if(names_derived[ider][0] == 'm' and names_derived[ider][1] != 'A'):
                ##print 'doing'
                #derived_par[ider] = der_posterior_T[idx_sample, ider]*m_factor/m_factor_boot[idx_sample]
            elif (id_sim == 667):
                par_type = 'SELECTED SAMPLE CLOSE TO MEDIAN LGLLHD WITHIN POSTERIOR HDI'
                descr = ""
            elif (id_sim == 668):
                par_type = 'MAX LGLLHD WITHIN POSTERIOR HDI:'
                descr = "Select posterior within HDI and take the parameter set with higher loglikelihood."
            else:
                par_type = 'AD HOC'
                descr = "from input file"

        par_type = '%s %s' % (par_type, descr)
        #sigma_derived = anc.compute_intervals(der_posterior_T, derived_par, anc.percentile_val)
        sigma_derived = anc.compute_sigma_hdi(der_posterior_T, derived_par)
        sigma_derived = sigma_derived.T

        units_der = anc.get_units(names_derived, mass_unit)

        if (s_h5f is not None):
            s_id_sim = '%04d' % (id_sim)
            s_h5f.create_dataset('parameters/%s/fitted/parameters' %
                                 (s_id_sim),
                                 data=parameters,
                                 dtype=np.float64,
                                 compression='gzip')
            s_h5f.create_dataset('parameters/%s/fitted/names' % (s_id_sim),
                                 data=names_par,
                                 dtype='S10',
                                 compression='gzip')
            s_h5f.create_dataset('parameters/%s/fitted/units' % (s_id_sim),
                                 data=units_par,
                                 dtype='S15',
                                 compression='gzip')
            s_h5f.create_dataset('parameters/%s/fitted/sigma' % (s_id_sim),
                                 data=sigma_par.T,
                                 dtype=np.float64,
                                 compression='gzip')
            s_h5f['parameters/%s/fitted/sigma' %
                  (s_id_sim)].attrs['percentiles'] = anc.percentile_val

            s_h5f.create_dataset('parameters/%s/derived/parameters' %
                                 (s_id_sim),
                                 data=derived_par,
                                 dtype=np.float64,
                                 compression='gzip')
            s_h5f.create_dataset('parameters/%s/derived/names' % (s_id_sim),
                                 data=names_derived,
                                 dtype='S10',
                                 compression='gzip')
            s_h5f.create_dataset('parameters/%s/derived/units' % (s_id_sim),
                                 data=units_der,
                                 dtype='S15',
                                 compression='gzip')
            s_h5f.create_dataset('parameters/%s/derived/sigma' % (s_id_sim),
                                 data=sigma_derived.T,
                                 dtype=np.float64,
                                 compression='gzip')
            s_h5f['parameters/%s/derived/sigma' %
                  (s_id_sim)].attrs['percentiles'] = anc.percentile_val

            s_h5f['parameters/%s' %
                  (s_id_sim)].attrs['info'] = '%s ==> %s' % (s_id_sim,
                                                             par_type)
            s_h5f['parameters/%s' % (s_id_sim)].attrs['fitness'] = fitness
            s_h5f['parameters/%s' % (s_id_sim)].attrs['lgllhd'] = lgllhd
            s_h5f['parameters/%s' % (s_id_sim)].attrs['check'] = check
            if (idx_sample is not None):
                s_h5f['parameters/%s' %
                      (s_id_sim)].attrs['idx_sample'] = idx_sample

        #print '\nComputed sigma_par with shape ',np.shape(sigma_par)
        #print 'Computed sigma_derived with shape ',np.shape(sigma_derived)
        anc.print_both('\n# SUMMARY: %s' % (par_type), out)
        anc.print_both('# FITTED PARAMETERS', out)
        anc.print_parameters(top_header, header, names_par, units_par,
                             parameters, sigma_par, out)

        anc.print_both('# DERIVED PARAMETERS', out)
        anc.print_parameters(top_header, header, names_derived, units_der,
                             derived_par, sigma_derived, out)
        out.close()

        if (full_output):
            return out_folder, names_derived, der_posterior_T
        else:
            return out_folder
# ==============================================================================
# ==============================================================================

# ==============================================================================
## CREATE AN HDF5 FILE WITH CONFIDENCE INTERVALS AND ALL THE SUMMARY PARAMETERS
# ==============================================================================

    summary_file = os.path.join(cli.full_path, 'summary_parameters.hdf5')
    s_h5f = h5py.File(summary_file, 'w')

    ### COMPUTE CONFIDENCE INTERVALS OF THE FITTED PARAMETER DISTRIBUTIONS
    #ci_fitted = np.percentile(flatchain_posterior_0, anc.percentile_val[2:], axis=0, interpolation='midpoint') # (n_percentile-2 x nfit) ==> skip 1st and 2nd items, the 68.27th and 50th
    # ==============================================================================
    # HDI INSTEAD OF CREDIBLE INTERVALS
    # ==============================================================================
    nbins = anc.get_auto_bins(flatchain_posterior_0)
    hdi_ci, mode_parameters = anc.compute_hdi_full(flatchain_posterior_0,
                                                   mode_output=True)
    ci_fitted = hdi_ci.T
    print ' shape: hdi_ci = ', np.shape(hdi_ci), ' ci_fitted = ', np.shape(
        ci_fitted)
    # hdi_ci: nfit x nci
    # ci_fitted: nci x nfit
    # nci -> -1sigma(0) +1sigma(1) -2sigma(2) +2sigma(3) -3sigma(4) +3sigma(5)

    #sys.exit()

    units_par = anc.get_units(names_par, mass_unit)

    s_h5f.create_dataset('confidence_intervals/fitted/ci',
                         data=ci_fitted.T,
                         dtype=np.float64,
                         compression='gzip')
    s_h5f.create_dataset('confidence_intervals/fitted/names',
                         data=names_par,
                         dtype='S10',
                         compression='gzip')
    s_h5f.create_dataset('confidence_intervals/fitted/units',
                         data=units_par,
                         dtype='S15',
                         compression='gzip')
    s_h5f.create_dataset('confidence_intervals/fitted/percentiles',
                         data=np.array(anc.percentile_val[2:]),
                         dtype=np.float64,
                         compression='gzip')  # now it not true...

    s_h5f['confidence_intervals/fitted'].attrs['nfit'] = nfit
    s_h5f['confidence_intervals/fitted'].attrs['nfree'] = nfree
    s_h5f['confidence_intervals/fitted'].attrs['ndata'] = ndata
    s_h5f['confidence_intervals/fitted'].attrs['dof'] = dof
    # ==============================================================================

    # ==============================================================================
    ## ORIGINAL FITTING PARAMETERS ID == 0
    # ==============================================================================
    # save initial_fitting parameters into array
    original_fit_parameters = pytrades_lib.pytrades.fitting_parameters  # INITIAL PARAMETER SET (NEEDED ONLY TO HAVE THE PROPER ARRAY/VECTOR)
    #folder_0 = get_intervals(cli.full_path, 0, names_par, original_fit_parameters, flatchain_posterior_0, derived_type=None, summary_file_hdf5=s_h5f)
    # WARNING: original_fit_parameters from TRADES have to be converted to emcee parameters:
    # (ecosw,esinw) => (sqrtecosw, sqrtesinw)
    trades_names = anc.emcee_names_to_trades(names_par)
    original_parameters = anc.e_to_sqrte_fitting(original_fit_parameters,
                                                 trades_names)
    folder_0 = get_intervals(cli.full_path,
                             0,
                             names_par,
                             original_parameters,
                             flatchain_posterior_0,
                             derived_type=None,
                             summary_file_hdf5=s_h5f)
    # ==============================================================================

    print
    print

    # ==============================================================================
    ## MAX LNPROBABILITY AND PARAMETERS -> id 2050
    # ==============================================================================
    max_lnprob, max_lnprob_parameters, max_lnprob_perc68, max_lnprob_confint = anc.get_maxlnprob_parameters(
        lnprob_burnin, chains_T, flatchain_posterior_0)
    max_id1, max_id2 = anc.get_max_indices(lnprob_burnin)
    folder_2050, names_derived, der_posterior = get_intervals(
        cli.full_path,
        2050,
        names_par,
        max_lnprob_parameters,
        flatchain_posterior_0,
        derived_type=None,
        full_output=True,
        summary_file_hdf5=s_h5f)
    units_der = anc.get_units(names_derived, mass_unit)
    # write out the derived names and posterior into an hdf5 file
    der_post_file = os.path.join(cli.full_path, 'derived_posterior.hdf5')
    h5f = h5py.File(der_post_file, 'w')
    h5f.create_dataset('derived_names',
                       data=names_derived,
                       dtype='S10',
                       compression='gzip')
    h5f.create_dataset('derived_posterior',
                       data=der_posterior,
                       dtype=np.float64,
                       compression='gzip')
    h5f.create_dataset('units_derived',
                       data=units_der,
                       dtype='S15',
                       compression='gzip')
    h5f.close()
    # ==============================================================================

    ### COMPUTE CONFIDENCE INTERVALS OF THE DERIVED PARAMETER DISTRIBUTIONS
    #ci_derived = np.percentile(der_posterior, anc.percentile_val[2:], axis=0, interpolation='midpoint') # (n_percentile-1 x nder) ==> skip first item, the 68.27th
    # ==============================================================================
    # HDI INSTEAD OF CREDIBLE INTERVALS
    # ==============================================================================
    #npost_der, nder = np.shape(der_posterior)
    #k_der = anc.get_auto_bins(der_posterior)
    hdi_ci_derived = anc.compute_hdi_full(der_posterior, mode_output=False)
    ci_derived = hdi_ci_derived.T

    s_h5f.create_dataset('confidence_intervals/derived/ci',
                         data=ci_derived.T,
                         dtype=np.float64,
                         compression='gzip')
    s_h5f.create_dataset('confidence_intervals/derived/names',
                         data=names_derived,
                         dtype='S10',
                         compression='gzip')
    s_h5f.create_dataset('confidence_intervals/derived/units',
                         data=units_der,
                         dtype='S15',
                         compression='gzip')
    s_h5f.create_dataset('confidence_intervals/derived/percentiles',
                         data=np.array(anc.percentile_val[2:]),
                         dtype=np.float64,
                         compression='gzip')
    # ==============================================================================
    print
    print

    # ==============================================================================
    ## MEDIAN PARAMETERS ID == 1050
    # ==============================================================================
    median_parameters, median_perc68, median_confint = anc.get_median_parameters(
        flatchain_posterior_0)
    folder_1050 = get_intervals(cli.full_path,
                                1050,
                                names_par,
                                median_parameters,
                                flatchain_posterior_0,
                                derived_type='median',
                                summary_file_hdf5=s_h5f)
    ## MEDIAN PARAMETERS ID == 1051
    folder_1051 = get_intervals(cli.full_path,
                                1051,
                                names_par,
                                median_parameters,
                                flatchain_posterior_0,
                                derived_type=None,
                                summary_file_hdf5=s_h5f)
    # ==============================================================================

    print
    print

    # ==============================================================================
    # select n_samples from the posterior within the CI
    # ==============================================================================
    if (cli.n_samples > 0):
        anc.print_both(' Selecting %d samples from the posterior ...' %
                       (cli.n_samples))
        sys.stdout.flush()
        samples_fit_par = anc.take_n_samples(flatchain_posterior_0,
                                             ci_fitted[0:2, :],
                                             n_samples=cli.n_samples)
        samples_fit_par[
            0, :] = median_parameters  # first sample as the median of the posterior
        anc.print_both(' Running TRADES and computing the T0s and RVs ...')
        samples_file = os.path.join(cli.full_path, 'samples_ttra_rv.hdf5')
        anc.print_both(' Saving into %s' % (samples_file))
        smp_h5 = h5py.File(samples_file, 'w')
        save_ttra_and_rv_from_samples(samples_fit_par, names_par, NB,
                                      cli.n_samples, smp_h5)
        #tra_gr = smp_h5.create_group('T0')
        #for key in ttra_samples.keys():
        #tra_gr.create_dataset(key, data=ttra_samples[key], dtype=np.float64, compression='gzip')
        #rv_gr =  smp_h5.create_group('RV')
        #for key in rv_samples.keys():
        #rv_gr.create_dataset(key, data=rv_samples[key], dtype=np.float64, compression='gzip')
        #rv_gr['time_rv_mod'].attrs['tepoch'] = pytrades_lib.pytrades.tepoch
        smp_h5.close()
        anc.print_both(' ... done')
        sys.stdout.flush()
    #sys.exit()
# ==============================================================================

    print
    print

    # ==============================================================================
    ## MODE-LIKE PARAMETERS -> id 3050
    # ==============================================================================
    ## take the mean of 5 bin centered to the higher bin

    #anc.print_both('nbins = %d' %(nbins))
    #sys.stdout.flush()
    #mode_bin, mode_parameters = anc.get_mode_parameters(flatchain_posterior_0, nbins)
    # mode_parameters computed at the beginning with hdi

    if (np.any(np.isnan(mode_parameters))):
        print 'Some values are Nan, skip the mode parameters'
    else:
        folder_3050 = get_intervals(cli.full_path,
                                    3050,
                                    names_par,
                                    mode_parameters,
                                    flatchain_posterior_0,
                                    derived_type='mode',
                                    summary_file_hdf5=s_h5f)
        ## MODE-LIKE PARAMETERS -> id 3051
        folder_3051 = get_intervals(cli.full_path,
                                    3051,
                                    names_par,
                                    mode_parameters,
                                    flatchain_posterior_0,
                                    derived_type=None,
                                    summary_file_hdf5=s_h5f)
# ==============================================================================

    print
    print

    # ==============================================================================
    # ONE SAMPLE PARAMETER SET --> 666
    # ==============================================================================
    name_par, name_excluded = anc.get_sample_list(cli.sample_str, names_par)
    sample_parameters, idx_sample = anc.pick_sample_parameters(
        flatchain_posterior_0,
        names_par,
        name_par=name_par,
        name_excluded=name_excluded,
        post_ci=ci_fitted[0:2, :])
    if (sample_parameters is not None):
        folder_666 = get_intervals(cli.full_path,
                                   666,
                                   names_par,
                                   sample_parameters,
                                   flatchain_posterior_0,
                                   idx_sample=idx_sample,
                                   summary_file_hdf5=s_h5f)
        s_h5f['parameters/%04d' % (666)].attrs['par_selection'] = name_par
        if (name_excluded is not None):
            s_h5f['parameters/%04d' %
                  (666)].attrs['par_excluded'] = name_excluded
    else:
        print 'NONE SAMPLE PARAMETERS!!!'
# ==============================================================================

# ==============================================================================
## SELECT AD HOC PARAMETERS:
# ==============================================================================
#adhoc_par = median_parameters.copy()
##adhoc_par[10:] = mode_parameters[10:].copy()
#adhoc_par[12] = mode_parameters[12].copy()
#if(cli.overplot is not None):
    if (cli.adhoc is not None):
        print cli.overplot, cli.adhoc
        adhoc_names, adhoc_par_trades = anc.read_fitted_file(cli.adhoc)
        adhoc_par = anc.e_to_sqrte_fitting(adhoc_par_trades, adhoc_names)
        folder_777 = get_intervals(cli.full_path,
                                   777,
                                   names_par,
                                   adhoc_par,
                                   flatchain_posterior_0,
                                   derived_type=777,
                                   summary_file_hdf5=s_h5f)
# ==============================================================================

# ==============================================================================
# select the sample within post_ci and close to median lgllhd --> 667
# ==============================================================================
    sample2_parameters, sample2_lgllhd = anc.get_sample_by_sorted_lgllhd(
        flatchain_posterior_0,
        lnprob_burnin.T,
        #post_ci = ci_fitted[0:2,:]
        post_ci=ci_fitted.T)
    folder_667 = get_intervals(cli.full_path,
                               667,
                               names_par,
                               sample2_parameters,
                               flatchain_posterior_0,
                               derived_type=667,
                               summary_file_hdf5=s_h5f)
    # ==============================================================================

    # ==============================================================================
    # another kind of selection: parameter set within HDI, then take the max(loglikelihood) --> 668
    # ==============================================================================
    name_par, name_excluded = anc.get_sample_list(cli.sample_str, names_par)
    #sample3_parameters, sample3_lgllhd = anc.get_sample_by_par_and_lgllhd(flatchain_posterior_0,
    #lnprob_burnin.T,
    #names_par,
    #post_ci = ci_fitted[0:2,:],
    #name_par= name_par)
    sample3_parameters, sample3_lgllhd = \
                               anc.select_maxlglhd_with_hdi(flatchain_posterior_0,
                                                            #ci_fitted[0:2,:],
                                                            ci_fitted.T,
                                                            lnprob_burnin.T
                                                            )
    folder_668 = get_intervals(cli.full_path,
                               668,
                               names_par,
                               sample3_parameters,
                               flatchain_posterior_0,
                               derived_type=668,
                               summary_file_hdf5=s_h5f)
    # ==============================================================================

    s_h5f.close()

    print

    # ==============================================================================
    # print into file CONFIDENCE INTERVALS of fitted and derived parameters
    # ==============================================================================
    ci_file = os.path.join(cli.full_path, 'confidence_intervals.dat')
    oci = open(ci_file, 'w')
    anc.print_both('\n# SUMMARY:\n# CONFIDENCE INTERVALS', oci)

    anc.print_both('## FITTED PARAMETERS', oci)
    #anc.print_confidence_intervals(anc.percentile_val[2:], conf_interv=ci_fitted, name_parameters=names_par, unit_parameters=units_par, output=oci)
    anc.print_hdi(conf_interv=ci_fitted,
                  name_parameters=names_par,
                  unit_parameters=units_par,
                  output=oci)

    anc.print_both('## DERIVED PARAMETERS', oci)
    #anc.print_confidence_intervals(anc.percentile_val[2:], conf_interv=ci_derived, name_parameters=names_derived, unit_parameters=units_der, output=oci)
    anc.print_hdi(conf_interv=ci_derived,
                  name_parameters=names_derived,
                  unit_parameters=units_der,
                  output=oci)

    oci.close()
    # ==============================================================================

    pytrades_lib.pytrades.deallocate_variables()

    return
Example #13
0
    def get_intervals(full_path,
                      id_sim,
                      names_par_in,
                      parameters_in,
                      flatchain_posterior_in,
                      derived_type=None,
                      full_output=False,
                      idx_sample=None,
                      summary_file_hdf5=None):
        """Write the summary of one parameter set with HDI-based uncertainties.

        The folder ``<full_path>/<id_sim formatted as %04d>_sim/`` is created
        when missing and ``parameters_summary.txt`` is written inside it.  The
        TRADES output path is switched to that folder, the TRADES summary
        files are written for the given parameters, and the fitted and derived
        parameters are printed together with the intervals returned by
        ``anc.compute_sigma_hdi``.  A ``WARNING.txt`` file is added to the
        folder when TRADES reports a false ``check`` flag.

        Besides its arguments the function reads these names from the
        enclosing scope: ``anc``, ``pytrades_lib``, ``np``, ``os``,
        ``mass_unit``, ``id_fit``, ``case_list``, ``cols_list``, ``m_factor``,
        ``m_factor_boot``, ``top_header`` and ``header``.

        Parameters
        ----------
        full_path : str
            Base directory in which the ``NNNN_sim`` folder is created.
        id_sim : int
            Identifier of the parameter set; it names the output folder and
            selects the label written in the summary (0, 1051, 2050, 3051,
            666, 667, 668; any other value is labelled 'AD HOC').
        names_par_in : sequence of str
            Names of the fitted parameters in the emcee parameterisation.
        parameters_in : array-like
            Values of the fitted parameters in the emcee parameterisation.
        flatchain_posterior_in : array-like
            Posterior of the fitted parameters (emcee parameterisation).
        derived_type : object, optional
            ``'median'`` or ``'mode'`` (compared after ``str()``, ``strip()``
            and ``lower()``) take the derived parameters from the derived
            posterior; any other value derives them from ``parameters_in``.
        full_output : bool, optional
            When true, also return the derived names and derived posterior.
        idx_sample : optional
            Stored as the ``idx_sample`` attribute in the HDF5 group when it
            is not None.
        summary_file_hdf5 : optional
            Open, writable HDF5 file object (presumably an ``h5py.File`` --
            it must provide ``create_dataset`` and item access with
            ``attrs``).  HDF5 output is written only when this is given.

        Returns
        -------
        out_folder : str
            Path of the folder created for this parameter set (with a
            trailing separator).
        names_derived, der_posterior_T
            Returned in addition, as a 3-tuple with ``out_folder``, only when
            ``full_output`` is true.
        """
        # The Fortran library expects the TRADES parameterisation, while the
        # values handled here are in the emcee one.
        parameters_trades = anc.sqrte_to_e_fitting(parameters_in,
                                                   names_par_in)

        # The returned values are not used; the call itself is kept in case
        # the Fortran library depends on it.
        # NOTE(review): confirm and drop the call if it has no side effects.
        pytrades_lib.pytrades.fortran_loglikelihood(
            np.array(parameters_trades, dtype=np.float64))

        # The outer join with '' appends a trailing path separator.
        out_folder = os.path.join(
            os.path.join(full_path, '%04d_sim' % (id_sim)), '')
        if not os.path.isdir(out_folder):
            os.makedirs(out_folder)
        out_file = os.path.join(out_folder, 'parameters_summary.txt')

        # The context manager closes the summary file even if one of the
        # library calls below raises.
        with open(out_file, 'w') as out:
            pytrades_lib.pytrades.path_change(out_folder)

            anc.print_both(' #', out)
            anc.print_both(' # --------------------------------- ', out)
            anc.print_both(' # PARAMETER VALUES -> %d' % (id_sim), out)
            fitness, lgllhd, check = pytrades_lib.pytrades.write_summary_files(
                id_sim, parameters_trades)

            _, kep_elem = anc.elements(out_folder, id_sim, lmf=0)

            # HDI-based intervals of the fitted parameters, transposed for
            # printing.
            sigma_par = anc.compute_sigma_hdi(flatchain_posterior_in,
                                              parameters_in).T
            units_par = anc.get_units(names_par_in, mass_unit)

            if not bool(check):
                warn_file = os.path.join(out_folder, 'WARNING.txt')
                # Single-argument print(): same output on Python 2 and 3.
                print('WRTING WARNING FILE: %s' % (warn_file))
                with open(warn_file, 'w') as warn_o:
                    warn_o.write(
                        '*******\n'
                        'WARNING: FITTED PARAMETERS COULD NOT BE PHYSICAL!\n'
                        'WARNING: BE VERY CAREFUL WITH THIS PARAMETER SET!\n'
                        '*******')

            # Binning is taken from the posterior passed to this call.
            nbins = anc.get_auto_bins(flatchain_posterior_in)

            names_derived, der_posterior = anc.compute_derived_posterior(
                names_par_in,
                kep_elem,
                id_fit,
                case_list,
                cols_list,
                flatchain_posterior_in,
                conv_factor=m_factor_boot)
            der_posterior_T = anc.derived_posterior_check(names_derived,
                                                          der_posterior)

            kind = str(derived_type).strip().lower()
            if kind == 'median':
                # MEDIAN PARAMETERS ID == 1050
                derived_par = np.percentile(der_posterior_T,
                                            50.,
                                            axis=0,
                                            interpolation='midpoint')
                par_type = 'MEDIAN:'
                descr = 'median of posterior and median of derived posterior'
            elif kind == 'mode':
                # MODE-LIKE PARAMETERS -> id 3050
                _, derived_par = anc.get_mode_parameters(
                    der_posterior_T, nbins)
                par_type = 'MODE'
                descr = 'mode of posterior and mode of derived posterior'
            else:
                # Every other set: derive the parameters directly from the
                # fitted values instead of from the derived posterior.
                names_derived, derived_par = anc.compute_derived_parameters(
                    names_par_in,
                    kep_elem,
                    id_fit,
                    case_list,
                    cols_list,
                    parameters_in,
                    conv_factor=m_factor)
                derived_par, der_posterior_T = anc.adjust_derived_parameters(
                    names_derived, derived_par, der_posterior_T)
                # id_sim -> (label, description); unknown ids are 'AD HOC'.
                labels = {
                    0: ('ORIGINAL FIT:', 'initial set of parameters'),
                    1051: ('MEDIAN PARAMETERS TO DERIVED:',
                           'median of posterior and converted to derived parameter'),
                    2050: ('MAX LNPROB', ''),
                    3051: ('MODE PARAMETERS TO DERIVED:',
                           'mode of posterior and converted to derived parameter'),
                    666: ('SELECTED SAMPLE WITHIN HDI', ''),
                    667: ('SELECTED SAMPLE CLOSE TO MEDIAN LGLLHD WITHIN POSTERIOR HDI',
                          ''),
                    668: ('MAX LGLLHD WITHIN POSTERIOR HDI:',
                          'Select posterior within HDI and take the parameter set with higher loglikelihood.'),
                }
                par_type, descr = labels.get(id_sim,
                                             ('AD HOC', 'from input file'))

            par_type = '%s %s' % (par_type, descr)
            sigma_derived = anc.compute_sigma_hdi(der_posterior_T,
                                                  derived_par).T
            units_der = anc.get_units(names_derived, mass_unit)

            if summary_file_hdf5 is not None:
                s_id_sim = '%04d' % (id_sim)
                group = 'parameters/%s' % (s_id_sim)
                # Fitted and derived sets share the same dataset layout.
                for subset, values, names, units, sigma in (
                        ('fitted', parameters_in, names_par_in, units_par,
                         sigma_par),
                        ('derived', derived_par, names_derived, units_der,
                         sigma_derived)):
                    base = '%s/%s' % (group, subset)
                    summary_file_hdf5.create_dataset('%s/parameters' % (base),
                                                     data=values,
                                                     dtype=np.float64,
                                                     compression='gzip')
                    summary_file_hdf5.create_dataset('%s/names' % (base),
                                                     data=names,
                                                     dtype='S10',
                                                     compression='gzip')
                    summary_file_hdf5.create_dataset('%s/units' % (base),
                                                     data=units,
                                                     dtype='S15',
                                                     compression='gzip')
                    # Transposed back: the file stores the orientation
                    # returned by anc.compute_sigma_hdi.
                    summary_file_hdf5.create_dataset('%s/sigma' % (base),
                                                     data=sigma.T,
                                                     dtype=np.float64,
                                                     compression='gzip')
                    summary_file_hdf5['%s/sigma' % (base)].attrs[
                        'percentiles'] = anc.percentile_val

                summary_file_hdf5[group].attrs['info'] = '%s ==> %s' % (
                    s_id_sim, par_type)
                summary_file_hdf5[group].attrs['fitness'] = fitness
                summary_file_hdf5[group].attrs['lgllhd'] = lgllhd
                summary_file_hdf5[group].attrs['check'] = check
                if idx_sample is not None:
                    summary_file_hdf5[group].attrs['idx_sample'] = idx_sample

            anc.print_both('\n# SUMMARY: %s' % (par_type), out)
            anc.print_both('# FITTED PARAMETERS', out)
            anc.print_parameters(top_header, header, names_par_in, units_par,
                                 parameters_in, sigma_par, out)

            anc.print_both('# DERIVED PARAMETERS', out)
            anc.print_parameters(top_header, header, names_derived, units_der,
                                 derived_par, sigma_derived, out)

        if full_output:
            return out_folder, names_derived, der_posterior_T
        return out_folder
Example #14
0
def main():
    """Run a TRADES + emcee simulation configured from the command line.

    Steps, in order:

    1. parse the command-line arguments and seed numpy's global random
       generator with ``cli.seed``;
    2. initialise the Fortran TRADES library and read from it the problem
       sizes, the parameter names, the initial parameters and the boundaries;
    3. convert names, parameters and boundaries with the ``anc`` helpers
       (sqrt(e) is used in emcee instead of e, which is used in Fortran);
    4. create the run folder and log file and log a summary of the problem;
    5. build the walkers (from a previous emcee run or from scratch), run a
       1000-step pre-run and reset the sampler;
    6. run the sampler, either in blocks of ``nsave`` steps written to
       ``emcee_summary.hdf5`` after every block, or in a single run that is
       written at the end;
    7. close the pool, log the elapsed time, close the log file and
       deallocate the Fortran variables.

    Side effects: sets the module globals ``inv_dof`` and ``ln_err_const``.
    Calls ``sys.exit()`` when the previous-TRADES file has a number of
    parameters different from ``nfit`` or when
    ``anc.get_last_emcee_iteration`` reports a failure for the previous
    emcee run.
    """
    # READ COMMAND LINE ARGUMENTS
    cli = get_args()

    # STARTING TIME
    start = time.time()

    # RENAME
    working_path = cli.full_path
    nthreads = cli.nthreads
    # Seed the global numpy generator: building a RandomState instance and
    # discarding it (as done before) leaves every np.random call unseeded.
    np.random.seed(cli.seed)

    # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB -> PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ...
    pytrades_lib.pytrades.initialize_trades(working_path, cli.sub_folder,
                                            nthreads)

    # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE
    n_bodies = pytrades_lib.pytrades.n_bodies  # NUMBER OF TOTAL BODIES OF THE SYSTEM
    n_planets = n_bodies - 1  # NUMBER OF PLANETS IN THE SYSTEM
    ndata = pytrades_lib.pytrades.ndata  # TOTAL NUMBER OF DATA AVAILABLE
    npar = pytrades_lib.pytrades.npar  # NUMBER OF TOTAL PARAMETERS ~n_planets X 6
    nfit = pytrades_lib.pytrades.nfit  # NUMBER OF PARAMETERS TO FIT
    nfree = pytrades_lib.pytrades.nfree  # NUMBER OF FREE PARAMETERS (ie nrvset)
    dof = pytrades_lib.pytrades.dof  # NUMBER OF DEGREES OF FREEDOM = NDATA - NFIT
    global inv_dof
    inv_dof = pytrades_lib.pytrades.inv_dof

    # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT IT TO PYTHON STRINGS
    str_len = pytrades_lib.pytrades.str_len
    temp_names = pytrades_lib.pytrades.get_parameter_names(nfit, str_len)
    trades_names = anc.convert_fortran_charray2python_strararray(temp_names)
    parameter_names = anc.trades_names_to_emcee(trades_names)

    if (cli.trades_previous is not None):
        temp_names, trades_parameters = anc.read_fitted_file(
            cli.trades_previous)
        if (nfit != np.shape(trades_parameters)[0]):
            anc.print_both(
                ' NUMBER OF PARAMETERS (%d) IN TRADES-PREVIOUS FILE DOES NOT '
                'MATCH THE CURRENT CONFIGURATION nfit=%d\nSTOP' %
                (np.shape(trades_parameters)[0], nfit))
            sys.exit()
        del temp_names
    else:
        # INITIAL PARAMETER SET (NEEDED ONLY TO HAVE THE PROPER ARRAY/VECTOR)
        trades_parameters = pytrades_lib.pytrades.fitting_parameters
    # save initial_fitting parameters into array
    original_fit_parameters = trades_parameters.copy()
    fitting_parameters = anc.e_to_sqrte_fitting(trades_parameters,
                                                trades_names)

    trades_minmax = pytrades_lib.pytrades.parameters_minmax  # PARAMETER BOUNDARIES
    parameters_minmax = anc.e_to_sqrte_boundaries(trades_minmax, trades_names)

    # RADIAL VELOCITIES SET
    n_rv = pytrades_lib.pytrades.nrv
    n_set_rv = pytrades_lib.pytrades.nrvset

    # TRANSITS SET: count the planets having at least one T0
    n_t0 = pytrades_lib.pytrades.nt0
    n_t0_sum = pytrades_lib.pytrades.ntts
    n_set_t0 = 0
    for i in range(0, n_bodies - 1):
        if (n_t0[i] > 0):
            n_set_t0 += 1

    # global constant for the loglhd, as computed by the Fortran library
    global ln_err_const
    ln_err_const = pytrades_lib.pytrades.ln_err_const

    # SET EMCEE PARAMETERS:
    nwalkers, nruns, nsave, npost = get_emcee_arguments(cli, nfit)

    # INITIALISE SCRIPT FOLDER/LOG FILE
    working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder)

    anc.print_both('', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both(' pyTRADES', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both('', of_run)
    anc.print_both(' WORKING PATH = %s' % (working_path), of_run)
    anc.print_both(' NUMBER OF THREADS = %d' % (nthreads), of_run)
    anc.print_both(
        ' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d' %
        (ndata, nfit, nfree, dof), of_run)
    anc.print_both(' Total N_RV = %d for %d set(s)' % (n_rv, n_set_rv), of_run)
    anc.print_both(
        ' Total N_T0 = %d for %d out of %d planet(s)' %
        (n_t0_sum, n_set_t0, n_planets), of_run)
    anc.print_both(' %s = %.7f' % ('log constant error = ', ln_err_const),
                   of_run)
    anc.print_both(
        ' %s = %.7f' % ('IN FORTRAN log constant error = ',
                        pytrades_lib.pytrades.ln_err_const), of_run)
    anc.print_both(' seed = %s' % (str(cli.seed)), of_run)

    if (cli.trades_previous is not None):
        anc.print_both(
            '\n ******\n INITIAL FITTING PARAMETERS FROM PREVIOUS'
            ' TRADES-EMCEE SIM IN FILE:\n %s\n ******\n' %
            (cli.trades_previous), of_run)

    anc.print_both(' ORIGINAL PARAMETER VALUES -> 0000', of_run)
    fitness_0000, lgllhd_0000, check_0000 = pytrades_lib.pytrades.write_summary_files(
        0, original_fit_parameters)
    anc.print_both(' ', of_run)
    anc.print_both(' TESTING LNPROB_SQ ...', of_run)

    # evaluate both log-probability functions on the starting point so that
    # the two parameterisations can be compared in the log
    lgllhd_zero = lnprob(trades_parameters)
    lgllhd_sq_zero = lnprob_sq(fitting_parameters, parameter_names)

    anc.print_both(' ', of_run)
    anc.print_both(
        ' %15s %23s %23s %15s %23s' %
        ('trades_names', 'original_trades', 'trades_par', 'emcee_names',
         'emcee_par'), of_run)
    for ifit in range(0, nfit):
        anc.print_both(
            ' %15s %23.16e %23.16e %15s %23.16e' %
            (trades_names[ifit], original_fit_parameters[ifit],
             trades_parameters[ifit], parameter_names[ifit],
             fitting_parameters[ifit]), of_run)
    anc.print_both(' ', of_run)
    anc.print_both(
        ' %15s %23.16e %23.16e %15s %23.16e' %
        ('lnprob', lgllhd_0000, lgllhd_zero, 'lnprob_sq', lgllhd_sq_zero),
        of_run)
    anc.print_both(' ', of_run)

    # INITIALISES THE WALKERS
    if (cli.emcee_previous is not None):
        anc.print_both(
            ' Use a previous emcee simulation: %s' % (cli.emcee_previous),
            of_run)
        last_p0, old_nwalkers, last_done = anc.get_last_emcee_iteration(
            cli.emcee_previous, nwalkers)
        if (not last_done):
            anc.print_both(
                '**STOP: USING A DIFFERENT NUMBER OF WALKERS (%d) W.R.T. PREVIOUS EMCEE SIMULATION (%d).'
                % (nwalkers, old_nwalkers), of_run)
            sys.exit()
        p0 = last_p0
    else:
        p0 = compute_initial_walkers(nfit, nwalkers, fitting_parameters,
                                     parameters_minmax, parameter_names,
                                     cli.delta_sigma, of_run)

    anc.print_both(
        ' emcee chain: nwalkers = %d nruns = %d' % (nwalkers, nruns), of_run)
    anc.print_both(' sampler ... ', of_run)

    threads_pool = emcee.interruptible_pool.InterruptiblePool(nthreads)
    # lnprob_sq + parameter_names: needed to use sqrt(e) in emcee instead of
    # e (in fortran)
    sampler = emcee.EnsembleSampler(nwalkers,
                                    nfit,
                                    lnprob_sq,
                                    pool=threads_pool,
                                    args=[parameter_names])

    anc.print_both(' TEST A PRE-EMCEE OF 1000 STEPS', of_run)
    p0, prob, state = sampler.run_mcmc(p0, 1000)
    anc.print_both(' TEST A RESET OF THE SAMPLER', of_run)
    sampler.reset()

    anc.print_both(' ready to go', of_run)
    anc.print_both(' with nsave = %s' % (str(nsave)), of_run)
    sys.stdout.flush()

    summary_file = os.path.join(working_folder, 'emcee_summary.hdf5')

    if (nsave != False):
        # save temporary sampling during emcee every nsave steps
        if (os.path.isfile(summary_file)):
            os.remove(summary_file)
        with h5py.File(summary_file, 'a') as f_hdf5:
            f_hdf5.create_dataset('parameter_names',
                                  data=parameter_names,
                                  dtype='S10')
            f_hdf5.create_dataset('boundaries',
                                  data=parameters_minmax,
                                  dtype=np.float64)
            f_hdf5.create_dataset('chains', (nwalkers, nruns, nfit),
                                  dtype=np.float64)
            temp_lnprob = f_hdf5.create_dataset('lnprobability',
                                                (nwalkers, nruns),
                                                dtype=np.float64)
            temp_lnprob.attrs['ln_err_const'] = ln_err_const
            # one acceptance fraction per walker, as in the single-run branch
            # below where the dataset is built from sampler.acceptance_fraction
            f_hdf5.create_dataset('acceptance_fraction',
                                  data=np.zeros((nwalkers)),
                                  dtype=np.float64)
            f_hdf5.create_dataset('autocor_time',
                                  data=np.zeros((nfit)),
                                  dtype=np.float64)
        pos = p0
        nchains = int(nruns / nsave)
        state = None
        anc.print_both(' Running emcee with temporary saving', of_run)
        sys.stdout.flush()
        for i in range(0, nchains):
            anc.print_both('', of_run)
            anc.print_both(' iter: %6d ' % (i + 1), of_run)
            # [aaa, bbb) is the range of steps produced by this block
            aaa = i * nsave
            bbb = aaa + nsave
            pos, prob, state = sampler.run_mcmc(pos, N=nsave, rstate0=state)
            anc.print_both('completed %d steps of %d' % (bbb, nruns), of_run)
            with h5py.File(summary_file, 'a') as f_hdf5:
                temp_dset = f_hdf5['chains']
                temp_dset[:, aaa:bbb, :] = sampler.chain[:, aaa:bbb, :]
                temp_dset.attrs['completed_steps'] = bbb
                temp_lnprob = f_hdf5['lnprobability']
                temp_lnprob[:, aaa:bbb] = sampler.lnprobability[:, aaa:bbb]

                acceptance_fraction = sampler.acceptance_fraction
                # write INTO the dataset: a plain assignment to the name
                # would only rebind the local and leave the file untouched
                temp_acceptance = f_hdf5['acceptance_fraction']
                temp_acceptance[...] = acceptance_fraction
                mean_acceptance_fraction = np.mean(acceptance_fraction)

                acor_time = anc.compute_acor_time(sampler, steps_done=bbb)
                temp_acor = f_hdf5['autocor_time']
                temp_acor[...] = acor_time
            sys.stdout.flush()
        anc.print_both('', of_run)
        anc.print_both(
            '...done with saving temporary total shape = %s' %
            (str(np.shape(sampler.chain))), of_run)
        anc.print_both('', of_run)
        sys.stdout.flush()

    else:
        # GOOD COMPLETE SINGLE RUNNING OF EMCEE, WITHOUT REMOVING THE BURN-IN
        anc.print_both(' Running full emcee ...', of_run)
        sys.stdout.flush()
        sampler.run_mcmc(p0, nruns)
        anc.print_both('done', of_run)
        anc.print_both('', of_run)
        sys.stdout.flush()
        acceptance_fraction = sampler.acceptance_fraction
        mean_acceptance_fraction = np.mean(acceptance_fraction)
        acor_time = anc.compute_acor_time(sampler)
        lnprobability = sampler.lnprobability
        # save chains with original shape as hdf5 file
        with h5py.File(summary_file, 'w') as f_hdf5:
            f_hdf5.create_dataset('chains',
                                  data=sampler.chain,
                                  dtype=np.float64)
            f_hdf5['chains'].attrs['completed_steps'] = nruns
            f_hdf5.create_dataset('parameter_names',
                                  data=parameter_names,
                                  dtype='S10')
            f_hdf5.create_dataset('boundaries',
                                  data=parameters_minmax,
                                  dtype=np.float64)
            f_hdf5.create_dataset('acceptance_fraction',
                                  data=acceptance_fraction,
                                  dtype=np.float64)
            f_hdf5.create_dataset('autocor_time',
                                  data=acor_time,
                                  dtype=np.float64)
            f_hdf5.create_dataset('lnprobability',
                                  data=lnprobability,
                                  dtype=np.float64)
            f_hdf5['lnprobability'].attrs['ln_err_const'] = ln_err_const

    anc.print_both(
        " Mean_acceptance_fraction should be between [0.25-0.5] = %.6f" %
        (mean_acceptance_fraction), of_run)
    anc.print_both('', of_run)

    # close the pool of threads
    threads_pool.close()
    threads_pool.terminate()
    threads_pool.join()

    anc.print_both('COMPLETED EMCEE', of_run)

    elapsed = time.time() - start
    elapsed_d, elapsed_h, elapsed_m, elapsed_s = anc.computation_time(elapsed)

    anc.print_both('', of_run)
    anc.print_both(
        ' pyTRADES: EMCEE FINISHED in %2d day %02d hour %02d min %.2f sec - bye bye'
        % (int(elapsed_d), int(elapsed_h), int(elapsed_m), elapsed_s), of_run)
    anc.print_both('', of_run)
    of_run.close()
    pytrades_lib.pytrades.deallocate_variables()

    return
def _is_angle_name(name):
  """Return True when `name` contains one of the tags 'w', 'lN' or 'mA'."""
  return ('w' in name or 'lN' in name or 'mA' in name)


def _mark_overplot(draw_line, value, name):
  """Draw the dashed overplot marker(s) for `value` with `draw_line`.

  `draw_line` is a bound ``ax.axvline`` or ``ax.axhline``.  For angle-like
  names (see `_is_angle_name`) two lines are drawn, at ``value % 360`` and at
  ``value % -360``; otherwise a single line is drawn at ``value``.
  """
  if(_is_angle_name(name)):
    positions = (value%360., value%-360.)
  else:
    positions = (value,)
  for position in positions:
    draw_line(position, color='C0', ls='--', lw=1.1, alpha=0.7)


def main():
  """Plot the triangle (correlation) plot of the derived posterior.

  Reads ``derived_names`` and ``derived_posterior`` from
  ``derived_posterior.hdf5`` in ``cli.full_path``, draws one 2D histogram
  with contours for every pair of derived parameters (lower triangle) and one
  1D histogram per parameter (diagonal; cumulative when ``cli.cumulative`` is
  set).  When ``cli.overplot`` is given, the derived parameters of that
  sample are read from ``summary_parameters.hdf5`` and marked with dashed
  lines.  The figure is saved as ``derived_triangle.png`` and
  ``derived_triangle.pdf`` in the ``plots`` sub-folder, created if missing.
  """
  cli = anc.get_args()
  # read derived posterior file
  derived_file = os.path.join(cli.full_path, 'derived_posterior.hdf5')
  with h5py.File(derived_file, 'r') as h5f:
    derived_names = np.array(h5f['derived_names'], dtype='S10')
    derived_posterior_in = np.array(h5f['derived_posterior'], dtype=np.float64)

  n_der = derived_names.shape[0]

  derived_posterior = anc.derived_posterior_check(derived_names, derived_posterior_in)

  label_separation=-0.90 # if uses this, comment ax.xyaxis.labelpad = label_pad
  label_pad = 12 # it uses this, comment ax.xyaxis.set_label_coords()...
  label_size = 8
  ticklabel_size = 4

  # move the labels further away and shrink them as the grid grows
  if(n_der > 2):
    label_separation = -0.15 - ( 0.125 * (n_der-2) )
  label_size = label_size - 1 * int(n_der / 2.5)

  labels_list = anc.derived_labels(derived_names, cli.m_type)

  k = anc.get_bins(derived_posterior, rule='doane')

  if(cli.overplot is not None):
    ## OPEN summary_parameters.hdf5 FILE
    with h5py.File(os.path.join(cli.full_path, 'summary_parameters.hdf5'), 'r') as s_h5f:
      # take only the selected sample
      s_overplot = '%04d' %(cli.overplot)
      read_der = s_h5f['parameters/%s/derived/parameters' %(s_overplot)][...]

    overp_der = anc.derived_parameters_check(derived_names, read_der, derived_posterior)

  fig = plt.figure(figsize=(6,6))
  fig.subplots_adjust(hspace=0.05, wspace=0.05)

  for ix in range(0, n_der):
    x_data = derived_posterior[:,ix]
    x_min, x_max = anc.compute_limits(x_data, 0.05)
    if(x_min == x_max):
      # degenerate range: open it up so the axis limits are valid
      x_min = x_min - 1.
      x_max = x_max + 1.

    for iy in range(0, n_der):
      y_data = derived_posterior[:,iy]
      y_min, y_max = anc.compute_limits(y_data, 0.05)
      if(y_min == y_max):
        y_min = y_min - 1.
        y_max = y_max + 1.

      if(iy > ix): # correlation plot
        anc.print_both('correlation %s vs %s' %(derived_names[ix], derived_names[iy]) )
        ax = plt.subplot2grid((n_der+1, n_der), (iy,ix))

        data_range = [[x_data.min(), x_data.max()],[y_data.min(), y_data.max()]]

        # un-normalised counts are the default of hist2d: the explicit
        # normed=False keyword is dropped because newer matplotlib rejects it
        hist2d_counts, xedges, yedges, image2d = ax.hist2d(
          x_data, y_data, bins=k,
          range=data_range,
          cmap=cm.gray_r
          )

        new_k = k
        hist2d_counts_2, xedges_2, yedges_2 = np.histogram2d(
          x_data, y_data, bins=new_k,
          range=data_range,
          density=False
          )

        # bin centres used as the contour grid
        x_bins = [0.5*(xedges_2[i]+xedges_2[i+1]) for i in range(0, new_k)]
        y_bins = [0.5*(yedges_2[i]+yedges_2[i+1]) for i in range(0, new_k)]

        nl = 5 # number of contour levels
        ax.contour(x_bins, y_bins, hist2d_counts_2.T,
                   nl, cmap=cm.viridis,
                   linestyles='solid', linewidths=0.5
                   )

        if(cli.overplot is not None):
          # plot selected overplot sample
          _mark_overplot(ax.axvline, overp_der[ix], derived_names[ix])
          _mark_overplot(ax.axhline, overp_der[iy], derived_names[iy])

        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        # only the bottom row and the left column show axis labels/ticks
        if(iy == n_der-1):
          set_xaxis(ax, label_size, label_separation, label_pad, ticklabel_size, labels_list[ix], [xedges[0], xedges[-1], 4])
        if(ix == 0):
          set_yaxis(ax, label_size, label_separation, label_pad, ticklabel_size, labels_list[iy], [yedges[0], yedges[-1], 5])

        ax.set_ylim([y_min, y_max])
        ax.set_xlim([x_min, x_max])
        plt.draw()

      elif(iy == ix): # distribution plot
        anc.print_both('%s histogram' %(derived_names[ix]))
        ax = plt.subplot2grid((n_der+1, n_der), (ix,ix))
        # the last panel is drawn sideways
        last_panel = (ix == n_der-1)
        if(last_panel):
          hist_orientation='horizontal'
        else:
          hist_orientation='vertical'

        # plain or cumulative histogram: the two differ only by `cumulative`
        hist_counts, edges, patces = ax.hist(x_data, bins=k,
                                             range=[x_data.min(), x_data.max()],
                                             histtype='stepfilled',
                                             color='darkgrey',
                                             edgecolor='None',
                                             align='mid',
                                             orientation=hist_orientation,
                                             density=True,
                                             stacked=True,
                                             cumulative=bool(cli.cumulative)
                                             )

        if(last_panel):
          if(cli.overplot is not None):
            _mark_overplot(ax.axhline, overp_der[ix], derived_names[ix])
          ax.set_ylim([y_min, y_max])
        else:
          if(cli.overplot is not None):
            _mark_overplot(ax.axvline, overp_der[ix], derived_names[ix])
          ax.set_xlim([x_min, x_max])
        if(cli.overplot is not None):
          # single pre-formatted argument: same text under Python 2 and 3
          print('%s  overplot val =  %s  min =  %s  max =  %s' %(
            derived_names[ix], overp_der[ix], x_data.min(), x_data.max()))

        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.set_title(labels_list[ix], fontsize=label_size)
        plt.draw()

  plot_folder = os.path.join(cli.full_path, 'plots')
  if (not os.path.isdir(plot_folder)):
      os.makedirs(plot_folder)
  correlation_file = os.path.join(plot_folder, 'derived_triangle.png')
  fig.savefig(correlation_file, bbox_inches='tight', dpi=300)
  anc.print_both('png done')
  correlation_file = os.path.join(plot_folder, 'derived_triangle.pdf')
  fig.savefig(correlation_file, bbox_inches='tight', dpi=96)
  anc.print_both('pdf done')
  plt.close(fig)

  return
Example #16
0
def main():
    """Run a TRADES + PyPolyChord nested-sampling simulation.

    Parses the command-line arguments, initialises the Fortran TRADES
    library (single thread), reads from it the problem sizes, parameter
    names, initial parameters and boundaries, creates the run folder and log
    file, then runs PolyChord with a log-likelihood evaluated by
    ``pytrades.fortran_loglikelihood`` and a uniform prior inside the
    parameter boundaries.  The parameter-names files are written afterwards
    and, when ``cli.pc_plot`` is set, a getdist triangle plot is shown.

    Side effects: sets the module globals ``inv_dof`` and ``ln_err_const``.
    """

    # READ COMMAND LINE ARGUMENTS
    cli = get_args()

    # STARTING TIME
    start = time.localtime()
    # NOTE(review): these two locals are never used; PolyChord is configured
    # from cli.pc_output_dir / cli.pc_output_files below -- confirm intent.
    pc_output_dir = '%d-%02d-%02dT%02dh%02dm%02ds_' % (
        start.tm_year, start.tm_mon, start.tm_mday, start.tm_hour,
        start.tm_min, start.tm_sec)
    pc_output_files = 'trades_pc'

    # RENAME
    working_path = cli.full_path
    nthreads = 1

    # INITIALISE TRADES WITH SUBROUTINE WITHIN TRADES_LIB -> PARAMETER NAMES, MINMAX, INTEGRATION ARGS, READ DATA ...
    pytrades.initialize_trades(working_path, cli.sub_folder, nthreads)

    # RETRIEVE DATA AND VARIABLES FROM TRADES_LIB MODULE
    n_bodies = pytrades.n_bodies  # NUMBER OF TOTAL BODIES OF THE SYSTEM
    n_planets = n_bodies - 1  # NUMBER OF PLANETS IN THE SYSTEM
    ndata = pytrades.ndata  # TOTAL NUMBER OF DATA AVAILABLE
    npar = pytrades.npar  # NUMBER OF TOTAL PARAMETERS ~n_planets X 6
    nfit = pytrades.nfit  # NUMBER OF PARAMETERS TO FIT
    nfree = pytrades.nfree  # NUMBER OF FREE PARAMETERS (ie nrvset)
    dof = pytrades.dof  # NUMBER OF DEGREES OF FREEDOM = NDATA - NFIT
    global inv_dof
    inv_dof = pytrades.inv_dof

    # READ THE NAMES OF THE PARAMETERS FROM THE TRADES_LIB AND CONVERT IT TO PYTHON STRINGS
    str_len = pytrades.str_len
    temp_names = pytrades.get_parameter_names(nfit, str_len)
    trades_names = anc.convert_fortran_charray2python_strararray(temp_names)
    fitting_names = anc.trades_names_to_emcee(trades_names)

    # INITIAL PARAMETER SET exposed by the Fortran module; it has to be read
    # here, otherwise the name used below is undefined
    trades_parameters = pytrades.fitting_parameters
    # save initial_fitting parameters into array
    original_fit_parameters = trades_parameters.copy()
    fitting_parameters = anc.e_to_sqrte_fitting(trades_parameters,
                                                trades_names)

    trades_minmax = pytrades.parameters_minmax  # PARAMETER BOUNDARIES
    parameters_minmax = anc.e_to_sqrte_boundaries(trades_minmax, trades_names)

    # RADIAL VELOCITIES SET
    n_rv = pytrades.nrv
    n_set_rv = pytrades.nrvset

    # TRANSITS SET: count the planets having at least one T0
    n_t0 = pytrades.nt0
    n_t0_sum = pytrades.ntts
    n_set_t0 = 0
    for i in range(0, n_bodies - 1):
        if (n_t0[i] > 0):
            n_set_t0 += 1

    # global constant for the loglhd, as computed by the Fortran library
    global ln_err_const
    ln_err_const = pytrades.ln_err_const

    # INITIALISE SCRIPT FOLDER/LOG FILE
    working_folder, run_log, of_run = init_folder(working_path, cli.sub_folder)
    anc.print_both('', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both(' pyTRADES', of_run)
    anc.print_both(' ======== ', of_run)
    anc.print_both('', of_run)
    anc.print_both(' WORKING PATH = %s' % (working_path), of_run)
    anc.print_both(
        ' dof = ndata(%d) - nfit(%d) - nfree(%d) = %d' %
        (ndata, nfit, nfree, dof), of_run)
    anc.print_both(' Total N_RV = %d for %d set(s)' % (n_rv, n_set_rv), of_run)
    anc.print_both(
        ' Total N_T0 = %d for %d out of %d planet(s)' %
        (n_t0_sum, n_set_t0, n_planets), of_run)
    anc.print_both(' %s = %.7f' % ('log constant error = ', ln_err_const),
                   of_run)

    # SET PYPOLYCHORD
    # needed to define number of derived parameters for PyPolyChord
    nder = 0

    # define the loglikelihood function for PyPolyChord
    def likelihood(fitting_par):
        """Return (log-likelihood, derived parameters) for `fitting_par`.

        The fitting parameters are converted back with
        ``anc.sqrte_to_e_fitting`` before the Fortran log-likelihood is
        evaluated; ``ln_err_const`` is added to the Fortran value.  The
        derived-parameter list has ``nder`` zeros.
        """
        # derived parameters
        derived_par = [0.0] * nder
        # convert fitting_par to trades_par
        trades_par = anc.sqrte_to_e_fitting(fitting_par, fitting_names)
        loglhd, check = pytrades.fortran_loglikelihood(
            np.array(trades_par, dtype=np.float64))
        loglhd = loglhd + ln_err_const  # ln_err_const: global variable

        return loglhd, derived_par

    # define the prior for the fitting parameters
    def prior(hypercube):
        """Apply, coordinate by coordinate, a ``UniformPrior`` built from the
        lower/upper boundary of each fitting parameter to `hypercube`."""

        fitting_par = [0.0] * nfit
        for i, x in enumerate(hypercube):
            fitting_par[i] = PC_priors.UniformPrior(parameters_minmax[i, 0],
                                                    parameters_minmax[i, 1])(x)

        return fitting_par

    # set PyPolyChord: the pc_settings define how to run PC, e.g. nlive, precision_criterion, etc.
    pc_settings = PC_settings.PolyChordSettings(nfit, nder)
    pc_settings.base_dir = cli.pc_output_dir
    pc_settings.file_root = cli.pc_output_files
    pc_settings.do_clustering = True
    # Possible PyPolyChord settings:
    #Keyword arguments
    #-----------------
    #nlive: int
    #(Default: nDims*25)
    #The number of live points.
    #Increasing nlive increases the accuracy of posteriors and evidences,
    #and proportionally increases runtime ~ O(nlive).

    #num_repeats : int
    #(Default: nDims*5)
    #The number of slice-sampling steps to generate a new point.
    #Increasing num_repeats increases the reliability of the algorithm.
    #Typically
    #* for reliable evidences need num_repeats ~ O(5*nDims).
    #* for reliable posteriors need num_repeats ~ O(nDims)

    #nprior : int
    #(Default: nlive)
    #The number of prior samples to draw before starting compression.

    #do_clustering : boolean
    #(Default: True)
    #Whether or not to use clustering at run time.

    #feedback : {0,1,2,3}
    #(Default: 1)
    #How much command line feedback to give

    #precision_criterion : float
    #(Default: 0.001)
    #Termination criterion. Nested sampling terminates when the evidence
    #contained in the live points is precision_criterion fraction of the
    #total evidence.

    #max_ndead : int
    #(Default: -1)
    #Alternative termination criterion. Stop after max_ndead iterations.
    #Set negative to ignore (default).

    #boost_posterior : float
    #(Default: 0.0)
    #Increase the number of posterior samples produced.  This can be set
    #arbitrarily high, but you won't be able to boost by more than
    #num_repeats
    #Warning: in high dimensions PolyChord produces _a lot_ of posterior
    #samples. You probably don't need to change this

    #posteriors : boolean
    #(Default: True)
    #Produce (weighted) posterior samples. Stored in <root>.txt.

    #equals : boolean
    #(Default: True)
    #Produce (equally weighted) posterior samples. Stored in
    #<root>_equal_weights.txt

    #cluster_posteriors : boolean
    #(Default: True)
    #Produce posterior files for each cluster?
    #Does nothing if do_clustering=False.

    #write_resume : boolean
    #(Default: True)
    #Create a resume file.

    #read_resume : boolean
    #(Default: True)
    #Read from resume file.

    #write_stats : boolean
    #(Default: True)
    #Write an evidence statistics file.

    #write_live : boolean
    #(Default: True)
    #Write a live points file.

    #write_dead : boolean
    #(Default: True)
    #Write a dead points file.

    #write_prior : boolean
    #(Default: True)
    #Write a prior points file.

    #update_files : int
    #(Default: nlive)
    #How often to update the files in <base_dir>.

    #base_dir : string
    #(Default: 'chains')
    #Where to store output files.

    #file_root : string
    #(Default: 'test')
    #Root name of the files produced.

    #grade_frac : List[float]
    #(Default: 1)
    #The amount of time to spend in each speed.

    #grade_dims : List[int]
    #(Default: 1)
    #The number of parameters within each speed.

    # RUN POLYCHORD
    pc_run = PC.run_polychord(likelihood, nfit, nder, pc_settings, prior)

    # set label and legend names
    kel_plot_labels = anc.keplerian_legend(fitting_names, cli.m_type)
    pc_paramnames = [('%s' % (fitting_names[i]), r'%s' % (kel_plot_labels[i]))
                     for i in range(nfit)]
    pc_run.make_paramnames_files(pc_paramnames)

    # nothing else is logged: release the log file before the optional,
    # blocking plot window
    of_run.close()

    if (cli.pc_plot):
        import getdist.plots
        import matplotlib.pyplot as plt
        plt.rc('font', **{
            'family': 'serif',
            'serif': ['Computer Modern Roman']
        })
        plt.rc('text', usetex=True)
        posterior = pc_run.posterior
        g = getdist.plots.getSubplotPlotter()
        g.triangle_plot(posterior, filled=True)
        plt.show()

    return