Example #1
def Regrid_PS(PS1, Corners):
    dim1, dim2 = Corners.shape[1:]
    dim1 -= 1
    dim2 -= 1
    global px, py
    px, py = np.where(PS1)
    global squares
    squares = np.array(Make_squares(Corners))
    square_num = np.arange(0, len(squares))

    points = np.zeros((len(px), 2))
    points[:, 0] = px
    points[:, 1] = py

    global pspixels
    pspixels = Footprint_square(Corners, points)

    global psimage
    psimage = PS1.copy()

    pool = MultiPool()
    values = list(pool.map(Pix_sum, square_num))
    pool.close()

    PS_scene = np.array(values)
    PS_scene = np.nansum(PS_scene, axis=0)
    PS_scene = PS_scene.astype('float')
    PS_scene = PS_scene.reshape(dim1, dim2)
    return PS_scene
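Example #1 leans on helpers defined elsewhere in its module (Make_squares, Footprint_square, Pix_sum), so it is not runnable on its own. A minimal self-contained sketch of the same schwimmbad pool.map pattern, assuming only numpy and schwimmbad and using a toy worker in place of Pix_sum:

import numpy as np
from schwimmbad import MultiPool


def square_of(i):
    # toy stand-in for Pix_sum: any picklable, module-level function works
    return i * i


if __name__ == "__main__":
    square_num = np.arange(10)
    with MultiPool() as pool:  # the context manager shuts the pool down for us
        values = list(pool.map(square_of, square_num))
    print(np.array(values).reshape(2, 5))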
Example #2
def test_marginal_ln_likelihood(tmpdir, case):
    prior, _ = get_prior(case)

    data, _ = make_data()
    prior_samples = prior.sample(size=100)
    joker = TheJoker(prior)

    # pass JokerSamples instance
    ll = joker.marginal_ln_likelihood(data, prior_samples)
    assert len(ll) == len(prior_samples)

    # save prior samples to a file and pass that instead
    filename = str(tmpdir / 'samples.hdf5')
    prior_samples.write(filename, overwrite=True)

    ll = joker.marginal_ln_likelihood(data, filename)
    assert len(ll) == len(prior_samples)

    # make sure batches work:
    ll = joker.marginal_ln_likelihood(data, filename, n_batches=10)
    assert len(ll) == len(prior_samples)

    # NOTE: this makes it so I can't parallelize tests, I think
    with MultiPool(processes=2) as pool:
        joker = TheJoker(prior, pool=pool)
        ll = joker.marginal_ln_likelihood(data, filename)
    assert len(ll) == len(prior_samples)
Example #3
    def sample(self, n_samples):
        if self.pool is None or _GPU_ENABLED:
            pool = SerialPool()
        else:
            if isinstance(self.pool, int):
                pool = MultiPool(self.pool)
            elif isinstance(self.pool, (SerialPool, MultiPool)):
                pool = self.pool
            else:
                raise TypeError(
                    "Does not understand the given multiprocessing pool.")

        drawn_samples = list(
            pool.map(self.draw_one_joint_posterior_sample_map,
                     range(n_samples)))
        pool.close()

        drawn_zs = [drawn_samples[i][0] for i in range(n_samples)]
        drawn_inference_posteriors = [
            drawn_samples[i][1] for i in range(n_samples)
        ]

        drawn_joint_posterior_samples = pd.DataFrame(
            drawn_inference_posteriors)
        drawn_joint_posterior_samples["redshift"] = drawn_zs

        return drawn_joint_posterior_samples
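Example #3 normalizes self.pool inline, accepting None, an integer core count, or an existing pool object. A small reusable helper with essentially the same behavior (minus the GPU special case), sketched under the assumption that only SerialPool and MultiPool need to be supported:

from schwimmbad import MultiPool, SerialPool


def normalize_pool(pool):
    # None -> serial, int -> that many worker processes, pool instance -> use as-is
    if pool is None:
        return SerialPool()
    if isinstance(pool, int):
        return MultiPool(processes=pool)
    if isinstance(pool, (SerialPool, MultiPool)):
        return pool
    raise TypeError("Does not understand the given multiprocessing pool.")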
Example #4
def pool(request):
    multimode = 'None'
    # multimode = 'Serial'
    # multimode = 'Multi'
    # multimode = 'MPI'

    # setup code
    pool = None
    if multimode == 'Serial':
        from schwimmbad import SerialPool
        pool = SerialPool()
    elif multimode == 'Multi':
        from schwimmbad import MultiPool
        pool = MultiPool()
    elif multimode == 'MPI':
        from schwimmbad import MPIPool
        pool = MPIPool()
        if not pool.is_master():
            pool.wait()
            import sys
            sys.exit(0)

    # inject class variables
    request.cls.pool = pool
    yield

    # tear down
    if multimode == 'Multi' or multimode == 'MPI':
        pool.close()
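schwimmbad also ships a choose_pool helper that covers much of what this fixture wires up by hand; a sketch of the equivalent selection (the keyword names follow the schwimmbad documentation and should be checked against the installed version):

import sys

from schwimmbad import choose_pool

if __name__ == "__main__":
    # processes=1 gives a SerialPool, processes>1 a MultiPool, mpi=True an MPIPool
    pool = choose_pool(mpi=False, processes=4)
    if hasattr(pool, "is_master") and not pool.is_master():
        # only relevant for MPIPool: workers wait for tasks and then exit
        pool.wait()
        sys.exit(0)
    try:
        results = list(pool.map(abs, range(-5, 5)))
    finally:
        pool.close()
    print(results)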
Example #5
def generate_joint_posterior_samples_from_marginalized_likelihood(
    joint_result,
    single_trigger_likelihoods,
    sep_char="^",
    ncores=1,
):
    common_parameters = [
        p for p in list(joint_result.posterior.columns) if sep_char not in p
    ]
    independent_parameters = [
        p for p in list(joint_result.posterior.columns) if sep_char in p
    ]

    logger = logging.getLogger(__prog__)
    logger.info(
        "Using {} CPU core(s) for generating joint posterior samples from marginalized likelihood"
        .format(ncores))
    import tqdm
    with MultiPool(ncores) as pool:
        output_samples = pool.starmap(
            generate_joint_posterior_sample_from_marginalized_likelihood,
            tqdm.tqdm([[
                row, single_trigger_likelihoods, common_parameters,
                independent_parameters, sep_char
            ] for _, row in joint_result.posterior.iterrows()]))

    # Edit data frame
    joint_result.posterior = pd.DataFrame(output_samples)
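MultiPool inherits starmap from multiprocessing.Pool, so each argument list is unpacked into a separate worker call. A stripped-down sketch of the pattern above, with a toy worker standing in for generate_joint_posterior_sample_from_marginalized_likelihood:

import tqdm
from schwimmbad import MultiPool


def combine(sample_value, offset):
    # toy stand-in for the per-sample reconstruction step
    return sample_value + offset


if __name__ == "__main__":
    tasks = [[i, 100] for i in range(1000)]
    with MultiPool(2) as pool:
        # the bar advances as tasks are consumed for submission, not as results finish
        out = pool.starmap(combine, tqdm.tqdm(tasks))
    print(out[:5])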
Example #6
def generate_atlas_in_parallel_chunking(zval,
                                        chunksize,
                                        nchunks,
                                        fname='temp_parallel_atlas',
                                        filter_list='filter_list_goodss.dat',
                                        filt_dir='internal',
                                        priors=[],
                                        z_bw=0.05,
                                        pg_folder='parallel_atlases/'):

    N_pregrid = chunksize

    atlas_vals = [
        fname, zval, priors, pg_folder, filter_list, filt_dir, N_pregrid
    ]

    time_start = time.time()
    data = np.arange(nchunks)
    try:
        with MultiPool() as pool:
            values = list(
                pool.map(partial(gen_pg_parallel, atlas_vals=atlas_vals),
                         data))
    finally:
        print('Generated pregrid (%.0f chunks, %.0f sedsperchunk) at zval = %.3f'
              % (nchunks, chunksize, zval))
        print('time taken: %.2f mins.' % ((time.time() - time_start) / 60))

    # need to add code here to then concatenate chunks into a single file and delete the individual ones

    return
Example #7
def main(data_path: Path, model_path: Path, mask_path: Path, log_level: int):
    logging.basicConfig(
        stream=sys.stdout,
        level=log_level,
        datefmt='%Y-%m-%d %H:%M',
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    with open(data_path) as f:
        data_cfg = yaml.load(f, Loader=yaml.FullLoader)
    nside = data_cfg['nside']
    npix = hp.nside2npix(nside)
    nmc = data_cfg['monte_carlo']
    amplitude_output_shape = (nmc, 2, npix)
    parameter_output_shape = (nmc, npix)
    frequencies = np.array(data_cfg['frequencies'])

    with h5py.File(data_cfg['hdf5_path'], 'r') as f:
        data = f['maps/monte_carlo/data'][...]
        cov = f['maps/monte_carlo/cov'][...]

    masking = lynx.Masking(mask_path)
    fitting_masks = list(masking.get_fitting_indices())

    tasks = get_tasks(data, cov, nmc, frequencies, fitting_masks, model_path)

    with MultiPool() as pool:
        results = pool.map(do_fitting, tasks)

    with h5py.File(data_cfg['hdf5_path'], 'a') as f:
        for result in results:
            save_data(f, amplitude_output_shape, parameter_output_shape,
                      result)
Example #8
def generate_snrs(result, likelihood, ncores=1):
    logger = logging.getLogger(__prog__)
    logger.info("Using {} CPU core(s) for computing SNRs".format(ncores))
    import tqdm
    with MultiPool(ncores) as pool:
        output_samples = pool.starmap(
            generate_snrs_per_sample,
            tqdm.tqdm([[row.to_dict(), likelihood]
                       for _, row in result.posterior.iterrows()]))
    result.posterior = pd.DataFrame(output_samples)
    return result
Example #9
def compute_mean_selection_function(selection_function, N_avg, pool=None):
    if pool is None:
        pool = SerialPool()
    elif isinstance(pool, int):
        pool = MultiPool(pool)
    elif isinstance(pool, (SerialPool, MultiPool)):
        pool = pool
    else:
        raise TypeError("Does not understand the given multiprocessing pool.")

    out = list(
        pool.starmap(selection_function.evaluate, [() for _ in range(N_avg)]))
    avg = np.average(out)
    pool.close()
    return avg
Example #10
    def test_de_dt_integrate(self):
        n_values = 10

        m_1 = np.random.uniform(0, 10, n_values) * u.Msun
        m_2 = np.random.uniform(0, 10, n_values) * u.Msun
        f_orb = 10**(np.random.uniform(-5, -1, n_values)) * u.Hz
        ecc = np.random.uniform(0.0, 0.9, n_values)
        beta, a_i = evol.check_mass_freq_input(m_1=m_1, m_2=m_2, f_orb_i=f_orb)
        n_step = 100
        c_0 = utils.c_0(a_i=a_i, ecc_i=ecc)
        timesteps = evol.create_timesteps_array(a_i=a_i,
                                                beta=beta,
                                                ecc_i=ecc,
                                                t_evol=1 * u.yr,
                                                n_step=n_step)

        t_merge = evol.get_t_merge_ecc(ecc_i=ecc,
                                       f_orb_i=f_orb,
                                       m_1=m_1,
                                       m_2=m_2)

        # remove any bad timesteps that would evolve past the merger
        bad_timesteps = timesteps >= t_merge[:, np.newaxis]
        timesteps[bad_timesteps] = -1 * u.Gyr
        previous = timesteps.max(axis=1).repeat(timesteps.shape[1])
        timesteps[bad_timesteps] = previous.reshape(
            timesteps.shape)[bad_timesteps]

        # get rid of the units for faster integration
        c_0 = c_0.to(u.m).value
        beta = beta.to(u.m**4 / u.s).value
        timesteps = timesteps.to(u.s).value

        # integrate by hand:
        ecc_evol = np.array([
            odeint(evol.de_dt, ecc[i], timesteps[i],
                   args=(beta[i], c_0[i])).flatten() for i in range(len(ecc))
        ])

        # integrate with function:
        with MultiPool(processes=1) as pool:
            ecc_pool = np.array(
                list(
                    pool.map(evol.integrate_de_dt,
                             zip(ecc, timesteps, beta, c_0))))

        self.assertTrue(np.allclose(ecc_evol, ecc_pool, equal_nan=True))
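pool.map passes exactly one argument to the worker, which is why the inputs above are zipped into per-binary tuples that evol.integrate_de_dt unpacks internally. A self-contained sketch of that pattern with a toy integrand in place of the legwork machinery:

import numpy as np
from schwimmbad import MultiPool
from scipy.integrate import odeint


def integrate_one(args):
    # unpack the tuple handed over by pool.map
    y0, times, rate = args
    return odeint(lambda y, t, k: -k * y, y0, times, args=(rate,)).flatten()


if __name__ == "__main__":
    y0s = np.random.uniform(0.5, 1.0, 4)
    rates = np.random.uniform(0.1, 1.0, 4)
    times = np.linspace(0, 5, 50)
    with MultiPool(processes=2) as pool:
        curves = np.array(list(pool.map(integrate_one,
                                        zip(y0s, [times] * 4, rates))))
    print(curves.shape)  # (4, 50)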
Example #11
def calm2l_mcmc(infile, alfvar, ncpu, outname):
    """
    - for test purpose
    """
    ALFPY_HOME = os.environ['ALFPY_HOME']
    samples = np.array(
        pd.read_csv(infile, delim_whitespace=True, header=None,
                    comment='#'))[:, 1:47]
    tstart = time.time()

    with MultiPool() as pool:
        pwork = partial(worker_m2l, alfvar, key_list)
        ml_res = pool.map(pwork, samples)
    ndur = time.time() - tstart
    pool.close()
    print('\ncalculating m2l in .mcmc file: {:.2f}minutes'.format(ndur / 60.))
    np.savez('{0}results/{1}_mcmcm2l_b.npz'.format(ALFPY_HOME, outname),
             m2l=ml_res)
    return np.array(ml_res)
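functools.partial is used above to freeze the shared arguments (alfvar and key_list) so the pool only streams the per-row samples to the workers; partial objects pickle cleanly as long as the wrapped function is importable. A minimal sketch of the pattern:

from functools import partial

from schwimmbad import MultiPool


def worker(config, scale, sample):
    # config and scale are fixed for the whole run; sample varies per task
    return {"config": config, "value": scale * sample}


if __name__ == "__main__":
    pwork = partial(worker, "run_A", 2.0)  # bind the leading arguments
    with MultiPool() as pool:
        results = list(pool.map(pwork, range(8)))
    print(results[0])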
Example #12
def run_generation(env, parameters):
    num_agents = parameters.shape[0]
    rewards = np.zeros(num_agents)
    '''
    for a in range(num_agents):
        weights = parameters[a,:]
        rewards[a] = run_episode(env, weights)
    '''

    # replace loop with map operation for parallel processing
    with MultiPool() as pool:
        rewards = np.array(pool.map(run_episode, parameters))

    best_reward = np.max(rewards)
    mean_reward = np.mean(rewards)

    ranking = rewards.argsort()
    return parameters[ranking[-5:], :], best_reward, mean_reward, parameters[
        ranking[-1], :]
Example #13
def generate_atlas_in_parallel_zgrid(zgrid,
                                     atlas_params,
                                     dynamic_decouple=True):
    """
    Make a set of atlases given a redshift grid and a list of parameters (including a priors object).
    Atlas Params: [N_pregrid, priors, fname, store, path, filter_list, filt_dir, z_bw]
    """

    time_start = time.time()

    try:
        with MultiPool() as pool:
            values = list(
                pool.map(
                    partial(make_atlas_parallel, atlas_params=atlas_params),
                    zgrid))
    finally:
        time_end = time.time()
        print('time taken [parallel]: %.2f min.' %
              ((time_end - time_start) / 60))
Example #14
def generate_atlas_in_parallel_chunking(chunksize,
                                        nchunks,
                                        fname='temp_parallel_atlas',
                                        filter_list='filter_list_goodss.dat',
                                        filt_dir='internal',
                                        priors=[],
                                        pg_folder='pregrids/'):
    """
    Generate chunks of an atlas in parallel and combine them into one big atlas
    """

    chunk_path = pg_folder + 'atlaschunks/'
    store_path = pg_folder

    atlas_vals = [fname, priors, chunk_path, filter_list, filt_dir, chunksize]

    time_start = time.time()
    data = np.arange(nchunks)

    try:
        with MultiPool() as pool:
            values = list(
                pool.map(partial(gen_pg_parallel, atlas_vals=atlas_vals),
                         data))
    finally:
        print('Generated pregrid (%.0f chunks, %.0f sedsperchunk)' %
              (nchunks, chunksize))
        print('time taken: %.2f mins.' % ((time.time() - time_start) / 60))

    combine_pregrid_chunks(fname_base=fname,
                           N_chunks=nchunks,
                           N_pregrid=chunksize,
                           N_param=priors.Nparam,
                           path=chunk_path,
                           store_path=store_path)

    return
Example #15
    def simulate(self, bandpasses, norm=None, seed=13, Ncpus=None):

        self.obs = np.array([])

        np.random.seed(seed)

        tstep = 1
        tmin = self.model.mintime()
        tmax = self.model.maxtime()
        time = np.arange(tmin, tmax + tstep, tstep)

        wstep = 10
        wmin = self.model.minwave()
        wmax = self.model.maxwave()
        wavelen = np.arange(wmin, wmax + wstep, wstep)

        fluxes = self.model.flux(time, wavelen)
        norm = np.max(fluxes) if norm is None else norm
        fluxes = fluxes.T * norm / np.max(fluxes)

        lc = LightCurve(time, wavelen, fluxes)

        redshifts = list(
            sncosmo.zdist(self.zmin,
                          self.zmax,
                          time=self.duration,
                          area=self.area))

        tasks = list(
            zip(redshifts, [self] * len(redshifts),
                [bandpasses] * len(redshifts), [self.bias] * len(redshifts),
                [lc] * len(redshifts),
                np.random.randint(2**32 - 1, size=len(redshifts))))
        with MultiPool(processes=Ncpus) as pool:
            observations = np.array(list(pool.map(survey_worker, tasks)))
        self.obs = observations
Example #16
def main():
    # how many files should we split the test output into
    nfiles = 10
    wfd_thresh = 1000000

    # get the options for the code and set the data_release globally (ugly) to
    # allow MultiPool to work
    kwargs = plasticc.get_data.parse_getdata_options()
    global data_release
    data_release = kwargs.pop('data_release')
    getter = plasticc.get_data.GetData(data_release)

    # setup paths for output
    base_dir = os.path.join(WORK_DIR, 'csv_dump')
    dump_dir = os.path.join(base_dir, data_release)
    if not os.path.exists(dump_dir):
        os.makedirs(dump_dir)

    # we can use model as a dummy string to indicate if we are generating
    # training or test data
    dummy = kwargs.pop('model')
    offset = kwargs.pop('offset')
    limit = kwargs.pop('limit')

    # setup root filenames for output - these get changed by fixpath
    if dummy == 'training':
        outfile = os.path.join(dump_dir, 'plasticc_training_set.csv')
        offset = None
    else:
        if limit is None:
            outfile = os.path.join(dump_dir, 'plasticc_test_set.csv')
        else:
            if offset is None:
                offset = 0
            outfile = os.path.join(dump_dir,
                                   'plasticc_test_n{}_set.csv'.format(offset))
            # if we're limiting the output, then just dump one file
            nfiles = 1

    # header file is named something sensible and is public
    header_file = outfile.replace('.csv', '_metadata.csv')
    header_file = fixpath(header_file)

    # make sure we remove any lingering files
    if os.path.exists(outfile):
        os.remove(outfile)

    _ = kwargs.get('field')

    # set the header keywords for training and testing
    # these are the same, except that sntype will be removed from the test set
    # and hostgal_photoz isn't provided
    kwargs['columns']=['objid','ptrobs_min','ptrobs_max','ra','decl', 'mwebv',\
                        'hostgal_specz', 'hostgal_photoz', 'hostgal_photoz_err','sntype']

    # set an extrasql query to get just the DDF and WFD objects
    # sntype for testing = true sntype + 100
    if dummy == 'training':
        extrasql = "AND sntype < 100 AND ((objid LIKE 'WFD%') OR (objid LIKE 'DDF%'))"
    else:
        extrasql = "AND sntype > 100 AND ((objid LIKE 'WFD%') OR (objid LIKE 'DDF%'))"

    # set up options for data retrieval ignoring many of the command-line
    # options - impose cuts later
    kwargs['extrasql'] = extrasql
    kwargs['model'] = '%'
    kwargs['field'] = '%'
    kwargs['sort'] = True
    kwargs['shuffle'] = False
    kwargs['limit'] = None
    kwargs['get_num_lightcurves'] = True
    total = getter.get_lcs_headers(**kwargs)
    total = list(total)[0]
    kwargs['limit'] = total
    kwargs['get_num_lightcurves'] = False
    kwargs['offset'] = offset

    out = getter.get_lcs_headers(**kwargs)
    # current as of 20180827
    aggregate_types = {
        11: 11,
        2: 2,
        3: 3,
        12: 2,
        13: 3,
        14: 2,
        41: 41,
        43: 43,
        51: 51,
        60: 60,
        61: 99,
        62: 99,
        63: 99,
        64: 64,
        70: 70,
        80: 80,
        81: 81,
        83: 83,
        84: 84,
        90: 99,
        91: 91,
        92: 99,
        93: 91
    }
    aggregate_names = {
        11: 'SNIa-normal',
        2: 'SNCC-II',
        3: 'SNCC-Ibc',
        12: 'SNCC-II',
        13: 'SNCC-Ibc',
        14: 'SNCC-II',
        41: 'SNIa-91bg',
        43: 'SNIa-x',
        51: 'KN',
        60: 'SLSN-I',
        61: 'PISN',
        62: 'ILOT',
        63: 'CART',
        64: 'TDE',
        70: 'AGN',
        80: 'RRlyrae',
        81: 'Mdwarf',
        83: 'EBE',
        84: 'MIRA',
        90: 'uLens-Binary',
        91: 'uLens-Point',
        92: 'uLens-STRING',
        93: 'uLens-Point'
    }

    if dummy == 'training':
        pass
    else:
        aggregate_types = {x + 100: y for x, y in aggregate_types.items()}
    print('Aggregating as ', aggregate_types)

    # make a big list of the header - NOTE THAT WE ALWAYS RETRIEVE ALL OBJECTS
    out = list(out)
    if dummy == 'training':
        # we don't need to shuffle the training set
        pass
    else:
        # if we're generating test data, if we set a limit, just draw a random
        # sample else shuffle the full list
        if limit is not None:
            out = random.sample(out, limit)
        else:
            random.shuffle(out)

    # convert the selected header entries to a table
    out = at.Table(rows=out, names=kwargs['columns'])

    # we're not necessarily keeping all the models we simulated - remove any models that are not in keep_types
    keep_types = aggregate_types.keys()
    mask = np.array(
        [True if x in keep_types else False for x in out['sntype']])
    out = out[mask]

    # aggregate types - map to new class numbers (i.e. MODELNUM_PLASTICC)
    new_type = np.array(
        [aggregate_types.get(x, None) for x in out['sntype']])
    out['sntype'] = new_type

    # type 99 is not included in training
    train_types = set(aggregate_types.values()) - set([
        99,
    ])
    train_types = list(train_types)

    # make sure that there are no "other" classes included in the training data
    if dummy == 'training':
        mask = np.array(
            [True if x in train_types else False for x in out['sntype']])
        out = out[mask]

    # randomize the output type ID - keep rare as 99
    target_map_file = outfile.replace('.csv', '_targetmap.txt').replace(
        '_test_set', '').replace('_training_set',
                                 '').replace(dump_dir, base_dir)
    try:
        target_map_data = at.Table.read(target_map_file, format='ascii')
        train_types = target_map_data['train_types']
        target_types = target_map_data['target_types']
        print(f'Restoring Target Map from {target_map_file}')
        target_map_file = target_map_file.replace(base_dir, dump_dir)
        target_map_file = fixpath(target_map_file, public=False)
        target_map_data.write(target_map_file,
                              format='ascii.fixed_width',
                              delimiter=' ',
                              overwrite=True)
        print(f'Wrote distribution target mapping to file {target_map_file}')
    except Exception as e:
        target_types = np.random.choice(99, len(train_types),
                                        replace=False).tolist()
        target_map_data = at.Table([train_types, target_types],
                                   names=['train_types', 'target_types'])
        target_map_data.write(target_map_file,
                              format='ascii.fixed_width',
                              delimiter=' ',
                              overwrite=True)
        print(f'Wrote target mapping to file {target_map_file}')
        target_map_file = target_map_file.replace(base_dir, dump_dir)
        target_map_file = fixpath(target_map_file, public=False)
        target_map_data.write(target_map_file,
                              format='ascii.fixed_width',
                              delimiter=' ',
                              overwrite=True)
        print(f'Wrote distribution target mapping to file {target_map_file}')

    # map the aggregated IDs to random target IDs
    target_map = dict(zip(train_types, target_types))
    target = np.array([target_map.get(x, 99) for x in out['sntype']])
    out['target'] = target
    print('Mapping as {}'.format(target_map))

    # orig map file is like target_map (and also private) but includes the rares
    orig_map_file = outfile.replace('.csv', '_origmap.txt').replace(
        '_test_set', '').replace('_training_set', '')
    orig_map_file = fixpath(orig_map_file, public=False)
    if not os.path.exists(orig_map_file):
        orig = []
        aggregated = []
        mapped = []
        names = []
        for key, val in aggregate_types.items():
            name = aggregate_names.get(key, 'Rare')
            names.append(name)
            orig.append(key)
            aggregated.append(val)
            mapping = target_map.get(val, 99)
            mapped.append(mapping)
        orig_map_data = at.Table([orig, aggregated, mapped, names],\
                            names=['ORIG_NUM', 'MODEL_NUM', 'TARGET', 'MODEL_NAME'])
        orig_map_data.write(orig_map_file,
                            format='ascii.fixed_width',
                            delimiter=' ',
                            overwrite=True)
        print(f'Wrote original mapping to file {orig_map_file}')

    # galactic objects have -9 as redshift - change to NaN
    # the numpy.isclose should have worked last time.... check this by hand.
    ind = out['hostgal_photoz'] == -9.
    out['hostgal_photoz'][ind] = np.nan
    out['hostgal_photoz_err'][ind] = np.nan
    ind = out['hostgal_specz'] == -9.
    out['hostgal_specz'][ind] = np.nan

    # add galactic coordinates
    c = SkyCoord(out['ra'], out['decl'], frame='icrs', unit='deg')
    gal = c.galactic
    out['gall'] = gal.l.value
    out['galb'] = gal.b.value

    # add distance modulus
    cosmo = FlatLambdaCDM(70, 0.3)
    out['distmod'] = cosmo.distmod(out['hostgal_photoz']).value
    ind = np.isfinite(out['distmod'])
    out['distmod'][~ind] = np.nan

    # figure out what fits files the data are in
    fits_files = [
        "LSST_{0}_MODEL{1}/LSST_{0}_{2}_PHOT.FITS".format(*x.split('_'))
        for x in out['objid']
    ]
    fits_files = np.array(fits_files)

    # the object names have the model name in them, so we need to edit them
    # new name = <SNID>
    orig_name = out['objid']
    new_name = np.array([x.split('_')[-1] for x in orig_name], dtype=np.int32)
    ddf_field = np.zeros(len(new_name), dtype=np.uint8)
    ind = new_name < wfd_thresh
    ddf_field[ind] = 1

    # preserve the mapping between old name, new name and file name
    out['object_id'] = new_name
    out['filename'] = fits_files
    out['ddf_bool'] = ddf_field

    # sort things by object id - Rick has already randomized these, so we preserve his order.
    out.sort('object_id')
    del new_name
    del fits_files
    del ddf_field
    del target
    del new_type

    # if we are generating test data, save a truth table
    if dummy == 'training':
        pass
    else:
        out_name = out['object_id']
        target = out['target']

        # remove the model type from the output header that goes with the test data
        out.remove_column('target')

        # make sure the truth table actually matches the job presently executing
        truth_file = outfile.replace('_set.csv', '_truthtable.csv')
        truth_file = fixpath(truth_file, public=False)
        if os.path.exists(truth_file):
            os.remove(truth_file)

        # write the truth table
        truth_table = at.Table([out_name, target],
                               names=['object_id', 'target'])
        truth_table.write(truth_file)
        print(f'Wrote {truth_file}')

    nmc = len(out)
    out_ind = np.arange(nmc)
    nthreads = max(multiprocessing.cpu_count() // 2 - 1, 1)
    print(f'Using {nthreads} threads.')
    if dummy == 'training':
        batch_inds = np.array_split(out_ind, nthreads)
    else:
        # if this is test data, we want to break files up so that DDF and WFD
        # are in separate files and the number of files is split so we don't
        # have a giant CSV file
        batch_inds = []
        ind = np.where(out['object_id'] < wfd_thresh)[0]
        print('DDF objects {}'.format(len(ind)))
        batch_inds.append(ind)
        ind = np.where(out['object_id'] >= wfd_thresh)[0]
        print('WFD objects {}'.format(len(ind)))
        batch_inds += np.array_split(ind, nfiles)

    # make batches to load the data
    batches = []
    for ind in batch_inds:
        # we need the fits file for each object + the object pointers
        this_batch_lcs = at.Table(
            [
                out['object_id'][ind], out['ptrobs_min'][ind],
                out['ptrobs_max'][ind], out['filename'][ind]
            ],
            names=['object_id', 'ptrobs_min', 'ptrobs_max', 'filename'])
        batches.append(this_batch_lcs)
    gc.collect()

    # create a map from batch number to first objid in each batch
    # batch number is helpful to name files by batch
    # this is sequential, but you might imagine more complicated schemes
    batch_ids = np.arange(len(batches)) + 1
    batch_keys = [x['object_id'][0] for x in batches]
    batch_map = dict(zip(batch_keys, batch_ids))

    # do the output
    if dummy == 'training':
        # training is simple -  dump each batch into one file in sequence
        outfile = fixpath(outfile, gzip=True)
        with MultiPool(processes=nthreads) as pool:
            with tqdm(total=nmc) as pbar:
                outlines = 'object_id,mjd,passband,flux,flux_err,detected_bool\n'
                # change to pool.imap so order is preserved in output file
                # combine all the batches
                for result in pool.imap(task, batches):
                    _, nbatch, batchlines = result
                    pbar.update(nbatch)
                    outlines += '\n'.join(batchlines)
                    outlines += '\n'
                outbytes = outlines.encode()
                gc.collect()

                # do the output
                with gzip.open(outfile, 'wb', compresslevel=9) as f:
                    f.write(outbytes)

    else:
        # these variables will set the accessed time and modified time to the same numbers for all batches
        st_atime = None
        st_mtime = None

        with tqdm(total=nmc) as pbar:
            for batch in batches:
                # for test, the batches each get a separate file
                batch_key = batch[0]['object_id']
                batch_id = batch_map[batch_key]
                batchfile = outfile.replace('.csv', f'_batch{batch_id}.csv')
                batchfile = fixpath(batchfile, gzip=True)

                # for actual multiprocessing, split up each file's indices into mini batches
                ind = np.arange(len(batch))
                mini_inds = np.array_split(ind, nthreads - 1)
                mini_batches = [batch[x] for x in mini_inds]
                nbatch = 0

                # combine all the batches
                outlines = 'object_id,mjd,passband,flux,flux_err,detected_bool\n'
                with MultiPool(processes=nthreads - 1) as pool:
                    for result in pool.imap(task, mini_batches):
                        _, mini_nbatch, mini_batchlines = result
                        nbatch += mini_nbatch
                        outlines += '\n'.join(mini_batchlines)
                        outlines += '\n'
                        pbar.update(mini_nbatch)
                outbytes = outlines.encode()
                gc.collect()

                # do the output
                with gzip.open(batchfile, 'wb', compresslevel=9) as f:
                    f.write(outbytes)

                # get the timestamps of the first batch file
                if st_atime is None:
                    st = os.stat(batchfile)
                    st_atime = st.st_atime
                    st_mtime = st.st_mtime

                # change the timestamp of the output
                os.utime(batchfile, (st_atime, st_mtime))

    # remove and rename some columns from the metadata output
    # this isn't strictly necessary, since we choose exactly what columns to output
    # but this makes sure astropy also strips any metadata about the columns itself
    out.remove_columns(
        ['objid', 'ptrobs_min', 'ptrobs_max', 'filename', 'sntype'])

    # setup what columns get output into the headers
    cols = [
        'object_id', 'ra', 'decl', 'gall', 'galb', 'ddf_bool', 'hostgal_specz',
        'hostgal_photoz', 'hostgal_photoz_err', 'distmod', 'mwebv'
    ]
    if dummy == 'training':
        cols.append('target')
    out = out[cols]

    # fix column precision
    precision = {'ra':6, 'decl':6, 'gall':6, 'galb':6,\
            'hostgal_specz':4, 'hostgal_photoz':4, 'hostgal_photoz_err':4, 'distmod':4, 'mwebv':3}
    for col, val in precision.items():
        formatstr = f'%.{val}f'
        out[col].format = formatstr

    # write out the header
    out.write(header_file, format='ascii.csv', overwrite=True)
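Example #16 uses pool.imap rather than pool.map so results stream back in submission order while a tqdm bar tracks completed batches. A compact sketch of that pattern with a toy CSV-rendering worker in place of task():

from schwimmbad import MultiPool
from tqdm import tqdm


def render_batch(batch):
    # toy stand-in for task(): return (batch key, row count, csv lines)
    rows = ['{},{}'.format(batch, i) for i in range(100)]
    return batch, len(rows), rows


if __name__ == "__main__":
    batches = list(range(20))
    lines = ['object_id,value']
    with MultiPool(processes=4) as pool:
        with tqdm(total=len(batches) * 100) as pbar:
            for key, nrows, rows in pool.imap(render_batch, batches):
                # output order matches `batches` because imap preserves order
                lines.extend(rows)
                pbar.update(nrows)
    print(len(lines))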
Example #17
def main():
    # setup paths for output
    dump_dir = os.path.join(WORK_DIR, 'hdf5_dump')
    if not os.path.exists(dump_dir):
        os.makedirs(dump_dir)

    # get the options for the code and set the data_release globally (ugly) to
    # allow MultiPool to work
    kwargs = plasticc.get_data.parse_getdata_options()
    global data_release
    data_release = kwargs.pop('data_release')
    getter = plasticc.get_data.GetData(data_release)

    # we can use model as a dummy string to indicate if we are generating
    # training or test data
    dummy = kwargs.pop('model')
    offset = kwargs.pop('offset')
    limit = kwargs.pop('limit')

    if dummy == 'training':
        outfile = os.path.join(dump_dir, 'training_set.hdf5')
        offset = None
    else:
        if limit is None:
            outfile = os.path.join(dump_dir, 'test_set.hdf5')
        else:
            if offset is None:
                offset = 0
            outfile = os.path.join(dump_dir,
                                   'test_n{}_set.hdf5'.format(offset))

    # make sure we remove any lingering files
    if os.path.exists(outfile):
        os.remove(outfile)

    _ = kwargs.get('field')

    # set the header keywords for training and testing
    # these are the same, except that sntype will be removed from the test set
    # and hostgal_photoz isn't provided
    if dummy == 'training':
        kwargs['columns']=['objid','ptrobs_min','ptrobs_max','ra','decl', 'mwebv', 'mwebv_err',\
                        'hostgal_specz', 'hostgal_photoz', 'hostgal_photoz_err', 'sntype']
    else:
        kwargs['columns']=['objid','ptrobs_min','ptrobs_max','ra','decl', 'mwebv', 'mwebv_err',\
                        'hostgal_photoz', 'hostgal_photoz_err', 'sntype']

    # set an extrasql query to get just the DDF and WFD objects
    # sntype for testing = true sntype + 100
    if dummy == 'training':
        extrasql = "AND sntype < 100 AND ((objid LIKE 'WFD%') OR (objid LIKE 'DDF%'))"
    else:
        extrasql = "AND sntype > 100 AND ((objid LIKE 'WFD%') OR (objid LIKE 'DDF%'))"

    # set up options for data retrieval ignoring many of the command-line
    # options
    kwargs['extrasql'] = extrasql
    kwargs['model'] = '%'
    kwargs['field'] = '%'
    kwargs['sort'] = True
    kwargs['shuffle'] = False
    kwargs['limit'] = None
    kwargs['get_num_lightcurves'] = True
    total = getter.get_lcs_headers(**kwargs)
    total = list(total)[0]
    kwargs['limit'] = total
    kwargs['get_num_lightcurves'] = False
    kwargs['offset'] = offset

    head = getter.get_lcs_headers(**kwargs)

    # make a big list of the header - NOTE THAT WE ALWAYS RETRIEVE ALL OBJECTS
    head = list(head)
    if dummy == 'training':
        pass
    else:
        # if we're generating test data, if we set a limit, just draw a random
        # sample else shuffle the full list
        if limit is not None:
            head = random.sample(head, limit)
        else:
            random.shuffle(head)

    # convert the selected header entries to a table and remove unnecessary columns
    out = at.Table(rows=head, names=kwargs['columns'])
    out.remove_columns(['ptrobs_min', 'ptrobs_max'])

    # galactic objects have -9 as redshift - change to 0
    dummy_val = np.repeat(-9, len(out))
    ind = np.isclose(out['hostgal_photoz'], dummy_val)
    out['hostgal_photoz'][ind] = 0.
    out['hostgal_photoz_err'][ind] = 0.

    # the object names have the model name in them, so we need to edit them
    # new name = <FIELD><SNID>
    orig_name = out['objid']
    new_name = [
        '{}{}'.format(x.split('_')[0],
                      x.split('_')[-1]) for x in orig_name
    ]
    new_name = np.array(new_name)
    new_name = new_name.astype('bytes')
    out['objid'] = new_name

    # if we are generating test data, save a truth table
    if dummy == 'training':
        pass
    else:
        sntype = out['sntype']
        # remove the model type from the output header that goes with the test data
        out.remove_column('sntype')
        truth_file = outfile.replace('_set.hdf5', '_truthtable.hdf5')
        if os.path.exists(truth_file):
            os.remove(truth_file)
        # ... saving it in the truth table only
        orig_name = orig_name.astype(bytes)
        new_name = new_name.astype(bytes)
        sntype = sntype.astype(bytes)
        truth_table = at.Table([orig_name, new_name, sntype],
                               names=['objid', 'shortid', 'sntype'])
        truth_table.write(truth_file,
                          compression=True,
                          path='truth_table',
                          serialize_meta=False,
                          append=True)

    # write out the header
    out.write(outfile,
              compression=True,
              path='header',
              serialize_meta=False,
              append=True)
    nmc = len(out)

    # use a multiprocessing pool to load each light curve and dump to HDF5
    with MultiPool() as pool:
        with tqdm(total=nmc) as pbar:
            for result in pool.imap(task, head):
                short_obj, thislc = result
                thislc.write(outfile,
                             path=short_obj,
                             compression=True,
                             serialize_meta=False,
                             append=True)
                pbar.update()
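Both Example #16 and Example #17 push data_release into a module-level global so the worker task() can still see it after the pool starts; binding it explicitly with functools.partial is an alternative that avoids the global. A sketch with a hypothetical worker signature that takes the release string as its first argument:

from functools import partial

from schwimmbad import MultiPool


def load_lc(data_release, head_row):
    # hypothetical worker: release string plus one header row per task
    return '{}:{}'.format(data_release, head_row)


if __name__ == "__main__":
    head = list(range(5))
    worker = partial(load_lc, 'some_release')
    with MultiPool() as pool:
        for result in pool.imap(worker, head):
            print(result)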
Example #18
def fit_catalog(fit_cat,
                atlas_path,
                atlas_fname,
                output_fname,
                N_pregrid=10000,
                N_param=3,
                z_bw=0.05,
                f160_cut=100,
                fit_mask=[],
                zgrid=[],
                sfr_uncert_cutoff=2.0):

    cat_id, cat_zbest, cat_seds, cat_errs, cat_f160, cat_class_star = fit_cat

    #if not zgrid:
    if not isinstance(zgrid, np.ndarray):
        zgrid = np.arange(np.amin(cat_zbest), np.amax(cat_zbest), z_bw)

    fit_id = cat_id.copy()
    fit_logM_50 = np.zeros_like(cat_zbest)
    fit_logM_MAP = np.zeros_like(cat_zbest)
    fit_logM_16 = np.zeros_like(cat_zbest)
    fit_logM_84 = np.zeros_like(cat_zbest)
    fit_logSFRinst_50 = np.zeros_like(cat_zbest)
    fit_logSFRinst_MAP = np.zeros_like(cat_zbest)
    fit_logSFRinst_16 = np.zeros_like(cat_zbest)
    fit_logSFRinst_84 = np.zeros_like(cat_zbest)

    fit_logZsol_50 = np.zeros_like(cat_zbest)
    fit_logZsol_16 = np.zeros_like(cat_zbest)
    fit_logZsol_84 = np.zeros_like(cat_zbest)
    fit_Av_50 = np.zeros_like(cat_zbest)
    fit_Av_16 = np.zeros_like(cat_zbest)
    fit_Av_84 = np.zeros_like(cat_zbest)

    fit_zfit_50 = np.zeros_like(cat_zbest)
    fit_zfit_16 = np.zeros_like(cat_zbest)
    fit_zfit_84 = np.zeros_like(cat_zbest)
    fit_logMt_50 = np.zeros_like(cat_zbest)
    fit_logMt_16 = np.zeros_like(cat_zbest)
    fit_logMt_84 = np.zeros_like(cat_zbest)
    fit_logSFR100_50 = np.zeros_like(cat_zbest)
    fit_logSFR100_16 = np.zeros_like(cat_zbest)
    fit_logSFR100_84 = np.zeros_like(cat_zbest)
    fit_nparam = np.zeros_like(cat_zbest)
    fit_t25_50 = np.zeros_like(cat_zbest)
    fit_t25_16 = np.zeros_like(cat_zbest)
    fit_t25_84 = np.zeros_like(cat_zbest)
    fit_t50_50 = np.zeros_like(cat_zbest)
    fit_t50_16 = np.zeros_like(cat_zbest)
    fit_t50_84 = np.zeros_like(cat_zbest)
    fit_t75_50 = np.zeros_like(cat_zbest)
    fit_t75_16 = np.zeros_like(cat_zbest)
    fit_t75_84 = np.zeros_like(cat_zbest)

    fit_nbands = np.zeros_like(cat_zbest)
    fit_f160w = np.zeros_like(cat_zbest)
    fit_stellarity = np.zeros_like(cat_zbest)
    fit_chi2 = np.zeros_like(cat_zbest)
    fit_flags = np.zeros_like(cat_zbest)

    for i in (range(len(zgrid))):

        print('loading atlas at', zgrid[i])

        # for a given redshift slice,
        zval = zgrid[i]

        # select the galaxies to be fit
        z_mask = (cat_zbest <
                  (zval + z_bw / 2)) & (cat_zbest >
                                        (zval - z_bw / 2)) & (cat_f160 <
                                                              f160_cut)
        fit_ids = np.arange(len(cat_zbest))[z_mask]

        #         for gal_id in fit_ids:

        #             gal_sed = cat_seds[gal_id, 0:]
        #             gal_err = cat_errs[gal_id, 0:]

        #             fit_likelihood, fit_norm_fac = evaluate_sed_likelihood(gal_sed,gal_err,atlas,fit_mask=[],
        #                                                 zbest=None,deltaz=None)

        #             quants = get_quants(fit_likelihood, atlas, fit_norm_fac)

        print('starting parallel fitting for Ngals = ', len(fit_ids),
              ' at redshift ', str(zval))

        try:
            #             load the atlas
            fname = atlas_fname + '_zval_%.0f_' % (zgrid[i] * 10000)
            atlas = load_atlas(fname,
                               N_pregrid,
                               N_param=N_param,
                               path=atlas_path)
            print('loaded atlas')
            with MultiPool() as pool:
                # note: Parallel doesn't work in Python2.6

                #                 if not fit_mask:
                if not isinstance(fit_mask, np.ndarray):
                    all_quants = list(
                        pool.map(
                            partial(fit_gals,
                                    catvals=(cat_seds, cat_errs, atlas)),
                            fit_ids))
                else:
                    all_quants = list(
                        pool.map(
                            partial(fit_gals,
                                    catvals=(cat_seds, cat_errs, fit_mask,
                                             atlas)), fit_ids))
            print('finished fitting parallel zbest chunk at z=%.3f' % zval)

            print('starting to put values in arrays')
            for ii, gal_id in enumerate(fit_ids):

                gal_sed = cat_seds[gal_id, 0:]
                gal_err = cat_errs[gal_id, 0:]

                quants = all_quants[ii][0]
                fit_likelihood = all_quants[ii][1]
                #             fit_logM_MAP[gal_id] = all_quants[ii][2]
                #             fit_logSFRinst_MAP[gal_id] = all_quants[ii][3]

                fit_logM_50[gal_id] = quants[0][0]
                fit_logM_16[gal_id] = quants[0][1]
                fit_logM_84[gal_id] = quants[0][2]
                fit_logSFRinst_50[gal_id] = quants[1][0]
                fit_logSFRinst_16[gal_id] = quants[1][1]
                fit_logSFRinst_84[gal_id] = quants[1][2]

                fit_Av_50[gal_id] = quants[2][0]
                fit_Av_16[gal_id] = quants[2][1]
                fit_Av_84[gal_id] = quants[2][2]

                fit_logZsol_50[gal_id] = quants[3][0]
                fit_logZsol_16[gal_id] = quants[3][1]
                fit_logZsol_84[gal_id] = quants[3][2]

                fit_zfit_50[gal_id] = quants[4][0]
                fit_zfit_16[gal_id] = quants[4][1]
                fit_zfit_84[gal_id] = quants[4][2]

                fit_logMt_50[gal_id] = quants[5][0][0]
                fit_logMt_16[gal_id] = quants[5][1][0]
                fit_logMt_84[gal_id] = quants[5][2][0]
                fit_logSFR100_50[gal_id] = quants[5][0][1]
                fit_logSFR100_16[gal_id] = quants[5][1][1]
                fit_logSFR100_84[gal_id] = quants[5][2][1]
                fit_nparam[gal_id] = quants[5][0][2]
                fit_t25_50[gal_id] = quants[5][0][3]
                fit_t25_16[gal_id] = quants[5][1][3]
                fit_t25_84[gal_id] = quants[5][2][3]
                fit_t50_50[gal_id] = quants[5][0][4]
                fit_t50_16[gal_id] = quants[5][1][4]
                fit_t50_84[gal_id] = quants[5][2][4]
                fit_t75_50[gal_id] = quants[5][0][5]
                fit_t75_16[gal_id] = quants[5][1][5]
                fit_t75_84[gal_id] = quants[5][2][5]

                fit_nbands[gal_id] = np.sum(gal_sed > 0)
                fit_f160w[gal_id] = cat_f160[gal_id]
                fit_stellarity[gal_id] = cat_class_star[gal_id]
                fit_chi2[gal_id] = np.amin(fit_likelihood)

                # flagging galaxies that either
                # 1. have nan values for mass
                # 2. have SFR uncertainties > sfr_uncert_cutoff
                # 3. are flagged as a star
                # 4. have extremely large chi2
                if np.isnan(quants[0][0]):
                    fit_flags[gal_id] = 1.0
                elif (np.abs(fit_logSFRinst_84[gal_id] -
                             fit_logSFRinst_16[gal_id]) > sfr_uncert_cutoff):
                    fit_flags[gal_id] = 2.0
                elif (cat_class_star[gal_id] > 0.5):
                    fit_flags[gal_id] = 3.0
                elif (fit_chi2[gal_id] > 1000):
                    fit_flags[gal_id] = 4.0
                else:
                    fit_flags[gal_id] = 0.0

        except Exception as e:
            print('could not fit with pool at z =', zval, ':', e)

        print('finishing that')
        pl.clf()
        pl.figure(figsize=(12, 6))
        pl.hist(cat_zbest[cat_zbest > 0],
                np.arange(0, 6, z_bw),
                color='black',
                alpha=0.3)
        #pl.hist(fit_zfit_50[fit_zfit_50>0],np.arange(0,6,z_bw),color='royalblue')
        pl.hist(cat_zbest[fit_zfit_50 > 0],
                np.arange(0, 6, z_bw),
                color='royalblue')
        pl.title('fit %.0f/%.0f galaxies' %
                 (np.sum(fit_zfit_50 > 0), len(cat_zbest)))
        pl.xlabel('redshift')
        pl.ylabel('# galaxies')

        display.clear_output(wait=True)
        display.display(pl.gcf())

    pl.show()

    #'logSFRinst_MAP':fit_logSFRinst_MAP,
    #'logM_MAP':fit_logM_MAP,

    fit_mdict = {
        'ID': fit_id,
        'logM_50': fit_logM_50,
        'logM_16': fit_logM_16,
        'logM_84': fit_logM_84,
        'logSFRinst_50': fit_logSFRinst_50,
        'logSFRinst_16': fit_logSFRinst_16,
        'logSFRinst_84': fit_logSFRinst_84,
        'logZsol_50': fit_logZsol_50,
        'logZsol_16': fit_logZsol_16,
        'logZsol_84': fit_logZsol_84,
        'Av_50': fit_Av_50,
        'Av_16': fit_Av_16,
        'Av_84': fit_Av_84,
        'zfit_50': fit_zfit_50,
        'zfit_16': fit_zfit_16,
        'zfit_84': fit_zfit_84,
        'logMt_50': fit_logMt_50,
        'logMt_16': fit_logMt_16,
        'logMt_84': fit_logMt_84,
        'logSFR100_50': fit_logSFR100_50,
        'logSFR100_16': fit_logSFR100_16,
        'logSFR100_84': fit_logSFR100_84,
        't25_50': fit_t25_50,
        't25_16': fit_t25_16,
        't25_84': fit_t25_84,
        't50_50': fit_t50_50,
        't50_16': fit_t50_16,
        't50_84': fit_t50_84,
        't75_50': fit_t75_50,
        't75_16': fit_t75_16,
        't75_84': fit_t75_84,
        'nparam': fit_nparam,
        'nbands': fit_nbands,
        'F160w': fit_f160w,
        'stellarity': fit_stellarity,
        'chi2': fit_chi2,
        'fit_flags': fit_flags
    }

    fit_cat = Table(fit_mdict)

    fit_cat.write(output_fname, format='ascii.commented_header')

    return
Example #19
def evol_ecc(ecc_i,
             t_evol=None,
             n_step=100,
             timesteps=None,
             beta=None,
             m_1=None,
             m_2=None,
             a_i=None,
             f_orb_i=None,
             output_vars=['ecc', 'f_orb'],
             n_proc=1,
             avoid_merger=True,
             exact_t_merge=False,
             t_before=1 * u.Myr,
             t_merge=None):
    """Evolve an array of eccentric binaries for ``t_evol`` time

    This function uses Peters & Mathews (1964) Eq. 5.11 and 5.13.

    Note that all of {``beta``, ``m_1``, ``m_2``, ``ecc_i``, ``a_i``, ``f_orb_i``} must have the
    same dimensions.

    Parameters
    ----------
    ecc_i : `float/array`
        Initial eccentricity

    t_evol : `float/array`
        Amount of time for which to evolve each binary. Required if ``timesteps`` is None. Defaults to
        merger times.

    n_step : `int`
        Number of timesteps to take between t=0 and t=``t_evol``. Required if ``timesteps`` is None.
        Defaults to 100.

    timesteps : `float/array`
        Array of exact timesteps to take when evolving each binary. Must be monotonically increasing and
        start with t=0. Either supply a 1D array to use for every binary or a 2D array that has a different
        array of timesteps for each binary. ``timesteps`` is used in place of ``t_evol`` and ``n_step`` and
        takes precedence over them.

    beta : `float/array`
        Constant defined in Peters and Mathews (1964) Eq. 5.9. See :meth:`legwork.utils.beta`
        (if ``beta`` is supplied, ``m_1`` and ``m_2`` are ignored)

    m_1 : `float/array`
        Primary mass (required if ``beta`` is None or if ``output_vars`` contains a frequency)

    m_2 : `float/array`
        Secondary mass (required if ``beta`` is None or if ``output_vars`` contains a frequency)

    a_i : `float/array`
        Initial semi-major axis (if supplied ``f_orb_i`` is ignored)

    f_orb_i : `float/array`
        Initial orbital frequency (required if ``a_i`` is None)

    output_vars : `array`
        List of **ordered** output vars, choose from any of ``timesteps``, ``ecc``, ``a``, ``f_orb`` and
        ``f_GW`` for which of timesteps, eccentricity, semi-major axis and orbital/GW frequency that you want.
        Default is [``ecc``, ``f_orb``]

    n_proc : `int`
        Number of processors to split eccentricity evolution over, where the default is n_proc=1

    avoid_merger : `boolean`
        Whether to avoid integration around the merger of the binary. Warning:
        setting this to False will result in many LSODA errors being output
        since the derivatives get so large.

    exact_t_merge : `boolean`
        Whether to calculate the merger time exactly or use a fit (only
        relevant when ``avoid_merger`` is set to True).

    t_before : `float`
        How much time before the merger to cut off the integration (default is
        1 Myr; this will prevent all LSODA warnings for e < 0.95, but you may
        need to increase this time if your sample is more eccentric than this)

    t_merge : `float/array`
        Merger times for each source to be evolved. Only used when
        `avoid_merger=True`. If `None` then these will be automatically
        calculated either approximately or exactly based on the value of
        `exact_t_merge`.

    Returns
    -------
    evolution : `array`
        Array possibly containing eccentricity, semi-major axis, timesteps and frequency evolution.
        Content determined by ``output_vars``
    """
    # transform input if only a single source
    arrayed_args, single_source = utils.ensure_array(m_1, m_2, beta, a_i,
                                                     f_orb_i, ecc_i)
    m_1, m_2, beta, a_i, f_orb_i, ecc_i = arrayed_args
    output_vars = np.array([output_vars]) if isinstance(output_vars,
                                                        str) else output_vars

    beta, a_i = check_mass_freq_input(beta=beta,
                                      m_1=m_1,
                                      m_2=m_2,
                                      a_i=a_i,
                                      f_orb_i=f_orb_i)

    if np.isin(output_vars, ["f_orb", "f_GW"]).any() and (m_1 is None
                                                          or m_2 is None):
        raise ValueError(
            "`m_1`` and `m_2` required if `output_vars` contains a frequency")

    c_0 = utils.c_0(a_i=a_i, ecc_i=ecc_i)
    timesteps = create_timesteps_array(a_i=a_i,
                                       beta=beta,
                                       ecc_i=ecc_i,
                                       t_evol=t_evol,
                                       n_step=n_step,
                                       timesteps=timesteps)

    # if avoiding the merger during integration
    if avoid_merger:
        if t_merge is None:
            # calculate the merger time
            t_merge = get_t_merge_ecc(ecc_i=ecc_i,
                                      a_i=a_i,
                                      beta=beta,
                                      exact=exact_t_merge).to(u.Gyr)

        # warn the user if they are evolving past the merger
        if np.any(timesteps > t_merge[:, np.newaxis]):
            print(
                "WARNING: Some timesteps are past the merger of the source and this may produce erroneous",
                "results in combination with `avoid_merger=True`. Only evolve sources until their merger",
                "or set `avoid_merger=False`.")

        # make a mask for any timesteps that are too close to the merger
        too_close = timesteps >= t_merge[:, np.newaxis] - t_before

        check = too_close
        check[:, 0] = True
        if np.all(check):  # pragma: no cover
            print("WARNING: All timesteps are too close to merger so",
                  "evolution is not possible. Either set `t_before` to a",
                  "smaller time or turn off `avoid_merger`")

        # ensure that the first timestep is always valid
        too_close[:, 0] = False

        if np.any(too_close):
            # set them all equal to the previous timestep before passing limit
            timesteps[too_close] = -1 * u.Gyr
            previous = timesteps.max(axis=1).repeat(timesteps.shape[1])
            timesteps[too_close] = previous.reshape(timesteps.shape)[too_close]

    # get rid of the units for faster integration
    c_0 = c_0.to(u.m).value
    beta = beta.to(u.m**4 / u.s).value
    timesteps = timesteps.to(u.s).value

    # perform the evolution
    if n_proc > 1:
        with MultiPool(processes=n_proc) as pool:
            ecc_evol = np.array(
                list(
                    pool.map(integrate_de_dt,
                             zip(ecc_i, timesteps.tolist(), beta, c_0))))
    else:
        ecc_evol = np.array([
            odeint(de_dt, ecc_i[i], timesteps[i],
                   args=(beta[i], c_0[i])).flatten() for i in range(len(ecc_i))
        ])

    c_0 = c_0[:, np.newaxis] * u.m
    ecc_evol = np.nan_to_num(ecc_evol, nan=0.0)

    # calculate a_evol if any frequency or separation requested
    if np.isin(output_vars, ["a", "f_orb", "f_GW"]).any():
        a_evol = utils.get_a_from_ecc(ecc_evol, c_0)

        # calculate f_orb_evol if any frequency requested
        if np.isin(output_vars, ["f_orb", "f_GW"]).any():
            # change merged binaries to extremely small separations
            a_not0 = np.where(a_evol.value == 0.0, 1e-30 * a_evol.unit, a_evol)
            f_orb_evol = utils.get_f_orb_from_a(a=a_not0,
                                                m_1=m_1[:, np.newaxis],
                                                m_2=m_2[:, np.newaxis])

            # set merged binaries to 100 Hz, well above what LISA can measure
            f_orb_evol = np.where(a_not0.value == 1e-30, 1e2 * u.Hz,
                                  f_orb_evol)

    # construct evolution output
    evolution = []
    for var in output_vars:
        if var == "timesteps":
            timesteps = timesteps.flatten() if single_source else timesteps
            evolution.append((timesteps * u.s).to(u.yr))
        elif var == "ecc":
            ecc_evol = ecc_evol.flatten() if single_source else ecc_evol
            evolution.append(ecc_evol)
        elif var == "a":
            a_evol = a_evol.flatten() if single_source else a_evol
            evolution.append(a_evol.to(u.AU))
        elif var == "f_orb":
            f_orb_evol = f_orb_evol.flatten() if single_source else f_orb_evol
            evolution.append(f_orb_evol.to(u.Hz))
        elif var == "f_GW":
            f_orb_evol = f_orb_evol.flatten() if single_source else f_orb_evol
            evolution.append(2 * f_orb_evol.to(u.Hz))
    return evolution if len(evolution) > 1 else evolution[0]
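The n_proc > 1 branch above is a common structure: only spin up a MultiPool when more than one process is requested, and otherwise run the plain serial loop to avoid the pool start-up and pickling overhead. A stripped-down sketch of that branching with a toy evolution function:

import numpy as np
from schwimmbad import MultiPool


def evolve_one(args):
    start, steps = args
    return start * np.exp(-0.1 * steps)


def evolve_all(starts, steps, n_proc=1):
    tasks = list(zip(starts, [steps] * len(starts)))
    if n_proc > 1:
        with MultiPool(processes=n_proc) as pool:
            return np.array(list(pool.map(evolve_one, tasks)))
    return np.array([evolve_one(t) for t in tasks])


if __name__ == "__main__":
    print(evolve_all(np.ones(3), np.arange(5), n_proc=2).shape)  # (3, 5)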
Example #20
    def evolve(cls, initialbinarytable, pool=None, **kwargs):
        """After setting a number of initial conditions we evolve the system.

        Parameters
        ----------
        initialbinarytable : DataFrame
            Initial conditions of the binary

        **kwargs:
            There are three ways to tell evolve and thus the fortran
            what you want all the flags and other BSE specific
            parameters to be. If you pass both a dictionary of flags and/or an inifile
            and a table with the BSE parameters in the columns,
            the column values will be overwritten by
            what is in the dictionary or ini file.

            NUMBER 1: PASS A DICTIONARY OF FLAGS

                 BSEDict

            NUMBER 2: PASS A PANDAS DATA FRAME WITH PARAMS DEFINED AS COLUMNS

                 All you need is the initialbinarytable if all
                 the BSE parameters are defined as columns

            NUMBER 3: PASS PATH TO AN INI FILE WITH THE FLAGS DEFINED

                params

        randomseed : `int`, optional, default let numpy choose for you
            If you would like the random seed that the underlying fortran code
            uses to be the same for all of the initial conditions you passed
            then you can send this keyword argument in. It is recommended
            to just let numpy choose a random number as the Fortran random seed
            and then this number will be returned as a column in the
            initial binary table so that you can reproduce the results.

        nproc : `int`, optional, default: 1
            number of CPUs to use to evolve systems
            in parallel

        idx : `int`, optional, default: 0
            initial index of the bcm/bpp arrays

        dtp : `float`, optional, default: tphysf
            timestep size in Myr for bcm output where tphysf
            is total evolution time in Myr

        n_per_block : `int`, optional, default: -1
            number of systems to evolve in a block with
            _evolve_multi_system, to allow larger multiprocessing
            queues and reduced overhead. If less than 1 use _evolve_single_system

        Returns
        -------
        output_bpp : DataFrame
            Evolutionary history of each binary

        output_bcm : DataFrame
            Final state of each binary

        initialbinarytable : DataFrame
            Initial conditions for each binary
        """
        idx = kwargs.pop('idx', 0)
        nproc = min(kwargs.pop('nproc', 1), len(initialbinarytable))
        n_per_block = kwargs.pop('n_per_block', -1)

        # There are three ways to tell evolve and thus the fortran
        # what you want all the flags and other BSE specific
        # parameters to be

        # NUMBER 1: PASS A DICTIONARY OF FLAGS
        BSEDict = kwargs.pop('BSEDict', {})

        # NUMBER 2: PASS A PANDAS DATA FRAME WITH PARAMS DEFINED AS COLUMNS

        #     All you need is the initialbinarytable with columns,
        #     If you pass both a dictionary of flags and/or a inifile
        #     and a table with the columns, the column values will be
        #     overwritten by what is in the dictionary or ini file

        # NUMBER 3: PASS PATH TO A INI FILE WITH THE FLAGS DEFINED
        params = kwargs.pop('params', None)

        if BSEDict and params is not None:
            raise ValueError('Please pass either a dictionary '
                             'of BSE flags or a path to an inifile, not both.')

        if params is not None:
            if not os.path.isfile(params):
                raise ValueError(
                    "File does not exist, probably supplied incorrect "
                    "path to the inifile.")
            BSEDict, _, _, _, _ = utils.parse_inifile(params)

        # error check the parameters you are trying to pass to BSE
        # if we sent in a table with the parameter names
        # then we will temporarily create a dictionary
        # in order to verify that the values in the table
        # are valid
        utils.error_check(BSEDict)

        # check the initial conditions of the system and warn user if
        # anything is weird about them, such as the star starts
        # in Roche Lobe overflow
        utils.check_initial_conditions(initialbinarytable)

        # assign some columns based on keyword arguments; these can
        # be overwritten by the params file or BSEDict
        if 'dtp' not in initialbinarytable.keys():
            initialbinarytable = initialbinarytable.assign(
                dtp=kwargs.pop('dtp', initialbinarytable['tphysf']))
        if 'randomseed' not in initialbinarytable.keys():
            seed = np.random.randint(np.iinfo(np.int32).min,
                                     np.iinfo(np.int32).max,
                                     size=len(initialbinarytable))
            initialbinarytable = initialbinarytable.assign(
                randomseed=kwargs.pop('randomseed', seed))
        if 'bin_num' not in initialbinarytable.keys():
            initialbinarytable = initialbinarytable.assign(
                bin_num=np.arange(idx, idx + len(initialbinarytable)))

        for k, v in BSEDict.items():
            if k in initialbinarytable.keys():
                warnings.warn(
                    "The value for {0} in initial binary table is being "
                    "overwritten by the value of {0} from either the params "
                    "file or the BSEDict.".format(k))
            # special columns that need to be handled differently
            if k == 'natal_kick_array':
                assign_natal_kick_array = [BSEDict['natal_kick_array']
                                           ] * len(initialbinarytable)
                initialbinarytable = initialbinarytable.assign(
                    natal_kick_array=assign_natal_kick_array)
                for idx, column_name in enumerate(NATAL_KICK_COLUMNS):
                    for sn_idx in range(2):
                        column_name_sn = column_name + '_{0}'.format(sn_idx +
                                                                     1)
                        column_values = pd.Series(
                            [BSEDict['natal_kick_array'][sn_idx][idx]] *
                            len(initialbinarytable),
                            index=initialbinarytable.index,
                            name=column_name_sn)
                        kwargs1 = {column_name_sn: column_values}
                        initialbinarytable = initialbinarytable.assign(
                            **kwargs1)
            elif k == 'qcrit_array':
                initialbinarytable = initialbinarytable.assign(
                    qcrit_array=[BSEDict['qcrit_array']] *
                    len(initialbinarytable))
                for kstar in range(0, 16):
                    columns_values = pd.Series(
                        [BSEDict['qcrit_array'][kstar]] *
                        len(initialbinarytable),
                        index=initialbinarytable.index,
                        name='qcrit_{0}'.format(kstar))
                    initialbinarytable.loc[:, 'qcrit_{0}'.
                                           format(kstar)] = columns_values
            elif k == 'fprimc_array':
                columns_values = [BSEDict['fprimc_array']
                                  ] * len(initialbinarytable)
                initialbinarytable = initialbinarytable.assign(
                    fprimc_array=columns_values)
                for kstar in range(0, 16):
                    columns_values = pd.Series(
                        [BSEDict['fprimc_array'][kstar]] *
                        len(initialbinarytable),
                        index=initialbinarytable.index,
                        name='fprimc_{0}'.format(kstar))
                    initialbinarytable.loc[:, 'fprimc_{0}'.
                                           format(kstar)] = columns_values
            else:
                # assigning values this way works for most of the parameters.
                kwargs1 = {k: v}
                initialbinarytable = initialbinarytable.assign(**kwargs1)

        # Here we perform two checks
        # First, if the BSE parameters are not in the initial binary table
        # and either a dictionary or an inifile was not provided
        # then we need to raise a ValueError and tell the user to provide
        # either a dictionary or an inifile or add more columns
        if not BSEDict:
            if ((not set(INITIAL_BINARY_TABLE_SAVE_COLUMNS).issubset(
                    initialbinarytable.columns))
                    and (not set(INITIAL_CONDITIONS_PASS_COLUMNS).issubset(
                        initialbinarytable.columns))):
                raise ValueError(
                    "You are passing BSE parameters as columns in the "
                    "initial binary table but not all BSE parameters are defined. "
                    "Please pass a BSEDict or a params file or make sure "
                    "you have all BSE parameters as columns {0} or {1}.".
                    format(INITIAL_BINARY_TABLE_SAVE_COLUMNS,
                           INITIAL_CONDITIONS_PASS_COLUMNS))

        # If you did not supply natal_kick_array, qcrit_array or fprimc_array
        # in the BSEDict, then we construct them from the initial conditions table
        if ((pd.Series(FLATTENED_NATAL_KICK_COLUMNS).isin(
                initialbinarytable.keys()).all())
                and ('natal_kick_array' not in BSEDict)):
            column_values = initialbinarytable[
                FLATTENED_NATAL_KICK_COLUMNS].values.reshape(
                    -1, 2, len(NATAL_KICK_COLUMNS)).tolist()
            initialbinarytable = initialbinarytable.assign(
                natal_kick_array=column_values)

        if (pd.Series(QCRIT_COLUMNS).isin(
                initialbinarytable.keys()).all()) and ('qcrit_array'
                                                       not in BSEDict):
            initialbinarytable = initialbinarytable.assign(
                qcrit_array=initialbinarytable[QCRIT_COLUMNS].values.tolist())

        if (pd.Series(FPRIMC_COLUMNS).isin(
                initialbinarytable.keys()).all()) and ('fprimc_array'
                                                       not in BSEDict):
            initialbinarytable = initialbinarytable.assign(
                fprimc_array=initialbinarytable[FPRIMC_COLUMNS].values.tolist(
                ))

        # need to ensure that the order of parameters that we pass to BSE
        # is correct
        initial_conditions = initialbinarytable[
            INITIAL_CONDITIONS_PASS_COLUMNS].to_dict('records')

        # we use different columns to save the BSE parameters because some
        # of the parameters are lists/arrays, which we instead save as
        # individual values to make saving to HDF5 easier and more efficient
        initialbinarytable = initialbinarytable[
            INITIAL_BINARY_TABLE_SAVE_COLUMNS]

        # Allow a user to specify a custom time step sampling for certain parts of the evolution
        timestep_conditions = kwargs.pop('timestep_conditions', [])
        set_checkstates(timestep_conditions=timestep_conditions)

        # check if a pool was passed
        if pool is None:
            with MultiPool(processes=nproc) as pool:
                # evolve systems
                if n_per_block > 0:
                    initial_conditions = np.asarray(initial_conditions)
                    n_tot = initial_conditions.shape[0]
                    initial_conditions_blocked = []
                    itr_block = 0
                    while itr_block < n_tot:
                        itr_next = np.min([n_tot, itr_block + n_per_block])
                        initial_conditions_blocked.append(
                            initial_conditions[itr_block:itr_next])
                        itr_block = itr_next
                    output = list(
                        pool.map(_evolve_multi_system,
                                 initial_conditions_blocked))
                else:
                    output = list(
                        pool.map(_evolve_single_system, initial_conditions))
        else:
            # evolve systems
            if n_per_block > 0:
                initial_conditions = np.asarray(initial_conditions)
                n_tot = initial_conditions.shape[0]
                initial_conditions_blocked = []
                itr_block = 0
                while itr_block < n_tot:
                    itr_next = np.min([n_tot, itr_block + n_per_block])
                    initial_conditions_blocked.append(
                        initial_conditions[itr_block:itr_next])
                    itr_block = itr_next
                output = list(
                    pool.map(_evolve_multi_system, initial_conditions_blocked))
            else:
                output = list(
                    pool.map(_evolve_single_system, initial_conditions))

        output = np.array(output, dtype=object)
        bpp_arrays = np.vstack(output[:, 1])
        bcm_arrays = np.vstack(output[:, 2])
        kick_info_arrays = np.vstack(output[:, 3])

        natal_kick_arrays = np.vstack(output[:, 4])
        natal_kick_arrays = natal_kick_arrays.reshape(
            -1, 1, len(FLATTENED_NATAL_KICK_COLUMNS))
        for idx, column in enumerate(FLATTENED_NATAL_KICK_COLUMNS):
            # assigning values this way works for most of the parameters.
            kwargs1 = {column: natal_kick_arrays[:, :, idx]}
            initialbinarytable = initialbinarytable.assign(**kwargs1)

        kick_info = pd.DataFrame(kick_info_arrays,
                                 columns=KICK_COLUMNS,
                                 index=kick_info_arrays[:, -1].astype(int))

        bpp = pd.DataFrame(bpp_arrays,
                           columns=BPP_COLUMNS,
                           index=bpp_arrays[:, -1].astype(int))

        bcm = pd.DataFrame(bcm_arrays,
                           columns=BCM_COLUMNS,
                           index=bcm_arrays[:, -1].astype(int))

        bcm.merger_type = bcm.merger_type.astype(int).astype(str).apply(
            lambda x: x.zfill(4))
        bcm.bin_state = bcm.bin_state.astype(int)
        bpp.bin_num = bpp.bin_num.astype(int)
        bcm.bin_num = bcm.bin_num.astype(int)

        return bpp, bcm, initialbinarytable, kick_info
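
# A standalone sketch (not part of COSMIC) of the blocking logic used above when
# n_per_block > 0: the initial conditions are split into chunks of at most
# n_per_block systems so that fewer, larger tasks are pushed through the pool.
import numpy as np

def make_blocks(initial_conditions, n_per_block):
    initial_conditions = np.asarray(initial_conditions, dtype=object)
    blocks, itr_block = [], 0
    while itr_block < len(initial_conditions):
        itr_next = min(len(initial_conditions), itr_block + n_per_block)
        blocks.append(initial_conditions[itr_block:itr_next])
        itr_block = itr_next
    return blocks

print([len(block) for block in make_blocks(list(range(10)), 4)])  # [4, 4, 2]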
Example #21
0
def main():
    # set up paths for output
    dump_dir = os.path.join(WORK_DIR, 'hdf5_dump')
    if not os.path.exists(dump_dir):
        os.makedirs(dump_dir)

    # get the options for the code and set the data_release globally (ugly) to
    # allow MultiPool to work
    kwargs = plasticc.get_data.parse_getdata_options()
    global data_release 
    data_release = kwargs.pop('data_release')
    getter = plasticc.get_data.GetData(data_release)

    # we can use model as a dummy string to indicate if we are generating
    # training or test data
    dummy  = kwargs.pop('model')
    offset = kwargs.pop('offset')
    limit  = kwargs.pop('limit')

    if dummy == 'training':
        outfile = os.path.join(dump_dir, 'training_set.hdf5')
        offset = None
    else:
        if limit is None: 
            outfile = os.path.join(dump_dir, 'test_set.hdf5')
        else:
            if offset is None:
                offset = 0
            outfile = os.path.join(dump_dir, 'test_n{}_set.hdf5'.format(offset))

    # make sure we remove any lingering files 
    if os.path.exists(outfile):
        os.remove(outfile)

    _ = kwargs.get('field')

    # set the header keywords for training and testing
    # same except that sntype will be removed from the test set and
    # hostgal_specz isn't provided for it
    if dummy == 'training':
        kwargs['columns']=['objid','ptrobs_min','ptrobs_max','ra','decl', 'mwebv', 'mwebv_err',\
                        'hostgal_specz', 'hostgal_photoz', 'hostgal_photoz_err', 'sntype']
    else:
        kwargs['columns']=['objid','ptrobs_min','ptrobs_max','ra','decl', 'mwebv', 'mwebv_err',\
                        'hostgal_photoz', 'hostgal_photoz_err', 'sntype']

    # set an extrasql query to get just the DDF and WFD objects
    # sntype for testing = true sntype + 100 
    if dummy == 'training':
        extrasql = "AND sntype < 100 AND ((objid LIKE 'WFD%') OR (objid LIKE 'DDF%'))"
    else:
        extrasql = "AND sntype > 100 AND ((objid LIKE 'WFD%') OR (objid LIKE 'DDF%'))"

    # set up options for data retrieval ignoring many of the command-line
    # options
    kwargs['extrasql'] = extrasql
    kwargs['model'] = '%'
    kwargs['field'] = '%'
    kwargs['sort']  = True
    kwargs['shuffle'] = False
    kwargs['limit'] = None
    kwargs['get_num_lightcurves'] = True
    total = getter.get_lcs_headers(**kwargs)
    total = list(total)[0]
    kwargs['limit'] = total
    kwargs['get_num_lightcurves'] = False
    kwargs['offset'] = offset


    out = getter.get_lcs_headers(**kwargs)
    aggregate_types = {1:1, 2:2, 3:3, 12:2, 13:3, 14:2, 41:41, 43:43, 45:45, 51:51, 60:60, 61:61, 62:62, 63:63, 64:64, 80:80, 81:81, 82:82, 83:83, 84:84, 90:90, 91:91}
    if dummy == 'training':
        pass 
    else:
        aggregate_types = {x+100:y for x,y in aggregate_types.items()}
    print('Aggregating as ', aggregate_types)

    # make a big list of the header - NOTE THAT WE ALWAYS RETRIEVE ALL OBJECTS
    out = list(out)
    if dummy == 'training':
        # we don't need to shuffle the training set 
        pass
    else:
        # if we're generating test data and a limit is set, just draw a
        # random sample; otherwise shuffle the full list
        if limit is not None:
            out = random.sample(out, limit)
        else:
            random.shuffle(out)

    # convert the selected header entries to a table
    out = at.Table(rows=out, names=kwargs['columns'])

    # we're not necessarily keeping all the models we simulated (42 and 50 are going bye bye)
    keep_types = aggregate_types.keys()
    mask = np.array([True if x in keep_types else False for x in out['sntype']])
    out = out[mask]

    # aggregate types (the mapping already includes the +100 offset for test data)
    new_type = np.array([aggregate_types.get(x, None) for x in out['sntype']])
    out['sntype'] = new_type

    # make sure that there are no "other" classes included in the training data
    if dummy == 'training':
        # not train types - 45, 60, 61, 62, 63, 64, 90, 91
        train_types = (1, 2, 3, 41, 43, 51, 80, 81, 82, 83, 84) 
        mask = np.array([True if x in train_types else False for x in out['sntype']])
        out = out[mask]

    # galactic objects have -9 as redshift - change to 0
    dummy_val = np.repeat(-9, len(out))
    ind = np.isclose(out['hostgal_photoz'], dummy_val)
    out['hostgal_photoz'][ind] = 0.
    out['hostgal_photoz_err'][ind] = 0.

    # figure out what fits files the data are in
    fits_files =[ "LSST_{0}_MODEL{1}/LSST_{0}_{2}_PHOT.FITS".format(*x.split('_')) for x in out['objid']] 
    fits_files = np.array(fits_files)
    uniq_files = np.unique(fits_files)

    # the object names have the model name in them, so we need to edit them
    # new name = <FIELD><SNID>
    orig_name = out['objid']
    new_name = [ '{}{}'.format(x.split('_')[0], x.split('_')[-1]) for x in orig_name]
    new_name = np.array(new_name)
    out_name = new_name
    nmc = len(out)

    # if we are generating test data, save a truth table
    if dummy == 'training':
        pass 
    else: 
        sntype = out['sntype']
        # remove the model type from the output header that goes with the test data
        out.remove_column('sntype')
        truth_file = outfile.replace('_set.hdf5', '_truthtable.hdf5')
        if os.path.exists(truth_file):
            os.remove(truth_file)
        # ... saving it in the truth table only
        orig_name = orig_name.astype(bytes)
        new_name  = new_name.astype(bytes)
        truth_table = at.Table([orig_name, new_name, sntype], names=['objid','shortid','sntype'])
        truth_table.write(truth_file, compression=True, path='truth_table', serialize_meta=False, append=True)

    # make batches to load the data 
    batches = {}
    for filename in uniq_files:
        ind = (fits_files == filename)
        this_fits_lcs = at.Table([out['objid'][ind], out['ptrobs_min'][ind], out['ptrobs_max'][ind]], names=['objid','ptrobs_min', 'ptrobs_max'])
        batches[filename] = this_fits_lcs 

    name_lookup = dict(zip(orig_name, out_name))

    gc.collect()

    # do the output
    failed = []
    with MultiPool() as pool:
        with tqdm(total=nmc) as pbar:
            for result in pool.imap_unordered(task, batches.items()):
                this_file_n = len(result.items())
                with h5py.File(outfile, 'a') as outf:
                    for true_obj, thislc in result.items():
                        short_obj = name_lookup[true_obj]
                        retries = 10
                        notwritten = True
                        overwrite = False
                        while notwritten and retries > 0:
                            try:
                                outf.create_dataset(short_obj, data=thislc, compression='lzf')
                                # thislc.write(outfile, path=short_obj, compression=True,
                                #              serialize_meta=False, append=True, overwrite=overwrite)
                                notwritten = False
                            except Exception as e:
                                timer.sleep(0.010)
                                overwrite = True
                                retries -= 1
                                print('{} {}'.format(true_obj, e))
                        if notwritten:
                            failed.append((true_obj, short_obj))
                            print("Failed", true_obj)
                        outf.flush()
                    pbar.update(this_file_n)
            gc.collect()
    print(failed)
    
    # write out the header
    out['objid'] = out_name.astype(bytes) 
    out.remove_columns(['ptrobs_min', 'ptrobs_max'])
    out.write(outfile, compression=True, path='header', serialize_meta=False, append=True)
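
# A minimal, standalone sketch of the MultiPool + imap_unordered + tqdm pattern
# used in main() above; square() and the toy item list are placeholders, not the
# plasticc task/batches objects.
from schwimmbad import MultiPool
from tqdm import tqdm

def square(x):
    return x * x

if __name__ == '__main__':
    items = list(range(100))
    results = []
    with MultiPool() as pool:
        with tqdm(total=len(items)) as pbar:
            for value in pool.imap_unordered(square, items):
                results.append(value)
                pbar.update(1)
    print(len(results), sum(results))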
Example #22
0
    print("loading data table took {0} s".format(time.time() - start))

    print("loading pair indices...")
    pairs_start = time.time()
    pairs_file = '../data/matched-pairs-dustin.fits'
    pairs = read_from_fits(pairs_file)  # pairs is a global variable
    print("loading pairs array took {0} s".format(time.time() - pairs_start))

    print("calculating chisquared...")
    with h5py.File('chisqs.hdf5', 'w') as f:
        dset = f.create_dataset('chisqs', data=np.zeros_like(pairs) - 1)

    #tasks = list(zip(range(len(table)), table.iterrows()))
    tasks = list(zip(range(10000), pairs[:10000, :]))

    pool = MultiPool()
    map_start = time.time()
    results = pool.map(worker, tasks, callback=callback)
    map_end = time.time()
    print("mapping took {0} s".format(map_end - map_start))
    pool.close()

    with h5py.File('chisqs.hdf5', 'r+') as f:
        chisqs = np.copy(f['chisqs'])

    chisqs2 = calc_chisqs_for_table(table, pairs[:10000, :])

    if False:  # basic diagnostics
        print("chisquareds calculated, checking on matches...")
        plt.hist(chisqs[(chisqs > 0.) & (chisqs < 50.)], bins=500)
        plt.xlabel(r'$\chi^2$', fontsize=16)
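
# A standalone sketch of schwimmbad's map(..., callback=...) pattern used above;
# worker() and callback() are toy placeholders, not the functions from this
# example (which operate on the table/pairs globals). Note (assumption): depending
# on the pool type, the callback may be invoked once per result or once with the
# full result list, so both cases are handled here.
from schwimmbad import MultiPool

collected = []

def worker(task):
    i, value = task
    return i, value ** 2

def callback(result):
    if isinstance(result, list):
        collected.extend(result)
    else:
        collected.append(result)

if __name__ == '__main__':
    tasks = list(enumerate(range(8)))
    with MultiPool() as pool:
        results = list(pool.map(worker, tasks, callback=callback))
    print(results)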
Example #23
0
    def initial_sample(
        self,
        M1min=0.08,
        M2min=0.08,
        M1max=150.0,
        M2max=150.0,
        porb_lo=0.15,
        porb_hi=8.0,
        rand_seed=0,
        size=None,
        nproc=1,
        pool=None,
        mp_seeds=None,
    ):
        """Sample the initial binary distribution according to
        `Moe & Di Stefano (2017) <http://adsabs.harvard.edu/abs/2017ApJS..230...15M>`_

        Parameters
        ----------
        M1min : `float`
            minimum primary mass to sample [Msun]
            DEFAULT: 0.08
        M2min : `float`
            minimum secondary mass to sample [Msun]
            DEFAULT: 0.08
        M1max : `float`
            maximum primary mass to sample [Msun]
            DEFAULT: 150.0
        M2max : `float`
            maximum secondary mass to sample [Msun]
            DEFAULT: 150.0
        porb_lo : `float`
            minimum orbital period to sample [log10(days)]
        porb_hi : `float`
            maximum orbital period to sample [log10(days)]
        rand_seed : int
            random seed generator
            DEFAULT: 0
        size : int, optional
            number of evolution times to sample
            NOTE: this is set in the cosmic-pop call as Nstep
        nproc : `int`, optional, default: 1
            number of processes to use if no pool is supplied
        pool : pool object, optional
            a schwimmbad pool to use instead of creating one internally
        mp_seeds : iterable of `int`, optional
            per-process random seeds; must have length equal to nproc

        Returns
        -------
        primary_mass_list : array
            array of primary masses with size=size
        secondary_mass_list : array
            array of secondary masses with size=size
        porb_list : array
            array of orbital periods in days with size=size
        ecc_list : array
            array of eccentricities with size=size
        mass_singles : `float`
            Total mass in single stars needed to generate population
        mass_binaries : `float`
            Total mass in binaries needed to generate population
        n_singles : `int`
            Number of single stars needed to generate a population
        n_binaries : `int`
            Number of binaries needed to generate a population
        binfrac_list : array
            array of binary probabilities based on primary mass and period with size=size
        """
        if pool is None:
            with MultiPool(processes=nproc) as pool:
                if mp_seeds is not None:
                    if len(list(mp_seeds)) != nproc:
                        raise ValueError(
                            "Must supply a list of random seeds with length equal to number of processors"
                        )
                else:
                    mp_seeds = [
                        nproc * (task._identity[0] - 1) for task in pool._pool
                    ]

                inputs = [(M1min, M2min, M1max, M2max, porb_hi, porb_lo,
                           size / nproc, rand_seed + mp_seed)
                          for mp_seed in mp_seeds]
                worker = Worker()
                results = list(pool.map(worker, inputs))
        else:
            if mp_seeds is not None:
                if len(list(mp_seeds)) != nproc:
                    raise ValueError(
                        "Must supply a list of random seeds with length equal to number of processors"
                    )
            else:
                if isinstance(pool, MPIPool):
                    mp_seeds = [nproc * (task - 1) for task in pool.workers]
                elif isinstance(pool, MultiPool):
                    mp_seeds = [
                        nproc * (task._identity[0] - 1) for task in pool._pool
                    ]
                else:
                    mp_seeds = [0 for i in range(nproc)]

            inputs = [(M1min, M2min, M1max, M2max, porb_hi, porb_lo,
                       size / nproc, rand_seed + mp_seed)
                      for mp_seed in mp_seeds]
            worker = Worker()
            results = list(pool.map(worker, inputs))

        dat_lists = [[], [], [], [], [], [], [], [], []]

        for output_list in results:
            for ii, dat_list in enumerate(output_list):
                dat_lists[ii].append(dat_list)

        primary_mass_list = np.hstack(dat_lists[0])
        secondary_mass_list = np.hstack(dat_lists[1])
        porb_list = np.hstack(dat_lists[2])
        ecc_list = np.hstack(dat_lists[3])
        mass_singles = np.sum(dat_lists[4])
        mass_binaries = np.sum(dat_lists[5])
        n_singles = np.sum(dat_lists[6])
        n_binaries = np.sum(dat_lists[7])
        binfrac_list = np.hstack(dat_lists[8])

        return (primary_mass_list, secondary_mass_list, porb_list, ecc_list,
                mass_singles, mass_binaries, n_singles, n_binaries,
                binfrac_list)
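
# A standalone sketch of how the per-process seeds above are derived when a
# MultiPool is created internally: one integer seed per worker process, based on
# multiprocessing's (private) Process._identity counter, mirroring the
# mp_seeds=None branch.
from schwimmbad import MultiPool

if __name__ == '__main__':
    nproc = 4
    with MultiPool(processes=nproc) as pool:
        mp_seeds = [nproc * (task._identity[0] - 1) for task in pool._pool]
    print(mp_seeds)  # e.g. [0, 4, 8, 12] for a freshly created pool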
Example #24
0

bounds = scipy.optimize.Bounds([
    np.arctan(-10), -2,
    np.arctan(-10), -2,
    np.arctan(-10), -np.inf, 0, -np.inf, -10.
], [
    np.arctan(10), np.inf,
    np.arctan(10), np.inf,
    np.arctan(10), np.inf, np.inf, np.inf,
    leopy.stats.logit(0.30)
])

try:
    from schwimmbad import MultiPool
    pool = MultiPool()
    print('Parallel execution on ' + str(pool.size) + ' processes')
except ImportError as error:
    print('Serial execution as module `schwimmbad` was not found')
    pool = None
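
# A small, hypothetical helper (not from leopy/auxiliary) illustrating how code
# downstream typically copes with pool being None after the fallback above:
# use the built-in serial map when no pool is available.
def parallel_map(func, iterable, pool=None):
    if pool is None:
        return list(map(func, iterable))
    return list(pool.map(func, iterable))

# e.g. parallel_map(lambda x: x ** 2, range(4), pool) works with pool=None too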

my_print_fun = auxiliary.MyPrintFun()
my_take_step = auxiliary.MyTakeStep(stepsize=1)
minimizer_options = {'disp': True, 'ftol': 1e-8}
minimizer_kwargs = {
    'method': method,
    'bounds': bounds,
    'options': minimizer_options,
    'args': (pool, )
}
#process the data serially with an explicit for loop
values = []
for x in data:
    values.append(do_the_processing(x))

#using the map() function which applies the function (passed as the first argument) to each element in the iterable (second argument)
#Note: in python3, map() returns a lazy iterator, so we call list() on it to get the values
values = list(map(do_the_processing, data))

#class-based wrapper for the built-in (serial) map() function
from schwimmbad import SerialPool
pool = SerialPool()
values = list(pool.map(do_the_processing, data))

#utilize multiple cores on the same processor
from schwimmbad import MultiPool
with MultiPool() as pool:
    values = list(pool.map(do_the_processing, data))


#when using an MPI pool, tell all worker processes to wait for tasks from the master process
def main(pool, data):
    return list(pool.map(do_the_processing, data))


import sys
from schwimmbad import MPIPool
with MPIPool() as pool:
    if not pool.is_master():
        pool.wait()
        sys.exit(0)
    values = main(pool, data)

#selecting a pool with command-line arguments
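
# A sketch of the command-line pool selection pattern (following the schwimmbad
# docs): argparse chooses between SerialPool, MultiPool and MPIPool via
# schwimmbad.choose_pool. Depending on the schwimmbad version, MPIPool may still
# require the is_master()/wait() guard shown above.
import argparse
import schwimmbad

def do_the_processing(x):
    return x ** 2

def main(pool, data):
    return list(pool.map(do_the_processing, data))

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='schwimmbad pool selection example')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--ncores', dest='n_cores', default=1, type=int,
                       help='Number of processes (uses MultiPool).')
    group.add_argument('--mpi', dest='mpi', default=False, action='store_true',
                       help='Run with MPI (uses MPIPool).')
    args = parser.parse_args()

    pool = schwimmbad.choose_pool(mpi=args.mpi, processes=args.n_cores)
    print(main(pool, range(16)))
    pool.close()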