Python map_parallel Examples, dautil.util.map_parallel Python Examples

Example #1

0

Show file

def main(basedir, glob, mask_path, pixel_size, no_mpi, processes, full):
    """Tabulate the weights and sky fractions of every matched file.

    Each path matched by ``basedir.glob(glob)`` is handed to
    ``get_weight_fsky`` together with the two masks read from ``mask_path``
    and the rescaled ``pixel_size``.

    Args:
        basedir: Object exposing ``.glob(pattern)`` (e.g. a ``pathlib.Path``).
        glob: Glob pattern passed to ``basedir.glob``.
        mask_path: HDF5 file containing the datasets ``'t'`` and ``'p'``.
        pixel_size: Pixel size; multiplied by ``pi / 10800`` before use
            (presumably arcminutes to radians -- confirm with callers).
        no_mpi: When true, dispatch with ``dautil.util.map_parallel``;
            otherwise with ``mpi4py.futures.MPIPoolExecutor``.
        processes: Worker count forwarded to ``map_parallel``; not used on
            the MPI path.
        full: When true, rows are indexed by ``get_greg(path)`` under the
            name ``'date'``; otherwise by the ``get_idx(path)`` tuples named
            ``('nullsplit', 'subsplit', 'date')``.

    Returns:
        A DataFrame with the columns ``tweight``, ``pweight``, ``t_fsky``
        and ``p_fsky``, sorted by its index.
    """
    paths = list(basedir.glob(glob))
    pixel_size *= np.pi / 10800.

    with h5py.File(mask_path, 'r') as mask_file:
        tmask = mask_file['t'][:]
        pmask = mask_file['p'][:]

    worker = partial(get_weight_fsky, tmask, pmask, pixel_size)

    if no_mpi:
        from dautil.util import map_parallel
        data = map_parallel(worker, paths, processes=processes)
    else:
        from mpi4py.futures import MPIPoolExecutor
        with MPIPoolExecutor() as executor:
            # The mapped results are only consumed by the DataFrame
            # constructor below, after the executor block has exited.
            data = executor.map(worker, paths)

    if full:
        index = pd.Index([get_greg(path) for path in paths], name='date')
    else:
        index = pd.MultiIndex.from_tuples(
            [get_idx(path) for path in paths],
            names=('nullsplit', 'subsplit', 'date'))

    df = pd.DataFrame(
        data=data,
        index=index,
        columns=('tweight', 'pweight', 't_fsky', 'p_fsky'),
    )
    df.sort_index(inplace=True)
    return df

Example #2

0

Show file

def main(args):
    """Run ``pkldelete`` in parallel over every file matched by the inputs.

    Args:
        args: Namespace providing ``input`` (iterable of glob patterns),
            ``recursive``, ``dry_run``, ``verbose`` and ``p`` (worker count
            forwarded to ``map_parallel`` as ``processes``).
    """
    if args.recursive:
        expand = partial(glob, recursive=True)
    else:
        expand = glob

    # The star-unpacking runs every glob up front, before any file is handled.
    pkl_in_paths = chain(*map(expand, args.input))

    worker = partial(pkldelete, dry_run=args.dry_run, verbose=args.verbose)
    Nones = map_parallel(worker, pkl_in_paths, processes=args.p)

    if args.verbose:
        print('Finish checking {} pkl files.'.format(len(Nones)))

Example #3

0

Show file

def main(path, output):
    """Filter the non-missing ``html`` entries of a stored frame and save them.

    The frame is read with ``pd.read_hdf(path)``. For every row whose
    ``html`` value is not NA, ``html_filter`` receives the row's index label
    and its ``html`` value; the collected results are written to ``output``
    with ``writelines``.

    Args:
        path: Location of the HDF5-stored DataFrame; it must have an
            ``html`` column.
        output: Path of the text file to (over)write.
    """
    df = pd.read_hdf(path)

    has_html = df.html.notna()
    labels = df.loc[has_html].index
    documents = df.loc[has_html, 'html']

    results = map_parallel(
        html_filter,
        labels,
        documents,
        mode='multiprocessing',
        processes=20,
    )

    with open(output, 'w') as out_file:
        out_file.writelines(results)

Example #4

0

Show file

File: pandas_concat.py Project: ickc/dautil-py

def main(args):
    """Load many HDF5-stored DataFrames in parallel and write their concatenation.

    Args:
        args: Namespace providing ``input`` (iterable of glob patterns),
            ``recursive``, ``p`` (worker count forwarded to ``map_parallel``
            as ``processes``), ``output`` (destination HDF5 path) and
            ``compress_level`` (forwarded as ``complevel``).
    """
    _glob = partial(glob, recursive=True) if args.recursive else glob
    h5_in_paths = chain(*(_glob(glob_i) for glob_i in args.input))

    dfs = map_parallel(pd.read_hdf, h5_in_paths, processes=args.p)
    print('Loaded {} input, concat...'.format(len(dfs)))
    df = pd.concat(dfs)
    # Drop the per-file frames before sorting and writing the combined one.
    del h5_in_paths, dfs
    df.sort_index(inplace=True)
    # `key` is passed by keyword: pandas deprecates positional arguments
    # after `path_or_buf` in `to_hdf` and makes them keyword-only in 3.0.
    df.to_hdf(
        args.output,
        key='df',
        format='table',
        complevel=args.compress_level,
    )

Example #5

0

Show file

def main(args):
    """Run ``convert`` in parallel over every file matched under ``args.basedir``.

    Args:
        args: Namespace providing ``basedir``, ``glob`` (iterable of patterns
            joined onto ``basedir``), ``recursive``, ``output``, ``protocol``,
            ``mpi`` (selects the ``'mpi'`` mode over ``'multiprocessing'``),
            ``processes`` and ``verbose``.
    """
    if args.recursive:
        expand = partial(glob, recursive=True)
    else:
        expand = glob

    patterns = [os.path.join(args.basedir, pattern) for pattern in args.glob]
    # The star-unpacking runs every glob up front, before conversion starts.
    in_paths = chain(*map(expand, patterns))

    worker = partial(convert, args.basedir, args.output, protocol=args.protocol)
    backend = 'mpi' if args.mpi else 'multiprocessing'

    Nones = map_parallel(
        worker,
        in_paths,
        mode=backend,
        processes=args.processes,
    )

    if args.verbose:
        print('Finish converting {} pickle files.'.format(len(Nones)))

Example #6

0

Show file

def main(args):
    """Run ``h5delete`` in parallel over every file matched by the inputs.

    Args:
        args: Namespace providing ``input`` (iterable of glob patterns),
            ``recursive``, ``datasets``, ``dry_run``, ``verbose`` and ``p``
            (worker count forwarded to ``map_parallel`` as ``processes``).
    """
    if args.recursive:
        expand = partial(glob, recursive=True)
    else:
        expand = glob

    # The star-unpacking runs every glob up front, before any file is handled.
    h5_in_paths = chain(*map(expand, args.input))

    worker = partial(
        h5delete,
        datasets=args.datasets,
        dry_run=args.dry_run,
        verbose=args.verbose,
    )
    Nones = map_parallel(worker, h5_in_paths, processes=args.p)

    if not args.verbose:
        return

    if args.datasets:
        print('Finish checking {} HDF5 files with datasets {}.'.format(
            len(Nones), ' '.join(args.datasets)))
    else:
        print('Finish checking {} HDF5 files.'.format(len(Nones)))

Example #7

0

Show file

File: mask.py Project: ickc/TAIL

def main(
    basedir,
    output,
    no_ps,
    catalog,
    pixel_size,
    width,
    source,
    compress_level):
    """Build the temperature and polarization masks and store them in HDF5.

    The two apodized masks come from ``get_apodized_mask(basedir, 'w0')`` and
    ``get_apodized_mask(basedir, 'w4')``. Unless ``no_ps`` is set, both are
    multiplied by a mask built by ``mask_from_map`` from ``source`` and
    ``catalog`` and passed through ``zero_pad_ratio``.

    Args:
        basedir: Directory handed to ``get_apodized_mask`` (wrapped in
            ``Path``).
        output: Path of the HDF5 file receiving the datasets ``'t'`` and
            ``'p'``.
        no_ps: When true, skip the ``mask_from_map`` mask entirely.
        catalog: Forwarded to ``mask_from_map``.
        pixel_size: Pixel size used to derive the map shape (presumably in
            arcminutes -- confirm with callers).
        width: Map width used for the shape and the x/y bounds (presumably
            in degrees -- confirm with callers).
        source: Forwarded to ``mask_from_map``.
        compress_level: Forwarded to ``create_dataset`` as
            ``compression_opts``.
    """
    basedir = Path(basedir)

    # NOTE(review): other callers pass the worker count as `processes=`;
    # confirm that the installed dautil still accepts `p=`.
    temp_mask, pol_mask = map_parallel(partial(get_apodized_mask, basedir), ('w0', 'w4'), p=2)
    if not no_ps:
        n = int(round(width / pixel_size * 60.)) + 1
        shape = np.array((n, n))

        # pixel size in radian (not read again within this function)
        pixel_size = pixel_size * np.pi / 10800.
        # defined in libmap.py
        xmax = width * np.pi / 360.
        xmin = -xmax
        ymax = xmax
        ymin = xmin
        ps_mask = zero_pad_ratio(mask_from_map(
            source,
            shape,
            xmin, xmax, ymin, ymax,
            catalog
        ))
        temp_mask *= ps_mask
        pol_mask *= ps_mask

    # Open with an explicit mode: h5py >= 3.0 defaults to read-only, which
    # makes `create_dataset` fail. 'a' (read/write, create if missing) is
    # the default that earlier h5py releases applied here.
    with h5py.File(output, 'a', libver='latest') as f:
        # tmask
        f.create_dataset('t',
            data=temp_mask,
            compression_opts=compress_level,
            **H5_CREATE_KW
        )
        # pmask
        f.create_dataset('p',
            data=pol_mask,
            compression_opts=compress_level,
            **H5_CREATE_KW
        )

Example #8

0

Show file

File: null_spectra.py Project: ickc/TAIL

def main(pseudospectra,
         theory,
         filter_transfer,
         modecoupling,
         beam,
         bin_width,
         l_min,
         l_max,
         processes,
         compute_nl=False,
         return_w=False,
         l_boundary=600,
         l_lower=50,
         l_upper=4300):
    '''
    Solve the spectra for every case found in the pseudospectra index.

    `pseudospectra`: glob pattern of HDF5 files read with `pd.read_hdf` and concatenated.
    `theory`, `filter_transfer`, `beam`: HDF5 paths read with `pd.read_hdf`.
    `modecoupling`: HDF5 path opened read-only and handed to `solve_spectra`.
    `bin_width`: number of l per bin.
    `l_min`, `l_max`: l range `[l_min, l_max)`; a negative value means "choose automatically".
    `processes`: worker count used when reading the pseudospectra.
    `compute_nl`, `return_w`: forwarded to `solve_spectra`.
    `l_boundary`: the pivot point of the l bins, e.g. given a bin_width, 600 is always the boundary between 2 bins.
    `l_lower`: lowest l we trust, e.g. 50.
    `l_upper`: highest l we trust, e.g. 4300 because F_TT can become negative above that.

    Returns `df_spectra`, or `(df_spectra, df_window)` when `return_w` is true.
    '''
    if l_min < 0:
        bins_below = (l_boundary - l_lower) // bin_width
        l_min = l_boundary - bins_below * bin_width
        print(f'auto l-min at {l_min}')
    if l_max < 0:
        bins_above = (l_upper - l_boundary) // bin_width
        l_max = l_boundary + bins_above * bin_width
        print(f'auto l-max at {l_max}')

    l_common = np.arange(l_min, l_max)
    # one row per bin, so the row means are the bin centres
    l_common_binned = l_common.reshape(-1, bin_width).mean(axis=1)

    # Reading
    df_beam = pd.read_hdf(beam)['all']
    B_2 = np.square(np.interp(l_common, df_beam.index, df_beam.values.real))
    del df_beam

    frames = map_parallel(pd.read_hdf,
                          glob(pseudospectra),
                          mode='multithreading',
                          processes=processes)
    df = pd.concat(frames)
    del frames

    df_theory = pd.read_hdf(theory) * _REAL_TO_SIM
    df_filter = pd.read_hdf(filter_transfer)

    # mapping all cases: the product of index levels 1, 2 and 3
    case_levels = tuple(df.index.levels[i] for i in (1, 2, 3))
    case_names = df.index.names[1:4]
    index = pd.MultiIndex.from_product(case_levels, names=case_names)
    if return_w:
        index_w = pd.MultiIndex.from_product(
            (*case_levels, l_common_binned),
            names=df.index.names[1:4] + ['b'])

    with h5py.File(modecoupling, 'r') as f:
        solve = partial(solve_spectra,
                        df,
                        df_theory,
                        df_filter,
                        f,
                        B_2,
                        bin_width,
                        l_common,
                        l_common_binned,
                        compute_nl=compute_nl,
                        return_w=return_w)
        # each element of `index` is one tuple of case labels
        res = [solve(*case) for case in index]

    if return_w:
        # split the per-case (Cl, w_bl) pairs into two parallel lists
        Cls, w_bls = (list(column) for column in zip(*res))
    else:
        Cls = res

    df_spectra = pd.concat(Cls, keys=index)
    del l_common_binned, B_2, df, df_theory, df_filter

    df_spectra.index.names = index.names + df_spectra.index.names[-2:]
    del index

    df_spectra.sort_index(inplace=True)

    if not return_w:
        return df_spectra

    df_window = pd.DataFrame(np.concatenate(w_bls, axis=0),
                             index=index_w,
                             columns=l_common)
    df_window.sort_index(inplace=True)
    return df_spectra, df_window

Example #9

0

Show file

def get_ks_p_uniform_bootstrap_N(null, N):
    """Evaluate ``get_ks_p_uniform_bootstrap(null, i)`` for ``i`` in ``range(N)``.

    The calls are dispatched through ``map_parallel`` with 20 processes and
    the collected results are returned as a NumPy array.
    """
    bootstrap_once = partial(get_ks_p_uniform_bootstrap, null)
    samples = map_parallel(bootstrap_once, range(N), processes=20)
    # NOTE(review): `float__` is not defined in this block -- presumably a
    # module-level dtype alias; confirm it exists where this is used.
    return np.array(samples, dtype=float__)

Example #10

0

Show file

File: pseudospectra_IO.py Project: ickc/TAIL

def get_pseudospectra_IO(
    df_weights,
    df_nauto,
    tmask,
    pmask,
    tnorm,
    pnorm,
    pixel_size,
    lmax,
    n_x,
    h5_weight_basedir,
    h5_signal_basedir,
    weight_name,
    processes,
    mapcase,
    name,
    isim,
    nullsplit,
    mode='multithreading',
):
    """Read the I/Q/U maps of one null split and compute its pseudospectra.

    For each sub-split ('0' only when ``nullsplit`` is None, otherwise '0'
    and '1') the maps are read through ``get_IQU_IO``, converted with
    ``iqu_to_teb`` and passed to ``pseudospectra_auto_all``. When
    ``nullsplit`` is given, ``pseudospectra_cross_all`` is additionally run
    on the two sub-splits and stored under sub-split '2'.

    The wall-clock time of each stage is printed as
    ``<stage>,<subsplit>,<seconds>``.

    Returns:
        A DataFrame with one row per spectrum, whose index levels are named
        ``('sub_spectra', 'spectra', 'sub_split', 'null_split', 'map_case',
        'n')``.
    """

    def labelled(spectra, sub_split, null_split):
        """Pair the 12 returned spectra with their index tuples."""
        # strict 12-way unpack: a different number of spectra must fail here
        (tt_cl, tt_nl, ee_cl, ee_nl, bb_cl, bb_nl,
         te_cl, te_nl, tb_cl, tb_nl, eb_cl, eb_nl) = spectra
        by_name = (
            ('TT', tt_cl, tt_nl),
            ('EE', ee_cl, ee_nl),
            ('BB', bb_cl, bb_nl),
            ('TE', te_cl, te_nl),
            ('TB', tb_cl, tb_nl),
            ('EB', eb_cl, eb_nl),
        )
        rows = []
        for spectra_name, cl, nl in by_name:
            rows.append(
                (('Cl', spectra_name, sub_split, null_split, mapcase, isim), cl))
            rows.append(
                (('Nl', spectra_name, sub_split, null_split, mapcase, isim), nl))
        return rows

    result = []
    teb_arrays = []
    tpweightses = []

    is_full_map = nullsplit is None
    SUBSPLITS = ('0', ) if is_full_map else ('0', '1')
    # in full map case, avoid having nan in index values
    nullsplit_value = 'full' if is_full_map else nullsplit

    for subsplit in SUBSPLITS:
        if is_full_map:
            weights = df_weights.reset_index()
        else:
            weights = df_weights.loc[
                IDX[nullsplit, subsplit, :]].reset_index(level=2)

        dates = weights.date.values
        tpweights = weights[['tweight', 'pweight']].values.T
        del weights
        tpweightses.append(tpweights)

        # reading IQU
        read_iqu = partial(get_IQU_IO, n_x, h5_weight_basedir,
                           h5_signal_basedir, weight_name, nullsplit,
                           subsplit, mapcase, name)

        started = timeit.default_timer()
        IQUs = map_parallel(read_iqu, dates, mode=mode, processes=processes)
        del dates
        elapsed = timeit.default_timer() - started
        print('IQU IO,{},{}'.format(subsplit, elapsed))

        # 4d-array
        # 1st dim: I/Q/U at first then T/E/B
        # 2nd dim: no. of observations
        # 3/4 dim: the actual array of I/Q/U or T/E/B
        iqu_array = np.stack([list(component) for component in zip(*IQUs)])
        del IQUs

        started = timeit.default_timer()
        teb_array = iqu_to_teb(iqu_array, tmask, pmask, tnorm, pnorm,
                               pixel_size, n_x)
        elapsed = timeit.default_timer() - started
        print('IQU to TEB,{},{}'.format(subsplit, elapsed))

        del iqu_array
        teb_arrays.append(teb_array)

        started = timeit.default_timer()
        auto_rows = labelled(
            pseudospectra_auto_all(pixel_size, lmax, teb_array, tpweights),
            subsplit,
            nullsplit_value,
        )
        del tpweights, teb_array
        elapsed = timeit.default_timer() - started
        print('pseudospectra,{},{}'.format(subsplit, elapsed))

        result += auto_rows

    # cross spectra
    if not is_full_map:
        subsplit = '2'

        started = timeit.default_timer()
        cross_rows = labelled(
            pseudospectra_cross_all(
                pixel_size, lmax, teb_arrays[0], tpweightses[0],
                teb_arrays[1], tpweightses[1], df_nauto[nullsplit]),
            subsplit,
            nullsplit,
        )
        del teb_arrays, tpweightses
        elapsed = timeit.default_timer() - started
        print('pseudospectra,{},{}'.format(subsplit, elapsed))

        result += cross_rows

    df = pd.DataFrame.from_dict(dict(result)).T
    del result
    df.index.names = ('sub_spectra', 'spectra', 'sub_split', 'null_split',
                      'map_case', 'n')
    return df

Example #11

0

Show file

File: pseudospectra_iter.py Project: ickc/TAIL

def main(
    outbasedir,
    h5_signal_basedir,
    name,
    mapcase,
    nreal,
    isim,
    mapsplits,
    processes=1,
    no_mpi=False,
    redirect=True,
    compress_level=9,
    weight_name='realmap',
    lmax=3000,
    pixel_size=2.,

    # these must be filled in
    selectionpath=None,
    maskpath=None,
    weightdir=None,
    mode='multithreading',
):
    """Run ``get_pseudospectra_wrap`` once for every entry of ``mapsplits``.

    The ``'weights'`` and ``'n_auto'`` tables are read from
    ``selectionpath``, the masks ``'t'`` and ``'p'`` from ``maskpath``, and
    their norms are obtained from ``norm_fft``. Everything is bound into one
    partial which is mapped over ``mapsplits`` -- through
    ``dautil.util.map_parallel`` when ``no_mpi`` is true, otherwise through
    ``mpi4py.futures.MPIPoolExecutor``.

    ``pixel_size`` is multiplied by ``pi / 10800`` before use (presumably
    arcminutes to radians -- confirm with callers). When ``nreal`` is not
    None, ``isim`` zero-padded to three digits is appended to ``name``.
    ``outbasedir`` must support ``/`` (e.g. a ``pathlib.Path``).
    """
    df_weights = pd.read_hdf(selectionpath, 'weights')
    df_nauto = pd.read_hdf(selectionpath, 'n_auto')

    with h5py.File(str(maskpath), 'r') as mask_file:
        tmask = mask_file['t'][:]
        pmask = mask_file['p'][:]

    tnorm = norm_fft(tmask)
    pnorm = norm_fft(pmask)
    n_x = tmask.shape[0]
    pixel_size *= np.pi / 10800.

    if nreal is None:
        run_name = name
    else:
        run_name = name + '{0:03}'.format(isim)
    out_dir = outbasedir / mapcase

    worker = partial(
        get_pseudospectra_wrap,
        df_weights,
        df_nauto,
        tmask,
        pmask,
        tnorm,
        pnorm,
        pixel_size,
        lmax,
        n_x,
        weightdir,
        h5_signal_basedir,
        weight_name,
        processes,
        compress_level,
        out_dir,
        mapcase,
        run_name,
        isim,
        redirect=redirect,
        mode=mode,
    )

    if not no_mpi:
        from mpi4py.futures import MPIPoolExecutor
        with MPIPoolExecutor() as executor:
            # NOTE(review): the iterator returned by `map` is discarded; with
            # concurrent.futures-style executors, worker exceptions only
            # surface when results are consumed -- confirm this is intended.
            executor.map(worker, mapsplits)
    else:
        from dautil.util import map_parallel
        map_parallel(worker, mapsplits, processes=processes)