def main(basedir, glob, mask_path, pixel_size, no_mpi, processes, full):
    '''Tabulate the output of ``get_weight_fsky`` for every matched file.

    Every path yielded by ``basedir.glob(glob)`` is handed to
    ``get_weight_fsky`` together with the ``'t'`` and ``'p'`` datasets read
    from the HDF5 file at ``mask_path`` and the rescaled ``pixel_size``.

    The work is dispatched through ``mpi4py``'s ``MPIPoolExecutor`` unless
    ``no_mpi`` is true, in which case ``dautil``'s ``map_parallel`` is used
    with ``processes`` workers.

    Returns a DataFrame with columns ``tweight``, ``pweight``, ``t_fsky``
    and ``p_fsky``, sorted by its index.  The index is built by ``get_greg``
    (named ``date``) when ``full`` is true, otherwise by ``get_idx`` (named
    ``nullsplit``, ``subsplit``, ``date``).
    '''
    paths = list(basedir.glob(glob))

    # pi / 10800 == pi / (180 * 60); presumably arcminute -> radian, confirm
    pixel_size *= np.pi / 10800.

    with h5py.File(mask_path, 'r') as mask_file:
        tmask = mask_file['t'][:]
        pmask = mask_file['p'][:]

    worker = partial(get_weight_fsky, tmask, pmask, pixel_size)

    if no_mpi:
        from dautil.util import map_parallel
        data = map_parallel(worker, paths, processes=processes)
    else:
        from mpi4py.futures import MPIPoolExecutor
        with MPIPoolExecutor() as pool:
            # lazy iterator; it is consumed by the DataFrame constructor below
            data = pool.map(worker, paths)

    if full:
        index = pd.Index(list(map(get_greg, paths)), name='date')
    else:
        index = pd.MultiIndex.from_tuples(
            list(map(get_idx, paths)),
            names=('nullsplit', 'subsplit', 'date'),
        )

    df = pd.DataFrame(
        data=data,
        index=index,
        columns=('tweight', 'pweight', 't_fsky', 'p_fsky'),
    )
    df.sort_index(inplace=True)
    return df
def main(args):
    '''Run ``pkldelete`` in parallel over all files matched by ``args.input``.

    ``args.input`` is an iterable of glob patterns; they are expanded
    recursively when ``args.recursive`` is set.  ``args.dry_run`` and
    ``args.verbose`` are forwarded to ``pkldelete`` and ``args.p`` is the
    number of processes given to ``map_parallel``.  With ``args.verbose`` a
    summary line with the number of processed files is printed.
    '''
    if args.recursive:
        expand = partial(glob, recursive=True)
    else:
        expand = glob

    # expand every pattern up front, then flatten
    matches = [expand(pattern) for pattern in args.input]
    pkl_in_paths = chain(*matches)

    delete = partial(pkldelete, dry_run=args.dry_run, verbose=args.verbose)
    outcomes = map_parallel(delete, pkl_in_paths, processes=args.p)

    if args.verbose:
        print('Finish checking {} pkl files.'.format(len(outcomes)))
def main(path, output):
    '''Apply ``html_filter`` to every non-missing ``html`` entry and save it.

    The DataFrame stored at ``path`` is loaded, rows whose ``html`` column is
    NA are skipped, and ``html_filter`` is mapped over (index label, html)
    pairs using 20 worker processes.  The results are written to ``output``
    with ``writelines``.
    '''
    df = pd.read_hdf(path)
    has_html = ~df.html.isna()

    labels = df[has_html].index
    documents = df.loc[has_html, 'html']
    filtered = map_parallel(
        html_filter,
        labels,
        documents,
        mode='multiprocessing',
        processes=20,
    )

    with open(output, 'w') as out_file:
        out_file.writelines(filtered)
def main(args):
    '''Concatenate the DataFrames of many HDF5 files into one HDF5 file.

    The glob patterns in ``args.input`` are expanded (recursively when
    ``args.recursive`` is set) and each match is read with ``pd.read_hdf``
    using ``args.p`` processes.  The concatenated frame is sorted by index
    and written to ``args.output`` under the key ``'df'`` in table format
    with compression level ``args.compress_level``.
    '''
    if args.recursive:
        expand = partial(glob, recursive=True)
    else:
        expand = glob

    # expand every pattern up front, then flatten
    matches = [expand(pattern) for pattern in args.input]
    h5_in_paths = chain(*matches)

    frames = map_parallel(pd.read_hdf, h5_in_paths, processes=args.p)
    print('Loaded {} input, concat...'.format(len(frames)))

    df = pd.concat(frames)
    # drop the per-file frames before sorting and writing
    del h5_in_paths, frames

    df.sort_index(inplace=True)
    df.to_hdf(
        args.output,
        'df',
        format='table',
        complevel=args.compress_level,
    )
def main(args):
    '''Run ``convert`` in parallel over files found below ``args.basedir``.

    Each pattern in ``args.glob`` is joined onto ``args.basedir`` and
    expanded (recursively when ``args.recursive`` is set).  Every match is
    passed to ``convert`` along with ``args.basedir``, ``args.output`` and
    ``protocol=args.protocol``.  ``map_parallel`` runs in ``'mpi'`` mode when
    ``args.mpi`` is set, otherwise in ``'multiprocessing'`` mode, with
    ``args.processes`` processes.  With ``args.verbose`` a summary line is
    printed.
    '''
    if args.recursive:
        expand = partial(glob, recursive=True)
    else:
        expand = glob

    # expand every pattern up front, then flatten
    matches = [
        expand(os.path.join(args.basedir, pattern))
        for pattern in args.glob
    ]
    in_paths = chain(*matches)

    converter = partial(
        convert, args.basedir, args.output, protocol=args.protocol)

    if args.mpi:
        parallel_mode = 'mpi'
    else:
        parallel_mode = 'multiprocessing'

    outcomes = map_parallel(
        converter,
        in_paths,
        mode=parallel_mode,
        processes=args.processes,
    )

    if args.verbose:
        print('Finish converting {} pickle files.'.format(len(outcomes)))
def main(args):
    '''Run ``h5delete`` in parallel over all files matched by ``args.input``.

    The glob patterns in ``args.input`` are expanded (recursively when
    ``args.recursive`` is set).  ``args.datasets``, ``args.dry_run`` and
    ``args.verbose`` are forwarded to ``h5delete``; ``args.p`` is the number
    of processes.  With ``args.verbose`` a summary line is printed, which
    lists the dataset names when ``args.datasets`` is non-empty.
    '''
    if args.recursive:
        expand = partial(glob, recursive=True)
    else:
        expand = glob

    # expand every pattern up front, then flatten
    matches = [expand(pattern) for pattern in args.input]
    h5_in_paths = chain(*matches)

    delete = partial(
        h5delete,
        datasets=args.datasets,
        dry_run=args.dry_run,
        verbose=args.verbose,
    )
    outcomes = map_parallel(delete, h5_in_paths, processes=args.p)

    if not args.verbose:
        return

    if args.datasets:
        print('Finish checking {} HDF5 files with datasets {}.'.format(
            len(outcomes), ' '.join(args.datasets)))
    else:
        print('Finish checking {} HDF5 files.'.format(len(outcomes)))
def main(
    basedir,
    output,
    no_ps,
    catalog,
    pixel_size,
    width,
    source,
    compress_level,
):
    '''Build the temperature and polarization masks and save them to HDF5.

    The two apodized masks are obtained from ``get_apodized_mask`` for the
    keys ``'w0'`` (temperature) and ``'w4'`` (polarization) under
    ``basedir``.  Unless ``no_ps`` is true, both are multiplied by a mask
    built by ``mask_from_map`` from ``source`` and ``catalog`` and passed
    through ``zero_pad_ratio``.

    The masks are written to ``output`` as datasets ``'t'`` and ``'p'`` with
    ``compression_opts=compress_level`` plus the options in
    ``H5_CREATE_KW``.

    ``width`` and ``pixel_size`` set the grid of the source mask;
    presumably ``width`` is in degrees and ``pixel_size`` in arcminutes
    (confirm against the caller).
    '''
    basedir = Path(basedir)

    # NOTE(review): every other map_parallel call in this code base passes
    # ``processes=``; confirm that ``p=`` is honoured by the version in use.
    temp_mask, pol_mask = map_parallel(
        partial(get_apodized_mask, basedir), ('w0', 'w4'), p=2)

    if not no_ps:
        n = int(round(width / pixel_size * 60.)) + 1
        shape = np.array((n, n))

        # defined in libmap.py
        xmax = width * np.pi / 360.
        xmin = -xmax
        ymax = xmax
        ymin = xmin

        ps_mask = zero_pad_ratio(mask_from_map(
            source, shape, xmin, xmax, ymin, ymax, catalog
        ))
        temp_mask *= ps_mask
        pol_mask *= ps_mask

    # Explicit mode: h5py >= 3.0 opens files read-only by default, which makes
    # create_dataset fail.  'a' reproduces the pre-3.0 default (read/write if
    # the file exists, create otherwise).
    with h5py.File(output, 'a', libver='latest') as f:
        # tmask
        f.create_dataset(
            't',
            data=temp_mask,
            compression_opts=compress_level,
            **H5_CREATE_KW
        )
        # pmask
        f.create_dataset(
            'p',
            data=pol_mask,
            compression_opts=compress_level,
            **H5_CREATE_KW
        )
def main(
    pseudospectra,
    theory,
    filter_transfer,
    modecoupling,
    beam,
    bin_width,
    l_min,
    l_max,
    processes,
    compute_nl=False,
    return_w=False,
    l_boundary=600,
    l_lower=50,
    l_upper=4300,
):
    '''Solve the spectra for every case found in the pseudospectra files.

    `pseudospectra`: glob pattern of HDF5 files, read and concatenated.
    `theory`, `filter_transfer`, `beam`: HDF5 paths read with pandas.
    `modecoupling`: HDF5 path, opened read-only and passed to `solve_spectra`.
    `l_min`, `l_max`: l range; a negative value selects it automatically
    from `l_boundary`, `l_lower`/`l_upper` and `bin_width`.
    `processes`: no. of threads used to read the pseudospectra files.
    `l_boundary`: the pivot point of l bins.
    e.g. given a bin_width, 600 is always the boundary between 2 bins.
    `l_lower`: lowest l we trust. e.g. 50
    `l_upper`: highest l we trust.
    e.g. 4300 because F_TT can become negative above that.

    Returns the spectra DataFrame, or a tuple of (spectra, window)
    DataFrames when `return_w` is true.
    '''
    if l_min < 0:
        bins_below = (l_boundary - l_lower) // bin_width
        l_min = l_boundary - bins_below * bin_width
        print(f'auto l-min at {l_min}')
    if l_max < 0:
        bins_above = (l_upper - l_boundary) // bin_width
        l_max = l_boundary + bins_above * bin_width
        print(f'auto l-max at {l_max}')

    l_common = np.arange(l_min, l_max)
    # mean l of each consecutive group of bin_width values
    l_common_binned = l_common.reshape(-1, bin_width).mean(axis=1)

    # Reading
    df_beam = pd.read_hdf(beam)['all']
    beam_on_l = np.interp(l_common, df_beam.index, df_beam.values.real)
    B_2 = np.square(beam_on_l)
    del df_beam

    frames = map_parallel(
        pd.read_hdf,
        glob(pseudospectra),
        mode='multithreading',
        processes=processes,
    )
    df = pd.concat(frames)

    df_theory = pd.read_hdf(theory) * _REAL_TO_SIM
    df_filter = pd.read_hdf(filter_transfer)

    # mapping all cases
    case_levels = [df.index.levels[i] for i in (1, 2, 3)]
    index = pd.MultiIndex.from_product(
        case_levels, names=df.index.names[1:4])
    if return_w:
        index_w = pd.MultiIndex.from_product(
            case_levels + [l_common_binned],
            names=df.index.names[1:4] + ['b'],
        )

    with h5py.File(modecoupling, 'r') as f:
        solve = partial(
            solve_spectra,
            df,
            df_theory,
            df_filter,
            f,
            B_2,
            bin_width,
            l_common,
            l_common_binned,
            compute_nl=compute_nl,
            return_w=return_w,
        )
        # each element of index is one case tuple, unpacked as arguments
        res = [solve(*case) for case in index]

    if return_w:
        # transpose the list of pairs into two parallel lists
        Cls, w_bls = (list(part) for part in zip(*res))
    else:
        Cls = res

    df_spectra = pd.concat(Cls, keys=index)
    del l_common_binned, B_2, df, df_theory, df_filter

    df_spectra.index.names = index.names + df_spectra.index.names[-2:]
    del index
    df_spectra.sort_index(inplace=True)

    if not return_w:
        return df_spectra

    df_window = pd.DataFrame(
        np.concatenate(w_bls, axis=0),
        index=index_w,
        columns=l_common,
    )
    df_window.sort_index(inplace=True)
    return df_spectra, df_window
def get_ks_p_uniform_bootstrap_N(null, N, processes=20):
    '''Evaluate ``get_ks_p_uniform_bootstrap`` ``N`` times in parallel.

    ``get_ks_p_uniform_bootstrap(null, i)`` is called for every ``i`` in
    ``range(N)`` and the results are collected into a NumPy array.

    `null`: forwarded unchanged as the first argument of each call.
    `N`: no. of calls.
    `processes`: no. of parallel processes given to ``map_parallel``
    (default 20, the value that used to be hard-coded).
    '''
    draw = partial(get_ks_p_uniform_bootstrap, null)
    values = map_parallel(draw, range(N), processes=processes)
    # NOTE(review): ``float__`` is not defined in this block; presumably a
    # module-level dtype alias -- confirm it is not a typo for ``float``.
    return np.array(values, dtype=float__)
def get_pseudospectra_IO(
    df_weights,
    df_nauto,
    tmask,
    pmask,
    tnorm,
    pnorm,
    pixel_size,
    lmax,
    n_x,
    h5_weight_basedir,
    h5_signal_basedir,
    weight_name,
    processes,
    mapcase,
    name,
    isim,
    nullsplit,
    mode='multithreading',
):
    '''Read the maps of one null split and compute its pseudospectra.

    For every sub-split (only ``'0'`` when ``nullsplit`` is None, otherwise
    ``'0'`` and ``'1'``) the dates and the ``tweight``/``pweight`` columns
    are taken from ``df_weights``, the maps are read through ``get_IQU_IO``
    with ``map_parallel`` (``mode``, ``processes``), converted by
    ``iqu_to_teb`` and fed to ``pseudospectra_auto_all``.  When
    ``nullsplit`` is not None the cross spectra of the two sub-splits are
    added under sub-split ``'2'`` via ``pseudospectra_cross_all``, using
    ``df_nauto[nullsplit]``.

    Timings of each stage are printed.

    Returns a DataFrame with one row per spectrum, indexed by
    (``sub_spectra``, ``spectra``, ``sub_split``, ``null_split``,
    ``map_case``, ``n``).
    '''
    spectra_names = ('TT', 'EE', 'BB', 'TE', 'TB', 'EB')

    def labelled(sub_split, null_split, pairs):
        # one ('Cl', ...) and one ('Nl', ...) row per spectrum, in this order
        rows = []
        for spectrum, (signal, noise) in zip(spectra_names, pairs):
            rows.append(
                (('Cl', spectrum, sub_split, null_split, mapcase, isim),
                 signal))
            rows.append(
                (('Nl', spectrum, sub_split, null_split, mapcase, isim),
                 noise))
        return rows

    result = []
    teb_arrays = []
    tpweightses = []

    subsplit_labels = ('0', ) if nullsplit is None else ('0', '1')

    for subsplit in subsplit_labels:
        if nullsplit is None:
            selection = df_weights.reset_index()
        else:
            selection = df_weights.loc[
                IDX[nullsplit, subsplit, :]].reset_index(level=2)
        dates = selection.date.values
        tpweights = selection[['tweight', 'pweight']].values.T
        del selection
        tpweightses.append(tpweights)

        # reading IQU
        read_iqu = partial(
            get_IQU_IO,
            n_x,
            h5_weight_basedir,
            h5_signal_basedir,
            weight_name,
            nullsplit,
            subsplit,
            mapcase,
            name,
        )
        start = timeit.default_timer()
        IQUs = map_parallel(read_iqu, dates, mode=mode, processes=processes)
        del dates
        elapsed = timeit.default_timer() - start
        print('IQU IO,{},{}'.format(subsplit, elapsed))

        # 4d-array
        # 1st dim: I/Q/U at first then T/E/B
        # 2nd dim: no. of observations
        # 3/4 dim: the actual array of I/Q/U or T/E/B
        iqu_array = np.stack([list(component) for component in zip(*IQUs)])
        del IQUs

        start = timeit.default_timer()
        teb_array = iqu_to_teb(
            iqu_array, tmask, pmask, tnorm, pnorm, pixel_size, n_x)
        elapsed = timeit.default_timer() - start
        print('IQU to TEB,{},{}'.format(subsplit, elapsed))
        del iqu_array
        teb_arrays.append(teb_array)

        start = timeit.default_timer()
        (t_s, t_n, e_s, e_n, b_s, b_n,
         te_s, te_n, tb_s, tb_n, eb_s, eb_n) = pseudospectra_auto_all(
            pixel_size, lmax, teb_array, tpweights)
        del tpweights, teb_array
        elapsed = timeit.default_timer() - start
        print('pseudospectra,{},{}'.format(subsplit, elapsed))

        # in full map case, avoid having nan in index values
        nullsplit_value = 'full' if nullsplit is None else nullsplit
        result += labelled(
            subsplit,
            nullsplit_value,
            ((t_s, t_n), (e_s, e_n), (b_s, b_n),
             (te_s, te_n), (tb_s, tb_n), (eb_s, eb_n)),
        )

    # cross spectra
    if nullsplit is not None:
        subsplit = '2'
        start = timeit.default_timer()
        (t_s, t_n, e_s, e_n, b_s, b_n,
         te_s, te_n, tb_s, tb_n, eb_s, eb_n) = pseudospectra_cross_all(
            pixel_size,
            lmax,
            teb_arrays[0],
            tpweightses[0],
            teb_arrays[1],
            tpweightses[1],
            df_nauto[nullsplit],
        )
        del teb_arrays, tpweightses
        elapsed = timeit.default_timer() - start
        print('pseudospectra,{},{}'.format(subsplit, elapsed))

        result += labelled(
            subsplit,
            nullsplit,
            ((t_s, t_n), (e_s, e_n), (b_s, b_n),
             (te_s, te_n), (tb_s, tb_n), (eb_s, eb_n)),
        )

    df = pd.DataFrame.from_dict(dict(result)).T
    del result
    df.index.names = (
        'sub_spectra',
        'spectra',
        'sub_split',
        'null_split',
        'map_case',
        'n',
    )
    return df
def main(
    outbasedir,
    h5_signal_basedir,
    name,
    mapcase,
    nreal,
    isim,
    mapsplits,
    processes=1,
    no_mpi=False,
    redirect=True,
    compress_level=9,
    weight_name='realmap',
    lmax=3000,
    pixel_size=2.,
    # these must be filled in
    selectionpath=None,
    maskpath=None,
    weightdir=None,
    mode='multithreading',
):
    '''Run ``get_pseudospectra_wrap`` for every entry of ``mapsplits``.

    The ``'weights'`` and ``'n_auto'`` tables are read from
    ``selectionpath`` and the ``'t'``/``'p'`` masks from ``maskpath``; their
    ``norm_fft`` values and the mask size are precomputed once and bound,
    together with the remaining options, into a single callable.  Output
    goes under ``outbasedir / mapcase``.

    When ``nreal`` is not None, ``isim`` zero-padded to 3 digits is appended
    to ``name``.

    The callable is mapped over ``mapsplits`` with ``dautil``'s
    ``map_parallel`` (``processes`` workers) when ``no_mpi`` is true,
    otherwise with ``mpi4py``'s ``MPIPoolExecutor``.  Nothing is returned.
    '''
    df_weights = pd.read_hdf(selectionpath, 'weights')
    df_nauto = pd.read_hdf(selectionpath, 'n_auto')

    with h5py.File(str(maskpath), 'r') as mask_file:
        tmask = mask_file['t'][:]
        pmask = mask_file['p'][:]

    tnorm = norm_fft(tmask)
    pnorm = norm_fft(pmask)
    n_x = tmask.shape[0]

    # pi / 10800 == pi / (180 * 60); presumably arcminute -> radian, confirm
    pixel_size *= np.pi / 10800.

    if nreal is None:
        run_name = name
    else:
        run_name = name + '{0:03}'.format(isim)

    worker = partial(
        get_pseudospectra_wrap,
        df_weights,
        df_nauto,
        tmask,
        pmask,
        tnorm,
        pnorm,
        pixel_size,
        lmax,
        n_x,
        weightdir,
        h5_signal_basedir,
        weight_name,
        processes,
        compress_level,
        outbasedir / mapcase,
        mapcase,
        run_name,
        isim,
        redirect=redirect,
        mode=mode,
    )

    if no_mpi:
        from dautil.util import map_parallel
        map_parallel(worker, mapsplits, processes=processes)
    else:
        from mpi4py.futures import MPIPoolExecutor
        with MPIPoolExecutor() as pool:
            # NOTE(review): the result iterator is never consumed, so an
            # exception raised in a worker is presumably not re-raised here;
            # confirm this best-effort behaviour is intended.
            pool.map(worker, mapsplits)