Example #1
def plot_cats_hbv_sim(dbs_dir,
                      water_bal_step_size,
                      full_flag=False,
                      wat_bal_flag=False,
                      show_warm_up_steps_flag=False,
                      n_cpus=1):
    '''Plot HBV simulations for every catchment for every kfold.'''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    const_args = (water_bal_step_size, full_flag, wat_bal_flag,
                  show_warm_up_steps_flag)

    plot_gen = ((cat_db, const_args) for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_hbv_sim, plot_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for plot_args in plot_gen:
            plot_cat_hbv_sim(plot_args)

    return
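
A minimal invocation sketch for the function above; the directory name and argument values are hypothetical, not taken from the source:

# Hypothetical call: dbs_dir points at a directory holding cat_*.hdf5 files.
if __name__ == '__main__':
    plot_cats_hbv_sim(
        'hbv_calib_dbs',
        water_bal_step_size=365,
        full_flag=True,
        wat_bal_flag=True,
        show_warm_up_steps_flag=False,
        n_cpus=4)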
Example #2
def plot_cats_qsims(dbs_dir, n_cpus=1):
    '''Plot discharge simulations for every catchment for every
    kfold using its prm_vecs.'''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    plot_gen = (cat_db for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_qsims, plot_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for plot_args in plot_gen:
            plot_cat_qsims(plot_args)

    return
Example #3
def plot_cats_prm_vecs(dbs_dir, n_cpus):
    '''Plot the final parameter set from each kfold for every catchment,
    along with the objective function value distribution.
    '''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    opt_res_gen = (cat_db for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_prm_vecs, opt_res_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for opt_res in opt_res_gen:
            plot_cat_prm_vecs(opt_res)

    return
Example #4
def plot_cats_vars_errors(dbs_dir, err_var_labs, n_cpus):

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    cats_paths_gen = ((cat_db, err_var_labs) for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_vars_errors, cats_paths_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for cat_paths in cats_paths_gen:
            plot_cat_vars_errors(cat_paths)

    return
Example #5
def plot_cats_kfold_effs(dbs_dir, hgs_db_path, compare_ann_cyc_flag, n_cpus):
    '''Plot the k-fold efficiency results.'''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    const_args = (compare_ann_cyc_flag, hgs_db_path)
    cats_paths_gen = ((cat_db, const_args) for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_kfold_effs, cats_paths_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for cat_paths in cats_paths_gen:
            plot_cat_kfold_effs(cat_paths)

    return
Example #6
def plot_cats_best_prms_1d(dbs_dir, n_cpus):
    '''Plot every best kfold parameter set for all catchments.'''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs
    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    cats_paths_gen = (cat_db for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_best_prms_1d, cats_paths_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for cat_paths in cats_paths_gen:
            plot_cat_best_prms_1d(cat_paths)

    return
Example #7
    def _prep_anomaly_bjs_mp(anoms_arr, bjs_arr, n_cpus, fig_out_dir):

        assert anoms_arr.shape == bjs_arr.shape

        _idxs = ret_mp_idxs(anoms_arr.shape[1], n_cpus)
        _idxs_list = [_idxs[i:i + 2] for i in range(n_cpus)]

        _anoms_gen = ((anoms_arr[:, _idxs_list[i][0]:_idxs_list[i][1]])
                      for i in range(n_cpus))

        _bjs_gen = ((bjs_arr[:, _idxs_list[i][0]:_idxs_list[i][1]])
                    for i in range(n_cpus))

        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        try:
            print(
                list(
                    mp_pool.uimap(Anomaly._plot_anomaly_bjs_cdf, _idxs_list,
                                  _anoms_gen, _bjs_gen,
                                  [fig_out_dir] * n_cpus)))

            mp_pool.clear()

        except Exception as msg:
            mp_pool.close()
            mp_pool.join()
            print('Error in _plot_anomaly_bjs_cdf:', msg)
        return
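
ret_mp_idxs is not defined in this excerpt; a plausible sketch of it, assuming it returns the chunk-boundary indices used to split the columns across workers (mirroring the linspace-based splitting used in Example #13):

import numpy as np

def ret_mp_idxs(n_vals, n_cpus):
    # Assumed behavior: boundary indices that split n_vals items into
    # n_cpus contiguous, nearly equal chunks, e.g. [0, 34, 67, 100].
    idxs = np.linspace(0, n_vals, n_cpus + 1, endpoint=True, dtype=np.int64)
    return np.unique(idxs)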
Example #8
class BatchRunnerMP(BatchRunner):
    """ Child class of BatchRunner, extended with multiprocessing support. """
    def __init__(self, model_cls, nr_processes=2, **kwargs):
        """ Create a new BatchRunnerMP for a given model with the given
        parameters.

        Args:
            model_cls: The class of model to batch-run.
            nr_processes: the number of separate processes the BatchRunner
                should start, all running in parallel.
            kwargs: the kwargs required for the parent BatchRunner class
        """
        if not pathos_support:
            raise MPSupport
        super().__init__(model_cls, **kwargs)
        self.pool = ProcessPool(nodes=nr_processes)

    def run_all(self):
        """
        Run the model at all parameter combinations and store results,
        overrides run_all from BatchRunner.
        """
        # register the process pool and init a queue
        job_queue = []

        param_names, param_ranges = zip(*self.variable_parameters.items())
        run_count = count()
        total_iterations = self.iterations
        for param_range in param_ranges:
            total_iterations *= len(param_range)
        with tqdm(total_iterations, disable=not self.display_progress) as pbar:
            for param_values in product(*param_ranges):
                kwargs = dict(zip(param_names, param_values))
                kwargs.update(self.fixed_parameters)

                # make a new process and add it to the queue
                for i in range(self.iterations):
                    job_queue.append(
                        self.pool.uimap(self._run_single_model,
                                        (param_values, ), (next(run_count), ),
                                        (kwargs, )))

            # empty the queue
            results = []
            for task in job_queue:
                for model_vars, agent_vars in list(task):
                    results.append((model_vars, agent_vars))
                pbar.update()

            # store the results
            for model_vars, agent_vars in results:
                if self.model_reporters:
                    for model_key, model_val in model_vars.items():
                        self.model_vars[model_key] = model_val
                if self.agent_reporters:
                    for agent_key, reports in agent_vars.items():
                        self.agent_vars[agent_key] = reports
Example #9
class BatchRunnerMP(BatchRunner):
    """ Child class of BatchRunner, extended with multiprocessing support. """

    def __init__(self, model_cls, nr_processes=2, **kwargs):
        """ Create a new BatchRunnerMP for a given model with the given
        parameters.

        Args:
            model_cls: The class of model to batch-run.
            nr_processes: the number of separate processes the BatchRunner
                should start, all running in parallel.
            kwargs: the kwargs required for the parent BatchRunner class
        """
        if not pathos_support:
            raise MPSupport
        super().__init__(model_cls, **kwargs)
        self.pool = ProcessPool(nodes=nr_processes)

    def run_all(self):
        """
        Run the model at all parameter combinations and store results,
        overrides run_all from BatchRunner.
        """
        run_count = count()
        total_iterations, all_kwargs, all_param_values = self._make_model_args()

        # register the process pool and init a queue
        job_queue = []
        with tqdm(total_iterations, disable=not self.display_progress) as pbar:
            for i, kwargs in enumerate(all_kwargs):
                param_values = all_param_values[i]
                for _ in range(self.iterations):
                    # make a new process and add it to the queue
                    job_queue.append(self.pool.uimap(self.run_iteration,
                                                     (kwargs,),
                                                     (param_values,),
                                                     (next(run_count),)))
            # empty the queue
            results = []
            for task in job_queue:
                for model_vars, agent_vars in list(task):
                    results.append((model_vars, agent_vars))
                pbar.update()

            # store the results
            for model_vars, agent_vars in results:
                if self.model_reporters:
                    for model_key, model_val in model_vars.items():
                        self.model_vars[model_key] = model_val
                if self.agent_reporters:
                    for agent_key, reports in agent_vars.items():
                        self.agent_vars[agent_key] = reports
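
A hedged usage sketch for BatchRunnerMP; MoneyModel, its n_agents parameter, and the reporter function are hypothetical placeholders, and the keyword arguments assume the parent BatchRunner interface referenced above (variable_parameters, fixed_parameters, iterations, model_reporters):

# Hypothetical model class and reporter; only the call pattern is the point.
batch = BatchRunnerMP(
    MoneyModel,
    nr_processes=4,
    variable_parameters={'n_agents': range(10, 60, 10)},
    fixed_parameters={'width': 10, 'height': 10},
    iterations=5,
    model_reporters={'TotalWealth': compute_total_wealth})
batch.run_all()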
Example #10
    def run_all(self, processes=8):
        """
        Run the model at all parameter combinations and store results.

        Args:
            processes (int): number of processes to start
        """
        # Register the process pool and init a queue
        pool = ProcessPool(nodes=processes)
        job_queue = []

        if self.variable_parameters:
            param_names, param_sets = self.generate_samples()
        else:
            param_names = self.param_names
            param_sets = self.param_sets

        run_count = count()
        total_iterations = self.iterations * self.l_param_sets

        with tqdm(total=total_iterations,
                  disable=not self.display_progress) as pbar:
            for param_values in param_sets:
                kwargs = dict(zip(param_names, param_values))
                kwargs.update(self.fixed_parameters)

                # Make a new process and add it to the queue
                for i in range(self.iterations):
                    job_queue.append(
                        pool.uimap(self.iter, (kwargs, ), (param_values, ),
                                   (next(run_count), )))

            # Empty the process queue
            results = []
            for task in job_queue:
                for model_vars, agent_vars in list(task):
                    results.append((model_vars, agent_vars))
                pbar.update()

            for model_vars, agent_vars in results:
                if self.model_reporters:
                    for model_key, model_val in model_vars.items():
                        self.model_vars[model_key] = model_val
                if self.agent_reporters:
                    for agent_key, reports in agent_vars.items():
                        self.agent_vars[agent_key] = reports
Example #11
def demonstrate(games, parallel=True):
    def evaluate(game):
        s = State()
        a = MinimaxAgent(max_depth=6, max_width=6)
        ss = []
        pp = []
        for x, y in game:
            d = a._get_dist(s)
            if len(d) != 1 or (d[0][0] >= 0 and d[0][1] >= 0):
                ss.append(s.featurize())
                pp.append(util.dist_to_prob(d))
            s.move(x, y)
        sys.stdout.write("=")
        sys.stdout.flush()
        return (np.array(ss), np.array(pp))

    if parallel:
        pool = ProcessPool(nodes=7)
        results = list(pool.uimap(evaluate, games))
    else:
        results = list(map(evaluate, games))
    states = np.concatenate(list(map(lambda t: t[0], results)), axis=0)
    probs = np.concatenate(list(map(lambda t: t[1], results)), axis=0)
    return states, probs
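
Note that evaluate above is a closure defined inside demonstrate; pathos can ship it to worker processes because it serializes callables with dill rather than pickle. A minimal, self-contained sketch of the same property:

from pathos.multiprocessing import ProcessPool

def closure_demo():
    offset = 10
    pool = ProcessPool(nodes=2)
    # A lambda capturing a local variable; the stdlib multiprocessing.Pool
    # cannot pickle this, while pathos (via dill) handles it.
    out = sorted(pool.uimap(lambda x: x + offset, range(5)))
    pool.close()
    pool.join()
    return out  # [10, 11, 12, 13, 14]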
Example #12
def plot_cats_prm_vecs_evo(dbs_dir,
                           save_obj_flag,
                           save_png_flag,
                           save_gif_flag,
                           anim_secs,
                           n_cpus=1):
    '''Plot the evolution of parameter vectors and convex hull for every
    catchment for every kfold.
    '''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    opt_res_gen = ((cat_db, save_obj_flag, save_png_flag, save_gif_flag,
                    anim_secs) for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_prm_vecs_evo, opt_res_gen)))
        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for opt_res in opt_res_gen:
            plot_cat_prm_vecs_evo(opt_res)

    return
Example #13
def main():

    main_dir = Path(
        r'P:\Synchronize\IWS\Testings\fourtrans_practice\multisite_phs_spec_corr'
    )

    os.chdir(main_dir)

    interp_var = 'temp'

    ft_type = 'mag'

    #==========================================================================
    if interp_var == 'temp':
        # MEAN TEMPERATURE
        in_data_file = os.path.join(f'temperature_{ft_type}_spec_df.csv')

        in_vgs_file = os.path.join(r'temperature_cftns.csv')

        in_stns_coords_file = os.path.join(os.path.dirname(in_data_file),
                                           r'temperature_avg_coords.csv')

        out_dir = r'temperature_kriging'
        var_units = u'\u2103'  # 'centigrade'
        var_name = 'temperature'
        out_krig_net_cdf_file = f'kriging_1km_{ft_type}.nc'

        # interpolated values
        # can be int, float, 'min_in'/'max_in' or None
        # min_var_val = 'min_in'
        # max_var_val = 'max_in'
#         min_var_val = None
#         max_var_val = None

#==========================================================================

#==========================================================================
    elif interp_var == 'ppt':
        # PRECIPITATION
        in_data_file = os.path.join(f'precipitation_{ft_type}_spec_df.csv')

        in_vgs_file = os.path.join(r'precipitation_cftns.csv')

        in_stns_coords_file = os.path.join(os.path.dirname(in_data_file),
                                           r'precipitation_coords.csv')

        out_dir = r'precipitation_kriging'
        var_units = 'mm'
        var_name = 'precipitation'
        out_krig_net_cdf_file = f'kriging_1km_{ft_type}.nc'

        # interpolated values
        # can be int, float, 'min_in'/'max_in' or None
        # min_var_val = 'min_in'
        # max_var_val = 'max_in'
#         min_var_val = None
#         max_var_val = None

#==========================================================================
    else:
        raise ValueError(f'Invalid value for interp_var: {interp_var}!')

    # assuming in_drift_raster and in_stns_coords_file and in_bounds_shp_file
    # have the same coordinates system
    # assuming in_drift_rasters_list have the same cell sizes, bounds and NDVs
    # basically they are copies of each other except for the drift values
    in_drift_rasters_list = ([
        r'P:\Synchronize\IWS\QGIS_Neckar\raster\lower_de_gauss_z3_1km.tif'
    ])

    #     in_bounds_shp_file = (
    #         os.path.join(r'P:\Synchronize\IWS\QGIS_Neckar\raster',
    #                      r'taudem_out_spate_rockenau\watersheds.shp'))

    in_bounds_shp_file = (os.path.join(
        r'P:\Synchronize\IWS\QGIS_Neckar\raster\taudem_out_spate_rockenau\watersheds.shp'
    ))

    align_ras_file = in_drift_rasters_list[0]

    out_figs_dir = os.path.join(out_dir, 'krige_figs')

    x_coords_lab = 'X'
    y_coords_lab = 'Y'
    time_dim_lab = 'freq'
    nc_mode = 'w'

    #     min_ppt_thresh = 1.0

    idw_exp = 5
    n_cpus = 1
    buffer_dist = 20e3
    sec_buffer_dist = 2e3

    in_sep = str(';')

    ord_krige_flag = True
    sim_krige_flag = True
    edk_krige_flag = True
    idw_flag = True
    plot_figs_flag = True

    #     ord_krige_flag = False
    sim_krige_flag = False
    edk_krige_flag = False
    idw_flag = False
    plot_figs_flag = False

    os.chdir(main_dir)

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    if (not os.path.exists(out_figs_dir)) and plot_figs_flag:
        os.mkdir(out_figs_dir)

#     print('min_var_val:', min_var_val)
#     print('max_var_val:', max_var_val)
    print('idw_exp:', idw_exp)
    print('n_cpus:', n_cpus)
    print('nc_mode:', nc_mode)
    print('var_name:', var_name)
    print('out_dir:', out_dir)
    print('in_bounds_shp_file:', in_bounds_shp_file)
    print('out_krig_net_cdf_file:', out_krig_net_cdf_file)

    assert any([ord_krige_flag, sim_krige_flag, edk_krige_flag, idw_flag])

    #==========================================================================
    # read the data frames
    #==========================================================================
    in_data_df = pd.read_csv(in_data_file,
                             sep=in_sep,
                             index_col=0,
                             encoding='utf-8')

    in_vgs_df = pd.read_csv(in_vgs_file,
                            sep=in_sep,
                            index_col=0,
                            encoding='utf-8')

    in_stns_coords_df = pd.read_csv(in_stns_coords_file,
                                    sep=in_sep,
                                    index_col=0,
                                    encoding='utf-8')

    all_stns = in_data_df.columns.intersection(in_stns_coords_df.index)
    assert all_stns.shape[0]

    in_data_df = in_data_df.loc[:, all_stns]
    in_stns_coords_df = in_stns_coords_df.loc[all_stns, :]

    #==========================================================================
    # Get stations that are around/in the bounds_shp only
    #==========================================================================

    bds_vec = ogr.Open(in_bounds_shp_file)
    assert bds_vec

    bds_lyr = bds_vec.GetLayer(0)

    feat_buffs_list = []
    feat_sec_buffs_list = []
    for feat in bds_lyr:  # just to get the names of the catchments
        geom = feat.GetGeometryRef().Clone()
        assert geom

        feat_buffs_list.append(geom.Buffer(buffer_dist))
        feat_sec_buffs_list.append(geom.Buffer(sec_buffer_dist))

    bds_vec.Destroy()

    assert feat_buffs_list and feat_sec_buffs_list

    print(len(feat_buffs_list), 'polygons in the in_bounds_shp_file...')

    fin_stns = []
    for poly in feat_buffs_list:
        for stn in all_stns:
            if stn in fin_stns:
                continue

            curr_pt = cnvt_to_pt(*in_stns_coords_df.loc[stn,
                                                        ['X', 'Y']].values)

            if chk_cntmt(curr_pt, poly):
                fin_stns.append(stn)

    assert fin_stns

    print('%d stations out of %d within buffer zone of in_bounds_shp_file' %
          (len(fin_stns), in_stns_coords_df.shape[0]))

    fin_stns = np.unique(fin_stns)
    in_data_df = in_data_df.loc[:, fin_stns]
    in_stns_coords_df = in_stns_coords_df.loc[fin_stns, :]

    #==========================================================================
    # Read the DEM
    #==========================================================================

    #     if edk_krige_flag:
    #         in_drift_arr_list = []
    #         _rows_list = []
    #         _cols_list = []
    #
    #         for in_drift_raster in in_drift_rasters_list:
    #             in_drift_ds = gdal.Open(in_drift_raster)
    #
    #             assert in_drift_ds, 'GDAL cannot open %s' % in_drift_raster
    #
    #             drift_rows = in_drift_ds.RasterYSize
    #             drift_cols = in_drift_ds.RasterXSize
    #
    #             drift_geotransform = in_drift_ds.GetGeoTransform()
    #
    #             _drift_x_min = drift_geotransform[0]
    #             _drift_y_max = drift_geotransform[3]
    #
    #             drift_band = in_drift_ds.GetRasterBand(1)
    #             drift_ndv = drift_band.GetNoDataValue()
    #
    #             cell_width = drift_geotransform[1]
    #             cell_height = abs(drift_geotransform[5])
    #
    #             _drift_x_max = _drift_x_min + (drift_cols * cell_width)
    #             _drift_y_min = _drift_y_max - (drift_rows * cell_height)
    #
    #             _arr = in_drift_ds.ReadAsArray()
    #
    #             in_drift_arr_list.append(_arr)
    #             _rows_list.append(_arr.shape[0])
    #             _cols_list.append(_arr.shape[1])
    #
    #         assert all(_ == _rows_list[0] for _ in _rows_list), (
    #             'Drift raster have unequal number of rows!')
    #
    #         assert all(_ == _cols_list[0] for _ in _cols_list), (
    #             'Drift raster have unequal number of columns!')

    #==========================================================================
    # Read the bounding shapefile
    #==========================================================================
    #     sf = shp.Reader(in_bounds_shp_file)
    #     polys_list = [i.__geo_interface__ for i in sf.iterShapes()]

    ((fin_x_min, fin_x_max, fin_y_min, fin_y_max),
     cell_width) = get_aligned_shp_bds_and_cell_size(in_bounds_shp_file,
                                                     align_ras_file)

    cell_height = cell_width

    fin_x_min -= 2 * cell_width
    fin_x_max += 2 * cell_width
    fin_y_min -= 2 * cell_height
    fin_y_max += 2 * cell_height

    #     if edk_krige_flag:
    #         assert fin_x_min > _drift_x_min
    #         assert fin_x_max < _drift_x_max
    #         assert fin_y_min > _drift_y_min
    #         assert fin_y_max < _drift_y_max
    #
    #         min_col = int(max(0, (fin_x_min - _drift_x_min) / cell_width))
    #         max_col = int(ceil((fin_x_max - _drift_x_min) / cell_width))
    #
    #         min_row = int(max(0, (_drift_y_max - fin_y_max) / cell_height))
    #         max_row = int(ceil((_drift_y_max - fin_y_min) / cell_height))
    #
    #     else:
    min_col = 0
    max_col = int(ceil((fin_x_max - fin_x_min) / cell_width))

    min_row = 0
    max_row = int(ceil((fin_y_max - fin_y_min) / cell_height))

    #==========================================================================
    # Calculate coordinates at which to krige
    #==========================================================================

    assert 0 <= min_col <= max_col, (min_col, max_col)
    assert 0 <= min_row <= max_row, (min_row, max_row)

    strt_x_coord = fin_x_min + (0.5 * cell_width)
    end_x_coord = strt_x_coord + ((max_col - min_col) * cell_width)

    strt_y_coord = fin_y_max - (0.5 * cell_height)
    end_y_coord = strt_y_coord - ((max_row - min_row) * cell_height)

    krige_x_coords = np.linspace(strt_x_coord, end_x_coord,
                                 (max_col - min_col + 1))

    krige_y_coords = np.linspace(strt_y_coord, end_y_coord,
                                 (max_row - min_row + 1))

    krige_x_coords_mesh, krige_y_coords_mesh = np.meshgrid(
        krige_x_coords, krige_y_coords)

    krige_coords_orig_shape = krige_x_coords_mesh.shape

    #     if plot_figs_flag:
    #         # xy coords for pcolormesh
    #         pcolmesh_x_coords = np.linspace(
    #             fin_x_min, fin_x_max, (max_col - min_col + 1))
    #
    #         pcolmesh_y_coords = np.linspace(
    #             fin_y_max, fin_y_min, (max_row - min_row + 1))
    #
    #         krige_x_coords_plot_mesh, krige_y_coords_plot_mesh = (
    #             np.meshgrid(pcolmesh_x_coords, pcolmesh_y_coords))
    #
    #     else:
    #         krige_x_coords_plot_mesh, krige_y_coords_plot_mesh = None, None

    krige_x_coords_mesh = krige_x_coords_mesh.ravel()
    krige_y_coords_mesh = krige_y_coords_mesh.ravel()

    #     print('\n\n')
    #     print('#' * 10)
    #
    #     _beg_t = timeit.default_timer()
    #
    #     print(krige_x_coords_mesh.shape[0],
    #           'cells to interpolate per step before intersection!')
    #
    fin_cntn_idxs = np.ones(krige_x_coords_mesh.shape[0], dtype=bool)
    #     fin_cntn_idxs = np.zeros(krige_x_coords_mesh.shape[0], dtype=bool)
    #     ogr_pts = np.vectorize(cnvt_to_pt)(krige_x_coords_mesh, krige_y_coords_mesh)
    #
    #     for poly in feat_sec_buffs_list:
    #         curr_cntn_idxs = np.vectorize(chk_cntmt)(ogr_pts, poly)
    #         fin_cntn_idxs = fin_cntn_idxs | curr_cntn_idxs
    #
    #     print(fin_cntn_idxs.sum(),
    #           'cells to interpolate per step after intersection!')
    #
    #     _end_t = timeit.default_timer()
    #     _tot_t = _end_t - _beg_t
    #
    #     print(f'Took {_tot_t:0.4f} seconds!')
    #     print('#' * 10)
    #
    #     krige_x_coords_mesh = krige_x_coords_mesh[fin_cntn_idxs]
    #     krige_y_coords_mesh = krige_y_coords_mesh[fin_cntn_idxs]

    #     if edk_krige_flag:
    #         drift_vals_list = []
    #
    #         krige_cols = np.arange(min_col, max_col + 1, dtype=int)
    #         krige_rows = np.arange(min_row, max_row + 1, dtype=int)
    #
    #         assert krige_x_coords.shape[0] == krige_cols.shape[0]
    #         assert krige_y_coords.shape[0] == krige_rows.shape[0]
    #
    #         (krige_drift_cols_mesh,
    #          krige_drift_rows_mesh) = np.meshgrid(krige_cols, krige_rows)
    #
    #         krige_drift_cols_mesh = krige_drift_cols_mesh.ravel()
    #         krige_drift_rows_mesh = krige_drift_rows_mesh.ravel()
    #
    #         krige_drift_cols_mesh = krige_drift_cols_mesh[fin_cntn_idxs]
    #         krige_drift_rows_mesh = krige_drift_rows_mesh[fin_cntn_idxs]
    #
    #         for _drift_arr in in_drift_arr_list:
    #             _drift_vals = _drift_arr[
    #                 krige_drift_rows_mesh, krige_drift_cols_mesh]
    #
    #             drift_vals_list.append(_drift_vals)
    #
    # #         drift_vals_arr = np.array(drift_vals_list, dtype=float)
    #
    #         drift_df_cols = list(range(len(in_drift_rasters_list)))
    #         in_stns_drift_df = pd.DataFrame(
    #             index=in_stns_coords_df.index,
    #             columns=drift_df_cols,
    #             dtype=float)
    #
    #         for stn in in_stns_drift_df.index:
    #             stn_x = in_stns_coords_df.loc[stn, x_coords_lab]
    #             stn_y = in_stns_coords_df.loc[stn, y_coords_lab]
    #
    #             stn_col = int((stn_x - _drift_x_min) / cell_width)
    #             stn_row = int((_drift_y_max - stn_y) / cell_height)
    #
    #             for col, _arr in zip(drift_df_cols, in_drift_arr_list):
    #                 try:
    #                     _ = _arr[stn_row, stn_col]
    #                     if not np.isclose(drift_ndv, _):
    #                         in_stns_drift_df.loc[stn, col] = _
    #
    #                 except IndexError:
    #                     pass
    #
    #         in_stns_drift_df.dropna(inplace=True)

    #==========================================================================
    # Open NC
    #==========================================================================
    out_nc = nc.Dataset(os.path.join(out_dir, out_krig_net_cdf_file),
                        mode=str(nc_mode))

    if nc_mode == 'w':
        out_nc.set_auto_mask(False)
        out_nc.createDimension(x_coords_lab, krige_x_coords.shape[0])
        out_nc.createDimension(y_coords_lab, krige_y_coords.shape[0])
        out_nc.createDimension(time_dim_lab, in_data_df.shape[0])

        x_coords_nc = out_nc.createVariable(x_coords_lab,
                                            'd',
                                            dimensions=x_coords_lab)

        x_coords_nc[:] = krige_x_coords

        y_coords_nc = out_nc.createVariable(y_coords_lab,
                                            'd',
                                            dimensions=y_coords_lab)

        y_coords_nc[:] = krige_y_coords

        time_nc = out_nc.createVariable(time_dim_lab,
                                        'i8',
                                        dimensions=time_dim_lab)

        time_nc[:] = np.arange(in_data_df.shape[0])

    else:
        raise RuntimeError('Not configured for this option!')

        # NOTE: unreachable until a read/append mode is implemented.
        # time_nc = out_nc.variables[time_dim_lab]
        # krige_y_coords = y_coords_nc[:]
        # krige_x_coords = x_coords_nc[:]

    #==========================================================================
    # MP stuff
    #==========================================================================
    mp_cond = False

    if ((n_cpus > 1) and (in_data_df.shape[0] > (n_cpus + 1))):
        idxs = np.linspace(0,
                           in_data_df.shape[0], n_cpus + 1,
                           endpoint=True,
                           dtype=int)

        idxs = np.unique(idxs)
        print('MP idxs:', idxs)

        if idxs.shape[0] == 1:
            idxs = np.concatenate((np.array([0]), idxs))

        mp_cond = True

    else:
        idxs = [0, in_data_df.shape[0]]

    #==========================================================================
    # Krige
    #==========================================================================
    if ord_krige_flag:
        print('\n\n')
        print('#' * 10)

        _beg_t = timeit.default_timer()

        print('Ordinary Kriging...')

        if 'OK' not in out_nc.variables:
            ok_nc = out_nc.createVariable('OK',
                                          'd',
                                          dimensions=(time_dim_lab,
                                                      y_coords_lab,
                                                      x_coords_lab),
                                          fill_value=False)

        else:
            ok_nc = out_nc.variables['OK']

        ok_vars_gen = ((in_data_df.iloc[idxs[i]:idxs[i + 1]],
                        in_stns_coords_df, in_vgs_df.loc[ft_type][0],
                        krige_x_coords_mesh, krige_y_coords_mesh,
                        krige_coords_orig_shape, (idxs[i],
                                                  idxs[i + 1]), fin_cntn_idxs)
                       for i in range(n_cpus))

        if mp_cond:
            ok_krige_flds = np.full(
                (in_data_df.shape[0], krige_coords_orig_shape[0],
                 krige_coords_orig_shape[1]),
                np.nan,
                dtype=np.float32)

            mp_ress = []

            try:
                mp_pool = ProcessPool(n_cpus)
                mp_pool.restart(True)

                mp_ress = list(mp_pool.uimap(ordinary_kriging, ok_vars_gen))

                mp_pool.clear()

            except Exception as msg:
                mp_pool.close()
                mp_pool.join()
                print('Error in ordinary_kriging:', msg)

            for mp_res in mp_ress:
                if (len(mp_res) != 3) and (not isinstance(mp_res, list)):
                    print('\n', mp_res, '\n')
                    continue

                [strt_index, end_index, sub_ok_krige_flds] = mp_res
                ok_krige_flds[strt_index:end_index] = sub_ok_krige_flds

                # free memory
                mp_res[2], sub_ok_krige_flds = None, None

            ok_nc[:] = ok_krige_flds

        else:
            [strt_index, end_index,
             ok_krige_flds] = ordinary_kriging(next(ok_vars_gen))

            ok_nc[:] = ok_krige_flds

        ok_nc.units = var_units
        ok_nc.standard_name = var_name + ' (ordinary kriging)'

        ok_krige_flds = None

        _end_t = timeit.default_timer()
        _tot_t = _end_t - _beg_t

        print(f'Took {_tot_t:0.4f} seconds!')
        print('#' * 10)


#     if sim_krige_flag:
#         print('\n\n')
#         print('#' * 10)
#
#         _beg_t = timeit.default_timer()
#
#         print('Simple Kriging...')
#         if 'SK' not in out_nc.variables:
#             sk_nc = out_nc.createVariable(
#                 'SK',
#                 'd',
#                 dimensions=(time_dim_lab, y_coords_lab, x_coords_lab),
#                 fill_value=False)
#
#         else:
#             sk_nc = out_nc.variables['SK']
#
#         sk_vars_gen = ((in_data_df.iloc[idxs[i]:idxs[i + 1]],
#                         in_stns_coords_df,
#                         in_vgs_df.iloc[idxs[i]:idxs[i + 1]],
#                         min_ppt_thresh,
#                         var_name,
#                         krige_x_coords_mesh,
#                         krige_y_coords_mesh,
#                         krige_coords_orig_shape,
#                         min_var_val,
#                         max_var_val,
#                         (idxs[i], idxs[i + 1]),
#                         plot_figs_flag,
#                         krige_x_coords_plot_mesh,
#                         krige_y_coords_plot_mesh,
#                         var_units,
#                         polys_list,
#                         out_figs_dir,
#                         fin_cntn_idxs) for i in range(n_cpus))
#
#         if mp_cond:
#             sk_krige_flds = np.full(
#                 (in_data_df.shape[0],
#                  krige_coords_orig_shape[0],
#                  krige_coords_orig_shape[1]),
#                 np.nan,
#                 dtype=np.float32)
#
#             mp_ress = []
#
#             try:
#                 mp_pool = ProcessPool(n_cpus)
#                 mp_pool.restart(True)
#
#                 mp_ress = list(mp_pool.uimap(simple_kriging, sk_vars_gen))
#
#                 mp_pool.clear()
#
#             except Exception as msg:
#                 mp_pool.close()
#                 mp_pool.join()
#                 print('Error in simple_kriging:', msg)
#
#             for mp_res in mp_ress:
#                 if (len(mp_res) != 3) and (not isinstance(list)):
#                     print('\n', mp_res, '\n')
#                     continue
#
#                 [strt_index, end_index, sub_sk_krige_flds] = mp_res
#                 sk_krige_flds[strt_index:end_index] = sub_sk_krige_flds
#
#                 # free memory
#                 mp_res[2], sub_sk_krige_flds = None, None
#
#             sk_nc[:] = sk_krige_flds
#
#         else:
#             [strt_index,
#              end_index,
#              sk_krige_flds] = simple_kriging(next(sk_vars_gen))
#
#             sk_nc[:] = sk_krige_flds
#
#         sk_nc.units = var_units
#         sk_nc.standard_name = var_name + ' (simple kriging)'
#
#         sk_krige_flds = None
#
#         _end_t = timeit.default_timer()
#         _tot_t = _end_t - _beg_t
#
#         print(f'Took {_tot_t:0.4f} seconds!')
#         print('#' * 10)
#
#     if edk_krige_flag:
#         print('\n\n')
#         print('#' * 10)
#
#         _beg_t = timeit.default_timer()
#
#         print('External Drift Kriging...')
#         if 'EDK' not in out_nc.variables:
#             edk_nc = out_nc.createVariable(
#                 'EDK',
#                 'd',
#                 dimensions=(time_dim_lab, y_coords_lab, x_coords_lab),
#                 fill_value=False)
#
#         else:
#             edk_nc = out_nc.variables['EDK']
#
#         edk_vars_gen = ((in_data_df.iloc[idxs[i]:idxs[i + 1]],
#                          in_stns_drift_df,
#                          in_stns_coords_df,
#                          in_vgs_df.iloc[idxs[i]:idxs[i + 1]],
#                          min_ppt_thresh,
#                          var_name,
#                          krige_x_coords_mesh,
#                          krige_y_coords_mesh,
#                          drift_vals_arr,
#                          krige_coords_orig_shape,
#                          drift_ndv,
#                          min_var_val,
#                          max_var_val,
#                          (idxs[i], idxs[i + 1]),
#                          plot_figs_flag,
#                          krige_x_coords_plot_mesh,
#                          krige_y_coords_plot_mesh,
#                          var_units,
#                          polys_list,
#                          out_figs_dir,
#                          fin_cntn_idxs) for i in range(n_cpus))
#
#         if mp_cond:
#             edk_krige_flds = np.full(
#                 (in_data_df.shape[0],
#                  krige_coords_orig_shape[0],
#                  krige_coords_orig_shape[1]),
#                 np.nan,
#                 dtype=np.float32)
#
#             mp_ress = []
#
#             try:
#                 mp_pool = ProcessPool(n_cpus)
#                 mp_pool.restart(True)
#
#                 mp_ress = list(mp_pool.uimap(
#                     external_drift_kriging, edk_vars_gen))
#
#                 mp_pool.clear()
#
#             except Exception as msg:
#                 mp_pool.close()
#                 mp_pool.join()
#                 print('Error in external_drift_kriging:', msg)
#
#             for mp_res in mp_ress:
#                 if (len(mp_res) != 3) and (not isinstance(list)):
#                     print('\n', mp_res, '\n')
#                     continue
#
#                 [strt_index, end_index, sub_edk_krige_flds] = mp_res
#                 edk_krige_flds[strt_index:end_index] = sub_edk_krige_flds
#
#                 print('sub_min:', np.nanmin(sub_edk_krige_flds))
#                 print('sub_max:', np.nanmax(sub_edk_krige_flds))
#
#                 # free memory
#                 mp_res[2], sub_edk_krige_flds = None, None
#
#         else:
#             [strt_index,
#              end_index,
#              edk_krige_flds] = external_drift_kriging(next(edk_vars_gen))
#
#         edk_nc[:] = edk_krige_flds
#
#         edk_nc.units = var_units
#         edk_nc.standard_name = var_name + ' (external drift kriging)'
#
#         edk_krige_flds = None
#
#         _end_t = timeit.default_timer()
#         _tot_t = _end_t - _beg_t
#
#         print(f'Took {_tot_t:0.4f} seconds!')
#         print('#' * 10)
#
#     #==========================================================================
#     # IDW
#     #==========================================================================
#     if idw_flag:
#         print('\n\n')
#         print('#' * 10)
#
#         _beg_t = timeit.default_timer()
#
#         print('Inverse Distance Weighting...')
#         if 'IDW' not in out_nc.variables:
#             idw_nc = out_nc.createVariable(
#                 'IDW',
#                 'd',
#                  dimensions=(time_dim_lab, y_coords_lab, x_coords_lab),
#                  fill_value=False)
#
#         else:
#             idw_nc = out_nc.variables['IDW']
#
#         idw_vars_gen = ((in_data_df.iloc[idxs[i]:idxs[i + 1]],
#                         in_stns_coords_df,
#                         min_ppt_thresh,
#                         idw_exp,
#                         var_name,
#                         krige_x_coords_mesh,
#                         krige_y_coords_mesh,
#                         krige_coords_orig_shape,
#                         min_var_val,
#                         max_var_val,
#                         (idxs[i], idxs[i + 1]),
#                         plot_figs_flag,
#                         krige_x_coords_plot_mesh,
#                         krige_y_coords_plot_mesh,
#                         var_units,
#                         polys_list,
#                         out_figs_dir,
#                         fin_cntn_idxs) for i in range(n_cpus))
#
#         if mp_cond:
#             idw_flds = np.full(
#                 (in_data_df.shape[0],
#                  krige_coords_orig_shape[0],
#                  krige_coords_orig_shape[1]),
#                 np.nan,
#                 dtype=np.float32)
#
#             mp_ress = []
#             try:
#                 mp_pool = ProcessPool(n_cpus)
#                 mp_pool.restart(True)
#
#                 mp_ress = list(mp_pool.uimap(
#                     inverse_distance_wtng, idw_vars_gen))
#
#                 mp_pool.clear()
#
#             except Exception as msg:
#                 mp_pool.close()
#                 mp_pool.join()
#                 print('Error in inverse_distance_wtng:', msg)
#
#             for mp_res in mp_ress:
#                 if (len(mp_res) != 3) and (not isinstance(list)):
#                     print('\n', mp_res, '\n')
#                     continue
#
#                 [strt_index, end_index, sub_idw_flds] = mp_res
#                 idw_flds[strt_index:end_index] = sub_idw_flds
#
#                 # free memory
#                 mp_res[2], sub_idw_flds = None, None
#
#         else:
#             [strt_index,
#              end_index,
#              idw_flds] = inverse_distance_wtng(next(idw_vars_gen))
#
#         idw_nc[:] = idw_flds
#
#         idw_nc.units = var_units
#         idw_nc.standard_name = (
#             var_name + ' (IDW (exp=%0.3f))' % float(idw_exp))
#
#         idw_flds = None
#
#         _end_t = timeit.default_timer()
#         _tot_t = _end_t - _beg_t
#
#         print(f'Took {_tot_t:0.4f} seconds!')
#         print('#' * 10)

    out_nc.Author = 'Faizan IWS Uni-Stuttgart'
    out_nc.Source = out_nc.filepath()
    out_nc.close()
    return
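
The ordinary-kriging branch above follows a pattern that recurs throughout these examples: split the time axis into n_cpus chunks, uimap a worker over per-chunk argument tuples, and write each returned (start, end, field) triple back into a preallocated array. A stripped-down sketch of that pattern (the names here are illustrative, not from the script):

import numpy as np
from pathos.multiprocessing import ProcessPool

def _chunk_worker(args):
    sub_data, (beg, end) = args
    return beg, end, sub_data * 2.0  # placeholder for the real interpolation

def run_in_chunks(data, n_cpus):
    idxs = np.unique(np.linspace(0, data.shape[0], n_cpus + 1, dtype=int))
    args_gen = ((data[idxs[i]:idxs[i + 1]], (idxs[i], idxs[i + 1]))
                for i in range(idxs.shape[0] - 1))

    out = np.full(data.shape, np.nan)
    mp_pool = ProcessPool(n_cpus)
    for beg, end, sub_fld in mp_pool.uimap(_chunk_worker, args_gen):
        out[beg:end] = sub_fld

    mp_pool.close()
    mp_pool.join()
    return out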
Example #14
def graph_sampling(graph: FSN, strategy: Optional[str] = "MetaDiff",
                   n_jobs: Optional[int] = 4,
                   use_cache: Optional[bool] = True, **kwargs) \
        -> List[List[Union[str, int]]]:
    """
    Sampling the sequences of nodes from FSN w.r.t. chosen strategy
    Parameters
    ----------
    graph : FSN object
        Graph to be processed
    strategy : str, default is 'MetaDiff'
        Walking strategy to be used
    n_jobs : int, default is 4
        Number of workers to be created in parallel pool
    use_cache : bool, default is True
        To use the previously cached files

    Returns
    -------
    Sampled sequences of BP nodes
    """
    set_new_config(**kwargs)
    local_logger = logging.getLogger(f"{__name__}")
    if use_cache and os.path.isfile(CONFIG.WORK_FOLDER[0] + "sampled_sequences_cached.pkl"):
        local_logger.info("Loading sequences from cache... wait...")
        try:
            with open(CONFIG.WORK_FOLDER[0] + "sampled_sequences_cached.pkl", "rb") as file:
                res = pickle.load(file)
            local_logger.info(f"Total number of raw sampled sequences is {len(res)}")
            local_logger.info(f"Average length of sequences is {sum(map(len, res)) / float(len(res))}")
            return res
        except FileNotFoundError:
            local_logger.info("File not found... Recalculate \n")
            pass
        except Exception as e:
            local_logger.error(f"Unexpected error: {e}")
    local_logger.info("Sampling sequences... wait...")
    max_processes = max(n_jobs, os.cpu_count())
    global walk
    if strategy in strategy_to_class.keys():
        walk = strategy_to_class[strategy](G=graph, walk_length=CONFIG.WALKS_LENGTH,
                                           direction=CONFIG.DIRECTION,
                                           pressure=CONFIG.PRESSURE, allow_back=CONFIG.ALLOW_BACK)
    else:
        raise KeyError(
            f"The given strategy {strategy} is unknown. The following ones are implemented: {strategy_to_class.keys()}")
    sampling_pool = ProcessPool(nodes=max_processes)
    local_logger.info("Created a Pool with " + str(max_processes) + " processes ")
    # required to restart pool to update CONFIG inside the parallel part
    sampling_pool.terminate()
    sampling_pool.restart()
    BPs = graph.get_BPs()
    n_BPs = len(BPs)
    sampled = list()
    try:
        with tqdm(total=n_BPs) as pbar:
            for i, res in enumerate(sampling_pool.uimap(wrappedWalk, BPs)):
                sampled.append(res)
                pbar.update()
    except KeyboardInterrupt:
        print('Got ^C while pool mapping, terminating the pool')
        sampling_pool.terminate()
    res = list(itertools.chain(*sampled))
    sampling_pool.terminate()
    sampling_pool.restart()
    local_logger.info("Cashing sampled sequences!")
    if use_cache:
        with open(CONFIG.WORK_FOLDER[0] + "sampled_sequences_cached.pkl", "wb") as file:
            pickle.dump(res, file)
    local_logger.info(f"Total number of raw sampled sequences is {len(res)}")
    local_logger.info(f"Average length of sequences is {sum(map(len, res)) / float(len(res))}")
    return res
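
A hedged call sketch; fsn_graph stands in for an already-constructed FSN instance and is not defined in this excerpt:

# Hypothetical invocation on an existing FSN graph.
sequences = graph_sampling(fsn_graph, strategy="MetaDiff", n_jobs=8, use_cache=False)
print(len(sequences), 'sampled BP sequences')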
Example #15
    def get_stn_comb_freqs(self, obs_vals_df):

        sim_beg_time = default_timer()

        n_extra_cpus_per_comb = self._n_cpus_extra // self._n_cpus

        if n_extra_cpus_per_comb:
            sim_chunks_idxs = ret_mp_idxs(self._n_sims, n_extra_cpus_per_comb)

            sub_mp_pool = ProcessPool(n_extra_cpus_per_comb)

            self._vb = False

        else:
            sim_chunks_idxs = np.array([0, self._n_sims])

            sub_mp_pool = None

        sim_chunks_idxs[-1] += 1  # for simulation zero

        sim_chunk_gen = ((
            obs_vals_df,
            (sim_chunks_idxs[i], sim_chunks_idxs[i + 1]),)

            for i in range(sim_chunks_idxs.shape[0] - 1))

        if sub_mp_pool is not None:
            list(sub_mp_pool.uimap(self._get_stn_comb_freqs, sim_chunk_gen))
            sub_mp_pool.clear()

        else:
            list(map(self._get_stn_comb_freqs, sim_chunk_gen))

        if self._vb_old:
            if self._vb_old and not self._vb:
                print_sl()

            print(
                f'INFO: Finished computing sub-station combinations in '
                f'{default_timer() - sim_beg_time:0.3f} seconds.')

            print_el()

        if self._save_sim_cdfs_flag or self._save_sim_acorrs_flag:
            if self._vb_old:
                if self._vb_old and not self._vb:
                    print_sl()

                print(
                    f'INFO: Computing/writing extra sub-station '
                    f'information to HDF5...')

            comb_str = str(tuple(obs_vals_df.columns))

            stats_gen = ((
                obs_vals_df.columns[i], comb_str,)

                for i in range(obs_vals_df.shape[1]))

            if sub_mp_pool is not None:
                list(sub_mp_pool.uimap(self._write_stats_to_hdf5, stats_gen))
                sub_mp_pool.clear()

            else:
                list(map(self._write_stats_to_hdf5, stats_gen))

            if self._vb_old:
                print(f'INFO: Done!')

                print_el()

        if sub_mp_pool is not None:
            sub_mp_pool.join()
            sub_mp_pool = None
        return
Example #16
'''
Variation uses pathos dependency

Windows OS: Hangs
Mac OS:
Linux:
Debian (unclear because windows app but operated in same manner)

Cloud-based:
Repl.it: Works
Ideone.com: Fails-multiprocess error
'''

from multiprocess import freeze_support
from pathos.multiprocessing import ProcessPool

if __name__ == "__main__":
    freeze_support()

    pool = ProcessPool(nodes=4)
    results = pool.uimap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
    print("...")
    print(list(results))
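
uimap yields results in completion order rather than submission order; ProcessPool also provides imap, which preserves the input order. A follow-up sketch, reusing the pool created above:

    # Same inputs via imap; results arrive in input order.
    ordered = pool.imap(pow, [1, 2, 3, 4], [5, 6, 7, 8])
    print(list(ordered))  # [1, 64, 2187, 65536]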
Example #17
                        out_figs_dir, fin_cntn_idxs) for i in range(n_cpus))

        if mp_cond:
            ok_krige_flds = np.full(
                (fin_date_range.shape[0], krige_coords_orig_shape[0],
                 krige_coords_orig_shape[1]),
                np.nan,
                dtype=np.float32)

            mp_ress = []

            try:
                mp_pool = ProcessPool(n_cpus)
                mp_pool.restart(True)

                mp_ress = list(mp_pool.uimap(ordinary_kriging, ok_vars_gen))

                mp_pool.clear()

            except Exception as msg:
                mp_pool.close()
                mp_pool.join()
                print('Error in ordinary_kriging:', msg)

            for mp_res in mp_ress:
                if (len(mp_res) != 3) and (not isinstance(mp_res, list)):
                    print('\n', mp_res, '\n')
                    continue

                [strt_index, end_index, sub_ok_krige_flds] = mp_res
                ok_krige_flds[strt_index:end_index] = sub_ok_krige_flds
Example #18
        else:
            run_length += 1

            if run_length >= RUN_LENGTH_TO_SWITCH:
                # Switch label we're applying and reset counter
                current = SIGNIFICANT if (
                    current == NOT_SIGNIFCANT) else NOT_SIGNIFCANT
                run_length = 0

        res[i] = current

    return res


if __name__ == "__main__":
    # Batch process + multiprocessing
    from tqdm import tqdm

    from pathos.multiprocessing import ProcessPool
    pool = ProcessPool(nodes=3)
    job_queue = []

    for trial_num in range(1, 256):
        job_queue.append(pool.uimap(process_trial, (trial_num, )))

    # Code block to process R & p values for all trials
    with tqdm(total=255, unit='trials') as pbar:
        for task in job_queue:
            list(task)
            pbar.update()
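
The loop above submits one single-element uimap call per trial; an equivalent, simpler pattern (a sketch, assuming process_trial takes a single trial number) maps the whole range in one call and drives the same progress bar:

# Equivalent sketch: one uimap over all trial numbers.
with tqdm(total=255, unit='trials') as pbar:
    for _ in pool.uimap(process_trial, range(1, 256)):
        pbar.update()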
Example #19
                f.write(text)
        return url, num_pages
    except KeyboardInterrupt:
        print('Exiting')
        raise KeyboardInterrupt
    except:
        traceback.print_exc()

def get_number_pages(soup):
    tds = soup.findAll('td', class_='vbmenu_control')
    for td in tds:
        text = td.text.strip()
        if text.startswith('Page'):
            tokens = text.split()
            return int(tokens[-1])
    return 1

def rest():
    time.sleep(random.random() * 2)


if __name__ == '__main__':
    pool = Pool(nodes = NUM_PROC)
    with open(input_file) as f:
        links = (row['link'] for row in csv.DictReader(f))
        results = pool.uimap(scrape_thread, links)
        for r in results:
            print(r)
            rest()

Example #20
                    'links': urls
                }
                json.dump(data, f, indent=4, sort_keys=True)
            add_completed_url(url)
            return True
    except:
        print('Problem while parsing: {}'.format(url))
        traceback.print_exc()
    add_problem_url(url)
    return False


if __name__ == '__main__':
    total_url_count = len(URLS)
    completed_urls_count = len(COMPLETED_URLS)
    problem_urls_count = len(PROBLEM_URLS)
    preprocessed_count = completed_urls_count + problem_urls_count
    unprocessed_urls = (URLS - COMPLETED_URLS) - PROBLEM_URLS
    pool = Pool(nodes=NUM_PROC)
    results = pool.uimap(get_content, unprocessed_urls)
    for i, success in enumerate(results):
        i += preprocessed_count
        sys.stderr.write('\rdone {0:%} ({1:d}/{2:d}) bad:{3}'.format(
            i / total_url_count, i, total_url_count, problem_urls_count))
        sys.stderr.flush()

        if success:
            completed_urls_count += 1
        else:
            problem_urls_count += 1
Example #21
class SuperPool:
    def __init__(self, n_cpu=-1):
        """Process pool for applying functions multi-threaded with progress bars.
        Arguments:
        n_cpu -- The number of processes to spawn. Defaults to the number of threads (logical cores) on your system.
        Usage:
        >>> pool = mlc.SuperPool()  # By default, the cpu count is used
        >>> def f(x):
        ...     return x ** 2
        >>> res = pool.map(f, range(1000))  # Apply function f to every value in y
        [mlcrate] 8 CPUs: 100%|████████████████████████| 1000/1000 [00:00<00:00, 1183.78it/s]
        """
        from multiprocessing import cpu_count
        from pathos.multiprocessing import ProcessPool
        import tqdm

        self.tqdm = tqdm

        if n_cpu == -1:
            n_cpu = cpu_count()

        self.n_cpu = n_cpu
        self.pool = ProcessPool(n_cpu)

    def __del__(self):
        self.pool.close()

    def map(self, func, array, chunksize=16, description=""):
        """Map a function over array using the pool and return [func(a) for a in array].
        Arguments:
        func -- The function to apply. Can be a lambda function
        array -- Any iterable to which the function should be applied over
        chunksize (default: 16) -- The size of a "chunk" which is sent to a CPU core for processing in one go. Larger values should speed up processing when using very fast functions, while smaller values will give a more granular progressbar.
        description (optional) -- Text to be displayed next to the progressbar.
        Returns:
        res -- A list of values returned from the function.
        """
        res = []

        def func_tracked(args):
            x, i = args
            return func(x), i

        array_tracked = zip(array, range(len(array)))

        desc = "{} CPUs{}".format(
            self.n_cpu, " - {}".format(description) if description else "")
        for out in self.tqdm.tqdm(
                self.pool.uimap(func_tracked,
                                array_tracked,
                                chunksize=chunksize),
                total=len(array),
                desc=desc,
                smoothing=0.05,
        ):
            res.append(out)

        # Sort based on i but return only the actual function result
        actual_res = [r[0] for r in sorted(res, key=lambda r: r[1])]

        return actual_res

    def exit(self):
        """Close the processes and wait for them to clean up."""
        self.pool.close()
        self.pool.join()

Example #22
print('Beginning... ')
full_df = pd.read_csv('../../../../fundamentals.csv',
                      low_memory=False,
                      header=[0, 1])
print('csv in memory')

metrics = set([x[1] for x in list(full_df)[1:]])
date_col = list(full_df)[0]

pool = Pool(nodes=NPROC)
func_args = list(metrics)
print('finished creating args')
try:
    outputs = pool.uimap(process_fundamental, func_args)
except Exception as e:
    print(e)

for metric, data in zip(func_args, outputs):
    fundamentals[metric] = data

#TODO: Export data into db for faster access
#models.Base.metadata.create_all(db.engine)
session = db.create_session()
with open('universe.csv') as f:
    data = f.read().replace("'", '').replace(' ', '').replace('\n',
                                                              '').split(',')
universe = data
for fundamental in func_args:
    current = fundamentals[fundamental]
Example #23
class WorkManager(object):
    """ Class to in charge of managing the tasks and distributing them to
        the workers. They can be local (using other cores) or remote
        using other nodes in the local cluster """
    def __init__(self, ncpus='autodetect', ppservers=None, silent=False):

        if ncpus == 'autodetect':
            from pathos.helpers import cpu_count
            self.ncpus = cpu_count()
        else:
            self.ncpus = ncpus
        if ppservers:
            self._ppservers = ppservers
            self.sessions = [ppServer(srv) for srv in ppservers]
            self.ppservers = tuple([i.local_server for i in self.sessions])
            from pathos.parallel import ParallelPool as PPPool
            self.pool = PPPool(ncpus=self.ncpus, ppservers=self.ppservers)
            self.mode = 'cluster'
            from pathos.parallel import stats as pp_stats
            self.pp_stats = pp_stats
        else:
            from pathos.multiprocessing import ProcessPool as MPPool
            self.pool = MPPool(self.ncpus)
            self.mode = 'multicore'
        self.stats = {}
        self.silent = silent

    def __del__(self):
        del self.pool

    def process(self, task, items, timeout=90000):
        if not isinstance(task, Task):
            raise TypeError("task argument needs to be an 'Task' instance")
        # --- Call the Local initialialization
        task.initializeLocal()
        # --- Schedule all the jobs ....
        if self.mode == 'cluster':

            from ostap.utils.progress_bar import ProgressBar
            with ProgressBar(max_value=len(items), silent=self.silent) as bar:

                jobs = self.pool.uimap(_ppfunction,
                                       zip([task for i in items], items))

                ##jobs = [self.server.submit(_prefunction, (_ppfunction, task, item), (), ('ROOT','Ostap.ParallelPathos')) for item in items]
                ##jobs = [self.server.submit(_prefunction, (_ppfunction, task, item), (), ('Ostap.Parallel','time')) for item in items]
                ##jobs = [self.server.submit(_prefunction, (_ppfunction, task, item), (_ppfunction,), ('Ostap','time')) for item in items]
                for result, stat in jobs:
                    bar += 1
                    task._mergeResults(result)
                    self._mergeStatistics(stat)

            self._printStatistics()
            self.pp_stats()

        elif self.mode == 'multicore':

            start = time.time()
            from ostap.utils.progress_bar import ProgressBar
            with ProgressBar(max_value=len(items), silent=self.silent) as bar:
                jobs = self.pool.uimap(_ppfunction,
                                       zip([task for i in items], items))
                for result, stat in jobs:
                    bar += 1
                    task._mergeResults(result)
                    self._mergeStatistics(stat)
            end = time.time()

            self._printStatistics()
            logger.info('Time elapsed for multicore processing: %f s' %
                        (end - start))
        # --- Call the Local Finalize
        task.finalize()

    def _printStatistics(self):
        njobs = 0
        for stat in self.stats.values():
            njobs += stat.njob
        logger.info('Job execution statistics:')
        logger.info(
            'job count | % of all jobs | job time sum | time per job | job server'
        )
        for name, stat in self.stats.items():
            logger.info(
                '       %d |        %6.2f |     %8.3f |    %8.3f | %s' %
                (stat.njob, 100. * stat.njob / njobs, stat.time,
                 stat.time / stat.njob, name))

    def _mergeStatistics(self, stat):
        if stat.name not in self.stats:
            self.stats[stat.name] = Statistics()
        s = self.stats[stat.name]
        s.time += stat.time
        s.njob += 1
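
# Usage sketch, kept in comments because Task, _ppfunction, Statistics and
# logger come from the surrounding (ostap-style) module and are not shown
# here; the subclass and item list below are hypothetical:
#
#     class SumTask(Task):
#         def initializeLocal(self):         # called once before scheduling
#             self.total = 0
#         def _mergeResults(self, result):   # called for every worker result
#             self.total += result
#         def finalize(self):                # called after all items are merged
#             print(self.total)
#
#     wm = WorkManager(ncpus=4)              # or ppservers=(...) for cluster mode
#     wm.process(SumTask(), items)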
Example #24
0
def plot_cats_prms_transfer_perfs(dbs_dir, n_cpus=1):
    '''Plot catchments' performances using parameters calibrated on other
    catchments.
    '''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    kf_prms_dict = {}
    cats_vars_dict = {}
    for cat_db in cats_dbs:
        with h5py.File(cat_db, 'r') as db:
            kfolds = db['data'].attrs['kfolds']
            cat = db.attrs['cat']

            cv_flag = db['data'].attrs['cv_flag']

            if cv_flag:
                print('plot_prm_trans_perfs not possible with cv_flag!')
                return

            f_var_infos = db['cdata/aux_var_infos'][...]
            prms_idxs = db['cdata/use_prms_idxs'][...]
            f_vars = db['cdata/aux_vars'][...]
            prms_flags = db['cdata/all_prms_flags'][...]
            bds_arr = db['cdata/bds_arr'][...]

            cat_vars_dict = {}
            cat_vars_dict['f_var_infos'] = f_var_infos
            cat_vars_dict['prms_idxs'] = prms_idxs
            cat_vars_dict['f_vars'] = f_vars
            cat_vars_dict['prms_flags'] = prms_flags
            cat_vars_dict['bds_arr'] = bds_arr

            cats_vars_dict[cat] = cat_vars_dict

            for i in range(1, kfolds + 1):
                kf_str = f'kf_{i:02d}'
                cd_db = db[f'calib/{kf_str}']

                opt_prms = cd_db['opt_prms'][...]

                if i not in kf_prms_dict:
                    kf_prms_dict[i] = {}

                kf_prms_dict[i][cat] = {}

                kf_prms_dict[i][cat]['opt_prms'] = opt_prms

    const_args = (kf_prms_dict, cats_vars_dict)
    plot_gen = ((cat_db, const_args) for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_prms_transfer_perfs, plot_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for plot_args in plot_gen:
            plot_cat_prms_transfer_perfs(plot_args)

    return
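
# The pool lifecycle shared by these plotting helpers, reduced to a toy worker;
# toy_plot and the argument tuples are illustrative, not from the example above.
from pathos.multiprocessing import ProcessPool

def toy_plot(args):
    cat_db, const_args = args
    return cat_db, len(const_args)

if __name__ == '__main__':
    plot_gen = ((f'cat_{i:02d}.hdf5', ('a', 'b')) for i in range(4))
    mp_pool = ProcessPool(2)
    mp_pool.restart(True)  # force fresh worker processes

    # uimap is lazy; wrapping it in list() forces all jobs to run.
    print(list(mp_pool.uimap(toy_plot, plot_gen)))

    mp_pool.clear()
    mp_pool.close()
    mp_pool.join()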
Example #25
0
            return int(td.text.split()[-1])
    return 1

def save_html(url):
    fpath = forum_pages_path.format(url.replace('https://www.', '').replace('/', '_'))
    if os.path.exists(fpath):
        print('Skipping {}, already saved.'.format(url))
        return
    try:
        time.sleep(random() * 2)
        text = requests.get(url).text
        with open(fpath, 'wt') as f:
            f.write(text)
        print('Saved {}'.format(url))
    except Exception:
        traceback.print_exc()
        print('Problem when trying to save:', url)

if __name__ == '__main__':
    pool = Pool(nodes = NUM_PROC)
    for row in forum_data:
        url, title, categories = row['url'], row['title'], row['categories']
        soup = bsoup(requests.get(url).text, features='html5')
        total_pages = get_total_pages(soup)
        print('Total pages for {}: {}'.format(url, total_pages))
        forum_id = get_forum_id(url)
        urls = [base_forum_url.format(forum_id=forum_id, page_num=page_num) for page_num in range(1, total_pages + 1)]
        results = pool.uimap(save_html, urls)
        list(results)
        del soup
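
# Because save_html is I/O-bound, a pathos ThreadPool can be swapped in to
# avoid process start-up cost; fetch_len and the URL list are illustrative.
from pathos.threading import ThreadPool
import requests

def fetch_len(url):
    try:
        return len(requests.get(url, timeout=10).text)
    except requests.RequestException:
        return -1

if __name__ == '__main__':
    tpool = ThreadPool(nodes=8)
    print(list(tpool.uimap(fetch_len, ['https://example.com'] * 4)))
    tpool.close()
    tpool.join()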
Example #26
0
    def run_all(self, processors=1, iseed=1):
        """Run the model for all trials in the designed experiment and store results.

        Model constructor is assumed to take args (seed, collect_stepwise_data,
        trial kwargs).

        Args:
            processors (int, default=1): Number of cpu cores to use for batch run.
            iseed (int, default=1): Initial seed for replication 1 of each trial.
                Seeds for subsequent replications are drawn from this class's PRNG.

        """
        pool = ProcessPool(nodes=processors)
        job_queue = []

        # Generator for initial model seeds. Models use these seeds to manage
        # their own RNGs.
        brng = PCG64(iseed)
        randomgen = RandomGenerator(brng)

        param_names = self.design.columns
        param_names = param_names[(param_names != 'replications')
                                  & (param_names != 'Trial')]

        total_iterations = self.design.replications.sum()

        self.manifest = []  # Records what seed was used where

        for row in self.design.itertuples():
            kwargs = {key: getattr(row, key) for key in param_names}

            # Reset model seed generator for next design point
            brng.seed(iseed)

            for rep in range(1, row.replications + 1):
                # Advance model seed for next replication
                model_seed = randomgen.randint(10000000)

                model_key = (
                    row.Trial,
                    rep,
                )
                self.manifest.append((model_key, model_seed))
                job_queue.append(
                    pool.uimap(self._run_single_replication, (model_seed, ),
                               (kwargs, ), (model_key, )))

        with tqdm(total=total_iterations,
                  desc='Total',
                  unit='dp',
                  disable=not self.display_progress) as pbar_total:
            # empty the queue
            results = []
            for task in job_queue:
                for model_vars, agent_vars, stepwise_vars in list(task):
                    results.append((model_vars, agent_vars, stepwise_vars))
                pbar_total.update()

        if self.data_handler:
            return  # Results already stored to database, nothing more to record

        # store the results in batchrunner
        # FUTURE: rework this module to only support external data_handler.
        # Rationale: Best practice to treat each replication atomically. Key
        # benefit of having all replications in memory is to do experiment-wide
        # analysis, and a data analysis module can read in all per-replication
        # files.
        for model_vars, agent_vars, stepwise_vars in results:
            if self.model_reporters:
                for model_key, model_val in model_vars.items():
                    self.model_vars[model_key] = model_val
            if self.agent_reporters:
                for agent_key, reports in agent_vars.items():
                    self.agent_vars[agent_key] = reports
            if self.collect_stepwise_data:
                for stepwise_key, stepwise_val in stepwise_vars.items():
                    self.stepwise_vars[stepwise_key] = stepwise_val
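
# The seed bookkeeping used above, isolated as a sketch (legacy randomgen
# PCG64/RandomGenerator API; trial and replication counts are illustrative):
from randomgen import PCG64, RandomGenerator

iseed = 1
brng = PCG64(iseed)
rng = RandomGenerator(brng)

manifest = []
for trial in range(2):       # one design point per trial
    brng.seed(iseed)         # every trial restarts from the same base seed
    for rep in range(1, 4):  # replications advance the shared stream
        manifest.append(((trial, rep), rng.randint(10000000)))
print(manifest)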