def run_many_configs(args=None, sys_argv=None, return_0_if_ok=True,
                     started=None):
    '''Run every ``*.yaml`` config file found in ``args.config_dir``.

    Parameters:
        args: parsed command line namespace.  Despite the ``None``
            default it is required: ``vars(args)`` and
            ``args.config_dir`` are both read.
        sys_argv: forwarded unchanged to ``_run_one_config_of_many``.
        return_0_if_ok: accepted but not used; every config is run
            with ``return_0_if_ok=True``.
            NOTE(review): presumably so the per-config results are
            return codes that can be combined with ``max`` - confirm.
        started: start time passed to ``try_finally_log_etime``;
            defaults to ``datetime.datetime.now()``.

    Returns:
        The largest value returned by ``_run_one_config_of_many``
        over all config files, or 1 when no config file is found.
    '''
    started = started or datetime.datetime.now()
    # Values from parse_env_vars() override same-named attributes of args
    # because they are merged in last.
    merged = dict(vars(args))
    merged.update(parse_env_vars())
    env_cmd_line = Namespace(**merged)
    logger.info(
        'With --config-dir, DASK_CLIENT and DASK_SCHEDULER in config files are ignored'
    )
    dask_client = getattr(env_cmd_line, 'DASK_CLIENT', 'SERIAL')
    dask_scheduler = getattr(env_cmd_line, 'DASK_SCHEDULER', None)
    # Stays 1 unless at least one config file is actually run.
    ret_val = 1
    with try_finally_log_etime(started):
        with warnings.catch_warnings():
            # scikit-learn has a number of deprecation warnings for kmeans
            warnings.simplefilter("ignore")
            with client_context(dask_client, dask_scheduler) as client:
                pipe = partial(
                    _run_one_config_of_many,
                    args=args,
                    sys_argv=sys_argv,
                    return_0_if_ok=True,
                    client=client,
                )
                fnames = glob.glob(os.path.join(args.config_dir, '*.yaml'))
                if fnames:
                    ret_val = max(map(pipe, fnames))
                else:
                    # max() of an empty sequence raises ValueError; report
                    # the empty directory and keep the failure code instead.
                    logger.warning(
                        'No *.yaml config files found in {}'.format(
                            args.config_dir))
    return ret_val
def predict_many(data_source,
                 saved_model_tag=None,
                 ensemble=None,
                 client=None,
                 serialize=None,
                 to_raster=True,
                 elm_predict_path=None):
    '''See elm.pipeline.Pipeline.predict_many method

    Builds a dask graph with one ``_predict_one_sample_one_arg`` task
    for each (sample, estimator) combination and computes it.

    Parameters:
        data_source: mapping supporting ``.copy()`` and ``.pop()``.
            The keys ``'X'``, ``'y'``, ``'args_list'`` and ``'sampler'``
            are popped from a copy (missing keys become ``None``) and
            the remainder is passed to ``make_samples_dask``.
        saved_model_tag: if truthy, appended to each prediction tag
            after a ``'-'``.
        ensemble: sequence of ``(tag, estimator)`` pairs.  Required
            despite the ``None`` default: ``ensemble[0][1]`` is read
            and passed to ``make_samples_dask``.
        client: object with a ``get(dsk, keys)`` method; when ``None``
            ``dask.get`` is used instead.
        serialize: forwarded to each task; when truthy and a
            prediction path is known, that directory is created if it
            does not exist.
        to_raster: forwarded to each task.
        elm_predict_path: output directory; falls back to the
            ``ELM_PREDICT_PATH`` entry of ``parse_env_vars()``.

    Returns:
        Tuple made by chaining together the iterable returned by each
        task, in the order the tasks were created.
    '''
    env = parse_env_vars()
    elm_predict_path = elm_predict_path or env.get('ELM_PREDICT_PATH')
    if serialize and elm_predict_path and not os.path.exists(elm_predict_path):
        # makedirs also creates missing parent directories, and exist_ok
        # tolerates another process creating the directory after the
        # existence check above.
        os.makedirs(elm_predict_path, exist_ok=True)
    pipe_example = ensemble[0][1]
    ds = data_source.copy()
    X = ds.pop('X', None)
    y = ds.pop('y', None)
    args_list = ds.pop('args_list', None)
    sampler = ds.pop('sampler', None)
    dsk = make_samples_dask(X, y, None, pipe_example, args_list, sampler, ds)
    # Snapshot the sample keys before prediction tasks are added to dsk.
    sample_keys = tuple(dsk)
    combos = tuple(itertools.product(sample_keys, ensemble))
    keys = []
    for sample_key, (estimator_tag, estimator) in combos:
        name = _next_name('predict_many')
        predict_tag = '{}-{}'.format(estimator_tag, sample_key)
        if saved_model_tag:
            predict_tag += '-' + saved_model_tag
        dsk[name] = (
            _predict_one_sample_one_arg,
            estimator,
            serialize,
            to_raster,
            predict_tag,
            elm_predict_path,
            sample_key,
        )
        keys.append(name)
    logger.info('Predict {} estimator(s) and {} sample(s) '
                '({} combination[s])'.format(len(ensemble),
                                             len(sample_keys),
                                             len(combos)))
    if client is None:
        new = dask.get(dsk, keys)
    else:
        new = client.get(dsk, keys)
    return tuple(itertools.chain.from_iterable(new))
import glob
import os

from elm.config import parse_env_vars

# Environment settings are read once, at import time.
ENV = parse_env_vars()
ELM_HAS_EXAMPLES = ENV['ELM_HAS_EXAMPLES']

# Values used when ELM_HAS_EXAMPLES is falsy: no data path, no files.
ELM_EXAMPLE_DATA_PATH = None
TIF_FILES = []
HDF5_FILES = []
HDF4_FILES = []
NETCDF_FILES = []

if ELM_HAS_EXAMPLES:
    # Replace the empty defaults with whatever glob finds under the
    # example data directory.
    ELM_EXAMPLE_DATA_PATH = ENV['ELM_EXAMPLE_DATA_PATH']
    TIF_FILES = glob.glob(os.path.join(
        ELM_EXAMPLE_DATA_PATH,
        'tif', 'L8', '015', '033', 'LC80150332013207LGN00', '*.TIF',
    ))
    HDF5_FILES = glob.glob(os.path.join(
        ELM_EXAMPLE_DATA_PATH,
        'hdf5', '2016', '01', '01', 'imerg', '*.HDF5',
    ))
    HDF4_FILES = glob.glob(
        os.path.join(ELM_EXAMPLE_DATA_PATH, 'hdf4', '*.hdf'))
    NETCDF_FILES = glob.glob(
        os.path.join(ELM_EXAMPLE_DATA_PATH, 'netcdf', '*.nc'))


def assertions_on_metadata(meta):
    '''Assert that ``meta`` contains the keys 'meta' and 'band_meta'.

    Raises AssertionError at the first key that is missing.
    '''
    for expected in ('meta', 'band_meta'):
        assert expected in meta