def main(config_filename):
    """Retrieve data from MASS for every active run id in a config module.

    The config module loaded from ``config_filename`` must provide:

    * ``ACTIVE_RUNIDS`` -- iterable of run ids to process.
    * ``MASS_INFO`` -- mapping of run id to a dict whose ``'stream'`` entry
      maps each stream name to a ``stream_info`` dict.  Each ``stream_info``
      holds ``'stashcodes'`` and either an explicit ``'years_months'``
      collection of ``(year, month)`` pairs, or ``'start_year_month'`` and
      ``'end_year_month'`` from which the pairs are generated with
      ``gen_years_months``.

    ``retrieve_from_MASS`` is called once per (run id, stream, year, month)
    and the time taken by each call is logged.

    :param config_filename: path of the config module, passed to
        ``load_module``.
    """
    logger.info('retrieve_from_mass')
    config = load_module(config_filename)
    logger.debug(config.ACTIVE_RUNIDS)

    # Check have access to all runids.
    # NOTE: the access check is currently disabled, so this loop is a
    # placeholder that does nothing.
    for runid in config.ACTIVE_RUNIDS:
        # check_access(runid)
        pass

    # Use config to download relevant data.
    for runid in config.ACTIVE_RUNIDS:
        mass_info = config.MASS_INFO[runid]

        queries_dir = Path('queries')
        queries_dir.mkdir(exist_ok=True)

        for stream, stream_info in mass_info['stream'].items():
            stashcodes = stream_info['stashcodes']
            if 'years_months' in stream_info:
                years_months = stream_info['years_months']
            else:
                years_months = gen_years_months(stream_info['start_year_month'],
                                                stream_info['end_year_month'])

            # Materialise once so the total is known up front (and so that
            # any iterable, not only a sized sequence, is accepted).
            years_months = list(years_months)
            total = len(years_months)

            for i, (year, month) in enumerate(years_months):
                logger.info(f'Retrieving {i+1}/{total} {runid}: {stream} {year}/{month} {stashcodes}')
                start = timer()
                retrieve_from_MASS(config, queries_dir, runid, stream,
                                   year, month, stashcodes, stream_info)
                end = timer()
                # ``.2f`` gives two decimal places; the previous ``02f`` spec
                # was a zero-padded width of 2 with the default six decimals.
                logger.info(f'Retrieved in {end - start:.2f}s')
def main(config_filename):
    """Write and submit one bsub script per entry in ``config.CONFIG_KEYS``.

    Loads the config module at ``config_filename``, makes sure the
    ``bsub_scripts`` and ``processing_output`` directories exist in the
    current working directory, then for every config key writes a bsub
    script with ``write_bsub_script`` and hands the resulting file path to
    ``submit_bsub_script``.

    :param config_filename: path of the config module; it must define
        ``SCRIPT_PATH``, ``CONFIG_KEYS`` and ``BSUB_KWARGS``.
    """
    config = load_module(config_filename)
    logger.debug(config)

    # Both directories are created up front; only bsub_dir is used below.
    bsub_dir = Path('bsub_scripts')
    for workdir in (bsub_dir, Path('processing_output')):
        workdir.mkdir(exist_ok=True)

    # Absolute paths are what get passed on to write_bsub_script.
    absolute_paths = [Path(p).absolute()
                      for p in (config.SCRIPT_PATH, config_filename)]
    for path in absolute_paths:
        logger.debug(path)
    script_path, config_path = absolute_paths

    for key in config.CONFIG_KEYS:
        logger.info(f'config_key: {key}')
        submit_bsub_script(
            write_bsub_script(bsub_dir, script_path, config_path,
                              key, config.BSUB_KWARGS)
        )
def main(config_filename, task_path_hash_key, config_path_hash):
    """Run one task from the task control generated by a config module.

    The SHA-1 of the config file's current contents is compared with
    ``config_path_hash``; a mismatch aborts the run.  Otherwise the config
    is loaded, its task control is built and finalized, and the task stored
    under ``task_path_hash_key`` is run.

    :param config_filename: path of the config module.
    :param task_path_hash_key: key into
        ``task_ctrl.task_from_path_hash_key`` selecting the task to run.
    :param config_path_hash: expected SHA-1 hex digest of the config file.
    :raises Exception: if the config file's digest differs from
        ``config_path_hash``.
    """
    # stdout is captured by bsub, so logging there is sufficient.
    setup_stdout_logging('DEBUG')

    config_path = Path(config_filename).absolute()
    config_bytes = config_path.read_bytes()
    curr_config_path_hash = sha1(config_bytes).hexdigest()
    if config_path_hash != curr_config_path_hash:
        raise Exception(
            f'config file {config_path} has changed -- cannot run task.')

    task_ctrl = load_module(config_filename).gen_task_ctrl()
    assert not task_ctrl.finalized, f'task control {task_ctrl} already finalized'

    # TODO: Problematic. finalize() can read metadata while it is being
    # written: another task may be finishing and writing its output's
    # metadata at the moment finalize() tries to read it.
    # A possible fix is to make Task responsible for working out whether a
    # rerun is needed, and to drop the finalize() call here.
    task_ctrl.finalize()

    selected_task = task_ctrl.task_from_path_hash_key[task_path_hash_key]
    task_ctrl.run_task(selected_task)
import sys

from cosmic.util import load_module
from cosmic.datasets.cmorph.cmorph_convert import extract_europe_8km_30min


def main(basedir, year, month):
    """Call ``extract_europe_8km_30min`` for one year/month.

    Prints the year and month, then passes the output directory
    ``basedir / 'precip_<year><month>'`` (month zero-padded to two
    characters) together with ``year`` and ``month`` to
    ``extract_europe_8km_30min``.

    :param basedir: base directory; must support the ``/`` operator with a
        string (e.g. a ``pathlib.Path``).
    :param year: year, formatted as-is into the directory name.
    :param month: month, formatted with a minimum width of two.
    """
    print(f'{year}, {month}')
    dirname = f'precip_{year}{month:02}'
    extract_europe_8km_30min(basedir / dirname, year, month)


if __name__ == '__main__':
    # argv[1]: path of the config module; argv[2]: key into its SCRIPT_ARGS.
    config = load_module(sys.argv[1])
    config_key = sys.argv[2]
    basedir = config.BASEDIR
    script_args = config.SCRIPT_ARGS[config_key]
    main(basedir, *script_args)