def main():
    """Entry point for the e3sm_to_cmip converter.

    Parses CLI arguments, resolves paths and handlers, then runs the CMOR
    conversion either serially or in a process pool.

    Returns:
        int: 0 on success, 1 on any handler/conversion error.
    """
    # parse the command line arguments
    _args = parse_arguments().__dict__

    # a single quoted, space-separated argument becomes one list entry;
    # split it back apart so each variable name is its own element
    if len(_args.get('var_list')) == 1 and " " in _args.get('var_list')[0]:
        var_list = _args.get('var_list')[0].split()
    else:
        var_list = _args.get('var_list')
    # tolerate comma-separated variable lists as well
    var_list = [x.strip(',') for x in var_list]

    input_path = _args.get('input_path')
    output_path = _args.get('output_path')
    tables_path = _args.get('tables_path')
    user_metadata = _args.get('user_metadata')
    custom_metadata = _args.get('custom_metadata')
    nproc = _args.get('num_proc')
    serial = _args.get('serial')
    realm = _args.get('realm')
    map_path = _args.get('map')
    cmor_log_dir = _args.get('logdir')
    # None (not False) is the conventional "no timeout" sentinel
    timeout = int(_args.get('timeout')) if _args.get('timeout') else None
    simple = _args.get('simple', False)
    precheck_path = _args.get('precheck', False)
    freq = _args.get('freq')

    logger = _setup_custom_logger(f"{cmor_log_dir}/e3sm_to_cmip.log", True)
    logger.info(f"input_path = {input_path}")
    logger.info(f"output_path = {output_path}")
    logger.info(f"precheck_path = {precheck_path}")

    # simple mode falls back to the bundled CMOR tables when none were given
    if simple and not tables_path:
        resource_path, _ = os.path.split(os.path.abspath(resources.__file__))
        tables_path = resource_path

    # optional watchdog that aborts the run after `timeout` seconds
    timer = None
    if timeout:
        timer = threading.Timer(timeout, timeout_exit)
        timer.start()

    # user-supplied handler directory wins over the packaged handlers
    if _args.get('handlers'):
        handlers_path = os.path.abspath(_args.get('handlers'))
    else:
        handlers_path, _ = os.path.split(
            os.path.abspath(cmor_handlers.__file__))

    if precheck_path:
        # drop variables that were already converted in a previous run
        new_var_list = precheck(input_path, precheck_path, var_list, realm)
        if not new_var_list:
            print("All variables previously computed")
            # makedirs + exist_ok: don't crash if output_path is missing
            # or the CMIP6 marker directory already exists
            os.makedirs(os.path.join(output_path, 'CMIP6'), exist_ok=True)
            if timer:
                timer.cancel()
            return 0
        else:
            print_message(
                f"Setting up conversion for {' '.join(new_var_list)}", 'ok')
            var_list = new_var_list

    # load variable handlers
    handlers = _load_handlers(
        handlers_path=handlers_path,
        tables_path=tables_path,
        var_list=var_list,
        freq=freq,
        realm=realm)

    if len(handlers) == 0:
        print_message('No handlers loaded')
        sys.exit(1)

    # info mode only prints what would run, then exits
    if _args.get('info'):
        print_var_info(
            handlers, freq, input_path, tables_path, _args.get('info_out'))
        sys.exit(0)

    new_metadata_path = os.path.join(output_path, 'user_metadata.json')

    # create the output dir if it doesnt exist
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # setup temp storage directory; honor $TMPDIR when set
    temp_path = os.environ.get('TMPDIR')
    if temp_path is None:
        temp_path = f'{output_path}/tmp'
        if not os.path.exists(temp_path):
            os.makedirs(temp_path)
    tempfile.tempdir = temp_path

    logging_path = os.path.join(output_path, 'converter.log')
    print_message(f"Writing log output to: {logging_path}", 'debug')

    # setup logging
    logging.basicConfig(
        format='%(asctime)s:%(levelname)s: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p',
        filename=logging_path,
        filemode='w',
        level=logging.INFO)

    # copy the users metadata json file with the updated output directory
    if not simple:
        copy_user_metadata(
            user_metadata, output_path)

    # run in the user-selected mode
    if serial:
        print_message('Running CMOR handlers in serial', 'ok')
        try:
            status = run_serial(
                handlers=handlers,
                input_path=input_path,
                tables_path=tables_path,
                metadata_path=new_metadata_path,
                map_path=map_path,
                realm=realm,
                logdir=cmor_log_dir,
                simple=simple,
                outpath=output_path,
                freq=freq)
        except KeyboardInterrupt:
            print_message(' -- keyboard interrupt -- ', 'error')
            return 1
        except Exception as e:
            print_debug(e)
            return 1
    else:
        print_message('Running CMOR handlers in parallel', 'ok')
        try:
            pool = Pool(max_workers=nproc)
            status = run_parallel(
                pool=pool,
                handlers=handlers,
                input_path=input_path,
                tables_path=tables_path,
                metadata_path=new_metadata_path,
                map_path=map_path,
                realm=realm,
                logdir=cmor_log_dir,
                simple=simple,
                outpath=output_path,
                freq=freq)
        except KeyboardInterrupt:
            print_message(' -- keyboard interrupt -- ', 'error')
            return 1
        except Exception as error:
            print_debug(error)
            return 1

    if status != 0:
        print_message(
            f"Error running handlers: { ' '.join([x['name'] for x in handlers]) }")
        return 1

    # optionally stamp extra metadata onto the output files
    if custom_metadata:
        add_metadata(
            file_path=output_path,
            var_list=var_list,
            metadata=custom_metadata)

    if timeout:
        timer.cancel()
    return 0
def main():
    """Legacy entry point for the converter CLI.

    Parses CLI arguments, optionally prechecks already-converted variables,
    then runs the CMOR handlers serially or in a pool. Returns 0 on
    success, 1 on error.
    """
    # parse the command line arguments
    # NOTE(review): `parse_argsuments` looks like a typo of `parse_arguments`
    # (the other main() in this file calls `parse_arguments()`), but the
    # legacy helper may genuinely be defined under this name — confirm the
    # definition before renaming.
    _args = parse_argsuments().__dict__

    # a single quoted, space-separated var_list argument arrives as one
    # element; split it back into individual variable names
    if len(_args.get('var_list')) == 1 and " " in _args.get('var_list')[0]:
        var_list = _args.get('var_list')[0].split()
    else:
        var_list = _args.get('var_list')
    # tolerate trailing commas on each variable name
    var_list = [x.strip(',') for x in var_list]
    input_path = _args.get('input_path')
    output_path = _args.get('output_path')
    tables_path = _args.get('tables_path')
    user_metadata = _args.get('user_metadata')
    # optional flags with hard-coded fallbacks
    no_metadata = _args['no_metadata'] if _args.get('no_metadata') else False
    only_metadata = _args['only_metadata'] if _args.get(
        'only_metadata') else False
    nproc = _args['num_proc'] if _args.get('num_proc') else 6
    serial = _args['serial'] if _args.get('serial') else False
    mode = _args['mode'] if _args.get('mode') else 'atm'
    debug = True if _args.get('debug') else False
    map_path = _args['map'] if _args.get('map') else None
    cmor_log_dir = _args['logdir'] if _args.get('logdir') else None
    timeout = int(_args['timeout']) if _args.get('timeout') else None
    should_precheck = _args.get('precheck')

    # optional watchdog that aborts the run after `timeout` seconds
    timer = None
    if timeout:
        timer = threading.Timer(timeout, timeout_exit)
        timer.start()

    # user-supplied handler directory wins over the packaged handlers
    if _args.get('handlers'):
        handlers_path = os.path.abspath(_args.get('handlers'))
    else:
        handlers_path, _ = os.path.split(
            os.path.abspath(cmor_handlers.__file__))

    if should_precheck:
        # drop variables already converted by a previous run
        new_var_list = precheck(input_path, output_path, var_list, mode)
        if not new_var_list:
            print("All variables previously computed")
            if timer:
                timer.cancel()
            return 0
        else:
            print("Setting up conversion for {}".format(
                " ".join(new_var_list)))
            var_list = new_var_list

    # add additional optional metadata to the output files
    if only_metadata:
        print_message('Updating file metadata and exiting', 'ok')
        add_metadata(file_path=output_path, var_list=var_list)
        return 0

    new_metadata_path = os.path.join(output_path, 'user_metadata.json')

    # create the output dir if it doesnt exist
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # setup temp storage directory; honor $TMPDIR when set
    temp_path = os.environ.get('TMPDIR')
    if temp_path is None:
        temp_path = '{}/tmp'.format(output_path)
        if not os.path.exists(temp_path):
            os.makedirs(temp_path)
    tempfile.tempdir = temp_path

    logging_path = os.path.join(output_path, 'converter.log')
    print_message("Writing log output to: {}".format(logging_path), 'debug')

    # setup logging
    logging.basicConfig(format='%(asctime)s:%(levelname)s: %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        filename=logging_path,
                        filemode='w',
                        level=logging.INFO)

    # copy the users metadata json file with the updated output directory
    copy_user_metadata(user_metadata, output_path)

    # load variable handlers
    handlers = load_handlers(handlers_path, var_list, debug)
    if len(handlers) == 0:
        print_message('No handlers loaded')
        sys.exit(1)

    # run in the user-selected mode
    if serial:
        print_message('Running CMOR handlers in serial', 'ok')
        try:
            status = run_serial(handlers=handlers,
                                input_path=input_path,
                                tables_path=tables_path,
                                metadata_path=new_metadata_path,
                                map_path=map_path,
                                mode=mode,
                                logdir=cmor_log_dir)
        except KeyboardInterrupt as error:
            print_message(' -- keyboard interrupt -- ', 'error')
            return 1
        except Exception as e:
            print_debug(e)
            return 1
    else:
        print_message('Running CMOR handlers in parallel', 'ok')
        try:
            pool = Pool(nproc)
            status = run_parallel(pool=pool,
                                  handlers=handlers,
                                  input_path=input_path,
                                  tables_path=tables_path,
                                  metadata_path=new_metadata_path,
                                  map_path=map_path,
                                  mode=mode,
                                  logdir=cmor_log_dir)
        except KeyboardInterrupt as error:
            print_message(' -- keyboard interrupt -- ', 'error')
            return 1
        except Exception as error:
            print_debug(error)
            return 1

    if status != 0:
        print_message("Error running handlers: {}".format(" ".join(
            [x['name'] for x in handlers])))
        return 1

    # add additional optional metadata to the output files
    if no_metadata:
        print_message('Not adding additional metadata', 'ok')
    else:
        add_metadata(file_path=output_path, var_list=var_list)
    if timeout:
        timer.cancel()
    return 0
def run_parallel(pool, handlers, input_path, tables_path, metadata_path,
                 map_path=None, mode='atm', nproc=6, **kwargs):
    """
    Run all the handlers in parallel

    Params:
    -------
        pool (multiprocessing.Pool): a processing pool to run the handlers in
        handlers: a dict(str: (function_pointer, list(str) ) )
        input_path (str): path to the input files directory
        tables_path (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        mode (str): what type of files to work with
    Returns:
    --------
        returns 1 if an error occurs, else 0
    """
    pool_res = list()
    will_run = []
    for idx, handler in enumerate(handlers):
        handler_method = handler['method']
        handler_variables = handler['raw_variables']

        # find the input files this handler needs
        if mode in ['atm', 'lnd']:
            input_paths = {var: [os.path.join(input_path, x) for x in
                                 find_atm_files(var, input_path)]
                           for var in handler_variables}
        else:
            input_paths = {var: find_mpas_files(var, input_path, map_path)
                           for var in handler_variables}

        # setup the input args for the handler
        _kwargs = {
            'table': handler.get('table'),
            'raw_variables': handler.get('raw_variables'),
            'units': handler.get('units'),
            'positive': handler.get('positive'),
            'name': handler.get('name'),
            'logdir': kwargs.get('logdir'),
            'unit_conversion': handler.get('unit_conversion'),
            'simple': kwargs.get('simple'),
            'outpath': kwargs.get('outpath'),
            # forward the output frequency; the serial runner already passes
            # freq to every handler, so the parallel path must match
            'freq': kwargs.get('freq', 'mon')
        }
        will_run.append(handler.get('name'))

        pool_res.append(
            pool.submit(
                handler_method,
                input_paths,
                tables_path,
                metadata_path,
                **_kwargs))

    # wait for each result to complete
    pbar = tqdm(total=len(pool_res))
    num_success = 0
    num_handlers = len(handlers)
    finished_success = []
    for idx, res in enumerate(pool_res):
        try:
            out = res.result()
            if out:
                # only record genuinely successful handlers so the failure
                # report below is accurate
                finished_success.append(out)
                num_success += 1
                msg = f'Finished {out}, {idx + 1}/{num_handlers} jobs complete'
            else:
                msg = f'Error running handler {handlers[idx]["name"]}'
                print_message(msg, 'error')
            logger.info(msg)
        except Exception as e:
            print_debug(e)
        pbar.update(1)
    pbar.close()
    terminate(pool)

    print_message(f"{num_success} of {num_handlers} handlers complete", 'ok')
    failed = set(will_run) - set(finished_success)
    if failed:
        print_message(f"{', '.join(list(failed))} failed to complete")
    # honor the documented contract: non-zero on any handler failure
    return 1 if failed else 0
def run_serial(handlers, input_path, tables_path, metadata_path, map_path=None,
               mode='atm', logdir=None, simple=False, outpath=None, freq="mon"):
    """
    Run each of the handlers one at a time on the main process

    Params:
    -------
        handlers: a dict(str: (function_pointer, list(str) ) )
        input_path (str): path to the input files directory
        tables_path (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        mode (str): what type of files to work with
    Returns:
    --------
        returns 1 if an error occurs, else 0
    """
    try:
        num_handlers = len(handlers)
        num_success = 0
        # atm mode prints per-handler progress elsewhere, so no bar there
        if mode != 'atm':
            pbar = tqdm(total=len(handlers))

        for _, handler in enumerate(handlers):
            handler_method = handler['method']
            handler_variables = handler['raw_variables']
            unit_conversion = handler.get('unit_conversion')

            # find the input files this handler needs
            if mode in ['atm', 'lnd']:
                input_paths = {var: [os.path.join(input_path, x) for x in
                                     find_atm_files(var, input_path)]
                               for var in handler_variables}
            elif mode == 'fx':
                input_paths = {var: [os.path.join(input_path, x) for x in
                                     os.listdir(input_path) if x[-3:] == '.nc']
                               for var in handler_variables}
            else:
                input_paths = {var: find_mpas_files(var, input_path, map_path)
                               for var in handler_variables}

            # reset per iteration: otherwise a handler that raises would
            # leave `name` holding the PREVIOUS handler's result and be
            # miscounted (and mislogged) as a success
            name = None
            try:
                name = handler_method(
                    input_paths,
                    tables_path,
                    metadata_path,
                    raw_variables=handler.get('raw_variables'),
                    units=handler.get('units'),
                    name=handler.get('name'),
                    table=handler.get('table'),
                    positive=handler.get('positive'),
                    serial=True,
                    logdir=logdir,
                    simple=simple,
                    outpath=outpath,
                    unit_conversion=unit_conversion,
                    freq=freq)
            except Exception as e:
                print_debug(e)

            if name is not None:
                num_success += 1
                msg = f'Finished {name}, {num_success}/{num_handlers} jobs complete'
            else:
                msg = f'Error running handler {handler["name"]}'
                print_message(msg, status='error')
            logger.info(msg)
            if mode != 'atm':
                pbar.update(1)
        if mode != 'atm':
            pbar.close()

    except Exception as error:
        print_debug(error)
        return 1
    else:
        print_message(
            f"{num_success} of {num_handlers} handlers complete", 'ok')
        return 0
def run_parallel(pool, handlers, input_path, tables_path, metadata_path,
                 map_path=None, mode='atm', nproc=6, **kwargs):
    """
    Run all the handlers in parallel

    Params:
    -------
        pool (multiprocessing.Pool): a processing pool to run the handlers in
        handlers: a dict(str: (function_pointer, list(str) ) )
        input_path (str): path to the input files directory
        tables_path (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        mode (str): what type of files to work with
    Returns:
    --------
        returns 1 if an error occurs, else 0
    """
    pool_res = list()
    for idx, handler in enumerate(handlers):
        handler_method = handler['method']
        handler_variables = handler['raw_variables']

        # find the input files this handler needs
        if mode in ['atm', 'lnd']:
            input_paths = {
                var: [
                    os.path.join(input_path, x)
                    for x in find_atm_files(var, input_path)
                ]
                for var in handler_variables
            }
        else:
            input_paths = {
                var: find_mpas_files(var, input_path, map_path)
                for var in handler_variables
            }

        # setup the input args for the handler
        _kwargs = {
            'table': handler.get('table'),
            'raw_variables': handler.get('raw_variables'),
            'units': handler.get('units'),
            'positive': handler.get('positive'),
            'name': handler.get('name'),
            'logdir': kwargs.get('logdir')
        }

        pool_res.append(
            pool.apipe(handler_method, input_paths, tables_path,
                       metadata_path, **_kwargs))

    # wait for each result to complete
    pbar = progressbar.ProgressBar(maxval=len(pool_res))
    pbar.start()
    num_success = 0
    num_handlers = len(handlers)

    for idx, res in enumerate(pool_res):
        try:
            out = res.get(9999999)
            if out:
                num_success += 1
                msg = 'Finished {handler}, {done}/{total} jobs complete'.format(
                    handler=out, done=idx + 1, total=num_handlers)
            else:
                msg = 'Error running handler {}'.format(handlers[idx]['name'])
                print_message(msg, 'error')
            logger.info(msg)
            pbar.update(idx)
        except Exception as e:
            print_debug(e)
            # clean up the progress bar and shut the worker pool down before
            # bailing out; returning early previously leaked the pool
            pbar.finish()
            terminate(pool)
            return 1
    pbar.finish()
    terminate(pool)

    print_message(
        "{} of {} handlers complete".format(num_success, num_handlers), 'ok')
    return 0
def run_serial(handlers, input_path, tables_path, metadata_path,
               map_path=None, mode='atm', logdir=None):
    """
    Run each of the handlers one at a time on the main process

    Params:
    -------
        handlers: a dict(str: (function_pointer, list(str) ) )
        input_path (str): path to the input files directory
        tables_path (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        mode (str): what type of files to work with
    Returns:
    --------
        returns 1 if an error occurs, else 0
    """
    try:
        num_handlers = len(handlers)
        num_success = 0
        # atm mode reports progress elsewhere, so only show a bar otherwise
        if mode != 'atm':
            pbar = progressbar.ProgressBar(maxval=len(handlers))
            pbar.start()

        for idx, handler in enumerate(handlers):
            handler_method = handler['method']
            handler_variables = handler['raw_variables']

            # find the input files this handler needs
            if mode in ['atm', 'lnd']:
                input_paths = {
                    var: [
                        os.path.join(input_path, x)
                        for x in find_atm_files(var, input_path)
                    ]
                    for var in handler_variables
                }
            elif mode == 'fx':
                # join with input_path so handlers receive full paths;
                # bare os.listdir names would only resolve from the CWD
                # (matches the atm/lnd and mpas branches)
                input_paths = {
                    var: [
                        os.path.join(input_path, x)
                        for x in os.listdir(input_path) if x[-3:] == '.nc'
                    ]
                    for var in handler_variables
                }
            else:
                input_paths = {
                    var: find_mpas_files(var, input_path, map_path)
                    for var in handler_variables
                }

            name = handler_method(input_paths,
                                  tables_path,
                                  metadata_path,
                                  raw_variables=handler.get('raw_variables'),
                                  units=handler.get('units'),
                                  name=handler.get('name'),
                                  table=handler.get('table'),
                                  positive=handler.get('positive'),
                                  serial=True,
                                  logdir=logdir)

            if name is not None:
                num_success += 1
                msg = 'Finished {handler}, {done}/{total} jobs complete'.format(
                    handler=name, done=num_success, total=num_handlers)
            else:
                msg = 'Error running handler {}'.format(handler['name'])
                print_message(msg, 'error')
            logger.info(msg)
            if mode != 'atm':
                pbar.update(idx)
        if mode != 'atm':
            pbar.finish()

    except Exception as error:
        print_debug(error)
        return 1
    else:
        print_message(
            "{} of {} handlers complete".format(num_success, num_handlers),
            'ok')
        return 0