Example #1
0
def main():
    """Command line entry point: convert E3SM model output to CMIP format.

    Parses the command line arguments, loads a handler for each requested
    variable, then runs the handlers either serially or in a process pool.

    Returns:
        0 on success, 1 on failure (non-zero handler status, an exception,
        or a keyboard interrupt).
    """
    # parse the command line arguments
    _args = parse_arguments().__dict__

    # the variable list may arrive as separate tokens or as one
    # space-delimited string; normalize to a list of names
    if len(_args.get('var_list')) == 1 and " " in _args.get('var_list')[0]:
        var_list = _args.get('var_list')[0].split()
    else:
        var_list = _args.get('var_list')
    # tolerate trailing commas between variable names
    var_list = [x.strip(',') for x in var_list]
    input_path = _args.get('input_path')
    output_path = _args.get('output_path')
    tables_path = _args.get('tables_path')
    user_metadata = _args.get('user_metadata')
    custom_metadata = _args.get('custom_metadata')
    nproc = _args.get('num_proc')
    serial = _args.get('serial')
    realm = _args.get('realm')
    map_path = _args.get('map')
    cmor_log_dir = _args.get('logdir')
    # a falsy timeout disables the watchdog timer entirely
    timeout = int(_args.get('timeout')) if _args.get('timeout') else False
    simple = _args.get('simple', False)
    precheck_path = _args.get('precheck', False)
    freq = _args.get('freq')

    logger = _setup_custom_logger(f"{cmor_log_dir}/e3sm_to_cmip.log", True)
    logger.info(f"input_path = {input_path}")
    logger.info(f"output_path = {output_path}")
    logger.info(f"precheck_path = {precheck_path}")

    # in simple mode, fall back to the CMOR tables bundled with the package
    # when no tables path was given on the command line
    if simple and not tables_path:
        resource_path, _ = os.path.split(os.path.abspath(resources.__file__))
        tables_path = resource_path

    # optionally abort the run if it exceeds the requested wall time
    timer = None
    if timeout:
        timer = threading.Timer(timeout, timeout_exit)
        timer.start()

    # user-supplied handlers directory, or the package's bundled handlers
    if _args.get('handlers'):
        handlers_path = os.path.abspath(_args.get('handlers'))
    else:
        handlers_path, _ = os.path.split(
            os.path.abspath(cmor_handlers.__file__))

    # skip any variables that a previous run already produced
    if precheck_path:
        new_var_list = precheck(input_path, precheck_path, var_list, realm)
        if not new_var_list:
            print("All variables previously computed")
            os.mkdir(os.path.join(output_path, 'CMIP6'))
            if timer:
                timer.cancel()
            return 0
        else:
            print_message(
                f"Setting up conversion for {' '.join(new_var_list)}", 'ok')
            var_list = new_var_list

    # load variable handlers
    handlers = _load_handlers(
        handlers_path=handlers_path,
        tables_path=tables_path,
        var_list=var_list,
        freq=freq,
        realm=realm)

    if len(handlers) == 0:
        print_message('No handlers loaded')
        sys.exit(1)

    # info mode prints handler metadata and performs no conversion
    if _args.get('info'):
        print_var_info(
            handlers,
            freq,
            input_path,
            tables_path,
            _args.get('info_out'))
        sys.exit(0)

    new_metadata_path = os.path.join(
        output_path,
        'user_metadata.json')

    # create the output dir if it doesnt exist
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # setup temp storage directory (honor $TMPDIR when set)
    temp_path = os.environ.get('TMPDIR')
    if temp_path is None:
        temp_path = f'{output_path}/tmp'
        if not os.path.exists(temp_path):
            os.makedirs(temp_path)

    tempfile.tempdir = temp_path

    logging_path = os.path.join(output_path, 'converter.log')
    print_message(f"Writing log output to: {logging_path}", 'debug')

    # setup logging
    logging.basicConfig(
        format='%(asctime)s:%(levelname)s: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p',
        filename=logging_path,
        filemode='w',
        level=logging.INFO)

    # copy the users metadata json file with the updated output directory
    if not simple:
        copy_user_metadata(
            user_metadata, output_path)

    # run in the user-selected mode
    if serial:
        print_message('Running CMOR handlers in serial', 'ok')
        try:
            status = run_serial(
                handlers=handlers,
                input_path=input_path,
                tables_path=tables_path,
                metadata_path=new_metadata_path,
                map_path=map_path,
                realm=realm,
                logdir=cmor_log_dir,
                simple=simple,
                outpath=output_path,
                freq=freq)
        except KeyboardInterrupt:
            print_message(' -- keyboard interrupt -- ', 'error')
            return 1
        except Exception as e:
            print_debug(e)
            return 1
    else:
        print_message('Running CMOR handlers in parallel', 'ok')
        try:
            pool = Pool(max_workers=nproc)
            status = run_parallel(
                pool=pool,
                handlers=handlers,
                input_path=input_path,
                tables_path=tables_path,
                metadata_path=new_metadata_path,
                map_path=map_path,
                realm=realm,
                logdir=cmor_log_dir,
                simple=simple,
                outpath=output_path,
                freq=freq)
        except KeyboardInterrupt:
            print_message(' -- keyboard interrupt -- ', 'error')
            return 1
        except Exception as error:
            print_debug(error)
            return 1
    if status != 0:
        print_message(
            f"Error running handlers: { ' '.join([x['name'] for x in handlers]) }")
        return 1

    # add any user-supplied custom metadata to the output files
    if custom_metadata:
        add_metadata(
            file_path=output_path,
            var_list=var_list,
            metadata=custom_metadata)

    if timeout:
        timer.cancel()
    return 0
def main():
    """Command line entry point: convert model output to CMIP format.

    Parses the command line arguments, loads a handler per variable, and
    runs the handlers serially or in parallel.

    Returns:
        0 on success, 1 on failure.
    """

    # parse the command line arguments
    # NOTE(review): 'parse_argsuments' looks misspelled but is presumably the
    # actual name of the parser function defined elsewhere -- confirm before
    # renaming
    _args = parse_argsuments().__dict__

    # the variable list may arrive as separate tokens or as a single
    # space-delimited string; normalize to a list of names
    if len(_args.get('var_list')) == 1 and " " in _args.get('var_list')[0]:
        var_list = _args.get('var_list')[0].split()
    else:
        var_list = _args.get('var_list')
    # tolerate trailing commas between variable names
    var_list = [x.strip(',') for x in var_list]
    input_path = _args.get('input_path')
    output_path = _args.get('output_path')
    tables_path = _args.get('tables_path')
    user_metadata = _args.get('user_metadata')
    # optional flags default to False / 6 workers / atm mode when absent
    no_metadata = _args['no_metadata'] if _args.get('no_metadata') else False
    only_metadata = _args['only_metadata'] if _args.get(
        'only_metadata') else False
    nproc = _args['num_proc'] if _args.get('num_proc') else 6
    serial = _args['serial'] if _args.get('serial') else False
    mode = _args['mode'] if _args.get('mode') else 'atm'
    debug = True if _args.get('debug') else False
    map_path = _args['map'] if _args.get('map') else None
    cmor_log_dir = _args['logdir'] if _args.get('logdir') else None
    timeout = int(_args['timeout']) if _args.get('timeout') else None
    should_precheck = _args.get('precheck')

    # optionally abort the run if it exceeds the requested wall time
    timer = None
    if timeout:
        timer = threading.Timer(timeout, timeout_exit)
        timer.start()

    # user-supplied handlers directory, or the package's bundled handlers
    if _args.get('handlers'):
        handlers_path = os.path.abspath(_args.get('handlers'))
    else:
        handlers_path, _ = os.path.split(
            os.path.abspath(cmor_handlers.__file__))

    # skip any variables that a previous run already produced
    if should_precheck:
        new_var_list = precheck(input_path, output_path, var_list, mode)
        if not new_var_list:
            print("All variables previously computed")
            if timer: timer.cancel()
            return 0
        else:
            print("Setting up conversion for {}".format(
                " ".join(new_var_list)))
            var_list = new_var_list

    # add additional optional metadata to the output files
    if only_metadata:
        print_message('Updating file metadata and exiting', 'ok')
        add_metadata(file_path=output_path, var_list=var_list)
        return 0

    new_metadata_path = os.path.join(output_path, 'user_metadata.json')

    # create the output dir if it doesnt exist
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # setup temp storage directory (honor $TMPDIR when set)
    temp_path = os.environ.get('TMPDIR')
    if temp_path is None:

        temp_path = '{}/tmp'.format(output_path)
        if not os.path.exists(temp_path):
            os.makedirs(temp_path)

    tempfile.tempdir = temp_path

    logging_path = os.path.join(output_path, 'converter.log')
    print_message("Writing log output to: {}".format(logging_path), 'debug')

    # setup logging
    logging.basicConfig(format='%(asctime)s:%(levelname)s: %(message)s',
                        datefmt='%m/%d/%Y %I:%M:%S %p',
                        filename=logging_path,
                        filemode='w',
                        level=logging.INFO)

    # copy the users metadata json file with the updated output directory
    copy_user_metadata(user_metadata, output_path)

    # load variable handlers; abort when none could be loaded
    handlers = load_handlers(handlers_path, var_list, debug)
    if len(handlers) == 0:
        print_message('No handlers loaded')
        sys.exit(1)

    # run in the user-selected mode
    if serial:
        print_message('Running CMOR handlers in serial', 'ok')
        try:
            status = run_serial(handlers=handlers,
                                input_path=input_path,
                                tables_path=tables_path,
                                metadata_path=new_metadata_path,
                                map_path=map_path,
                                mode=mode,
                                logdir=cmor_log_dir)
        except KeyboardInterrupt as error:
            print_message(' -- keyboard interrupt -- ', 'error')
            return 1
        except Exception as e:
            print_debug(e)
            return 1
    else:
        print_message('Running CMOR handlers in parallel', 'ok')
        try:
            pool = Pool(nproc)
            status = run_parallel(pool=pool,
                                  handlers=handlers,
                                  input_path=input_path,
                                  tables_path=tables_path,
                                  metadata_path=new_metadata_path,
                                  map_path=map_path,
                                  mode=mode,
                                  logdir=cmor_log_dir)
        except KeyboardInterrupt as error:
            print_message(' -- keyboard interrupt -- ', 'error')
            return 1
        except Exception as error:
            print_debug(error)
            return 1
    # non-zero status from either runner means at least one handler failed
    if status != 0:
        print_message("Error running handlers: {}".format(" ".join(
            [x['name'] for x in handlers])))
        return 1

    # add additional optional metadata to the output files
    if no_metadata:
        print_message('Not adding additional metadata', 'ok')
    else:
        add_metadata(file_path=output_path, var_list=var_list)

    # stop the watchdog so it does not fire after a successful run
    if timeout:
        timer.cancel()
    return 0
Example #3
0
def run_parallel(pool, handlers, input_path, tables_path, metadata_path,
                 map_path=None, mode='atm', nproc=6, **kwargs):
    """
    Run every handler concurrently on the given worker pool.

    Params:
    -------
        pool: an executor-style pool whose submit() returns future objects
        handlers: a dict(str: (function_pointer, list(str) ) )
        input_path (str): path to the input files directory
        tables_path (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        map_path (str): map file used to locate MPAS input files
        mode (str): what type of files to work with
    Returns:
    --------
        0 (per-handler failures are reported, not raised)
    """
    futures = []
    will_run = []
    for handler in handlers:
        run_handler = handler['method']
        raw_vars = handler['raw_variables']

        # collect the input files each raw variable needs
        input_paths = {}
        if mode in ('atm', 'lnd'):
            for var in raw_vars:
                input_paths[var] = [
                    os.path.join(input_path, fname)
                    for fname in find_atm_files(var, input_path)]
        else:
            for var in raw_vars:
                input_paths[var] = find_mpas_files(var, input_path, map_path)

        # keyword arguments forwarded to the handler
        handler_kwargs = dict(
            table=handler.get('table'),
            raw_variables=handler.get('raw_variables'),
            units=handler.get('units'),
            positive=handler.get('positive'),
            name=handler.get('name'),
            logdir=kwargs.get('logdir'),
            unit_conversion=handler.get('unit_conversion'),
            simple=kwargs.get('simple'),
            outpath=kwargs.get('outpath'))
        will_run.append(handler.get('name'))

        futures.append(
            pool.submit(
                run_handler,
                input_paths,
                tables_path,
                metadata_path,
                **handler_kwargs))

    # drain the futures, counting successes as they complete
    pbar = tqdm(total=len(futures))
    num_handlers = len(handlers)
    num_success = 0
    finished_success = []
    for idx, future in enumerate(futures):
        try:
            out = future.result()
            finished_success.append(out)
            if out:
                num_success += 1
                msg = f'Finished {out}, {idx + 1}/{num_handlers} jobs complete'
            else:
                msg = f'Error running handler {handlers[idx]["name"]}'
                print_message(msg, 'error')

            logger.info(msg)
        except Exception as e:
            print_debug(e)
        pbar.update(1)

    pbar.close()
    terminate(pool)
    print_message(f"{num_success} of {num_handlers} handlers complete", 'ok')
    # report any handler that was submitted but never finished successfully
    failed = set(will_run) - set(finished_success)
    if failed:
        print_message(f"{', '.join(list(failed))} failed to complete")
    return 0
Example #4
0
def run_serial(handlers, input_path, tables_path, metadata_path, map_path=None,
               mode='atm', logdir=None, simple=False, outpath=None, freq="mon"):
    """
    Run each of the handlers one at a time on the main process

    Params:
    -------
        handlers: a dict(str: (function_pointer, list(str) ) )
        input_path (str): path to the input files directory
        tables_path (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        map_path (str): map file used to locate MPAS input files
        mode (str): what type of files to work with
        logdir (str): directory for per-handler log output
        simple (bool): forwarded to each handler
        outpath (str): output directory forwarded to each handler
        freq (str): output frequency forwarded to each handler
    Returns:
    --------
        returns 1 if an error occurs, else 0
    """
    try:

        num_handlers = len(handlers)
        num_success = 0

        # atm mode reports its own progress, so only show a bar otherwise
        if mode != 'atm':
            pbar = tqdm(total=len(handlers))

        for handler in handlers:

            handler_method = handler['method']
            handler_variables = handler['raw_variables']
            unit_conversion = handler.get('unit_conversion')

            # find the input files this handler needs
            if mode in ['atm', 'lnd']:

                input_paths = {var: [os.path.join(input_path, x) for x in
                                     find_atm_files(var, input_path)]
                               for var in handler_variables}
            elif mode == 'fx':
                input_paths = {var: [os.path.join(input_path, x) for x in os.listdir(input_path) if x[-3:] == '.nc']
                               for var in handler_variables}
            else:
                input_paths = {var: find_mpas_files(var, input_path,
                                                    map_path)
                               for var in handler_variables}

            # BUGFIX: reset before every handler. Previously 'name' was only
            # initialized once before the loop, so when a handler raised, the
            # previous handler's name leaked through and the failure was
            # counted (and logged) as a success.
            name = None
            try:
                name = handler_method(
                    input_paths,
                    tables_path,
                    metadata_path,
                    raw_variables=handler.get('raw_variables'),
                    units=handler.get('units'),
                    name=handler.get('name'),
                    table=handler.get('table'),
                    positive=handler.get('positive'),
                    serial=True,
                    logdir=logdir,
                    simple=simple,
                    outpath=outpath,
                    unit_conversion=unit_conversion,
                    freq=freq)
            except Exception as e:
                print_debug(e)

            if name is not None:
                num_success += 1
                msg = f'Finished {name}, {num_success}/{num_handlers} jobs complete'
            else:
                msg = f'Error running handler {handler["name"]}'
                print_message(msg, status='error')
            logger.info(msg)

            if mode != 'atm':
                pbar.update(1)
        if mode != 'atm':
            pbar.close()

    except Exception as error:
        print_debug(error)
        return 1
    else:
        print_message(
            f"{num_success} of {num_handlers} handlers complete", 'ok')
        return 0
Example #5
0
def run_parallel(pool,
                 handlers,
                 input_path,
                 tables_path,
                 metadata_path,
                 map_path=None,
                 mode='atm',
                 nproc=6,
                 **kwargs):
    """
    Run all the handlers in parallel
    Params:
    -------
        pool: a processing pool to run the handlers in (uses pool.apipe,
              so presumably a pathos-style pool -- confirm at the call site)
        handlers: a dict(str: (function_pointer, list(str) ) )
        input_path (str): path to the input files directory
        tables_path (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        map_path (str): map file used to locate MPAS input files
        mode (str): what type of files to work with
    Returns:
    --------
        returns 1 if an error occurs, else 0
    """

    pool_res = list()
    for idx, handler in enumerate(handlers):
        handler_method = handler['method']
        handler_variables = handler['raw_variables']
        # find the input files this handler needs
        if mode in ['atm', 'lnd']:

            input_paths = {
                var: [
                    os.path.join(input_path, x)
                    for x in find_atm_files(var, input_path)
                ]
                for var in handler_variables
            }
        else:
            input_paths = {
                var: find_mpas_files(var, input_path, map_path)
                for var in handler_variables
            }

        # setup the input args for the handler
        _kwargs = {
            'table': handler.get('table'),
            'raw_variables': handler.get('raw_variables'),
            'units': handler.get('units'),
            'positive': handler.get('positive'),
            'name': handler.get('name'),
            'logdir': kwargs.get('logdir')
        }

        pool_res.append(
            pool.apipe(handler_method, input_paths, tables_path, metadata_path,
                       **_kwargs))

    # wait for each result to complete
    pbar = progressbar.ProgressBar(maxval=len(pool_res))
    pbar.start()
    num_success = 0
    num_handlers = len(handlers)

    for idx, res in enumerate(pool_res):
        try:
            out = res.get(9999999)
            if out:
                num_success += 1
                msg = 'Finished {handler}, {done}/{total} jobs complete'.format(
                    handler=out, done=idx + 1, total=num_handlers)
            else:
                msg = 'Error running handler {}'.format(handlers[idx]['name'])
                print_message(msg, 'error')

            logger.info(msg)
            pbar.update(idx)
        except Exception as e:
            print_debug(e)
            # BUGFIX: clean up the progress bar and worker pool before
            # bailing out; the early return previously leaked both
            pbar.finish()
            terminate(pool)
            return 1

    pbar.finish()
    terminate(pool)
    print_message(
        "{} of {} handlers complete".format(num_success, num_handlers), 'ok')
    return 0
Example #6
0
def run_serial(handlers,
               input_path,
               tables_path,
               metadata_path,
               map_path=None,
               mode='atm',
               logdir=None):
    """
    Run every handler sequentially on the main process.

    Params:
    -------
        handlers: a dict(str: (function_pointer, list(str) ) )
        input_path (str): path to the input files directory
        tables_path (str): path to the tables directory
        metadata_path (str): path to the cmor input metadata
        map_path (str): map file used to locate MPAS input files
        mode (str): what type of files to work with
    Returns:
    --------
        returns 1 if an error occurs, else 0
    """
    try:

        total = len(handlers)
        completed = 0
        # atm mode reports its own progress, so only show a bar otherwise
        show_progress = mode != 'atm'

        if show_progress:
            pbar = progressbar.ProgressBar(maxval=len(handlers))
            pbar.start()

        for position, handler in enumerate(handlers):

            convert = handler['method']
            needed_vars = handler['raw_variables']

            # collect the input files each raw variable needs
            if mode in ['atm', 'lnd']:
                input_paths = {
                    v: [os.path.join(input_path, f)
                        for f in find_atm_files(v, input_path)]
                    for v in needed_vars
                }
            elif mode == 'fx':
                input_paths = {
                    v: [f for f in os.listdir(input_path) if f[-3:] == '.nc']
                    for v in needed_vars
                }
            else:
                input_paths = {
                    v: find_mpas_files(v, input_path, map_path)
                    for v in needed_vars
                }

            result = convert(input_paths,
                             tables_path,
                             metadata_path,
                             raw_variables=handler.get('raw_variables'),
                             units=handler.get('units'),
                             name=handler.get('name'),
                             table=handler.get('table'),
                             positive=handler.get('positive'),
                             serial=True,
                             logdir=logdir)

            # a handler signals failure by returning None
            if result is None:
                msg = 'Error running handler {}'.format(handler['name'])
                print_message(msg, 'error')
            else:
                completed += 1
                msg = 'Finished {handler}, {done}/{total} jobs complete'.format(
                    handler=result, done=completed, total=total)
            logger.info(msg)

            if show_progress:
                pbar.update(position)
        if show_progress:
            pbar.finish()

    except Exception as error:
        print_debug(error)
        return 1
    else:
        print_message(
            "{} of {} handlers complete".format(completed, total),
            'ok')
        return 0