예제 #1
0
def load_data(cache_dir=_default_cache_dir,
              cache=True,
              github_url='https://github.com/gidden/aneris'):
    """
    Load a dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Parameters
    ----------
    cache_dir : string, optional
        The directory in which to search for and write cached data. A
        leading ``~`` is expanded and the directory (including any missing
        parents) is created if it does not exist yet.
    cache : boolean, optional
        If True, then cache data locally for use on subsequent calls.
        If False, every data file used by this call is deleted from
        ``cache_dir`` before returning, including copies that were already
        present there.
    github_url : string
        Github repository where the data is stored

    Returns
    -------
    model
        The first object returned by ``aneris.read_excel`` for the model
        file, with its column labels converted to strings.
    hist
        The history data returned by ``aneris.pd_read``, with its column
        labels converted to strings.
    driver
        An ``aneris.HarmonizationDriver`` built from the run control,
        history, model, overrides and regions.

    Raises
    ------
    ValueError
        If the history file or the region definition file is empty.
    """
    longdir = os.path.expanduser(cache_dir)
    if not os.path.isdir(longdir):
        # makedirs rather than mkdir: the cache location may be nested in
        # parent directories that do not exist yet
        os.makedirs(longdir)

    files = {
        'rc': 'aneris.yaml',
        'hist': 'history.xls',
        'model': 'model.xls',
        'regions': 'regions.csv',
    }
    files = {k: os.path.join(longdir, f) for k, f in files.items()}

    for localfile in files.values():
        if os.path.exists(localfile):
            continue  # cached copy available, skip the download
        fname = os.path.basename(localfile)
        url = '/'.join(
            (github_url, 'raw', 'master', 'tests', 'test_data', fname))
        try:
            urlretrieve(url, localfile)
        except BaseException:
            # Do not leave a truncated download behind: existing files are
            # never re-fetched, so a partial file would be picked up as a
            # valid cached copy on the next call.
            if os.path.exists(localfile):
                os.remove(localfile)
            raise

    # read input
    hist = aneris.pd_read(files['hist'])
    if hist.empty:
        raise ValueError('History file is empty')
    hist.columns = hist.columns.astype(str)  # make sure they're all strings
    regions = aneris.pd_read(files['regions'])
    if regions.empty:
        raise ValueError('Region definition is empty')
    model, overrides, config = aneris.read_excel(files['model'])
    model.columns = model.columns.astype(str)  # make sure they're all strings
    rc = aneris.RunControl(rc=files['rc'])
    rc.recursive_update('config', config)

    # get driver
    driver = aneris.HarmonizationDriver(rc, hist, model, overrides, regions)

    if not cache:
        # caller asked for no local copies to be kept
        for localfile in files.values():
            os.remove(localfile)

    return model, hist, driver
예제 #2
0
def harmonize(inf,
              history,
              regions,
              rc,
              output_path,
              output_prefix,
              return_result=False,
              write_output=True):
    """
    Harmonize every scenario of a model input file and optionally write
    the results to Excel files.

    Parameters
    ----------
    inf : string
        Path to the model input file, read with ``aneris.read_excel``.
    history : string
        Path to the history data file, read with ``aneris.pd_read``.
    regions : string
        Path to the region definition file, read with ``aneris.pd_read``.
    rc : string
        Run control file handed to ``aneris.RunControl``. A falsy value
        skips the existence check for this argument.
    output_path : string
        Directory in which the output files are written.
    output_prefix : string
        Prefix of the output file names. If falsy, the base name of
        ``inf`` without its directory and extension is used.
    return_result : boolean, optional
        If True, return the harmonized results.
    write_output : boolean, optional
        If True, write ``<prefix>_harmonized.xlsx``,
        ``<prefix>_metadata.xlsx`` and, when diagnostics are not empty,
        ``<prefix>_diagnostics.xlsx`` to ``output_path``.

    Returns
    -------
    model, metadata, diagnostics
        The objects returned by ``driver.harmonized_results()``; only
        returned if ``return_result`` is True, otherwise None.

    Raises
    ------
    IOError
        If any of the given input files does not exist.
    ValueError
        If the history file or the region definition file is empty.
    """
    # check files exist
    check = [inf, history, regions, rc]
    for f in check:
        if f and not os.path.exists(f):
            raise IOError('{} does not exist on the filesystem.'.format(f))

    # read input
    hist = aneris.pd_read(history, str_cols=True)
    if hist.empty:
        raise ValueError('History file is empty')
    regions = aneris.pd_read(regions, str_cols=True)
    if regions.empty:
        raise ValueError('Region definition is empty')
    model, overrides, config = aneris.read_excel(inf)
    rc = aneris.RunControl(rc=rc)
    rc.recursive_update('config', config)

    # do core harmonization
    driver = aneris.HarmonizationDriver(rc, hist, model, overrides, regions)
    for scenario in driver.scenarios():
        driver.harmonize(scenario)
    model, metadata, diagnostics = driver.harmonized_results()

    if write_output:
        # Derive the default prefix from the file name only. Splitting the
        # raw path on '.' breaks for inputs such as './model.xls' (empty
        # prefix), and keeping the directory part of an absolute path
        # would make os.path.join discard output_path.
        prefix = output_prefix or os.path.splitext(os.path.basename(inf))[0]

        # write to excel
        fname = os.path.join(output_path, '{}_harmonized.xlsx'.format(prefix))
        logger().info('Writing result to: {}'.format(fname))
        aneris.pd_write(model, fname, sheet_name='data')

        # save data about harmonization
        fname = os.path.join(output_path, '{}_metadata.xlsx'.format(prefix))
        logger().info('Writing metadata to: {}'.format(fname))
        aneris.pd_write(metadata, fname)

        # save diagnostics, but only when there is something to report
        if not diagnostics.empty:
            fname = os.path.join(output_path,
                                 '{}_diagnostics.xlsx'.format(prefix))
            logger().info('Writing diagnostics to: {}'.format(fname))
            aneris.pd_write(diagnostics, fname)

    if return_result:
        return model, metadata, diagnostics