def load_data(cache_dir=_default_cache_dir, cache=True,
              github_url='https://github.com/gidden/aneris'):
    """
    Load a dataset from the online repository (requires internet).

    If a local copy is found then always use that to avoid network traffic.

    Parameters
    ----------
    cache_dir : string, optional
        The directory in which to search for and write cached data. A
        leading ``~`` is expanded and the directory (including missing
        parents) is created if it does not exist.
    cache : boolean, optional
        If True, then cache data locally for use on subsequent calls.
        If False, the local copies of all data files (including copies
        that were already present) are deleted before this function
        returns or raises.
    github_url : string
        Github repository where the data is stored

    Returns
    -------
    model
        Model data read from ``model.xls`` with ``aneris.read_excel``;
        its column labels are converted to strings.
    hist
        Historical data read from ``history.xls`` with ``aneris.pd_read``;
        its column labels are converted to strings.
    driver
        The ``aneris.HarmonizationDriver`` built from the run control,
        history, model, overrides and regions.

    Raises
    ------
    ValueError
        If the history data or the region definition is empty.
    """
    longdir = os.path.expanduser(cache_dir)
    if not os.path.isdir(longdir):
        # makedirs rather than mkdir so that a nested cache directory whose
        # parent does not exist yet can still be created
        os.makedirs(longdir)

    names = {
        'rc': 'aneris.yaml',
        'hist': 'history.xls',
        'model': 'model.xls',
        'regions': 'regions.csv',
    }
    files = {k: os.path.join(longdir, f) for k, f in names.items()}

    # a trailing slash on the repository url would otherwise yield '//'
    base_url = github_url.rstrip('/')

    try:
        for localfile in files.values():
            if os.path.exists(localfile):
                continue
            fname = os.path.basename(localfile)
            url = '/'.join(
                (base_url, 'raw', 'master', 'tests', 'test_data', fname))
            try:
                urlretrieve(url, localfile)
            except BaseException:
                # Never leave a truncated download behind (also on Ctrl-C):
                # later calls would mistake it for a valid cached copy.
                if os.path.exists(localfile):
                    os.remove(localfile)
                raise

        # read input
        hist = aneris.pd_read(files['hist'])
        if hist.empty:
            raise ValueError('History file is empty')
        # make sure they're all strings
        hist.columns = hist.columns.astype(str)
        regions = aneris.pd_read(files['regions'])
        if regions.empty:
            raise ValueError('Region definition is empty')
        model, overrides, config = aneris.read_excel(files['model'])
        # make sure they're all strings
        model.columns = model.columns.astype(str)
        rc = aneris.RunControl(rc=files['rc'])
        rc.recursive_update('config', config)

        # get driver
        driver = aneris.HarmonizationDriver(
            rc, hist, model, overrides, regions)
    finally:
        # honour cache=False even when loading failed part-way through
        if not cache:
            for localfile in files.values():
                if os.path.exists(localfile):
                    os.remove(localfile)

    return model, hist, driver
def harmonize(inf, history, regions, rc, output_path, output_prefix,
              return_result=False, write_output=True):
    """
    Harmonize all scenarios of a model input file and optionally write
    the results to Excel files.

    Parameters
    ----------
    inf : string
        Path of the model input file, read with ``aneris.read_excel``.
        Its base name also provides the default output prefix.
    history : string
        Path of the history file, read with ``aneris.pd_read``.
    regions : string
        Path of the region definition file, read with ``aneris.pd_read``.
    rc : string
        Path of the run control file handed to ``aneris.RunControl``.
        A falsy value skips the existence check for this argument.
    output_path : string
        Directory in which the output files are written.
    output_prefix : string
        Prefix of the output file names. If falsy, the part of the base
        name of ``inf`` before its first ``.`` is used.
    return_result : boolean, optional
        If True, return the harmonized results.
    write_output : boolean, optional
        If True, write ``<prefix>_harmonized.xlsx``,
        ``<prefix>_metadata.xlsx`` and, when the diagnostics are not
        empty, ``<prefix>_diagnostics.xlsx`` into ``output_path``.

    Returns
    -------
    tuple or None
        ``(model, metadata, diagnostics)`` as returned by
        ``driver.harmonized_results()`` if ``return_result`` is True,
        otherwise None.

    Raises
    ------
    IOError
        If any given input path does not exist.
    ValueError
        If the history data or the region definition is empty.
    """
    # check files exist
    for path in (inf, history, regions, rc):
        if path and not os.path.exists(path):
            raise IOError(
                '{} does not exist on the filesystem.'.format(path))

    # read input
    hist = aneris.pd_read(history, str_cols=True)
    if hist.empty:
        raise ValueError('History file is empty')
    regions = aneris.pd_read(regions, str_cols=True)
    if regions.empty:
        raise ValueError('Region definition is empty')
    model, overrides, config = aneris.read_excel(inf)
    rc = aneris.RunControl(rc=rc)
    rc.recursive_update('config', config)

    # do core harmonization
    driver = aneris.HarmonizationDriver(rc, hist, model, overrides, regions)
    for scenario in driver.scenarios():
        driver.harmonize(scenario)
    model, metadata, diagnostics = driver.harmonized_results()

    if write_output:
        # Derive the default prefix from the base name only: splitting the
        # full path on '.' breaks for './model.xls' (empty prefix), for
        # dotted directory names, and for absolute paths (os.path.join
        # would then ignore output_path altogether).
        prefix = output_prefix or os.path.basename(inf).split('.')[0]

        # write to excel
        fname = os.path.join(output_path, '{}_harmonized.xlsx'.format(prefix))
        logger().info('Writing result to: {}'.format(fname))
        aneris.pd_write(model, fname, sheet_name='data')

        # save data about harmonization
        fname = os.path.join(output_path, '{}_metadata.xlsx'.format(prefix))
        logger().info('Writing metadata to: {}'.format(fname))
        aneris.pd_write(metadata, fname)

        # save diagnostics, but only if there is something to report
        if not diagnostics.empty:
            fname = os.path.join(output_path,
                                 '{}_diagnostics.xlsx'.format(prefix))
            logger().info('Writing diagnostics to: {}'.format(fname))
            aneris.pd_write(diagnostics, fname)

    if return_result:
        return model, metadata, diagnostics