def get_fm_file(file_name, download_if_missing=False):
    """Read a facilitymatcher file; if not stored locally, obtain it first.

    When the file cannot be loaded locally it is either downloaded from the
    remote server (``download_if_missing=True``) or written by the writer
    that corresponds to ``file_name``, and the load is then retried.

    :param file_name: str, can be 'FacilityMatchList_forStEWI' or
        'FRS_NAICSforStEWI'
    :param download_if_missing: bool, if True will attempt to load from
        remote server rather than generating if file not found locally
    :return: the loaded dataframe with the 'FRS_ID', 'FacilityID' and
        'NAICS' columns (those that are present) cast to str; None if the
        file could not be loaded even after the download/generation attempt
    """
    file_meta = set_facilitymatcher_meta(file_name, category='')
    df = load_preprocessed_output(file_meta, paths)
    if df is None:
        log.info(f'{file_name} not found in {output_dir}, '
                 'writing facility matches to file')
        if download_if_missing:
            download_from_remote(file_meta, paths)
        elif file_name == 'FacilityMatchList_forStEWI':
            write_fm.write_facility_matches()
        elif file_name == 'FRS_NAICSforStEWI':
            write_naics.write_NAICS_matches()
        df = load_preprocessed_output(file_meta, paths)
    if df is None:
        # The download/generation produced nothing loadable (or file_name
        # matched no known writer). Bail out here instead of failing with
        # an opaque TypeError on the column checks below.
        log.error(f'{file_name} could not be loaded from {output_dir}')
        return None
    # Identifier columns must be strings, not numeric types.
    col_dict = {"FRS_ID": "str",
                "FacilityID": "str",
                "NAICS": "str"}
    for k, v in col_dict.items():
        if k in df:
            df[k] = df[k].astype(v)
    return df
# Example #2
# 0
def getFlowByActivity(datasource,
                      year,
                      flowclass=None,
                      geographic_level=None,
                      download_if_missing=DEFAULT_DOWNLOAD_IF_MISSING):
    """
    Retrieves stored data in the FlowByActivity format
    :param datasource: str, the code of the datasource.
    :param year: int, a year, e.g. 2012
    :param flowclass: str, a 'Class' of the flow. Optional. E.g. 'Water'
    :param geographic_level: str, a geographic level of the data.
                             Optional. E.g. 'national', 'state', 'county'.
    :param download_if_missing: bool, if True will attempt to load from remote server
        prior to generating if file not found locally
    :return: a pandas DataFrame in FlowByActivity format, or None if the
        FBA could be neither loaded, downloaded nor generated
    """
    from esupy.processed_data_mgmt import download_from_remote
    # Set fba metadata
    name = flowsa.flowbyactivity.set_fba_name(datasource, year)
    fba_meta = set_fb_meta(name, "FlowByActivity")

    # Try to load a local version of fba
    fba = load_preprocessed_output(fba_meta, paths)

    # Remote download, only when allowed by the caller
    if fba is None and download_if_missing:
        log.info('%s %s not found in %s, downloading from remote source',
                 datasource, str(year), fbaoutputpath)
        download_from_remote(fba_meta, paths)
        fba = load_preprocessed_output(fba_meta, paths)

    # Still nothing: generate the fba locally and load the result
    if fba is None:
        log.info('%s %s not found in %s, running functions to generate FBA',
                 datasource, str(year), fbaoutputpath)
        flowsa.flowbyactivity.main(year=year, source=datasource)
        fba = load_preprocessed_output(fba_meta, paths)

    if fba is None:
        # Return early: the optional filters below index into the dataframe
        # and would raise a TypeError on None.
        log.error('getFlowByActivity failed, FBA not found')
        return None
    log.info('Loaded %s %s from %s', datasource, str(year), fbaoutputpath)

    # Address optional parameters
    if flowclass is not None:
        fba = fba[fba['Class'] == flowclass]
    # if geographic level specified, only load rows in geo level
    if geographic_level is not None:
        fba = filter_by_geoscale(fba, geographic_level)
    return fba
# Example #3
# 0
def getFlowBySector(methodname,
                    download_FBAs_if_missing=DEFAULT_DOWNLOAD_IF_MISSING,
                    download_FBS_if_missing=DEFAULT_DOWNLOAD_IF_MISSING):
    """
    Return the FlowBySector output for a method, trying in order: the local
    store, the remote server (if allowed), and local generation.
    :param methodname: string, Name of an available method for the given class
    :param download_FBAs_if_missing: bool, passed on to the FBS generation;
        if True will attempt to load FBAs used in generating the FBS from
        remote server prior to generating if file not found locally
    :param download_FBS_if_missing: bool, if True will attempt to load from
        remote server prior to generating if file not found locally
    :return: dataframe in flow by sector format, None if every attempt failed
    """
    meta = set_fb_meta(methodname, "FlowBySector")

    # Attempt 1: local copy
    sector_df = load_preprocessed_output(meta, paths)

    # Attempt 2: remote copy (the '.log' file goes into the 'Log' subfolder)
    if sector_df is None and download_FBS_if_missing:
        log.info('%s not found in %s, downloading from remote source',
                 methodname, fbsoutputpath)
        download_from_remote(meta, paths,
                             subdirectory_dict={'.log': 'Log'})
        sector_df = load_preprocessed_output(meta, paths)

    # Attempt 3: build the FBS, optionally downloading the FBAs it needs
    if sector_df is None:
        log.info('%s not found in %s, running functions to generate FBS',
                 methodname, fbsoutputpath)
        flowsa.flowbysector.main(
            method=methodname,
            download_FBAs_if_missing=download_FBAs_if_missing)
        sector_df = load_preprocessed_output(meta, paths)

    # Report the outcome of whichever attempt ran last
    if sector_df is not None:
        log.info('Loaded %s from %s', methodname, fbsoutputpath)
        return sector_df
    log.error('getFlowBySector failed, FBS not found')
    return None
# Example #4
# 0
def read_method(method_id):
    """Load the stored method for ``method_id`` from the output directory.

    :param method_id: method identifier; its ``value`` attribute is used in
        the error message
    :return: the loaded method, or None if loading raised an OSError
        (which includes FileNotFoundError)
    """
    method_meta = set_lcia_method_meta(method_id)
    try:
        log.info('reading stored method file')
        return load_preprocessed_output(method_meta, paths)
    except OSError:
        # FileNotFoundError is a subclass of OSError, so this single clause
        # covers both cases.
        log.error('No parquet file identified for ' + method_id.value)
    return None
# Example #5
# 0
def read_method(method_id):
    """Load the stored method for ``method_id`` and log the outcome.

    :param method_id: method identifier used to build the file metadata
    :return: the loaded method, or None when nothing was found
    """
    meta = set_lcia_method_meta(method_id)
    stored = load_preprocessed_output(meta, paths)
    location = outputpath + '/' + meta.category
    if stored is not None:
        log.info('loaded ' + meta.name_data + ' from ' + location)
    else:
        log.info(meta.name_data + ' not found in ' + location)
    return stored
def read_FRS_file(file_name, col_dict):
    """Load a locally stored FRS csv file, keeping only selected columns.

    :param file_name: str, name of the FRS file (any extension is stripped
        before loading)
    :param col_dict: dict mapping each column to keep to the dtype it is
        cast to
    :return: dataframe holding only the columns of ``col_dict``, in that
        order, each cast to its requested dtype
    """
    meta = set_facilitymatcher_meta(file_name, category=ext_folder)
    log.info('loading %s from %s', meta.name_data, FRSpath)
    # Load by bare name with an explicit csv extension.
    meta.name_data = strip_file_extension(meta.name_data)
    meta.ext = 'csv'
    source = load_preprocessed_output(meta, paths)
    selected = pd.DataFrame()
    for column in col_dict:
        selected[column] = source[column].astype(col_dict[column])
    return selected
def read_inventory(inventory_acronym, year, f, download_if_missing=False):
    """Load an inventory from the local directory, obtaining it if missing.

    A missing inventory is downloaded from the remote server (together with
    its json metadata file) when ``download_if_missing`` is True, otherwise
    it is generated; the load is then retried.

    :param inventory_acronym: like 'TRI'
    :param year: year as number like 2010
    :param f: object of class StewiFormat
    :param download_if_missing: bool, if True will attempt to load from
        remote server instead of generating if file not found locally
    :return: dataframe of stored inventory with the dtypes given by
        ``f.field_types()`` applied; if not present returns None
    """
    meta = set_stewi_meta(inventory_acronym + '_' + str(year), str(f))
    inventory = load_preprocessed_output(meta, paths)
    method_path = paths.local_path + '/' + meta.category

    if inventory is None:
        log.info(f'{meta.name_data} not found in {method_path}')
        if not download_if_missing:
            log.info('requested inventory does not exist in local directory, '
                     'it will be generated...')
            generate_inventory(inventory_acronym, year)
        else:
            meta.tool = meta.tool.lower()  # lower case for remote access
            download_from_remote(meta, paths)
            # the metadata file sits at the top level as json
            metadata_meta = copy.copy(meta)
            metadata_meta.category = ''
            metadata_meta.ext = 'json'
            download_from_remote(metadata_meta, paths)
        inventory = load_preprocessed_output(meta, paths)
        if inventory is None:
            log.error('error generating inventory')

    if inventory is None:
        return inventory

    log.info(f'loaded {meta.name_data} from {method_path}')
    # ensure dtypes, restricted to the fields actually present
    all_fields = f.field_types()
    present = list(inventory)
    dtypes = {}
    for field, dtype in all_fields.items():
        if field in present:
            dtypes[field] = dtype
    return inventory.astype(dtypes)
# Example #8
# 0
def getFlowBySector(methodname,
                    download_if_missing=DEFAULT_DOWNLOAD_IF_MISSING):
    """
    Return the stored FlowBySector output for a method; when it is not stored
    locally, download it (if allowed) or generate it, then load it.
    :param methodname: string, Name of an available method for the given class
    :param download_if_missing: bool, if True will attempt to load from remote server
        prior to generating if file not found locally
    :return: dataframe in flow by sector format, None if it could not be
        loaded, downloaded or generated
    """
    from esupy.processed_data_mgmt import download_from_remote

    meta = set_fb_meta(methodname, "FlowBySector")
    result = load_preprocessed_output(meta, paths)

    # Local copy missing: fetch the FBS parquet (and its log) from remote
    if result is None and download_if_missing:
        log.info('%s not found in %s, downloading from remote source',
                 methodname, fbsoutputpath)
        log_subdirs = {'.log': 'Log'}
        download_from_remote(meta, paths, subdirectory_dict=log_subdirs)
        result = load_preprocessed_output(meta, paths)

    # Neither local nor remote copy available: generate the FBS and reload
    if result is None:
        log.info('%s not found in %s, running functions to generate FBS',
                 methodname, fbsoutputpath)
        flowsa.flowbysector.main(method=methodname)
        result = load_preprocessed_output(meta, paths)

    if result is None:
        log.error('getFlowBySector failed, FBS not found')
    else:
        log.info('Loaded %s from %s', methodname, fbsoutputpath)
    return result
def getCombinedInventory(name, category=''):
    """Read the inventory dataframe from local directory.

    If ``name`` contains the write-format extension it is treated as a file
    name and read directly from ``<output_dir>/<category>/<name>``; otherwise
    it is treated as a dataset name and loaded through its metadata.

    :param name: str, name of dataset or name of file
    :param category: str, optional subfolder of the output directory, with
        or without a trailing slash
    :return: the loaded inventory, or None if it was not found
    """
    if ("." + WRITE_FORMAT) in name:
        method_path = output_dir + '/' + category
        # A category given without a trailing slash used to be fused with
        # the file name ('<category><name>'); add the separator when needed.
        separator = '' if method_path.endswith(('/', '\\')) else '/'
        inventory = read_into_df(method_path + separator + name)
    else:
        meta = set_stewicombo_meta(name, category)
        method_path = output_dir + '/' + meta.category
        inventory = load_preprocessed_output(meta, paths)
    if inventory is None:
        log.info(f'{name} not found in {method_path}')
    else:
        log.info(f'loaded {name} from {method_path}')
    return inventory