Exemple #1
0
def noaa_parse(*, year, **_):
    """
    Combine, parse, and format the NOAA landings data.

    Reads ``foss_landings.csv`` from ``externaldatapath``, keeps the rows
    for ``year``, folds the Florida east/west records into one state,
    sums 'Sum Dollars' per year and state, and attaches state FIPS codes.

    :param year: year of data to keep; cast with ``int()`` before being
        compared against the 'Year' column
    :param _: any additional keyword arguments are accepted and ignored
    :return: df, parsed and partially formatted to flowbyactivity
        specifications
    """
    # Read directly into a pandas df
    df_raw = pd.read_csv(externaldatapath + "foss_landings.csv")

    # read state fips from common.py
    df_state = get_state_FIPS().reset_index(drop=True)
    df_state['State'] = df_state['State'].str.lower()

    # drop the weight column and filter by year; take an explicit copy so
    # the column assignments below act on an independent frame instead of
    # a slice of the raw data (avoids pandas' SettingWithCopyWarning)
    df = df_raw.drop(columns='Sum Pounds')
    df = df[df['Year'] == int(year)].copy()

    # modify fish state names to match those from common. noaa
    # differentiates between florida east and west, which is not
    # necessary for our purposes. The suffixes are plain text, so
    # regex=False keeps the result identical across pandas versions.
    df['State'] = (df['State'].str.lower()
                   .str.replace('-east', '', regex=False)
                   .str.replace('-west', '', regex=False))

    # cast dollars as numeric; the thousands separators only need to be
    # stripped when the column was read in as text
    if not pd.api.types.is_numeric_dtype(df['Sum Dollars']):
        df['Sum Dollars'] = df['Sum Dollars'].str.replace(
            ',', '', regex=False)
    df['Sum Dollars'] = pd.to_numeric(df['Sum Dollars'])

    # sum florida data (and any other repeated state) per year
    df2 = df.groupby(['Year', 'State'],
                     as_index=False).agg({"Sum Dollars": "sum"})

    # new column includes state fips
    df3 = df2.merge(df_state[["State", "FIPS"]], how="left", on="State")

    # data includes "process at sea", which is not associated with any
    # fips: assign a placeholder fips, add the state name to the
    # description and drop the state name
    # NOTE(review): the placeholder is the integer 99 while the merged
    # FIPS values are presumably strings - confirm downstream handling
    at_sea = df3['State'] == 'process at sea'
    df3['Description'] = None
    df3.loc[at_sea, 'Description'] = df3.loc[at_sea, 'State']
    df3.loc[at_sea, 'FIPS'] = 99
    df4 = df3.drop(columns='State')

    # rename columns to match flowbyactivity format
    df4 = df4.rename(columns={"Sum Dollars": "FlowAmount",
                              "FIPS": "Location"})

    # hardcode data
    df4["Class"] = "Money"
    df4["SourceName"] = "NOAA_Landings"
    df4["FlowName"] = None
    df4 = assign_fips_location_system(df4, year)
    df4["Unit"] = "$"
    df4["ActivityProducedBy"] = "All Species"
    df4['DataReliability'] = 5  # tmp
    df4['DataCollection'] = 5  # tmp

    return df4
def create_geoscale_list(df, geoscale, year='2015'):
    """
    Build the list of FIPS codes that belong to a geographic scale.

    :param df: FlowBySector or FlowByActivity df; its 'LocationSystem'
        column is checked for values containing 'FIPS'
    :param geoscale: 'national', 'state', or 'county'
    :param year: year passed on to the state/county FIPS lookups
    :return: list of relevant FIPS; empty when the geoscale is not
        recognized or the df does not use a FIPS location system
    """
    # the national scale does not depend on the df's location system
    if geoscale == "national":
        return [US_FIPS]

    # sub-national lists are only available for FIPS-based location systems
    uses_fips = df['LocationSystem'].str.contains('FIPS').any()
    if not uses_fips:
        return []

    if geoscale == "state":
        return list(get_state_FIPS(year)['FIPS'])
    if geoscale == "county":
        return list(get_county_FIPS(year)['FIPS'])

    # unrecognized geoscale
    return []