Beispiel #1
0
def write_daily_prof(df, temp, run_group, state):
    '''
    Write the daily profile based hourly profiles as a normalized list by
    year/month/day/hour.
    Daily factors should be based on sum of hourly haps over sum of annual haps.
    Writes WORK_PATH/temporal/tract_<state>_<run_group>_hourly.csv.
    '''
    cols = ['region_cd','scc','run_group','facid','tract','polynumber','src_id','ann_value']
    # Match by FIPS+SCC first, falling back to FIPS only
    hierarchy = [['region_cd','scc'],['region_cd',]]
    # Keep the highest-emitting record per run group/county/facility.
    # DataFrame.sort() and take_last were removed in pandas 0.20; use the
    # sort_values()/keep='last' equivalents (consistent with the rest of
    # this module).
    df = df[cols].copy().sort_values('ann_value').drop_duplicates(
      ['run_group','region_cd','facid'], keep='last')
    temp.aermod.fillna(0, inplace=True)
    value_cols = ['month','day','hour','factor']
    df = df[['run_group','region_cd','scc',
      'ann_value']].copy().drop_duplicates(['run_group','region_cd','scc'])
    df = match_temporal(df, temp.aermod, value_cols, hierarchy, temp.use_daily)
    df.drop('scc', axis=1, inplace=True)
    # Two digit year taken from the 4 digit BASE_YEAR environment variable
    df['year'] = os.environ['BASE_YEAR'][2:4]
    df.drop_duplicates(['region_cd','run_group','year','month','day','hour'], inplace=True)
    # Normalize the hourly factors so that they average to 1 within a county
    mean_factor = df[['region_cd','factor']].copy().groupby('region_cd', as_index=False).mean()
    df = pd.merge(df, mean_factor, on='region_cd', how='left', suffixes=['','_mean'])
    df['factor'] = df['factor']/df['factor_mean']
    df[['month','day','hour']] = df[['month','day','hour']].astype('i')
    fname = os.path.join(os.environ['WORK_PATH'],'temporal','tract_%s_%s_hourly.csv' %(state,
      run_group))
    df.to_csv(fname, columns=['run_group','region_cd','year','month','day','hour','factor'],
      index=False)
Beispiel #2
0
def write_no_daily_prof(run_df, temp, run_group, state):
    '''
    Write the standard non-hourly non-daily profile based temporal profiles.
    Writes WORK_PATH/temporal/tract_<state>_<run_group>_temporal.csv.
    '''
    cols = ['region_cd','scc','run_group','facid','tract','polynumber','src_id','ann_value']
    run_df = run_df[cols].copy().drop_duplicates()
    # Use main contributing region_cd and SCC for source temporalization.
    # DataFrame.sort() was removed in pandas 0.20; use sort_values().
    run_df.sort_values('ann_value', inplace=True)
    run_df.drop_duplicates('facid', keep='last', inplace=True)
    scalar_cols = [s_col for s_col in temp.aermod.columns if s_col.startswith('Scalar')]
    value_cols = ['qflag',] + scalar_cols
    # Only match by fips/scc or fips; SCC only gets default
    hierarchy = [['region_cd','scc'],['region_cd',]]
    run_df = match_temporal(run_df, temp.aermod, value_cols, hierarchy)
    run_df.drop(['scc','ann_value'], axis=1, inplace=True)
    run_df.drop_duplicates(inplace=True)
    # Fill unmatched sources with a default profile; OILGAS4 uses its own
    if not run_df[run_df[def_prof.index[0]].isnull()].empty:
        if run_group == 'OILGAS4':
            run_df = fill_default(run_df, def_oil_prof)
        else:
            run_df = fill_default(run_df, def_prof)
    # Output scalar column count follows the most detailed qflag present.
    # xrange is Python 2 only; range keeps this consistent with the other
    # functions in this module and Python 3 compatible.
    qflag_list = list(run_df['qflag'].drop_duplicates())
    if 'MHRDOW7' in qflag_list:
        scalar_cols = ['Scalar%s' %x for x in range(1,2017)]
    elif 'MHRDOW' in qflag_list:
        scalar_cols = ['Scalar%s' %x for x in range(1,865)]
    elif 'HROFDY' in qflag_list:
        scalar_cols = ['Scalar%s' %x for x in range(1,25)]
    else:
        scalar_cols = ['Scalar%s' %x for x in range(1,13)]
    cols = ['run_group','state','region_cd','tract','polynumber','facid','src_id','qflag']
    run_df['state'] = state_dict[state]
    fname = os.path.join(os.environ['WORK_PATH'],'temporal','tract_%s_%s_temporal.csv' %(state,
      run_group))
    run_df.to_csv(fname, index=False, columns=cols+scalar_cols)
Beispiel #3
0
def get_no_daily_prof(run_df, temp):
    '''
    Build the standard non-hourly non-daily profile based temporal profiles.

    Returns a DataFrame of run_group/met_cell/src_id/qflag plus the scalar
    columns required by the most detailed qflag present.
    '''
    scalar_cols = [
        s_col for s_col in temp.aermod.columns if s_col.startswith('Scalar')
    ]
    value_cols = [
        'qflag',
    ] + scalar_cols
    # Only match by fips/scc or fips; SCC only gets default
    hierarchy = [['region_cd', 'scc'], [
        'region_cd',
    ]]
    run_df = match_temporal(run_df, temp.aermod, value_cols, hierarchy)
    run_df.drop(['region_cd', 'scc'], axis=1, inplace=True)
    run_df.drop_duplicates(inplace=True)
    # Fill any unmatched sources with the default profile
    if not run_df[run_df[def_prof.index[0]].isnull()].empty:
        run_df = fill_default(run_df, def_prof)
    # Scalar column count follows the most detailed qflag present.
    # xrange is Python 2 only; use range for Python 3 compatibility.
    qflag_list = list(run_df['qflag'].drop_duplicates())
    if 'MHRDOW7' in qflag_list:
        scalar_cols = ['Scalar%s' % x for x in range(1, 2017)]
    elif 'MHRDOW' in qflag_list:
        scalar_cols = ['Scalar%s' % x for x in range(1, 865)]
    elif 'HROFDY' in qflag_list:
        scalar_cols = ['Scalar%s' % x for x in range(1, 25)]
    else:
        scalar_cols = ['Scalar%s' % x for x in range(1, 13)]
    return run_df[['run_group', 'met_cell', 'src_id', 'qflag'] +
                  scalar_cols].copy()
Beispiel #4
0
def write_temp_factors(df, temp, work_path):
    '''
    Write the temporal factors by source ID for each facility.
    Adjust the output columns based on the max number of scalars used within
    the facility. Writes <work_path>/temporal/point_temporal.csv.
    '''
    df = df[[
        'region_cd', 'scc', 'facility_id', 'facility_name', 'src_id', 'state'
    ]].copy()
    scalar_cols = [col for col in temp.columns if col.startswith('Scalar')]
    # Cross-reference hierarchy from most to least specific
    hierarchy = [['region_cd', 'scc', 'facility_id'], ['scc', 'facility_id'],
                 ['region_cd', 'facility_id'], ['facility_id'],
                 ['region_cd', 'scc'], ['scc'], ['region_cd']]
    df = match_temporal(df, temp, ['qflag'] + scalar_cols, hierarchy)
    cols = ['facility_id', 'facility_name', 'src_id', 'qflag']
    df = df[cols + scalar_cols].copy().drop_duplicates(
        ['facility_id', 'src_id'])
    # Output column count follows the most detailed qflag present.
    # BUGFIX: the hour-of-day qflag is spelled 'HROFDY' everywhere else in
    # this module; the previous 'HROFDAY' test could never match, so the
    # 24-scalar case silently fell through to the monthly (12) case.
    qflag_list = list(df['qflag'].drop_duplicates())
    if 'MHRDOW7' in qflag_list:
        col_names = cols + ['Scalar%s' % x for x in range(1, 2017)]
    elif 'MHRDOW' in qflag_list:
        col_names = cols + ['Scalar%s' % x for x in range(1, 865)]
    elif 'HROFDY' in qflag_list:
        col_names = cols + ['Scalar%s' % x for x in range(1, 25)]
    else:
        col_names = cols + ['Scalar%s' % x for x in range(1, 13)]
    fname = os.path.join(work_path, 'temporal', 'point_temporal.csv')
    df.to_csv(fname, columns=col_names, index=False, quotechar=' ')
Beispiel #5
0
def get_temp_codes(df, xref):
    '''
    Get the temporal profile codes for each row. This is used for checking
    unique temporal profiles.

    *  Assume that diurnal profiles only vary by weekday/weekend or
       are constant across all days of the week.
       This assumption is good for all US point sources in 2014v2.
    '''
    # Backfill missing ALLDAY codes from TUESDAY (any weekday works under
    # the assumption above), falling back to WEEKDAY on a KeyError.
    # .ix was deprecated in pandas 0.20 and removed in 1.0; use .loc.
    if len(xref[xref['ALLDAY'].isnull()]) > 0:
        if 'TUESDAY' in list(xref.columns):
            try:
                xref.loc[xref['ALLDAY'].isnull(),
                         'ALLDAY'] = xref.loc[xref['ALLDAY'].isnull(), 'TUESDAY']
            except KeyError:
                xref.loc[xref['ALLDAY'].isnull(),
                         'ALLDAY'] = xref.loc[xref['ALLDAY'].isnull(), 'WEEKDAY']
    type_cols = ['ALLDAY', 'MONTHLY', 'WEEKLY']
    xref.drop_duplicates(['facility_id', 'region_cd', 'scc'], inplace=True)
    # Normalize the key columns before matching
    xref['region_cd'] = xref['region_cd'].apply(fix_fips)
    xref['scc'] = xref['scc'].apply(lambda x: str(x).strip())
    xref['facility_id'] = xref['facility_id'].apply(lambda x: str(x).strip())
    # Cross-reference hierarchy from most to least specific
    hierarchy = [['region_cd', 'scc', 'facility_id'], ['scc', 'facility_id'],
                 ['region_cd', 'facility_id'], ['facility_id'],
                 ['region_cd', 'scc'], ['scc'], ['region_cd']]
    return match_temporal(df, xref, type_cols, hierarchy)
Beispiel #6
0
def write_daily_prof(df, temp):
    '''
    Write the daily profile based hourly profiles as a normalized list by
    year/month/day/hour.
    Daily factors should be based on sum of hourly haps over sum of annual haps.
    Writes one WORK_PATH/temporal/<run_group>_<state>_hourly.csv per state.
    '''
    cols = ['region_cd','scc','run_group','met_cell','ann_value','src_id']
    # Match by FIPS+SCC first, falling back to FIPS only
    hierarchy = [['region_cd','scc'],['region_cd',]]
    # Keep the highest-emitting record per run group/county/met cell.
    # take_last was removed in pandas 0.20; keep='last' is the replacement.
    df = df[cols].copy().sort_values('ann_value').drop_duplicates(
      ['run_group','region_cd','met_cell'], keep='last')
    temp.profs.fillna(0, inplace=True)
    value_cols = ['month','day','hour','factor']
    run_group = df['run_group'].values[0]
    fips_map = get_fips_map(df[['run_group','region_cd','met_cell','src_id',
      'ann_value']].copy())
    write_county_xwalk(fips_map, run_group)
    df = df[['run_group','region_cd','scc',
      'ann_value']].copy().drop_duplicates(['run_group','region_cd','scc'])
    # Write one hourly factor file per state (first 2 digits of the FIPS)
    for state in list(df['region_cd'].str[:2].drop_duplicates()):
        # .ix was removed in pandas 1.0; use .loc instead
        st_df = df.loc[df['region_cd'].str[:2] == state].copy()
        st_df = match_temporal(st_df, temp.profs, value_cols, hierarchy, temp.use_daily)
        st_df.drop('scc', axis=1, inplace=True)
        # Two digit year taken from the 4 digit BASE_YEAR environment variable
        st_df['year'] = os.environ['BASE_YEAR'][2:4]
        st_df.drop_duplicates(['region_cd','run_group','year','month','day','hour'],
          inplace=True)
        # Normalize the hourly factors so that they average to 1 within a county
        mean_factor = st_df[['region_cd','factor']].copy().groupby('region_cd', as_index=False).mean()
        st_df = pd.merge(st_df, mean_factor, on='region_cd', how='left', suffixes=['','_mean'])
        st_df['factor'] = st_df['factor']/st_df['factor_mean']
        st_df[['month','day','hour']] = st_df[['month','day','hour']].astype('i')
        fname = os.path.join(os.environ['WORK_PATH'],'temporal','%s_%s_hourly.csv' %(run_group, state))
        st_df.to_csv(fname, columns=['run_group','region_cd','year','month','day','hour','factor'],
          index=False, float_format='%.12g')
Beispiel #7
0
 def _write_temp_prof(self, df, temp, airport_type):
     '''
     Write the temporal factors by source ID for each facility.
     Adjust the output columns based on the max number of scalars used
     within the facility.
     Writes WORK_PATH/temporal/airport_<airport_type>_temporal.csv.
     '''
     cols = ['facility_id','facility_name','src_id']
     df = df[cols].copy()
     # Pull the inventory rows for these facilities.
     # .ix was removed in pandas 1.0; use .loc instead.
     inv_df = self.inv.loc[self.inv['facility_id'].isin(list(df['facility_id'].drop_duplicates())),
         ['region_cd','scc','facility_id','ann_value']].copy()
     # Keep the top-emitting record per facility for temporal matching.
     # DataFrame.sort() was removed in pandas 0.20; use sort_values().
     inv_df.sort_values('ann_value', ascending=False, inplace=True)
     inv_df.drop_duplicates('facility_id', inplace=True)
     scalar_cols = [col for col in temp.columns if col.startswith('Scalar')]
     # Cross-reference hierarchy from most to least specific
     hierarchy = [['region_cd','scc','facility_id'],['scc','facility_id'],['region_cd','facility_id'],
     ['facility_id',], ['region_cd','scc'], ['scc',], ['region_cd',]]
     temp = match_temporal(inv_df, temp, ['qflag',]+scalar_cols, hierarchy)
     # Output scalar column count follows the most detailed qflag present
     qflag_list = list(temp['qflag'].drop_duplicates())
     if 'MHRDOW7' in qflag_list:
         scalar_cols = ['Scalar%s' %x for x in range(1,2017)]
     elif 'MHRDOW' in qflag_list:
         scalar_cols = ['Scalar%s' %x for x in range(1,865)]
     elif 'HROFDY' in qflag_list:
         scalar_cols = ['Scalar%s' %x for x in range(1,25)]
     else:
         scalar_cols = ['Scalar%s' %x for x in range(1,13)]
     df = pd.merge(df, temp[['facility_id','qflag']+scalar_cols], on='facility_id', how='left')
     df.drop_duplicates(['facility_id','src_id'], inplace=True)
     fname = os.path.join(os.environ['WORK_PATH'],'temporal','airport_%s_temporal.csv' %airport_type)
     df.to_csv(fname, index=False, quotechar=' ')
Beispiel #8
0
def calc_monthly_temp(df, temp):
    '''
    Get monthly-only temporalization. The CMV run group only varies by
    month, so every matched profile is expected to carry the MONTH qflag.
    Returns state/facid/src_id/qflag plus the 12 monthly scalar columns.
    '''
    profile_scalars = [c for c in temp.profs.columns if c.startswith('Scalar')]
    # Only match by fips/scc or fips; SCC only gets default
    match_order = [['region_cd', 'scc'], ['region_cd'], ['scc']]
    df = match_temporal(df, temp.profs, ['qflag'] + profile_scalars, match_order)
    # Derive the 2-digit state code from the FIPS, then drop the match keys
    df['state'] = df['region_cd'].str[:2]
    df = df.drop(['region_cd', 'scc'], axis=1).drop_duplicates()
    # Anything other than MONTH is unexpected for CMV; warn but keep going
    if len(df[df['qflag'] != 'MONTH']) > 0:
        print('WARNING: Non-MONTH profiles found. This should not be for CMV.')
    month_scalars = ['Scalar%d' % m for m in range(1, 13)]
    return df[['state', 'facid', 'src_id', 'qflag'] + month_scalars]