df_proposal['orig_job'] = orig_jobs # ASSIGN JOBS - flush and no flush option* # cmonths - career length in months for each employee. # length is equal to number of employees cmonths = f.career_months_df_in(df_proposal) # nonret_each_month: count of non-retired employees remaining # in each month until no more remain - # length is equal to longest career length nonret_each_month = f.count_per_month(cmonths) all_months = np.sum(nonret_each_month) cumulative = nonret_each_month.cumsum() np_low_limits = f.make_lower_slice_limits(cumulative) job_level_counts = np.array(jcnts_arr[1]) if cf.delayed_implementation: imp_month = cf.imp_month imp_low = np_low_limits[imp_month] imp_high = cumulative[imp_month] dstand = pd.read_pickle(stand_path_string) ds_option = dstand[['job_count', 'lspcnt', 'spcnt', 'rank_in_job', 'jobp']] dstand = dstand[['mnum', 'jnum', 'empkey', 'fur']][:imp_high] dstand.rename(columns={'jnum': 'stand_jobs'}, inplace=True) dstand['key'] = (dstand.empkey * 1000) + dstand.mnum dstand.drop(['mnum', 'empkey'], inplace=True, axis=1)
df_proposal['orig_job'] = orig_jobs # ASSIGN JOBS - flush and no flush option* # cmonths - career length in months for each employee. # length is equal to number of employees cmonths = f.career_months_df_in(df_proposal) # nonret_each_month: count of non-retired employees remaining # in each month until no more remain - # length is equal to longest career length nonret_each_month = f.count_per_month(cmonths) all_months = np.sum(nonret_each_month) cumulative = nonret_each_month.cumsum() np_low_limits = f.make_lower_slice_limits(cumulative) job_level_counts = np.array(jcnts_arr[1]) if cf.delayed_implementation: imp_month = cf.imp_month imp_low = np_low_limits[imp_month] imp_high = cumulative[imp_month] dstand = pd.read_pickle(stand_path_string) ds_option = dstand[['job_count', 'lspcnt', 'spcnt', 'rank_in_job', 'jobp']] dstand = dstand[['mnum', 'jnum', 'empkey', 'fur']][:imp_high] dstand.rename(columns={'jnum': 'stand_jobs'}, inplace=True) dstand['key'] = (dstand.empkey * 1000) + dstand.mnum
for i in range(len(ds_list)): df_long = ds_list[i] df_short = short_ds_list[i] jcnts = jcnts_arr[0][i] # jcnts = np.take(jcnts, np.where(jcnts != 0)[0]) short_len = len(short_ds_list[i]) # ORIG_JOB* cmonths_this_ds = f.career_months_df_in(df_short) this_ds_nonret_each_month = f.count_per_month(cmonths_this_ds) uppers = this_ds_nonret_each_month.cumsum() lowers = f.make_lower_slice_limits(uppers) all_months = np.sum(this_ds_nonret_each_month) this_table = table[0][i] this_month_counts = table[1][i] df_align = df_long[['twa', 'fur']] fur_codes = np.array(df_align.fur) # if i == 0 and cf.apply_supc: # i == 0 >> eg1 from skeleton # twa_rights = np.array(cf.twa_rights) # twa_jobs = np.transpose(twa_rights)[1] # sup_c_counts = np.transpose(twa_rights)[2] # twa_dict = dict(zip(twa_jobs, sup_c_counts)) # # calc twa sup c condition month range and concat
def main():
    """Build an integrated dataset for a single list proposal.

    Command-line usage (via ``argv``)::

        script proposal_name [condition ...]

    Reads the skeleton, master list, and proposal-order pickles from the
    ``dill/`` folder, assigns jobs month-by-month (no-bump-no-flush or
    full-flush, per settings), computes seniority/job percentage, rank,
    and (optionally) pay measures, then writes the long-form result to
    ``dill/ds_<proposal_name>.pkl`` (or ``dill/ds_edit.pkl`` when run
    with the 'edit' condition).

    Exits the interpreter with a message if any required input pickle
    is missing.
    """
    script, proposal_name, *conditions = argv

    pre, suf = 'dill/', '.pkl'

    skeleton_path_string = (pre + 'skeleton' + suf)
    proposal_order_string = (pre + 'p_' + proposal_name + suf)
    stand_path_string = (pre + 'standalone' + suf)

    output_name = 'ds_' + proposal_name

    try:
        df_master = pd.read_pickle(pre + 'master' + suf)
    except OSError:
        print('Master list not found.  Run build_program_files script?')
        print('\n  >>> exiting routine.\n')
        exit()

    try:
        ds = pd.read_pickle(skeleton_path_string)
    except OSError:
        print('\nSkeleton file not found. ' +
              'Run build_program_files script?\n\n' +
              'Standalone build failed.\n\n' +
              '  >>> exiting routine.\n')
        exit()

    try:
        df_order = pd.read_pickle(proposal_order_string)
    except OSError:
        prop_names = \
            pd.read_pickle('dill/proposal_names.pkl').proposals.tolist()
        stored_case = pd.read_pickle('dill/case_dill.pkl').case.value
        print('\nerror : proposal name "' + str(proposal_name) +
              '" not found...\n')
        print('available proposal names are ', prop_names,
              'for case study:', stored_case)
        print('\n  >>> exiting routine.\n')
        exit()

    sdict = pd.read_pickle('dill/dict_settings.pkl')
    tdict = pd.read_pickle('dill/dict_job_tables.pkl')

    # do not include inactive employees (other than furlough) in data model
    df_master = df_master[
        (df_master.line == 1) | (df_master.fur == 1)].copy()

    num_of_job_levels = sdict['num_of_job_levels']
    lspcnt_calc = sdict['lspcnt_calc_on_remaining_population']

    # ORDER the skeleton df according to INTEGRATED list order.
    # df_skel can initially be in any integrated order, each employee
    # group must be in proper order relative to itself.
    # Use the short-form 'idx' (order) column from either the proposed
    # list or the new_order column from an edited list to create a new column,
    # 'new_order', within the long-form df_skel.  The new order column
    # is created by data alignment using the common empkey indexes.
    # The skeleton may then be sorted by month and new_order.
    # (note: duplicate df_skel empkey index empkeys (from different months)
    # are assigned the same order value)

    if 'edit' in conditions:
        df_new_order = pd.read_pickle(proposal_order_string)
        ds['new_order'] = df_new_order['new_order']
        dataset_path_string = (pre + 'ds_edit' + suf)
    else:
        # fix: was a bare "except:" which would also swallow SystemExit and
        # KeyboardInterrupt; a missing column attribute raises AttributeError
        try:
            order_key = df_order.idx
        except AttributeError:
            order_key = df_order.new_order
        ds['new_order'] = order_key
        dataset_path_string = (pre + output_name + suf)

    # remove any stale output file before rebuilding it
    if os.path.isdir('dill/'):
        try:
            os.remove(dataset_path_string)
        except OSError:
            pass

    # sort the skeleton by month and proposed list order
    ds.sort_values(['mnum', 'new_order'], inplace=True)

    # ORIG_JOB*

    eg_sequence = df_master.eg.values
    fur_sequence = df_master.fur.values

    # create list of employee group codes from the master data
    egs = sorted(pd.unique(eg_sequence))
    # retrieve job counts array
    jcnts_arr = tdict['jcnts_arr']

    if 'prex' in conditions:

        sg_rights = sdict['sg_rights']
        sg_eg_list = []
        sg_dict = od()
        stove_dict = od()

        # Find the employee groups which have pre-existing job rights...
        # grab the eg code from each sg (special group) job right description
        # and add to sg_eg_list
        for line_item in sg_rights:
            sg_eg_list.append(line_item[0])
        # place unique eg codes into sorted list
        sg_eg_list = sorted(pd.unique(sg_eg_list))

        # Make a dictionary containing the special group data for each
        # group with special rights
        for eg in sg_eg_list:
            sg_data = []
            for line_item in sg_rights:
                if line_item[0] == eg:
                    sg_data.append(line_item)
            sg_dict[eg] = sg_data

        for eg in egs:

            if eg in sg_eg_list:
                # (run prex stovepipe routine with eg dict key and value)
                sg = df_master[df_master.eg == eg]['sg'].values
                fur = df_master[df_master.eg == eg]['fur']
                ojob_array = f.make_stovepipe_prex_shortform(
                    jcnts_arr[0][eg - 1], sg, sg_dict[eg], fur)
                prex_stove = np.take(ojob_array, np.where(fur == 0)[0])
                stove_dict[eg] = prex_stove
            else:
                # (run make_stovepipe routine with eg dict key and value)
                stove_dict[eg] = f.make_stovepipe_jobs_from_jobs_arr(
                    jcnts_arr[0][eg - 1])

        # use dict values as inputs to sp_arr,
        # ordered dict maintains proper sequence...
        sp_arr = list(np.array(list(stove_dict.values())))
        # total of jobs per eg
        eg_job_counts = np.add.reduce(jcnts_arr[0], axis=1)

        orig_jobs = f.make_intgrtd_from_sep_stove_lists(sp_arr,
                                                        eg_sequence,
                                                        fur_sequence,
                                                        eg_job_counts,
                                                        num_of_job_levels)

    else:

        orig_jobs = f.make_original_jobs_from_counts(
            jcnts_arr[0], eg_sequence,
            fur_sequence, num_of_job_levels).astype(int)

    # insert stovepipe job result into new column of proposal (month_form)
    # this indexes the jobs with empkeys (orig_jobs is an ndarray only)

    df_master['orig_job'] = orig_jobs

    # ASSIGN JOBS - flush and no flush option*

    # cmonths - career length in months for each employee.
    # length is equal to number of employees
    cmonths = f.career_months(df_master, sdict['starting_date'])

    # nonret_each_month: count of non-retired employees remaining
    # in each month until no more remain -
    # length is equal to longest career length
    nonret_each_month = f.count_per_month(cmonths)
    all_months = np.sum(nonret_each_month)
    high_limits = nonret_each_month.cumsum()
    low_limits = f.make_lower_slice_limits(high_limits)

    if sdict['delayed_implementation']:

        imp_month = sdict['imp_month']
        imp_low = low_limits[imp_month]
        imp_high = high_limits[imp_month]

        # read the standalone dataset (info is not in integrated order)
        ds_stand = pd.read_pickle(stand_path_string)

        # get standalone data and order it the same as the integrated dataset.
        # create a unique key column in the standalone data df and a temporary
        # df which is ordered according to the integrated dataset

        imp_cols, arr_dict, col_array = \
            f.make_preimp_array(ds_stand, ds,
                                imp_high,
                                sdict['compute_job_category_order'],
                                sdict['compute_pay_measures'])

        # select columns to use as pre-implementation data for integrated
        # dataset data is limited to the pre-implementation months

        # aligned_jnums and aligned_fur arrays are the same as standalone data
        # up to the end of the implementation month, then the standalone value
        # for the implementation month is passed down unchanged for the
        # remainder of months in the model.  These arrays carry over
        # standalone data for each employee group to be honored until and when
        # the integrated list is implemented.
        # These values from the standalone datasets (furlough status and
        # standalone job held at the implementation date) are needed for
        # subsequent integrated dataset job assignment calculations.  Other
        # standalone values are simply copied and inserted into the
        # pre-implementation months of the integrated dataset.

        delayed_jnums = col_array[arr_dict['jnum']]
        delayed_fur = col_array[arr_dict['fur']]

        aligned_jnums = f.align_fill_down(imp_low,
                                          imp_high,
                                          ds[[]],  # indexed with empkeys
                                          delayed_jnums)

        aligned_fur = f.align_fill_down(imp_low,
                                        imp_high,
                                        ds[[]],
                                        delayed_fur)

        # now assign "filled-down" job numbers to numpy array
        delayed_jnums[imp_low:] = aligned_jnums[imp_low:]
        delayed_fur[imp_low:] = aligned_fur[imp_low:]

        # ORIG_JOB and FUR (delayed implementation)
        # then assign numpy array values to orig_job column of integrated
        # dataset as starting point for integrated job assignments
        ds['orig_job'] = delayed_jnums
        ds['fur'] = delayed_fur

        if sdict['integrated_counts_preimp']:
            # assign combined job counts prior to the implementation date.
            # (otherwise, separate employee group counts will be used when
            # data is transferred from col_array at end of script)
            # NOTE: this data is the actual number of jobs held within each
            # category; could be less than the number of jobs available as
            # attrition occurs
            standalone_preimp_job_counts = \
                f.make_delayed_job_counts(imp_month,
                                          delayed_jnums,
                                          low_limits,
                                          high_limits)
            col_array[arr_dict['job_count']][:imp_high] = \
                standalone_preimp_job_counts

    else:
        # set implementation month at zero for job assignment routine
        imp_month = 0

        # ORIG_JOB and FUR (no delayed implementation)
        # transfer proposal stovepipe jobs (month_form) to long_form via index
        # (empkey) alignment...
        ds['orig_job'] = df_master['orig_job']
        # developer note: test to verify this is not instantiated elsewhere...
        ds['fur'] = df_master['fur']

    table = tdict['table']
    j_changes = tdict['j_changes']

    reduction_months = f.get_job_reduction_months(j_changes)
    # copy selected columns from ds for job assignment function input below.
    # note: if delayed implementation, the 'fur' and 'orig_job' columns
    # contain standalone data through the implementation month.
    df_align = ds[['eg', 'sg', 'fur', 'orig_job']].copy()

    # JNUM, FUR, JOB_COUNT

    if sdict['no_bump']:

        # No bump, no flush option (includes conditions, furlough/recall,
        # job changes schedules)
        # this is the main job assignment function.  It loops through all of
        # the months in the model and assigns jobs
        nbnf, job_count, fur = \
            f.assign_jobs_nbnf_job_changes(df_align,
                                           low_limits,
                                           high_limits,
                                           all_months,
                                           reduction_months,
                                           imp_month,
                                           conditions,
                                           sdict,
                                           tdict,
                                           fur_return=sdict['recall'])

        ds['jnum'] = nbnf
        ds['job_count'] = job_count
        ds['fur'] = fur
        # for create_snum_and_spcnt_arrays function input...
        jnum_jobs = nbnf

    else:

        # Full flush and bump option (no conditions or
        # furlough/recall schedulue considered, job changes are included)
        # No bump, no flush applied up to implementation date
        fbff, job_count, fur = f.assign_jobs_full_flush_job_changes(
            nonret_each_month, table[0], num_of_job_levels)

        ds['jnum'] = fbff
        ds['job_count'] = job_count
        ds['fur'] = fur
        # for create_snum_and_spcnt_arrays function input...
        jnum_jobs = fbff

    # SNUM, SPCNT, LNUM, LSPCNT

    monthly_job_counts = table[1]

    ds['snum'], ds['spcnt'], ds['lnum'], ds['lspcnt'] = \
        f.create_snum_and_spcnt_arrays(jnum_jobs,
                                       num_of_job_levels,
                                       nonret_each_month,
                                       monthly_job_counts,
                                       lspcnt_calc)

    # RANK in JOB

    ds['rank_in_job'] = ds.groupby(['mnum', 'jnum'],
                                   sort=False).cumcount() + 1

    # JOBP
    # make the last percentage position in each job category .99999 instead
    # of 1.0 so that jobp (job number + percentage) stays below the next
    # whole job number
    jpcnt = (ds.rank_in_job / ds.job_count).values
    np.put(jpcnt, np.where(jpcnt == 1.0)[0], .99999)

    ds['jobp'] = ds['jnum'] + jpcnt

    # PAY - merge with pay table - provides monthly pay
    if sdict['compute_pay_measures']:

        # account for furlough time (only count active months)
        if sdict['discount_longev_for_fur']:
            # skel(ds) provides pre-calculated non-discounted scale data
            # flip ones and zeros...
            ds['non_fur'] = 1 - ds.fur.values

            non_fur = ds.groupby([pd.Grouper('empkey')])['non_fur'] \
                .cumsum().values
            ds.pop('non_fur')
            starting_mlong = ds.s_lmonths.values
            cum_active_months = non_fur + starting_mlong
            ds['mlong'] = cum_active_months
            ds['ylong'] = ds['mlong'].values / 12
            ds['scale'] = np.clip((cum_active_months / 12) + 1, 1,
                                  sdict['top_of_scale']).astype(int)

        # make a new long_form dataframe and assign a combination of
        # pay-related ds columns from large dataset as its index...
        # the dataframe is empty - we are only making an index-alignment
        # vehicle to use with indexed pay table....
        # the dataframe index contains specific scale, job, and contract year
        # for each line in long_form ds
        df_pt_index = pd.DataFrame(index=((ds['scale'].values * 100) +
                                          ds['jnum'].values +
                                          (ds['year'].values * 100000)))

        if sdict['enhanced_jobs']:
            df_pt = pd.read_pickle('dill/pay_table_enhanced.pkl')
        else:
            df_pt = pd.read_pickle('dill/pay_table_basic.pkl')

        # 'data-align' small indexed pay_table to long_form df:
        df_pt_index['monthly'] = df_pt['monthly']

        ds['monthly'] = df_pt_index.monthly.values

        # MPAY
        # adjust monthly pay for any raise and last month pay percent if
        # applicable
        ds['mpay'] = ((ds['pay_raise'].values *
                       ds['mth_pcnt'].values *
                       ds['monthly'].values)) / 1000

        ds.pop('monthly')

        # CPAY

        ds['cpay'] = ds.groupby('new_order')['mpay'].cumsum()

    if sdict['delayed_implementation']:
        ds_cols = ds.columns
        # grab each imp_col (column to insert standalone or pre-implementation
        # date data) and replace integrated data up through implementation
        # date
        for col in imp_cols:
            if col in ds_cols:
                arr = ds[col].values
                arr[:imp_high] = col_array[arr_dict[col]][:imp_high]
                ds[col] = arr

    # CAT_ORDER
    # global job ranking
    if sdict['compute_job_category_order']:
        ds['cat_order'] = f.make_cat_order(ds, table[0])

    # save to file
    if sdict['save_to_pickle']:
        ds.to_pickle(dataset_path_string)
def main():
    """Build the standalone dataset (each employee group modeled separately).

    Command-line usage (via ``argv``)::

        script [condition ...]    # 'prex' enables special group job rights

    Reads the skeleton and the settings/job-table dictionaries from the
    ``dill/`` folder, runs the standalone job assignment routine for each
    employee group in turn, computes the monthly measures (job number,
    rank, percentages, and optionally pay), and writes the combined
    long-form result to ``dill/standalone.pkl``.

    Exits the interpreter with a message if the skeleton pickle is missing.
    """
    script, *conditions = argv

    input_skel = 'skeleton'

    pre, suf = 'dill/', '.pkl'

    skeleton_path_string = (pre + input_skel + suf)

    try:
        ds = pd.read_pickle(skeleton_path_string)
    except OSError:
        print('\nSkeleton file not found. ' +
              'Run build_program_files script?\n\n' +
              'Standalone build failed.\n\n' +
              '  >>> exiting routine.\n')
        exit()

    # remove any stale output file before rebuilding it
    if os.path.isdir('dill/'):
        try:
            os.remove('dill/standalone.pkl')
        except OSError:
            pass

    sdict = pd.read_pickle('dill/dict_settings.pkl')
    tdict = pd.read_pickle('dill/dict_job_tables.pkl')

    num_of_job_levels = sdict['num_of_job_levels']

    egs = np.unique(ds.eg)
    start_month = 0

    # make prex True or False
    # (for input to assign_standalone_job_changes function)
    prex = 'prex' in conditions

    table = tdict['s_table']
    jcnts_arr = tdict['jcnts_arr']
    j_changes = tdict['j_changes']

    job_change_months = f.get_job_change_months(j_changes)
    job_reduction_months = f.get_job_reduction_months(j_changes)

    # sort the skeleton by employee group, month, and index
    # (preserves each group's list order)
    # fix: the original call discarded the sorted result (no inplace=True
    # and no assignment), so the per-group monthly slicing below could run
    # on an unsorted skeleton
    ds.sort_values(['eg', 'mnum', 'idx'], inplace=True)

    ds_dict = {}
    short_ds_dict = {}

    # split the skeleton into one long-form df per employee group...
    for grp in egs:
        ds_dict[grp] = ds[ds.eg == grp].copy()

    # ...and a month-zero (short-form) df per employee group
    for grp in egs:
        short_ds_dict[grp] = ds_dict[grp][ds_dict[grp].mnum == 0].copy()

    ds = pd.DataFrame()

    for eg in egs:

        df_long = ds_dict[eg]
        df_short = short_ds_dict[eg]
        jcnts = jcnts_arr[0][eg - 1]
        short_len = len(df_short)

        # ORIG_JOB*
        cmonths_this_ds = \
            f.career_months(df_short, sdict['starting_date'])
        this_ds_nonret_each_month = f.count_per_month(cmonths_this_ds)
        high_limits = this_ds_nonret_each_month.cumsum()
        low_limits = f.make_lower_slice_limits(high_limits)
        all_months = np.sum(this_ds_nonret_each_month)

        this_eg_table = f.add_zero_col(table[0][eg - 1])
        this_eg_month_counts = table[1][eg - 1]

        df_align_cols = ['fur']
        if 'sg' in df_long:
            df_align_cols.append('sg')

        df_align = df_long[df_align_cols]

        # pre-existing employee group special job assignment is included
        # within the job assignment function below...
        results = f.assign_standalone_job_changes(eg,
                                                  df_align,
                                                  low_limits,
                                                  high_limits,
                                                  all_months,
                                                  this_eg_table,
                                                  this_eg_month_counts,
                                                  this_ds_nonret_each_month,
                                                  job_change_months,
                                                  job_reduction_months,
                                                  start_month,
                                                  sdict,
                                                  tdict,
                                                  apply_sg_cond=prex)

        jnums = results[0]
        count_col = results[1]
        held = results[2]
        fur = results[3]
        orig_jobs = results[4]

        # HELD JOB
        # job from previous month
        df_long['held'] = held

        # JOB_COUNT
        df_long['job_count'] = count_col

        # ORIG_JOB
        df_short['orig_job'] = orig_jobs
        df_long['orig_job'] = df_short['orig_job']

        # ASSIGN JOBS - (stovepipe method only since only
        # assigning within each employee group separately)

        # JNUM
        df_long['jnum'] = jnums

        # SNUM, SPCNT, LNUM, LSPCNT
        monthly_job_counts = table[1][eg - 1]
        lspcnt_calc = sdict['lspcnt_calc_on_remaining_population']

        df_long['snum'], df_long['spcnt'], \
            df_long['lnum'], df_long['lspcnt'] = \
            f.create_snum_and_spcnt_arrays(jnums,
                                           num_of_job_levels,
                                           this_ds_nonret_each_month,
                                           monthly_job_counts,
                                           lspcnt_calc)

        # RANK in JOB
        df_long['rank_in_job'] = \
            df_long.groupby(['mnum', 'jnum']).cumcount() + 1

        # JOBP
        # make last percentage position in each job category .99999 vs 1.0
        # so that jobp calculations are correct
        jpcnt = (df_long.rank_in_job / df_long.job_count).values
        np.put(jpcnt, np.where(jpcnt == 1.0)[0], .99999)

        df_long['jobp'] = df_long['jnum'] + jpcnt

        # PAY - merge with pay table - provides monthly pay
        if sdict['compute_pay_measures']:

            if sdict['discount_longev_for_fur']:
                # skel provides non-discounted scale data
                # flip ones and zeros...
                df_long['non_fur'] = 1 - fur
                df_long['fur'] = fur

                non_fur = \
                    (df_long.groupby([pd.Grouper('empkey')])
                     ['non_fur'].cumsum().values)
                df_long.pop('non_fur')
                starting_mlong = df_long.s_lmonths.values
                cum_active_months = non_fur + starting_mlong
                df_long['mlong'] = cum_active_months
                df_long['ylong'] = df_long['mlong'] / 12
                df_long['scale'] = \
                    np.clip((cum_active_months / 12) + 1, 1,
                            sdict['top_of_scale']).astype(int)

            # SCALE
            # empty index-alignment df: index encodes scale, job, and
            # contract year for each long-form row
            df_pt_index = pd.DataFrame(
                index=(df_long['scale'] * 100) + df_long['jnum'] +
                (df_long['year'] * 100000))

            if sdict['enhanced_jobs']:
                df_pt = pd.read_pickle(
                    'dill/pay_table_enhanced.pkl')
            else:
                df_pt = pd.read_pickle(
                    'dill/pay_table_basic.pkl')

            df_pt_index['monthly'] = df_pt['monthly']

            df_long['monthly'] = df_pt_index.monthly.values

            # MPAY
            # adjust monthly pay for any raise and last month pay percent if
            # applicable
            df_long['mpay'] = (
                (df_long['pay_raise'] *
                 df_long['mth_pcnt'] *
                 df_long['monthly'])) / 1000

            df_long.pop('monthly')

            # CPAY
            df_long['cpay'] = df_long.groupby('idx')['mpay'].cumsum()

        ds = pd.concat([ds, df_long], ignore_index=True)

    ds.sort_values(by=['mnum', 'idx'], inplace=True)
    ds.set_index('empkey', drop=False, verify_integrity=False, inplace=True)

    # CAT_ORDER
    # global job ranking
    if sdict['compute_job_category_order']:
        table = tdict['table']
        ds['cat_order'] = f.make_cat_order(ds, table[0])

    # save to file
    if sdict['save_to_pickle']:
        ds.to_pickle('dill/standalone.pkl')
def main():
    """Build the standalone dataset (each employee group modeled separately).

    Command-line usage (via ``argv``)::

        script [condition ...]    # 'prex' enables special group job rights

    Reads the skeleton and the settings/job-table dictionaries from the
    ``dill/`` folder, runs the standalone job assignment routine for each
    employee group in turn, computes the monthly measures (job number,
    rank, percentages, and optionally pay), and writes the combined
    long-form result to ``dill/standalone.pkl``.

    Exits the interpreter with a message if the skeleton pickle is missing.
    """
    script, *conditions = argv

    input_skel = 'skeleton'

    pre, suf = 'dill/', '.pkl'

    skeleton_path_string = (pre + input_skel + suf)

    try:
        ds = pd.read_pickle(skeleton_path_string)
    except OSError:
        print('\nSkeleton file not found. ' +
              'Run build_program_files script?\n\n' +
              'Standalone build failed.\n\n' +
              '  >>> exiting routine.\n')
        exit()

    # remove any stale output file before rebuilding it
    if os.path.isdir('dill/'):
        try:
            os.remove('dill/standalone.pkl')
        except OSError:
            pass

    sdict = pd.read_pickle('dill/dict_settings.pkl')
    tdict = pd.read_pickle('dill/dict_job_tables.pkl')

    num_of_job_levels = sdict['num_of_job_levels']

    egs = np.unique(ds.eg)
    start_month = 0

    # make prex True or False
    # (for input to assign_standalone_job_changes function)
    prex = 'prex' in conditions

    table = tdict['s_table']
    jcnts_arr = tdict['jcnts_arr']
    j_changes = tdict['j_changes']

    job_change_months = f.get_job_change_months(j_changes)
    job_reduction_months = f.get_job_reduction_months(j_changes)

    # sort the skeleton by employee group, month, and index
    # (preserves each group's list order)
    # fix: the original call discarded the sorted result (no inplace=True
    # and no assignment), so the per-group monthly slicing below could run
    # on an unsorted skeleton
    ds.sort_values(['eg', 'mnum', 'idx'], inplace=True)

    ds_dict = {}
    short_ds_dict = {}

    # split the skeleton into one long-form df per employee group...
    for grp in egs:
        ds_dict[grp] = ds[ds.eg == grp].copy()

    # ...and a month-zero (short-form) df per employee group
    for grp in egs:
        short_ds_dict[grp] = ds_dict[grp][ds_dict[grp].mnum == 0].copy()

    ds = pd.DataFrame()

    for eg in egs:

        df_long = ds_dict[eg]
        df_short = short_ds_dict[eg]
        jcnts = jcnts_arr[0][eg - 1]
        short_len = len(df_short)

        # ORIG_JOB*
        cmonths_this_ds = \
            f.career_months(df_short, sdict['starting_date'])
        this_ds_nonret_each_month = f.count_per_month(cmonths_this_ds)
        high_limits = this_ds_nonret_each_month.cumsum()
        low_limits = f.make_lower_slice_limits(high_limits)
        all_months = np.sum(this_ds_nonret_each_month)

        this_eg_table = f.add_zero_col(table[0][eg - 1])
        this_eg_month_counts = table[1][eg - 1]

        df_align_cols = ['fur']
        if 'sg' in df_long:
            df_align_cols.append('sg')

        df_align = df_long[df_align_cols]

        # pre-existing employee group special job assignment is included
        # within the job assignment function below...
        results = f.assign_standalone_job_changes(eg,
                                                  df_align,
                                                  low_limits,
                                                  high_limits,
                                                  all_months,
                                                  this_eg_table,
                                                  this_eg_month_counts,
                                                  this_ds_nonret_each_month,
                                                  job_change_months,
                                                  job_reduction_months,
                                                  start_month,
                                                  sdict,
                                                  tdict,
                                                  apply_sg_cond=prex)

        jnums = results[0]
        count_col = results[1]
        held = results[2]
        fur = results[3]
        orig_jobs = results[4]

        # HELD JOB
        # job from previous month
        df_long['held'] = held

        # JOB_COUNT
        df_long['job_count'] = count_col

        # ORIG_JOB
        df_short['orig_job'] = orig_jobs
        df_long['orig_job'] = df_short['orig_job']

        # ASSIGN JOBS - (stovepipe method only since only
        # assigning within each employee group separately)

        # JNUM
        df_long['jnum'] = jnums

        # SNUM, SPCNT, LNUM, LSPCNT
        monthly_job_counts = table[1][eg - 1]
        lspcnt_calc = sdict['lspcnt_calc_on_remaining_population']

        df_long['snum'], df_long['spcnt'], \
            df_long['lnum'], df_long['lspcnt'] = \
            f.create_snum_and_spcnt_arrays(jnums,
                                           num_of_job_levels,
                                           this_ds_nonret_each_month,
                                           monthly_job_counts,
                                           lspcnt_calc)

        # RANK in JOB
        df_long['rank_in_job'] = \
            df_long.groupby(['mnum', 'jnum']).cumcount() + 1

        # JOBP
        # make last percentage position in each job category .99999 vs 1.0
        # so that jobp calculations are correct
        jpcnt = (df_long.rank_in_job / df_long.job_count).values
        np.put(jpcnt, np.where(jpcnt == 1.0)[0], .99999)

        df_long['jobp'] = df_long['jnum'] + jpcnt

        # PAY - merge with pay table - provides monthly pay
        if sdict['compute_pay_measures']:

            if sdict['discount_longev_for_fur']:
                # skel provides non-discounted scale data
                # flip ones and zeros...
                df_long['non_fur'] = 1 - fur
                df_long['fur'] = fur

                non_fur = \
                    (df_long.groupby([pd.Grouper('empkey')])
                     ['non_fur'].cumsum().values)
                df_long.pop('non_fur')
                starting_mlong = df_long.s_lmonths.values
                cum_active_months = non_fur + starting_mlong
                df_long['mlong'] = cum_active_months
                df_long['ylong'] = df_long['mlong'] / 12
                df_long['scale'] = \
                    np.clip((cum_active_months / 12) + 1, 1,
                            sdict['top_of_scale']).astype(int)

            # SCALE
            # empty index-alignment df: index encodes scale, job, and
            # contract year for each long-form row
            df_pt_index = pd.DataFrame(index=(df_long['scale'] * 100) +
                                       df_long['jnum'] +
                                       (df_long['year'] * 100000))

            if sdict['enhanced_jobs']:
                df_pt = pd.read_pickle('dill/pay_table_enhanced.pkl')
            else:
                df_pt = pd.read_pickle('dill/pay_table_basic.pkl')

            df_pt_index['monthly'] = df_pt['monthly']

            df_long['monthly'] = df_pt_index.monthly.values

            # MPAY
            # adjust monthly pay for any raise and last month pay percent if
            # applicable
            df_long['mpay'] = ((df_long['pay_raise'] *
                                df_long['mth_pcnt'] *
                                df_long['monthly'])) / 1000

            df_long.pop('monthly')

            # CPAY
            df_long['cpay'] = df_long.groupby('idx')['mpay'].cumsum()

        ds = pd.concat([ds, df_long], ignore_index=True)

    ds.sort_values(by=['mnum', 'idx'], inplace=True)
    ds.set_index('empkey', drop=False, verify_integrity=False, inplace=True)

    # CAT_ORDER
    # global job ranking
    if sdict['compute_job_category_order']:
        table = tdict['table']
        ds['cat_order'] = f.make_cat_order(ds, table[0])

    # save to file
    if sdict['save_to_pickle']:
        ds.to_pickle('dill/standalone.pkl')
short_ds_dict[grp] = ds_dict[grp][ds_dict[grp].mnum == 0].copy() ds = pd.DataFrame() for i in egs - 1: df_long = ds_dict[i + 1] df_short = short_ds_dict[i + 1] jcnts = jcnts_arr[0][i] short_len = len(df_short) # ORIG_JOB* cmonths_this_ds = f.career_months_df_in(df_short) this_ds_nonret_each_month = f.count_per_month(cmonths_this_ds) uppers = this_ds_nonret_each_month.cumsum() lowers = f.make_lower_slice_limits(uppers) all_months = np.sum(this_ds_nonret_each_month) this_table = table[0][i] this_month_counts = table[1][i] df_align_cols = ['fur'] if 'sg' in df_long: df_align_cols.append('sg') df_align = df_long[df_align_cols] fur_codes = np.array(df_align.fur) # pre-existing employee group special job assignment is included within # the job assignment function below...