def check_parameters_historical(dict_input_historical):
    '''
    Check the input parameters for historical.

    Fills in default output sheet names for any 'sheet_*' entry left
    unset, and verifies that a user-specified sheet name does not clash
    with one of the required input sheets.
    '''
    # Default output sheet names, keyed by parameter name
    default_sheetnames = {
        'sheet_quantreg': 'Quant reg coefficients',
        'sheet_cond_quant': 'Conditional quantiles',
        'sheet_historical': 'Historical distribution',
    }

    # Sheets required as inputs; outputs must never overwrite these
    input_sheets = [
        'Readme', 'Input_parameters', 'Partition_groups', 'Data',
        'Processing_Log'
    ]

    for key, val in list(dict_input_historical.items()):
        # Only 'sheet_*' parameters are checked here
        if key.find('sheet_') == -1:
            continue
        if val is None:
            # Nothing specified: fall back to the default name
            if key in default_sheetnames:
                dict_input_historical[key] = default_sheetnames[key]
            else:
                message = 'No sheet called ' + key + ' should exist'
                show_message(message, halt=True)
        elif val in input_sheets:
            # A specified name may match an existing *output* sheet,
            # but never one of the required input sheets
            message = key + ' specified as ' + val + ', cannot be the same as necessary input sheet'
            show_message(message, halt=True)
def do_partition(debug=False):
    '''
    Entry point function called when button for partitions is called.

    This function cannot take in any arguments and can not have any
    return values due to limitation of the RunPython VBA code in
    xlwings.
    '''
    # Start measurement of time
    start_time = time.time()

    # A caller Workbook must have been set before this entry point runs
    if wb is None:
        print('partition_main: wb is None')
        print('This may be due to not calling set_mock_caller_file')
        print('and setting the caller Workbook')
        import sys
        sys.exit(-1)

    if debug:
        print('+' * 40)
        print('start of do_partition')
        print('+' * 40)
        print('---- calling prerun_partition')

    # Read and validate all inputs
    dict_input_partition, dict_groups, df_partition = prerun_partition(
        debug=debug)

    # Main computation
    if debug:
        print('---- calling run_partition')
    dict_output_partition = run_partition(dict_input_partition,
                                          dict_groups,
                                          df_partition,
                                          debug=debug)

    # Write results back to the workbook
    if debug:
        print('---- calling postrun_partition')
    postrun_partition(dict_output_partition, debug=debug)

    # Total time for this operation (formatted string)
    elapsed = "{:.1f}".format(time.time() - start_time)

    # Every output sheet except the input sheet goes in the message
    output_sheets = [
        val for key, val in dict_output_partition.items()
        if key.find('sheet') != -1 and key != 'sheet_input'
    ]
    message = 'Finished with partition in ' + elapsed + ' sec,\n'
    message += 'output is in sheets ' + ', '.join(output_sheets)
    show_message(message, msgtype='info')
def prerun_scenario(debug=False):
    '''
    Prerun function for scenario.

    This function cannot take in any arguments due to limitations
    of the RunPython VBA code in xlwings.

    Check that the necessary steps beforehand have been done.
    Read in/check the input parameters and return a dict for input
    parameters and a dict of DataFrames for data.
    '''
    # --------------------------
    # Read in parameters
    # --------------------------
    dict_input_scenario = read_parameters_scenario()

    # --------------------------
    # Check parameter values
    # --------------------------
    check_parameters_scenario(dict_input_scenario)

    # Read in global parameters
    # (this also checks if values have changed since being initially set)
    dict_global_params = read_parameters_global()

    # Add each key, val from dict_global_params to dict_input_scenario
    for key, val in dict_global_params.items():
        # Check that the keys do not clash
        # BUGFIX: message previously named dict_input_quantfit (copy-paste)
        if key in dict_input_scenario:
            message = 'dict_input_scenario should not have key ' + key + ' that is common with dict_global_params'
            show_message(message, halt=True)
        dict_input_scenario[key] = val

    # Sheets whose data is needed for the scenario step
    input_sheetnames = [
        dict_input_scenario['sheet_partition'],
        dict_input_scenario['sheet_quantreg'],
        dict_input_scenario['sheet_cond_quant']
    ]
    sheet_partition = dict_input_scenario['sheet_partition']
    df_scenario_collections = read_data_scenario(input_sheetnames)

    # Restrict the 'Data' frame to the date span covered by the
    # partition sheet (first to last partition date, inclusive)
    partition_dates = df_scenario_collections[sheet_partition]['date'].values
    df_scenario_collections['Data'] = df_scenario_collections['Data'][
        (df_scenario_collections['Data'].index >= partition_dates[0])
        & (df_scenario_collections['Data'].index <= partition_dates[-1])]

    # return a dict for input parameters and a dict of DataFrames
    return dict_input_scenario, df_scenario_collections
def do_quantfit(path='.', debug=False):
    '''
    Entry point function called when button for quantile fits is called.

    This function cannot take in any arguments and can not have any
    return values due to limitation of the RunPython VBA code in
    xlwings.
    '''
    # Start measurement of time
    start_time = time.time()

    if debug:
        print('+' * 40)
        print('start of do_quantfit')
        print('+' * 40)
        print('---- calling prerun_quantfit')

    # Read and validate all inputs
    dict_input_quantfit, df_quantfit = prerun_quantfit(debug=debug)

    if debug:
        print('dict_input_quantfit:')
        for key, val in dict_input_quantfit.items():
            print(key.ljust(20) + ':' + str(val))
        print('df_quantfit:')
        print(df_quantfit)
        print('---- calling run_quantfit')

    # Main computation
    dict_output_quantfit = run_quantfit(dict_input_quantfit,
                                        df_quantfit,
                                        debug=debug)

    # Write results back to the workbook
    if debug:
        print('---- calling postrun_quantfit')
    postrun_quantfit(dict_output_quantfit, path=path, debug=debug)

    # Total time for this operation (formatted string)
    elapsed = "{:.1f}".format(time.time() - start_time)

    output_sheets = [
        dict_output_quantfit[key] for key in dict_output_quantfit
        if key.find('sheet') != -1
    ]
    message = 'Finished with quantfit in ' + elapsed + ' sec,\n'
    message += 'output is in sheets ' + ', '.join(output_sheets)
    show_message(message)
def prerun_quantfit(debug=False):
    '''
    Prerun function for step 2, quantfit.

    This function cannot take in any arguments due to limitations
    of the RunPython VBA code in xlwings.

    Check that the necessary steps beforehand have been done.
    Read in/check the input parameters and return a dict for input
    parameters and a df for data.
    '''
    if debug:
        print('=' * 30)
        print('start of prerun_quantfit')
        print('=' * 30)

    # Required keys for the input parameter dict
    required_keys = [
        'quantlist', 'regressors', 'sheet_input', 'sheet_quantreg',
        'sheet_cond_quant'
    ]

    # Read and validate the step-specific parameters
    dict_input_quantfit = read_parameters_quantfit()
    check_parameters_quantfit(dict_input_quantfit, required_keys)

    # Read in global parameters
    # (this also checks if values have changed since being initially set)
    dict_global_params = read_parameters_global()

    # Merge the global parameters in, refusing any key collision
    for key, val in dict_global_params.items():
        if key in dict_input_quantfit:
            message = 'dict_input_quantfit should not have key ' + key + ' that is common with dict_global_params'
            show_message(message, halt=True)
        dict_input_quantfit[key] = val

    # Load the data for the quantile fits
    df_quantfit = read_data_quantfit(dict_input_quantfit['sheet_input'])

    # return a dict for input parameters and a df
    return dict_input_quantfit, df_quantfit
def check_partition_groups(dict_groups, df_partition):
    '''
    Check that every variable listed for each group exists as a
    column in df_partition.
    '''
    # Build the set of available columns once for fast membership tests
    available_cols = set(df_partition.columns)

    for group, varnames in dict_groups.items():
        for varname in varnames:
            if varname not in available_cols:
                message = 'variable ' + varname + ' was specified for group ' + group + ' but does not exist in df_partition'
                show_message(message, halt=True)
def prerun_tsfit(debug=False):
    '''
    Prerun function for step 3, tsfit.

    Check that the necessary steps beforehand have been done.
    Read in/check the input parameters and return a dict for input
    parameters and a dict of DataFrames for data.
    '''
    if debug:
        print('=' * 30)
        print('start of prerun_tsfit')
        print('=' * 30)

    # Required keys for the input parameter dict
    required_keys = ['latest_date', 'fit_params', 'sheet_tsfit']

    # Read and validate the step-specific parameters
    dict_input_tsfit = read_parameters_tsfit()
    check_parameters_tsfit(dict_input_tsfit, required_keys)

    # Read in global parameters
    # (this also checks if values have changed since being initially set)
    dict_global_params = read_parameters_global()

    # Merge the global parameters in, refusing any key collision
    for key, val in dict_global_params.items():
        if key in dict_input_tsfit:
            message = 'dict_input_tsfit should not have key ' + key + ' that is common with dict_global_params'
            show_message(message, halt=True)
        dict_input_tsfit[key] = val

    # Load the data from the three input sheets
    input_sheetnames = [
        dict_input_tsfit[name]
        for name in ('sheet_partition', 'sheet_quantreg', 'sheet_cond_quant')
    ]
    df_tsfit_collections = read_data_tsfit(input_sheetnames)

    # return a dict for input parameters and a dict of DataFrames
    return dict_input_tsfit, df_tsfit_collections
def prerun_segment(debug=False):
    '''
    Prerun function for segment.

    This function cannot take in any arguments due to limitations
    of the RunPython VBA code in xlwings.

    Check that the necessary steps beforehand have been done.
    Read in/check the input parameters and return a dict for input
    parameters and a df for data.
    '''
    # --------------------------
    # Read in parameters
    # --------------------------
    dict_input_segment = read_parameters_segment()

    # --------------------------
    # Check parameter values
    # --------------------------
    check_parameters_segment(dict_input_segment)

    # Read in global parameters
    # (this also checks if values have changed since being initially set)
    dict_global_params = read_parameters_global()

    # Add each key, val from dict_global_params to dict_input_segment
    for key, val in dict_global_params.items():
        # Check that the keys do not clash
        # BUGFIX: message previously named dict_input_quantfit (copy-paste)
        if key in dict_input_segment:
            message = 'dict_input_segment should not have key ' + key + ' that is common with dict_global_params'
            show_message(message, halt=True)
        dict_input_segment[key] = val

    # Create df for data
    df_segment_collections = read_data_segment()

    # return a dict for input parameters and a df
    return dict_input_segment, df_segment_collections
def read_data_partition():
    '''
    Read in the input data for partition.

    All data must be in the sheet named "Data", starting at cell A1
    with a header row. Returns a DataFrame indexed by its 'date'
    column (the column itself is kept as well).
    '''
    # Get the sheets in the wb
    existing_sheets = [sheet.name for sheet in wb.sheets]

    # The Data sheet is mandatory
    if 'Data' not in existing_sheets:
        message = 'Sheet named Data does not exist'
        show_message(message, halt=True)

    # Read in the Data sheet as a df
    df_partition = wb.sheets['Data'].range('A1').options(pd.DataFrame,
                                                         index=False,
                                                         expand='table').value

    # Guard against duplicated variable names in the header row
    dup_cols = df_partition.columns[df_partition.columns.duplicated()]
    if len(dup_cols) > 0:
        dstr = ','.join(set(dup_cols))
        message = 'Duplicate variables ' + dstr + ' in datasheet, please check.'
        show_message(message, halt=True)

    # Index by date (keep the 'date' column itself)
    df_partition.index = df_partition['date']
    df_partition.index.name = None

    # TODO: set index to PeriodIndex
    return df_partition
def do_historical(debug=False):
    '''
    Entry point function called when button for historical fits is called.

    This function cannot take in any arguments and can not have any
    return values due to limitation of the RunPython VBA code in
    xlwings.
    '''
    # Start measurement of time
    t0 = time.time()

    # Make sure a wb exists
    if wb is None:
        print('historical_main: wb is None')
        print('This may be due to not calling set_mock_caller_file')
        print('and setting the caller Workbook')
        import sys
        sys.exit(-1)
    else:
        print(wb)

    # prerun -> run -> postrun pipeline
    dict_input_historical, df_historical = prerun_historical(debug=debug)
    dict_output_historical = run_historical(dict_input_historical,
                                            df_historical,
                                            debug=debug)
    postrun_historical(dict_output_historical, debug=debug)

    # End measurement of time
    t1 = time.time()

    # Total time for this operation (formatted string)
    tdiff = "{:.1f}".format(t1 - t0)

    sheetname = dict_output_historical['sheet_historical']
    message = 'Finished with historical test in ' + tdiff + ' sec,\n'
    # BUGFIX: previously built as 'output is in sheets ' + ', ' + sheetname,
    # which rendered as "output is in sheets , <name>"
    message += 'output is in sheet ' + sheetname
    show_message(message, msgtype='info')
def check_data_partition(df_partition, dict_input_partition):
    '''
    Check that necessary columns in df_partition are available, and
    also check that all columns are numeric.
    '''
    # The 'target' parameter must name a column of df_partition
    for key in ['target']:
        col = dict_input_partition[key]
        if col not in df_partition.columns:
            message = 'col ' + col + ' for key ' + key + ' not in columns of df_partition'
            show_message(message, halt=True)

    # Any object-dtype column indicates non-numeric data (e.g. stray
    # characters in the sheet), which must be reported to the user
    obj_cols = [
        col for col in df_partition.columns
        if df_partition[col].dtype == np.dtype('O')
    ]
    if obj_cols:
        message = 'The following columns were not numeric types\n'
        message += 'This may be due to the data containing characters\n'
        message += 'Please remove all characters and run again\n'
        message += ', '.join(obj_cols)
        show_message(message, halt=True)
def run_partition(dict_input_partition, dict_groups, df_partition,
                  debug=False):
    '''
    Main run function for step 1, partition.

    Takes in as arguments a dict for input parameters, a dict mapping
    group names to their member variables, and a df for data.
    Outputs a dict for output parameters: the retro partition frame,
    loadings, log, figures, group names and method.

    ** This function should be independent of any Excel input/output
    and be executable as a regular Python function independent of
    Excel. **
    '''
    if debug:
        print('=' * 30)
        print('start of run_partition')
        print('=' * 30)
        # Show input parameters
        print('dict_input_partition:')
        for key in dict_input_partition:
            print(key.ljust(30) + ':' + str(dict_input_partition[key]))
        print('dict_groups:')
        for key in dict_groups:
            print(key.ljust(30) + ':' + str(dict_groups[key]))
        print('df_partition:')
        print(df_partition)

    # Suppress library warnings for the duration of the run
    warnings.filterwarnings("ignore")

    # ------------------------
    # Create DataFrame for log
    # ------------------------
    log_frame = pd.DataFrame(columns=['Time', 'Action'])

    # ------------------------
    # Create output dict
    # ------------------------
    dict_output_partition = dict()

    # ------------------------
    # Copy the output sheet names
    # from dict_input_partition
    # ------------------------
    for key in dict_input_partition:
        if key.find('sheet_') != -1:
            dict_output_partition[key] = dict_input_partition[key]

    # ------------------------
    # Get parameters from
    # dict_input_partition
    # ------------------------
    sdate = dict_input_partition['sdate']
    edate = dict_input_partition['edate']
    horizon = dict_input_partition['horizon']
    # Dependent-variable name encodes the target and the horizon
    tdep = dict_input_partition['target'] + '_hz_' + str(horizon)
    # Index by date but keep the 'date' column available
    df_partition = df_partition.set_index(df_partition['date'], drop=False)
    method = dict_input_partition['method']
    benchcutoff = dict_input_partition['pcutoff']
    rgdp = dict_input_partition['target']  # column name for real GDP
    method_growth = dict_input_partition['method_growth']
    PLStarget = dict_input_partition['PLS_target']

    # ------------------------
    # Run the partition
    # ------------------------
    retroframe, retroload, logretro, exitcode = partition_retro(
        dall=df_partition,
        groups_dict=dict_groups,
        tdep=tdep,
        rgdp=rgdp,
        method_growth=method_growth,
        horizon=horizon,
        method=method,
        sdate=sdate,
        edate=edate,
        benchcutoff=benchcutoff,
        PLStarget=PLStarget)
    # NOTE(review): DataFrame.append is deprecated in recent pandas;
    # pd.concat would be the replacement
    log_frame = log_frame.append(logretro, ignore_index=True)
    # exitcode -1 signals that some group had no usable data at all
    if exitcode == -1:
        message = 'In the given time period some groups are complete empty. No feasible partition can be made. Please adjust partition groups or start date'
        show_message(message, halt=True)

    # Add return values
    figs = {}
    figs = partition_plot(df_partition, retroframe, retroload,
                          list(dict_groups.keys()), PLStarget, tdep, method)
    dict_output_partition['frame'] = retroframe
    dict_output_partition['loading'] = retroload
    dict_output_partition['log'] = logretro
    dict_output_partition['figs'] = figs
    dict_output_partition['groups'] = list(dict_groups.keys())
    dict_output_partition['method'] = method
    return dict_output_partition
def check_parameters_quantfit(dict_input_quantfit, keys):
    '''
    Check the input parameters for quantfit.

    Validates quantile values, regressor transforms/options and sheet
    names, filling in default output sheet names where none were given.
    Halts via show_message on any invalid input.
    '''
    # Check that all keys exist
    for key in keys:
        if key not in dict_input_quantfit:
            message = 'key ' + key + ' not found in dict_input_quantfit'
            # BUGFIX: halt=True added; continuing without a required key
            # would raise a KeyError in the value checks below
            show_message(message, halt=True)

    input_sheets = [
        'Readme', 'Input_parameters', 'Partition_groups', 'Data',
        'Processing_Log'
    ]

    for key in keys:
        val = dict_input_quantfit[key]
        if key == 'quantlist':
            # Check that all values are between 0 and 1,
            # and that the necessary values of 0.10, 0.25, 0.50, 0.75, 0.90 exist
            vals_np = np.array(val)  # create np.array for value checking
            if not (np.all(0 < vals_np) and np.all(vals_np < 1)):
                message = 'All values of quantlist must be between 0 and 1'
                message += 'Given values: ' + str(val)
                show_message(message, halt=True)
            # Check that necessary values are present
            necessary_vals = [0.10, 0.25, 0.50, 0.75, 0.90]
            for _val in necessary_vals:
                if _val not in val:
                    message = 'Value of ' + str(
                        _val) + ' must be included in quantlist'
                    message += 'Given values: ' + str(val)
                    show_message(message, halt=True)
        if key == 'regressors':
            # val is a dict of dicts with keys [regressor]['transform/option']
            for regressor in val:
                transform = val[regressor]['transform']
                option = val[regressor]['option']
                # Check that transform is a valid value from the pulldown menu
                if transform not in [
                        'None', 'Lagged', 'MVA', 'Power', 'Diff', 'ChangeRate'
                ]:
                    message = 'transform for ' + regressor + ' was not a valid option, given ' + transform
                    show_message(message, halt=True)
                # 'None' must not carry an option
                if transform in ['None']:
                    if option is not None:
                        message = 'option for regressor = ' + regressor + ' with transform of ' + transform + ' must not have option set'
                        show_message(message, halt=True)
                # All other transforms need an integer-valued option
                if transform in [
                        'Lagged', 'MVA', 'Power', 'Diff', 'ChangeRate'
                ]:
                    if type(option) != float or abs(int(option) -
                                                    option) > 1E-5:
                        message = 'option for regressor = ' + regressor + ' with transform of ' + transform + ' must have option of int, given ' + str(
                            option)
                        show_message(message, halt=True)
                    # Since the value is less than 1E-5 away from an int,
                    # convert to int so that there are no problems later
                    dict_input_quantfit['regressors'][regressor][
                        'option'] = int(option)
        if key == 'sheet_input':
            # If nothing had been specified in the cell containing the
            # partition output sheet, set to default
            if val is None:
                sheetname = 'Output_partitions'
                dict_input_quantfit[key] = sheetname
            else:
                # BUGFIX: previously sheetname was left undefined when a
                # value was specified, raising NameError below
                sheetname = val
            # Get existing sheetnames
            sheetnames = [sheet.name for sheet in wb.sheets]
            if sheetname not in sheetnames:
                message = 'Input sheet for quantfit: ' + sheetname + ' does not exist'
                show_message(message, halt=True)
        elif key.find('sheet_') != -1:
            # If a value was specified, check that it is not one of the
            # input sheet names and use it as the output sheet name.
            # Otherwise we will use the default names below
            if val is None:
                if key == 'sheet_quantreg':
                    dict_input_quantfit[key] = 'Quant reg coefficients'
                elif key == 'sheet_cond_quant':
                    dict_input_quantfit[key] = 'Conditional quantiles'
                else:
                    message = 'No sheet called ' + key + ' should exist'
                    show_message(message, halt=True)
            else:
                # Check that the specified sheetname is not one of the inputs
                # (it is OK that it is the same name as an existing sheet if
                # that sheet is an output)
                if val in input_sheets:
                    message = key + ' specified as ' + val + ', cannot be the same as necessary input sheet'
                    show_message(message, halt=True)
def ffill_values(wb, df, method='ffill', limit=None, debug=False):
    '''
    Forward fill (ffill) or backward fill (bfill) missing edge values.

    Input:
        wb     : workbook to be modified
        df     : DataFrame for data
        method : 'ffill' to fill trailing gaps, 'bfill' for leading gaps
        limit  : Limit on consecutive values to fill
        debug  : print diagnostic information

    Returns a filled copy of df. Each filled value is also written back
    into the matching cell of the 'Data' sheet and colored red.
    '''
    if debug:
        print('-' * 20 + ' start of ffill_values')

    # Make a copy so we can compare to the original
    _df = df.copy()
    if debug:
        print('_df before ffill:')
        print(_df)

    # Since df has dates before dict_input_partition['sdate'] sliced off,
    # we need an offset for the row number
    daterange = wb.sheets['Data'].range('A1').expand(
        'down').value  # this is a list

    # Find the index where _df.index[0] is
    try:
        offset = daterange.index(_df.index[0])
    except ValueError:
        message = 'Could not find ' + str(
            _df.index[0]) + ' in range of sheet Data starting at A1'
        show_message(message, halt=True)

    if debug:
        print('_df.index[0] = ' + str(_df.index[0]))
        print('offset = ' + str(offset))
        print('_df.index[:20]:')
        print(_df.index[:20])
        print('daterange[:20]:')
        print(daterange[:20])

    # Range for all values in sheet Data
    # (renamed from `range` to avoid shadowing the builtin)
    data_range = wb.sheets['Data'].range('A1').expand()

    # For each column fill missing values but not latest missing values
    for icol, col in enumerate(_df):
        s = df[col]  # original
        _s = _df[col]  # copy that gets filled

        # Skip dtypes that are not floats.
        # This is because the original data includes columns for isocodes
        if df[col].dtype not in [float, np.float64, np.float32]:
            if debug:
                print('skipping col ' + col + ' due to dtype being ' +
                      df[col].dtype.name)
            continue

        # If doing ffill, we don't want the initial missing values, only
        # the final missing values; use first_valid_index to restrict
        first_index = s.first_valid_index()
        if debug:
            print('first_index = ' + str(first_index))

        # If doing bfill, we don't want the final missing values, only
        # the initial missing values; use last_valid_index to restrict
        last_index = s.last_valid_index()
        if debug:
            print('last_index = ' + str(last_index))

        if method == 'ffill':
            _s[first_index:] = _s[first_index:].fillna(method=method,
                                                       limit=limit)
            # Values filled = positions where original was NaN but the
            # filled copy is not (the difference is NaN only there)
            filled = _s[first_index:][(s[first_index:] -
                                       _s[first_index:]).isnull() == True]
        elif method == 'bfill':
            _s[:last_index] = _s[:last_index].fillna(method=method,
                                                     limit=limit)
            filled = _s[:last_index][(s[:last_index] -
                                      _s[:last_index]).isnull() == True]
        else:
            message = 'Function ffill_values cannot take in method ' + method
            show_message(message, halt=True)

        # BUGFIX: these diagnostics ran unconditionally because the
        # `if debug:` guard had been commented out
        if debug:
            if len(filled) > 0:
                print('values that have been filled in ffill:')
                print(filled)

        # Use the index and col name to fill the Excel sheet with the
        # filled values
        for ind in filled.index:
            # Get the index location
            irow = _df.index.get_loc(ind)
            # Get the cell corresponding to the value we want to fill.
            # The row has an offset determined at the beginning of this
            # function
            cell = data_range[irow + offset, icol]
            # BUGFIX: debug guard restored here as well
            if debug:
                print('Filling in row ' + str(cell.row) + ' column ' +
                      str(cell.column))
                print('with value ' + str(_s.loc[ind]))
            # Fill the sheet with the filled values
            cell.value = _s.loc[ind]
            # Set the font color to red (ColorIndex 3) to mark the fill
            cell.api.Font.ColorIndex = 3
        # end of loop over index in filled
    # end of loop over columns in _df

    # Return the updated data
    return _df
def interpolate_missing_values(wb, df, debug=False):
    '''
    Interpolate missing in-between values.

    Input:
        df : DataFrame to be filled
        wb : workbook to be modified

    Linearly interpolates interior gaps of every float column of df,
    writes each filled value back into the matching cell of the 'Data'
    sheet (colored blue), and returns the filled copy.
    '''
    if debug:
        print('-' * 20 + ' start of interpolate_missing_values')

    # Make a copy so we can compare to the original
    _df = df.copy()
    if debug:
        print('Total of ' + str(len(df.columns)) + ' cols')
        print('_df before interpolate_missing_values:')
        print(_df)

    # Since df has dates before dict_input_partition['sdate'] sliced off,
    # we need an offset for the row number
    daterange = wb.sheets['Data'].range('A1').expand(
        'down').value  # this is a list

    # Find the index where _df.index[0] is
    try:
        offset = daterange.index(_df.index[0])
    except ValueError:
        message = 'Could not find ' + str(
            _df.index[0]) + ' in range of sheet Data starting at A1'
        show_message(message, halt=True)

    if debug:
        print('_df.index[0] = ' + str(_df.index[0]))
        print('offset = ' + str(offset))
        print('_df.index[:20]:')
        print(_df.index[:20])
        print('daterange[:20]:')
        print(daterange[:20])

    # range for all values in sheet Data
    # NOTE(review): this local shadows the builtin `range`; consider
    # renaming (kept as-is here)
    range = wb.sheets['Data'].range('A1').expand()

    # For each column fill missing values but not latest missing values
    for icol, col in enumerate(_df.columns):
        s = df[col]  # original
        _s = _df[col]  # copy that gets filled

        # Skip dtypes that are not floats.
        # This is because the original data includes columns for isocodes
        if df[col].dtype not in [float, np.float64, np.float32]:
            if debug:
                print('skipping col ' + col + ' due to dtype being ' +
                      df[col].dtype.name)
            continue

        # Get last valid index value
        last_index = _s.last_valid_index()
        # Do interpolate up to last valid index so we interpolate all
        # missing values
        _s[:last_index] = _s[:last_index].interpolate(method='linear')

        # We don't want the initial missing values, only values missing
        # in between. Use the first_valid_index to restrict the range
        first_index = s.first_valid_index()

        # Get all values that have nan as the difference
        # between the original and the interpolated copy
        # (the difference is NaN exactly where the original was NaN)
        filled = _s[first_index:last_index][(
            s[first_index:last_index] -
            _s[first_index:last_index]).isnull() == True]

        if debug:
            if len(filled) > 0:
                print('values that have been filled in interpolate:')
                print(filled)

        # Use the index and col name to fill the Excel sheet with the
        # interpolated values
        for ind in filled.index:
            # Get the index location
            irow = _df.index.get_loc(ind)
            # Get the cell corresponding to the value we want to fill.
            # Note that depending on whether the date is a column or not,
            # we need to add 1 to the column number, and that the row has
            # an offset determined at the beginning of this function
            cell = range[irow + offset, icol]
            if debug:
                print('Filling in row ' + str(cell.row) + ' column ' +
                      str(cell.column))
                print('with value ' + str(_s.loc[ind]))
            # Fill the sheet with the interpolated values
            cell.value = _s.loc[ind]
            # Set the font color to blue (ColorIndex 5) to mark the fill
            cell.api.Font.ColorIndex = 5
        # end of loop over index in filled
    # end of loop over columns in _df

    # Return the updated data
    return _df
def postrun_quantfit(dict_output_quantfit, debug=False):
    '''
    Postrun function for step 2, quantfit.
    Takes as input dict from main run function.

    Creates/clears the output sheets, writes the quantreg coefficients
    and conditional quantiles, saves the quantile figure to a 'figures'
    subdirectory next to the workbook and embeds it in the quantreg
    sheet, logging each action.

    NOTE(review): this file defines postrun_quantfit twice (a later
    definition takes an extra `path` argument); in Python the later
    definition wins, so verify which one is intended to be live.

    This function cannot return any values due to limitations of the
    RunPython VBA code in xlwings.
    '''
    if debug:
        print('=' * 30)
        print('start of postrun_quantfit')
        print('=' * 30)

    # Create DataFrame for log
    log_frame = pd.DataFrame(columns=['Time', 'Action'])

    # Create the output sheets
    sheetvars = [
        key for key in dict_output_quantfit if key.find('sheet') != -1
    ]
    for sheetvar in sheetvars:
        # Don't do anything for the input sheet
        if sheetvar == 'sheet_input':
            continue
        # Check that sheetvar exists as a key in dict_output_quantfit
        if sheetvar not in dict_output_quantfit:
            message = 'sheetvar ' + sheetvar + ' is not a key for dict_output_quantfit'
            show_message(message, halt=True)
        # Get the actual sheet name
        sheetname = dict_output_quantfit[sheetvar]
        # Get existing sheetnames
        sheetnames = [sheet.name for sheet in wb.sheets]
        try:
            # Clear the sheet if it already exists
            if sheetname in sheetnames:
                wb.sheets[sheetname].clear()
                action = 'Cleared sheet ' + sheetname
            # Otherwise add it after the "Data" sheet
            else:
                wb.sheets.add(sheetname, after='Data')
                # Set output sheet colors to blue
                wb.sheets[sheetname].api.Tab.ColorIndex = 23
                action = 'Created sheet ' + sheetname
        except:
            # Best-effort: failure to touch the sheet is only logged
            action = 'Unable to access sheet ' + sheetname
        # Add to log
        tn = date.now().strftime('%Y-%m-%d %H:%M:%S')
        log = pd.Series({'Time': tn, 'Action': action})
        log_frame = log_frame.append(log, ignore_index=True)
    # end of loop over output sheetvars

    # Write out quantfit results
    try:
        for sheetvar in sheetvars:
            sheetname = dict_output_quantfit[sheetvar]
            if sheetvar == 'sheet_quantreg':
                wb.sheets[sheetname].range('A1').options(
                    index=False).value = dict_output_quantfit['qcoef']
                wb.sheets[sheetname].autofit()
            elif sheetvar == 'sheet_cond_quant':
                wb.sheets[sheetname].range('A1').options(
                    index=True).value = dict_output_quantfit['cond_quant']
                wb.sheets[sheetname].autofit()
        action = 'Quantfit results saved succesfully.'
    except:
        action = 'Unable to output quantfit results.'
    print(action)

    # Remove any previously embedded figure before adding the new one
    sheetname = dict_output_quantfit['sheet_quantreg']
    try:
        wb.sheets[sheetname].pictures[0].delete()
    except:
        pass
    sheet = wb.sheets[sheetname]
    fig = dict_output_quantfit['figs']

    # Set the path of the output file to be in the same dir as the
    # calling Excel file
    fullpath = os.path.abspath(os.path.dirname(wb.fullname) + '/figures')
    if not os.path.isdir(fullpath):
        os.makedirs(fullpath)
    # Timestamped filename so successive runs do not overwrite
    outfilename = fullpath + '\\quantfit_' + date.now().strftime(
        '%Y_%m-%d@%H_%M-%S') + '.png'
    fig.savefig(outfilename)

    # Scale the embedded picture by the number of regressors
    cr = len(dict_output_quantfit['regressors'].keys())
    try:
        pic = sheet.pictures.add(fig,
                                 name='MyPlot_q',
                                 update=True,
                                 left=sheet.range('N6').left,
                                 top=sheet.range('N6').top,
                                 height=340 * (cr // 4 + 1),
                                 width=240 * (min(4, cr + 1)))
        pic.height = 340 * (cr // 4 + 1)
        pic.width = 240 * (min(4, cr + 1))
        action = 'Quantile figure saved'
    except:
        action = 'Unable to add figure to sheet ' + sheetname

    # Add to log
    tn = date.now().strftime('%Y-%m-%d %H:%M:%S')
    log = pd.Series({'Time': tn, 'Action': action})
    log_frame = log_frame.append(log, ignore_index=True)

    # Write out log_frame
    add_logsheet(wb, log_frame, colnum=3)
def gen_relation(shockdict, partition_groups, df_data, df_partition):
    '''
    Apply the user-specified shocks to variables and propagate them to
    the partition groups.

    Input:
        shockdict        : dict mapping variable name -> {'shocktype',
                           'shockvalue'}; shocktype is 'By +/- STD' or
                           'By +/- percentage'
        partition_groups : dict mapping group name -> list of member
                           variable names
        df_data          : DataFrame of raw data (has a 'date' column)
        df_partition     : DataFrame of partition/group series (has a
                           'date' column)

    Returns (df_shockedvar, df_shockedgrp): the shocked variable series
    and a copy of df_partition with '<group>_shocked' columns added.
    A variable's shock is scaled into its group by the correlation
    between the variable and the group over their overlapping dates.
    '''
    df_shockedvar = pd.DataFrame(index=df_data.index)
    df_shockedgrp = df_partition.copy()
    # Start every group's shocked series from the unshocked values
    for group in df_shockedgrp.columns:
        df_shockedgrp[group + '_shocked'] = df_shockedgrp[group]

    for var, shock in shockdict.items():
        # ct counts how many groups this variable was matched to
        ct = 0
        if shock['shocktype'] == 'By +/- STD':
            if var in partition_groups.keys():
                # var names a group: use the group's own std
                std = np.nanstd(df_shockedgrp[var])
            else:
                # var is a raw data column: shock the variable itself
                df_shockedvar[var] = df_data[var]
                std = np.nanstd(df_data[var].values)
                df_shockedvar[var + '_shocked'] = df_shockedvar[
                    var] + std * shock['shockvalue']
        elif shock['shocktype'] == 'By +/- percentage' and (
                var not in partition_groups.keys()):
            df_shockedvar[var] = df_data[var]
            df_shockedvar[
                var + '_shocked'] = df_shockedvar[var] * (1 + shock['shockvalue'])

        for group, compvars in partition_groups.items():
            # A name that is both a member variable and a group name is
            # ambiguous; warn the user (non-halting)
            if var in compvars and var in partition_groups.keys():
                print(var + ' is not well defined.')
                message = var + ' is in partition groups and also a group name. Please Check'
                show_message(message, halt=False)
            if var in compvars:
                ct += 1
                # Restrict both series to their overlapping date span
                # before computing the correlation
                df_var = df_data[['date', var]].dropna()
                df_part = df_partition[['date', group]].dropna()
                sdate = max(min(df_var['date'].values),
                            (min(df_part['date'].values)))
                edate = min(max(df_var['date'].values),
                            (max(df_part['date'].values)))
                df_var = df_var[(df_var['date'] >= sdate)
                                & (df_var['date'] <= edate)]
                df_part = df_part[(df_part['date'] >= sdate)
                                  & (df_part['date'] <= edate)]
                # Pearson correlation between variable and group
                cov = np.corrcoef(df_var[var].values,
                                  df_part[group].values)[0][1]
                print(group, cov, var, shock['shocktype'],
                      shock['shockvalue'])
                # Scale the shock into the group by the correlation
                if shock['shocktype'] == 'By +/- STD':
                    df_shockedgrp[group + '_shocked'] = df_shockedgrp[
                        group + '_shocked'] + std * shock['shockvalue'] * cov
                elif shock['shocktype'] == 'By +/- percentage':
                    df_shockedgrp[group + '_shocked'] = df_shockedgrp[
                        group +
                        '_shocked'] + df_data[var] * shock['shockvalue'] * cov
            elif var == group:
                # The shock targets the group series directly
                ct += 1
                print(group, var, shock['shocktype'], shock['shockvalue'])
                if shock['shocktype'] == 'By +/- STD':
                    df_shockedgrp[group + '_shocked'] = df_shockedgrp[
                        group + '_shocked'] + std * shock['shockvalue']
                elif shock['shocktype'] == 'By +/- percentage':
                    df_shockedgrp[group + '_shocked'] = df_shockedgrp[
                        group +
                        '_shocked'] + df_shockedgrp[group] * shock['shockvalue']
        # Warn (non-halting) when the shocked variable matched nothing
        if ct == 0:
            print(var + ' not in any group.')
            message = var + ' not in any partition groups. Please Check'
            show_message(message, halt=False)

    return df_shockedvar, df_shockedgrp
def postrun_quantfit(dict_output_quantfit, path='.', debug=False):
    '''
    Postrun function for step 2, quantfit.
    Takes as input dict from main run function.

    Creates/clears the output sheets, writes the quantfit results
    (quantile regression coefficients, conditional quantiles, local
    projections), saves and places the quantile figure, and appends a
    processing log to the workbook.

    Parameters
    ----------
    dict_output_quantfit : dict
        Output from the quantfit run; holds the output sheet names
        ('sheet_*' keys), the result frames ('qcoef', 'cond_quant',
        'localprj') and the figure ('figs').
    path : str
        Directory where the figure .png is written (default: current dir).
    debug : bool
        If True, print progress markers.

    This function cannot return any values due to limitations of the
    RunPython VBA code in xlwings.
    '''
    if debug:
        print('=' * 30)
        print('start of postrun_quantfit')
        print('=' * 30)

    # Create DataFrame for log
    log_frame = pd.DataFrame(columns=['Time', 'Action'])

    # Create (or clear) the output sheets
    sheetvars = [
        key for key in dict_output_quantfit if key.find('sheet') != -1
    ]
    for sheetvar in sheetvars:
        # Don't do anything for the input sheet
        if sheetvar == 'sheet_input':
            continue

        # Get the actual sheet name
        sheetname = dict_output_quantfit[sheetvar]

        # Get existing sheetnames
        sheetnames = [sheet.name for sheet in wb.sheets]

        try:
            # Clear the sheet if it already exists
            if sheetname in sheetnames:
                wb.sheets[sheetname].clear()
                action = 'Cleared sheet ' + sheetname
            # Otherwise add it after the "Data" sheet
            else:
                wb.sheets.add(sheetname, after='Data')
                # Set output sheet colors to blue
                wb.sheets[sheetname].api.Tab.Colorindex = 23
                action = 'Created sheet ' + sheetname
        except Exception:
            # Best-effort: record the failure in the log and keep going
            action = 'Unable to access sheet ' + sheetname

        # Add to log
        tn = date.now().strftime('%Y-%m-%d %H:%M:%S')
        log = pd.Series({'Time': tn, 'Action': action})
        log_frame = log_frame.append(log, ignore_index=True)
    # end of loop over output sheetvars

    # Write out quantfit results
    try:
        for sheetvar in sheetvars:
            sheetname = dict_output_quantfit[sheetvar]
            if sheetvar == 'sheet_quantreg':
                wb.sheets[sheetname].range('A1').options(
                    index=False).value = dict_output_quantfit['qcoef']
            elif sheetvar == 'sheet_cond_quant':
                wb.sheets[sheetname].range('A1').options(
                    index=True).value = dict_output_quantfit['cond_quant']
            elif sheetvar == 'sheet_local_proj':
                wb.sheets[sheetname].range('A1').options(
                    index=False).value = dict_output_quantfit['localprj']
        # BUGFIX: message previously misspelled "succesfully"
        action = 'Quantfit results saved successfully.'
    except Exception:
        action = 'Unable to output quantfit results.'
        print(action)

    # BUGFIX: this action used to be computed but never logged (the figure
    # action below overwrote it before the next append) — log it here
    tn = date.now().strftime('%Y-%m-%d %H:%M:%S')
    log = pd.Series({'Time': tn, 'Action': action})
    log_frame = log_frame.append(log, ignore_index=True)

    # Replace any previously placed figure on the quantreg sheet
    sheetname = dict_output_quantfit['sheet_quantreg']
    try:
        wb.sheets[sheetname].pictures[0].delete()
    except Exception:
        pass  # no existing picture to delete
    sheet = wb.sheets[sheetname]
    fig = dict_output_quantfit['figs']
    # BUGFIX: build the output path portably instead of hard-coding '\\'
    fig.savefig(
        os.path.join(path,
                     'quantfit' + date.now().strftime('%Y%m%d-%H-%M') +
                     '.png'))
    try:
        sheet.pictures.add(fig,
                           name='MyPlot_q',
                           update=True,
                           left=sheet.range('L7').left,
                           top=sheet.range('L7').top,
                           height=260,
                           width=1040)
        action = 'Quantile figure saved'
    except Exception:
        action = 'Unable to add figure to sheet ' + sheetname

    # Add to log
    tn = date.now().strftime('%Y-%m-%d %H:%M:%S')
    log = pd.Series({'Time': tn, 'Action': action})
    log_frame = log_frame.append(log, ignore_index=True)

    # Write out log_frame
    add_logsheet(wb, log_frame, colnum=3)
def check_parameters_tsfit(dict_input_tsfit, keys):
    '''
    Check the input parameters for tsfit.

    Verifies that every expected key is present, validates each value
    (show_message reports problems), and fills in default output sheet
    names for any sheet_* key given as None. Mutates dict_input_tsfit
    in place.
    '''

    # Check that all keys exist
    for key in keys:
        if key not in dict_input_tsfit:
            message = 'key ' + key + ' not found in dict_input_tsfit'
            show_message(message)

    # Input sheets that must never be reused as output sheet names
    input_sheets = [
        'Readme', 'Input_parameters', 'Partition_groups', 'Data',
        'Processing_Log'
    ]

    for key in dict_input_tsfit:
        val = dict_input_tsfit[key]

        if key == 'latest_date':
            if type(val) != date:
                # NOTE(review): message says 'edate' although the key is
                # 'latest_date' — looks like a copy-paste; confirm intent
                message = 'edate = ' + str(
                    val) + ' was not a datetime.datetime object'
                show_message(message)
            # the range of the date is checked in Excel and is not checked here
        elif key.find('sheet_') != -1:
            if val is None:
                # Fill in the default output sheet name for this key
                if key == 'sheet_quantreg':
                    dict_input_tsfit[key] = 'Quant reg coefficients'
                elif key == 'sheet_cond_quant':
                    dict_input_tsfit[key] = 'Conditional quantiles'
                elif key == 'sheet_local_proj':
                    dict_input_tsfit[key] = 'Local projections'
                elif key == 'sheet_partition':
                    dict_input_tsfit[key] = 'Output_partitions'
                elif key == 'sheet_tsfit':
                    # Default sheet name depends on the chosen fit type
                    if dict_input_tsfit['fit_params'][
                            'fittype'] == 'Asymmetric T':
                        dict_input_tsfit[key] = 'Asymmetric T fit'
                    else:
                        dict_input_tsfit[key] = 'T-skew fit'
                else:
                    message = 'No sheet called ' + key + ' should exist'
                    show_message(message, halt=True)
            else:
                # Check that the specified sheetname is not one of the inputs
                # (it is OK that it is the same name as an existing sheet if
                # that sheet is an output)
                if val in input_sheets:
                    message = key + ' specified as ' + val + ', cannot be the same as necessary input sheet'
                    show_message(message, halt=True)
        elif key == 'regressors':
            # val is a dict of dicts with keys [regressor]['transform/option']
            for regressor in val:
                transform = val[regressor]['transform']
                option = val[regressor]['option']

                # Check that transform is a valid value from the pulldown menu
                if transform not in [
                        'None', 'Lagged', 'MVA', 'Power', 'Diff', 'ChangeRate'
                ]:
                    message = 'transform for ' + regressor + ' was not a valid option, given ' + transform
                    show_message(message, halt=True)

                # If 'No transformation' or 'Log' was chosen, make sure no option was given
                if transform in ['None']:
                    if option is not None:
                        message = 'option for regressor = ' + regressor + ' with transform of ' + transform + ' must not have option set'
                        show_message(message, halt=True)

                # If 'Lagged' or 'Moving Average' was chosen, make sure lag exists and is an int
                if transform in [
                        'Lagged', 'MVA', 'Power', 'Diff', 'ChangeRate'
                ]:
                    if type(option) != float or abs(int(option) -
                                                    option) > 1E-5:
                        message = 'option for regressor = ' + regressor + ' with transform of ' + transform + ' must have option of int, given ' + str(
                            option)
                        show_message(message, halt=True)
                    # Since the value is less than 1E-5 away from an int,
                    # convert to int so that there are no problems later
                    dict_input_tsfit['regressors'][regressor]['option'] = int(
                        option)
        elif key == 'fit_params':
            # This is a dict with keys for the variable name of each constraint
            # 'mode', 'dof', 'var', 'skewness', 'var_low', 'var_high', 'skew_low', 'skew_high'
            # and for each variable name there are 2 keys 'constraint' and 'value'
            dict_params = val
            for varname in dict_params:
                if varname in [
                        'dof', 'var_low', 'var_high', 'skew_low', 'skew_high'
                ]:
                    constraint = dict_params[varname]['constraint']
                    value = dict_params[varname]['value']

                    # 'mode' will always be specified
                    # NOTE(review): 'mode' is not in the list guarding this
                    # block, so this branch appears unreachable (dead code);
                    # confirm against the Excel input layout
                    if varname == 'mode':
                        # Check that value is from pulldown
                        if constraint not in [
                                'Free', 'Fixed', 'Median', 'Mean'
                        ]:
                            message = 'constraint for ' + varname + ' was ' + constraint + ', not in pulldown values'
                            show_message(message, halt=True)
                    else:
                        # For all other varnames, only options are 'Fixed' and 'Free'
                        # Check that value is from pulldown
                        if constraint not in ['Free', 'Fixed', 'Default']:
                            message = 'constraint for ' + varname + ' was ' + constraint + ', not in pulldown values'
                            show_message(message, halt=True)

                    # Check that no value is given when Free
                    if constraint in ['Free', 'Median'] and value is not None:
                        message = 'constraint for ' + varname + ' was ' + constraint + ' so value cannot be given as ' + str(
                            value)
                        show_message(message, halt=True)

                    # Check that value is a float
                    if constraint in ['Fixed'] and type(value) != float:
                        message = 'If constraint for ' + varname + ' is ' + constraint + ', value must be float, given as ' + str(
                            value)
                        show_message(message, halt=True)
                elif varname == 'qsmooth':
                    # Smoothing needs a period whenever an option is chosen
                    if dict_params[varname]['option'] != 'None' and dict_params[
                            varname]['period'] is None:
                        message = 'Please provide period number for quantile smooth.'
                        show_message(message, halt=True)
                elif varname == 'fittype':
                    # fittype is a plain string, not a constraint/value dict
                    if dict_params[varname] != 'Asymmetric T' and dict_params[
                            varname] != 'T-skew':
                        message = 'Not valid skewed T distribution option.'
                        show_message(message, halt=True)
def check_parameters_segment(dict_input_segment):
    '''
    Check the input parameters for segment.

    Validates each entry of dict_input_segment (show_message reports
    problems), fills in default output sheet names for any sheet_* key
    given as None, and converts integer-valued float options to int.
    Mutates dict_input_segment in place.
    '''

    # Check that all keys exist
    keys = dict_input_segment.keys()

    # Input sheets that must never be reused as output sheet names
    input_sheets = [
        'Readme', 'Input_parameters', 'Partition_groups', 'Data',
        'Processing_Log'
    ]

    for key in keys:
        val = dict_input_segment[key]

        if key == 'freq' and val not in ['Monthly', 'Quarterly', 'Yearly']:
            message = 'freq = ' + val + ' was not a valid value'
            show_message(message)

        if key == 'sdate':
            if type(val) != date:
                message = 'sdate = ' + str(
                    val) + ' was not a datetime.datetime object'
                show_message(message)
            # the range of the date is checked in Excel and is not checked here

        if key == 'edate':
            if type(val) != date:
                message = 'edate = ' + str(
                    val) + ' was not a datetime.datetime object'
                show_message(message)
            # the range of the date is checked in Excel and is not checked here

        if key == 'method' and val not in ['LDA', 'PCA', 'PLS']:
            message = 'method = ' + val + ' was not a valid value'
            show_message(message)

        if key == 'pcutoff' and not (0 < val and val < 1):
            print('pcutoff = ' + str(val))
            # BUGFIX: message used to read "was not a valid sheet name",
            # a copy-paste error — pcutoff is a probability cutoff
            message = 'pcutoff = ' + str(
                val) + ' was not a valid value, must be between 0 and 1'
            show_message(message)

        if key == 'real_GDP':
            message = 'benchmark = ' + val + ' needs to be checked'
            show_message(message, output_messagebox=False)

        if key == 'method_growth':
            if val not in ['cpd', 'yoy', 'level']:
                # BUGFIX: message now lists every accepted value
                message = 'method_growth = ' + val + ' must be one of cpd/yoy/level'
                show_message(message, output_messagebox=False)

        if key == 'retropolate' and val not in ['Yes', 'No']:
            message = 'retropolate = ' + val + ' was not a valid value'
            show_message(message)

        if key == 'quantlist':
            # Check that all values are between 0 and 1,
            # and that the necessary values of 0.10, 0.25, 0.50, 0.75, 0.90 exist
            vals_np = np.array(val)  # create np.array for value checking
            if not (np.all(0 < vals_np) and np.all(vals_np < 1)):
                message = 'All values of quantlist must be between 0 and 1'
                # BUGFIX: newline added — the two parts used to run together
                message += '\nGiven values: ' + str(val)
                show_message(message, halt=True)

            # Check that necessary values are present
            necessary_vals = [0.10, 0.25, 0.50, 0.75, 0.90]
            for _val in necessary_vals:
                if _val not in val:
                    message = 'Value of ' + str(
                        _val) + ' must be included in quantlist'
                    # BUGFIX: newline added — the two parts used to run together
                    message += '\nGiven values: ' + str(val)
                    show_message(message, halt=True)

        if key == 'regressors':
            # val is a dict of dicts with keys [regressor]['transform/option']
            for regressor in val:
                transform = val[regressor]['transform']
                option = val[regressor]['option']

                # Check that transform is a valid value from the pulldown menu
                if transform not in [
                        'None', 'Lagged', 'MVA', 'Power', 'Diff', 'ChangeRate'
                ]:
                    message = 'transform for ' + regressor + ' was not a valid option, given ' + transform
                    show_message(message, halt=True)

                # If 'None' was chosen, make sure no option was given
                if transform in ['None']:
                    if option is not None:
                        message = 'option for regressor = ' + regressor + ' with transform of ' + transform + ' must not have option set'
                        show_message(message, halt=True)

                # For all other transforms the option must be an integer lag
                if transform in [
                        'Lagged', 'MVA', 'Power', 'Diff', 'ChangeRate'
                ]:
                    if type(option) != float or abs(int(option) -
                                                    option) > 1E-5:
                        message = 'option for regressor = ' + regressor + ' with transform of ' + transform + ' must have option of int, given ' + str(
                            option)
                        show_message(message, halt=True)
                    # Since the value is less than 1E-5 away from an int,
                    # convert to int so that there are no problems later
                    dict_input_segment['regressors'][regressor][
                        'option'] = int(option)

        if key == 'shockvars':
            for shockvar in val:
                shocktype = val[shockvar]['shocktype']
                shockvalue = val[shockvar]['shockvalue']

                # Check that shocktype is a valid value from the pulldown menu
                if shocktype not in [
                        'None', 'By +/- STD', 'By +/- percentage'
                ]:
                    message = 'Shock type for ' + shockvar + ' was not a valid option, given ' + shocktype
                    show_message(message, halt=True)

                if shocktype in ['By +/- STD', 'By +/- percentage']:
                    if abs(shockvalue) > 10:
                        # BUGFIX: message used to describe an unrelated
                        # condition ("must have option of int")
                        message = 'Shock value for variable = ' + shockvar + ' with shocktype of ' + shocktype + ' must have absolute value <= 10, given ' + str(
                            shockvalue)
                        show_message(message, halt=True)
        elif key.find('sheet_') != -1:
            if val is None:
                # Fill in the default output sheet name for this key
                if key == 'sheet_quantreg':
                    dict_input_segment[key] = 'Quant reg coefficients'
                elif key == 'sheet_cond_quant':
                    dict_input_segment[key] = 'Conditional quantiles'
                elif key == 'sheet_local_proj':
                    dict_input_segment[key] = 'Local projections'
                elif key == 'sheet_partition':
                    dict_input_segment[key] = 'Output_partitions'
                elif key == 'sheet_segment':
                    dict_input_segment[key] = 'Multiple_projections'
                elif key == 'sheet_term':
                    dict_input_segment[key] = 'Term_Structure'
                else:
                    message = 'No sheet called ' + key + ' should exist'
                    show_message(message, halt=True)
            else:
                # Check that the specified sheetname is not one of the inputs
                # (it is OK that it is the same name as an existing sheet if
                # that sheet is an output)
                if val in input_sheets:
                    message = key + ' specified as ' + val + ', cannot be the same as necessary input sheet'
                    show_message(message, halt=True)
def postrun_partition(dict_output_partition, debug=False):
    '''
    Postrun function for step 1, partition.
    Takes as input dict from main run function.

    Creates/clears the output sheets, writes the partitions and loadings
    frames, saves the figures to a ./figures directory next to the
    workbook, places them on the partitions sheet, and appends a
    processing log.

    This function cannot return any values due to limitations of the
    RunPython VBA code in xlwings.
    '''
    if debug:
        print('=' * 30)
        print('start of run_partition')
        print('=' * 30)

    # Create DataFrame for log
    log_frame = pd.DataFrame(columns=['Time', 'Action'])

    # Create the output sheets
    for sheetvar in [
            key for key in dict_output_partition if key.find('sheet') != -1
    ]:
        # Check that sheetvar exists as a key in dict_output_partition
        if sheetvar not in dict_output_partition:
            message = 'sheetvar ' + sheetvar + ' is not a key for dict_output_partition'
            show_message(message, halt=True)

        # Get the actual sheet name
        sheetname = dict_output_partition[sheetvar]

        # Get existing sheetnames
        sheetnames = [sheet.name for sheet in wb.sheets]

        try:
            # Clear the sheet if it already exists
            if sheetname in sheetnames:
                wb.sheets[sheetname].clear()
                action = 'Cleared sheet ' + sheetname
            # Otherwise add it after the "Data" sheet
            else:
                wb.sheets.add(sheetname, after='Data')
                # Set output sheet colors to blue
                wb.sheets[sheetname].api.Tab.Colorindex = 23
                action = 'Created sheet ' + sheetname
        except:
            # Best-effort: just record the failure in the log
            action = 'Unable to access sheet ' + sheetname

        # Add to log
        tn = date.now().strftime('%Y-%m-%d %H:%M:%S')
        log = pd.Series({'Time': tn, 'Action': action})
        log_frame = log_frame.append(log, ignore_index=True)
    # end of loop over output sheetvars

    # Write out partition and loadings
    try:
        sheetname = dict_output_partition['sheet_partitions']
        wb.sheets[sheetname].range('A1').options(
            index=False).value = dict_output_partition['frame']
        wb.sheets[sheetname].autofit()
        sheetname = dict_output_partition['sheet_loadings']
        wb.sheets[sheetname].range('A1').options(
            index=False).value = dict_output_partition['loading']
        wb.sheets[sheetname].autofit()
        action = 'Partitions and loadings saved succesfully.'
    except:
        # NOTE(review): this action is overwritten by the figure actions
        # below before the next log append, so it is never logged — confirm
        action = 'Unable to output partitions and loadings.'

    # Remove any previously placed shapes from the partitions sheet
    sheetname = dict_output_partition['sheet_partitions']
    for p in wb.sheets[sheetname].shapes:
        try:
            p.delete()
        except Exception as e:
            print(e)
    sheet = wb.sheets[sheetname]

    if dict_output_partition['method'] == 'PLS':
        # One figure per group, stacked down column M (38 rows apart)
        for i, fig in enumerate(dict_output_partition['figs']):
            # Save each figure under ./figures next to the calling workbook
            # NOTE(review): '\\' in the filename is Windows-only — confirm
            fullpath = os.path.abspath(
                os.path.dirname(wb.fullname) + '/figures')
            if not os.path.isdir(fullpath):
                os.makedirs(fullpath)
            group = dict_output_partition['groups'][i]
            outfilename = fullpath + '\\partition_PLS_' + group + date.now(
            ).strftime('%Y_%m-%d@%H_%M-%S') + '.png'
            fig.savefig(outfilename)
            try:
                # Anchor cell for this figure (M2, M40, M78, ...)
                X = str(2 + i * 38)
                sheet.pictures.add(fig,
                                   name='MyPlot_P' + str(i + 1),
                                   update=True,
                                   left=sheet.range('M' + X).left,
                                   top=sheet.range('M' + X).top,
                                   height=500,
                                   width=750)
                action = 'Partition figure saved'
            except:
                action = 'Unable to add figure to sheet ' + sheetname
    else:
        fig = dict_output_partition['figs'][0]
        # Set the path of the output file to be in the same dir as the
        # calling Excel file
        fullpath = os.path.abspath(os.path.dirname(wb.fullname) + '/figures')
        if not os.path.isdir(fullpath):
            os.makedirs(fullpath)
        outfilename = fullpath + '\\partition_' + date.now().strftime(
            '%Y_%m-%d@%H_%M-%S') + '.png'
        fig.savefig(outfilename)
        # Width scales with the number of groups
        cr = len(dict_output_partition['groups'])
        try:
            sheet.pictures.add(fig,
                               name='MyPlot_P1',
                               update=True,
                               left=sheet.range('M2').left,
                               top=sheet.range('M2').top,
                               height=720,
                               width=cr * 255)
            action = 'Partition figure saved'
        except:
            action = 'Unable to add figure to sheet ' + sheetname

    # Add to log (only the last figure action survives to this point)
    tn = date.now().strftime('%Y-%m-%d %H:%M:%S')
    log = pd.Series({'Time': tn, 'Action': action})
    log_frame = log_frame.append(log, ignore_index=True)

    # Place the second figure; its size depends on the method
    fig1 = dict_output_partition['figs'][1]
    if dict_output_partition['method'] == 'PLS':
        ht = 700
        wd = 480
    else:
        ht = 320
        wd = 320
    try:
        sheet.pictures.add(fig1,
                           name='MyPlot_P2',
                           update=True,
                           left=sheet.range('M54').left,
                           top=sheet.range('M54').top,
                           height=ht,
                           width=wd)
        action = 'Partition figure saved'
    except:
        # NOTE(review): this action is never appended to the log — the
        # function writes the log right after; confirm this is intended
        action = 'Unable to add figure to sheet ' + sheetname

    # Write out log_frame
    add_logsheet(wb, log_frame, colnum=1)
def check_parameters_partition(dict_input_partition, keys):
    '''
    Check the input parameters for partition.

    Verifies that every expected key exists in dict_input_partition, that
    the required input sheets exist in the workbook, and that each value
    is valid (show_message reports problems). For the output sheet keys
    ('sheet_partitions', 'sheet_loadings') a None value is replaced in
    place by the default name ('Output_partitions'/'Output_loadings').

    Parameters
    ----------
    dict_input_partition : dict
        Parameters read from the input sheet; mutated in place.
    keys : list of str
        Keys that must be present in dict_input_partition.
    '''

    # Check that all keys exist
    for key in keys:
        if key not in dict_input_partition:
            message = 'key ' + key + ' not found in dict_input_partition'
            show_message(message)

    # Get the sheets in the wb
    sheetnames = [sheet.name for sheet in wb.sheets]

    # These are the sheets that are used as input and never overwritten
    input_sheets = [
        'Readme', 'Input_parameters', 'Partition_groups', 'Data',
        'Processing_Log'
    ]

    # Check for necessary sheets
    for sheetname in ['Partition_groups', 'Data']:
        if sheetname not in sheetnames:
            message = 'sheet with name ' + sheetname + ' must be in input Excel file'
            show_message(message)

    # -------------------------------------------
    # Go through each value and check the values
    # -------------------------------------------
    for key in keys:
        val = dict_input_partition[key]

        if key == 'freq' and val not in ['Monthly', 'Quarterly', 'Yearly']:
            message = 'freq = ' + val + ' was not a valid value'
            show_message(message)

        if key == 'sdate':
            if type(val) != date:
                message = 'sdate = ' + str(
                    val) + ' was not a datetime.datetime object'
                show_message(message)
            # the range of the date is checked in Excel and is not checked here

        if key == 'edate':
            if type(val) != date:
                message = 'edate = ' + str(
                    val) + ' was not a datetime.datetime object'
                show_message(message)
            # the range of the date is checked in Excel and is not checked here

        if key == 'method' and val not in ['LDA', 'PCA', 'PLS']:
            message = 'method = ' + val + ' was not a valid value'
            show_message(message)

        if key == 'pcutoff' and not (0 < val and val < 1):
            print('pcutoff = ' + str(val))
            # BUGFIX: message used to read "was not a valid sheet name",
            # a copy-paste error — pcutoff is a probability cutoff
            message = 'pcutoff = ' + str(
                val) + ' was not a valid value, must be between 0 and 1'
            show_message(message)

        if key == 'real_GDP':
            message = 'benchmark = ' + val + ' needs to be checked'
            show_message(message, output_messagebox=False)

        if key == 'method_growth':
            if val not in ['cpd', 'yoy', 'level']:
                # BUGFIX: message now lists every accepted value
                message = 'method_growth = ' + val + ' must be one of cpd/yoy/level'
                show_message(message, output_messagebox=False)

        if key == 'retropolate' and val not in ['Yes', 'No']:
            message = 'retropolate = ' + val + ' was not a valid value'
            show_message(message)

        if key in ['sheet_partitions', 'sheet_loadings']:
            # If a value was specified, check that it is not one of the
            # input sheet names and use it as the output sheet name.
            # Otherwise we will use the default ('Output_partitions' /
            # 'Output_loadings' from the key name)
            if val is None:
                dict_input_partition[key] = key.replace('sheet', 'Output')
            else:
                # Check that the specified sheetname is not one of the inputs
                # (it is OK that it is the same name as an existing sheet if
                # that sheet is an output)
                if val in input_sheets:
                    message = key + ' specified as ' + val + ', cannot be the same as necessary input sheet'
                    show_message(message)
def prerun_partition(debug=False):
    '''
    Prerun function for step 1, partition.

    This function cannot take in any arguments due to limitations
    of the RunPython VBA code in xlwings.

    Reads and validates the partition input parameters, merges in the
    global parameters, loads the data and the group definitions, and
    returns (dict_input_partition, dict_groups, df_partition).
    '''
    if debug:
        print('=' * 30)
        print('start of prerun_partition')
        print('=' * 30)

    # Keys that must be present in the partition-parameter dict
    expected_keys = [
        'freq', 'sdate', 'edate', 'method', 'pcutoff', 'method_growth',
        'retropolate', 'sheet_partitions', 'sheet_loadings'
    ]

    # Read the step-specific parameters and validate them
    dict_input_partition = read_parameters_partition()
    check_parameters_partition(dict_input_partition, expected_keys)

    # Merge in the global parameters (read_parameters_global also checks
    # whether values changed since being initially set); a key clash with
    # the partition parameters is fatal
    for gkey, gval in read_parameters_global().items():
        if gkey in dict_input_partition:
            message = 'dict_input_partition should not have key ' + gkey + ' that is common with dict_global_params'
            show_message(message, halt=True)
        dict_input_partition[gkey] = gval

    # Load the data to be partitioned
    df_partition = read_data_partition()

    # Load the group definitions; PLS additionally carries target variables
    if dict_input_partition['method'] == "PLS":
        dict_groups, dict_PLS = read_partition_groupsPLS()
        dict_input_partition['PLS_target'] = dict_PLS
    else:
        dict_groups = read_partition_groups()
        dict_input_partition['PLS_target'] = None

    # Validate the data against the parameters
    check_data_partition(df_partition, dict_input_partition)

    # Restrict to [sdate, edate] and fill missing values
    df_partition = format_data_partition(df_partition,
                                         dict_input_partition['sdate'],
                                         dict_input_partition['edate'])

    # For PLS, every target variable must be observed on all dates
    if dict_input_partition['method'] == "PLS":
        PLSvar = list({elem for grp in dict_PLS.values() for elem in grp})
        if df_partition.loc[:, PLSvar].isnull().sum().sum() > 0:
            print(df_partition.loc[:, PLSvar].head())
            message = 'PLS target should cover all dates'
            show_message(message, halt=True)

    # Validate the partition groups against the data
    check_partition_groups(dict_groups, df_partition)

    # Return the parameter dict, the group dict, and the data frame
    return dict_input_partition, dict_groups, df_partition
def check_parameters_scenario(dict_input_scenario):
    '''
    Check the input parameters for scenario.

    Validates the quantlist/regressors/shockvars entries (show_message
    reports problems) and fills in a default name for every sheet_* key
    that was left as None. Mutates dict_input_scenario in place.
    '''

    # Sheets that are inputs and therefore invalid as output sheet names
    input_sheets = [
        'Readme', 'Input_parameters', 'Partition_groups', 'Data',
        'Processing_Log'
    ]

    # Default output sheet name for each recognized sheet_* key
    default_sheets = {
        'sheet_quantreg': 'Quant reg coefficients',
        'sheet_cond_quant': 'Conditional quantiles',
        'sheet_local_proj': 'Local projections',
        'sheet_partition': 'Output_partitions',
        'sheet_scenario': 'Senario test',
    }

    # Pulldown values accepted for transforms and shock types
    valid_transforms = ['None', 'Lagged', 'MVA', 'Power', 'Diff', 'ChangeRate']
    valid_shocktypes = ['None', 'By +/- STD', 'By +/- percentage']

    for key in dict_input_scenario.keys():
        val = dict_input_scenario[key]

        if key == 'quantlist':
            # Every quantile must lie strictly inside (0, 1) ...
            arr = np.array(val)
            if not (np.all(0 < arr) and np.all(arr < 1)):
                message = 'All values of quantlist must be between 0 and 1'
                message += 'Given values: ' + str(val)
                show_message(message, halt=True)
            # ... and the five standard quantiles must all be present
            for needed in [0.10, 0.25, 0.50, 0.75, 0.90]:
                if needed not in val:
                    message = 'Value of ' + str(
                        needed) + ' must be included in quantlist'
                    message += 'Given values: ' + str(val)
                    show_message(message, halt=True)

        if key == 'regressors':
            # val maps regressor name -> {'transform': ..., 'option': ...}
            for regressor, spec in val.items():
                transform = spec['transform']
                option = spec['option']

                # transform must come from the pulldown menu
                if transform not in valid_transforms:
                    message = 'transform for ' + regressor + ' was not a valid option, given ' + transform
                    show_message(message, halt=True)

                # 'None' must not carry an option value
                if transform == 'None' and option is not None:
                    message = 'option for regressor = ' + regressor + ' with transform of ' + transform + ' must not have option set'
                    show_message(message, halt=True)

                # Every other transform needs an integer-valued float option
                if transform in ['Lagged', 'MVA', 'Power', 'Diff',
                                 'ChangeRate']:
                    if type(option) != float or abs(int(option) -
                                                    option) > 1E-5:
                        message = 'option for regressor = ' + regressor + ' with transform of ' + transform + ' must have option of int, given ' + str(
                            option)
                        show_message(message, halt=True)
                    # Within 1E-5 of an int, so store it as an int
                    dict_input_scenario['regressors'][regressor][
                        'option'] = int(option)

        if key == 'shockvars':
            # val maps variable name -> {'shocktype': ..., 'shockvalue': ...}
            for shockvar, spec in val.items():
                shocktype = spec['shocktype']
                shockvalue = spec['shockvalue']

                # shocktype must come from the pulldown menu
                if shocktype not in valid_shocktypes:
                    message = 'Shock type for ' + shockvar + ' was not a valid option, given ' + shocktype
                    show_message(message, halt=True)

                # Shock magnitude is capped at 10
                if shocktype in ['By +/- STD', 'By +/- percentage'
                                 ] and abs(shockvalue) > 10:
                    message = 'Shock value for variable = ' + shockvar + ' with shocktype of ' + shocktype + ' must have option of int, given ' + str(
                        shockvalue)
                    show_message(message, halt=True)
        elif key.find('sheet_') != -1:
            if val is None:
                # Fill in the default name; unknown sheet_* keys are fatal
                if key in default_sheets:
                    dict_input_scenario[key] = default_sheets[key]
                else:
                    message = 'No sheet called ' + key + ' should exist'
                    show_message(message, halt=True)
            # A user-specified name must not collide with an input sheet
            # (reusing the name of an existing *output* sheet is fine)
            elif val in input_sheets:
                message = key + ' specified as ' + val + ', cannot be the same as necessary input sheet'
                show_message(message, halt=True)