Exemple #1
0
def check_parameters_historical(dict_input_historical):
    '''
    Validate the sheet-name parameters of the historical step.

    Only entries whose key contains 'sheet_' are looked at:
      * a value of None is replaced in place by the default sheet name
        for that key; a key without a default is reported through
        show_message with halt=True,
      * a given value is reported through show_message with halt=True
        when it equals the name of one of the necessary input sheets.
    All other entries are left untouched. Nothing is returned.
    '''
    # Names of the input sheets; an output sheet may not reuse them
    reserved_names = [
        'Readme', 'Input_parameters', 'Partition_groups', 'Data',
        'Processing_Log'
    ]
    # Default sheet name for every known sheet parameter
    default_names = {
        'sheet_quantreg': 'Quant reg coefficients',
        'sheet_cond_quant': 'Conditional quantiles',
        'sheet_historical': 'Historical distribution',
    }

    for param in dict_input_historical.keys():
        if 'sheet_' not in param:
            continue

        given = dict_input_historical[param]
        if given is not None:
            # It is OK to reuse the name of an existing output sheet,
            # but never the name of an input sheet
            if given in reserved_names:
                message = param + ' specified as ' + given + ', cannot be the same as necessary input sheet'
                show_message(message, halt=True)
            continue

        # Nothing was specified: fall back to the default name
        if param in default_names:
            dict_input_historical[param] = default_names[param]
        else:
            message = 'No sheet called ' + param + ' should exist'
            show_message(message, halt=True)
Exemple #2
0
def do_partition(debug=False):
    '''
    Run the whole partition step: prerun, main run and postrun.

    Entry point called when the button for partitions is pressed.
    As noted for the RunPython VBA code in xlwings, this function
    has no return value; the outcome is reported with show_message.

    When debug is True, progress banners are printed before each stage.
    '''

    # Timestamp used to report the elapsed time at the end
    started = time.time()

    # A caller workbook is required for everything below
    if wb is None:
        for line in ('partition_main: wb is None',
                     'This may be due to not calling set_mock_caller_file',
                     'and setting the caller Workbook'):
            print(line)
        import sys
        sys.exit(-1)

    if debug:
        banner = '+' * 40
        print(banner)
        print('start of do_partition')
        print(banner)
        print('---- calling prerun_partition')

    # Stage 1: read and check inputs
    prerun_result = prerun_partition(debug=debug)
    dict_input_partition, dict_groups, df_partition = prerun_result

    # Stage 2: compute the partition
    if debug:
        print('---- calling run_partition')
    dict_output_partition = run_partition(
        dict_input_partition, dict_groups, df_partition, debug=debug)

    # Stage 3: write the results out
    if debug:
        print('---- calling postrun_partition')
    postrun_partition(dict_output_partition, debug=debug)

    # Elapsed time as a string with one decimal
    elapsed = time.time() - started
    tdiff = "{:.1f}".format(elapsed)

    # Names of all output sheets (the input sheet is not an output)
    sheets = []
    for key, name in dict_output_partition.items():
        if 'sheet' in key and key != 'sheet_input':
            sheets.append(name)

    message = ('Finished with partition in ' + tdiff + ' sec,\n' +
               'output is in sheets ' + ', '.join(sheets))
    show_message(message, msgtype='info')
Exemple #3
0
def prerun_scenario(debug=False):
    '''
    Prerun function for the scenario step.

    Reads and checks the scenario input parameters, merges the global
    parameters into the same dict, reads the input sheets and restricts
    the 'Data' frame to the date span covered by the partition sheet.

    Parameters:
        debug : accepted for symmetry with the other prerun functions;
                it is not used inside this function.

    Returns:
        (dict_input_scenario, df_scenario_collections), where the second
        item is the object returned by read_data_scenario with its
        'Data' entry filtered to the partition date range.
    '''

    # --------------------------
    # Read in parameters
    # --------------------------
    dict_input_scenario = read_parameters_scenario()

    # --------------------------
    # Check parameter values
    # --------------------------
    check_parameters_scenario(dict_input_scenario)

    # Read in global parameters
    dict_global_params = read_parameters_global()

    # Add each key, val from dict_global_params to dict_input_scenario
    for key, val in dict_global_params.items():
        # A key present in both dicts would be silently overwritten,
        # so report it (the message now names the correct dict)
        if key in dict_input_scenario:
            message = 'dict_input_scenario should not have key ' + key + ' that is common with dict_global_params'
            show_message(message, halt=True)
        dict_input_scenario[key] = val

    # Read the input sheets
    sheet_partition = dict_input_scenario['sheet_partition']
    input_sheetnames = [
        sheet_partition,
        dict_input_scenario['sheet_quantreg'],
        dict_input_scenario['sheet_cond_quant']
    ]
    df_scenario_collections = read_data_scenario(input_sheetnames)

    # Keep only the rows of 'Data' whose index lies between the first
    # and the last date of the partition sheet (both ends inclusive)
    partition_dates = df_scenario_collections[sheet_partition]['date'].values
    data = df_scenario_collections['Data']
    in_span = ((data.index >= partition_dates[0])
               & (data.index <= partition_dates[-1]))
    df_scenario_collections['Data'] = data[in_span]

    # return a dict for input parameters and the collection of data
    return dict_input_scenario, df_scenario_collections
Exemple #4
0
def do_quantfit(path='.', debug=False):
    '''
    Entry point function called when button for quantile fits is called.

    Runs prerun_quantfit, run_quantfit and postrun_quantfit in order and
    reports the elapsed time together with the output sheet names.

    Parameters:
        path  : forwarded unchanged to postrun_quantfit.
        debug : when True, progress banners, the input parameters and
                the input DataFrame are printed.

    Nothing is returned (the original note states that return values
    are not possible with the RunPython VBA code in xlwings).
    '''

    # Start measurement of time
    t0 = time.time()

    if debug:
        print('+' * 40)
        print('start of do_quantfit')
        print('+' * 40)

    # Call prerun
    if debug:
        print('---- calling prerun_quantfit')
    dict_input_quantfit, df_quantfit = prerun_quantfit(debug=debug)

    if debug:
        print('dict_input_quantfit:')
        for key in dict_input_quantfit:
            print(key.ljust(20) + ':' + str(dict_input_quantfit[key]))
        print('df_quantfit:')
        print(df_quantfit)

    # Call main run
    if debug:
        print('---- calling run_quantfit')
    dict_output_quantfit = run_quantfit(dict_input_quantfit,
                                        df_quantfit,
                                        debug=debug)

    # Call postrun
    if debug:
        print('---- calling postrun_quantfit')
    postrun_quantfit(dict_output_quantfit, path=path, debug=debug)

    # End measurement of time
    t1 = time.time()

    # Total time for this operation (formatted string)
    tdiff = "{:.1f}".format(t1 - t0)

    # Only output sheets are listed: 'sheet_input' names the sheet the
    # data was read from, so it is left out (same rule as do_partition)
    sheets = [
        sheetname for key, sheetname in dict_output_quantfit.items()
        if 'sheet' in key and key != 'sheet_input'
    ]
    message = 'Finished with quantfit in ' + tdiff + ' sec,\n'
    message += 'output is in sheets ' + ', '.join(sheets)
    show_message(message)
Exemple #5
0
def prerun_quantfit(debug=False):
    '''
    Prepare the inputs for step 2, quantfit.

    The quantfit parameters are read and checked, the global
    parameters are merged into the same dict (a key present in both
    is reported through show_message with halt=True), and the data
    of the input sheet is read.

    Returns the tuple (dict_input_quantfit, df_quantfit).
    '''

    if debug:
        rule = '=' * 30
        print(rule)
        print('start of prerun_quantfit')
        print(rule)

    # Read the parameters, then check them against the expected keys
    dict_input_quantfit = read_parameters_quantfit()
    expected_keys = [
        'quantlist', 'regressors', 'sheet_input', 'sheet_quantreg',
        'sheet_cond_quant'
    ]
    check_parameters_quantfit(dict_input_quantfit, expected_keys)

    # Merge the global parameters; the two dicts must not share keys
    global_params = read_parameters_global()
    for name in global_params:
        if name in dict_input_quantfit:
            show_message(
                'dict_input_quantfit should not have key ' + name +
                ' that is common with dict_global_params',
                halt=True)
        dict_input_quantfit[name] = global_params[name]

    # Read the data from the sheet named by 'sheet_input'
    df_quantfit = read_data_quantfit(dict_input_quantfit['sheet_input'])

    return dict_input_quantfit, df_quantfit
Exemple #6
0
def check_partition_groups(dict_groups, df_partition):
    '''
    Verify that every variable listed for a group is a column of
    df_partition.

    dict_groups maps a group name to an iterable of variable names.
    Each variable that is not a column is reported through
    show_message with halt=True. Nothing is returned.
    '''

    for group, varnames in dict_groups.items():
        for varname in varnames:
            if varname in df_partition.columns:
                continue
            show_message(
                'variable ' + varname + ' was specified for group ' +
                group + ' but does not exist in df_partition',
                halt=True)
def prerun_tsfit(debug=False):
    '''
    Prepare the inputs for step 3, tsfit.

    The tsfit parameters are read and checked, the global parameters
    are merged into the same dict (a key present in both is reported
    through show_message with halt=True), and the partition, quantile
    regression and conditional quantile sheets are read.

    Returns the tuple (dict_input_tsfit, df_tsfit_collections).
    '''

    if debug:
        rule = '=' * 30
        print(rule)
        print('start of prerun_tsfit')
        print(rule)

    # Read the parameters, then check them against the expected keys
    dict_input_tsfit = read_parameters_tsfit()
    check_parameters_tsfit(dict_input_tsfit,
                           ['latest_date', 'fit_params', 'sheet_tsfit'])

    # Merge the global parameters; the two dicts must not share keys
    global_params = read_parameters_global()
    for name in global_params:
        if name in dict_input_tsfit:
            show_message(
                'dict_input_tsfit should not have key ' + name +
                ' that is common with dict_global_params',
                halt=True)
        dict_input_tsfit[name] = global_params[name]

    # Sheets to read, in the order expected by read_data_tsfit
    sheet_keys = ('sheet_partition', 'sheet_quantreg', 'sheet_cond_quant')
    input_sheetnames = [dict_input_tsfit[sheet_key] for sheet_key in sheet_keys]
    df_tsfit_collections = read_data_tsfit(input_sheetnames)

    return dict_input_tsfit, df_tsfit_collections
Exemple #8
0
def prerun_segment(debug=False):
    '''
    Prerun function for the segment step.

    Reads and checks the segment input parameters, merges the global
    parameters into the same dict and reads the segment data.

    Parameters:
        debug : accepted for symmetry with the other prerun functions;
                it is not used inside this function.

    Returns:
        (dict_input_segment, df_segment_collections), where the second
        item is the object returned by read_data_segment.
    '''

    # --------------------------
    # Read in parameters
    # --------------------------
    dict_input_segment = read_parameters_segment()

    # --------------------------
    # Check parameter values
    # --------------------------
    check_parameters_segment(dict_input_segment)

    # Read in global parameters
    dict_global_params = read_parameters_global()

    # Add each key, val from dict_global_params to dict_input_segment
    for key, val in dict_global_params.items():
        # A key present in both dicts would be silently overwritten,
        # so report it (the message now names the correct dict)
        if key in dict_input_segment:
            message = 'dict_input_segment should not have key ' + key + ' that is common with dict_global_params'
            show_message(message, halt=True)
        dict_input_segment[key] = val

    # Read the data
    df_segment_collections = read_data_segment()

    # return a dict for input parameters and the collection of data
    return dict_input_segment, df_segment_collections
Exemple #9
0
def read_data_partition():
    '''
    Load the sheet "Data" of the workbook as the partition DataFrame.

    show_message is called with halt=True when the sheet "Data" does
    not exist or when a column name occurs more than once. The returned
    frame uses its 'date' column as index (the column itself is kept)
    and the index name is cleared.
    '''

    # The sheet has to exist before it can be read
    if 'Data' not in [sheet.name for sheet in wb.sheets]:
        show_message('Sheet named Data does not exist', halt=True)

    # Read the table starting at A1 as a DataFrame without index column
    top_left = wb.sheets['Data'].range('A1')
    df_partition = top_left.options(pd.DataFrame,
                                    index=False,
                                    expand='table').value

    # Collect every column name that shows up a second time
    seen = set()
    repeated = set()
    for name in df_partition.columns:
        bucket = repeated if name in seen else seen
        bucket.add(name)
    if repeated:
        message = 'Duplicate variables ' + ','.join(list(repeated)) + ' in datasheet, please check.'
        show_message(message, halt=True)

    # Index by date, without an index name
    df_partition.index = df_partition['date']
    df_partition.index.name = None

    # TODO: set index to PeriodIndex

    return df_partition
Exemple #10
0
def do_historical(debug=False):
    '''
    Entry point function called when button for the historical test
    is called.

    Runs prerun_historical, run_historical and postrun_historical in
    order and reports the elapsed time together with the output sheet.

    Parameters:
        debug : forwarded to the three stages; when True the caller
                workbook object is also printed.

    Nothing is returned (the original note states that return values
    are not possible with the RunPython VBA code in xlwings).
    '''
    # Start measurement of time
    t0 = time.time()

    # Make sure a wb exists
    if wb is None:
        print('historical_main: wb is None')
        print('This may be due to not calling set_mock_caller_file')
        print('and setting the caller Workbook')
        import sys
        sys.exit(-1)
    elif debug:
        print(wb)

    dict_input_historical, df_historical = prerun_historical(debug=debug)

    dict_output_historical = run_historical(dict_input_historical,
                                            df_historical,
                                            debug=debug)

    postrun_historical(dict_output_historical, debug=debug)

    # End measurement of time
    t1 = time.time()

    # Total time for this operation (formatted string)
    tdiff = "{:.1f}".format(t1 - t0)

    # There is a single output sheet, so no separator is needed
    # (the message used to read 'output is in sheets , <name>')
    sheetname = dict_output_historical['sheet_historical']
    message = 'Finished with historical test in ' + tdiff + ' sec,\n'
    message += 'output is in sheet ' + sheetname
    show_message(message, msgtype='info')
Exemple #11
0
def check_data_partition(df_partition, dict_input_partition):
    '''
    Check the partition data before it is used.

    Two checks are made, each reported through show_message with
    halt=True when it fails:
      * the column named by dict_input_partition['target'] has to be
        a column of df_partition,
      * no column of df_partition may have the object dtype, since
        that indicates text in the data.
    Nothing is returned.
    '''

    # --------------------------
    # Required column
    # --------------------------
    key = 'target'
    col = dict_input_partition[key]
    if col not in df_partition.columns:
        show_message(
            'col ' + col + ' for key ' + key +
            ' not in columns of df_partition',
            halt=True)

    # --------------------------
    # Numeric columns only
    # --------------------------
    object_dtype = np.dtype('O')
    if object_dtype not in set(df_partition.dtypes.values):
        # Quick global check passed, nothing more to do
        return

    # At least one object column exists: name all of them
    obj_cols = [
        name for name in df_partition.columns
        if df_partition[name].dtype == object_dtype
    ]
    message = ('The following columns were not numeric types\n' +
               'This may be due to the data containing characters\n' +
               'Please remove all characters and run again\n' +
               ', '.join(obj_cols))
    show_message(message, halt=True)
Exemple #12
0
def run_partition(dict_input_partition,
                  dict_groups,
                  df_partition,
                  debug=False):
    '''
    Main run function for step 1, partition.

    Does the partitioning and returns a dict of output parameters.
    ** This function should be independent of any Excel input/output
    and be executable as a regular Python function independent of Excel. **

    Parameters:
        dict_input_partition : dict of input parameters. The keys read
            here are 'sdate', 'edate', 'horizon', 'target', 'method',
            'pcutoff', 'method_growth' and 'PLS_target'; every entry
            whose key contains 'sheet_' is copied to the output.
        dict_groups  : dict whose keys are the group names; passed on
            to partition_retro as groups_dict.
        df_partition : DataFrame with a 'date' column, which is used
            as index (and kept as a column).
        debug        : when True, the inputs are printed.

    Returns:
        dict with the copied 'sheet_' entries plus the keys 'frame',
        'loading', 'log', 'figs', 'groups' and 'method'.

    Note: all warnings are silenced process-wide by this function.
    '''

    if debug:
        print('=' * 30)
        print('start of run_partition')
        print('=' * 30)

        # Show input parameters
        print('dict_input_partition:')
        for key in dict_input_partition:
            print(key.ljust(30) + ':' + str(dict_input_partition[key]))
        print('dict_groups:')
        for key in dict_groups:
            print(key.ljust(30) + ':' + str(dict_groups[key]))
        print('df_partition:')
        print(df_partition)

    warnings.filterwarnings("ignore")

    # ------------------------
    # Create output dict with
    # the sheet names copied
    # from dict_input_partition
    # ------------------------
    dict_output_partition = {
        key: val
        for key, val in dict_input_partition.items() if 'sheet_' in key
    }

    # ------------------------
    # Get parameters from
    # dict_input_partition
    # ------------------------
    sdate = dict_input_partition['sdate']
    edate = dict_input_partition['edate']
    horizon = dict_input_partition['horizon']
    tdep = dict_input_partition['target'] + '_hz_' + str(horizon)
    df_partition = df_partition.set_index(df_partition['date'], drop=False)
    method = dict_input_partition['method']
    benchcutoff = dict_input_partition['pcutoff']
    rgdp = dict_input_partition['target']  # column name for real GDP
    method_growth = dict_input_partition['method_growth']
    PLStarget = dict_input_partition['PLS_target']

    # ------------------------
    # Run the partition
    # ------------------------
    # The log returned here is handed back as-is under the key 'log'.
    # The former local log_frame was never used after being filled and
    # relied on DataFrame.append, which no longer exists in pandas 2.x.
    retroframe, retroload, logretro, exitcode = partition_retro(
        dall=df_partition,
        groups_dict=dict_groups,
        tdep=tdep,
        rgdp=rgdp,
        method_growth=method_growth,
        horizon=horizon,
        method=method,
        sdate=sdate,
        edate=edate,
        benchcutoff=benchcutoff,
        PLStarget=PLStarget)

    if exitcode == -1:
        message = 'In the given time period some groups are complete empty. No feasible partition can be made. Please adjust partition groups or start date'
        show_message(message, halt=True)

    # ------------------------
    # Add return values
    # ------------------------
    group_names = list(dict_groups.keys())
    figs = partition_plot(df_partition, retroframe, retroload,
                          group_names, PLStarget, tdep, method)
    dict_output_partition['frame'] = retroframe
    dict_output_partition['loading'] = retroload
    dict_output_partition['log'] = logretro
    dict_output_partition['figs'] = figs
    dict_output_partition['groups'] = list(dict_groups.keys())
    dict_output_partition['method'] = method
    return dict_output_partition
Exemple #13
0
def _is_whole_number(value, tol=1E-5):
    '''Return True when value is an int or float within tol of an integer.'''
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return False
    try:
        return abs(round(value) - value) <= tol
    except (ValueError, OverflowError):
        # round() raises for nan and inf, which are never valid options
        return False


def check_parameters_quantfit(dict_input_quantfit, keys):
    '''
    Check the input parameters for quantfit.

    Parameters:
        dict_input_quantfit : dict of input parameters; it is modified
            in place (regressor options are converted to int and empty
            sheet names are replaced by their defaults).
        keys : names of the parameters that must exist and are checked.

    Checks, each reported through show_message with halt=True:
        * every key of keys exists in dict_input_quantfit,
        * 'quantlist' values lie strictly between 0 and 1 and include
          0.10, 0.25, 0.50, 0.75 and 0.90,
        * each regressor has a known transform; 'None' must have no
          option, every other transform needs a whole-number option,
        * the default input sheet exists when 'sheet_input' is empty,
        * output sheet names do not reuse the name of an input sheet.
    '''

    # Check that all keys exist. Halt here, otherwise the lookups in
    # the loop below would fail with a KeyError.
    for key in keys:
        if key not in dict_input_quantfit:
            message = 'key ' + key + ' not found in dict_input_quantfit'
            show_message(message, halt=True)

    input_sheets = [
        'Readme', 'Input_parameters', 'Partition_groups', 'Data',
        'Processing_Log'
    ]
    valid_transforms = [
        'None', 'Lagged', 'MVA', 'Power', 'Diff', 'ChangeRate'
    ]
    for key in keys:
        val = dict_input_quantfit[key]

        if key == 'quantlist':
            # Check that all values are between 0 and 1,
            # and that the necessary values of 0.10, 0.25, 0.50, 0.75, 0.90 exist
            vals_np = np.array(val)  # create np.array for value checking
            if not (np.all(0 < vals_np) and np.all(vals_np < 1)):
                message = 'All values of quantlist must be between 0 and 1\n'
                message += 'Given values: ' + str(val)
                show_message(message, halt=True)
            # Check that necessary values are present
            necessary_vals = [0.10, 0.25, 0.50, 0.75, 0.90]
            for _val in necessary_vals:
                if _val not in val:
                    message = 'Value of ' + str(
                        _val) + ' must be included in quantlist\n'
                    message += 'Given values: ' + str(val)
                    show_message(message, halt=True)

        if key == 'regressors':
            # val is a dict of dicts with keys [regressor]['transform/option']
            for regressor in val:
                transform = val[regressor]['transform']
                option = val[regressor]['option']

                # Check that transform is a valid value from the pulldown menu
                if transform not in valid_transforms:
                    message = 'transform for ' + regressor + ' was not a valid option, given ' + transform
                    show_message(message, halt=True)

                # If no transformation was chosen, make sure no option was given
                if transform == 'None':
                    if option is not None:
                        message = 'option for regressor = ' + regressor + ' with transform of ' + transform + ' must not have option set'
                        show_message(message, halt=True)

                # Every other transform needs an option that is a whole
                # number; both ints and floats such as 3.0 are accepted
                elif transform in valid_transforms:
                    if not _is_whole_number(option):
                        message = 'option for regressor = ' + regressor + ' with transform of ' + transform + ' must have option of int, given ' + str(
                            option)
                        show_message(message, halt=True)
                    # Since the value is less than 1E-5 away from an int,
                    # convert to int so that there are no problems later
                    dict_input_quantfit['regressors'][regressor][
                        'option'] = int(round(option))

        if key == 'sheet_input':
            # If nothing had been specified in the cell containing the partition output sheet,
            # set to default
            if val is None:
                sheetname = 'Output_partitions'
                dict_input_quantfit[key] = sheetname

                # NOTE(review): existence is only verified for the default
                # name; a user-given name is not checked here - confirm
                # whether that is intended.
                sheetnames = [sheet.name for sheet in wb.sheets]
                if sheetname not in sheetnames:
                    message = 'Input sheet for quantfit: ' + sheetname + ' does not exist'
                    show_message(message, halt=True)

        elif 'sheet_' in key:
            # Output sheets: use the default name when nothing was
            # specified, otherwise make sure the name is not one of
            # the input sheet names
            if val is None:
                if key == 'sheet_quantreg':
                    dict_input_quantfit[key] = 'Quant reg coefficients'
                elif key == 'sheet_cond_quant':
                    dict_input_quantfit[key] = 'Conditional quantiles'
                else:
                    message = 'No sheet called ' + key + ' should exist'
                    show_message(message, halt=True)
            else:
                # Check that the specified sheetname is not one of the inputs
                # (it is OK that it is the same name as an existing sheet if
                # that sheet is an output)
                if val in input_sheets:
                    message = key + ' specified as ' + val + ', cannot be the same as necessary input sheet'
                    show_message(message, halt=True)
Exemple #14
0
def ffill_values(wb, df, method='ffill', limit=None, debug=False):
    '''
    Fill missing values at the end (ffill) or at the start (bfill) of
    each float column and write the filled values into sheet 'Data'.

    Input:
        df     : DataFrame for data; it is not modified
        wb     : workbook to be modified
        method : 'ffill' fills forward starting at the first valid
                 value of a column, 'bfill' fills backward up to the
                 last valid value; anything else is reported through
                 show_message with halt=True
        limit  : Limit on consecutive values to fill
        debug  : when True, progress information is printed

    Returns:
        A copy of df with the filled values.

    Every cell that received a filled value is written to the sheet
    and its font color is set to red. Columns whose dtype is not a
    float type are skipped.
    '''

    if debug:
        print('-' * 20 + ' start of ffill_values')

    # Work on a copy so the result can be compared to the original
    _df = df.copy()
    if debug:
        print('_df before ffill:')
        print(_df)

    # Since df has dates before dict_input_partition['sdate'] sliced off,
    # we need an offset for the row number
    daterange = wb.sheets['Data'].range('A1').expand(
        'down').value  # this is a list
    # Find the index where _df.index[0] is
    try:
        offset = daterange.index(_df.index[0])
    except ValueError:
        message = 'Could not find ' + str(
            _df.index[0]) + ' in range of sheet Data starting at A1'
        show_message(message, halt=True)

    if debug:
        print('_df.index[0] = ' + str(_df.index[0]))
        print('offset = ' + str(offset))
        print('_df.index[:20]:')
        print(_df.index[:20])
        print('daterange[:20]:')
        print(daterange[:20])

    # Range for all values in sheet Data (named so that the builtin
    # range is not shadowed)
    data_range = wb.sheets['Data'].range('A1').expand()

    # For each column fill missing values at one end only
    for icol, col in enumerate(_df):
        s = df[col]  # original

        # Skip dtypes that are not floats.
        # This is because the original data includes columns for isocodes
        if s.dtype not in [float, np.float64, np.float32]:
            if debug:
                print('skipping col ' + col + ' due to dtype being ' +
                      s.dtype.name)
            continue

        # If doing ffill, we don't want the initial missing values,
        # so the span starts at the first valid index
        first_index = s.first_valid_index()
        if debug:
            print('first_index = ' + str(first_index))

        # If doing bfill, we don't want the final missing values,
        # so the span ends at the last valid index
        last_index = s.last_valid_index()
        if debug:
            print('last_index = ' + str(last_index))

        # Fill an independent copy of the column and assign it back as
        # a whole; writing through a Series taken from _df is not
        # reflected in _df when pandas uses Copy-on-Write
        work = s.copy()
        if method == 'ffill':
            span = slice(first_index, None)
            work.loc[span] = work.loc[span].ffill(limit=limit)
        elif method == 'bfill':
            span = slice(None, last_index)
            work.loc[span] = work.loc[span].bfill(limit=limit)
        else:
            message = 'Function ffill_values cannot take in method ' + method
            show_message(message, halt=True)
            continue
        _df[col] = work

        # Values that were missing in the original and now have a value
        # (cells left empty because of limit are not counted as filled)
        before = s.loc[span]
        after = work.loc[span]
        filled = after[before.isnull() & after.notnull()]

        if debug and len(filled) > 0:
            print('values that have been filled in ffill:')
            print(filled)

        # Use the index and col name to fill the Excel sheet with the filled values
        for ind in filled.index:
            # Get the index location
            irow = _df.index.get_loc(ind)

            # Get the cell corresponding to the value we want to fill.
            # Note that depending on whether the date is a column or not, we need to add 1 to the column number,
            # and that the row has an offset determined at the beginning of this function
            cell = data_range[irow + offset, icol]
            if debug:
                print('Filling in row ' + str(cell.row) + ' column ' +
                      str(cell.column))
                print('with value ' + str(work.loc[ind]))

            # Fill the sheet with the filled value
            cell.value = work.loc[ind]

            # Set the font color to red
            # 3 is red
            # 4 is green
            # 5 is blue
            # 6 is yellow
            # 7 is magenta
            cell.api.Font.ColorIndex = 3
        # end of loop over index of filled
    # end of loop over columns in _df

    # Return the updated data
    return _df
Exemple #15
0
def interpolate_missing_values(wb, df, debug=False):
    '''
    Linearly interpolate missing in-between values of each float
    column and write the interpolated values into sheet 'Data'.

    Input:
        df    : DataFrame to be filled; it is not modified
        wb    : workbook to be modified
        debug : when True, progress information is printed

    Returns:
        A copy of df with the interpolated values.

    Only values between the first and the last valid value of a column
    are reported and written to the sheet; each written cell gets a
    blue font color. Columns whose dtype is not a float type are
    skipped.
    '''

    if debug:
        print('-' * 20 + ' start of interpolate_missing_values')

    # Work on a copy so the result can be compared to the original
    _df = df.copy()
    if debug:
        print('Total of ' + str(len(df.columns)) + ' cols')
        print('_df before interpolate_missing_values:')
        print(_df)

    # Since df has dates before dict_input_partition['sdate'] sliced off,
    # we need an offset for the row number
    daterange = wb.sheets['Data'].range('A1').expand(
        'down').value  # this is a list
    # Find the index where _df.index[0] is
    try:
        offset = daterange.index(_df.index[0])
    except ValueError:
        message = 'Could not find ' + str(
            _df.index[0]) + ' in range of sheet Data starting at A1'
        show_message(message, halt=True)

    if debug:
        print('_df.index[0] = ' + str(_df.index[0]))
        print('offset = ' + str(offset))
        print('_df.index[:20]:')
        print(_df.index[:20])
        print('daterange[:20]:')
        print(daterange[:20])

    # Range for all values in sheet Data (named so that the builtin
    # range is not shadowed)
    data_range = wb.sheets['Data'].range('A1').expand()

    # For each column fill missing values but not latest missing values
    for icol, col in enumerate(_df.columns):
        s = df[col]  # original

        # Skip dtypes that are not floats.
        # This is because the original data includes columns for isocodes
        if s.dtype not in [float, np.float64, np.float32]:
            if debug:
                print('skipping col ' + col + ' due to dtype being ' +
                      s.dtype.name)
            continue

        # Values after the last valid one are left missing, and values
        # before the first valid one are not reported as interpolated
        first_index = s.first_valid_index()
        last_index = s.last_valid_index()

        # Interpolate an independent copy of the column and assign it
        # back as a whole; writing through a Series taken from _df is
        # not reflected in _df when pandas uses Copy-on-Write
        work = s.copy()
        work.loc[:last_index] = work.loc[:last_index].interpolate(
            method='linear')
        _df[col] = work

        # Values that were missing in the original and now have a value
        before = s.loc[first_index:last_index]
        after = work.loc[first_index:last_index]
        filled = after[before.isnull() & after.notnull()]

        if debug:
            if len(filled) > 0:
                print('values that have been filled in interpolate:')
                print(filled)

        # Use the index and col name to fill the Excel sheet with the interpolated values
        for ind in filled.index:
            # Get the index location
            irow = _df.index.get_loc(ind)

            # Get the cell corresponding to the value we want to fill.
            # Note that depending on whether the date is a column or not, we need to add 1 to the column number,
            # and that the row has an offset determined at the beginning of this function
            cell = data_range[irow + offset, icol]
            if debug:
                print('Filling in row ' + str(cell.row) + ' column ' +
                      str(cell.column))
                print('with value ' + str(work.loc[ind]))

            # Fill the sheet with the interpolated values
            cell.value = work.loc[ind]

            # Set the font color to blue
            # 3 is red
            # 4 is green
            # 5 is blue
            cell.api.Font.ColorIndex = 5
        # end of loop over index of filled
    # end of loop over columns in _df

    # Return the updated data
    return _df
Exemple #16
0
def postrun_quantfit(dict_output_quantfit, debug=False):
    '''
    Postrun function for step 2, quantfit.

    Takes as input dict from main run function and uses the module-level
    workbook wb. The steps are:

    1. For every key containing 'sheet' (except 'sheet_input'), clear the
       sheet named by that entry, or create it after the "Data" sheet.
    2. Write 'qcoef' to the 'sheet_quantreg' sheet and 'cond_quant' to the
       'sheet_cond_quant' sheet.
    3. Save the figure in 'figs' as a PNG in a "figures" directory next to
       the calling workbook and insert it into the 'sheet_quantreg' sheet,
       sized by the number of entries in 'regressors'.
    4. Pass a Time/Action log of all of the above to add_logsheet.

    This function cannot return any values due to limitations
    of the RunPython VBA code in xlwings.

    '''

    if debug:
        print('=' * 30)
        print('start of postrun_quantfit')
        print('=' * 30)

    # Log entries are collected here and turned into a DataFrame at the end
    # (DataFrame.append no longer exists in pandas >= 2.0)
    log_rows = []

    def _log(action):
        # Record one action together with the time it was logged
        tn = date.now().strftime('%Y-%m-%d %H:%M:%S')
        log_rows.append({'Time': tn, 'Action': action})

    # Create the output sheets
    sheetvars = [key for key in dict_output_quantfit if 'sheet' in key]
    for sheetvar in sheetvars:

        # Don't do anything for the input sheet
        if sheetvar == 'sheet_input':
            continue

        # Get the actual sheet name
        sheetname = dict_output_quantfit[sheetvar]

        # Get existing sheetnames
        sheetnames = [sheet.name for sheet in wb.sheets]

        try:
            # Clear the sheet if it already exists
            if sheetname in sheetnames:
                wb.sheets[sheetname].clear()
                action = 'Cleared sheet ' + sheetname
            # Otherwise add it after the "Data" sheet
            else:
                wb.sheets.add(sheetname, after='Data')
                # Set output sheet colors to blue
                wb.sheets[sheetname].api.Tab.ColorIndex = 23
                action = 'Created sheet ' + sheetname
        except Exception:
            # str() so that a non-string sheet name cannot break the report
            action = 'Unable to access sheet ' + str(sheetname)

        _log(action)
    # end of loop over output sheetvars

    # Write out quantfit results
    try:
        for sheetvar in sheetvars:
            sheetname = dict_output_quantfit[sheetvar]
            if sheetvar == 'sheet_quantreg':
                wb.sheets[sheetname].range('A1').options(
                    index=False).value = dict_output_quantfit['qcoef']
                wb.sheets[sheetname].autofit()
            elif sheetvar == 'sheet_cond_quant':
                wb.sheets[sheetname].range('A1').options(
                    index=True).value = dict_output_quantfit['cond_quant']
                wb.sheets[sheetname].autofit()
        action = 'Quantfit results saved succesfully.'
    except Exception:
        action = 'Unable to output quantfit results.'
        print(action)
    # This outcome used to be overwritten before it reached the log
    _log(action)

    sheetname = dict_output_quantfit['sheet_quantreg']
    try:
        # Best effort: remove the picture left by a previous run, if any
        wb.sheets[sheetname].pictures[0].delete()
    except Exception:
        pass
    sheet = wb.sheets[sheetname]
    fig = dict_output_quantfit['figs']

    # Set the path of the output file to be in the same dir as the
    # calling Excel file
    fullpath = os.path.abspath(os.path.dirname(wb.fullname) + '/figures')
    os.makedirs(fullpath, exist_ok=True)
    outfilename = os.path.join(
        fullpath,
        'quantfit_' + date.now().strftime('%Y_%m-%d@%H_%M-%S') + '.png')
    fig.savefig(outfilename)

    # The picture size depends on the number of regressors
    cr = len(dict_output_quantfit['regressors'])
    height = 340 * (cr // 4 + 1)
    width = 240 * min(4, cr + 1)
    try:
        anchor = sheet.range('N6')
        pic = sheet.pictures.add(fig,
                                 name='MyPlot_q',
                                 update=True,
                                 left=anchor.left,
                                 top=anchor.top,
                                 height=height,
                                 width=width)
        # Set the size again on the returned picture, as the original did
        pic.height = height
        pic.width = width

        action = 'Quantile figure saved'
    except Exception:
        action = 'Unable to add figure to sheet ' + str(sheetname)
    _log(action)

    # Write out log_frame
    log_frame = pd.DataFrame(log_rows, columns=['Time', 'Action'])
    add_logsheet(wb, log_frame, colnum=3)
Exemple #17
0
def gen_relation(shockdict, partition_groups, df_data, df_partition):
    '''
    Apply the requested shocks to variables and partition groups.

    Parameters
    ----------
    shockdict : dict
        Maps a variable (or group) name to a dict with keys 'shocktype'
        ('By +/- STD' or 'By +/- percentage'; anything else applies no
        shock) and 'shockvalue'.
    partition_groups : dict
        Maps a group name to the collection of variables in that group.
    df_data : DataFrame
        Must contain a 'date' column and one column per shocked variable.
    df_partition : DataFrame
        Must contain a 'date' column and one column per group.

    Returns
    -------
    (df_shockedvar, df_shockedgrp)
        df_shockedvar has, for each shocked variable that is not a group
        name, the columns '<var>' and '<var>_shocked'.
        df_shockedgrp is a copy of df_partition with a '<col>_shocked'
        column added for every column. A shock to a member variable is
        passed on to its group scaled by the correlation between the
        variable and the group; a shock to a group name is applied directly.

    A non-halting message is shown when a variable belongs to no group, or
    is both a group member and a group name.
    '''

    df_shockedvar = pd.DataFrame(index=df_data.index)
    df_shockedgrp = df_partition.copy()
    # Every column starts with an unshocked '<col>_shocked' twin
    for group in df_shockedgrp.columns:
        df_shockedgrp[group + '_shocked'] = df_shockedgrp[group]

    for var, shock in shockdict.items():
        shocktype = shock['shocktype']
        shockvalue = shock['shockvalue']
        is_group_name = var in partition_groups
        # Number of groups this shock has been applied to
        ct = 0

        if shocktype == 'By +/- STD':
            # std is only used below when shocktype is 'By +/- STD'
            if is_group_name:
                std = np.nanstd(df_shockedgrp[var])
            else:
                df_shockedvar[var] = df_data[var]
                std = np.nanstd(df_data[var].values)
                df_shockedvar[var + '_shocked'] = (df_shockedvar[var] +
                                                   std * shockvalue)
        elif shocktype == 'By +/- percentage' and not is_group_name:
            df_shockedvar[var] = df_data[var]
            df_shockedvar[var + '_shocked'] = df_shockedvar[var] * (
                1 + shockvalue)

        for group, compvars in partition_groups.items():
            shocked_col = group + '_shocked'

            if var in compvars and is_group_name:
                print(var + ' is not well  defined.')
                message = var + ' is in partition groups and also a group name. Please Check'
                show_message(message, halt=False)

            if var in compvars:
                ct += 1
                df_var = df_data[['date', var]].dropna()
                df_part = df_partition[['date', group]].dropna()

                # Pair the observations by date. Pairing by position, as was
                # done before, fails (or silently pairs different dates)
                # when the two columns have missing values on different dates.
                var_by_date = pd.Series(df_var[var].values,
                                        index=df_var['date'].values)
                grp_by_date = pd.Series(df_part[group].values,
                                        index=df_part['date'].values)
                var_by_date, grp_by_date = var_by_date.align(grp_by_date,
                                                             join='inner')

                # Note: despite the name, this is the correlation coefficient
                cov = np.corrcoef(var_by_date.values,
                                  grp_by_date.values)[0][1]
                print(group, cov, var, shocktype, shockvalue)
                if shocktype == 'By +/- STD':
                    df_shockedgrp[shocked_col] = (df_shockedgrp[shocked_col] +
                                                  std * shockvalue * cov)
                elif shocktype == 'By +/- percentage':
                    df_shockedgrp[shocked_col] = (
                        df_shockedgrp[shocked_col] +
                        df_data[var] * shockvalue * cov)
            elif var == group:
                # The shocked name is the group itself: shock it directly
                ct += 1
                print(group, var, shocktype, shockvalue)
                if shocktype == 'By +/- STD':
                    df_shockedgrp[shocked_col] = (df_shockedgrp[shocked_col] +
                                                  std * shockvalue)
                elif shocktype == 'By +/- percentage':
                    df_shockedgrp[shocked_col] = (
                        df_shockedgrp[shocked_col] +
                        df_shockedgrp[group] * shockvalue)

        if ct == 0:
            print(var + ' not in any group.')
            message = var + ' not in any partition groups. Please Check'
            show_message(message, halt=False)

    return df_shockedvar, df_shockedgrp
Exemple #18
0
def postrun_quantfit(dict_output_quantfit, path='.', debug=False):
    '''
    Postrun function for step 2, quantfit.

    Takes as input dict from main run function and uses the module-level
    workbook wb. The steps are:

    1. For every key containing 'sheet' (except 'sheet_input'), clear the
       sheet named by that entry, or create it after the "Data" sheet.
    2. Write 'qcoef', 'cond_quant' and 'localprj' to the sheets named by
       'sheet_quantreg', 'sheet_cond_quant' and 'sheet_local_proj'.
    3. Save the figure in 'figs' as a PNG inside the directory given by
       path and insert it into the 'sheet_quantreg' sheet.
    4. Pass a Time/Action log of all of the above to add_logsheet.

    This function cannot return any values due to limitations
    of the RunPython VBA code in xlwings.

    '''
    # Imported here so the block does not depend on a module-level import
    import os

    if debug:
        print('=' * 30)
        print('start of postrun_quantfit')
        print('=' * 30)

    # Log entries are collected here and turned into a DataFrame at the end
    # (DataFrame.append no longer exists in pandas >= 2.0)
    log_rows = []

    def _log(action):
        # Record one action together with the time it was logged
        tn = date.now().strftime('%Y-%m-%d %H:%M:%S')
        log_rows.append({'Time': tn, 'Action': action})

    # Create the output sheets
    sheetvars = [key for key in dict_output_quantfit if 'sheet' in key]
    for sheetvar in sheetvars:

        # Don't do anything for the input sheet
        if sheetvar == 'sheet_input':
            continue

        # Get the actual sheet name
        sheetname = dict_output_quantfit[sheetvar]

        # Get existing sheetnames
        sheetnames = [sheet.name for sheet in wb.sheets]

        try:
            # Clear the sheet if it already exists
            if sheetname in sheetnames:
                wb.sheets[sheetname].clear()
                action = 'Cleared sheet ' + sheetname
            # Otherwise add it after the "Data" sheet
            else:
                wb.sheets.add(sheetname, after='Data')
                # Set output sheet colors to blue
                # (ColorIndex is the documented spelling of the property)
                wb.sheets[sheetname].api.Tab.ColorIndex = 23
                action = 'Created sheet ' + sheetname
        except Exception:
            # str() so that a non-string sheet name cannot break the report
            action = 'Unable to access sheet ' + str(sheetname)

        _log(action)
    # end of loop over output sheetvars

    # Write out quantfit results
    try:
        for sheetvar in sheetvars:
            sheetname = dict_output_quantfit[sheetvar]
            if sheetvar == 'sheet_quantreg':
                wb.sheets[sheetname].range('A1').options(
                    index=False).value = dict_output_quantfit['qcoef']
            elif sheetvar == 'sheet_cond_quant':
                wb.sheets[sheetname].range('A1').options(
                    index=True).value = dict_output_quantfit['cond_quant']
            elif sheetvar == 'sheet_local_proj':
                wb.sheets[sheetname].range('A1').options(
                    index=False).value = dict_output_quantfit['localprj']
        action = 'Quantfit results saved succesfully.'
    except Exception:
        action = 'Unable to output quantfit results.'
        print(action)
    # This outcome used to be overwritten before it reached the log
    _log(action)

    sheetname = dict_output_quantfit['sheet_quantreg']
    try:
        # Best effort: remove the picture left by a previous run, if any
        wb.sheets[sheetname].pictures[0].delete()
    except Exception:
        pass
    sheet = wb.sheets[sheetname]
    fig = dict_output_quantfit['figs']

    # Save a copy of the figure inside the requested directory
    outfilename = os.path.join(
        path, 'quantfit' + date.now().strftime('%Y%m%d-%H-%M') + '.png')
    fig.savefig(outfilename)

    try:
        anchor = sheet.range('L7')
        sheet.pictures.add(fig,
                           name='MyPlot_q',
                           update=True,
                           left=anchor.left,
                           top=anchor.top,
                           height=260,
                           width=1040)
        action = 'Quantile figure saved'
    except Exception:
        action = 'Unable to add figure to sheet ' + str(sheetname)
    _log(action)

    # Write out log_frame
    log_frame = pd.DataFrame(log_rows, columns=['Time', 'Action'])
    add_logsheet(wb, log_frame, colnum=3)
Exemple #19
0
def check_parameters_tsfit(dict_input_tsfit, keys):
    '''
    Check the input parameters for tsfit.

    Parameters
    ----------
    dict_input_tsfit : dict
        Input parameters. Modified in place: 'sheet_*' entries that are
        None get their default sheet name, and regressor options that are
        whole numbers are converted to int.
    keys : iterable of str
        Keys expected in dict_input_tsfit. A missing key is reported with
        show_message (without halting).

    Problems are reported through show_message; most of them with
    halt=True. Nothing is returned.
    '''

    # Check that all keys exist
    for key in keys:
        if key not in dict_input_tsfit:
            message = 'key ' + key + ' not found in dict_input_tsfit'
            show_message(message)

    # Sheets that are used as input; an output sheet may not reuse these names
    input_sheets = [
        'Readme', 'Input_parameters', 'Partition_groups', 'Data',
        'Processing_Log'
    ]
    # Default names for output sheets that were left empty
    # ('sheet_tsfit' depends on the fit type and is handled separately)
    default_sheets = {
        'sheet_quantreg': 'Quant reg coefficients',
        'sheet_cond_quant': 'Conditional quantiles',
        'sheet_local_proj': 'Local projections',
        'sheet_partition': 'Output_partitions',
    }
    # Transforms that need a whole-number option
    option_transforms = ['Lagged', 'MVA', 'Power', 'Diff', 'ChangeRate']
    # fit_params entries that carry a 'constraint' and a 'value'.
    # NOTE(review): 'mode' is not in this list, so it is not validated here
    # (the old branch for it was unreachable) - confirm this is intended.
    constrained_vars = ['dof', 'var_low', 'var_high', 'skew_low', 'skew_high']

    for key in dict_input_tsfit:
        val = dict_input_tsfit[key]

        if key == 'latest_date':
            if not isinstance(val, date):
                message = 'latest_date = ' + str(
                    val) + ' was not a datetime.datetime object'
                show_message(message)
            # the range of the date is checked in Excel and is not checked here

        elif 'sheet_' in key:
            if val is None:
                if key in default_sheets:
                    dict_input_tsfit[key] = default_sheets[key]
                elif key == 'sheet_tsfit':
                    if dict_input_tsfit['fit_params'][
                            'fittype'] == 'Asymmetric T':
                        dict_input_tsfit[key] = 'Asymmetric T fit'
                    else:
                        dict_input_tsfit[key] = 'T-skew fit'
                else:
                    message = 'No sheet called ' + key + ' should exist'
                    show_message(message, halt=True)
            elif val in input_sheets:
                # The specified sheetname must not be one of the inputs
                # (it is OK that it is the same name as an existing sheet if
                # that sheet is an output)
                message = key + ' specified as ' + str(
                    val) + ', cannot be the same as necessary input sheet'
                show_message(message, halt=True)

        elif key == 'regressors':
            # val is a dict of dicts with keys [regressor]['transform/option']
            for regressor in val:
                transform = val[regressor]['transform']
                option = val[regressor]['option']

                # Check that transform is a valid value from the pulldown menu
                if transform not in ['None'] + option_transforms:
                    message = 'transform for ' + str(
                        regressor) + ' was not a valid option, given ' + str(
                            transform)
                    show_message(message, halt=True)

                # If no transformation was chosen, make sure no option was given
                if transform == 'None':
                    if option is not None:
                        message = 'option for regressor = ' + str(
                            regressor
                        ) + ' with transform of ' + transform + ' must not have option set'
                        show_message(message, halt=True)

                # All other transforms need an option that is a whole number
                if transform in option_transforms:
                    # Accept int as well as float (bool is excluded)
                    is_number = isinstance(
                        option, (int, float)) and not isinstance(option, bool)
                    if not is_number or abs(int(option) - option) > 1E-5:
                        message = 'option for regressor = ' + str(
                            regressor
                        ) + ' with transform of ' + transform + ' must have option of int, given ' + str(
                            option)
                        show_message(message, halt=True)
                    # Since the value is less than 1E-5 away from an int,
                    # convert to int so that there are no problems later
                    dict_input_tsfit['regressors'][regressor]['option'] = int(
                        option)

        elif key == 'fit_params':
            # This is a dict keyed by the variable name of each constraint;
            # the constrained variables have 2 keys 'constraint' and 'value'
            dict_params = val

            for varname in dict_params:

                if varname in constrained_vars:
                    constraint = dict_params[varname]['constraint']
                    value = dict_params[varname]['value']

                    # Check that the constraint is from the pulldown
                    if constraint not in ['Free', 'Fixed', 'Default']:
                        message = 'constraint for ' + varname + ' was ' + str(
                            constraint) + ', not in pulldown values'
                        show_message(message, halt=True)

                    # Check that no value is given when Free
                    if constraint in ['Free', 'Median'] and value is not None:
                        message = 'constraint for ' + varname + ' was ' + constraint + ' so value cannot be given as ' + str(
                            value)
                        show_message(message, halt=True)

                    # Check that value is a float when Fixed
                    if constraint in ['Fixed'] and not isinstance(value, float):
                        message = 'If constraint for ' + varname + ' is ' + constraint + ', value must be float, given as ' + str(
                            value)
                        show_message(message, halt=True)

                elif varname == 'qsmooth':
                    # A smoothing option other than 'None' needs a period
                    if dict_params[varname]['option'] != 'None' and dict_params[
                            varname]['period'] is None:
                        message = 'Please provide period number for quantile smooth.'
                        show_message(message, halt=True)
                elif varname == 'fittype':
                    if dict_params[varname] not in ['Asymmetric T', 'T-skew']:
                        message = 'Not valid skewed T distribution option.'
                        show_message(message, halt=True)
Exemple #20
0
def check_parameters_segment(dict_input_segment):
    '''
    Check the input parameters for segment.

    Parameters
    ----------
    dict_input_segment : dict
        Input parameters. Modified in place: 'sheet_*' entries that are
        None get their default sheet name, and regressor options that are
        whole numbers are converted to int.

    Every key present in the dict is validated according to its name;
    problems are reported through show_message (with halt=True for the
    quantlist, regressors, shockvars and sheet checks). Nothing is returned.
    '''
    # Sheets that are used as input; an output sheet may not reuse these names
    input_sheets = [
        'Readme', 'Input_parameters', 'Partition_groups', 'Data',
        'Processing_Log'
    ]
    # Default names for output sheets that were left empty
    default_sheets = {
        'sheet_quantreg': 'Quant reg coefficients',
        'sheet_cond_quant': 'Conditional quantiles',
        'sheet_local_proj': 'Local projections',
        'sheet_partition': 'Output_partitions',
        'sheet_segment': 'Multiple_projections',
        'sheet_term': 'Term_Structure',
    }
    # Transforms that need a whole-number option
    option_transforms = ['Lagged', 'MVA', 'Power', 'Diff', 'ChangeRate']

    for key in dict_input_segment:
        val = dict_input_segment[key]

        # str(val) is used in the messages so that a non-string value
        # (e.g. None or a number) is reported instead of raising TypeError

        if key == 'freq':
            if val not in ['Monthly', 'Quarterly', 'Yearly']:
                message = 'freq = ' + str(val) + ' was not a valid value'
                show_message(message)

        elif key in ('sdate', 'edate'):
            if not isinstance(val, date):
                message = key + ' = ' + str(
                    val) + ' was not a datetime.datetime object'
                show_message(message)
            # the range of the date is checked in Excel and is not checked here

        elif key == 'method':
            if val not in ['LDA', 'PCA', 'PLS']:
                message = 'method = ' + str(val) + ' was not a valid value'
                show_message(message)

        elif key == 'pcutoff':
            try:
                in_range = 0 < val < 1
            except TypeError:
                # Not comparable with a number (e.g. None)
                in_range = False
            if not in_range:
                print('pcutoff = ' + str(val))
                message = 'pcutoff = ' + str(
                    val) + ' was not a valid value, must be between 0 and 1'
                show_message(message)

        elif key == 'real_GDP':
            # Always reported, without a message box
            message = 'benchmark = ' + str(val) + ' needs to be checked'
            show_message(message, output_messagebox=False)

        elif key == 'method_growth':
            if val not in ['cpd', 'yoy', 'level']:
                message = 'method_growth = ' + str(
                    val) + ' must be one of cpd/yoy/level'
                show_message(message, output_messagebox=False)

        elif key == 'retropolate':
            if val not in ['Yes', 'No']:
                message = 'retropolate = ' + str(val) + ' was not a valid value'
                show_message(message)

        elif key == 'quantlist':
            # Check that all values are between 0 and 1,
            # and that the necessary values of 0.10, 0.25, 0.50, 0.75, 0.90 exist
            vals_np = np.array(val)  # create np.array for value checking
            if not (np.all(0 < vals_np) and np.all(vals_np < 1)):
                message = 'All values of quantlist must be between 0 and 1. '
                message += 'Given values: ' + str(val)
                show_message(message, halt=True)
            # Check that necessary values are present
            necessary_vals = [0.10, 0.25, 0.50, 0.75, 0.90]
            for _val in necessary_vals:
                if _val not in val:
                    message = 'Value of ' + str(
                        _val) + ' must be included in quantlist. '
                    message += 'Given values: ' + str(val)
                    show_message(message, halt=True)

        elif key == 'regressors':
            # val is a dict of dicts with keys [regressor]['transform/option']
            for regressor in val:
                transform = val[regressor]['transform']
                option = val[regressor]['option']

                # Check that transform is a valid value from the pulldown menu
                if transform not in ['None'] + option_transforms:
                    message = 'transform for ' + str(
                        regressor) + ' was not a valid option, given ' + str(
                            transform)
                    show_message(message, halt=True)

                # If no transformation was chosen, make sure no option was given
                if transform == 'None':
                    if option is not None:
                        message = 'option for regressor = ' + str(
                            regressor
                        ) + ' with transform of ' + transform + ' must not have option set'
                        show_message(message, halt=True)

                # All other transforms need an option that is a whole number
                if transform in option_transforms:
                    # Accept int as well as float (bool is excluded)
                    is_number = isinstance(
                        option, (int, float)) and not isinstance(option, bool)
                    if not is_number or abs(int(option) - option) > 1E-5:
                        message = 'option for regressor = ' + str(
                            regressor
                        ) + ' with transform of ' + transform + ' must have option of int, given ' + str(
                            option)
                        show_message(message, halt=True)
                    # Since the value is less than 1E-5 away from an int,
                    # convert to int so that there are no problems later
                    dict_input_segment['regressors'][regressor][
                        'option'] = int(option)

        elif key == 'shockvars':
            # val is a dict of dicts with keys [shockvar]['shocktype/shockvalue']
            for shockvar in val:
                shocktype = val[shockvar]['shocktype']
                shockvalue = val[shockvar]['shockvalue']

                # Check that shocktype is a valid value from the pulldown menu
                if shocktype not in [
                        'None', 'By +/- STD', 'By +/- percentage'
                ]:
                    message = 'Shock type for ' + str(
                        shockvar) + ' was not a valid option, given ' + str(
                            shocktype)
                    show_message(message, halt=True)

                # A shock needs a value whose magnitude is at most 10
                if shocktype in ['By +/- STD', 'By +/- percentage']:
                    try:
                        out_of_range = abs(shockvalue) > 10
                    except TypeError:
                        # Not a number (e.g. None)
                        out_of_range = True
                    if out_of_range:
                        message = 'Shock value for variable = ' + str(
                            shockvar
                        ) + ' with shocktype of ' + shocktype + ' must be a number between -10 and 10, given ' + str(
                            shockvalue)
                        show_message(message, halt=True)

        elif 'sheet_' in key:
            if val is None:
                if key in default_sheets:
                    dict_input_segment[key] = default_sheets[key]
                else:
                    message = 'No sheet called ' + key + ' should exist'
                    show_message(message, halt=True)
            elif val in input_sheets:
                # The specified sheetname must not be one of the inputs
                # (it is OK that it is the same name as an existing sheet if
                # that sheet is an output)
                message = key + ' specified as ' + str(
                    val) + ', cannot be the same as necessary input sheet'
                show_message(message, halt=True)
Exemple #21
0
def postrun_partition(dict_output_partition, debug=False):
    '''
    Postrun function for step 1, partition.

    Takes as input dict from main run function and uses the module-level
    workbook wb. The steps are:

    1. For every key containing 'sheet', clear the sheet named by that
       entry, or create it after the "Data" sheet.
    2. Write 'frame' to the 'sheet_partitions' sheet and 'loading' to the
       'sheet_loadings' sheet.
    3. Delete the existing shapes on the 'sheet_partitions' sheet, save the
       figures in 'figs' as PNG files in a "figures" directory next to the
       calling workbook and insert them into the sheet. For method 'PLS'
       there is one figure per entry of 'groups'; otherwise the first two
       figures are used.
    4. Pass a Time/Action log of all of the above to add_logsheet.

    This function cannot return any values due to limitations
    of the RunPython VBA code in xlwings.

    '''

    if debug:
        print('=' * 30)
        print('start of postrun_partition')
        print('=' * 30)

    # Log entries are collected here and turned into a DataFrame at the end
    # (DataFrame.append no longer exists in pandas >= 2.0)
    log_rows = []

    def _log(action):
        # Record one action together with the time it was logged
        tn = date.now().strftime('%Y-%m-%d %H:%M:%S')
        log_rows.append({'Time': tn, 'Action': action})

    # Create the output sheets
    for sheetvar in [key for key in dict_output_partition if 'sheet' in key]:

        # Get the actual sheet name
        sheetname = dict_output_partition[sheetvar]

        # Get existing sheetnames
        sheetnames = [sheet.name for sheet in wb.sheets]

        try:
            # Clear the sheet if it already exists
            if sheetname in sheetnames:
                wb.sheets[sheetname].clear()
                action = 'Cleared sheet ' + sheetname
            # Otherwise add it after the "Data" sheet
            else:
                wb.sheets.add(sheetname, after='Data')
                # Set output sheet colors to blue
                # (ColorIndex is the documented spelling of the property)
                wb.sheets[sheetname].api.Tab.ColorIndex = 23
                action = 'Created sheet ' + sheetname
        except Exception:
            # str() so that a non-string sheet name cannot break the report
            action = 'Unable to access sheet ' + str(sheetname)

        _log(action)
    # end of loop over output sheetvars

    # Write out partition and loadings
    try:
        sheetname = dict_output_partition['sheet_partitions']
        wb.sheets[sheetname].range('A1').options(
            index=False).value = dict_output_partition['frame']
        wb.sheets[sheetname].autofit()
        sheetname = dict_output_partition['sheet_loadings']
        wb.sheets[sheetname].range('A1').options(
            index=False).value = dict_output_partition['loading']
        wb.sheets[sheetname].autofit()
        action = 'Partitions and loadings saved succesfully.'
    except Exception:
        action = 'Unable to output partitions and loadings.'
    # This outcome used to be overwritten before it reached the log
    _log(action)

    sheetname = dict_output_partition['sheet_partitions']

    # Remove the shapes (pictures) left by a previous run
    for p in wb.sheets[sheetname].shapes:
        try:
            p.delete()
        except Exception as e:
            print(e)
    sheet = wb.sheets[sheetname]

    # Set the path of the output files to be in the same dir as the
    # calling Excel file
    fullpath = os.path.abspath(os.path.dirname(wb.fullname) + '/figures')
    os.makedirs(fullpath, exist_ok=True)

    figs = dict_output_partition['figs']

    if dict_output_partition['method'] == 'PLS':
        # One figure per group, stacked down column M every 38 rows
        for i, fig in enumerate(figs):
            group = dict_output_partition['groups'][i]
            outfilename = os.path.join(
                fullpath, 'partition_PLS_' + group +
                date.now().strftime('%Y_%m-%d@%H_%M-%S') + '.png')
            fig.savefig(outfilename)
            try:
                anchor = sheet.range('M' + str(2 + i * 38))
                sheet.pictures.add(fig,
                                   name='MyPlot_P' + str(i + 1),
                                   update=True,
                                   left=anchor.left,
                                   top=anchor.top,
                                   height=500,
                                   width=750)
                action = 'Partition figure saved'
            except Exception:
                action = 'Unable to add figure to sheet ' + str(sheetname)
            _log(action)
    else:
        fig = figs[0]
        outfilename = os.path.join(
            fullpath,
            'partition_' + date.now().strftime('%Y_%m-%d@%H_%M-%S') + '.png')
        fig.savefig(outfilename)

        # The width of the first picture grows with the number of groups
        cr = len(dict_output_partition['groups'])
        try:
            anchor = sheet.range('M2')
            sheet.pictures.add(fig,
                               name='MyPlot_P1',
                               update=True,
                               left=anchor.left,
                               top=anchor.top,
                               height=720,
                               width=cr * 255)
            action = 'Partition figure saved'
        except Exception:
            action = 'Unable to add figure to sheet ' + str(sheetname)
        _log(action)

        # The second figure is only inserted, it is not saved to disk.
        # Its size is fixed: the method cannot be 'PLS' in this branch.
        fig1 = figs[1]
        try:
            anchor = sheet.range('M54')
            sheet.pictures.add(fig1,
                               name='MyPlot_P2',
                               update=True,
                               left=anchor.left,
                               top=anchor.top,
                               height=320,
                               width=320)
            action = 'Partition figure saved'
        except Exception:
            action = 'Unable to add figure to sheet ' + str(sheetname)
        _log(action)

    # Write out log_frame
    log_frame = pd.DataFrame(log_rows, columns=['Time', 'Action'])
    add_logsheet(wb, log_frame, colnum=1)
Exemple #22
0
def check_parameters_partition(dict_input_partition, keys):
    '''
    Check the input parameters for partition.

    Every problem found is reported through show_message. As a side
    effect, an output sheet name ('sheet_partitions', 'sheet_loadings')
    given as None is replaced in dict_input_partition by its default,
    obtained by swapping the 'sheet' prefix for 'Output'.

    dict_input_partition: dict of parameter name -> value
                          (modified in place for defaulted sheet names)
    keys: list of parameter names that must be present and are checked

    The workbook is read from the module-level name wb.
    '''

    # Check that all keys exist
    for key in keys:
        if key not in dict_input_partition:
            message = 'key ' + key + ' not found in dict_input_partition'
            show_message(message)

    # Get the sheets in the wb
    sheetnames = [sheet.name for sheet in wb.sheets]

    # These are the sheets that are used as input and never overwritten
    input_sheets = [
        'Readme', 'Input_parameters', 'Partition_groups', 'Data',
        'Processing_Log'
    ]

    # Check for necessary sheets
    for sheetname in ['Partition_groups', 'Data']:
        if sheetname not in sheetnames:
            message = 'sheet with name ' + sheetname + ' must be in input Excel file'
            show_message(message)

    # ------------------------------------------- #
    # Go through each value and check the values
    # ------------------------------------------- #

    for key in keys:
        val = dict_input_partition[key]

        # str(val) is used in every message below so that a value of the
        # wrong type (None, a number, ...) is reported to the user instead
        # of raising TypeError while the message is being built.

        if key == 'freq':
            if val not in ['Monthly', 'Quarterly', 'Yearly']:
                message = 'freq = ' + str(val) + ' was not a valid value'
                show_message(message)

        elif key in ('sdate', 'edate'):
            # NOTE(review): date is a module-level name; the message text
            # implies it is the datetime.datetime class -- confirm.
            if not isinstance(val, date):
                message = key + ' = ' + str(
                    val) + ' was not a datetime.datetime object'
                show_message(message)
            # the range of the date is checked in Excel and is not checked here

        elif key == 'method':
            if val not in ['LDA', 'PCA', 'PLS']:
                message = 'method = ' + str(val) + ' was not a valid value'
                show_message(message)

        elif key == 'pcutoff':
            # A non-numeric value (None, text) cannot be compared with a
            # number; treat it as out of range rather than crashing.
            try:
                in_range = 0 < val < 1
            except TypeError:
                in_range = False
            if not in_range:
                print('pcutoff = ' + str(val))
                message = 'pcutoff = ' + str(
                    val) + ' must be a number between 0 and 1'
                show_message(message)

        elif key == 'real_GDP':
            message = 'benchmark = ' + str(val) + ' needs to be checked'
            show_message(message, output_messagebox=False)

        elif key == 'method_growth':
            if val not in ['cpd', 'yoy', 'level']:
                message = 'method_growth = ' + str(
                    val) + ' must be one of cpd/yoy/level'
                show_message(message, output_messagebox=False)

        elif key == 'retropolate':
            if val not in ['Yes', 'No']:
                message = 'retropolate = ' + str(val) + ' was not a valid value'
                show_message(message)

        elif key in ['sheet_partitions', 'sheet_loadings']:
            # If a value was specified, check that it is not one of the
            # input sheet names and use it as the output sheet name.
            # Otherwise use the default 'Output_partitions' /
            # 'Output_loadings'.
            if val is None:
                dict_input_partition[key] = key.replace('sheet', 'Output')
            elif val in input_sheets:
                # (it is OK that it is the same name as an existing sheet
                # if that sheet is an output)
                message = key + ' specified as ' + str(
                    val) + ', cannot be the same as necessary input sheet'
                show_message(message)
Exemple #23
0
def prerun_partition(debug=False):
    '''
    Prerun function for step 1, partition.

    Reads and checks the partition and global input parameters, reads
    the data and the partition groups, checks them, and formats the data
    for the requested start and end dates.

    debug only controls whether a start banner is printed.

    Returns the dict of input parameters, the dict of groups and the
    df holding the data, in that order.
    '''

    if debug:
        banner = '=' * 30
        print(banner)
        print('start of prerun_partition')
        print(banner)

    # Names of the parameters that must be present in the input dict
    required_keys = [
        'freq', 'sdate', 'edate', 'method', 'pcutoff', 'method_growth',
        'retropolate', 'sheet_partitions', 'sheet_loadings'
    ]

    # Read the partition parameters, then validate them
    dict_input_partition = read_parameters_partition()
    check_parameters_partition(dict_input_partition, required_keys)

    # Read the global parameters
    # (this also checks if values have changed since being initially set)
    dict_global_params = read_parameters_global()

    # Fold the global parameters into the partition parameters; a name
    # present in both is reported before the global value is stored.
    for global_key, global_val in dict_global_params.items():
        if global_key in dict_input_partition:
            show_message(
                'dict_input_partition should not have key ' + global_key +
                ' that is common with dict_global_params',
                halt=True)
        dict_input_partition[global_key] = global_val

    # Data df
    df_partition = read_data_partition()

    # Groups dict (plus the PLS targets when the method is PLS)
    if dict_input_partition['method'] == "PLS":
        dict_groups, pls_targets = read_partition_groupsPLS()
        dict_input_partition['PLS_target'] = pls_targets
    else:
        dict_groups = read_partition_groups()
        dict_input_partition['PLS_target'] = None

    # Check the raw data against the parameters
    check_data_partition(df_partition, dict_input_partition)

    # Set start and end dates and fill missing values for data df
    start_date = dict_input_partition['sdate']
    end_date = dict_input_partition['edate']
    df_partition = format_data_partition(df_partition, start_date, end_date)

    # Check PLS target coverage: no target column may contain a null
    if dict_input_partition['method'] == "PLS":
        target_columns = list({
            column
            for group in pls_targets.values()
            for column in group
        })
        if df_partition.loc[:, target_columns].isnull().sum().sum() > 0:
            print(df_partition.loc[:, target_columns].head())
            show_message('PLS target should cover all dates', halt=True)

    # Check partition groups
    check_partition_groups(dict_groups, df_partition)

    return dict_input_partition, dict_groups, df_partition
Exemple #24
0
def check_parameters_scenario(dict_input_scenario):
    '''
    Check the input parameters for scenario.

    Problems are reported through show_message(..., halt=True).

    The dict is also normalized in place:
    - a regressor option that is within 1E-5 of a whole number is stored
      as that int
    - an output sheet name ('sheet_*' key) given as None is replaced by
      its default name
    '''
    input_sheets = [
        'Readme', 'Input_parameters', 'Partition_groups', 'Data',
        'Processing_Log'
    ]

    # Default output sheet names, used when the user left the name empty.
    # NOTE(review): 'Senario test' is kept exactly as it was; other code
    # may look the sheet up by this name -- confirm before correcting.
    default_sheets = {
        'sheet_quantreg': 'Quant reg coefficients',
        'sheet_cond_quant': 'Conditional quantiles',
        'sheet_local_proj': 'Local projections',
        'sheet_partition': 'Output_partitions',
        'sheet_scenario': 'Senario test',
    }

    valid_transforms = ['None', 'Lagged', 'MVA', 'Power', 'Diff', 'ChangeRate']
    transforms_with_option = ['Lagged', 'MVA', 'Power', 'Diff', 'ChangeRate']
    shocktypes_with_value = ['By +/- STD', 'By +/- percentage']

    # Iterate over a snapshot of the keys since values are reassigned below
    for key in list(dict_input_scenario):
        val = dict_input_scenario[key]

        if key == 'quantlist':
            # Check that all values are between 0 and 1,
            # and that the necessary values of 0.10, 0.25, 0.50, 0.75, 0.90 exist
            vals_np = np.array(val)  # create np.array for value checking
            if not (np.all(0 < vals_np) and np.all(vals_np < 1)):
                message = 'All values of quantlist must be between 0 and 1. '
                message += 'Given values: ' + str(val)
                show_message(message, halt=True)
            # Check that necessary values are present
            for needed in [0.10, 0.25, 0.50, 0.75, 0.90]:
                if needed not in val:
                    message = 'Value of ' + str(
                        needed) + ' must be included in quantlist. '
                    message += 'Given values: ' + str(val)
                    show_message(message, halt=True)

        elif key == 'regressors':
            # val is a dict of dicts with keys [regressor]['transform/option']
            for regressor in val:
                transform = val[regressor]['transform']
                option = val[regressor]['option']

                # Check that transform is a valid value from the pulldown menu
                if transform not in valid_transforms:
                    message = 'transform for ' + regressor + ' was not a valid option, given ' + str(
                        transform)
                    show_message(message, halt=True)

                # 'None' means no transformation, so no option may be given
                if transform == 'None':
                    if option is not None:
                        message = 'option for regressor = ' + regressor + ' with transform of ' + transform + ' must not have option set'
                        show_message(message, halt=True)

                # Every other transform needs a whole-number option
                if transform in transforms_with_option:
                    is_number = (isinstance(option, (int, float))
                                 and not isinstance(option, bool))
                    # Compare against the NEAREST integer: truncating with
                    # int() would reject 2.9999999 although it is well
                    # within the 1E-5 tolerance of 3.
                    nearest = int(round(option)) if is_number else None
                    if not is_number or abs(nearest - option) > 1E-5:
                        message = 'option for regressor = ' + regressor + ' with transform of ' + transform + ' must have option of int, given ' + str(
                            option)
                        show_message(message, halt=True)
                    else:
                        # Store as int so that there are no problems later
                        dict_input_scenario['regressors'][regressor][
                            'option'] = nearest

        elif key == 'shockvars':
            # val is a dict of dicts with keys [shockvar]['shocktype/shockvalue']
            for shockvar in val:
                shocktype = val[shockvar]['shocktype']
                shockvalue = val[shockvar]['shockvalue']

                # Check that shocktype is a valid value from the pulldown menu
                if shocktype not in ['None'] + shocktypes_with_value:
                    message = 'Shock type for ' + shockvar + ' was not a valid option, given ' + str(
                        shocktype)
                    show_message(message, halt=True)

                if shocktype in shocktypes_with_value:
                    # A missing or non-numeric value is reported as out of
                    # range instead of raising TypeError from abs().
                    try:
                        out_of_range = abs(shockvalue) > 10
                    except TypeError:
                        out_of_range = True
                    if out_of_range:
                        message = 'Shock value for variable = ' + shockvar + ' with shocktype of ' + shocktype + ' must be a number between -10 and 10, given ' + str(
                            shockvalue)
                        show_message(message, halt=True)

        elif key.find('sheet_') != -1:
            if val is None:
                if key in default_sheets:
                    dict_input_scenario[key] = default_sheets[key]
                else:
                    message = 'No sheet called ' + key + ' should exist'
                    show_message(message, halt=True)
            else:
                # Check that the specified sheetname is not one of the inputs
                # (it is OK that it is the same name as an existing sheet if
                # that sheet is an output)
                if val in input_sheets:
                    message = key + ' specified as ' + str(
                        val) + ', cannot be the same as necessary input sheet'
                    show_message(message, halt=True)