def unpickle(pkl_cfg: Dict, plot_name: String) -> dict:
    """
    Optionally delete and/or load the pickle files named in the configuration
    :param pkl_cfg: pickle configuration; reads the 'pkl_name', 'clear_it'
                    and 'pickle_it' entries
    :param plot_name: key used for the result when 'pkl_name' is a single
                      string rather than a mapping of key to file path
    :return: dictionary of key to object loaded with pd.read_pickle, or None
             when nothing was loaded
    """
    names = pkl_cfg['pkl_name']
    # a bare string is treated as a single file keyed by the plot name
    targets = {plot_name: names} if isinstance(names, str) else names

    loaded = {}
    for label, pkl_path in targets.items():
        # clearing happens first, so a cleared file is never loaded
        if pkl_cfg['clear_it'] and test_file_path(pkl_path):
            os.remove(pkl_path)
        if pkl_cfg['pickle_it'] and test_file_path(pkl_path):
            loaded[label] = pd.read_pickle(pkl_path)

    return loaded if loaded else None
def read_currency_codes(context, cur_config: Dict,
                        xml_path: String) -> DataFrame:
    """
    Read ISO 4217 currency codes
    :param context: execution context
    :param cur_config: currency configuration; the 'currency_codes' entry of
                       its 'value' supplies the xml node and attribute names
    :param xml_path: path to file to read
    :return: DataFrame with one row per entry node; the configured attribute
             columns come first, followed by any other tags found
    :raises ValueError: if xml_path is not a valid file path
    """
    currency_codes_cfg = cur_config['value']['currency_codes']
    entry_node = currency_codes_cfg['entry_node']
    columns = [
        currency_codes_cfg[attrib]
        for attrib in ('country_attrib', 'currency_name_attrib',
                       'currency_code_attrib', 'currency_nbr_attrib',
                       'currency_minor_units_attrib')
    ]

    if not test_file_path(xml_path):
        raise ValueError(f"Invalid xml_path: {xml_path}")

    context.log.info(f"Reading '{xml_path}'")

    root = eT.parse(xml_path).getroot()

    # collect one dictionary of tag -> text per entry node, then build the
    # DataFrame in one go; DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call
    records = [{child.tag: child.text for child in entry}
               for entry in root.iter(entry_node)]

    if records:
        df = pd.DataFrame(records)
        # configured columns first, then any tags not named in the config
        extras = [col for col in df.columns if col not in columns]
        df = df.reindex(columns=columns + extras)
    else:
        df = pd.DataFrame(dtype=str, columns=columns)

    context.log.info(f"Read details of {len(df)} currencies")

    return df
def read_imf_per_currency(context, cur_config: Dict,
                          tsv_path: String) -> DataFrame:
    """
    Load an IMF SDR per currency file (tab separated)
    :param context: execution context
    :param cur_config: currency configuration; the 'encoding' entry of its
                       'value' is used to decode the file
    :param tsv_path: path to file to read
    :return: DataFrame as returned by drop_unnamed_columns
    :raises ValueError: if tsv_path is not a valid file path
    """
    settings = cur_config['value']

    if test_file_path(tsv_path):
        context.log.info(f"Reading '{tsv_path}'")

        # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html#pandas.read_csv
        # the first two lines of the file are skipped
        raw = pd.read_csv(tsv_path,
                          encoding=settings['encoding'],
                          sep='\t',
                          skiprows=2)

        return drop_unnamed_columns(raw)

    raise ValueError(f"Invalid tsv_path: {tsv_path}")
def read_ex_rates_per_usd(context, cur_config: Dict,
                          ex_per_usd_path: String) -> DataFrame:
    """
    Load an 'IMF National Currency per U.S. Dollar, period average' spreadsheet
    :param context: execution context
    :param cur_config: currency config; must contain a 'value' entry
    :param ex_per_usd_path: path to file to read
    :return: DataFrame without the 'Scale' and 'Base Year' columns
    :raises ValueError: if ex_per_usd_path is not a valid file path
    """
    # looked up but not otherwise used; a missing 'value' key raises KeyError
    _ = cur_config['value']

    path_ok = test_file_path(ex_per_usd_path)
    if not path_ok:
        raise ValueError(f"Invalid ex_per_usd_path: {ex_per_usd_path}")

    context.log.info(f"Reading '{ex_per_usd_path}'")

    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html
    rates = pd.read_excel(ex_per_usd_path, skiprows=5, header=1)

    # NOTE(review): the result of an inplace call is assigned here; this
    # presumes drop_unnamed_columns returns the frame even when inplace=True
    # -- confirm against the helper
    rates = drop_unnamed_columns(rates, inplace=True)

    # remove the columns that are not required
    rates.drop(columns=['Scale', 'Base Year'], inplace=True)

    return rates
예제 #5
0
def initialise_plot(context, yaml_path, plot_name):
    """
    Read the yaml plots file and yield the configuration of the requested plot
    :param context: execution context
    :param yaml_path: path to the yaml configuration file
    :param plot_name: name of plot to retrieve config for; 'all' (any case)
                      is recognised but not yet supported
    :return: generator yielding a 'plot_config' Output holding
             {plot_name: configuration}, then a placeholder 'plot_sql' Output
    :raises ValueError: if the path is invalid or the plot is not configured
    :raises NotImplementedError: if 'all' plots are requested
    """
    # verify path
    if not path.exists(yaml_path):
        raise ValueError(f'Invalid path: {yaml_path}')
    if not test_file_path(yaml_path):
        raise ValueError(f'Not a file path: {yaml_path}')

    available = load_yaml(yaml_path)

    if plot_name.lower() == 'all':
        # the count is logged before the request is rejected
        context.log.info(
            f'Loaded configuration for {len(available.keys())} plots from {yaml_path}'
        )
        raise NotImplementedError(
            'All plots functionality is not yet fully supported')

    if plot_name not in available.keys():
        raise ValueError(
            f'No configuration for {plot_name} found in {yaml_path}')

    selected = {plot_name: available[plot_name]}
    context.log.info(f'Loaded configuration for {plot_name} from {yaml_path}')

    yield Output(selected, 'plot_config')
    # TODO remove
    yield Output('need to remove', 'plot_sql')
def read_sdr_valuation(context, cur_config: Dict, sdrv_path: String):
    """
    Read an IMF SDR valuation file and extract the USD equivalent rows
    :param context: execution context
    :param cur_config: currency configuration; its 'value' entry supplies
                       'encoding', 'skipfooter', 'sdrv_columns' and
                       'usd_eq_srdv'
    :param sdrv_path: path to file to read
    :return: DataFrame of the rows whose exchange rate column contains
             cfg['usd_eq_srdv'], with that column removed
    :raises ValueError: if sdrv_path is not a valid file path
    """
    # Doh! not required just use SDR's per USD

    # "Special drawing rights (abbreviated SDR, ISO 4217 currency code XDR (numeric: 960)[1]) are supplementary foreign
    #  exchange reserve assets defined and maintained by the International Monetary Fund (IMF).[2] SDRs are units of
    #  account for the IMF, and not a currency per se."
    # https://en.wikipedia.org/wiki/Special_drawing_rights

    # "The currency value of the SDR is determined by summing the values in U.S. dollars, based on market exchange
    #  rates, of a basket of major currencies (the U.S. dollar, Euro, Japanese yen, pound sterling and the Chinese
    #  renminbi). The SDR currency value is calculated daily (except on IMF holidays or whenever the IMF is closed
    #  for business) and the valuation basket is reviewed and adjusted every five years."
    # https://www.imf.org/external/np/fin/data/rms_sdrv.aspx

    cfg = cur_config['value']

    if not test_file_path(sdrv_path):
        raise ValueError(f"Invalid sdrv_path: {sdrv_path}")

    context.log.info(f"Reading '{sdrv_path}'")

    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html#pandas.read_csv
    # skipfooter is only supported by the python engine
    df = pd.read_csv(sdrv_path,
                     encoding=cfg['encoding'],
                     sep='\t',
                     skiprows=2,
                     skip_blank_lines=True,
                     skipfooter=cfg['skipfooter'],
                     engine='python')

    columns = cfg['sdrv_columns']
    df.columns = columns

    # layout of std info block
    # 02-Jan-1981	Deutsche mark	0.46	1.974	        0.233029	0
    # 02-Jan-1981	French franc	0.74	4.56	        0.162281	0
    # 02-Jan-1981	Japanese yen	34	    202.87	        0.167595	0
    # 02-Jan-1981	U.K. pound	    0.071	2.378	        0.168838	0
    # 02-Jan-1981	U.S. dollar	    0.54	1	            0.54	    0
    #                                                       1.271743
    #                                       U.S.$1.00 = SDR	0.786322
    #                                       SDR1 = US$	    1.27174

    # drop currency_unit, currency_amt & percent_change columns as not required
    # 02-Jan-1981	1.974	        0.233029
    # 02-Jan-1981	4.56	        0.162281
    # 02-Jan-1981	202.87	        0.167595
    # 02-Jan-1981	2.378	        0.168838
    # 02-Jan-1981	1	            0.54
    #                               1.271743
    #               U.S.$1.00 = SDR	0.786322
    #               SDR1 = US$	    1.27174
    df.drop([columns[1], columns[2], columns[5]], axis=1, inplace=True)

    # drop rows all nan or only 1 non-nan
    # 02-Jan-1981	1.974	        0.233029
    # 02-Jan-1981	4.56	        0.162281
    # 02-Jan-1981	202.87	        0.167595
    # 02-Jan-1981	2.378	        0.168838
    # 02-Jan-1981	1	            0.54
    #               U.S.$1.00 = SDR	0.786322
    #               SDR1 = US$	    1.27174
    df.dropna(thresh=2, inplace=True)
    df.reset_index(inplace=True, drop=True)

    # fill forward date nan's
    # 02-Jan-1981	1.974	        0.233029
    # 02-Jan-1981	4.56	        0.162281
    # 02-Jan-1981	202.87	        0.167595
    # 02-Jan-1981	2.378	        0.168838
    # 02-Jan-1981	1	            0.54
    # 02-Jan-1981	U.S.$1.00 = SDR	0.786322
    # 02-Jan-1981	SDR1 = US$	    1.27174
    date_col = columns[0]
    # ffill() replaces the deprecated fillna(method='ffill')
    df[date_col] = df[date_col].ffill()

    # get the rows required
    # 02-Jan-1981	U.S.$1.00 = SDR	0.786322
    exchange_rate_col = columns[3]
    # na=False: a missing exchange rate counts as "no match" rather than
    # putting NaN in the mask, which boolean indexing rejects
    is_usd_eq = df[exchange_rate_col].str.contains(cfg['usd_eq_srdv'],
                                                   regex=False,
                                                   na=False)

    # drop unrequired column; a non-inplace drop returns a new frame, so the
    # filtered slice of df is never modified in place
    usd_eq_srdv = df.loc[is_usd_eq].drop(columns=[exchange_rate_col])

    return usd_eq_srdv
예제 #7
0
                'mongo_warehouse': mongo_warehouse_resource
            }
        )
    ]
)
def mongo_to_postgres_pipeline():
    # download_from_mongo -> process_unlocode -> upload_to_postgres,
    # each step receiving the result of the previous one
    upload_to_postgres(process_unlocode(download_from_mongo()))


if __name__ == '__main__':

    # get path to config file
    app_cfg_path = 'config.yaml'  # default in project root
    if not test_file_path(app_cfg_path):
        # no default so look for in environment or from console
        app_cfg_path = get_file_path('AC_CFG',
                                     'AirportCodes configuration file')
        if app_cfg_path is None:
            exit(0)

    app_cfg = load_yaml(app_cfg_path)

    if app_cfg is not None:
        # check some basic configs exist
        for key in ['airport_codes', 'postgresdb',
                    'mongodb']:  # required root level keys
            if key not in app_cfg.keys():
                raise EnvironmentError(f'Missing {key} configuration key')
    else:
예제 #8
0
def interactive_plot(sj_config: dict, plotly_config: String,
                     postgres_warehouse_resrc: Dict):
    """
    Interactively choose a plot configuration file and plot, then run the plot pipeline
    Entering 'q' (any case) at either prompt ends the session.
    :param sj_config: app configuration; 'plots_cfg' gives the initial plot
                      configuration file path
    :param plotly_config: plotly configuration
    :param postgres_warehouse_resrc: postgres server resource
    """
    plot_cfg_path = sj_config['plots_cfg']
    plot_name = ''

    keep_going = True
    while keep_going:
        # step 1: make sure there is a usable configuration file path,
        # prompting only when the current one is empty
        while True:
            if len(plot_cfg_path) == 0:
                plot_cfg_path = get_user_input(
                    'Enter path to plot configuration file', plot_cfg_path)
            keep_going = plot_cfg_path.lower() != 'q'
            if not keep_going:
                break
            if not path.exists(plot_cfg_path):
                print(f'>> Invalid path: {plot_cfg_path}')
                plot_cfg_path = ''
            elif not test_file_path(plot_cfg_path):
                print(f'>> Not a file path: {plot_cfg_path}')
                plot_cfg_path = ''
            else:
                break

        if not keep_going:
            break

        # step 2: list the plots in the file together with their titles
        plots_config = load_yaml(plot_cfg_path)
        print('The following plots are available:')
        listing = []
        for name in plots_config.keys():
            entry = plots_config[name]
            title = ''
            if 'title' in entry.keys():
                title = entry['title']
                # a title may be a mapping holding the text under 'text'
                if isinstance(title, dict) and 'text' in title.keys():
                    title = title['text']
            listing.append((name, title))

        # pad the names to the longest one so the titles line up
        width = max((len(name) for name, _ in listing), default=0)
        fmt = f'  \x7b0:<{width}\x7d -- \x7b1\x7d'
        for name, title in listing:
            print(fmt.format(name, title))

        # step 3: ask for a plot name until one runs or the user quits
        while True:
            plot_name = get_user_input('Enter plot name', plot_name)
            keep_going = plot_name.lower() != 'q'
            if not keep_going:
                break
            try:
                # the loaded value is not used afterwards; presumably the
                # call validates the plot name -- confirm against load_yaml
                load_yaml(plot_cfg_path, plot_name)

                # NOTE: execute_file_ip_postgres_to_plot_pipeline was the
                # alternative call here (same arguments), left disabled
                execute_file_ip_sql_to_plot_pipeline(
                    sj_config, plotly_config, postgres_warehouse_resrc,
                    plot_cfg_path, plot_name)
            except yaml.parser.ParserError as pe:
                print(f'>> Error in configuration: {pe}')
            except KeyError:
                print(
                    f">> Error in configuration: plot '{plot_name}' not found"
                )
                plot_name = ''
            else:
                break
예제 #9
0
def get_app_config(name, args):
    """
    Build the application configuration from the command line and the yaml configuration file
    :param name: program name, passed to usage()
    :param args: command line arguments to parse with getopt
    :return: tuple of (application configuration, plotly configuration);
             the plotly configuration is None when neither the command line
             nor the plotly/orca/executable entry of the file supplies it
    :raises EnvironmentError: if the configuration is missing or lacks a
                              required root level key
    """
    try:
        opts, _ = getopt.getopt(args, get_short_opts(), get_long_opts())
    except getopt.GetoptError as err:
        print(err)
        usage(name)
        sys.exit(2)

    app_cfg_path = '../config.yaml'  # default in project root
    cmd_line_args = {
        'o': None,
        'p': None,
        'n': None,
        's': None,
        'd': None,
        'm': None,
        'w': None,
    }
    for opt, arg in opts:
        if opt == get_short_opt('h') or opt == get_long_opt('h'):
            usage(name)
            sys.exit()
        elif opt == get_short_opt('c') or opt == get_long_opt('c'):
            app_cfg_path = arg
        else:
            # iterate over every supported key; a separate hard-coded list
            # previously omitted 'w', so that option was silently ignored
            for key in cmd_line_args:
                if opt == get_short_opt(key) or opt == get_long_opt(key):
                    cmd_line_args[key] = arg
                    break

    # get path to config file
    if not test_file_path(app_cfg_path):
        # no default so look for in environment or from console
        app_cfg_path = get_file_path('SJ_CFG',
                                     'SalesJournal configuration file')
        if app_cfg_path is None:
            # sys.exit rather than the site-provided exit() builtin, which
            # is not guaranteed to exist (e.g. python -S)
            sys.exit(0)

    # load app config
    app_cfg = load_yaml(app_cfg_path)

    if app_cfg is None:
        raise EnvironmentError('Missing configuration')

    # check some basic configs exist
    for key in ['sales_journal', 'postgresdb']:  # required root level keys
        if key not in app_cfg.keys():
            raise EnvironmentError(f'Missing {key} configuration key')

    sj_config = app_cfg['sales_journal']

    # get plotly config: command line wins, otherwise walk down
    # plotly -> orca -> executable in the file
    plotly_cfg = None
    plotly_cfg_keys = ['plotly', 'orca', 'executable']
    if cmd_line_args['o'] is None:
        cfg = app_cfg
        for key in plotly_cfg_keys:
            if key not in cfg.keys():
                break
            cfg = cfg[key]
        else:
            # only reached when every level of the key path was present
            plotly_cfg = cfg
    else:
        plotly_cfg = cmd_line_args['o']

    # TODO disabled cmd line args for now not fully tested
    # if cmd_line_args['n'] is not None:
    #     sj_config['plot_name'] = cmd_line_args['n']
    if cmd_line_args['p'] is not None:
        sj_config['plots_cfg'] = cmd_line_args['p']
    # if cmd_line_args['s'] is not None:
    #     sj_config['load_file_sets'] = cmd_line_args['s']
    if cmd_line_args['d'] is not None:
        sj_config['db_data_path'] = cmd_line_args['d']
    # if cmd_line_args['m'] is not None:
    #     sj_config['csv_pipeline_run_mode'] = cmd_line_args['m']
    if cmd_line_args['w'] is not None:
        sj_config['sales_data_desc'] = cmd_line_args['w']

    return app_cfg, plotly_cfg