def unpickle(pkl_cfg: Dict, plot_name: String) -> dict:
    """
    Load previously pickled data frames, optionally deleting the pickle files first
    :param pkl_cfg: pickle configuration; the 'pkl_name', 'clear_it' and 'pickle_it' entries are read
    :param plot_name: key used for the result when 'pkl_name' is a single path string
    :return: dictionary of loaded data frames keyed by name, or None if nothing was loaded
    """
    names = pkl_cfg['pkl_name']
    # a single path is keyed by the plot name, anything else is used as a name -> path mapping
    targets = {plot_name: names} if isinstance(names, str) else names

    loaded = {}
    for name, pkl_path in targets.items():
        # clearing happens before loading, so a cleared file is never read back
        if pkl_cfg['clear_it'] and test_file_path(pkl_path):
            os.remove(pkl_path)
        if pkl_cfg['pickle_it'] and test_file_path(pkl_path):
            loaded[name] = pd.read_pickle(pkl_path)

    return loaded if len(loaded) > 0 else None
def read_currency_codes(context, cur_config: Dict, xml_path: String) -> DataFrame:
    """
    Read ISO 4217 currency codes from an XML file
    :param context: execution context
    :param cur_config: currency configuration; the 'currency_codes' section under 'value' supplies the
                       entry node name and the attribute names used as the data frame columns
    :param xml_path: path to file to read
    :return: data frame with one row per entry node found in the file
    :raises ValueError: if xml_path fails the file path check
    """
    currency_codes_cfg = cur_config['value']['currency_codes']
    entry_node = currency_codes_cfg['entry_node']
    columns = [
        currency_codes_cfg['country_attrib'],
        currency_codes_cfg['currency_name_attrib'],
        currency_codes_cfg['currency_code_attrib'],
        currency_codes_cfg['currency_nbr_attrib'],
        currency_codes_cfg['currency_minor_units_attrib'],
    ]

    if not test_file_path(xml_path):
        raise ValueError(f"Invalid xml_path: {xml_path}")

    context.log.info(f"Reading '{xml_path}'")

    root = eT.parse(xml_path).getroot()

    # one dictionary per entry node, keyed by child element tag
    rows = [{child.tag: child.text for child in entry}
            for entry in root.iter(entry_node)]

    # configured columns come first; tags not in the configuration are kept, in order of first
    # appearance, so no parsed data is dropped
    all_columns = list(columns)
    for row in rows:
        for tag in row:
            if tag not in all_columns:
                all_columns.append(tag)

    # build the frame in one go; DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every row
    df = pd.DataFrame(rows, columns=all_columns)

    context.log.info(f"Read details of {len(df)} currencies")

    return df
def read_imf_per_currency(context, cur_config: Dict, tsv_path: String) -> DataFrame:
    """
    Load an IMF 'SDR per currency' tab-separated file into a data frame
    :param context: execution context
    :param cur_config: currency configuration; the 'encoding' entry under 'value' is used to read the file
    :param tsv_path: path to file to read
    :return: the file contents after drop_unnamed_columns has been applied
    """
    settings = cur_config['value']

    if not test_file_path(tsv_path):
        raise ValueError(f"Invalid tsv_path: {tsv_path}")

    context.log.info(f"Reading '{tsv_path}'")

    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html#pandas.read_csv
    raw = pd.read_csv(tsv_path,
                      encoding=settings['encoding'],
                      sep='\t',
                      skiprows=2)  # the first two lines of the file are skipped

    # drop unnamed columns
    return drop_unnamed_columns(raw)
def read_ex_rates_per_usd(context, cur_config: Dict, ex_per_usd_path: String) -> DataFrame:
    """
    Read an 'IMF National Currency per U.S. Dollar, period average' file
    :param context: execution context
    :param cur_config: currency config (not used by this function, kept for interface compatibility)
    :param ex_per_usd_path: path to file to read
    :return: data frame without unnamed columns and without the 'Scale' and 'Base Year' columns
    :raises ValueError: if ex_per_usd_path fails the file path check
    """
    if not test_file_path(ex_per_usd_path):
        raise ValueError(f"Invalid ex_per_usd_path: {ex_per_usd_path}")

    context.log.info(f"Reading '{ex_per_usd_path}'")

    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_excel.html
    df = pd.read_excel(ex_per_usd_path, skiprows=5, header=1)

    # drop non-required columns
    # NOTE: uses the returning form of drop_unnamed_columns, as read_imf_per_currency does; rebinding
    # df to the result of an inplace=True call risks df becoming None if the helper follows the
    # pandas in-place convention
    df = drop_unnamed_columns(df)

    return df.drop(columns=['Scale', 'Base Year'])
def initialise_plot(context, yaml_path, plot_name):
    """
    Read the plots yaml file and yield the configuration of the requested plot
    :param context: execution context
    :param yaml_path: path to the yaml configuration file
    :param plot_name: name of plot to retrieve config for; 'all' (any case) is recognised but not
                      supported yet
    :return: yields a 'plot_config' output holding {plot_name: config}, followed by a placeholder
             'plot_sql' output
    :rtype: generator
    """
    # verify path: it must exist and must be a file
    if not path.exists(yaml_path):
        raise ValueError(f'Invalid path: {yaml_path}')
    if not test_file_path(yaml_path):
        raise ValueError(f'Not a file path: {yaml_path}')

    plots_config = load_yaml(yaml_path)

    if plot_name.lower() == 'all':
        context.log.info(
            f'Loaded configuration for {len(plots_config.keys())} plots from {yaml_path}'
        )
        raise NotImplementedError(
            'All plots functionality is not yet fully supported')

    if plot_name not in plots_config.keys():
        raise ValueError(
            f'No configuration for {plot_name} found in {yaml_path}')

    selected = {plot_name: plots_config[plot_name]}
    context.log.info(
        f'Loaded configuration for {plot_name} from {yaml_path}')

    yield Output(selected, 'plot_config')
    # TODO remove
    yield Output('need to remove', 'plot_sql')
def read_sdr_valuation(context, cur_config: Dict, sdrv_path: String):
    """
    Read an IMF SDR valuation file and return the 'U.S.$1.00 = SDR' rows
    :param context: execution context
    :param cur_config: currency configuration; the 'encoding', 'skipfooter', 'sdrv_columns' and
                       'usd_eq_srdv' entries under 'value' are used
    :param sdrv_path: path to file to read
    :return: data frame of the rows whose exchange rate column contains the configured
             'usd_eq_srdv' text, without the exchange rate column itself
    :raises ValueError: if sdrv_path fails the file path check
    """
    # Doh! not required just use SDR's per USD

    # "Special drawing rights (abbreviated SDR, ISO 4217 currency code XDR (numeric: 960)[1]) are supplementary foreign
    # exchange reserve assets defined and maintained by the International Monetary Fund (IMF).[2] SDRs are units of
    # account for the IMF, and not a currency per se."
    # https://en.wikipedia.org/wiki/Special_drawing_rights

    # "The currency value of the SDR is determined by summing the values in U.S. dollars, based on market exchange
    # rates, of a basket of major currencies (the U.S. dollar, Euro, Japanese yen, pound sterling and the Chinese
    # renminbi). The SDR currency value is calculated daily (except on IMF holidays or whenever the IMF is closed
    # for business) and the valuation basket is reviewed and adjusted every five years."
    # https://www.imf.org/external/np/fin/data/rms_sdrv.aspx

    cfg = cur_config['value']

    if not test_file_path(sdrv_path):
        raise ValueError(f"Invalid sdrv_path: {sdrv_path}")

    context.log.info(f"Reading '{sdrv_path}'")

    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html#pandas.read_csv
    # the python engine is needed because skipfooter is not supported by the C engine
    df = pd.read_csv(sdrv_path,
                     encoding=cfg['encoding'],
                     sep='\t',
                     skiprows=2,
                     skip_blank_lines=True,
                     skipfooter=cfg['skipfooter'],
                     engine='python')
    columns = cfg['sdrv_columns']
    df.columns = columns

    # layout of std info block
    #   02-Jan-1981  Deutsche mark  0.46   1.974             0.233029  0
    #   02-Jan-1981  French franc   0.74   4.56              0.162281  0
    #   02-Jan-1981  Japanese yen   34     202.87            0.167595  0
    #   02-Jan-1981  U.K. pound     0.071  2.378             0.168838  0
    #   02-Jan-1981  U.S. dollar    0.54   1                 0.54      0
    #                                                        1.271743
    #                                      U.S.$1.00 = SDR   0.786322
    #                                      SDR1 = US$        1.27174

    # drop currency_unit, currency_amt & percent_change columns as not required
    #   02-Jan-1981  1.974             0.233029
    #   ...
    #                                  1.271743
    #                U.S.$1.00 = SDR   0.786322
    #                SDR1 = US$        1.27174
    df = df.drop(columns=[columns[1], columns[2], columns[5]])

    # drop rows all nan or only 1 non-nan (removes the lone 1.271743 total row)
    df = df.dropna(thresh=2).reset_index(drop=True)

    # fill forward date nan's so the summary rows carry the date of their block
    #   02-Jan-1981  U.S.$1.00 = SDR   0.786322
    #   02-Jan-1981  SDR1 = US$        1.27174
    date_col = columns[0]
    df[date_col] = df[date_col].ffill()  # fillna(method='ffill') is deprecated

    # get the rows required
    #   02-Jan-1981  U.S.$1.00 = SDR   0.786322
    exchange_rate_col = columns[3]
    # na=False: cells that are nan or not strings count as "no match"; a nan in the mask cannot be
    # used for boolean indexing
    wanted = df[exchange_rate_col].str.contains(cfg['usd_eq_srdv'], regex=False, na=False)

    # drop unrequired column; non-inplace so the filtered slice is never mutated
    usd_eq_srdv = df[wanted].drop(columns=[exchange_rate_col])

    return usd_eq_srdv
'mongo_warehouse': mongo_warehouse_resource } ) ] ) def mongo_to_postgres_pipeline(): raw = download_from_mongo() processed = process_unlocode(raw) upload_to_postgres(processed) if __name__ == '__main__': # get path to config file app_cfg_path = 'config.yaml' # default in project root if not test_file_path(app_cfg_path): # no default so look for in environment or from console app_cfg_path = get_file_path('AC_CFG', 'AirportCodes configuration file') if app_cfg_path is None: exit(0) app_cfg = load_yaml(app_cfg_path) if app_cfg is not None: # check some basic configs exist for key in ['airport_codes', 'postgresdb', 'mongodb']: # required root level keys if key not in app_cfg.keys(): raise EnvironmentError(f'Missing {key} configuration key') else:
def interactive_plot(sj_config: dict, plotly_config: String, postgres_warehouse_resrc: Dict):
    """
    Process interactive plot

    Repeatedly asks for a plot configuration file (when none is set), lists the plots it
    contains, asks for a plot name and runs the plot pipeline for it. Entering 'q' (any case)
    at either prompt ends the session.
    :param sj_config: app configuration; 'plots_cfg' supplies the initial plot configuration path
    :param plotly_config: plotly configuration
    :param postgres_warehouse_resrc: postgres server resource
    """
    plot_cfg_path = sj_config['plots_cfg']
    plot_name = ''
    # dev HACK
    # if 'interactive_plots_cfg' in sj_config.keys():
    #     plot_cfg_path = sj_config['interactive_plots_cfg']
    # else:
    #     plot_cfg_path = ''
    # if 'interactive_plot_name' in sj_config.keys():
    #     plot_name = sj_config['interactive_plot_name']
    # else:
    #     plot_name = ''

    # 'loop' keeps the whole session alive, 'entering' keeps the current prompt alive
    loop = True
    while loop:
        # step 1: obtain a usable plot configuration file path
        entering = True
        while entering:
            # only prompt when there is no path yet (or the previous one was rejected)
            if len(plot_cfg_path) == 0:
                plot_cfg_path = get_user_input(
                    'Enter path to plot configuration file', plot_cfg_path)
            loop = plot_cfg_path.lower() != 'q'
            if loop:
                if not path.exists(plot_cfg_path):
                    print(f'>> Invalid path: {plot_cfg_path}')
                    plot_cfg_path = ''  # cleared so the next pass prompts again
                elif not test_file_path(plot_cfg_path):
                    print(f'>> Not a file path: {plot_cfg_path}')
                    plot_cfg_path = ''  # cleared so the next pass prompts again
                else:
                    entering = False
            else:
                # 'q' entered: leave the prompt; loop is False so the session ends too
                break

        if loop:
            # step 2: list the available plots as 'name -- title'
            plots_config = load_yaml(plot_cfg_path)

            print(f'The following plots are available:')
            width = 0  # length of the longest plot name, used to align the listing
            details = []
            for name in plots_config.keys():
                title = ''
                cfg = plots_config[name]
                if 'title' in cfg.keys():
                    title = cfg['title']
                    # a title may be a plain value or a dictionary with a 'text' entry
                    if isinstance(title, dict):
                        cfg = title
                        if 'text' in cfg.keys():
                            title = cfg['text']
                if len(name) > width:
                    width = len(name)
                details.append((name, title))

            # \x7b and \x7d are literal braces, so this builds a str.format template with the
            # name left-aligned to 'width' characters
            fmt = f' \x7b0:<{width}\x7d -- \x7b1\x7d'
            for name, title in details:
                print(fmt.format(name, title))

            # step 3: obtain a plot name and run the pipeline for it
            entering = True
            while entering:
                # presumably the previous plot name is offered as the default - verify against get_user_input
                plot_name = get_user_input('Enter plot name', plot_name)
                loop = plot_name.lower() != 'q'
                if loop:
                    try:
                        # result is not used afterwards; presumably called so a missing plot or
                        # malformed file raises before the pipeline starts - TODO confirm
                        plots_config = load_yaml(plot_cfg_path, plot_name)

                        execute_file_ip_sql_to_plot_pipeline(
                            sj_config, plotly_config,
                            postgres_warehouse_resrc, plot_cfg_path,
                            plot_name)
                        # execute_file_ip_postgres_to_plot_pipeline(sj_config, plotly_config, postgres_warehouse_resrc,
                        #                                           plot_cfg_path, plot_name)
                        entering = False
                    except yaml.parser.ParserError as pe:
                        print(f'>> Error in configuration: {pe}')
                    except KeyError as ke:
                        print(
                            f">> Error in configuration: plot '{plot_name}' not found"
                        )
                        plot_name = ''  # forget the unknown name before prompting again
                else:
                    # 'q' entered: leave the prompt; loop is False so the session ends too
                    break
def get_app_config(name, args):
    """
    Parse the command line, then load and validate the application configuration
    :param name: program name, passed to usage() when help is requested or parsing fails
    :param args: command line arguments to parse (without the program name)
    :return: tuple of (application configuration, plotly configuration); the plotly configuration is
             the 'o' command line value if given, otherwise the 'plotly' -> 'orca' -> 'executable'
             entry of the application configuration, or None if that entry is absent
    :raises EnvironmentError: if the configuration is missing or lacks a required root level key
    :raises SystemExit: on a command line error (status 2), when help is requested, or when no
                        configuration file path could be obtained
    """
    try:
        opts, args = getopt.getopt(args, get_short_opts(), get_long_opts())
    except getopt.GetoptError as err:
        print(err)
        usage(name)
        sys.exit(2)

    app_cfg_path = '../config.yaml'  # default in project root
    cmd_line_args = {
        'o': None,
        'p': None,
        'n': None,
        's': None,
        'd': None,
        'm': None,
        'w': None,
    }
    for opt, arg in opts:
        if opt == get_short_opt('h') or opt == get_long_opt('h'):
            usage(name)
            sys.exit()
        elif opt == get_short_opt('c') or opt == get_long_opt('c'):
            app_cfg_path = arg
        else:
            # iterate over every known key; a hard-coded list previously left out 'w', so the
            # 'sales_data_desc' override below could never take effect
            for key in cmd_line_args:
                if opt == get_short_opt(key) or opt == get_long_opt(key):
                    cmd_line_args[key] = arg
                    break

    # get path to config file
    if not test_file_path(app_cfg_path):
        # no default so look for in environment or from console
        app_cfg_path = get_file_path('SJ_CFG', 'SalesJournal configuration file')
        if app_cfg_path is None:
            # sys.exit rather than the site-provided exit(), which is not guaranteed to exist
            sys.exit(0)

    # load app config
    app_cfg = load_yaml(app_cfg_path)
    if app_cfg is None:
        raise EnvironmentError('Missing configuration')

    # check some basic configs exist
    for key in ['sales_journal', 'postgresdb']:  # required root level keys
        if key not in app_cfg.keys():
            raise EnvironmentError(f'Missing {key} configuration key')

    sj_config = app_cfg['sales_journal']

    # get plotly config
    plotly_cfg = None
    plotly_cfg_keys = ['plotly', 'orca', 'executable']
    if cmd_line_args['o'] is None:
        # walk down the nested keys; only a complete path yields a value
        cfg = app_cfg
        for key in plotly_cfg_keys:
            if not isinstance(cfg, dict) or key not in cfg:
                break
            cfg = cfg[key]
        else:
            plotly_cfg = cfg
    else:
        plotly_cfg = cmd_line_args['o']

    # TODO disabled cmd line args for now not fully tested
    # if cmd_line_args['n'] is not None:
    #     sj_config['plot_name'] = cmd_line_args['n']
    if cmd_line_args['p'] is not None:
        sj_config['plots_cfg'] = cmd_line_args['p']
    # if cmd_line_args['s'] is not None:
    #     sj_config['load_file_sets'] = cmd_line_args['s']
    if cmd_line_args['d'] is not None:
        sj_config['db_data_path'] = cmd_line_args['d']
    # if cmd_line_args['m'] is not None:
    #     sj_config['csv_pipeline_run_mode'] = cmd_line_args['m']
    if cmd_line_args['w'] is not None:
        sj_config['sales_data_desc'] = cmd_line_args['w']

    return app_cfg, plotly_cfg