def transform_plot_data(context, df: DataFrame, plot_config: Dict) -> Dict:
    """
    Perform any necessary transformations on the plot data

    Every entry of the configuration receives its own copy of the plot data,
    with the entry's 'header' applied as column names and every column that
    has a 'to_datetime' format configured converted to datetimes.

    :param context: execution context (not used by this function)
    :param df: pandas DataFrame of plot data; it is left unmodified
    :param plot_config: dict of plot configurations keyed by plot name. Each
        value must hold a 'header' list of column names and may hold, per
        column name, a dict with a 'to_datetime' format string
    :return: dict mapping each plot name to
        {'df': transformed DataFrame, 'config': that plot's configuration}
    :raises NotImplementedError: if plot_config contains an 'all' key
    """
    if 'all' in plot_config:
        raise NotImplementedError(
            'All plots functionality is not yet fully supported')

    plot_info = {}
    for plot_key, plot_cfg in plot_config.items():
        # Work on a private copy: assigning headers/conversions to the shared
        # input would make every plot entry alias one frame carrying only the
        # last plot's header, and would modify the caller's data.
        plot_df = df.copy()

        header = plot_cfg['header']
        plot_df.columns = header  # add the column names to the dataframe

        for column in header:
            if column not in plot_cfg:
                continue
            column_config = plot_cfg[column]
            if 'to_datetime' in column_config:
                # convert to a date time
                plot_df[column] = pd.to_datetime(
                    plot_df[column], format=column_config['to_datetime'])

        plot_info[plot_key] = {'df': plot_df, 'config': plot_cfg}

    return plot_info
def transform_imf_currency_tsv(context, currency_df: DataFrame,
                               cur_config: Dict):
    """
    Transform an IMF SDR per currency DataFrame

    Steps, in order: strip whitespace from column names and cell values, add
    a row for every day in the configured range that is not yet present, drop
    rows whose date cell does not match the date pattern, parse and sort the
    dates, fill gaps in the value columns (forward then backward), convert
    the value columns to float and rename the columns matched by the currency
    name pattern.

    This is a generator yielding two ``Output`` objects: the transformed
    DataFrame named 'currency_df' and a dict named 'currency_names' that maps
    regex group 2 of each matched column name to regex group 1.
    NOTE(review): ``Output`` is defined outside this block (presumably the
    pipeline framework's output type) - confirm.

    :param context: execution context (not used by this function)
    :param currency_df: DataFrame to process; every column is expected to
        hold strings, since the ``.str`` accessor is applied to all of them.
        The caller's frame is not modified
    :param cur_config: currency configuration; ``cur_config['value']`` must
        provide 'date_col_name', 'currency_start_date', 'currency_end_date',
        'to_datetime', 'date_pattern' and 'currency_name_pattern'
    """
    cfg = cur_config['value']
    date_col_name = cfg['date_col_name']
    date_format = cfg['to_datetime']

    # clear whitespace in column names and cell values; rename() returns a
    # new frame, so the caller's DataFrame is left untouched
    currency_df = currency_df.rename(columns=str.strip)
    for column in currency_df.columns:
        currency_df[column] = currency_df[column].str.strip()

    # make sure no dates are missing: collect every absent day first and add
    # them in one concat (DataFrame.append was removed in pandas 2.0, and a
    # per-row append with a full column scan is quadratic)
    currency_date = datetime.strptime(cfg['currency_start_date'], date_format)
    currency_end_date = datetime.strptime(cfg['currency_end_date'],
                                          date_format)
    one_day = timedelta(days=1)
    known_dates = set(currency_df[date_col_name].values)
    missing_dates = []
    while currency_date <= currency_end_date:
        date_text = currency_date.strftime(date_format)
        if date_text not in known_dates:
            missing_dates.append(date_text)
            # a format coarser than one day repeats the same text; add it
            # only once
            known_dates.add(date_text)
        currency_date += one_day
    if missing_dates:
        # the new rows carry only the date; all other columns become NaN
        currency_df = pd.concat(
            [currency_df, pd.DataFrame({date_col_name: missing_dates})],
            ignore_index=True)

    # drop non-data rows in date column (NaN is blanked so str.contains
    # yields a plain boolean mask); copy so later column assignments do not
    # write into a slice of the previous frame
    currency_df[date_col_name] = currency_df[date_col_name].fillna(value='')
    is_data_row = currency_df[date_col_name].str.contains(cfg['date_pattern'])
    currency_df = currency_df[is_data_row].copy()

    # convert dates and sort
    currency_df[date_col_name] = pd.to_datetime(currency_df[date_col_name],
                                                format=date_format)
    currency_df = currency_df.sort_values(by=[date_col_name])

    # fill gaps with the previous value, or the next valid value when the gap
    # is on the first line, then convert to float
    for column in currency_df.columns:
        if column != date_col_name:
            currency_df[column] = (
                currency_df[column].ffill().bfill().astype(float))

    # rename columns to currency code
    # NOTE(review): presumably group 1 is the currency name and group 2 its
    # code - confirm against the configured 'currency_name_pattern'
    columns = []
    currency_names = {}
    regex = re.compile(cfg['currency_name_pattern'])
    for column in currency_df.columns:
        match = regex.search(column)
        if match:
            currency_names[match.group(2)] = match.group(1)
            columns.append(match.group(2))
        else:
            columns.append(column)
    currency_df.columns = columns

    yield Output(currency_df, 'currency_df')
    yield Output(currency_names, 'currency_names')