コード例 #1
0
def transform_plot_data(context, df: DataFrame, plot_config: Dict) -> Dict:
    """
    Perform any necessary transformations on the plot data

    Every plot gets its own copy of ``df``: the plot's header is applied as
    the column names and any column with a 'to_datetime' setting is parsed
    to datetimes. Working on a copy keeps one plot's header/conversions from
    leaking into another plot's data or into the caller's DataFrame.

    :param context: execution context (not used by this function)
    :param df: pandas DataFrame of plot data
    :param plot_config: dict of plot configurations keyed by plot name; each
        entry holds a 'header' list of column names and may hold, under a
        column's name, a dict with a 'to_datetime' format string
    :return: dict keyed by plot name of {'df': DataFrame, 'config': dict}
    :raises NotImplementedError: if plot_config contains an 'all' key
    """
    if 'all' in plot_config:
        raise NotImplementedError(
            'All plots functionality is not yet fully supported')

    plot_info = {}
    for plot_key, plot_cfg in plot_config.items():
        # copy so that each plot's frame is independent of the others
        plot_df = df.copy()
        plot_df.columns = plot_cfg['header']  # add the column names

        for column in plot_cfg['header']:
            if column not in plot_cfg:
                continue
            column_config = plot_cfg[column]
            if 'to_datetime' in column_config:
                # convert to a date time using the configured format
                plot_df[column] = pd.to_datetime(
                    plot_df[column], format=column_config['to_datetime'])

        plot_info[plot_key] = {'df': plot_df, 'config': plot_cfg}

    return plot_info
コード例 #2
0
def transform_imf_currency_tsv(context, currency_df: DataFrame,
                               cur_config: Dict):
    """
    Transform an IMF SDR per currency DataFrame

    Processing steps, in order:
      1. strip whitespace from the column names and from every value
      2. add a row for each day between the configured start and end dates
         whose date text is not already in the date column
      3. drop rows whose date column does not match the date pattern
      4. parse the date column and sort by it
      5. convert the remaining columns to float, filling gaps from the
         previous valid value (or the next one for gaps at the top)
      6. rename columns matching the currency name pattern to the pattern's
         second group

    The input DataFrame is not modified; a new frame is produced.

    :param context: execution context (not used by this function)
    :param currency_df: DataFrame to process; the `.str` accessor is applied
        to every column, so all columns are expected to hold text
    :param cur_config: currency configuration; settings are read from
        cur_config['value'] using the keys 'date_col_name',
        'currency_start_date', 'currency_end_date', 'to_datetime',
        'date_pattern' and 'currency_name_pattern'
    :return: generator yielding Output(currency_df, 'currency_df') followed
        by Output(currency_names, 'currency_names'), where currency_names
        maps the pattern's group 2 to its group 1 for each renamed column
    """
    cfg = cur_config['value']
    date_col_name = cfg['date_col_name']
    date_format = cfg['to_datetime']

    # clear whitespace; a non-inplace rename returns a new frame so the
    # caller's DataFrame is left untouched
    currency_df = currency_df.rename(columns=str.strip)
    for column in currency_df.columns:
        currency_df[column] = currency_df[column].str.strip()

    # make sure no dates are missing: collect them first and add them with
    # one concat (DataFrame.append was removed in pandas 2.0 and appending
    # row by row is quadratic)
    currency_date = datetime.strptime(cfg['currency_start_date'],
                                      date_format)
    currency_end_date = datetime.strptime(cfg['currency_end_date'],
                                          date_format)
    one_day = timedelta(days=1)
    known_dates = set(currency_df[date_col_name].values)
    missing_dates = []
    while currency_date <= currency_end_date:
        date_text = currency_date.strftime(date_format)
        if date_text not in known_dates:
            missing_dates.append(date_text)
            # remember it so a coarse date format cannot add duplicates
            known_dates.add(date_text)
        currency_date += one_day
    if missing_dates:
        currency_df = pd.concat(
            [currency_df, pd.DataFrame({date_col_name: missing_dates})],
            ignore_index=True)

    # drop non-data rows in date column; copy so later assignments act on an
    # independent frame rather than a filtered view
    currency_df[date_col_name] = currency_df[date_col_name].fillna('')
    is_date_row = currency_df[date_col_name].str.contains(
        cfg['date_pattern'])
    currency_df = currency_df[is_date_row].copy()

    # convert dates and sort
    currency_df[date_col_name] = pd.to_datetime(currency_df[date_col_name],
                                                format=date_format)
    currency_df = currency_df.sort_values(by=[date_col_name])

    # convert to float, then fill gaps with the previous value and, if the
    # gap is on the first line, the next valid value
    for column in currency_df.columns:
        if column != date_col_name:
            currency_df[column] = (
                currency_df[column].astype(float).ffill().bfill())

    # rename columns to the pattern's second group (the currency code,
    # going by the original comment), remembering group 1 for each
    columns = []
    currency_names = {}
    regex = re.compile(cfg['currency_name_pattern'])
    for column in currency_df.columns:
        match = regex.search(column)
        if match:
            currency_names[match.group(2)] = match.group(1)
            columns.append(match.group(2))
        else:
            columns.append(column)

    currency_df.columns = columns

    yield Output(currency_df, 'currency_df')
    yield Output(currency_names, 'currency_names')