def rolling_mean_by_date_by_group(data: dd = None,
                                  groupby_columns: List[str] = None,
                                  metric_columns: List[str] = None,
                                  date_column: str = None,
                                  window: int = None) -> dd:
    """
    Split the input dataframe into groups and perform a rolling average on the metric columns of each group
    :param data: input dataframe
    :param groupby_columns: list of columns to group by
    :param metric_columns: columns to calculate rolling average on
    :param date_column: name of date column; it is used as the index while the groups are processed
    :param window: window size to be used on rolling average
    :return: modified dask dataframe, with the date restored as a regular column
    """
    def _rolling_mean_for_group(group):
        # Per-group worker: delegates the actual rolling computation.
        return rolling_mean_by_date(data=group,
                                    metric_columns=metric_columns,
                                    window=window)

    indexed = data.set_index(date_column, sorted=True)

    # Meta handed to apply(): the existing column dtypes plus one float32
    # '<metric>_rolling_mean' column per metric column.
    schema = dict(indexed.dtypes)
    schema.update(
        {f'{name}_rolling_mean': 'float32' for name in metric_columns})
    meta = list(schema.items())

    grouped = indexed.groupby(by=groupby_columns)
    result = grouped.apply(_rolling_mean_for_group, meta=meta)

    # Turn the index back into a column; the rename covers the case where
    # it comes back under the generic name 'index'.
    return result.reset_index().rename(columns={'index': date_column})
def fill_missing_dates(data: pd.DataFrame = None,
                       date_column: str = None,
                       fill_method: str = None,
                       columns=None,
                       date_range: Tuple[str, str] = None,
                       fill_value=None,
                       groupby_columns=None) -> pd.DataFrame:
    """
    Perform date fill on a single group: reindex the group onto every date in date_range
    :param data: pandas dataframe holding one group, indexed by date
    :param date_column: name given to the date column in the returned dataframe
    :param fill_method: reindex method (e.g. 'ffill', 'bfill', 'nearest') applied to the
        non-group columns; None leaves missing dates to fill_value
    :param columns: columns to return, in that order; None returns every column
    :param date_range: (start, end) pair passed to pd.date_range; required
    :param fill_value: value passed to reindex for the non-group columns on missing dates
    :param groupby_columns: columns holding the group keys; on missing dates they are
        copied from the nearest existing date. None is treated as "no group columns"
    :return: pandas dataframe with one row per date in date_range and the date as a column
    """
    if groupby_columns is None:
        groupby_columns = []

    all_dates = pd.date_range(date_range[0], date_range[1])

    metric_columns = [
        col for col in data.columns if col not in groupby_columns
    ]
    metric_data = data[metric_columns].reindex(all_dates,
                                               method=fill_method,
                                               fill_value=fill_value)

    if len(groupby_columns) == 0:
        # Nothing to carry over from the group keys, so there is nothing to merge.
        filled = metric_data
    else:
        # Group keys are constant within a group, so the nearest existing row
        # supplies the right value for every inserted date.
        group_data = data[groupby_columns].reindex(all_dates,
                                                   method='nearest')
        # Both frames are plain pandas objects sharing the same index.
        filled = pd.merge(group_data,
                          metric_data,
                          left_index=True,
                          right_index=True)

    # The reindexed index is unnamed, so reset_index() emits it as 'index'.
    filled = filled.reset_index().rename(columns={'index': date_column})
    if columns is not None:
        filled = filled[columns]
    return filled
def yoy_percent_change_by_group(data: dd = None,
                                groupby_columns: List[str] = None,
                                metric_columns: List[str] = None,
                                date_column: str = None) -> dd:
    """
    Split the dataframe into groups and calculate the year over year percent change for the metric columns of each group
    :param data: input dataframe
    :param groupby_columns: list of columns to group by
    :param metric_columns: columns to calculate year over year percent change on
    :param date_column: name of date column; it is used as the index while the groups are processed
    :return: modified dataframe, with the date restored as a regular column
    """
    def _yoy_change_for_group(group):
        # Per-group worker: delegates the actual year over year computation.
        return yoy_percent_change(data=group, metric_columns=metric_columns)

    indexed = data.set_index(date_column, sorted=True)

    # Meta handed to apply(): the existing column dtypes plus one float32
    # '<metric>_yoy_pct_change' column per metric column.
    schema = dict(indexed.dtypes)
    schema.update(
        {f'{name}_yoy_pct_change': 'float32' for name in metric_columns})
    meta = list(schema.items())

    grouped = indexed.groupby(by=groupby_columns)
    result = grouped.apply(_yoy_change_for_group, meta=meta)

    # Turn the index back into a column; the rename covers the case where
    # it comes back under the generic name 'index'.
    return result.reset_index().rename(columns={'index': date_column})