def make_order_interval_features(orders: pd.DataFrame) -> pd.DataFrame:
    """Build per-client statistics of the gaps between consecutive orders.

    Orders are sorted by client and datetime. For every order that directly
    follows another order of the same client, the gap to that previous order
    is expressed in days (seconds divided by ``SECONDS_IN_DAY``).

    Args:
        orders: Frame with at least ``client_id`` and ``datetime`` columns;
            ``datetime`` must be datetime-like because the ``.dt`` accessor
            is applied to the difference. The caller's frame is not
            modified, since sorting yields a new frame.

    Returns:
        One row per client: ``client_id`` plus the mean, median, std, min,
        max and last aggregates of the gap. Every missing value (for example
        all aggregates of a client with a single order) is replaced by -3.
    """
    ordered = orders.sort_values(['client_id', 'datetime'])

    previous_datetime = ordered['datetime'].shift(1)
    # The first order of each client has no predecessor of its own: the
    # shifted row either belongs to another client or does not exist.
    has_previous_order = (
        ordered['client_id'].shift(1) == ordered['client_id']
    )
    ordered['last_order_datetime'] = previous_datetime

    gap_in_days = (
        ordered['datetime'] - previous_datetime
    ).dt.total_seconds() / SECONDS_IN_DAY
    # Keep the gap only where the previous row is the same client.
    ordered['orders_interval'] = gap_in_days.where(has_previous_order)

    interval_stats = [
        'mean',  # mean interval between orders
        'median',
        'std',  # constancy of orders
        'min',
        'max',
        'last',  # interval between last 2 orders
    ]
    features = (
        ordered
        .groupby('client_id', sort=False)
        .agg({'orders_interval': interval_stats})
    )
    # Project helper; presumably flattens the (column, aggregate) header
    # into single-level names -- verify against its definition.
    drop_column_multi_index_inplace(features)
    features = features.reset_index()

    return features.fillna(-3)
def make_store_features(orders: pd.DataFrame) -> pd.DataFrame:
    """Build per-client features describing how orders spread over stores.

    Transactions are first counted per (client, store) pair; the max, mean
    and median of those counts are then taken per client and prefixed with
    ``store_``. The result is inner-joined on ``client_id`` with the output
    of ``make_latent_store_features(orders)``.

    Args:
        orders: Frame with at least ``client_id``, ``store_id`` and
            ``transaction_id`` columns.

    Returns:
        Frame keyed by ``client_id`` holding the ``store_*`` count statistics
        and the latent store features.
    """
    # Number of transactions of every client in every store.
    per_store = orders.groupby(['client_id', 'store_id']).agg(
        {'transaction_id': ['count']}
    )
    # The helper is expected to flatten the header so that the count column
    # becomes `transaction_id_count`, which the next aggregation relies on.
    drop_column_multi_index_inplace(per_store)
    per_store = per_store.reset_index()

    per_client = per_store.groupby(['client_id']).agg(
        {'transaction_id_count': ['max', 'mean', 'median']}
    )
    drop_column_multi_index_inplace(per_client)
    per_client = per_client.reset_index()

    # Keep the key column name and prefix every statistic with `store_`.
    prefixed_columns = ['client_id']
    prefixed_columns.extend(
        f'store_{col}' for col in per_client.columns[1:]
    )
    per_client.columns = prefixed_columns

    latent = make_latent_store_features(orders)

    return pd.merge(per_client, latent, on='client_id')
def make_small_product_features(purchases: pd.DataFrame) -> pd.DataFrame:
    """Compute, per client, the largest total quantity bought of one product.

    Quantities are summed per (client, product) pair, then the maximum of
    those sums is taken per client.

    Args:
        purchases: Frame with at least ``client_id``, ``product_id`` and
            ``product_quantity`` columns.

    Returns:
        One row per client with ``client_id`` and the max aggregate of the
        per-product quantity sums.
    """
    # Total quantity of every product bought by every client.
    per_product = purchases.groupby(['client_id', 'product_id']).agg(
        {'product_quantity': ['sum']}
    )
    # The helper is expected to flatten the header so that the sum column
    # becomes `product_quantity_sum`, which the next aggregation relies on.
    drop_column_multi_index_inplace(per_product)
    per_product = per_product.reset_index()

    features = per_product.groupby(['client_id']).agg(
        {'product_quantity_sum': ['max']}
    )
    drop_column_multi_index_inplace(features)

    return features.reset_index()
def make_features_for_orders_with_express_points_spent(
        orders: pd.DataFrame
) -> pd.DataFrame:
    """Build per-client features from orders where express points were spent.

    Only orders with a non-zero ``express_points_spent`` are used to compute:
    the median purchase sum, the number of whole days between the latest
    such order and ``MAILING_DATETIME``, and the order-interval statistics
    from ``make_order_interval_features`` (suffixed with ``_eps``).

    Args:
        orders: Frame with at least ``client_id``, ``datetime``,
            ``purchase_sum`` and ``express_points_spent`` columns.

    Returns:
        One row per unique ``client_id`` found in ``orders``. Clients with no
        qualifying order are kept by the final right join and therefore have
        missing values in every feature column.
    """
    spent_mask = orders['express_points_spent'] != 0
    eps_orders = orders.loc[spent_mask]

    features = eps_orders.groupby(['client_id']).agg(
        {
            'purchase_sum': ['median'],
            'datetime': ['max'],
        }
    )
    # The helper is expected to flatten the header into `purchase_sum_median`
    # and `datetime_max`, the names used below.
    drop_column_multi_index_inplace(features)
    features = features.reset_index()

    time_since_last = MAILING_DATETIME - features['datetime_max']
    features['days_from_last_express_points_spent'] = time_since_last.dt.days
    features = features.drop(columns=['datetime_max'])
    features = features.rename(
        columns={'purchase_sum_median': 'median_purchase_sum_eps'},
    )

    # Interval statistics restricted to the same subset of orders; every
    # column except the key gets an `_eps` suffix to mark that restriction.
    interval_features = make_order_interval_features(eps_orders)
    suffixed = {
        col: f'{col}_eps'
        for col in interval_features.columns
        if col != 'client_id'
    }
    interval_features = interval_features.rename(columns=suffixed)

    features = pd.merge(features, interval_features, on='client_id')

    # Right join against all clients so that every client gets a row.
    all_clients = pd.Series(orders['client_id'].unique(), name='client_id')

    return features.merge(all_clients, how='right')
def make_really_purchase_features(purchases: pd.DataFrame) -> pd.DataFrame:
    """Build per-client features from individual purchase lines.

    Two groups of features are computed and inner-joined on ``client_id``:

    * line-level: for each price bucket, the count and share of lines whose
      ``trn_sum_from_iss`` falls into ``[lower, upper)``; the median
      ``trn_sum_from_iss``; the count and number of unique ``product_id``.
    * transaction-level: mean and median, over the client's transactions, of
      the number of lines per transaction and of the largest
      ``product_quantity`` within a transaction.

    Args:
        purchases: Frame with at least ``client_id``, ``transaction_id``,
            ``product_id``, ``product_quantity`` and ``trn_sum_from_iss``
            columns.

    Returns:
        One row per client with ``client_id`` and the features above.
    """
    line_items = purchases.reindex(
        columns=['client_id', 'product_id', 'trn_sum_from_iss']
    )

    prices_bounds = [0, 98, 195, 490, 950, 1900, 4400, FLOAT32_MAX]
    bucket_aggs = {}
    for lower_bound, upper_bound in zip(prices_bounds[:-1], prices_bounds[1:]):
        bucket_col = f'price_from_{lower_bound}'
        # 1 when the line falls into the half-open bucket, otherwise 0.
        in_bucket = (
            (line_items['trn_sum_from_iss'] >= lower_bound) &
            (line_items['trn_sum_from_iss'] < upper_bound)
        )
        line_items[bucket_col] = in_bucket.astype(int)
        # sum -> number of lines in the bucket, mean -> share of lines.
        bucket_aggs[bucket_col] = ['sum', 'mean']

    line_agg_spec = {
        **bucket_aggs,
        'trn_sum_from_iss': ['median'],  # median product price
        'product_id': ['count', 'nunique'],
    }
    simple_features = line_items.groupby('client_id').agg(line_agg_spec)
    drop_column_multi_index_inplace(simple_features)
    simple_features = simple_features.reset_index()

    # Collapse lines into transactions before averaging per client.
    per_transaction = purchases.groupby(['client_id', 'transaction_id']).agg(
        {
            'product_id': ['count'],
            'product_quantity': ['max'],
        }
    )
    # The helper is expected to flatten the header into `product_id_count`
    # and `product_quantity_max`, the names used below.
    drop_column_multi_index_inplace(per_transaction)
    per_transaction = per_transaction.reset_index()

    complex_features = per_transaction.groupby('client_id').agg(
        {
            # mean products in order
            'product_id_count': ['mean', 'median'],
            # mean max number of one product
            'product_quantity_max': ['mean', 'median'],
        }
    )
    drop_column_multi_index_inplace(complex_features)
    complex_features = complex_features.reset_index()

    return pd.merge(simple_features, complex_features, on='client_id')
def make_order_features(orders: pd.DataFrame) -> pd.DataFrame:
    """Build per-client aggregate features over all orders.

    The features are: order count, sum/extreme/median of points received and
    spent and of the purchase sum, number of unique stores, whole days
    between the last order and ``MAILING_DATETIME``, how many orders had
    non-zero points of each kind, and several ratios derived from those
    counts and sums.

    Args:
        orders: Frame with ``client_id``, ``transaction_id``, ``store_id``,
            ``datetime``, ``purchase_sum`` and the regular/express points
            received/spent columns. It is copied before helper columns are
            added, so the caller's frame is left untouched.

    Returns:
        One row per client with ``client_id`` and the features above.
    """
    orders = orders.copy()

    agg_dict = {
        'transaction_id': ['count'],  # number of orders
        'regular_points_received': ['sum', 'max', 'median'],
        'express_points_received': ['sum', 'max', 'median'],
        'regular_points_spent': ['sum', 'min', 'median'],
        'express_points_spent': ['sum', 'min', 'median'],
        'purchase_sum': ['sum', 'max', 'median'],
        'store_id': ['nunique'],  # number of unique stores
        'datetime': ['max'],  # datetime of last order
    }

    point_columns = [
        (points_type, event_type)
        for points_type in POINT_TYPES
        for event_type in POINT_EVENT_TYPES
    ]

    # is regular/express points spent/received
    for points_type, event_type in point_columns:
        flag_col = f'is_{points_type}_points_{event_type}'
        source_col = f'{points_type}_points_{event_type}'
        orders[flag_col] = (orders[source_col] != 0).astype(int)
        agg_dict[flag_col] = ['sum']

    features = orders.groupby('client_id').agg(agg_dict)
    # The helper is expected to flatten the header into `<column>_<agg>`
    # names (e.g. `datetime_max`), which all the code below relies on.
    drop_column_multi_index_inplace(features)
    features = features.reset_index()

    time_since_last = MAILING_DATETIME - features['datetime_max']
    features['days_from_last_order'] = (
        time_since_last.dt.total_seconds() // SECONDS_IN_DAY
    )
    features = features.drop(columns=['datetime_max'])

    # proportion of regular/express points spent to all transactions
    order_count = features['transaction_id_count']
    for points_type, event_type in point_columns:
        share_col = f'proportion_count_{points_type}_points_{event_type}'
        flagged_count = features[f'is_{points_type}_points_{event_type}_sum']
        features[share_col] = flagged_count / order_count

    # Count-based ratios: a positive count divided by zero gives +inf, which
    # is capped with a finite value; 0 / 0 stays NaN.
    express_over_regular = (
        features['is_express_points_spent_sum'] /
        features['is_regular_points_spent_sum']
    )
    features['ratio_count_express_to_regular_points_spent'] = (
        express_over_regular.replace(np.inf, FLOAT32_MAX)
    )

    for points_type in POINT_TYPES:
        spent_count = features[f'is_{points_type}_points_spent_sum']
        received_count = features[f'is_{points_type}_points_received_sum']
        ratio_col = f'ratio_count_{points_type}_points_spent_to_received'
        features[ratio_col] = (
            spent_count / received_count
        ).replace(np.inf, 1000)

    # NOTE(review): the sum-based ratios below do not replace infinities,
    # unlike the count-based ones above -- confirm this is intended.
    purchases_total = features['purchase_sum_sum']
    for points_type in POINT_TYPES:
        ratio_col = f'ratio_sum_{points_type}_points_spent_to_purchases_sum'
        points_spent = features[f'{points_type}_points_spent_sum']
        features[ratio_col] = points_spent / purchases_total

    features['ratio_sum_express_points_spent_to_sum_regular_points_spent'] = (
        features['express_points_spent_sum'] /
        features['regular_points_spent_sum']
    )

    return features