def get_orderbook(feed_df: pd.DataFrame, ob_state: dd, ob_state_seq) -> tuple:
    """
    Get the orders which are still active at the end of the feed.

    Starts from a previous order book snapshot, adds every 'open' message of
    the feed that is newer than the snapshot and removes every order for
    which the feed contains a 'done' message.

    :param feed_df: feed messages; the columns 'type', 'sequence', 'order_id'
        and 'remaining_size' are read here
    :param ob_state: order book snapshot the feed is applied to
    :param ob_state_seq: sequence number of ``ob_state``; only 'open' messages
        with a strictly greater sequence are added
    :return: tuple ``(order_book, final_seq)``; ``order_book`` holds the
        columns 'side', 'order_id', 'price' and 'size' with a fresh index,
        ``final_seq`` is the largest sequence number among the remaining
        orders (NaN if none of them carries one)
    :raises AssertionError: if ``OrderBookCreator.check_ob_valid`` rejects
        the resulting order book
    """
    # TODO: find those orders which were modified, handle carefully

    # 'open' messages newer than the snapshot. assign() builds a new frame,
    # so the 'size' column is never written onto a slice of feed_df
    # (which is what triggers pandas' SettingWithCopyWarning).
    is_open = feed_df['type'] == 'open'
    is_newer = feed_df['sequence'] > ob_state_seq
    residual_orders = feed_df[is_open & is_newer].assign(
        size=lambda frame: frame['remaining_size'])

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    # NOTE(review): ob_state is annotated as `dd`; non-pandas frames keep the
    # original append() call - confirm which frame type callers really pass.
    if isinstance(ob_state, pd.DataFrame):
        all_orders = pd.concat([ob_state, residual_orders])
    else:
        all_orders = ob_state.append(residual_orders)

    # Orders with a 'done' message are no longer on the book
    done_order_ids = feed_df.loc[feed_df['type'] == 'done', 'order_id']
    ob_filtered = all_orders[~all_orders['order_id'].isin(done_order_ids)]

    # NOTE: an earlier version additionally kept only buys priced below and
    # sells priced above the final trade price; that filter is disabled.

    if not OrderBookCreator.check_ob_valid(ob_filtered):
        raise AssertionError("OrderBook does not appear to be valid")

    # max() avoids a full sort and skips NaN sequences, e.g. on rows that came
    # from an ob_state without a 'sequence' column (TODO confirm whether that
    # can happen; the frame returned below does not carry that column).
    final_seq = ob_filtered['sequence'].max()

    order_book = ob_filtered.reset_index(drop=True)[[
        'side', 'order_id', 'price', 'size'
    ]]
    return order_book, final_seq
def agg_insert_by_group(data: dd = None,
                        groupby_columns: List[str] = None,
                        agg_dict: dict = None,
                        insert_dict: dict = None) -> dd:
    """
    Split the input dataframe into groups, aggregate each group according to
    the aggregation dict and append the aggregated rows to the original
    dataframe, with the column values given in the insert dict set on the
    aggregated rows.

    :param data: input dataframe
    :param groupby_columns: list of column names to group by
    :param agg_dict: dictionary of the format
        {column name: aggregation to perform on that column}
    :param insert_dict: dictionary of the format
        {column name: value to set on the aggregated rows prior to insertion};
        ``None`` inserts nothing
    :return: the original rows followed by one aggregated row per group;
        the original index values are kept, so index labels may repeat
    """
    agg_data = data.groupby(groupby_columns).agg(agg_dict).reset_index()

    # List-valued aggregations produce two-level columns (column, function);
    # keep only the column name. Plain aggregations already give flat columns.
    if agg_data.columns.nlevels > 1:
        agg_data.columns = agg_data.columns.droplevel(1)

    # Use the caller's value for each column (it used to be ignored in
    # favour of a hard-coded 'COMBINED').
    for column, value in (insert_dict or {}).items():
        agg_data[column] = value

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    # NOTE(review): `data` is annotated as `dd`; non-pandas frames keep the
    # original append() call - confirm which frame type callers really pass.
    if isinstance(data, pd.DataFrame):
        return pd.concat([data, agg_data])
    return data.append(agg_data)