def get_limit_market_order_ratio(df: dd):
    """Compare how many limit orders and market orders ``df`` contains.

    The limit-order and market-order subsets are extracted with
    ``DataSplitter``; their lengths are then handed to
    ``Statistics.get_ratio`` (limit count first, market count second) and
    that call's result is returned unchanged.
    """
    # Split first, count afterwards: same evaluation order as before.
    limit_orders, market_orders = (
        DataSplitter.get_limit_orders_from_feed(df),
        DataSplitter.get_market_orders_from_feed(df),
    )
    return Statistics.get_ratio(len(limit_orders), len(market_orders))
def generate_sim_params(cls, orders_df, trades_df, cancels_df, feed_df,
                        ob_state, ob_state_seq_num, ob_state_time,
                        graph=False):
    """Derive the simulation parameter dictionary from historical data.

    The three input frames are first passed to ``cls.check_has_elements``.
    Then the price/size correlations, one empirical CDF per order
    attribute, and the order-flow ratios are computed and assembled into a
    single dictionary. Every CDF is also plotted via ``Sample.plot_cdf``.

    Args:
        orders_df: Orders data; source of the order price, order size and
            interval CDFs and of the order ratios.
        trades_df: Trades data; only used for the price/size correlation.
        cancels_df: Cancellations data; source of the cancel price CDFs and
            of the buy/sell cancel ratio.
        feed_df: Forwarded unchanged to
            ``DataTransformer.get_prices_relative_to_midprice``.
        ob_state: Forwarded unchanged to
            ``DataTransformer.get_prices_relative_to_midprice``.
        ob_state_seq_num: Forwarded unchanged to
            ``DataTransformer.get_prices_relative_to_midprice``.
        ob_state_time: Forwarded unchanged to
            ``DataTransformer.get_prices_relative_to_midprice``.
        graph: Not read by this method; kept so existing callers keep
            working. Plots are produced regardless of its value.

    Returns:
        A dict with four keys: ``'ratios'``, ``'correlations'``,
        ``'distributions'`` (always an empty dict, nothing here populates
        it) and ``'discreteDistributions'``, which maps each distribution
        name to ``{'x': [...], 'cy': [...]}`` built from the arrays
        returned by ``Sample.get_cdf_data``.

    Raises:
        Exception: Any error raised while computing the parameters is
            logged through ``cls.logger.error`` and then re-raised.
    """
    cls.check_has_elements([orders_df, trades_df, cancels_df])

    discrete_distributions = {}

    def store_cdf(key, samples, title):
        # Record the empirical CDF of `samples` under `key`, then plot it.
        xs, cum_ys = Sample.get_cdf_data(samples)
        discrete_distributions[key] = {
            'x': xs.tolist(),
            'cy': cum_ys.tolist(),
        }
        Sample.plot_cdf(xs, cum_ys, title)

    def relative_prices(side, events_df, flip=False):
        # Prices of one side of `events_df` relative to the midprice.
        one_side = DataSplitter.get_side(side, events_df)
        relative = DataTransformer.get_prices_relative_to_midprice(
            ob_state, ob_state_seq_num, ob_state_time, feed_df, one_side)
        if flip:
            # Buy-side values are negated so their curves can be compared
            # directly with the sell-side ones.
            relative = relative.apply(lambda x: -x)
        return relative

    def absolute_sizes(side, events_df):
        # Non-null absolute order sizes for one side of `events_df`.
        sizes = DataSplitter.get_side(side, events_df)['size'].dropna()
        return sizes.apply(lambda x: abs(x))

    try:
        # TODO: parallelise inverse CDF generation. Everything below runs
        # sequentially in the calling process; no worker pool is opened
        # because no task is handed to one.
        price_size_corrs = Correlations.get_price_size_corr(
            trades_df, DataSplitter.get_limit_orders_from_feed(orders_df))
        correlations = {
            'buy_price_size': price_size_corrs['buy'],
            'sell_price_size': price_size_corrs['sell'],
        }

        # Order prices relative to the midprice.
        store_cdf("sell_price_relative",
                  relative_prices("sell", orders_df),
                  "Sell order prices (relative)")
        store_cdf("buy_price_relative",
                  relative_prices("buy", orders_df, flip=True),
                  "Buy prices (relative) (flipped for comparison)")

        # Cancel prices relative to the midprice.
        store_cdf("buy_cancels_relative",
                  relative_prices("buy", cancels_df, flip=True),
                  "Buy cancel prices (relative) (flipped for comparison)")
        store_cdf("sell_cancels_relative",
                  relative_prices("sell", cancels_df),
                  "Sell cancel prices (relative)")

        # Market order sizes.
        market_orders = DataSplitter.get_market_orders_from_feed(orders_df)
        store_cdf("buy_market_size",
                  absolute_sizes("buy", market_orders),
                  "Buy market order sizes")
        store_cdf("sell_market_size",
                  absolute_sizes("sell", market_orders),
                  "Sell market order sizes")

        # Limit order sizes.
        limit_orders = DataSplitter.get_limit_orders_from_feed(orders_df)
        store_cdf("buy_limit_size",
                  absolute_sizes("buy", limit_orders),
                  "Buy limit order sizes")
        store_cdf("sell_limit_size",
                  absolute_sizes("sell", limit_orders),
                  "Sell limit order sizes")

        # Time between consecutive orders.
        store_cdf("intervals",
                  DataTransformer.get_time_intervals(orders_df),
                  "Order intervals")

        ratios = {
            "buy_sell_order_ratio":
                Statistics.get_buy_sell_ratio(orders_df),
            "buy_sell_cancel_ratio":
                Statistics.get_buy_sell_ratio(cancels_df),
            "buy_sell_volume_ratio":
                Statistics.get_buy_sell_volume_ratio(orders_df),
            'limit_market_order_ratio':
                Statistics.get_limit_market_order_ratio(orders_df),
        }

        return {
            'ratios': ratios,
            'correlations': correlations,
            # No fitted distributions are produced here; the key is kept
            # so the shape of the returned dictionary does not change.
            'distributions': {},
            'discreteDistributions': discrete_distributions,
        }
    except Exception as e:
        cls.logger.error("Failed to generate parameters, exception was "
                         + str(e))
        # Bare raise re-raises the active exception as-is.
        raise