Esempio n. 1
0
    def get_limit_market_order_ratio(df: dd):
        """Return the ratio of limit-order count to market-order count in ``df``.

        ``df`` is split with ``DataSplitter.get_limit_orders_from_feed`` and
        ``DataSplitter.get_market_orders_from_feed``; the lengths of the two
        results are handed to ``Statistics.get_ratio`` (limits first, markets
        second) and its return value is passed straight back to the caller.
        """
        limit_count = len(DataSplitter.get_limit_orders_from_feed(df))
        market_count = len(DataSplitter.get_market_orders_from_feed(df))
        return Statistics.get_ratio(limit_count, market_count)
Esempio n. 2
0
    def generate_sim_params(cls,
                            orders_df,
                            trades_df,
                            cancels_df,
                            feed_df,
                            ob_state,
                            ob_state_seq_num,
                            ob_state_time,
                            graph=False):
        """Build the simulation parameter dictionary from order-feed data.

        For each of nine samples (relative order/cancel prices per side,
        market and limit order sizes per side, and order intervals) the
        empirical CDF is computed with ``Sample.get_cdf_data``, stored as
        plain lists and plotted with ``Sample.plot_cdf``. Price/size
        correlations and buy/sell and limit/market ratios are added on top.

        Args:
            orders_df: Order events; split by side and by limit/market type.
            trades_df: Trade events; used for the price/size correlation.
            cancels_df: Cancel events; split by side for cancel prices.
            feed_df: Passed through to
                ``DataTransformer.get_prices_relative_to_midprice``.
            ob_state: Passed through to the same helper (presumably the
                order-book snapshot -- confirm against the helper).
            ob_state_seq_num: Passed through to the same helper.
            ob_state_time: Passed through to the same helper.
            graph: NOTE(review): accepted but never read -- the CDF plots
                are always produced regardless of this flag. Confirm the
                intended behaviour before wiring it up.

        Returns:
            dict with the keys ``'ratios'``, ``'correlations'``,
            ``'distributions'`` (currently always empty) and
            ``'discreteDistributions'`` (``{'x': [...], 'cy': [...]}`` per
            sample name).

        Raises:
            Exception: Anything raised while computing the parameters is
                logged through ``cls.logger`` and re-raised unchanged.
        """
        cls.check_has_elements([orders_df, trades_df, cancels_df])

        discrete_distributions = {}

        def record_cdf(key, samples, title):
            # Compute the empirical CDF, keep it as JSON-friendly lists and
            # plot it under the given title.
            x, cy = Sample.get_cdf_data(samples)
            discrete_distributions[key] = {'x': x.tolist(), 'cy': cy.tolist()}
            Sample.plot_cdf(x, cy, title)

        def relative_prices(side, events_df):
            # Prices of one side's events relative to the midprice.
            side_events = DataSplitter.get_side(side, events_df)
            return DataTransformer.get_prices_relative_to_midprice(
                ob_state, ob_state_seq_num, ob_state_time, feed_df,
                side_events)

        def absolute_sizes(side, events_df):
            # Absolute, non-null sizes of one side's events.
            side_events = DataSplitter.get_side(side, events_df)
            return side_events['size'].dropna().apply(abs)

        try:
            # TODO: parallelise inverse CDF generation
            limit_orders = DataSplitter.get_limit_orders_from_feed(orders_df)

            price_size_corrs = Correlations.get_price_size_corr(
                trades_df, limit_orders)
            correlations = {
                'buy_price_size': price_size_corrs['buy'],
                'sell_price_size': price_size_corrs['sell'],
            }

            # Relative order prices. Buy-side values are negated so they can
            # be compared with the sell side (see the plot titles).
            record_cdf("sell_price_relative",
                       relative_prices("sell", orders_df),
                       "Sell order prices (relative)")
            record_cdf("buy_price_relative",
                       relative_prices("buy", orders_df).apply(lambda x: -x),
                       "Buy prices (relative) (flipped for comparison)")

            # Relative cancel prices, buy side negated in the same way.
            record_cdf("buy_cancels_relative",
                       relative_prices("buy", cancels_df).apply(lambda x: -x),
                       "Buy cancel prices (relative) (flipped for comparison)")
            record_cdf("sell_cancels_relative",
                       relative_prices("sell", cancels_df),
                       "Sell cancel prices (relative)")

            # Market order sizes
            market_orders = DataSplitter.get_market_orders_from_feed(
                orders_df)
            record_cdf("buy_market_size",
                       absolute_sizes("buy", market_orders),
                       "Buy market order sizes")
            record_cdf("sell_market_size",
                       absolute_sizes("sell", market_orders),
                       "Sell market order sizes")

            # Limit order sizes
            record_cdf("buy_limit_size",
                       absolute_sizes("buy", limit_orders),
                       "Buy limit order sizes")
            record_cdf("sell_limit_size",
                       absolute_sizes("sell", limit_orders),
                       "Sell limit order sizes")

            # Time intervals between orders
            record_cdf("intervals",
                       DataTransformer.get_time_intervals(orders_df),
                       "Order intervals")

            ratios = {
                "buy_sell_order_ratio":
                    Statistics.get_buy_sell_ratio(orders_df),
                "buy_sell_cancel_ratio":
                    Statistics.get_buy_sell_ratio(cancels_df),
                "buy_sell_volume_ratio":
                    Statistics.get_buy_sell_volume_ratio(orders_df),
                'limit_market_order_ratio':
                    Statistics.get_limit_market_order_ratio(orders_df),
            }

            return {
                'ratios': ratios,
                'correlations': correlations,
                # No fitted distributions are produced at present; the key is
                # kept so the shape of the result stays the same for callers.
                'distributions': {},
                'discreteDistributions': discrete_distributions,
            }
        except Exception as e:
            cls.logger.error(
                "Failed to generate parameters, exception was %s", e)
            raise