Example #1
    def __init__(self, tables_dict={}, scalar=1, round_figures_by=None):
        self._tables_dict = tables_dict
        self._scalar = scalar
        self._round_figures_by = round_figures_by

        self._time_series_ops = TimeSeriesOps()
        self._util_func = UtilFunc()
Example #2
    def __init__(self, version=constants.tcapy_version):
        self._util_func = UtilFunc()  # general utility operations (such as flatten lists)
        self._trade_order_tag = TradeOrderFilterTag()  # to filter trade/orders according to the values of certain tags

        self._version = version
Example #3
    def __init__(self,
                 temp_data_folder=constants.temp_data_folder,
                 temp_large_data_folder=constants.temp_large_data_folder,
                 tickers=None,
                 data_store=None):

        self.temp_data_folder = temp_data_folder
        self.temp_large_data_folder = temp_large_data_folder
        self.tickers = tickers
        self.util_func = UtilFunc()
        self.time_series_ops = TimeSeriesOps()
        self.data_store = data_store

        logger = LoggerManager().getLogger(__name__)

        if not os.path.isdir(self.temp_data_folder):
            logger.warning("Temp data folder " + self.temp_data_folder +
                           " does not exist")

        if not os.path.isdir(self.temp_large_data_folder):
            logger.warning("Temp large data folder " + self.temp_large_data_folder +
                           " does not exist")

Example #4
    def __init__(self,
                 computation_results,
                 title='Cuemacro Computation',
                 renderer=CanvasRenderer(),
                 chart_report_height=constants.chart_report_height,
                 chart_report_width=constants.chart_report_width):
        """Initialize class, with the computation results we wish to convert into a report like format

        Parameters
        ----------
        computation_results : ComputationResults
            The results of a large scale computation, which contains charts and DataFrames

        title : str
            Title of webpage to be rendered
        """
        self._util_func = UtilFunc()
        self._computation_results = computation_results
        self._title = title
        self._chart = Chart(engine='plotly')
        self._renderer = renderer
        self._computation_request = computation_results.computation_request

        self._chart_report_width = chart_report_width
        self._chart_report_height = chart_report_height
Example #5
    def _write_df_to_db_single_thread(self, ticker, remove_duplicates=True, if_exists_table='append',
                                      if_exists_ticker='replace'):

        logger = LoggerManager.getLogger(__name__)

        postfix = '-' + self._get_postfix() + '-with-duplicates'

        if remove_duplicates:
            postfix = '-' + self._get_postfix() + '-no-duplicates'

        filename = os.path.join(self.temp_large_data_folder, ticker + postfix) + '.' + fileformat

        logger.info("Reading " + filename)

        util_func = UtilFunc()
        time_series_ops = TimeSeriesOps()
        data_source_local = self._get_output_data_source()

        df = util_func.read_dataframe_from_binary(filename, format=binary_format)

        if df is not None:
            df = time_series_ops.localize_as_UTC(df)

            data_source_local.append_market_data(df, ticker, if_exists_table=if_exists_table,
                                                 if_exists_ticker=if_exists_ticker)
        else:
            logger.warning("Couldn't write DataFrame for " + ticker + " to database; it appears to be empty!")
Example #6
    def __init__(self,
                 trade_order_list=None,
                 metric_name=None,
                 filter_by=['all'],
                 tag_value_combinations={},
                 keep_fields=['executed_notional', 'side'],
                 replace_text={},
                 round_figures_by=1,
                 scalar=1.0,
                 weighting_field=constants.table_weighting_field,
                 exclude_fields_from_avg=[]):
        self._trade_order_list = trade_order_list
        self._metric_name = metric_name
        self._results_summary = ResultsSummary()
        self._keep_fields = keep_fields
        self._filter_by = filter_by
        self._replace_text = replace_text
        self._round_figures_by = round_figures_by
        self._weighting_field = weighting_field
        self._scalar = scalar
        self._exclude_fields_from_avg = exclude_fields_from_avg

        self._tag_value_combinations = tag_value_combinations
        self._trade_order_filter_tag = TradeOrderFilterTag()
        self._results_form_tag = 'table'
        self._util_func = UtilFunc()
        self._time_series_ops = TimeSeriesOps()
Example #7
    def __init__(self, app, session_manager, callback_manager,
                 glob_volatile_cache, layout):
        super(TCACallerImplGen,
              self).__init__(app, session_manager, callback_manager,
                             glob_volatile_cache, layout)

        self._util_func = UtilFunc()
Example #8
    def create_plot_flags(self, session_manager, layout):
        """Creates flags for each display component (eg. plot or table) on each web page in the project. These are
        necessary so we can keep track of whether we need to recalculate the underlying TCA analysis.

        Parameters
        ----------
        session_manager : SessionManager
            Stores and modifies session data which is unique for each user

        layout : Layout
            Specifies the layout of an HTML page using Dash components

        Returns
        -------
        dict
        """
        plot_flags = {}
        plot_lines = {}

        for page in layout.pages:

            page_flags = []
            line_flags = []

            # For redrawing plots
            for gen_flag in self._generic_plot_flags:
                key = page + gen_flag

                # Append a plot flag if it exists
                if key in layout.id_flags:
                    page_flags.append(
                        self._session_manager.create_calculated_flags(
                            'redraw-' + page,
                            session_manager.create_calculated_flags(
                                self._util_func.dict_key_list(
                                    layout.id_flags[key].keys()),
                                self._generic_plot_flags[gen_flag])))

            plot_flags[page] = self._util_func.flatten_list_of_lists(page_flags)

            # For clicking on charts
            for gen_flag in self._generic_line_flags:
                key = page + gen_flag

                # Append a line clicking flag if it exists
                if key in layout.id_flags:
                    line_flags.append(
                        self._session_manager.create_calculated_flags(
                            'redraw-' + page,
                            session_manager.create_calculated_flags(
                                self._util_func.dict_key_list(
                                    layout.id_flags[key].keys()),
                                self._generic_plot_flags[gen_flag])))

            if line_flags:
                plot_lines[page] = self._util_func.flatten_list_of_lists(line_flags)

        return plot_flags
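
A hedged usage sketch of the flag creation above; the wiring is pieced together from the constructor signatures in the other examples on this page, and app and glob_volatile_cache are taken as given:

layout = LayoutDash(app=app, constants=constants, url_prefix='')
caller = TCACallerImplGen(app, session_manager, callback_manager,
                          glob_volatile_cache, layout)

# One list of 'redraw' flags per page, used to decide whether the
# underlying TCA analysis needs to be recalculated
plot_flags = caller.create_plot_flags(session_manager, layout)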
Example #9
    def __init__(self, app, session_manager, callback_manager, glob_volatile_cache, layout, callback_dict=None):
        self._util_func = UtilFunc()

        self._session_manager = session_manager
        self._callback_manager = callback_manager

        self._glob_volatile_cache = glob_volatile_cache

        self.attach_callbacks(app, callback_manager, callback_dict=callback_dict)
Example #10
    def __init__(self, version=constants.tcapy_version, volatile_cache_engine=constants.volatile_cache_engine):
        self._data_factory = DataFactory(version=version)

        self._util_func = UtilFunc()  # general utility operations (such as flatten lists)
        self._fx_conv = FXConv()  # for determining if FX crosses are in the correct convention
        self._time_series_ops = TimeSeriesOps()  # time series operations, such as filtering by date

        self._metric_executed_price = MetricExecutedPriceNotional()  # for determining the executed notionals/price of orders
        # from trades

        self._benchmark_mid = BenchmarkMid()  # to calculate mid price from bid/ask quote market data
        self._trade_order_tag = TradeOrderFilterTag()  # to filter trade/orders according to the values of certain tags
        self._version = version
        self._volatile_cache_engine = volatile_cache_engine
Example #11
    def __init__(self, tca_request=None, tag_value_combinations={}):
        """Initialise with the TCA parameters of our analysis and which field/value combinations we wish to filter for.

        Parameters
        ----------
        tca_request : TCARequest
            TCA parameters for our analysis


        tag_value_combinations : dict
            User-defined fields and the values to filter by
        """
        self._util_func = UtilFunc()

        self.set_trade_order_params(tca_request=tca_request, tag_value_combinations=tag_value_combinations)
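
A usage sketch; the field name below is an illustrative assumption, not a field guaranteed to exist in your trade data:

# Filter trades/orders down to those tagged with a particular broker
trade_order_filter = TradeOrderFilterTag(tca_request=tca_request,
                                         tag_value_combinations={'broker_id': 'broker1'})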
Example #12
    @staticmethod
    def get_util_func():
        with Mediator._util_func_lock:

            if Mediator._util_func is None:
                Mediator._util_func = UtilFunc()

        return Mediator._util_func
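
get_util_func is a lock-guarded lazy singleton accessor. A minimal standalone sketch of the class state it assumes (the lock and the cached instance as class attributes; the real Mediator holds several such shared objects):

import threading

from tcapy.util.utilfunc import UtilFunc

class Mediator(object):
    _util_func = None
    _util_func_lock = threading.Lock()

    @staticmethod
    def get_util_func():
        # Serialize access so two threads cannot both construct UtilFunc
        with Mediator._util_func_lock:
            if Mediator._util_func is None:
                Mediator._util_func = UtilFunc()

        return Mediator._util_func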
Example #13
    def __init__(self, computation_results, title='Cuemacro Computation'):
        """Initialize class, with the computation results we wish to convert into a report like format

        Parameters
        ----------
        computation_results : ComputationResults
            The results of a large scale computation, which contains charts and DataFrames

        title : str
            Title of webpage to be rendered
        """
        self._util_func = UtilFunc()
        self._computation_results = computation_results
        self._title = title
        self._canvas_plotter = 'plain'
        self._chart = Chart(engine='plotly')
Example #14
    def __init__(self, app=None, constants=None, url_prefix=''):
        super(LayoutDash, self).__init__(app=app, constants=constants, url_prefix=url_prefix)

        self.id_flags = {}
        self.pages = {}

        self._util_func = UtilFunc()
        self._url_prefix = url_prefix
Example #15
    def __init__(self, dict_of_df, computation_request, text_preamble=''):
        self._plot_render = PlotRender()
        self._util_func = UtilFunc()
        self.text_preamble = text_preamble

        self._computation_request = computation_request

        self._rendered = False
Example #16
    def __init__(self, version=constants.tcapy_version):
        self._util_func = UtilFunc()

        self._tca_market_trade_loader = Mediator.get_tca_market_trade_loader(version=version)
        self._time_series_ops = TimeSeriesOps()
        self._trade_order_tag = TradeOrderFilterTag()

        logger = LoggerManager.getLogger(__name__)
        logger.info("Init TCAEngine version: " + self._tca_market_trade_loader.get_tca_version() + " - Env: " + constants.env)
Example #17
    def set_trade_order_params(self, tca_request=None, tag_value_combinations={}):
        """Sets the parameters for filtering of trade/orders according to the values of tags

        Parameters
        ----------
        tca_request : TCARequest

        tag_value_combinations : dict
            Filter for a combination of tag/values

        Returns
        -------

        """
        self._tca_request = tca_request
        self._tag_value_combinations = tag_value_combinations
        self._util_func = UtilFunc()

        if tag_value_combinations != {}:
            self._tag = self._util_func.dict_key_list(tag_value_combinations.keys())
Example #18
def test_write_csv_from_data_vendor():
    """Tests downloading market data from the data vendor and dumping to CSV. Checks written CSV against what is loaded
    in memory. Also checks data is available in each 'usual' market hour.

    Note that we use cached data from disk, as we want to download relatively large sections of data, and doing
    this externally can cause the test to run very slowly.
    """

    for data_vendor_name in data_vendor_name_list:

        # database_source = database_source_dict[data_vendor_name]
        database_populator = database_populator_dict[data_vendor_name]
        chunk_int_min = chunk_int_min_dict[data_vendor_name]

        # Specifically choose dates which straddle the weekend boundary
        # 1) during British Summer Time in London
        # 2) during GMT time in London
        start_date = '27 Apr 2018'; finish_date = '03 May 2018'; expected_csv_files = 5
        # start_date = '02 Feb 2018'; finish_date = '07 Feb 2018'; expected_csv_files = 4
        split_size = 'daily'
        write_csv = False

        # Prepare the CSV folder first
        csv_folder = resource('csv_' + data_vendor_name + '_dump')

        # Empty the CSV test harness folder
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        msg, df_dict = database_populator.download_to_csv(
            start_date, finish_date, ['EURUSD'], chunk_int_min=chunk_int_min, split_size=split_size, csv_folder=csv_folder,
            return_df=True, write_large_csv=write_csv, remove_duplicates=False, web_proxies=web_proxies)

        df_read_direct_from_data_vendor = df_dict['EURUSD']

        # Check it has data for every market hour (eg. ignoring Saturdays)
        assert util_func.check_data_frame_points_in_every_hour(df_read_direct_from_data_vendor, start_date, finish_date)

        if write_csv:
            # read back the CSVs dumped on disk in the test harness CSV folder
            csv_file_list = glob.glob(csv_folder + '/EURUSD*.csv')

            assert len(csv_file_list) == expected_csv_files

            df_list = []

            for c in csv_file_list:
                df = pd.read_csv(c, index_col=0)
                df.index = pd.to_datetime(df.index)
                df_list.append(df)

            # now compare the CSVs on disk versus those read directly
            df_read_from_csv = pd.concat(df_list).tz_localize(pytz.utc)

            assert_frame_equal(df_read_from_csv, df_read_direct_from_data_vendor)
Example #19
    def set_trade_order_params(self,
                               tca_request=None,
                               time_of_day=None,
                               day_of_week=None,
                               month_of_year=None,
                               specific_dates=None,
                               time_zone='utc'):
        """Initialise our filter, by the times of day, days of the week and months we wish to filter our trade/filters by.
        Note that it is optional which period to filter by (eg. we can filter just by time of day if we want to).

        Parameters
        ----------
        tca_request : TCARequest
            TCA parameters for our analysis

        time_of_day : dict
            Describing the start and finish time of our filter

        day_of_week : str
            Which day of the week to filter by?

        month_of_year : str
            Which month of the year to filter by?

        specific_dates : str / str (list)
            Which dates to filter by

        time_zone : str
            Time zone to use (eg. 'utc')
        """

        self.tca_request = tca_request
        self.time_of_day = time_of_day
        self.day_of_week = day_of_week
        self.month_of_year = month_of_year
        self.specific_dates = specific_dates
        self.time_zone = time_zone

        self._util_func = UtilFunc()
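
A hedged example of the kind of call this supports; the dictionary keys for time_of_day are assumptions (the docstring only says it describes a start and finish time), and the class name is hypothetical for this snippet:

trade_order_filter = TradeOrderFilterTimeOfDayWeekMonth()
trade_order_filter.set_trade_order_params(
    tca_request=tca_request,
    time_of_day={'start_time': '07:00', 'finish_time': '17:00'},
    day_of_week='mon',
    time_zone='utc')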
Example #20
def create_market_trade_data_eikon():
    """Creates a small dataset for testing purposes for market, trade and order data for EURUSD at the start of May 2017,
    which is dumped to the designated tcapy test harness folder.

    Returns
    -------

    """
    # Use database source as Arctic (or directly from Dukascopy) for market data (assume we are using market data as a source)
    tca_market = TCAMarketTradeLoaderImpl()

    util_func = UtilFunc()

    market_df = []

    for tick in ticker:
        market_request = MarketRequest(ticker=tick,
                                       data_store=data_store,
                                       start_date=start_date,
                                       finish_date=finish_date)

        market_df.append(
            tca_market.get_market_data(market_request=market_request))

    # Note: it can be very slow to write these CSV files
    market_df = pd.concat(market_df)
    market_df.to_csv(os.path.join(folder, 'small_test_market_df_eikon.csv.gz'),
                     compression='gzip')

    # Also write to disk as a binary file (easier to load up later)
    util_func.write_dataframe_to_binary(
        market_df, os.path.join(folder, 'small_test_market_df_eikon.gzip'))

    # Create a spot file in reverse order
    market_df.sort_index(ascending=False)\
        .to_csv(os.path.join(folder, 'small_test_market_df_reverse_eikon.csv.gz'), compression='gzip')

    # Also write to disk as Parquet file (easier to load up later)
    util_func.write_dataframe_to_binary(
        market_df,
        os.path.join(folder, 'small_test_market_df_reverse_eikon.parquet'))

    if create_trade_order_data:
        # Use the market data we just downloaded to CSV, and perturb it to generate the trade data
        data_test_creator = DataTestCreator(
            market_data_postfix=postfix,
            csv_market_data=os.path.join(folder,
                                         'small_test_market_df_eikon.csv.gz'),
            write_to_db=False)

        # Create randomised trade/order data
        trade_order = data_test_creator.create_test_trade_order(
            ticker_trades, start_date=start_date, finish_date=finish_date)

        trade_order['trade_df'].to_csv(
            os.path.join(folder, 'small_test_trade_df_eikon.csv'))
        trade_order['order_df'].to_csv(
            os.path.join(folder, 'small_test_order_df_eikon.csv'))
Example #21
    def __init__(self,
                 trade_order_list=None,
                 metric_name=None,
                 aggregate_by_field=None,
                 aggregation_metric='mean',
                 tag_value_combinations={}):
        self._trade_order_list = trade_order_list
        self._metric_name = metric_name
        self._aggregate_by_field = aggregate_by_field
        self._aggregation_metric = aggregation_metric
        self._results_summary = ResultsSummary()

        self._tag_value_combinations = tag_value_combinations
        self._trade_order_filter_tag = TradeOrderFilterTag()
        self._util_func = UtilFunc()
        self._time_series_ops = TimeSeriesOps()
Example #22
    def _fetch_market_data(self, start, finish, ticker, write_to_disk=True, read_cached_from_disk=True, web_proxies=constants.web_proxies):
        logger = LoggerManager.getLogger(__name__)

        key = (str(start) + str(finish) + ticker + '_' + self._get_postfix()).replace(":", '_')

        filename = os.path.join(self.temp_data_folder, key) + '.' + fileformat
        util_func = UtilFunc()

        start_time_stamp = pd.Timestamp(start)
        finish_time_stamp = pd.Timestamp(finish)

        if self._remove_weekend_points():
            weekend_data = "Weekend? " + key

            weekday_point = util_func.is_weekday_point(start_time_stamp, finish_time_stamp,
                                                       friday_close_nyc_hour=constants.friday_close_utc_hour,
                                                       sunday_open_utc_hour=constants.sunday_open_utc_hour)

            if not weekday_point:
                return None, weekend_data

        df = None

        if read_cached_from_disk:
            if os.path.exists(filename):
                df = util_func.read_dataframe_from_binary(filename, format=binary_format)

                if df is not None:
                    logger.debug("Read " + filename + " from disk")

        if df is None:
            # Convert tcapy ticker into vendor ticker
            df = self._get_input_data_source().fetch_market_data(start, finish,
                                                                 ticker=self._get_tickers_vendor()[ticker], web_proxies=web_proxies)

            if df is not None:

                if write_to_disk:
                    # Write a small temporary DataFrame to disk (if the process fails later, these can be picked up
                    # without having to call the external vendor again)
                    util_func.write_dataframe_to_binary(df, filename, format=binary_format)

        msg = None

        if df is None:
            msg = "No data? " + key

        return df, msg
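
_fetch_market_data follows a read-through disk cache: try the cached binary file first, otherwise fetch from the vendor and write the result back, so that a failed later stage can resume without another external call. A stripped-down sketch of that pattern (the pandas Parquet calls stand in for tcapy's binary helpers):

import os

import pandas as pd

def fetch_with_disk_cache(fetch_fn, filename):
    # Serve from disk if a previous run already cached this chunk
    if os.path.exists(filename):
        return pd.read_parquet(filename)

    df = fetch_fn()

    # Persist so later failures don't force a repeat call to the external vendor
    if df is not None:
        df.to_parquet(filename)

    return df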
Example #23
class TCACallerImplGen(TCACaller):
    def __init__(self, app, session_manager, callback_manager,
                 glob_volatile_cache, layout):
        super(TCACallerImplGen,
              self).__init__(app, session_manager, callback_manager,
                             glob_volatile_cache, layout)

        self._util_func = UtilFunc()

    def calculate_computation_summary(self, tca_type, external_params=None):

        # callback triggered by Dash application
        def callback(*args):
            """Kicks off fetching of data of market data and TCA calculations for a specific currency pair. Caches the data
            in a VolatileCache instance, ready to be read in by the other charts.

            Parameters
            ----------
            ticker_val : str
                ticker to be used in TCA calculations

            start_date_val : str
                Start date of TCA analysis

            start_time_val : str
                Start time of TCA analysis

            finish_date_val : str
                Finish date of TCA analysis

            finish_time_val : str
                Finish time of TCA analysis

            venue_val : str
                Venue data to be used

            n_clicks : int
                Number of clicks

            Returns
            -------
            str
            """
            start = time.time()

            tag = tca_type + '-calculation-button'

            old_clicks = self._session_manager.get_session_clicks(tag)

            # make sure none of the other charts/links are plotted till we have completed this!
            self._session_manager.set_session_flag([
                self._plot_flags['aggregated'], self._plot_flags['detailed'],
                self._plot_flags['compliance']
            ], False)

            logger = LoggerManager.getLogger(__name__)

            if tca_type == 'detailed':
                ticker_val, start_date_val, start_time_val, finish_date_val, finish_time_val, \
                broker_val, algo_val, venue_val, market_data_val, metric_val, n_clicks = args

                # Catch cases where users repeatedly click, which can cause misalignment in clicks
                self._session_manager.set_session_clicks(tag,
                                                         n_clicks,
                                                         old_clicks=old_clicks)

                logger.debug(
                    self.create_generate_button_msg(old_clicks, n_clicks))

                # Make sure all the parameters have been selected
                if ticker_val != '' and venue_val != '' and start_date_val != '' and start_time_val != '' and \
                        finish_date_val != '' and finish_time_val != '' and market_data_val != '' and broker_val != '' and \
                        algo_val != '' and n_clicks > old_clicks:

                    # Expand tickers/broker fields etc, in case for example 'All' has been specified or any other groups
                    broker_val = self._util_func.populate_field(
                        broker_val,
                        constants.available_brokers_dictionary,
                        exception_fields='All')
                    algo_val = self._util_func.populate_field(
                        algo_val,
                        constants.available_algos_dictionary,
                        exception_fields='All')
                    venue_val = self._util_func.populate_field(
                        venue_val,
                        constants.available_venues_dictionary,
                        exception_fields='All')

                    # Combine the start date/time and finish date/time
                    start_date_val = start_date_val + ' ' + start_time_val
                    finish_date_val = finish_date_val + ' ' + finish_time_val

                    metric_val = metric_val.replace(' ', '_')

                    logger.debug('Calculation click old: ' + str(old_clicks) +
                                 " clicks vs new " + str(n_clicks))

                    self._session_manager.set_session_clicks(tag, n_clicks)
                    self._session_manager.set_session_flag('metric',
                                                           value=metric_val)

                    self._session_manager.set_session_flag(
                        'detailed-visualization', value=True)

                    logger.info('Selected ' + ticker_val + " " +
                                start_date_val + " - " + finish_date_val)

                    # Check that dates are less than 1 month apart
                    if pd.Timestamp(finish_date_val) - pd.Timestamp(
                            start_date_val) > pd.Timedelta(
                                days=constants.max_plot_days):
                        return "Status: Cannot plot more than " + str(
                            constants.max_plot_days) + " days!"
                    elif pd.Timestamp(start_date_val) >= pd.Timestamp(
                            finish_date_val):
                        return "Status: Start date must be before the end date"

                    try:
                        #if True:

                        # Clear the cache for the current user
                        self._glob_volatile_cache.clear_key_match(
                            self._session_manager.get_session_id())

                        results_form = [
                            # Calculate the distribution of the metric for trades/orders, broken down by trade side (buy/sell)
                            DistResultsForm(
                                trade_order_list=['trade_df', 'order_df'],
                                metric_name=metric_val,
                                aggregate_by_field='side',
                                scalar=10000.0,
                                weighting_field=
                                'executed_notional_in_reporting_currency'),

                            # Create a table of the markout of every trade
                            TableResultsForm(
                                trade_order_list=['trade_df'],
                                metric_name='markout',
                                filter_by='all',
                                replace_text={
                                    'markout_': '',
                                    'executed_notional': 'exec not',
                                    'notional_currency': 'exec not cur'
                                },
                                keep_fields=[
                                    'executed_notional', 'side',
                                    'notional_currency'
                                ],
                                scalar={
                                    'all': 10000.0,
                                    'exclude': ['executed_notional', 'side']
                                },
                                round_figures_by={
                                    'all': 2,
                                    'executed_notional': 0,
                                    'side': 0
                                },
                                weighting_field='executed_notional')
                        ]

                        benchmark_calcs = [
                            # Calculate the arrival prices for every trade/order
                            BenchmarkArrival(
                                trade_order_list=['trade_df', 'order_df']),

                            # Calculate the VWAP for each order
                            BenchmarkVWAP(trade_order_list=['order_df']),

                            # Calculate the TWAP for each order
                            BenchmarkTWAP(trade_order_list=['order_df'])
                        ]

                        metric_calcs = [
                            metric_val,
                            MetricMarkout(trade_order_list=['trade_df'])
                        ]

                        # Get from cache; given that we are in the first part of the chain, we should force it to calculate!
                        sparse_market_trade_df = self.get_cached_computation_analysis(
                            key='sparse_market_trade_df',
                            start_date=start_date_val,
                            finish_date=finish_date_val,
                            ticker=ticker_val,
                            venue=venue_val,
                            market_data=market_data_val,
                            event_type='trade',
                            dummy_market=False,
                            broker=broker_val,
                            algo=algo_val,
                            metric_calcs=metric_calcs,
                            metric_trade_order_list=['trade_df', 'order_df'],
                            benchmark_calcs=benchmark_calcs,
                            tca_type='detailed',
                            tca_engine=self._tca_engine,
                            results_form=results_form,
                            force_calculate=True)

                        calc_start = sparse_market_trade_df.index[0]
                        calc_end = sparse_market_trade_df.index[-1]

                        detailed_title = self.create_status_msg_flags(
                            'detailed', ticker_val, calc_start, calc_end)

                    except Exception as e:
                        LoggerManager().getLogger(__name__).exception(e)

                        return "Status: error " + str(e) + ". Check dates?"

                    finish = time.time()

                    return 'Status: calculated ' + str(round(
                        finish - start, 3)) + "s for " + detailed_title

            elif tca_type == 'aggregated':
                ticker_val, start_date_val, finish_date_val, broker_val, algo_val, venue_val, reload_val, market_data_val, \
                event_type_val, metric_val, n_clicks = args

                # Catch cases where users repeatedly click, which can cause misalignment in clicks
                self._session_manager.set_session_clicks(tag,
                                                         n_clicks,
                                                         old_clicks=old_clicks)

                logger.debug(
                    self.create_generate_button_msg(old_clicks, n_clicks))

                if ticker_val != '' and start_date_val != '' and venue_val != '' \
                        and finish_date_val != '' and reload_val != '' and event_type_val != '' and metric_val != '' and \
                        n_clicks > old_clicks:

                    # Expand tickers/broker fields etc, in case for example 'All' has been specified or any other groups
                    ticker_val_list = self._util_func.populate_field(
                        ticker_val, constants.available_tickers_dictionary)
                    broker_val_list = self._util_func.populate_field(
                        broker_val, constants.available_brokers_dictionary)
                    algo_val_list = self._util_func.populate_field(
                        algo_val, constants.available_algos_dictionary)
                    venue_val_list = self._util_func.populate_field(
                        venue_val, constants.available_venues_dictionary)

                    metric_val = metric_val.replace(' ', '_')

                    logger.debug('Calculation click old: ' + str(old_clicks) +
                                 " clicks vs new " + str(n_clicks))

                    self._session_manager.set_session_clicks(tag, n_clicks)
                    self._session_manager.set_session_flag('metric',
                                                           value=metric_val)

                    self._session_manager.set_session_flag(
                        'aggregated-visualization', True)

                    try:
                        # if True:

                        # Clear the cache for the current user
                        self._glob_volatile_cache.clear_key_match(
                            self._session_manager.get_session_id())

                        results_form = [
                            # Show the distribution of the selected metric for trades weighted by notional
                            # aggregated by ticker and then by venue
                            DistResultsForm(
                                trade_order_list=['trade_df'],
                                metric_name=metric_val,
                                aggregate_by_field=['ticker', 'venue'],
                                weighting_field=
                                'executed_notional_in_reporting_currency'),

                            # Display the timeline of metrics average by day (and weighted by notional)
                            TimelineResultsForm(
                                trade_order_list=['trade_df'],
                                by_date='date',
                                metric_name=metric_val,
                                aggregation_metric='mean',
                                aggregate_by_field='ticker',
                                scalar=10000.0,
                                weighting_field=
                                'executed_notional_in_reporting_currency'),

                            # Display a bar chart showing the average metric weighted by notional and aggregated by ticker
                            # and venue
                            BarResultsForm(
                                trade_order_list=['trade_df'],
                                metric_name=metric_val,
                                aggregation_metric='mean',
                                aggregate_by_field=['ticker', 'venue'],
                                scalar=10000.0,
                                weighting_field=
                                'executed_notional_in_reporting_currency')
                        ]

                        try:
                            # if True:
                            timeline_trade_df_metric_by_ticker = self.get_cached_computation_analysis(
                                key='timeline_trade_df_' + metric_val +
                                '_by_ticker',
                                start_date=start_date_val,
                                finish_date=finish_date_val,
                                event_type=event_type_val,
                                ticker=ticker_val_list,
                                broker=broker_val_list,
                                algo=algo_val_list,
                                venue=venue_val_list,
                                market_data=market_data_val,
                                dummy_market=True,
                                tca_engine=self._tca_engine,
                                tca_type='aggregated',
                                metric_calcs=metric_val,
                                metric_trade_order_list=['trade_df'],
                                results_form=results_form,
                                force_calculate=True,
                                reload_val=reload_val,
                                trade_order_mapping=['trade_df'])

                            calc_start = timeline_trade_df_metric_by_ticker.index[
                                0]
                            calc_end = timeline_trade_df_metric_by_ticker.index[
                                -1]

                            aggregated_title = self.create_status_msg_flags(
                                'aggregated', ticker_val, calc_start, calc_end)

                            logger.debug('Plotted aggregated summary plot!')

                            finish = time.time()

                        except Exception as e:
                            LoggerManager().getLogger(__name__).exception(e)

                            return "Status: error - " + str(
                                e) + ". Check data exists for these dates?"

                    except Exception as e:
                        LoggerManager().getLogger(__name__).exception(e)

                        return 'Status: error - ' + str(
                            e) + ". Check data exists for these dates?"

                    return 'Status: calculated ' + str(round(
                        finish - start, 3)) + "s for " + aggregated_title

            elif tca_type == 'compliance':
                ticker_val, start_date_val, finish_date_val, broker_val, algo_val, venue_val, reload_val, market_data_val, \
                filter_time_of_day_val, start_time_of_day_val, finish_time_of_day_val, slippage_bounds_val, visualization_val, n_clicks = args

                # Catch cases where users repeatedly click, which can cause misalignment in clicks
                self._session_manager.set_session_clicks(tag,
                                                         n_clicks,
                                                         old_clicks=old_clicks)

                logger.debug(
                    self.create_generate_button_msg(old_clicks, n_clicks))

                if ticker_val != '' and start_date_val != '' and broker_val != '' and algo_val != '' and venue_val != '' \
                        and finish_date_val != '' and reload_val != '' and filter_time_of_day_val != '' \
                        and start_time_of_day_val != '' and finish_time_of_day_val != '' and slippage_bounds_val != '' \
                        and n_clicks > old_clicks:

                    ticker_val_list = self._util_func.populate_field(
                        ticker_val, constants.available_tickers_dictionary)
                    broker_val_list = self._util_func.populate_field(
                        broker_val,
                        constants.available_brokers_dictionary,
                        exception_fields='All')
                    algo_val_list = self._util_func.populate_field(
                        algo_val,
                        constants.available_algos_dictionary,
                        exception_fields='All')
                    venue_val_list = self._util_func.populate_field(
                        venue_val,
                        constants.available_venues_dictionary,
                        exception_fields='All')

                    logger.debug('Calculation click old: ' + str(old_clicks) +
                                 " clicks vs new " + str(n_clicks))

                    self._session_manager.set_session_clicks(tag, n_clicks)

                    if visualization_val == 'yes':
                        self._session_manager.set_session_flag(
                            'compliance-visualization', True)
                    else:
                        self._session_manager.set_session_flag(
                            'compliance-visualization', False)

                    try:
                        # if True:

                        # Clear the cache for the current user
                        self._glob_volatile_cache.clear_key_match(
                            self._session_manager.get_session_id())

                        slippage_bounds = 0.0
                        overwrite_bid_ask = True

                        if slippage_bounds_val == 'bid/ask':
                            overwrite_bid_ask = False
                        else:
                            slippage_bounds = float(slippage_bounds_val)

                        metric_calcs = [
                            # Calculate slippage for trades
                            MetricSlippage(trade_order_list='trade_df'),
                        ]

                        benchmark_calcs = [
                            # Generate the spread to mid for market data (in certain cases artificially create a spread)
                            BenchmarkSpreadToMid(
                                bid_mid_bp=slippage_bounds,
                                ask_mid_bp=slippage_bounds,
                                overwrite_bid_ask=overwrite_bid_ask)
                        ]

                        results_form = [
                            # Display a table of all the anomalous trades by slippage (ie. outside bid/ask)
                            TableResultsForm(
                                # Only display for trades
                                trade_order_list=['trade_df'],

                                # Display slippage
                                metric_name='slippage',

                                # Order by the worst slippage
                                filter_by='worst_all',

                                # Replace text on table to make it look nicer
                                replace_text={
                                    'markout_': '',
                                    'executed_notional': 'exec not',
                                    '_currency': ' cur',
                                    '_in_reporting': ' in rep',
                                    'slippage_benchmark': 'benchmark',
                                    'slippage_anomalous': 'anomalous',
                                    'broker_id': 'broker ID',
                                    'algo_id': 'algo ID',
                                    'executed_price': 'price'
                                },
                                exclude_fields_from_avg=[
                                    'slippage_anomalous', 'slippage_benchmark',
                                    'side'
                                ],

                                # Only select trades outside bid/ask (ie. where slippage anomalous = 1)
                                tag_value_combinations={
                                    'slippage_anomalous': 1.0
                                },

                                # Display several columns
                                keep_fields=[
                                    'ticker', 'broker_id', 'algo_id',
                                    'notional_currency', 'executed_notional',
                                    'executed_notional_in_reporting_currency',
                                    'side', 'executed_price'
                                ],

                                # Multiply slippage field by 10000 (to convert into basis points)
                                scalar={'slippage': 10000.0},

                                # Round figures to make them easier to read
                                round_figures_by={
                                    'executed_notional': 0,
                                    'executed_notional_in_reporting_currency':
                                    0,
                                    'side': 0,
                                    'slippage': 2,
                                    'slippage_benchmark': 4
                                }),

                            # Get the total notional executed by broker (in reporting currency)
                            BarResultsForm(
                                # Select trades
                                trade_order_list=['trade_df'],

                                # Aggregate by broker name
                                aggregate_by_field='broker_id',

                                # Select the notional for analysis
                                metric_name=
                                'executed_notional_in_reporting_currency',  # analyse notional

                                # Sum all the notionals
                                aggregation_metric='sum',

                                # Round figures
                                round_figures_by=0)
                        ]

                        # Reformat tables for notional by broker
                        join_tables = [
                            # JoinTables(
                            # tables_dict={'table_name': 'jointables_broker_id_df',
                            #
                            #              # fetch the following calculated tables
                            #              'table_list': [
                            #                  'bar_order_df_executed_notional_in_reporting_currency_by_broker_id'],
                            #
                            #              # append to the columns of each table
                            #              'column_list': ['notional (rep cur)'],
                            #              'replace_text': {'broker_id': 'broker ID'}
                            #              })
                        ]

                        try:
                            # if True:
                            trade_df = self.get_cached_computation_analysis(
                                key='trade_df',
                                start_date=start_date_val,
                                finish_date=finish_date_val,
                                start_time_of_day=start_time_of_day_val,
                                finish_time_of_day=finish_time_of_day_val,
                                filter_time_of_day=filter_time_of_day_val,
                                event_type='trade',
                                ticker=ticker_val_list,
                                broker=broker_val_list,
                                algo=algo_val_list,
                                venue=venue_val_list,
                                dummy_market=True,
                                market_data=market_data_val,
                                tca_engine=self._tca_engine,
                                tca_type='compliance',
                                metric_calcs=metric_calcs,
                                benchmark_calcs=benchmark_calcs,
                                metric_trade_order_list=['trade_df'],
                                results_form=results_form,
                                join_tables=join_tables,
                                force_calculate=True,
                                reload_val=reload_val,
                                trade_order_mapping=['trade_df'])

                            calc_start = trade_df.index[0]
                            calc_end = trade_df.index[-1]

                            compliance_title = self.create_status_msg_flags(
                                'compliance', ticker_val, calc_start, calc_end)

                            logger.debug(
                                'Generated compliance summary... awaiting plot callbacks!'
                            )

                            finish = time.time()

                        except Exception as e:
                            logger.exception(e)

                            return "Status: error " + str(
                                e) + ". Check data exists for these dates?"

                    except Exception as e:
                        logger.exception(e)

                        return 'Status: error ' + str(
                            e) + ". Check data exists for these dates?"

                    return 'Status: calculated ' + str(round(
                        finish - start, 3)) + "s for " + compliance_title

            raise dash.exceptions.PreventUpdate(
                "No data changed - " + tca_type
            )  # Not very elegant but only way to prevent plots disappearing
            # return "Status: ok"

        if external_params is not None:
            return callback(**external_params)

        return callback
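
calculate_computation_summary is a callback factory: it returns a closure, which Dash then invokes with one positional argument per registered Input. A hedged sketch of how such a closure might be attached (the component IDs are illustrative, in tcapy this wiring is handled by the CallbackManager, and the real 'detailed' page registers eleven inputs rather than the two shown here):

from dash.dependencies import Input, Output

app.callback(
    Output('detailed-status', 'children'),
    [Input('detailed-ticker-val', 'value'),
     Input('detailed-calculation-button', 'n_clicks')]
)(tca_caller.calculate_computation_summary('detailed'))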
Example #24
    def __init__(self):
        self._util_func = UtilFunc()
Example #25
class ComputationReport(ABC):
    """Converts ComputationResults (largely consisting of Plotly based Figures and HTML tables) into self contained HTML pages.
    Can also render these HTML pages into PDFs. Uses Renderer objects to create the HTML including BasicRenderer (which
    uses chartpy's "Canvas" object extensively) and JinjaRenderer (uses Jinja templating for HTML and WeasyPrint for PDF
    conversion).

    """
    def __init__(self,
                 computation_results,
                 title='Cuemacro Computation',
                 renderer=CanvasRenderer(),
                 chart_report_height=constants.chart_report_height,
                 chart_report_width=constants.chart_report_width):
        """Initialize class, with the computation results we wish to convert into a report like format

        Parameters
        ----------
        computation_results : ComputationResults
            The results of a large scale computation, which contains charts and DataFrames

        title : str
            Title of webpage to be rendered
        """
        self._util_func = UtilFunc()
        self._computation_results = computation_results
        self._title = title
        self._chart = Chart(engine='plotly')
        self._renderer = renderer
        self._computation_request = computation_results.computation_request

        self._chart_report_width = chart_report_width
        self._chart_report_height = chart_report_height

    def create_report(self,
                      output_filename=None,
                      output_format='html',
                      offline_js=False):
        """Creates an HTML/PDF report from a ComputationResult object, which can (optionally) be written to disk, alternatively
        returns a binary representation of the HTML or PDF.

        Parameters
        ----------
        output_filename : str (optional)
            File output, if this is not specified a binary object is returned

        output_format : str
            'html' (default) - output an HTML page
            'pdf' - output a PDF (charts are converted to static SVG images)
            'xlwings' - leave charts as Plotly Figure objects

        offline_js : bool
            False (default) - links to Plotly.js on the web in the rendered page (needs a web connection to view)
            True - embeds Plotly.js in the rendered page (results in much bigger file sizes)

        Returns
        -------
        pdf or HTML binary

        """

        extra_head_code = ''

        if output_format == 'html':

            # Embed plotly.js in HTML (makes it bigger, but then doesn't require web connection)
            if offline_js:
                embed_chart = 'offline_embed_js_div'
            else:
                # Otherwise put web link to plotly.js (but this means we need to download every time)
                embed_chart = 'offline_div'
                extra_head_code = '<head><script src="https://cdn.plot.ly/plotly-latest.min.js"></script></head>'
        elif output_format == 'pdf':
            # For PDFs we need to create static SVGs of plotly charts
            embed_chart = 'offline_image_svg_in_html'
        elif output_format == 'xlwings':
            embed_chart = 'leave_as_fig'

        # Get a list of the HTML to render
        elements_to_render_dict = self._layout_computation_results_to_html(
            embed_chart)

        return self._renderer.render_elements(elements_to_render_dict,
                                              title=self._title,
                                              output_filename=output_filename,
                                              output_format=output_format,
                                              extra_head_code=extra_head_code)

    def _generate_filename(self, extension):
        return (self._get_time_stamp() + "." + extension)

    def _get_time_stamp(self):
        return str(datetime.datetime.now()).replace(':', '-').replace(
            ' ', '-').replace(".", "-")

    def _create_text_html(self, text, add_hr=True):
        """Takes text and then creates the appropriate HTML to represent it, split by horizontal HTML bars

        Parameters
        ----------
        text : str (list)
            Text to be added in HTML

        Returns
        -------
        list (of HTML)
        """
        if text != [] and text is not None and add_hr:
            html_output = [['<hr>']]
        else:
            html_output = []

        if not isinstance(text, list):
            text = [text]

        for t in text:
            html_output.append([t])

        return html_output

    def _create_table_html(self, table):
        """Takes tables in HTML and then creates the appropriate HTML to represent it, split by horizontal HTML bars

        Parameters
        ----------
        table : dict
            Tables in HTML format

        Returns
        -------
        list (of HTML)
        """
        if table != {} and table is not None:
            html_output = [['<hr>']]
        else:
            html_output = []

        for t in self._util_func.dict_key_list(table.keys()):
            html_output.append(table[t])

        return html_output

    def _create_chart_html(self, chart, embed_chart):
        if chart != {} and chart is not None:
            html_output = [['<hr>']]
        else:
            html_output = []

        style = Style(plotly_plot_mode=embed_chart)

        for c in self._util_func.dict_key_list(chart.keys()):

            # Update chart size and padding (if it's Plotly), so it fits well on PDF
            try:
                chart[c].update_layout(
                    autosize=False,
                    width=self._chart_report_width,
                    height=self._chart_report_height,
                    margin=dict(l=10, r=10, b=10, t=60, pad=4),
                )
            except Exception:
                # Not a Plotly figure, so leave it as it is
                pass

            if embed_chart == 'leave_as_fig':
                html_output.append([chart[c]])
            else:
                html_output.append([self._chart.plot(chart[c], style=style)])

        return html_output

    @abc.abstractmethod
    def _layout_computation_results_to_html(self,
                                            embed_chart='offline_embed_js_div'
                                            ):
        """Converts the computation results to a list containing HTML, primarily of the charts. Should be implemented
        by concrete subclasses, where we can select the order of the charts (and which charts are converted)

        Parameters
        ----------
        embed_chart : str
            'offline_embed_js_div' (default) - converts Plotly Figures into HTML + includes Plotly.js script
            'offline_div' - converts Plotly Figures into HTML (but excludes Plotly.js script)

        Returns
        -------
        list (containing HTML), list (containing HTML of descriptions)
        """
        pass
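
A usage sketch, assuming a concrete subclass (ComputationReport itself is abstract, so the subclass name here is hypothetical):

# Render the computation results to a self-contained HTML page on disk
report = TCAComputationReport(computation_results, title='Cuemacro Computation')
report.create_report(output_filename='tca_report.html', output_format='html',
                     offline_js=False)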
Example #26
def test_write_multiple_wildcard_market_data_csvs_arctic():
    """Tests we can write sequential market data CSVs (or HDF5) whose path has been specified by a wildcard (eg. EURUSD*.csv).
    It is assumed that the CSVs are in chronological order, from their filenames.
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness',
                                               arctic_lib_type=a)

        ### Read CSV data which is sorted ascending (default!)
        database_source.convert_csv_to_table(
            csv_market_data_store,
            ticker,
            test_harness_arctic_market_data_table,
            if_exists_table='replace',
            if_exists_ticker='replace',
            market_trade_data='market',
            csv_read_chunksize=10**6,
            remove_duplicates=False)

        database_source_csv = DatabaseSourceCSV(
            market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=arctic_start_date,
            finish_date=arctic_finish_date,
            ticker=ticker)

        # Prepare the CSV folder first
        csv_folder = os.path.join(constants.test_data_harness_folder,
                                  'csv_arctic_mult')

        # Empty the CSV test harness folder, where we shall dump the mini CSVs
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        # Split the CSV file into several mini CSV files (and also Parquet files)
        market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv,
                                                            chunks=3)

        chunk_no = 0

        for m in market_df_list:
            m.to_csv(
                os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.csv'))
            UtilFunc().write_dataframe_to_binary(
                m,
                os.path.join(csv_folder,
                             "EURUSD" + str(chunk_no) + '.parquet'),
                format='parquet')

            chunk_no = chunk_no + 1

        file_ext = ['csv', 'parquet']

        for f in file_ext:
            ### Read CSV data from the mini CSVs (using wildcard char) and dump to Arctic
            database_source.convert_csv_to_table(
                os.path.join(csv_folder, "EURUSD*." + f),
                ticker,
                test_harness_arctic_market_data_table,
                if_exists_table='append',
                if_exists_ticker='replace',
                market_trade_data='market',
                csv_read_chunksize=10**6,
                remove_duplicates=False)

            market_request = MarketRequest(
                start_date=arctic_start_date,
                finish_date=arctic_finish_date,
                ticker=ticker,
                data_store=database_source,
                market_data_database_table=test_harness_arctic_market_data_table
            )

            # Read back from Arctic
            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            # Compare reading directly from the original large CSV vs. reading back from Arctic (which was populated from the split CSVs)
            diff_df = abs(market_df_load['mid'] - market_df_csv['mid'])

            outside_bounds = diff_df[diff_df >= eps]

            assert len(outside_bounds) == 0
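
# A standalone sketch of the wildcard round trip exercised above: split a DataFrame
# into chunks, write numbered CSVs, then glob them back in filename order (using
# plain pandas/numpy instead of tcapy's TimeSeriesOps and DatabaseSource)
import glob
import os
import tempfile

import numpy as np
import pandas as pd

df = pd.DataFrame({'mid': np.linspace(1.10, 1.20, 9)},
                  index=pd.date_range('2016-01-01', periods=9, freq='min'))
df.index.name = 'Date'

folder = tempfile.mkdtemp()

for chunk_no, chunk in enumerate(np.array_split(df, 3)):
    chunk.to_csv(os.path.join(folder, 'EURUSD' + str(chunk_no) + '.csv'))

# sorted() keeps the chunks in chronological order, given the filenames encode it
df_roundtrip = pd.concat(pd.read_csv(f, index_col='Date', parse_dates=True)
                         for f in sorted(glob.glob(os.path.join(folder, 'EURUSD*.csv'))))

assert (df_roundtrip['mid'] - df['mid']).abs().max() < 1e-8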
Beispiel #27
0
# for caching data (in Redis)
from tcapy.util.mediator import Mediator

# utility stuff
from tcapy.conf.constants import Constants
from tcapy.util.loggermanager import LoggerManager
from tcapy.util.utilfunc import UtilFunc

# creates the HTML layout of the web pages
from chartpy.dashboard import CallbackManager, SessionManager

constants = Constants()
util_func = UtilFunc()

# manage session information for every client
session_manager = SessionManager()

# manage creation of callback for Dash
callback_manager = CallbackManager(constants)

logger = LoggerManager.getLogger(__name__)

# print constants for user information
logger.info("Platform = " + constants.plat)
logger.info("Env = " + constants.env)
logger.info("Python = " + sys.executable)
logger.info("Debug environment = " + str(constants.debug_start_flask_server_directly))
Beispiel #28
0
class ComputationCaller(ABC):
    """Abstract class which adds listeners to the GUI buttons in the tcapy application for doing TCA or other _calculations. At
    initialisation it adds listeners for these buttons and links them to the various text box inputs (where the user
    can specify the various computation parameters such as start date, finish date, ticker, TCA metrics etc.)

    When a button is pressed it triggers various "calculate" methods, which convert the GUI input, into computation request/TCARequest objects
    which are then sent to another object for doing the actual computation. This analysis is then cached in Redis. The
    completion of this calculation will then trigger a callback from every display component (such as a plot or table)
    which search the cache for the appropriate output to display.

    If a user wishes to create programmatically call tcapy, it is recommended they create a comptuation request directly, rather
    than attempting to use ComputationCaller, and then submit that to an external computation engine.
    """
    def __init__(self,
                 app,
                 session_manager,
                 callback_manager,
                 glob_volatile_cache,
                 layout,
                 callback_dict=None):
        self._util_func = UtilFunc()

        self._session_manager = session_manager
        self._callback_manager = callback_manager

        self._glob_volatile_cache = glob_volatile_cache

        self.create_callbacks(app,
                              callback_manager,
                              callback_dict=callback_dict)

    def create_plot_flags(self, session_manager, layout):
        """Creates flags for each display component (eg. plot or table) on each web page in the project. These are
        necessary so we can keep track of whether we need to recalculate the underlying TCA analysis.

        Parameters
        ----------
        session_manager : SessionManager
            Stores and modifies session data which is unique for each user

        layout : Layout
            Specifies the layout of an HTML page using Dash components

        Returns
        -------
        dict, dict
        """
        plot_flags = {}
        plot_lines = {}

        for page in layout.pages:

            page_flags = []
            line_flags = []

            # For redrawing plots
            for gen_flag in self._generic_plot_flags:
                key = page + gen_flag

                # Append a plot flag if it exists
                if key in layout.id_flags:
                    page_flags.append(
                        self._session_manager.create_calculated_flags(
                            'redraw-' + page,
                            session_manager.create_calculated_flags(
                                self._util_func.dict_key_list(
                                    layout.id_flags[key].keys()),
                                self._generic_plot_flags[gen_flag])))

            plot_flags[page] = UtilFunc().flatten_list_of_lists(page_flags)

            # For clicking on charts
            for gen_flag in self._generic_line_flags:
                key = page + gen_flag

                # Append a line clicking flag if it exists
                if key in layout.id_flags:
                    line_flags.append(
                        self._session_manager.create_calculated_flags(
                            'redraw-' + page,
                            session_manager.create_calculated_flags(
                                self._util_func.dict_key_list(
                                    layout.id_flags[key].keys()),
                                self._generic_line_flags[gen_flag])))

            if line_flags != []:
                plot_lines[page] = UtilFunc().flatten_list_of_lists(line_flags)

        return plot_flags, plot_lines

    def create_callbacks(self, app, callback_manager, callback_dict=None):
        """Creates callbacks for each calculation button in the application, so that it is linked to execution code, when that
        button is pressed. Typically these button presses kick off a large computation (eg. TCA analysis).

        Parameters
        ----------
        app : dash.App
            A Dash app is a wrapper over a Flask mini-webserver

        callback_manager : CallbackManager
            Creates callbacks for dash components

        callback_dict : dict
            Dictionary of callbacks for Dash

        """

        if callback_dict is None:
            callback_dict = constants.dash_callbacks

        for k in callback_dict.keys():
            # Dash callbacks for detailed page
            app.callback(callback_manager.output_callback(k, 'status'),
                         callback_manager.input_callback(k, callback_dict[k]))(
                             self.calculate_computation_summary(k))

    def add_list_kwargs(self, kwargs, tag, addition):
        """Adds a value to the kwargs dictionary (or appends it to an existing _tag

        Parameters
        ----------
        kwargs : dict
            Existing kwargs dictionary

        tag : str
            Key to be added to kwargs

        addition : str
            Value of key to be added

        Returns
        -------
        dict
        """

        if addition is not None:
            if tag not in kwargs:
                kwargs[tag] = addition
            else:
                if kwargs[tag] is not None:
                    if isinstance(kwargs[tag], list):
                        # list.append mutates in place and returns None, so don't assign its result
                        kwargs[tag].append(addition)
                    else:
                        kwargs[tag] = [kwargs[tag], addition]
                else:
                    kwargs[tag] = addition

        return kwargs
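
# A standalone sketch of the kwargs-merging behaviour above (with the append fix):
# a scalar value is promoted to a list once a second value arrives for the same tag
def add_list_kwargs_sketch(kwargs, tag, addition):
    if addition is None:
        return kwargs

    if tag not in kwargs or kwargs[tag] is None:
        kwargs[tag] = addition
    elif isinstance(kwargs[tag], list):
        kwargs[tag].append(addition)  # mutate in place; list.append returns None
    else:
        kwargs[tag] = [kwargs[tag], addition]

    return kwargs

kwargs = {}
add_list_kwargs_sketch(kwargs, 'ticker', 'EURUSD')
add_list_kwargs_sketch(kwargs, 'ticker', 'GBPUSD')

assert kwargs == {'ticker': ['EURUSD', 'GBPUSD']}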

    def create_computation_request(self, **kwargs):
        pass

    def _fetch_cached_list(self,
                           force_calculate=False,
                           computation_type=None,
                           session_id=None,
                           key=None):
        """Fetches a cached list of objects (typically DataFrames) which have been generated during a larger computation
        (eg. TCA analysis) for a particular session.

        Parameters
        ----------
        force_calculate : bool (default: False)
            Should a large calculation be recomputed? If so, do not attempt to fetch from cache

        computation_type : str
            What computation type are we doing?

        session_id : str
            A unique identifier for the current web session

        key : str
            Which key to retrieve from the cache, which (usually) relates to a DataFrame generated by TCA output

        Returns
        -------
        list (usually of pd.DataFrames)
        """

        cached_list = []

        # First try to get from the cache (only need the key for this, no hash!)
        if not (force_calculate):
            if not (isinstance(key, list)):
                key = [key]

            if session_id != '' and computation_type != '':
                sessions_id_computation = session_id + '' + computation_type + '_'
            else:
                sessions_id_computation = ''

            for k in key:
                # this will be unique to each user
                cached_list.append(
                    self._glob_volatile_cache.get(sessions_id_computation + k))

        return cached_list
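
# A minimal sketch of the session-scoped cache lookup above, with a plain dict
# standing in for the Redis-backed volatile cache: keys combine the session id,
# computation type and table name, so each user's results stay separate
volatile_cache = {'sess-abc_detailed_trade_df': 'cached-trade-table'}

session_id, computation_type = 'sess-abc_', 'detailed'

cached_list = [volatile_cache.get(session_id + computation_type + '_' + k)
               for k in ['trade_df']]

assert cached_list == ['cached-trade-table']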

    def get_cached_computation_analysis(self, **kwargs):
        """Fetches a computation outoput from a cache (typically Redis) or computes the analysis directly using another object, if
        requested. Typically, a computation is initiated and then that large analysis is cached, ready to be consumed by
        display components which repeatedly call this function.

        Parameters
        ----------
        kwargs
            Variables generated by GUI which relate to our computations (eg. start date, finish date, ticker etc.)

        Returns
        -------
        pd.DataFrame
        """

        force_calculate = kwargs.get('force_calculate', False)

        key = None

        if 'key' in kwargs: key = kwargs['key']

        if 'test' not in kwargs:
            computation_type = self._tca_engine.get_engine_description()
            session_id = self._session_manager.get_session_id() + "_expiry_"
            session_id_computation = session_id + '' + computation_type + '_'
        else:
            computation_type = ''
            session_id = ''
            session_id_computation = ''

        # Try to fetch some TCA analysis output from the cache
        cached_list = self._fetch_cached_list(
            force_calculate=force_calculate,
            computation_type=computation_type,
            session_id=session_id,
            key=key)

        # Otherwise force the calculation (eg. if it doesn't already exist in the cache);
        # when a button is pressed, force_calculate will typically be set to True
        if force_calculate:

            computation_request = self.create_computation_request(**kwargs)

            # Delete any existing keys for the current session
            self._glob_volatile_cache.clear_key_match("*" + session_id + "*")

            dict_of_df = self.run_computation_request(computation_request)

            dict_key_list = []
            dict_element_list = []

            # Cache all the dataframes in Redis/or other memory space (will likely need for later calls!)
            # from security perspective probably better not to cache the TCAEngine objects on a database (which can execute code)
            for dict_key in dict_of_df.keys():

                # Check each element is actually populated (it can be missing if, for example, there are no trades)
                if dict_of_df[dict_key] is None:
                    raise Exception('Missing ' + dict_key)

                dict_key_list.append(session_id_computation + dict_key)
                dict_element_list.append(dict_of_df[dict_key])

            self._session_manager.set_session_flag('user_df', dict_key_list)

            # Put it back into Redis cache (to be fetched by Dash callbacks)
            self._glob_volatile_cache.put(dict_key_list, dict_element_list)

            logger = LoggerManager.getLogger(__name__)
            logger.debug('Generated tables: ' +
                         str(self._util_func.dict_key_list(dict_of_df.keys())))

            if key is None:
                return None

            if not (isinstance(key, list)):
                key = [key]

            for k in key:
                # Has one of the DataFrames we want just been calculated? If so, return it!
                if k in dict_of_df.keys():
                    cached_list.append(dict_of_df[k])

                # Otherwise look in Redis for the table for the user
                else:
                    # as last resort get from our global, this key is unique to each user
                    cached_list.append(
                        self._glob_volatile_cache.get(session_id_computation +
                                                      k))

        # Return a single element directly, otherwise a list
        tup = list(cached_list)

        if len(tup) == 1:
            return tup[0]
        else:
            return tup
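
# A condensed sketch of the compute-then-cache flow above, again with a dict in
# place of Redis: recalculation wipes the session's old keys, caches each output
# DataFrame under a session-scoped key, then the display components fetch by key
volatile_cache = {'sess-abc_detailed_stale_df': 'old-output'}

session_prefix = 'sess-abc_detailed_'
dict_of_df = {'trade_df': 'fresh-trade-table'}  # stand-in for computed DataFrames

# Clear any previous results for this session, then re-populate the cache
volatile_cache = {k: v for k, v in volatile_cache.items()
                  if not k.startswith('sess-abc')}
volatile_cache.update({session_prefix + k: v for k, v in dict_of_df.items()})

assert volatile_cache == {'sess-abc_detailed_trade_df': 'fresh-trade-table'}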

    def create_status_msg_flags(self, computation_type, ticker, calc_start,
                                calc_end):
        if isinstance(ticker, list):
            ticker = self._util_func.pretty_str_list(ticker)

        title = ticker + ": " \
                + str(calc_start).replace(':00+00:00', '').replace('000+00:00', '') + " - " \
                + str(calc_end).replace(':00+00:00', '').replace('000+00:00', '') + " at " \
                + str(datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"))

        self._session_manager.set_session_flag({
            computation_type + '-title':
            title,
            computation_type + '-ticker':
            ticker
        })

        self._session_manager.set_session_flag(
            self._plot_flags[computation_type], True)

        return title

    def create_generate_button_msg(self, old_clicks, n_clicks):
        return 'Triggered click old: ' + str(old_clicks) + " clicks vs new " + str(n_clicks) + \
               " for " + str(self._session_manager.get_session_id())

    def get_username_string(self):
        username = self._session_manager.get_username()

        if username is None:
            username = ''
        else:
            username = '******' + username

        return username

    @abc.abstractmethod
    def fill_computation_request_kwargs(self, kwargs, fields):
        """Fills a dictionary with the appropriate parameters which can be consumed by a ComputationRequest object. This involves
        a large number of object conversations, eg. str based dates to TimeStamps, metric names to Metric objects etc.

        Parameters
        ----------
        kwargs : dict
            Contains parameters related to computation analysis

        fields : str (list)
            List of fields which should be filled with None if they don't exist in kwargs

        Returns
        -------
        dict
        """

        pass

    @abc.abstractmethod
    def run_computation_request(self, computation_request):
        """Creates a ComputationRequest object, populating its' fields with those from a kwargs dictionary, which consisted of
        parameters such as the start date, finish date, ticker, metrics to be computed, benchmark to be computed etd.

        The ComputationRequest object can later be consumed by a computation engine such as a TCAEngine

        Parameters
        ----------
        kwargs : dict
            For describing a computational analysis, such as the start date, finish date, ticker etc.

        Returns
        -------
        ComptuationRequest
        """
        pass

    @abc.abstractmethod
    def calculate_computation_summary(self,
                                      computation_type,
                                      external_params=None):
        """

        Parameters
        ----------
        comptuation_type : str
            Type of computation eg. 'detailed'

        external_params : dict


        Returns
        -------

        """
        pass
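
# A hedged sketch of calling tcapy programmatically, as the class docstring above
# recommends: build the request directly and submit it to a computation engine,
# bypassing ComputationCaller entirely (the dates and data store names below are
# placeholders which would need adapting to your own environment)
from tcapy.analysis.tcaengine import TCAEngineImpl
from tcapy.analysis.tcarequest import TCARequest

tca_request = TCARequest(start_date='01 May 2017', finish_date='31 May 2017',
                         ticker='EURUSD', trade_data_store='mysql',
                         market_data_store='arctic-ncfx')

dict_of_df = TCAEngineImpl(version=constants.tcapy_version).calculate_tca(tca_request)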
Beispiel #29
0
class TCATickerLoader(ABC):
    """This class is designed to load up market and trade data for single _tickers and also makes appropriate metric calculations
    for that specific ticker. It is generally called by the higher level TCAMarketTradeLoader class, which can handle multiple _tickers.

    """
    def __init__(self,
                 version=constants.tcapy_version,
                 volatile_cache_engine=constants.volatile_cache_engine):
        self._data_factory = DataFactory(version=version)

        self._util_func = UtilFunc(
        )  # general utility operations (such as flatten lists)
        self._fx_conv = FXConv(
        )  # for determining if FX crosses are in the correct convention
        self._time_series_ops = TimeSeriesOps(
        )  # time series operations, such as filtering by date

        self._metric_executed_price = MetricExecutedPriceNotional(
        )  # for determining the executed notionals/price of orders
        # from trades

        self._benchmark_mid = BenchmarkMarketMid(
        )  # to calculate mid price from bid/ask quote market data
        self._trade_order_tag = TradeOrderFilterTag(
        )  # to filter trade/orders according to the values of certain tags
        self._version = version
        self._volatile_cache_engine = volatile_cache_engine

    def get_market_data(self, market_request):
        """Gets market data for a particular ticker. When we ask for non-standard FX crosses, only the mid-field is
        returned (calculated as a cross rate). We do not give bid/ask quotes for calculated non-standard _tickers, as these
        can difficult to estimate.

        Parameters
        ----------
        market_request : MarketRequest
            The type of market data to get

        Returns
        -------
        DataFrame
        """
        logger = LoggerManager.getLogger(__name__)

        if isinstance(market_request, TCARequest):
            market_request = MarketRequest(market_request=market_request)

        old_ticker = market_request.ticker

        if market_request.asset_class == 'fx':
            # Check if we can get ticker directly or need to create synthetic cross rates
            ticker = self._fx_conv.correct_notation(market_request.ticker)
        else:
            # If not FX we don't have to invert
            ticker = old_ticker

        # If the ticker, in the correct convention, is among the crosses where we collect data (typically this will be
        # the USD crosses, plus some liquid non-USD pairs like EURJPY)

        if isinstance(market_request.data_store, DatabaseSource):
            # TODO improve ticker check here!
            available_tickers = [ticker]
        elif 'csv' in market_request.data_store or 'h5' in market_request.data_store or 'gzip' in market_request.data_store \
            or 'parquet' in market_request.data_store or isinstance(market_request.data_store, pd.DataFrame) :

            # For flat files (CSV/H5/Parquet) we don't have much choice; available tickers could differ between files
            # (if the file has a 'ticker' field, we will match on that)
            available_tickers = [ticker]
        elif market_request.data_store in constants.market_data_tickers:
            available_tickers = self._util_func.dict_key_list(
                constants.market_data_tickers[
                    market_request.data_store].keys())

        else:
            err_msg = 'Ticker ' + str(
                ticker
            ) + " doesn't seem available in the data source " + market_request.data_store

            logger.error(err_msg)

            raise Exception(err_msg)

        if ticker in available_tickers:

            # In the correct convention or is not FX
            if ticker == old_ticker:
                market_df = self._get_correct_convention_market_data(
                    market_request)

            # Otherwise need to flip to the correct convention (only will return 'mid')
            else:
                market_request_flipped = MarketRequest(
                    market_request=market_request)
                market_request_flipped.ticker = ticker

                market_df = self._invert_quoting_market(
                    self._get_correct_convention_market_data(
                        market_request_flipped))

                if 'ticker' in market_df.columns:
                    market_df['ticker'] = old_ticker
        else:
            if market_request.asset_class == 'fx' and market_request.instrument == 'spot':
                # Otherwise we need to get both legs
                # eg. for NZDCAD, we shall download NZDUSD and USDCAD => multiply them to get NZDCAD

                # get the USD crosses for each leg and then multiply
                market_request_base = MarketRequest(
                    market_request=market_request)
                market_request_terms = MarketRequest(
                    market_request=market_request)

                market_request_base.ticker = old_ticker[0:3] + 'USD'
                market_request_terms.ticker = 'USD' + old_ticker[3:7]

                tickers_exist = self._fx_conv.currency_pair_in_list(
                        self._fx_conv.correct_notation(market_request_base.ticker), available_tickers) and \
                        self._fx_conv.currency_pair_in_list(
                            self._fx_conv.correct_notation(market_request_terms.ticker), available_tickers)

                # If both USD tickers don't exist, try computing via EUR tickers (eg. USDSEK from EURUSD & EURSEK)
                if not (tickers_exist):
                    market_request_base.ticker = old_ticker[0:3] + 'EUR'
                    market_request_terms.ticker = 'EUR' + old_ticker[3:7]

                    tickers_exist = self._fx_conv.currency_pair_in_list(
                        self._fx_conv.correct_notation(market_request_base.ticker), available_tickers) and \
                                    self._fx_conv.currency_pair_in_list(
                                        self._fx_conv.correct_notation(market_request_terms.ticker), available_tickers)

                # Check if that currency pair (in the CORRECT convention) is in the available tickers;
                # we will typically not collect market data for currencies in the wrong convention
                if tickers_exist:

                    fields_try = ['bid', 'ask', 'mid']

                    market_base_df = self.get_market_data(market_request_base)
                    market_terms_df = self.get_market_data(
                        market_request_terms)

                    market_has_data = False

                    if market_base_df is not None and market_terms_df is not None:
                        if not (market_base_df.empty) and not (
                                market_terms_df.empty):
                            market_has_data = True

                    # If there's no data in either DataFrame, don't attempt to calculate anything
                    if not (market_has_data):
                        return pd.DataFrame()

                    fields = []

                    for f in fields_try:
                        if f in market_base_df.columns and f in market_terms_df.columns:
                            fields.append(f)

                    # Only attempt to calculate if the fields exist
                    if len(fields) > 0:
                        # Remove any other columns (eg. with ticker name etc.)
                        market_base_df = market_base_df[fields]
                        market_terms_df = market_terms_df[fields]

                        # Need to align the series before multiplying (then fill down points which don't match);
                        # we can't use interpolation, given that would use FUTURE data
                        market_base_df, market_terms_df = market_base_df.align(
                            market_terms_df, join="outer")
                        market_base_df = market_base_df.fillna(method='ffill')
                        market_terms_df = market_terms_df.fillna(
                            method='ffill')

                        market_df = pd.DataFrame(data=market_base_df.values *
                                                 market_terms_df.values,
                                                 columns=fields,
                                                 index=market_base_df.index)

                        # Values at the start of the series MIGHT be nan, so need to ignore those
                        market_df = market_df.dropna(subset=['mid'])

                        if 'ticker' in market_df.columns:
                            market_df['ticker'] = old_ticker
                    else:
                        return None

                else:
                    # Otherwise couldn't compute either from the USD legs or EUR legs
                    logger.warning("Couldn't find market data for ticker: " +
                                   str(ticker))

                    return None
            else:
                # Otherwise couldn't find the non-FX ticker
                logger.warning("Couldn't find market data for ticker: " +
                               str(ticker))

                return None

        return market_df
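
# A standalone sketch of the synthetic cross-rate logic above: align the two USD
# legs on a common index, forward-fill (never interpolate, which would leak future
# data), then multiply, eg. NZDUSD * USDCAD = NZDCAD
import pandas as pd

nzdusd = pd.Series([0.70, 0.71],
                   index=pd.to_datetime(['2017-05-01 10:00', '2017-05-01 10:02']))
usdcad = pd.Series([1.35, 1.36],
                   index=pd.to_datetime(['2017-05-01 10:01', '2017-05-01 10:02']))

base, terms = nzdusd.align(usdcad, join='outer')
nzdcad = (base.ffill() * terms.ffill()).dropna()

# At 10:01 the NZDUSD 10:00 point is filled forward: 0.70 * 1.35 = 0.945
assert abs(nzdcad['2017-05-01 10:01'] - 0.945) < 1e-12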

    def get_trade_order_data(self,
                             tca_request,
                             trade_order_type,
                             start_date=None,
                             finish_date=None):
        """Gets trade data for specified parameters (eg. start/finish dates _tickers). Will also try to find trades
        when they have booked in the inverted market convention, and change the fields appropriately. For example, if
        we ask for GBPUSD trade data, it will also search for USDGBP and convert those trades in the correct convention.

        Parameters
        ----------
        tca_request : TCARequest
            What type of trade data do we want

        trade_order_type : str
            Do we want trade or order data?

        Returns
        -------
        DataFrame
        """
        logger = LoggerManager().getLogger(__name__)

        # by default, assume we want trade data (rather than order data)
        if trade_order_type is None:
            trade_order_type = 'trade_df'

        if start_date is None and finish_date is None:
            start_date = tca_request.start_date
            finish_date = tca_request.finish_date

        # Create request for actual executed trades
        trade_request = TradeRequest(trade_request=tca_request)

        trade_request.start_date = start_date
        trade_request.finish_date = finish_date
        trade_request.trade_order_type = trade_order_type

        # Fetch all the trades done in that ticker (will be sparse-like randomly spaced tick data)
        # assumed to be the correct convention (eg. GBPUSD)
        trade_df = self._data_factory.fetch_table(data_request=trade_request)

        # If FX, check whether trades might have been booked in the inverted convention
        if tca_request.asset_class == 'fx' and tca_request.instrument == 'spot':
            # Also fetch data in the inverted cross (eg. USDGBP) as some trades may be recorded this way
            inv_trade_request = TradeRequest(trade_request=tca_request)

            inv_trade_request.start_date = start_date
            inv_trade_request.finish_date = finish_date
            inv_trade_request.trade_order_type = trade_order_type

            inv_trade_request.ticker = self._fx_conv.reverse_notation(
                trade_request.ticker)

            trade_inverted_df = self._data_factory.fetch_table(
                data_request=inv_trade_request)

            # Only add inverted trades if they exist!
            if trade_inverted_df is not None:
                if not (trade_inverted_df.empty):

                    invert_price_columns = [
                        'executed_price', 'price_limit', 'market_bid',
                        'market_mid', 'market_ask', 'arrival_price'
                    ]
                    invert_price_columns = [
                        x for x in invert_price_columns
                        if x in trade_inverted_df.columns
                    ]

                    # For trades (but not orders), there is an executed price field, which needs to be inverted
                    if invert_price_columns != []:
                        trade_inverted_df[
                            invert_price_columns] = 1.0 / trade_inverted_df[
                                invert_price_columns].values

                    trade_inverted_df['side'] = -trade_inverted_df[
                        'side']  # buys become sells, and vice versa!
                    trade_inverted_df['ticker'] = trade_request.ticker

                    if trade_df is not None:
                        trade_df = trade_df.append(trade_inverted_df)
                        trade_df = trade_df.sort_index()
                    else:
                        trade_df = trade_inverted_df

        # Check the trade data is not empty; if it is, return None
        if self._check_is_empty_trade_order(trade_df, tca_request, start_date,
                                            finish_date, trade_order_type):
            return None

        if tca_request.asset_class == 'fx' and tca_request.instrument == 'spot':

            # Check if any notionals of any trade/order are quoted in the TERMS currency?
            terms_notionals = trade_df[
                'notional_currency'] == tca_request.ticker[3:6]

            # If any notional are quoted as terms, we should invert these so we quote notionals with base currency
            # for consistency
            if terms_notionals.any():
                inversion_ticker = tca_request.ticker[
                    3:6] + tca_request.ticker[0:3]

                inversion_spot, trade_df = self._fill_reporting_spot(
                    inversion_ticker, trade_df, start_date, finish_date,
                    tca_request)

                notional_fields = [
                    'notional', 'order_notional', 'executed_notional'
                ]

                # Need to check terms notionals again, as the trade data could have shrunk (we can only keep trades
                # where we have market data)
                terms_notionals = trade_df['notional_currency'] == str(
                    tca_request.ticker[3:6])

                # Only get the inversion spot if any terms notionals are quoted wrong way around
                if terms_notionals.any():
                    if inversion_spot is not None:
                        for n in notional_fields:
                            if n in trade_df.columns:
                                # trade_df[n][terms_notionals] = trade_df[n][terms_notionals].values * inversion_spot[terms_notionals].values
                                trade_df[n][terms_notionals] = pd.Series(
                                    index=trade_df.index[
                                        terms_notionals.values],
                                    data=trade_df[n][terms_notionals].values *
                                    inversion_spot[terms_notionals].values)
                    else:
                        logger.warning(
                            "Couldn't get spot data for " + inversion_ticker +
                            " to invert notionals. Hence not returning trading data."
                        )

                if terms_notionals.any():
                    trade_df['notional_currency'][
                        terms_notionals] = trade_request.ticker[0:3]

            # Also represent the notional as a reporting currency amount (eg. if we are USD based investors, convert
            # the notional to USD)

            # Using a reporting currency can be particularly useful if we are trying to aggregate metrics from many different
            # currency pairs (and wish to weight by a commonly measured reporting notional)

            # Convert unless the base currency is already the reporting currency (ie. the 'USDUSD' case)
            if trade_request.ticker[0:3] != tca_request.reporting_currency:

                # So if we have EURJPY, we want to download EURUSD data
                reporting_ticker = trade_request.ticker[
                    0:3] + tca_request.reporting_currency

                reporting_spot, trade_df = self._fill_reporting_spot(
                    reporting_ticker, trade_df, start_date, finish_date,
                    tca_request)

                if reporting_spot is not None:
                    trade_df[
                        'notional_reporting_currency_mid'] = reporting_spot.values

                    # trade_df['notional_reporting_currency_mid'] = \
                    #     self._time_series_ops.vlookup_style_data_frame(trade_df.index, market_conversion_df, 'mid')[0].values

                    trade_df[
                        'reporting_currency'] = tca_request.reporting_currency

                    columns_to_report = [
                        'executed_notional', 'notional', 'order_notional'
                    ]

                    for c in columns_to_report:
                        if c in trade_df.columns:
                            trade_df[c + '_in_reporting_currency'] = \
                                trade_df['notional_reporting_currency_mid'].values * trade_df[c]
                else:
                    logger.warning(
                        "Couldn't get spot data to convert notionals into reporting currency. Hence not returning trading data."
                    )

                    return None
            else:
                # ie. USDUSD, so spot is 1
                trade_df['notional_reporting_currency_mid'] = 1.0

                # Reporting currency is the same as the notional of the trade, so no need to convert, just
                # replicate columns
                trade_df['reporting_currency'] = tca_request.reporting_currency

                columns_to_report = [
                    'executed_notional', 'notional', 'order_notional'
                ]

                for c in columns_to_report:
                    if c in trade_df.columns:
                        trade_df[c + '_in_reporting_currency'] = trade_df[c]

        return trade_df
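
# A minimal sketch of the inversion applied above to trades booked in the reverse
# convention: prices become reciprocals and buys become sells (the column names
# mirror the ones used in the method, the values are made up)
import pandas as pd

usdgbp_trades = pd.DataFrame({'executed_price': [0.80], 'side': [1]})  # a USDGBP buy

gbpusd_view = usdgbp_trades.copy()
gbpusd_view['executed_price'] = 1.0 / gbpusd_view['executed_price'].values
gbpusd_view['side'] = -gbpusd_view['side']  # buying USDGBP is selling GBPUSD

assert gbpusd_view['executed_price'].iloc[0] == 1.25
assert gbpusd_view['side'].iloc[0] == -1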

    def get_trade_order_holder(self, tca_request):
        logger = LoggerManager.getLogger(__name__)

        # Get all the trade/orders which have been requested, eg. trade_df and order_df;
        # do separate calls given they are assumed to be stored in different database tables
        trade_order_holder = DataFrameHolder()

        if tca_request.trade_order_mapping is not None:
            logger.debug("Get trade order holder for " +
                         str(tca_request.ticker) + " from " +
                         str(tca_request.start_date) + " - " +
                         str(tca_request.finish_date))

            for trade_order_type in tca_request.trade_order_mapping:
                trade_order_df = self.get_trade_order_data(
                    tca_request, trade_order_type)

                trade_order_holder.add_dataframe(trade_order_df,
                                                 trade_order_type)

        return trade_order_holder

    def get_market_trade_order_holder(self, tca_request):
        """Gets the both the market data and trade/order data associated with a TCA calculation as a tuple of
        (DataFrame, DataFrameHolder)

        Parameters
        ----------
        tca_request : TCARequest
            Parameters for a TCA calculation

        Returns
        -------
        DataFrame, DataFrameHolder
        """

        logger = LoggerManager.getLogger(__name__)

        logger.debug("Get market and trade/order data for " +
                     str(tca_request.ticker) + " from " +
                     str(tca_request.start_date) + " - " +
                     str(tca_request.finish_date))

        # Get all the trade/orders which have been requested, eg. trade_df and order_df;
        # do separate calls given they are assumed to be stored in different database tables
        return self.get_market_data(tca_request), \
               self.get_trade_order_holder(tca_request)

    def calculate_metrics_single_ticker(self, market_trade_order_combo,
                                        tca_request, dummy_market):
        """Calls auxillary methods to get market/trade data for a single ticker. If necessary splits up the request into
        smaller date chunks to collect market and trade data in parallel (using Celery)

        Parameters
        ----------
        tca_request : TCARequest
            Parameter for the TCA analysis

        dummy_market : bool
            Should we put a dummy variable instead of returning market data

        Returns
        -------
        DataFrame, DataFrameHolder, str
        """

        trade_order_filter = tca_request.trade_order_filter
        benchmark_calcs = tca_request.benchmark_calcs
        metric_calcs = tca_request.metric_calcs
        ticker = tca_request.ticker

        logger = LoggerManager.getLogger(__name__)

        # Reassemble market and trade data from the tuple
        market_df, trade_order_df_dict = self.trim_sort_market_trade_order(
            market_trade_order_combo, tca_request.start_date,
            tca_request.finish_date, tca_request.ticker)

        # Calculate BenchmarkMarket's which only require market data and no trade data
        market_df = self.calculate_benchmark_market(market_df, tca_request)

        trade_order_df_values = []
        trade_order_df_keys = []

        # Calculations on trades with market data
        if len(trade_order_df_dict.keys()) > 0 and self._check_valid_market(
                market_df):

            # NOTE: this will not filter orders, only TRADES (as orders do not have venue parameters)
            logger.debug("Filter trades by venue")

            simple_filters = {'venue': tca_request.venue}

            if 'trade_df' in self._util_func.dict_key_list(
                    trade_order_df_dict.keys()):
                for s in simple_filters.keys():
                    trade_order_df_dict[
                        'trade_df'] = self._trade_order_tag.filter_trade_order(
                            trade_order_df=trade_order_df_dict['trade_df'],
                            tag_value_combinations={s: simple_filters[s]})

            # Do additional more customised post-filtering of the trade/orders (eg. by broker_id, algo_id)
            if trade_order_filter is not None:
                for a in trade_order_filter:
                    trade_order_df_dict = a.filter_trade_order_dict(
                        trade_order_df_dict=trade_order_df_dict)

            # NOTE: this will not filter orders, only TRADES (as orders do not have event type parameters)
            simple_filters = {'event_type': tca_request.event_type}

            if 'trade_df' in self._util_func.dict_key_list(
                    trade_order_df_dict.keys()):
                for s in simple_filters.keys():
                    trade_order_df_dict[
                        'trade_df'] = self._trade_order_tag.filter_trade_order(
                            trade_order_df=trade_order_df_dict['trade_df'],
                            tag_value_combinations={s: simple_filters[s]})

            # Remove any trade/orders which are empty (or None)
            t_remove = []

            for t in trade_order_df_dict.keys():
                if trade_order_df_dict[t] is None:
                    t_remove.append(t)

                    logger.warning(
                        t + " is empty.. might cause problems later!")
                elif trade_order_df_dict[t].empty:
                    t_remove.append(t)

                    logger.warning(
                        t + " is empty.. might cause problems later!")

            for t in t_remove:
                trade_order_df_dict.pop(t)

            trade_order_list = self._util_func.dict_key_list(
                trade_order_df_dict.keys())

            # Check if we have any trades/orders left to analyse?
            if len(trade_order_list) == 0:
                logger.error("No trade/orders for " + ticker)
            else:
                # ok we have some trade/orders left to analyse
                if not (isinstance(trade_order_list, list)):
                    trade_order_list = [trade_order_list]

                logger.debug("Calculating derived fields and benchmarks")

                logger.debug("Calculating execution fields")

                # Calculate derived executed fields for orders
                # can only do this if trade_df is also available
                if len(trade_order_df_dict.keys()
                       ) > 1 and 'trade_df' in self._util_func.dict_key_list(
                           trade_order_df_dict.keys()):

                    # For the orders, calculate the derived fields for executed notional, trade etc.
                    aggregated_notional_fields = 'executed_notional'

                    # Calculate the derived fields of the orders from the trades;
                    # also calculate any benchmarks for the orders
                    for i in range(1, len(trade_order_list)):
                        # NOTIONAL_EXECUTED: add derived field for executed price and notional executed for the orders
                        trade_order_df_dict[trade_order_list[
                            i]] = self._metric_executed_price.calculate_metric(
                                lower_trade_order_df=trade_order_df_dict[
                                    trade_order_list[i - 1]],
                                upper_trade_order_df=trade_order_df_dict[
                                    trade_order_list[i]],
                                aggregated_ids=constants.order_name +
                                '_pointer_id',
                                aggregated_notional_fields=
                                aggregated_notional_fields,
                                notional_reporting_currency_spot=
                                'notional_reporting_currency_mid')[0]

                # TODO not sure about this?
                if 'trade_df' in self._util_func.dict_key_list(
                        trade_order_df_dict.keys()):
                    if 'notional' not in trade_order_df_dict[
                            'trade_df'].columns:
                        trade_order_df_dict['trade_df'][
                            'notional'] = trade_order_df_dict['trade_df'][
                                'executed_notional']

                logger.debug("Calculating benchmarks")

                # Calculate user specified benchmarks for each trade order (which has been selected)
                if benchmark_calcs is not None:

                    for i in range(0, len(trade_order_df_dict)):
                        for b in benchmark_calcs:
                            # For benchmarks which need to be generated on a trade by trade basis (eg. VWAP, arrival etc)
                            if not (isinstance(b, BenchmarkMarket)):
                                logger.debug("Calculating " +
                                             type(b).__name__ + " for " +
                                             trade_order_list[i])

                                if trade_order_df_dict[
                                        trade_order_list[i]] is not None:
                                    if not (trade_order_df_dict[
                                            trade_order_list[i]].empty):
                                        trade_order_df_dict[trade_order_list[
                                            i]], _ = b.calculate_benchmark(
                                                trade_order_df=
                                                trade_order_df_dict[
                                                    trade_order_list[i]],
                                                market_df=market_df,
                                                trade_order_name=
                                                trade_order_list[i])

                logger.debug("Calculating metrics")

                # Calculate user specified metrics for each trade order (which has been selected)
                if metric_calcs is not None:
                    for i in range(0, len(trade_order_df_dict)):
                        for m in metric_calcs:
                            logger.debug("Calculating " + type(m).__name__ +
                                         " for " + trade_order_list[i])

                            if trade_order_df_dict[
                                    trade_order_list[i]] is not None:
                                if not (trade_order_df_dict[
                                        trade_order_list[i]].empty):
                                    trade_order_df_dict[trade_order_list[
                                        i]], _ = m.calculate_metric(
                                            trade_order_df=trade_order_df_dict[
                                                trade_order_list[i]],
                                            market_df=market_df,
                                            trade_order_name=trade_order_list[
                                                i])

                logger.debug("Completed derived field calculations for " +
                             ticker)

            trade_order_df_dict = self._calculate_additional_metrics(
                market_df, trade_order_df_dict, tca_request)

            if dummy_market:
                market_df = None

            trade_order_df_keys = self._util_func.dict_key_list(
                trade_order_df_dict.keys())
            trade_order_df_values = []

            for k in trade_order_df_keys:
                trade_order_df_values.append(trade_order_df_dict[k])

        # print("--- dataframes/keys ---")
        # print(trade_order_df_values)
        # print(trade_order_df_keys)

        return market_df, trade_order_df_values, ticker, trade_order_df_keys

    def calculate_benchmark_market(self, market_df, tca_request):

        logger = LoggerManager.getLogger(__name__)

        benchmark_calcs = tca_request.benchmark_calcs
        valid_market = self._check_valid_market(market_df)

        # Calculations on market data only
        if valid_market:
            for b in benchmark_calcs:

                # For benchmarks which only modify market data (and don't need trade specific information)
                if isinstance(b, BenchmarkMarket):
                    logger.debug("Calculating " + type(b).__name__ +
                                 " for market data")

                    market_df = b.calculate_benchmark(market_df=market_df)

        return market_df

    def _check_valid_market(self, market_df):
        if market_df is not None:
            if not (market_df.empty):
                return True

        return False

    def _fill_reporting_spot(self, ticker, trade_df, start_date, finish_date,
                             tca_request):
        logger = LoggerManager.getLogger(__name__)

        market_request = MarketRequest(
            start_date=start_date,
            finish_date=finish_date,
            ticker=ticker,
            data_store=tca_request.market_data_store,
            data_offset_ms=tca_request.market_data_offset_ms,
            use_multithreading=tca_request.use_multithreading,
            market_data_database_table=tca_request.market_data_database_table,
            multithreading_params=tca_request.multithreading_params)

        market_conversion_df = self.get_market_data(market_request)

        # Make sure the trades/orders are within the market data (for the purposes of the reporting spot);
        # we don't need to consider the length of the order, JUST the starting point
        trade_df = self.strip_trade_order_data_to_market(
            trade_df, market_conversion_df, consider_order_length=False)

        reporting_spot = None

        # need to check whether we actually have any trade data/market data
        if trade_df is not None and market_conversion_df is not None:
            if not (trade_df.empty) and not (market_conversion_df.empty):

                try:
                    reporting_spot = \
                        self._time_series_ops.vlookup_style_data_frame(trade_df.index, market_conversion_df, 'mid')[
                            0]

                except:
                    logger.error(
                        "Reporting spot is missing for this trade data sample!"
                    )

                if reporting_spot is None:
                    market_start_finish = "No market data in this sample. "

                    if market_conversion_df is not None:
                        market_start_finish = "Market data is between " + str(
                            market_conversion_df.index[0]) + " - " + str(
                                market_conversion_df.index[-1]) + ". "

                    logger.warning(market_start_finish)
                    logger.warning("Trade data is between " +
                                   str(trade_df.index[0]) + " - " +
                                   str(trade_df.index[-1]) + ".")

                    logger.warning(
                        "Couldn't get spot data to convert the notional currency. Hence not returning trading data."
                    )

        return reporting_spot, trade_df

    def _invert_quoting_market(self, market_df):
        """Inverts the quote data for an FX pair (eg. converts USD/GBP to GBP/USD) by calculating the reciprical. Also
        swaps around the bid/ask fields for consistency.

        Parameters
        ----------
        market_df : DataFrame
            Contains market data, typically quote data

        Returns
        -------
        DataFrame
        """

        if isinstance(market_df, pd.Series):
            market_df = pd.DataFrame(market_df)

        if 'mid' in market_df.columns:
            market_df['mid'] = 1.0 / market_df['mid'].values

        # Need to swap around bid/ask when inverting market data!
        if 'bid' in market_df.columns and 'ask' in market_df.columns:

            # Copy the old bid first, otherwise the second assignment would use the
            # already-overwritten bid column (leaving the ask unchanged)
            old_bid = market_df['bid'].values.copy()

            market_df['bid'] = 1.0 / market_df['ask'].values
            market_df['ask'] = 1.0 / old_bid

        return market_df
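
# A standalone check of the bid/ask swap fix above: when inverting a quote the new
# bid must come from the old ask (and vice versa), so the old bid is copied first
import pandas as pd

quotes = pd.DataFrame({'bid': [1.2000], 'ask': [1.2002]})

old_bid = quotes['bid'].values.copy()
quotes['bid'] = 1.0 / quotes['ask'].values
quotes['ask'] = 1.0 / old_bid

assert (quotes['bid'] <= quotes['ask']).all()  # the spread stays positive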

    def _get_correct_convention_market_data(self,
                                            market_request,
                                            start_date=None,
                                            finish_date=None):
        """Gets market data for a ticker, when it is in the correct market convention. Otherwise throws an exception.

        Parameters
        ----------
        market_request : MarketRequest
            Parameters for the market data.

        Returns
        -------
        DataFrame
        """

        # Check that cross is in correct convention
        if self._fx_conv.correct_notation(
                market_request.ticker) != market_request.ticker:
            raise Exception(
                'Method expecting only crosses in correct market convention')

        if start_date is None and finish_date is None:
            start_date = market_request.start_date
            finish_date = market_request.finish_date

        return self._get_underlying_market_data(start_date, finish_date,
                                                market_request)

    def _get_underlying_market_data(self, start_date, finish_date,
                                    market_request):
        # Create request for market data
        market_request = MarketRequest(
            start_date=start_date,
            finish_date=finish_date,
            ticker=market_request.ticker,
            data_store=market_request.data_store,
            data_offset_ms=market_request.data_offset_ms,
            market_data_database_table=market_request.
            market_data_database_table)

        # Fetch market data in that ticker (will be tick data)
        market_df = self._data_factory.fetch_table(data_request=market_request)

        # TODO do further filtering of market and trade data as necessary
        if constants.resample_ms is not None:
            market_df = self._time_series_ops.resample_time_series(
                market_df, resample_ms=constants.resample_ms)

            market_df.dropna(inplace=True)

        ## TODO drop stale quotes for market data and add last update time?

        # Calculate mid market rate, if it doesn't exist
        if market_df is not None:
            if not (market_df.empty):
                market_df = self._benchmark_mid.calculate_benchmark(
                    market_df=market_df)

        return market_df

    def trim_sort_market_trade_order(self, market_trade_order_tuple,
                                     start_date, finish_date, ticker):
        """Takes market and trade/order data, then trims it so that the trade/order data is entirely within the
        start/finish date range of market data. If trade/order data does not fully overlap with the market data
        it can cause problems later when computing metrics/benchmarks.

        Parameters
        ----------
        market_trade_order_tuple : tuple
            Tuple of market data with trade/order data

        start_date : datetime
            Start date of TCA analysis

        finish_date : datetime
            Finish date of TCA analysis

        ticker : str
            Ticker

        Returns
        -------
        DataFrame, DataFrame (dict)
        """
        logger = LoggerManager.getLogger(__name__)

        market_df, trade_order_holder = self._convert_tuple_to_market_trade(
            market_trade_order_tuple)
        logger.debug("Filter the market date by start/finish date")

        # Check market data and trade data is not empty!
        market_df = self._time_series_ops.filter_start_finish_dataframe(
            market_df, start_date, finish_date)

        # When reassembling the market data, give the user the option of sorting it, in case it was loaded out of order
        if market_df is not None and constants.re_sort_market_data_when_assembling:
            if not (market_df.empty):
                logger.debug("Filtered by start/finish date now sorting")

                market_df = market_df.sort_index()

        # Check if there's any market data; if we have none at all, we can't do any TCA, so warn the user...
        if market_df is None or len(market_df.index) == 0:
            err_msg = "No market data between selected dates for " + ticker + " between " + str(start_date) + " - " \
                      + str(finish_date)

            logger.warning(err_msg)

            # raise DataMissingException(err_msg)

        logger.debug("Combine trade/order data")

        # Combine all the trades in a single dataframe (and also the same for orders)
        # which are placed into a single dict
        trade_order_df_dict = trade_order_holder.get_combined_dataframe_dict()

        # Make sure the trade data lies entirely within the market data (if trade data falls outside the
        # market data, we can't calculate any metrics for it later)
        for k in self._util_func.dict_key_list(trade_order_df_dict.keys()):
            trade_order_df_dict[k] = self.strip_trade_order_data_to_market(
                trade_order_df_dict[k], market_df)

        # Note: we can sometimes get empty results when running in parallel (eg. when the computation is
        # split into days and a particular day has no trades), so don't raise an exception here
        if not trade_order_holder.check_empty_combined_dataframe_dict(
                trade_order_df_dict):
            err_msg = "No trade/order data for " + ticker + " between " + str(start_date) + " - " \
                      + str(finish_date)

            logger.warning(err_msg)

            # raise DataMissingException(err_msg)

        return market_df, trade_order_df_dict
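
    # Hedged sketch of what TimeSeriesOps.filter_start_finish_dataframe is assumed to do above: a plain
    # index-based window filter (this helper and its name are illustrative, not tcapy's own code)
    def _filter_start_finish_sketch(self, df, start_date, finish_date):
        import pandas as pd

        if df is None or df.empty:
            return df

        # Keep only the rows whose timestamps fall inside the requested window
        return df.loc[(df.index >= pd.Timestamp(start_date)) &
                      (df.index <= pd.Timestamp(finish_date))]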

    def strip_trade_order_data_to_market(self,
                                         trade_order_df,
                                         market_df,
                                         consider_order_length=True):
        """Strips down the trade/order data so that it is within the market data provided. Hence, trade/order data
        will fully overlap with the market data.

        Parameters
        ----------
        trade_order_df : DataFrame
            Trade/order data from the client

        market_df : DataFrame
            Market data

        consider_order_length : bool (default: True)
            Should the full length of each order be taken into account when checking the overlap?

        Returns
        -------
        DataFrame
        """

        if market_df is not None and trade_order_df is not None:
            if not market_df.empty and not trade_order_df.empty:

                add_cond = True

                # For orders, ensure that the start/end time of every order is within the market data start/finish
                # dates; this is important, given that we often want to calculate benchmarks over orders from market data
                if consider_order_length:

                    # Note: trade_order_df is already known to be non-None at this point
                    if 'benchmark_date_start' in trade_order_df.columns \
                            and 'benchmark_date_end' in trade_order_df.columns:

                        add_cond = (trade_order_df['benchmark_date_start'] >=
                                    market_df.index[0]) & (
                                        trade_order_df['benchmark_date_end'] <=
                                        market_df.index[-1])

                # For trades, ensure that every trade is within the market data start/finish dates
                trade_order_df = trade_order_df.loc[
                    (trade_order_df.index >= market_df.index[0])
                    & (trade_order_df.index <= market_df.index[-1]) & add_cond]

        return trade_order_df
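
    # Usage sketch with hypothetical data: a trade stamped before the market data window is dropped, so
    # every surviving trade has market context around it ('mid'/'executed_price' columns are assumptions)
    def _strip_trade_order_usage_sketch(self):
        import pandas as pd

        market_idx = pd.date_range('2020-01-01 09:00', '2020-01-01 17:00', freq='1min')
        market_df = pd.DataFrame({'mid': 1.10}, index=market_idx)

        trade_idx = pd.to_datetime(['2020-01-01 08:30', '2020-01-01 12:00'])
        trade_df = pd.DataFrame({'executed_price': [1.0999, 1.1001]}, index=trade_idx)

        # Only the 12:00 trade survives; the 08:30 trade pre-dates the market data
        return self.strip_trade_order_data_to_market(trade_df, market_df)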

    def _strip_start_finish_dataframe(self, data_frame, start_date,
                                      finish_date, tca_request):
        """Strips down the data frame to the dates which have been requested in the initial TCA request

        Parameters
        ----------
        data_frame : DataFrame
            Data to be stripped down

        start_date : datetime
            Start date of the computation

        finish_date : datetime
            Finish date of the computation

        tca_request : TCARequest
            Parameters for the TCA request

        Returns
        -------
        DataFrame
        """

        if data_frame is not None and not data_frame.empty:
            if start_date != tca_request.start_date:
                data_frame = data_frame.loc[
                    data_frame.index >= tca_request.start_date]

            if finish_date != tca_request.finish_date:
                data_frame = data_frame.loc[
                    data_frame.index <= tca_request.finish_date]

        return data_frame

    def _check_is_empty_trade_order(self, trade_df, tca_request, start_date,
                                    finish_date, trade_order_type):

        logger = LoggerManager.getLogger(__name__)

        if trade_df is None or trade_df.empty:
            logger.warning("Missing trade data for " + tca_request.ticker +
                           " between " + str(start_date) + " - " +
                           str(finish_date) + " in " + trade_order_type)

            return True

        return False

    @abc.abstractmethod
    def _calculate_additional_metrics(self, market_df, trade_order_df_dict,
                                      tca_request):
        pass

    @abc.abstractmethod
    def _convert_tuple_to_market_trade(self, market_trade_order_tuple):
        pass

    @abc.abstractmethod
    def get_tca_version(self):
        pass
Example #30
0
    def _fetch_market_data(self,
                           start,
                           finish,
                           ticker,
                           write_to_disk=True,
                           read_cached_from_disk=True,
                           web_proxies=constants.web_proxies):
        logger = LoggerManager.getLogger(__name__)

        key = (str(start) + str(finish) + ticker + '_' +
               self._get_postfix()).replace(":", '_')

        filename = os.path.join(self.temp_data_folder, key) + '.' + fileformat
        util_func = UtilFunc()

        start_time_stamp = pd.Timestamp(start)
        finish_time_stamp = pd.Timestamp(finish)

        if self._remove_saturday():
            weekend_data = "Saturday? " + key

            # Ignore Saturday, and don't attempt to download
            if start_time_stamp.dayofweek == 5 or finish_time_stamp.dayofweek == 5:
                return None, weekend_data

        if self._remove_weekend_points():
            weekend_data = "Weekend? " + key

            if start_time_stamp.dayofweek == 6 and start_time_stamp.hour < 20:
                return None, weekend_data

            if start_time_stamp.dayofweek == 4 and start_time_stamp.hour > 22:
                return None, weekend_data

        df = None

        if read_cached_from_disk:
            if os.path.exists(filename):
                df = util_func.read_dataframe_from_binary(filename,
                                                          format=binary_format)

                if df is not None:
                    logger.debug("Read " + filename + " from disk")

        if df is None:
            # Map the tcapy ticker to the vendor ticker and fetch from the external data source
            df = self._get_input_data_source().fetch_market_data(
                start,
                finish,
                ticker=self._get_tickers_vendor()[ticker],
                web_proxies=web_proxies)

            if df is not None:
                df = df.drop('ticker', axis=1)

                if write_to_disk:
                    # Write a small temporary DataFrame to disk (if the process fails later, these can be
                    # picked up, without having to call the external vendor again)
                    util_func.write_dataframe_to_binary(df,
                                                        filename,
                                                        format=binary_format)

        msg = None

        if df is None:
            msg = "No data? " + key

        return df, msg
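
    # Standalone sketch of the read-through disk cache pattern above: try the local file first, fall back
    # to the external vendor, and persist the result so a failed later stage doesn't force a re-download.
    # The Parquet format and the fetch_from_vendor callable are illustrative assumptions, not tcapy's API.
    def _fetch_with_disk_cache_sketch(self, start, finish, ticker, folder, fetch_from_vendor):
        import os
        import pandas as pd

        key = (str(start) + str(finish) + ticker).replace(':', '_')
        filename = os.path.join(folder, key + '.parquet')

        # Cache hit: reuse the previously downloaded chunk
        if os.path.exists(filename):
            return pd.read_parquet(filename)

        # Cache miss: call out to the external vendor
        df = fetch_from_vendor(start, finish, ticker)

        if df is not None:
            df.to_parquet(filename)

        return df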