Example #1
    def _join_market_downsampled_trade_orders(self,
                                              market_downsampled_df,
                                              trade_order_df,
                                              fields=None):
        """Combine market data with trade/orders, into a sparse DataFrame. Typically, used when preparing to display
        a mixture of market/trades data together.

        Parameters
        ----------
        market_downsampled_df : DataFrame
            Market data which has been downsampled

        trade_order_df : DataFrame
            Trade/order data to be combined

        fields : str (list)
            Fields to keep

        Returns
        -------
        DataFrame
        """

        logger = LoggerManager.getLogger(__name__)

        if fields is not None:
            trade_order_df = self._time_series_ops.filter_time_series_by_matching_columns(
                trade_order_df, fields)

        logger.debug('About to join')

        sparse_market_trade_df = market_downsampled_df.join(trade_order_df,
                                                            how='outer')

        # Add buy/sell trade prices in new columns (easier for plotting later)
        if 'executed_price' not in sparse_market_trade_df.columns:
            logger.warning("'executed_price' column not found in joined market/trade data")

        executed_price = sparse_market_trade_df['executed_price'].values
        side_to_match = sparse_market_trade_df['side'].values

        sparse_market_trade_df['buy_trade'] \
            = self._time_series_ops.nanify_array_based_on_other(side_to_match, -1, executed_price)  # make sells NaN (NOT buys!)
        sparse_market_trade_df['sell_trade'] \
            = self._time_series_ops.nanify_array_based_on_other(side_to_match, 1, executed_price)   # make buys NaN (NOT sells!)

        logger.debug('Finished joining')

        return sparse_market_trade_df
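
The nanify_array_based_on_other helper is not shown in this example. A minimal sketch of the behaviour the comments above imply (blank out prices whose side matches a given value) follows; the function body and its semantics are assumptions, not the library's actual implementation.

import numpy as np

def nanify_array_based_on_other_sketch(side_to_match, side_value, executed_price):
    # Hypothetical sketch: return a copy of executed_price with NaN wherever
    # side_to_match equals side_value (e.g. side_value=-1 blanks out sells,
    # leaving only the buy prices behind)
    out = np.array(executed_price, dtype=float)
    out[side_to_match == side_value] = np.nan

    return out

# Illustrative values only: side +1 = buy, -1 = sell
side = np.array([1, -1, 1])
px = np.array([1.10, 1.11, 1.12])

buy_trade = nanify_array_based_on_other_sketch(side, -1, px)   # [1.10, nan, 1.12]
sell_trade = nanify_array_based_on_other_sketch(side, 1, px)   # [nan, 1.11, nan]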
Example #2
    def _fetch_market_data(self, start, finish, ticker, write_to_disk=True, read_cached_from_disk=True, web_proxies=constants.web_proxies):
        logger = LoggerManager.getLogger(__name__)

        key = (str(start) + str(finish) + ticker + '_' + self._get_postfix()).replace(":", '_')

        filename = os.path.join(self.temp_data_folder, key) + '.' + fileformat
        util_func = UtilFunc()

        start_time_stamp = pd.Timestamp(start)
        finish_time_stamp = pd.Timestamp(finish)

        if self._remove_weekend_points():
            weekend_data = "Weekend? " + key

            weekday_point = UtilFunc().is_weekday_point(start_time_stamp, finish_time_stamp,
                                                        friday_close_nyc_hour=constants.friday_close_utc_hour,
                                                        sunday_open_utc_hour=constants.sunday_open_utc_hour)

            if not(weekday_point):
                return None, weekend_data

        df = None

        if read_cached_from_disk:
            if os.path.exists(filename):
                df = util_func.read_dataframe_from_binary(filename, format=binary_format)

                if df is not None:
                    logger.debug("Read " + filename + " from disk")

        if df is None:
            # Convert tcapy ticker into vendor ticker
            df = self._get_input_data_source().fetch_market_data(start, finish,
                                                                 ticker=self._get_tickers_vendor()[ticker], web_proxies=web_proxies)

            if df is not None:

                if write_to_disk:
                    # Write a small temporary dataframe to disk (if the process fails later, these can be
                    # picked up, without having to call the external vendor again)
                    util_func.write_dataframe_to_binary(df, filename, format=binary_format)

        msg = None

        if df is None:
            msg = "No data? " + key

        return df, msg
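
A simplified sketch of the read-through caching pattern used above, with plain Parquet calls standing in for read_dataframe_from_binary/write_dataframe_to_binary; the function name, the path and the fetch_fn callable are illustrative assumptions.

import os
import pandas as pd

def read_cached_or_fetch_sketch(filename, fetch_fn):
    # Hypothetical sketch: prefer the cached file on disk, otherwise call the
    # external vendor (fetch_fn) and cache whatever comes back for next time
    if os.path.exists(filename):
        return pd.read_parquet(filename)

    df = fetch_fn()

    if df is not None:
        df.to_parquet(filename)

    return df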
Example #3
def combine_resampled_spot_data_into_single_dataframe_usd_base(
        resample_freq='1min', data_vendor='dukascopy'):
    df_list = []

    logger = LoggerManager.getLogger(__name__)

    for ticker in ticker_combined_mkt:
        logger.info("Reading " + ticker + " resample freq " + resample_freq +
                    " data vendor " + data_vendor)

        df = pd.read_parquet(csv_output + ticker + '_' + resample_freq + '_' +
                             data_vendor + '.' + file_extension)

        base = ticker[0:3]
        terms = ticker[3:6]

        if terms == 'USD':
            df_invert = pd.DataFrame(index=df.index)
            df_invert[terms + base + '.close'] = 1.0 / df[ticker + '.close']
            df_invert[terms + base + '.open'] = 1.0 / df[ticker + '.open']

            # Invert high and low!
            df_invert[terms + base + '.high'] = 1.0 / df[ticker + '.low']
            df_invert[terms + base + '.low'] = 1.0 / df[ticker + '.high']

            df_invert[terms + base + '.tickcount'] = df[ticker + '.tickcount']

            df = df_invert

        df_list.append(df)

    logger.info("Combining all tickers with resample freq " + resample_freq +
                " data vendor " + data_vendor)
    df = pd.DataFrame(index=df.index)

    df['USDUSD.close'] = 1.0

    df_list.append(df)
    df = calculations.pandas_outer_join(df_list)
    df = df.dropna()

    combined_file = 'fx_' + resample_freq + '_' + data_vendor + '.' + file_extension

    df.to_parquet(csv_output + combined_file)
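
A tiny worked example (illustrative values) of why the high and low are swapped when inverting a quote: for positive rates the map x -> 1/x is decreasing, so the inverted bar's high comes from the original low and vice versa.

import pandas as pd

eurusd = pd.DataFrame({'EURUSD.open': [1.10], 'EURUSD.high': [1.12],
                       'EURUSD.low': [1.09], 'EURUSD.close': [1.11]})

usdeur = pd.DataFrame(index=eurusd.index)
usdeur['USDEUR.open'] = 1.0 / eurusd['EURUSD.open']
usdeur['USDEUR.high'] = 1.0 / eurusd['EURUSD.low']    # high comes from the low
usdeur['USDEUR.low'] = 1.0 / eurusd['EURUSD.high']    # low comes from the high
usdeur['USDEUR.close'] = 1.0 / eurusd['EURUSD.close']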
Example #4
    def get(self, key, burn_after_reading=False):
        """Gets the object(s) associated with the key(s) or CacheHandle(s)

        Parameters
        ----------
        key : str or CacheHandle (list)
            Key(s) to be fetched

        burn_after_reading : bool (default: False)
            Should the key be erased after reading?

        Returns
        -------
        object
        """
        logger = LoggerManager.getLogger(__name__)

        key = copy.copy(key)

        single = False

        if not (isinstance(key, list)):
            key = [key]

            single = True

        for i in range(0, len(key)):
            if isinstance(key[i], CacheHandle):
                key[i] = key[i].handle_name

        obj = None

        try:
            obj = self._get(key, burn_after_reading=burn_after_reading)
        except Exception as e:
            logger.warning("Couldn't retrieve " + str(key) + " from cache: " +
                           str(e))

        if 'market_df' in key:
            logger.debug("Fetched 'market_df' from the cache")

        if single and obj is not None:
            return obj[0]

        return obj
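
Hypothetical usage of this getter, assuming volatile_cache is an instance of this class and the keys were previously stored with the matching put method (the key names here are made up).

# market_df = volatile_cache.get('market_df_EURUSD')                    # single key -> single object
# dfs = volatile_cache.get(['market_df_EURUSD', 'trade_df_EURUSD'])     # list of keys -> list of objects
# scratch = volatile_cache.get('scratch_df', burn_after_reading=True)   # erase the key after reading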
Example #5
    def get_trade_order_holder(self, tca_request):
        logger = LoggerManager.getLogger(__name__)

        logger.debug(
            "Get trade order holder for " + str(tca_request.ticker) + " from " + str(tca_request.start_date)
            + " - " + str(tca_request.finish_date))

        # Get all the trades/orders which have been requested, eg. trade_df and order_df.
        # Do separate calls, given they are assumed to be stored in different database tables
        trade_order_holder = DataFrameHolder()

        if tca_request.trade_order_mapping is not None:
            for trade_order_type in tca_request.trade_order_mapping:
                trade_order_df = self.get_trade_order_data(tca_request, trade_order_type)

                trade_order_holder.add_dataframe(trade_order_df, trade_order_type)

        return trade_order_holder
Example #6
    def _fill_reporting_spot(self, ticker, trade_df, start_date, finish_date, tca_request):
        logger = LoggerManager.getLogger(__name__)

        market_request = MarketRequest(start_date=start_date, finish_date=finish_date,
                                       ticker=ticker, data_store=tca_request.market_data_store,
                                       data_offset_ms=tca_request.market_data_offset_ms,
                                       use_multithreading=tca_request.use_multithreading,
                                       multithreading_params=tca_request.multithreading_params)

        market_conversion_df = self.get_market_data(market_request)

        # Make sure the trades/orders are within the market data (for the purposes of the reporting spot,
        # we don't need to consider the length of the order, JUST the starting point)
        trade_df = self.strip_trade_order_data_to_market(trade_df, market_conversion_df, consider_order_length=False)

        reporting_spot = None

        # need to check whether we actually have any trade data/market data
        if trade_df is not None and market_conversion_df is not None:
            if not (trade_df.empty) and not (market_conversion_df.empty):

                try:
                    reporting_spot = \
                        self._time_series_ops.vlookup_style_data_frame(trade_df.index, market_conversion_df, 'mid')[
                            0]

                except Exception as e:
                    logger.error("Reporting spot is missing for this trade data sample: " + str(e))

                if reporting_spot is None:
                    market_start_finish = "No market data in this sample. "

                    if market_conversion_df is not None:
                        market_start_finish = "Market data is between " + str(
                            market_conversion_df.index[0]) + " - " + str(market_conversion_df.index[-1]) + ". "

                    logger.warn(market_start_finish)
                    logger.warn("Trade data is between " + str(trade_df.index[0]) + " - " + str(
                        trade_df.index[-1]) + ".")

                    logger.warn(
                        "Couldn't get spot data to convert notionals currency. Hence not returning trading data.")

        return reporting_spot, trade_df
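
vlookup_style_data_frame is not shown here; an as-of style lookup along the lines this code assumes (take the latest market mid at or before each trade timestamp) could be sketched as follows, with the function name and return shape being assumptions.

import pandas as pd

def vlookup_reporting_spot_sketch(trade_index, market_df, field='mid'):
    # Hypothetical sketch: for every trade timestamp, take the most recent
    # market observation at or before it (never looking into the future)
    return market_df[field].reindex(trade_index, method='ffill')

# Illustrative data
market_df = pd.DataFrame({'mid': [1.10, 1.11, 1.12]},
                         index=pd.to_datetime(['2020-01-01 10:00',
                                               '2020-01-01 10:01',
                                               '2020-01-01 10:02']))
trade_index = pd.to_datetime(['2020-01-01 10:00:30', '2020-01-01 10:02:15'])

reporting_spot = vlookup_reporting_spot_sketch(trade_index, market_df)  # 1.10, 1.12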
Example #7
    def calculate_benchmark_market(self, market_df, tca_request):

        logger = LoggerManager.getLogger(__name__)

        benchmark_calcs = tca_request.benchmark_calcs
        valid_market = self._check_valid_market(market_df)

        # Calculations on market data only
        if valid_market:
            for b in benchmark_calcs:

                # For benchmarks which only modify market data (and don't need trade specific information)
                if isinstance(b, BenchmarkMarket):
                    logger.debug("Calculating " + type(b).__name__ +
                                 " for market data")

                    market_df = b.calculate_benchmark(market_df=market_df)

        return market_df
Example #8
    def _download(self, md_request, folder_prefix):
        from findatapy.market import MarketDataRequest, MarketDataGenerator, Market

        logger = LoggerManager.getLogger(__name__)
        market = Market(market_data_generator=MarketDataGenerator())

        ticker = md_request.ticker[0]
        df = market.fetch_market(md_request=md_request)

        df.columns = ['bid', 'ask', 'bidv', 'askv']

        df['venue'] = 'dukascopy'
        df['ticker'] = ticker

        df['mid'] = (df['bid'].values + df['ask'].values) / 2.0

        self.dump_hdf5_file(df, folder_prefix + "_" + ticker + ".h5")

        logger.info('Dumped to ' + folder_prefix + "_" + ticker + ".h5")
Example #9
    def _chunk_dataframes(self, obj):
        logger = LoggerManager.getLogger(__name__)

        # Can sometimes have very large dataframes, which need to be split, otherwise they won't fit in a single Redis key
        mem = obj.memory_usage(deep=True).sum()
        mem_float = round(float(mem) / (1024.0 * 1024.0), 3)
        mem = '----------- ' + str(mem_float) + ' MB -----------'

        chunks = int(math.ceil(mem_float / constants.volatile_cache_max_cache_chunk_size_mb))

        if chunks > 1:
            obj_list = self._time_series_ops.split_array_chunks(obj, chunks=chunks)
        else:

            obj_list = [obj]

        if obj_list != []:
            logger.debug("Pandas dataframe of size: " + mem + " in " + str(chunks) + " chunk(s)")

        return obj_list
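
A rough stand-in for split_array_chunks, assuming it splits a DataFrame row-wise into roughly equal pieces; the helper name, signature and default chunk size are illustrative rather than the library's actual values.

import math
import pandas as pd

def chunk_dataframe_sketch(df, max_chunk_mb=256.0):
    # Hypothetical sketch: work out how many chunks are needed to keep each
    # piece under max_chunk_mb, then slice the DataFrame row-wise
    mem_mb = float(df.memory_usage(deep=True).sum()) / (1024.0 * 1024.0)
    chunks = max(1, int(math.ceil(mem_mb / max_chunk_mb)))
    rows = int(math.ceil(len(df) / chunks))

    return [df.iloc[i:i + rows] for i in range(0, len(df), rows)]

# Illustrative: a roughly 2 MB frame split into pieces of at most ~1 MB each
df = pd.DataFrame({'mid': [1.1] * 250_000})
pieces = chunk_dataframe_sketch(df, max_chunk_mb=1.0)  # 2 pieces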
Example #10
    def _check_is_empty_trade_order(self, trade_df, tca_request, start_date,
                                    finish_date, trade_order_type):

        logger = LoggerManager.getLogger(__name__)

        if trade_df is None or trade_df.empty:
            logger.warning("Missing trade data for " + tca_request.ticker +
                           " between " + str(start_date) + " - " +
                           str(finish_date) + " in " + trade_order_type)

            return True

        return False
Example #11
    def load_market_calculate_summarize_metrics(self,
                                                tca_request,
                                                dummy_market=False):
        """Splits up the TCA request into individual tickers. Market/trade data is loaded for each ticker, before
        conducting TCA (ie. calculating metrics, benchmarks etc.). Returns a dictionary consisting of market data and
        another dictionary of trade/order data (and any additional results associated with the TCA)

        Parameters
        ----------
        tca_request : TCARequest
            Parameters defining the TCA calculation

        dummy_market : bool, default False
            Do we return market data for future use?

        Returns
        -------
        DataFrame (dict), DataFrame (dict)
        """

        # Load market/trade data and compute metrics/benchmarks etc. per ticker
        market_df_dict, trade_order_results_df_dict, tca_request_list = \
            self.get_market_trade_metrics(tca_request, dummy_market=dummy_market)

        # If none of the tickers we have selected has trades (and our analysis also requires trades), we can't do any TCA at all
        if len(trade_order_results_df_dict) == 0 and tca_request.trade_data_store is not None \
                and tca_request.trade_order_mapping is None:
            logger = LoggerManager.getLogger(__name__)

            err_msg = "no trade data for specified ticker(s) and time range"

            logger.error(err_msg)

            raise DataMissingException(err_msg)

        # trade_df = trade_order_results_df_dict['trade_df']
        # Now summarize those metrics across all the tickers, for easier display
        return self.summarize_metrics(market_df_dict,
                                      trade_order_results_df_dict,
                                      tca_request_list,
                                      dummy_market=dummy_market)
Example #12
    def _write_df_to_db_single_thread(self,
                                      ticker,
                                      remove_duplicates=True,
                                      if_exists_table='append',
                                      if_exists_ticker='replace'):

        logger = LoggerManager.getLogger(__name__)

        postfix = '-' + self._get_postfix() + '-with-duplicates'

        if remove_duplicates:
            postfix = '-' + self._get_postfix() + '-no-duplicates'

        filename = os.path.join(self.temp_large_data_folder,
                                ticker + postfix) + '.' + fileformat

        logger.info("Reading " + filename)

        util_func = UtilFunc()
        time_series_ops = TimeSeriesOps()
        data_source_local = self._get_output_data_source()

        df = util_func.read_dataframe_from_binary(filename,
                                                  format=binary_format)

        if df is not None:
            df = time_series_ops.localize_as_UTC(df)

            data_source_local.append_market_data(
                df,
                ticker,
                if_exists_table=if_exists_table,
                if_exists_ticker=if_exists_ticker)
        else:
            logger.warn("Couldn't write dataframe for " + ticker +
                        " to database, appears it is empty!")
Example #13
    def get_market_trade_order_holder(self, tca_request):
        """Gets the both the market data and trade/order data associated with a TCA calculation as a tuple of
        (DataFrame, DataFrameHolder)

        Parameters
        ----------
        tca_request : TCARequest
            Parameters for a TCA calculation

        Returns
        -------
        DataFrame, DataFrameHolder
        """

        logger = LoggerManager.getLogger(__name__)

        logger.debug(
            "Get market and trade/order data for " + str(tca_request.ticker) + " from " + str(tca_request.start_date)
            + " - " + str(tca_request.finish_date))

        # Get all the trades/orders which have been requested, eg. trade_df and order_df.
        # Do separate calls, given they are assumed to be stored in different database tables
        return self.get_market_data(tca_request), \
               self.get_trade_order_holder(tca_request)
Example #14
    def aggregate_tables(self,
                         df_dict={},
                         tables_dict={},
                         round_figures_by=None,
                         scalar=None):
        logger = LoggerManager.getLogger(__name__)

        if tables_dict == {}: tables_dict = self._tables_dict
        if round_figures_by is None: round_figures_by = self._round_figures_by
        if scalar is None: scalar = self._scalar

        joined_results = []

        table_name = tables_dict['table_name']
        table_list = tables_dict['table_list']

        column_list = None
        replace_text = None

        if 'column_list' in tables_dict.keys():
            column_list = tables_dict['column_list']

        if 'replace_text' in tables_dict.keys():
            replace_text = tables_dict['replace_text']

        agg_results = []

        for i in range(0, len(table_list)):
            table = table_list[i]

            # If the table is in the calculation output
            if table in df_dict.keys():
                df = df_dict[table].copy()

                if column_list is not None and column_list != []:
                    df.columns = [x + ' ' + column_list[i] for x in df.columns]

                df = self._util_func.replace_text_in_cols(df, replace_text)

                # Round/multiply elements in the table if requested
                if df is not None:
                    df = self._time_series_ops.multiply_scalar_dataframe(
                        df, scalar=scalar)
                    df = self._time_series_ops.round_dataframe(
                        df, round_figures_by)

                    agg_results.append(df)
            else:
                logger.warning(
                    table +
                    ' not in calculation output, are you sure the dictionary entry is correct?'
                )

        # If we've collected the tables, try doing a join on all of them
        # to combine them into one large table
        if agg_results != []:
            if len(agg_results) > 1:
                df_joined = self._time_series_ops.outer_join(agg_results)
            else:
                df_joined = agg_results[0]

            joined_results.append((df_joined, table_name))

        return joined_results
Example #15
        def callback(*args):
            """Calculates the aggregated TCA computation when the "Calculate" button is clicked. Cached the results and
            then updates the status label when done.

            Parameters
            ----------
            ticker_val : str(list)
                tickers (eg. EURUSD, GBPUSD etc)

            venue_val : str(list)
                Trading venues

            start_date_val : str(list)
                Start date of TCA calculations

            finish_date_val : str(list)
                Finish date of TCA calculations

            reload_val : str
                Whether underlying market and trade data should be reloaded from dataframe or fetched from cache

            n_clicks : int
                Number of times the button has been clicked

            Returns
            -------
            str
            """
            start = time.time()

            tag = tca_type + '-calculation-button'

            logger = LoggerManager.getLogger(__name__)
            logger.debug('Triggered click ' + tca_type)

            # old_clicks = self._session_manager.get_session_clicks(tag)

            # make sure none of the other charts are plotted till we have completed this!

            if tca_type == 'aggregated':
                uploadbox = args

                if uploadbox is not None:

                    if isinstance(uploadbox, tuple):
                        uploadbox = uploadbox[0]

                    # Assume that the user uploaded a binary CSV file
                    trade_df = DatabaseSourceCSVBinary(
                        trade_data_database_csv=uploadbox
                    ).fetch_trade_order_data()

                    data_frame_trade_order_mapping = OrderedDict([('trade_df',
                                                                   trade_df)])

                    start_date = trade_df.index[0]
                    finish_date = trade_df.index[-1]

                    ticker_val = FXConv().correct_unique_notation_list(
                        trade_df['ticker'].unique().tolist())

                    metric_val = 'slippage'

                    self._session_manager.set_session_flag('metric',
                                                           value=metric_val)
                    self._session_manager.set_session_flag(
                        'aggregated-visualization', True)

                    try:
                        #if True:

                        # clear the cache for the current user
                        self._glob_volatile_cache.clear_key_match(
                            self._session_manager.get_session_id())

                        results_form = [
                            # show the distribution of the selected metric for trades weighted by notional
                            # aggregated by ticker and then by venue
                            DistResultsForm(
                                trade_order_list=['trade_df'],
                                metric_name=metric_val,
                                aggregate_by_field=[
                                    'ticker', 'broker_id', 'venue'
                                ],
                                weighting_field=
                                'executed_notional_in_reporting_currency'),

                            # display the timeline of metrics average by day (and weighted by notional)
                            TimelineResultsForm(
                                trade_order_list=['trade_df'],
                                by_date='date',
                                metric_name=metric_val,
                                aggregation_metric='mean',
                                aggregate_by_field=['ticker'],
                                scalar=10000.0,
                                weighting_field=
                                'executed_notional_in_reporting_currency'),

                            # display a bar chart showing the average metric weighted by notional and aggregated by ticker
                            # venue
                            BarResultsForm(
                                trade_order_list=['trade_df'],
                                metric_name=metric_val,
                                aggregation_metric='mean',
                                aggregate_by_field=[
                                    'ticker', 'venue', 'broker_id'
                                ],
                                scalar=10000.0,
                                weighting_field=
                                'executed_notional_in_reporting_currency'),

                            # Create a table of the markout of every trade
                            TableResultsForm(
                                trade_order_list=['trade_df'],
                                metric_name='markout',
                                filter_by='all',
                                replace_text={
                                    'markout_': '',
                                    'executed_notional': 'exec not',
                                    'notional_currency': 'exec not cur'
                                },
                                keep_fields=[
                                    'executed_notional', 'side',
                                    'notional_currency'
                                ],
                                scalar={
                                    'all': 10000.0,
                                    'exclude': ['executed_notional', 'side']
                                },
                                round_figures_by={
                                    'all': 2,
                                    'executed_notional': 0,
                                    'side': 0
                                },
                                weighting_field='executed_notional')
                        ]

                        try:
                            #if True:
                            timeline_trade_df_metric_by_ticker = self.get_cached_computation_analysis(
                                key='timeline_trade_df_' + metric_val +
                                '_by_ticker',
                                tca_engine=self._tca_engine,
                                force_calculate=True,
                                tca_request=TCARequest(
                                    start_date=start_date,
                                    finish_date=finish_date,
                                    ticker=ticker_val,
                                    tca_type='aggregated',
                                    market_data_store='arctic-ncfx',
                                    trade_data_store='dataframe',
                                    trade_order_mapping=
                                    data_frame_trade_order_mapping,
                                    metric_calcs=[
                                        MetricSlippage(),
                                        MetricMarkout(
                                            trade_order_list=['trade_df'])
                                    ],
                                    results_form=results_form,
                                    dummy_market=True,
                                    use_multithreading=True))

                            calc_start = timeline_trade_df_metric_by_ticker.index[
                                0]
                            calc_end = timeline_trade_df_metric_by_ticker.index[
                                -1]

                            aggregated_title = self.create_status_msg_flags(
                                'aggregated', ticker_val, calc_start, calc_end)

                            logger.debug('Plotted aggregated summary plot!')

                            finish = time.time()

                        except Exception as e:
                            logger.exception(e)

                            return "Status: error - " + str(
                                e
                            ) + ". Check data exists for these dates?" + self.get_username_string(
                            )

                    except Exception as e:
                        logger.exception(e)

                        return 'Status: error - ' + str(
                            e
                        ) + ". Check data exists for these dates?" + self.get_username_string(
                        )

                    return 'Status: calculated ' + str(
                        round(finish - start, 3)
                    ) + "s for " + aggregated_title + self.get_username_string(
                    )

            raise dash.exceptions.PreventUpdate(
                "No data changed"
            )  # not very elegant but only way to prevent plots disappearing
Example #16
    def get_cached_computation_analysis(self, **kwargs):
        """Fetches a computation outoput from a cache (typically Redis) or computes the analysis directly using another object, if
        requested. Typically, a computation is initiated and then that large analysis is cached, ready to be consumed by
        display components which repeatedly call this function.

        Parameters
        ----------
        kwargs
            Variables generated by GUI which relate to our computations (eg. start date, finish date, ticker etc.)

        Returns
        -------
        pd.DataFrame
        """

        try:
            force_calculate = kwargs['force_calculate']
        except:
            force_calculate = False

        key = None

        if 'key' in kwargs: key = kwargs['key']

        if 'test' not in kwargs:
            computation_type = self._tca_engine.get_engine_description()
            session_id = self._session_manager.get_session_id() + "_expiry_"
            session_id_computation = session_id + '' + computation_type + '_'
        else:
            computation_type = ''
            session_id = ''
            session_id_computation = ''

        # Try to fetch some TCA analysis output from the cache
        cached_list = self._fetch_cached_list(
            force_calculate=force_calculate,
            computation_type=computation_type,
            session_id=session_id,
            key=key)

        # Otherwise force the calculation (or if it doesn't exist in the cache!)
        # When a button is pressed, force_calculate will typically be set to True
        if force_calculate:

            computation_request = self.create_computation_request(**kwargs)

            # Delete any existing keys for the current session
            self._glob_volatile_cache.clear_key_match("*" + session_id + "*")

            dict_of_df = self.run_computation_request(computation_request)

            dict_key_list = []
            dict_element_list = []

            # Cache all the dataframes in Redis/or other memory space (will likely need for later calls!)
            # from security perspective probably better not to cache the TCAEngine objects on a database (which can execute code)
            for dict_key in dict_of_df.keys():

                # check if we have all the keys filled (will be missing if for example there are no trades)
                if dict_key not in dict_of_df:
                    raise Exception('Missing ' + dict_key)

                dict_key_list.append(session_id_computation + dict_key)
                dict_element_list.append(dict_of_df[dict_key])

            self._session_manager.set_session_flag('user_df', dict_key_list)

            # self._glob_volatile_cache.put(session_id_computation + dict_key, dict_of_df[dict_key])

            # Put it back into Redis cache (to be fetched by Dash callbacks)
            self._glob_volatile_cache.put(dict_key_list, dict_element_list)

            logger = LoggerManager.getLogger(__name__)
            logger.debug('Generated tables: ' +
                         str(self._util_func.dict_key_list(dict_of_df.keys())))

            if key is None:
                return None

            if not (isinstance(key, list)):
                key = [key]

            for k in key:
                # Has one of the dataframes we want, just been calculated, if so return it!
                if k in dict_of_df.keys():
                    cached_list.append(dict_of_df[k])

                # Otherwise look in Redis for the table for the user
                else:
                    # as last resort get from our global, this key is unique to each user
                    cached_list.append(
                        self._glob_volatile_cache.get(session_id_computation +
                                                      k))

        # Return as a list (or a single element if there's only one result)
        tup = list(cached_list)

        if len(tup) == 1:
            return tup[0]
        else:
            return tup
Example #17
    def read_dataframe_from_binary(self, fname, format=constants.binary_default_dump_format):
        """Reads a DataFrame which is in HDF5/Parquet file format which was previously written by tcapy

        Parameters
        ----------
        fname : str
            Path of binary file

        format : str (default: 'parquet')
            What is the binary format? ('parquet' or 'hdf5' are supported)

        Returns
        -------
        pd.DataFrame
        """
        logger = LoggerManager.getLogger(__name__)

        # parquet is default choice in tcapy
        if format == 'parquet':
            data_frame = None

            try:
                if not (os.path.exists(fname)):
                    logger.error("Path doesn't exist for " + fname)

                    return data_frame

                return pd.read_parquet(fname, engine=constants.parquet_engine)
            except Exception as e:
                logger.error("No valid data for " + fname + ': ' + str(e))

                return data_frame

        elif format == 'hdf5':
            # Needs PyTables (the 'tables' package)
            data_frame = None
            store = None

            try:
                if not (os.path.exists(fname)):
                    logger.error("Path doesn't exist for " + fname)

                    return data_frame

                store = pd.HDFStore(fname)
                data_frame = store.select("data")
            except Exception as e:
                logger.error("No valid data for " + fname + ': ' + str(e))

                return data_frame

            finally:
                try:
                    if store is not None:
                        store.close()
                except:
                    pass

            return data_frame
        else:
            logger.warning("Cannot read file " + fname + ", invalid format specified")

            return None
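
A minimal round trip showing the two file formats this method wraps; the paths are illustrative, the Parquet branch needs a Parquet engine such as pyarrow, and the HDF5 branch needs PyTables.

import pandas as pd

df = pd.DataFrame({'mid': [1.10, 1.11]},
                  index=pd.to_datetime(['2020-01-01', '2020-01-02']))

# Parquet branch
df.to_parquet('/tmp/example.parquet')
df_parquet = pd.read_parquet('/tmp/example.parquet')

# HDF5 branch (stores the frame under the "data" key, as the method expects)
with pd.HDFStore('/tmp/example.h5') as store:
    store.put('data', df, format='table')
    df_hdf5 = store.select('data')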
Example #18
    def calculate_benchmark(self,
                            trade_order_df=None,
                            market_df=None,
                            trade_order_name=None,
                            bid_benchmark=None,
                            ask_benchmark=None,
                            benchmark_date_start_field=None,
                            benchmark_date_end_field=None):
        if not (self._check_calculate_benchmark(
                trade_order_name=trade_order_name)):
            return trade_order_df, market_df

        # For the specified bid/ask fields calculate the time-weighted average price, weighting each point
        # by how long it prevailed
        if bid_benchmark is None: bid_benchmark = self._bid_benchmark
        if ask_benchmark is None: ask_benchmark = self._ask_benchmark
        if benchmark_date_start_field is None:
            benchmark_date_start_field = self._benchmark_date_start_field
        if benchmark_date_end_field is None:
            benchmark_date_end_field = self._benchmark_date_end_field

        if bid_benchmark in market_df.columns and ask_benchmark in market_df.columns:
            trade_order_df[self._benchmark_name] = np.nan

            date_start = trade_order_df[benchmark_date_start_field].values
            date_end = trade_order_df[benchmark_date_end_field].values

            date_start = np.searchsorted(market_df.index, date_start)
            date_end = np.searchsorted(market_df.index, date_end)
            bid_price = market_df[bid_benchmark].values
            ask_price = market_df[ask_benchmark].values
            dt = market_df.index.to_series(
                keep_tz=False).diff().values / np.timedelta64(1, 's')
            dt[0] = 0  # first point should be weighted zero (since don't know how long it's been there)

            twap = []

            for i in range(0, len(trade_order_df.index)):

                if trade_order_df['side'][i] == 1:
                    price = ask_price
                elif trade_order_df['side'][i] == -1:
                    price = bid_price

                try:
                    if date_start[i] == date_end[i]:
                        twap.append(price[date_start[i]])
                    else:
                        twap_val = np.average(
                            price[date_start[i]:date_end[i]],
                            weights=dt[date_start[i]:date_end[i]])

                        twap.append(twap_val)
                except Exception as e:
                    err_msg = "TWAP cannot be calculated, given market data does not fully overlap with trade data: " \
                              + str(e)

                    LoggerManager.getLogger(__name__).error(err_msg)

                    raise TradeMarketNonOverlapException(err_msg)

            trade_order_df[self._benchmark_name] = twap
        else:
            LoggerManager.getLogger(
                __name__).warn(bid_benchmark + " and " + ask_benchmark +
                               " may not be in market data.")

        return trade_order_df, market_df
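
A small worked illustration of the time weighting used above, with made-up timestamps and ask prices: each price is weighted by how long it was the prevailing quote, and the first point gets zero weight.

import numpy as np
import pandas as pd

index = pd.to_datetime(['2020-01-01 10:00:00', '2020-01-01 10:00:10',
                        '2020-01-01 10:00:40'])
ask = np.array([1.1000, 1.1002, 1.1004])

# Seconds each quote prevailed (gap since the previous tick)
dt = index.to_series().diff().values / np.timedelta64(1, 's')
dt[0] = 0  # first point carries no weight, as we don't know how long it was there

twap = np.average(ask, weights=dt)  # = (1.1002 * 10 + 1.1004 * 30) / 40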
Example #19
    def calculate_benchmark(self,
                            trade_order_df=None,
                            market_df=None,
                            trade_order_name=None,
                            bid_benchmark=None,
                            ask_benchmark=None,
                            volume_field=None,
                            benchmark_date_start_field=None,
                            benchmark_date_end_field=None):

        if not (self._check_calculate_benchmark(
                trade_order_name=trade_order_name)):
            return trade_order_df, market_df

        # if fields have not been specified, then take them from the field variables
        if bid_benchmark is None: bid_benchmark = self._bid_benchmark
        if ask_benchmark is None: ask_benchmark = self._ask_benchmark
        if volume_field is None: volume_field = self._volume_field
        if benchmark_date_start_field is None:
            benchmark_date_start_field = self._benchmark_date_start_field
        if benchmark_date_end_field is None:
            benchmark_date_end_field = self._benchmark_date_end_field

        if bid_benchmark in market_df.columns and ask_benchmark in market_df.columns and volume_field in market_df.columns:
            trade_order_df[self._benchmark_name] = np.nan

            date_start = trade_order_df[benchmark_date_start_field].values
            date_end = trade_order_df[benchmark_date_end_field].values

            date_start = np.searchsorted(market_df.index, date_start)
            date_end = np.searchsorted(market_df.index, date_end)
            bid_price = market_df[bid_benchmark].values
            ask_price = market_df[ask_benchmark].values
            volume = market_df[volume_field].values

            vwap = []

            for i in range(0, len(trade_order_df.index)):
                if trade_order_df['side'][i] == 1:
                    price = ask_price
                elif trade_order_df['side'][i] == -1:
                    price = bid_price

                if date_start[i] == date_end[i]:
                    vwap.append(price[date_start[i]])
                else:
                    try:
                        vwap.append(
                            np.average(
                                price[date_start[i]:date_end[i]],
                                weights=volume[date_start[i]:date_end[i]]))
                    except Exception as e:
                        err_msg = "VWAP cannot be calculated, given market data does not fully overlap with trade data: " \
                                  + str(e)

                        LoggerManager.getLogger(__name__).error(err_msg)

                        raise TradeMarketNonOverlapException(err_msg)

            trade_order_df[self._benchmark_name] = vwap
        else:
            LoggerManager.getLogger(
                __name__).warn(bid_benchmark + ", " + ask_benchmark + " and " +
                               volume_field + " may not be in market data")

        return trade_order_df, market_df
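
The analogous worked illustration for the volume weighting used here, with made-up prices and tick volumes.

import numpy as np

ask = np.array([1.1000, 1.1002, 1.1004])
volume = np.array([2.0, 1.0, 1.0])

vwap = np.average(ask, weights=volume)  # = (1.1000 * 2 + 1.1002 + 1.1004) / 4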
Example #20
    def trim_sort_market_trade_order(self, market_trade_order_tuple,
                                     start_date, finish_date, ticker):
        """Takes market and trade/order data, then trims it so that the trade/order data is entirely within the
        start/finish date range of market data. If trade/order data does not fully overlap with the market data
        it can cause problems later when computing metrics/benchmarks.

        Parameters
        ----------
        market_trade_order_tuple : tuple
            Tuple of market data with trade/order data

        start_date : datetime
            Start date of TCA analysis

        finish_date : datetime
            Finish date of TCA analysis

        ticker : str
            Ticker

        Returns
        -------
        DataFrame, DataFrame (dict)
        """
        logger = LoggerManager.getLogger(__name__)

        market_df, trade_order_holder = self._convert_tuple_to_market_trade(
            market_trade_order_tuple)
        logger.debug("Filter the market date by start/finish date")

        # Check market data and trade data is not empty!
        market_df = self._time_series_ops.filter_start_finish_dataframe(
            market_df, start_date, finish_date)

        # When reassembling the market data, give the user the option of sorting it, in case it was loaded in an odd order
        if market_df is not None and constants.re_sort_market_data_when_assembling:
            if not (market_df.empty):
                logger.debug("Filtered by start/finish date now sorting")

                market_df = market_df.sort_index()

        # Check if there's any market data. If we have none at all, then we can't do any TCA, so warn the user...
        if market_df is None or len(market_df.index) == 0:
            err_msg = "No market data between selected dates for " + ticker + " between " + str(start_date) + " - " \
                      + str(finish_date)

            logger.warning(err_msg)

            # raise DataMissingException(err_msg)

        logger.debug("Combine trade/order data")

        # Combine all the trades in a single dataframe (and also the same for orders)
        # which are placed into a single dict
        trade_order_df_dict = trade_order_holder.get_combined_dataframe_dict()

        # Make sure the trade data is totally within the market data (if trade data is outside market data, then
        # can't calculate any metrics later)
        for k in self._util_func.dict_key_list(trade_order_df_dict.keys()):
            trade_order_df_dict[k] = self.strip_trade_order_data_to_market(
                trade_order_df_dict[k], market_df)

        # Note, we can sometimes get empty results when running in parallel (eg. when split up into days, we may not
        # get trades for a particular day), so don't raise an exception
        if not (trade_order_holder.check_empty_combined_dataframe_dict(
                trade_order_df_dict)):
            err_msg = "No trade/order data between selected dates for " + ticker + " between " + str(start_date) + " - " \
                      + str(finish_date)

            logger.warning(err_msg)

            # raise DataMissingException(err_msg)

        return market_df, trade_order_df_dict
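
strip_trade_order_data_to_market is not shown in this snippet; a minimal sketch of the trimming it is assumed to perform (drop trades/orders whose timestamps fall outside the first and last market data points), with the name and signature as assumptions.

import pandas as pd

def strip_trades_to_market_sketch(trade_df, market_df):
    # Hypothetical sketch: keep only trades timestamped within the market data range
    if trade_df is None or market_df is None or trade_df.empty or market_df.empty:
        return trade_df

    return trade_df[(trade_df.index >= market_df.index[0]) &
                    (trade_df.index <= market_df.index[-1])]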
Example #21
    def get_market_data(self, market_request):
        """Gets market data for a particular ticker. When we ask for non-standard FX crosses, only the mid-field is
        returned (calculated as a cross rate). We do not give bid/ask quotes for calculated non-standard _tickers, as these
        can difficult to estimate.

        Parameters
        ----------
        market_request : MarketRequest
            The type of market data to get

        Returns
        -------
        DataFrame
        """
        logger = LoggerManager.getLogger(__name__)

        if isinstance(market_request, TCARequest):
            market_request = MarketRequest(market_request=market_request)

        old_ticker = market_request.ticker

        if market_request.asset_class == 'fx':
            # Check if we can get ticker directly or need to create synthetic cross rates
            ticker = self._fx_conv.correct_notation(market_request.ticker)
        else:
            # If not FX we don't have to invert
            ticker = old_ticker

        # If the ticker in the correct convention is in the crosses where we collect data (typically this will be the USD
        # crosses, plus some liquid non-USD pairs like EURJPY)

        # available_tickers = []

        if isinstance(market_request.data_store, DatabaseSource):
            # TODO improve ticker check here!
            available_tickers = [ticker]
        elif 'csv' in market_request.data_store or 'h5' in market_request.data_store or 'gzip' in market_request.data_store \
            or 'parquet' in market_request.data_store or isinstance(market_request.data_store, pd.DataFrame) :

            # For CSV (or H5) we don't have much choice, and the tickers could differ between CSV files (if the CSV has
            # a 'ticker' field, we will match on that)
            available_tickers = [ticker]
        elif market_request.data_store in constants.market_data_tickers:
            available_tickers = self._util_func.dict_key_list(
                constants.market_data_tickers[
                    market_request.data_store].keys())

        else:
            err_msg = 'Ticker ' + str(
                ticker
            ) + " doesn't seem available in the data source " + market_request.data_store

            logger.error(err_msg)

            raise Exception(err_msg)

        if ticker in available_tickers:

            # In the correct convention or is not FX
            if ticker == old_ticker:
                market_df = self._get_correct_convention_market_data(
                    market_request)

            # Otherwise need to flip to the correct convention (only will return 'mid')
            else:
                market_request_flipped = MarketRequest(
                    market_request=market_request)
                market_request_flipped.ticker = ticker

                market_df = self._invert_quoting_market(
                    self._get_correct_convention_market_data(
                        market_request_flipped))

                if 'ticker' in market_df.columns:
                    market_df['ticker'] = old_ticker
        else:
            if market_request.asset_class == 'fx' and market_request.instrument == 'spot':
                # Otherwise we need to get both legs
                # eg. for NZDCAD, we shall download NZDUSD and USDCAD => multiply them to get NZDCAD

                # get the USD crosses for each leg and then multiply
                market_request_base = MarketRequest(
                    market_request=market_request)
                market_request_terms = MarketRequest(
                    market_request=market_request)

                market_request_base.ticker = old_ticker[0:3] + 'USD'
                market_request_terms.ticker = 'USD' + old_ticker[3:7]

                tickers_exist = self._fx_conv.currency_pair_in_list(
                        self._fx_conv.correct_notation(market_request_base.ticker), available_tickers) and \
                        self._fx_conv.currency_pair_in_list(
                            self._fx_conv.correct_notation(market_request_terms.ticker), available_tickers)

                # If both USD tickers don't exist, try computing via EUR tickers (eg. USDSEK from EURUSD & EURSEK)
                if not (tickers_exist):
                    market_request_base.ticker = old_ticker[0:3] + 'EUR'
                    market_request_terms.ticker = 'EUR' + old_ticker[3:7]

                    tickers_exist = self._fx_conv.currency_pair_in_list(
                        self._fx_conv.correct_notation(market_request_base.ticker), available_tickers) and \
                                    self._fx_conv.currency_pair_in_list(
                                        self._fx_conv.correct_notation(market_request_terms.ticker), available_tickers)

                # Check if that currency (in the CORRECT convention) is in the available tickers;
                # we will typically not collect market data for currencies in their wrong convention
                if tickers_exist:

                    fields_try = ['bid', 'ask', 'mid']

                    market_base_df = self.get_market_data(market_request_base)
                    market_terms_df = self.get_market_data(
                        market_request_terms)

                    market_has_data = False

                    if market_base_df is not None and market_terms_df is not None:
                        if not (market_base_df.empty) and not (
                                market_terms_df.empty):
                            market_has_data = True

                    # If there's no data in either DataFrame, don't attempt to calculate anything
                    if not (market_has_data):
                        return pd.DataFrame()

                    fields = []

                    for f in fields_try:
                        if f in market_base_df.columns and f in market_terms_df.columns:
                            fields.append(f)

                    # Only attempt to calculate if the fields exist
                    if len(fields) > 0:
                        # Remove any other columns (eg. with ticker name etc.)
                        market_base_df = market_base_df[fields]
                        market_terms_df = market_terms_df[fields]

                        # Need to align series to multiply (and then fill down points which don't match)
                        # can't use interpolation, given that would use FUTURE data
                        market_base_df, market_terms_df = market_base_df.align(
                            market_terms_df, join="outer")
                        market_base_df = market_base_df.fillna(method='ffill')
                        market_terms_df = market_terms_df.fillna(
                            method='ffill')

                        market_df = pd.DataFrame(data=market_base_df.values *
                                                 market_terms_df.values,
                                                 columns=fields,
                                                 index=market_base_df.index)

                        # Values at the start of the series MIGHT be nan, so need to ignore those
                        market_df = market_df.dropna(subset=['mid'])

                        if 'ticker' in market_df.columns:
                            market_df['ticker'] = old_ticker
                    else:
                        return None

                else:
                    # Otherwise couldn't compute either from the USD legs or EUR legs
                    logger.warning("Couldn't find market data for ticker: " +
                                   str(ticker))

                    return None
            else:
                # Otherwise couldn't find the non-FX ticker
                logger.warning("Couldn't find market data for ticker: " +
                               str(ticker))

                return None

        return market_df
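
A compact worked example (illustrative timestamps and values) of the synthetic cross construction described above: the NZDCAD mid is built from NZDUSD and USDCAD mids, mirroring the align/ffill/multiply/dropna steps in the code.

import pandas as pd

nzdusd = pd.DataFrame({'mid': [0.6500, 0.6502]},
                      index=pd.to_datetime(['2020-01-01 10:00:00',
                                            '2020-01-01 10:00:02']))
usdcad = pd.DataFrame({'mid': [1.3000, 1.3001]},
                      index=pd.to_datetime(['2020-01-01 10:00:01',
                                            '2020-01-01 10:00:02']))

# Align on a common index and fill forward (never interpolate, which would use future data)
base, terms = nzdusd.align(usdcad, join='outer')
base, terms = base.ffill(), terms.ffill()

nzdcad = pd.DataFrame(base.values * terms.values,
                      columns=['mid'], index=base.index).dropna(subset=['mid'])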
Example #22
    def download_from_external_source(self,
                                      append_data=True,
                                      remove_duplicates=True,
                                      if_exists_table='append',
                                      if_exists_ticker='append',
                                      number_of_days=30 * 7,
                                      chunk_int_min=None,
                                      start_date=None,
                                      finish_date=None,
                                      delete_cached_files=False,
                                      tickers=None,
                                      write_temp_to_disk=True,
                                      write_to_disk_db=True,
                                      read_cached_from_disk=True,
                                      write_large_csv=False,
                                      write_large_hdf5_parquet=True,
                                      csv_folder=constants.csv_folder,
                                      csv_compression=None,
                                      return_df=False,
                                      web_proxies=constants.web_proxies):
        """Downloads market data from an external source and then dumps to HDF5/Parquet files for temporary storage which is cached.
        If HDF5/Parquet cached files already exist for a time segment we read them in, saving us to make an external data call.

        Lastly, dumps it to an internal database.

        Parameters
        ----------
        append_data : bool
            True - only start collecting later data not already in database (ignoring number_of_days parameter)
            False - start collecting all data, ignoring anything stored in database

        remove_duplicates : bool
            True (default) - remove values which are repeated
            False - leave in repeated values

        if_exists_table : str
            'append' - if database table already exists append data to it
            'replace' - remove existing database table

        if_exists_ticker : str
            'append' - if ticker already exists in the database, append to it
            'replace' - replace any data for this ticker

        number_of_days : int
            Number of days to download data for

        chunk_int_min : int (None)
            Size of each download (default - specified in constants)

        Returns
        -------

        """
        # Swim()

        logger = LoggerManager.getLogger(__name__)

        if write_to_disk_db:
            data_source_local = self._get_output_data_source()

        if write_large_csv:
            if not (os.path.isdir(csv_folder)):
                logger.warn("CSV folder " + csv_folder +
                            " where we are about to write does not exist")

        # What chunk size in minutes do we want for this data provider?
        if chunk_int_min is None:
            chunk_int_min = self._get_download_chunk_min_size()

        if chunk_int_min is None:
            chunk_size_str = None
        else:
            chunk_size_str = str(chunk_int_min) + "min"

        if tickers is None:
            tickers = self._get_tickers()

        if isinstance(tickers, str):
            tickers = [tickers]

        # If there's no start or finish date, choose a default start/finish date
        if start_date is None and finish_date is None:
            finish_date = datetime.datetime.utcnow()
            finish_date = datetime.datetime(finish_date.year,
                                            finish_date.month, finish_date.day,
                                            0, 0, 0, 0)

            start_date = finish_date - timedelta(days=number_of_days)  # 30*7
        else:
            start_date = self.time_series_ops.date_parse(start_date)
            finish_date = self.time_series_ops.date_parse(finish_date)

        if finish_date < start_date:
            logger.error("Download finish date is before start data!")

            return

        now = pd.Timestamp(datetime.datetime.utcnow(), tz='utc')

        # Do not allow downloading of future data!
        if finish_date > now:
            finish_date = now

        df_dict = {}

        # Loop through each ticker
        for ticker in tickers:

            has_old = False

            if delete_cached_files and write_to_disk_db:
                logger.info("Deleting all cached temp files for " + ticker)

                for name in glob.glob(self.temp_data_folder + '/*' + ticker +
                                      "*"):
                    try:
                        os.remove(name)
                    except:
                        logger.warn("Couldn't delete file " + name)

                logger.info("Finished deleting cached files for " + ticker)

            # If we have been asked to append data, load up what we can from the internal database
            # and find the last point
            if append_data and if_exists_ticker == 'append' and write_to_disk_db:
                logger.info("Trying to download old data first for " + ticker)

                try:
                    df_old = data_source_local.fetch_market_data(
                        start_date,
                        finish_date,
                        ticker,
                        web_proxies=web_proxies)

                    # This will vary between tickers (in particular if we happen to add a new ticker)
                    start_date = df_old.index[-1]

                    has_old = True

                    # Remove reference - big file!
                    df_old = None

                except Exception as e:
                    logger.info("No data found for ticker " + ticker +
                                " with error: " + str(e))
            else:
                logger.info("Downloading new data for " + ticker + ".")

            # Date range may not work with timezones
            start_date = pd.Timestamp(start_date.replace(tzinfo=None))
            finish_date = pd.Timestamp(finish_date.replace(tzinfo=None))

            if finish_date - start_date < pd.Timedelta(days=1):
                start_date_list = [start_date, finish_date]
            else:
                # Download day by day from that last point up to the finish date
                start_date_list = pd.date_range(start_date, finish_date)

                start_date_list = [
                    pd.Timestamp(x.to_pydatetime()) for x in start_date_list
                ]

                if finish_date > start_date_list[-1]:
                    start_date_list.append(finish_date)
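                # For illustration (hypothetical dates): a start of 2020-01-01 and a finish of
                # 2020-01-03 12:00 would give [2020-01-01, 2020-01-02, 2020-01-03, 2020-01-03 12:00],
                # ie. daily boundaries plus the exact finish timestamp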

            df = None
            filename = os.path.join(self.temp_data_folder,
                                    ticker) + '.' + fileformat

            try:
                # df = UtilFunc().read_dataframe_from_hdf(filename)
                pass
            except:
                logger.info("Couldn't read HDF5/Parquet file for " + ticker)

            # Create downloads in x minute chunks (if we request very large chunks of data with certain data providers,
            # we could cause problems!)
            if df is None:
                df_remote_list = []

                # Loop by day (otherwise can end up with too many open files!)
                for i in range(0, len(start_date_list) - 1):

                    if chunk_size_str is not None:
                        if start_date_list[
                                i + 1] - start_date_list[i] < pd.Timedelta(
                                    minutes=chunk_int_min):
                            start_date_hist = [start_date_list[i]]
                            finish_date_hist = [start_date_list[i + 1]]
                        else:
                            start_date_hist, finish_date_hist = UtilFunc(
                            ).split_into_freq(start_date_list[i],
                                              start_date_list[i + 1],
                                              freq=chunk_size_str)
                    else:
                        start_date_hist = [start_date_list[i]]
                        finish_date_hist = [start_date_list[i + 1]]

                    # For FX and most other markets we should remove weekends (cryptocurrencies do have weekend data)
                    if self._remove_weekend_points():
                        start_date_hist, finish_date_hist = UtilFunc(
                        ).remove_weekend_points(start_date_hist,
                                                finish_date_hist)

                    output = []

                    if constants.use_multithreading:

                        # Create a multiprocess object for downloading data
                        swim = Swim(parallel_library=constants.
                                    database_populator_threading_library)
                        pool = swim.create_pool(thread_no=self._get_threads())

                        result = []

                        for i in range(0, len(start_date_hist)):
                            # output.append(self._fetch_market_data(start_date_hist[i], finish_date_hist[i], ticker))

                            result.append(
                                pool.apply_async(
                                    self._fetch_market_data,
                                    args=(start_date_hist[i],
                                          finish_date_hist[i], ticker,
                                          write_temp_to_disk,
                                          read_cached_from_disk, web_proxies)))

                        output = [p.get() for p in result]
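                        # Each p is an async result handle; p.get() blocks until the corresponding
                        # chunk download has returned its (DataFrame, message) tuple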

                        swim.close_pool(pool, True)
                    else:
                        # Otherwise run in single threaded fashion
                        for i in range(0, len(start_date_hist)):
                            output.append(
                                self._fetch_market_data(
                                    start_date_hist[i],
                                    finish_date_hist[i],
                                    ticker,
                                    write_to_disk=write_temp_to_disk,
                                    read_cached_from_disk=read_cached_from_disk,
                                    web_proxies=web_proxies))

                    # Get all the dataframe chunks and returned messages
                    df_list = [
                        self._remove_duplicates_time_series(x,
                                                            remove_duplicates,
                                                            field='mid')
                        for x, y in output if x is not None
                    ]
                    msg_list += [
                        y for x, y in output if y is not None
                    ]
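                    # Each output item is the (df, msg) pair returned by _fetch_market_data: the
                    # DataFrames are deduplicated and kept, while any non-None messages (eg. weekend
                    # or empty-data markers) are collected as diagnostics for the final status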

                    # Concatenate all the data chunks (typically 5 minutes or larger)
                    try:
                        if df_list != []:
                            df_temp = pd.concat(df_list)

                            if df_temp is not None:
                                if not (df_temp.empty):
                                    df_remote_list.append(df_temp)

                    except Exception as e:
                        logger.error(str(e))

                if df_remote_list != []:
                    df = pd.concat(df_remote_list)

                    # Need to sort data (database assumes sorted data for chunking/searches)
                    df = df.sort_index()
                    df = self.time_series_ops.localize_as_UTC(df)

                    if write_large_hdf5_parquet:
                        if df is not None:
                            if not (df.empty):
                                key =  '_' + self._get_postfix() + "_" + \
                                       (str(df.index[0]) + str(df.index[-1])).replace(":", '_').replace(" ", '_')
                                filename = os.path.join(
                                    csv_folder,
                                    ticker + key) + '.' + fileformat

                                # Temporary cache for testing purposes (also if the process crashes, we can read this back in)
                                UtilFunc().write_dataframe_to_binary(
                                    df, filename, format=binary_format)

            if df is not None:
                # Assume UTC time (don't want to mix UTC and non-UTC in database!)
                df = self.time_series_ops.localize_as_UTC(df)

            # write CSV
            if write_large_csv:
                if df is not None:
                    if not (df.empty):
                        key = '_' + self._get_postfix() + "_" + \
                              (str(df.index[0]) + str(df.index[-1])).replace(":", '_').replace(" ", '_')

                        if csv_compression == 'gzip':
                            df.to_csv(os.path.join(csv_folder,
                                                   ticker + key + ".csv.gz"),
                                      compression='gzip')
                        else:
                            df.to_csv(
                                os.path.join(csv_folder,
                                             ticker + key + ".csv"))

            if return_df:
                df_dict[ticker] = df

            # Dump what we have locally (or whatever DatabaseSource we have defined)
            try:

                start_date = start_date.replace(tzinfo=pytz.utc)

                # Remove the first point if it matches the last point already stored in the database
                if has_old:
                    if df is not None and df.index[0] == start_date:
                        df = df[1:]

                if df is not None:
                    df = df.sort_index()

                    df = self._remove_duplicates_time_series(df,
                                                             remove_duplicates,
                                                             field='mid')

                if write_to_disk_db and df is not None:
                    data_source_local.append_market_data(
                        df,
                        ticker,
                        if_exists_table=if_exists_table,
                        if_exists_ticker=if_exists_ticker)

                    logger.info("Wrote to database for " + ticker)

            except Exception as e:
                final_err = "Data was missing for these dates " + str(start_date) + " - " + str(finish_date) + " for " \
                            + str(tickers) + ". Didn't write anything to disk or return any valid dataframe: " + str(e)

                logger.error(final_err)

            if df is None:
                msg_list.append("No downloaded data for " + str(start_date) +
                                " - " + str(finish_date) +
                                ". Is this a holiday?")

        # Returns a status containing any failed downloads, which can be read by a user
        return msg_list, df_dict

import os

from tcapy.conf.constants import Constants
from tcapy.util.loggermanager import LoggerManager
from tcapy.data.databasesource import DatabaseSourceCSV
from tcapy.data.databasesource import DatabaseSourceArctic, DatabaseSourcePyStore, DatabaseSourceInfluxDB, DatabaseSourceKDB

constants = Constants()

if __name__ == '__main__':

    logger = LoggerManager.getLogger(__name__)

    PLOT_BACK_DATA = False
    data_vendor = 'ncfx' # 'dukascopy' or 'ncfx'

    # Either use 'arctic' or 'pystore' or 'influxdb' or 'kdb' to store market tick data
    market_data_store = 'arctic'

    logger.info("About to upload data to " + market_data_store)

    ## YOU WILL NEED TO CHANGE THE BELOW LINES #########################################################################

    # Parameters for testing
    if True:
        data_vendor = 'testharness'
Example #24
0
    def calculate_benchmark(self, trade_order_df=None, market_df=None, trade_order_name=None, bid_benchmark=None,
                            ask_benchmark=None,
                            weighting_field=None,
                            benchmark_date_start_field=None,
                            benchmark_date_end_field=None, start_time_before_offset=None, finish_time_after_offset=None,
                            overwrite_time_of_day=None, overwrite_timezone=None):

        if self._check_empty_benchmark_market_trade_data(trade_order_name, trade_order_df, market_df):
            return trade_order_df, market_df

        # If fields have not been specified, then take them from the field variables
        if bid_benchmark is None: bid_benchmark = self._bid_benchmark
        if ask_benchmark is None: ask_benchmark = self._ask_benchmark
        if weighting_field is None: weighting_field = self._weighting_field
        if benchmark_date_start_field is None: benchmark_date_start_field = self._benchmark_date_start_field
        if benchmark_date_end_field is None: benchmark_date_end_field = self._benchmark_date_end_field
        if start_time_before_offset is None: start_time_before_offset = self._start_time_before_offset
        if finish_time_after_offset is None: finish_time_after_offset = self._finish_time_after_offset
        if overwrite_time_of_day is None: overwrite_time_of_day = self._overwrite_time_of_day
        if overwrite_timezone is None: overwrite_timezone = self._overwrite_timezone

        # If no weighting field has been specified, we don't require it to be in the market data
        if weighting_field is None:
            weighting_field_condition = True
        else:
            weighting_field_condition = weighting_field in market_df.columns

        if bid_benchmark in market_df.columns and ask_benchmark in market_df.columns and weighting_field_condition:
            trade_order_df[self._benchmark_name] = np.nan

            if benchmark_date_start_field is not None and benchmark_date_end_field is not None and \
                    benchmark_date_start_field in trade_order_df.columns and benchmark_date_end_field in trade_order_df.columns:
                date_start = trade_order_df[benchmark_date_start_field].values
                date_end = trade_order_df[benchmark_date_end_field].values
            else:
                date_start = trade_order_df.index.values
                date_end = trade_order_df.index.values

            # Overwrite every trade/order start/end time by a specific time of day if this has been specified
            if overwrite_time_of_day is not None and overwrite_timezone is not None:
                date_start = self._time_series_ops.overwrite_time_of_day_in_datetimeindex(date_start,
                                overwrite_time_of_day,
                                old_tz=trade_order_df.index.tz,
                                overwrite_timezone=overwrite_timezone)
                date_end = self._time_series_ops.overwrite_time_of_day_in_datetimeindex(date_end,
                                overwrite_time_of_day,
                                old_tz=trade_order_df.index.tz,
                                overwrite_timezone=overwrite_timezone)

            # Subtract a user defined time from the start time of the order (or point in time for a trade) if specified
            if start_time_before_offset is not None:
                date_start = date_start - self._time_series_ops.get_time_delta(start_time_before_offset)

            # Add a user defined time from the finish time of the order (or point in time for a trade) if specified
            if finish_time_after_offset is not None:
                date_end = date_end + self._time_series_ops.get_time_delta(finish_time_after_offset)

            date_start = np.searchsorted(market_df.index, date_start)
            date_end = np.searchsorted(market_df.index, date_end)
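            # np.searchsorted maps each trade/order timestamp to its insertion position in the
            # (sorted) market data index, so date_start/date_end become integer window bounds
            # into the bid_price/ask_price arrays below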
            bid_price = market_df[bid_benchmark].values
            ask_price = market_df[ask_benchmark].values

            try:
                trade_order_df[self._benchmark_name] = \
                    self._benchmark_calculation(trade_order_df, bid_price, ask_price, date_start, date_end,
                                                weights=self._generate_weights(market_df, weighting_field=weighting_field))
            except:
                LoggerManager.getLogger(__name__).warning(
                    self._benchmark_name + " not calculated (check if has correct input fields)")

        else:
            LoggerManager.getLogger(__name__).warning(
                str(bid_benchmark) + ", " + str(ask_benchmark) + " or " + str(weighting_field) + " may not be in the market data")

        return trade_order_df, market_df
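
# A minimal, illustrative sketch (not taken from the class above; the helper name and signature are
# hypothetical) of how a benchmark calculation could consume the arrays prepared by
# calculate_benchmark: integer window bounds from np.searchsorted plus bid/ask arrays, producing
# one weighted mid price per trade/order.

import numpy as np


def example_weighted_mid_benchmark(bid_price, ask_price, date_start, date_end, weights=None):
    """Weighted average mid price over each [start, end) window (rough sketch only)."""
    mid = (bid_price + ask_price) / 2.0

    if weights is None:
        weights = np.ones_like(mid)

    benchmark = np.full(len(date_start), np.nan)

    for i, (s, e) in enumerate(zip(date_start, date_end)):
        # Only calculate when the window is non-empty and carries some weight
        if e > s and np.sum(weights[s:e]) > 0:
            benchmark[i] = np.average(mid[s:e], weights=weights[s:e])

    return benchmark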
Example #25
0
    def _fetch_market_data(self,
                           start,
                           finish,
                           ticker,
                           write_to_disk=True,
                           read_cached_from_disk=True,
                           web_proxies=constants.web_proxies):
        logger = LoggerManager.getLogger(__name__)

        key = (str(start) + str(finish) + ticker + '_' +
               self._get_postfix()).replace(":", '_')

        filename = os.path.join(self.temp_data_folder, key) + '.' + fileformat
        util_func = UtilFunc()

        start_time_stamp = pd.Timestamp(start)
        finish_time_stamp = pd.Timestamp(finish)

        if self._remove_saturday():
            weekend_data = "Saturday? " + key

            # Ignore Saturday, and don't attempt to download
            if start_time_stamp.dayofweek == 5 or finish_time_stamp.dayofweek == 5:
                return None, weekend_data

        if self._remove_weekend_points():
            weekend_data = "Weekend? " + key

            if start_time_stamp.dayofweek == 6 and start_time_stamp.hour < 20:
                return None, weekend_data

            if start_time_stamp.dayofweek == 4 and start_time_stamp.hour > 22:
                return None, weekend_data
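
            # dayofweek: Monday=0 ... Sunday=6, so the two checks above skip Sunday points before
            # 20:00 and Friday points from 23:00 onwards (hours appear to be treated as UTC),
            # ie. outside typical FX market hours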

        df = None

        if read_cached_from_disk:
            if os.path.exists(filename):
                df = util_func.read_dataframe_from_binary(filename,
                                                          format=binary_format)

                if df is not None:
                    logger.debug("Read " + filename + " from disk")

        if df is None:
            # Convert tcapy ticker into vendor ticker
            df = self._get_input_data_source().fetch_market_data(
                start,
                finish,
                ticker=self._get_tickers_vendor()[ticker],
                web_proxies=web_proxies)

            if df is not None:
                df = df.drop('ticker', axis=1)

                if write_to_disk:
                    # Write a small temporary dataframe to disk (if the process fails later, these can be picked up
                    # without having to call the external vendor again)
                    util_func.write_dataframe_to_binary(df,
                                                        filename,
                                                        format=binary_format)

        msg = None

        if df is None:
            msg = "No data? " + key

        return df, msg
Example #26
0
    def _parallel_get_market_trade_metrics(self, tca_request_list,
                                           dummy_market):
        logger = LoggerManager.getLogger(__name__)

        market_holder_list = DataFrameHolder()
        trade_order_holder_list = DataFrameHolder()

        # For each currency pair, collect the trades and market data, then calculate benchmarks and slippage
        result = []

        keep_looping = True

        # If we have also asked for trades/orders
        if tca_request_list[0].trade_order_mapping is not None:
            point_in_time_executions_only = \
                self._util_func.dict_key_list(tca_request_list[0].trade_order_mapping) == ['trade_df']
        else:
            point_in_time_executions_only = True
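        # If only 'trade_df' has been requested, the executions are points in time (no parent
        # orders that could straddle date boundaries), which makes splitting requests by date safer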

        parallel_library = tca_request_list[0].multithreading_params[
            'parallel_library']

        if parallel_library == 'single':
            # from tcapy.analysis.tcatickerloaderimpl import TCATickerLoaderImpl
            tca_ticker_loader = Mediator.get_tca_ticker_loader(
                version=self._version)

        start_date = tca_request_list[0].start_date
        finish_date = tca_request_list[0].finish_date

        # Parameters for the loop
        i = 0
        no_of_tries = 5

        # Error trapping for Celery; if an event has failed, retry it
        while i < no_of_tries and keep_looping:

            try:
                # For each TCA request kick off a thread
                for tca_request_single_ticker in tca_request_list:

                    # Split up the request by date (monthly/weekly chunks)
                    tca_request_date_split = self._split_tca_request_by_date(
                        tca_request_single_ticker,
                        tca_request_single_ticker.ticker,
                        period=tca_request_single_ticker.
                        multithreading_params['cache_period'])

                    if not(constants.multithreading_params['splice_request_by_dates']) \
                                or tca_request_list[0].tca_type == 'detailed' \
                                or tca_request_list[0].tca_type == 'compliance' \
                                or tca_request_list[0].summary_display == 'candlestick'\
                                or not(point_in_time_executions_only):

                        if 'celery' in parallel_library:
                            # Load all the data for this ticker and THEN calculate the metrics on it
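                            # A Celery chord runs the group of header tasks in parallel and then
                            # passes the list of their results into the callback task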
                            result.append(
                                chord(
                                    (get_market_trade_holder_via_celery.s(
                                        tca_request_data) for tca_request_data
                                     in tca_request_date_split),
                                    calculate_metrics_single_ticker_via_celery.
                                    s(tca_request_single_ticker,
                                      dummy_market)).apply_async())
                        elif parallel_library == 'single':
                            # This is not actually parallel, but is mainly for debugging purposes
                            for tca_request_s in tca_request_date_split:

                                # print(tca_request_s.start_date)
                                market_df, trade_order_df_dict = tca_ticker_loader.get_market_trade_order_holder(
                                    tca_request_s, return_cache_handles=False)

                                market_df, trade_order_df_list, ticker, trade_order_keys = \
                                    tca_ticker_loader.calculate_metrics_single_ticker((market_df, trade_order_df_dict),
                                                                                        tca_request_s, dummy_market)

                                market_holder_list.add_dataframe(
                                    market_df, ticker)

                                trade_order_holder_list.add_dataframe_dict(
                                    dict(
                                        zip(trade_order_keys,
                                            trade_order_df_list)))

                    else:
                        # Otherwise work on parallel chunks by date
                        # doesn't currently work with orders which straddle day/week/month boundaries
                        # but should work with points in time
                        #
                        # In practice, it's not really much faster than the above code
                        if 'celery' == parallel_library:

                            # For each ticker/date combination load data and process chunk (so can do fully in parallel)
                            result.append(
                                group(
                                    get_market_trade_holder_and_calculate_metrics_single_ticker_via_celery
                                    .s(tca_request_data, dummy_market)
                                    for tca_request_data in
                                    tca_request_date_split).apply_async())

                # Now combine the results from the parallel operations, if using celery
                if 'celery' in parallel_library:

                    # Careful, when the output is empty!
                    output = [
                        p.get(timeout=constants.celery_timeout_seconds)
                        for p in result if p is not None
                    ]

                    # If pipelined/spliced by dates, we will have a list of lists, so flatten it into one
                    output = self._util_func.flatten_list_of_lists(output)

                    for market_df, trade_order_df_list, ticker, trade_order_keys in output:
                        market_holder_list.add_dataframe(market_df, ticker)
                        # market_df_dict[ticker] = market_df

                        trade_order_holder_list.add_dataframe_dict(
                            dict(zip(trade_order_keys, trade_order_df_list)))

                    del result
                    del output

                keep_looping = False

            except DateException as e:
                raise e

            except TradeMarketNonOverlapException as e:
                raise e

            except DataMissingException as e:
                raise e

            except ErrorWritingOverlapDataException as e:
                raise e

            # Exception likely related to Celery and possibly lack of communication with Redis message broker
            # or Memcached results backend
            except Exception as e:
                if i == no_of_tries - 1:
                    err_msg = "Failed with " + parallel_library + " after multiple attempts: " + str(
                        e) + ", " + str(traceback.format_exc())

                    raise Exception(err_msg)

                i = i + 1

                logger.warning("Failed with " + parallel_library +
                               ", trying again for " + str(i) + " time: " +
                               str(e) + ", " + str(traceback.format_exc()))

        logger.debug("Finished parallel computation")

        # Expand out the DataFrame holders into dictionaries of DataFrames
        market_df_dict = market_holder_list.get_combined_dataframe_dict()
        trade_order_results_df_dict = trade_order_holder_list.get_combined_dataframe_dict(
            start_date=start_date, finish_date=finish_date)

        # TODO add candlestick drawing here for cases when using split threading by date
        trade_order_results_df_dict = self._util_func.remove_keymatch_dict(
            trade_order_results_df_dict, 'market_df_downsampled')

        return market_df_dict, trade_order_results_df_dict
Example #27
0
    def _apply_summary_metrics(self, tca_request_list,
                               trade_order_results_df_dict, market_df_dict):

        trade_order_list = self._util_func.dict_key_list(
            trade_order_results_df_dict.keys())
        market_list = self._util_func.dict_key_list(market_df_dict.keys())

        if not (isinstance(trade_order_list, list)):
            trade_order_list = [trade_order_list]

        if not (isinstance(market_list, list)):
            market_list = [market_list]

        # First get the market data (for doing bid/ask on distributions) - only does the first ticker!
        market_df = market_df_dict[tca_request_list[0].ticker]

        logger = LoggerManager.getLogger(__name__)
        logger.debug("Constructing results form to summarize analysis...")

        # Calculate user specified aggregate result forms (eg. timelines, distribution etc.) for each trade/order
        # which has been selected
        results_form = tca_request_list[0].results_form
        join_tables = tca_request_list[0].join_tables

        # If dummy market (ie. don't return market data to the user) has been specified then market data cannot
        # be included in ResultsForm calculations
        if results_form is not None:

            # Go through all the trade/orders doing statistical aggregations
            for i in range(0, len(trade_order_results_df_dict)):

                # Ignore 'fig' objects which are Plotly JSON Figures, and only process DataFrames
                if 'df' in trade_order_list[i]:
                    for r in results_form:

                        # Filter the trades for the event type which has been requested (eg. 'trade' or 'placement')
                        trade_order_df = self._trade_order_tag.filter_trade_order(
                            trade_order_df=trade_order_results_df_dict[
                                trade_order_list[i]],
                            tag_value_combinations={
                                'event_type': tca_request_list[0].event_type
                            })

                        # Calculate aggregate ResultForm
                        results = r.aggregate_results(
                            market_trade_order_df=trade_order_df,
                            market_df=market_df,
                            market_trade_order_name=trade_order_list[i])

                        if results[0] is not None:
                            for results_form_df, results_form_name in results:
                                trade_order_results_df_dict[
                                    results_form_name] = results_form_df

            # Go through all the market data doing statistical aggregations
            for i in range(0, len(market_df_dict)):

                # Ignore 'fig' objects which are Plotly JSON Figures, and only process DataFrames which are not empty
                if 'fig' not in market_list[i] and market_df_dict[
                        market_list[i]] is not None:
                    if not (market_df_dict[market_list[i]].empty):
                        for r in results_form:

                            # Calculate aggregate ResultForm
                            results = r.aggregate_results(
                                market_trade_order_df=market_df_dict[
                                    market_list[i]],
                                market_df=market_df_dict[market_list[i]],
                                market_trade_order_name=market_list[i])

                            if results[0] is not None:
                                for results_form_df, results_form_name in results:
                                    trade_order_results_df_dict[
                                        results_form_name] = results_form_df

        logger.debug("Now join table results...")

        # As a final stage, join together any tables which have been specified by the user
        # for example: does the user want to combine certain metrics or trades together?
        if join_tables is not None:
            for j in join_tables:
                results = j.aggregate_tables(
                    df_dict=trade_order_results_df_dict)

                if results != []:
                    if results[0] is not None:
                        for results_form_df, results_form_name in results:
                            trade_order_results_df_dict[
                                results_form_name] = results_form_df

        logger.debug(
            "Finished calculating results form and join table results!")

        return trade_order_results_df_dict
Example #28
0
    def _combine_mini_df_from_disk_single_thread(self,
                                                 ticker,
                                                 remove_duplicates=True):

        logger = LoggerManager.getLogger(__name__)
        time_series_ops = TimeSeriesOps()

        logger.info('Getting ' + ticker + ' filenames...')
        temp_data_folder = self.temp_data_folder

        filename_list = []

        for root, dirnames, filenames in os.walk(temp_data_folder):

            for filename in filenames:
                if ticker in filename and '.' + fileformat in filename:
                    filename_h5_parquet = os.path.join(root, filename)

                    # If the file is less than 10MB, add it (otherwise it is likely a very large aggregated file!)
                    if os.path.getsize(filename_h5_parquet) < 10 * 1024 * 1024:
                        filename_list.append(filename_h5_parquet)

        df_list = []

        util_func = UtilFunc()

        logger.info('Loading ' + ticker + ' mini dataframes into memory')

        i = 0

        if len(filename_list) == 0:
            logger.warn("Looks like there are no files for " + ticker +
                        " in " + temp_data_folder +
                        ". Are you sure path is correct?")

        # Go through each mini file which represents a few minutes of data and append it
        for filename in filename_list:
            filesize = 0

            try:
                filesize = os.path.getsize(filename) / 1024.0
                df = util_func.read_dataframe_from_binary(filename,
                                                          format=binary_format)

                i = i + 1

                # Every 100 files, print reading progress
                if i % 100 == 0:
                    logger.info('Reading ' + filename + ' number ' + str(i))

                if df is not None:
                    df = df.sort_index()
                    df = self._remove_duplicates_time_series(df,
                                                             remove_duplicates,
                                                             time_series_ops,
                                                             field='mid')

                    df_list.append(df)
            except Exception as e:
                logger.warn('Failed to parse ' + filename + " of " +
                            str(filesize) + "KB")  # + str(e))

            # if i > 1000:
            #    break

        # Assume UTC time (don't want to mix UTC and non-UTC in database!)
        if df_list == []:
            logger.warn('No dataframe read for ' + ticker +
                        ', cannot combine!')

            return

        logger.info('About to combine ' + ticker +
                    ' into large dataframe to write to disk...')

        df = pd.concat(df_list)
        df = time_series_ops.localize_as_UTC(df)

        df = df.sort_index()

        df = self._remove_duplicates_time_series(df,
                                                 remove_duplicates,
                                                 time_series_ops,
                                                 field='mid')

        postfix = '-' + self._get_postfix() + '-with-duplicates'

        if remove_duplicates:
            postfix = '-' + self._get_postfix() + '-no-duplicates'

        filename = os.path.join(self.temp_large_data_folder,
                                ticker + postfix) + '.' + fileformat

        df = time_series_ops.localize_as_UTC(df)
        util_func.write_dataframe_to_binary(df, filename, format=binary_format)
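
# An illustrative sketch (hypothetical helper; not the _remove_duplicates_time_series used above):
# dropping consecutively repeated mid prices is roughly what a duplicate-removal step on tick data
# amounts to, keeping a row only when the value actually changes.

import pandas as pd


def drop_consecutive_duplicate_mids(df, field='mid'):
    """Keep rows where `field` differs from the previous row's value (rough sketch only)."""
    return df[df[field].ne(df[field].shift())]

# eg. deduped_df = drop_consecutive_duplicate_mids(tick_df)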
Example #29
0
        def callback(*args):
            """Kicks off fetching of data of market data and TCA calculations for a specific currency pair. Caches the data
            in a VolatileCache instance, ready to be read in by the other charts.

            Parameters
            ----------
            ticker_val : str
                ticker to be used in TCA calculations

            start_date_val : str
                Start date of TCA analysis

            start_time_val : str
                Start time of TCA analysis

            finish_date_val : str
                Finish date of TCA analysis

            finish_time_val : str
                Finish time of TCA analysis

            venue_val : str
                Venue data to be used

            n_clicks : int
                Number of clicks

            Returns
            -------
            str
            """
            start = time.time()

            tag = tca_type + '-calculation-button'

            old_clicks = self._session_manager.get_session_clicks(tag)

            # Make sure none of the other charts/links are plotted until we have completed this!
            self._session_manager.set_session_flag([
                self._plot_flags['aggregated'], self._plot_flags['detailed'],
                self._plot_flags['compliance']
            ], False)

            logger = LoggerManager.getLogger(__name__)

            if tca_type == 'detailed':
                ticker_val, start_date_val, start_time_val, finish_date_val, finish_time_val, \
                broker_val, algo_val, venue_val, market_data_val, metric_val, n_clicks = args

                # Catch cases where users repeatedly click, which can cause misalignment in clicks
                self._session_manager.set_session_clicks(tag,
                                                         n_clicks,
                                                         old_clicks=old_clicks)

                logger.debug(
                    self.create_generate_button_msg(old_clicks, n_clicks))

                # Make sure all the parameters have been selected
                if ticker_val != '' and venue_val != '' and start_date_val != '' and start_time_val != '' and \
                        finish_date_val != '' and finish_time_val != '' and market_data_val != '' and broker_val != '' and \
                        algo_val != '' and n_clicks > old_clicks:

                    # Expand tickers/broker fields etc, in case for example 'All' has been specified or any other groups
                    broker_val = self._util_func.populate_field(
                        broker_val,
                        constants.available_brokers_dictionary,
                        exception_fields='All')
                    algo_val = self._util_func.populate_field(
                        algo_val,
                        constants.available_algos_dictionary,
                        exception_fields='All')
                    venue_val = self._util_func.populate_field(
                        venue_val,
                        constants.available_venues_dictionary,
                        exception_fields='All')

                    # Combine the start date/time and finish date/time
                    start_date_val = start_date_val + ' ' + start_time_val
                    finish_date_val = finish_date_val + ' ' + finish_time_val

                    metric_val = metric_val.replace(' ', '_')

                    logger.debug('Calculation click old: ' + str(old_clicks) +
                                 " clicks vs new " + str(n_clicks))

                    self._session_manager.set_session_clicks(tag, n_clicks)
                    self._session_manager.set_session_flag('metric',
                                                           value=metric_val)

                    self._session_manager.set_session_flag(
                        'detailed-visualization', value=True)

                    logger.info('Selected ' + ticker_val + " " +
                                start_date_val + " - " + finish_date_val)

                    # Check that dates are less than 1 month apart
                    if pd.Timestamp(finish_date_val) - pd.Timestamp(
                            start_date_val) > pd.Timedelta(
                                days=constants.max_plot_days):
                        return "Status: Cannot plot more than " + str(
                            constants.max_plot_days) + " days!"
                    elif pd.Timestamp(start_date_val) >= pd.Timestamp(
                            finish_date_val):
                        return "Status: Start date must be before the end date"

                    try:
                        #if True:

                        # Clear the cache for the current user
                        self._glob_volatile_cache.clear_key_match(
                            self._session_manager.get_session_id())

                        results_form = [
                            # Calculate the distribution of the metric for trades/orders, broken down by trade side (buy/sell)
                            DistResultsForm(
                                trade_order_list=['trade_df', 'order_df'],
                                metric_name=metric_val,
                                aggregate_by_field='side',
                                scalar=10000.0,
                                weighting_field=
                                'executed_notional_in_reporting_currency'),

                            # Create a table of the markout of every trade
                            TableResultsForm(
                                trade_order_list=['trade_df'],
                                metric_name='markout',
                                filter_by='all',
                                replace_text={
                                    'markout_': '',
                                    'executed_notional': 'exec not',
                                    'notional_currency': 'exec not cur'
                                },
                                keep_fields=[
                                    'executed_notional', 'side',
                                    'notional_currency'
                                ],
                                scalar={
                                    'all': 10000.0,
                                    'exclude': ['executed_notional', 'side']
                                },
                                round_figures_by={
                                    'all': 2,
                                    'executed_notional': 0,
                                    'side': 0
                                },
                                weighting_field='executed_notional')
                        ]

                        benchmark_calcs = [
                            # Calculate the arrival prices for every trade/order
                            BenchmarkArrival(
                                trade_order_list=['trade_df', 'order_df']),

                            # Calculate the VWAP for each order
                            BenchmarkVWAP(trade_order_list=['order_df']),

                            # Calculate the TWAP for each order
                            BenchmarkTWAP(trade_order_list=['order_df'])
                        ]

                        metric_calcs = [
                            metric_val,
                            MetricMarkout(trade_order_list=['trade_df'])
                        ]

                        # Get from cache, note given that we are in the first part of the chain we should force it to calculate!
                        sparse_market_trade_df = self.get_cached_computation_analysis(
                            key='sparse_market_trade_df',
                            start_date=start_date_val,
                            finish_date=finish_date_val,
                            ticker=ticker_val,
                            venue=venue_val,
                            market_data=market_data_val,
                            event_type='trade',
                            dummy_market=False,
                            broker=broker_val,
                            algo=algo_val,
                            metric_calcs=metric_calcs,
                            metric_trade_order_list=['trade_df', 'order_df'],
                            benchmark_calcs=benchmark_calcs,
                            tca_type='detailed',
                            tca_engine=self._tca_engine,
                            results_form=results_form,
                            force_calculate=True)

                        calc_start = sparse_market_trade_df.index[0]
                        calc_end = sparse_market_trade_df.index[-1]

                        detailed_title = self.create_status_msg_flags(
                            'detailed', ticker_val, calc_start, calc_end)

                    except Exception as e:
                        LoggerManager().getLogger(__name__).exception(e)

                        return "Status: error " + str(e) + ". Check dates?"

                    finish = time.time()

                    return 'Status: calculated ' + str(round(
                        finish - start, 3)) + "s for " + detailed_title

            elif tca_type == 'aggregated':
                ticker_val, start_date_val, finish_date_val, broker_val, algo_val, venue_val, reload_val, market_data_val, \
                event_type_val, metric_val, n_clicks = args

                # Catch cases where users repeatedly click, which can cause misalignment in clicks
                self._session_manager.set_session_clicks(tag,
                                                         n_clicks,
                                                         old_clicks=old_clicks)

                logger.debug(
                    self.create_generate_button_msg(old_clicks, n_clicks))

                if ticker_val != '' and start_date_val != '' and venue_val != '' \
                        and finish_date_val != '' and reload_val != '' and event_type_val != '' and metric_val != '' and \
                        n_clicks > old_clicks:

                    # Expand tickers/broker fields etc, in case for example 'All' has been specified or any other groups
                    ticker_val_list = self._util_func.populate_field(
                        ticker_val, constants.available_tickers_dictionary)
                    broker_val_list = self._util_func.populate_field(
                        broker_val, constants.available_brokers_dictionary)
                    algo_val_list = self._util_func.populate_field(
                        algo_val, constants.available_algos_dictionary)
                    venue_val_list = self._util_func.populate_field(
                        venue_val, constants.available_venues_dictionary)

                    metric_val = metric_val.replace(' ', '_')

                    logger.debug('Calculation click old: ' + str(old_clicks) +
                                 " clicks vs new " + str(n_clicks))

                    self._session_manager.set_session_clicks(tag, n_clicks)
                    self._session_manager.set_session_flag('metric',
                                                           value=metric_val)

                    self._session_manager.set_session_flag(
                        'aggregated-visualization', True)

                    try:
                        # if True:

                        # Clear the cache for the current user
                        self._glob_volatile_cache.clear_key_match(
                            self._session_manager.get_session_id())

                        results_form = [
                            # Show the distribution of the selected metric for trades weighted by notional
                            # aggregated by ticker and then by venue
                            DistResultsForm(
                                trade_order_list=['trade_df'],
                                metric_name=metric_val,
                                aggregate_by_field=['ticker', 'venue'],
                                weighting_field=
                                'executed_notional_in_reporting_currency'),

                            # Display the timeline of metrics averaged by day (and weighted by notional)
                            TimelineResultsForm(
                                trade_order_list=['trade_df'],
                                by_date='date',
                                metric_name=metric_val,
                                aggregation_metric='mean',
                                aggregate_by_field='ticker',
                                scalar=10000.0,
                                weighting_field=
                                'executed_notional_in_reporting_currency'),

                            # Display a bar chart showing the average metric weighted by notional and aggregated by
                            # ticker and venue
                            BarResultsForm(
                                trade_order_list=['trade_df'],
                                metric_name=metric_val,
                                aggregation_metric='mean',
                                aggregate_by_field=['ticker', 'venue'],
                                scalar=10000.0,
                                weighting_field=
                                'executed_notional_in_reporting_currency')
                        ]

                        try:
                            # if True:
                            timeline_trade_df_metric_by_ticker = self.get_cached_computation_analysis(
                                key='timeline_trade_df_' + metric_val +
                                '_by_ticker',
                                start_date=start_date_val,
                                finish_date=finish_date_val,
                                event_type=event_type_val,
                                ticker=ticker_val_list,
                                broker=broker_val_list,
                                algo=algo_val_list,
                                venue=venue_val_list,
                                market_data=market_data_val,
                                dummy_market=True,
                                tca_engine=self._tca_engine,
                                tca_type='aggregated',
                                metric_calcs=metric_val,
                                metric_trade_order_list=['trade_df'],
                                results_form=results_form,
                                force_calculate=True,
                                reload_val=reload_val,
                                trade_order_mapping=['trade_df'])

                            calc_start = timeline_trade_df_metric_by_ticker.index[
                                0]
                            calc_end = timeline_trade_df_metric_by_ticker.index[
                                -1]

                            aggregated_title = self.create_status_msg_flags(
                                'aggregated', ticker_val, calc_start, calc_end)

                            logger.debug('Plotted aggregated summary plot!')

                            finish = time.time()

                        except Exception as e:
                            LoggerManager().getLogger(__name__).exception(e)

                            return "Status: error - " + str(
                                e) + ". Check data exists for these dates?"

                    except Exception as e:
                        LoggerManager().getLogger(__name__).exception(e)

                        return 'Status: error - ' + str(
                            e) + ". Check data exists for these dates?"

                    return 'Status: calculated ' + str(round(
                        finish - start, 3)) + "s for " + aggregated_title

            elif tca_type == 'compliance':
                ticker_val, start_date_val, finish_date_val, broker_val, algo_val, venue_val, reload_val, market_data_val, \
                filter_time_of_day_val, start_time_of_day_val, finish_time_of_day_val, slippage_bounds_val, visualization_val, n_clicks = args

                # Catch cases where users repeatedly click, which can cause misalignment in clicks
                self._session_manager.set_session_clicks(tag,
                                                         n_clicks,
                                                         old_clicks=old_clicks)

                logger.debug(
                    self.create_generate_button_msg(old_clicks, n_clicks))

                if ticker_val != '' and start_date_val != '' and broker_val != '' and algo_val != '' and venue_val != '' \
                        and finish_date_val != '' and reload_val != '' and filter_time_of_day_val != '' \
                        and start_time_of_day_val != '' and finish_time_of_day_val != '' and slippage_bounds_val != '' \
                        and n_clicks > old_clicks:

                    ticker_val_list = self._util_func.populate_field(
                        ticker_val, constants.available_tickers_dictionary)
                    broker_val_list = self._util_func.populate_field(
                        broker_val,
                        constants.available_brokers_dictionary,
                        exception_fields='All')
                    algo_val_list = self._util_func.populate_field(
                        algo_val,
                        constants.available_algos_dictionary,
                        exception_fields='All')
                    venue_val_list = self._util_func.populate_field(
                        venue_val,
                        constants.available_venues_dictionary,
                        exception_fields='All')

                    logger.debug('Calculation click old: ' + str(old_clicks) +
                                 " clicks vs new " + str(n_clicks))

                    self._session_manager.set_session_clicks(tag, n_clicks)

                    if visualization_val == 'yes':
                        self._session_manager.set_session_flag(
                            'compliance-visualization', True)
                    else:
                        self._session_manager.set_session_flag(
                            'compliance-visualization', False)

                    try:
                        # if True:

                        # Clear the cache for the current user
                        self._glob_volatile_cache.clear_key_match(
                            self._session_manager.get_session_id())

                        slippage_bounds = 0.0
                        overwrite_bid_ask = True

                        if slippage_bounds_val == 'bid/ask':
                            overwrite_bid_ask = False
                        else:
                            slippage_bounds = float(slippage_bounds_val)

                        metric_calcs = [
                            # Calculate slippage for trades
                            MetricSlippage(trade_order_list='trade_df'),
                        ]

                        benchmark_calcs = [
                            # Generate the spread to mid for market data (in certain cases artificially create a spread)
                            BenchmarkSpreadToMid(
                                bid_mid_bp=slippage_bounds,
                                ask_mid_bp=slippage_bounds,
                                overwrite_bid_ask=overwrite_bid_ask)
                        ]

                        results_form = [
                            # Display a table of all the anomalous trades by slippage (ie. outside bid/ask)
                            TableResultsForm(
                                # Only display for trades
                                trade_order_list=['trade_df'],

                                # Display slippage
                                metric_name='slippage',

                                # Order by the worst slippage
                                filter_by='worst_all',

                                # Replace text on table to make it look nicer
                                replace_text={
                                    'markout_': '',
                                    'executed_notional': 'exec not',
                                    '_currency': ' cur',
                                    '_in_reporting': ' in rep',
                                    'slippage_benchmark': 'benchmark',
                                    'slippage_anomalous': 'anomalous',
                                    'broker_id': 'broker ID',
                                    'algo_id': 'algo ID',
                                    'executed_price': 'price'
                                },
                                exclude_fields_from_avg=[
                                    'slippage_anomalous', 'slippage_benchmark',
                                    'side'
                                ],

                                # Only select trades outside bid/ask (ie. where slippage anomalous = 1)
                                tag_value_combinations={
                                    'slippage_anomalous': 1.0
                                },

                                # Display several columns
                                keep_fields=[
                                    'ticker', 'broker_id', 'algo_id',
                                    'notional_currency', 'executed_notional',
                                    'executed_notional_in_reporting_currency',
                                    'side', 'executed_price'
                                ],

                                # Multiply slippage field by 10000 (to convert into basis points)
                                scalar={'slippage': 10000.0},

                                # Round figures to make them easier to read
                                round_figures_by={
                                    'executed_notional': 0,
                                    'executed_notional_in_reporting_currency':
                                    0,
                                    'side': 0,
                                    'slippage': 2,
                                    'slippage_benchmark': 4
                                }),

                            # Get the total notional executed by broker (in reporting currency)
                            BarResultsForm(
                                # Select trades
                                trade_order_list=['trade_df'],

                                # Aggregate by broker name
                                aggregate_by_field='broker_id',

                                # Select the notional for analysis
                                metric_name='executed_notional_in_reporting_currency',  # analyse notional

                                # Sum all the notionals
                                aggregation_metric='sum',

                                # Round figures
                                round_figures_by=0)
                        ]

                        # Reformat tables for notional by broker
                        join_tables = [
                            # JoinTables(
                            # tables_dict={'table_name': 'jointables_broker_id_df',
                            #
                            #              # fetch the following calculated tables
                            #              'table_list': [
                            #                  'bar_order_df_executed_notional_in_reporting_currency_by_broker_id'],
                            #
                            #              # append to the columns of each table
                            #              'column_list': ['notional (rep cur)'],
                            #              'replace_text': {'broker_id': 'broker ID'}
                            #              })
                        ]

                        try:
                            # if True:
                            trade_df = self.get_cached_computation_analysis(
                                key='trade_df',
                                start_date=start_date_val,
                                finish_date=finish_date_val,
                                start_time_of_day=start_time_of_day_val,
                                finish_time_of_day=finish_time_of_day_val,
                                filter_time_of_day=filter_time_of_day_val,
                                event_type='trade',
                                ticker=ticker_val_list,
                                broker=broker_val_list,
                                algo=algo_val_list,
                                venue=venue_val_list,
                                dummy_market=True,
                                market_data=market_data_val,
                                tca_engine=self._tca_engine,
                                tca_type='compliance',
                                metric_calcs=metric_calcs,
                                benchmark_calcs=benchmark_calcs,
                                metric_trade_order_list=['trade_df'],
                                results_form=results_form,
                                join_tables=join_tables,
                                force_calculate=True,
                                reload_val=reload_val,
                                trade_order_mapping=['trade_df'])

                            calc_start = trade_df.index[0]
                            calc_end = trade_df.index[-1]

                            compliance_title = self.create_status_msg_flags(
                                'compliance', ticker_val, calc_start, calc_end)

                            logger.debug(
                                'Generated compliance summary.. awaiting plot callbacks!'
                            )

                            finish = time.time()

                        except Exception as e:
                            logger.exception(e)

                            return "Status: error " + str(
                                e) + ". Check data exists for these dates?"

                    except Exception as e:
                        logger.exception(e)

                        return 'Status: error ' + str(
                            e) + ". Check data exists for these dates?"

                    return 'Status: calculated ' + str(round(
                        finish - start, 3)) + "s for " + compliance_title

            raise dash.exceptions.PreventUpdate(
                "No data changed - " + tca_type
            )  # Not very elegant but only way to prevent plots disappearing
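The TableResultsForm configured above asks for a fairly mechanical post-processing of the trades: keep only the rows flagged as anomalous, rescale slippage into basis points, round selected columns and show the worst trades first. As a rough plain-pandas sketch of that step (illustrative only, not tcapy's actual TableResultsForm implementation, and assuming trade_df already carries the slippage and slippage_anomalous columns produced by MetricSlippage):

import pandas as pd

def summarise_anomalous_trades(trade_df):
    """Illustrative sketch only: keep trades flagged as anomalous, convert slippage
    to basis points, round figures and order by the worst slippage."""

    keep_fields = ['ticker', 'broker_id', 'algo_id', 'notional_currency',
                   'executed_notional', 'executed_notional_in_reporting_currency',
                   'side', 'executed_price', 'slippage']

    # Only select trades outside the bid/ask (ie. slippage_anomalous == 1)
    anomalous = trade_df[trade_df['slippage_anomalous'] == 1.0].copy()

    # Multiply slippage by 10000 to convert it into basis points
    anomalous['slippage'] = anomalous['slippage'] * 10000.0

    # Round figures to make them easier to read
    anomalous = anomalous.round({'executed_notional': 0,
                                 'executed_notional_in_reporting_currency': 0,
                                 'side': 0, 'slippage': 2})

    # Sort so the worst slippage appears first (assuming more negative = worse)
    anomalous = anomalous.sort_values('slippage')

    return anomalous[[c for c in keep_fields if c in anomalous.columns]]

The real results form additionally renames columns via replace_text and excludes fields such as slippage_anomalous from averaging, but the filtering, scaling and rounding above correspond directly to the parameters passed in the callback.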
Example #30
0
    def calculate_metrics_single_ticker(self, market_trade_order_combo,
                                        tca_request, dummy_market):
        """Calls auxillary methods to get market/trade data for a single ticker. If necessary splits up the request into
        smaller date chunks to collect market and trade data in parallel (using Celery)

        Parameters
        ----------
        market_trade_order_combo : tuple
            Market and trade/order data for the ticker, to be reassembled and trimmed before analysis

        tca_request : TCARequest
            Parameters for the TCA analysis

        dummy_market : bool
            If True, return None in place of the market data

        Returns
        -------
        DataFrame, DataFrameHolder, str
        """

        trade_order_filter = tca_request.trade_order_filter
        benchmark_calcs = tca_request.benchmark_calcs
        metric_calcs = tca_request.metric_calcs
        ticker = tca_request.ticker

        logger = LoggerManager.getLogger(__name__)

        # Reassemble market and trade data from the tuple
        market_df, trade_order_df_dict = self.trim_sort_market_trade_order(
            market_trade_order_combo, tca_request.start_date,
            tca_request.finish_date, tca_request.ticker)

        # Calculate BenchmarkMarket benchmarks, which only require market data and no trade data
        market_df = self.calculate_benchmark_market(market_df, tca_request)

        trade_order_df_values = []
        trade_order_df_keys = []

        # Calculations on trades with market data
        if len(trade_order_df_dict.keys()) > 0 and self._check_valid_market(
                market_df):

            # NOTE: this will not filter orders, only TRADES (as orders do not have venue parameters)
            logger.debug("Filter trades by venue")

            simple_filters = {'venue': tca_request.venue}

            if 'trade_df' in self._util_func.dict_key_list(
                    trade_order_df_dict.keys()):
                for s in simple_filters.keys():
                    trade_order_df_dict[
                        'trade_df'] = self._trade_order_tag.filter_trade_order(
                            trade_order_df=trade_order_df_dict['trade_df'],
                            tag_value_combinations={s: simple_filters[s]})

            # Do additional more customised post-filtering of the trade/orders (eg. by broker_id, algo_id)
            if trade_order_filter is not None:
                for a in trade_order_filter:
                    trade_order_df_dict = a.filter_trade_order_dict(
                        trade_order_df_dict=trade_order_df_dict)

            # NOTE: this will not filter orders, only TRADES (as orders do not have event type parameters)
            simple_filters = {'event_type': tca_request.event_type}

            if 'trade_df' in self._util_func.dict_key_list(
                    trade_order_df_dict.keys()):
                for s in simple_filters.keys():
                    trade_order_df_dict[
                        'trade_df'] = self._trade_order_tag.filter_trade_order(
                            trade_order_df=trade_order_df_dict['trade_df'],
                            tag_value_combinations={s: simple_filters[s]})

            # Remove any trade/orders which are empty (or missing)
            t_remove = []

            for t in trade_order_df_dict.keys():
                if trade_order_df_dict[t] is None:
                    t_remove.append(t)

                    logger.warning(
                        t + " is missing.. might cause problems later!")
                elif trade_order_df_dict[t].empty:
                    t_remove.append(t)

                    logger.warning(
                        t + " is empty.. might cause problems later!")

            for t in t_remove:
                trade_order_df_dict.pop(t)

            trade_order_list = self._util_func.dict_key_list(
                trade_order_df_dict.keys())

            # Do we have any trades/orders left to analyse?
            if len(trade_order_list) == 0:
                logger.error("No trade/orders for " + ticker)
            else:
                # ok we have some trade/orders left to analyse
                if not (isinstance(trade_order_list, list)):
                    trade_order_list = [trade_order_list]

                logger.debug("Calculating derived fields and benchmarks")

                logger.debug("Calculating execution fields")

                # Calculate derived executed fields for orders
                # can only do this if trade_df is also available
                if len(trade_order_df_dict.keys()) > 1 \
                        and 'trade_df' in self._util_func.dict_key_list(trade_order_df_dict.keys()):

                    # For the orders, calculate the derived fields for executed notional, trade etc.
                    aggregated_notional_fields = 'executed_notional'

                    # Calculate the derived fields of the orders from the trades
                    # also calculate any benchmarks for the orders
                    for i in range(1, len(trade_order_list)):
                        # NOTIONAL_EXECUTED: add derived field for executed price and notional executed for the orders
                        trade_order_df_dict[trade_order_list[i]] = \
                            self._metric_executed_price.calculate_metric(
                                lower_trade_order_df=trade_order_df_dict[trade_order_list[i - 1]],
                                upper_trade_order_df=trade_order_df_dict[trade_order_list[i]],
                                aggregated_ids=constants.order_name + '_pointer_id',
                                aggregated_notional_fields=aggregated_notional_fields,
                                notional_reporting_currency_spot='notional_reporting_currency_mid')[0]

                # TODO not sure about this?
                if 'trade_df' in self._util_func.dict_key_list(
                        trade_order_df_dict.keys()):
                    if 'notional' not in trade_order_df_dict[
                            'trade_df'].columns:
                        trade_order_df_dict['trade_df'][
                            'notional'] = trade_order_df_dict['trade_df'][
                                'executed_notional']

                logger.debug("Calculating benchmarks")

                # Calculate user specified benchmarks for each trade order (which has been selected)
                if benchmark_calcs is not None:

                    for i in range(0, len(trade_order_df_dict)):
                        for b in benchmark_calcs:
                            # For benchmarks which need to be generated on a trade by trade basis (eg. VWAP, arrival etc)
                            if not (isinstance(b, BenchmarkMarket)):
                                logger.debug("Calculating " +
                                             type(b).__name__ + " for " +
                                             trade_order_list[i])

                                if trade_order_df_dict[trade_order_list[i]] is not None:
                                    if not trade_order_df_dict[trade_order_list[i]].empty:
                                        trade_order_df_dict[trade_order_list[i]], _ = \
                                            b.calculate_benchmark(
                                                trade_order_df=trade_order_df_dict[trade_order_list[i]],
                                                market_df=market_df,
                                                trade_order_name=trade_order_list[i])

                logger.debug("Calculating metrics")

                # Calculate user specified metrics for each trade order (which has been selected)
                if metric_calcs is not None:
                    for i in range(0, len(trade_order_df_dict)):
                        for m in metric_calcs:
                            logger.debug("Calculating " + type(m).__name__ +
                                         " for " + trade_order_list[i])

                            if trade_order_df_dict[trade_order_list[i]] is not None:
                                if not trade_order_df_dict[trade_order_list[i]].empty:
                                    trade_order_df_dict[trade_order_list[i]], _ = \
                                        m.calculate_metric(
                                            trade_order_df=trade_order_df_dict[trade_order_list[i]],
                                            market_df=market_df,
                                            trade_order_name=trade_order_list[i])

                logger.debug("Completed derived field calculations for " +
                             ticker)

            trade_order_df_dict = self._calculate_additional_metrics(
                market_df, trade_order_df_dict, tca_request)

            if dummy_market:
                market_df = None

            trade_order_df_keys = self._util_func.dict_key_list(
                trade_order_df_dict.keys())
            trade_order_df_values = []

            for k in trade_order_df_keys:
                trade_order_df_values.append(trade_order_df_dict[k])

        # print("--- dataframes/keys ---")
        # print(trade_order_df_values)
        # print(trade_order_df_keys)

        return market_df, trade_order_df_values, ticker, trade_order_df_keys
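
Note that the trade/order DataFrames are returned as parallel lists of values and keys rather than as a dict, which keeps the per-ticker result easy to serialise when the work is distributed via Celery. A caller can rebuild the dict with a simple zip; a minimal sketch is below (the tca_worker name is only a placeholder for whatever object exposes calculate_metrics_single_ticker, not part of tcapy's API):

# Illustrative sketch: reassemble the per-ticker output into a dict of DataFrames
market_df, trade_order_df_values, ticker, trade_order_df_keys = \
    tca_worker.calculate_metrics_single_ticker(market_trade_order_combo,
                                               tca_request, dummy_market=False)

trade_order_df_dict = dict(zip(trade_order_df_keys, trade_order_df_values))

# 'trade_df' (if present) now has the metric/benchmark columns attached
trade_df = trade_order_df_dict.get('trade_df')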