Ejemplo n.º 1
0
    def __init__(self,
                 temp_data_folder=constants.temp_data_folder,
                 temp_large_data_folder=constants.temp_large_data_folder,
                 tickers=None,
                 data_store=None):
        """Initialises paths/helpers and warns (non-fatally) if the temp folders are missing.

        Parameters
        ----------
        temp_data_folder : str
            Folder for temporary data output

        temp_large_data_folder : str
            Folder for temporary large data output

        tickers : str (list), optional
            Tickers to consider

        data_store : str, optional
            Where the underlying data is stored
        """
        self.temp_data_folder = temp_data_folder
        self.temp_large_data_folder = temp_large_data_folder

        # Equivalent to the old "set None, then overwrite if tickers is not None" dance
        self.tickers = tickers
        self.util_func = UtilFunc()
        self.time_series_ops = TimeSeriesOps()
        self.data_store = data_store

        logger = LoggerManager().getLogger(__name__)

        # Missing folders are only warned about (not created) - downstream writes will fail otherwise
        if not os.path.isdir(self.temp_data_folder):
            logger.warning("Temp data folder " + self.temp_data_folder +
                           " does not exist")

        # Fix: this warning previously printed temp_data_folder instead of temp_large_data_folder
        if not os.path.isdir(self.temp_large_data_folder):
            logger.warning("Temp large data folder " + self.temp_large_data_folder +
                           " does not exist")
Ejemplo n.º 2
0
    def check_empty_combined_dataframe_dict(self, df_dict=None):
        """Checks whether the combined DataFrame dict contains any valid (ie. non-empty)
        market/trade/order data, pruning missing/empty entries from the dict in place.

        Parameters
        ----------
        df_dict : dict, optional
            Mapping from ticker / trade-order name to DataFrame. If None, it is fetched
            via self.get_combined_dataframe_dict()

        Returns
        -------
        bool
            True if at least one non-empty DataFrame remains after pruning
        """
        if df_dict is None:
            df_dict = self.get_combined_dataframe_dict()

        if df_dict is None or len(df_dict) == 0:
            return False

        # Collect keys first: can't pop from the dict while iterating over it
        t_remove = [t for t in df_dict.keys()
                    if df_dict[t] is None or df_dict[t].empty]

        if t_remove:
            # Only pay for the logger when there is actually something to warn about
            logger = LoggerManager().getLogger(__name__)

            for t in t_remove:
                logger.warning("Market/trade/order data not in " + t)

                df_dict.pop(t)

        # Everything might have been pruned away
        return len(df_dict) > 0
Ejemplo n.º 3
0
from tcapy.data.databasesource import DatabaseSourceMySQL, DatabaseSourceArctic

from test.config import *

logger = LoggerManager().getLogger(__name__)

constants = Constants()

logger.info('Make sure you have created folder ' + constants.csv_folder +
            ' & ' + constants.temp_data_folder + ' otherwise tests will fail.')

# Best-effort creation of the working folders the tests need; a failure is only warned about
for _folder in [constants.csv_folder, constants.temp_data_folder]:
    if not os.path.exists(_folder):
        try:
            # exist_ok avoids a race with the existence check above (and creates parents too)
            os.makedirs(_folder, exist_ok=True)
        except OSError:
            # Fix: was a bare except, which would also swallow KeyboardInterrupt/SystemExit
            logger.warning('Could not create ' + _folder)

########################################################################################################################

# Date range and tickers used by the test harness
start_date = '26 Apr 2017'
finish_date = '05 Jun 2017'
ticker_arctic = ['EURUSD', 'USDJPY']

# Market data parameters for tables/databases
market_data_table = 'market_data_table_test_harness'
Ejemplo n.º 4
0
    def get_trade_order_data(self, tca_request, trade_order_type, start_date=None, finish_date=None):
        """Gets trade data for specified parameters (eg. start/finish dates tickers). Will also try to find trades
        when they have booked in the inverted market convention, and change the fields appropriately. For example, if
        we ask for GBPUSD trade data, it will also search for USDGBP and convert those trades in the correct convention.

        Parameters
        ----------
        tca_request : TCARequest
            What type of trade data do we want

        trade_order_type : str
            Do we want trade or order data?

        start_date : str or Timestamp, optional
            Start of period (defaults to tca_request.start_date)

        finish_date : str or Timestamp, optional
            End of period (defaults to tca_request.finish_date)

        Returns
        -------
        DataFrame
        """
        logger = LoggerManager().getLogger(__name__)

        # By default, assume we want trade data (rather than order data)
        if trade_order_type is None:
            trade_order_type = 'trade_df'

        if start_date is None and finish_date is None:
            start_date = tca_request.start_date
            finish_date = tca_request.finish_date

        # Create request for actual executed trades
        trade_request = TradeRequest(trade_request=tca_request)

        trade_request.start_date = start_date
        trade_request.finish_date = finish_date
        trade_request.trade_order_type = trade_order_type

        # Fetch all the trades done in that ticker (will be sparse-like randomly spaced tick data)
        # assumed to be the correct convention (eg. GBPUSD)
        trade_df = self._data_factory.fetch_table(data_request=trade_request)

        # If FX spot, see if trades were also booked in the inverted convention
        if tca_request.asset_class == 'fx' and tca_request.instrument == 'spot':
            # Also fetch data in the inverted cross (eg. USDGBP) as some trades may be recorded this way
            inv_trade_request = TradeRequest(trade_request=tca_request)

            inv_trade_request.start_date = start_date
            inv_trade_request.finish_date = finish_date
            inv_trade_request.trade_order_type = trade_order_type

            inv_trade_request.ticker = self._fx_conv.reverse_notation(trade_request.ticker)

            trade_inverted_df = self._data_factory.fetch_table(data_request=inv_trade_request)

            # Only add inverted trades if they exist!
            if not trade_inverted_df.empty:

                invert_price_columns = ['executed_price', 'price_limit', 'market_bid', 'market_mid', 'market_ask',
                                        'arrival_price']
                invert_price_columns = [x for x in invert_price_columns if x in trade_inverted_df.columns]

                # For trades (but not orders), there is an executed price field, which needs to be inverted
                if invert_price_columns:
                    trade_inverted_df[invert_price_columns] = 1.0 / trade_inverted_df[invert_price_columns].values

                trade_inverted_df['side'] = -trade_inverted_df['side']  # buys become sells, and vice versa!
                trade_inverted_df['ticker'] = trade_request.ticker

                if trade_df is not None:
                    # Fix: DataFrame.append is deprecated (removed in pandas 2.x) - use pd.concat
                    trade_df = pd.concat([trade_df, trade_inverted_df])
                    trade_df = trade_df.sort_index()
                else:
                    trade_df = trade_inverted_df

        # Check if trade data is not empty? If it is, return None
        if self._check_is_empty_trade_order(trade_df, tca_request, start_date, finish_date, trade_order_type):
            return None

        if tca_request.asset_class == 'fx' and tca_request.instrument == 'spot':

            # Check if any notionals of any trade/order are quoted in the TERMS currency?
            terms_notionals = trade_df['notional_currency'] == tca_request.ticker[3:6]

            # If any notionals are quoted as terms, we should invert these so we quote notionals with base currency
            # for consistency
            if terms_notionals.any():
                inversion_ticker = tca_request.ticker[3:6] + tca_request.ticker[0:3]

                inversion_spot, trade_df = self._fill_reporting_spot(inversion_ticker, trade_df, start_date,
                                                                     finish_date, tca_request)

                notional_fields = ['notional', 'order_notional', 'executed_notional']

                # Need to check terms notionals again, as trade data could have shrunk (because can only get trades, where we have market data)
                terms_notionals = trade_df['notional_currency'] == str(tca_request.ticker[3:6])

                # Only apply the inversion spot if any terms notionals are quoted wrong way around
                if terms_notionals.any():
                    if inversion_spot is not None:
                        for n in notional_fields:
                            # Fix: previously tested trade_inverted_df.columns, but it is trade_df that is updated here
                            if n in trade_df.columns:
                                # Use .loc rather than chained indexing (which may silently write to a copy)
                                trade_df.loc[terms_notionals.values, n] = \
                                    trade_df[n][terms_notionals].values * inversion_spot[terms_notionals].values
                    else:
                        logger.warning("Couldn't get spot data for " + inversion_ticker + " to invert notionals. Hence not returning trading data.")

                if terms_notionals.any():
                    trade_df.loc[terms_notionals.values, 'notional_currency'] = trade_request.ticker[0:3]

            # Also represent notional in reporting currency notional amount (eg. if we are USD based investors, convert
            # notional to USDs)

            # Using a reporting currency can be particularly useful if we are trying to aggregate metrics from many different
            # currency pairs (and wish to weight by a commonly measured reporting notional)

            # Eg. if we don't have USDUSD, then we need to convert
            if trade_request.ticker[0:3] != tca_request.reporting_currency:

                # So if we have EURJPY, we want to download EURUSD data
                reporting_ticker = trade_request.ticker[0:3] + tca_request.reporting_currency

                reporting_spot, trade_df = self._fill_reporting_spot(
                    reporting_ticker, trade_df, start_date, finish_date, tca_request)

                if reporting_spot is not None:
                    trade_df['notional_reporting_currency_mid'] = reporting_spot.values

                    trade_df['reporting_currency'] = tca_request.reporting_currency

                    columns_to_report = ['executed_notional', 'notional', 'order_notional']

                    for c in columns_to_report:
                        if c in trade_df.columns:
                            trade_df[c + '_in_reporting_currency'] = \
                                trade_df['notional_reporting_currency_mid'].values * trade_df[c]
                else:
                    logger.warning(
                        "Couldn't get spot data to convert notionals into reporting currency. Hence not returning trading data.")

                    return None
            else:
                # ie. USDUSD, so spot is 1
                trade_df['notional_reporting_currency_mid'] = 1.0

                # Reporting currency is the same as the notional of the trade, so no need to convert, just
                # replicate columns
                trade_df['reporting_currency'] = tca_request.reporting_currency

                columns_to_report = ['executed_notional', 'notional', 'order_notional']

                for c in columns_to_report:
                    if c in trade_df.columns:
                        trade_df[c + '_in_reporting_currency'] = trade_df[c]

        return trade_df
Ejemplo n.º 5
0
    def fetch_table(self, data_request):
        """Fetches table from underlying DatabaseSource

        Parameters
        ----------
        data_request : DataRequest
            Request for data with start/finish date etc.

        Returns
        -------
        DataFrame

        Raises
        ------
        Exception
            If no DatabaseSource supports the requested data store
        """
        # Fetch table from the underlying database (CSV, SQL or RESTful etc.)
        logger = LoggerManager().getLogger(__name__)

        data_norm = data_request.data_norm

        if data_norm is None:
            data_norm = Mediator.get_data_norm(version=self._version)

        # Where do we get data from?
        database_source = Mediator.get_database_source_picker().get_database_source(data_request)

        if database_source is None:
            # Fix: the Exception was previously constructed but never raised, so execution
            # fell through to an obscure AttributeError further down
            raise Exception("User asked for an unsupported database source")

        # Extract the start/finish dates and ticker we wish to download data for
        start_date = data_request.start_date
        finish_date = data_request.finish_date
        ticker = data_request.ticker

        # Stays None for request types we don't recognise (previously df could be unbound)
        df = None

        # Are we requesting market data or trade/order data of our own executions?
        if isinstance(data_request, MarketRequest):
            df = database_source.fetch_market_data(start_date=start_date, finish_date=finish_date, ticker=ticker,
                                                   table_name=data_request.market_data_database_table)

            df = data_norm.normalize_market_data(df, None, data_request)
        elif isinstance(data_request, TradeRequest):

            trade_order_type = data_request.trade_order_type
            trade_order_mapping = data_request.trade_order_mapping

            # The old CSV-specific branch issued exactly the same call as the generic
            # mapping branch, so the two have been merged
            if trade_order_mapping is not None:
                df = database_source.fetch_trade_order_data(start_date=start_date, finish_date=finish_date,
                                                            ticker=ticker,
                                                            table_name=trade_order_mapping[trade_order_type])
            else:
                # Otherwise we have a CSV file without any sort of mapping, which we assume only contains trade_df data
                df = database_source.fetch_trade_order_data(start_date=start_date, finish_date=finish_date,
                                                            ticker=ticker)

            df = data_norm.normalize_trade_data(df, None, data_request)

        if df is not None and df.empty:
            logger.warning('Dataframe empty for ticker ' + ticker)

        return df
Ejemplo n.º 6
0
    def get_dataframe_by_key(self,
                             key,
                             combined=True,
                             start_date=None,
                             finish_date=None):
        """Gets a specific trade/order and combine it into a single DataFrame.

        Parameters
        ----------
        key : str
            Which market data ticker or trades/order to return. Keys containing 'df' are
            treated as DataFrame handles; keys containing 'fig' as other metadata (eg. Plotly Figures).
            If a key contains both, 'fig' wins (checked second). Returns None for unknown keys.

        combined : True
            Should we combine all the market data for a specific ticker or trades (or orders) into a single DataFrame before returning?

        start_date : str or Timestamp, optional
            Start of filter window (only applied if finish_date is also given)

        finish_date : str or Timestamp, optional
            End of filter window (only applied if start_date is also given)

        Returns
        -------
        DataFrame
        """
        if key in self._df_dict.keys():
            dataframe_key_list = self._df_dict[key]

            logger = LoggerManager().getLogger(__name__)

            is_data_frame_key = None

            # Special cases if 'df' in key
            if 'df' in key:
                is_data_frame_key = True

            # Plotly Figures
            if 'fig' in key:
                is_data_frame_key = False

            # Ambiguous key name: default to treating it as a DataFrame
            if is_data_frame_key is None:
                logger.warn('Cannot guess key type for ' + key +
                            ', assuming DataFrame')

                is_data_frame_key = True

            if is_data_frame_key:

                try:
                    # Resolve cache handles to actual objects; burn_after_reading=True
                    # presumably evicts the cache entries once read - confirm in VolatileCache
                    df = Mediator.get_volatile_cache().get_dataframe_handle(
                        Mediator.get_util_func().flatten_list_of_lists(
                            dataframe_key_list),
                        burn_after_reading=True)
                except Exception as e:
                    # Fall back to treating the stored value as the data itself (not a handle)
                    # print("DATAFRAMEHOLDER ERROR" + str(e))
                    df = dataframe_key_list

                if combined:
                    # Concatenate the list of DataFrames into one
                    df = Mediator.get_time_series_ops().concat_dataframe_list(
                        df)

                # Sort by index so the combined DataFrame is in time order
                if df is not None:
                    if not (df.empty):
                        df = df.sort_index()

                # Date filter is only applied when BOTH bounds are supplied
                if start_date is not None and finish_date is not None:
                    df = Mediator.get_time_series_ops(
                    ).filter_start_finish_dataframe(df, start_date,
                                                    finish_date)

                return df
            # elif 'fig' in key:
            #     try:
            #         df = self._volatile_cache.get_dataframe_handle(
            #             self._util_func.flatten_list_of_lists(dataframe_key_list), burn_after_reading=True)
            #     except:
            #         df = dataframe_key_list
            #
            #     if combined:
            #
            #         xy_dict = {}
            #
            #         for fig in df:
            #             for trace in fig['data']:
            #                 name = trace['name']
            #
            #                 xy_dict[name + '_x'] = []
            #                 xy_dict[name + '_y'] = []
            #                 xy_dict['trace_name_list'] = []
            #
            #         for fig in df:
            #             for trace in fig['data']:
            #                 name = trace['name']
            #
            #                 xy_dict[name + '_x'].append(trace['x'])
            #                 xy_dict[name + '_y'].append(trace['y'])
            #
            #                 if name not in xy_dict['trace_name_list']:
            #                     xy_dict['trace_name_list'].append(name)
            #
            #         fig = df[0]
            #
            #         # aggregate all the x & y values
            #         for i in range(0, len(fig['data'])):
            #             name = fig['data'][i]['name']
            #
            #             for j in range(1, len(xy_dict[name + '_x'])):
            #                 fig['data'][i]['x'].extend(xy_dict[name + '_x'])
            #                 fig['data'][i]['y'].extend(xy_dict[name + '_y'])
            #
            #         return fig
            else:
                # Otherwise different type of metadata (don't attempt to combine it) - eg. Plotly Fig
                try:
                    df = Mediator.get_volatile_cache().get_dataframe_handle(
                        Mediator.get_util_func().flatten_list_of_lists(
                            dataframe_key_list),
                        burn_after_reading=True)
                except Exception as e:
                    print(e)
                    df = dataframe_key_list

                # If multiple objects were stored, only the first is returned here
                if isinstance(df, list):
                    return df[0]

                return df

        # Key not held by this object
        return None