Example 1
def test_market_data_convention():
    """Tests that market data for unusual quotations is consistent (ie. if the user requests USDEUR, this should be
    inverted EURUSD (which is the correct convention)
    """
    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)
    market_request = MarketRequest(start_date=start_date, finish_date=finish_date, ticker=ticker,
                             data_store=market_data_store)

    #### Compare EURUSD to USDEUR
    market_correct_conv_series = pd.DataFrame(market_loader.get_market_data(market_request)['mid'])

    market_request.ticker = 'USDEUR'
    market_reverse_conv_series = pd.DataFrame(1.0 / market_loader.get_market_data(market_request)['mid'])

    assert_frame_equal(market_correct_conv_series, market_reverse_conv_series, check_dtype=False)

    ### Compare EURJPY (which is autogenerated, if EURJPY is not collected directly) vs. EURUSD & USDJPY multiplied

    # Use resampled series for comparison
    market_request.ticker = 'USDJPY'
    market_df_USDJPY = pd.DataFrame(market_loader.get_market_data(market_request)['mid'])

    market_request.ticker = 'EURJPY'
    market_df_EURJPY = pd.DataFrame(market_loader.get_market_data(market_request)['mid']).resample('1min').mean()

    market_df_EURJPY_comp = (market_correct_conv_series.resample('1min').mean() * market_df_USDJPY.resample('1min').mean())

    market_df_EURJPY, market_df_EURJPY_comp = market_df_EURJPY.align(market_df_EURJPY_comp, join='inner')

    comp = (market_df_EURJPY - market_df_EURJPY_comp).dropna()

    assert all(comp['mid'] < eps)
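
The convention logic being tested boils down to simple arithmetic on the quotes. A minimal standalone sketch with synthetic pandas data (values, dates and the eps tolerance here are illustrative, not taken from tcapy's test harness):

import numpy as np
import pandas as pd

# Synthetic 1-minute EURUSD and USDJPY mid series (illustrative values only)
index = pd.date_range('2017-05-01', periods=5, freq='1min')
eurusd_mid = pd.Series([1.090, 1.091, 1.092, 1.091, 1.093], index=index)
usdjpy_mid = pd.Series([111.20, 111.30, 111.10, 111.40, 111.50], index=index)

# USDEUR is in the "wrong" market convention, so it is simply the inverse of EURUSD
usdeur_mid = 1.0 / eurusd_mid
assert np.allclose(eurusd_mid, 1.0 / usdeur_mid)

# A synthetic EURJPY cross is the product of the two USD legs
eurjpy_from_legs = eurusd_mid * usdjpy_mid

# Pretend EURJPY was also collected directly (with tiny quoting noise added here)
eurjpy_collected = eurjpy_from_legs + np.random.uniform(-1e-6, 1e-6, size=len(index))

eps = 1e-3
assert np.all(np.abs(eurjpy_collected - eurjpy_from_legs) < eps)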
Example 2
    def get_market_data(self, market_request, return_cache_handles=False):
        # If return_cache_handles is True, a pointer-like CacheHandle is returned instead of the DataFrame itself

        volatile_cache = Mediator.get_volatile_cache(volatile_cache_engine=self._volatile_cache_engine)

        cache = True

        # Don't attempt to cache DataFrames
        if hasattr(market_request, 'market_data_store'):
            if (isinstance(market_request.market_data_store, pd.DataFrame)):
                cache = False
        elif isinstance(market_request.data_store, pd.DataFrame):
            cache = False

        # If we have allowed the caching of monthly/periodic market data
        if market_request.multithreading_params['cache_period_market_data'] and cache:
            old_start_date = market_request.start_date
            old_finish_date = market_request.finish_date

            # So we can also accept TCARequest objects
            if hasattr(market_request, 'market_data_store'):
                data_store = market_request.market_data_store
                data_offset_ms = market_request.market_data_offset_ms
            else:
                data_store = market_request.data_store
                data_offset_ms = market_request.data_offset_ms

            # See if we can fetch from the cache (typically Redis)
            start_date, finish_date, market_key, market_df = \
                volatile_cache.get_data_request_cache(market_request, data_store, 'market_df',
                                                     data_offset_ms)

            # If data is already cached, just return the existing CacheHandle (which acts like a pointer to the
            # data held in Redis)
            if market_df is not None and start_date == old_start_date and finish_date == old_finish_date and return_cache_handles:
                return CacheHandle(market_key, add_time_expiry=False)

            if market_df is None:

                market_request_copy = MarketRequest(market_request=market_request)
                market_request_copy.start_date = start_date
                market_request_copy.finish_date = finish_date

                market_df = super(TCATickerLoaderImpl, self).get_market_data(market_request_copy)

                volatile_cache.put_data_request_cache(market_request_copy, market_key, market_df)

            market_df = self._strip_start_finish_dataframe(market_df, old_start_date, old_finish_date, market_request)
        else:
            market_df = super(TCATickerLoaderImpl, self).get_market_data(market_request)

        # Return as a cache handle (which can easily be passed across Celery, for example),
        # but only if use_multithreading is enabled
        if return_cache_handles and market_request.use_multithreading:
            return volatile_cache.put_dataframe_handle(market_df,
                use_cache_handles=market_request.multithreading_params['cache_period_market_data'])

        return market_df
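
Stripped of the tcapy-specific classes, the method above follows a familiar "check the cache, fetch only what is missing, cache the result, then trim to the requested window" pattern. A rough sketch of that pattern with an in-memory dict standing in for Redis (all names here are illustrative, not tcapy's API):

import pandas as pd

_cache = {}

def get_market_data_cached(ticker, start_date, finish_date, fetch_func):
    """Return market data for [start_date, finish_date], reusing a cached superset if one exists."""
    cached_df = _cache.get(ticker)

    # If the cached window covers the request, just slice it
    if cached_df is not None and not cached_df.empty:
        if cached_df.index[0] <= start_date and cached_df.index[-1] >= finish_date:
            return cached_df.loc[start_date:finish_date]

    # Otherwise fetch the full (e.g. monthly) window and cache it for next time
    market_df = fetch_func(ticker, start_date, finish_date)
    _cache[ticker] = market_df

    # Strip back down to the dates originally requested
    return market_df.loc[start_date:finish_date]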
Example 3
def test_write_chunked_market_data_arctic():
    """For very large CSV files we might need to read them in chunks. tcapy supports this and also supports CSVs
    which are sorted in reverse (ie. descending). We need to enable chunking and reverse reading with flags.

    This tests whether chunked data is written correctly to Arctic, comparing it with that read from CSV directly
    """

    if not (run_arctic_tests): return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'; arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Load data from the CSVs directly (for comparison later)
    market_df_csv_desc = DatabaseSourceCSV(market_data_database_csv=csv_reverse_market_data_store).fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_csv_asc = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store).fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        ### write CSV data to Arctic which is sorted ascending (default!)
        database_source.convert_csv_to_table(csv_market_data_store, ticker,
                                             test_harness_arctic_market_data_table,
                                             if_exists_table='replace',
                                             if_exists_ticker='replace', market_trade_data='market',
                                             csv_read_chunksize=100000, remove_duplicates=False)

        market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                       data_store=test_harness_arctic_market_data_store,
                                       market_data_database_table=test_harness_arctic_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        # Compare reading directly from the CSV vs. reading back from Arctic
        assert all(market_df_csv_asc['mid'] - market_df_load['mid'] < eps)

        ### write CSV data to Arctic which is sorted descending
        database_source.convert_csv_to_table(csv_reverse_market_data_store, ticker,
                                             test_harness_arctic_market_data_table,
                                             if_exists_table='append',
                                             if_exists_ticker='replace', market_trade_data='market',
                                             csv_read_chunksize=100000, read_in_reverse=True, remove_duplicates=False)

        market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                       data_store=test_harness_arctic_market_data_store,
                                       market_data_database_table=test_harness_arctic_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        # Compare reading directly from the CSV vs. reading back from Arctic
        assert all(market_df_csv_desc['mid'] - market_df_load['mid'] < eps)
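
Independently of tcapy, chunked reading of a large (and possibly reverse-sorted) CSV comes down to pandas' chunksize support; a hedged sketch, where the file path and chunk size are placeholders:

import pandas as pd

# Read a large tick CSV in chunks rather than in one go
chunks = pd.read_csv('large_market_data.csv', index_col=0, parse_dates=True,
                     chunksize=100000)

market_df = pd.concat(chunks)

# If the CSV was written in descending time order, restore ascending order before writing to the database
if not market_df.index.is_monotonic_increasing:
    market_df = market_df.sort_index()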
Example 4
    def _get_underlying_market_data(self, start_date, finish_date,
                                    market_request):
        # Create request for market data
        market_request = MarketRequest(
            start_date=start_date,
            finish_date=finish_date,
            ticker=market_request.ticker,
            data_store=market_request.data_store,
            data_offset_ms=market_request.data_offset_ms,
            market_data_database_table=market_request.market_data_database_table)

        # Fetch market data in that ticker (will be tick data)
        market_df = self._data_factory.fetch_table(data_request=market_request)

        # TODO do further filtering of market and trade data as necessary
        if constants.resample_ms is not None:
            market_df = self._time_series_ops.resample_time_series(
                market_df, resample_ms=constants.resample_ms)

            market_df.dropna(inplace=True)

        ## TODO drop stale quotes for market data and add last update time?

        # Calculate mid market rate, if it doesn't exist
        if market_df is not None:
            if not (market_df.empty):
                market_df = self._benchmark_mid.calculate_benchmark(
                    market_df=market_df)

        return market_df
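
The resampling and mid calculation steps above can be sketched in plain pandas; this is a generic illustration rather than the _time_series_ops/_benchmark_mid implementations used by tcapy, and the data is synthetic:

import numpy as np
import pandas as pd

# Synthetic bid/ask tick data (illustrative values)
index = pd.date_range('2017-05-01 10:00', periods=8, freq='250ms')
tick_df = pd.DataFrame({'bid': np.linspace(1.0899, 1.0903, 8),
                        'ask': np.linspace(1.0901, 1.0905, 8)}, index=index)

# Downsample onto a regular 1 second grid, taking the mean of the ticks in each bucket
resampled_df = tick_df.resample('1s').mean().dropna()

# Calculate a mid rate if one doesn't already exist
if 'mid' not in resampled_df.columns:
    resampled_df['mid'] = (resampled_df['bid'] + resampled_df['ask']) / 2.0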
Example 5
def create_market_trade_data_eikon():
    """Creates a small dataset for testing purposes for market, trade and order data for EURUSD at the start of May 2017,
    which is dumped to the designated tcapy test harness folder.

    Returns
    -------

    """
    # Use Arctic as the database source (or fetch directly from Dukascopy) for the market data
    tca_market = TCAMarketTradeLoaderImpl()

    util_func = UtilFunc()

    market_df = []

    for tick in ticker:
        market_request = MarketRequest(ticker=tick,
                                       data_store=data_store,
                                       start_date=start_date,
                                       finish_date=finish_date)

        market_df.append(
            tca_market.get_market_data(market_request=market_request))

    # Note: it can be very slow to write these CSV files
    market_df = pd.concat(market_df)
    market_df.to_csv(os.path.join(folder, 'small_test_market_df_eikon.csv.gz'),
                     compression='gzip')

    # Also write to disk in a binary format (easier to load up later)
    util_func.write_dataframe_to_binary(
        market_df, os.path.join(folder, 'small_test_market_df_eikon.gzip'))

    # Create a spot file in reverse order
    market_df.sort_index(ascending=False)\
        .to_csv(os.path.join(folder, 'small_test_market_df_reverse_eikon.csv.gz'), compression='gzip')

    # Also write to disk as Parquet file (easier to load up later)
    util_func.write_dataframe_to_binary(
        market_df,
        os.path.join(folder, 'small_test_market_df_reverse_eikon.parquet'))

    if create_trade_order_data:
        # Use the market data we just downloaded to CSV, and perturb it to generate the trade data
        data_test_creator = DataTestCreator(
            market_data_postfix=postfix,
            csv_market_data=os.path.join(folder,
                                         'small_test_market_df_eikon.csv.gz'),
            write_to_db=False)

        # Create randomised trade/order data
        trade_order = data_test_creator.create_test_trade_order(
            ticker_trades, start_date=start_date, finish_date=finish_date)

        trade_order['trade_df'].to_csv(
            os.path.join(folder, 'small_test_trade_df_eikon.csv'))
        trade_order['order_df'].to_csv(
            os.path.join(folder, 'small_test_order_df_eikon.csv'))
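
For reference, the file-writing helpers above roughly correspond to the following plain pandas calls (the paths are placeholders and to_parquet requires pyarrow or fastparquet to be installed):

import os
import pandas as pd

folder = '/tmp'

market_df = pd.DataFrame({'mid': [1.0900, 1.0905, 1.0910]},
                         index=pd.date_range('2017-05-01', periods=3, freq='1min'))

# Gzipped CSV (can be slow to write for large tick datasets, but very portable)
market_df.to_csv(os.path.join(folder, 'small_test_market_df.csv.gz'), compression='gzip')

# Parquet copy (much quicker to load up later)
market_df.to_parquet(os.path.join(folder, 'small_test_market_df.parquet'))

# Reverse-ordered CSV, as used when testing descending file reads
market_df.sort_index(ascending=False).to_csv(
    os.path.join(folder, 'small_test_market_df_reverse.csv.gz'), compression='gzip')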
Example 6
def test_write_market_data_arctic():
    """Tests we can write market data to Arctic
    """
    if not (run_arctic_tests): return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can read data from CSV and dump to Arctic (and when read back it matches CSV)
    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    replace_append = ['replace', 'append']

    # Check first when replacing full table and then appending
    for a in arctic_lib_type:
        for i in replace_append:
            database_source = DatabaseSourceArctic(postfix='testharness',
                                                   arctic_lib_type=a)

            # Write CSV to Arctic
            database_source.convert_csv_to_table(
                csv_market_data_store,
                ticker,
                test_harness_arctic_market_data_table,
                if_exists_table=i,
                if_exists_ticker='replace',
                market_trade_data='market',
                remove_duplicates=False)

            # Fetch data directly from CSV
            database_source_csv = DatabaseSourceCSV(
                market_data_database_csv=csv_market_data_store)

            market_df_csv = database_source_csv.fetch_market_data(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker)

            # Read back data from Arctic and compare with CSV
            market_request = MarketRequest(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker,
                data_store=database_source,  # test_harness_arctic_market_data_store
                market_data_database_table=test_harness_arctic_market_data_table)

            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            diff_df.to_csv('test' + i + '.csv')
            assert all(diff_df < eps)
Example 7
    def _fill_reporting_spot(self, ticker, trade_df, start_date, finish_date,
                             tca_request):
        logger = LoggerManager.getLogger(__name__)

        market_request = MarketRequest(
            start_date=start_date,
            finish_date=finish_date,
            ticker=ticker,
            data_store=tca_request.market_data_store,
            data_offset_ms=tca_request.market_data_offset_ms,
            use_multithreading=tca_request.use_multithreading,
            market_data_database_table=tca_request.market_data_database_table,
            multithreading_params=tca_request.multithreading_params)

        market_conversion_df = self.get_market_data(market_request)

        # Make sure the trades/orders are within the market data (for the purposes of the reporting spot,
        # we only need to consider the starting point of each order, not its length)
        trade_df = self.strip_trade_order_data_to_market(
            trade_df, market_conversion_df, consider_order_length=False)

        reporting_spot = None

        # Need to check whether we actually have any trade data/market data
        if trade_df is not None and market_conversion_df is not None:
            if not (trade_df.empty) and not (market_conversion_df.empty):

                try:
                    reporting_spot = \
                        self._time_series_ops.vlookup_style_data_frame(trade_df.index, market_conversion_df, 'mid')[
                            0]

                except Exception:
                    logger.error(
                        "Reporting spot is missing for this trade data sample!"
                    )

                if reporting_spot is None:
                    market_start_finish = "No market data in this sample. "

                    if market_conversion_df is not None:
                        market_start_finish = "Market data is between " + str(
                            market_conversion_df.index[0]) + " - " + str(
                                market_conversion_df.index[-1]) + ". "

                    logger.warning(market_start_finish)
                    logger.warning("Trade data is between " +
                                   str(trade_df.index[0]) + " - " +
                                   str(trade_df.index[-1]) + ".")

                    logger.warning(
                        "Couldn't get spot data to convert notionals currency. Hence not returning trading data."
                    )

        return reporting_spot, trade_df
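
The vlookup_style_data_frame call above pairs each trade timestamp with the most recent market mid at or before it. Outside tcapy, pandas' merge_asof expresses the same idea; a small sketch with synthetic data:

import pandas as pd

market_df = pd.DataFrame({'mid': [1.0900, 1.0905, 1.0910]},
                         index=pd.date_range('2017-05-01 10:00', periods=3, freq='1min'))

trade_df = pd.DataFrame({'executed_price': [1.0903, 1.0908]},
                        index=pd.to_datetime(['2017-05-01 10:00:30',
                                              '2017-05-01 10:01:45']))

# For each trade, take the last market mid observed at or before the trade time
reporting_spot = pd.merge_asof(trade_df, market_df,
                               left_index=True, right_index=True)['mid']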
Example 8
def test_write_market_data_db():
    """Tests we can write market data to KDB/Influxdb/PyStore
    """

    database_source_list, test_harness_market_data_table_list, test_harness_data_store_list = \
        _get_db_market_database_source()

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    for i in range(0, len(database_source_list)):

        database_source = database_source_list[i]
        test_harness_market_data_table = test_harness_market_data_table_list[i]
        test_harness_data_store = test_harness_data_store_list[i]

        ### Test we can read data from CSV and dump to InfluxDB/KDB/PyStore (and when read back it matches CSV)
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        replace_append = ['replace', 'append']

        database_source_csv = DatabaseSourceCSV(
            market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=db_start_date,
            finish_date=db_finish_date,
            ticker=ticker)

        # Check first when replacing the full table and then appending (will still replace the ticker though)
        for if_exists in replace_append:

            database_source.convert_csv_to_table(
                csv_market_data_store,
                ticker,
                test_harness_market_data_table,
                if_exists_table=if_exists,
                if_exists_ticker='replace',
                market_trade_data='market',
                remove_duplicates=False)

            market_request = MarketRequest(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker,
                data_store=test_harness_data_store,
                market_data_database_table=test_harness_market_data_table)

            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            assert all(diff_df < eps)
Example 9
def test_append_market_data_arctic():
    """Tests we can append market data to arctic (we will have already written data to the test harness database)
    """
    if not (run_arctic_tests): return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can append (non-overlapping) data to Arctic
    arctic_start_date = '01 Jan 2016'; arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Use this market request later when reading back from Arctic
    market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                   data_store=test_harness_arctic_market_data_store,
                                   market_data_database_table=test_harness_arctic_market_data_table)

    # Load data from CSV for comparison later
    database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

    market_df_csv = database_source_csv.fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv, chunks=2)

    for a in arctic_lib_type:

        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        market_df_lower = market_df_list[0]
        market_df_higher = market_df_list[1]

        database_source.append_market_data(market_df_lower, ticker, table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='replace', if_exists_ticker='replace', remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this will fail!)
        try:
            database_source.append_market_data(market_df_lower, ticker,
                                               table_name=test_harness_arctic_market_data_table,
                                               if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)
        except ErrorWritingOverlapDataException as e:
            overlap_error = True

        assert overlap_error

        # Append non-overlapping data which follows (writing overlapping data into Arctic will mess up the datastore!)
        database_source.append_market_data(market_df_higher, ticker, table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)

        market_df_all_read_back = market_loader.get_market_data(market_request=market_request)

        assert all(market_df_all_read_back['mid'] - market_df_csv['mid'] < eps)
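
The overlap protection exercised above reduces to a simple check that newly appended data starts strictly after the data already stored for that ticker; a generic sketch (the exception name is illustrative, not tcapy's ErrorWritingOverlapDataException):

class OverlapError(Exception):
    pass

def check_can_append(existing_df, new_df):
    """Raise if new_df would overlap data already stored (which would corrupt an append-only store)."""
    if existing_df is None or existing_df.empty or new_df.empty:
        return

    if new_df.index[0] <= existing_df.index[-1]:
        raise OverlapError("New market data overlaps data already written - refusing to append")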
Example 10
def test_append_market_data_db():
    """Tests we can append market data to KDB/InfluxDB.
    """
    database_source_list, test_harness_market_data_table_list, test_harness_data_store_list = _get_db_database_source()

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    for i in range(0, len(database_source_list)):

        database_source = database_source_list[i]
        test_harness_market_data_table = test_harness_market_data_table_list[i]
        test_harness_data_store = test_harness_data_store_list[i]

        ### Test we can append (non-overlapping) data to KDB/InfluxDB
        db_start_date = '01 Jan 2016'; db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        # TODO
        market_request = MarketRequest(start_date=db_start_date, finish_date=db_finish_date, ticker=ticker,
                                       data_store=test_harness_data_store, market_data_database_table=test_harness_market_data_table)

        market_df_load = market_loader.get_market_data(market_request=market_request)

        market_df_list = TimeSeriesOps().split_array_chunks(market_df_load, chunks=2)

        market_df_lower = market_df_list[0]
        market_df_higher = market_df_list[1]

        database_source.append_market_data(market_df_lower, ticker, table_name=test_harness_market_data_table,
                                           if_exists_table='replace', if_exists_ticker='replace', remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this will fail!)
        try:
            database_source.append_market_data(market_df_lower, ticker,
                                               table_name=test_harness_market_data_table,
                                               if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)
        except ErrorWritingOverlapDataException as e:
            overlap_error = True

        assert overlap_error

        # Append non-overlapping data which follows (writing overlapping data can end up with duplicated values,
        # although KDB/InfluxDB will allow it)
        database_source.append_market_data(market_df_higher, ticker, table_name=test_harness_market_data_table,
                                           if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)

        market_df_all_read_back = market_loader.get_market_data(market_request=market_request)

        assert all(market_df_all_read_back['mid'] - market_df_load['mid'] < eps)
Example 11
def test_write_multiple_wildcard_market_data_csvs_arctic():
    """Tests we can write sequential market data CSVs (or HDF5) whose path has been specified by a wildcard (eg. EURUSD*.csv).
    It is assumed that the CSVs are in chronological orders, from their filenames.
    """
    if not (run_arctic_tests): return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness',
                                               arctic_lib_type=a)

        ### Read CSV data which is sorted ascending (default!)
        database_source.convert_csv_to_table(
            csv_market_data_store,
            ticker,
            test_harness_arctic_market_data_table,
            if_exists_table='replace',
            if_exists_ticker='replace',
            market_trade_data='market',
            csv_read_chunksize=10**6,
            remove_duplicates=False)

        database_source_csv = DatabaseSourceCSV(
            market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=arctic_start_date,
            finish_date=arctic_finish_date,
            ticker=ticker)

        # Prepare the CSV folder first
        csv_folder = os.path.join(constants.test_data_harness_folder,
                                  'csv_arctic_mult')

        # Empty the CSV test harness folder, where we shall dump the mini CSVs
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        # Split the CSV file into several mini CSV files (and also Parquet files)
        market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv,
                                                            chunks=3)

        chunk_no = 0

        for m in market_df_list:
            m.to_csv(
                os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.csv'))
            UtilFunc().write_dataframe_to_binary(
                m,
                os.path.join(csv_folder,
                             "EURUSD" + str(chunk_no) + '.parquet'),
                format='parquet')

            chunk_no = chunk_no + 1

        file_ext = ['csv', 'parquet']

        for f in file_ext:
            ### Read CSV data from the mini CSVs (using wildcard char) and dump to Arctic
            database_source.convert_csv_to_table(
                os.path.join(csv_folder, "EURUSD*." + f),
                ticker,
                test_harness_arctic_market_data_table,
                if_exists_table='append',
                if_exists_ticker='replace',
                market_trade_data='market',
                csv_read_chunksize=10**6,
                remove_duplicates=False)

            market_request = MarketRequest(
                start_date=arctic_start_date,
                finish_date=arctic_finish_date,
                ticker=ticker,
                data_store=database_source,
                market_data_database_table=test_harness_arctic_market_data_table
            )

            # Read back from Arctic
            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            # Compare reading directly from the original large CSV vs. reading back from Arctic (which was dumped from split CSVs)
            diff_df = abs(market_df_load['mid'] - market_df_csv['mid'])

            outside_bounds = diff_df[diff_df >= eps]

            assert len(outside_bounds) == 0
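
Reading a wildcard path like EURUSD*.csv essentially amounts to expanding the pattern, relying on the filenames sorting into chronological order and concatenating the pieces; a minimal sketch (the folder path is a placeholder):

import glob
import os
import pandas as pd

csv_folder = '/tmp/csv_arctic_mult'

# Expand the wildcard; sorted filenames are assumed to be in chronological order
paths = sorted(glob.glob(os.path.join(csv_folder, 'EURUSD*.csv')))

market_df = pd.concat(
    [pd.read_csv(p, index_col=0, parse_dates=True) for p in paths])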
Example 12
    def get_market_data(self, market_request):
        """Gets market data for a particular ticker. When we ask for non-standard FX crosses, only the mid-field is
        returned (calculated as a cross rate). We do not give bid/ask quotes for calculated non-standard _tickers, as these
        can difficult to estimate.

        Parameters
        ----------
        market_request : MarketRequest
            The type of market data to get

        Returns
        -------
        DataFrame
        """
        logger = LoggerManager.getLogger(__name__)

        if isinstance(market_request, TCARequest):
            market_request = MarketRequest(market_request=market_request)

        old_ticker = market_request.ticker

        if market_request.asset_class == 'fx':
            # Check if we can get ticker directly or need to create synthetic cross rates
            ticker = self._fx_conv.correct_notation(market_request.ticker)
        else:
            # If not FX we don't have to invert
            ticker = old_ticker

        # If the ticker in the correct convention is among the crosses where we collect data (typically this will be the USD
        # crosses, plus some liquid non-USD pairs like EURJPY)

        # available_tickers = []

        if isinstance(market_request.data_store, DatabaseSource):
            # TODO improve ticker check here!
            available_tickers = [ticker]
        elif 'csv' in market_request.data_store or 'h5' in market_request.data_store or 'gzip' in market_request.data_store \
            or 'parquet' in market_request.data_store or isinstance(market_request.data_store, pd.DataFrame) :

            # For CSV (or H5) we don't have much choice, and the tickers could differ between CSV files (if the CSV has
            # a 'ticker' field, we will match on that)
            available_tickers = [ticker]
        elif market_request.data_store in constants.market_data_tickers:
            available_tickers = self._util_func.dict_key_list(
                constants.market_data_tickers[
                    market_request.data_store].keys())

        else:
            err_msg = 'Ticker ' + str(ticker) + " doesn't seem to be available in the data source " + \
                      str(market_request.data_store)

            logger.error(err_msg)

            raise Exception(err_msg)

        if ticker in available_tickers:

            # In the correct convention or is not FX
            if ticker == old_ticker:
                market_df = self._get_correct_convention_market_data(
                    market_request)

            # Otherwise need to flip to the correct convention (will only return 'mid')
            else:
                market_request_flipped = MarketRequest(
                    market_request=market_request)
                market_request_flipped.ticker = ticker

                market_df = self._invert_quoting_market(
                    self._get_correct_convention_market_data(
                        market_request_flipped))

                if 'ticker' in market_df.columns:
                    market_df['ticker'] = old_ticker
        else:
            if market_request.asset_class == 'fx' and market_request.instrument == 'spot':
                # Otherwise we need to get both legs
                # e.g. for NZDCAD, we download NZDUSD and USDCAD, then multiply them to get NZDCAD

                # Get the USD crosses for each leg and then multiply
                market_request_base = MarketRequest(
                    market_request=market_request)
                market_request_terms = MarketRequest(
                    market_request=market_request)

                market_request_base.ticker = old_ticker[0:3] + 'USD'
                market_request_terms.ticker = 'USD' + old_ticker[3:7]

                tickers_exist = self._fx_conv.currency_pair_in_list(
                        self._fx_conv.correct_notation(market_request_base.ticker), available_tickers) and \
                        self._fx_conv.currency_pair_in_list(
                            self._fx_conv.correct_notation(market_request_terms.ticker), available_tickers)

                # If both USD tickers don't exist, try computing via EUR tickers (e.g. USDSEK from EURUSD & EURSEK)
                if not (tickers_exist):
                    market_request_base.ticker = old_ticker[0:3] + 'EUR'
                    market_request_terms.ticker = 'EUR' + old_ticker[3:7]

                    tickers_exist = self._fx_conv.currency_pair_in_list(
                        self._fx_conv.correct_notation(market_request_base.ticker), available_tickers) and \
                                    self._fx_conv.currency_pair_in_list(
                                        self._fx_conv.correct_notation(market_request_terms.ticker), available_tickers)

                # Check if that currency pair (in the CORRECT convention) is in the available tickers
                # (we will typically not collect market data for currencies in the wrong convention)
                if tickers_exist:

                    fields_try = ['bid', 'ask', 'mid']

                    market_base_df = self.get_market_data(market_request_base)
                    market_terms_df = self.get_market_data(
                        market_request_terms)

                    market_has_data = False

                    if market_base_df is not None and market_terms_df is not None:
                        if not (market_base_df.empty) and not (
                                market_terms_df.empty):
                            market_has_data = True

                    # If there's no data in either DataFrame, don't attempt to calculate anything
                    if not (market_has_data):
                        return pd.DataFrame()

                    fields = []

                    for f in fields_try:
                        if f in market_base_df.columns and f in market_terms_df.columns:
                            fields.append(f)

                    # Only attempt to calculate if the fields exist
                    if len(fields) > 0:
                        # Remove any other columns (eg. with ticker name etc.)
                        market_base_df = market_base_df[fields]
                        market_terms_df = market_terms_df[fields]

                        # Need to align series to multiply (and then fill down points which don't match)
                        # can't use interpolation, given that would use FUTURE data
                        market_base_df, market_terms_df = market_base_df.align(
                            market_terms_df, join="outer")
                        market_base_df = market_base_df.fillna(method='ffill')
                        market_terms_df = market_terms_df.fillna(
                            method='ffill')

                        market_df = pd.DataFrame(data=market_base_df.values *
                                                 market_terms_df.values,
                                                 columns=fields,
                                                 index=market_base_df.index)

                        # Values at the start of the series MIGHT be nan, so need to ignore those
                        market_df = market_df.dropna(subset=['mid'])

                        if 'ticker' in market_df.columns:
                            market_df['ticker'] = old_ticker
                    else:
                        return None

                else:
                    # Otherwise couldn't compute either from the USD legs or EUR legs
                    logger.warning("Couldn't find market data for ticker: " +
                                   str(ticker))

                    return None
            else:
                # Otherwise couldn't find the non-FX ticker
                logger.warning("Couldn't find market data for ticker: " +
                               str(ticker))

                return None

        return market_df
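
The final branch above constructs a synthetic cross by aligning the two USD legs, forward filling (no interpolation, which would leak future data) and multiplying. A standalone sketch of that calculation with made-up NZDUSD/USDCAD ticks:

import pandas as pd

nzdusd_df = pd.DataFrame({'mid': [0.6850, 0.6852, 0.6851]},
                         index=pd.to_datetime(['2017-05-01 10:00:00.100',
                                               '2017-05-01 10:00:00.400',
                                               '2017-05-01 10:00:00.900']))

usdcad_df = pd.DataFrame({'mid': [1.3650, 1.3652]},
                         index=pd.to_datetime(['2017-05-01 10:00:00.200',
                                               '2017-05-01 10:00:00.700']))

# Align both legs on the union of their timestamps, then fill down the gaps
nzdusd_df, usdcad_df = nzdusd_df.align(usdcad_df, join='outer')
nzdusd_df = nzdusd_df.ffill()
usdcad_df = usdcad_df.ffill()

# Multiply the legs to get the NZDCAD cross; the first rows may be NaN where one leg had no data yet
nzdcad_df = (nzdusd_df * usdcad_df).dropna(subset=['mid'])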
Example 13
    def _get_output_data_source(self):
        return Mediator.get_database_source_picker().get_database_source(
            MarketRequest(data_store=self.data_store))
Example 14
    def create_test_trade_order(self,
                                ticker,
                                start_date='01 Jan 2016',
                                finish_date='01 May 2018',
                                order_min_size=0.5 * constants.MILLION,
                                order_max_size=20.0 * constants.MILLION,
                                number_of_orders_min_per_year=252 * 20,
                                number_of_orders_max_per_year=252 * 200):
        """Create a randomised list of orders & trade using indicative market data as a source (and perturbing the
        execution prices, within various constraints, such as the approximate size of orders trades, the orders per _year

        Parameters
        ----------
        ticker : str
            Ticker

        start_date : str
            Start date of the orders

        finish_date : str
            Finish date of the orders

        order_min_size : float
            Minimum size of orders

        order_max_size : float
            Maximum size of orders

        number_of_orders_min_per_year : int
            Minimum orders per year

        number_of_orders_max_per_year : int
            Maximum orders per year

        Returns
        -------
        DataFrame
        """
        logger = LoggerManager.getLogger(__name__)

        if isinstance(ticker, str):
            ticker = [ticker]

        order_list = []
        trade_list = []

        start_date = self.time_series_ops.date_parse(start_date,
                                                     assume_utc=True)
        finish_date = self.time_series_ops.date_parse(finish_date,
                                                      assume_utc=True)
        util_func = UtilFunc()

        # Make this parallel? It may run into memory issues though
        for tick in ticker:

            logger.info("Loading market data for " + tick)

            # Split into yearly chunks (otherwise we can easily run out of memory)
            date_list = util_func.split_date_single_list(
                start_date,
                finish_date,
                split_size='yearly',
                add_partial_period_start_finish_dates=True)

            # TODO do in a batch fashion
            for i in range(0, len(date_list) - 1):
                df = self._tca_market.get_market_data(
                    MarketRequest(start_date=date_list[i],
                                  finish_date=date_list[i + 1],
                                  ticker=tick,
                                  data_store=self._market_data_source))

                # self.database_source_market.fetch_market_data(start_date = start_date, finish_date = finish_date, ticker = tick)

                # Need to make sure there's sufficient market data!
                if df is not None:
                    if len(df.index) >= 2:
                        # Get the percentage of the year represented by the difference between the start and finish dates
                        year_perc = float(
                            (df.index[-1] - df.index[0]).total_seconds() /
                            (24.0 * 60.0 * 60.0)) / 365.0

                        logger.info("Constructing randomised trades for " +
                                    tick)

                        number_of_orders_min = int(
                            year_perc * number_of_orders_min_per_year)
                        number_of_orders_max = int(
                            year_perc * number_of_orders_max_per_year)

                        # Split up the data frame into equally sized chunks
                        df_orders = self._derive_order_no(
                            self._strip_columns(df, tick),
                            number_of_orders_min, number_of_orders_max)

                        # Don't want a memory leak, so delete this from memory as soon as possible!
                        del df

                        # order_counter = 0

                        logger.info("Now beginning order construction for " +
                                    tick)

                        # For each order create randomised associated trades,
                        # grouping together all the trades per day as orders
                        for df_order in df_orders:

                            # Set duration of the grandparent order (find randomised start/finish time)
                            # somewhere between 0-25% for start, and 75% to 100% for end point
                            df_order = self.rand_time_series.randomly_truncate_data_frame_within_bounds(
                                df_order, start_perc=0.25, finish_perc=0.75)

                            logger.debug("Creating order between " +
                                         str(df_order.index[0]) + " - " +
                                         str(df_order.index[-1]))

                            # Assume all orders/trades are in the same direction (which is randomly chosen)
                            buy_sell = randint(0, 1)

                            # Sell trades
                            if buy_sell == 0:
                                side_no = -1
                                side = 'bid'

                            # Buy trades
                            else:
                                side_no = 1
                                side = 'ask'

                            magnitude = 10000.0 * 2

                            if tick == 'USDJPY': magnitude = 100.0 * 2.0

                            if randint(0, 100) > 97:
                                new_tick = tick[3:6] + tick[0:3]

                                if 'ticker' in df_order.columns:
                                    df_order['ticker'] = new_tick

                                if 'bid' in df_order.columns and 'ask' in df_order.columns:
                                    ask = 1.0 / df_order['bid']
                                    bid = 1.0 / df_order['ask']

                                    df_order['bid'] = bid
                                    df_order['ask'] = ask

                                df_order['mid'] = 1.0 / df_order['mid']
                            else:
                                new_tick = tick

                            # Get 'bid' for sells, and 'ask' for buys
                            df_order['trade_value'] = df_order[side]

                            # We want to simulate the executions by perturbing the buys randomly
                            df_order = self.rand_time_series.randomly_perturb_column(
                                df_order,
                                column='trade_value',
                                magnitude=magnitude)

                            # Assume notional is in the base currency in the vast majority of cases
                            if randint(0, 100) > 97:
                                notional_currency = new_tick[3:6]
                            else:
                                notional_currency = new_tick[0:3]

                            notional_multiplier = 1.0

                            if notional_currency == 'JPY':
                                notional_multiplier = 100.0

                            # Randomly choose a realistic order notional
                            # This will later be subdivided into trade notional
                            order_notional = randint(
                                order_min_size * notional_multiplier,
                                order_max_size * notional_multiplier)

                            order_additional_attributes = {
                                'broker_id':
                                constants.test_brokers_dictionary['All'],
                                'broker_sub_id':
                                constants.test_sub_brokers_dictionary['All'],
                                'algo_id':
                                constants.test_algos_dictionary['All'],
                                'algo_settings':
                                'default',
                            }

                            # Construct an order and add it to list
                            ind_order = self._construct_order(
                                df_order,
                                order_type='order',
                                notional=order_notional,
                                notional_currency=notional_currency,
                                side=side_no,
                                tick=new_tick,
                                additional_attributes=
                                order_additional_attributes)

                            order_list.append(ind_order)

                            trade_additional_attributes = self.grab_attributes_from_trade_order(
                                ind_order, [
                                    'broker_id', 'broker_sub_id', 'algo_id',
                                    'algo_settings'
                                ])

                            # Now create all the broker messages for the order

                            # These will consist firstly of placement messages,
                            # then potentially cancels, cancel/replaces, and in most cases randomly assigned trade fills
                            trade_list = self._create_trades_from_order(
                                trade_list=trade_list,
                                df_order=df_order,
                                tick=new_tick,
                                ind_order=ind_order,
                                side_no=side_no,
                                order_notional=order_notional,
                                notional_currency=notional_currency,
                                additional_attributes=
                                trade_additional_attributes)

                            # order_counter = order_counter + 1

        # Aggregate all the lists into DataFrames (setting 'date' as the index)

        # For the trade DataFrame also drop the 'index' column, which was previously used to ensure that fills were after placements
        trade_order_dict = {
            'trade_df':
            self.time_series_ops.aggregate_dict_to_dataframe(
                trade_list, 'date', 'index'),
            'order_df':
            self.time_series_ops.aggregate_dict_to_dataframe(
                order_list, 'date')
        }

        return trade_order_dict
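
To keep memory bounded, create_test_trade_order above splits the request into yearly chunks before asking for market data. The splitting logic is roughly equivalent to this generic sketch (dates are illustrative; split_date_single_list is tcapy's own helper and is not reproduced here):

import pandas as pd

start, finish = pd.Timestamp('2016-01-01'), pd.Timestamp('2018-05-01')

# Year-start boundaries inside the window
boundaries = list(pd.date_range(start, finish, freq='YS'))

# Make sure any partial periods at the start and end are included
if not boundaries or boundaries[0] != start:
    boundaries.insert(0, start)
if boundaries[-1] != finish:
    boundaries.append(finish)

# Consecutive pairs give the (start, finish) chunks to fetch one at a time
yearly_chunks = list(zip(boundaries[:-1], boundaries[1:]))
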
constants = Constants()

if __name__ == '__main__':
    # 'arctic' or 'pystore'
    database_dialect = 'arctic'

    # 'dukascopy' or 'ncfx'
    data_vendor = 'dukascopy'

    ticker = 'EURUSD'

    # Warning: for high frequency data, file sizes might be very big, so you may need to reduce this date range!
    market_request = MarketRequest(start_date='04 Jan 2016',
                                   finish_date='05 Jan 2016',
                                   ticker=ticker,
                                   data_store=database_dialect + '-' +
                                   data_vendor)

    tca_market_trade_loader = TCAMarketTradeLoaderImpl()

    df = tca_market_trade_loader.get_market_data(market_request)

    # Grab a Plotly figure of the data
    fig = PlotRender().plot_timeline(df, title=ticker)

    # Generate HTML file of Plotly figure
    Chart(engine='plotly').plot(fig, style=Style(html_file_output='test.html'))

    print(df)