Example #1
def test_write_trade_data_ms_sql_server():
    """Tests that trade data can be read from CSV and dumped to MS SQL server
    """

    if not run_ms_sql_server_tests: return

    database_source = DatabaseSourceMSSQLServer()

    for t in trade_order_list:
        # dump trade_df to SQL test harness database and overwrite
        database_source.convert_csv_to_table(csv_trade_order_mapping[t], None, sql_trade_order_mapping[t],
                                             database_name=test_harness_sql_server_trade_data_database_name,
                                             if_exists_table='replace', market_trade_data='trade')

        trade_order_df_sql = database_source.fetch_trade_order_data(
            start_date=start_date, finish_date=finish_date, ticker=ticker, table_name=sql_trade_order_mapping[t],
            database_name=test_harness_sql_server_trade_data_database_name)

        database_source_csv = DatabaseSourceCSV()

        trade_order_df_csv = database_source_csv.fetch_trade_order_data(
            start_date=start_date, finish_date=finish_date, ticker=ticker, table_name=csv_trade_order_mapping[t])

        comp_fields = ['executed_price', 'notional', 'side']

        # check that the data read back from MS SQL Server matches that from the original CSV
        for c in comp_fields:
            if c in trade_order_df_sql.columns and c in trade_order_df_csv.columns:
                exec_sql = trade_order_df_sql[c]  # .dropna()
                exec_csv = trade_order_df_csv[c]  # .dropna()

                exec_diff = exec_sql - exec_csv

                assert all(exec_diff.abs() < eps)
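
# The test above relies on module-level fixtures defined elsewhere in the test suite. A minimal
# sketch of their shape, with purely hypothetical placeholder values (the real paths, dates and
# database names come from the project's test configuration):
run_ms_sql_server_tests = True
run_arctic_tests = True

eps = 10 ** -5
ticker = 'EURUSD'
start_date = '01 May 2017'
finish_date = '31 May 2017'

trade_order_list = ['trade_df', 'order_df']
csv_trade_order_mapping = {'trade_df': 'trade_df_test.csv', 'order_df': 'order_df_test.csv'}
sql_trade_order_mapping = {'trade_df': 'trade_table_test', 'order_df': 'order_table_test'}
test_harness_sql_server_trade_data_database_name = 'trade_database_test_harness'
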
Example #2
def test_write_market_data_arctic():
    """Tests we can write market data to Arctic
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can read data from CSV and dump to Arctic (and when read back it matches CSV)
    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    replace_append = ['replace', 'append']

    # Check first when replacing full table and then appending
    for a in arctic_lib_type:
        for i in replace_append:

            database_source = DatabaseSourceArctic(postfix='testharness',
                                                   arctic_lib_type=a)

            # Write CSV to Arctic
            database_source.convert_csv_to_table(
                csv_market_data_store,
                ticker,
                test_harness_arctic_market_data_table,
                if_exists_table=i,
                if_exists_ticker='replace',
                market_trade_data='market',
                remove_duplicates=False)

            # Fetch data directly from CSV
            database_source_csv = DatabaseSourceCSV(
                market_data_database_csv=csv_market_data_store)

            market_df_csv = database_source_csv.fetch_market_data(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker)

            # Read back data from Arctic and compare with CSV
            market_request = MarketRequest(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker,
                data_store=database_source,  # test_harness_arctic_market_data_store
                market_data_database_table=test_harness_arctic_market_data_table)

            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            diff_df.to_csv('test' + i + '.csv')
            assert all(diff_df.abs() < eps)
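
            # A slightly stricter variant of the comparison above (a sketch, not part of the
            # original test): align both series on their common timestamps before differencing,
            # so a row present in only one source fails loudly instead of producing a NaN
            common_index = market_df_csv.index.intersection(market_df_load.index)

            assert len(common_index) == len(market_df_csv.index)
            assert (market_df_csv['mid'].loc[common_index]
                    - market_df_load['mid'].loc[common_index]).abs().max() < eps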
Example #3
def test_write_market_data_db():
    """Tests we can write market data to KDB/Influxdb/PyStore
    """

    database_source_list, test_harness_market_data_table_list, test_harness_data_store_list = \
        _get_db_market_database_source()

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    for i in range(0, len(database_source_list)):

        database_source = database_source_list[i]
        test_harness_market_data_table = test_harness_market_data_table_list[i]
        test_harness_data_store = test_harness_data_store_list[i]

        ### Test we can read data from CSV and dump to InfluxDB/KDB/PyStore (and when read back it matches CSV)
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        replace_append = ['replace', 'append']

        database_source_csv = DatabaseSourceCSV(
            market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=db_start_date,
            finish_date=db_finish_date,
            ticker=ticker)

        # Check first when replacing full table and then appending (will still replace ticker though)
        for if_exists in replace_append:

            database_source.convert_csv_to_table(
                csv_market_data_store,
                ticker,
                test_harness_market_data_table,
                if_exists_table=if_exists,
                if_exists_ticker='replace',
                market_trade_data='market',
                remove_duplicates=False)

            market_request = MarketRequest(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker,
                data_store=test_harness_data_store,
                market_data_database_table=test_harness_market_data_table)

            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            diff_df = market_df_csv['mid'] - market_df_load['mid']

            assert all(diff_df.abs() < eps)
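
# Note: the outer loop above iterates by index over three parallel lists. It can equally be
# written with zip, which avoids the index bookkeeping (a purely stylistic sketch, behaviour
# is identical):
#
# for database_source, test_harness_market_data_table, test_harness_data_store in zip(
#         database_source_list, test_harness_market_data_table_list, test_harness_data_store_list):
#     ...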
Example #4
def test_append_market_data_arctic():
    """Tests we can append market data to arctic (we will have already written data to the test harness database)
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can append (non-overlapping) data to Arctic
    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # use this market request later when reading back from Arctic
    market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                   data_store=test_harness_arctic_market_data_store,
                                   market_data_database_table=test_harness_arctic_market_data_table)

    # load data from CSV for comparison later
    database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

    market_df_csv = database_source_csv.fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv, chunks=2)

    for a in arctic_lib_type:

        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        market_df_lower = market_df_list[0]
        market_df_higher = market_df_list[1]

        database_source.append_market_data(market_df_lower, ticker, table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='replace', if_exists_ticker='replace', remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this will fail!)
        try:
            database_source.append_market_data(market_df_lower, ticker,
                                               table_name=test_harness_arctic_market_data_table,
                                               if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)
        except ErrorWritingOverlapDataException as e:
            overlap_error = True

        assert overlap_error
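
        # The same check can be written with pytest's raises context manager (a sketch; it assumes
        # the suite runs under pytest and that the overlapping append raises as above):
        import pytest

        with pytest.raises(ErrorWritingOverlapDataException):
            database_source.append_market_data(market_df_lower, ticker,
                                               table_name=test_harness_arctic_market_data_table,
                                               if_exists_table='append', if_exists_ticker='append',
                                               remove_duplicates=False)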

        # Append non-overlapping data which follows (writing overlapping data into Arctic will mess up the datastore!)
        database_source.append_market_data(market_df_higher, ticker, table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)

        market_df_all_read_back = market_loader.get_market_data(market_request=market_request)

        assert all((market_df_all_read_back['mid'] - market_df_csv['mid']).abs() < eps)
Example #5
def test_write_trade_data_sql():
    """Tests that trade data can be read from CSV and dumped to various SQL dialect
    """

    database_source_list, test_harness_trade_database_list, test_harness_data_store_list = \
        _get_db_trade_database_source()

    for i in range(0, len(database_source_list)):

        database_source = database_source_list[i]

        test_harness_trade_database = test_harness_trade_database_list[i]
        test_harness_data_store = test_harness_data_store_list[i]

        for t in trade_order_list:
            # Dump trade_df to SQL test harness database and overwrite
            database_source.convert_csv_to_table(
                csv_trade_order_mapping[t],
                None, (sql_trade_order_mapping[test_harness_data_store])[t],
                test_harness_trade_database,
                if_exists_table='replace',
                market_trade_data='trade')

            trade_order_df_sql = database_source.fetch_trade_order_data(
                start_date=start_date,
                finish_date=finish_date,
                ticker=ticker,
                table_name=sql_trade_order_mapping[test_harness_data_store][t],
                database_name=test_harness_trade_database)

            database_source_csv = DatabaseSourceCSV()

            trade_order_df_csv = database_source_csv.fetch_trade_order_data(
                start_date=start_date,
                finish_date=finish_date,
                ticker=ticker,
                table_name=csv_trade_order_mapping[t])

            comp_fields = ['executed_price', 'notional', 'side']

            # Check that the data read back from SQL database matches that from the original CSV
            for c in comp_fields:
                if c in trade_order_df_sql.columns and c in trade_order_df_csv.columns:
                    exec_sql = trade_order_df_sql[c]  #.dropna()
                    exec_csv = trade_order_df_csv[c]  #.dropna()

                    exec_diff = exec_sql - exec_csv

                    assert all(exec_diff.abs() < eps)
Example #6
def test_write_multiple_wildcard_market_data_csvs_arctic():
    """Tests we can write sequential market data CSVs (or HDF5) whose path has been specified by a wildcard (eg. EURUSD*.csv).
    It is assumed that the CSVs are in chronological orders, from their filenames.
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness',
                                               arctic_lib_type=a)

        ### Read CSV data which is sorted ascending (default!)
        database_source.convert_csv_to_table(
            csv_market_data_store,
            ticker,
            test_harness_arctic_market_data_table,
            if_exists_table='replace',
            if_exists_ticker='replace',
            market_trade_data='market',
            csv_read_chunksize=10**6,
            remove_duplicates=False)

        database_source_csv = DatabaseSourceCSV(
            market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=arctic_start_date,
            finish_date=arctic_finish_date,
            ticker=ticker)

        # Prepare the CSV folder first
        csv_folder = os.path.join(constants.test_data_harness_folder,
                                  'csv_arctic_mult')

        # Empty the CSV test harness folder, where we shall dump the mini CSVs
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        # Split the CSV file into several mini CSV files (and also Parquet files)
        market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv,
                                                            chunks=3)

        for chunk_no, m in enumerate(market_df_list):
            m.to_csv(os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.csv'))

            UtilFunc().write_dataframe_to_binary(
                m, os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.parquet'),
                format='parquet')

        file_ext = ['csv', 'parquet']

        for f in file_ext:
            ### Read CSV data from the mini CSVs (using wildcard char) and dump to Arctic
            database_source.convert_csv_to_table(
                os.path.join(csv_folder, "EURUSD*." + f),
                ticker,
                test_harness_arctic_market_data_table,
                if_exists_table='append',
                if_exists_ticker='replace',
                market_trade_data='market',
                csv_read_chunksize=10**6,
                remove_duplicates=False)

            market_request = MarketRequest(
                start_date=arctic_start_date,
                finish_date=arctic_finish_date,
                ticker=ticker,
                data_store=database_source,
                market_data_database_table=test_harness_arctic_market_data_table
            )

            # Read back from Arctic
            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            # Compare reading directly from the original large CSV vs. reading back from arctic (which was dumped from split CSVs)
            diff_df = abs(market_df_load['mid'] - market_df_csv['mid'])

            outside_bounds = diff_df[diff_df >= eps]

            assert len(outside_bounds) == 0
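
    # The wildcard read above depends on the chunk files sorting into chronological order by
    # filename. A quick stdlib sketch of the expansion it relies on (illustrative only):
    import glob

    csv_chunks = sorted(glob.glob(os.path.join(csv_folder, 'EURUSD*.csv')))
    print(csv_chunks)  # expected order: EURUSD0.csv, EURUSD1.csv, EURUSD2.csv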
Example #7
        def callback(*args):
            """Calculates the aggregated TCA computation when the "Calculate" button is clicked. Cached the results and
            then updates the status label when done.

            Parameters
            ----------
            ticker_val : str(list)
                Tickers (e.g. EURUSD, GBPUSD etc.)

            venue_val : str(list)
                Trading venues

            start_date_val : str(list)
                Start date of TCA calculations

            finish_date_val : str(list)
                Finish date of TCA calculations

            reload_val : str
                Whether underlying market and trade data should be reloaded from dataframe or fetched from cache

            n_clicks : int
                Number of times the button has been clicked

            Returns
            -------
            str
            """
            start = time.time()

            tag = tca_type + '-calculation-button'

            logger = LoggerManager.getLogger(__name__)
            logger.debug('Triggered click ' + tca_type)

            # old_clicks = self._session_manager.get_session_clicks(tag)

            # make sure none of the other charts are plotted till we have completed this!

            if tca_type == 'aggregated':
                uploadbox = args

                if uploadbox is not None:

                    if isinstance(uploadbox, tuple):
                        uploadbox = uploadbox[0]

                    # Assume that the user uploaded a binary CSV file
                    trade_df = DatabaseSourceCSVBinary(
                        trade_data_database_csv=uploadbox
                    ).fetch_trade_order_data()

                    data_frame_trade_order_mapping = OrderedDict([('trade_df',
                                                                   trade_df)])

                    start_date = trade_df.index[0]
                    finish_date = trade_df.index[-1]

                    ticker_val = FXConv().correct_unique_notation_list(
                        trade_df['ticker'].unique().tolist())

                    metric_val = 'slippage'

                    self._session_manager.set_session_flag('metric',
                                                           value=metric_val)
                    self._session_manager.set_session_flag(
                        'aggregated-visualization', True)

                    try:
                        #if True:

                        # clear the cache for the current user
                        self._glob_volatile_cache.clear_key_match(
                            self._session_manager.get_session_id())

                        results_form = [
                            # show the distribution of the selected metric for trades weighted by notional
                            # aggregated by ticker and then by venue
                            DistResultsForm(
                                trade_order_list=['trade_df'],
                                metric_name=metric_val,
                                aggregate_by_field=[
                                    'ticker', 'broker_id', 'venue'
                                ],
                                weighting_field=
                                'executed_notional_in_reporting_currency'),

                            # display the timeline of metrics average by day (and weighted by notional)
                            TimelineResultsForm(
                                trade_order_list=['trade_df'],
                                by_date='date',
                                metric_name=metric_val,
                                aggregation_metric='mean',
                                aggregate_by_field=['ticker'],
                                scalar=10000.0,
                                weighting_field=
                                'executed_notional_in_reporting_currency'),

                            # display a bar chart showing the average metric weighted by notional and aggregated by ticker
                            # venue
                            BarResultsForm(
                                trade_order_list=['trade_df'],
                                metric_name=metric_val,
                                aggregation_metric='mean',
                                aggregate_by_field=[
                                    'ticker', 'venue', 'broker_id'
                                ],
                                scalar=10000.0,
                                weighting_field=
                                'executed_notional_in_reporting_currency'),

                            # Create a table of the markout of every trade
                            TableResultsForm(
                                trade_order_list=['trade_df'],
                                metric_name='markout',
                                filter_by='all',
                                replace_text={
                                    'markout_': '',
                                    'executed_notional': 'exec not',
                                    'notional_currency': 'exec not cur'
                                },
                                keep_fields=[
                                    'executed_notional', 'side',
                                    'notional_currency'
                                ],
                                scalar={
                                    'all': 10000.0,
                                    'exclude': ['executed_notional', 'side']
                                },
                                round_figures_by={
                                    'all': 2,
                                    'executed_notional': 0,
                                    'side': 0
                                },
                                weighting_field='executed_notional')
                        ]

                        try:
                            #if True:
                            timeline_trade_df_metric_by_ticker = self.get_cached_computation_analysis(
                                key='timeline_trade_df_' + metric_val +
                                '_by_ticker',
                                tca_engine=self._tca_engine,
                                force_calculate=True,
                                tca_request=TCARequest(
                                    start_date=start_date,
                                    finish_date=finish_date,
                                    ticker=ticker_val,
                                    tca_type='aggregated',
                                    market_data_store='arctic-ncfx',
                                    trade_data_store='dataframe',
                                    trade_order_mapping=
                                    data_frame_trade_order_mapping,
                                    metric_calcs=[
                                        MetricSlippage(),
                                        MetricMarkout(
                                            trade_order_list=['trade_df'])
                                    ],
                                    results_form=results_form,
                                    dummy_market=True,
                                    use_multithreading=True))

                            calc_start = timeline_trade_df_metric_by_ticker.index[0]
                            calc_end = timeline_trade_df_metric_by_ticker.index[-1]

                            aggregated_title = self.create_status_msg_flags(
                                'aggregated', ticker_val, calc_start, calc_end)

                            logger.debug('Plotted aggregated summary plot!')

                            finish = time.time()

                        except Exception as e:
                            logger.exception(e)

                            return "Status: error - " + str(
                                e
                            ) + ". Check data exists for these dates?" + self.get_username_string(
                            )

                    except Exception as e:
                        logger.exception(e)

                        return 'Status: error - ' + str(e) + ". Check data exists for these dates?" + self.get_username_string()

                    return 'Status: calculated ' + str(round(finish - start, 3)) + "s for " + aggregated_title + self.get_username_string()

            raise dash.exceptions.PreventUpdate("No data changed")  # not very elegant, but the only way to prevent plots from disappearing
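
        # For context, a callback like the one above is typically registered with Dash along these
        # lines (a sketch with hypothetical component ids; the real wiring happens elsewhere in
        # the application):
        #
        # from dash.dependencies import Input, Output
        #
        # app.callback(Output(tca_type + '-status', 'children'),
        #              [Input(tca_type + '-calculation-button', 'n_clicks')])(callback)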
Example #8
    def get(self):
        logger = LoggerManager.getLogger(__name__)

        if request.content_type == 'application/json':
            json_input = request.json

            if 'trade_df' in json_input.keys() and 'username' in json_input.keys() and 'password' in json_input.keys():
                username = json_input['username']

                # TODO check passwords
                password = json_input['password']

                logger.info("Received API request from user: "******"Failed to complete request for user: "******" - " + str(e))

                    return "Failed to complete request"

                logger.info("Completed API request from user: "******"Unsupported media type, only accepts JSON"
Example #9
#
# Copyright 2020 Cuemacro
#
# See the License for the specific language governing permissions and limitations under the License.
#

import os

from chartpy import Chart
from tcapy.data.databasesource import DatabaseSourceCSVBinary

parquet_path = '/data/csv_dump/dukascopy/'

filename = [
    'EURUSD_dukascopy_2020-05-01_00_00_00.096000+00_002020-05-31_23_59_59.084000+00_00.parquet'
]

for f in filename:
    final_path = os.path.join(parquet_path, f)

    database_source = DatabaseSourceCSVBinary(
        market_data_database_csv=final_path)
    df = database_source.fetch_market_data()

    print(df)

    df_resample = df.resample('1min').last()

    Chart().plot(df_resample)
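
    # A variant (sketch, not part of the original script): downsample to 1-minute OHLC bars
    # instead of taking the last tick, assuming the frame has a 'mid' column as in the other examples
    df_ohlc = df['mid'].resample('1min').ohlc()

    print(df_ohlc)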
Example #10
def test_fetch_market_trade_data_csv():
    """Tests downloading of market and trade/order data from CSV files
    """

    ### Get market data
    market_loader = Mediator.get_tca_market_trade_loader()

    market_request = MarketRequest(start_date=start_date,
                                   finish_date=finish_date,
                                   ticker=ticker,
                                   data_store=csv_market_data_store)

    market_df = market_loader.get_market_data(market_request)

    assert not(market_df.empty) \
           and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
           and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    # For a high-level trade data request, we need to use a TCARequest, because it usually involves some
    # market data download (we are assuming that the market data is being downloaded from our Arctic database,
    # e.g. for converting notionals to the reporting currency)
    tca_request = TCARequest(
        start_date=start_date,
        finish_date=finish_date,
        ticker=ticker,
        trade_data_store='csv',
        market_data_store=test_harness_arctic_market_data_store,
        trade_order_mapping=csv_trade_order_mapping,
        market_data_database_table=test_harness_arctic_market_data_table)

    for t in trade_order_list:
        trade_order_df = market_loader.get_trade_order_data(tca_request, t)

        try:
            trade_order_df = Mediator.get_volatile_cache().get_dataframe_handle(trade_order_df)
        except Exception:
            pass

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    ### Test using DataFactory and DatabaseSource
    from tcapy.data.datafactory import DataFactory

    data_factory = DataFactory()

    for t in trade_order_list:
        ### Test using DataFactory
        trade_request = TradeRequest(
            start_date=start_date,
            finish_date=finish_date,
            ticker=ticker,
            data_store='csv',
            trade_order_mapping=csv_trade_order_mapping,
            trade_order_type=t)

        trade_order_df = data_factory.fetch_table(trade_request)

        assert not trade_order_df.empty \
                          and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
                          and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        ### Test using DatabaseSourceCSV
        database_source = DatabaseSourceCSV()

        trade_order_df = database_source.fetch_trade_order_data(
            start_date,
            finish_date,
            ticker,
            table_name=csv_trade_order_mapping[t])

        assert not trade_order_df.empty \
                             and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
                             and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')