Example #1
def test_write_market_data_arctic():
    """Tests we can write market data to Arctic
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can read data from CSV and dump to Arctic (and when read back it matches CSV)
    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    replace_append = ['replace', 'append']

    # Check first when replacing full table and then appending
    for a in arctic_lib_type:
        for i in replace_append:

            database_source = DatabaseSourceArctic(postfix='testharness',
                                                   arctic_lib_type=a)

            # Write CSV to Arctic
            database_source.convert_csv_to_table(
                csv_market_data_store,
                ticker,
                test_harness_arctic_market_data_table,
                if_exists_table=i,
                if_exists_ticker='replace',
                market_trade_data='market',
                remove_duplicates=False)

            # Fetch data directly from CSV
            database_source_csv = DatabaseSourceCSV(
                market_data_database_csv=csv_market_data_store)

            market_df_csv = database_source_csv.fetch_market_data(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker)

            # Read back data from Arctic and compare with CSV
            market_request = MarketRequest(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker,
                data_store=database_source,
                market_data_database_table=test_harness_arctic_market_data_table
            )

            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            diff_df = abs(market_df_csv['mid'] - market_df_load['mid'])

            # Dump the differences to CSV for manual inspection
            diff_df.to_csv('test' + i + '.csv')
            assert all(diff_df < eps)
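
These examples lean on module-level fixtures (tcapy_version, run_arctic_tests, ticker, eps, csv_market_data_store, arctic_lib_type, the test harness table names, the test date ranges and so on) defined at the top of the original test file but not shown here. A minimal sketch of plausible values follows; every value is an illustrative assumption, not taken from the source.

# Hypothetical module-level fixtures assumed by these examples (illustrative values only)
import os

tcapy_version = 'test_tcapy'       # assumed version tag passed to the loader
run_arctic_tests = True            # set to False to skip the Arctic-dependent tests
ticker = 'EURUSD'
eps = 10 ** -5                     # tolerance when comparing 'mid' price columns
arctic_lib_type = ['CHUNK_STORE']  # Arctic storage engines to exercise

csv_market_data_store = os.path.join('test', 'resources', 'EURUSD_market_data.csv')
arctic_market_data_store = 'arctic-testharness'
test_harness_arctic_market_data_table = 'market_data_table_test_harness'

start_date = '05 Jan 2016'; finish_date = '06 Jan 2016'                   # assumed populated range
invalid_start_date = '01 Jan 1999'; invalid_finish_date = '02 Jan 1999'   # outside the stored data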
Example #2
def test_fetch_market_data_db():
    """Tests that we can fetch data from Arctic/KDB/InfluxDB. Note you need to populate the database first before running this for
    the desired dates.
    """
    market_loader = Mediator.get_tca_market_trade_loader()

    market_data_store_list, market_data_database_table_list = _get_db_market_data_store(
    )

    for market_data_store, market_data_database_table in zip(
            market_data_store_list, market_data_database_table_list):
        market_request = MarketRequest(
            start_date=start_date,
            finish_date=finish_date,
            ticker=ticker,
            data_store=market_data_store,
            market_data_database_table=market_data_database_table)

        market_df = market_loader.get_market_data(market_request)

        try:
            market_df = Mediator.get_volatile_cache().get_dataframe_handle(
                market_df)
        except Exception:
            # Not a cached handle; use the DataFrame as-is
            pass

        assert not market_df.empty \
               and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        market_request.start_date = invalid_start_date
        market_request.finish_date = invalid_finish_date

        market_empty_df = market_loader.get_market_data(market_request)

        try:
            market_empty_df = Mediator.get_volatile_cache(
            ).get_dataframe_handle(market_empty_df)
        except Exception:
            # Not a cached handle; use the DataFrame as-is
            pass

        assert market_empty_df.empty
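
_get_db_market_data_store() is a private helper in the original test module whose body is not shown. A plausible sketch, in which every store label and table name is an assumption:

def _get_db_market_data_store():
    """Hypothetical helper: return parallel lists of data store labels and
    the market data table to query in each (all values illustrative)."""
    market_data_store_list = ['arctic-testharness', 'kdb-testharness', 'influxdb-testharness']
    market_data_database_table_list = ['market_data_table_test_harness'] * 3

    return market_data_store_list, market_data_database_table_list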
Example #3
def test_write_market_data_db():
    """Tests we can write market data to KDB/Influxdb/PyStore
    """
    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    for database_source, test_harness_market_data_table, test_harness_data_store in zip(
            *_get_db_market_database_source()):

        ### Test we can read data from CSV and dump to InfluxDB/KDB/PyStore (and when read back it matches CSV)
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        replace_append = ['replace', 'append']

        database_source_csv = DatabaseSourceCSV(
            market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=db_start_date,
            finish_date=db_finish_date,
            ticker=ticker)

        # Check first when replacing full table and then appending (will still replace ticker though)
        for i in replace_append:

            database_source.convert_csv_to_table(
                csv_market_data_store,
                ticker,
                test_harness_market_data_table,
                if_exists_table=i,
                if_exists_ticker='replace',
                market_trade_data='market',
                remove_duplicates=False)

            market_request = MarketRequest(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker,
                data_store=test_harness_data_store,
                market_data_database_table=test_harness_market_data_table)

            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            diff_df = abs(market_df_csv['mid'] - market_df_load['mid'])

            assert all(diff_df < eps)
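
Similarly, _get_db_market_database_source() is not shown in the source; it presumably returns three parallel lists, which the zip(*...) above unpacks per database. A sketch under that assumption (constructors simplified; check the real signatures in tcapy.data.databasesource):

def _get_db_market_database_source():
    """Hypothetical helper: parallel lists of DatabaseSource objects, their
    test harness tables and data store labels (all values illustrative)."""
    from tcapy.data.databasesource import DatabaseSourceInfluxDB, DatabaseSourceKDB

    database_source_list = [DatabaseSourceKDB(), DatabaseSourceInfluxDB()]
    test_harness_market_data_table_list = ['market_data_table_test_harness'] * 2
    test_harness_data_store_list = ['kdb-testharness', 'influxdb-testharness']

    return database_source_list, test_harness_market_data_table_list, test_harness_data_store_list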
Example #4
def write_mongo_db_atlas_arctic():
    """Tests we can write market data to Arctic/MongoDB on Atlas (cloud)
    """

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can read data from CSV and dump to Arctic (and when read back it matches CSV)
    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    database_source = DatabaseSourceArctic(
        postfix='testharness',
        arctic_lib_type='CHUNK_STORE',
        connection_string=arctic_connection_string)

    # Write CSV to Arctic
    database_source.convert_csv_to_table(csv_market_data_store,
                                         ticker,
                                         test_harness_arctic_market_data_table,
                                         if_exists_table='replace',
                                         if_exists_ticker='replace',
                                         market_trade_data='market',
                                         remove_duplicates=False)

    # Read back data from Arctic and compare with CSV
    market_request = MarketRequest(
        start_date=db_start_date,
        finish_date=db_finish_date,
        ticker=ticker,
        data_store=database_source,
        market_data_database_table=test_harness_arctic_market_data_table)

    market_df_load = market_loader.get_market_data(
        market_request=market_request)

    print(market_df_load)
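
Since this function has to be invoked by hand, a sketch of how it might be wired up follows; the connection string is a placeholder in the standard MongoDB Atlas URI format, not a working credential:

# Illustrative placeholder only; substitute real Atlas credentials and host
arctic_connection_string = 'mongodb+srv://<user>:<password>@<cluster>.mongodb.net/?retryWrites=true'

if __name__ == '__main__':
    write_mongo_db_atlas_arctic()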
Example #5
def test_fetch_market_trade_data_csv():
    """Tests downloading of market and trade/order data from CSV files
    """

    ### Get market data
    market_loader = Mediator.get_tca_market_trade_loader()

    market_request = MarketRequest(start_date=start_date,
                                   finish_date=finish_date,
                                   ticker=ticker,
                                   data_store=csv_market_data_store)

    market_df = market_loader.get_market_data(market_request)

    assert not market_df.empty \
           and market_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
           and market_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    # For a high-level trade data request, we need to use a TCARequest, because it usually involves some
    # market data download, eg. for converting notionals to the reporting currency (here we assume the
    # market data is downloaded from our Arctic database)
    tca_request = TCARequest(start_date=start_date,
                             finish_date=finish_date,
                             ticker=ticker,
                             trade_data_store='csv',
                             market_data_store=arctic_market_data_store,
                             trade_order_mapping=csv_trade_order_mapping)

    for t in trade_order_list:
        trade_order_df = market_loader.get_trade_order_data(tca_request, t)

        try:
            trade_order_df = Mediator.get_volatile_cache(
            ).get_dataframe_handle(trade_order_df)
        except Exception:
            # Not a cached handle; use the DataFrame as-is
            pass

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

    ### Test using DataFactory and DatabaseSource
    from tcapy.data.datafactory import DataFactory

    data_factory = DataFactory()

    for t in trade_order_list:
        ### Test using DataFactory
        trade_request = TradeRequest(
            start_date=start_date,
            finish_date=finish_date,
            ticker=ticker,
            data_store='csv',
            trade_order_mapping=csv_trade_order_mapping,
            trade_order_type=t)

        trade_order_df = data_factory.fetch_table(trade_request)

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')

        ### Test using DatabaseSourceCSV
        from tcapy.data.databasesource import DatabaseSourceCSV

        database_source = DatabaseSourceCSV()

        trade_order_df = database_source.fetch_trade_order_data(
            start_date,
            finish_date,
            ticker,
            table_name=csv_trade_order_mapping[t])

        assert not trade_order_df.empty \
               and trade_order_df.index[0] >= pd.Timestamp(start_date).tz_localize('utc') \
               and trade_order_df.index[-1] <= pd.Timestamp(finish_date).tz_localize('utc')
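
trade_order_list and csv_trade_order_mapping are further fixtures from the original test file; the mapping presumably points each trade/order table at the CSV file backing it. A plausible shape (paths illustrative; 'trade_df' and 'order_df' are tcapy's usual table names):

import os

# Hypothetical mapping from trade/order table name to its backing CSV file
folder = os.path.join('test', 'resources')

csv_trade_order_mapping = {'trade_df': os.path.join(folder, 'trade_df_test.csv'),
                           'order_df': os.path.join(folder, 'order_df_test.csv')}

trade_order_list = list(csv_trade_order_mapping.keys())  # ['trade_df', 'order_df']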
Example #6
def test_append_market_data_db():
    """Tests we can append market data to KDB/InfluxDB/PyStore.
    """

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    for database_source, test_harness_market_data_table, test_harness_data_store in zip(
            *_get_db_market_database_source()):

        ### Test we can append (non-overlapping) data to InfluxDB/KDB/PyStore
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        market_request = MarketRequest(
            start_date=db_start_date,
            finish_date=db_finish_date,
            ticker=ticker,
            data_store=test_harness_data_store,
            market_data_database_table=test_harness_market_data_table)

        market_df_load = market_loader.get_market_data(
            market_request=market_request)

        market_df_list = TimeSeriesOps().split_array_chunks(market_df_load,
                                                            chunks=2)

        market_df_lower = market_df_list[0]
        market_df_higher = market_df_list[1]

        database_source.append_market_data(
            market_df_lower,
            ticker,
            table_name=test_harness_market_data_table,
            if_exists_table='replace',
            if_exists_ticker='replace',
            remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this will fail!)
        try:
            database_source.append_market_data(
                market_df_lower,
                ticker,
                table_name=test_harness_market_data_table,
                if_exists_table='append',
                if_exists_ticker='append',
                remove_duplicates=False)
        except ErrorWritingOverlapDataException:
            overlap_error = True

        assert overlap_error

        # Append non-overlapping data which follows (writing overlapping data can end up with duplicated
        # values, although KDB/InfluxDB/PyStore will allow it)
        database_source.append_market_data(
            market_df_higher,
            ticker,
            table_name=test_harness_market_data_table,
            if_exists_table='append',
            if_exists_ticker='append',
            remove_duplicates=False)

        market_df_all_read_back = market_loader.get_market_data(
            market_request=market_request)

        assert all(
            abs(market_df_all_read_back['mid'] - market_df_load['mid']) < eps)
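
TimeSeriesOps().split_array_chunks cuts a DataFrame into consecutive row-wise pieces, which is what lets the test append a "lower" and then a "higher" date range. A rough stand-in (not tcapy's implementation) built on numpy, with a usage sketch:

import numpy as np
import pandas as pd

def split_array_chunks(df, chunks=2):
    """Rough stand-in for TimeSeriesOps.split_array_chunks: split a DataFrame
    into consecutive row-wise pieces, preserving time ordering."""
    return np.array_split(df, chunks)

# Usage sketch: halve a small time series into lower/higher date ranges
df = pd.DataFrame({'mid': range(10)},
                  index=pd.date_range('01 Jan 2016', periods=10, freq='min'))
market_df_lower, market_df_higher = split_array_chunks(df, chunks=2)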
Example #7
def test_write_multiple_wildcard_market_data_csvs_arctic():
    """Tests we can write sequential market data CSVs (or HDF5) whose path has been specified by a wildcard (eg. EURUSD*.csv).
    It is assumed that the CSVs are in chronological orders, from their filenames.
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness',
                                               arctic_lib_type=a)

        ### Read CSV data which is sorted ascending (default!)
        database_source.convert_csv_to_table(
            csv_market_data_store,
            ticker,
            test_harness_arctic_market_data_table,
            if_exists_table='replace',
            if_exists_ticker='replace',
            market_trade_data='market',
            csv_read_chunksize=10**6,
            remove_duplicates=False)

        database_source_csv = DatabaseSourceCSV(
            market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=arctic_start_date,
            finish_date=arctic_finish_date,
            ticker=ticker)

        # Prepare the CSV folder first
        csv_folder = resource('csv_arctic_mult')

        # Empty the CSV test harness folder, where we shall dump the mini CSVs
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        # Split the CSV file into several mini CSV files (and also Parquet files)
        market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv,
                                                            chunks=3)

        for chunk_no, m in enumerate(market_df_list):
            m.to_csv(
                os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.csv'))
            UtilFunc().write_dataframe_to_binary(
                m,
                os.path.join(csv_folder,
                             "EURUSD" + str(chunk_no) + '.parquet'),
                format='parquet')

        file_ext = ['csv', 'parquet']

        for f in file_ext:
            ### Read CSV data from the mini CSVs (using wildcard char) and dump to Arctic
            database_source.convert_csv_to_table(
                os.path.join(csv_folder, "EURUSD*." + f),
                ticker,
                test_harness_arctic_market_data_table,
                if_exists_table='append',
                if_exists_ticker='replace',
                market_trade_data='market',
                csv_read_chunksize=10**6,
                remove_duplicates=False)

            market_request = MarketRequest(
                start_date=arctic_start_date,
                finish_date=arctic_finish_date,
                ticker=ticker,
                data_store=database_source,
                market_data_database_table=test_harness_arctic_market_data_table
            )

            # Read back from Arctic
            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            # Compare reading directly from the original large CSV vs. reading back from arctic (which was dumped from split CSVs)
            diff_df = abs(market_df_load['mid'] - market_df_csv['mid'])

            outside_bounds = diff_df[diff_df >= eps]

            assert len(outside_bounds) == 0
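
Under the hood, wildcard support amounts to expanding the pattern and reading the matching files in sorted filename order, which is why the chronological naming matters. A minimal sketch of the idea (not tcapy's implementation; the 'Date' index column is an assumption):

import glob
import pandas as pd

def read_wildcard_csvs(path_pattern):
    """Read every CSV matching the pattern in sorted filename order and
    concatenate into one chronologically ordered DataFrame."""
    frames = [pd.read_csv(f, index_col='Date', parse_dates=['Date'])
              for f in sorted(glob.glob(path_pattern))]

    return pd.concat(frames)

# eg. read_wildcard_csvs(os.path.join(csv_folder, 'EURUSD*.csv'))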
Example #8
def test_write_chunked_market_data_arctic():
    """For very large CSV files we might need to read them in chunks. tcapy supports this and also supports CSVs
    which are sorted in reverse (ie. descending). We need to enable chunking and reverse reading with flags.

    This tests whether chunked data is written correctly to Arctic, comparing it with that read from CSV directly
    """

    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Load data from CSVs directly (for comparison later)
    market_df_csv_desc = DatabaseSourceCSV(
        market_data_database_csv=csv_reverse_market_data_store
    ).fetch_market_data(start_date=arctic_start_date,
                        finish_date=arctic_finish_date,
                        ticker=ticker)

    market_df_csv_asc = DatabaseSourceCSV(
        market_data_database_csv=csv_market_data_store).fetch_market_data(
            start_date=arctic_start_date,
            finish_date=arctic_finish_date,
            ticker=ticker)

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness',
                                               arctic_lib_type=a)

        ### Write CSV data to Arctic which is sorted ascending (default!)
        database_source.convert_csv_to_table(
            csv_market_data_store,
            ticker,
            test_harness_arctic_market_data_table,
            if_exists_table='replace',
            if_exists_ticker='replace',
            market_trade_data='market',
            csv_read_chunksize=100000,
            remove_duplicates=False)

        market_request = MarketRequest(
            start_date=arctic_start_date,
            finish_date=arctic_finish_date,
            ticker=ticker,
            data_store=database_source,
            market_data_database_table=test_harness_arctic_market_data_table)

        market_df_load = market_loader.get_market_data(
            market_request=market_request)

        # Compare reading directly from the CSV vs. reading back from arctic
        assert all(abs(market_df_csv_asc['mid'] - market_df_load['mid']) < eps)

        ### Write CSV data to Arctic which is sorted descending
        database_source.convert_csv_to_table(
            csv_reverse_market_data_store,
            ticker,
            test_harness_arctic_market_data_table,
            if_exists_table='append',
            if_exists_ticker='replace',
            market_trade_data='market',
            csv_read_chunksize=100000,
            read_in_reverse=True,
            remove_duplicates=False)

        market_request = MarketRequest(
            start_date=arctic_start_date,
            finish_date=arctic_finish_date,
            ticker=ticker,
            data_store=database_source,
            market_data_database_table=test_harness_arctic_market_data_table)

        market_df_load = market_loader.get_market_data(
            market_request=market_request)

        # Compare reading directly from the CSV vs. reading back from arctic
        assert all(abs(market_df_csv_desc['mid'] - market_df_load['mid']) < eps)
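
csv_read_chunksize most likely maps onto pandas' chunked CSV reading, which streams a large file in fixed-size pieces instead of loading it whole; read_in_reverse would additionally restore ascending order before writing. A sketch of the chunked-read pattern (an assumption, not tcapy's code):

import pandas as pd

def stream_csv_chunks(csv_path, chunksize=100000):
    """Yield a large CSV in pieces of `chunksize` rows, so each piece can be
    written to the database without holding the whole file in memory."""
    for chunk in pd.read_csv(csv_path, chunksize=chunksize):
        yield chunk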
Example #9
def test_append_market_data_arctic():
    """Tests we can append market data to arctic (we will have already written data to the test harness database)
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can append (non-overlapping) data to Arctic
    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Load data from CSV for comparison later
    database_source_csv = DatabaseSourceCSV(
        market_data_database_csv=csv_market_data_store)

    market_df_csv = database_source_csv.fetch_market_data(
        start_date=arctic_start_date,
        finish_date=arctic_finish_date,
        ticker=ticker)

    market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv,
                                                        chunks=2)

    for a in arctic_lib_type:

        database_source = DatabaseSourceArctic(postfix='testharness',
                                               arctic_lib_type=a)

        market_df_lower = market_df_list[0]
        market_df_higher = market_df_list[1]

        database_source.append_market_data(
            market_df_lower,
            ticker,
            table_name=test_harness_arctic_market_data_table,
            if_exists_table='replace',
            if_exists_ticker='replace',
            remove_duplicates=False)

        overlap_error = False

        ## Try to append overlapping data (this will fail!)
        try:
            database_source.append_market_data(
                market_df_lower,
                ticker,
                table_name=test_harness_arctic_market_data_table,
                if_exists_table='append',
                if_exists_ticker='append',
                remove_duplicates=False)
        except ErrorWritingOverlapDataException:
            overlap_error = True

        assert overlap_error

        # Append non-overlapping data which follows (writing overlapping data into Arctic will mess up the datastore!)
        database_source.append_market_data(
            market_df_higher,
            ticker,
            table_name=test_harness_arctic_market_data_table,
            if_exists_table='append',
            if_exists_ticker='append',
            remove_duplicates=False)

        # Use this market request later when reading back from Arctic
        market_request = MarketRequest(
            start_date=arctic_start_date,
            finish_date=arctic_finish_date,
            ticker=ticker,
            data_store=database_source,
            market_data_database_table=test_harness_arctic_market_data_table)

        market_df_all_read_back = market_loader.get_market_data(
            market_request=market_request)

        diff_df = abs(market_df_all_read_back['mid'] - market_df_csv['mid'])

        outside_bounds = diff_df[diff_df >= eps]

        assert len(outside_bounds) == 0
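
The overlap guard exercised above can be implemented by comparing the incoming data's first timestamp against the last timestamp already stored for the ticker. A minimal sketch (the exception name is tcapy's; the check itself is an assumed implementation):

class ErrorWritingOverlapDataException(Exception):
    """Stand-in for tcapy's exception of the same name."""

def check_no_overlap(existing_df, new_df):
    """Raise if the new data does not start strictly after the stored data
    ends, since appending overlapping rows would duplicate the ticker's data."""
    if not existing_df.empty and new_df.index[0] <= existing_df.index[-1]:
        raise ErrorWritingOverlapDataException(
            'New data starts at %s, at/before stored end %s'
            % (new_df.index[0], existing_df.index[-1]))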