def test_write_market_data_arctic():
    """Tests we can write market data to Arctic
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can read data from CSV and dump to Arctic (and that it matches the CSV when read back)
    db_start_date = '01 Jan 2016'
    db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    replace_append = ['replace', 'append']

    # Check first when replacing the full table and then appending (if_exists_ticker stays 'replace', so the ticker's data is always overwritten)
    for a in arctic_lib_type:
        for if_exists in replace_append:

            database_source = DatabaseSourceArctic(postfix='testharness',
                                                   arctic_lib_type=a)

            # Write CSV to Arctic
            database_source.convert_csv_to_table(
                csv_market_data_store,
                ticker,
                test_harness_arctic_market_data_table,
                if_exists_table=if_exists,
                if_exists_ticker='replace',
                market_trade_data='market',
                remove_duplicates=False)

            # Fetch data directly from CSV
            database_source_csv = DatabaseSourceCSV(
                market_data_database_csv=csv_market_data_store)

            market_df_csv = database_source_csv.fetch_market_data(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker)

            # Read back data from Arctic and compare with CSV
            market_request = MarketRequest(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker,
                data_store=database_source,  # alternatively test_harness_arctic_market_data_store
                market_data_database_table=test_harness_arctic_market_data_table
            )

            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            # Compare the mid prices from CSV vs. Arctic; take the absolute
            # difference so deviations in either direction are caught
            diff_df = (market_df_csv['mid'] - market_df_load['mid']).abs()

            diff_df.to_csv('test' + if_exists + '.csv')
            assert all(diff_df < eps)
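
# The test above follows a common tcapy pattern: dump a CSV into Arctic, then read
# it back through the market loader. Below is a minimal sketch of that round trip
# on its own (not one of the original tests; the CSV path and table name are
# hypothetical stand-ins, everything else reuses names from this module):
def _example_arctic_round_trip():
    database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=arctic_lib_type[0])

    # Dump the CSV into Arctic, overwriting any existing table/ticker data
    database_source.convert_csv_to_table('my_market_data.csv', ticker, 'my_market_data_table',
                                         if_exists_table='replace', if_exists_ticker='replace',
                                         market_trade_data='market', remove_duplicates=False)

    # Read it back through the same loader the tests use
    market_request = MarketRequest(start_date='01 Jan 2016',
                                   finish_date=pd.Timestamp(datetime.datetime.utcnow()),
                                   ticker=ticker, data_store=database_source,
                                   market_data_database_table='my_market_data_table')

    return Mediator.get_tca_market_trade_loader(version=tcapy_version).get_market_data(market_request=market_request)
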
def test_write_market_data_db():
    """Tests we can write market data to KDB/Influxdb/PyStore
    """

    database_source_list, test_harness_market_data_table_list, test_harness_data_store_list = \
        _get_db_market_database_source()

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    # Iterate over each database backend alongside its matching table and data store name
    for database_source, test_harness_market_data_table, test_harness_data_store in \
            zip(database_source_list, test_harness_market_data_table_list, test_harness_data_store_list):

        ### Test we can read data from CSV and dump to InfluxDB/KDB/PyStore (and that it matches the CSV when read back)
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        replace_append = ['replace', 'append']

        database_source_csv = DatabaseSourceCSV(
            market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=db_start_date,
            finish_date=db_finish_date,
            ticker=ticker)

        # Check first when replacing full table and then appending (will still replace ticker though)
        for if_exists in replace_append:

            database_source.convert_csv_to_table(
                csv_market_data_store,
                ticker,
                test_harness_market_data_table,
                if_exists_table=if_exists,
                if_exists_ticker='replace',
                market_trade_data='market',
                remove_duplicates=False)

            market_request = MarketRequest(
                start_date=db_start_date,
                finish_date=db_finish_date,
                ticker=ticker,
                data_store=test_harness_data_store,
                market_data_database_table=test_harness_market_data_table)

            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            # Take the absolute difference so deviations in either direction are caught
            diff_df = (market_df_csv['mid'] - market_df_load['mid']).abs()

            assert all(diff_df < eps)
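
# _get_db_market_database_source is a helper defined elsewhere in this test module.
# Judging by how it is consumed above, it plausibly returns three parallel lists,
# one entry per database backend, roughly shaped like the hypothetical sketch below
# (the concrete class, table and data store names here are guesses, not confirmed API):
#
# def _get_db_market_database_source():
#     database_source_list = [DatabaseSourceKDB(postfix='testharness'), ...]
#     test_harness_market_data_table_list = ['market_data_table_kdb_testharness', ...]
#     test_harness_data_store_list = ['kdb-testharness', ...]
#
#     return database_source_list, test_harness_market_data_table_list, test_harness_data_store_list
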
def test_append_market_data_arctic():
    """Tests we can append market data to arctic (we will have already written data to the test harness database)
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    ### Test we can append (non-overlapping) data to Arctic
    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    # Use this market request later when reading back from Arctic
    market_request = MarketRequest(start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker,
                                   data_store=test_harness_arctic_market_data_store,
                                   market_data_database_table=test_harness_arctic_market_data_table)

    # Load data from CSV for comparison later
    database_source_csv = DatabaseSourceCSV(market_data_database_csv=csv_market_data_store)

    market_df_csv = database_source_csv.fetch_market_data(
        start_date=arctic_start_date, finish_date=arctic_finish_date, ticker=ticker)

    market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv, chunks=2)
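    # split_array_chunks divides the DataFrame row-wise into consecutive,
    # non-overlapping chunks, so the second half strictly follows the first in time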

    for a in arctic_lib_type:

        database_source = DatabaseSourceArctic(postfix='testharness', arctic_lib_type=a)

        market_df_lower = market_df_list[0]
        market_df_higher = market_df_list[1]

        database_source.append_market_data(market_df_lower, ticker, table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='replace', if_exists_ticker='replace', remove_duplicates=False)

        overlap_error = False

        # Try to append overlapping data (this should raise ErrorWritingOverlapDataException)
        try:
            database_source.append_market_data(market_df_lower, ticker,
                                               table_name=test_harness_arctic_market_data_table,
                                               if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)
        except ErrorWritingOverlapDataException:
            overlap_error = True

        assert overlap_error
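        # (Under pytest, the flag-and-assert above could equivalently be written as
        # `with pytest.raises(ErrorWritingOverlapDataException): ...`; the original
        # try/except style is kept here.)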

        # Append non-overlapping data which follows (writing overlapping data into Arctic will mess up the datastore!)
        database_source.append_market_data(market_df_higher, ticker, table_name=test_harness_arctic_market_data_table,
                                           if_exists_table='append', if_exists_ticker='append', remove_duplicates=False)

        market_df_all_read_back = market_loader.get_market_data(market_request=market_request)

        assert all((market_df_all_read_back['mid'] - market_df_csv['mid']).abs() < eps)
def test_write_multiple_wildcard_market_data_csvs_arctic():
    """Tests we can write sequential market data CSVs (or HDF5) whose path has been specified by a wildcard (eg. EURUSD*.csv).
    It is assumed that the CSVs are in chronological orders, from their filenames.
    """
    if not run_arctic_tests: return

    market_loader = Mediator.get_tca_market_trade_loader(version=tcapy_version)

    arctic_start_date = '01 Jan 2016'
    arctic_finish_date = pd.Timestamp(datetime.datetime.utcnow())

    for a in arctic_lib_type:
        database_source = DatabaseSourceArctic(postfix='testharness',
                                               arctic_lib_type=a)

        ### Read CSV data which is sorted ascending (default!)
        database_source.convert_csv_to_table(
            csv_market_data_store,
            ticker,
            test_harness_arctic_market_data_table,
            if_exists_table='replace',
            if_exists_ticker='replace',
            market_trade_data='market',
            csv_read_chunksize=10**6,
            remove_duplicates=False)

        database_source_csv = DatabaseSourceCSV(
            market_data_database_csv=csv_market_data_store)

        market_df_csv = database_source_csv.fetch_market_data(
            start_date=arctic_start_date,
            finish_date=arctic_finish_date,
            ticker=ticker)

        # Prepare the CSV folder first
        csv_folder = os.path.join(constants.test_data_harness_folder,
                                  'csv_arctic_mult')

        # Empty the CSV test harness folder, where we shall dump the mini CSVs
        UtilFunc().forcibly_create_empty_folder(csv_folder)

        # Split the CSV file into several mini CSV files (and also Parquet files)
        market_df_list = TimeSeriesOps().split_array_chunks(market_df_csv,
                                                            chunks=3)

        chunk_no = 0

        for m in market_df_list:
            m.to_csv(
                os.path.join(csv_folder, "EURUSD" + str(chunk_no) + '.csv'))
            UtilFunc().write_dataframe_to_binary(
                m,
                os.path.join(csv_folder,
                             "EURUSD" + str(chunk_no) + '.parquet'),
                format='parquet')

            chunk_no += 1

        file_ext = ['csv', 'parquet']

        for f in file_ext:
            ### Read CSV data from the mini CSVs (using wildcard char) and dump to Arctic
            database_source.convert_csv_to_table(
                os.path.join(csv_folder, "EURUSD*." + f),
                ticker,
                test_harness_arctic_market_data_table,
                if_exists_table='append',
                if_exists_ticker='replace',
                market_trade_data='market',
                csv_read_chunksize=10**6,
                remove_duplicates=False)

            market_request = MarketRequest(
                start_date=arctic_start_date,
                finish_date=arctic_finish_date,
                ticker=ticker,
                data_store=database_source,
                market_data_database_table=test_harness_arctic_market_data_table
            )

            # Read back from Arctic
            market_df_load = market_loader.get_market_data(
                market_request=market_request)

            # Compare reading directly from the original large CSV vs. reading back from Arctic (which was populated from the split files)
            diff_df = abs(market_df_load['mid'] - market_df_csv['mid'])

            outside_bounds = diff_df[diff_df >= eps]

            assert len(outside_bounds) == 0
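
# Note on the wildcard: correct reassembly relies on the chunk filenames sorting
# into chronological order. A sketch of the presumed expansion (tcapy's internals
# may differ; the `glob` usage here is illustrative):
#
# import glob
# paths = sorted(glob.glob(os.path.join(csv_folder, 'EURUSD*.csv')))
# # -> ['.../EURUSD0.csv', '.../EURUSD1.csv', '.../EURUSD2.csv'],
# #    where lexicographic order coincides with chronological order
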
#
# Copyright 2020 Cuemacro
#
# See the License for the specific language governing permissions and limitations under the License.
#

import os

from chartpy import Chart
from tcapy.data.databasesource import DatabaseSourceCSVBinary

parquet_path = '/data/csv_dump/dukascopy/'

filename = [
    'EURUSD_dukascopy_2020-05-01_00_00_00.096000+00_002020-05-31_23_59_59.084000+00_00.parquet'
]

for f in filename:
    final_path = os.path.join(parquet_path, f)

    database_source = DatabaseSourceCSVBinary(
        market_data_database_csv=final_path)
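    # No start/finish dates are supplied below, so presumably the whole Parquet file is loaded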
    df = database_source.fetch_market_data()

    print(df)

    df_resample = df.resample('1min').last()

    Chart().plot(df_resample)
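
# A last-price downsample is one choice; the same resampler can build OHLC bars
# instead (a sketch, assuming a single price column such as 'mid' as in the tests above):
#
# df_ohlc = df['mid'].resample('1min').ohlc()
# Chart().plot(df_ohlc)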