Ejemplo n.º 1
0
def test_delete_market_data_arctic():
    """Check that deleting a date range of market data from Arctic works.

    Writes test CSV market data into Arctic, deletes a cut-off window of
    records on the database side, then verifies the remaining records match
    a pandas-side filter of the original dataset.
    """
    if not run_arctic_tests:
        return

    for lib_type in arctic_lib_type:
        database_source = DatabaseSourceArctic(
            postfix='testharness', arctic_lib_type=lib_type)

        # Date range covering the whole test dataset
        db_start_date = '01 Jan 2016'
        db_finish_date = pd.Timestamp(datetime.datetime.utcnow())

        # Dump the test market data CSV into Arctic first
        database_source.convert_csv_to_table(
            csv_market_data_store, ticker,
            test_harness_arctic_market_data_table,
            if_exists_table='replace', if_exists_ticker='replace',
            market_trade_data='market', remove_duplicates=False)

        # Window of records we shall delete from the database
        db_start_cut_off = '26 Apr 2017 00:00'
        db_finish_cut_off = '27 Apr 2017 00:50'

        # Fetch the full dataset and strip out the cut-off window in pandas
        market_df_old = database_source.fetch_market_data(
            start_date=db_start_date, finish_date=db_finish_date,
            ticker=ticker, table_name=test_harness_arctic_market_data_table)

        keep_mask = (market_df_old.index <= db_start_cut_off) | \
                    (market_df_old.index >= db_finish_cut_off)
        market_df_old = market_df_old.loc[keep_mask]

        # Delete the same window on the database side (note: Arctic cannot
        # delete in-database, so underneath this is done via pandas)
        database_source.delete_market_data(
            ticker, start_date=db_start_cut_off,
            finish_date=db_finish_cut_off,
            table_name=test_harness_arctic_market_data_table)

        # Read back: the deleted records should now be excluded
        market_df_new = database_source.fetch_market_data(
            start_date=db_start_date, finish_date=db_finish_date,
            ticker=ticker, table_name=test_harness_arctic_market_data_table)

        # Align column ordering before comparing the frames
        assert_frame_equal(market_df_old.sort_index(axis=1),
                           market_df_new.sort_index(axis=1))
Ejemplo n.º 2
0
def example_arctic_dukacopy_download():
    """Example of fetching Dukascopy market data directly from the lower level
    Arctic wrapper (DatabaseSourceArctic), rather than going through any higher
    level classes such as TCAMarketTradeDataLoader.
    """
    database_source = DatabaseSourceArctic(postfix='dukascopy')

    market_df = database_source.fetch_market_data(
        '01 May 2017', '30 May 2017', ticker='EURUSD')

    print(market_df)
Ejemplo n.º 3
0
def example_arctic_ncfx_download():
    """Example of fetching New Change FX market data directly from the lower
    level Arctic wrapper (DatabaseSourceArctic), rather than going through any
    higher level classes such as TCAMarketTradeDataLoader.
    """
    database_source = DatabaseSourceArctic(postfix='ncfx')

    market_df = database_source.fetch_market_data(
        short_start_date, short_finish_date, ticker=ticker)

    print(market_df)
                if_exists_table = 'replace'

        database_source.convert_csv_to_table(csv_file, ticker, market_data_database_table,
                                             if_exists_table=if_exists_table, remove_duplicates=remove_duplicates,
                                             if_exists_ticker=if_append_replace_ticker, date_format=date_format,
                                             read_in_reverse=read_in_reverse)

        # It is worth plotting the data to check validity sometimes (make sure you choose appropriate start/finish dates
        # loading a *very* large tick history into memory will result in your computer running out of memory
        if PLOT_BACK_DATA:
            from chartpy import Chart

            import datetime
            import pandas as pd

            df = database_source.fetch_market_data(start_date='01 Jan 2000', finish_date=datetime.datetime.utcnow(), ticker=ticker)

            df = pd.DataFrame(df.resample('5min').mean())

            if 'mid' not in df.columns:
                df['mid'] = (df['bid'] + df['ask']) / 2.0

            df = pd.DataFrame(df['mid'])

            Chart(engine='plotly').plot(df)

            print(df)

    logger.info("Finished uploading data to " + market_data_store)

Ejemplo n.º 5
0
    def upload_market_data_flat_file(self, data_vendor='dukascopy', market_data_store='arctic', server_host=None,
                                     server_port=None,
                                     ticker_mkt=None,
                                     csv_folder=None,
                                     if_exists_table='replace',
                                     if_append_replace_ticker='replace',
                                     file_extension='parquet',
                                     read_in_reverse=False, remove_duplicates=False, date_format=None,
                                     plot_back_data=False, access_control=AccessControl(), market_data_database_table=None):
        """Uploads flat files of market tick data (eg. Parquet/H5/CSV dumped by
        DatabasePopulator) into a tick database, one ticker at a time.

        Parameters
        ----------
        data_vendor : str
            Data vendor whose files to load (eg. 'dukascopy', 'ncfx')

        market_data_store : str
            Which tick database to write to: 'arctic', 'pystore', 'influxdb' or 'kdb'

        server_host : str (optional)
            Database host (defaults to the store's host in constants)

        server_port : str/int (optional)
            Database port (defaults to the store's port in constants)

        ticker_mkt : str (list) (optional)
            Tickers to upload (defaults to a standard FX universe)

        csv_folder : str (optional)
            Folder containing the flat files (defaults to constants.test_data_folder)

        if_exists_table : str
            'replace' to recreate the table on the first ticker, otherwise append

        if_append_replace_ticker : str
            'replace' or 'append' for each individual ticker's data

        file_extension : str
            Flat file extension ('parquet', 'h5' or 'csv')

        read_in_reverse : bool
            Read the flat files in reverse row order?

        remove_duplicates : bool
            Remove consecutive duplicated rows on upload?

        date_format : str (optional)
            Explicit date format for parsing timestamps

        plot_back_data : bool
            Plot a downsampled mid price of each uploaded ticker as a sanity check?

        access_control : AccessControl
            Holds database credentials

        market_data_database_table : str (optional)
            Table/library name (defaults to the store's table in constants)

        Raises
        ------
        ValueError
            If market_data_store is not one of the recognised stores
        """
        logger = LoggerManager.getLogger(__name__)

        if ticker_mkt is None:
            # Default FX universe (kept out of the signature to avoid a
            # mutable default argument)
            ticker_mkt = ['EURUSD', 'GBPUSD', 'AUDUSD', 'NZDUSD', 'USDCAD', 'USDCHF',
                          'EURNOK', 'EURSEK', 'USDJPY',
                          'USDNOK', 'USDSEK', 'EURJPY',
                          'USDMXN', 'USDTRY', 'USDZAR', 'EURPLN']

        # Files dumped by DatabasePopulator look like this
        ## 'AUDUSD_dukascopy_2016-01-03_22_00_01.868000+00_002016-01-31_23_59_57.193000+00_00.parquet'
        csv_file_patterns = [x + '_' + data_vendor + '_20*.' + file_extension for x in
                             ticker_mkt]  # assume that ALL TIME IN UTC!

        ####################################################################################################################

        # Load market data

        # Create market data store for database and associated data vendor
        if market_data_store == 'arctic':
            if server_host is None:
                server_host = constants.arctic_host

            if server_port is None:
                server_port = constants.arctic_port

            database_source = DatabaseSourceArctic(postfix=data_vendor,
                                                   username=access_control.arctic_username,
                                                   password=access_control.arctic_password,
                                                   server_host=server_host, server_port=server_port)

            if market_data_database_table is None:
                market_data_database_table = constants.arctic_market_data_database_table

        elif market_data_store == 'pystore':
            database_source = DatabaseSourcePyStore(postfix=data_vendor)

            if market_data_database_table is None:
                market_data_database_table = constants.pystore_market_data_database_table

        elif market_data_store == 'influxdb':
            if server_host is None:
                server_host = constants.influxdb_host

            if server_port is None:
                server_port = constants.influxdb_port

            database_source = DatabaseSourceInfluxDB(postfix=data_vendor,
                                                     username=access_control.influxdb_username,
                                                     password=access_control.influxdb_password,
                                                     server_host=server_host, server_port=server_port)

            if market_data_database_table is None:
                market_data_database_table = constants.influxdb_market_data_database_table

        elif market_data_store == 'kdb':
            if server_host is None:
                server_host = constants.kdb_host

            if server_port is None:
                server_port = constants.kdb_port

            database_source = DatabaseSourceKDB(postfix=data_vendor,
                                                username=access_control.kdb_username,
                                                password=access_control.kdb_password,
                                                server_host=server_host, server_port=server_port)

            if market_data_database_table is None:
                market_data_database_table = constants.kdb_market_data_database_table

        else:
            # Previously an unrecognised store silently fell through and later
            # crashed with a NameError on database_source; fail fast instead
            raise ValueError("Unknown market_data_store: " + str(market_data_store))

        if csv_folder is None:
            csv_folder = constants.test_data_folder

        # This relies on you have market data stored in Parquet/H5/CSV files already (eg. by downloading from DukasCopy)
        # note: whilst free FX data can be used for testing (in particular for generating randomised trades),
        # you may to choose other high frequency quality data for actual benchmark

        csv_market_data = [os.path.join(csv_folder, x) for x in csv_file_patterns]

        # For each ticker, read in the H5/CSV file and then dump into tick database
        # Potentionally, we can thread this?
        for i, ticker in enumerate(ticker_mkt):
            csv_file = csv_market_data[i]

            # On the second time through the loop, we make sure to append to table
            # otherwise will keep overwriting!
            if if_exists_table == 'replace' and i >= 1:
                if_exists_table = 'append'

            database_source.convert_csv_to_table(csv_file, ticker, market_data_database_table,
                                                 if_exists_table=if_exists_table,
                                                 remove_duplicates=remove_duplicates,
                                                 if_exists_ticker=if_append_replace_ticker, date_format=date_format,
                                                 read_in_reverse=read_in_reverse)

            # It is worth plotting the data to check validity sometimes (make sure you choose appropriate start/finish dates
            # loading a *very* large tick history into memory will result in your computer running out of memory
            if plot_back_data:
                from chartpy import Chart

                import datetime
                import pandas as pd

                df = database_source.fetch_market_data(start_date='01 Jan 2000',
                                                       finish_date=datetime.datetime.utcnow(),
                                                       ticker=ticker)

                # Downsample to 5 minute mean so the plot fits in memory
                df = pd.DataFrame(df.resample('5min').mean())

                if 'mid' not in df.columns:
                    df['mid'] = (df['bid'] + df['ask']) / 2.0

                df = pd.DataFrame(df['mid'])

                Chart(engine='plotly').plot(df)

                print(df)

        logger.info("Finished uploading data to " + market_data_store)
Ejemplo n.º 6
0
        'EURUSD', 'GBPUSD', 'AUDUSD', 'NZDUSD', 'USDCAD', 'USDCHF', 'EURNOK',
        'EURSEK', 'USDJPY'
    ]

    if database_dialect == 'arctic':
        from tcapy.data.databasesource import DatabaseSourceArctic as DatabaseSource
    elif database_dialect == 'pystore':
        from tcapy.data.databasesource import DatabaseSourcePyStore as DatabaseSource

    database_source = DatabaseSource(postfix=data_vendor)

    file_format = 'parquet'

    for t in tickers:
        market_df = database_source.fetch_market_data(start_date=start_date,
                                                      finish_date=finish_date,
                                                      ticker=t)

        key = '_' + data_vendor + "_" + \
              (str(market_df.index[0]) + str(market_df.index[-1])).replace(":", '_').replace(" ", '_')
        filename = os.path.join(folder, t + key) + '.' + file_format

        if market_df is not None:
            c = market_df.columns
            print('Writing ' + t + ' to ' + filename)
            print('No of items ' + str(len(market_df.index)))

            if file_format == 'parquet':
                market_df.to_parquet(filename)
            elif file_format == 'csv':
                market_df.to_csv(filename)