Example #1
    def _benchmark_calculation(self, trade_order_df, bid_price, ask_price, date_start, date_end, weights=None):

        benchmark = []

        for i in range(0, len(trade_order_df.index)):
            # If the trade is a buy
            if trade_order_df['side'][i] == 1:
                price = ask_price

            # If the trade is a sell
            elif trade_order_df['side'][i] == -1:
                price = bid_price

            if date_start[i] == date_end[i]:
                benchmark.append(price[date_start[i]])
            else:
                try:
                    benchmark.append(self._get_price(price[date_start[i]:date_end[i]], side=trade_order_df['side'][i]))
                except Exception as e:
                    err_msg = self._benchmark_name + " cannot be calculated, given market data does not fully overlap with trade data: " \
                              + str(e)

                    LoggerManager.getLogger(__name__).error(err_msg)

                    raise TradeMarketNonOverlapException(err_msg)

        return benchmark
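
The integer positions in date_start/date_end here are row offsets into the market data arrays, typically produced with np.searchsorted (as in the TWAP/VWAP examples below). A minimal, self-contained sketch of that indexing convention on toy data; all names here are illustrative, not from tcapy:

import numpy as np
import pandas as pd

# Toy market data: one ask price per second
index = pd.date_range("2021-01-04 09:00:00", periods=10, freq="s")
market_df = pd.DataFrame({"ask": np.linspace(1.1000, 1.1009, 10)}, index=index)

# A single buy trade, benchmarked between two timestamps
trade_start = pd.Timestamp("2021-01-04 09:00:02")
trade_end = pd.Timestamp("2021-01-04 09:00:07")

# Map the timestamps to integer row positions in the market data index
i0 = np.searchsorted(market_df.index, trade_start)
i1 = np.searchsorted(market_df.index, trade_end)

# Since this is a buy, slice the ask prices over the trade's lifetime and average
benchmark = np.average(market_df["ask"].values[i0:i1])
print(benchmark)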
Example #2
    def calculate_benchmark(self, trade_order_df=None, market_df=None, trade_order_name=None, bid_benchmark=None,
                            ask_benchmark=None,
                            benchmark_date_start_field=None,
                            benchmark_date_end_field=None):
        if not (self._check_calculate_benchmark(trade_order_name=trade_order_name)): return trade_order_df, market_df

        # If fields have not been specified, take them from the field variables; we then compute the time
        # weighted average of the bid or ask over each trade's lifetime
        if bid_benchmark is None: bid_benchmark = self._bid_benchmark
        if ask_benchmark is None: ask_benchmark = self._ask_benchmark
        if benchmark_date_start_field is None: benchmark_date_start_field = self._benchmark_date_start_field
        if benchmark_date_end_field is None: benchmark_date_end_field = self._benchmark_date_end_field

        if bid_benchmark in market_df.columns and ask_benchmark in market_df.columns:
            trade_order_df[self._benchmark_name] = np.nan

            date_start = trade_order_df[benchmark_date_start_field].values
            date_end = trade_order_df[benchmark_date_end_field].values

            date_start = np.searchsorted(market_df.index, date_start)
            date_end = np.searchsorted(market_df.index, date_end)
            bid_price = market_df[bid_benchmark].values
            ask_price = market_df[ask_benchmark].values
            dt = market_df.index.to_series().diff().values / np.timedelta64(1, 's')
            dt[0] = 0  # first point is weighted zero (we don't know how long it had been the prevailing price)

            twap = []

            for i in range(0, len(trade_order_df.index)):

                if trade_order_df['side'][i] == 1:
                    price = ask_price
                elif trade_order_df['side'][i] == -1:
                    price = bid_price

                try:
                    if date_start[i] == date_end[i]:
                        twap.append(price[date_start[i]])
                    else:
                        twap_val = np.average(price[date_start[i]:date_end[i]], weights=dt[date_start[i]:date_end[i]])

                        twap.append(twap_val)
                except Exception as e:
                    err_msg = "TWAP cannot be calculated, given market data does not fully overlap with trade data: " \
                              + str(e)

                    LoggerManager.getLogger(__name__).error(err_msg)

                    raise TradeMarketNonOverlapException(err_msg)

            trade_order_df[self._benchmark_name] = twap
        else:
            LoggerManager.getLogger(__name__).warning(bid_benchmark + " and " + ask_benchmark + " may not be in market data.")

        return trade_order_df, market_df
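
The dt weighting above means each tick is weighted by the time elapsed since the previous tick, with the first point given zero weight. A small sketch of the same weighting on irregularly spaced toy ticks:

import numpy as np
import pandas as pd

# Irregularly spaced ticks
index = pd.to_datetime(["2021-01-04 09:00:00", "2021-01-04 09:00:01",
                        "2021-01-04 09:00:05", "2021-01-04 09:00:06"])
price = np.array([1.10, 1.20, 1.30, 1.40])

# Weight each tick by the seconds elapsed since the previous tick; the first
# point is weighted zero, since we don't know how long it had been there
dt = pd.Series(index).diff().values / np.timedelta64(1, "s")
dt[0] = 0

twap = np.average(price, weights=dt)
print(twap)  # 1.30: the tick preceded by the longest gap dominates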
Example #3
    def calculate_benchmark(self, trade_order_df=None, market_df=None, trade_order_name=None, bid_benchmark=None,
                            ask_benchmark=None,
                            volume_field=None,
                            benchmark_date_start_field=None,
                            benchmark_date_end_field=None):

        if not (self._check_calculate_benchmark(trade_order_name=trade_order_name)): return trade_order_df, market_df

        # if fields have not been specified, then take them from the field variables
        if bid_benchmark is None: bid_benchmark = self._bid_benchmark
        if ask_benchmark is None: ask_benchmark = self._ask_benchmark
        if volume_field is None: volume_field = self._volume_field
        if benchmark_date_start_field is None: benchmark_date_start_field = self._benchmark_date_start_field
        if benchmark_date_end_field is None: benchmark_date_end_field = self._benchmark_date_end_field

        if bid_benchmark in market_df.columns and ask_benchmark in market_df.columns and volume_field in market_df.columns:
            trade_order_df[self._benchmark_name] = np.nan

            date_start = trade_order_df[benchmark_date_start_field].values
            date_end = trade_order_df[benchmark_date_end_field].values

            date_start = np.searchsorted(market_df.index, date_start)
            date_end = np.searchsorted(market_df.index, date_end)
            bid_price = market_df[bid_benchmark].values
            ask_price = market_df[ask_benchmark].values
            volume = market_df[volume_field].values

            vwap = []

            for i in range(0, len(trade_order_df.index)):
                if trade_order_df['side'][i] == 1:
                    price = ask_price
                elif trade_order_df['side'][i] == -1:
                    price = bid_price

                if date_start[i] == date_end[i]:
                    vwap.append(price[date_start[i]])
                else:
                    try:
                        vwap.append(
                            np.average(price[date_start[i]:date_end[i]], weights=volume[date_start[i]:date_end[i]]))
                    except Exception as e:
                        err_msg = "VWAP cannot be calculated, given market data does not fully overlap with trade data: " \
                                  + str(e)

                        LoggerManager.getLogger(__name__).error(err_msg)

                        raise TradeMarketNonOverlapException(err_msg)

            trade_order_df[self._benchmark_name] = vwap
        else:
            LoggerManager.getLogger(__name__).warning(
                bid_benchmark + ", " + ask_benchmark + " and " + volume_field + " may not be in market data.")

        return trade_order_df, market_df
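
The only difference from TWAP is the weight vector: volume traded at each tick rather than elapsed time. A toy illustration:

import numpy as np

price = np.array([1.10, 1.20, 1.30])
volume = np.array([100.0, 300.0, 100.0])

# Each tick is weighted by the volume traded at it
vwap = np.average(price, weights=volume)
print(vwap)  # 1.20, since most of the volume traded at that price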
Example #4
    def _check_data_store(self, data_store):
        # constants = Constants()

        # Valid if it is a recognised data source, or a path to a CSV/HDF5 file
        if data_store not in constants.valid_data_store and '.csv' not in data_store and '.h5' not in data_store:
            err_msg = data_store + " is not a defined data source."

            LoggerManager.getLogger(__name__).error(err_msg)

            raise ValidationException(err_msg)

        return data_store
Example #5
    def _write_df_to_db_single_thread(self, ticker, remove_duplicates=True, if_exists_table='append',
                                      if_exists_ticker='replace'):

        logger = LoggerManager.getLogger(__name__)

        postfix = '-' + self._get_postfix() + '-with-duplicates'

        if remove_duplicates:
            postfix = '-' + self._get_postfix() + '-no-duplicates'

        filename = os.path.join(self.temp_large_data_folder, ticker + postfix) + '.' + fileformat

        logger.info("Reading " + filename)

        util_func = UtilFunc()
        time_series_ops = TimeSeriesOps()
        data_source_local = self._get_output_data_source()

        df = util_func.read_dataframe_from_binary(filename, format=binary_format)

        if df is not None:
            df = time_series_ops.localize_as_UTC(df)

            data_source_local.append_market_data(df, ticker, if_exists_table=if_exists_table,
                                                 if_exists_ticker=if_exists_ticker)
        else:
            logger.warn("Couldn't write dataframe for " + ticker + " to database, appears it is empty!")
Example #6
    def _chunk_dataframes(self,
                          obj,
                          chunk_size_mb=constants.
                          volatile_cache_redis_max_cache_chunk_size_mb):
        logger = LoggerManager.getLogger(__name__)

        # Can sometimes have very large DataFrames, which need to be split, otherwise they won't fit in a single Redis key
        mem = obj.memory_usage(deep=True).sum()
        mem_float = round(float(mem) / (1024.0 * 1024.0), 3)
        mem = '----------- ' + str(mem_float) + ' MB -----------'

        chunks = int(math.ceil(mem_float / chunk_size_mb))

        if chunks > 1:
            obj_list = self._time_series_ops.split_array_chunks(obj,
                                                                chunks=chunks)
        else:
            obj_list = [obj]

        if obj_list != []:
            logger.debug("Pandas dataframe of size: " + mem + " in " +
                         str(chunks) + " chunk(s)")

        return obj_list
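
A hedged sketch of the same size-based chunking idea, with a hypothetical chunk_dataframe helper standing in for TimeSeriesOps.split_array_chunks (which is not shown here):

import math
import numpy as np
import pandas as pd

def chunk_dataframe(df, chunk_size_mb=1.0):
    # Hypothetical stand-in for TimeSeriesOps.split_array_chunks: split a
    # DataFrame into pieces small enough to fit under chunk_size_mb each
    mem_mb = float(df.memory_usage(deep=True).sum()) / (1024.0 * 1024.0)
    chunks = max(1, int(math.ceil(mem_mb / chunk_size_mb)))

    rows = int(math.ceil(len(df) / chunks))

    return [df.iloc[i:i + rows] for i in range(0, len(df), rows)]

df = pd.DataFrame({"mid": np.random.rand(500000)})

pieces = chunk_dataframe(df, chunk_size_mb=1.0)
print(len(pieces), [len(p) for p in pieces])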
Example #7
def create_resampled_spot_data(resample_freq='1min', data_vendor='dukascopy'):

    logger = LoggerManager.getLogger(__name__)
    csv_input_folder = '/data/csv_dump/' + data_vendor + '/'

    for ticker in ticker_mkt:

        logger.info("Processing for " + ticker  + " resample freq " + resample_freq + " data vendor " + data_vendor)

        flat_file = csv_input_folder + ticker + '_' + data_vendor + '_*.' + file_extension

        df_dd = dd.read_parquet(flat_file).compute()['mid']

        logger.info("About to resample OHLC for " + ticker + " resample freq " + resample_freq + " data vendor " + data_vendor)

        resampler = df_dd.resample(resample_freq)
        df_dd_ohlc = resampler.ohlc()

        logger.debug(str(df_dd_ohlc.columns))

        logger.info("About to resample count for " + ticker)
        df_dd_count = resampler.count()
        df_dd_count.name = 'tickcount'

        df_dd = pd.concat([df_dd_ohlc, df_dd_count], axis=1)
        df_dd.columns = [ticker + '.' + x for x in df_dd.columns]

        df_dd = df_dd.dropna()
        df_dd.to_parquet(csv_output + ticker + '_' + resample_freq + '_' + data_vendor + '.' + file_extension)

        df_dd = None
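
The resampling pattern above (one resampler reused for both OHLC bars and tick counts) can be tried on synthetic data; this sketch mirrors the per-ticker loop without the parquet I/O, with an illustrative ticker name:

import numpy as np
import pandas as pd

# Synthetic tick data: one 'mid' price per second over ten minutes
index = pd.date_range("2021-01-04 09:00:00", periods=600, freq="s")
mid = pd.Series(1.10 + np.cumsum(np.random.randn(600)) * 1e-5,
                index=index, name="mid")

# One resampler reused for both the OHLC bars and the tick count per bar
resampler = mid.resample("1min")

df_ohlc = resampler.ohlc()
df_count = resampler.count()
df_count.name = "tickcount"

df = pd.concat([df_ohlc, df_count], axis=1).dropna()
df.columns = ["EURUSD." + x for x in df.columns]
print(df.head())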
Example #8
    def get_market_trade_metrics(self, tca_request, dummy_market=False):
        """Collects together all the market and trade data (and computes metrics) for each ticker specified in the
        TCARequest

        Parameters
        ----------
        tca_request : TCARequest
            Parameters for the TCA

        dummy_market : bool (default: False)
            Should dummy market data be returned (requires less memory)?

        Returns
        -------
        DataFrame (dict) , DataFrame (dict), TCARequest (list)
        """

        logger = LoggerManager.getLogger(__name__)

        logger.debug("Start loading trade/data/computation")

        # split up TCARequest into a list of TCA with different tickers
        tca_request_list = self._split_tca_request_into_list(tca_request)

        market_df_dict, trade_order_results_df_dict = self._get_market_trade_metrics(
            tca_request_list, dummy_market)

        logger.debug(
            "Finished loading data and calculating metrics on individual tickers"
        )

        return market_df_dict, trade_order_results_df_dict, tca_request_list
Example #9
    def _get(self, key, burn_after_reading=False):

        logger = LoggerManager.getLogger(__name__)
        logger.debug('Attempting to get list from cache: ' + str(key))

        old_key = key

        # Use a pipeline which is quicker for multiple database operations
        pipeline = VolatileRedis._db.pipeline()

        # Check if the key is inside Redis (may have the "size" after it, which will be needed to decompress)
        for k in key:
            pipeline.keys(k + "*")

        key = pipeline.execute()
        key = self._util_func.flatten_list_of_lists(key)

        if key != []:
            # Convert byte to string
            key = [k.decode("utf-8") for k in key]

            pipeline = VolatileRedis._db.pipeline()

            # Get list of values for each element
            for k in key:
                pipeline.lrange(k, 0, -1)

            if burn_after_reading:
                key_burn = [k for k in key if '_expiry_' in k]

                self.delete(key_burn, pipeline=pipeline)

            cache_output = pipeline.execute()
        else:
            cache_output = [None] * len(old_key)
            key = old_key

        if burn_after_reading:
            if len(cache_output) == len(key) + 1:
                logger.debug("Deleted " + str(cache_output[-1]) + ' keys')

                cache_output = cache_output[:-1]

        for i in range(0, len(key)):
            if cache_output[i] is not None:
                try:
                    cache_output[i] = self._convert_binary_to_python(
                        cache_output[i], key[i])
                except Exception as e:
                    logger.error(
                        "Error converting binary object to Python for key: " +
                        key[i] + " and " + str(e))

                    cache_output[i] = None

        return cache_output
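
The pipeline usage above batches many Redis commands into a single round trip. A minimal sketch with the redis-py client, assuming a Redis server is running locally and using illustrative key names:

import redis

# Assumes a Redis server on localhost; the keys and values are illustrative
r = redis.Redis(host="localhost", port=6379)

r.rpush("market_df_EURUSD", b"chunk0", b"chunk1")
r.rpush("market_df_GBPUSD", b"chunk0")

# First pipeline: expand each key prefix into the actual keys in Redis
pipeline = r.pipeline()

for pattern in ["market_df_EURUSD*", "market_df_GBPUSD*"]:
    pipeline.keys(pattern)

keys = [k.decode("utf-8") for sub in pipeline.execute() for k in sub]

# Second pipeline: fetch the full list stored under each key in one round trip
pipeline = r.pipeline()

for k in keys:
    pipeline.lrange(k, 0, -1)

print(pipeline.execute())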
Example #10
    def get_market_trade_order_holder(self, tca_request):
        """Gets the both the market data and trade/order data associated with a TCA calculation as a tuple of
        (DataFrame, DataFrameHolder)

        Parameters
        ----------
        tca_request : TCARequest
            Parameters for a TCA calculation

        Returns
        -------
        DataFrame, DataFrameHolder
        """

        logger = LoggerManager.getLogger(__name__)

        logger.debug("Get market and trade/order data for " +
                     str(tca_request.ticker) + " from " +
                     str(tca_request.start_date) + " - " +
                     str(tca_request.finish_date))

        # Get all the trades/orders which have been requested, eg. trade_df and order_df. We do separate
        # calls, given they are assumed to be stored in different database tables
        return self.get_market_data(tca_request), \
               self.get_trade_order_holder(tca_request)
Example #11
def create_resampled_spot_data():
    logger = LoggerManager.getLogger(__name__)

    for ticker in ticker_mkt:

        logger.info("Processing for " + ticker)

        flat_file = csv_folder + ticker + '_' + data_vendor + '_*.' + file_extension

        df_dd = dd.read_parquet(flat_file).compute()['mid']

        logger.info("About to resample OHLC for " + ticker)

        df_dd_ohlc = df_dd.resample(resample_freq).ohlc()

        logger.debug(str(df_dd_ohlc.columns))

        logger.info("About to resample count for " + ticker)
        df_dd_count = df_dd.resample(resample_freq).count()
        df_dd_count.name = 'tickcount'

        df_dd = df_dd_ohlc.join(df_dd_count)
        df_dd.columns = [ticker + '.' + x for x in df_dd.columns]

        df_dd = df_dd.dropna()
        df_dd.to_parquet(csv_output + ticker + '_' + resample_freq + '_' +
                         data_vendor + '.' + file_extension)
Example #12
    def _calculate_additional_metrics(self, market_df, trade_order_df_dict,
                                      tca_request):
        logger = LoggerManager.getLogger(__name__)

        # Add candlesticks/sparse DataFrames for plotting if requested
        if tca_request.tca_type == 'detailed' or tca_request.summary_display == 'candlestick':

            trade_order_list = self._util_func.dict_key_list(
                trade_order_df_dict.keys())

            # Only add the ticker name if we have a non-detailed plot, to differentiate between currency pairs
            if tca_request.tca_type == 'detailed':
                ticker_label = ''
            else:
                ticker_label = tca_request.ticker + '_'

            logger.debug(
                "Generating downsampled market data for potential display")

            market_downsampled_df = self._time_series_ops.downsample_time_series_usable(
                market_df)

            # Combine downsampled market data with trade data
            fields = [
                'bid', 'ask', 'open', 'high', 'low', 'close', 'mid', 'vwap',
                'twap', 'arrival', 'buy_trade', 'sell_trade', 'notional',
                'executed_notional', 'executed_price', 'side'
            ]

            # Create a sparse representation of the trades/orders which can later be displayed to users
            for trade_order in trade_order_list:
                if trade_order in trade_order_df_dict:
                    trade_order_df_dict[ticker_label + 'sparse_market_' + trade_order] = \
                        self._join_market_downsampled_trade_orders(market_downsampled_df,
                                                                   trade_order_df_dict[trade_order],
                                                                   fields=fields)

            trade_order_df_dict[
                ticker_label + 'market_df_downsampled'] = market_downsampled_df

            trade_order_df_dict[ticker_label + 'candlestick_fig'] = \
                    self._plot_render.generate_candlesticks(market_downsampled_df)

            if tca_request.summary_display == 'candlestick':
                for trade_order in trade_order_list:
                    if trade_order in trade_order_df_dict:
                        title = ticker_label + " " + trade_order
                        lines_to_plot = self._util_func.dict_key_list(
                            constants.detailed_timeline_plot_lines.keys())
                        lines_to_plot.append('candlestick')

                        trade_order_df_dict[ticker_label + 'sparse_market_' + trade_order.replace('df', 'fig')]\
                            = self._plot_render.plot_market_trade_timeline(
                            title=title, sparse_market_trade_df=trade_order_df_dict[ticker_label + 'sparse_market_' + trade_order],
                            lines_to_plot=lines_to_plot,
                            candlestick_fig=trade_order_df_dict[ticker_label + 'candlestick_fig'])

        return trade_order_df_dict
Example #13
    def __init__(self, version=constants.tcapy_version):
        self._util_func = UtilFunc()

        self._tca_market_trade_loader = Mediator.get_tca_market_trade_loader(version=version)
        self._time_series_ops = TimeSeriesOps()
        self._trade_order_tag = TradeOrderFilterTag()

        logger = LoggerManager.getLogger(__name__)
        logger.info("Init TCAEngine version: " + self._tca_market_trade_loader.get_tca_version() + " - Env: " + constants.env)
Example #14
    def _apply_summary_metrics(self, tca_request_list, trade_order_results_df_dict, market_df_dict):

        trade_order_list = self._util_func.dict_key_list(trade_order_results_df_dict.keys())

        if not (isinstance(trade_order_list, list)):
            trade_order_list = [trade_order_list]

        # First get the market data
        market_df = market_df_dict[tca_request_list[0].ticker]

        logger = LoggerManager.getLogger(__name__)
        logger.debug("Constructing results form to summarize analysis...")

        # Calculate user specified aggregate result forms (eg. timelines, distribution etc.) for each trade/order
        # which has been selected
        results_form = tca_request_list[0].results_form
        join_tables = tca_request_list[0].join_tables

        # If dummy market (ie. don't return market data to the user) has been specified then market data cannot
        # be included in ResultsForm calculations
        if results_form is not None:
            for i in range(0, len(trade_order_list)):
                current_key = trade_order_list[i]

                # Ignore 'fig' objects which are Plotly JSON Figures, and only process DataFrames
                if 'df' in current_key:
                    for r in results_form:

                        # Filter the trades for the event type which has been requested (eg. 'trade' or 'placement')
                        trade_order_df = self._trade_order_tag.filter_trade_order(
                            trade_order_df=trade_order_results_df_dict[trade_order_list[i]],
                            tag_value_combinations={'event_type': tca_request_list[0].event_type})

                        # Calculate aggregate ResultForm
                        results = r.aggregate_results(
                            trade_order_df=trade_order_df, market_df=market_df, trade_order_name=trade_order_list[i])

                        if results[0] is not None:
                            for results_form_df, results_form_name in results:
                                trade_order_results_df_dict[results_form_name] = results_form_df

        logger.debug("Now join table results...")

        # As a final stage, join together any tables which have been specified by the user
        # for example: does the user want to combine certain metrics or trades together?
        if join_tables is not None:
            for j in join_tables:
                results = j.aggregate_tables(df_dict=trade_order_results_df_dict)

                if results != []:
                    if results[0] is not None:
                        for results_form_df, results_form_name in results:
                            trade_order_results_df_dict[results_form_name] = results_form_df

        logger.debug("Finished calculating results form and join table results!")

        return trade_order_results_df_dict
Example #15
    def calculate_benchmark(self, market_df=None, mid=None, bid=None,
                            ask=None,
                            bid_mid_bp=None, ask_mid_bp=None, overwrite_bid_ask=None):

        if self._check_empty_benchmark_market_data(market_df): return market_df

        if mid is None: mid = self._mid
        if bid is None: bid = self._bid
        if ask is None: ask = self._ask
        if bid_mid_bp is None: bid_mid_bp = self._bid_mid_bp
        if ask_mid_bp is None: ask_mid_bp = self._ask_mid_bp
        if overwrite_bid_ask is None: overwrite_bid_ask = self._overwrite_bid_ask

        bid_mid_bp = float(bid_mid_bp)
        ask_mid_bp = float(ask_mid_bp)

        if mid not in market_df.columns:
            market_df[mid] = (market_df[bid].values + market_df[ask].values)/2.0

        # Calculate the bid-mid and ask-mid spreads from market data
        if bid in market_df.columns and ask in market_df.columns and not overwrite_bid_ask:
            market_df[bid + '_' + mid + '_spread'] = (market_df[bid].values / market_df[mid].values) - 1.0
            market_df[ask + '_' + mid + '_spread'] = (market_df[mid].values / market_df[ask].values) - 1.0

        # If we have been asked to overwrite bid/ask columns with an artificial proxy
        elif bid in market_df.columns and ask in market_df.columns and overwrite_bid_ask:
            # Otherwise if we don't have sufficient bid/ask data (and only mid data), or if we want to forcibly
            # overwrite it, create a synthetic bid/ask using the user specified spread
            market_df[bid + '_' + mid + '_spread'] = -bid_mid_bp / 10000.0
            market_df[ask + '_' + mid + '_spread'] = -ask_mid_bp / 10000.0
            market_df[bid] = market_df[mid].values * (1.0 - bid_mid_bp / 10000.0)
            market_df[ask] = market_df[mid].values / (1.0 - ask_mid_bp / 10000.0)

        # If we only have the mid column
        elif mid in market_df.columns and bid not in market_df.columns and ask not in market_df.columns:
            market_df[bid + '_' + mid + '_spread'] = -bid_mid_bp / 10000.0
            market_df[ask + '_' + mid + '_spread'] = -ask_mid_bp / 10000.0
            market_df[bid] = market_df[mid].values * (1.0 - bid_mid_bp / 10000.0)
            market_df[ask] = market_df[mid].values / (1.0 - ask_mid_bp / 10000.0)
        else:
            LoggerManager().getLogger(__name__).warning("Couldn't calculate spread from mid, check market data has appropriate fields.")

        return market_df
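
The synthetic bid/ask construction uses a basis-point spread around the mid: the bid scales the mid down by bp/10000, while the ask divides by the same factor. A toy illustration of that math:

import pandas as pd

market_df = pd.DataFrame({"mid": [1.1000, 1.1010, 1.1020]})

bid_mid_bp = 0.5  # half a basis point each side
ask_mid_bp = 0.5

# Mirror the synthetic bid/ask branch above
market_df["bid"] = market_df["mid"] * (1.0 - bid_mid_bp / 10000.0)
market_df["ask"] = market_df["mid"] / (1.0 - ask_mid_bp / 10000.0)

print(market_df)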
Example #16
    def _fill_reporting_spot(self, ticker, trade_df, start_date, finish_date,
                             tca_request):
        logger = LoggerManager.getLogger(__name__)

        market_request = MarketRequest(
            start_date=start_date,
            finish_date=finish_date,
            ticker=ticker,
            data_store=tca_request.market_data_store,
            data_offset_ms=tca_request.market_data_offset_ms,
            use_multithreading=tca_request.use_multithreading,
            market_data_database_table=tca_request.market_data_database_table,
            multithreading_params=tca_request.multithreading_params)

        market_conversion_df = self.get_market_data(market_request)

        # Make sure the trades/orders are within the market data (for the purposes of the reporting spot)
        # we don't need to consider the length of the order, JUST the starting point
        trade_df = self.strip_trade_order_data_to_market(
            trade_df, market_conversion_df, consider_order_length=False)

        reporting_spot = None

        # need to check whether we actually have any trade data/market data
        if trade_df is not None and market_conversion_df is not None:
            if not (trade_df.empty) and not (market_conversion_df.empty):

                try:
                    reporting_spot = self._time_series_ops.vlookup_style_data_frame(
                        trade_df.index, market_conversion_df, 'mid')[0]

                except Exception:
                    logger.error(
                        "Reporting spot is missing for this trade data sample!"
                    )

                if reporting_spot is None:
                    market_start_finish = "No market data in this sample. "

                    if market_conversion_df is not None:
                        market_start_finish = "Market data is between " + str(
                            market_conversion_df.index[0]) + " - " + str(
                                market_conversion_df.index[-1]) + ". "

                    logger.warning(market_start_finish)
                    logger.warning("Trade data is between " +
                                   str(trade_df.index[0]) + " - " +
                                   str(trade_df.index[-1]) + ".")

                    logger.warning(
                        "Couldn't get spot data to convert notionals currency. Hence not returning trading data."
                    )

        return reporting_spot, trade_df
Example #17
def combine_resampled_spot_data_into_single_dataframe(resample_freq='1min',
                                                      data_vendor='dukascopy',
                                                      usd_base=False):
    df_list = []

    logger = LoggerManager.getLogger(__name__)

    for ticker in ticker_combined_mkt:
        logger.info("Reading " + ticker + " resample freq " + resample_freq +
                    " data vendor " + data_vendor)

        df = pd.read_parquet(csv_output + ticker + '_' + resample_freq + '_' +
                             data_vendor + '.' + file_extension)

        base = ticker[0:3]
        terms = ticker[3:6]

        if usd_base:
            if terms == 'USD':
                df_invert = pd.DataFrame(index=df.index)
                df_invert[terms + base +
                          '.close'] = 1.0 / df[ticker + '.close']
                df_invert[terms + base + '.open'] = 1.0 / df[ticker + '.open']

                # Invert high and low!
                df_invert[terms + base + '.high'] = 1.0 / df[ticker + '.low']
                df_invert[terms + base + '.low'] = 1.0 / df[ticker + '.high']

                df_invert[terms + base + '.tickcount'] = df[ticker +
                                                            '.tickcount']

                df = df_invert

        df_list.append(df)

    logger.info("Combining all tickers with resample freq " + resample_freq +
                " data vendor " + data_vendor)
    df = pd.DataFrame(index=df.index)

    df['USDUSD.close'] = 1.0

    df_list.append(df)
    df = calculations.join(df_list, how='outer')
    df = df.dropna()

    if usd_base:
        combined_file = 'fx_' + resample_freq + '_' + data_vendor + '_usd_base.' + file_extension
    else:
        combined_file = 'fx_' + resample_freq + '_' + data_vendor + '.' + file_extension

    df.to_parquet(csv_output + combined_file)
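
Inverting a currency pair is the subtle part of the loop above: besides taking reciprocals, the high and low must be swapped, since the maximum of a rate is the minimum of its reciprocal. A single-bar sketch:

import pandas as pd

# One OHLC bar for EURUSD
df = pd.DataFrame({"EURUSD.open": [1.10], "EURUSD.high": [1.12],
                   "EURUSD.low": [1.09], "EURUSD.close": [1.11]})

# Flip to USDEUR: take reciprocals, swapping high and low
df_invert = pd.DataFrame(index=df.index)
df_invert["USDEUR.open"] = 1.0 / df["EURUSD.open"]
df_invert["USDEUR.high"] = 1.0 / df["EURUSD.low"]   # old low becomes the new high
df_invert["USDEUR.low"] = 1.0 / df["EURUSD.high"]   # old high becomes the new low
df_invert["USDEUR.close"] = 1.0 / df["EURUSD.close"]

print(df_invert)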
Example #18
    def aggregate_tables(self, df_dict={}, tables_dict={}, round_figures_by=None, scalar=None):
        logger = LoggerManager.getLogger(__name__)

        if tables_dict == {}: tables_dict = self._tables_dict
        if round_figures_by is None: round_figures_by = self._round_figures_by
        if scalar is None: scalar = self._scalar

        joined_results = []

        table_name = tables_dict['table_name']
        table_list = tables_dict['table_list']

        column_list = None; replace_text = None

        if 'column_list' in tables_dict.keys():
            column_list = tables_dict['column_list']

        if 'replace_text' in tables_dict.keys():
            replace_text = tables_dict['replace_text']

        agg_results = []

        for i in range(0, len(table_list)):
            table = table_list[i]

            # If the table is in the calculation output
            if table in df_dict.keys():
                df = df_dict[table].copy()

                if column_list is not None and column_list != []:
                    df.columns = [x + ' ' + column_list[i] for x in df.columns]

                df = self._util_func.replace_text_in_cols(df, replace_text)

                # Round/multiply elements in the table if requested
                if df is not None:
                    df = self._time_series_ops.multiply_scalar_dataframe(df, scalar=scalar)
                    df = self._time_series_ops.round_dataframe(df, round_figures_by)

                    agg_results.append(df)
            else:
                logger.warning(table + ' not in calculation output, are you sure the dictionary entry is correct?')

        # If we've collected the tables, try doing a join on all them
        # to combine them into one large table
        if agg_results != []:
            if len(agg_results) > 1:
                df_joined = self._time_series_ops.outer_join(agg_results)
            else:
                df_joined = agg_results[0]

            joined_results.append((df_joined, table_name))

        return joined_results
Example #19
    def vlookup_style_data_frame(self, dt, data_frame, search_field, timedelta_amount=None, just_before_point=True):
        """Does a VLOOKUP style search in a DataFrame given a set of times for a particular field. We assume both
        our DataFrame and dates to lookup are sorted (oldest first).

        Parameters
        ----------
        dt : DateTimeIndex list
            Dates to be looked up

        data_frame : DataFrame
            The DataFrame where we wish to do our lookup

        search_field : str
            Which field do we want to output

        timedelta_amount : TimeDelta (default: None)
            How much we wish to perturb our search times

        just_before_point : bool (default: True)
            Should we fetch the point just before (when there is no exact match)? This is necessary for slippage
            calculations; by contrast, for market impact we would likely set this to False (ie. the point just after)

        Returns
        -------
        Series, DateTimeIndex
        """
        logger = LoggerManager.getLogger(__name__)

        # logger.debug("Applying VLOOKUP in timezone " + str(dt.tz) + " with " + str(data_frame.index.tz))

        if dt is None:
            return None, None

        if len(dt) == 0:
            return None, None

        # Check that our input times are within the bounds of our data frame
        if dt[0] <= data_frame.index[0] or dt[-1] >= data_frame.index[-1]:
            err_msg = "Lookup data (eg. trade) does not fully overlap with the main search space of data (eg. market)"

            logger.error(err_msg)

            raise ValidationException(err_msg)

        indices = self.search_series(data_frame, dt, timedelta_amount=timedelta_amount, just_before_point=just_before_point)

        search_series = data_frame[search_field].iloc[indices]
        actual_dt = search_series.index
        search_series.index = dt

        # Return our VLOOKUPed values and alongside it, the time stamps of those observations
        return search_series, actual_dt
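
The 'just before' behaviour can be reproduced with np.searchsorted directly: take the last market data point at or before each lookup time. A toy sketch of that as-of join, where search_series/actual_dt mirror the return values above:

import numpy as np
import pandas as pd

# Market mid prices every two seconds
market_index = pd.date_range("2021-01-04 09:00:00", periods=5, freq="2s")
market_df = pd.DataFrame({"mid": [1.10, 1.11, 1.12, 1.13, 1.14]},
                         index=market_index)

# Trade timestamps that fall between market ticks
dt = pd.to_datetime(["2021-01-04 09:00:01", "2021-01-04 09:00:05"])

# 'Just before' lookup: position of the last market point at or before each time
indices = np.searchsorted(market_df.index, dt, side="right") - 1

search_series = market_df["mid"].iloc[indices]
actual_dt = search_series.index
search_series.index = dt

print(search_series)  # 1.10 and 1.12, the prevailing mids at the trade times
print(actual_dt)      # the market timestamps those values actually came from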
Example #20
    def check_empty_combined_dataframe_dict(self, df_dict=None):

        if df_dict is None:
            df_dict = self.get_combined_dataframe_dict()

        logger = LoggerManager().getLogger(__name__)

        valid_data = True

        if df_dict is not None:
            if len(df_dict.keys()) > 0:
                t_remove = []

                for t in df_dict.keys():
                    if df_dict[t] is None or df_dict[t].empty:
                        logger.warning("Market/trade/order data not in " + t)
                        t_remove.append(t)

                for t in t_remove:
                    df_dict.pop(t)
            else:
                valid_data = False

            if len(df_dict.keys()) == 0:
                valid_data = False
        else:
            valid_data = False

        return valid_data
Example #21
    def __init__(self,
                 temp_data_folder=constants.temp_data_folder,
                 temp_large_data_folder=constants.temp_large_data_folder,
                 tickers=None,
                 data_store=None):

        self.temp_data_folder = temp_data_folder
        self.temp_large_data_folder = temp_large_data_folder
        self.tickers = tickers
        self.util_func = UtilFunc()
        self.time_series_ops = TimeSeriesOps()
        self.data_store = data_store

        logger = LoggerManager().getLogger(__name__)

        if not os.path.isdir(self.temp_data_folder):
            logger.warning("Temp data folder " + self.temp_data_folder +
                           " does not exist")

        if not os.path.isdir(self.temp_large_data_folder):
            logger.warning("Temp large data folder " + self.temp_large_data_folder +
                           " does not exist")

Example #22
    def _check_trade_order_type(self, trade_order_type):
        # constants = Constants()
        valid_trade_order_type = constants.trade_order_list

        if trade_order_type not in valid_trade_order_type:
            err_msg = trade_order_type + " is not a defined trade or order."

            # Don't make LoggerManager a field variable, so this can be pickled (important for Celery)
            LoggerManager().getLogger(__name__).error(err_msg)

            raise ValidationException(err_msg)

        return trade_order_type
Example #23
    def _join_market_downsampled_trade_orders(self,
                                              market_downsampled_df,
                                              trade_order_df,
                                              fields=None):
        """Combine market data with trade/orders, into a sparse DataFrame. Typically, used when preparing to display
        a mixture of market/trades data together.

        Parameters
        ----------
        market_downsampled_df : DataFrame
            Market data which has been downsampled

        trade_order_df : DataFrame
            Trade/order data to be combined

        fields : str (list)
            Fields to keep

        Returns
        -------
        DataFrame
        """

        logger = LoggerManager.getLogger(__name__)

        if fields is not None:
            trade_order_df = self._time_series_ops.filter_time_series_by_matching_columns(
                trade_order_df, fields)

        logger.debug('About to join')

        sparse_market_trade_df = market_downsampled_df.join(trade_order_df,
                                                            how='outer')

        # Add buy/sell trade prices in new columns (easier for plotting later)
        if 'executed_price' not in sparse_market_trade_df.columns:
            logger.warning("No 'executed_price' column in the joined market/trade data")

        executed_price = sparse_market_trade_df['executed_price'].values
        side_to_match = sparse_market_trade_df['side'].values

        sparse_market_trade_df['buy_trade'] \
            = self._time_series_ops.nanify_array_based_on_other(side_to_match, -1, executed_price)  # make sells NaN (NOT buys!)
        sparse_market_trade_df['sell_trade'] \
            = self._time_series_ops.nanify_array_based_on_other(side_to_match, 1, executed_price)   # make buys NaN (NOT sells!)

        logger.debug('Finished joining')

        return sparse_market_trade_df
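
nanify_array_based_on_other is not shown here; judging from the comments above, it blanks out the entries whose side matches a given value. A guess at its behaviour using np.where (the helper below is a hypothetical re-implementation):

import numpy as np

side = np.array([1, -1, 1, -1])
executed_price = np.array([1.10, 1.11, 1.12, 1.13])

# Hypothetical re-implementation: NaN-out the entries whose side matches
def nanify_array_based_on_other(match_against, value_to_nanify, arr):
    return np.where(match_against == value_to_nanify, np.nan, arr)

buy_trade = nanify_array_based_on_other(side, -1, executed_price)  # sells -> NaN
sell_trade = nanify_array_based_on_other(side, 1, executed_price)  # buys -> NaN

print(buy_trade)   # [1.1   nan  1.12   nan]
print(sell_trade)  # [ nan  1.11   nan  1.13]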
Example #24
    def _fetch_market_data(self, start, finish, ticker, write_to_disk=True, read_cached_from_disk=True, web_proxies=constants.web_proxies):
        logger = LoggerManager.getLogger(__name__)

        key = (str(start) + str(finish) + ticker + '_' + self._get_postfix()).replace(":", '_')

        filename = os.path.join(self.temp_data_folder, key) + '.' + fileformat
        util_func = UtilFunc()

        start_time_stamp = pd.Timestamp(start)
        finish_time_stamp = pd.Timestamp(finish)

        if self._remove_weekend_points():
            weekend_data = "Weekend? " + key

            weekday_point = UtilFunc().is_weekday_point(start_time_stamp, finish_time_stamp,
                                                        friday_close_nyc_hour=constants.friday_close_utc_hour,
                                                        sunday_open_utc_hour=constants.sunday_open_utc_hour)

            if not(weekday_point):
                return None, weekend_data

        df = None

        if read_cached_from_disk:
            if os.path.exists(filename):
                df = util_func.read_dataframe_from_binary(filename, format=binary_format)

                if df is not None:
                    logger.debug("Read " + filename + " from disk")

        if df is None:
            # Convert tcapy ticker into vendor ticker
            df = self._get_input_data_source().fetch_market_data(start, finish,
                                                                 ticker=self._get_tickers_vendor()[ticker], web_proxies=web_proxies)

            if df is not None:

                if write_to_disk:
                    # Write a small temporary DataFrame to disk (if the process fails later, these can be picked up,
                    # without having to call the external vendor again)
                    util_func.write_dataframe_to_binary(df, filename, format=binary_format)

        msg = None

        if df is None:
            msg = "No data? " + key

        return df, msg
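
Stripped of the vendor specifics, the caching logic above is: read a cached copy from disk if present, otherwise fetch and persist. A self-contained sketch of that pattern, with a hypothetical fetch_with_disk_cache helper and parquet files instead of the binary format used here:

import os
import pandas as pd

def fetch_with_disk_cache(key, fetch_func, folder="/tmp"):
    # Hypothetical helper: read a cached copy from disk if one exists,
    # otherwise call the (expensive) fetch and write the result for next time
    filename = os.path.join(folder, key.replace(":", "_") + ".parquet")

    if os.path.exists(filename):
        return pd.read_parquet(filename)

    df = fetch_func()

    if df is not None:
        df.to_parquet(filename)

    return df

df = fetch_with_disk_cache("2021-01-04_EURUSD",
                           lambda: pd.DataFrame({"mid": [1.10, 1.11]}))
print(df)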
Example #25
    def calculate_benchmark(self, market_df=None, field=None,
                            bid=None, ask=None):
        if self._check_empty_benchmark_market_data(market_df): return market_df

        if field is None: field = self._field
        if bid is None: bid = self._bid
        if ask is None: ask = self._ask

        # If the 'mid' price does not already exist in the market data, calculate it from the underlying bid/ask prices
        if field not in market_df.columns:
            if bid in market_df.columns and ask in market_df.columns:
                market_df[field] = (market_df[bid].values + market_df[ask].values) / 2.0
            else:
                LoggerManager().getLogger(__name__).warning(
                    bid + "/" + ask + " not in market data, so cannot calculate " + field)

        return market_df
Example #26
    def get(self, key, burn_after_reading=False):
        """Gets the object(s) associated with the key(s) or CacheHandle(s)

        Parameters
        ----------
        key : str or CacheHandle (list)
            Key(s) to be fetched

        burn_after_reading : bool (default: False)
            Should the key be erased after reading?

        Returns
        -------
        object
        """
        logger = LoggerManager.getLogger(__name__)

        key = copy.copy(key)

        single = False

        if not (isinstance(key, list)):
            key = [key]

            single = True

        for i in range(0, len(key)):
            if isinstance(key[i], CacheHandle):
                key[i] = key[i].handle_name

        obj = None

        try:
            obj = self._get(key, burn_after_reading=burn_after_reading)
        except Exception as e:
            logger.warning("Couldn't retrieve " + str(key) + " from cache: " +
                           str(e))

        if single and obj is not None:
            return obj[0]

        return obj
Example #27
    def get_trade_order_holder(self, tca_request):
        logger = LoggerManager.getLogger(__name__)

        logger.debug(
            "Get trade order holder for " + str(tca_request.ticker) + " from " + str(tca_request.start_date)
            + " - " + str(tca_request.finish_date))

        # Get all the trades/orders which have been requested, eg. trade_df and order_df. We do separate
        # calls, given they are assumed to be stored in different database tables
        trade_order_holder = DataFrameHolder()

        if tca_request.trade_order_mapping is not None:
            for trade_order_type in tca_request.trade_order_mapping:
                trade_order_df = self.get_trade_order_data(tca_request, trade_order_type)

                trade_order_holder.add_dataframe(trade_order_df, trade_order_type)

        return trade_order_holder
Example #28
    def _download(self, md_request, folder_prefix):
        from findatapy.market import MarketDataRequest, MarketDataGenerator, Market

        logger = LoggerManager.getLogger(__name__)
        market = Market(market_data_generator=MarketDataGenerator())

        ticker = md_request.ticker[0]
        df = market.fetch_market(md_request=md_request)

        df.columns = ['bid', 'ask', 'bidv', 'askv']

        df['venue'] = 'dukascopy'
        df['ticker'] = ticker

        df['mid'] = (df['bid'].values + df['ask'].values) / 2.0

        self.dump_hdf5_file(df, folder_prefix + "_" + ticker + ".h5")

        logger.info('Dumped to ' + folder_prefix + "_" + ticker + ".h5")
Example #29
    def calculate_benchmark_market(self, market_df, tca_request):

        logger = LoggerManager.getLogger(__name__)

        benchmark_calcs = tca_request.benchmark_calcs
        valid_market = self._check_valid_market(market_df)

        # Calculations on market data only
        if valid_market:
            for b in benchmark_calcs:

                # For benchmarks which only modify market data (and don't need trade specific information)
                if isinstance(b, BenchmarkMarket):
                    logger.debug("Calculating " + type(b).__name__ +
                                 " for market data")

                    market_df = b.calculate_benchmark(market_df=market_df)

        return market_df
Example #30
    def _check_is_empty_trade_order(self, trade_df, tca_request, start_date,
                                    finish_date, trade_order_type):

        logger = LoggerManager.getLogger(__name__)

        if trade_df is None or trade_df.empty:
            logger.warning("Missing trade data for " + tca_request.ticker +
                           " between " + str(start_date) + " - " +
                           str(finish_date) + " in " + trade_order_type)

            return True

        return False