def _get_benchmark(self, prices):
    """
    Extracts the benchmark security's price series from the prices DataFrame.
    """
    field = None
    fields = prices.index.get_level_values("Field").unique()
    candidate_fields = ("Close", "Open", "Bid", "Ask", "High", "Low")

    # Prefer the first available price field among the candidates
    for candidate in candidate_fields:
        if candidate in fields:
            field = candidate
            break
    else:
        raise MoonshotParameterError(
            "Cannot extract BENCHMARK {0} from {1} without one of {2}".format(
                self.BENCHMARK, self.CODE, ", ".join(candidate_fields)))

    try:
        benchmark = prices.loc[field][self.BENCHMARK]
    except KeyError:
        raise MoonshotError(
            "{0} BENCHMARK ConId {1} is not in backtest data".format(
                self.CODE, self.BENCHMARK))

    # Intraday prices require a BENCHMARK_TIME to select one bar per day
    if "Time" in prices.index.names:
        if not self.BENCHMARK_TIME:
            raise MoonshotParameterError(
                "Cannot extract BENCHMARK {0} from {1} because prices contain intraday "
                "prices but no BENCHMARK_TIME is specified".format(
                    self.BENCHMARK, self.CODE))
        try:
            benchmark = benchmark.xs(self.BENCHMARK_TIME, level="Time")
        except KeyError:
            raise MoonshotError(
                "{0} BENCHMARK_TIME {1} is not in backtest data".format(
                    self.CODE, self.BENCHMARK_TIME))

    return pd.DataFrame(benchmark)
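# A minimal sketch of the prices shape _get_benchmark expects: a
# (Field, Date[, Time]) MultiIndex with ConIds as columns. The ConId 12345
# and the BENCHMARK/BENCHMARK_TIME values below are hypothetical, not from
# the source:
#
#     import pandas as pd
#
#     idx = pd.MultiIndex.from_product(
#         [["Close"], ["2018-01-02", "2018-01-03"], ["15:45:00", "16:00:00"]],
#         names=["Field", "Date", "Time"])
#     prices = pd.DataFrame({12345: [100.0, 101.0, 102.0, 103.0]}, index=idx)
#
#     # With BENCHMARK = 12345 and BENCHMARK_TIME = "16:00:00", the method
#     # reduces this to one benchmark price per date:
#     benchmark = prices.loc["Close"][12345].xs("16:00:00", level="Time")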
def _infer_timezone(self, prices):
    """
    Infers the strategy timezone from the component securities, if possible.
    """
    if "Timezone" not in prices.index.get_level_values("Field"):
        raise MoonshotParameterError(
            "Cannot infer strategy timezone because Timezone field is missing, "
            "please set TIMEZONE parameter or include Timezone in MASTER_FIELDS")

    timezones = prices.loc["Timezone"].stack().unique()

    if len(timezones) > 1:
        raise MoonshotParameterError(
            "Cannot infer strategy timezone because multiple timezones are present "
            "in data, please set TIMEZONE parameter explicitly (timezones: {0})".format(
                ", ".join(timezones)))

    return timezones[0]
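# A hedged illustration of what _infer_timezone looks for: a "Timezone" row
# in the Field level whose value agrees across all securities. The ConIds and
# date are made up:
#
#     import pandas as pd
#
#     idx = pd.MultiIndex.from_product(
#         [["Timezone"], ["2018-01-02"]], names=["Field", "Date"])
#     prices = pd.DataFrame(
#         {12345: ["America/New_York"], 23456: ["America/New_York"]}, index=idx)
#
#     timezones = prices.loc["Timezone"].stack().unique()
#     assert list(timezones) == ["America/New_York"]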
def _load_model(self):
    """
    Loads a model from file, using joblib, keras, or pickle, depending on
    the filename.
    """
    if not self.MODEL:
        raise MoonshotParameterError("please specify a model file")

    # Dispatch on the filename: joblib and keras models are identified by
    # naming convention; anything else is assumed to be a pickle
    if "joblib" in self.MODEL:
        self.model = joblib.load(self.MODEL)
    elif "keras.h5" in self.MODEL:
        from keras.models import load_model
        self.model = load_model(self.MODEL)
    else:
        with open(self.MODEL, "rb") as f:
            self.model = pickle.load(f)
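# Because dispatch is by filename, a MODEL path must contain "joblib" or
# "keras.h5" to select those loaders. A sketch of producing a compatible
# file, assuming scikit-learn and joblib are installed (the filename and
# estimator are illustrative):
#
#     import joblib
#     from sklearn.linear_model import LogisticRegression
#
#     model = LogisticRegression()
#     joblib.dump(model, "my_model.joblib")  # "joblib" in the name selects joblib.load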
def get_historical_prices(self, start_date, end_date=None, nlv=None,
                          max_cache=None):
    """
    Downloads historical prices from a history db. Downloads security
    details from the master db and broadcasts the values to be shaped
    like the historical prices.
    """
    if start_date:
        start_date = self._get_start_date_with_lookback(start_date)

    dbs = self.DB
    if not isinstance(dbs, (list, tuple)):
        dbs = [self.DB]

    db_universes = set()
    db_bar_sizes = set()
    for db in dbs:
        db_config = get_db_config(db)
        universes = db_config.get("universes", None)
        if universes:
            db_universes.update(set(universes))
        bar_size = db_config.get("bar_size")
        db_bar_sizes.add(bar_size)

    db_universes = list(db_universes)
    db_bar_sizes = list(db_bar_sizes)

    # All dbs must share a bar size, since the strategy assumes one frequency
    if len(db_bar_sizes) > 1:
        raise MoonshotParameterError(
            "databases must contain the same bar size but have different bar sizes "
            "(databases: {0}; bar sizes: {1})".format(
                ", ".join(dbs), ", ".join(db_bar_sizes)))

    all_prices = []

    for db in dbs:

        kwargs = dict(
            start_date=start_date,
            end_date=end_date,
            universes=self.UNIVERSES,
            conids=self.CONIDS,
            exclude_universes=self.EXCLUDE_UNIVERSES,
            exclude_conids=self.EXCLUDE_CONIDS,
            times=self.DB_TIME_FILTERS,
            cont_fut=self.CONT_FUT,
            fields=self.DB_FIELDS,
            tz_naive=False)

        if max_cache:
            prices = HistoryCache.load(db, kwargs, max_cache)
            if prices is not None:
                all_prices.append(prices)
                continue

        # Write the download to the cache location, if caching, so
        # subsequent runs can reuse it
        if max_cache:
            f = HistoryCache.get_filepath(db, kwargs)
        else:
            f = io.StringIO()
        download_history_file(db, f, **kwargs)

        prices = pd.read_csv(f)
        all_prices.append(prices)

    prices = pd.concat(all_prices)

    # Reshape into a (Field, Date) x ConId DataFrame
    prices = prices.pivot(index="ConId", columns="Date").T
    prices.index.set_names(["Field", "Date"], inplace=True)

    # Next, get the master file
    universes = self.UNIVERSES
    conids = self.CONIDS
    if not conids and not universes:
        universes = db_universes
        if not universes:
            conids = list(prices.columns)

    f = io.StringIO()
    download_master_file(
        f,
        conids=conids,
        universes=universes,
        exclude_conids=self.EXCLUDE_CONIDS,
        exclude_universes=self.EXCLUDE_UNIVERSES,
        fields=self.MASTER_FIELDS)
    securities = pd.read_csv(f, index_col="ConId")

    nlv = nlv or self._get_nlv()
    if nlv:
        missing_nlvs = set(securities.Currency) - set(nlv.keys())
        if missing_nlvs:
            raise ValueError(
                "NLV dict is missing values for required currencies: {0}".format(
                    ", ".join(missing_nlvs)))

        securities["Nlv"] = securities.apply(
            lambda row: nlv.get(row.Currency, None), axis=1)

    # Append securities, indexed to the min date, to allow easy ffill on demand
    securities = pd.DataFrame(securities.T, columns=prices.columns)
    securities.index.name = "Field"
    idx = pd.MultiIndex.from_product(
        (securities.index,
         [prices.index.get_level_values("Date").min()]),
        names=["Field", "Date"])

    securities = securities.reindex(index=idx, level="Field")
    prices = pd.concat((prices, securities))

    timezone = self.TIMEZONE or self._infer_timezone(prices)

    dates = pd.to_datetime(
        prices.index.get_level_values("Date"), utc=True)
    dates = dates.tz_convert(timezone)

    prices.index = pd.MultiIndex.from_arrays(
        (prices.index.get_level_values("Field"), dates),
        names=("Field", "Date"))

    # Split date and time
    dts = prices.index.get_level_values("Date")
    dates = pd.to_datetime(dts.date)
    dates.tz = timezone
    prices.index = pd.MultiIndex.from_arrays(
        (prices.index.get_level_values("Field"),
         dates,
         dts.strftime("%H:%M:%S")),
        names=["Field", "Date", "Time"])

    # Daily and higher bar sizes don't need a Time level
    if db_bar_sizes[0] in ("1 day", "1 week", "1 month"):
        prices.index = prices.index.droplevel("Time")

    return prices
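# A minimal usage sketch. The strategy class, base class, db name, and
# universe below are hypothetical; only the parameter names come from the
# code above:
#
#     class MyStrategy(Moonshot):
#
#         CODE = "my-strategy"
#         DB = "usa-stk-1d"  # or a list of dbs with matching bar sizes
#         UNIVERSES = "usa-stk"
#         DB_FIELDS = ["Open", "High", "Low", "Close", "Volume"]
#         MASTER_FIELDS = ["Timezone", "Currency"]
#
#     prices = MyStrategy().get_historical_prices(
#         "2017-01-01", end_date="2018-01-01")
#     closes = prices.loc["Close"]  # DataFrame of dates x ConIds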