def __init__(self, data_descriptor, **kwargs):
    """Build the data source from a data descriptor.

    Parameters
    ----------
    data_descriptor : dict
        Must provide 'tickers' (list of symbols) and 'index'
        (a pd.DatetimeIndex of trading datetimes).
    **kwargs :
        Optional overrides for 'sids', 'start' and 'end'; each
        falls back to the corresponding descriptor content.
    """
    # Validate the descriptor index, consistent with the sibling
    # constructors in this file. (Replaces a stale commented-out
    # check that referenced a nonexistent ``data.major_axis``.)
    assert isinstance(data_descriptor['index'],
                      pd.tseries.index.DatetimeIndex)
    self.data_descriptor = data_descriptor
    # Unpack config dictionary with default values.
    self.sids = kwargs.get('sids', data_descriptor['tickers'])
    self.start = kwargs.get('start', data_descriptor['index'][0])
    self.end = kwargs.get('end', data_descriptor['index'][-1])
    # Hash_value for downstream sorting.
    self.arg_string = hash_args(data_descriptor, **kwargs)
    self._raw_data = None
    self.remote = Remote()
def initialize_daily_indices(self):
    """Allocate the per-day return containers, indexed on trading days."""
    days = self.trading_days
    self.algorithm_returns_cont = pd.Series(index=days)
    self.benchmark_returns_cont = pd.Series(index=days)
    # TODO: an update function here — benchmark from start_date to now.
    # Remote handle used to fetch live benchmark values downstream.
    self.remote = Remote()
def __init__(self, data_descriptor, **kwargs):
    """Set up the source from a descriptor of tickers and trading dates.

    ``kwargs`` may override 'sids', 'start' and 'end'; anything not
    supplied falls back to the descriptor contents.
    """
    assert isinstance(data_descriptor['index'],
                      pd.tseries.index.DatetimeIndex)
    self.data_descriptor = data_descriptor
    # Optional overrides, defaulting to the descriptor contents.
    index = data_descriptor['index']
    self.sids = kwargs.get('sids', data_descriptor['tickers'])
    self.start = kwargs.get('start', index[0])
    self.end = kwargs.get('end', index[-1])
    # Hash used downstream to sort/identify this source instance.
    self.arg_string = hash_args(data_descriptor, **kwargs)
    self._raw_data = None
    self.remote = Remote()
class YahooOHLCSource(DataSource):
    """
    Yields one OHLC event per (date, sid) pair, fetched once from the
    remote Yahoo daily-data service.

    Configuration options:
        sids   : list of values representing simulated internal sids
        start  : start date
        delta  : timedelta between internal events
        filter : filter to remove the sids
    """
    def __init__(self, data_descriptor, **kwargs):
        #assert isinstance(data.major_axis, pd.tseries.index.DatetimeIndex)
        self.data_descriptor = data_descriptor
        # Unpack config dictionary with default values.
        self.sids = kwargs.get('sids', data_descriptor['tickers'])
        self.start = kwargs.get('start', data_descriptor['index'][0])
        self.end = kwargs.get('end', data_descriptor['index'][-1])
        # Hash_value for downstream sorting.
        self.arg_string = hash_args(data_descriptor, **kwargs)
        self._raw_data = None
        self.remote = Remote()

    @property
    def mapping(self):
        # Base fields required by the engine; any extra OHLC columns
        # found in the fetched panel are passed through unchanged.
        mapping = {
            'dt': (lambda x: x, 'dt'),
            'sid': (lambda x: x, 'sid'),
            'price': (float, 'price'),
            'volume': (int, 'volume'),
        }
        # Add additional fields.
        for field_name in self.data.minor_axis:
            if field_name in ['price', 'volume', 'dt', 'sid']:
                continue
            mapping[field_name] = (lambda x: x, field_name)
        return mapping

    @property
    def instance_hash(self):
        return self.arg_string

    def _get(self):
        # Fetch the daily OHLC panel for every tracked sid.
        return self.remote.fetch_equities_daily(self.sids, ohlc=True,
                                                indexes={},
                                                start=self.start,
                                                end=self.end)

    def raw_data_gen(self):
        # Fetch once, then emit one event per (date, sid).
        # NOTE: a leftover ``import ipdb; ipdb.set_trace()`` debugger
        # breakpoint was removed from here — it would hang any
        # non-interactive run.
        self.data = self._get()
        for dt in self.data.major_axis:
            df = self.data.major_xs(dt)
            for sid, series in df.iterkv():
                if sid in self.sids:
                    event = {
                        'dt': dt,
                        'sid': sid,
                    }
                    for field_name, value in series.iteritems():
                        event[field_name] = value
                    yield event

    @property
    def raw_data(self):
        # Lazily create the generator on first access.
        if not self._raw_data:
            self._raw_data = self.raw_data_gen()
        return self._raw_data
class YahooPriceSource(DataSource):
    """
    Emits one (price, volume) event per date and tracked sid, using
    daily prices fetched from the remote Yahoo service.

    Configuration options:
        sids   : list of values representing simulated internal sids
        start  : start date
        delta  : timedelta between internal events
        filter : filter to remove the sids
    """
    def __init__(self, data_descriptor, **kwargs):
        assert isinstance(data_descriptor['index'],
                          pd.tseries.index.DatetimeIndex)
        self.data_descriptor = data_descriptor
        # Optional overrides, defaulting to the descriptor contents.
        index = data_descriptor['index']
        self.sids = kwargs.get('sids', data_descriptor['tickers'])
        self.start = kwargs.get('start', index[0])
        self.end = kwargs.get('end', index[-1])
        # Hash used downstream for sorting.
        self.arg_string = hash_args(data_descriptor, **kwargs)
        self._raw_data = None
        self.remote = Remote()

    @property
    def mapping(self):
        identity = lambda x: x
        return {
            'dt': (identity, 'dt'),
            'sid': (identity, 'sid'),
            'price': (float, 'price'),
            'volume': (int, 'volume'),
        }

    @property
    def instance_hash(self):
        return self.arg_string

    def _get(self):
        # Fetch the daily price frame for every tracked sid.
        return self.remote.fetch_equities_daily(self.sids, indexes={},
                                                start=self.start,
                                                end=self.end)

    def raw_data_gen(self):
        self.data = self._get()
        for dt, row in self.data.iterrows():
            for sid, price in row.iterkv():
                if sid not in self.sids:
                    continue
                # This feed carries no volume; emit a constant stand-in.
                yield {
                    'dt': dt,
                    'sid': sid,
                    'price': price,
                    'volume': 1000,
                }

    @property
    def raw_data(self):
        # Lazily create the generator on first access.
        if self._raw_data is None:
            self._raw_data = self.raw_data_gen()
        return self._raw_data
class RiskMetricsIterative(RiskMetricsBase):
    """Iterative version of RiskMetrics. Should behave exactly like
    RiskMetricsBatch.

    :Usage:
        Instantiate RiskMetricsIterative once.
        Call update() method on each dt to update the metrics.
    """

    def __init__(self, sim_params):
        # Treasury curves are used for the risk-free leg of the ratios.
        self.treasury_curves = trading.environment.treasury_curves
        # Normalize period bounds to midnight so day-level comparisons work.
        self.start_date = sim_params.period_start.replace(
            hour=0, minute=0, second=0, microsecond=0
        )
        self.end_date = sim_params.period_end.replace(
            hour=0, minute=0, second=0, microsecond=0
        )

        # Restrict the environment's calendar to the simulated period.
        all_trading_days = trading.environment.trading_days
        mask = ((all_trading_days >= self.start_date) &
                (all_trading_days <= self.end_date))

        self.trading_days = all_trading_days[mask]
        # Make sure the period end itself is always present in the index.
        if sim_params.period_end not in self.trading_days:
            last_day = pd.tseries.index.DatetimeIndex(
                [sim_params.period_end]
            )
            self.trading_days = self.trading_days.append(last_day)

        self.sim_params = sim_params

        # Pre-allocate the return containers for the chosen emission rate.
        if sim_params.emission_rate == 'daily':
            self.initialize_daily_indices()
        elif sim_params.emission_rate == 'minute':
            self.initialize_minute_indices(sim_params)

        # Valid (non-NaN) slices of the *_cont series, set by update().
        self.algorithm_returns = None
        self.benchmark_returns = None

        self.compounded_log_returns = []
        self.moving_avg = []

        # One entry is appended to each of these lists per update() call,
        # so index -1 is always "the latest value".
        self.algorithm_volatility = []
        self.benchmark_volatility = []
        self.algorithm_period_returns = []
        self.benchmark_period_returns = []

        self.algorithm_covariance = None
        self.benchmark_variance = None
        self.condition_number = None
        self.eigen_values = None

        self.sharpe = []
        self.sortino = []
        self.information = []
        self.beta = []
        self.alpha = []
        self.max_drawdown = 0
        # Running peak of compounded log returns, for drawdown tracking.
        self.current_max = -np.inf
        self.excess_returns = []
        # Cache of treasury rates keyed by day (see update()).
        self.daily_treasury = {}

    def initialize_minute_indices(self, sim_params):
        """Allocate minute-frequency return containers over the session."""
        self.algorithm_returns_cont = pd.Series(index=pd.date_range(
            sim_params.first_open, sim_params.last_close, freq="Min"))
        self.benchmark_returns_cont = pd.Series(index=pd.date_range(
            sim_params.first_open, sim_params.last_close, freq="Min"))

    def initialize_daily_indices(self):
        """Allocate daily return containers indexed on trading days."""
        self.algorithm_returns_cont = pd.Series(index=self.trading_days)
        self.benchmark_returns_cont = pd.Series(index=self.trading_days)

        #TODO Here an update function, benchmark from start_date to now
        ## Here -------------------------------------------------------
        # Remote handle used by download_benchmark() in live mode.
        self.remote = Remote()

    def download_benchmark(self, event):
        ''' Get the current value of the index associated with code '''
        # ``event`` arrives as a float (e.g. 1002.0); the integer part
        # is the exchange code. 1002.0 why float64 ?
        code = str(int(event))
        #FIXME Fake return while data module isn't reshaped
        #return 0.04
        assert code in datautils.Exchange
        bench_data = self.remote.fetch_equities_snapshot(datautils.Exchange[code]['index'])
        #assert bench_data
        #TODO Some check betwen received date and perc_return['trade_date_utc']
        return float(bench_data[datautils.Exchange[code]['index']]['change_pct'])

    @property
    def last_return_date(self):
        # Most recent dt for which update() recorded a return.
        return self.algorithm_returns.index[-1]

    def update(self, dt, algorithm_returns, benchmark_returns):
        """Record returns for ``dt`` and recompute every metric.

        Appends one entry to each metric list; statement order matters
        because later calculations read the values appended earlier in
        this same call.
        """
        #NOTE Should probably change before, when benchmark_returns is extracted
        if benchmark_returns > 1000.0:
            # More than 1000 of return is a fake, we're in live mode
            # and need to retrieve current index value
            benchmark_returns = self.download_benchmark(benchmark_returns)
        self.algorithm_returns_cont[dt] = algorithm_returns
        self.algorithm_returns = self.algorithm_returns_cont.valid()

        self.benchmark_returns_cont[dt] = benchmark_returns
        self.benchmark_returns = self.benchmark_returns_cont.valid()

        self.num_trading_days = len(self.algorithm_returns)
        #self.trading_days += 1

        self.update_compounded_log_returns()

        self.algorithm_period_returns.append(
            self.calculate_period_returns(self.algorithm_returns))
        self.benchmark_period_returns.append(
            self.calculate_period_returns(self.benchmark_returns))

        # Both series must cover exactly the same datetimes, or every
        # pairwise statistic (beta, information ratio, ...) is invalid.
        if not self.algorithm_returns.index.equals(
            self.benchmark_returns.index
        ):
            message = "Mismatch between benchmark_returns ({bm_count}) and \
algorithm_returns ({algo_count}) in range {start} : {end} on {dt}"
            message = message.format(
                bm_count=len(self.benchmark_returns),
                algo_count=len(self.algorithm_returns),
                start=self.start_date,
                end=self.end_date,
                dt=dt
            )
            raise Exception(message)

        self.update_current_max()
        self.benchmark_volatility.append(
            self.calculate_volatility(self.benchmark_returns))
        self.algorithm_volatility.append(
            self.calculate_volatility(self.algorithm_returns))

        # caching the treasury rates for the minutely case is a
        # big speedup, because it avoids searching the treasury
        # curves on every minute.
        treasury_end = self.algorithm_returns.index[-1].replace(
            hour=0, minute=0)
        if treasury_end not in self.daily_treasury:
            treasury_period_return = choose_treasury(
                self.treasury_curves,
                self.start_date,
                self.algorithm_returns.index[-1]
            )
            self.daily_treasury[treasury_end] =\
                treasury_period_return

        self.treasury_period_return = \
            self.daily_treasury[treasury_end]
        self.excess_returns.append(
            self.algorithm_period_returns[-1] - self.treasury_period_return)
        self.beta.append(self.calculate_beta()[0])
        self.alpha.append(self.calculate_alpha())
        self.sharpe.append(self.calculate_sharpe())
        self.sortino.append(self.calculate_sortino())
        self.information.append(self.calculate_information())
        self.max_drawdown = self.calculate_max_drawdown()

    def to_dict(self):
        """
        Creates a dictionary representing the state of the risk report.
        Returns the latest value of each metric, with entries nulled out
        by check_entry() replaced by None.
        """
        period_label = self.last_return_date.strftime("%Y-%m")
        rval = {
            'trading_days': len(self.algorithm_returns.valid()),
            'benchmark_volatility': self.benchmark_volatility[-1],
            'algo_volatility': self.algorithm_volatility[-1],
            'treasury_period_return': self.treasury_period_return,
            'algorithm_period_return': self.algorithm_period_returns[-1],
            'benchmark_period_return': self.benchmark_period_returns[-1],
            'beta': self.beta[-1],
            'alpha': self.alpha[-1],
            'excess_return': self.excess_returns[-1],
            'max_drawdown': self.max_drawdown,
            'period_label': period_label
        }

        rval['sharpe'] = self.sharpe[-1]
        rval['sortino'] = self.sortino[-1]
        rval['information'] = self.information[-1]

        return {k: None
                if check_entry(k, v)
                else v for k, v in rval.iteritems()}

    def __repr__(self):
        # One "name:latest_value" line per metric; list metrics show
        # their last element (or NaN if nothing recorded yet).
        statements = []
        metrics = [
            "algorithm_period_returns",
            "benchmark_period_returns",
            "excess_returns",
            "trading_days",
            "benchmark_volatility",
            "algorithm_volatility",
            "sharpe",
            "sortino",
            "information",
            "algorithm_covariance",
            "benchmark_variance",
            "beta",
            "alpha",
            "max_drawdown",
            "algorithm_returns",
            "benchmark_returns",
            "condition_number",
            "eigen_values"
        ]

        for metric in metrics:
            value = getattr(self, metric)
            if isinstance(value, list):
                if len(value) == 0:
                    value = np.nan
                else:
                    value = value[-1]
            statements.append("{m}:{v}".format(m=metric, v=value))

        return '\n'.join(statements)

    def update_compounded_log_returns(self):
        """Append log(1 + latest_return) to the running compounded series."""
        if len(self.algorithm_returns) == 0:
            return

        try:
            compound = math.log(1 + self.algorithm_returns[
                self.algorithm_returns.last_valid_index()])
        except ValueError:
            # log() of a non-positive argument (return <= -100%).
            compound = 0.0
            # BUG? Shouldn't this be set to log(1.0 + 0) ?

        if len(self.compounded_log_returns) == 0:
            self.compounded_log_returns.append(compound)
        else:
            self.compounded_log_returns.append(
                self.compounded_log_returns[-1] +
                compound
            )

    def calculate_period_returns(self, returns):
        # Total compounded return over the period: prod(1 + r) - 1.
        returns = np.array(returns)
        return (1. + returns).prod() - 1

    def update_current_max(self):
        """Track the running peak of the compounded log-return series."""
        if len(self.compounded_log_returns) == 0:
            return
        if self.current_max < self.compounded_log_returns[-1]:
            self.current_max = self.compounded_log_returns[-1]

    def calculate_max_drawdown(self):
        """Return the largest peak-to-trough loss observed so far."""
        if len(self.compounded_log_returns) == 0:
            return self.max_drawdown

        # Drop from the running peak, converted back from log space.
        cur_drawdown = 1.0 - math.exp(
            self.compounded_log_returns[-1] -
            self.current_max)

        if self.max_drawdown < cur_drawdown:
            return cur_drawdown
        else:
            return self.max_drawdown

    def calculate_sharpe(self):
        """
        http://en.wikipedia.org/wiki/Sharpe_ratio
        """
        return sharpe_ratio(self.algorithm_volatility[-1],
                            self.algorithm_period_returns[-1],
                            self.treasury_period_return)

    def calculate_sortino(self, mar=None):
        """
        http://en.wikipedia.org/wiki/Sortino_ratio

        ``mar`` (minimum acceptable return) defaults to the current
        treasury period return.
        """
        if mar is None:
            mar = self.treasury_period_return

        return sortino_ratio(np.array(self.algorithm_returns),
                             self.algorithm_period_returns[-1],
                             mar)

    def calculate_information(self):
        """
        http://en.wikipedia.org/wiki/Information_ratio
        """
        A = np.array
        return information_ratio(A(self.algorithm_returns),
                                 A(self.benchmark_returns))

    def calculate_alpha(self):
        """
        http://en.wikipedia.org/wiki/Alpha_(investment)
        """
        return alpha(self.algorithm_period_returns[-1],
                     self.treasury_period_return,
                     self.benchmark_period_returns[-1],
                     self.beta[-1])
class EquitiesLiveSource(DataSource):
    """
    Live data source: waits until each datetime of the configured index
    is reached, then fetches a level-2 snapshot of the tracked equities
    and yields one event per sid.

    Configuration options:
        sids   : list of values representing simulated internal sids
        start  : start date
        delta  : timedelta between internal events
        filter : filter to remove the sids
    """
    def __init__(self, data, **kwargs):
        assert isinstance(data['index'], pd.tseries.index.DatetimeIndex)
        self.data = data
        # Unpack config dictionary with default values.
        self.sids = kwargs.get('sids', data['tickers'])
        self.start = kwargs.get('start', data['index'][0])
        self.end = kwargs.get('end', data['index'][-1])
        # Hash_value for downstream sorting.
        self.arg_string = hash_args(data, **kwargs)
        self._raw_data = None
        self.remote = Remote()
        self.feed = DataFeed()

    @property
    def mapping(self):
        return {
            'dt': (lambda x: x, 'dt'),
            'sid': (lambda x: x, 'sid'),
            'price': (float, 'price'),
            'currency': (str, 'currency'),
            'perc_change': (float, 'perc_change'),
            'volume': (int, 'volume'),
        }

    @property
    def instance_hash(self):
        return self.arg_string

    def _wait_for_dt(self, dt):
        '''
        Only return when we reach given datetime
        '''
        # QuanTrade works with utc dates, conversions are made for I/O.
        now = datetime.datetime.now(pytz.utc)
        while now < dt:
            log.debug('Waiting for {} / {}'.format(now, dt))
            time.sleep(15)
            now = datetime.datetime.now(pytz.utc)

    def _get_updated_index(self):
        '''
        truncate past dates in index
        '''
        late_index = self.data['index']
        # Compare full timestamps at minute resolution. The previous
        # implementation compared day/hour/minute fields separately,
        # ignoring month and year, so the index was mis-truncated
        # around month boundaries.
        # NOTE(review): assumes the index is tz-aware UTC, matching the
        # aware `now` — confirm against the descriptor builder.
        current_dt = datetime.datetime.now(pytz.utc).replace(
            second=0, microsecond=0)
        return late_index[late_index >= current_dt]

    def raw_data_gen(self):
        # Only future datetimes are relevant for a live session.
        index = self._get_updated_index()
        for dt in index:
            self._wait_for_dt(dt)
            snapshot = self.remote.fetch_equities_snapshot(
                symbols=self.sids, level=2)
            if snapshot.empty:
                log.error('** No data snapshot available, maybe stopped by google ?')
                sys.exit(2)
            for sid in self.sids:
                # Fix volume = 0, (later will be denominator)
                if not int(snapshot[sid]['volume']):
                    #TODO Here just a special value that the algo could
                    #     detect, like a missing data marker
                    snapshot[sid]['volume'] = 10001
                # Empty string means no percent change was reported.
                if snapshot[sid]['perc_change'] == '':
                    snapshot[sid]['perc_change'] = 0
                event = {
                    'dt': dt,
                    'sid': sid,
                    'price': float(snapshot[sid]['last']),
                    'currency': snapshot[sid]['currency'],
                    'perc_change': float(snapshot[sid]['perc_change']),
                    'volume': int(snapshot[sid]['volume']),
                }
                yield event

    @property
    def raw_data(self):
        # Lazily create the generator on first access.
        if not self._raw_data:
            self._raw_data = self.raw_data_gen()
        return self._raw_data
class YahooOHLCSource(DataSource):
    """
    Yields one OHLC event per (date, sid) pair, fetched once from the
    remote Yahoo daily-data service.

    Configuration options:
        sids   : list of values representing simulated internal sids
        start  : start date
        delta  : timedelta between internal events
        filter : filter to remove the sids
    """
    def __init__(self, data_descriptor, **kwargs):
        #assert isinstance(data.major_axis, pd.tseries.index.DatetimeIndex)
        self.data_descriptor = data_descriptor
        # Unpack config dictionary with default values.
        self.sids = kwargs.get('sids', data_descriptor['tickers'])
        self.start = kwargs.get('start', data_descriptor['index'][0])
        self.end = kwargs.get('end', data_descriptor['index'][-1])
        # Hash_value for downstream sorting.
        self.arg_string = hash_args(data_descriptor, **kwargs)
        self._raw_data = None
        self.remote = Remote()

    @property
    def mapping(self):
        # Base fields required by the engine; any extra OHLC columns
        # found in the fetched panel are passed through unchanged.
        mapping = {
            'dt': (lambda x: x, 'dt'),
            'sid': (lambda x: x, 'sid'),
            'price': (float, 'price'),
            'volume': (int, 'volume'),
        }
        # Add additional fields.
        for field_name in self.data.minor_axis:
            if field_name in ['price', 'volume', 'dt', 'sid']:
                continue
            mapping[field_name] = (lambda x: x, field_name)
        return mapping

    @property
    def instance_hash(self):
        return self.arg_string

    def _get(self):
        # Fetch the daily OHLC panel for every tracked sid.
        return self.remote.fetch_equities_daily(self.sids, ohlc=True,
                                                indexes={},
                                                start=self.start,
                                                end=self.end)

    def raw_data_gen(self):
        # Fetch once, then emit one event per (date, sid).
        # NOTE: a leftover ``import ipdb; ipdb.set_trace()`` debugger
        # breakpoint was removed from here — it would hang any
        # non-interactive run.
        self.data = self._get()
        for dt in self.data.major_axis:
            df = self.data.major_xs(dt)
            for sid, series in df.iterkv():
                if sid in self.sids:
                    event = {
                        'dt': dt,
                        'sid': sid,
                    }
                    for field_name, value in series.iteritems():
                        event[field_name] = value
                    yield event

    @property
    def raw_data(self):
        # Lazily create the generator on first access.
        if not self._raw_data:
            self._raw_data = self.raw_data_gen()
        return self._raw_data
class EquitiesLiveSource(DataSource):
    """
    Live data source: waits until each datetime of the configured index
    is reached, then fetches a level-2 snapshot of the tracked equities
    and yields one event per sid.

    Configuration options:
        sids   : list of values representing simulated internal sids
        start  : start date
        delta  : timedelta between internal events
        filter : filter to remove the sids
    """
    def __init__(self, data, **kwargs):
        assert isinstance(data['index'], pd.tseries.index.DatetimeIndex)
        self.data = data
        # Unpack config dictionary with default values.
        self.sids = kwargs.get('sids', data['tickers'])
        self.start = kwargs.get('start', data['index'][0])
        self.end = kwargs.get('end', data['index'][-1])
        # Hash_value for downstream sorting.
        self.arg_string = hash_args(data, **kwargs)
        self._raw_data = None
        self.remote = Remote()
        self.feed = DataFeed()

    @property
    def mapping(self):
        return {
            'dt': (lambda x: x, 'dt'),
            'sid': (lambda x: x, 'sid'),
            'price': (float, 'price'),
            'currency': (str, 'currency'),
            'perc_change': (float, 'perc_change'),
            'volume': (int, 'volume'),
        }

    @property
    def instance_hash(self):
        return self.arg_string

    def _wait_for_dt(self, dt):
        '''
        Only return when we reach given datetime
        '''
        # QuanTrade works with utc dates, conversions are made for I/O.
        now = datetime.datetime.now(pytz.utc)
        while now < dt:
            log.debug('Waiting for {} / {}'.format(now, dt))
            time.sleep(15)
            now = datetime.datetime.now(pytz.utc)

    def _get_updated_index(self):
        '''
        truncate past dates in index
        '''
        late_index = self.data['index']
        # Compare full timestamps at minute resolution. The previous
        # implementation compared day/hour/minute fields separately,
        # ignoring month and year, so the index was mis-truncated
        # around month boundaries.
        # NOTE(review): assumes the index is tz-aware UTC, matching the
        # aware `now` — confirm against the descriptor builder.
        current_dt = datetime.datetime.now(pytz.utc).replace(
            second=0, microsecond=0)
        return late_index[late_index >= current_dt]

    def raw_data_gen(self):
        # Only future datetimes are relevant for a live session.
        index = self._get_updated_index()
        for dt in index:
            self._wait_for_dt(dt)
            snapshot = self.remote.fetch_equities_snapshot(
                symbols=self.sids, level=2)
            if snapshot.empty:
                log.error(
                    '** No data snapshot available, maybe stopped by google ?')
                sys.exit(2)
            for sid in self.sids:
                # Fix volume = 0, (later will be denominator)
                if not int(snapshot[sid]['volume']):
                    #TODO Here just a special value that the algo could
                    #     detect, like a missing data marker
                    snapshot[sid]['volume'] = 10001
                # Empty string means no percent change was reported.
                if snapshot[sid]['perc_change'] == '':
                    snapshot[sid]['perc_change'] = 0
                event = {
                    'dt': dt,
                    'sid': sid,
                    'price': float(snapshot[sid]['last']),
                    'currency': snapshot[sid]['currency'],
                    'perc_change': float(snapshot[sid]['perc_change']),
                    'volume': int(snapshot[sid]['volume']),
                }
                yield event

    @property
    def raw_data(self):
        # Lazily create the generator on first access.
        if not self._raw_data:
            self._raw_data = self.raw_data_gen()
        return self._raw_data