class IndexQuoteFetcherTestCase(unittest.TestCase):
    """Check the container type returned by ``IndexQuoteFetcher.fetch_window``.

    A single field name is expected to yield a ``pd.Series``; a list of
    field names is expected to yield a ``pd.DataFrame``.
    """

    def setUp(self):
        # Every test gets its own fetcher and the same date window.
        self.fetcher = IndexQuoteFetcher()
        start, count = '20140101', 20
        self.dates = dateutil.get_startfrom(DATES, start, count)

    def tearDown(self):
        # Drop the reference to the fetcher created in setUp.
        self.fetcher = None

    def test_fetch_window1(self):
        # Field given as a plain string.
        result = self.fetcher.fetch_window('close', self.dates, index='HS300')
        self.assertIsInstance(result, pd.Series)

    def test_fetch_window2(self):
        # Field given as a one-element list.
        fields = ['close']
        result = self.fetcher.fetch_window(fields, self.dates, index='HS300')
        self.assertIsInstance(result, pd.DataFrame)
class TSRetUpdater(UpdaterBase):
    """The updater class for collections 'ts_ret'."""

    def __init__(self, timeout=60):
        """Create the fetchers and the intraday time grid used by ``update``.

        :param timeout: Forwarded unchanged to ``UpdaterBase.__init__``
        """
        UpdaterBase.__init__(self, timeout=timeout)
        self.interval = IntervalFetcher('1min')
        self.quote = QuoteFetcher()
        self.indexquote = IndexQuoteFetcher()
        self.times = dateutil.generate_intervals(60)

    def pre_update(self):
        """Attach ``dates`` (distinct dates in ``db.dates``) and ``collection``
        (``db['ts_ret']``) to the instance before updating."""
        self.__dict__.update({
            'dates': self.db.dates.distinct('date'),
            'collection': self.db['ts_ret'],
        })

    def pro_update(self):
        """Post-update hook; currently a no-op."""
        # NOTE(review): the early return below makes the index creation
        # unreachable. It is kept as found because enabling it would start
        # issuing index builds against the database; confirm the intent
        # before removing either the return or the dead statements.
        return

        self.logger.debug('Ensuring index dname_1_date_1 on collection {}', self.collection.name)
        self.collection.ensure_index([('dname', 1), ('date', 1)], background=True)

    def update(self, date):
        """Update TinySoft interval returns data(1min, 5min, 15min, 30min, 60min, 120min) for the **same** day after market close.

        Stock returns are upserted into ``db.ts_ret`` (one document per
        ``dname``/``date``/``time``); index returns are upserted into
        ``db.tsindex_ret`` (one document per ``dname``/``index``/``date``).
        Note that the index part only covers 5, 15, 30, 60 and 120 bars.

        :param date: The day to update; used as-is in queries and keys
        """
        interval = self.interval.fetch_daily('close', self.times, date)
        # Prepend the previous close as the '093000' row so that the first
        # bar's return is measured against it. ``.loc`` replaces the removed
        # ``.ix`` accessor for this label-based insert.
        interval.loc['093000'] = self.quote.fetch_daily('prev_close', date).reindex(index=interval.columns)
        interval = interval.sort_index()
        for i in (1, 5, 15, 30, 60, 120):
            # Positional down-sampling: keep every i-th row, then drop the
            # first row whose pct_change is undefined.
            sub_interval = interval.iloc[::i]
            sub_ret = sub_interval.pct_change(1).iloc[1:]
            key = {'dname': 'returns'+str(i), 'date': date}
            for time, ser in sub_ret.iterrows():
                key.update({'time': time})
                self.db.ts_ret.update(key, {'$set': {'dvalue': ser.dropna().to_dict()}}, upsert=True)
        self.logger.info('UPSERT documents for {} sids into (c: [{}]) of (d: [{}]) on {}',
                         interval.shape[1], self.collection.name, self.db.name, date)

        indice = self.db.tsindex_1min.distinct('dname')
        for index in indice:
            query = {'dname': index, 'date': date}
            proj = {'_id': 0, 'close': 1}
            try:
                ser = pd.DataFrame(list(self.db.tsindex_1min.find(query, proj)))['close']
            except Exception:
                # Best effort: skip an index with no usable 'close' data for
                # this date. ``Exception`` (rather than a bare ``except``)
                # lets KeyboardInterrupt/SystemExit propagate.
                continue
            ser.index = self.times
            prev_close = self.indexquote.fetch_daily('prev_close', date, index=index)
            ser.loc['093000'] = prev_close
            ser = ser.sort_index()
            for i in (5, 15, 30, 60, 120):
                sub_ser = ser.iloc[::i]
                sub_ret = sub_ser.pct_change(1).iloc[1:]
                key = {'dname': 'returns'+str(i), 'index': index, 'date': date}
                self.db.tsindex_ret.update(key, {'$set': {'dvalue': sub_ret.to_dict()}}, upsert=True)
        self.logger.info('UPSERT documents for {} indice into (c: [{}]) of (d: [{}]) on {}',
                         len(indice), self.db.tsindex_ret.name, self.db.name, date)
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
# Shared date formatter producing 'YYYYMMDD' labels.
datefmt = DateFormatter('%Y%m%d')
from matplotlib.backends.backend_pdf import PdfPages
import magic

from orca.mongo.industry import IndustryFetcher
from orca.mongo.index import IndexQuoteFetcher
from orca.mongo.components import ComponentsFetcher
from orca.mongo.sywgquote import SYWGQuoteFetcher
from orca.mongo.kday import UnivFetcher
# Module-level fetchers, constructed once at import time.
industry_fetcher = IndustryFetcher(datetime_index=True, reindex=True)
indexquote_fetcher = IndexQuoteFetcher(datetime_index=True)
components_fetcher = ComponentsFetcher(datetime_index=True)
sywgquote_fetcher = SYWGQuoteFetcher(datetime_index=True, use_industry=True)
univ_fetcher = UnivFetcher(datetime_index=True, reindex=True)

from orca.utils import dateutil
from orca.utils.io import read_frame
from orca.operation import api


class Weight(object):
    """Class to analyse portfolio weight decomposition through time."""

    def __init__(self, alpha, n, rank=None):
        """Store the formatted alpha and its per-row descending rank.

        :param alpha: Alpha to analyse; passed through ``api.format``
        :param n: Rank cut-off; only ranks ``<= n`` are kept in ``rank_alpha``
        :param rank: NOTE(review): not used in the visible part of this
            method -- confirm whether later code relies on it
        """
        self.alpha = api.format(alpha)
        # Rank along axis=1 with the largest value ranked first.
        self.rank_alpha = self.alpha.rank(axis=1, ascending=False)
        # Mask out every entry whose rank exceeds n.
        self.rank_alpha = self.rank_alpha[self.rank_alpha <= n]
def setUp(self):
    """Create the fetcher and the date window used by each test."""
    self.fetcher = IndexQuoteFetcher()
    start, count = '20140101', 20
    self.dates = dateutil.get_startfrom(DATES, start, count)
class Performance(object):
    """Class to provide analyser to examine the performance of an alpha from different perspective.

    Stock returns, index returns and index components are cached at class
    level and shared by all instances; they are fetched lazily through the
    class-level fetchers or can be injected with the ``set_*`` class methods.

    :param alpha: Alpha to be examined, either a well formatted DataFrame or :py:class:`orca.alpha.base.AlphaBase`
    """

    # Held while any of the class-level caches below is being replaced.
    mongo_lock = Lock()

    quote = QuoteFetcher(datetime_index=True, reindex=True)
    index_quote = IndexQuoteFetcher(datetime_index=True)
    components = ComponentsFetcher(datetime_index=True, reindex=True)

    # Class-level caches; ``None`` means "not loaded yet".
    returns = None
    index_returns = {
        'HS300': None,
    }
    index_components = {'HS300': None, 'CS500': None, 'other': None}

    @classmethod
    def get_returns(cls, startdate):
        """Return cached stock returns, fetching them when the cache is empty
        or begins later than ``startdate``.

        :param startdate: Must support ``strftime`` and comparison with the first index entry of the cached returns
        """
        if cls.returns is None or startdate < cls.returns.index[0]:
            with cls.mongo_lock:
                cls.returns = cls.quote.fetch(
                    'returns', startdate=startdate.strftime('%Y%m%d'))
        return cls.returns

    @classmethod
    def get_index_returns(cls, startdate, index='HS300'):
        """Return cached returns of ``index``, fetching them when they are
        missing or begin later than ``startdate``.

        :param startdate: Must support ``strftime`` and comparison with the first index entry of the cached returns
        :param str index: Index name, used both as cache key and as the fetcher's ``index`` argument
        """
        cached = cls.index_returns.get(index)
        if cached is None or startdate < cached.index[0]:
            with cls.mongo_lock:
                # Fixed: this used to call ``cls.quote.fetch`` (the stock
                # fetcher) and ignored ``index``, so every index was given
                # the stock returns.
                # NOTE(review): assumes the index fetcher serves a 'returns'
                # field and accepts ``index=`` on ``fetch`` like it does on
                # ``fetch_window``/``fetch_daily`` -- confirm.
                cls.index_returns[index] = cls.index_quote.fetch(
                    'returns', startdate=startdate.strftime('%Y%m%d'),
                    index=index)
        return cls.index_returns[index]

    @classmethod
    def get_index_components(cls, startdate, index):
        """Return cached components for ``index``.

        When the requested entry is missing or begins later than
        ``startdate``, all three entries ('HS300', 'CS500', 'other') are
        refreshed; 'other' is the complement of the union of the first two.

        :param str index: One of the keys of ``index_components``
        :raises KeyError: If ``index`` is not a key of ``index_components``
        """
        cached = cls.index_components[index]
        if cached is None or startdate < cached.index[0]:
            with cls.mongo_lock:
                start = startdate.strftime('%Y%m%d')
                cls.index_components['HS300'] = cls.components.fetch(
                    'HS300', startdate=start)
                cls.index_components['CS500'] = cls.components.fetch(
                    'CS500', startdate=start)
                cls.index_components['other'] = ~(
                    cls.index_components['HS300'] |
                    cls.index_components['CS500'])
        return cls.index_components[index]

    @classmethod
    def set_returns(cls, returns):
        """Call this method to set returns so that for future uses, there is no need to interact with MongoDB."""
        with cls.mongo_lock:
            cls.returns = api.format(returns)

    @classmethod
    def set_index_returns(cls, index, returns):
        """Call this method to set index returns so that for future uses, there is no need to interact with MongoDB.

        Note that the index of the passed ``returns`` object is converted to
        datetimes in place.
        """
        with cls.mongo_lock:
            returns.index = pd.to_datetime(returns.index)
            cls.index_returns[index] = returns

    @classmethod
    def set_index_components(cls, index, components):
        """Call this method to set index components data so that for future uses, there is no need to interact with MongoDB."""
        with cls.mongo_lock:
            cls.index_components[index] = api.format(components).fillna(False)

    def __init__(self, alpha):
        if isinstance(alpha, AlphaBase):
            self.alpha = alpha.get_alphas()
        else:
            self.alpha = api.format(alpha)
        # Mask out non-finite entries (NaN/inf).
        self.alpha = self.alpha[np.isfinite(self.alpha)]
        self.startdate = self.alpha.index[0]

    def _build_analyser(self, alpha, index=None):
        """Wrap ``alpha`` in an Analyser, adding index returns when ``index`` is given."""
        returns = Performance.get_returns(self.startdate)
        if index is None:
            return Analyser(alpha, returns)
        return Analyser(
            alpha, returns,
            Performance.get_index_returns(self.startdate, index=index))

    def get_original(self):
        """**Be sure** to use this method when either the alpha is neutralized or you know what you are doing."""
        return Analyser(self.alpha, Performance.get_returns(self.startdate))

    def get_shift(self, n):
        """Analyse the alpha shifted by ``n`` rows, with returns loaded from
        the date found at position ``n`` of the alpha index."""
        return Analyser(self.alpha.shift(n),
                        Performance.get_returns(self.alpha.index[n]))

    def get_longshort(self):
        """Pretend the alpha can be made into a long/short portfolio."""
        return Analyser(api.neutralize(self.alpha),
                        Performance.get_returns(self.startdate))

    def get_long(self, index=None):
        """Only analyse the long part."""
        return self._build_analyser(self.alpha[self.alpha > 0], index)

    def get_short(self, index=None):
        """Only analyse the short part."""
        return self._build_analyser(-self.alpha[self.alpha < 0], index)

    def get_qtop(self, q, index=None):
        """Only analyse the top quantile as long holding."""
        return self._build_analyser(api.qtop(self.alpha, q), index)

    def get_qbottom(self, q, index=None):
        """Only analyse the bottom quantile as long holding."""
        return self._build_analyser(api.qbottom(self.alpha, q), index)

    def get_ntop(self, n, index=None):
        """Only analyse the top n stocks as long holding."""
        return self._build_analyser(api.top(self.alpha, n), index)

    def get_nbottom(self, n, index=None):
        """Only analyse the bottom n stocks as long holding."""
        return self._build_analyser(api.bottom(self.alpha, n), index)

    def get_qtail(self, q):
        """Long the top quantile and at the same time short the bottom quantile."""
        return Analyser(
            api.qtop(self.alpha, q).astype(int) -
            api.qbottom(self.alpha, q).astype(int),
            Performance.get_returns(self.startdate))

    def get_ntail(self, n):
        """Long the top n stocks and at the same time short the bottom n stocks."""
        return Analyser(
            api.top(self.alpha, n).astype(int) -
            api.bottom(self.alpha, n).astype(int),
            Performance.get_returns(self.startdate))

    def get_quantiles(self, n):
        """Return a list of analysers for n quantiles."""
        return [Analyser(qt, Performance.get_returns(self.startdate))
                for qt in api.quantiles(self.alpha, n)]

    def get_universe(self, univ):
        """Return a performance object for alpha in this universe."""
        return Performance(api.intersect(self.alpha, univ))

    def get_bms(self):
        """Return a list of 3 performance objects for alphas in HS300, CS500 and other."""
        # NOTE(review): ``.ix`` is left untouched here; it no longer exists
        # in pandas >= 1.0 and will need replacing on upgrade.
        big = Performance.get_index_components(self.startdate, 'HS300').ix[self.alpha.index]
        mid = Performance.get_index_components(self.startdate, 'CS500').ix[self.alpha.index]
        sml = Performance.get_index_components(self.startdate, 'other').ix[self.alpha.index]
        return [self.get_universe(univ) for univ in [big, mid, sml]]
def __init__(self, timeout=60):
    """Set up the base updater, then the fetchers and time grid.

    :param timeout: Handed on unchanged to ``UpdaterBase.__init__``
    """
    UpdaterBase.__init__(self, timeout=timeout)
    bar_frequency = '1min'
    self.interval = IntervalFetcher(bar_frequency)
    self.quote = QuoteFetcher()
    self.indexquote = IndexQuoteFetcher()
    interval_arg = 60
    self.times = dateutil.generate_intervals(interval_arg)
class TSRetUpdater(UpdaterBase):
    """The updater class for collections 'ts_ret'."""

    def __init__(self, timeout=60):
        """Create the fetchers and the intraday time grid used by ``update``.

        :param timeout: Forwarded unchanged to ``UpdaterBase.__init__``
        """
        UpdaterBase.__init__(self, timeout=timeout)
        self.interval = IntervalFetcher('1min')
        self.quote = QuoteFetcher()
        self.indexquote = IndexQuoteFetcher()
        self.times = dateutil.generate_intervals(60)

    def pre_update(self):
        """Attach ``dates`` (distinct dates in ``db.dates``) and ``collection``
        (``db['ts_ret']``) to the instance before updating."""
        self.__dict__.update({
            'dates': self.db.dates.distinct('date'),
            'collection': self.db['ts_ret'],
        })

    def pro_update(self):
        """Post-update hook; currently a no-op."""
        # NOTE(review): the early return below makes the index creation
        # unreachable. It is kept as found because enabling it would start
        # issuing index builds against the database; confirm the intent
        # before removing either the return or the dead statements.
        return

        self.logger.debug('Ensuring index dname_1_date_1 on collection {}',
                          self.collection.name)
        self.collection.ensure_index([('dname', 1), ('date', 1)],
                                     background=True)

    def update(self, date):
        """Update TinySoft interval returns data(1min, 5min, 15min, 30min, 60min, 120min) for the **same** day after market close.

        Stock returns are upserted into ``db.ts_ret`` (one document per
        ``dname``/``date``/``time``); index returns are upserted into
        ``db.tsindex_ret`` (one document per ``dname``/``index``/``date``).
        Note that the index part only covers 5, 15, 30, 60 and 120 bars.

        :param date: The day to update; used as-is in queries and keys
        """
        interval = self.interval.fetch_daily('close', self.times, date)
        # Prepend the previous close as the '093000' row so that the first
        # bar's return is measured against it. ``.loc`` replaces the removed
        # ``.ix`` accessor for this label-based insert.
        interval.loc['093000'] = self.quote.fetch_daily(
            'prev_close', date).reindex(index=interval.columns)
        interval = interval.sort_index()
        for i in (1, 5, 15, 30, 60, 120):
            # Positional down-sampling: keep every i-th row, then drop the
            # first row whose pct_change is undefined.
            sub_interval = interval.iloc[::i]
            sub_ret = sub_interval.pct_change(1).iloc[1:]
            key = {'dname': 'returns' + str(i), 'date': date}
            for time, ser in sub_ret.iterrows():
                key.update({'time': time})
                self.db.ts_ret.update(
                    key, {'$set': {
                        'dvalue': ser.dropna().to_dict()
                    }},
                    upsert=True)
        self.logger.info(
            'UPSERT documents for {} sids into (c: [{}]) of (d: [{}]) on {}',
            interval.shape[1], self.collection.name, self.db.name, date)

        indice = self.db.tsindex_1min.distinct('dname')
        for index in indice:
            query = {'dname': index, 'date': date}
            proj = {'_id': 0, 'close': 1}
            try:
                ser = pd.DataFrame(list(self.db.tsindex_1min.find(
                    query, proj)))['close']
            except Exception:
                # Best effort: skip an index with no usable 'close' data for
                # this date. ``Exception`` (rather than a bare ``except``)
                # lets KeyboardInterrupt/SystemExit propagate.
                continue
            ser.index = self.times
            prev_close = self.indexquote.fetch_daily('prev_close',
                                                     date,
                                                     index=index)
            ser.loc['093000'] = prev_close
            ser = ser.sort_index()
            for i in (5, 15, 30, 60, 120):
                sub_ser = ser.iloc[::i]
                sub_ret = sub_ser.pct_change(1).iloc[1:]
                key = {
                    'dname': 'returns' + str(i),
                    'index': index,
                    'date': date
                }
                self.db.tsindex_ret.update(
                    key, {'$set': {
                        'dvalue': sub_ret.to_dict()
                    }},
                    upsert=True)
        self.logger.info(
            'UPSERT documents for {} indice into (c: [{}]) of (d: [{}]) on {}',
            len(indice), self.db.tsindex_ret.name, self.db.name, date)
import os import pandas as pd import warnings warnings.simplefilter(action='ignore', category=pd.core.common.SettingWithCopyWarning) from lxml import etree from orca import DATES from orca.barra.base import BarraOptimizerBase from orca.mongo.barra import BarraFetcher barra_fetcher = BarraFetcher('short') from orca.mongo.quote import QuoteFetcher quote_fetcher = QuoteFetcher() from orca.mongo.index import IndexQuoteFetcher index_quote_fetcher = IndexQuoteFetcher() from orca.mongo.components import ComponentsFetcher components_fetcher = ComponentsFetcher(as_bool=False) config = etree.XML("""<Optimize> <Assets><Composite/></Assets> <InitPortfolio/> <Universe/> <RiskModel path="/home/SambaServer/extend_data/Barra/short/${YYYY}/${MM}/${DD}" name="CNE5S"/> <Case> <Utility/> <Constraints> <HedgeConstraints> <Leverage> <Net lower="1" upper="1"/> </Leverage>