def filter(self, startdate, enddate=None, parent=None, **kwargs):
    """Intersect the universes produced by all registered filters.

    :param startdate: Left end of the date range (datestring-like)
    :param enddate: Right end of the date range; None means open-ended
    :param parent: Ignored on entry; rebuilt as the full universe below
    :returns: Boolean DataFrame (dates x SIDS), True = sid in universe
    """
    # NOTE(review): the grouping of the ``reindex`` default under the
    # ``datetime_index`` guard is reconstructed from collapsed source —
    # confirm against the original layout.
    if 'datetime_index' not in kwargs:
        kwargs.update({'datetime_index': self.datetime_index})
        kwargs.update({'reindex': self.reindex})
    if 'date_check' not in kwargs:
        kwargs.update({'date_check': self.date_check})
    date_check = kwargs.get('date_check', self.date_check)
    univ_window = dateutil.cut_window(
        DATES,
        dateutil.compliment_datestring(str(startdate), -1, date_check),
        dateutil.compliment_datestring(str(enddate), 1, date_check)
        if enddate is not None else None)
    startdate, enddate = univ_window[0], univ_window[-1]
    # at first, the universe is full, i.e. all sids are included
    parent = pd.DataFrame(
        True,
        index=pd.to_datetime(univ_window) if self.datetime_index else univ_window,
        columns=SIDS)
    for elem in self._filters:
        # a tuple/list of filters: run each against the SAME parent first,
        # then intersect all results (do not fold incrementally — that would
        # feed later filters an already-narrowed parent)
        if isinstance(elem, (tuple, list)):  # idiomatic single isinstance call
            res = [f.filter(startdate, enddate, parent=parent, **kwargs)
                   for f in elem]
            for df in res:
                parent = parent & df
        # simply call its filter method
        else:
            parent = elem.filter(startdate, enddate, parent=parent, **kwargs)
    return parent
def fetch(self, *args, **kwargs):
    """Use this method **only** if one wants to fetch returns."""
    # Probe whether the first positional argument parses as a datestring.
    # Keep the try body minimal: previously the parent ``fetch`` call sat
    # inside the try, so a ValueError raised by the parent itself was
    # silently swallowed and the call retried with different arguments.
    try:
        dateutil.compliment_datestring(str(args[0]), -1, True)
    except ValueError:
        # not a datestring: forward the arguments unchanged
        return super(BarraFactorFetcher, self).fetch(*args, **kwargs)
    # a datestring: the caller omitted ``dname``; pass None explicitly
    return super(BarraFactorFetcher, self).fetch(None, *args, **kwargs)
def filter(self, startdate, enddate=None, parent=None, **kwargs):
    """Run every registered filter over the date range and intersect
    the resulting boolean universes.

    Returns a boolean DataFrame (dates x SIDS).
    """
    # NOTE(review): nesting of the reindex default under the datetime_index
    # guard reconstructed from collapsed source — confirm.
    if 'datetime_index' not in kwargs:
        kwargs.update({'datetime_index': self.datetime_index})
        kwargs.update({'reindex': self.reindex})
    if 'date_check' not in kwargs:
        kwargs.update({'date_check': self.date_check})
    date_check = kwargs.get('date_check', self.date_check)
    end = None
    if enddate is not None:
        end = dateutil.compliment_datestring(str(enddate), 1, date_check)
    start = dateutil.compliment_datestring(str(startdate), -1, date_check)
    univ_window = dateutil.cut_window(DATES, start, end)
    startdate, enddate = univ_window[0], univ_window[-1]
    # start from the full universe: every sid included on every date
    universe_index = pd.to_datetime(univ_window) if self.datetime_index else univ_window
    parent = pd.DataFrame(True, index=universe_index, columns=SIDS)
    for item in self._filters:
        if isinstance(item, tuple) or isinstance(item, list):
            # evaluate each sub-filter against the same parent, then AND
            sub_results = [sub.filter(startdate, enddate, parent=parent, **kwargs)
                           for sub in item]
            for frame in sub_results:
                parent = parent & frame
        else:
            # a single filter: chain directly
            parent = item.filter(startdate, enddate, parent=parent, **kwargs)
    return parent
def fetch_history(self, *args, **kwargs):
    """Use this method **only** if one wants to fetch returns."""
    # Probe whether the first positional argument parses as a datestring.
    # The try body is kept minimal: previously the parent call itself was
    # inside the try, so a ValueError raised by the parent was masked and
    # the call silently retried with different arguments.
    try:
        dateutil.compliment_datestring(str(args[0]), -1, True)
    except ValueError:
        # not a datestring: forward the arguments unchanged
        return super(BarraFactorFetcher, self).fetch_history(*args, **kwargs)
    # a datestring: caller omitted ``dname``; pass None explicitly
    return super(BarraFactorFetcher, self).fetch_history(None, *args, **kwargs)
def filter(self, startdate, enddate=None, parent=None, return_parent=False, **kwargs):
    """Apply a rolling-window rule to synthesized data and return the
    resulting boolean universe for [startdate, enddate].

    :param parent: Super-universe the result must comply with; may be None
    :param bool return_parent: When True, also return the complied parent
    :returns: Formatted DataFrame, or a (parent, result) tuple
    """
    datetime_index = kwargs.get('datetime_index', self.datetime_index)
    reindex = kwargs.get('reindex', self.reindex)
    date_check = kwargs.get('date_check', self.date_check)
    univ_window = dateutil.cut_window(
        DATES,
        dateutil.compliment_datestring(str(startdate), -1, date_check),
        dateutil.compliment_datestring(str(enddate), 1, date_check)
        if enddate is not None else None)
    si, ei = map(DATES.index, [univ_window[0], univ_window[-1]])
    # shift back by ``delay`` and widen by ``window-1`` rows so the rolling
    # rule has a full look-back for the first universe date
    data_window = DATES[si-self.delay-(self.window-1): ei-self.delay+1]
    dfs = []
    for fetcher, dname in self.datas:
        df = fetcher.fetch_window(dname, data_window)
        # re-label with the undelayed dates so all frames align on index
        df.index = DATES[si-(self.window-1): ei+1]
        dfs.append(df)
    # combine the raw frames into one, then restrict to the parent universe
    df = self.synth(*dfs)
    parent = self.comply(df, parent)
    # apply the rolling rule, then enforce parent compliance in place
    df = self.rule(self.window)(df)
    self.comply(df, parent, False)
    # drop the warm-up rows that existed only to seed the rolling window
    df = df.iloc[self.window-1:]
    df.index = univ_window
    if return_parent is True:
        return (self.format(parent, datetime_index=datetime_index, reindex=reindex),
                self.format(df, datetime_index=datetime_index, reindex=reindex))
    return self.format(df, datetime_index=datetime_index, reindex=reindex)
def fetch(self, indicator, startdate, enddate=None, backdays=0, **kwargs):
    """Fetch ``indicator`` over [startdate, enddate] (plus ``backdays``
    look-back) via :py:meth:`fetch_window`.
    """
    date_check = kwargs.get('date_check', self.date_check)
    start = dateutil.compliment_datestring(str(startdate), -1, date_check)
    end = None
    if enddate is not None:
        end = dateutil.compliment_datestring(str(enddate), 1, date_check)
    window = dateutil.cut_window(DATES, start, end, backdays=backdays)
    return self.fetch_window(indicator, window, **kwargs)
def interval_fetch(pl, times, startdate, enddate, backdays=0, date_check=False):
    """Cut the trading-date window for [startdate, enddate] and delegate to
    :py:meth:`AlphaBase.interval_fetch_window`.
    """
    start = dateutil.compliment_datestring(str(startdate), -1, date_check)
    end = dateutil.compliment_datestring(str(enddate), 1, date_check)
    window = dateutil.cut_window(DATES, start, end, backdays)
    return AlphaBase.interval_fetch_window(pl, times, window)
def generate_dates(startdate, enddate, num):
    """Split the trading dates in [startdate, enddate] into at most ``num``
    consecutive chunks of (near-)equal size.

    :param enddate: None means "up to the last available date"
    :returns: List of date-lists
    """
    if enddate is None:
        enddate = DATES[-1]
    dates = dateutil.cut_window(
        DATES,
        dateutil.compliment_datestring(str(startdate), -1, True),
        dateutil.compliment_datestring(str(enddate), 1, True))
    # floor division: ``/`` yields a float on Python 3 and breaks slicing
    # below; ``//`` is identical on Python 2 ints
    chksize = len(dates) // num + (len(dates) % num > 0)
    return [dates[i:i + chksize] for i in range(0, len(dates), chksize)]
def record_fetch(df, startdate, enddate, backdays, date_check=False):
    """Cut the trading-date window for [startdate, enddate] and delegate to
    :py:meth:`AlphaBase.record_fetch_window`.
    """
    start = dateutil.compliment_datestring(str(startdate), -1, date_check)
    end = dateutil.compliment_datestring(str(enddate), 1, date_check)
    window = dateutil.cut_window(DATES, start, end, backdays)
    return AlphaBase.record_fetch_window(df, window)
def fetch(self, indicator, startdate, enddate=None, backdays=0, **kwargs):
    """Fetch ``indicator`` between ``startdate`` and ``enddate`` (with an
    optional ``backdays`` look-back) through :py:meth:`fetch_window`.
    """
    check = kwargs.get('date_check', self.date_check)
    left = dateutil.compliment_datestring(str(startdate), -1, check)
    right = (dateutil.compliment_datestring(str(enddate), 1, check)
             if enddate is not None else None)
    return self.fetch_window(
        indicator,
        dateutil.cut_window(DATES, left, right, backdays=backdays),
        **kwargs)
def fetch(self, dname, times, startdate, enddate=None, backdays=0, **kwargs):
    """Use :py:meth:`fetch_window` behind the scene."""
    check = kwargs.get('date_check', self.date_check)
    left = dateutil.compliment_datestring(str(startdate), -1, check)
    right = None
    if enddate is not None:
        right = dateutil.compliment_datestring(str(enddate), 1, check)
    window = dateutil.cut_window(DATES, left, right, backdays=backdays)
    return self.fetch_window(dname, times, window, **kwargs)
def generate_dates(startdate, enddate, num):
    """Partition the trading dates in [startdate, enddate] into at most
    ``num`` consecutive chunks.

    :param enddate: None means "up to the last available date"
    :returns: List of date-lists
    """
    if enddate is None:
        enddate = DATES[-1]
    dates = dateutil.cut_window(
        DATES,
        dateutil.compliment_datestring(str(startdate), -1, True),
        dateutil.compliment_datestring(str(enddate), 1, True))
    # floor division: ``/`` yields a float on Python 3 and would break the
    # range/slice arithmetic below; ``//`` is identical on Python 2 ints
    chksize = len(dates) // num + (len(dates) % num > 0)
    return [dates[i: i+chksize] for i in range(0, len(dates), chksize)]
def fetch(self, dname, startdate, enddate=None, backdays=0, **kwargs):
    """Use :py:meth:`fetch_window` behind the scene."""
    check = kwargs.get('date_check', self.date_check)
    left = dateutil.compliment_datestring(str(startdate), -1, check)
    right = (dateutil.compliment_datestring(str(enddate), 1, check)
             if enddate is not None else None)
    return self.fetch_window(
        dname, dateutil.cut_window(DATES, left, right, backdays=backdays), **kwargs)
def fetch_window(self, *args, **kwargs):
    """Wrapper for :py:meth:`fetch_returns`.

    :param str dname: 'returns' or factor name
    """
    # The caller may omit ``dname`` and pass the date window first; a list
    # whose first element parses as a datestring is treated as the window.
    if isinstance(args[0], list):
        try:
            dateutil.compliment_datestring(args[0][0], -1, True)
            # args[0] is the window: forward with dname=None
            return self.fetch_returns(None, *args, **kwargs)
        except ValueError:
            # not a datestring; fall through and forward args unchanged
            pass
    return self.fetch_returns(*args, **kwargs)
def fetch_window(self, *args, **kwargs):
    """Wrapper for :py:meth:`fetch_returns`.

    :param str dname: 'returns' or factor name
    """
    # Overloaded call shape: when the first argument is a list whose first
    # element parses as a datestring, it is the date window itself and the
    # ``dname`` argument was omitted.
    if isinstance(args[0], list):
        try:
            dateutil.compliment_datestring(args[0][0], -1, True)
            # treat args[0] as the window; pass dname=None
            return self.fetch_returns(None, *args, **kwargs)
        except ValueError:
            # first element is not a datestring: args already well-formed
            pass
    return self.fetch_returns(*args, **kwargs)
def filter(self, startdate, enddate=None, parent=None, **kwargs):
    """Broadcast the static membership series over the requested date
    window, enforce compliance with ``parent``, and format the result.
    """
    datetime_index = kwargs.get('datetime_index', self.datetime_index)
    reindex = kwargs.get('reindex', self.reindex)
    date_check = kwargs.get('date_check', self.date_check)
    start = dateutil.compliment_datestring(str(startdate), -1, date_check)
    end = (dateutil.compliment_datestring(str(enddate), 1, date_check)
           if enddate is not None else None)
    univ_window = dateutil.cut_window(DATES, start, end)
    # one identical row per date: the per-sid membership series
    df = pd.DataFrame(0, univ_window, SIDS)
    df = df.add(self.series.astype(int), axis=1)
    self.comply(df, parent, False)
    return self.format(df, datetime_index, reindex)
def fetch_covariance(self, factor=None, startdate=None, enddate=None, **kwargs):
    """Fetch factor covariance data between ``startdate`` and ``enddate``.

    :param factor: A factor name (bare names get the model prefix), or None
       for all factors
    :returns: DataFrame (single factor) or Panel (all factors; note
       ``pd.Panel`` and ``.ix`` are deprecated pandas APIs)
    """
    if isinstance(factor, str):
        # prepend the model prefix when a bare (no underscore) name is given
        factor = factor.find(
            '_') == -1 and self.prefix + '_' + factor or factor
        assert factor in self.all_factors[self.model]
    datetime_index = kwargs.get('datetime_index', self.datetime_index)
    prevra = kwargs.get('prevra', False)
    date_check = kwargs.get('date_check', self.date_check)
    # resolve the date range, defaulting to the full available history
    if enddate:
        enddate = dateutil.compliment_datestring(str(enddate), 1, date_check)
        enddate = dateutil.parse_date(DATES, enddate, -1)[1]
    else:
        enddate = DATES[-1]
    if startdate:
        startdate = dateutil.compliment_datestring(str(startdate), -1, date_check)
        startdate = dateutil.parse_date(DATES, startdate, 1)[1]
    else:
        startdate = DATES[0]
    query = {'date': {'$lte': enddate, '$gte': startdate}}
    if isinstance(factor, str):
        query['factor'] = factor
    proj = {'_id': 0, 'date': 1, 'covariance': 1, 'factor': 1}
    # ``prevra`` selects the pre-adjusted covariance collection
    if prevra:
        cursor = self.precov.find(query, proj)
    else:
        cursor = self.cov.find(query, proj)
    if factor:
        # single factor: dates on the index, counterpart factors as columns
        res = pd.DataFrame(
            {row['date']: row['covariance'] for row in cursor}).T
        if datetime_index:
            res.index = pd.to_datetime(res.index)
    else:
        # all factors: build a (date, factor)-keyed frame, then one
        # covariance matrix per date as a Panel item
        res = pd.DataFrame({(row['date'], row['factor']): row['covariance']
                            for row in cursor}).T
        res = pd.Panel(
            {date: res.ix[date] for date in res.unstack().index})
        if datetime_index:
            res.items = pd.to_datetime(res.items)
    del cursor
    return res
def filter(self, startdate, enddate=None, parent=None, **kwargs):
    """Tile the static per-sid membership series across the date window,
    apply parent compliance in place, and return the formatted frame.
    """
    datetime_index = kwargs.get('datetime_index', self.datetime_index)
    reindex = kwargs.get('reindex', self.reindex)
    date_check = kwargs.get('date_check', self.date_check)
    left = dateutil.compliment_datestring(str(startdate), -1, date_check)
    right = None
    if enddate is not None:
        right = dateutil.compliment_datestring(str(enddate), 1, date_check)
    univ_window = dateutil.cut_window(DATES, left, right)
    index, columns = univ_window, SIDS
    # zero frame plus the int-cast membership series, broadcast column-wise
    frame = pd.DataFrame(0, index, columns).add(self.series.astype(int), axis=1)
    self.comply(frame, parent, False)
    return self.format(frame, datetime_index, reindex)
def fetch_intervals(self, dname, date, time, num=None, offset=0, **kwargs):
    """Return a consecutive interval data

    ``offset`` is to set offset of ``time``; along with ``date``, they
    determine the ending datetime.
    """
    date_check = kwargs.get('date_check', self.date_check)
    reindex = kwargs.get('reindex', self.reindex)
    date = dateutil.compliment_datestring(str(date), -1, date_check)
    date = dateutil.parse_date(DATES, date, -1)[1]
    # each interval is a (date, time) pair; fetch at least one
    dateintervals = self.generate_dateintervals(date, time, num=1 if num is None else num, offset=offset)
    dateindex = pd.to_datetime([dis[0]+' '+dis[1] for dis in dateintervals])
    window = [dis[0] for dis in dateintervals]
    query = {'dname': dname,
             'date': {'$gte': window[0], '$lte': window[-1]},
             }
    proj = {'_id': 0, 'dvalue': 1, 'date': 1, 'time': 1}
    cursor = self.collection.find(query, proj)
    # rows keyed by 'date time'; transpose so datetimes sit on the index
    df = pd.DataFrame({row['date']+' '+row['time']: row['dvalue'] for row in cursor}).T
    del cursor
    df.index = pd.to_datetime(df.index)
    # restrict to exactly the requested datetimes (.ix is deprecated pandas)
    df = df.ix[dateindex]
    if reindex:
        df = df.reindex(columns=SIDS)
    # Series for the single-interval case, DataFrame otherwise
    return df.iloc[0] if num is None else df
def generate_dates(startdate, enddate, parts=None):
    """Return the trading dates in [startdate, enddate], optionally split
    into ``parts`` consecutive chunks.

    :param enddate: 'today' or 'today-k' selects relative to the last date
    :returns: List of dates, or list of date-lists when ``parts`` is given
    """
    startdate, enddate = str(startdate), str(enddate)
    if enddate[:5].lower() == 'today':
        # 'today-k': k trading days before the last available date
        enddate = DATES[-1-int(enddate[6:])]
    dates = dateutil.cut_window(
        DATES,
        dateutil.compliment_datestring(str(startdate), -1, True),
        dateutil.compliment_datestring(str(enddate), 1, True))
    if parts is None:
        return dates
    # floor division: ``/`` returns a float on Python 3 and breaks the
    # comparison/slicing below; ``//`` is identical on Python 2 ints
    chksize = len(dates) // parts
    if len(dates) > chksize * parts:
        chksize += 1
    return [dates[i: i+chksize] for i in range(0, len(dates), chksize)]
def fetch_intervals(self, dname, date, time, num=None, offset=0, **kwargs):
    """Return a consecutive interval data

    ``offset`` is to set offset of ``time``; along with ``date``, they
    determine the ending datetime.
    """
    date_check = kwargs.get('date_check', self.date_check)
    reindex = kwargs.get('reindex', self.reindex)
    date = dateutil.compliment_datestring(str(date), -1, date_check)
    date = dateutil.parse_date(DATES, date, -1)[1]
    # one (date, time) pair per interval; at least one when num is None
    dateintervals = self.generate_dateintervals(
        date, time, num=1 if num is None else num, offset=offset)
    dateindex = pd.to_datetime(
        [dis[0] + ' ' + dis[1] for dis in dateintervals])
    window = [dis[0] for dis in dateintervals]
    query = {
        'dname': dname,
        'date': {
            '$gte': window[0],
            '$lte': window[-1]
        },
    }
    proj = {'_id': 0, 'dvalue': 1, 'date': 1, 'time': 1}
    cursor = self.collection.find(query, proj)
    # key rows by 'date time', transpose so datetimes form the index
    df = pd.DataFrame(
        {row['date'] + ' ' + row['time']: row['dvalue']
         for row in cursor}).T
    del cursor
    df.index = pd.to_datetime(df.index)
    # select exactly the requested datetimes (.ix is deprecated pandas API)
    df = df.ix[dateindex]
    if reindex:
        df = df.reindex(columns=SIDS)
    # Series for the single-interval case, DataFrame otherwise
    return df.iloc[0] if num is None else df
def fetch_history(self, *args, **kwargs):
    """Fetch a trailing window of ``backdays`` rows ending ``delay`` days
    before ``date``.

    Overloaded signature: either ``(date, backdays)`` using the default
    dname, or ``(dname, date, backdays)``.
    """
    date_check = kwargs.get('date_check', self.date_check)
    delay = kwargs.get('delay', self.delay)
    # probe args[0] as a datestring to decide which call shape was used
    try:
        date = dateutil.compliment_datestring(args[0], -1, date_check)
        backdays = args[1]
        dname = self._dname
    except ValueError:
        date = dateutil.compliment_datestring(args[1], -1, date_check)
        backdays = args[2]
        dname = args[0]
    # NOTE(review): ``date`` was already complimented above; this second
    # call looks redundant but is kept for byte-identical behavior
    date = dateutil.compliment_datestring(date, -1, date_check)
    di, date = dateutil.parse_date(DATES, date, -1)
    di -= delay
    window = DATES[di - backdays + 1:di + 1]
    return self.fetch_window(dname, window, **kwargs)
def fetch_history(self, *args, **kwargs):
    """Fetch ``backdays`` rows ending ``delay`` days before ``date``.

    Overloaded: ``(date, backdays)`` with the default dname, or
    ``(dname, date, backdays)``.
    """
    date_check = kwargs.get('date_check', self.date_check)
    delay = kwargs.get('delay', self.delay)
    # if args[0] parses as a datestring, the dname argument was omitted
    try:
        date = dateutil.compliment_datestring(args[0], -1, date_check)
        backdays = args[1]
        dname = self._dname
    except ValueError:
        date = dateutil.compliment_datestring(args[1], -1, date_check)
        backdays = args[2]
        dname = args[0]
    # NOTE(review): second compliment of an already-complimented ``date``;
    # appears redundant but kept for byte-identical behavior
    date = dateutil.compliment_datestring(date, -1, date_check)
    di, date = dateutil.parse_date(DATES, date, -1)
    di -= delay
    window = DATES[di-backdays+1: di+1]
    return self.fetch_window(dname, window, **kwargs)
def fetch_history(self, indicator, date, backdays, **kwargs):
    """Fetch a trailing window of ``backdays`` rows of ``indicator``
    ending ``delay`` days before ``date``.
    """
    check = kwargs.get('date_check', self.date_check)
    lag = kwargs.get('delay', self.delay)
    anchor = dateutil.compliment_datestring(str(date), -1, check)
    di, anchor = dateutil.parse_date(DATES, anchor, -1)
    di -= lag
    return self.fetch_window(indicator, DATES[di-backdays+1: di+1], **kwargs)
def fetch_history(self, indicator, date, backdays, **kwargs):
    """Return ``backdays`` rows of ``indicator`` ending ``delay`` trading
    days before ``date``.
    """
    check = kwargs.get('date_check', self.date_check)
    lag = kwargs.get('delay', self.delay)
    di, _ = dateutil.parse_date(
        DATES, dateutil.compliment_datestring(str(date), -1, check), -1)
    di -= lag
    window = DATES[di - backdays + 1: di + 1]
    return self.fetch_window(indicator, window, **kwargs)
def filter(self, startdate, enddate=None, parent=None, **kwargs):
    """Run every registered filter over the date range and union the
    resulting boolean universes (logical OR).
    """
    # NOTE(review): nesting of the reindex default under the datetime_index
    # guard reconstructed from collapsed source — confirm.
    if 'datetime_index' not in kwargs:
        kwargs.update({'datetime_index': self.datetime_index})
        kwargs.update({'reindex': self.reindex})
    if 'date_check' not in kwargs:
        kwargs.update({'date_check': self.date_check})
    date_check = kwargs.get('date_check', self.date_check)
    start = dateutil.compliment_datestring(str(startdate), -1, date_check)
    end = None
    if enddate is not None:
        end = dateutil.compliment_datestring(str(enddate), 1, date_check)
    univ_window = dateutil.cut_window(DATES, start, end)
    startdate, enddate = univ_window[0], univ_window[-1]
    frames = [flt.filter(startdate, enddate=enddate, parent=parent, **kwargs)
              for flt in self._filters]
    # OR-fold all results, starting from the last one
    union = frames.pop()
    for frame in frames:
        union = union | frame
    return union
def record_fetch_history(df, date, backdays=None, delay=0, date_check=False):
    """Fetch records in a trailing window ending ``delay`` days before
    ``date``; a None ``backdays`` means the full available history.
    """
    anchor = dateutil.compliment_datestring(str(date), -1, date_check)
    di, anchor = dateutil.parse_date(DATES, anchor, -1)
    di -= delay
    window = DATES[:di+1] if backdays is None else DATES[di-backdays+1: di+1]
    return AlphaBase.record_fetch_window(df, window)
def fetch_history(self, dname, times, date, backdays, **kwargs):
    """Use :py:meth:`fetch_window` behind the scene."""
    check = kwargs.get('date_check', self.date_check)
    lag = kwargs.get('delay', self.delay)
    anchor = dateutil.compliment_datestring(str(date), -1, check)
    di, anchor = dateutil.parse_date(DATES, anchor, -1)
    di -= lag
    return self.fetch_window(dname, times, DATES[di-backdays+1: di+1], **kwargs)
def fetch_history(self, dname, times, date, backdays, **kwargs):
    """Use :py:meth:`fetch_window` behind the scene."""
    check = kwargs.get('date_check', self.date_check)
    lag = kwargs.get('delay', self.delay)
    di, _ = dateutil.parse_date(
        DATES, dateutil.compliment_datestring(str(date), -1, check), -1)
    di -= lag
    window = DATES[di - backdays + 1: di + 1]
    return self.fetch_window(dname, times, window, **kwargs)
def random_intalpha(startdate='20140103', freq='30min', n=None):
    """Generate a random intraday alpha: a DataFrame of random floats with
    a DatetimeIndex of ``n`` dates x intraday intervals and full sids columns.

    :param startdate: Starting point. Default: '20140103'
    :param str freq: Interval frequency like '30min'. Default: '30min'
    :param int n: Number of dates; when None or negative, a random number
       between 10 and 20 is used. Default: None
    """
    # Explicit None/negative test: ``not n >= 0`` raises TypeError on
    # Python 3 when n is None (Python 2 silently treated None < 0)
    if n is None or n < 0:
        n = np.random.randint(10, 20)
    dates = dateutil.get_startfrom(
        DATES, dateutil.compliment_datestring(str(startdate), -1, True), n)
    # interval length in seconds, e.g. '30min' -> 1800
    times = dateutil.generate_intervals(int(freq[:-3])*60)
    dts = pd.to_datetime([d+' '+t for d in dates for t in times])
    df = pd.DataFrame(np.random.randn(len(dts), len(SIDS)),
                      index=dts, columns=SIDS)
    return df
def fetch(self, *args, **kwargs):
    """Fetch data over a date range via :py:meth:`fetch_window`.

    Overloaded signature: ``(startdate[, enddate])`` with the default
    dname, or ``(dname, startdate[, enddate])``.
    """
    date_check = kwargs.get('date_check', self.date_check)
    enddate = kwargs.get('enddate', None)
    backdays = kwargs.get('backdays', 0)
    # probe args[0] as a datestring to decide which call shape was used
    try:
        startdate = dateutil.compliment_datestring(str(args[0]), -1, date_check)
        dname = self._dname
        args = args[1:]
    except ValueError:
        startdate = dateutil.compliment_datestring(str(args[1]), -1, date_check)
        dname = args[0]
        args = args[2:]
    # a remaining positional argument overrides the ``enddate`` keyword
    if args:
        enddate = args[0]
    window = dateutil.cut_window(
        DATES,
        startdate,
        dateutil.compliment_datestring(str(enddate), 1, date_check)
        if enddate is not None else None,
        backdays=backdays)
    return self.fetch_window(dname, window, **kwargs)
def filter(self, startdate, enddate=None, parent=None, return_parent=False, **kwargs):
    """Apply a rolling-window rule to synthesized data and return the
    boolean universe for [startdate, enddate].

    :param parent: Super-universe the result must comply with; may be None
    :param bool return_parent: When True, also return the complied parent
    """
    datetime_index = kwargs.get('datetime_index', self.datetime_index)
    reindex = kwargs.get('reindex', self.reindex)
    date_check = kwargs.get('date_check', self.date_check)
    univ_window = dateutil.cut_window(
        DATES,
        dateutil.compliment_datestring(str(startdate), -1, date_check),
        dateutil.compliment_datestring(str(enddate), 1, date_check)
        if enddate is not None else None)
    si, ei = map(DATES.index, [univ_window[0], univ_window[-1]])
    # widen by window-1 rows and shift back by delay so the rolling rule has
    # a full look-back for the first universe date
    data_window = DATES[si - self.delay - (self.window - 1):ei - self.delay + 1]
    dfs = []
    for fetcher, dname in self.datas:
        df = fetcher.fetch_window(dname, data_window)
        # re-label with the undelayed dates so all frames align on index
        df.index = DATES[si - (self.window - 1):ei + 1]
        dfs.append(df)
    # combine the raw frames, then restrict to the parent universe
    df = self.synth(*dfs)
    parent = self.comply(df, parent)
    # apply the rolling rule and enforce parent compliance in place
    df = self.rule(self.window)(df)
    self.comply(df, parent, False)
    # drop the warm-up rows used only to seed the rolling window
    df = df.iloc[self.window - 1:]
    df.index = univ_window
    if return_parent is True:
        return (self.format(parent, datetime_index=datetime_index, reindex=reindex),
                self.format(df, datetime_index=datetime_index, reindex=reindex))
    return self.format(df, datetime_index=datetime_index, reindex=reindex)
def filter(self, startdate, enddate=None, parent=None, **kwargs):
    """Union (logical OR) the universes produced by every registered
    filter over the requested date range.
    """
    # NOTE(review): nesting of the reindex default under the datetime_index
    # guard reconstructed from collapsed source — confirm.
    if 'datetime_index' not in kwargs:
        kwargs.update({'datetime_index': self.datetime_index})
        kwargs.update({'reindex': self.reindex})
    if 'date_check' not in kwargs:
        kwargs.update({'date_check': self.date_check})
    date_check = kwargs.get('date_check', self.date_check)
    left = dateutil.compliment_datestring(str(startdate), -1, date_check)
    right = (dateutil.compliment_datestring(str(enddate), 1, date_check)
             if enddate is not None else None)
    univ_window = dateutil.cut_window(DATES, left, right)
    startdate, enddate = univ_window[0], univ_window[-1]
    results = [
        member.filter(startdate, enddate=enddate, parent=parent, **kwargs)
        for member in self._filters
    ]
    # fold with OR, seeded by the last result
    combined = results.pop()
    for other in results:
        combined = combined | other
    return combined
def fetch(self, *args, **kwargs):
    """Fetch data over a date range via :py:meth:`fetch_window`.

    Overloaded: ``(startdate[, enddate])`` using the default dname, or
    ``(dname, startdate[, enddate])``.
    """
    date_check = kwargs.get('date_check', self.date_check)
    enddate = kwargs.get('enddate', None)
    backdays = kwargs.get('backdays', 0)
    # if args[0] parses as a datestring, the dname argument was omitted
    try:
        startdate = dateutil.compliment_datestring(str(args[0]), -1, date_check)
        dname = self._dname
        args = args[1:]
    except ValueError:
        startdate = dateutil.compliment_datestring(str(args[1]), -1, date_check)
        dname = args[0]
        args = args[2:]
    # a remaining positional argument overrides the ``enddate`` keyword
    if args:
        enddate = args[0]
    window = dateutil.cut_window(
        DATES,
        startdate,
        dateutil.compliment_datestring(str(enddate), 1, date_check)
        if enddate is not None else None,
        backdays=backdays)
    return self.fetch_window(dname, window, **kwargs)
def fetch_daily(self, dname, date, offset=0, **kwargs):
    """Wrapper for :py:meth:`fetch_returns` and :py:meth:`fetch_covariance`.

    :param str dname: 'returns', 'covariance'
    """
    check = kwargs.get('date_check', self.date_check)
    anchor = dateutil.compliment_datestring(date, -1, check)
    di, anchor = dateutil.parse_date(DATES, anchor, -1)
    # step back ``offset`` trading days from the parsed date
    date = DATES[di - offset]
    if dname == 'covariance':
        return self.fetch_daily_covariance(date)
    factor = kwargs.get('factor', None)
    return self.fetch_returns(factor, [date], **kwargs).iloc[0]
def fetch_covariance(self, factor=None, startdate=None, enddate=None, **kwargs):
    """Fetch factor covariance data over [startdate, enddate].

    :param factor: A factor name (bare names get the model prefix), or
       None for all factors
    :returns: DataFrame (single factor) or Panel (all factors; note that
       ``pd.Panel`` and ``.ix`` are deprecated pandas APIs)
    """
    if isinstance(factor, str):
        # prepend the model prefix when a bare (no underscore) name is given
        factor = factor.find('_') == -1 and self.prefix+'_'+factor or factor
        assert factor in self.all_factors[self.model]
    datetime_index = kwargs.get('datetime_index', self.datetime_index)
    prevra = kwargs.get('prevra', False)
    date_check = kwargs.get('date_check', self.date_check)
    # resolve the range, defaulting to the full available history
    if enddate:
        enddate = dateutil.compliment_datestring(str(enddate), 1, date_check)
        enddate = dateutil.parse_date(DATES, enddate, -1)[1]
    else:
        enddate = DATES[-1]
    if startdate:
        startdate = dateutil.compliment_datestring(str(startdate), -1, date_check)
        startdate = dateutil.parse_date(DATES, startdate, 1)[1]
    else:
        startdate = DATES[0]
    query = {'date': {'$lte': enddate, '$gte': startdate}}
    if isinstance(factor, str):
        query['factor'] = factor
    proj = {'_id': 0, 'date': 1, 'covariance': 1, 'factor': 1}
    # ``prevra`` switches to the pre-adjusted covariance collection
    if prevra:
        cursor = self.precov.find(query, proj)
    else:
        cursor = self.cov.find(query, proj)
    if factor:
        # single factor: dates on the index after transposition
        res = pd.DataFrame({row['date']: row['covariance'] for row in cursor}).T
        if datetime_index:
            res.index = pd.to_datetime(res.index)
    else:
        # all factors: (date, factor)-keyed frame, then one covariance
        # matrix per date as a Panel item
        res = pd.DataFrame({(row['date'], row['factor']): row['covariance'] for row in cursor}).T
        res = pd.Panel({date: res.ix[date] for date in res.unstack().index})
        if datetime_index:
            res.items = pd.to_datetime(res.items)
    del cursor
    return res
def fetch_daily(self, dname, date, offset=0, **kwargs):
    """Wrapper for :py:meth:`fetch_returns` and :py:meth:`fetch_covariance`.

    :param str dname: 'returns', 'covariance'
    """
    check = kwargs.get('date_check', self.date_check)
    di, _ = dateutil.parse_date(
        DATES, dateutil.compliment_datestring(date, -1, check), -1)
    # the effective date is ``offset`` trading days before the parsed one
    date = DATES[di-offset]
    if dname == 'covariance':
        return self.fetch_daily_covariance(date)
    return self.fetch_returns(kwargs.get('factor', None), [date], **kwargs).iloc[0]
def filter_daily(self, date, offset=0, parent=None, **kwargs):
    """Filter out a universe on a certain day. A helper method.

    :param date: The base point
    :type date: str, int
    :param int offset: The offset w.r.t. the ``date``. The actual date is
       calculated from ``date`` and ``offset``. Default: 0
    :param DataFrame parent: The super- or parent-universe to be filtered.
       Default: None
    :returns: Series
    """
    date = dateutil.compliment_datestring(str(date), -1, self.date_check)
    di, date = dateutil.parse_date(DATES, date, -1)
    date = DATES[di-offset]
    if isinstance(parent, pd.Series):
        parent = pd.DataFrame({date: parent}).T
    # forward ``parent`` to filter(): previously it was prepared above but
    # never passed on, so the documented parent-universe restriction was
    # silently dropped
    return self.filter(date, date, parent=parent, **kwargs).iloc[0]
def filter_daily(self, date, offset=0, parent=None, **kwargs):
    """Filter out a universe on a certain day. A helper method.

    :param date: The base point
    :type date: str, int
    :param int offset: The offset w.r.t. the ``date``. The actual date is
       calculated from ``date`` and ``offset``. Default: 0
    :param DataFrame parent: The super- or parent-universe to be filtered.
       Default: None
    :returns: Series
    """
    date = dateutil.compliment_datestring(str(date), -1, self.date_check)
    di, date = dateutil.parse_date(DATES, date, -1)
    date = DATES[di - offset]
    if isinstance(parent, pd.Series):
        parent = pd.DataFrame({date: parent}).T
    # forward ``parent`` to filter(): it used to be prepared above and then
    # ignored, so the parent-universe restriction never took effect
    return self.filter(date, date, parent=parent, **kwargs).iloc[0]
def random_alpha(startdate='20140103', n=None, datetime_index=True):
    """Generate a random alpha(i.e. a DataFrame of random floats with
    DatetimeINdex and full sids columns).

    :param startdate: Starting point. Default: '20140103'
    :type startdate: int, str
    :param int n: Length of the returned DataFrame; when None, it will be
       a random number between 50 and 100. Default: None
    :param boolean datetime_index: Whether to format the DataFrame with
       DatetimeIndex. Default: True
    """
    # Explicit None/negative test: ``not n >= 0`` raises TypeError on
    # Python 3 when n is None (Python 2 silently treated None < 0)
    if n is None or n < 0:
        n = np.random.randint(50, 100)
    dates = dateutil.get_startfrom(
        DATES, dateutil.compliment_datestring(str(startdate), -1, True), n)
    df = pd.DataFrame(np.random.randn(n, len(SIDS)), index=dates, columns=SIDS)
    if datetime_index:
        df.index = pd.to_datetime(df.index)
    return df
def fetch_daily(self, *args, **kwargs):
    """This differs from the default :py:meth:`orca.mongo.base.KDayFetcher.fetch_daily` in only one aspect: when the ``dname`` is not given, this will fetch all factors exposure on ``date``.

    Also, you can provide one of ('industry', 'style') to fetch exposures to industry/style factors.

    :returns: Series(if a factor name is given), DataFrame(factor names are in the columns)
    """
    factor, date, offset = None, None, 0
    if 'offset' in kwargs:
        offset = int(kwargs.pop('offset'))
    # is the first argument a date?
    try:
        date = dateutil.compliment_datestring(str(args[0]), -1, True)
        # yes, it is a date
        if len(args) > 1:
            offset = int(args[1])
    except ValueError:
        # the first argument is not a date, presumably, it is the factor name!
        factor, date = args[0], args[1]
        # offset provided as the 3rd argument?
        if len(args) > 2:
            offset = int(args[2])
    # a concrete factor name: let the parent class handle it directly
    if factor is not None and factor not in ('industry', 'style'):
        return super(BarraExposureFetcher, self).fetch_daily(*args, **kwargs)
    di, date = dateutil.parse_date(DATES, date, -1)
    date = DATES[di - offset]
    reindex = kwargs.get('reindex', self.reindex)
    query = {'date': date}
    proj = {'_id': 0, 'dname': 1, 'dvalue': 1}
    cursor = self.collection.find(query, proj)
    # one column per factor name, rows keyed by sid
    df = pd.DataFrame({row['dname']: row['dvalue'] for row in cursor})
    del cursor
    if reindex:
        df = df.reindex(index=SIDS)
    # restrict to industry/style factor groups when requested
    if factor == 'industry':
        return df[BarraFetcher.industry_factors[self.model]]
    elif factor == 'style':
        return df[BarraFetcher.style_factors[self.model]]
    return df
def fetch_daily(self, *args, **kwargs):
    """This differs from the default :py:meth:`orca.mongo.base.KDayFetcher.fetch_daily` in only one aspect: when the ``dname`` is not given, this will fetch all factors exposure on ``date``.

    Also, you can provide one of ('industry', 'style') to fetch exposures to industry/style factors.

    :returns: Series(if a factor name is given), DataFrame(factor names are in the columns)
    """
    factor, date, offset = None, None, 0
    if 'offset' in kwargs:
        offset = int(kwargs.pop('offset'))
    # is the first argument a date?
    try:
        date = dateutil.compliment_datestring(str(args[0]), -1, True)
        # yes, it is a date
        if len(args) > 1:
            offset = int(args[1])
    except ValueError:
        # the first argument is not a date, presumably, it is the factor name!
        factor, date = args[0], args[1]
        # offset provided as the 3rd argument?
        if len(args) > 2:
            offset = int(args[2])
    # a concrete factor name: delegate to the parent class implementation
    if factor is not None and factor not in ('industry', 'style'):
        return super(BarraExposureFetcher, self).fetch_daily(*args, **kwargs)
    di, date = dateutil.parse_date(DATES, date, -1)
    date = DATES[di-offset]
    reindex = kwargs.get('reindex', self.reindex)
    query = {'date': date}
    proj = {'_id': 0, 'dname': 1, 'dvalue': 1}
    cursor = self.collection.find(query, proj)
    # one column per factor name, rows keyed by sid
    df = pd.DataFrame({row['dname']: row['dvalue'] for row in cursor})
    del cursor
    if reindex:
        df = df.reindex(index=SIDS)
    # restrict to the requested factor group if any
    if factor == 'industry':
        return df[BarraFetcher.industry_factors[self.model]]
    elif factor == 'style':
        return df[BarraFetcher.style_factors[self.model]]
    return df
def fetch_idmaps(cls, date=None, barra_key=True):
    """Fetch barra id - local stock id correspondance.

    :param boolean barra_key: Whether to use barra ids as keys. Default: True
    :returns: A ``dict``
    """
    dates = cls.idmaps.distinct('date')
    if date is None:
        date = dates[-1]
    else:
        date = dateutil.parse_date(dates, dateutil.compliment_datestring(date, -1, True), -1)[1]
    query = {'date': str(date)}
    proj = {'_id': 0, 'idmaps': 1}
    dct = cls.idmaps.find_one(query, proj)['idmaps']
    if barra_key:
        return dct
    # invert the mapping; on duplicate values keep the smallest key so the
    # result is deterministic.  ``.items()`` replaces the Python-2-only
    # ``.iteritems()`` and behaves identically on both versions.
    inv_dct = {}
    for k, v in dct.items():
        if v not in inv_dct or inv_dct[v] > k:
            inv_dct[v] = k
    return inv_dct
def fetch_idmaps(cls, date=None, barra_key=True):
    """Fetch barra id - local stock id correspondance.

    :param boolean barra_key: Whether to use barra ids as keys. Default: True
    :returns: A ``dict``
    """
    dates = cls.idmaps.distinct('date')
    if date is None:
        date = dates[-1]
    else:
        date = dateutil.parse_date(
            dates, dateutil.compliment_datestring(date, -1, True), -1)[1]
    query = {'date': str(date)}
    proj = {'_id': 0, 'idmaps': 1}
    dct = cls.idmaps.find_one(query, proj)['idmaps']
    if barra_key:
        return dct
    # invert the mapping, keeping the smallest key on duplicate values so
    # the result is deterministic.  ``.items()`` replaces the Python-2-only
    # ``.iteritems()`` with identical iteration behavior.
    inv_dct = {}
    for k, v in dct.items():
        if v not in inv_dct or inv_dct[v] > k:
            inv_dct[v] = k
    return inv_dct
def fetch_info(self, dname='name', level=0, date=None, **kwargs):
    """Fetch industry-name/industry-index correspondance.

    :param str dname: 'name'(default): fetch industry-name mapping;
       'index': fetch industry-index mapping
    :param int level: Which level of industry is of interest? Default: 0,
       all 3 levels' information are fetched
    :rtype: dict
    """
    standard = kwargs.get('standard', self.standard)
    date_check = kwargs.get('date_check', self.date_check)
    if date is None:
        # no date given: fall back to the latest available one
        date = self.info.distinct('date')[-1]
    else:
        date = dateutil.parse_date(
            DATES, dateutil.compliment_datestring(date, -1, date_check), -1)[1]
    # level 0 selects the aggregate mapping, otherwise the per-level one
    key = 'industry_%s' % dname if level == 0 else 'level%d_%s' % (level, dname)
    query = {'standard': standard, 'date': date, 'dname': key}
    proj = {'_id': 0, 'dvalue': 1}
    return self.info.find_one(query, proj)['dvalue']
def test_compliment_datestring_8l_0(self):
    # an 8-character datestring is returned unchanged, even when it is not
    # a valid calendar date (month 13), since date_check defaults to off
    self.assertEqual(dateutil.compliment_datestring('20141301'), '20141301')
def test_compliment_datestring_4l_1(self):
    # a 4-character year with direction=1 is padded to the year's last day
    self.assertEqual(dateutil.compliment_datestring('2014', direction=1), '20141231')
def test_compliment_datestring_6l_4(self):
    # a 6-character yearmonth with direction=1 is padded with day '31'
    # regardless of calendar validity when date_check is off
    self.assertEqual(dateutil.compliment_datestring('201413', direction=1), '20141331')
def test_compliment_datestring_6l_2(self):
    # with date_check on, padding respects the month's true length
    # (February 2014 -> day 28)
    self.assertEqual(
        dateutil.compliment_datestring('201402', direction=1, date_check=True),
        '20140228')
def interval_fetch_history(pl, times, date, backdays, delay=0, date_check=False):
    """Fetch interval data in a trailing window of ``backdays`` rows ending
    ``delay`` days before ``date``.
    """
    anchor = dateutil.compliment_datestring(str(date), -1, date_check)
    di, anchor = dateutil.parse_date(DATES, anchor, -1)
    di -= delay
    window = DATES[di-backdays+1: di+1]
    return AlphaBase.interval_fetch_window(pl, times, window)