def fetch_returns(self):
    """Fetch daily returns spanning this alpha's dates, padded by the maximal shifts."""
    max_rshift = max(self.rshifts)
    max_lshift = max(self.lshifts)
    datestrs = dateutil.to_datestr(self.alpha.index)
    last_di = DATES.index(datestrs[-1])
    # Cap the end index at the second-to-last calendar date.
    end_date = DATES[min(len(DATES) - 2, last_di + max_rshift)]
    return quote.fetch('returns', datestrs[0], end_date,
                       backdays=max_rshift + max_lshift)
def operate(self, alpha, date=None):
    """Cross-sectionally rank ``alpha``, optionally within ``self.group`` buckets.

    ``alpha`` may be a Series (one cross-section; ``date`` selects the group
    row when ``self.group`` is a DataFrame) or a date-indexed DataFrame.
    The result is reindexed back to ``alpha``'s original labels, so entries
    outside the grouped universe come back as NaN.
    """
    # Drop NaN/inf entries before ranking.
    alpha = alpha[np.isfinite(alpha)]
    if isinstance(alpha, pd.Series):
        if self.group is None:
            return rank(alpha)
        # Time-varying groups: pick the group row for this date.
        if isinstance(self.group, pd.DataFrame):
            group = self.group.ix[date]
        else:
            group = self.group
        sids = group.dropna().index
        nalpha = alpha.ix[sids]
        nalpha = nalpha.groupby(group).transform(lambda x: rank(x))
        return nalpha.reindex(index=alpha.index)
    if self.group is None:
        return rank(alpha)
    if isinstance(self.group, pd.Series):
        # Static grouping: rank within group down each column via transpose.
        sids = self.group.dropna().index
        nalpha = alpha.T.ix[sids]
        nalpha = nalpha.groupby(self.group.dropna()).transform(lambda x: rank(x)).T
        return nalpha.reindex(columns=alpha.columns)
    # Time-varying grouping on a DataFrame: process one row per task.
    # NOTE(review): assumes module-level ``worker`` ranks a row within its
    # group and returns (date, ranked_row) -- confirm against its definition.
    dates = dateutil.to_datestr(alpha.index)
    pool = multiprocessing.Pool(self.threads)
    res = pool.imap_unordered(
        worker,
        [(dt1, row, self.group.ix[dt2])
         for (dt1, row), dt2 in zip(alpha.iterrows(), dates)])
    pool.close()
    pool.join()
    # imap_unordered yields out of order; rebuild and let the dict key by date.
    df = {}
    for dt, row in res:
        df[dt] = row
    return pd.DataFrame(df).T.reindex(columns=alpha.columns)
def operate(self, alpha, date=None):
    """Group-wise cross-sectional ranking of an alpha Series or DataFrame."""
    alpha = alpha[np.isfinite(alpha)]

    if isinstance(alpha, pd.Series):
        if self.group is None:
            return rank(alpha)
        if isinstance(self.group, pd.DataFrame):
            group = self.group.ix[date]
        else:
            group = self.group
        valid = group.dropna().index
        ranked = alpha.ix[valid].groupby(group).transform(rank)
        return ranked.reindex(index=alpha.index)

    if self.group is None:
        return rank(alpha)

    if isinstance(self.group, pd.Series):
        # Static grouping: rank within group down each column via transpose.
        grouping = self.group.dropna()
        ranked = alpha.T.ix[grouping.index].groupby(grouping).transform(rank).T
        return ranked.reindex(columns=alpha.columns)

    # Date-varying grouping: farm one (date, row, group-row) task per date
    # out to a worker pool and reassemble whatever order they come back in.
    datestrs = dateutil.to_datestr(alpha.index)
    tasks = [(dt, row, self.group.ix[ds])
             for (dt, row), ds in zip(alpha.iterrows(), datestrs)]
    pool = multiprocessing.Pool(self.threads)
    results = pool.imap_unordered(worker, tasks)
    pool.close()
    pool.join()
    ranked = {dt: row for dt, row in results}
    return pd.DataFrame(ranked).T.reindex(columns=alpha.columns)
def fetch_returns(self):
    """Fetch 'returns' quotes covering this alpha's date span plus its shifts.

    The window end is pushed forward by the largest right shift and the fetch
    is padded backward via ``backdays`` by the sum of the largest shifts.
    """
    rshift, lshift = max(self.rshifts), max(self.lshifts)
    dates = dateutil.to_datestr(self.alpha.index)
    ei = DATES.index(dates[-1])
    # Capped at the second-to-last calendar date; presumably the final
    # date's returns are not yet available -- TODO confirm.
    edate = DATES[min(len(DATES) - 2, ei + rshift)]
    ret = quote.fetch('returns', dates[0], edate, backdays=rshift + lshift)
    return ret
def to_frame(panel):
    """Transform a time-itemized, date-major_axised Panel into DataFrame with
    DatetimeIndex.

    The input panel is left untouched: the original reassigned
    ``panel.major_axis`` in place, silently mutating the caller's object, so
    a copy is taken before the axis conversion.
    """
    if isinstance(panel.major_axis, DatetimeIndex):
        # Copy first -- reassigning major_axis would mutate the caller's panel.
        panel = panel.copy()
        panel.major_axis = dateutil.to_datestr(panel.major_axis)
    df = panel.transpose(2, 1, 0).to_frame(filter_observations=False)
    # Fuse the (date, time) MultiIndex levels into a single DatetimeIndex.
    df.index = pd.to_datetime(pd.Series(df.index.get_level_values(0)) + ' ' +
                              pd.Series(df.index.get_level_values(1)))
    return df
def _get_analyser(perf, mode): if mode == 'longshort': return perf.get_longshort() elif mode == 'BTOP70Q': univ = univ_fetcher.fetch_window('BTOP70Q', to_datestr(perf.alpha.index)) return perf.get_universe(univ).get_longshort() elif mode == 'quantile30': return perf.get_qtail(0.3) elif mode == 'top30': return perf.get_qtop(0.3)
def operate(self, alpha, group='sector', simple=False, date=None):
    """Rank alpha cross-sectionally within industry groups.

    ``group`` names the industry level to fetch (default 'sector');
    ``simple`` keeps only the last day's grouping for the DataFrame path.
    """
    alpha = alpha[np.isfinite(alpha)]
    if isinstance(alpha, pd.Series):
        # Single cross-section: group by that day's industry labels.
        labels = self.industry.fetch_daily(group, date).dropna()
        ranked = alpha.ix[labels.index].groupby(labels).transform(rank)
        return ranked.reindex(index=alpha.index)
    window = np.unique(dateutil.to_datestr(alpha.index))
    labels = self.industry.fetch_window(group, window)
    self.group = labels.iloc[-1] if simple else labels
    return super(IndustryRankOperation, self).operate(alpha)
def rebase_index(alpha):
    """Snap each date in ``alpha``'s index onto the trading calendar.

    Each calendar date maps to the last trading date <= it (via
    ``dateutil.find_le``); intraday DataFrame slices collapse to a boolean
    any-True row per security.  Returns a date-sorted DataFrame with a
    DatetimeIndex.
    """
    rebased = {}
    for datestr in np.unique(dateutil.to_datestr(alpha.index)):
        day_slice = alpha.ix[datestr]
        trading_date = dateutil.find_le(DATES, datestr)[1]
        if isinstance(day_slice, pd.DataFrame):
            # Intraday rows: True if any bar that day is True.
            rebased[trading_date] = day_slice.astype(int).max().astype(bool)
        else:
            rebased[trading_date] = day_slice
    frame = pd.DataFrame(rebased).T.sort_index()
    frame.index = pd.to_datetime(frame.index)
    return frame
def comply(df, parent=None, value=None):
    """Blank out ``df`` wherever the aligned ``parent`` mask is False.

    Mutates ``df`` in place (masked cells set to ``value``) and returns the
    aligned mask, or None when no parent is given.
    """
    if parent is None:
        return
    # Align the mask's index type (datetime vs date-string) with df's.
    if type(df.index) != type(parent.index):
        parent = parent.copy()
        convert = (dateutil.to_datetime
                   if isinstance(df.index, DatetimeIndex)
                   else dateutil.to_datestr)
        parent.index = convert(parent.index)
    mask = parent.ix[df.index].fillna(method='bfill').fillna(False)
    df[~mask] = value
    return mask
def __init__(self, alpha, n, rank=None):
    """Restrict ``alpha`` to its daily top-``n`` names.

    rank=None  -> equal (1.0) weight on each top-n name;
    rank < 0   -> keep the raw alpha values of the top-n names;
    rank > 0   -> linear weights: rank - floor(rank_position/(n+1)*rank).
    The result is rescaled with ``api.scale``.
    """
    self.alpha = api.format(alpha)
    self.rank_alpha = self.alpha.rank(axis=1, ascending=False)
    self.rank_alpha = self.rank_alpha[self.rank_alpha <= n]
    if rank is None:
        self.alpha = (self.rank_alpha <= n).astype(float)
    elif rank < 0:
        self.alpha = self.alpha[self.rank_alpha <= n]
    else:
        self.alpha = rank - np.floor(self.rank_alpha / (n + 1) * rank)
    self.alpha = api.scale(self.alpha)
    self.dates = dateutil.to_datestr(self.alpha.index)
def fetch_dates(self, dname, dates, rshift=0, lshift=0, **kwargs):
    """Use :py:meth:`fetch_window` behind the scene."""
    datestrs = dateutil.to_datestr(dates)
    res, any_frame = {}, False
    for key, datestr in zip(dates, datestrs):
        di, datestr = dateutil.parse_date(DATES, datestr, -1)
        # Skip dates whose shifted window would fall off either calendar end.
        if di - lshift < 0 or di + rshift + 1 > len(DATES):
            continue
        if rshift + lshift == 0:
            res[key] = self.fetch_daily(dname, DATES[di - lshift], **kwargs)
            any_frame = any_frame or isinstance(res[key], pd.DataFrame)
        else:
            res[key] = self.fetch_window(
                dname, DATES[di - lshift: di + rshift + 1], **kwargs)
    if rshift + lshift == 0:
        # Daily frames stack into a Panel; daily series stack into a DataFrame.
        if any_frame:
            res = pd.Panel(res).transpose(1, 2, 0)
        else:
            res = pd.DataFrame(res).T
    return res
def fetch_dates(self, dates, rshift=0, lshift=0, **kwargs):
    """Use :py:meth:`fetch_window` behind the scene.

    For each date in ``dates`` (mapped onto the trading calendar ``DATES``)
    fetch either a single day (when ``rshift + lshift == 0``) or the window
    from ``lshift`` days back through ``rshift`` days forward.  Single-day
    results are stacked into a Panel (when daily fetches are DataFrames) or
    a DataFrame; otherwise a dict keyed by the original date objects is
    returned.  Dates whose shifted window falls outside the calendar are
    silently skipped.
    """
    dates_str = dateutil.to_datestr(dates)
    res, is_df = {}, False
    for dt, date in zip(dates, dates_str):
        # NOTE(review): -1 presumably selects the nearest prior trading
        # day -- confirm against dateutil.parse_date.
        di, date = dateutil.parse_date(DATES, date, -1)
        if di - lshift < 0 or di + rshift + 1 > len(DATES):
            continue
        if rshift + lshift == 0:
            res[dt] = self.fetch_daily(DATES[di - lshift], **kwargs)
            if isinstance(res[dt], pd.DataFrame):
                is_df = True
        else:
            res[dt] = self.fetch_window(DATES[di - lshift:di + rshift + 1],
                                        **kwargs)
    if rshift + lshift == 0:
        # Daily frames stack into a Panel; daily scalars/series into a DataFrame.
        res = pd.Panel(res).transpose(1, 2, 0) if is_df else pd.DataFrame(res).T
    return res
if args.file: with open(args.file) as file: for line in file: name, fpath = line.strip().split() ext_alphas[name] = read_frame(fpath, args.ftype) if args.dir: assert os.path.exists(args.dir) for name in os.listdir(args.dir): ext_alphas[name] = read_frame(os.path.join(args.dir, name), args.ftype) extalpha_metric = {} if args.db: assert args.alpha db_metrics = perf_fetcher.fetch_window(args.metric, to_datestr(dates), mode=args.mode) for name, metric in db_metrics.iteritems(): extalpha_metric[name] = metric for name, alpha in ext_alphas.iteritems(): perf = Performance(alpha) extalpha_metric[name] = get_metric(perf, args.mode, args.metric) extmetric_df = pd.DataFrame(extalpha_metric) if not args.alpha: if len(extmetric_df) > args.days: extmetric_df = extmetric_df.iloc[-args.days:] print extmetric_df.corr() else: if len(extmetric_df) > 0:
ext_alphas = {}
# Load external alpha frames from a list file ("name path" per line) ...
if args.file:
    with open(args.file) as file:
        for line in file:
            name, fpath = line.strip().split()
            ext_alphas[name] = read_frame(fpath, args.ftype)
# ... and/or from every file in a directory, keyed by filename.
if args.dir:
    assert os.path.exists(args.dir)
    for name in os.listdir(args.dir):
        ext_alphas[name] = read_frame(os.path.join(args.dir, name), args.ftype)
extalpha_metric = {}
# Pull pre-computed metrics from the performance database.
if args.db:
    assert args.alpha
    db_metrics = perf_fetcher.fetch_window(args.metric, to_datestr(dates), mode=args.mode)
    for name, metric in db_metrics.iteritems():
        extalpha_metric[name] = metric
# Compute metrics for the externally loaded alphas.
for name, alpha in ext_alphas.iteritems():
    perf = Performance(alpha)
    extalpha_metric[name] = get_metric(perf, args.mode, args.metric)
extmetric_df = pd.DataFrame(extalpha_metric)
if not args.alpha:
    # No reference alpha: report pairwise metric correlations over the
    # trailing args.days rows.
    if len(extmetric_df) > args.days:
        extmetric_df = extmetric_df.iloc[-args.days:]
    print extmetric_df.corr()
else:
    # Reference alpha given: restrict the metrics to its dates.
    if len(extmetric_df) > 0:
        extmetric_df = extmetric_df.ix[dates]
def test_to_datestr_pddt_input(self):
    """to_datestr should convert a pandas DatetimeIndex into date strings."""
    converted = dateutil.to_datestr(DateutilTestCase.dates_pddt)
    self.assertListEqual(DateutilTestCase.dates_str, converted)