def update(self, dates):
    """Upsert per-day performance metrics for every alpha in ``self.db.alpha``.

    For each alpha (optionally filtered by ``self.options['alphas']``) metrics
    are computed in four modes -- 'longshort', 'quantile30', 'BTOP70Q'
    (long-short within the BTOP70Q universe) and 'top30' -- and upserted into
    ``self.collection`` keyed by (alpha, mode, date).

    :param dates: Ordered sequence of trading dates to update
    """
    dnames = self.db.alpha.distinct('dname')
    # calendar positions of the first and last requested dates
    si, ei = map(self.dates.index, [dates[0], dates[-1]])
    # the universe window carries a 20-day lookback so windowed metrics
    # can be computed for the first requested date
    BTOP70Q = univ_fetcher.fetch_window('BTOP70Q', self.dates[si-20: ei+1])
    cnt = 0
    for dname in dnames:
        # optional whitelist of alphas to process
        if self.options['alphas'] and dname not in self.options['alphas']:
            continue
        cursor = self.db.alpha.find(
                {'dname': dname, 'date': {'$gte': self.dates[si-20], '$lte': dates[-1]}},
                {'_id': 0, 'dvalue': 1, 'date': 1})
        # one record per date; transpose so the DataFrame is indexed by date
        alpha = pd.DataFrame({row['date']: row['dvalue'] for row in cursor}).T
        if len(alpha) == 0:
            continue
        perf = Performance(alpha)
        # one upsert pass per analysis mode; the (previously quadruplicated)
        # per-date loop now lives in _upsert_mode
        self._upsert_mode(perf.get_longshort(), dname, 'longshort', dates, alpha.index)
        self._upsert_mode(perf.get_qtail(0.3), dname, 'quantile30', dates, alpha.index)
        self._upsert_mode(perf.get_universe(BTOP70Q).get_longshort(), dname, 'BTOP70Q', dates, alpha.index)
        self._upsert_mode(perf.get_qtop(0.3), dname, 'top30', dates, alpha.index)
        cnt += 1
    if len(dates) == 1:
        self.logger.info('UPSERT documents for {} alphas into (c: [{}]) of (d: [{}]) on {}',
                cnt, self.collection.name, self.db.name, dates[0])
    else:
        self.logger.info('UPSERT documents for {} alphas into (c: [{}]) of (d: [{}]) from {} to {}',
                cnt, self.collection.name, self.db.name, dates[0], dates[-1])

def _upsert_mode(self, analyser, dname, mode, dates, alpha_index):
    """Upsert metrics of ``analyser`` under ``mode`` for each date present in ``alpha_index``."""
    for date in dates:
        if date not in alpha_index:
            continue
        key = {'alpha': dname, 'mode': mode, 'date': date}
        metrics = self.get_metrics(analyser, date)
        # NOTE(review): Collection.update is deprecated in pymongo>=3;
        # consider update_one when the driver is upgraded
        self.collection.update(key, {'$set': metrics}, upsert=True)
def run_hdf(store, alpha, params, startdate, enddate, predicate=None, threads=multiprocessing.cpu_count()):
    """Execute instances of an alpha in parallel and store DataFrames in an HDF5 file.

    Each item in ``params`` should be a ``dict``.

    :param store: File path of the to-be-created HDFStore; an existing file is removed first
    :param function predicate: A function with :py:class:`orca.perf.performance.Performance`
       object as the only parameter; for example:
       ``lambda x: x.get_original().get_ir() > 0.1``. Default: None
    """
    if os.path.exists(store):
        os.remove(store)
    logger = logbook.Logger(store)
    # keep the path and the open store in separate names (the original
    # rebound `store`, shadowing the path argument)
    hdf = pd.HDFStore(store)
    iterobj = ((i, alpha, param, startdate, enddate) for i, param in enumerate(params))
    pool = multiprocessing.Pool(threads)
    res = pool.imap_unordered(worker_hdf, iterobj)
    pool.close()
    pool.join()
    try:
        for i, param, alpha in res:
            # keep only the alphas that pass the (optional) filter
            if predicate is not None and not predicate(Performance(alpha)):
                continue
            hdf['alpha' + str(i)] = alpha
            hdf.append('params', pd.DataFrame({i: param}).T)
            hdf.flush()
            logger.debug('Saving alpha with parameter: {!r}'.format(param))
    finally:
        # always release the HDF5 file handle, even if predicate/append raises
        hdf.close()
def monitor(self, dates):
    """Monitor alpha performance metrics for the **same** day after market close."""
    dnames = self.db.alpha.distinct('dname')
    # calendar positions of the first and last requested dates
    si, ei = map(self.dates.index, [dates[0], dates[-1]])
    # 20-day lookback windows: tradable flags plus the two universes
    tradable = misc_fetcher.fetch_window('tradable', self.dates[si-20: ei+1]).astype(bool)
    TOP70Q = univ_fetcher.fetch_window('TOP70Q', self.dates[si-20: ei+1])
    CS500 = components_fetcher.fetch_window('CS500', self.dates[si-20: ei+1])
    cursor = self.monitor_connection.cursor()
    SQL1 = "SELECT * FROM alpha_category WHERE name=%s"
    SQL2 = "INSERT INTO alpha_category (name, category) VALUES (%s, %s)"
    for dname in dnames:
        # include/exclude filters supplied via command-line options
        if self.options['alphas'] and dname not in self.options['alphas']:
            continue
        if self.options['excludes'] and dname in self.options['excludes']:
            continue
        if self.options['pattern'] is not None and not self.options['pattern'].search(dname):
            continue
        if self.options['exclude_pattern'] is not None and self.options['exclude_pattern'].search(dname):
            continue
        self.logger.info('Processing {}', dname)
        # ensure the alpha is registered in the alpha_category table;
        # list(cursor) consumes the result set, hence the re-execute below
        cursor.execute(SQL1, (dname,))
        if not list(cursor):
            # unregistered alpha: ask interactively unless a category was given
            if self.options['category'] is None:
                category = raw_input('Specify a category for %s: ' % dname)
            else:
                category = self.options['category']
            cursor.execute(SQL2, (dname, category))
            cursor.execute(SQL1, (dname,))
        # first column of the alpha_category row is the id
        alpha_id = list(cursor)[0][0]
        # NOTE(review): lookback here is si-21 while the windows above use
        # si-20 -- confirm the off-by-one is intentional
        cur = self.db.alpha.find(
                {'dname': dname, 'date': {'$gte': self.dates[si-21], '$lte': dates[-1]}},
                {'_id': 0, 'dvalue': 1, 'date': 1})
        # one record per date; transpose so the DataFrame is indexed by date
        alpha = pd.DataFrame({row['date']: row['dvalue'] for row in cur}).T
        if len(alpha) == 0:
            continue
        perf = Performance(alpha)
        # monitor within each universe restriction
        self.monitor_alpha(perf.get_universe(tradable), 'tradable', dates, alpha_id, cursor)
        self.monitor_alpha(perf.get_universe(TOP70Q), 'TOP70Q', dates, alpha_id, cursor)
        self.monitor_alpha(perf.get_universe(CS500), 'CS500', dates, alpha_id, cursor)
        self.logger.info('MONITOR for {} from {} to {}', dname, dates[0], dates[-1])
def run_separate_file(outdir, alpha, params, startdate, enddate, predicate=None, threads=multiprocessing.cpu_count(), ftype='csv'):
    """Execute instances of an alpha in parallel and store each DataFrame in a separate file.

    Each item in ``params`` should be a ``dict``.

    :param outdir: Directory to store output files; wiped first if it already exists
    :param function predicate: A function with :py:class:`orca.perf.performance.Performance`
       object as the only parameter; for example:
       ``lambda x: x.get_original().get_ir() > 0.1``. Default: None
    :param str ftype: File format; currently only supports ('csv', 'pickle', 'msgpack')
    :raises ValueError: If ``ftype`` is not one of the supported formats
       (the original silently wrote nothing)
    """
    if ftype not in ('csv', 'pickle', 'msgpack'):
        raise ValueError('unsupported ftype: %r' % ftype)
    if os.path.exists(outdir) and os.path.isdir(outdir):
        shutil.rmtree(outdir)
    logger = logbook.Logger(outdir)
    os.makedirs(outdir)
    iterobj = ((i, alpha, param, startdate, enddate) for i, param in enumerate(params))
    pool = multiprocessing.Pool(threads)
    res = pool.imap_unordered(worker_hdf, iterobj)
    pool.close()
    pool.join()
    params = {}
    for i, param, alpha in res:
        # keep only the alphas that pass the (optional) filter
        if predicate is not None and not predicate(Performance(alpha)):
            continue
        params[i] = param
        logger.debug('Saving alpha with parameter: {!r}'.format(param))
        path = os.path.join(outdir, 'alpha' + str(i))
        if ftype == 'csv':
            alpha.to_csv(path)
        elif ftype == 'pickle':
            alpha.to_pickle(path)
        else:
            alpha.to_msgpack(path)
    # pickle/msgpack are binary formats: the params file must be opened in
    # binary mode (the original text-mode 'w' corrupts them on Windows)
    mode = 'w' if ftype == 'csv' else 'wb'
    with open(os.path.join(outdir, 'params.json'), mode) as outfile:
        if ftype == 'csv':
            json.dump(params, outfile)
        elif ftype == 'pickle':
            cPickle.dump(params, outfile)
        else:
            msgpack.dump(params, outfile)
def setUp(self):
    """Build fresh Analysers and a Performance object before each test."""
    # wrap the three module-level alpha frames in Analysers sharing `data`
    for idx, frame in enumerate((alpha1, alpha2, alpha3), start=1):
        setattr(self, 'alpha%d' % idx, Analyser(frame, data=data))
    self.perf = Performance(alpha1)
    self.perf.set_returns(data)
class PerfTestCase(unittest.TestCase):
    """Arithmetic-identity checks for Analyser/Performance metrics."""

    def setUp(self):
        # one Analyser per module-level alpha frame, all sharing `data`
        self.alpha1 = Analyser(alpha1, data=data)
        self.alpha2 = Analyser(alpha2, data=data)
        self.alpha3 = Analyser(alpha3, data=data)
        self.perf = Performance(alpha1)
        self.perf.set_returns(data)

    def tearDown(self):
        # release references between tests
        self.alpha1, self.alpha2, self.alpha3, self.perf = None, None, None, None

    def test_get1(self):
        # long/short legs of perf must match alpha2/alpha3 respectively
        long_ok = frames_equal(self.perf.get_long().alpha, self.alpha2.alpha)
        short_ok = frames_equal(self.perf.get_short().alpha, self.alpha3.alpha)
        self.assertTrue(long_ok and short_ok)

    def test_get_bms(self):
        # big/mid/small buckets partition the universe: counts must add up
        big, mid, sml = (part.alpha for part in self.perf.get_bms())
        combined = big.count(axis=1) + mid.count(axis=1) + sml.count(axis=1)
        self.assertTrue(series_equal(self.perf.alpha.count(axis=1), combined))

    def test_init_1(self):
        # normalized alpha: absolute weights sum to 1 on every row
        row_sums = np.abs(self.alpha1.alpha).sum(axis=1)
        self.assertTrue(np.allclose(row_sums, 1))

    def test_init_2(self):
        row_sums = np.abs(self.alpha2.alpha).sum(axis=1)
        self.assertTrue(np.allclose(row_sums, 1))

    def test_returns1(self):
        # twice the long-short return equals long return minus short return
        r1, r2, r3 = (a.get_returns(cost=0) for a in (self.alpha1, self.alpha2, self.alpha3))
        self.assertTrue(np.allclose(r1 * 2, r2 - r3))

    def test_returns2(self):
        # returns start from the second date
        r1 = self.alpha1.get_returns(cost=0)
        self.assertTrue((r1.index == dates[1:]).all())

    def test_turnover(self):
        # |((l1-s1)-(l2-s2))| = |(l1-l2) - (s1-s2)| <= |l1-l2| + |s1-s2|
        t1, t2, t3 = (a.get_turnover() for a in (self.alpha1, self.alpha2, self.alpha3))
        self.assertTrue(np.allclose(t1 * 2, t2 + t3))

    def test_ac(self):
        # get_ac should differ from a naive lagged self-correlation
        computed = self.alpha1.get_ac()
        naive = self.alpha1.alpha.corrwith(self.alpha1.alpha.shift(1), axis=1).iloc[1:]
        self.assertFalse(np.allclose(computed, naive))

    def test_ic(self):
        # IC equals the correlation of returns data with the lagged alpha
        computed = self.alpha1.get_ic()
        expected = data.corrwith(self.alpha1.alpha.shift(1), axis=1).iloc[1:]
        self.assertTrue(series_equal(computed, expected))

    def test_summary_ir1(self):
        ir = self.alpha1.summary_ir()
        self.assertListEqual(['days', 'IR1', 'rIR1'], list(ir.index))

    def test_summary_ir2(self):
        # by='A' groups annually; presumably the fixture spans one year
        ir = self.alpha1.summary_ir(by='A')
        self.assertEqual(len(ir.columns), 1)

    def test_summary_turnover(self):
        tvr = self.alpha1.summary_turnover(freq='weekly')
        self.assertListEqual(['turnover', 'AC1', 'rAC1', 'AC5', 'rAC5'], list(tvr.index))

    def test_summary_returns(self):
        # smoke test: must run without raising
        self.alpha1.summary_returns(cost=0.001)
        self.assertTrue(True)

    def test_summary(self):
        # summary(group='ir') must delegate to summary_ir
        via_summary = self.alpha1.summary(group='ir', by='A')
        direct = self.alpha1.summary_ir(by='A')
        self.assertTrue(frames_equal(via_summary, direct))
# Validate that existing --pdf/--png targets really are PDF/PNG files
# (libmagic content sniffing), then plot quantile pnl/returns.
if args.pdf and os.path.exists(args.pdf):
    with magic.Magic() as m:
        ftype = m.id_filename(args.pdf)
        if ftype[:3] != 'PDF':
            print 'The argument --pdf if exists must be a PDF file'
            exit(0)
if args.png and os.path.exists(args.png):
    with magic.Magic() as m:
        ftype = m.id_filename(args.png)
        if ftype[:3] != 'PNG':
            print 'The argument --png if exists must be a PNG file'
            exit(0)
alpha = read_frame(args.alpha, args.ftype)
perf = Performance(alpha)
plotter = QuantilesPlotter(perf.get_quantiles(args.quantile))
figs = []
if 'pnl' in args.plot:
    fig = plotter.plot_pnl(args.startdate, args.enddate)
    figs.append(fig)
if 'returns' in args.plot:
    fig = plotter.plot_returns(args.by, args.startdate, args.enddate)
    figs.append(fig)
if args.pdf:
    # NOTE(review): output lands in the CWD (basename only), not next to
    # args.alpha -- confirm intended
    pdf = os.path.basename(args.alpha) + '-' + str(args.quantile) + '.pdf'
    if os.path.exists(pdf):
        os.remove(pdf)
    pp = PdfPages(pdf)
        print '[WARNING] Failed to parse file', alpha
else:
    # multiple alpha files: load each, inferring daily vs intraday once
    for alpha in args.alphas:
        try:
            alphadf = read_frame(alpha, args.ftype)
            # daily data has one row per calendar date; intraday has repeats
            if args.atype is None:
                if len(alphadf.index) == len(np.unique(alphadf.index.date)):
                    args.atype = 'daily'
                else:
                    args.atype = 'intraday'
            if args.atype == 'intraday':
                perf = IntPerformance(alphadf)
            else:
                perf = Performance(alphadf)
            alphas[alpha] = perf
        except:
            # best-effort: skip unparsable files with a warning
            print '[WARNING] Failed to parse file', alpha
if args.univ:
    assert args.univ in univ_fetcher.dnames
    # NOTE(review): `perf` here is whichever alpha loaded last -- its date
    # range is used to fetch the universe window; confirm intended
    dates = np.unique([dt.strftime('%Y%m%d') for dt in perf.alpha.index])
    univ = univ_fetcher.fetch_window(args.univ, dates)
    alphas = {alpha: perf.get_universe(univ) for alpha, perf in alphas.iteritems()}
if args.longonly:
    if args.quantile:
# Validate that existing --pdf/--png targets really are PDF/PNG files
# (libmagic content sniffing), then plot quantile pnl/returns.
if args.pdf and os.path.exists(args.pdf):
    with magic.Magic() as m:
        ftype = m.id_filename(args.pdf)
        if ftype[:3] != 'PDF':
            print 'The argument --pdf if exists must be a PDF file'
            exit(0)
if args.png and os.path.exists(args.png):
    with magic.Magic() as m:
        ftype = m.id_filename(args.png)
        if ftype[:3] != 'PNG':
            print 'The argument --png if exists must be a PNG file'
            exit(0)
alpha = read_frame(args.alpha, args.ftype)
perf = Performance(alpha)
plotter = QuantilesPlotter(perf.get_quantiles(args.quantile))
figs = []
if 'pnl' in args.plot:
    fig = plotter.plot_pnl(args.startdate, args.enddate)
    figs.append(fig)
if 'returns' in args.plot:
    fig = plotter.plot_returns(args.by, args.startdate, args.enddate)
    figs.append(fig)
if args.pdf:
    # NOTE(review): output lands in the CWD (basename only), not next to
    # args.alpha -- confirm intended
    pdf = os.path.basename(args.alpha)+'-'+str(args.quantile)+'.pdf'
    if os.path.exists(pdf):
        os.remove(pdf)
    pp = PdfPages(pdf)
def update(self, dates):
    """Update alpha performance metrics for the **same** day after market close."""
    dnames = self.db.alpha.distinct('dname')
    # calendar positions of the first and last requested dates
    si, ei = map(self.dates.index, [dates[0], dates[-1]])
    # the universe window carries a 20-day lookback for windowed metrics
    BTOP70Q = univ_fetcher.fetch_window('BTOP70Q', self.dates[si - 20:ei + 1])
    cnt = 0  # number of alphas actually processed
    for dname in dnames:
        # optional whitelist of alphas to process
        if self.options['alphas'] and dname not in self.options['alphas']:
            continue
        cursor = self.db.alpha.find(
            {
                'dname': dname,
                'date': {
                    '$gte': self.dates[si - 20],
                    '$lte': dates[-1]
                }
            }, {
                '_id': 0,
                'dvalue': 1,
                'date': 1
            })
        # one record per date; transpose so the DataFrame is indexed by date
        alpha = pd.DataFrame(
            {row['date']: row['dvalue'] for row in cursor}).T
        if len(alpha) == 0:
            continue
        perf = Performance(alpha)
        # original
        analyser = perf.get_longshort()
        for date in dates:
            if date not in alpha.index:
                continue
            key = {'alpha': dname, 'mode': 'longshort', 'date': date}
            metrics = self.get_metrics(analyser, date)
            # NOTE(review): Collection.update is deprecated in pymongo>=3
            self.collection.update(key, {'$set': metrics}, upsert=True)
        # quantile
        analyser = perf.get_qtail(0.3)
        for date in dates:
            if date not in alpha.index:
                continue
            key = {'alpha': dname, 'mode': 'quantile30', 'date': date}
            metrics = self.get_metrics(analyser, date)
            self.collection.update(key, {'$set': metrics}, upsert=True)
        # universe(s)
        analyser = perf.get_universe(BTOP70Q).get_longshort()
        for date in dates:
            if date not in alpha.index:
                continue
            key = {'alpha': dname, 'mode': 'BTOP70Q', 'date': date}
            metrics = self.get_metrics(analyser, date)
            self.collection.update(key, {'$set': metrics}, upsert=True)
        # top
        analyser = perf.get_qtop(0.3)
        for date in dates:
            if date not in alpha.index:
                continue
            key = {'alpha': dname, 'mode': 'top30', 'date': date}
            metrics = self.get_metrics(analyser, date)
            self.collection.update(key, {'$set': metrics}, upsert=True)
        cnt += 1
    if len(dates) == 1:
        self.logger.info(
            'UPSERT documents for {} alphas into (c: [{}]) of (d: [{}]) on {}',
            cnt, self.collection.name, self.db.name, dates[0])
    else:
        self.logger.info(
            'UPSERT documents for {} alphas into (c: [{}]) of (d: [{}]) from {} to {}',
            cnt, self.collection.name,
            self.db.name, dates[0], dates[-1])
class PerfTestCase(unittest.TestCase):
    """Arithmetic-identity checks for Analyser/Performance metrics."""

    def setUp(self):
        # one Analyser per module-level alpha frame, all sharing `data`
        self.alpha1 = Analyser(alpha1, data=data)
        self.alpha2 = Analyser(alpha2, data=data)
        self.alpha3 = Analyser(alpha3, data=data)
        self.perf = Performance(alpha1)
        self.perf.set_returns(data)

    def tearDown(self):
        # release references between tests
        self.alpha1 = None
        self.alpha2 = None
        self.alpha3 = None
        self.perf = None

    def test_get1(self):
        # long/short legs of perf must match alpha2/alpha3 respectively
        self.assertTrue(frames_equal(self.perf.get_long().alpha, self.alpha2.alpha) and frames_equal(self.perf.get_short().alpha, self.alpha3.alpha))

    def test_get_bms(self):
        # big/mid/small buckets partition the universe: counts must add up
        b, m, s = self.perf.get_bms()
        b, m, s = b.alpha, m.alpha, s.alpha
        self.assertTrue(series_equal(self.perf.alpha.count(axis=1), b.count(axis=1)+m.count(axis=1)+s.count(axis=1)))

    def test_init_1(self):
        # normalized alpha: absolute weights sum to 1 on every row
        self.assertTrue(np.allclose(np.abs(self.alpha1.alpha).sum(axis=1), 1))

    def test_init_2(self):
        self.assertTrue(np.allclose(np.abs(self.alpha2.alpha).sum(axis=1), 1))

    def test_returns1(self):
        # twice the long-short return equals long minus short return
        ret1 = self.alpha1.get_returns(cost=0)
        ret2 = self.alpha2.get_returns(cost=0)
        ret3 = self.alpha3.get_returns(cost=0)
        self.assertTrue(np.allclose(ret1 * 2, ret2 - ret3))

    def test_returns2(self):
        # returns start from the second date
        ret1 = self.alpha1.get_returns(cost=0)
        self.assertTrue((ret1.index == dates[1:]).all())

    def test_turnover(self):
        # |((l1-s1)-(l2-s2))| = |(l1-l2) - (s1-s2)| <= |l1-l2| + |s1-s2|
        tvr1 = self.alpha1.get_turnover()
        tvr2 = self.alpha2.get_turnover()
        tvr3 = self.alpha3.get_turnover()
        self.assertTrue(np.allclose(tvr1 * 2, tvr2 + tvr3))

    def test_ac(self):
        # get_ac should differ from a naive lagged self-correlation
        ac1 = self.alpha1.get_ac()
        ac2 = self.alpha1.alpha.corrwith(self.alpha1.alpha.shift(1), axis=1).iloc[1:]
        self.assertFalse(np.allclose(ac1, ac2))

    def test_ic(self):
        # IC equals the correlation of returns data with the lagged alpha
        ic1 = self.alpha1.get_ic()
        ic2 = data.corrwith(self.alpha1.alpha.shift(1), axis=1).iloc[1:]
        self.assertTrue(series_equal(ic1, ic2))

    def test_summary_ir1(self):
        ir = self.alpha1.summary_ir()
        self.assertListEqual(['days', 'IR1', 'rIR1'], list(ir.index))

    def test_summary_ir2(self):
        # by='A' groups annually; presumably the fixture spans one year
        ir = self.alpha1.summary_ir(by='A')
        self.assertEqual(len(ir.columns), 1)

    def test_summary_turnover(self):
        tvr = self.alpha1.summary_turnover(freq='weekly')
        self.assertListEqual(['turnover', 'AC1', 'rAC1', 'AC5', 'rAC5'], list(tvr.index))

    def test_summary_returns(self):
        # smoke test: must run without raising
        self.alpha1.summary_returns(cost=0.001)
        self.assertTrue(True)

    def test_summary(self):
        # summary(group='ir') must delegate to summary_ir
        ir1 = self.alpha1.summary(group='ir', by='A')
        ir2 = self.alpha1.summary_ir(by='A')
        self.assertTrue(frames_equal(ir1, ir2))
        for rank in [True, False] for n in (1, 5, 20)])
parser.add_argument('--metric', choices=metrics, default='rIC1', help='What type of correlations is of interest?')
parser.add_argument('--limit', type=int, default=10)
parser.add_argument('-n', '--negate', action='store_true')
args = parser.parse_args()
# alpha_metric: file name -> per-date metric series; dates: union of all indexes
alpha_metric, dates = {}, None
if args.alpha:
    # each --alpha argument may be a glob pattern matching several files
    for path in args.alpha:
        for name in glob(path):
            df = read_frame(name, args.ftype)
            perf = Performance(df)
            name = os.path.basename(name)
            alpha_metric[name] = get_metric(perf, args.mode, args.metric)
            # accumulate the union of every alpha's metric dates
            if dates is None:
                dates = alpha_metric[name].index
            else:
                dates = dates.union(alpha_metric[name].index)
ext_alphas = {}
if args.file:
    # each line of the mapping file is: <name> <path-to-frame>
    with open(args.file) as file:
        for line in file:
            name, fpath = line.strip().split()
            ext_alphas[name] = read_frame(fpath, args.ftype)
if args.dir:
    assert os.path.exists(args.dir)