def test_03_reconcile_dn_data(self): self.dn_eod.reconcile_spots(self.s_date_spot, self.e_date, self.test_stx) res1 = stxdb.db_read_cmd("select * from {0:s} where " "recon_name='{1:s}' order by stk". format(self.recon_tbl, self.dn_eod.rec_name)) res2 = stxdb.db_read_cmd("select * from {0:s} where divi_type=1 order" " by stk, date".format(self.dn_eod.divi_tbl)) print('test_03_reconcile_dn_data') print('res1') print(res1) print('res2') print(res2) self.assertTrue(res1[0] == ('AEOS', self.dn_eod.rec_name, '20020201_20121231', '2002-02-08', '2007-03-09', '2002-02-04', '2007-03-09', 1, 100.0, 0.0017, 0) and res1[1] == ('EXPE', self.dn_eod.rec_name, '20020201_20121231', '2002-02-08', '2012-12-31', '2005-07-21', '2012-12-31', 0, 68.34, 0.0001, 0) and res1[2] == ('NFLX', self.dn_eod.rec_name, '20020201_20121231', '2002-12-11', '2012-12-31', '2002-05-23', '2012-12-31', 0, 100.0, 0.0012, 0) and res1[3] == ('TIE', self.dn_eod.rec_name, '20020201_20121231', '2005-10-03', '2012-12-31', '2002-02-01', '2012-12-31', 0, 100.0, 0.0041, 0) and res2[0] == ('AEOS', datetime.date(2005, 3, 7), Decimal('0.4999'), 1))
def test_01_loaded_data(self): res00 = stxdb.db_read_cmd('select count(*) from my_eods') res01 = stxdb.db_read_cmd('select count(*) from my_dividends') res10 = stxdb.db_read_cmd('select count(*) from dn_eods') res11 = stxdb.db_read_cmd('select count(*) from dn_dividends') res20 = stxdb.db_read_cmd('select count(*) from ed_eods') res21 = stxdb.db_read_cmd('select count(*) from ed_dividends') res30 = stxdb.db_read_cmd('select count(*) from md_eods') res31 = stxdb.db_read_cmd('select count(*) from md_dividends') res40 = stxdb.db_read_cmd('select count(*) from sq_eods') res41 = stxdb.db_read_cmd('select count(*) from sq_dividends') print('res10\n{0:s}'.format(res00)) print('res11\n{0:s}'.format(res01)) print('res10\n{0:s}'.format(res10)) print('res11\n{0:s}'.format(res11)) print('res10\n{0:s}'.format(res20)) print('res11\n{0:s}'.format(res21)) print('res10\n{0:s}'.format(res30)) print('res11\n{0:s}'.format(res31)) print('res10\n{0:s}'.format(res40)) print('res11\n{0:s}'.format(res41)) self.assertEqual(res00[0][0], 39794) self.assertEqual(res01[0][0], 32) self.assertEqual(res10[0][0], 21960) self.assertEqual(res11[0][0], 21) self.assertEqual(res20[0][0], 1113) self.assertEqual(res21[0][0], 0) self.assertEqual(res30[0][0], 35934) self.assertEqual(res31[0][0], 486) self.assertEqual(res40[0][0], 0) self.assertEqual(res41[0][0], 0)
def test_03_load_dn_data(self): dn_eod = StxEOD(self.dn_in_dir, 'dn', self.recon_tbl) dn_eod.load_deltaneutral_files(self.dn_stx) res1 = stxdb.db_read_cmd( StxEOD.sql_show_tables.format(self.dn_eod_tbl)) res2 = stxdb.db_read_cmd( StxEOD.sql_show_tables.format(self.dn_split_tbl)) res3 = stxdb.db_read_cmd('select distinct stk from {0:s}'. format(self.dn_eod_tbl)) res4 = stxdb.db_read_cmd( "select stk, count(*) from {0:s} where stk in " "('NFLX', 'AEOS', 'TIE', 'EXPE') and date <= '2012-12-31' " "group by stk order by stk".format(self.dn_eod_tbl)) res5 = stxdb.db_read_cmd("select stk, sum(ratio) from {0:s} where stk " "in ('NFLX', 'AEOS', 'TIE', 'EXPE') and date " " <= '2012-12-31' group by stk order by stk". format(self.dn_split_tbl)) print('test_03_load_dn_data') print('res1') print(res1) print('res2') print(res2) print('res3') print(res3) print('res4') print(res4) print('res5') print(res5) self.assertTrue(len(res1) == 1 and len(res2) == 1 and len(res3) == 5 and res4[0][0] == 'AEOS' and res4[0][1] == 1549 and res4[1][0] == 'EXPE' and res4[1][1] == 1875 and res4[2][0] == 'NFLX' and res4[2][1] == 2671 and res4[3][0] == 'TIE' and res4[3][1] == 3017 and res5[0][0] == 'NFLX' and float(res5[0][1]) == 0.5 and res5[1][0] == 'TIE' and float(res5[1][1]) == 15.1793)
def eod_analysis(self, ana_date): # special case when the date is an option expiry date: # 1. wait until eoddata is downloaded. # 2. calculate liquidity leaders # 3. download options for all liquidity leaders # 4. calculate option spread leaders # 5. populate leaders table # 6. MON vs. FRI next_exp = stxcal.next_expiry(ana_date, 0) if next_exp == ana_date: res = stxdb.db_read_cmd('select max(date) from eods where ' 'open_interest>=0') last_eod_date = str(res[0][0]) while last_eod_date < ana_date: print('Could not find eod data for {0:s}, sleeping one hour'. format(ana_date)) time.sleep(3600) res = stxdb.db_read_cmd('select max(date) from eods where ' 'open_interest>=0') last_eod_date = str(res[0][0]) self.get_liq_leaders(ana_date) self.get_data(ana_date, get_eod=False, get_for_all=True) self.get_opt_spread_leaders(ana_date) else: self.get_data(ana_date, get_for_all=False, get_eod=True) setup_df = self.calc_setups(ana_date) self.mail_analysis(setup_df)
def test_06_load_stooq_eod_files(self): sq_eod = StxEOD(self.sq_in_dir, 'sq', self.recon_tbl) sq_eod.parseeodfiles('2016-08-24', '2016-08-26') res1 = stxdb.db_read_cmd( StxEOD.sql_show_tables.format(sq_eod.eod_tbl)) res2 = stxdb.db_read_cmd( StxEOD.sql_show_tables.format(sq_eod.divi_tbl)) res3 = stxdb.db_read_cmd('select distinct stk from {0:s}'. format(sq_eod.eod_tbl)) res4 = stxdb.db_read_cmd("select stk, count(*) from {0:s} where stk in" " ('AA', 'NFLX', 'VXX', 'EXPE') group by stk " "order by stk".format(sq_eod.eod_tbl)) print('res1') print(res1) print('res2') print(res2) print('res3') print(res3) print('res4') print(res4) self.assertTrue(len(res1) == 1 and len(res2) == 1 and len(res3) == 8118 and res4[0][0] == 'AA' and res4[0][1] == 3 and res4[1][0] == 'EXPE' and res4[1][1] == 3 and res4[2][0] == 'NFLX' and res4[2][1] == 3 and res4[3][0] == 'VXX' and res4[3][1] == 3)
def test_06_reconcile_ed_data(self): self.ed_eod.reconcile_spots(self.s_date_ed, self.e_date_ed, self.test_stx) res1 = stxdb.db_read_cmd("select * from {0:s} where " "recon_name='{1:s}' order by stk". format(self.recon_tbl, self.ed_eod.rec_name)) res2 = stxdb.db_read_cmd("select * from {0:s} where divi_type=1 order" " by stk, date".format(self.ed_eod.divi_tbl)) self.assertTrue(res1[0] == ('AA', self.ed_eod.rec_name, '20130102_20131115', '2013-01-02', '2013-11-15', '2013-01-02', '2013-11-15', 0, 100.0, 0.0011, 0) and res1[1] == ('EXPE', self.ed_eod.rec_name, '20130102_20131115', '2013-01-02', '2013-11-15', '2013-01-02', '2013-11-15', 0, 100.0, 0.0005, 0) and res1[2] == ('NFLX', self.ed_eod.rec_name, '20130102_20131115', '2013-01-02', '2013-11-15', '2013-01-02', '2013-11-15', 0, 100.0, 0.001, 0) and res1[3] == ('VXX', self.ed_eod.rec_name, '20130102_20131115', '2013-01-02', '2013-11-15', '2013-01-02', '2013-11-15', 0, 100.0, 0.0017, 0) and len(res2) == 0)
def test_1_create_missing_table(self): res0 = stxdb.db_read_cmd(self.sql_tbls) stxdb.db_create_missing_table(self.tbl_name, self.sql_create_tbl) res1 = stxdb.db_read_cmd(self.sql_tbls) print('res1 = {0:s}'.format(res1)) print('res1[0][0] = {0:s}'.format(res1[0][0])) self.assertEqual(len(res0), 0) self.assertEqual(len(res1), 1) self.assertEqual(res1[0][0], self.tbl_name)
def __init__(self, test=False): ''' - Create US exchange, if not already there - Define the directories from which we will be reading the data - Specify the atem commands used to get stock info and prices ''' self.root_dir = os.getenv('NORGATE_TEST_DIR') if test \ else os.getenv('NORGATE_DIR') self.upload_dir = '/tmp' xchgs = stxdb.db_read_cmd("select * from exchanges where name='US'") if not xchgs: stxdb.db_write_cmd("insert into exchanges values('US')") self.input_dirs = [ 'AMEX', 'Indices', 'NASDAQ', 'NYSE', 'NYSE Arca', 'Delisted Securities' ] self.atem_prices_cmd = [ 'atem', '-o', '{0:s}/prices.txt'.format(self.upload_dir), '--format=symbol,date,open,high,low,close,volume,openint', '--float-openint' ] self.atem_info_cmd = [ 'atem', '-o', '{0:s}/names.txt'.format(self.upload_dir), '--format=symbol,long_name' ] pd.options.mode.chained_assignment = None
def test_02_load_9899_data(self): my_eod = StxEOD(self.my_dir, 'my', self.recon_tbl) my_eod.load_my_9899_files(self.my_stx) in_stx = ','.join(["'{0:s}'".format(x) for x in self.my_stx.split(',')]) res1 = stxdb.db_read_cmd( 'select stk, min(date), max(date), count(*) from {0:s} where ' 'stk in ({1:s}) group by stk order by stk'. format(my_eod.eod_tbl, in_stx)) print('test_02_load_9899_data') print('res1 = {0:s}'.format(res1)) self.assertEqual( res1[0], ('AA', date(1962, 1, 2), date(2012, 12, 31), 12836)) self.assertEqual( res1[1], ('AEOS', date(1994, 4, 14), date(2007, 1, 26), 3214)) self.assertEqual( res1[2], ('EXPE', date(1999, 11, 19), date(2012, 12, 31), 2806)) self.assertEqual( res1[3], ('INKT', date(1998, 6, 10), date(2003, 3, 19), 1200)) self.assertEqual( res1[4], ('MCT', date(1998, 11, 24), date(2001, 11, 29), 757)) self.assertEqual( res1[5], ('NFLX', date(2002, 5, 29), date(2012, 12, 31), 2668)) self.assertEqual( res1[6], ('VXX', date(2009, 1, 30), date(2012, 12, 31), 987))
def test_02_reconcile_my_data(self): self.my_eod.reconcile_spots(self.s_date_spot, self.e_date, self.test_stx) res1 = stxdb.db_read_cmd("select * from {0:s} where " "recon_name='{1:s}' order by stk". format(self.recon_tbl, self.my_eod.rec_name)) res2 = stxdb.db_read_cmd("select * from {0:s} where divi_type=1 order " "by stk, date".format(self.my_eod.divi_tbl)) print('test_02_reconcile_my_data') print('res1') print(res1) print('res2') print(res2) self.assertEqual(res1[0][0], 'AEOS') self.assertEqual(res1[0][1], self.my_eod.rec_name) self.assertEqual(res1[0][2], '20020201_20121231') self.assertEqual(res1[0][3], '2002-02-08') self.assertEqual(res1[0][4], '2007-03-09') self.assertEqual(res1[0][5], '2002-02-04') self.assertEqual(res1[0][6], '2007-01-26') self.assertEqual(res1[0][7], 0) self.assertEqual(res1[0][8], 97.73) self.assertEqual(res1[0][9], 0.0016) self.assertEqual(res1[0][10], 0) self.assertTrue(res1[0] == ('AEOS', self.my_eod.rec_name, '20020201_20121231', '2002-02-08', '2007-03-09', '2002-02-04', '2007-01-26', 0, 97.73, 0.0016, 0)) self.assertTrue(res1[1] == ('EXPE', self.my_eod.rec_name, '20020201_20121231', '2002-02-08', '2012-12-31', '2002-02-01', '2012-12-31', 1, 100.0, 0.0012, 0)) self.assertTrue(res1[2] == ('NFLX', self.my_eod.rec_name, '20020201_20121231', '2002-12-11', '2012-12-31', '2002-05-29', '2012-12-31', 0, 100.0, 0.0012, 0)) self.assertTrue(res1[3] == ('TIE', self.my_eod.rec_name, '20020201_20121231', '2005-10-03', '2012-12-31', '2002-02-04', '2012-12-31', 2, 100.0, 0.0011, 0)) self.assertTrue(res2[0] == ('EXPE', datetime.date(2003, 3, 10), Decimal('0.5000'), 1)) self.assertTrue(res2[1] == ('TIE', datetime.date(2006, 2, 16), Decimal('0.5000'), 1)) self.assertTrue(res2[2] == ('TIE', datetime.date(2006, 5, 15), Decimal('0.5000'), 1))
def test_05_load_ed_data(self): ed_eod = StxEOD(self.ed_in_dir, 'ed', self.recon_tbl) ed_eod.load_eoddata_files(sd=self.sd_01, stks=self.ed_stx) res1 = stxdb.db_read_cmd( StxEOD.sql_show_tables.format(self.ed_eod_tbl)) res2 = stxdb.db_read_cmd( StxEOD.sql_show_tables.format(self.ed_split_tbl)) res3 = stxdb.db_read_cmd('select distinct stk from {0:s}'. format(self.ed_eod_tbl)) res4 = stxdb.db_read_cmd("select stk, count(*) from {0:s} where stk in" " ('AA', 'NFLX', 'VXX', 'EXPE') group by stk " "order by stk".format(self.ed_eod_tbl)) self.assertTrue(len(res1) == 1 and len(res2) == 1 and len(res3) == 4 and res4[0][0] == 'AA' and res4[0][1] == 242 and res4[1][0] == 'EXPE' and res4[1][1] == 242 and res4[2][0] == 'NFLX' and res4[2][1] == 242 and res4[3][0] == 'VXX' and res4[3][1] == 241)
def get_etf_words(elim_dct={}): q = sql.Composed([sql.SQL("SELECT name FROM etfs")]) res = stxdb.db_read_cmd(q.as_string(stxdb.db_get_cnx())) etf_words = ' '.join([x[0] for x in res]) etf_words_list = [ x for x in etf_words.split() if x not in elim_dct ] dct = {} for w in etf_words_list: count = dct.get(w, 0) dct[w] = count + 1 return dct
def test_6_create_table_like(self): tbl_name = 'eods' new_tbl_name = 'eods2' sql_new_tbl = "SELECT table_name FROM information_schema.tables "\ "WHERE table_schema='public' AND table_name='{0:s}'".\ format(new_tbl_name) res0 = stxdb.db_read_cmd(sql_new_tbl) stxdb.db_create_table_like(tbl_name, new_tbl_name) res1 = stxdb.db_read_cmd(sql_new_tbl) print('res1 = {0:s}'.format(res1)) print('res1[0][0] = {0:s}'.format(res1[0][0])) self.assertEqual(len(res0), 0) self.assertEqual(len(res1), 1) self.assertEqual(res1[0][0], new_tbl_name) res = stxdb.db_get_table_columns(new_tbl_name) print('res = {0:s}'.format(res)) self.assertEqual(len(res), 8) self.assertEqual(res[0][0], 'stk') self.assertEqual(res[0][1], 'varchar') self.assertEqual(res[0][2], 8) self.assertEqual(res[1][0], 'date') self.assertEqual(res[1][1], 'date') self.assertEqual(res[2][0], 'o') self.assertEqual(res[2][1], 'numeric') self.assertEqual(res[2][3], 10) self.assertEqual(res[2][4], 2) self.assertEqual(res[3][0], 'hi') self.assertEqual(res[3][1], 'numeric') self.assertEqual(res[3][3], 10) self.assertEqual(res[3][4], 2) self.assertEqual(res[4][0], 'lo') self.assertEqual(res[4][1], 'numeric') self.assertEqual(res[4][3], 10) self.assertEqual(res[4][4], 2) self.assertEqual(res[5][0], 'c') self.assertEqual(res[5][1], 'numeric') self.assertEqual(res[5][3], 10) self.assertEqual(res[5][4], 2) self.assertEqual(res[6][0], 'volume') self.assertEqual(res[6][1], 'int4') self.assertEqual(res[7][0], 'open_interest') self.assertEqual(res[7][1], 'int4')
def test_04_split_recon(self): self.dn_eod.reconcile_big_changes('AEOS', self.s_date, self.e_date, [self.my_eod.divi_tbl]) res = stxdb.db_read_cmd("select * from {0:s} where stk='{1:s}'" .format(self.dn_eod.divi_tbl, 'AEOS')) print('res') print(res) self.assertTrue( res[0] == ('AEOS', datetime.date(2005, 3, 7), Decimal('0.4999'), 1) and res[1] == ('AEOS', datetime.date(2006, 12, 18), Decimal('0.6700'), 0))
def test_07_split_recon(self): stk_list = self.test_stx.split(',') for stk in stk_list: self.ed_eod.reconcile_big_changes( stk, self.s_date_ed, self.e_date_ed, ['dividends', 'dn_dividends']) res = stxdb.db_read_cmd("select * from {0:s}". format(self.ed_eod.divi_tbl)) print('res') print(res) self.assertTrue(res[0] == ('VXX', datetime.date(2013, 11, 7), Decimal('4.0000'), 0))
def test_01_load_my_data(self): my_eod = StxEOD(self.my_dir, 'my', self.recon_tbl) my_eod.load_my_files(self.my_stx) res1 = stxdb.db_read_cmd( StxEOD.sql_show_tables.format(self.my_eod_tbl)) res2 = stxdb.db_read_cmd( StxEOD.sql_show_tables.format(self.my_split_tbl)) in_stx = ','.join(["'{0:s}'".format(x) for x in self.my_stx.split(',')]) res3 = stxdb.db_read_cmd( 'select stk, min(date), max(date), count(*) from {0:s} where ' 'stk in ({1:s}) group by stk order by stk'. format(my_eod.eod_tbl, in_stx)) res4 = stxdb.db_read_cmd( "select stk, sum(ratio), count(*) from {0:s} where stk in ({1:s}) " "group by stk order by stk".format(my_eod.divi_tbl, in_stx)) print('res1 = {0:s}'.format(res1)) print('res2 = {0:s}'.format(res2)) print('res3 = {0:s}'.format(res3)) print('res4 = {0:s}'.format(res4)) self.assertEqual(len(res1), 1) self.assertEqual(len(res2), 1) self.assertEqual( res3[0], ('AA', date(1962, 1, 2), date(2012, 12, 31), 12836)) self.assertEqual( res3[1], ('AEOS', date(1994, 4, 14), date(2007, 1, 26), 3214)) self.assertEqual( res3[2], ('EXPE', date(2000, 5, 4), date(2012, 12, 31), 2694)) self.assertEqual( res3[3], ('INKT', date(2000, 5, 4), date(2003, 3, 19), 720)) self.assertEqual( res3[4], ('MCT', date(2000, 5, 4), date(2001, 11, 29), 394)) self.assertEqual( res3[5], ('NFLX', date(2002, 5, 29), date(2012, 12, 31), 2668)) self.assertEqual( res3[6], ('VXX', date(2009, 1, 30), date(2012, 12, 31), 987)) self.assertEqual(res4[0], ('AA', Decimal('4.4800'), 7)) self.assertEqual(res4[1], ('AEOS', Decimal('3.6800'), 6)) self.assertEqual(res4[2], ('NFLX', Decimal('0.5000'), 1)) self.assertEqual(res4[3], ('VXX', Decimal('8.0100'), 2))
def analyze(self, exp): # 1. Select all the leaders for that expiry # 2. Run StxJL for each leader, for each factor # ldr_list = self.get_leaders( setup_df = pd.DataFrame(columns=['date', 'stk', 'setup']) factors = [1.0, 1.5, 2.0] q1 = "select min(dt), max(dt) from leaders where exp='{0:s}'".format( exp) date_list = stxdb.db_read_cmd(q1) exp_dt = datetime.datetime.strptime(exp, '%Y-%m-%d') jls_date = stxcal.next_busday('{0:d}-01-01'.format(exp_dt.year)) jle_date = stxcal.move_busdays(exp, 0) q2 = "select distinct stk from leaders where exp='{0:s}'".format(exp) ldr_list = stxdb.db_read_cmd(q2) for ldr in ldr_list: s_date = str(date_list[0][0]) e_date = str(date_list[0][1]) stk = ldr[0] ts = StxTS(stk, jls_date, jle_date) for ixx in range(1, 5): ts.df['hi_{0:d}'.format(ixx)] = ts.df['hi'].shift(ixx) ts.df['lo_{0:d}'.format(ixx)] = ts.df['lo'].shift(ixx) jl_list = [] for factor in factors: jl = StxJL(ts, factor) jl.jl(s_date) jl_list.append(jl) while s_date < e_date: setup_df = self.setups(ts, jl_list, setup_df) ts.next_day() for jl in jl_list: jl.nextjl() s_date = stxcal.next_busday(s_date) print('Finished {0:s}'.format(stk)) setup_df = setup_df.sort_values(by=['date', 'setup', 'stk']) for _, row in setup_df.iterrows(): print('{0:s} {1:12s} {2:s}'.format(row['date'], row['stk'], row['setup']))
def stock_labels(stk, elim_dct=get_eliminated_words()): q = sql.Composed( [ sql.SQL("SELECT name FROM etfs WHERE ticker IN "), sql.SQL("(SELECT etf FROM stk_etfs WHERE stk = "), sql.Literal(stk), sql.SQL(")") ] ) res = stxdb.db_read_cmd(q.as_string(stxdb.db_get_cnx())) etf_words = ' '.join([x[0] for x in res]) etf_words_list = etf_words.split() labels = [x for x in etf_words_list if x not in elim_dct] labels = list(set(labels)) return labels
def test_04_load_md_data(self): md_eod = StxEOD(self.md_in_dir, 'md', self.recon_tbl) log_fname = 'splitsdivistest{0:s}.csv'.format(datetime.datetime.now(). strftime('%Y%m%d%H%M%S')) db_stx, stx_dct = md_eod.create_exchange() with open(log_fname, 'w') as logfile: md_eod.load_marketdata_file(os.path.join( self.md_in_dir, 'NASDAQ', 'EXPE.csv'), logfile, db_stx) md_eod.load_marketdata_file(os.path.join( self.md_in_dir, 'NASDAQ', 'NFLX.csv'), logfile, db_stx) md_eod.load_marketdata_file(os.path.join( self.md_in_dir, 'NYSE', 'IBM.csv'), logfile, db_stx) md_eod.load_marketdata_file(os.path.join( self.md_in_dir, 'AMEX', 'VXX.csv'), logfile, db_stx) os.remove(log_fname) res1 = stxdb.db_read_cmd( StxEOD.sql_show_tables.format(self.md_eod_tbl)) res2 = stxdb.db_read_cmd( StxEOD.sql_show_tables.format(self.md_split_tbl)) res3 = stxdb.db_read_cmd('select distinct stk from {0:s}'. format(self.md_eod_tbl)) res4 = stxdb.db_read_cmd("select stk, count(*) from {0:s} group by stk" " order by stk".format(self.md_eod_tbl)) print('res1') print(res1) print('res2') print(res2) print('res3') print(res3) print('res4') print(res4) self.assertTrue(len(res1) == 1 and len(res2) == 1 and len(res3) == 4 and res4[0][0] == 'EXPE' and res4[0][1] == 2820 and res4[1][0] == 'IBM' and res4[1][1] == 13783 and res4[2][0] == 'NFLX' and res4[2][1] == 3616 and res4[3][0] == 'VXX' and res4[3][1] == 1932)
def eow_job(self): print('247 end of week job') print('247 end of week job') ana_date = '2002-04-19' crt_date = '2002-07-19' max_dt_q = stxdb.db_read_cmd('select max(dt) from leaders') if max_dt_q[0][0] is not None: ana_date = str(max_dt_q[0][0]) crt_date = str(datetime.datetime.now().date()) self.end_date = stxcal.move_busdays( str(datetime.datetime.now().date()), 1) while ana_date <= crt_date: self.get_liq_leaders(ana_date) self.get_opt_spread_leaders(ana_date) ana_date = stxcal.move_busdays(stxcal.next_expiry(ana_date), 0)
def get_dataset(self, query, n, is_training): db_data = stxdb.db_read_cmd(query) ml_data = [] total = 0 dropped = 0 ub = n if n is None else (n + 4) for x in db_data: total += 1 arr = np.array(x[4:ub]) if not np.isfinite(arr).all(): dropped += 1 continue data = np.array([np.array([y], dtype=np.float32) for y in x[4:ub]]) result = self.get_result_new(x, is_training) ml_data.append(tuple([data, result])) print('Found {0:d} records, dropped {1:d}'.format(total, dropped)) return ml_data
def get_liq_leaders(self, ana_date, min_act=80000, min_rcr=0.015): stk_list = stxdb.db_read_cmd( "select distinct stk from eods where " "date='{0:s}' order by stk".format(ana_date)) all_stocks = [ s[0] for s in stk_list if re.match(r'^[A-Za-z]', str(s[0])) ] print('Found {0:d} stocks for {1:s}'.format(len(all_stocks), ana_date)) next_exp = stxcal.next_expiry(ana_date) next_exp_busday = stxcal.move_busdays(next_exp, 0) num_stx = 0 num = 0 liq_leaders = [] for s in all_stocks: num += 1 ts = self.ts_dct.get(s) if ts is None: ts = StxTS(s, self.start_date, self.end_date) ts.set_day(str(ts.df.index[-1].date())) ts.df['activity'] = ts.df['volume'] * ts.df['c'] ts.df['avg_act'] = ts.df['activity'].rolling(50).mean() ts.df['rg'] = ts.df['hi'] - ts.df['lo'] ts.df['avg_rg'] = ts.df['rg'].rolling(50).mean() ts.df['rg_c_ratio'] = ts.df['avg_rg'] / ts.df['c'] self.ts_dct[s] = ts num_stx += 1 stk_act = [s] if self.is_liq_leader(ts, ana_date, min_act, min_rcr, stk_act): liq_leaders.append(stk_act) if num % 1000 == 0 or num == len(all_stocks): print('Processed {0:d} stocks, found {1:d} liquidity leaders'. format(num, len(liq_leaders))) print('Found {0:d} liquidity leaders for {1:s}'.format( len(liq_leaders), ana_date)) print('Loaded {0:d} stocks for {1:s}'.format(num_stx, ana_date)) cnx = stxdb.db_get_cnx() with cnx.cursor() as crs: for ldr in liq_leaders: crs.execute( 'insert into leaders(exp,stk,activity,opt_spread) values ' + crs.mogrify('(%s,%s,%s,%s)', [next_exp, ldr[0], int(ldr[1]), -1000]) + 'on conflict do nothing')
def get_indices(self): indices = {'^DJI': '^DJI', '^SPX': '^SPX', '^NDQ': '^COMP'} end_date = stxcal.move_busdays(str(datetime.datetime.now().date()), 0) for stooq_name, db_name in indices.items(): db_cmd = "select max(date) from eods where stk='{0:s}' and " \ "volume > 1".format(db_name) res_db = stxdb.db_read_cmd(db_cmd) start_date = stxcal.next_busday(str(res_db[0][0])) res = requests.get( 'https://stooq.com/q/d/l?s={0:s}&d1={1:s}&d2={2:s}'. format(stooq_name, start_date.replace('-', ''), end_date.replace('-', ''))) lines = res.text.split('\n') for line in lines[1:]: tokens = line.split(',') with open('{0:s}.csv'.format(db_name), 'w') as f: f.write(res.text) logging.info('{0:s}: {1:d}'.format(stooq_name, res.status_code))
def upload_splits(self): with open(self.fname, 'r') as f: lines = f.readlines() lines = list(set(lines)) dct = {} for l in lines: tokens = l.split('\t') ticker = tokens[0].strip() dct[ticker] = '' db_stx = {x[0]: 0 for x in stxdb.db_read_cmd("select * from equities")} print('len(dct) = {0:d}, len(db_stx) = {1:d}'. format(len(dct), len(db_stx))) for stk in dct: if stk not in db_stx: insert_stx = "INSERT INTO equities VALUES "\ "('{0:s}', '', 'US Stocks', 'US')".format(stk) print('insert_stx = {0:s}'.format(insert_stx)) stxdb.db_write_cmd(insert_stx) try: stxdb.db_upload_file(self.fname, 'dividends', '\t') except Exception as ex: print('Failed to upload {0:s}: {1:s}'.format(self.fname, str(ex)))
def __init__(self, stk, sd, ed, eod_tbl='eods', split_tbl='dividends'): self.stk = stk self.sd = pd.to_datetime(sd) self.ed = pd.to_datetime(ed) q = "select * from {0:s} where stk='{1:s}' and dt "\ "between '{2:s}' and '{3:s}' order by dt".format( eod_tbl, stk, sd, ed) df = pd.read_sql(q, stxdb.db_get_cnx(), index_col='dt', parse_dates=['dt']) if self.sd < df.index[0]: self.sd = df.index[0] if self.ed > df.index[-1]: self.ed = df.index[-1] self.sd_str = str(self.sd.date()) self.ed_str = str(self.ed.date()) self.gaps = self.get_gaps(df) df.drop(['stk', 'prev_dt', 'prev_date', 'gap'], axis=1, inplace=True) s_lst = stxdb.db_read_cmd("select dt, ratio, divi_type from " "{0:s} where stk='{1:s}'".format( split_tbl, stk)) # print('stk = {0:s}, s_lst = {1:s}'.format(stk, str(s_lst))) self.splits = { pd.to_datetime(stxcal.next_busday(s[0])): [float(s[1]), int(s[2])] for s in s_lst } self.df = self.fill_gaps(df) self.l = len(self.df) self.pos = 0 self.num_gaps = [ tuple([self.find(str(x[0].date())), self.find(str(x[1].date()))]) for x in self.gaps ] self.start = self.num_gaps[0][0] self.end = self.num_gaps[0][1] self.adj_splits = []
def get_raw_dataset(self, query, n): db_data = stxdb.db_read_cmd(query) ml_data = [] total = 0 dropped = 0 ub = n if n is None else (n + 4) for x in db_data: total += 1 arr = np.array(x[4:ub]) if not np.isfinite(arr).all(): dropped += 1 continue x2, x3 = 0, 0 if x[2] > 0: x2 = 1 elif x[2] < 0: x2 = -1 if x[3] > 0: x3 = 1 elif x[3] < 0: x3 = -1 ml_data.append(x[0:2] + tuple([x2, x3]) + x[4:]) print('Found {0:d} records, dropped {1:d}'.format(total, dropped)) return ml_data
def test_5_drop_table(self): stxdb.db_write_cmd(self.sql_drop_tbl) res = stxdb.db_read_cmd(self.sql_tbls) self.assertEqual(len(res), 0)
type=str, default=os.path.join(os.getenv('HOME'), 'Downloads')) args = parser.parse_args() logging.basicConfig( format='%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] - ' '%(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO) logging.info('Getting index (S&P500, Nasdaq, Dow Jones) quotes') si = StxIndex() index_end_date = stxcal.current_busdate(hr=9) index_start_date = stxcal.move_busdays(index_end_date, -5) for idx in ['^GSPC', '^IXIC', '^DJI']: try: si.get_quote(idx, index_start_date, index_end_date) except: logging.error(f'Get index quote failed for {idx}') tb.print_exc() sdf = StxDatafeed() res = stxdb.db_read_cmd("SELECT dt FROM analyses WHERE " "analysis='eod_datafeed'") start_date = str(res[0][0]) if res else '2000-01-01' logging.info('The start date is: {0:s}'.format(start_date)) res = stxdb.db_read_cmd("SELECT MAX(dt) FROM dividends") splits_start_date = str(res[0][0]) if res else '2000-01-01' sdf.parse_stooq_new(start_date) sdf.handle_splits(splits_start_date) sdf.backup_database()
def __init__(self, activity_threshold=250): self.activity_threshold = activity_threshold db_stx = stxdb.db_read_cmd( 'select ticker from equities order by ticker') self.all_stx = [x[0] for x in db_stx]
def get_etf_holdings(): filename = os.path.join(os.getenv('HOME'), 'reports', 'etf_list.txt') with open(filename, 'r') as f: text = f.read() lines = text.split('\n') for line in lines: tokens = line.split('\t') if len(tokens) > 4: etf_ticker = tokens[0] etf_name = tokens[1].replace("'", '') etf_category = tokens[2] stxdb.db_write_cmd( f"INSERT INTO etfs VALUES('{etf_ticker}', '{etf_name}', " f"'{etf_category}') ON CONFLICT (ticker) DO NOTHING" ) # begin - parse the holdings info for each fund etf_list = stxdb.db_read_cmd('select ticker from etfs') num_etfs = len(etf_list) print(f'Getting data for {num_etfs} ETFs') headers = requests.utils.default_headers() headers['User-Agent'] = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) '\ 'Gecko/20100101 Firefox/88.0' etf_name = 'XLE' for i, etf_record in enumerate(etf_list): etf_name = etf_record[0] req = requests.get( f"https://www.zacks.com/funds/etf/{etf_name}/holding", headers=headers ) if req.status_code != 200: print(f'Request for ETF {etf_name} failed with status ' f'{req.status_code}, error: {req.text}') continue lines = req.text.split('\n') # print(f'The request response has {len(lines)} lines') holdings_line = '' for line in lines: if line.startswith('etf_holdings.formatted_data'): holdings_line = line if not holdings_line: print(f'No holdings line found for {etf_name}, skipping...') continue holdings_tokens = holdings_line[34: -5].split(' ] , [ ') # print(f'There are {len(holdings_tokens)} holdings tokens') hold_list = [] for holding_row in holdings_tokens: # print(f'holding_row = {holding_row}') holding_tokens = holding_row.split(', ') # print(f'There are {len(holding_tokens)} holding tokens') if len(holding_tokens) < 2: continue ticker_token = holding_tokens[1] # print(f'Ticker token = {ticker_token}') ticker_index = ticker_token.find('rel=') # print(f'Ticker index = {ticker_index}') if ticker_index == -1: continue ticker_tokens = ticker_token[ticker_index:].split('\\"') # print(f'There are {len(ticker_tokens)} ticker tokens') if len(ticker_tokens) >= 2: ticker = ticker_tokens[1] hold_list.append(ticker) stxdb.db_write_cmd( f"INSERT INTO stk_etfs VALUES('{ticker}', '{etf_name}') " f"ON CONFLICT(stk, etf) DO NOTHING" ) print(f'ETF {etf_name} has {len(hold_list)} equity holdings: ' f'{hold_list}') if i > 0 and (i % 100 == 0 or i == num_etfs - 1): print(f'Got data for {i} out of {num_etfs} ETFS') time.sleep(2)