Ejemplo n.º 1
0
 def parseeodfiles(self, s_date, e_date):
     dt = s_date
     num_days = stxcal.num_busdays(s_date, e_date)
     logging.info('Stooq: uploading EOD data for {0:d} days'.
                  format(num_days))
     day_num = 0
     while dt <= e_date:
         logging.info('stooq: {0:s}'.format(dt))
         # db_stx, _ = self.create_exchange()
         try:
             with open('{0:s}/{1:s}_d.prn'.format
                       (self.in_dir, dt.replace('-', ''))) as ifile:
                 lines = ifile.readlines()
         except IOError as ioe:
             logging.error('{0:s} failed to read EOD file: {1:s}'.
                           format(dt, str(ioe)))
             dt = stxcal.next_busday(dt)
             continue
         for line in lines:
             try:
                 self.parseeodline(line)
             except Exception as ex:
                 logging.info('Error with line {0:s}: {1:s}'.format
                              (line.strip(), str(ex)))
         dt = stxcal.next_busday(dt)
         day_num += 1
         if day_num % 20 == 0 or day_num == num_days:
             logging.info(' Uploaded EOD data for {0:d} days'.
                          format(day_num))
Ejemplo n.º 2
0
 def test_1_next_busday(self):
     res1 = stxcal.next_busday(self.dt1)
     res2 = stxcal.next_busday(self.dt2)
     res3 = stxcal.next_busday(self.dt3)
     res4 = stxcal.next_busday(self.dt4)
     self.assertTrue((res1 == '2016-12-16') and (res2 == '2016-12-19') and
                     (res3 == '2016-12-19') and (res4 == '2016-12-27'))
Ejemplo n.º 3
0
 def load_eoddata_files(self, sd, ed, stks='', batch=False):
     print('Loading eoddata files...')
     dt = sd
     # dt = stxcal.move_busdays(sd, -25)
     fnames = [
         os.path.join(self.in_dir, 'AMEX_{0:s}.txt'),
         os.path.join(self.in_dir, 'NASDAQ_{0:s}.txt'),
         os.path.join(self.in_dir, 'NYSE_{0:s}.txt')
     ]
     while dt <= ed:
         print('eoddata: {0:s}'.format(dt))
         data_available = True
         dtc = dt.replace('-', '')
         for fname in fnames:
             fname_dt = fname.format(dtc)
             if not os.path.isfile(fname_dt):
                 print('Could not find file {0:s}'.format(fname_dt))
                 data_available = False
         if not data_available:
             print('Data is missing for date {0:s}. Exiting.'.format(dt))
             return
         for fname in fnames:
             fname_dt = fname.format(dtc)
             self.load_eoddata_file(fname_dt, dt, dtc, stks, batch=batch)
         stxdb.db_write_cmd("update analyses set dt='{0:s}' where "
                            "analysis='eod_datafeed'".format(dt))
         dt = stxcal.next_busday(dt)
     print('Loaded eoddata files')
Ejemplo n.º 4
0
    def analyze(self, exp):
        # 1. Select all the leaders for that expiry
        # 2. Run StxJL for each leader, for each factor
        # ldr_list = self.get_leaders(
        setup_df = pd.DataFrame(columns=['date', 'stk', 'setup'])
        factors = [1.0, 1.5, 2.0]
        q1 = "select min(dt), max(dt) from leaders where exp='{0:s}'".format(
            exp)
        date_list = stxdb.db_read_cmd(q1)
        exp_dt = datetime.datetime.strptime(exp, '%Y-%m-%d')
        jls_date = stxcal.next_busday('{0:d}-01-01'.format(exp_dt.year))
        jle_date = stxcal.move_busdays(exp, 0)
        q2 = "select distinct stk from leaders where exp='{0:s}'".format(exp)
        ldr_list = stxdb.db_read_cmd(q2)
        for ldr in ldr_list:
            s_date = str(date_list[0][0])
            e_date = str(date_list[0][1])
            stk = ldr[0]
            ts = StxTS(stk, jls_date, jle_date)
            for ixx in range(1, 5):
                ts.df['hi_{0:d}'.format(ixx)] = ts.df['hi'].shift(ixx)
                ts.df['lo_{0:d}'.format(ixx)] = ts.df['lo'].shift(ixx)

            jl_list = []
            for factor in factors:
                jl = StxJL(ts, factor)
                jl.jl(s_date)
                jl_list.append(jl)
            while s_date < e_date:
                setup_df = self.setups(ts, jl_list, setup_df)
                ts.next_day()
                for jl in jl_list:
                    jl.nextjl()
                s_date = stxcal.next_busday(s_date)
            print('Finished {0:s}'.format(stk))
        setup_df = setup_df.sort_values(by=['date', 'setup', 'stk'])
        for _, row in setup_df.iterrows():
            print('{0:s} {1:12s} {2:s}'.format(row['date'], row['stk'],
                                               row['setup']))
Ejemplo n.º 5
0
 def get_setups_for_tomorrow(self, dt):
     next_dt = stxcal.next_busday(dt)
     q = sql.Composed([
         sql.SQL('select * from setups where dt='),
         sql.Literal(next_dt),
         sql.SQL(' and setup in ('),
         sql.SQL(', ').join(
             [sql.Literal('JC_5DAYS'),
              sql.Literal('JC_1234')]),
         sql.SQL(')')
     ])
     df = pd.read_sql(q, stxdb.db_get_cnx())
     return df
Ejemplo n.º 6
0
 def do_analysis(self, crt_date, max_spread, eod):
     spreads = self.get_opt_spreads(crt_date, eod)
     df_1 = self.get_triggered_setups(crt_date)
     df_3 = self.get_jl_setups(crt_date)
     if df_1.empty and df_3.empty:
         logging.error(
             f'No triggered/JL setups for {crt_date}.  Exiting...')
         return None
     self.get_high_activity(crt_date, df_1)
     self.get_high_activity(crt_date, df_3)
     df_1 = self.filter_spreads_hiact(df_1, spreads, max_spread)
     df_3 = self.filter_spreads_hiact(df_3, spreads, max_spread)
     res = ['<html>', self.report_style, '<body>']
     res.append('<h3>TODAY - {0:s}</h3>'.format(crt_date))
     res.extend(self.get_report(crt_date, df_1, True))
     if eod:
         df_2 = self.get_setups_for_tomorrow(crt_date)
         next_date = stxcal.next_busday(crt_date)
         self.get_high_activity(crt_date, df_2)
         df_2 = self.filter_spreads_hiact(df_2, spreads, max_spread)
         res.append('<h3>TOMMORROW - {0:s}</h3>'.format(next_date))
         res.extend(self.get_report(crt_date, df_2, False))
     res.append('<h3>JL - {0:s}</h3>'.format(crt_date))
     res.extend(self.get_report(crt_date, df_3, False))
     res.append('</body>')
     res.append('</html>')
     with open('/tmp/x.html', 'w') as html_file:
         html_file.write('\n'.join(res))
     logging.info('Generated HTML report')
     time_now = datetime.datetime.now()
     time_now_date = '{0:d}-{1:02d}-{2:02d}'.format(time_now.year,
                                                    time_now.month,
                                                    time_now.day)
     suffix = 'EOD'
     if time_now_date == crt_date:
         if time_now.hour >= 10 and time_now.hour < 16:
             suffix = '{0:02d}{1:02d}'.format(time_now.hour,
                                              time_now.minute)
         else:
             suffix = 'EOD'
     else:
         suffix = 'EOD' if eod else 'ID'
     pdf_fname = '{0:s}_{1:s}.pdf'.format(crt_date, suffix)
     logging.info('PDF report file name: {0:s}'.format(pdf_fname))
     pdf_filename = os.path.join(self.report_dir, pdf_fname)
     HTML(filename='/tmp/x.html').write_pdf(pdf_filename)
     logging.info('Saved report locally in {0:s}'.format(pdf_filename))
     return pdf_filename
Ejemplo n.º 7
0
 def get_indices(self):
     indices = {'^DJI': '^DJI', '^SPX': '^SPX', '^NDQ': '^COMP'}
     end_date = stxcal.move_busdays(str(datetime.datetime.now().date()), 0)
     for stooq_name, db_name in indices.items():
         db_cmd = "select max(date) from eods where stk='{0:s}' and " \
                  "volume > 1".format(db_name)
         res_db = stxdb.db_read_cmd(db_cmd)
         start_date = stxcal.next_busday(str(res_db[0][0]))
         res = requests.get(
             'https://stooq.com/q/d/l?s={0:s}&d1={1:s}&d2={2:s}'.
             format(stooq_name,
                    start_date.replace('-', ''),
                    end_date.replace('-', '')))
         lines = res.text.split('\n')
         for line in lines[1:]:
             tokens = line.split(',')
         with open('{0:s}.csv'.format(db_name), 'w') as f:
             f.write(res.text)
         logging.info('{0:s}: {1:d}'.format(stooq_name, res.status_code))
Ejemplo n.º 8
0
 def __init__(self, stk, sd, ed, eod_tbl='eods', split_tbl='dividends'):
     self.stk = stk
     self.sd = pd.to_datetime(sd)
     self.ed = pd.to_datetime(ed)
     q = "select * from {0:s} where stk='{1:s}' and dt "\
         "between '{2:s}' and '{3:s}' order by dt".format(
         eod_tbl, stk, sd, ed)
     df = pd.read_sql(q,
                      stxdb.db_get_cnx(),
                      index_col='dt',
                      parse_dates=['dt'])
     if self.sd < df.index[0]:
         self.sd = df.index[0]
     if self.ed > df.index[-1]:
         self.ed = df.index[-1]
     self.sd_str = str(self.sd.date())
     self.ed_str = str(self.ed.date())
     self.gaps = self.get_gaps(df)
     df.drop(['stk', 'prev_dt', 'prev_date', 'gap'], axis=1, inplace=True)
     s_lst = stxdb.db_read_cmd("select dt, ratio, divi_type from "
                               "{0:s} where stk='{1:s}'".format(
                                   split_tbl, stk))
     # print('stk = {0:s}, s_lst = {1:s}'.format(stk, str(s_lst)))
     self.splits = {
         pd.to_datetime(stxcal.next_busday(s[0])): [float(s[1]),
                                                    int(s[2])]
         for s in s_lst
     }
     self.df = self.fill_gaps(df)
     self.l = len(self.df)
     self.pos = 0
     self.num_gaps = [
         tuple([self.find(str(x[0].date())),
                self.find(str(x[1].date()))]) for x in self.gaps
     ]
     self.start = self.num_gaps[0][0]
     self.end = self.num_gaps[0][1]
     self.adj_splits = []
Ejemplo n.º 9
0
 def move_downloaded_options(self, start_yymm, end_yymm):
     start_yy, start_mm = start_yymm.split('-')
     start_year = int(start_yy)
     start_month = int(start_mm)
     end_yy, end_mm = end_yymm.split('-')
     end_year = int(end_yy)
     end_month = int(end_mm)
     start_date = '{0:d}-{1:02d}-01'.format(start_year, start_month)
     logging.info('start_date = {0:s}'.format(start_date))
     if not stxcal.is_busday(start_date):
         start_date = stxcal.next_busday(start_date)
     if end_month == 12:
         end_year += 1
     end_month = (end_month + 1) % 12
     end_date = '{0:d}-{1:02d}-01'.format(end_year, end_month)
     logging.info('end_date = {0:s}'.format(end_date))
     end_date = stxcal.prev_busday(end_date)
     logging.info('Moving to downloaded_options table all options dated '
                  'between {0:s} and {1:s}'.format(start_date, end_date))
     sql = 'INSERT INTO downloaded_options '\
         '(expiry, und, cp, strike, dt, bid, ask, v, oi) '\
         "SELECT * FROM options WHERE dt BETWEEN '{0:s}' AND '{1:s}' "\
         'ON CONFLICT (expiry, und, cp, strike, dt) DO NOTHING'.format(
         start_date, end_date)
     logging.info('sql cmd: {0:s}'.format(sql))
     stxdb.db_write_cmd(sql)
     logging.info('Moved to downloaded_options table all options dated '
                  'between {0:s} and {1:s}'.format(start_date, end_date))
     logging.info('Removing from options table all options downloaded '
                  'between {0:s} and {1:s}'.format(start_date, end_date))
     sql = "DELETE FROM options WHERE dt BETWEEN '{0:s}' AND '{1:s}'"\
         " ".format(start_date, end_date)
     logging.info('sql cmd: {0:s}'.format(sql))
     stxdb.db_write_cmd(sql)
     logging.info('Removed from options table all options downloaded '
                  'between {0:s} and {1:s}'.format(start_date, end_date))
Ejemplo n.º 10
0
    def parse_stooq_new(self, last_db_date):
        logging.info('Checking if a new stooq file has been downloaded')
        # stooq_file = os.path.join(os.getenv('DOWNLOAD_DIR'), 'data_d.txt')
        download_dir = self.config.get('datafeed', 'download_dir')
        stooq_file = os.path.join(download_dir, 'data_d.txt')
        if not os.path.exists(stooq_file):
            logging.info('No new stooq data file found.  Nothing to do.')
            return
        logging.info('Reading stooq file, renaming columns, getting daily '
                     'US stocks data')
        df = pd.read_csv(stooq_file,
                         dtype={
                             "<TICKER>": "string",
                             "<PER>": "string",
                             "<DATE>": "string",
                             "<TIME>": "string",
                             "<OPEN>": float,
                             "<HIGH>": float,
                             "<LOW>": float,
                             "<CLOSE>": float,
                             "<VOL>": int,
                             "<OPENINT>": int
                         })
        df.columns = [x[1:-1].lower() for x in df.columns]
        stx_df = df.query('ticker.str.endswith(".US") and per == "D"',
                          engine='python').copy()
        logging.info(
            'Getting {0:d} daily US stocks out of {1:d} records'.format(
                len(stx_df), len(df)))
        stx_df['date'] = stx_df['date'].astype(str)
        stx_df['date'] = stx_df.apply(lambda r: '{0:s}-{1:s}-{2:s}'.format(
            r['date'][0:4], r['date'][4:6], r['date'][6:8]),
                                      axis=1)
        logging.info('Converted stx_df dates in yyyy-mm-dd format')
        dates = stx_df.groupby(by='date')['ticker'].count()
        next_date = stxcal.next_busday(last_db_date)
        ix0, num_dates = 0, len(dates)
        logging.info('Data available for {0:d} dates, from {1:s} to {2:s}; DB '
                     'needs data starting from {3:s}'.format(
                         len(dates), dates.index[0],
                         dates.index[num_dates - 1], next_date))
        db_dates = []
        while ix0 < num_dates:
            if dates.index[ix0] == next_date:
                break
            ix0 += 1
        for ixx in range(ix0, num_dates):
            if dates.index[ixx] == next_date and dates.values[ixx] > 9000:
                db_dates.append(dates.index[ixx])
            else:
                if dates.index[ixx] != next_date:
                    logging.error(f'Missing date {next_date}; got '
                                  f'{dates.index[ixx]} instead')

                if dates.values[ixx] < 9000:
                    logging.error(f'Not enough records ({dates.values[ixx]}) '
                                  f'available for {dates.index[ixx]}')
                break
            next_date = stxcal.next_busday(next_date)

        if not db_dates:
            logging.info('No new data available for processing. Exiting')
            return
        logging.info('Check that there are no time gaps between DB data and '
                     'upload data')
        start_date = stxcal.next_busday(last_db_date)
        num_bdays = stxcal.num_busdays(start_date, db_dates[0])
        if num_bdays > 0:
            logging.warn(
                'No data for {0:d} days ({1:s} - {2:s}). Exiting ...'.format(
                    num_bdays, start_date, stxcal.prev_busday(db_dates[0])))
            return
        logging.info('Check that there are no time gaps in the upload data')
        for ixx in range(len(db_dates) - 1):
            if stxcal.next_busday(db_dates[ixx]) != db_dates[ixx + 1]:
                logging.warn('Inconsistent dates {0:s} and {1:s} '
                             'at indexes {2:d} and {3:d}'.format(
                                 db_dates[ixx], db_dates[ixx + 1], ixx,
                                 ixx + 1))

        sel_stx_df = stx_df.query('date in @db_dates').copy()
        logging.info(
            '{0:d}/{1:d} records found for following dates: [{2:s}]'.format(
                len(sel_stx_df), len(stx_df), ', '.join(db_dates)))
        sel_stx_df['invalid'] = sel_stx_df.apply(
            lambda r: np.isnan(r['open']) or np.isnan(r['high']) or np.
            isnan(r['low']) or np.isnan(r['close']) or np.isnan(r['vol']) or r[
                'vol'] == 0 or r['open'] > r['high'] or r['open'] < r[
                    'low'] or r['close'] > r['high'] or r['close'] < r['low'],
            axis=1)
        valid_stx_df = sel_stx_df.query('not invalid').copy()
        logging.info('Found {0:d} valid records out of {1:d} records'.format(
            len(valid_stx_df), len(sel_stx_df)))

        def process_row(r):
            stk = r['ticker'][:-3].replace("-.", ".P.").replace("_",
                                                                ".").replace(
                                                                    '-', '.')
            o = int(100 * r['open'])
            hi = int(100 * r['high'])
            lo = int(100 * r['low'])
            c = int(100 * r['close'])
            v = int(r['vol'])
            v = v // 1000
            if v == 0:
                v = 1
            lst = [stk, o, hi, lo, c, v]
            return pd.Series(lst)

        valid_stx_df[['ticker', 'open', 'high', 'low', 'close', 'vol']] = \
            valid_stx_df.apply(process_row, axis=1)
        valid_stx_df['openint'] = 2
        valid_stx_df.drop(columns=['per', 'time', 'invalid'],
                          axis=1,
                          inplace=True)
        valid_stx_df.columns = ['stk', 'dt', 'o', 'hi', 'lo', 'c', 'v', 'oi']

        with closing(stxdb.db_get_cnx().cursor()) as crs:
            sql = 'CREATE TEMPORARY TABLE temp_table ('\
                'stk VARCHAR(16) NOT NULL, '\
                'dt DATE NOT NULL, '\
                'o INTEGER NOT NULL, '\
                'hi INTEGER NOT NULL, '\
                'lo INTEGER NOT NULL, '\
                'c INTEGER NOT NULL, '\
                'v INTEGER, '\
                'oi INTEGER, '\
                'PRIMARY KEY(stk, dt))'
            crs.execute(sql)
            logging.info('Created temporary table')
            upload_data = valid_stx_df.values.tolist()
            execute_values(
                crs, 'INSERT INTO temp_table '
                '(stk, dt, o, hi, lo, c, v, oi) VALUES %s', upload_data)
            logging.info('Uploaded dataframe into temporary table')
            stxdb.db_write_cmd(
                'INSERT INTO eods (stk, dt, o, hi, lo, c, v, oi) '
                'SELECT * FROM temp_table ON CONFLICT (stk, dt) DO '
                'UPDATE SET o = EXCLUDED.o, hi = EXCLUDED.hi, '
                'lo = EXCLUDED.lo, c = EXCLUDED.c, v = EXCLUDED.v, '
                'oi = EXCLUDED.oi')
            logging.info('Uploaded data into eods table')
        last_upload_date = valid_stx_df['dt'].max()
        stxdb.db_write_cmd("UPDATE analyses SET dt='{0:s}' WHERE "
                           "analysis='eod_datafeed'".format(last_upload_date))
        logging.info('Updated latest eod datafeed date {0:s} in DB'.format(
            last_upload_date))
        self.rename_stooq_file(dates.index[0], dates.index[num_dates - 1])
Ejemplo n.º 11
0
    args = parser.parse_args()
    data_dir = os.getenv('DOWNLOAD_DIR')
    logging.basicConfig(
        format='%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] - '
        '%(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        level=logging.INFO
    )
    seod = StxEOD(data_dir)
    if args.stooq:
        s_date_sq = '2018-03-12'
        e_date_sq = '2018-03-29'
        seod.parseeodfiles(s_date_sq, e_date_sq)
        sys.exit(0)

    # Handle default EODData stream
    # 1. Get the last date for which eod data is available in the database
    # res = stxdb.db_read_cmd("select max(dt) from eods where oi=0")
    res = stxdb.db_read_cmd("select dt from analyses where "
                            "analysis='eod_datafeed'")
    start_date = stxcal.next_busday(str(res[0][0]))
    # 2. get the last trading date
    end_date = stxcal.move_busdays(str(datetime.datetime.now().date()), 0)
    # 3. Find out if files are available for the dates
    # 4. if the files are available, upload them
    batch_load = True if args.batch else False
    seod.load_eoddata_files(start_date, end_date, batch=batch_load)
    res = stxdb.db_read_cmd("select max(dt) from dividends")
    start_date = stxcal.next_busday(str(res[0][0]))
    seod.handle_splits(start_date)
Ejemplo n.º 12
0
        analysis_type = 'EOD'
        eod = True
    if args.intraday:
        analysis_type = 'Intraday'
    if args.date:
        crt_date = args.date
    stx_ana = StxAnalyzer()
    if args.startdate and args.enddate:
        logging.info(
            'Running analysis from {args.startdate} to {args.enddate}')
        crs_date = args.startdate
        num = 0
        while crs_date <= args.enddate:
            logging.info(f'Running analysis for {crs_date}')
            pdf_report = stx_ana.do_analysis(crs_date, args.max_spread, True)
            if pdf_report is None:
                logging.error(f'No report was generated for {crs_date}')
            stx_ana.update_local_directory(crs_date)
            crs_date = stxcal.next_busday(crs_date)
            num += 1
        logging.info(f'Ran EOD analysis for {num} days between '
                     f'{args.startdate} and {args.enddate}')
    else:
        logging.info(f'Running analysis for {crt_date}')
        pdf_report = stx_ana.do_analysis(crt_date, args.max_spread, eod)
        if pdf_report is None:
            logging.error(f'No report was generated for {crt_date}')
        stx_ana.update_local_directory(crt_date)
    # gdc = GoogleDriveClient()
    # gdc.upload_report(pdf_report, os.path.basename(pdf_report))