def business_days(start, end=None):
    """business_days

    Build a list of business days (weekends and market
    holidays are skipped) between ``start`` and ``end``.

    :param start: datetime to start walking forward from
    :param end: optional - datetime to stop at
        (default is ``ae_utils.last_close()``)
    """
    if not end:
        end = ae_utils.last_close()
    ret = []
    while start < end:
        is_a_business_day = (
            start not in holidays()
            and start.weekday() != 6
            and start.weekday() != 5)
        if is_a_business_day:
            ret.append(start)
        start += timedelta(days=1)
    return ret
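# Usage sketch (hypothetical, not part of the original module): count the
# trading days in the last 30 calendar days with business_days(). Assumes
# the same ``ae_utils``, ``holidays()`` and ``timedelta`` names used above
# are importable in this scope.
def _example_count_trading_days():
    """Return the number of business days in the last 30 calendar days."""
    window_start = ae_utils.last_close() - timedelta(days=30)
    return len(business_days(start=window_start))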
def get_days_between_dates(
        from_historical_date,
        last_close_to_use=None):
    """get_days_between_dates

    Build a list of calendar days starting at a historical date and
    walking forward one day at a time until the last close datetime.

    :param from_historical_date: historical date in time to start
        walking forward until the last close datetime
    :param last_close_to_use: optional - ending date in time
        (right leg of the window, default is ``ae_utils.last_close()``)
    """
    use_last_close = last_close_to_use
    if not use_last_close:
        use_last_close = ae_utils.last_close()
    dates = []
    while from_historical_date < use_last_close:
        dates.append(from_historical_date)
        from_historical_date += datetime.timedelta(
            days=1)
    return dates
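# Usage sketch (hypothetical): list every calendar day from a week ago up
# to the latest close. Assumes the ``datetime`` and ``ae_utils`` imports
# used by this module are in scope.
def _example_days_since(days_back=7):
    """Return each day between ``days_back`` days ago and the last close."""
    start = ae_utils.last_close() - datetime.timedelta(days=days_back)
    return get_days_between_dates(from_historical_date=start)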
def inspect_datasets(ticker=None, start_date=None, datasets=None):
    """inspect_datasets

    Loop over all cached data in redis by going sequentially per date
    and examine the latest ``date`` value in the cache to check if it
    matches the redis key's date.

    For IEX Cloud minute data errors, running this function will print
    out commands to fix any issues (if possible):

    ::

        fetch -t TICKER -g iex_min -F DATE_TO_FIX

    :param ticker: optional - string ticker
    :param start_date: optional - datetime start date for the loop
        (default is ``2019-01-01``)
    :param datasets: optional - list of strings to extract specific,
        supported datasets (default is ``['minute']``)
    """
    if not start_date:
        start_date = datetime.datetime(year=2019, month=1, day=1)
    if not datasets:
        datasets = ['minute']
    if not ticker:
        ticker = 'SPY'
    tickers = [ticker]
    fix_suggestions = []
    last_close = ae_utils.last_close()
    for ticker in tickers:
        not_done = True
        cur_date = start_date
        while not_done:
            cur_date_str = cur_date.strftime(ae_consts.COMMON_DATE_FORMAT)
            log.info(f'extracting {ticker} date={cur_date_str}')
            res = None
            # get from a date or the latest if not set
            if cur_date_str:
                res = ae_extract.extract(
                    ticker=ticker,
                    date=cur_date_str,
                    datasets=datasets)
            else:
                res = ae_extract.extract(
                    ticker=ticker,
                    datasets=datasets)
            weekday_name = cur_date.strftime('%A')
            for ds_name in datasets:
                df = res[ticker][ds_name]
                if ae_consts.is_df(df=df):
                    if 'date' in df:
                        latest_date = df['date'].iloc[-1]
                        latest_date_str = latest_date.strftime(
                            ae_consts.COMMON_DATE_FORMAT)
                        if latest_date_str == cur_date_str:
                            log.info(
                                f'valid - {ds_name} latest dates match '
                                f'{weekday_name}: '
                                f'{latest_date_str} == {cur_date_str}')
                        else:
                            if ds_name != 'daily':
                                log.critical(
                                    f'{ds_name} latest date does '
                                    f'NOT match on '
                                    f'{weekday_name} {cur_date_str} found: '
                                    f'{latest_date_str}')
                            else:
                                one_day_back = (
                                    latest_date +
                                    datetime.timedelta(days=1))
                                if weekday_name == 'Monday':
                                    one_day_back = (
                                        latest_date +
                                        datetime.timedelta(days=3))
                                latest_date_str = one_day_back.strftime(
                                    ae_consts.COMMON_DATE_FORMAT)
                                if latest_date_str == cur_date_str:
                                    log.info(
                                        f'valid - {ds_name} latest dates '
                                        f'match '
                                        f'{weekday_name}: '
                                        f'{latest_date_str} == '
                                        f'{cur_date_str}')
                                else:
                                    log.critical(
                                        f'{ds_name} latest date does '
                                        f'NOT match on '
                                        f'{weekday_name} {cur_date_str} '
                                        f'found: '
                                        f'{latest_date_str}')
                            if ds_name == 'minute':
                                fix_suggestions.append(
                                    f'fetch -t {ticker} -g iex_min '
                                    f'-F {cur_date_str}')
                    else:
                        log.error(
                            f'{ds_name} df does not have a date column '
                            f'on {cur_date_str}')
                else:
                    log.error(f'Missing {ds_name} df on {cur_date_str}')
            # end of inspecting datasets
            if cur_date > last_close:
                not_done = False
            else:
                cur_date += datetime.timedelta(days=1)
                not_a_weekday = True
                while not_a_weekday:
                    weekday = cur_date.date().weekday()
                    if weekday > 4:
                        log.debug(
                            'SKIP weekend day: '
                            f'{cur_date.strftime("%A on %Y-%m-%d")}')
                        cur_date += datetime.timedelta(days=1)
                    else:
                        not_a_weekday = False
        # end for all dates
    # end of for all tickers
    if len(fix_suggestions) > 0:
        print('-------------------------------')
        print(
            'Detected invalid dates - below are the suggested fixes '
            'to run using the fetch command.')
        print(
            ' - Please be aware fetching data may incur usage charges and '
            'costs on your account')
        for s in fix_suggestions:
            print(s)
    else:
        log.info('done')
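# Usage sketch (hypothetical): audit the cached SPY minute data starting in
# June 2019 and print any suggested ``fetch`` backfill commands. Assumes
# Redis already holds previously fetched datasets for the ticker.
def _example_audit_minute_cache():
    inspect_datasets(
        ticker='SPY',
        start_date=datetime.datetime(year=2019, month=6, day=1),
        datasets=['minute'])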
def ingress_scrub_dataset(label, datafeed_type, df, date_str=None, msg_format=None, scrub_mode='sort-by-date', ds_id='no-id'): """ingress_scrub_dataset Scrub a ``pandas.DataFrame`` from an Ingress pricing service and return the resulting ``pandas.DataFrame`` :param label: log label :param datafeed_type: ``analysis_engine.iex.consts.DATAFEED_*`` type or ``analysis_engine.yahoo.consts.DATAFEED_*``` type .. code-block:: python DATAFEED_DAILY = 900 DATAFEED_MINUTE = 901 DATAFEED_QUOTE = 902 DATAFEED_STATS = 903 DATAFEED_PEERS = 904 DATAFEED_NEWS = 905 DATAFEED_FINANCIALS = 906 DATAFEED_EARNINGS = 907 DATAFEED_DIVIDENDS = 908 DATAFEED_COMPANY = 909 DATAFEED_PRICING_YAHOO = 1100 DATAFEED_OPTIONS_YAHOO = 1101 DATAFEED_NEWS_YAHOO = 1102 :param df: ``pandas DataFrame`` :param date_str: date string for simulating historical dates or ``datetime.datetime.now()`` if not set :param msg_format: msg format for a ``string.format()`` :param scrub_mode: mode to scrub this dataset :param ds_id: dataset identifier """ if not hasattr(df, 'empty'): log.info('{} - {} no dataset_id={}'.format(label, datafeed_type, ds_id)) return None out_df = df daily_date_format = '%I:%M %p' minute_date_format = '%I:%M %p' use_msg_format = msg_format if not msg_format: use_msg_format = 'df={} date_str={}' use_date_str = date_str last_close_date = ae_utils.last_close() today_str = last_close_date.strftime('%Y-%m-%d') year_str = today_str.split('-')[0] if not use_date_str: use_date_str = today_str daily_date_format = ae_consts.IEX_DAILY_DATE_FORMAT minute_date_format = ae_consts.IEX_MINUTE_DATE_FORMAT debug_msg(label=label, datafeed_type=datafeed_type, msg_format='START - {}'.format(use_msg_format), date_str=use_date_str, df=df) try: if scrub_mode == 'sort-by-date': if datafeed_type == iex_consts.DATAFEED_DAILY: new_dates = [] if 'label' in df: for idx, i in enumerate(out_df['label']): new_str = '' if ',' not in i: # Oct 3 new_str = '{}-{}-{}'.format( year_str, i.split(' ')[0], i.split(' ')[1]) else: # Aug 29, 18 new_str = '20{}-{}-{}'.format( i.split(' ')[2], i.split(' ')[0], i.split(' ')[1]).replace(',', '') new_dates.append(new_str) # end for all rows out_df['date'] = pd.to_datetime(new_dates, format=daily_date_format) # end if label is in df elif datafeed_type == iex_consts.DATAFEED_MINUTE: new_dates = [] if 'label' in df: new_dates = build_dates_from_df_col( src_col='label', src_date_format=minute_date_format, use_date_str=use_date_str, df=out_df) out_df['date'] = pd.to_datetime(new_dates, format='%Y-%m-%d %H:%M:%S') # end if label is in df elif datafeed_type == iex_consts.DATAFEED_QUOTE: columns_list = out_df.columns.values if 'latestTime' in columns_list: out_df['date'] = pd.to_datetime( out_df['latestTime'], format=ae_consts.IEX_QUOTE_DATE_FORMAT) if 'latestUpdate' in columns_list: out_df['latest_update'] = pd.to_datetime( out_df['latestUpdate'], unit='ns') if 'extendedPriceTime' in columns_list: out_df['extended_price_time'] = pd.to_datetime( out_df['extendedPriceTime'], unit='ns') if 'iexLastUpdated' in columns_list: out_df['iex_last_update'] = pd.to_datetime( out_df['iexLastUpdated'], unit='ns') if 'openTime' in columns_list: out_df['open_time'] = pd.to_datetime(out_df['openTime'], unit='ns') if 'closeTime' in columns_list: out_df['close_time'] = pd.to_datetime(out_df['closeTime'], unit='ns') # end if label is in df elif datafeed_type == iex_consts.DATAFEED_STATS: log.info('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) if 'label' in df: out_df['date'] = pd.to_datetime(df['label'], 
format=daily_date_format) elif datafeed_type == iex_consts.DATAFEED_PEERS: log.info('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) if 'label' in df: out_df['date'] = pd.to_datetime(df['label'], format=daily_date_format) elif datafeed_type == iex_consts.DATAFEED_NEWS: log.info('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) if 'label' in df: out_df['date'] = pd.to_datetime(df['label'], format=daily_date_format) elif datafeed_type == iex_consts.DATAFEED_FINANCIALS: log.info('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) if 'label' in df: out_df['date'] = pd.to_datetime(df['label'], format=daily_date_format) elif datafeed_type == iex_consts.DATAFEED_EARNINGS: log.info('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) if 'label' in df: out_df['date'] = pd.to_datetime(df['label'], format=daily_date_format) elif datafeed_type == iex_consts.DATAFEED_DIVIDENDS: log.info('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) if 'label' in df: out_df['date'] = pd.to_datetime(df['label'], format=daily_date_format) elif datafeed_type == iex_consts.DATAFEED_COMPANY: log.info('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) if 'label' in df: out_df['date'] = pd.to_datetime(df['label'], format=daily_date_format) elif datafeed_type == yahoo_consts.DATAFEED_PRICING_YAHOO: log.info('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) if 'date' in df: out_df['date'] = pd.to_datetime(df['date'], format=daily_date_format) elif datafeed_type == yahoo_consts.DATAFEED_OPTIONS_YAHOO: log.info('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) if 'date' in df: out_df['date'] = pd.to_datetime(df['date'], format=daily_date_format) elif datafeed_type == yahoo_consts.DATAFEED_NEWS_YAHOO: log.info('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) if 'date' in df: out_df['date'] = pd.to_datetime(df['date'], format=daily_date_format) elif datafeed_type == td_consts.DATAFEED_TD_CALLS: log.debug('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) elif datafeed_type == td_consts.DATAFEED_TD_PUTS: log.debug('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) else: log.info('{} - {} - no scrub_mode={} ' 'support'.format(label, datafeed_type, scrub_mode)) if 'label' in df: out_df['date'] = pd.to_datetime(df['label'], format=daily_date_format) # if/else else: log.info('{} - {} - no scrub_mode'.format(label, datafeed_type)) except Exception as e: log.critical('{} - {} sort={} - ' 'failed with ex={} data={}'.format( label, datafeed_type, scrub_mode, e, df)) out_df = None # end of try/ex debug_msg(label=label, datafeed_type=datafeed_type, msg_format='END - df={} date_str={}', date_str=use_date_str, df=out_df) return out_df
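# Usage sketch (hypothetical): scrub a freshly fetched IEX minute DataFrame
# so its ``date`` column is parsed before caching. ``minute_df`` is a
# placeholder for a DataFrame returned by a fetch; the label and ds_id
# values are illustrative only.
def _example_scrub_minute_df(minute_df, date_str):
    return ingress_scrub_dataset(
        label='scrub-minute',
        datafeed_type=iex_consts.DATAFEED_MINUTE,
        df=minute_df,
        date_str=date_str,
        scrub_mode='sort-by-date',
        ds_id='SPY_minute')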
def fetch_new_stock_datasets(): """fetch_new_stock_datasets Collect datasets for a ticker from IEX Cloud or Tradier .. warning: IEX Cloud charges per request. Here are example commands to help you monitor your usage while handling first time users and automation (intraday, daily, and weekly options are supported). **Setup** :: export IEX_TOKEN=YOUR_IEX_CLOUD_TOKEN export TD_TOKEN=YOUR_TRADIER_TOKEN **Pull Data for a Ticker from IEX and Tradier** :: fetch -t TICKER **Pull from All Supported IEX Feeds** :: fetch -t TICKER -g iex-all **Pull from All Supported Tradier Feeds** :: fetch -t TICKER -g td **Intraday IEX and Tradier Feeds (only minute and news to reduce costs)** :: fetch -t TICKER -g intra # or manually: # fetch -t TICKER -g td,iex_min,iex_news **Daily IEX Feeds (daily and news)** :: fetch -t TICKER -g daily # or manually: # fetch -t TICKER -g iex_day,iex_news **Weekly IEX Feeds (company, financials, earnings, dividends, and peers)** :: fetch -t TICKER -g weekly # or manually: # fetch -t TICKER -g iex_fin,iex_earn,iex_div,iex_peers,iex_news, # iex_comp **IEX Minute** :: fetch -t TICKER -g iex_min **IEX News** :: fetch -t TICKER -g iex_news **IEX Daily** :: fetch -t TICKER -g iex_day **IEX Stats** :: fetch -t TICKER -g iex_stats **IEX Peers** :: fetch -t TICKER -g iex_peers **IEX Financials** :: fetch -t TICKER -g iex_fin **IEX Earnings** :: fetch -t TICKER -g iex_earn **IEX Dividends** :: fetch -t TICKER -g iex_div **IEX Quote** :: fetch -t TICKER -g iex_quote **IEX Company** :: fetch -t TICKER -g iex_comp .. note:: This requires the following services are listening on: - redis ``localhost:6379`` - minio ``localhost:9000`` """ log.info('start - fetch_new_stock_datasets') parser = argparse.ArgumentParser( description=('Download and store the latest stock pricing, ' 'news, and options chain data ' 'and store it in Minio (S3) and Redis. 
' 'Also includes support for getting FinViz ' 'screener tickers')) parser.add_argument('-t', help=('ticker'), required=False, dest='ticker') parser.add_argument( '-g', help=('optional - fetch mode: ' 'initial = default fetch from initial data feeds ' '(IEX and Tradier), ' 'intra = fetch intraday from IEX and Tradier, ' 'daily or day = fetch daily from IEX, ' 'weekly = fetch weekly from IEX, ' 'all = fetch from all data feeds, ' 'td = fetch from Tradier feeds only, ' 'iex = fetch from IEX Cloud feeds only, ' 'min or minute or iex_min = fetch IEX Cloud intraday ' 'per-minute feed ' 'https://iexcloud.io/docs/api/#historical-prices, ' 'day or daily or iex_day = fetch IEX Cloud daily feed ' 'https://iexcloud.io/docs/api/#historical-prices, ' 'quote or iex_quote = fetch IEX Cloud quotes feed ' 'https://iexcloud.io/docs/api/#quote, ' 'stats or iex_stats = fetch IEX Cloud key stats feed ' 'https://iexcloud.io/docs/api/#key-stats, ' 'peers or iex_peers = fetch from just IEX Cloud peers feed ' 'https://iexcloud.io/docs/api/#peers, ' 'news or iex_news = fetch IEX Cloud news feed ' 'https://iexcloud.io/docs/api/#news, ' 'fin or iex_fin = fetch IEX Cloud financials feed' 'https://iexcloud.io/docs/api/#financials, ' 'earn or iex_earn = fetch from just IEX Cloud earnings feeed ' 'https://iexcloud.io/docs/api/#earnings, ' 'div or iex_div = fetch from just IEX Cloud dividends feed' 'https://iexcloud.io/docs/api/#dividends, ' 'iex_comp = fetch from just IEX Cloud company feed ' 'https://iexcloud.io/docs/api/#company'), required=False, dest='fetch_mode') parser.add_argument('-i', help=('optional - ticker id ' 'not used without a database'), required=False, dest='ticker_id') parser.add_argument('-e', help=('optional - options expiration date'), required=False, dest='exp_date_str') parser.add_argument('-l', help=('optional - path to the log config file'), required=False, dest='log_config_path') parser.add_argument('-b', help=('optional - broker url for Celery'), required=False, dest='broker_url') parser.add_argument('-B', help=('optional - backend url for Celery'), required=False, dest='backend_url') parser.add_argument('-k', help=('optional - s3 access key'), required=False, dest='s3_access_key') parser.add_argument('-s', help=('optional - s3 secret key'), required=False, dest='s3_secret_key') parser.add_argument('-a', help=('optional - s3 address format: <host:port>'), required=False, dest='s3_address') parser.add_argument('-S', help=('optional - s3 ssl or not'), required=False, dest='s3_secure') parser.add_argument('-u', help=('optional - s3 bucket name'), required=False, dest='s3_bucket_name') parser.add_argument('-G', help=('optional - s3 region name'), required=False, dest='s3_region_name') parser.add_argument('-p', help=('optional - redis_password'), required=False, dest='redis_password') parser.add_argument('-r', help=('optional - redis_address format: <host:port>'), required=False, dest='redis_address') parser.add_argument('-n', help=('optional - redis and s3 key name'), required=False, dest='keyname') parser.add_argument( '-m', help=('optional - redis database number (0 by default)'), required=False, dest='redis_db') parser.add_argument('-x', help=('optional - redis expiration in seconds'), required=False, dest='redis_expire') parser.add_argument('-z', help=('optional - strike price'), required=False, dest='strike') parser.add_argument( '-c', help=('optional - contract type "C" for calls "P" for puts'), required=False, dest='contract_type') parser.add_argument( '-P', help=('optional - get pricing 
data if "1" or "0" disabled'), required=False, dest='get_pricing') parser.add_argument( '-N', help=('optional - get news data if "1" or "0" disabled'), required=False, dest='get_news') parser.add_argument( '-O', help=('optional - get options data if "1" or "0" disabled'), required=False, dest='get_options') parser.add_argument('-U', help=('optional - s3 enabled for publishing if "1" or ' '"0" is disabled'), required=False, dest='s3_enabled') parser.add_argument( '-R', help=('optional - redis enabled for publishing if "1" or ' '"0" is disabled'), required=False, dest='redis_enabled') parser.add_argument('-A', help=('optional - run an analysis ' 'supported modes: scn'), required=False, dest='analysis_type') parser.add_argument('-L', help=('optional - screener urls to pull ' 'tickers for analysis'), required=False, dest='urls') parser.add_argument( '-Z', help=('disable run without an engine for local testing and demos'), required=False, dest='celery_enabled', action='store_true') parser.add_argument('-F', help=('optional - backfill date for filling in ' 'gaps for the IEX Cloud minute dataset ' 'format is YYYY-MM-DD'), required=False, dest='backfill_date') parser.add_argument('-d', help=('debug'), required=False, dest='debug', action='store_true') args = parser.parse_args() run_offline = True ticker = ae_consts.TICKER ticker_id = ae_consts.TICKER_ID fetch_mode = 'initial' exp_date_str = ae_consts.NEXT_EXP_STR ssl_options = ae_consts.SSL_OPTIONS transport_options = ae_consts.TRANSPORT_OPTIONS broker_url = ae_consts.WORKER_BROKER_URL backend_url = ae_consts.WORKER_BACKEND_URL celery_config_module = ae_consts.WORKER_CELERY_CONFIG_MODULE include_tasks = ae_consts.INCLUDE_TASKS s3_access_key = ae_consts.S3_ACCESS_KEY s3_secret_key = ae_consts.S3_SECRET_KEY s3_region_name = ae_consts.S3_REGION_NAME s3_address = ae_consts.S3_ADDRESS s3_secure = ae_consts.S3_SECURE s3_bucket_name = ae_consts.S3_BUCKET s3_key = ae_consts.S3_KEY redis_address = ae_consts.REDIS_ADDRESS redis_key = ae_consts.REDIS_KEY redis_password = ae_consts.REDIS_PASSWORD redis_db = ae_consts.REDIS_DB redis_expire = ae_consts.REDIS_EXPIRE strike = None contract_type = None get_pricing = True get_news = True get_options = True s3_enabled = True redis_enabled = True analysis_type = None backfill_date = None debug = False if args.ticker: ticker = args.ticker.upper() if args.ticker_id: ticker_id = args.ticker_id if args.exp_date_str: exp_date_str = ae_consts.NEXT_EXP_STR if args.broker_url: broker_url = args.broker_url if args.backend_url: backend_url = args.backend_url if args.s3_access_key: s3_access_key = args.s3_access_key if args.s3_secret_key: s3_secret_key = args.s3_secret_key if args.s3_region_name: s3_region_name = args.s3_region_name if args.s3_address: s3_address = args.s3_address if args.s3_secure: s3_secure = args.s3_secure if args.s3_bucket_name: s3_bucket_name = args.s3_bucket_name if args.keyname: s3_key = args.keyname redis_key = args.keyname if args.redis_address: redis_address = args.redis_address if args.redis_password: redis_password = args.redis_password if args.redis_db: redis_db = args.redis_db if args.redis_expire: redis_expire = args.redis_expire if args.strike: strike = args.strike if args.contract_type: contract_type = args.contract_type if args.get_pricing: get_pricing = args.get_pricing == '1' if args.get_news: get_news = args.get_news == '1' if args.get_options: get_options = args.get_options == '1' if args.s3_enabled: s3_enabled = args.s3_enabled == '1' if args.redis_enabled: redis_enabled = 
args.redis_enabled == '1' if args.fetch_mode: fetch_mode = str(args.fetch_mode).lower() if args.analysis_type: analysis_type = str(args.analysis_type).lower() if args.celery_enabled: run_offline = False if args.backfill_date: backfill_date = args.backfill_date if args.debug: debug = True work = api_requests.build_get_new_pricing_request() work['ticker'] = ticker work['ticker_id'] = ticker_id work['s3_bucket'] = s3_bucket_name work['s3_key'] = s3_key work['redis_key'] = redis_key work['strike'] = strike work['contract'] = contract_type work['exp_date'] = exp_date_str work['s3_access_key'] = s3_access_key work['s3_secret_key'] = s3_secret_key work['s3_region_name'] = s3_region_name work['s3_address'] = s3_address work['s3_secure'] = s3_secure work['redis_address'] = redis_address work['redis_password'] = redis_password work['redis_db'] = redis_db work['redis_expire'] = redis_expire work['get_pricing'] = get_pricing work['get_news'] = get_news work['get_options'] = get_options work['s3_enabled'] = s3_enabled work['redis_enabled'] = redis_enabled work['fetch_mode'] = fetch_mode work['analysis_type'] = analysis_type work['iex_datasets'] = iex_consts.DEFAULT_FETCH_DATASETS work['backfill_date'] = backfill_date work['debug'] = debug work['label'] = f'ticker={ticker}' if analysis_type == 'scn': label = f'screener={work["ticker"]}' fv_urls = [] if args.urls: fv_urls = str(args.urls).split('|') if len(fv_urls) == 0: fv_urls = os.getenv('SCREENER_URLS', []).split('|') screener_req = api_requests.build_screener_analysis_request( ticker=ticker, fv_urls=fv_urls, label=label) work.update(screener_req) start_screener_analysis(req=work) # end of analysis_type else: last_close_date = ae_utils.last_close() last_close_str = last_close_date.strftime(ae_consts.COMMON_DATE_FORMAT) cache_base_key = f'{ticker}_{last_close_str}' if not args.keyname: work['s3_key'] = cache_base_key work['redis_key'] = cache_base_key path_to_tasks = 'analysis_engine.work_tasks' task_name = (f'{path_to_tasks}' f'.get_new_pricing_data.get_new_pricing_data') task_res = None if ae_consts.is_celery_disabled() or run_offline: work['celery_disabled'] = True work['verbose'] = debug log.debug(f'starting without celery work={ae_consts.ppj(work)} ' f'offline={run_offline}') task_res = task_pricing.get_new_pricing_data(work) status_str = ae_consts.get_status(status=task_res['status']) cur_date = backfill_date if not backfill_date: cur_date = ae_utils.get_last_close_str() redis_arr = work["redis_address"].split(':') include_results = '' if debug: include_results = task_res['rec'] if task_res['status'] == ae_consts.SUCCESS: if task_res['rec']['num_success'] == 0: log.error(f'failed fetching ticker={work["ticker"]} ' f'from {fetch_mode} - please check the ' 'environment variables') else: log.info(f'done fetching ticker={work["ticker"]} ' f'mode={fetch_mode} ' f'status={status_str} ' f'err={task_res["err"]} {include_results}') print('View keys in redis with:\n' f'redis-cli -h {redis_arr[0]} ' 'keys ' f'"{work["ticker"]}_{cur_date}*"') elif task_res['status'] == ae_consts.MISSING_TOKEN: print('Set an IEX or Tradier token: ' '\n' ' export IEX_TOKEN=YOUR_IEX_TOKEN\n' ' export TD_TOKEN=YOUR_TD_TOKEN\n') else: log.error(f'done fetching ticker={work["ticker"]} ' f'mode={fetch_mode} ' f'status={status_str} ' f'err={task_res["err"]}') # if/else debug else: log.debug(f'connecting to broker={broker_url} ' f'backend={backend_url}') # Get the Celery app app = get_celery_app.get_celery_app( name=__name__, auth_url=broker_url, backend_url=backend_url, 
path_to_config_module=celery_config_module, ssl_options=ssl_options, transport_options=transport_options, include_tasks=include_tasks) log.debug(f'calling task={task_name} - work={ae_consts.ppj(work)}') job_id = app.send_task(task_name, (work, )) log.debug(f'task={task_name} - job_id={job_id}')
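# Usage sketch (hypothetical): this CLI entry point is normally invoked from
# the shell (for example ``fetch -t SPY -g intra`` or, to backfill a missing
# IEX minute day, ``fetch -t SPY -g iex_min -F 2019-06-03``). To drive it
# from Python for a quick offline demo, one option is to set sys.argv before
# calling it; without ``-Z`` the run stays offline (no Celery engine needed).
def _example_fetch_spy_intraday():
    import sys
    sys.argv = ['fetch', '-t', 'SPY', '-g', 'intra']
    fetch_new_stock_datasets()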
def run_algo( ticker=None, tickers=None, algo=None, # optional derived ``analysis_engine.algo.Algo`` instance balance=None, # float starting base capital commission=None, # float for single trade commission for buy or sell start_date=None, # string YYYY-MM-DD HH:MM:SS end_date=None, # string YYYY-MM-DD HH:MM:SS datasets=None, # string list of identifiers num_owned_dict=None, # not supported cache_freq='daily', # 'minute' not supported auto_fill=True, load_config=None, report_config=None, history_config=None, extract_config=None, use_key=None, extract_mode='all', iex_datasets=None, redis_enabled=True, redis_address=None, redis_db=None, redis_password=None, redis_expire=None, redis_key=None, s3_enabled=True, s3_address=None, s3_bucket=None, s3_access_key=None, s3_secret_key=None, s3_region_name=None, s3_secure=False, s3_key=None, celery_disabled=True, broker_url=None, result_backend=None, label=None, name=None, timeseries=None, trade_strategy=None, verbose=False, publish_to_slack=True, publish_to_s3=True, publish_to_redis=True, extract_datasets=None, config_file=None, config_dict=None, version=1, raise_on_err=True, **kwargs): """run_algo Run an algorithm with steps: 1) Extract redis keys between dates 2) Compile a data pipeline dictionary (call it ``data``) 3) Call algorithm's ``myalgo.handle_data(data=data)`` .. note:: If no ``algo`` is set, the ``analysis_engine.algo.BaseAlgo`` algorithm is used. .. note:: Please ensure Redis and Minio are running before trying to extract tickers **Stock tickers to extract** :param ticker: single stock ticker/symbol/ETF to extract :param tickers: optional - list of tickers to extract :param use_key: optional - extract historical key from Redis **Algo Configuration** :param algo: derived instance of ``analysis_engine.algo.Algo`` object :param balance: optional - float balance parameter can also be set on the ``algo`` object if not set on the args :param commission: float for single trade commission for buy or sell. can also be set on the ``algo`` objet :param start_date: string ``YYYY-MM-DD_HH:MM:SS`` cache value :param end_date: string ``YYYY-MM-DD_HH:MM:SS`` cache value :param dataset_types: list of strings that are ``iex`` or ``yahoo`` datasets that are cached. :param cache_freq: optional - depending on if you are running data feeds on a ``daily`` cron (default) vs every ``minute`` (or faster) :param num_owned_dict: not supported yet :param auto_fill: optional - boolean for auto filling buy/sell orders for backtesting (default is ``True``) :param trading_calendar: ``trading_calendar.TradingCalendar`` object, by default ``analysis_engine.calendars. 
always_open.AlwaysOpen`` trading calendar # TradingCalendar by ``TFSExchangeCalendar`` :param config_file: path to a json file containing custom algorithm object member values (like indicator configuration and predict future date units ahead for a backtest) :param config_dict: optional - dictionary that can be passed to derived class implementations of: ``def load_from_config(config_dict=config_dict)`` **Timeseries** :param timeseries: optional - string to set ``day`` or ``minute`` backtesting or live trading (default is ``minute``) **Trading Strategy** :param trade_strategy: optional - string to set the type of ``Trading Strategy`` for backtesting or live trading (default is ``count``) **Algorithm Dataset Loading, Extracting, Reporting and Trading History arguments** :param load_config: optional - dictionary for setting member variables to load an agorithm-ready dataset from a file, s3 or redis :param report_config: optional - dictionary for setting member variables to publish an algo ``trading performance report`` to s3, redis, a file or slack :param history_config: optional - dictionary for setting member variables to publish an algo ``trade history`` to s3, redis, a file or slack :param extract_config: optional - dictionary for setting member variables to publish an algo ``trading performance report`` to s3, redis, a file or slack **(Optional) Data sources, datafeeds and datasets to gather** :param iex_datasets: list of strings for gathering specific `IEX datasets <https://iexcloud.io/>`__ which are set as consts: ``analysis_engine.iex.consts.FETCH_*``. **(Optional) Redis connectivity arguments** :param redis_enabled: bool - toggle for auto-caching all datasets in Redis (default is ``True``) :param redis_address: Redis connection string format is ``host:port`` (default is ``localhost:6379``) :param redis_db: Redis db to use (default is ``0``) :param redis_password: optional - Redis password (default is ``None``) :param redis_expire: optional - Redis expire value (default is ``None``) :param redis_key: optional - redis key not used (default is ``None``) **(Optional) Minio (S3) connectivity arguments** :param s3_enabled: bool - toggle for auto-archiving on Minio (S3) (default is ``True``) :param s3_address: Minio S3 connection string format ``host:port`` (default is ``localhost:9000``) :param s3_bucket: S3 Bucket for storing the artifacts (default is ``dev``) which should be viewable on a browser: http://localhost:9000/minio/dev/ :param s3_access_key: S3 Access key (default is ``trexaccesskey``) :param s3_secret_key: S3 Secret key (default is ``trex123321``) :param s3_region_name: S3 region name (default is ``us-east-1``) :param s3_secure: Transmit using tls encryption (default is ``False``) :param s3_key: optional s3 key not used (default is ``None``) **(Optional) Celery worker broker connectivity arguments** :param celery_disabled: bool - toggle synchronous mode or publish to an engine connected to the `Celery broker and backend <https://github.com/celery/celery#transports-and-backends>`__ (default is ``True`` - synchronous mode without an engine or need for a broker or backend for Celery) :param broker_url: Celery broker url (default is ``redis://0.0.0.0:6379/13``) :param result_backend: Celery backend url (default is ``redis://0.0.0.0:6379/14``) :param label: tracking log label :param publish_to_slack: optional - boolean for publishing to slack (coming soon) :param publish_to_s3: optional - boolean for publishing to s3 (coming soon) :param publish_to_redis: optional - boolean for 
publishing to redis (coming soon) **(Optional) Debugging** :param verbose: bool - show extract warnings and other debug logging (default is False) :param raise_on_err: optional - boolean for unittests and developing algorithms with the ``analysis_engine.run_algo.run_algo`` helper. When set to ``True`` exceptions will are raised to the calling functions :param kwargs: keyword arguments dictionary """ # dictionary structure with a list sorted on: ascending dates # algo_data_req[ticker][list][dataset] = pd.DataFrame algo_data_req = {} extract_requests = [] return_algo = False # return created algo objects for use by caller rec = {} msg = None use_tickers = tickers use_balance = balance use_commission = commission if ticker: use_tickers = [ticker] else: if not use_tickers: use_tickers = [] # if these are not set as args, but the algo object # has them, use them instead: if algo: if len(use_tickers) == 0: use_tickers = algo.get_tickers() if not use_balance: use_balance = algo.get_balance() if not use_commission: use_commission = algo.get_commission() default_iex_datasets = [ 'daily', 'minute', 'quote', 'stats', 'peers', 'news', 'financials', 'earnings', 'dividends', 'company' ] if not iex_datasets: iex_datasets = default_iex_datasets if redis_enabled: if not redis_address: redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379') if not redis_password: redis_password = os.getenv('REDIS_PASSWORD', None) if not redis_db: redis_db = int(os.getenv('REDIS_DB', '0')) if not redis_expire: redis_expire = os.getenv('REDIS_EXPIRE', None) if s3_enabled: if not s3_address: s3_address = os.getenv('S3_ADDRESS', 'localhost:9000') if not s3_access_key: s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey') if not s3_secret_key: s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321') if not s3_region_name: s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1') if not s3_secure: s3_secure = os.getenv('S3_SECURE', '0') == '1' if not s3_bucket: s3_bucket = os.getenv('S3_BUCKET', 'dev') if not broker_url: broker_url = os.getenv('WORKER_BROKER_URL', 'redis://0.0.0.0:6379/11') if not result_backend: result_backend = os.getenv('WORKER_BACKEND_URL', 'redis://0.0.0.0:6379/12') if not label: label = 'run-algo' num_tickers = len(use_tickers) last_close_str = ae_utils.get_last_close_str() if iex_datasets: if verbose: log.info(f'{label} - tickers={num_tickers} ' f'iex={json.dumps(iex_datasets)}') else: if verbose: log.info(f'{label} - tickers={num_tickers}') ticker_key = use_key if not ticker_key: ticker_key = f'{ticker}_{last_close_str}' if not algo: algo = base_algo.BaseAlgo(ticker=None, tickers=use_tickers, balance=use_balance, commission=use_commission, config_dict=config_dict, name=label, auto_fill=auto_fill, timeseries=timeseries, trade_strategy=trade_strategy, publish_to_slack=publish_to_slack, publish_to_s3=publish_to_s3, publish_to_redis=publish_to_redis, raise_on_err=raise_on_err) return_algo = True # the algo object is stored # in the result at: res['rec']['algo'] if not algo: msg = f'{label} - missing algo object' log.error(msg) return build_result.build_result(status=ae_consts.EMPTY, err=msg, rec=rec) if raise_on_err: log.debug(f'{label} - enabling algo exception raises') algo.raise_on_err = True indicator_datasets = algo.get_indicator_datasets() if len(indicator_datasets) == 0: indicator_datasets = ae_consts.BACKUP_DATASETS log.info(f'using all datasets={indicator_datasets}') verbose_extract = False if config_dict: verbose_extract = config_dict.get('verbose_extract', False) 
common_vals = {} common_vals['base_key'] = ticker_key common_vals['celery_disabled'] = celery_disabled common_vals['ticker'] = ticker common_vals['label'] = label common_vals['iex_datasets'] = iex_datasets common_vals['s3_enabled'] = s3_enabled common_vals['s3_bucket'] = s3_bucket common_vals['s3_address'] = s3_address common_vals['s3_secure'] = s3_secure common_vals['s3_region_name'] = s3_region_name common_vals['s3_access_key'] = s3_access_key common_vals['s3_secret_key'] = s3_secret_key common_vals['s3_key'] = ticker_key common_vals['redis_enabled'] = redis_enabled common_vals['redis_address'] = redis_address common_vals['redis_password'] = redis_password common_vals['redis_db'] = redis_db common_vals['redis_key'] = ticker_key common_vals['redis_expire'] = redis_expire use_start_date_str = start_date use_end_date_str = end_date last_close_date = ae_utils.last_close() end_date_val = None cache_freq_fmt = ae_consts.COMMON_TICK_DATE_FORMAT if not use_end_date_str: use_end_date_str = last_close_date.strftime(cache_freq_fmt) end_date_val = ae_utils.get_date_from_str(date_str=use_end_date_str, fmt=cache_freq_fmt) start_date_val = None if not use_start_date_str: start_date_val = end_date_val - datetime.timedelta(days=60) use_start_date_str = start_date_val.strftime(cache_freq_fmt) else: start_date_val = datetime.datetime.strptime( use_start_date_str, ae_consts.COMMON_TICK_DATE_FORMAT) total_dates = (end_date_val - start_date_val).days if end_date_val < start_date_val: msg = ( f'{label} - invalid dates - start_date={start_date_val} is after ' f'end_date={end_date_val}') raise Exception(msg) if verbose: log.info(f'{label} - days={total_dates} ' f'start={use_start_date_str} ' f'end={use_end_date_str} ' f'datasets={indicator_datasets}') for ticker in use_tickers: req = algo_utils.build_algo_request(ticker=ticker, use_key=use_key, start_date=use_start_date_str, end_date=use_end_date_str, datasets=datasets, balance=use_balance, cache_freq=cache_freq, timeseries=timeseries, trade_strategy=trade_strategy, label=label) ticker_key = f'{ticker}_{last_close_str}' common_vals['ticker'] = ticker common_vals['base_key'] = ticker_key common_vals['redis_key'] = ticker_key common_vals['s3_key'] = ticker_key for date_key in req['extract_datasets']: date_req = api_requests.get_ds_dict(ticker=ticker, base_key=date_key, ds_id=label, service_dict=common_vals) node_date_key = date_key.replace(f'{ticker}_', '') extract_requests.append({ 'id': date_key, 'ticker': ticker, 'date_key': date_key, 'date': node_date_key, 'req': date_req }) # end of for all ticker in use_tickers first_extract_date = None last_extract_date = None total_extract_requests = len(extract_requests) cur_idx = 1 for idx, extract_node in enumerate(extract_requests): extract_ticker = extract_node['ticker'] extract_date = extract_node['date'] ds_node_id = extract_node['id'] if not first_extract_date: first_extract_date = extract_date last_extract_date = extract_date perc_progress = ae_consts.get_percent_done( progress=cur_idx, total=total_extract_requests) percent_label = (f'{label} ' f'ticker={extract_ticker} ' f'date={extract_date} ' f'{perc_progress} ' f'{idx}/{total_extract_requests} ' f'{indicator_datasets}') if verbose: log.info(f'extracting - {percent_label}') ticker_bt_data = build_ds_node.build_dataset_node( ticker=extract_ticker, date=extract_date, service_dict=common_vals, datasets=indicator_datasets, log_label=label, verbose=verbose_extract) if ticker not in algo_data_req: algo_data_req[ticker] = [] algo_data_req[ticker].append({ 'id': 
ds_node_id, # id is currently the cache key in redis 'date': extract_date, # used to confirm dates in asc order 'data': ticker_bt_data }) if verbose: log.info(f'extract - {percent_label} ' f'dataset={len(algo_data_req[ticker])}') cur_idx += 1 # end of for service_dict in extract_requests # this could be a separate celery task status = ae_consts.NOT_RUN if len(algo_data_req) == 0: msg = (f'{label} - nothing to test - no data found for ' f'tickers={use_tickers} ' f'between {first_extract_date} and {last_extract_date}') log.info(msg) return build_result.build_result(status=ae_consts.EMPTY, err=msg, rec=rec) # this could be a separate celery task try: if verbose: log.info(f'handle_data START - {percent_label} from ' f'{first_extract_date} to {last_extract_date}') algo.handle_data(data=algo_data_req) if verbose: log.info(f'handle_data END - {percent_label} from ' f'{first_extract_date} to {last_extract_date}') except Exception as e: a_name = algo.get_name() a_debug_msg = algo.get_debug_msg() if not a_debug_msg: a_debug_msg = 'debug message not set' a_config_dict = ae_consts.ppj(algo.config_dict) msg = (f'{percent_label} - algo={a_name} ' f'encountered exception in handle_data tickers={use_tickers} ' f'from {first_extract_date} to {last_extract_date} ex={e} ' f'and failed during operation: {a_debug_msg}') if raise_on_err: if algo: try: ind_obj = \ algo.get_indicator_process_last_indicator() if ind_obj: ind_obj_path = ind_obj.get_path_to_module() ind_obj_config = ae_consts.ppj(ind_obj.get_config()) found_error_hint = False if hasattr(ind_obj.use_df, 'to_json'): if len(ind_obj.use_df.index) == 0: log.critical( f'indicator failure report for ' f'last module: ' f'{ind_obj_path} ' f'indicator={ind_obj.get_name()} ' f'config={ind_obj_config} ' f'dataset={ind_obj.use_df.head(5)} ' f'name_of_dataset={ind_obj.uses_data}') log.critical( '--------------------------------------' '--------------------------------------') log.critical('Please check if this indicator: ' f'{ind_obj_path} ' 'supports Empty Dataframes') log.critical( '--------------------------------------' '--------------------------------------') found_error_hint = True # indicator error hints if not found_error_hint: log.critical( f'indicator failure report for last module: ' f'{ind_obj_path} ' f'indicator={ind_obj.get_name()} ' f'config={ind_obj_config} ' f'dataset={ind_obj.use_df.head(5)} ' f'name_of_dataset={ind_obj.uses_data}') except Exception as f: log.critical(f'failed to pull indicator processor ' f'last indicator for debugging ' f'from ex={e} with parsing ex={f}') # end of ignoring non-supported ways of creating # indicator processors log.error(msg) log.error(f'algo failure report: ' f'algo={a_name} handle_data() ' f'config={a_config_dict} ') log.critical(f'algo failed during operation: {a_debug_msg}') raise e else: log.error(msg) return build_result.build_result(status=ae_consts.ERR, err=msg, rec=rec) # end of try/ex # this could be a separate celery task try: if verbose: log.info(f'get_result START - {percent_label} from ' f'{first_extract_date} to {last_extract_date}') rec = algo.get_result() status = ae_consts.SUCCESS if verbose: log.info(f'get_result END - {percent_label} from ' f'{first_extract_date} to {last_extract_date}') except Exception as e: msg = ( f'{percent_label} - algo={algo.get_name()} encountered exception ' f'in get_result tickers={use_tickers} from ' f'{first_extract_date} to {last_extract_date} ex={e}') if raise_on_err: if algo: log.error(f'algo={algo.get_name()} failed in get_result with ' 
f'debug_msg={algo.get_debug_msg()}') log.error(msg) raise e else: log.error(msg) return build_result.build_result(status=ae_consts.ERR, err=msg, rec=rec) # end of try/ex if return_algo: rec['algo'] = algo return build_result.build_result(status=status, err=msg, rec=rec)
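# Usage sketch (hypothetical): run the default BaseAlgo over cached SPY
# datasets for a one-week window. Assumes Redis and Minio already hold
# extracted pricing data for the ticker; the date strings follow the
# ``YYYY-MM-DD HH:MM:SS`` format documented above.
def _example_backtest_spy():
    res = run_algo(
        ticker='SPY',
        start_date='2019-06-03 09:30:00',
        end_date='2019-06-07 16:00:00',
        cache_freq='daily',
        timeseries='day',
        verbose=True)
    if res['status'] == ae_consts.SUCCESS:
        return res['rec']
    return None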
def get_new_pricing_data(self, work_dict): """get_new_pricing_data Get Ticker information on: - prices - turn off with ``work_dict.get_pricing = False`` - news - turn off with ``work_dict.get_news = False`` - options - turn off with ``work_dict.get_options = False`` :param work_dict: dictionary for key/values """ label = 'get_new_pricing_data' log.debug(f'task - {label} - start ' f'work_dict={work_dict}') num_success = 0 ticker = ae_consts.TICKER ticker_id = ae_consts.TICKER_ID rec = { 'pricing': None, 'options': None, 'calls': None, 'puts': None, 'news': None, 'daily': None, 'minute': None, 'quote': None, 'stats': None, 'peers': None, 'iex_news': None, 'financials': None, 'earnings': None, 'dividends': None, 'company': None, 'exp_date': None, 'publish_pricing_update': None, 'num_success': num_success, 'date': ae_utils.utc_now_str(), 'updated': None, 'version': ae_consts.DATASET_COLLECTION_VERSION } res = {'status': ae_consts.NOT_RUN, 'err': None, 'rec': rec} try: ticker = work_dict.get('ticker', ticker) ticker_id = work_dict.get('ticker_id', ae_consts.TICKER_ID) s3_bucket = work_dict.get('s3_bucket', ae_consts.S3_BUCKET) s3_key = work_dict.get('s3_key', ae_consts.S3_KEY) redis_key = work_dict.get('redis_key', ae_consts.REDIS_KEY) exp_date = work_dict.get('exp_date', None) cur_date = ae_utils.last_close() cur_strike = work_dict.get('strike', None) contract_type = str(work_dict.get('contract', 'C')).upper() label = work_dict.get('label', label) iex_datasets = work_dict.get('iex_datasets', iex_consts.DEFAULT_FETCH_DATASETS) td_datasets = work_dict.get('td_datasets', td_consts.DEFAULT_FETCH_DATASETS_TD) fetch_mode = work_dict.get('fetch_mode', ae_consts.FETCH_MODE_ALL) iex_token = work_dict.get('iex_token', iex_consts.IEX_TOKEN) td_token = work_dict.get('td_token', td_consts.TD_TOKEN) str_fetch_mode = str(fetch_mode).lower() # control flags to deal with feed issues: get_iex_data = True get_td_data = True if (fetch_mode == ae_consts.FETCH_MODE_ALL or str_fetch_mode == 'initial'): get_iex_data = True get_td_data = True iex_datasets = ae_consts.IEX_INITIAL_DATASETS elif (fetch_mode == ae_consts.FETCH_MODE_ALL or str_fetch_mode == 'all'): get_iex_data = True get_td_data = True iex_datasets = ae_consts.IEX_DATASETS_DEFAULT elif (fetch_mode == ae_consts.FETCH_MODE_YHO or str_fetch_mode == 'yahoo'): get_iex_data = False get_td_data = False elif (fetch_mode == ae_consts.FETCH_MODE_IEX or str_fetch_mode == 'iex-all'): get_iex_data = True get_td_data = False iex_datasets = ae_consts.IEX_DATASETS_DEFAULT elif (fetch_mode == ae_consts.FETCH_MODE_IEX or str_fetch_mode == 'iex'): get_iex_data = True get_td_data = False iex_datasets = ae_consts.IEX_INTRADAY_DATASETS elif (fetch_mode == ae_consts.FETCH_MODE_INTRADAY or str_fetch_mode == 'intra'): get_iex_data = True get_td_data = True iex_datasets = ae_consts.IEX_INTRADAY_DATASETS elif (fetch_mode == ae_consts.FETCH_MODE_DAILY or str_fetch_mode == 'daily'): get_iex_data = True get_td_data = False iex_datasets = ae_consts.IEX_DAILY_DATASETS elif (fetch_mode == ae_consts.FETCH_MODE_WEEKLY or str_fetch_mode == 'weekly'): get_iex_data = True get_td_data = False iex_datasets = ae_consts.IEX_WEEKLY_DATASETS elif (fetch_mode == ae_consts.FETCH_MODE_TD or str_fetch_mode == 'td'): get_iex_data = False get_td_data = True else: get_iex_data = False get_td_data = False fetch_arr = str_fetch_mode.split(',') found_fetch = False iex_datasets = [] for fetch_name in fetch_arr: if fetch_name not in iex_datasets: if fetch_name == 'iex_min': iex_datasets.append('minute') elif 
fetch_name == 'iex_day': iex_datasets.append('daily') elif fetch_name == 'iex_quote': iex_datasets.append('quote') elif fetch_name == 'iex_stats': iex_datasets.append('stats') elif fetch_name == 'iex_peers': iex_datasets.append('peers') elif fetch_name == 'iex_news': iex_datasets.append('news') elif fetch_name == 'iex_fin': iex_datasets.append('financials') elif fetch_name == 'iex_earn': iex_datasets.append('earnings') elif fetch_name == 'iex_div': iex_datasets.append('dividends') elif fetch_name == 'iex_comp': iex_datasets.append('company') elif fetch_name == 'td': get_td_data = True else: log.warn('unsupported IEX dataset ' f'{fetch_name}') found_fetch = (len(iex_datasets) != 0) if not found_fetch: log.error(f'{label} - unsupported ' f'fetch_mode={fetch_mode} value') else: get_iex_data = True log.debug(f'{label} - ' f'fetching={len(iex_datasets)} ' f'{iex_datasets} ' f'fetch_mode={fetch_mode}') # end of screening custom fetch_mode settings num_tokens = 0 if get_iex_data: if not iex_token: log.warn(f'{label} - ' 'please set a valid IEX Cloud Account token (' 'https://iexcloud.io/cloud-login/#/register' ') to fetch data from IEX Cloud. It must be ' 'set as an environment variable like: ' 'export IEX_TOKEN=<token>') get_iex_data = False else: num_tokens += 1 # sanity check - disable IEX fetch if the token is not set if get_td_data: missing_td_token = [ 'MISSING_TD_TOKEN', 'SETYOURTDTOKEN', 'SETYOURTRADIERTOKENHERE' ] if td_token in missing_td_token: log.warn(f'{label} - ' 'please set a valid Tradier Account token (' 'https://developer.tradier.com/user/sign_up' ') to fetch pricing data from Tradier. It must be ' 'set as an environment variable like: ' 'export TD_TOKEN=<token>') get_td_data = False else: num_tokens += 1 # sanity check - disable Tradier fetch if the token is not set """ as of Thursday, Jan. 3, 2019: https://developer.yahoo.com/yql/ Important EOL Notice: As of Thursday, Jan. 
3, 2019 the YQL service at query.yahooapis.com will be retired """ get_yahoo_data = False if (not get_iex_data and not get_td_data and not get_yahoo_data): err = None if num_tokens == 0: res['status'] = ae_consts.MISSING_TOKEN err = (f'Please set a valid IEX_TOKEN or TD_TOKEN ' f'environment variable') else: err = (f'Please set at least one supported datafeed from ' f'either: ' f'IEX Cloud (fetch -t TICKER -g iex) or ' f'Tradier (fetch -t TICKER -g td) ' f'for ' f'ticker={ticker} ' f'cur_date={cur_date} ' f'IEX enabled={get_iex_data} ' f'TD enabled={get_td_data} ' f'YHO enabled={get_yahoo_data}') res['status'] = ae_consts.ERR res['err'] = err return get_task_results.get_task_results(work_dict=work_dict, result=res) # end of checking that there is at least 1 feed on if not exp_date: exp_date = opt_dates.option_expiration(date=exp_date) else: exp_date = datetime.datetime.strptime(exp_date, '%Y-%m-%d') rec['updated'] = cur_date.strftime('%Y-%m-%d %H:%M:%S') log.debug(f'{label} getting pricing for ticker={ticker} ' f'cur_date={cur_date} exp_date={exp_date} ' f'IEX={get_iex_data} ' f'TD={get_td_data} ' f'YHO={get_yahoo_data}') yahoo_rec = { 'ticker': ticker, 'pricing': None, 'options': None, 'calls': None, 'puts': None, 'news': None, 'exp_date': None, 'publish_pricing_update': None, 'date': None, 'updated': None } # disabled on 2019-01-03 if get_yahoo_data: log.debug(f'{label} YHO ticker={ticker}') yahoo_res = yahoo_data.get_data_from_yahoo(work_dict=work_dict) status_str = ae_consts.get_status(status=yahoo_res['status']) if yahoo_res['status'] == ae_consts.SUCCESS: yahoo_rec = yahoo_res['rec'] msg = (f'{label} YHO ticker={ticker} ' f'status={status_str} err={yahoo_res["err"]}') if ae_consts.ev('SHOW_SUCCESS', '0') == '1': log.info(msg) else: log.debug(msg) rec['pricing'] = yahoo_rec.get('pricing', '{}') rec['news'] = yahoo_rec.get('news', '{}') rec['options'] = yahoo_rec.get('options', '{}') rec['calls'] = rec['options'].get('calls', ae_consts.EMPTY_DF_STR) rec['puts'] = rec['options'].get('puts', ae_consts.EMPTY_DF_STR) num_success += 1 else: log.error(f'{label} failed YHO ticker={ticker} ' f'status={status_str} err={yahoo_res["err"]}') # end of get from yahoo if get_iex_data: num_iex_ds = len(iex_datasets) log.debug(f'{label} IEX datasets={num_iex_ds}') for idx, ft_type in enumerate(iex_datasets): dataset_field = iex_consts.get_ft_str(ft_type=ft_type) log.debug(f'{label} IEX={idx}/{num_iex_ds} ' f'field={dataset_field} ticker={ticker}') iex_label = f'{label}-{dataset_field}' iex_req = copy.deepcopy(work_dict) iex_req['label'] = iex_label iex_req['ft_type'] = ft_type iex_req['field'] = dataset_field iex_req['ticker'] = ticker iex_res = iex_data.get_data_from_iex(work_dict=iex_req) status_str = (ae_consts.get_status(status=iex_res['status'])) if iex_res['status'] == ae_consts.SUCCESS: iex_rec = iex_res['rec'] msg = (f'{label} IEX ticker={ticker} ' f'field={dataset_field} ' f'status={status_str} ' f'err={iex_res["err"]}') if ae_consts.ev('SHOW_SUCCESS', '0') == '1': log.info(msg) else: log.debug(msg) if dataset_field == 'news': rec['iex_news'] = iex_rec['data'] else: rec[dataset_field] = iex_rec['data'] num_success += 1 else: log.debug(f'{label} failed IEX ticker={ticker} ' f'field={dataset_field} ' f'status={status_str} err={iex_res["err"]}') # end of if/else succcess # end idx, ft_type in enumerate(iex_datasets): # end of if get_iex_data if get_td_data: num_td_ds = len(td_datasets) log.debug(f'{label} TD datasets={num_td_ds}') for idx, ft_type in enumerate(td_datasets): dataset_field = 
td_consts.get_ft_str_td(ft_type=ft_type) log.debug(f'{label} TD={idx}/{num_td_ds} ' f'field={dataset_field} ticker={ticker}') td_label = (f'{label}-{dataset_field}') td_req = copy.deepcopy(work_dict) td_req['label'] = td_label td_req['ft_type'] = ft_type td_req['field'] = dataset_field td_req['ticker'] = ticker td_res = td_data.get_data_from_td(work_dict=td_req) status_str = (ae_consts.get_status(status=td_res['status'])) if td_res['status'] == ae_consts.SUCCESS: td_rec = td_res['rec'] msg = (f'{label} TD ticker={ticker} ' f'field={dataset_field} ' f'status={status_str} ' f'err={td_res["err"]}') if ae_consts.ev('SHOW_SUCCESS', '0') == '1': log.info(msg) else: log.debug(msg) if dataset_field == 'tdcalls': rec['tdcalls'] = td_rec['data'] if dataset_field == 'tdputs': rec['tdputs'] = td_rec['data'] else: rec[dataset_field] = td_rec['data'] num_success += 1 else: log.critical(f'{label} failed TD ticker={ticker} ' f'field={dataset_field} ' f'status={status_str} err={td_res["err"]}') # end of if/else succcess # end idx, ft_type in enumerate(td_datasets): # end of if get_td_data rec['num_success'] = num_success update_req = {'data': rec} update_req['ticker'] = ticker update_req['ticker_id'] = ticker_id update_req['strike'] = cur_strike update_req['contract'] = contract_type update_req['s3_enabled'] = work_dict.get('s3_enabled', ae_consts.ENABLED_S3_UPLOAD) update_req['redis_enabled'] = work_dict.get( 'redis_enabled', ae_consts.ENABLED_REDIS_PUBLISH) update_req['s3_bucket'] = s3_bucket update_req['s3_key'] = s3_key update_req['s3_access_key'] = work_dict.get('s3_access_key', ae_consts.S3_ACCESS_KEY) update_req['s3_secret_key'] = work_dict.get('s3_secret_key', ae_consts.S3_SECRET_KEY) update_req['s3_region_name'] = work_dict.get('s3_region_name', ae_consts.S3_REGION_NAME) update_req['s3_address'] = work_dict.get('s3_address', ae_consts.S3_ADDRESS) update_req['s3_secure'] = work_dict.get('s3_secure', ae_consts.S3_SECURE) update_req['redis_key'] = redis_key update_req['redis_address'] = work_dict.get('redis_address', ae_consts.REDIS_ADDRESS) update_req['redis_password'] = work_dict.get('redis_password', ae_consts.REDIS_PASSWORD) update_req['redis_db'] = int( work_dict.get('redis_db', ae_consts.REDIS_DB)) update_req['redis_expire'] = work_dict.get('redis_expire', ae_consts.REDIS_EXPIRE) update_req['updated'] = rec['updated'] update_req['label'] = label update_req['celery_disabled'] = True update_status = ae_consts.NOT_SET try: update_res = publisher.run_publish_pricing_update( work_dict=update_req) update_status = update_res.get('status', ae_consts.NOT_SET) status_str = ae_consts.get_status(status=update_status) if ae_consts.ev('DEBUG_RESULTS', '0') == '1': log.debug(f'{label} update_res ' f'status={status_str} ' f'data={ae_consts.ppj(update_res)}') else: log.debug(f'{label} run_publish_pricing_update ' f'status={status_str}') # end of if/else rec['publish_pricing_update'] = update_res res = build_result.build_result(status=ae_consts.SUCCESS, err=None, rec=rec) except Exception as f: err = (f'{label} publisher.run_publish_pricing_update failed ' f'with ex={f}') log.error(err) res = build_result.build_result(status=ae_consts.ERR, err=err, rec=rec) # end of trying to publish results to connected services except Exception as e: res = build_result.build_result(status=ae_consts.ERR, err=('failed - get_new_pricing_data ' f'dict={work_dict} with ex={e}'), rec=rec) log.error(f'{label} - {res["err"]}') # end of try/ex if ae_consts.ev('DATASET_COLLECTION_SLACK_ALERTS', '0') == '1': env_name = 'DEV' if 
ae_consts.ev('PROD_SLACK_ALERTS', '1') == '1': env_name = 'PROD' done_msg = (f'Dataset collected ticker=*{ticker}* on ' f'env=*{env_name}* ' f'redis_key={redis_key} s3_key={s3_key} ' f'IEX={get_iex_data} ' f'TD={get_td_data} ' f'YHO={get_yahoo_data}') log.debug(f'{label} sending slack msg={done_msg}') if res['status'] == ae_consts.SUCCESS: slack_utils.post_success(msg=done_msg, block=False, jupyter=True) else: slack_utils.post_failure(msg=done_msg, block=False, jupyter=True) # end of if/else success # end of publishing to slack log.debug('task - get_new_pricing_data done - ' f'{label} - status={ae_consts.get_status(res["status"])}') return get_task_results.get_task_results(work_dict=work_dict, result=res)
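# Usage sketch (hypothetical): build the work dictionary this task reads and
# submit it the same way fetch_new_stock_datasets does (the task is normally
# invoked through Celery or called as a bound task object). ``api_requests``
# is the same helper module used elsewhere in this repo; the fetch_mode and
# label values are illustrative only.
def _example_build_minute_news_request(ticker='SPY'):
    work = api_requests.build_get_new_pricing_request()
    work['ticker'] = ticker
    work['fetch_mode'] = 'iex_min,iex_news'
    work['celery_disabled'] = True
    work['label'] = f'example-{ticker}'
    return work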
def fetch_new_stock_datasets(): """fetch_new_stock_datasets Collect all datasets for the ticker **SPY**: :: fetch_new_stock_datasets.py -t SPY .. note:: This requires the following services are listening on: - redis ``localhost:6379`` - minio ``localhost:9000`` """ log.info('start - fetch_new_stock_datasets') parser = argparse.ArgumentParser( description=('Download and store the latest stock pricing, ' 'news, and options chain data ' 'and store it in Minio (S3) and Redis. ' 'Also includes support for getting FinViz ' 'screener tickers')) parser.add_argument('-t', help=('ticker'), required=False, dest='ticker') parser.add_argument('-g', help=('optional - fetch mode: ' 'all = fetch from all data sources (default), ' 'td = fetch from just Tradier sources, ' 'iex = fetch from just IEX sources'), required=False, dest='fetch_mode') parser.add_argument('-i', help=('optional - ticker id ' 'not used without a database'), required=False, dest='ticker_id') parser.add_argument('-e', help=('optional - options expiration date'), required=False, dest='exp_date_str') parser.add_argument('-l', help=('optional - path to the log config file'), required=False, dest='log_config_path') parser.add_argument('-b', help=('optional - broker url for Celery'), required=False, dest='broker_url') parser.add_argument('-B', help=('optional - backend url for Celery'), required=False, dest='backend_url') parser.add_argument('-k', help=('optional - s3 access key'), required=False, dest='s3_access_key') parser.add_argument('-s', help=('optional - s3 secret key'), required=False, dest='s3_secret_key') parser.add_argument('-a', help=('optional - s3 address format: <host:port>'), required=False, dest='s3_address') parser.add_argument('-S', help=('optional - s3 ssl or not'), required=False, dest='s3_secure') parser.add_argument('-u', help=('optional - s3 bucket name'), required=False, dest='s3_bucket_name') parser.add_argument('-G', help=('optional - s3 region name'), required=False, dest='s3_region_name') parser.add_argument('-p', help=('optional - redis_password'), required=False, dest='redis_password') parser.add_argument('-r', help=('optional - redis_address format: <host:port>'), required=False, dest='redis_address') parser.add_argument('-n', help=('optional - redis and s3 key name'), required=False, dest='keyname') parser.add_argument( '-m', help=('optional - redis database number (0 by default)'), required=False, dest='redis_db') parser.add_argument('-x', help=('optional - redis expiration in seconds'), required=False, dest='redis_expire') parser.add_argument('-z', help=('optional - strike price'), required=False, dest='strike') parser.add_argument( '-c', help=('optional - contract type "C" for calls "P" for puts'), required=False, dest='contract_type') parser.add_argument( '-P', help=('optional - get pricing data if "1" or "0" disabled'), required=False, dest='get_pricing') parser.add_argument( '-N', help=('optional - get news data if "1" or "0" disabled'), required=False, dest='get_news') parser.add_argument( '-O', help=('optional - get options data if "1" or "0" disabled'), required=False, dest='get_options') parser.add_argument('-U', help=('optional - s3 enabled for publishing if "1" or ' '"0" is disabled'), required=False, dest='s3_enabled') parser.add_argument( '-R', help=('optional - redis enabled for publishing if "1" or ' '"0" is disabled'), required=False, dest='redis_enabled') parser.add_argument('-A', help=('optional - run an analysis ' 'supported modes: scn'), required=False, dest='analysis_type') 
parser.add_argument('-L', help=('optional - screener urls to pull ' 'tickers for analysis'), required=False, dest='urls') parser.add_argument( '-Z', help=('disable run without an engine for local testing and demos'), required=False, dest='celery_enabled', action='store_true') parser.add_argument('-d', help=('debug'), required=False, dest='debug', action='store_true') args = parser.parse_args() run_offline = True ticker = ae_consts.TICKER ticker_id = ae_consts.TICKER_ID fetch_mode = 'all' exp_date_str = ae_consts.NEXT_EXP_STR ssl_options = ae_consts.SSL_OPTIONS transport_options = ae_consts.TRANSPORT_OPTIONS broker_url = ae_consts.WORKER_BROKER_URL backend_url = ae_consts.WORKER_BACKEND_URL celery_config_module = ae_consts.WORKER_CELERY_CONFIG_MODULE include_tasks = ae_consts.INCLUDE_TASKS s3_access_key = ae_consts.S3_ACCESS_KEY s3_secret_key = ae_consts.S3_SECRET_KEY s3_region_name = ae_consts.S3_REGION_NAME s3_address = ae_consts.S3_ADDRESS s3_secure = ae_consts.S3_SECURE s3_bucket_name = ae_consts.S3_BUCKET s3_key = ae_consts.S3_KEY redis_address = ae_consts.REDIS_ADDRESS redis_key = ae_consts.REDIS_KEY redis_password = ae_consts.REDIS_PASSWORD redis_db = ae_consts.REDIS_DB redis_expire = ae_consts.REDIS_EXPIRE strike = None contract_type = None get_pricing = True get_news = True get_options = True s3_enabled = True redis_enabled = True analysis_type = None debug = False if args.ticker: ticker = args.ticker.upper() if args.ticker_id: ticker_id = args.ticker_id if args.exp_date_str: exp_date_str = ae_consts.NEXT_EXP_STR if args.broker_url: broker_url = args.broker_url if args.backend_url: backend_url = args.backend_url if args.s3_access_key: s3_access_key = args.s3_access_key if args.s3_secret_key: s3_secret_key = args.s3_secret_key if args.s3_region_name: s3_region_name = args.s3_region_name if args.s3_address: s3_address = args.s3_address if args.s3_secure: s3_secure = args.s3_secure if args.s3_bucket_name: s3_bucket_name = args.s3_bucket_name if args.keyname: s3_key = args.keyname redis_key = args.keyname if args.redis_address: redis_address = args.redis_address if args.redis_password: redis_password = args.redis_password if args.redis_db: redis_db = args.redis_db if args.redis_expire: redis_expire = args.redis_expire if args.strike: strike = args.strike if args.contract_type: contract_type = args.contract_type if args.get_pricing: get_pricing = args.get_pricing == '1' if args.get_news: get_news = args.get_news == '1' if args.get_options: get_options = args.get_options == '1' if args.s3_enabled: s3_enabled = args.s3_enabled == '1' if args.redis_enabled: redis_enabled = args.redis_enabled == '1' if args.fetch_mode: fetch_mode = str(args.fetch_mode).lower() if args.analysis_type: analysis_type = str(args.analysis_type).lower() if args.celery_enabled: run_offline = False if args.debug: debug = True work = api_requests.build_get_new_pricing_request() work['ticker'] = ticker work['ticker_id'] = ticker_id work['s3_bucket'] = s3_bucket_name work['s3_key'] = s3_key work['redis_key'] = redis_key work['strike'] = strike work['contract'] = contract_type work['exp_date'] = exp_date_str work['s3_access_key'] = s3_access_key work['s3_secret_key'] = s3_secret_key work['s3_region_name'] = s3_region_name work['s3_address'] = s3_address work['s3_secure'] = s3_secure work['redis_address'] = redis_address work['redis_password'] = redis_password work['redis_db'] = redis_db work['redis_expire'] = redis_expire work['get_pricing'] = get_pricing work['get_news'] = get_news work['get_options'] = get_options 
work['s3_enabled'] = s3_enabled work['redis_enabled'] = redis_enabled work['fetch_mode'] = fetch_mode work['analysis_type'] = analysis_type work['iex_datasets'] = iex_consts.DEFAULT_FETCH_DATASETS work['debug'] = debug work['label'] = 'ticker={}'.format(ticker) if analysis_type == 'scn': label = 'screener={}'.format(work['ticker']) fv_urls = [] if args.urls: fv_urls = str(args.urls).split('|') if len(fv_urls) == 0: fv_urls = os.getenv('SCREENER_URLS', []).split('|') screener_req = api_requests.build_screener_analysis_request( ticker=ticker, fv_urls=fv_urls, label=label) work.update(screener_req) start_screener_analysis(req=work) # end of analysis_type else: if not args.keyname: last_close_date = ae_utils.last_close() work['s3_key'] = '{}_{}'.format( work['ticker'], last_close_date.strftime(ae_consts.COMMON_DATE_FORMAT)) work['redis_key'] = '{}_{}'.format( work['ticker'], last_close_date.strftime(ae_consts.COMMON_DATE_FORMAT)) path_to_tasks = 'analysis_engine.work_tasks' task_name = ('{}.get_new_pricing_data.get_new_pricing_data'.format( path_to_tasks)) task_res = None if ae_consts.is_celery_disabled() or run_offline: work['celery_disabled'] = True log.debug('starting without celery work={} offline={}'.format( ae_consts.ppj(work), run_offline)) task_res = task_pricing.get_new_pricing_data(work) if debug: log.info('done - result={} ' 'task={} status={} ' 'err={} label={}'.format( ae_consts.ppj(task_res), task_name, ae_consts.get_status(status=task_res['status']), task_res['err'], work['label'])) else: log.info('done - result ' 'task={} status={} ' 'err={} label={}'.format( task_name, ae_consts.get_status(status=task_res['status']), task_res['err'], work['label'])) # if/else debug else: log.info('connecting to broker={} backend={}'.format( broker_url, backend_url)) # Get the Celery app app = get_celery_app.get_celery_app( name=__name__, auth_url=broker_url, backend_url=backend_url, path_to_config_module=celery_config_module, ssl_options=ssl_options, transport_options=transport_options, include_tasks=include_tasks) log.info('calling task={} - work={}'.format( task_name, ae_consts.ppj(work))) job_id = app.send_task(task_name, (work, )) log.info('calling task={} - success job_id={}'.format( task_name, job_id))
#!/usr/bin/env python

from analysis_engine.utils import last_close

last_close_str = last_close().strftime('%Y-%m-%d %H:%M:%S')
print(last_close_str)