def run_algo( ticker=None, tickers=None, algo=None, # optional derived ``analysis_engine.algo.Algo`` instance balance=None, # float starting base capital commission=None, # float for single trade commission for buy or sell start_date=None, # string YYYY-MM-DD HH:MM:SS end_date=None, # string YYYY-MM-DD HH:MM:SS datasets=None, # string list of identifiers num_owned_dict=None, # not supported cache_freq='daily', # 'minute' not supported auto_fill=True, load_config=None, report_config=None, history_config=None, extract_config=None, use_key=None, extract_mode='all', iex_datasets=None, redis_enabled=True, redis_address=None, redis_db=None, redis_password=None, redis_expire=None, redis_key=None, s3_enabled=True, s3_address=None, s3_bucket=None, s3_access_key=None, s3_secret_key=None, s3_region_name=None, s3_secure=False, s3_key=None, celery_disabled=True, broker_url=None, result_backend=None, label=None, name=None, timeseries=None, trade_strategy=None, verbose=False, publish_to_slack=True, publish_to_s3=True, publish_to_redis=True, extract_datasets=None, config_file=None, config_dict=None, version=1, raise_on_err=True, **kwargs): """run_algo Run an algorithm with steps: 1) Extract redis keys between dates 2) Compile a data pipeline dictionary (call it ``data``) 3) Call algorithm's ``myalgo.handle_data(data=data)`` .. note:: If no ``algo`` is set, the ``analysis_engine.algo.BaseAlgo`` algorithm is used. .. note:: Please ensure Redis and Minio are running before trying to extract tickers **Stock tickers to extract** :param ticker: single stock ticker/symbol/ETF to extract :param tickers: optional - list of tickers to extract :param use_key: optional - extract historical key from Redis **Algo Configuration** :param algo: derived instance of ``analysis_engine.algo.Algo`` object :param balance: optional - float balance parameter can also be set on the ``algo`` object if not set on the args :param commission: float for single trade commission for buy or sell. can also be set on the ``algo`` objet :param start_date: string ``YYYY-MM-DD_HH:MM:SS`` cache value :param end_date: string ``YYYY-MM-DD_HH:MM:SS`` cache value :param dataset_types: list of strings that are ``iex`` or ``yahoo`` datasets that are cached. :param cache_freq: optional - depending on if you are running data feeds on a ``daily`` cron (default) vs every ``minute`` (or faster) :param num_owned_dict: not supported yet :param auto_fill: optional - boolean for auto filling buy/sell orders for backtesting (default is ``True``) :param trading_calendar: ``trading_calendar.TradingCalendar`` object, by default ``analysis_engine.calendars. always_open.AlwaysOpen`` trading calendar # TradingCalendar by ``TFSExchangeCalendar`` :param config_file: path to a json file containing custom algorithm object member values (like indicator configuration and predict future date units ahead for a backtest) :param config_dict: optional - dictionary that can be passed to derived class implementations of: ``def load_from_config(config_dict=config_dict)`` **Timeseries** :param timeseries: optional - string to set ``day`` or ``minute`` backtesting or live trading (default is ``minute``) **Trading Strategy** :param trade_strategy: optional - string to set the type of ``Trading Strategy`` for backtesting or live trading (default is ``count``) **Algorithm Dataset Loading, Extracting, Reporting and Trading History arguments** :param load_config: optional - dictionary for setting member variables to load an agorithm-ready dataset from a file, s3 or redis :param report_config: optional - dictionary for setting member variables to publish an algo ``trading performance report`` to s3, redis, a file or slack :param history_config: optional - dictionary for setting member variables to publish an algo ``trade history`` to s3, redis, a file or slack :param extract_config: optional - dictionary for setting member variables to publish an algo ``trading performance report`` to s3, redis, a file or slack **(Optional) Data sources, datafeeds and datasets to gather** :param iex_datasets: list of strings for gathering specific `IEX datasets <https://iexcloud.io/>`__ which are set as consts: ``analysis_engine.iex.consts.FETCH_*``. **(Optional) Redis connectivity arguments** :param redis_enabled: bool - toggle for auto-caching all datasets in Redis (default is ``True``) :param redis_address: Redis connection string format is ``host:port`` (default is ``localhost:6379``) :param redis_db: Redis db to use (default is ``0``) :param redis_password: optional - Redis password (default is ``None``) :param redis_expire: optional - Redis expire value (default is ``None``) :param redis_key: optional - redis key not used (default is ``None``) **(Optional) Minio (S3) connectivity arguments** :param s3_enabled: bool - toggle for auto-archiving on Minio (S3) (default is ``True``) :param s3_address: Minio S3 connection string format ``host:port`` (default is ``localhost:9000``) :param s3_bucket: S3 Bucket for storing the artifacts (default is ``dev``) which should be viewable on a browser: http://localhost:9000/minio/dev/ :param s3_access_key: S3 Access key (default is ``trexaccesskey``) :param s3_secret_key: S3 Secret key (default is ``trex123321``) :param s3_region_name: S3 region name (default is ``us-east-1``) :param s3_secure: Transmit using tls encryption (default is ``False``) :param s3_key: optional s3 key not used (default is ``None``) **(Optional) Celery worker broker connectivity arguments** :param celery_disabled: bool - toggle synchronous mode or publish to an engine connected to the `Celery broker and backend <https://github.com/celery/celery#transports-and-backends>`__ (default is ``True`` - synchronous mode without an engine or need for a broker or backend for Celery) :param broker_url: Celery broker url (default is ``redis://0.0.0.0:6379/13``) :param result_backend: Celery backend url (default is ``redis://0.0.0.0:6379/14``) :param label: tracking log label :param publish_to_slack: optional - boolean for publishing to slack (coming soon) :param publish_to_s3: optional - boolean for publishing to s3 (coming soon) :param publish_to_redis: optional - boolean for publishing to redis (coming soon) **(Optional) Debugging** :param verbose: bool - show extract warnings and other debug logging (default is False) :param raise_on_err: optional - boolean for unittests and developing algorithms with the ``analysis_engine.run_algo.run_algo`` helper. When set to ``True`` exceptions will are raised to the calling functions :param kwargs: keyword arguments dictionary """ # dictionary structure with a list sorted on: ascending dates # algo_data_req[ticker][list][dataset] = pd.DataFrame algo_data_req = {} extract_requests = [] return_algo = False # return created algo objects for use by caller rec = {} msg = None use_tickers = tickers use_balance = balance use_commission = commission if ticker: use_tickers = [ticker] else: if not use_tickers: use_tickers = [] # if these are not set as args, but the algo object # has them, use them instead: if algo: if len(use_tickers) == 0: use_tickers = algo.get_tickers() if not use_balance: use_balance = algo.get_balance() if not use_commission: use_commission = algo.get_commission() default_iex_datasets = [ 'daily', 'minute', 'quote', 'stats', 'peers', 'news', 'financials', 'earnings', 'dividends', 'company' ] if not iex_datasets: iex_datasets = default_iex_datasets if redis_enabled: if not redis_address: redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379') if not redis_password: redis_password = os.getenv('REDIS_PASSWORD', None) if not redis_db: redis_db = int(os.getenv('REDIS_DB', '0')) if not redis_expire: redis_expire = os.getenv('REDIS_EXPIRE', None) if s3_enabled: if not s3_address: s3_address = os.getenv('S3_ADDRESS', 'localhost:9000') if not s3_access_key: s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey') if not s3_secret_key: s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321') if not s3_region_name: s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1') if not s3_secure: s3_secure = os.getenv('S3_SECURE', '0') == '1' if not s3_bucket: s3_bucket = os.getenv('S3_BUCKET', 'dev') if not broker_url: broker_url = os.getenv('WORKER_BROKER_URL', 'redis://0.0.0.0:6379/11') if not result_backend: result_backend = os.getenv('WORKER_BACKEND_URL', 'redis://0.0.0.0:6379/12') if not label: label = 'run-algo' num_tickers = len(use_tickers) last_close_str = ae_utils.get_last_close_str() if iex_datasets: if verbose: log.info(f'{label} - tickers={num_tickers} ' f'iex={json.dumps(iex_datasets)}') else: if verbose: log.info(f'{label} - tickers={num_tickers}') ticker_key = use_key if not ticker_key: ticker_key = f'{ticker}_{last_close_str}' if not algo: algo = base_algo.BaseAlgo(ticker=None, tickers=use_tickers, balance=use_balance, commission=use_commission, config_dict=config_dict, name=label, auto_fill=auto_fill, timeseries=timeseries, trade_strategy=trade_strategy, publish_to_slack=publish_to_slack, publish_to_s3=publish_to_s3, publish_to_redis=publish_to_redis, raise_on_err=raise_on_err) return_algo = True # the algo object is stored # in the result at: res['rec']['algo'] if not algo: msg = f'{label} - missing algo object' log.error(msg) return build_result.build_result(status=ae_consts.EMPTY, err=msg, rec=rec) if raise_on_err: log.debug(f'{label} - enabling algo exception raises') algo.raise_on_err = True indicator_datasets = algo.get_indicator_datasets() if len(indicator_datasets) == 0: indicator_datasets = ae_consts.BACKUP_DATASETS log.info(f'using all datasets={indicator_datasets}') verbose_extract = False if config_dict: verbose_extract = config_dict.get('verbose_extract', False) common_vals = {} common_vals['base_key'] = ticker_key common_vals['celery_disabled'] = celery_disabled common_vals['ticker'] = ticker common_vals['label'] = label common_vals['iex_datasets'] = iex_datasets common_vals['s3_enabled'] = s3_enabled common_vals['s3_bucket'] = s3_bucket common_vals['s3_address'] = s3_address common_vals['s3_secure'] = s3_secure common_vals['s3_region_name'] = s3_region_name common_vals['s3_access_key'] = s3_access_key common_vals['s3_secret_key'] = s3_secret_key common_vals['s3_key'] = ticker_key common_vals['redis_enabled'] = redis_enabled common_vals['redis_address'] = redis_address common_vals['redis_password'] = redis_password common_vals['redis_db'] = redis_db common_vals['redis_key'] = ticker_key common_vals['redis_expire'] = redis_expire use_start_date_str = start_date use_end_date_str = end_date last_close_date = ae_utils.last_close() end_date_val = None cache_freq_fmt = ae_consts.COMMON_TICK_DATE_FORMAT if not use_end_date_str: use_end_date_str = last_close_date.strftime(cache_freq_fmt) end_date_val = ae_utils.get_date_from_str(date_str=use_end_date_str, fmt=cache_freq_fmt) start_date_val = None if not use_start_date_str: start_date_val = end_date_val - datetime.timedelta(days=60) use_start_date_str = start_date_val.strftime(cache_freq_fmt) else: start_date_val = datetime.datetime.strptime( use_start_date_str, ae_consts.COMMON_TICK_DATE_FORMAT) total_dates = (end_date_val - start_date_val).days if end_date_val < start_date_val: msg = ( f'{label} - invalid dates - start_date={start_date_val} is after ' f'end_date={end_date_val}') raise Exception(msg) if verbose: log.info(f'{label} - days={total_dates} ' f'start={use_start_date_str} ' f'end={use_end_date_str} ' f'datasets={indicator_datasets}') for ticker in use_tickers: req = algo_utils.build_algo_request(ticker=ticker, use_key=use_key, start_date=use_start_date_str, end_date=use_end_date_str, datasets=datasets, balance=use_balance, cache_freq=cache_freq, timeseries=timeseries, trade_strategy=trade_strategy, label=label) ticker_key = f'{ticker}_{last_close_str}' common_vals['ticker'] = ticker common_vals['base_key'] = ticker_key common_vals['redis_key'] = ticker_key common_vals['s3_key'] = ticker_key for date_key in req['extract_datasets']: date_req = api_requests.get_ds_dict(ticker=ticker, base_key=date_key, ds_id=label, service_dict=common_vals) node_date_key = date_key.replace(f'{ticker}_', '') extract_requests.append({ 'id': date_key, 'ticker': ticker, 'date_key': date_key, 'date': node_date_key, 'req': date_req }) # end of for all ticker in use_tickers first_extract_date = None last_extract_date = None total_extract_requests = len(extract_requests) cur_idx = 1 for idx, extract_node in enumerate(extract_requests): extract_ticker = extract_node['ticker'] extract_date = extract_node['date'] ds_node_id = extract_node['id'] if not first_extract_date: first_extract_date = extract_date last_extract_date = extract_date perc_progress = ae_consts.get_percent_done( progress=cur_idx, total=total_extract_requests) percent_label = (f'{label} ' f'ticker={extract_ticker} ' f'date={extract_date} ' f'{perc_progress} ' f'{idx}/{total_extract_requests} ' f'{indicator_datasets}') if verbose: log.info(f'extracting - {percent_label}') ticker_bt_data = build_ds_node.build_dataset_node( ticker=extract_ticker, date=extract_date, service_dict=common_vals, datasets=indicator_datasets, log_label=label, verbose=verbose_extract) if ticker not in algo_data_req: algo_data_req[ticker] = [] algo_data_req[ticker].append({ 'id': ds_node_id, # id is currently the cache key in redis 'date': extract_date, # used to confirm dates in asc order 'data': ticker_bt_data }) if verbose: log.info(f'extract - {percent_label} ' f'dataset={len(algo_data_req[ticker])}') cur_idx += 1 # end of for service_dict in extract_requests # this could be a separate celery task status = ae_consts.NOT_RUN if len(algo_data_req) == 0: msg = (f'{label} - nothing to test - no data found for ' f'tickers={use_tickers} ' f'between {first_extract_date} and {last_extract_date}') log.info(msg) return build_result.build_result(status=ae_consts.EMPTY, err=msg, rec=rec) # this could be a separate celery task try: if verbose: log.info(f'handle_data START - {percent_label} from ' f'{first_extract_date} to {last_extract_date}') algo.handle_data(data=algo_data_req) if verbose: log.info(f'handle_data END - {percent_label} from ' f'{first_extract_date} to {last_extract_date}') except Exception as e: a_name = algo.get_name() a_debug_msg = algo.get_debug_msg() if not a_debug_msg: a_debug_msg = 'debug message not set' a_config_dict = ae_consts.ppj(algo.config_dict) msg = (f'{percent_label} - algo={a_name} ' f'encountered exception in handle_data tickers={use_tickers} ' f'from {first_extract_date} to {last_extract_date} ex={e} ' f'and failed during operation: {a_debug_msg}') if raise_on_err: if algo: try: ind_obj = \ algo.get_indicator_process_last_indicator() if ind_obj: ind_obj_path = ind_obj.get_path_to_module() ind_obj_config = ae_consts.ppj(ind_obj.get_config()) found_error_hint = False if hasattr(ind_obj.use_df, 'to_json'): if len(ind_obj.use_df.index) == 0: log.critical( f'indicator failure report for ' f'last module: ' f'{ind_obj_path} ' f'indicator={ind_obj.get_name()} ' f'config={ind_obj_config} ' f'dataset={ind_obj.use_df.head(5)} ' f'name_of_dataset={ind_obj.uses_data}') log.critical( '--------------------------------------' '--------------------------------------') log.critical('Please check if this indicator: ' f'{ind_obj_path} ' 'supports Empty Dataframes') log.critical( '--------------------------------------' '--------------------------------------') found_error_hint = True # indicator error hints if not found_error_hint: log.critical( f'indicator failure report for last module: ' f'{ind_obj_path} ' f'indicator={ind_obj.get_name()} ' f'config={ind_obj_config} ' f'dataset={ind_obj.use_df.head(5)} ' f'name_of_dataset={ind_obj.uses_data}') except Exception as f: log.critical(f'failed to pull indicator processor ' f'last indicator for debugging ' f'from ex={e} with parsing ex={f}') # end of ignoring non-supported ways of creating # indicator processors log.error(msg) log.error(f'algo failure report: ' f'algo={a_name} handle_data() ' f'config={a_config_dict} ') log.critical(f'algo failed during operation: {a_debug_msg}') raise e else: log.error(msg) return build_result.build_result(status=ae_consts.ERR, err=msg, rec=rec) # end of try/ex # this could be a separate celery task try: if verbose: log.info(f'get_result START - {percent_label} from ' f'{first_extract_date} to {last_extract_date}') rec = algo.get_result() status = ae_consts.SUCCESS if verbose: log.info(f'get_result END - {percent_label} from ' f'{first_extract_date} to {last_extract_date}') except Exception as e: msg = ( f'{percent_label} - algo={algo.get_name()} encountered exception ' f'in get_result tickers={use_tickers} from ' f'{first_extract_date} to {last_extract_date} ex={e}') if raise_on_err: if algo: log.error(f'algo={algo.get_name()} failed in get_result with ' f'debug_msg={algo.get_debug_msg()}') log.error(msg) raise e else: log.error(msg) return build_result.build_result(status=ae_consts.ERR, err=msg, rec=rec) # end of try/ex if return_algo: rec['algo'] = algo return build_result.build_result(status=status, err=msg, rec=rec)
def build_algo_request(ticker=None, tickers=None, use_key=None, start_date=None, end_date=None, datasets=None, balance=None, commission=None, num_shares=None, config_file=None, config_dict=None, load_config=None, history_config=None, report_config=None, extract_config=None, timeseries=None, trade_strategy=None, cache_freq='daily', label='algo'): """build_algo_request Create a dictionary for building an algorithm. This is opinionated to how the underlying date-based caching strategy is running per day. Each business day becomes a possible dataset to process with an algorithm. :param ticker: ticker :param tickers: optional - list of tickers :param use_key: redis and s3 to store the algo result :param start_date: string date format ``YYYY-MM-DD HH:MM:SS`` :param end_date: string date format ``YYYY-MM-DD HH:MM:SS`` :param datasets: list of string dataset types :param balance: starting capital balance :param commission: commission for buy or sell :param num_shares: optional - integer number of starting shares :param cache_freq: optional - cache frequency (``daily`` is default) :param label: optional - algo log tracking name :param config_file: path to a json file containing custom algorithm object member values (like indicator configuration and predict future date units ahead for a backtest) :param config_dict: optional - dictionary that can be passed to derived class implementations of: ``def load_from_config(config_dict=config_dict)`` **Timeseries** :param timeseries: optional - string to set ``day`` or ``minute`` backtesting or live trading (default is ``minute``) **Trading Strategy** :param trade_strategy: optional - string to set the type of ``Trading Strategy`` for backtesting or live trading (default is ``count``) **Algorithm Dataset Extraction, Loading and Publishing arguments** :param load_config: optional - dictionary for setting member variables to load an agorithm-ready dataset from a file, s3 or redis :param history_config: optional - dictionary for setting member variables to publish an algo ``trade history`` to s3, redis, a file or slack :param report_config: optional - dictionary for setting member variables to publish an algo ``trading performance report`` to s3, redis, a file or slack :param extract_config: optional - dictionary for setting member variables to publish an algo ``trading performance report`` to s3, redis, a file or slack """ use_tickers = [] if ticker: use_tickers = [ticker.upper()] if tickers: for t in tickers: if t not in use_tickers: use_tickers.append(t.upper()) s3_bucket_name = ae_consts.ALGO_RESULT_S3_BUCKET_NAME s3_key = use_key redis_key = use_key s3_enabled = True redis_enabled = True work = { 'tickers': use_tickers, 's3_bucket': s3_bucket_name, 's3_key': s3_key, 'redis_key': redis_key, 's3_enabled': s3_enabled, 'redis_enabled': redis_enabled, 'extract_datasets': [], 'cache_freq': cache_freq, 'config_file': config_file, 'config_dict': config_dict, 'balance': balance, 'commission': commission, 'load_config': load_config, 'history_config': history_config, 'report_config': report_config, 'extract_config': extract_config, 'start_date': None, 'end_date': None, 'timeseries': timeseries, 'trade_strategy': trade_strategy, 'version': 1, 'label': label } start_date_val = ae_utils.get_date_from_str(start_date) end_date_val = ae_utils.get_date_from_str(end_date) if start_date_val > end_date_val: raise Exception( 'Invalid start_date={} must be less than end_date={}'.format( start_date, end_date)) use_dates = [] new_dataset = None cur_date = start_date_val if not work['start_date']: work['start_date'] = start_date_val.strftime( ae_consts.COMMON_TICK_DATE_FORMAT) if not work['end_date']: work['end_date'] = end_date_val.strftime( ae_consts.COMMON_TICK_DATE_FORMAT) while cur_date <= end_date_val: if cur_date.weekday() < 5: for t in use_tickers: if cache_freq == 'daily': new_dataset = '{}_{}'.format( t, cur_date.strftime(ae_consts.COMMON_DATE_FORMAT)) else: new_dataset = '{}_{}'.format( t, cur_date.strftime(ae_consts.COMMON_TICK_DATE_FORMAT)) if new_dataset: use_dates.append(new_dataset) new_dataset = None # end for all tickers # end of valid days M-F if cache_freq == 'daily': cur_date += datetime.timedelta(days=1) else: cur_date += datetime.timedelta(minute=1) # end of walking all dates to add if len(use_dates) > 0: work['extract_datasets'] = use_dates log.debug('tickers={} balance={} start={} end={} ' 'cache_freq={} request={}'.format( work['tickers'], work['balance'], work['extract_datasets'][0], work['extract_datasets'][-1], cache_freq, ae_consts.ppj(work))) else: log.error('there are not enough dates to test between start={} end={} ' 'tickers={} request={}'.format(start_date_val, end_date_val, work['tickers'], cache_freq, ae_consts.ppj(work))) return work