def load_algo_dataset_from_redis(
        redis_key,
        redis_address,
        redis_db,
        redis_password,
        redis_expire=None,
        redis_serializer='json',
        serialize_datasets=DEFAULT_SERIALIZED_DATASETS,
        compress=False,
        encoding='utf-8'):
    """load_algo_dataset_from_redis

    Load an algorithm-ready dataset for algorithm backtesting
    from a redis key

    :param redis_key: string - key to load the data from redis
    :param redis_address: Redis connection string format: ``host:port``
    :param redis_db: Redis db to use
    :param redis_password: optional - Redis password
    :param redis_expire: optional - Redis expire value
    :param redis_serializer: not used yet - support for future
        pickle objects in redis
    :param serialize_datasets: optional - list of dataset names to
        deserialize in the dataset
    :param compress: optional - boolean flag for decompressing the
        contents loaded from the redis key if necessary
        (default is ``False`` and algorithms use ``zlib`` for
        compression)
    :param encoding: optional - string for data encoding
    """
    log.debug('start')
    data_from_file = None

    redis_host = redis_address.split(':')[0]
    redis_port = int(redis_address.split(':')[1])

    redis_res = redis_utils.get_data_from_redis_key(
        key=redis_key,
        host=redis_host,
        port=redis_port,
        db=redis_db,
        password=redis_password,
        expire=redis_expire,
        serializer=redis_serializer,
        encoding=encoding)
    if redis_res['status'] != SUCCESS:
        log.error(
            'failed getting data from redis={}:{}/{}'.format(
                redis_address,
                redis_db,
                redis_key))
        return None
    data_from_file = redis_res['rec']['data']

    if not data_from_file:
        log.error(
            'missing data from redis={}:{}/{}'.format(
                redis_address,
                redis_db,
                redis_key))
        return None

    return prepare_utils.prepare_dict_for_algo(
        data=data_from_file,
        compress=compress,
        convert_to_dict=True,
        encoding=encoding)
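# Hedged usage sketch (not part of the original module): shows how
# load_algo_dataset_from_redis might be called. The redis key name
# and address below are hypothetical examples, not values defined
# by this codebase.
def _example_load_algo_dataset_from_redis():
    algo_ready = load_algo_dataset_from_redis(
        redis_key='SPY-latest',          # hypothetical key
        redis_address='localhost:6379',  # hypothetical address
        redis_db=0,
        redis_password=None)
    if algo_ready:
        for ticker in algo_ready:
            print('loaded {} dataset nodes for ticker={}'.format(
                len(algo_ready[ticker]),
                ticker))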
def build_df_from_redis(
        label=None,
        client=None,
        address=None,
        host=None,
        port=None,
        password=None,
        db=None,
        key=None,
        expire=None,
        serializer='json',
        encoding='utf-8',
        orient='records'):
    """build_df_from_redis

    :param label: log tracking label
    :param client: initialized redis client
    :param address: redis address: <host:port>
    :param host: redis host
    :param port: redis port
    :param password: redis password
    :param db: redis db
    :param key: redis key
    :param expire: not used yet - redis expire
    :param serializer: support for future pickle objects in redis
    :param encoding: format of the encoded key in redis
    :param orient: use the same ``orient`` value that was passed to
        ``to_json(orient='records')`` so the DataFrame deserializes
        correctly
    """
    data = None
    valid_df = False
    df = None

    rec = {
        'valid_df': valid_df,
        'data': data
    }
    res = build_result.build_result(
        status=NOT_RUN,
        err=None,
        rec=rec)

    log_id = label if label else 'build-df'

    try:
        log.debug(
            '{} calling get redis key={}'.format(
                log_id,
                key))
        use_host = host
        use_port = port
        if not use_host and not use_port:
            if address:
                use_host = address.split(':')[0]
                use_port = int(address.split(':')[1])

        use_client = client
        if not use_client:
            log.debug(
                '{} connecting to redis={}:{}@{}'.format(
                    log_id,
                    use_host,
                    use_port,
                    db))
            use_client = redis.Redis(
                host=use_host,
                port=use_port,
                password=password,
                db=db)

        redis_res = redis_get.get_data_from_redis_key(
            label=log_id,
            client=use_client,
            host=use_host,
            port=use_port,
            password=password,
            db=db,
            key=key,
            expire=expire,
            serializer='json',
            encoding=encoding)

        valid_df = False
        if redis_res['status'] == SUCCESS:
            data = redis_res['rec'].get(
                'data',
                None)
            if data:
                if ev('DEBUG_REDIS', '0') == '1':
                    log.info(
                        '{} - found key={} data={}'.format(
                            log_id,
                            key,
                            ppj(data)))
                else:
                    log.debug(
                        '{} - loading df from key={}'.format(
                            log_id,
                            key))
                df = pd.read_json(
                    data,
                    orient=orient)
                valid_df = True
            else:
                log.debug(
                    '{} key={} no data'.format(
                        log_id,
                        key))
            # if data

            rec['data'] = df
            rec['valid_df'] = valid_df
            res = build_result.build_result(
                status=SUCCESS,
                err=None,
                rec=rec)
            return res
        else:
            log.debug(
                '{} no data key={}'.format(
                    log_id,
                    key))
            res = build_result.build_result(
                status=SUCCESS,
                err=None,
                rec=rec)
            return res
    except Exception as e:
        err = (
            '{} failed - build_df_from_redis data={} '
            'key={} ex={}'.format(
                log_id,
                (data == '0'),
                key,
                e))
        log.error(err)
        res = build_result.build_result(
            status=ERR,
            err=err,
            rec=rec)
    # end of try/ex for getting redis data

    return res
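# Hedged usage sketch (not part of the original module): assumes the
# redis key below already holds a DataFrame serialized with
# to_json(orient='records') in a form that get_data_from_redis_key
# hands back as a JSON string. The key name and address are
# hypothetical examples.
def _example_build_df_from_redis():
    res = build_df_from_redis(
        label='example',
        address='localhost:6379',        # hypothetical address
        db=0,
        key='SPY_2019-02-15_pricing',    # hypothetical key
        orient='records')
    if res['status'] == SUCCESS and res['rec']['valid_df']:
        print(res['rec']['data'].head())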
def restore_dataset(
        show_summary=True,
        force_restore=False,
        algo_dataset=None,
        dataset_type=SA_DATASET_TYPE_ALGO_READY,
        serialize_datasets=DEFAULT_SERIALIZED_DATASETS,
        path_to_file=None,
        compress=False,
        encoding='utf-8',
        redis_enabled=True,
        redis_key=None,
        redis_address=None,
        redis_db=None,
        redis_password=None,
        redis_expire=None,
        redis_serializer='json',
        redis_encoding='utf-8',
        redis_output_db=None,
        s3_enabled=True,
        s3_key=None,
        s3_address=None,
        s3_bucket=None,
        s3_access_key=None,
        s3_secret_key=None,
        s3_region_name=None,
        s3_secure=False,
        slack_enabled=False,
        slack_code_block=False,
        slack_full_width=False,
        verbose=False):
    """restore_dataset

    Restore missing dataset nodes in redis from an algorithm-ready
    dataset file on disk. Use this to restore redis from scratch.

    :param show_summary: optional - show a summary of the
        algorithm-ready dataset using
        ``analysis_engine.show_dataset.show_dataset``
        (default is ``True``)
    :param force_restore: optional - boolean - publish whatever is in
        the algorithm-ready dataset into redis. If ``False`` this will
        ensure datasets are only set in redis if they are not
        already set
    :param algo_dataset: optional - already loaded algorithm-ready
        dataset
    :param dataset_type: optional - dataset type
        (default is ``SA_DATASET_TYPE_ALGO_READY``)
    :param serialize_datasets: optional - list of dataset names to
        deserialize in the dataset
    :param path_to_file: optional - path to an algorithm-ready dataset
        in a file
    :param compress: optional - boolean flag for decompressing the
        contents of the ``path_to_file`` if necessary
        (default is ``False`` and algorithms use ``zlib`` for
        compression)
    :param encoding: optional - string for data encoding

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all
        datasets in Redis
        (default is ``True``)
    :param redis_key: string - key to save the data in redis
        (default is ``None``)
    :param redis_address: Redis connection string format: ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use
        (default is ``0``)
    :param redis_password: optional - Redis password
        (default is ``None``)
    :param redis_expire: optional - Redis expire value
        (default is ``None``)
    :param redis_serializer: not used yet - support for future
        pickle objects in redis
    :param redis_encoding: format of the encoded key in redis
    :param redis_output_db: optional - integer for publishing to a
        separate redis database

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for auto-archiving on Minio (S3)
        (default is ``True``)
    :param s3_key: string - key to save the data in s3
        (default is ``None``)
    :param s3_address: Minio S3 connection string format: ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key
        (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key
        (default is ``trex123321``)
    :param s3_region_name: S3 region name
        (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption
        (default is ``False``)

    **(Optional) Slack arguments**

    :param slack_enabled: optional - boolean for publishing to slack
    :param slack_code_block: optional - boolean for publishing as a
        code block in slack
    :param slack_full_width: optional - boolean for publishing to
        slack using the full width allowed

    **Additional arguments**

    :param verbose: optional - bool for increasing logging
    """
    use_ds = algo_dataset
    redis_host = REDIS_ADDRESS.split(':')[0]
    redis_port = int(REDIS_ADDRESS.split(':')[1])
    if redis_address:
        redis_host = redis_address.split(':')[0]
        redis_port = int(redis_address.split(':')[1])

    if show_summary:
        use_ds = show_dataset.show_dataset(
            dataset_type=dataset_type,
            compress=compress,
            encoding=redis_encoding,
            path_to_file=path_to_file,
            s3_key=s3_key,
            s3_address=s3_address,
            s3_bucket=s3_bucket,
            s3_access_key=s3_access_key,
            s3_secret_key=s3_secret_key,
            s3_region_name=s3_region_name,
            s3_secure=s3_secure,
            redis_key=redis_key,
            redis_address=redis_address,
            redis_db=redis_db,
            redis_password=redis_password,
            redis_expire=redis_expire,
            redis_serializer=redis_serializer,
            serialize_datasets=serialize_datasets)
    # end of if show_summary

    if not use_ds:
        log.info(
            'loading from file={} s3={} redis={}'.format(
                path_to_file,
                s3_key,
                redis_key))
        use_ds = load_dataset.load_dataset(
            dataset_type=dataset_type,
            compress=compress,
            encoding=redis_encoding,
            path_to_file=path_to_file,
            s3_key=s3_key,
            s3_address=s3_address,
            s3_bucket=s3_bucket,
            s3_access_key=s3_access_key,
            s3_secret_key=s3_secret_key,
            s3_region_name=s3_region_name,
            s3_secure=s3_secure,
            redis_key=redis_key,
            redis_address=redis_address,
            redis_db=redis_db,
            redis_password=redis_password,
            redis_expire=redis_expire,
            redis_serializer=redis_serializer,
            serialize_datasets=serialize_datasets)
    # load if not loaded

    if not use_ds:
        log.error(
            'unable to load a dataset from file={} '
            's3={} redis={}'.format(
                path_to_file,
                s3_key,
                redis_key))
        return None

    log.info('restore - start')

    # count how many serialized dataset nodes need a restore check
    total_to_restore = 0
    for ticker in use_ds:
        for ds_node in use_ds[ticker]:
            for ds_key in ds_node['data']:
                if ds_key in serialize_datasets:
                    total_to_restore += 1
    # end of counting total_to_restore

    log.info('restore - records={}'.format(total_to_restore))

    num_done = 0
    for ticker in use_ds:
        for ds_node in use_ds[ticker]:
            ds_parent_key = ds_node['id']
            log.info(
                'restore - parent_key={} - {} {}/{}'.format(
                    ds_parent_key,
                    get_percent_done(
                        progress=num_done,
                        total=total_to_restore),
                    num_done,
                    total_to_restore))
            if verbose:
                print(ds_parent_key)
            cache_res = redis_utils.get_data_from_redis_key(
                host=redis_host,
                port=redis_port,
                password=redis_password,
                db=redis_db,
                key=ds_parent_key,
                serializer=redis_serializer,
                encoding=redis_encoding,
                expire=redis_expire,
                label='restore-{}'.format(ds_parent_key))
            should_restore = False
            if (not force_restore and
                    cache_res['status'] == SUCCESS and
                    'data' in cache_res['rec'] and
                    cache_res['rec']['data'] and
                    len(cache_res['rec']['data']) > 10):
                should_restore = False
            else:
                should_restore = True

            if should_restore:
                log.info(
                    ' - parent {} restore'.format(
                        ds_parent_key))
                new_parent_rec = {
                    'exp_date': None,
                    'publish_pricing_update': None,
                    'date': ds_node['date'],
                    'updated': None,
                    'version': DATASET_COLLECTION_VERSION
                }
                for sname in serialize_datasets:
                    if sname in ds_node['data']:
                        if hasattr(
                                ds_node['data'][sname],
                                'index'):
                            new_parent_rec[sname] = \
                                ds_node['data'][sname].to_json(
                                    orient='records',
                                    date_format='iso')
                        else:
                            new_parent_rec[sname] = \
                                ds_node['data'][sname]

                publish.publish(
                    data=new_parent_rec,
                    convert_to_json=False,
                    compress=compress,
                    redis_enabled=True,
                    redis_key=ds_parent_key,
                    redis_db=redis_output_db,
                    redis_address=redis_address,
                    redis_password=redis_password,
                    redis_expire=redis_expire,
                    redis_serializer=redis_serializer,
                    redis_encoding=redis_encoding,
                    s3_enabled=False,
                    output_file=None,
                    verbose=verbose)

            # restore each nested dataset under its own child key
            for ds_key in ds_node['data']:
                if ds_key in serialize_datasets:
                    new_key = '{}_{}'.format(
                        ds_parent_key,
                        ds_key)
                    if hasattr(
                            ds_node['data'][ds_key],
                            'index'):
                        loaded_df = ds_node['data'][ds_key]
                        if len(loaded_df.index) > 0:
                            if verbose:
                                print(
                                    ' - checking: {}'.format(
                                        new_key))
                            cache_res = \
                                redis_utils.get_data_from_redis_key(
                                    host=redis_host,
                                    port=redis_port,
                                    password=redis_password,
                                    db=redis_db,
                                    key=new_key,
                                    serializer=redis_serializer,
                                    encoding=redis_encoding,
                                    expire=redis_expire,
                                    label='restore-{}'.format(new_key))
                            should_restore = False
                            if (not force_restore and
                                    cache_res['status'] == SUCCESS and
                                    'data' in cache_res['rec'] and
                                    cache_res['rec']['data'] and
                                    len(cache_res['rec']['data']) > 10):
                                should_restore = False
                            else:
                                if (str(cache_res['rec']['data']) !=
                                        EMPTY_DF_STR):
                                    should_restore = True
                            if should_restore:
                                log.info(
                                    'restore nested dataset: '
                                    'parent={} key={}'.format(
                                        ds_parent_key,
                                        new_key))
                                publish.publish(
                                    data=loaded_df,
                                    is_df=True,
                                    compress=compress,
                                    redis_enabled=True,
                                    redis_key=new_key,
                                    redis_db=redis_output_db,
                                    redis_address=redis_address,
                                    redis_password=redis_password,
                                    redis_expire=redis_expire,
                                    redis_serializer=redis_serializer,
                                    redis_encoding=redis_encoding,
                                    s3_enabled=False,
                                    output_file=None,
                                    verbose=verbose)
                            else:
                                if verbose:
                                    print(
                                        ' - checking: {} - SKIP'.format(
                                            new_key))
                        else:
                            if verbose:
                                print(
                                    ' - {} - no data to sync'.format(
                                        new_key))
                    # end of is a dataframe
                    # else:
                    # end of handling dataframe vs dictionary

                    num_done += 1
            # end of for all datasets
            print('-----------------------------------')
    # end for all dataset to restore

    log.info(
        'restore - done - num_done={} total={}'.format(
            num_done,
            total_to_restore))

    return use_ds
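# Hedged usage sketch (not part of the original module): restore any
# missing dataset nodes in redis from an algorithm-ready file on
# disk. The file path and redis settings below are hypothetical
# examples.
def _example_restore_dataset():
    restored = restore_dataset(
        path_to_file='/tmp/SPY-latest.json',  # hypothetical path
        compress=False,
        redis_address='localhost:6379',       # hypothetical address
        redis_db=0,
        redis_output_db=0,
        force_restore=False,
        verbose=True)
    if restored is None:
        print('nothing restored - check the file path and redis')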
def prepare_pricing_dataset(
        self,
        work_dict):
    """prepare_pricing_dataset

    Prepare a dataset for analysis. Supports loading the dataset
    from s3 if it is not found in redis. Outputs the prepared
    artifact as a csv to s3 and redis.

    :param work_dict: dictionary for key/values
    """

    label = 'prepare'

    log.info(
        'task - {} - start '
        'work_dict={}'.format(
            label,
            work_dict))

    initial_data = None

    ticker = ae_consts.TICKER
    ticker_id = ae_consts.TICKER_ID
    rec = {
        'ticker': None,
        'ticker_id': None,
        's3_enabled': True,
        'redis_enabled': True,
        's3_bucket': None,
        's3_key': None,
        'redis_key': None,
        'prepared_s3_key': None,
        'prepared_s3_bucket': None,
        'prepared_redis_key': None,
        'prepared_data': None,
        'prepared_size': None,
        'initial_data': None,
        'initial_size': None,
        'ignore_columns': None,
        'updated': None
    }
    res = build_result.build_result(
        status=ae_consts.NOT_RUN,
        err=None,
        rec=rec)

    try:
        ticker = work_dict.get(
            'ticker',
            ae_consts.TICKER)
        ticker_id = int(work_dict.get(
            'ticker_id',
            ae_consts.TICKER_ID))

        if not ticker:
            res = build_result.build_result(
                status=ae_consts.ERR,
                err='missing ticker',
                rec=rec)
            return res

        label = work_dict.get(
            'label',
            label)
        s3_key = work_dict.get(
            's3_key',
            None)
        s3_bucket_name = work_dict.get(
            's3_bucket',
            'pricing')
        s3_access_key = work_dict.get(
            's3_access_key',
            ae_consts.S3_ACCESS_KEY)
        s3_secret_key = work_dict.get(
            's3_secret_key',
            ae_consts.S3_SECRET_KEY)
        s3_region_name = work_dict.get(
            's3_region_name',
            ae_consts.S3_REGION_NAME)
        s3_address = work_dict.get(
            's3_address',
            ae_consts.S3_ADDRESS)
        s3_secure = work_dict.get(
            's3_secure',
            ae_consts.S3_SECURE) == '1'
        redis_address = work_dict.get(
            'redis_address',
            ae_consts.REDIS_ADDRESS)
        redis_key = work_dict.get(
            'redis_key',
            ae_consts.REDIS_KEY)
        redis_password = work_dict.get(
            'redis_password',
            ae_consts.REDIS_PASSWORD)
        redis_db = work_dict.get(
            'redis_db',
            None)
        if not redis_db:
            redis_db = ae_consts.REDIS_DB
        redis_expire = None
        if 'redis_expire' in work_dict:
            redis_expire = work_dict.get(
                'redis_expire',
                ae_consts.REDIS_EXPIRE)
        updated = work_dict.get(
            'updated',
            datetime.datetime.utcnow().strftime(
                '%Y_%m_%d_%H_%M_%S'))
        prepared_s3_key = work_dict.get(
            'prepared_s3_key',
            '{}_{}.csv'.format(
                ticker,
                updated))
        prepared_s3_bucket = work_dict.get(
            'prepared_s3_bucket',
            'prepared')
        prepared_redis_key = work_dict.get(
            'prepared_redis_key',
            'prepared')
        ignore_columns = work_dict.get(
            'ignore_columns',
            None)
        log.info(
            '{} redis enabled address={}@{} '
            'key={} prepare_s3={}:{} prepare_redis={} '
            'ignore_columns={}'.format(
                label,
                redis_address,
                redis_db,
                redis_key,
                prepared_s3_bucket,
                prepared_s3_key,
                prepared_redis_key,
                ignore_columns))
        redis_host = redis_address.split(':')[0]
        redis_port = redis_address.split(':')[1]

        enable_s3 = True
        enable_redis_publish = True

        rec['ticker'] = ticker
        rec['ticker_id'] = ticker_id
        rec['s3_bucket'] = s3_bucket_name
        rec['s3_key'] = s3_key
        rec['redis_key'] = redis_key
        rec['prepared_s3_key'] = prepared_s3_key
        rec['prepared_s3_bucket'] = prepared_s3_bucket
        rec['prepared_redis_key'] = prepared_redis_key
        rec['updated'] = updated
        rec['s3_enabled'] = enable_s3
        rec['redis_enabled'] = enable_redis_publish

        try:
            log.info(
                '{} connecting redis={}:{} '
                'db={} key={} '
                'updated={} expire={}'.format(
                    label,
                    redis_host,
                    redis_port,
                    redis_db,
                    redis_key,
                    updated,
                    redis_expire))
            rc = redis.Redis(
                host=redis_host,
                port=redis_port,
                password=redis_password,
                db=redis_db)
        except Exception as e:
            err = (
                '{} failed - redis connection to address={}@{} '
                'key={} ex={}'.format(
                    label,
                    redis_address,
                    redis_db,
                    redis_key,
                    e))
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        # end of try/ex for connecting to redis

        initial_data_res = redis_get.get_data_from_redis_key(
            label=label,
            client=rc,
            key=redis_key)
        log.info(
            '{} get redis key={} status={} err={}'.format(
                label,
                redis_key,
                ae_consts.get_status(initial_data_res['status']),
                initial_data_res['err']))

        initial_data = initial_data_res['rec'].get(
            'data',
            None)

        if enable_s3 and not initial_data:
            log.info(
                '{} failed to find redis_key={} trying s3 '
                'from s3_key={} s3_bucket={} s3_address={}'.format(
                    label,
                    redis_key,
                    s3_key,
                    s3_bucket_name,
                    s3_address))

            get_from_s3_req = \
                api_requests.build_publish_from_s3_to_redis_request()

            get_from_s3_req['s3_enabled'] = enable_s3
            get_from_s3_req['s3_access_key'] = s3_access_key
            get_from_s3_req['s3_secret_key'] = s3_secret_key
            get_from_s3_req['s3_region_name'] = s3_region_name
            get_from_s3_req['s3_address'] = s3_address
            get_from_s3_req['s3_secure'] = s3_secure
            get_from_s3_req['s3_key'] = s3_key
            get_from_s3_req['s3_bucket'] = s3_bucket_name
            get_from_s3_req['redis_key'] = redis_key
            get_from_s3_req['label'] = (
                '{}-run_publish_from_s3_to_redis'.format(
                    label))

            log.info(
                '{} load from s3={} to '
                'redis={}'.format(
                    label,
                    s3_key,
                    redis_key))

            try:
                # run in synchronous mode:
                get_from_s3_req['celery_disabled'] = True
                task_res = s3_to_redis.run_publish_from_s3_to_redis(
                    get_from_s3_req)
                if task_res.get(
                        'status',
                        ae_consts.ERR) == ae_consts.SUCCESS:
                    log.info(
                        '{} loaded s3={}:{} '
                        'to redis={} retrying'.format(
                            label,
                            s3_bucket_name,
                            s3_key,
                            redis_key))
                    initial_data_res = \
                        redis_get.get_data_from_redis_key(
                            label=label,
                            client=rc,
                            key=redis_key)
                    log.info(
                        '{} get redis try=2 key={} status={} '
                        'err={}'.format(
                            label,
                            redis_key,
                            ae_consts.get_status(
                                initial_data_res['status']),
                            initial_data_res['err']))

                    initial_data = initial_data_res['rec'].get(
                        'data',
                        None)
                else:
                    err = (
                        '{} ERR failed loading from bucket={} '
                        's3_key={} to redis_key={} with res={}'.format(
                            label,
                            s3_bucket_name,
                            s3_key,
                            redis_key,
                            task_res))
                    log.error(err)
                    res = build_result.build_result(
                        status=ae_consts.ERR,
                        err=err,
                        rec=rec)
                    return res
            except Exception as e:
                err = (
                    '{} extract from s3 and publish to redis failed '
                    'loading data from bucket={} in '
                    's3_key={} with publish to redis_key={} '
                    'with ex={}'.format(
                        label,
                        s3_bucket_name,
                        s3_key,
                        redis_key,
                        e))
                log.error(err)
                res = build_result.build_result(
                    status=ae_consts.ERR,
                    err=err,
                    rec=rec)
                return res
            # end of try/ex for publishing from s3->redis
        # end of if enable_s3

        if not initial_data:
            err = (
                '{} did not find any data to prepare in redis_key={} '
                'or s3_key={} in bucket={}'.format(
                    label,
                    redis_key,
                    s3_key,
                    s3_bucket_name))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res

        initial_data_num_chars = len(str(initial_data))
        initial_size_value = None
        initial_size_str = None
        if initial_data_num_chars < ae_consts.PREPARE_DATA_MIN_SIZE:
            err = (
                '{} not enough data={} in redis_key={} or '
                's3_key={} in bucket={}'.format(
                    label,
                    initial_data_num_chars,
                    redis_key,
                    s3_key,
                    s3_bucket_name))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        else:
            initial_size_value = initial_data_num_chars / 1024000
            initial_size_str = ae_consts.to_f(initial_size_value)
            if ae_consts.ev('DEBUG_PREPARE', '0') == '1':
                log.info(
                    '{} initial - redis_key={} data={}'.format(
                        label,
                        redis_key,
                        str(initial_data)))
            else:
                log.info(
                    '{} initial - redis_key={} data size={} MB'.format(
                        label,
                        redis_key,
                        initial_size_str))
        # end of trying to get initial_data

        rec['initial_data'] = initial_data
        rec['initial_size'] = initial_data_num_chars

        prepare_data = None

        try:
            if ae_consts.ev('DEBUG_PREPARE', '0') == '1':
                log.info(
                    '{} data={} - flatten - {} MB from '
                    'redis_key={}'.format(
                        label,
                        ae_consts.ppj(initial_data),
                        initial_size_str,
                        redis_key))
            else:
                log.info(
                    '{} flatten - {} MB from '
                    'redis_key={}'.format(
                        label,
                        initial_size_str,
                        redis_key))
            prepare_data = dict_to_csv.flatten_dict(
                data=initial_data)
        except Exception as e:
            prepare_data = None
            err = (
                '{} flatten - convert to csv failed with ex={} '
                'redis_key={}'.format(
                    label,
                    e,
                    redis_key))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        # end of try/ex

        if not prepare_data:
            err = (
                '{} flatten - did not return any data from '
                'redis_key={} or s3_key={} in bucket={}'.format(
                    label,
                    redis_key,
                    s3_key,
                    s3_bucket_name))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        # end of prepare_data

        prepare_data_num_chars = len(str(prepare_data))
        prepare_size_value = None

        if prepare_data_num_chars < ae_consts.PREPARE_DATA_MIN_SIZE:
            err = (
                '{} prepare - there is not enough data={} in '
                'redis_key={}'.format(
                    label,
                    prepare_data_num_chars,
                    redis_key))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        else:
            prepare_size_value = prepare_data_num_chars / 1024000
            prepare_size_str = ae_consts.to_f(prepare_size_value)
            if ae_consts.ev('DEBUG_PREPARE', '0') == '1':
                log.info(
                    '{} prepare - redis_key={} data={}'.format(
                        label,
                        redis_key,
                        ae_consts.ppj(prepare_data)))
            else:
                log.info(
                    '{} prepare - redis_key={} data size={} MB'.format(
                        label,
                        redis_key,
                        prepare_size_str))
        # end of logging the size of the prepared data

        rec['prepared_data'] = prepare_data
        rec['prepared_size'] = prepare_data_num_chars

        res = build_result.build_result(
            status=ae_consts.SUCCESS,
            err=None,
            rec=rec)

        rc = None
    except Exception as e:
        res = build_result.build_result(
            status=ae_consts.ERR,
            err=(
                'failed - prepare_pricing_dataset '
                'dict={} with ex={}').format(
                    work_dict,
                    e),
            rec=rec)
        log.error(
            '{} - {}'.format(
                label,
                res['err']))
    # end of try/ex

    log.info(
        'task - prepare_pricing_dataset done - '
        '{} - status={}'.format(
            label,
            ae_consts.get_status(res['status'])))

    return get_task_results.get_task_results(
        work_dict=work_dict,
        result=res)
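# Hedged usage sketch (not part of the original module): the task
# reads everything from work_dict, so a synchronous call only needs
# the redis/s3 locations of the cached pricing data. It assumes
# `task` is the bound Celery task instance this method is registered
# on; the ticker, keys, and bucket are hypothetical examples.
def _example_prepare_pricing_dataset(task):
    work_dict = {
        'ticker': 'SPY',
        'label': 'prepare-example',
        'redis_key': 'SPY_2019-02-15',   # hypothetical key
        's3_key': 'SPY_2019-02-15',      # hypothetical key
        's3_bucket': 'pricing',
        'celery_disabled': True}
    res = task.prepare_pricing_dataset(work_dict)
    print('status={} err={}'.format(
        res.get('status'),
        res.get('err')))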
def extract_option_calls_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_option_calls_dataset

    Extract the TD options calls for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = '{}'.format(work_dict.get('label', 'extract'))
    ds_id = work_dict.get('ticker')
    df_type = td_consts.DATAFEED_TD_CALLS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key',
        work_dict.get('tdcalls', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key',
        work_dict.get('tdcalls', 'missing-s3-key'))
    redis_host = work_dict.get(
        'redis_host',
        None)
    redis_port = work_dict.get(
        'redis_port',
        None)
    redis_db = work_dict.get(
        'redis_db',
        ae_consts.REDIS_DB)
    verbose = work_dict.get(
        'verbose_td',
        False)

    if verbose:
        log.info(
            '{} - {} - start - redis_key={} s3_key={}'.format(
                label,
                df_str,
                redis_key,
                s3_key))

    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    calls_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key,
            decompress_df=True)
        status = redis_rec['status']
        if verbose:
            log.info(
                '{} - {} redis get data key={} status={}'.format(
                    label,
                    df_str,
                    redis_key,
                    ae_consts.get_status(status=status)))

        if status == ae_consts.SUCCESS:
            calls_json = None
            if 'calls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['calls']
            else:
                calls_json = redis_rec['rec']['data']
            if verbose:
                log.info(
                    '{} - {} redis convert calls to df'.format(
                        label,
                        df_str))
            exp_date_str = None
            try:
                calls_df = pd.read_json(
                    calls_json,
                    orient='records')
                if len(calls_df.index) == 0:
                    return ae_consts.SUCCESS, None
                if 'date' not in calls_df:
                    log.debug(
                        'failed to find date column in TD calls '
                        'df={} len={}'.format(
                            calls_df,
                            len(calls_df.index)))
                    return ae_consts.SUCCESS, None
                calls_df = calls_df.sort_values(
                    by=[
                        'date',
                        'strike'
                    ])
                """
                for i, r in calls_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in calls_df:
                        calls_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (
                    calls_df['exp_date'].iloc[-1])

                calls_df['date'] = calls_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)
            except Exception as f:
                log.error(
                    '{} - {} redis_key={} '
                    'no calls df found or ex={}'.format(
                        label,
                        df_str,
                        redis_key,
                        f))
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df

            if verbose:
                log.info(
                    '{} - {} redis_key={} calls={} '
                    'exp_date={}'.format(
                        label,
                        df_str,
                        redis_key,
                        len(calls_df.index),
                        exp_date_str))
        else:
            if verbose:
                log.info(
                    '{} - {} did not find valid redis option calls '
                    'in redis_key={} status={}'.format(
                        label,
                        df_str,
                        redis_key,
                        ae_consts.get_status(status=status)))
    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting option calls from '
            'redis={}:{}@{} key={} ex={}'.format(
                label,
                df_str,
                ds_id,
                redis_host,
                redis_port,
                redis_db,
                redis_key,
                e))
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    if verbose:
        log.info(
            '{} - {} ds_id={} extract scrub={}'.format(
                label,
                df_str,
                ds_id,
                scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=calls_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
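# Hedged usage sketch (not part of the original module): the extract
# returns a (status, DataFrame-or-None) tuple, so callers should
# check the status constant before touching the frame. The redis key
# below is a hypothetical example.
def _example_extract_option_calls_dataset():
    work_dict = {
        'ticker': 'SPY',
        'redis_key': 'SPY_2019-02-15_tdcalls',  # hypothetical key
        'label': 'example-extract',
        'verbose_td': True}
    status, calls_df = extract_option_calls_dataset(work_dict)
    if status == ae_consts.SUCCESS and calls_df is not None:
        print(calls_df[['date', 'strike']].head())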
def extract_pricing_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_pricing_dataset

    Extract the Yahoo pricing data for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = work_dict.get('label', 'extract')
    ds_id = work_dict.get('ticker')
    df_type = DATAFEED_PRICING_YAHOO
    df_str = get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key',
        work_dict.get('pricing', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key',
        work_dict.get('pricing', 'missing-s3-key'))
    redis_host = work_dict.get(
        'redis_host',
        None)
    redis_port = work_dict.get(
        'redis_port',
        None)
    redis_db = work_dict.get(
        'redis_db',
        REDIS_DB)

    log.debug(
        '{} - {} - start - redis_key={} s3_key={}'.format(
            label,
            df_str,
            redis_key,
            s3_key))

    if not redis_host and not redis_port:
        redis_host = REDIS_ADDRESS.split(':')[0]
        redis_port = REDIS_ADDRESS.split(':')[1]

    df = None
    status = NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key)
        status = redis_rec['status']
        log.debug(
            '{} - {} redis get data key={} status={}'.format(
                label,
                df_str,
                redis_key,
                get_status(status=status)))

        if status == SUCCESS:
            log.debug(
                '{} - {} redis convert pricing to json'.format(
                    label,
                    df_str))
            cached_dict = redis_rec['rec']['data']
            log.debug(
                '{} - {} redis convert pricing to df'.format(
                    label,
                    df_str))
            try:
                # the cached pricing record is a flat dict,
                # so build a single-row DataFrame
                df = pd.DataFrame(
                    cached_dict,
                    index=[0])
            except Exception:
                log.debug(
                    '{} - {} redis_key={} '
                    'no pricing df found'.format(
                        label,
                        df_str,
                        redis_key))
                return EMPTY, None
            # end of try/ex to convert to df
            log.debug(
                '{} - {} redis_key={} done convert '
                'pricing to df'.format(
                    label,
                    df_str,
                    redis_key))
        else:
            log.debug(
                '{} - {} did not find valid redis pricing '
                'in redis_key={} status={}'.format(
                    label,
                    df_str,
                    redis_key,
                    get_status(status=status)))
    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting pricing from '
            'redis={}:{}@{} key={} ex={}'.format(
                label,
                df_str,
                ds_id,
                redis_host,
                redis_port,
                redis_db,
                redis_key,
                e))
        return ERR, None
    # end of try/ex extract from redis

    log.debug(
        '{} - {} ds_id={} extract scrub={}'.format(
            label,
            df_str,
            ds_id,
            scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=df)

    status = SUCCESS

    return status, scrubbed_df
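# Hedged illustration (not part of the original module): the cached
# yahoo pricing record is a flat dict of scalars, which is why the
# extract above builds a single-row DataFrame with index=[0]. The
# field names and values here are hypothetical examples.
def _example_pricing_dict_to_df():
    cached_dict = {
        'ticker': 'SPY',
        'open': 276.84,
        'close': 277.37,
        'volume': 103544199}
    df = pd.DataFrame(cached_dict, index=[0])
    print(df)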
def extract_option_puts_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Extract the TD options puts for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = f'{work_dict.get("label", "extract")}'
    ds_id = work_dict.get('ticker')
    df_type = td_consts.DATAFEED_TD_PUTS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key', work_dict.get('tdputs', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key', work_dict.get('tdputs', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)
    verbose = work_dict.get('verbose_td', False)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key,
            decompress_df=True)
        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            puts_json = None
            if 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            if verbose:
                log.info(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                if len(puts_df.index) == 0:
                    return ae_consts.SUCCESS, None
                if 'date' not in puts_df:
                    log.debug('failed to find date column in TD puts '
                              f'df={puts_df} len={len(puts_df.index)}')
                    return ae_consts.SUCCESS, None
                puts_df = puts_df.sort_values(by=['date', 'strike'])
                """
                for i, r in puts_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in puts_df:
                        puts_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            puts_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (puts_df['exp_date'].iloc[-1])

                puts_df['date'] = puts_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)
            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df

            if verbose:
                log.info(f'{label} - {df_str} redis_key={redis_key} '
                         f'puts={len(puts_df.index)} '
                         f'exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option puts in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')
    except Exception as e:
        log.debug(f'{label} - {df_str} - ds_id={ds_id} failed getting '
                  f'option puts from '
                  f'redis={redis_host}:{redis_port}@{redis_db} '
                  f'key={redis_key} ex={e}')
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
def extract_option_puts_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Extract the Yahoo options puts for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = '{}-puts'.format(work_dict.get('label', 'extract'))
    ds_id = work_dict.get('ticker')
    df_type = DATAFEED_OPTIONS_YAHOO
    df_str = get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key',
        work_dict.get('options', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key',
        work_dict.get('options', 'missing-s3-key'))
    redis_host = work_dict.get(
        'redis_host',
        None)
    redis_port = work_dict.get(
        'redis_port',
        None)
    redis_db = work_dict.get(
        'redis_db',
        REDIS_DB)

    log.debug(
        '{} - {} - start - redis_key={} s3_key={}'.format(
            label,
            df_str,
            redis_key,
            s3_key))

    if not redis_host and not redis_port:
        redis_host = REDIS_ADDRESS.split(':')[0]
        redis_port = REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    puts_df = None
    status = NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key)
        status = redis_rec['status']
        log.debug(
            '{} - {} redis get data key={} status={}'.format(
                label,
                df_str,
                redis_key,
                get_status(status=status)))

        if status == SUCCESS:
            exp_date_str = redis_rec['rec']['data']['exp_date']
            puts_json = redis_rec['rec']['data']['puts']
            log.debug(
                '{} - {} redis convert puts to df'.format(
                    label,
                    df_str))
            try:
                puts_df = pd.read_json(
                    puts_json,
                    orient='records')
            except Exception:
                log.debug(
                    '{} - {} redis_key={} '
                    'no puts df found'.format(
                        label,
                        df_str,
                        redis_key))
                return EMPTY, None
            # end of try/ex to convert to df
            log.debug(
                '{} - {} redis_key={} puts={} exp_date={}'.format(
                    label,
                    df_str,
                    redis_key,
                    len(puts_df.index),
                    exp_date_str))
        else:
            log.debug(
                '{} - {} did not find valid redis option puts '
                'in redis_key={} status={}'.format(
                    label,
                    df_str,
                    redis_key,
                    get_status(status=status)))
    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting option puts from '
            'redis={}:{}@{} key={} ex={}'.format(
                label,
                df_str,
                ds_id,
                redis_host,
                redis_port,
                redis_db,
                redis_key,
                e))
        return ERR, None
    # end of try/ex extract from redis

    log.debug(
        '{} - {} ds_id={} extract scrub={}'.format(
            label,
            df_str,
            ds_id,
            scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = SUCCESS

    return status, scrubbed_df
def extract_pricing_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_pricing_dataset

    Extract the Yahoo pricing data for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = work_dict.get('label', 'extract')
    ds_id = work_dict.get('ticker')
    df_type = yahoo_consts.DATAFEED_PRICING_YAHOO
    df_str = yahoo_consts.get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key', work_dict.get('pricing', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key', work_dict.get('pricing', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)

    log.debug(
        f'{label} - {df_str} - start - redis_key={redis_key} '
        f's3_key={s3_key}')

    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key,
            decompress_df=True)
        status = redis_rec['status']
        log.debug(f'{label} - {df_str} redis get data key={redis_key} '
                  f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            log.debug(f'{label} - {df_str} redis convert pricing to json')
            cached_dict = redis_rec['rec']['data']
            log.debug(f'{label} - {df_str} redis convert pricing to df')
            try:
                df = pd.DataFrame(cached_dict, index=[0])
            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no pricing df found')
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            log.debug(f'{label} - {df_str} redis_key={redis_key} done '
                      'convert pricing to df')
        else:
            log.debug(f'{label} - {df_str} did not find valid redis '
                      f'pricing in redis_key={redis_key} '
                      f'status={ae_consts.get_status(status=status)}')
    except Exception as e:
        log.debug(
            f'{label} - {df_str} - ds_id={ds_id} failed getting pricing '
            f'from redis={redis_host}:{redis_port}@{redis_db} '
            f'key={redis_key} ex={e}')
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    log.debug(f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
def extract_option_puts_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Extract the Yahoo options puts for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = f'{work_dict.get("label", "extract")}-puts'
    ds_id = work_dict.get('ticker')
    df_type = yahoo_consts.DATAFEED_OPTIONS_YAHOO
    df_str = yahoo_consts.get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key', work_dict.get('puts', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key', work_dict.get('puts', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)

    log.debug(
        f'{label} - {df_str} - start - redis_key={redis_key} '
        f's3_key={s3_key}')

    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key,
            decompress_df=True)
        status = redis_rec['status']
        log.debug(f'{label} - {df_str} redis get data key={redis_key} '
                  f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            puts_json = None
            if 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            log.debug(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                exp_epoch_value = puts_df['expiration'].iloc[-1]
                exp_date_str = ae_utils.convert_epoch_to_datetime_string(
                    epoch=exp_epoch_value,
                    fmt=ae_consts.COMMON_DATE_FORMAT,
                    use_utc=True)
            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            log.debug(f'{label} - {df_str} redis_key={redis_key} '
                      f'puts={len(puts_df.index)} exp_date={exp_date_str}')
        else:
            log.debug(
                f'{label} - {df_str} did not find valid redis option puts '
                f'in redis_key={redis_key} '
                f'status={ae_consts.get_status(status=status)}')
    except Exception as e:
        log.debug(
            f'{label} - {df_str} - ds_id={ds_id} failed getting option '
            f'puts from redis={redis_host}:{redis_port}@{redis_db} '
            f'key={redis_key} ex={e}')
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    log.debug(f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
def extract_option_calls_dataset(
        ticker=None,
        date=None,
        work_dict=None,
        scrub_mode='sort-by-date',
        verbose=False):
    """extract_option_calls_dataset

    Extract the TD options calls for a ticker and
    return a tuple (status, ``pandas.DataFrame``)

    .. code-block:: python

        import analysis_engine.td.extract_df_from_redis as td_extract

        # extract by historical date is also supported as an arg
        # date='2019-02-15'
        calls_status, calls_df = td_extract.extract_option_calls_dataset(
            ticker='SPY')
        print(calls_df)

    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: optional - string type of scrubbing handler
        to run
    :param verbose: optional - boolean for turning on logging
    """
    label = 'extract_td_calls'
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
        label = f'{work_dict.get("label", label)}'
    if not use_date:
        use_date = latest_close_date

    ds_id = ticker
    df_type = td_consts.DATAFEED_TD_CALLS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_db = ae_consts.REDIS_DB
    redis_key = f'{ticker}_{use_date}_tdcalls'
    redis_host, redis_port = ae_consts.get_redis_host_and_port(
        req=work_dict)
    redis_password = ae_consts.REDIS_PASSWORD
    s3_key = redis_key

    if work_dict:
        redis_db = work_dict.get('redis_db', redis_db)
        redis_password = work_dict.get('redis_password', redis_password)
        verbose = work_dict.get('verbose_td', verbose)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    exp_date_str = None
    calls_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=redis_password,
            key=redis_key,
            decompress_df=True)
        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            calls_json = None
            if 'tdcalls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['tdcalls']
            elif 'calls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['calls']
            else:
                calls_json = redis_rec['rec']['data']
            if not calls_json:
                return ae_consts.SUCCESS, pd.DataFrame([])
            if verbose:
                log.info(f'{label} - {df_str} redis convert calls to df')
            exp_date_str = None
            try:
                calls_df = pd.read_json(calls_json, orient='records')
                if len(calls_df.index) == 0:
                    return ae_consts.SUCCESS, pd.DataFrame([])
                if 'date' not in calls_df:
                    if verbose:
                        log.error(
                            'failed to find date column in TD calls '
                            f'df={calls_df} len={len(calls_df.index)}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                calls_df = calls_df.sort_values(by=['date', 'strike'])
                """
                for i, r in calls_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in calls_df:
                        calls_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (calls_df['exp_date'].iloc[-1])

                calls_df['date'] = calls_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)
            except Exception as f:
                not_fixed = True
                if ('Can only use .dt accessor with '
                        'datetimelike values') in str(f):
                    try:
                        log.critical(f'fixing dates in {redis_key}')
                        # remove epoch second data and
                        # use only the millisecond date values
                        bad_date = ae_consts.EPOCH_MINIMUM_DATE
                        calls_df['date'][
                            calls_df['date'] < bad_date] = None
                        calls_df = calls_df.dropna(axis=0, how='any')
                        fmt = ae_consts.COMMON_TICK_DATE_FORMAT
                        calls_df['date'] = pd.to_datetime(
                            calls_df['date'],
                            unit='ms').dt.strftime(fmt)
                        not_fixed = False
                    except Exception as g:
                        log.critical(
                            f'failed to parse date column '
                            f'{calls_df["date"]} '
                            f'with dt.strftime ex={f} and EPOCH EX={g}')
                        return ae_consts.SUCCESS, pd.DataFrame([])
                # if able to fix error or not
                if not_fixed:
                    # unable to fix - return out
                    log.error(f'{label} - {df_str} redis_key={redis_key} '
                              f'no calls df found or ex={f}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
            # end of try/ex to convert to df

            if verbose:
                log.info(
                    f'{label} - {df_str} redis_key={redis_key} '
                    f'calls={len(calls_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option calls in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')
    except Exception as e:
        if verbose:
            log.error(
                f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                f'calls from redis={redis_host}:{redis_port}@{redis_db} '
                f'key={redis_key} ex={e}')
        return ae_consts.ERR, pd.DataFrame([])
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=calls_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
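# Hedged illustration (not part of the original module): the date
# repair above drops rows whose epoch values are too small to be
# millisecond timestamps, then re-parses the rest with unit='ms'.
# The cutoff stands in for ae_consts.EPOCH_MINIMUM_DATE and the
# format for ae_consts.COMMON_TICK_DATE_FORMAT; the sample values
# are hypothetical.
def _example_fix_epoch_dates():
    df = pd.DataFrame({
        'date': [1550246400000, 1550332800000, 1550246400],
        'strike': [270.0, 275.0, 280.0]})
    bad_date = 1000000000000  # assumed millisecond-scale cutoff
    # second-resolution epochs fall below the cutoff and are dropped
    df.loc[df['date'] < bad_date, 'date'] = None
    df = df.dropna(axis=0, how='any')
    df['date'] = pd.to_datetime(
        df['date'], unit='ms').dt.strftime('%Y-%m-%d %H:%M:%S')
    print(df)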
def extract_option_puts_dataset(
        ticker=None,
        date=None,
        work_dict=None,
        scrub_mode='sort-by-date',
        verbose=False):
    """extract_option_puts_dataset

    Extract the TD options puts for a ticker and
    return a tuple (status, ``pandas.DataFrame``)

    .. code-block:: python

        import analysis_engine.td.extract_df_from_redis as td_extract

        # extract by historical date is also supported as an arg
        # date='2019-02-15'
        puts_status, puts_df = td_extract.extract_option_puts_dataset(
            ticker='SPY')
        print(puts_df)

    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: optional - string type of scrubbing handler
        to run
    :param verbose: optional - boolean for turning on logging
    """
    label = 'extract_td_puts'
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
        label = f'{work_dict.get("label", label)}'
    if not use_date:
        use_date = latest_close_date

    ds_id = ticker
    df_type = td_consts.DATAFEED_TD_PUTS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_db = ae_consts.REDIS_DB
    redis_key = f'{ticker}_{use_date}_tdputs'
    redis_host, redis_port = ae_consts.get_redis_host_and_port(
        req=work_dict)
    redis_password = ae_consts.REDIS_PASSWORD
    s3_key = redis_key

    if work_dict:
        redis_db = work_dict.get('redis_db', redis_db)
        redis_password = work_dict.get('redis_password', redis_password)
        verbose = work_dict.get('verbose_td', verbose)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=redis_password,
            key=redis_key,
            decompress_df=True)
        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            puts_json = None
            if 'tdputs' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['tdputs']
            elif 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            if not puts_json:
                return ae_consts.SUCCESS, pd.DataFrame([])
            if verbose:
                log.info(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                if len(puts_df.index) == 0:
                    return ae_consts.SUCCESS, pd.DataFrame([])
                if 'date' not in puts_df:
                    log.debug('failed to find date column in TD puts '
                              f'df={puts_df} len={len(puts_df.index)}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                puts_df = puts_df.sort_values(by=['date', 'strike'])
                """
                for i, r in puts_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in puts_df:
                        puts_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            puts_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (puts_df['exp_date'].iloc[-1])

                puts_df['date'] = puts_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)
            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.SUCCESS, pd.DataFrame([])
            # end of try/ex to convert to df

            if verbose:
                log.info(f'{label} - {df_str} redis_key={redis_key} '
                         f'puts={len(puts_df.index)} '
                         f'exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option puts in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')
    except Exception as e:
        if verbose:
            log.error(
                f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                f'puts from redis={redis_host}:{redis_port}@{redis_db} '
                f'key={redis_key} ex={e}')
        return ae_consts.ERR, pd.DataFrame([])
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
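# Hedged usage sketch (not part of the original module): the calls
# and puts extracts share the same (status, DataFrame) contract, so
# a caller can pull both sides of the chain for a historical date.
# The date mirrors the docstring example above.
def _example_extract_option_chain():
    calls_status, calls_df = extract_option_calls_dataset(
        ticker='SPY', date='2019-02-15')
    puts_status, puts_df = extract_option_puts_dataset(
        ticker='SPY', date='2019-02-15')
    if (calls_status == ae_consts.SUCCESS and
            puts_status == ae_consts.SUCCESS):
        print('calls={} puts={}'.format(
            len(calls_df.index),
            len(puts_df.index)))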