def load_algo_dataset_from_redis(
        redis_key,
        redis_address,
        redis_db,
        redis_password,
        redis_expire=None,
        redis_serializer='json',
        serialize_datasets=DEFAULT_SERIALIZED_DATASETS,
        compress=False,
        encoding='utf-8'):
    """load_algo_dataset_from_redis

    Load an algorithm-ready dataset for algorithm backtesting
    from a redis key

    :param serialize_datasets: optional - list of dataset names to
        deserialize in the dataset
    :param compress: optional - boolean flag for decompressing
        the contents of the ``path_to_file`` if necessary
        (default is ``False`` and algorithms
        use ``zlib`` for compression)
    :param encoding: optional - string for data encoding
    """
    log.debug('start')
    data_from_file = None

    redis_host = redis_address.split(':')[0]
    redis_port = int(redis_address.split(':')[1])

    redis_res = redis_utils.get_data_from_redis_key(
        key=redis_key,
        host=redis_host,
        port=redis_port,
        db=redis_db,
        password=redis_password,
        expire=redis_expire,
        serializer=redis_serializer,
        encoding=encoding)

    if redis_res['status'] != SUCCESS:
        log.error('failed getting data from redis={}:{}/{}'.format(
            redis_address, redis_db, redis_key))
        return None

    data_from_file = redis_res['rec']['data']
    if not data_from_file:
        log.error('missing data from redis={}:{}/{}'.format(
            redis_address, redis_db, redis_key))
        return None

    return prepare_utils.prepare_dict_for_algo(data=data_from_file,
                                               compress=compress,
                                               convert_to_dict=True,
                                               encoding=encoding)
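
A minimal usage sketch for this loader; the key name and connection values
below are placeholders (not taken from this listing), and the return value
is assumed to be the ticker-keyed dict that ``prepare_dict_for_algo`` builds.

algo_ds = load_algo_dataset_from_redis(
    redis_key='SPY-latest',          # assumed key naming
    redis_address='localhost:6379',  # assumed local redis
    redis_db=0,
    redis_password=None)
if algo_ds:
    print('loaded dataset for tickers={}'.format(list(algo_ds.keys())))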
Example #2
def build_df_from_redis(label=None,
                        client=None,
                        address=None,
                        host=None,
                        port=None,
                        password=None,
                        db=None,
                        key=None,
                        expire=None,
                        serializer='json',
                        encoding='utf-8',
                        orient='records'):
    """build_df_from_redis

    :param label: log tracking label
    :param client: initialized redis client
    :param address: redis address: <host:port>
    :param host: redis host
    :param port: redis port
    :param password: redis password
    :param db: redis db
    :param key: redis key
    :param expire: not used yet - redis expire
    :param serializer: support for future
                       pickle objects in redis
    :param encoding: format of the encoded key in redis
    :param orient: use the same ``orient`` value that was used with
                   ``to_json(orient='records')`` so the
                   DataFrame deserializes correctly.
    """

    data = None
    valid_df = False
    df = None

    rec = {'valid_df': valid_df, 'data': data}
    res = build_result.build_result(status=NOT_RUN, err=None, rec=rec)

    log_id = label if label else 'build-df'

    try:
        log.debug('{} calling get redis key={}'.format(log_id, key))

        use_host = host
        use_port = port
        if not use_host and not use_port:
            if address:
                use_host = address.split(':')[0]
                use_port = int(address.split(':')[1])

        use_client = client
        if not use_client:
            log.debug('{} connecting to redis={}:{}@{}'.format(
                log_id, use_host, use_port, db))
            use_client = redis.Redis(host=use_host,
                                     port=use_port,
                                     password=password,
                                     db=db)

        redis_res = redis_get.get_data_from_redis_key(label=log_id,
                                                      client=use_client,
                                                      host=use_host,
                                                      port=use_port,
                                                      password=password,
                                                      db=db,
                                                      key=key,
                                                      expire=expire,
                                                  serializer=serializer,
                                                      encoding=encoding)

        valid_df = False
        if redis_res['status'] == SUCCESS:
            data = redis_res['rec'].get('data', None)
            if data:
                if ev('DEBUG_REDIS', '0') == '1':
                    log.info('{} - found key={} data={}'.format(
                        log_id, key, ppj(data)))
                else:
                    log.debug('{} - loading df from key={}'.format(
                        log_id, key))
                # load the df regardless of the debug flag and
                # honor the caller's orient argument
                df = pd.read_json(data, orient=orient)
                valid_df = True
            else:
                log.debug('{} key={} no data'.format(log_id, key))
            # if data

            rec['data'] = df
            rec['valid_df'] = valid_df

            res = build_result.build_result(status=SUCCESS, err=None, rec=rec)
            return res
        else:
            log.debug('{} no data key={}'.format(log_id, key))
            res = build_result.build_result(status=SUCCESS, err=None, rec=rec)
            return res
    except Exception as e:
        err = ('{} failed - build_df_from_redis data={} '
               'key={} ex={}'.format(log_id, data, key, e))
        log.error(err)
        res = build_result.build_result(status=ERR, err=err, rec=rec)
    # end of try/ex for getting redis data

    return res
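
The ``orient`` argument only matters when it matches how the DataFrame was
serialized. A round-trip sketch, assuming a local redis instance and that
``get_data_from_redis_key`` hands the stored JSON string back under
``rec['data']``:

import pandas as pd
import redis

client = redis.Redis(host='localhost', port=6379, db=0)
df_in = pd.DataFrame([
    {'date': '2019-01-11', 'close': 258.98},
    {'date': '2019-01-14', 'close': 257.40}])
# store with the same orient that build_df_from_redis will read back
client.set('demo:df', df_in.to_json(orient='records'))

res = build_df_from_redis(label='demo', client=client, key='demo:df')
if res['rec']['valid_df']:
    print(res['rec']['data'])  # the rebuilt DataFrame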
Example #3
def restore_dataset(
        show_summary=True,
        force_restore=False,
        algo_dataset=None,
        dataset_type=SA_DATASET_TYPE_ALGO_READY,
        serialize_datasets=DEFAULT_SERIALIZED_DATASETS,
        path_to_file=None,
        compress=False,
        encoding='utf-8',
        redis_enabled=True,
        redis_key=None,
        redis_address=None,
        redis_db=None,
        redis_password=None,
        redis_expire=None,
        redis_serializer='json',
        redis_encoding='utf-8',
        redis_output_db=None,
        s3_enabled=True,
        s3_key=None,
        s3_address=None,
        s3_bucket=None,
        s3_access_key=None,
        s3_secret_key=None,
        s3_region_name=None,
        s3_secure=False,
        slack_enabled=False,
        slack_code_block=False,
        slack_full_width=False,
        verbose=False):
    """restore_dataset

    Restore missing dataset nodes in redis from an algorithm-ready
    dataset file on disk. Use this to restore redis from scratch.

    :param show_summary: optional - show a summary of the algorithm-ready
        dataset using ``analysis_engine.show_dataset.show_dataset``
        (default is ``True``)
    :param force_restore: optional - boolean - publish whatever is in
        the algorithm-ready dataset into redis. If ``False`` this will
        ensure that datasets are only set in redis if they are not already
        set
    :param algo_dataset: optional - already loaded algorithm-ready dataset
    :param dataset_type: optional - dataset type
        (default is ``SA_DATASET_TYPE_ALGO_READY``)
    :param serialize_datasets: optional - list of dataset names to
        deserialize in the dataset
    :param path_to_file: optional - path to an algorithm-ready dataset
        in a file
    :param compress: optional - boolean flag for decompressing
        the contents of the ``path_to_file`` if necessary
        (default is ``False`` and algorithms
        use ``zlib`` for compression)
    :param encoding: optional - string for data encoding

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all
        datasets in Redis
        (default is ``True``)
    :param redis_key: string - key to save the data in redis
        (default is ``None``)
    :param redis_address: Redis connection string format: ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use
        (default is ``0``)
    :param redis_password: optional - Redis password
        (default is ``None``)
    :param redis_expire: optional - Redis expire value
        (default is ``None``)
    :param redis_serializer: not used yet - support for future
        pickle objects in redis
    :param redis_encoding: format of the encoded key in redis
    :param redis_output_db: optional - integer publish to a separate
        redis database

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for auto-archiving on Minio (S3)
        (default is ``True``)
    :param s3_key: string - key to save the data in s3
        (default is ``None``)
    :param s3_address: Minio S3 connection string format: ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key
        (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key
        (default is ``trex123321``)
    :param s3_region_name: S3 region name
        (default is ``us-east-1``)
    :param s3_secure: Transmit using TLS encryption
        (default is ``False``)

    **(Optional) Slack arguments**

    :param slack_enabled: optional - boolean for
        publishing to slack
    :param slack_code_block: optional - boolean for
        publishing as a code block in slack
    :param slack_full_width: optional - boolean for
        publishing to slack using the full
        width allowed

    **Additional arguments**

    :param verbose: optional - bool for increasing
        logging
    """

    use_ds = algo_dataset
    redis_host = REDIS_ADDRESS.split(':')[0]
    redis_port = int(REDIS_ADDRESS.split(':')[1])
    if redis_address:
        redis_host = redis_address.split(':')[0]
        redis_port = int(redis_address.split(':')[1])

    if show_summary:
        use_ds = show_dataset.show_dataset(
            dataset_type=dataset_type,
            compress=compress,
            encoding=redis_encoding,
            path_to_file=path_to_file,
            s3_key=s3_key,
            s3_address=s3_address,
            s3_bucket=s3_bucket,
            s3_access_key=s3_access_key,
            s3_secret_key=s3_secret_key,
            s3_region_name=s3_region_name,
            s3_secure=s3_secure,
            redis_key=redis_key,
            redis_address=redis_address,
            redis_db=redis_db,
            redis_password=redis_password,
            redis_expire=redis_expire,
            redis_serializer=redis_serializer,
            serialize_datasets=serialize_datasets)

    # end of if show_summary

    if not use_ds:
        log.info(
            'loading from file={} s3={} redis={}'.format(
                path_to_file,
                s3_key,
                redis_key))
        use_ds = load_dataset.load_dataset(
            dataset_type=dataset_type,
            compress=compress,
            encoding=redis_encoding,
            path_to_file=path_to_file,
            s3_key=s3_key,
            s3_address=s3_address,
            s3_bucket=s3_bucket,
            s3_access_key=s3_access_key,
            s3_secret_key=s3_secret_key,
            s3_region_name=s3_region_name,
            s3_secure=s3_secure,
            redis_key=redis_key,
            redis_address=redis_address,
            redis_db=redis_db,
            redis_password=redis_password,
            redis_expire=redis_expire,
            redis_serializer=redis_serializer,
            serialize_datasets=serialize_datasets)
    # load if not loaded

    if not use_ds:
        log.error(
            'unable to load a dataset from file={} '
            's3={} redis={}'.format(
                path_to_file,
                s3_key,
                redis_key))
        return None

    log.info('restore - start')
    total_to_restore = 0
    for ticker in use_ds:
        for ds_node in use_ds[ticker]:
            for ds_key in ds_node['data']:
                if ds_key in serialize_datasets:
                    total_to_restore += 1
    # end of counting total_to_restore

    log.info('restore - records={}'.format(total_to_restore))
    num_done = 0
    for ticker in use_ds:
        for ds_node in use_ds[ticker]:
            ds_parent_key = ds_node['id']
            log.info(
                'restore - parent_key={} - {} {}/{}'.format(
                    ds_parent_key,
                    get_percent_done(
                        progress=num_done,
                        total=total_to_restore),
                    num_done,
                    total_to_restore))
            if verbose:
                print(ds_parent_key)

            cache_res = redis_utils.get_data_from_redis_key(
                host=redis_host,
                port=redis_port,
                password=redis_password,
                db=redis_db,
                key=ds_parent_key,
                serializer=redis_serializer,
                encoding=redis_encoding,
                expire=redis_expire,
                label='restore-{}'.format(ds_parent_key))
            should_restore = False
            if (not force_restore and
                    cache_res['status'] == SUCCESS and
                    'data' in cache_res['rec'] and
                    cache_res['rec']['data'] and
                    len(cache_res['rec']['data']) > 10):
                should_restore = False
            else:
                should_restore = True
            if should_restore:
                log.info(
                    ' - parent {} restore'.format(
                        ds_parent_key))
                new_parent_rec = {
                    'exp_date': None,
                    'publish_pricing_update': None,
                    'date': ds_node['date'],
                    'updated': None,
                    'version': DATASET_COLLECTION_VERSION
                }
                for sname in serialize_datasets:
                    if sname in ds_node['data']:
                        if hasattr(
                                ds_node['data'][sname],
                                'index'):
                            new_parent_rec[sname] = \
                                ds_node['data'][sname].to_json(
                                    orient='records',
                                    date_format='iso')
                        else:
                            new_parent_rec[sname] = \
                                ds_node['data'][sname]

                publish.publish(
                    data=new_parent_rec,
                    convert_to_json=False,
                    compress=compress,
                    redis_enabled=True,
                    redis_key=ds_parent_key,
                    redis_db=redis_output_db,
                    redis_address=redis_address,
                    redis_password=redis_password,
                    redis_expire=redis_expire,
                    redis_serializer=redis_serializer,
                    redis_encoding=redis_encoding,
                    s3_enabled=False,
                    output_file=None,
                    verbose=verbose)

            for ds_key in ds_node['data']:
                if ds_key in serialize_datasets:
                    new_key = '{}_{}'.format(
                        ds_parent_key,
                        ds_key)
                    if hasattr(
                            ds_node['data'][ds_key],
                            'index'):
                        loaded_df = ds_node['data'][ds_key]
                        if len(loaded_df.index) > 0:
                            if verbose:
                                print(
                                    ' - checking: {}'.format(
                                        new_key))

                            cache_res = redis_utils.get_data_from_redis_key(
                                host=redis_host,
                                port=redis_port,
                                password=redis_password,
                                db=redis_db,
                                key=new_key,
                                serializer=redis_serializer,
                                encoding=redis_encoding,
                                expire=redis_expire,
                                label='restore-{}'.format(new_key))
                            should_restore = False
                            if (not force_restore and
                                    cache_res['status'] == SUCCESS and
                                    'data' in cache_res['rec'] and
                                    cache_res['rec']['data'] and
                                    len(cache_res['rec']['data']) > 10):
                                should_restore = False
                            else:
                                # guard against a missing 'data' key
                                cached_data = cache_res['rec'].get(
                                    'data', None)
                                if str(cached_data) != EMPTY_DF_STR:
                                    should_restore = True
                            if should_restore:
                                log.info(
                                    'restore nested dataset: {} - {}'.format(
                                        ds_parent_key,
                                        new_key))
                                publish.publish(
                                    data=loaded_df,
                                    is_df=True,
                                    compress=compress,
                                    redis_enabled=True,
                                    redis_key=new_key,
                                    redis_db=redis_output_db,
                                    redis_address=redis_address,
                                    redis_password=redis_password,
                                    redis_expire=redis_expire,
                                    redis_serializer=redis_serializer,
                                    redis_encoding=redis_encoding,
                                    s3_enabled=False,
                                    output_file=None,
                                    verbose=verbose)
                            else:
                                if verbose:
                                    print(
                                        ' - checking: {} - SKIP'.format(
                                            new_key))

                        else:
                            if verbose:
                                print(' - {} - no data to sync'.format(
                                    new_key))
                    # end of is a dataframe
                    # (non-dataframe values were already published
                    # with the parent record above)

                    num_done += 1
        # end of for all datasets
        print('-----------------------------------')
    # end for all dataset to restore

    log.info(
        'restore - done - num_done={} total={}'.format(
            num_done,
            total_to_restore))

    return use_ds
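
A sketch of a full restore run; the file path and redis settings below are
placeholders:

restored_ds = restore_dataset(
    path_to_file='/tmp/algoready-SPY.json',  # assumed local dataset file
    redis_address='localhost:6379',
    redis_db=0,
    redis_output_db=1,    # publish restored nodes to a separate db
    force_restore=False,  # only restore keys that are missing
    show_summary=False,
    verbose=True)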
def prepare_pricing_dataset(
        self,
        work_dict):
    """prepare_pricing_dataset

    Prepare dataset for analysis. Supports loading dataset from
    s3 if not found in redis. Outputs prepared artifact as a csv
    to s3 and redis.

    :param work_dict: dictionary for key/values
    """

    label = 'prepare'

    log.info(
        'task - {} - start '
        'work_dict={}'.format(
            label,
            work_dict))

    initial_data = None

    ticker = ae_consts.TICKER
    ticker_id = ae_consts.TICKER_ID
    rec = {
        'ticker': None,
        'ticker_id': None,
        's3_enabled': True,
        'redis_enabled': True,
        's3_bucket': None,
        's3_key': None,
        'redis_key': None,
        'prepared_s3_key': None,
        'prepared_s3_bucket': None,
        'prepared_redis_key': None,
        'prepared_data': None,
        'prepared_size': None,
        'initial_data': None,
        'initial_size': None,
        'ignore_columns': None,
        'updated': None
    }
    res = build_result.build_result(
        status=ae_consts.NOT_RUN,
        err=None,
        rec=rec)

    try:
        ticker = work_dict.get(
            'ticker',
            ae_consts.TICKER)
        ticker_id = int(work_dict.get(
            'ticker_id',
            ae_consts.TICKER_ID))

        if not ticker:
            res = build_result.build_result(
                status=ae_consts.ERR,
                err='missing ticker',
                rec=rec)
            return res

        label = work_dict.get(
            'label',
            label)
        s3_key = work_dict.get(
            's3_key',
            None)
        s3_bucket_name = work_dict.get(
            's3_bucket',
            'pricing')
        s3_access_key = work_dict.get(
            's3_access_key',
            ae_consts.S3_ACCESS_KEY)
        s3_secret_key = work_dict.get(
            's3_secret_key',
            ae_consts.S3_SECRET_KEY)
        s3_region_name = work_dict.get(
            's3_region_name',
            ae_consts.S3_REGION_NAME)
        s3_address = work_dict.get(
            's3_address',
            ae_consts.S3_ADDRESS)
        s3_secure = work_dict.get(
            's3_secure',
            ae_consts.S3_SECURE) == '1'
        redis_address = work_dict.get(
            'redis_address',
            ae_consts.REDIS_ADDRESS)
        redis_key = work_dict.get(
            'redis_key',
            ae_consts.REDIS_KEY)
        redis_password = work_dict.get(
            'redis_password',
            ae_consts.REDIS_PASSWORD)
        redis_db = work_dict.get(
            'redis_db',
            None)
        if not redis_db:
            redis_db = ae_consts.REDIS_DB
        redis_expire = None
        if 'redis_expire' in work_dict:
            redis_expire = work_dict.get(
                'redis_expire',
                ae_consts.REDIS_EXPIRE)
        updated = work_dict.get(
            'updated',
            datetime.datetime.utcnow().strftime(
                '%Y_%m_%d_%H_%M_%S'))
        prepared_s3_key = work_dict.get(
            'prepared_s3_key',
            '{}_{}.csv'.format(
                ticker,
                updated))
        prepared_s3_bucket = work_dict.get(
            'prepared_s3_bucket',
            'prepared')
        prepared_redis_key = work_dict.get(
            'prepared_redis_key',
            'prepared')
        ignore_columns = work_dict.get(
            'ignore_columns',
            None)
        log.info(
            '{} redis enabled address={}@{} '
            'key={} prepare_s3={}:{} prepare_redis={} '
            'ignore_columns={}'.format(
                label,
                redis_address,
                redis_db,
                redis_key,
                prepared_s3_bucket,
                prepared_s3_key,
                prepared_redis_key,
                ignore_columns))
        redis_host = redis_address.split(':')[0]
        redis_port = int(redis_address.split(':')[1])

        enable_s3 = True
        enable_redis_publish = True

        rec['ticker'] = ticker
        rec['ticker_id'] = ticker_id
        rec['s3_bucket'] = s3_bucket_name
        rec['s3_key'] = s3_key
        rec['redis_key'] = redis_key
        rec['prepared_s3_key'] = prepared_s3_key
        rec['prepared_s3_bucket'] = prepared_s3_bucket
        rec['prepared_redis_key'] = prepared_redis_key
        rec['updated'] = updated
        rec['s3_enabled'] = enable_s3
        rec['redis_enabled'] = enable_redis_publish

        try:
            log.info(
                '{} connecting redis={}:{} '
                'db={} key={} '
                'updated={} expire={}'.format(
                    label,
                    redis_host,
                    redis_port,
                    redis_db,
                    redis_key,
                    updated,
                    redis_expire))
            rc = redis.Redis(
                host=redis_host,
                port=redis_port,
                password=redis_password,
                db=redis_db)
        except Exception as e:
            err = (
                '{} failed - redis connection to address={}@{} '
                'key={} ex={}'.format(
                    label,
                    redis_address,
                    redis_db,
                    redis_key,
                    e))
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        # end of try/ex for connecting to redis

        initial_data_res = redis_get.get_data_from_redis_key(
            label=label,
            client=rc,
            key=redis_key)

        log.info(
            '{} get redis key={} status={} err={}'.format(
                label,
                redis_key,
                ae_consts.get_status(initial_data_res['status']),
                initial_data_res['err']))

        initial_data = initial_data_res['rec'].get(
            'data',
            None)

        if enable_s3 and not initial_data:

            log.info(
                '{} failed to find redis_key={} trying s3 '
                'from s3_key={} s3_bucket={} s3_address={}'.format(
                    label,
                    redis_key,
                    s3_key,
                    s3_bucket_name,
                    s3_address))

            get_from_s3_req = \
                api_requests.build_publish_from_s3_to_redis_request()

            get_from_s3_req['s3_enabled'] = enable_s3
            get_from_s3_req['s3_access_key'] = s3_access_key
            get_from_s3_req['s3_secret_key'] = s3_secret_key
            get_from_s3_req['s3_region_name'] = s3_region_name
            get_from_s3_req['s3_address'] = s3_address
            get_from_s3_req['s3_secure'] = s3_secure
            get_from_s3_req['s3_key'] = s3_key
            get_from_s3_req['s3_bucket'] = s3_bucket_name
            get_from_s3_req['redis_key'] = redis_key
            get_from_s3_req['label'] = (
                '{}-run_publish_from_s3_to_redis'.format(
                    label))

            log.info(
                '{} load from s3={} to '
                'redis={}'.format(
                    label,
                    s3_key,
                    redis_key))

            try:
                # run in synchronous mode:
                get_from_s3_req['celery_disabled'] = True
                task_res = s3_to_redis.run_publish_from_s3_to_redis(
                    get_from_s3_req)
                if task_res.get(
                        'status',
                        ae_consts.ERR) == ae_consts.SUCCESS:
                    log.info(
                        '{} loaded s3={}:{} '
                        'to redis={} retrying'.format(
                            label,
                            s3_bucket_name,
                            s3_key,
                            redis_key))
                    initial_data_res = redis_get.get_data_from_redis_key(
                        label=label,
                        client=rc,
                        key=redis_key)

                    log.info(
                        '{} get redis try=2 key={} status={} err={}'.format(
                            label,
                            redis_key,
                            ae_consts.get_status(initial_data_res['status']),
                            initial_data_res['err']))

                    initial_data = initial_data_res['rec'].get(
                        'data',
                        None)
                else:
                    err = (
                        '{} ERR failed loading from bucket={} '
                        's3_key={} to redis_key={} with res={}'.format(
                            label,
                            s3_bucket_name,
                            s3_key,
                            redis_key,
                            task_res))
                    log.error(err)
                    res = build_result.build_result(
                        status=ae_consts.ERR,
                        err=err,
                        rec=rec)
                    return res
            except Exception as e:
                err = (
                    '{} extract from s3 and publish to redis failed loading '
                    'data from bucket={} in '
                    's3_key={} with publish to redis_key={} '
                    'with ex={}'.format(
                        label,
                        s3_bucket_name,
                        s3_key,
                        redis_key,
                        e))
                log.error(err)
                res = build_result.build_result(
                    status=ae_consts.ERR,
                    err=err,
                    rec=rec)
                return res
            # end of try/ex for publishing from s3->redis
        # end of if enable_s3

        if not initial_data:
            err = (
                '{} did not find any data to prepare in redis_key={} or '
                's3_key={} in bucket={}'.format(
                    label,
                    redis_key,
                    s3_key,
                    s3_bucket_name))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res

        initial_data_num_chars = len(str(initial_data))
        initial_size_value = None
        initial_size_str = None
        if initial_data_num_chars < ae_consts.PREPARE_DATA_MIN_SIZE:
            err = (
                '{} not enough data={} in redis_key={} or '
                's3_key={} in bucket={}'.format(
                    label,
                    initial_data_num_chars,
                    redis_key,
                    s3_key,
                    s3_bucket_name))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        else:
            initial_size_value = initial_data_num_chars / 1024000
            initial_size_str = ae_consts.to_f(initial_size_value)
            if ae_consts.ev('DEBUG_PREPARE', '0') == '1':
                log.info(
                    '{} initial - redis_key={} data={}'.format(
                        label,
                        redis_key,
                        str(initial_data)))
            else:
                log.info(
                    '{} initial - redis_key={} data size={} MB'.format(
                        label,
                        redis_key,
                        initial_size_str))
        # end of trying to get initial_data

        rec['initial_data'] = initial_data
        rec['initial_size'] = initial_data_num_chars

        prepare_data = None

        try:
            if ae_consts.ev('DEBUG_PREPARE', '0') == '1':
                log.info(
                    '{} data={} - flatten - {} MB from '
                    'redis_key={}'.format(
                        label,
                        ae_consts.ppj(initial_data),
                        initial_size_str,
                        redis_key))
            else:
                log.info(
                    '{} flatten - {} MB from '
                    'redis_key={}'.format(
                        label,
                        initial_size_str,
                        redis_key))
            prepare_data = dict_to_csv.flatten_dict(
                data=initial_data)
        except Exception as e:
            prepare_data = None
            err = (
                '{} flatten - convert to csv failed with ex={} '
                'redis_key={}'.format(
                    label,
                    e,
                    redis_key))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        # end of try/ex

        if not prepare_data:
            err = (
                '{} flatten - did not return any data from redis_key={} '
                'or s3_key={} in bucket={}'.format(
                    label,
                    redis_key,
                    s3_key,
                    s3_bucket_name))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        # end of prepare_data

        prepare_data_num_chars = len(str(prepare_data))
        prepare_size_value = None

        if prepare_data_num_chars < ae_consts.PREPARE_DATA_MIN_SIZE:
            err = (
                '{} prepare - there is not enough data={} in redis_key={}'
                ''.format(
                    label,
                    prepare_data_num_chars,
                    redis_key))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        else:
            prepare_size_value = prepare_data_num_chars / 1024000
            prepare_size_str = ae_consts.to_f(prepare_size_value)
            if ae_consts.ev('DEBUG_PREPARE', '0') == '1':
                log.info(
                    '{} data={} - prepare - redis_key={}'.format(
                        label,
                        ae_consts.ppj(prepare_data),
                        redis_key))
            else:
                log.info(
                    '{} prepare - redis_key={} data size={} MB'.format(
                        label,
                        redis_key,
                        prepare_size_str))
        # end of trying to get the size of the prepared data

        rec['prepared_data'] = prepare_data
        rec['prepared_size'] = prepare_data_num_chars

        res = build_result.build_result(
            status=ae_consts.SUCCESS,
            err=None,
            rec=rec)

        rc = None

    except Exception as e:
        res = build_result.build_result(
            status=ae_consts.ERR,
            err=(
                'failed - prepare_pricing_dataset '
                'dict={} with ex={}').format(
                    work_dict,
                    e),
            rec=rec)
        log.error(
            '{} - {}'.format(
                label,
                res['err']))
    # end of try/ex

    log.info(
        'task - prepare_pricing_dataset done - '
        '{} - status={}'.format(
            label,
            ae_consts.get_status(res['status'])))

    return get_task_results.get_task_results(
        work_dict=work_dict,
        result=res)
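
Because this runs as a bound task (note the ``self`` argument), it is
normally invoked through the task runner rather than called directly. A
sketch of a ``work_dict`` for it, with key names taken from the
``work_dict.get(...)`` lookups above and placeholder values:

prepare_req = {
    'ticker': 'SPY',
    'label': 'prepare-spy',
    's3_bucket': 'pricing',            # source bucket
    's3_key': 'SPY_latest',            # assumed key naming
    'redis_key': 'SPY_latest',
    'prepared_s3_bucket': 'prepared',  # destination for the csv artifact
    'prepared_redis_key': 'prepared',
    'ignore_columns': None
}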
def extract_option_calls_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_option_calls_dataset

    Extract the TD options calls for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = '{}'.format(work_dict.get('label', 'extract'))
    ds_id = work_dict.get('ticker')
    df_type = td_consts.DATAFEED_TD_CALLS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key',
        work_dict.get('tdcalls', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key',
        work_dict.get('tdcalls', 'missing-s3-key'))
    redis_host = work_dict.get(
        'redis_host',
        None)
    redis_port = work_dict.get(
        'redis_port',
        None)
    redis_db = work_dict.get(
        'redis_db',
        ae_consts.REDIS_DB)
    verbose = work_dict.get(
        'verbose_td',
        False)

    if verbose:
        log.info(
            '{} - {} - start - redis_key={} s3_key={}'.format(
                label,
                df_str,
                redis_key,
                s3_key))

    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    calls_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key,
            decompress_df=True)

        status = redis_rec['status']
        if verbose:
            log.info(
                '{} - {} redis get data key={} status={}'.format(
                    label,
                    df_str,
                    redis_key,
                    ae_consts.get_status(status=status)))

        if status == ae_consts.SUCCESS:
            calls_json = None
            if 'calls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['calls']
            else:
                calls_json = redis_rec['rec']['data']
            if verbose:
                log.info(
                    '{} - {} redis convert calls to df'.format(
                        label,
                        df_str))
            exp_date_str = None
            try:
                calls_df = pd.read_json(
                    calls_json,
                    orient='records')
                if len(calls_df.index) == 0:
                    return ae_consts.SUCCESS, None
                if 'date' not in calls_df:
                    log.debug(
                        'failed to find date column in TD calls '
                        'df={} len={}'.format(
                            calls_df,
                            len(calls_df.index)))
                    return ae_consts.SUCCESS, None
                # sort_values returns a new frame - assign it
                # back so the sort actually takes effect
                calls_df = calls_df.sort_values(
                    by=[
                        'date',
                        'strike'
                    ])
                """
                for i, r in calls_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in calls_df:
                        calls_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (
                    calls_df['exp_date'].iloc[-1])

                calls_df['date'] = calls_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)

            except Exception as f:
                log.error(
                    '{} - {} redis_key={} '
                    'no calls df found or ex={}'.format(
                        label,
                        df_str,
                        redis_key,
                        f))
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            if verbose:
                log.info(
                    '{} - {} redis_key={} calls={} exp_date={}'.format(
                        label,
                        df_str,
                        redis_key,
                        len(calls_df.index),
                        exp_date_str))
        else:
            if verbose:
                log.info(
                    '{} - {} did not find valid redis option calls '
                    'in redis_key={} status={}'.format(
                        label,
                        df_str,
                        redis_key,
                        ae_consts.get_status(status=status)))

    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting option calls from '
            'redis={}:{}@{} key={} ex={}'.format(
                label,
                df_str,
                ds_id,
                redis_host,
                redis_port,
                redis_db,
                redis_key,
                e))
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    if verbose:
        log.info(
            '{} - {} ds_id={} extract scrub={}'.format(
                label,
                df_str,
                ds_id,
                scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=calls_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
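
The extractors in this family return a ``(status, DataFrame)`` tuple. A
hedged usage sketch with placeholder key names:

work = {
    'ticker': 'SPY',
    'label': 'extract-calls',
    'tdcalls': 'SPY_tdcalls',  # assumed key naming
    'verbose_td': True
}
status, calls_df = extract_option_calls_dataset(work)
if status == ae_consts.SUCCESS and calls_df is not None:
    print(calls_df[['date', 'strike']].head())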
def extract_pricing_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_pricing_dataset

    Extract the Yahoo pricing data for a ticker and
    return it as a pandas DataFrame

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = work_dict.get('label', 'extract')
    ds_id = work_dict.get('ticker')
    df_type = DATAFEED_PRICING_YAHOO
    df_str = get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key',
        work_dict.get('pricing', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key',
        work_dict.get('pricing', 'missing-s3-key'))
    redis_host = work_dict.get(
        'redis_host',
        None)
    redis_port = work_dict.get(
        'redis_port',
        None)
    redis_db = work_dict.get(
        'redis_db',
        REDIS_DB)

    log.debug(
        '{} - {} - start - redis_key={} s3_key={}'.format(
            label,
            df_str,
            redis_key,
            s3_key))

    if not redis_host and not redis_port:
        redis_host = REDIS_ADDRESS.split(':')[0]
        redis_port = REDIS_ADDRESS.split(':')[1]

    df = None
    status = NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key)

        status = redis_rec['status']
        log.debug(
            '{} - {} redis get data key={} status={}'.format(
                label,
                df_str,
                redis_key,
                get_status(status=status)))

        if status == SUCCESS:
            log.debug(
                '{} - {} redis convert pricing to json'.format(
                    label,
                    df_str))
            cached_dict = redis_rec['rec']['data']
            log.debug(
                '{} - {} redis convert pricing to df'.format(
                    label,
                    df_str))
            try:
                df = pd.DataFrame(
                    cached_dict,
                    index=[0])
            except Exception as f:
                log.debug(
                    '{} - {} redis_key={} '
                    'no pricing df found ex={}'.format(
                        label,
                        df_str,
                        redis_key,
                        f))
                return EMPTY, None
            # end of try/ex to convert to df
            log.debug(
                '{} - {} redis_key={} done convert pricing to df'.format(
                    label,
                    df_str,
                    redis_key))
        else:
            log.debug(
                '{} - {} did not find valid redis pricing '
                'in redis_key={} status={}'.format(
                    label,
                    df_str,
                    redis_key,
                    get_status(status=status)))

    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting pricing from '
            'redis={}:{}@{} key={} ex={}'.format(
                label,
                df_str,
                ds_id,
                redis_host,
                redis_port,
                redis_db,
                redis_key,
                e))
        return ERR, None
    # end of try/ex extract from redis

    log.debug(
        '{} - {} ds_id={} extract scrub={}'.format(
            label,
            df_str,
            ds_id,
            scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=df)

    status = SUCCESS

    return status, scrubbed_df
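
A usage sketch for this pricing extractor; the ``pricing`` key mirrors the
``work_dict.get('pricing', ...)`` fallback above and its value is a
placeholder:

status, pricing_df = extract_pricing_dataset({
    'ticker': 'SPY',
    'pricing': 'SPY_pricing'})  # assumed key naming
if status == SUCCESS and pricing_df is not None:
    print(pricing_df.head())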
def extract_option_puts_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Extract the TD options puts for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = f'{work_dict.get("label", "extract")}'
    ds_id = work_dict.get('ticker')
    df_type = td_consts.DATAFEED_TD_PUTS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_key = work_dict.get('redis_key',
                              work_dict.get('tdputs', 'missing-redis-key'))
    s3_key = work_dict.get('s3_key', work_dict.get('tdputs', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)
    verbose = work_dict.get('verbose_td', False)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(label=label,
                                                      host=redis_host,
                                                      port=redis_port,
                                                      db=redis_db,
                                                      password=work_dict.get(
                                                          'password', None),
                                                      key=redis_key,
                                                      decompress_df=True)

        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            puts_json = None
            if 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            if verbose:
                log.info(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                if len(puts_df.index) == 0:
                    return ae_consts.SUCCESS, None
                if 'date' not in puts_df:
                    log.debug('failed to find date column in TD puts '
                              f'df={puts_df} len={len(puts_df.index)}')
                    return ae_consts.SUCCESS, None
                # assign the result back so the sort takes effect
                puts_df = puts_df.sort_values(by=['date', 'strike'])
                """
                for i, r in puts_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in puts_df:
                        puts_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            puts_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (puts_df['exp_date'].iloc[-1])

                puts_df['date'] = puts_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)

            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            if verbose:
                log.info(f'{label} - {df_str} redis_key={redis_key} '
                         f'puts={len(puts_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option puts in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        log.debug(f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                  f'puts from redis={redis_host}:{redis_port}@{redis_db} '
                  f'key={redis_key} ex={e}')
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
def extract_option_puts_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Extract the Yahoo options puts for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = '{}-puts'.format(work_dict.get('label', 'extract'))
    ds_id = work_dict.get('ticker')
    df_type = DATAFEED_OPTIONS_YAHOO
    df_str = get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key',
        work_dict.get('options', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key',
        work_dict.get('options', 'missing-s3-key'))
    redis_host = work_dict.get(
        'redis_host',
        None)
    redis_port = work_dict.get(
        'redis_port',
        None)
    redis_db = work_dict.get(
        'redis_db',
        REDIS_DB)

    log.debug(
        '{} - {} - start - redis_key={} s3_key={}'.format(
            label,
            df_str,
            redis_key,
            s3_key))

    if not redis_host and not redis_port:
        redis_host = REDIS_ADDRESS.split(':')[0]
        redis_port = REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    puts_df = None
    status = NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key)

        status = redis_rec['status']
        log.debug(
            '{} - {} redis get data key={} status={}'.format(
                label,
                df_str,
                redis_key,
                get_status(status=status)))

        if status == SUCCESS:
            exp_date_str = redis_rec['rec']['data']['exp_date']
            puts_json = redis_rec['rec']['data']['puts']
            log.debug(
                '{} - {} redis convert puts to df'.format(
                    label,
                    df_str))
            try:
                puts_df = pd.read_json(
                    puts_json,
                    orient='records')
            except Exception as f:
                log.debug(
                    '{} - {} redis_key={} '
                    'no puts df found ex={}'.format(
                        label,
                        df_str,
                        redis_key,
                        f))
                return EMPTY, None
            # end of try/ex to convert to df
            log.debug(
                '{} - {} redis_key={} puts={} exp_date={}'.format(
                    label,
                    df_str,
                    redis_key,
                    len(puts_df.index),
                    exp_date_str))
        else:
            log.debug(
                '{} - {} did not find valid redis option puts '
                'in redis_key={} status={}'.format(
                    label,
                    df_str,
                    redis_key,
                    get_status(status=status)))

    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting option puts from '
            'redis={}:{}@{} key={} ex={}'.format(
                label,
                df_str,
                ds_id,
                redis_host,
                redis_port,
                redis_db,
                redis_key,
                e))
        return ERR, None
    # end of try/ex extract from redis

    log.debug(
        '{} - {} ds_id={} extract scrub={}'.format(
            label,
            df_str,
            ds_id,
            scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = SUCCESS

    return status, scrubbed_df
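
A matching sketch for this Yahoo puts variant; the ``options`` key mirrors
the ``work_dict.get('options', ...)`` fallbacks above and its value is a
placeholder:

status, puts_df = extract_option_puts_dataset({
    'ticker': 'SPY',
    'options': 'SPY_options'})  # assumed key naming
if status == SUCCESS and puts_df is not None:
    print('puts={}'.format(len(puts_df.index)))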
Example #9
def extract_pricing_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_pricing_dataset

    Extract the Yahoo pricing data for a ticker and
    return it as a pandas DataFrame

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = work_dict.get('label', 'extract')
    ds_id = work_dict.get('ticker')
    df_type = yahoo_consts.DATAFEED_PRICING_YAHOO
    df_str = yahoo_consts.get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get('redis_key',
                              work_dict.get('pricing', 'missing-redis-key'))
    s3_key = work_dict.get('s3_key', work_dict.get('pricing',
                                                   'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)

    log.debug(
        f'{label} - {df_str} - start - redis_key={redis_key} s3_key={s3_key}')

    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(label=label,
                                                      host=redis_host,
                                                      port=redis_port,
                                                      db=redis_db,
                                                      password=work_dict.get(
                                                          'password', None),
                                                      key=redis_key,
                                                      decompress_df=True)

        status = redis_rec['status']
        log.debug(f'{label} - {df_str} redis get data key={redis_key} '
                  f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            log.debug(f'{label} - {df_str} redis convert pricing to json')
            cached_dict = redis_rec['rec']['data']
            log.debug(f'{label} - {df_str} redis convert pricing to df')
            try:
                df = pd.DataFrame(cached_dict, index=[0])
            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no pricing df found')
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            log.debug(f'{label} - {df_str} redis_key={redis_key} done '
                      'convert pricing to df')
        else:
            log.debug(f'{label} - {df_str} did not find valid redis pricing '
                      f'in redis_key={redis_key} '
                      f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        log.debug(
            f'{label} - {df_str} - ds_id={ds_id} failed getting pricing from '
            f'redis={redis_host}:{redis_port}@{redis_db} '
            f'key={redis_key} ex={e}')
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    log.debug(f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
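
A minimal usage sketch for ``extract_pricing_dataset``, assuming the module
imports above (``ae_consts``) are in scope; the ticker, key, and host values
are hypothetical:

# hypothetical work_dict - key/host values are illustrative only
work = {
    'label': 'demo-extract',
    'ticker': 'SPY',
    'redis_key': 'SPY_2019-02-15_pricing',
    'redis_host': 'localhost',
    'redis_port': 6379,
}
status, pricing_df = extract_pricing_dataset(work_dict=work)
if status == ae_consts.SUCCESS:
    print(pricing_df)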
Example #10
def extract_option_puts_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Extract the Yahoo options puts for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = f'{work_dict.get("label", "extract")}-puts'
    ds_id = work_dict.get('ticker')
    df_type = yahoo_consts.DATAFEED_OPTIONS_YAHOO
    df_str = yahoo_consts.get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get('redis_key',
                              work_dict.get('puts', 'missing-redis-key'))
    s3_key = work_dict.get('s3_key', work_dict.get('puts', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)

    log.debug(
        f'{label} - {df_str} - start - redis_key={redis_key} s3_key={s3_key}')

    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = int(ae_consts.REDIS_ADDRESS.split(':')[1])

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(label=label,
                                                      host=redis_host,
                                                      port=redis_port,
                                                      db=redis_db,
                                                      password=work_dict.get(
                                                          'password', None),
                                                      key=redis_key,
                                                      decompress_df=True)

        status = redis_rec['status']
        log.debug(f'{label} - {df_str} redis get data key={redis_key} '
                  f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            puts_json = None
            if 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            log.debug(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                exp_epoch_value = puts_df['expiration'].iloc[-1]
                exp_date_str = ae_utils.convert_epoch_to_datetime_string(
                    epoch=exp_epoch_value,
                    fmt=ae_consts.COMMON_DATE_FORMAT,
                    use_utc=True)
            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            log.debug(f'{label} - {df_str} redis_key={redis_key} '
                      f'puts={len(puts_df.index)} exp_date={exp_date_str}')
        else:
            log.debug(
                f'{label} - {df_str} did not find valid redis option puts '
                f'in redis_key={redis_key} '
                f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        log.debug(
            f'{label} - {df_str} - ds_id={ds_id} failed getting option puts '
            f'from redis={redis_host}:{redis_port}@{redis_db} '
            f'key={redis_key} ex={e}')
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    log.debug(f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
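
A matching usage sketch for the Yahoo puts extractor above (again with
hypothetical key and host values):

work = {
    'label': 'demo-extract',
    'ticker': 'SPY',
    'redis_key': 'SPY_2019-02-15_options',  # hypothetical key
    'redis_host': 'localhost',
    'redis_port': 6379,
}
status, puts_df = extract_option_puts_dataset(work_dict=work)
if status == ae_consts.SUCCESS:
    print(puts_df)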
Example #11
def extract_option_calls_dataset(ticker=None,
                                 date=None,
                                 work_dict=None,
                                 scrub_mode='sort-by-date',
                                 verbose=False):
    """extract_option_calls_dataset

    Extract the TD options calls for a ticker and
    return a tuple (status, ``pandas.DataFrame``)

    .. code-block:: python

        import analysis_engine.td.extract_df_from_redis as td_extract

        # extract by historical date is also supported as an arg
        # date='2019-02-15'
        calls_status, calls_df = td_extract.extract_option_calls_dataset(
            ticker='SPY')
        print(calls_df)

    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: optional - string type of
        scrubbing handler to run
    :param verbose: optional - boolean for turning on logging
    """
    label = 'extract_td_calls'
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
        label = work_dict.get('label', label)
    if not use_date:
        use_date = latest_close_date

    ds_id = ticker
    df_type = td_consts.DATAFEED_TD_CALLS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_db = ae_consts.REDIS_DB
    redis_key = f'{ticker}_{use_date}_tdcalls'
    redis_host, redis_port = ae_consts.get_redis_host_and_port(req=work_dict)
    redis_password = ae_consts.REDIS_PASSWORD
    s3_key = redis_key

    if work_dict:
        redis_db = work_dict.get('redis_db', redis_db)
        redis_password = work_dict.get('redis_password', redis_password)
        verbose = work_dict.get('verbose_td', verbose)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    exp_date_str = None
    calls_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(label=label,
                                                      host=redis_host,
                                                      port=redis_port,
                                                      db=redis_db,
                                                      password=redis_password,
                                                      key=redis_key,
                                                      decompress_df=True)

        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            calls_json = None
            if 'tdcalls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['tdcalls']
            elif 'calls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['calls']
            else:
                calls_json = redis_rec['rec']['data']
            if not calls_json:
                return ae_consts.SUCCESS, pd.DataFrame([])
            if verbose:
                log.info(f'{label} - {df_str} redis convert calls to df')
            exp_date_str = None
            try:
                calls_df = pd.read_json(calls_json, orient='records')
                if len(calls_df.index) == 0:
                    return ae_consts.SUCCESS, pd.DataFrame([])
                if 'date' not in calls_df:
                    if verbose:
                        log.error(
                            'failed to find date column in TD calls '
                            f'df={calls_df} len={len(calls_df.index)}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                # sort_values is not in-place; keep the sorted result
                calls_df = calls_df.sort_values(by=['date', 'strike'])
                """
                for i, r in calls_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in calls_df:
                        calls_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = calls_df['exp_date'].iloc[-1]

                calls_df['date'] = calls_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)

            except Exception as f:
                not_fixed = True
                if ('Can only use .dt accessor with '
                        'datetimelike values') in str(f):
                    try:
                        log.critical(f'fixing dates in {redis_key}')
                        # remove epoch second data and
                        # use only the millisecond date values
                        bad_date = ae_consts.EPOCH_MINIMUM_DATE
                        calls_df.loc[
                            calls_df['date'] < bad_date, 'date'] = None
                        calls_df = calls_df.dropna(axis=0, how='any')
                        fmt = ae_consts.COMMON_TICK_DATE_FORMAT
                        calls_df['date'] = pd.to_datetime(
                            calls_df['date'], unit='ms').dt.strftime(fmt)
                        not_fixed = False
                    except Exception as g:
                        log.critical(
                            f'failed to parse date column {calls_df["date"]} '
                            f'with dt.strftime ex={f} and EPOCH EX={g}')
                        return ae_consts.SUCCESS, pd.DataFrame([])
                # if able to fix error or not

                if not_fixed:
                    log.error(f'{label} - {df_str} redis_key={redis_key} '
                              f'no calls df found or ex={f}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                # if unable to fix - return out; otherwise continue
                # with the repaired date column
            # end of try/ex to convert to df
            if verbose:
                log.info(
                    f'{label} - {df_str} redis_key={redis_key} '
                    f'calls={len(calls_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option calls in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        if verbose:
            log.error(
                f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                f'calls from redis={redis_host}:{redis_port}@{redis_db} '
                f'key={redis_key} ex={e}')
        return ae_consts.ERR, pd.DataFrame([])
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=calls_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
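
The date-repair branch above drops epoch-second rows and reparses the
remaining millisecond epochs. The same technique as a standalone pandas
sketch (the sample values, cutoff, and format string are illustrative
stand-ins for ``ae_consts.EPOCH_MINIMUM_DATE`` and
``ae_consts.COMMON_TICK_DATE_FORMAT``):

import pandas as pd

# rows whose 'date' is below the minimum epoch are dropped; the rest
# are parsed as millisecond epochs and formatted back to strings
df = pd.DataFrame({'date': [1, 1550246400000], 'strike': [280.0, 281.0]})
bad_date = 1000000000000  # stand-in for ae_consts.EPOCH_MINIMUM_DATE
df.loc[df['date'] < bad_date, 'date'] = None
df = df.dropna(axis=0, how='any')
df['date'] = pd.to_datetime(df['date'], unit='ms').dt.strftime(
    '%Y-%m-%d %H:%M:%S')  # stand-in for COMMON_TICK_DATE_FORMAT
print(df)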
Example #12
def extract_option_puts_dataset(ticker=None,
                                date=None,
                                work_dict=None,
                                scrub_mode='sort-by-date',
                                verbose=False):
    """extract_option_puts_dataset

    Extract the TD options puts for a ticker and
    return a tuple (status, ``pandas.DataFrame``)

    .. code-block:: python

        import analysis_engine.td.extract_df_from_redis as td_extract

        # extract by historical date is also supported as an arg
        # date='2019-02-15'
        puts_status, puts_df = td_extract.extract_option_puts_dataset(
            ticker='SPY')
        print(puts_df)

    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: optional - string type of
        scrubbing handler to run
    :param verbose: optional - boolean for turning on logging
    """
    label = 'extract_td_puts'
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
        label = work_dict.get('label', label)
    if not use_date:
        use_date = latest_close_date

    ds_id = ticker
    df_type = td_consts.DATAFEED_TD_PUTS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_db = ae_consts.REDIS_DB
    redis_key = f'{ticker}_{use_date}_tdputs'
    redis_host, redis_port = ae_consts.get_redis_host_and_port(req=work_dict)
    redis_password = ae_consts.REDIS_PASSWORD
    s3_key = redis_key

    if work_dict:
        redis_db = work_dict.get('redis_db', redis_db)
        redis_password = work_dict.get('redis_password', redis_password)
        verbose = work_dict.get('verbose_td', verbose)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(label=label,
                                                      host=redis_host,
                                                      port=redis_port,
                                                      db=redis_db,
                                                      password=redis_password,
                                                      key=redis_key,
                                                      decompress_df=True)

        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            puts_json = None
            # use elif so a found 'tdputs' payload is not overwritten
            # by the whole-record fallback below
            if 'tdputs' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['tdputs']
            elif 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            if not puts_json:
                return ae_consts.SUCCESS, pd.DataFrame([])
            if verbose:
                log.info(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                if len(puts_df.index) == 0:
                    return ae_consts.SUCCESS, pd.DataFrame([])
                if 'date' not in puts_df:
                    log.debug('failed to find date column in TD puts '
                              f'df={puts_df} len={len(puts_df.index)}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                # sort_values is not in-place; keep the sorted result
                puts_df = puts_df.sort_values(by=['date', 'strike'])
                """
                for i, r in puts_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in puts_df:
                        puts_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            puts_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = puts_df['exp_date'].iloc[-1]

                puts_df['date'] = puts_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)

            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.SUCCESS, pd.DataFrame([])
            # end of try/ex to convert to df
            if verbose:
                log.info(f'{label} - {df_str} redis_key={redis_key} '
                         f'puts={len(puts_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option puts in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        if verbose:
            log.error(
                f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                f'puts from redis={redis_host}:{redis_port}@{redis_db} '
                f'key={redis_key} ex={e}')
        return ae_consts.ERR, pd.DataFrame([])
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
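
The key fallback above checks ``tdputs``, then ``puts``, then falls back to
the whole payload. The same lookup as a tiny standalone helper (hypothetical,
for illustration only):

def pick_option_payload(data, keys=('tdputs', 'puts')):
    # hypothetical helper mirroring the fallback above: return the
    # first matching key's value, else the whole payload
    for k in keys:
        if k in data:
            return data[k]
    return data

print(pick_option_payload({'puts': '[]'}))  # -> '[]'
print(pick_option_payload({'other': 1}))    # -> {'other': 1}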