Example #1
def perform_extract(df_type,
                    df_str,
                    work_dict,
                    dataset_id_key='ticker',
                    scrub_mode='sort-by-date'):
    """perform_extract

    Helper for extracting from Redis or S3

    :param df_type: datafeed type enum
    :param df_str: dataset string name
    :param work_dict: incoming work request dictionary
    :param dataset_id_key: configurable dataset identifier
                           key for tracking scrubbing and
                           debugging errors
    :param scrub_mode: scrubbing mode on extraction for
                       one-off cleanup before analysis
    """
    status = FAILED
    ds_id = work_dict.get(dataset_id_key, None)
    label = work_dict.get('label', 'extract')
    s3_bucket = work_dict.get('s3_bucket', S3_BUCKET)
    s3_key = work_dict.get('s3_key', S3_KEY)
    redis_key = work_dict.get('redis_key', REDIS_KEY)
    s3_enabled = work_dict.get('s3_enabled', ENABLED_S3_UPLOAD)
    s3_access_key = work_dict.get('s3_access_key', S3_ACCESS_KEY)
    s3_secret_key = work_dict.get('s3_secret_key', S3_SECRET_KEY)
    s3_region_name = work_dict.get('s3_region_name', S3_REGION_NAME)
    s3_address = work_dict.get('s3_address', S3_ADDRESS)
    s3_secure = work_dict.get('s3_secure', S3_SECURE)
    redis_address = work_dict.get('redis_address', REDIS_ADDRESS)
    redis_password = work_dict.get('redis_password', REDIS_PASSWORD)
    redis_db = work_dict.get('redis_db', REDIS_DB)
    redis_expire = work_dict.get('redis_expire', REDIS_EXPIRE)

    log.debug('{} - {} - START - ds_id={} scrub_mode={} '
              'redis_address={}@{} redis_key={} '
              's3={} s3_address={} s3_bucket={} s3_key={}'.format(
                  label, df_str, ds_id, scrub_mode, redis_address, redis_db,
                  redis_key, s3_enabled, s3_address, s3_bucket, s3_key))

    if ev('DEBUG_REDIS_EXTRACT', '0') == '1':
        log.info('{} - {} - ds_id={} redis '
                 'pw={} expire={}'.format(label, df_str, ds_id, redis_password,
                                          redis_expire))

    if ev('DEBUG_S3_EXTRACT', '0') == '1':
        log.info('{} - {} - ds_id={} s3 '
                 'ak={} sk={} region={} secure={}'.format(
                     label, df_str, ds_id, s3_access_key, s3_secret_key,
                     s3_region_name, s3_secure))

    extract_res = None
    try:
        extract_res = build_df.build_df_from_redis(label=label,
                                                   address=redis_address,
                                                   db=redis_db,
                                                   key=redis_key)
    except Exception as e:
        extract_res = None
        log.error('{} - {} - ds_id={} failed extract from '
                  'redis={}@{} key={} ex={}'.format(label, df_str, ds_id,
                                                    redis_address, redis_db,
                                                    redis_key, e))
    # end of try/ex extract from redis

    if not extract_res:
        return status, None

    valid_df = (extract_res['status'] == SUCCESS
                and extract_res['rec']['valid_df'])

    if not valid_df:
        if ev('DEBUG_S3_EXTRACT', '0') == '1':
            log.error('{} - {} ds_id={} invalid df '
                      'status={} extract_res={}'.format(
                          label, df_str, ds_id,
                          get_status(status=extract_res['status']),
                          extract_res))
        return status, None

    extract_df = extract_res['rec']['data']

    log.debug('{} - {} ds_id={} extract scrub={}'.format(
        label, df_str, ds_id, scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=extract_df)

    status = SUCCESS

    return status, scrubbed_df
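
A minimal, hypothetical call to this helper could look like the sketch below; the work_dict keys mirror the .get() lookups above, the values are placeholders, and DATAFEED_PRICING_YAHOO stands in for whichever datafeed enum the caller is tracking.

# hypothetical usage sketch - every value below is a placeholder
work_request = {
    'ticker': 'SPY',
    'label': 'daily-extract',
    'redis_key': 'SPY_daily',
    'redis_address': 'localhost:6379'
}

status, df = perform_extract(
    df_type=DATAFEED_PRICING_YAHOO,   # any datafeed enum the module defines
    df_str='pricing',
    work_dict=work_request,
    scrub_mode='sort-by-date')

if status == SUCCESS and df is not None:
    print(df.head())
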
def extract_option_puts_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Extract the TD options puts for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = f'{work_dict.get("label", "extract")}'
    ds_id = work_dict.get('ticker')
    df_type = td_consts.DATAFEED_TD_PUTS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_key = work_dict.get('redis_key',
                              work_dict.get('tdputs', 'missing-redis-key'))
    s3_key = work_dict.get('s3_key', work_dict.get('tdputs', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)
    verbose = work_dict.get('verbose_td', False)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(label=label,
                                                      host=redis_host,
                                                      port=redis_port,
                                                      db=redis_db,
                                                      password=work_dict.get(
                                                          'password', None),
                                                      key=redis_key,
                                                      decompress_df=True)

        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            puts_json = None
            if 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            if verbose:
                log.info(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                if len(puts_df.index) == 0:
                    return ae_consts.SUCCESS, None
                if 'date' not in puts_df:
                    log.debug('failed to find date column in TD puts '
                              f'df={puts_df} len={len(puts_df.index)}')
                    return ae_consts.SUCCESS, None
                puts_df = puts_df.sort_values(by=['date', 'strike'])
                """
                for i, r in calls_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in puts_df:
                        puts_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            puts_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (puts_df['exp_date'].iloc[-1])

                puts_df['date'] = puts_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)

            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            if verbose:
                log.info(f'{label} - {df_str} redis_key={redis_key} '
                         f'puts={len(puts_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option puts in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        log.debug(f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                  f'puts from redis={redis_host}:{redis_port}@{redis_db} '
                  f'key={redis_key} ex={e}')
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
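
A hedged usage sketch for the TD puts extractor, assuming the same module-level imports (ae_consts and friends) used throughout these examples; the ticker and redis_key values are placeholders only.

# hypothetical usage sketch - 'tdputs' is the fallback key checked when no
# explicit redis_key/s3_key is provided; values are placeholders
work_request = {
    'ticker': 'SPY',
    'label': 'td-extract',
    'redis_key': 'SPY_2019-02-15_tdputs',
    'verbose_td': True
}

status, puts_df = extract_option_puts_dataset(work_dict=work_request)
if status == ae_consts.SUCCESS and puts_df is not None:
    print(puts_df[['date', 'strike']].head())
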
def extract_option_calls_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_option_calls_dataset

    Extract the TD options calls for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = '{}'.format(work_dict.get('label', 'extract'))
    ds_id = work_dict.get('ticker')
    df_type = td_consts.DATAFEED_TD_CALLS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key',
        work_dict.get('tdcalls', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key',
        work_dict.get('tdcalls', 'missing-s3-key'))
    redis_host = work_dict.get(
        'redis_host',
        None)
    redis_port = work_dict.get(
        'redis_port',
        None)
    redis_db = work_dict.get(
        'redis_db',
        ae_consts.REDIS_DB)
    verbose = work_dict.get(
        'verbose_td',
        False)

    if verbose:
        log.info(
            '{} - {} - start - redis_key={} s3_key={}'.format(
                label,
                df_str,
                redis_key,
                s3_key))

    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    calls_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key,
            decompress_df=True)

        status = redis_rec['status']
        if verbose:
            log.info(
                '{} - {} redis get data key={} status={}'.format(
                    label,
                    df_str,
                    redis_key,
                    ae_consts.get_status(status=status)))

        if status == ae_consts.SUCCESS:
            calls_json = None
            if 'calls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['calls']
            else:
                calls_json = redis_rec['rec']['data']
            if verbose:
                log.info(
                    '{} - {} redis convert calls to df'.format(
                        label,
                        df_str))
            exp_date_str = None
            try:
                calls_df = pd.read_json(
                    calls_json,
                    orient='records')
                if len(calls_df.index) == 0:
                    return ae_consts.SUCCESS, None
                if 'date' not in calls_df:
                    log.debug(
                        'failed to find date column in TD calls '
                        'df={} len={}'.format(
                            calls_df,
                            len(calls_df.index)))
                    return ae_consts.SUCCESS, None
                calls_df = calls_df.sort_values(
                    by=['date', 'strike'])
                """
                for i, r in calls_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in calls_df:
                        calls_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (
                    calls_df['exp_date'].iloc[-1])

                calls_df['date'] = calls_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)

            except Exception as f:
                log.error(
                    '{} - {} redis_key={} '
                    'no calls df found or ex={}'.format(
                        label,
                        df_str,
                        redis_key,
                        f))
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            if verbose:
                log.info(
                    '{} - {} redis_key={} calls={} exp_date={}'.format(
                        label,
                        df_str,
                        redis_key,
                        len(calls_df.index),
                        exp_date_str))
        else:
            if verbose:
                log.info(
                    '{} - {} did not find valid redis option calls '
                    'in redis_key={} status={}'.format(
                        label,
                        df_str,
                        redis_key,
                        ae_consts.get_status(status=status)))

    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting option calls from '
            'redis={}:{}@{} key={} ex={}'.format(
                label,
                df_str,
                ds_id,
                redis_host,
                redis_port,
                redis_db,
                redis_key,
                e))
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    if verbose:
        log.info(
            '{} - {} ds_id={} extract scrub={}'.format(
                label,
                df_str,
                ds_id,
                scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=calls_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
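
Note that DataFrame.sort_values returns a new, sorted frame rather than modifying its caller, which is why the sorted result is reassigned above (or inplace=True could be passed). A standalone illustration with placeholder rows:

import pandas as pd

df = pd.DataFrame({
    'date': ['2019-02-15', '2019-02-14'],
    'strike': [280.0, 285.0]})

df.sort_values(by=['date', 'strike'])        # result is discarded
df = df.sort_values(by=['date', 'strike'])   # keep the sorted copy
# or: df.sort_values(by=['date', 'strike'], inplace=True)
print(df)
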
def extract_option_puts_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Extract the Yahoo options puts for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = '{}-puts'.format(work_dict.get('label', 'extract'))
    ds_id = work_dict.get('ticker')
    df_type = DATAFEED_OPTIONS_YAHOO
    df_str = get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key',
        work_dict.get('options', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key',
        work_dict.get('options', 'missing-s3-key'))
    redis_host = work_dict.get(
        'redis_host',
        None)
    redis_port = work_dict.get(
        'redis_port',
        None)
    redis_db = work_dict.get(
        'redis_db',
        REDIS_DB)

    log.debug(
        '{} - {} - start - redis_key={} s3_key={}'.format(
            label,
            df_str,
            redis_key,
            s3_key))

    if not redis_host and not redis_port:
        redis_host = REDIS_ADDRESS.split(':')[0]
        redis_port = REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    puts_df = None
    status = NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key)

        status = redis_rec['status']
        log.debug(
            '{} - {} redis get data key={} status={}'.format(
                label,
                df_str,
                redis_key,
                get_status(status=status)))

        if status == SUCCESS:
            exp_date_str = redis_rec['rec']['data']['exp_date']
            puts_json = redis_rec['rec']['data']['puts']
            log.debug(
                '{} - {} redis convert puts to df'.format(
                    label,
                    df_str))
            try:
                puts_df = pd.read_json(
                    puts_json,
                    orient='records')
            except Exception:
                log.debug(
                    '{} - {} redis_key={} '
                    'no puts df found'.format(
                        label,
                        df_str,
                        redis_key))
                return EMPTY, None
            # end of try/ex to convert to df
            log.debug(
                '{} - {} redis_key={} puts={} exp_date={}'.format(
                    label,
                    df_str,
                    redis_key,
                    len(puts_df.index),
                    exp_date_str))
        else:
            log.debug(
                '{} - {} did not find valid redis option puts '
                'in redis_key={} status={}'.format(
                    label,
                    df_str,
                    redis_key,
                    get_status(status=status)))

    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting option puts from '
            'redis={}:{}@{} key={} ex={}'.format(
                label,
                df_str,
                ds_id,
                redis_host,
                redis_port,
                redis_db,
                redis_key,
                e))
        return ERR, None
    # end of try/ex extract from redis

    log.debug(
        '{} - {} ds_id={} extract scrub={}'.format(
            label,
            df_str,
            ds_id,
            scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = SUCCESS

    return status, scrubbed_df
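
A hedged usage sketch for this Yahoo puts extractor; 'options' is the fallback work_dict key checked above for both the Redis and S3 key names, and every value here is a placeholder.

# hypothetical usage sketch - every value below is a placeholder
work_request = {
    'ticker': 'SPY',
    'label': 'yahoo',
    'options': 'SPY_options_latest'
}

status, puts_df = extract_option_puts_dataset(work_dict=work_request)
if status == SUCCESS and puts_df is not None:
    print(puts_df.head())
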
def extract_pricing_dataset(
        work_dict,
        scrub_mode='sort-by-date'):
    """extract_pricing_dataset

    Extract the Yahoo pricing data for a ticker and
    return it as a pandas DataFrame

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = work_dict.get('label', 'extract')
    ds_id = work_dict.get('ticker')
    df_type = DATAFEED_PRICING_YAHOO
    df_str = get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get(
        'redis_key',
        work_dict.get('pricing', 'missing-redis-key'))
    s3_key = work_dict.get(
        's3_key',
        work_dict.get('pricing', 'missing-s3-key'))
    redis_host = work_dict.get(
        'redis_host',
        None)
    redis_port = work_dict.get(
        'redis_port',
        None)
    redis_db = work_dict.get(
        'redis_db',
        REDIS_DB)

    log.debug(
        '{} - {} - start - redis_key={} s3_key={}'.format(
            label,
            df_str,
            redis_key,
            s3_key))

    if not redis_host and not redis_port:
        redis_host = REDIS_ADDRESS.split(':')[0]
        redis_port = REDIS_ADDRESS.split(':')[1]

    df = None
    status = NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(
            label=label,
            host=redis_host,
            port=redis_port,
            db=redis_db,
            password=work_dict.get('password', None),
            key=redis_key)

        status = redis_rec['status']
        log.debug(
            '{} - {} redis get data key={} status={}'.format(
                label,
                df_str,
                redis_key,
                get_status(status=status)))

        if status == SUCCESS:
            log.debug(
                '{} - {} redis convert pricing to json'.format(
                    label,
                    df_str))
            cached_dict = redis_rec['rec']['data']
            log.debug(
                '{} - {} redis convert pricing to df'.format(
                    label,
                    df_str))
            try:
                df = pd.DataFrame(
                    cached_dict,
                    index=[0])
            except Exception:
                log.debug(
                    '{} - {} redis_key={} '
                    'no pricing df found'.format(
                        label,
                        df_str,
                        redis_key))
                return EMPTY, None
            # end of try/ex to convert to df
            log.debug(
                '{} - {} redis_key={} done convert pricing to df'.format(
                    label,
                    df_str,
                    redis_key))
        else:
            log.debug(
                '{} - {} did not find valid redis pricing '
                'in redis_key={} status={}'.format(
                    label,
                    df_str,
                    redis_key,
                    get_status(status=status)))

    except Exception as e:
        log.debug(
            '{} - {} - ds_id={} failed getting pricing from '
            'redis={}:{}@{} key={} ex={}'.format(
                label,
                df_str,
                ds_id,
                redis_host,
                redis_port,
                redis_db,
                redis_key,
                e))
        return ERR, None
    # end of try/ex extract from redis

    log.debug(
        '{} - {} ds_id={} extract scrub={}'.format(
            label,
            df_str,
            ds_id,
            scrub_mode))

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=df)

    status = SUCCESS

    return status, scrubbed_df
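
The cached pricing payload is a flat dictionary of scalar values, so pd.DataFrame(cached_dict, index=[0]) yields a one-row frame. A minimal sketch of that conversion with purely illustrative field names:

import pandas as pd

# placeholder pricing dictionary - field names are illustrative only
cached_dict = {
    'regularMarketPrice': 280.12,
    'regularMarketVolume': 54000000,
    'shortName': 'SPDR S&P 500'
}

pricing_df = pd.DataFrame(cached_dict, index=[0])
print(pricing_df)   # one row, one column per key
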
Example #6
def extract_pricing_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_pricing_dataset

    Extract the Yahoo pricing data for a ticker and
    return it as a pandas DataFrame

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = work_dict.get('label', 'extract')
    ds_id = work_dict.get('ticker')
    df_type = yahoo_consts.DATAFEED_PRICING_YAHOO
    df_str = yahoo_consts.get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get('redis_key',
                              work_dict.get('pricing', 'missing-redis-key'))
    s3_key = work_dict.get('s3_key', work_dict.get('pricing',
                                                   'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)

    log.debug(
        f'{label} - {df_str} - start - redis_key={redis_key} s3_key={s3_key}')

    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(label=label,
                                                      host=redis_host,
                                                      port=redis_port,
                                                      db=redis_db,
                                                      password=work_dict.get(
                                                          'password', None),
                                                      key=redis_key,
                                                      decompress_df=True)

        status = redis_rec['status']
        log.debug(f'{label} - {df_str} redis get data key={redis_key} '
                  f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            log.debug(f'{label} - {df_str} redis convert pricing to json')
            cached_dict = redis_rec['rec']['data']
            log.debug(f'{label} - {df_str} redis convert pricing to df')
            try:
                df = pd.DataFrame(cached_dict, index=[0])
            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no pricing df found')
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            log.debug(f'{label} - {df_str} redis_key={redis_key} done '
                      'convert pricing to df')
        else:
            log.debug(f'{label} - {df_str} did not find valid redis pricing '
                      f'in redis_key={redis_key} '
                      f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        log.debug(
            f'{label} - {df_str} - ds_id={ds_id} failed getting pricing from '
            f'redis={redis_host}:{redis_port}@{redis_db} '
            f'key={redis_key} ex={e}')
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    log.debug(f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
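
Every extractor in these examples returns a (status, dataframe) tuple, and the dataframe can be None even for non-error statuses such as EMPTY. A hedged consumer-side sketch, assuming the ae_consts status constants shown above and placeholder work_dict values:

# hypothetical consumer sketch - the 'pricing' key value is a placeholder
status, df = extract_pricing_dataset(
    work_dict={'ticker': 'SPY', 'pricing': 'SPY_pricing_latest'})

if status == ae_consts.SUCCESS and df is not None and not df.empty:
    print(df)
elif status == ae_consts.EMPTY:
    print('no cached pricing found')
else:
    print(f'extract failed status={ae_consts.get_status(status=status)}')
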
Example #7
def extract_option_puts_dataset(work_dict, scrub_mode='sort-by-date'):
    """extract_option_puts_dataset

    Extract the Yahoo options puts for a ticker and
    return it as a ``pandas.DataFrame``

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    label = f'{work_dict.get("label", "extract")}-puts'
    ds_id = work_dict.get('ticker')
    df_type = yahoo_consts.DATAFEED_OPTIONS_YAHOO
    df_str = yahoo_consts.get_datafeed_str_yahoo(df_type=df_type)
    redis_key = work_dict.get('redis_key',
                              work_dict.get('puts', 'missing-redis-key'))
    s3_key = work_dict.get('s3_key', work_dict.get('puts', 'missing-s3-key'))
    redis_host = work_dict.get('redis_host', None)
    redis_port = work_dict.get('redis_port', None)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)

    log.debug(
        f'{label} - {df_str} - start - redis_key={redis_key} s3_key={s3_key}')

    if not redis_host and not redis_port:
        redis_host = ae_consts.REDIS_ADDRESS.split(':')[0]
        redis_port = ae_consts.REDIS_ADDRESS.split(':')[1]

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(label=label,
                                                      host=redis_host,
                                                      port=redis_port,
                                                      db=redis_db,
                                                      password=work_dict.get(
                                                          'password', None),
                                                      key=redis_key,
                                                      decompress_df=True)

        status = redis_rec['status']
        log.debug(f'{label} - {df_str} redis get data key={redis_key} '
                  f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            puts_json = None
            if 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            log.debug(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                exp_epoch_value = puts_df['expiration'].iloc[-1]
                exp_date_str = ae_utils.convert_epoch_to_datetime_string(
                    epoch=exp_epoch_value,
                    fmt=ae_consts.COMMON_DATE_FORMAT,
                    use_utc=True)
            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.EMPTY, None
            # end of try/ex to convert to df
            log.debug(f'{label} - {df_str} redis_key={redis_key} '
                      f'puts={len(puts_df.index)} exp_date={exp_date_str}')
        else:
            log.debug(
                f'{label} - {df_str} did not find valid redis option puts '
                f'in redis_key={redis_key} '
                f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        log.debug(
            f'{label} - {df_str} - ds_id={ds_id} failed getting option puts '
            f'from redis={redis_host}:{redis_port}@{redis_db} '
            f'key={redis_key} ex={e}')
        return ae_consts.ERR, None
    # end of try/ex extract from redis

    log.debug(f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
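
This version derives the expiration date string by converting the last 'expiration' epoch value; the ae_utils helper above is the project's own wrapper around that idea. A standalone sketch using only the standard library and a placeholder epoch:

import datetime

exp_epoch_value = 1550188800   # placeholder: 2019-02-15 00:00:00 UTC, in seconds

exp_date_str = datetime.datetime.fromtimestamp(
    exp_epoch_value, tz=datetime.timezone.utc).strftime('%Y-%m-%d')
print(exp_date_str)   # 2019-02-15
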
def perform_extract(df_type,
                    df_str,
                    work_dict,
                    dataset_id_key='ticker',
                    scrub_mode='sort-by-date',
                    verbose=False):
    """perform_extract

    Helper for extracting from Redis or S3

    :param df_type: datafeed type enum
    :param df_str: dataset string name
    :param work_dict: incoming work request dictionary
    :param dataset_id_key: configurable dataset identifier
                           key for tracking scrubbing and
                           debugging errors
    :param scrub_mode: scrubbing mode on extraction for
                       one-off cleanup before analysis
    :param verbose: optional - boolean for turning on logging
    """
    status = ae_consts.FAILED
    ds_id = work_dict.get(dataset_id_key, None)
    label = work_dict.get('label', 'extract')
    s3_bucket = work_dict.get('s3_bucket', ae_consts.S3_BUCKET)
    s3_key = work_dict.get('s3_key', ae_consts.S3_KEY)
    redis_key = work_dict.get('redis_key', ae_consts.REDIS_KEY)
    s3_enabled = work_dict.get('s3_enabled', ae_consts.ENABLED_S3_UPLOAD)
    s3_access_key = work_dict.get('s3_access_key', ae_consts.S3_ACCESS_KEY)
    s3_secret_key = work_dict.get('s3_secret_key', ae_consts.S3_SECRET_KEY)
    s3_region_name = work_dict.get('s3_region_name', ae_consts.S3_REGION_NAME)
    s3_address = work_dict.get('s3_address', ae_consts.S3_ADDRESS)
    s3_secure = work_dict.get('s3_secure', ae_consts.S3_SECURE)
    redis_address = work_dict.get('redis_address', ae_consts.REDIS_ADDRESS)
    redis_password = work_dict.get('redis_password', ae_consts.REDIS_PASSWORD)
    redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)
    redis_expire = work_dict.get('redis_expire', ae_consts.REDIS_EXPIRE)

    if verbose:
        log.info(
            f'{label} - {df_str} - START - '
            f'ds_id={ds_id} scrub_mode={scrub_mode} '
            f'redis_address={redis_address}@{redis_db} redis_key={redis_key} '
            f's3={s3_enabled} s3_address={s3_address} s3_bucket={s3_bucket} '
            f's3_key={s3_key}')

    if verbose or ae_consts.ev('DEBUG_REDIS_EXTRACT', '0') == '1':
        log.info(f'{label} - {df_str} - ds_id={ds_id} redis '
                 f'pw={redis_password} expire={redis_expire}')

    if verbose or ae_consts.ev('DEBUG_S3_EXTRACT', '0') == '1':
        log.info(f'{label} - {df_str} - ds_id={ds_id} s3 '
                 f'ak={s3_access_key} sk={s3_secret_key} '
                 f'region={s3_region_name} secure={s3_secure}')

    extract_res = None
    try:
        extract_res = build_df.build_df_from_redis(label=label,
                                                   address=redis_address,
                                                   db=redis_db,
                                                   key=redis_key,
                                                   verbose=verbose)
    except Exception as e:
        extract_res = None
        log.error(f'{label} - {df_str} - ds_id={ds_id} failed extract from '
                  f'redis={redis_address}@{redis_db} key={redis_key} ex={e}')
    # end of try/ex extract from redis

    if not extract_res:
        return status, None

    valid_df = (extract_res['status'] == ae_consts.SUCCESS
                and extract_res['rec']['valid_df'])

    if not valid_df:
        if verbose or ae_consts.ev('DEBUG_S3_EXTRACT', '0') == '1':
            log.error(
                f'{label} - {df_str} ds_id={ds_id} invalid df '
                f'status={ae_consts.get_status(status=extract_res["status"])} '
                f'extract_res={extract_res}')
        return status, None

    extract_df = extract_res['rec']['data']

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=extract_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
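
A hypothetical call to this verbose-aware helper; the work_dict values are placeholders and td_consts.DATAFEED_TD_CALLS stands in for whichever datafeed enum applies.

# hypothetical usage sketch - every value below is a placeholder
work_request = {
    'ticker': 'SPY',
    'label': 'daily-extract',
    'redis_key': 'SPY_2019-02-15_tdcalls',
    'redis_address': 'localhost:6379'
}

status, df = perform_extract(
    df_type=td_consts.DATAFEED_TD_CALLS,   # any datafeed enum applies
    df_str='tdcalls',
    work_dict=work_request,
    scrub_mode='sort-by-date',
    verbose=True)

if status == ae_consts.SUCCESS:
    print(df.head())
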
Example #9
def extract_option_calls_dataset(ticker=None,
                                 date=None,
                                 work_dict=None,
                                 scrub_mode='sort-by-date',
                                 verbose=False):
    """extract_option_calls_dataset

    Extract the TD options calls for a ticker and
    return a tuple (status, ``pandas.DataFrame``)

    .. code-block:: python

        import analysis_engine.td.extract_df_from_redis as td_extract

        # extract by historical date is also supported as an arg
        # date='2019-02-15'
        calls_status, calls_df = td_extract.extract_option_calls_dataset(
            ticker='SPY')
        print(calls_df)

    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: optional - string type of
        scrubbing handler to run
    :param verbose: optional - boolean for turning on logging
    """
    label = 'extract_td_calls'
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
        label = f'{work_dict.get("label", label)}'
    if not use_date:
        use_date = latest_close_date

    ds_id = ticker
    df_type = td_consts.DATAFEED_TD_CALLS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_db = ae_consts.REDIS_DB
    redis_key = f'{ticker}_{use_date}_tdcalls'
    redis_host, redis_port = ae_consts.get_redis_host_and_port(req=work_dict)
    redis_password = ae_consts.REDIS_PASSWORD
    s3_key = redis_key

    if work_dict:
        redis_db = work_dict.get('redis_db', redis_db)
        redis_password = work_dict.get('redis_password', redis_password)
        verbose = work_dict.get('verbose_td', verbose)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    exp_date_str = None
    calls_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(label=label,
                                                      host=redis_host,
                                                      port=redis_port,
                                                      db=redis_db,
                                                      password=redis_password,
                                                      key=redis_key,
                                                      decompress_df=True)

        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            calls_json = None
            if 'tdcalls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['tdcalls']
            elif 'calls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['calls']
            else:
                calls_json = redis_rec['rec']['data']
            if not calls_json:
                return ae_consts.SUCCESS, pd.DataFrame([])
            if verbose:
                log.info(f'{label} - {df_str} redis convert calls to df')
            exp_date_str = None
            try:
                calls_df = pd.read_json(calls_json, orient='records')
                if len(calls_df.index) == 0:
                    return ae_consts.SUCCESS, pd.DataFrame([])
                if 'date' not in calls_df:
                    if verbose:
                        log.error(
                            'failed to find date column in TD calls '
                            f'df={calls_df} len={len(calls_df.index)}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                calls_df = calls_df.sort_values(by=['date', 'strike'])
                """
                for i, r in calls_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in calls_df:
                        calls_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (calls_df['exp_date'].iloc[-1])

                calls_df['date'] = calls_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)

            except Exception as f:
                not_fixed = True
                if ('Can only use .dt accessor with '
                        'datetimelike values') in str(f):
                    try:
                        log.critical(f'fixing dates in {redis_key}')
                        # remove epoch second data and
                        # use only the millisecond date values
                        bad_date = ae_consts.EPOCH_MINIMUM_DATE
                        calls_df.loc[
                            calls_df['date'] < bad_date, 'date'] = None
                        calls_df = calls_df.dropna(axis=0, how='any')
                        fmt = ae_consts.COMMON_TICK_DATE_FORMAT
                        calls_df['date'] = pd.to_datetime(
                            calls_df['date'], unit='ms').dt.strftime(fmt)
                        not_fixed = False
                    except Exception as g:
                        log.critical(
                            f'failed to parse date column {calls_df["date"]} '
                            f'with dt.strftime ex={f} and EPOCH EX={g}')
                        return ae_consts.SUCCESS, pd.DataFrame([])
                # if able to fix error or not

                if not_fixed:
                    log.debug(f'{label} - {df_str} redis_key={redis_key} '
                              f'no calls df found or ex={f}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                # if unable to fix - return out

            # end of try/ex to convert to df
            if verbose:
                log.info(
                    f'{label} - {df_str} redis_key={redis_key} '
                    f'calls={len(calls_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option calls in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        if verbose:
            log.error(
                f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                f'calls from redis={redis_host}:{redis_port}@{redis_db} '
                f'key={redis_key} ex={e}')
        return ae_consts.ERR, pd.DataFrame([])
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=calls_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
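
The exception handler above repairs a date column that mixes epoch seconds with epoch milliseconds by discarding the sub-millisecond rows and reparsing the remainder with unit='ms'. A standalone sketch of the same idea on placeholder data, filtering directly instead of assigning None and dropping, with '%Y-%m-%d %H:%M:%S' assumed as a stand-in for COMMON_TICK_DATE_FORMAT and the cutoff value assumed for EPOCH_MINIMUM_DATE:

import pandas as pd

fmt = '%Y-%m-%d %H:%M:%S'   # assumed stand-in for COMMON_TICK_DATE_FORMAT
bad_date = 1000000000000    # assumed stand-in for EPOCH_MINIMUM_DATE (ms)

calls_df = pd.DataFrame({
    'date': [1550241000000, 1550241060000, 1550241],   # last value is seconds
    'strike': [280.0, 285.0, 290.0]})

# keep only millisecond-scale epochs, then reparse and format the column
calls_df = calls_df[calls_df['date'] >= bad_date].copy()
calls_df['date'] = pd.to_datetime(calls_df['date'], unit='ms').dt.strftime(fmt)
print(calls_df)
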
Example #10
def extract_option_puts_dataset(ticker=None,
                                date=None,
                                work_dict=None,
                                scrub_mode='sort-by-date',
                                verbose=False):
    """extract_option_puts_dataset

    Extract the TD options puts for a ticker and
    return a tuple (status, ``pandas.DataFrame``)

    .. code-block:: python

        import analysis_engine.td.extract_df_from_redis as td_extract

        # extract by historical date is also supported as an arg
        # date='2019-02-15'
        puts_status, puts_df = td_extract.extract_option_puts_dataset(
            ticker='SPY')
        print(puts_df)

    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: optional - string type of
        scrubbing handler to run
    :param verbose: optional - boolean for turning on logging
    """
    label = 'extract_td_puts'
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
        label = f'{work_dict.get("label", label)}'
    if not use_date:
        use_date = latest_close_date

    ds_id = ticker
    df_type = td_consts.DATAFEED_TD_PUTS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_db = ae_consts.REDIS_DB
    redis_key = f'{ticker}_{use_date}_tdputs'
    redis_host, redis_port = ae_consts.get_redis_host_and_port(req=work_dict)
    redis_password = ae_consts.REDIS_PASSWORD
    s3_key = redis_key

    if work_dict:
        redis_db = work_dict.get('redis_db', redis_db)
        redis_password = work_dict.get('redis_password', redis_password)
        verbose = work_dict.get('verbose_td', verbose)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(label=label,
                                                      host=redis_host,
                                                      port=redis_port,
                                                      db=redis_db,
                                                      password=redis_password,
                                                      key=redis_key,
                                                      decompress_df=True)

        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            puts_json = None
            if 'tdputs' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['tdputs']
            elif 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            if not puts_json:
                return ae_consts.SUCCESS, pd.DataFrame([])
            if verbose:
                log.info(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                if len(puts_df.index) == 0:
                    return ae_consts.SUCCESS, pd.DataFrame([])
                if 'date' not in puts_df:
                    log.debug('failed to find date column in TD puts '
                              f'df={puts_df} len={len(puts_df.index)}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                puts_df = puts_df.sort_values(by=['date', 'strike'])
                """
                for i, r in calls_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in puts_df:
                        puts_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            puts_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (puts_df['exp_date'].iloc[-1])

                puts_df['date'] = puts_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)

            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.SUCCESS, pd.DataFrame([])
            # end of try/ex to convert to df
            if verbose:
                log.info(f'{label} - {df_str} redis_key={redis_key} '
                         f'puts={len(puts_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option puts in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        if verbose:
            log.error(
                f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                f'puts from redis={redis_host}:{redis_port}@{redis_db} '
                f'key={redis_key} ex={e}')
        return ae_consts.ERR, pd.DataFrame([])
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
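
The cached option chain is stored as JSON records, and pd.read_json(..., orient='records') converts a column literally named 'date' to datetimes by default, which is what makes the .dt.strftime(...) calls in these functions possible. A minimal sketch with placeholder rows (the date column is given as epoch milliseconds, matching how the TD payloads above behave):

import io

import pandas as pd

# placeholder rows - the 'date' values are epoch milliseconds
puts_json = (
    '[{"date": 1550241000000, "strike": 280.0, "exp_date": "2019-03-15"},'
    ' {"date": 1550241060000, "strike": 285.0, "exp_date": "2019-03-15"}]')

puts_df = pd.read_json(io.StringIO(puts_json), orient='records')
print(puts_df['date'].dt.strftime('%Y-%m-%d %H:%M:%S'))
print(puts_df['exp_date'].iloc[-1])   # exp_date stays a plain string
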