def prepare_pricing_dataset(
        self,
        work_dict):
    """prepare_pricing_dataset

    Prepare a dataset for analysis. Loads the dataset from s3
    if it is not found in redis, and publishes the prepared
    artifact as a csv to both s3 and redis.

    :param work_dict: dictionary for key/values
    """

    label = 'prepare'

    log.info(
        'task - {} - start '
        'work_dict={}'.format(
            label,
            work_dict))

    initial_data = None

    ticker = ae_consts.TICKER
    ticker_id = ae_consts.TICKER_ID
    rec = {
        'ticker': None,
        'ticker_id': None,
        's3_enabled': True,
        'redis_enabled': True,
        's3_bucket': None,
        's3_key': None,
        'redis_key': None,
        'prepared_s3_key': None,
        'prepared_s3_bucket': None,
        'prepared_redis_key': None,
        'prepared_data': None,
        'prepared_size': None,
        'initial_data': None,
        'initial_size': None,
        'ignore_columns': None,
        'updated': None
    }
    res = build_result.build_result(
        status=ae_consts.NOT_RUN,
        err=None,
        rec=rec)

    try:
        ticker = work_dict.get(
            'ticker',
            ae_consts.TICKER)
        ticker_id = int(work_dict.get(
            'ticker_id',
            ae_consts.TICKER_ID))

        if not ticker:
            res = build_result.build_result(
                status=ae_consts.ERR,
                err='missing ticker',
                rec=rec)
            return res

        label = work_dict.get(
            'label',
            label)
        s3_key = work_dict.get(
            's3_key',
            None)
        s3_bucket_name = work_dict.get(
            's3_bucket',
            'pricing')
        s3_access_key = work_dict.get(
            's3_access_key',
            ae_consts.S3_ACCESS_KEY)
        s3_secret_key = work_dict.get(
            's3_secret_key',
            ae_consts.S3_SECRET_KEY)
        s3_region_name = work_dict.get(
            's3_region_name',
            ae_consts.S3_REGION_NAME)
        s3_address = work_dict.get(
            's3_address',
            ae_consts.S3_ADDRESS)
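        # S3_SECURE is an env-style string flag; comparing to '1' converts it to a bool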
        s3_secure = work_dict.get(
            's3_secure',
            ae_consts.S3_SECURE) == '1'
        redis_address = work_dict.get(
            'redis_address',
            ae_consts.REDIS_ADDRESS)
        redis_key = work_dict.get(
            'redis_key',
            ae_consts.REDIS_KEY)
        redis_password = work_dict.get(
            'redis_password',
            ae_consts.REDIS_PASSWORD)
        redis_db = work_dict.get(
            'redis_db',
            None)
        if not redis_db:
            redis_db = ae_consts.REDIS_DB
        redis_expire = None
        if 'redis_expire' in work_dict:
            redis_expire = work_dict.get(
                'redis_expire',
                ae_consts.REDIS_EXPIRE)
        updated = work_dict.get(
            'updated',
            datetime.datetime.utcnow().strftime(
                '%Y_%m_%d_%H_%M_%S'))
        prepared_s3_key = work_dict.get(
            'prepared_s3_key',
            '{}_{}.csv'.format(
                ticker,
                updated))
        prepared_s3_bucket = work_dict.get(
            'prepared_s3_bucket',
            'prepared')
        prepared_redis_key = work_dict.get(
            'prepared_redis_key',
            'prepared')
        ignore_columns = work_dict.get(
            'ignore_columns',
            None)
        log.info(
            '{} redis enabled address={}@{} '
            'key={} prepare_s3={}:{} prepare_redis={} '
            'ignore_columns={}'.format(
                label,
                redis_address,
                redis_db,
                redis_key,
                prepared_s3_bucket,
                prepared_s3_key,
                prepared_redis_key,
                ignore_columns))
        redis_host = redis_address.split(':')[0]
        redis_port = redis_address.split(':')[1]

        enable_s3 = True
        enable_redis_publish = True

        rec['ticker'] = ticker
        rec['ticker_id'] = ticker_id
        rec['s3_bucket'] = s3_bucket_name
        rec['s3_key'] = s3_key
        rec['redis_key'] = redis_key
        rec['prepared_s3_key'] = prepared_s3_key
        rec['prepared_s3_bucket'] = prepared_s3_bucket
        rec['prepared_redis_key'] = prepared_redis_key
        rec['updated'] = updated
        rec['s3_enabled'] = enable_s3
        rec['redis_enabled'] = enable_redis_publish

        try:
            log.info(
                '{} connecting redis={}:{} '
                'db={} key={} '
                'updated={} expire={}'.format(
                    label,
                    redis_host,
                    redis_port,
                    redis_db,
                    redis_key,
                    updated,
                    redis_expire))
            rc = redis.Redis(
                host=redis_host,
                port=redis_port,
                password=redis_password,
                db=redis_db)
        except Exception as e:
            err = (
                '{} failed - redis connection to address={}@{} '
                'key={} ex={}'.format(
                    label,
                    redis_address,
                    redis_db,
                    redis_key,
                    e))
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        # end of try/ex for connecting to redis

        initial_data_res = redis_get.get_data_from_redis_key(
            label=label,
            client=rc,
            key=redis_key)

        log.info(
            '{} get redis key={} status={} err={}'.format(
                label,
                redis_key,
                ae_consts.get_status(initial_data_res['status']),
                initial_data_res['err']))

        initial_data = initial_data_res['rec'].get(
            'data',
            None)

        if enable_s3 and not initial_data:

            log.info(
                '{} failed to find redis_key={} trying s3 '
                'from s3_key={} s3_bucket={} s3_address={}'.format(
                    label,
                    redis_key,
                    s3_key,
                    s3_bucket_name,
                    s3_address))

            get_from_s3_req = \
                api_requests.build_publish_from_s3_to_redis_request()

            get_from_s3_req['s3_enabled'] = enable_s3
            get_from_s3_req['s3_access_key'] = s3_access_key
            get_from_s3_req['s3_secret_key'] = s3_secret_key
            get_from_s3_req['s3_region_name'] = s3_region_name
            get_from_s3_req['s3_address'] = s3_address
            get_from_s3_req['s3_secure'] = s3_secure
            get_from_s3_req['s3_key'] = s3_key
            get_from_s3_req['s3_bucket'] = s3_bucket_name
            get_from_s3_req['redis_key'] = redis_key
            get_from_s3_req['label'] = (
                '{}-run_publish_from_s3_to_redis'.format(
                    label))

            log.info(
                '{} load from s3={} to '
                'redis={}'.format(
                    label,
                    s3_key,
                    redis_key))

            try:
                # run in synchronous mode:
                get_from_s3_req['celery_disabled'] = True
                task_res = s3_to_redis.run_publish_from_s3_to_redis(
                    get_from_s3_req)
                if task_res.get(
                        'status',
                        ae_consts.ERR) == ae_consts.SUCCESS:
                    log.info(
                        '{} loaded s3={}:{} '
                        'to redis={} retrying'.format(
                            label,
                            s3_bucket_name,
                            s3_key,
                            redis_key))
                    initial_data_res = redis_get.get_data_from_redis_key(
                        label=label,
                        client=rc,
                        key=redis_key)

                    log.info(
                        '{} get redis try=2 key={} status={} err={}'.format(
                            label,
                            redis_key,
                            ae_consts.get_status(initial_data_res['status']),
                            initial_data_res['err']))

                    initial_data = initial_data_res['rec'].get(
                        'data',
                        None)
                else:
                    err = (
                        '{} ERR failed loading from bucket={} '
                        's3_key={} to redis_key={} with res={}'.format(
                            label,
                            s3_bucket_name,
                            s3_key,
                            redis_key,
                            task_res))
                    log.error(err)
                    res = build_result.build_result(
                        status=ae_consts.ERR,
                        err=err,
                        rec=rec)
                    return res
            except Exception as e:
                err = (
                    '{} extract from s3 and publish to redis failed loading '
                    'data from bucket={} in '
                    's3_key={} with publish to redis_key={} '
                    'with ex={}'.format(
                        label,
                        s3_bucket_name,
                        s3_key,
                        redis_key,
                        e))
                log.error(err)
                res = build_result.build_result(
                    status=ae_consts.ERR,
                    err=err,
                    rec=rec)
                return res
            # end of try/ex for publishing from s3->redis
        # end of if enable_s3

        if not initial_data:
            err = (
                '{} did not find any data to prepare in redis_key={} or '
                's3_key={} in bucket={}'.format(
                    label,
                    redis_key,
                    s3_key,
                    s3_bucket_name))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res

        initial_data_num_chars = len(str(initial_data))
        initial_size_value = None
        initial_size_str = None
        if initial_data_num_chars < ae_consts.PREPARE_DATA_MIN_SIZE:
            err = (
                '{} not enough data={} in redis_key={} or '
                's3_key={} in bucket={}'.format(
                    label,
                    initial_data_num_chars,
                    redis_key,
                    s3_key,
                    s3_bucket_name))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        else:
            initial_size_value = initial_data_num_chars / 1024000
            initial_size_str = ae_consts.to_f(initial_size_value)
            if ae_consts.ev('DEBUG_PREPARE', '0') == '1':
                log.info(
                    '{} initial - redis_key={} data={}'.format(
                        label,
                        redis_key,
                        str(initial_data)))
            else:
                log.info(
                    '{} initial - redis_key={} data size={} MB'.format(
                        label,
                        redis_key,
                        initial_size_str))
        # end of trying to get initial_data

        rec['initial_data'] = initial_data
        rec['initial_size'] = initial_data_num_chars

        prepare_data = None

        try:
            if ae_consts.ev('DEBUG_PREPARE', '0') == '1':
                log.info(
                    '{} data={} - flatten - {} MB from '
                    'redis_key={}'.format(
                        label,
                        ae_consts.ppj(initial_data),
                        initial_size_str,
                        redis_key))
            else:
                log.info(
                    '{} flatten - {} MB from '
                    'redis_key={}'.format(
                        label,
                        initial_size_str,
                        redis_key))
            prepare_data = dict_to_csv.flatten_dict(
                data=initial_data)
        except Exception as e:
            prepare_data = None
            err = (
                '{} flatten - convert to csv failed with ex={} '
                'redis_key={}'.format(
                    label,
                    e,
                    redis_key))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        # end of try/ex

        if not prepare_data:
            err = (
                '{} flatten - did not return any data from redis_key={} '
                'or s3_key={} in bucket={}'.format(
                    label,
                    redis_key,
                    s3_key,
                    s3_bucket_name))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        # end of prepare_data

        prepare_data_num_chars = len(str(prepare_data))
        prepare_size_value = None

        if prepare_data_num_chars < ae_consts.PREPARE_DATA_MIN_SIZE:
            err = (
                '{} prepare - there is not enough data={} in redis_key={}'
                ''.format(
                    label,
                    prepare_data_num_chars,
                    redis_key))
            log.error(err)
            res = build_result.build_result(
                status=ae_consts.ERR,
                err=err,
                rec=rec)
            return res
        else:
            prepare_size_value = prepare_data_num_chars / 1024000
            prepare_size_str = ae_consts.to_f(prepare_size_value)
            if ae_consts.ev('DEBUG_PREPARE', '0') == '1':
                log.info(
                    '{} data={} - prepare - redis_key={}'.format(
                        label,
                        ae_consts.ppj(prepare_data),
                        redis_key))
            else:
                log.info(
                    '{} prepare - redis_key={} data size={} MB'.format(
                        label,
                        redis_key,
                        prepare_size_str))
        # end of getting the size of the prepared data

        rec['prepared_data'] = prepare_data
        rec['prepared_size'] = prepare_data_num_chars

        res = build_result.build_result(
            status=ae_consts.SUCCESS,
            err=None,
            rec=rec)

        rc = None

    except Exception as e:
        res = build_result.build_result(
            status=ae_consts.ERR,
            err=(
                'failed - prepare_pricing_dataset '
                'dict={} with ex={}').format(
                    work_dict,
                    e),
            rec=rec)
        log.error(
            '{} - {}'.format(
                label,
                res['err']))
    # end of try/ex

    log.info(
        'task - prepare_pricing_dataset done - '
        '{} - status={}'.format(
            label,
            ae_consts.get_status(res['status'])))

    return get_task_results.get_task_results(
        work_dict=work_dict,
        result=res)
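A minimal usage sketch for the task above. It assumes the function is
registered as a bound Celery task (so `self` is injected on a plain call)
and that the bucket and key names below exist; all of those are
illustrative placeholders, not values confirmed by this listing.

# hypothetical work_dict; see the work_dict.get(...) defaults above
work = {
    'ticker': 'SPY',
    's3_bucket': 'pricing',
    's3_key': 'SPY_latest',             # assumed source key in s3
    'redis_key': 'SPY_latest',          # assumed cache key in redis
    'prepared_s3_bucket': 'prepared',
    'prepared_redis_key': 'SPY_prepared',
    'label': 'prepare-example'
}
task_res = prepare_pricing_dataset(work)
# the result follows the build_result shape used above: status / err / rec
print(ae_consts.get_status(task_res['status']))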
Example 2
def publish_ticker_aggregate_from_s3(self, work_dict):
    """publish_ticker_aggregate_from_s3

    Aggregate per-date Ticker Data from S3, upload the compiled
    dataset to S3, and publish it to Redis

    :param work_dict: dictionary for key/values
    """

    label = 'pub-tic-agg-s3-to-redis'

    log.info(f'task - {label} - start work_dict={work_dict}')

    ticker = ae_consts.TICKER
    ticker_id = ae_consts.TICKER_ID
    rec = {
        'ticker': None,
        'ticker_id': None,
        's3_read_enabled': True,
        's3_upload_enabled': True,
        'redis_enabled': True,
        's3_bucket': None,
        's3_compiled_bucket': None,
        's3_key': None,
        'redis_key': None,
        'updated': None
    }
    res = build_result.build_result(status=ae_consts.NOT_RUN,
                                    err=None,
                                    rec=rec)

    try:
        ticker = work_dict.get('ticker', ae_consts.TICKER)
        ticker_id = int(work_dict.get('ticker_id', ae_consts.TICKER_ID))

        if not ticker:
            res = build_result.build_result(status=ae_consts.ERR,
                                            err='missing ticker',
                                            rec=rec)
            return res

        label = work_dict.get('label', label)
        s3_key = work_dict.get('s3_key', None)
        s3_bucket_name = work_dict.get('s3_bucket', 'pricing')
        s3_compiled_bucket_name = work_dict.get('s3_compiled_bucket',
                                                'compileddatasets')
        redis_key = work_dict.get('redis_key', None)
        updated = work_dict.get('updated', None)
        enable_s3_upload = work_dict.get('s3_upload_enabled',
                                         ae_consts.ENABLED_S3_UPLOAD)
        enable_redis_publish = work_dict.get('redis_enabled',
                                             ae_consts.ENABLED_REDIS_PUBLISH)
        serializer = work_dict.get('serializer', 'json')
        encoding = work_dict.get('encoding', 'utf-8')

        enable_s3_read = True
        # aggregated per-date rows; initialized here so the upload and
        # redis publish checks below do not raise a NameError when the
        # s3 read is skipped or finds no matching keys
        data = []

        rec['ticker'] = ticker
        rec['ticker_id'] = ticker_id
        rec['s3_bucket'] = s3_bucket_name
        rec['s3_compiled_bucket'] = s3_compiled_bucket_name
        rec['s3_key'] = s3_key
        rec['redis_key'] = redis_key
        rec['updated'] = updated
        rec['s3_read_enabled'] = enable_s3_read
        rec['s3_upload_enabled'] = enable_s3_upload
        rec['redis_enabled'] = enable_redis_publish

        if enable_s3_read:
            log.info(f'{label} parsing s3 values')
            access_key = work_dict.get('s3_access_key',
                                       ae_consts.S3_ACCESS_KEY)
            secret_key = work_dict.get('s3_secret_key',
                                       ae_consts.S3_SECRET_KEY)
            region_name = work_dict.get('s3_region_name',
                                        ae_consts.S3_REGION_NAME)
            service_address = work_dict.get('s3_address', ae_consts.S3_ADDRESS)
            secure = work_dict.get('s3_secure', ae_consts.S3_SECURE) == '1'

            endpoint_url = f'http{"s" if secure else ""}://{service_address}'

            log.info(f'{label} building s3 endpoint_url={endpoint_url} '
                     f'region={region_name}')

            s3 = boto3.resource(
                's3',
                endpoint_url=endpoint_url,
                aws_access_key_id=access_key,
                aws_secret_access_key=secret_key,
                region_name=region_name,
                config=boto3.session.Config(signature_version='s3v4'))

            try:
                log.info(f'{label} checking bucket={s3_bucket_name} exists')
                if s3.Bucket(s3_bucket_name) not in s3.buckets.all():
                    log.info(f'{label} creating bucket={s3_bucket_name}')
                    s3.create_bucket(Bucket=s3_bucket_name)
            except Exception as e:
                log.info(f'{label} failed creating bucket={s3_bucket_name} '
                         f'with ex={e}')
            # end of try/ex for creating bucket

            try:
                log.info(f'{label} checking bucket={s3_bucket_name} keys')
                date_keys = []
                keys = []
                # {TICKER}_YYYY-MM-DD regex
                reg = r'^.*_\d{4}-(0?[1-9]|1[012])-(0?[1-9]|[12][0-9]|3[01])$'
                for bucket in s3.buckets.all():
                    for key in bucket.objects.all():
                        if (ticker.lower() in key.key.lower()
                                and bool(re.compile(reg).search(key.key))):
                            keys.append(key.key)
                            date_keys.append(key.key.split(f'{ticker}_')[1])
            except Exception as e:
                log.info(f'{label} failed to get bucket={s3_bucket_name} '
                         f'keys with ex={e}')
            # end of try/ex for getting bucket keys

            if keys:
                data = []
                for idx, key in enumerate(keys):
                    try:
                        log.info(
                            f'{label} reading from s3={s3_bucket_name}/{key} '
                            f'updated={updated}')
                        loop_data = s3_read_contents_from_key.\
                            s3_read_contents_from_key(
                                s3=s3,
                                s3_bucket_name=s3_bucket_name,
                                s3_key=key,
                                encoding=encoding,
                                convert_as_json=True)

                        initial_size_value = \
                            len(str(loop_data)) / 1024000
                        initial_size_str = ae_consts.to_f(initial_size_value)
                        if ae_consts.ev('DEBUG_S3', '0') == '1':
                            log.info(f'{label} read s3={s3_bucket_name}/{key} '
                                     f'data={ae_consts.ppj(loop_data)}')
                        else:
                            log.info(
                                f'{label} read s3={s3_bucket_name}/{key} data '
                                f'size={initial_size_str} MB')
                        data.append({date_keys[idx]: loop_data})
                    except Exception as e:
                        err = (
                            f'{label} failed reading bucket={s3_bucket_name} '
                            f'key={key} ex={e}')
                        log.error(err)
                        res = build_result.build_result(
                            status=ae_consts.NOT_RUN, err=err, rec=rec)
                    # end of try/ex for creating bucket
            else:
                log.info(f'{label} No keys found in S3 '
                         f'bucket={s3_bucket_name} for ticker={ticker}')
        else:
            log.info(f'{label} SKIP S3 read bucket={s3_bucket_name} '
                     f'ticker={ticker}')
        # end of if enable_s3_read

        if data and enable_s3_upload:
            try:
                log.info(f'{label} checking bucket={s3_compiled_bucket_name} '
                         'exists')
                if s3.Bucket(s3_compiled_bucket_name) not in s3.buckets.all():
                    log.info(
                        f'{label} creating bucket={s3_compiled_bucket_name}')
                    s3.create_bucket(Bucket=s3_compiled_bucket_name)
            except Exception as e:
                log.info(f'{label} failed creating '
                         f'bucket={s3_compiled_bucket_name} with ex={e}')
            # end of try/ex for creating bucket

            try:
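                # JSON-serialize the aggregated rows, then zlib-compress at level 9 (max compression)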
                cmpr_data = zlib.compress(json.dumps(data).encode(encoding), 9)

                if ae_consts.ev('DEBUG_S3', '0') == '1':
                    log.info(
                        f'{label} uploading to '
                        f's3={s3_compiled_bucket_name}/{s3_key} '
                        f'data={ae_consts.ppj(data)} updated={updated}')
                else:
                    sizes = {
                        'MB': 1024000,
                        'GB': 1024000000,
                        'TB': 1024000000000,
                        'PB': 1024000000000000
                    }
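                    # walk units smallest to largest; keep the first unit that scales the value to 1024 or less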
                    initial_size_value = len(str(data))
                    org_data_size = 'MB'
                    for key in sizes.keys():
                        size = float(initial_size_value) / float(sizes[key])
                        if size > 1024:
                            continue
                        org_data_size = key
                        initial_size_value = size
                        break
                    initial_size_str = ae_consts.to_f(initial_size_value)

                    cmpr_data_size_value = len(cmpr_data)
                    cmpr_data_size = 'MB'
                    for key in sizes.keys():
                        size = float(cmpr_data_size_value) / float(sizes[key])
                        if size > 1024:
                            continue
                        cmpr_data_size = key
                        cmpr_data_size_value = size
                        break
                    cmpr_size_str = ae_consts.to_f(cmpr_data_size_value)
                    log.info(
                        f'{label} uploading to '
                        f's3={s3_compiled_bucket_name}/{s3_key} data '
                        f'original_size={initial_size_str} {org_data_size} '
                        f'compressed_size={cmpr_size_str} {cmpr_data_size} '
                        f'updated={updated}')
                s3.Bucket(s3_compiled_bucket_name).put_object(Key=s3_key,
                                                              Body=cmpr_data)
            except Exception as e:
                log.error(f'{label} failed '
                          f'uploading bucket={s3_compiled_bucket_name} '
                          f'key={s3_key} ex={e}')
            # end of try/ex for creating bucket
        else:
            log.info(
                f'{label} SKIP S3 upload '
                f'bucket={s3_compiled_bucket_name} key={s3_key}')
        # end of if enable_s3_upload

        if data and enable_redis_publish:
            redis_address = work_dict.get('redis_address',
                                          ae_consts.REDIS_ADDRESS)
            redis_key = work_dict.get('redis_key', ae_consts.REDIS_KEY)
            redis_password = work_dict.get('redis_password',
                                           ae_consts.REDIS_PASSWORD)
            redis_db = work_dict.get('redis_db', None)
            if not redis_db:
                redis_db = ae_consts.REDIS_DB
            redis_expire = None
            if 'redis_expire' in work_dict:
                redis_expire = work_dict.get('redis_expire',
                                             ae_consts.REDIS_EXPIRE)
            log.info(f'redis enabled address={redis_address}@{redis_db} '
                     f'key={redis_key}')
            redis_host = redis_address.split(':')[0]
            redis_port = redis_address.split(':')[1]
            try:
                if ae_consts.ev('DEBUG_REDIS', '0') == '1':
                    log.info(
                        f'{label} publishing redis={redis_host}:{redis_port} '
                        f'db={redis_db} key={redis_key} updated={updated} '
                        f'expire={redis_expire} data={ae_consts.ppj(data)}')
                else:
                    log.info(
                        f'{label} publishing redis={redis_host}:{redis_port} '
                        f'db={redis_db} key={redis_key} '
                        f'updated={updated} expire={redis_expire}')
                # end of if/else

                rc = redis.Redis(host=redis_host,
                                 port=redis_port,
                                 password=redis_password,
                                 db=redis_db)

                redis_set_res = redis_set.set_data_in_redis_key(
                    label=label,
                    client=rc,
                    key=redis_key,
                    data=data,
                    serializer=serializer,
                    encoding=encoding,
                    expire=redis_expire,
                    px=None,
                    nx=False,
                    xx=False)

                log.info(
                    f'{label} redis_set '
                    f'status={ae_consts.get_status(redis_set_res["status"])} '
                    f'err={redis_set_res["err"]}')

            except Exception as e:
                log.error(f'{label} failed - redis publish to '
                          f'key={redis_key} ex={e}')
            # end of try/ex for creating bucket
        else:
            log.info(f'{label} SKIP REDIS publish key={redis_key}')
        # end of if enable_redis_publish

        res = build_result.build_result(status=ae_consts.SUCCESS,
                                        err=None,
                                        rec=rec)

    except Exception as e:
        res = build_result.build_result(
            status=ae_consts.ERR,
            err=(f'failed - publish_from_s3 dict={work_dict} with ex={e}'),
            rec=rec)
        log.error(f'{label} - {res["err"]}')
    # end of try/ex

    log.info('task - publish_from_s3 done - '
             f'{label} - status={ae_consts.get_status(res["status"])}')

    return get_task_results.get_task_results(work_dict=work_dict, result=res)
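A usage sketch with the same caveats (bound Celery task, hypothetical
names). The source bucket is expected to hold per-date keys shaped like
{TICKER}_YYYY-MM-DD, which the task scans, aggregates, compresses, and
republishes.

work = {
    'ticker': 'SPY',
    's3_bucket': 'pricing',                    # per-date source keys
    's3_compiled_bucket': 'compileddatasets',  # aggregated destination
    's3_key': 'SPY_latest',                    # assumed destination key
    'redis_key': 'SPY_latest',
    'serializer': 'json',
    'encoding': 'utf-8'
}
task_res = publish_ticker_aggregate_from_s3(work)
print(ae_consts.get_status(task_res['status']))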
Example 3
def run_distributed_algorithm(self, algo_req):
    """run_distributed_algorithm

    Process a distributed Algorithm

    :param algo_req: dictionary for key/values for
        running an algorithm using Celery workers
    """

    label = algo_req.get('name', 'ae-algo')
    verbose = algo_req.get('verbose', False)
    debug = algo_req.get('debug', False)

    # please be careful logging prod passwords:
    if verbose or debug:
        log.info('task - {} - start algo_req={}'.format(label, algo_req))
    else:
        log.info('task - {} - start '.format(label))
    # end of start log

    rec = {}
    res = build_result.build_result(status=ae_consts.NOT_RUN,
                                    err=None,
                                    rec=rec)

    created_algo_object = None
    custom_algo_module = None
    new_algo_object = None
    use_custom_algo = False
    found_algo_module = True  # assume the BaseAlgo
    should_publish_extract_dataset = False
    should_publish_history_dataset = False
    should_publish_report_dataset = False

    ticker = algo_req.get('ticker', 'SPY')
    num_days_back = algo_req.get('num_days_back', 75)
    name = algo_req.get('name', 'ae-algo')
    algo_module_path = algo_req.get('mod_path', None)
    module_name = algo_req.get('module_name', 'BaseAlgo')
    custom_algo_module = algo_req.get('custom_algo_module', None)
    new_algo_object = algo_req.get('new_algo_object', None)
    use_custom_algo = algo_req.get('use_custom_algo', False)
    should_publish_extract_dataset = algo_req.get(
        'should_publish_extract_dataset', False)
    should_publish_history_dataset = algo_req.get(
        'should_publish_history_dataset', False)
    should_publish_report_dataset = algo_req.get(
        'should_publish_report_dataset', False)
    start_date = algo_req.get('start_date', None)
    end_date = algo_req.get('end_date', None)
    raise_on_err = algo_req.get('raise_on_err', False)

    report_config = algo_req.get('report_config', None)
    history_config = algo_req.get('history_config', None)
    extract_config = algo_req.get('extract_config', None)

    err = None
    if algo_module_path:
        found_algo_module = False
        module_name = algo_module_path.split('/')[-1]
        loader = importlib.machinery.SourceFileLoader(module_name,
                                                      algo_module_path)
        custom_algo_module = types.ModuleType(loader.name)
        loader.exec_module(custom_algo_module)
        use_custom_algo = True

        for member in inspect.getmembers(custom_algo_module):
            if module_name in str(member):
                found_algo_module = True
                break
        # for all members in this custom module file
    # if loading a custom algorithm module from a file on disk

    if not found_algo_module:
        err = ('{} - unable to find custom algorithm module={} '
               'module_path={}'.format(label, custom_algo_module,
                                       algo_module_path))
        if algo_module_path:
            err = (
                '{} - analysis_engine.work_tasks.run_distributed_algorithm '
                'was unable '
                'to find custom algorithm module={} with provided path to \n '
                'file: {} \n'
                '\n'
                'Please confirm '
                'that the class inherits from the BaseAlgo class like:\n'
                '\n'
                'import analysis_engine.algo\n'
                'class MyAlgo(analysis_engine.algo.BaseAlgo):\n '
                '\n'
                'If it is then please file an issue on github:\n '
                'https://github.com/AlgoTraders/stock-analysis-engine/'
                'issues/new \n\nFor now this error results in a shutdown'
                '\n'.format(label, custom_algo_module, algo_module_path))
        # if algo_module_path set

        log.error(err)
        res = build_result.build_result(status=ae_consts.ERR,
                                        err=err,
                                        rec=None)
        return get_task_results.get_task_results(work_dict=algo_req,
                                                 result=res)
    # if not found_algo_module

    use_start_date = start_date
    use_end_date = end_date
    if not use_end_date:
        end_date = datetime.datetime.utcnow()
        use_end_date = end_date.strftime(ae_consts.COMMON_TICK_DATE_FORMAT)
    if not use_start_date:
        start_date = end_date - datetime.timedelta(days=num_days_back)
        use_start_date = start_date.strftime(ae_consts.COMMON_TICK_DATE_FORMAT)
    dataset_publish_extract = algo_req.get('dataset_publish_extract', False)
    dataset_publish_history = algo_req.get('dataset_publish_history', False)
    dataset_publish_report = algo_req.get('dataset_publish_report', False)
    try:
        if use_custom_algo:
            log.info('inspecting {} for class {}'.format(
                custom_algo_module, module_name))
            use_class_member_object = None
            for member in inspect.getmembers(custom_algo_module):
                if module_name in str(member):
                    log.info('start {} with {}'.format(name, member[1]))
                    use_class_member_object = member
                    break
            # end of scanning the module members for the algorithm class

            if use_class_member_object:
                new_algo_object = use_class_member_object[1](**algo_req)
            else:
                err = ('{} - did not find a derived '
                       'analysis_engine.algo.BaseAlgo '
                       'class in the module file={} '
                       'for ticker={} algo_name={}'.format(
                           label, algo_module_path, ticker, name))
                log.error(err)
                res = build_result.build_result(status=ae_consts.ERR,
                                                err=err,
                                                rec=None)
                return get_task_results.get_task_results(work_dict=algo_req,
                                                         result=res)
            # end of finding a valid algorithm object
        else:
            new_algo_object = ae_algo.BaseAlgo(**algo_req)
        # if using a custom module path or the BaseAlgo

        if new_algo_object:
            # heads up - logging this might have passwords in the algo_req
            # log.debug(
            #     '{} algorithm request: {}'.format(
            #         name,
            #         algo_req))
            log.info('{} - run ticker={} from {} to {}'.format(
                name, ticker, use_start_date, use_end_date))
            algo_res = run_algo.run_algo(algo=new_algo_object,
                                         raise_on_err=raise_on_err,
                                         **algo_req)
            created_algo_object = new_algo_object
            log.info('{} - run ticker={} from {} to {}'.format(
                name, ticker, use_start_date, use_end_date))
            if custom_algo_module:
                log.info(
                    '{} - done run_algo custom_algo_module={} module_name={} '
                    'ticker={} from {} to {}'.format(name, custom_algo_module,
                                                     module_name, ticker,
                                                     use_start_date,
                                                     use_end_date))
            else:
                log.info('{} - done run_algo BaseAlgo ticker={} from {} '
                         'to {}'.format(name, ticker, use_start_date,
                                        use_end_date))
        else:
            err = ('{} - missing a derived analysis_engine.algo.BaseAlgo '
                   'class in the module file={} for '
                   'ticker={} algo_name={}'.format(label, algo_module_path,
                                                   ticker, name))
            log.error(err)
            res = build_result.build_result(status=ae_consts.ERR,
                                            err=err,
                                            rec=None)
            return get_task_results.get_task_results(work_dict=algo_req,
                                                     result=res)
        # end of finding a valid algorithm object

        if not created_algo_object:
            err = ('{} - failed creating algorithm object - '
                   'ticker={} status={} error={} '
                   'algo name={} custom_algo_module={} module_name={} '
                   'from {} to {}'.format(
                       label, ticker,
                       ae_consts.get_status(status=algo_res['status']),
                       algo_res['err'], name, custom_algo_module, module_name,
                       use_start_date, use_end_date))
            res = build_result.build_result(status=ae_consts.ERR,
                                            err=err,
                                            rec=None)
            return get_task_results.get_task_results(work_dict=algo_req,
                                                     result=res)
        # end of stop early

        if should_publish_extract_dataset or dataset_publish_extract:
            s3_log = ''
            redis_log = ''
            file_log = ''
            use_log = 'publish'

            if (extract_config['redis_address'] and extract_config['redis_db']
                    and extract_config['redis_key']):
                redis_log = 'redis://{}@{}/{}'.format(
                    extract_config['redis_address'],
                    extract_config['redis_db'], extract_config['redis_key'])
                use_log += ' {}'.format(redis_log)
            else:
                extract_config['redis_enabled'] = False
            if (extract_config['s3_address'] and extract_config['s3_bucket']
                    and extract_config['s3_key']):
                s3_log = 's3://{}/{}/{}'.format(extract_config['s3_address'],
                                                extract_config['s3_bucket'],
                                                extract_config['s3_key'])
                use_log += ' {}'.format(s3_log)
            else:
                extract_config['s3_enabled'] = False
            if extract_config['output_file']:
                file_log = 'file:{}'.format(extract_config['output_file'])
                use_log += ' {}'.format(file_log)

            log.info('{} - publish - start ticker={} algorithm-ready {}'
                     ''.format(name, ticker, use_log))

            publish_status = created_algo_object.publish_input_dataset(
                **extract_config)
            if publish_status != ae_consts.SUCCESS:
                msg = ('failed to publish algorithm-ready datasets '
                       'with status {} attempted to {}'.format(
                           ae_consts.get_status(status=publish_status),
                           use_log))
                log.error(msg)
                res = build_result.build_result(status=ae_consts.ERR,
                                                err=msg,
                                                rec=None)
                return get_task_results.get_task_results(work_dict=algo_req,
                                                         result=res)
            # end of stop early

            log.info('{} - publish - done ticker={} algorithm-ready {}'
                     ''.format(name, ticker, use_log))
        # if publish the algorithm-ready dataset

        if should_publish_history_dataset or dataset_publish_history:
            s3_log = ''
            redis_log = ''
            file_log = ''
            use_log = 'publish'

            if (history_config['redis_address'] and history_config['redis_db']
                    and history_config['redis_key']):
                redis_log = 'redis://{}@{}/{}'.format(
                    history_config['redis_address'],
                    history_config['redis_db'], history_config['redis_key'])
                use_log += ' {}'.format(redis_log)
            if (history_config['s3_address'] and history_config['s3_bucket']
                    and history_config['s3_key']):
                s3_log = 's3://{}/{}/{}'.format(history_config['s3_address'],
                                                history_config['s3_bucket'],
                                                history_config['s3_key'])
                use_log += ' {}'.format(s3_log)
            if history_config['output_file']:
                file_log = 'file:{}'.format(history_config['output_file'])
                use_log += ' {}'.format(file_log)

            log.info('{} - publish - start ticker={} trading history {}'
                     ''.format(name, ticker, use_log))

            publish_status = \
                created_algo_object.publish_trade_history_dataset(
                    **history_config)
            if publish_status != ae_consts.SUCCESS:
                msg = ('failed to publish trading history datasets '
                       'with status {} attempted to {}'.format(
                           ae_consts.get_status(status=publish_status),
                           use_log))
                log.error(msg)
                res = build_result.build_result(status=ae_consts.ERR,
                                                err=msg,
                                                rec=None)
                return get_task_results.get_task_results(work_dict=algo_req,
                                                         result=res)
            # end of stop early

            log.info('{} - publish - done ticker={} trading history {}'
                     ''.format(name, ticker, use_log))
        # if publish a trading history dataset

        if should_publish_report_dataset or dataset_publish_report:
            s3_log = ''
            redis_log = ''
            file_log = ''
            use_log = 'publish'

            if (report_config['redis_address'] and report_config['redis_db']
                    and report_config['redis_key']):
                redis_log = 'redis://{}@{}/{}'.format(
                    report_config['redis_address'], report_config['redis_db'],
                    report_config['redis_key'])
                use_log += ' {}'.format(redis_log)
            if (report_config['s3_address'] and report_config['s3_bucket']
                    and report_config['s3_key']):
                s3_log = 's3://{}/{}/{}'.format(report_config['s3_address'],
                                                report_config['s3_bucket'],
                                                report_config['s3_key'])
                use_log += ' {}'.format(s3_log)
            if report_config['output_file']:
                file_log = 'file:{}'.format(report_config['output_file'])
                use_log += ' {}'.format(file_log)

            log.info('{} - publishing ticker={} trading performance report {}'
                     ''.format(name, ticker, use_log))

            publish_status = created_algo_object.publish_report_dataset(
                **report_config)
            if publish_status != ae_consts.SUCCESS:
                msg = ('failed to publish trading performance report datasets '
                       'with status {} attempted to {}'.format(
                           ae_consts.get_status(status=publish_status),
                           use_log))
                log.error(msg)
                res = build_result.build_result(status=ae_consts.ERR,
                                                err=msg,
                                                rec=None)
                return get_task_results.get_task_results(work_dict=algo_req,
                                                         result=res)
            # end of stop early

            log.info(
                '{} - publish - done ticker={} trading performance report {}'
                ''.format(name, ticker, use_log))
        # if publish a trading performance report dataset

        log.info(
            '{} - done publishing datasets for ticker={} from {} to {}'.format(
                name, ticker, use_start_date, use_end_date))

        res = build_result.build_result(status=ae_consts.SUCCESS,
                                        err=None,
                                        rec=rec)

    except Exception as e:
        res = build_result.build_result(
            status=ae_consts.ERR,
            err=('failed - run_distributed_algorithm '
                 'dict={} with ex={}').format(algo_req, e),
            rec=rec)
        log.error('{} - {}'.format(label, res['err']))
    # end of try/ex

    log.info('task - run_distributed_algorithm done - '
             '{} - status={}'.format(label,
                                     ae_consts.get_status(res['status'])))

    return get_task_results.get_task_results(work_dict=algo_req, result=res)
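A usage sketch with the same caveats. Omitting mod_path keeps the default
analysis_engine.algo.BaseAlgo path, and leaving the publish flags off means
none of the *_config dicts are touched; it also assumes BaseAlgo accepts
these keys as keyword arguments.

algo_req = {
    'name': 'ae-algo-example',
    'ticker': 'SPY',
    'num_days_back': 30,     # backtest window ending at utcnow
    'raise_on_err': True
}
task_res = run_distributed_algorithm(algo_req)
print(ae_consts.get_status(task_res['status']))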
def get_new_pricing_data(self, work_dict):
    """get_new_pricing_data

    Get Ticker information on:

    - prices - turn off with ``work_dict['get_pricing'] = False``
    - news - turn off with ``work_dict['get_news'] = False``
    - options - turn off with ``work_dict['get_options'] = False``

    :param work_dict: dictionary for key/values
    """

    label = 'get_new_pricing_data'

    log.debug(f'task - {label} - start work_dict={work_dict}')

    num_success = 0
    ticker = ae_consts.TICKER
    ticker_id = ae_consts.TICKER_ID
    rec = {
        'pricing': None,
        'options': None,
        'calls': None,
        'puts': None,
        'news': None,
        'daily': None,
        'minute': None,
        'quote': None,
        'stats': None,
        'peers': None,
        'iex_news': None,
        'financials': None,
        'earnings': None,
        'dividends': None,
        'company': None,
        'exp_date': None,
        'publish_pricing_update': None,
        'num_success': num_success,
        'date': ae_utils.utc_now_str(),
        'updated': None,
        'version': ae_consts.DATASET_COLLECTION_VERSION
    }
    res = {'status': ae_consts.NOT_RUN, 'err': None, 'rec': rec}

    try:
        ticker = work_dict.get('ticker', ticker)
        ticker_id = work_dict.get('ticker_id', ae_consts.TICKER_ID)
        s3_bucket = work_dict.get('s3_bucket', ae_consts.S3_BUCKET)
        s3_key = work_dict.get('s3_key', ae_consts.S3_KEY)
        redis_key = work_dict.get('redis_key', ae_consts.REDIS_KEY)
        exp_date = work_dict.get('exp_date', None)
        cur_date = ae_utils.last_close()
        cur_strike = work_dict.get('strike', None)
        contract_type = str(work_dict.get('contract', 'C')).upper()
        label = work_dict.get('label', label)
        iex_datasets = work_dict.get('iex_datasets',
                                     iex_consts.DEFAULT_FETCH_DATASETS)
        td_datasets = work_dict.get('td_datasets',
                                    td_consts.DEFAULT_FETCH_DATASETS_TD)
        fetch_mode = work_dict.get('fetch_mode', ae_consts.FETCH_MODE_ALL)
        iex_token = work_dict.get('iex_token', iex_consts.IEX_TOKEN)
        td_token = work_dict.get('td_token', td_consts.TD_TOKEN)
        str_fetch_mode = str(fetch_mode).lower()

        # control flags to deal with feed issues:
        get_iex_data = True
        get_td_data = True

        if (fetch_mode == ae_consts.FETCH_MODE_ALL
                or str_fetch_mode == 'initial'):
            get_iex_data = True
            get_td_data = True
            iex_datasets = ae_consts.IEX_INITIAL_DATASETS
        elif (fetch_mode == ae_consts.FETCH_MODE_ALL
              or str_fetch_mode == 'all'):
            get_iex_data = True
            get_td_data = True
            iex_datasets = ae_consts.IEX_DATASETS_DEFAULT
        elif (fetch_mode == ae_consts.FETCH_MODE_YHO
              or str_fetch_mode == 'yahoo'):
            get_iex_data = False
            get_td_data = False
        elif (fetch_mode == ae_consts.FETCH_MODE_IEX
              or str_fetch_mode == 'iex-all'):
            get_iex_data = True
            get_td_data = False
            iex_datasets = ae_consts.IEX_DATASETS_DEFAULT
        elif (fetch_mode == ae_consts.FETCH_MODE_IEX
              or str_fetch_mode == 'iex'):
            get_iex_data = True
            get_td_data = False
            iex_datasets = ae_consts.IEX_INTRADAY_DATASETS
        elif (fetch_mode == ae_consts.FETCH_MODE_INTRADAY
              or str_fetch_mode == 'intra'):
            get_iex_data = True
            get_td_data = True
            iex_datasets = ae_consts.IEX_INTRADAY_DATASETS
        elif (fetch_mode == ae_consts.FETCH_MODE_DAILY
              or str_fetch_mode == 'daily'):
            get_iex_data = True
            get_td_data = False
            iex_datasets = ae_consts.IEX_DAILY_DATASETS
        elif (fetch_mode == ae_consts.FETCH_MODE_WEEKLY
              or str_fetch_mode == 'weekly'):
            get_iex_data = True
            get_td_data = False
            iex_datasets = ae_consts.IEX_WEEKLY_DATASETS
        elif (fetch_mode == ae_consts.FETCH_MODE_TD or str_fetch_mode == 'td'):
            get_iex_data = False
            get_td_data = True
        else:
            get_iex_data = False
            get_td_data = False

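            # custom mode: treat fetch_mode as a comma-separated list, e.g. 'iex_min,iex_quote,td'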
            fetch_arr = str_fetch_mode.split(',')
            found_fetch = False
            iex_datasets = []
            for fetch_name in fetch_arr:
                if fetch_name not in iex_datasets:
                    if fetch_name == 'iex_min':
                        iex_datasets.append('minute')
                    elif fetch_name == 'iex_day':
                        iex_datasets.append('daily')
                    elif fetch_name == 'iex_quote':
                        iex_datasets.append('quote')
                    elif fetch_name == 'iex_stats':
                        iex_datasets.append('stats')
                    elif fetch_name == 'iex_peers':
                        iex_datasets.append('peers')
                    elif fetch_name == 'iex_news':
                        iex_datasets.append('news')
                    elif fetch_name == 'iex_fin':
                        iex_datasets.append('financials')
                    elif fetch_name == 'iex_earn':
                        iex_datasets.append('earnings')
                    elif fetch_name == 'iex_div':
                        iex_datasets.append('dividends')
                    elif fetch_name == 'iex_comp':
                        iex_datasets.append('company')
                    elif fetch_name == 'td':
                        get_td_data = True
                    else:
                        log.warn(f'unsupported IEX dataset {fetch_name}')
            found_fetch = (len(iex_datasets) != 0)
            if not found_fetch:
                log.error(f'{label} - unsupported '
                          f'fetch_mode={fetch_mode} value')
            else:
                get_iex_data = True
                log.debug(f'{label} - '
                          f'fetching={len(iex_datasets)} '
                          f'{iex_datasets} '
                          f'fetch_mode={fetch_mode}')
        # end of screening custom fetch_mode settings

        num_tokens = 0

        if get_iex_data:
            if not iex_token:
                log.warn(f'{label} - '
                         'please set a valid IEX Cloud Account token ('
                         'https://iexcloud.io/cloud-login/#/register'
                         ') to fetch data from IEX Cloud. It must be '
                         'set as an environment variable like: '
                         'export IEX_TOKEN=<token>')
                get_iex_data = False
            else:
                num_tokens += 1
        # sanity check - disable IEX fetch if the token is not set
        if get_td_data:
            missing_td_token = [
                'MISSING_TD_TOKEN', 'SETYOURTDTOKEN', 'SETYOURTRADIERTOKENHERE'
            ]
            if td_token in missing_td_token:
                log.warn(f'{label} - '
                         'please set a valid Tradier Account token ('
                         'https://developer.tradier.com/user/sign_up'
                         ') to fetch pricing data from Tradier. It must be '
                         'set as an environment variable like: '
                         'export TD_TOKEN=<token>')
                get_td_data = False
            else:
                num_tokens += 1
        # sanity check - disable Tradier fetch if the token is not set
        """
        as of Thursday, Jan. 3, 2019:
        https://developer.yahoo.com/yql/
        Important EOL Notice: As of Thursday, Jan. 3, 2019
        the YQL service at query.yahooapis.com will be retired
        """
        get_yahoo_data = False

        if (not get_iex_data and not get_td_data and not get_yahoo_data):
            err = None
            if num_tokens == 0:
                res['status'] = ae_consts.MISSING_TOKEN
                err = ('Please set a valid IEX_TOKEN or TD_TOKEN '
                       'environment variable')
                res['err'] = err
            else:
                err = (f'Please set at least one supported datafeed from '
                       f'either: '
                       f'IEX Cloud (fetch -t TICKER -g iex) or '
                       f'Tradier (fetch -t TICKER -g td) '
                       f'for '
                       f'ticker={ticker} '
                       f'cur_date={cur_date} '
                       f'IEX enabled={get_iex_data} '
                       f'TD enabled={get_td_data} '
                       f'YHO enabled={get_yahoo_data}')
                res['status'] = ae_consts.ERR
                res['err'] = err
            return get_task_results.get_task_results(work_dict=work_dict,
                                                     result=res)
        # end of checking that there is at least 1 feed on

        if not exp_date:
            exp_date = opt_dates.option_expiration(date=exp_date)
        else:
            exp_date = datetime.datetime.strptime(exp_date, '%Y-%m-%d')

        rec['updated'] = cur_date.strftime('%Y-%m-%d %H:%M:%S')
        log.debug(f'{label} getting pricing for ticker={ticker} '
                  f'cur_date={cur_date} exp_date={exp_date} '
                  f'IEX={get_iex_data} '
                  f'TD={get_td_data} '
                  f'YHO={get_yahoo_data}')

        yahoo_rec = {
            'ticker': ticker,
            'pricing': None,
            'options': None,
            'calls': None,
            'puts': None,
            'news': None,
            'exp_date': None,
            'publish_pricing_update': None,
            'date': None,
            'updated': None
        }

        # disabled on 2019-01-03
        if get_yahoo_data:
            log.debug(f'{label} YHO ticker={ticker}')
            yahoo_res = yahoo_data.get_data_from_yahoo(work_dict=work_dict)
            status_str = ae_consts.get_status(status=yahoo_res['status'])
            if yahoo_res['status'] == ae_consts.SUCCESS:
                yahoo_rec = yahoo_res['rec']
                msg = (f'{label} YHO ticker={ticker} '
                       f'status={status_str} err={yahoo_res["err"]}')
                if ae_consts.ev('SHOW_SUCCESS', '0') == '1':
                    log.info(msg)
                else:
                    log.debug(msg)
                rec['pricing'] = yahoo_rec.get('pricing', '{}')
                rec['news'] = yahoo_rec.get('news', '{}')
                rec['options'] = yahoo_rec.get('options', '{}')
                rec['calls'] = rec['options'].get('calls',
                                                  ae_consts.EMPTY_DF_STR)
                rec['puts'] = rec['options'].get('puts',
                                                 ae_consts.EMPTY_DF_STR)
                num_success += 1
            else:
                log.error(f'{label} failed YHO ticker={ticker} '
                          f'status={status_str} err={yahoo_res["err"]}')
        # end of get from yahoo

        if get_iex_data:
            num_iex_ds = len(iex_datasets)
            log.debug(f'{label} IEX datasets={num_iex_ds}')
            for idx, ft_type in enumerate(iex_datasets):
                dataset_field = iex_consts.get_ft_str(ft_type=ft_type)

                log.debug(f'{label} IEX={idx}/{num_iex_ds} '
                          f'field={dataset_field} ticker={ticker}')
                iex_label = f'{label}-{dataset_field}'
                iex_req = copy.deepcopy(work_dict)
                iex_req['label'] = iex_label
                iex_req['ft_type'] = ft_type
                iex_req['field'] = dataset_field
                iex_req['ticker'] = ticker
                iex_res = iex_data.get_data_from_iex(work_dict=iex_req)

                status_str = (ae_consts.get_status(status=iex_res['status']))
                if iex_res['status'] == ae_consts.SUCCESS:
                    iex_rec = iex_res['rec']
                    msg = (f'{label} IEX ticker={ticker} '
                           f'field={dataset_field} '
                           f'status={status_str} '
                           f'err={iex_res["err"]}')
                    if ae_consts.ev('SHOW_SUCCESS', '0') == '1':
                        log.info(msg)
                    else:
                        log.debug(msg)
                    if dataset_field == 'news':
                        rec['iex_news'] = iex_rec['data']
                    else:
                        rec[dataset_field] = iex_rec['data']
                    num_success += 1
                else:
                    log.debug(f'{label} failed IEX ticker={ticker} '
                              f'field={dataset_field} '
                              f'status={status_str} err={iex_res["err"]}')
                # end of if/else success
            # end idx, ft_type in enumerate(iex_datasets):
        # end of if get_iex_data

        if get_td_data:
            num_td_ds = len(td_datasets)
            log.debug(f'{label} TD datasets={num_td_ds}')

            for idx, ft_type in enumerate(td_datasets):
                dataset_field = td_consts.get_ft_str_td(ft_type=ft_type)
                log.debug(f'{label} TD={idx}/{num_td_ds} '
                          f'field={dataset_field} ticker={ticker}')
                td_label = (f'{label}-{dataset_field}')
                td_req = copy.deepcopy(work_dict)
                td_req['label'] = td_label
                td_req['ft_type'] = ft_type
                td_req['field'] = dataset_field
                td_req['ticker'] = ticker
                td_res = td_data.get_data_from_td(work_dict=td_req)

                status_str = (ae_consts.get_status(status=td_res['status']))
                if td_res['status'] == ae_consts.SUCCESS:
                    td_rec = td_res['rec']
                    msg = (f'{label} TD ticker={ticker} '
                           f'field={dataset_field} '
                           f'status={status_str} '
                           f'err={td_res["err"]}')
                    if ae_consts.ev('SHOW_SUCCESS', '0') == '1':
                        log.info(msg)
                    else:
                        log.debug(msg)
                    if dataset_field == 'tdcalls':
                        rec['tdcalls'] = td_rec['data']
                    elif dataset_field == 'tdputs':
                        rec['tdputs'] = td_rec['data']
                    else:
                        rec[dataset_field] = td_rec['data']
                    num_success += 1
                else:
                    log.critical(f'{label} failed TD ticker={ticker} '
                                 f'field={dataset_field} '
                                 f'status={status_str} err={td_res["err"]}')
                # end of if/else success
            # end idx, ft_type in enumerate(td_datasets):
        # end of if get_td_data

        rec['num_success'] = num_success

        update_req = {'data': rec}
        update_req['ticker'] = ticker
        update_req['ticker_id'] = ticker_id
        update_req['strike'] = cur_strike
        update_req['contract'] = contract_type
        update_req['s3_enabled'] = work_dict.get('s3_enabled',
                                                 ae_consts.ENABLED_S3_UPLOAD)
        update_req['redis_enabled'] = work_dict.get(
            'redis_enabled', ae_consts.ENABLED_REDIS_PUBLISH)
        update_req['s3_bucket'] = s3_bucket
        update_req['s3_key'] = s3_key
        update_req['s3_access_key'] = work_dict.get('s3_access_key',
                                                    ae_consts.S3_ACCESS_KEY)
        update_req['s3_secret_key'] = work_dict.get('s3_secret_key',
                                                    ae_consts.S3_SECRET_KEY)
        update_req['s3_region_name'] = work_dict.get('s3_region_name',
                                                     ae_consts.S3_REGION_NAME)
        update_req['s3_address'] = work_dict.get('s3_address',
                                                 ae_consts.S3_ADDRESS)
        update_req['s3_secure'] = work_dict.get('s3_secure',
                                                ae_consts.S3_SECURE)
        update_req['redis_key'] = redis_key
        update_req['redis_address'] = work_dict.get('redis_address',
                                                    ae_consts.REDIS_ADDRESS)
        update_req['redis_password'] = work_dict.get('redis_password',
                                                     ae_consts.REDIS_PASSWORD)
        update_req['redis_db'] = int(
            work_dict.get('redis_db', ae_consts.REDIS_DB))
        update_req['redis_expire'] = work_dict.get('redis_expire',
                                                   ae_consts.REDIS_EXPIRE)
        update_req['updated'] = rec['updated']
        update_req['label'] = label
        update_req['celery_disabled'] = True
        update_status = ae_consts.NOT_SET

        try:
            update_res = publisher.run_publish_pricing_update(
                work_dict=update_req)
            update_status = update_res.get('status', ae_consts.NOT_SET)
            status_str = ae_consts.get_status(status=update_status)
            if ae_consts.ev('DEBUG_RESULTS', '0') == '1':
                log.debug(f'{label} update_res '
                          f'status={status_str} '
                          f'data={ae_consts.ppj(update_res)}')
            else:
                log.debug(f'{label} run_publish_pricing_update '
                          f'status={status_str}')
            # end of if/else

            rec['publish_pricing_update'] = update_res
            res = build_result.build_result(status=ae_consts.SUCCESS,
                                            err=None,
                                            rec=rec)
        except Exception as f:
            err = (f'{label} publisher.run_publish_pricing_update failed '
                   f'with ex={f}')
            log.error(err)
            res = build_result.build_result(status=ae_consts.ERR,
                                            err=err,
                                            rec=rec)
        # end of trying to publish results to connected services

    except Exception as e:
        res = build_result.build_result(status=ae_consts.ERR,
                                        err=('failed - get_new_pricing_data '
                                             f'dict={work_dict} with ex={e}'),
                                        rec=rec)
        log.error(f'{label} - {res["err"]}')
    # end of try/ex

    if ae_consts.ev('DATASET_COLLECTION_SLACK_ALERTS', '0') == '1':
        env_name = 'DEV'
        if ae_consts.ev('PROD_SLACK_ALERTS', '1') == '1':
            env_name = 'PROD'
        done_msg = (f'Dataset collected ticker=*{ticker}* on '
                    f'env=*{env_name}* '
                    f'redis_key={redis_key} s3_key={s3_key} '
                    f'IEX={get_iex_data} '
                    f'TD={get_td_data} '
                    f'YHO={get_yahoo_data}')
        log.debug(f'{label} sending slack msg={done_msg}')
        if res['status'] == ae_consts.SUCCESS:
            slack_utils.post_success(msg=done_msg, block=False, jupyter=True)
        else:
            slack_utils.post_failure(msg=done_msg, block=False, jupyter=True)
        # end of if/else success
    # end of publishing to slack

    log.debug('task - get_new_pricing_data done - '
              f'{label} - status={ae_consts.get_status(res["status"])}')

    return get_task_results.get_task_results(work_dict=work_dict, result=res)
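
# Example invocation (an illustrative sketch, not from the source): with
# 'celery_disabled' set, the task helpers in this repo typically run the
# work synchronously, so the collected dataset comes back in the returned
# result's 'rec' dict. Ticker and key names below are placeholders.
# work = {
#     'ticker': 'SPY',
#     's3_bucket': 'pricing',
#     's3_key': 'SPY_latest',
#     'redis_key': 'SPY_latest',
#     'fetch_mode': 'iex',
#     'celery_disabled': True}
# task_res = get_new_pricing_data(None, work)
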
def task_screener_analysis(self, work_dict):
    """task_screener_analysis

    :param work_dict: task dictionary
    """

    label = work_dict.get('label', 'screener')

    log.info('{} - start'.format(label))

    rec = {}
    res = build_result.build_result(status=ae_consts.NOT_RUN,
                                    err=None,
                                    rec=rec)
    """
    Input - Set up dataset sources to collect
    """

    ticker = work_dict.get('ticker', None)
    org_tickers = work_dict.get('tickers', None)

    if not ticker and not org_tickers:
        res = build_result.build_result(status=ae_consts.ERR,
                                        err='missing ticker or tickers',
                                        rec=rec)

    tickers = []
    if not org_tickers:
        if ticker:
            tickers = [ticker]
    else:
        for t in org_tickers:
            upper_cased_ticker = str(t).upper()
            if upper_cased_ticker not in tickers:
                tickers.append(upper_cased_ticker)
        # build a unique ticker list
    # end of ensuring tickers is a unique list of
    # upper-cased ticker symbol strings

    # fetch from: 'all', 'iex' or 'yahoo'
    fetch_mode = work_dict.get('fetch_mode', os.getenv('FETCH_MODE', 'iex'))
    iex_datasets = work_dict.get(
        'iex_datasets',
        os.getenv('IEX_DATASETS_DEFAULT', ae_consts.IEX_DATASETS_DEFAULT))

    # if defined, these are task functions for
    # calling customized buy/sell determination Celery tasks
    determine_sells_callback = work_dict.get('determine_sells', None)
    determine_buys_callback = work_dict.get('determine_buys', None)

    try:

        log.info('{} fetch={} tickers={} '
                 'iex_datasets={} '
                 'sell_task={} '
                 'buy_task={}'.format(label, fetch_mode, tickers, iex_datasets,
                                      determine_sells_callback,
                                      determine_buys_callback))
        """
        Input - Set up required urls for building buckets
        """
        fv_urls = work_dict.get('urls', None)

        if not fv_urls:
            res = build_result.build_result(
                status=ae_consts.ERR,
                err='missing required urls list of screeners',
                rec=rec)

        # stop if something errored out with the
        # celery helper for turning off celery to debug
        # without an engine running
        if res['err']:
            log.error('{} - tickers={} fetch={} iex_datasets={} '
                      'hit validation err={}'.format(label, tickers,
                                                     fetch_mode, iex_datasets,
                                                     res['err']))

            return get_task_results.get_task_results(work_dict=work_dict,
                                                     result=res)
        # end of input validation checks

        num_urls = len(fv_urls)
        log.info('{} - running urls={}'.format(label, fv_urls))

        fv_dfs = []
        for uidx, url in enumerate(fv_urls):
            log.info('{} - running url {}/{}: {}'.format(label, uidx,
                                                         num_urls, url))
            fv_res = finviz_utils.fetch_tickers_from_screener(url=url)
            if fv_res['status'] == ae_consts.SUCCESS:
                fv_dfs.append(fv_res['rec']['data'])
                for ft_tick in fv_res['rec']['tickers']:
                    upper_ft_ticker = ft_tick.upper()
                    if upper_ft_ticker not in tickers:
                        tickers.append(upper_ft_ticker)
                # end of for all found tickers
            else:
                log.error('{} - failed url {}/{}: {}'.format(
                    label, uidx, num_urls, url))
            # if success vs log the error
        # end of urls to get pandas.DataFrame and unique tickers
        """
        Find tickers in screens
        """

        num_tickers = len(tickers)

        log.info('{} - fetching tickers={} from urls={}'.format(
            label, num_tickers, num_urls))
        """
        pull ticker data
        """

        fetch_recs = fetch_utils.fetch(tickers=tickers,
                                       fetch_mode=fetch_mode,
                                       iex_datasets=iex_datasets)

        if fetch_recs:
            rec = fetch_recs
            """
            Output - Where is data getting cached and archived?
            (this helps to retroactively evaluate trading performance)
            """

            res = build_result.build_result(status=ae_consts.SUCCESS,
                                            err=None,
                                            rec=rec)
        else:
            err = ('{} - tickers={} failed fetch={} '
                   'iex_datasets={}'.format(label, tickers, fetch_mode,
                                            iex_datasets))
            res = build_result.build_result(status=ae_consts.ERR,
                                            err=err,
                                            rec=rec)

        log.info('{} - done'.format(label))
    except Exception as e:
        err = ('{} - tickers={} fetch={} hit ex={} '.format(
            label, tickers, fetch_mode, e))
        log.error(err)
        res = build_result.build_result(status=ae_consts.ERR, err=err, rec=rec)
    # end of try/ex

    return get_task_results.get_task_results(work_dict=work_dict, result=res)

def get_new_pricing_data(self, work_dict):
    """get_new_pricing_data

    Get Ticker information on:

    - prices - turn off with ``work_dict.get_pricing = False``
    - news - turn off with ``work_dict.get_news = False``
    - options - turn off with ``work_dict.get_options = False``

    :param work_dict: dictionary for key/values
    """

    label = 'get_new_pricing_data'

    log.info('task - {} - start work_dict={}'.format(label, work_dict))

    ticker = ae_consts.TICKER
    ticker_id = ae_consts.TICKER_ID
    rec = {
        'pricing': None,
        'options': None,
        'calls': None,
        'puts': None,
        'news': None,
        'daily': None,
        'minute': None,
        'quote': None,
        'stats': None,
        'peers': None,
        'iex_news': None,
        'financials': None,
        'earnings': None,
        'dividends': None,
        'company': None,
        'exp_date': None,
        'publish_pricing_update': None,
        'date': ae_utils.utc_now_str(),
        'updated': None,
        'version': ae_consts.DATASET_COLLECTION_VERSION
    }
    res = {'status': ae_consts.NOT_RUN, 'err': None, 'rec': rec}

    try:
        ticker = work_dict.get('ticker', ticker)
        ticker_id = work_dict.get('ticker_id', ae_consts.TICKER_ID)
        s3_bucket = work_dict.get('s3_bucket', ae_consts.S3_BUCKET)
        s3_key = work_dict.get('s3_key', ae_consts.S3_KEY)
        redis_key = work_dict.get('redis_key', ae_consts.REDIS_KEY)
        exp_date = work_dict.get('exp_date', None)
        cur_date = datetime.datetime.utcnow()
        cur_strike = work_dict.get('strike', None)
        contract_type = str(work_dict.get('contract', 'C')).upper()
        label = work_dict.get('label', label)
        iex_datasets = work_dict.get('iex_datasets',
                                     iex_consts.DEFAULT_FETCH_DATASETS)
        td_datasets = work_dict.get('td_datasets',
                                    td_consts.DEFAULT_FETCH_DATASETS_TD)
        fetch_mode = work_dict.get('fetch_mode', ae_consts.FETCH_MODE_ALL)

        # control flags to deal with feed issues:
        get_iex_data = True
        get_td_data = True

        if (fetch_mode == ae_consts.FETCH_MODE_ALL
                or str(fetch_mode).lower() == 'all'):
            get_iex_data = True
            get_td_data = True
        elif (fetch_mode == ae_consts.FETCH_MODE_YHO
              or str(fetch_mode).lower() == 'yahoo'):
            get_iex_data = False
            get_td_data = False
        elif (fetch_mode == ae_consts.FETCH_MODE_IEX
              or str(fetch_mode).lower() == 'iex'):
            get_iex_data = True
            get_td_data = False
        elif (fetch_mode == ae_consts.FETCH_MODE_TD
              or str(fetch_mode).lower() == 'td'):
            get_iex_data = False
            get_td_data = True
        else:
            log.debug('{} - unsupported fetch_mode={} value'.format(
                label, fetch_mode))
        """
        as of Thursday, Jan. 3, 2019:
        https://developer.yahoo.com/yql/
        Important EOL Notice: As of Thursday, Jan. 3, 2019
        the YQL service at query.yahooapis.com will be retired
        """
        get_yahoo_data = False

        if not exp_date:
            exp_date = opt_dates.option_expiration(date=exp_date)
        else:
            exp_date = datetime.datetime.strptime(exp_date, '%Y-%m-%d')

        rec['updated'] = cur_date.strftime('%Y-%m-%d %H:%M:%S')
        log.info('{} getting pricing for ticker={} '
                 'cur_date={} exp_date={} '
                 'yahoo={} iex={} td={}'.format(label, ticker, cur_date,
                                                exp_date, get_yahoo_data,
                                                get_iex_data, get_td_data))

        yahoo_rec = {
            'ticker': ticker,
            'pricing': None,
            'options': None,
            'calls': None,
            'puts': None,
            'news': None,
            'exp_date': None,
            'publish_pricing_update': None,
            'date': None,
            'updated': None
        }

        # disabled on 2019-01-03
        if get_yahoo_data:
            log.info('{} yahoo ticker={}'.format(label, ticker))
            yahoo_res = yahoo_data.get_data_from_yahoo(work_dict=work_dict)
            if yahoo_res['status'] == ae_consts.SUCCESS:
                yahoo_rec = yahoo_res['rec']
                log.info('{} yahoo ticker={} '
                         'status={} err={}'.format(
                             label, ticker,
                             ae_consts.get_status(status=yahoo_res['status']),
                             yahoo_res['err']))
                rec['pricing'] = yahoo_rec.get('pricing', '{}')
                rec['news'] = yahoo_rec.get('news', '{}')
                rec['options'] = yahoo_rec.get('options', '{}')
                rec['calls'] = rec['options'].get('calls',
                                                  ae_consts.EMPTY_DF_STR)
                rec['puts'] = rec['options'].get('puts',
                                                 ae_consts.EMPTY_DF_STR)
            else:
                log.error('{} failed YAHOO ticker={} '
                          'status={} err={}'.format(
                              label, ticker,
                              ae_consts.get_status(status=yahoo_res['status']),
                              yahoo_res['err']))
        # end of get from yahoo

        if get_iex_data:
            num_iex_ds = len(iex_datasets)
            log.debug('{} iex datasets={}'.format(label, num_iex_ds))
            for idx, ft_type in enumerate(iex_datasets):
                dataset_field = iex_consts.get_ft_str(ft_type=ft_type)

                log.info('{} iex={}/{} field={} ticker={}'.format(
                    label, idx, num_iex_ds, dataset_field, ticker))
                iex_label = '{}-{}'.format(label, dataset_field)
                iex_req = copy.deepcopy(work_dict)
                iex_req['label'] = iex_label
                iex_req['ft_type'] = ft_type
                iex_req['field'] = dataset_field
                iex_req['ticker'] = ticker
                iex_res = iex_data.get_data_from_iex(work_dict=iex_req)

                if iex_res['status'] == ae_consts.SUCCESS:
                    iex_rec = iex_res['rec']
                    log.info(
                        '{} iex ticker={} field={} '
                        'status={} err={}'.format(
                            label, ticker, dataset_field,
                            ae_consts.get_status(status=iex_res['status']),
                            iex_res['err']))
                    if dataset_field == 'news':
                        rec['iex_news'] = iex_rec['data']
                    else:
                        rec[dataset_field] = iex_rec['data']
                else:
                    log.debug(
                        '{} failed IEX ticker={} field={} '
                        'status={} err={}'.format(
                            label, ticker, dataset_field,
                            ae_consts.get_status(status=iex_res['status']),
                            iex_res['err']))
                # end of if/else success
            # end idx, ft_type in enumerate(iex_datasets):
        # end of if get_iex_data

        if get_td_data:
            num_td_ds = len(td_datasets)
            log.debug('{} td datasets={}'.format(label, num_td_ds))
            for idx, ft_type in enumerate(td_datasets):
                dataset_field = td_consts.get_ft_str_td(ft_type=ft_type)

                log.info('{} td={}/{} field={} ticker={}'.format(
                    label, idx, num_td_ds, dataset_field, ticker))
                td_label = '{}-{}'.format(label, dataset_field)
                td_req = copy.deepcopy(work_dict)
                td_req['label'] = td_label
                td_req['ft_type'] = ft_type
                td_req['field'] = dataset_field
                td_req['ticker'] = ticker
                td_res = td_data.get_data_from_td(work_dict=td_req)

                if td_res['status'] == ae_consts.SUCCESS:
                    td_rec = td_res['rec']
                    log.info('{} td ticker={} field={} '
                             'status={} err={}'.format(
                                 label, ticker, dataset_field,
                                 ae_consts.get_status(status=td_res['status']),
                                 td_res['err']))
                    if dataset_field == 'tdcalls':
                        rec['tdcalls'] = td_rec['data']
                    elif dataset_field == 'tdputs':
                        rec['tdputs'] = td_rec['data']
                    else:
                        rec[dataset_field] = td_rec['data']
                else:
                    log.critical(
                        '{} failed TD ticker={} field={} '
                        'status={} err={}'.format(
                            label, ticker, dataset_field,
                            ae_consts.get_status(status=td_res['status']),
                            td_res['err']))
                # end of if/else success
            # end idx, ft_type in enumerate(td_datasets):
        # end of if get_td_data

        update_req = {'data': rec}
        update_req['ticker'] = ticker
        update_req['ticker_id'] = ticker_id
        update_req['strike'] = cur_strike
        update_req['contract'] = contract_type
        update_req['s3_enabled'] = work_dict.get('s3_enabled',
                                                 ae_consts.ENABLED_S3_UPLOAD)
        update_req['redis_enabled'] = work_dict.get(
            'redis_enabled', ae_consts.ENABLED_REDIS_PUBLISH)
        update_req['s3_bucket'] = s3_bucket
        update_req['s3_key'] = s3_key
        update_req['s3_access_key'] = work_dict.get('s3_access_key',
                                                    ae_consts.S3_ACCESS_KEY)
        update_req['s3_secret_key'] = work_dict.get('s3_secret_key',
                                                    ae_consts.S3_SECRET_KEY)
        update_req['s3_region_name'] = work_dict.get('s3_region_name',
                                                     ae_consts.S3_REGION_NAME)
        update_req['s3_address'] = work_dict.get('s3_address',
                                                 ae_consts.S3_ADDRESS)
        update_req['s3_secure'] = work_dict.get('s3_secure',
                                                ae_consts.S3_SECURE)
        update_req['redis_key'] = redis_key
        update_req['redis_address'] = work_dict.get('redis_address',
                                                    ae_consts.REDIS_ADDRESS)
        update_req['redis_password'] = work_dict.get('redis_password',
                                                     ae_consts.REDIS_PASSWORD)
        update_req['redis_db'] = int(
            work_dict.get('redis_db', ae_consts.REDIS_DB))
        update_req['redis_expire'] = work_dict.get('redis_expire',
                                                   ae_consts.REDIS_EXPIRE)
        update_req['updated'] = rec['updated']
        update_req['label'] = label
        update_req['celery_disabled'] = True
        update_status = ae_consts.NOT_SET

        try:
            update_res = publisher.run_publish_pricing_update(
                work_dict=update_req)
            update_status = update_res.get('status', ae_consts.NOT_SET)
            if ae_consts.ev('DEBUG_RESULTS', '0') == '1':
                log.info('{} update_res status={} data={}'.format(
                    label, ae_consts.get_status(status=update_status),
                    ae_consts.ppj(update_res)))
            else:
                log.info('{} run_publish_pricing_update status={}'.format(
                    label, ae_consts.get_status(status=update_status)))
            # end of if/else

            rec['publish_pricing_update'] = update_res
            res = build_result.build_result(status=ae_consts.SUCCESS,
                                            err=None,
                                            rec=rec)
        except Exception as f:
            err = ('{} publisher.run_publish_pricing_update failed '
                   'with ex={}'.format(label, f))
            log.error(err)
            res = build_result.build_result(status=ae_consts.ERR,
                                            err=err,
                                            rec=rec)
        # end of trying to publish results to connected services

    except Exception as e:
        res = build_result.build_result(status=ae_consts.ERR,
                                        err=('failed - get_new_pricing_data '
                                             'dict={} with ex={}').format(
                                                 work_dict, e),
                                        rec=rec)
        log.error('{} - {}'.format(label, res['err']))
    # end of try/ex

    if ae_consts.ev('DATASET_COLLECTION_SLACK_ALERTS', '0') == '1':
        env_name = 'DEV'
        if ae_consts.ev('PROD_SLACK_ALERTS', '1') == '1':
            env_name = 'PROD'
        done_msg = ('Dataset collected ticker=*{}* on env=*{}* '
                    'redis_key={} s3_key={} iex={} yahoo={}'.format(
                        ticker, env_name, redis_key, s3_key, get_iex_data,
                        get_yahoo_data))
        log.debug('{} sending slack msg={}'.format(label, done_msg))
        if res['status'] == ae_consts.SUCCESS:
            slack_utils.post_success(msg=done_msg, block=False, jupyter=True)
        else:
            slack_utils.post_failure(msg=done_msg, block=False, jupyter=True)
        # end of if/else success
    # end of publishing to slack

    log.info('task - get_new_pricing_data done - '
             '{} - status={}'.format(label,
                                     ae_consts.get_status(res['status'])))

    return get_task_results.get_task_results(work_dict=work_dict, result=res)

def publish_from_s3_to_redis(self, work_dict):
    """publish_from_s3_to_redis

    Publish Ticker Data from S3 to Redis

    :param work_dict: dictionary for key/values
    """

    label = 'pub-s3-to-redis'

    log.info('task - {} - start work_dict={}'.format(label, work_dict))

    ticker = ae_consts.TICKER
    ticker_id = ae_consts.TICKER_ID
    rec = {
        'ticker': None,
        'ticker_id': None,
        's3_enabled': True,
        'redis_enabled': True,
        's3_bucket': None,
        's3_key': None,
        'redis_key': None,
        'updated': None
    }
    res = build_result.build_result(status=ae_consts.NOT_RUN,
                                    err=None,
                                    rec=rec)

    try:
        ticker = work_dict.get('ticker', ae_consts.TICKER)
        ticker_id = int(work_dict.get('ticker_id', ae_consts.TICKER_ID))

        if not ticker:
            res = build_result.build_result(status=ae_consts.ERR,
                                            err='missing ticker',
                                            rec=rec)
            return res

        s3_key = work_dict.get('s3_key', None)
        s3_bucket_name = work_dict.get('s3_bucket', 'pricing')
        redis_key = work_dict.get('redis_key', None)
        updated = work_dict.get('updated', None)
        serializer = work_dict.get('serializer', 'json')
        encoding = work_dict.get('encoding', 'utf-8')
        label = work_dict.get('label', label)

        enable_s3_read = True
        enable_redis_publish = True

        rec['ticker'] = ticker
        rec['ticker_id'] = ticker_id
        rec['s3_bucket'] = s3_bucket_name
        rec['s3_key'] = s3_key
        rec['redis_key'] = redis_key
        rec['updated'] = updated
        rec['s3_enabled'] = enable_s3_read
        rec['redis_enabled'] = enable_redis_publish

        data = None

        if enable_s3_read:

            log.info('{} parsing s3 values'.format(label))
            access_key = work_dict.get('s3_access_key',
                                       ae_consts.S3_ACCESS_KEY)
            secret_key = work_dict.get('s3_secret_key',
                                       ae_consts.S3_SECRET_KEY)
            region_name = work_dict.get('s3_region_name',
                                        ae_consts.S3_REGION_NAME)
            service_address = work_dict.get('s3_address', ae_consts.S3_ADDRESS)
            secure = work_dict.get('s3_secure', ae_consts.S3_SECURE) == '1'

            endpoint_url = 'http://{}'.format(service_address)
            if secure:
                endpoint_url = 'https://{}'.format(service_address)

            log.info('{} building s3 endpoint_url={} '
                     'region={}'.format(label, endpoint_url, region_name))

            s3 = boto3.resource(
                's3',
                endpoint_url=endpoint_url,
                aws_access_key_id=access_key,
                aws_secret_access_key=secret_key,
                region_name=region_name,
                config=boto3.session.Config(signature_version='s3v4'))

            try:
                log.info('{} checking bucket={} exists'.format(
                    label, s3_bucket_name))
                if s3.Bucket(s3_bucket_name) not in s3.buckets.all():
                    log.info('{} creating bucket={}'.format(
                        label, s3_bucket_name))
                    s3.create_bucket(Bucket=s3_bucket_name)
            except Exception as e:
                log.info('{} failed creating bucket={} '
                         'with ex={}'.format(label, s3_bucket_name, e))
            # end of try/ex for creating bucket

            try:
                log.info('{} reading from s3={}/{} '
                         'updated={}'.format(label, s3_bucket_name, s3_key,
                                             updated))
                data = s3_read_contents_from_key.s3_read_contents_from_key(
                    s3=s3,
                    s3_bucket_name=s3_bucket_name,
                    s3_key=s3_key,
                    encoding=encoding,
                    convert_as_json=True)

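                # rough payload size for logging: bytes / 1024000
                # (1000 KB of 1024 bytes) reported as MB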
                initial_size_value = \
                    len(str(data)) / 1024000
                initial_size_str = ae_consts.to_f(initial_size_value)
                if ae_consts.ev('DEBUG_S3', '0') == '1':
                    log.info('{} read s3={}/{} data={}'.format(
                        label, s3_bucket_name, s3_key, ae_consts.ppj(data)))
                else:
                    log.info('{} read s3={}/{} data size={} MB'.format(
                        label, s3_bucket_name, s3_key, initial_size_str))
            except Exception as e:
                err = ('{} failed reading bucket={} '
                       'key={} ex={}').format(label, s3_bucket_name, s3_key, e)
                log.error(err)
                res = build_result.build_result(status=ae_consts.NOT_RUN,
                                                err=err,
                                                rec=rec)
            # end of try/ex for reading from s3
        else:
            log.info('{} SKIP S3 read bucket={} '
                     'key={}'.format(label, s3_bucket_name, s3_key))
        # end of if enable_s3_read

        if enable_redis_publish:
            redis_address = work_dict.get('redis_address',
                                          ae_consts.REDIS_ADDRESS)
            redis_key = work_dict.get('redis_key', ae_consts.REDIS_KEY)
            redis_password = work_dict.get('redis_password',
                                           ae_consts.REDIS_PASSWORD)
            redis_db = work_dict.get('redis_db', None)
            if not redis_db:
                redis_db = ae_consts.REDIS_DB
            redis_expire = None
            if 'redis_expire' in work_dict:
                redis_expire = work_dict.get('redis_expire',
                                             ae_consts.REDIS_EXPIRE)
            log.info('redis enabled address={}@{} '
                     'key={}'.format(redis_address, redis_db, redis_key))
            redis_host = redis_address.split(':')[0]
            redis_port = redis_address.split(':')[1]
            try:
                if ae_consts.ev('DEBUG_REDIS', '0') == '1':
                    log.info('{} publishing redis={}:{} '
                             'db={} key={} '
                             'updated={} expire={} '
                             'data={}'.format(label, redis_host, redis_port,
                                              redis_db, redis_key,
                                              updated, redis_expire,
                                              ae_consts.ppj(data)))
                else:
                    log.info('{} publishing redis={}:{} '
                             'db={} key={} '
                             'updated={} expire={}'.format(
                                 label, redis_host, redis_port, redis_db,
                                 redis_key, updated, redis_expire))
                # end of if/else

                rc = redis.Redis(host=redis_host,
                                 port=redis_port,
                                 password=redis_password,
                                 db=redis_db)

                redis_set_res = redis_set.set_data_in_redis_key(
                    label=label,
                    client=rc,
                    key=redis_key,
                    data=data,
                    serializer=serializer,
                    encoding=encoding,
                    expire=redis_expire,
                    px=None,
                    nx=False,
                    xx=False)

                log.info('{} redis_set status={} err={}'.format(
                    label, ae_consts.get_status(redis_set_res['status']),
                    redis_set_res['err']))

            except Exception as e:
                log.error('{} failed - redis publish to '
                          'key={} ex={}'.format(label, redis_key, e))
            # end of try/ex for redis publish
        else:
            log.info('{} SKIP REDIS publish '
                     'key={}'.format(label, redis_key))
        # end of if enable_redis_publish

        res = build_result.build_result(status=ae_consts.SUCCESS,
                                        err=None,
                                        rec=rec)

    except Exception as e:
        res = build_result.build_result(
            status=ae_consts.ERR,
            err=('failed - publish_from_s3_to_redis '
                 'dict={} with ex={}').format(work_dict, e),
            rec=rec)
        log.error('{} - {}'.format(label, res['err']))
    # end of try/ex

    log.info('task - publish_from_s3_to_redis done - '
             '{} - status={}'.format(label,
                                     ae_consts.get_status(res['status'])))

    return get_task_results.get_task_results(work_dict=work_dict, result=res)

def publish_pricing_update(self, work_dict):
    """publish_pricing_update

    Publish Ticker Data to S3 and Redis

    - prices - turn off with ``work_dict.get_pricing = False``
    - news - turn off with ``work_dict.get_news = False``
    - options - turn off with ``work_dict.get_options = False``

    :param work_dict: dictionary for key/values
    """

    label = 'publish_pricing'

    log.debug(f'task - {label} - start')

    ticker = ae_consts.TICKER
    ticker_id = ae_consts.TICKER_ID
    rec = {
        'ticker': None,
        'ticker_id': None,
        's3_enabled': False,
        'redis_enabled': False,
        's3_bucket': None,
        's3_key': None,
        'redis_key': None,
        'updated': None
    }
    res = build_result.build_result(status=ae_consts.NOT_RUN,
                                    err=None,
                                    rec=rec)

    try:
        ticker = work_dict.get('ticker', ae_consts.TICKER)
        ticker_id = int(work_dict.get('ticker_id', ae_consts.TICKER_ID))

        if not ticker:
            res = build_result.build_result(status=ae_consts.ERR,
                                            err='missing ticker',
                                            rec=rec)
            return res

        label = work_dict.get('label', label)
        s3_key = work_dict.get('s3_key', None)
        s3_bucket_name = work_dict.get('s3_bucket', 'pricing')
        redis_key = work_dict.get('redis_key', None)
        data = work_dict.get('data', None)
        updated = work_dict.get('updated', None)
        enable_s3_upload = work_dict.get('s3_enabled',
                                         ae_consts.ENABLED_S3_UPLOAD)
        enable_redis_publish = work_dict.get('redis_enabled',
                                             ae_consts.ENABLED_REDIS_PUBLISH)
        serializer = work_dict.get('serializer', 'json')
        encoding = work_dict.get('encoding', 'utf-8')

        rec['ticker'] = ticker
        rec['ticker_id'] = ticker_id
        rec['s3_bucket'] = s3_bucket_name
        rec['s3_key'] = s3_key
        rec['redis_key'] = redis_key
        rec['updated'] = updated
        rec['s3_enabled'] = enable_s3_upload
        rec['redis_enabled'] = enable_redis_publish

        if (enable_s3_upload and s3_bucket_name != 'MISSING_AN_S3_BUCKET'):
            access_key = work_dict.get('s3_access_key',
                                       ae_consts.S3_ACCESS_KEY)
            secret_key = work_dict.get('s3_secret_key',
                                       ae_consts.S3_SECRET_KEY)
            region_name = work_dict.get('s3_region_name',
                                        ae_consts.S3_REGION_NAME)
            service_address = work_dict.get('s3_address', ae_consts.S3_ADDRESS)
            secure = work_dict.get('s3_secure', ae_consts.S3_SECURE) == '1'

            endpoint_url = f'http{"s" if secure else ""}://{service_address}'

            log.debug(f'{label} building s3 endpoint_url={endpoint_url} '
                      f'region={region_name}')

            s3 = boto3.resource(
                's3',
                endpoint_url=endpoint_url,
                aws_access_key_id=access_key,
                aws_secret_access_key=secret_key,
                region_name=region_name,
                config=boto3.session.Config(signature_version='s3v4'))

            try:
                log.debug(f'{label} checking bucket={s3_bucket_name} exists')
                if s3.Bucket(s3_bucket_name) not in s3.buckets.all():
                    log.debug(f'{label} creating bucket={s3_bucket_name}')
                    s3.create_bucket(Bucket=s3_bucket_name)
            except Exception as e:
                log.debug(f'{label} failed creating bucket={s3_bucket_name} '
                          f'with ex={e}')
            # end of try/ex for creating bucket

            try:
                log.debug(f'{label} uploading to s3={s3_bucket_name}/{s3_key} '
                          f'updated={updated}')
                s3.Bucket(s3_bucket_name).put_object(
                    Key=s3_key, Body=json.dumps(data).encode(encoding))
            except Exception as e:
                log.error(f'{label} failed uploading bucket={s3_bucket_name} '
                          f'key={s3_key} ex={e}')
            # end of try/ex for uploading to s3
        else:
            log.debug(f'{label} SKIP S3 upload bucket={s3_bucket_name} '
                      f'key={s3_key}')
        # end of if enable_s3_upload

        if enable_redis_publish:
            redis_address = work_dict.get('redis_address',
                                          ae_consts.REDIS_ADDRESS)
            redis_key = work_dict.get('redis_key', ae_consts.REDIS_KEY)
            redis_password = work_dict.get('redis_password',
                                           ae_consts.REDIS_PASSWORD)
            redis_db = work_dict.get('redis_db', ae_consts.REDIS_DB)
            redis_expire = None
            if 'redis_expire' in work_dict:
                redis_expire = work_dict.get('redis_expire',
                                             ae_consts.REDIS_EXPIRE)
            log.debug(f'redis enabled address={redis_address}@{redis_db} '
                      f'key={redis_key}')
            redis_host = None
            redis_port = None
            try:
                redis_host = redis_address.split(':')[0]
                redis_port = redis_address.split(':')[1]
            except Exception as c:
                err = (f'{label} failed parsing redis_address={redis_address} '
                       f'with ex={c} '
                       'please set one with the format: '
                       '<hostname>:<port>')
                log.critical(err)
                res = build_result.build_result(status=ae_consts.ERR,
                                                err=err,
                                                rec=rec)
                return res
            # end of checking that redis_address is valid

            try:
                log.debug(
                    f'{label} publishing redis={redis_host}:{redis_port} '
                    f'db={redis_db} key={redis_key} '
                    f'updated={updated} expire={redis_expire}')

                rc = redis.Redis(host=redis_host,
                                 port=redis_port,
                                 password=redis_password,
                                 db=redis_db)

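                # compress the JSON payload with zlib before publishing so
                # redis stores the smaller blob; fall back to the raw data
                # if compression fails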
                already_compressed = False
                uses_data = data
                try:
                    uses_data = zlib.compress(
                        json.dumps(data).encode(encoding), 9)
                    already_compressed = True
                except Exception as p:
                    log.critical('failed to compress dataset for '
                                 f'redis_key={redis_key} with ex={p}')

                redis_set_res = redis_set.set_data_in_redis_key(
                    label=label,
                    client=rc,
                    key=redis_key,
                    data=uses_data,
                    already_compressed=already_compressed,
                    serializer=serializer,
                    encoding=encoding,
                    expire=redis_expire,
                    px=None,
                    nx=False,
                    xx=False)

                log.debug(
                    f'{label} redis_set '
                    f'status={ae_consts.get_status(redis_set_res["status"])} '
                    f'err={redis_set_res["err"]}')

            except Exception as e:
                log.error(f'{label} failed - redis publish to '
                          f'key={redis_key} ex={e}')
            # end of try/ex for redis publish
        else:
            log.debug(f'{label} SKIP REDIS publish key={redis_key}')
        # end of if enable_redis_publish

        res = build_result.build_result(status=ae_consts.SUCCESS,
                                        err=None,
                                        rec=rec)

    except Exception as e:
        res = build_result.build_result(status=ae_consts.ERR,
                                        err=('failed - publish_pricing_update '
                                             f'dict={work_dict} with ex={e}'),
                                        rec=rec)
        log.error(f'{label} - {res["err"]}')
    # end of try/ex

    log.debug(f'task - publish_pricing_update done - {label} - '
              f'status={ae_consts.get_status(res["status"])}')

    return get_task_results.get_task_results(work_dict=work_dict, result=res)

def handle_pricing_update_task(
        self,
        work_dict):
    """handle_pricing_update_task

    Writes pricing updates to S3 and Redis

    :param work_dict: dictionary for key/values
    """

    label = 'update_prices'

    log.info(
        'task - {} - start'.format(
            label))

    ticker = ae_consts.TICKER
    ticker_id = 1
    rec = {
        'ticker': None,
        'ticker_id': None,
        'pricing_s3_bucket': None,
        'pricing_s3_key': None,
        'pricing_size': None,
        'pricing_redis_key': None,
        'news_s3_bucket': None,
        'news_s3_key': None,
        'news_size': None,
        'news_redis_key': None,
        'options_s3_bucket': None,
        'options_s3_key': None,
        'options_size': None,
        'options_redis_key': None
    }
    res = build_result.build_result(
        status=ae_consts.NOT_RUN,
        err=None,
        rec=rec)

    try:
        ticker = work_dict.get(
            'ticker',
            ae_consts.TICKER)
        ticker_id = int(work_dict.get(
            'ticker_id',
            1))

        rec['ticker'] = ticker
        rec['ticker_id'] = ticker_id

        pricing_data = work_dict['pricing']
        news_data = work_dict['news']
        options_data = work_dict['options']
        calls_data = options_data.get(
            'calls',
            ae_consts.EMPTY_DF_STR)
        puts_data = options_data.get(
            'puts',
            ae_consts.EMPTY_DF_STR)
        updated = work_dict['updated']
        label = work_dict.get(
            'label',
            label)

        cur_date = datetime.datetime.utcnow()
        cur_date_str = cur_date.strftime(
            '%Y_%m_%d_%H_%M_%S')

        pricing_s3_key = work_dict.get(
            'pricing_s3_key',
            'pricing_ticker_{}_id_{}_date_{}'.format(
                ticker,
                ticker_id,
                cur_date_str))
        news_s3_key = work_dict.get(
            'news_s3_key',
            'news_ticker_{}_id_{}_date_{}'.format(
                ticker,
                ticker_id,
                cur_date_str))
        options_s3_key = work_dict.get(
            'options_s3_key',
            'options_ticker_{}_id_{}_date_{}'.format(
                ticker,
                ticker_id,
                cur_date_str))
        calls_s3_key = work_dict.get(
            'calls_s3_key',
            'calls_ticker_{}_id_{}_date_{}'.format(
                ticker,
                ticker_id,
                cur_date_str))
        puts_s3_key = work_dict.get(
            'puts_s3_key',
            'puts_ticker_{}_id_{}_date_{}'.format(
                ticker,
                ticker_id,
                cur_date_str))

        pricing_s3_bucket = work_dict.get(
            'pricing_s3_bucket',
            'pricing')
        news_s3_bucket = work_dict.get(
            'news_s3_bucket',
            'news')
        options_s3_bucket = work_dict.get(
            'options_s3_bucket',
            'options')

        pricing_by_ticker_redis_key = work_dict.get(
            'pricing_redis_key',
            'price_{}'.format(
                ticker))
        news_by_ticker_redis_key = work_dict.get(
            'news_redis_key',
            'news_{}'.format(
                ticker))
        options_by_ticker_redis_key = work_dict.get(
            'options_redis_key',
            'options_{}'.format(
                ticker))
        calls_by_ticker_redis_key = work_dict.get(
            'calls_redis_key',
            'calls_{}'.format(
                ticker))
        puts_by_ticker_redis_key = work_dict.get(
            'puts_redis_key',
            'puts_{}'.format(
                ticker))

        pricing_size = len(str(
            pricing_data))
        news_size = len(str(
            news_data))
        options_size = len(str(
            options_data))
        calls_size = len(str(
            calls_data))
        puts_size = len(str(
            puts_data))

        payloads_to_publish = [
            {
                'ticker': ticker,
                'ticker_id': ticker_id,
                's3_bucket': pricing_s3_bucket,
                's3_key': pricing_s3_key,
                'data': pricing_data,
                'redis_key': pricing_by_ticker_redis_key,
                'size': pricing_size,
                'updated': updated,
                'label': label
            },
            {
                'ticker': ticker,
                'ticker_id': ticker_id,
                's3_bucket': options_s3_bucket,
                's3_key': options_s3_key,
                'data': options_data,
                'redis_key': options_by_ticker_redis_key,
                'size': options_size,
                'updated': updated,
                'label': label
            },
            {
                'ticker': ticker,
                'ticker_id': ticker_id,
                's3_bucket': options_s3_bucket,
                's3_key': calls_s3_key,
                'data': calls_data,
                'redis_key': calls_by_ticker_redis_key,
                'size': calls_size,
                'updated': updated,
                'label': label
            },
            {
                'ticker': ticker,
                'ticker_id': ticker_id,
                's3_bucket': options_s3_bucket,
                's3_key': puts_s3_key,
                'data': puts_data,
                'redis_key': puts_by_ticker_redis_key,
                'size': puts_size,
                'updated': updated,
                'label': label
            },
            {
                'ticker': ticker,
                'ticker_id': ticker_id,
                's3_bucket': news_s3_bucket,
                's3_key': news_s3_key,
                'data': news_data,
                'redis_key': news_by_ticker_redis_key,
                'size': news_size,
                'updated': updated,
                'label': label
            }
        ]

        rec['pricing_s3_bucket'] = pricing_s3_bucket
        rec['pricing_s3_key'] = pricing_s3_key
        rec['pricing_redis_key'] = pricing_by_ticker_redis_key
        rec['news_s3_bucket'] = news_s3_bucket
        rec['news_s3_key'] = news_s3_key
        rec['news_redis_key'] = news_by_ticker_redis_key
        rec['options_s3_bucket'] = options_s3_bucket
        rec['options_s3_key'] = options_s3_key
        rec['options_redis_key'] = options_by_ticker_redis_key

        total_payloads = len(payloads_to_publish)

        log.info(
            '{} ticker={} processing payloads={}'.format(
                label,
                ticker,
                total_payloads))

        for ridx, r in enumerate(payloads_to_publish):
            log.info(
                '{} ticker={} update={}/{} key={} redis_key={}'.format(
                    label,
                    ticker,
                    ridx,
                    total_payloads,
                    r['s3_key'],
                    r['redis_key']))
            r['celery_disabled'] = False
            r['label'] = 'handle_pricing_update_task-{}'.format(
                label)
            payload_res = \
                publisher.task_publish_pricing_update(
                    work_dict=r)
            log.info(
                '{} ticker={} update={}/{} status={} '
                's3_key={} redis_key={}'.format(
                    label,
                    ticker,
                    ridx,
                    total_payloads,
                    ae_consts.get_status(status=payload_res['status']),
                    r['s3_key'],
                    r['redis_key']))
        # end of for all payloads to publish

        res = build_result.build_result(
            status=ae_consts.SUCCESS,
            err=None,
            rec=rec)

    except Exception as e:
        res = build_result.build_result(
            status=ae_consts.ERR,
            err=(
                'failed - handle_pricing_update_task '
                'dict={} with ex={}').format(
                    work_dict,
                    e),
            rec=rec)
        log.error(
            '{} - {}'.format(
                label,
                res['err']))
    # end of try/ex

    log.info(
        'task - handle_pricing_update_task done - '
        '{} - status={}'.format(
            label,
            ae_consts.get_status(res['status'])))

    return get_task_results.get_task_results(
        work_dict=work_dict,
        result=res)
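
# A minimal end-to-end sketch (assumptions: the tasks are invoked directly
# with self=None while celery is disabled, and get_task_results hands the
# result dict back inline in that mode):
# collect_res = get_new_pricing_data(
#     None, {'ticker': 'SPY', 'fetch_mode': 'iex', 'celery_disabled': True})
# if collect_res and collect_res.get('status') == ae_consts.SUCCESS:
#     handle_pricing_update_task(
#         None, {'ticker': 'SPY',
#                'pricing': collect_res['rec'].get('pricing', {}),
#                'news': collect_res['rec'].get('news', {}),
#                'options': collect_res['rec'].get('options', {}),
#                'updated': collect_res['rec'].get('updated')})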