Example #1
    def test_extract_option_puts_success(self):
        """test_extract_option_puts_success"""
        test_name = 'test_extract_option_puts_success'
        work = api_requests.get_ds_dict(ticker=self.ticker, label=test_name)

        status, df = yahoo_extract.extract_option_puts_dataset(work_dict=work)
        self.assertIsNotNone(df)
        self.assertEqual(ae_consts.get_status(status=status), 'SUCCESS')
        self.assertTrue(len(df.index) == 1)
        self.assertEqual(df['strike'][0], 380)
Example #2
    def test_extract_pricing_success(self):
        """test_extract_pricing_success"""
        test_name = 'test_extract_pricing_dataset_success'
        work = api_requests.get_ds_dict(ticker=self.ticker, label=test_name)

        status, df = yahoo_extract.extract_pricing_dataset(work_dict=work)
        self.assertIsNotNone(df)
        self.assertEqual(ae_consts.get_status(status=status), 'SUCCESS')
        self.assertTrue(len(df.index) == 1)
        self.assertEqual(df['regularMarketPrice'][0], 288.09)
Example #3
def extract_dataset(key,
                    ticker=None,
                    date=None,
                    work_dict=None,
                    scrub_mode='NO_SORT',
                    verbose=False):
    """extract_dataset

    Extract the IEX key data for a ticker from Redis and
    return it as a tuple ``(status, pandas.DataFrame)``

    :param key: IEX dataset key
    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    :param verbose: optional - boolean for turning on logging
    """
    if not key or key not in keys:
        log.error(f'unsupported extract key={key} in keys={keys}')
        # return a (status, DataFrame) tuple to match the documented contract
        return ae_consts.ERR, None
    label = key
    df_type = keys[key]
    df_str = iex_consts.get_datafeed_str(df_type=df_type)
    latest_close_date = ae_utils.get_last_close_str()

    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
    if not work_dict:
        work_dict = api_requests.get_ds_dict(ticker=ticker)

    req = copy.deepcopy(work_dict)

    if not use_date:
        use_date = latest_close_date

    redis_key = f'{ticker}_{use_date}_{key}'
    req['redis_key'] = redis_key
    req['s3_key'] = redis_key

    if verbose:
        log.info(f'{label} - {df_str} - '
                 f'date={date} '
                 f'redis_key={req["redis_key"]} '
                 f's3_key={req["s3_key"]} '
                 f'{ae_consts.ppj(req)}')

    return extract_utils.perform_extract(df_type=df_type,
                                         df_str=df_str,
                                         work_dict=req,
                                         scrub_mode=scrub_mode,
                                         verbose=verbose)
Example #4
    def test_integration_extract_minute_dataset(self):
        """test_integration_extract_minute_dataset"""
        if ev('INT_TESTS', '0') == '0':
            return
        ticker = 'NFLX'
        label = 'IEX minute dataset'
        # build dataset cache dictionary
        work = get_ds_dict(ticker=ticker, label=label)

        status, df = extract_minute_dataset(work_dict=work)
        self._check(df=df, status=status, label=label, work=work)
Example #5
    def test_extract_news_success(self):
        """test_extract_news_success"""
        test_name = 'test_extract_news_success'
        work = api_requests.get_ds_dict(ticker=self.ticker, label=test_name)

        status, df = yahoo_extract.extract_yahoo_news_dataset(work_dict=work)
        self.assertIsNotNone(df)
        self.assertEqual(ae_consts.get_status(status=status), 'SUCCESS')
        self.assertTrue(len(df.index) == 2)
        self.assertEqual(df['u'][1], 'http://finance.yahoo.com/news/url2')
        self.assertEqual(df['tt'][1], '1493311950')
Example #6
    def test_integration_extract_option_puts(self):
        """test_integration_extract_option_puts"""
        if ae_consts.ev('INT_TESTS', '0') == '0':
            return

        # build dataset cache dictionary
        work = api_requests.get_ds_dict(
            ticker='SPY', label='test_integration_extract_option_puts')

        status, df = yahoo_extract.extract_option_puts_dataset(work_dict=work)
        if status == ae_consts.SUCCESS:
            self.assertIsNotNone(df)
            self.debug_df(df=df)
        else:
            log.critical('Yahoo Option Puts are missing in redis '
                         f'for ticker={work["ticker"]} '
                         f'status={ae_consts.get_status(status=status)}')
Example #7
    def test_integration_fetch_puts_dataset(self):
        """test_integration_fetch_puts_dataset"""
        if ae_consts.ev('INT_TESTS', '0') == '0':
            return
        ticker = 'SPY'
        label = 'TD puts dataset'
        # build dataset cache dictionary
        work = api_requests.get_ds_dict(ticker=ticker, label=label)

        status, df = td_fetch.fetch_data(work_dict=work, fetch_type='tdputs')
        if status == ae_consts.SUCCESS:
            self.assertIsNotNone(df)
            self.debug_df(df=df)
        else:
            log.critical(f'{label} is missing in redis '
                         f'for ticker={work["ticker"]} '
                         f'status={ae_consts.get_status(status=status)}')
Example #8
    def test_integration_extract_option_puts(self):
        """test_integration_extract_option_puts"""
        if ev('INT_TESTS', '0') == '0':
            return

        # build dataset cache dictionary
        work = get_ds_dict(ticker='NFLX',
                           label='test_integration_extract_option_puts')

        status, df = extract_option_puts_dataset(work_dict=work)
        if status == SUCCESS:
            self.assertIsNotNone(df)
            self.debug_df(df=df)
        else:
            log.critical('Yahoo Option Puts are missing in redis '
                         'for ticker={} status={}'.format(
                             work['ticker'], get_status(status=status)))
Example #9
    def test_integration_extract_minute_dataset(self):
        """test_integration_extract_minute_dataset"""
        if ev('INT_TESTS', '0') == '0':
            return
        ticker = 'NFLX'
        label = 'IEX minute dataset'
        # build dataset cache dictionary
        work = get_ds_dict(ticker=ticker, label=label)

        status, df = extract_minute_dataset(work_dict=work)
        if status == SUCCESS:
            self.assertIsNotNone(df)
            self.debug_df(df=df)
        else:
            log.critical('{} is missing in redis '
                         'for ticker={} status={}'.format(
                             label, work['ticker'], get_status(status=status)))
Example #10
    def test_get_ds_dict(self):
        """test_get_ds_dict"""
        test_name = 'test_build_dataset_cache_dict'
        base_key = f'{self.ticker}_{self.last_close_str}'
        cache_dict = get_ds_dict(ticker=self.ticker, label=test_name)

        self.assertIsNotNone(cache_dict)
        self.assertEqual(cache_dict['ticker'], self.ticker)
        self.assertEqual(cache_dict['daily'], f'{base_key}_daily')
        self.assertEqual(cache_dict['minute'], f'{base_key}_minute')
        self.assertEqual(cache_dict['quote'], f'{base_key}_quote')
        self.assertEqual(cache_dict['stats'], f'{base_key}_stats')
        self.assertEqual(cache_dict['peers'], f'{base_key}_peers')
        self.assertEqual(cache_dict['news1'], f'{base_key}_news1')
        self.assertEqual(cache_dict['financials'], f'{base_key}_financials')
        self.assertEqual(cache_dict['earnings'], f'{base_key}_earnings')
        self.assertEqual(cache_dict['dividends'], f'{base_key}_dividends')
        self.assertEqual(cache_dict['company'], f'{base_key}_company')
        self.assertEqual(cache_dict['options'], f'{base_key}_options')
        self.assertEqual(cache_dict['pricing'], f'{base_key}_pricing')
        self.assertEqual(cache_dict['news'], f'{base_key}_news')
        self.assertEqual(cache_dict['version'], CACHE_DICT_VERSION)
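The same cache dictionary can be built outside a test. A minimal sketch, assuming ``analysis_engine.api_requests`` exposes ``get_ds_dict`` as in the examples above:

    import analysis_engine.api_requests as api_requests

    # keys follow the '<ticker>_<last close date>_<dataset>' naming
    # convention asserted in the test above
    cache_dict = api_requests.get_ds_dict(ticker='SPY', label='demo')
    print(cache_dict['daily'])   # SPY_<YYYY-MM-DD>_daily
    print(cache_dict['minute'])  # SPY_<YYYY-MM-DD>_minute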
Example #11
    def test_integration_extract_option_calls(self):
        """test_integration_extract_option_calls"""
        if ae_consts.ev('INT_TESTS', '0') == '0':
            return

        # build dataset cache dictionary
        work = api_requests.get_ds_dict(
            ticker='SPY',
            base_key='SPY_2018-12-31',
            label='test_integration_extract_option_calls')

        status, df = yahoo_extract.extract_option_calls_dataset(work_dict=work)
        if status == ae_consts.SUCCESS:
            self.assertIsNotNone(df)
            self.debug_df(df=df)
            self.assertTrue(ae_consts.is_df(df=df))
            for i, r in df.iterrows():
                print(ae_consts.ppj(json.loads(r.to_json())))
            log.info('done printing option call data')
        else:
            log.critical('Yahoo Option Calls are missing in redis '
                         f'for ticker={work["ticker"]} '
                         f'status={ae_consts.get_status(status=status)}')
Example #12
def run_algo(
        ticker=None,
        tickers=None,
        algo=None,  # optional derived ``analysis_engine.algo.Algo`` instance
        balance=None,  # float starting base capital
        commission=None,  # float for single trade commission for buy or sell
        start_date=None,  # string YYYY-MM-DD HH:MM:SS
        end_date=None,  # string YYYY-MM-DD HH:MM:SS
        datasets=None,  # string list of identifiers
        num_owned_dict=None,  # not supported
        cache_freq='daily',  # 'minute' not supported
        auto_fill=True,
        load_config=None,
        report_config=None,
        history_config=None,
        extract_config=None,
        use_key=None,
        extract_mode='all',
        iex_datasets=None,
        redis_enabled=True,
        redis_address=None,
        redis_db=None,
        redis_password=None,
        redis_expire=None,
        redis_key=None,
        s3_enabled=True,
        s3_address=None,
        s3_bucket=None,
        s3_access_key=None,
        s3_secret_key=None,
        s3_region_name=None,
        s3_secure=False,
        s3_key=None,
        celery_disabled=True,
        broker_url=None,
        result_backend=None,
        label=None,
        name=None,
        timeseries=None,
        trade_strategy=None,
        verbose=False,
        publish_to_slack=True,
        publish_to_s3=True,
        publish_to_redis=True,
        extract_datasets=None,
        config_file=None,
        config_dict=None,
        version=1,
        raise_on_err=True,
        **kwargs):
    """run_algo

    Run an algorithm with steps:

        1) Extract redis keys between dates
        2) Compile a data pipeline dictionary (call it ``data``)
        3) Call algorithm's ``myalgo.handle_data(data=data)``

    .. note:: If no ``algo`` is set, the
        ``analysis_engine.algo.BaseAlgo`` algorithm
        is used.

    .. note:: Please ensure Redis and Minio are running
        before trying to extract tickers

    **Stock tickers to extract**

    :param ticker: single stock ticker/symbol/ETF to extract
    :param tickers: optional - list of tickers to extract
    :param use_key: optional - extract historical key from Redis

    **Algo Configuration**

    :param algo: derived instance of ``analysis_engine.algo.Algo`` object
    :param balance: optional - float balance parameter
        can also be set on the ``algo`` object if not
        set on the args
    :param commission: float for single trade commission for
        buy or sell. Can also be set on the ``algo`` object
    :param start_date: string ``YYYY-MM-DD_HH:MM:SS`` cache value
    :param end_date: string ``YYYY-MM-DD_HH:MM:SS`` cache value
    :param datasets: list of strings that are ``iex`` or ``yahoo``
        datasets that are cached.
    :param cache_freq: optional - depending on whether you are running data
        feeds on a ``daily`` cron (default) vs every ``minute`` (or faster)
    :param num_owned_dict: not supported yet
    :param auto_fill: optional - boolean for auto filling
        buy/sell orders for backtesting (default is
        ``True``)
    :param trading_calendar: ``trading_calendar.TradingCalendar``
        object, by default the
        ``analysis_engine.calendars.always_open.AlwaysOpen``
        trading calendar is used
        (a ``TFSExchangeCalendar`` can also be used)
    :param config_file: path to a json file
        containing custom algorithm object
        member values (like indicator configuration and
        predict future date units ahead for a backtest)
    :param config_dict: optional - dictionary that
        can be passed to derived class implementations
        of: ``def load_from_config(config_dict=config_dict)``

    **Timeseries**

    :param timeseries: optional - string to
        set ``day`` or ``minute`` backtesting
        or live trading
        (default is ``minute``)

    **Trading Strategy**

    :param trade_strategy: optional - string to
        set the type of ``Trading Strategy``
        for backtesting or live trading
        (default is ``count``)

    **Algorithm Dataset Loading, Extracting, Reporting
    and Trading History arguments**

    :param load_config: optional - dictionary
        for setting member variables to load an
        algorithm-ready dataset from
        a file, s3 or redis
    :param report_config: optional - dictionary
        for setting member variables to publish
        an algo ``trading performance report`` to s3,
        redis, a file or slack
    :param history_config: optional - dictionary
        for setting member variables to publish
        an algo ``trade history`` to s3, redis, a file
        or slack
    :param extract_config: optional - dictionary
        for setting member variables to publish
        an algo-ready ``extracted dataset`` to s3,
        redis, a file or slack

    **(Optional) Data sources, datafeeds and datasets to gather**

    :param iex_datasets: list of strings for gathering specific `IEX
        datasets <https://iexcloud.io/>`__
        which are set as consts: ``analysis_engine.iex.consts.FETCH_*``.

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all
        datasets in Redis
        (default is ``True``)
    :param redis_address: Redis connection string
        format is ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use
        (default is ``0``)
    :param redis_password: optional - Redis password
        (default is ``None``)
    :param redis_expire: optional - Redis expire value
        (default is ``None``)
    :param redis_key: optional - redis key not used
        (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for auto-archiving on Minio (S3)
        (default is ``True``)
    :param s3_address: Minio S3 connection string
        format ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key
        (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key
        (default is ``trex123321``)
    :param s3_region_name: S3 region name
        (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption
        (default is ``False``)
    :param s3_key: optional s3 key not used
        (default is ``None``)

    **(Optional) Celery worker broker connectivity arguments**

    :param celery_disabled: bool - toggle synchronous mode or publish
        to an engine connected to the `Celery broker and backend
        <https://github.com/celery/celery#transports-and-backends>`__
        (default is ``True`` - synchronous mode without an engine
        or need for a broker or backend for Celery)
    :param broker_url: Celery broker url
        (default is ``redis://0.0.0.0:6379/11``)
    :param result_backend: Celery backend url
        (default is ``redis://0.0.0.0:6379/12``)
    :param label: tracking log label
    :param publish_to_slack: optional - boolean for
        publishing to slack (coming soon)
    :param publish_to_s3: optional - boolean for
        publishing to s3 (coming soon)
    :param publish_to_redis: optional - boolean for
        publishing to redis (coming soon)

    **(Optional) Debugging**

    :param verbose: bool - show extract warnings
        and other debug logging (default is False)
    :param raise_on_err: optional - boolean for
        unittests and developing algorithms with the
        ``analysis_engine.run_algo.run_algo`` helper.
        When set to ``True``, exceptions
        are raised to the calling functions

    :param kwargs: keyword arguments dictionary
    """

    # dictionary structure with a list sorted on: ascending dates
    # algo_data_req[ticker][list][dataset] = pd.DataFrame
    algo_data_req = {}
    extract_requests = []
    return_algo = False  # return created algo objects for use by caller
    rec = {}
    msg = None

    use_tickers = tickers
    use_balance = balance
    use_commission = commission

    if ticker:
        use_tickers = [ticker]
    else:
        if not use_tickers:
            use_tickers = []

    # if these are not set as args, but the algo object
    # has them, use them instead:
    if algo:
        if len(use_tickers) == 0:
            use_tickers = algo.get_tickers()
        if not use_balance:
            use_balance = algo.get_balance()
        if not use_commission:
            use_commission = algo.get_commission()

    default_iex_datasets = [
        'daily', 'minute', 'quote', 'stats', 'peers', 'news', 'financials',
        'earnings', 'dividends', 'company'
    ]

    if not iex_datasets:
        iex_datasets = default_iex_datasets

    if redis_enabled:
        if not redis_address:
            redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379')
        if not redis_password:
            redis_password = os.getenv('REDIS_PASSWORD', None)
        if not redis_db:
            redis_db = int(os.getenv('REDIS_DB', '0'))
        if not redis_expire:
            redis_expire = os.getenv('REDIS_EXPIRE', None)
    if s3_enabled:
        if not s3_address:
            s3_address = os.getenv('S3_ADDRESS', 'localhost:9000')
        if not s3_access_key:
            s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey')
        if not s3_secret_key:
            s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321')
        if not s3_region_name:
            s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
        if not s3_secure:
            s3_secure = os.getenv('S3_SECURE', '0') == '1'
        if not s3_bucket:
            s3_bucket = os.getenv('S3_BUCKET', 'dev')
    if not broker_url:
        broker_url = os.getenv('WORKER_BROKER_URL', 'redis://0.0.0.0:6379/11')
    if not result_backend:
        result_backend = os.getenv('WORKER_BACKEND_URL',
                                   'redis://0.0.0.0:6379/12')

    if not label:
        label = 'run-algo'

    num_tickers = len(use_tickers)
    last_close_str = ae_utils.get_last_close_str()

    if iex_datasets:
        if verbose:
            log.info(f'{label} - tickers={num_tickers} '
                     f'iex={json.dumps(iex_datasets)}')
    else:
        if verbose:
            log.info(f'{label} - tickers={num_tickers}')

    ticker_key = use_key
    if not ticker_key:
        ticker_key = f'{ticker}_{last_close_str}'

    if not algo:
        algo = base_algo.BaseAlgo(ticker=None,
                                  tickers=use_tickers,
                                  balance=use_balance,
                                  commission=use_commission,
                                  config_dict=config_dict,
                                  name=label,
                                  auto_fill=auto_fill,
                                  timeseries=timeseries,
                                  trade_strategy=trade_strategy,
                                  publish_to_slack=publish_to_slack,
                                  publish_to_s3=publish_to_s3,
                                  publish_to_redis=publish_to_redis,
                                  raise_on_err=raise_on_err)
        return_algo = True
        # the algo object is stored
        # in the result at: res['rec']['algo']

    if not algo:
        msg = f'{label} - missing algo object'
        log.error(msg)
        return build_result.build_result(status=ae_consts.EMPTY,
                                         err=msg,
                                         rec=rec)

    if raise_on_err:
        log.debug(f'{label} - enabling algo exception raises')
        algo.raise_on_err = True

    indicator_datasets = algo.get_indicator_datasets()
    if len(indicator_datasets) == 0:
        indicator_datasets = ae_consts.BACKUP_DATASETS
        log.info(f'using all datasets={indicator_datasets}')

    verbose_extract = False
    if config_dict:
        verbose_extract = config_dict.get('verbose_extract', False)

    common_vals = {}
    common_vals['base_key'] = ticker_key
    common_vals['celery_disabled'] = celery_disabled
    common_vals['ticker'] = ticker
    common_vals['label'] = label
    common_vals['iex_datasets'] = iex_datasets
    common_vals['s3_enabled'] = s3_enabled
    common_vals['s3_bucket'] = s3_bucket
    common_vals['s3_address'] = s3_address
    common_vals['s3_secure'] = s3_secure
    common_vals['s3_region_name'] = s3_region_name
    common_vals['s3_access_key'] = s3_access_key
    common_vals['s3_secret_key'] = s3_secret_key
    common_vals['s3_key'] = ticker_key
    common_vals['redis_enabled'] = redis_enabled
    common_vals['redis_address'] = redis_address
    common_vals['redis_password'] = redis_password
    common_vals['redis_db'] = redis_db
    common_vals['redis_key'] = ticker_key
    common_vals['redis_expire'] = redis_expire

    use_start_date_str = start_date
    use_end_date_str = end_date
    last_close_date = ae_utils.last_close()
    end_date_val = None

    cache_freq_fmt = ae_consts.COMMON_TICK_DATE_FORMAT

    if not use_end_date_str:
        use_end_date_str = last_close_date.strftime(cache_freq_fmt)

    end_date_val = ae_utils.get_date_from_str(date_str=use_end_date_str,
                                              fmt=cache_freq_fmt)
    start_date_val = None

    if not use_start_date_str:
        start_date_val = end_date_val - datetime.timedelta(days=60)
        use_start_date_str = start_date_val.strftime(cache_freq_fmt)
    else:
        start_date_val = datetime.datetime.strptime(
            use_start_date_str, ae_consts.COMMON_TICK_DATE_FORMAT)

    total_dates = (end_date_val - start_date_val).days

    if end_date_val < start_date_val:
        msg = (
            f'{label} - invalid dates - start_date={start_date_val} is after '
            f'end_date={end_date_val}')
        raise Exception(msg)

    if verbose:
        log.info(f'{label} - days={total_dates} '
                 f'start={use_start_date_str} '
                 f'end={use_end_date_str} '
                 f'datasets={indicator_datasets}')

    for ticker in use_tickers:
        req = algo_utils.build_algo_request(ticker=ticker,
                                            use_key=use_key,
                                            start_date=use_start_date_str,
                                            end_date=use_end_date_str,
                                            datasets=datasets,
                                            balance=use_balance,
                                            cache_freq=cache_freq,
                                            timeseries=timeseries,
                                            trade_strategy=trade_strategy,
                                            label=label)
        ticker_key = f'{ticker}_{last_close_str}'
        common_vals['ticker'] = ticker
        common_vals['base_key'] = ticker_key
        common_vals['redis_key'] = ticker_key
        common_vals['s3_key'] = ticker_key

        for date_key in req['extract_datasets']:
            date_req = api_requests.get_ds_dict(ticker=ticker,
                                                base_key=date_key,
                                                ds_id=label,
                                                service_dict=common_vals)
            node_date_key = date_key.replace(f'{ticker}_', '')
            extract_requests.append({
                'id': date_key,
                'ticker': ticker,
                'date_key': date_key,
                'date': node_date_key,
                'req': date_req
            })
    # end of for all ticker in use_tickers

    first_extract_date = None
    last_extract_date = None
    total_extract_requests = len(extract_requests)
    cur_idx = 1
    for idx, extract_node in enumerate(extract_requests):

        extract_ticker = extract_node['ticker']
        extract_date = extract_node['date']
        ds_node_id = extract_node['id']

        if not first_extract_date:
            first_extract_date = extract_date
        last_extract_date = extract_date
        perc_progress = ae_consts.get_percent_done(
            progress=cur_idx, total=total_extract_requests)
        percent_label = (f'{label} '
                         f'ticker={extract_ticker} '
                         f'date={extract_date} '
                         f'{perc_progress} '
                         f'{idx}/{total_extract_requests} '
                         f'{indicator_datasets}')
        if verbose:
            log.info(f'extracting - {percent_label}')

        ticker_bt_data = build_ds_node.build_dataset_node(
            ticker=extract_ticker,
            date=extract_date,
            service_dict=common_vals,
            datasets=indicator_datasets,
            log_label=label,
            verbose=verbose_extract)

        if ticker not in algo_data_req:
            algo_data_req[ticker] = []

        algo_data_req[ticker].append({
            'id': ds_node_id,  # id is currently the cache key in redis
            'date': extract_date,  # used to confirm dates in asc order
            'data': ticker_bt_data
        })

        if verbose:
            log.info(f'extract - {percent_label} '
                     f'dataset={len(algo_data_req[ticker])}')
        cur_idx += 1
    # end of for extract_node in extract_requests

    # this could be a separate celery task
    status = ae_consts.NOT_RUN
    if len(algo_data_req) == 0:
        msg = (f'{label} - nothing to test - no data found for '
               f'tickers={use_tickers} '
               f'between {first_extract_date} and {last_extract_date}')
        log.info(msg)
        return build_result.build_result(status=ae_consts.EMPTY,
                                         err=msg,
                                         rec=rec)

    # this could be a separate celery task
    try:
        if verbose:
            log.info(f'handle_data START - {percent_label} from '
                     f'{first_extract_date} to {last_extract_date}')
        algo.handle_data(data=algo_data_req)
        if verbose:
            log.info(f'handle_data END - {percent_label} from '
                     f'{first_extract_date} to {last_extract_date}')
    except Exception as e:
        a_name = algo.get_name()
        a_debug_msg = algo.get_debug_msg()
        if not a_debug_msg:
            a_debug_msg = 'debug message not set'
        a_config_dict = ae_consts.ppj(algo.config_dict)
        msg = (f'{percent_label} - algo={a_name} '
               f'encountered exception in handle_data tickers={use_tickers} '
               f'from {first_extract_date} to {last_extract_date} ex={e} '
               f'and failed during operation: {a_debug_msg}')
        if raise_on_err:
            if algo:
                try:
                    ind_obj = \
                        algo.get_indicator_process_last_indicator()
                    if ind_obj:
                        ind_obj_path = ind_obj.get_path_to_module()
                        ind_obj_config = ae_consts.ppj(ind_obj.get_config())
                        found_error_hint = False
                        if hasattr(ind_obj.use_df, 'to_json'):
                            if len(ind_obj.use_df.index) == 0:
                                log.critical(
                                    f'indicator failure report for '
                                    f'last module: '
                                    f'{ind_obj_path} '
                                    f'indicator={ind_obj.get_name()} '
                                    f'config={ind_obj_config} '
                                    f'dataset={ind_obj.use_df.head(5)} '
                                    f'name_of_dataset={ind_obj.uses_data}')
                                log.critical(
                                    '--------------------------------------'
                                    '--------------------------------------')
                                log.critical('Please check if this indicator: '
                                             f'{ind_obj_path} '
                                             'supports Empty Dataframes')
                                log.critical(
                                    '--------------------------------------'
                                    '--------------------------------------')
                                found_error_hint = True
                        # indicator error hints

                        if not found_error_hint:
                            log.critical(
                                f'indicator failure report for last module: '
                                f'{ind_obj_path} '
                                f'indicator={ind_obj.get_name()} '
                                f'config={ind_obj_config} '
                                f'dataset={ind_obj.use_df.head(5)} '
                                f'name_of_dataset={ind_obj.uses_data}')
                except Exception as f:
                    log.critical(f'failed to pull indicator processor '
                                 f'last indicator for debugging '
                                 f'from ex={e} with parsing ex={f}')
                # end of ignoring non-supported ways of creating
                # indicator processors
            log.error(msg)
            log.error(f'algo failure report: '
                      f'algo={a_name} handle_data() '
                      f'config={a_config_dict} ')
            log.critical(f'algo failed during operation: {a_debug_msg}')
            raise e
        else:
            log.error(msg)
            return build_result.build_result(status=ae_consts.ERR,
                                             err=msg,
                                             rec=rec)
    # end of try/ex

    # this could be a separate celery task
    try:
        if verbose:
            log.info(f'get_result START - {percent_label} from '
                     f'{first_extract_date} to {last_extract_date}')
        rec = algo.get_result()
        status = ae_consts.SUCCESS
        if verbose:
            log.info(f'get_result END - {percent_label} from '
                     f'{first_extract_date} to {last_extract_date}')
    except Exception as e:
        msg = (
            f'{percent_label} - algo={algo.get_name()} encountered exception '
            f'in get_result tickers={use_tickers} from '
            f'{first_extract_date} to {last_extract_date} ex={e}')
        if raise_on_err:
            if algo:
                log.error(f'algo={algo.get_name()} failed in get_result with '
                          f'debug_msg={algo.get_debug_msg()}')
            log.error(msg)
            raise e
        else:
            log.error(msg)
            return build_result.build_result(status=ae_consts.ERR,
                                             err=msg,
                                             rec=rec)
    # end of try/ex

    if return_algo:
        rec['algo'] = algo

    return build_result.build_result(status=status, err=msg, rec=rec)
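A hedged usage sketch for ``run_algo``; the module path ``analysis_engine.run_algo`` is an assumption, and Redis/Minio must already be running with cached datasets covering the date range:

    import analysis_engine.consts as ae_consts
    import analysis_engine.run_algo as run_algo

    res = run_algo.run_algo(ticker='SPY',
                            balance=10000.00,
                            start_date='2019-01-02 00:00:00',
                            end_date='2019-01-31 00:00:00',
                            raise_on_err=False)
    if res['status'] == ae_consts.SUCCESS:
        # when no algo argument is given, the created BaseAlgo
        # is returned at res['rec']['algo']
        algo = res['rec'].get('algo')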
Example #13
def fetch(ticker=None,
          tickers=None,
          fetch_mode=None,
          iex_datasets=None,
          redis_enabled=True,
          redis_address=None,
          redis_db=None,
          redis_password=None,
          redis_expire=None,
          s3_enabled=True,
          s3_address=None,
          s3_bucket=None,
          s3_access_key=None,
          s3_secret_key=None,
          s3_region_name=None,
          s3_secure=False,
          celery_disabled=True,
          broker_url=None,
          result_backend=None,
          label=None,
          verbose=False):
    """fetch

    Fetch all supported datasets for a stock ``ticker`` or
    a list of ``tickers`` and return them in a dictionary. Once
    run, the datasets will all be cached in Redis and archived
    in Minio (S3) by default.

    Python example:

    .. code-block:: python

        from analysis_engine.fetch import fetch
        d = fetch(ticker='NFLX')
        print(d)
        for k in d['NFLX']:
            print('dataset key: {}'.format(k))

    By default, it synchronously automates:

        - fetching all datasets
        - caching all datasets in Redis
        - archiving all datasets in Minio (S3)
        - returns all datasets in a single dictionary

    This was created to reduce the amount of typing in
    Jupyter notebooks. It can also be set up for use with a
    distributed engine via the optional arguments,
    depending on your connectivity requirements.

    .. note:: Please ensure Redis and Minio are running
              before trying to extract tickers

    **Stock tickers to fetch**

    :param ticker: single stock ticker/symbol/ETF to fetch
    :param tickers: optional - list of tickers to fetch

    **(Optional) Data sources, datafeeds and datasets to gather**

    :param fetch_mode: data sources - default is ``all`` (both IEX
        and Yahoo), ``iex`` for only IEX, ``yahoo`` for only Yahoo.
    :param iex_datasets: list of strings for gathering specific `IEX
        datasets <https://iextrading.com/developer/docs/#stocks>`__
        which are set as consts: ``analysis_engine.iex.consts.FETCH_*``.

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all
        datasets in Redis
        (default is ``True``)
    :param redis_address: Redis connection string format: ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use
        (default is ``0``)
    :param redis_password: optional - Redis password
        (default is ``None``)
    :param redis_expire: optional - Redis expire value
        (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for auto-archiving on Minio (S3)
        (default is ``True``)
    :param s3_address: Minio S3 connection string format: ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key
        (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key
        (default is ``trex123321``)
    :param s3_region_name: S3 region name
        (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption
        (default is ``False``)

    **(Optional) Celery worker broker connectivity arguments**

    :param celery_disabled: bool - toggle synchronous mode or publish
        to an engine connected to the `Celery broker and backend
        <https://github.com/celery/celery#transports-and-backends>`__
        (default is ``True`` - synchronous mode without an engine
        or need for a broker or backend for Celery)
    :param broker_url: Celery broker url
        (default is ``redis://0.0.0.0:6379/13``)
    :param result_backend: Celery backend url
        (default is ``redis://0.0.0.0:6379/14``)
    :param label: tracking log label

    **(Optional) Debugging**

    :param verbose: bool - show fetch warnings
        and other debug logging (default is False)

    **Supported environment variables**

    ::

        export REDIS_ADDRESS="localhost:6379"
        export REDIS_DB="0"
        export S3_ADDRESS="localhost:9000"
        export S3_BUCKET="dev"
        export AWS_ACCESS_KEY_ID="trexaccesskey"
        export AWS_SECRET_ACCESS_KEY="trex123321"
        export AWS_DEFAULT_REGION="us-east-1"
        export S3_SECURE="0"
        export WORKER_BROKER_URL="redis://0.0.0.0:6379/13"
        export WORKER_BACKEND_URL="redis://0.0.0.0:6379/14"
    """

    rec = {}

    extract_records = []

    use_tickers = tickers
    if ticker:
        use_tickers = [ticker]
    else:
        if not use_tickers:
            use_tickers = []

    default_iex_datasets = [
        'daily', 'minute', 'quote', 'stats', 'peers', 'news', 'financials',
        'earnings', 'dividends', 'company'
    ]

    if not iex_datasets:
        iex_datasets = default_iex_datasets
    if not fetch_mode:
        fetch_mode = 'all'

    if redis_enabled:
        if not redis_address:
            redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379')
        if not redis_password:
            redis_password = os.getenv('REDIS_PASSWORD', None)
        if not redis_db:
            redis_db = int(os.getenv('REDIS_DB', '0'))
        if not redis_expire:
            redis_expire = os.getenv('REDIS_EXPIRE', None)
    if s3_enabled:
        if not s3_address:
            s3_address = os.getenv('S3_ADDRESS', 'localhost:9000')
        if not s3_access_key:
            s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey')
        if not s3_secret_key:
            s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321')
        if not s3_region_name:
            s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
        if not s3_secure:
            s3_secure = os.getenv('S3_SECURE', '0') == '1'
        if not s3_bucket:
            s3_bucket = os.getenv('S3_BUCKET', 'dev')
    if not broker_url:
        broker_url = os.getenv('WORKER_BROKER_URL', 'redis://0.0.0.0:6379/13')
    if not result_backend:
        result_backend = os.getenv('WORKER_BACKEND_URL',
                                   'redis://0.0.0.0:6379/14')

    if not label:
        label = 'get-latest'

    num_tickers = len(use_tickers)
    last_close_str = get_last_close_str()

    if iex_datasets:
        log.info('{} - getting latest for tickers={} '
                 'iex={}'.format(label, num_tickers, json.dumps(iex_datasets)))
    else:
        log.info('{} - getting latest for tickers={}'.format(
            label, num_tickers))

    for ticker in use_tickers:

        ticker_key = '{}_{}'.format(ticker, last_close_str)

        fetch_req = build_get_new_pricing_request()
        fetch_req['base_key'] = ticker_key
        fetch_req['celery_disabled'] = celery_disabled
        fetch_req['ticker'] = ticker
        fetch_req['label'] = label
        fetch_req['fetch_mode'] = fetch_mode
        fetch_req['iex_datasets'] = iex_datasets
        fetch_req['s3_enabled'] = s3_enabled
        fetch_req['s3_bucket'] = s3_bucket
        fetch_req['s3_address'] = s3_address
        fetch_req['s3_secure'] = s3_secure
        fetch_req['s3_region_name'] = s3_region_name
        fetch_req['s3_access_key'] = s3_access_key
        fetch_req['s3_secret_key'] = s3_secret_key
        fetch_req['s3_key'] = ticker_key
        fetch_req['redis_enabled'] = redis_enabled
        fetch_req['redis_address'] = redis_address
        fetch_req['redis_password'] = redis_password
        fetch_req['redis_db'] = redis_db
        fetch_req['redis_key'] = ticker_key
        fetch_req['redis_expire'] = redis_expire

        log.info('{} - fetching ticker={} last_close={} '
                 'redis_address={} s3_address={}'.format(
                     label, ticker, last_close_str, fetch_req['redis_address'],
                     fetch_req['s3_address']))

        fetch_res = price_utils.run_get_new_pricing_data(work_dict=fetch_req)
        if fetch_res['status'] == SUCCESS:
            log.info('{} - fetched ticker={} '
                     'preparing for extraction'.format(label, ticker))
            extract_req = fetch_req
            extract_records.append(extract_req)
        else:
            log.warning('{} - failed getting ticker={} data '
                        'status={} err={}'.format(
                            label, ticker,
                            get_status(status=fetch_res['status']),
                            fetch_res['err']))
        # end of if worked or not
    # end for all tickers to fetch
    """
    Extract Datasets
    """

    iex_daily_status = FAILED
    iex_minute_status = FAILED
    iex_quote_status = FAILED
    iex_stats_status = FAILED
    iex_peers_status = FAILED
    iex_news_status = FAILED
    iex_financials_status = FAILED
    iex_earnings_status = FAILED
    iex_dividends_status = FAILED
    iex_company_status = FAILED
    yahoo_news_status = FAILED
    yahoo_options_status = FAILED
    yahoo_pricing_status = FAILED

    iex_daily_df = None
    iex_minute_df = None
    iex_quote_df = None
    iex_stats_df = None
    iex_peers_df = None
    iex_news_df = None
    iex_financials_df = None
    iex_earnings_df = None
    iex_dividends_df = None
    iex_company_df = None
    yahoo_option_calls_df = None
    yahoo_option_puts_df = None
    yahoo_pricing_df = None
    yahoo_news_df = None

    extract_iex = True
    if fetch_mode not in ['all', 'iex']:
        extract_iex = False

    extract_yahoo = True
    if fetch_mode not in ['all', 'yahoo']:
        extract_yahoo = False

    for service_dict in extract_records:
        ticker_data = {}
        ticker = service_dict['ticker']

        extract_req = get_ds_dict(ticker=ticker,
                                  base_key=service_dict.get('base_key', None),
                                  ds_id=label,
                                  service_dict=service_dict)

        if 'daily' in iex_datasets or extract_iex:
            iex_daily_status, iex_daily_df = \
                iex_extract_utils.extract_daily_dataset(
                    extract_req)
            if iex_daily_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_daily={}'.format(ticker))
        if 'minute' in iex_datasets or extract_iex:
            iex_minute_status, iex_minute_df = \
                iex_extract_utils.extract_minute_dataset(
                    extract_req)
            if iex_minute_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_minute={}'.format(ticker))
        if 'quote' in iex_datasets or extract_iex:
            iex_quote_status, iex_quote_df = \
                iex_extract_utils.extract_quote_dataset(
                    extract_req)
            if iex_quote_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_quote={}'.format(ticker))
        if 'stats' in iex_datasets or extract_iex:
            iex_stats_status, iex_stats_df = \
                iex_extract_utils.extract_stats_dataset(
                    extract_req)
            if iex_stats_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_stats={}'.format(ticker))
        if 'peers' in iex_datasets or extract_iex:
            iex_peers_status, iex_peers_df = \
                iex_extract_utils.extract_peers_dataset(
                    extract_req)
            if iex_peers_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_peers={}'.format(ticker))
        if 'news' in iex_datasets or extract_iex:
            iex_news_status, iex_news_df = \
                iex_extract_utils.extract_news_dataset(
                    extract_req)
            if iex_news_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_news={}'.format(ticker))
        if 'financials' in iex_datasets or extract_iex:
            iex_financials_status, iex_financials_df = \
                iex_extract_utils.extract_financials_dataset(
                    extract_req)
            if iex_financials_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_financials={}'.format(ticker))
        if 'earnings' in iex_datasets or extract_iex:
            iex_earnings_status, iex_earnings_df = \
                iex_extract_utils.extract_earnings_dataset(
                    extract_req)
            if iex_earnings_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_earnings={}'.format(ticker))
        if 'dividends' in iex_datasets or extract_iex:
            iex_dividends_status, iex_dividends_df = \
                iex_extract_utils.extract_dividends_dataset(
                    extract_req)
            if iex_dividends_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_dividends={}'.format(ticker))
        if 'company' in iex_datasets or extract_iex:
            iex_company_status, iex_company_df = \
                iex_extract_utils.extract_company_dataset(
                    extract_req)
            if iex_company_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_company={}'.format(ticker))
        # end of iex extracts

        if extract_yahoo:
            yahoo_options_status, yahoo_option_calls_df = \
                yahoo_extract_utils.extract_option_calls_dataset(
                    extract_req)
            yahoo_options_status, yahoo_option_puts_df = \
                yahoo_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if yahoo_options_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch yahoo_options={}'.format(ticker))
            yahoo_pricing_status, yahoo_pricing_df = \
                yahoo_extract_utils.extract_pricing_dataset(
                    extract_req)
            if yahoo_pricing_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch yahoo_pricing={}'.format(ticker))
            yahoo_news_status, yahoo_news_df = \
                yahoo_extract_utils.extract_yahoo_news_dataset(
                    extract_req)
            if yahoo_news_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch yahoo_news={}'.format(ticker))
        # end of yahoo extracts

        ticker_data['daily'] = iex_daily_df
        ticker_data['minute'] = iex_minute_df
        ticker_data['quote'] = iex_quote_df
        ticker_data['stats'] = iex_stats_df
        ticker_data['peers'] = iex_peers_df
        ticker_data['news1'] = iex_news_df
        ticker_data['financials'] = iex_financials_df
        ticker_data['earnings'] = iex_earnings_df
        ticker_data['dividends'] = iex_dividends_df
        ticker_data['company'] = iex_company_df
        ticker_data['calls'] = yahoo_option_calls_df
        ticker_data['puts'] = yahoo_option_puts_df
        ticker_data['pricing'] = yahoo_pricing_df
        ticker_data['news'] = yahoo_news_df

        rec[ticker] = ticker_data
    # end of for service_dict in extract_records

    return rec
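Building on the docstring example above, a hedged sketch that restricts ``fetch`` to IEX-only ``daily`` and ``minute`` data via ``fetch_mode`` and ``iex_datasets``:

    from analysis_engine.fetch import fetch

    # each value is a pandas.DataFrame or None if the extract failed
    d = fetch(ticker='SPY', fetch_mode='iex',
              iex_datasets=['daily', 'minute'], verbose=True)
    for k, v in d.get('SPY', {}).items():
        print(k, None if v is None else len(v.index))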
Example #14
def build_dataset_node(ticker,
                       datasets,
                       date=None,
                       service_dict=None,
                       log_label=None,
                       redis_enabled=True,
                       redis_address=None,
                       redis_db=None,
                       redis_password=None,
                       redis_expire=None,
                       redis_key=None,
                       s3_enabled=True,
                       s3_address=None,
                       s3_bucket=None,
                       s3_access_key=None,
                       s3_secret_key=None,
                       s3_region_name=None,
                       s3_secure=False,
                       s3_key=None,
                       verbose=False):
    """build_dataset_node

    Helper for building a dictionary of
    cached datasets from Redis.

    The datasets should be built from
    the algorithm config's indicator
    ``uses_data`` fields, which default
    to ``minute`` data if not set

    :param ticker: string ticker
    :param datasets: list of string dataset names
        to extract from redis
    :param date: optional - string datetime formatted
        ``YYYY-MM-DD``
        (default is last trading close date)
    :param service_dict: optional - dictionary for all
        service connectivity to Redis and Minio; if not
        set, the ``s3_*`` and ``redis_*`` arguments
        will be used to look up data in Redis and Minio

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all
        datasets in Redis
        (default is ``True``)
    :param redis_address: Redis connection string
        format is ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use
        (default is ``0``)
    :param redis_password: optional - Redis password
        (default is ``None``)
    :param redis_expire: optional - Redis expire value
        (default is ``None``)
    :param redis_key: optional - redis key not used
        (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for turning on/off
        Minio or AWS S3
        (default is ``True``)
    :param s3_address: Minio S3 connection string address
        format is ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key
        (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key
        (default is ``trex123321``)
    :param s3_region_name: S3 region name
        (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption
        (default is ``False``)
    :param s3_key: optional s3 key not used
        (default is ``None``)

    **Debugging**

    :param log_label: optional - log label string
    :param verbose: optional - flag for debugging
        (defaults to ``False``)
    """

    label = log_label
    if not label:
        label = 'build_bt'

    if not date:
        date = ae_utils.get_last_close_str()

    td_convert_to_datetime = ae_consts.TRADIER_CONVERT_TO_DATETIME

    date_key = f'{ticker}_{date}'

    base_req = api_requests.get_ds_dict(ticker=ticker,
                                        base_key=date_key,
                                        ds_id=label,
                                        service_dict=service_dict)

    if not service_dict:
        base_req['redis_enabled'] = redis_enabled
        base_req['redis_address'] = redis_address
        base_req['redis_password'] = redis_password
        base_req['redis_db'] = redis_db
        base_req['redis_key'] = date_key
        base_req['redis_expire'] = redis_expire
        base_req['s3_enabled'] = s3_enabled
        base_req['s3_bucket'] = s3_bucket
        base_req['s3_address'] = s3_address
        base_req['s3_secure'] = s3_secure
        base_req['s3_region_name'] = s3_region_name
        base_req['s3_access_key'] = s3_access_key
        base_req['s3_secret_key'] = s3_secret_key
        base_req['s3_key'] = date_key

    if verbose:
        log.info(f'extracting {date_key}')
        """
        for showing connectivity args in the logs
        log.debug(
            f'bt {date_key} {ae_consts.ppj(base_req)}')
        """

    iex_daily_status = ae_consts.FAILED
    iex_minute_status = ae_consts.FAILED
    iex_quote_status = ae_consts.FAILED
    iex_stats_status = ae_consts.FAILED
    iex_peers_status = ae_consts.FAILED
    iex_news_status = ae_consts.FAILED
    iex_financials_status = ae_consts.FAILED
    iex_earnings_status = ae_consts.FAILED
    iex_dividends_status = ae_consts.FAILED
    iex_company_status = ae_consts.FAILED
    td_calls_status = ae_consts.FAILED
    td_puts_status = ae_consts.FAILED

    iex_daily_df = None
    iex_minute_df = None
    iex_quote_df = None
    iex_stats_df = None
    iex_peers_df = None
    iex_news_df = None
    iex_financials_df = None
    iex_earnings_df = None
    iex_dividends_df = None
    iex_company_df = None
    td_calls_df = None
    td_puts_df = None

    if 'daily' in datasets:
        iex_daily_status, iex_daily_df = \
            iex_extract_utils.extract_daily_dataset(
                base_req)
        if iex_daily_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_daily={ticker}')
    if 'minute' in datasets:
        iex_minute_status, iex_minute_df = \
            iex_extract_utils.extract_minute_dataset(
                base_req)
        if iex_minute_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_minute={ticker}')
    if 'quote' in datasets:
        iex_quote_status, iex_quote_df = \
            iex_extract_utils.extract_quote_dataset(
                base_req)
        if iex_quote_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_quote={ticker}')
    if 'stats' in datasets:
        iex_stats_status, iex_stats_df = \
            iex_extract_utils.extract_stats_dataset(
                base_req)
        if iex_stats_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_stats={ticker}')
    if 'peers' in datasets:
        iex_peers_status, iex_peers_df = \
            iex_extract_utils.extract_peers_dataset(
                base_req)
        if iex_peers_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_peers={ticker}')
    if 'news' in datasets:
        iex_news_status, iex_news_df = \
            iex_extract_utils.extract_news_dataset(
                base_req)
        if iex_news_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_news={ticker}')
    if 'financials' in datasets:
        iex_financials_status, iex_financials_df = \
            iex_extract_utils.extract_financials_dataset(
                base_req)
        if iex_financials_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_financials={ticker}')
    if 'earnings' in datasets:
        iex_earnings_status, iex_earnings_df = \
            iex_extract_utils.extract_earnings_dataset(
                base_req)
        if iex_earnings_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_earnings={ticker}')
    if 'dividends' in datasets:
        iex_dividends_status, iex_dividends_df = \
            iex_extract_utils.extract_dividends_dataset(
                base_req)
        if iex_dividends_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_dividends={ticker}')
    if 'company' in datasets:
        iex_company_status, iex_company_df = \
            iex_extract_utils.extract_company_dataset(
                base_req)
        if iex_company_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_company={ticker}')
    # end of iex extracts
    """
    Yahoo disabled on Jan 2019

    yahoo_news_status = ae_consts.FAILED
    yahoo_options_status = ae_consts.FAILED
    yahoo_pricing_status = ae_consts.FAILED
    yahoo_option_calls_df = None
    yahoo_option_puts_df = None
    yahoo_pricing_df = None
    yahoo_news_df = None

    if 'options' in datasets:
        yahoo_options_status, yahoo_option_calls_df = \
            yahoo_extract_utils.extract_option_calls_dataset(
                base_req)
        yahoo_options_status, yahoo_option_puts_df = \
            yahoo_extract_utils.extract_option_puts_dataset(
                base_req)
        if yahoo_options_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract yahoo_options={ticker}')
    if 'pricing' in datasets:
        yahoo_pricing_status, yahoo_pricing_df = \
            yahoo_extract_utils.extract_pricing_dataset(
                base_req)
        if yahoo_pricing_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract yahoo_pricing={ticker}')
    if 'news' in datasets:
        yahoo_news_status, yahoo_news_df = \
            yahoo_extract_utils.extract_yahoo_news_dataset(
                base_req)
        if yahoo_news_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract yahoo_news={ticker}')
    # end of yahoo extracts
    """
    """
    Tradier Extraction
    Debug by setting:

    base_req['verbose_td'] = True
    """
    if ('calls' in datasets or 'tdcalls' in datasets):
        td_calls_status, td_calls_df = \
            td_extract_utils.extract_option_calls_dataset(
                base_req)
        if td_calls_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract tdcalls={ticker}')
        else:
            if ae_consts.is_df(df=td_calls_df):
                for c in td_convert_to_datetime:
                    if c in td_calls_df:
                        td_calls_df[c] = pd.to_datetime(
                            td_calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT)
                if 'date' in td_calls_df:
                    # sort_values returns a new frame; keep the result
                    td_calls_df = td_calls_df.sort_values(
                        'date', ascending=True)
        # end of converting dates
    # end of Tradier calls extraction

    if ('puts' in datasets or 'tdputs' in datasets):
        td_puts_status, td_puts_df = \
            td_extract_utils.extract_option_puts_dataset(
                base_req)
        if td_puts_status != ae_consts.SUCCESS:
            if verbose:
                log.warning(f'unable to extract tdputs={ticker}')
        else:
            if ae_consts.is_df(df=td_puts_df):
                for c in td_convert_to_datetime:
                    if c in td_puts_df:
                        td_puts_df[c] = pd.to_datetime(
                            td_puts_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT)
                if 'date' in td_puts_df:
                    # sort_values returns a new frame; keep the result
                    td_puts_df = td_puts_df.sort_values(
                        'date', ascending=True)
        # end of converting dates
    # end of Tradier puts extraction

    ticker_data = {
        'daily': iex_daily_df,
        'minute': iex_minute_df,
        'quote': iex_quote_df,
        'stats': iex_stats_df,
        'peers': iex_peers_df,
        'news1': iex_news_df,
        'financials': iex_financials_df,
        'earnings': iex_earnings_df,
        'dividends': iex_dividends_df,
        'company': iex_company_df,
        'tdcalls': td_calls_df,
        'tdputs': td_puts_df,
        'calls': None,  # yahoo - here for legacy
        'news': None,  # yahoo - here for legacy
        'pricing': None,  # yahoo - here for legacy
        'puts': None  # yahoo - here for legacy
    }

    return ticker_data
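
The blocks above repeat the library's ``(status, DataFrame)`` tuple convention for every dataset. A minimal sketch of a wrapper that collapses that boilerplate, assuming the same ``ae_consts`` and ``log`` objects are in scope; ``run_extract`` is a hypothetical helper, not part of the library:

def run_extract(extract_fn, req, name, ticker, verbose=False):
    """Call one extractor and warn on failure.

    ``extract_fn`` must return the library's
    (status, ``pandas.DataFrame``) tuple; this helper
    returns just the DataFrame (or ``None``).
    """
    status, df = extract_fn(req)
    if status != ae_consts.SUCCESS and verbose:
        log.warning(f'unable to extract {name}={ticker}')
    return df

# hypothetical usage against the extractors above:
# iex_daily_df = run_extract(
#     iex_extract_utils.extract_daily_dataset,
#     base_req, 'iex_daily', ticker, verbose=verbose)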
Example #15
def extract(ticker=None,
            tickers=None,
            use_key=None,
            extract_mode='all',
            iex_datasets=None,
            redis_enabled=True,
            redis_address=None,
            redis_db=None,
            redis_password=None,
            redis_expire=None,
            s3_enabled=True,
            s3_address=None,
            s3_bucket=None,
            s3_access_key=None,
            s3_secret_key=None,
            s3_region_name=None,
            s3_secure=False,
            celery_disabled=True,
            broker_url=None,
            result_backend=None,
            label=None,
            verbose=False):
    """extract

    Extract all cached datasets for a stock ``ticker`` or
    a list of ``tickers`` and return them as a dictionary.
    Please make sure the datasets are already cached in
    Redis before running this method. If they are not,
    please refer to the ``analysis_engine.fetch.fetch``
    function to prepare the datasets in your environment.

    Python example:

    .. code-block:: python

        from analysis_engine.extract import extract
        d = extract(ticker='NFLX')
        print(d)
        for k in d['NFLX']:
            print('dataset key: {}'.format(k))


    This was created to reduce the amount of typing in
    Jupyter notebooks. It can also be set up for use with a
    distributed engine using the optional arguments,
    depending on your connectivity requirements.

    .. note:: Please ensure Redis and Minio are running
              before trying to extract tickers

    **Stock tickers to extract**

    :param ticker: single stock ticker/symbol/ETF to extract
    :param tickers: optional - list of tickers to extract
    :param use_key: optional - extract historical key from Redis
        usually formatted ``<TICKER>_<date formatted YYYY-MM-DD>``

    **(Optional) Data sources, datafeeds and datasets to gather**

    :param iex_datasets: list of strings for gathering specific `IEX
        datasets <https://iextrading.com/developer/docs/#stocks>`__
        which are set as consts: ``analysis_engine.iex.consts.FETCH_*``.

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all
        datasets in Redis
        (default is ``True``)
    :param redis_address: Redis connection string format: ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use
        (default is ``0``)
    :param redis_password: optional - Redis password
        (default is ``None``)
    :param redis_expire: optional - Redis expire value
        (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for auto-archiving on Minio (S3)
        (default is ``True``)
    :param s3_address: Minio S3 connection string format: ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key
        (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key
        (default is ``trex123321``)
    :param s3_region_name: S3 region name
        (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption
        (default is ``False``)

    **(Optional) Celery worker broker connectivity arguments**

    :param celery_disabled: bool - toggle synchronous mode or publish
        to an engine connected to the `Celery broker and backend
        <https://github.com/celery/celery#transports-and-backends>`__
        (default is ``True`` - synchronous mode without an engine
        or need for a broker or backend for Celery)
    :param broker_url: Celery broker url
        (default is ``redis://0.0.0.0:6379/13``)
    :param result_backend: Celery backend url
        (default is ``redis://0.0.0.0:6379/14``)
    :param label: tracking log label

    **(Optional) Debugging**

    :param verbose: bool - show extract warnings
        and other debug logging (default is False)

    **Supported environment variables**

    ::

        export REDIS_ADDRESS="localhost:6379"
        export REDIS_DB="0"
        export S3_ADDRESS="localhost:9000"
        export S3_BUCKET="dev"
        export AWS_ACCESS_KEY_ID="trexaccesskey"
        export AWS_SECRET_ACCESS_KEY="trex123321"
        export AWS_DEFAULT_REGION="us-east-1"
        export S3_SECURE="0"
        export WORKER_BROKER_URL="redis://0.0.0.0:6379/13"
        export WORKER_BACKEND_URL="redis://0.0.0.0:6379/14"
    """

    rec = {}
    extract_requests = []

    use_tickers = tickers
    if ticker:
        use_tickers = [ticker]
    else:
        if not use_tickers:
            use_tickers = []

    default_iex_datasets = [
        'daily', 'minute', 'quote', 'stats', 'peers', 'news', 'financials',
        'earnings', 'dividends', 'company'
    ]

    if not iex_datasets:
        iex_datasets = default_iex_datasets

    if redis_enabled:
        if not redis_address:
            redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379')
        if not redis_password:
            redis_password = os.getenv('REDIS_PASSWORD', None)
        if not redis_db:
            redis_db = int(os.getenv('REDIS_DB', '0'))
        if not redis_expire:
            redis_expire = os.getenv('REDIS_EXPIRE', None)
    if s3_enabled:
        if not s3_address:
            s3_address = os.getenv('S3_ADDRESS', 'localhost:9000')
        if not s3_access_key:
            s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey')
        if not s3_secret_key:
            s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321')
        if not s3_region_name:
            s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
        if not s3_secure:
            s3_secure = os.getenv('S3_SECURE', '0') == '1'
        if not s3_bucket:
            s3_bucket = os.getenv('S3_BUCKET', 'dev')
    if not broker_url:
        broker_url = os.getenv('WORKER_BROKER_URL', 'redis://0.0.0.0:6379/13')
    if not result_backend:
        result_backend = os.getenv('WORKER_BACKEND_URL',
                                   'redis://0.0.0.0:6379/14')

    if not label:
        label = 'get-latest'

    num_tickers = len(use_tickers)
    last_close_str = ae_utils.get_last_close_str()

    if iex_datasets:
        log.info('{} - getting latest for tickers={} '
                 'iex={}'.format(label, num_tickers, json.dumps(iex_datasets)))
    else:
        log.info('{} - getting latest for tickers={}'.format(
            label, num_tickers))

    ticker_key = use_key
    if not ticker_key:
        ticker_key = '{}_{}'.format(ticker, last_close_str)

    common_vals = {}
    common_vals['base_key'] = ticker_key
    common_vals['celery_disabled'] = celery_disabled
    common_vals['ticker'] = ticker
    common_vals['label'] = label
    common_vals['iex_datasets'] = iex_datasets
    common_vals['s3_enabled'] = s3_enabled
    common_vals['s3_bucket'] = s3_bucket
    common_vals['s3_address'] = s3_address
    common_vals['s3_secure'] = s3_secure
    common_vals['s3_region_name'] = s3_region_name
    common_vals['s3_access_key'] = s3_access_key
    common_vals['s3_secret_key'] = s3_secret_key
    common_vals['s3_key'] = ticker_key
    common_vals['redis_enabled'] = redis_enabled
    common_vals['redis_address'] = redis_address
    common_vals['redis_password'] = redis_password
    common_vals['redis_db'] = redis_db
    common_vals['redis_key'] = ticker_key
    common_vals['redis_expire'] = redis_expire

    log.info('{} - extract ticker={} last_close={} base_key={} '
             'redis_address={} s3_address={}'.format(
                 label, ticker, last_close_str, common_vals['base_key'],
                 common_vals['redis_address'], common_vals['s3_address']))
    """
    Extract Datasets
    """

    iex_daily_status = ae_consts.FAILED
    iex_minute_status = ae_consts.FAILED
    iex_quote_status = ae_consts.FAILED
    iex_stats_status = ae_consts.FAILED
    iex_peers_status = ae_consts.FAILED
    iex_news_status = ae_consts.FAILED
    iex_financials_status = ae_consts.FAILED
    iex_earnings_status = ae_consts.FAILED
    iex_dividends_status = ae_consts.FAILED
    iex_company_status = ae_consts.FAILED
    yahoo_news_status = ae_consts.FAILED
    yahoo_options_status = ae_consts.FAILED
    yahoo_pricing_status = ae_consts.FAILED
    td_calls_status = ae_consts.FAILED
    td_puts_status = ae_consts.FAILED

    iex_daily_df = None
    iex_minute_df = None
    iex_quote_df = None
    iex_stats_df = None
    iex_peers_df = None
    iex_news_df = None
    iex_financials_df = None
    iex_earnings_df = None
    iex_dividends_df = None
    iex_company_df = None
    yahoo_option_calls_df = None
    yahoo_option_puts_df = None
    yahoo_pricing_df = None
    yahoo_news_df = None
    td_calls_df = None
    td_puts_df = None

    for ticker in use_tickers:
        req = api_requests.get_ds_dict(ticker=ticker,
                                       base_key=common_vals['base_key'],
                                       ds_id=label,
                                       service_dict=common_vals)
        extract_requests.append(req)
    # end of for all ticker in use_tickers

    extract_iex = True
    if extract_mode not in ['all', 'iex']:
        extract_iex = False

    extract_yahoo = True
    if extract_mode not in ['all', 'yahoo']:
        extract_yahoo = False

    extract_td = True
    if extract_mode not in ['all', 'td']:
        extract_td = False

    for extract_req in extract_requests:
        if 'daily' in iex_datasets or extract_iex:
            iex_daily_status, iex_daily_df = \
                iex_extract_utils.extract_daily_dataset(
                    extract_req)
            if iex_daily_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_daily={}'.format(ticker))
        if 'minute' in iex_datasets or extract_iex:
            iex_minute_status, iex_minute_df = \
                iex_extract_utils.extract_minute_dataset(
                    extract_req)
            if iex_minute_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_minute={}'.format(ticker))
        if 'quote' in iex_datasets or extract_iex:
            iex_quote_status, iex_quote_df = \
                iex_extract_utils.extract_quote_dataset(
                    extract_req)
            if iex_quote_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_quote={}'.format(ticker))
        if 'stats' in iex_datasets or extract_iex:
            iex_stats_status, iex_stats_df = \
                iex_extract_utils.extract_stats_dataset(
                    extract_req)
            if iex_stats_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_stats={}'.format(ticker))
        if 'peers' in iex_datasets or extract_iex:
            iex_peers_status, iex_peers_df = \
                iex_extract_utils.extract_peers_dataset(
                    extract_req)
            if iex_peers_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_peers={}'.format(ticker))
        if 'news' in iex_datasets or extract_iex:
            iex_news_status, iex_news_df = \
                iex_extract_utils.extract_news_dataset(
                    extract_req)
            if iex_news_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning('unable to extract iex_news={}'.format(ticker))
        if 'financials' in iex_datasets or extract_iex:
            iex_financials_status, iex_financials_df = \
                iex_extract_utils.extract_financials_dataset(
                    extract_req)
            if iex_financials_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_financials={}'.format(ticker))
        if 'earnings' in iex_datasets or extract_iex:
            iex_earnings_status, iex_earnings_df = \
                iex_extract_utils.extract_earnings_dataset(
                    extract_req)
            if iex_earnings_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_earnings={}'.format(ticker))
        if 'dividends' in iex_datasets or extract_iex:
            iex_dividends_status, iex_dividends_df = \
                iex_extract_utils.extract_dividends_dataset(
                    extract_req)
            if iex_dividends_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_dividends={}'.format(ticker))
        if 'company' in iex_datasets or extract_iex:
            iex_company_status, iex_company_df = \
                iex_extract_utils.extract_company_dataset(
                    extract_req)
            if iex_company_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_company={}'.format(ticker))
        # end of iex extracts

        if extract_yahoo:
            yahoo_options_status, yahoo_option_calls_df = \
                yahoo_extract_utils.extract_option_calls_dataset(
                    extract_req)
            yahoo_options_status, yahoo_option_puts_df = \
                yahoo_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if yahoo_options_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract yahoo_options={}'.format(ticker))
            yahoo_pricing_status, yahoo_pricing_df = \
                yahoo_extract_utils.extract_pricing_dataset(
                    extract_req)
            if yahoo_pricing_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract yahoo_pricing={}'.format(ticker))
            yahoo_news_status, yahoo_news_df = \
                yahoo_extract_utils.extract_yahoo_news_dataset(
                    extract_req)
            if yahoo_news_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract yahoo_news={}'.format(ticker))
        # end of yahoo extracts

        if extract_td:
            td_calls_status, td_calls_df = \
                td_extract_utils.extract_option_calls_dataset(
                    extract_req)
            if td_calls_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning('unable to extract tdcalls={}'.format(ticker))
            td_puts_status, td_puts_df = \
                td_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if td_puts_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning('unable to extract tdputs={}'.format(ticker))
        # td extracts

        ticker_data = {}
        ticker_data['daily'] = iex_daily_df
        ticker_data['minute'] = iex_minute_df
        ticker_data['quote'] = iex_quote_df
        ticker_data['stats'] = iex_stats_df
        ticker_data['peers'] = iex_peers_df
        ticker_data['news1'] = iex_news_df
        ticker_data['financials'] = iex_financials_df
        ticker_data['earnings'] = iex_earnings_df
        ticker_data['dividends'] = iex_dividends_df
        ticker_data['company'] = iex_company_df
        ticker_data['calls'] = yahoo_option_calls_df
        ticker_data['puts'] = yahoo_option_puts_df
        ticker_data['pricing'] = yahoo_pricing_df
        ticker_data['news'] = yahoo_news_df
        ticker_data['tdcalls'] = td_calls_df
        ticker_data['tdputs'] = td_puts_df

        rec[ticker] = ticker_data
    # end of for extract_req in extract_requests

    return rec
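
A short usage sketch for ``extract`` with explicit connectivity arguments; it assumes the ``SPY`` datasets were already cached by ``analysis_engine.fetch.fetch`` and that Redis and Minio are reachable at the shown addresses:

from analysis_engine.extract import extract

res = extract(
    ticker='SPY',
    iex_datasets=['daily', 'minute', 'quote'],
    redis_address='localhost:6379',
    s3_address='localhost:9000',
    verbose=True)
for ds_name, df in res['SPY'].items():
    # each value is a pandas.DataFrame or None if not cached
    num_rows = len(df.index) if df is not None else 0
    print(f'SPY {ds_name} rows={num_rows}')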
Example #16

def get_pricing_on_date(ticker, date_str=None, label=None):
    """get_pricing_on_date

    Get the latest pricing data from the
    cached IEX data in Redis. Use this to
    keep costs down!

    .. code-block:: python

        import analysis_engine.iex.get_pricing_on_date as iex_cache
        print(iex_cache.get_pricing_on_date('SPY'))
        print(iex_cache.get_pricing_on_date(
            ticker='SPY',
            date_str='2019-02-07'))

    :param ticker: ticker string
    :param date_str: optional - string date
        to pull data from redis. if ``None`` use
        today's date. format is
        ``ae_consts.COMMON_TICK_DATE_FORMAT``
    :param label: log label for tracking
    """

    ret_dict = {
        'status': ae_consts.NOT_SET,
        'pricing_type': None,
        'high': None,
        'low': None,
        'open': None,
        'close': None,
        'volume': None,
        'date': None,
        'minute': None,
        'average': None,
        'changeOverTime': None,
        'label': None,
        'marketAverage': None,
        'marketChangeOverTime': None,
        'marketClose': None,
        'marketHigh': None,
        'marketLow': None,
        'marketNotional': None,
        'marketNumberOfTrades': None,
        'marketOpen': None,
        'marketVolume': None,
        'notional': None,
        'numberOfTrades': None
    }

    use_date_str = None
    if date_str:
        use_date_str = f'{ticker}_{date_str}'

    all_extract_reqs = api_requests.get_ds_dict(ticker=ticker,
                                                base_key=use_date_str,
                                                label=label)

    minute_key = all_extract_reqs['minute']
    daily_key = all_extract_reqs['daily']
    base_ex_req = {
        'ticker': ticker,
        's3_bucket': 'pricing',
        's3_key': minute_key,
        'redis_key': minute_key,
        's3_enabled': True,
        's3_access_key': ae_consts.S3_ACCESS_KEY,
        's3_secret_key': ae_consts.S3_SECRET_KEY,
        's3_region_name': ae_consts.S3_REGION_NAME,
        's3_address': ae_consts.S3_ADDRESS,
        's3_secure': ae_consts.S3_SECURE,
        'redis_address': ae_consts.REDIS_ADDRESS,
        'redis_password': ae_consts.REDIS_PASSWORD,
        'redis_db': ae_consts.REDIS_DB,
        'redis_expire': ae_consts.REDIS_EXPIRE,
        'redis_enabled': True,
        'fetch_mode': 'td',
        'analysis_type': None,
        'iex_datasets': [],
        'debug': False,
        'label': label,
        'celery_disabled': True
    }
    log.debug(f'{ticker} - minute={minute_key} daily={daily_key}')
    reqs = []
    minute_ex_req = copy.deepcopy(base_ex_req)
    minute_ex_req['ex_type'] = iex_consts.FETCH_MINUTE
    minute_ex_req['iex_datasets'] = [iex_consts.FETCH_MINUTE]
    reqs.append(minute_ex_req)
    daily_ex_req = copy.deepcopy(base_ex_req)
    daily_ex_req['ex_type'] = iex_consts.FETCH_DAILY
    daily_ex_req['s3_key'] = daily_key
    daily_ex_req['redis_key'] = daily_key
    daily_ex_req['iex_datasets'] = [iex_consts.FETCH_DAILY]
    reqs.append(daily_ex_req)
    try:
        for ex_req in reqs:
            iex_status = ae_consts.FAILED
            iex_df = None
            if ex_req['ex_type'] == iex_consts.FETCH_MINUTE:
                iex_status, iex_df = \
                    iex_extract_utils.extract_minute_dataset(
                        work_dict=ex_req)
            else:
                iex_status, iex_df = \
                    iex_extract_utils.extract_daily_dataset(
                        work_dict=ex_req)
            # end of extracting

            if ae_consts.is_df(df=iex_df):
                if 'date' in iex_df:
                    iex_df = iex_df.sort_values(by=['date'], ascending=True)
                    ret_dict = json.loads(iex_df.iloc[-1].to_json())
                    if 'date' in ret_dict:
                        try:
                            ret_dict['date'] = ae_utils.epoch_to_dt(
                                epoch=int(ret_dict['date'] / 1000),
                                use_utc=False,
                                convert_to_est=False).strftime(
                                    ae_consts.COMMON_TICK_DATE_FORMAT)

                        except Exception as f:
                            log.critical(
                                f'failed converting {ret_dict} date to str '
                                f'with ex={f}')
                    if ex_req['ex_type'] == iex_consts.FETCH_MINUTE:
                        ret_dict['pricing_type'] = 'minute'
                        ret_dict['minute'] = ret_dict.get('date', None)
                    else:
                        ret_dict['pricing_type'] = 'daily'
                    ret_dict['status'] = iex_status
                    return ret_dict
            # if a valid df then return it
    except Exception as e:
        log.critical(f'failed to get {ticker} iex minute data with ex={e}')
        ret_dict['status'] = ae_consts.ERR
    # end of try/ex to get latest pricing

    return ret_dict
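
A usage sketch for ``get_pricing_on_date``; the printed fields follow the ``ret_dict`` keys initialized at the top of the function, and ``ae_consts`` is assumed to be imported as above:

pricing = get_pricing_on_date(
    ticker='SPY',
    date_str='2019-02-07')
if pricing['status'] == ae_consts.SUCCESS:
    # minute data wins when both minute and daily are cached
    print(f"{pricing['pricing_type']} pricing - "
          f"close={pricing['close']} high={pricing['high']} "
          f"low={pricing['low']} volume={pricing['volume']}")
else:
    print(f"no cached pricing - status={pricing['status']}")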
Example #17
    def latest(self,
               date_str=None,
               start_row=-200,
               extract_iex=True,
               extract_yahoo=False,
               extract_td=True,
               verbose=False,
               **kwargs):
        """latest

        Run the algorithm with the latest pricing data. Also
        supports running a backtest for a historical date in
        the pricing history (format ``YYYY-MM-DD``)

        :param date_str: optional - string start date ``YYYY-MM-DD``
            default is the latest close date
        :param start_row: negative number of rows back
            from the end of the list in the data
            default is ``-200`` where this means the algorithm
            will process the latest 200 rows in the minute
            dataset
        :param extract_iex: bool flag for extracting from ``IEX``
        :param extract_yahoo: bool flag for extracting from ``Yahoo``
            which is disabled as of 1/2019
        :param extract_td: bool flag for extracting from ``Tradier``
        :param verbose: bool flag for logs
        :param kwargs: keyword arg dict
        """
        use_date_str = date_str
        if not use_date_str:
            use_date_str = ae_utils.get_last_close_str()

        log.info('creating algo')
        self.algo_obj = base_algo.BaseAlgo(
            ticker=self.config_dict['ticker'],
            balance=self.config_dict['balance'],
            commission=self.config_dict['commission'],
            name=self.use_name,
            start_date=self.use_start_date,
            end_date=self.use_end_date,
            auto_fill=self.auto_fill,
            config_dict=self.config_dict,
            load_from_s3_bucket=self.load_from_s3_bucket,
            load_from_s3_key=self.load_from_s3_key,
            load_from_redis_key=self.load_from_redis_key,
            load_from_file=self.load_from_file,
            load_compress=self.load_compress,
            load_publish=self.load_publish,
            load_config=self.load_config,
            report_redis_key=self.report_redis_key,
            report_s3_bucket=self.report_s3_bucket,
            report_s3_key=self.report_s3_key,
            report_file=self.report_file,
            report_compress=self.report_compress,
            report_publish=self.report_publish,
            report_config=self.report_config,
            history_redis_key=self.history_redis_key,
            history_s3_bucket=self.history_s3_bucket,
            history_s3_key=self.history_s3_key,
            history_file=self.history_file,
            history_compress=self.history_compress,
            history_publish=self.history_publish,
            history_config=self.history_config,
            extract_redis_key=self.extract_redis_key,
            extract_s3_bucket=self.extract_s3_bucket,
            extract_s3_key=self.extract_s3_key,
            extract_file=self.extract_file,
            extract_save_dir=self.extract_save_dir,
            extract_compress=self.extract_compress,
            extract_publish=self.extract_publish,
            extract_config=self.extract_config,
            publish_to_slack=self.publish_to_slack,
            publish_to_s3=self.publish_to_s3,
            publish_to_redis=self.publish_to_redis,
            dataset_type=self.dataset_type,
            serialize_datasets=self.serialize_datasets,
            compress=self.compress,
            encoding=self.encoding,
            redis_enabled=self.redis_enabled,
            redis_key=self.redis_key,
            redis_address=self.redis_address,
            redis_db=self.redis_db,
            redis_password=self.redis_password,
            redis_expire=self.redis_expire,
            redis_serializer=self.redis_serializer,
            redis_encoding=self.redis_encoding,
            s3_enabled=self.s3_enabled,
            s3_key=self.s3_key,
            s3_address=self.s3_address,
            s3_bucket=self.s3_bucket,
            s3_access_key=self.s3_access_key,
            s3_secret_key=self.s3_secret_key,
            s3_region_name=self.s3_region_name,
            s3_secure=self.s3_secure,
            slack_enabled=self.slack_enabled,
            slack_code_block=self.slack_code_block,
            slack_full_width=self.slack_full_width,
            dataset_publish_extract=self.extract_publish,
            dataset_publish_history=self.history_publish,
            dataset_publish_report=self.report_publish,
            run_on_engine=self.run_on_engine,
            auth_url=self.broker_url,
            backend_url=self.backend_url,
            include_tasks=self.include_tasks,
            ssl_options=self.ssl_options,
            transport_options=self.transport_options,
            path_to_config_module=self.path_to_config_module,
            timeseries=self.timeseries,
            trade_strategy=self.trade_strategy,
            verbose=False,
            raise_on_err=self.raise_on_err)

        log.info('run latest - start')

        ticker = self.config_dict['ticker']
        self.common_fetch_vals['base_key'] = f'{ticker}_{use_date_str}'
        extract_req = api_requests.get_ds_dict(
            ticker=ticker,
            base_key=self.common_fetch_vals['base_key'],
            ds_id=ticker,
            service_dict=self.common_fetch_vals)
        node_date_key = use_date_str.replace(f'{ticker}_', '')
        req = {
            'id': use_date_str,
            'ticker': ticker,
            'date_key': self.common_fetch_vals['base_key'],
            'date': node_date_key,
            'req': extract_req
        }
        # fetch
        iex_daily_df = None
        iex_minute_df = None
        iex_quote_df = None
        iex_stats_df = None
        iex_peers_df = None
        iex_news_df = None
        iex_financials_df = None
        iex_earnings_df = None
        iex_dividends_df = None
        iex_company_df = None
        yahoo_option_calls_df = None
        yahoo_option_puts_df = None
        yahoo_pricing_df = None
        yahoo_news_df = None
        td_calls_df = None
        td_puts_df = None

        node_date_key = req['date']
        dataset_node_id = req['id']
        dataset_id = dataset_node_id

        label = f'ticker={ticker} date={node_date_key}'
        if verbose:
            log.info(f'{label} - extract - start')
        if 'daily' in self.iex_datasets or extract_iex:
            iex_daily_status, iex_daily_df = \
                iex_extract_utils.extract_daily_dataset(
                    extract_req)
            if iex_daily_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_daily={ticker}')
        if 'minute' in self.iex_datasets or extract_iex:
            iex_minute_status, iex_minute_df = \
                iex_extract_utils.extract_minute_dataset(
                    extract_req)
            if iex_minute_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_minute={ticker}')
        if 'quote' in self.iex_datasets or extract_iex:
            iex_quote_status, iex_quote_df = \
                iex_extract_utils.extract_quote_dataset(
                    extract_req)
            if iex_quote_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_quote={ticker}')
        if 'stats' in self.iex_datasets or extract_iex:
            iex_stats_status, iex_stats_df = \
                iex_extract_utils.extract_stats_dataset(
                    extract_req)
            if iex_stats_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_stats={ticker}')
        if 'peers' in self.iex_datasets or extract_iex:
            iex_peers_status, iex_peers_df = \
                iex_extract_utils.extract_peers_dataset(
                    extract_req)
            if iex_peers_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_peers={ticker}')
        if 'news' in self.iex_datasets or extract_iex:
            iex_news_status, iex_news_df = \
                iex_extract_utils.extract_news_dataset(
                    extract_req)
            if iex_news_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_news={ticker}')
        if 'financials' in self.iex_datasets or extract_iex:
            iex_financials_status, iex_financials_df = \
                iex_extract_utils.extract_financials_dataset(
                    extract_req)
            if iex_financials_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_financials={ticker}')
        if 'earnings' in self.iex_datasets or extract_iex:
            iex_earnings_status, iex_earnings_df = \
                iex_extract_utils.extract_earnings_dataset(
                    extract_req)
            if iex_earnings_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_earnings={ticker}')
        if 'dividends' in self.iex_datasets or extract_iex:
            iex_dividends_status, iex_dividends_df = \
                iex_extract_utils.extract_dividends_dataset(
                    extract_req)
            if iex_dividends_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_dividends={ticker}')
        if 'company' in self.iex_datasets or extract_iex:
            iex_company_status, iex_company_df = \
                iex_extract_utils.extract_company_dataset(
                    extract_req)
            if iex_company_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_company={ticker}')
        # end of iex extracts

        if extract_yahoo:
            yahoo_options_status, yahoo_option_calls_df = \
                yahoo_extract_utils.extract_option_calls_dataset(
                    extract_req)
            yahoo_options_status, yahoo_option_puts_df = \
                yahoo_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if yahoo_options_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract yahoo_options={ticker}')
            yahoo_pricing_status, yahoo_pricing_df = \
                yahoo_extract_utils.extract_pricing_dataset(
                    extract_req)
            if yahoo_pricing_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract yahoo_pricing={ticker}')
            yahoo_news_status, yahoo_news_df = \
                yahoo_extract_utils.extract_yahoo_news_dataset(
                    extract_req)
            if yahoo_news_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract yahoo_news={ticker}')
        # end of yahoo extracts

        if extract_td:
            """
            Debug by setting:

            extract_req['verbose_td'] = True
            """
            convert_to_datetime = [
                'date', 'created', 'ask_date', 'bid_date', 'trade_date'
            ]
            td_calls_status, td_calls_df = \
                td_extract_utils.extract_option_calls_dataset(
                    extract_req)
            if td_calls_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract tdcalls={ticker}')
            else:
                if ae_consts.is_df(df=td_calls_df):
                    for c in convert_to_datetime:
                        if c in td_calls_df:
                            td_calls_df[c] = pd.to_datetime(
                                td_calls_df[c],
                                format=ae_consts.COMMON_TICK_DATE_FORMAT)
                    if 'date' in td_calls_df:
                        # sort_values returns a new frame; keep the result
                        td_calls_df = td_calls_df.sort_values(
                            'date', ascending=True)
            # end of converting dates

            td_puts_status, td_puts_df = \
                td_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if td_puts_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract tdputs={ticker}')
            else:
                if ae_consts.is_df(df=td_puts_df):
                    for c in convert_to_datetime:
                        if c in td_puts_df:
                            td_puts_df[c] = pd.to_datetime(
                                td_puts_df[c],
                                format=ae_consts.COMMON_TICK_DATE_FORMAT)
                    if 'date' in td_puts_df:
                        # sort_values returns a new frame; keep the result
                        td_puts_df = td_puts_df.sort_values(
                            'date', ascending=True)
            # end of converting dates
        # td extracts

        # map extracted data to DEFAULT_SERIALIZED_DATASETS
        ticker_data = {}
        ticker_data['daily'] = iex_daily_df
        ticker_data['minute'] = iex_minute_df
        ticker_data['quote'] = iex_quote_df
        ticker_data['stats'] = iex_stats_df
        ticker_data['peers'] = iex_peers_df
        ticker_data['news1'] = iex_news_df
        ticker_data['financials'] = iex_financials_df
        ticker_data['earnings'] = iex_earnings_df
        ticker_data['dividends'] = iex_dividends_df
        ticker_data['company'] = iex_company_df
        ticker_data['calls'] = yahoo_option_calls_df
        ticker_data['puts'] = yahoo_option_puts_df
        ticker_data['pricing'] = yahoo_pricing_df
        ticker_data['news'] = yahoo_news_df
        ticker_data['tdcalls'] = td_calls_df
        ticker_data['tdputs'] = td_puts_df

        algo_data_req = {
            ticker: [{
                'id': dataset_id,  # id is currently the cache key in redis
                'date': use_date_str,  # used to confirm dates in asc order
                'data': ticker_data,
                'start_row': start_row
            }]
        }

        if verbose:
            log.info(f'extract - {label} '
                     f'dataset={len(algo_data_req[ticker])}')

        # this could be a separate celery task
        try:
            if verbose:
                log.info(f'handle_data START - {label} from '
                         f'{node_date_key}')
            self.algo_obj.handle_data(data=algo_data_req)
            if verbose:
                log.info(f'handle_data END - {label} from {node_date_key}')
        except Exception as e:
            a_name = self.algo_obj.get_name()
            a_debug_msg = self.algo_obj.get_debug_msg()
            if not a_debug_msg:
                a_debug_msg = 'debug message not set'
            # a_config_dict = ae_consts.ppj(self.algo_obj.config_dict)
            msg = (f'{label} - algo={a_name} '
                   f'encountered exception in handle_data tickers={ticker} '
                   f'from {node_date_key} ex={e} '
                   f'and failed during operation: {a_debug_msg}')
            log.critical(msg)
        # end try/ex

        log.info('run latest - create history')

        history_ds = self.algo_obj.create_history_dataset()
        self.history_df = pd.DataFrame(history_ds[ticker])
        self.determine_latest_times_in_history()

        self.num_rows = len(self.history_df.index)

        if verbose:
            log.info(self.history_df[['minute', 'close']].tail(5))

        log.info(f'run latest minute={self.end_date} - '
                 f'rows={self.num_rows} - done')

        return self.get_history()
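
A hedged usage sketch for ``latest``; the enclosing class is not shown in this example, so ``AlgoRunner`` is a hypothetical name for whatever class defines the method, constructed with a ``config_dict`` holding at least ``ticker``, ``balance``, and ``commission``, and ``get_history()`` is assumed to return the history ``pandas.DataFrame``:

# hypothetical runner class; only the latest() method is shown above
runner = AlgoRunner(config_dict={
    'ticker': 'SPY',
    'balance': 10000.0,
    'commission': 6.0})
# backtest the latest 200 minute rows for the last close date
history_df = runner.latest(
    start_row=-200,
    extract_iex=True,
    extract_td=True,
    verbose=True)
print(history_df[['minute', 'close']].tail(5))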