Example #1
def extract_dataset(key,
                    ticker=None,
                    date=None,
                    work_dict=None,
                    scrub_mode='NO_SORT',
                    verbose=False):
    """extract_dataset

    Extract the IEX key data for a ticker from Redis and
    return it as a tuple ``(status, pandas.DataFrame)``.
    Returns ``None`` if ``key`` is not a supported dataset key.

    :param key: IEX dataset key
    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    :param verbose: optional - boolean for turning on logging
    """
    if not key or key not in keys:
        log.error(f'unsupported extract key={key} in keys={keys}')
        return None
    label = key
    df_type = keys[key]
    df_str = iex_consts.get_datafeed_str(df_type=df_type)
    latest_close_date = ae_utils.get_last_close_str()

    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
    if not work_dict:
        work_dict = api_requests.get_ds_dict(ticker=ticker)

    req = copy.deepcopy(work_dict)

    if not use_date:
        use_date = latest_close_date

    redis_key = f'{ticker}_{use_date}_{key}'
    req['redis_key'] = redis_key
    req['s3_key'] = redis_key

    if verbose:
        log.info(f'{label} - {df_str} - '
                 f'date={date} '
                 f'redis_key={req["redis_key"]} '
                 f's3_key={req["s3_key"]} '
                 f'{ae_consts.ppj(req)}')

    return extract_utils.perform_extract(df_type=df_type,
                                         df_str=df_str,
                                         work_dict=req,
                                         scrub_mode=scrub_mode,
                                         verbose=verbose)
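A minimal usage sketch, assuming this module's imports are available and Redis already holds a cached dataset; the ``'minute'`` key name is an assumption and must exist in the module-level ``keys`` dict:

# hypothetical usage - the key name is an assumption
res = extract_dataset(
    key='minute',
    ticker='SPY',
    date='2019-02-15',  # optional - defaults to the last close date
    verbose=True)
if res:
    status, df = res  # extract_dataset returns None for unsupported keys
    print(df)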
def get_options(ticker, contract_type, exp_date_str, strike=None):
    """get_options

    :param ticker: ticker to lookup
    :param contract_type: ``C`` for calls or ``P`` for puts; if
                          ``strike=None`` then the ``contract_type``
                          is ignored
    :param exp_date_str: ``YYYY-MM-DD`` expiration date format
    :param strike: optional strike price, ``None`` returns
                   all option chains
    """
    log.info('get_options ticker={} '
             'contract={} exp_date={} strike={}'.format(
                 ticker, contract_type, exp_date_str, strike))

    response = make_request(ticker=ticker, exp_date_str=exp_date_str)
    try:
        options_data = extract_options_data(response=response,
                                            contract_type=contract_type,
                                            strike=strike)
        options_dict = {
            'date': ae_utils.get_last_close_str(),
            'exp_date': None,
            'num_calls': None,
            'num_puts': None,
            'calls': None,
            'puts': None
        }
        if 'expirationDate' in options_data[0]:
            epoch_exp = options_data[0]['expirationDate']
            options_dict['exp_date'] = \
                datetime.datetime.fromtimestamp(
                    epoch_exp).strftime(
                        ae_consts.COMMON_TICK_DATE_FORMAT)
        calls_df = pd.DataFrame(options_data[0]['calls'])
        options_dict['num_calls'] = len(options_data[0]['calls'])
        options_dict['calls'] = calls_df.to_json(orient='records')
        puts_df = pd.DataFrame(options_data[0]['puts'])
        options_dict['num_puts'] = len(options_data[0]['puts'])
        options_dict['puts'] = puts_df.to_json(orient='records')

        return options_dict
    except Exception as e:
        log.error('failed get_options('
                  'ticker={}, '
                  'contract_type={}, '
                  'exp_date_str={}, '
                  'strike={}) with ex={}'.format(ticker, contract_type,
                                                 exp_date_str, strike, e))
        return []
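Because ``calls`` and ``puts`` come back json-serialized with ``orient='records'``, a caller can rebuild the DataFrames; a sketch, assuming the module imports above and a hypothetical expiration date:

import pandas as pd

options = get_options(
    ticker='SPY',
    contract_type='C',
    exp_date_str='2019-02-15',  # assumed expiration date
    strike=None)
if options:  # an empty list means the lookup failed
    calls_df = pd.read_json(options['calls'], orient='records')
    print(f'{options["num_calls"]} calls expiring {options["exp_date"]}')
    print(calls_df.head())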
Example #3
    def test_latest(self):
        """test_latest"""
        if ae_consts.ev('INT_TESTS', '0') == '0':
            return
        ticker = 'SPY'
        start_date = ae_utils.get_last_close_str()
        # build dataset cache dictionary
        runner = algo_runner.AlgoRunner(ticker=ticker,
                                        start_date=start_date,
                                        end_date=None,
                                        history_loc=self.algo_history_loc,
                                        algo_config=self.algo_config,
                                        verbose_algo=True,
                                        verbose_processor=False,
                                        verbose_indicators=False)

        req = {'ticker': ticker, 'date_str': start_date, 'start_row': -200}
        df = runner.latest(**req)
        self.assertEqual(len(df.index), len(runner.get_history().index))
def fetch(ticker=None,
          tickers=None,
          fetch_mode=None,
          iex_datasets=None,
          redis_enabled=True,
          redis_address=None,
          redis_db=None,
          redis_password=None,
          redis_expire=None,
          s3_enabled=True,
          s3_address=None,
          s3_bucket=None,
          s3_access_key=None,
          s3_secret_key=None,
          s3_region_name=None,
          s3_secure=False,
          celery_disabled=True,
          broker_url=None,
          result_backend=None,
          label=None,
          verbose=False):
    """fetch

    Fetch all supported datasets for a stock ``ticker`` or
    a list of ``tickers`` and return a dictionary. Once
    run, the datasets will all be cached in Redis and archived
    in Minio (S3) by default.

    Python example:

    .. code-block:: python

        from analysis_engine.fetch import fetch
        d = fetch(ticker='NFLX')
        print(d)
        for k in d['NFLX']:
            print('dataset key: {}'.format(k))

    By default, it synchronously automates:

        - fetching all datasets
        - caching all datasets in Redis
        - archiving all datasets in Minio (S3)
        - returning all datasets in a single dictionary

    This was created to reduce the amount of typing in
    Jupyter notebooks. It can also be set up for use with a
    distributed engine with the optional arguments,
    depending on your connectivity requirements.

    .. note:: Please ensure Redis and Minio are running
              before trying to extract tickers

    **Stock tickers to fetch**

    :param ticker: single stock ticker/symbol/ETF to fetch
    :param tickers: optional - list of tickers to fetch

    **(Optional) Data sources, datafeeds and datasets to gather**

    :param fetch_mode: data sources - default is ``all`` (both IEX
        and Yahoo), ``iex`` for only IEX, ``yahoo`` for only Yahoo.
    :param iex_datasets: list of strings for gathering specific `IEX
        datasets <https://iextrading.com/developer/docs/#stocks>`__
        which are set as consts: ``analysis_engine.iex.consts.FETCH_*``.

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all
        datasets in Redis
        (default is ``True``)
    :param redis_address: Redis connection string format: ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use
        (default is ``0``)
    :param redis_password: optional - Redis password
        (default is ``None``)
    :param redis_expire: optional - Redis expire value
        (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for auto-archiving on Minio (S3)
        (default is ``True``)
    :param s3_address: Minio S3 connection string format: ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key
        (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key
        (default is ``trex123321``)
    :param s3_region_name: S3 region name
        (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption
        (default is ``False``)

    **(Optional) Celery worker broker connectivity arguments**

    :param celery_disabled: bool - toggle synchronous mode or publish
        to an engine connected to the `Celery broker and backend
        <https://github.com/celery/celery#transports-and-backends>`__
        (default is ``True`` - synchronous mode without an engine
        or need for a broker or backend for Celery)
    :param broker_url: Celery broker url
        (default is ``redis://0.0.0.0:6379/13``)
    :param result_backend: Celery backend url
        (default is ``redis://0.0.0.0:6379/14``)
    :param label: tracking log label

    **(Optional) Debugging**

    :param verbose: bool - show fetch warnings
        and other debug logging (default is False)

    **Supported environment variables**

    ::

        export REDIS_ADDRESS="localhost:6379"
        export REDIS_DB="0"
        export S3_ADDRESS="localhost:9000"
        export S3_BUCKET="dev"
        export AWS_ACCESS_KEY_ID="trexaccesskey"
        export AWS_SECRET_ACCESS_KEY="trex123321"
        export AWS_DEFAULT_REGION="us-east-1"
        export S3_SECURE="0"
        export WORKER_BROKER_URL="redis://0.0.0.0:6379/13"
        export WORKER_BACKEND_URL="redis://0.0.0.0:6379/14"
    """

    rec = {}

    extract_records = []

    use_tickers = tickers
    if ticker:
        use_tickers = [ticker]
    else:
        if not use_tickers:
            use_tickers = []

    default_iex_datasets = [
        'daily', 'minute', 'quote', 'stats', 'peers', 'news', 'financials',
        'earnings', 'dividends', 'company'
    ]

    if not iex_datasets:
        iex_datasets = default_iex_datasets
    if not fetch_mode:
        fetch_mode = 'all'

    if redis_enabled:
        if not redis_address:
            redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379')
        if not redis_password:
            redis_password = os.getenv('REDIS_PASSWORD', None)
        if not redis_db:
            redis_db = int(os.getenv('REDIS_DB', '0'))
        if not redis_expire:
            redis_expire = os.getenv('REDIS_EXPIRE', None)
    if s3_enabled:
        if not s3_address:
            s3_address = os.getenv('S3_ADDRESS', 'localhost:9000')
        if not s3_access_key:
            s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey')
        if not s3_secret_key:
            s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321')
        if not s3_region_name:
            s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
        if not s3_secure:
            s3_secure = os.getenv('S3_SECURE', '0') == '1'
        if not s3_bucket:
            s3_bucket = os.getenv('S3_BUCKET', 'dev')
    if not broker_url:
        broker_url = os.getenv('WORKER_BROKER_URL', 'redis://0.0.0.0:6379/13')
    if not result_backend:
        result_backend = os.getenv('WORKER_BACKEND_URL',
                                   'redis://0.0.0.0:6379/14')

    if not label:
        label = 'get-latest'

    num_tickers = len(use_tickers)
    last_close_str = get_last_close_str()

    if iex_datasets:
        log.info('{} - getting latest for tickers={} '
                 'iex={}'.format(label, num_tickers, json.dumps(iex_datasets)))
    else:
        log.info('{} - getting latest for tickers={}'.format(
            label, num_tickers))

    for ticker in use_tickers:

        ticker_key = '{}_{}'.format(ticker, last_close_str)

        fetch_req = build_get_new_pricing_request()
        fetch_req['base_key'] = ticker_key
        fetch_req['celery_disabled'] = celery_disabled
        fetch_req['ticker'] = ticker
        fetch_req['label'] = label
        fetch_req['fetch_mode'] = fetch_mode
        fetch_req['iex_datasets'] = iex_datasets
        fetch_req['s3_enabled'] = s3_enabled
        fetch_req['s3_bucket'] = s3_bucket
        fetch_req['s3_address'] = s3_address
        fetch_req['s3_secure'] = s3_secure
        fetch_req['s3_region_name'] = s3_region_name
        fetch_req['s3_access_key'] = s3_access_key
        fetch_req['s3_secret_key'] = s3_secret_key
        fetch_req['s3_key'] = ticker_key
        fetch_req['redis_enabled'] = redis_enabled
        fetch_req['redis_address'] = redis_address
        fetch_req['redis_password'] = redis_password
        fetch_req['redis_db'] = redis_db
        fetch_req['redis_key'] = ticker_key
        fetch_req['redis_expire'] = redis_expire

        log.info('{} - fetching ticker={} last_close={} '
                 'redis_address={} s3_address={}'.format(
                     label, ticker, last_close_str, fetch_req['redis_address'],
                     fetch_req['s3_address']))

        fetch_res = price_utils.run_get_new_pricing_data(work_dict=fetch_req)
        if fetch_res['status'] == SUCCESS:
            log.info('{} - fetched ticker={} '
                     'preparing for extraction'.format(label, ticker))
            extract_req = fetch_req
            extract_records.append(extract_req)
        else:
            log.warning('{} - failed getting ticker={} data '
                        'status={} err={}'.format(
                            label, ticker,
                            get_status(status=fetch_res['status']),
                            fetch_res['err']))
        # end of if worked or not
    # end for all tickers to fetch
    """
    Extract Datasets
    """

    iex_daily_status = FAILED
    iex_minute_status = FAILED
    iex_quote_status = FAILED
    iex_stats_status = FAILED
    iex_peers_status = FAILED
    iex_news_status = FAILED
    iex_financials_status = FAILED
    iex_earnings_status = FAILED
    iex_dividends_status = FAILED
    iex_company_status = FAILED
    yahoo_news_status = FAILED
    yahoo_options_status = FAILED
    yahoo_pricing_status = FAILED

    iex_daily_df = None
    iex_minute_df = None
    iex_quote_df = None
    iex_stats_df = None
    iex_peers_df = None
    iex_news_df = None
    iex_financials_df = None
    iex_earnings_df = None
    iex_dividends_df = None
    iex_company_df = None
    yahoo_option_calls_df = None
    yahoo_option_puts_df = None
    yahoo_pricing_df = None
    yahoo_news_df = None

    extract_iex = True
    if fetch_mode not in ['all', 'iex']:
        extract_iex = False

    extract_yahoo = True
    if fetch_mode not in ['all', 'yahoo']:
        extract_yahoo = False

    for service_dict in extract_records:
        ticker_data = {}
        ticker = service_dict['ticker']

        extract_req = get_ds_dict(ticker=ticker,
                                  base_key=service_dict.get('base_key', None),
                                  ds_id=label,
                                  service_dict=service_dict)

        if 'daily' in iex_datasets or extract_iex:
            iex_daily_status, iex_daily_df = \
                iex_extract_utils.extract_daily_dataset(
                    extract_req)
            if iex_daily_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_daily={}'.format(ticker))
        if 'minute' in iex_datasets or extract_iex:
            iex_minute_status, iex_minute_df = \
                iex_extract_utils.extract_minute_dataset(
                    extract_req)
            if iex_minute_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_minute={}'.format(ticker))
        if 'quote' in iex_datasets or extract_iex:
            iex_quote_status, iex_quote_df = \
                iex_extract_utils.extract_quote_dataset(
                    extract_req)
            if iex_quote_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_quote={}'.format(ticker))
        if 'stats' in iex_datasets or extract_iex:
            iex_stats_status, iex_stats_df = \
                iex_extract_utils.extract_stats_dataset(
                    extract_req)
            if iex_stats_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_stats={}'.format(ticker))
        if 'peers' in iex_datasets or extract_iex:
            iex_peers_status, iex_peers_df = \
                iex_extract_utils.extract_peers_dataset(
                    extract_req)
            if iex_peers_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_peers={}'.format(ticker))
        if 'news' in iex_datasets or extract_iex:
            iex_news_status, iex_news_df = \
                iex_extract_utils.extract_news_dataset(
                    extract_req)
            if iex_news_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch iex_news={}'.format(ticker))
        if 'financials' in iex_datasets or extract_iex:
            iex_financials_status, iex_financials_df = \
                iex_extract_utils.extract_financials_dataset(
                    extract_req)
            if iex_financials_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_financials={}'.format(ticker))
        if 'earnings' in iex_datasets or extract_iex:
            iex_earnings_status, iex_earnings_df = \
                iex_extract_utils.extract_earnings_dataset(
                    extract_req)
            if iex_earnings_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_earnings={}'.format(ticker))
        if 'dividends' in iex_datasets or extract_iex:
            iex_dividends_status, iex_dividends_df = \
                iex_extract_utils.extract_dividends_dataset(
                    extract_req)
            if iex_dividends_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_dividends={}'.format(ticker))
        if 'company' in iex_datasets or extract_iex:
            iex_company_status, iex_company_df = \
                iex_extract_utils.extract_company_dataset(
                    extract_req)
            if iex_company_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch iex_company={}'.format(ticker))
        # end of iex extracts

        if extract_yahoo:
            yahoo_options_status, yahoo_option_calls_df = \
                yahoo_extract_utils.extract_option_calls_dataset(
                    extract_req)
            yahoo_options_status, yahoo_option_puts_df = \
                yahoo_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if yahoo_options_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch yahoo_options={}'.format(ticker))
            yahoo_pricing_status, yahoo_pricing_df = \
                yahoo_extract_utils.extract_pricing_dataset(
                    extract_req)
            if yahoo_pricing_status != SUCCESS:
                if verbose:
                    log.warning(
                        'unable to fetch yahoo_pricing={}'.format(ticker))
            yahoo_news_status, yahoo_news_df = \
                yahoo_extract_utils.extract_yahoo_news_dataset(
                    extract_req)
            if yahoo_news_status != SUCCESS:
                if verbose:
                    log.warning('unable to fetch yahoo_news={}'.format(ticker))
        # end of yahoo extracts

        ticker_data['daily'] = iex_daily_df
        ticker_data['minute'] = iex_minute_df
        ticker_data['quote'] = iex_quote_df
        ticker_data['stats'] = iex_stats_df
        ticker_data['peers'] = iex_peers_df
        ticker_data['news1'] = iex_news_df
        ticker_data['financials'] = iex_financials_df
        ticker_data['earnings'] = iex_earnings_df
        ticker_data['dividends'] = iex_dividends_df
        ticker_data['company'] = iex_company_df
        ticker_data['calls'] = yahoo_option_calls_df
        ticker_data['puts'] = yahoo_option_puts_df
        ticker_data['pricing'] = yahoo_pricing_df
        ticker_data['news'] = yahoo_news_df

        rec[ticker] = ticker_data
    # end of for service_dict in extract_records

    return rec
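To keep request volume down, callers can pass a subset of the dataset names listed in ``default_iex_datasets``; a sketch, assuming Redis and Minio are running on the defaults above:

from analysis_engine.fetch import fetch

# only gather the IEX daily and quote datasets for two tickers
res = fetch(
    tickers=['SPY', 'NFLX'],
    fetch_mode='iex',
    iex_datasets=['daily', 'quote'])
for ticker in res:
    print(f'{ticker} datasets: {list(res[ticker].keys())}')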
    def setUp(self):
        """setUp"""
        self.ticker = 'SPY'
        self.timeseries = 'day'
        self.trade_strategy = 'count'
        with open('tests/datasets/spy-daily.json', 'r') as f:
            self.daily_dataset = json.loads(f.read())
        self.daily_df = pd.DataFrame(self.daily_dataset)
        self.daily_df['date'] = pd.to_datetime(
            self.daily_df['date'])
        self.start_date_str = self.daily_df['date'].iloc[0].strftime(
            ae_consts.COMMON_TICK_DATE_FORMAT)
        self.end_date_str = self.daily_df['date'].iloc[-1].strftime(
            ae_consts.COMMON_TICK_DATE_FORMAT)
        self.minute_df = pd.DataFrame([])
        self.options_df = pd.DataFrame([])
        self.use_date = '2018-11-05'
        self.dataset_id = '{}_{}'.format(
            self.ticker,
            self.use_date)
        self.datasets = [
            'daily'
        ]
        self.data = {}
        self.data[self.ticker] = [
            {
                'id': self.dataset_id,
                'date': self.use_date,
                'data': {
                    'daily': self.daily_df,
                    'minute': self.minute_df,
                    'options': self.options_df
                }
            }
        ]
        self.balance = 10000.00
        self.last_close_str = ae_utils.get_last_close_str(
            fmt=ae_consts.COMMON_DATE_FORMAT)
        self.output_dir = (
            '/opt/sa/tests/datasets/algo')

        self.willr_close_path = (
            'analysis_engine/mocks/example_indicator_williamsr.py')
        self.willr_open_path = (
            'analysis_engine/mocks/example_indicator_williamsr_open.py')
        self.algo_config_dict = {
            'name': 'test_5_days_ahead',
            'algo_module_path': None,
            'algo_version': 1,
            'timeseries': self.timeseries,
            'trade_strategy': self.trade_strategy,
            'trade_horizon': 5,
            'num_owned': 10,
            'buy_shares': 10,
            'balance': 100000,
            'ticker': 'SPY',
            'verbose': True,
            'verbose_processor': True,
            'positions': {
                'SPY': {
                    'shares': 10,
                    'buys': [],
                    'sells': []
                }
            },
            'buy_rules': {
                'confidence': 75,
                'min_indicators': 3
            },
            'sell_rules': {
                'confidence': 75,
                'min_indicators': 3
            },
            'indicators': [
                {
                    'name': 'willr_-70_-30',
                    'module_path': self.willr_close_path,
                    'category': 'technical',
                    'type': 'momentum',
                    'uses_data': 'daily',
                    'high': 0,
                    'low': 0,
                    'close': 0,
                    'open': 0,
                    'willr_value': 0,
                    'num_points': 10,
                    'buy_below': -70,
                    'sell_above': -30,
                    'is_buy': False,
                    'is_sell': False,
                    'verbose': True
                },
                {
                    'name': 'willr_-80_-20',
                    'module_path': self.willr_close_path,
                    'category': 'technical',
                    'type': 'momentum',
                    'uses_data': 'daily',
                    'high': 0,
                    'low': 0,
                    'close': 0,
                    'open': 0,
                    'willr_value': 0,
                    'num_points': 10,
                    'buy_below': -80,
                    'sell_above': -20,
                    'is_buy': False,
                    'is_sell': False
                },
                {
                    'name': 'willr_-90_-10',
                    'module_path': self.willr_close_path,
                    'category': 'technical',
                    'type': 'momentum',
                    'uses_data': 'daily',
                    'high': 0,
                    'low': 0,
                    'close': 0,
                    'open': 0,
                    'willr_value': 0,
                    'num_points': 10,
                    'buy_below': -90,
                    'sell_above': -10,
                    'is_buy': False,
                    'is_sell': False
                },
                {
                    'name': 'willr_open_-80_-20',
                    'module_path': self.willr_open_path,
                    'category': 'technical',
                    'type': 'momentum',
                    'uses_data': 'daily',
                    'high': 0,
                    'low': 0,
                    'close': 0,
                    'open': 0,
                    'willr_open_value': 0,
                    'num_points': 15,
                    'buy_below': -80,
                    'sell_above': -20,
                    'is_buy': False,
                    'is_sell': False
                }
            ],
            'slack': {
                'webhook': None
            }
        }
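The same configuration shape can be written to disk and handed to ``AlgoRunner`` through its ``algo_config`` argument (as ``test_latest`` does above); a sketch with a trimmed, hypothetical config:

import json

# trimmed, hypothetical config mirroring algo_config_dict above
algo_config_dict = {
    'name': 'test_5_days_ahead',
    'timeseries': 'day',
    'trade_strategy': 'count',
    'balance': 100000,
    'ticker': 'SPY'
}
config_path = '/tmp/test_5_days_ahead.json'  # assumed writable path
with open(config_path, 'w') as f:
    json.dump(algo_config_dict, f, indent=4)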
Example #6
def fetch_tickers_from_screener(
        url,
        columns=DEFAULT_FINVIZ_COLUMNS,
        as_json=False,
        soup_selector='td.screener-body-table-nw',
        label='fz-screen-converter'):
    """fetch_tickers_from_screener

    Convert all the tickers on a FinViz screener
    url to a ``pandas.DataFrame``. Returns a dictionary
    with a ticker list and a DataFrame, or a json-serialized
    DataFrame as a string (by default ``as_json=False`` will
    return a ``pandas.DataFrame`` if
    ``returned-dictionary['status'] == SUCCESS``).

    Works with urls created on:

    https://finviz.com/screener.ashx

    .. code-block:: python

        import analysis_engine.finviz.fetch_api as fv

        url = (
            'https://finviz.com/screener.ashx?'
            'v=111&'
            'f=cap_midunder,exch_nyse,fa_div_o5,idx_sp500'
            '&ft=4')
        res = fv.fetch_tickers_from_screener(url=url)
        print(res)

    :param url: FinViz screener url
    :param columns: ordered header column as a list of strings
                    and corresponds to the header row from the
                    FinViz screener table
    :param soup_selector: ``bs4.BeautifulSoup.selector`` string
                          for pulling selected html data
                          (by default ``td.screener-body-table-nw``)
    :param as_json: optional - return the DataFrame as a
                    json-serialized string instead
    :param label: log tracking label string
    """

    rec = {
        'data': None,
        'created': get_last_close_str(),
        'tickers': []
    }
    res = req_utils.build_result(
        status=NOT_RUN,
        err=None,
        rec=rec)

    try:

        log.info(
            '{} fetching url={}'.format(
                label,
                url))

        response = requests.get(url)

        if response.status_code != requests.codes.ok:
            err = (
                '{} finviz returned a non-200 HTTP '
                'status_code={} with text={} for url={}'.format(
                    label,
                    response.status_code,
                    response.text,
                    url))
            log.error(err)
            return req_utils.build_result(
                status=ERR,
                err=err,
                rec=rec)
        # end of checking for a good HTTP response status code

        soup = bs4.BeautifulSoup(
            response.text,
            features='html.parser')
        selected = soup.select(soup_selector)

        log.debug(
            '{} found={} url={}'.format(
                label,
                len(selected),
                url))

        ticker_list = []
        rows = []
        use_columns = columns
        num_columns = len(use_columns)
        new_row = {}
        col_idx = 0

        for idx, node in enumerate(selected):

            if col_idx >= num_columns:
                col_idx = 0
            column_name = use_columns[col_idx]
            test_text = str(node.text).lower().strip()
            col_idx += 1

            if column_name != 'ignore' and (
                    test_text != 'save as portfolio'
                    and test_text != 'export'):

                cur_text = str(node.text).strip()

                if column_name == 'ticker':
                    ticker_list.append(cur_text)
                    new_row[column_name] = cur_text.upper()
                else:
                    new_row[column_name] = cur_text
                # end of filtering bad sections around table

                if len(new_row) >= num_columns:
                    log.debug(
                        '{} adding ticker={}'.format(
                            label,
                            new_row['ticker']))
                    rows.append(new_row)
                    new_row = {}
                    col_idx = 0
                # end of if valid row
            # end if column is valid
        # end of walking through all matched html data on the screener

        log.debug(
            '{} done convert url={} to tickers={} '
            'rows={}'.format(
                label,
                url,
                ticker_list,
                len(rows)))

        df = pd.DataFrame(
            rows)

        log.info(
            '{} fetch done - df={} from url={} with tickers={} '
            'rows={}'.format(
                label,
                len(df.index),
                url,
                ticker_list,
                len(rows)))

        rec['tickers'] = ticker_list
        rec['data'] = df

        res = req_utils.build_result(
            status=SUCCESS,
            err=None,
            rec=rec)

    except Exception as e:
        rec['tickers'] = []
        rec['data'] = None
        err = (
            '{} failed converting screen url={} to list '
            'with ex={}'.format(
                label,
                url,
                e))
        log.error(err)
        res = req_utils.build_result(
            status=EX,
            err=err,
            rec=rec)
    # end of try/ex

    return res
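The cell-walking loop above folds the flat list of ``<td>`` nodes back into rows by cycling ``col_idx`` through the header columns; the same technique in isolation, with made-up cell text:

import pandas as pd

columns = ['ticker', 'company', 'price']  # assumed screener header
cells = ['AAPL', 'Apple Inc.', '157.92',  # flat cell text, one row
         'MSFT', 'Microsoft', '104.27']   # per len(columns) cells

rows = []
new_row = {}
for text in cells:
    new_row[columns[len(new_row)]] = text
    if len(new_row) == len(columns):  # a full row is assembled
        rows.append(new_row)
        new_row = {}

df = pd.DataFrame(rows)
print(df)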
Example #7
def extract_option_calls_dataset(ticker=None,
                                 date=None,
                                 work_dict=None,
                                 scrub_mode='sort-by-date',
                                 verbose=False):
    """extract_option_calls_dataset

    Extract the TD options calls for a ticker and
    return a tuple ``(status, pandas.DataFrame)``

    .. code-block:: python

        import analysis_engine.td.extract_df_from_redis as td_extract

        # extract by historical date is also supported as an arg
        # date='2019-02-15'
        calls_status, calls_df = td_extract.extract_option_calls_dataset(
            ticker='SPY')
        print(calls_df)

    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: optional - string type of
        scrubbing handler to run
    :param verbose: optional - boolean for turning on logging
    """
    label = 'extract_td_calls'
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
        label = f'{work_dict.get("label", label)}'
    if not use_date:
        use_date = latest_close_date

    ds_id = ticker
    df_type = td_consts.DATAFEED_TD_CALLS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_db = ae_consts.REDIS_DB
    redis_key = f'{ticker}_{use_date}_tdcalls'
    redis_host, redis_port = ae_consts.get_redis_host_and_port(req=work_dict)
    redis_password = ae_consts.REDIS_PASSWORD
    s3_key = redis_key

    if work_dict:
        redis_db = work_dict.get('redis_db', redis_db)
        redis_password = work_dict.get('redis_password', redis_password)
        verbose = work_dict.get('verbose_td', verbose)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    exp_date_str = None
    calls_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(label=label,
                                                      host=redis_host,
                                                      port=redis_port,
                                                      db=redis_db,
                                                      password=redis_password,
                                                      key=redis_key,
                                                      decompress_df=True)

        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            calls_json = None
            if 'tdcalls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['tdcalls']
            elif 'calls' in redis_rec['rec']['data']:
                calls_json = redis_rec['rec']['data']['calls']
            else:
                calls_json = redis_rec['rec']['data']
            if not calls_json:
                return ae_consts.SUCCESS, pd.DataFrame([])
            if verbose:
                log.info(f'{label} - {df_str} redis convert calls to df')
            exp_date_str = None
            try:
                calls_df = pd.read_json(calls_json, orient='records')
                if len(calls_df.index) == 0:
                    return ae_consts.SUCCESS, pd.DataFrame([])
                if 'date' not in calls_df:
                    if verbose:
                        log.error(
                            'failed to find date column in TD calls '
                            f'df={calls_df} from lens={len(calls_df.index)}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                calls_df = calls_df.sort_values(by=['date', 'strike'])
                """
                for i, r in calls_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in calls_df:
                        calls_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = (calls_df['exp_date'].iloc[-1])

                calls_df['date'] = calls_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)

            except Exception as f:
                not_fixed = True
                if ('Can only use .dt accessor with '
                        'datetimelike values') in str(f):
                    try:
                        log.critical(f'fixing dates in {redis_key}')
                        # remove epoch second data and
                        # use only the millisecond date values
                        bad_date = ae_consts.EPOCH_MINIMUM_DATE
                        calls_df.loc[
                            calls_df['date'] < bad_date, 'date'] = None
                        calls_df = calls_df.dropna(axis=0, how='any')
                        fmt = ae_consts.COMMON_TICK_DATE_FORMAT
                        calls_df['date'] = pd.to_datetime(
                            calls_df['date'], unit='ms').dt.strftime(fmt)
                        not_fixed = False
                    except Exception as g:
                        log.critical(
                            f'failed to parse date column {calls_df["date"]} '
                            f'with dt.strftime ex={f} and EPOCH EX={g}')
                        return ae_consts.SUCCESS, pd.DataFrame([])
                # if able to fix error or not

                if not_fixed:
                    log.debug(f'{label} - {df_str} redis_key={redis_key} '
                              f'no calls df found or ex={f}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                # if unable to fix - return out

                log.error(f'{label} - {df_str} redis_key={redis_key} '
                          f'no calls df found or ex={f}')
                return ae_consts.SUCCESS, pd.DataFrame([])
            # end of try/ex to convert to df
            if verbose:
                log.info(
                    f'{label} - {df_str} redis_key={redis_key} '
                    f'calls={len(calls_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option calls in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        if verbose:
            log.error(
                f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                f'calls from redis={redis_host}:{redis_port}@{redis_db} '
                f'key={redis_key} ex={e}')
        return ae_consts.ERR, pd.DataFrame([])
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=calls_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
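The date-repair branch above handles frames that mix epoch-second and epoch-millisecond values by dropping rows below a minimum cutoff and re-parsing with ``unit='ms'``; the core conversion in isolation, with synthetic values (the cutoff and format stand in for ``ae_consts.EPOCH_MINIMUM_DATE`` and ``ae_consts.COMMON_TICK_DATE_FORMAT``):

import pandas as pd

# synthetic mix: one epoch-seconds value, two epoch-millisecond values
df = pd.DataFrame({'date': [1550246400, 1550246400000, 1550332800000]})

bad_date = 1000000000000  # assumed minimum valid epoch-ms cutoff
df.loc[df['date'] < bad_date, 'date'] = None
df = df.dropna(axis=0, how='any')
df['date'] = pd.to_datetime(
    df['date'], unit='ms').dt.strftime('%Y-%m-%d %H:%M:%S')
print(df)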
def backtest_with_runner():
    """backtest_with_runner

    build and publish a trading history from an algorithm config.

    ::

        backtest_with_runner.py -t TICKER -c ALGO_CONFIG -s START_DATE
        -k S3_KEY -b S3_BUCKET -l
    """

    parser = argparse.ArgumentParser(
        description=('backtest an algorithm and publish '
                     'the trading history'))
    parser.add_argument('-t',
                        help=('ticker symbol'),
                        required=False,
                        dest='ticker')
    parser.add_argument('-k', help=('s3_key'), required=False, dest='s3_key')
    parser.add_argument('-b',
                        help=('s3_bucket'),
                        required=False,
                        dest='s3_bucket')
    parser.add_argument('-s',
                        help=('start date format YYYY-MM-DD'),
                        required=False,
                        dest='start_date')
    parser.add_argument('-c',
                        help=('algo config file'),
                        required=False,
                        dest='algo_config')
    parser.add_argument('-l',
                        help=('run a backtest with the latest '
                              'pricing data'),
                        required=False,
                        dest='latest',
                        action='store_true')
    parser.add_argument('-d',
                        help='debug',
                        required=False,
                        dest='debug',
                        action='store_true')
    args = parser.parse_args()

    ticker = 'SPY'
    s3_bucket = 'algohistory'
    s3_key = f'trade_history_{ticker}'
    start_date = '2019-01-01'
    algo_config = '/opt/sa/cfg/default_algo.json'
    latest = False
    show_plot = True
    debug = False

    if args.ticker:
        ticker = args.ticker.upper()
    if args.s3_key:
        s3_key = args.s3_key
    if args.s3_bucket:
        s3_bucket = args.s3_bucket
    if args.start_date:
        start_date = args.start_date
    if args.algo_config:
        algo_config = args.algo_config
    if args.latest:
        latest = True
        start_date = ae_utils.get_last_close_str()
    if args.debug:
        debug = True

    history_loc = f's3://{s3_bucket}/{s3_key}'

    log.info(f'building {ticker} trade history '
             f'start_date={start_date} '
             f'config={algo_config} '
             f'history_loc={history_loc}')

    runner = algo_runner.AlgoRunner(ticker=ticker,
                                    start_date=start_date,
                                    history_loc=history_loc,
                                    algo_config=algo_config,
                                    verbose_algo=debug,
                                    verbose_processor=False,
                                    verbose_indicators=False)

    trading_history_df = None
    if latest:
        trading_history_df = runner.latest()
        log.info(f'{ticker} latest:')
        print(trading_history_df[['minute', 'close']].tail(5))
        log.info('Other available columns to plot:')
        print(trading_history_df.columns.values)
        if show_plot:
            plot.plot_trading_history(
                title=(f'{ticker} at '
                       f'${trading_history_df["close"].iloc[-1]} '
                       f'at: '
                       f'{trading_history_df["minute"].iloc[-1]}'),
                df=trading_history_df,
                red='high',
                blue='close')
    else:
        runner.start()

    sys.exit(0)
Example #9
def fetch_new_stock_datasets():
    """fetch_new_stock_datasets

    Collect datasets for a ticker from IEX Cloud or Tradier

    .. warning:: IEX Cloud charges per request. Here are example
        commands to help you monitor your usage while handling
        first time users and automation (intraday, daily, and weekly
        options are supported).

    **Setup**

    ::

        export IEX_TOKEN=YOUR_IEX_CLOUD_TOKEN
        export TD_TOKEN=YOUR_TRADIER_TOKEN

    **Pull Data for a Ticker from IEX and Tradier**

    ::

        fetch -t TICKER

    **Pull from All Supported IEX Feeds**

    ::

        fetch -t TICKER -g iex-all

    **Pull from All Supported Tradier Feeds**

    ::

        fetch -t TICKER -g td

    **Intraday IEX and Tradier Feeds (only minute and news to reduce costs)**

    ::

        fetch -t TICKER -g intra
        # or manually:
        # fetch -t TICKER -g td,iex_min,iex_news

    **Daily IEX Feeds (daily and news)**

    ::

        fetch -t TICKER -g daily
        # or manually:
        # fetch -t TICKER -g iex_day,iex_news

    **Weekly IEX Feeds (company, financials, earnings, dividends, and peers)**

    ::

        fetch -t TICKER -g weekly
        # or manually:
        # fetch -t TICKER -g iex_fin,iex_earn,iex_div,iex_peers,iex_news,
        # iex_comp

    **IEX Minute**

    ::

        fetch -t TICKER -g iex_min

    **IEX News**

    ::

        fetch -t TICKER -g iex_news

    **IEX Daily**

    ::

        fetch -t TICKER -g iex_day

    **IEX Stats**

    ::

        fetch -t TICKER -g iex_stats

    **IEX Peers**

    ::

        fetch -t TICKER -g iex_peers

    **IEX Financials**

    ::

        fetch -t TICKER -g iex_fin

    **IEX Earnings**

    ::

        fetch -t TICKER -g iex_earn

    **IEX Dividends**

    ::

        fetch -t TICKER -g iex_div

    **IEX Quote**

    ::

        fetch -t TICKER -g iex_quote

    **IEX Company**

    ::

        fetch -t TICKER -g iex_comp

    .. note:: This requires the following services are listening on:

        - redis ``localhost:6379``
        - minio ``localhost:9000``

    """
    log.info('start - fetch_new_stock_datasets')

    parser = argparse.ArgumentParser(
        description=('Download and store the latest stock pricing, '
                     'news, and options chain data '
                     'and store it in Minio (S3) and Redis. '
                     'Also includes support for getting FinViz '
                     'screener tickers'))
    parser.add_argument('-t', help=('ticker'), required=False, dest='ticker')
    parser.add_argument(
        '-g',
        help=('optional - fetch mode: '
              'initial = default fetch from initial data feeds '
              '(IEX and Tradier), '
              'intra = fetch intraday from IEX and Tradier, '
              'daily or day = fetch daily from IEX, '
              'weekly = fetch weekly from IEX, '
              'all = fetch from all data feeds, '
              'td = fetch from Tradier feeds only, '
              'iex = fetch from IEX Cloud feeds only, '
              'min or minute or iex_min = fetch IEX Cloud intraday '
              'per-minute feed '
              'https://iexcloud.io/docs/api/#historical-prices, '
              'day or daily or iex_day = fetch IEX Cloud daily feed '
              'https://iexcloud.io/docs/api/#historical-prices, '
              'quote or iex_quote = fetch IEX Cloud quotes feed '
              'https://iexcloud.io/docs/api/#quote, '
              'stats or iex_stats = fetch IEX Cloud key stats feed '
              'https://iexcloud.io/docs/api/#key-stats, '
              'peers or iex_peers = fetch from just IEX Cloud peers feed '
              'https://iexcloud.io/docs/api/#peers, '
              'news or iex_news = fetch IEX Cloud news feed '
              'https://iexcloud.io/docs/api/#news, '
              'fin or iex_fin = fetch IEX Cloud financials feed '
              'https://iexcloud.io/docs/api/#financials, '
              'earn or iex_earn = fetch from just IEX Cloud earnings feed '
              'https://iexcloud.io/docs/api/#earnings, '
              'div or iex_div = fetch from just IEX Cloud dividends feed '
              'https://iexcloud.io/docs/api/#dividends, '
              'iex_comp = fetch from just IEX Cloud company feed '
              'https://iexcloud.io/docs/api/#company'),
        required=False,
        dest='fetch_mode')
    parser.add_argument('-i',
                        help=('optional - ticker id '
                              'not used without a database'),
                        required=False,
                        dest='ticker_id')
    parser.add_argument('-e',
                        help=('optional - options expiration date'),
                        required=False,
                        dest='exp_date_str')
    parser.add_argument('-l',
                        help=('optional - path to the log config file'),
                        required=False,
                        dest='log_config_path')
    parser.add_argument('-b',
                        help=('optional - broker url for Celery'),
                        required=False,
                        dest='broker_url')
    parser.add_argument('-B',
                        help=('optional - backend url for Celery'),
                        required=False,
                        dest='backend_url')
    parser.add_argument('-k',
                        help=('optional - s3 access key'),
                        required=False,
                        dest='s3_access_key')
    parser.add_argument('-s',
                        help=('optional - s3 secret key'),
                        required=False,
                        dest='s3_secret_key')
    parser.add_argument('-a',
                        help=('optional - s3 address format: <host:port>'),
                        required=False,
                        dest='s3_address')
    parser.add_argument('-S',
                        help=('optional - s3 ssl or not'),
                        required=False,
                        dest='s3_secure')
    parser.add_argument('-u',
                        help=('optional - s3 bucket name'),
                        required=False,
                        dest='s3_bucket_name')
    parser.add_argument('-G',
                        help=('optional - s3 region name'),
                        required=False,
                        dest='s3_region_name')
    parser.add_argument('-p',
                        help=('optional - redis_password'),
                        required=False,
                        dest='redis_password')
    parser.add_argument('-r',
                        help=('optional - redis_address format: <host:port>'),
                        required=False,
                        dest='redis_address')
    parser.add_argument('-n',
                        help=('optional - redis and s3 key name'),
                        required=False,
                        dest='keyname')
    parser.add_argument(
        '-m',
        help=('optional - redis database number (0 by default)'),
        required=False,
        dest='redis_db')
    parser.add_argument('-x',
                        help=('optional - redis expiration in seconds'),
                        required=False,
                        dest='redis_expire')
    parser.add_argument('-z',
                        help=('optional - strike price'),
                        required=False,
                        dest='strike')
    parser.add_argument(
        '-c',
        help=('optional - contract type "C" for calls "P" for puts'),
        required=False,
        dest='contract_type')
    parser.add_argument(
        '-P',
        help=('optional - get pricing data if "1", disabled if "0"'),
        required=False,
        dest='get_pricing')
    parser.add_argument(
        '-N',
        help=('optional - get news data if "1", disabled if "0"'),
        required=False,
        dest='get_news')
    parser.add_argument(
        '-O',
        help=('optional - get options data if "1", disabled if "0"'),
        required=False,
        dest='get_options')
    parser.add_argument('-U',
                        help=('optional - s3 enabled for publishing '
                              'if "1", disabled if "0"'),
                        required=False,
                        dest='s3_enabled')
    parser.add_argument(
        '-R',
        help=('optional - redis enabled for publishing '
              'if "1", disabled if "0"'),
        required=False,
        dest='redis_enabled')
    parser.add_argument('-A',
                        help=('optional - run an analysis '
                              'supported modes: scn'),
                        required=False,
                        dest='analysis_type')
    parser.add_argument('-L',
                        help=('optional - screener urls to pull '
                              'tickers for analysis'),
                        required=False,
                        dest='urls')
    parser.add_argument(
        '-Z',
        help=('optional - run with a Celery engine instead of '
              'the default offline mode for local testing and demos'),
        required=False,
        dest='celery_enabled',
        action='store_true')
    parser.add_argument('-F',
                        help=('optional - backfill date for filling in '
                              'gaps for the IEX Cloud minute dataset '
                              'format is YYYY-MM-DD'),
                        required=False,
                        dest='backfill_date')
    parser.add_argument('-d',
                        help=('debug'),
                        required=False,
                        dest='debug',
                        action='store_true')
    args = parser.parse_args()

    run_offline = True
    ticker = ae_consts.TICKER
    ticker_id = ae_consts.TICKER_ID
    fetch_mode = 'initial'
    exp_date_str = ae_consts.NEXT_EXP_STR
    ssl_options = ae_consts.SSL_OPTIONS
    transport_options = ae_consts.TRANSPORT_OPTIONS
    broker_url = ae_consts.WORKER_BROKER_URL
    backend_url = ae_consts.WORKER_BACKEND_URL
    celery_config_module = ae_consts.WORKER_CELERY_CONFIG_MODULE
    include_tasks = ae_consts.INCLUDE_TASKS
    s3_access_key = ae_consts.S3_ACCESS_KEY
    s3_secret_key = ae_consts.S3_SECRET_KEY
    s3_region_name = ae_consts.S3_REGION_NAME
    s3_address = ae_consts.S3_ADDRESS
    s3_secure = ae_consts.S3_SECURE
    s3_bucket_name = ae_consts.S3_BUCKET
    s3_key = ae_consts.S3_KEY
    redis_address = ae_consts.REDIS_ADDRESS
    redis_key = ae_consts.REDIS_KEY
    redis_password = ae_consts.REDIS_PASSWORD
    redis_db = ae_consts.REDIS_DB
    redis_expire = ae_consts.REDIS_EXPIRE
    strike = None
    contract_type = None
    get_pricing = True
    get_news = True
    get_options = True
    s3_enabled = True
    redis_enabled = True
    analysis_type = None
    backfill_date = None
    debug = False

    if args.ticker:
        ticker = args.ticker.upper()
    if args.ticker_id:
        ticker_id = args.ticker_id
    if args.exp_date_str:
        exp_date_str = args.exp_date_str
    if args.broker_url:
        broker_url = args.broker_url
    if args.backend_url:
        backend_url = args.backend_url
    if args.s3_access_key:
        s3_access_key = args.s3_access_key
    if args.s3_secret_key:
        s3_secret_key = args.s3_secret_key
    if args.s3_region_name:
        s3_region_name = args.s3_region_name
    if args.s3_address:
        s3_address = args.s3_address
    if args.s3_secure:
        s3_secure = args.s3_secure
    if args.s3_bucket_name:
        s3_bucket_name = args.s3_bucket_name
    if args.keyname:
        s3_key = args.keyname
        redis_key = args.keyname
    if args.redis_address:
        redis_address = args.redis_address
    if args.redis_password:
        redis_password = args.redis_password
    if args.redis_db:
        redis_db = args.redis_db
    if args.redis_expire:
        redis_expire = args.redis_expire
    if args.strike:
        strike = args.strike
    if args.contract_type:
        contract_type = args.contract_type
    if args.get_pricing:
        get_pricing = args.get_pricing == '1'
    if args.get_news:
        get_news = args.get_news == '1'
    if args.get_options:
        get_options = args.get_options == '1'
    if args.s3_enabled:
        s3_enabled = args.s3_enabled == '1'
    if args.redis_enabled:
        redis_enabled = args.redis_enabled == '1'
    if args.fetch_mode:
        fetch_mode = str(args.fetch_mode).lower()
    if args.analysis_type:
        analysis_type = str(args.analysis_type).lower()
    if args.celery_enabled:
        run_offline = False
    if args.backfill_date:
        backfill_date = args.backfill_date
    if args.debug:
        debug = True

    work = api_requests.build_get_new_pricing_request()

    work['ticker'] = ticker
    work['ticker_id'] = ticker_id
    work['s3_bucket'] = s3_bucket_name
    work['s3_key'] = s3_key
    work['redis_key'] = redis_key
    work['strike'] = strike
    work['contract'] = contract_type
    work['exp_date'] = exp_date_str
    work['s3_access_key'] = s3_access_key
    work['s3_secret_key'] = s3_secret_key
    work['s3_region_name'] = s3_region_name
    work['s3_address'] = s3_address
    work['s3_secure'] = s3_secure
    work['redis_address'] = redis_address
    work['redis_password'] = redis_password
    work['redis_db'] = redis_db
    work['redis_expire'] = redis_expire
    work['get_pricing'] = get_pricing
    work['get_news'] = get_news
    work['get_options'] = get_options
    work['s3_enabled'] = s3_enabled
    work['redis_enabled'] = redis_enabled
    work['fetch_mode'] = fetch_mode
    work['analysis_type'] = analysis_type
    work['iex_datasets'] = iex_consts.DEFAULT_FETCH_DATASETS
    work['backfill_date'] = backfill_date
    work['debug'] = debug
    work['label'] = f'ticker={ticker}'

    if analysis_type == 'scn':
        label = f'screener={work["ticker"]}'
        fv_urls = []
        if args.urls:
            fv_urls = str(args.urls).split('|')
        if len(fv_urls) == 0:
            fv_urls = os.getenv('SCREENER_URLS', '').split('|')
        screener_req = api_requests.build_screener_analysis_request(
            ticker=ticker, fv_urls=fv_urls, label=label)
        work.update(screener_req)
        start_screener_analysis(req=work)
    # end of analysis_type
    else:
        last_close_date = ae_utils.last_close()
        last_close_str = last_close_date.strftime(ae_consts.COMMON_DATE_FORMAT)
        cache_base_key = f'{ticker}_{last_close_str}'
        if not args.keyname:
            work['s3_key'] = cache_base_key
            work['redis_key'] = cache_base_key

        path_to_tasks = 'analysis_engine.work_tasks'
        task_name = (f'{path_to_tasks}'
                     f'.get_new_pricing_data.get_new_pricing_data')
        task_res = None
        if ae_consts.is_celery_disabled() or run_offline:
            work['celery_disabled'] = True
            work['verbose'] = debug
            log.debug(f'starting without celery work={ae_consts.ppj(work)} '
                      f'offline={run_offline}')
            task_res = task_pricing.get_new_pricing_data(work)
            status_str = ae_consts.get_status(status=task_res['status'])

            cur_date = backfill_date
            if not backfill_date:
                cur_date = ae_utils.get_last_close_str()
            redis_arr = work["redis_address"].split(':')
            include_results = ''
            if debug:
                include_results = task_res['rec']
            if task_res['status'] == ae_consts.SUCCESS:
                if task_res['rec']['num_success'] == 0:
                    log.error(f'failed fetching ticker={work["ticker"]} '
                              f'from {fetch_mode} - please check the '
                              'environment variables')
                else:
                    log.info(f'done fetching ticker={work["ticker"]} '
                             f'mode={fetch_mode} '
                             f'status={status_str} '
                             f'err={task_res["err"]} {include_results}')
                    print('View keys in redis with:\n'
                          f'redis-cli -h {redis_arr[0]} '
                          'keys '
                          f'"{work["ticker"]}_{cur_date}*"')
            elif task_res['status'] == ae_consts.MISSING_TOKEN:
                print('Set an IEX or Tradier token: '
                      '\n'
                      '  export IEX_TOKEN=YOUR_IEX_TOKEN\n'
                      '  export TD_TOKEN=YOUR_TD_TOKEN\n')
            else:
                log.error(f'done fetching ticker={work["ticker"]} '
                          f'mode={fetch_mode} '
                          f'status={status_str} '
                          f'err={task_res["err"]}')
            # end of status if/else
        else:
            log.debug(f'connecting to broker={broker_url} '
                      f'backend={backend_url}')

            # Get the Celery app
            app = get_celery_app.get_celery_app(
                name=__name__,
                auth_url=broker_url,
                backend_url=backend_url,
                path_to_config_module=celery_config_module,
                ssl_options=ssl_options,
                transport_options=transport_options,
                include_tasks=include_tasks)

            log.debug(f'calling task={task_name} - work={ae_consts.ppj(work)}')
            job_id = app.send_task(task_name, (work, ))
            log.debug(f'task={task_name} - job_id={job_id}')
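            # A hedged sketch (not in the source): optionally block on the
            # dispatched task. ``job_id`` is a ``celery.result.AsyncResult``
            # and ``get(timeout=...)`` is standard Celery API; this assumes
            # a worker is consuming from the broker configured above.
            # task_res = job_id.get(timeout=120)
            # log.info(f'job_id={job_id} status='
            #          f'{ae_consts.get_status(status=task_res["status"])}')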
    def publish_trading_history(self,
                                records_for_history,
                                pt_s3_access_key=None,
                                pt_s3_secret_key=None,
                                pt_s3_address=None,
                                pt_s3_region=None,
                                pt_s3_bucket=None,
                                pt_s3_key=None,
                                pt_s3_secure=ae_consts.NOT_SET,
                                **kwargs):
        """publish_trading_history

        Helper for publishing a trading history
        to another S3 service like AWS

        :param records_for_history: list of dictionaries
            for the history file
        :param pt_s3_access_key: access key
        :param pt_s3_secret_key: secret
        :param pt_s3_address: address
        :param pt_s3_region: region
        :param pt_s3_bucket: bucket
        :param pt_s3_key: key
        :param pt_s3_secure: secure flag
        :param kwargs: support for keyword arg dict
        """
        use_s3_access_key = self.pt_s3_access_key
        use_s3_secret_key = self.pt_s3_secret_key
        use_s3_address = self.pt_s3_address
        use_s3_region = self.pt_s3_region
        use_s3_bucket = self.pt_s3_bucket
        use_s3_key = self.pt_s3_key
        use_s3_secure = self.pt_s3_secure

        use_s3_enabled = kwargs.get('s3_enabled', True)
        use_redis_enabled = kwargs.get('redis_enabled', False)
        use_redis_address = kwargs.get('redis_address', None)
        use_redis_db = kwargs.get('redis_db', None)
        use_redis_key = kwargs.get('redis_key', None)
        use_redis_password = kwargs.get('redis_password', None)
        use_redis_expire = kwargs.get('redis_expire', None)
        use_redis_serializer = kwargs.get('redis_serializer', 'json')
        use_redis_encoding = kwargs.get('redis_encoding', 'utf-8')
        verbose = kwargs.get('verbose', False)

        if pt_s3_access_key:
            use_s3_access_key = pt_s3_access_key
        if pt_s3_secret_key:
            use_s3_secret_key = pt_s3_secret_key
        if pt_s3_address:
            use_s3_address = pt_s3_address
        if pt_s3_region:
            use_s3_region = pt_s3_region
        if pt_s3_bucket:
            use_s3_bucket = pt_s3_bucket
        if pt_s3_key:
            use_s3_key = pt_s3_key
        if pt_s3_secure != ae_consts.NOT_SET:
            use_s3_secure = pt_s3_secure

        rec = {
            'tickers': self.ticker,
            'version': int(ae_consts.ALGO_HISTORY_VERSION),
            'last_trade_date': ae_utils.get_last_close_str(),
            'algo_config_dict': self.config_dict,
            'algo_name': self.use_name,
            'created': ae_utils.utc_now_str(),
            self.ticker: records_for_history
        }

        num_bytes = len(str(rec))
        num_mb = ae_consts.get_mb(num_bytes)

        msg = (
            f'publish - {self.ticker} - {rec["last_trade_date"]} '
            # f'{use_s3_access_key} with: {use_s3_secret_key} '
            f's3_loc={use_s3_address}/{use_s3_bucket}/{use_s3_key} '
            f'mb={num_mb}MB')
        log.info(msg)

        publish.publish(data=rec,
                        label='pub',
                        df_compress=True,
                        compress=False,
                        convert_to_dict=False,
                        output_file=None,
                        redis_enabled=use_redis_enabled,
                        redis_key=use_redis_key,
                        redis_address=use_redis_address,
                        redis_db=use_redis_db,
                        redis_password=use_redis_password,
                        redis_expire=use_redis_expire,
                        redis_serializer=use_redis_serializer,
                        redis_encoding=use_redis_encoding,
                        s3_enabled=use_s3_enabled,
                        s3_key=use_s3_key,
                        s3_address=use_s3_address,
                        s3_bucket=use_s3_bucket,
                        s3_access_key=use_s3_access_key,
                        s3_secret_key=use_s3_secret_key,
                        s3_region_name=use_s3_region,
                        s3_secure=use_s3_secure,
                        slack_enabled=False,
                        verbose=verbose)
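# A hedged usage sketch (not part of the source): publish a backtest's
# trading history to an AWS-style bucket. ``algo`` is assumed to be an
# instance exposing publish_trading_history, ``history_records`` its list
# of history dictionaries, and every credential/bucket name below is a
# placeholder.
algo.publish_trading_history(
    records_for_history=history_records,
    pt_s3_access_key='YOUR_AWS_ACCESS_KEY',
    pt_s3_secret_key='YOUR_AWS_SECRET_KEY',
    pt_s3_address='s3.us-east-1.amazonaws.com',
    pt_s3_region='us-east-1',
    pt_s3_bucket='my-trading-history',
    pt_s3_key='SPY_latest.json',
    pt_s3_secure=1)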
Example #11
def fetch_calls(work_dict, scrub_mode='sort-by-date'):
    """fetch_calls

    Fetch the Tradier daily data for a ticker and
    return it as a ``pandas.DataFrame``.

    :param work_dict: dictionary of args
    :param scrub_mode: type of scrubbing handler to run
    """
    datafeed_type = td_consts.DATAFEED_TD_CALLS
    ticker = work_dict.get('ticker', None)
    label = work_dict.get('label', None)
    exp_date = work_dict.get('exp_date', None)

    log.debug(f'{label} - call - scrub_mode={scrub_mode} '
              f'args={work_dict} ticker={ticker}')

    if not exp_date:
        exp_date = opt_dates.option_expiration().strftime(
            ae_consts.COMMON_DATE_FORMAT)
    use_url = td_consts.TD_URLS['options'].format(ticker, exp_date)
    headers = td_consts.get_auth_headers()
    session = requests.Session()
    session.headers = headers
    res = url_helper.url_helper(sess=session).get(use_url)

    if res.status_code != requests.codes.OK:
        if res.status_code in [401, 403]:
            log.critical('Please check that the TD_TOKEN is correct - '
                         f'received {res.status_code} during '
                         'fetch for: calls')
        else:
            log.info(f'failed to get call with response={res} '
                     f'code={res.status_code} '
                     f'text={res.text}')
        return ae_consts.EMPTY, pd.DataFrame([{}])
    records = json.loads(res.text)
    org_records = records.get('options', {}).get('option', [])

    if len(org_records) == 0:
        log.info(f'failed to get call records text={res.text}')
        return ae_consts.EMPTY, pd.DataFrame([{}])

    options_list = []

    # approximate US/Eastern time by subtracting 5 hours from UTC
    # (this ignores daylight saving time and the system clock's zone)
    created_minute = (
        datetime.datetime.utcnow() -
        datetime.timedelta(hours=5)).strftime('%Y-%m-%d %H:%M:00')
    last_close_date = ae_utils.get_last_close_str(fmt='%Y-%m-%d %H:%M:00')

    # hit bug where dates were None
    if not last_close_date:
        last_close_date = created_minute

    for node in org_records:
        node['date'] = last_close_date
        node['created'] = created_minute
        node['ticker'] = ticker
        if (node['option_type'] == 'call'
                and node['expiration_type'] == 'standard'):
            node['opt_type'] = int(ae_consts.OPTION_CALL)
            node['exp_date'] = node['expiration_date']

            new_node = {}
            for col in td_consts.TD_OPTION_COLUMNS:
                if col in node:
                    if col in td_consts.TD_EPOCH_COLUMNS:
                        # trade_date can be None
                        if node[col] == 0:
                            new_node[col] = None
                        else:
                            new_node[col] = ae_utils.epoch_to_dt(
                                epoch=node[col] / 1000,
                                use_utc=False,
                                convert_to_est=True).strftime(
                                    ae_consts.COMMON_TICK_DATE_FORMAT)
                            """
                            Debug epoch ms converter:
                            """
                            """
                            print('-----------')
                            print(col)
                            print(node[col])
                            print(new_node[col])
                            print('===========')
                            """
                        # if/else valid date
                    else:
                        new_node[col] = node[col]
                    # if date column to convert
                # if column is in the row
            # convert all columns

            options_list.append(new_node)
    # end of records

    full_df = pd.DataFrame(options_list).sort_values(by=['strike'],
                                                     ascending=True)
    num_chains = len(full_df.index)
    mid_chain_idx = int(num_chains / 2)
    low_idx = int(mid_chain_idx - 20)
    high_idx = int(mid_chain_idx + 30)
    if low_idx < 0:
        low_idx = 0
    if high_idx > num_chains:
        high_idx = num_chains

    df = full_df[low_idx:high_idx].copy().sort_values(
        by=['date', 'strike']).reset_index()

    scrubbed_df = scrub_utils.ingress_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=datafeed_type,
        msg_format='df={} date_str={}',
        ds_id=ticker,
        date_str=exp_date,
        df=df)

    return ae_consts.SUCCESS, scrubbed_df
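# A hedged usage sketch (not part of the source): fetch the call chain for
# one ticker. Assumes TD_TOKEN is exported so get_auth_headers() can build
# valid Tradier auth headers.
status, calls_df = fetch_calls(
    work_dict={'ticker': 'SPY', 'label': 'td-calls-demo'},
    scrub_mode='sort-by-date')
if status == ae_consts.SUCCESS:
    print(calls_df.head())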
Example #12
def get_data_from_yahoo(work_dict):
    """get_data_from_yahoo

    Get data from yahoo

    :param work_dict: request dictionary
    """
    label = 'get_data_from_yahoo'

    log.info(f'task - {label} - start work_dict={work_dict}')

    num_news_rec = 0
    num_option_calls = 0
    num_option_puts = 0
    cur_high = -1
    cur_low = -1
    cur_open = -1
    cur_close = -1
    cur_volume = -1

    rec = {
        'pricing': None,
        'options': None,
        'calls': None,
        'puts': None,
        'news': None,
        'exp_date': None,
        'publish_pricing_update': None,
        'date': None,
        'updated': None
    }
    res = {'status': NOT_RUN, 'err': None, 'rec': rec}
    log.error('sorry - yahoo is disabled and '
              'pinance is no longer supported '
              'https://github.com/neberej/pinance')
    return res

    # NOTE: everything below this return is unreachable; it is kept for
    # reference from the removed pinance-based integration.
    try:

        ticker = work_dict.get('ticker', TICKER)
        exp_date = work_dict.get('exp_date', None)
        cur_strike = work_dict.get('strike', None)
        contract_type = str(work_dict.get('contract', 'C')).upper()
        get_pricing = work_dict.get('get_pricing', True)
        get_news = work_dict.get('get_news', True)
        get_options = work_dict.get('get_options', True)
        orient = work_dict.get('orient', 'records')
        label = work_dict.get('label', label)

        ticker_results = None  # the pinance client was removed upstream
        num_news_rec = 0

        use_date = exp_date
        if not exp_date:
            exp_date = opt_dates.option_expiration(date=exp_date)
            use_date = exp_date.strftime('%Y-%m-%d')
        """
        Debug control flags

        Quickly turn specific fetches off:

        get_news = False
        get_pricing = False
        get_options = False

        """
        if get_pricing:
            log.info(f'{label} getting ticker={ticker} pricing')
            ticker_results.get_quotes()
            if ticker_results.quotes_data:
                pricing_dict = ticker_results.quotes_data

                cur_high = pricing_dict.get('regularMarketDayHigh', None)
                cur_low = pricing_dict.get('regularMarketDayLow', None)
                cur_open = pricing_dict.get('regularMarketOpen', None)
                cur_close = pricing_dict.get('regularMarketPreviousClose',
                                             None)
                cur_volume = pricing_dict.get('regularMarketVolume', None)
                pricing_dict['high'] = cur_high
                pricing_dict['low'] = cur_low
                pricing_dict['open'] = cur_open
                pricing_dict['close'] = cur_close
                pricing_dict['volume'] = cur_volume
                pricing_dict['date'] = get_last_close_str()
                if 'regularMarketTime' in pricing_dict:
                    pricing_dict['market_time'] = \
                        datetime.datetime.fromtimestamp(
                            pricing_dict['regularMarketTime']).strftime(
                                COMMON_TICK_DATE_FORMAT)
                if 'postMarketTime' in pricing_dict:
                    pricing_dict['post_market_time'] = \
                        datetime.datetime.fromtimestamp(
                            pricing_dict['postMarketTime']).strftime(
                                COMMON_TICK_DATE_FORMAT)

                log.info(f'{label} ticker={ticker} converting pricing to '
                         f'df orient={orient}')

                try:
                    rec['pricing'] = pricing_dict
                except Exception as f:
                    rec['pricing'] = '{}'
                    log.info(
                        f'{label} ticker={ticker} failed converting pricing '
                        f'data={ppj(pricing_dict)} to df ex={f}')
                # try/ex

                log.info(f'{label} ticker={ticker} done converting pricing to '
                         f'df orient={orient}')

            else:
                log.error(f'{label} ticker={ticker} '
                          f'missing quotes_data={ticker_results.quotes_data}')
            # end of if ticker_results.quotes_data

            log.info(
                f'{label} ticker={ticker} close={cur_close} vol={cur_volume}')
        else:
            log.info(f'{label} skip - getting ticker={ticker} pricing')
        # if get_pricing

        if get_news:
            log.info(f'{label} getting ticker={ticker} news')
            ticker_results.get_news()
            if ticker_results.news_data:
                news_list = None
                try:
                    news_list = ticker_results.news_data
                    log.info(f'{label} ticker={ticker} converting news to '
                             f'df orient={orient}')

                    num_news_rec = len(news_list)

                    rec['news'] = news_list
                except Exception as f:
                    rec['news'] = '{}'
                    log.info(f'{label} ticker={ticker} failed converting news '
                             f'data={news_list} to df ex={f}')
                # try/ex

                log.info(f'{label} ticker={ticker} done converting news to '
                         f'df orient={orient}')
            else:
                log.info(f'{label} ticker={ticker} Yahoo NO '
                         f'news={ticker_results.news_data}')
            # end of if ticker_results.news_data
        else:
            log.info(f'{label} skip - getting ticker={ticker} news')
        # end if get_news

        if get_options:

            get_all_strikes = True
            if get_all_strikes:
                cur_strike = None
            else:
                if cur_close:
                    cur_strike = int(cur_close)
                if not cur_strike:
                    cur_strike = 287

            log.info(
                f'{label} ticker={ticker} num_news={num_news_rec} get options '
                f'close={cur_close} exp_date={use_date} '
                f'contract={contract_type} strike={cur_strike}')

            options_dict = \
                yahoo_get_pricing.get_options(
                    ticker=ticker,
                    exp_date_str=use_date,
                    contract_type=contract_type,
                    strike=cur_strike)

            rec['options'] = '{}'

            try:
                log.info(f'{label} ticker={ticker} converting options to '
                         f'df orient={orient}')

                num_option_calls = options_dict.get('num_calls', None)
                num_option_puts = options_dict.get('num_puts', None)
                rec['options'] = {
                    'exp_date': options_dict.get('exp_date', None),
                    'calls': options_dict.get('calls', None),
                    'puts': options_dict.get('puts', None),
                    'num_calls': num_option_calls,
                    'num_puts': num_option_puts
                }
                rec['calls'] = rec['options'].get('calls', EMPTY_DF_STR)
                rec['puts'] = rec['options'].get('puts', EMPTY_DF_STR)
            except Exception as f:
                rec['options'] = '{}'
                log.info(f'{label} ticker={ticker} failed converting options '
                         f'data={options_dict} to df ex={f}')
            # try/ex

            log.info(f'{label} ticker={ticker} done converting options to '
                     f'df orient={orient} num_calls={num_option_calls} '
                     f'num_puts={num_option_puts}')

        else:
            log.info(f'{label} skip - getting ticker={ticker} options')
        # end of if get_options

        log.info(
            f'{label} yahoo pricing for ticker={ticker} close={cur_close} '
            f'num_calls={num_option_calls} num_puts={num_option_puts} '
            f'news={num_news_rec}')

        fields_to_upload = ['pricing', 'options', 'calls', 'puts', 'news']

        for field_name in fields_to_upload:
            upload_and_cache_req = copy.deepcopy(work_dict)
            upload_and_cache_req['celery_disabled'] = True
            upload_and_cache_req['data'] = rec[field_name]
            if not upload_and_cache_req['data']:
                upload_and_cache_req['data'] = '{}'

            if 'redis_key' in work_dict:
                upload_and_cache_req['redis_key'] = (
                    f'{work_dict["redis_key"]}_{field_name}')
            if 's3_key' in work_dict:
                upload_and_cache_req['s3_key'] = (
                    f'{work_dict["s3_key"]}_{field_name}')
            try:
                update_res = publisher.run_publish_pricing_update(
                    work_dict=upload_and_cache_req)
                update_status = update_res.get('status', NOT_SET)
                log.info(f'{label} publish update '
                         f'status={get_status(status=update_status)} '
                         f'data={update_res}')
            except Exception:
                err = (f'{label} - failed to upload YAHOO '
                       f'data={upload_and_cache_req} to '
                       f's3_key={upload_and_cache_req.get("s3_key")} and '
                       f'redis_key={upload_and_cache_req.get("redis_key")}')
                log.error(err)
            # end of try/ex to upload and cache
            if not rec[field_name]:
                log.debug(f'{label} - ticker={ticker} no data from YAHOO for '
                          f'field_name={field_name}')
        # end of for all fields

        res = build_result.build_result(status=SUCCESS, err=None, rec=rec)
    except Exception as e:
        res = build_result.build_result(status=ERR,
                                        err=('failed - get_data_from_yahoo '
                                             f'dict={work_dict} with ex={e}'),
                                        rec=rec)
        log.error(f'{label} - {res["err"]}')
    # end of try/ex

    log.info('task - get_data_from_yahoo done - '
             f'{label} - status={get_status(res["status"])}')

    return res
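# A hedged usage sketch (not part of the source): because the pinance-based
# Yahoo feed is disabled, this task returns immediately with a NOT_RUN
# status and an empty record dictionary.
res = get_data_from_yahoo(work_dict={'ticker': 'SPY'})
print(get_status(status=res['status']))  # expected: NOT_RUN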
Example #13
    def latest(self,
               date_str=None,
               start_row=-200,
               extract_iex=True,
               extract_yahoo=False,
               extract_td=True,
               verbose=False,
               **kwargs):
        """latest

        Run the algorithm with the latest pricing data. Also
        supports running a backtest for a historical date in
        the pricing history (format ``YYYY-MM-DD``)

        :param date_str: optional - string start date ``YYYY-MM-DD``
            default is the latest close date
        :param start_row: negative number of rows back
            from the end of the list in the data
            default is ``-200`` where this means the algorithm
            will process the latest 200 rows in the minute
            dataset
        :param extract_iex: bool flag for extracting from ``IEX``
        :param extract_yahoo: bool flag for extracting from ``Yahoo``
            which is disabled as of 1/2019
        :param extract_td: bool flag for extracting from ``Tradier``
        :param verbose: bool flag for logs
        :param kwargs: keyword arg dict
        """
        use_date_str = date_str
        if not use_date_str:
            use_date_str = ae_utils.get_last_close_str()

        log.info('creating algo')
        self.algo_obj = base_algo.BaseAlgo(
            ticker=self.config_dict['ticker'],
            balance=self.config_dict['balance'],
            commission=self.config_dict['commission'],
            name=self.use_name,
            start_date=self.use_start_date,
            end_date=self.use_end_date,
            auto_fill=self.auto_fill,
            config_dict=self.config_dict,
            load_from_s3_bucket=self.load_from_s3_bucket,
            load_from_s3_key=self.load_from_s3_key,
            load_from_redis_key=self.load_from_redis_key,
            load_from_file=self.load_from_file,
            load_compress=self.load_compress,
            load_publish=self.load_publish,
            load_config=self.load_config,
            report_redis_key=self.report_redis_key,
            report_s3_bucket=self.report_s3_bucket,
            report_s3_key=self.report_s3_key,
            report_file=self.report_file,
            report_compress=self.report_compress,
            report_publish=self.report_publish,
            report_config=self.report_config,
            history_redis_key=self.history_redis_key,
            history_s3_bucket=self.history_s3_bucket,
            history_s3_key=self.history_s3_key,
            history_file=self.history_file,
            history_compress=self.history_compress,
            history_publish=self.history_publish,
            history_config=self.history_config,
            extract_redis_key=self.extract_redis_key,
            extract_s3_bucket=self.extract_s3_bucket,
            extract_s3_key=self.extract_s3_key,
            extract_file=self.extract_file,
            extract_save_dir=self.extract_save_dir,
            extract_compress=self.extract_compress,
            extract_publish=self.extract_publish,
            extract_config=self.extract_config,
            publish_to_slack=self.publish_to_slack,
            publish_to_s3=self.publish_to_s3,
            publish_to_redis=self.publish_to_redis,
            dataset_type=self.dataset_type,
            serialize_datasets=self.serialize_datasets,
            compress=self.compress,
            encoding=self.encoding,
            redis_enabled=self.redis_enabled,
            redis_key=self.redis_key,
            redis_address=self.redis_address,
            redis_db=self.redis_db,
            redis_password=self.redis_password,
            redis_expire=self.redis_expire,
            redis_serializer=self.redis_serializer,
            redis_encoding=self.redis_encoding,
            s3_enabled=self.s3_enabled,
            s3_key=self.s3_key,
            s3_address=self.s3_address,
            s3_bucket=self.s3_bucket,
            s3_access_key=self.s3_access_key,
            s3_secret_key=self.s3_secret_key,
            s3_region_name=self.s3_region_name,
            s3_secure=self.s3_secure,
            slack_enabled=self.slack_enabled,
            slack_code_block=self.slack_code_block,
            slack_full_width=self.slack_full_width,
            dataset_publish_extract=self.extract_publish,
            dataset_publish_history=self.history_publish,
            dataset_publish_report=self.report_publish,
            run_on_engine=self.run_on_engine,
            auth_url=self.broker_url,
            backend_url=self.backend_url,
            include_tasks=self.include_tasks,
            ssl_options=self.ssl_options,
            transport_options=self.transport_options,
            path_to_config_module=self.path_to_config_module,
            timeseries=self.timeseries,
            trade_strategy=self.trade_strategy,
            verbose=False,
            raise_on_err=self.raise_on_err)

        log.info('run latest - start')

        ticker = self.config_dict['ticker']
        self.common_fetch_vals['base_key'] = f'{ticker}_{use_date_str}'
        extract_req = api_requests.get_ds_dict(
            ticker=ticker,
            base_key=self.common_fetch_vals['base_key'],
            ds_id=ticker,
            service_dict=self.common_fetch_vals)
        node_date_key = use_date_str.replace(f'{ticker}_', '')
        req = {
            'id': use_date_str,
            'ticker': ticker,
            'date_key': self.common_fetch_vals['base_key'],
            'date': node_date_key,
            'req': extract_req
        }
        # fetch
        iex_daily_df = None
        iex_minute_df = None
        iex_quote_df = None
        iex_stats_df = None
        iex_peers_df = None
        iex_news_df = None
        iex_financials_df = None
        iex_earnings_df = None
        iex_dividends_df = None
        iex_company_df = None
        yahoo_option_calls_df = None
        yahoo_option_puts_df = None
        yahoo_pricing_df = None
        yahoo_news_df = None
        td_calls_df = None
        td_puts_df = None

        node_date_key = req['date']
        dataset_node_id = req['id']
        dataset_id = dataset_node_id

        label = f'ticker={ticker} date={node_date_key}'
        if verbose:
            log.info(f'{label} - extract - start')
        if 'daily' in self.iex_datasets or extract_iex:
            iex_daily_status, iex_daily_df = \
                iex_extract_utils.extract_daily_dataset(
                    extract_req)
            if iex_daily_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_daily={ticker}')
        if 'minute' in self.iex_datasets or extract_iex:
            iex_minute_status, iex_minute_df = \
                iex_extract_utils.extract_minute_dataset(
                    extract_req)
            if iex_minute_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_minute={ticker}')
        if 'quote' in self.iex_datasets or extract_iex:
            iex_quote_status, iex_quote_df = \
                iex_extract_utils.extract_quote_dataset(
                    extract_req)
            if iex_quote_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_quote={ticker}')
        if 'stats' in self.iex_datasets or extract_iex:
            iex_stats_status, iex_stats_df = \
                iex_extract_utils.extract_stats_dataset(
                    extract_req)
            if iex_stats_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_stats={ticker}')
        if 'peers' in self.iex_datasets or extract_iex:
            iex_peers_status, iex_peers_df = \
                iex_extract_utils.extract_peers_dataset(
                    extract_req)
            if iex_peers_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_peers={ticker}')
        if 'news' in self.iex_datasets or extract_iex:
            iex_news_status, iex_news_df = \
                iex_extract_utils.extract_news_dataset(
                    extract_req)
            if iex_news_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_news={ticker}')
        if 'financials' in self.iex_datasets or extract_iex:
            iex_financials_status, iex_financials_df = \
                iex_extract_utils.extract_financials_dataset(
                    extract_req)
            if iex_financials_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_financials={ticker}')
        if 'earnings' in self.iex_datasets or extract_iex:
            iex_earnings_status, iex_earnings_df = \
                iex_extract_utils.extract_earnings_dataset(
                    extract_req)
            if iex_earnings_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_earnings={ticker}')
        if 'dividends' in self.iex_datasets or extract_iex:
            iex_dividends_status, iex_dividends_df = \
                iex_extract_utils.extract_dividends_dataset(
                    extract_req)
            if iex_dividends_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_dividends={ticker}')
        if 'company' in self.iex_datasets or extract_iex:
            iex_company_status, iex_company_df = \
                iex_extract_utils.extract_company_dataset(
                    extract_req)
            if iex_company_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract iex_company={ticker}')
        # end of iex extracts

        if extract_yahoo:
            yahoo_calls_status, yahoo_option_calls_df = \
                yahoo_extract_utils.extract_option_calls_dataset(
                    extract_req)
            yahoo_puts_status, yahoo_option_puts_df = \
                yahoo_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if (yahoo_calls_status != ae_consts.SUCCESS
                    or yahoo_puts_status != ae_consts.SUCCESS):
                if verbose:
                    log.warning(f'unable to extract yahoo_options={ticker}')
            yahoo_pricing_status, yahoo_pricing_df = \
                yahoo_extract_utils.extract_pricing_dataset(
                    extract_req)
            if yahoo_pricing_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract yahoo_pricing={ticker}')
            yahoo_news_status, yahoo_news_df = \
                yahoo_extract_utils.extract_yahoo_news_dataset(
                    extract_req)
            if yahoo_news_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract yahoo_news={ticker}')
        # end of yahoo extracts

        if extract_td:
            """
            Debug by setting:

            extract_req['verbose_td'] = True
            """
            convert_to_datetime = [
                'date', 'created', 'ask_date', 'bid_date', 'trade_date'
            ]
            td_calls_status, td_calls_df = \
                td_extract_utils.extract_option_calls_dataset(
                    extract_req)
            if td_calls_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract tdcalls={ticker}')
            else:
                if ae_consts.is_df(df=td_calls_df):
                    for c in convert_to_datetime:
                        if c in td_calls_df:
                            td_calls_df[c] = pd.to_datetime(
                                td_calls_df[c],
                                format=ae_consts.COMMON_TICK_DATE_FORMAT)
                    if 'date' in td_calls_df:
                        td_calls_df = td_calls_df.sort_values(
                            'date', ascending=True)
            # end of converting dates

            td_puts_status, td_puts_df = \
                td_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if td_puts_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(f'unable to extract tdputs={ticker}')
            else:
                if ae_consts.is_df(df=td_puts_df):
                    for c in convert_to_datetime:
                        if c in td_puts_df:
                            td_puts_df[c] = pd.to_datetime(
                                td_puts_df[c],
                                format=ae_consts.COMMON_TICK_DATE_FORMAT)
                    if 'date' in td_puts_df:
                        td_puts_df = td_puts_df.sort_values(
                            'date', ascending=True)
            # end of converting dates
        # td extracts

        # map extracted data to DEFAULT_SERIALIZED_DATASETS
        ticker_data = {}
        ticker_data['daily'] = iex_daily_df
        ticker_data['minute'] = iex_minute_df
        ticker_data['quote'] = iex_quote_df
        ticker_data['stats'] = iex_stats_df
        ticker_data['peers'] = iex_peers_df
        ticker_data['news1'] = iex_news_df
        ticker_data['financials'] = iex_financials_df
        ticker_data['earnings'] = iex_earnings_df
        ticker_data['dividends'] = iex_dividends_df
        ticker_data['company'] = iex_company_df
        ticker_data['calls'] = yahoo_option_calls_df
        ticker_data['puts'] = yahoo_option_puts_df
        ticker_data['pricing'] = yahoo_pricing_df
        ticker_data['news'] = yahoo_news_df
        ticker_data['tdcalls'] = td_calls_df
        ticker_data['tdputs'] = td_puts_df

        algo_data_req = {
            ticker: [{
                'id': dataset_id,  # id is currently the cache key in redis
                'date': use_date_str,  # used to confirm dates in asc order
                'data': ticker_data,
                'start_row': start_row
            }]
        }

        if verbose:
            log.info(f'extract - {label} '
                     f'dataset={len(algo_data_req[ticker])}')

        # this could be a separate celery task
        try:
            if verbose:
                log.info(f'handle_data START - {label} from '
                         f'{node_date_key}')
            self.algo_obj.handle_data(data=algo_data_req)
            if verbose:
                log.info(f'handle_data END - {label} from {node_date_key}')
        except Exception as e:
            a_name = self.algo_obj.get_name()
            a_debug_msg = self.algo_obj.get_debug_msg()
            if not a_debug_msg:
                a_debug_msg = 'debug message not set'
            # a_config_dict = ae_consts.ppj(self.algo_obj.config_dict)
            msg = (f'{label} - algo={a_name} '
                   f'encountered exception in handle_data tickers={ticker} '
                   f'from {node_date_key} ex={e} '
                   f'and failed during operation: {a_debug_msg}')
            log.critical(msg)
        # end try/ex

        log.info('run latest - create history')

        history_ds = self.algo_obj.create_history_dataset()
        self.history_df = pd.DataFrame(history_ds[ticker])
        self.determine_latest_times_in_history()

        self.num_rows = len(self.history_df.index)

        if verbose:
            log.info(self.history_df[['minute', 'close']].tail(5))

        log.info(f'run latest minute={self.end_date} - '
                 f'rows={self.num_rows} - done')

        return self.get_history()
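# A hedged usage sketch (not part of the source): assuming ``runner`` is an
# instance of the class this method belongs to (built from a config_dict),
# replay the latest 200 minute rows and inspect the trading history:
history = runner.latest(start_row=-200, extract_iex=True, extract_td=True)
print(runner.history_df[['minute', 'close']].tail())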
Example #14
        else:
            break

    fixed_df = pd.DataFrame(new_recs)

    if 'date' in fixed_df:
        fixed_df = fixed_df.sort_values(
            by=['date'], ascending=True).reset_index(drop=True)

    print(fixed_df)
    return fixed_df


# end of fix_df

use_redis_address = ae_consts.REDIS_ADDRESS
last_close_str = ae_utils.get_last_close_str(ae_consts.COMMON_DATE_FORMAT)
use_date_str = last_close_str

src_date = '2019-02-15'
dst_date = src_date
dst_date = '2019-02-14'
tickers = ['SPY']
for ticker in tickers:

    log.info(f'extracting src df for ticker: {ticker}')

    res = None

    # get from a date or the latest if not set
    if src_date:
        use_key = f'{ticker}_{src_date}'
def get_data_from_yahoo(work_dict):
    """get_data_from_yahoo

    Get data from yahoo

    :param work_dict: request dictionary
    """
    label = 'get_data_from_yahoo'

    log.info('task - {} - start work_dict={}'.format(label, work_dict))

    num_news_rec = 0
    num_option_calls = 0
    num_option_puts = 0
    cur_high = -1
    cur_low = -1
    cur_open = -1
    cur_close = -1
    cur_volume = -1

    rec = {
        'pricing': None,
        'options': None,
        'calls': None,
        'puts': None,
        'news': None,
        'exp_date': None,
        'publish_pricing_update': None,
        'date': None,
        'updated': None
    }
    res = {'status': NOT_RUN, 'err': None, 'rec': rec}

    try:

        ticker = work_dict.get('ticker', TICKER)
        exp_date = work_dict.get('exp_date', None)
        cur_strike = work_dict.get('strike', None)
        contract_type = str(work_dict.get('contract', 'C')).upper()
        get_pricing = work_dict.get('get_pricing', True)
        get_news = work_dict.get('get_news', True)
        get_options = work_dict.get('get_options', True)
        orient = work_dict.get('orient', 'records')
        label = work_dict.get('label', label)

        ticker_results = pinance.Pinance(ticker)
        num_news_rec = 0

        use_date = exp_date
        if not exp_date:
            exp_date = opt_dates.option_expiration(date=exp_date)
            use_date = exp_date.strftime('%Y-%m-%d')
        """
        Debug control flags

        Quickly turn specific fetches off:

        get_news = False
        get_pricing = False
        get_options = False

        """
        if get_pricing:
            log.info('{} getting ticker={} pricing'.format(label, ticker))
            ticker_results.get_quotes()
            if ticker_results.quotes_data:
                pricing_dict = ticker_results.quotes_data

                cur_high = pricing_dict.get('regularMarketDayHigh', None)
                cur_low = pricing_dict.get('regularMarketDayLow', None)
                cur_open = pricing_dict.get('regularMarketOpen', None)
                cur_close = pricing_dict.get('regularMarketPreviousClose',
                                             None)
                cur_volume = pricing_dict.get('regularMarketVolume', None)
                pricing_dict['high'] = cur_high
                pricing_dict['low'] = cur_low
                pricing_dict['open'] = cur_open
                pricing_dict['close'] = cur_close
                pricing_dict['volume'] = cur_volume
                pricing_dict['date'] = get_last_close_str()
                if 'regularMarketTime' in pricing_dict:
                    pricing_dict['market_time'] = \
                        datetime.datetime.fromtimestamp(
                            pricing_dict['regularMarketTime']).strftime(
                                COMMON_TICK_DATE_FORMAT)
                if 'postMarketTime' in pricing_dict:
                    pricing_dict['post_market_time'] = \
                        datetime.datetime.fromtimestamp(
                            pricing_dict['postMarketTime']).strftime(
                                COMMON_TICK_DATE_FORMAT)

                log.info('{} ticker={} converting pricing to '
                         'df orient={}'.format(label, ticker, orient))

                try:
                    rec['pricing'] = pricing_dict
                except Exception as f:
                    rec['pricing'] = '{}'
                    log.info('{} ticker={} failed converting pricing '
                             'data={} to df ex={}'.format(
                                 label, ticker, ppj(pricing_dict), f))
                # try/ex

                log.info('{} ticker={} done converting pricing to '
                         'df orient={}'.format(label, ticker, orient))

            else:
                log.error('{} ticker={} missing quotes_data={}'.format(
                    label, ticker, ticker_results.quotes_data))
            # end of if ticker_results.quotes_data

            log.info('{} ticker={} close={} vol={}'.format(
                label, ticker, cur_close, cur_volume))
        else:
            log.info('{} skip - getting ticker={} pricing'.format(
                label, ticker))
        # if get_pricing

        if get_news:
            log.info('{} getting ticker={} news'.format(label, ticker))
            ticker_results.get_news()
            if ticker_results.news_data:
                news_list = None
                try:
                    news_list = ticker_results.news_data
                    log.info('{} ticker={} converting news to '
                             'df orient={}'.format(label, ticker, orient))

                    num_news_rec = len(news_list)

                    rec['news'] = news_list
                except Exception as f:
                    rec['news'] = '{}'
                    log.info('{} ticker={} failed converting news '
                             'data={} to df ex={}'.format(
                                 label, ticker, news_list, f))
                # try/ex

                log.info('{} ticker={} done converting news to '
                         'df orient={}'.format(label, ticker, orient))
            else:
                log.info('{} ticker={} Yahoo NO news={}'.format(
                    label, ticker, ticker_results.news_data))
            # end of if ticker_results.news_data
        else:
            log.info('{} skip - getting ticker={} news'.format(label, ticker))
        # end if get_news

        if get_options:

            get_all_strikes = True
            if get_all_strikes:
                cur_strike = None
            else:
                if cur_close:
                    cur_strike = int(cur_close)
                if not cur_strike:
                    cur_strike = 287

            log.info('{} ticker={} num_news={} get options close={} '
                     'exp_date={} contract={} strike={}'.format(
                         label, ticker, num_news_rec, cur_close, use_date,
                         contract_type, cur_strike))

            options_dict = \
                yahoo_get_pricing.get_options(
                    ticker=ticker,
                    exp_date_str=use_date,
                    contract_type=contract_type,
                    strike=cur_strike)

            rec['options'] = '{}'

            try:
                log.info('{} ticker={} converting options to '
                         'df orient={}'.format(label, ticker, orient))

                num_option_calls = options_dict.get('num_calls', None)
                num_option_puts = options_dict.get('num_puts', None)
                rec['options'] = {
                    'exp_date': options_dict.get('exp_date', None),
                    'calls': options_dict.get('calls', None),
                    'puts': options_dict.get('puts', None),
                    'num_calls': num_option_calls,
                    'num_puts': num_option_puts
                }
                rec['calls'] = rec['options'].get('calls', EMPTY_DF_STR)
                rec['puts'] = rec['options'].get('puts', EMPTY_DF_STR)
            except Exception as f:
                rec['options'] = '{}'
                log.info('{} ticker={} failed converting options '
                         'data={} to df ex={}'.format(label, ticker,
                                                      options_dict, f))
            # try/ex

            log.info('{} ticker={} done converting options to '
                     'df orient={} num_calls={} num_puts={}'.format(
                         label, ticker, orient, num_option_calls,
                         num_option_puts))

        else:
            log.info('{} skip - getting ticker={} options'.format(
                label, ticker))
        # end of if get_options

        log.info('{} yahoo pricing for ticker={} close={} '
                 'num_calls={} num_puts={} news={}'.format(
                     label, ticker, cur_close, num_option_calls,
                     num_option_puts, num_news_rec))

        fields_to_upload = ['pricing', 'options', 'calls', 'puts', 'news']

        for field_name in fields_to_upload:
            upload_and_cache_req = copy.deepcopy(work_dict)
            upload_and_cache_req['celery_disabled'] = True
            upload_and_cache_req['data'] = rec[field_name]
            if not upload_and_cache_req['data']:
                upload_and_cache_req['data'] = '{}'

            if 'redis_key' in work_dict:
                upload_and_cache_req['redis_key'] = '{}_{}'.format(
                    work_dict.get('redis_key',
                                  '{}_{}'.format(ticker, field_name)),
                    field_name)
            if 's3_key' in work_dict:
                upload_and_cache_req['s3_key'] = '{}_{}'.format(
                    work_dict.get('s3_key', '{}_{}'.format(ticker,
                                                           field_name)),
                    field_name)
            try:
                update_res = publisher.run_publish_pricing_update(
                    work_dict=upload_and_cache_req)
                update_status = update_res.get('status', NOT_SET)
                log.info('{} publish update status={} data={}'.format(
                    label, get_status(status=update_status), update_res))
            except Exception:
                err = ('{} - failed to upload YAHOO data={} '
                       'to s3_key={} and redis_key={}'.format(
                           label, upload_and_cache_req,
                           upload_and_cache_req.get('s3_key'),
                           upload_and_cache_req.get('redis_key')))
                log.error(err)
            # end of try/ex to upload and cache
            if not rec[field_name]:
                log.debug('{} - ticker={} no data from YAHOO for '
                          'field_name={}'.format(label, ticker, field_name))
        # end of for all fields

        res = build_result.build_result(status=SUCCESS, err=None, rec=rec)
    except Exception as e:
        res = build_result.build_result(status=ERR,
                                        err=('failed - get_data_from_yahoo '
                                             'dict={} with ex={}').format(
                                                 work_dict, e),
                                        rec=rec)
        log.error('{} - {}'.format(label, res['err']))
    # end of try/ex

    log.info('task - get_data_from_yahoo done - '
             '{} - status={}'.format(label, get_status(res['status'])))

    return res
def run_algo(
        ticker=None,
        tickers=None,
        algo=None,  # optional derived ``analysis_engine.algo.Algo`` instance
        balance=None,  # float starting base capital
        commission=None,  # float for single trade commission for buy or sell
        start_date=None,  # string YYYY-MM-DD HH:MM:SS
        end_date=None,  # string YYYY-MM-DD HH:MM:SS
        datasets=None,  # string list of identifiers
        num_owned_dict=None,  # not supported
        cache_freq='daily',  # 'minute' not supported
        auto_fill=True,
        load_config=None,
        report_config=None,
        history_config=None,
        extract_config=None,
        use_key=None,
        extract_mode='all',
        iex_datasets=None,
        redis_enabled=True,
        redis_address=None,
        redis_db=None,
        redis_password=None,
        redis_expire=None,
        redis_key=None,
        s3_enabled=True,
        s3_address=None,
        s3_bucket=None,
        s3_access_key=None,
        s3_secret_key=None,
        s3_region_name=None,
        s3_secure=False,
        s3_key=None,
        celery_disabled=True,
        broker_url=None,
        result_backend=None,
        label=None,
        name=None,
        timeseries=None,
        trade_strategy=None,
        verbose=False,
        publish_to_slack=True,
        publish_to_s3=True,
        publish_to_redis=True,
        extract_datasets=None,
        config_file=None,
        config_dict=None,
        version=1,
        raise_on_err=True,
        **kwargs):
    """run_algo

    Run an algorithm with steps:

        1) Extract redis keys between dates
        2) Compile a data pipeline dictionary (call it ``data``)
        3) Call algorithm's ``myalgo.handle_data(data=data)``

    .. note:: If no ``algo`` is set, the
        ``analysis_engine.algo.BaseAlgo`` algorithm
        is used.

    .. note:: Please ensure Redis and Minio are running
        before trying to extract tickers

    **Stock tickers to extract**

    :param ticker: single stock ticker/symbol/ETF to extract
    :param tickers: optional - list of tickers to extract
    :param use_key: optional - extract historical key from Redis

    **Algo Configuration**

    :param algo: derived instance of ``analysis_engine.algo.Algo`` object
    :param balance: optional - float balance parameter
        can also be set on the ``algo`` object if not
        set on the args
    :param commission: float for single trade commission for
        buy or sell. Can also be set on the ``algo`` object
    :param start_date: string ``YYYY-MM-DD_HH:MM:SS`` cache value
    :param end_date: string ``YYYY-MM-DD_HH:MM:SS`` cache value
    :param datasets: list of strings that are ``iex`` or ``yahoo``
        datasets that are cached.
    :param cache_freq: optional - depending on if you are running data feeds
        on a ``daily`` cron (default) vs every ``minute`` (or faster)
    :param num_owned_dict: not supported yet
    :param auto_fill: optional - boolean for auto filling
        buy/sell orders for backtesting (default is
        ``True``)
    :param trading_calendar: ``trading_calendar.TradingCalendar``
        object; by default the ``analysis_engine.calendars.
        always_open.AlwaysOpen`` trading calendar is used
        (a ``TFSExchangeCalendar`` can be swapped in)
    :param config_file: path to a json file
        containing custom algorithm object
        member values (like indicator configuration and
        predict future date units ahead for a backtest)
    :param config_dict: optional - dictionary that
        can be passed to derived class implementations
        of: ``def load_from_config(config_dict=config_dict)``

    **Timeseries**

    :param timeseries: optional - string to
        set ``day`` or ``minute`` backtesting
        or live trading
        (default is ``minute``)

    **Trading Strategy**

    :param trade_strategy: optional - string to
        set the type of ``Trading Strategy``
        for backtesting or live trading
        (default is ``count``)

    **Algorithm Dataset Loading, Extracting, Reporting
    and Trading History arguments**

    :param load_config: optional - dictionary
        for setting member variables to load an
        algorithm-ready dataset from
        a file, s3 or redis
    :param report_config: optional - dictionary
        for setting member variables to publish
        an algo ``trading performance report`` to s3,
        redis, a file or slack
    :param history_config: optional - dictionary
        for setting member variables to publish
        an algo ``trade history`` to s3, redis, a file
        or slack
    :param extract_config: optional - dictionary
        for setting member variables to publish
        an algorithm-ready ``extracted dataset`` to s3,
        redis, a file or slack

    **(Optional) Data sources, datafeeds and datasets to gather**

    :param iex_datasets: list of strings for gathering specific `IEX
        datasets <https://iexcloud.io/>`__
        which are set as consts: ``analysis_engine.iex.consts.FETCH_*``.

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all
        datasets in Redis
        (default is ``True``)
    :param redis_address: Redis connection string
        format is ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use
        (default is ``0``)
    :param redis_password: optional - Redis password
        (default is ``None``)
    :param redis_expire: optional - Redis expire value
        (default is ``None``)
    :param redis_key: optional - redis key not used
        (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for auto-archiving on Minio (S3)
        (default is ``True``)
    :param s3_address: Minio S3 connection string
        format ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key
        (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key
        (default is ``trex123321``)
    :param s3_region_name: S3 region name
        (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption
        (default is ``False``)
    :param s3_key: optional s3 key not used
        (default is ``None``)

    **(Optional) Celery worker broker connectivity arguments**

    :param celery_disabled: bool - toggle synchronous mode or publish
        to an engine connected to the `Celery broker and backend
        <https://github.com/celery/celery#transports-and-backends>`__
        (default is ``True`` - synchronous mode without an engine
        or need for a broker or backend for Celery)
    :param broker_url: Celery broker url
        (default is ``redis://0.0.0.0:6379/13``)
    :param result_backend: Celery backend url
        (default is ``redis://0.0.0.0:6379/14``)
    :param label: tracking log label
    :param publish_to_slack: optional - boolean for
        publishing to slack (coming soon)
    :param publish_to_s3: optional - boolean for
        publishing to s3 (coming soon)
    :param publish_to_redis: optional - boolean for
        publishing to redis (coming soon)

    **(Optional) Debugging**

    :param verbose: bool - show extract warnings
        and other debug logging (default is False)
    :param raise_on_err: optional - boolean for
        unittests and developing algorithms with the
        ``analysis_engine.run_algo.run_algo`` helper.
        When set to ``True``, exceptions
        are raised to the calling functions

    :param kwargs: keyword arguments dictionary
    """

    # dictionary structure with a list sorted on: ascending dates
    # algo_data_req[ticker][list][dataset] = pd.DataFrame
    algo_data_req = {}
    extract_requests = []
    return_algo = False  # return created algo objects for use by caller
    rec = {}
    msg = None

    use_tickers = tickers
    use_balance = balance
    use_commission = commission

    if ticker:
        use_tickers = [ticker]
    else:
        if not use_tickers:
            use_tickers = []

    # if these are not set as args, but the algo object
    # has them, use them instead:
    if algo:
        if len(use_tickers) == 0:
            use_tickers = algo.get_tickers()
        if not use_balance:
            use_balance = algo.get_balance()
        if not use_commission:
            use_commission = algo.get_commission()

    default_iex_datasets = [
        'daily', 'minute', 'quote', 'stats', 'peers', 'news', 'financials',
        'earnings', 'dividends', 'company'
    ]

    if not iex_datasets:
        iex_datasets = default_iex_datasets

    if redis_enabled:
        if not redis_address:
            redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379')
        if not redis_password:
            redis_password = os.getenv('REDIS_PASSWORD', None)
        if not redis_db:
            redis_db = int(os.getenv('REDIS_DB', '0'))
        if not redis_expire:
            redis_expire = os.getenv('REDIS_EXPIRE', None)
    if s3_enabled:
        if not s3_address:
            s3_address = os.getenv('S3_ADDRESS', 'localhost:9000')
        if not s3_access_key:
            s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey')
        if not s3_secret_key:
            s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321')
        if not s3_region_name:
            s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
        if not s3_secure:
            s3_secure = os.getenv('S3_SECURE', '0') == '1'
        if not s3_bucket:
            s3_bucket = os.getenv('S3_BUCKET', 'dev')
    if not broker_url:
        broker_url = os.getenv('WORKER_BROKER_URL', 'redis://0.0.0.0:6379/11')
    if not result_backend:
        result_backend = os.getenv('WORKER_BACKEND_URL',
                                   'redis://0.0.0.0:6379/12')

    if not label:
        label = 'run-algo'

    num_tickers = len(use_tickers)
    last_close_str = ae_utils.get_last_close_str()

    if iex_datasets:
        if verbose:
            log.info(f'{label} - tickers={num_tickers} '
                     f'iex={json.dumps(iex_datasets)}')
    else:
        if verbose:
            log.info(f'{label} - tickers={num_tickers}')

    ticker_key = use_key
    if not ticker_key:
        ticker_key = f'{ticker}_{last_close_str}'

    if not algo:
        algo = base_algo.BaseAlgo(ticker=None,
                                  tickers=use_tickers,
                                  balance=use_balance,
                                  commission=use_commission,
                                  config_dict=config_dict,
                                  name=label,
                                  auto_fill=auto_fill,
                                  timeseries=timeseries,
                                  trade_strategy=trade_strategy,
                                  publish_to_slack=publish_to_slack,
                                  publish_to_s3=publish_to_s3,
                                  publish_to_redis=publish_to_redis,
                                  raise_on_err=raise_on_err)
        return_algo = True
        # the algo object is stored
        # in the result at: res['rec']['algo']

    if not algo:
        msg = f'{label} - missing algo object'
        log.error(msg)
        return build_result.build_result(status=ae_consts.EMPTY,
                                         err=msg,
                                         rec=rec)

    if raise_on_err:
        log.debug(f'{label} - enabling algo exception raises')
        algo.raise_on_err = True

    indicator_datasets = algo.get_indicator_datasets()
    if len(indicator_datasets) == 0:
        indicator_datasets = ae_consts.BACKUP_DATASETS
        log.info(f'using all datasets={indicator_datasets}')

    verbose_extract = False
    if config_dict:
        verbose_extract = config_dict.get('verbose_extract', False)

    common_vals = {}
    common_vals['base_key'] = ticker_key
    common_vals['celery_disabled'] = celery_disabled
    common_vals['ticker'] = ticker
    common_vals['label'] = label
    common_vals['iex_datasets'] = iex_datasets
    common_vals['s3_enabled'] = s3_enabled
    common_vals['s3_bucket'] = s3_bucket
    common_vals['s3_address'] = s3_address
    common_vals['s3_secure'] = s3_secure
    common_vals['s3_region_name'] = s3_region_name
    common_vals['s3_access_key'] = s3_access_key
    common_vals['s3_secret_key'] = s3_secret_key
    common_vals['s3_key'] = ticker_key
    common_vals['redis_enabled'] = redis_enabled
    common_vals['redis_address'] = redis_address
    common_vals['redis_password'] = redis_password
    common_vals['redis_db'] = redis_db
    common_vals['redis_key'] = ticker_key
    common_vals['redis_expire'] = redis_expire

    use_start_date_str = start_date
    use_end_date_str = end_date
    last_close_date = ae_utils.last_close()
    end_date_val = None

    cache_freq_fmt = ae_consts.COMMON_TICK_DATE_FORMAT

    if not use_end_date_str:
        use_end_date_str = last_close_date.strftime(cache_freq_fmt)

    end_date_val = ae_utils.get_date_from_str(date_str=use_end_date_str,
                                              fmt=cache_freq_fmt)
    start_date_val = None

    if not use_start_date_str:
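        # default the start date to a 60 day backtest window
        # before the end date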
        start_date_val = end_date_val - datetime.timedelta(days=60)
        use_start_date_str = start_date_val.strftime(cache_freq_fmt)
    else:
        start_date_val = datetime.datetime.strptime(
            use_start_date_str, ae_consts.COMMON_TICK_DATE_FORMAT)

    total_dates = (end_date_val - start_date_val).days

    if end_date_val < start_date_val:
        msg = (
            f'{label} - invalid dates - start_date={start_date_val} is after '
            f'end_date={end_date_val}')
        raise Exception(msg)

    if verbose:
        log.info(f'{label} - days={total_dates} '
                 f'start={use_start_date_str} '
                 f'end={use_end_date_str} '
                 f'datasets={indicator_datasets}')

    for ticker in use_tickers:
        req = algo_utils.build_algo_request(ticker=ticker,
                                            use_key=use_key,
                                            start_date=use_start_date_str,
                                            end_date=use_end_date_str,
                                            datasets=datasets,
                                            balance=use_balance,
                                            cache_freq=cache_freq,
                                            timeseries=timeseries,
                                            trade_strategy=trade_strategy,
                                            label=label)
        ticker_key = f'{ticker}_{last_close_str}'
        common_vals['ticker'] = ticker
        common_vals['base_key'] = ticker_key
        common_vals['redis_key'] = ticker_key
        common_vals['s3_key'] = ticker_key

        for date_key in req['extract_datasets']:
            date_req = api_requests.get_ds_dict(ticker=ticker,
                                                base_key=date_key,
                                                ds_id=label,
                                                service_dict=common_vals)
            node_date_key = date_key.replace(f'{ticker}_', '')
            extract_requests.append({
                'id': date_key,
                'ticker': ticker,
                'date_key': date_key,
                'date': node_date_key,
                'req': date_req
            })
    # end of for all ticker in use_tickers

    first_extract_date = None
    last_extract_date = None
    total_extract_requests = len(extract_requests)
    cur_idx = 1
    for idx, extract_node in enumerate(extract_requests):

        extract_ticker = extract_node['ticker']
        extract_date = extract_node['date']
        ds_node_id = extract_node['id']

        if not first_extract_date:
            first_extract_date = extract_date
        last_extract_date = extract_date
        perc_progress = ae_consts.get_percent_done(
            progress=cur_idx, total=total_extract_requests)
        percent_label = (f'{label} '
                         f'ticker={extract_ticker} '
                         f'date={extract_date} '
                         f'{perc_progress} '
                         f'{idx + 1}/{total_extract_requests} '
                         f'{indicator_datasets}')
        if verbose:
            log.info(f'extracting - {percent_label}')

        ticker_bt_data = build_ds_node.build_dataset_node(
            ticker=extract_ticker,
            date=extract_date,
            service_dict=common_vals,
            datasets=indicator_datasets,
            log_label=label,
            verbose=verbose_extract)

        if ticker not in algo_data_req:
            algo_data_req[ticker] = []

        algo_data_req[ticker].append({
            'id': ds_node_id,  # id is currently the cache key in redis
            'date': extract_date,  # used to confirm dates in asc order
            'data': ticker_bt_data
        })

        if verbose:
            log.info(f'extract - {percent_label} '
                     f'dataset={len(algo_data_req[ticker])}')
        cur_idx += 1
    # end of for service_dict in extract_requests

    # this could be a separate celery task
    status = ae_consts.NOT_RUN
    if len(algo_data_req) == 0:
        msg = (f'{label} - nothing to test - no data found for '
               f'tickers={use_tickers} '
               f'between {first_extract_date} and {last_extract_date}')
        log.info(msg)
        return build_result.build_result(status=ae_consts.EMPTY,
                                         err=msg,
                                         rec=rec)

    # this could be a separate celery task
    try:
        if verbose:
            log.info(f'handle_data START - {percent_label} from '
                     f'{first_extract_date} to {last_extract_date}')
        algo.handle_data(data=algo_data_req)
        if verbose:
            log.info(f'handle_data END - {percent_label} from '
                     f'{first_extract_date} to {last_extract_date}')
    except Exception as e:
        a_name = algo.get_name()
        a_debug_msg = algo.get_debug_msg()
        if not a_debug_msg:
            a_debug_msg = 'debug message not set'
        a_config_dict = ae_consts.ppj(algo.config_dict)
        msg = (f'{percent_label} - algo={a_name} '
               f'encountered exception in handle_data tickers={use_tickers} '
               f'from {first_extract_date} to {last_extract_date} ex={e} '
               f'and failed during operation: {a_debug_msg}')
        if raise_on_err:
            if algo:
                try:
                    ind_obj = \
                        algo.get_indicator_process_last_indicator()
                    if ind_obj:
                        ind_obj_path = ind_obj.get_path_to_module()
                        ind_obj_config = ae_consts.ppj(ind_obj.get_config())
                        found_error_hint = False
                        if hasattr(ind_obj.use_df, 'to_json'):
                            if len(ind_obj.use_df.index) == 0:
                                log.critical(
                                    f'indicator failure report for '
                                    f'last module: '
                                    f'{ind_obj_path} '
                                    f'indicator={ind_obj.get_name()} '
                                    f'config={ind_obj_config} '
                                    f'dataset={ind_obj.use_df.head(5)} '
                                    f'name_of_dataset={ind_obj.uses_data}')
                                log.critical(
                                    '--------------------------------------'
                                    '--------------------------------------')
                                log.critical('Please check if this indicator: '
                                             f'{ind_obj_path} '
                                             'supports Empty Dataframes')
                                log.critical(
                                    '--------------------------------------'
                                    '--------------------------------------')
                                found_error_hint = True
                        # indicator error hints

                        if not found_error_hint:
                            log.critical(
                                f'indicator failure report for last module: '
                                f'{ind_obj_path} '
                                f'indicator={ind_obj.get_name()} '
                                f'config={ind_obj_config} '
                                f'dataset={ind_obj.use_df.head(5)} '
                                f'name_of_dataset={ind_obj.uses_data}')
                except Exception as f:
                    log.critical(f'failed to pull indicator processor '
                                 f'last indicator for debugging '
                                 f'from ex={e} with parsing ex={f}')
                # end of ignoring non-supported ways of creating
                # indicator processors
            log.error(msg)
            log.error(f'algo failure report: '
                      f'algo={a_name} handle_data() '
                      f'config={a_config_dict} ')
            log.critical(f'algo failed during operation: {a_debug_msg}')
            raise e
        else:
            log.error(msg)
            return build_result.build_result(status=ae_consts.ERR,
                                             err=msg,
                                             rec=rec)
    # end of try/ex

    # this could be a separate celery task
    try:
        if verbose:
            log.info(f'get_result START - {percent_label} from '
                     f'{first_extract_date} to {last_extract_date}')
        rec = algo.get_result()
        status = ae_consts.SUCCESS
        if verbose:
            log.info(f'get_result END - {percent_label} from '
                     f'{first_extract_date} to {last_extract_date}')
    except Exception as e:
        msg = (
            f'{percent_label} - algo={algo.get_name()} encountered exception '
            f'in get_result tickers={use_tickers} from '
            f'{first_extract_date} to {last_extract_date} ex={e}')
        if raise_on_err:
            if algo:
                log.error(f'algo={algo.get_name()} failed in get_result with '
                          f'debug_msg={algo.get_debug_msg()}')
            log.error(msg)
            raise e
        else:
            log.error(msg)
            return build_result.build_result(status=ae_consts.ERR,
                                             err=msg,
                                             rec=rec)
    # end of try/ex

    if return_algo:
        rec['algo'] = algo

    return build_result.build_result(status=status, err=msg, rec=rec)
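
A minimal usage sketch for the helper above; the module path
``analysis_engine.run_algo`` comes from the docstring, and the
``ae_consts.SUCCESS`` status constant is assumed to match the one
used throughout this listing:

.. code-block:: python

    import analysis_engine.consts as ae_consts
    from analysis_engine.run_algo import run_algo

    # run a synchronous backtest over the default 60 day window
    res = run_algo(ticker='SPY', raise_on_err=True)
    if res['status'] == ae_consts.SUCCESS:
        algo = res['rec']['algo']  # created algo object (return_algo path)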
Example #17
0
def extract(ticker=None,
            tickers=None,
            use_key=None,
            extract_mode='all',
            iex_datasets=None,
            redis_enabled=True,
            redis_address=None,
            redis_db=None,
            redis_password=None,
            redis_expire=None,
            s3_enabled=True,
            s3_address=None,
            s3_bucket=None,
            s3_access_key=None,
            s3_secret_key=None,
            s3_region_name=None,
            s3_secure=False,
            celery_disabled=True,
            broker_url=None,
            result_backend=None,
            label=None,
            verbose=False):
    """extract

    Extract all cached datasets for a stock ``ticker`` or
    a list of ``tickers`` and return a dictionary. Please
    make sure the datasets are already cached in Redis
    before running this method. If they are not, please refer
    to the ``analysis_engine.fetch.fetch`` function
    to prepare the datasets in your environment.

    Python example:

    .. code-block:: python

        from analysis_engine.extract import extract
        d = extract(ticker='NFLX')
        print(d)
        for k in d['NFLX']:
            print('dataset key: {}'.format(k))


    This was created to reduce the amount of typing in
    Jupyter notebooks. It can also be set up for use with a
    distributed engine using the optional arguments,
    depending on your connectivity requirements.

    .. note:: Please ensure Redis and Minio are running
              before trying to extract tickers

    **Stock tickers to extract**

    :param ticker: single stock ticker/symbol/ETF to extract
    :param tickers: optional - list of tickers to extract
    :param use_key: optional - extract historical key from Redis
        usually formatted ``<TICKER>_<date formatted YYYY-MM-DD>``

    **(Optional) Data sources, datafeeds and datasets to gather**

    :param iex_datasets: list of strings for gathering specific `IEX
        datasets <https://iextrading.com/developer/docs/#stocks>`__
        which are set as consts: ``analysis_engine.iex.consts.FETCH_*``.

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all
        datasets in Redis
        (default is ``True``)
    :param redis_address: Redis connection string format: ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use
        (default is ``0``)
    :param redis_password: optional - Redis password
        (default is ``None``)
    :param redis_expire: optional - Redis expire value
        (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for auto-archiving on Minio (S3)
        (default is ``True``)
    :param s3_address: Minio S3 connection string format: ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key
        (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key
        (default is ``trex123321``)
    :param s3_region_name: S3 region name
        (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption
        (default is ``False``)

    **(Optional) Celery worker broker connectivity arguments**

    :param celery_disabled: bool - toggle synchronous mode or publish
        to an engine connected to the `Celery broker and backend
        <https://github.com/celery/celery#transports-and-backends>`__
        (default is ``True`` - synchronous mode without an engine
        or need for a broker or backend for Celery)
    :param broker_url: Celery broker url
        (default is ``redis://0.0.0.0:6379/13``)
    :param result_backend: Celery backend url
        (default is ``redis://0.0.0.0:6379/14``)
    :param label: tracking log label

    **(Optional) Debugging**

    :param verbose: bool - show extract warnings
        and other debug logging (default is False)

    **Supported environment variables**

    ::

        export REDIS_ADDRESS="localhost:6379"
        export REDIS_DB="0"
        export S3_ADDRESS="localhost:9000"
        export S3_BUCKET="dev"
        export AWS_ACCESS_KEY_ID="trexaccesskey"
        export AWS_SECRET_ACCESS_KEY="trex123321"
        export AWS_DEFAULT_REGION="us-east-1"
        export S3_SECURE="0"
        export WORKER_BROKER_URL="redis://0.0.0.0:6379/13"
        export WORKER_BACKEND_URL="redis://0.0.0.0:6379/14"
    """

    rec = {}
    extract_requests = []

    use_tickers = tickers
    if ticker:
        use_tickers = [ticker]
    else:
        if not use_tickers:
            use_tickers = []

    default_iex_datasets = [
        'daily', 'minute', 'quote', 'stats', 'peers', 'news', 'financials',
        'earnings', 'dividends', 'company'
    ]

    if not iex_datasets:
        iex_datasets = default_iex_datasets

    if redis_enabled:
        if not redis_address:
            redis_address = os.getenv('REDIS_ADDRESS', 'localhost:6379')
        if not redis_password:
            redis_password = os.getenv('REDIS_PASSWORD', None)
        if not redis_db:
            redis_db = int(os.getenv('REDIS_DB', '0'))
        if not redis_expire:
            redis_expire = os.getenv('REDIS_EXPIRE', None)
    if s3_enabled:
        if not s3_address:
            s3_address = os.getenv('S3_ADDRESS', 'localhost:9000')
        if not s3_access_key:
            s3_access_key = os.getenv('AWS_ACCESS_KEY_ID', 'trexaccesskey')
        if not s3_secret_key:
            s3_secret_key = os.getenv('AWS_SECRET_ACCESS_KEY', 'trex123321')
        if not s3_region_name:
            s3_region_name = os.getenv('AWS_DEFAULT_REGION', 'us-east-1')
        if not s3_secure:
            s3_secure = os.getenv('S3_SECURE', '0') == '1'
        if not s3_bucket:
            s3_bucket = os.getenv('S3_BUCKET', 'dev')
    if not broker_url:
        broker_url = os.getenv('WORKER_BROKER_URL', 'redis://0.0.0.0:6379/13')
    if not result_backend:
        result_backend = os.getenv('WORKER_BACKEND_URL',
                                   'redis://0.0.0.0:6379/14')

    if not label:
        label = 'get-latest'

    num_tickers = len(use_tickers)
    last_close_str = ae_utils.get_last_close_str()

    if iex_datasets:
        log.info('{} - getting latest for tickers={} '
                 'iex={}'.format(label, num_tickers, json.dumps(iex_datasets)))
    else:
        log.info('{} - getting latest for tickers={}'.format(
            label, num_tickers))

    ticker_key = use_key
    if not ticker_key:
        ticker_key = '{}_{}'.format(ticker, last_close_str)

    common_vals = {}
    common_vals['base_key'] = ticker_key
    common_vals['celery_disabled'] = celery_disabled
    common_vals['ticker'] = ticker
    common_vals['label'] = label
    common_vals['iex_datasets'] = iex_datasets
    common_vals['s3_enabled'] = s3_enabled
    common_vals['s3_bucket'] = s3_bucket
    common_vals['s3_address'] = s3_address
    common_vals['s3_secure'] = s3_secure
    common_vals['s3_region_name'] = s3_region_name
    common_vals['s3_access_key'] = s3_access_key
    common_vals['s3_secret_key'] = s3_secret_key
    common_vals['s3_key'] = ticker_key
    common_vals['redis_enabled'] = redis_enabled
    common_vals['redis_address'] = redis_address
    common_vals['redis_password'] = redis_password
    common_vals['redis_db'] = redis_db
    common_vals['redis_key'] = ticker_key
    common_vals['redis_expire'] = redis_expire

    log.info('{} - extract ticker={} last_close={} base_key={} '
             'redis_address={} s3_address={}'.format(
                 label, ticker, last_close_str, common_vals['base_key'],
                 common_vals['redis_address'], common_vals['s3_address']))
    """
    Extract Datasets
    """

    iex_daily_status = ae_consts.FAILED
    iex_minute_status = ae_consts.FAILED
    iex_quote_status = ae_consts.FAILED
    iex_stats_status = ae_consts.FAILED
    iex_peers_status = ae_consts.FAILED
    iex_news_status = ae_consts.FAILED
    iex_financials_status = ae_consts.FAILED
    iex_earnings_status = ae_consts.FAILED
    iex_dividends_status = ae_consts.FAILED
    iex_company_status = ae_consts.FAILED
    yahoo_news_status = ae_consts.FAILED
    yahoo_options_status = ae_consts.FAILED
    yahoo_pricing_status = ae_consts.FAILED
    td_calls_status = ae_consts.FAILED
    td_puts_status = ae_consts.FAILED

    iex_daily_df = None
    iex_minute_df = None
    iex_quote_df = None
    iex_stats_df = None
    iex_peers_df = None
    iex_news_df = None
    iex_financials_df = None
    iex_earnings_df = None
    iex_dividends_df = None
    iex_company_df = None
    yahoo_option_calls_df = None
    yahoo_option_puts_df = None
    yahoo_pricing_df = None
    yahoo_news_df = None
    td_calls_df = None
    td_puts_df = None

    for ticker in use_tickers:
        req = api_requests.get_ds_dict(ticker=ticker,
                                       base_key=common_vals['base_key'],
                                       ds_id=label,
                                       service_dict=common_vals)
        extract_requests.append(req)
    # end of for all ticker in use_tickers

    extract_iex = True
    if extract_mode not in ['all', 'iex']:
        extract_iex = False

    extract_yahoo = True
    if extract_mode not in ['all', 'yahoo']:
        extract_yahoo = False

    extract_td = True
    if extract_mode not in ['all', 'td']:
        extract_td = False

    for extract_req in extract_requests:
        if 'daily' in iex_datasets or extract_iex:
            iex_daily_status, iex_daily_df = \
                iex_extract_utils.extract_daily_dataset(
                    extract_req)
            if iex_daily_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_daily={}'.format(ticker))
        if 'minute' in iex_datasets or extract_iex:
            iex_minute_status, iex_minute_df = \
                iex_extract_utils.extract_minute_dataset(
                    extract_req)
            if iex_minute_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_minute={}'.format(ticker))
        if 'quote' in iex_datasets or extract_iex:
            iex_quote_status, iex_quote_df = \
                iex_extract_utils.extract_quote_dataset(
                    extract_req)
            if iex_quote_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_quote={}'.format(ticker))
        if 'stats' in iex_datasets or extract_iex:
            iex_stats_status, iex_stats_df = \
                iex_extract_utils.extract_stats_dataset(
                    extract_req)
            if iex_stats_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_stats={}'.format(ticker))
        if 'peers' in iex_datasets or extract_iex:
            iex_peers_status, iex_peers_df = \
                iex_extract_utils.extract_peers_dataset(
                    extract_req)
            if iex_peers_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_peers={}'.format(ticker))
        if 'news' in iex_datasets or extract_iex:
            iex_news_status, iex_news_df = \
                iex_extract_utils.extract_news_dataset(
                    extract_req)
            if iex_news_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning('unable to extract iex_news={}'.format(ticker))
        if 'financials' in iex_datasets or extract_iex:
            iex_financials_status, iex_financials_df = \
                iex_extract_utils.extract_financials_dataset(
                    extract_req)
            if iex_financials_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_financials={}'.format(ticker))
        if 'earnings' in iex_datasets or extract_iex:
            iex_earnings_status, iex_earnings_df = \
                iex_extract_utils.extract_earnings_dataset(
                    extract_req)
            if iex_earnings_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_earnings={}'.format(ticker))
        if 'dividends' in iex_datasets or extract_iex:
            iex_dividends_status, iex_dividends_df = \
                iex_extract_utils.extract_dividends_dataset(
                    extract_req)
            if iex_dividends_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_dividends={}'.format(ticker))
        if 'company' in iex_datasets or extract_iex:
            iex_company_status, iex_company_df = \
                iex_extract_utils.extract_company_dataset(
                    extract_req)
            if iex_company_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract iex_company={}'.format(ticker))
        # end of iex extracts

        if extract_yahoo:
            yahoo_options_status, yahoo_option_calls_df = \
                yahoo_extract_utils.extract_option_calls_dataset(
                    extract_req)
            yahoo_options_status, yahoo_option_puts_df = \
                yahoo_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if yahoo_options_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract yahoo_options={}'.format(ticker))
            yahoo_pricing_status, yahoo_pricing_df = \
                yahoo_extract_utils.extract_pricing_dataset(
                    extract_req)
            if yahoo_pricing_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract yahoo_pricing={}'.format(ticker))
            yahoo_news_status, yahoo_news_df = \
                yahoo_extract_utils.extract_yahoo_news_dataset(
                    extract_req)
            if yahoo_news_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning(
                        'unable to extract yahoo_news={}'.format(ticker))
        # end of yahoo extracts

        if extract_td:
            td_calls_status, td_calls_df = \
                td_extract_utils.extract_option_calls_dataset(
                    extract_req)
            if td_calls_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning('unable to extract tdcalls={}'.format(ticker))
            td_puts_status, td_puts_df = \
                td_extract_utils.extract_option_puts_dataset(
                    extract_req)
            if td_puts_status != ae_consts.SUCCESS:
                if verbose:
                    log.warning('unable to extract tdputs={}'.format(ticker))
        # td extracts

        ticker_data = {}
        ticker_data['daily'] = iex_daily_df
        ticker_data['minute'] = iex_minute_df
        ticker_data['quote'] = iex_quote_df
        ticker_data['stats'] = iex_stats_df
        ticker_data['peers'] = iex_peers_df
        ticker_data['news1'] = iex_news_df
        ticker_data['financials'] = iex_financials_df
        ticker_data['earnings'] = iex_earnings_df
        ticker_data['dividends'] = iex_dividends_df
        ticker_data['company'] = iex_company_df
        ticker_data['calls'] = yahoo_option_calls_df
        ticker_data['puts'] = yahoo_option_puts_df
        ticker_data['pricing'] = yahoo_pricing_df
        ticker_data['news'] = yahoo_news_df
        ticker_data['tdcalls'] = td_calls_df
        ticker_data['tdputs'] = td_puts_df

        rec[ticker] = ticker_data
    # end of for service_dict in extract_requests

    return rec
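
A hedged sketch of pointing ``extract`` at non-default services using
only the arguments documented above (the host names are placeholders):

.. code-block:: python

    from analysis_engine.extract import extract

    d = extract(ticker='AAPL',
                iex_datasets=['daily', 'minute'],
                redis_address='redis-master:6379',
                s3_address='minio:9000',
                verbose=True)
    # each dataset is a pandas.DataFrame or None if it was not cached
    aapl_daily_df = d['AAPL']['daily']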
Example #18
0
    def latest(self,
               date_str=None,
               start_row=-200,
               extract_iex=True,
               extract_yahoo=False,
               extract_td=True,
               verbose=False,
               **kwargs):
        """latest

        Run the algorithm with the latest pricing data. Also
        supports running a backtest for a historical date in
        the pricing history (format ``YYYY-MM-DD``)

        :param date_str: optional - string start date ``YYYY-MM-DD``
            default is the latest close date
        :param start_row: negative number of rows back
            from the end of the list in the data
            default is ``-200`` where this means the algorithm
            will process the latest 200 rows in the minute
            dataset
        :param extract_iex: bool flag for extracting from ``IEX``
        :param extract_yahoo: bool flag for extracting from ``Yahoo``
            which is disabled as of 1/2019
        :param extract_td: bool flag for extracting from ``Tradier``
        :param verbose: bool flag for logs
        :param kwargs: keyword arg dict
        """
        use_date_str = date_str
        if not use_date_str:
            use_date_str = ae_utils.get_last_close_str()

        log.info('creating algo')
        self.algo_obj = base_algo.BaseAlgo(
            ticker=self.config_dict['ticker'],
            balance=self.config_dict['balance'],
            commission=self.config_dict['commission'],
            name=self.use_name,
            start_date=self.use_start_date,
            end_date=self.use_end_date,
            auto_fill=self.auto_fill,
            config_dict=self.config_dict,
            load_from_s3_bucket=self.load_from_s3_bucket,
            load_from_s3_key=self.load_from_s3_key,
            load_from_redis_key=self.load_from_redis_key,
            load_from_file=self.load_from_file,
            load_compress=self.load_compress,
            load_publish=self.load_publish,
            load_config=self.load_config,
            report_redis_key=self.report_redis_key,
            report_s3_bucket=self.report_s3_bucket,
            report_s3_key=self.report_s3_key,
            report_file=self.report_file,
            report_compress=self.report_compress,
            report_publish=self.report_publish,
            report_config=self.report_config,
            history_redis_key=self.history_redis_key,
            history_s3_bucket=self.history_s3_bucket,
            history_s3_key=self.history_s3_key,
            history_file=self.history_file,
            history_compress=self.history_compress,
            history_publish=self.history_publish,
            history_config=self.history_config,
            extract_redis_key=self.extract_redis_key,
            extract_s3_bucket=self.extract_s3_bucket,
            extract_s3_key=self.extract_s3_key,
            extract_file=self.extract_file,
            extract_save_dir=self.extract_save_dir,
            extract_compress=self.extract_compress,
            extract_publish=self.extract_publish,
            extract_config=self.extract_config,
            publish_to_slack=self.publish_to_slack,
            publish_to_s3=self.publish_to_s3,
            publish_to_redis=self.publish_to_redis,
            dataset_type=self.dataset_type,
            serialize_datasets=self.serialize_datasets,
            compress=self.compress,
            encoding=self.encoding,
            redis_enabled=self.redis_enabled,
            redis_key=self.redis_key,
            redis_address=self.redis_address,
            redis_db=self.redis_db,
            redis_password=self.redis_password,
            redis_expire=self.redis_expire,
            redis_serializer=self.redis_serializer,
            redis_encoding=self.redis_encoding,
            s3_enabled=self.s3_enabled,
            s3_key=self.s3_key,
            s3_address=self.s3_address,
            s3_bucket=self.s3_bucket,
            s3_access_key=self.s3_access_key,
            s3_secret_key=self.s3_secret_key,
            s3_region_name=self.s3_region_name,
            s3_secure=self.s3_secure,
            slack_enabled=self.slack_enabled,
            slack_code_block=self.slack_code_block,
            slack_full_width=self.slack_full_width,
            dataset_publish_extract=self.extract_publish,
            dataset_publish_history=self.history_publish,
            dataset_publish_report=self.report_publish,
            run_on_engine=self.run_on_engine,
            auth_url=self.broker_url,
            backend_url=self.backend_url,
            include_tasks=self.include_tasks,
            ssl_options=self.ssl_options,
            transport_options=self.transport_options,
            path_to_config_module=self.path_to_config_module,
            timeseries=self.timeseries,
            trade_strategy=self.trade_strategy,
            verbose=False,
            raise_on_err=self.raise_on_err)

        log.info('run latest - start')

        ticker = self.config_dict['ticker']
        dataset_id = f'{ticker}_{use_date_str}'
        self.common_fetch_vals['base_key'] = dataset_id
        verbose_extract = self.config_dict.get('verbose_extract', False)
        indicator_datasets = self.algo_obj.get_indicator_datasets()
        ticker_data = build_dataset_node.build_dataset_node(
            ticker=ticker,
            date=use_date_str,
            datasets=indicator_datasets,
            service_dict=self.common_fetch_vals,
            verbose=verbose_extract)

        algo_data_req = {
            ticker: [{
                'id': dataset_id,  # id is currently the cache key in redis
                'date': use_date_str,  # used to confirm dates in asc order
                'data': ticker_data,
                'start_row': start_row
            }]
        }

        if verbose:
            log.info(f'extract - {dataset_id} '
                     f'dataset={len(algo_data_req[ticker])}')

        # this could be a separate celery task
        try:
            if verbose:
                log.info(f'handle_data START - {dataset_id}')
            self.algo_obj.handle_data(data=algo_data_req)
            if verbose:
                log.info(f'handle_data END - {dataset_id}')
        except Exception as e:
            a_name = self.algo_obj.get_name()
            a_debug_msg = self.algo_obj.get_debug_msg()
            if not a_debug_msg:
                a_debug_msg = 'debug message not set'
            # a_config_dict = ae_consts.ppj(self.algo_obj.config_dict)
            msg = (f'{dataset_id} - algo={a_name} '
                   f'encountered exception in handle_data tickers={ticker} '
                   f'from ex={e} '
                   f'and failed during operation: {a_debug_msg}')
            log.critical(f'{msg}')
        # end try/ex

        log.info('run latest - create history')

        history_ds = self.algo_obj.create_history_dataset()
        self.history_df = pd.DataFrame(history_ds[ticker])
        self.determine_latest_times_in_history()

        self.num_rows = len(self.history_df.index)

        if verbose:
            log.info(self.history_df[['minute', 'close']].tail(5))

        log.info(f'run latest minute={self.end_date} - '
                 f'rows={self.num_rows} - done')

        return self.get_history()
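
A hedged usage sketch; the class that owns ``latest`` is not shown in
this snippet, so the ``analysis_engine.algo_runner.AlgoRunner`` import
below is an assumption:

.. code-block:: python

    from analysis_engine.algo_runner import AlgoRunner

    # the algo config is assumed to come from the runner's constructor
    runner = AlgoRunner(ticker='SPY')
    history_df = runner.latest()  # trade history as a pandas.DataFrame
    print(history_df[['minute', 'close']].tail(5))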
Example #19
0
def fetch_calls(ticker=None,
                work_dict=None,
                scrub_mode='sort-by-date',
                verbose=False):
    """fetch_calls

    Fetch Tradier option calls for a ticker and
    return a tuple: (status, ``pandas.DataFrame``)

    .. code-block:: python

        import analysis_engine.td.fetch_api as td_fetch

        # Please set the TD_TOKEN environment variable to your token
        calls_status, calls_df = td_fetch.fetch_calls(
            ticker='SPY')

        print(f'Fetched SPY Option Calls from Tradier status={calls_status}:')
        print(calls_df)

    :param ticker: string ticker to fetch
    :param work_dict: dictionary of args
        used by the automation
    :param scrub_mode: optional - string type of
        scrubbing handler to run
    :param verbose: optional - bool for debugging
    """
    label = 'fetch_calls'
    datafeed_type = td_consts.DATAFEED_TD_CALLS
    exp_date = None
    latest_pricing = {}
    latest_close = None

    if work_dict:
        ticker = work_dict.get('ticker', ticker)
        label = work_dict.get('label', label)
        exp_date = work_dict.get('exp_date', exp_date)
        latest_pricing = work_dict.get('latest_pricing', latest_pricing)
        latest_close = latest_pricing.get('close', latest_close)

    log.debug(f'{label} - calls - close={latest_close} ticker={ticker}')

    if not exp_date:
        exp_date = opt_dates.option_expiration().strftime(
            ae_consts.COMMON_DATE_FORMAT)
    use_url = td_consts.TD_URLS['options'].format(ticker, exp_date)
    headers = td_consts.get_auth_headers()
    session = requests.Session()
    session.headers = headers
    res = url_helper.url_helper(sess=session).get(use_url)

    if res.status_code != requests.codes.OK:
        if res.status_code in [401, 403]:
            log.critical('Please check that the TD_TOKEN is correct - '
                         f'received {res.status_code} during '
                         'fetch for: calls')
        else:
            log.info(f'failed to get call with response={res} '
                     f'code={res.status_code} '
                     f'text={res.text}')
        return ae_consts.EMPTY, pd.DataFrame([{}])
    records = json.loads(res.text)
    org_records = records.get('options', {}).get('option', [])

    if len(org_records) == 0:
        log.info(f'failed to get call records text={res.text}')
        return ae_consts.EMPTY, pd.DataFrame([{}])

    options_list = []

    # approximate US/Eastern time from the system clock's UTC time
    # using a fixed -5 hour offset (this ignores daylight saving time)
    created_minute = (
        datetime.datetime.utcnow() -
        datetime.timedelta(hours=5)).strftime('%Y-%m-%d %H:%M:00')
    last_close_date = ae_utils.get_last_close_str(fmt='%Y-%m-%d %H:%M:00')

    # hit bug where dates were None
    if not last_close_date:
        last_close_date = created_minute

    for node in org_records:
        node['date'] = last_close_date
        node['created'] = created_minute
        node['ticker'] = ticker
        if (node['option_type'] == 'call'
                and node['expiration_type'] == 'standard'
                and float(node['bid']) > 0.01):
            node['opt_type'] = int(ae_consts.OPTION_CALL)
            node['exp_date'] = node['expiration_date']

            new_node = {}
            for col in td_consts.TD_OPTION_COLUMNS:
                if col in node:
                    if col in td_consts.TD_EPOCH_COLUMNS:
                        # trade_date can be None
                        if node[col] == 0:
                            new_node[col] = None
                        else:
                            new_node[col] = ae_utils.epoch_to_dt(
                                epoch=node[col] / 1000,
                                use_utc=False,
                                convert_to_est=True).strftime(
                                    ae_consts.COMMON_TICK_DATE_FORMAT)
                            """
                            Debug epoch ms converter:
                            """
                            """
                            print('-----------')
                            print(col)
                            print(node[col])
                            print(new_node[col])
                            print('===========')
                            """
                        # if/else valid date
                    else:
                        new_node[col] = node[col]
                    # if date column to convert
                # if column is in the row
            # convert all columns

            options_list.append(new_node)
    # end of records

    full_df = pd.DataFrame(options_list).sort_values(by=['strike'],
                                                     ascending=True)

    num_chains = len(full_df.index)

    df = None
    if latest_close:
        df_filter = ((full_df['strike'] >=
                      (latest_close - ae_consts.OPTIONS_LOWER_STRIKE)) &
                     (full_df['strike'] <=
                      (latest_close + ae_consts.OPTIONS_UPPER_STRIKE)))
        df = full_df[df_filter].copy().sort_values(
            by=['date', 'strike']).reset_index()
    else:
        mid_chain_idx = int(num_chains / 2)
        low_idx = int(mid_chain_idx - ae_consts.MAX_OPTIONS_LOWER_STRIKE)
        high_idx = int(mid_chain_idx + ae_consts.MAX_OPTIONS_UPPER_STRIKE)
        if low_idx < 0:
            low_idx = 0
        if high_idx > num_chains:
            high_idx = num_chains
        df = full_df[low_idx:high_idx].copy().sort_values(
            by=['date', 'strike']).reset_index()

    scrubbed_df = scrub_utils.ingress_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=datafeed_type,
        msg_format='df={} date_str={}',
        ds_id=ticker,
        date_str=exp_date,
        df=df)

    return ae_consts.SUCCESS, scrubbed_df
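
When ``work_dict`` carries ``latest_pricing`` with a ``close``, the
function filters the chain to strikes within
``ae_consts.OPTIONS_LOWER_STRIKE`` and ``ae_consts.OPTIONS_UPPER_STRIKE``
of that close; a hedged sketch (requires a valid ``TD_TOKEN``):

.. code-block:: python

    work = {
        'ticker': 'SPY',
        'latest_pricing': {
            'close': 450.0  # placeholder close for the sketch
        }
    }
    calls_status, calls_df = fetch_calls(work_dict=work, verbose=True)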
Example #20
0
    def setUp(self):
        """setUp"""
        self.ticker = 'AAPL'
        self.last_close_str = get_last_close_str(fmt=COMMON_DATE_FORMAT)
Example #21
0
def build_dataset_node(ticker,
                       datasets,
                       date=None,
                       service_dict=None,
                       log_label=None,
                       redis_enabled=True,
                       redis_address=None,
                       redis_db=None,
                       redis_password=None,
                       redis_expire=None,
                       redis_key=None,
                       s3_enabled=True,
                       s3_address=None,
                       s3_bucket=None,
                       s3_access_key=None,
                       s3_secret_key=None,
                       s3_region_name=None,
                       s3_secure=False,
                       s3_key=None,
                       verbose=False):
    """build_dataset_node

    Helper for building a dictionary of
    cached datasets from redis.

    The datasets should be built from
    the algorithm's config indicators'
    ``uses_data`` fields, which default
    to ``minute`` data if not set

    :param ticker: string ticker
    :param datasets: list of string dataset names
        to extract from redis
    :param date: optional - string datetime formatted
        ``YYYY-MM-DD``
        (default is last trading close date)
    :param service_dict: optional - dictionary for all
        service connectivity to Redis and Minio if not
        set the arguments for all ``s3_*`` and ``redis_*``
        will be used to lookup data in Redis and Minio

    **(Optional) Redis connectivity arguments**

    :param redis_enabled: bool - toggle for auto-caching all
        datasets in Redis
        (default is ``True``)
    :param redis_address: Redis connection string
        format is ``host:port``
        (default is ``localhost:6379``)
    :param redis_db: Redis db to use
        (default is ``0``)
    :param redis_password: optional - Redis password
        (default is ``None``)
    :param redis_expire: optional - Redis expire value
        (default is ``None``)
    :param redis_key: optional - redis key not used
        (default is ``None``)

    **(Optional) Minio (S3) connectivity arguments**

    :param s3_enabled: bool - toggle for turning on/off
        Minio or AWS S3
        (default is ``True``)
    :param s3_address: Minio S3 connection string address
        format is ``host:port``
        (default is ``localhost:9000``)
    :param s3_bucket: S3 Bucket for storing the artifacts
        (default is ``dev``) which should be viewable on a browser:
        http://localhost:9000/minio/dev/
    :param s3_access_key: S3 Access key
        (default is ``trexaccesskey``)
    :param s3_secret_key: S3 Secret key
        (default is ``trex123321``)
    :param s3_region_name: S3 region name
        (default is ``us-east-1``)
    :param s3_secure: Transmit using tls encryption
        (default is ``False``)
    :param s3_key: optional s3 key not used
        (default is ``None``)

    **Debugging**

    :param log_label: optional - log label string
    :param verbose: optional - flag for debugging
        (default to ``False``)
    """

    label = log_label
    if not label:
        label = 'build_bt'

    if not date:
        date = ae_utils.get_last_close_str()

    td_convert_to_datetime = ae_consts.TRADIER_CONVERT_TO_DATETIME

    date_key = f'{ticker}_{date}'

    base_req = api_requests.get_ds_dict(ticker=ticker,
                                        base_key=date_key,
                                        ds_id=label,
                                        service_dict=service_dict)

    if not service_dict:
        base_req['redis_enabled'] = redis_enabled
        base_req['redis_address'] = redis_address
        base_req['redis_password'] = redis_password
        base_req['redis_db'] = redis_db
        base_req['redis_key'] = date_key
        base_req['redis_expire'] = redis_expire
        base_req['s3_enabled'] = s3_enabled
        base_req['s3_bucket'] = s3_bucket
        base_req['s3_address'] = s3_address
        base_req['s3_secure'] = s3_secure
        base_req['s3_region_name'] = s3_region_name
        base_req['s3_access_key'] = s3_access_key
        base_req['s3_secret_key'] = s3_secret_key
        base_req['s3_key'] = date_key

    if verbose:
        log.info(f'extracting {date_key}')
        """
        for showing connectivity args in the logs
        log.debug(
            f'bt {date_key} {ae_consts.ppj(base_req)}')
        """

    iex_daily_status = ae_consts.FAILED
    iex_minute_status = ae_consts.FAILED
    iex_quote_status = ae_consts.FAILED
    iex_stats_status = ae_consts.FAILED
    iex_peers_status = ae_consts.FAILED
    iex_news_status = ae_consts.FAILED
    iex_financials_status = ae_consts.FAILED
    iex_earnings_status = ae_consts.FAILED
    iex_dividends_status = ae_consts.FAILED
    iex_company_status = ae_consts.FAILED
    td_calls_status = ae_consts.FAILED
    td_puts_status = ae_consts.FAILED

    iex_daily_df = None
    iex_minute_df = None
    iex_quote_df = None
    iex_stats_df = None
    iex_peers_df = None
    iex_news_df = None
    iex_financials_df = None
    iex_earnings_df = None
    iex_dividends_df = None
    iex_company_df = None
    td_calls_df = None
    td_puts_df = None

    if 'daily' in datasets:
        iex_daily_status, iex_daily_df = \
            iex_extract_utils.extract_daily_dataset(
                base_req)
        if iex_daily_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_daily={ticker}')
    if 'minute' in datasets:
        iex_minute_status, iex_minute_df = \
            iex_extract_utils.extract_minute_dataset(
                base_req)
        if iex_minute_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_minute={ticker}')
    if 'quote' in datasets:
        iex_quote_status, iex_quote_df = \
            iex_extract_utils.extract_quote_dataset(
                base_req)
        if iex_quote_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_quote={ticker}')
    if 'stats' in datasets:
        iex_stats_status, iex_stats_df = \
            iex_extract_utils.extract_stats_dataset(
                base_req)
        if iex_stats_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_stats={ticker}')
    if 'peers' in datasets:
        iex_peers_status, iex_peers_df = \
            iex_extract_utils.extract_peers_dataset(
                base_req)
        if iex_peers_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_peers={ticker}')
    if 'news' in datasets:
        iex_news_status, iex_news_df = \
            iex_extract_utils.extract_news_dataset(
                base_req)
        if iex_news_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_news={ticker}')
    if 'financials' in datasets:
        iex_financials_status, iex_financials_df = \
            iex_extract_utils.extract_financials_dataset(
                base_req)
        if iex_financials_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_financials={ticker}')
    if 'earnings' in datasets:
        iex_earnings_status, iex_earnings_df = \
            iex_extract_utils.extract_earnings_dataset(
                base_req)
        if iex_earnings_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_earnings={ticker}')
    if 'dividends' in datasets:
        iex_dividends_status, iex_dividends_df = \
            iex_extract_utils.extract_dividends_dataset(
                base_req)
        if iex_dividends_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_dividends={ticker}')
    if 'company' in datasets:
        iex_company_status, iex_company_df = \
            iex_extract_utils.extract_company_dataset(
                base_req)
        if iex_company_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract iex_company={ticker}')
    # end of iex extracts
    """
    Yahoo disabled on Jan 2019

    yahoo_news_status = ae_consts.FAILED
    yahoo_options_status = ae_consts.FAILED
    yahoo_pricing_status = ae_consts.FAILED
    yahoo_option_calls_df = None
    yahoo_option_puts_df = None
    yahoo_pricing_df = None
    yahoo_news_df = None

    if 'options' in datasets:
        yahoo_options_status, yahoo_option_calls_df = \
            yahoo_extract_utils.extract_option_calls_dataset(
                base_req)
        yahoo_options_status, yahoo_option_puts_df = \
            yahoo_extract_utils.extract_option_puts_dataset(
                base_req)
        if yahoo_options_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract yahoo_options={ticker}')
    if 'pricing' in datasets:
        yahoo_pricing_status, yahoo_pricing_df = \
            yahoo_extract_utils.extract_pricing_dataset(
                base_req)
        if yahoo_pricing_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract yahoo_pricing={ticker}')
    if 'news' in datasets:
        yahoo_news_status, yahoo_news_df = \
            yahoo_extract_utils.extract_yahoo_news_dataset(
                base_req)
        if yahoo_news_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract yahoo_news={ticker}')
    # end of yahoo extracts
    """
    """
    Tradier Extraction
    Debug by setting:

    base_req['verbose_td'] = True
    """
    if ('calls' in datasets or 'tdcalls' in datasets):
        td_calls_status, td_calls_df = \
            td_extract_utils.extract_option_calls_dataset(
                base_req)
        if td_calls_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract tdcalls={ticker}')
        else:
            if ae_consts.is_df(df=td_calls_df):
                for c in td_convert_to_datetime:
                    if c in td_calls_df:
                        td_calls_df[c] = pd.to_datetime(
                            td_calls_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT)
                if 'date' in td_calls_df:
                    td_calls_df = td_calls_df.sort_values(
                        'date', ascending=True)
        # end of converting dates
    # end of Tradier calls extraction

    if ('puts' in datasets or 'tdputs' in datasets):
        td_puts_status, td_puts_df = \
            td_extract_utils.extract_option_puts_dataset(
                base_req)
        if td_puts_status != ae_consts.SUCCESS:
            if verbose:
                log.warn(f'unable to extract tdputs={ticker}')
        else:
            if ae_consts.is_df(df=td_puts_df):
                for c in td_convert_to_datetime:
                    if c in td_puts_df:
                        td_puts_df[c] = pd.to_datetime(
                            td_puts_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT)
                if 'date' in td_puts_df:
                    td_puts_df = td_puts_df.sort_values(
                        'date', ascending=True)
        # end of converting dates
    # end of Tradier puts extraction

    ticker_data = {
        'daily': iex_daily_df,
        'minute': iex_minute_df,
        'quote': iex_quote_df,
        'stats': iex_stats_df,
        'peers': iex_peers_df,
        'news1': iex_news_df,
        'financials': iex_financials_df,
        'earnings': iex_earnings_df,
        'dividends': iex_dividends_df,
        'company': iex_company_df,
        'tdcalls': td_calls_df,
        'tdputs': td_puts_df,
        'calls': None,  # yahoo - here for legacy
        'news': None,  # yahoo - here for legacy
        'pricing': None,  # yahoo - here for legacy
        'puts': None  # yahoo - here for legacy
    }

    return ticker_data
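
One pitfall in the Tradier blocks above is that pandas ``sort_values``
returns a new frame instead of mutating in place, so the result has to
be assigned back (the original listing discarded it). Below is a
minimal, self-contained sketch of the same convert-then-sort pattern;
the ``COMMON_TICK_DATE_FORMAT`` value and the ``td_convert_to_datetime``
column names are stand-in assumptions, not the library's constants:

import pandas as pd

# hypothetical stand-ins for the module-level constants used above
COMMON_TICK_DATE_FORMAT = '%Y-%m-%d %H:%M:%S'
td_convert_to_datetime = ['date', 'ask_date', 'bid_date', 'trade_date']

df = pd.DataFrame({
    'date': ['2019-02-15 16:00:00', '2019-02-15 09:30:00'],
    'strike': [280.0, 285.0]})
for c in td_convert_to_datetime:
    if c in df:
        df[c] = pd.to_datetime(df[c], format=COMMON_TICK_DATE_FORMAT)
if 'date' in df:
    # sort_values returns a copy - assign it back
    df = df.sort_values('date', ascending=True)
print(df)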
Example #22
0
def get_ds_dict(ticker,
                base_key=None,
                ds_id=None,
                label=None,
                service_dict=None):
    """get_ds_dict

    Get a dictionary with all cache keys for a ticker and return
    the dictionary. Use this method to decouple your apps
    from the underlying cache key implementations (if you
    do not need them).

    :param ticker: ticker
    :param base_key: optional - base key that is prepended
                     in all cache keys
    :param ds_id: optional - dataset id (useful for
                  external database id)
    :param label: optional - tracking label in the logs
    :param service_dict: optional - parent call functions and Celery
                         tasks can use this dictionary to seed the
                         common service routes and endpoints. Refer
                         to ``analysis_engine.consts.SERVICE_VALS``
                         for automatically-copied over keys by this
                         helper.
    """

    if not ticker:
        raise Exception('please pass in a ticker')

    use_base_key = base_key
    if not use_base_key:
        use_base_key = '{}_{}'.format(
            ticker,
            ae_utils.get_last_close_str(fmt=ae_consts.COMMON_DATE_FORMAT))

    date_str = ae_utils.utc_date_str(fmt=ae_consts.COMMON_DATE_FORMAT)
    now_str = ae_utils.utc_now_str(fmt=ae_consts.COMMON_TICK_DATE_FORMAT)

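    # each key below is '<base_key>_<dataset suffix>', so every
    # dataset for a ticker/date stays grouped under one prefix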
    daily_redis_key = '{}_{}'.format(use_base_key,
                                     ae_consts.DAILY_S3_BUCKET_NAME)
    minute_redis_key = '{}_{}'.format(use_base_key,
                                      ae_consts.MINUTE_S3_BUCKET_NAME)
    quote_redis_key = '{}_{}'.format(use_base_key,
                                     ae_consts.QUOTE_S3_BUCKET_NAME)
    stats_redis_key = '{}_{}'.format(use_base_key,
                                     ae_consts.STATS_S3_BUCKET_NAME)
    peers_redis_key = '{}_{}'.format(use_base_key,
                                     ae_consts.PEERS_S3_BUCKET_NAME)
    news_iex_redis_key = '{}_{}1'.format(use_base_key,
                                         ae_consts.NEWS_S3_BUCKET_NAME)
    financials_redis_key = '{}_{}'.format(use_base_key,
                                          ae_consts.FINANCIALS_S3_BUCKET_NAME)
    earnings_redis_key = '{}_{}'.format(use_base_key,
                                        ae_consts.EARNINGS_S3_BUCKET_NAME)
    dividends_redis_key = '{}_{}'.format(use_base_key,
                                         ae_consts.DIVIDENDS_S3_BUCKET_NAME)
    company_redis_key = '{}_{}'.format(use_base_key,
                                       ae_consts.COMPANY_S3_BUCKET_NAME)
    options_yahoo_redis_key = '{}_{}'.format(use_base_key,
                                             ae_consts.OPTIONS_S3_BUCKET_NAME)
    call_options_yahoo_redis_key = '{}_calls'.format(use_base_key)
    put_options_yahoo_redis_key = '{}_puts'.format(use_base_key)
    pricing_yahoo_redis_key = '{}_{}'.format(use_base_key,
                                             ae_consts.PRICING_S3_BUCKET_NAME)
    news_yahoo_redis_key = '{}_{}'.format(use_base_key,
                                          ae_consts.NEWS_S3_BUCKET_NAME)
    call_options_td_redis_key = '{}_tdcalls'.format(use_base_key)
    put_options_td_redis_key = '{}_tdputs'.format(use_base_key)

    ds_cache_dict = {
        'daily': daily_redis_key,
        'minute': minute_redis_key,
        'quote': quote_redis_key,
        'stats': stats_redis_key,
        'peers': peers_redis_key,
        'news1': news_iex_redis_key,
        'financials': financials_redis_key,
        'earnings': earnings_redis_key,
        'dividends': dividends_redis_key,
        'company': company_redis_key,
        'options': options_yahoo_redis_key,
        'calls': call_options_yahoo_redis_key,
        'puts': put_options_yahoo_redis_key,
        'pricing': pricing_yahoo_redis_key,
        'news': news_yahoo_redis_key,
        'tdcalls': call_options_td_redis_key,
        'tdputs': put_options_td_redis_key,
        'ticker': ticker,
        'ds_id': ds_id,
        'label': label,
        'created': now_str,
        'date': date_str,
        'manifest_key': use_base_key,
        'version': ae_consts.CACHE_DICT_VERSION
    }

    # set keys/values for redis/minio from the
    # service_dict - helper method for
    # launching job chains
    if service_dict:
        for k in ae_consts.SERVICE_VALS:
            ds_cache_dict[k] = service_dict[k]

    return ds_cache_dict
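
Since ``get_ds_dict`` is the decoupling point between an app and the
cache key layout, a short usage sketch may help; the ticker, the label,
and the exact key strings in the comments are illustrative assumptions:

cache_keys = get_ds_dict(ticker='SPY', label='pricing-job')
# e.g. cache_keys['daily']   -> 'SPY_<YYYY-MM-DD>_daily'
# e.g. cache_keys['tdcalls'] -> 'SPY_<YYYY-MM-DD>_tdcalls'
print(cache_keys['daily'])
print(cache_keys['manifest_key'])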
Example #23
0
def extract_option_puts_dataset(ticker=None,
                                date=None,
                                work_dict=None,
                                scrub_mode='sort-by-date',
                                verbose=False):
    """extract_option_puts_dataset

    Extract the TD options puts for a ticker and
    return a tuple (status, ``pandas.Dataframe``)

    .. code-block:: python

        import analysis_engine.td.extract_df_from_redis as td_extract

        # extract by historical date is also supported as an arg
        # date='2019-02-15'
        puts_status, puts_df = td_extract.extract_option_puts_dataset(
            ticker='SPY')
        print(puts_df)

    :param ticker: string ticker to extract
    :param date: optional - string date to extract
        formatted ``YYYY-MM-DD``
    :param work_dict: dictionary of args
    :param scrub_mode: optional - string type of
        scrubbing handler to run
    :param verbose: optional - boolean for turning on logging
    """
    label = 'extract_td_puts'
    latest_close_date = ae_utils.get_last_close_str()
    use_date = date
    if work_dict:
        if not ticker:
            ticker = work_dict.get('ticker', None)
        label = f'{work_dict.get("label", label)}'
    if not use_date:
        use_date = latest_close_date

    ds_id = ticker
    df_type = td_consts.DATAFEED_TD_PUTS
    df_str = td_consts.get_datafeed_str_td(df_type=df_type)
    redis_db = ae_consts.REDIS_DB
    redis_key = f'{ticker}_{use_date}_tdputs'
    redis_host, redis_port = ae_consts.get_redis_host_and_port(req=work_dict)
    redis_password = ae_consts.REDIS_PASSWORD
    s3_key = redis_key

    if work_dict:
        redis_db = work_dict.get('redis_db', redis_db)
        redis_password = work_dict.get('redis_password', redis_password)
        verbose = work_dict.get('verbose_td', verbose)

    if verbose:
        log.info(f'{label} - {df_str} - start - redis_key={redis_key} '
                 f's3_key={s3_key}')

    exp_date_str = None
    puts_df = None
    status = ae_consts.NOT_RUN
    try:
        redis_rec = redis_get.get_data_from_redis_key(label=label,
                                                      host=redis_host,
                                                      port=redis_port,
                                                      db=redis_db,
                                                      password=redis_password,
                                                      key=redis_key,
                                                      decompress_df=True)

        status = redis_rec['status']
        if verbose:
            log.info(f'{label} - {df_str} redis get data key={redis_key} '
                     f'status={ae_consts.get_status(status=status)}')

        if status == ae_consts.SUCCESS:
            puts_json = None
            # prefer 'tdputs', fall back to 'puts', then the raw record
            if 'tdputs' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['tdputs']
            elif 'puts' in redis_rec['rec']['data']:
                puts_json = redis_rec['rec']['data']['puts']
            else:
                puts_json = redis_rec['rec']['data']
            if not puts_json:
                return ae_consts.SUCCESS, pd.DataFrame([])
            if verbose:
                log.info(f'{label} - {df_str} redis convert puts to df')
            try:
                puts_df = pd.read_json(puts_json, orient='records')
                if len(puts_df.index) == 0:
                    return ae_consts.SUCCESS, pd.DataFrame([])
                if 'date' not in puts_df:
                    log.debug('failed to find date column in TD puts '
                              f'df={puts_df} len={len(puts_df.index)}')
                    return ae_consts.SUCCESS, pd.DataFrame([])
                puts_df = puts_df.sort_values(by=['date', 'strike'])
                """
                for i, r in puts_df.iterrows():
                    print(r['date'])
                convert_epochs = [
                    'ask_date',
                    'bid_date',
                    'trade_date'
                ]
                for c in convert_epochs:
                    if c in puts_df:
                        puts_df[c] = pd.DatetimeIndex(pd.to_datetime(
                            puts_df[c],
                            format=ae_consts.COMMON_TICK_DATE_FORMAT
                        )).tz_localize(
                            'UTC').tz_convert(
                                'US/Eastern')
                # dates converted
                """
                exp_date_str = puts_df['exp_date'].iloc[-1]

                puts_df['date'] = puts_df['date'].dt.strftime(
                    ae_consts.COMMON_TICK_DATE_FORMAT)

            except Exception:
                log.debug(f'{label} - {df_str} redis_key={redis_key} '
                          'no puts df found')
                return ae_consts.SUCCESS, pd.DataFrame([])
            # end of try/ex to convert to df
            if verbose:
                log.info(f'{label} - {df_str} redis_key={redis_key} '
                         f'puts={len(puts_df.index)} exp_date={exp_date_str}')
        else:
            if verbose:
                log.info(f'{label} - {df_str} did not find valid redis '
                         f'option puts in redis_key={redis_key} '
                         f'status={ae_consts.get_status(status=status)}')

    except Exception as e:
        if verbose:
            log.error(
                f'{label} - {df_str} - ds_id={ds_id} failed getting option '
                f'puts from redis={redis_host}:{redis_port}@{redis_db} '
                f'key={redis_key} ex={e}')
        return ae_consts.ERR, pd.DataFrame([])
    # end of try/ex extract from redis

    if verbose:
        log.info(
            f'{label} - {df_str} ds_id={ds_id} extract scrub={scrub_mode}')

    scrubbed_df = scrub_utils.extract_scrub_dataset(
        label=label,
        scrub_mode=scrub_mode,
        datafeed_type=df_type,
        msg_format='df={} date_str={}',
        ds_id=ds_id,
        df=puts_df)

    status = ae_consts.SUCCESS

    return status, scrubbed_df
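
The conversion step above expects the cached record to hold the puts
chain as a JSON string of records. Here is a minimal sketch of that
round trip with made-up option rows; the column names mirror the ones
the function sorts on, and the values are fabricated for illustration:

import io

import pandas as pd

puts_json = (
    '[{"date": "2019-02-14 16:00:00", "strike": 280.0,'
    ' "exp_date": "2019-02-15"},'
    ' {"date": "2019-02-14 16:00:00", "strike": 275.0,'
    ' "exp_date": "2019-02-15"}]')
puts_df = pd.read_json(io.StringIO(puts_json), orient='records')
puts_df = puts_df.sort_values(by=['date', 'strike'])
# the listing above reads exp_date off the last sorted row
exp_date_str = puts_df['exp_date'].iloc[-1]
print(exp_date_str)
print(puts_df)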