Example #1
def analyze(perf,
            filename,
            doc=None,
            duration=None,
            param=None,
            info=None,
            show_image=True):
    num_positions = perf.positions.shape[0]
    if num_positions == 0:
        raise ValueError("No positions found")

    gc.collect()
    mem = psutil.virtual_memory()
    log.info("Memory used %.2f Gb von %.2f Gb (%d%%)" %
             (mem.used / 1e9, mem.total / 1e9, mem.percent))

    now = datetime.datetime.now()

    serialise(perf, filename, now)

    with warnings.catch_warnings():
        # ignore the many pyfolio warnings
        warnings.simplefilter("ignore")
        create_report(perf, filename, now, doc, duration, param, info,
                      show_image)
Example #2
    def _pipeline_output(self, pipeline, chunks, name):
        # This method is taken from TradingAlgorithm.
        """
        Internal implementation of `pipeline_output`.

        For live algos we have to use the previous session: the pipeline won't
        work otherwise, because it would extrapolate and try to fetch data for
        get_datetime(), which is today.
        """
        today = normalize_date(self.get_datetime())
        prev_session = normalize_date(
            self.trading_calendar.previous_open(today))

        log.info('prev_session in _pipeline_output: {}'.format(prev_session))

        try:
            data = self._pipeline_cache.get(name, prev_session)
        except KeyError:
            # Calculate the next block.
            data, valid_until = self.run_pipeline(
                pipeline,
                prev_session,
                next(chunks),
            )
            self._pipeline_cache.set(name, data, valid_until)

        # Now that we have a cached result, try to return the data for the
        # previous session.
        try:
            return data.loc[prev_session]
        except KeyError:
            # This happens if no assets passed the pipeline screen on a given
            # day.
            return pd.DataFrame(index=[], columns=data.columns)
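For illustration, here is a minimal sketch of the expiring-cache behaviour that the snippet above relies on: get(name, dt) raises KeyError for missing or stale entries, and set(name, value, expiration_dt) stores a result together with the date until which it stays valid. The class below is a simplified stand-in, not zipline's own cache implementation.

class SimpleExpiringCache:
    """Toy cache keyed by name, with per-entry expiration dates."""

    def __init__(self):
        self._entries = {}  # name -> (expiration_dt, value)

    def get(self, name, dt):
        # Raises KeyError if the entry is missing, just like a cache miss.
        expiration_dt, value = self._entries[name]
        if dt > expiration_dt:
            # Expired entries behave like misses as well.
            del self._entries[name]
            raise KeyError(name)
        return value

    def set(self, name, value, expiration_dt):
        self._entries[name] = (expiration_dt, value)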
Example #3
 def log_order(self, contract, ib_order_id, order):
     if order.orderType == "MKT":
         log.info(
             "Placing order-{order_id}: "
             "{action} {qty} {symbol} with MKT order {tif}.".format(
                 order_id=ib_order_id,
                 action=order.action,
                 qty=order.totalQuantity,
                 symbol=contract.symbol,
                 tif=order.tif
             ))
     else:
         log.info(
             "Placing order-{order_id}: "
             "{action} {qty} {symbol} with {order_type} order. "
             "limit_price={limit_price} stop_price={stop_price} {tif}".format(
                 order_id=ib_order_id,
                 action=order.action,
                 qty=order.totalQuantity,
                 symbol=contract.symbol,
                 order_type=order.orderType,
                 limit_price=order.lmtPrice,
                 stop_price=order.auxPrice,
                 tif=order.tif
             ))
Example #4
def synch_to_calendar(sessions, start_date, end_date, df_ticker, df):
    this_cal = sessions[(sessions >= start_date) & (sessions <= end_date)]

    missing_dates = (len(this_cal) != df_ticker.shape[0])
    if missing_dates:
        sid = df_ticker.index.get_level_values('sid')[0]
        ticker = df_ticker['ticker'][0]
        log.info("Fixing missing interstitial dates for %s (%d)." %
                 (ticker, sid))

        sids = np.full(len(this_cal), sid)
        synch_index = pd.MultiIndex.from_arrays(
            [this_cal.tz_localize(None), sids], names=('date', 'sid'))
        df_ticker_synch = df_ticker.reindex(synch_index)

        # Forward fill missing data; volume and dividends must remain 0
        columns_ffill = ['ticker', 'open', 'high', 'low', 'close']
        df_ticker_synch[columns_ffill] = df_ticker_synch[columns_ffill].fillna(
            method='ffill')
        df_ticker_synch = df_ticker_synch.fillna({'volume': 0, 'dividends': 0})

        # Drop remaining NaN
        df_ticker_synch.dropna(inplace=True)

        # drop the existing sub dataframe
        df.drop(df_ticker.index, inplace=True)
        # and replace it with the new one containing all the dates.
        # Note: DataFrame.append is not in-place, so keep the result and return it.
        df = df.append(df_ticker_synch)
    return df
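A toy illustration of the reindex/forward-fill step above, using made-up data (sid 42 and ticker 'XYZ' are invented): the two missing sessions get forward-filled prices while volume and dividends stay 0.

import numpy as np
import pandas as pd

# Calendar sessions; the ticker only has data for the first and the last one.
sessions = pd.to_datetime(['2020-01-02', '2020-01-03', '2020-01-06', '2020-01-07'])
df_ticker = pd.DataFrame(
    {'ticker': 'XYZ', 'close': [10.0, 11.0], 'volume': [100.0, 200.0], 'dividends': [0.0, 0.0]},
    index=pd.MultiIndex.from_arrays(
        [pd.to_datetime(['2020-01-02', '2020-01-07']), [42, 42]],
        names=('date', 'sid')))

synch_index = pd.MultiIndex.from_arrays(
    [sessions, np.full(len(sessions), 42)], names=('date', 'sid'))
df_ticker_synch = df_ticker.reindex(synch_index)

# Prices are forward filled; volume and dividends stay 0 on the filled rows.
df_ticker_synch[['ticker', 'close']] = df_ticker_synch[['ticker', 'close']].fillna(method='ffill')
df_ticker_synch = df_ticker_synch.fillna({'volume': 0, 'dividends': 0})
print(df_ticker_synch)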
Example #5
def print_portfolio(log, context):
    mem = psutil.virtual_memory()
    log.info("Memory used %.2f Gb von %.2f Gb (%d%%)" %
             (mem.used / 1e9, mem.total / 1e9, mem.percent))

    pdf = describe_portfolio(context.portfolio.positions)
    log.info('Portfolio Performance:\n{stats}'.format(stats=pdf))
Example #6
 def publish(self, model):
     try:
         log.info("Percent completed: %3.0f%% (%s - %s): %s" %
                  (model.percent_complete,
                   str(model.current_chunk_bounds[0].date()),
                   str(model.current_chunk_bounds[1].date()),
                   model.current_work))
     except Exception:
         log.error("Cannot publish progress state.")
Example #7
def get_data(sharadar_metadata_df, related_tickers, start=None, end=None):
    df = fetch_data(start, end)

    log.info("Adding SIDs to all stocks...")
    df['sid'] = df['ticker'].apply(
        lambda x: lookup_sid(sharadar_metadata_df, related_tickers, x))
    # unknown sids are -1 instead of nan to preserve the integer type. Drop them.
    unknown_sids = df[df['sid'] == -1]
    df.drop(unknown_sids.index, inplace=True)
    df.set_index(['date', 'sid'], inplace=True)

    df = process_data_table(df)
    return df.sort_index()
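The -1 sentinel mentioned in the comment above exists because assigning NaN to an integer column forces pandas to upcast it to float; a small made-up example:

import numpy as np
import pandas as pd

df = pd.DataFrame({'ticker': ['AAA', 'UNKNOWN'], 'sid': [1, -1]})
print(df['sid'].dtype)       # int64: the -1 sentinel preserves the integer type

df_nan = df.copy()
df_nan.loc[df_nan['ticker'] == 'UNKNOWN', 'sid'] = np.nan
print(df_nan['sid'].dtype)   # float64: NaN forces an upcast to float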
Example #8
 def publish(self, model):
     try:
         start = str(model.current_chunk_bounds[0].date())
         end = str(model.current_chunk_bounds[1].date())
         completed = model.percent_complete
         work = model.current_work
         if start == end:
             log.info("Percent completed: %3.0f%% (%s): %s" %
                      (completed, start, work))
         else:
             log.info("Percent completed: %3.0f%% (%s - %s): %s" %
                      (completed, start, end, work))
     except Exception:
         log.error("Cannot publish progress state.")
Example #9
    def start(self):
        log.info("Connecting: {}:{}:{}".format(self._host, self._port,
                                               self.client_id))
        self.connect(self._host, self._port, self.client_id)

        # Initialise the threads for various components
        thread = threading.Thread(target=self.run, daemon=True)
        thread.start()
        setattr(self, "_thread", thread)

        timeout = _connection_timeout
        while timeout and not self.isConnected():
            log.info("Cannot connect to TWS. Retrying...")
            sleep(_poll_frequency)
            timeout -= _poll_frequency
        else:
            if not self.isConnected():
                raise SystemError("Connection timeout during TWS connection!")

        self._download_account_details()
        log.info("Managed accounts: {}".format(self.managed_accounts))

        self.reqCurrentTime()
        self.reqIds(1)

        while self.time_skew is None or self._next_order_id is None:
            sleep(_poll_frequency)

        log.info("Local-Broker Time Skew: {}".format(self.time_skew))
Example #10
def create_tradable_stocks_universe(output_dir, prices_start, prices_end):
    universes_dbpath = os.path.join(output_dir, "universes.sqlite")
    universe_name = TRADABLE_STOCKS_US
    screen = TradableStocksUS()
    universe_start = prices_start.tz_localize('utc')
    universe_end = prices_end.tz_localize('utc')
    universe_last_date = UniverseReader(universes_dbpath).get_last_date(
        universe_name)
    if not pd.isnull(universe_last_date):
        universe_start = universe_last_date
    log.info("Start creating universe '%s' from %s to %s ..." %
             (universe_name, universe_start, universe_end))
    UniverseWriter(universes_dbpath).write(universe_name, screen,
                                           universe_start, universe_end)
Example #11
    def error(self, id_=None, error_code=None, error_msg=None):
        if isinstance(id_, Exception):
            log.exception(id_)

        if isinstance(error_code, int):
            if error_code in (502, 503, 326):
                # 502: Couldn't connect to TWS.
                # 503: The TWS is out of date and must be upgraded.
                # 326: Unable to connect as the client id is already in use.
                self.unrecoverable_error = True

            if error_code < 1000:
                log.error("[{}] {} ({})".format(error_code, error_msg, id_))
            else:
                log.info("[{}] {} ({})".format(error_code, error_msg, id_))
        else:
            log.error("[{}] {} ({})".format(error_code, error_msg, id_))
Example #12
def process_data_table(df):
    log.info("Adjusting for stock splits...")

    # Data are adjusted for stock splits, but not for dividends.
    m = df['closeunadj'] / df['close']

    # Remove the split factor to get back the unadjusted data
    df['open'] *= m
    df['high'] *= m
    df['low'] *= m
    df['close'] = df['closeunadj']
    df['volume'] /= m
    df['dividends'] *= m

    df = df.drop(['closeunadj', 'lastupdated'], axis=1)
    df = df.replace([np.inf, -np.inf, np.nan], 0)
    df = df.fillna({'volume': 0})
    return df
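A small numeric illustration of the unadjustment step above, using invented prices: assuming the vendor already applied a 2-for-1 split, the multiplier m restores the unadjusted values.

close_adj = 50.0       # split-adjusted close, as delivered
closeunadj = 100.0     # unadjusted close
m = closeunadj / close_adj   # 2.0, the factor that undoes the split adjustment

open_adj, volume_adj, dividend_adj = 49.0, 1000.0, 0.25
open_unadj = open_adj * m           # 98.0  -> prices scale back up
volume_unadj = volume_adj / m       # 500.0 -> share counts scale back down
dividend_unadj = dividend_adj * m   # 0.5   -> per-share dividends scale with price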
Example #13
    def execDetails(self, req_id, contract, exec_detail):
        order_id, exec_id = exec_detail.orderId, exec_detail.execId
        self.executions[order_id][exec_id] = _method_params_to_dict(vars())
        self._execution_to_order_id[exec_id] = order_id

        log.info("Order-{order_id} executed @ {exec_time}: "
                 "{symbol} current: {shares} @ ${price} "
                 "total: {cum_qty} @ ${avg_price} "
                 "exec_id: {exec_id} by client-{client_id}".format(
                     order_id=order_id,
                     exec_id=exec_id,
                     exec_time=pd.to_datetime(exec_detail.time),
                     symbol=contract.symbol,
                     shares=exec_detail.shares,
                     price=exec_detail.price,
                     cum_qty=exec_detail.cumQty,
                     avg_price=exec_detail.avgPrice,
                     client_id=exec_detail.clientId))
Example #14
    def run_pipeline(self,
                     pipeline,
                     start_date,
                     end_date=None,
                     chunksize=120,
                     hooks=None):
        if end_date is None:
            end_date = start_date

        if hooks is None:
            hooks = [
                ProgressHooks.with_static_publisher(CliProgressPublisher())
            ]

        if chunksize <= 1:
            log.info("Compute pipeline values without chunks.")
            return super().run_pipeline(pipeline, start_date, end_date, hooks)

        return super().run_chunked_pipeline(pipeline, start_date, end_date,
                                            chunksize, hooks)
Example #15
    def initialize(self, *args, **kwargs):
        self._context_persistence_excludes = \
            self._context_persistence_blacklist + \
            [e for e in self.__dict__.keys()
             if e not in self._context_persistence_whitelist]

        if os.path.isfile(self.state_filename):
            log.info("Loading state from {}".format(self.state_filename))
            load_context(self.state_filename,
                         context=self,
                         checksum=self.algo_filename)

        self.initialized = False

        with ZiplineAPI(self):
            super(self.__class__, self).initialize(*args, **kwargs)
            store_context(self.state_filename,
                          context=self,
                          checksum=self.algo_filename,
                          exclude_list=self._context_persistence_excludes)

        self.initialized = True
Example #16
    def __init__(self, *args, **kwargs):
        self.broker = kwargs.pop('broker', None)
        self.orders = {}

        self.algo_filename = kwargs.get('algo_filename', "<algorithm>")
        self.state_filename = kwargs.pop('state_filename', None)
        # The persistence blacklist/whitelist and excludes control which
        # context variables are saved to disk and restored by the functions
        # that store and reload the algorithm state.
        # The trading client can never be serialized; 'initialized' and the
        # perf tracker hold the context variables and the past performance,
        # so they must be whitelisted.
        self._context_persistence_blacklist = ['trading_client']
        self._context_persistence_whitelist = ['initialized', 'perf_tracker']
        self._context_persistence_excludes = []

        # The blotter is always initialized to SimulationBlotter in run_algo.py.
        # We override it here to use BlotterLive for live algos.
        blotter_live = BlotterLive(broker=self.broker)
        kwargs['blotter'] = blotter_live

        super(self.__class__, self).__init__(*args, **kwargs)
        log.info("initialization done")
Example #17
    def __init__(self, broker):
        self.broker = broker
        self._processed_closed_orders = []
        self._processed_transactions = []

        self.new_orders = []

        self.slippage_models = {
            Equity:
            FixedBasisPointsSlippage(),
            Future:
            VolatilityVolumeShare(
                volume_limit=DEFAULT_FUTURE_VOLUME_SLIPPAGE_BAR_LIMIT, ),
        }
        self.commission_models = {
            Equity:
            PerShare(),
            Future:
            PerContract(
                cost=DEFAULT_PER_CONTRACT_COST,
                exchange_fee=FUTURE_EXCHANGE_FEES_BY_SYMBOL,
            ),
        }
        log.info('Initialized blotter_live')
Example #18
    def write(self, universe_name, screen, pipe_start, pipe_end):
        log.info("Computing pipeline from %s to %s..." %
                 (pipe_start, pipe_end))
        stocks = self._execute_pipeline(screen, pipe_end, pipe_start)

        # Create schema, if not exists
        with closing(sqlite3.connect(
                self.universes_db_path)) as con, con, closing(
                    con.cursor()) as c:
            c.execute(
                "SELECT count(name) FROM sqlite_master WHERE type='table' AND name='%s'"
                % universe_name)
            if c.fetchone()[0] == 0:
                c.executescript(SCHEMA %
                                (universe_name, universe_name, universe_name))

            log.info("Inserting %d SIDs..." % len(stocks.index))
            with progressbar(stocks.index, show_pos=True) as bar:
                for i in bar:
                    c.execute("INSERT OR REPLACE INTO %s VALUES ('%s', %d);" %
                              (universe_name, i[0].date(), i[1].sid))

        log.info("Universe '%s' successful created/updated" % universe_name)
Example #19
def run_algorithm(initialize,
                  start=None,
                  end=None,
                  capital_base=1e6,
                  handle_data=None,
                  before_trading_start=None,
                  analyze=None,
                  data_frequency='daily',
                  bundle='sharadar',
                  bundle_timestamp=None,
                  trading_calendar=None,
                  metrics_set='default_daily',
                  benchmark_symbol='SPY',
                  default_extension=True,
                  extensions=(),
                  strict_extensions=True,
                  environ=os.environ,
                  blotter='default',
                  broker=None,
                  state_filename=None):
    """
    Run a trading algorithm.

    Parameters
    ----------
    start : datetime
        The start date of the backtest.
    end : datetime
        The end date of the backtest.
    initialize : callable[context -> None]
        The initialize function to use for the algorithm. This is called once
        at the very beginning of the backtest and should be used to set up
        any state needed by the algorithm.
    capital_base : float
        The starting capital for the backtest.
    handle_data : callable[(context, BarData) -> None], optional
        The handle_data function to use for the algorithm. This is called
        every minute when ``data_frequency == 'minute'`` or every day
        when ``data_frequency == 'daily'``.
    before_trading_start : callable[(context, BarData) -> None], optional
        The before_trading_start function for the algorithm. This is called
        once before each trading day (after initialize on the first day).
    analyze : callable[(context, pd.DataFrame) -> None], optional
        The analyze function to use for the algorithm. This function is called
        once at the end of the backtest and is passed the context and the
        performance data.
    data_frequency : {'daily', 'minute'}, optional
        The data frequency to run the algorithm at. For live trading the
        default is 'minute', otherwise 'daily'.
    bundle : str, optional
        The name of the data bundle to use to load the data to run the backtest
        with. This defaults to 'sharadar'.
    bundle_timestamp : datetime, optional
        The datetime to lookup the bundle data for. This defaults to the
        current time.
    trading_calendar : TradingCalendar, optional
        The trading calendar to use for your backtest.
    metrics_set : iterable[Metric] or str, optional
        The set of metrics to compute in the simulation. If a string is passed,
        resolve the set with :func:`zipline.finance.metrics.load`.
    benchmark_symbol : str, optional
        The symbol of the benchmark. For live trading the default is None,
        otherwise 'SPY'.
    default_extension : bool, optional
        Should the default zipline extension be loaded. This is found at
        ``$ZIPLINE_ROOT/extension.py``
    extensions : iterable[str], optional
        The names of any other extensions to load. Each element may either be
        a dotted module path like ``a.b.c`` or a path to a python file ending
        in ``.py`` like ``a/b/c.py``.
    strict_extensions : bool, optional
        Should the run fail if any extensions fail to load. If this is false,
        a warning will be raised instead.
    environ : mapping[str -> str], optional
        The os environment to use. Many extensions use this to get parameters.
        This defaults to ``os.environ``.
    blotter : str or zipline.finance.blotter.Blotter, optional
        Blotter to use with this algorithm. If passed as a string, we look for
        a blotter construction function registered with
        ``zipline.extensions.register`` and call it with no parameters.
        Default is a :class:`zipline.finance.blotter.SimulationBlotter` that
        never cancels orders.
    broker : zipline.gens.brokers.broker.Broker, optional
        Instance of the broker to use for live trading.
    state_filename : str, optional
        Path to the pickle file storing the algorithm "context" (similar to
        ``self``).

    Returns
    -------
    perf : pd.DataFrame
        The daily performance of the algorithm.

    See Also
    --------
    zipline.data.bundles.bundles : The available data bundles.
    """
    load_extensions(default_extension, extensions, strict_extensions, environ)

    try:
        register_calendar_alias('NYSEMKT', 'XNYS')
        register_calendar_alias('OTC', 'XNYS')
    except CalendarNameCollision as e:
        log.info(e)

    return _run(handle_data=handle_data,
                initialize=initialize,
                before_trading_start=before_trading_start,
                analyze=analyze,
                algofile=None,
                algotext=None,
                defines=(),
                data_frequency=data_frequency,
                capital_base=capital_base,
                bundle=bundle,
                bundle_timestamp=bundle_timestamp,
                start=start,
                end=end,
                output=os.devnull,
                trading_calendar=trading_calendar,
                print_algo=False,
                metrics_set=metrics_set,
                local_namespace=False,
                environ=environ,
                blotter=blotter,
                benchmark_symbol=benchmark_symbol,
                broker=broker,
                state_filename=state_filename)
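A minimal backtest invocation sketch for the function above; it assumes the 'sharadar' bundle (with the 'SPY' benchmark) has already been ingested, and the do-nothing initialize is only there to show the call signature.

import pandas as pd

def initialize(context):
    # No positions are taken; this only demonstrates the required callback.
    pass

perf = run_algorithm(
    initialize=initialize,
    start=pd.Timestamp('2019-01-02', tz='utc'),
    end=pd.Timestamp('2020-12-31', tz='utc'),
    capital_base=100000,
)
print(perf.tail())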
Example #20
 def compute(self, today, assets, out):
     log.info('2', today)
     out[:] = 0
Example #21
 def connectionClosed(self):
     self.unrecoverable_error = True
     log.info("IB Connection closed")
Example #22
def _run(handle_data, initialize, before_trading_start, analyze, algofile,
         algotext, defines, data_frequency, capital_base, bundle,
         bundle_timestamp, start, end, output, trading_calendar, print_algo,
         metrics_set, local_namespace, environ, blotter, benchmark_symbol,
         broker, state_filename):
    """Run a backtest for the given algorithm.

    This is shared between the cli and :func:`zipline.run_algo`.

    Additions useful for live trading:
    broker - wrapper to connect to a real broker
    state_filename - stores the algo context so that the run can be restarted
    """
    log.info("Using bundle '%s'." % bundle)

    if trading_calendar is None:
        trading_calendar = get_calendar('XNYS')

    bundle_data = load_sharadar_bundle(bundle)
    now = pd.Timestamp.utcnow()
    if start is None:
        start = bundle_data.equity_daily_bar_reader.first_trading_day if not broker else now

    if not trading_calendar.is_session(start.date()):
        start = trading_calendar.next_open(start)

    if end is None:
        end = bundle_data.equity_daily_bar_reader.last_available_dt if not broker else start

    # date parameter validation
    if trading_calendar.session_distance(start, end) < 0:
        raise _RunAlgoError(
            'There are no trading days between %s and %s' % (
                start.date(),
                end.date(),
            ), )

    if broker:
        log.info("Live Trading on %s." % start.date())
    else:
        log.info("Backtest from %s to %s." % (start.date(), end.date()))

    if benchmark_symbol:
        benchmark = symbol(benchmark_symbol)
        benchmark_sid = benchmark.sid
        benchmark_returns = load_benchmark_data_bundle(
            bundle_data.equity_daily_bar_reader, benchmark)
    else:
        benchmark_sid = None
        benchmark_returns = pd.Series(index=pd.date_range(start, end,
                                                          tz='utc'),
                                      data=0.0)

    # emission_rate is a string representing the smallest frequency at which metrics should be reported.
    # emission_rate will be either minute or daily. When emission_rate is daily, end_of_bar will not be called at all.
    emission_rate = 'daily'

    if algotext is not None:
        if local_namespace:
            # noinspection PyUnresolvedReferences
            ip = get_ipython()  # noqa
            namespace = ip.user_ns
        else:
            namespace = {}

        for assign in defines:
            try:
                name, value = assign.split('=', 2)
            except ValueError:
                raise ValueError(
                    'invalid define %r, should be of the form name=value' %
                    assign, )
            try:
                # evaluate in the same namespace so names may refer to
                # each other
                namespace[name] = eval(value, namespace)
            except Exception as e:
                raise ValueError(
                    'failed to execute definition for name %r: %s' %
                    (name, e), )
    elif defines:
        raise _RunAlgoError(
            'cannot pass define without `algotext`',
            "cannot pass '-D' / '--define' without '-t' / '--algotext'",
        )
    else:
        namespace = {}
        if algofile is not None:
            algotext = algofile.read()

    if print_algo:
        if PYGMENTS:
            highlight(
                algotext,
                PythonLexer(),
                TerminalFormatter(),
                outfile=sys.stdout,
            )
        else:
            click.echo(algotext)

    first_trading_day = \
        bundle_data.equity_daily_bar_reader.first_trading_day

    if isinstance(metrics_set, six.string_types):
        try:
            metrics_set = metrics.load(metrics_set)
        except ValueError as e:
            raise _RunAlgoError(str(e))

    if isinstance(blotter, six.string_types):
        try:
            blotter = load(Blotter, blotter)
        except ValueError as e:
            raise _RunAlgoError(str(e))

    # Special defaults for live trading
    if broker:
        data_frequency = 'minute'

        # No benchmark
        benchmark_sid = None
        benchmark_returns = pd.Series(index=pd.date_range(start, end,
                                                          tz='utc'),
                                      data=0.0)

        broker.daily_bar_reader = bundle_data.equity_daily_bar_reader

        if start.date() < now.date():
            backtest_start = start
            backtest_end = bundle_data.equity_daily_bar_reader.last_available_dt

            if not os.path.exists(state_filename):
                log.info("Backtest from %s to %s." %
                         (backtest_start.date(), backtest_end.date()))
                backtest_data = DataPortal(
                    bundle_data.asset_finder,
                    trading_calendar=trading_calendar,
                    first_trading_day=first_trading_day,
                    equity_minute_reader=bundle_data.equity_minute_bar_reader,
                    equity_daily_reader=bundle_data.equity_daily_bar_reader,
                    adjustment_reader=bundle_data.adjustment_reader,
                )
                backtest = create_algo_class(
                    TradingAlgorithm, backtest_start, backtest_end, algofile,
                    algotext, analyze, before_trading_start, benchmark_returns,
                    benchmark_sid, blotter, bundle_data, capital_base,
                    backtest_data, 'daily', emission_rate, handle_data,
                    initialize, metrics_set, namespace, trading_calendar)

                ctx_blacklist = ['trading_client']
                ctx_whitelist = ['perf_tracker']
                ctx_excludes = ctx_blacklist + [
                    e
                    for e in backtest.__dict__.keys() if e not in ctx_whitelist
                ]
                backtest.run()
                # TODO: better logic for the checksum
                checksum = getattr(algofile, 'name', '<algorithm>')
                store_context(state_filename,
                              context=backtest,
                              checksum=checksum,
                              exclude_list=ctx_excludes)
            else:
                log.warn("State file already exists. Do not run the backtest.")

            # Set start and end to now for live trading
            start = pd.Timestamp.utcnow()
            if not trading_calendar.is_session(start.date()):
                start = trading_calendar.next_open(start)
            end = start

    # TODO: start here to create a pre-run of the algo before live trading;
    # use store_context before switching from TradingAlgorithm to LiveTradingAlgorithm
    TradingAlgorithmClass = (partial(
        LiveTradingAlgorithm, broker=broker, state_filename=state_filename)
                             if broker else TradingAlgorithm)

    DataPortalClass = (partial(DataPortalLive, broker)
                       if broker else DataPortal)
    data = DataPortalClass(
        bundle_data.asset_finder,
        trading_calendar=trading_calendar,
        first_trading_day=first_trading_day,
        equity_minute_reader=bundle_data.equity_minute_bar_reader,
        equity_daily_reader=bundle_data.equity_daily_bar_reader,
        adjustment_reader=bundle_data.adjustment_reader,
    )
    algo = create_algo_class(TradingAlgorithmClass, start, end, algofile,
                             algotext, analyze, before_trading_start,
                             benchmark_returns, benchmark_sid, blotter,
                             bundle_data, capital_base, data, data_frequency,
                             emission_rate, handle_data, initialize,
                             metrics_set, namespace, trading_calendar)

    perf = algo.run()

    if output == '-':
        click.echo(str(perf))
    elif output != os.devnull:  # make the zipline magic not write any data
        perf.to_pickle(output)

    return perf
Example #23
 def bind(self):
     log.info("Connecting: {}:{}:{}".format(self._host, self._port,
                                            self.client_id))
     self.connect(self._host, self._port, self.client_id)
Example #24
def _ingest(start_session,
            calendar=get_calendar('XNYS'),
            output_dir=get_output_dir(),
            sanity_check=True):
    os.makedirs(output_dir, exist_ok=True)

    print("logfiles:", logfilename)

    log.info("Start ingesting SEP, SFP and SF1 data into %s ..." % output_dir)

    end_session = pd.to_datetime(last_available_date())
    # Check valid trading dates, according to the selected exchange calendar
    sessions = calendar.sessions_in_range(start_session, end_session)

    prices_dbpath = os.path.join(output_dir, "prices.sqlite")

    # use string format expected by quandl
    start_fetch_date = sessions[0].strftime('%Y-%m-%d')
    #end_fetch_date = None if sessions[-1].strftime('%Y-%m-%d') == last_trading_date() else sessions[-1].strftime('%Y-%m-%d')
    if os.path.exists(prices_dbpath):
        start_fetch_date = SQLiteDailyBarReader(
            prices_dbpath).last_available_dt.strftime('%Y-%m-%d')
        log.info("Last available date: %s" % start_fetch_date)

    log.info("Start loading sharadar metadata...")
    related_tickers, sharadar_metadata_df = create_metadata()
    prices_df = get_data(sharadar_metadata_df, related_tickers,
                         start_fetch_date)
    if len(prices_df) > 0:
        # the first price date may differ from start_fetch_date because we query quandl by lastupdate
        log.info("Price data for %d equities from %s to %s." %
                 (len(prices_df.index.get_level_values(1)),
                  prices_df.index[0][0], prices_df.index[-1][0]))
    else:
        log.info("No price data retrieved for period from %s." %
                 start_fetch_date)

    # iterate over all the securities and pack data and metadata for writing
    tickers = prices_df['ticker'].unique()
    log.info("Start creating data for %d equities..." % (len(tickers)))
    equities_df = create_equities_df(prices_df,
                                     tickers,
                                     sessions,
                                     sharadar_metadata_df,
                                     show_progress=True)

    # Additional MACRO data
    macro_equities_df = create_macro_equities_df(calendar)
    equities_df = equities_df.append(macro_equities_df)

    # Write equity metadata
    log.info("Start writing equities...")
    asset_dbpath = os.path.join(output_dir,
                                ("assets-%d.sqlite" % ASSET_DB_VERSION))
    asset_db_writer = SQLiteAssetDBWriter(asset_dbpath)
    asset_db_writer.write(equities=equities_df, exchanges=EXCHANGE_DF)

    # Write PRICING data
    log.info(("Writing pricing data to '%s'..." % (prices_dbpath)))
    sql_daily_bar_writer = SQLiteDailyBarWriter(prices_dbpath, calendar)
    prices_df.sort_index(inplace=True)
    sql_daily_bar_writer.write(prices_df)

    # DIVIDENDS
    log.info("Creating dividends data...")
    dividends_df = create_dividends_df(sharadar_metadata_df, related_tickers,
                                       tickers, start_fetch_date)

    # SPLITS
    log.info("Creating splits data...")
    splits_df = create_splits_df(sharadar_metadata_df, related_tickers,
                                 tickers, start_fetch_date)

    # mergers?
    # see also https://github.com/quantopian/zipline/blob/master/zipline/data/adjustments.py

    # Write dividends and splits_df
    adjustment_dbpath = os.path.join(output_dir, "adjustments.sqlite")
    sql_daily_bar_reader = SQLiteDailyBarReader(prices_dbpath)
    asset_db_reader = SQLiteAssetFinder(asset_dbpath)
    adjustment_writer = SQLiteDailyAdjustmentWriter(adjustment_dbpath,
                                                    sql_daily_bar_reader,
                                                    asset_db_reader, sessions)

    log.info("Start writing %d splits and %d dividends data..." %
             (len(splits_df), len(dividends_df)))
    adjustment_writer.write(splits=splits_df, dividends=dividends_df)

    log.info("Adding macro data from %s ..." % (start_fetch_date))
    macro_prices_df = create_macro_prices_df(start_fetch_date, calendar)
    sql_daily_bar_writer.write(macro_prices_df)

    log.info("Start writing supplementary_mappings data...")
    # EQUITY SUPPLEMENTARY MAPPINGS are used for company name, sector, industry and fundamentals financial data.
    # They could be retrieved by AssetFinder.get_supplementary_field(sid, field_name, as_of_date)
    log.info("Start creating company info dataframe...")
    with closing(sqlite3.connect(asset_dbpath)) as conn, conn, closing(
            conn.cursor()) as cursor:
        insert_asset_info(sharadar_metadata_df, cursor)

    start_date_fundamentals = asset_db_reader.last_available_fundamentals_dt
    log.info("Start creating Fundamentals dataframe...")
    if must_fetch_entire_table(start_date_fundamentals):
        log.info("Fetch entire table.")
        sf1_df = fetch_entire_table(env["QUANDL_API_KEY"],
                                    "SHARADAR/SF1",
                                    parse_dates=['datekey', 'reportperiod'])
    else:
        log.info("Start date: %s" % start_date_fundamentals)
        sf1_df = fetch_sf1_table_date(env["QUANDL_API_KEY"],
                                      start_date_fundamentals)
    with closing(sqlite3.connect(asset_dbpath)) as conn, conn, closing(
            conn.cursor()) as cursor:
        insert_fundamentals(sharadar_metadata_df,
                            sf1_df,
                            cursor,
                            show_progress=True)

    start_date_metrics = asset_db_reader.last_available_daily_metrics_dt
    log.info("Start creating daily metrics dataframe...")
    if must_fetch_entire_table(start_date_metrics):
        log.info("Fetch entire table.")
        daily_df = fetch_entire_table(env["QUANDL_API_KEY"],
                                      "SHARADAR/DAILY",
                                      parse_dates=['date'])
    else:
        log.info("Start date: %s" % start_date_fundamentals)
        daily_df = fetch_table_by_date(env["QUANDL_API_KEY"], 'SHARADAR/DAILY',
                                       start_date_metrics)
    with closing(sqlite3.connect(asset_dbpath)) as conn, conn, closing(
            conn.cursor()) as cursor:
        insert_daily_metrics(sharadar_metadata_df,
                             daily_df,
                             cursor,
                             show_progress=True)

    if sanity_check:
        if asset_db_writer.check_sanity():
            log.info("Sanity check successful!")

    okay_path = os.path.join(output_dir, "ok")
    Path(okay_path).touch()
    log.info("Ingest finished!")
Example #25
    def _ingest(calendar, start_session, end_session):
        # use 'latest' (SHARADAR_BUNDLE_DIR) as output dir
        output_dir = get_output_dir()
        os.makedirs(output_dir, exist_ok=True)

        print("logfiles:", logfilename)

        log.info("Start ingesting SEP, SFP and SF1 data into %s ..." %
                 output_dir)

        # Check valid trading dates, according to the selected exchange calendar
        sessions = calendar.sessions_in_range(start_session, end_session)

        prices_dbpath = os.path.join(output_dir, "prices.sqlite")

        start_fetch_date = None
        if os.path.exists(prices_dbpath):
            start_fetch_date = SQLiteDailyBarReader(
                prices_dbpath).last_available_dt.strftime('%Y-%m-%d')
            log.info("Last available date: %s" % start_fetch_date)

        log.info("Start loading sharadar metadata...")
        sharadar_metadata_df = quandl.get_table('SHARADAR/TICKERS',
                                                table=['SFP', 'SEP'],
                                                paginate=True)
        sharadar_metadata_df.set_index('ticker', inplace=True)
        related_tickers = sharadar_metadata_df['relatedtickers'].dropna()
        # Add a space at the beginning and end of relatedtickers, search for ' TICKER '
        related_tickers = ' ' + related_tickers.astype(str) + ' '

        prices_df = get_data(sharadar_metadata_df,
                             related_tickers,
                             start=start_fetch_date)

        # iterate over all the securities and pack data and metadata for writing
        tickers = prices_df['ticker'].unique()
        log.info("Start writing price data for %d equities." % (len(tickers)))

        equities_df = create_equities_df(prices_df,
                                         tickers,
                                         sessions,
                                         sharadar_metadata_df,
                                         show_progress=True)

        # Write PRICING data
        log.info(("Writing pricing data to '%s'..." % (prices_dbpath)))
        sql_daily_bar_writer = SQLiteDailyBarWriter(prices_dbpath, calendar)
        prices_df.sort_index(inplace=True)
        sql_daily_bar_writer.write(prices_df)

        # DIVIDENDS
        log.info("Creating dividends data...")
        # see also https://github.com/shlomikushchi/zipline-live2/blob/master/zipline/data/bundles/csvdir.py
        dividends_df = create_dividends_df(prices_df, sharadar_metadata_df)

        # SPLITS
        log.info("Creating splits data...")
        splits_df = create_splits_df(sharadar_metadata_df, related_tickers,
                                     tickers, start_fetch_date)

        # TODO mergers?
        # see also https://github.com/quantopian/zipline/blob/master/zipline/data/adjustments.py

        # Write dividends and splits_df
        sql_daily_bar_reader = SQLiteDailyBarReader(prices_dbpath)
        adjustment_dbpath = os.path.join(output_dir, "adjustments.sqlite")
        adjustment_writer = SQLiteDailyAdjustmentWriter(
            adjustment_dbpath, sql_daily_bar_reader, sessions)

        log.info("Start writing %d splits and %d dividends data..." %
                 (len(splits_df), len(dividends_df)))
        adjustment_writer.write(splits=splits_df, dividends=dividends_df)

        # Additional MACRO data
        prices_start = prices_df.index[0][0]
        prices_end = prices_df.index[-1][0]
        macro_equities_df = create_macro_equities_df(prices_end)
        equities_df = equities_df.append(macro_equities_df)

        # Write equity metadata
        log.info("Start writing equities and supplementary_mappings data...")
        asset_dbpath = os.path.join(output_dir,
                                    ("assets-%d.sqlite" % ASSET_DB_VERSION))
        asset_db_writer = SQLiteAssetDBWriter(asset_dbpath)
        asset_db_writer.write(equities=equities_df, exchanges=EXCHANGE_DF)

        # EQUITY SUPPLEMENTARY MAPPINGS are used for company name, sector, industry and fundamentals financial data.
        # They could be retrieved by AssetFinder.get_supplementary_field(sid, field_name, as_of_date)
        log.info("Start creating company info dataframe...")
        with closing(sqlite3.connect(asset_dbpath)) as conn, conn, closing(
                conn.cursor()) as cursor:
            insert_equity_extra_data_basic(sharadar_metadata_df, cursor)

        log.info("Start creating Fundamentals dataframe...")
        if start_fetch_date is None:
            sf1_df = fetch_entire_table(
                env["QUANDL_API_KEY"],
                "SHARADAR/SF1",
                parse_dates=['datekey', 'reportperiod'])
            #TODO filter out dimensions other than 'ARQ' and 'ART'
        else:
            sf1_df = fetch_sf1_table_date(env["QUANDL_API_KEY"],
                                          start_fetch_date)
        with closing(sqlite3.connect(asset_dbpath)) as conn, conn, closing(
                conn.cursor()) as cursor:
            insert_equity_extra_data_sf1(sharadar_metadata_df,
                                         sf1_df,
                                         cursor,
                                         show_progress=True)

        log.info("Adding macro data from %s to %s ..." %
                 (prices_start, prices_end))
        macro_prices_df = create_macro_prices_df(prices_start, prices_end,
                                                 calendar)
        sql_daily_bar_writer.write(macro_prices_df)

        # Predefined Named Universes
        from sharadar.pipeline.universes import create_tradable_stocks_universe
        create_tradable_stocks_universe(output_dir, prices_start, prices_end)

        sane = asset_db_writer.check_sanity()
        if sane:
            log.info("Sanity check successful!")

        okay_path = os.path.join(output_dir, "ok")
        Path(okay_path).touch()
        log.info("Ingest finished!")
Example #26
def serialise(perf, filename, now):
    suffix = '_' + now.strftime(DATETIME_FMT) + '_perf.dump'
    perf_dump_file = change_extension(filename, suffix)
    log.info("Serialise performance date in %s" % perf_dump_file)
    # joblib.dump(perf, perf_dump_file)
    perf.to_pickle(perf_dump_file)
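Since the dump is written with DataFrame.to_pickle, it can be read back with pandas; the filename below is illustrative and depends on DATETIME_FMT.

import pandas as pd

perf = pd.read_pickle('my_algo_2021-01-01_120000_perf.dump')
print(perf.tail())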
Example #27
    inputs = []
    window_length = 1

    def __new__(self, universe_name):
        self.universe_name = universe_name

        universes_db_path = os.path.join(get_output_dir(), "universes.sqlite")
        self.universe_reader = UniverseReader(universes_db_path)
        return super(NamedUniverse, self).__new__(self)

    def compute(self, today, assets, out):
        sids = self.universe_reader.get_sid(self.universe_name, today.date())
        out[:] = assets.isin(sids)


if __name__ == "__main__":
    universe_start = pd.to_datetime('1998-10-16', utc=True)
    universe_end = pd.to_datetime('2020-12-30', utc=True)

    from sharadar.util.output_dir import get_output_dir
    universes_dbpath = os.path.join(get_output_dir(), "universes.sqlite")
    universe_name = TRADABLE_STOCKS_US
    screen = TradableStocksUS()
    universe_last_date = UniverseReader(universes_dbpath).get_last_date(
        universe_name)
    if not pd.isnull(universe_last_date):
        universe_start = universe_last_date
    log.info("Start creating universe '%s' from %s to %s ..." %
             (universe_name, universe_start, universe_end))
    UniverseWriter(universes_dbpath).write(universe_name, screen,
                                           universe_start, universe_end)
Example #28
 def on_exit(self):
     self.teardown()
     self.broker.disconnect()
     log.info(
         "Today's trading ended. The algo needs to be restarted daily.")