Example no. 1
0
    def write(self,
              data,
              length=None,
              show_progress=False,
              invalid_data_behavior='warn'):
        """Write a stream of minute data, one sid at a time.

        Parameters
        ----------
        data : iterable[(int, pd.DataFrame)]
            The data to write. Each element should be a tuple of sid, data
            where data has the following format:
              columns : ('open', 'high', 'low', 'close', 'volume')
                  open : float64
                  high : float64
                  low  : float64
                  close : float64
                  volume : float64|int64
              index : DatetimeIndex of market minutes.
            A given sid may appear more than once in ``data``; however,
            the dates must be strictly increasing.
        length : int, optional
            The number of elements in ``data``; used only for progress
            reporting.
        show_progress : bool, optional
            Whether or not to show a progress bar while writing.
        invalid_data_behavior : str, optional
            Forwarded to ``write_sid`` for each element.
        """
        # Wrap the input in an (optional) progress bar, then hand each
        # (sid, frame) tuple off to write_sid unchanged.
        progress_ctx = maybe_show_progress(
            data,
            length=length,
            show_percent=False,
            show_progress=show_progress,
            item_show_func=item_show_count(length),
            label='Compiling five-minute data',
        )
        with progress_ctx as entries:
            for entry in entries:
                self.write_sid(
                    *entry,
                    invalid_data_behavior=invalid_data_behavior
                )
Example no. 2
0
    def _post_process_metadata(self, metadata, cache, show_progress=False):
        """Build the final metadata frame from per-symbol cached data.

        Parameters
        ----------
        metadata : pd.DataFrame
            Raw metadata with a ``symbol`` column; one row per asset.
        cache : mapping
            Cache holding one '{symbol}.daily.frame' entry per symbol.
        show_progress : bool, optional
            Whether to display a progress bar while post-processing.

        Returns
        -------
        pd.DataFrame
            Frame with ``self.md_column_names`` columns plus ``exchange``,
            aligned to ``metadata.index``.

        Raises
        ------
        ValueError
            If a symbol has no cached daily frame.
        """
        # Create empty data frame using target metadata column names and dtypes
        final_metadata = pd.DataFrame(
            columns=self.md_column_names,
            index=metadata.index,
        )

        # Iterate over the available symbols, loading the asset's raw symbol
        # data from the cache.  The final metadata is computed and recorded in
        # the appropriate row depending on the asset's id.
        with maybe_show_progress(
                # ``Series.iteritems`` was removed in pandas 2.0; ``items``
                # is the long-standing equivalent.
                metadata.symbol.items(),
                show_progress,
                label='Post-processing symbol metadata',
                item_show_func=item_show_count(len(metadata)),
                length=len(metadata),
                show_percent=False,
        ) as symbols_map:
            for asset_id, symbol in symbols_map:
                # Attempt to load data from disk, the cache should have an entry
                # for each symbol at this point of the execution. If one does
                # not exist, we should fail.
                key = '{sym}.daily.frame'.format(sym=symbol)
                try:
                    raw_data = cache[key]
                except KeyError as exc:
                    # Chain the original KeyError so the cache miss is
                    # visible in tracebacks.
                    raise ValueError(
                        'Unable to find cached data for symbol: {0}'.format(
                            symbol)) from exc

                # Perform and require post-processing of metadata.
                final_symbol_metadata = self.post_process_symbol_metadata(
                    asset_id,
                    metadata.iloc[asset_id],
                    raw_data,
                )

                # Record symbol's final metadata.
                final_metadata.iloc[asset_id] = final_symbol_metadata

            # Register all assets with the bundle's default exchange.
            final_metadata['exchange'] = self.exchange

        return final_metadata
Example no. 3
0
    def _post_process_metadata(self, metadata, cache, show_progress=False):
        """Build the final metadata frame from per-symbol cached data.

        Parameters
        ----------
        metadata : pd.DataFrame
            Raw metadata with a ``symbol`` column; one row per asset.
        cache : mapping
            Cache holding one '{symbol}.daily.frame' entry per symbol.
        show_progress : bool, optional
            Whether to display a progress bar while post-processing.

        Returns
        -------
        pd.DataFrame
            Frame with ``self.md_column_names`` columns plus ``exchange``,
            aligned to ``metadata.index``.

        Raises
        ------
        ValueError
            If a symbol has no cached daily frame.
        """
        # Create empty data frame using target metadata column names and dtypes
        final_metadata = pd.DataFrame(
            columns=self.md_column_names,
            index=metadata.index,
        )

        # Iterate over the available symbols, loading the asset's raw symbol
        # data from the cache.  The final metadata is computed and recorded in
        # the appropriate row depending on the asset's id.
        with maybe_show_progress(
            # ``Series.iteritems`` was removed in pandas 2.0; ``items`` is
            # the long-standing equivalent.
            metadata.symbol.items(),
            show_progress,
            label='Post-processing symbol metadata',
            item_show_func=item_show_count(len(metadata)),
            length=len(metadata),
            show_percent=False,
        ) as symbols_map:
            for asset_id, symbol in symbols_map:
                # Attempt to load data from disk, the cache should have an
                # entry for each symbol at this point of the execution. If one
                # does not exist, we should fail.
                key = '{sym}.daily.frame'.format(sym=symbol)
                try:
                    raw_data = cache[key]
                except KeyError as exc:
                    # Chain the original KeyError so the cache miss is
                    # visible in tracebacks.
                    raise ValueError(
                      'Unable to find cached data for symbol:'
                      ' {0}'.format(symbol)) from exc

                # Perform and require post-processing of metadata.
                final_symbol_metadata = self.post_process_symbol_metadata(
                    asset_id,
                    metadata.iloc[asset_id],
                    raw_data,
                )

                # Record symbol's final metadata.
                final_metadata.iloc[asset_id] = final_symbol_metadata

            # Register all assets with the bundle's default exchange.
            final_metadata['exchange'] = self.exchange

        return final_metadata
Example no. 4
0
    def write(self,
              data,
              assets=None,
              show_progress=False,
              invalid_data_behavior='warn'):
        """Convert and write a stream of per-asset data chunks.

        Parameters
        ----------
        data : iterable[tuple[int, pandas.DataFrame or bcolz.ctable]]
            The data chunks to write. Each chunk should be a tuple of sid
            and the data for that asset.
        assets : set[int], optional
            The assets that should be in ``data``. If this is provided
            we will check ``data`` against the assets and provide better
            progress information.
        show_progress : bool, optional
            Whether or not to show a progress bar while writing.
        invalid_data_behavior : {'warn', 'raise', 'ignore'}, optional
            What to do when data is encountered that is outside the range of
            a uint64.

        Returns
        -------
        table : bcolz.ctable
            The newly-written table.
        """
        # Progress length is only known when the caller tells us which
        # assets to expect.
        if assets is None:
            total = None
        else:
            total = len(assets)

        def converted_chunks():
            # Lazily convert each incoming frame to a ctable as it is
            # consumed, so nothing is materialized up front.
            for sid, frame in data:
                yield sid, self.to_ctable(frame, invalid_data_behavior)

        with maybe_show_progress(
            converted_chunks(),
            show_progress=show_progress,
            label=self.progress_bar_message,
            item_show_func=item_show_count(total),
            length=total,
            show_percent=False,
        ) as it:
            return self._write_internal(it, assets)
Example no. 5
0
    def write(self,
              data,
              assets=None,
              show_progress=False,
              invalid_data_behavior='warn'):
        """Write per-asset chunks, converting each to a ctable on the fly.

        Parameters
        ----------
        data : iterable[tuple[int, pandas.DataFrame or bcolz.ctable]]
            The data chunks to write. Each chunk should be a tuple of sid
            and the data for that asset.
        assets : set[int], optional
            The assets that should be in ``data``. If this is provided
            we will check ``data`` against the assets and provide better
            progress information.
        show_progress : bool, optional
            Whether or not to show a progress bar while writing.
        invalid_data_behavior : {'warn', 'raise', 'ignore'}, optional
            What to do when data is encountered that is outside the range of
            a uint64.

        Returns
        -------
        table : bcolz.ctable
            The newly-written table.
        """
        # Without the expected asset set we cannot size the progress bar.
        total = len(assets) if assets is not None else None

        # Generator expression keeps conversion lazy: each chunk is turned
        # into a ctable only as the writer consumes it.
        chunks = (
            (sid, self.to_ctable(frame, invalid_data_behavior))
            for sid, frame in data
        )
        with maybe_show_progress(
            chunks,
            show_progress=show_progress,
            label=self.progress_bar_message,
            item_show_func=item_show_count(total),
            length=total,
            show_percent=False,
        ) as it:
            return self._write_internal(it, assets)
Example no. 6
0
    def _fetch_metadata_frame(self,
                              api_key,
                              cache,
                              retries=DEFAULT_RETRIES,
                              environ=None,
                              show_progress=False):
        """Fetch all raw metadata pages and concatenate them into one frame.

        Parameters
        ----------
        api_key : str
            API key forwarded to the metadata fetcher.
        cache : mapping
            Cache forwarded to the metadata fetcher.
        retries : int, optional
            Number of retries for the underlying fetch.
        environ : mapping, optional
            Environment forwarded to the metadata fetcher.
        show_progress : bool, optional
            Whether to display a progress bar while fetching.

        Returns
        -------
        pd.DataFrame
            All metadata blocks concatenated with a fresh integer index.
        """
        # Setup raw metadata iterator to fetch pages if necessary.
        pages = self._fetch_metadata_iter(api_key, cache, retries, environ)

        # Concatenate every page into a single metadata frame.
        progress_ctx = maybe_show_progress(
            pages,
            show_progress,
            label='Fetching symbol metadata',
            item_show_func=item_show_count(),
            # NOTE(review): length is hard-coded to 3 pages — confirm this
            # matches what _fetch_metadata_iter actually yields.
            length=3,
            show_percent=False,
        )
        with progress_ctx as blocks:
            metadata = pd.concat(blocks, ignore_index=True)

        return metadata
Example no. 7
0
    def _fetch_metadata_frame(self,
                              api_key,
                              cache,
                              retries=DEFAULT_RETRIES,
                              environ=None,
                              show_progress=False):
        """Collect every raw metadata page into a single DataFrame.

        Parameters
        ----------
        api_key : str
            API key forwarded to the metadata fetcher.
        cache : mapping
            Cache forwarded to the metadata fetcher.
        retries : int, optional
            Number of retries for the underlying fetch.
        environ : mapping, optional
            Environment forwarded to the metadata fetcher.
        show_progress : bool, optional
            Whether to display a progress bar while fetching.

        Returns
        -------
        pd.DataFrame
            All metadata blocks concatenated with a fresh integer index.
        """
        # Iterator over raw metadata pages (fetched, or served from cache).
        raw_iter = self._fetch_metadata_iter(api_key, cache, retries, environ)

        with maybe_show_progress(
            raw_iter,
            show_progress,
            label='Fetching symbol metadata',
            item_show_func=item_show_count(),
            # NOTE(review): length is hard-coded to 3 pages — confirm this
            # matches what _fetch_metadata_iter actually yields.
            length=3,
            show_percent=False,
        ) as blocks:
            # Returning inside the with-block still runs the context exit.
            return pd.concat(blocks, ignore_index=True)