Esempio n. 1
0
    def chunk_to_df(self, exchange_name, symbol, data_frequency, period):
        """
        Read a single bcolz chunk and convert its raw arrays with
        get_df_from_arrays.

        Parameters
        ----------
        exchange_name:
            Used to look up the exchange, to locate the chunk and to build
            the ExchangeBundle which supplies the calendar periods.
        symbol:
            Resolved to an asset through the exchange's get_asset.
        data_frequency: str
            The value 'daily' shifts the end of the loaded range
            (see the comment in the body).
        period:
            Passed through to get_bcolz_chunk to select the chunk.

        Returns
        -------
        The value produced by get_df_from_arrays (presumably a
        pandas DataFrame -- confirm against that helper).

        """
        asset = get_exchange(exchange_name).get_asset(symbol)

        chunk_path = get_bcolz_chunk(
            exchange_name=exchange_name,
            symbol=symbol,
            data_frequency=data_frequency,
            period=period,
        )
        reader = BcolzExchangeBarReader(
            rootdir=chunk_path,
            data_frequency=data_frequency,
        )

        first_dt = reader.first_trading_day
        last_dt = reader.last_available_dt
        if data_frequency == 'daily':
            # Pull the end back by 23h59m for daily data; presumably the
            # reader reports the last minute of the final day -- TODO confirm.
            last_dt = last_dt - pd.Timedelta(hours=23, minutes=59)

        print(first_dt, last_dt, data_frequency)

        arrays = reader.load_raw_arrays(
            self.columns, first_dt, last_dt, [asset.sid]
        )

        periods = ExchangeBundle(exchange_name).get_calendar_periods_range(
            first_dt, last_dt, data_frequency
        )
        return get_df_from_arrays(arrays, periods)
Esempio n. 2
0
    def bundle_to_csv(self, exchange_name='poloniex', symbol='eth_btc',
                      data_frequency='minute', period='2017-01'):
        """
        Fetch one bcolz chunk and hand it to _bundle_to_csv.

        All parameters default to the values which used to be hard-coded
        in the body, so calling the method without arguments behaves
        exactly as before.

        Parameters
        ----------
        exchange_name: str
            Name used to look up the exchange.
        symbol: str
            Symbol resolved to an asset through the exchange.
        data_frequency: str
            Frequency of the chunk to fetch.
        period: str
            Period of the chunk to fetch; also passed to _bundle_to_csv
            as its filename argument.

        """
        exchange = get_exchange(exchange_name)
        asset = exchange.get_asset(symbol)

        # The names reported by the resolved objects (not the raw
        # arguments) are what gets passed on, as in the original code.
        path = get_bcolz_chunk(
            exchange_name=exchange.name,
            symbol=asset.symbol,
            data_frequency=data_frequency,
            period=period,
        )
        self._bundle_to_csv(
            asset=asset,
            exchange_name=exchange.name,
            data_frequency=data_frequency,
            path=path,
            filename=period,
        )
Esempio n. 3
0
    def download_from_catalyst(self, asset, data_frequency, period):
        """
        Fetch a bcolz chunk for an asset and load its OHLCV arrays.

        Parameters
        ----------
        asset:
            Object providing the `symbol` and `sid` attributes.
        data_frequency: str
            The value 'daily' shifts the end of the loaded range
            (see the comment in the body).
        period:
            Passed through to get_bcolz_chunk to select the chunk.

        Returns
        -------
        Either the tuple (get_df_from_arrays(...), reader) when arrays were
        loaded, or only the reader's root directory when loading failed or
        produced nothing. Callers have to handle both shapes.

        Raises
        ------
        TempBundleNotFoundError
            When get_reader returns None for the fetched chunk. Removal of
            the chunk directory is attempted first.

        """
        chunk_path = get_bcolz_chunk(
            exchange_name=self.exchange_name,
            symbol=asset.symbol,
            data_frequency=data_frequency,
            period=period,
        )

        reader = self.get_reader(data_frequency, path=chunk_path)
        if reader is None:
            # Best-effort removal of the unusable chunk: a failure here is
            # only logged, the error below is raised either way.
            try:
                log.warn('the reader is unable to use bundle: {}, '
                         'deleting it.'.format(chunk_path))
                shutil.rmtree(chunk_path)
            except Exception as e:
                log.warn('unable to remove temp bundle: {}'.format(e))

            raise TempBundleNotFoundError(path=chunk_path)

        start_dt = reader.first_trading_day
        end_dt = reader.last_available_dt
        if data_frequency == 'daily':
            # Pull the end back by 23h59m for daily data; presumably the
            # reader reports the last minute of the final day -- TODO confirm.
            end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

        try:
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=['open', 'high', 'low', 'close', 'volume'],
                start_dt=start_dt,
                end_dt=end_dt,
            )
        except Exception as e:
            log.warn('skipping ctable for {} from {} to {}: {}'.format(
                asset.symbol, start_dt, end_dt, e))
            arrays = None

        if not arrays:
            # Nothing usable was loaded: only the bundle directory is
            # returned on this path.
            return reader._rootdir

        periods = self.get_calendar_periods_range(
            start_dt, end_dt, data_frequency
        )
        return get_df_from_arrays(arrays, periods), reader
Esempio n. 4
0
    def ingest_ctable(self, asset, data_frequency, period,
                      writer, empty_rows_behavior='strip',
                      duplicates_threshold=100, cleanup=False):
        """
        Merge a ctable bundle chunk into the main bundle for the exchange.

        Parameters
        ----------
        asset: TradingPair
        data_frequency: str
        period: str
        writer:
            Forwarded unchanged to ingest_df.
        empty_rows_behavior: str
            Ensure that the bundle does not have any missing data.
            Forwarded unchanged to ingest_df.
        duplicates_threshold: int
            Forwarded unchanged to ingest_df.
        cleanup: bool
            Remove the temp bundle directory after ingestion. Only applied
            when the chunk was actually ingested.

        Returns
        -------
        list[str]
            A list of problems which occurred during ingestion, with None
            entries removed. When no arrays could be loaded, the reader's
            root directory is returned instead (see the note in the body).

        Raises
        ------
        TempBundleNotFoundError
            When get_reader returns None for the fetched chunk. Removal of
            the chunk directory is attempted first.

        """
        problems = []

        # Download and extract the bundle
        path = get_bcolz_chunk(
            exchange_name=self.exchange_name,
            symbol=asset.symbol,
            data_frequency=data_frequency,
            period=period
        )

        reader = self.get_reader(data_frequency, path=path)
        if reader is None:
            # Best-effort removal of the unusable chunk: a failure here is
            # only logged, the error below is raised either way.
            try:
                log.warn('the reader is unable to use bundle: {}, '
                         'deleting it.'.format(path))
                shutil.rmtree(path)

            except Exception as e:
                log.warn('unable to remove temp bundle: {}'.format(e))

            raise TempBundleNotFoundError(path=path)

        start_dt = reader.first_trading_day
        end_dt = reader.last_available_dt

        if data_frequency == 'daily':
            # Pull the end back by 23h59m for daily data; presumably the
            # reader reports the last minute of the final day -- TODO confirm.
            end_dt = end_dt - pd.Timedelta(hours=23, minutes=59)

        arrays = None
        try:
            arrays = reader.load_raw_arrays(
                sids=[asset.sid],
                fields=['open', 'high', 'low', 'close', 'volume'],
                start_dt=start_dt,
                end_dt=end_dt
            )
        except Exception as e:
            log.warn('skipping ctable for {} from {} to {}: {}'.format(
                asset.symbol, start_dt, end_dt, e
            ))

        if not arrays:
            # NOTE(review): this path returns the bundle directory (not a
            # list of problems) and skips `cleanup`. Kept as-is so existing
            # callers see no change -- confirm whether they expect a list.
            return reader._rootdir

        periods = self.get_calendar_periods_range(
            start_dt, end_dt, data_frequency
        )
        df = get_df_from_arrays(arrays, periods)
        problems += self.ingest_df(
            ohlcv_df=df,
            data_frequency=data_frequency,
            asset=asset,
            writer=writer,
            empty_rows_behavior=empty_rows_behavior,
            duplicates_threshold=duplicates_threshold
        )

        if cleanup:
            log.debug(
                'removing bundle folder following ingestion: {}'.format(
                    reader._rootdir)
            )
            shutil.rmtree(reader._rootdir)

        # Build a real list: on Python 3 `filter` would hand back a lazy
        # iterator, which does not match the documented list[str] return.
        return [problem for problem in problems if problem is not None]