Esempio n. 1
0
    def ingest(self, data_frequency, include_symbols=None,
               exclude_symbols=None, start=None, end=None,
               show_progress=True, environ=os.environ):
        """
        Ingest price data for the selected assets, once per frequency.

        :param data_frequency: string of one or more frequencies separated
            by commas. The unsplit string is passed to ``get_adj_dates``;
            each comma-separated piece is passed to ``ingest_assets``.
        :param include_symbols: forwarded to ``self.get_assets``.
        :param exclude_symbols: forwarded to ``self.get_assets``.
        :param start: requested start, adjusted by ``get_adj_dates``.
        :param end: requested end, adjusted by ``get_adj_dates``.
        :param show_progress: forwarded to ``self.ingest_assets``.
        :param environ: accepted for interface compatibility; not read by
            this method.
        :return: None
        """
        selected_assets = self.get_assets(include_symbols, exclude_symbols)
        adj_start, adj_end = get_adj_dates(
            start, end, selected_assets, data_frequency
        )

        # The same asset list and adjusted date range are reused for every
        # requested frequency.
        frequencies = data_frequency.split(',')
        for single_frequency in frequencies:
            self.ingest_assets(
                selected_assets,
                adj_start,
                adj_end,
                single_frequency,
                show_progress
            )
Esempio n. 2
0
    def get_history_window_series(self,
                                  assets,
                                  end_dt,
                                  bar_count,
                                  field,
                                  data_frequency,
                                  reset_reader=False):
        """
        Load a window of values for one field from the bundle, per asset.

        :param assets: assets to load; ``symbol``, ``start_date`` and
            ``sid`` are read from each of them.
        :param end_dt: requested end of the window (adjusted by
            ``get_adj_dates`` before use).
        :param bar_count: passed to ``get_start_dt`` to derive the start of
            the window from ``end_dt``.
        :param field: name of the field to load.
        :param data_frequency: frequency used to select the reader and to
            build the period index.
        :param reset_reader: when True, the reader currently cached in
            ``self._readers`` is dropped and requested again.
        :return: dict mapping each asset to a ``pd.Series`` indexed by
            ``self.get_calendar_periods_range(start_dt, end_dt, ...)``.
        :raises PricingDataNotLoadedError: if no reader is available, if
            ``range_in_bundle`` reports a missing range for any asset, or
            if ``reader.load_raw_arrays`` fails.
        """
        def not_loaded_for_all_assets():
            # Symbols are kept as text: joining encoded bytes with a str
            # separator raises TypeError on Python 3 and would mask the
            # error that is actually being reported.
            symbols = [asset.symbol for asset in assets]
            return PricingDataNotLoadedError(
                field=field,
                first_trading_day=min([asset.start_date for asset in assets]),
                exchange=self.exchange.name,
                symbols=symbols,
                symbol_list=','.join(symbols),
                data_frequency=data_frequency)

        start_dt = get_start_dt(end_dt, bar_count, data_frequency)
        start_dt, end_dt = \
            get_adj_dates(start_dt, end_dt, assets, data_frequency)

        reader = self.get_reader(data_frequency)
        # Only reset when there is a reader to reset; otherwise fall through
        # to the "not loaded" error below instead of failing on
        # ``None._rootdir``.
        if reset_reader and reader is not None:
            self._readers.pop(reader._rootdir, None)
            reader = self.get_reader(data_frequency)

        if reader is None:
            raise not_loaded_for_all_assets()

        for asset in assets:
            # NOTE(review): this passes the full ``assets`` list rather than
            # ``[asset]``, so every iteration computes the same range.
            # Kept as-is; confirm whether a per-asset range was intended.
            asset_start_dt, asset_end_dt = \
                get_adj_dates(start_dt, end_dt, assets, data_frequency)

            in_bundle = range_in_bundle(asset, asset_start_dt, asset_end_dt,
                                        reader)
            if not in_bundle:
                raise PricingDataNotLoadedError(
                    field=field,
                    first_trading_day=asset.start_date,
                    exchange=self.exchange.name,
                    symbols=asset.symbol,
                    symbol_list=asset.symbol,
                    data_frequency=data_frequency)

        try:
            arrays = reader.load_raw_arrays(
                sids=[asset.sid for asset in assets],
                fields=[field],
                start_dt=start_dt,
                end_dt=end_dt)

        except Exception:
            # Any reader failure is reported to the caller as missing data.
            raise not_loaded_for_all_assets()

        periods = self.get_calendar_periods_range(start_dt, end_dt,
                                                  data_frequency)

        series = dict()
        for asset_index, asset in enumerate(assets):
            # NOTE(review): assumes ``load_raw_arrays`` returns one entry
            # per asset, in ``sids`` order - confirm against the reader.
            asset_values = arrays[asset_index]
            series[asset] = pd.Series(asset_values.flatten(), index=periods)

        return series
Esempio n. 3
0
    def prepare_chunks(self, assets, data_frequency, start_dt, end_dt):
        """
        Split a price data request into chunks corresponding to individual
        bundles.

        For every asset, the requested range is walked one day at a time and
        grouped by period label: ``'YYYY-MM'`` for ``'minute'`` data,
        ``'YYYY'`` otherwise. A chunk is emitted for each period for which
        ``range_in_bundle`` reports that the reader does not hold the data.

        :param assets: assets to prepare chunks for. Assets for which
            ``get_adj_dates`` raises ``NoDataAvailableOnExchange``, or for
            which the range contains no sessions, are skipped.
        :param data_frequency: ``'minute'`` or ``'daily'``.
        :param start_dt: requested start of the range. It is applied to
            every asset independently.
        :param end_dt: requested end of the range.
        :return: list of dicts with keys ``asset``, ``period_start``,
            ``period_end`` and ``period``, sorted by ``period_end``.
        :raises InvalidHistoryFrequencyError: if a period has to be built
            for a frequency other than ``'minute'`` or ``'daily'``.
        """
        reader = self.get_reader(data_frequency)

        chunks = []
        for asset in assets:
            try:
                asset_start, asset_end = \
                    get_adj_dates(start_dt, end_dt, [asset], data_frequency)

            except NoDataAvailableOnExchange:
                continue

            # Kept in a local on purpose: overwriting ``start_dt`` here would
            # clip the range of every following asset to the latest start
            # seen so far, making the result depend on asset order.
            window_start = max(
                start_dt, self.calendar.first_trading_session, asset_start
            )

            # Aligning start / end dates with the daily calendar
            if data_frequency == 'minute':
                sessions = get_periods_range(
                    window_start, end_dt, data_frequency
                )
            else:
                sessions = self.calendar.sessions_in_range(
                    window_start, end_dt
                )

            # Nothing to chunk for this asset (e.g. the window starts after
            # ``end_dt``); indexing an empty range would raise IndexError.
            if len(sessions) == 0:
                continue

            first_session = sessions[0]
            last_session = sessions[-1]

            if asset_start < first_session:
                asset_start = first_session

            if asset_end > last_session:
                asset_end = last_session

            seen_labels = set()
            dt = first_session
            while dt <= last_session:
                label = '{}-{:02d}'.format(dt.year, dt.month) \
                    if data_frequency == 'minute' else '{}'.format(dt.year)

                if label not in seen_labels:
                    seen_labels.add(label)

                    if data_frequency == 'minute':
                        get_period_bounds = get_month_start_end
                    elif data_frequency == 'daily':
                        get_period_bounds = get_year_start_end
                    else:
                        raise InvalidHistoryFrequencyError(
                            frequency=data_frequency)

                    # Adjusting the period dates to match the availability
                    # of the trading pair
                    period_start, period_end = get_period_bounds(dt)

                    asset_first_period, _ = get_period_bounds(asset_start)
                    if asset_first_period == period_start \
                            and period_start < asset_start:
                        period_start = asset_start

                    _, asset_last_period = get_period_bounds(asset_end)
                    if asset_last_period == period_end \
                            and period_end > asset_end:
                        period_end = asset_end

                    # Currencies don't always start trading at midnight.
                    # Checking the last minute of the day instead.
                    range_start = period_start.replace(hour=23, minute=59) \
                        if data_frequency == 'minute' else period_start
                    has_data = range_in_bundle(asset, range_start, period_end,
                                               reader)

                    if not has_data:
                        log.debug('adding period: {}'.format(label))
                        chunks.append(
                            dict(asset=asset,
                                 period_start=period_start,
                                 period_end=period_end,
                                 period=label))

                dt += timedelta(days=1)

        chunks.sort(key=lambda chunk: chunk['period_end'])

        return chunks