def select_random_exchanges(population=3, features=None,
                            is_authenticated=False, base_currency=None):
    """
    Pick a random subset of the exchanges matching the given filters.

    Parameters
    ----------
    population : int or None
        How many exchanges to draw. The value is capped at the number of
        matching exchanges. When None, no sampling takes place and every
        matching exchange is returned.
    features
        Forwarded unchanged to `find_exchanges`.
    is_authenticated
        Forwarded unchanged to `find_exchanges`.
    base_currency
        Forwarded unchanged to `find_exchanges`.

    Returns
    -------
    exchanges
        The result of `find_exchanges` itself when `population` is None,
        otherwise a list produced by `random.sample` over that result.
    """
    candidates = find_exchanges(
        features=features,
        is_authenticated=is_authenticated,
        base_currency=base_currency,
    )

    # No sampling requested: hand back everything that matched.
    if population is None:
        return candidates

    # Never ask random.sample for more items than are available.
    sample_size = min(population, len(candidates))
    return random.sample(candidates, sample_size)
def lifetimes(self, dates, include_start_date):
    """
    Compute a DataFrame representing asset lifetimes for the specified date
    range.

    Parameters
    ----------
    dates : pd.DatetimeIndex
        The dates for which to compute lifetimes.
    include_start_date : bool
        Whether or not to count the asset as alive on its start_date.

        This is useful in a backtesting context where `lifetimes` is being
        used to signify "do I have data for this asset as of the morning of
        this date?"  For many financial metrics, (e.g. daily close), data
        isn't available for an asset until the end of the asset's first
        day.

    Returns
    -------
    lifetimes : pd.DataFrame
        A frame with `dates` as index and the assets of the selected
        exchange (the objects in `exchange.assets`) as columns.  The value
        at `lifetimes.loc[date, asset]` is the result of
        `asset.start_date <= date < asset.end_minute`.  If
        `include_start_date` is False the lower bound is strict, so
        `lifetimes.loc[date, asset]` will be false when
        `date == asset.start_date`.

    Raises
    ------
    ValueError
        If no exchange advertising the 'minuteBundle' feature is found.

    See Also
    --------
    numpy.putmask
    catalyst.pipeline.engine.SimplePipelineEngine._compute_root_mask
    """
    exchanges = find_exchanges(features=['minuteBundle'])
    if not exchanges:
        raise ValueError('exchange with minute bundles not found')

    # TODO: find a way to support multiple exchanges
    # Using a single exchange for now. I'd like to avoid binding pipeline
    # to a single exchange.
    # NOTE(review): the original comment here was missing a word
    # ("because are not unique for the same asset in different
    # exchanges") -- presumably asset identifiers are not unique across
    # exchanges; confirm.
    exchange = exchanges[0]
    exchange.init()

    # Read the asset collection once, after init(), so the rows and the
    # column labels are guaranteed to be built from the same sequence.
    assets = exchange.assets

    data = []
    for dt in dates:
        # The flag is loop-invariant, so branch once per date rather than
        # once per asset.
        if include_start_date:
            row = [
                asset.start_date <= dt < asset.end_minute
                for asset in assets
            ]
        else:
            row = [
                asset.start_date < dt < asset.end_minute
                for asset in assets
            ]
        data.append(row)

    return pd.DataFrame(data, index=dates, columns=assets)