def __iter__(self):
    """
    Yield successive dates from ``self.start`` through ``self.end``
    (inclusive), advancing by ``self.offset`` after each one.

    If the offset's ``_normalizeFirst`` flag is set, the starting date is
    passed through ``datetools.normalize_date`` before iteration begins.
    """
    step = self.offset
    first = self.start
    current = (
        datetools.normalize_date(first) if step._normalizeFirst else first
    )
    while current <= self.end:
        yield current
        current = current + step
def load_ipython_extension(ip):
    """
    IPython extension hook: register the MDF magics on ``ip`` and activate
    an ambient MDFContext dated today.

    The module-level ``_loaded`` flag ensures registration and context
    creation happen only on the first call.
    """
    global _loaded
    if not _loaded:
        ip.register_magics(MDFMagics)

        # The ambient context is anchored at today's normalized date.
        ambient = MDFContext(datetools.normalize_date(datetime.now()))
        ambient._activate_ctx()

        _loaded = True

    print("""Use the magic function %mdf_help for a list of commands""")
def mdf_reset(self, parameter_s=""):
    """
    Resets the current mdf context, and optionally sets the current date.

    %mdf_reset [date]

    eg: %mdf_reset
    or: %mdf_reset 2010-01-01
    """
    if not parameter_s:
        # No date supplied: fall back to today's normalized date.
        now = datetools.normalize_date(datetime.now())
    else:
        # The date text is parsed against the shell's namespaces.
        shell = self.shell
        now = _parse_datetime(parameter_s,
                              shell.user_global_ns,
                              shell.user_ns)

    # Build a fresh context for the chosen date and make it the active one.
    MDFContext(now)._activate_ctx()
def generate_range(start=None, end=None, periods=None,
                   offset=datetools.BDay(), time_rule=None):
    """
    Lazily produce the dates between two bounds at a fixed date offset.

    Comparable to dateutil.rrule, but the step is a pandas DateOffset
    object.

    Parameters
    ----------
    start : datetime (default None)
    end : datetime (default None)
    periods : int, optional
    offset : DateOffset, default ``datetools.BDay()``
        Step between consecutive dates.
    time_rule : optional
        When given, ``offset`` is replaced by
        ``datetools.getOffset(time_rule)``.

    Note
    ----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will satisfy
      start <= date <= end.

    Returns
    -------
    dates : generator object

    See also
    --------
    DateRange, dateutil.rrule
    """
    if time_rule is None:
        # Look up the rule name registered for this offset, if any.
        if offset in datetools._offsetNames:
            time_rule = datetools._offsetNames[offset]
    else:
        offset = datetools.getOffset(time_rule)

    start = datetools.to_datetime(start)
    end = datetools.to_datetime(end)

    # Snap the bounds onto the offset: start moves forward, end moves back.
    if start and not offset.onOffset(start):
        start = offset.rollforward(start)

    if end and not offset.onOffset(end):
        end = offset.rollback(end)

        # Rolling back crossed the start: re-derive end from zero periods.
        if periods is None and end < start:
            end = None
            periods = 0

    # Derive whichever bound is missing from the other one and `periods`.
    if end is None:
        end = start + (periods - 1) * offset

    if start is None:
        start = end - (periods - 1) * offset

    current = (
        datetools.normalize_date(start) if offset._normalizeFirst else start
    )
    while current <= end:
        yield current
        # offset.apply is faster than `current + offset`
        current = offset.apply(current)
def write(self, sid, df):
    """
    Write the OHLCV data for the given sid.

    If there is no bcolz ctable yet created for the sid, create it.
    If the ctable does not yet reach the trading day before the first day
    present in ``df``, fill the ctable with 0s up to that date.

    Writes in blocks of the size of the days times minutes per day.

    Parameters
    ----------
    sid : int
        The asset identifier for the data being written.
    df : pd.DataFrame
        DataFrame of market data with the following characteristics.
        columns : ('open', 'high', 'low', 'close', 'volume')
        open : float64
        high : float64
        low : float64
        close : float64
        volume : float64|int64
        index : DatetimeIndex of market minutes.

    Raises
    ------
    BcolzMinuteOverlappingData
        If the first day of ``df`` falls before the next day expected
        after the data already written for the sid.
    """
    table = self._ensure_ctable(sid)

    last_date = self.last_date_in_output_for_sid(sid)
    tds = self._trading_days
    days = tds[tds.slice_indexer(start=normalize_date(df.index[0]),
                                 end=normalize_date(df.index[-1]))]
    input_first_day = days[0]

    if last_date is pd.NaT:
        # If there is no data, determine how many days to add so that
        # desired days are written to the correct slots.
        days_to_zerofill = tds[tds.slice_indexer(end=input_first_day)]
        # Chop off the input first day.
        days_to_zerofill = days_to_zerofill[:-1]
    else:
        next_date = last_date + 1
        if next_date < input_first_day:
            # If last_date and input_first_day are not adjacent need to
            # fill in between.
            days_to_zerofill = tds[tds.slice_indexer(start=next_date,
                                                     end=input_first_day)]
            # Chop off the input first day.
            days_to_zerofill = days_to_zerofill[:-1]
        elif next_date > input_first_day:
            # Report the two dates that actually conflict: the first day of
            # the incoming window and the day the table expects next.
            message = dedent(
                """ window start={0} is before expected write date={1} for sid={2}""".strip()
            )
            raise BcolzMinuteOverlappingData(
                message.format(input_first_day, next_date, sid))
        else:
            days_to_zerofill = None

    if days_to_zerofill is not None and len(days_to_zerofill):
        self._zerofill(table, len(days_to_zerofill))

    days_to_write = tds[tds.slice_indexer(start=input_first_day,
                                          end=days[-1])]
    minutes_count = len(days_to_write) * self._minutes_per_day

    all_minutes = self._minute_index
    indexer = all_minutes.slice_indexer(start=days_to_write[0])
    all_minutes_in_window = all_minutes[indexer]

    # Every minute slot starts at 0; slots with no row in df stay 0.
    open_col = np.zeros(minutes_count, dtype=np.uint32)
    high_col = np.zeros(minutes_count, dtype=np.uint32)
    low_col = np.zeros(minutes_count, dtype=np.uint32)
    close_col = np.zeros(minutes_count, dtype=np.uint32)
    vol_col = np.zeros(minutes_count, dtype=np.uint32)

    # Position of each df row within the window of minutes being written.
    dt_ixs = np.searchsorted(all_minutes_in_window.values,
                             df.index.values)

    # Prices are scaled by the OHLC ratio and stored as uint32.
    ohlc_ratio = self._ohlc_ratio
    open_col[dt_ixs] = (df.open.values * ohlc_ratio).astype(np.uint32)
    high_col[dt_ixs] = (df.high.values * ohlc_ratio).astype(np.uint32)
    low_col[dt_ixs] = (df.low.values * ohlc_ratio).astype(np.uint32)
    close_col[dt_ixs] = (df.close.values * ohlc_ratio).astype(np.uint32)
    vol_col[dt_ixs] = df.volume.values.astype(np.uint32)

    table.append([open_col, high_col, low_col, close_col, vol_col])
    table.flush()
def test_normalize_date():
    """normalize_date should reduce a timestamp to midnight of its day."""
    stamp = datetime(2007, 10, 1, 1, 12, 5, 10)
    expected = datetime(2007, 10, 1)
    assert normalize_date(stamp) == expected
def write(self, sid, df):
    """
    Write the OHLCV data for the given sid.

    If there is no bcolz ctable yet created for the sid, create it.
    If the ctable does not yet reach the trading day before the first day
    present in ``df``, fill the ctable with 0s up to that date.

    Writes in blocks of the size of the days times minutes per day.

    Parameters
    ----------
    sid : int
        The asset identifier for the data being written.
    df : pd.DataFrame
        DataFrame of market data with the following characteristics.
        columns : ('open', 'high', 'low', 'close', 'volume')
        open : float64
        high : float64
        low : float64
        close : float64
        volume : float64|int64
        index : DatetimeIndex of market minutes.

    Raises
    ------
    BcolzMinuteOverlappingData
        If the first day of ``df`` falls before the next day expected
        after the data already written for the sid.
    """
    table = self._ensure_ctable(sid)

    last_date = self.last_date_in_output_for_sid(sid)
    tds = self._trading_days
    days = tds[tds.slice_indexer(
        start=normalize_date(df.index[0]),
        end=normalize_date(df.index[-1]))]
    input_first_day = days[0]

    if last_date is pd.NaT:
        # If there is no data, determine how many days to add so that
        # desired days are written to the correct slots.
        days_to_zerofill = tds[tds.slice_indexer(end=input_first_day)]
        # Chop off the input first day.
        days_to_zerofill = days_to_zerofill[:-1]
    else:
        next_date = last_date + 1
        if next_date < input_first_day:
            # If last_date and input_first_day are not adjacent need to
            # fill in between.
            days_to_zerofill = tds[tds.slice_indexer(
                start=next_date,
                end=input_first_day)]
            # Chop off the input first day.
            days_to_zerofill = days_to_zerofill[:-1]
        elif next_date > input_first_day:
            # The message names the incoming window's first day and the
            # day the table expects next, i.e. the two conflicting dates.
            message = dedent(
                """ window start={0} is before expected write date={1} for sid={2}""".strip()
            )
            raise BcolzMinuteOverlappingData(
                message.format(input_first_day, next_date, sid))
        else:
            days_to_zerofill = None

    if days_to_zerofill is not None and len(days_to_zerofill):
        self._zerofill(table, len(days_to_zerofill))

    days_to_write = tds[tds.slice_indexer(
        start=input_first_day,
        end=days[-1])]
    minutes_count = len(days_to_write) * self._minutes_per_day

    all_minutes = self._minute_index
    indexer = all_minutes.slice_indexer(start=days_to_write[0])
    all_minutes_in_window = all_minutes[indexer]

    # Every minute slot starts at 0; slots with no row in df stay 0.
    open_col = np.zeros(minutes_count, dtype=np.uint32)
    high_col = np.zeros(minutes_count, dtype=np.uint32)
    low_col = np.zeros(minutes_count, dtype=np.uint32)
    close_col = np.zeros(minutes_count, dtype=np.uint32)
    vol_col = np.zeros(minutes_count, dtype=np.uint32)

    # Position of each df row within the window of minutes being written.
    dt_ixs = np.searchsorted(all_minutes_in_window.values,
                             df.index.values)

    # Prices are scaled by the OHLC ratio and stored as uint32.
    ohlc_ratio = self._ohlc_ratio
    open_col[dt_ixs] = (df.open.values * ohlc_ratio).astype(np.uint32)
    high_col[dt_ixs] = (df.high.values * ohlc_ratio).astype(np.uint32)
    low_col[dt_ixs] = (df.low.values * ohlc_ratio).astype(np.uint32)
    close_col[dt_ixs] = (df.close.values * ohlc_ratio).astype(
        np.uint32)
    vol_col[dt_ixs] = df.volume.values.astype(np.uint32)

    table.append([
        open_col,
        high_col,
        low_col,
        close_col,
        vol_col
    ])
    table.flush()
def write(self, sid, df):
    """
    Append the OHLCV rows of ``df`` to the bcolz ctable for ``sid``.

    The ctable is created if needed and padded (via ``self.pad``) up to the
    day before the first day in ``df``; the data is then appended as whole
    days of ``self._minutes_per_day`` slots each.

    Parameters
    ----------
    sid : int
        The asset identifier for the data being written.
    df : pd.DataFrame
        DataFrame of market data with the following characteristics.
        columns : ('open', 'high', 'low', 'close', 'volume')
        open : float64
        high : float64
        low : float64
        close : float64
        volume : float64|int64
        index : DatetimeIndex of market minutes.

    Raises
    ------
    BcolzMinuteOverlappingData
        If the last date already written for the sid is on or after the
        first day in ``df``.
    """
    table = self._ensure_ctable(sid)

    trading_days = self._trading_days
    input_first_day = normalize_date(df.index[0])
    input_last_day = normalize_date(df.index[-1])

    last_date = self.last_date_in_output_for_sid(sid)
    if last_date >= input_first_day:
        message = dedent(
            """ Data with last_date={0} already includes input start={1} for sid={2}""".strip()
        )
        raise BcolzMinuteOverlappingData(
            message.format(last_date, input_first_day, sid))

    # Pad up to the day before the input, then re-fetch the table.
    self.pad(sid, input_first_day - trading_days.freq)
    table = self._ensure_ctable(sid)

    write_slice = trading_days.slice_indexer(start=input_first_day,
                                             end=input_last_day)
    days_to_write = trading_days[write_slice]
    minutes_count = len(days_to_write) * self._minutes_per_day

    minute_index = self._minute_index
    window_slice = minute_index.slice_indexer(start=days_to_write[0])
    window_minutes = minute_index[window_slice]

    # One zero-initialised uint32 column per field, in table order:
    # open, high, low, close, volume.
    columns = [np.zeros(minutes_count, dtype=np.uint32) for _ in range(5)]
    open_col, high_col, low_col, close_col, vol_col = columns

    # Slot of each df row within the window of minutes being written.
    positions = np.searchsorted(window_minutes.values, df.index.values)

    # Prices are scaled by the OHLC ratio before the uint32 cast; volume
    # is cast directly.
    ratio = self._ohlc_ratio
    price_columns = (
        ("open", open_col),
        ("high", high_col),
        ("low", low_col),
        ("close", close_col),
    )
    for field, column in price_columns:
        scaled = getattr(df, field).values * ratio
        column[positions] = scaled.astype(np.uint32)
    vol_col[positions] = df.volume.values.astype(np.uint32)

    table.append(columns)
    table.flush()
def __new__(
    cls, data=None, freq=None, start=None, end=None, periods=None,
    dtype=None, copy=False, name=None, tz=None, verify_integrity=True,
    normalize=False, **kwds
):
    """
    Build the index either from explicit ``data`` or, when ``data`` is
    None, as a regular range described by ``start``/``end``/``periods``
    and a frequency.

    Parameters
    ----------
    data : ndarray or other iterable, optional
        Values to convert to ``M8[us]``. Scalars are rejected.
    freq : datetools.DateOffset or value accepted by datetools.to_offset
        Frequency of the index. Required when ``data`` is None.
    start, end : optional
        Range bounds; each is converted with ``Timestamp(...)`` when not
        None. Only used when ``data`` is None.
    periods : optional
        Forwarded to the range generators when ``data`` is None.
    dtype : optional
        Not referenced in this body.
    copy : bool, default False
        Forwarded to ``np.array`` when converting ``data``.
    name : optional
        Stored on the result as ``.name``.
    tz : optional
        Stored on the result as ``.tz``; in range mode it is first passed
        through ``tools._figure_out_timezone`` together with start/end.
    verify_integrity : bool, default True
        In data mode with a frequency, replace every element that is not
        on the offset with its ``offset.rollforward`` value.
    normalize : bool, default False
        In range mode, pass start/end through ``datetools.normalize_date``.
    **kwds
        Only ``offset`` is read: a deprecated alias for ``freq`` that
        triggers a FutureWarning when truthy.

    Returns
    -------
    A view of the underlying array as ``cls`` with ``name``, ``offset``
    and ``tz`` attributes set.

    Raises
    ------
    ValueError
        If neither ``data`` nor a frequency is supplied, if start/end do
        not convert to a Timestamp, or if ``data`` is a scalar.
    """
    # Honour the deprecated 'offset' keyword; the warning is emitted below.
    warn = False
    if "offset" in kwds and kwds["offset"]:
        freq = kwds["offset"]
        warn = True

    if not isinstance(freq, datetools.DateOffset):
        freq = datetools.to_offset(freq)

    # NOTE(review): freq has already been through to_offset above, so the
    # basestring checks below presumably never fire -- confirm.
    if warn:
        import warnings

        warnings.warn("parameter 'offset' is deprecated, "
                      "please use 'freq' instead", FutureWarning)
        if isinstance(freq, basestring):
            freq = datetools.get_offset(freq)
    else:
        if isinstance(freq, basestring):
            freq = datetools.to_offset(freq)

    offset = freq

    if data is None and offset is None:
        raise ValueError("Must provide freq argument if no data is "
                         "supplied")

    # ---- Range mode: no data given, generate the values from the bounds.
    if data is None:
        # Tracks whether every supplied bound sits at midnight (or was
        # normalized); consulted by the cache condition below.
        _normalized = True

        if start is not None:
            start = Timestamp(start)
            if not isinstance(start, Timestamp):
                raise ValueError("Failed to convert %s to timestamp" % start)

            if normalize:
                start = datetools.normalize_date(start)
                _normalized = True
            else:
                _normalized = _normalized and start.time() == _midnight

        if end is not None:
            end = Timestamp(end)
            if not isinstance(end, Timestamp):
                raise ValueError("Failed to convert %s to timestamp" % end)

            if normalize:
                end = datetools.normalize_date(end)
                _normalized = True
            else:
                _normalized = _normalized and end.time() == _midnight

        start, end, tz = tools._figure_out_timezone(start, end, tz)

        # Use the cached range only when the offset allows caching, the
        # bounds are normalized if the offset's cache requires it, and
        # datetools reports the bounds as within the cache range.
        if (
            offset._should_cache()
            and not (offset._normalize_cache and not _normalized)
            and datetools._naive_in_cache_range(start, end)
        ):
            index = cls._cached_range(start, end, periods=periods,
                                      offset=offset, name=name)
        else:
            index = _generate_regular_range(start, end, periods, offset)

        index = index.view(cls)
        index.name = name
        index.offset = offset
        index.tz = tz

        return index

    # ---- Data mode: coerce whatever was passed into an M8[us] array.
    if not isinstance(data, np.ndarray):
        if np.isscalar(data):
            raise ValueError(
                "DatetimeIndex() must be called with a "
                "collection of some kind, %s was passed" % repr(data)
            )

        # A lone datetime is wrapped so it is treated as a 1-element list.
        if isinstance(data, datetime):
            data = [data]

        # other iterable of some kind
        if not isinstance(data, (list, tuple)):
            data = list(data)

        data = np.asarray(data, dtype="O")

        # try a few ways to make it datetime64
        if lib.is_string_array(data):
            data = _str_to_dt_array(data)
        else:
            data = np.asarray(data, dtype="M8[us]")

    # String arrays go through the string parser; the remaining three
    # branches all perform the same np.array conversion.
    if issubclass(data.dtype.type, basestring):
        subarr = _str_to_dt_array(data)
    elif issubclass(data.dtype.type, np.integer):
        subarr = np.array(data, dtype="M8[us]", copy=copy)
    elif issubclass(data.dtype.type, np.datetime64):
        subarr = np.array(data, dtype="M8[us]", copy=copy)
    else:
        subarr = np.array(data, dtype="M8[us]", copy=copy)

    # TODO: this is horribly inefficient. If user passes data + offset, we
    # need to make sure data points conform. Punting on this
    if verify_integrity:
        if offset is not None:
            # Element-by-element: anything off the offset is overwritten
            # in place with the value of its rolled-forward timestamp.
            for i, ts in enumerate(subarr):
                if not offset.onOffset(Timestamp(ts)):
                    val = Timestamp(offset.rollforward(ts)).value
                    subarr[i] = val

    subarr = subarr.view(cls)
    subarr.name = name
    subarr.offset = offset
    subarr.tz = tz

    return subarr