def _index_from_records(self, recarr):
    """
    Rebuild the pandas index stored in a record array.

    The names of the index fields are read from
    ``recarr.dtype.metadata['index']``; optional timezone information is
    read from ``recarr.dtype.metadata['index_tz']``.

    Parameters
    ----------
    recarr : numpy structured/record array
        Its dtype must carry a ``metadata`` mapping with an ``'index'`` entry.

    Returns
    -------
    Index
        A named ``Index`` when exactly one index field is listed, otherwise a
        ``MultiIndex`` built from one array per listed field.
    """
    metadata = recarr.dtype.metadata
    index = metadata['index']

    # Single-level index: 'index_tz' is used as-is for the one level.
    if len(index) == 1:
        only_name = index[0]
        rtn = Index(np.copy(recarr[str(only_name)]), name=only_name)
        if isinstance(rtn, DatetimeIndex) and 'index_tz' in metadata:
            rtn = rtn.tz_localize('UTC').tz_convert(metadata['index_tz'])
        return rtn

    # Multi-level index: 'index_tz' is indexed per level; levels beyond its
    # length, or with a None entry, are left untouched.
    index_tz = metadata.get('index_tz', [])
    level_arrays = []
    for level_no, index_name in enumerate(index):
        # build each index level separately to ensure we end up with the
        # right index dtype
        level = Index(np.copy(recarr[str(index_name)]))
        tz = index_tz[level_no] if level_no < len(index_tz) else None
        if tz is not None:
            lost_dtype = not isinstance(level, DatetimeIndex) and len(level) == 0
            if lost_dtype:
                # index type information got lost during save as the index
                # was empty, cast back
                level = DatetimeIndex([], tz=tz)
            else:
                level = level.tz_localize('UTC').tz_convert(tz)
        level_arrays.append(level)

    return MultiIndex.from_arrays(level_arrays, names=index)
def test_where_invalid_dtypes(self):
    """Check that ``DatetimeIndex.where`` raises TypeError for a mismatched ``other``."""
    dti = pd.date_range("20130101", periods=3, tz="US/Eastern")

    i2 = Index([pd.NaT, pd.NaT] + dti[2:].tolist())
    mask = notna(i2)
    msg = "Where requires matching dtype"

    # passing tz-naive ndarray to tzaware DTI
    with pytest.raises(TypeError, match=msg):
        dti.where(mask, i2.values)

    # passing tz-aware DTI to tznaive DTI
    with pytest.raises(TypeError, match=msg):
        dti.tz_localize(None).where(mask, i2)

    # PeriodIndex as the replacement values
    with pytest.raises(TypeError, match=msg):
        dti.where(mask, i2.tz_localize(None).to_period("D"))

    # timedelta64[ns] view of the underlying integers
    with pytest.raises(TypeError, match=msg):
        dti.where(mask, i2.asi8.view("timedelta64[ns]"))

    # raw int64 values
    with pytest.raises(TypeError, match=msg):
        dti.where(mask, i2.asi8)

    # non-matching scalar
    with pytest.raises(TypeError, match=msg):
        dti.where(mask, pd.Timedelta(days=4))
def test_where_invalid_dtypes(self):
    """Check the TypeError messages ``DatetimeIndex.where`` gives for a mismatched ``other``."""
    dti = date_range("20130101", periods=3, tz="US/Eastern")

    i2 = Index([pd.NaT, pd.NaT] + dti[2:].tolist())
    mask = notna(i2)

    dtype_msg = "value should be a 'Timestamp', 'NaT', or array of those. Got"
    tz_msg = "Cannot compare tz-naive and tz-aware datetime-like objects"

    # passing tz-naive ndarray to tzaware DTI
    with pytest.raises(TypeError, match=tz_msg):
        dti.where(mask, i2.values)

    # passing tz-aware DTI to tznaive DTI
    with pytest.raises(TypeError, match=tz_msg):
        dti.tz_localize(None).where(mask, i2)

    # PeriodIndex as the replacement values
    with pytest.raises(TypeError, match=dtype_msg):
        dti.where(mask, i2.tz_localize(None).to_period("D"))

    # timedelta64[ns] view of the underlying integers
    with pytest.raises(TypeError, match=dtype_msg):
        dti.where(mask, i2.asi8.view("timedelta64[ns]"))

    # raw int64 values
    with pytest.raises(TypeError, match=dtype_msg):
        dti.where(mask, i2.asi8)

    # non-matching scalar
    with pytest.raises(TypeError, match=dtype_msg):
        dti.where(mask, pd.Timedelta(days=4))
def _index_from_records(self, recarr):
    """
    Reconstruct the index of a stored frame from its record array.

    Index field names come from ``recarr.dtype.metadata['index']`` and
    optional timezones from ``recarr.dtype.metadata['index_tz']``.

    Parameters
    ----------
    recarr : numpy structured/record array
        Array whose dtype metadata contains at least the ``'index'`` key.

    Returns
    -------
    Index
        A named ``Index`` for a single index field, otherwise a
        ``MultiIndex`` whose level names are the listed field names.
    """
    meta = recarr.dtype.metadata
    index = meta['index']

    if len(index) != 1:
        tz_per_level = meta.get('index_tz', [])
        levels = []
        for position, field in enumerate(index):
            # build each index level separately to ensure we end up with
            # the right index dtype
            level = Index(np.copy(recarr[str(field)]))
            if position < len(tz_per_level) and tz_per_level[position] is not None:
                tz = tz_per_level[position]
                if isinstance(level, DatetimeIndex) or len(level) != 0:
                    # values are tz-localized as UTC, then shown in the saved zone
                    level = level.tz_localize('UTC').tz_convert(tz)
                else:
                    # index type information got lost during save as the
                    # index was empty, cast back
                    level = DatetimeIndex([], tz=tz)
            levels.append(level)
        return MultiIndex.from_arrays(levels, names=index)

    field = index[0]
    rtn = Index(np.copy(recarr[str(field)]), name=field)
    if isinstance(rtn, DatetimeIndex) and 'index_tz' in meta:
        rtn = rtn.tz_localize('UTC').tz_convert(meta['index_tz'])
    return rtn
def getMinutelyQuotes(self, symbol, market, index):
    """
    Download intraday quotes for ``symbol`` from the Google Finance
    ``getprices`` endpoint and return them as a DataFrame.

    Parameters
    ----------
    symbol : str
        Ticker symbol, sent as the ``q`` query parameter.
    market : str
        Exchange code, sent as the ``x`` query parameter.
    index : pandas date index
        Only its first/last elements (to compute the number of days) and
        its ``freqstr`` (to compute the sampling interval) are used.

    Returns
    -------
    DataFrame or None
        Columns ``open``, ``close``, ``high``, ``low``, ``volume`` and an
        all-NaN ``adj_close``, indexed by the parsed dates localized to
        ``self.tz``. ``None`` when the frequency is unsupported, the request
        fails, or the response contains no data rows.
    """
    days = abs((index[index.shape[0] - 1] - index[0]).days)

    # NOTE(review): only the first character of freqstr is read as the
    # multiplier, so multi-digit frequencies such as '15T' are not handled
    # -- confirm against callers.
    freq = int(index.freqstr[0])
    unit = index.freqstr[1]
    if unit == 'S':
        freq += 1
    elif unit == 'T':
        freq *= 61
    elif unit == 'H':
        freq *= 3601
    else:
        log.error('** No suitable time frequency: {}'.format(index.freqstr))
        return None

    url = 'http://www.google.com/finance/getprices?q=%s&x=%s&p=%sd&i=%s' \
        % (symbol, market, str(days), str(freq + 1))
    log.info('On %d days with a precision of %d secs' % (days, freq))

    try:
        page = urllib2.urlopen(url)
    except urllib2.HTTPError:
        # Use a '{}' placeholder: '%s' is not substituted by str.format().
        log.error('** Unable to fetch data for stock: {}'.format(symbol))
        return None
    except urllib2.URLError:
        log.error('** URL error for stock: {}'.format(symbol))
        return None

    data = []
    try:
        # Skip the header until the first line starting with 'a'. Stop at
        # EOF too: readline() keeps returning '' there, which would
        # otherwise loop forever when no such line exists.
        feed = page.readline()
        while feed != '' and re.search('^a', feed) is None:
            feed = page.readline()

        while feed != '':
            # Strip only the line terminator so a final line without a
            # trailing newline does not lose its last character.
            fields = feed.rstrip('\r\n').replace('a', '').split(',')
            data.append(np.array([float(field) for field in fields]))
            feed = page.readline()
    finally:
        # Always release the HTTP connection.
        page.close()

    if not data:
        log.error('** No quote data found for stock: {}'.format(symbol))
        return None

    # NOTE(review): the column order below is assumed from the original
    # code -- verify against the feed's COLUMNS header.
    dates, open_, close, high, low, volume = zip(*data)

    adj_close = np.empty(len(close))
    adj_close.fill(np.nan)
    data = {
        'open': open_,
        'close': close,
        'high': high,
        'low': low,
        'volume': volume,
        'adj_close': adj_close  # for compatibility with Fields.QUOTES
    }
    #NOTE use here index ?
    dates = Index(epochToDate(d) for d in dates)
    return DataFrame(data, index=dates.tz_localize(self.tz))
def getMinutelyQuotes(self, symbol, market, index):
    """
    Fetch intraday quotes for ``symbol`` from the Google Finance
    ``getprices`` endpoint.

    Parameters
    ----------
    symbol : str
        Ticker symbol (``q`` query parameter).
    market : str
        Exchange code (``x`` query parameter).
    index : pandas date index
        Its first and last elements give the number of days requested and
        its ``freqstr`` gives the sampling interval.

    Returns
    -------
    DataFrame or None
        Frame with ``open``, ``close``, ``high``, ``low``, ``volume`` and a
        NaN-filled ``adj_close`` column, indexed by the parsed dates
        localized to ``self.tz``. ``None`` is returned when the frequency is
        unsupported, the HTTP request fails, or no data rows are found.
    """
    days = abs((index[index.shape[0] - 1] - index[0]).days)

    # NOTE(review): a single leading digit is assumed in freqstr (e.g.
    # '1T'); multi-digit or digit-less frequencies are not handled here.
    freq = int(index.freqstr[0])
    unit = index.freqstr[1]
    if unit == 'S':
        freq += 1
    elif unit == 'T':
        freq *= 61
    elif unit == 'H':
        freq *= 3601
    else:
        log.error('** No suitable time frequency: {}'.format(index.freqstr))
        return None

    url = 'http://www.google.com/finance/getprices?q=%s&x=%s&p=%sd&i=%s' \
        % (symbol, market, str(days), str(freq + 1))
    log.info('On %d days with a precision of %d secs' % (days, freq))

    try:
        page = urllib2.urlopen(url)
    except urllib2.HTTPError:
        # '{}' rather than '%s': str.format() ignores percent placeholders,
        # so the symbol was never included in the message.
        log.error('** Unable to fetch data for stock: {}'.format(symbol))
        return None
    except urllib2.URLError:
        log.error('** URL error for stock: {}'.format(symbol))
        return None

    rows = []
    try:
        # Skip header lines until the first one starting with 'a'; also stop
        # at EOF ('' from readline) so a response without such a line cannot
        # spin forever.
        feed = page.readline()
        while feed != '' and re.search('^a', feed) is None:
            feed = page.readline()

        while feed != '':
            # rstrip the terminator instead of slicing off the last
            # character, which would eat data on an unterminated last line.
            values = feed.rstrip('\r\n').replace('a', '').split(',')
            rows.append(np.array([float(value) for value in values]))
            feed = page.readline()
    finally:
        # Close the response whether or not parsing succeeded.
        page.close()

    if not rows:
        log.error('** No quote data found for stock: {}'.format(symbol))
        return None

    # NOTE(review): column order is taken over from the original code --
    # confirm it matches the feed's COLUMNS header.
    dates, open_, close, high, low, volume = zip(*rows)

    adj_close = np.empty(len(close))
    adj_close.fill(np.nan)
    data = {
        'open': open_,
        'close': close,
        'high': high,
        'low': low,
        'volume': volume,
        'adj_close': adj_close  # for compatibility with Fields.QUOTES
    }
    #NOTE use here index ?
    dates = Index(epochToDate(d) for d in dates)
    return DataFrame(data, index=dates.tz_localize(self.tz))
def test_where_invalid_dtypes(self):
    """Check that ``DatetimeIndex.where`` yields an object-dtype Index for a mismatched ``other``."""
    dti = date_range("20130101", periods=3, tz="US/Eastern")

    tail = dti[2:].tolist()
    i2 = Index([pd.NaT, pd.NaT] + tail)

    mask = notna(i2)

    # passing tz-naive ndarray to tzaware DTI
    expected = Index([pd.NaT.asm8, pd.NaT.asm8] + tail, dtype=object)
    tm.assert_index_equal(dti.where(mask, i2.values), expected)

    # passing tz-aware DTI to tznaive DTI
    naive = dti.tz_localize(None)
    result = naive.where(mask, i2)
    expected = Index([i2[0], i2[1]] + naive[2:].tolist(), dtype=object)
    tm.assert_index_equal(result, expected)

    # PeriodIndex as the replacement values
    pi = i2.tz_localize(None).to_period("D")
    result = dti.where(mask, pi)
    expected = Index([pi[0], pi[1]] + tail, dtype=object)
    tm.assert_index_equal(result, expected)

    # timedelta64[ns] view of the underlying integers
    tda = i2.asi8.view("timedelta64[ns]")
    result = dti.where(mask, tda)
    expected = Index([tda[0], tda[1]] + tail, dtype=object)
    assert isinstance(expected[0], np.timedelta64)
    tm.assert_index_equal(result, expected)

    # raw int64 values
    result = dti.where(mask, i2.asi8)
    expected = Index([pd.NaT.value, pd.NaT.value] + tail, dtype=object)
    assert isinstance(expected[0], int)
    tm.assert_index_equal(result, expected)

    # non-matching scalar
    td = pd.Timedelta(days=4)
    result = dti.where(mask, td)
    expected = Index([td, td] + tail, dtype=object)
    assert expected[0] is td
    tm.assert_index_equal(result, expected)
def _convert_listlike_datetimes(
    arg,
    box,
    format,
    name=None,
    tz=None,
    unit=None,
    errors=None,
    infer_datetime_format=None,
    dayfirst=None,
    yearfirst=None,
    exact=None,
):
    """
    Helper function for to_datetime. Performs the conversion of a 1D
    list-like of dates.

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        dates to be parsed
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    format : string or None
        strftime-style format used for parsing; cannot be combined
        with ``unit``
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    ndarray of parsed dates
        Returns:

        - Index-like if box=True
        - ndarray of Timestamps if box=False

    Raises
    ------
    ValueError
        If both ``format`` and ``unit`` are given, or if the input cannot be
        converted with the '%Y%m%d' format.
    TypeError
        If ``arg`` has more than one dimension.
    """
    from pandas import DatetimeIndex
    from pandas.core.arrays import DatetimeArray
    from pandas.core.arrays.datetimes import (
        maybe_convert_dtype,
        objects_to_datetime64ns,
    )

    # python sequences are converted to an object ndarray before the
    # dtype checks below
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        # already tz-aware: wrap anything that is not a DatetimeArray /
        # DatetimeIndex, otherwise only re-localize when tz == "utc"
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # boxing failed; fall through and return arg unchanged
                pass

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        arg = getattr(arg, "values", arg)
        result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
        if box:
            if errors == "ignore":
                from pandas import Index

                result = Index(result, name=name)
            else:
                result = DatetimeIndex(result, name=name)
            # GH 23758: We may still need to localize the result with tz
            # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
            # result will be naive but in UTC
            try:
                result = result.tz_localize("UTC").tz_convert(tz_parsed)
            except AttributeError:
                # Regular Index from 'ignore' path
                return result
            if tz is not None:
                if result.tz is None:
                    result = result.tz_localize(tz)
                else:
                    result = result.tz_convert(tz)
        return result
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    arg, _ = maybe_convert_dtype(arg, copy=False)

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            # an explicitly passed ISO format is enforced; an inferred one is not
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == "%Y%m%d":
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    raise ValueError(
                        "cannot convert the input to '%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(arg, format, exact=exact, errors=errors)
                    if "%Z" in format or "%z" in format:
                        return _return_parsed_timezone_results(
                            result, timezones, box, tz, name)
                except tslibs.OutOfBoundsDatetime:
                    if errors == "raise":
                        raise
                    elif errors == "coerce":
                        # coerce: all-iNaT datetime64[ns] array of the same shape
                        result = np.empty(arg.shape, dtype="M8[ns]")
                        iresult = result.view("i8")
                        iresult.fill(tslibs.iNaT)
                    else:
                        # any other errors value: hand back the object array as-is
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == "raise":
                            raise
                        elif errors == "coerce":
                            # coerce: all-iNaT datetime64[ns] array of the same shape
                            result = np.empty(arg.shape, dtype="M8[ns]")
                            iresult = result.view("i8")
                            iresult.fill(tslibs.iNaT)
                        else:
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            # datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                # the fallback failed too: surface the original parsing error
                raise e

    if result is None:
        # reached when no format is in effect, or an inferred format failed
        # to parse and left result unset
        assert format is None or infer_datetime_format
        utc = tz == "utc"
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True,
        )

    if tz_parsed is not None:
        if box:
            # We can take a shortcut since the datetime64 numpy array
            # is in UTC
            return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)
        else:
            # Convert the datetime64 numpy array to an numpy array
            # of datetime objects
            result = [
                Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result
            ]
            return np.array(result, dtype=object)

    if box:
        utc = tz == "utc"
        return _box_as_indexlike(result, utc=utc, name=name)
    return result
def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
                                unit=None, errors=None,
                                infer_datetime_format=None, dayfirst=None,
                                yearfirst=None, exact=None):
    """
    Helper function for to_datetime. Performs the conversion of a 1D
    list-like of dates.

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        dates to be parsed
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    format : string or None
        strftime-style format used for parsing; cannot be combined
        with ``unit``
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    ndarray of parsed dates
        Returns:

        - Index-like if box=True
        - ndarray of Timestamps if box=False

    Raises
    ------
    ValueError
        If both ``format`` and ``unit`` are given, or if the input cannot be
        converted with the '%Y%m%d' format.
    TypeError
        If ``arg`` has more than one dimension.
    """
    from pandas import DatetimeIndex
    from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray
    from pandas.core.arrays.datetimes import (
        maybe_convert_dtype, objects_to_datetime64ns)

    # python sequences are converted to an object ndarray before the
    # dtype checks below
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        # already tz-aware: wrap anything that is not a DatetimeArray /
        # DatetimeIndex, otherwise only re-localize when tz == 'utc'
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == 'utc':
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # boxing failed; fall through and return arg unchanged
                pass

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        arg = getattr(arg, 'values', arg)
        result = tslib.array_with_unit_to_datetime(arg, unit,
                                                   errors=errors)
        if box:
            if errors == 'ignore':
                from pandas import Index
                result = Index(result, name=name)
                # GH 23758: We may still need to localize the result with tz
                try:
                    return result.tz_localize(tz)
                except AttributeError:
                    # result has no tz_localize (not a datetime-like Index)
                    return result

            return DatetimeIndex(result, tz=tz, name=name)
        return result
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a string, datetime, list, tuple, '
                        '1-d array, or Series')

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    arg, _ = maybe_convert_dtype(arg, copy=False)

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            # an explicitly passed ISO format is enforced; an inferred one is not
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    raise ValueError("cannot convert the input to "
                                     "'%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(
                        arg, format, exact=exact, errors=errors)
                    if '%Z' in format or '%z' in format:
                        return _return_parsed_timezone_results(
                            result, timezones, box, tz, name)
                except tslibs.OutOfBoundsDatetime:
                    if errors == 'raise':
                        raise
                    # NOTE(review): every non-'raise' errors value, including
                    # 'coerce', gets the unparsed object array back here --
                    # confirm this is intended for 'coerce'.
                    result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == 'raise':
                            raise
                        # NOTE(review): same as above -- 'coerce' returns the
                        # unparsed input rather than NaT values; confirm.
                        result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            # datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                # the fallback failed too: surface the original parsing error
                raise e

    if result is None:
        # reached when no format is in effect, or an inferred format failed
        # to parse and left result unset
        assert format is None or infer_datetime_format
        utc = tz == 'utc'
        result, tz_parsed = objects_to_datetime64ns(
            arg, dayfirst=dayfirst, yearfirst=yearfirst,
            utc=utc, errors=errors, require_iso8601=require_iso8601,
            allow_object=True)

    if tz_parsed is not None:
        if box:
            # We can take a shortcut since the datetime64 numpy array
            # is in UTC
            return DatetimeIndex._simple_new(result, name=name,
                                             tz=tz_parsed)
        else:
            # Convert the datetime64 numpy array to an numpy array
            # of datetime objects
            result = [Timestamp(ts, tz=tz_parsed).to_pydatetime()
                      for ts in result]
            return np.array(result, dtype=object)

    if box:
        # Ensure we return an Index in all cases where box=True
        if is_datetime64_dtype(result):
            return DatetimeIndex(result, tz=tz, name=name)
        elif is_object_dtype(result):
            # e.g. an Index of datetime objects
            from pandas import Index
            return Index(result, name=name)
    return result