예제 #1
0
    def _index_from_records(self, recarr):
        index = recarr.dtype.metadata['index']

        if len(index) == 1:
            rtn = Index(np.copy(recarr[str(index[0])]), name=index[0])
            if isinstance(
                    rtn,
                    DatetimeIndex) and 'index_tz' in recarr.dtype.metadata:
                rtn = rtn.tz_localize('UTC').tz_convert(
                    recarr.dtype.metadata['index_tz'])
        else:
            level_arrays = []
            index_tz = recarr.dtype.metadata.get('index_tz', [])
            for level_no, index_name in enumerate(index):
                # build each index level separately to ensure we end up with the right index dtype
                level = Index(np.copy(recarr[str(index_name)]))
                if level_no < len(index_tz):
                    tz = index_tz[level_no]
                    if tz is not None:
                        if not isinstance(level,
                                          DatetimeIndex) and len(level) == 0:
                            # index type information got lost during save as the index was empty, cast back
                            level = DatetimeIndex([], tz=tz)
                        else:
                            level = level.tz_localize('UTC').tz_convert(tz)
                level_arrays.append(level)
            rtn = MultiIndex.from_arrays(level_arrays, names=index)
        return rtn
예제 #2
0
    def test_where_invalid_dtypes(self):
        """``DatetimeIndex.where`` raises TypeError for every mismatched ``other``."""
        expected_msg = "Where requires matching dtype"
        aware = pd.date_range("20130101", periods=3, tz="US/Eastern")

        # First two entries are NaT, the last one is taken from ``aware``.
        filler = Index([pd.NaT, pd.NaT] + aware[2:].tolist())
        keep = notna(filler)

        # Each thunk performs one ``where`` call that is expected to fail.
        offending_calls = (
            # passing tz-naive ndarray to tzaware DTI
            lambda: aware.where(keep, filler.values),
            # passing tz-aware DTI to tznaive DTI
            lambda: aware.tz_localize(None).where(keep, filler),
            lambda: aware.where(keep, filler.tz_localize(None).to_period("D")),
            lambda: aware.where(keep, filler.asi8.view("timedelta64[ns]")),
            lambda: aware.where(keep, filler.asi8),
            # non-matching scalar
            lambda: aware.where(keep, pd.Timedelta(days=4)),
        )
        for call in offending_calls:
            with pytest.raises(TypeError, match=expected_msg):
                call()
예제 #3
0
    def test_where_invalid_dtypes(self):
        """``DatetimeIndex.where`` raises TypeError when ``other`` does not match."""
        aware = date_range("20130101", periods=3, tz="US/Eastern")

        # First two entries are NaT, the last one is taken from ``aware``.
        filler = Index([pd.NaT, pd.NaT] + aware[2:].tolist())
        keep = notna(filler)

        not_datetimelike = "value should be a 'Timestamp', 'NaT', or array of those. Got"
        tz_mismatch = "Cannot compare tz-naive and tz-aware datetime-like objects"

        # Mixing tz-naive and tz-aware datetimes, in either direction.
        tz_mismatch_calls = (
            # passing tz-naive ndarray to tzaware DTI
            lambda: aware.where(keep, filler.values),
            # passing tz-aware DTI to tznaive DTI
            lambda: aware.tz_localize(None).where(keep, filler),
        )
        for call in tz_mismatch_calls:
            with pytest.raises(TypeError, match=tz_mismatch):
                call()

        # ``other`` values that are not datetimes at all.
        wrong_type_calls = (
            lambda: aware.where(keep, filler.tz_localize(None).to_period("D")),
            lambda: aware.where(keep, filler.asi8.view("timedelta64[ns]")),
            lambda: aware.where(keep, filler.asi8),
            # non-matching scalar
            lambda: aware.where(keep, pd.Timedelta(days=4)),
        )
        for call in wrong_type_calls:
            with pytest.raises(TypeError, match=not_datetimelike):
                call()
예제 #4
0
    def _index_from_records(self, recarr):
        index = recarr.dtype.metadata['index']

        if len(index) == 1:
            rtn = Index(np.copy(recarr[str(index[0])]), name=index[0])
            if isinstance(rtn, DatetimeIndex) and 'index_tz' in recarr.dtype.metadata:
                rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz'])
        else:
            level_arrays = []
            index_tz = recarr.dtype.metadata.get('index_tz', [])
            for level_no, index_name in enumerate(index):
                # build each index level separately to ensure we end up with the right index dtype
                level = Index(np.copy(recarr[str(index_name)]))
                if level_no < len(index_tz):
                    tz = index_tz[level_no]
                    if tz is not None:
                        if not isinstance(level, DatetimeIndex) and len(level) == 0:
                            # index type information got lost during save as the index was empty, cast back
                            level = DatetimeIndex([], tz=tz)
                        else:
                            level = level.tz_localize('UTC').tz_convert(tz)
                level_arrays.append(level)
            rtn = MultiIndex.from_arrays(level_arrays, names=index)
        return rtn
예제 #5
0
 def getMinutelyQuotes(self, symbol, market, index):
     """Download intraday quotes for ``symbol`` from Google Finance.

     Parameters
     ----------
     symbol : str
         Ticker, sent as the ``q`` query parameter.
     market : str
         Exchange code, sent as the ``x`` query parameter.
     index : index-like exposing ``freqstr``
         Its first and last elements give the number of days requested;
         its ``freqstr`` (one digit followed by 'S', 'T' or 'H') gives the
         sampling interval.

     Returns
     -------
     DataFrame or None
         Columns 'open', 'close', 'high', 'low', 'volume' plus an all-NaN
         'adj_close', indexed by the quote dates localized to ``self.tz``.
         ``None`` when the frequency is unsupported, the request fails, or
         the response has no data section.
     """
     days = abs((index[index.shape[0] - 1] - index[0]).days)
     # NOTE(review): only the first character is read as the multiplier, so
     # multi-digit or bare frequency strings (e.g. '15T', 'T') are rejected.
     freq = int(index.freqstr[0])
     unit = index.freqstr[1]
     if unit == 'S':
         freq += 1
     elif unit == 'T':
         freq *= 61
     elif unit == 'H':
         freq *= 3601
     else:
         log.error('** No suitable time frequency: {}'.format(index.freqstr))
         return None
     url = 'http://www.google.com/finance/getprices?q=%s&x=%s&p=%sd&i=%s' \
             % (symbol, market, str(days), str(freq + 1))
     log.info('On %d days with a precision of %d secs' % (days, freq))
     try:
         page = urllib2.urlopen(url)
     except urllib2.HTTPError:
         # '{}' (not '%s') so that str.format actually inserts the symbol.
         log.error('** Unable to fetch data for stock: {}'.format(symbol))
         return None
     except urllib2.URLError:
         log.error('** URL error for stock: {}'.format(symbol))
         return None

     rows = []
     try:
         # Skip header lines: the data section starts at the first line
         # beginning with 'a'.
         feed = page.readline()
         while feed != '' and re.search('^a', feed) is None:
             feed = page.readline()
         if feed == '':
             # readline() keeps returning '' at end of stream, so without
             # this guard the header scan would never terminate.
             log.error('** No quote data in response for stock: {}'.format(symbol))
             return None
         while feed != '':
             # Strip only the line terminator so a final line without a
             # trailing newline keeps its last digit.
             fields = feed.rstrip('\r\n').replace('a', '').split(',')
             rows.append(np.array([float(value) for value in fields]))
             feed = page.readline()
     finally:
         # Always release the HTTP response, including on parse errors.
         page.close()

     # Column order as delivered by the feed.
     dates, open_, close, high, low, volume = zip(*rows)
     adj_close = np.empty(len(close))
     adj_close.fill(np.nan)
     data = {
         'open': open_,
         'close': close,
         'high': high,
         'low': low,
         'volume': volume,
         'adj_close': adj_close  # for compatibility with Fields.QUOTES
     }
     #NOTE use here index ?
     dates = Index(epochToDate(d) for d in dates)
     return DataFrame(data, index=dates.tz_localize(self.tz))
예제 #6
0
 def getMinutelyQuotes(self, symbol, market, index):
     """Fetch intraday quotes for ``symbol`` from Google Finance.

     Parameters
     ----------
     symbol : str
         Ticker, sent as the ``q`` query parameter.
     market : str
         Exchange code, sent as the ``x`` query parameter.
     index : index-like exposing ``freqstr``
         Its first and last elements give the number of days requested;
         its ``freqstr`` (one digit followed by 'S', 'T' or 'H') gives the
         sampling interval.

     Returns
     -------
     DataFrame or None
         Columns 'open', 'close', 'high', 'low', 'volume' plus an all-NaN
         'adj_close', indexed by the quote dates localized to ``self.tz``.
         ``None`` when the frequency is unsupported, the request fails, or
         the response has no data section.
     """
     days = abs((index[index.shape[0] - 1] - index[0]).days)
     # NOTE(review): only the first character is read as the multiplier, so
     # multi-digit or bare frequency strings (e.g. '15T', 'T') are rejected.
     freq = int(index.freqstr[0])
     unit = index.freqstr[1]
     if unit == 'S':
         freq += 1
     elif unit == 'T':
         freq *= 61
     elif unit == 'H':
         freq *= 3601
     else:
         log.error('** No suitable time frequency: {}'.format(index.freqstr))
         return None
     url = 'http://www.google.com/finance/getprices?q=%s&x=%s&p=%sd&i=%s' \
             % (symbol, market, str(days), str(freq + 1))
     log.info('On %d days with a precision of %d secs' % (days, freq))
     try:
         page = urllib2.urlopen(url)
     except urllib2.HTTPError:
         # '{}' (not '%s') so that str.format actually inserts the symbol.
         log.error('** Unable to fetch data for stock: {}'.format(symbol))
         return None
     except urllib2.URLError:
         log.error('** URL error for stock: {}'.format(symbol))
         return None

     rows = []
     try:
         # Skip header lines: the data section starts at the first line
         # beginning with 'a'.
         feed = page.readline()
         while feed != '' and re.search('^a', feed) is None:
             feed = page.readline()
         if feed == '':
             # readline() keeps returning '' at end of stream, so without
             # this guard the header scan would never terminate.
             log.error('** No quote data in response for stock: {}'.format(symbol))
             return None
         while feed != '':
             # Strip only the line terminator so a final line without a
             # trailing newline keeps its last digit.
             fields = feed.rstrip('\r\n').replace('a', '').split(',')
             rows.append(np.array([float(value) for value in fields]))
             feed = page.readline()
     finally:
         # Always release the HTTP response, including on parse errors.
         page.close()

     # Column order as delivered by the feed.
     dates, open_, close, high, low, volume = zip(*rows)
     adj_close = np.empty(len(close))
     adj_close.fill(np.nan)
     data = {
             'open'      : open_,
             'close'     : close,
             'high'      : high,
             'low'       : low,
             'volume'    : volume,
             'adj_close' : adj_close  # for compatibility with Fields.QUOTES
     }
     #NOTE use here index ?
     dates = Index(epochToDate(d) for d in dates)
     return DataFrame(data, index=dates.tz_localize(self.tz))
예제 #7
0
    def test_where_invalid_dtypes(self):
        dti = date_range("20130101", periods=3, tz="US/Eastern")

        tail = dti[2:].tolist()
        i2 = Index([pd.NaT, pd.NaT] + tail)

        mask = notna(i2)

        # passing tz-naive ndarray to tzaware DTI
        result = dti.where(mask, i2.values)
        expected = Index([pd.NaT.asm8, pd.NaT.asm8] + tail, dtype=object)
        tm.assert_index_equal(result, expected)

        # passing tz-aware DTI to tznaive DTI
        naive = dti.tz_localize(None)
        result = naive.where(mask, i2)
        expected = Index([i2[0], i2[1]] + naive[2:].tolist(), dtype=object)
        tm.assert_index_equal(result, expected)

        pi = i2.tz_localize(None).to_period("D")
        result = dti.where(mask, pi)
        expected = Index([pi[0], pi[1]] + tail, dtype=object)
        tm.assert_index_equal(result, expected)

        tda = i2.asi8.view("timedelta64[ns]")
        result = dti.where(mask, tda)
        expected = Index([tda[0], tda[1]] + tail, dtype=object)
        assert isinstance(expected[0], np.timedelta64)
        tm.assert_index_equal(result, expected)

        result = dti.where(mask, i2.asi8)
        expected = Index([pd.NaT.value, pd.NaT.value] + tail, dtype=object)
        assert isinstance(expected[0], int)
        tm.assert_index_equal(result, expected)

        # non-matching scalar
        td = pd.Timedelta(days=4)
        result = dti.where(mask, td)
        expected = Index([td, td] + tail, dtype=object)
        assert expected[0] is td
        tm.assert_index_equal(result, expected)
예제 #8
0
def _convert_listlike_datetimes(
    arg,
    box,
    format,
    name=None,
    tz=None,
    unit=None,
    errors=None,
    infer_datetime_format=None,
    dayfirst=None,
    yearfirst=None,
    exact=None,
):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates.

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    format : string or None
        format string handed to array_strptime (e.g. "%Y%m%d"); cannot be
        combined with ``unit``
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like or ndarray
        - Index-like if box=True
        - ndarray of parsed dates if box=False

    Raises
    ------
    ValueError
        Among other cases, if both ``format`` and ``unit`` are given (checked
        only when ``arg`` is not already tz-aware or datetime64[ns]).
    TypeError
        If ``arg`` has more than one dimension (checked only when ``unit``
        is None).
    """
    from pandas import DatetimeIndex
    from pandas.core.arrays import DatetimeArray
    from pandas.core.arrays.datetimes import (
        maybe_convert_dtype,
        objects_to_datetime64ns,
    )

    # Normalize plain Python sequences to an object ndarray.
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            # Drop the existing timezone, then re-label the values as UTC.
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # Boxing failed; fall through and return arg unchanged.
                pass

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        # Use the underlying values when arg exposes a ``values`` attribute.
        arg = getattr(arg, "values", arg)
        result, tz_parsed = tslib.array_with_unit_to_datetime(arg,
                                                              unit,
                                                              errors=errors)
        if box:
            if errors == "ignore":
                from pandas import Index

                result = Index(result, name=name)
            else:
                result = DatetimeIndex(result, name=name)
            # GH 23758: We may still need to localize the result with tz
            # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
            # result will be naive but in UTC
            try:
                result = result.tz_localize("UTC").tz_convert(tz_parsed)
            except AttributeError:
                # Regular Index from 'ignore' path
                return result
            if tz is not None:
                if result.tz is None:
                    result = result.tz_localize(tz)
                else:
                    result = result.tz_convert(tz)
        return result
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    arg, _ = maybe_convert_dtype(arg, copy=False)

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            # Only an explicitly given ISO format is enforced; clearing
            # ``format`` routes parsing to objects_to_datetime64ns below.
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == "%Y%m%d":
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    raise ValueError(
                        "cannot convert the input to '%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(arg,
                                                       format,
                                                       exact=exact,
                                                       errors=errors)
                    if "%Z" in format or "%z" in format:
                        return _return_parsed_timezone_results(
                            result, timezones, box, tz, name)
                except tslibs.OutOfBoundsDatetime:
                    if errors == "raise":
                        raise
                    elif errors == "coerce":
                        # coerce: all-NaT datetime64[ns] array of arg's shape
                        result = np.empty(arg.shape, dtype="M8[ns]")
                        iresult = result.view("i8")
                        iresult.fill(tslibs.iNaT)
                    else:
                        # any other errors value: hand back the input unchanged
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == "raise":
                            raise
                        elif errors == "coerce":
                            # coerce: all-NaT datetime64[ns] array of arg's shape
                            result = np.empty(arg.shape, dtype="M8[ns]")
                            iresult = result.view("i8")
                            iresult.fill(tslibs.iNaT)
                        else:
                            # any other errors value: hand back the input unchanged
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            #  datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                # The fallback failed too; surface the original parsing error.
                raise e

    if result is None:
        # Reached with no format, or when parsing with an inferred format failed.
        assert format is None or infer_datetime_format
        utc = tz == "utc"
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True,
        )

    if tz_parsed is not None:
        if box:
            # We can take a shortcut since the datetime64 numpy array
            # is in UTC
            return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)
        else:
            # Convert the datetime64 numpy array to an numpy array
            # of datetime objects
            result = [
                Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result
            ]
            return np.array(result, dtype=object)

    if box:
        utc = tz == "utc"
        return _box_as_indexlike(result, utc=utc, name=name)
    return result
예제 #9
0
def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
                                unit=None, errors=None,
                                infer_datetime_format=None, dayfirst=None,
                                yearfirst=None, exact=None):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates.

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    format : string or None
        format string handed to array_strptime (e.g. '%Y%m%d'); cannot be
        combined with ``unit``
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like or ndarray
        - Index-like if box=True
        - ndarray of parsed dates if box=False

    Raises
    ------
    ValueError
        Among other cases, if both ``format`` and ``unit`` are given (checked
        only when ``arg`` is not already tz-aware or datetime64[ns]).
    TypeError
        If ``arg`` has more than one dimension (checked only when ``unit``
        is None).
    """
    from pandas import DatetimeIndex
    from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray
    from pandas.core.arrays.datetimes import (
        maybe_convert_dtype, objects_to_datetime64ns)

    # Normalize plain Python sequences to an object ndarray.
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == 'utc':
            # Drop the existing timezone, then re-label the values as UTC.
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # Boxing failed; fall through and return arg unchanged.
                pass

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        # Use the underlying values when arg exposes a ``values`` attribute.
        arg = getattr(arg, 'values', arg)
        result = tslib.array_with_unit_to_datetime(arg, unit,
                                                   errors=errors)
        if box:
            if errors == 'ignore':
                from pandas import Index
                result = Index(result, name=name)
                # GH 23758: We may still need to localize the result with tz
                try:
                    return result.tz_localize(tz)
                except AttributeError:
                    # Index without tz_localize: return it unlocalized.
                    return result

            return DatetimeIndex(result, tz=tz, name=name)
        return result
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a string, datetime, list, tuple, '
                        '1-d array, or Series')

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    arg, _ = maybe_convert_dtype(arg, copy=False)

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            # Only an explicitly given ISO format is enforced; clearing
            # ``format`` routes parsing to objects_to_datetime64ns below.
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    raise ValueError("cannot convert the input to "
                                     "'%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(
                        arg, format, exact=exact, errors=errors)
                    if '%Z' in format or '%z' in format:
                        return _return_parsed_timezone_results(
                            result, timezones, box, tz, name)
                except tslibs.OutOfBoundsDatetime:
                    if errors == 'raise':
                        raise
                    # any other errors value: hand back the input unchanged
                    result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == 'raise':
                            raise
                        # any other errors value: hand back the input unchanged
                        result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            #  datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                # The fallback failed too; surface the original parsing error.
                raise e

    if result is None:
        # Reached with no format, or when parsing with an inferred format failed.
        assert format is None or infer_datetime_format
        utc = tz == 'utc'
        result, tz_parsed = objects_to_datetime64ns(
            arg, dayfirst=dayfirst, yearfirst=yearfirst,
            utc=utc, errors=errors, require_iso8601=require_iso8601,
            allow_object=True)

    if tz_parsed is not None:
        if box:
            # We can take a shortcut since the datetime64 numpy array
            # is in UTC
            return DatetimeIndex._simple_new(result, name=name,
                                             tz=tz_parsed)
        else:
            # Convert the datetime64 numpy array to an numpy array
            # of datetime objects
            result = [Timestamp(ts, tz=tz_parsed).to_pydatetime()
                      for ts in result]
            return np.array(result, dtype=object)

    if box:
        # Ensure we return an Index in all cases where box=True
        if is_datetime64_dtype(result):
            return DatetimeIndex(result, tz=tz, name=name)
        elif is_object_dtype(result):
            # e.g. an Index of datetime objects
            from pandas import Index
            return Index(result, name=name)
    # box is False, or result is neither datetime64 nor object dtype.
    return result