Beispiel #1
0
def truncate(self, before=None, after=None, copy=True):
    """Truncate a sorted DataFrame / Series before and/or after some
    particular dates.

    Parameters
    ----------
    before : date
        Truncate before date
    after : date
        Truncate after date
    copy : boolean, default True
        When true, ``.copy()`` is called on the sliced result before it is
        returned

    Returns
    -------
    truncated : type of caller

    Raises
    ------
    AssertionError
        If both bounds are given and ``before`` is later than ``after``
    """
    from pandas.tseries.tools import to_datetime
    before = to_datetime(before)
    after = to_datetime(after)

    if before is not None and after is not None and before > after:
        # The message reads "<X> must be after <Y>": X is the upper bound
        # (``after``) and Y the lower bound (``before``).
        raise AssertionError('Truncate: %s must be after %s' %
                             (after, before))

    result = self.ix[before:after]

    if isinstance(self.index, MultiIndex):
        # For a MultiIndex, take the index from the index's own truncate()
        result.index = self.index.truncate(before, after)

    if copy:
        result = result.copy()

    return result
Beispiel #2
0
def truncate(self, before=None, after=None, copy=True):
    """Truncate a sorted DataFrame / Series before and/or after some
    particular dates.

    Parameters
    ----------
    before : date
        Truncate before date
    after : date
        Truncate after date
    copy : boolean, default True
        When true, ``.copy()`` is called on the sliced result before it is
        returned

    Returns
    -------
    truncated : type of caller

    Raises
    ------
    AssertionError
        If both bounds are given and ``before <= after`` does not hold
    """
    from pandas.tseries.tools import to_datetime
    before = to_datetime(before)
    after = to_datetime(after)

    if before is not None and after is not None:
        # Explicit raise instead of a bare ``assert`` so the check is not
        # stripped under ``python -O`` and carries a useful message. The
        # exception type stays AssertionError for existing callers.
        if not before <= after:
            raise AssertionError('Truncate: %s must be after %s' %
                                 (after, before))

    result = self.ix[before:after]

    if isinstance(self.index, MultiIndex):
        # For a MultiIndex, take the index from the index's own truncate()
        result.index = self.index.truncate(before, after)

    if copy:
        result = result.copy()

    return result
Beispiel #3
0
def generate_range(start=None, end=None, periods=None,
                   offset=BDay(), time_rule=None):
    """
    Lazily yield dates spaced by a pandas DateOffset.

    Similar to dateutil.rrule, except that the increments are expressed
    with pandas DateOffset objects.

    Parameters
    ----------
    start : datetime (default None)
    end : datetime (default None)
    periods : int, optional
    offset : DateOffset, default BDay()
        Increment between consecutive dates
    time_rule : optional
        When given, ``get_offset(time_rule)`` replaces ``offset``

    Note
    ----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
    satisfy start <= date <= end.

    Returns
    -------
    dates : generator object

    Raises
    ------
    ValueError
        If applying the offset does not move the date forward
    """
    if time_rule is not None:
        from pandas.tseries.frequencies import get_offset
        offset = get_offset(time_rule)

    start = to_datetime(start)
    end = to_datetime(end)

    # Snap the bounds onto the offset: start moves forward, end moves back.
    if start and not offset.onOffset(start):
        start = offset.rollforward(start)

    if end and not offset.onOffset(end):
        end = offset.rollback(end)

        # Rolling back crossed the start: force an empty range.
        if periods is None and end < start:
            end, periods = None, 0

    # Derive whichever bound is missing from the other one and ``periods``.
    if end is None:
        end = start + (periods - 1) * offset

    if start is None:
        start = end - (periods - 1) * offset

    current = start
    while current <= end:
        yield current

        # faster than current + offset
        upcoming = offset.apply(current)
        if upcoming <= current:
            raise ValueError('Offset %s did not increment date' % offset)
        current = upcoming
Beispiel #4
0
def generate_range(start=None, end=None, periods=None,
                   offset=BDay(), time_rule=None):
    """
    Generate a sequence of dates stepping by the given time offset.

    Works like dateutil.rrule but uses pandas DateOffset objects to
    represent the time increments.

    Parameters
    ----------
    start : datetime (default None)
    end : datetime (default None)
    periods : int, optional
    offset : DateOffset, default BDay()
        Step between successive dates
    time_rule : optional
        If not None, the offset is looked up with ``get_offset(time_rule)``
        and the ``offset`` argument is ignored

    Note
    ----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
    satisfy start <= date <= end.

    Returns
    -------
    dates : generator object

    Raises
    ------
    ValueError
        If ``offset.apply`` returns a date that is not later than its input
    """
    if time_rule is not None:
        from pandas.tseries.frequencies import get_offset
        offset = get_offset(time_rule)

    start = to_datetime(start)
    end = to_datetime(end)

    if start and not offset.onOffset(start):
        start = offset.rollforward(start)

    if end and not offset.onOffset(end):
        end = offset.rollback(end)

        if periods is None and end < start:
            # The rolled-back end precedes the start, so nothing is yielded
            periods = 0
            end = None

    if end is None:
        end = start + (periods - 1) * offset

    if start is None:
        start = end - (periods - 1) * offset

    stamp = start
    while stamp <= end:
        yield stamp

        # offset.apply is faster than stamp + offset
        advanced = offset.apply(stamp)
        if advanced <= stamp:
            raise ValueError('Offset %s did not increment date' % offset)
        stamp = advanced
Beispiel #5
0
def _handle_date_column(col, format=None):
    if isinstance(format, dict):
        return to_datetime(col, **format)
    else:
        if format in ['D', 's', 'ms', 'us', 'ns']:
            return to_datetime(col, coerce=True, unit=format)
        elif issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer):
            # parse dates as timestamp
            format = 's' if format is None else format
            return to_datetime(col, coerce=True, unit=format)
        else:
            return to_datetime(col, coerce=True, format=format)
Beispiel #6
0
def _handle_date_column(col, format=None):
    if isinstance(format, dict):
        return to_datetime(col, **format)
    else:
        if format in ['D', 's', 'ms', 'us', 'ns']:
            return to_datetime(col, coerce=True, unit=format)
        elif issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer):
            # parse dates as timestamp
            format = 's' if format is None else format
            return to_datetime(col, coerce=True, unit=format)
        else:
            return to_datetime(col, coerce=True, format=format)
    def test_combine_first_dt64(self):
        """combine_first on a datetime Series with a datetime or raw Series."""
        from pandas.tseries.tools import to_datetime

        # both sides converted with to_datetime
        left = to_datetime(Series(["2010", np.NaN]))
        right = to_datetime(Series([np.NaN, "2011"]))
        expected = to_datetime(Series(['2010', '2011']))
        combined = left.combine_first(right)
        assert_series_equal(combined, expected)

        # converted left side, unconverted right side
        left = to_datetime(Series(["2010", np.NaN]))
        right = Series([np.NaN, "2011"])
        expected = Series([datetime(2010, 1, 1), '2011'])
        combined = left.combine_first(right)
        assert_series_equal(combined, expected)
Beispiel #8
0
    def test_combine_first_dt64(self):
        """Check combine_first when the caller holds to_datetime output."""
        from pandas.tseries.tools import to_datetime

        # Case 1: the other Series is also passed through to_datetime
        first = to_datetime(Series(["2010", np.NaN]))
        second = to_datetime(Series([np.NaN, "2011"]))
        assert_series_equal(first.combine_first(second),
                            to_datetime(Series(['2010', '2011'])))

        # Case 2: the other Series is left as constructed
        first = to_datetime(Series(["2010", np.NaN]))
        second = Series([np.NaN, "2011"])
        assert_series_equal(first.combine_first(second),
                            Series([datetime(2010, 1, 1), '2011']))
Beispiel #9
0
def _generate_regular_range(start, end, periods, offset):
    """Build the datetime values for a regularly spaced range.

    For ``Tick`` offsets the values are computed arithmetically on
    nanosecond integers and viewed as ``_NS_DTYPE``. For any other offset
    the dates come from ``generate_range`` and are converted with
    ``tools.to_datetime``.

    Parameters
    ----------
    start, end : datetime-like or None
    periods : int or None
    offset : DateOffset

    Raises
    ------
    NotImplementedError
        For a Tick offset when ``periods`` is given but both ``start`` and
        ``end`` are None
    """
    if isinstance(offset, Tick):
        stride = offset.nanos
        if periods is None:
            b = Timestamp(start).value
            # Exclusive stop for np.arange: half a stride past the last
            # point of the grid anchored at ``b`` that is <= end. Rounding
            # ``end`` up to the epoch-aligned grid instead could emit a
            # value beyond ``end`` when ``b`` is not on that grid.
            e = (b + (Timestamp(end).value - b) // stride * stride +
                 stride // 2 + 1)
        elif start is not None:
            b = Timestamp(start).value
            e = b + periods * stride
        elif end is not None:
            # ``end`` is inclusive, so the exclusive stop is one stride later
            e = Timestamp(end).value + stride
            b = e - periods * stride
        else:
            raise NotImplementedError

        data = np.arange(b, e, stride, dtype=np.int64)
        data = data.view(_NS_DTYPE)
    else:
        if isinstance(start, Timestamp):
            start = start.to_pydatetime()

        if isinstance(end, Timestamp):
            end = end.to_pydatetime()

        xdr = generate_range(start=start,
                             end=end,
                             periods=periods,
                             offset=offset)

        dates = list(xdr)
        data = tools.to_datetime(dates)

    return data
Beispiel #10
0
    def convert(values, unit, axis):
        """Convert date-like ``values`` to float ordinals where possible.

        Scalars: datetime/date go through ``_dt_to_float_ordinal``, time
        objects through ``dates.date2num``, integers and floats are returned
        unchanged, and strings are parsed on a best-effort basis.
        Lists, tuples and arrays are converted element-wise. If that
        conversion raises, the values are returned as they stood at the
        point of failure. ``unit`` and ``axis`` are not used here.
        """
        # The unused function-scope import of DatetimeIndex was dropped.

        def try_parse(values):
            # Best effort: fall back to the raw input if parsing fails
            try:
                return _dt_to_float_ordinal(tools.to_datetime(values))
            except Exception:
                return values

        if isinstance(values, (datetime, pydt.date)):
            return _dt_to_float_ordinal(values)
        elif isinstance(values, pydt.time):
            return dates.date2num(values)
        elif (com.is_integer(values) or com.is_float(values)):
            return values
        elif isinstance(values, basestring):
            return try_parse(values)
        elif isinstance(values, (list, tuple, np.ndarray)):
            if not isinstance(values, np.ndarray):
                values = np.array(values, dtype='O')

            try:
                values = tools.to_datetime(values)
                if isinstance(values, Index):
                    values = values.map(_dt_to_float_ordinal)
                else:
                    values = [_dt_to_float_ordinal(x) for x in values]
            except Exception:
                # Deliberate best effort: leave values as they are
                pass

        return values
Beispiel #11
0
def _generate_regular_range(start, end, periods, offset):
    """Build the datetime values for a regularly spaced range.

    For ``Tick`` offsets the values are computed arithmetically on
    nanosecond integers and viewed as ``_NS_DTYPE``. For any other offset
    the dates come from ``generate_range`` and are converted with
    ``tools.to_datetime``, passing ``utc=True`` when the first generated
    date carries tzinfo.

    Parameters
    ----------
    start, end : datetime-like or None
    periods : int or None
    offset : DateOffset

    Raises
    ------
    NotImplementedError
        For a Tick offset when ``periods`` is given but both ``start`` and
        ``end`` are None
    """
    if isinstance(offset, Tick):
        stride = offset.nanos
        if periods is None:
            b = Timestamp(start).value
            # Exclusive stop for np.arange: half a stride past the last
            # point of the grid anchored at ``b`` that is <= end. Rounding
            # ``end`` up to the epoch-aligned grid instead could emit a
            # value beyond ``end`` when ``b`` is not on that grid.
            e = (b + (Timestamp(end).value - b) // stride * stride +
                 stride // 2 + 1)
        elif start is not None:
            b = Timestamp(start).value
            e = b + periods * stride
        elif end is not None:
            # ``end`` is inclusive, so the exclusive stop is one stride later
            e = Timestamp(end).value + stride
            b = e - periods * stride
        else:
            raise NotImplementedError

        data = np.arange(b, e, stride, dtype=np.int64)
        data = data.view(_NS_DTYPE)
    else:
        xdr = generate_range(start=start, end=end, periods=periods, offset=offset)

        dates = list(xdr)
        utc = len(dates) > 0 and dates[0].tzinfo is not None
        data = tools.to_datetime(dates, utc=utc)

    return data
Beispiel #12
0
    def convert(values, unit, axis):
        """Convert date-like ``values`` to float ordinals where possible.

        Scalars are dispatched on type. Sequences are converted element-wise
        unless they already have an integer or float dtype. Conversion
        failures are swallowed on purpose and the values are returned as
        they stood. ``unit`` and ``axis`` are not used here.
        """
        if isinstance(values, (datetime, pydt.date)):
            return _dt_to_float_ordinal(values)
        if isinstance(values, pydt.time):
            return dates.date2num(values)
        if com.is_integer(values) or com.is_float(values):
            return values
        if isinstance(values, compat.string_types):
            # Best effort: hand back the raw string if it cannot be parsed
            try:
                return _dt_to_float_ordinal(tools.to_datetime(values))
            except Exception:
                return values
        if not isinstance(values, (list, tuple, np.ndarray)):
            return values

        if not isinstance(values, np.ndarray):
            values = com._asarray_tuplesafe(values)

        # Already numeric: nothing to convert
        if com.is_integer_dtype(values) or com.is_float_dtype(values):
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = values.map(_dt_to_float_ordinal)
            else:
                values = [_dt_to_float_ordinal(item) for item in values]
        except Exception:
            pass

        return values
Beispiel #13
0
    def convert(values, unit, axis):
        """Convert date-like ``values`` to float ordinals where possible.

        Scalars: datetime/date go through ``_dt_to_float_ordinal``, time
        objects through ``dates.date2num``, integers and floats are returned
        unchanged, and strings are parsed on a best-effort basis.
        Lists, tuples and arrays are converted element-wise. If that
        conversion raises, the values are returned as they stood at the
        point of failure. ``unit`` and ``axis`` are not used here.
        """
        # The unused function-scope import of DatetimeIndex was dropped.

        def try_parse(values):
            # Best effort: fall back to the raw input if parsing fails
            try:
                return _dt_to_float_ordinal(tools.to_datetime(values))
            except Exception:
                return values

        if isinstance(values, (datetime, pydt.date)):
            return _dt_to_float_ordinal(values)
        elif isinstance(values, pydt.time):
            return dates.date2num(values)
        elif com.is_integer(values) or com.is_float(values):
            return values
        elif isinstance(values, str):
            return try_parse(values)
        elif isinstance(values, (list, tuple, np.ndarray)):
            if not isinstance(values, np.ndarray):
                values = np.array(values, dtype="O")

            try:
                values = tools.to_datetime(values)
                if isinstance(values, Index):
                    values = values.map(_dt_to_float_ordinal)
                else:
                    values = [_dt_to_float_ordinal(x) for x in values]
            except Exception:
                # Deliberate best effort: leave values as they are
                pass

        return values
Beispiel #14
0
    def get_config(self, name, group=None, date=None, version=None):
        """
        Look up the configuration of an item for a date and version

        Parameters
        ----------
        name : str
            Data item name
        group : object, default None
            Data item group, if any
        date : str or datetime, default None
            Configuration date for the item. Use latest date if None
        version : int, str, datetime, default None
            Version hash or datetime. The (converted) date is used if None

        Returns
        -------
        conf : dict

        Raises
        ------
        ValueError
            If no configuration is found for the item
        """
        when = self.latest_date(name) if date is None else date
        when = tools.to_datetime(when)

        which = when if version is None else version

        conf = self.get_version(which).get_item(name, group, when)
        if conf is not None:
            return conf

        raise ValueError('Parameters were not found for %s (version %s) for %s'
                         % (name, which, when))
Beispiel #15
0
def _str_to_dt_array(arr, offset=None):
    """Parse an array of time strings and convert it with to_datetime.

    Each element is passed to ``parse_time_string`` together with ``offset``
    and the first item of the result is kept.
    """
    def first_parsed(text):
        return parse_time_string(text, offset)[0]

    as_objects = np.asarray(arr, dtype=object)
    parsed = _algos.arrmap_object(as_objects, first_parsed)
    return tools.to_datetime(parsed)
Beispiel #16
0
def _str_to_dt_array(arr, offset=None):
    """Turn an array of time strings into datetimes.

    ``parse_time_string(x, offset)`` is mapped over the object array, its
    first result item is kept, and the outcome goes to ``tools.to_datetime``.
    """
    return tools.to_datetime(
        _algos.arrmap_object(
            np.asarray(arr, dtype=object),
            lambda x: parse_time_string(x, offset)[0],
        )
    )
Beispiel #17
0
    def _cached_range(cls,
                      start=None,
                      end=None,
                      periods=None,
                      offset=None,
                      name=None):
        """Slice a range out of the per-offset cached index.

        The full range between ``_CACHE_START`` and ``_CACHE_END`` is
        generated once per offset and stored in ``_daterange_cache``. The
        requested range is a slice of it, located from ``start``/``end``
        (rolled onto the offset) and ``periods``.

        Raises
        ------
        Exception
            If ``offset`` is None
        """
        if start is not None:
            start = Timestamp(start)
        if end is not None:
            end = Timestamp(end)

        if offset is None:
            raise Exception('Must provide a DateOffset!')

        cache = _daterange_cache
        if offset in cache:
            full_range = cache[offset]
        else:
            # First request for this offset: build and remember the range
            generated = generate_range(offset=offset,
                                       start=_CACHE_START,
                                       end=_CACHE_END)
            raw = tools.to_datetime(list(generated), box=False)

            full_range = raw.view(DatetimeIndex)
            full_range.offset = offset
            full_range.tz = None
            full_range.name = None
            cache[offset] = full_range

        if start is None:
            # Anchor on the end and count ``periods`` backwards
            assert isinstance(end, Timestamp)
            end = offset.rollback(end)

            stop = full_range.get_loc(end) + 1
            first = stop - periods
        elif end is None:
            # Anchor on the start and count ``periods`` forwards
            assert isinstance(start, Timestamp)
            start = offset.rollforward(start)

            first = full_range.get_loc(start)
            stop = first + periods
        else:
            if not offset.onOffset(start):
                start = offset.rollforward(start)

            if not offset.onOffset(end):
                end = offset.rollback(end)

            first = full_range.get_loc(start)
            stop = full_range.get_loc(end) + 1

        piece = full_range[first:stop]
        piece.name = name
        piece.offset = offset

        return piece
Beispiel #18
0
def time2num(d):
    """Convert a time string or time object with ``_to_ordinalf``.

    Strings are parsed with ``tools.to_datetime`` first, and a ValueError is
    raised when the result is not a datetime. Any other input is returned
    unchanged.
    """
    if isinstance(d, compat.string_types):
        parsed = tools.to_datetime(d)
        if isinstance(parsed, datetime):
            return _to_ordinalf(parsed.time())
        raise ValueError('Could not parse time %s' % d)

    return _to_ordinalf(d) if isinstance(d, pydt.time) else d
Beispiel #19
0
def time2num(d):
    """Map a time (given as a string or time object) through _to_ordinalf.

    Raises ValueError if a string does not parse to a datetime. Values of
    any other type pass through untouched.
    """
    if isinstance(d, compat.string_types):
        stamp = tools.to_datetime(d)
        if not isinstance(stamp, datetime):
            raise ValueError('Could not parse time %s' % d)
        d = stamp.time()
        return _to_ordinalf(d)

    if not isinstance(d, pydt.time):
        return d
    return _to_ordinalf(d)
Beispiel #20
0
def time2num(d):
    """Convert a time string or time object with ``_to_ordinalf``.

    Strings are parsed with ``tools.to_datetime`` first, and a ValueError is
    raised when the result is not a datetime. Any other input is returned
    unchanged.
    """
    if isinstance(d, str):
        parsed = tools.to_datetime(d)
        if isinstance(parsed, datetime):
            return _to_ordinalf(parsed.time())
        raise ValueError("Could not parse time %s" % d)

    return _to_ordinalf(d) if isinstance(d, pydt.time) else d
Beispiel #21
0
def isin(comps, values):
    """
    Compute the isin boolean array

    Parameters
    ----------
    comps: array-like
    values: array-like

    Returns
    -------
    boolean array same length as comps

    Raises
    ------
    TypeError
        If ``comps`` or ``values`` is not list-like
    """
    not_list_like = (
        "only list-like objects are allowed to be passed"
        " to isin(), you passed a [{0}]"
    )

    if not com.is_list_like(comps):
        raise TypeError(not_list_like.format(type(comps).__name__))
    comps = np.asarray(comps)
    if not com.is_list_like(values):
        raise TypeError(not_list_like.format(type(values).__name__))

    # GH11232
    # work-around for numpy < 1.8 and comparisons on py3
    # faster for larger cases to use np.in1d
    if (_np_version_under1p8 and compat.PY3) or len(comps) > 1000000:
        def member_test(x, y):
            return np.in1d(x, np.asarray(list(y)))
    else:
        def member_test(x, y):
            return lib.ismember_int64(x, set(y))

    # may need i8 conversion for proper membership testing
    if com.is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime

        values = to_datetime(values)._values.view("i8")
        comps = comps.view("i8")
    elif com.is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta

        values = to_timedelta(values)._values.view("i8")
        comps = comps.view("i8")
    elif not com.is_int64_dtype(comps):
        # anything that is not int64-like uses the generic membership test
        def member_test(x, y):
            return lib.ismember(x, set(y))

    return member_test(comps, values)
Beispiel #22
0
    def _cached_range(cls, start=None, end=None, periods=None, offset=None,
                      name=None):
        """Return a named slice of the cached index for ``offset``.

        The complete range from ``_CACHE_START`` to ``_CACHE_END`` is built
        the first time an offset is seen and kept in ``_daterange_cache``.
        The slice boundaries come from ``start``/``end`` (rolled onto the
        offset) or from one of them plus ``periods``.

        Raises
        ------
        Exception
            If ``offset`` is None
        """
        start = Timestamp(start) if start is not None else start
        end = Timestamp(end) if end is not None else end

        if offset is None:
            raise Exception('Must provide a DateOffset!')

        if offset not in _daterange_cache:
            dates = list(generate_range(offset=offset, start=_CACHE_START,
                                        end=_CACHE_END))
            cached = tools.to_datetime(dates, box=False).view(DatetimeIndex)
            cached.offset = offset
            cached.tz = None
            cached.name = None
            _daterange_cache[offset] = cached
        else:
            cached = _daterange_cache[offset]

        if start is None:
            assert isinstance(end, Timestamp)

            end = offset.rollback(end)

            hi = cached.get_loc(end) + 1
            lo = hi - periods
        elif end is None:
            assert isinstance(start, Timestamp)
            start = offset.rollforward(start)

            lo = cached.get_loc(start)
            hi = lo + periods
        else:
            # Both bounds given: snap each onto the offset before lookup
            if not offset.onOffset(start):
                start = offset.rollforward(start)

            if not offset.onOffset(end):
                end = offset.rollback(end)

            lo = cached.get_loc(start)
            hi = cached.get_loc(end) + 1

        window = cached[lo:hi]
        window.name = name
        window.offset = offset

        return window
Beispiel #23
0
def truncate(self, before=None, after=None, copy=True):
    """Truncate a sorted DataFrame / Series before and/or after some
    particular dates.

    Parameters
    ----------
    before : date
        Truncate before date
    after : date
        Truncate after date
    copy : boolean, default True
        When true, ``.copy()`` is called on the sliced result before it is
        returned

    Returns
    -------
    truncated : type of caller

    Raises
    ------
    AssertionError
        If both bounds are given and ``before`` is later than ``after``
    """

    # if we have a date index, convert to dates, otherwise
    # treat like a slice
    if self.index.is_all_dates:
        from pandas.tseries.tools import to_datetime
        before = to_datetime(before)
        after = to_datetime(after)

    if before is not None and after is not None and before > after:
        # The message reads "<X> must be after <Y>": X is the upper bound
        # (``after``) and Y the lower bound (``before``).
        raise AssertionError('Truncate: %s must be after %s' %
                             (after, before))

    result = self.ix[before:after]

    if isinstance(self.index, MultiIndex):
        # For a MultiIndex, take the index from the index's own truncate()
        result.index = self.index.truncate(before, after)

    if copy:
        result = result.copy()

    return result
Beispiel #24
0
def isin(comps, values):
    """
    Compute the isin boolean array

    Parameters
    ----------
    comps: array-like
    values: array-like

    Returns
    -------
    boolean array same length as comps

    Raises
    ------
    TypeError
        If ``comps`` or ``values`` is not list-like
    """
    template = ("only list-like objects are allowed to be passed"
                " to isin(), you passed a [{0}]")

    if not is_list_like(comps):
        raise TypeError(template.format(type(comps).__name__))
    comps = np.asarray(comps)
    if not is_list_like(values):
        raise TypeError(template.format(type(values).__name__))
    if not isinstance(values, np.ndarray):
        values = list(values)

    # GH11232
    # work-around for numpy < 1.8 and comparisons on py3
    # faster for larger cases to use np.in1d
    prefer_in1d = ((_np_version_under1p8 and compat.PY3) or
                   len(comps) > 1000000)
    if prefer_in1d:
        checker = lambda x, y: np.in1d(x, np.asarray(list(y)))
    else:
        checker = lambda x, y: lib.ismember_int64(x, set(y))

    # may need i8 conversion for proper membership testing
    if is_datetime64_dtype(comps):
        from pandas.tseries.tools import to_datetime
        values = to_datetime(values)._values.view('i8')
        comps = comps.view('i8')
    elif is_timedelta64_dtype(comps):
        from pandas.tseries.timedeltas import to_timedelta
        values = to_timedelta(values)._values.view('i8')
        comps = comps.view('i8')
    elif not is_int64_dtype(comps):
        # fall back to the generic membership test
        checker = lambda x, y: lib.ismember(x, set(y))

    return checker(comps, values)
    def test_parse_tz_aware(self):
        """Parsing a 'Z'-suffixed timestamp column keeps the time fields."""
        # See gh-1693
        import pytz
        buf = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")

        # it works
        frame = self.read_csv(buf, index_col=0, parse_dates=True)
        first = frame.index[0]
        self.assertEqual(first.minute, 39)
        try:
            self.assertIs(frame.index.tz, pytz.utc)
        except AssertionError:  # hello Yaroslav
            # tz is not the pytz.utc object: compare against a UTC
            # re-conversion of the parsed values field by field instead
            as_py = frame.index.to_pydatetime()
            reparsed = tools.to_datetime(as_py, utc=True)[0]
            self.assertEqual(first.minute, reparsed.minute)
            self.assertEqual(first.hour, reparsed.hour)
            self.assertEqual(first.day, reparsed.day)
Beispiel #26
0
    def test_parse_tz_aware(self):
        """read_csv with parse_dates handles a UTC ('Z') timestamp index."""
        # See gh-1693
        import pytz
        source = StringIO("Date,x\n2012-06-13T01:39:00Z,0.5")

        # it works
        parsed = self.read_csv(source, index_col=0, parse_dates=True)
        stamp = parsed.index[0]
        self.assertEqual(stamp.minute, 39)
        try:
            self.assertIs(parsed.index.tz, pytz.utc)
        except AssertionError:  # hello Yaroslav
            # Fallback check: day, hour and minute must match the values
            # converted again with utc=True
            utc_stamp = tools.to_datetime(parsed.index.to_pydatetime(),
                                          utc=True)[0]
            for field in ('minute', 'hour', 'day'):
                self.assertEqual(getattr(stamp, field),
                                 getattr(utc_stamp, field))
Beispiel #27
0
 def test_non_datetimeindex(self):
     """infer_freq reports daily frequency for to_datetime output."""
     daily = to_datetime(["1/1/2000", "1/2/2000", "1/3/2000"])
     inferred = frequencies.infer_freq(daily)
     self.assertEqual(inferred, "D")
Beispiel #28
0
 def try_parse(values):
     """Return the ordinal of the parsed date, or ``values`` on any error."""
     try:
         parsed = datetools.to_datetime(values)
         return parsed.toordinal()
     except Exception:
         return values
Beispiel #29
0
 def test_non_datetimeindex(self):
     """infer_freq reports daily frequency for to_datetime output."""
     dates = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000'])
     # assertEqual replaces the deprecated ``assert_`` alias and reports
     # both values on failure.
     self.assertEqual(infer_freq(dates), 'D')
#residencies        0.000169
#pottery            0.000152
#radio_&_podcast    0.000128
#typography         0.000104
#chiptune           0.000088
#letterpress        0.000088
#taxidermy          0.000016
#NaN                0.000000

# Encode the outcome as a numeric flag: 1 = successful, 0 = failed.
# 2 is a placeholder for every other state and is filtered out below.
df['funded'] = 2
# Assign through .loc with a row mask: chained indexing
# (df.funded[mask] = ...) may write to a temporary copy.
df.loc[df.state == 'successful', 'funded'] = 1
df.loc[df.state == 'failed', 'funded'] = 0
df = df[df.funded != 2]


df.deadline = to_datetime(df.deadline)


df.describe()
df.head()


# Convert currency to USD
df.currency.value_counts(normalize = True)
#USD    0.716564
#GBP    0.088293
#CAD    0.039900
#AUD    0.020653
#EUR    0.007431
#NZD    0.004088
#SEK    0.001760
Beispiel #31
0
    def __new__(cls,
                data=None,
                freq=None,
                start=None,
                end=None,
                periods=None,
                copy=False,
                name=None,
                tz=None,
                verify_integrity=True,
                normalize=False,
                **kwds):
        """Create the index from ``data`` or generate it from a range.

        When ``data`` is None the call is delegated to ``cls._generate``
        with ``start``/``end``/``periods`` and the frequency. Otherwise
        ``data`` is converted to nanosecond datetime64 values, viewed as
        ``cls`` and given ``name``, ``offset`` and ``tz``.

        Keyword extras read from ``kwds``: ``dayfirst``, ``yearfirst``
        (forwarded to ``_str_to_dt_array``) and the deprecated ``offset``
        (used as ``freq`` when truthy, with a FutureWarning).

        Raises
        ------
        ValueError
            If ``periods`` is not a number, if neither ``data`` nor a
            frequency is given, if ``data`` is a scalar, or if the dates do
            not conform to the passed frequency
        TypeError
            If ``data`` is an Int64Index or cannot be converted to a
            datetime64 dtype
        """

        dayfirst = kwds.pop('dayfirst', None)
        yearfirst = kwds.pop('yearfirst', None)
        warn = False
        if 'offset' in kwds and kwds['offset']:
            freq = kwds['offset']
            warn = True

        # freq == 'infer' defers the frequency to inference on the final
        # values (see the end of this method)
        freq_infer = False
        if not isinstance(freq, DateOffset):
            if freq != 'infer':
                freq = to_offset(freq)
            else:
                freq_infer = True
                freq = None

        if warn:
            import warnings
            warnings.warn(
                "parameter 'offset' is deprecated, "
                "please use 'freq' instead", FutureWarning)

        offset = freq

        if periods is not None:
            if com.is_float(periods):
                periods = int(periods)
            elif not com.is_integer(periods):
                raise ValueError('Periods must be a number, got %s' %
                                 str(periods))

        if data is None and offset is None:
            raise ValueError("Must provide freq argument if no data is "
                             "supplied")

        # No data: build the index from start/end/periods
        if data is None:
            return cls._generate(start,
                                 end,
                                 periods,
                                 name,
                                 offset,
                                 tz=tz,
                                 normalize=normalize)

        if not isinstance(data, np.ndarray):
            if np.isscalar(data):
                raise ValueError('DatetimeIndex() must be called with a '
                                 'collection of some kind, %s was passed' %
                                 repr(data))

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            data = np.asarray(data, dtype='O')

            # try a few ways to make it datetime64
            if lib.is_string_array(data):
                data = _str_to_dt_array(data,
                                        offset,
                                        dayfirst=dayfirst,
                                        yearfirst=yearfirst)
            else:
                data = tools.to_datetime(data)
                data.offset = offset
                # Early return: the conversion already produced an index
                if isinstance(data, DatetimeIndex):
                    if name is not None:
                        data.name = name
                    return data

        # Dispatch on dtype to obtain nanosecond datetime64 values
        if issubclass(data.dtype.type, basestring):
            subarr = _str_to_dt_array(data,
                                      offset,
                                      dayfirst=dayfirst,
                                      yearfirst=yearfirst)
        elif issubclass(data.dtype.type, np.datetime64):
            if isinstance(data, DatetimeIndex):
                if tz is None:
                    tz = data.tz

                subarr = data.values

                # Reuse the source index's offset and skip the frequency
                # check in that case
                if offset is None:
                    offset = data.offset
                    verify_integrity = False
            else:
                if data.dtype != _NS_DTYPE:
                    subarr = lib.cast_to_nanoseconds(data)
                else:
                    subarr = data
        elif data.dtype == _INT64_DTYPE:
            if isinstance(data, Int64Index):
                raise TypeError('cannot convert Int64Index->DatetimeIndex')
            if copy:
                subarr = np.asarray(data, dtype=_NS_DTYPE)
            else:
                subarr = data.view(_NS_DTYPE)
        else:
            try:
                subarr = tools.to_datetime(data)
            except ValueError:
                # tz aware
                subarr = tools.to_datetime(data, utc=True)

            if not np.issubdtype(subarr.dtype, np.datetime64):
                raise TypeError('Unable to convert %s to datetime dtype' %
                                str(data))

        if isinstance(subarr, DatetimeIndex):
            if tz is None:
                tz = subarr.tz
        else:
            if tz is not None:
                tz = tools._maybe_get_tz(tz)

                if (not isinstance(data, DatetimeIndex)
                        or getattr(data, 'tz', None) is None):
                    # Convert tz-naive to UTC
                    ints = subarr.view('i8')
                    subarr = lib.tz_localize_to_utc(ints, tz)

                subarr = subarr.view(_NS_DTYPE)

        subarr = subarr.view(cls)
        subarr.name = name
        subarr.offset = offset
        subarr.tz = tz

        # Reject data whose inferred frequency differs from the requested one
        if verify_integrity and len(subarr) > 0:
            if offset is not None and not freq_infer:
                inferred = subarr.inferred_freq
                if inferred != offset.freqstr:
                    raise ValueError('Dates do not conform to passed '
                                     'frequency')

        # freq='infer': adopt the inferred frequency when there is one
        if freq_infer:
            inferred = subarr.inferred_freq
            if inferred:
                subarr.offset = to_offset(inferred)

        return subarr
Beispiel #32
0
 def try_parse(values):
     """Best-effort parse: ordinal of the parsed date, else the input."""
     try:
         as_datetime = datetools.to_datetime(values)
         ordinal = as_datetime.toordinal()
     except Exception:
         return values
     return ordinal
Beispiel #33
0
    def __new__(
        cls,
        data=None,
        freq=None,
        start=None,
        end=None,
        periods=None,
        copy=False,
        name=None,
        tz=None,
        verify_integrity=True,
        normalize=False,
        **kwds
    ):
        """Create the index from ``data`` or generate it from a range.

        When ``data`` is None the call is delegated to ``cls._generate``
        with ``start``/``end``/``periods`` and the frequency. Otherwise
        ``data`` is converted to nanosecond datetime64 values, viewed as
        ``cls`` and given ``name``, ``offset`` and ``tz``.

        Keyword extras read from ``kwds``: ``dayfirst``, ``yearfirst``
        (forwarded to ``_str_to_dt_array``) and the deprecated ``offset``
        (used as ``freq`` when truthy, with a FutureWarning).

        Raises
        ------
        ValueError
            If ``periods`` is not a number, if neither ``data`` nor a
            frequency is given, if ``data`` is a scalar, or if the dates do
            not conform to the passed frequency
        TypeError
            If ``data`` is an Int64Index or cannot be converted to a
            datetime64 dtype
        """

        dayfirst = kwds.pop("dayfirst", None)
        yearfirst = kwds.pop("yearfirst", None)
        warn = False
        if "offset" in kwds and kwds["offset"]:
            freq = kwds["offset"]
            warn = True

        # freq == "infer" defers the frequency to inference on the final
        # values (see the end of this method)
        freq_infer = False
        if not isinstance(freq, DateOffset):
            if freq != "infer":
                freq = to_offset(freq)
            else:
                freq_infer = True
                freq = None

        if warn:
            import warnings

            warnings.warn("parameter 'offset' is deprecated, " "please use 'freq' instead", FutureWarning)

        offset = freq

        if periods is not None:
            if com.is_float(periods):
                periods = int(periods)
            elif not com.is_integer(periods):
                raise ValueError("Periods must be a number, got %s" % str(periods))

        if data is None and offset is None:
            raise ValueError("Must provide freq argument if no data is " "supplied")

        # No data: build the index from start/end/periods
        if data is None:
            return cls._generate(start, end, periods, name, offset, tz=tz, normalize=normalize)

        if not isinstance(data, np.ndarray):
            if np.isscalar(data):
                raise ValueError(
                    "DatetimeIndex() must be called with a " "collection of some kind, %s was passed" % repr(data)
                )

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            data = np.asarray(data, dtype="O")

            # try a few ways to make it datetime64
            if lib.is_string_array(data):
                data = _str_to_dt_array(data, offset, dayfirst=dayfirst, yearfirst=yearfirst)
            else:
                data = tools.to_datetime(data)
                data.offset = offset
                # Early return: the conversion already produced an index
                if isinstance(data, DatetimeIndex):
                    if name is not None:
                        data.name = name
                    return data

        # Dispatch on dtype to obtain nanosecond datetime64 values
        if issubclass(data.dtype.type, basestring):
            subarr = _str_to_dt_array(data, offset, dayfirst=dayfirst, yearfirst=yearfirst)
        elif issubclass(data.dtype.type, np.datetime64):
            if isinstance(data, DatetimeIndex):
                if tz is None:
                    tz = data.tz

                subarr = data.values

                # Reuse the source index's offset and skip the frequency
                # check in that case
                if offset is None:
                    offset = data.offset
                    verify_integrity = False
            else:
                if data.dtype != _NS_DTYPE:
                    subarr = lib.cast_to_nanoseconds(data)
                else:
                    subarr = data
        elif data.dtype == _INT64_DTYPE:
            if isinstance(data, Int64Index):
                raise TypeError("cannot convert Int64Index->DatetimeIndex")
            if copy:
                subarr = np.asarray(data, dtype=_NS_DTYPE)
            else:
                subarr = data.view(_NS_DTYPE)
        else:
            try:
                subarr = tools.to_datetime(data)
            except ValueError:
                # tz aware
                subarr = tools.to_datetime(data, utc=True)

            if not np.issubdtype(subarr.dtype, np.datetime64):
                raise TypeError("Unable to convert %s to datetime dtype" % str(data))

        if isinstance(subarr, DatetimeIndex):
            if tz is None:
                tz = subarr.tz
        else:
            if tz is not None:
                tz = tools._maybe_get_tz(tz)

                if not isinstance(data, DatetimeIndex) or getattr(data, "tz", None) is None:
                    # Convert tz-naive to UTC
                    ints = subarr.view("i8")
                    subarr = lib.tz_localize_to_utc(ints, tz)

                subarr = subarr.view(_NS_DTYPE)

        subarr = subarr.view(cls)
        subarr.name = name
        subarr.offset = offset
        subarr.tz = tz

        # Reject data whose inferred frequency differs from the requested one
        if verify_integrity and len(subarr) > 0:
            if offset is not None and not freq_infer:
                inferred = subarr.inferred_freq
                if inferred != offset.freqstr:
                    raise ValueError("Dates do not conform to passed " "frequency")

        # freq="infer": adopt the inferred frequency when there is one
        if freq_infer:
            inferred = subarr.inferred_freq
            if inferred:
                subarr.offset = to_offset(inferred)

        return subarr
Beispiel #34
0
def date_index(df):
    """Return ``df`` re-indexed by a datetime built from its 'timestamp' column.

    The 'timestamp' values are interpreted as seconds (``unit='s'``). The
    converted values are first stored on the passed frame as a new 'date'
    column (so the caller's frame gains that column), and the returned frame
    is the result of ``set_index('date')``.
    """
    df['date'] = to_datetime(df['timestamp'], unit='s')
    return df.set_index('date')
Beispiel #35
0
# Script fragment: clean the loyalty frame, move it to dask, align the join-key
# dtypes on both sides and inner-merge it with df2.
# NOTE(review): `df_loyalty`, `df2`, `dd` and `dask` are defined/imported
# outside this fragment; `dd` is presumably dask.dataframe -- confirm.
# NOTE(review): the `print x` statements are Python 2 syntax.

# Drop every row that contains a missing value.
df_loyalty = df_loyalty.dropna()
# Bare expression: the length is computed but the result is discarded when
# this runs as a script (it is only echoed in an interactive session).
len(df_loyalty)

# Convert the pandas frame into a dask DataFrame split into 3 partitions.
df_loyalty = dd.from_pandas(df_loyalty, npartitions=3)

print df_loyalty.head()

# Cast the join keys on both frames to int so the merge compares equal dtypes.
df2['dlTableStoreNumber'] = df2['dlTableStoreNumber'].astype(int)
df2['CHECK'] = df2['CHECK'].astype(int)

df_loyalty['StoreNumber'] = df_loyalty['StoreNumber'].astype(int)
df_loyalty['ReceiptNumber'] = df_loyalty['ReceiptNumber'].astype(int)
# Materialise ReceiptDate with the scheduler passed as `get=` and call
# .to_datetime() on the computed result.
# NOTE(review): `async` is a reserved word from Python 3.7 on, so this
# attribute path only parses on older interpreters -- confirm the target
# Python/dask versions before touching this line.
df_loyalty['ReceiptDate'] = df_loyalty['ReceiptDate'].compute(
    get=dask. async .get_sync).to_datetime()

# Join keys, paired positionally: dlTableStoreNumber <-> StoreNumber and
# CHECK <-> ReceiptNumber.
left_columns = ["dlTableStoreNumber", 'CHECK']

right_columns = ["StoreNumber", "ReceiptNumber"]

# Inner-merge both dataframes on the key pairs above.
df_merge = df2.merge(df_loyalty,
                     how='inner',
                     left_on=left_columns,
                     right_on=right_columns)

print df_merge.head()
print dd.compute(df_merge.count())

# Working on date format, work in progress
Beispiel #36
0
 def __getInternalStruct(self):
     """Build a DataFrame with one column per entry of ``self._serie['data']``.

     Each entry contributes a Series named by its 'uuid': the values are the
     entry's 'dps' values taken in sorted-key order, and the index is those
     same keys converted with ``int`` and read as seconds (``unit='s'``).
     """
     columns = {}
     for entry in self._serie['data']:
         points = entry['dps']
         # Sort once so values and index are guaranteed to line up.
         stamps = sorted(points.keys())
         values = [points[stamp] for stamp in stamps]
         when = to_datetime([int(stamp) for stamp in stamps], unit='s')
         columns[entry['uuid']] = Series(values, index=when)
     return DataFrame(columns)
Beispiel #37
0
 def test_non_datetimeindex(self):
     dates = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000'])
     self.assertEqual(frequencies.infer_freq(dates), 'D')
Beispiel #38
0
 def try_parse(values):
     """Run ``values`` through ``tools.to_datetime`` and ``_dt_to_float_ordinal``.

     If either step raises, ``values`` is returned unchanged.
     """
     try:
         parsed = tools.to_datetime(values)
         converted = _dt_to_float_ordinal(parsed)
     except Exception:
         # Deliberately broad: any failure means the input is handed back as-is.
         return values
     return converted
# Script fragment: normalise sub categories, derive the binary `funded`
# target, parse deadlines and convert pledged amounts to USD.
# `df`, `to_datetime` and `DatetimeIndex` come from earlier in the script.

# Main-category labels; a sub_category equal to one of these adds no
# information and is collapsed to 'unknown' below.
main_cats = ('film_and_video', 'music', 'publishing', 'games', 'design', 'art',
             'food', 'technology', 'fashion', 'comics', 'theater', 'crafts',
             'journalism', 'photography', 'animals', 'dance')

# All writes use a single df.loc[rows, column] assignment. The chained form
# df.col[mask] = value goes through an intermediate Series and is not
# guaranteed to reach df (SettingWithCopy / copy-on-write).
df.loc[df.main_category == 'unknown', 'sub_category'] = 'unknown'
df.loc[df.sub_category == 'film_&_video', 'sub_category'] = 'film_and_video'

# Runs after the rename above on purpose: 'film_and_video' is itself a
# main-category label and is therefore collapsed as well.
df.loc[df.sub_category.isin(main_cats), 'sub_category'] = 'unknown'

df.sub_category.value_counts(normalize=True, dropna=False)

# Binary target: 1 = successful, 0 = failed. Every other state keeps the
# sentinel value 2 and is dropped.
df['funded'] = 2
df.loc[df.state == 'successful', 'funded'] = 1
df.loc[df.state == 'failed', 'funded'] = 0
# .copy() makes the filtered frame independent, so the column assignments
# below do not act on a slice of the original frame.
df = df[df.funded != 2].copy()

df['deadline'] = to_datetime(df.deadline)
df['year'] = DatetimeIndex(df['deadline']).year
df['month'] = DatetimeIndex(df['deadline']).month


# Convert pledged to USD
#
# The previous form `df.pledged_USD = df.pledged_USD[mask] = df.pledged * r`
# is a chained assignment: it first rebinds the WHOLE column to
# `df.pledged * r`, so after the last line every row had been multiplied by
# the NOK rate. Map each row's currency to its own rate instead; currencies
# without an entry keep the pledged amount unchanged (rate 1.0).
usd_rates = {
    "GBP": 1.48,
    "CAD": .79,
    "AUD": .76,
    "EUR": 1.07,
    "NZD": .75,
    "DKK": .14,
    "NOK": .12,
}
df['pledged_USD'] = df.pledged * df.currency.map(usd_rates).fillna(1.0)
Beispiel #40
0
    def __new__(cls, data=None,
                freq=None, start=None, end=None, periods=None,
                copy=False, name=None, tz=None,
                verify_integrity=True, normalize=False, **kwds):
        """Create the index from ``data``, or generate it when ``data`` is None.

        Parameters
        ----------
        data : ndarray, DatetimeIndex or other non-scalar iterable, optional
            Values to convert. When None, construction is delegated to
            ``cls._generate(start, end, periods, name, offset, ...)``.
        freq : DateOffset, 'infer', or anything accepted by ``to_offset``
            Frequency of the index. ``'infer'`` sets the offset from
            ``inferred_freq`` after construction.
        start, end, periods
            Only forwarded to ``cls._generate``. A float ``periods`` is
            truncated with ``int``.
        copy : boolean, default False
            Only consulted for int64 input: True converts via ``np.asarray``,
            False reinterprets the buffer with ``view``.
        name : object
            Stored as ``name`` on the result.
        tz : object, optional
            Resolved with ``tools._maybe_get_tz``; when given, the values are
            passed through ``lib.tz_localize_to_utc``.
        verify_integrity : boolean, default True
            Compare ``inferred_freq`` against the passed frequency.
        normalize : boolean, default False
            Only forwarded to ``cls._generate``.
        **kwds
            ``offset`` is accepted as a deprecated alias for ``freq``.

        Raises
        ------
        ValueError
            If ``periods`` is neither an integer nor a float, if neither
            ``data`` nor a frequency is given, if ``data`` is a scalar, or if
            the dates do not conform to the passed frequency.
        TypeError
            If the fallback conversion does not produce a datetime64 dtype.
        """

        # Deprecated 'offset' keyword: a truthy value overrides freq; the
        # warning itself is issued further down.
        warn = False
        if 'offset' in kwds and kwds['offset']:
            freq = kwds['offset']
            warn = True

        # Normalise freq to a DateOffset (or None); 'infer' is remembered and
        # resolved once the values exist.
        freq_infer = False
        if not isinstance(freq, DateOffset):
            if freq != 'infer':
                freq = to_offset(freq)
            else:
                freq_infer = True
                freq = None

        if warn:
            import warnings
            warnings.warn("parameter 'offset' is deprecated, "
                          "please use 'freq' instead",
                          FutureWarning)

        offset = freq

        # Accept integral floats for periods, reject anything non-numeric.
        if periods is not None:
            if com.is_float(periods):
                periods = int(periods)
            elif not com.is_integer(periods):
                raise ValueError('Periods must be a number, got %s' %
                                 str(periods))

        if data is None and offset is None:
            raise ValueError("Must provide freq argument if no data is "
                             "supplied")

        # No data: generate the range from start/end/periods instead.
        if data is None:
            return cls._generate(start, end, periods, name, offset,
                                 tz=tz, normalize=normalize)

        # Non-ndarray input is first materialised as an object array and then
        # converted towards datetime64.
        if not isinstance(data, np.ndarray):
            if np.isscalar(data):
                raise ValueError('DatetimeIndex() must be called with a '
                                 'collection of some kind, %s was passed'
                                 % repr(data))

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            data = np.asarray(data, dtype='O')

            # try a few ways to make it datetime64
            if lib.is_string_array(data):
                data = _str_to_dt_array(data, offset)
            else:
                data = tools.to_datetime(data)
                # NOTE(review): assumes the to_datetime result accepts an
                # `offset` attribute -- confirm against tools.to_datetime.
                data.offset = offset

        # Dispatch on dtype to obtain `subarr`, the nanosecond datetime64
        # values backing the index.
        if issubclass(data.dtype.type, basestring):
            subarr = _str_to_dt_array(data, offset)
        elif issubclass(data.dtype.type, np.datetime64):
            if isinstance(data, DatetimeIndex):
                subarr = data.values
                # Without an explicit freq, inherit the source index's offset
                # and skip the conformity check below.
                if offset is None:
                    offset = data.offset
                    verify_integrity = False
            else:
                if data.dtype != _NS_DTYPE:
                    subarr = lib.cast_to_nanoseconds(data)
                else:
                    subarr = data
        elif data.dtype == _INT64_DTYPE:
            # int64 values are converted or reinterpreted as _NS_DTYPE;
            # `copy` picks between the two.
            if copy:
                subarr = np.asarray(data, dtype=_NS_DTYPE)
            else:
                subarr = data.view(_NS_DTYPE)
        else:
            subarr = tools.to_datetime(data)
            if not np.issubdtype(subarr.dtype, np.datetime64):
                raise TypeError('Unable to convert %s to datetime dtype'
                                % str(data))

        if tz is not None:
            tz = tools._maybe_get_tz(tz)
            # Convert local to UTC
            ints = subarr.view('i8')

            subarr = lib.tz_localize_to_utc(ints, tz)
            subarr = subarr.view(_NS_DTYPE)

        # Reinterpret the raw values as an instance of this class and attach
        # the metadata.
        subarr = subarr.view(cls)
        subarr.name = name
        subarr.offset = offset
        subarr.tz = tz

        # An explicitly passed frequency must match what the values imply.
        if verify_integrity and len(subarr) > 0:
            if offset is not None and not freq_infer:
                inferred = subarr.inferred_freq
                if inferred != offset.freqstr:
                    raise ValueError('Dates do not conform to passed '
                                     'frequency')

        # freq='infer': adopt the inferred frequency when one is found.
        if freq_infer:
            inferred = subarr.inferred_freq
            if inferred:
                subarr.offset = to_offset(inferred)

        return subarr
Beispiel #41
0
 def test_non_datetimeindex(self):
     dates = to_datetime(["1/1/2000", "1/2/2000", "1/3/2000"])
     self.assert_(infer_freq(dates) == "D")
# Script fragment: fold fine-grained labels under their parent category,
# derive the binary `funded` target, parse deadlines and convert pledged
# amounts to USD. `df`, `art`, `publishing` and `to_datetime` come from
# earlier in the script.

# For every label listed under a parent: rows carrying the label as their
# main_category get it as sub_category, then every row with that
# sub_category is assigned the parent as main_category.
# All writes use a single df.loc[rows, column] assignment. The chained form
# df.col[mask] = value goes through an intermediate Series and is not
# guaranteed to reach df (SettingWithCopy / copy-on-write).
for parent, children in (('art', art), ('publishing', publishing)):
    for name in children:
        df.loc[df.main_category == name, 'sub_category'] = name
        df.loc[df.sub_category == name, 'main_category'] = parent

df.main_category.value_counts(normalize=True, dropna=False)

# Binary target: 1 = successful, 0 = failed. Every other state keeps the
# sentinel value 2 and is dropped.
df['funded'] = 2
df.loc[df.state == 'successful', 'funded'] = 1
df.loc[df.state == 'failed', 'funded'] = 0
# .copy() makes the filtered frame independent, so the column assignments
# below do not act on a slice of the original frame.
df = df[df.funded != 2].copy()

df['deadline'] = to_datetime(df.deadline)

# Convert pledged to USD
#
# The previous form `df.pledged_USD = df.pledged_USD[mask] = df.pledged * r`
# is a chained assignment: it first rebinds the WHOLE column to
# `df.pledged * r`, so after the last line every row had been multiplied by
# the NOK rate. Map each row's currency to its own rate instead; currencies
# without an entry keep the pledged amount unchanged (rate 1.0).
usd_rates = {
    "GBP": 1.48,
    "CAD": .79,
    "AUD": .76,
    "EUR": 1.07,
    "NZD": .75,
    "DKK": .14,
    "NOK": .12,
}
df['pledged_USD'] = df.pledged * df.currency.map(usd_rates).fillna(1.0)

# Truncate the converted amounts to whole numbers.
df['pledged_USD'] = df['pledged_USD'].astype('int')
Beispiel #43
0
 def try_parse(values):
     """Run ``values`` through ``tools.to_datetime`` and ``_dt_to_float_ordinal``.

     If either step raises, ``values`` is returned unchanged.
     """
     try:
         as_datetimes = tools.to_datetime(values)
         result = _dt_to_float_ordinal(as_datetimes)
     except Exception:
         # Deliberately broad: any failure means the input is handed back as-is.
         return values
     return result
Beispiel #44
0
 def __getInternalStruct(self):
     """Build a DataFrame with one column per entry of ``self._serie['data']``.

     Each entry contributes a Series named by its 'uuid': the values are the
     entry's 'dps' values taken in sorted-key order, and the index is those
     same keys converted with ``int`` and read as seconds (``unit='s'``).
     """
     series_by_uuid = {}
     for item in self._serie['data']:
         datapoints = item['dps']
         # Sort once so values and index are guaranteed to line up.
         ordered_keys = sorted(datapoints.keys())
         index = to_datetime([int(key) for key in ordered_keys], unit='s')
         data = [datapoints[key] for key in ordered_keys]
         series_by_uuid[item['uuid']] = Series(data, index=index)
     return DataFrame(series_by_uuid)
Beispiel #45
0
import time as tm
from pandas.tseries.tools import to_datetime

#|Script fragment: load a trade-history CSV, index it by datetime and start
#|building a resampled price-history frame, timing each stage with tm.time().
#|NOTE(review): `pd` is not imported in this fragment, and the `print`
#|statement below is Python 2 syntax.

#|Record the wall-clock time before the CSV load
start_csv = tm.time()

#|Load the trade history CSV file; column names are supplied via `names=`
trd = pd.read_csv('btcnCNY.csv',
                  names=['timestamp','price','amount'])
print '---CSV File Loaded---'

#|Record the wall-clock time after the CSV load
end_csv = tm.time()

#|Create a datetime index from 'timestamp', read as seconds (unit='s')
trd['date'] = to_datetime(trd['timestamp'], unit='s')
trd = trd.set_index('date')

#|Create float Series objects of 'price' and 'amount'
price = trd['price'].astype(float)
amount = trd['amount'].astype(float)

#|Resampling frequency for the OLHCV conversion: 'min' is one-minute bins
#|(not hourly)
freq = 'min'

#|Record the wall-clock time before resampling
start_resample = tm.time()

#|Create an empty OLHCV price-history DataFrame whose index is taken from
#|`price` resampled at `freq` with how='last'
prc = pd.DataFrame(index=price.resample(freq, how='last').index)
Beispiel #46
0
 def test_non_datetimeindex(self):
     dates = to_datetime(['1/1/2000', '1/2/2000', '1/3/2000'])
     self.assertEqual(infer_freq(dates), 'D')
Beispiel #47
0
    def __new__(
        cls,
        data=None,
        freq=None,
        start=None,
        end=None,
        periods=None,
        copy=False,
        name=None,
        tz=None,
        verify_integrity=True,
        normalize=False,
        **kwds
    ):
        """Create the index from ``data``, or generate it when ``data`` is None.

        Parameters
        ----------
        data : ndarray, DatetimeIndex, datetime or other iterable, optional
            Values to convert. When None, construction is delegated to
            ``cls._generate(start, end, periods, name, offset, ...)``.
        freq : DateOffset, "infer", or anything accepted by ``to_offset``
            Frequency of the index. ``"infer"`` sets the offset from
            ``inferred_freq`` after construction.
        start, end, periods
            Only forwarded to ``cls._generate``; ``periods`` is not validated
            here.
        copy : boolean, default False
            Passed to ``np.array`` for datetime64 (non-DatetimeIndex) and
            integer input.
        name : object
            Stored as ``name`` on the result.
        tz : object, optional
            Resolved with ``tools._maybe_get_tz``; when given, the values are
            checked with ``lib.tz_localize_check`` and passed through
            ``lib.tz_convert(ints, tz, _utc())``.
        verify_integrity : boolean, default True
            Compare ``inferred_freq`` against the passed frequency.
        normalize : boolean, default False
            Only forwarded to ``cls._generate``.
        **kwds
            ``offset`` is accepted as a deprecated alias for ``freq``.

        Raises
        ------
        ValueError
            If neither ``data`` nor a frequency is given, if ``data`` is a
            scalar, or if the dates do not conform to the passed frequency.
        TypeError
            If the fallback conversion does not produce a datetime64 dtype.
        """

        # Deprecated "offset" keyword: a truthy value overrides freq; the
        # warning itself is issued further down.
        warn = False
        if "offset" in kwds and kwds["offset"]:
            freq = kwds["offset"]
            warn = True

        # Normalise freq to a DateOffset (or None); "infer" is remembered and
        # resolved once the values exist.
        # NOTE(review): this local would shadow a module-level `infer_freq`
        # function inside this method, if one exists -- confirm.
        infer_freq = False
        if not isinstance(freq, DateOffset):
            if freq != "infer":
                freq = to_offset(freq)
            else:
                infer_freq = True
                freq = None

        # NOTE(review): both branches repeat the same basestring -> to_offset
        # conversion; it looks redundant after the normalisation above unless
        # to_offset can return a string -- confirm before removing.
        if warn:
            import warnings

            warnings.warn("parameter 'offset' is deprecated, " "please use 'freq' instead", FutureWarning)
            if isinstance(freq, basestring):
                freq = to_offset(freq)
        else:
            if isinstance(freq, basestring):
                freq = to_offset(freq)

        offset = freq

        if data is None and offset is None:
            raise ValueError("Must provide freq argument if no data is " "supplied")

        # No data: generate the range from start/end/periods instead.
        if data is None:
            return cls._generate(start, end, periods, name, offset, tz=tz, normalize=normalize)

        # Non-ndarray input is first materialised as an object array and then
        # converted towards datetime64.
        if not isinstance(data, np.ndarray):
            if np.isscalar(data):
                raise ValueError(
                    "DatetimeIndex() must be called with a " "collection of some kind, %s was passed" % repr(data)
                )

            # A single datetime object is wrapped into a one-element list.
            if isinstance(data, datetime):
                data = [data]

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            data = np.asarray(data, dtype="O")

            # try a few ways to make it datetime64
            if lib.is_string_array(data):
                data = _str_to_dt_array(data, offset)
            else:
                data = tools.to_datetime(data)
                data = np.asarray(data, dtype="M8[ns]")

        # Dispatch on dtype to obtain `subarr`, the M8[ns] values backing the
        # index.
        if issubclass(data.dtype.type, basestring):
            subarr = _str_to_dt_array(data, offset)
        elif issubclass(data.dtype.type, np.datetime64):
            if isinstance(data, DatetimeIndex):
                subarr = data.values
                # The source index's offset always replaces the passed freq
                # here, and the conformity check below is skipped.
                offset = data.offset
                verify_integrity = False
            else:
                subarr = np.array(data, dtype="M8[ns]", copy=copy)
        elif issubclass(data.dtype.type, np.integer):
            subarr = np.array(data, dtype="M8[ns]", copy=copy)
        else:
            subarr = tools.to_datetime(data)
            if not np.issubdtype(subarr.dtype, np.datetime64):
                raise TypeError("Unable to convert %s to datetime dtype" % str(data))

        if tz is not None:
            tz = tools._maybe_get_tz(tz)
            # Convert local to UTC
            ints = subarr.view("i8")
            lib.tz_localize_check(ints, tz)
            subarr = lib.tz_convert(ints, tz, _utc())
            subarr = subarr.view("M8[ns]")

        # Reinterpret the raw values as an instance of this class and attach
        # the metadata.
        subarr = subarr.view(cls)
        subarr.name = name
        subarr.offset = offset
        subarr.tz = tz

        # An explicitly passed frequency must match what the values imply.
        if verify_integrity and len(subarr) > 0:
            if offset is not None and not infer_freq:
                inferred = subarr.inferred_freq
                if inferred != offset.freqstr:
                    raise ValueError("Dates do not conform to passed " "frequency")

        # freq="infer": adopt the inferred frequency when one is found.
        if infer_freq:
            inferred = subarr.inferred_freq
            if inferred:
                subarr.offset = to_offset(inferred)

        return subarr