Example #1
    def test_usecols_with_parse_dates_and_usecol_names(self):
        # See gh-9755
        s = """0,1,20140101,0900,4
        0,1,20140102,1000,4"""
        parse_dates = [[1, 2]]
        names = list('acd')

        cols = {
            'a': [0, 0],
            'c_d': [
                Timestamp('2014-01-01 09:00:00'),
                Timestamp('2014-01-02 10:00:00')
            ]
        }
        expected = DataFrame(cols, columns=['c_d', 'a'])

        df = self.read_csv(StringIO(s),
                           names=names,
                           usecols=[0, 2, 3],
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)

        df = self.read_csv(StringIO(s),
                           names=names,
                           usecols=[3, 0, 2],
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)
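# A minimal standalone sketch of the behavior exercised above, calling
# pd.read_csv directly instead of the test-class harness (names, usecols
# and data copied from the test; assumes a pandas version where the
# nested parse_dates=[[1, 2]] column-merging form is still supported):
import pandas as pd
from io import StringIO

raw = "0,1,20140101,0900,4\n0,1,20140102,1000,4"
frame = pd.read_csv(StringIO(raw), names=list('acd'),
                    usecols=[0, 2, 3], parse_dates=[[1, 2]])
# Columns 'c' and 'd' are merged into a single datetime column named 'c_d'.
print(frame.columns.tolist())  # ['c_d', 'a']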
Example #2
def test_usecols_with_parse_dates2(all_parsers):
    # see gh-13604
    parser = all_parsers
    data = """2008-02-07 09:40,1032.43
2008-02-07 09:50,1042.54
2008-02-07 10:00,1051.65"""

    names = ["date", "values"]
    usecols = names[:]
    parse_dates = [0]

    index = Index(
        [
            Timestamp("2008-02-07 09:40"),
            Timestamp("2008-02-07 09:50"),
            Timestamp("2008-02-07 10:00"),
        ],
        name="date",
    )
    cols = {"values": [1032.43, 1042.54, 1051.65]}
    expected = DataFrame(cols, index=index)

    result = parser.read_csv(
        StringIO(data),
        parse_dates=parse_dates,
        index_col=0,
        usecols=usecols,
        header=None,
        names=names,
    )
    tm.assert_frame_equal(result, expected)
Example #3
def test_multiple_date_col_timestamp_parse(all_parsers):
    parser = all_parsers
    data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25
05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25"""

    result = parser.read_csv(StringIO(data),
                             parse_dates=[[0, 1]],
                             header=None,
                             date_parser=Timestamp)
    expected = DataFrame(
        [
            [
                Timestamp("05/31/2012, 15:30:00.029"),
                1306.25,
                1,
                "E",
                0,
                np.nan,
                1306.25,
            ],
            [
                Timestamp("05/31/2012, 15:30:00.029"),
                1306.25,
                8,
                "E",
                0,
                np.nan,
                1306.25,
            ],
        ],
        columns=["0_1", 2, 3, 4, 5, 6, 7],
    )
    tm.assert_frame_equal(result, expected)
Example #4
def _get_range_edges(first, last, offset, closed='left', base=0):
    if isinstance(offset, compat.string_types):
        offset = to_offset(offset)

    if isinstance(offset, Tick):
        is_day = isinstance(offset, Day)
        day_nanos = delta_to_nanoseconds(timedelta(1))

        # #1165
        if (is_day and day_nanos % offset.nanos == 0) or not is_day:
            return _adjust_dates_anchored(first,
                                          last,
                                          offset,
                                          closed=closed,
                                          base=base)

    if not isinstance(offset, Tick):  # and first.time() != last.time():
        # hack!
        first = first.normalize()
        last = last.normalize()

    if closed == 'left':
        first = Timestamp(offset.rollback(first))
    else:
        first = Timestamp(first - offset)

    last = Timestamp(last + offset)

    return first, last
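# _get_range_edges falls through to offset.rollback for non-Tick offsets;
# a small sketch of that public rollback/rollforward behavior (MonthBegin
# chosen here purely for illustration):
import pandas as pd
from pandas.tseries.frequencies import to_offset

off = to_offset('MS')  # MonthBegin
ts = pd.Timestamp('2014-01-15')
print(off.rollback(ts))     # 2014-01-01 00:00:00, previous boundary
print(off.rollforward(ts))  # 2014-02-01 00:00:00, next boundary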
Example #5
def test_from_product_datetimeindex():
    dt_index = date_range("2000-01-01", periods=2)
    mi = MultiIndex.from_product([[1, 2], dt_index])
    etalon = construct_1d_object_array_from_listlike([
        (1, Timestamp("2000-01-01")),
        (1, Timestamp("2000-01-02")),
        (2, Timestamp("2000-01-01")),
        (2, Timestamp("2000-01-02")),
    ])
    tm.assert_numpy_array_equal(mi.values, etalon)
Example #6
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
    # First and last offsets should be calculated from the start day to fix an
    # error caused by resampling across multiple days when a one day period is
    # not a multiple of the frequency.
    #
    # See https://github.com/pandas-dev/pandas/issues/8683

    # 14682 - Since we need to drop the TZ information to perform
    # the adjustment in the presence of a DST change,
    # save TZ Info and the DST state of the first and last parameters
    # so that we can accurately rebuild them at the end.
    first_tzinfo = first.tzinfo
    last_tzinfo = last.tzinfo
    first_dst = bool(first.dst())
    last_dst = bool(last.dst())

    first = first.tz_localize(None)
    last = last.tz_localize(None)

    start_day_nanos = first.normalize().value

    base_nanos = (base % offset.n) * offset.nanos // offset.n
    start_day_nanos += base_nanos

    foffset = (first.value - start_day_nanos) % offset.nanos
    loffset = (last.value - start_day_nanos) % offset.nanos

    if closed == 'right':
        if foffset > 0:
            # roll back
            fresult = first.value - foffset
        else:
            fresult = first.value - offset.nanos

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            # already the end of the road
            lresult = last.value
    else:  # closed == 'left'
        if foffset > 0:
            fresult = first.value - foffset
        else:
            # start of the road
            fresult = first.value

        if loffset > 0:
            # roll forward
            lresult = last.value + (offset.nanos - loffset)
        else:
            lresult = last.value + offset.nanos

    return (Timestamp(fresult).tz_localize(first_tzinfo, ambiguous=first_dst),
            Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst))
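# The gh-8683 fix above anchors bin edges to the start of the first day,
# so frequencies that do not evenly divide a day keep stable edges across
# midnight; a short observable sketch via resample:
import numpy as np
import pandas as pd

idx = pd.date_range('2014-10-14 23:06:23.206', periods=3, freq='400L')
s = pd.Series(np.arange(3), index=idx)
# The first bin edge is derived from 2014-10-14 00:00:00, not from the
# first data point itself.
print(s.resample('100L').sum().index[0])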
Example #7
def test_from_arrays(idx):
    arrays = [np.asarray(lev).take(level_codes)
              for lev, level_codes in zip(idx.levels, idx.codes)]

    # list of arrays as input
    result = MultiIndex.from_arrays(arrays, names=idx.names)
    tm.assert_index_equal(result, idx)

    # infer correctly
    result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')],
                                     ['a', 'b']])
    assert result.levels[0].equals(Index([Timestamp('20130101')]))
    assert result.levels[1].equals(Index(['a', 'b']))
Example #8
    def get_wayback_tot_via_memento(self, w, urlalt=None):
        '''
        returns archive information for the URL (via memento protocol)
        :param w: the penalization value for domain search
        :param urlalt: an alternative URL (e.g., the URL domain)
        :return: number of years the URL was cached, since the caching started.
        '''
        k = 0
        y = 0
        try:
            if urlalt is None:
                urlalt = self.url
                w = 1.0
            out = self.__query_memento(urlalt)
            if out['mementos']:
                t1 = Timestamp(out['mementos']['first']['datetime'], tz=None)
                try:
                    t2 = Timestamp(out['mementos']['prev']['datetime'],
                                   tz=None)
                except Exception:
                    t2 = t1
                t3 = Timestamp(out['mementos']['closest']['datetime'], tz=None)
                try:
                    t4 = Timestamp(out['mementos']['next']['datetime'],
                                   tz=None)
                except Exception:
                    t4 = t3
                t5 = Timestamp(out['mementos']['last']['datetime'], tz=None)

                d1 = abs(t2 - t1) + dt2.timedelta(days=1)
                d2 = abs(t4 - t3) + dt2.timedelta(days=1)

                l = 1 / (np.log(d1.days * d2.days) + 1)

                age_years = abs(t5.year - t1.year) + 1
                today = datetime.today()
                most_recent_update = diff_month(today, t5) + 1

                k = (l + np.log(age_years) + (1 / most_recent_update)) * w

                #print(t1, t2, t3, t4, t5)

                #k = (t1.year - t2.year)
                #k = k * w
                y = t5.year

            #print(k)
            return k, y
        except Exception as e:
            config.logger.error(repr(e))
            raise
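# A worked sketch of the scoring arithmetic above with made-up memento
# gaps (values illustrative only; diff_month is this project's helper
# that counts calendar months between two dates):
import numpy as np

d1_days, d2_days = 3, 2                # day gaps around first/closest mementos
l = 1 / (np.log(d1_days * d2_days) + 1)
age_years = 5                          # last.year - first.year + 1
most_recent_update = 2                 # months since the last snapshot + 1
w = 1.0                                # no domain-search penalty
k = (l + np.log(age_years) + 1 / most_recent_update) * w
print(round(k, 3))                     # ~2.468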
Example #9
def test_usecols_with_parse_dates(all_parsers, usecols):
    # see gh-9755
    data = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""
    parser = all_parsers
    parse_dates = [[1, 2]]

    cols = {
        "a": [0, 0],
        "c_d": [Timestamp("2014-01-01 09:00:00"), Timestamp("2014-01-02 10:00:00")],
    }
    expected = DataFrame(cols, columns=["c_d", "a"])
    result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates)
    tm.assert_frame_equal(result, expected)
Example #10
def test_date_parser_int_bug(all_parsers):
    # see gh-3071
    parser = all_parsers
    data = ("posix_timestamp,elapsed,sys,user,queries,query_time,rows,"
            "accountid,userid,contactid,level,silo,method\n"
            "1343103150,0.062353,0,4,6,0.01690,3,"
            "12345,1,-1,3,invoice_InvoiceResource,search\n")

    result = parser.read_csv(
        StringIO(data),
        index_col=0,
        parse_dates=[0],
        date_parser=lambda x: datetime.utcfromtimestamp(int(x)))
    expected = DataFrame([[
        0.062353, 0, 4, 6, 0.01690, 3, 12345, 1, -1, 3,
        "invoice_InvoiceResource", "search"
    ]],
                         columns=[
                             "elapsed", "sys", "user", "queries", "query_time",
                             "rows", "accountid", "userid", "contactid",
                             "level", "silo", "method"
                         ],
                         index=Index([Timestamp("2012-07-24 04:12:30")],
                                     name="posix_timestamp"))
    tm.assert_frame_equal(result, expected)
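# The date_parser lambda above maps a POSIX integer to a datetime; the
# same conversion by hand, for the row in the test data:
from datetime import datetime

print(datetime.utcfromtimestamp(1343103150))  # 2012-07-24 04:12:30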
Example #11
    def _sub_datelike(self, other):
        # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]]
        if isinstance(other, (DatetimeArrayMixin, np.ndarray)):
            if isinstance(other, np.ndarray):
                # if other is an ndarray, we assume it is datetime64-dtype
                other = type(self)(other)
            if not self._has_same_tz(other):
                # require tz compat
                raise TypeError("{cls} subtraction must have the same "
                                "timezones or no timezones".format(
                                    cls=type(self).__name__))
            result = self._sub_datelike_dti(other)
        elif isinstance(other, (datetime, np.datetime64)):
            assert other is not NaT
            other = Timestamp(other)
            if other is NaT:
                return self - NaT
            # require tz compat
            elif not self._has_same_tz(other):
                raise TypeError("Timestamp subtraction must have the same "
                                "timezones or no timezones")
            else:
                i8 = self.asi8
                result = checked_add_with_arr(i8,
                                              -other.value,
                                              arr_mask=self._isnan)
                result = self._maybe_mask_results(result, fill_value=iNaT)
        else:
            raise TypeError("cannot subtract {cls} and {typ}".format(
                cls=type(self).__name__, typ=type(other).__name__))
        return result.view('timedelta64[ns]')
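# At the public level this is the arithmetic behind DatetimeIndex minus
# Timestamp; a quick sketch of both the result dtype and the tz-compat
# error raised above:
import pandas as pd

dti = pd.date_range('2016-01-01', periods=3, tz='US/Eastern')
print((dti - pd.Timestamp('2016-01-01', tz='US/Eastern')).dtype)  # timedelta64[ns]

try:
    dti - pd.Timestamp('2016-01-01')   # aware minus naive
except TypeError as err:
    print(err)                         # tz-compat error, as raised above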
Example #12
def test_usecols_with_parse_dates3(all_parsers):
    # see gh-14792
    parser = all_parsers
    data = """a,b,c,d,e,f,g,h,i,j
2016/09/21,1,1,2,3,4,5,6,7,8"""

    usecols = list("abcdefghij")
    parse_dates = [0]

    cols = {
        "a": Timestamp("2016-09-21"),
        "b": [1],
        "c": [1],
        "d": [2],
        "e": [3],
        "f": [4],
        "g": [5],
        "h": [6],
        "i": [7],
        "j": [8],
    }
    expected = DataFrame(cols, columns=usecols)

    result = parser.read_csv(StringIO(data),
                             usecols=usecols,
                             parse_dates=parse_dates)
    tm.assert_frame_equal(result, expected)
Example #13
    def get_wayback_tot_via_api(self, x, w, urlalt=None):
        '''
        returns archive information for the URL (via waybackmachine)
        :param x: the number of historical data to look back (years)
        :param w: the penalization value for domain search
        :param urlalt: an alternative URL (e.g., the URL domain)
        :return: number of years the URL was cached, the last cache (year)
        '''
        k = 0
        y = None
        try:
            if urlalt is None:
                urlalt = self.url
                w = 1.0
            out = self.__query_wayback(urlalt)
            if out['archived_snapshots']['closest']['status'] == '200':
                k += 1
                tm = Timestamp(
                    out['archived_snapshots']['closest']['timestamp'], tz=None)
                y = tm.year
                for i in range(1, x):
                    try:
                        out = self.__query_wayback(urlalt,
                                                   str(tm.year - i) + '0101')
                        if out['archived_snapshots']['closest'][
                                'status'] == '200':
                            k += w
                    except Exception:
                        pass

            return k, y
        except Exception:
            return k, None
Example #14
def test_override_set_noconvert_columns():
    # see gh-17351
    #
    # Usecols needs to be sorted in _set_noconvert_columns based
    # on the test_usecols_with_parse_dates test from test_usecols.py
    class MyTextFileReader(TextFileReader):
        def __init__(self):
            self._currow = 0
            self.squeeze = False

    class MyCParserWrapper(CParserWrapper):
        def _set_noconvert_columns(self):
            if self.usecols_dtype == "integer":
                # self.usecols is a set, which is documented as unordered
                # but in practice, a CPython set of integers is sorted.
                # In other implementations this assumption does not hold.
                # The following code simulates a different order, which
                # before GH 17351 would cause the wrong columns to be
                # converted via the parse_dates parameter
                self.usecols = list(self.usecols)
                self.usecols.reverse()
            return CParserWrapper._set_noconvert_columns(self)

    data = """a,b,c,d,e
0,1,20140101,0900,4
0,1,20140102,1000,4"""

    parse_dates = [[1, 2]]
    cols = {
        "a": [0, 0],
        "c_d":
        [Timestamp("2014-01-01 09:00:00"),
         Timestamp("2014-01-02 10:00:00")],
    }
    expected = DataFrame(cols, columns=["c_d", "a"])

    parser = MyTextFileReader()
    parser.options = {
        "usecols": [0, 2, 3],
        "parse_dates": parse_dates,
        "delimiter": ",",
    }
    parser.engine = "c"
    parser._engine = MyCParserWrapper(StringIO(data), **parser.options)

    result = parser.read()
    tm.assert_frame_equal(result, expected)
Example #15
    def test_override__set_noconvert_columns(self):
        # GH 17351 - usecols needs to be sorted in _set_noconvert_columns
        # based on the test_usecols_with_parse_dates test from usecols.py
        from pandas.io.parsers import CParserWrapper, TextFileReader

        s = """a,b,c,d,e
        0,1,20140101,0900,4
        0,1,20140102,1000,4"""

        parse_dates = [[1, 2]]
        cols = {
            'a': [0, 0],
            'c_d': [
                Timestamp('2014-01-01 09:00:00'),
                Timestamp('2014-01-02 10:00:00')
            ]
        }
        expected = DataFrame(cols, columns=['c_d', 'a'])

        class MyTextFileReader(TextFileReader):
            def __init__(self):
                self._currow = 0
                self.squeeze = False

        class MyCParserWrapper(CParserWrapper):
            def _set_noconvert_columns(self):
                if self.usecols_dtype == 'integer':
                    # self.usecols is a set, which is documented as unordered
                    # but in practice, a CPython set of integers is sorted.
                    # In other implementations this assumption does not hold.
                    # The following code simulates a different order, which
                    # before GH 17351 would cause the wrong columns to be
                    # converted via the parse_dates parameter
                    self.usecols = list(self.usecols)
                    self.usecols.reverse()
                return CParserWrapper._set_noconvert_columns(self)

        parser = MyTextFileReader()
        parser.options = {
            'usecols': [0, 2, 3],
            'parse_dates': parse_dates,
            'delimiter': ','
        }
        parser._engine = MyCParserWrapper(StringIO(s), **parser.options)
        df = parser.read()

        tm.assert_frame_equal(df, expected)
Example #16
    def test_multiple_date_col_timestamp_parse(self):
        data = """05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25
05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25"""
        result = self.read_csv(StringIO(data), sep=',', header=None,
                               parse_dates=[[0, 1]], date_parser=Timestamp)

        ex_val = Timestamp('05/31/2012 15:30:00.029')
        assert result['0_1'][0] == ex_val
Example #17
    def _has_same_tz(self, other):
        zzone = self._timezone

        # vzone shouldn't be None if value is non-datetime like
        if isinstance(other, np.datetime64):
            # convert to Timestamp as np.datetime64 doesn't have tz attr
            other = Timestamp(other)
        vzone = timezones.get_timezone(getattr(other, 'tzinfo', '__no_tz__'))
        return zzone == vzone
Example #18
def _to_m8(key, tz=None):
    """
    Timestamp-like => dt64
    """
    if not isinstance(key, Timestamp):
        # this also converts strings
        key = Timestamp(key, tz=tz)

    return np.int64(conversion.pydt_to_i8(key)).view(_NS_DTYPE)
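# The same Timestamp -> datetime64 conversion is available publicly as
# Timestamp.asm8 (a sketch of the observable behavior, not this internal
# helper):
import pandas as pd

ts = pd.Timestamp('2013-01-01 09:00')
print(ts.asm8)        # numpy.datetime64('2013-01-01T09:00:00.000000000')
print(ts.asm8.dtype)  # dtype('<M8[ns]')
print(ts.value)       # the same instant as integer nanoseconds since epoch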
Example #19
def _generate_regular_range(cls, start, end, periods, freq):
    if isinstance(freq, Tick):
        stride = freq.nanos
        if periods is None:
            b = Timestamp(start).value
            # cannot just use e = Timestamp(end) + 1 because arange breaks when
            # stride is too large, see GH10887
            e = (b + (Timestamp(end).value - b) // stride * stride +
                 stride // 2 + 1)
            # end.tz == start.tz by this point due to _generate implementation
            tz = start.tz
        elif start is not None:
            b = Timestamp(start).value
            e = b + np.int64(periods) * stride
            tz = start.tz
        elif end is not None:
            e = Timestamp(end).value + stride
            b = e - np.int64(periods) * stride
            tz = end.tz
        else:
            raise ValueError("at least 'start' or 'end' should be specified "
                             "if a 'period' is given.")

        data = np.arange(b, e, stride, dtype=np.int64)
        data = cls._simple_new(data.view(_NS_DTYPE), None, tz=tz)
    else:
        tz = None
        if isinstance(start, Timestamp):
            tz = start.tz
            start = start.to_pydatetime()

        if isinstance(end, Timestamp):
            tz = end.tz
            end = end.to_pydatetime()

        xdr = generate_range(start=start,
                             end=end,
                             periods=periods,
                             offset=freq)

        values = np.array([x.value for x in xdr])
        data = cls._simple_new(values, freq=freq, tz=tz)

    return data
Example #20
    def test_csvtext(self):
        csvtext = """2017-09-12,932.1
2017-09-13,935.0
2017-09-14,925.1
2017-09-15,920.2
"""
        series = from_csvtext(csvtext)

        self.assertEqual(series.index[0], Timestamp('2017-09-12'))
        self.assertEqual(series.index[1], Timestamp('2017-09-13'))
        self.assertEqual(series.index[2], Timestamp('2017-09-14'))
        self.assertEqual(series.index[3], Timestamp('2017-09-15'))

        self.assertEqual(series[0], 932.1)
        self.assertEqual(series[1], 935.0)
        self.assertEqual(series[2], 925.1)
        self.assertEqual(series[3], 920.2)

        self.assertEqual(to_csvtext(series), csvtext)
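# from_csvtext / to_csvtext are this project's own helpers; a minimal
# equivalent sketch using pandas alone (two-column date,value layout
# assumed, matching the fixture above):
import pandas as pd
from io import StringIO

def from_csvtext_sketch(text):
    # index_col=0 with parse_dates=True yields a DatetimeIndex'd Series
    return pd.read_csv(StringIO(text), header=None, index_col=0,
                       parse_dates=True).iloc[:, 0]

def to_csvtext_sketch(series):
    # round-trips the same date,value layout
    return series.to_csv(header=False)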
Example #21
def test_parse_tz_aware(all_parsers):
    # See gh-1693
    parser = all_parsers
    data = "Date,x\n2012-06-13T01:39:00Z,0.5"

    result = parser.read_csv(StringIO(data), index_col=0, parse_dates=True)
    expected = DataFrame({"x": [0.5]},
                         index=Index([Timestamp("2012-06-13 01:39:00+00:00")],
                                     name="Date"))
    tm.assert_frame_equal(result, expected)
    assert result.index.tz is pytz.utc
Example #22
def test_nat_parse(all_parsers):
    # see gh-3062
    parser = all_parsers
    df = DataFrame(
        dict({"A": np.arange(10, dtype="float64"), "B": Timestamp("20010101")})
    )
    df.iloc[3:6, :] = np.nan

    with tm.ensure_clean("__nat_parse_.csv") as path:
        df.to_csv(path)

        result = parser.read_csv(path, index_col=0, parse_dates=["B"])
        tm.assert_frame_equal(result, df)
Example #23
    def test_index_groupby(self):
        int_idx = Index(range(6))
        float_idx = Index(np.arange(0, 0.6, 0.1))
        obj_idx = Index('A B C D E F'.split())
        dt_idx = pd.date_range('2013-01-01', freq='M', periods=6)

        for idx in [int_idx, float_idx, obj_idx, dt_idx]:
            to_groupby = np.array([1, 2, np.nan, np.nan, 2, 1])
            tm.assert_dict_equal(idx.groupby(to_groupby),
                                 {1.0: idx[[0, 5]], 2.0: idx[[1, 4]]})

            to_groupby = Index([datetime(2011, 11, 1),
                                datetime(2011, 12, 1),
                                pd.NaT,
                                pd.NaT,
                                datetime(2011, 12, 1),
                                datetime(2011, 11, 1)],
                               tz='UTC').values

            ex_keys = [Timestamp('2011-11-01'), Timestamp('2011-12-01')]
            expected = {ex_keys[0]: idx[[0, 5]],
                        ex_keys[1]: idx[[1, 4]]}
            tm.assert_dict_equal(idx.groupby(to_groupby), expected)
Example #24
def test_date_parser_usecols_thousands(all_parsers):
    # GH#39365
    data = """A,B,C
    1,3,20-09-01-01
    2,4,20-09-01-01
    """

    parser = all_parsers
    result = parser.read_csv(
        StringIO(data),
        parse_dates=[1],
        usecols=[1, 2],
        thousands="-",
    )
    expected = DataFrame({"B": [3, 4], "C": [Timestamp("20-09-2001 01:00:00")] * 2})
    tm.assert_frame_equal(result, expected)
Example #25
    def test_datetime_units(self):
        val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504)
        stamp = Timestamp(val)

        roundtrip = ujson.decode(ujson.encode(val, date_unit='s'))
        assert roundtrip == stamp.value // 10**9

        roundtrip = ujson.decode(ujson.encode(val, date_unit='ms'))
        assert roundtrip == stamp.value // 10**6

        roundtrip = ujson.decode(ujson.encode(val, date_unit='us'))
        assert roundtrip == stamp.value // 10**3

        roundtrip = ujson.decode(ujson.encode(val, date_unit='ns'))
        assert roundtrip == stamp.value

        pytest.raises(ValueError, ujson.encode, val, date_unit='foo')
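# Why the divisors are 10**9, 10**6 and 10**3: Timestamp.value counts
# nanoseconds since the Unix epoch, so integer division rescales it to
# seconds, milliseconds and microseconds for the matching date_unit:
import pandas as pd

stamp = pd.Timestamp('2013-08-17 21:17:12.215504')
print(stamp.value)            # nanoseconds since epoch
print(stamp.value // 10**9)   # seconds, i.e. date_unit='s'
print(stamp.value // 10**3)   # microseconds, i.e. date_unit='us'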
Example #26
    def test_datetime_units(self):
        val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504)
        stamp = Timestamp(val)

        roundtrip = ujson.decode(ujson.encode(val, date_unit="s"))
        assert roundtrip == stamp.value // 10**9

        roundtrip = ujson.decode(ujson.encode(val, date_unit="ms"))
        assert roundtrip == stamp.value // 10**6

        roundtrip = ujson.decode(ujson.encode(val, date_unit="us"))
        assert roundtrip == stamp.value // 10**3

        roundtrip = ujson.decode(ujson.encode(val, date_unit="ns"))
        assert roundtrip == stamp.value

        msg = "Invalid value 'foo' for option 'date_unit'"
        with pytest.raises(ValueError, match=msg):
            ujson.encode(val, date_unit="foo")
Example #27
    def _box_func(self):
        return lambda x: Timestamp(x, freq=self.freq, tz=self.tz)
Example #28
    def test_constructor_invalid(self):

        # invalid
        pytest.raises(TypeError, Float64Index, 0.)
        pytest.raises(TypeError, Float64Index, ['a', 'b', 0.])
        pytest.raises(TypeError, Float64Index, [Timestamp('20130101')])
Example #29
    def test_usecols_with_parse_dates(self):
        # See gh-9755
        s = """a,b,c,d,e
        0,1,20140101,0900,4
        0,1,20140102,1000,4"""
        parse_dates = [[1, 2]]

        cols = {
            'a': [0, 0],
            'c_d': [
                Timestamp('2014-01-01 09:00:00'),
                Timestamp('2014-01-02 10:00:00')
            ]
        }
        expected = DataFrame(cols, columns=['c_d', 'a'])

        df = self.read_csv(StringIO(s), usecols=[0, 2, 3],
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)

        df = self.read_csv(StringIO(s), usecols=[3, 0, 2],
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)

        # See gh-13604
        s = """2008-02-07 09:40,1032.43
        2008-02-07 09:50,1042.54
        2008-02-07 10:00,1051.65
        """
        parse_dates = [0]
        names = ['date', 'values']
        usecols = names[:]

        index = Index([Timestamp('2008-02-07 09:40'),
                       Timestamp('2008-02-07 09:50'),
                       Timestamp('2008-02-07 10:00')],
                      name='date')
        cols = {'values': [1032.43, 1042.54, 1051.65]}
        expected = DataFrame(cols, index=index)

        df = self.read_csv(StringIO(s), parse_dates=parse_dates, index_col=0,
                           usecols=usecols, header=None, names=names)
        tm.assert_frame_equal(df, expected)

        # See gh-14792
        s = """a,b,c,d,e,f,g,h,i,j
        2016/09/21,1,1,2,3,4,5,6,7,8"""
        parse_dates = [0]
        usecols = list('abcdefghij')
        cols = {'a': Timestamp('2016-09-21'),
                'b': [1], 'c': [1], 'd': [2],
                'e': [3], 'f': [4], 'g': [5],
                'h': [6], 'i': [7], 'j': [8]}
        expected = DataFrame(cols, columns=usecols)
        df = self.read_csv(StringIO(s), usecols=usecols,
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)

        s = """a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8"""
        parse_dates = [[0, 1]]
        usecols = list('abcdefghij')
        cols = {'a_b': '2016/09/21 1',
                'c': [1], 'd': [2], 'e': [3], 'f': [4],
                'g': [5], 'h': [6], 'i': [7], 'j': [8]}
        expected = DataFrame(cols, columns=['a_b'] + list('cdefghij'))
        df = self.read_csv(StringIO(s), usecols=usecols,
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)
Example #30
    def _generate_range(cls,
                        start,
                        end,
                        periods,
                        freq,
                        tz=None,
                        normalize=False,
                        ambiguous='raise',
                        closed=None):
        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError('Of the four parameters: start, end, periods, '
                             'and freq, exactly three must be specified')
        freq = to_offset(freq)

        if start is not None:
            start = Timestamp(start)

        if end is not None:
            end = Timestamp(end)

        if start is None and end is None:
            if closed is not None:
                raise ValueError("Closed has to be None if not both of start"
                                 "and end are defined")

        left_closed, right_closed = dtl.validate_endpoints(closed)

        start, end, _normalized = _maybe_normalize_endpoints(
            start, end, normalize)

        tz, inferred_tz = _infer_tz_from_endpoints(start, end, tz)

        if hasattr(freq, 'delta') and freq != Day():
            # sub-Day Tick
            if inferred_tz is None and tz is not None:
                # naive dates
                if start is not None and start.tz is None:
                    start = start.tz_localize(tz, ambiguous=False)

                if end is not None and end.tz is None:
                    end = end.tz_localize(tz, ambiguous=False)

            if start and end:
                if start.tz is None and end.tz is not None:
                    start = start.tz_localize(end.tz, ambiguous=False)

                if end.tz is None and start.tz is not None:
                    end = end.tz_localize(start.tz, ambiguous=False)

            if cls._use_cached_range(freq, _normalized, start, end):
                index = cls._cached_range(start,
                                          end,
                                          periods=periods,
                                          freq=freq)
            else:
                index = _generate_regular_range(cls, start, end, periods, freq)

        else:

            if tz is not None:
                # naive dates
                if start is not None and start.tz is not None:
                    start = start.replace(tzinfo=None)

                if end is not None and end.tz is not None:
                    end = end.replace(tzinfo=None)

            if start and end:
                if start.tz is None and end.tz is not None:
                    end = end.replace(tzinfo=None)

                if end.tz is None and start.tz is not None:
                    start = start.replace(tzinfo=None)

            if freq is not None:
                if cls._use_cached_range(freq, _normalized, start, end):
                    index = cls._cached_range(start,
                                              end,
                                              periods=periods,
                                              freq=freq)
                else:
                    index = _generate_regular_range(cls, start, end, periods,
                                                    freq)

                if tz is not None and getattr(index, 'tz', None) is None:
                    arr = conversion.tz_localize_to_utc(ensure_int64(
                        index.values),
                                                        tz,
                                                        ambiguous=ambiguous)

                    index = cls(arr)

                    # index is localized datetime64 array -> have to convert
                    # start/end as well to compare
                    if start is not None:
                        start = start.tz_localize(tz).asm8
                    if end is not None:
                        end = end.tz_localize(tz).asm8
            else:
                # Create a linearly spaced date_range in local time
                start = start.tz_localize(tz)
                end = end.tz_localize(tz)
                arr = np.linspace(start.value, end.value, periods)
                index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz)

        if not left_closed and len(index) and index[0] == start:
            index = index[1:]
        if not right_closed and len(index) and index[-1] == end:
            index = index[:-1]

        return cls._simple_new(index.values, freq=freq, tz=tz)
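# The left_closed / right_closed trimming at the end is observable through
# the public closed= argument of date_range (later superseded by
# inclusive= in newer pandas; a sketch of the behavior only):
import pandas as pd

print(pd.date_range('2018-01-01', '2018-01-04', freq='D', closed='left'))
# DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], ...) -- the
# right endpoint is dropped.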