Ejemplo n.º 1
0
 def _int64_cut_off(shape):
     acc = long(1)
     for i, mul in enumerate(shape):
         acc *= long(mul)
         if not acc < _INT64_MAX:
             return i
     return len(shape)
Ejemplo n.º 2
0
    def test_2d_datetime64(self):
        """Check ``com.take_nd`` on a 2-D ``datetime64[ns]`` array.

        For each axis the ``out=`` variant must agree with the returned
        array, and the rows/columns selected by a ``-1`` indexer entry must
        hold ``iNaT`` by default or the supplied ``fill_value``.
        """
        # 2005/01/01 - 2006/01/01
        raw = np.random.randint(long(11045376), long(11360736), (5, 3))
        arr = (raw * 100000000000).view(dtype='datetime64[ns]')
        indexer = [0, 2, -1, 1, -1]
        fill = datetime(2007, 1, 1)

        # axis=0, default fill
        taken = com.take_nd(arr, indexer, axis=0)
        taken_out = np.empty_like(taken)
        com.take_nd(arr, indexer, out=taken_out, axis=0)
        tm.assert_almost_equal(taken, taken_out)

        want = arr.take(indexer, axis=0)
        want.view(np.int64)[[2, 4], :] = iNaT
        tm.assert_almost_equal(taken, want)

        # axis=0, explicit fill value
        taken = com.take_nd(arr, indexer, axis=0, fill_value=fill)
        taken_out = np.empty_like(taken)
        com.take_nd(arr, indexer, out=taken_out, axis=0, fill_value=fill)
        tm.assert_almost_equal(taken, taken_out)

        want = arr.take(indexer, axis=0)
        want[[2, 4], :] = fill
        tm.assert_almost_equal(taken, want)

        # axis=1, default fill
        taken = com.take_nd(arr, indexer, axis=1)
        taken_out = np.empty_like(taken)
        com.take_nd(arr, indexer, out=taken_out, axis=1)
        tm.assert_almost_equal(taken, taken_out)

        want = arr.take(indexer, axis=1)
        want.view(np.int64)[:, [2, 4]] = iNaT
        tm.assert_almost_equal(taken, want)

        # axis=1, explicit fill value
        taken = com.take_nd(arr, indexer, axis=1, fill_value=fill)
        taken_out = np.empty_like(taken)
        com.take_nd(arr, indexer, out=taken_out, axis=1, fill_value=fill)
        tm.assert_almost_equal(taken, taken_out)

        want = arr.take(indexer, axis=1)
        want[:, [2, 4]] = fill
        tm.assert_almost_equal(taken, want)
Ejemplo n.º 3
0
    def test_2d_datetime64(self):
        """Check ``algos.take_nd`` on a 2-D ``datetime64[ns]`` array.

        For each axis the ``out=`` variant must agree with the returned
        array, and the rows/columns selected by a ``-1`` indexer entry must
        hold ``iNaT`` by default or the supplied ``fill_value``.
        """
        # 2005/01/01 - 2006/01/01
        raw = np.random.randint(long(11045376), long(11360736), (5, 3))
        arr = (raw * 100000000000).view(dtype='datetime64[ns]')
        indexer = [0, 2, -1, 1, -1]
        fill = datetime(2007, 1, 1)

        # axis=0, default fill
        taken = algos.take_nd(arr, indexer, axis=0)
        taken_out = np.empty_like(taken)
        algos.take_nd(arr, indexer, out=taken_out, axis=0)
        tm.assert_almost_equal(taken, taken_out)

        want = arr.take(indexer, axis=0)
        want.view(np.int64)[[2, 4], :] = iNaT
        tm.assert_almost_equal(taken, want)

        # axis=0, explicit fill value
        taken = algos.take_nd(arr, indexer, axis=0, fill_value=fill)
        taken_out = np.empty_like(taken)
        algos.take_nd(arr, indexer, out=taken_out, axis=0, fill_value=fill)
        tm.assert_almost_equal(taken, taken_out)

        want = arr.take(indexer, axis=0)
        want[[2, 4], :] = fill
        tm.assert_almost_equal(taken, want)

        # axis=1, default fill
        taken = algos.take_nd(arr, indexer, axis=1)
        taken_out = np.empty_like(taken)
        algos.take_nd(arr, indexer, out=taken_out, axis=1)
        tm.assert_almost_equal(taken, taken_out)

        want = arr.take(indexer, axis=1)
        want.view(np.int64)[:, [2, 4]] = iNaT
        tm.assert_almost_equal(taken, want)

        # axis=1, explicit fill value
        taken = algos.take_nd(arr, indexer, axis=1, fill_value=fill)
        taken_out = np.empty_like(taken)
        algos.take_nd(arr, indexer, out=taken_out, axis=1, fill_value=fill)
        tm.assert_almost_equal(taken, taken_out)

        want = arr.take(indexer, axis=1)
        want[:, [2, 4]] = fill
        tm.assert_almost_equal(taken, want)
Ejemplo n.º 4
0
    def test_comparison(self):
        """Exercise the rich comparisons of ``Timestamp`` against itself,
        against an equal ``datetime`` and against a later ``Timestamp``."""
        # 5-18-2012 00:00:00.000
        stamp = long(1337299200000000000)

        val = Timestamp(stamp)

        # reflexive comparisons
        assert val == val
        assert not val != val
        assert not val < val
        assert val <= val
        assert not val > val
        assert val >= val

        # an equal plain datetime
        other = datetime(2012, 5, 18)
        assert val == other
        assert not val != other
        assert not val < other
        assert val <= other
        assert not val > other
        assert val >= other

        # a Timestamp built from a value 100 larger: unequal and ordered
        other = Timestamp(stamp + 100)

        assert val != other
        assert not val == other
        assert val < other
        assert val <= other
        assert other > val
        assert other >= val
Ejemplo n.º 5
0
    def _try_convert_to_date(self, data):
        """ try to parse a ndarray like into a date column
            try to coerce object in epoch/iso formats and
            integer/float in epcoh formats, return a boolean if parsing
            was successful """

        # no conversion on empty
        if not len(data): return data, False

        new_data = data
        if new_data.dtype == 'object':
            try:
                new_data = data.astype('int64')
            except:
                pass


        # ignore numbers that are out of range
        if issubclass(new_data.dtype.type,np.number):
            if not ((new_data == iNaT) | (new_data > long(31536000000000000))).all():
                return data, False

        try:
            new_data = to_datetime(new_data)
        except:
            try:
                new_data = to_datetime(new_data.astype('int64'))
            except:

                # return old, noting more we can do
                return data, False

        return new_data, True
Ejemplo n.º 6
0
    def test_comparison(self):
        """Exercise the rich comparisons of ``Timestamp`` against itself,
        against an equal ``datetime`` and against a later ``Timestamp``."""
        # 5-18-2012 00:00:00.000
        stamp = long(1337299200000000000)

        val = Timestamp(stamp)

        # reflexive comparisons
        assert val == val
        assert not val != val
        assert not val < val
        assert val <= val
        assert not val > val
        assert val >= val

        # an equal plain datetime
        other = datetime(2012, 5, 18)
        assert val == other
        assert not val != other
        assert not val < other
        assert val <= other
        assert not val > other
        assert val >= other

        # a Timestamp built from a value 100 larger: unequal and ordered
        other = Timestamp(stamp + 100)

        assert val != other
        assert not val == other
        assert val < other
        assert val <= other
        assert other > val
        assert other >= val
Ejemplo n.º 7
0
    def test_convert_sql_column_longs(self):
        """``lib.convert_sql_column`` on object arrays of longs: expects
        int64 output, or float64 with NaN when a ``None`` is present."""
        all_longs = np.array([long(v) for v in (1, 2, 3, 4)], dtype='O')
        converted = lib.convert_sql_column(all_longs)
        want = np.array([1, 2, 3, 4], dtype='i8')
        _assert_same_values_and_dtype(converted, want)

        # a None in the column forces a float result with NaN
        with_none = np.array(
            [long(1), long(2), long(3), None, long(4)], dtype='O')
        converted = lib.convert_sql_column(with_none)
        want = np.array([1, 2, 3, np.nan, 4], dtype='f8')
        _assert_same_values_and_dtype(converted, want)
Ejemplo n.º 8
0
    def test_convert_sql_column_longs(self):
        """``lib.convert_sql_column`` on object arrays of longs: expects
        int64 output, or float64 with NaN when a ``None`` is present."""
        all_longs = np.array([long(v) for v in (1, 2, 3, 4)], dtype='O')
        converted = lib.convert_sql_column(all_longs)
        want = np.array([1, 2, 3, 4], dtype='i8')
        _assert_same_values_and_dtype(converted, want)

        # a None in the column forces a float result with NaN
        with_none = np.array(
            [long(1), long(2), long(3), None, long(4)], dtype='O')
        converted = lib.convert_sql_column(with_none)
        want = np.array([1, 2, 3, np.nan, 4], dtype='f8')
        _assert_same_values_and_dtype(converted, want)
Ejemplo n.º 9
0
    def test_convert_sql_column_longs(self):
        """``lib.convert_sql_column`` on object arrays of longs: expects
        int64 output, or float64 with NaN when a ``None`` is present."""
        all_longs = np.array([long(v) for v in (1, 2, 3, 4)], dtype='O')
        converted = lib.convert_sql_column(all_longs)
        want = np.array([1, 2, 3, 4], dtype='i8')
        self.assert_numpy_array_equal(converted, want)

        # a None in the column forces a float result with NaN
        with_none = np.array(
            [long(1), long(2), long(3), None, long(4)], dtype='O')
        converted = lib.convert_sql_column(with_none)
        want = np.array([1, 2, 3, np.nan, 4], dtype='f8')
        self.assert_numpy_array_equal(converted, want)
Ejemplo n.º 10
0
    def test_convert_sql_column_longs(self):
        """``lib.convert_sql_column`` on object arrays of longs: expects
        int64 output, or float64 with NaN when a ``None`` is present."""
        all_longs = np.array([long(v) for v in (1, 2, 3, 4)], dtype='O')
        converted = lib.convert_sql_column(all_longs)
        want = np.array([1, 2, 3, 4], dtype='i8')
        tm.assert_numpy_array_equal(converted, want)

        # a None in the column forces a float result with NaN
        with_none = np.array(
            [long(1), long(2), long(3), None, long(4)], dtype='O')
        converted = lib.convert_sql_column(with_none)
        want = np.array([1, 2, 3, np.nan, 4], dtype='f8')
        tm.assert_numpy_array_equal(converted, want)
Ejemplo n.º 11
0
    def test_scalar_conversion(self):
        """A Series built from a scalar is not a float, yet a one-element
        Series coerces through ``float``, ``int`` and ``long``."""

        # Pass in scalar is disabled
        wrapped = Series(0.5)
        self.assertNotIsInstance(wrapped, float)

        # coercion through each numeric constructor
        for convert, want in ((float, 1.0), (int, 1), (long, 1)):
            self.assertEqual(convert(Series([1.])), want)
Ejemplo n.º 12
0
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """

    Parameters
    ----------

    Returns
    -------

    """
    if len(left_keys) != len(right_keys):
        raise AssertionError('left_key and right_keys must be the same length')

    left_labels = []
    right_labels = []
    group_sizes = []

    for lk, rk in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(lk, rk, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    max_groups = long(1)
    for x in group_sizes:
        max_groups *= long(x)

    if max_groups > 2**63:  # pragma: no cover
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(lib.fast_zip(left_labels),
                            lib.fast_zip(right_labels))
    else:
        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key, sort=sort)

    # preserve left frame order if how == 'left' and sort == False
    kwargs = {'sort': sort} if how == 'left' else {}
    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups, **kwargs)
Ejemplo n.º 13
0
    def test_scalar_conversion(self):
        """A Series built from a scalar is not a float, yet a one-element
        Series coerces through ``float``, ``int`` and ``long``."""

        # Pass in scalar is disabled
        wrapped = Series(0.5)
        assert not isinstance(wrapped, float)

        # Coercion through each numeric constructor
        for convert, want in ((float, 1.0), (int, 1), (long, 1)):
            assert convert(Series([1.])) == want
Ejemplo n.º 14
0
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """

    Parameters
    ----------

    Returns
    -------

    """
    if len(left_keys) != len(right_keys):
        raise AssertionError('left_key and right_keys must be the same length')

    left_labels = []
    right_labels = []
    group_sizes = []

    for lk, rk in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(lk, rk, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    max_groups = long(1)
    for x in group_sizes:
        max_groups *= long(x)

    if max_groups > 2 ** 63:  # pragma: no cover
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(lib.fast_zip(left_labels),
                            lib.fast_zip(right_labels))
    else:
        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key, sort=sort)

    # preserve left frame order if how == 'left' and sort == False
    kwargs = {'sort':sort} if how == 'left' else {}
    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups, **kwargs)
Ejemplo n.º 15
0
    def test_scalar_conversion(self):
        """A Series built from a scalar is not a float, yet a one-element
        Series coerces through ``float``, ``int`` and ``long``."""

        # Pass in scalar is disabled
        wrapped = Series(0.5)
        self.assertNotIsInstance(wrapped, float)

        # coercion through each numeric constructor
        for convert, want in ((float, 1.0), (int, 1), (long, 1)):
            self.assertEqual(convert(Series([1.])), want)
Ejemplo n.º 16
0
    def test_scalar_conversion(self):
        """A Series built from a scalar is not a float, yet a one-element
        Series coerces through ``float``, ``int`` and ``long``."""

        # Pass in scalar is disabled
        wrapped = Series(0.5)
        assert not isinstance(wrapped, float)

        # Coercion through each numeric constructor
        for convert, want in ((float, 1.0), (int, 1), (long, 1)):
            assert convert(Series([1.])) == want
Ejemplo n.º 17
0
    def test_compare_invalid(self):
        """A Timestamp must compare unequal to a string, plain and numpy
        numbers, a list and a dict -- via both ``==`` and ``!=``."""
        # GH#8058
        val = Timestamp('20130101 12:01:02')
        unrelated = ['foo', 10.0, 1, long(1), [], {'foo': 1},
                     np.float64(1), np.int64(1)]

        # ``==`` is False for every unrelated object
        for other in unrelated:
            assert not val == other

        # ... and ``!=`` is True
        for other in unrelated:
            assert val != other
Ejemplo n.º 18
0
    def test_compare_invalid(self):
        """A Timestamp must compare unequal to a string, plain and numpy
        numbers, a list and a dict -- via both ``==`` and ``!=``."""
        # GH 8058
        val = Timestamp('20130101 12:01:02')
        unrelated = ['foo', 10.0, 1, long(1), [], {'foo': 1},
                     np.float64(1), np.int64(1)]

        # ``==`` is False for every unrelated object
        for other in unrelated:
            assert not val == other

        # ... and ``!=`` is True
        for other in unrelated:
            assert val != other
Ejemplo n.º 19
0
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """

    Parameters
    ----------

    Returns
    -------

    """
    if not ((len(left_keys) == len(right_keys))):
        raise AssertionError()

    left_labels = []
    right_labels = []
    group_sizes = []

    for lk, rk in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(lk, rk, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    max_groups = long(1)
    for x in group_sizes:
        max_groups *= long(x)

    if max_groups > 2**63:  # pragma: no cover
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(lib.fast_zip(left_labels),
                            lib.fast_zip(right_labels))
    else:
        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key, sort=sort)

    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups)
Ejemplo n.º 20
0
def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
    """

    Parameters
    ----------

    Returns
    -------

    """
    if not ((len(left_keys) == len(right_keys))):
        raise AssertionError()

    left_labels = []
    right_labels = []
    group_sizes = []

    for lk, rk in zip(left_keys, right_keys):
        llab, rlab, count = _factorize_keys(lk, rk, sort=sort)

        left_labels.append(llab)
        right_labels.append(rlab)
        group_sizes.append(count)

    max_groups = long(1)
    for x in group_sizes:
        max_groups *= long(x)

    if max_groups > 2 ** 63:  # pragma: no cover
        left_group_key, right_group_key, max_groups = \
            _factorize_keys(lib.fast_zip(left_labels),
                            lib.fast_zip(right_labels))
    else:
        left_group_key = get_group_index(left_labels, group_sizes)
        right_group_key = get_group_index(right_labels, group_sizes)

        left_group_key, right_group_key, max_groups = \
            _factorize_keys(left_group_key, right_group_key, sort=sort)

    join_func = _join_functions[how]
    return join_func(left_group_key, right_group_key, max_groups)
Ejemplo n.º 21
0
    def test_multiindex_columns_empty_level(self):
        """Grouping by the label ``'to filter'`` and by the full column
        tuple ``('to filter', '')`` must yield the same groups."""
        columns = MultiIndex.from_tuples([['count', 'values'],
                                          ['to filter', '']])
        full_key = ('to filter', '')

        # single row
        frame = DataFrame([[long(1), 'A']], columns=columns)

        by_label = frame.groupby('to filter').groups
        assert by_label['A'] == [0]

        by_tuple = frame.groupby([full_key]).groups
        assert by_tuple['A'] == [0]

        # two rows in different groups
        frame = DataFrame([[long(1), 'A'], [long(2), 'B']], columns=columns)

        by_label = frame.groupby('to filter').groups
        by_tuple = frame.groupby([full_key]).groups
        assert by_tuple == by_label

        # two rows in the same group
        frame = DataFrame([[long(1), 'A'], [long(2), 'A']], columns=columns)

        by_label = frame.groupby('to filter').groups
        by_tuple = frame.groupby([full_key]).groups
        tm.assert_dict_equal(by_tuple, by_label)
Ejemplo n.º 22
0
    def test_multiindex_columns_empty_level(self):
        """Grouping by the label ``'to filter'`` and by the full column
        tuple ``('to filter', '')`` must yield the same groups."""
        columns = MultiIndex.from_tuples([['count', 'values'],
                                          ['to filter', '']])
        full_key = ('to filter', '')

        # single row
        frame = DataFrame([[long(1), 'A']], columns=columns)

        by_label = frame.groupby('to filter').groups
        assert by_label['A'] == [0]

        by_tuple = frame.groupby([full_key]).groups
        assert by_tuple['A'] == [0]

        # two rows in different groups
        frame = DataFrame([[long(1), 'A'], [long(2), 'B']], columns=columns)

        by_label = frame.groupby('to filter').groups
        by_tuple = frame.groupby([full_key]).groups
        assert by_tuple == by_label

        # two rows in the same group
        frame = DataFrame([[long(1), 'A'], [long(2), 'A']], columns=columns)

        by_label = frame.groupby('to filter').groups
        by_tuple = frame.groupby([full_key]).groups
        tm.assert_dict_equal(by_tuple, by_label)
Ejemplo n.º 23
0
def test_inplace_mutation_resets_values():
    """``set_levels`` / ``set_codes`` must be reflected in ``.values``,
    and the non-inplace variants must leave the original index (including
    its cached ``_tuples``) untouched."""
    shared_codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]
    first_levels = [['a', 'b', 'c'], [4]]
    second_levels = [[1, 2, 3], ['a']]

    first = MultiIndex(levels=first_levels, codes=shared_codes)
    second = MultiIndex(levels=second_levels, codes=shared_codes)
    first_vals = first.values.copy()
    second_vals = second.values.copy()

    assert first._tuples is not None

    # Make sure level setting works
    relevelled = first.set_levels(second_levels).values
    tm.assert_almost_equal(second_vals, relevelled)

    # Non-inplace doesn't kill _tuples [implementation detail]
    tm.assert_almost_equal(first._tuples, first_vals)

    # ...and values is still same too
    tm.assert_almost_equal(first.values, first_vals)

    # Inplace should kill _tuples
    first.set_levels(second_levels, inplace=True)
    tm.assert_almost_equal(first.values, second_vals)

    # Make sure label setting works too
    zero_codes = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
    want = np.empty((6, ), dtype=object)
    want[:] = [(long(1), 'a')] * 6

    # Must be 1d array of tuples
    assert want.shape == (6, )
    recoded = second.set_codes(zero_codes).values

    # Not inplace shouldn't change
    tm.assert_almost_equal(second._tuples, second_vals)

    # Should have correct values
    tm.assert_almost_equal(want, recoded)

    # ...and again setting inplace should kill _tuples, etc
    second.set_codes(zero_codes, inplace=True)
    tm.assert_almost_equal(second.values, recoded)
Ejemplo n.º 24
0
def test_inplace_mutation_resets_values():
    """``set_levels`` / ``set_labels`` must be reflected in ``.values``,
    and the non-inplace variants must leave the original index (including
    its cached ``_tuples``) untouched."""
    shared_labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]
    first_levels = [['a', 'b', 'c'], [4]]
    second_levels = [[1, 2, 3], ['a']]

    first = MultiIndex(levels=first_levels, labels=shared_labels)
    second = MultiIndex(levels=second_levels, labels=shared_labels)
    first_vals = first.values.copy()
    second_vals = second.values.copy()

    assert first._tuples is not None

    # Make sure level setting works
    relevelled = first.set_levels(second_levels).values
    tm.assert_almost_equal(second_vals, relevelled)

    # Non-inplace doesn't kill _tuples [implementation detail]
    tm.assert_almost_equal(first._tuples, first_vals)

    # ...and values is still same too
    tm.assert_almost_equal(first.values, first_vals)

    # Inplace should kill _tuples
    first.set_levels(second_levels, inplace=True)
    tm.assert_almost_equal(first.values, second_vals)

    # Make sure label setting works too
    zero_labels = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
    want = np.empty((6,), dtype=object)
    want[:] = [(long(1), 'a')] * 6

    # Must be 1d array of tuples
    assert want.shape == (6,)
    relabelled = second.set_labels(zero_labels).values

    # Not inplace shouldn't change
    tm.assert_almost_equal(second._tuples, second_vals)

    # Should have correct values
    tm.assert_almost_equal(want, relabelled)

    # ...and again setting inplace should kill _tuples, etc
    second.set_labels(zero_labels, inplace=True)
    tm.assert_almost_equal(second.values, relabelled)
Ejemplo n.º 25
0
def is_int64_overflow_possible(shape):
    """Return True when the product of the entries of ``shape`` is at
    least ``_INT64_MAX``."""
    total = long(1)
    for dim in shape:
        total = total * long(dim)

    return not total < _INT64_MAX
Ejemplo n.º 26
0
class Parser(object):
    """
    Shared option handling and dtype/date coercion for JSON parsers.

    This class references ``_default_orient``, ``_parse_numpy`` and
    ``_parse_no_numpy`` without defining them, and leaves
    ``_try_convert_types`` / ``_try_convert_dates`` unimplemented
    (presumably all are supplied by subclasses).
    """

    _STAMP_UNITS = ('s', 'ms', 'us', 'ns')
    # per-unit lower bound used by ``_try_convert_to_date``
    # (31536000 seconds == 365 days)
    _MIN_STAMPS = {
        's': long(31536000),
        'ms': long(31536000000),
        'us': long(31536000000000),
        'ns': long(31536000000000000)
    }

    def __init__(self,
                 json,
                 orient,
                 dtype=True,
                 convert_axes=True,
                 convert_dates=True,
                 keep_default_dates=False,
                 numpy=False,
                 precise_float=False,
                 date_unit=None):
        """
        Store the parsing options.

        ``orient=None`` falls back to ``self._default_orient``;
        ``orient='split'`` forces ``numpy=False``.  ``date_unit`` is
        lower-cased and must be one of ``_STAMP_UNITS``.

        Raises
        ------
        ValueError
            If ``date_unit`` is given but is not a known unit.
        """
        self.json = json

        if orient is None:
            orient = self._default_orient

        self.orient = orient
        self.dtype = dtype

        if orient == "split":
            numpy = False

        if date_unit is not None:
            date_unit = date_unit.lower()
            if date_unit not in self._STAMP_UNITS:
                raise ValueError('date_unit must be one of %s' %
                                 (self._STAMP_UNITS, ))
            self.min_stamp = self._MIN_STAMPS[date_unit]
        else:
            self.min_stamp = self._MIN_STAMPS['s']

        self.numpy = numpy
        self.precise_float = precise_float
        self.convert_axes = convert_axes
        self.convert_dates = convert_dates
        self.date_unit = date_unit
        self.keep_default_dates = keep_default_dates
        self.obj = None

    def parse(self):
        """
        Run the numpy or non-numpy parse step, then convert axes (when
        ``convert_axes`` is set) and types.  Returns ``self.obj``, or None
        when the parse step left ``self.obj`` unset.
        """

        # try numpy
        numpy = self.numpy
        if numpy:
            self._parse_numpy()

        else:
            self._parse_no_numpy()

        if self.obj is None:
            return None
        if self.convert_axes:
            self._convert_axes()
        self._try_convert_types()
        return self.obj

    def _convert_axes(self):
        """ try to convert axes """
        for axis in self.obj._AXIS_NUMBERS.keys():
            new_axis, result = self._try_convert_data(axis,
                                                      self.obj._get_axis(axis),
                                                      use_dtypes=False,
                                                      convert_dates=True)
            if result:
                setattr(self.obj, axis, new_axis)

    def _try_convert_types(self):
        raise NotImplementedError

    def _try_convert_data(self,
                          name,
                          data,
                          use_dtypes=True,
                          convert_dates=True):
        """
        Try to parse a ndarray like into a column by inferring dtype.

        Returns ``(data, converted)`` where ``converted`` tells whether any
        coercion was applied.  Conversion failures are treated as "leave
        the data alone"; only ``Exception`` subclasses are suppressed, so
        KeyboardInterrupt / SystemExit still propagate.
        """

        # don't try to coerce, unless a force conversion
        if use_dtypes:
            if self.dtype is False:
                return data, False
            elif self.dtype is True:
                pass

            else:

                # dtype to force
                dtype = (self.dtype.get(name)
                         if isinstance(self.dtype, dict) else self.dtype)
                if dtype is not None:
                    try:
                        dtype = np.dtype(dtype)
                        return data.astype(dtype), True
                    except Exception:
                        return data, False

        if convert_dates:
            new_data, result = self._try_convert_to_date(data)
            if result:
                return new_data, True

        result = False

        if data.dtype == 'object':

            # try float
            try:
                data = data.astype('float64')
                result = True
            except Exception:
                pass

        if data.dtype.kind == 'f':

            if data.dtype != 'float64':

                # coerce floats to 64
                try:
                    data = data.astype('float64')
                    result = True
                except Exception:
                    pass

        # don't coerce 0-len data
        if len(data) and (data.dtype == 'float' or data.dtype == 'object'):

            # coerce ints if we can (only when no value changes)
            try:
                new_data = data.astype('int64')
                if (new_data == data).all():
                    data = new_data
                    result = True
            except Exception:
                pass

        # coerce ints to 64
        if data.dtype == 'int':

            try:
                data = data.astype('int64')
                result = True
            except Exception:
                pass

        return data, result

    def _try_convert_to_date(self, data):
        """
        Try to parse a ndarray like into a date column.

        Tries to coerce object data in epoch/iso formats and integer/float
        data in epoch formats.  Returns ``(new_data, True)`` on success and
        ``(data, False)`` otherwise.
        """

        # no conversion on empty
        if not len(data):
            return data, False

        new_data = data
        if new_data.dtype == 'object':
            try:
                new_data = data.astype('int64')
            except Exception:
                # not integer-like; keep going with the original values
                pass

        # ignore numbers that are out of range
        if issubclass(new_data.dtype.type, np.number):
            in_range = (isnull(new_data.values) | (new_data > self.min_stamp) |
                        (new_data.values == iNaT))
            if not in_range.all():
                return data, False

        date_units = (
            self.date_unit, ) if self.date_unit else self._STAMP_UNITS
        for date_unit in date_units:
            try:
                new_data = to_datetime(new_data,
                                       errors='raise',
                                       unit=date_unit)
            except OverflowError:
                # this unit overflowed; try the next one
                continue
            except Exception:
                break
            return new_data, True
        return data, False

    def _try_convert_dates(self):
        raise NotImplementedError
Ejemplo n.º 27
0
Archivo: json.py Proyecto: 0x29a/pandas
class Parser(object):
    """
    Shared option handling and dtype/date coercion for JSON parsers.

    ``_default_orient``, ``_split_keys``, ``_parse_numpy`` and
    ``_parse_no_numpy`` are referenced but not defined here, and
    ``_try_convert_types`` / ``_try_convert_dates`` raise
    ``AbstractMethodError`` (presumably all are supplied by subclasses).
    """

    _STAMP_UNITS = ('s', 'ms', 'us', 'ns')
    # per-unit lower bound used by ``_try_convert_to_date``
    _MIN_STAMPS = {
        's': long(31536000),
        'ms': long(31536000000),
        'us': long(31536000000000),
        'ns': long(31536000000000000),
    }

    def __init__(self, json, orient, dtype=True, convert_axes=True,
                 convert_dates=True, keep_default_dates=False, numpy=False,
                 precise_float=False, date_unit=None):
        """
        Store the parsing options; ``orient=None`` falls back to
        ``self._default_orient`` and ``orient='split'`` disables ``numpy``.
        Raises ValueError for an unknown ``date_unit``.
        """
        self.json = json

        if orient is None:
            orient = self._default_orient
        self.orient = orient
        self.dtype = dtype

        # the "split" orient never uses the numpy code path
        if orient == "split":
            numpy = False

        if date_unit is None:
            min_stamp = self._MIN_STAMPS['s']
        else:
            date_unit = date_unit.lower()
            if date_unit not in self._STAMP_UNITS:
                raise ValueError('date_unit must be one of {units}'
                                 .format(units=self._STAMP_UNITS))
            min_stamp = self._MIN_STAMPS[date_unit]
        self.min_stamp = min_stamp

        self.numpy = numpy
        self.precise_float = precise_float
        self.convert_axes = convert_axes
        self.convert_dates = convert_dates
        self.date_unit = date_unit
        self.keep_default_dates = keep_default_dates
        self.obj = None

    def check_keys_split(self, decoded):
        """
        Raise ValueError when ``decoded`` has keys outside ``_split_keys``
        (the keys allowed for orient='split').
        """
        unexpected = set(decoded.keys()).difference(set(self._split_keys))
        if unexpected:
            joined = ", ".join(unexpected)
            raise ValueError(u("JSON data had unexpected key(s): {bad_keys}")
                             .format(bad_keys=pprint_thing(joined)))

    def parse(self):
        """
        Run the numpy or non-numpy parse step, then convert axes (when
        ``convert_axes`` is set) and types.  Returns ``self.obj``, or None
        when the parse step left ``self.obj`` unset.
        """
        run_parse = self._parse_numpy if self.numpy else self._parse_no_numpy
        run_parse()

        if self.obj is None:
            return None
        if self.convert_axes:
            self._convert_axes()
        self._try_convert_types()
        return self.obj

    def _convert_axes(self):
        """
        Attempt date/dtype conversion on every axis of ``self.obj`` and
        replace the axes that were converted.
        """
        for axis_name in self.obj._AXIS_NUMBERS.keys():
            axis_values = self.obj._get_axis(axis_name)
            converted, changed = self._try_convert_data(
                axis_name, axis_values, use_dtypes=False, convert_dates=True)
            if changed:
                setattr(self.obj, axis_name, converted)

    def _try_convert_types(self):
        raise AbstractMethodError(self)

    def _try_convert_data(self, name, data, use_dtypes=True,
                          convert_dates=True):
        """
        Infer a better dtype for an ndarray-like column.

        Returns ``(data, changed)`` where ``changed`` tells whether any
        coercion was applied.
        """

        # a forced dtype (or dtype=False) short-circuits all inference
        if use_dtypes:
            if self.dtype is False:
                return data, False
            if self.dtype is not True:
                if isinstance(self.dtype, dict):
                    forced = self.dtype.get(name)
                else:
                    forced = self.dtype
                if forced is not None:
                    try:
                        forced = np.dtype(forced)
                        return data.astype(forced), True
                    except (TypeError, ValueError):
                        return data, False

        if convert_dates:
            as_dates, parsed = self._try_convert_to_date(data)
            if parsed:
                return as_dates, True

        changed = False

        # object -> float64 where possible
        if data.dtype == 'object':
            try:
                data = data.astype('float64')
                changed = True
            except (TypeError, ValueError):
                pass

        # widen any other float width to float64
        if data.dtype.kind == 'f' and data.dtype != 'float64':
            try:
                data = data.astype('float64')
                changed = True
            except (TypeError, ValueError):
                pass

        # float/object -> int64 when lossless; don't coerce 0-len data
        if len(data) and (data.dtype == 'float' or data.dtype == 'object'):
            try:
                as_ints = data.astype('int64')
                if (as_ints == data).all():
                    data = as_ints
                    changed = True
            except (TypeError, ValueError):
                pass

        # coerce ints to 64
        if data.dtype == 'int':
            try:
                data = data.astype('int64')
                changed = True
            except (TypeError, ValueError):
                pass

        return data, changed

    def _try_convert_to_date(self, data):
        """
        Try to parse an ndarray-like into a date column.

        Object data is first coerced to int64 where possible; numeric data
        is only converted when every value is missing, ``iNaT`` or greater
        than ``self.min_stamp``.  Returns ``(converted, True)`` on success
        and ``(data, False)`` otherwise.
        """

        # no conversion on empty
        if not len(data):
            return data, False

        candidate = data
        if candidate.dtype == 'object':
            try:
                candidate = data.astype('int64')
            except (TypeError, ValueError, OverflowError):
                pass

        # ignore numbers that are out of range
        if issubclass(candidate.dtype.type, np.number):
            in_range = (isna(candidate.values) |
                        (candidate > self.min_stamp) |
                        (candidate.values == iNaT))
            if not in_range.all():
                return data, False

        if self.date_unit:
            units = (self.date_unit,)
        else:
            units = self._STAMP_UNITS

        # first unit that parses wins; ValueError moves on to the next unit
        for unit in units:
            try:
                candidate = to_datetime(candidate, errors='raise', unit=unit)
            except ValueError:
                continue
            except Exception:
                break
            else:
                return candidate, True
        return data, False

    def _try_convert_dates(self):
        raise AbstractMethodError(self)
Ejemplo n.º 28
0
def _long_prod(vals):
    result = long(1)
    for x in vals:
        result *= x
    return result
Ejemplo n.º 29
0
Archivo: Yin.py Proyecto: lifetea/s
    def query_yin_list(self,endDate):
        """
        Return the per-stock table for ``endDate``, building it on first use.

        If ``../files/yin/<endDate>.xlsx`` already exists it is read back and
        returned. Otherwise every document of ``self.collection`` with
        ``date == endDate``, ``yin >= 100`` and ``-6.0 <= zf <= 6.0`` is turned
        into one row, appended to ``self.stocks`` and the result is written
        to that Excel file.

        The '力度' column is ``yin * 10000 / 流通股数`` rounded to two
        decimals; rows whose value is ``<= -0.01`` are skipped.

        Parameters
        ----------
        endDate : str
            Date used both in the query and in the cache file name.

        Returns
        -------
        pandas.DataFrame
            ``self.stocks`` after loading or building.
        """
        file_name = '../files/yin/' + endDate + '.xlsx'

        # Cached result: just load it.
        if os.path.exists(file_name):
            print("存在")
            self.stocks = pd.read_excel(file_name,dtype={'代码':str})
            return self.stocks

        # NOTE(review): rows below are appended to self.stocks, not to
        # self.stocks_zt -- the attribute name looks swapped with the sibling
        # query_yin_zt_list; confirm before relying on either.
        # The original tuple was missing the comma between '涨幅' and '隐单',
        # which silently produced a single '涨幅隐单' column.
        self.stocks_zt = pd.DataFrame(columns=('代码', '名称', '日期', '涨幅', '隐单', '主单','力度','流通股'))
        base_list = base.get_stock_base_list(endDate)
        query = {
            'yin': {
                '$gte': 100,
            },
            'zf': {
                '$gte': -6.0,
                '$lte': 6.0
            },
            'date': endDate,
        }
        res_list = list(self.collection.find(query))
        self.total = len(res_list)
        for j in res_list:
            code = j['code']
            # Look the share count up once and reuse it for both columns.
            shares = long(base_list['流通股数'][code])
            strength = round((long(j['yin']) * 10000) / shares, 2)
            if strength <= -0.01:
                continue

            item = {
                '代码': code,
                '名称': j['name'],
                '日期': endDate,
                '隐单': j['yin'],
                '主单': j['zhu'],
                '涨幅': j['zf'],
                '流通股': shares,
                '力度': strength,
                '行业': base_list['行业'][code],
                '概念': base_list['概念'][code]
            }

            self.stocks = self.stocks.append(item, ignore_index=True)
            # progress output: processed rows / total matches
            self.index += 1
            print(str(self.index)+"/"+str(self.total))

        # Persist the freshly built table so the next call hits the cache.
        writer = pd.ExcelWriter(file_name)
        self.stocks.to_excel(writer, 'Sheet1')
        writer.save()
        return self.stocks
Ejemplo n.º 30
0
Archivo: Yin.py Proyecto: lifetea/s
    def query_yin_detail(self,code,startDate,endDate):
        """
        Return one row per stored day for ``code`` between two dates.

        Documents of ``self.collection`` with the given ``code`` and
        ``startDate <= date <= endDate`` are sorted by date (newest first)
        and combined with the per-day figures returned by
        ``base.get_stock_base``.

        Parameters
        ----------
        code : str
            Stock code to look up.
        startDate, endDate : str
            Inclusive bounds of the ``date`` field in the query.

        Returns
        -------
        pandas.DataFrame
            One row per matching date. When the query matches nothing an
            empty frame with the base columns is returned (previously this
            raised ``KeyError`` on the missing '_id' column).
        """
        stocks = pd.DataFrame(columns=('股票代码','股票名称','日期', '涨幅', '隐单', '主单','资金'))
        base_list = base.get_stock_base(code=code,startDate=startDate,endDate=endDate)
        base_list = base_list.set_index(['日期'])
        query = {
            'date': {
                '$gte': startDate,
                '$lte': endDate
            },
            'code': code
        }

        res = self.collection.find(query)
        df = pd.DataFrame(list(res))

        # No documents -> no columns at all, so the column handling below
        # would fail; hand back the empty result instead.
        if df.empty:
            return stocks

        del df['_id']
        df = df.rename(columns={
            'code': '股票代码',
            'name': '股票名称',
            'date': '日期',
            'zf': '涨幅',
            'yin': '隐单',
            'zhu': '主单',
            'zj': '资金',
        })

        df = df.sort_values(by=['日期'], ascending=False)
        df = df.set_index(['日期'])

        for s in df.index:
            # per-day share count from the base data, reused for two columns
            shares = base_list['流通股数'][s]
            item = {
                '股票代码': df['股票代码'][s],
                '股票名称': df['股票名称'][s],
                '日期': s,
                '涨幅': df['涨幅'][s],
                '隐单': df['隐单'][s],
                '主单': df['主单'][s],
                '资金': df['资金'][s],
                '力度': round((long(df['隐单'][s]) * 10000) / (long(shares)), 2),
                '流通股数': round(shares,0),
                '成交量': float(base_list['成交量'][s]),
                '成交额': round(float(base_list['成交额'][s])),
            }
            stocks = stocks.append(item, ignore_index=True)

        return stocks
Ejemplo n.º 31
0
 def test_delta_preserve_nanos(self):
     """Adding a one-day timedelta must not disturb the nanosecond field."""
     stamp = Timestamp(long(1337299200000000123))
     shifted = stamp + timedelta(1)
     assert stamp.nanosecond == shifted.nanosecond
Ejemplo n.º 32
0
    if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex):
        if isinstance(index, (pd.Int64Index, pd.Float64Index)):
            raise TypeError("cannot infer freq from a non-convertible index "
                            "type {0}".format(type(index)))
        index = index.values

    if not isinstance(index, pd.DatetimeIndex):
        try:
            index = pd.DatetimeIndex(index)
        except AmbiguousTimeError:
            index = pd.DatetimeIndex(index.asi8)

    inferer = _FrequencyInferer(index, warn=warn)
    return inferer.get_freq()

_ONE_MICRO = long(1000)
_ONE_MILLI = _ONE_MICRO * 1000
_ONE_SECOND = _ONE_MILLI * 1000
_ONE_MINUTE = 60 * _ONE_SECOND
_ONE_HOUR = 60 * _ONE_MINUTE
_ONE_DAY = 24 * _ONE_HOUR


class _FrequencyInferer(object):
    """
    Helper that holds the index whose frequency is to be inferred.

    Original author's note: not sure if I can avoid the state machine here.
    """

    def __init__(self, index, warn=True):
        # keep the index itself ...
        self.index = index
        # ... and its values reinterpreted as 64-bit integers
        self.values = np.asarray(index).view('i8')
Ejemplo n.º 33
0
def _long_prod(vals):
    result = long(1)
    for x in vals:
        result *= x
    return result
Ejemplo n.º 34
0
from pandas.compat import long
from pandas.core.arrays import PeriodArray, DatetimeArrayMixin as DatetimeArray


@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
    """Yield the value one, either as a Python int or a 0-dim int64 array."""
    # zero-dim integer array behaves like an integer
    return request.param


# Length-5 zero vectors: every container (Index, ndarray) with every dtype.
zeros = [box_cls([0] * 5, dtype=dtype)
         for box_cls in (pd.Index, np.array)
         for dtype in (np.int64, np.uint64, np.float64)]
# Zero-dimensional array zeros, one per dtype.
zeros += [np.array(0, dtype=dtype)
          for dtype in (np.int64, np.uint64, np.float64)]
# Plain Python scalar zeros.
zeros += [0, 0.0, long(0)]


@pytest.fixture(params=zeros)
def zero(request):
    """Yield each entry of the module-level ``zeros`` list in turn."""
    # For testing division by (or of) zero for Index with length 5, this
    # gives several scalar-zeros and length-5 vector-zeros
    return request.param


# ------------------------------------------------------------------
# Vector Fixtures

@pytest.fixture(params=[pd.Float64Index(np.arange(5, dtype='float64')),
                        pd.Int64Index(np.arange(5, dtype='int64')),
                        pd.UInt64Index(np.arange(5, dtype='uint64')),
Ejemplo n.º 35
0
Archivo: Yin.py Proyecto: lifetea/s
    def query_yin_zt_list(self,startDate,endDate):
        """
        Return the table for ``endDate`` restricted to codes in the zt list.

        If ``../files/yin/<startDate>|<endDate>-zt.xlsx`` exists it is read
        back and returned. Otherwise every document of ``self.collection``
        with ``date == endDate``, ``yin >= 100`` and ``-6.0 <= zf <= 6.0`` is
        examined, and those whose code appears in the index returned by
        ``base.get_zt_list(startDate, endDate)`` are appended to
        ``self.stocks``; the result is then written to that Excel file.

        The '力度' column is ``yin * 10000 / 流通股数`` rounded to two
        decimals. Rows whose value is ``<= -0.01`` are skipped *before*
        being appended, matching ``query_yin_list`` (the check used to run
        after the append and therefore could never exclude a row).
        Documents for which the row cannot be built are reported and
        skipped.

        Parameters
        ----------
        startDate, endDate : str
            Range passed to ``base.get_zt_list``; ``endDate`` is also the
            query date. Both are part of the cache file name.

        Returns
        -------
        pandas.DataFrame
            ``self.stocks`` after loading or building.
        """
        file_name_zt = '../files/yin/' + startDate + '|' + endDate + '-zt.xlsx'

        # Cached result: just load it.
        if os.path.exists(file_name_zt):
            print("存在")
            self.stocks = pd.read_excel(file_name_zt,dtype={'代码':str})
            return self.stocks

        self.stocks = pd.DataFrame(columns=('代码', '名称', '日期', '涨幅', '隐单', '主单','力度','流通股','概念','行业'))
        industry = base.get_industry_list()
        base_list = base.get_stock_base_list(endDate)
        query = {
            'yin': {
                '$gte': 100,
            },
            'zf': {
                '$gte': -6.0,
                '$lte': 6.0
            },
            'date': endDate,
        }
        res_list = list(self.collection.find(query))
        self.total = len(res_list)
        zt_list = base.get_zt_list(startDate=startDate, endDate=endDate).index
        for j in res_list:
            code = j['code']
            try:
                # Look the share count up once and reuse it for both columns.
                shares = long(base_list['流通股数'][code])
                strength = round((long(j['yin']) * 10000) / shares, 2)
                item = {
                    '代码': code,
                    '名称': j['name'],
                    '日期': endDate,
                    '隐单': j['yin'],
                    '主单': j['zhu'],
                    '涨幅': j['zf'],
                    '流通股': shares,
                    '力度': strength,
                    '行业': industry.industry[code],
                    '概念': industry.concept[code]
                }
            except Exception as e:
                # best effort: report the offending code and move on
                print("出错"+code)
                print(e)
                continue

            if strength <= -0.01:
                continue

            if code in zt_list:
                self.stocks = self.stocks.append(item, ignore_index=True)

            # progress output: processed rows / total matches
            self.index += 1
            print(str(self.index)+"/"+str(self.total))

        # Persist the freshly built table so the next call hits the cache.
        writer_zt = pd.ExcelWriter(file_name_zt)
        self.stocks.to_excel(writer_zt, 'Sheet1')
        writer_zt.save()
        return self.stocks
Ejemplo n.º 36
0
class TestTimestamp(object):
    """Behavioural tests for ``Timestamp`` construction and accessors."""

    def test_tz(self):
        """Localizing and converting time zones keeps the instant and nanos."""
        tstr = '2014-02-01 09:00'
        ts = Timestamp(tstr)
        local = ts.tz_localize('Asia/Tokyo')
        assert local.hour == 9
        assert local == Timestamp(tstr, tz='Asia/Tokyo')
        conv = local.tz_convert('US/Eastern')
        assert conv == Timestamp('2014-01-31 19:00', tz='US/Eastern')
        assert conv.hour == 19

        # preserves nanosecond
        ts = Timestamp(tstr) + offsets.Nano(5)
        local = ts.tz_localize('Asia/Tokyo')
        assert local.hour == 9
        assert local.nanosecond == 5
        conv = local.tz_convert('US/Eastern')
        assert conv.nanosecond == 5
        assert conv.hour == 19

    def test_utc_z_designator(self):
        """A trailing 'Z' in the string is parsed as the UTC time zone."""
        assert get_timezone(Timestamp('2014-11-02 01:00Z').tzinfo) == 'UTC'

    def test_asm8(self):
        """``asm8`` exposes the same i8 value as ``np.datetime64(n, 'ns')``."""
        np.random.seed(7960929)
        # extremes of the representable range plus an arbitrary small value
        ns = [Timestamp.min.value, Timestamp.max.value, 1000]

        for n in ns:
            assert (Timestamp(n).asm8.view('i8') == np.datetime64(
                n, 'ns').view('i8') == n)

        assert (Timestamp('nat').asm8.view('i8') == np.datetime64(
            'nat', 'ns').view('i8'))

    def test_class_ops_pytz(self):
        """Timestamp class constructors agree with datetime's (pytz zones)."""
        def compare(x, y):
            # compare at whole-second resolution (value is divided by 1e9)
            assert (int(Timestamp(x).value / 1e9) == int(
                Timestamp(y).value / 1e9))

        compare(Timestamp.now(), datetime.now())
        compare(Timestamp.now('UTC'), datetime.now(timezone('UTC')))
        compare(Timestamp.utcnow(), datetime.utcnow())
        compare(Timestamp.today(), datetime.today())
        current_time = calendar.timegm(datetime.now().utctimetuple())
        compare(Timestamp.utcfromtimestamp(current_time),
                datetime.utcfromtimestamp(current_time))
        compare(Timestamp.fromtimestamp(current_time),
                datetime.fromtimestamp(current_time))

        date_component = datetime.utcnow()
        time_component = (date_component + timedelta(minutes=10)).time()
        compare(Timestamp.combine(date_component, time_component),
                datetime.combine(date_component, time_component))

    def test_class_ops_dateutil(self):
        """Timestamp class constructors agree with datetime's (dateutil)."""
        def compare(x, y):
            # compare after rounding both values to the nearest second
            assert (int(np.round(Timestamp(x).value / 1e9)) == int(
                np.round(Timestamp(y).value / 1e9)))

        compare(Timestamp.now(), datetime.now())
        compare(Timestamp.now('UTC'), datetime.now(tzutc()))
        compare(Timestamp.utcnow(), datetime.utcnow())
        compare(Timestamp.today(), datetime.today())
        current_time = calendar.timegm(datetime.now().utctimetuple())
        compare(Timestamp.utcfromtimestamp(current_time),
                datetime.utcfromtimestamp(current_time))
        compare(Timestamp.fromtimestamp(current_time),
                datetime.fromtimestamp(current_time))

        date_component = datetime.utcnow()
        time_component = (date_component + timedelta(minutes=10)).time()
        compare(Timestamp.combine(date_component, time_component),
                datetime.combine(date_component, time_component))

    def test_basics_nanos(self):
        """Nanosecond-level integers map onto the expected date fields."""
        val = np.int64(946684800000000000).view('M8[ns]')
        stamp = Timestamp(val.view('i8') + 500)
        assert stamp.year == 2000
        assert stamp.month == 1
        assert stamp.microsecond == 0
        assert stamp.nanosecond == 500

        # GH 14415
        val = np.iinfo(np.int64).min + 80000000000000
        stamp = Timestamp(val)
        assert stamp.year == 1677
        assert stamp.month == 9
        assert stamp.day == 21
        assert stamp.microsecond == 145224
        assert stamp.nanosecond == 192

    # Each case is (value, keyword overrides for ``check``); the overrides
    # carry the unit and the expected hour/second/microsecond/nanosecond.
    @pytest.mark.parametrize(
        'value, check_kwargs',
        [[946688461000000000, {}],
         [946688461000000000 / long(1000),
          dict(unit='us')],
         [946688461000000000 / long(1000000),
          dict(unit='ms')],
         [946688461000000000 / long(1000000000),
          dict(unit='s')], [10957, dict(unit='D', h=0)],
         pytest.param((946688461000000000 + 500000) / long(1000000000),
                      dict(unit='s', us=499, ns=964),
                      marks=pytest.mark.skipif(not PY3,
                                               reason='using truediv, so these'
                                               ' are like floats')),
         pytest.param((946688461000000000 + 500000000) / long(1000000000),
                      dict(unit='s', us=500000),
                      marks=pytest.mark.skipif(not PY3,
                                               reason='using truediv, so these'
                                               ' are like floats')),
         pytest.param((946688461000000000 + 500000) / long(1000000),
                      dict(unit='ms', us=500),
                      marks=pytest.mark.skipif(not PY3,
                                               reason='using truediv, so these'
                                               ' are like floats')),
         pytest.param(
             (946688461000000000 + 500000) / long(1000000000),
             dict(unit='s'),
             marks=pytest.mark.skipif(PY3, reason='get chopped in py2')),
         pytest.param(
             (946688461000000000 + 500000000) / long(1000000000),
             dict(unit='s'),
             marks=pytest.mark.skipif(PY3, reason='get chopped in py2')),
         pytest.param(
             (946688461000000000 + 500000) / long(1000000),
             dict(unit='ms'),
             marks=pytest.mark.skipif(PY3, reason='get chopped in py2')),
         [(946688461000000000 + 500000) / long(1000),
          dict(unit='us', us=500)],
         [(946688461000000000 + 500000000) / long(1000000),
          dict(unit='ms', us=500000)],
         [946688461000000000 / 1000.0 + 5,
          dict(unit='us', us=5)],
         [946688461000000000 / 1000.0 + 5000,
          dict(unit='us', us=5000)],
         [946688461000000000 / 1000000.0 + 0.5,
          dict(unit='ms', us=500)],
         [946688461000000000 / 1000000.0 + 0.005,
          dict(unit='ms', us=5, ns=5)],
         [946688461000000000 / 1000000000.0 + 0.5,
          dict(unit='s', us=500000)], [10957 + 0.5,
                                       dict(unit='D', h=12)]])
    def test_unit(self, value, check_kwargs):
        """``Timestamp(value, unit=...)`` lands on 2000-01-01 as expected."""
        def check(value, unit=None, h=1, s=1, us=0, ns=0):
            stamp = Timestamp(value, unit=unit)
            assert stamp.year == 2000
            assert stamp.month == 1
            assert stamp.day == 1
            assert stamp.hour == h
            if unit != 'D':
                assert stamp.minute == 1
                assert stamp.second == s
                assert stamp.microsecond == us
            else:
                # day-resolution input carries no sub-hour information
                assert stamp.minute == 0
                assert stamp.second == 0
                assert stamp.microsecond == 0
            assert stamp.nanosecond == ns

        check(value, **check_kwargs)

    def test_roundtrip(self):
        """Adding Timedelta values matches parsing the equivalent string."""

        # test value to string and back conversions
        # further test accessors
        base = Timestamp('20140101 00:00:00')

        result = Timestamp(base.value + Timedelta('5ms').value)
        assert result == Timestamp(str(base) + ".005000")
        assert result.microsecond == 5000

        result = Timestamp(base.value + Timedelta('5us').value)
        assert result == Timestamp(str(base) + ".000005")
        assert result.microsecond == 5

        result = Timestamp(base.value + Timedelta('5ns').value)
        assert result == Timestamp(str(base) + ".000000005")
        assert result.nanosecond == 5
        assert result.microsecond == 0

        result = Timestamp(base.value + Timedelta('6ms 5us').value)
        assert result == Timestamp(str(base) + ".006005")
        assert result.microsecond == 5 + 6 * 1000

        result = Timestamp(base.value + Timedelta('200ms 5us').value)
        assert result == Timestamp(str(base) + ".200005")
        assert result.microsecond == 5 + 200 * 1000

    def test_hash_equivalent(self):
        """A Timestamp can look up a dict entry keyed by the same datetime."""
        d = {datetime(2011, 1, 1): 5}
        stamp = Timestamp(datetime(2011, 1, 1))
        assert d[stamp] == 5
Ejemplo n.º 37
0
    if isinstance(index, pd.PeriodIndex):
        raise TypeError("PeriodIndex given. Check the `freq` attribute "
                        "instead of using infer_freq.")
    if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex):
        if isinstance(index, (pd.Int64Index, pd.Float64Index)):
            raise TypeError(
                "cannot infer freq from a non-convertible index type {0}".
                format(type(index)))
        index = index.values

    index = pd.DatetimeIndex(index)
    inferer = _FrequencyInferer(index, warn=warn)
    return inferer.get_freq()


_ONE_MICRO = long(1000)
_ONE_MILLI = _ONE_MICRO * 1000
_ONE_SECOND = _ONE_MILLI * 1000
_ONE_MINUTE = 60 * _ONE_SECOND
_ONE_HOUR = 60 * _ONE_MINUTE
_ONE_DAY = 24 * _ONE_HOUR


def _tz_convert_with_transitions(values, to_tz, from_tz):
    """
    Convert i8 values from the ``from_tz`` time zone to the ``to_tz`` zone,
    taking DST transitions into account.
    """

    # vectorization is slow, so test whether we can do this via the faster
    # tz_convert; ``f`` converts a single value
    f = lambda x: tslib.tz_convert_single(x, to_tz, from_tz)
Ejemplo n.º 38
0
                        Index([True, False]),
                        tm.makeCategoricalIndex(100),
                        Index([]),
                        MultiIndex.from_tuples(lzip(
                            ['foo', 'bar', 'baz'], [1, 2, 3])),
                        Index([0, 0, 1, 1, 2, 2])],
                ids=lambda x: type(x).__name__)
def indices(request):
    return request.param


@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
    """Yield the value one, either as a Python int or a 0-dim int64 array."""
    # zero-dim integer array behaves like an integer
    return request.param


# Length-5 zero vectors: every container (Index, ndarray) with every dtype.
zeros = [box([0] * 5, dtype=dtype)
         for box in (pd.Index, np.array)
         for dtype in (np.int64, np.uint64, np.float64)]
# Zero-dimensional array zeros, one per dtype.
zeros += [np.array(0, dtype=dtype)
          for dtype in (np.int64, np.uint64, np.float64)]
# Plain Python scalar zeros.
zeros += [0, 0.0, long(0)]


@pytest.fixture(params=zeros)
def zero(request):
    """Yield each entry of the module-level ``zeros`` list in turn."""
    # For testing division by (or of) zero for Index with length 5, this
    # gives several scalar-zeros and length-5 vector-zeros
    return request.param
Ejemplo n.º 39
0
    def test_unit(self):
        """``Timestamp(val, unit=...)`` must land on 2000-01-01 01:01:01."""

        def check(val, unit=None, h=1, s=1, us=0):
            stamp = Timestamp(val, unit=unit)
            assert (stamp.year, stamp.month, stamp.day) == (2000, 1, 1)
            assert stamp.hour == h
            if unit == 'D':
                # day-resolution input carries no sub-hour information
                tail = (0, 0, 0)
            else:
                tail = (1, s, us)
            assert (stamp.minute, stamp.second, stamp.microsecond) == tail
            assert stamp.nanosecond == 0

        ts = Timestamp('20000101 01:01:01')
        val = ts.value
        days = (ts - Timestamp('1970-01-01')).days

        # exact integer multiples of each unit
        check(val)
        check(val / long(1000), unit='us')
        check(val / long(1000000), unit='ms')
        check(val / long(1000000000), unit='s')
        check(days, unit='D', h=0)

        # With true division (PY3) the fraction survives; in py2 the
        # integer division chops it, leaving zero microseconds.
        if PY3:
            us_small, us_large = 500, 500000
        else:
            us_small, us_large = 0, 0
        check((val + 500000) / long(1000000000), unit='s', us=us_small)
        check((val + 500000000) / long(1000000000), unit='s', us=us_large)
        check((val + 500000) / long(1000000), unit='ms', us=us_small)

        # ok: the added amount is a whole number of the target unit
        check((val + 500000) / long(1000), unit='us', us=500)
        check((val + 500000000) / long(1000000), unit='ms', us=500000)

        # floats
        float_cases = (
            (val / 1000.0 + 5, dict(unit='us', us=5)),
            (val / 1000.0 + 5000, dict(unit='us', us=5000)),
            (val / 1000000.0 + 0.5, dict(unit='ms', us=500)),
            (val / 1000000.0 + 0.005, dict(unit='ms', us=5)),
            (val / 1000000000.0 + 0.5, dict(unit='s', us=500000)),
            (days + 0.5, dict(unit='D', h=12)),
        )
        for number, expected in float_cases:
            check(number, **expected)
Ejemplo n.º 40
0
 def test_delta_preserve_nanos(self):
     """Adding a one-day timedelta must not disturb the nanosecond field."""
     stamp = Timestamp(long(1337299200000000123))
     shifted = stamp + timedelta(1)
     assert stamp.nanosecond == shifted.nanosecond
Ejemplo n.º 41
0
def is_int64_overflow_possible(shape):
    """
    Report whether the product of all entries of ``shape`` reaches
    ``_INT64_MAX``.

    The product is accumulated with ``long`` so the check itself cannot
    overflow.
    """
    total = long(1)
    for extent in shape:
        total = total * long(extent)

    return total >= _INT64_MAX
Ejemplo n.º 42
0
    def test_unit(self):
        """``Timestamp(val, unit=...)`` must land on 2000-01-01 01:01:01."""

        def check(val, unit=None, h=1, s=1, us=0):
            stamp = Timestamp(val, unit=unit)
            assert (stamp.year, stamp.month, stamp.day) == (2000, 1, 1)
            assert stamp.hour == h
            if unit == 'D':
                # day-resolution input carries no sub-hour information
                tail = (0, 0, 0)
            else:
                tail = (1, s, us)
            assert (stamp.minute, stamp.second, stamp.microsecond) == tail
            assert stamp.nanosecond == 0

        ts = Timestamp('20000101 01:01:01')
        val = ts.value
        days = (ts - Timestamp('1970-01-01')).days

        # exact integer multiples of each unit
        check(val)
        check(val / long(1000), unit='us')
        check(val / long(1000000), unit='ms')
        check(val / long(1000000000), unit='s')
        check(days, unit='D', h=0)

        # With true division (PY3) the fraction survives; in py2 the
        # integer division chops it, leaving zero microseconds.
        if PY3:
            us_small, us_large = 500, 500000
        else:
            us_small, us_large = 0, 0
        check((val + 500000) / long(1000000000), unit='s', us=us_small)
        check((val + 500000000) / long(1000000000), unit='s', us=us_large)
        check((val + 500000) / long(1000000), unit='ms', us=us_small)

        # ok: the added amount is a whole number of the target unit
        check((val + 500000) / long(1000), unit='us', us=500)
        check((val + 500000000) / long(1000000), unit='ms', us=500000)

        # floats
        float_cases = (
            (val / 1000.0 + 5, dict(unit='us', us=5)),
            (val / 1000.0 + 5000, dict(unit='us', us=5000)),
            (val / 1000000.0 + 0.5, dict(unit='ms', us=500)),
            (val / 1000000.0 + 0.005, dict(unit='ms', us=5)),
            (val / 1000000000.0 + 0.5, dict(unit='s', us=500000)),
            (days + 0.5, dict(unit='D', h=12)),
        )
        for number, expected in float_cases:
            check(number, **expected)