def test_constructors(self, data, closed, name):
    left, right = data[:-1], data[1:]
    ivs = [Interval(l, r, closed=closed) for l, r in lzip(left, right)]
    expected = IntervalIndex._simple_new(
        left=left, right=right, closed=closed, name=name)

    # validate expected
    assert expected.closed == closed
    assert expected.name == name
    assert expected.dtype.subtype == data.dtype
    tm.assert_index_equal(expected.left, data[:-1])
    tm.assert_index_equal(expected.right, data[1:])

    # validated constructors
    result = IntervalIndex(ivs, name=name)
    tm.assert_index_equal(result, expected)

    result = IntervalIndex.from_intervals(ivs, name=name)
    tm.assert_index_equal(result, expected)

    result = IntervalIndex.from_breaks(data, closed=closed, name=name)
    tm.assert_index_equal(result, expected)

    result = IntervalIndex.from_arrays(
        left, right, closed=closed, name=name)
    tm.assert_index_equal(result, expected)

    result = IntervalIndex.from_tuples(
        lzip(left, right), closed=closed, name=name)
    tm.assert_index_equal(result, expected)

    result = Index(ivs, name=name)
    assert isinstance(result, IntervalIndex)
    tm.assert_index_equal(result, expected)

    # idempotent
    tm.assert_index_equal(Index(expected), expected)
    tm.assert_index_equal(IntervalIndex(expected), expected)

    result = IntervalIndex.from_intervals(expected)
    tm.assert_index_equal(result, expected)

    result = IntervalIndex.from_intervals(
        expected.values, name=expected.name)
    tm.assert_index_equal(result, expected)

    left, right = expected.left, expected.right
    result = IntervalIndex.from_arrays(
        left, right, closed=expected.closed, name=expected.name)
    tm.assert_index_equal(result, expected)

    result = IntervalIndex.from_tuples(
        expected.to_tuples(), closed=expected.closed, name=expected.name)
    tm.assert_index_equal(result, expected)

    breaks = expected.left.tolist() + [expected.right[-1]]
    result = IntervalIndex.from_breaks(
        breaks, closed=expected.closed, name=expected.name)
    tm.assert_index_equal(result, expected)
def test_constructors_errors_string(self, data):
    # GH 19016
    left, right = data[:-1], data[1:]
    tuples = lzip(left, right)
    ivs = [Interval(l, r) for l, r in tuples] or data
    msg = ('category, object, and string subtypes are not supported '
           'for IntervalIndex')

    with tm.assert_raises_regex(TypeError, msg):
        IntervalIndex(ivs)

    with tm.assert_raises_regex(TypeError, msg):
        Index(ivs)

    with tm.assert_raises_regex(TypeError, msg):
        IntervalIndex.from_intervals(ivs)

    with tm.assert_raises_regex(TypeError, msg):
        IntervalIndex.from_breaks(data)

    with tm.assert_raises_regex(TypeError, msg):
        IntervalIndex.from_arrays(left, right)

    with tm.assert_raises_regex(TypeError, msg):
        IntervalIndex.from_tuples(tuples)
def test_is_():
    mi = MultiIndex.from_tuples(lzip(range(10), range(10)))
    assert mi.is_(mi)
    assert mi.is_(mi.view())
    assert mi.is_(mi.view().view().view().view())

    mi2 = mi.view()
    # names are metadata, they don't change id
    mi2.names = ["A", "B"]
    assert mi2.is_(mi)
    assert mi.is_(mi2)

    assert mi.is_(mi.set_names(["C", "D"]))
    mi2 = mi.view()
    mi2.set_names(["E", "F"], inplace=True)
    assert mi.is_(mi2)

    # levels are inherent properties, they change identity
    mi3 = mi2.set_levels([lrange(10), lrange(10)])
    assert not mi3.is_(mi2)
    # shouldn't change
    assert mi2.is_(mi)

    mi4 = mi3.view()
    # GH 17464 - Remove duplicate MultiIndex levels
    mi4.set_levels([lrange(10), lrange(10)], inplace=True)
    assert not mi4.is_(mi3)

    mi5 = mi.view()
    mi5.set_levels(mi5.levels, inplace=True)
    assert not mi5.is_(mi)
def get_schema(frame, name, flavor, keys=None):
    "Return a CREATE TABLE statement to suit the contents of a DataFrame."
    lookup_type = lambda dtype: get_sqltype(dtype.type, flavor)

    # Replace spaces in DataFrame column names with _.
    # Also force lowercase, postgresql can be case sensitive
    safe_columns = [s.replace(' ', '_').strip().lower()
                    for s in frame.dtypes.index]
    column_types = lzip(safe_columns, map(lookup_type, frame.dtypes))
    if flavor == 'sqlite':
        columns = ',\n  '.join('[%s] %s' % x for x in column_types)
    elif flavor == 'postgresql':
        columns = ',\n  '.join('"%s" %s' % x for x in column_types)
    else:
        columns = ',\n  '.join('`%s` %s' % x for x in column_types)

    keystr = ''
    if keys is not None:
        if isinstance(keys, compat.string_types):
            keys = (keys,)
        keystr = ', PRIMARY KEY (%s)' % ','.join(keys)
    template = """CREATE TABLE %(name)s (
  %(columns)s
  %(keystr)s
);"""
    create_statement = template % {'name': name, 'columns': columns,
                                   'keystr': keystr}
    return create_statement
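
# Usage sketch (illustrative, not from the source): get_schema maps each
# column dtype to a SQL type for the given flavor and quotes identifiers
# accordingly. The sample frame, table name, and the exact emitted type
# names below are assumptions for the example.
def _demo_get_schema():
    import pandas as pd
    frame = pd.DataFrame({'Col A': [1.5], 'n': [1]})
    # column names are lowercased and spaces replaced by '_' before the
    # type lookup, so this emits something like:
    #   CREATE TABLE demo (
    #     [col_a] REAL,
    #     [n] INTEGER
    #   );
    return get_schema(frame, 'demo', 'sqlite')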
def test_drop(self):
    simple = DataFrame({"A": [1, 2, 3, 4], "B": [0, 1, 2, 3]})
    assert_frame_equal(simple.drop("A", axis=1), simple[["B"]])
    assert_frame_equal(simple.drop(["A", "B"], axis="columns"), simple[[]])
    assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.ix[[2], :])
    assert_frame_equal(simple.drop([0, 3], axis="index"),
                       simple.ix[[1, 2], :])

    self.assertRaises(ValueError, simple.drop, 5)
    self.assertRaises(ValueError, simple.drop, "C", 1)
    self.assertRaises(ValueError, simple.drop, [1, 5])
    self.assertRaises(ValueError, simple.drop, ["A", "C"], 1)

    # errors = 'ignore'
    assert_frame_equal(simple.drop(5, errors="ignore"), simple)
    assert_frame_equal(simple.drop([0, 5], errors="ignore"),
                       simple.ix[[1, 2, 3], :])
    assert_frame_equal(simple.drop("C", axis=1, errors="ignore"), simple)
    assert_frame_equal(simple.drop(["A", "C"], axis=1, errors="ignore"),
                       simple[["B"]])

    # non-unique - wheee!
    nu_df = DataFrame(lzip(range(3), range(-3, 1), list("abc")),
                      columns=["a", "a", "b"])
    assert_frame_equal(nu_df.drop("a", axis=1), nu_df[["b"]])
    assert_frame_equal(nu_df.drop("b", axis="columns"), nu_df["a"])

    nu_df = nu_df.set_index(pd.Index(["X", "Y", "X"]))
    nu_df.columns = list("abc")
    assert_frame_equal(nu_df.drop("X", axis="rows"), nu_df.ix[["Y"], :])
    assert_frame_equal(nu_df.drop(["X", "Y"], axis=0), nu_df.ix[[], :])

    # inplace cache issue
    # GH 5628
    df = pd.DataFrame(np.random.randn(10, 3), columns=list("abc"))
    expected = df[~(df.b > 0)]
    df.drop(labels=df[df.b > 0].index, inplace=True)
    assert_frame_equal(df, expected)
def test_boxplot_legacy(self):
    grouped = self.hist_df.groupby(by='gender')
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        axes = _check_plot_works(grouped.boxplot, return_type='axes')
    self._check_axes_shape(list(axes.values()), axes_num=2, layout=(1, 2))

    axes = _check_plot_works(grouped.boxplot, subplots=False,
                             return_type='axes')
    self._check_axes_shape(axes, axes_num=1, layout=(1, 1))

    tuples = lzip(string.ascii_letters[:10], range(10))
    df = DataFrame(np.random.rand(10, 3),
                   index=MultiIndex.from_tuples(tuples))

    grouped = df.groupby(level=1)
    axes = _check_plot_works(grouped.boxplot, return_type='axes')
    self._check_axes_shape(list(axes.values()), axes_num=10, layout=(4, 3))

    axes = _check_plot_works(grouped.boxplot, subplots=False,
                             return_type='axes')
    self._check_axes_shape(axes, axes_num=1, layout=(1, 1))

    grouped = df.unstack(level=1).groupby(level=0, axis=1)
    axes = _check_plot_works(grouped.boxplot, return_type='axes')
    self._check_axes_shape(list(axes.values()), axes_num=3, layout=(2, 2))

    axes = _check_plot_works(grouped.boxplot, subplots=False,
                             return_type='axes')
    self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
def groups(self):
    """ dict {group name -> group labels} """
    if len(self.groupings) == 1:
        return self.groupings[0].groups
    else:
        to_groupby = lzip(*(ping.grouper for ping in self.groupings))
        to_groupby = Index(to_groupby)
        return self.axis.groupby(to_groupby)
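
# Behavior sketch (assumed example, public API only): with more than one
# grouping, the keys of ``groups`` are the tuples produced by zipping the
# individual groupers, each mapped to the row labels in that group.
def _demo_grouper_groups():
    import pandas as pd
    df = pd.DataFrame({'a': [1, 1, 2], 'b': ['x', 'y', 'x'],
                       'v': [1.0, 2.0, 3.0]})
    # e.g. {(1, 'x'): [0], (1, 'y'): [1], (2, 'x'): [2]}
    return df.groupby(['a', 'b']).groups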
def tquery(self, *args):
    cur = self.execute(*args)
    result = self._fetchall_as_list(cur)

    # This makes into tuples
    if result and len(result[0]) == 1:
        # python 3 compat
        result = list(lzip(*result)[0])
    elif result is None:  # pragma: no cover
        result = []
    return result
def tquery(sql, con=None, cur=None, retry=True):
    """
    DEPRECATED. Returns list of tuples corresponding to each row in given
    sql query.

    If only one column selected, then plain list is returned.

    To obtain the same result in the future, you can use the following:

    >>> execute(sql, con, params).fetchall()

    Parameters
    ----------
    sql: string
        SQL query to be executed
    con: DBAPI2 connection
    cur: deprecated, cursor is obtained from connection

    Returns
    -------
    Results Iterable
    """
    warnings.warn(
        "tquery is deprecated, and will be removed in future versions. "
        "You can use ``execute(...).fetchall()`` instead.",
        FutureWarning,
    )

    cur = execute(sql, con, cur=cur)
    result = _safe_fetch(cur)

    if con is not None:
        try:
            cur.close()
            con.commit()
        except Exception as e:
            excName = e.__class__.__name__
            if excName == "OperationalError":  # pragma: no cover
                print("Failed to commit, may need to restart interpreter")
            else:
                raise

            traceback.print_exc()
            if retry:
                return tquery(sql, con=con, retry=False)

    if result and len(result[0]) == 1:
        # python 3 compat
        result = list(lzip(*result)[0])
    elif result is None:  # pragma: no cover
        result = []

    return result
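
# Migration sketch (assumed example): the replacement recommended by the
# warning above, including the single-column flattening tquery performed.
# ``con`` is any DBAPI2 connection; the table and query are made up.
def _demo_tquery_replacement(con):
    rows = execute("SELECT a, b FROM t", con).fetchall()  # list of tuples
    # single-column result flattened to a plain list, like tquery did
    col = [r[0] for r in execute("SELECT a FROM t", con).fetchall()]
    return rows, col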
def test_boxplot_legacy3(self):
    tuples = lzip(string.ascii_letters[:10], range(10))
    df = DataFrame(np.random.rand(10, 3),
                   index=MultiIndex.from_tuples(tuples))
    grouped = df.unstack(level=1).groupby(level=0, axis=1)
    with tm.assert_produces_warning(UserWarning):
        axes = _check_plot_works(grouped.boxplot, return_type='axes')
    self._check_axes_shape(list(axes.values()), axes_num=3, layout=(2, 2))

    axes = _check_plot_works(grouped.boxplot, subplots=False,
                             return_type='axes')
    self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
def get_kwargs_from_breaks(self, breaks, closed='right'):
    """
    converts intervals in breaks format to a dictionary of kwargs specific
    to the format expected by IntervalIndex.from_tuples
    """
    if len(breaks) == 0:
        return {'data': breaks}

    tuples = lzip(breaks[:-1], breaks[1:])
    if isinstance(breaks, (list, tuple)):
        return {'data': tuples}
    return {'data': com._asarray_tuplesafe(tuples)}
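
# Worked example (assumed inputs): consecutive breaks pair up into the
# (left, right) tuples that IntervalIndex.from_tuples expects.
def _demo_breaks_to_tuples():
    breaks = [0, 1, 2, 3]
    return lzip(breaks[:-1], breaks[1:])  # [(0, 1), (1, 2), (2, 3)]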
def __init__(self, encoding):
    if encoding is None:
        self._encoding = 'cp1252'
    else:
        self._encoding = encoding

    # type code.
    # --------------------
    # str1        1 = 0x01
    # str2        2 = 0x02
    # ...
    # str244    244 = 0xf4
    # byte      251 = 0xfb  (sic)
    # int       252 = 0xfc
    # long      253 = 0xfd
    # float     254 = 0xfe
    # double    255 = 0xff
    # --------------------
    # NOTE: the byte type seems to be reserved for categorical variables
    # with a label, but the underlying variable is -127 to 100
    # we're going to drop the label and cast to int
    self.DTYPE_MAP = dict(
        lzip(range(1, 245), ['a' + str(i) for i in range(1, 245)]) +
        [(251, np.int16), (252, np.int32), (253, np.int64),
         (254, np.float32), (255, np.float64)]
    )
    self.TYPE_MAP = lrange(251) + list('bhlfd')
    # NOTE: technically, some of these are wrong. there are more numbers
    # that can be represented. it's the 27 ABOVE and BELOW the max listed
    # numeric data type in [U] 12.2.2 of the 11.2 manual
    self.MISSING_VALUES = {
        'b': (-127, 100),
        'h': (-32767, 32740),
        'l': (-2147483647, 2147483620),
        'f': (-1.701e+38, +1.701e+38),
        'd': (-1.798e+308, +8.988e+307)
    }
    self.OLD_TYPE_MAPPING = {
        'i': 252,
        'f': 254,
        'b': 251
    }
def test_indexer_caching(self):
    # GH5727
    # make sure that indexers are in the _internal_names_set
    n = 1000001
    arrays = [lrange(n), lrange(n)]
    index = MultiIndex.from_tuples(lzip(*arrays))
    s = Series(np.zeros(n), index=index)
    str(s)

    # setitem
    expected = Series(np.ones(n), index=index)
    s = Series(np.zeros(n), index=index)
    s[s == 0] = 1
    tm.assert_series_equal(s, expected)
def test_series_getitem_not_sorted(self):
    arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'],
              ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
    tuples = lzip(*arrays)
    index = MultiIndex.from_tuples(tuples)
    s = Series(randn(8), index=index)

    arrays = [np.array(x) for x in zip(*index.values)]

    result = s['qux']
    result2 = s.loc['qux']
    expected = s[arrays[0] == 'qux']
    expected.index = expected.index.droplevel(0)
    tm.assert_series_equal(result, expected)
    tm.assert_series_equal(result2, expected)
def test_boxplot(self):
    df = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"])
    df["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
    grouped = df.groupby(by="X")
    _check_plot_works(grouped.boxplot)
    _check_plot_works(grouped.boxplot, subplots=False)

    tuples = lzip(string.ascii_letters[:10], range(10))
    df = DataFrame(np.random.rand(10, 3),
                   index=MultiIndex.from_tuples(tuples))

    grouped = df.groupby(level=1)
    _check_plot_works(grouped.boxplot)
    _check_plot_works(grouped.boxplot, subplots=False)

    grouped = df.unstack(level=1).groupby(level=0, axis=1)
    _check_plot_works(grouped.boxplot)
    _check_plot_works(grouped.boxplot, subplots=False)
def test_constructors_errors_tz(self, tz_left, tz_right):
    # GH 18537
    left = date_range('2017-01-01', periods=4, tz=tz_left)
    right = date_range('2017-01-02', periods=4, tz=tz_right)

    # don't need to check IntervalIndex(...) or from_intervals, since
    # mixed tz are disallowed at the Interval level
    with pytest.raises(ValueError):
        IntervalIndex.from_arrays(left, right)

    with pytest.raises(ValueError):
        IntervalIndex.from_tuples(lzip(left, right))

    with pytest.raises(ValueError):
        breaks = left.tolist() + [right[-1]]
        IntervalIndex.from_breaks(breaks)
def __init__(self, encoding):
    self._encoding = encoding

    # type code.
    # --------------------
    # str1        1 = 0x01
    # str2        2 = 0x02
    # ...
    # str244    244 = 0xf4
    # byte      251 = 0xfb  (sic)
    # int       252 = 0xfc
    # long      253 = 0xfd
    # float     254 = 0xfe
    # double    255 = 0xff
    # --------------------
    # NOTE: the byte type seems to be reserved for categorical variables
    # with a label, but the underlying variable is -127 to 100
    # we're going to drop the label and cast to int
    self.DTYPE_MAP = dict(
        lzip(range(1, 245), ["a" + str(i) for i in range(1, 245)])
        + [(251, np.int16), (252, np.int32), (253, np.int64),
           (254, np.float32), (255, np.float64)]
    )
    self.DTYPE_MAP_XML = dict(
        [
            (32768, np.string_),
            (65526, np.float64),
            (65527, np.float32),
            (65528, np.int64),
            (65529, np.int32),
            (65530, np.int16),
        ]
    )
    self.TYPE_MAP = lrange(251) + list("bhlfd")
    self.TYPE_MAP_XML = dict(
        [(65526, "d"), (65527, "f"), (65528, "l"), (65529, "h"),
         (65530, "b")]
    )
    # NOTE: technically, some of these are wrong. there are more numbers
    # that can be represented. it's the 27 ABOVE and BELOW the max listed
    # numeric data type in [U] 12.2.2 of the 11.2 manual
    self.MISSING_VALUES = {
        "b": (-127, 100),
        "h": (-32767, 32740),
        "l": (-2147483647, 2147483620),
        "f": (-1.701e38, +1.701e38),
        "d": (-1.798e308, +8.988e307),
    }
    self.OLD_TYPE_MAPPING = {"i": 252, "f": 254, "b": 251}
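
# Lookup sketch (assumed example; the class name below is hypothetical,
# since only __init__ is shown here): a Stata type code indexes DTYPE_MAP
# to get the numpy dtype used when reading a column.
#
#     parser = StataParser('cp1252')   # hypothetical instantiation
#     parser.DTYPE_MAP[254]            # -> np.float32 (Stata 'float')
#     parser.DTYPE_MAP[5]              # -> 'a5', a fixed-width 5-byte string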
def test_drop(self):
    simple = DataFrame({"A": [1, 2, 3, 4], "B": [0, 1, 2, 3]})
    assert_frame_equal(simple.drop("A", axis=1), simple[['B']])
    assert_frame_equal(simple.drop(["A", "B"], axis='columns'),
                       simple[[]])
    assert_frame_equal(simple.drop([0, 1, 3], axis=0), simple.loc[[2], :])
    assert_frame_equal(simple.drop(
        [0, 3], axis='index'), simple.loc[[1, 2], :])

    with pytest.raises(KeyError, match=r"\[5\] not found in axis"):
        simple.drop(5)
    with pytest.raises(KeyError, match=r"\['C'\] not found in axis"):
        simple.drop('C', 1)
    with pytest.raises(KeyError, match=r"\[5\] not found in axis"):
        simple.drop([1, 5])
    with pytest.raises(KeyError, match=r"\['C'\] not found in axis"):
        simple.drop(['A', 'C'], 1)

    # errors = 'ignore'
    assert_frame_equal(simple.drop(5, errors='ignore'), simple)
    assert_frame_equal(simple.drop([0, 5], errors='ignore'),
                       simple.loc[[1, 2, 3], :])
    assert_frame_equal(simple.drop('C', axis=1, errors='ignore'), simple)
    assert_frame_equal(simple.drop(['A', 'C'], axis=1, errors='ignore'),
                       simple[['B']])

    # non-unique - wheee!
    nu_df = DataFrame(lzip(range(3), range(-3, 1), list('abc')),
                      columns=['a', 'a', 'b'])
    assert_frame_equal(nu_df.drop('a', axis=1), nu_df[['b']])
    assert_frame_equal(nu_df.drop('b', axis='columns'), nu_df['a'])
    assert_frame_equal(nu_df.drop([]), nu_df)  # GH 16398

    nu_df = nu_df.set_index(pd.Index(['X', 'Y', 'X']))
    nu_df.columns = list('abc')
    assert_frame_equal(nu_df.drop('X', axis='rows'), nu_df.loc[["Y"], :])
    assert_frame_equal(nu_df.drop(['X', 'Y'], axis=0), nu_df.loc[[], :])

    # inplace cache issue
    # GH 5628
    df = pd.DataFrame(np.random.randn(10, 3), columns=list('abc'))
    expected = df[~(df.b > 0)]
    df.drop(labels=df[df.b > 0].index, inplace=True)
    assert_frame_equal(df, expected)
def _get_schema_legacy(frame, name, flavor, keys=None):
    """Old function from 0.13.1. To keep backwards compatibility.
    When mysql legacy support is dropped, it should be possible to
    remove this code
    """

    def get_sqltype(dtype, flavor):
        pytype = dtype.type
        pytype_name = "text"
        if issubclass(pytype, np.floating):
            pytype_name = "float"
        elif issubclass(pytype, np.integer):
            pytype_name = "int"
        elif issubclass(pytype, np.datetime64) or pytype is datetime:
            # Caution: np.datetime64 is also a subclass of np.number.
            pytype_name = "datetime"
        elif pytype is datetime.date:
            pytype_name = "date"
        elif issubclass(pytype, np.bool_):
            pytype_name = "bool"
        return _SQL_TYPES[pytype_name][flavor]

    lookup_type = lambda dtype: get_sqltype(dtype, flavor)

    column_types = lzip(frame.dtypes.index, map(lookup_type, frame.dtypes))
    if flavor == 'sqlite':
        columns = ',\n  '.join('[%s] %s' % x for x in column_types)
    else:
        columns = ',\n  '.join('`%s` %s' % x for x in column_types)

    keystr = ''
    if keys is not None:
        if isinstance(keys, string_types):
            keys = (keys,)
        keystr = ', PRIMARY KEY (%s)' % ','.join(keys)
    template = """CREATE TABLE %(name)s (
  %(columns)s
  %(keystr)s
);"""
    create_statement = template % {'name': name, 'columns': columns,
                                   'keystr': keystr}
    return create_statement
def tquery(sql, con=None, cur=None, retry=True):
    """
    Returns list of tuples corresponding to each row in given sql
    query.

    If only one column selected, then plain list is returned.

    Parameters
    ----------
    sql: string
        SQL query to be executed
    con: SQLConnection or DB API 2.0-compliant connection
    cur: DB API 2.0 cursor

    Provide a specific connection or a specific cursor if you are executing a
    lot of sequential statements and want to commit outside.
    """
    cur = execute(sql, con, cur=cur)
    result = _safe_fetch(cur)

    if con is not None:
        try:
            cur.close()
            con.commit()
        except Exception as e:
            excName = e.__class__.__name__
            if excName == 'OperationalError':  # pragma: no cover
                print('Failed to commit, may need to restart interpreter')
            else:
                raise

            traceback.print_exc()
            if retry:
                return tquery(sql, con=con, retry=False)

    if result and len(result[0]) == 1:
        # python 3 compat
        result = list(lzip(*result)[0])
    elif result is None:  # pragma: no cover
        result = []

    return result
def _parse_data(schema, rows):
    # see:
    # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
    # #missing-data-casting-rules-and-indexing
    dtype_map = {'FLOAT': np.dtype(float),
                 'TIMESTAMP': 'M8[ns]'}

    fields = schema['fields']
    col_types = [field['type'] for field in fields]
    col_names = [str(field['name']) for field in fields]
    col_dtypes = [dtype_map.get(field['type'], object) for field in fields]
    page_array = np.zeros((len(rows),), dtype=lzip(col_names, col_dtypes))
    for row_num, raw_row in enumerate(rows):
        entries = raw_row.get('f', [])
        for col_num, field_type in enumerate(col_types):
            field_value = _parse_entry(entries[col_num].get('v', ''),
                                       field_type)
            page_array[row_num][col_num] = field_value

    return DataFrame(page_array, columns=col_names)
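
# Input-shape sketch (assumed sample data): the BigQuery schema/rows
# payload this parser consumes looks roughly like the following.
def _demo_parse_data_input():
    schema = {'fields': [{'name': 'x', 'type': 'FLOAT'},
                         {'name': 's', 'type': 'STRING'}]}
    rows = [{'f': [{'v': '1.5'}, {'v': 'a'}]},
            {'f': [{'v': '2.5'}, {'v': 'b'}]}]
    # -> 2x2 DataFrame with columns ['x', 's'], 'x' parsed as float64
    return _parse_data(schema, rows)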
def test_plot(self):
    df = tm.makeTimeDataFrame()
    _check_plot_works(df.plot, grid=False)
    _check_plot_works(df.plot, subplots=True)
    _check_plot_works(df.plot, subplots=True, use_index=False)

    df = DataFrame({'x': [1, 2], 'y': [3, 4]})
    self._check_plot_fails(df.plot, kind='line', blarg=True)

    df = DataFrame(np.random.rand(10, 3),
                   index=list(string.ascii_letters[:10]))
    _check_plot_works(df.plot, use_index=True)
    _check_plot_works(df.plot, sort_columns=False)
    _check_plot_works(df.plot, yticks=[1, 5, 10])
    _check_plot_works(df.plot, xticks=[1, 5, 10])
    _check_plot_works(df.plot, ylim=(-100, 100), xlim=(-100, 100))
    _check_plot_works(df.plot, subplots=True, title='blah')
    _check_plot_works(df.plot, title='blah')

    tuples = lzip(string.ascii_letters[:10], range(10))
    df = DataFrame(np.random.rand(10, 3),
                   index=MultiIndex.from_tuples(tuples))
    _check_plot_works(df.plot, use_index=True)

    # unicode
    index = MultiIndex.from_tuples([(u('\u03b1'), 0),
                                    (u('\u03b1'), 1),
                                    (u('\u03b2'), 2),
                                    (u('\u03b2'), 3),
                                    (u('\u03b3'), 4),
                                    (u('\u03b3'), 5),
                                    (u('\u03b4'), 6),
                                    (u('\u03b4'), 7)], names=['i0', 'i1'])
    columns = MultiIndex.from_tuples([('bar', u('\u0394')),
                                      ('bar', u('\u0395'))],
                                     names=['c0', 'c1'])
    df = DataFrame(np.random.randint(0, 10, (8, 2)),
                   columns=columns, index=index)
    _check_plot_works(df.plot, title=u('\u03A3'))
def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None,
                    idx_type=None):
    """Create an index/multiindex with given dimensions, levels, names, etc'

    nentries - number of entries in index
    nlevels - number of levels (> 1 produces multiindex)
    prefix - a string prefix for labels
    names - (Optional), bool or list of strings. if True will use default
        names, if false will use no names, if a list is given, the name of
        each level in the index will be taken from the list.
    ndupe_l - (Optional), list of ints, the number of rows for which the
        label will repeated at the corresponding level, you can specify just
        the first few, the rest will use the default ndupe_l of 1.
        len(ndupe_l) <= nlevels.
    idx_type - "i"/"f"/"s"/"u"/"dt"/"p". If idx_type is not None,
        `idx_nlevels` must be 1. "i"/"f" creates an integer/float index,
        "s"/"u" creates a string/unicode index, "dt" creates a datetime
        index. if unspecified, string labels will be generated.
    """
    if ndupe_l is None:
        ndupe_l = [1] * nlevels
    assert (_is_sequence(ndupe_l) and len(ndupe_l) <= nlevels)
    assert (names is None or names is False or
            names is True or len(names) is nlevels)
    assert idx_type is None or \
        (idx_type in ('i', 'f', 's', 'u', 'dt', 'p') and nlevels == 1)

    if names is True:
        # build default names
        names = [prefix + str(i) for i in range(nlevels)]
    if names is False:
        # pass None to index constructor for no name
        names = None

    # make singleton case uniform
    if isinstance(names, compat.string_types) and nlevels == 1:
        names = [names]

    # specific 1D index type requested?
    idx_func = dict(i=makeIntIndex, f=makeFloatIndex, s=makeStringIndex,
                    u=makeUnicodeIndex, dt=makeDateIndex,
                    p=makePeriodIndex).get(idx_type)
    if idx_func:
        idx = idx_func(nentries)
        # but we need to fill in the name
        if names:
            idx.name = names[0]
        return idx
    elif idx_type is not None:
        raise ValueError('"%s" is not a legal value for `idx_type`, use '
                         '"i"/"f"/"s"/"u"/"dt"/"p".' % idx_type)

    if len(ndupe_l) < nlevels:
        ndupe_l.extend([1] * (nlevels - len(ndupe_l)))
    assert len(ndupe_l) == nlevels

    assert all([x > 0 for x in ndupe_l])

    tuples = []
    for i in range(nlevels):
        def keyfunc(x):
            import re
            numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_")
            return lmap(int, numeric_tuple)

        # build a list of lists to create the index from
        div_factor = nentries // ndupe_l[i] + 1
        cnt = Counter()
        for j in range(div_factor):
            label = prefix + '_l%d_g' % i + str(j)
            cnt[label] = ndupe_l[i]
        # cute Counter trick
        result = list(sorted(cnt.elements(), key=keyfunc))[:nentries]
        tuples.append(result)

    tuples = lzip(*tuples)

    # convert tuples to index
    if nentries == 1:
        index = Index(tuples[0], name=names[0])
    else:
        index = MultiIndex.from_tuples(tuples, names=names)
    return index
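
# Usage sketch (assumed call): four entries over two levels, with each
# level-0 label repeated twice via ndupe_l.
def _demo_make_custom_index():
    mi = makeCustomIndex(4, 2, ndupe_l=[2])  # 2-level MultiIndex; labels
    return mi                                # look like '#_l0_g0', '#_l1_g3'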
def _write_hierarchical_rows(self, fmt_values, indent):
    template = 'rowspan="{span}" valign="top"'

    truncate_h = self.fmt.truncate_h
    truncate_v = self.fmt.truncate_v
    frame = self.fmt.tr_frame
    ncols = len(frame.columns)
    nrows = len(frame)
    row_levels = self.frame.index.nlevels

    idx_values = frame.index.format(sparsify=False, adjoin=False,
                                    names=False)
    idx_values = lzip(*idx_values)

    if self.fmt.sparsify:
        # GH3547
        sentinel = com.sentinel_factory()
        levels = frame.index.format(sparsify=sentinel, adjoin=False,
                                    names=False)

        level_lengths = get_level_lengths(levels, sentinel)
        inner_lvl = len(level_lengths) - 1
        if truncate_v:
            # Insert ... row and adjust idx_values and
            # level_lengths to take this into account.
            ins_row = self.fmt.tr_row_num
            inserted = False
            for lnum, records in enumerate(level_lengths):
                rec_new = {}
                for tag, span in list(records.items()):
                    if tag >= ins_row:
                        rec_new[tag + 1] = span
                    elif tag + span > ins_row:
                        rec_new[tag] = span + 1

                        # GH 14882 - Make sure insertion done once
                        if not inserted:
                            dot_row = list(idx_values[ins_row - 1])
                            dot_row[-1] = u('...')
                            idx_values.insert(ins_row, tuple(dot_row))
                            inserted = True
                        else:
                            dot_row = list(idx_values[ins_row])
                            dot_row[inner_lvl - lnum] = u('...')
                            idx_values[ins_row] = tuple(dot_row)
                    else:
                        rec_new[tag] = span
                    # If ins_row lies between tags, all cols idx cols
                    # receive ...
                    if tag + span == ins_row:
                        rec_new[ins_row] = 1
                        if lnum == 0:
                            idx_values.insert(ins_row, tuple(
                                [u('...')] * len(level_lengths)))

                        # GH 14882 - Place ... in correct level
                        elif inserted:
                            dot_row = list(idx_values[ins_row])
                            dot_row[inner_lvl - lnum] = u('...')
                            idx_values[ins_row] = tuple(dot_row)

                level_lengths[lnum] = rec_new

            level_lengths[inner_lvl][ins_row] = 1
            for ix_col in range(len(fmt_values)):
                fmt_values[ix_col].insert(ins_row, '...')
            nrows += 1

        for i in range(nrows):
            row = []
            tags = {}

            sparse_offset = 0
            j = 0
            for records, v in zip(level_lengths, idx_values[i]):
                if i in records:
                    if records[i] > 1:
                        tags[j] = template.format(span=records[i])
                else:
                    sparse_offset += 1
                    continue

                j += 1
                row.append(v)

            row.extend(fmt_values[j][i] for j in range(ncols))
            if truncate_h:
                row.insert(row_levels - sparse_offset +
                           self.fmt.tr_col_num, '...')
            self.write_tr(row, indent, self.indent_delta, tags=tags,
                          nindex_levels=len(levels) - sparse_offset)
    else:
        for i in range(len(frame)):
            idx_values = list(zip(*frame.index.format(
                sparsify=False, adjoin=False, names=False)))
            row = []
            row.extend(idx_values[i])
            row.extend(fmt_values[j][i] for j in range(ncols))
            if truncate_h:
                row.insert(row_levels + self.fmt.tr_col_num, '...')
            self.write_tr(row, indent, self.indent_delta, tags=None,
                          nindex_levels=frame.index.nlevels)
def _write_hierarchical_rows(self, fmt_values, indent):
    template = 'rowspan="{span}" valign="top"'

    truncate_h = self.fmt.truncate_h
    truncate_v = self.fmt.truncate_v
    frame = self.fmt.tr_frame
    nrows = len(frame)
    # TODO: after gh-22887 fixed, refactor to use class property
    # in place of row_levels
    row_levels = self.frame.index.nlevels

    idx_values = frame.index.format(sparsify=False, adjoin=False,
                                    names=False)
    idx_values = lzip(*idx_values)

    if self.fmt.sparsify:
        # GH3547
        sentinel = com.sentinel_factory()
        levels = frame.index.format(sparsify=sentinel, adjoin=False,
                                    names=False)

        level_lengths = get_level_lengths(levels, sentinel)
        inner_lvl = len(level_lengths) - 1
        if truncate_v:
            # Insert ... row and adjust idx_values and
            # level_lengths to take this into account.
            ins_row = self.fmt.tr_row_num
            inserted = False
            for lnum, records in enumerate(level_lengths):
                rec_new = {}
                for tag, span in list(records.items()):
                    if tag >= ins_row:
                        rec_new[tag + 1] = span
                    elif tag + span > ins_row:
                        rec_new[tag] = span + 1

                        # GH 14882 - Make sure insertion done once
                        if not inserted:
                            dot_row = list(idx_values[ins_row - 1])
                            dot_row[-1] = u('...')
                            idx_values.insert(ins_row, tuple(dot_row))
                            inserted = True
                        else:
                            dot_row = list(idx_values[ins_row])
                            dot_row[inner_lvl - lnum] = u('...')
                            idx_values[ins_row] = tuple(dot_row)
                    else:
                        rec_new[tag] = span
                    # If ins_row lies between tags, all cols idx cols
                    # receive ...
                    if tag + span == ins_row:
                        rec_new[ins_row] = 1
                        if lnum == 0:
                            idx_values.insert(
                                ins_row, tuple([u('...')] *
                                               len(level_lengths)))

                        # GH 14882 - Place ... in correct level
                        elif inserted:
                            dot_row = list(idx_values[ins_row])
                            dot_row[inner_lvl - lnum] = u('...')
                            idx_values[ins_row] = tuple(dot_row)

                level_lengths[lnum] = rec_new

            level_lengths[inner_lvl][ins_row] = 1
            for ix_col in range(len(fmt_values)):
                fmt_values[ix_col].insert(ins_row, '...')
            nrows += 1

        for i in range(nrows):
            row = []
            tags = {}

            sparse_offset = 0
            j = 0
            for records, v in zip(level_lengths, idx_values[i]):
                if i in records:
                    if records[i] > 1:
                        tags[j] = template.format(span=records[i])
                else:
                    sparse_offset += 1
                    continue

                j += 1
                row.append(v)

            row.extend(fmt_values[j][i] for j in range(self.ncols))
            if truncate_h:
                row.insert(
                    row_levels - sparse_offset + self.fmt.tr_col_num, '...')
            self.write_tr(row, indent, self.indent_delta, tags=tags,
                          nindex_levels=len(levels) - sparse_offset)
    else:
        for i in range(len(frame)):
            idx_values = list(
                zip(*frame.index.format(
                    sparsify=False, adjoin=False, names=False)))
            row = []
            row.extend(idx_values[i])
            row.extend(fmt_values[j][i] for j in range(self.ncols))
            if truncate_h:
                row.insert(row_levels + self.fmt.tr_col_num, '...')
            self.write_tr(row, indent, self.indent_delta, tags=None,
                          nindex_levels=frame.index.nlevels)
def _translate(self):
    """
    Convert the DataFrame in `self.data` and the attrs from `_build_styles`
    into a dictionary of {head, body, uuid, cellstyle}
    """
    table_styles = self.table_styles or []
    caption = self.caption
    ctx = self.ctx
    precision = self.precision
    uuid = self.uuid or str(uuid1()).replace("-", "_")
    ROW_HEADING_CLASS = "row_heading"
    COL_HEADING_CLASS = "col_heading"
    DATA_CLASS = "data"
    BLANK_CLASS = "blank"
    BLANK_VALUE = ""

    cell_context = dict()

    n_rlvls = self.data.index.nlevels
    n_clvls = self.data.columns.nlevels
    rlabels = self.data.index.tolist()
    clabels = self.data.columns.tolist()

    idx_values = self.data.index.format(sparsify=False, adjoin=False,
                                        names=False)
    idx_values = lzip(*idx_values)

    if n_rlvls == 1:
        rlabels = [[x] for x in rlabels]
    if n_clvls == 1:
        clabels = [[x] for x in clabels]
    clabels = list(zip(*clabels))

    cellstyle = []
    head = []

    for r in range(n_clvls):
        row_es = [{"type": "th",
                   "value": BLANK_VALUE,
                   "class": " ".join([BLANK_CLASS])}] * n_rlvls
        for c in range(len(clabels[0])):
            cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c]
            cs.extend(cell_context.get(
                "col_headings", {}).get(r, {}).get(c, []))
            value = clabels[r][c]
            row_es.append({"type": "th",
                           "value": value,
                           "display_value": value,
                           "class": " ".join(cs)})
        head.append(row_es)

    if self.data.index.names and self.data.index.names != [None]:
        index_header_row = []

        for c, name in enumerate(self.data.index.names):
            cs = [COL_HEADING_CLASS,
                  "level%s" % (n_clvls + 1),
                  "col%s" % c]
            index_header_row.append({"type": "th",
                                     "value": name,
                                     "class": " ".join(cs)})

        index_header_row.extend(
            [{"type": "th",
              "value": BLANK_VALUE,
              "class": " ".join([BLANK_CLASS])}] * len(clabels[0]))

        head.append(index_header_row)

    body = []
    for r, idx in enumerate(self.data.index):
        # build the row-header cells; cs must be computed per column,
        # not once per row, since it depends on the level number c
        row_es = []
        for c in range(len(rlabels[r])):
            cs = [ROW_HEADING_CLASS, "level%s" % c, "row%s" % r]
            cs.extend(
                cell_context.get("row_headings", {}).get(r, {}).get(c, []))
            row_es.append({"type": "th",
                           "value": rlabels[r][c],
                           "class": " ".join(cs),
                           "display_value": rlabels[r][c]})

        for c, col in enumerate(self.data.columns):
            cs = [DATA_CLASS, "row%s" % r, "col%s" % c]
            cs.extend(cell_context.get("data", {}).get(r, {}).get(c, []))
            formatter = self._display_funcs[(r, c)]
            value = self.data.iloc[r, c]
            row_es.append({
                "type": "td",
                "value": value,
                "class": " ".join(cs),
                "id": "_".join(cs[1:]),
                "display_value": formatter(value)
            })
            props = []
            for x in ctx[r, c]:
                # have to handle empty styles like ['']
                if x.count(":"):
                    props.append(x.split(":"))
                else:
                    props.append(['', ''])
            cellstyle.append({'props': props,
                              'selector': "row%s_col%s" % (r, c)})
        body.append(row_es)

    return dict(head=head, cellstyle=cellstyle, body=body, uuid=uuid,
                precision=precision, table_styles=table_styles,
                caption=caption, table_attributes=self.table_attributes)
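
# Output-shape sketch (assumed, abbreviated): the returned dict feeds an
# HTML template; every header/data cell is a small dict, roughly
#
# {'head': [[{'type': 'th', 'value': 'A',
#             'class': 'col_heading level0 col0', ...}, ...]],
#  'body': [[{'type': 'th', 'value': 0,
#             'class': 'row_heading level0 row0', ...},
#            {'type': 'td', 'value': 1.0, 'class': 'data row0 col0',
#             'id': 'row0_col0', ...}, ...]],
#  'cellstyle': [{'props': [['color', ' red']], 'selector': 'row0_col0'}],
#  'uuid': ..., 'precision': ..., 'table_styles': [...], 'caption': ...}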
def __init__(self, encoding):
    self._encoding = encoding

    # type code.
    # --------------------
    # str1        1 = 0x01
    # str2        2 = 0x02
    # ...
    # str244    244 = 0xf4
    # byte      251 = 0xfb  (sic)
    # int       252 = 0xfc
    # long      253 = 0xfd
    # float     254 = 0xfe
    # double    255 = 0xff
    # --------------------
    # NOTE: the byte type seems to be reserved for categorical variables
    # with a label, but the underlying variable is -127 to 100
    # we're going to drop the label and cast to int
    self.DTYPE_MAP = dict(
        lzip(range(1, 245), ['a' + str(i) for i in range(1, 245)]) +
        [(251, np.int8), (252, np.int16), (253, np.int32),
         (254, np.float32), (255, np.float64)]
    )
    self.DTYPE_MAP_XML = dict(
        [(32768, np.string_), (65526, np.float64), (65527, np.float32),
         (65528, np.int32), (65529, np.int16), (65530, np.int8)]
    )
    self.TYPE_MAP = lrange(251) + list('bhlfd')
    self.TYPE_MAP_XML = dict(
        [(65526, 'd'), (65527, 'f'), (65528, 'l'), (65529, 'h'),
         (65530, 'b')]
    )
    # NOTE: technically, some of these are wrong. there are more numbers
    # that can be represented. it's the 27 ABOVE and BELOW the max listed
    # numeric data type in [U] 12.2.2 of the 11.2 manual
    float32_min = b'\xff\xff\xff\xfe'
    float32_max = b'\xff\xff\xff\x7e'
    float64_min = b'\xff\xff\xff\xff\xff\xff\xef\xff'
    float64_max = b'\xff\xff\xff\xff\xff\xff\xdf\x7f'
    self.VALID_RANGE = {
        'b': (-127, 100),
        'h': (-32767, 32740),
        'l': (-2147483647, 2147483620),
        'f': (np.float32(struct.unpack('<f', float32_min)[0]),
              np.float32(struct.unpack('<f', float32_max)[0])),
        'd': (np.float64(struct.unpack('<d', float64_min)[0]),
              np.float64(struct.unpack('<d', float64_max)[0]))
    }
    self.OLD_TYPE_MAPPING = {
        'i': 252,
        'f': 254,
        'b': 251
    }
    # These missing values are the generic '.' in Stata, and are used
    # to replace nans
    self.MISSING_VALUES = {
        'b': 101,
        'h': 32741,
        'l': 2147483621,
        'f': np.float32(struct.unpack('<f', b'\x00\x00\x00\x7f')[0]),
        'd': np.float64(
            struct.unpack('<d', b'\x00\x00\x00\x00\x00\x00\xe0\x7f')[0])
    }
    # Reserved words cannot be used as variable names
    self.RESERVED_WORDS = ('aggregate', 'array', 'boolean', 'break',
                           'byte', 'case', 'catch', 'class', 'colvector',
                           'complex', 'const', 'continue', 'default',
                           'delegate', 'delete', 'do', 'double', 'else',
                           'eltypedef', 'end', 'enum', 'explicit',
                           'export', 'external', 'float', 'for', 'friend',
                           'function', 'global', 'goto', 'if', 'inline',
                           'int', 'local', 'long', 'NULL', 'pragma',
                           'protected', 'quad', 'rowvector', 'short',
                           'typedef', 'typename', 'virtual')
from pandas.core.indexes.api import Index, MultiIndex


@pytest.fixture(params=[tm.makeUnicodeIndex(100),
                        tm.makeStringIndex(100),
                        tm.makeDateIndex(100),
                        tm.makePeriodIndex(100),
                        tm.makeTimedeltaIndex(100),
                        tm.makeIntIndex(100),
                        tm.makeUIntIndex(100),
                        tm.makeFloatIndex(100),
                        Index([True, False]),
                        tm.makeCategoricalIndex(100),
                        Index([]),
                        MultiIndex.from_tuples(lzip(
                            ['foo', 'bar', 'baz'], [1, 2, 3])),
                        Index([0, 0, 1, 1, 2, 2])],
                ids=lambda x: type(x).__name__)
def indices(request):
    return request.param


@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
    # zero-dim integer array behaves like an integer
    return request.param


zeros = [box([0] * 5, dtype=dtype)
         for box in [pd.Index, np.array]
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    if ((levels is None and isinstance(keys[0], tuple)) or
            (levels is not None and len(levels) > 1)):
        zipped = lzip(*keys)
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Categorical.from_array(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays
        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                try:
                    i = level.get_loc(key)
                except KeyError:
                    raise ValueError('Key %s not in level %s'
                                     % (str(key), str(level)))

                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Categorical.from_array(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.codes)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same nlevels
            if not len(set([i.nlevels for i in indexes])) == 1:
                raise AssertionError("Cannot concat indices that do"
                                     " not have the same number of levels")

            # also copies
            names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names,
                          verify_integrity=False)

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy
    for hlevel, level in zip(zipped, levels):
        hlevel = _ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        mask = mapped == -1
        if mask.any():
            raise ValueError('Values not found in passed level: %s'
                             % str(hlevel[mask]))

        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces)
                           for lab in new_index.labels])
    else:
        new_levels.append(new_index)
        new_labels.append(np.tile(np.arange(n), kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names,
                      verify_integrity=False)
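
# Public-API sketch of the index this helper builds (assumed example):
# concat with ``keys`` adds the outer level constructed above.
def _demo_concat_keys_index():
    import pandas as pd
    a = pd.Series([1, 2])
    b = pd.Series([3, 4])
    # MultiIndex: [('x', 0), ('x', 1), ('y', 0), ('y', 1)]
    return pd.concat([a, b], keys=['x', 'y']).index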
def _zip2(*args):
    return lib.list_to_object_array(lzip(*args))
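
# Behavior sketch (assumed inputs): pairs the argument sequences into a
# 1-D object array of tuples rather than letting numpy broadcast to 2-D,
# e.g. _zip2([1, 2], ['a', 'b']) -> array([(1, 'a'), (2, 'b')], dtype=object)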
class TestIntervalIndex(Base):
    _holder = IntervalIndex

    def setup_method(self, method):
        self.index = IntervalIndex.from_arrays([0, 1], [1, 2])
        self.index_with_nan = IntervalIndex.from_tuples(
            [(0, 1), np.nan, (1, 2)])
        self.indices = dict(intervalIndex=tm.makeIntervalIndex(10))

    def create_index(self, closed='right'):
        return IntervalIndex.from_breaks(range(11), closed=closed)

    def create_index_with_nan(self, closed='right'):
        mask = [True, False] + [True] * 8
        return IntervalIndex.from_arrays(
            np.where(mask, np.arange(10), np.nan),
            np.where(mask, np.arange(1, 11), np.nan), closed=closed)

    def test_properties(self, closed):
        index = self.create_index(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10, )

        tm.assert_index_equal(index.left, Index(np.arange(10)))
        tm.assert_index_equal(index.right, Index(np.arange(1, 11)))
        tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5)))

        assert index.closed == closed

        ivs = [Interval(l, r, closed)
               for l, r in zip(range(10), range(1, 11))]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)
        tm.assert_numpy_array_equal(index.values, expected)

        # with nans
        index = self.create_index_with_nan(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10, )

        expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
        expected_right = expected_left + 1
        expected_mid = expected_left + 0.5
        tm.assert_index_equal(index.left, expected_left)
        tm.assert_index_equal(index.right, expected_right)
        tm.assert_index_equal(index.mid, expected_mid)

        assert index.closed == closed

        ivs = [Interval(l, r, closed) if notna(l) else np.nan
               for l, r in zip(expected_left, expected_right)]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)
        tm.assert_numpy_array_equal(index.values, expected)

    @pytest.mark.parametrize('breaks', [
        [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
        [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
        pd.to_datetime(['20170101', '20170202', '20170303', '20170404']),
        pd.to_timedelta(['1ns', '2ms', '3s', '4M', '5H', '6D'])])
    def test_length(self, closed, breaks):
        # GH 18789
        index = IntervalIndex.from_breaks(breaks, closed=closed)
        result = index.length
        expected = Index(iv.length for iv in index)
        tm.assert_index_equal(result, expected)

        # with NA
        index = index.insert(1, np.nan)
        result = index.length
        expected = Index(iv.length if notna(iv) else iv for iv in index)
        tm.assert_index_equal(result, expected)

    def test_with_nans(self, closed):
        index = self.create_index(closed=closed)
        assert not index.hasnans

        result = index.isna()
        expected = np.repeat(False, len(index))
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.repeat(True, len(index))
        tm.assert_numpy_array_equal(result, expected)

        index = self.create_index_with_nan(closed=closed)
        assert index.hasnans

        result = index.isna()
        expected = np.array([False, True] + [False] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.array([True, False] + [True] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

    def test_copy(self, closed):
        expected = self.create_index(closed=closed)

        result = expected.copy()
        assert result.equals(expected)

        result = expected.copy(deep=True)
        assert result.equals(expected)
        assert result.left is not expected.left

    def test_ensure_copied_data(self, closed):
        # exercise the copy flag in the constructor

        # not copying
        index = self.create_index(closed=closed)
        result = IntervalIndex(index, copy=False)
        tm.assert_numpy_array_equal(index.left.values, result.left.values,
                                    check_same='same')
        tm.assert_numpy_array_equal(index.right.values, result.right.values,
                                    check_same='same')

        # by-definition make a copy
        result = IntervalIndex(index.values, copy=False)
        tm.assert_numpy_array_equal(index.left.values, result.left.values,
                                    check_same='copy')
        tm.assert_numpy_array_equal(index.right.values, result.right.values,
                                    check_same='copy')

    def test_equals(self, closed):
        expected = IntervalIndex.from_breaks(np.arange(5), closed=closed)
        assert expected.equals(expected)
        assert expected.equals(expected.copy())

        assert not expected.equals(expected.astype(object))
        assert not expected.equals(np.array(expected))
        assert not expected.equals(list(expected))

        assert not expected.equals([1, 2])
        assert not expected.equals(np.array([1, 2]))
        assert not expected.equals(pd.date_range('20130101', periods=2))

        expected_name1 = IntervalIndex.from_breaks(
            np.arange(5), closed=closed, name='foo')
        expected_name2 = IntervalIndex.from_breaks(
            np.arange(5), closed=closed, name='bar')
        assert expected.equals(expected_name1)
        assert expected_name1.equals(expected_name2)

        for other_closed in {'left', 'right', 'both', 'neither'} - {closed}:
            expected_other_closed = IntervalIndex.from_breaks(
                np.arange(5), closed=other_closed)
            assert not expected.equals(expected_other_closed)

    @pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
    def test_where(self, closed, klass):
        idx = self.create_index(closed=closed)
        cond = [True] * len(idx)
        expected = idx
        result = expected.where(klass(cond))
        tm.assert_index_equal(result, expected)

        cond = [False] + [True] * len(idx[1:])
        expected = IntervalIndex([np.nan] + idx[1:].tolist())
        result = idx.where(klass(cond))
        tm.assert_index_equal(result, expected)

    def test_delete(self, closed):
        expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed)
        result = self.create_index(closed=closed).delete(0)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize('data', [
        interval_range(0, periods=10, closed='neither'),
        interval_range(1.7, periods=8, freq=2.5, closed='both'),
        interval_range(Timestamp('20170101'), periods=12, closed='left'),
        interval_range(Timedelta('1 day'), periods=6, closed='right')])
    def test_insert(self, data):
        item = data[0]
        idx_item = IntervalIndex([item])

        # start
        expected = idx_item.append(data)
        result = data.insert(0, item)
        tm.assert_index_equal(result, expected)

        # end
        expected = data.append(idx_item)
        result = data.insert(len(data), item)
        tm.assert_index_equal(result, expected)

        # mid
        expected = data[:3].append(idx_item).append(data[3:])
        result = data.insert(3, item)
        tm.assert_index_equal(result, expected)

        # invalid type
        msg = 'can only insert Interval objects and NA into an IntervalIndex'
        with tm.assert_raises_regex(ValueError, msg):
            data.insert(1, 'foo')

        # invalid closed
        msg = 'inserted item must be closed on the same side as the index'
        for closed in {'left', 'right', 'both', 'neither'} - {item.closed}:
            with tm.assert_raises_regex(ValueError, msg):
                bad_item = Interval(item.left, item.right, closed=closed)
                data.insert(1, bad_item)

        # GH 18295 (test missing)
        na_idx = IntervalIndex([np.nan], closed=data.closed)
        for na in (np.nan, pd.NaT, None):
            expected = data[:1].append(na_idx).append(data[1:])
            result = data.insert(1, na)
            tm.assert_index_equal(result, expected)

    def test_take(self, closed):
        index = self.create_index(closed=closed)

        result = index.take(range(10))
        tm.assert_index_equal(result, index)

        result = index.take([0, 0, 1])
        expected = IntervalIndex.from_arrays(
            [0, 0, 1], [1, 1, 2], closed=closed)
        tm.assert_index_equal(result, expected)

    def test_unique(self, closed):
        # unique non-overlapping
        idx = IntervalIndex.from_tuples(
            [(0, 1), (2, 3), (4, 5)], closed=closed)
        assert idx.is_unique

        # unique overlapping - distinct endpoints
        idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
        assert idx.is_unique

        # unique overlapping - shared endpoints
        idx = pd.IntervalIndex.from_tuples(
            [(1, 2), (1, 3), (2, 3)], closed=closed)
        assert idx.is_unique

        # unique nested
        idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
        assert idx.is_unique

        # duplicate
        idx = IntervalIndex.from_tuples(
            [(0, 1), (0, 1), (2, 3)], closed=closed)
        assert not idx.is_unique

        # empty
        idx = IntervalIndex([], closed=closed)
        assert idx.is_unique

    def test_monotonic(self, closed):
        # increasing non-overlapping
        idx = IntervalIndex.from_tuples(
            [(0, 1), (2, 3), (4, 5)], closed=closed)
        assert idx.is_monotonic
        assert idx._is_strictly_monotonic_increasing
        assert not idx.is_monotonic_decreasing
        assert not idx._is_strictly_monotonic_decreasing

        # decreasing non-overlapping
        idx = IntervalIndex.from_tuples(
            [(4, 5), (2, 3), (1, 2)], closed=closed)
        assert not idx.is_monotonic
        assert not idx._is_strictly_monotonic_increasing
        assert idx.is_monotonic_decreasing
        assert idx._is_strictly_monotonic_decreasing

        # unordered non-overlapping
        idx = IntervalIndex.from_tuples(
            [(0, 1), (4, 5), (2, 3)], closed=closed)
        assert not idx.is_monotonic
        assert not idx._is_strictly_monotonic_increasing
        assert not idx.is_monotonic_decreasing
        assert not idx._is_strictly_monotonic_decreasing

        # increasing overlapping
        idx = IntervalIndex.from_tuples(
            [(0, 2), (0.5, 2.5), (1, 3)], closed=closed)
        assert idx.is_monotonic
        assert idx._is_strictly_monotonic_increasing
        assert not idx.is_monotonic_decreasing
        assert not idx._is_strictly_monotonic_decreasing

        # decreasing overlapping
        idx = IntervalIndex.from_tuples(
            [(1, 3), (0.5, 2.5), (0, 2)], closed=closed)
        assert not idx.is_monotonic
        assert not idx._is_strictly_monotonic_increasing
        assert idx.is_monotonic_decreasing
        assert idx._is_strictly_monotonic_decreasing

        # unordered overlapping
        idx = IntervalIndex.from_tuples(
            [(0.5, 2.5), (0, 2), (1, 3)], closed=closed)
        assert not idx.is_monotonic
        assert not idx._is_strictly_monotonic_increasing
        assert not idx.is_monotonic_decreasing
        assert not idx._is_strictly_monotonic_decreasing

        # increasing overlapping shared endpoints
        idx = pd.IntervalIndex.from_tuples(
            [(1, 2), (1, 3), (2, 3)], closed=closed)
        assert idx.is_monotonic
        assert idx._is_strictly_monotonic_increasing
        assert not idx.is_monotonic_decreasing
        assert not idx._is_strictly_monotonic_decreasing

        # decreasing overlapping shared endpoints
        idx = pd.IntervalIndex.from_tuples(
            [(2, 3), (1, 3), (1, 2)], closed=closed)
        assert not idx.is_monotonic
        assert not idx._is_strictly_monotonic_increasing
        assert idx.is_monotonic_decreasing
        assert idx._is_strictly_monotonic_decreasing

        # stationary
        idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
        assert idx.is_monotonic
        assert not idx._is_strictly_monotonic_increasing
        assert idx.is_monotonic_decreasing
        assert not idx._is_strictly_monotonic_decreasing

        # empty
        idx = IntervalIndex([], closed=closed)
        assert idx.is_monotonic
        assert idx._is_strictly_monotonic_increasing
        assert idx.is_monotonic_decreasing
        assert idx._is_strictly_monotonic_decreasing

    @pytest.mark.skip(reason='not a valid repr as we use interval notation')
    def test_repr(self):
        i = IntervalIndex.from_tuples([(0, 1), (1, 2)], closed='right')
        expected = ("IntervalIndex(left=[0, 1],"
                    "\n              right=[1, 2],"
                    "\n              closed='right',"
                    "\n              dtype='interval[int64]')")
= ("IntervalIndex(left=[0, 1]," "\n right=[1, 2]," "\n closed='right'," "\n dtype='interval[int64]')") assert repr(i) == expected i = IntervalIndex.from_tuples( (Timestamp('20130101'), Timestamp('20130102')), (Timestamp('20130102'), Timestamp('20130103')), closed='right') expected = ("IntervalIndex(left=['2013-01-01', '2013-01-02']," "\n right=['2013-01-02', '2013-01-03']," "\n closed='right'," "\n dtype='interval[datetime64[ns]]')") assert repr(i) == expected @pytest.mark.skip(reason='not a valid repr as we use interval notation') def test_repr_max_seq_item_setting(self): super(TestIntervalIndex, self).test_repr_max_seq_item_setting() @pytest.mark.skip(reason='not a valid repr as we use interval notation') def test_repr_roundtrip(self): super(TestIntervalIndex, self).test_repr_roundtrip() # TODO: check this behavior is consistent with test_interval_new.py def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) assert i[0] == Interval(0.0, 1.0, closed=closed) assert i[1] == Interval(1.0, 2.0, closed=closed) assert isna(i[2]) result = i[0:1] expected = IntervalIndex.from_arrays((0., ), (1., ), closed=closed) tm.assert_index_equal(result, expected) result = i[0:2] expected = IntervalIndex.from_arrays((0., 1), (1., 2.), closed=closed) tm.assert_index_equal(result, expected) result = i[1:3] expected = IntervalIndex.from_arrays((1., np.nan), (2., np.nan), closed=closed) tm.assert_index_equal(result, expected) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_get_loc_value(self): pytest.raises(KeyError, self.index.get_loc, 0) assert self.index.get_loc(0.5) == 0 assert self.index.get_loc(1) == 0 assert self.index.get_loc(1.5) == 1 assert self.index.get_loc(2) == 1 pytest.raises(KeyError, self.index.get_loc, -1) pytest.raises(KeyError, self.index.get_loc, 3) idx = IntervalIndex.from_tuples([(0, 2), (1, 3)]) assert idx.get_loc(0.5) == 0 assert idx.get_loc(1) == 0 tm.assert_numpy_array_equal(idx.get_loc(1.5), np.array([0, 1], dtype='int64')) tm.assert_numpy_array_equal(np.sort(idx.get_loc(2)), np.array([0, 1], dtype='int64')) assert idx.get_loc(3) == 1 pytest.raises(KeyError, idx.get_loc, 3.5) idx = IntervalIndex.from_arrays([0, 2], [1, 3]) pytest.raises(KeyError, idx.get_loc, 1.5) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def slice_locs_cases(self, breaks): # TODO: same tests for more index types index = IntervalIndex.from_breaks([0, 1, 2], closed='right') assert index.slice_locs() == (0, 2) assert index.slice_locs(0, 1) == (0, 1) assert index.slice_locs(1, 1) == (0, 1) assert index.slice_locs(0, 2) == (0, 2) assert index.slice_locs(0.5, 1.5) == (0, 2) assert index.slice_locs(0, 0.5) == (0, 1) assert index.slice_locs(start=1) == (0, 2) assert index.slice_locs(start=1.2) == (1, 2) assert index.slice_locs(end=1) == (0, 1) assert index.slice_locs(end=1.1) == (0, 2) assert index.slice_locs(end=1.0) == (0, 1) assert index.slice_locs(-1, -1) == (0, 0) index = IntervalIndex.from_breaks([0, 1, 2], closed='neither') assert index.slice_locs(0, 1) == (0, 1) assert index.slice_locs(0, 2) == (0, 2) assert index.slice_locs(0.5, 1.5) == (0, 2) assert index.slice_locs(1, 1) == (1, 1) assert index.slice_locs(1, 2) == (1, 2) index = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed='both') assert index.slice_locs(1, 1) == (0, 1) assert index.slice_locs(1, 2) == (0, 2) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_slice_locs_int64(self): self.slice_locs_cases([0, 
1, 2]) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_slice_locs_float64(self): self.slice_locs_cases([0.0, 1.0, 2.0]) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def slice_locs_decreasing_cases(self, tuples): index = IntervalIndex.from_tuples(tuples) assert index.slice_locs(1.5, 0.5) == (1, 3) assert index.slice_locs(2, 0) == (1, 3) assert index.slice_locs(2, 1) == (1, 3) assert index.slice_locs(3, 1.1) == (0, 3) assert index.slice_locs(3, 3) == (0, 2) assert index.slice_locs(3.5, 3.3) == (0, 1) assert index.slice_locs(1, -3) == (2, 3) slice_locs = index.slice_locs(-1, -1) assert slice_locs[0] == slice_locs[1] # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_slice_locs_decreasing_int64(self): self.slice_locs_cases([(2, 4), (1, 3), (0, 2)]) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_slice_locs_decreasing_float64(self): self.slice_locs_cases([(2., 4.), (1., 3.), (0., 2.)]) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_slice_locs_fails(self): index = IntervalIndex.from_tuples([(1, 2), (0, 1), (2, 3)]) with pytest.raises(KeyError): index.slice_locs(1, 2) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_get_loc_interval(self): assert self.index.get_loc(Interval(0, 1)) == 0 assert self.index.get_loc(Interval(0, 0.5)) == 0 assert self.index.get_loc(Interval(0, 1, 'left')) == 0 pytest.raises(KeyError, self.index.get_loc, Interval(2, 3)) pytest.raises(KeyError, self.index.get_loc, Interval(-1, 0, 'left')) # Make consistent with test_interval_new.py (see #16316, #16386) @pytest.mark.parametrize('item', [3, Interval(1, 4)]) def test_get_loc_length_one(self, item, closed): # GH 20921 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) result = index.get_loc(item) assert result == 0 # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_get_indexer(self): actual = self.index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3]) expected = np.array([-1, -1, 0, 0, 1, 1, -1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) actual = self.index.get_indexer(self.index) expected = np.array([0, 1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) index = IntervalIndex.from_breaks([0, 1, 2], closed='left') actual = index.get_indexer([-1, 0, 0.5, 1, 1.5, 2, 3]) expected = np.array([-1, 0, 0, 1, 1, -1, -1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) actual = self.index.get_indexer(index[:1]) expected = np.array([0], dtype='intp') tm.assert_numpy_array_equal(actual, expected) actual = self.index.get_indexer(index) expected = np.array([-1, 1], dtype='intp') tm.assert_numpy_array_equal(actual, expected) # To be removed, replaced by test_interval_new.py (see #16316, #16386) def test_get_indexer_subintervals(self): # TODO: is this right? 
    # return indexers for wholly contained subintervals
    target = IntervalIndex.from_breaks(np.linspace(0, 2, 5))
    actual = self.index.get_indexer(target)
    expected = np.array([0, 0, 1, 1], dtype='intp')
    tm.assert_numpy_array_equal(actual, expected)

    target = IntervalIndex.from_breaks([0, 0.67, 1.33, 2])
    actual = self.index.get_indexer(target)
    expected = np.array([0, 0, 1, 1], dtype='intp')
    tm.assert_numpy_array_equal(actual, expected)

    actual = self.index.get_indexer(target[[0, -1]])
    expected = np.array([0, 1], dtype='intp')
    tm.assert_numpy_array_equal(actual, expected)

    target = IntervalIndex.from_breaks([0, 0.33, 0.67, 1], closed='left')
    actual = self.index.get_indexer(target)
    expected = np.array([0, 0, 0], dtype='intp')
    tm.assert_numpy_array_equal(actual, expected)

# Make consistent with test_interval_new.py (see #16316, #16386)
@pytest.mark.parametrize('item', [
    [3], np.arange(1, 5), [Interval(1, 4)], interval_range(1, 4)])
def test_get_indexer_length_one(self, item, closed):
    # GH 17284
    index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
    result = index.get_indexer(item)
    expected = np.array([0] * len(item), dtype='intp')
    tm.assert_numpy_array_equal(result, expected)

# To be removed, replaced by test_interval_new.py (see #16316, #16386)
def test_contains(self):
    # Only endpoints are valid.
    i = IntervalIndex.from_arrays([0, 1], [1, 2])

    # Invalid
    assert 0 not in i
    assert 1 not in i
    assert 2 not in i

    # Valid
    assert Interval(0, 1) in i
    assert Interval(0, 2) in i
    assert Interval(0, 0.5) in i
    assert Interval(3, 5) not in i
    assert Interval(-1, 0, closed='left') not in i

# To be removed, replaced by test_interval_new.py (see #16316, #16386)
def test_contains_method(self):
    # can select values that are IN the range of a value
    i = IntervalIndex.from_arrays([0, 1], [1, 2])

    assert i.contains(0.1)
    assert i.contains(0.5)
    assert i.contains(1)
    assert i.contains(Interval(0, 1))
    assert i.contains(Interval(0, 2))

    # these overlap completely
    assert i.contains(Interval(0, 3))
    assert i.contains(Interval(1, 3))

    assert not i.contains(20)
    assert not i.contains(-20)

def test_dropna(self, closed):
    expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)],
                                         closed=closed)

    ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
    result = ii.dropna()
    tm.assert_index_equal(result, expected)

    ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan],
                                   closed=closed)
    result = ii.dropna()
    tm.assert_index_equal(result, expected)

# TODO: check this behavior is consistent with test_interval_new.py
def test_non_contiguous(self, closed):
    index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
    target = [0.5, 1.5, 2.5]
    actual = index.get_indexer(target)
    expected = np.array([0, -1, 1], dtype='intp')
    tm.assert_numpy_array_equal(actual, expected)

    assert 1.5 not in index

def test_union(self, closed):
    index = self.create_index(closed=closed)
    other = IntervalIndex.from_breaks(range(5, 13), closed=closed)

    expected = IntervalIndex.from_breaks(range(13), closed=closed)
    result = index.union(other)
    tm.assert_index_equal(result, expected)

    result = other.union(index)
    tm.assert_index_equal(result, expected)

    tm.assert_index_equal(index.union(index), index)
    tm.assert_index_equal(index.union(index[:1]), index)

    # GH 19101: empty result, same dtype
    index = IntervalIndex(np.array([], dtype='int64'), closed=closed)
    result = index.union(index)
    tm.assert_index_equal(result, index)

    # GH 19101: empty result, different dtypes
    other = IntervalIndex(np.array([], dtype='float64'), closed=closed)
    result = index.union(other)
    tm.assert_index_equal(result, index)

def test_intersection(self, closed):
    index = self.create_index(closed=closed)
    other = IntervalIndex.from_breaks(range(5, 13), closed=closed)

    expected = IntervalIndex.from_breaks(range(5, 11), closed=closed)
    result = index.intersection(other)
    tm.assert_index_equal(result, expected)

    result = other.intersection(index)
    tm.assert_index_equal(result, expected)

    tm.assert_index_equal(index.intersection(index), index)

    # GH 19101: empty result, same dtype
    other = IntervalIndex.from_breaks(range(300, 314), closed=closed)
    expected = IntervalIndex(np.array([], dtype='int64'), closed=closed)
    result = index.intersection(other)
    tm.assert_index_equal(result, expected)

    # GH 19101: empty result, different dtypes
    breaks = np.arange(300, 314, dtype='float64')
    other = IntervalIndex.from_breaks(breaks, closed=closed)
    result = index.intersection(other)
    tm.assert_index_equal(result, expected)

def test_difference(self, closed):
    index = self.create_index(closed=closed)
    tm.assert_index_equal(index.difference(index[:1]), index[1:])

    # GH 19101: empty result, same dtype
    result = index.difference(index)
    expected = IntervalIndex(np.array([], dtype='int64'), closed=closed)
    tm.assert_index_equal(result, expected)

    # GH 19101: empty result, different dtypes
    other = IntervalIndex.from_arrays(index.left.astype('float64'),
                                      index.right, closed=closed)
    result = index.difference(other)
    tm.assert_index_equal(result, expected)

def test_symmetric_difference(self, closed):
    index = self.create_index(closed=closed)
    result = index[1:].symmetric_difference(index[:-1])
    expected = IntervalIndex([index[0], index[-1]])
    tm.assert_index_equal(result, expected)

    # GH 19101: empty result, same dtype
    result = index.symmetric_difference(index)
    expected = IntervalIndex(np.array([], dtype='int64'), closed=closed)
    tm.assert_index_equal(result, expected)

    # GH 19101: empty result, different dtypes
    other = IntervalIndex.from_arrays(index.left.astype('float64'),
                                      index.right, closed=closed)
    result = index.symmetric_difference(other)
    tm.assert_index_equal(result, expected)

@pytest.mark.parametrize('op_name', [
    'union', 'intersection', 'difference', 'symmetric_difference'])
def test_set_operation_errors(self, closed, op_name):
    index = self.create_index(closed=closed)
    set_op = getattr(index, op_name)

    # non-IntervalIndex
    msg = ('the other index needs to be an IntervalIndex too, but '
           'was type Int64Index')
    with tm.assert_raises_regex(TypeError, msg):
        set_op(Index([1, 2, 3]))

    # mixed closed
    msg = ('can only do set operations between two IntervalIndex objects '
           'that are closed on the same side')
    for other_closed in {'right', 'left', 'both', 'neither'} - {closed}:
        other = self.create_index(closed=other_closed)
        with tm.assert_raises_regex(ValueError, msg):
            set_op(other)

    # GH 19016: incompatible dtypes
    other = interval_range(Timestamp('20180101'), periods=9, closed=closed)
    msg = ('can only do {op} between two IntervalIndex objects that have '
           'compatible dtypes').format(op=op_name)
    with tm.assert_raises_regex(TypeError, msg):
        set_op(other)

def test_isin(self, closed):
    index = self.create_index(closed=closed)

    expected = np.array([True] + [False] * (len(index) - 1))
    result = index.isin(index[:1])
    tm.assert_numpy_array_equal(result, expected)

    result = index.isin([index[0]])
    tm.assert_numpy_array_equal(result, expected)

    other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
    expected = np.array([True] * (len(index) - 1) + [False])
    result = index.isin(other)
    tm.assert_numpy_array_equal(result, expected)
    result = index.isin(other.tolist())
    tm.assert_numpy_array_equal(result, expected)

    for other_closed in {'right', 'left', 'both', 'neither'}:
        other = self.create_index(closed=other_closed)
        expected = np.repeat(closed == other_closed, len(index))
        result = index.isin(other)
        tm.assert_numpy_array_equal(result, expected)

        result = index.isin(other.tolist())
        tm.assert_numpy_array_equal(result, expected)

def test_comparison(self):
    actual = Interval(0, 1) < self.index
    expected = np.array([False, True])
    tm.assert_numpy_array_equal(actual, expected)

    actual = Interval(0.5, 1.5) < self.index
    expected = np.array([False, True])
    tm.assert_numpy_array_equal(actual, expected)

    actual = self.index > Interval(0.5, 1.5)
    tm.assert_numpy_array_equal(actual, expected)

    actual = self.index == self.index
    expected = np.array([True, True])
    tm.assert_numpy_array_equal(actual, expected)

    actual = self.index <= self.index
    tm.assert_numpy_array_equal(actual, expected)

    actual = self.index >= self.index
    tm.assert_numpy_array_equal(actual, expected)

    actual = self.index < self.index
    expected = np.array([False, False])
    tm.assert_numpy_array_equal(actual, expected)

    actual = self.index > self.index
    tm.assert_numpy_array_equal(actual, expected)

    actual = self.index == IntervalIndex.from_breaks([0, 1, 2], 'left')
    tm.assert_numpy_array_equal(actual, expected)

    actual = self.index == self.index.values
    tm.assert_numpy_array_equal(actual, np.array([True, True]))

    actual = self.index.values == self.index
    tm.assert_numpy_array_equal(actual, np.array([True, True]))

    actual = self.index <= self.index.values
    tm.assert_numpy_array_equal(actual, np.array([True, True]))

    actual = self.index != self.index.values
    tm.assert_numpy_array_equal(actual, np.array([False, False]))

    actual = self.index > self.index.values
    tm.assert_numpy_array_equal(actual, np.array([False, False]))

    actual = self.index.values > self.index
    tm.assert_numpy_array_equal(actual, np.array([False, False]))

    # invalid comparisons
    actual = self.index == 0
    tm.assert_numpy_array_equal(actual, np.array([False, False]))

    actual = self.index == self.index.left
    tm.assert_numpy_array_equal(actual, np.array([False, False]))

    with tm.assert_raises_regex(TypeError, 'unorderable types'):
        self.index > 0
    with tm.assert_raises_regex(TypeError, 'unorderable types'):
        self.index <= 0
    with pytest.raises(TypeError):
        self.index > np.arange(2)
    with pytest.raises(ValueError):
        self.index > np.arange(3)

def test_missing_values(self, closed):
    idx = Index([np.nan, Interval(0, 1, closed=closed),
                 Interval(1, 2, closed=closed)])
    idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2],
                                     closed=closed)
    assert idx.equals(idx2)

    with pytest.raises(ValueError):
        IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2]),
                                  closed=closed)

    tm.assert_numpy_array_equal(isna(idx),
                                np.array([True, False, False]))

def test_sort_values(self, closed):
    index = self.create_index(closed=closed)

    result = index.sort_values()
    tm.assert_index_equal(result, index)

    result = index.sort_values(ascending=False)
    tm.assert_index_equal(result, index[::-1])

    # with nan
    index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])

    result = index.sort_values()
    expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
    tm.assert_index_equal(result, expected)

    result = index.sort_values(ascending=False)
    expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
    tm.assert_index_equal(result, expected)

@pytest.mark.parametrize('tz', [None, 'US/Eastern'])
def test_datetime(self, tz):
    start = Timestamp('2000-01-01', tz=tz)
    dates = date_range(start=start, periods=10)
    index = IntervalIndex.from_breaks(dates)

    # test mid
    start = Timestamp('2000-01-01T12:00', tz=tz)
    expected = date_range(start=start, periods=9)
    tm.assert_index_equal(index.mid, expected)

    # __contains__ doesn't check individual points
    assert Timestamp('2000-01-01', tz=tz) not in index
    assert Timestamp('2000-01-01T12', tz=tz) not in index
    assert Timestamp('2000-01-02', tz=tz) not in index
    iv_true = Interval(Timestamp('2000-01-01T08', tz=tz),
                       Timestamp('2000-01-01T18', tz=tz))
    iv_false = Interval(Timestamp('1999-12-31', tz=tz),
                        Timestamp('2000-01-01', tz=tz))
    assert iv_true in index
    assert iv_false not in index

    # .contains does check individual points
    assert not index.contains(Timestamp('2000-01-01', tz=tz))
    assert index.contains(Timestamp('2000-01-01T12', tz=tz))
    assert index.contains(Timestamp('2000-01-02', tz=tz))
    assert index.contains(iv_true)
    assert not index.contains(iv_false)

    # test get_indexer
    start = Timestamp('1999-12-31T12:00', tz=tz)
    target = date_range(start=start, periods=7, freq='12H')
    actual = index.get_indexer(target)
    expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype='intp')
    tm.assert_numpy_array_equal(actual, expected)

    start = Timestamp('2000-01-08T18:00', tz=tz)
    target = date_range(start=start, periods=7, freq='6H')
    actual = index.get_indexer(target)
    expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype='intp')
    tm.assert_numpy_array_equal(actual, expected)

def test_append(self, closed):
    index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
    index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)

    result = index1.append(index2)
    expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3],
                                         closed=closed)
    tm.assert_index_equal(result, expected)

    result = index1.append([index1, index2])
    expected = IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2],
                                         [1, 2, 1, 2, 2, 3],
                                         closed=closed)
    tm.assert_index_equal(result, expected)

    msg = ('can only append two IntervalIndex objects that are closed '
           'on the same side')
    for other_closed in {'left', 'right', 'both', 'neither'} - {closed}:
        index_other_closed = IntervalIndex.from_arrays(
            [0, 1], [1, 2], closed=other_closed)
        with tm.assert_raises_regex(ValueError, msg):
            index1.append(index_other_closed)

def test_is_non_overlapping_monotonic(self, closed):
    # Should be True in all cases
    tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
    idx = IntervalIndex.from_tuples(tpls, closed=closed)
    assert idx.is_non_overlapping_monotonic is True

    idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
    assert idx.is_non_overlapping_monotonic is True

    # Should be False in all cases (overlapping)
    tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
    idx = IntervalIndex.from_tuples(tpls, closed=closed)
    assert idx.is_non_overlapping_monotonic is False

    idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
    assert idx.is_non_overlapping_monotonic is False

    # Should be False in all cases (non-monotonic)
    tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
    idx = IntervalIndex.from_tuples(tpls, closed=closed)
    assert idx.is_non_overlapping_monotonic is False

    idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
    assert idx.is_non_overlapping_monotonic is False

    # Should be False for closed='both', otherwise True (GH 16560)
    if closed == 'both':
        idx = IntervalIndex.from_breaks(range(4), closed=closed)
        assert idx.is_non_overlapping_monotonic is False
    else:
        idx = IntervalIndex.from_breaks(range(4), closed=closed)
        assert idx.is_non_overlapping_monotonic is True

@pytest.mark.parametrize('tuples', [
    lzip(range(10), range(1, 11)),
    lzip(date_range('20170101', periods=10),
         date_range('20170101', periods=10)),
    lzip(timedelta_range('0 days', periods=10),
         timedelta_range('1 day', periods=10))])
def test_to_tuples(self, tuples):
    # GH 18756
    idx = IntervalIndex.from_tuples(tuples)
    result = idx.to_tuples()
    expected = Index(com._asarray_tuplesafe(tuples))
    tm.assert_index_equal(result, expected)

@pytest.mark.parametrize('tuples', [
    lzip(range(10), range(1, 11)) + [np.nan],
    lzip(date_range('20170101', periods=10),
         date_range('20170101', periods=10)) + [np.nan],
    lzip(timedelta_range('0 days', periods=10),
         timedelta_range('1 day', periods=10)) + [np.nan]])
@pytest.mark.parametrize('na_tuple', [True, False])
def test_to_tuples_na(self, tuples, na_tuple):
    # GH 18756
    idx = IntervalIndex.from_tuples(tuples)
    result = idx.to_tuples(na_tuple=na_tuple)

    # check the non-NA portion
    expected_notna = Index(com._asarray_tuplesafe(tuples[:-1]))
    result_notna = result[:-1]
    tm.assert_index_equal(result_notna, expected_notna)

    # check the NA portion
    result_na = result[-1]
    if na_tuple:
        assert isinstance(result_na, tuple)
        assert len(result_na) == 2
        assert all(isna(x) for x in result_na)
    else:
        assert isna(result_na)

@pytest.mark.parametrize('new_closed', [
    'left', 'right', 'both', 'neither'])
def test_set_closed(self, name, closed, new_closed):
    # GH 21670
    index = interval_range(0, 5, closed=closed, name=name)
    result = index.set_closed(new_closed)
    expected = interval_range(0, 5, closed=new_closed, name=name)
    tm.assert_index_equal(result, expected)

@pytest.mark.parametrize('bad_closed', ['foo', 10, 'LEFT', True, False])
def test_set_closed_errors(self, bad_closed):
    # GH 21670
    index = interval_range(0, 5)
    msg = "invalid option for 'closed': {closed}".format(closed=bad_closed)
    with tm.assert_raises_regex(ValueError, msg):
        index.set_closed(bad_closed)
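# Hedged illustration (standalone, not part of the test suite above): the
# set_closed behavior exercised by test_set_closed only relabels which side(s)
# of each interval are closed; the underlying breaks are untouched. Assumes a
# pandas version that provides IntervalIndex.set_closed (GH 21670).
import pandas as pd

idx = pd.interval_range(0, 3, closed='right')   # (0, 1], (1, 2], (2, 3]
relabeled = idx.set_closed('left')              # [0, 1), [1, 2), [2, 3)
assert relabeled.closed == 'left'
assert list(relabeled.left) == list(idx.left)   # same breaks either way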
import pandas.util.testing as tm


@pytest.fixture(params=[tm.makeUnicodeIndex(100),
                        tm.makeStringIndex(100),
                        tm.makeDateIndex(100),
                        tm.makePeriodIndex(100),
                        tm.makeTimedeltaIndex(100),
                        tm.makeIntIndex(100),
                        tm.makeUIntIndex(100),
                        tm.makeRangeIndex(100),
                        tm.makeFloatIndex(100),
                        Index([True, False]),
                        tm.makeCategoricalIndex(100),
                        Index([]),
                        MultiIndex.from_tuples(lzip(
                            ['foo', 'bar', 'baz'], [1, 2, 3])),
                        Index([0, 0, 1, 1, 2, 2])],
                ids=lambda x: type(x).__name__)
def indices(request):
    return request.param


@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
    # zero-dim integer array behaves like an integer
    return request.param


zeros = [box([0] * 5, dtype=dtype)
         for box in [pd.Index, np.array]
         for dtype in [np.int64, np.uint64, np.float64]]
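# Hedged sketch (standalone, not part of the fixtures above) of why the `one`
# fixture can supply either 1 or np.array(1): a zero-dim int64 array
# broadcasts through pandas arithmetic exactly like the integer it wraps.
import numpy as np
import pandas as pd

s = pd.Series([10, 20, 30])
assert (s + 1).equals(s + np.array(1, dtype=np.int64))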
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    if ((levels is None and isinstance(keys[0], tuple)) or
            (levels is not None and len(levels) > 1)):
        zipped = lzip(*keys)
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Categorical.from_array(
                zp, ordered=True).categories for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays
        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                try:
                    i = level.get_loc(key)
                except KeyError:
                    raise ValueError('Key %s not in level %s'
                                     % (str(key), str(level)))

                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Categorical.from_array(concat_index, ordered=True)
            levels.append(factor.categories)
            label_list.append(factor.codes)

        if len(names) == len(levels):
            names = list(names)
        else:
            # make sure that all of the passed indices have the same nlevels
            if not len(set([idx.nlevels for idx in indexes])) == 1:
                raise AssertionError("Cannot concat indices that do"
                                     " not have the same number of levels")

            # also copies
            names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names,
                          verify_integrity=False)

    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy
    for hlevel, level in zip(zipped, levels):
        hlevel = _ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        mask = mapped == -1
        if mask.any():
            raise ValueError('Values not found in passed level: %s'
                             % str(hlevel[mask]))

        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces)
                           for lab in new_index.labels])
    else:
        new_levels.append(new_index)
        new_labels.append(np.tile(np.arange(n), kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names,
                      verify_integrity=False)
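# Hedged usage sketch: _make_concat_multiindex is internal, but its effect is
# visible through the public pd.concat(..., keys=...) API, which builds the
# hierarchical result index this way when all pieces share the same index.
import pandas as pd

a = pd.Series([1, 2], index=['x', 'y'])
b = pd.Series([3, 4], index=['x', 'y'])
result = pd.concat([a, b], keys=['first', 'second'])
# the keys become the outer level of the resulting MultiIndex
assert result.index.tolist() == [('first', 'x'), ('first', 'y'),
                                 ('second', 'x'), ('second', 'y')]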
class TestSeriesMisc(TestData, SharedWithSparse):

    series_klass = Series
    # SharedWithSparse tests use generic, series_klass-agnostic assertion
    _assert_series_equal = staticmethod(tm.assert_series_equal)

    def test_tab_completion(self):
        # GH 9910
        s = Series(list('abcd'))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert 'str' in dir(s)
        assert 'dt' not in dir(s)
        assert 'cat' not in dir(s)

        # similarly for .dt
        s = Series(date_range('1/1/2015', periods=5))
        assert 'dt' in dir(s)
        assert 'str' not in dir(s)
        assert 'cat' not in dir(s)

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type. First cat and str:
        s = Series(list('abbcd'), dtype="category")
        assert 'cat' in dir(s)
        assert 'str' in dir(s)   # as it is a string categorical
        assert 'dt' not in dir(s)

        # similar to cat and str
        s = Series(date_range('1/1/2015', periods=5)).astype("category")
        assert 'cat' in dir(s)
        assert 'str' not in dir(s)
        assert 'dt' in dir(s)   # as it is a datetime categorical

    def test_tab_completion_with_categorical(self):
        # test the tab completion display
        ok_for_cat = ['name', 'index', 'categorical', 'categories', 'codes',
                      'ordered', 'set_categories', 'add_categories',
                      'remove_categories', 'rename_categories',
                      'reorder_categories', 'remove_unused_categories',
                      'as_ordered', 'as_unordered']

        def get_dir(s):
            results = [r for r in s.cat.__dir__() if not r.startswith('_')]
            return list(sorted(set(results)))

        s = Series(list('aabbcde')).astype('category')
        results = get_dir(s)
        tm.assert_almost_equal(results, list(sorted(set(ok_for_cat))))

    @pytest.mark.parametrize("index", [
        tm.makeUnicodeIndex(10),
        tm.makeStringIndex(10),
        tm.makeCategoricalIndex(10),
        Index(['foo', 'bar', 'baz'] * 2),
        tm.makeDateIndex(10),
        tm.makePeriodIndex(10),
        tm.makeTimedeltaIndex(10),
        tm.makeIntIndex(10),
        tm.makeUIntIndex(10),
        tm.makeRangeIndex(10),
        tm.makeFloatIndex(10),
        Index([True, False]),
        Index(['a{}'.format(i) for i in range(101)]),
        pd.MultiIndex.from_tuples(lzip('ABCD', 'EFGH')),
        pd.MultiIndex.from_tuples(lzip([0, 1, 2, 3], 'EFGH')),
    ])
    def test_index_tab_completion(self, index):
        # dir contains string-like values of the Index.
        s = pd.Series(index=index)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                assert (not isinstance(x, str) or
                        not x.isidentifier() or
                        x in dir_s)
            else:
                assert x not in dir_s

    def test_not_hashable(self):
        s_empty = Series()
        s = Series([1])
        msg = "'Series' objects are mutable, thus they cannot be hashed"
        with pytest.raises(TypeError, match=msg):
            hash(s_empty)
        with pytest.raises(TypeError, match=msg):
            hash(s)

    def test_contains(self):
        tm.assert_contains_all(self.ts.index, self.ts)

    def test_iter(self):
        for i, val in enumerate(self.series):
            assert val == self.series[i]

        for i, val in enumerate(self.ts):
            assert val == self.ts[i]

    def test_keys(self):
        # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
        # to .keys() in a list()
        getkeys = self.ts.keys
        assert getkeys() is self.ts.index

    def test_values(self):
        tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False)

    def test_iteritems(self):
        for idx, val in self.series.items():
            assert val == self.series[idx]

        for idx, val in self.ts.items():
            assert val == self.ts[idx]

        # assert it is lazy (generators don't define reverse, lists do)
        assert not hasattr(self.series.iteritems(), 'reverse')

    def test_items(self):
        for idx, val in self.series.items():
            assert val == self.series[idx]

        for idx, val in self.ts.items():
            assert val == self.ts[idx]

        # assert it is lazy (generators don't define reverse, lists do)
        assert not hasattr(self.series.items(), 'reverse')

    def test_raise_on_info(self):
        s = Series(np.random.randn(10))
        msg = "'Series' object has no attribute 'info'"
        with pytest.raises(AttributeError, match=msg):
            s.info()

    def test_copy(self):
        for deep in [None, False, True]:
            s = Series(np.arange(10), dtype='float64')

            # default deep is True
            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[::2] = np.NaN

            if deep is None or deep is True:
                # Did not modify original Series
                assert np.isnan(s2[0])
                assert not np.isnan(s[0])
            else:
                # we DID modify the original Series
                assert np.isnan(s2[0])
                assert np.isnan(s[0])

        # GH 11794
        # copy of tz-aware
        expected = Series([Timestamp('2012/01/01', tz='UTC')])
        expected2 = Series([Timestamp('1999/01/01', tz='UTC')])

        for deep in [None, False, True]:
            s = Series([Timestamp('2012/01/01', tz='UTC')])

            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[0] = pd.Timestamp('1999/01/01', tz='UTC')

            # default deep is True
            if deep is None or deep is True:
                # Did not modify original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected)
            else:
                # we DID modify the original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected2)

    def test_axis_alias(self):
        s = Series([1, 2, np.nan])
        assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index'))
        assert s.dropna().sum('rows') == 3
        assert s._get_axis_number('rows') == 0
        assert s._get_axis_name('rows') == 'index'

    def test_class_axis(self):
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_numpy_unique(self):
        # it works!
        np.unique(self.ts)

    def test_ndarray_compat(self):
        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'],
                         index=date_range('1/1/2000', periods=1000))

        def f(x):
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

        # .item()
        s = Series([1])
        result = s.item()
        assert result == 1
        assert s.item() == s.iloc[0]

        # using an ndarray-like function
        s = Series(np.random.randn(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype='float64')
        tm.assert_series_equal(result, expected)

        # ravel
        s = Series(np.random.randn(10))
        tm.assert_almost_equal(s.ravel(order='F'), s.values.ravel(order='F'))

        # compress
        # GH 6658
        s = Series([0, 1., -1], index=list('abc'))
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.], index=['b']))

        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result is empty, with the same object-dtype Index as the original
        exp = Series([], dtype='float64', index=Index([], dtype='object'))
        tm.assert_series_equal(result, exp)

        s = Series([0, 1., -1], index=[.1, .2, .3])
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.], index=[.2]))

        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result is empty, with the same Float64Index as the original
        exp = Series([], dtype='float64', index=Index([], dtype='float64'))
        tm.assert_series_equal(result, exp)

    def test_str_accessor_updates_on_inplace(self):
        s = pd.Series(list('abc'))
        s.drop([0], inplace=True)
        assert len(s.str.lower()) == 2

    def test_str_attribute(self):
        # GH 9068
        methods = ['strip', 'rstrip', 'lstrip']
        s = Series([' jack', 'jill ', ' jesse ', 'frank'])
        for method in methods:
            expected = Series([getattr(str, method)(x) for x in s.values])
            # Series.str on the class is the raw StringMethods accessor, so
            # this calls the unbound method with the bound accessor s.str
            assert_series_equal(getattr(Series.str, method)(s.str), expected)

        # str accessor only valid with string values
        s = Series(range(5))
        with pytest.raises(AttributeError, match='only use .str accessor'):
            s.str.repeat(2)

    def test_empty_method(self):
        s_empty = pd.Series()
        assert s_empty.empty

        for full_series in [pd.Series([1]), pd.Series(index=[1])]:
            assert not full_series.empty

    def test_tab_complete_warning(self, ip):
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip('IPython', minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; s = pd.Series()"
        ip.run_code(code)
        with tm.assert_produces_warning(None):
            with provisionalcompleter('ignore'):
                list(ip.Completer.completions('s.', 1))

    def test_integer_series_size(self):
        # GH 25580
        s = Series(range(9))
        assert s.size == 9

        s = Series(range(9), dtype="Int64")
        assert s.size == 9
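# Hedged sketch (standalone) of the accessor indirection used in
# test_str_attribute above: accessing Series.str on the *class* yields the
# StringMethods type itself, so getattr(Series.str, 'strip') is an unbound
# method that can be applied to any bound accessor instance such as s.str.
import pandas as pd

s = pd.Series([' jack', 'jill '])
assert pd.Series.str.strip(s.str).equals(s.str.strip())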
def test_zip(self):
    lst = [builtins.range(10), builtins.range(10), builtins.range(10)]
    results = zip(*lst), lzip(*lst)
    expecteds = (list(builtins.zip(*lst)),) * 2
    lengths = 10, 10
    self.check_results(results, expecteds, lengths)
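# Hedged note on the compat helper these tests assume: lzip is a zip that
# always materializes a list (Python 2 zip semantics under Python 3). A
# minimal stand-in, should pandas.compat be unavailable:
def lzip(*args):
    return list(zip(*args))

assert lzip([1, 2], 'ab') == [(1, 'a'), (2, 'b')]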
def _translate(self):
    """
    Convert the DataFrame in `self.data` and the attrs from `_build_styles`
    into a dictionary of {head, body, uuid, cellstyle}
    """
    table_styles = self.table_styles or []
    caption = self.caption
    ctx = self.ctx
    precision = self.precision
    uuid = self.uuid or str(uuid1()).replace("-", "_")
    ROW_HEADING_CLASS = "row_heading"
    COL_HEADING_CLASS = "col_heading"
    DATA_CLASS = "data"
    BLANK_CLASS = "blank"
    BLANK_VALUE = ""

    cell_context = dict()

    n_rlvls = self.data.index.nlevels
    n_clvls = self.data.columns.nlevels
    rlabels = self.data.index.tolist()
    clabels = self.data.columns.tolist()

    idx_values = self.data.index.format(sparsify=False, adjoin=False,
                                        names=False)
    idx_values = lzip(*idx_values)

    if n_rlvls == 1:
        rlabels = [[x] for x in rlabels]
    if n_clvls == 1:
        clabels = [[x] for x in clabels]
    clabels = list(zip(*clabels))

    cellstyle = []
    head = []

    for r in range(n_clvls):
        row_es = [{"type": "th",
                   "value": BLANK_VALUE,
                   "class": " ".join([BLANK_CLASS])}] * n_rlvls
        for c in range(len(clabels[0])):
            cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c]
            cs.extend(
                cell_context.get("col_headings", {}).get(r, {}).get(c, []))
            row_es.append({"type": "th",
                           "value": clabels[r][c],
                           "class": " ".join(cs)})
        head.append(row_es)

    if self.data.index.names:
        index_header_row = []

        for c, name in enumerate(self.data.index.names):
            cs = [COL_HEADING_CLASS,
                  "level%s" % (n_clvls + 1),
                  "col%s" % c]
            index_header_row.append({"type": "th",
                                     "value": name,
                                     "class": " ".join(cs)})

        index_header_row.extend([{"type": "th",
                                  "value": BLANK_VALUE,
                                  "class": " ".join([BLANK_CLASS])}] *
                                len(clabels[0]))

        head.append(index_header_row)

    body = []
    for r, idx in enumerate(self.data.index):
        # build the row-heading cells one at a time so each cell gets its
        # own level-specific class list (computing cs once before the loop
        # would reuse a stale c from an earlier loop)
        row_es = []
        for c in range(len(rlabels[r])):
            cs = [ROW_HEADING_CLASS, "level%s" % c, "row%s" % r]
            cs.extend(
                cell_context.get("row_headings", {}).get(r, {}).get(c, []))
            row_es.append({"type": "th",
                           "value": rlabels[r][c],
                           "class": " ".join(cs)})

        for c, col in enumerate(self.data.columns):
            cs = [DATA_CLASS, "row%s" % r, "col%s" % c]
            cs.extend(cell_context.get("data", {}).get(r, {}).get(c, []))
            row_es.append({"type": "td",
                           "value": self.data.iloc[r][c],
                           "class": " ".join(cs),
                           "id": "_".join(cs[1:])})
            props = []
            for x in ctx[r, c]:
                # have to handle empty styles like ['']
                if x.count(":"):
                    props.append(x.split(":"))
                else:
                    props.append(['', ''])
            cellstyle.append({'props': props,
                              'selector': "row%s_col%s" % (r, c)})
        body.append(row_es)

    return dict(head=head, cellstyle=cellstyle, body=body, uuid=uuid,
                precision=precision, table_styles=table_styles,
                caption=caption, table_attributes=self.table_attributes)
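# Hedged sketch of how the _translate payload is consumed: Styler.render()
# feeds the {head, body, cellstyle, uuid, ...} dict into a Jinja2 HTML
# template. The dict can be inspected directly, though _translate is private
# API and its exact signature varies across pandas versions:
import pandas as pd

df = pd.DataFrame({'a': [1, 2]})
ctx = df.style._translate()
assert {'head', 'body', 'uuid', 'cellstyle'} <= set(ctx)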
from pandas import *
import numpy as np
from pandas.compat import zip, range, lzip
from pandas.util.testing import rands
import pandas.lib as lib

N = 100000

key1 = [rands(10) for _ in range(N)]
key2 = [rands(10) for _ in range(N)]

zipped = lzip(key1, key2)


def _zip(*args):
    arr = np.empty(N, dtype=object)
    arr[:] = lzip(*args)
    return arr


def _zip2(*args):
    return lib.list_to_object_array(lzip(*args))


index = MultiIndex.from_arrays([key1, key2])
to_join = DataFrame({'j1': np.random.randn(100000)}, index=index)

data = DataFrame({'A': np.random.randn(500000),
                  'key1': np.repeat(key1, 5),
def test_lzip(self):
    lst = [builtins.range(10), builtins.range(10), builtins.range(10)]
    results = lzip(*lst),
    expecteds = list(builtins.zip(*lst)),
    lengths = 10,
    self.check_results(results, expecteds, lengths)
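# Note: the trailing commas above are deliberate; they build 1-tuples so that
# check_results can iterate results/expecteds/lengths in parallel. A
# self-contained illustration of the same idiom:
results = [(0, 0), (1, 1)],          # `x,` is the 1-tuple (x,)
assert isinstance(results, tuple) and len(results) == 1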