def test_unicode_string_with_unicode(self):
    """Render an Index built from ``lrange(1000)`` as text.

    There are no assertions; the test passes when the conversion does not
    raise. ``str`` is used when ``PY3`` is true, ``compat.text_type``
    otherwise.
    """
    idx = Index(lrange(1000))

    # Pick the text conversion for the running interpreter, then apply it.
    to_text = str if PY3 else compat.text_type
    to_text(idx)
def test_unicode_string_with_unicode(self):
    """Render a one-cell DataFrame holding a non-ASCII character as text.

    There are no assertions; the test passes when the conversion does not
    raise. ``str`` is used when ``compat.PY3`` is true,
    ``compat.text_type`` otherwise.
    """
    df = DataFrame({'A': [u("\u05d0")]})

    # Pick the text conversion for the running interpreter, then apply it.
    render = str if compat.PY3 else compat.text_type
    render(df)
def as_escaped_unicode(thing, escape_chars=escape_chars):
    """Convert ``thing`` to text and substitute the requested characters.

    ``escape_chars`` may be a dict (character -> replacement) or any
    iterable of characters; a falsy value means nothing is escaped. The
    built-in replacements cover tab, newline and carriage return.
    ``default_escapes`` is read from the enclosing scope and decides whether
    a dict extends the built-in table or replaces it.
    """
    # Unicode is fine; otherwise fall back to decoding as utf-8 with
    # 'replace'. If that is not right either there is no way of knowing,
    # and the caller has to deal with it.
    try:
        text = compat.text_type(thing)  # try this first
    except UnicodeDecodeError:
        # either utf-8 or we replace errors
        text = str(thing).decode('utf-8', "replace")

    replacements = {'\t': r'\t', '\n': r'\n', '\r': r'\r'}

    if not isinstance(escape_chars, dict):
        to_escape = escape_chars or tuple()
    else:
        if default_escapes:
            replacements.update(escape_chars)
        else:
            replacements = escape_chars
        to_escape = list(escape_chars.keys())

    for char in to_escape:
        text = text.replace(char, replacements[char])

    return compat.text_type(text)
def test_unicode_string_with_unicode():
    """Render a two-level index containing a non-ASCII value as text.

    There are no assertions; the test passes when the conversion does not
    raise. ``str`` is used when ``PY3`` is true, ``compat.text_type``
    otherwise.
    """
    d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    frame = pd.DataFrame(d)
    idx = frame.set_index(["a", "b"]).index

    # Pick the text conversion for the running interpreter, then apply it.
    to_text = str if PY3 else compat.text_type
    to_text(idx)
def _get_column_names_and_types(self, dtype_mapper):
    """Collect ``(name, type, is_index)`` tuples for index levels and columns.

    Parameters
    ----------
    dtype_mapper : callable
        Called with each index level's values and with each column of
        ``self.frame``; its return value becomes the middle tuple element.

    Returns
    -------
    list of tuple
        Index entries first (flag ``True``, only when ``self.index`` is not
        None), followed by one entry per frame column (flag ``False``).
    """
    entries = []

    index_labels = self.index
    if index_labels is not None:
        for level, label in enumerate(index_labels):
            level_type = dtype_mapper(
                self.frame.index._get_level_values(level))
            entries.append((text_type(label), level_type, True))

    entries.extend(
        (text_type(col), dtype_mapper(self.frame[col]), False)
        for col in self.frame.columns
    )
    return entries
def _join_unicode(lines, sep=''):
    """Join ``lines`` with ``sep``, retrying with decoded items on failure.

    The plain ``sep.join(lines)`` is attempted first. If it raises
    ``UnicodeDecodeError``, ``sep`` is converted with ``compat.text_type``
    and every ``str`` item is decoded as utf-8 before joining again.
    """
    try:
        joined = sep.join(lines)
    except UnicodeDecodeError:
        text_sep = compat.text_type(sep)
        decoded = []
        for item in lines:
            if isinstance(item, str):
                item = item.decode('utf-8')
            decoded.append(item)
        return text_sep.join(decoded)
    return joined
def _stringify_path(filepath_or_buffer):
    """Try to turn a path-like object into a string.

    Parameters
    ----------
    filepath_or_buffer : object to be converted

    Returns
    -------
    str_filepath_or_buffer : maybe a string version of the object

    Notes
    -----
    The checks run in this order:

    1. An object with an ``__fspath__`` method is coerced by calling it.
    2. A ``pathlib.Path`` (when ``_PATHLIB_INSTALLED``) is converted with
       ``text_type``.
    3. A ``LocalPath`` (when ``_PY_PATH_INSTALLED``) yields its ``strpath``.

    Anything else (bytes, strings, buffers, objects that are not path-like)
    is returned unchanged.
    """
    candidate = filepath_or_buffer

    if hasattr(candidate, '__fspath__'):
        return candidate.__fspath__()

    is_pathlib_path = _PATHLIB_INSTALLED and isinstance(candidate,
                                                        pathlib.Path)
    if is_pathlib_path:
        return text_type(candidate)

    is_py_path = _PY_PATH_INSTALLED and isinstance(candidate, LocalPath)
    if is_py_path:
        return candidate.strpath

    return candidate
def __init__(self, name, env, side=None, encoding=None):
    """Store the constructor arguments and resolve the term's value.

    ``self.local`` records whether ``_TAG_RE`` matches the text form of
    ``name``. ``self._value`` comes from ``self._resolve_name()``, which is
    called after ``_name``, ``env``, ``side`` and ``local`` are set and
    before ``encoding`` is stored.
    """
    self._name, self.env, self.side = name, env, side

    tag_match = _TAG_RE.search(text_type(name))
    self.local = tag_match is not None

    self._value = self._resolve_name()
    self.encoding = encoding
def _join_unicode(lines, sep=''):
    """Join ``lines`` with ``sep``; on a decode error, decode and retry.

    If the direct join raises ``UnicodeDecodeError``, the separator is
    converted with ``compat.text_type`` and each ``str`` element is decoded
    as utf-8 (other elements are left alone) before joining again.
    """
    try:
        return sep.join(lines)
    except UnicodeDecodeError:
        pass

    def _decode_if_str(value):
        # Only ``str`` instances are decoded; everything else passes through.
        return value.decode('utf-8') if isinstance(value, str) else value

    sep = compat.text_type(sep)
    return sep.join([_decode_if_str(x) for x in lines])
def __init__(self, name, env, side=None, encoding=None):
    """Store the constructor arguments and resolve the term's value.

    ``self.is_local`` is true when the text form of ``name`` starts with
    ``_LOCAL_TAG``. ``self._value`` comes from ``self._resolve_name()``,
    which is called after the other attributes (except ``encoding``) are
    set.
    """
    self._name, self.env, self.side = name, env, side

    name_text = text_type(name)
    self.is_local = name_text.startswith(_LOCAL_TAG)

    self._value = self._resolve_name()
    self.encoding = encoding
def _get_repr(self, name=False, length=True, na_rep='NaN', footer=True):
    """Format this object with ``fmt.CategoricalFormatter`` and return text.

    All keyword arguments are forwarded unchanged to the formatter; the
    formatter's ``to_string()`` output is converted with
    ``compat.text_type``.
    """
    options = dict(name=name, length=length, na_rep=na_rep, footer=footer)
    formatter = fmt.CategoricalFormatter(self, **options)
    return compat.text_type(formatter.to_string())
def get_commit_info(c, fmt, sep='\t'):
    """Run ``git log`` for a single commit and split its output on ``sep``.

    Parameters
    ----------
    c : commit identifier interpolated into the ``c^..c`` revision range
    fmt : value passed to git as ``--format=<fmt>``
    sep : str, default tab
        Separator used to split the text form of the command output.

    Returns
    -------
    list
        The pieces of the command output after splitting on ``sep``.
    """
    format_arg = "--format={}".format(fmt)
    rev_range = '{}^..{}'.format(c, c)
    output = sh.git('log', format_arg, rev_range, "-n", "1", _tty_out=False)
    return text_type(output).split(sep)
def __init__(self, name, env, side=None, encoding=None):
    """Store the constructor arguments and resolve the term's value.

    ``self.is_local`` is true when the text form of ``name`` either starts
    with ``_LOCAL_TAG`` or is a member of ``_DEFAULT_GLOBALS``.
    ``self._value`` comes from ``self._resolve_name()``, which is called
    after the other attributes (except ``encoding``) are set.
    """
    self._name, self.env, self.side = name, env, side

    name_text = text_type(name)
    has_local_tag = name_text.startswith(_LOCAL_TAG)
    # The membership test only runs when the prefix check fails.
    self.is_local = has_local_tag or name_text in _DEFAULT_GLOBALS

    self._value = self._resolve_name()
    self.encoding = encoding
def test_read_csv(self):
    """On non-PY3 interpreters, read ``self.csv1`` through a ``file://`` URL.

    The URL prefix has three slashes on Windows and two elsewhere. Nothing
    is executed when ``compat.PY3`` is true.
    """
    # NOTE(review): the collapsed source is read as having the whole body
    # inside the ``not compat.PY3`` branch; ``prefix`` would otherwise be
    # undefined on Python 3 -- confirm against the original layout.
    if compat.PY3:
        return

    prefix = u("file:///") if compat.is_platform_windows() else u("file://")
    fname = prefix + compat.text_type(self.csv1)
    self.read_csv(fname, index_col=0, parse_dates=True)
def _tidy_repr(self, max_vals=20):
    """Build a shortened text form: leading values, ellipsis, trailing values.

    Parameters
    ----------
    max_vals : int, default 20
        Total number of values shown. ``max_vals // 2`` come from the start
        and the remainder from the end.

    Returns
    -------
    compat.text_type
        Head and tail reprs separated by a ``...`` line, followed by the
        result of ``self._repr_footer()`` on its own line.
    """
    head_count = max_vals // 2
    tail_count = max_vals - head_count
    repr_options = dict(length=False, name=False, footer=False)

    head = self[:head_count]._get_repr(**repr_options)
    tail = self[-tail_count:]._get_repr(**repr_options)

    body = "%s\n...\n%s" % (head, tail)
    # TODO: tidy_repr for footer since there may be a ton of levels?
    with_footer = "%s\n%s" % (body, self._repr_footer())
    return compat.text_type(with_footer)
def test_repr_roundtrip(self):
    """Check that a small CategoricalIndex survives ``eval(repr(...))``.

    The short index must round-trip exactly. It and a 100-element random
    index are then rendered as text; for those calls the test only requires
    that nothing raises.
    """
    ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
    str(ci)
    rebuilt = eval(repr(ci))
    tm.assert_index_equal(rebuilt, ci, exact=True)

    # formatting
    to_text = str if PY3 else compat.text_type
    to_text(ci)

    # long format
    # this is not reprable
    ci = CategoricalIndex(np.random.randint(0, 5, size=100))
    to_text(ci)
def _tidy_repr(self, max_vals=10):
    """Build a shortened one-line text form with a ``...`` gap in the middle.

    Parameters
    ----------
    max_vals : int, default 10
        Total number of values shown. ``max_vals // 2`` come from the start
        and the remainder from the end.

    Returns
    -------
    compat.text_type
        The head repr without its last character and the tail repr without
        its first character, joined by ``, ..., ``, followed by the result
        of ``self._repr_footer()`` on its own line.
    """
    head_count = max_vals // 2
    tail_count = max_vals - head_count
    repr_options = dict(length=False, name=False, footer=False)

    head = self[:head_count]._get_repr(**repr_options)
    tail = self[-tail_count:]._get_repr(**repr_options)

    # Drop the last character of the head and the first of the tail so the
    # two pieces read as one sequence.
    body = '%s, ..., %s' % (head[:-1], tail[1:])
    with_footer = '%s\n%s' % (body, self._repr_footer())
    return compat.text_type(with_footer)
def _tidy_repr(self, max_vals=20):
    """Build a shortened text form: leading values, ellipsis, trailing values.

    Parameters
    ----------
    max_vals : int, default 20
        Total number of values shown. ``max_vals // 2`` come from the start
        and the remainder from the end.

    Returns
    -------
    compat.text_type
        Head and tail reprs separated by a ``...`` line, followed by the
        result of ``self._repr_footer()`` on its own line.
    """
    n_head = max_vals // 2
    n_tail = max_vals - n_head

    def _plain_repr(part):
        # Render a slice without length, name or footer decorations.
        return part._get_repr(length=False, name=False, footer=False)

    head = _plain_repr(self[:n_head])
    tail = _plain_repr(self[-n_tail:])

    body = '%s\n...\n%s' % (head, tail)
    body = '%s\n%s' % (body, self._repr_footer())
    return compat.text_type(body)
def test_repr_roundtrip():
    """Check that MultiIndex objects survive ``eval(repr(...))``.

    A short and a long index built from native strings are checked: exactly
    when ``PY3`` is true, otherwise loosely, because the rebuilt level is
    inferred as unicode. An index built from unicode literals must
    round-trip exactly in both cases.
    """

    def check_native_roundtrip(index):
        # Round-trip an index whose first level holds native ``str`` values.
        if PY3:
            tm.assert_index_equal(eval(repr(index)), index, exact=True)
            return
        rebuilt = eval(repr(index))
        # string coerces to unicode
        tm.assert_index_equal(rebuilt, index, exact=False)
        assert index.get_level_values('first').inferred_type == 'string'
        assert rebuilt.get_level_values('first').inferred_type == 'unicode'

    def check_exact_roundtrip(index):
        # Round-trip an index that must compare exactly equal afterwards.
        rebuilt = eval(repr(index))
        tm.assert_index_equal(rebuilt, index, exact=True)

    mi = MultiIndex.from_product([list('ab'), range(3)],
                                 names=['first', 'second'])
    str(mi)
    check_native_roundtrip(mi)

    mi_u = MultiIndex.from_product([list(u'ab'), range(3)],
                                   names=['first', 'second'])
    check_exact_roundtrip(mi_u)

    # formatting
    to_text = str if PY3 else compat.text_type
    to_text(mi)

    # long format
    mi = MultiIndex.from_product([list('abcdefg'), range(10)],
                                 names=['first', 'second'])
    check_native_roundtrip(mi)
    check_exact_roundtrip(mi_u)
def test_repr_roundtrip():
    """Check that MultiIndex objects survive ``eval(repr(...))``.

    A short and a long index built from native strings are checked: exactly
    when ``PY3`` is true, otherwise loosely, because the rebuilt level is
    inferred as unicode. An index built from unicode literals must
    round-trip exactly in both cases.
    """

    def assert_native_roundtrip(index):
        # Round-trip an index whose first level holds native ``str`` values.
        rebuilt = eval(repr(index))
        if PY3:
            tm.assert_index_equal(rebuilt, index, exact=True)
        else:
            # string coerces to unicode
            tm.assert_index_equal(rebuilt, index, exact=False)
            assert index.get_level_values('first').inferred_type == 'string'
            assert (rebuilt.get_level_values('first').inferred_type ==
                    'unicode')

    def assert_unicode_roundtrip(index):
        # Round-trip an index that must compare exactly equal afterwards.
        tm.assert_index_equal(eval(repr(index)), index, exact=True)

    mi = MultiIndex.from_product([list('ab'), range(3)],
                                 names=['first', 'second'])
    str(mi)
    assert_native_roundtrip(mi)

    mi_u = MultiIndex.from_product(
        [list(u'ab'), range(3)], names=['first', 'second'])
    assert_unicode_roundtrip(mi_u)

    # formatting
    if PY3:
        str(mi)
    else:
        compat.text_type(mi)

    # long format
    mi = MultiIndex.from_product([list('abcdefg'), range(10)],
                                 names=['first', 'second'])
    assert_native_roundtrip(mi)
    assert_unicode_roundtrip(mi_u)
def _tidy_repr(self, max_vals=20):
    """Build a shortened text form: leading values, ellipsis, trailing values.

    Parameters
    ----------
    max_vals : int, default 20
        Total number of values shown. ``max_vals // 2`` come from the start
        and the remainder from the end.

    Returns
    -------
    compat.text_type
        Head and tail reprs separated by a ``...`` line, followed by the
        result of ``self._repr_footer()`` on its own line.
    """
    leading = max_vals // 2
    trailing = max_vals - leading
    plain = dict(length=False, name=False, footer=False)

    head_text = self[:leading]._get_repr(**plain)
    tail_text = self[-trailing:]._get_repr(**plain)

    shortened = '%s\n...\n%s' % (head_text, tail_text)
    # TODO: tidy_repr for footer since there may be a ton of levels?
    shortened = '%s\n%s' % (shortened, self._repr_footer())
    return compat.text_type(shortened)
def test_repr_binary_type():
    """Check ``printing.pprint_thing`` on text derived from a binary value.

    The ASCII letters are converted to ``compat.binary_type`` (with the
    configured display encoding when the constructor accepts it), turned
    back into text, and pretty-printed with and without quoting.
    """
    import string

    letters = string.ascii_letters
    to_binary = compat.binary_type
    try:
        raw = to_binary(letters, encoding=cf.get_option('display.encoding'))
    except TypeError:
        # Constructor rejected the ``encoding`` keyword; call it without.
        raw = to_binary(letters)

    b = compat.text_type(compat.bytes_to_str(raw))

    quoted = printing.pprint_thing(b, quote_strings=True)
    assert quoted == repr(b)

    unquoted = printing.pprint_thing(b, quote_strings=False)
    assert unquoted == b
def test_repr_binary_type():
    """Check ``com.pprint_thing`` on text derived from a binary value.

    The ASCII letters are converted to ``compat.binary_type`` (with the
    configured display encoding when the constructor accepts it), turned
    back into text, and pretty-printed with and without quoting.
    """
    import string

    letters = string.ascii_letters
    to_binary = compat.binary_type
    try:
        raw = to_binary(letters, encoding=cf.get_option('display.encoding'))
    except TypeError:
        # Constructor rejected the ``encoding`` keyword; call it without.
        raw = to_binary(letters)

    b = compat.text_type(compat.bytes_to_str(raw))

    quoted = com.pprint_thing(b, quote_strings=True)
    assert_equal(quoted, repr(b))

    unquoted = com.pprint_thing(b, quote_strings=False)
    assert_equal(unquoted, b)
def as_escaped_unicode(thing, escape_chars=escape_chars):
    """Return the text form of ``thing`` with selected characters replaced.

    ``escape_chars`` is either a dict mapping characters to replacements or
    an iterable of characters to escape using the built-in table (tab,
    newline, carriage return). With a dict, ``default_escapes`` (read from
    the enclosing scope) selects between extending the built-in table and
    using the dict alone.
    """
    # Unicode is fine; otherwise fall back to decoding as utf-8 with
    # 'replace'. If that is not right either there is no way of knowing,
    # and the caller has to deal with it.
    try:
        converted = compat.text_type(thing)  # try this first
    except UnicodeDecodeError:
        # either utf-8 or we replace errors
        converted = str(thing).decode('utf-8', "replace")

    table = {'\t': r'\t', '\n': r'\n', '\r': r'\r'}

    if isinstance(escape_chars, dict):
        if default_escapes:
            table.update(escape_chars)
        else:
            table = escape_chars
        targets = list(escape_chars.keys())
    else:
        targets = escape_chars or tuple()

    for target in targets:
        converted = converted.replace(target, table[target])

    return compat.text_type(converted)
def _stringify_path(filepath_or_buffer): """Return the argument coerced to a string if it was a pathlib.Path or a py.path.local Parameters ---------- filepath_or_buffer : object to be converted Returns ------- str_filepath_or_buffer : a the string version of the input path """ if _PATHLIB_INSTALLED and isinstance(filepath_or_buffer, pathlib.Path): return text_type(filepath_or_buffer) if _PY_PATH_INSTALLED and isinstance(filepath_or_buffer, LocalPath): return filepath_or_buffer.strpath return filepath_or_buffer
def _evaluate(self):
    """Evaluate the converted expression with numexpr.

    The expression environment's ``full_scope`` is passed as the local
    dictionary, and its ``'truediv'`` entry as the ``truediv`` argument.

    Raises
    ------
    UndefinedVariableError
        When a ``KeyError`` is raised while reading the scope, checking for
        builtin clashes, or evaluating.
    """
    import numexpr as ne

    # convert the expression to a valid numexpr expression
    expression = self.convert()

    try:
        scope = self.expr.env.full_scope
        truediv = scope['truediv']
        _check_ne_builtin_clash(self.expr)
        return ne.evaluate(expression, local_dict=scope, truediv=truediv)
    except KeyError as err:
        # python 3 compat kludge: use ``message`` when the exception has
        # one, otherwise the text form of the exception.
        missing = object()
        msg = getattr(err, 'message', missing)
        if msg is missing:
            msg = compat.text_type(err)
        raise UndefinedVariableError(msg)
def _evaluate(self):
    """Evaluate the converted expression with numexpr.

    Resolvers are added to the expression's locals first. The environment's
    ``locals`` and ``globals`` and the expression's ``truediv`` setting are
    then passed to ``ne.evaluate``.

    Raises
    ------
    UndefinedVariableError
        When evaluation raises ``KeyError``.
    """
    import numexpr as ne

    expr = self.expr

    # add the resolvers to locals
    expr.add_resolvers_to_locals()

    # convert the expression to a valid numexpr expression
    expression = self.convert()

    try:
        return ne.evaluate(expression,
                           local_dict=self.expr.env.locals,
                           global_dict=self.expr.env.globals,
                           truediv=self.expr.truediv)
    except KeyError as err:
        # python 3 compat kludge: use ``message`` when the exception has
        # one, otherwise the text form of the exception.
        missing = object()
        msg = getattr(err, 'message', missing)
        if msg is missing:
            msg = compat.text_type(err)
        raise UndefinedVariableError(msg)
def __next__(self):
    """Return the next row of ``self.reader`` with each cell decoded as utf-8.

    Every cell is converted with ``compat.text_type(cell, "utf-8")``.
    Whatever ``next(self.reader)`` raises propagates to the caller.
    """
    raw_row = next(self.reader)

    decoded = []
    for cell in raw_row:
        decoded.append(compat.text_type(cell, "utf-8"))
    return decoded
def __unicode__(self):
    """Return ``self.name`` converted with ``compat.text_type``."""
    name = self.name
    return compat.text_type(name)
def paste_qt():
    """Return the text of ``app.clipboard()`` converted with ``text_type``."""
    clipboard = app.clipboard()
    contents = clipboard.text()
    return text_type(contents)
def test_array_repr_unicode(self, data):
    """Check that ``compat.text_type(data)`` yields a ``compat.text_type``."""
    text_cls = compat.text_type
    rendered = text_cls(data)
    assert isinstance(rendered, text_cls)
def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
                 quote_strings=False, max_seq_items=None):
    """
    Convert an object to its unicode representation.

    This is the sanctioned way of producing text for display. Unlike
    ``unicode(object)``, it properly handles nested sequences containing
    unicode strings.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with pprint_sequence; this argument tracks the current nesting
        level so that it can be limited.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed, the values are the
        replacements.
    default_escapes : bool, default False
        Whether the input escape characters replace or add to the defaults.
    max_seq_items : False, int, default None
        Passed through to the other pretty printers to limit sequence
        printing.

    Returns
    -------
    result - unicode object on py2, str on py3. Always Unicode.
    """

    def as_escaped_unicode(thing, escape_chars=escape_chars):
        # Unicode is fine; otherwise fall back to decoding as utf-8 with
        # 'replace'. If that is not right either there is no way of
        # knowing, and the caller has to deal with it.
        try:
            text = compat.text_type(thing)  # try this first
        except UnicodeDecodeError:
            # either utf-8 or we replace errors
            text = str(thing).decode('utf-8', "replace")

        replacements = {'\t': r'\t', '\n': r'\n', '\r': r'\r'}

        if not isinstance(escape_chars, dict):
            to_escape = escape_chars or tuple()
        else:
            if default_escapes:
                replacements.update(escape_chars)
            else:
                replacements = escape_chars
            to_escape = list(escape_chars.keys())

        for char in to_escape:
            text = text.replace(char, replacements[char])

        return compat.text_type(text)

    # Objects exposing an iterator ``next`` method are converted directly.
    looks_like_iterator = ((compat.PY3 and hasattr(thing, '__next__')) or
                           hasattr(thing, 'next'))
    if looks_like_iterator:
        return compat.text_type(thing)

    # The depth option is only consulted once the type check has passed.
    if (isinstance(thing, dict) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        rendered = _pprint_dict(thing, _nest_lvl, quote_strings=True,
                                max_seq_items=max_seq_items)
        return compat.text_type(rendered)

    if (is_sequence(thing) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        rendered = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
                               quote_strings=quote_strings,
                               max_seq_items=max_seq_items)
        return compat.text_type(rendered)

    if isinstance(thing, compat.string_types) and quote_strings:
        template = u("'{thing}'") if compat.PY3 else u("u'{thing}'")
        rendered = template.format(thing=as_escaped_unicode(thing))
        return compat.text_type(rendered)

    return compat.text_type(as_escaped_unicode(thing))  # always unicode
def test_unicode_string_with_unicode(self):
    """Render a Series with a non-ASCII value and non-ASCII name as text.

    There are no assertions; the test passes when the conversion does not
    raise. ``str`` is used when ``compat.PY3`` is true,
    ``compat.text_type`` otherwise.
    """
    df = Series([u("\u05d0")], name=u("\u05d1"))

    # Pick the text conversion for the running interpreter, then apply it.
    render = str if compat.PY3 else compat.text_type
    render(df)
def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
                 quote_strings=False, max_seq_items=None):
    """
    Convert an object to its unicode representation.

    This is the sanctioned way of producing text for display. Unlike
    ``unicode(object)``, it properly handles nested sequences containing
    unicode strings.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with pprint_sequence; this argument tracks the current nesting
        level so that it can be limited.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed, the values are the
        replacements.
    default_escapes : bool, default False
        Whether the input escape characters replace or add to the defaults.
    max_seq_items : False, int, default None
        Passed through to the other pretty printers to limit sequence
        printing.

    Returns
    -------
    result - unicode object on py2, str on py3. Always Unicode.
    """

    def as_escaped_unicode(thing, escape_chars=escape_chars):
        # Unicode is fine; otherwise fall back to decoding as utf-8 with
        # 'replace'. If that is not right either there is no way of
        # knowing, and the caller has to deal with it.
        try:
            converted = compat.text_type(thing)  # try this first
        except UnicodeDecodeError:
            # either utf-8 or we replace errors
            converted = str(thing).decode('utf-8', "replace")

        table = {'\t': r'\t', '\n': r'\n', '\r': r'\r'}

        if isinstance(escape_chars, dict):
            if default_escapes:
                table.update(escape_chars)
            else:
                table = escape_chars
            targets = list(escape_chars.keys())
        else:
            targets = escape_chars or tuple()

        for target in targets:
            converted = converted.replace(target, table[target])

        return compat.text_type(converted)

    # Objects exposing an iterator ``next`` method are converted directly.
    looks_like_iterator = ((compat.PY3 and hasattr(thing, '__next__')) or
                           hasattr(thing, 'next'))
    if looks_like_iterator:
        return compat.text_type(thing)

    # The depth option is only consulted once the type check has passed.
    if (isinstance(thing, dict) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        rendered = _pprint_dict(thing, _nest_lvl, quote_strings=True,
                                max_seq_items=max_seq_items)
        return compat.text_type(rendered)

    if (is_sequence(thing) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        rendered = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
                               quote_strings=quote_strings,
                               max_seq_items=max_seq_items)
        return compat.text_type(rendered)

    if isinstance(thing, compat.string_types) and quote_strings:
        template = "'%s'" if compat.PY3 else "u'%s'"
        rendered = template % as_escaped_unicode(thing)
        return compat.text_type(rendered)

    return compat.text_type(as_escaped_unicode(thing))  # always unicode