def is_recoverable(inp):
    """
    Tell whether ``inp`` designates string input(s).

    A value accepted by ``is_str`` yields True. A value that is neither a
    string nor a sequence (per ``is_sequence``) yields False. For a sequence,
    every element must have exactly the same type, and the answer is then
    ``is_str`` applied to the first element.

    Raises
    ------
    ValueError
        If the elements of a sequence do not share one single type. A
        sequence without elements yields zero types and is rejected as well.
    """
    if is_str(inp):
        return True
    if is_sequence(inp):
        # Exact types are compared, so subclasses count as distinct types.
        element_types = {type(item) for item in inp}
        if len(element_types) == 1:
            return is_str(inp[0])
        raise ValueError("All inputs must have the same type")
    return False
def __init__(self, inp, encoding, file_cnt=0, compression=None,
             dec_remaining=b'', timeout=None, start_byte=0):
    """
    Validate the input(s), open the current one and set up decompression.

    NB: all inputs are supposed to have the same type, encoding, compression
    TODO: check that for encoding and compression

    Parameters
    ----------
    inp : str, file-like, or sequence of those
        A single string or file-like object is wrapped into a one-element
        list. All elements must have exactly the same type.
    encoding : passed to ``filepath_to_buffer``; the value it returns is
        what gets stored in ``_encoding``.
    file_cnt : int, default 0
        Stored as ``_file_cnt``; must be smaller than the number of inputs.
    compression : passed, together with ``self.filepath``, to
        ``_infer_compression``.
    dec_remaining : bytes, default b''
        Initial value of ``_dec_remaining``.
    timeout : forwarded to ``filepath_to_buffer``. Note that ``_timeout``
        itself is always set to None.
    start_byte : int, default 0
        Used as the opening offset when no compression is in effect.
        Otherwise the stream is opened at offset 0 and ``start_byte`` is
        handed to ``_read_compressed``.

    Raises
    ------
    ValueError
        On a non-sequence input, mixed element types, an unsupported
        element type, or ``file_cnt`` not below the number of inputs.
    """
    # A lone path / file object is handled as a sequence of length one.
    if is_str(inp) or is_file_like(inp):
        inp = [inp]
    if not is_sequence(inp):
        raise ValueError("Bad input")

    element_types = {type(item) for item in inp}
    if len(element_types) != 1:
        raise ValueError("All inputs must have the same type")

    head = inp[0]
    if not is_str(head) and not is_file_like(head):
        raise ValueError("input type not supported")
    if file_cnt >= len(inp):
        raise ValueError("File counter out of range")

    # These two are assigned before ``self.filepath`` is read below.
    # NOTE(review): presumably ``filepath`` is derived from them - confirm.
    self._seq = inp
    self._file_cnt = file_cnt

    compression = _infer_compression(self.filepath, compression)
    # With compression the raw stream is opened from its beginning; the
    # requested offset is applied later through ``_read_compressed``.
    if compression:
        open_offset = 0
    else:
        open_offset = start_byte
    opened = filepath_to_buffer(self.filepath,
                                encoding=encoding,
                                compression=compression,
                                timeout=timeout,
                                start_byte=open_offset)
    stream, encoding, compression, input_size = opened

    self._encoding = encoding
    self._compression = compression
    self._input_size = input_size
    self._timeout = None  # for tests
    self._decompressor_class = None
    self._decompressor = None
    self._dec_remaining = dec_remaining
    self._dec_offset = 0
    self._stream = stream

    if self._compression is not None:
        factory = decompressors[self._compression]
        self._decompressor_class = factory
        self._decompressor = factory()
        self._read_compressed(start_byte)  # seek
def __init__(
    self,
    inp: Any,
    encoding: Optional[str],
    file_cnt: int = 0,
    compression: Optional[str] = None,
    dec_remaining: bytes = b"",
    timeout: Optional[float] = None,
    start_byte: int = 0,
    usecols: Optional[List[str]] = None,
):
    """
    Validate the input(s) and declare the instance attributes.

    NB: all inputs are supposed to have the same type, encoding, compression
    TODO: check that for encoding and compression

    Parameters
    ----------
    inp : str, file-like, or sequence of those
        A single string or file-like object is wrapped into a one-element
        list. All elements must have exactly the same type.
    file_cnt : int, default 0
        Stored as ``_file_cnt``; must be smaller than the number of inputs.
    usecols : list of str, optional
        Stored as ``_usecols``.
    encoding, compression, dec_remaining, timeout, start_byte
        Accepted but not read by the statements visible in this block.

    Raises
    ------
    ValueError
        On a non-sequence input, mixed element types, an unsupported
        element type, or ``file_cnt`` not below the number of inputs.
    """
    # A lone path / file object is handled as a sequence of length one.
    if is_str(inp) or is_file_like(inp):
        inp = [inp]
    if not is_sequence(inp):
        raise ValueError("Bad input")

    element_types = {type(item) for item in inp}
    if len(element_types) != 1:
        raise ValueError("All inputs must have the same type")

    head = inp[0]
    if not is_str(head) and not is_file_like(head):
        raise ValueError("input type not supported")
    if file_cnt >= len(inp):
        raise ValueError("File counter out of range")

    self._seq: List[str] = inp
    self._file_cnt: int = file_cnt
    self._usecols: Optional[List[str]] = usecols
    # The attributes below that carry no value are annotation-only
    # declarations: nothing is assigned to them by this block.
    self._encoding: Optional[str]
    self._compression: Optional[str]
    self._input_size: int
    self._timeout: Optional[float] = None
    self._decompressor_class: Optional[type] = None
    self._decompressor: Optional[Decompressor] = None
    self._dec_remaining: bytes
    self._dec_offset: int
    self._stream: IOBase
def pprint_thing(
    thing: Any,
    _nest_lvl: int = 0,
    escape_chars: Optional[EscapeChars] = None,
    default_escapes: bool = False,
    quote_strings: bool = False,
    max_seq_items: Optional[int] = None,
) -> str:
    """
    Convert an object to its string representation, handling nested
    dicts and sequences through the dedicated pretty printers.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with pprint_sequence, this argument is used to keep track of the
        current nesting level, and limit it.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements
    default_escapes : bool, default False
        Only relevant when ``escape_chars`` is a dict: if True the dict is
        merged into the default replacements, otherwise it replaces them.
    quote_strings : bool, default False
        Wrap the result in single quotes when ``thing`` is a str. Also
        forwarded to the sequence printer; the dict printer always gets
        ``quote_strings=True``.
    max_seq_items : int or None, default None
        Pass through to other pretty printers to limit sequence printing

    Returns
    -------
    str
    """

    def _escape(value: Any) -> str:
        # Default replacements; a dict argument either extends or
        # supersedes this table depending on ``default_escapes``.
        replacements = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
        if isinstance(escape_chars, dict):
            if default_escapes:
                replacements.update(escape_chars)
            else:
                replacements = escape_chars
            targets = list(escape_chars.keys())
        else:
            targets = escape_chars or tuple()

        text = str(value)
        for char in targets:
            text = text.replace(char, replacements[char])
        return text

    # Objects exposing ``__next__`` are returned via plain ``str`` and are
    # never iterated here.
    if hasattr(thing, "__next__"):
        return str(thing)

    if isinstance(thing, dict) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        return _pprint_dict(
            thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
        )

    if is_sequence(thing) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        return _pprint_seq(
            thing,
            _nest_lvl,
            escape_chars=escape_chars,
            quote_strings=quote_strings,
            max_seq_items=max_seq_items,
        )

    if isinstance(thing, str) and quote_strings:
        return f"'{_escape(thing)}'"
    return _escape(thing)
def pprint_thing(thing, _nest_lvl=0, escape_chars=None,
                 default_escapes=False, quote_strings=False,
                 max_seq_items=None):
    """
    Convert an object to a unicode representation, handling nested dicts
    and sequences through the dedicated pretty printers.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with pprint_sequence, this argument is used to keep track of the
        current nesting level, and limit it.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements
    default_escapes : bool, default False
        Only relevant when ``escape_chars`` is a dict: if True the dict is
        merged into the default replacements, otherwise it replaces them.
    quote_strings : bool, default False
        Quote the result when ``thing`` is a string type. Also forwarded
        to the sequence printer; the dict printer always gets
        ``quote_strings=True``.
    max_seq_items : False, int, default None
        Pass thru to other pretty printers to limit sequence printing

    Returns
    -------
    result - unicode object on py2, str on py3. Always Unicode.
    """

    def _escaped(value):
        # Try the text type first; on a decode failure fall back to
        # decoding the byte string as utf-8 with 'replace'.
        try:
            text = compat.text_type(value)
        except UnicodeDecodeError:
            text = str(value).decode('utf-8', "replace")

        replacements = {'\t': r'\t', '\n': r'\n', '\r': r'\r'}
        if isinstance(escape_chars, dict):
            if default_escapes:
                replacements.update(escape_chars)
            else:
                replacements = escape_chars
            targets = list(escape_chars.keys())
        else:
            targets = escape_chars or tuple()

        for char in targets:
            text = text.replace(char, replacements[char])
        return compat.text_type(text)

    # Objects exposing the iterator method (``__next__`` on py3, ``next``
    # otherwise) are converted directly and never iterated here.
    if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'):
        return compat.text_type(thing)

    if (isinstance(thing, dict) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        return compat.text_type(
            _pprint_dict(thing, _nest_lvl, quote_strings=True,
                         max_seq_items=max_seq_items))

    if (is_sequence(thing) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        return compat.text_type(
            _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
                        quote_strings=quote_strings,
                        max_seq_items=max_seq_items))

    if isinstance(thing, compat.string_types) and quote_strings:
        # On py2 the quoted form carries the ``u`` prefix.
        template = "'%s'" if compat.PY3 else "u'%s'"
        return compat.text_type(template % _escaped(thing))

    return compat.text_type(_escaped(thing))  # always unicode
def pprint_thing(thing, _nest_lvl=0, escape_chars=None,
                 default_escapes=False, quote_strings=False,
                 max_seq_items=None):
    """
    Convert an object to a unicode representation, handling nested dicts
    and sequences through the dedicated pretty printers.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with pprint_sequence, this argument is used to keep track of the
        current nesting level, and limit it.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements
    default_escapes : bool, default False
        Only relevant when ``escape_chars`` is a dict: if True the dict is
        merged into the default replacements, otherwise it replaces them.
    quote_strings : bool, default False
        Quote the result when ``thing`` is a string type. Also forwarded
        to the sequence printer; the dict printer always gets
        ``quote_strings=True``.
    max_seq_items : False, int, default None
        Pass thru to other pretty printers to limit sequence printing

    Returns
    -------
    result - unicode object on py2, str on py3. Always Unicode.
    """

    def _escaped(value):
        # Try the text type first; on a decode failure fall back to
        # decoding the byte string as utf-8 with 'replace'.
        try:
            text = compat.text_type(value)
        except UnicodeDecodeError:
            text = str(value).decode('utf-8', "replace")

        replacements = {'\t': r'\t', '\n': r'\n', '\r': r'\r'}
        if isinstance(escape_chars, dict):
            if default_escapes:
                replacements.update(escape_chars)
            else:
                replacements = escape_chars
            targets = list(escape_chars.keys())
        else:
            targets = escape_chars or tuple()

        for char in targets:
            text = text.replace(char, replacements[char])
        return compat.text_type(text)

    # Objects exposing the iterator method (``__next__`` on py3, ``next``
    # otherwise) are converted directly and never iterated here.
    if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'):
        return compat.text_type(thing)

    if (isinstance(thing, dict) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        return compat.text_type(
            _pprint_dict(thing, _nest_lvl, quote_strings=True,
                         max_seq_items=max_seq_items))

    if (is_sequence(thing) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        return compat.text_type(
            _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
                        quote_strings=quote_strings,
                        max_seq_items=max_seq_items))

    if isinstance(thing, compat.string_types) and quote_strings:
        # On py2 the quoted form carries the ``u`` prefix.
        template = u("'{thing}'") if compat.PY3 else u("u'{thing}'")
        return compat.text_type(template.format(thing=_escaped(thing)))

    return compat.text_type(_escaped(thing))  # always unicode
def pprint_thing(
    thing,
    _nest_lvl: int = 0,
    escape_chars: Optional[Union[Dict[str, str], Iterable[str]]] = None,
    default_escapes: bool = False,
    quote_strings: bool = False,
    max_seq_items: Optional[int] = None,
) -> str:
    """
    Convert an object to its string representation, handling nested
    dicts and sequences through the dedicated pretty printers.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with pprint_sequence, this argument is used to keep track of the
        current nesting level, and limit it.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements
    default_escapes : bool, default False
        Only relevant when ``escape_chars`` is a dict: if True the dict is
        merged into the default replacements, otherwise it replaces them.
    quote_strings : bool, default False
        Wrap the result in single quotes when ``thing`` is a str. Also
        forwarded to the sequence printer; the dict printer always gets
        ``quote_strings=True``.
    max_seq_items : int or None, default None
        Pass through to other pretty printers to limit sequence printing

    Returns
    -------
    str
    """

    def as_escaped_string(value) -> str:
        # No UnicodeDecodeError fallback here: the former handler called
        # ``.decode`` on a ``str``, which does not exist on Python 3, so it
        # could never recover. Any error from ``str()`` now propagates
        # unchanged.
        text = str(value)

        # Default replacements; a dict argument either extends or
        # supersedes this table depending on ``default_escapes``.
        translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
        if isinstance(escape_chars, dict):
            if default_escapes:
                translate.update(escape_chars)
            else:
                translate = escape_chars
            targets = list(escape_chars.keys())
        else:
            targets = escape_chars or tuple()

        for char in targets:
            text = text.replace(char, translate[char])
        return text

    # Objects exposing ``__next__`` are returned via plain ``str`` and are
    # never iterated here.
    if hasattr(thing, "__next__"):
        return str(thing)

    if isinstance(thing, dict) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        result = _pprint_dict(
            thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
        )
    elif is_sequence(thing) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        result = _pprint_seq(
            thing,
            _nest_lvl,
            escape_chars=escape_chars,
            quote_strings=quote_strings,
            max_seq_items=max_seq_items,
        )
    elif isinstance(thing, str) and quote_strings:
        result = f"'{as_escaped_string(thing)}'"
    else:
        result = as_escaped_string(thing)

    # Kept as a final coercion so the return type stays ``str`` whatever
    # the delegated printers hand back.
    return str(result)