Esempio n. 1
0
def is_recoverable(inp):
    """
    Tell whether ``inp`` is string-based input.

    Returns True when ``is_str`` accepts ``inp`` itself, or when ``inp`` is
    a sequence (per ``is_sequence``) whose elements all share one type and
    whose first element is accepted by ``is_str``. Anything that is neither
    a string nor a sequence yields False.

    Raises
    ------
    ValueError
        If the elements of the sequence do not all have exactly one common
        type (an empty sequence has zero types and is rejected as well).
    """
    if is_str(inp):
        return True
    if is_sequence(inp):
        element_types = {type(item) for item in inp}
        if len(element_types) == 1:
            # Homogeneous sequence: the first element decides for all.
            return is_str(inp[0])
        raise ValueError("All inputs must have the same type")
    return False
Esempio n. 2
0
 def __init__(self,
              inp,
              encoding,
              file_cnt=0,
              compression=None,
              dec_remaining=b'',
              timeout=None,
              start_byte=0):
     """
     Validate the input(s) and open the one selected by ``file_cnt``.

     ``inp`` may be a single string / file-like object or a sequence of
     them; a single object is wrapped in a one-element list.

     NB: all inputs are supposed to have the same type, encoding and
     compression. Only the type is actually checked here.
     TODO: check that for encoding and compression

     Raises
     ------
     ValueError
         If ``inp`` is not a sequence, if its elements do not share exactly
         one type, if the first element is neither a string nor file-like,
         or if ``file_cnt`` is not smaller than the number of inputs.
     """
     if is_str(inp) or is_file_like(inp):
         inp = [inp]
     if not is_sequence(inp):
         raise ValueError("Bad input")
     if len({type(item) for item in inp}) != 1:
         raise ValueError("All inputs must have the same type")
     head = inp[0]
     if not is_str(head) and not is_file_like(head):
         raise ValueError("input type not supported")
     if file_cnt >= len(inp):
         raise ValueError("File counter out of range")

     # These two must be stored before ``self.filepath`` is read below
     # (presumably it resolves the current entry of the sequence -- verify
     # against the property definition).
     self._seq = inp
     self._file_cnt = file_cnt

     compression = _infer_compression(self.filepath, compression)
     # With compression, the underlying stream is opened at offset 0 and
     # ``start_byte`` is handed to ``_read_compressed`` further down.
     if compression:
         open_offset = 0
     else:
         open_offset = start_byte
     opened = filepath_to_buffer(self.filepath,
                                 encoding=encoding,
                                 compression=compression,
                                 timeout=timeout,
                                 start_byte=open_offset)
     stream, encoding, compression, size = opened

     self._encoding = encoding
     self._compression = compression
     self._input_size = size
     self._timeout = None  # for tests
     self._decompressor_class = None
     self._decompressor = None
     self._dec_remaining = dec_remaining
     self._dec_offset = 0
     self._stream = stream

     if self._compression is None:
         return
     decompressor_cls = decompressors[self._compression]
     self._decompressor_class = decompressor_cls
     self._decompressor = decompressor_cls()
     self._read_compressed(start_byte)  # seek
Esempio n. 3
0
 def __init__(
     self,
     inp: Any,
     encoding: Optional[str],
     file_cnt: int = 0,
     compression: Optional[str] = None,
     dec_remaining: bytes = b"",
     timeout: Optional[float] = None,
     start_byte: int = 0,
     usecols: Optional[List[str]] = None,
 ):
     """
     Validate the input(s) and record the basic state of the instance.

     ``inp`` may be a single string / file-like object or a sequence of
     them; a single object is wrapped in a one-element list.

     NB: all inputs are supposed to have the same type, encoding, compression
     TODO: check that for encoding and compression

     In the part of the constructor visible here, only ``inp``,
     ``file_cnt`` and ``usecols`` are stored. ``encoding``, ``compression``,
     ``dec_remaining``, ``timeout`` and ``start_byte`` are not used, and the
     matching attributes are only declared (annotated) without a value.

     Raises
     ------
     ValueError
         If ``inp`` is not a sequence, if its elements do not share exactly
         one type, if the first element is neither a string nor file-like,
         or if ``file_cnt`` is not smaller than the number of inputs.
     """
     # Disabled code, kept for reference:
     # if self._input_stream is not None:
     #    self.close()
     # Normalise a single input to a one-element list.
     if is_str(inp) or is_file_like(inp):
         inp = [inp]
     if not is_sequence(inp):
         raise ValueError("Bad input")
     # Exactly one distinct element type is required; an empty sequence
     # yields zero types and is therefore rejected by the same check.
     types = set((type(e) for e in inp))
     if len(types) != 1:
         raise ValueError("All inputs must have the same type")
     # All elements share a type, so inspecting the first one is enough.
     f = inp[0]
     if not (is_str(f) or is_file_like(f)):
         raise ValueError("input type not supported")
     # Disabled code, kept for reference:
     # if compression is not None and start_byte:
     #    raise ValueError("Cannot open a compressed file with a positive offset")
     if file_cnt >= len(inp):
         raise ValueError("File counter out of range")
     # NOTE(review): annotated as List[str], yet file-like elements pass the
     # validation above -- confirm the intended element type.
     self._seq: List[str] = inp
     self._file_cnt: int = file_cnt
     self._usecols: Optional[List[str]] = usecols
     # The attributes below that carry no value are declarations only;
     # nothing visible here assigns them.
     self._encoding: Optional[str]
     self._compression: Optional[str]
     self._input_size: int
     # Set to None regardless of the ``timeout`` argument.
     self._timeout: Optional[float] = None
     self._decompressor_class: Optional[type] = None
     self._decompressor: Optional[Decompressor] = None
     self._dec_remaining: bytes
     self._dec_offset: int
     self._stream: IOBase
Esempio n. 4
0
def pprint_thing(
    thing: Any,
    _nest_lvl: int = 0,
    escape_chars: Optional[EscapeChars] = None,
    default_escapes: bool = False,
    quote_strings: bool = False,
    max_seq_items: Optional[int] = None,
) -> str:
    """
    Convert an object to its string representation, handling nested
    dicts and sequences.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with the sequence/dict printers; this argument tracks the current
        nesting level so that it can be limited by the
        ``display.pprint_nest_depth`` option.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements.
    default_escapes : bool, default False
        Only consulted when ``escape_chars`` is a dict: if True the dict is
        merged into the default table (tab, newline, carriage return),
        otherwise it replaces that table. Either way only the keys of the
        dict are actually escaped.
    quote_strings : bool, default False
        Wrap ``thing`` in single quotes when it is a ``str``; also passed
        on to the sequence printer.
    max_seq_items : int or None, default None
        Pass through to other pretty printers to limit sequence printing.

    Returns
    -------
    str

    Raises
    ------
    KeyError
        If a character listed in ``escape_chars`` has no replacement in the
        translation table.
    """

    def _escaped(obj: Any) -> str:
        # Build the replacement table and the characters to escape.
        replacements = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
        if isinstance(escape_chars, dict):
            if default_escapes:
                replacements.update(escape_chars)
            else:
                replacements = escape_chars
            targets = list(escape_chars.keys())
        else:
            targets = escape_chars or tuple()

        text = str(obj)
        for char in targets:
            text = text.replace(char, replacements[char])
        return text

    # Iterators are never consumed: show their plain str() form.
    if hasattr(thing, "__next__"):
        return str(thing)

    if isinstance(thing, dict) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        return _pprint_dict(
            thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
        )

    if is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
        return _pprint_seq(
            thing,
            _nest_lvl,
            escape_chars=escape_chars,
            quote_strings=quote_strings,
            max_seq_items=max_seq_items,
        )

    escaped = _escaped(thing)
    if isinstance(thing, str) and quote_strings:
        return f"'{escaped}'"
    return escaped
Esempio n. 5
0
def pprint_thing(thing,
                 _nest_lvl=0,
                 escape_chars=None,
                 default_escapes=False,
                 quote_strings=False,
                 max_seq_items=None):
    """
    Convert an object to its text (unicode) representation, handling
    nested dicts and sequences.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with the sequence/dict printers; this argument tracks the current
        nesting level so that it can be limited by the
        ``display.pprint_nest_depth`` option.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements.
    default_escapes : bool, default False
        Only consulted when ``escape_chars`` is a dict: if True the dict is
        merged into the default table (tab, newline, carriage return),
        otherwise it replaces that table. Either way only the keys of the
        dict are actually escaped.
    quote_strings : bool, default False
        Wrap ``thing`` in quotes when it is a string; also passed on to the
        sequence printer.
    max_seq_items : False, int, default None
        Pass through to other pretty printers to limit sequence printing.

    Returns
    -------
    The result converted with ``compat.text_type``.
    """
    def _to_escaped_text(obj):
        # Try the text type first; on a decode error fall back to decoding
        # the byte string as utf-8 with replacement characters.
        try:
            text = compat.text_type(obj)
        except UnicodeDecodeError:
            text = str(obj).decode('utf-8', "replace")

        table = {'\t': r'\t', '\n': r'\n', '\r': r'\r'}
        if isinstance(escape_chars, dict):
            if default_escapes:
                table.update(escape_chars)
            else:
                table = escape_chars
            targets = list(escape_chars.keys())
        else:
            targets = escape_chars or tuple()

        for char in targets:
            text = text.replace(char, table[char])
        return compat.text_type(text)

    def _may_nest():
        # Evaluated lazily so the option is only looked up when needed.
        return _nest_lvl < get_option("display.pprint_nest_depth")

    # Iterators are never consumed: show their plain text form.
    if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'):
        return compat.text_type(thing)

    if isinstance(thing, dict) and _may_nest():
        rendered = _pprint_dict(thing,
                                _nest_lvl,
                                quote_strings=True,
                                max_seq_items=max_seq_items)
    elif is_sequence(thing) and _may_nest():
        rendered = _pprint_seq(thing,
                               _nest_lvl,
                               escape_chars=escape_chars,
                               quote_strings=quote_strings,
                               max_seq_items=max_seq_items)
    elif isinstance(thing, compat.string_types) and quote_strings:
        template = "'%s'" if compat.PY3 else "u'%s'"
        rendered = template % _to_escaped_text(thing)
    else:
        rendered = _to_escaped_text(thing)

    return compat.text_type(rendered)  # always unicode
Esempio n. 6
0
def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
                 quote_strings=False, max_seq_items=None):
    """
    Convert an object to its text (unicode) representation, handling
    nested dicts and sequences.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with the sequence/dict printers; this argument tracks the current
        nesting level so that it can be limited by the
        ``display.pprint_nest_depth`` option.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements.
    default_escapes : bool, default False
        Only consulted when ``escape_chars`` is a dict: if True the dict is
        merged into the default table (tab, newline, carriage return),
        otherwise it replaces that table. Either way only the keys of the
        dict are actually escaped.
    quote_strings : bool, default False
        Wrap ``thing`` in quotes when it is a string; also passed on to the
        sequence printer.
    max_seq_items : False, int, default None
        Pass through to other pretty printers to limit sequence printing.

    Returns
    -------
    The result converted with ``compat.text_type``.
    """

    def _escape(obj):
        # Try the text type first; on a decode error fall back to decoding
        # the byte string as utf-8 with replacement characters.
        try:
            text = compat.text_type(obj)
        except UnicodeDecodeError:
            text = str(obj).decode('utf-8', "replace")

        mapping = {'\t': r'\t', '\n': r'\n', '\r': r'\r'}
        if isinstance(escape_chars, dict):
            if default_escapes:
                mapping.update(escape_chars)
            else:
                mapping = escape_chars
            wanted = list(escape_chars.keys())
        else:
            wanted = escape_chars or tuple()

        for char in wanted:
            text = text.replace(char, mapping[char])
        return compat.text_type(text)

    # Iterators are never consumed: show their plain text form.
    if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'):
        return compat.text_type(thing)

    if (isinstance(thing, dict) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        nested = _pprint_dict(thing, _nest_lvl, quote_strings=True,
                              max_seq_items=max_seq_items)
        return compat.text_type(nested)

    if (is_sequence(thing) and
            _nest_lvl < get_option("display.pprint_nest_depth")):
        nested = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
                             quote_strings=quote_strings,
                             max_seq_items=max_seq_items)
        return compat.text_type(nested)

    if isinstance(thing, compat.string_types) and quote_strings:
        template = u("'{thing}'") if compat.PY3 else u("u'{thing}'")
        quoted = template.format(thing=_escape(thing))
        return compat.text_type(quoted)

    return compat.text_type(_escape(thing))  # always unicode
Esempio n. 7
0
def pprint_thing(
    thing,
    _nest_lvl: int = 0,
    escape_chars: Optional[Union[Dict[str, str], Iterable[str]]] = None,
    default_escapes: bool = False,
    quote_strings: bool = False,
    max_seq_items: Optional[int] = None,
) -> str:
    """
    Convert an object to its string representation, handling nested
    dicts and sequences.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with the sequence/dict printers; this argument tracks the current
        nesting level so that it can be limited by the
        ``display.pprint_nest_depth`` option.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements.
    default_escapes : bool, default False
        Only consulted when ``escape_chars`` is a dict: if True the dict is
        merged into the default table (tab, newline, carriage return),
        otherwise it replaces that table. Either way only the keys of the
        dict are actually escaped.
    quote_strings : bool, default False
        Wrap ``thing`` in single quotes when it is a ``str``; also passed
        on to the sequence printer.
    max_seq_items : int or None, default None
        Pass through to other pretty printers to limit sequence printing.

    Returns
    -------
    str

    Raises
    ------
    KeyError
        If a character listed in ``escape_chars`` has no replacement in the
        translation table.
    """

    def as_escaped_string(thing, escape_chars=escape_chars) -> str:
        # The former ``except UnicodeDecodeError: str(thing).decode(...)``
        # fallback was a Python 2 leftover: ``str`` has no ``.decode`` on
        # Python 3 and the handler merely repeated the failing call, so a
        # plain ``str()`` is all that is needed.
        result = str(thing)

        translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
        if isinstance(escape_chars, dict):
            if default_escapes:
                translate.update(escape_chars)
            else:
                translate = escape_chars
            escape_chars = list(escape_chars.keys())
        else:
            escape_chars = escape_chars or tuple()

        for c in escape_chars:
            result = result.replace(c, translate[c])
        return result

    # Iterators are never consumed: show their plain str() form.
    if hasattr(thing, "__next__"):
        return str(thing)
    elif isinstance(thing, dict) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        result = _pprint_dict(
            thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
        )
    elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
        result = _pprint_seq(
            thing,
            _nest_lvl,
            escape_chars=escape_chars,
            quote_strings=quote_strings,
            max_seq_items=max_seq_items,
        )
    elif isinstance(thing, str) and quote_strings:
        result = "'{thing}'".format(thing=as_escaped_string(thing))
    else:
        result = as_escaped_string(thing)

    return result