def parseFile(source: IOBase, logger=logging.getLogger()):
    try:
        return Parser(Lexer(source), logger).parseModule(False)
    except CompilerError as e:
        source.seek(0)
        e.format(source.read())
        raise e
def peek(stream: IOBase, chunk_size: int) -> str:
    if hasattr(stream, 'peek'):
        return stream.peek(chunk_size)
    else:
        current_pos = stream.tell()
        result = stream.read(chunk_size)
        stream.seek(current_pos)
        return result
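
# Illustrative usage sketch (not from the original source); io.BytesIO has no
# peek() method, so this exercises the tell/read/seek fallback path above.
from io import BytesIO

stream = BytesIO(b'abcdef')
peek(stream, 3)    # -> b'abc'
stream.read(3)     # -> b'abc' (the stream position was restored by peek)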
def __new__(cls, buffer: IOBase = None, file_header: XTFFileHeader = None, *args, **kwargs):
    obj = super().__new__(cls, buffer=buffer, file_header=file_header, *args, **kwargs)
    if buffer:
        # TODO: Make getters/setters that update StringSize when changed
        obj.RawAsciiData = buffer.read(ctypes.sizeof(ctypes.c_char) * obj.StringSize.value)
    else:
        obj.RawAsciiData = b''
    return obj
def unreadpack(fs: io.IOBase, f: str) -> Tuple:
    s = struct.calcsize(f)
    buf = fs.read(s)
    if buf == b'':
        raise EOFError
    if len(buf) != s:
        raise Exception('not enough data')
    return struct.unpack(f, buf)
def create_from_buffer(cls, buffer: IOBase, file_header: XTFFileHeader = None):
    obj = super().create_from_buffer(buffer)
    n_bytes = obj.NumBytesThisRecord - ctypes.sizeof(cls)
    obj.data = buffer.read(n_bytes)
    return obj
def create_from_buffer(cls, buffer: IOBase, file_header: XTFFileHeader = None):
    obj = super().create_from_buffer(buffer)

    # TODO: Make getters/setters that update StringSize when changed
    obj.RawAsciiData = buffer.read(ctypes.sizeof(ctypes.c_char) * obj.StringSize.value)
    return obj
def deserialize(
    stream: IOBase,
    content_len: int,
    remote_ip: str,
) -> Any:
    """
    Deserialize stream using information from server.

    Generic deserialization for the following formats:
    1. single utf-8 string without whitespaces
    2. utf-8 string and raw byte data separated by the first zero byte
    3. two utf-8 strings separated by the first space in the stream
    4. utf-8 string and a flag represented by a single '!' and separated by a space

    Parameters
    ----------
    stream : IOBase
        Stream of request body.
    content_len : int
        Length of request body.
    remote_ip : str
        IP address of client.

    Returns
    -------
    Any:
        One of the options described above
    """
    it = iter(stream.read(content_len))
    has_blob = False
    path = b''
    for b in it:
        b = bytes([b])
        if b == b' ':
            break
        if b == b'\0':
            has_blob = True
            break
        path += b
    else:
        return (path.decode('utf-8'), ) if path else ()

    path = path.decode('utf-8')
    if has_blob:
        return path, bytes(it)

    b = bytes([next(it)])
    if b == b'!':
        nb = bytes(it)
        if not nb:
            return path, True
        b += nb
    b += bytes(it)
    return path, b.decode('utf-8')
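
# Illustrative usage sketch (not from the original source); the IP value is a
# placeholder and io.BytesIO stands in for the request-body stream.
from io import BytesIO

body = b'some/path payload text'
deserialize(BytesIO(body), len(body), '127.0.0.1')
# -> ('some/path', 'payload text')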
def checksum_mzml_stream(stream: io.IOBase) -> Tuple[str, bytes]:
    """Calculate the SHA1 checksum of an indexed mzML file for the purposes
    of validating the checksum at the end of the file.

    Parameters
    ----------
    stream : file-like
        A file-like object supporting a `read` method.

    Returns
    -------
    calculated_checksum: str
        The hex digest of the checksum calculated from the file's contents up
        to and including the first occurrence of <fileChecksum>.
    observed_checksum: bytes or :const:`None`
        The checksum written in the file within the <fileChecksum> tag.
        Will be :const:`None` if the tag is not found and closed.
        Expected to match the calculated checksum.
    """
    hasher = hashlib.sha1()
    target = b"<fileChecksum>"
    target_pattern = re.compile(b"(" + target + b")")
    extract_checksum = re.compile(br"<fileChecksum>\s*(\S+)\s*</fileChecksum>")
    block_size = int(2 ** 12)
    chunk = stream.read(block_size)
    hit_target = False
    observed_checksum = None
    while chunk:
        tokens = target_pattern.split(chunk)
        for token in tokens:
            hasher.update(token)
            if token == target:
                hit_target = True
                chunk += stream.read(5000)
                observed_checksum = extract_checksum.findall(chunk)
                if observed_checksum:
                    observed_checksum = observed_checksum[0]
                else:
                    observed_checksum = None
                break
        if hit_target:
            break
        chunk = stream.read(block_size)
    return hasher.hexdigest(), observed_checksum
def load(source: IOBase) -> Sequence[str]:
    """Loads translatable strings from a Javascript file.

    This function finds all occurrences of the translation functions ``_``
    and ``_N``, extracts their first argument and then performs ``eval`` on
    that expression.
    """
    data = source.read()
    for m in TRANS_RE.finditer(data):
        yield eval(compile('({})'.format(m.group(1)), '__code__', 'eval'))
def get_sha1(obj: IOBase) -> str:
    """A function to get sha1 in a memory-efficient way.

    Returns the hexdigest of obj.
    obj = io object to digest
    """
    hashhold = hashlib.sha1()
    try:
        for chunk in iter(lambda: obj.read(4096), b""):
            hashhold.update(chunk)
        obj.seek(0)
    except AttributeError:
        hashhold.update(obj)
    return hashhold.hexdigest()
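
# Illustrative usage sketch (not from the original source): the AttributeError
# fallback means raw bytes and a binary stream produce the same digest.
import io

assert get_sha1(io.BytesIO(b'abc')) == get_sha1(b'abc')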
def verify(module: Module, builtin: Module, logger=logging.getLogger(), source: IOBase = None):
    # Set up the initial state before verifying
    State.init(builtin, logger.getChild("lekvar"))
    State.logger.info(module.context)

    try:
        module.verify()
    except CompilerError as e:
        if source is not None:
            source.seek(0)
            e.format(source.read())
        raise e
def calc_size_and_sha265(content: io.IOBase, chunk_size: int):
    """Calculates the size and the sha256 value of the content."""
    size = 0
    sha256 = hashlib.sha256()
    content.seek(0, io.SEEK_SET)
    while True:
        buf = content.read(chunk_size)
        length = len(buf)
        size += length
        sha256.update(buf)
        if length != chunk_size:
            break
    return size, sha256.hexdigest()
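
# Illustrative usage sketch (not from the original source); io.BytesIO stands
# in for any seekable binary stream.
import io

size, digest = calc_size_and_sha265(io.BytesIO(b'hello world'), chunk_size=4)
# size == 11, digest == hashlib.sha256(b'hello world').hexdigest()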
def calc_hash(src: io.IOBase, m: hashlib._hashlib.HASH) -> str:
    """Calculate the hash of the io-src using the specified hash object."""
    if src is None:
        raise Exception("Invalid src for hash calc")
    if m is None:
        raise Exception("Invalid hash m for hash calc")
    while True:
        b = src.read(4096)
        if not b:
            break
        m.update(b)
    res = m.hexdigest()
    return res
def deserialize(
    stream: IOBase,
    message: Union[Type[Message], Message],
    max_size: int = MB,
) -> Message:
    if isinstance(message, type):
        message: Message = message()
    contents = stream.read(max_size + 1)
    if len(contents) > max_size:
        raise BufferError('Message content length is greater than max_size')
    text_format.Parse(contents, message)
    return message
def __new__(cls, buffer: IOBase = None):
    if buffer:
        if type(buffer) in [bytes, bytearray]:
            buffer = BytesIO(buffer)

        header_bytes = buffer.read(ctypes.sizeof(cls))
        if not header_bytes:
            raise RuntimeError('XTF file shorter than expected (end hit while reading {})'.format(cls.__name__))

        obj = cls.from_buffer_copy(header_bytes)
    else:
        obj = super().__new__(cls)

    return obj
def create_from_buffer(cls, buffer: IOBase, file_header=None):
    if type(buffer) in [bytes, bytearray]:
        buffer = BytesIO(buffer)

    # Read bytes up until the variable-sized data
    base_bytes = buffer.read(cls.TX.offset)
    n_bytes = ctypes.c_uint32.from_buffer_copy(base_bytes, cls.NumberOfBytes.offset).value
    n_tx = ctypes.c_uint16.from_buffer_copy(base_bytes, cls.Ntx.offset).value
    n_rx = ctypes.c_uint16.from_buffer_copy(base_bytes, cls.Nrx.offset).value

    # Read remaining bytes
    remaining_bytes = buffer.read(n_bytes - cls.TX.offset + cls.NumberOfBytes.size)

    # Create new class dynamically with string array at the correct size
    new_name = cls.__name__ + '_ntx{}_nrx{}'.format(n_tx, n_rx)
    new_fields = cls._fields_.copy()
    tx_idx = [i for i, (name, fieldtype) in enumerate(cls._fields_) if name == 'TX'][0]
    rx_idx = [i for i, (name, fieldtype) in enumerate(cls._fields_) if name == 'RX'][0]
    new_fields[tx_idx] = ('TX', KMRawRangeAngle78_TX * n_tx)
    new_fields[rx_idx] = ('RX', KMRawRangeAngle78_RX * n_rx)
    new_cls = type(new_name, (ctypes.LittleEndianStructure,), {
        '__str__': cls.__str__,
        '_pack_': cls._pack_,
        '_fields_': new_fields
    })

    all_bytes = base_bytes + remaining_bytes
    obj = new_cls.from_buffer_copy(all_bytes)

    # Checksum (not crc16, but a straight sum of bytes with overflow)
    chk = (sum(all_bytes[new_cls.DatagramType.offset:new_cls.EndID.offset]) & 0xFFFF)
    if chk != obj.Checksum:
        warning_str = '{}: Checksum failed'.format(cls.__name__)
        warnings.warn(warning_str)

    return obj
def get_md5_from_stream(src: io.IOBase) -> str:
    """Calculate the md5 of the src stream.

    The stream may come from a file (mode='rb'), a network stream, a StringIO
    or any other readable object opened as a BINARY stream.
    This method will NOT close the stream!
    Returns the MD5 hex digest.
    """
    if not isinstance(src, io.IOBase) or not src.readable():
        raise Exception("src is not stream or unreadable")
    m: hashlib._hashlib.HASH = hashlib.md5()
    while True:
        b = src.read(4096)
        if not b:
            break
        m.update(b)
    res = m.hexdigest()
    return res
def read_bytes(n: int, reader: io.IOBase) -> bytes:
    """
    Reads the specified number of bytes from the reader. It raises an
    `EOFError` if the specified number of bytes is not available.

    Parameters:
    - `n`: The number of bytes to read;
    - `reader`: The reader;

    Returns the bytes read.
    """
    buff = reader.read(n)
    if not isinstance(buff, bytes):
        raise ValueError('The reader is expected to return bytes.')
    if len(buff) != n:
        raise EOFError(f'Unable to read {n} bytes from the stream.')
    return buff
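
# Illustrative usage sketch (not from the original source): reading fewer
# bytes than requested raises EOFError instead of returning a short buffer.
import io

read_bytes(4, io.BytesIO(b'\x89PNG\r\n'))   # -> b'\x89PNG'
# read_bytes(16, io.BytesIO(b'short'))      # would raise EOFError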
def create_from_buffer(cls, buffer: IOBase, file_header=None):
    """
    Initializes the XTF structure by copying from the target buffer.
    Note: not to be confused with .from_buffer and .from_buffer_copy, which are the direct ctypes functions.
    :param buffer: Input bytes
    :param file_header: XTFFileHeader, only necessary for XTFPingHeader
    :return:
    """
    if type(buffer) in [bytes, bytearray]:
        buffer = BytesIO(buffer)

    header_bytes = buffer.read(ctypes.sizeof(cls))
    if not header_bytes:
        raise RuntimeError(
            'XTF file shorter than expected (end hit while reading {})'.format(cls.__name__))

    return cls.from_buffer_copy(header_bytes)
def parse(self, code: IOBase, fname='<string>', name='_pysh_func') -> ast.AST:
    # Get an AST from the input code
    node = ast.parse(code.read())

    # Now wrap the script in a function so we can get a reference to it.
    wrapper = ast.parse('def {}(): pass'.format(name))
    wrapper.body[0].body = node.body

    for parser in self.parsers:
        wrapper = _apply_transform(parser, wrapper, fname=fname)

    # Ensure locations are ok before compiling
    ast.fix_missing_locations(wrapper)
    return wrapper
def _upload_chunks(cls, rfile: BootResourceFile, content: io.IOBase, chunk_size: int, progress_callback=None):
    """Upload the `content` to `rfile` in chunks using `chunk_size`."""
    content.seek(0, io.SEEK_SET)
    upload_uri = urlparse(cls._handler.uri)._replace(path=rfile._data['upload_uri']).geturl()
    uploaded_size = 0

    while True:
        buf = content.read(chunk_size)
        length = len(buf)
        if length > 0:
            uploaded_size += length
            cls._put_chunk(upload_uri, buf)
            if progress_callback is not None:
                progress_callback(uploaded_size / rfile.size)
        if length != chunk_size:
            break
def readfile(stream: IOBase, writer: csv.DictWriter, formver: List[int], maxcount: int = 20):
    count = 0
    last = 0
    offset = 8
    last_end = 8
    while True:
        stream.seek(offset)
        buf = stream.read(4)
        if buf == b'' or len(buf) < 4:
            logger.info('no more data.')
            break

        # read data as if offset
        data = struct.unpack('<I', buf)
        if data[0] == offset:
            # yes, we have an equal
            stream.seek(offset)  # go back a bit
            fr = Frame.read(stream, formver)
            told = stream.tell()

            dct = fr.to_dict(formver[0])
            dct['start'] = offset
            dct['end'] = told
            dct['offby'] = offset - last_end
            dct['size'] = offset - last
            dct['asdf'] = [fr.channel, f'({fr.flags}) {fr.flags:016b}']
            writer.writerow(dct)
            # print(
            #     'match at', offset, 'now', dct['now'], 'size', offset - last, 'asd', now-offset-fr.headersize,
            #     fr.to_dict(format=3, fields=['offset', 'index', 'latitude', 'packetsize', 'headersize'])
            # )
            last_end = told
            last = offset
            count += 1

        offset += 1
        if count >= maxcount:
            break
    return count
def attach_stream(self, stream: IOBase, file_name: str) -> Message:
    """Read a stream into an attachment and attach to this message.

    This method returns the object, so you can chain it like::

        msg.attach(file_handle, "test.txt").attach(byte_stream, "test.bin")

    Args:
        stream (IOBase): The stream to read from.
        file_name (str): The name of the file, used for MIME type identification.

    Returns:
        Message: this Message, for chaining.
    """
    mime_type = mimetypes.guess_type(file_name)[0]

    # it's possible we get a file that doesn't have a mime type, like a
    # Linux executable, or a mach-o file - in that case just set it
    # to octet-stream as a generic stream of bytes
    if mime_type is None:
        main_type, sub_type = ("application", "octet-stream")
    else:
        main_type, sub_type = mime_type.split("/")

    attachment = MIMEPart()

    # we need special handling for set_content with datatype of str, as
    # for some reason this method doesn't like 'maintype'
    # see: https://docs.python.org/3/library/
    # email.contentmanager.html#email.contentmanager.set_content
    content_args = {"subtype": sub_type}
    if main_type != "text":
        content_args["maintype"] = main_type

    file_name = path.basename(file_name)
    attachment.set_content(stream.read(), filename=file_name,
                           disposition="attachment", **content_args)
    self.attachments.append(attachment)
    return self
def streamcopy(from_: IOBase, to_: IOBase, size=-1, chunk_size=163840):
    '''Copies content from one buffer to the other, chunk by chunk.

    NOTE: Neither from_ nor to_ has to be an IOBase object; any object works
    as long as it has read() / write() calls.

    Args:
        from_ (IOBase): Stream to copy from
        to_ (IOBase): Stream to copy to
        size (int, optional): Length to be copied. Defaults to -1.
        chunk_size (int, optional): Size of chunk. Defaults to 163840.

    Returns:
        int : copied length
    '''
    if not size:
        return 0
    size, copied = int(size), 0
    if size < 0:
        # read until EOF
        def copychunk():
            chunk = from_.read(chunk_size)
            if not chunk:
                return 0
            to_.write(chunk)
            return len(chunk)
        while True:
            copied_ = copychunk()
            if not copied_:
                break
            copied += copied_
    else:
        # read `size` bytes
        for offset in range(0, size, chunk_size):
            remaining = size - offset
            chunk = from_.read(remaining if remaining < chunk_size else chunk_size)
            if not chunk:
                break
            copied += len(chunk)
            to_.write(chunk)
    return copied
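
# Illustrative usage sketch (not from the original source): copy only the
# first 5 bytes between two in-memory buffers.
from io import BytesIO

src, dst = BytesIO(b'hello world'), BytesIO()
copied = streamcopy(src, dst, size=5)
# copied == 5, dst.getvalue() == b'hello'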
def load(source: IOBase) -> Sequence[str]:
    """Loads translatable strings from an HTML 5 file.

    This function will find all elements with a ``data-trans`` attribute. If
    the attribute is empty, the text content, after normalisation by removing
    redundant whitespace, is used as the translatable string; otherwise the
    value of the attribute indicated by the value is used.

    :param source: The source file.
    :returns: a sequence of translatable strings
    """
    for el in document_fromstring(source.read()).findall('.//*[@{}]'.format(ATTR)):
        attrib = el.attrib[ATTR]
        if attrib:
            if attrib in el.attrib:
                yield el.attrib[attrib]
            else:
                raise ValueError('unknown attribute {}'.format(attrib))
        else:
            yield ' '.join(el.text_content().split())
async def _upload_chunks(cls, rfile: BootResourceFile, content: io.IOBase, chunk_size: int, progress_callback=None):
    """Upload the `content` to `rfile` in chunks using `chunk_size`."""
    content.seek(0, io.SEEK_SET)
    upload_uri = urlparse(cls._handler.uri)._replace(path=rfile._data['upload_uri']).geturl()
    uploaded_size = 0

    insecure = cls._handler.session.insecure
    connector = aiohttp.TCPConnector(verify_ssl=(not insecure))
    session = aiohttp.ClientSession(connector=connector)

    async with session:
        while True:
            buf = content.read(chunk_size)
            length = len(buf)
            if length > 0:
                uploaded_size += length
                await cls._put_chunk(session, upload_uri, buf)
                if progress_callback is not None:
                    progress_callback(uploaded_size / rfile.size)
            if length != chunk_size:
                break
def header_and_file(infile: IOBase, bytesio=False, only_header=False) -> tuple:
    """Opens filename and splits the header from the file.

    Returns a tuple of header (str) and file (bytes).

    file = file to split
    rootdir = the root of all media storage sent to the default_storage class
    bytesio = return file in bytesio mode instead of as bytes
    only_header = discard the file after reading and return a tuple of the header (str) and None
    """
    infile.seek(0)
    header = b""
    addnext = b""

    # iterate until the end of the header
    while addnext != b"---END HEADER---\n":
        addnext = infile.readline()
        header += addnext

    # add the rest of the file to retfile
    if not only_header:
        if bytesio:
            retfile = infile
        else:
            retfile = infile.read()
            infile.close()

    return (header.decode("ascii"), None if only_header else retfile)
def deserialize_list(
    stream: IOBase,
    content_len: int,
    remote_ip: str,
) -> List[str]:
    """
    Deserialize list.

    Parameters
    ----------
    stream : IOBase
        Stream of request body.
    content_len : int
        Length of request body.
    remote_ip : str
        IP address of client.

    Returns
    -------
    List[str]:
        Deserialized list.
    """
    tmp = stream.read(content_len).decode('utf-8')
    return tmp.split()
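
# Illustrative usage sketch (not from the original source); the IP value is a
# placeholder and io.BytesIO stands in for the request-body stream.
from io import BytesIO

body = b'alpha beta gamma'
deserialize_list(BytesIO(body), len(body), '127.0.0.1')
# -> ['alpha', 'beta', 'gamma']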
def _read_string(fp: IOBase, wide: bool = False) -> str:
    buf, end = b"", -1
    offset = fp.tell()

    # locate string end
    while end == -1:
        chunk = fp.read(64)
        if not chunk:
            raise VDFDecodeError(f"Unterminated cstring (offset: {offset})")
        buf += chunk
        end = buf.find(b"\x00\x00" if wide else b"\x00")

    if wide:
        end += end % 2

    # rewind fp
    fp.seek(end - len(buf) + (2 if wide else 1), 1)

    # decode string
    result = buf[:end]
    return result.decode("utf-16") if wide else result.decode("utf-8", "replace")
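
# Illustrative usage sketch (not from the original source): read a
# NUL-terminated UTF-8 string and leave the stream just past the terminator.
from io import BytesIO

fp = BytesIO(b'hello\x00rest')
_read_string(fp)   # -> 'hello'
fp.read()          # -> b'rest'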
def load(self, stream: IOBase) -> Any:
    return stream.read().decode('utf-8')
def load(self, stream: IOBase) -> Any:
    return stream.read()
def get_file_data_and_close_file(file_instance: io.IOBase) -> bytes:
    file_data = file_instance.read()
    file_instance.close()
    return file_data
def process_text(db: Database, source: Source, text: IOBase) -> Optional[Exception]:
    session = get_session(db)
    line_no = 1  # lol
    ultimate_text = ''
    futures = []

    source.content = ''
    session.add(source)
    session.commit()  # so we can attach phrases to it. need its id.

    line_queue = Queue()
    error_queue = Queue()
    db_proc = Process(target=line_handler,
                      args=(db, line_queue, error_queue, source.id))
    db_proc.start()

    chunk = text.read(CHUNK_SIZE)
    while len(chunk) > 0:
        line_buff = ""
        for c in chunk:
            if BAD_CHARS.get(c, False):
                if not line_buff.endswith(' '):
                    line_buff += ' '
                continue
            if CLAUSE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    ultimate_text += line_buff
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                    line_buff = ""
                else:
                    line_buff += c
                continue
            if SENTENCE_MARKERS.get(c, False):
                if len(line_buff) > LONG_ENOUGH:
                    ultimate_text += line_buff
                    line_queue.put((line_no, line_buff))
                    line_no += 1
                    line_buff = ""
                continue
            if c == ' ' and line_buff.endswith(' '):
                continue
            if c == "'" and line_buff.endswith(' '):
                continue
            if c == "'" and peek(text, 1) == ' ':
                continue
            line_buff += c
        chunk = text.read(CHUNK_SIZE)

    line_queue.put(DONE_READING)
    db_proc.join()

    error = None
    if error_queue.empty():
        source.content = ultimate_text
        session.add(source)
    else:
        error = error_queue.get()
        session.delete(source)

    result = None
    if error is None:
        result = source.id
    else:
        result = error

    session.commit()
    session.close()
    return result
def create_from_buffer(cls, buffer: IOBase, file_header: XTFFileHeader = None):
    if not file_header:
        raise RuntimeError('Initialization of XTFPingHeader from buffer requires file_header to be passed.')

    obj = super().create_from_buffer(buffer=buffer)

    obj.ping_chan_headers = []  # type: List[XTFPingChanHeader]
    obj.data = None

    # Sonar and bathy has a different data structure following the header
    if obj.HeaderType == XTFHeaderType.sonar:
        obj.data = []  # type: List[np.ndarray]

        bytes_remaining = obj.NumBytesThisRecord - ctypes.sizeof(XTFPingHeader)

        for i in range(0, obj.NumChansToFollow):
            # Retrieve XTFPingChanHeader for this channel
            p_chan = XTFPingChanHeader.create_from_buffer(buffer=buffer)
            obj.ping_chan_headers.append(p_chan)
            bytes_remaining -= ctypes.sizeof(XTFPingChanHeader)

            # Backwards-compatibility: retrieve from NumSamples if possible, else use old field
            n_samples = p_chan.NumSamples if p_chan.NumSamples > 0 else file_header.sonar_info[i].Reserved

            # Calculate number of bytes to read
            n_bytes = n_samples * file_header.sonar_info[i].BytesPerSample
            if n_bytes > bytes_remaining:
                raise RuntimeError('Number of bytes to read exceeds the number of bytes remaining in packet.')

            # Read the data and output as a numpy array of the specified bytes-per-sample
            samples = buffer.read(n_bytes)
            if not samples:
                raise RuntimeError('File ended while reading data packets (file corrupt?)')
            bytes_remaining -= len(samples)

            # Favor getting the sample format from the dedicated field added in X41.
            # If the field is not populated, deduce the type from the bytes per sample field.
            if file_header.sonar_info[i].SampleFormat in sample_format_dtype:
                sample_format = sample_format_dtype[file_header.sonar_info[i].SampleFormat]
            else:
                sample_format = xtf_dtype[file_header.sonar_info[i].BytesPerSample]

            samples = np.frombuffer(samples, dtype=sample_format)
            obj.data.append(samples)

    elif obj.HeaderType == XTFHeaderType.bathy_xyza:
        # Bathymetry uses the same header as sonar, but without the XTFPingChanHeaders

        # TODO: Should the sub-channel number be used to index chan_info (?)
        # sub_chan = obj.SubChannelNumber

        # Read the data that follows
        n_bytes = obj.NumBytesThisRecord - ctypes.sizeof(XTFPingHeader)
        samples = buffer.read(n_bytes)
        if not samples:
            warn('XTFBathyHeader without any data encountered.')

        # Processed bathy data consists of repeated XTFBeamXYZA structures
        # Note: Using a ctypes array is a _lot_ faster than constructing a list of BeamXYZA
        num_xyza = n_bytes // ctypes.sizeof(XTFBeamXYZA)
        xyza_array_type = XTFBeamXYZA * num_xyza
        xyza_array_type._pack_ = 1
        obj.data = xyza_array_type.from_buffer_copy(samples)

    elif obj.HeaderType == XTFHeaderType.reson_7018_watercolumn:
        # 7018 water column consists of XTFPingHeader followed by (one?) XTFPingChanHeader, then vendor data

        # Retrieve XTFPingChanHeader
        p_chan = XTFPingChanHeader.create_from_buffer(buffer=buffer)
        obj.ping_chan_headers.append(p_chan)

        # Read the data that follows
        n_bytes = obj.NumBytesThisRecord - ctypes.sizeof(XTFPingHeader) - ctypes.sizeof(XTFPingChanHeader)
        samples = buffer.read(n_bytes)
        if not samples:
            warn('XTFPingHeader (Reson7018) without any data encountered.')

        obj.data = samples
    else:
        # Generic XTFPingHeader construction
        n_bytes = obj.NumBytesThisRecord - ctypes.sizeof(XTFPingHeader)
        samples = buffer.read(n_bytes)
        if not samples and n_bytes > 0:
            warn('XTFPingHeader without any data encountered.')

        # The data is the raw bytes following the header
        obj.data = samples

    return obj