Example #1
def _opencrx(f: TextIO) -> str:
    """
    Conversion to string is necessary because of a quirk where gzip.open(), even with 'rt', doesn't decompress until read.
    """
    exe = './crx2rnx'
    shell = False
    if os.name == 'nt':
        exe = exe[2:]
        shell = True

    try:
        In = f.read()
        ret = subprocess.check_output([exe, '-'],
                                      input=In,
                                      universal_newlines=True,
                                      cwd=R / 'rnxcmp',
                                      shell=shell)
    except FileNotFoundError as e:
        raise FileNotFoundError(
            f'trouble converting Hatanaka file, did you compile the crx2rnx program? {e}'
        )

    return ret
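
This snippet assumes module-level context the excerpt omits; a minimal sketch of what it might look like (the exact definition of R is an assumption):

# Assumed context for the snippet above, not part of the original excerpt.
import os
import subprocess
from pathlib import Path
from typing import TextIO

R = Path(__file__).resolve().parent  # package root; R / 'rnxcmp' holds the RNXCMP tools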
Example #2
def reversed_blocks(f: TextIO, blocksize: int = 4096):
    """Generate blocks of the file's contents in reverse order."""
    f.seek(0, os.SEEK_END)
    here = f.tell()
    while 0 < here:
        delta = min(blocksize, here)
        here -= delta
        f.seek(here, os.SEEK_SET)
        yield f.read(delta)
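
A common companion (a sketch, not part of the original source) turns these blocks into a reverse line iterator; it assumes '\n' line endings and an encoding where byte offsets are safe to seek to, such as ASCII:

from typing import Iterator, TextIO

def reversed_lines(f: TextIO) -> Iterator[str]:
    """Yield the lines of f from last to first, built on reversed_blocks()."""
    tail = ''  # partial line carried over from the previously read (later) block
    for block in reversed_blocks(f):
        lines = (block + tail).split('\n')
        tail = lines.pop(0)  # the first piece may continue into an earlier block
        yield from reversed(lines)
    if tail:
        yield tail  # the very first line of the file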
Example #3
def opencrx(f: TextIO) -> str:
    """
    Conversion to string is necessary because of a quirk where gzip.open(), even with 'rt', doesn't decompress until read.
    """
    exe = crxexe()

    ret = subprocess.check_output([exe, "-"],
                                  input=f.read(),
                                  universal_newlines=True)

    return ret
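
The docstring's note about gzip.open() suggests the typical call pattern (a sketch; the file name is hypothetical):

import gzip

# Decompress a gzipped, Hatanaka-compressed RINEX observation file.
with gzip.open('site0010.01d.gz', 'rt') as f:
    rinex_text = opencrx(f)  # plain RINEX text, ready for parsing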
Example #4
def _opencrx(f: TextIO) -> str:
    """
    Conversion to string is necessary because of a quirk where gzip.open(), even with 'rt', doesn't decompress until read.
    """
    exe = crxexe()

    if not exe:
        raise RuntimeError(
            'Hatanaka crx2rnx not available. Did you compile it per README?')

    ret = subprocess.check_output([exe, '-'],
                                  input=f.read(),
                                  universal_newlines=True)

    return ret
Example #5
def opencrx(f: TextIO) -> str:
    """
    Conversion to string is necessary because of a quirk where gzip.open(), even with 'rt', doesn't decompress until read.
    """
    exe = crxexe()

    if not exe:
        if build() != 0:
            raise RuntimeError(
                'could not build Hatanaka converter. Do you have a C compiler?')
        exe = crxexe()
        if not exe:
            raise RuntimeError('Hatanaka converter is broken or missing.')

    ret = subprocess.check_output([exe, '-'],
                                  input=f.read(),
                                  universal_newlines=True)

    return ret
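
Examples #3 to #5 call crxexe() and build(), which are not shown. One plausible shape for them, as a sketch only: the RNXCMP source layout and the 'cc' compiler name are assumptions, not the project's actual code.

import os
import shutil
import subprocess
from pathlib import Path

R = Path(__file__).resolve().parent / 'rnxcmp'  # assumed location of the RNXCMP sources

def crxexe() -> str:
    """Return the path to the crx2rnx executable, or '' if it isn't built yet."""
    exe = R / ('crx2rnx.exe' if os.name == 'nt' else 'crx2rnx')
    return str(exe) if exe.is_file() else ''

def build() -> int:
    """Compile crx2rnx from the bundled C source; return the compiler's exit code."""
    cc = shutil.which('cc')
    if cc is None:
        return 1
    src = R / 'source' / 'crx2rnx.c'
    return subprocess.run([cc, str(src), '-O2', '-o', str(R / 'crx2rnx')]).returncode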
Example #6
def tokenize(input_fd: TextIO) -> Iterator[Tuple[Optional[str], int, int]]:
    """
    Read characters from input file one-by-one and generate stream of tokens with metadata, separated by spaces or ';'
    ';' and \n are replaced by None, meaning end of line
    escaped characters in string literal are unescaped.
    Generate tuples (value:str_or_none, line:int, start_position_in_line:int)
    """
    curr_buff = None   # current token, or None if there is no active token
    line_num = 0
    line_pos = 0
    in_str = False  # True while parsing a string literal (i.e. inside quotes)
    token_start_line = None   # line_num & line_pos at the beginning of the current token
    token_start_pos = None
    eof_reached = False

    while not eof_reached:
        ch = input_fd.read(1)

        if ch == '':
            if in_str:
                raise  ParserError("Unclosed string at the end of file at line {}".format(line_num))
            ch = '\n'   # hack to flush current buffer
            eof_reached = True

        line_pos += 1

        if in_str:
            if ch == '\\':
                ch = input_fd.read(1)
                if ch == '':
                    return

                line_pos += 1

                if ch not in ESCAPED:
                    raise  ParserError("Failed to parse at line {}".format(line_num))

                curr_buff += ESCAPED[ch]
            elif ch == '"':
                yield curr_buff + '"', token_start_line, token_start_pos
                token_start_pos = None
                token_start_line = None
                curr_buff = None
                in_str = False
            elif ch == '\n':
                raise ParserError("Failed to parse at line {}".format(line_num))
            else:
                curr_buff += ch
        elif ch == '"':
            in_str = True
            curr_buff = ch
            token_start_pos = line_pos
            token_start_line = line_num
        elif ch.isspace():
            if curr_buff is not None:
                yield curr_buff, token_start_line, token_start_pos
                curr_buff = None
                token_start_pos = None
                token_start_line = None

            if ch == '\n':
                yield None, line_num, line_pos
                line_num += 1
                line_pos = 0

        elif ch == ';':
            yield None, line_num, line_pos
        else:
            if curr_buff is None:
                curr_buff = ch
                token_start_pos = line_pos
                token_start_line = line_num
            else:
                curr_buff += ch
    assert curr_buff is None, "Unflushed token at the end of file"
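
tokenize() relies on two names the excerpt does not define. Plausible minimal definitions (assumptions, not the original project's), followed by a usage sketch:

import io
from typing import Iterator, Optional, TextIO, Tuple

# Escape table: maps the character after a backslash to its replacement.
ESCAPED = {'"': '"', '\\': '\\', 'n': '\n', 't': '\t'}

class ParserError(Exception):
    """Raised on malformed input such as unterminated strings or bad escapes."""

# Usage sketch on an in-memory document:
for value, line, pos in tokenize(io.StringIO('foo "a\\nb"; bar\n')):
    print(repr(value), line, pos)
# Yields 'foo', the unescaped string literal '"a\nb"', None (from ';'),
# 'bar', then None for the newline and a final None from the EOF flush.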