import os
import subprocess
from pathlib import Path
from typing import Iterator, Optional, TextIO, Tuple

R = Path(__file__).parent  # assumption: R points at the directory holding the rnxcmp/ sources


def _opencrx(f: TextIO) -> str:
    """
    Convert Hatanaka-compressed RINEX to plain RINEX text via the crx2rnx tool.

    Conversion to string is necessary because of a quirk where gzip.open(),
    even with 'rt', doesn't decompress until read.
    """
    exe = './crx2rnx'
    shell = False
    if os.name == 'nt':  # Windows: drop the './' prefix and let the shell resolve the name
        exe = exe[2:]
        shell = True

    try:
        In = f.read()
        ret = subprocess.check_output([exe, '-'], input=In,
                                      universal_newlines=True,
                                      cwd=R / 'rnxcmp', shell=shell)
    except FileNotFoundError as e:
        raise FileNotFoundError(
            f'trouble converting Hatanaka file, did you compile the crx2rnx program? {e}')

    return ret
def reversed_blocks(f: TextIO, blocksize: int = 4096):
    """
    Generate blocks of a file's contents in reverse order.
    """
    f.seek(0, os.SEEK_END)
    here = f.tell()
    while 0 < here:
        delta = min(blocksize, here)
        here -= delta
        f.seek(here, os.SEEK_SET)
        yield f.read(delta)
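# A typical use of reversed_blocks() is grabbing the last line of a large file
# without reading the whole thing. The helper below is an illustrative sketch,
# not part of the source; it assumes a text file whose tell()/seek() offsets
# behave like byte positions (true for ASCII and most UTF-8 files).
def last_line(fn) -> str:
    with open(fn, 'r') as f:
        tail = ''
        for block in reversed_blocks(f):
            tail = block + tail
            # stop once a newline terminating the second-to-last line is seen
            if '\n' in tail.rstrip('\n'):
                break
        return tail.rstrip('\n').rsplit('\n', 1)[-1]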
def opencrx(f: TextIO) -> str:
    """
    Conversion to string is necessary because of a quirk where gzip.open(),
    even with 'rt', doesn't decompress until read.
    """
    exe = crxexe()
    ret = subprocess.check_output([exe, "-"], input=f.read(),
                                  universal_newlines=True)
    return ret
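# crxexe() is referenced here and below but not shown in this section.
# Presumably it returns the path to a compiled crx2rnx binary, or an empty
# string when none is found. A minimal sketch under that assumption (the
# search locations are guesses, not taken from the source):
import shutil

def crxexe() -> str:
    name = 'crx2rnx.exe' if os.name == 'nt' else 'crx2rnx'
    local = R / 'rnxcmp' / name  # bundled build directory, per _opencrx above
    if local.is_file():
        return str(local)
    return shutil.which('crx2rnx') or ''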
def _opencrx(f: TextIO) -> str:
    """
    Conversion to string is necessary because of a quirk where gzip.open(),
    even with 'rt', doesn't decompress until read.
    """
    exe = crxexe()
    if not exe:
        raise RuntimeError(
            'Hatanaka crx2rnx not available. Did you compile it per README?')

    ret = subprocess.check_output([exe, '-'], input=f.read(),
                                  universal_newlines=True)
    return ret
def opencrx(f: TextIO) -> str:
    """
    Conversion to string is necessary because of a quirk where gzip.open(),
    even with 'rt', doesn't decompress until read.
    """
    exe = crxexe()
    if not exe:
        if build() != 0:
            raise RuntimeError(
                'could not build Hatanaka converter. Do you have a C compiler?')
        exe = crxexe()
        if not exe:
            raise RuntimeError('Hatanaka converter is broken or missing.')

    ret = subprocess.check_output([exe, '-'], input=f.read(),
                                  universal_newlines=True)
    return ret
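# Typical use: open a gzipped Hatanaka observation file in text mode and hand
# the file object to opencrx(). The filename is a placeholder, and this assumes
# crx2rnx builds cleanly (build() above is likewise assumed to compile the
# bundled C source and return a zero exit code on success).
import gzip

with gzip.open('site0010.01d.gz', 'rt') as f:
    rinex_text = opencrx(f)  # plain RINEX observation text

print(rinex_text.splitlines()[0])  # RINEX version/type header line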
def tokenize(input_fd: TextIO) -> Iterator[Tuple[Optional[str], int, int]]:
    """
    Read characters from the input file one by one and generate a stream of
    tokens with metadata, separated by whitespace or ';'.

    ';' and newline are replaced by None, meaning end of line.
    Escaped characters in string literals are unescaped.

    Generates tuples (value: str or None, line: int, start_position_in_line: int).
    """
    curr_buff = None  # current token, or None if no token is active
    line_num = 0
    line_pos = 0
    in_str = False  # True while parsing a string literal (inside quotes)
    token_start_line = None  # line_num & line_pos at the start of the current token
    token_start_pos = None
    eof_reached = False

    while not eof_reached:
        ch = input_fd.read(1)
        if ch == '':
            if in_str:
                raise ParserError(
                    "Unclosed string at the end of file at line {}".format(line_num))
            ch = '\n'  # hack to flush the current buffer
            eof_reached = True
        line_pos += 1

        if in_str:
            if ch == '\\':
                ch = input_fd.read(1)
                if ch == '':
                    # EOF in the middle of an escape sequence: the string was never closed
                    raise ParserError(
                        "Unclosed string at the end of file at line {}".format(line_num))
                line_pos += 1
                if ch not in ESCAPED:
                    raise ParserError("Failed to parse at line {}".format(line_num))
                curr_buff += ESCAPED[ch]
            elif ch == '"':
                yield curr_buff + '"', token_start_line, token_start_pos
                token_start_pos = None
                token_start_line = None
                curr_buff = None
                in_str = False
            elif ch == '\n':
                raise ParserError("Failed to parse at line {}".format(line_num))
            else:
                curr_buff += ch
        elif ch == '"':
            in_str = True
            curr_buff = ch
            token_start_pos = line_pos
            token_start_line = line_num
        elif ch.isspace():
            if curr_buff is not None:
                yield curr_buff, token_start_line, token_start_pos
                curr_buff = None
                token_start_pos = None
                token_start_line = None
            if ch == '\n':
                yield None, line_num, line_pos
                line_num += 1
                line_pos = 0
        elif ch == ';':
            # ';' also terminates any pending token, as the docstring promises
            if curr_buff is not None:
                yield curr_buff, token_start_line, token_start_pos
                curr_buff = None
                token_start_pos = None
                token_start_line = None
            yield None, line_num, line_pos
        else:
            if curr_buff is None:
                curr_buff = ch
                token_start_pos = line_pos
                token_start_line = line_num
            else:
                curr_buff += ch

    assert curr_buff is None, "Unflushed token at the end of file"
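# tokenize() relies on ESCAPED and ParserError, which are defined elsewhere in
# the source. Below are plausible stand-ins (the escape table's contents are an
# assumption) plus a small usage example.
import io

ESCAPED = {'"': '"', '\\': '\\', 'n': '\n', 't': '\t'}  # assumed mapping

class ParserError(Exception):
    pass

src = 'set name "hello\\nworld"; port 8080\n'
for value, line, pos in tokenize(io.StringIO(src)):
    print(repr(value), line, pos)
# String tokens keep their surrounding quotes; None marks ';' and end of line.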