def parse_file(self):
    """Feed the file's lines to ``check_line`` until it reports a hit.

    Resets ``self.is_bom`` to ``False`` first. When ``self.lines`` is
    available those lines are scanned directly; otherwise ``detect_bom``
    is run on ``self.source`` and the lines are read from it through
    ``FileReader``. Line numbers passed to ``check_line`` start at 1, and
    scanning stops at the first line for which it returns a truthy value.
    """
    self.is_bom = False

    def scan(rows):
        # Stop as soon as check_line signals that no further lines are needed.
        for number, text in enumerate(rows, start=1):
            if self.check_line(text, number):
                return

    if self.lines is not None:
        scan(self.lines)
        return

    # No pre-loaded lines: inspect the source itself, then read it.
    self.detect_bom(self.source)
    with FileReader(self.source) as reader:
        scan(reader.readlines())
def _load(self, path):
    """
    Quick first pass: read the file and organize its rows by table.

    Each table is nothing more than a list of rows, and each row is a
    list of cells. Further parsing, such as combining rows into
    statements, is done on demand; this pass only reads the plain text,
    stores it in ``self.raw_text`` and fills ``self.tables``.
    """
    # A heading row starts with one or more '*'; group 1 captures the name.
    heading_pattern = r'^\s*\*+\s*(.*?)[ *]*$'

    self.tables = []
    # Rows seen before the first heading go to this table, which is not
    # added to self.tables.
    active_table = DefaultTable(self)

    with FileReader(path) as f:
        # N.B. the caller should be catching errors
        self.raw_text = f.read()
        # Rewind so the same content can be walked again line by line.
        f.file.seek(0)

        heading = Matcher(re.IGNORECASE)
        # Line numbers are 1-based.
        for lineno, line in enumerate(f.readlines(), start=1):
            # this mimics what the robot TSV reader does --
            # it replaces non-breaking spaces with regular spaces,
            # and then strips trailing whitespace
            line = line.replace(u'\xA0', ' ').rstrip()

            # FIXME: line numbers are kept, but the position where each
            # cell starts is thrown away. Preserving it would need a
            # custom splitter, since robot discards that information too.
            cells = self.split_row(line)

            if not heading(heading_pattern, cells[0]):
                # Ordinary row: it belongs to whichever table is current.
                active_table.append(Row(lineno, line, cells))
                continue

            # we've found the start of a new table
            active_table = tableFactory(self, lineno, heading.group(1), line)
            self.tables.append(active_table)
def load_args_from_file(self, argfile):
    """Read command line arguments from an argument file.

    Lines whose first non-blank character is ``#`` are skipped, as are
    blank lines. Every other line is split at its first space into at
    most two arguments (typically an option and its value), each stripped
    of surrounding whitespace.

    When at least one argument is found, ``self.config_from`` is set to
    ``argfile``.

    Args:
        argfile: Source handed to ``FileReader``.

    Returns:
        List of argument strings in file order; may be empty.

    Raises:
        NestedArgumentFileError: If the file itself contains ``-A`` or
            ``--argumentfile``.
        ArgumentFileNotFoundError: If reading raises ``FileNotFoundError``.
    """
    try:
        with FileReader(argfile) as arg_f:
            args = []
            for line in arg_f.readlines():
                # Strip before splitting: an indented line would otherwise be
                # split at its leading space, gluing the option and its value
                # together into a single argument.
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                for arg in line.split(" ", 1):
                    arg = arg.strip()
                    if arg:
                        args.append(arg)
            if "-A" in args or "--argumentfile" in args:
                raise NestedArgumentFileError(argfile)
            if args:
                self.config_from = argfile
            return args
    except FileNotFoundError:
        # Hide the low-level error; callers get the domain-specific one.
        raise ArgumentFileNotFoundError(argfile) from None
def _read(self, source):
    """Return everything ``FileReader`` reads from ``source``.

    ``accept_text=True`` is forwarded to ``FileReader``
    (presumably so that ``source`` may be the text itself rather than a
    path -- confirm against FileReader).

    Raises:
        DataError: If opening or reading fails; the message comes from
            ``get_error_message()``.
    """
    try:
        with FileReader(source, accept_text=True) as reader:
            return reader.read()
    # Catch Exception rather than everything, so KeyboardInterrupt and
    # SystemExit are not converted into DataError.
    except Exception:
        raise DataError(get_error_message())
def _get_source(self, source):
    """Open ``source`` with ``FileReader`` and return ``read_rest_data``'s result."""
    with FileReader(source) as rest_reader:
        data = read_rest_data(rest_reader)
    return data
def test_accept_text(self):
    """A text source with ``accept_text=True`` reads as '<in-memory file>'."""
    with FileReader(STRING, accept_text=True) as text_reader:
        assert_reader(text_reader, '<in-memory file>')
    # Once the block is left, the reader's file must be closed.
    assert_closed(text_reader.file)
def test_bytesio(self):
    """A BytesIO holding BOM + UTF-8 data is readable and stays open."""
    payload = self.BOM + STRING.encode('UTF-8')
    stream = BytesIO(payload)
    with FileReader(stream) as bytes_reader:
        assert_reader(bytes_reader, '<in-memory file>')
    # The stream handed in by the caller must still be open afterwards.
    assert_open(stream)
def test_stringio(self):
    """A StringIO source is readable and stays open after the reader exits."""
    stream = StringIO(STRING)
    with FileReader(stream) as string_reader:
        assert_reader(string_reader, '<in-memory file>')
    # The stream handed in by the caller must still be open afterwards.
    assert_open(stream)
def test_open_binary_file(self):
    """A file opened in binary mode is read, and closed only by its owner."""
    with open(PATH, 'rb') as binary_file:
        with FileReader(binary_file) as file_reader:
            assert_reader(file_reader)
        # Leaving the reader alone must not close the file.
        assert_open(binary_file, file_reader.file)
    # Leaving the outer ``open`` block closes it.
    assert_closed(binary_file, file_reader.file)
def test_codecs_open_file(self):
    """A file opened with ``codecs.open`` is read, and closed only by its owner."""
    with codecs.open(PATH, encoding='UTF-8') as codecs_file:
        with FileReader(codecs_file) as file_reader:
            assert_reader(file_reader)
        # Leaving the reader alone must not close the file.
        assert_open(codecs_file, file_reader.file)
    # Leaving the outer ``codecs.open`` block closes it.
    assert_closed(codecs_file, file_reader.file)
def test_path_as_pathlib_path(self):
    """A ``pathlib.Path`` source is read and its file closed on exit."""
    source = Path(PATH)
    with FileReader(source) as path_reader:
        assert_reader(path_reader)
    assert_closed(path_reader.file)
def test_path_as_string(self):
    """``PATH`` passed to FileReader as-is is read and its file closed on exit."""
    with FileReader(PATH) as path_reader:
        assert_reader(path_reader)
    # Once the block is left, the reader's file must be closed.
    assert_closed(path_reader.file)
def test_invalid_encoding(self):
    """Reading a file written as ISO-8859-5 raises ``UnicodeDecodeError``."""
    # Last whitespace-separated word of STRING (called ``russian`` in the
    # original; presumably Cyrillic text -- confirm against STRING).
    last_word = STRING.split()[-1]
    target = os.path.join(TEMPDIR, 'filereader.iso88595')
    self._create(last_word, target, encoding='ISO-8859-5')
    with FileReader(target) as file_reader:
        # Pass the bound method itself; assert_raises is given the callable.
        assert_raises(UnicodeDecodeError, file_reader.read)
def test_readlines(self):
    """``readlines`` yields the same lines as ``STRING.splitlines(True)``."""
    with FileReader(PATH) as file_reader:
        actual_lines = list(file_reader.readlines())
        # ``True`` keeps the line endings in the expected lines.
        expected_lines = STRING.splitlines(True)
        assert_equal(actual_lines, expected_lines)
def test_path_as_path(self):
    """A ``pathlib.Path`` source is read and its file closed on exit."""
    # pathlib is imported locally, inside the test itself.
    import pathlib

    source = pathlib.Path(PATH)
    with FileReader(source) as path_reader:
        assert_reader(path_reader)
    assert_closed(path_reader.file)
def test_open_text_file(self):
    """A file opened in text mode is read, and closed only by its owner."""
    # NOTE(review): no encoding is passed, so open() uses the platform
    # default -- confirm that this is intended for the data at PATH.
    with open(PATH) as text_file:
        with FileReader(text_file) as file_reader:
            assert_reader(file_reader)
        # Leaving the reader alone must not close the file.
        assert_open(text_file, file_reader.file)
    # Leaving the outer ``open`` block closes it.
    assert_closed(text_file, file_reader.file)