def test_empty_file(self):
    """
    Iterating reverse_readfile over an empty file must yield nothing and
    must not raise. (An earlier implementation had trouble with this
    case.)
    """
    empty_path = os.path.join(test_dir, "empty_file.txt")
    # Any yielded item at all means the reader invented content.
    for _ in reverse_readfile(empty_path):
        raise ValueError("an empty file is being read!")
def test_empty_file(self):
    """
    Reading an empty file backwards with reverse_readfile should produce
    no lines and should not raise; this guards against a failure seen in
    an earlier implementation.
    """
    target = os.path.join(test_dir, "empty_file.txt")
    reader = reverse_readfile(target)
    # The loop body only runs if the reader yields something, which an
    # empty file never should.
    for _unexpected in reader:
        raise ValueError("an empty file is being read!")
def test_reverse_readfile_gz(self):
    """
    Check that the .gz fixture, whose lines are parsed as line numbers,
    is read in reverse order: the first line yielded must equal
    NUMLINES, the next NUMLINES - 1, and so on.
    """
    gz_path = os.path.join(test_dir, "3000lines.txt.gz")
    for offset, raw in enumerate(reverse_readfile(gz_path)):
        actual = int(raw)
        expected = self.NUMLINES - offset
        failure_msg = ("read_backwards read {} whereas it should "
                       "have read {}".format(actual, expected))
        self.assertEqual(actual, expected, failure_msg)
def test_reverse_readfile_gz(self):
    """
    Check that the .gz fixture, whose lines are parsed as line numbers,
    is read in reverse order: the first line yielded must equal
    NUMLINES, the next NUMLINES - 1, and so on.
    """
    gz_path = os.path.join(test_dir, "3000_lines.txt.gz")
    for offset, raw in enumerate(reverse_readfile(gz_path)):
        actual = int(raw)
        expected = self.NUMLINES - offset
        failure_msg = ("read_backwards read {} whereas it should "
                       "have read {}".format(actual, expected))
        self.assertEqual(actual, expected, failure_msg)
def regrep(filename, patterns, reverse=False, terminate_on_match=False,
           postprocess=str):
    r"""
    A powerful regular expression version of grep.

    Args:
        filename (str): Filename to grep.
        patterns (dict): A dict mapping a key to a regex pattern, e.g.,
            {"energy": r"energy\(sigma->0\)\s+=\s+([\d\-\.]+)"}.
        reverse (bool): Read the file in reverse (via reverse_readfile).
            Defaults to False. Useful for large files, especially when
            used with terminate_on_match.
        terminate_on_match (bool): Whether to stop reading as soon as
            every key in patterns has at least one match.
        postprocess (callable): Function applied to every captured group
            of every match. Defaults to str, i.e., no change.

    Returns:
        A dict-like (collections.defaultdict) of the following form:
            {key1: [[[matches...], lineno], [[matches...], lineno], ...],
             key2: ...}
        Keys with no match are absent. For reverse reads, the lineno is
        given as a -ve number counted from the end of the file. Please
        note that 0-based indexing is used.

    Raises:
        Whatever re.compile, the file opener, or postprocess raise; the
        underlying file handle / generator is closed before the exception
        propagates.
    """
    compiled = {key: re.compile(pattern) for key, pattern in patterns.items()}
    matches = collections.defaultdict(list)
    gen = reverse_readfile(filename) if reverse else zopen(filename, "rt")
    try:
        for i, line in enumerate(gen):
            lineno = -i if reverse else i
            for key, regex in compiled.items():
                m = regex.search(line)
                if m:
                    matches[key].append(
                        [[postprocess(g) for g in m.groups()], lineno])
            # .get() avoids creating empty entries in the defaultdict for
            # keys that have not matched yet.
            if terminate_on_match and all(
                    matches.get(key) for key in compiled):
                break
    finally:
        # Always release the file handle (or finalize the generator), even
        # when the regex search or postprocess raised mid-iteration.
        try:
            gen.close()
        except Exception:
            # Best-effort cleanup: a failing close() must not mask the
            # result or an exception that is already propagating.
            pass
    return matches