def test_empty_file(self):
    """
    Check that reverse_readline copes with an empty file.

    An earlier implementation raised an error on empty input. Here the
    iteration must simply produce nothing: if even a single line is
    yielded, a ValueError is raised to flag the problem.
    """
    empty_path = os.path.join(test_dir, "empty_file.txt")
    with open(empty_path) as handle:
        # The yielded value itself is irrelevant; any yield is a failure.
        for _ in reverse_readline(handle):
            raise ValueError("an empty file is being read!")
def test_reverse_readline_fake_big(self):
    """
    Make sure that large text files are read properly.

    The file is opened in text mode and handed to reverse_readline with
    ``max_mem=0`` (presumably to force the large-file code path - confirm
    against reverse_readline). Each yielded line must parse to
    ``self.NUMLINES - idx``, where idx counts the lines yielded so far.
    """
    template = ("read_backwards read {} whereas it should "
                "have read {}")
    big_file = os.path.join(test_dir, "3000_lines.txt")
    with open(big_file, "rt") as handle:
        for offset, raw in enumerate(reverse_readline(handle, max_mem=0)):
            found = int(raw)
            wanted = self.NUMLINES - offset
            self.assertEqual(found, wanted, template.format(found, wanted))
def test_reverse_readline_fake_big(self):
    """
    Make sure that large text files are read properly.

    reverse_readline is called with ``max_mem=0`` (presumably so that the
    file is treated as too big for memory - confirm against
    reverse_readline). The n-th line yielded (counting from zero) must be
    the integer ``self.NUMLINES - n``.
    """
    path = os.path.join(test_dir, "3000_lines.txt")
    with open(path) as stream:
        reversed_lines = reverse_readline(stream, max_mem=0)
        for position, text in enumerate(reversed_lines):
            actual = int(text)
            expected = self.NUMLINES - position
            failure_msg = ("read_backwards read {} whereas it should "
                           "have read {}").format(actual, expected)
            self.assertEqual(actual, expected, failure_msg)
def test_reverse_readline_bz2(self):
    """
    Read a bz2-compressed file backwards with reverse_readline.

    Every yielded line is stripped and collected. Because the file is
    traversed in reverse, the last collected entry corresponds to the
    first line of the file, which must be "HelloWorld.".
    """
    archive = os.path.join(test_dir, "myfile_bz2.bz2")
    with zopen(archive, "rb") as handle:
        lines = [entry.strip() for entry in reverse_readline(handle)]
    final_entry = lines[-1].strip()
    self.assertEqual("HelloWorld.", final_entry)
def test_reverse_readfile_gz(self):
    """
    Check that a file of line numbers is read in reverse order, i.e. the
    first line yielded corresponds to the last line of the file.

    NOTE(review): despite the "readfile_gz" name, this test opens the
    plain-text "three_thousand_lines.txt" and uses reverse_readline -
    confirm whether a gzipped fixture / reverse_readfile was intended.
    """
    source = os.path.join(test_dir, "three_thousand_lines.txt")
    message = ("read_backwards read {} whereas it should "
               "have read {}")
    with open(source) as handle:
        for count, text in enumerate(reverse_readline(handle)):
            value = int(text)
            target = self.NUMLINES - count
            self.assertEqual(value, target, message.format(value, target))
def test_reverse_readline(self):
    """
    Check that a file of line numbers is read in reverse order.

    The first line yielded by reverse_readline must be the last line of
    the file: the idx-th yielded line has to equal ``self.NUMLINES - idx``.
    """
    numbered_file = os.path.join(test_dir, "3000_lines.txt")
    with open(numbered_file) as stream:
        for seen, content in enumerate(reverse_readline(stream)):
            got = int(content)
            should_be = self.NUMLINES - seen
            explanation = ("read_backwards read {} whereas it should "
                           "have read {}").format(got, should_be)
            self.assertEqual(got, should_be, explanation)
def track_file(self, launch_dir=None):
    """
    Reads the monitored file and returns back the last N lines

    :param launch_dir: directory where job was launched in case of relative
        filename; it is joined to ``self.filename`` when that is not absolute
    :return: the stored content, i.e. the last ``self.nlines`` lines joined
        with newlines. The content is refreshed only when the file (or a
        compressed variant located by ``zpath``) exists; otherwise the
        previously stored content is returned unchanged.
    """
    m_file = self.filename
    if launch_dir and not os.path.isabs(self.filename):
        m_file = os.path.join(launch_dir, m_file)

    # Resolve a possibly compressed variant *before* testing for existence.
    # Previously the uncompressed path was tested while zpath(m_file) was
    # opened, so a file present only in compressed form was never read.
    m_file = zpath(m_file)

    lines = []
    if os.path.exists(m_file):
        # Text mode keeps the lines as str for the join below, also when
        # the file is compressed.
        with zopen(m_file, "rt") as f:
            for line in reverse_readline(f):
                lines.append(line)
                # Stop as soon as the requested number of lines is collected.
                if len(lines) == self.nlines:
                    break
        # Lines were collected last-to-first; restore file order.
        self.content = '\n'.join(reversed(lines))
    return self.content
def track_file(self, launch_dir=None):
    """
    Read the monitored file and return its last N lines.

    Args:
        launch_dir (str): directory where the job was launched; joined to
            the tracked filename when that filename is relative.

    Returns:
        str: the stored content (last ``self.nlines`` lines joined with
            newlines). When the file does not exist, the previously
            stored content is returned untouched.
    """
    target = self.filename
    if launch_dir and not os.path.isabs(self.filename):
        target = os.path.join(launch_dir, target)
    if self.allow_zipped:
        # Let zpath pick the path (possibly a compressed variant) to read.
        target = zpath(target)

    if not os.path.exists(target):
        return self.content

    tail = []
    with zopen(target, "rt") as handle:
        for entry in reverse_readline(handle):
            tail.append(entry)
            if len(tail) == self.nlines:
                break

    # Entries were gathered from the end of the file backwards.
    tail.reverse()
    self.content = '\n'.join(tail)
    return self.content
def _parse_logfile(self, logfile):
    """
    Parse the formatted logfile.

    The file is read twice. First the last non-empty line is inspected: if
    it does not match the END pattern the job is flagged as an internal
    crash (``is_internal_crash``, ``is_failed``, ``error``) and parsing
    stops. Otherwise the file is scanned from the top to detect the run
    type and to fill ``run_type``, ``freq_type``, ``energies``,
    ``structures``, ``final_energy`` and ``final_structure``; an
    "ERROR DETECTED" line sets ``is_failed``/``error`` and ends the scan.

    Args:
        logfile: path of the logfile to open.

    Raises:
        AdfOutputError: on an unknown run type, a non-positive geometry
            cycle number, or (for runs that did not fail) a missing final
            energy / final structure.
    """
    cycle_patt = re.compile(r"Coordinates\sin\sGeometry\sCycle\s(\d+)")
    coord_patt = re.compile(r"\s+([0-9]+)\.([A-Za-z]+)" + 3 * r"\s+([-\.0-9]+)")
    energy_patt = re.compile(
        r"<.*>\s<.*>\s+current\senergy\s+([-\.0-9]+)\sHartree")
    final_energy_patt = re.compile(
        r"<.*>\s<.*>\s+Bond\sEnergy\s+([-\.0-9]+)\sa\.u\.")
    error_patt = re.compile(r"<.*>\s<.*>\s+ERROR\sDETECTED:\s(.*)")
    runtype_patt = re.compile(r"<.*>\s<.*>\s+RunType\s+:\s(.*)")
    end_patt = re.compile(r"<.*>\s<.*>\s+END")

    # Stop parsing the logfile if this job did not terminate successfully.
    # The last non-empty line of the logfile must match the end pattern.
    # Otherwise the job has some internal failure. The TAPE13 part of the
    # ADF manual has a detailed explanation.
    with open(logfile) as handle:
        last_line = next(
            (text for text in reverse_readline(handle) if text != ""), None)
    if last_line is not None and end_patt.search(last_line) is None:
        self.is_internal_crash = True
        self.error = "Internal crash. TAPE13 is generated!"
        self.is_failed = True
        return

    collecting = False       # inside the coordinate listing of a cycle
    awaiting_final = False   # a cycle number did not increase
    newest_cycle = -1
    atoms = []

    with open(logfile) as handle:
        for text in handle:
            hit = error_patt.search(text)
            if hit:
                self.is_failed = True
                self.error = hit.group(1)
                break

            if self.run_type is None:
                # Still looking for the line that announces the run type.
                hit = runtype_patt.search(text)
                if not hit:
                    continue
                label = hit.group(1)
                if label == "FREQUENCIES":
                    self.freq_type = "Numerical"
                    self.run_type = "NumericalFreq"
                elif label == "GEOMETRY OPTIMIZATION":
                    self.run_type = "GeometryOptimization"
                elif label == "CREATE":
                    self.run_type = None
                elif label == "SINGLE POINT":
                    self.run_type = "SinglePoint"
                else:
                    raise AdfOutputError("Undefined Runtype!")
                continue

            if self.run_type == "NumericalFreq":
                # Nothing further is read from the logfile for this type.
                break

            if self.run_type == "SinglePoint":
                hit = coord_patt.search(text)
                if hit:
                    fields = hit.groups()
                    # NOTE(review): group 0 (the numeric prefix) is stored
                    # here, whereas the geometry-optimization branch stores
                    # group 1 (the letters) - confirm which is intended.
                    atoms.append([fields[0], [float(v) for v in fields[2:]]])
                    continue
                hit = final_energy_patt.search(text)
                if hit:
                    self.final_energy = float(hit.group(1))
                    self.final_structure = self._sites_to_mol(atoms)
                continue

            if self.run_type != "GeometryOptimization":
                continue

            hit = cycle_patt.search(text)
            if hit:
                cycle = int(hit.group(1))
                if cycle <= 0:
                    raise AdfOutputError(f"Wrong cycle {cycle}")
                if cycle > newest_cycle:
                    collecting = True
                    newest_cycle = cycle
                else:
                    awaiting_final = True
            elif collecting:
                hit = coord_patt.search(text)
                if hit:
                    fields = hit.groups()
                    atoms.append([fields[1], [float(v) for v in fields[2:]]])
                else:
                    hit = energy_patt.search(text)
                    if hit:
                        # The energy line closes the current cycle.
                        self.energies.append(float(hit.group(1)))
                        self.structures.append(self._sites_to_mol(atoms))
                        collecting = False
                        atoms = []
            elif awaiting_final:
                hit = final_energy_patt.search(text)
                if hit:
                    self.final_energy = float(hit.group(1))

    if self.is_failed:
        return

    if self.run_type == "GeometryOptimization":
        if len(self.structures) > 0:
            self.final_structure = self.structures[-1]
        if self.final_energy is None:
            raise AdfOutputError("The final energy can not be read!")
    elif self.run_type == "SinglePoint":
        if self.final_structure is None:
            raise AdfOutputError("The final structure is missing!")
        if self.final_energy is None:
            raise AdfOutputError("The final energy can not be read!")
def _parse_logfile(self, logfile):
    """
    Parse the formatted logfile.

    Pass 1 walks the file backwards to its last non-empty line; when that
    line lacks the END marker the job is recorded as an internal crash
    (``is_internal_crash``, ``is_failed``, ``error``) and nothing else is
    parsed. Pass 2 reads the file forwards, determines the run type and
    populates ``run_type``, ``freq_type``, ``energies``, ``structures``,
    ``final_energy`` and ``final_structure``. An "ERROR DETECTED" line
    marks the job as failed and stops the scan.

    Args:
        logfile: path of the logfile to open.

    Raises:
        AdfOutputError: for an unrecognised run type, a geometry cycle
            number that is not positive, or - when the job did not fail -
            a final energy / final structure that could not be read.
    """
    cycle_patt = re.compile(r"Coordinates\sin\sGeometry\sCycle\s(\d+)")
    coord_patt = re.compile(r"\s+([0-9]+)\.([A-Za-z]+)"+3*r"\s+([-\.0-9]+)")
    energy_patt = re.compile(r"<.*>\s<.*>\s+current\senergy\s+([-\.0-9]+)\s"
                             "Hartree")
    final_energy_patt = re.compile(
        r"<.*>\s<.*>\s+Bond\sEnergy\s+([-\.0-9]+)\sa\.u\.")
    error_patt = re.compile(r"<.*>\s<.*>\s+ERROR\sDETECTED:\s(.*)")
    runtype_patt = re.compile(r"<.*>\s<.*>\s+RunType\s+:\s(.*)")
    end_patt = re.compile(r"<.*>\s<.*>\s+END")

    # Run-type keywords that map directly onto a run_type value;
    # FREQUENCIES is handled separately because it also sets freq_type.
    plain_run_types = {
        'GEOMETRY OPTIMIZATION': "GeometryOptimization",
        'CREATE': None,
        'SINGLE POINT': 'SinglePoint',
    }

    # Stop parsing the logfile if this job did not terminate successfully.
    # The last non-empty line of the logfile must match the end pattern.
    # Otherwise the job has some internal failure. The TAPE13 part of the
    # ADF manual has a detailed explanation.
    with open(logfile, "r") as stream:
        for tail in reverse_readline(stream):
            if tail != "":
                if end_patt.search(tail) is None:
                    self.is_internal_crash = True
                    self.error = "Internal crash. TAPE13 is generated!"
                    self.is_failed = True
                    return
                break

    in_cycle = False
    expect_final = False
    highest_cycle = -1
    pending_sites = []

    with open(logfile, "r") as stream:
        for row in stream:
            found = error_patt.search(row)
            if found:
                self.is_failed = True
                self.error = found.group(1)
                break

            if self.run_type is None:
                found = runtype_patt.search(row)
                if found:
                    keyword = found.group(1)
                    if keyword == 'FREQUENCIES':
                        self.freq_type = "Numerical"
                        self.run_type = "NumericalFreq"
                    elif keyword in plain_run_types:
                        self.run_type = plain_run_types[keyword]
                    else:
                        raise AdfOutputError("Undefined Runtype!")

            elif self.run_type == 'SinglePoint':
                found = coord_patt.search(row)
                if found:
                    captured = found.groups()
                    # NOTE(review): group 0 (the numeric prefix) is kept
                    # here, while the geometry-optimization branch keeps
                    # group 1 (the letters) - confirm which is intended.
                    xyz = list(map(float, captured[2:]))
                    pending_sites.append([captured[0], xyz])
                else:
                    found = final_energy_patt.search(row)
                    if found:
                        self.final_energy = float(found.group(1))
                        self.final_structure = self._sites_to_mol(
                            pending_sites)

            elif self.run_type == 'GeometryOptimization':
                found = cycle_patt.search(row)
                if found:
                    cycle = int(found.group(1))
                    if cycle <= 0:
                        raise AdfOutputError("Wrong cycle {}".format(cycle))
                    if cycle > highest_cycle:
                        in_cycle = True
                        highest_cycle = cycle
                    else:
                        expect_final = True
                elif in_cycle:
                    found = coord_patt.search(row)
                    if found:
                        captured = found.groups()
                        xyz = list(map(float, captured[2:]))
                        pending_sites.append([captured[1], xyz])
                    else:
                        found = energy_patt.search(row)
                        if found:
                            # An energy line ends the current cycle.
                            self.energies.append(float(found.group(1)))
                            mol = self._sites_to_mol(pending_sites)
                            self.structures.append(mol)
                            in_cycle = False
                            pending_sites = []
                elif expect_final:
                    found = final_energy_patt.search(row)
                    if found:
                        self.final_energy = float(found.group(1))

            elif self.run_type == "NumericalFreq":
                break

    if self.is_failed:
        return

    if self.run_type == "GeometryOptimization":
        if len(self.structures) > 0:
            self.final_structure = self.structures[-1]
    elif self.run_type == "SinglePoint":
        if self.final_structure is None:
            raise AdfOutputError("The final structure is missing!")
    else:
        # Other run types have no final-result requirements here.
        return

    # Both remaining run types require a final energy.
    if self.final_energy is None:
        raise AdfOutputError("The final energy can not be read!")