def test_regrep(self):
    """
    Exercise regrep on a fixture file of numbered lines.

    Two scans are made with the same pair of patterns: a plain forward scan,
    and a reverse scan that stops as soon as every pattern has at least one
    hit. Reading in reverse means the first line read is the one with the
    last line number, which is what the second set of counts checks.
    """
    fixture = os.path.join(test_dir, "3000_lines.txt")
    patterns = {"1": r"1(\d+)", "3": r"3(\d+)"}

    # Forward scan over the whole file.
    forward = regrep(fixture, patterns, postprocess=int)
    self.assertEqual(len(forward["1"]), 1380)
    self.assertEqual(len(forward["3"]), 571)
    self.assertEqual(forward["1"][0][0][0], 0)

    # Reverse scan, terminated once both keys have matched.
    backward = regrep(
        fixture,
        patterns,
        reverse=True,
        terminate_on_match=True,
        postprocess=int,
    )
    self.assertEqual(len(backward["1"]), 1)
    self.assertEqual(len(backward["3"]), 11)
def read_pattern(self, patterns, reverse=False, terminate_on_match=False, postprocess=str):
    """
    Grep the file behind this object for a set of named regex patterns.

    This is a thin wrapper over monty's regrep and accepts the same
    arguments; the file searched is ``self.filename``.

    Args:
        patterns (dict): Mapping of result name to regex, e.g.,
            {"energy": r"energy\\(sigma->0\\)\\s+=\\s+([\\d\\-.]+)"}.
        reverse (bool): Read the file in reverse. Defaults to False. Handy
            for large files (e.g. OUTCARs), especially together with
            terminate_on_match.
        terminate_on_match (bool): Stop once every key in patterns has at
            least one match.
        postprocess (callable): Conversion applied to every matched group.
            Defaults to str, i.e., no change.

    Renders accessible:
        One entry in ``self.data`` per key in patterns. For the example
        above, self.data["energy"] = [[-1234], [-3453], ...] holds the
        post-processed regex results. The values are lists of lists
        because several items can be captured on one line.
    """
    grep_options = {
        "reverse": reverse,
        "terminate_on_match": terminate_on_match,
        "postprocess": postprocess,
    }
    found = regrep(self.filename, patterns, **grep_options)
    self.data.update(found)
def test_regrep(self):
    """
    We are making sure a file containing line numbers is read in reverse
    order, i.e. the first line that is read corresponds to the last line
    number.

    The same two patterns are used for a full forward scan and for a reverse
    scan that terminates once both patterns have matched.
    """
    fname = os.path.join(test_dir, "three_thousand_lines.txt")
    # Raw strings: "\d" inside a plain literal is an invalid escape sequence
    # and triggers a warning at compile time; the pattern text is unchanged.
    patterns = {"1": r"1(\d+)", "3": r"3(\d+)"}

    matches = regrep(fname, patterns, postprocess=int)
    self.assertEqual(len(matches["1"]), 1380)
    self.assertEqual(len(matches["3"]), 571)
    self.assertEqual(matches["1"][0][0][0], 0)

    matches = regrep(
        fname,
        patterns,
        reverse=True,
        terminate_on_match=True,
        postprocess=int,
    )
    self.assertEqual(len(matches["1"]), 1)
    self.assertEqual(len(matches["3"]), 11)
def check(self):
    """
    Inspect the run in the current directory for slow or too many LOOP steps.

    The input set is loaded from "." and "OUTCAR" is grepped for LOOP timing
    lines. As a side effect the number of timing lines found is written to
    "temp.out", unless the method has already returned because of a slow
    step.

    Returns:
        bool: True if the largest captured real time exceeds
        ``self.max_elec_step_time``, or if the number of timing lines is at
        least ``self.max_elec_steps + abs(NELMDL) - 1``; False otherwise.
    """
    vasp_input = VaspInput.from_directory(".")
    # Only the magnitude of NELMDL is used; -5 is the fallback when unset.
    nelmdl = abs(vasp_input["INCAR"].get("NELMDL", -5))

    grep_result = regrep(
        filename="OUTCAR",
        patterns={"loop": r"\s+LOOP:\s+cpu\stime\s+\S+:\sreal\stime\s+(\S+)"},
    )
    loop_timing = grep_result["loop"]
    n_loops = len(loop_timing)

    if n_loops > 0:
        slowest = np.max([float(entry[0][0]) for entry in loop_timing])
        if slowest > self.max_elec_step_time:
            return True

    with open("temp.out", "w") as handle:
        handle.write("Number of steps: " + str(n_loops))

    if n_loops >= self.max_elec_steps + nelmdl - 1:
        return True
    return False
def from_file(self, filepath):
    """
    Fill this object's attributes from the text file at ``filepath``.

    The file is scanned with ``regrep`` using ``self.patterns``; the line
    numbers of the hits are then used to slice the raw lines and parse the
    numeric blocks.

    Sets:
        self.eigenvalues (np.ndarray): one row per 'kpts_coord' hit, parsed
            from the lines between that hit and the paired 'occupations'
            hit. Only set when there is at least one 'kpts_coord' hit.
        self.occupations (np.ndarray): one row per 'occupations' hit. The
            number of lines read is derived from the distance between the
            first 'kpts_coord' hit and the first 'occupations' hit. Only
            set when there is at least one 'kpts_coord' hit.
        self.efermi (float): first captured group of the first 'efermi' hit.
        self.nkpt (int): first captured group of the first 'nkpts' hit.
        self.weights (np.ndarray): last whitespace-separated token of each
            of the ``nkpt`` lines starting two lines after the 'nkpts' hit.

    Args:
        filepath: Path of the file to parse.

    Raises:
        IndexError: if a required pattern ('efermi', 'nkpts', or
            'occupations' when 'kpts_coord' matched) has no hit.
    """
    matches = regrep(filepath, self.patterns)

    # Read the lines unconditionally: the weights block at the end needs
    # them even when no 'kpts_coord' hit was found.
    with open(filepath, 'rt') as file:
        file_data = file.readlines()

    def parse_floats(lines):
        # Flatten the whitespace-separated tokens of several lines to floats.
        return [float(token) for line in lines for token in line.split()]

    kpt_hits = matches['kpts_coord']
    if kpt_hits:
        occ_hits = matches['occupations']

        self.eigenvalues = np.array([
            parse_floats(file_data[start[1] + 2:end[1] - 1])
            for start, end in zip(kpt_hits, occ_hits)
        ])

        n_strings_occups = occ_hits[0][1] - kpt_hits[0][1] - 1
        self.occupations = np.array([
            parse_floats(file_data[start[1] + 1:start[1] + n_strings_occups])
            for start in occ_hits
        ])

    self.efermi = float(matches['efermi'][0][0][0])
    self.nkpt = int(matches['nkpts'][0][0][0])

    first_weight_line = matches['nkpts'][0][1] + 2
    weights = np.zeros(self.nkpt)
    for i in range(self.nkpt):
        weights[i] = float(file_data[first_weight_line + i].split()[-1])
    self.weights = weights
def process_OUTCAR(self, outcar_path=None, dir_to_save=None, optimization=False):
    """
    Process an OUTCAR file obtained from VASP and store the parsed values.

    The following attributes are set and then each one is handed to
    ``self.save(name, dir_to_save)``:
        self.nkpts - number of k-points (int)
        self.efermi - Fermi level (float), or a list with one float per
            'E-fermi' line when ``optimization`` is True
        self.nbands - number of bands (int), derived from the line distance
            between the first two k-point headers
        self.weights - 1D np.array of k-point weights, normalised to sum 1
        self.eigenvalues - np.array, eigenvalues[i][j] contains energy for
            i k-point and j band; with ``optimization`` there is a leading
            ionic-step axis
        self.occupations - np.array, same layout as eigenvalues, holding
            occupations

    :param outcar_path: path to OUTCAR file; ``self.outcar_path`` if None
    :param dir_to_save: directory passed to ``self.save``;
        ``self.dir_to_save`` if None
    :param optimization: if True, read one block of k-points per ionic step
        (the number of steps is the number of 'E-fermi' lines)
    :return: nothing
    """
    if outcar_path is None:
        outcar_path = self.outcar_path
    if dir_to_save is None:
        dir_to_save = self.dir_to_save

    # All regexes are raw strings so that "\s" / "\d" are not treated as
    # (invalid) string escapes.
    patterns = {
        'nkpts': r'Found\s+(\d+)\s+irreducible\sk-points',
        'weights': 'Following reciprocal coordinates:',
        'efermi': r'E-fermi\s:\s+([-.\d]+)',
        'kpoints': r'k-point\s+(\d+)\s:\s+[-.\d]+\s+[-.\d]+\s+[-.\d]+\n',
    }
    matches = regrep(outcar_path, patterns)

    self.nkpts = int(matches['nkpts'][0][0][0])

    efermi_hits = matches['efermi']
    if optimization:
        # Each hit is [[captured_group], line_number]; this ragged shape
        # cannot be turned into a NumPy array on recent NumPy versions, so
        # the values are pulled out with plain indexing instead.
        number_of_ionic_steps = len(efermi_hits)
        self.efermi = [float(hit[0][0]) for hit in efermi_hits]
    else:
        self.efermi = float(efermi_hits[0][0][0])

    kpoint_hits = matches['kpoints']
    self.nbands = int(kpoint_hits[1][1] - kpoint_hits[0][1] - 3)

    with open(outcar_path) as file:
        lines = file.readlines()

    # Weights start two lines after the 'weights' marker; column 3 is read.
    weights = [
        float(lines[matches['weights'][0][1] + i + 2].split()[3])
        for i in range(self.nkpts)
    ]

    def read_kpoint_block(hit):
        # Band rows start two lines after the k-point header; column 1 is
        # the eigenvalue and column 2 the occupation.
        first = hit[1] + 2
        rows = [lines[i].split() for i in range(first, first + self.nbands)]
        return [float(row[1]) for row in rows], [float(row[2]) for row in rows]

    eigenvalues = []
    occupations = []
    if optimization:
        for step in range(number_of_ionic_steps):
            step_eigenvalues = []
            step_occupations = []
            for kpoint in range(self.nkpts):
                energies, occs = read_kpoint_block(
                    kpoint_hits[kpoint + (step * self.nkpts)])
                step_eigenvalues.append(energies)
                step_occupations.append(occs)
            eigenvalues.append(step_eigenvalues)
            occupations.append(step_occupations)
    else:
        for kpoint in range(self.nkpts):
            energies, occs = read_kpoint_block(kpoint_hits[kpoint])
            eigenvalues.append(energies)
            occupations.append(occs)

    self.eigenvalues = np.array(eigenvalues)
    self.occupations = np.array(occupations)
    self.weights = np.array(weights)
    self.weights /= np.sum(self.weights)

    for var in [
            'efermi', 'nkpts', 'nbands', 'weights', 'eigenvalues',
            'occupations'
    ]:
        self.save(var, dir_to_save)
def from_file(filepath):
    """
    Parse a VASP OUTCAR file and build an ``Outcar`` object from it.

    The file is read once into memory and additionally scanned with
    ``regrep``; the line numbers of the regex hits are used to locate the
    k-point weights, the per-k-point band tables and the force tables.

    Args:
        filepath: Path to the OUTCAR file.

    Returns:
        Outcar: constructed from, in order: nkpts, nbands, natoms, weights,
        nisteps (number of 'E-fermi' lines), the spin flag, and the
        histories of Fermi energy, eigenvalues, occupations, energy,
        ionic energy and forces. Eigenvalue and occupation histories have
        shape (nisteps, nspin, nkpts, nbands); the force history has shape
        (nisteps, natoms, 3).
    """
    # Context manager so the handle is closed even if reading fails.
    with open(filepath, 'r') as file:
        data = file.readlines()

    # Raw strings keep "\s", "\d" and "\." as regex escapes instead of
    # invalid string escapes.
    patterns = {
        'nkpts': r'k-points\s+NKPTS\s+=\s+(\d+)',
        'nbands': r'number of bands\s+NBANDS=\s+(\d+)',
        'natoms': r'NIONS\s+=\s+(\d+)',
        'weights': 'Following reciprocal coordinates:',
        'efermi': r'E-fermi\s:\s+([-.\d]+)',
        'energy': r'free energy\s+TOTEN\s+=\s+(.\d+\.\d+)\s+eV',
        'energy_ionic': r'free energy\s+TOTEN\s+=\s+(.\d+\.\d+)\s+eV',
        'kpoints': r'k-point\s+(\d+)\s:\s+[-.\d]+\s+[-.\d]+\s+[-.\d]+\n',
        'forces': r'\s+POSITION\s+TOTAL-FORCE',
        'spin': r'spin component \d+\n',
    }
    matches = regrep(filepath, patterns)

    nbands = int(matches['nbands'][0][0][0])
    nkpts = int(matches['nkpts'][0][0][0])
    natoms = int(matches['natoms'][0][0][0])

    energy_hist = [float(hit[0][0]) for hit in matches['energy']]
    energy_ionic_hist = [float(hit[0][0]) for hit in matches['energy_ionic']]

    # Any 'spin component' line switches to two spin channels.
    spin_restricted = bool(matches['spin'])
    nspin = 2 if spin_restricted else 1

    # Weights start two lines after the marker line; column 3 is the weight.
    if nkpts == 1:
        weights = [float(data[matches['weights'][0][1] + 2].split()[3])]
    else:
        first_weight_line = matches['weights'][0][1] + 2
        weights = np.zeros(nkpts)
        for i in range(nkpts):
            weights[i] = float(data[first_weight_line + i].split()[3])
        weights /= np.sum(weights)

    efermi_hist = np.array(
        [float(hit[0][0]) for hit in matches['efermi']], dtype=float)
    nisteps = len(efermi_hist)

    eigenvalues_hist = np.zeros((nisteps, nspin, nkpts, nbands))
    occupations_hist = np.zeros((nisteps, nspin, nkpts, nbands))

    # Line number of every k-point header, in file order.
    kpoint_lines = [int(hit[1]) for hit in matches['kpoints']]

    for step in range(nisteps):
        for spin in range(nspin):
            for kpoint in range(nkpts):
                header = kpoint_lines[nkpts * nspin * step + nkpts * spin + kpoint]
                # Band rows start two lines below the k-point header.
                rows = [
                    row.split()
                    for row in data[header + 2:header + 2 + nbands]
                ]
                eigenvalues_hist[step, spin, kpoint] = [
                    float(row[1]) for row in rows
                ]
                occupations_hist[step, spin, kpoint] = [
                    float(row[2]) for row in rows
                ]

    force_hits = matches['forces']
    forces_hist = np.zeros((nisteps, natoms, 3))
    for step in range(nisteps):
        # Atom rows start two lines below the POSITION/TOTAL-FORCE header;
        # columns 3-5 hold the force components.
        first_atom_line = force_hits[step][1] + 2
        for atom in range(natoms):
            fields = data[first_atom_line + atom].split()
            forces_hist[step, atom] = [
                float(fields[3]), float(fields[4]), float(fields[5])
            ]

    return Outcar(nkpts, nbands, natoms, weights, nisteps, spin_restricted,
                  efermi_hist, eigenvalues_hist, occupations_hist,
                  energy_hist, energy_ionic_hist, forces_hist)