Python regrep 예제들, monty.re.regrep Python 예제들

예제 #1

0

파일 보기

    def test_regrep(self):
        """
        Exercise regrep on a file of numbered lines, first scanning the whole
        file and then scanning in reverse with early termination.

        With reverse=True the first line read corresponds to the last line
        number of the file.
        """
        target = os.path.join(test_dir, "3000_lines.txt")
        digit_patterns = {"1": r"1(\d+)", "3": r"3(\d+)"}

        # Full scan; postprocess=int is applied to the captured groups.
        forward = regrep(target, dict(digit_patterns), postprocess=int)
        self.assertEqual(len(forward["1"]), 1380)
        self.assertEqual(len(forward["3"]), 571)
        self.assertEqual(forward["1"][0][0][0], 0)

        # Reverse scan with terminate_on_match enabled.
        backward = regrep(
            target,
            dict(digit_patterns),
            reverse=True,
            terminate_on_match=True,
            postprocess=int,
        )
        self.assertEqual(len(backward["1"]), 1)
        self.assertEqual(len(backward["3"]), 11)

예제 #2

0

파일 보기

    def read_pattern(self,
                     patterns,
                     reverse=False,
                     terminate_on_match=False,
                     postprocess=str):
        """
        Grep ``self.filename`` for a set of named regexes and merge the
        results into ``self.data``.

        This is a thin wrapper around monty's regrep; every argument is
        forwarded unchanged.

        Args:
            patterns (dict): Maps a result name to a regex, e.g.,
                {"energy": r"energy\\(sigma->0\\)\\s+=\\s+([\\d\\-.]+)"}.
            reverse (bool): Read the file in reverse. Defaults to False.
                Handy for large files, especially together with
                terminate_on_match.
            terminate_on_match (bool): Stop once every key in patterns has
                at least one match.
            postprocess (callable): Conversion applied to all matches.
                Defaults to str, i.e., no change.

        Renders accessible:
            One entry in self.data per key of patterns. For example,
            {"energy": r"energy\\(sigma->0\\)\\s+=\\s+([\\d\\-.]+)"} sets
            self.data["energy"] = [[-1234], [-3453], ...] from the regex
            and postprocess results. The values are lists of lists because
            several items can be grepped on one line.
        """
        self.data.update(
            regrep(self.filename,
                   patterns,
                   reverse=reverse,
                   terminate_on_match=terminate_on_match,
                   postprocess=postprocess))

예제 #3

0

파일 보기

파일: pwscf.py 프로젝트: davidwaroquiers/pymatgen

    def read_pattern(self, patterns, reverse=False,
                     terminate_on_match=False, postprocess=str):
        """
        Run monty's regrep over ``self.filename`` and store what it finds in
        ``self.data``. The arguments are the same ones regrep takes.

        Args:
            patterns (dict): Named regexes to search for, e.g.,
                {"energy": r"energy\\(sigma->0\\)\\s+=\\s+([\\d\\-.]+)"}.
            reverse (bool): Whether to read the file in reverse. Defaults
                to False. Useful for large files, particularly when
                combined with terminate_on_match.
            terminate_on_match (bool): Whether to stop as soon as each key
                in patterns has at least one match.
            postprocess (callable): Function used to convert all matches.
                Defaults to str, i.e., no change.

        Renders accessible:
            Every key in patterns, via self.data. For example,
            {"energy": r"energy\\(sigma->0\\)\\s+=\\s+([\\d\\-.]+)"} results
            in self.data["energy"] = [[-1234], [-3453], ...], built from
            the regex and postprocess output. Values are lists of lists
            since more than one item may be grepped per line.
        """
        # Collect the pass-through options once, then forward them as-is.
        options = {
            "reverse": reverse,
            "terminate_on_match": terminate_on_match,
            "postprocess": postprocess,
        }
        found = regrep(self.filename, patterns, **options)
        self.data.update(found)

예제 #4

0

파일 보기

파일: test_re.py 프로젝트: dwinston/monty

    def test_regrep(self):
        """
        We are making sure a file containing line numbers is read in reverse
        order, i.e. the first line that is read corresponds to the last line
        number.
        """
        fname = os.path.join(test_dir, "three_thousand_lines.txt")
        # Raw strings keep "\d" as a regex escape instead of an invalid
        # Python string escape (which emits a warning at compile time).
        patterns = {"1": r"1(\d+)", "3": r"3(\d+)"}

        # Forward scan over the whole file; groups are converted with int.
        matches = regrep(fname, dict(patterns), postprocess=int)
        self.assertEqual(len(matches["1"]), 1380)
        self.assertEqual(len(matches["3"]), 571)
        self.assertEqual(matches["1"][0][0][0], 0)

        # Reverse scan with terminate_on_match enabled.
        matches = regrep(fname, dict(patterns), reverse=True,
                         terminate_on_match=True, postprocess=int)
        self.assertEqual(len(matches["1"]), 1)
        self.assertEqual(len(matches["3"]), 11)

예제 #5

0

파일 보기

    def check(self):
        """
        Decide whether the electronic steps recorded in OUTCAR exceed the
        configured limits.

        Returns True when the longest LOOP real time is greater than
        ``self.max_elec_step_time``, or when the number of LOOP entries
        reaches ``self.max_elec_steps + NELMDL - 1``; otherwise False.
        The number of LOOP entries is written to ``temp.out`` unless the
        time limit already triggered.
        """
        incar = VaspInput.from_directory(".")["INCAR"]
        # NELMDL is taken as an absolute value; -5 is used when it is unset.
        nelmdl = abs(incar.get("NELMDL", -5))

        loop_pattern = r"\s+LOOP:\s+cpu\stime\s+\S+:\sreal\stime\s+(\S+)"
        grepped = regrep(filename="OUTCAR", patterns={"loop": loop_pattern})
        loop_timing = grepped["loop"]
        n_steps = len(loop_timing)

        if n_steps > 0:
            # Each entry carries its captured groups first; group 0 is the
            # real time of that LOOP line.
            real_times = [float(entry[0][0]) for entry in loop_timing]
            if np.max(real_times) > self.max_elec_step_time:
                return True

        with open("temp.out", "w") as handle:
            handle.write(f"Number of steps: {n_steps}")

        step_limit = self.max_elec_steps + nelmdl - 1
        return bool(n_steps >= step_limit)

예제 #6

0

파일 보기

파일: qe.py 프로젝트: vitalyanich/electrochemistry

    def from_file(self, filepath):
        """
        Parse ``filepath`` with the regexes in ``self.patterns`` and store
        the extracted quantities on the instance.

        Always sets ``self.efermi``, ``self.nkpt`` and ``self.weights``.
        When the 'kpts_coord' pattern matched at least once, also sets
        ``self.eigenvalues`` and ``self.occupations``.

        Each regrep match is used here as ``[groups, line_index]``, where
        ``line_index`` indexes into the list of lines of the file.

        Args:
            filepath: Path of the text file to parse.
        """
        matches = regrep(filepath, self.patterns)

        # Read the lines unconditionally: the weights section below needs
        # them even when 'kpts_coord' has no match (previously a NameError).
        with open(filepath, 'rt') as file:
            file_data = file.readlines()

        def parse_floats(chunk):
            # Flatten whitespace-separated numbers spread over several lines.
            return [float(token) for line in chunk for token in line.split()]

        if len(matches['kpts_coord']) != 0:
            # Eigenvalues sit between each 'kpts_coord' line and the
            # following 'occupations' line.
            eigenvalues = [
                parse_floats(file_data[start[1] + 2:end[1] - 1])
                for start, end in zip(matches['kpts_coord'],
                                      matches['occupations'])
            ]
            self.eigenvalues = np.array(eigenvalues)

            # Span used for every occupations block, derived from the
            # distance between the first 'kpts_coord' and 'occupations' lines.
            n_strings_occups = (matches['occupations'][0][1] -
                                matches['kpts_coord'][0][1] - 1)
            occupations = [
                parse_floats(
                    file_data[start[1] + 1:start[1] + n_strings_occups])
                for start in matches['occupations']
            ]
            self.occupations = np.array(occupations)

        self.efermi = float(matches['efermi'][0][0][0])
        self.nkpt = int(matches['nkpts'][0][0][0])

        # The weight is the last column of each line, starting two lines
        # after the 'nkpts' match.
        first_weight_line = matches['nkpts'][0][1] + 2
        weights = np.zeros(self.nkpt)
        for i in range(self.nkpt):
            weights[i] = float(file_data[first_weight_line + i].split()[-1])
        self.weights = weights

예제 #7

0

파일 보기

파일: preprocessing.py 프로젝트: vitalyanich/electrochemistry

    def process_OUTCAR(self,
                       outcar_path=None,
                       dir_to_save=None,
                       optimization=False):
        """
        Process an OUTCAR file obtained from VASP and store the parsed
        quantities on the instance, then save each of them.

        Sets the following variables:
        self.nkpts - number of k-points (int)
        self.efermi - Fermi level (float), or a list of floats with one
            entry per 'E-fermi' match when ``optimization`` is True
        self.nbands - number of bands (int)
        self.eigenvalues - np.array, eigenvalues[i][j] contains energy for
            i k-point and j band; with ``optimization`` there is an extra
            leading index for the ionic step
        self.occupations - np.array with the same layout as eigenvalues,
            containing occupations
        self.weights - np.array of k-point weights normalised to sum to 1

        :param outcar_path: path to OUTCAR file; defaults to self.outcar_path
        :param dir_to_save: directory passed to self.save; defaults to
            self.dir_to_save
        :param optimization: if True, read one block of data per ionic step
        :return: nothing
        """
        if outcar_path is None:
            outcar_path = self.outcar_path
        if dir_to_save is None:
            dir_to_save = self.dir_to_save
        # Raw strings so that "\s" / "\d" are regex escapes rather than
        # invalid Python string escapes.
        patterns = {
            'nkpts': r'Found\s+(\d+)\s+irreducible\sk-points',
            'weights': 'Following reciprocal coordinates:',
            'efermi': r'E-fermi\s:\s+([-.\d]+)',
            'kpoints': r'k-point\s+(\d+)\s:\s+[-.\d]+\s+[-.\d]+\s+[-.\d]+\n'
        }
        matches = regrep(outcar_path, patterns)

        self.nkpts = int(matches['nkpts'][0][0][0])
        if optimization:
            # Each match is [groups, line_index]; take the first group of
            # every match directly instead of going through a ragged
            # NumPy array.
            self.efermi = [float(match[0][0]) for match in matches['efermi']]
            number_of_ionic_steps = len(self.efermi)
        else:
            self.efermi = float(matches['efermi'][0][0][0])
        # Band count is inferred from the line distance between the first
        # two 'kpoints' matches, so at least two matches are required.
        self.nbands = int(matches['kpoints'][1][1] - matches['kpoints'][0][1] -
                          3)
        self.eigenvalues = []
        self.occupations = []
        self.weights = []

        with open(outcar_path) as file:
            lines = file.readlines()

        def read_bands(startline):
            # Columns 1 and 2 of each band line are read as the energy and
            # the occupation respectively.
            rows = [
                lines[i].split()
                for i in range(startline, startline + self.nbands)
            ]
            return ([float(row[1]) for row in rows],
                    [float(row[2]) for row in rows])

        # Weights are in column 3, starting two lines after the header match.
        for i in range(self.nkpts):
            self.weights.append(
                float(lines[matches['weights'][0][1] + i + 2].split()[3]))

        if optimization:
            for step in range(number_of_ionic_steps):
                step_eigenvalues = []
                step_occupations = []
                for kpoint in range(self.nkpts):
                    # 'kpoints' matches are consumed in consecutive groups
                    # of nkpts, one group per ionic step.
                    startline = matches['kpoints'][kpoint +
                                                   step * self.nkpts][1] + 2
                    energies, occs = read_bands(startline)
                    step_eigenvalues.append(energies)
                    step_occupations.append(occs)
                self.eigenvalues.append(step_eigenvalues)
                self.occupations.append(step_occupations)
        else:
            for kpoint in range(self.nkpts):
                startline = matches['kpoints'][kpoint][1] + 2
                energies, occs = read_bands(startline)
                self.eigenvalues.append(energies)
                self.occupations.append(occs)

        self.eigenvalues = np.array(self.eigenvalues)
        self.occupations = np.array(self.occupations)
        self.weights = np.array(self.weights)
        self.weights /= np.sum(self.weights)
        for var in [
                'efermi', 'nkpts', 'nbands', 'weights', 'eigenvalues',
                'occupations'
        ]:
            self.save(var, dir_to_save)

예제 #8

0

파일 보기

    def from_file(filepath):
        """
        Build an ``Outcar`` object from the file at ``filepath``.

        The file is read once into a list of lines and grepped with regrep;
        each match is used here as ``[groups, line_index]``, where
        ``line_index`` indexes into that list of lines.

        The number of ionic steps is taken as the number of 'E-fermi'
        matches. Eigenvalues and occupations are stored in arrays of shape
        (nisteps, nspin, nkpts, nbands) and forces in (nisteps, natoms, 3).

        Args:
            filepath: Path of the file to parse.

        Returns:
            Outcar: constructed from the parsed quantities.
        """
        # Context manager so the handle is closed even if reading fails.
        with open(filepath, 'r') as file:
            data = file.readlines()

        # Raw strings so that "\s" / "\d" / "\." are regex escapes rather
        # than invalid Python string escapes.
        patterns = {
            'nkpts': r'k-points\s+NKPTS\s+=\s+(\d+)',
            'nbands': r'number of bands\s+NBANDS=\s+(\d+)',
            'natoms': r'NIONS\s+=\s+(\d+)',
            'weights': 'Following reciprocal coordinates:',
            'efermi': r'E-fermi\s:\s+([-.\d]+)',
            'energy': r'free energy\s+TOTEN\s+=\s+(.\d+\.\d+)\s+eV',
            'energy_ionic': r'free  energy\s+TOTEN\s+=\s+(.\d+\.\d+)\s+eV',
            'kpoints': r'k-point\s+(\d+)\s:\s+[-.\d]+\s+[-.\d]+\s+[-.\d]+\n',
            'forces': r'\s+POSITION\s+TOTAL-FORCE',
            'spin': r'spin component \d+\n'
        }
        matches = regrep(filepath, patterns)

        nbands = int(matches['nbands'][0][0][0])
        nkpts = int(matches['nkpts'][0][0][0])
        natoms = int(matches['natoms'][0][0][0])
        energy_hist = [float(m[0][0]) for m in matches['energy']]
        energy_ionic_hist = [float(m[0][0]) for m in matches['energy_ionic']]

        # NOTE(review): spin_restricted is set True exactly when
        # 'spin component' lines are present (nspin = 2) - confirm this
        # naming matches what Outcar expects.
        if matches['spin']:
            spin_restricted = True
            nspin = 2
        else:
            spin_restricted = False
            nspin = 1

        # Weights are in column 3, starting two lines after the header match.
        # A single k-point yields a plain one-element list (not normalised);
        # otherwise a normalised array is produced.
        weights_start = matches['weights'][0][1] + 2
        if nkpts == 1:
            weights = [float(data[weights_start].split()[3])]
        else:
            weights = np.zeros(nkpts)
            for i in range(nkpts):
                weights[i] = float(data[weights_start + i].split()[3])
            weights /= np.sum(weights)

        efermi_hist = np.array([float(m[0][0]) for m in matches['efermi']],
                               dtype=float)

        nisteps = len(efermi_hist)
        eigenvalues_hist = np.zeros((nisteps, nspin, nkpts, nbands))
        occupations_hist = np.zeros((nisteps, nspin, nkpts, nbands))

        # Column 0: k-point number captured by the regex; column 1: line index.
        each_kpoint_list = np.array([[int(j[0][0]), int(j[1])]
                                     for j in matches['kpoints']])
        for step in range(nisteps):
            for spin in range(nspin):
                for kpoint in range(nkpts):
                    # 'kpoints' matches are consumed in order: step, then
                    # spin, then k-point.
                    flat_index = nkpts * nspin * step + nkpts * spin + kpoint
                    first = each_kpoint_list[flat_index, 1] + 2
                    band_lines = data[first:first + nbands]
                    eigenvalues_hist[step, spin, kpoint] = [
                        float(line.split()[1]) for line in band_lines
                    ]
                    occupations_hist[step, spin, kpoint] = [
                        float(line.split()[2]) for line in band_lines
                    ]

        force_matches = matches['forces']
        forces_hist = np.zeros((nisteps, natoms, 3))
        for step in range(nisteps):
            # Atom rows start two lines after the POSITION/TOTAL-FORCE header.
            first = force_matches[step][1] + 2
            for atom in range(natoms):
                fields = data[first + atom].split()
                # Columns 3-5 are read as the three force components.
                forces_hist[step, atom] = [
                    float(fields[3]),
                    float(fields[4]),
                    float(fields[5])
                ]

        return Outcar(nkpts, nbands, natoms, weights, nisteps, spin_restricted,
                      efermi_hist, eigenvalues_hist, occupations_hist,
                      energy_hist, energy_ionic_hist, forces_hist)