예제 #1
0
    def test_dict_to_object(self):
        """Check that ``dict_to_object`` turns serialized structures back into objects."""
        si_lattice = Lattice([[3.8401979337, 0.00, 0.00],
                              [1.9200989668, 3.3257101909, 0.00],
                              [0.00, -2.2171384943, 3.1355090603]])
        site_coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
        struct = Structure(si_lattice, ["Si"] * 2, site_coords)

        # Two rows, each holding the dict form of the same structure.
        serialized = {'structure_dict': [struct.as_dict(), struct.as_dict()]}
        df = DataFrame(data=serialized)
        df["structure"] = dict_to_object(df["structure_dict"])

        # Every converted row must compare equal to the original object.
        rebuilt = df["structure"].tolist()
        self.assertEqual(rebuilt[0], struct)
        self.assertEqual(rebuilt[1], struct)
예제 #2
0
 def from_entries(
     cls,
     grouped_entries: List[ComputedEntry],
     working_ion_entry: ComputedEntry,
     battery_id: str,
     host_structure: Structure,
 ) -> Union["InsertionElectrodeDoc", None]:
     """
     Build a document from a group of entries and a working-ion entry.

     Args:
         grouped_entries: Entries passed to ``InsertionElectrode.from_entries``.
         working_ion_entry: Entry for the working ion.
         battery_id: Identifier stored on the resulting document.
         host_structure: Structure whose composition supplies the element
             list and chemical system; stored in its dict form.

     Returns:
         A new ``cls`` instance, or ``None`` when constructing the
         ``InsertionElectrode`` raises ``IndexError``.
     """
     try:
         electrode = InsertionElectrode.from_entries(
             entries=grouped_entries,
             working_ion_entry=working_ion_entry,
             strip_structures=True,
         )
     except IndexError:
         return None

     # Start from the electrode summary and add/rename the derived fields.
     summary = electrode.get_summary_dict()
     stable_ids = summary["stable_material_ids"]
     unstable_ids = summary["unstable_material_ids"]
     summary["material_ids"] = stable_ids + unstable_ids
     summary["num_steps"] = summary.pop("nsteps", None)
     summary["last_updated"] = datetime.utcnow()

     elements = sorted(host_structure.composition.elements)
     element_symbols = sorted(str(element) for element in elements)
     chemsys = "-".join(element_symbols)
     framework = Composition(summary["framework_formula"])

     return cls(
         battery_id=battery_id,
         host_structure=host_structure.as_dict(),
         framework=framework,
         electrode_object=electrode.as_dict(),
         elements=elements,
         nelements=len(elements),
         chemsys=chemsys,
         formula_anonymous=framework.anonymized_formula,
         **summary
     )
예제 #3
0
    def test_conversion_multiindex_dynamic(self):
        """Check DictToObject's automatically chosen output column on a MultiIndex frame."""
        # test dynamic target_col_id setting with multiindex
        si_lattice = Lattice([[3.8401979337, 0.00, 0.00],
                              [1.9200989668, 3.3257101909, 0.00],
                              [0.00, -2.2171384943, 3.1355090603]])
        site_coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
        struct = Structure(si_lattice, ["Si"] * 2, site_coords)

        serialized = {'structure_dict': [struct.as_dict(), struct.as_dict()]}
        df_2lvl = DataFrame(data=serialized)
        # Put the existing columns under a single top-level label "custom".
        column_levels = (["custom"], df_2lvl.columns.values)
        df_2lvl.columns = MultiIndex.from_product(column_levels)

        source_col = ('custom', 'structure_dict')
        df_2lvl = DictToObject().featurize_dataframe(df_2lvl, source_col,
                                                     multiindex=True)

        # No target_col_id was given, so the output column is looked up under
        # this (featurizer name, derived column name) pair.
        converted = df_2lvl[('DictToObject', 'structure_dict_object')].tolist()
        self.assertEqual(converted[0], struct)
        self.assertEqual(converted[1], struct)
예제 #4
0
    def test_dict_to_object(self):
        """Check DictToObject with an explicit and with a derived target column."""
        si_lattice = Lattice([[3.8401979337, 0.00, 0.00],
                              [1.9200989668, 3.3257101909, 0.00],
                              [0.00, -2.2171384943, 3.1355090603]])
        site_coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
        struct = Structure(si_lattice, ["Si"] * 2, site_coords)
        serialized = {'structure_dict': [struct.as_dict(), struct.as_dict()]}

        # Explicit target column name.
        explicit = DictToObject(target_col_id='structure')
        df = explicit.featurize_dataframe(DataFrame(data=serialized),
                                          'structure_dict')
        converted = df["structure"].tolist()
        self.assertEqual(converted[0], struct)
        self.assertEqual(converted[1], struct)

        # test dynamic target_col_id setting
        dynamic = DictToObject()
        df = dynamic.featurize_dataframe(DataFrame(data=serialized),
                                         'structure_dict')
        converted = df["structure_dict_object"].tolist()
        self.assertEqual(converted[0], struct)
        self.assertEqual(converted[1], struct)
예제 #5
0
    def read_cfgs(self, filename="output.data"):
        """
        Parse every ``begin ... end`` block of a configuration file.

        Lattice rows and atom coordinates are multiplied by
        ``self.bohr_to_angstrom``; the energy is divided by
        ``self.eV_to_Ha``; forces are divided by both factors.

        Args:
            filename (str): The configuration file to be read.

        Returns:
            tuple: ``(data_pool, df)`` where ``data_pool`` is a list with one
            dict per block (keys ``outputs``, ``structure``, ``num_atoms``)
            and ``df`` is the second item returned by ``convert_docs``.
        """
        with zopen(filename, "rt") as f:
            text = f.read()

        block_re = re.compile("begin\n(.*?)end", re.S)
        lattice_re = re.compile("lattice(.*?)\n")
        atom_re = re.compile("atom(.*?)\n")
        energy_re = re.compile("energy(.*?)\n")

        # Column layout of the whitespace-separated fields on each atom line.
        atom_columns = [
            "x", "y", "z", "specie", "charge", "atomic_energy", "fx", "fy",
            "fz"
        ]

        data_pool = []
        for block in block_re.findall(text):
            lattice_rows = [row.split() for row in lattice_re.findall(block)]
            lattice_matrix = np.array(lattice_rows, dtype=np.float64)
            lattice = Lattice(lattice_matrix * self.bohr_to_angstrom)

            atom_rows = [row.split() for row in atom_re.findall(block)]
            atoms = pd.DataFrame(atom_rows)
            atoms.columns = atom_columns

            xyz = np.array(atoms.loc[:, ["x", "y", "z"]], dtype=np.float64)
            coords = xyz * self.bohr_to_angstrom
            species = np.array(atoms["specie"])
            raw_forces = np.array(atoms.loc[:, ["fx", "fy", "fz"]],
                                  dtype=np.float64)
            forces = raw_forces / self.eV_to_Ha / self.bohr_to_angstrom

            # Only the first energy line of the block is used.
            first_energy = energy_re.findall(block)[0]
            energy = float(first_energy.lstrip()) / self.eV_to_Ha

            struct = Structure(lattice=lattice,
                               species=species,
                               coords=coords,
                               coords_are_cartesian=True)
            data_pool.append({
                "outputs": {"energy": energy, "forces": forces},
                "structure": struct.as_dict(),
                "num_atoms": len(struct),
            })

        _, df = convert_docs(docs=data_pool)
        return data_pool, df
예제 #6
0
파일: _nnp.py 프로젝트: zhenming-xu/maml
    def read_cfgs(self, filename='output.data'):
        """
        Read the configuration file.

        Every ``begin ... end`` block is parsed into one document. Lattice
        rows and atom coordinates are multiplied by
        ``self.bohr_to_angstrom``; the energy is divided by
        ``self.eV_to_Ha``; forces are divided by both factors.

        Args:
            filename (str): The configuration file to be read.

        Returns:
            tuple: ``(data_pool, df)`` where ``data_pool`` is a list with one
            dict per block (keys ``outputs``, ``structure``, ``num_atoms``)
            and ``df`` is the second item returned by ``convert_docs``.
        """
        data_pool = []
        with zopen(filename, 'rt') as f:
            lines = f.read()

        block_pattern = re.compile('begin\n(.*?)end', re.S)
        lattice_pattern = re.compile('lattice(.*?)\n')
        position_pattern = re.compile('atom(.*?)\n')
        energy_pattern = re.compile('energy(.*?)\n')

        for block in block_pattern.findall(lines):
            d = {'outputs': {}}
            lattice_str = lattice_pattern.findall(block)
            # ``np.float`` was removed in NumPy 1.24; ``np.float64`` is the
            # explicit equivalent of the builtin float it aliased.
            lattice = Lattice(
                np.array([latt.split() for latt in lattice_str],
                         dtype=np.float64) * self.bohr_to_angstrom)
            position_str = position_pattern.findall(block)
            positions = pd.DataFrame([pos.split() for pos in position_str])
            # Column layout of the whitespace-separated fields on an atom line.
            positions.columns = \
                ['x', 'y', 'z', 'specie', 'charge', 'atomic_energy', 'fx', 'fy', 'fz']
            coords = np.array(positions.loc[:, ['x', 'y', 'z']],
                              dtype=np.float64)
            coords = coords * self.bohr_to_angstrom
            species = np.array(positions['specie'])
            forces = np.array(positions.loc[:, ['fx', 'fy', 'fz']],
                              dtype=np.float64)
            forces = forces / self.eV_to_Ha / self.bohr_to_angstrom
            # Only the first energy line of the block is used.
            energy_str = energy_pattern.findall(block)[0]
            energy = float(energy_str.lstrip()) / self.eV_to_Ha
            struct = Structure(lattice=lattice,
                               species=species,
                               coords=coords,
                               coords_are_cartesian=True)
            d['structure'] = struct.as_dict()
            d['outputs']['energy'] = energy
            d['outputs']['forces'] = forces
            d['num_atoms'] = len(struct)

            data_pool.append(d)
        _, df = convert_docs(docs=data_pool)
        return data_pool, df
예제 #7
0
def find_inequivalent_coords(structure: Structure,
                             df: DataFrame) -> List[CoordInfo]:
    """Group the points listed in ``df`` into symmetry-equivalent sets.

    Each row of ``df`` (columns ``a``, ``b``, ``c``) is appended to a copy of
    ``structure`` as an ``Element.Og`` site, the augmented structure is
    symmetrized, and the added sites are grouped by their entry in
    ``equivalent_atoms``.

    Args:
        structure: Host structure; must not contain ``Element.Og``.
        df: Rows with ``a``, ``b``, ``c`` and a quantity column
            (``ave_value`` when present, otherwise ``value``).

    Returns:
        One ``CoordInfo`` per group of equivalent added sites.
    """
    def representative(pair):
        # pair is (representative atom index, atom index)
        return pair[0]

    initial_sg = StructureSymmetrizer(structure).sg_number

    # Work on a copy so the caller's structure is left untouched.
    added_structure = Structure.from_dict(structure.as_dict())
    start_index = len(structure)
    for _, row in df.iterrows():
        point = [row.a, row.b, row.c]
        # Og serves as the marker species, so the host must not contain it.
        assert Element.Og not in structure.composition.elements
        added_structure.append(Element.Og, point)
    end_index = len(added_structure)

    symmetrizer = StructureSymmetrizer(added_structure)
    if symmetrizer.sg_number != initial_sg:
        logger.warning("The symmetry has changed, meaning all the symmetry "
                       "equivalent sites do not exist.")

    sym_data = symmetrizer.spglib_sym_data
    added_indices = list(range(start_index, end_index))
    pairs = sorted(
        zip(sym_data["equivalent_atoms"][added_indices], added_indices),
        key=representative)

    quantity_column = "ave_value" if "ave_value" in df else "value"

    result = []
    for _, members in groupby(pairs, key=representative):
        group_coords = []
        group_quantities = []
        for repr_idx, atom_idx in members:
            fcoord = added_structure[atom_idx].frac_coords
            group_coords.append(tuple(fcoord))
            group_quantities.append(
                df[quantity_column][atom_idx - start_index])
            if repr_idx == atom_idx:
                # Site symmetry and coordination come from the representative.
                site_sym = sym_data["site_symmetry_symbols"][atom_idx]
                coordination = Distances(structure, fcoord).coordination()

        result.append(CoordInfo(site_symmetry=remove_dot(site_sym),
                                coordination=coordination,
                                frac_coords=group_coords,
                                quantities=group_quantities))
    return result
예제 #8
0
    def read_cfgs(filename, predict=False):
        """
        Read the configuration file.

        Every occurrence of ``'AT '`` is stripped from the text, which is then
        split into blocks that each start with a line holding only the atom
        count. From every block the lattice, energy, virial stress and
        per-atom property columns are extracted.

        Args:
            filename (str): The configuration file to be read.
            predict (bool): If True, forces are taken from the ``force``
                property column; otherwise from ``dft_force``.

        Returns:
            tuple: ``(data_pool, df)`` where ``data_pool`` is a list with one
            dict per block (keys ``outputs``, ``structure``, ``num_atoms``)
            and ``df`` is the second item returned by ``convert_docs``.
        """
        # Property type codes -> dtypes. ``np.int`` / ``np.str`` were removed
        # in NumPy 1.24; the builtins are what those aliases pointed to.
        type_convert = {'R': np.float32, 'I': int, 'S': str}
        data_pool = []
        with zopen(filename, 'rt') as f:
            lines = f.read()
        repl = re.compile('AT ')
        lines = repl.sub('', string=lines)

        block_pattern = re.compile(
            r'(\n[0-9]+\n|^[0-9]+\n)(.+?)(?=\n[0-9]+\n|$)', re.S)
        lattice_pattern = re.compile(r'Lattice="(.+)"')
        energy_pattern = re.compile(
            r'(?<=\S{3}\s|dft_)energy=(-?[0-9]+.[0-9]+)')
        stress_pattern = re.compile(r'dft_virial=({|)(.+?)(}|) \S.*')
        properties_pattern = re.compile(r'properties=(\S+)', re.I)
        position_pattern = re.compile('\n(.+?)(?=\nE.*|\n\n.*|$)', re.S)

        for (size, block) in block_pattern.findall(lines):
            d = {'outputs': {}}
            size = int(size)
            lattice_str = lattice_pattern.findall(block)[0]
            lattice = Lattice([float(s) for s in lattice_str.split()])
            # When several energies match, the last one is used.
            energy_str = energy_pattern.findall(block)[-1]
            energy = float(energy_str)
            # Group 1 of the stress pattern is the text between the optional
            # braces.
            stress_str = stress_pattern.findall(block)[0][1]
            virial_stress = np.array([float(s) for s in stress_str.split()])
            # Keep six of the parsed components.
            # NOTE(review): presumably xx, yy, zz, xy, yz, zx of a row-major
            # 3x3 tensor -- confirm against the file format.
            virial_stress = [virial_stress[i] for i in [0, 4, 8, 1, 5, 6]]
            # The properties value is read as colon-separated triples:
            # name, type code, number of columns.
            properties = properties_pattern.findall(block)[0].split(":")
            labels_columns = OrderedDict()
            labels = {}
            for i in range(0, len(properties), 3):
                labels_columns[properties[i]] = [
                    int(properties[i + 2]), properties[i + 1]
                ]
            position_str = position_pattern.findall(block)[0].split('\n')
            position = np.array([p.split() for p in position_str])
            # Slice the per-atom table into one typed array per property,
            # consuming columns left to right.
            column_index = 0
            for key in labels_columns:
                num_columns, dtype = labels_columns[key]
                labels[key] = position[:, column_index:column_index +
                                       num_columns].astype(type_convert[dtype])
                column_index += num_columns
            struct = Structure(lattice=lattice,
                               species=labels['species'].ravel(),
                               coords=labels['pos'],
                               coords_are_cartesian=True)
            if predict:
                forces = labels['force']
            else:
                forces = labels['dft_force']
            d['structure'] = struct.as_dict()
            d['outputs']['energy'] = energy
            # The atom count in the block header must match the parsed sites.
            assert size == struct.num_sites
            d['num_atoms'] = size
            d['outputs']['forces'] = forces
            d['outputs']['virial_stress'] = virial_stress

            data_pool.append(d)
        _, df = convert_docs(docs=data_pool)
        return data_pool, df
예제 #9
0
파일: _mtp.py 프로젝트: zhenming-xu/maml
    def read_cfgs(self, filename):
        """
        Read a configuration file made of ``BEGIN_CFG ... END_CFG`` blocks.

        From each block the size, supercell, atom table, energy and stress
        are extracted. Species are looked up in ``self.elements`` using the
        second column of the atom table, and the stress components are
        reordered from ``self.mtp_stress_order`` to ``self.vasp_stress_order``.

        Args:
            filename (str): The configuration file to be read.

        Returns:
            tuple: ``(data_pool, df)`` where ``data_pool`` is a list with one
            dict per block (keys ``outputs``, ``structure``, ``num_atoms``)
            and ``df`` is the second item returned by ``convert_docs``.

        Raises:
            ValueError: If ``self.elements`` is empty.
        """
        def formatify(string):
            # Whitespace-separated numbers -> list of floats.
            return [float(s) for s in string.split()]

        if not self.elements:
            raise ValueError("No species given.")

        data_pool = []
        with zopen(filename, 'rt') as f:
            lines = f.read()

        block_pattern = re.compile('BEGIN_CFG\n(.*?)\nEND_CFG', re.S)
        size_pattern = re.compile('Size\n(.*?)\n SuperCell', re.S | re.I)
        lattice_pattern = re.compile('SuperCell\n(.*?)\n AtomData',
                                     re.S | re.I)
        position_pattern = re.compile('fz\n(.*?)\n Energy', re.S)
        energy_pattern = re.compile('Energy\n(.*?)\n (?=PlusStress|Stress)',
                                    re.S)
        stress_pattern = re.compile('xy\n(.*?)(?=\n|$)', re.S)
        for block in block_pattern.findall(lines):
            d = {'outputs': {}}
            size_str = size_pattern.findall(block)[0]
            size = int(size_str.lstrip())
            lattice_str = lattice_pattern.findall(block)[0]
            lattice = Lattice(
                np.array([formatify(row) for row in lattice_str.split('\n')]))
            position_str = position_pattern.findall(block)[0]
            position = np.array(
                [formatify(row) for row in position_str.split('\n')])
            # Column 1 indexes into self.elements. ``np.int`` was removed in
            # NumPy 1.24, so the builtin ``int`` it aliased is used instead.
            species = np.array(self.elements)[position[:, 1].astype(int)]
            # Columns 5-7 are stored as forces, columns 2-4 as coordinates.
            forces = position[:, 5:8].tolist()
            position = position[:, 2:5]
            energy_str = energy_pattern.findall(block)[0]
            energy = float(energy_str.lstrip())
            stress_str = stress_pattern.findall(block)[0]
            # reshape(6) rejects a stress line without exactly six values.
            virial_stress = np.array(formatify(stress_str)).reshape(6).tolist()
            virial_stress = [
                virial_stress[self.mtp_stress_order.index(n)]
                for n in self.vasp_stress_order
            ]
            struct = Structure(lattice=lattice,
                               species=species,
                               coords=position,
                               coords_are_cartesian=True)
            d['structure'] = struct.as_dict()
            d['outputs']['energy'] = energy
            # The declared size must match the number of parsed sites.
            assert size == struct.num_sites
            d['num_atoms'] = size
            d['outputs']['forces'] = forces
            d['outputs']['virial_stress'] = virial_stress

            data_pool.append(d)
        _, df = convert_docs(docs=data_pool)
        return data_pool, df
예제 #10
0
def to_istructure(structure: Structure) -> IStructure:
    """Return an ``IStructure`` built from the dict form of ``structure``."""
    serialized = structure.as_dict()
    return IStructure.from_dict(serialized)