def print_high_Tc(X_test, y_pred, path_w='test1_tc.txt', tc_threshold=100):
    """Write every prediction whose Tc exceeds a threshold to a text file.

    Args:
        X_test: Sequence of feature rows. Each row is read as
            ``[Z of atom 0, Z of atom 1, count of atom 0, count of atom 1,
            pressure]``; all five entries are truncated with ``int``.
        y_pred: Sequence of predicted Tc values, aligned with ``X_test``.
        path_w: Path of the output file. It is opened in write mode, so an
            existing file is overwritten.
        tc_threshold: Only rows whose prediction is strictly greater than
            this value are written.
    """
    with open(path_w, mode='w') as f:
        for row, predicted in zip(X_test, y_pred):
            # Written as "not >" so that NaN predictions are skipped exactly
            # like they were by the original "if pred > 100" test.
            if not predicted > tc_threshold:
                continue
            satom0 = Element.from_Z(int(row[0])).symbol.lstrip()
            satom1 = Element.from_Z(int(row[1])).symbol.lstrip()
            natom0 = int(row[2])
            natom1 = int(row[3])
            p = int(row[4])
            tc = int(predicted)
            f.write('{:>2}{}{}{} P = {:>3} GPa Tc = {} K \n'.format(
                satom0, natom0, satom1, natom1, p, tc))
    print('Predicted Tc is written in file {}'.format(path_w))
Beispiel #2
0
 def fp_oganov(self, delta=0.01, sigma=0.01):
     """Rescale the structure-distance data into a fingerprint.

     Every entry of every species pair is multiplied by
     ``volume / (delta * N0 * N1)`` and then reduced by 1, where ``N0`` and
     ``N1`` are the composition amounts of the two species of the pair.

     Args:
         delta: Forwarded to ``self.structure_distances``; also used in the
             normalisation factor.
         sigma: Forwarded to ``self.structure_distances``.

     Returns:
         Tuple ``(struc_dist_x, fp_oganov)``: the first value returned by
         ``self.structure_distances`` unchanged, and a dict keyed by species
         pair holding the rescaled values.
     """
     import copy

     struc_dist_x, struc_dist = self.structure_distances(delta=delta,
                                                         sigma=sigma)
     # dict.copy() is shallow: the per-pair containers would be shared with
     # struc_dist and the in-place update below would overwrite them too.
     fp_oganov = copy.deepcopy(struc_dist)
     vol = self.structure.volume
     composition = self.structure.composition
     for spec_pair, values in fp_oganov.items():
         # These depend only on the pair, so compute them once per pair
         # instead of once per entry.
         number_atoms0 = composition[Element.from_Z(spec_pair[0])]
         number_atoms1 = composition[Element.from_Z(spec_pair[1])]
         scale = vol / (delta * number_atoms0 * number_atoms1)
         for i in range(len(values)):
             values[i] *= scale
             values[i] -= 1
     return struc_dist_x, fp_oganov
Beispiel #3
0
    def get_atom_feature(
            self,
            mol,  # type: ignore
            atom) -> Dict:  # type: ignore
        """
        Build the feature dictionary of a single atom.

        Args:
            mol (pybel.Molecule): Molecule the atom belongs to
            atom (pybel.Atom): Atom to describe
        Return:
            (dict): Feature name -> value. 'chirality' and 'ring_sizes' are
                only present when listed in ``self.atom_features``.
        """
        ob_atom = atom.OBAtom  # underlying OpenBabel atom
        zero_based_idx = atom.idx - 1  # (pybel atoms indices start from 1)

        atomic_num = ob_atom.GetAtomicNum()
        symbol = Element.from_Z(atomic_num).symbol

        # Hydrogen gets the fixed code 6 instead of asking OpenBabel
        if symbol == 'H':
            hybridization = 6
        else:
            hybridization = ob_atom.GetHyb()

        # Atoms typed 'H' use the hydrogen-specific donor query
        if atom.type == 'H':
            is_donor = ob_atom.IsHbondDonorH()
        else:
            is_donor = ob_atom.IsHbondDonor()

        # Cheap per-atom properties
        output = {
            "element": symbol,
            "atomic_num": atomic_num,
            "formal_charge": ob_atom.GetFormalCharge(),
            "hybridization": hybridization,
            "acceptor": ob_atom.IsHbondAcceptor(),
            "donor": is_donor,
            "aromatic": ob_atom.IsAromatic(),
            "coordid": atom.coordidx,
        }

        # Optional: chirality code (0 = not a chiral centre, 1 = 'R', 2 = other)
        if 'chirality' in self.atom_features:
            chiral_centers = self._get_chiral_centers(mol)
            if zero_based_idx in chiral_centers:
                is_r = chiral_centers[zero_based_idx] == 'R'
                output['chirality'] = 1 if is_r else 2
            else:
                output['chirality'] = 0

        # Optional: sizes of all SSSR rings that contain this atom
        if 'ring_sizes' in self.atom_features:
            # OpenBabel caches ring computation internally, no need to cache ourselves
            ring_sizes = []
            for ring in mol.OBMol.GetSSSR():
                if ring.IsInRing(atom.idx):
                    ring_sizes.append(ring.Size())
            output['ring_sizes'] = ring_sizes
        return output
Beispiel #4
0
    def __init__(self):
        """Load every ``*.table`` file of the Magpie data directory.

        The result is stored in ``self.all_elemental_props`` as
        ``{property name: {element symbol: value}}``. The property name is
        the file name without the ``.table`` suffix, and line ``Z`` of a file
        (1-based) is read as the value for atomic number ``Z``.

        "OxidationStates" rows are parsed as a list of floats; every other
        property is parsed as a single float. A row that cannot be parsed,
        or that is missing because the file has fewer rows than
        ``len(_pt_data)``, is stored as NaN.
        """
        self.all_elemental_props = dict()
        self.data_dir = os.path.join(module_dir, "data_files",
                                     'magpie_elementdata')

        # Make a list of available properties
        available_props = [
            os.path.basename(datafile).replace('.table', '')
            for datafile in glob(os.path.join(self.data_dir, "*.table"))
        ]

        # parse and store elemental properties
        for descriptor_name in available_props:
            table_path = os.path.join(self.data_dir,
                                      '{}.table'.format(descriptor_name))
            with open(table_path, 'r') as f:
                lines = f.readlines()

            props = dict()
            self.all_elemental_props[descriptor_name] = props
            for atomic_no in range(1, len(_pt_data) + 1):  # max Z=103
                try:
                    if descriptor_name in ["OxidationStates"]:
                        prop_value = [
                            float(i) for i in lines[atomic_no - 1].split()
                        ]
                    else:
                        prop_value = float(lines[atomic_no - 1])
                except (ValueError, IndexError):
                    # IndexError: the table is shorter than the periodic
                    # table data; treat the element as "no data".
                    prop_value = float("NaN")
                props[Element.from_Z(atomic_no).symbol] = prop_value
Beispiel #5
0
    def __init__(self):
        """Read all Magpie ``*.table`` files into ``self.all_elemental_props``.

        Layout of the result: ``{property name: {element symbol: value}}``,
        where the property name is the table file name without ``.table``
        and row ``Z`` (1-based) of the file supplies the value for atomic
        number ``Z``.

        Rows of "OxidationStates" become a list of floats, rows of any other
        table a single float. Rows that fail to parse, and rows that do not
        exist because the table is shorter than ``len(_pt_data)``, are
        recorded as NaN.
        """
        self.all_elemental_props = dict()
        self.data_dir = os.path.join(module_dir, "data_files",
                                     'magpie_elementdata')

        # Make a list of available properties
        available_props = []
        for datafile in glob(os.path.join(self.data_dir, "*.table")):
            available_props.append(
                os.path.basename(datafile).replace('.table', ''))

        # parse and store elemental properties
        for descriptor_name in available_props:
            with open(os.path.join(self.data_dir,
                                   '{}.table'.format(descriptor_name)),
                      'r') as f:
                lines = f.readlines()

            per_element = dict()
            self.all_elemental_props[descriptor_name] = per_element
            for atomic_no in range(1, len(_pt_data) + 1):  # max Z=103
                try:
                    row = lines[atomic_no - 1]
                    if descriptor_name in ["OxidationStates"]:
                        prop_value = [float(i) for i in row.split()]
                    else:
                        prop_value = float(row)
                except (ValueError, IndexError):
                    # A short table (IndexError) is handled the same way as
                    # an unparseable entry (ValueError): no data -> NaN.
                    prop_value = float("NaN")
                per_element[Element.from_Z(atomic_no).symbol] = prop_value
Beispiel #6
0
def print_high_Tc(X_test, y_pred):
    """Print one line for every prediction with Tc above 150.

    Args:
        X_test: Sequence of feature rows, read as ``[Z of atom 0, Z of
            atom 1, count of atom 0, count of atom 1, pressure]``.
        y_pred: Sequence of predicted Tc values, aligned with ``X_test``.
    """
    for row, tc in zip(X_test, y_pred):
        if tc > 150:
            # from_Z is called on the class directly; no placeholder
            # Element instance is needed to reach it.
            atom0 = Element.from_Z(int(row[0])).symbol
            atom1 = Element.from_Z(int(row[1])).symbol
            print('%2s%.1i%1s%.1i P = %.3i GPa Tc = %.3i K'
                  % (atom0, row[2], atom1, row[3], int(row[4]), tc))
Beispiel #7
0
def readLammps(desired_return):
    """Collect results of a LAMMPS run from ``log.lammps`` and ``dump.atoms``.

    Both files are opened by relative path. Only the last entry of the
    parsed log and the last frame of the dump are used.

    Args:
        desired_return: Iterable of result names to return. The names
            produced here are "energies", "structures", "forces" and
            "stresses".

    Returns:
        dict mapping each requested name to its value. If the parsed log is
        empty, every requested name maps to ``None``.

    Raises:
        ValueError: If ``dump.atoms`` yields no frame.
        KeyError: If ``desired_return`` contains a name not produced here.
    """
    from pymatgen.io.lammps.outputs import parse_lammps_dumps, parse_lammps_log
    from pymatgen import Structure, Element
    from pymatgen.analysis.elasticity.stress import Stress
    from numpy import unique, array, argmin
    try:
        log = parse_lammps_log(filename="log.lammps")[-1]
    except IndexError:
        return {ret: None for ret in desired_return}
    result_dict = {"energies": list(log['PotEng'])[-1]}

    # Only the final frame is used, so walk to it first and build the
    # per-atom arrays once instead of once per frame.
    dump = None
    for dump in parse_lammps_dumps("dump.atoms"):
        pass
    if dump is None:
        raise ValueError("dump.atoms contains no frames")

    atoms = dump.data
    natoms = dump.natoms
    coords = [''] * natoms
    forces = [''] * natoms
    masses = [''] * natoms
    for atom in range(natoms):
        # Atom ids are 1-based; store every row at position id - 1 so the
        # arrays end up ordered by id.
        slot = atoms["id"][atom] - 1
        coords[slot] = [atoms["x"][atom], atoms["y"][atom], atoms["z"][atom]]
        forces[slot] = [atoms["fx"][atom], atoms["fy"][atom],
                        atoms["fz"][atom]]
        masses[slot] = atoms["mass"][atom]
    box = dump.box

    # Assign each distinct mass the element whose reference mass is closest.
    # NOTE(review): "+ 1" assumes iterating Element starts at Z = 1 and is
    # ordered by atomic number - confirm.
    unique_masses = unique(masses)
    ref_masses = [el.atomic_mass.real for el in Element]
    diff = abs(array(ref_masses) - unique_masses[:, None])
    atomic_numbers = argmin(diff, axis=1) + 1
    symbols = [Element.from_Z(an).symbol for an in atomic_numbers]
    species_map = dict(zip(unique_masses, symbols))
    atom_species = [species_map[mass] for mass in masses]

    result_dict["structures"] = Structure(box.to_lattice(),
                                          atom_species,
                                          coords,
                                          coords_are_cartesian=True)
    result_dict["forces"] = forces

    # Last logged value of c_press[1..6], scaled by 1e-1
    # (presumably a unit conversion - confirm).
    pressure = [
        1e-1 * list(log['c_press[{}]'.format(i)])[-1] for i in range(1, 7)
    ]
    # Components 0-2 go on the diagonal, 3-5 fill the symmetric off-diagonal.
    result_dict["stresses"] = Stress([[pressure[0], pressure[3], pressure[4]],
                                      [pressure[3], pressure[1], pressure[5]],
                                      [pressure[4], pressure[5], pressure[2]]])
    return {ret: result_dict[ret] for ret in desired_return}
Beispiel #8
0
def cell_to_structure(
        cell: Tuple[List[List[float]], List[List[float]],
                    List[int]]) -> Structure:
    """
    Convert a (lattice, positions, numbers) tuple into a Structure.

    cell: (Lattice parameters
           [[a_x, a_y, a_z], [b_x, b_y, b_z], [c_x, c_y, c_z]],
          Fractional atomic coordinates in an Nx3 array,
          Z numbers of species in a length N array)
    """
    # Atomic numbers -> Element objects
    elements = list(map(Element.from_Z, cell[2]))
    lattice_vectors = cell[0]
    frac_coords = cell[1]
    return Structure(lattice=lattice_vectors,
                     coords=frac_coords,
                     species=elements)
Beispiel #9
0
 def find_seekpath_data(self) -> None:
     """Run seekpath and store its output and the primitive Structure.

     Sets ``self._seekpath_data`` to the dict returned by
     ``seekpath.get_explicit_k_path`` and ``self._band_primitive`` to a
     Structure built from its "primitive_*" entries.
     """
     path_data = seekpath.get_explicit_k_path(
         structure=self.cell,
         symprec=self.symprec,
         angle_tolerance=self.angle_tolerance,
         with_time_reversal=self.time_reversal,
         reference_distance=self.ref_distance)
     self._seekpath_data = path_data

     primitive_lattice = path_data["primitive_lattice"]
     # Atomic numbers -> Element objects
     primitive_species = []
     for atomic_number in path_data["primitive_types"]:
         primitive_species.append(Element.from_Z(atomic_number))
     primitive_positions = path_data["primitive_positions"]
     self._band_primitive = Structure(primitive_lattice,
                                      primitive_species,
                                      primitive_positions)
Beispiel #10
0
 def get_mol_species(mol):
     """
     Returns the element symbol of every atom of a molecule, in atom order.

     ``mol`` must provide ``NumAtoms()`` and ``GetAtom(i)`` with 1-based
     ``i`` (presumably an OpenBabel molecule - confirm).
     """
     return [
         Element.from_Z(mol.GetAtom(atom_index).GetAtomicNum()).symbol
         for atom_index in range(1, mol.NumAtoms() + 1)
     ]
Beispiel #11
0
 def __init__(self):
     """Load the MEGNet elemental embeddings from the bundled JSON file.

     ``self.all_element_data`` maps a key to a dict of 16 named embedding
     values. Row 0 of the file is stored under the dummy key, rows 1-94
     under the string form of the element with that atomic number.
     """
     self._dummy = "Dummy"
     dfile = os.path.join(module_dir,
                          "data_files/megnet_elemental_embedding.json")
     with open(dfile, "r") as fp:
         embeddings = json.load(fp)

     self.prop_names = ["embedding {}".format(i) for i in range(1, 17)]
     self.all_element_data = {}
     for row_index in range(95):
         embedding_dict = dict(zip(self.prop_names, embeddings[row_index]))
         # Row 0 has no element; it belongs to the dummy species
         if row_index == 0:
             key = self._dummy
         else:
             key = str(Element.from_Z(row_index))
         self.all_element_data[key] = embedding_dict
Beispiel #12
0
 def __init__(self):
     """Read the bundled MEGNet elemental embedding JSON file.

     Fills ``self.all_element_data`` with one dict of 16 named embedding
     values per key: row 0 of the file goes under the dummy key, rows 1-94
     go under the string form of the element with that atomic number.
     """
     self._dummy = "Dummy"
     dfile = os.path.join(module_dir,
                          "data_files/megnet_elemental_embedding.json")
     with open(dfile, "r") as fp:
         embeddings = json.load(fp)

     self.prop_names = ["embedding {}".format(i) for i in range(1, 17)]
     self.all_element_data = {}
     for z in range(95):
         named_values = dict(zip(self.prop_names, embeddings[z]))
         # z == 0 is not an element: use the dummy key for it
         key = self._dummy if z == 0 else str(Element.from_Z(z))
         self.all_element_data[key] = named_values
Beispiel #13
0
 def _parse(self):
     """
     Parse and store all elemental properties once and for all.

     For every name in ``self.available_props`` the file
     ``<data_dir>/<name>.table`` is read; line ``Z`` (1-based) supplies the
     value for atomic number ``Z``. "OxidationStates" rows become a list of
     floats, other rows a single float. Rows that cannot be parsed, or that
     are missing because the table is shorter than ``len(_pt_data)``, are
     stored as NaN.

     ``self.all_elemental_props[name]`` must already exist as a dict.
     """
     for descriptor_name in self.available_props:
         table_path = os.path.join(self.data_dir,
                                   '{}.table'.format(descriptor_name))
         with open(table_path, 'r') as f:
             lines = f.readlines()
         for atomic_no in range(1, len(_pt_data)+1):  # max Z=103
             try:
                 if descriptor_name in ["OxidationStates"]:
                     prop_value = [float(i) for i in lines[atomic_no - 1].split()]
                 else:
                     prop_value = float(lines[atomic_no - 1])
             except (ValueError, IndexError):
                 # IndexError: table has fewer rows than elements -> no data
                 prop_value = float("NaN")
             self.all_elemental_props[descriptor_name][str(Element.from_Z(atomic_no))] = prop_value
Beispiel #14
0
 def get_radius(self, mol):
     '''
     Compute the scaled radius of every atom in the Molecule, in au.

     Each reference covalent radius is multiplied by ``self.angstrom2au``
     and by ``self.metal_radius_scale`` when the symbol is in
     ``self.metals``, otherwise by ``self.covalent_radius_scale``.
     Returns a list ordered like the atoms of ``mol``.
     '''
     ref_radius = CovalentRadius.radius
     radius = []
     for atom_index in range(1, mol.NumAtoms() + 1):
         atomic_num = mol.GetAtom(atom_index).GetAtomicNum()
         symbol = Element.from_Z(atomic_num).symbol
         scale = (self.metal_radius_scale if symbol in self.metals
                  else self.covalent_radius_scale)
         radius.append(ref_radius[symbol] * self.angstrom2au * scale)
     return radius
Beispiel #15
0
def insert_elements(coll):
    """Make sure ``coll`` holds exactly one single-atom entry per element.

    For each atomic number 1..18 the collection is searched for documents
    whose "formula" is ``"<symbol>1"``. If none exists a new document is
    built and inserted; if several exist all but the first are removed.

    Args:
        coll: Collection object providing ``find``, ``insert`` and
            ``remove``, with ``find`` returning a cursor that supports
            ``count()`` (presumably a pymongo collection - confirm).
    """
    print("adding missing elements.")
    for z in range(1, 19):
        el = Element.from_Z(z)
        cursor = coll.find(filter={"formula": "{}1".format(el.symbol)})
        # Query the count once and reuse it for both branches
        n_found = cursor.count()
        if n_found == 0:
            try:
                clean_mol = Molecule([el], [[0, 0, 0]])
                xyz = XYZ(clean_mol)
                bb = BabelMolAdaptor.from_string(str(xyz), "xyz")
                pbmol = pb.Molecule(bb.openbabel_mol)
                smiles = pbmol.write("smi").split()[0]
                can = pbmol.write("can").split()[0]
                inchi = pbmol.write("inchi")
                svg = pbmol.write("svg")
                comp = clean_mol.composition
                comp_dict = comp.as_dict()
                d = {
                    "molecule": clean_mol.as_dict(),
                    "pretty_formula": comp.reduced_formula,
                    "formula": comp.formula,
                    "composition": comp_dict,
                    "elements": list(comp_dict.keys()),
                    "nelements": len(comp),
                    "charge": 0,
                    "spin_multiplicity": clean_mol.spin_multiplicity,
                    "smiles": smiles,
                    "can": can,
                    "inchi": inchi,
                    # "names": get_nih_names(smiles),
                    "svg": svg,
                    "xyz": str(xyz),
                    "tags": ["G305 test set"],
                }
                coll.insert(d)
            except Exception as ex:
                # Best effort: keep going with the next element, but report
                # the cause instead of discarding it.
                print("Error in {}: {}".format(el, ex))
        elif n_found > 1:
            print("More than 1 {} found. Removing...".format(el))
            # Keep the first hit, drop every further duplicate
            for duplicate in list(cursor)[1:]:
                print(duplicate["_id"])
                coll.remove({"_id": duplicate["_id"]})
Beispiel #16
0
def get_el(obj: Union[Element, Specie, str, int]) -> str:
    """Return the element name for a symbol, atomic number, Element or Specie.

    Args:
        obj: An arbitrary object. Supported objects are Element/Specie
            objects, integers (representing atomic numbers), or strings
            (element symbols or species strings). Strings are first
            converted with ``get_el_sp``.

    Returns:
        The element as a string.

    Raises:
        ValueError: If the (converted) object is none of the supported types.
    """
    resolved = get_el_sp(obj) if isinstance(obj, str) else obj

    if isinstance(resolved, Element):
        return resolved.name
    if isinstance(resolved, Specie):
        return resolved.element.name
    if isinstance(resolved, int):
        return Element.from_Z(resolved).name
    raise ValueError("Unsupported element type: {}.".format(type(resolved)))
Beispiel #17
0
    def __init__(self, period_tag=False, flatten=True):
        """Initialize the featurizer

        Precomputes one one-hot vector (as ``np.matrix``) for each atomic
        number 1..94 and stores them in ``self.ohvs`` keyed by Z.

        Args:
            period_tag (bool): In the original OFM, an element is represented
                    by a vector of length 32, where each element is 1 or 0,
                    which represents the valence subshell of the element.
                    With period_tag=True, the vector size is increased
                    to 39, where the 7 extra elements represent the period
                    of the element. Note lanthanides are treated as period 6,
                    actinides as period 7. Default False as in the original paper.
            flatten (bool): Stored as ``self.flatten``; not otherwise used
                    by this constructor.
        """
        # Must be set before get_ohv is called below
        self.size = 39 if period_tag else 32

        my_ohvs = {}
        for Z in range(1, 95):
            one_hot = self.get_ohv(Element.from_Z(Z), period_tag)
            my_ohvs[Z] = np.matrix(one_hot)
        self.ohvs = my_ohvs
        self.flatten = flatten
Beispiel #18
0
    def _GetSiteEnvironments(cls,
                             coord,
                             cell,
                             SiteTypes,
                             cutoff,
                             pbc,
                             get_permutations=True,
                             eigen_tol=1e-5):
        """Extract local environments from primitive cell

        Parameters
        ----------
        coord : n x 3 list or numpy array of scaled positions. n is the
            number of atoms.
        cell : 3 x 3 list or numpy array
        SiteTypes : n list of string. String must be S or A followed by a
            number. S indicates a spectator site and A indicates an active
            site.
        cutoff : float. cutoff distance in angstrom for collecting local
            environment.
        pbc : list of boolean. Periodic boundary condition
        get_permutations : boolean. Whether to find the permuted neighbor
            list or not.
        eigen_tol : tolerance for eigenanalysis of point group analysis in
            pymatgen.

        Returns
        -------
        list of dict, one per active site, with the keys 'pos', 'sitetypes',
        'env2config', 'permutations' and 'dist'.
        """
        # Check input
        assert isinstance(coord, (list, np.ndarray))
        assert isinstance(cell, (list, np.ndarray))
        assert len(coord) == len(SiteTypes)

        # Initialize: wrap scaled positions into [0, 1)
        # TODO: Technically, user doesn't even have to supply site index, because
        #       pymatgen can be used to automatically categorize sites..
        coord = np.mod(coord, 1)
        pbc = np.array(pbc)

        # Map sites to other elements..
        # TODO: Available pymatgen functions are very limited when DummySpecie is
        #       involved. This may be perhaps fixed in the future. Until then, we
        #       simply bypass this by mapping site to an element
        # Descending list, so pop() hands out atomic numbers 1, 2, 3, ...
        unused_numbers = list(range(118, 0, -1))

        # Organize symbols and record the mapping in both directions
        type_to_element = {}
        element_to_type = {}
        symbols = []
        active_indices = []
        for index, site_type in enumerate(SiteTypes):
            if site_type not in type_to_element:
                stand_in = Element.from_Z(unused_numbers.pop())
                type_to_element[site_type] = stand_in
                element_to_type[stand_in] = site_type
            symbols.append(type_to_element[site_type])
            if 'A' in site_type:
                active_indices.append(index)

        # Get local environments of each active site
        # Find neighbors and permutations using pymatgen
        structure = Structure(Lattice(cell), symbols, coord)
        neighbors = structure.get_all_neighbors(cutoff, include_index=True)
        site_envs = []
        for site_idx in active_indices:
            # The central site always comes first, at distance 0
            env_symbols = [symbols[site_idx]]
            env_xyz = [structure[site_idx].coords]
            env_dist = [0.0]
            env_sitemap = [site_idx]
            for neighbor in neighbors[site_idx]:
                # Along non-periodic axes a neighbor is only kept when its
                # fractional coordinate lies inside the cell.
                frac = np.around(neighbor[0].frac_coords, 10)
                inside_cell = np.logical_and(0 <= frac, frac < 1)
                if not np.all(inside_cell[~pbc]):
                    continue
                env_xyz.append(neighbor[0].coords)
                env_symbols.append(neighbor[0].specie)
                env_dist.append(neighbor[1])
                env_sitemap.append(neighbor[2])
            # Centre the environment on its mean position
            env_xyz = np.subtract(env_xyz, np.mean(env_xyz, 0))

            perm = []
            if get_permutations:
                finder = PointGroupAnalyzer(Molecule(env_symbols, env_xyz),
                                            eigen_tolerance=eigen_tol)
                for op in finder.get_pointgroup():
                    # For each original position, index of the closest
                    # position after applying the symmetry operation
                    moved = op.operate_multi(env_xyz)
                    perm.append(
                        np.argmin(cdist(env_xyz, moved), axis=1).tolist())

            site_envs.append({
                'pos': env_xyz,
                'sitetypes': [element_to_type[s] for s in env_symbols],
                'env2config': env_sitemap,
                'permutations': perm,
                'dist': env_dist
            })
        return site_envs
Beispiel #19
0
# Report the cross-validated error of the naive-feature model
print("The MAE of the linear ridge regression band gap model using the naive feature set is: ",
      str(round(abs(mean(scores)), 3)), " eV", sep="")

##############################################################################################################

# Let's see which features are most important for the linear model

print("Below are the fitted linear ridge regression coefficients for each feature (i.e., element) in our naive feature set")

linear.fit(naiveFeatures, bandgaps) # fit to the whole data set; we're not doing CV here

print("element: coefficient")

# Coefficient i belongs to the element with atomic number i + 1
for i in range(MAX_Z):
       element = Element.from_Z(i + 1)
       print(element.symbol, ': ', str(linear.coef_[i]), sep='')

##############################################################################################################

# Create alternative feature set that is more physically-motivated

physicalFeatures = []

for material in materials:
       # Fresh, independent accumulators for this material
       theseFeatures, fraction, atomicNo, eneg, group = [], [], [], [], []
Beispiel #20
0
    def disassemble(self, atom_labels=None, guess_element=True,
                    ff_label="ff_map"):
        """
        Breaks down LammpsData to building blocks
        (LammpsBox, ForceField and a series of Topology).
        RESTRICTIONS APPLIED:

        1. No complex force field defined not just on atom
            types, where the same type or equivalent types of topology
            may have more than one set of coefficients.
        2. No intermolecular topologies (with atoms from different
            molecule-ID) since a Topology object includes data for ONE
            molecule or structure only.

        Args:
            atom_labels ([str]): List of strings (must be different
                from one another) for labelling each atom type found in
                Masses section. Default to None, where the labels are
                automatically added based on either element guess or
                dummy specie assignment.
            guess_element (bool): Whether to guess the element based on
                its atomic mass. Default to True, otherwise dummy
                species "Qa", "Qb", ... will be assigned to various
                atom types. The guessed or assigned elements will be
                reflected on atom labels if atom_labels is None, as
                well as on the species of molecule in each Topology.
            ff_label (str): Site property key for labeling atoms of
                different types. Default to "ff_map".

        Returns:
            LammpsBox, ForceField, [Topology]

        Raises:
            AssertionError: If the atom labels are not unique per atom
                type, or if a topology entry involves atoms with different
                molecule-IDs.

        """
        atoms_df = self.atoms.copy()
        if "nx" in atoms_df.columns:
            # Add the shift the box reports for the nx/ny/nz columns
            atoms_df[["x", "y", "z"]] += \
                self.box.get_box_shift(atoms_df[["nx", "ny", "nz"]].values)
        atoms_df = pd.concat([atoms_df, self.velocities], axis=1)

        # Split the atoms by molecule-ID (a single group if there is none)
        mids = atoms_df.get("molecule-ID")
        if mids is None:
            unique_mids = [1]
            data_by_mols = {1: {"Atoms": atoms_df}}
        else:
            unique_mids = np.unique(mids)
            data_by_mols = {}
            for k in unique_mids:
                df = atoms_df[atoms_df["molecule-ID"] == k]
                data_by_mols[k] = {"Atoms": df}

        masses = self.masses.copy()
        masses["label"] = atom_labels
        unique_masses = np.unique(masses["mass"])
        if guess_element:
            # Pick, for each distinct mass, the element with the closest
            # reference mass.
            # NOTE(review): "+ 1" assumes iterating Element starts at Z = 1
            # and is ordered by atomic number - confirm.
            ref_masses = [el.atomic_mass.real for el in Element]
            diff = np.abs(np.array(ref_masses) - unique_masses[:, None])
            atomic_numbers = np.argmin(diff, axis=1) + 1
            symbols = [Element.from_Z(an).symbol for an in atomic_numbers]
        else:
            # Dummy species "Qa", "Qb", ... (chr(97) is "a")
            symbols = ["Q%s" % a for a in
                       map(chr, range(97, 97 + len(unique_masses)))]
        for um, s in zip(unique_masses, symbols):
            masses.loc[masses["mass"] == um, "element"] = s
        if atom_labels is None:  # add unique labels based on elements
            # Series.iteritems() was removed in pandas 2.0; items() is the
            # equivalent that works on old and new versions.
            for el, vc in masses["element"].value_counts().items():
                masses.loc[masses["element"] == el, "label"] = \
                    ["%s%d" % (el, c) for c in range(1, vc + 1)]
        assert masses["label"].nunique(dropna=False) == len(masses), \
            "Expecting unique atom label for each type"
        mass_info = [tuple([r["label"], r["mass"]])
                     for _, r in masses.iterrows()]

        nonbond_coeffs, topo_coeffs = None, None
        if self.force_field:
            # "PairIJ Coeffs" takes precedence over "Pair Coeffs"
            if "PairIJ Coeffs" in self.force_field:
                nbc = self.force_field["PairIJ Coeffs"]
                nbc = nbc.sort_values(["id1", "id2"]).drop(["id1", "id2"], axis=1)
                nonbond_coeffs = [list(t) for t in nbc.itertuples(False, None)]
            elif "Pair Coeffs" in self.force_field:
                nbc = self.force_field["Pair Coeffs"].sort_index()
                nonbond_coeffs = [list(t) for t in nbc.itertuples(False, None)]

            topo_coeffs = {k: [] for k in SECTION_KEYWORDS["ff"][2:]
                           if k in self.force_field}
            for kw in topo_coeffs.keys():
                class2_coeffs = {k: list(v.itertuples(False, None))
                                 for k, v in self.force_field.items()
                                 if k in CLASS2_KEYWORDS.get(kw, [])}
                ff_df = self.force_field[kw]
                for t in ff_df.itertuples(True, None):
                    # t[0] is the row index, t[1:] are the coefficients
                    d = {"coeffs": list(t[1:]), "types": []}
                    if class2_coeffs:
                        d.update({k: list(v[t[0] - 1])
                                  for k, v in class2_coeffs.items()})
                    topo_coeffs[kw].append(d)

        if self.topology:
            # Atom indices -> tuple of the atom-type labels of those atoms
            label_topo = lambda t: tuple(masses.loc[atoms_df.loc[t, "type"],
                                                    "label"])
            for k, v in self.topology.items():
                # Section name without its last character + " Coeffs" is
                # the matching force field section
                ff_kw = k[:-1] + " Coeffs"
                for topo in v.itertuples(False, None):
                    topo_idx = topo[0] - 1  # 1-based type id -> list index
                    indices = topo[1:]
                    mids = atoms_df.loc[indices, "molecule-ID"].unique()
                    assert len(mids) == 1, \
                        "Do not support intermolecular topology formed " \
                        "by atoms with different molecule-IDs"
                    label = label_topo(indices)
                    topo_coeffs[ff_kw][topo_idx]["types"].append(label)
                    if data_by_mols[mids[0]].get(k):
                        data_by_mols[mids[0]][k].append(indices)
                    else:
                        data_by_mols[mids[0]][k] = [indices]

        if topo_coeffs:
            # Drop duplicate label tuples collected above
            for v in topo_coeffs.values():
                for d in v:
                    d["types"] = list(set(d["types"]))

        ff = ForceField(mass_info=mass_info, nonbond_coeffs=nonbond_coeffs,
                        topo_coeffs=topo_coeffs)

        topo_list = []
        for mid in unique_mids:
            data = data_by_mols[mid]
            atoms = data["Atoms"]
            # Smallest atom index of this molecule; subtracted below so the
            # topology indices become relative to the molecule
            shift = min(atoms.index)
            type_ids = atoms["type"]
            species = masses.loc[type_ids, "element"]
            labels = masses.loc[type_ids, "label"]
            coords = atoms[["x", "y", "z"]]
            m = Molecule(species.values, coords.values,
                         site_properties={ff_label: labels.values})
            charges = atoms.get("q")
            velocities = atoms[["vx", "vy", "vz"]] if "vx" in atoms.columns \
                else None
            topologies = {}
            for kw in SECTION_KEYWORDS["topology"]:
                if data.get(kw):
                    topologies[kw] = (np.array(data[kw]) - shift).tolist()
            topologies = None if not topologies else topologies
            topo_list.append(Topology(sites=m, ff_label=ff_label,
                                      charges=charges, velocities=velocities,
                                      topologies=topologies))

        return self.box, ff, topo_list
Beispiel #21
0
# Plain attributes of `fe`, printed one per line in this order.
for _attr_name in (
        "thermal_conductivity",
        "boiling_point",
        "melting_point",
        "critical_temperature",
        "superconduction_temperature",
        "liquid_range",
        "bulk_modulus",
        "youngs_modulus",
        "brinell_hardness",
        "rigidity_modulus",
        "mineral_hardness",
        "vickers_hardness",
        "density_of_solid",
        "coefficient_of_linear_thermal_expansion",
        "average_ionic_radius",
        "ionic_radii",
):
    print(getattr(fe, _attr_name))

# Methods that need arguments are called explicitly.
print(fe.is_valid_symbol("Fe"))
print(fe.from_Z(26))
print(fe.as_dict())
print(fe.from_dict(fe.as_dict()))  # round trip through the dict form
print(fe.from_row_and_group(4, 8))

for _attr_name in ("ground_state_term_symbol", "icsd_oxidation_states",
                   "name"):
    print(getattr(fe, _attr_name))

print(fe.print_periodic_table())

for _attr_name in ("term_symbols", "valence", "value", "boiling_point"):
    print(getattr(fe, _attr_name))

# Boiling point converted with .to("") and printed as a plain float
print(float(fe.boiling_point.to("")))
 def element():
     """Yield each Element with Z in 1..99 that is absent from the
     composition of ``structure_for_visualize``, in order of Z."""
     candidates = (Element.from_Z(number) for number in range(1, 100))
     for candidate in candidates:
         if candidate in structure_for_visualize.composition:
             continue
         yield candidate
Beispiel #23
0
    def _parse(self, filename):
        """
        Parse a Gaussian output file and store the extracted data on ``self``.

        The file is read line by line through three stages:

        * stage 0: look for the ``l101.exe`` entry line, ``%link0`` settings
          and the route line;
        * stage 1: look for the charge / multiplicity line;
        * stage 2: everything else (geometries, energies, forces,
          eigenvalues, MO coefficients, frequencies, Mulliken charges,
          termination and error markers).

        Attributes (re)initialised here: ``properly_terminated``, ``is_pcm``,
        ``stationary_type``, ``structures``, ``corrections``, ``energies``,
        ``pcm``, ``errors``, ``Mulliken_charges``, ``link0``, ``cart_forces``,
        ``frequencies``, ``eigenvalues`` and ``is_spin``. Depending on what is
        found in the file, ``functional``, ``basis_set``, ``route``,
        ``dieze_tag``, ``charge``, ``spin_mult``, ``num_basis_func``,
        ``electrons``, ``atom_basis_labels``, ``eigenvectors`` and
        ``molecular_orbital`` are set as well.

        Args:
            filename: Path of the output file; opened with ``zopen``.

        A warning (not an exception) is emitted when no "Normal termination"
        line is found.
        """
        # All patterns are raw strings so that regex escapes such as ``\s``
        # are not interpreted as (invalid) Python string escapes.
        start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
        route_patt = re.compile(" #[pPnNtT]*.*")
        link0_patt = re.compile(r"^\s(%.+)\s*=\s*(.+)")
        charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+"
                                     r"Multiplicity\s+=\s*(\d+)")
        num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
        num_elec_patt = re.compile(
            r"(\d+)\s+alpha electrons\s+(\d+)\s+beta electrons")
        pcm_patt = re.compile("Polarizable Continuum Model")
        stat_type_patt = re.compile("imaginary frequencies")
        scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
        mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
        oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
        termination_patt = re.compile("(Normal|Error) termination")
        error_patt = re.compile(
            "(! Non-Optimized Parameters !|Convergence failure)")
        mulliken_patt = re.compile(
            r"^\s*(Mulliken charges|Mulliken atomic charges)")
        mulliken_charge_patt = re.compile(
            r'^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
        end_mulliken_patt = re.compile(
            r'(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
        std_orientation_patt = re.compile("Standard orientation")
        end_patt = re.compile("--+")
        orbital_patt = re.compile(r"(Alpha|Beta)\s*\S+\s*eigenvalues --(.*)")
        thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)="
                                 r"\s+([\d\.-]+)")
        forces_on_patt = re.compile(
            r"Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
        forces_off_patt = re.compile(r"Cartesian\s+Forces:\s+Max.*RMS.*")
        forces_patt = re.compile(
            r"\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")

        freq_on_patt = re.compile(
            r"Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
        freq_patt = re.compile(r"Frequencies\s--\s+(.*)")
        normal_mode_patt = re.compile(
            r"\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")

        mo_coeff_patt = re.compile("Molecular Orbital Coefficients:")
        mo_coeff_name_patt = re.compile(
            r"\d+\s((\d+|\s+)\s+([a-zA-Z]{1,2}|\s+))\s+(\d+\S+)")

        self.properly_terminated = False
        self.is_pcm = False
        self.stationary_type = "Minimum"
        self.structures = []
        self.corrections = {}
        self.energies = []
        self.pcm = None
        self.errors = []
        self.Mulliken_charges = {}
        self.link0 = {}
        self.cart_forces = []
        self.frequencies = []
        self.eigenvalues = []
        self.is_spin = False

        # Parser state. ``read_coord`` is a counter (not a flag): it is
        # incremented modulo 4 on every separator line of a geometry table.
        coord_txt = []
        read_coord = 0
        read_mulliken = False
        read_eigen = False
        eigen_txt = []
        parse_stage = 0
        num_basis_found = False
        terminated = False
        parse_forces = False
        forces = []
        parse_freq = False
        frequencies = []
        read_mo = False

        with zopen(filename) as f:
            for line in f:
                if parse_stage == 0:
                    if start_patt.search(line):
                        parse_stage = 1
                    elif link0_patt.match(line):
                        m = link0_patt.match(line)
                        self.link0[m.group(1)] = m.group(2)
                    elif route_patt.search(line):
                        params = read_route_line(line)
                        self.functional = params[0]
                        self.basis_set = params[1]
                        self.route = params[2]
                        self.dieze_tag = params[3]
                        parse_stage = 1
                elif parse_stage == 1:
                    if charge_mul_patt.search(line):
                        m = charge_mul_patt.search(line)
                        self.charge = int(m.group(1))
                        self.spin_mult = int(m.group(2))
                        parse_stage = 2
                elif parse_stage == 2:

                    if self.is_pcm:
                        self._check_pcm(line)

                    if "FREQ" in self.route and thermo_patt.search(line):
                        m = thermo_patt.search(line)
                        if m.group(1) == "Zero-point":
                            self.corrections["Zero-point"] = float(m.group(3))
                        else:
                            # NOTE: str.strip treats " to " as a set of
                            # characters (' ', 't', 'o') removed from both
                            # ends, not as a literal prefix.
                            key = m.group(2).strip(" to ")
                            self.corrections[key] = float(m.group(3))

                    if read_coord:
                        if not end_patt.search(line):
                            coord_txt.append(line)
                        else:
                            read_coord = (read_coord + 1) % 4
                            if not read_coord:
                                # Counter wrapped around: the table is
                                # complete. The first two collected lines are
                                # skipped; the rest hold one atom each.
                                sp = []
                                coords = []
                                for l in coord_txt[2:]:
                                    toks = l.split()
                                    sp.append(Element.from_Z(int(toks[1])))
                                    coords.append([float(i) for i in toks[3:6]])
                                self.structures.append(Molecule(sp, coords))

                    if parse_forces:
                        m = forces_patt.search(line)
                        if m:
                            forces.extend([float(_v) for _v in m.groups()[2:5]])
                        elif forces_off_patt.search(line):
                            self.cart_forces.append(forces)
                            forces = []
                            parse_forces = False

                    # read molecular orbital eigenvalues
                    if read_eigen:
                        m = orbital_patt.search(line)
                        if m:
                            eigen_txt.append(line)
                        else:
                            # First non-eigenvalue line: convert the lines
                            # collected so far.
                            read_eigen = False
                            self.eigenvalues = {Spin.up: []}
                            for eigenline in eigen_txt:
                                if "Alpha" in eigenline:
                                    self.eigenvalues[Spin.up] += [
                                        float(e)
                                        for e in float_patt.findall(eigenline)]
                                elif "Beta" in eigenline:
                                    if Spin.down not in self.eigenvalues:
                                        self.eigenvalues[Spin.down] = []
                                    self.eigenvalues[Spin.down] += [
                                        float(e)
                                        for e in float_patt.findall(eigenline)]
                            eigen_txt = []

                    # read molecular orbital coefficients
                    if read_mo:
                        # build a matrix with all coefficients
                        all_spin = [Spin.up]
                        if self.is_spin:
                            all_spin.append(Spin.down)

                        mat_mo = {}
                        for spin in all_spin:
                            mat_mo[spin] = np.zeros((self.num_basis_func, self.num_basis_func))
                            nMO = 0
                            end_mo = False
                            while nMO < self.num_basis_func and not end_mo:
                                # Two lines are consumed before each block of
                                # coefficient rows.
                                f.readline()
                                f.readline()
                                self.atom_basis_labels = []
                                for i in range(self.num_basis_func):
                                    line = f.readline()

                                    # identify atom and OA labels
                                    m = mo_coeff_name_patt.search(line)
                                    if m.group(1).strip() != "":
                                        iat = int(m.group(2)) - 1
                                        # atname = m.group(3)
                                        self.atom_basis_labels.append([m.group(4)])
                                    else:
                                        self.atom_basis_labels[iat].append(m.group(4))

                                    # MO coefficients
                                    coeffs = [float(c) for c in float_patt.findall(line)]
                                    for j in range(len(coeffs)):
                                        mat_mo[spin][i, nMO + j] = coeffs[j]

                                nMO += len(coeffs)
                                line = f.readline()
                                # manage pop=regular case (not all MO)
                                if nMO < self.num_basis_func and \
                                    ("Density Matrix:" in line or mo_coeff_patt.search(line)):
                                    end_mo = True
                                    warnings.warn("POP=regular case, matrix coefficients not complete")
                            f.readline()

                        self.eigenvectors = mat_mo
                        read_mo = False

                        # build a more convenient array dict with MO coefficient of
                        # each atom in each MO.
                        # mo[Spin][OM j][atom i] = {AO_k: coeff, AO_k: coeff ... }
                        mo = {}
                        for spin in all_spin:
                            mo[spin] = [[{} for iat in range(len(self.atom_basis_labels))]
                                                for j in range(self.num_basis_func)]
                            for j in range(self.num_basis_func):
                                i = 0
                                for iat in range(len(self.atom_basis_labels)):
                                    for label in self.atom_basis_labels[iat]:
                                        mo[spin][j][iat][label] = self.eigenvectors[spin][i, j]
                                        i += 1

                        self.molecular_orbital = mo


                    elif parse_freq:
                        m = freq_patt.search(line)
                        if m:
                            values = [float(_v) for _v in m.groups()[0].split()]
                            for value in values:
                                frequencies.append([value, []])
                        elif normal_mode_patt.search(line):
                            values = [float(_v) for _v in line.split()[2:]]
                            n = int(len(values) / 3)
                            # Each triple of values is appended to one of the
                            # last ``n`` frequency entries.
                            for i in range(0, len(values), 3):
                                j = -n + int(i / 3)
                                frequencies[j][1].extend(values[i:i+3])
                        elif line.find("-------------------") != -1:
                            parse_freq = False
                            self.frequencies.append(frequencies)
                            frequencies = []

                    elif termination_patt.search(line):
                        m = termination_patt.search(line)
                        if m.group(1) == "Normal":
                            self.properly_terminated = True
                            terminated = True
                    elif error_patt.search(line):
                        error_defs = {
                            "! Non-Optimized Parameters !": "Optimization "
                                                            "error",
                            "Convergence failure": "SCF convergence error"
                        }
                        m = error_patt.search(line)
                        self.errors.append(error_defs[m.group(1)])
                    elif (not num_basis_found) and \
                            num_basis_func_patt.search(line):
                        m = num_basis_func_patt.search(line)
                        self.num_basis_func = int(m.group(1))
                        num_basis_found = True
                    elif num_elec_patt.search(line):
                        m = num_elec_patt.search(line)
                        self.electrons = (int(m.group(1)), int(m.group(2)))
                    elif (not self.is_pcm) and pcm_patt.search(line):
                        self.is_pcm = True
                        self.pcm = {}
                    elif "FREQ" in self.route and "OPT" in self.route and \
                            stat_type_patt.search(line):
                        self.stationary_type = "Saddle"
                    elif mp2_patt.search(line):
                        m = mp2_patt.search(line)
                        # Fortran-style "D" exponents are rewritten to "E"
                        # so that float() accepts them.
                        self.energies.append(float(m.group(1).replace("D",
                                                                      "E")))
                    elif oniom_patt.search(line):
                        # Compiled patterns have no ``matcher`` method; the
                        # previous call raised AttributeError on ONIOM output.
                        m = oniom_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif scf_patt.search(line):
                        m = scf_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif std_orientation_patt.search(line):
                        coord_txt = []
                        read_coord = 1
                    elif not read_eigen and orbital_patt.search(line):
                        eigen_txt.append(line)
                        read_eigen = True
                    elif mulliken_patt.search(line):
                        mulliken_txt = []
                        read_mulliken = True
                    elif not parse_forces and forces_on_patt.search(line):
                        parse_forces = True
                    elif freq_on_patt.search(line):
                        parse_freq = True
                    elif mo_coeff_patt.search(line):
                        if "Alpha" in line:
                            self.is_spin = True
                        read_mo = True

                    if read_mulliken:
                        if not end_mulliken_patt.search(line):
                            mulliken_txt.append(line)
                        else:
                            # "Sum of Mulliken ... charges" closes the table:
                            # convert every collected line that looks like
                            # "<index> <symbol> <charge>".
                            mulliken_charges = {}
                            for mulliken_line in mulliken_txt:
                                m = mulliken_charge_patt.search(mulliken_line)
                                if m:
                                    entry = {int(m.group(1)): [m.group(2), float(m.group(3))]}
                                    mulliken_charges.update(entry)
                            read_mulliken = False
                            self.Mulliken_charges = mulliken_charges

        if not terminated:
            #raise IOError("Bad Gaussian output file.")
            warnings.warn("\n" + self.filename + \
                ": Termination error or bad Gaussian output file !")
Beispiel #24
0
def main():
    """
    Compare band-gap regression models on the data in ``bandgapDFT.csv``.

    Reads the headerless CSV (compound, band gap), builds the "naive" and
    "physical" feature sets via ``naiveVectorize`` and
    ``get_physical_features``, and prints the cross-validated mean absolute
    error of a ridge regression and of a random forest on each feature set,
    together with a guess-the-mean baseline and the fitted ridge
    coefficients per element.
    """
    col_names = ['compound',
                 'bandgap']  # file has no header, so we add our own
    bg = pd.read_csv('bandgapDFT.csv', names=col_names, header=None)
    # print() with a single parenthesised argument works on Python 2 and 3;
    # the former bare ``print bg.head()`` was a SyntaxError on Python 3.
    print(bg.head())

    # construct naive feature set and add it to the existing data frame
    compositions = bg['compound'].apply(
        Composition)  # this is a Pandas series containing the compositions
    bg['naive_features'] = compositions.apply(
        naiveVectorize
    )  # apply the naiveVectorize function on the compositions
    print(bg.head())

    # Establish baseline accuracy by "guessing the average" of the band gap set
    # A good model should never do worse.
    baselineError = mean(abs(mean(bg.bandgap) - bg.bandgap))
    print("The MAE of always guessing the average band gap is: " +
          str(round(baselineError, 3)) + " eV")

    # Train linear ridge regression model using naive feature set
    linear = linear_model.Ridge(
        alpha=0.5
    )  # alpha is a tuning parameter affecting how regression deals with collinear inputs
    cv = ShuffleSplit(n_splits=10, test_size=0.1, random_state=0)

    naive_scores = cross_val_score(linear,
                                   list(bg['naive_features']),
                                   bg['bandgap'],
                                   cv=cv,
                                   scoring='neg_mean_absolute_error')
    print(
        "The MAE of the linear ridge regression band gap model using the naive feature set is: "
        + str(round(abs(mean(naive_scores)), 3)) + " eV")

    # Let's see which features are most important for the linear model
    print(
        "Below are the fitted linear ridge regression coefficients for each feature (i.e., element) in our naive feature set"
    )
    linear.fit(
        list(bg['naive_features']),
        bg['bandgap'])  # fit to the whole data set; we're not doing CV here

    print("element: coefficient")
    for i in range(MAX_Z):
        # coefficient i is reported against the element with Z = i + 1
        element = Element.from_Z(i + 1)
        print(element.symbol + ': ' + str(linear.coef_[i]))

    # Create alternative feature set that is more physically-motivated
    bg['physical_features'] = compositions.apply(get_physical_features)
    print(bg.head())

    physical_scores = cross_val_score(linear,
                                      list(bg['physical_features']),
                                      bg['bandgap'],
                                      cv=cv,
                                      scoring='neg_mean_absolute_error')
    print(
        "The MAE of the linear ridge regression band gap model using the physical feature set is: "
        + str(round(abs(mean(physical_scores)), 3)) + " eV")

    # Random forest
    rfr = ensemble.RandomForestRegressor(
        n_estimators=10)  # try 10 trees in the forest

    naive_scores = cross_val_score(rfr,
                                   list(bg['naive_features']),
                                   bg['bandgap'],
                                   cv=cv,
                                   scoring='neg_mean_absolute_error')
    print(
        "The MAE of the nonlinear random forest band gap model using the naive feature set is: "
        + str(round(abs(mean(naive_scores)), 3)) + " eV")

    physical_scores = cross_val_score(rfr,
                                      list(bg['physical_features']),
                                      bg['bandgap'],
                                      cv=cv,
                                      scoring='neg_mean_absolute_error')
    print(
        "The MAE of the nonlinear random forest band gap model using the physical feature set is: "
        + str(round(abs(mean(physical_scores)), 3)) + " eV")
Beispiel #25
0
    def from_file(feff_inp_file="feff.inp", ldos_file="ldos"):
        """
        Creates LDos object from raw Feff ldos files by assuming they are
        numbered consecutively, i.e. ldos01.dat, ldos02.dat...

        Args:
            feff_inp_file (str): input file of run to obtain structure
            ldos_file (str): output ldos file of run to obtain dos info, etc.

        Returns:
            LDos built from the complete DOS and the charge transfer read by
            ``LDos.charge_transfer_from_file``.
        """
        header_str = Header.header_string_from_file(feff_inp_file)
        header = Header.from_string(header_str)
        structure = header.struct
        nsites = structure.num_sites
        parameters = Tags.from_file(feff_inp_file)

        if "RECIPROCAL" in parameters:
            # Potentials are read from pot.inp: map each element name to the
            # index of the first potential line that mentions it.
            pot_dict = {}
            pot_readstart = re.compile(".*iz.*lmaxsc.*xnatph.*xion.*folp.*")
            pot_readend = re.compile(".*ExternalPot.*switch.*")
            pot_inp = re.sub(r"feff.inp", r"pot.inp", feff_inp_file)
            dos_index = 1
            begin = 0

            with zopen(pot_inp, "r") as potfile:
                for line in potfile:
                    if pot_readend.findall(line):
                        break

                    if begin == 1:
                        # The first line after the table header is skipped.
                        begin += 1
                        continue

                    if begin == 2:
                        z_number = int(line.strip().split()[0])
                        ele_name = Element.from_Z(z_number).name
                        # dos_index only grows, so keeping the first index
                        # seen is the same as keeping the minimum.
                        pot_dict.setdefault(ele_name, dos_index)
                        dos_index += 1

                    if pot_readstart.findall(line):
                        begin = 1
        else:
            pot_string = Potential.pot_string_from_file(feff_inp_file)
            dicts = Potential.pot_dict_from_string(pot_string)
            pot_dict = dicts[0]

        # The Fermi energy is the fifth whitespace-separated token on the
        # first line of <ldos_file>00.dat.
        with zopen(ldos_file + "00.dat", "r") as fobject:
            f = fobject.readlines()
        efermi = float(f[0].split()[4])

        # One data table per potential, files numbered 01, 02, ...
        ldos = {
            i: np.loadtxt("{}{:02d}.dat".format(ldos_file, i))
            for i in range(1, len(pot_dict) + 1)
        }

        dlength = len(ldos[1])
        dos_energies = [ldos[1][j][0] for j in range(dlength)]

        # (orbital used as the key, column of the ldos table) for s, p, d, f.
        orbital_columns = (
            (Orbital.s, 1),
            (Orbital.py, 2),
            (Orbital.dxy, 3),
            (Orbital.f0, 4),
        )

        # Only spin-up densities are available from these files.
        all_pdos = []
        for i in range(nsites):
            pot_index = pot_dict[structure.species[i].symbol]
            site_pdos = defaultdict(dict)
            for orbital, column in orbital_columns:
                site_pdos[orbital] = {
                    Spin.up: [ldos[pot_index][j][column]
                              for j in range(dlength)]
                }
            all_pdos.append(site_pdos)

        pdoss = {
            structure[i]: {orbital: site_pdos[orbital]
                           for orbital, _ in orbital_columns}
            for i, site_pdos in enumerate(all_pdos)
        }

        # Total DOS: sum of the four orbital columns over all sites. The
        # accumulation order (site, then orbital, then energy point) is kept
        # so floating-point results are unchanged.
        tdos = [0] * dlength
        for i in range(nsites):
            pot_index = pot_dict[structure.species[i].symbol]
            for _, column in orbital_columns:
                for j in range(dlength):
                    tdos[j] = tdos[j] + ldos[pot_index][j][column]
        tdos = {Spin.up: tdos}

        dos = Dos(efermi, dos_energies, tdos)
        complete_dos = CompleteDos(structure, dos, pdoss)
        charge_transfer = LDos.charge_transfer_from_file(
            feff_inp_file, ldos_file)
        return LDos(complete_dos, charge_transfer)
Beispiel #26
0
 def _get_bond_type(graph) -> Dict:
     """Return a deep copy of ``graph`` whose ``"bond"`` entries hold the sum
     of the ``is_metal`` flags of the two atoms joined by each bond.

     The atoms of bond ``k`` are looked up through ``graph["index1"][k]`` and
     ``graph["index2"][k]``; ``graph["atom"]`` supplies the atomic numbers.
     """
     species = list(map(Element.from_Z, graph["atom"]))
     result = deepcopy(graph)
     endpoints = zip(graph["index1"], graph["index2"])
     for bond_idx, endpoint_pair in enumerate(endpoints):
         first, second = endpoint_pair
         result["bond"][bond_idx] = species[first].is_metal + species[second].is_metal
     return result
def Bk_symbol():
    """Return a one-item list with the string form of the element Z = 97."""
    element_97 = Element.from_Z(97)
    return [str(element_97)]
# Cross-validate the `linear` model on the naive feature set with ten shuffled
# 90/10 splits and a fixed random seed.
# NOTE(review): `cross_validation.ShuffleSplit(n, n_iter=...)` and the
# 'mean_absolute_error' scoring name look like an older scikit-learn API --
# confirm that the installed version still provides them.
cv = cross_validation.ShuffleSplit(len(bandgaps),\
	n_iter=10, test_size=0.1, random_state=0)

scores = cross_validation.cross_val_score(linear, naiveFeatures,\
	bandgaps, cv=cv, scoring='mean_absolute_error')

# abs() is applied to the mean score before it is reported.
print("The MAE of model using the naive feature set is: "\
	+ str(round(abs(mean(scores)), 3)) + " eV")

print("Below naive feature set")

linear.fit(naiveFeatures, bandgaps) # fit to the whole data set
print("element: coefficient")

# Coefficient i is printed next to the symbol of the element with Z = i + 1.
for i in range(MAX_Z):
       element = Element.from_Z(i + 1)
       print(element.symbol + ': ' + str(linear.coef_[i]))
# To be continued:
# build a second, more physically-motivated feature set
physicalFeatures = []

# For every material, collect per-element atomic fraction, atomic number and
# the element's `X` attribute (presumably electronegativity -- confirm).
# `theseFeatures` and `group` are initialised but not filled in this excerpt.
for material in materials:
       theseFeatures = []
       fraction = []
       atomicNo = []
       eneg = []
       group = []
       for element in material:
               fraction.append(material.get_atomic_fraction(element))
               atomicNo.append(float(element.Z))
               eneg.append(element.X)
Beispiel #29
0
    def disassemble(self, atom_labels=None, guess_element=True,
                    ff_label="ff_map"):
        """
        Breaks down LammpsData to ForceField and a series of Topology.
        RESTRICTIONS APPLIED:
        1. No complex force field defined not just on atom
            types, where the same type or equivalent types of topology
            may have more than one set of coefficients.
        2. No intermolecular topologies (with atoms from different
            molecule-ID) since a Topology object includes data for ONE
            molecule or structure only.

        Args:
            atom_labels ([str]): List of strings (must be different
                from one another) for labelling each atom type found in
                Masses section. Default to None, where the labels are
                automatically added based on either element guess or
                dummy specie assignment.
            guess_element (bool): Whether to guess the element based on
                its atomic mass. Default to True, otherwise dummy
                species "Qa", "Qb", ... will be assigned to various
                atom types. The guessed or assigned elements will be
                reflected on atom labels if atom_labels is None, as
                well as on the species of molecule in each Topology.
            ff_label (str): Site property key for labeling atoms of
                different types. Default to "ff_map".

        Returns:
            ForceField, [Topology]

        Raises:
            AssertionError: If the atom labels are not unique, or if a
                topology entry joins atoms with different molecule-IDs.

        """
        atoms_df = self.atoms.copy()
        if "nx" in atoms_df.columns:
            # Unwrap coordinates using the image flags and the box lengths.
            box_dim = np.ptp(self.box_bounds, axis=1)
            atoms_df[["x", "y", "z"]] += atoms_df[["nx", "ny", "nz"]].values \
                                         * box_dim
        atoms_df = pd.concat([atoms_df, self.velocities], axis=1)

        mids = atoms_df.get("molecule-ID")
        if mids is None:
            unique_mids = [1]
            data_by_mols = {1: {"Atoms": atoms_df}}
        else:
            unique_mids = np.unique(mids)
            data_by_mols = {}
            for k in unique_mids:
                df = atoms_df[atoms_df["molecule-ID"] == k]
                data_by_mols[k] = {"Atoms": df}

        masses = self.masses.copy()
        masses["label"] = atom_labels
        unique_masses = np.unique(masses["mass"])
        if guess_element:
            # Keep each reference mass paired with its own element. Sorting
            # the masses (as done previously) breaks the index -> atomic
            # number correspondence wherever mass does not increase with Z
            # (e.g. Ar/K, Co/Ni, Te/I), yielding the wrong element.
            ref_elements = list(Element)
            ref_masses = np.array([el.atomic_mass.real
                                   for el in ref_elements])
            diff = np.abs(ref_masses - unique_masses[:, None])
            symbols = [ref_elements[idx].symbol
                       for idx in np.argmin(diff, axis=1)]
        else:
            symbols = ["Q%s" % a for a in
                       map(chr, range(97, 97 + len(unique_masses)))]
        for um, s in zip(unique_masses, symbols):
            masses.loc[masses["mass"] == um, "element"] = s
        if atom_labels is None:  # add unique labels based on elements
            # Series.items() replaces iteritems(), removed in pandas 2.0.
            for el, vc in masses["element"].value_counts().items():
                masses.loc[masses["element"] == el, "label"] = \
                    ["%s%d" % (el, c) for c in range(1, vc + 1)]
        assert masses["label"].nunique(dropna=False) == len(masses), \
            "Expecting unique atom label for each type"
        mass_info = [tuple([r["label"], r["mass"]])
                     for _, r in masses.iterrows()]

        nonbond_coeffs, topo_coeffs = None, None
        if self.force_field:
            if "PairIJ Coeffs" in self.force_field:
                nbc = self.force_field["PairIJ Coeffs"]
                nbc = nbc.sort_values(["id1", "id2"]).drop(["id1", "id2"], axis=1)
                nonbond_coeffs = [list(t) for t in nbc.itertuples(False, None)]
            elif "Pair Coeffs" in self.force_field:
                nbc = self.force_field["Pair Coeffs"].sort_index()
                nonbond_coeffs = [list(t) for t in nbc.itertuples(False, None)]

            topo_coeffs = {k: [] for k in SECTION_KEYWORDS["ff"][2:]
                           if k in self.force_field}
            for kw in topo_coeffs.keys():
                class2_coeffs = {k: list(v.itertuples(False, None))
                                 for k, v in self.force_field.items()
                                 if k in CLASS2_KEYWORDS.get(kw, [])}
                ff_df = self.force_field[kw]
                for t in ff_df.itertuples(True, None):
                    # t[0] is the (1-based) row index, t[1:] the coefficients.
                    d = {"coeffs": list(t[1:]), "types": []}
                    if class2_coeffs:
                        d.update({k: list(v[t[0] - 1])
                                  for k, v in class2_coeffs.items()})
                    topo_coeffs[kw].append(d)

        if self.topology:
            def label_topo(t):
                # Labels of the atom types taking part in topology entry t.
                return tuple(masses.loc[atoms_df.loc[t, "type"], "label"])

            for k, v in self.topology.items():
                # e.g. "Bonds" -> "Bond Coeffs"
                ff_kw = k[:-1] + " Coeffs"
                for topo in v.itertuples(False, None):
                    topo_idx = topo[0] - 1
                    indices = topo[1:]
                    mids = atoms_df.loc[indices, "molecule-ID"].unique()
                    assert len(mids) == 1, \
                        "Do not support intermolecular topology formed " \
                        "by atoms with different molecule-IDs"
                    label = label_topo(indices)
                    topo_coeffs[ff_kw][topo_idx]["types"].append(label)
                    if data_by_mols[mids[0]].get(k):
                        data_by_mols[mids[0]][k].append(indices)
                    else:
                        data_by_mols[mids[0]][k] = [indices]

        if topo_coeffs:
            # Drop duplicate type tuples collected above.
            for v in topo_coeffs.values():
                for d in v:
                    d["types"] = list(set(d["types"]))

        ff = ForceField(mass_info=mass_info, nonbond_coeffs=nonbond_coeffs,
                        topo_coeffs=topo_coeffs)

        topo_list = []
        for mid in unique_mids:
            data = data_by_mols[mid]
            atoms = data["Atoms"]
            # Topology indices are re-based on the smallest atom index of
            # this molecule.
            shift = min(atoms.index)
            type_ids = atoms["type"]
            species = masses.loc[type_ids, "element"]
            labels = masses.loc[type_ids, "label"]
            coords = atoms[["x", "y", "z"]]
            m = Molecule(species.values, coords.values,
                         site_properties={ff_label: labels.values})
            charges = atoms.get("q")
            velocities = atoms[["vx", "vy", "vz"]] if "vx" in atoms.columns \
                else None
            topologies = {}
            for kw in SECTION_KEYWORDS["topology"]:
                if data.get(kw):
                    topologies[kw] = (np.array(data[kw]) - shift).tolist()
            topologies = None if not topologies else topologies
            topo_list.append(Topology(sites=m, ff_label=ff_label,
                                      charges=charges, velocities=velocities,
                                      topologies=topologies))

        return ff, topo_list
Beispiel #30
0
    def charge_transfer_from_file(feff_inp_file, ldos_file):
        """
        Get charge transfer from file.

        Args:
            feff_inp_file (str): name of feff.inp file for run
            ldos_file (str): ldos filename for run, assume consecutive order,
                i.e., ldos01.dat, ldos02.dat....

        Returns:
            OrderedDict keyed by the potential index as a string; each value
            maps the potential's element name to its charges, e.g.
            {"0": {"Fe": {"s": 0.078, "p": 0.154, "d": 0.0, "f": 0.0,
            "tot": 0.232}}, ...}
        """
        cht = OrderedDict()
        parameters = Tags.from_file(feff_inp_file)

        if "RECIPROCAL" in parameters:
            # Potentials are read from pot.inp. ``dicts[0]`` maps element
            # name -> ldos file index; ``pot_dict`` maps the running
            # potential index (starting at 0) -> element name.
            dicts = [{}]
            pot_dict = {}
            dos_index = 1
            begin = 0
            pot_inp = re.sub(r"feff.inp", r"pot.inp", feff_inp_file)
            pot_readstart = re.compile(".*iz.*lmaxsc.*xnatph.*xion.*folp.*")
            pot_readend = re.compile(".*ExternalPot.*switch.*")
            with zopen(pot_inp, "r") as potfile:
                for line in potfile:
                    if pot_readend.findall(line):
                        break
                    if begin == 1:
                        # First line after the table header: recorded in
                        # pot_dict only, not in dicts[0].
                        z_number = int(line.strip().split()[0])
                        ele_name = Element.from_Z(z_number).name
                        # Keys are consecutive from 0, so the next key is
                        # simply the current size.
                        pot_dict[len(pot_dict)] = ele_name
                        begin += 1
                        continue
                    if begin == 2:
                        z_number = int(line.strip().split()[0])
                        ele_name = Element.from_Z(z_number).name
                        dicts[0][ele_name] = dos_index
                        dos_index += 1
                        pot_dict[len(pot_dict)] = ele_name
                    if pot_readstart.findall(line):
                        begin = 1
        else:
            pot_string = Potential.pot_string_from_file(feff_inp_file)
            dicts = Potential.pot_dict_from_string(pot_string)
            pot_dict = dicts[1]

        for i in range(0, len(dicts[0]) + 1):
            # Files are named <ldos_file>00.dat, <ldos_file>01.dat, ...;
            # two-digit zero padding covers both the <10 and >=10 cases.
            with zopen("{}{:02d}.dat".format(ldos_file, i), "rt") as fobject:
                f = fobject.readlines()
            # Charges sit in the third token of lines 4-7; the total is the
            # fifth token of line 2.
            s = float(f[3].split()[2])
            p = float(f[4].split()[2])
            d = float(f[5].split()[2])
            f1 = float(f[6].split()[2])
            tot = float(f[1].split()[4])
            cht[str(i)] = {
                pot_dict[i]: {
                    "s": s,
                    "p": p,
                    "d": d,
                    "f": f1,
                    "tot": tot
                }
            }

        return cht
Beispiel #31
0
 def feature_labels(self):
     """Return the element symbols for atomic numbers 1 through 103, in
     order of increasing atomic number."""
     return [Element.from_Z(atomic_number).symbol
             for atomic_number in range(1, 104)]
Beispiel #32
0
    features.extend(atomicNo)
    features.extend(natom)
    features.append(pressure)
    pf.append(features[:])
# }}}
#
# set X_train, y_train, X_test
# {{{
# The training set is the whole of `pf` (features) and `tc` (targets); the
# [:] slices take copies rather than aliasing the originals.
X = pf[:]
y = tc[:]
X_train = X[:]
y_train = y[:]
# Candidate compositions for prediction: every element with Z from 3 to 85
# that is not a noble gas, combined with hydrogen, with each of the two atom
# counts running from 1 to 9.
materials = []
xatom=Element("H")
for i in range(3,86):
    if(not xatom.from_Z(i).is_noble_gas):
        for iatom1 in range(1,10):
            for iatom2 in range(1,10):
#               print('%s%.1i%s%.1i' % (xatom.from_Z(i).symbol,iatom1,xatom.symbol,iatom2))
                # formula string "<symbol><iatom1>H<iatom2>"
                str_mat=str(xatom.from_Z(i).symbol)+str(iatom1)+str(xatom.symbol)+str(iatom2)
                materials.append(Composition(str_mat))

# Per material, collect the atomic number of each element and its atom count
# (atomic fraction times the total number of atoms).
X_test = []
for material in materials:
    atomicNo = []
    natom = []
    for element in material:
        natom.append(material.get_atomic_fraction(element)*material.num_atoms)
        atomicNo.append(float(element.Z))
#   atom0=element.from_Z(atomicNo[0]).symbol
#   atom1=element.from_Z(atomicNo[1]).symbol
Beispiel #33
0
def _lower_half(coords, axis):
    """Return the half of the rows of *coords* with the smallest value in column *axis*."""
    return coords[coords[:, axis].argsort()][:int(coords.shape[0] / 2), :]


def _fractional_offset(projections, period):
    """Return the smallest distance of any projection to a multiple of *period*, in units of *period*."""
    residues = [abs(p) % period for p in projections]
    return min(min(abs(period - r), r) for r in residues) / period


def get_translations(structure, structural_type='100'):
    """Estimate the in-plane offset between two neighbouring layers.

    The structure is split into connected components ("layers"), the two
    layers lying lowest along the stacking axis are selected, and the offset
    of the first layer's metal sites relative to their nearest metal sites in
    the second layer is expressed as a fraction of the shortest in-plane
    metal-metal distance.

    Args:
        structure: Object exposing ``atomic_numbers`` and ``lattice.matrix``,
            supporting multiplication by a scaling factor and integer
            indexing to sites with ``coords`` and ``specie.symbol``
            (presumably a pymatgen ``Structure`` - confirm against callers).
        structural_type: ``'100'`` or ``'110'``. For ``'110'`` only the lower
            half of each layer's metal sites is used and the period along the
            second in-plane direction is multiplied by ``sqrt(2)``.

    Returns:
        list: The two fractional translations, each rounded to two decimals,
        sorted in ascending order.

    Raises:
        AssertionError: If ``structural_type`` is not ``'100'`` or ``'110'``.
        IndexError: If the structure has no metal/metalloid element, fewer
            than two layers contain it, or no in-plane metal-metal vector is
            found in the first layer.
    """
    assert structural_type in ['100', '110'], \
        "structural_type must be '100' or '110'"

    # Symbols of all metal / metalloid elements present; only the first one
    # is used below to identify layers and sites.
    metal = [
        Element.from_Z(z).symbol for z in set(structure.atomic_numbers)
        if Element.from_Z(z).is_metal or Element.from_Z(z).is_metalloid
    ]

    ab_indices = [0, 1, 2]
    conn_indices = [[2, 1, 1], [1, 2, 1], [1, 1, 2]]

    # Double the cell along each axis in turn; the axis whose doubling yields
    # the largest value (first item returned by conn_comps_sci, presumably
    # the component count) is taken as the stacking axis.
    number_connected_components = [
        conn_comps_sci(adjacency_matrix(structure * scaling))[0]
        for scaling in conn_indices
    ]
    c_index = number_connected_components.index(
        max(number_connected_components))
    ab_indices.remove(c_index)

    # Split the structure scaled by 3 into connected components and keep
    # those that contain the reference metal.
    extended_structure = structure * 3
    extended_components = list(
        conn_comps_netx(from_numpy_matrix(
            adjacency_matrix(extended_structure))))
    extended_sites = [[extended_structure[i] for i in components]
                      for components in extended_components]
    layers = [
        s for s in extended_sites
        if metal[0] in [site.specie.symbol for site in s]
    ]

    # Highest metal coordinate of every layer along the stacking axis.
    max_coords = [
        max([a.coords[c_index] for a in layer if a.specie.symbol == metal[0]])
        for layer in layers
    ]
    # Rank layer *indices* instead of looking sorted values up with
    # list.index(): with tied values the lookup would return the same layer
    # twice, whereas a stable sort of the indices always yields two distinct
    # layers.
    layer_order = sorted(range(len(max_coords)), key=lambda k: max_coords[k])
    first_layer_index = layer_order[0]
    second_layer_index = layer_order[1]

    first_layer_coords = array([
        a.coords for a in layers[first_layer_index]
        if a.specie.symbol == metal[0]
    ])
    second_layer_coords = array([
        a.coords for a in layers[second_layer_index]
        if a.specie.symbol == metal[0]
    ])

    if structural_type == '110':
        first_layer_coords = _lower_half(first_layer_coords, c_index)
        second_layer_coords = _lower_half(second_layer_coords, c_index)

    # Unit normal of the plane spanned by the two in-plane lattice vectors.
    a_axis, b_axis = extended_structure.lattice.matrix[ab_indices]
    perp = cross(a_axis / norm(a_axis), b_axis / norm(b_axis))

    # Shortest metal-metal vector of the first layer that is (nearly)
    # in-plane: |cos(angle to the normal)| < 0.07.
    dir_1 = sorted([
        c[0] - c[1] for c in combinations(first_layer_coords, 2)
        if abs(dot(c[0] - c[1], perp) / norm(c[0] - c[1]) / norm(perp)) < 0.07
    ],
                   key=norm)[0]
    m_dist = norm(dir_1)
    dir_1 = dir_1 / m_dist
    # Second in-plane direction, perpendicular to dir_1.
    dir_2 = cross(perp / norm(perp), dir_1 / norm(dir_1))

    a_projections, b_projections = [], []

    # The tree depends only on the second layer, so build it once rather than
    # once per first-layer site.
    second_layer_tree = KDTree(second_layer_coords)
    for site_coords in first_layer_coords:
        nearest_site = second_layer_coords[
            second_layer_tree.query(site_coords)[1]]
        offset = site_coords - nearest_site
        a_projections.append(dot(offset, dir_1))
        b_projections.append(dot(offset, dir_2))

    a_translation = _fractional_offset(a_projections, m_dist)

    if structural_type == '110':
        m_dist = m_dist * sqrt(2)

    b_translation = _fractional_offset(b_projections, m_dist)

    return sorted([round(a_translation, 2), round(b_translation, 2)])
Beispiel #34
0
 def _get_bond_type(graph) -> Dict:
     """Return a deep copy of *graph* whose bonds are relabelled by metal count.

     Entry ``k`` of ``'bond'`` in the copy is set to the number of metallic
     end atoms (0, 1 or 2) of the bond joining ``graph['index1'][k]`` and
     ``graph['index2'][k]``; ``graph['atom']`` holds the atomic numbers.
     The input graph itself is not modified.
     """
     relabelled = deepcopy(graph)
     metal_flags = [Element.from_Z(z).is_metal for z in graph['atom']]
     bond_ends = zip(graph['index1'], graph['index2'])
     for position, (start, end) in enumerate(bond_ends):
         # bool + bool -> 0, 1 or 2
         relabelled['bond'][position] = metal_flags[start] + metal_flags[end]
     return relabelled
def max_z_Cm_symbol():
    """Return a one-item list with the string form of the element with Z = 96."""
    element_96 = Element.from_Z(96)
    return [str(element_96)]