Beispiel #1
0
    def __init__(self):
        """
        Load every Magpie elemental-property table from the data directory.

        Each ``<name>.table`` file under ``data_files/magpie_elementdata`` is
        read in full; line ``Z`` of the file is taken as the value for atomic
        number ``Z``. Results are stored as
        ``self.all_elemental_props[<name>][<element symbol>]``. Rows that
        cannot be converted to numbers are stored as NaN.
        """
        self.all_elemental_props = dict()
        self.data_dir = os.path.join(module_dir, "data_files",
                                     'magpie_elementdata')

        # Property names are the table file names without the extension
        table_paths = glob(os.path.join(self.data_dir, "*.table"))
        prop_names = [os.path.basename(path).replace('.table', '')
                      for path in table_paths]

        for prop in prop_names:
            table_file = os.path.join(self.data_dir, '{}.table'.format(prop))
            with open(table_file, 'r') as handle:
                per_element = self.all_elemental_props[prop] = dict()
                rows = handle.readlines()
                # Oxidation states hold several numbers per row
                list_valued = prop in ["OxidationStates"]
                for z in range(1, len(_pt_data) + 1):
                    try:
                        row = rows[z - 1]
                        if list_valued:
                            value = [float(token) for token in row.split()]
                        else:
                            value = float(row)
                    except ValueError:
                        # non-numeric entry in the table
                        value = float("NaN")
                    per_element[Element.from_Z(z).symbol] = value
Beispiel #2
0
def get_magpie_descriptor(comp, descriptor_name):
    """
    Get descriptor data for elements in a compound from the Magpie data repository.

    Args:
        comp: (str) compound composition, eg: "NaCl"
        descriptor_name: name of Magpie descriptor needed. Find the entire list at
            https://bitbucket.org/wolverton/magpie/src/6ecf8d3b79e03e06ef55c141c350a08fbc8da849/Lookup%20Data/?at=master

    Returns: (list) of descriptor values for each atom in the composition

    Raises:
        ValueError: if no ``<descriptor_name>.table`` file exists in the data
            directory, or if the tabulated value of an element is not numeric.
    """
    data_dir = 'data/magpie_elementdata'
    suffix = '.table'

    # Only real table files are descriptors; other files in the directory
    # (e.g. the README) must not pass the availability check.
    available_props = [
        fname[:-len(suffix)] for fname in os.listdir(data_dir)
        if fname.endswith(suffix)
    ]

    if descriptor_name not in available_props:
        raise ValueError(
            "This descriptor is not available from the Magpie repository. Choose from {}"
            .format(available_props))

    el_amt = Composition(comp).get_el_amt_dict()

    with open(os.path.join(data_dir, descriptor_name + suffix),
              'r') as descp_file:
        lines = descp_file.readlines()

    magpiedata = []
    for el, amt in el_amt.items():
        # Line Z of the table holds the value for atomic number Z
        value = float(lines[Element(el).Z - 1])
        # One entry per atom; fractional amounts are truncated by int()
        magpiedata.extend([value] * int(amt))

    return magpiedata
Beispiel #3
0
def sulfide_type(structure):
    """
    Classify the sulfur environment of a structure.

    Args:
        structure (Structure): Input structure.

    Returns:
        (str) "sulfide" or "polysulfide". None is returned when the structure
        is a single element, contains no S, or any symmetry-distinct S site
        is classified as sulfate.
    """
    structure = structure.copy()
    structure.remove_oxidation_states()
    s = Element("S")
    comp = structure.composition
    if comp.is_element or s not in comp:
        return None

    analyzer = SpacegroupAnalyzer(structure, symprec=0.1)
    symmetrized = analyzer.get_symmetrized_structure()
    # One representative site per group of symmetry-equivalent sites
    sulfur_sites = []
    for group in symmetrized.equivalent_sites:
        if group[0].specie == s:
            sulfur_sites.append(group[0])

    def classify(site):
        # in an exceptionally rare number of structures, the search
        # radius needs to be increased to find a neighbor atom
        radius = 4
        found = []
        while len(found) == 0:
            found = structure.get_neighbors(site, radius)
            radius *= 2
            if radius > max(structure.lattice.abc) * 2:
                break

        ranked = sorted(found, key=lambda n: n[1])
        cutoff = ranked[0].distance + 0.4
        # up to four neighbours within 0.4 of the shortest distance
        nearest = [nn.site.specie for nn in ranked
                   if nn.distance < cutoff][:4]
        mean_x = np.mean([el.X for el in nearest])
        if mean_x > s.X:
            return "sulfate"
        if mean_x == s.X and s in nearest:
            return "polysulfide"
        return "sulfide"

    labels = {classify(site) for site in sulfur_sites}
    if "sulfate" in labels:
        return None
    if "polysulfide" in labels:
        return "polysulfide"
    return "sulfide"
Beispiel #4
0
def readLammps(desired_return):
    """
    Collect results of a LAMMPS run from ``log.lammps`` and ``dump.atoms``.

    Args:
        desired_return: iterable of result names to return. The names
            computed here are "energies", "structures", "forces" and
            "stresses".

    Returns:
        dict mapping each requested name to its value. If
        ``parse_lammps_log`` returns an empty list, every requested name
        maps to None.
    """
    from pymatgen.io.lammps.outputs import parse_lammps_dumps, parse_lammps_log
    from pymatgen import Structure, Element
    from pymatgen.analysis.elasticity.stress import Stress
    from numpy import unique, array, argmin

    try:
        log = parse_lammps_log(filename="log.lammps")[-1]
    except IndexError:
        return {key: None for key in desired_return}

    results = {"energies": list(log['PotEng'])[-1]}

    # Every dump frame overwrites the previous one; the last frame wins.
    for dump in parse_lammps_dumps("dump.atoms"):
        frame = dump.data
        coords = [''] * dump.natoms
        forces = [''] * dump.natoms
        masses = [''] * dump.natoms
        for row in range(dump.natoms):
            # atom ids are 1-based; store per-atom data in id order
            slot = frame["id"][row] - 1
            coords[slot] = [frame["x"][row], frame["y"][row],
                            frame["z"][row]]
            forces[slot] = [frame["fx"][row], frame["fy"][row],
                            frame["fz"][row]]
            masses[slot] = frame["mass"][row]

        box = dump.box

    # Identify each distinct mass with the element of closest atomic mass
    unique_masses = unique(masses)
    ref_masses = [el.atomic_mass.real for el in Element]
    mass_gap = abs(array(ref_masses) - unique_masses[:, None])
    atomic_numbers = argmin(mass_gap, axis=1) + 1
    symbols = [Element.from_Z(z).symbol for z in atomic_numbers]
    species_map = dict(zip(unique_masses, symbols))
    atom_species = [species_map[m] for m in masses]

    results["structures"] = Structure(box.to_lattice(),
                                      atom_species,
                                      coords,
                                      coords_are_cartesian=True)
    results["forces"] = forces

    p = [
        1e-1 * list(log['c_press[{}]'.format(i)])[-1] for i in range(1, 7)
    ]
    # Six logged components arranged as a symmetric 3x3 tensor
    results["stresses"] = Stress([[p[0], p[3], p[4]],
                                  [p[3], p[1], p[5]],
                                  [p[4], p[5], p[2]]])

    return {key: results[key] for key in desired_return}
Beispiel #5
0
 def test_ozonide(self):
     """Corrected energy of a LiO3 entry should equal -3.0 - 3 * 0.66975."""
     lattice = Lattice.from_parameters(3.999911, 3.999911, 3.999911,
                                       133.847504, 102.228244, 95.477342)
     lithium = Element("Li")
     oxygen = Element("O")
     frac_coords = [
         [0.513004, 0.513004, 1.000000],
         [0.017616, 0.017616, 0.000000],
         [0.649993, 0.874790, 0.775203],
         [0.099587, 0.874790, 0.224797],
     ]
     structure = Structure(lattice, [lithium, oxygen, oxygen, oxygen],
                           frac_coords)
     calc_params = {
         'is_hubbard': False,
         'hubbards': None,
         'run_type': 'GGA',
         'potcar_symbols': ['PAW_PBE Fe 06Sep2000', 'PAW_PBE O 08Apr2002'],
     }
     entry = ComputedStructureEntry(structure, -3, parameters=calc_params)
     corrected = self.compat.process_entry(entry)
     self.assertAlmostEqual(corrected.energy, -3.0 - 3 * 0.66975)
Beispiel #6
0
    def test_disordered(self):
        si = Element("Si")
        n = Element("N")
        coords = list()
        coords.append(np.array([0, 0, 0]))
        coords.append(np.array([0.75, 0.5, 0.75]))
        lattice = Lattice(np.array([[3.8401979337, 0.00, 0.00],
                                    [1.9200989668, 3.3257101909, 0.00],
                                    [0.00, -2.2171384943, 3.1355090603]]))
        struct = Structure(lattice, [si, {si: 0.5, n: 0.5}], coords)
        writer = CifWriter(struct)
        ans = """# generated using pymatgen
data_Si1.5N0.5
_symmetry_space_group_name_H-M   'P 1'
_cell_length_a   3.84019793
_cell_length_b   3.84019899
_cell_length_c   3.84019793
_cell_angle_alpha   119.99999086
_cell_angle_beta   90.00000000
_cell_angle_gamma   60.00000914
_symmetry_Int_Tables_number   1
_chemical_formula_structural   Si1.5N0.5
_chemical_formula_sum   'Si1.5 N0.5'
_cell_volume   40.04479464
_cell_formula_units_Z   1
loop_
 _symmetry_equiv_pos_site_id
 _symmetry_equiv_pos_as_xyz
  1  'x, y, z'
loop_
 _atom_site_type_symbol
 _atom_site_label
 _atom_site_symmetry_multiplicity
 _atom_site_fract_x
 _atom_site_fract_y
 _atom_site_fract_z
 _atom_site_occupancy
  Si  Si0  1  0.00000000  0.00000000  0.00000000  1
  Si  Si1  1  0.75000000  0.50000000  0.75000000  0.5
  N  N2  1  0.75000000  0.50000000  0.75000000  0.5"""

        for l1, l2 in zip(str(writer).split("\n"), ans.split("\n")):
            self.assertEqual(l1.strip(), l2.strip())
 def test_oxide_energy_corr(self):
     """Corrected energy of a Li2O entry should be -3.0 - 0.66975 (4 places)."""
     lithium, oxygen = Element("Li"), Element("O")
     lattice = Lattice.from_parameters(3.278, 3.278, 3.278, 60, 60, 60)
     frac_coords = [
         [0.25, 0.25, 0.25],
         [0.75, 0.75, 0.75],
         [0.0, 0.0, 0.0],
     ]
     structure = Structure(lattice, [lithium, lithium, oxygen], frac_coords)
     calc_params = {
         'is_hubbard': False,
         'hubbards': None,
         'run_type': 'GGA',
         'potcar_symbols': ['PAW_PBE Fe 06Sep2000', 'PAW_PBE O 08Apr2002'],
     }
     entry = ComputedStructureEntry(structure, -3, parameters=calc_params)
     corrected = self.compat.process_entry(entry)
     self.assertAlmostEqual(corrected.energy, -3.0 - 0.66975, 4)
Beispiel #8
0
    def write_param(self):
        """
        Write parameter and coefficient file to perform lammps calculation.

        Two files named after ``self.name`` are written to the current
        directory: ``<name>.snapcoeff`` (element headers followed by their
        coefficients) and ``<name>.snapparam`` (describer settings).

        Returns:
            list: ``[self.pair_style, pair_coeff]`` where ``pair_coeff`` is
            ``self.pair_coeff`` formatted with the element list, specie name
            and the two file names.

        Raises:
            ValueError: if ``self.specie`` is not set.
        """
        if not self.specie:
            raise ValueError("No specie given!")

        param_file = '{}.snapparam'.format(self.name)
        coeff_file = '{}.snapcoeff'.format(self.name)

        model = self.model
        describer = self.model.describer
        profile = describer.element_profile
        ordered = sorted([Element(sym) for sym in profile.keys()])
        elements = [el.symbol for el in ordered]
        ne = len(elements)

        # Number of bispectrum components per element; the quadratic model
        # adds nbc * (nbc + 1) / 2 pair terms.
        nbc = len(describer.subscripts)
        if describer.quadratic:
            nbc += int((1 + nbc) * nbc / 2)

        # Header line, then one header + coefficient block per element
        coeff_lines = ['{} {}'.format(ne, nbc + 1)]
        for symbol, block in zip(elements, np.split(model.coef, ne)):
            coeff_lines.append('{} {} {}'.format(symbol,
                                                 profile[symbol]['r'],
                                                 profile[symbol]['w']))
            coeff_lines += [str(c) for c in block]
        with open(coeff_file, 'w') as f:
            f.write('\n'.join(coeff_lines))

        keys = ['rcutfac', 'twojmax', 'rfac0', 'rmin0', 'diagonalstyle']
        param_lines = ['{} {}'.format(k, getattr(describer, k)) for k in keys]
        param_lines.append('quadraticflag {}'.format(int(describer.quadratic)))
        param_lines.append('bzeroflag 0')
        with open(param_file, 'w') as f:
            f.write('\n'.join(param_lines))

        pair_coeff = self.pair_coeff.format(elements=' '.join(elements),
                                            specie=self.specie.name,
                                            coeff_file=coeff_file,
                                            param_file=param_file)
        return [self.pair_style, pair_coeff]
Beispiel #9
0
def cell_to_structure(
        cell: Tuple[List[List[float]], List[List[float]],
                    List[int]]) -> Structure:
    """
    Build a Structure from a three-item cell tuple.

    cell: (Lattice parameters
           [[a_x, a_y, a_z], [b_x, b_y, b_z], [c_x, c_y, c_z]],
          Fractional atomic coordinates in an Nx3 array,
          Z numbers of species in a length N array)
    """
    # Atomic numbers are converted to Element objects, one per site
    species = list(map(Element.from_Z, cell[2]))
    lattice_vectors = cell[0]
    frac_coords = cell[1]
    return Structure(lattice=lattice_vectors,
                     species=species,
                     coords=frac_coords)
Beispiel #10
0
def num_atom_differences(structure: IStructure,
                         ref_structure: IStructure) -> Dict[Element, int]:
    """
    Count, per element, how many atoms ``structure`` has relative to
    ``ref_structure``.

    Args:
        structure: structure whose composition is compared.
        ref_structure: reference structure.

    Returns:
        Dict mapping each Element with a non-zero difference to
        ``int(amount in structure - amount in ref_structure)``. Elements with
        no difference are omitted. An element present in only one of the two
        compositions is treated as having amount 0 in the other.
    """
    target_composition = structure.composition.as_dict()
    reference_composition = ref_structure.composition.as_dict()
    result = {}
    # sorted() keeps the order of the returned dict reproducible
    for symbol in sorted(set(target_composition) | set(reference_composition)):
        # .get(): do not rely on as_dict() returning a defaultdict
        n_atom_diff = int(target_composition.get(symbol, 0)
                          - reference_composition.get(symbol, 0))
        if n_atom_diff:
            result[Element(symbol)] = n_atom_diff
    return result
Beispiel #11
0
 def setUp(self):
     """Create a Ni SNAPotential fixture whose linear model uses preset coefficients."""
     describer = BispectrumCoefficients(
         rcutfac=4.1,
         twojmax=8,
         element_profile={'Ni': {'r': 0.5, 'w': 1}},
         pot_fit=True)
     linear = LinearModel(describer=describer)
     # Inject fitted parameters directly instead of training the model
     linear.model.coef_ = coeff
     linear.model.intercept_ = intercept
     potential = SNAPotential(model=linear)
     potential.specie = Element('Ni')
     self.ff_settings = potential
Beispiel #12
0
    def from_config(param_file, coeff_file, **kwargs):
        """
        Initialize potentials with parameters file and coefficient file.

        Only one element is read from the coefficient file: after comment
        lines (starting with '#') are dropped, the second line is taken as
        the element header and every following line as a coefficient.

        Args:
            param_file (str): The file storing the configuration of potentials.
            coeff_file (str): The file storing the coefficients of potentials.
            **kwargs: forwarded to ``LinearModel``.

        Return:
            SNAPotential.
        """
        with open(coeff_file) as f:
            coeff_lines = [line for line in f if not line.startswith('#')]
        specie, r, w = coeff_lines[1].split()
        element_profile = {specie: {'r': float(r), 'w': int(w)}}

        with zopen(param_file, 'rt') as f:
            param_text = f.read()

        def _matches(pattern):
            # Raw-string patterns avoid invalid-escape warnings for \d
            return re.findall(pattern, param_text, re.S)

        # When a keyword appears more than once, the last occurrence wins
        rcut = float(_matches(r'rcutfac (.*?)\n')[-1])
        twojmax = int(_matches(r'twojmax (\d*)\n')[-1])
        rfac = float(_matches(r'rfac0 (.*?)\n')[-1])
        rmin = int(_matches(r'rmin0 (.*?)\n')[-1])
        diagonal = int(_matches(r'diagonalstyle (.*?)\n')[-1])
        # quadraticflag is optional and defaults to a non-quadratic model
        quadratic_flags = _matches(r'quadraticflag (.*?)(?=\n|$)')
        quadratic = bool(int(quadratic_flags[-1])) if quadratic_flags else False

        describer = BispectrumCoefficients(rcutfac=rcut,
                                           twojmax=twojmax,
                                           rfac0=rfac,
                                           element_profile=element_profile,
                                           rmin0=rmin,
                                           diagonalstyle=diagonal,
                                           quadratic=quadratic,
                                           pot_fit=True)
        model = LinearModel(describer=describer, **kwargs)
        # np.float was removed in NumPy 1.24; it was an alias of builtin float
        model.model.coef_ = np.array(coeff_lines[2:], dtype=float)
        model.model.intercept_ = 0
        snap = SNAPotential(model=model)
        snap.specie = Element(specie)
        return snap
Beispiel #13
0
    def from_options(
            cls,
            xc: Xc,
            symbol_list: list,
            factor: int,
            aexx: Optional[float] = 0.25,
            hubbard_u: Optional[bool] = None,
            ldauu: Optional[dict] = None,
            ldaul: Optional[dict] = None,
            ldaul_set_name: Optional[str] = "default") -> "XcIncarSettings":
        """ Construct incar settings related to xc with some options.

        Defaults are loaded from "xc_incar_set.yaml" for the given xc, then
        functional-specific tags, Hubbard U tags and hybrid-functional tags
        are added as applicable.

        Args: See ViseInputSet docstrings

        Return: XcIncarSettings class object
        """
        settings = load_default_incar_settings(
            yaml_filename="xc_incar_set.yaml",
            required_flags=XC_REQUIRED_FLAGS,
            optional_flags=XC_OPTIONAL_FLAGS,
            key_name=str(xc))

        # By default Hubbard U is set for LDA or GGA.
        if hubbard_u is None:
            hubbard_u = xc in LDA_OR_GGA
        ldauu = ldauu or {}
        ldaul = ldaul or {}

        if xc == Xc.pbesol:
            settings["GGA"] = "PS"
        elif xc == Xc.scan:
            settings["METAGGA"] = "SCAN"

        if hubbard_u:
            u_set = loadfn(SET_DIR / "u_parameter_set.yaml")
            # User-supplied values override the named parameter set
            u_by_element = u_set["LDAUU"][ldaul_set_name]
            u_by_element.update(ldauu)
            u_values = [u_by_element.get(el, 0) for el in symbol_list]

            # LDA+U tags are written only if some element has a positive U sum
            if sum(u_values) > 0:
                settings["LDAUU"] = u_values
                settings.update({"LDAU": True, "LDAUTYPE": 2, "LDAUPRINT": 1})
                l_by_element = u_set["LDAUL"][ldaul_set_name]
                l_by_element.update(ldaul)
                settings["LDAUL"] = [
                    l_by_element.get(el, -1) for el in symbol_list
                ]
                has_z_above_56 = any(
                    [Element(el).Z > 56 for el in symbol_list])
                settings["LMAXMIX"] = 6 if has_z_above_56 else 4

        if xc in HYBRID_FUNCTIONAL:
            settings["AEXX"] = aexx
            if factor > 1:
                settings["NKRED"] = factor

        return cls(settings=settings)
Beispiel #14
0
 def test_get_property(self):
     """Spot-check elemental and charge-dependent property lookups."""
     elemental = self.data_source.get_elemental_property
     by_specie = self.data_source.get_charge_dependent_property_from_specie

     self.assertAlmostEqual(-4.3853,
                            elemental(Element("Bi"), "mus_fere"),
                            4)
     self.assertEqual(59600, elemental(Element("Li"), "electron_affin"))
     self.assertAlmostEqual(2372300,
                            elemental(Element("He"), "first_ioniz"))
     # total ionization of He2+ is the sum of both ionization energies
     self.assertAlmostEqual(sum([2372300, 5250500]),
                            by_specie(Specie("He", 2), "total_ioniz"))
     self.assertAlmostEqual(18.6,
                            by_specie(Specie("V", 3), "xtal_field_split"))
Beispiel #15
0
def contains_element(entries, element_symbol):
    """Returns the entries in a list of entries which contain element_symbol

    Parameters:
    -----------
    entries: list
        Entries exposing ``composition.elements``.
    element_symbol: str
        Symbol passed to ``Element`` to build the element searched for.

    Returns:
    --------
    list of the entries whose composition includes that element, in their
    original order.
    """
    wanted = Element(element_symbol)
    return [
        candidate for candidate in entries
        if wanted in candidate.composition.elements
    ]
Beispiel #16
0
    def test_oxide_energy_corr(self):
        """Corrected energy of a Li2O entry should be -3.0 - 0.66975 (4 places)."""
        lithium, oxygen = Element("Li"), Element("O")
        lattice = Lattice.from_parameters(3.278, 3.278, 3.278, 60, 60, 60)
        frac_coords = [
            [0.25, 0.25, 0.25],
            [0.75, 0.75, 0.75],
            [0.0, 0.0, 0.0],
        ]
        structure = Structure(lattice, [lithium, lithium, oxygen],
                              frac_coords)
        potcar_spec = [
            {'titel': 'PAW_PBE Li 17Jan2003',
             'hash': '65e83282d1707ec078c1012afbd05be8'},
            {'titel': 'PAW_PBE O 08Apr2002',
             'hash': '7a25bc5b9a5393f46600a4939d357982'},
        ]
        calc_params = {
            'is_hubbard': False,
            'hubbards': None,
            'run_type': 'GGA',
            'potcar_spec': potcar_spec,
        }
        entry = ComputedStructureEntry(structure, -3, parameters=calc_params)

        corrected = self.compat.process_entry(entry)
        self.assertAlmostEqual(corrected.energy, -3.0 - 0.66975, 4)
Beispiel #17
0
 def setUpClass(cls):
     """Create two shared ForceField fixtures: ``virus`` (built inline) and
     ``ethane`` (loaded from ff_ethane.yaml in the test directory)."""
     # Masses given as a symbol string, Element objects and a plain number
     masses = [
         ("A", "H"),
         ("B", Element("C")),
         ("C", Element("O")),
         ("D", 1.00794),
     ]
     pair_coeffs = [
         [1, 1, 1.1225],
         [1, 1.175, 1.31894],
         [1, 1.55, 1.73988],
         [1, 1, 1.1225],
         [1, 1.35, 4],
         [1, 1.725, 1.93631],
         [1, 1.175, 1.31894],
         [1, 2.1, 4],
         [1, 1.55, 1.73988],
         [1, 1, 1.1225],
     ]
     bond_coeffs = [
         {"coeffs": [50, 0.659469], "types": [("A", "B"), ("C", "D")]},
         {"coeffs": [50, 0.855906], "types": [("B", "C")]},
     ]
     cls.virus = ForceField(mass_info=masses,
                            nonbond_coeffs=pair_coeffs,
                            topo_coeffs={"Bond Coeffs": bond_coeffs})
     ethane_path = os.path.join(test_dir, "ff_ethane.yaml")
     cls.ethane = ForceField.from_file(ethane_path)
Beispiel #18
0
 def DOI(self, calc_state):
     """Return the fraction of mapped A sites occupied by ``self.Bspecie``.

     The site indices come from mapping the current structure onto
     ``self.Asite_struct`` with ``self.site_matcher``.
     NOTE(review): the name presumably stands for "degree of inversion" -
     confirm with the caller.
     """
     structure = calc_state.config.structure
     asites = self.site_matcher.get_mapping(structure, self.Asite_struct)
     n_b_on_a = sum(
         1 for idx in asites
         if calc_state.config.structure.species[idx] == Element(self.Bspecie)
     )
     return n_b_on_a / float(len(asites))
    def test_apply_transformation(self):
        """Occupancies are discretized within tolerance; otherwise RuntimeError."""
        cubic = Lattice.cubic(4)

        def two_site(mixed_occupancy):
            # disordered alkali site at the origin, fully occupied O at centre
            return Structure(cubic, [mixed_occupancy, {"O": 1}],
                             [[0, 0, 0], [0.5, 0.5, 0.5]])

        s_orig = two_site({"Li": 0.19, "Na": 0.19, "K": 0.62})
        dot = DiscretizeOccupanciesTransformation(max_denominator=5, tol=0.5)
        discretized = dot.apply_transformation(s_orig)
        self.assertEqual(dict(discretized[0].species),
                         {Element("Li"): 0.2,
                          Element("Na"): 0.2,
                          Element("K"): 0.6})

        # The same input must be rejected when the tolerance is tight
        dot = DiscretizeOccupanciesTransformation(max_denominator=5, tol=0.01)
        with self.assertRaises(RuntimeError):
            dot.apply_transformation(s_orig)

        s_orig_2 = two_site({"Li": 0.5, "Na": 0.25, "K": 0.25})

        dot = DiscretizeOccupanciesTransformation(max_denominator=9, tol=0.25,
                                                  fix_denominator=False)
        discretized = dot.apply_transformation(s_orig_2)
        self.assertEqual(dict(discretized[0].species),
                         {Element("Li"): Fraction(1/2),
                          Element("Na"): Fraction(1/4),
                          Element("K"): Fraction(1/4)})

        dot = DiscretizeOccupanciesTransformation(max_denominator=9, tol=0.05,
                                                  fix_denominator=True)
        with self.assertRaises(RuntimeError):
            dot.apply_transformation(s_orig_2)
Beispiel #20
0
 def test_get_data(self):
     """Mixing enthalpy is -27 for H/Pd in either order and NaN for He/H."""
     enthalpy = self.data.get_mixing_enthalpy
     self.assertEqual(-27, enthalpy(Element('H'), Element('Pd')))
     self.assertEqual(-27, enthalpy(Element('Pd'), Element('H')))
     helium_hydrogen = enthalpy(Element('He'), Element('H'))
     self.assertTrue(isnan(helium_hydrogen))
def VoronoiInfo(poscar, vasprun, elements):
    """
    Summarize Voronoi neighbours of the centre O site for two elements.

    Args:
        poscar: object exposing the structure as ``poscar.structure``.
        vasprun: not used by this function.
        elements: element symbols; the first two are reported as 'A' and 'B'.

    Returns:
        dict with the bond count and mean bond length for each of the two
        elements ('A_Bonds', 'A_length', 'B_Bonds', 'B_length') and the mean
        length over both ('avg_length').
    """
    base_s = poscar.structure
    start_i = get_center_i(
        base_s,
        Element('O'),
    )
    print(start_i)
    vnn = VoronoiNN(targets=[Element(x) for x in elements])
    neighbors = vnn.get_nn_info(base_s, start_i)

    weight = {}
    total_length = {}
    for nb in neighbors:
        label = nb['site'].species_string
        # Voronoi weight shifted by 0.45 and rounded to a whole bond count
        n_bonds = round(nb['weight'] + 0.45)
        weight[label] = weight.get(label, 0) + n_bonds
        if n_bonds >= 0.99:
            length = base_s.get_distance(start_i, nb['site_index'])
            total_length[label] = total_length.get(label, 0) + length

    return {
        'A_Bonds': weight[elements[0]],
        'A_length': total_length[elements[0]] / weight[elements[0]],
        'B_Bonds': weight[elements[1]],
        'B_length': total_length[elements[1]] / weight[elements[1]],
        'avg_length':
        (total_length[elements[0]] + total_length[elements[1]]) /
        (weight[elements[0]] + weight[elements[1]]),
    }
    def get_vc_plot_data(self, open_el, valence=None, entries=None, allowpmu=True):
        """
        Build the data for a voltage curve from the phase evolution profile.

        Args:
            open_el: working-ion element.
            valence: charge of the working ion; when falsy the charge is
                looked up for Li, Na, K, Mg, Ca, Zn or Al.
            entries, allowpmu: forwarded to ``get_phase_evolution_profile``.

        Returns:
            (oe_list, v_list): the 'evolution' values of the profile steps in
            reversed order, and for each step
            ``(v_ref - chempot) / ioncharge`` where ``v_ref`` is the chempot
            of the first step of the profile.

        Raises:
            ValueError: if no valence is given and the ion is not supported.
        """
        common_working_ion = {Element('Li'): 1, Element('Na'): 1, Element('K'): 1, Element('Mg'): 2, Element('Ca'): 2,
                              Element('Zn'): 2, Element('Al'): 3}
        if valence:
            ioncharge = valence
        elif open_el in common_working_ion:
            ioncharge = common_working_ion[open_el]
        else:
            raise ValueError('Working ion {} not supported. You can provide charge manually'.format(open_el.symbol))

        evolution_profile = self.get_phase_evolution_profile(open_el, entries=entries, allowpmu=allowpmu)
        oe_list = []
        chempots = []
        # Walk the profile from its last step back to its first
        for step in reversed(evolution_profile):
            oe_list.append(step['evolution'])
            chempots.append(step['chempot'])
        v_ref = chempots[-1]
        v_list = [(-mu + v_ref) / ioncharge for mu in chempots]

        return oe_list, v_list
Beispiel #23
0
def read_poscar(layers, atom_dict):
    """
    Interactively load a POSCAR file as the base layer, then add layers or
    an adsorbate on request.

    Every site of the file is wrapped in an ``atom`` object, stored in
    ``atom_dict`` under the next integer key, and all atoms in ``atom_dict``
    are appended to ``layers`` as one layer.

    Args:
        layers: list of layers; modified in place.
        atom_dict: dict of atom objects keyed by running index; modified in
            place.
    """
    from pymatgen import Structure, Element
    from objects import atom

    name = (input('Enter Name of POSCAR file in the same directory: '))
    poscar = Structure.from_file(name)
    for site in poscar:
        mg_atom = Element(str(site.specie))
        atom_dict[len(atom_dict)] = atom(mg_atom,
                                         list(site.coords),
                                         num=len(atom_dict),
                                         c_tag='b')
    layers.append([atom_dict[i] for i in atom_dict])
    print('Read POSCAR as a single layer')

    sym = None
    cmd = input('add layer <l> or add adsorbate <a>? ')
    while cmd == 'l':
        sym = input("Which atom to deposit? ")
        add_layer(Element(sym))
        cmd = input('add layer <l> or add adsorbate <a>? ')

    if cmd == 'a':
        # No layer was added, so no element has been chosen yet; ask for it
        # instead of failing on an unbound variable.
        if sym is None:
            sym = input("Which atom to deposit? ")
        add_ads(layers[-1], Element(sym))
    def test_connectivity_array(self):
        """Check selected connectivity values and one image site of LiFePO4."""
        vc = VoronoiConnectivity(self.get_structure("LiFePO4"))
        ca = vc.connectivity_array
        middle_image = ca.shape[2] // 2

        self.assertTrue(np.allclose(
            ca[15, :4, middle_image],
            np.array([0, 1.96338392, 0, 0.04594495])))
        self.assertTrue(np.allclose(ca[1, -3:, 51], np.array([0, 0, 0])))

        site = vc.get_sitej(27, 51)
        self.assertEqual(site.specie, Element('O'))
        self.assertTrue(np.allclose(
            site.frac_coords,
            np.array([-0.29158, 0.74889, 0.95684])))
Beispiel #25
0
 def test_process_entry_peroxide(self):
     """Corrected energy of a Li4O4 entry should be -3 - 0.44317 * 4 (4 places)."""
     lattice = Lattice.from_parameters(3.159597, 3.159572, 7.685205,
                                       89.999884, 89.999674, 60.000510)
     lithium = Element("Li")
     oxygen = Element("O")
     species = [lithium] * 4 + [oxygen] * 4
     frac_coords = [
         [0.666656, 0.666705, 0.750001],
         [0.333342, 0.333378, 0.250001],
         [0.000001, 0.000041, 0.500001],
         [0.000001, 0.000021, 0.000001],
         [0.333347, 0.333332, 0.649191],
         [0.333322, 0.333353, 0.850803],
         [0.666666, 0.666686, 0.350813],
         [0.666665, 0.666684, 0.149189],
     ]
     structure = Structure(lattice, species, frac_coords)
     calc_params = {
         'is_hubbard': False,
         'hubbards': None,
         'run_type': 'GGA',
         'potcar_symbols': ['PAW_PBE Fe 06Sep2000', 'PAW_PBE O 08Apr2002'],
     }
     entry = ComputedStructureEntry(structure, -3, parameters=calc_params)
     corrected = self.compat.process_entry(entry)
     self.assertAlmostEqual(corrected.energy, -3 - 0.44317 * 4, 4)
Beispiel #26
0
    def test_ozonide(self):
        """Corrected energy of a LiO3 entry should equal -3.0 - 3 * 0.66975."""
        lattice = Lattice.from_parameters(3.999911, 3.999911, 3.999911,
                                          133.847504, 102.228244, 95.477342)
        lithium = Element("Li")
        oxygen = Element("O")
        frac_coords = [
            [0.513004, 0.513004, 1.000000],
            [0.017616, 0.017616, 0.000000],
            [0.649993, 0.874790, 0.775203],
            [0.099587, 0.874790, 0.224797],
        ]
        structure = Structure(lattice, [lithium, oxygen, oxygen, oxygen],
                              frac_coords)
        potcar_spec = [
            {'titel': 'PAW_PBE Li 17Jan2003',
             'hash': '65e83282d1707ec078c1012afbd05be8'},
            {'titel': 'PAW_PBE O 08Apr2002',
             'hash': '7a25bc5b9a5393f46600a4939d357982'},
        ]
        calc_params = {
            'is_hubbard': False,
            'hubbards': None,
            'run_type': 'GGA',
            'potcar_spec': potcar_spec,
        }
        entry = ComputedStructureEntry(structure, -3, parameters=calc_params)

        corrected = self.compat.process_entry(entry)
        self.assertAlmostEqual(corrected.energy, -3.0 - 3 * 0.66975)
Beispiel #27
0
 def find_seekpath_data(self) -> None:
     """Get full information of seekpath band path.

     Stores the raw seekpath result in ``self._seekpath_data`` and the
     primitive cell it reports, as a Structure, in ``self._band_primitive``.
     """
     path_data = seekpath.get_explicit_k_path(
         structure=self.cell,
         symprec=self.symprec,
         angle_tolerance=self.angle_tolerance,
         with_time_reversal=self.time_reversal,
         reference_distance=self.ref_distance)
     self._seekpath_data = path_data

     # seekpath reports species as atomic numbers; convert to Elements
     lattice = path_data["primitive_lattice"]
     species = [Element.from_Z(z) for z in path_data["primitive_types"]]
     positions = path_data["primitive_positions"]
     self._band_primitive = Structure(lattice, species, positions)
Beispiel #28
0
def get_composition_from_string(comp_str):
    """Validate `comp_str` and return it as a compact integer formula.

    The string is parsed into a pymatgen ``Composition``, every element in
    it is passed through ``Element`` as a validity check, and the
    composition is then rebuilt from its integer formula.

    Args:
        comp_str: composition string understood by ``Composition``.

    Returns:
        str: concatenation of ``<symbol><amount>`` for each element, where
        the amount is omitted when it is not greater than 1.
    """
    from pymatgen import Composition, Element

    parsed = Composition(comp_str)
    for element in parsed.elements:
        # Constructing the Element is the validation step; result is unused.
        Element(element)

    integer_formula = parsed.get_integer_formula_and_factor()[0]
    amounts = Composition(integer_formula).as_dict()

    pieces = []
    for symbol, amount in amounts.items():
        suffix = int(amount) if amount > 1 else ''
        pieces.append('{}{}'.format(symbol, suffix))
    return ''.join(pieces)
Beispiel #29
0
def insert_g3testset(coll):
    """Parse all ``g*.txt`` files in the working directory and upsert them.

    For each (molecule, charge, spin) triple yielded by ``parse_file``, the
    sites whose species symbol is not a valid element are dropped, the
    cleaned molecule is converted through OpenBabel to obtain SMILES,
    canonical SMILES, InChI and SVG representations, and the resulting
    document is upserted into `coll` keyed on InChI, charge and spin
    multiplicity.

    Any exception raised while handling one molecule is printed to stdout
    (with a short traceback) and processing continues with the next one.

    Args:
        coll: object exposing ``update(filter, update, upsert=...)``
            (presumably a pymongo collection -- confirm against caller).
    """
    for fname in glob.glob("g*.txt"):
        print("Parsing " + fname)
        for (mol, charge, spin) in parse_file(fname):
            try:
                # Keep only sites that correspond to real elements.
                valid_sites = [site for site in mol
                               if Element.is_valid_symbol(site.specie.symbol)]
                clean_mol = Molecule.from_sites(valid_sites,
                                                charge=charge,
                                                spin_multiplicity=spin)
                xyz = XYZ(clean_mol)
                adaptor = BabelMolAdaptor.from_string(str(xyz), "xyz")
                pbmol = pb.Molecule(adaptor.openbabel_mol)
                smiles = pbmol.write("smi").split()[0]
                can = pbmol.write("can").split()[0]
                inchi = pbmol.write("inchi")
                svg = pbmol.write("svg")

                doc = {"molecule": clean_mol.as_dict()}
                comp = clean_mol.composition
                doc.update({
                    "pretty_formula": comp.reduced_formula,
                    "formula": comp.formula,
                    "composition": comp.as_dict(),
                    "elements": list(comp.as_dict().keys()),
                    "nelements": len(comp),
                    "charge": charge,
                    "spin_multiplicity": spin,
                    "smiles": smiles,
                    "can": can,
                    "inchi": inchi,
                    "svg": svg,
                    "xyz": str(xyz),
                    "tags": ["G305 test set"],
                })

                selector = {
                    "inchi": inchi,
                    "charge": charge,
                    "spin_multiplicity": spin
                }
                coll.update(selector, {"$set": doc}, upsert=True)
            except Exception:
                print("Error in {}".format(fname))
                traceback.print_exception(*sys.exc_info(),
                                          limit=2,
                                          file=sys.stdout)
        print("{} parsed!".format(fname))
Beispiel #30
0
def get_pymatgen_descriptor(comp, prop):
    """
    Get descriptor data for elements in a compound from pymatgen.

    Args:
        comp: (str) compound composition, eg: "NaCl"
        prop: (str) pymatgen element attribute, as defined in the Element class at
    http://pymatgen.org/_modules/pymatgen/core/periodic_table.html

    Returns: (list) of namedtuples containing element name, property name, property value, units, and amount of element

    Raises:
        ValueError: if `prop` is not an attribute of the pymatgen Element.

    """
    eldata_lst = []
    eldata = collections.namedtuple('eldata',
                                    'element propname propvalue propunit amt')
    # Properties that are read without a ``.unit`` attribute.
    unitless_props = ('X', 'Z', 'ionic_radii', 'group', 'row', 'number')

    el_amt_dict = Composition(comp).get_el_amt_dict()
    for el, amt in el_amt_dict.items():
        # The previous check (`callable(...) is None`) compared a bool with
        # None and therefore never fired; an unknown attribute surfaced as a
        # raw AttributeError instead of the intended ValueError.
        try:
            value = getattr(Element(el), prop)
        except AttributeError:
            raise ValueError('Invalid pymatgen Element attribute(property)')

        if value is None:
            # Attribute exists but pymatgen has no data for this element.
            propvalue, units = None, None
        else:
            units = None if prop in unitless_props else value.unit
            propvalue = float(value)

        eldata_lst.append(
            eldata(element=el,
                   propname=prop,
                   propvalue=propvalue,
                   propunit=units,
                   amt=amt))
    return eldata_lst
Beispiel #31
0
    def test_get_nn(self):
        """Check coordination numbers and neighbor lists on LiFePO4."""
        structure = self.get_structure('LiFePO4')

        # Coordination number expected from the default near-neighbor
        # finder for each species that is checked.
        expected_cn = (("Li", 0), ("Fe", 6), ("P", 4))

        checked = 0
        for idx, site in enumerate(structure):
            for symbol, cn in expected_cn:
                if site.specie == Element(symbol):
                    self.assertEqual(self.jmol.get_cn(structure, idx), cn)
                    checked += 1
                    break
        self.assertEqual(checked, 12)

        # With the user override, site 0 is reported as 6-coordinated.
        self.assertEqual(self.jmol_update.get_cn(structure, 0), 6)

        # get_nn must return one entry per neighbor.
        neighbors = self.jmol_update.get_nn(structure, 0)
        self.assertEqual(len(neighbors), 6)
Beispiel #32
0
 def __init__(self):
     """Load the elemental embeddings from the bundled JSON data file.

     Populates ``self.prop_names`` with the labels ``"embedding 1"`` to
     ``"embedding 16"`` and ``self.all_element_data`` with one
     ``{label: value}`` dict per entry: entry 0 is stored under the dummy
     key, entries 1-94 under the symbol of the element with that atomic
     number.
     """
     embedding_path = os.path.join(
         module_dir, "data_files/megnet_elemental_embedding.json")
     self._dummy = "Dummy"
     with open(embedding_path, "r") as handle:
         embeddings = json.load(handle)

     self.prop_names = ["embedding {}".format(n) for n in range(1, 17)]
     self.all_element_data = {}
     for z in range(95):
         labelled = dict(zip(self.prop_names, embeddings[z]))
         # Index 0 has no element; it is filed under the dummy key.
         key = self._dummy if z == 0 else str(Element.from_Z(z))
         self.all_element_data[key] = labelled
Beispiel #33
0
def gen_iupac_ordering():
    """Assign an 'IUPAC ordering' index to every entry of periodic_table.json.

    The ordering is described as a list of (groups, rows) pairs; every
    (group, row) combination is expanded in sequence and numbered from 0.
    Existing 'IUPAC ordering' values in the loaded table are removed before
    the new ones are written.

    NOTE(review): the updated table only lives in a local variable -- it is
    neither returned nor written back to disk here. Confirm whether a
    dump/return step is missing.
    """
    periodic_table = loadfn("periodic_table.json")

    # Each entry is (groups, rows); groups vary slowest when expanded.
    group_row_spec = [
        ([18], range(6, 0, -1)),  # noble gases
        ([1], range(7, 1, -1)),  # alkali metals
        ([2], range(7, 1, -1)),  # alkali earth metals
        (range(17, 2, -1), [9]),  # actinides
        (range(17, 2, -1), [8]),  # lanthanides
        ([3], (5, 4)),  # Y, Sc
        ([4], (6, 5, 4)),  # Hf -> Ti
        ([5], (6, 5, 4)),  # Ta -> V
        ([6], (6, 5, 4)),  # W -> Cr
        ([7], (6, 5, 4)),  # Re -> Mn
        ([8], (6, 5, 4)),  # Os -> Fe
        ([9], (6, 5, 4)),  # Ir -> Co
        ([10], (6, 5, 4)),  # Pt -> Ni
        ([11], (6, 5, 4)),  # Au -> Cu
        ([12], (6, 5, 4)),  # Hg -> Zn
        ([13], range(6, 1, -1)),  # Tl -> B
        ([14], range(6, 1, -1)),  # Pb -> C
        ([15], range(6, 1, -1)),  # Bi -> N
        ([1], [1]),  # Hydrogen
        ([16], range(6, 1, -1)),  # Po -> O
        ([17], range(6, 1, -1)),  # At -> F
    ]

    # Flatten into a single sequence of (group, row) pairs.
    group_row_pairs = [(group, row)
                       for groups, rows in group_row_spec
                       for group in groups
                       for row in rows]

    ordered_elements = [Element.from_row_and_group(row, group)
                        for group, row in group_row_pairs]
    iupac_ordering_dict = {element: index
                           for index, element in enumerate(ordered_elements)}

    # Drop any ordering already present in the loaded table.
    for entry in periodic_table.values():
        entry.pop('IUPAC ordering', None)

    # Write the freshly computed ordering.
    for el, entry in periodic_table.items():
        if 'IUPAC ordering' in entry:
            # sanity check that we don't cover the same element twice
            raise KeyError("IUPAC ordering already exists for {}".format(el))

        entry['IUPAC ordering'] = iupac_ordering_dict[get_el_sp(el)]
Beispiel #34
0
    def _parse(self, filename):
        """Parse a Gaussian output file and populate this object's attributes.

        The file is opened with ``zopen`` and read line by line through
        three stages:

        * stage 0 -- link0 commands and the route line;
        * stage 1 -- charge and spin multiplicity;
        * stage 2 -- everything else (geometries, energies, forces, orbital
          eigenvalues and coefficients, frequencies, Mulliken charges,
          PCM, thermochemistry corrections, termination status).

        Attributes initialised here: ``properly_terminated``, ``is_pcm``,
        ``stationary_type``, ``structures``, ``corrections``, ``energies``,
        ``pcm``, ``errors``, ``Mulliken_charges``, ``link0``,
        ``cart_forces``, ``frequencies``, ``eigenvalues``, ``is_spin``.
        Others (``functional``, ``basis_set``, ``route``, ``dieze_tag``,
        ``charge``, ``spin_mult``, ``num_basis_func``, ``electrons``,
        ``eigenvectors``, ``molecular_orbital``, ``atom_basis_labels``) are
        only set when the corresponding section is found.

        If no "Normal termination" line is seen, a warning is emitted
        instead of raising.

        Args:
            filename: path of the Gaussian output file.
        """
        # All patterns are raw strings so that regex escapes such as \s and
        # \d are not interpreted (and warned about) as Python string escapes.
        start_patt = re.compile(r" \(Enter \S+l101\.exe\)")
        route_patt = re.compile(r" #[pPnNtT]*.*")
        link0_patt = re.compile(r"^\s(%.+)\s*=\s*(.+)")
        charge_mul_patt = re.compile(r"Charge\s+=\s*([-\d]+)\s+"
                                     r"Multiplicity\s+=\s*(\d+)")
        num_basis_func_patt = re.compile(r"([0-9]+)\s+basis functions")
        num_elec_patt = re.compile(r"(\d+)\s+alpha electrons\s+(\d+)\s+beta electrons")
        pcm_patt = re.compile(r"Polarizable Continuum Model")
        stat_type_patt = re.compile(r"imaginary frequencies")
        scf_patt = re.compile(r"E\(.*\)\s*=\s*([-\.\d]+)\s+")
        mp2_patt = re.compile(r"EUMP2\s*=\s*(.*)")
        oniom_patt = re.compile(r"ONIOM:\s+extrapolated energy\s*=\s*(.*)")
        termination_patt = re.compile(r"(Normal|Error) termination")
        error_patt = re.compile(
            r"(! Non-Optimized Parameters !|Convergence failure)")
        mulliken_patt = re.compile(
            r"^\s*(Mulliken charges|Mulliken atomic charges)")
        mulliken_charge_patt = re.compile(
            r'^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
        end_mulliken_patt = re.compile(
            r'(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
        std_orientation_patt = re.compile(r"Standard orientation")
        end_patt = re.compile(r"--+")
        orbital_patt = re.compile(r"(Alpha|Beta)\s*\S+\s*eigenvalues --(.*)")
        thermo_patt = re.compile(r"(Zero-point|Thermal) correction(.*)="
                                 r"\s+([\d\.-]+)")
        forces_on_patt = re.compile(
            r"Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
        forces_off_patt = re.compile(r"Cartesian\s+Forces:\s+Max.*RMS.*")
        forces_patt = re.compile(
            r"\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")

        freq_on_patt = re.compile(
            r"Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
        freq_patt = re.compile(r"Frequencies\s--\s+(.*)")
        normal_mode_patt = re.compile(
            r"\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")

        mo_coeff_patt = re.compile(r"Molecular Orbital Coefficients:")
        mo_coeff_name_patt = re.compile(
            r"\d+\s((\d+|\s+)\s+([a-zA-Z]{1,2}|\s+))\s+(\d+\S+)")

        self.properly_terminated = False
        self.is_pcm = False
        self.stationary_type = "Minimum"
        self.structures = []
        self.corrections = {}
        self.energies = []
        self.pcm = None
        self.errors = []
        self.Mulliken_charges = {}
        self.link0 = {}
        self.cart_forces = []
        self.frequencies = []
        self.eigenvalues = []
        self.is_spin = False

        # Parser state. `read_coord` counts the dashed separator lines seen
        # inside a "Standard orientation" table (0 means "not reading").
        coord_txt = []
        read_coord = 0
        read_mulliken = False
        read_eigen = False
        eigen_txt = []
        parse_stage = 0
        num_basis_found = False
        terminated = False
        parse_forces = False
        forces = []
        parse_freq = False
        frequencies = []
        read_mo = False

        with zopen(filename) as f:
            for line in f:
                if parse_stage == 0:
                    # Header: link0 commands and the route line.
                    if start_patt.search(line):
                        parse_stage = 1
                    elif link0_patt.match(line):
                        m = link0_patt.match(line)
                        self.link0[m.group(1)] = m.group(2)
                    elif route_patt.search(line):
                        params = read_route_line(line)
                        self.functional = params[0]
                        self.basis_set = params[1]
                        self.route = params[2]
                        self.dieze_tag = params[3]
                        parse_stage = 1
                elif parse_stage == 1:
                    # Charge and spin multiplicity.
                    if charge_mul_patt.search(line):
                        m = charge_mul_patt.search(line)
                        self.charge = int(m.group(1))
                        self.spin_mult = int(m.group(2))
                        parse_stage = 2
                elif parse_stage == 2:

                    if self.is_pcm:
                        self._check_pcm(line)

                    if "FREQ" in self.route and thermo_patt.search(line):
                        m = thermo_patt.search(line)
                        if m.group(1) == "Zero-point":
                            self.corrections["Zero-point"] = float(m.group(3))
                        else:
                            # NOTE: str.strip removes the characters ' ',
                            # 't' and 'o' from both ends, not the literal
                            # prefix " to ".
                            key = m.group(2).strip(" to ")
                            self.corrections[key] = float(m.group(3))

                    if read_coord:
                        if not end_patt.search(line):
                            coord_txt.append(line)
                        else:
                            # The table is complete at the third separator
                            # after the "Standard orientation" line.
                            read_coord = (read_coord + 1) % 4
                            if not read_coord:
                                sp = []
                                coords = []
                                # The first two collected lines are skipped.
                                for coord_line in coord_txt[2:]:
                                    toks = coord_line.split()
                                    sp.append(Element.from_Z(int(toks[1])))
                                    coords.append([float(i) for i in toks[3:6]])
                                self.structures.append(Molecule(sp, coords))

                    if parse_forces:
                        m = forces_patt.search(line)
                        if m:
                            forces.extend([float(_v) for _v in m.groups()[2:5]])
                        elif forces_off_patt.search(line):
                            self.cart_forces.append(forces)
                            forces = []
                            parse_forces = False

                    # read molecular orbital eigenvalues
                    if read_eigen:
                        m = orbital_patt.search(line)
                        if m:
                            eigen_txt.append(line)
                        else:
                            # First non-eigenvalue line: convert what was
                            # collected into per-spin lists.
                            read_eigen = False
                            self.eigenvalues = {Spin.up: []}
                            for eigenline in eigen_txt:
                                if "Alpha" in eigenline:
                                    self.eigenvalues[Spin.up] += [float(e)
                                        for e in float_patt.findall(eigenline)]
                                elif "Beta" in eigenline:
                                    if Spin.down not in self.eigenvalues:
                                        self.eigenvalues[Spin.down] = []
                                    self.eigenvalues[Spin.down] += [float(e)
                                        for e in float_patt.findall(eigenline)]
                            eigen_txt = []

                    # read molecular orbital coefficients
                    if read_mo:
                        # build a matrix with all coefficients
                        all_spin = [Spin.up]
                        if self.is_spin:
                            all_spin.append(Spin.down)

                        mat_mo = {}
                        for spin in all_spin:
                            mat_mo[spin] = np.zeros((self.num_basis_func, self.num_basis_func))
                            nMO = 0
                            end_mo = False
                            # This section consumes lines directly from the
                            # file handle, one block of MO columns at a time.
                            while nMO < self.num_basis_func and not end_mo:
                                f.readline()
                                f.readline()
                                self.atom_basis_labels = []
                                for i in range(self.num_basis_func):
                                    line = f.readline()

                                    # identify atom and OA labels
                                    m = mo_coeff_name_patt.search(line)
                                    if m.group(1).strip() != "":
                                        iat = int(m.group(2)) - 1
                                        # atname = m.group(3)
                                        self.atom_basis_labels.append([m.group(4)])
                                    else:
                                        self.atom_basis_labels[iat].append(m.group(4))

                                    # MO coefficients
                                    coeffs = [float(c) for c in float_patt.findall(line)]
                                    for j in range(len(coeffs)):
                                        mat_mo[spin][i, nMO + j] = coeffs[j]

                                nMO += len(coeffs)
                                line = f.readline()
                                # manage pop=regular case (not all MO)
                                if nMO < self.num_basis_func and \
                                    ("Density Matrix:" in line or mo_coeff_patt.search(line)):
                                    end_mo = True
                                    warnings.warn("POP=regular case, matrix coefficients not complete")
                            f.readline()

                        self.eigenvectors = mat_mo
                        read_mo = False

                        # build a more convenient array dict with MO coefficient of
                        # each atom in each MO.
                        # mo[Spin][OM j][atom i] = {AO_k: coeff, AO_k: coeff ... }
                        mo = {}
                        for spin in all_spin:
                            mo[spin] = [[{} for iat in range(len(self.atom_basis_labels))]
                                                for j in range(self.num_basis_func)]
                            for j in range(self.num_basis_func):
                                i = 0
                                for iat in range(len(self.atom_basis_labels)):
                                    for label in self.atom_basis_labels[iat]:
                                        mo[spin][j][iat][label] = self.eigenvectors[spin][i, j]
                                        i += 1

                        self.molecular_orbital = mo


                    elif parse_freq:
                        m = freq_patt.search(line)
                        if m:
                            values = [float(_v) for _v in m.groups()[0].split()]
                            for value in values:
                                frequencies.append([value, []])
                        elif normal_mode_patt.search(line):
                            values = [float(_v) for _v in line.split()[2:]]
                            n = int(len(values) / 3)
                            # Each triple of values is appended to one of
                            # the last `n` frequencies added above.
                            for i in range(0, len(values), 3):
                                j = -n + int(i / 3)
                                frequencies[j][1].extend(values[i:i+3])
                        elif line.find("-------------------") != -1:
                            parse_freq = False
                            self.frequencies.append(frequencies)
                            frequencies = []

                    elif termination_patt.search(line):
                        m = termination_patt.search(line)
                        if m.group(1) == "Normal":
                            self.properly_terminated = True
                            terminated = True
                    elif error_patt.search(line):
                        error_defs = {
                            "! Non-Optimized Parameters !": "Optimization "
                                                            "error",
                            "Convergence failure": "SCF convergence error"
                        }
                        m = error_patt.search(line)
                        self.errors.append(error_defs[m.group(1)])
                    elif (not num_basis_found) and \
                            num_basis_func_patt.search(line):
                        m = num_basis_func_patt.search(line)
                        self.num_basis_func = int(m.group(1))
                        num_basis_found = True
                    elif num_elec_patt.search(line):
                        m = num_elec_patt.search(line)
                        self.electrons = (int(m.group(1)), int(m.group(2)))
                    elif (not self.is_pcm) and pcm_patt.search(line):
                        self.is_pcm = True
                        self.pcm = {}
                    elif "FREQ" in self.route and "OPT" in self.route and \
                            stat_type_patt.search(line):
                        self.stationary_type = "Saddle"
                    elif mp2_patt.search(line):
                        m = mp2_patt.search(line)
                        # Convert a "D" exponent marker to "E" for float().
                        self.energies.append(float(m.group(1).replace("D",
                                                                      "E")))
                    elif oniom_patt.search(line):
                        # Compiled patterns have no `matcher` method; the
                        # previous call raised AttributeError on every
                        # ONIOM energy line.
                        m = oniom_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif scf_patt.search(line):
                        m = scf_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif std_orientation_patt.search(line):
                        coord_txt = []
                        read_coord = 1
                    elif not read_eigen and orbital_patt.search(line):
                        eigen_txt.append(line)
                        read_eigen = True
                    elif mulliken_patt.search(line):
                        mulliken_txt = []
                        read_mulliken = True
                    elif not parse_forces and forces_on_patt.search(line):
                        parse_forces = True
                    elif freq_on_patt.search(line):
                        parse_freq = True
                    elif mo_coeff_patt.search(line):
                        if "Alpha" in line:
                            self.is_spin = True
                        read_mo = True

                    if read_mulliken:
                        if not end_mulliken_patt.search(line):
                            mulliken_txt.append(line)
                        else:
                            # End of the Mulliken table: turn the collected
                            # lines into {index: [symbol, charge]}.
                            mulliken_charges = {}
                            for mulliken_line in mulliken_txt:
                                if mulliken_charge_patt.search(mulliken_line):
                                    m = mulliken_charge_patt.search(mulliken_line)
                                    mulliken_charges[int(m.group(1))] = \
                                        [m.group(2), float(m.group(3))]
                            read_mulliken = False
                            self.Mulliken_charges = mulliken_charges

        if not terminated:
            # A missing "Normal termination" line is reported as a warning
            # rather than raised as an IOError.
            warnings.warn("\n" + self.filename + \
                ": Termination error or bad Gaussian output file !")
Beispiel #35
0
# Report the mean absolute error computed from `scores`.
print("The MAE of the linear ridge regression band gap model using the naive feature set is: "
      "{} eV".format(str(round(abs(mean(scores)), 3))))

##############################################################################################################

# Inspect which features carry the most weight in the linear model.

print("Below are the fitted linear ridge regression coefficients for each feature (i.e., element) in our naive feature set")

# Fit on the complete data set; no cross-validation is done at this point.
linear.fit(naiveFeatures, bandgaps)

print("element: coefficient")

# Coefficient i belongs to the element with atomic number i + 1.
for i in range(MAX_Z):
    element = Element.from_Z(i + 1)
    print(': '.join((element.symbol, str(linear.coef_[i]))))

##############################################################################################################

# Build an alternative, more physically motivated feature set.

physicalFeatures = []

# NOTE(review): in this excerpt the per-material lists are only initialised;
# the code that fills them is not visible here.
for material in materials:
    theseFeatures = []
    fraction = []
    atomicNo = []
    eneg = []
    group = []
Beispiel #36
0
    def disassemble(self, atom_labels=None, guess_element=True,
                    ff_label="ff_map"):
        """
        Breaks down LammpsData to building blocks
        (LammpsBox, ForceField and a series of Topology).
        RESTRICTIONS APPLIED:

        1. No complex force field defined not just on atom
            types, where the same type or equivalent types of topology
            may have more than one set of coefficients.
        2. No intermolecular topologies (with atoms from different
            molecule-ID) since a Topology object includes data for ONE
            molecule or structure only.

        Args:
            atom_labels ([str]): List of strings (must be different
                from one another) for labelling each atom type found in
                Masses section. Default to None, where the labels are
                automatically added based on either element guess or
                dummy specie assignment.
            guess_element (bool): Whether to guess the element based on
                its atomic mass. Default to True, otherwise dummy
                species "Qa", "Qb", ... will be assigned to various
                atom types. The guessed or assigned elements will be
                reflected on atom labels if atom_labels is None, as
                well as on the species of molecule in each Topology.
            ff_label (str): Site property key for labeling atoms of
                different types. Default to "ff_map".

        Returns:
            LammpsBox, ForceField, [Topology]

        Raises:
            AssertionError: if atom labels are not unique per atom type,
                or if a topology entry spans more than one molecule-ID.

        """
        atoms_df = self.atoms.copy()
        if "nx" in atoms_df.columns:
            # Apply the shift computed from the image flags to the
            # coordinates.
            atoms_df[["x", "y", "z"]] += \
                self.box.get_box_shift(atoms_df[["nx", "ny", "nz"]].values)
        atoms_df = pd.concat([atoms_df, self.velocities], axis=1)

        # Group atoms by molecule-ID; without that column everything is
        # treated as a single molecule with ID 1.
        mids = atoms_df.get("molecule-ID")
        if mids is None:
            unique_mids = [1]
            data_by_mols = {1: {"Atoms": atoms_df}}
        else:
            unique_mids = np.unique(mids)
            data_by_mols = {}
            for k in unique_mids:
                df = atoms_df[atoms_df["molecule-ID"] == k]
                data_by_mols[k] = {"Atoms": df}

        masses = self.masses.copy()
        masses["label"] = atom_labels
        unique_masses = np.unique(masses["mass"])
        if guess_element:
            # Pick, for every distinct mass, the element with the closest
            # reference atomic mass.
            ref_masses = [el.atomic_mass.real for el in Element]
            diff = np.abs(np.array(ref_masses) - unique_masses[:, None])
            atomic_numbers = np.argmin(diff, axis=1) + 1
            symbols = [Element.from_Z(an).symbol for an in atomic_numbers]
        else:
            # Dummy species "Qa", "Qb", ... (chr(97) == "a").
            symbols = ["Q%s" % a for a in
                       map(chr, range(97, 97 + len(unique_masses)))]
        for um, s in zip(unique_masses, symbols):
            masses.loc[masses["mass"] == um, "element"] = s
        if atom_labels is None:  # add unique labels based on elements
            # Series.items() replaces iteritems(), which was removed in
            # pandas 2.0.
            for el, vc in masses["element"].value_counts().items():
                masses.loc[masses["element"] == el, "label"] = \
                    ["%s%d" % (el, c) for c in range(1, vc + 1)]
        assert masses["label"].nunique(dropna=False) == len(masses), \
            "Expecting unique atom label for each type"
        mass_info = [tuple([r["label"], r["mass"]])
                     for _, r in masses.iterrows()]

        nonbond_coeffs, topo_coeffs = None, None
        if self.force_field:
            if "PairIJ Coeffs" in self.force_field:
                nbc = self.force_field["PairIJ Coeffs"]
                nbc = nbc.sort_values(["id1", "id2"]).drop(["id1", "id2"], axis=1)
                nonbond_coeffs = [list(t) for t in nbc.itertuples(False, None)]
            elif "Pair Coeffs" in self.force_field:
                nbc = self.force_field["Pair Coeffs"].sort_index()
                nonbond_coeffs = [list(t) for t in nbc.itertuples(False, None)]

            topo_coeffs = {k: [] for k in SECTION_KEYWORDS["ff"][2:]
                           if k in self.force_field}
            for kw in topo_coeffs.keys():
                class2_coeffs = {k: list(v.itertuples(False, None))
                                 for k, v in self.force_field.items()
                                 if k in CLASS2_KEYWORDS.get(kw, [])}
                ff_df = self.force_field[kw]
                for t in ff_df.itertuples(True, None):
                    # t[0] is the row index, t[1:] the coefficients.
                    d = {"coeffs": list(t[1:]), "types": []}
                    if class2_coeffs:
                        d.update({k: list(v[t[0] - 1])
                                  for k, v in class2_coeffs.items()})
                    topo_coeffs[kw].append(d)

        if self.topology:
            def label_topo(t):
                # Map atom indices to the labels of their atom types.
                return tuple(masses.loc[atoms_df.loc[t, "type"], "label"])

            for k, v in self.topology.items():
                # e.g. "Bonds" -> "Bond Coeffs"
                ff_kw = k[:-1] + " Coeffs"
                for topo in v.itertuples(False, None):
                    topo_idx = topo[0] - 1
                    indices = topo[1:]
                    mids = atoms_df.loc[indices, "molecule-ID"].unique()
                    assert len(mids) == 1, \
                        "Do not support intermolecular topology formed " \
                        "by atoms with different molecule-IDs"
                    label = label_topo(indices)
                    topo_coeffs[ff_kw][topo_idx]["types"].append(label)
                    if data_by_mols[mids[0]].get(k):
                        data_by_mols[mids[0]][k].append(indices)
                    else:
                        data_by_mols[mids[0]][k] = [indices]

        if topo_coeffs:
            # Remove duplicate type tuples collected above.
            for v in topo_coeffs.values():
                for d in v:
                    d["types"] = list(set(d["types"]))

        ff = ForceField(mass_info=mass_info, nonbond_coeffs=nonbond_coeffs,
                        topo_coeffs=topo_coeffs)

        topo_list = []
        for mid in unique_mids:
            data = data_by_mols[mid]
            atoms = data["Atoms"]
            # Topology indices are re-based on the molecule's smallest
            # atom index.
            shift = min(atoms.index)
            type_ids = atoms["type"]
            species = masses.loc[type_ids, "element"]
            labels = masses.loc[type_ids, "label"]
            coords = atoms[["x", "y", "z"]]
            m = Molecule(species.values, coords.values,
                         site_properties={ff_label: labels.values})
            charges = atoms.get("q")
            velocities = atoms[["vx", "vy", "vz"]] if "vx" in atoms.columns \
                else None
            topologies = {}
            for kw in SECTION_KEYWORDS["topology"]:
                if data.get(kw):
                    topologies[kw] = (np.array(data[kw]) - shift).tolist()
            topologies = None if not topologies else topologies
            topo_list.append(Topology(sites=m, ff_label=ff_label,
                                      charges=charges, velocities=velocities,
                                      topologies=topologies))

        return self.box, ff, topo_list