def enumerate_structures(self, spacegroups=None):
        """Fill the structure database from the sources named in self.source.

        If 'icsd' is in ``self.source``, an enumeration is stored through
        ``OqmdInterface.store_enumeration`` for every formula returned by
        ``get_formulas(self.elements, self.stoichiometries)``.

        If 'prototypes' is in ``self.source``, ``self.enumerate_prototypes``
        is called once per stoichiometry, after which an ``AtomsEnumeration``
        stores the atom enumeration.

        Both stages write to ``self.db_filename``.

        Args:
            spacegroups: passed through unchanged to
                ``self.enumerate_prototypes`` and ``AtomsEnumeration``.
        """
        if 'icsd' in self.source:
            # Experimental structures are handled first.
            oqmd = OqmdInterface()
            print('Enumerating experimental prototypes')

            for formula in get_formulas(self.elements, self.stoichiometries):
                oqmd.store_enumeration(filename=self.db_filename,
                                       chemical_formula=formula,
                                       max_atoms=self.max_atoms)

        # Prototype enumeration is the second stage; according to the
        # original author's note, experimental structures are not
        # overwritten by it.
        if 'prototypes' not in self.source:
            return

        print('Enumerating Prototypes')
        for stoich in self.stoichiometries:
            self.enumerate_prototypes(stoich, spacegroups)

        atom_enumerator = AtomsEnumeration(self.elements,
                                           self.max_atoms,
                                           spacegroups)
        print('Enumerating atoms')
        atom_enumerator.store_atom_enumeration(filename=self.db_filename,
                                               multithread=False,
                                               max_candidates=1)
    def test_unique_prototypes(self):
        """Expect 19 distinct prototypes for 'NiCuCN' with max_atoms=8."""
        interface = OqmdInterface()
        prototypes = interface.get_distinct_prototypes(
            chemical_formula='NiCuCN', max_atoms=8)
        n_found = len(prototypes)

        assert n_found == 19
        print('{} distinct prototypes found'.format(n_found))
    def test_get_same_formula(self):
        """First TiO2 entry must have equal chemical and original formula."""
        interface = OqmdInterface()
        matches = interface.get_atoms_for_prototype(
            chemical_formula='TiO2', proto_name='AB2_2_a_f_136')

        # Only the first returned entry is checked.
        first = matches[0]
        assert first['chemical_formula'] == first['original_formula']
Beispiel #4
0
def get_structures_from_protosearch(formulas,
                                    source='icsd',
                                    db_interface=None):
    """
    Calls protosearch to get the hypothetical structures.

    Args:
        formulas ([str]): list of chemical formulas from which
            to generate candidate structures
        source (str): project name in OQMD to be used as source.
            Defaults to ICSD.
        db_interface (DbInterface): interface to OQMD database
            by default uses the one pulled from data.matr.io

    Returns:
        (pandas.DataFrame) hypothetical structures, one per row, with
            the pymatgen structure stored in the 'pmg_structures'
            column and a unique id string used as the index

    Raises:
        ValueError: if formulas is empty, because pandas.concat has
            nothing to concatenate

    TODO:
        - For efficiency, n_max_atoms can be handled within OqmdInterface
    """

    if db_interface is None:
        # The same file name is both cached and opened, so name it once.
        db_filename = "oqmd_ver3.db"
        cache_matrio_data(db_filename)
        db_interface = OqmdInterface(os.path.join(CAMD_CACHE, db_filename))

    dataframes = [
        db_interface.create_proto_data_set(source=source,
                                           chemical_formula=formula)
        for formula in formulas
    ]
    _structures = pd.concat(dataframes)

    # Drop bad structures
    _structures.dropna(axis=0, how='any', inplace=True)

    # conversion to pymatgen structures; iterate the column directly
    # rather than materializing a full row with .iloc[i] per structure
    ase_adap = AseAtomsAdaptor()
    pmg_structures = [
        ase_adap.get_structure(atoms) for atoms in _structures['atoms']
    ]
    _structures['pmg_structures'] = pmg_structures

    # The uuid below is probably an overkill. But want the strings
    # to be unique. Sometimes in spaces with similar stoichiometries
    # they may clash e.g. IrSb2O2 and Ir2SbO2 may end up producing
    # the same string, despite different substitutions on same structure.
    # We should figure out a way to get the right order from protosearch.
    structure_uids = [
        '-'.join((proto_name.replace('_', '-'),
                  '-'.join(structure.symbol_set),
                  uuid.uuid4().hex[:6]))
        for proto_name, structure in zip(_structures['proto_name'],
                                         pmg_structures)
    ]
    _structures.index = structure_uids
    return _structures
Beispiel #5
0
    def test_create_proto_dataset(self):
        """FeO6 with max_atoms=7 gives 5 rows, each with 1 Fe and 6 O."""
        interface = OqmdInterface()
        dataset = interface.create_proto_data_set(chemical_formula='FeO6',
                                                  max_atoms=7)

        assert len(dataset) == 5

        for atoms in dataset["atoms"][:5]:
            # len(atoms) replaces the deprecated ASE call
            # Atoms.get_number_of_atoms().
            assert len(atoms) == 7

            symbols = atoms.get_chemical_symbols()
            assert symbols.count('Fe') == 1
            assert symbols.count('O') == 6
    def test_substitute_atoms(self):
        """Substituting Nb, V, O, O into a TiMoO2 entry gives 2 results."""
        interface = OqmdInterface()

        matches = interface.get_atoms_for_prototype(
            chemical_formula='TiMoO2', proto_name='ABC2_2_a_c_f_194')
        # Use the atoms of the first returned entry as the template.
        template = matches[0]['atoms']

        substituted = interface.substitute_atoms(
            template, new_symbols=['Nb', 'V', 'O', 'O'])

        assert len(substituted) == 2
    def test_unique(self):
        """Check how many entries come back for two prototype lookups."""
        interface = OqmdInterface()

        cases = (
            # AB == BA  - one structure
            ('TiMo', 'AB_4_ab_ab_186', 1),
            # ABC3 != BAC3  - two structures
            ('TiMoO3', 'ABC3_1_a_a_b_160', 2),
        )

        for formula, proto_name, expected in cases:
            entries = interface.get_atoms_for_prototype(
                chemical_formula=formula, proto_name=proto_name)
            structures = [entry['atoms'] for entry in entries]

            assert len(structures) == expected
Beispiel #8
0
def get_structures_from_protosearch(formulas,
                                    source="icsd",
                                    db_interface=None):
    """
    Calls protosearch to get the hypothetical structures.

    Args:
        formulas ([str]): list of chemical formulas from which
            to generate candidate structures
        source (str): project name in OQMD to be used as source.
            Defaults to ICSD. Only used to construct the default
            OqmdInterface when db_interface is None.
        db_interface (DbInterface): interface to OQMD database;
            when None, OqmdInterface(source) is created

    Returns:
        (pandas.DataFrame) hypothetical structures, one per row, with
            the pymatgen structure stored in the "structure" column
            and the index taken from "structure_name" with
            underscores replaced by dashes

    Raises:
        ValueError: if formulas is empty, because pandas.concat has
            nothing to concatenate
    """

    if db_interface is None:
        db_interface = OqmdInterface(source)
    dataframes = [
        db_interface.create_proto_data_set(chemical_formula=formula)
        for formula in formulas
    ]
    _structures = pd.concat(dataframes)

    # Drop bad structures
    _structures.dropna(axis=0, how="any", inplace=True)

    # conversion to pymatgen structures; iterate the column directly
    # rather than materializing a full row with .iloc[i] per structure
    ase_adap = AseAtomsAdaptor()
    _structures["structure"] = [
        ase_adap.get_structure(atoms) for atoms in _structures["atoms"]
    ]

    # This is for compatibility with Mc1, which doesn't allow
    # underscores
    _structures.index = [
        name.replace('_', '-') for name in _structures["structure_name"]
    ]
    return _structures
Beispiel #9
0
 def test_store_enumeration(self):
     """Store the 'FeO6' enumeration (max_atoms=7) under 'test.db'."""
     interface = OqmdInterface()
     interface.store_enumeration(
         filename='test.db', chemical_formula='FeO6', max_atoms=7)
Beispiel #10
0
 def test_unique_prototypes2(self):
     """Print the number of distinct prototypes returned for 'TiO2'."""
     interface = OqmdInterface()
     prototypes = interface.get_distinct_prototypes(
         chemical_formula='TiO2')
     print(len(prototypes))