def enumerate_structures(self, spacegroups=None):
    """Enumerate candidate structures into the database at ``self.db_filename``.

    Which steps run depends on the entries found in ``self.source``:

    * ``'icsd'``: experimental prototypes are stored through
      ``OqmdInterface.store_enumeration`` for every formula returned by
      ``get_formulas(self.elements, self.stoichiometries)``.
    * ``'prototypes'``: prototypes are enumerated per stoichiometry via
      ``self.enumerate_prototypes`` and atoms are then stored with
      ``AtomsEnumeration.store_atom_enumeration``.

    Args:
        spacegroups: forwarded unchanged to ``self.enumerate_prototypes``
            and to the ``AtomsEnumeration`` constructor.
    """
    if 'icsd' in self.source:
        # Experimental structures are handled before anything else.
        oqmd = OqmdInterface()
        print('Enumerating experimental prototypes')
        for formula in get_formulas(self.elements, self.stoichiometries):
            oqmd.store_enumeration(
                filename=self.db_filename,
                chemical_formula=formula,
                max_atoms=self.max_atoms,
            )

    # Prototype enumeration comes second; per the original note, the
    # experimental structures stored above will not be overwritten.
    if 'prototypes' in self.source:
        print('Enumerating Prototypes')
        for stoich in self.stoichiometries:
            self.enumerate_prototypes(stoich, spacegroups)

        atoms_enumerator = AtomsEnumeration(
            self.elements, self.max_atoms, spacegroups
        )
        print('Enumerating atoms')
        atoms_enumerator.store_atom_enumeration(
            filename=self.db_filename,
            multithread=False,
            max_candidates=1,
        )
def test_unique_prototypes(self):
    """Expect 19 distinct prototypes for NiCuCN when capped at 8 atoms."""
    oqmd = OqmdInterface()
    prototypes = oqmd.get_distinct_prototypes(
        chemical_formula='NiCuCN',
        max_atoms=8,
    )
    n_found = len(prototypes)
    assert n_found == 19
    print('{} distinct prototypes found'.format(n_found))
def test_get_same_formula(self):
    """The first TiO2 hit for AB2_2_a_f_136 should keep its original formula."""
    # When a structure with the requested formula exists, the first entry is
    # expected to report the same value for both formula fields.
    oqmd = OqmdInterface()
    matches = oqmd.get_atoms_for_prototype(
        chemical_formula='TiO2',
        proto_name='AB2_2_a_f_136',
    )
    first_match = matches[0]
    assert first_match['chemical_formula'] == first_match['original_formula']
def get_structures_from_protosearch(formulas, source='icsd', db_interface=None):
    """
    Calls protosearch to get the hypothetical structures.

    Args:
        formulas ([str]): iterable of chemical formulas from which
            to generate candidate structures
        source (str): project name in OQMD to be used as source.
            Defaults to ICSD.
        db_interface (DbInterface): interface to OQMD database
            by default uses the one pulled from data.matr.io

    Returns:
        (pandas.DataFrame) hypothetical pymatgen structures
            generated (column 'pmg_structures') and their unique ids
            from protosearch (used as the index). When no formulas are
            given, an empty frame holding only the 'pmg_structures'
            column is returned and the database is never touched.

    TODO:
        - For efficiency, n_max_atoms can be handled within OqmdInterface
    """
    # Materialize once so one-shot iterables work and emptiness can be tested.
    formulas = list(formulas)
    if not formulas:
        # pd.concat raises "No objects to concatenate" on an empty list;
        # bail out before the database cache is fetched for nothing.
        return pd.DataFrame(columns=['pmg_structures'])

    if db_interface is None:
        cache_matrio_data("oqmd_ver3.db")
        oqmd_db_path = os.path.join(CAMD_CACHE, "oqmd_ver3.db")
        db_interface = OqmdInterface(oqmd_db_path)

    dataframes = [
        db_interface.create_proto_data_set(source=source,
                                           chemical_formula=formula)
        for formula in formulas
    ]
    _structures = pd.concat(dataframes)

    # Drop bad structures (any row containing a missing value)
    _structures.dropna(axis=0, how='any', inplace=True)

    # conversion to pymatgen structures
    ase_adap = AseAtomsAdaptor()
    pmg_structures = [
        ase_adap.get_structure(atoms) for atoms in _structures['atoms']
    ]
    _structures['pmg_structures'] = pmg_structures

    # The uuid below is probably an overkill. But want the strings
    # to be unique. Sometimes in spaces with similar stoichiometries
    # they may clash e.g. IrSb2O2 and Ir2SbO2 may end up producing
    # the same string, despite different substitutions on same structure.
    # We should figure out a way to get the right order from protosearch.
    structure_uids = [
        proto_name.replace('_', '-') + '-'
        + '-'.join(structure.symbol_set) + '-'
        + uuid.uuid4().hex[:6]
        for proto_name, structure in zip(_structures['proto_name'],
                                         pmg_structures)
    ]
    _structures.index = structure_uids
    return _structures
def test_create_proto_dataset(self):
    """FeO6 capped at 7 atoms should give 5 entries of one Fe and six O each."""
    oqmd = OqmdInterface()
    atoms_list = oqmd.create_proto_data_set(chemical_formula='FeO6',
                                            max_atoms=7)
    assert len(atoms_list) == 5

    for atoms in atoms_list["atoms"][:5]:
        # len(atoms) replaces the deprecated Atoms.get_number_of_atoms().
        assert len(atoms) == 7
        symbols = atoms.get_chemical_symbols()
        assert symbols.count('Fe') == 1
        assert symbols.count('O') == 6
def test_substitute_atoms(self):
    """Substituting Nb, V, O, O into the first TiMoO2 prototype hit gives 2 results."""
    oqmd = OqmdInterface()
    first_entry = oqmd.get_atoms_for_prototype(
        chemical_formula='TiMoO2',
        proto_name='ABC2_2_a_c_f_194',
    )[0]
    template_atoms = first_entry['atoms']
    substituted = oqmd.substitute_atoms(
        template_atoms,
        new_symbols=['Nb', 'V', 'O', 'O'],
    )
    assert len(substituted) == 2
def test_unique(self):
    """Check how many structures are returned for symmetric and asymmetric prototypes."""
    oqmd = OqmdInterface()

    def structures_for(formula, proto_name):
        # Collect the 'atoms' field of every entry returned for the prototype.
        entries = oqmd.get_atoms_for_prototype(chemical_formula=formula,
                                               proto_name=proto_name)
        return [entry['atoms'] for entry in entries]

    # AB == BA - one structure
    assert len(structures_for('TiMo', 'AB_4_ab_ab_186')) == 1

    # ABC3 != BAC3 - two structures
    assert len(structures_for('TiMoO3', 'ABC3_1_a_a_b_160')) == 2
def get_structures_from_protosearch(formulas, source="icsd", db_interface=None):
    """
    Calls protosearch to get the hypothetical structures.

    Args:
        formulas ([str]): iterable of chemical formulas from which
            to generate candidate structures
        source (str): project name in OQMD to be used as source.
            Defaults to ICSD.
        db_interface (DbInterface): interface to OQMD database
            by default uses the one pulled from data.matr.io

    Returns:
        (pandas.DataFrame) hypothetical pymatgen structures
            generated (column "structure") and their unique ids from
            protosearch (used as the index). When no formulas are
            given, an empty frame holding only the "structure" column
            is returned and no database interface is created.
    """
    # Materialize once so one-shot iterables work and emptiness can be tested.
    formulas = list(formulas)
    if not formulas:
        # pd.concat raises "No objects to concatenate" on an empty list;
        # bail out before a database interface is built for nothing.
        return pd.DataFrame(columns=["structure"])

    if db_interface is None:
        db_interface = OqmdInterface(source)

    dataframes = [
        db_interface.create_proto_data_set(chemical_formula=formula)
        for formula in formulas
    ]
    _structures = pd.concat(dataframes)

    # Drop bad structures (any row containing a missing value)
    _structures.dropna(axis=0, how="any", inplace=True)

    # conversion to pymatgen structures
    ase_adap = AseAtomsAdaptor()
    pmg_structures = [
        ase_adap.get_structure(atoms) for atoms in _structures["atoms"]
    ]
    _structures["structure"] = pmg_structures

    # This is for compatibility with Mc1, which doesn't allow
    # underscores
    structure_uids = [
        name.replace('_', '-') for name in _structures["structure_name"]
    ]
    _structures.index = structure_uids
    return _structures
def test_store_enumeration(self):
    """Run store_enumeration for FeO6 (max 7 atoms) against 'test.db'.

    No assertion is made; the test only checks that the call completes.
    """
    oqmd = OqmdInterface()
    oqmd.store_enumeration(
        filename='test.db',
        chemical_formula='FeO6',
        max_atoms=7,
    )
def test_unique_prototypes2(self):
    """Print the number of distinct prototypes found for TiO2 (no assertion)."""
    oqmd = OqmdInterface()
    prototypes = oqmd.get_distinct_prototypes(chemical_formula='TiO2')
    n_found = len(prototypes)
    print(n_found)