def test_disordered_primitive_to_ordered_supercell(self):
    """
    Fit a half-occupied primitive cell against an ordered supercell.

    The fit must succeed when the supercell size is judged by 'num_atoms'
    and fail when it is judged by 'num_sites'.
    """
    # The two matchers differ only in ``supercell_size``.
    shared = dict(ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=False,
                  scale=True, attempt_supercell=True, allow_subset=True)
    by_atoms = StructureMatcher(supercell_size='num_atoms',
                                comparator=OrderDisorderElementComparator(),
                                **shared)
    by_sites = StructureMatcher(supercell_size='num_sites',
                                comparator=OrderDisorderElementComparator(),
                                **shared)

    prim = Structure(Lattice.orthorhombic(10, 20, 30),
                     [{'Na': 0.5}, {'Cl': 0.5}],
                     [[0, 0, 0], [0.5, 0.5, 0.5]])
    supercell = Structure(Lattice.orthorhombic(20, 20, 30),
                          ['Na', 'Cl'],
                          [[0, 0, 0], [0.75, 0.5, 0.5]])
    supercell.make_supercell([[-1, 1, 0], [0, 1, 1], [1, 0, 0]])

    self.assertFalse(by_sites.fit(prim, supercell))
    self.assertTrue(by_atoms.fit(prim, supercell))

    # get_s2_like_s1 is rejected in one argument order and yields four
    # sites in the other.
    with self.assertRaises(ValueError):
        by_atoms.get_s2_like_s1(prim, supercell)
    self.assertEqual(len(by_atoms.get_s2_like_s1(supercell, prim)), 4)
def test_ordered_primitive_to_disordered_supercell(self):
    """
    Fit an ordered primitive cell against a half-occupied supercell.

    The fit must succeed when the supercell size is judged by 'num_sites'
    and fail when it is judged by 'num_atoms'.
    """
    # The two matchers differ only in ``supercell_size``.
    shared = dict(ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=False,
                  scale=True, attempt_supercell=True, allow_subset=True)
    by_atoms = StructureMatcher(supercell_size='num_atoms',
                                comparator=OrderDisorderElementComparator(),
                                **shared)
    by_sites = StructureMatcher(supercell_size='num_sites',
                                comparator=OrderDisorderElementComparator(),
                                **shared)

    s1 = Structure(Lattice.orthorhombic(10, 20, 30),
                   ['Na', 'Cl'],
                   [[0, 0, 0], [0.5, 0.5, 0.5]])
    s2 = Structure(Lattice.orthorhombic(20, 20, 30),
                   [{'Na': 0.5}, {'Na': 0.5}, {'Cl': 0.5}, {'Cl': 0.5}],
                   [[0, 0, 0], [0.5, 0, 0],
                    [0.25, 0.5, 0.5], [0.75, 0.5, 0.5]])

    self.assertTrue(by_sites.fit(s1, s2))
    self.assertFalse(by_atoms.fit(s1, s2))
def add_if_belongs(self, cand_snl):
    """
    Add ``cand_snl`` to this group if it matches the canonical structure.

    Args:
        cand_snl: candidate SNL; its ``snlgroup_key``, ``snl_id`` and
            ``structure`` attributes are read.

    Returns:
        tuple: ``(True, spec_group)`` when the candidate was added.
        ``spec_group`` is the snl_id of the species group it joined or
        created, or None when the structure has no species properties.
        ``(False, None)`` when the candidate was rejected.
    """
    rejected = (False, None)

    # no need to compare if different formulas or spacegroups
    if cand_snl.snlgroup_key != self.canonical_snl.snlgroup_key:
        return rejected

    # no need to compare if one is ordered, the other disordered
    if cand_snl.structure.is_ordered != self.canonical_structure.is_ordered:
        return rejected

    # filter out large C-Ce structures
    comp = cand_snl.structure.composition
    chemsys = '-'.join(sorted({e.symbol for e in comp.elements}))
    too_large = (cand_snl.structure.num_sites > 1500
                 or self.canonical_structure.num_sites > 1500)
    if too_large and chemsys == 'C-Ce':
        print('SKIPPING LARGE C-Ce')
        return rejected

    # make sure the structure is not already in all_structures
    if cand_snl.snl_id in self.all_snl_ids:
        print('WARNING: add_if_belongs() has detected that you are trying to add the same SNL id twice!')
        return rejected

    # try a structure fit to the canonical structure
    # use default Structure Matcher params from April 24, 2013, as suggested by Shyue
    # we are using the ElementComparator() because this is how we want to group results
    sm = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5,
                          primitive_cell=True, scale=True,
                          attempt_supercell=False,
                          comparator=ElementComparator())
    if not sm.fit(cand_snl.structure, self.canonical_structure):
        return rejected

    # everything checks out, add to the group
    self.all_snl_ids.append(cand_snl.snl_id)

    # now that we are in the group, if there are site properties we need to
    # check species_groups, e.g. whether another SNL in the group has the
    # same site properties (such as MAGMOM)
    spec_group = None
    if has_species_properties(cand_snl.structure):
        # The matcher settings are constant, so build it once for the loop.
        sms = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5,
                               primitive_cell=True, scale=True,
                               attempt_supercell=False,
                               comparator=SpeciesComparator())
        for snl in self.species_snl:
            if sms.fit(cand_snl.structure, snl.structure):
                spec_group = snl.snl_id
                self.species_groups[snl.snl_id].append(cand_snl.snl_id)
                break

        # add a new species group; compare against None so that a matched
        # group whose snl_id is falsy (e.g. 0) is not mistaken for "no match"
        if spec_group is None:
            self.species_groups[cand_snl.snl_id] = [cand_snl.snl_id]
            self.species_snl.append(cand_snl)
            spec_group = cand_snl.snl_id

    self.updated_at = datetime.datetime.utcnow()
    return True, spec_group
def test_supercell_fit(self):
    """
    Al3F9 and its distorted variant must match only when the matcher is
    allowed to attempt supercells.
    """
    s1, s2 = [read_structure(os.path.join(test_dir, fname))
              for fname in ("Al3F9.cif", "Al3F9_distorted.cif")]

    without_supercell = StructureMatcher(attempt_supercell=False)
    self.assertFalse(without_supercell.fit(s1, s2))

    with_supercell = StructureMatcher(attempt_supercell=True)
    self.assertTrue(with_supercell.fit(s1, s2))
def test_supercell_fit(self):
    """
    Al3F9 and its distorted variant must match only when the matcher is
    allowed to attempt supercells, and then in either argument order.
    """
    s1, s2 = [Structure.from_file(os.path.join(test_dir, fname))
              for fname in ("Al3F9.json", "Al3F9_distorted.json")]

    without_supercell = StructureMatcher(attempt_supercell=False)
    self.assertFalse(without_supercell.fit(s1, s2))

    with_supercell = StructureMatcher(attempt_supercell=True)
    self.assertTrue(with_supercell.fit(s1, s2))
    self.assertTrue(with_supercell.fit(s2, s1))
def test_no_scaling(self):
    """Fit the first two reference structures with ``scale=False``."""
    matcher = StructureMatcher(ltol=0.1, stol=0.1, angle_tol=2,
                               scale=False, comparator=ElementComparator())
    first, second = self.struct_list[0], self.struct_list[1]

    self.assertTrue(matcher.fit(first, second))

    # The first element of get_rms_dist must stay below 0.0008.
    rms = matcher.get_rms_dist(first, second)[0]
    self.assertTrue(rms < 0.0008)
def _match_material(self, doc):
    """
    Returns the material_id that has the same structure as this doc as
    determined by the structure matcher. Returns None if no match.

    Args:
        doc: a JSON-like document; its "formula_reduced_abc",
            "spacegroup" and "structure" entries are read

    Returns:
        (int) matching material_id or None
    """
    formula = doc["formula_reduced_abc"]
    sgnum = doc["spacegroup"]["number"]

    # Neither the task structure nor the matcher depends on the candidate
    # material, so build both once instead of once per cursor row.
    t_struct = Structure.from_dict(doc["structure"])
    sm = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5,
                          primitive_cell=True, scale=True,
                          attempt_supercell=False, allow_subset=False,
                          comparator=ElementComparator())

    query = {"formula_reduced_abc": formula, "sg_number": sgnum}
    for m in self._materials.find(query, {"structure": 1, "material_id": 1}):
        m_struct = Structure.from_dict(m["structure"])
        if sm.fit(m_struct, t_struct):
            return m["material_id"]

    return None
def _match_material(self, taskdoc):
    """
    Returns the material_id that has the same structure as this task as
    determined by the structure matcher. Returns None if no match.

    Args:
        taskdoc (dict): a JSON-like task document

    Returns:
        (int) matching material_id or None
    """
    formula = taskdoc["formula_reduced_abc"]

    if "parent_structure" in taskdoc:
        # this is used to intentionally combine multiple data w/same formula
        # but slightly different structure, e.g. from an ordering scheme
        parent = taskdoc["parent_structure"]
        t_struct = Structure.from_dict(parent["structure"])
        q = {"formula_reduced_abc": formula,
             "parent_structure.spacegroup.number":
                 parent["spacegroup"]["number"]}
    else:
        sgnum = taskdoc["output"]["spacegroup"]["number"]
        t_struct = Structure.from_dict(taskdoc["output"]["structure"])
        q = {"formula_reduced_abc": formula, "sg_number": sgnum}

    # The matcher settings are constant; build it once rather than once per
    # candidate material.
    sm = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5,
                          primitive_cell=True, scale=True,
                          attempt_supercell=False, allow_subset=False,
                          comparator=ElementComparator())

    projection = {"parent_structure": 1, "structure": 1, "material_id": 1}
    for m in self._materials.find(q, projection):
        # Prefer the material's parent structure when it has one.
        if "parent_structure" in m:
            s_dict = m["parent_structure"]["structure"]
        else:
            s_dict = m["structure"]
        if sm.fit(Structure.from_dict(s_dict), t_struct):
            return m["material_id"]

    return None
def test_primitive(self):
    """Test primitive cell reduction"""
    # Expand the second reference structure 2x3x1 and check that it still
    # matches the first one when primitive_cell=True.
    scaling = [[2, 0, 0], [0, 3, 0], [0, 0, 1]]
    enlarged = SupercellMaker(self.struct_list[1],
                              scaling_matrix=scaling).modified_structure
    matcher = StructureMatcher(primitive_cell=True)
    self.assertTrue(matcher.fit(self.struct_list[0], enlarged))
def _perform_grouping(args):
    """
    Group entries whose host structures match one another.

    Args:
        args (tuple): ``(entries_json, hosts_json, ltol, stol, angle_tol,
            primitive_cell, scale, comparator, groups)``. The two JSON
            strings are decoded with MontyDecoder and paired element-wise;
            the tolerance/flag values are forwarded to StructureMatcher;
            ``groups`` receives, via ``append``, one JSON-encoded list of
            entries per group found.
    """
    (entries_json, hosts_json, ltol, stol, angle_tol,
     primitive_cell, scale, comparator, groups) = args

    entries = json.loads(entries_json, cls=MontyDecoder)
    hosts = json.loads(hosts_json, cls=MontyDecoder)
    unmatched = list(zip(entries, hosts))

    # The matcher settings never change between comparisons, so a single
    # instance serves every pair.
    matcher = StructureMatcher(ltol=ltol, stol=stol, angle_tol=angle_tol,
                               primitive_cell=primitive_cell, scale=scale,
                               comparator=comparator)

    while unmatched:
        # The first unmatched pair acts as the reference for this group.
        ref_entry, ref_host = unmatched[0]
        logger.info(
            "Reference tid = {}, formula = {}".format(ref_entry.entry_id,
                                                      ref_host.formula)
        )
        ref_formula = ref_host.composition.reduced_formula
        logger.info("Reference host = {}".format(ref_formula))

        matches = [unmatched[0]]
        for candidate in unmatched[1:]:
            test_entry, test_host = candidate
            logger.info("Testing tid = {}, formula = {}"
                        .format(test_entry.entry_id, test_host.formula))
            test_formula = test_host.composition.reduced_formula
            logger.info("Test host = {}".format(test_formula))
            if matcher.fit(ref_host, test_host):
                logger.info("Fit found")
                matches.append(candidate)

        groups.append(json.dumps([pair[0] for pair in matches],
                                 cls=MontyEncoder))
        unmatched = [pair for pair in unmatched if pair not in matches]
        logger.info("{} unmatched remaining".format(len(unmatched)))
def add_if_belongs(self, cand_snl):
    """
    Add ``cand_snl`` to this group if it matches the canonical structure.

    Args:
        cand_snl: candidate SNL; its ``snlgroup_key``, ``snl_id`` and
            ``structure`` attributes are read.

    Returns:
        bool: True when the candidate's snl_id was appended to
        ``self.all_snl_ids``, False when the candidate was rejected.
    """
    # no need to compare if different formulas or spacegroups
    if cand_snl.snlgroup_key != self.canonical_snl.snlgroup_key:
        return False

    # no need to compare if one is ordered, the other disordered
    if cand_snl.structure.is_ordered != self.canonical_structure.is_ordered:
        return False

    # make sure the structure is not already in all_structures
    if cand_snl.snl_id in self.all_snl_ids:
        print('WARNING: add_if_belongs() has detected that you are trying to add the same SNL id twice!')
        return False

    # try a structure fit to the canonical structure
    # use default Structure Matcher params from April 24, 2013, as suggested by Shyue
    # we are using the ElementComparator() because this is how we want to group results
    sm = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5,
                          primitive_cell=True, scale=True,
                          attempt_supercell=True,
                          comparator=ElementComparator())
    if not sm.fit(cand_snl.structure, self.canonical_structure):
        return False

    # everything checks out, add to the group
    self.all_snl_ids.append(cand_snl.snl_id)
    self.updated_at = datetime.datetime.utcnow()
    return True
def structure_transform(self, original_structure, new_structure,
                        refine_rotation=True):
    """
    Re-express this tensor in the basis of a different structure.

    A warning is issued (and the transformation still proceeds) when the
    two structures do not fit each other under a default StructureMatcher.

    Args:
        original_structure (Structure): structure corresponding to the
            basis of the current tensor
        new_structure (Structure): structure corresponding to the
            desired basis
        refine_rotation (bool): whether to refine the rotations
            generated in get_ieee_rotation

    Returns:
        Tensor that has been transformed such that its basis
        corresponds to the new_structure's basis
    """
    if not StructureMatcher().fit(original_structure, new_structure):
        warnings.warn("original and new structures do not match!")

    rotation_original = self.get_ieee_rotation(original_structure,
                                               refine_rotation)
    rotation_new = self.get_ieee_rotation(new_structure, refine_rotation)

    # First bring the tensor into IEEE format, then undo the IEEE rotation
    # belonging to the second structure.
    ieee_tensor = self.rotate(rotation_original)
    return ieee_tensor.rotate(np.transpose(rotation_new))
def test_get_lattice_from_lattice_type(self):
    """
    Parse an inline FePO4 CIF and check that the resulting structure fits
    the structure read from the reference POSCAR within tight tolerances.
    """
    # NOTE(review): the line breaks and spacing inside this literal were
    # reconstructed from a flattened copy - confirm against the original.
    cif_structure = """#generated using pymatgen
data_FePO4
_symmetry_space_group_name_H-M   Pnma
_cell_length_a   10.41176687
_cell_length_b   6.06717188
_cell_length_c   4.75948954
_chemical_formula_structural   FePO4
_chemical_formula_sum   'Fe4 P4 O16'
_cell_volume   300.65685512
_cell_formula_units_Z   4
_symmetry_cell_setting Orthorhombic
loop_
 _symmetry_equiv_pos_site_id
 _symmetry_equiv_pos_as_xyz
  1  'x, y, z'
loop_
 _atom_site_type_symbol
 _atom_site_label
 _atom_site_symmetry_multiplicity
 _atom_site_fract_x
 _atom_site_fract_y
 _atom_site_fract_z
 _atom_site_occupancy
  Fe  Fe1  1  0.218728  0.750000  0.474867  1
  Fe  Fe2  1  0.281272  0.250000  0.974867  1
  Fe  Fe3  1  0.718728  0.750000  0.025133  1
  Fe  Fe4  1  0.781272  0.250000  0.525133  1
  P  P5  1  0.094613  0.250000  0.418243  1
  P  P6  1  0.405387  0.750000  0.918243  1
  P  P7  1  0.594613  0.250000  0.081757  1
  P  P8  1  0.905387  0.750000  0.581757  1
  O  O9  1  0.043372  0.750000  0.707138  1
  O  O10  1  0.096642  0.250000  0.741320  1
  O  O11  1  0.165710  0.046072  0.285384  1
  O  O12  1  0.165710  0.453928  0.285384  1
  O  O13  1  0.334290  0.546072  0.785384  1
  O  O14  1  0.334290  0.953928  0.785384  1
  O  O15  1  0.403358  0.750000  0.241320  1
  O  O16  1  0.456628  0.250000  0.207138  1
  O  O17  1  0.543372  0.750000  0.792862  1
  O  O18  1  0.596642  0.250000  0.758680  1
  O  O19  1  0.665710  0.046072  0.214616  1
  O  O20  1  0.665710  0.453928  0.214616  1
  O  O21  1  0.834290  0.546072  0.714616  1
  O  O22  1  0.834290  0.953928  0.714616  1
  O  O23  1  0.903358  0.750000  0.258680  1
  O  O24  1  0.956628  0.250000  0.292862  1
"""
    parsed = CifParser.from_string(cif_structure).get_structures(False)[0]
    reference = Poscar.from_file(os.path.join(test_dir, 'POSCAR')).structure

    matcher = StructureMatcher(stol=0.05, ltol=0.01, angle_tol=0.1)
    self.assertTrue(matcher.fit(reference, parsed))
def test_get_lattice_from_lattice_type(self):
    """
    Parse an inline FePO4 CIF and check that the resulting structure fits
    the structure read from the reference POSCAR within tight tolerances.
    """
    # NOTE(review): the line breaks and spacing inside this literal were
    # reconstructed from a flattened copy - confirm against the original.
    cif_structure = """#generated using pymatgen
data_FePO4
_symmetry_space_group_name_H-M   Pnma
_cell_length_a   10.41176687
_cell_length_b   6.06717188
_cell_length_c   4.75948954
_chemical_formula_structural   FePO4
_chemical_formula_sum   'Fe4 P4 O16'
_cell_volume   300.65685512
_cell_formula_units_Z   4
_symmetry_cell_setting Orthorhombic
loop_
 _symmetry_equiv_pos_site_id
 _symmetry_equiv_pos_as_xyz
  1  'x, y, z'
loop_
 _atom_site_type_symbol
 _atom_site_label
 _atom_site_symmetry_multiplicity
 _atom_site_fract_x
 _atom_site_fract_y
 _atom_site_fract_z
 _atom_site_occupancy
  Fe  Fe1  1  0.218728  0.750000  0.474867  1
  Fe  Fe2  1  0.281272  0.250000  0.974867  1
  Fe  Fe3  1  0.718728  0.750000  0.025133  1
  Fe  Fe4  1  0.781272  0.250000  0.525133  1
  P  P5  1  0.094613  0.250000  0.418243  1
  P  P6  1  0.405387  0.750000  0.918243  1
  P  P7  1  0.594613  0.250000  0.081757  1
  P  P8  1  0.905387  0.750000  0.581757  1
  O  O9  1  0.043372  0.750000  0.707138  1
  O  O10  1  0.096642  0.250000  0.741320  1
  O  O11  1  0.165710  0.046072  0.285384  1
  O  O12  1  0.165710  0.453928  0.285384  1
  O  O13  1  0.334290  0.546072  0.785384  1
  O  O14  1  0.334290  0.953928  0.785384  1
  O  O15  1  0.403358  0.750000  0.241320  1
  O  O16  1  0.456628  0.250000  0.207138  1
  O  O17  1  0.543372  0.750000  0.792862  1
  O  O18  1  0.596642  0.250000  0.758680  1
  O  O19  1  0.665710  0.046072  0.214616  1
  O  O20  1  0.665710  0.453928  0.214616  1
  O  O21  1  0.834290  0.546072  0.714616  1
  O  O22  1  0.834290  0.953928  0.714616  1
  O  O23  1  0.903358  0.750000  0.258680  1
  O  O24  1  0.956628  0.250000  0.292862  1
"""
    parsed = CifParser.from_string(cif_structure).get_structures(False)[0]
    reference = Poscar.from_file(os.path.join(test_dir, 'POSCAR')).structure

    matcher = StructureMatcher(stol=0.05, ltol=0.01, angle_tol=0.1)
    self.assertTrue(matcher.fit(reference, parsed))
def test_fit(self):
    """
    Exercise StructureMatcher.fit on two known matched structures.

        1) Ensure match
        2) Ensure match after translation and rotations
        3) Ensure no-match after large site translation
        4) Ensure match once the translation is undone (the site
           shuffling step is currently disabled)

    Also covers FrameworkComparator, anonymous fitting and partially
    occupied sites.
    """
    matcher = StructureMatcher()
    reference = self.struct_list[0]
    moving = self.struct_list[1]
    self.assertTrue(matcher.fit(reference, moving))

    # Test rotational/translational invariance
    rigid_motion = SymmOp.from_axis_angle_and_translation(
        [0, 0, 1], 30, False, np.array([0.4, 0.7, 0.9]))
    moving.apply_operation(rigid_motion)
    self.assertTrue(matcher.fit(reference, moving))

    # Test failure under large atomic translation
    moving.translate_sites([0], [.4, .4, .2], frac_coords=True)
    self.assertFalse(matcher.fit(reference, moving))

    # Undo the translation; the structures must match again.
    moving.translate_sites([0], [-.4, -.4, -.2], frac_coords=True)
    # random.shuffle(editor._sites)
    self.assertTrue(matcher.fit(reference, moving))

    # Test FrameworkComparator
    framework_matcher = StructureMatcher(comparator=FrameworkComparator())
    lfp = read_structure(os.path.join(test_dir, "LiFePO4.cif"))
    nfp = read_structure(os.path.join(test_dir, "NaFePO4.cif"))
    self.assertTrue(framework_matcher.fit(lfp, nfp))
    self.assertFalse(matcher.fit(lfp, nfp))

    # Test anonymous fit.
    self.assertEqual(matcher.fit_anonymous(lfp, nfp),
                     {Composition("Li"): Composition("Na")})
    self.assertAlmostEqual(matcher.get_minimax_rms_anonymous(lfp, nfp)[0],
                           0.096084154118549828)

    # Test partial occupancies.
    def fe_cell(occupancies):
        # Four Fe sites along the body diagonal of a 3x3x3 cell.
        return Structure(
            [[3, 0, 0], [0, 3, 0], [0, 0, 3]],
            [{"Fe": occ} for occ in occupancies],
            [[0, 0, 0], [0.25, 0.25, 0.25],
             [0.5, 0.5, 0.5], [0.75, 0.75, 0.75]])

    s1 = fe_cell([0.5, 0.5, 0.5, 0.5])
    s2 = fe_cell([0.25, 0.5, 0.5, 0.75])
    self.assertFalse(matcher.fit(s1, s2))
    self.assertFalse(matcher.fit(s2, s1))

    s2 = fe_cell([0.25, 0.25, 0.25, 0.25])
    self.assertEqual(matcher.fit_anonymous(s1, s2),
                     {Composition("Fe0.5"): Composition("Fe0.25")})
    self.assertAlmostEqual(matcher.get_minimax_rms_anonymous(s1, s2)[0], 0)
def test_fit(self):
    """
    Exercise StructureMatcher.fit on two known matched structures.

        1) Ensure match
        2) Ensure match after translation and rotations
        3) Ensure no-match after large site translation
        4) Ensure match once the translation is undone (the site
           shuffling step is currently disabled)

    Also covers FrameworkComparator, anonymous fitting and partially
    occupied sites.
    """
    matcher = StructureMatcher()
    reference = self.struct_list[0]
    moving = self.struct_list[1]
    self.assertTrue(matcher.fit(reference, moving))

    # Test rotational/translational invariance
    rigid_motion = SymmOp.from_axis_angle_and_translation(
        [0, 0, 1], 30, False, np.array([0.4, 0.7, 0.9]))
    moving.apply_operation(rigid_motion)
    self.assertTrue(matcher.fit(reference, moving))

    # Test failure under large atomic translation
    moving.translate_sites([0], [.4, .4, .2], frac_coords=True)
    self.assertFalse(matcher.fit(reference, moving))

    # Undo the translation; the structures must match again.
    moving.translate_sites([0], [-.4, -.4, -.2], frac_coords=True)
    # random.shuffle(editor._sites)
    self.assertTrue(matcher.fit(reference, moving))

    # Test FrameworkComparator
    framework_matcher = StructureMatcher(comparator=FrameworkComparator())
    lfp = read_structure(os.path.join(test_dir, "LiFePO4.cif"))
    nfp = read_structure(os.path.join(test_dir, "NaFePO4.cif"))
    self.assertTrue(framework_matcher.fit(lfp, nfp))
    self.assertFalse(matcher.fit(lfp, nfp))

    # Test anonymous fit.
    self.assertEqual(matcher.fit_anonymous(lfp, nfp),
                     {Composition("Li"): Composition("Na")})
    self.assertAlmostEqual(matcher.get_minimax_rms_anonymous(lfp, nfp)[0],
                           0.096084154118549828)

    # Test partial occupancies.
    def fe_cell(occupancies):
        # Four Fe sites along the body diagonal of a 3x3x3 cell.
        return Structure(
            [[3, 0, 0], [0, 3, 0], [0, 0, 3]],
            [{"Fe": occ} for occ in occupancies],
            [[0, 0, 0], [0.25, 0.25, 0.25],
             [0.5, 0.5, 0.5], [0.75, 0.75, 0.75]])

    s1 = fe_cell([0.5, 0.5, 0.5, 0.5])
    s2 = fe_cell([0.25, 0.5, 0.5, 0.75])
    self.assertFalse(matcher.fit(s1, s2))
    self.assertFalse(matcher.fit(s2, s1))

    s2 = fe_cell([0.25, 0.25, 0.25, 0.25])
    self.assertEqual(matcher.fit_anonymous(s1, s2),
                     {Composition("Fe0.5"): Composition("Fe0.25")})
    self.assertAlmostEqual(matcher.get_minimax_rms_anonymous(s1, s2)[0], 0)
def test_occupancy_comparator(self):
    """
    With OccupancyComparator, a 0.6/0.4 mixed site must match a 0.4/0.6
    site of other species but not a 0.5/0.5 one.
    """
    lattice = Lattice.orthorhombic(10, 20, 30)
    coords = [[0, 0, 0], [0.5, 0.5, 0.5]]

    def with_mixed_site(occupancies):
        # One mixed site plus a fully occupied Cl site.
        return Structure(lattice, [occupancies, 'Cl'], coords)

    s1 = with_mixed_site({'Na': 0.6, 'K': 0.4})
    s2 = with_mixed_site({'Xa': 0.4, 'Xb': 0.6})
    s3 = with_mixed_site({'Xa': 0.5, 'Xb': 0.5})

    sm_sites = StructureMatcher(
        ltol=0.2, stol=0.3, angle_tol=5,
        primitive_cell=False, scale=True,
        attempt_supercell=True, allow_subset=True,
        supercell_size='num_sites',
        comparator=OccupancyComparator())

    self.assertTrue(sm_sites.fit(s1, s2))
    self.assertFalse(sm_sites.fit(s1, s3))
def test_fit(self):
    """
    Exercise StructureMatcher.fit on two known matched structures.

        1) Ensure match
        2) Ensure match after translation and rotations
        3) Ensure no-match after large site translation
        4) Ensure match once the translation is undone (the site
           shuffling step is currently disabled)

    Also covers FrameworkComparator, anonymous fitting and partially
    occupied sites.
    """
    matcher = StructureMatcher()
    reference = self.struct_list[0]
    moving = self.struct_list[1]
    self.assertTrue(matcher.fit(reference, moving))

    # Test rotational/translational invariance
    rigid_motion = SymmOp.from_axis_angle_and_translation(
        [0, 0, 1], 30, False, np.array([0.4, 0.7, 0.9]))
    moving.apply_operation(rigid_motion)
    self.assertTrue(matcher.fit(reference, moving))

    # Test failure under large atomic translation
    moving.translate_sites([0], [.4, .4, .2], frac_coords=True)
    self.assertFalse(matcher.fit(reference, moving))

    # Undo the translation; the structures must match again.
    moving.translate_sites([0], [-.4, -.4, -.2], frac_coords=True)
    # random.shuffle(editor._sites)
    self.assertTrue(matcher.fit(reference, moving))

    # Test FrameworkComparator
    framework_matcher = StructureMatcher(comparator=FrameworkComparator())
    lfp = self.get_structure("LiFePO4")
    nfp = self.get_structure("NaFePO4")
    self.assertTrue(framework_matcher.fit(lfp, nfp))
    self.assertFalse(matcher.fit(lfp, nfp))

    # Test anonymous fit.
    self.assertEqual(matcher.fit_anonymous(lfp, nfp), True)
    self.assertAlmostEqual(matcher.get_rms_anonymous(lfp, nfp)[0],
                           0.060895871160262717)

    # Test partial occupancies.
    def diagonal_cell(element, occupancies):
        # Four sites of one element along the body diagonal of a cubic cell.
        return Structure(
            Lattice.cubic(3),
            [{element: occ} for occ in occupancies],
            [[0, 0, 0], [0.25, 0.25, 0.25],
             [0.5, 0.5, 0.5], [0.75, 0.75, 0.75]])

    s1 = diagonal_cell("Fe", [0.5, 0.5, 0.5, 0.5])
    s2 = diagonal_cell("Fe", [0.25, 0.5, 0.5, 0.75])
    self.assertFalse(matcher.fit(s1, s2))
    self.assertFalse(matcher.fit(s2, s1))

    s2 = diagonal_cell("Mn", [0.5, 0.5, 0.5, 0.5])
    self.assertEqual(matcher.fit_anonymous(s1, s2), True)
    self.assertAlmostEqual(matcher.get_rms_anonymous(s1, s2)[0], 0)
def test_fit(self):
    """
    Exercise StructureMatcher.fit on two known matched structures.

        1) Ensure match
        2) Ensure match after translation and rotations
        3) Ensure no-match after large site translation
        4) Ensure match once the translation is undone (the site
           shuffling step is currently disabled)

    Also covers FrameworkComparator, anonymous fitting and partially
    occupied sites.
    """
    matcher = StructureMatcher()
    reference = self.struct_list[0]
    moving = self.struct_list[1]
    self.assertTrue(matcher.fit(reference, moving))

    # Test rotational/translational invariance
    rigid_motion = SymmOp.from_axis_angle_and_translation(
        [0, 0, 1], 30, False, np.array([0.4, 0.7, 0.9]))
    moving.apply_operation(rigid_motion)
    self.assertTrue(matcher.fit(reference, moving))

    # Test failure under large atomic translation
    moving.translate_sites([0], [.4, .4, .2], frac_coords=True)
    self.assertFalse(matcher.fit(reference, moving))

    # Undo the translation; the structures must match again.
    moving.translate_sites([0], [-.4, -.4, -.2], frac_coords=True)
    # random.shuffle(editor._sites)
    self.assertTrue(matcher.fit(reference, moving))

    # Test FrameworkComparator
    framework_matcher = StructureMatcher(comparator=FrameworkComparator())
    lfp = self.get_structure("LiFePO4")
    nfp = self.get_structure("NaFePO4")
    self.assertTrue(framework_matcher.fit(lfp, nfp))
    self.assertFalse(matcher.fit(lfp, nfp))

    # Test anonymous fit.
    self.assertEqual(matcher.fit_anonymous(lfp, nfp), True)
    self.assertAlmostEqual(matcher.get_rms_anonymous(lfp, nfp)[0],
                           0.060895871160262717)

    # Test partial occupancies.
    def diagonal_cell(element, occupancies):
        # Four sites of one element along the body diagonal of a cubic cell.
        return Structure(
            Lattice.cubic(3),
            [{element: occ} for occ in occupancies],
            [[0, 0, 0], [0.25, 0.25, 0.25],
             [0.5, 0.5, 0.5], [0.75, 0.75, 0.75]])

    s1 = diagonal_cell("Fe", [0.5, 0.5, 0.5, 0.5])
    s2 = diagonal_cell("Fe", [0.25, 0.5, 0.5, 0.75])
    self.assertFalse(matcher.fit(s1, s2))
    self.assertFalse(matcher.fit(s2, s1))

    s2 = diagonal_cell("Mn", [0.5, 0.5, 0.5, 0.5])
    self.assertEqual(matcher.fit_anonymous(s1, s2), True)
    self.assertAlmostEqual(matcher.get_rms_anonymous(s1, s2)[0], 0)
def test(self):
    """
    The structure returned by find_hpkot_primitive for BPOSCAR-MgO must fit
    the structure stored in PPOSCAR-MgO under very tight tolerances.
    """
    from pymatgen.analysis.structure_matcher import StructureMatcher

    expected = Structure.from_file("PPOSCAR-MgO")
    conventional = Structure.from_file("BPOSCAR-MgO")
    actual = find_hpkot_primitive(conventional)

    # No primitive reduction and no scaling: the cells are compared as given.
    strict = StructureMatcher(ltol=0.0001, stol=0.0001, angle_tol=0.001,
                              primitive_cell=False, scale=False)
    self.assertTrue(strict.fit(expected, actual))
def test_no_scaling(self):
    """Fit the first two reference structures with ``scale=False``."""
    matcher = StructureMatcher(ltol=0.1, stol=0.1, angle_tol=2,
                               scale=False, comparator=ElementComparator())
    first, second = self.struct_list[0], self.struct_list[1]

    self.assertTrue(matcher.fit(first, second))

    # The first element of get_rms_dist must stay below 0.0008.
    rms = matcher.get_rms_dist(first, second)[0]
    self.assertTrue(rms < 0.0008)
def _match_material(self, taskdoc, ltol=0.2, stol=0.3, angle_tol=5):
    """
    Returns the material_id that has the same structure as this task as
    determined by the structure matcher. Returns None if no match.

    Args:
        taskdoc (dict): a JSON-like task document
        ltol (float): StructureMatcher tuning parameter
        stol (float): StructureMatcher tuning parameter
        angle_tol (float): StructureMatcher tuning parameter

    Returns:
        (int) matching material_id or None
    """
    formula = taskdoc["formula_reduced_abc"]

    # handle the "parent structure" option, which is used to intentionally
    # force slightly different structures to contribute to the same
    # "material", e.g. from an ordering scheme
    if "parent_structure" in taskdoc:
        parent = taskdoc["parent_structure"]
        t_struct = Structure.from_dict(parent["structure"])
        q = {
            "formula_reduced_abc": formula,
            "parent_structure.spacegroup.number":
                parent["spacegroup"]["number"],
        }
    else:
        sgnum = taskdoc["output"]["spacegroup"]["number"]
        t_struct = Structure.from_dict(taskdoc["output"]["structure"])
        q = {"formula_reduced_abc": formula, "sg_number": sgnum}

    # The matcher depends only on the tolerances, so build it once rather
    # than once per candidate material.
    sm = StructureMatcher(ltol=ltol, stol=stol, angle_tol=angle_tol,
                          primitive_cell=True, scale=True,
                          attempt_supercell=False, allow_subset=False,
                          comparator=ElementComparator())

    projection = {"parent_structure": 1, "structure": 1, "material_id": 1}
    for m in self._materials.find(q, projection):
        # Prefer the material's parent structure when it has one.
        if "parent_structure" in m:
            s_dict = m["parent_structure"]["structure"]
        else:
            s_dict = m["structure"]
        if sm.fit(Structure.from_dict(s_dict), t_struct):
            return m["material_id"]

    return None
def gen_child(self, struc):
    '''
    generate a child structure by swapping species between sites

    # ---------- args
    struc: parent structure; it is copied, not modified

    # ---------- return
    (if success) self.child:
    (if fail) None:
    '''
    # ---------- keep original structure
    self.child = struc.copy()
    # ---------- instantiate StructureMatcher
    smatcher = StructureMatcher()
    # ---------- ntimes permutation
    failures = 0
    while True:
        remaining = self.ntimes
        while remaining > 0:
            # ------ site indices of the current child, grouped by atom type
            sites_by_type = [
                [i for i, site in enumerate(self.child)
                 if site.species_string == a]
                for a in self.atype
            ]
            # ------ choose two distinct atom types
            first, second = np.random.choice(len(self.atype), 2,
                                             replace=False)
            # ------ choose one site of each chosen type
            site_first = np.random.choice(sites_by_type[first])
            site_second = np.random.choice(sites_by_type[second])
            # ------ replace each other
            self.child.replace(site_first, species=self.atype[second])
            self.child.replace(site_second, species=self.atype[first])
            # ------ compare to original one: a swap that still matches the
            #        parent resets the counter (back to the start)
            if smatcher.fit(self.child, struc):
                remaining = self.ntimes
            else:
                remaining -= 1
        # ------ check distance
        if check_distance(self.child, self.atype, self.mindist):
            self.child = sort_by_atype(self.child, self.atype)
            return self.child
        # ------ fail
        failures += 1
        if failures >= self.maxcnt_ea:
            self.child = None
            return None    # change parent
def test_ordered_primitive_to_disordered_supercell(self):
    """
    Fit an ordered primitive cell against a half-occupied supercell.

    The fit must succeed when the supercell size is judged by 'num_sites'
    and fail when it is judged by 'num_atoms'.
    """
    # The two matchers differ only in ``supercell_size``.
    shared = dict(ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=False,
                  scale=True, attempt_supercell=True, allow_subset=True)
    by_atoms = StructureMatcher(supercell_size='num_atoms',
                                comparator=OrderDisorderElementComparator(),
                                **shared)
    by_sites = StructureMatcher(supercell_size='num_sites',
                                comparator=OrderDisorderElementComparator(),
                                **shared)

    s1 = Structure(Lattice.orthorhombic(10, 20, 30),
                   ['Na', 'Cl'],
                   [[0, 0, 0], [0.5, 0.5, 0.5]])
    s2 = Structure(Lattice.orthorhombic(20, 20, 30),
                   [{'Na': 0.5}, {'Na': 0.5}, {'Cl': 0.5}, {'Cl': 0.5}],
                   [[0, 0, 0], [0.5, 0, 0],
                    [0.25, 0.5, 0.5], [0.75, 0.5, 0.5]])

    self.assertTrue(by_sites.fit(s1, s2))
    self.assertFalse(by_atoms.fit(s1, s2))
def match(self, snls, mat):
    """
    Yields the SNLs whose structure matches the given materials doc.

    An SNL matches when its space group equals that of one of the
    material's structures and the structure matcher fits the pair.

    Args:
        snls ([dict]): the snls list
        mat (dict): a materials doc; its "structure" and
            "initial_structures" entries are compared against

    Yields:
        dict: each entry of ``snls`` that matches the material
    """
    sm = StructureMatcher(ltol=LTOL, stol=STOL, angle_tol=ANGLE_TOL,
                          primitive_cell=True, scale=True,
                          attempt_supercell=False, allow_subset=False,
                          comparator=ElementComparator())

    def space_group(structure):
        # This try-except fixes issues for some structures where space
        # group data is not returned by spglib; -1 marks those cases.
        try:
            return structure.get_space_group_info(symprec=0.1)[0]
        except Exception:
            return -1

    m_strucs = [Structure.from_dict(mat["structure"])] + [
        Structure.from_dict(init_struc)
        for init_struc in mat["initial_structures"]
    ]
    # The material-side space groups do not depend on the SNL, so compute
    # them once instead of once per SNL.
    m_strucs_with_sg = [(struc, space_group(struc)) for struc in m_strucs]

    for snl in snls:
        try:
            snl_struc = StructureNL.from_dict(snl).structure
            snl_spacegroup = space_group(snl_struc)
            # Match spacegroups first, then run the structure fit.
            matched = any(
                struc_sg == snl_spacegroup and sm.fit(struc, snl_struc)
                for struc, struc_sg in m_strucs_with_sg
            )
        except Exception:
            self.logger.warning("Bad SNL found : {}".format(
                snl.get("task_id")))
            continue

        # The yield sits outside the try block so that closing the generator
        # (GeneratorExit) is never swallowed and reported as a bad SNL.
        if matched:
            yield snl
def match(self, snl, mats):
    """
    Finds the first materials doc whose structure matches the given snl.

    Both the "structure" and the "initial_structure" of each materials doc
    are compared against the snl structure.

    Args:
        snl (dict): the snl doc
        mats ([dict]): the materials docs to match against

    Returns:
        the value stored under ``self.materials.key`` in the first matching
        materials doc, or None if no doc matches
    """
    matcher = StructureMatcher(ltol=self.ltol, stol=self.stol,
                               angle_tol=self.angle_tol,
                               primitive_cell=True, scale=True,
                               attempt_supercell=False, allow_subset=False,
                               comparator=ElementComparator())
    target = StructureNL.from_dict(snl).structure

    for mat in mats:
        final = Structure.from_dict(mat["structure"])
        initial = Structure.from_dict(mat["initial_structure"])
        # The final structure is tried first; the initial one only if that
        # fit fails.
        if any(matcher.fit(candidate, target)
               for candidate in (final, initial)):
            return mat[self.materials.key]

    return None
def test_rms_vs_minimax(self):
    """
    Structures whose adjusted RMS is below stol but whose minimax
    displacement is above it must not be reported as matching.
    """
    matcher = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5,
                               primitive_cell=False)

    lattice = Lattice.orthorhombic(1, 2, 12)
    species = ["Si", "Si", "Al"]
    # The two structures differ only in the fractional z of the Al site.
    s1 = Structure(lattice, species, [[0.5, 0, 0], [0, 0, 0], [0, 0, 0.5]])
    s2 = Structure(lattice, species, [[0.5, 0, 0], [0, 0, 0], [0, 0, 0.6]])

    expected_rms_and_max = (0.32 ** 0.5 / 2, 0.4)
    self.assertArrayAlmostEqual(matcher.get_rms_dist(s1, s2),
                                expected_rms_and_max)

    for check in (matcher.fit, matcher.fit_anonymous):
        self.assertEqual(check(s1, s2), False)
    self.assertEqual(matcher.get_mapping(s1, s2), None)
def _match_material(self, taskdoc):
    """
    Returns the material_id that has the same structure as this task as
    determined by the structure matcher. Returns None if no match.

    Args:
        taskdoc (dict): a JSON-like task document

    Returns:
        (int) matching material_id or None
    """
    formula = taskdoc["formula_reduced_abc"]

    if "parent_structure" in taskdoc:
        # this is used to intentionally combine multiple data w/same formula
        # but slightly different structure, e.g. from an ordering scheme
        parent = taskdoc["parent_structure"]
        t_struct = Structure.from_dict(parent["structure"])
        q = {
            "formula_reduced_abc": formula,
            "parent_structure.spacegroup.number":
                parent["spacegroup"]["number"],
        }
    else:
        sgnum = taskdoc["output"]["spacegroup"]["number"]
        t_struct = Structure.from_dict(taskdoc["output"]["structure"])
        q = {"formula_reduced_abc": formula, "sg_number": sgnum}

    # The matcher settings are constant; build it once rather than once per
    # candidate material.
    sm = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5,
                          primitive_cell=True, scale=True,
                          attempt_supercell=False, allow_subset=False,
                          comparator=ElementComparator())

    projection = {"parent_structure": 1, "structure": 1, "material_id": 1}
    for m in self._materials.find(q, projection):
        # Prefer the material's parent structure when it has one.
        if "parent_structure" in m:
            s_dict = m["parent_structure"]["structure"]
        else:
            s_dict = m["structure"]
        if sm.fit(Structure.from_dict(s_dict), t_struct):
            return m["material_id"]

    return None
def for_zengen(path):
    """
    Collect the mutually non-matching primitive structures among the files
    selected by a glob pattern.

    Each file is passed to ``get_primitive`` and the resulting structures are
    grouped by the first value it returns. Within a group, a structure is
    kept only if it fits none of the structures still remaining in that
    group; the last remaining structure is always kept.

    Args:
        path (str): glob pattern selecting the input files

    Returns:
        list: the retained structures, with groups visited in descending
        key order; empty when the pattern matches no file
    """
    path_list = glob.glob(path)
    if not path_list:
        return []

    structures = defaultdict(list)
    for file_path in path_list:
        spg_num, primitive = get_primitive(file_path)
        structures[spg_num].append(primitive)

    # A default matcher carries no per-comparison state here, so one
    # instance serves every comparison.
    matcher = StructureMatcher()
    irreps = []
    for spg_num in sorted(structures, reverse=True):
        group = structures[spg_num]
        while len(group) != 1:
            struct0 = group.pop()
            # any() stops at the first match instead of fitting against
            # every remaining structure.
            if not any(matcher.fit(struct0, other) for other in group):
                irreps.append(struct0)
        irreps.append(group[0])
    return irreps
def test_rms_vs_minimax(self):
    """
    Structures whose adjusted RMS is below stol but whose minimax
    displacement is above it must not be reported as matching.
    """
    # stol=0.3 gives exactly an ftol of 0.1 on the c axis
    matcher = StructureMatcher(ltol=0.2, stol=0.301, angle_tol=1,
                               primitive_cell=False)

    lattice = Lattice.orthorhombic(1, 2, 12)
    species = ["Si", "Si", "Al"]
    # The two structures differ only in the fractional z of the Al site.
    s1 = Structure(lattice, species, [[0.5, 0, 0], [0, 0, 0], [0, 0, 0.5]])
    s2 = Structure(lattice, species, [[0.5, 0, 0], [0, 0, 0], [0, 0, 0.6]])

    expected_rms_and_max = (0.32 ** 0.5 / 2, 0.4)
    self.assertArrayAlmostEqual(matcher.get_rms_dist(s1, s2),
                                expected_rms_and_max)

    for check in (matcher.fit, matcher.fit_anonymous):
        self.assertEqual(check(s1, s2), False)
    self.assertEqual(matcher.get_mapping(s1, s2), None)
def add_if_belongs(self, cand_snl):
    """
    Add a candidate SNL to this group if it structurally matches the
    group's canonical structure.

    Args:
        cand_snl: candidate object exposing ``snlgroup_key``, ``snl_id`` and
            ``structure``

    Returns:
        bool: True if the candidate was appended to ``self.all_snl_ids``
            (``self.updated_at`` is refreshed in that case), False otherwise
    """
    # no need to compare if different formulas or spacegroups
    if cand_snl.snlgroup_key != self.canonical_snl.snlgroup_key:
        return False

    # no need to compare if one is ordered, the other disordered
    if cand_snl.structure.is_ordered != self.canonical_structure.is_ordered:
        return False

    # filter out large C-Ce structures
    comp = cand_snl.structure.composition
    chemsys = '-'.join(sorted({e.symbol for e in comp.elements}))
    too_large = (cand_snl.structure.num_sites > 1500
                 or self.canonical_structure.num_sites > 1500)
    if too_large and chemsys == 'C-Ce':
        print('SKIPPING LARGE C-Ce')
        return False

    # make sure the structure is not already in all_structures
    if cand_snl.snl_id in self.all_snl_ids:
        print('WARNING: add_if_belongs() has detected that you are trying to add the same SNL id twice!')
        return False

    # try a structure fit to the canonical structure
    # use default Structure Matcher params from April 24, 2013, as suggested by Shyue
    # we are using the ElementComparator() because this is how we want to group results
    sm = StructureMatcher(ltol=0.2, stol=0.3, angle_tol=5,
                          primitive_cell=True, scale=True,
                          attempt_supercell=False,
                          comparator=ElementComparator())
    if not sm.fit(cand_snl.structure, self.canonical_structure):
        return False

    # everything checks out, add to the group
    self.all_snl_ids.append(cand_snl.snl_id)
    self.updated_at = datetime.datetime.utcnow()
    return True
def test_ignore_species(self):
    """Matching with Li listed in ``ignored_species``."""
    cif_struct = Structure.from_file(os.path.join(test_dir, "LiFePO4.cif"))
    poscar_struct = Structure.from_file(os.path.join(test_dir, "POSCAR"))
    matcher = StructureMatcher(ignored_species=["Li"], primitive_cell=False,
                               attempt_supercell=True)

    self.assertTrue(matcher.fit(cif_struct, poscar_struct))
    self.assertTrue(matcher.fit_anonymous(cif_struct, poscar_struct))
    self.assertEqual(
        len(matcher.group_structures([cif_struct, poscar_struct])), 1)

    # Map the CIF structure onto a doubled POSCAR cell, keeping the
    # ignored species in the returned structure.
    poscar_struct.make_supercell((2, 1, 1))
    mapped = matcher.get_s2_like_s1(poscar_struct, cif_struct,
                                    include_ignored_species=True)
    self.assertAlmostEqual(mapped.lattice.a, 20.820740000000001)
    self.assertEqual(mapped.composition.reduced_formula, "LiFePO4")

    best = matcher.get_best_electronegativity_anonymous_mapping(
        cif_struct, poscar_struct)
    symbol_map = {src.symbol: dst.symbol for src, dst in best.items()}
    self.assertEqual(symbol_map, {"Fe": "Fe", "P": "P", "O": "O"})
class RemoveExistingFilterTest(unittest.TestCase):
    """Checks that RemoveExistingFilter drops structures it was given."""

    def setUp(self):
        entries_path = os.path.join(test_dir, "TiO2_entries.json")
        with open(entries_path, 'r') as handle:
            loaded = json.load(handle, cls=MontyDecoder)
        self._struct_list = [entry.structure for entry in loaded]
        self._sm = StructureMatcher()
        # every structure except the last one counts as already existing
        self._exisiting_structures = self._struct_list[:-1]

    def test_filter(self):
        existing_filter = RemoveExistingFilter(self._exisiting_structures)
        transmuter = StandardTransmuter.from_structures(self._struct_list)
        transmuter.apply_filter(existing_filter)

        survivors = transmuter.transformed_structures
        self.assertEqual(len(survivors), 1)
        # the survivor must be the one structure that was not pre-existing
        held_out = self._struct_list[-1]
        self.assertTrue(self._sm.fit(held_out, survivors[-1].final_structure))
def _match_material(self, doc, ltol=0.2, stol=0.3, angle_tol=5):
    """
    Returns the material_id that has the same structure as this doc as
    determined by the structure matcher. Returns None if no match.

    Args:
        doc (dict): a JSON-like document
        ltol (float): StructureMatcher tuning parameter
        stol (float): StructureMatcher tuning parameter
        angle_tol (float): StructureMatcher tuning parameter

    Returns:
        (int) matching material_id or None
    """
    formula = doc["formula_reduced_abc"]
    sgnum = doc["spacegroup"]["number"]

    # Neither the query structure nor the matcher depends on the candidate,
    # so both are built once rather than once per material document.
    t_struct = Structure.from_dict(doc["structure"])
    sm = StructureMatcher(
        ltol=ltol,
        stol=stol,
        angle_tol=angle_tol,
        primitive_cell=True,
        scale=True,
        attempt_supercell=False,
        allow_subset=False,
        comparator=ElementComparator(),
    )

    candidates = self._materials.find(
        {"formula_reduced_abc": formula, "sg_number": sgnum},
        {"structure": 1, "material_id": 1},
    )
    for m in candidates:
        m_struct = Structure.from_dict(m["structure"])
        if sm.fit(m_struct, t_struct):
            return m["material_id"]

    return None
class RemoveExistingFilterTest(unittest.TestCase):
    """RemoveExistingFilter should leave only the structure it has not seen."""

    def setUp(self):
        json_path = os.path.join(test_dir, "TiO2_entries.json")
        with open(json_path, 'r') as fp:
            entries = json.load(fp, cls=MontyDecoder)

        structures = []
        for entry in entries:
            structures.append(entry.structure)

        self._struct_list = structures
        self._sm = StructureMatcher()
        # all but the final structure are handed to the filter as known
        self._exisiting_structures = structures[:-1]

    def test_filter(self):
        transmuter = StandardTransmuter.from_structures(self._struct_list)
        transmuter.apply_filter(
            RemoveExistingFilter(self._exisiting_structures))

        self.assertEqual(len(transmuter.transformed_structures), 1)

        remaining = transmuter.transformed_structures[-1].final_structure
        expected = self._struct_list[-1]
        self.assertTrue(self._sm.fit(expected, remaining))
def _perform_grouping(args):
    """
    Group entries whose host structures match one another.

    The first unmatched host is taken as reference, every other unmatched
    host is fitted against it, and the entries of all matching hosts are
    serialized as one JSON group. This repeats until nothing is unmatched.

    Args:
        args (tuple): ``(entries_json, hosts_json, ltol, stol, angle_tol,
            primitive_cell, scale, comparator, groups)``. The two JSON
            strings are decoded with ``MontyDecoder``; ``groups`` is a
            list-like object that receives one JSON string per group.
    """
    (
        entries_json,
        hosts_json,
        ltol,
        stol,
        angle_tol,
        primitive_cell,
        scale,
        comparator,
        groups,
    ) = args

    entries = json.loads(entries_json, cls=MontyDecoder)
    hosts = json.loads(hosts_json, cls=MontyDecoder)

    # The matcher configuration is fixed for the whole call; build it once
    # instead of once per pairwise comparison.
    matcher = StructureMatcher(
        ltol=ltol,
        stol=stol,
        angle_tol=angle_tol,
        primitive_cell=primitive_cell,
        scale=scale,
        comparator=comparator,
    )

    unmatched = list(zip(entries, hosts))
    while unmatched:
        ref_entry, ref_host = unmatched[0]
        logger.info(
            f"Reference tid = {ref_entry.entry_id}, formula = {ref_host.formula}"
        )
        ref_formula = ref_host.composition.reduced_formula
        logger.info(f"Reference host = {ref_formula}")

        matches = [unmatched[0]]
        for candidate in unmatched[1:]:
            test_entry, test_host = candidate
            logger.info(
                f"Testing tid = {test_entry.entry_id}, formula = {test_host.formula}"
            )
            test_formula = test_host.composition.reduced_formula
            logger.info(f"Test host = {test_formula}")
            if matcher.fit(ref_host, test_host):
                logger.info("Fit found")
                matches.append(candidate)

        # only the entries (not the hosts) are stored in the group
        groups.append(json.dumps([entry for entry, _ in matches], cls=MontyEncoder))
        unmatched = [pair for pair in unmatched if pair not in matches]
        logger.info(f"{len(unmatched)} unmatched remaining")
def test_ignore_species(self):
    """Fit, grouping and mapping when Li is an ignored species."""
    first = Structure.from_file(os.path.join(test_dir, "LiFePO4.cif"))
    second = Structure.from_file(os.path.join(test_dir, "POSCAR"))
    sm = StructureMatcher(ignored_species=["Li"],
                          primitive_cell=False,
                          attempt_supercell=True)

    for check in (sm.fit, sm.fit_anonymous):
        self.assertTrue(check(first, second))

    grouped = sm.group_structures([first, second])
    self.assertEqual(len(grouped), 1)

    # enlarge the second structure, then ask for a copy of the first that
    # resembles it, ignored species included
    second.make_supercell((2, 1, 1))
    like_second = sm.get_s2_like_s1(second, first,
                                    include_ignored_species=True)
    self.assertAlmostEqual(like_second.lattice.a, 20.820740000000001)
    self.assertEqual(like_second.composition.reduced_formula, "LiFePO4")

    anonymous = sm.get_best_electronegativity_anonymous_mapping(first, second)
    observed = {}
    for key, value in anonymous.items():
        observed[key.symbol] = value.symbol
    self.assertEqual(observed, {"Fe": "Fe", "P": "P", "O": "O"})
def relaxed_structure_match(self, i, j):
    """
    Compare the relaxed structures of two interstitials.

    The relaxation is run first if no relaxed structures are stored yet.

    Args:
        i: Symmetrically distinct interstitial index
        j: Symmetrically distinct interstitial index

    Returns:
        Result of ``StructureMatcher().fit`` on the two relaxed structures.

    .. note::
        Index 0 corresponds to bulk.
    """
    if not self._relax_structs:
        self.relax()

    matcher = StructureMatcher()
    first, second = self._relax_structs[i], self._relax_structs[j]
    return matcher.fit(first, second)
def mpid_and_link(symmetrizer: StructureSymmetrizer):
    """
    Build a link to the Materials Project entry matching a structure.

    Queries documents with the same reduced formula and space-group number
    and returns a link for the first one whose structure fits.

    Args:
        symmetrizer: object exposing ``structure`` and ``sg_number``

    Returns:
        A ``dcc.Link`` to the matching material page, or ``H4("None")`` when
        no queried structure matches.
    """
    structure = symmetrizer.structure
    criteria = {
        "reduced_cell_formula": structure.composition.reduced_formula,
        "spacegroup.number": symmetrizer.sg_number,
    }
    properties = ["task_id", "structure"]

    # a default matcher is stateless across fits; build it once, not per doc
    sm = StructureMatcher()

    with MPRester() as m:
        for doc in m.query(criteria, properties):
            if sm.fit(doc["structure"], structure):
                mpid = doc["task_id"]
                break
        else:
            # loop finished without a break: nothing matched
            return H4("None")

    return dcc.Link(f'mp-id {mpid}',
                    href=f'https://materialsproject.org/materials/{mpid}/',
                    style={'font-weight': 'bold', "font-size": "20px"})
def relaxed_structure_match(self, i, j):
    """
    Report whether two relaxed interstitial structures fit each other.

    Triggers ``self.relax()`` when the relaxed structures are not yet
    available.

    Args:
        i: Symmetrically distinct interstitial index
        j: Symmetrically distinct interstitial index

    .. note::
        Index 0 corresponds to bulk.
    """
    if not self._relax_structs:
        self.relax()
    sm = StructureMatcher()
    relaxed = self._relax_structs
    return sm.fit(relaxed[i], relaxed[j])
def test__EnumerateDistinctFacets():
    '''
    Take every facet the task reports as distinct, build slabs from each of
    them, and check that no two slabs are structurally identical.

    This only detects repeats. It does not detect facets that were missed.

    WARNING:  This test uses `run_task_locally`, which has a chance of
    actually submitting a FireWork to production. To avoid this, you must try
    to make sure that you have all of the gas calculations in the unit testing
    atoms collection.  If you copy/paste this test into somewhere else, make
    sure that you use `run_task_locally` appropriately.
    '''
    mpid = 'mp-2'
    max_miller = 2
    task = _EnumerateDistinctFacets(mpid=mpid, max_miller=max_miller)

    try:
        # Run the task to obtain the facets, and load the bulk it depends on
        # so slabs can actually be built from it
        run_task_locally(task)
        distinct_millers = get_task_output(task)
        with open(task.input().path, 'rb') as file_handle:
            bulk_doc = pickle.load(file_handle)
        bulk_atoms = make_atoms_from_doc(bulk_doc)

        # Build the slabs for every facet reported as distinct
        all_slabs = [
            slab
            for miller in distinct_millers
            for slab in make_slabs_from_bulk_atoms(
                bulk_atoms,
                miller,
                SLAB_SETTINGS['slab_generator_settings'],
                SLAB_SETTINGS['get_slab_settings'],
            )
        ]

        # No pair of slabs may match
        matcher = StructureMatcher()
        for slab_a, slab_b in combinations(all_slabs, 2):
            assert not matcher.fit(slab_a, slab_b)

    finally:
        clean_up_tasks()
def relaxed_structure_match(self, i, j):
    """
    Compare the relaxed structures of two interstitials.

    The relaxation analysis is run first if no relaxed structures are
    stored yet. Both indices are shifted by one before the lookup.

    Args:
        i: Symmetrically distinct interstitial index
        j: Symmetrically distinct interstitial index

    .. note::
        To use relaxed bulk structure pass -1.
        -ve index will not work as expected
    """
    if not self._relax_struct:
        self._relax_analysis()

    matcher = StructureMatcher()
    first = self._relax_struct[i + 1]
    second = self._relax_struct[j + 1]
    return matcher.fit(first, second)
def relaxed_structure_match(self, i, j):
    """
    Report whether two relaxed interstitial structures fit each other.

    Runs ``self._relax_analysis()`` when relaxed structures are not yet
    available; the stored list is read at ``i + 1`` and ``j + 1``.

    Args:
        i: Symmetrically distinct interstitial index
        j: Symmetrically distinct interstitial index

    .. note::
        To use relaxed bulk structure pass -1.
        -ve index will not work as expected
    """
    if not self._relax_struct:
        self._relax_analysis()
    sm = StructureMatcher()
    pair = [self._relax_struct[idx + 1] for idx in (i, j)]
    return sm.fit(pair[0], pair[1])
def test_read_structure(self):
    """
    Read each sample file, check the ``primitive`` and ``sort`` options, and
    round-trip the structure through every writable format.
    """
    test_dir = os.path.join(os.path.dirname(__file__), "..", "..", "..",
                            'test_files')
    m = StructureMatcher()
    for fname in ("Li2O.cif", "vasprun.xml", "vasprun_Si_bands.xml",
                  "Si.cssr"):
        filename = os.path.join(test_dir, fname)
        struct = read_structure(filename)
        self.assertIsInstance(struct, Structure)
        prim = read_structure(filename, primitive=True)
        self.assertLessEqual(len(prim), len(struct))
        sorted_s = read_structure(filename, sort=True)
        self.assertEqual(sorted_s, sorted_s.get_sorted_structure())

        for ext in [".cif", ".json", ".cssr"]:
            fn = "smartio_structure_test" + ext
            try:
                write_structure(struct, fn)
                back = read_structure(fn)
                self.assertTrue(m.fit(back, struct))
            finally:
                # Remove the scratch file even when the round trip fails, so
                # a failing run does not leave files behind.
                if os.path.exists(fn):
                    os.remove(fn)
def test_read_structure(self):
    """
    Exercise ``read_structure`` on the sample files (plain, ``primitive`` and
    ``sort``) and verify a write/read round trip for each output extension.
    """
    test_dir = os.path.join(os.path.dirname(__file__), "..", "..", "..",
                            'test_files')
    sample_files = ("Li2O.cif", "vasprun.xml", "vasprun_Si_bands.xml",
                    "Si.cssr")
    m = StructureMatcher()

    for fname in sample_files:
        filename = os.path.join(test_dir, fname)
        struct = read_structure(filename)
        self.assertIsInstance(struct, Structure)

        prim = read_structure(filename, primitive=True)
        self.assertLessEqual(len(prim), len(struct))

        sorted_s = read_structure(filename, sort=True)
        self.assertEqual(sorted_s, sorted_s.get_sorted_structure())

        for ext in [".cif", ".json", ".cssr"]:
            fn = "smartio_structure_test" + ext
            try:
                write_structure(struct, fn)
                back = read_structure(fn)
                self.assertTrue(m.fit(back, struct))
            finally:
                # Always clean up the scratch file; guard the removal so a
                # failed write does not mask the real error.
                if os.path.exists(fn):
                    os.remove(fn)
def _match_material(self, taskdoc, ltol=0.2, stol=0.3, angle_tol=5):
    """
    Returns the material_id that has the same structure as this task as
    determined by the structure matcher. Returns None if no match.

    Args:
        taskdoc (dict): a JSON-like task document
        ltol (float): StructureMatcher tuning parameter
        stol (float): StructureMatcher tuning parameter
        angle_tol (float): StructureMatcher tuning parameter

    Returns:
        (int) matching material_id or None
    """
    formula = taskdoc["formula_reduced_abc"]

    # handle the "parent structure" option, which is used to intentionally force slightly
    # different structures to contribute to the same "material", e.g. from an ordering scheme
    if "parent_structure" in taskdoc:
        parent = taskdoc["parent_structure"]
        t_struct = Structure.from_dict(parent["structure"])
        q = {"formula_reduced_abc": formula,
             "parent_structure.spacegroup.number": parent["spacegroup"]["number"]}
    else:
        sgnum = taskdoc["output"]["spacegroup"]["number"]
        t_struct = Structure.from_dict(taskdoc["output"]["structure"])
        q = {"formula_reduced_abc": formula, "sg_number": sgnum}

    # Built once: the tolerances are the same for every candidate.
    sm = StructureMatcher(ltol=ltol, stol=stol, angle_tol=angle_tol,
                          primitive_cell=True, scale=True,
                          attempt_supercell=False, allow_subset=False,
                          comparator=ElementComparator())

    fields = {"parent_structure": 1, "structure": 1, "material_id": 1}
    for m in self._materials.find(q, fields):
        # prefer the candidate's parent structure when it has one
        if "parent_structure" in m:
            s_dict = m["parent_structure"]["structure"]
        else:
            s_dict = m["structure"]
        m_struct = Structure.from_dict(s_dict)
        if sm.fit(m_struct, t_struct):
            return m["material_id"]

    return None
def post_process(self, docs):
    """
    Keep the documents whose structure fits ``self.structure`` and rank them
    by closeness.

    Args:
        docs: iterable of dicts with ``"structure"`` and ``"material_id"``

    Returns:
        list[dict]: at most ``self.limit`` entries with keys
            ``material_id``, ``normalized_rms_displacement`` and
            ``max_distance_paired_sites``, ordered by those two distances.
    """
    s1 = Structure.from_dict(self.structure)
    m = StructureMatcher(
        ltol=self.ltol,
        stol=self.stol,
        angle_tol=self.angle_tol,
        primitive_cell=True,
        scale=True,
        attempt_supercell=False,
        comparator=ElementComparator(),
    )

    matches = []
    for doc in docs:
        s2 = Structure.from_dict(doc["structure"])
        if m.fit(s1, s2):
            rms = m.get_rms_dist(s1, s2)
            matches.append({
                "material_id": doc["material_id"],
                "normalized_rms_displacement": rms[0],
                "max_distance_paired_sites": rms[1],
            })

    # Rank first, then truncate. Truncating before sorting returned the
    # first `limit` matches encountered rather than the closest ones.
    matches.sort(
        key=lambda x: (
            x["normalized_rms_displacement"],
            x["max_distance_paired_sites"],
        )
    )
    return matches[:self.limit]
def remove_multiple_cif(self):
    """
    Select the CIF files to use, skipping unwanted elements and duplicates.

    Each file matched by the glob pattern ``self.ctx.input_cif_folder`` is
    loaded and stripped of oxidation states. It is recorded in
    ``self.ctx.structures_used`` unless it contains an element missing from
    ``self.ctx.element_list`` or matches an already accepted structure; in
    the latter case the duplicate is noted in
    ``self.ctx.structures_match_not_used``. Files that cannot be parsed are
    reported and skipped.
    """
    structMatch = StructureMatcher(ltol=0.05, stol=0.05, angle_tol=2,
                                   primitive_cell=False, scale=False,
                                   attempt_supercell=False,
                                   allow_subset=False,
                                   comparator=ElementComparator(),
                                   supercell_size='num_sites',
                                   ignored_species=None)

    for input_cif_file_path in glob.glob(self.ctx.input_cif_folder):
        input_cif_file = os.path.basename(input_cif_file_path)
        try:
            structure = Structure.from_file(input_cif_file_path)
        except Exception:
            # best effort: an unreadable CIF must not abort the whole scan
            print('Structure import from cif not successful.')
            continue
        structure.remove_oxidation_states()

        elements = structure.composition.element_composition.elements
        use_cif = all(str(element) in self.ctx.element_list
                      for element in elements)

        # Compare against the structures accepted so far. They live on the
        # context (the bare name `structures_used` was undefined), each one
        # stored under the 'structure_original' key written below.
        for key, used in self.ctx.structures_used.items():
            if structMatch.fit(structure, used['structure_original']):
                use_cif = False
                self.ctx.structures_match_not_used[input_cif_file] = key

        if use_cif:
            self.ctx.structures_used[input_cif_file] = {
                'structure_original': structure,
            }

    self.out('structures_match_not_used',
             ParameterData(dict=self.ctx.structures_match_not_used))
def match(self, snls, mat):
    """
    Yield the SNLs whose structure matches the given material.

    An SNL matches when its structure has the same space group as, and fits,
    the material's structure or any of its initial structures.

    Args:
        snls ([dict]): the snls list
        mat (dict): a materials doc with ``"structure"`` and
            ``"initial_structures"``

    Yields:
        dict: each SNL dict from ``snls`` that matches, at most once
    """
    def space_group(struc):
        # The try-except is a temp fix to a spglib bug
        try:
            return struc.get_space_group_info()[0]
        except Exception:
            return -1

    sm = StructureMatcher(ltol=self.ltol, stol=self.stol,
                          angle_tol=self.angle_tol, primitive_cell=True,
                          scale=True, attempt_supercell=False,
                          allow_subset=False, comparator=ElementComparator())

    m_strucs = [Structure.from_dict(mat["structure"])] + [
        Structure.from_dict(init_struc)
        for init_struc in mat["initial_structures"]
    ]
    # The material's space groups do not depend on the SNL being tested, so
    # determine them once instead of once per SNL.
    m_spacegroups = [space_group(struc) for struc in m_strucs]

    for snl in snls:
        snl_struc = StructureNL.from_dict(snl).structure
        snl_spacegroup = space_group(snl_struc)
        for struc, struc_sg in zip(m_strucs, m_spacegroups):
            if struc_sg == snl_spacegroup and sm.fit(struc, snl_struc):
                yield snl
                break
from pymatgen import Structure
from pymatgen.analysis.structure_matcher import StructureMatcher

# NOTE(review): `pymongo` is used below but its import is not visible in
# this part of the file -- confirm it is imported earlier.
client = pymongo.MongoClient()
db = client.springer
coll = db['pauling_file_unique_Parse']

if __name__ == '__main__':
    # Load the five structures to compare. Each loop keeps the last document
    # returned for its key.
    for doc in coll.find({'key': 'sd_1223808'}):
        struc1 = Structure.from_dict(doc['structure'])
    for doc in coll.find({'key': 'sd_0458111'}):
        struc2 = Structure.from_dict(doc['structure'])
    for doc in coll.find({'key': 'sd_1933177'}):
        struc3 = Structure.from_dict(doc['structure'])
    for doc in coll.find({'key': 'sd_1010018'}):
        struc4 = Structure.from_dict(doc['structure'])
        print(struc4)
        print(doc['metadata']['_Springer']['geninfo']['Phase Label(s)'])
    for doc in coll.find({'key': 'sd_0529813'}):
        struc5 = Structure.from_dict(doc['structure'])

    # Print the fit result and RMS distances for selected pairs.
    # print() is used because the print statement is a syntax error on
    # Python 3.
    matcher = StructureMatcher()
    print(matcher.fit(struc1, struc2), '8.18, 8.26', matcher.get_rms_dist(struc1, struc2))
    print(matcher.fit(struc2, struc3), '8.26 8.25', matcher.get_rms_dist(struc2, struc3))
    print(matcher.fit(struc3, struc1), '8.25 8.18', matcher.get_rms_dist(struc3, struc1))
    print(matcher.fit(struc1, struc4), matcher.get_rms_dist(struc1, struc4))
    print(matcher.fit(struc1, struc5), matcher.get_rms_dist(struc1, struc5))
    print(matcher.fit(struc2, struc5), matcher.get_rms_dist(struc2, struc5))
def test_supercell_subsets(self):
    """Supercell matching with and without ``allow_subset``."""
    shared = dict(ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=False,
                  scale=True, attempt_supercell=True,
                  supercell_size='volume')
    sm = StructureMatcher(allow_subset=True, **shared)
    sm_no_s = StructureMatcher(allow_subset=False, **shared)

    def assert_sites_agree(expected_sites, mapped_sites):
        # paired sites must be close and carry the same species
        for a, b in zip(expected_sites, mapped_sites):
            self.assertTrue(a.distance(b) < 0.08)
            self.assertEqual(a.species_and_occu, b.species_and_occu)

    lattice = Lattice.orthorhombic(1, 2, 3)
    s1 = Structure(lattice, ['Ag', 'Si', 'Si'],
                   [[.7, .4, .5], [0, 0, 0.1], [0, 0, 0.2]])
    s1.make_supercell([2, 1, 1])
    s2 = Structure(lattice, ['Si', 'Si', 'Ag'],
                   [[0, 0.1, -0.95], [0, 0.1, 0], [-.7, .5, .375]])

    site_order = [0, 2, 1, 3, 4, 5]
    s1 = Structure.from_sites([s1[idx] for idx in site_order])

    # test when s1 is exact supercell of s2
    result = sm.get_s2_like_s1(s1, s2)
    assert_sites_agree(s1, result)

    self.assertTrue(sm.fit(s1, s2))
    self.assertTrue(sm.fit(s2, s1))
    self.assertTrue(sm_no_s.fit(s1, s2))
    self.assertTrue(sm_no_s.fit(s2, s1))

    rms = (0.048604032430991401, 0.059527539448807391)
    self.assertTrue(np.allclose(sm.get_rms_dist(s1, s2), rms))
    self.assertTrue(np.allclose(sm.get_rms_dist(s2, s1), rms))

    # test when the supercell is a subset of s2
    subset_supercell = s1.copy()
    del subset_supercell[0]
    result = sm.get_s2_like_s1(subset_supercell, s2)
    self.assertEqual(len(result), 6)
    assert_sites_agree(subset_supercell, result)

    self.assertTrue(sm.fit(subset_supercell, s2))
    self.assertTrue(sm.fit(s2, subset_supercell))
    self.assertFalse(sm_no_s.fit(subset_supercell, s2))
    self.assertFalse(sm_no_s.fit(s2, subset_supercell))

    rms = (0.053243049896333279, 0.059527539448807336)
    self.assertTrue(np.allclose(sm.get_rms_dist(subset_supercell, s2), rms))
    self.assertTrue(np.allclose(sm.get_rms_dist(s2, subset_supercell), rms))

    # test when s2 (once made a supercell) is a subset of s1
    s2_missing_site = s2.copy()
    del s2_missing_site[1]
    result = sm.get_s2_like_s1(s1, s2_missing_site)
    assert_sites_agree((s1[idx] for idx in (0, 2, 4, 5)), result)

    self.assertTrue(sm.fit(s1, s2_missing_site))
    self.assertTrue(sm.fit(s2_missing_site, s1))
    self.assertFalse(sm_no_s.fit(s1, s2_missing_site))
    self.assertFalse(sm_no_s.fit(s2_missing_site, s1))

    rms = (0.029763769724403633, 0.029763769724403987)
    self.assertTrue(np.allclose(sm.get_rms_dist(s1, s2_missing_site), rms))
    self.assertTrue(np.allclose(sm.get_rms_dist(s2_missing_site, s1), rms))
def combine_neb_plots(neb_analyses, arranged_neb_analyses=False,
                      reverse_plot=False):
    """
    Stitch several NEB analyses into one plot.

    neb_analyses: a list of NEBAnalysis objects

    arranged_neb_analyses: need to manually arrange neb_analyses to get the
    combined-barrier plot corresponding to the percolation path if the code
    gives a warning, which is due to similar structures of terminal
    relaxations. Or only the barrier value is correct! E.g., if there are two
    NEBAnalysis objects to combine, arrange in such a way that the end-point
    energy of the first NEBAnalysis object is the start-point energy of the
    second NEBAnalysis object.

    reverse_plot: reverse the plot or percolation direction.

    Returns the plot produced by ``NEBAnalysis.get_plot`` for the combined
    path. Raises ValueError when the start of the accumulated path matches
    neither end of the next analysis.
    """
    x = StructureMatcher()
    warn = False  # stays True once an ambiguous junction has been seen
    for neb_index, neb in enumerate(neb_analyses):
        if neb_index == 0:
            neb1 = neb
            neb1_energies = list(neb1.energies)
            neb1_structures = neb1.structures
            neb1_forces = neb1.forces
            neb1_r = neb1.r
            continue

        neb2 = neb
        neb2_energies = list(neb2.energies)

        neb1_start_s = neb1_structures[0]
        neb2_start_s, neb2_end_s = neb2.structures[0], neb2.structures[-1]

        # The three structures above are fixed for this iteration, so each
        # comparison is evaluated once and reused in every branch below.
        fits_start = x.fit(neb1_start_s, neb2_start_s)
        fits_end = x.fit(neb1_start_s, neb2_end_s)

        if fits_start and fits_end:
            # both ends of neb2 look like the start of the path: ambiguous
            warn = True
            warnings.warn("Need to arrange root_dirs or only the barrier "
                          "value is correct!", Warning)
            if arranged_neb_analyses:
                # caller guarantees the order: append neb2 after the path
                neb1_energies = neb1_energies[0:len(neb1_energies) - 1] \
                    + [(neb1_energies[-1] + neb2_energies[0]) / 2] \
                    + neb2_energies[1:]
                neb1_structures = neb1_structures + neb2.structures[1:]
                neb1_forces = list(neb1_forces) + list(neb2.forces)[1:]
                neb1_r = list(neb1_r) + [i + neb1_r[-1] for i in
                                         list(neb2.r)[1:]]

        if (fits_start and not fits_end) \
                or (warn and not arranged_neb_analyses):
            # the path starts where neb2 starts: reverse the accumulated
            # path and append neb2 after it
            neb1_energies = list(reversed(neb1_energies[1:])) + [
                (neb1_energies[0] + neb2_energies[0]) / 2] + neb2_energies[1:]
            neb1_structures = list(
                reversed((neb1_structures[1:]))) + neb2.structures
            neb1_forces = list(reversed(list(neb1_forces)[1:])) + list(
                neb2.forces)
            neb1_r = list(reversed(
                [i * -1 - neb1_r[-1] * -1 for i in list(neb1_r)[1:]])) + [
                i + neb1_r[-1] for i in list(neb2.r)]

        elif not fits_start and fits_end:
            # the path starts where neb2 ends: put neb2 in front of the path
            neb1_energies = (neb2_energies[0:len(neb2_energies) - 1]) + [
                (neb1_energies[0] + neb2_energies[-1]) / 2] + neb1_energies[1:]
            neb1_structures = (neb2.structures[
                               0:len(neb2_energies) - 1]) + neb1_structures
            neb1_forces = list(neb2.forces)[0:len(neb2_energies) - 1] + list(
                neb1_forces)
            neb1_r = list(reversed(
                [i * -1 - neb2.r[-1] * -1 for i in list(neb2.r)[1:]])) + [
                i + neb2.r[-1] for i in list(neb1_r)]

        elif not fits_start and not fits_end:
            raise ValueError("no matched structures for connection!")

    if reverse_plot:
        na = NEBAnalysis(
            list(reversed([i * -1 - neb1_r[-1] * -1 for i in list(neb1_r)])),
            list(reversed(neb1_energies)),
            list(reversed(neb1_forces)), list(reversed(neb1_structures)))
    else:
        na = NEBAnalysis(neb1_r, neb1_energies, neb1_forces, neb1_structures)
    plt = na.get_plot()
    return plt
def test_primitive(self):
    """Test primitive cell reduction"""
    matcher = StructureMatcher(primitive_cell=True)
    reference = self.struct_list[0]
    enlarged = self.struct_list[1]
    # enlarge the second structure in place; it must still fit the first
    enlarged.make_supercell([[2, 0, 0], [0, 3, 0], [0, 0, 1]])
    self.assertTrue(matcher.fit(reference, enlarged))
def test_left_handed_lattice(self):
    """Ensure Left handed lattices are accepted"""
    matcher = StructureMatcher()
    json_path = os.path.join(test_dir, "Li3GaPCO7.json")
    structure = Structure.from_file(json_path)
    # the structure must fit itself
    self.assertTrue(matcher.fit(structure, structure))
def test_oxi(self):
    """Test oxidation state removal matching"""
    first, second = self.oxi_structs[0], self.oxi_structs[1]

    # the default comparator must reject the pair
    default_matcher = StructureMatcher()
    self.assertFalse(default_matcher.fit(first, second))

    # comparing by element only must accept it
    element_matcher = StructureMatcher(comparator=ElementComparator())
    self.assertTrue(element_matcher.fit(first, second))
def test_left_handed_lattice(self):
    """Ensure Left handed lattices are accepted"""
    sm = StructureMatcher()
    loaded = Structure.from_file(os.path.join(test_dir, "Li3GaPCO7.json"))
    # self-comparison is expected to succeed
    fits_itself = sm.fit(loaded, loaded)
    self.assertTrue(fits_itself)
def test_symmetrized(self):
    """
    Round-trip structures through CifWriter (with symprec set) and CifParser
    and check that they still match; also check the space-group number the
    writer reports for two angle tolerances.
    """
    filepath = self.TEST_FILES_DIR / 'POSCAR'
    poscar = Poscar.from_file(filepath, check_for_POTCAR=False)
    writer = CifWriter(poscar.structure, symprec=0.1)
    # Reference CIF text. It is only used by the commented-out comparison
    # further down, so it is currently not asserted against.
    ans = """# generated using pymatgen data_FePO4 _symmetry_space_group_name_H-M Pnma _cell_length_a 10.41176687 _cell_length_b 6.06717188 _cell_length_c 4.75948954 _cell_angle_alpha 90.00000000 _cell_angle_beta 90.00000000 _cell_angle_gamma 90.00000000 _symmetry_Int_Tables_number 62 _chemical_formula_structural FePO4 _chemical_formula_sum 'Fe4 P4 O16' _cell_volume 300.65685512 _cell_formula_units_Z 4 loop_ _symmetry_equiv_pos_site_id _symmetry_equiv_pos_as_xyz 1 'x, y, z' 2 '-x, -y, -z' 3 '-x+1/2, -y, z+1/2' 4 'x+1/2, y, -z+1/2' 5 'x+1/2, -y+1/2, -z+1/2' 6 '-x+1/2, y+1/2, z+1/2' 7 '-x, y+1/2, -z' 8 'x, -y+1/2, z' loop_ _atom_site_type_symbol _atom_site_label _atom_site_symmetry_multiplicity _atom_site_fract_x _atom_site_fract_y _atom_site_fract_z _atom_site_occupancy Fe Fe1 4 0.218728 0.250000 0.525133 1 P P2 4 0.094613 0.750000 0.581757 1 O O3 8 0.165710 0.546072 0.714616 1 O O4 4 0.043372 0.250000 0.292862 1 O O5 4 0.096642 0.750000 0.258680 1"""
    # The written CIF, parsed back, must match the POSCAR structure.
    cif = CifParser.from_string(str(writer))
    m = StructureMatcher()
    self.assertTrue(m.fit(cif.get_structures()[0], poscar.structure))
    # for l1, l2 in zip(str(writer).split("\n"), ans.split("\n")):
    #     self.assertEqual(l1.strip(), l2.strip())
    # Second reference text; like the first it is not asserted against.
    ans = """# generated using pymatgen data_LiFePO4 _symmetry_space_group_name_H-M Pnma _cell_length_a 10.41037000 _cell_length_b 6.06577000 _cell_length_c 4.74480000 _cell_angle_alpha 90.00000000 _cell_angle_beta 90.00000000 _cell_angle_gamma 90.00000000 _symmetry_Int_Tables_number 62 _chemical_formula_structural LiFePO4 _chemical_formula_sum 'Li4 Fe4 P4 O16' _cell_volume 299.619458734 _cell_formula_units_Z 4 loop_ _symmetry_equiv_pos_site_id _symmetry_equiv_pos_as_xyz 1 'x, y, z' 2 '-x, -y, -z' 3 '-x+1/2, -y, z+1/2' 4 'x+1/2, y, -z+1/2' 5 'x+1/2, -y+1/2, -z+1/2' 6 '-x+1/2, y+1/2, z+1/2' 7 '-x, y+1/2, -z' 8 'x, 
-y+1/2, z' loop_ _atom_site_type_symbol _atom_site_label _atom_site_symmetry_multiplicity _atom_site_fract_x _atom_site_fract_y _atom_site_fract_z _atom_site_occupancy Li Li1 4 0.000000 0.000000 0.000000 1.0 Fe Fe2 4 0.218845 0.750000 0.474910 1.0 P P3 4 0.094445 0.250000 0.417920 1.0 O O4 8 0.165815 0.044060 0.286540 1.0 O O5 4 0.043155 0.750000 0.708460 1.0 O O6 4 0.096215 0.250000 0.741480 1.0 """
    # Same round trip for the LiFePO4 CIF file.
    s = Structure.from_file(self.TEST_FILES_DIR / 'LiFePO4.cif')
    writer = CifWriter(s, symprec=0.1)
    s2 = CifParser.from_string(str(writer)).get_structures()[0]
    self.assertTrue(m.fit(s, s2))
    # Same round trip for the "Li2O" structure returned by get_structure.
    s = self.get_structure("Li2O")
    writer = CifWriter(s, symprec=0.1)
    s2 = CifParser.from_string(str(writer)).get_structures()[0]
    self.assertTrue(m.fit(s, s2))
    # test angle tolerance.
    # angle_tolerance=0 is expected to report space group 14 ...
    s = Structure.from_file(self.TEST_FILES_DIR / 'LiFePO4.cif')
    writer = CifWriter(s, symprec=0.1, angle_tolerance=0)
    d = list(writer.ciffile.data.values())[0]
    self.assertEqual(d["_symmetry_Int_Tables_number"], 14)
    # ... while angle_tolerance=2 is expected to report space group 62.
    s = Structure.from_file(self.TEST_FILES_DIR / 'LiFePO4.cif')
    writer = CifWriter(s, symprec=0.1, angle_tolerance=2)
    d = list(writer.ciffile.data.values())[0]
    self.assertEqual(d["_symmetry_Int_Tables_number"], 62)
class ElectrodesBuilder(Builder):
    """
    Builder that groups material entries into insertion electrodes for one
    working ion and writes the resulting summaries to the electro store.
    """

    def __init__(self,
                 materials,
                 electro,
                 working_ion,
                 query=None,
                 compatibility=MaterialsProjectCompatibility("Advanced"),
                 **kwargs):
        """
        Calculates physical parameters of battery materials the battery
        entries using groups of ComputedStructureEntry and the entry for the
        most stable version of the working_ion in the system

        Args:
            materials (Store): Store of materials documents that contains the
                structures
            electro (Store): Store of insertion electrodes data such as
                voltage and capacity
            working_ion (str): symbol of the working ion
            query (dict): dictionary to limit materials to be analyzed ---
                only applied to the materials when we need to group
                structures; the phase diagram is still constructed with the
                entire set
            compatibility (PymatgenCompatibility): Compatibility module to
                ensure energies are compatible
        """
        self.sm = StructureMatcher(comparator=ElementComparator(),
                                   primitive_cell=False)
        self.materials = materials
        self.electro = electro
        self.working_ion = working_ion
        self.query = query if query else {}
        self.compatibility = compatibility
        self.completed_tasks = set()
        self.working_ion_entry = None
        super().__init__(sources=[materials], targets=[electro], **kwargs)

    def get_items(self):
        """
        Get all entries by first obtaining the distinct chemical systems then
        sorting them by their composition (sans the working ion)

        Yields:
            dict: one item per composition group, holding ``chemsys``,
                ``all_entries`` and the phase-diagram entries ``pd_ents``
        """
        self.logger.info(
            "Grabbing the relavant chemical systems containing the current "
            "working ion and a single redox element.")
        q = {
            '$and': [
                {"elements": {'$in': [self.working_ion]}},
                {"elements": {'$in': redox_els}},
            ]
        }
        q.update(self.query)

        chemsys_names = self.materials.distinct('chemsys', q)
        for chemsys in chemsys_names:
            self.logger.debug(f"Calculating the phase diagram for: {chemsys}")
            # get the phase diagram from using the chemsys
            pd_q = {
                'chemsys': {"$in": list(chemsys_permutations(chemsys))},
                'deprecated': False,
            }
            self.logger.debug(f"pd_q: {pd_q}")
            pd_docs = list(
                self.materials.query(properties=mat_props, criteria=pd_q))
            pd_ents = self._mat_doc2comp_entry(pd_docs, store_struct=False)
            # Explicit None check: filter(None.__ne__, ...) only worked
            # because NotImplemented happened to be truthy.
            pd_ents = [ent for ent in pd_ents if ent is not None]

            for item in self.get_hashed_entries_from_chemsys(chemsys):
                item.update({'pd_ents': pd_ents})

                # NOTE(review): the ids are read from `composition.entry_id`
                # as in the original code -- confirm that attribute exists,
                # `ient.entry_id` may have been intended.
                ids_all_ents = {
                    ient.composition.entry_id for ient in item['all_entries']
                }
                ids_pd = {
                    ient.composition.entry_id for ient in item['pd_ents']
                }
                assert ids_all_ents.issubset(ids_pd)
                self.logger.debug(
                    f"all_ents [{[ient.composition.reduced_formula for ient in item['all_entries']]}]"
                )
                self.logger.debug(
                    f"pd_ents [{[ient.composition.reduced_formula for ient in item['pd_ents']]}]"
                )
                yield item

    def get_hashed_entries_from_chemsys(self, chemsys):
        """
        Read the entries from the materials database and group them based on
        the reduced composition of the framework material (without working
        ion).

        Args:
            chemsys (string): the chemical system string to be queried

        Yields:
            dict: ``{'chemsys': chemsys, 'all_entries': [entries]}`` for each
                composition group holding more than one entry
        """
        chemsys_w_wo_ion = self._chemsys_delith(chemsys)
        self.logger.info("chemsys list: {}".format(chemsys_w_wo_ion))
        q = {'chemsys': {"$in": list(chemsys_w_wo_ion)}, 'deprecated': False}
        self.logger.debug(f"q: {q}")

        docs = self.materials.query(q, mat_props)
        entries = [
            ent for ent in self._mat_doc2comp_entry(docs) if ent is not None
        ]
        self.logger.debug(
            f"entries found using q [{[ient.composition.reduced_formula for ient in entries]}]"
        )
        self.logger.info("Found {} entries in the database".format(
            len(entries)))

        if len(entries) > 1:  # ignore systems with only one entry
            # group entries together by their composition sans the working
            # ion; groupby needs its input sorted by the same key
            entries = sorted(entries, key=s_hash)
            for _, g in groupby(entries, key=s_hash):
                g = list(g)
                self.logger.debug(
                    "The full group of entries found based on chemical formula alone: {}"
                    .format([el.name for el in g]))
                if len(g) > 1:
                    yield {'chemsys': chemsys, 'all_entries': g}

    def process_item(self, item):
        """
        Build the electrode documents for one composition group.

        The entries are split into structurally matching subgroups, each
        entry is annotated with phase-diagram data, and one InsertionElectrode
        summary is produced per subgroup and sandbox.

        Args:
            item (dict): item from ``get_items`` with ``all_entries`` and
                ``pd_ents``

        Returns:
            list[dict]: electrode summary documents, one per subgroup and
                sandbox for which an electrode could be generated
        """
        all_entries = item['all_entries']
        pd_ents = item['pd_ents']
        phdi = PhaseDiagram(pd_ents)

        # The working ion entries
        ents_wion = [
            ent for ent in pd_ents
            if ent.composition.get_integer_formula_and_factor()[0]
            == self.working_ion
        ]
        self.working_ion_entry = min(ents_wion,
                                     key=lambda e: e.energy_per_atom)
        assert self.working_ion_entry is not None

        grouped_entries = list(self.get_sorted_subgroups(all_entries))
        docs = []  # results

        for group in grouped_entries:
            self.logger.debug(
                f"Grouped entries in all sandboxes {', '.join([en.name for en in group])}"
            )
            for en in group:
                # skip this d_muO2 stuff if you do not have oxygen
                if Element('O') in en.composition.elements:
                    d_muO2 = [{
                        'reaction': str(itr['reaction']),
                        'chempot': itr['chempot'],
                        'evolution': itr['evolution']
                    } for itr in phdi.get_element_profile('O', en.composition)]
                else:
                    d_muO2 = None
                en.data['muO2'] = d_muO2
                en.data['decomposition_energy'] = phdi.get_e_above_hull(en)

            # sort out the sandboxes
            # for each sandbox core+sandbox will both contribute entries
            all_sbx = set(
                chain.from_iterable(ent.data['sbxn'] for ent in group))
            self.logger.debug(f"All sandboxes {', '.join(list(all_sbx))}")

            for isbx in all_sbx:
                group_sbx = [
                    ent for ent in group
                    if isbx in ent.data['sbxn']
                    or ent.data['sbxn'] == ['core']
                ]
                # Need more than one level of lithiation to define a
                # electrode material
                if len(group_sbx) == 1:
                    continue
                self.logger.debug(
                    f"Grouped entries in sandbox {isbx} -- {', '.join([en.name for en in group_sbx])}"
                )

                # An electrode is usable only if it can be built and has more
                # than one stable entry. The check is explicit, not an
                # `assert`, which is stripped under -O.
                try:
                    result = InsertionElectrode(group_sbx,
                                                self.working_ion_entry)
                    usable = len(result._stable_entries) > 1
                except Exception:
                    usable = False
                if not usable:
                    self.logger.warning(
                        f"Not able to generate a  entries in sandbox {isbx} using the following entires-- {', '.join([en.entry_id for en in group_sbx])}"
                    )
                    continue

                spacegroup = SpacegroupAnalyzer(
                    result.get_stable_entries(
                        charge_to_discharge=True)[0].structure)
                d = result.as_dict_summary()
                ids = [entry.entry_id for entry in result.get_all_entries()]
                lowest_id = sorted(ids, key=lambda x: x.split('-')[-1])[0]
                d['spacegroup'] = {
                    k: spacegroup._space_group_data[k]
                    for k in sg_fields
                }

                if isbx == 'core':
                    d['battid'] = lowest_id + '_' + self.working_ion
                else:
                    d['battid'] = (lowest_id + '_' + self.working_ion + '_'
                                   + isbx)
                # Only allow one sandbox value for each electrode
                d['sbxn'] = [isbx]

                docs.append(d)

        return docs

    def update_targets(self, items):
        """
        Write the electrode documents to the electro store, keyed on battid.

        Args:
            items: iterable of document lists as returned by process_item
        """
        items = list(filter(None, chain.from_iterable(items)))
        if len(items) > 0:
            self.logger.info("Updating {} electro documents".format(
                len(items)))
            self.electro.update(docs=items, key=['battid'])
        else:
            self.logger.info("No items to update")

    def get_sorted_subgroups(self, group):
        """
        Yield the structurally matching subgroups that contain more than one
        distinct working-ion concentration.

        Args:
            group: a list of entries
        """
        for subg in self.group_entries(group):
            wion_conc = {
                el.composition.fractional_composition[self.working_ion]
                for el in subg
            }
            if len(wion_conc) > 1:
                yield subg

    def group_entries(self, g):
        """
        group the structures together based on similarity of the delithiated
        primitive cells

        Args:
            g: a list of entries

        Yields:
            list: each subgroup of more than one entry whose delithiated
                structures match
        """
        def match_in_group(ref, sub_list):
            # True if ref matches any member already in the subgroup
            return any(
                self.sm.fit(ref.data['structure_delith'],
                            el[1].data['structure_delith'])
                for el in sub_list)

        subgroups = []
        for i, refs in enumerate(g):
            # indices of every subgroup this entry matches
            g_inds = [
                idx for idx, sub in enumerate(subgroups)
                if match_in_group(refs, sub)
            ]
            if not g_inds:
                subgroups.append([(i, refs)])
                continue

            if len(g_inds) > 1:
                # The entry bridges several subgroups: merge them into one.
                merged = list(
                    chain.from_iterable(subgroups[idx] for idx in g_inds))
                for idx in sorted(g_inds, reverse=True):
                    del subgroups[idx]
                subgroups.append(merged)  # add to the end
                target = merged
            else:
                target = subgroups[g_inds[0]]
            # The entry always joins its group. Previously the merge branch
            # dropped the bridging entry itself.
            target.append((i, refs))

        for sg in subgroups:
            if len(sg) > 1:
                yield [el[1] for el in sg]

    def _chemsys_delith(self, chemsys):
        """
        Return the chemsys strings with and without the working ion.

        Args:
            chemsys (str): dash-separated chemical system
        """
        elements = set(chemsys.split("-"))
        return {
            "-".join(sorted(c))
            for c in [elements, elements - {self.working_ion}]
        }

    def _mat_doc2comp_entry(self, docs, store_struct=True):
        """
        Convert material documents into compatibility-processed
        ComputedStructureEntry objects.

        Args:
            docs: iterable of material documents
            store_struct (bool): also store the delithiated primitive
                structure and its composition hash in the entry data

        Returns:
            list: whatever ``self.compatibility.process_entry`` returns for
                each document; documents whose processing raises are logged
                and skipped
        """
        def get_prim_host(struct):
            """
            Get the primitive structure with all of the working ions removed
            """
            structure = struct.copy()
            structure.remove_species([self.working_ion])
            prim = PrimitiveCellTransformation()
            return prim.apply_transformation(structure)

        entries = []
        for d in docs:
            struct = Structure.from_dict(d['structure'])
            en = ComputedStructureEntry(
                structure=struct,
                energy=d['thermo']['energy'],
                parameters=d['calc_settings'],
                entry_id=d['task_id'],
            )

            # every entry belongs to 'core' plus any sandboxes on the doc
            en.data['sbxn'] = ['core']
            if 'sbxn' in d:
                en.data['sbxn'].extend(d['sbxn'])
            elif '_sbxn' in d:
                en.data['sbxn'].extend(d['_sbxn'])

            if store_struct:
                struct_delith = get_prim_host(struct)
                comp_delith = self.sm._comparator.get_hash(
                    struct_delith.composition)
                en.data['structure_delith'] = struct_delith
                en.data['comp_delith'] = comp_delith

            try:
                entries.append(self.compatibility.process_entry(en))
            except Exception:
                self.logger.warning(
                    'unable to process material with task_id: {}'.format(
                        en.entry_id))
        return entries
def test_supercell_subsets(self):
    """
    Supercell matching with and without ``allow_subset``: exact supercell,
    supercell with a site removed, and primitive cell with a site removed.
    """
    shared = dict(ltol=0.2, stol=0.3, angle_tol=5, primitive_cell=False,
                  scale=True, attempt_supercell=True,
                  supercell_size='volume')
    sm = StructureMatcher(allow_subset=True, **shared)
    sm_no_s = StructureMatcher(allow_subset=False, **shared)

    def check_sites(expected_sites, matched_sites):
        # paired sites must be close and carry identical species
        for expected, matched in zip(expected_sites, matched_sites):
            self.assertTrue(expected.distance(matched) < 0.08)
            self.assertEqual(expected.species_and_occu,
                             matched.species_and_occu)

    def check_rms(first, second, expected):
        # rms distances must agree in both argument orders
        self.assertTrue(np.allclose(sm.get_rms_dist(first, second), expected))
        self.assertTrue(np.allclose(sm.get_rms_dist(second, first), expected))

    lattice = Lattice.orthorhombic(1, 2, 3)
    s1 = Structure(lattice, ['Ag', 'Si', 'Si'],
                   [[.7, .4, .5], [0, 0, 0.1], [0, 0, 0.2]])
    s1.make_supercell([2, 1, 1])
    s2 = Structure(lattice, ['Si', 'Si', 'Ag'],
                   [[0, 0.1, -0.95], [0, 0.1, 0], [-.7, .5, .375]])
    shuffle = [0, 2, 1, 3, 4, 5]
    s1 = Structure.from_sites([s1[idx] for idx in shuffle])

    # test when s1 is exact supercell of s2
    result = sm.get_s2_like_s1(s1, s2)
    check_sites(s1, result)

    self.assertTrue(sm.fit(s1, s2))
    self.assertTrue(sm.fit(s2, s1))
    self.assertTrue(sm_no_s.fit(s1, s2))
    self.assertTrue(sm_no_s.fit(s2, s1))

    rms = (0.048604032430991401, 0.059527539448807391)
    check_rms(s1, s2, rms)

    # test when the supercell is a subset of s2
    subset_supercell = s1.copy()
    del subset_supercell[0]
    result = sm.get_s2_like_s1(subset_supercell, s2)
    self.assertEqual(len(result), 6)
    check_sites(subset_supercell, result)

    self.assertTrue(sm.fit(subset_supercell, s2))
    self.assertTrue(sm.fit(s2, subset_supercell))
    self.assertFalse(sm_no_s.fit(subset_supercell, s2))
    self.assertFalse(sm_no_s.fit(s2, subset_supercell))

    rms = (0.053243049896333279, 0.059527539448807336)
    check_rms(subset_supercell, s2, rms)

    # test when s2 (once made a supercell) is a subset of s1
    s2_missing_site = s2.copy()
    del s2_missing_site[1]
    result = sm.get_s2_like_s1(s1, s2_missing_site)
    check_sites((s1[idx] for idx in (0, 2, 4, 5)), result)

    self.assertTrue(sm.fit(s1, s2_missing_site))
    self.assertTrue(sm.fit(s2_missing_site, s1))
    self.assertFalse(sm_no_s.fit(s1, s2_missing_site))
    self.assertFalse(sm_no_s.fit(s2_missing_site, s1))

    rms = (0.029763769724403633, 0.029763769724403987)
    check_rms(s1, s2_missing_site, rms)