Example #1
0
    def __call__(self, struct1, struct2):
        """Report whether two structures match under species-wise comparison.

        Both arguments must provide ``get_pymatgen_structure()``; the objects
        that call returns are handed to a ``StructureMatcher`` built from the
        keyword arguments stored in ``self.kw`` plus a ``SpeciesComparator``.

        Returns:
            Whatever ``StructureMatcher.fit`` returns for the converted pair.
        """
        matcher = StructureMatcher(**self.kw, comparator=SpeciesComparator())
        # Convert in argument order: struct1 first, then struct2.
        converted = [s.get_pymatgen_structure() for s in (struct1, struct2)]
        return matcher.fit(*converted)
Example #2
0
    def add_if_belongs(self, cand_snl):
        """
        Add cand_snl to this group if its structure matches the canonical one.

        A series of cheap checks (group key, ordered/disordered flag, size
        limit for the C-Ce system, duplicate SNL id) runs before the
        structure fit. When the candidate is accepted its id is appended to
        self.all_snl_ids and self.updated_at is refreshed. If
        has_species_properties() is true for its structure, the candidate is
        also placed in a species group: an existing one whose representative
        fits under SpeciesComparator, or a new one seeded by the candidate.

        Args:
            cand_snl: candidate exposing snlgroup_key, snl_id and structure.

        Returns:
            (False, None) if the candidate was not added, otherwise
            (True, spec_group), where spec_group is the snl_id identifying
            the species group, or None when no species group applies.
        """
        # no need to compare if different formulas or spacegroups
        if cand_snl.snlgroup_key != self.canonical_snl.snlgroup_key:
            return False, None

        cand_structure = cand_snl.structure

        # no need to compare if one is ordered, the other disordered
        if cand_structure.is_ordered != self.canonical_structure.is_ordered:
            return False, None

        # filter out large C-Ce structures
        comp = cand_structure.composition
        chemsys = '-'.join(sorted({e.symbol for e in comp.elements}))
        too_large = (cand_structure.num_sites > 1500
                     or self.canonical_structure.num_sites > 1500)
        if too_large and chemsys == 'C-Ce':
            # Single-argument parenthesised form prints identically on
            # Python 2 and Python 3.
            print('SKIPPING LARGE C-Ce')
            return False, None

        # make sure the structure is not already in all_structures
        if cand_snl.snl_id in self.all_snl_ids:
            print('WARNING: add_if_belongs() has detected that you are trying to add the same SNL id twice!')
            return False, None

        # try a structure fit to the canonical structure

        # use default Structure Matcher params from April 24, 2013, as suggested by Shyue
        matcher_kwargs = dict(ltol=0.2, stol=0.3, angle_tol=5,
                              primitive_cell=True, scale=True,
                              attempt_supercell=False)

        # we are using the ElementComparator() because this is how we want to group results
        sm = StructureMatcher(comparator=ElementComparator(), **matcher_kwargs)

        if not sm.fit(cand_structure, self.canonical_structure):
            return False, None

        # everything checks out, add to the group
        self.all_snl_ids.append(cand_snl.snl_id)

        # now that we are in the group, if there are site properties we need to check species_groups
        # e.g., if there is another SNL in the group with the same site properties, e.g. MAGMOM
        spec_group = None

        if has_species_properties(cand_structure):
            # The matcher configuration does not depend on the loop variable,
            # so build it once instead of once per representative.
            sms = StructureMatcher(comparator=SpeciesComparator(),
                                   **matcher_kwargs)
            for snl in self.species_snl:
                if sms.fit(cand_structure, snl.structure):
                    spec_group = snl.snl_id
                    self.species_groups[snl.snl_id].append(cand_snl.snl_id)
                    break
            else:
                # No representative matched: add a new species group.
                # for/else (rather than testing the truthiness of spec_group)
                # keeps a matched group whose id is falsy, e.g. 0, from being
                # mistaken for "no match".
                self.species_groups[cand_snl.snl_id] = [cand_snl.snl_id]
                self.species_snl.append(cand_snl)
                spec_group = cand_snl.snl_id

        self.updated_at = datetime.datetime.utcnow()

        return True, spec_group
    def compute_pymatgen_fit(self, s1, s2):
        '''
        Check two GAtor Structure objects for similarity with
        pymatgen's StructureMatcher.

        The matcher takes its tolerances from self.L_tol, self.S_tol and
        self.Angle_tol, reduces to primitive cells, does not rescale
        volumes, does not attempt supercells, and compares sites with
        SpeciesComparator.

        Returns s2.struct_id when the two structures fit (s2 is then the
        duplicate); otherwise returns the falsy result of the fit itself.
        '''
        matcher_options = dict(
            ltol=self.L_tol,
            stol=self.S_tol,
            angle_tol=self.Angle_tol,
            primitive_cell=True,
            scale=False,
            attempt_supercell=False,
            comparator=SpeciesComparator(),
        )
        matcher = StructureMatcher(**matcher_options)

        is_duplicate = matcher.fit(s1.get_pymatgen_structure(),
                                   s2.get_pymatgen_structure())

        # Not a duplicate: hand back the fit result unchanged
        if not is_duplicate:
            return is_duplicate
        # Duplicate: report which structure it duplicates
        return s2.struct_id
Example #4
0
def group_entries_by_structure(entries,
                               species_to_remove=None,
                               ltol=0.2,
                               stol=.4,
                               angle_tol=5,
                               primitive_cell=True,
                               scale=True,
                               comparator=SpeciesComparator(),
                               ncpus=None):
    """
    Given a sequence of ComputedStructureEntries, use structure fitter to group
    them by structural similarity.

    Args:
        entries: Iterable of ComputedStructureEntries. It is iterated exactly
            once, so one-shot iterables are accepted as well as sequences.
        species_to_remove: Sometimes you want to compare a host framework
            (e.g., in Li-ion battery analysis). This allows you to specify
            species to remove before structural comparison.
        ltol (float): Fractional length tolerance. Default is 0.2.
        stol (float): Site tolerance in Angstrom. Default is 0.4 Angstrom.
        angle_tol (float): Angle tolerance in degrees. Default is 5 degrees.
        primitive_cell (bool): If true: input structures will be reduced to
            primitive cells prior to matching. Defaults to True.
        scale: Input structures are scaled to equivalent volume if true;
            For exact matching, set to False.
        comparator: A comparator object implementing an equals method that
            declares equivalency of sites. Default is SpeciesComparator,
            which implies rigid species mapping. The default instance is
            created once, when the function is defined.
        ncpus: Number of cpus to use. Use of multiple cpus can greatly improve
            fitting speed. Default of None means serial processing.

    Returns:
        Sequence of sequence of entries by structural similarity. e.g,
        [[ entry1, entry2], [entry3, entry4, entry5]]
    """
    start = datetime.datetime.now()
    logger.info("Started at %s", start)
    entries_host = [(entry, _get_host(entry.structure, species_to_remove))
                    for entry in entries]
    if ncpus:
        # Pre-bucket by the comparator's structure hash; each bucket becomes
        # one independent task for the worker pool.
        symm_entries = collections.defaultdict(list)
        for entry, host in entries_host:
            symm_entries[comparator.get_structure_hash(host)].append(
                (entry, host))
        import multiprocessing as mp
        logger.info("Using %s cpus", ncpus)
        manager = mp.Manager()
        try:
            shared_groups = manager.list()
            # Parallel processing only supports Python primitives and not
            # objects, so entries and hosts are shipped as JSON strings.
            tasks = [(json.dumps([e[0] for e in eh], cls=MontyEncoder),
                      json.dumps([e[1] for e in eh], cls=MontyEncoder),
                      ltol, stol, angle_tol, primitive_cell, scale,
                      comparator, shared_groups)
                     for eh in symm_entries.values()]
            pool = mp.Pool(ncpus)
            try:
                pool.map(_perform_grouping, tasks)
            finally:
                # Release the worker processes even if a task raised.
                pool.close()
                pool.join()
            # Copy the results out of the proxy before the manager process
            # that backs it is shut down.
            groups = list(shared_groups)
        finally:
            manager.shutdown()
    else:
        groups = []
        # Serialise from entries_host rather than from `entries`, which has
        # already been consumed above if it was a one-shot iterable.
        _perform_grouping(
            (json.dumps([entry for entry, _ in entries_host],
                        cls=MontyEncoder),
             json.dumps([host for _, host in entries_host],
                        cls=MontyEncoder),
             ltol, stol, angle_tol, primitive_cell, scale, comparator, groups))
    entry_groups = [json.loads(g, cls=MontyDecoder) for g in groups]
    finish = datetime.datetime.now()
    logger.info("Finished at %s", finish)
    logger.info("Took %s", finish - start)
    return entry_groups