예제 #1
0
파일: thermo.py 프로젝트: utf/emmet
    def get_affected_chemsys(self, chemical_systems: Set) -> Set:
        """
        Find the stored chemical systems touched by changes to the given ones

        Args:
            chemical_systems: dash-separated chemical system strings that changed

        Returns:
            the chemical systems from the materials store that list one of the
            supplied systems among their chemsys_permutations
        """
        # Collect the individual elements that appear in the changed systems
        changed_elements = set()
        for system in chemical_systems:
            changed_elements.update(system.split("-"))

        # Any stored chemsys sharing at least one element is a candidate
        candidates = self.materials.distinct(
            "chemsys", {"elements": {"$in": list(changed_elements)}})

        # Map every sub-system to the candidate systems that contain it
        parents_by_sub = defaultdict(list)
        for candidate in candidates:
            for sub in chemsys_permutations(candidate):
                parents_by_sub[sub].append(candidate)

        # Union the parent systems of each changed system
        affected_chemsys = set()
        for system in chemical_systems:
            affected_chemsys.update(parents_by_sub[system])

        self.logger.debug(
            f"Found {len(affected_chemsys)} chemical systems affected by this build"
        )
        return affected_chemsys
예제 #2
0
파일: thermo.py 프로젝트: rkingsbury/emmet
    def get_items(self) -> Iterator[List[Dict]]:
        """
        Yield the entries of every chemical system that needs (re)processing

        The systems to process are the updated, new and affected chemical
        systems expanded through chemsys_permutations; they are visited from
        the fewest elements to the most.
        """
        self.logger.info("Thermo Builder Started")
        self.logger.info("Setting indexes")
        self.ensure_indexes()

        updated_chemsys = self.get_updated_chemsys()
        new_chemsys = self.get_new_chemsys()
        changed_chemsys = updated_chemsys | new_chemsys
        affected_chemsys = self.get_affected_chemsys(changed_chemsys)

        # Expand each system into its sub-systems, skipping any system that an
        # earlier expansion already covered
        to_process_chemsys = set()
        for system in changed_chemsys | affected_chemsys:
            if system in to_process_chemsys:
                continue
            to_process_chemsys |= chemsys_permutations(system)

        self.total = len(to_process_chemsys)
        self.logger.info(
            f"Found {len(to_process_chemsys)} chemical systems with new/updated materials to process"
        )

        # Smallest systems first: the number of elements is one more than the
        # number of dashes in the system string
        ordered_chemsys = sorted(to_process_chemsys,
                                 key=lambda s: s.count("-") + 1)
        for system in ordered_chemsys:
            yield self.get_entries(system)
예제 #3
0
파일: thermo.py 프로젝트: utf/emmet
    def get_items(self) -> Iterator[List[Dict]]:
        """
        Gets whole chemical systems of entries to process, grouped by sandbox set

        The updated, new and affected chemical systems are expanded through
        chemsys_permutations and visited in order of increasing element count.

        Yields:
            (sandboxes, sandbox_entries) pairs: one sandbox set returned by
            maximal_spanning_non_intersecting_subsets, and the entries of the
            current chemical system that belong to every sandbox in that set.
            NOTE(review): the return annotation still says List[Dict]; the
            yielded value is a 2-tuple.
        """

        self.logger.info("Thermo Builder Started")

        self.logger.info("Setting indexes")
        self.ensure_indexes()

        updated_chemsys = self.get_updated_chemsys()
        new_chemsys = self.get_new_chemsys()

        affected_chemsys = self.get_affected_chemsys(updated_chemsys
                                                     | new_chemsys)

        # Remove overlapping chemical systems
        # Must be a set (not a dict literal) so that |= performs a set union
        to_process_chemsys = set()
        for chemsys in updated_chemsys | new_chemsys | affected_chemsys:
            if chemsys not in to_process_chemsys:
                to_process_chemsys |= chemsys_permutations(chemsys)

        self.logger.info(
            f"Found {len(to_process_chemsys)} chemical systems with new/updated materials to process"
        )
        self.total = len(to_process_chemsys)

        # Yield the chemical systems in order of increasing size
        # Will build them in a similar manner to fast Pourbaix
        for chemsys in sorted(to_process_chemsys,
                              key=lambda x: len(x.split("-"))):
            entries = self.get_entries(chemsys)

            # build sandbox sets: ["a"] , ["a","b"], ["core","a","b"]
            # NOTE(review): entries are read through `.data` here; confirm
            # get_entries returns objects exposing that attribute
            sandbox_sets = {
                frozenset(entry.data.get("sandboxes", {})) for entry in entries
            }
            sandbox_sets = maximal_spanning_non_intersecting_subsets(
                sandbox_sets)
            self.logger.debug(f"Found {len(sandbox_sets)}: {sandbox_sets}")

            for sandboxes in sandbox_sets:
                # only yield maximal subsets so that we can process equivalent
                # sandbox combinations at a time; filter on the same
                # "sandboxes" key the sets above were built from
                sandbox_entries = [
                    entry for entry in entries
                    if all(sandbox in entry.data.get("sandboxes", [])
                           for sandbox in sandboxes)
                ]

                yield sandboxes, sandbox_entries
예제 #4
0
파일: thermo.py 프로젝트: utf/emmet
    def get_entries(self, chemsys: str) -> List[ComputedEntry]:
        """
        Gets the entries for a chemical system and all of its sub-systems,
        reading from the entries cache first and the materials store second

        Args:
            chemsys(str): a chemical system represented by string elements separated by a dash (-)
        Returns:
            list: the cached entries plus the entries freshly read from the
            materials store for this system and its sub-systems
        """

        self.logger.info(f"Getting entries for: {chemsys}")

        # First check the cache
        all_chemsys = chemsys_permutations(chemsys)
        cached_chemsys = all_chemsys & set(self._entries_cache.keys())
        query_chemsys = all_chemsys - cached_chemsys
        all_entries = list(
            chain.from_iterable(self._entries_cache[c]
                                for c in cached_chemsys))

        self.logger.debug(
            f"Getting {len(cached_chemsys)} sub-chemsys from cache for {chemsys}"
        )
        self.logger.debug(
            f"Getting {len(query_chemsys)} sub-chemsys from DB for {chemsys}")

        # Second grab the materials docs
        # An empty "$in" list matches no documents, so skip the round trip
        # entirely when every sub-system was served from the cache
        if query_chemsys:
            new_q = dict(self.query)
            new_q["chemsys"] = {"$in": list(query_chemsys)}
            new_q["deprecated"] = False
            materials_docs = list(
                self.materials.query(
                    criteria=new_q,
                    properties=[self.materials.key, "entries", "sandboxes"]))
        else:
            materials_docs = []

        self.logger.debug(
            f"Got {len(materials_docs)} entries from DB for {len(query_chemsys)} sub-chemsys for {chemsys}"
        )

        # Tag each entry with its document's sandboxes and store it in the
        # cache under the chemsys built from its own composition
        for doc in materials_docs:
            for entry in doc.get("entries", {}):
                entry["data"]["sandboxes"] = doc["sandboxes"]
                elsyms = sorted(set(entry["composition"]))
                self._entries_cache["-".join(elsyms)].append(entry)
                all_entries.append(entry)

        self.logger.info(f"Total entries in {chemsys} : {len(all_entries)}")

        return all_entries
예제 #5
0
파일: test_utils.py 프로젝트: utf/emmet
def test_chemsys_permutations(test_dir):
    """Check how many sub-systems are produced for 1-, 2- and 3-element systems."""
    expected_counts = (("Sr", 1), ("Sr-Hf", 3), ("Sr-Hf-O", 7))
    for chemsys, count in expected_counts:
        assert len(chemsys_permutations(chemsys)) == count