Ejemplo n.º 1
0
    def get_keys(self):
        """
        Collect the doc keys to process.

        Keys reported by ``source_keys_updated`` for the materials store
        (restricted by ``self.query``), the thermo store and every store in
        ``self.aux`` -- each compared against the website store -- are pooled
        and then restricted to keys present in the materials store under
        ``self.query``.

        Returns:
            set of keys to process
        """
        # Every key in the main materials collection that matches the query;
        # used below as the final filter.
        valid_keys = set(
            self.materials.distinct(self.materials.key, criteria=self.query)
        )

        materials_updated = source_keys_updated(
            source=self.materials, target=self.website, query=self.query
        )
        thermo_updated = source_keys_updated(
            source=self.thermo, target=self.website
        )
        candidates = set(materials_updated) | set(thermo_updated)

        # Add keys for aux docs that have been updated since last processed.
        for aux_store in self.aux:
            aux_updated = source_keys_updated(
                source=aux_store, target=self.website
            )
            self.logger.info(
                "Only considering {} new keys for {}".format(
                    len(aux_updated), aux_store.collection_name
                )
            )
            candidates.update(aux_updated)

        # Ensure all keys are present in main materials collection
        return candidates & valid_keys
Ejemplo n.º 2
0
    def get_items(self):
        """
        Yield the source documents that need to be processed.

        When ``self.incremental`` is truthy the keys come from
        ``source_keys_updated`` (source vs. target, restricted by
        ``self.query``); otherwise every distinct source key matching
        ``self.query`` is used. ``self.total`` is set to the number of keys,
        and the documents are then fetched in batches of ``self.chunk_size``
        keys.

        Returns:
            generator of source documents
        """
        self.logger.info("Starting {} Builder".format(self.__class__.__name__))

        self.ensure_indexes()

        if self.incremental:
            keys = source_keys_updated(
                source=self.source, target=self.target, query=self.query
            )
        else:
            keys = self.source.distinct(self.source.key, self.query)

        self.logger.info("Processing {} items".format(len(keys)))

        if self.projection:
            # Always fetch the key and last-updated fields in addition to the
            # requested projection; the set removes duplicates.
            projection = list(
                set(self.projection + [self.source.key, self.source.lu_field])
            )
        else:
            projection = None

        self.total = len(keys)
        for chunked_keys in grouper(keys, self.chunk_size, None):
            # Drop None entries (presumably the padding grouper adds to the
            # last chunk -- the third argument looks like its fill value).
            # An identity test is used instead of filter(None.__ne__, ...):
            # None.__ne__(x) returns NotImplemented for any non-None x, and
            # truth-testing NotImplemented is deprecated (TypeError on 3.14+).
            chunked_keys = [key for key in chunked_keys if key is not None]

            # Each batch is fully read into a list before any document is
            # yielded.
            yield from list(
                self.source.query(
                    criteria={self.source.key: {"$in": chunked_keys}},
                    properties=projection,
                )
            )
Ejemplo n.º 3
0
    def get_items(self):
        """
        Gets all materials that need new substrates

        Returns:
            generator of dicts with the keys "structure", "task_id" and
            "elastic_tensor" ("elastic_tensor" is None when no elasticity
            document or tensor is found for the material)
        """

        self.logger.info("Substrate Builder Started")

        self.logger.info("Setting up indicies")
        self.ensure_indicies()

        # Keys of all materials matching the query, i.e. the allowed scope.
        in_scope = set(
            self.materials.distinct(self.materials.key, criteria=self.query)
        )
        from_materials = source_keys_updated(
            source=self.materials, target=self.substrates, query=self.query
        )
        from_elasticity = source_keys_updated(
            source=self.elasticity, target=self.substrates
        )

        # Pool both update lists, then keep only keys within our scope.
        stale = set(from_elasticity + from_materials)
        to_process = stale & in_scope

        self.logger.info(
            "Updating all substrate calculations for {} materials".format(
                len(to_process)
            )
        )

        for key in to_process:
            elastic_doc = self.elasticity.query_one(
                criteria={self.elasticity.key: key}
            )
            if elastic_doc:
                tensor = elastic_doc.get("elasticity", {}).get(
                    "elastic_tensor", None
                )
            else:
                # No (or empty) elasticity document for this material.
                tensor = None

            material = self.materials.query_one(
                criteria={self.materials.key: key},
                properties=["structure", self.materials.key],
            )

            yield {
                "structure": material["structure"],
                "task_id": material[self.materials.key],
                "elastic_tensor": tensor,
            }
Ejemplo n.º 4
0
    def updated_keys(self, target, criteria=None):
        """
        Returns keys for docs that are newer in the target store in comparison
        with this store when comparing the last updated field (lu_field)

        Indexes on this store's key and lu_field are ensured first. The
        comparison is delegated to ``source_keys_updated`` with the given
        ``target`` acting as its source and this store as its target.

        Args:
            target (Store): store to look for updated documents
            criteria (dict): mongo query to limit scope

        Returns:
            list of keys that have been updated in target store
        """
        for indexed_field in (self.key, self.lu_field):
            self.ensure_index(indexed_field)

        # Note the role swap: the store passed in is the "source" here.
        return source_keys_updated(source=target, target=self, query=criteria)
Ejemplo n.º 5
0
 def get_items(self):
     """
     Yield the items derived from groups of updated source documents.

     The keys reported by ``source_keys_updated`` (source vs. target,
     restricted by ``self.query``) select the source documents, which are
     fetched with only the grouping properties projected and handed to
     ``self.docs_to_groups``. ``self.total`` is set to the number of groups,
     multiplied by ``self.n_items_per_group`` when that attribute exists and
     is an int >= 1.

     Returns:
         generator of the items produced by ``self.group_to_items`` for
         each group
     """
     updated_keys = source_keys_updated(
         self.source, self.target, query=self.query
     )
     # source_keys_updated returns key values, not a query document, so it
     # has to be wrapped in an "$in" filter before being used as criteria.
     criteria = {self.source.key: {"$in": list(updated_keys)}}

     # Evaluate once and materialise, so a one-shot iterable is also safe.
     grouping = list(self.grouping_properties())
     if all(isinstance(entry, str) for entry in grouping):
         # Plain field names: include each of them, and exclude "_id"
         # unless it was requested explicitly.
         properties = {entry: 1 for entry in grouping}
         properties.setdefault("_id", 0)
     else:
         # Otherwise the entries are (field, include) pairs.
         properties = {entry: include for entry, include in grouping}

     groups = self.docs_to_groups(
         self.source.query(criteria=criteria, properties=properties)
     )

     self.total = len(groups)
     n = getattr(self, "n_items_per_group", None)
     if isinstance(n, int) and n >= 1:
         self.total *= n

     for group in groups:
         yield from self.group_to_items(group)
Ejemplo n.º 6
0
    def get_items(self):
        """
        Gets all materials that need a new DOS

        Each yielded material document holds the material key, "structure"
        and "bandstructure"; when the material has a
        ``bandstructure.uniform_task``, the matching document from the
        bandstructures store is attached under "bandstructure_uniform".

        Returns:
            generator of materials to calculate DOS
        """

        self.logger.info("BoltzTrap Dos Builder Started")

        # All relevant materials that have been updated since boltztrap was last run
        # and a uniform bandstructure exists
        q = dict(self.query or {})  # copy so self.query is not mutated
        q.update({"bandstructure.uniform_task": {"$exists": 1}})
        # Pass the augmented query (not self.query) so the uniform-task
        # restriction is actually applied.
        mats = set(
            source_keys_updated(source=self.materials,
                                target=self.boltztrap_dos,
                                query=q))

        self.logger.info(
            "Found {} new materials for calculating boltztrap dos".format(
                len(mats)))

        for m in mats:
            # A single document is needed here (it is read with .get() and
            # extended below), so use query_one rather than query.
            mat = self.materials.query_one(
                criteria={self.materials.key: m},
                properties=[self.materials.key, "structure", "bandstructure"])

            # If a bandstructure uniform task exists
            bs_task_id = mat.get("bandstructure", {}).get("uniform_task", None)
            if bs_task_id:
                bs_dict = self.bandstructures.query_one(
                    criteria={self.bandstructures.key: bs_task_id})
                mat["bandstructure_uniform"] = bs_dict

            yield mat