コード例 #1
0
    def retain_identities(self, retention_time):
        """Retain the identities in SortingHat based on the `retention_time`
        value declared in the setup.cfg.

        :param retention_time: maximum number of minutes wrt the current date to retain the SortingHat data
        """
        enrich_es = self.conf['es_enrichment']['url']
        sortinghat_db = self.db
        current_data_source = self.get_backend(self.backend_section)
        active_data_sources = self.config.get_active_data_sources()

        if retention_time is None:
            logger.debug("[identities retention] Retention policy disabled, no identities will be deleted.")
            return

        if retention_time <= 0:
            logger.debug("[identities retention] Retention time must be greater than 0.")
            return

        logger.info('[%s] identities retention start', self.backend_section)

        logger.info('[%s] populate identities index start', self.backend_section)
        # Upload the unique identities seen in the items to the index `grimoirelab_identities_cache`
        populate_identities_index(self.conf['es_enrichment']['url'],
                                  self.conf[self.backend_section]['enriched_index'])
        logger.info('[%s] populate identities index end', self.backend_section)

        # Delete the unique identities in SortingHat which have not been seen in
        # `grimoirelab_identities_cache` during the retention time, and delete the orphan
        # unique identities (those ones in SortingHat but not in `grimoirelab_identities_cache`)
        retain_identities(retention_time, enrich_es, sortinghat_db, current_data_source, active_data_sources)
コード例 #2
0
    def execute(self):
        cfg = self.config.get_conf()

        if 'enrich' in cfg[self.backend_section] and not cfg[self.backend_section]['enrich']:
            logger.info('%s enrich disabled', self.backend_section)
            return

        # ** START SYNC LOGIC **
        # Check that identities tasks are not active before executing
        while True:
            time.sleep(10)  # check each 10s if the enrichment could start
            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                    in_identities = TasksManager.IDENTITIES_TASKS_ON
                    if not in_identities:
                        # The enrichment can be started
                        TasksManager.NUMBER_ENRICH_TASKS_ON += 1
                        logger.debug("Number of enrichment tasks active: %i",
                                     TasksManager.NUMBER_ENRICH_TASKS_ON)
                        break
                    else:
                        logger.debug("%s Waiting for enrich until identities is done.",
                                     self.backend_section)
        #  ** END SYNC LOGIC **

        try:
            self.__enrich_items()

            retention_time = cfg['general']['retention_time']
            # Delete the items updated before a given date
            self.retain_data(retention_time,
                             self.conf['es_enrichment']['url'],
                             self.conf[self.backend_section]['enriched_index'])

            # Upload the unique identities seen in the items to the index `grimoirelab_identities_cache`
            populate_identities_index(self.conf['es_enrichment']['url'],
                                      self.conf[self.backend_section]['enriched_index'])
            # Delete the unique identities in SortingHat which have not been seen in
            # `grimoirelab_identities_cache` during the retention time, and delete the orphan
            # unique identities (those ones in SortingHat but not in `grimoirelab_identities_cache`)
            self.retain_identities(retention_time)

            autorefresh = cfg['es_enrichment']['autorefresh']

            if autorefresh:
                logger.debug("Doing autorefresh for %s", self.backend_section)
                self.__autorefresh(self._get_enrich_backend())
            else:
                logger.debug("Not doing autorefresh for %s", self.backend_section)

            self.__studies(retention_time)

            if autorefresh:
                self.__autorefresh_studies(cfg)
            else:
                logger.debug("Not doing autorefresh for %s studies", self.backend_section)

        except Exception as e:
            raise e
        finally:
            with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                TasksManager.NUMBER_ENRICH_TASKS_ON -= 1