    def retain_identities(self, retention_time):
        """Retain the identities in SortingHat based on the `retention_time`
        value declared in the setup.cfg.

        :param retention_time: maximum number of minutes wrt the current date
            to retain the SortingHat data
        """
        enrich_es = self.conf['es_enrichment']['url']
        sortinghat_db = self.db
        current_data_source = self.get_backend(self.backend_section)
        active_data_sources = self.config.get_active_data_sources()

        if retention_time is None:
            logger.debug("[identities retention] Retention policy disabled, no identities will be deleted.")
            return

        if retention_time <= 0:
            logger.debug("[identities retention] Retention time must be greater than 0.")
            return

        logger.info('[%s] identities retention start', self.backend_section)

        logger.info('[%s] populate identities index start', self.backend_section)
        # Upload the unique identities seen in the items to the index `grimoirelab_identities_cache`
        populate_identities_index(self.conf['es_enrichment']['url'],
                                  self.conf[self.backend_section]['enriched_index'])
        logger.info('[%s] populate identities index end', self.backend_section)

        # Delete the unique identities in SortingHat which have not been seen in
        # `grimoirelab_identities_cache` during the retention time, and delete the orphan
        # unique identities (those ones in SortingHat but not in `grimoirelab_identities_cache`)
        retain_identities(retention_time, enrich_es, sortinghat_db, current_data_source,
                          active_data_sources)
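
    # Note: a minimal sketch (not grimoire_elk's implementation) of how a
    # `retention_time` expressed in minutes maps to a cutoff date; items and
    # identities last seen before that cutoff are the deletion candidates:
    #
    #   from datetime import datetime, timedelta, timezone
    #   cutoff = datetime.now(timezone.utc) - timedelta(minutes=retention_time)
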
    def execute(self):
        """Enrich the raw items of this backend section, apply the retention
        policies and run the autorefresh and studies phases.
        """
        cfg = self.config.get_conf()

        if 'enrich' in cfg[self.backend_section] and not cfg[self.backend_section]['enrich']:
            logger.info('%s enrich disabled', self.backend_section)
            return

        # ** START SYNC LOGIC **
        # Check that identities tasks are not active before executing
        while True:
            time.sleep(10)  # check every 10s whether the enrichment can start

            with TasksManager.IDENTITIES_TASKS_ON_LOCK:
                with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                    in_identities = TasksManager.IDENTITIES_TASKS_ON
                    if not in_identities:
                        # The enrichment can be started
                        TasksManager.NUMBER_ENRICH_TASKS_ON += 1
                        logger.debug("Number of enrichment tasks active: %i",
                                     TasksManager.NUMBER_ENRICH_TASKS_ON)
                        break
                    else:
                        logger.debug("%s Waiting for enrich until identities is done.",
                                     self.backend_section)
        # ** END SYNC LOGIC **

        try:
            self.__enrich_items()

            retention_time = cfg['general']['retention_time']

            # Delete the items updated before a given date
            self.retain_data(retention_time,
                             self.conf['es_enrichment']['url'],
                             self.conf[self.backend_section]['enriched_index'])

            # Upload the unique identities seen in the items to the index `grimoirelab_identities_cache`
            populate_identities_index(self.conf['es_enrichment']['url'],
                                      self.conf[self.backend_section]['enriched_index'])

            # Delete the unique identities in SortingHat which have not been seen in
            # `grimoirelab_identities_cache` during the retention time, and delete the orphan
            # unique identities (those ones in SortingHat but not in `grimoirelab_identities_cache`)
            self.retain_identities(retention_time)

            autorefresh = cfg['es_enrichment']['autorefresh']

            if autorefresh:
                logger.debug("Doing autorefresh for %s", self.backend_section)
                self.__autorefresh(self._get_enrich_backend())
            else:
                logger.debug("Not doing autorefresh for %s", self.backend_section)

            self.__studies(retention_time)

            if autorefresh:
                self.__autorefresh_studies(cfg)
            else:
                logger.debug("Not doing autorefresh for %s studies", self.backend_section)
        except Exception as e:
            raise e
        finally:
            with TasksManager.NUMBER_ENRICH_TASKS_ON_LOCK:
                TasksManager.NUMBER_ENRICH_TASKS_ON -= 1
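
    # Illustrative setup.cfg excerpt covering the options read above; the values
    # are hypothetical examples, not defaults shipped with this repository:
    #
    #   [general]
    #   retention_time = 43200    # minutes wrt the current date (~30 days)
    #
    #   [es_enrichment]
    #   url = http://localhost:9200
    #   autorefresh = true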