def test_enrich(self, sortinghat=False, projects=False):
    """Test enrich all sources.

    For every connector returned by ``get_connectors()`` (in sorted name
    order) this builds the ocean backend on the ``test_<connector>`` index,
    builds the enrich backend on a freshly cleaned
    ``test_<connector>_enrich`` index and calls ``enrich_items`` on it.

    :param sortinghat: when true, the enrich backend is created with
        ``db_sortinghat=DB_SORTINGHAT`` and ``load_identities`` is called
        before enriching
    :param projects: when true, the enrich backend is created with
        ``db_projects_map=DB_PROJECTS``
    """
    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)
    es_con = dict(config.items('ElasticSearch'))['url']
    logging.info("Enriching data in: %s", es_con)

    # Build the constructor arguments from the flags independently. The
    # previous if/elif chain had no branch for sortinghat and projects both
    # being true, which left enrich_backend unbound.
    enrich_kwargs = {}
    if sortinghat:
        enrich_kwargs['db_sortinghat'] = DB_SORTINGHAT
    if projects:
        enrich_kwargs['db_projects_map'] = DB_PROJECTS

    connectors = get_connectors()
    for con in sorted(connectors):
        perceval_backend = None
        ocean_index = "test_" + con
        enrich_index = "test_" + con + "_enrich"

        # The raw (ocean) index is opened with clean=False so its existing
        # contents are the input of the enrichment.
        ocean_backend = connectors[con][1](perceval_backend)
        elastic_ocean = get_elastic(es_con, ocean_index, False, ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)

        # The enriched index is opened with clean=True.
        enrich_backend = connectors[con][2](**enrich_kwargs)
        elastic_enrich = get_elastic(es_con, enrich_index, True,
                                     enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)

        if sortinghat:
            # Load SH identities
            load_identities(ocean_backend, enrich_backend)

        enrich_count = enrich_backend.enrich_items(ocean_backend)
        if enrich_count is not None:
            logging.info("Total items enriched %i ", enrich_count)
def execute(self):
    """Gather identities from the raw data of the configured backend.

    Runs ``Init`` against ``self.db_sh`` first, then returns early (doing
    nothing else) when no backend section is set, when the backend
    configuration has a falsy ``collect`` entry, or when ``self.load_ids``
    is falsy. Otherwise it calls ``load_identities`` with the ocean and
    enrich backends.
    """
    # FIXME this should be called just once
    # The call returns 0 when the command succeeds; the value is not checked.
    Init(**self.sh_kwargs).run(self.db_sh)

    section = self.backend_section
    if not section:
        logger.error(
            "Backend not configured in TaskIdentitiesCollection %s", section)
        return

    backend_conf = self.config.get_conf()[section]
    collection_disabled = ('collect' in backend_conf
                           and not backend_conf['collect'])
    if collection_disabled:
        logger.info("Don't load ids from a backend without collection %s",
                    section)
        return

    if not self.load_ids:
        return

    logger.info("[%s] Gathering identities from raw data", section)
    enrich_backend = self._get_enrich_backend()
    ocean_backend = self._get_ocean_backend(enrich_backend)
    load_identities(ocean_backend, enrich_backend)
def run(self):
    """Gather identities from the raw data of the configured backend.

    Runs ``Init`` against ``self.db_sh`` first. If ``self.backend_name`` is
    falsy an error is logged and nothing else happens. When
    ``self.load_ids`` is truthy, ``load_identities`` is called with the
    ocean and enrich backends.
    """
    # FIXME this should be called just once
    # The call returns 0 when the command succeeds; the value is not checked.
    Init(**self.sh_kwargs).run(self.db_sh)

    if not self.backend_name:
        logger.error("Backend not configured in TaskIdentitiesCollection.")
        return

    if self.load_ids:
        # Pass the argument to the logger instead of pre-formatting with %,
        # so the message is only built when the record is actually emitted.
        logger.info("[%s] Gathering identities from raw data",
                    self.backend_name)
        enrich_backend = self.get_enrich_backend()
        ocean_backend = self._get_ocean_backend(enrich_backend)
        load_identities(ocean_backend, enrich_backend)
def test_enrich(self, sortinghat=False, projects=False):
    """Test enrich all sources.

    For every connector returned by ``get_connectors()`` (in sorted name
    order) this builds the ocean backend on the ``test_<connector>`` index,
    builds the enrich backend on a freshly cleaned
    ``test_<connector>_enrich`` index, enriches the raw items and asserts
    that the value returned by ``enrich_items`` equals the number of items
    yielded by ``ocean_backend.fetch()``.

    :param sortinghat: when true, the enrich backend is created with
        ``db_sortinghat=DB_SORTINGHAT`` and ``load_identities`` is called
        before enriching
    :param projects: when true, the enrich backend is created with
        ``db_projects_map=DB_PROJECTS``
    """
    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)
    es_con = dict(config.items('ElasticSearch'))['url']

    # Database credentials are optional: a missing section or option
    # falls back to the empty string.
    db_user = config.get('Database', 'user', fallback='')
    db_password = config.get('Database', 'password', fallback='')

    logging.info("Enriching data in: %s", es_con)

    # Build the constructor arguments from the flags independently. The
    # previous if/elif chain had no branch for sortinghat and projects both
    # being true, which left enrich_backend unbound.
    enrich_kwargs = {}
    if sortinghat:
        enrich_kwargs['db_sortinghat'] = DB_SORTINGHAT
    if projects:
        enrich_kwargs['db_projects_map'] = DB_PROJECTS
    if enrich_kwargs:
        # Credentials are only passed when a database is involved; with
        # neither flag the backend is built with no arguments, as before.
        enrich_kwargs['db_user'] = db_user
        enrich_kwargs['db_password'] = db_password

    connectors = get_connectors()
    for con in sorted(connectors):
        perceval_backend = None
        ocean_index = "test_" + con
        enrich_index = "test_" + con + "_enrich"

        # The raw (ocean) index is opened with clean=False so its existing
        # contents are the input of the enrichment.
        ocean_backend = connectors[con][1](perceval_backend)
        elastic_ocean = get_elastic(es_con, ocean_index, False, ocean_backend)
        ocean_backend.set_elastic(elastic_ocean)

        # The enriched index is opened with clean=True.
        enrich_backend = connectors[con][2](**enrich_kwargs)
        elastic_enrich = get_elastic(es_con, enrich_index, True,
                                     enrich_backend)
        enrich_backend.set_elastic(elastic_enrich)

        if sortinghat:
            # Load SH identities
            load_identities(ocean_backend, enrich_backend)

        # Count the raw items without materialising them in a list.
        raw_count = sum(1 for _ in ocean_backend.fetch())
        enrich_count = enrich_backend.enrich_items(ocean_backend)
        self.assertEqual(raw_count, enrich_count)
def enrich_git_items(es, index_git_raw, commits_sha_list, project, db_config):
    """Enrich the git commits selected by ``commits_sha_list``.

    The commits are obtained with ``_get_git_commits``, a projects file is
    created for them with ``_create_projects_file``, identities are loaded
    and each commit is converted with ``GitEnrich.get_rich_item``.

    :param es: passed through to ``_get_git_commits``
    :param index_git_raw: passed through to ``_get_git_commits``
    :param commits_sha_list: passed through to ``_get_git_commits``
    :param project: passed through to ``_create_projects_file``
    :param db_config: mapping with the keys ``database``, ``user``,
        ``password`` and ``host`` used to configure the enricher
    :returns: list with one enriched item per commit
    """
    commits = _get_git_commits(es, index_git_raw, commits_sha_list)
    projects_file_path = _create_projects_file(project, "git", commits)

    # The projects file is only needed while enriching; remove it even when
    # building the enricher, loading identities or enriching fails.
    try:
        logger.info("Total git track items to be enriched: %i", len(commits))

        enricher = GitEnrich(db_sortinghat=db_config['database'],
                             db_user=db_config['user'],
                             db_password=db_config['password'],
                             db_host=db_config['host'],
                             json_projects_map=projects_file_path)

        # First load identities
        load_identities(commits, enricher)

        # Then enrich
        return [enricher.get_rich_item(commit) for commit in commits]
    finally:
        os.unlink(projects_file_path)
def enrich_gerrit_items(es, index_gerrit_raw, gerrit_numbers, project,
                        db_config):
    """Enrich the gerrit reviews selected by ``gerrit_numbers``.

    The reviews are obtained with ``_get_gerrit_reviews``, a projects file
    is created for them with ``_create_projects_file``, identities are
    loaded and each review is converted with ``GerritEnrich.get_rich_item``.

    :param es: passed through to ``_get_gerrit_reviews``
    :param index_gerrit_raw: passed through to ``_get_gerrit_reviews``
    :param gerrit_numbers: passed through to ``_get_gerrit_reviews``
    :param project: passed through to ``_create_projects_file``
    :param db_config: mapping with the keys ``database``, ``user``,
        ``password`` and ``host`` used to configure the enricher
    :returns: list with one enriched item per review
    """
    reviews = _get_gerrit_reviews(es, index_gerrit_raw, gerrit_numbers)
    projects_file_path = _create_projects_file(project, "gerrit", reviews)

    # The projects file is only needed while enriching; remove it even when
    # building the enricher, loading identities or enriching fails.
    try:
        logger.info("Total gerrit track items to be enriched: %i",
                    len(reviews))

        enricher = GerritEnrich(db_sortinghat=db_config['database'],
                                db_user=db_config['user'],
                                db_password=db_config['password'],
                                db_host=db_config['host'],
                                json_projects_map=projects_file_path)

        # First load identities
        load_identities(reviews, enricher)

        # Then enrich
        return [enricher.get_rich_item(review) for review in reviews]
    finally:
        os.unlink(projects_file_path)