def test_enrich(self, sortinghat=False, projects=False):
        """Test enrich all sources"""
        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']
        logging.info("Enriching data in: %s", es_con)
        connectors = get_connectors()
        for con in sorted(connectors.keys()):
            perceval_backend = None
            ocean_index = "test_" + con
            enrich_index = "test_" + con + "_enrich"
            clean = False
            ocean_backend = connectors[con][1](perceval_backend)
            elastic_ocean = get_elastic(es_con, ocean_index, clean,
                                        ocean_backend)
            ocean_backend.set_elastic(elastic_ocean)
            clean = True
            if not sortinghat and not projects:
                enrich_backend = connectors[con][2]()
            elif sortinghat and not projects:
                enrich_backend = connectors[con][2](
                    db_sortinghat=DB_SORTINGHAT)
            elif not sortinghat and projects:
                enrich_backend = connectors[con][2](
                    db_projects_map=DB_PROJECTS)
            else:
                # Both flags set: pass the SortingHat DB and the projects map
                enrich_backend = connectors[con][2](
                    db_sortinghat=DB_SORTINGHAT,
                    db_projects_map=DB_PROJECTS)
            elastic_enrich = get_elastic(es_con, enrich_index, clean,
                                         enrich_backend)
            enrich_backend.set_elastic(elastic_enrich)
            if sortinghat:
                # Load SH identities
                load_identities(ocean_backend, enrich_backend)
            enrich_count = enrich_backend.enrich_items(ocean_backend)

            if enrich_count is not None:
                logging.info("Total items enriched %i ", enrich_count)
Example #2
    def execute(self):

        # FIXME this should be called just once
        # code = 0 when command success
        code = Init(**self.sh_kwargs).run(self.db_sh)

        if not self.backend_section:
            logger.error(
                "Backend not configured in TaskIdentitiesCollection %s",
                self.backend_section)
            return

        backend_conf = self.config.get_conf()[self.backend_section]

        if 'collect' in backend_conf and not backend_conf['collect']:
            logger.info("Don't load ids from a backend without collection %s",
                        self.backend_section)
            return

        if self.load_ids:
            logger.info("[%s] Gathering identities from raw data",
                        self.backend_section)
            enrich_backend = self._get_enrich_backend()
            ocean_backend = self._get_ocean_backend(enrich_backend)
            load_identities(ocean_backend, enrich_backend)
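
# The FIXME at the top of execute() notes that Init(**sh_kwargs).run() is
# expected to return 0 on success, yet the result is never checked. A
# hypothetical helper sketching how the return code could be surfaced; the
# function name and warning text are illustrative, not part of the source.
def init_sortinghat_db(sh_kwargs, db_sh):
    # Run the SortingHat Init command and warn on a non-zero return code
    code = Init(**sh_kwargs).run(db_sh)
    if code != 0:
        logger.warning("SortingHat Init returned %s for database %s",
                       code, db_sh)
    return code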
Example #3
    def run(self):

        # FIXME this should be called just once
        # code = 0 when command success
        code = Init(**self.sh_kwargs).run(self.db_sh)

        if not self.backend_name:
            logger.error("Backend not configured in TaskIdentitiesCollection.")
            return

        if self.load_ids:
            logger.info("[%s] Gathering identities from raw data" %
                        self.backend_name)
            enrich_backend = self.get_enrich_backend()
            ocean_backend = self._get_ocean_backend(enrich_backend)
            load_identities(ocean_backend, enrich_backend)
    def test_enrich(self, sortinghat=False, projects=False):
        """Test enrich all sources"""
        config = configparser.ConfigParser()
        config.read(CONFIG_FILE)
        es_con = dict(config.items('ElasticSearch'))['url']
        db_user = ''
        db_password = ''
        if 'Database' in config:
            if 'user' in config['Database']:
                db_user = config['Database']['user']
            if 'password' in config['Database']:
                db_password = config['Database']['password']
        logging.info("Enriching data in: %s", es_con)
        connectors = get_connectors()
        for con in sorted(connectors.keys()):
            perceval_backend = None
            ocean_index = "test_" + con
            enrich_index = "test_" + con + "_enrich"
            clean = False
            ocean_backend = connectors[con][1](perceval_backend)
            elastic_ocean = get_elastic(es_con, ocean_index, clean,
                                        ocean_backend)
            ocean_backend.set_elastic(elastic_ocean)
            clean = True
            if not sortinghat and not projects:
                enrich_backend = connectors[con][2]()
            elif sortinghat and not projects:
                enrich_backend = connectors[con][2](
                    db_sortinghat=DB_SORTINGHAT,
                    db_user=db_user,
                    db_password=db_password)
            elif not sortinghat and projects:
                enrich_backend = connectors[con][2](
                    db_projects_map=DB_PROJECTS,
                    db_user=db_user,
                    db_password=db_password)
            else:
                # Both flags set: pass the SortingHat DB and the projects map
                enrich_backend = connectors[con][2](
                    db_sortinghat=DB_SORTINGHAT,
                    db_projects_map=DB_PROJECTS,
                    db_user=db_user,
                    db_password=db_password)
            elastic_enrich = get_elastic(es_con, enrich_index, clean,
                                         enrich_backend)
            enrich_backend.set_elastic(elastic_enrich)
            if sortinghat:
                # Load SH identities
                load_identities(ocean_backend, enrich_backend)
            raw_count = sum(1 for _ in ocean_backend.fetch())
            enrich_count = enrich_backend.enrich_items(ocean_backend)

            self.assertEqual(raw_count, enrich_count)
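
# Configuration sketch for the test_enrich variants above: the first variant
# only needs an [ElasticSearch] section with a "url" key, while this one also
# reads an optional [Database] section for the SortingHat credentials. The
# file name, URL and credential values below are placeholders.
import configparser

config = configparser.ConfigParser()
config['ElasticSearch'] = {'url': 'http://localhost:9200'}
config['Database'] = {'user': 'root', 'password': ''}

with open('tests.conf', 'w') as config_file:
    config.write(config_file)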
def enrich_git_items(es, index_git_raw, commits_sha_list, project, db_config):
    commits = _get_git_commits(es, index_git_raw, commits_sha_list)
    projects_file_path = _create_projects_file(project, "git", commits)
    logger.info("Total git track items to be enriched: %i", len(commits))

    enriched_items = []
    enricher = GitEnrich(db_sortinghat=db_config['database'],
                         db_user=db_config['user'],
                         db_password=db_config['password'],
                         db_host=db_config['host'],
                         json_projects_map=projects_file_path)

    # First load identities
    load_identities(commits, enricher)

    # Then enrich
    for commit in commits:
        enriched_items.append(enricher.get_rich_item(commit))

    os.unlink(projects_file_path)

    return enriched_items
Example #6
def enrich_gerrit_items(es, index_gerrit_raw, gerrit_numbers, project, db_config):
    reviews = _get_gerrit_reviews(es, index_gerrit_raw, gerrit_numbers)
    projects_file_path = _create_projects_file(project, "gerrit", reviews)
    logger.info("Total gerrit track items to be enriched: %i", len(reviews))

    enriched_items = []
    enricher = GerritEnrich(db_sortinghat=db_config['database'],
                            db_user=db_config['user'],
                            db_password=db_config['password'],
                            db_host=db_config['host'],
                            json_projects_map=projects_file_path)

    # First load identities
    load_identities(reviews, enricher)

    # Then enrich
    for review in reviews:
        enriched_items.append(enricher.get_rich_item(review))

    os.unlink(projects_file_path)

    return enriched_items
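
# A hypothetical invocation of enrich_git_items and enrich_gerrit_items. Only
# the db_config keys (database, user, password, host) come from the functions
# above; the Elasticsearch host, index names, item identifiers and project
# name are placeholders.
from elasticsearch import Elasticsearch

es = Elasticsearch('http://localhost:9200')
db_config = {
    'database': 'sortinghat_db',
    'user': 'root',
    'password': '',
    'host': '127.0.0.1'
}

git_rich = enrich_git_items(es, 'git_raw_index', ['<commit sha>'],
                            'myproject', db_config)
gerrit_rich = enrich_gerrit_items(es, 'gerrit_raw_index', ['<review number>'],
                                  'myproject', db_config)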