Python enrich_backendの例、grimoire_elk.arthur.enrich_backend Pythonの例

コード例 #1

0

ファイルを表示

    def __enrich_items(self):
        """Run the enrichment for every repository of this backend section.

        Reads the configuration from ``self.config``, optionally overrides
        ``ElasticItems.scroll_size`` and ``ElasticSearch.max_items_bulk``
        from the ``general`` section, and then calls ``enrich_backend`` once
        per repository returned by
        ``TaskProjects.get_repos_by_backend_section``.

        After each successfully enriched repository, the aliases for the
        enriched index are created through ``TaskPanelsAliases`` unless
        ``self.enrich_aliases`` is already set.

        Raises:
            DataEnrichmentError: if anything inside the per-repository
                enrichment step fails. The original exception is chained
                as the cause so the root failure is not lost.
        """
        time_start = time.time()

        logger.info('[%s] enrichment starts', self.backend_section)

        cfg = self.config.get_conf()

        if 'scroll_size' in cfg['general']:
            ElasticItems.scroll_size = cfg['general']['scroll_size']

        if 'bulk_size' in cfg['general']:
            ElasticSearch.max_items_bulk = cfg['general']['bulk_size']

        no_incremental = False
        github_token = None
        pair_programming = False
        if 'github' in cfg and 'backend_token' in cfg['github']:
            github_token = cfg['github']['backend_token']
        if 'git' in cfg and 'pair-programming' in cfg['git']:
            pair_programming = cfg['git']['pair-programming']
        only_studies = False
        only_identities = False

        # repos could change between executions because changes in projects
        repos = TaskProjects.get_repos_by_backend_section(self.backend_section)

        if not repos:
            logger.warning("No enrich repositories for %s",
                           self.backend_section)

        for repo in repos:
            # First process p2o params from repo
            p2o_args = self._compose_p2o_params(self.backend_section, repo)
            # Optional p2o parameters default to None when absent
            filter_raw = p2o_args.get('filter-raw')
            filters_raw_prefix = p2o_args.get('filters-raw-prefix')
            jenkins_rename_file = p2o_args.get('jenkins-rename-file')
            url = p2o_args['url']
            # Second process perceval params from repo
            backend_args = self._compose_perceval_params(
                self.backend_section, url)

            try:
                es_col_url = self._get_collection_url()
                logger.debug('[%s] enrichment starts for %s',
                             self.backend_section, repo)
                backend = self.get_backend(self.backend_section)
                enrich_backend(
                    es_col_url,
                    self.clean,
                    backend,
                    backend_args,
                    cfg[self.backend_section]['raw_index'],
                    cfg[self.backend_section]['enriched_index'],
                    None,  # projects_db is deprecated
                    cfg['projects']['projects_file'],
                    cfg['sortinghat']['database'],
                    no_incremental,
                    only_identities,
                    github_token,
                    False,  # studies are executed in its own Task
                    only_studies,
                    cfg['es_enrichment']['url'],
                    None,  # args.events_enrich
                    cfg['sortinghat']['user'],
                    cfg['sortinghat']['password'],
                    cfg['sortinghat']['host'],
                    None,  # args.refresh_projects,
                    None,  # args.refresh_identities,
                    author_id=None,
                    author_uuid=None,
                    filter_raw=filter_raw,
                    filters_raw_prefix=filters_raw_prefix,
                    jenkins_rename_file=jenkins_rename_file,
                    unaffiliated_group=cfg['sortinghat']['unaffiliated_group'],
                    pair_programming=pair_programming)
            except Exception as ex:
                logger.error(
                    "Something went wrong producing enriched data for %s . "
                    "Using the backend_args: %s ", self.backend_section,
                    backend_args)
                logger.error("Exception: %s", ex)
                # Chain explicitly so the root cause travels with the
                # domain-level error instead of being only implicit context.
                raise DataEnrichmentError(
                    'Failed to produce enriched data for ' +
                    self.backend_section) from ex

            # Let's try to create the aliases for the enriched index
            if not self.enrich_aliases:
                logger.debug("Creating aliases after enrich")
                task_aliases = TaskPanelsAliases(self.config)
                task_aliases.set_backend_section(self.backend_section)
                task_aliases.execute()
                logger.debug("Done creating aliases after enrich")
                # Remember the aliases exist so later repos skip this step
                self.enrich_aliases = True

        spent_time = time.strftime("%H:%M:%S",
                                   time.gmtime(time.time() - time_start))
        logger.info('[%s] enrichment finished in %s', self.backend_section,
                    spent_time)

コード例 #2

0

ファイルを表示

    def __enrich_items(self):
        """Call ``enrich_backend`` for each repository in ``self.repos``.

        Configuration values are read from ``self.conf``. A ``KeyError``
        raised while enriching a repository is logged and the loop moves on
        to the next repository. Once all repositories have been processed
        the method sleeps for five seconds and logs the total elapsed time.
        """
        started_at = time.time()
        section = self.backend_name

        logger.info('[%s] enrichment starts', section)

        cfg = self.conf

        # Fixed switches forwarded positionally to enrich_backend
        no_incremental = False
        only_studies = False
        only_identities = False

        github_token = None
        if 'github' in cfg and 'backend_token' in cfg['github']:
            github_token = cfg['github']['backend_token']

        for repo in self.repos:
            # p2o parameters come first; 'filter-raw' is optional
            p2o_args = self.compose_p2o_params(section, repo)
            if 'filter-raw' in p2o_args:
                filter_raw = p2o_args['filter-raw']
            else:
                filter_raw = None
            repo_url = p2o_args['url']
            # perceval parameters are derived from the repository URL
            backend_args = self.compose_perceval_params(section, repo_url)

            try:
                collection_url = self._get_collection_url()
                logger.debug('[%s] enrichment starts for %s', section, repo)
                enrich_backend(
                    collection_url,
                    self.clean,
                    section,
                    backend_args,
                    cfg[section]['raw_index'],
                    cfg[section]['enriched_index'],
                    None,  # projects_db is deprecated
                    cfg['projects_file'],
                    cfg['sh_database'],
                    no_incremental,
                    only_identities,
                    github_token,
                    False,  # studies are executed in its own Task
                    only_studies,
                    cfg['es_enrichment'],
                    None,  # args.events_enrich
                    cfg['sh_user'],
                    cfg['sh_password'],
                    cfg['sh_host'],
                    None,  # args.refresh_projects
                    None,  # args.refresh_identities
                    author_id=None,
                    author_uuid=None,
                    filter_raw=filter_raw)
            except KeyError as err:
                # Only KeyError is handled here: log it and keep going
                logger.exception(err)

        time.sleep(5)  # Safety sleep to avoid too quick execution

        elapsed = time.gmtime(time.time() - started_at)
        spent_time = time.strftime("%H:%M:%S", elapsed)
        logger.info('[%s] enrichment finished in %s', section, spent_time)

コード例 #3

0

ファイルを表示

ファイル: p2o.py プロジェクト: chubbymaggie/grimoirelab-elk

                             args.project, args.arthur)

                # Wait for one second, to ensure bulk write reflects in searches
                # https://www.elastic.co/guide/en/elasticsearch/reference/6.1/docs-refresh.html
                # (there are better ways of doing this, but for now...)
                time.sleep(1)
                logging.info("Backend feed completed")

            if args.enrich or args.enrich_only:
                unaffiliated_group = None
                enrich_backend(
                    url, clean, args.backend, args.backend_args, args.index,
                    args.index_enrich, args.db_projects_map,
                    args.json_projects_map, args.db_sortinghat,
                    args.no_incremental, args.only_identities,
                    args.github_token, args.studies, args.only_studies,
                    args.elastic_url_enrich, args.events_enrich, args.db_user,
                    args.db_password, args.db_host, args.refresh_projects,
                    args.refresh_identities, args.author_id, args.author_uuid,
                    args.filter_raw, args.filters_raw_prefix,
                    args.jenkins_rename_file, unaffiliated_group,
                    args.pair_programming)
                logging.info("Enrich backend completed")
            elif args.events_enrich:
                logging.info("Enrich option is needed for events_enrich")
        else:
            logging.error("You must configure a backend")

    except KeyboardInterrupt:
        logging.info("\n\nReceived Ctrl-C or other break signal. Exiting.\n")
        sys.exit(0)

コード例 #4

0

ファイルを表示

ファイル: p2o.py プロジェクト: jgbarah/GrimoireELK

                ElasticItems.scroll_size = args.scroll_size
            if not args.enrich_only:
                feed_backend(url, clean, args.fetch_cache,
                             args.backend, args.backend_args,
                             args.index, args.index_enrich, args.project,
                             args.arthur)
                logging.info("Backed feed completed")

            if args.enrich or args.enrich_only:
                enrich_backend(url, clean, args.backend, args.backend_args,
                               args.index, args.index_enrich,
                               args.db_projects_map, args.json_projects_map,
                               args.db_sortinghat,
                               args.no_incremental, args.only_identities,
                               args.github_token,
                               args.studies, args.only_studies,
                               args.elastic_url_enrich, args.events_enrich,
                               args.db_user, args.db_password, args.db_host,
                               args.refresh_projects, args.refresh_identities,
                               args.author_id, args.author_uuid,
                               args.filter_raw, args.filters_raw_prefix,
                               args.jenkins_rename_file)
                logging.info("Enrich backend completed")
            elif args.events_enrich:
                logging.info("Enrich option is needed for events_enrich")
        else:
            logging.error("You must configure a backend")

    except KeyboardInterrupt:
        logging.info("\n\nReceived Ctrl-C or other break signal. Exiting.\n")
        sys.exit(0)