def test_run(self):
        """Run the enrich task for the git backend and compare index sizes.

        After the task has been executed, the raw and the enriched indexes
        are queried and the raw index is expected to hold more items than
        the enriched one.
        """
        config = Config(CONF_FILE)
        conf = config.get_conf()
        # The projects have to be loaded before the enrich task is executed
        TaskProjects(config).execute()
        enrich_task = TaskEnrich(config, backend_section=GIT_BACKEND_SECTION)
        self.assertEqual(enrich_task.execute(), None)

        def total_hits(index_url):
            """Return `hits.total` from a size-0 search on `index_url`."""
            response = requests.get(index_url + "/_search?size=0")
            return response.json()['hits']['total']

        # Build the URLs of both indexes from the configuration
        collection_url = conf['es_collection']['url']
        enrichment_url = conf['es_enrichment']['url']
        raw_url = collection_url + "/" + conf[GIT_BACKEND_SECTION]['raw_index']
        enriched_url = enrichment_url + "/" + conf[GIT_BACKEND_SECTION]['enriched_index']

        raw_total = total_hits(raw_url)
        enriched_total = total_hits(enriched_url)

        # Fewer enriched than raw items are expected, because the enriched
        # items are generated from https://github.com/VizGrimoire/GrimoireLib with
        # --filters-raw-prefix data.files.file:grimoirelib_alch data.files.file:README.md
        # (see the [git] section in tests/test-projects.json)
        self.assertGreater(raw_total, enriched_total)
Esempio n. 2
0
def get_enrich(config, backend_section):
    """Run the enrich phase for one backend section.

    The projects are loaded through `TaskProjects` first, then a
    `TaskEnrich` task is executed for the section and a completion
    message is logged.

    :param config: a Mordred config object
    :param backend_section: the backend section where the enrich phase is executed
    """

    # Load the projects, then enrich the requested backend section
    projects_task = TaskProjects(config)
    projects_task.execute()

    TaskEnrich(config, backend_section=backend_section).execute()
    logging.info("Loading enriched data finished!")
    def test_execute_from_archive(self):
        """Test collection and enrichment for backends fetched from archives.

        Raw data collection is executed for every listed backend section
        first; afterwards the enrich task is executed for each of them and
        is expected to return None.
        """

        # The conf file stores proj_file -> 'test-projects-archive.json'
        config = Config('archives-test.cfg')

        archived_sections = (
            'askbot', 'bugzilla', 'bugzillarest', 'confluence', 'discourse',
            'dockerhub', 'gerrit', 'github:issue', 'github:pull',
            'gitlab:issue', 'gitlab:merge', 'google_hits', 'jenkins', 'jira',
            'mediawiki', 'meetup', 'mozillaclub', 'nntp', 'phabricator',
            'redmine', 'remo', 'rss', 'stackexchange', 'slack', 'telegram',
            'twitter',
        )

        # The projects have to be loaded before any task is executed
        TaskProjects(config).execute()

        # First pass: raw data collection for every section
        for section in archived_sections:
            TaskRawDataCollection(config, backend_section=section).execute()

        # Second pass: enrichment for every section
        for section in archived_sections:
            enrich_task = TaskEnrich(config, backend_section=section)
            self.assertEqual(enrich_task.execute(), None)
    def test_initialization(self):
        """Test whether the task attributes are initialized.

        The task must keep the config object and the backend section it
        was created with.
        """

        config = Config(CONF_FILE)
        enrich_task = TaskEnrich(config, backend_section=GIT_BACKEND_SECTION)

        self.assertEqual(enrich_task.config, config)
        self.assertEqual(enrich_task.backend_section, GIT_BACKEND_SECTION)
def get_enrich(config, backend_section, repos_to_check=None):
    """Execute the enrich phase for a given backend section

    Repos are only checked if they are in BOTH `repos_to_check` and the `projects.json`

    The projects are loaded first, then the enrich task is executed. If the
    enrich task raises, the error is logged together with its traceback and
    the process exits with status -1.

    :param config: a Mordred config object
    :param backend_section: the backend section where the enrich phase is executed
    :param repos_to_check: A list of repo URLs to check, or None to check all repos
    :raises SystemExit: if the enrich task raises an exception
    """

    TaskProjects(config).execute()
    task = TaskEnrich(config, backend_section=backend_section, allowed_repos=repos_to_check)
    try:
        # Only the enrich step is guarded; the success message is logged in
        # the `else` branch so the handler covers nothing but the task itself
        task.execute()
    except Exception as e:
        # The process exits right after this, so the log record is the only
        # trace of the failure: keep the traceback next to the message
        logging.error(str(e), exc_info=True)
        sys.exit(-1)
    else:
        logging.info("Loading enriched data finished!")
    def test_execute(self):
        """Run the enrich task for the git backend and compare index sizes.

        After the task has been executed, the raw and the enriched indexes
        are queried and both are expected to hold the same number of items.
        """
        # NOTE(review): this nested setUp is defined but never called within
        # test_execute, so the database reset it performs does not run here.
        # It looks like a class-level fixture that ended up nested -- confirm.
        def setUp(self):
            sh_conf = Config(CONF_FILE).get_conf()['sortinghat']

            self.sh_kwargs = {
                key: sh_conf[key]
                for key in ('user', 'password', 'database', 'host')
            }
            self.sh_kwargs['port'] = None

            # Drop and re-create the database to start from an empty state
            Database.drop(**self.sh_kwargs)
            Database.create(**self.sh_kwargs)
            self.sh_db = Database(**self.sh_kwargs)

        config = Config(CONF_FILE)
        conf = config.get_conf()
        # The projects have to be loaded before the enrich task is executed
        TaskProjects(config).execute()
        enrich_task = TaskEnrich(config, backend_section=GIT_BACKEND_SECTION)
        self.assertEqual(enrich_task.execute(), None)

        def total_hits(index_url):
            """Return `hits.total` from a size-0 search on `index_url`."""
            # TLS certificate verification is disabled for this request
            response = requests.get(index_url + "/_search?size=0", verify=False)
            return response.json()['hits']['total']

        # Build the URLs of both indexes from the configuration
        collection_url = conf['es_collection']['url']
        enrichment_url = conf['es_enrichment']['url']
        raw_url = collection_url + "/" + conf[GIT_BACKEND_SECTION]['raw_index']
        enriched_url = enrichment_url + "/" + conf[GIT_BACKEND_SECTION]['enriched_index']

        raw_total = total_hits(raw_url)
        enriched_total = total_hits(enriched_url)

        # Every raw item is expected to have an enriched counterpart
        self.assertEqual(raw_total, enriched_total)
    def test_execute_no_sh(self):
        """Run the enrich task using the configuration without SortingHat.

        After the task has been executed, the raw and the enriched indexes
        are queried and both are expected to hold the same number of items.
        """

        config = Config(CONF_FILE_NO_SH)
        conf = config.get_conf()
        # The projects have to be loaded before the enrich task is executed
        TaskProjects(config).execute()
        enrich_task = TaskEnrich(config, backend_section=GIT_BACKEND_SECTION)
        self.assertEqual(enrich_task.execute(), None)

        def total_hits(index_url):
            """Return `hits.total` from a size-0 search on `index_url`."""
            # TLS certificate verification is disabled for this request
            response = requests.get(index_url + "/_search?size=0", verify=False)
            return response.json()['hits']['total']

        # Build the URLs of both indexes from the configuration
        collection_url = conf['es_collection']['url']
        enrichment_url = conf['es_enrichment']['url']
        raw_url = collection_url + "/" + conf[GIT_BACKEND_SECTION]['raw_index']
        enriched_url = enrichment_url + "/" + conf[GIT_BACKEND_SECTION]['enriched_index']

        raw_total = total_hits(raw_url)
        enriched_total = total_hits(enriched_url)

        # Every raw item is expected to have an enriched counterpart
        self.assertEqual(raw_total, enriched_total)
    def test_studies(self):
        """Test the enrich task under different `studies` configurations.

        The task is executed once per configuration. Valid configurations
        must make it return None, while configurations naming a wrong study
        must make it raise DataEnrichmentError.
        """
        config = Config(CONF_FILE)
        conf = config.get_conf()
        # The projects have to be loaded before the enrich task is executed
        TaskProjects(config).execute()
        enrich_task = TaskEnrich(config, backend_section=GIT_BACKEND_SECTION)

        def run_with_studies(section, studies):
            """Set `studies` on `section` and return the task execution result."""
            # NOTE(review): set_param is called on the object returned by
            # get_conf(), not on the Config instance -- confirm it is intended
            conf.set_param(section, 'studies', studies)
            return enrich_task.execute()

        # Studies set to None
        self.assertEqual(run_with_studies('git', None), None)

        # Empty list of studies
        self.assertEqual(run_with_studies('git', []), None)

        # A wrong study
        with self.assertRaises(DataEnrichmentError):
            self.assertEqual(run_with_studies('git', ['bad_study']), None)

        # A single study
        self.assertEqual(run_with_studies('git', ['enrich_onion']), None)

        # Several studies
        self.assertEqual(
            run_with_studies('git', ['enrich_demography:1', 'enrich_areas_of_code']),
            None)

        # Kafka kip study
        # NOTE(review): this sets the 'mbox' section while the task under
        # test was created with GIT_BACKEND_SECTION -- confirm it is intended
        self.assertEqual(run_with_studies('mbox', ['kafka_kip']), None)

        # Several studies, one of them wrong
        with self.assertRaises(DataEnrichmentError):
            self.assertEqual(
                run_with_studies('git', ['enrich_demography:1', "enrich_areas_of_code1"]),
                None)