def test_run(self):
    """Check that the enrich task runs and produces fewer items than raw.

    The projects are loaded, the enrich task is executed for the git
    backend section (expected to return ``None``), and the item totals
    reported by the raw and enriched indexes are then compared.
    """

    def total_hits(index_url):
        """Return the ``hits.total`` value of a size-0 search on the index."""
        response = requests.get(index_url + "/_search?size=0")
        return response.json()['hits']['total']

    config = Config(CONF_FILE)
    cfg = config.get_conf()

    # The projects have to be loaded before the enrich task can run
    TaskProjects(config).execute()

    enrich_task = TaskEnrich(config, backend_section=GIT_BACKEND_SECTION)
    self.assertEqual(enrich_task.execute(), None)

    # Build the URLs of the raw and enriched indexes from the configuration
    collection_url = cfg['es_collection']['url']
    enrichment_url = cfg['es_enrichment']['url']
    raw_index = collection_url + "/" + cfg[GIT_BACKEND_SECTION]['raw_index']
    enrich_index = enrichment_url + "/" + cfg[GIT_BACKEND_SECTION]['enriched_index']

    raw_items = total_hits(raw_index)
    enriched_items = total_hits(enrich_index)

    # the number of raw items is bigger since the enriched items are generated based on:
    # https://github.com/VizGrimoire/GrimoireLib
    # --filters-raw-prefix data.files.file:grimoirelib_alch data.files.file:README.md
    # see [git] section in tests/test-projects.json
    self.assertGreater(raw_items, enriched_items)
def get_enrich(config, backend_section):
    """Run the enrich phase for one backend section.

    The projects are loaded first, then the enrich task for the given
    section is executed, and finally a completion message is logged.

    :param config: a Mordred config object
    :param backend_section: the backend section where the enrich phase is executed
    """
    projects_task = TaskProjects(config)
    projects_task.execute()

    enrich_task = TaskEnrich(config, backend_section=backend_section)
    enrich_task.execute()

    logging.info("Loading enriched data finished!")
def test_execute_from_archive(self):
    """Test fetching data from archives.

    Raw data collection is executed for every listed backend section
    first; only afterwards is each section enriched, and every enrich
    task is expected to return ``None``.
    """
    # proj_file -> 'test-projects-archive.json' stored within the conf file
    config = Config('archives-test.cfg')

    archive_sections = [
        'askbot',
        'bugzilla',
        'bugzillarest',
        'confluence',
        'discourse',
        'dockerhub',
        'gerrit',
        'github:issue',
        'github:pull',
        'gitlab:issue',
        'gitlab:merge',
        'google_hits',
        'jenkins',
        'jira',
        'mediawiki',
        'meetup',
        'mozillaclub',
        'nntp',
        'phabricator',
        'redmine',
        'remo',
        'rss',
        'stackexchange',
        'slack',
        'telegram',
        'twitter',
    ]

    # The projects have to be loaded before any task runs
    TaskProjects(config).execute()

    # First pass: collect the raw data for all the sections
    for section in archive_sections:
        TaskRawDataCollection(config, backend_section=section).execute()

    # Second pass: enrich each section
    for section in archive_sections:
        enrich_task = TaskEnrich(config, backend_section=section)
        self.assertEqual(enrich_task.execute(), None)
def test_initialization(self):
    """Test whether attributes are initialized.

    The task must keep the config object and the backend section it was
    built with.
    """
    conf = Config(CONF_FILE)
    enrich_task = TaskEnrich(conf, backend_section=GIT_BACKEND_SECTION)

    self.assertEqual(enrich_task.config, conf)
    self.assertEqual(enrich_task.backend_section, GIT_BACKEND_SECTION)
def get_enrich(config, backend_section, repos_to_check=None):
    """Run the enrich phase for one backend section.

    Repos are only checked if they are in BOTH `repos_to_check` and the
    `projects.json`. The list is handed to the enrich task as its
    ``allowed_repos`` argument.

    If the enrich task raises, the error message is logged and the
    process exits with status -1.

    :param config: a Mordred config object
    :param backend_section: the backend section where the enrich phase is executed
    :param repos_to_check: A list of repo URLs to check, or None to check all repos
    """
    projects_task = TaskProjects(config)
    projects_task.execute()

    enrich_task = TaskEnrich(
        config,
        backend_section=backend_section,
        allowed_repos=repos_to_check,
    )

    try:
        enrich_task.execute()
        logging.info("Loading enriched data finished!")
    except Exception as error:
        # Only the exception message is logged before terminating
        message = str(error)
        logging.error(message)
        sys.exit(-1)
def test_execute(self):
    """Test whether the Task could be run.

    The projects are loaded, the enrich task is executed for the git
    backend section (expected to return ``None``), and the raw and
    enriched indexes are then expected to report the same item total.
    """
    # NOTE(review): this test used to define a nested ``setUp(self)``
    # helper that dropped and re-created the SortingHat database, but the
    # helper was never called, so it had no effect. It has been removed as
    # dead code. If a clean database is needed before this test, the reset
    # belongs in the TestCase's own ``setUp`` - confirm with the class.

    config = Config(CONF_FILE)
    cfg = config.get_conf()

    # We need to load the projects
    TaskProjects(config).execute()

    backend_section = GIT_BACKEND_SECTION
    task = TaskEnrich(config, backend_section=backend_section)
    self.assertIsNone(task.execute())

    # Check that the enrichment went well
    es_collection = cfg['es_collection']['url']
    es_enrichment = cfg['es_enrichment']['url']
    raw_index = es_collection + "/" + cfg[GIT_BACKEND_SECTION]['raw_index']
    enrich_index = es_enrichment + "/" + cfg[GIT_BACKEND_SECTION]['enriched_index']

    # size=0 asks only for the totals; verify=False skips TLS certificate
    # verification for these requests
    r = requests.get(raw_index + "/_search?size=0", verify=False)
    raw_items = r.json()['hits']['total']
    r = requests.get(enrich_index + "/_search?size=0", verify=False)
    enriched_items = r.json()['hits']['total']

    self.assertEqual(raw_items, enriched_items)
def test_execute_no_sh(self):
    """Test whether the Task could be run without SortingHat.

    Uses the configuration in ``CONF_FILE_NO_SH``; after the enrich task
    has run, the raw and enriched indexes are expected to report the
    same item total.
    """

    def total_hits(index_url):
        """Return the ``hits.total`` value of a size-0 search on the index."""
        response = requests.get(index_url + "/_search?size=0", verify=False)
        return response.json()['hits']['total']

    config = Config(CONF_FILE_NO_SH)
    cfg = config.get_conf()

    # The projects have to be loaded before the enrich task can run
    TaskProjects(config).execute()

    enrich_task = TaskEnrich(config, backend_section=GIT_BACKEND_SECTION)
    self.assertEqual(enrich_task.execute(), None)

    # Build the URLs of the raw and enriched indexes from the configuration
    collection_url = cfg['es_collection']['url']
    enrichment_url = cfg['es_enrichment']['url']
    raw_index = collection_url + "/" + cfg[GIT_BACKEND_SECTION]['raw_index']
    enrich_index = enrichment_url + "/" + cfg[GIT_BACKEND_SECTION]['enriched_index']

    raw_items = total_hits(raw_index)
    enriched_items = total_hits(enrich_index)

    self.assertEqual(raw_items, enriched_items)
def test_studies(self):
    """Test whether the studies configuration works.

    The same enrich task is executed repeatedly while the ``studies``
    parameter is changed. Absent, empty and valid study lists are
    expected to make ``execute()`` return ``None``; a list containing an
    unknown study name is expected to raise ``DataEnrichmentError``.
    """
    config = Config(CONF_FILE)
    cfg = config.get_conf()

    # We need to load the projects
    TaskProjects(config).execute()

    backend_section = GIT_BACKEND_SECTION
    task = TaskEnrich(config, backend_section=backend_section)

    # NOTE(review): ``set_param`` is called on the object returned by
    # ``get_conf()``, which other code in this file indexes like a dict.
    # Confirm that this object really exposes ``set_param`` (as opposed to
    # ``config`` itself).

    # Studies not configured (None)
    cfg.set_param('git', 'studies', None)
    self.assertIsNone(task.execute())

    # Empty list of studies
    cfg.set_param('git', 'studies', [])
    self.assertIsNone(task.execute())

    # Configure a wrong study: execute() itself must raise, so no
    # assertion on its return value is needed inside the context manager
    cfg.set_param('git', 'studies', ['bad_study'])
    with self.assertRaises(DataEnrichmentError):
        task.execute()

    # Configure a single study
    cfg.set_param('git', 'studies', ['enrich_onion'])
    self.assertIsNone(task.execute())

    # Configure several studies
    cfg.set_param('git', 'studies', ['enrich_demography:1', 'enrich_areas_of_code'])
    self.assertIsNone(task.execute())

    # Configure kafka kip study (set on the 'mbox' section, while the task
    # under test was built for GIT_BACKEND_SECTION)
    cfg.set_param('mbox', 'studies', ['kafka_kip'])
    self.assertIsNone(task.execute())

    # Configure several studies, one wrong
    cfg.set_param('git', 'studies', ['enrich_demography:1', "enrich_areas_of_code1"])
    with self.assertRaises(DataEnrichmentError):
        task.execute()