def test_demography_study(self):
    """ Test that the demography study works correctly """

    study, ocean_backend, enrich_backend = self._test_study('enrich_demography')
    with self.assertLogs(logger, level='INFO') as cm:
        if study.__name__ == "enrich_demography":
            study(ocean_backend, enrich_backend, date_field="grimoire_creation_date")
        self.assertEqual(cm.output[0], 'INFO:grimoire_elk.enriched.enrich:[gerrit] '
                                       'Demography starting study %s/test_gerrit_enrich'
                                       % anonymize_url(self.es_con))
        self.assertEqual(cm.output[-1], 'INFO:grimoire_elk.enriched.enrich:[gerrit] '
                                        'Demography end %s/test_gerrit_enrich'
                                        % anonymize_url(self.es_con))

    time.sleep(5)  # HACK: Wait until the gerrit enrich index has been written

    items = [i for i in enrich_backend.fetch()]
    for item in items:
        if item['type'] == 'patchset' and item['patchset_author_name'] is None:
            self.assertFalse('demography_min_date' in item.keys())
            self.assertFalse('demography_max_date' in item.keys())
        elif 'reviewer_uuid' in item:
            self.assertTrue('demography_min_date' in item.keys())
            self.assertTrue('demography_max_date' in item.keys())

    r = enrich_backend.elastic.requests.get(enrich_backend.elastic.index_url + "/_alias",
                                            headers=HEADER_JSON, verify=False)
    self.assertIn(DEMOGRAPHICS_ALIAS, r.json()[enrich_backend.elastic.index]['aliases'])
def test_demography_study(self):
    """ Test that the demography study works correctly """

    study, ocean_backend, enrich_backend = self._test_study('enrich_demography')
    with self.assertLogs(logger, level='INFO') as cm:
        if study.__name__ == "enrich_demography":
            study(ocean_backend, enrich_backend, date_field="utc_commit")
        self.assertEqual(cm.output[0], 'INFO:grimoire_elk.enriched.enrich:[git] Demography '
                                       'starting study %s/test_git_enrich'
                                       % anonymize_url(self.es_con))
        self.assertEqual(cm.output[-1], 'INFO:grimoire_elk.enriched.enrich:[git] Demography '
                                        'end %s/test_git_enrich'
                                        % anonymize_url(self.es_con))

    time.sleep(5)  # HACK: Wait until the git enrich index has been written

    items = [item for item in enrich_backend.fetch()]
    self.assertEqual(len(items), 11)
    for item in items:
        self.assertTrue('demography_min_date' in item.keys())
        self.assertTrue('demography_max_date' in item.keys())
        self.assertNotIn('username:password', item['origin'])
        self.assertNotIn('username:password', item['tag'])

    r = enrich_backend.elastic.requests.get(enrich_backend.elastic.index_url + "/_alias",
                                            headers=HEADER_JSON, verify=False)
    self.assertIn(DEMOGRAPHICS_ALIAS, r.json()[enrich_backend.elastic.index]['aliases'])
def test_add_alias(self):
    """Test whether add_alias properly works"""

    config = configparser.ConfigParser()
    config.read(CONFIG_FILE)
    es_con = dict(config.items('ElasticSearch'))['url']

    tmp_index = "test-add-aliases"
    tmp_index_url = es_con + "/" + tmp_index

    enrich_backend = get_connectors()["git"][2]()
    elastic_enrich = get_elastic(es_con, tmp_index, True, enrich_backend)
    self._enrich.set_elastic(elastic_enrich)

    # add alias
    with self.assertLogs(logger, level='INFO') as cm:
        self._enrich.elastic.add_alias(DEMOGRAPHICS_ALIAS)

    self.assertEqual(cm.output[0],
                     'INFO:grimoire_elk.elastic:Alias %s created on %s.'
                     % (DEMOGRAPHICS_ALIAS, anonymize_url(tmp_index_url)))

    r = self._enrich.requests.get(self._enrich.elastic.index_url + "/_alias",
                                  headers=HEADER_JSON, verify=False)
    self.assertIn(DEMOGRAPHICS_ALIAS, r.json()[self._enrich.elastic.index]['aliases'])

    # add alias again
    with self.assertLogs(logger, level='DEBUG') as cm:
        self._enrich.elastic.add_alias(DEMOGRAPHICS_ALIAS)

    self.assertEqual(cm.output[0],
                     'DEBUG:grimoire_elk.elastic:Alias %s already exists on %s.'
                     % (DEMOGRAPHICS_ALIAS, anonymize_url(tmp_index_url)))

    requests.delete(tmp_index_url, verify=False)
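# A minimal illustrative sketch (not part of the test above): the alias that
# test_add_alias verifies can also be registered directly through the standard
# Elasticsearch alias endpoint (PUT <index>/_alias/<alias>). The function name and
# parameters below are assumptions used only for illustration, not grimoire_elk API.
def create_alias_sketch(index_url, alias, headers=None, verify=False):
    """Create `alias` on the index at `index_url` via the ES alias API."""
    import requests

    # PUT <index>/_alias/<alias> registers the alias on that index; the test can
    # then read it back from "<index>/_alias" as done above.
    res = requests.put("{}/_alias/{}".format(index_url, alias),
                       headers=headers, verify=verify)
    res.raise_for_status()
    return res.json()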
def enrich_items(self, ocean_backend):
    max_items = self.elastic.max_items_bulk
    current = 0
    total = 0
    bulk_json = ""

    url = self.elastic.get_bulk_url()

    logger.debug("[mozillaclub] Adding items to {} (in {} packs)".format(
                 anonymize_url(url), max_items))

    items = ocean_backend.fetch()
    for item in items:
        if current >= max_items:
            # Flush the bulk buffer once it reaches the maximum batch size
            total += self.elastic.safe_put_bulk(url, bulk_json)
            bulk_json = ""
            current = 0

        rich_item = self.get_rich_item(item)
        data_json = json.dumps(rich_item)
        bulk_json += '{"index" : {"_id" : "%s" } }\n' % \
            (item[self.get_field_unique_id()])
        bulk_json += data_json + "\n"  # Bulk document
        current += 1

    if current > 0:
        # Upload the remaining items that did not fill a complete batch
        total += self.elastic.safe_put_bulk(url, bulk_json)

    return total
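# A minimal sketch (not part of the enricher above) of the payload enrich_items builds:
# the Elasticsearch bulk API expects newline-delimited JSON, one action line followed
# by one document line per item. The function name, parameters and sample items are
# assumptions made up for illustration only.
def build_bulk_payload_sketch(rich_items, unique_id_field="uuid"):
    """Return an NDJSON bulk body like the one enrich_items accumulates."""
    import json

    bulk_json = ""
    for rich_item in rich_items:
        # Action line: index the document under its unique identifier
        bulk_json += '{"index" : {"_id" : "%s" } }\n' % rich_item[unique_id_field]
        # Document line: the enriched item itself
        bulk_json += json.dumps(rich_item) + "\n"
    return bulk_json


# Example: build_bulk_payload_sketch([{"uuid": "1", "title": "event"}]) returns
#   '{"index" : {"_id" : "1" } }\n{"uuid": "1", "title": "event"}\n'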
def test_demography_contribution_study(self):
    """ Test that the demography contribution study works correctly """

    study, ocean_backend, enrich_backend = self._test_study('enrich_demography_contribution')
    with self.assertLogs(logger, level='INFO') as cm:
        if study.__name__ == "enrich_demography_contribution":
            study(ocean_backend, enrich_backend, date_field="grimoire_creation_date")
        self.assertEqual(cm.output[0], 'INFO:grimoire_elk.enriched.enrich:[gerrit] '
                                       'Demography Contribution starting study %s/test_gerrit_enrich'
                                       % anonymize_url(self.es_con))
        self.assertEqual(cm.output[-1], 'INFO:grimoire_elk.enriched.enrich:[gerrit] '
                                        'Demography Contribution end %s/test_gerrit_enrich'
                                        % anonymize_url(self.es_con))

    time.sleep(5)  # HACK: Wait until the gerrit enrich index has been written

    items = [i for i in enrich_backend.fetch()]
    for item in items:
        if 'author_uuid' not in item:
            continue

        field_type = item['type']
        if field_type == 'approval':
            self.assertIn('approval_min_date', item)
            self.assertIn('approval_max_date', item)
        elif field_type == 'changeset':
            self.assertIn('changeset_min_date', item)
            self.assertIn('changeset_max_date', item)
        elif field_type == 'comment':
            self.assertIn('comment_min_date', item)
            self.assertIn('comment_max_date', item)
        elif field_type == 'patchset':
            self.assertIn('patchset_min_date', item)
            self.assertIn('patchset_max_date', item)

    r = enrich_backend.elastic.requests.get(enrich_backend.elastic.index_url + "/_alias",
                                            headers=HEADER_JSON, verify=False)
    self.assertIn(DEMOGRAPHICS_CONTRIBUTION_ALIAS,
                  r.json()[enrich_backend.elastic.index]['aliases'])