def setUp(self):
    self.db = get_db_cnx(self.index, "monocle.test.1.")
    for dataset in self.datasets:
        index_dataset(self.db, dataset)
    self.otds = [
        OrphanTaskDataForEL(
            _id="https://bugtracker.domain.dom/123",
            task_data=TaskData(
                crawler_name="mycrawler",
                updated_at=datetime.strptime(
                    "2020-01-01T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"
                ),
                change_url="https://tests.com/unit/repo1/pull/1",
                ttype=["BUG"],
                tid="123",
                url="https://bugtracker.domain.dom/123",
                title="It does not work",
            ),
        ),
        OrphanTaskDataForEL(
            _id="https://bugtracker.domain.dom/124",
            task_data=TaskData(
                crawler_name="mycrawler",
                updated_at=datetime.strptime(
                    "2020-01-02T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"
                ),
                change_url="https://tests.com/unit/repo1/pull/1",
                ttype=["BUG"],
                tid="124",
                url="https://bugtracker.domain.dom/124",
                title="It does not work",
            ),
        ),
        OrphanTaskDataForEL(
            _id="https://bugtracker.domain.dom/125",
            task_data=TaskData(
                crawler_name="mycrawler",
                updated_at=datetime.strptime(
                    "2020-01-03T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"
                ),
                change_url="https://tests.com/unit/repo2/pull/2",
                ttype=["BUG"],
                tid="125",
                url="https://bugtracker.domain.dom/125",
                title="It does not work",
            ),
        ),
    ]
def test_update_change_and_events_with_orphan_tds(self):
    self.otds.append(
        OrphanTaskDataForEL(
            _id="https://bugtracker.domain.dom/126",
            task_data=TaskData(
                crawler_name="mycrawler",
                updated_at=datetime.strptime(
                    "2020-01-04T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"
                ),
                change_url="https://tests.com/unit/repomissing/pull/1",
                ttype=["BUG"],
                tid="126",
                url="https://bugtracker.domain.dom/126",
                title="It does not work",
            ),
        ),
    )
    self.db.update_task_data(self.otds)
    self.db.update_change_and_events_with_orphan_tds(
        {
            "https://tests.com/unit/repo1/pull/1": ["c1", "c1_e2"],
            "https://tests.com/unit/repo2/pull/2": ["c2"],
            "https://tests.com/unit/repo2/pull/3": ["c3"],
        }
    )
    changes = self.db.get_changes_by_url(
        [
            "https://tests.com/unit/repo1/pull/1",
            "https://tests.com/unit/repo2/pull/2",
            "https://tests.com/unit/repo2/pull/3",
            "https://tests.com/unit/repomissing/pull/1",
        ],
        size=100,
    )
    self.assertEqual(len(changes), 3)
    r1p1 = [c for c in changes if c["url"].endswith("repo1/pull/1")][0]
    r2p2 = [c for c in changes if c["url"].endswith("repo2/pull/2")][0]
    r2p3 = [c for c in changes if c["url"].endswith("repo2/pull/3")][0]
    # Ensure tasks data are assigned to the right changes
    self.assertEqual(len(r1p1["tasks_data"]), 2)
    self.assertEqual(len(r2p2["tasks_data"]), 1)
    self.assertEqual(len(r2p3.get("tasks_data", [])), 0)
    events = self.db.get_change_events_by_url(
        ["https://tests.com/unit/repo1/pull/1"]
    )
    events_with_td = [e for e in events if "tasks_data" in e]
    self.assertEqual(len(events_with_td), 1)
    self.assertEqual(events_with_td[0]["id"], "c1_e2")
    self.assertListEqual(
        sorted([td["tid"] for td in events_with_td[0]["tasks_data"]]),
        sorted(["123", "124"]),
    )
    # Ensure only the orphan task data pointing to the missing change
    # remains in the DB
    otds = self.db.get_orphan_tds_by_change_urls(
        [
            "https://tests.com/unit/repo1/pull/1",
            "https://tests.com/unit/repo2/pull/2",
            "https://tests.com/unit/repo2/pull/3",
            "https://tests.com/unit/repomissing/pull/1",
        ]
    )
    self.assertEqual(len(otds), 1)
def task_data_add(request: AddRequest) -> AddResponse:
    error, result = check_crawler_request(
        request.index, request.crawler, request.apikey
    )
    if error:
        return AddResponse(error=result)
    if not (0 < len(request.items) <= INPUT_TASK_DATA_LIMIT):
        return AddResponse(error=TD.AddFailed)
    extracted_data = request.items
    crawler_config = result
    index = request.index
    # Find changes in EL ids that match urls
    change_urls = [e.change_url for e in extracted_data]
    db = create_db_connection(index)
    mc = db.get_changes_by_url(change_urls, INPUT_TASK_DATA_LIMIT)
    me = db.get_change_events_by_url(change_urls)
    mc = {
        r["url"]: {
            "id": r["id"],
            "td": createELTaskData(r.get("tasks_data", [])),
        }
        for r in mc
    }
    # Prepare input data set
    update_docs: List[Union[TaskDataForEL, OrphanTaskDataForEL]] = []
    for input_task_data in extracted_data:
        td = toTaskData(request.crawler, input_task_data)
        if input_task_data.change_url in mc:
            # First check if a td matches the input one
            prev_td = [
                otd
                for otd in mc[input_task_data.change_url]["td"]
                if otd.url == input_task_data.url
            ]
            if len(prev_td) > 1:
                raise RuntimeError("Multiple td match in previous td")
            # Remove the previous outdated one if any
            if prev_td:
                mc[input_task_data.change_url]["td"].remove(prev_td[0])
            # Add the new one to the list
            mc[input_task_data.change_url]["td"].append(td)
        else:
            update_docs.append(
                OrphanTaskDataForEL(_id=input_task_data.url, task_data=td)
            )
    total_orphans_to_update = len(update_docs)
    for _mc in mc.values():
        update_docs.append(
            TaskDataForEL(
                _id=_mc["id"],
                tasks_data=_mc["td"],
            )
        )
    total_changes_to_update = len(update_docs) - total_orphans_to_update
    for _me in me:
        update_docs.append(
            TaskDataForEL(_id=_me["id"], tasks_data=mc[_me["url"]]["td"])
        )
    total_change_events_to_update = (
        len(update_docs) - total_orphans_to_update - total_changes_to_update
    )
    # Now insert the data
    err = db.update_task_data(source_it=update_docs)
    # https://github.com/elastic/elasticsearch-py/blob/f4447bf996bdee47a0eb4c736bd39dea20a4486e/elasticsearch/helpers/actions.py#L177
    if err:
        return AddResponse(error=TD.AddFailed)
    db.set_task_crawler_metadata(
        crawler_config.name,
        push_infos={
            "last_post_at": datetime.utcnow().replace(microsecond=0),
            "total_docs_posted": len(extracted_data),
            "total_changes_updated": total_changes_to_update,
            "total_change_events_updated": total_change_events_to_update,
            "total_orphans_updated": total_orphans_to_update,
        },
    )
    return AddResponse()
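# A minimal caller sketch for task_data_add (illustration only, not part of
# the API). The exact shape of AddRequest and of its items is an assumption
# inferred from the attribute accesses above (index, crawler, apikey, items;
# each item exposes change_url and url and is converted with toTaskData).
def example_task_data_add(item):
    request = AddRequest(
        index="monocle-unittest",
        crawler="mycrawler",
        apikey="secret",  # assumption: plain API key checked by check_crawler_request
        items=[item],
    )
    response = task_data_add(request)
    if response.error:
        raise RuntimeError("task data insertion failed: %s" % response.error)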
class TestQueries(unittest.TestCase):
    index = "monocle-unittest"
    datasets = [
        "objects/unit_repo1.json",
        "objects/unit_repo2.json",
    ]
    otds = [
        OrphanTaskDataForEL(
            _id="https://bugtracker.domain.dom/123",
            task_data=TaskData(
                crawler_name="mycrawler",
                updated_at=datetime.strptime(
                    "2020-01-01T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"
                ),
                change_url="https://tests.com/unit/repo1/pull/1",
                ttype=["BUG", "CLIENT_IMPACT"],
                tid="123",
                url="https://bugtracker.domain.dom/123",
                title="It does not work",
                priority="HIGH",
            ),
        ),
        OrphanTaskDataForEL(
            _id="https://bugtracker.domain.dom/124",
            task_data=TaskData(
                crawler_name="mycrawler",
                updated_at=datetime.strptime(
                    "2020-01-02T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"
                ),
                change_url="https://tests.com/unit/repo1/pull/1",
                ttype=["FutureFeature"],
                tid="124",
                url="https://bugtracker.domain.dom/124",
                title="It does not work",
                priority="MEDIUM",
            ),
        ),
        OrphanTaskDataForEL(
            _id="https://bugtracker.domain.dom/125",
            task_data=TaskData(
                crawler_name="mycrawler",
                updated_at=datetime.strptime(
                    "2020-01-03T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"
                ),
                change_url="https://tests.com/unit/repo2/pull/2",
                ttype=["BUG", "DOC"],
                tid="125",
                url="https://bugtracker.domain.dom/125",
                title="It does not work",
                priority="LOW",
            ),
        ),
    ]

    @classmethod
    def setUpClass(cls):
        logging.basicConfig(
            level=logging.DEBUG,
            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        )
        log = logging.getLogger(__name__)
        # log to stderr
        log.addHandler(logging.StreamHandler())
        cls.eldb = get_db_cnx(cls.index, "monocle.test.")
        for dataset in cls.datasets:
            index_dataset(cls.eldb, dataset)
        cls.eldb.update_task_data(cls.otds)
        cls.eldb.update_changes_with_orphan_tds(
            {
                "https://tests.com/unit/repo1/pull/1": "c1",
                "https://tests.com/unit/repo2/pull/2": "c2",
                "https://tests.com/unit/repo2/pull/3": "c3",
            }
        )

    @classmethod
    def tearDownClass(cls):
        cls.eldb.es.indices.delete(index=cls.eldb.prefix + cls.index)

    def test_unknown_query(self):
        """ Test unknown query exception """
        params = set_params({})
        self.assertRaises(
            UnknownQueryException,
            self.eldb.run_named_query,
            "unknown",
            "unit/repo1",
            params,
        )

    def test_all_queries(self):
        """ Test all public queries """
        failing = []
        for query in queries.public_queries:
            params = set_params({})
            ret = self.eldb.run_named_query(query, "unit/repo1", params)
            if not isinstance(ret, (dict, list, tuple, int)):
                failing.append((query, ret))
        self.assertEqual(failing, [])

    def test_scan(self):
        """ Test internal query: _scan """
        params = set_params({})
        ret = queries._scan(self.eldb.es, self.eldb.index, "unit/repo1", params)
        ids = [obj["id"] for obj in ret]
        expected = ["c1_e1", "c1_e2", "c1_e3", "c1_e4", "c1_e5"]
        self.assertCountEqual(ids, expected)

    def test_first_created_event(self):
        """ Test internal query: _first_created_event """
        params = set_params({})
        ret = queries._first_created_event(
            self.eldb.es, self.eldb.index, "unit/repo1", params
        )
        self.assertEqual(ret, "2020-01-01T00:00:00Z")

    def test_events_top(self):
        """ Test internal query: _events_top """
        params = set_params({})
        ret = queries._events_top(
            self.eldb.es, self.eldb.index, "unit/repo1", "type", params
        )
        expected = {
            "count_avg": 1.25,
            "count_median": 1.0,
            "items": [
                {"doc_count": 2, "key": "ChangeReviewedEvent"},
                {"doc_count": 1, "key": "ChangeCommentedEvent"},
                {"doc_count": 1, "key": "ChangeCreatedEvent"},
                {"doc_count": 1, "key": "ChangeMergedEvent"},
            ],
            "total": 4,
            "total_hits": 5,
        }
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)
    def test_count_events(self):
        """ Test query: count_events """
        params = set_params({})
        ret = self.eldb.run_named_query("count_events", "unit/repo1", params)
        self.assertEqual(ret, 5)

    def test_count_authors(self):
        """ Test query: count_authors """
        params = set_params({})
        ret = self.eldb.run_named_query("count_authors", "unit/repo1", params)
        self.assertEqual(ret, 2)
        params = set_params({"type": "ChangeCreatedEvent"})
        ret = self.eldb.run_named_query("count_authors", "unit/repo1", params)
        self.assertEqual(ret, 1)

    def test_events_histo(self):
        """ Test query: events_histo """
        params = set_params({"gte": "2020-01-01", "lte": "2020-01-02"})
        ret = self.eldb.run_named_query("events_histo", "unit/repo1", params)
        expected = (
            [
                {"doc_count": 4, "key": 1577836800000, "key_as_string": "2020-01-01"},
                {"doc_count": 1, "key": 1577923200000, "key_as_string": "2020-01-02"},
            ],
            2.5,
        )
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

    def test_authors_histo(self):
        """ Test query: authors_histo """
        params = set_params({"gte": "2020-01-01", "lte": "2020-01-02"})
        ret = self.eldb.run_named_query("authors_histo", "unit/repo1", params)
        expected = {
            "avg_authors": 1.5,
            "buckets": [
                {
                    "authors": ["jane", "john"],
                    "doc_count": 2,
                    "key": 1577836800000,
                    "key_as_string": "2020-01-01",
                },
                {
                    "authors": ["jane"],
                    "doc_count": 1,
                    "key": 1577923200000,
                    "key_as_string": "2020-01-02",
                },
            ],
            "total_authors": 2,
        }
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

    def test_events_top_authors(self):
        """ Test query: events_top_authors """
        params = set_params({})
        ret = self.eldb.run_named_query("events_top_authors", "unit/repo1", params)
        expected = {
            "count_avg": 2.5,
            "count_median": 2.5,
            "items": [
                {"doc_count": 3, "key": "jane"},
                {"doc_count": 2, "key": "john"},
            ],
            "total": 2,
            "total_hits": 5,
        }
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

    def test_repos_top_merged(self):
        """ Test query: repos_top_merged """
        params = set_params({"state": "MERGED"})
        ret = self.eldb.run_named_query("repos_top", "unit/repo[12]", params)
        expected = {
            "items": [
                {"key": "unit/repo2", "doc_count": 2},
                {"key": "unit/repo1", "doc_count": 1},
            ],
            "count_avg": 1.5,
            "count_median": 1.5,
            "total": 2,
            "total_hits": 3,
        }
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

    def test_files_param(self):
        """ Test files param: last_changes """
        params = set_params({"files": r".*backend.py"})
        ret = self.eldb.run_named_query("last_changes", ".*", params)
        self.assertEqual(ret["total"], 1, ret)

    def test_state_param(self):
        """ Test state param: changes_and_events """
        params = set_params({"state": "MERGED"})
        ret = self.eldb.run_named_query("changes_and_events", "unit/repo[12]", params)
        self.assertEqual(ret["total"], 3, ret)

    def test_approvals_param(self):
        """ Test approvals param: changes_and_events """
        params = set_params({"approvals": "Code-Review+2", "gte": "2020-01-01"})
        ret = self.eldb.run_named_query("changes_and_events", "unit/repo[12]", params)
        self.assertEqual(ret["total"], 2, ret)
        self.assertCountEqual([item["id"] for item in ret["items"]], ["c1", "c1_e4"])
        params = set_params(
            {"approvals": "CHANGES_REQUESTED,APPROVED", "gte": "2020-01-01"}
        )
        ret = self.eldb.run_named_query("changes_and_events", "unit/repo[12]", params)
        self.assertEqual(ret["total"], 4, ret)
        self.assertCountEqual(
            [item["id"] for item in ret["items"]], ["c2", "c2_e4", "c3", "c3_e2"]
        )

    def test_task_params(self):
        """ Test task related params """
        params = set_params({"task_priority": "HIGH"})
        ret = self.eldb.run_named_query("last_changes", ".*", params)
        self.assertEqual(ret["total"], 1, ret)
        params = set_params({"task_priority": "HIGH,MEDIUM,LOW"})
        ret = self.eldb.run_named_query("last_changes", ".*", params)
        self.assertEqual(ret["total"], 2, ret)
        params = set_params({"task_type": "BUG"})
        ret = self.eldb.run_named_query("last_changes", ".*", params)
        self.assertEqual(ret["total"], 2, ret)
        params = set_params({"task_type": "BUG,CLIENT_IMPACT"})
        ret = self.eldb.run_named_query("last_changes", ".*", params)
        self.assertEqual(ret["total"], 2, ret)
        params = set_params(
            {"task_priority": "LOW", "task_type": "BUG,CLIENT_IMPACT"}
        )
        ret = self.eldb.run_named_query("last_changes", ".*", params)
        self.assertEqual(ret["total"], 1, ret)

    def test_exclude_approvals_param(self):
        """ Test exclude_approvals param: last_changes """
        params = set_params({"exclude_approvals": "Verified-1", "gte": "2020-01-01"})
        ret = self.eldb.run_named_query("last_changes", "unit/repo1", params)
        self.assertEqual(ret["total"], 0, ret)
        params = set_params(
            {
                "approvals": "Code-Review+2",
                "exclude_approvals": "Verified-1",
                "gte": "2020-01-01",
            }
        )
        ret = self.eldb.run_named_query("last_changes", "unit/repo1", params)
        self.assertEqual(ret["total"], 0, ret)

    def test_get_indices(self):
        """ Test get_indices """
        ret = self.eldb.get_indices()
        self.assertEqual(ret, [self.index])

    def test_branch_param(self):
        """ Test branch param: last_changes """
        params = set_params({"state": "MERGED", "target_branch": "maintainance"})
        ret = self.eldb.run_named_query("last_changes", "unit/repo[12]", params)
        self.assertEqual(ret["total"], 0, ret)
        params = set_params({"target_branch": "master"})
        ret = self.eldb.run_named_query("changes_and_events", "unit/repo[12]", params)
        ret2 = self.eldb.run_named_query(
            "changes_and_events", "unit/repo[12]", set_params({})
        )
        self.assertEqual(ret["total"], ret2["total"])

    def test_change_and_events(self):
        """ Test change_and_events query """
        params = set_params({})
        ret = self.eldb.run_named_query("changes_and_events", "unit/repo1", params)
        self.assertEqual(ret["total"], 6)
        change = [c for c in ret["items"] if c["type"] == "Change"][0]
        self.assertTrue(change["tests_included"])
        self.assertTrue(change["has_issue_tracker_links"])
        self.assertListEqual(
            change["issue_tracker_links"][0],
            ["#42", "https://github.com/unit/repo1/issues/42"],
        )

    def test_last_changes(self):
        """ Test last_changes query """
        params = set_params({"state": "OPEN"})
        ret = self.eldb.run_named_query("last_changes", "unit/repo[12]", params)
        self.assertEqual(ret["total"], 1)
        self.assertFalse(ret["items"][0]["tests_included"])
        params = set_params({"state": "MERGED"})
        ret = self.eldb.run_named_query("last_changes", "unit/repo[12]", params)
        self.assertEqual(ret["total"], 3)
        for change in ret["items"]:
            self.assertIn("tests_included", list(change.keys()))

    def test_self_merged_param(self):
        """ Test self_merged param: last_changes """
        params = set_params({"state": "MERGED", "self_merged": True})
        ret = self.eldb.run_named_query("last_changes", "unit/repo[12]", params)
        self.assertEqual(ret["total"], 1)
        self.assertEqual(ret["items"][0]["author"], ret["items"][0]["merged_by"])

    def test_tests_included_param(self):
        """ Test tests_included param: last_changes """
        params = set_params({"tests_included": True})
        ret = self.eldb.run_named_query("last_changes", "unit/repo[12]", params)
        self.assertEqual(ret["total"], 1, ret)
        params = set_params({})
        ret = self.eldb.run_named_query("last_changes", "unit/repo[12]", params)
        self.assertEqual(ret["total"], 4, ret)
    def test_has_issue_tracker_links_param(self):
        """ Test has_issue_tracker_links param: last_changes """
        params = set_params({"has_issue_tracker_links": "github.com"})
        ret = self.eldb.run_named_query("last_changes", "unit/repo[12]", params)
        self.assertEqual(ret["total"], 1, ret)
        params = set_params({})
        ret = self.eldb.run_named_query("last_changes", "unit/repo[12]", params)
        self.assertEqual(ret["total"], 4, ret)

    def test_changes_lifecycle_stats(self):
        """ Test changes_lifecycle_stats query """
        params = set_params({"gte": "2020-01-01", "lte": "2020-01-03"})
        ret = self.eldb.run_named_query("changes_lifecycle_stats", ".*", params)
        expected = {
            "ChangeCommitForcePushedEvent": {"authors_count": 0, "events_count": 0},
            "ChangeCommitPushedEvent": {"authors_count": 1, "events_count": 1},
            "ChangeCreatedEvent": {"authors_count": 2, "events_count": 2},
            "abandoned": 0,
            "self_merged": 0,
            "commits": 1.0,
            "duration": 86400.0,
            "duration_variability": 0.0,
            "histos": {
                "ChangeAbandonedEvent": (
                    [
                        {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                        {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                        {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                    ],
                    0,
                ),
                "ChangeCommitForcePushedEvent": (
                    [
                        {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                        {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                        {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                    ],
                    0,
                ),
                "ChangeCommitPushedEvent": (
                    [
                        {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                        {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                        {"doc_count": 1, "key": 1578009600000, "key_as_string": "2020-01-03"},
                    ],
                    0.3333333333333333,
                ),
                "ChangeCreatedEvent": (
                    [
                        {"doc_count": 1, "key": 1577836800000, "key_as_string": "2020-01-01"},
                        {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                        {"doc_count": 1, "key": 1578009600000, "key_as_string": "2020-01-03"},
                    ],
                    0.6666666666666666,
                ),
                "ChangeMergedEvent": (
                    [
                        {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                        {"doc_count": 1, "key": 1577923200000, "key_as_string": "2020-01-02"},
                        {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                    ],
                    0.3333333333333333,
                ),
            },
            "merged": 1,
            "opened": 1,
            "ratios": {
                "abandoned/created": 0.0,
                "iterations/created": 1.5,
                "merged/created": 50.0,
                "self_merged/created": 0.0,
            },
            "tests": 50.0,
        }
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

        params = set_params(
            {"gte": "2020-01-01", "lte": "2020-01-03", "authors": "john,jane"}
        )
        ret = self.eldb.run_named_query("changes_lifecycle_stats", ".*", params)
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

        params = set_params(
            {"gte": "2020-01-01", "lte": "2020-01-03", "authors": "john"}
        )
        ret = self.eldb.run_named_query("changes_lifecycle_stats", ".*", params)
        expected = {
            "ChangeCommitForcePushedEvent": {"authors_count": 0, "events_count": 0},
            "ChangeCommitPushedEvent": {"authors_count": 0, "events_count": 0},
            "ChangeCreatedEvent": {"authors_count": 1, "events_count": 1},
            "abandoned": 0,
            "self_merged": 0,
            "commits": 1.0,
            "duration": 86400.0,
            "duration_variability": 0.0,
            "histos": {
                "ChangeAbandonedEvent": (
                    [
                        {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                        {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                        {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                    ],
                    0,
                ),
                "ChangeCommitForcePushedEvent": (
                    [
                        {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                        {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                        {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                    ],
                    0,
                ),
                "ChangeCommitPushedEvent": (
                    [
                        {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                        {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                        {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                    ],
                    0,
                ),
                "ChangeCreatedEvent": (
                    [
                        {"doc_count": 1, "key": 1577836800000, "key_as_string": "2020-01-01"},
                        {"doc_count": 0, "key": 1577923200000, "key_as_string": "2020-01-02"},
                        {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                    ],
                    0.3333333333333333,
                ),
                "ChangeMergedEvent": (
                    [
                        {"doc_count": 0, "key": 1577836800000, "key_as_string": "2020-01-01"},
                        {"doc_count": 1, "key": 1577923200000, "key_as_string": "2020-01-02"},
                        {"doc_count": 0, "key": 1578009600000, "key_as_string": "2020-01-03"},
                    ],
                    0.3333333333333333,
                ),
            },
            "merged": 1,
            "opened": 0,
            "ratios": {
                "abandoned/created": 0.0,
                "iterations/created": 1.0,
                "merged/created": 100.0,
                "self_merged/created": 0.0,
            },
            "tests": 100.0,
        }
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

    def test_most_active_authors_stats(self):
        """ Test query: most_active_authors_stats """
        params = set_params({})
        ret = self.eldb.run_named_query("most_active_authors_stats", ".*", params)
        expected = {
            "ChangeCommentedEvent": {
                "count_avg": 1,
                "count_median": 1.0,
                "items": [
                    {"doc_count": 1, "key": "jane"},
                    {"doc_count": 1, "key": "steve"},
                ],
                "total": 2,
                "total_hits": 2,
            },
            "ChangeCreatedEvent": {
                "count_avg": 1.3333333333333333,
                "count_median": 1,
                "items": [
                    {"doc_count": 2, "key": "jane"},
                    {"doc_count": 1, "key": "john"},
                    {"doc_count": 1, "key": "steve"},
                ],
                "total": 3,
                "total_hits": 4,
            },
            "ChangeMergedEvent": {
                "count_avg": 1,
                "count_median": 1,
                "items": [
                    {"doc_count": 1, "key": "jane"},
                    {"doc_count": 1, "key": "john"},
                    {"doc_count": 1, "key": "steve"},
                ],
                "total": 3,
                "total_hits": 3,
            },
            "ChangeReviewedEvent": {
                "count_avg": 1.3333333333333333,
                "count_median": 1,
                "items": [
                    {"doc_count": 2, "key": "john"},
                    {"doc_count": 1, "key": "jane"},
                    {"doc_count": 1, "key": "steve"},
                ],
                "total": 3,
                "total_hits": 4,
            },
        }
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

        params = set_params({"authors": "jane"})
        ret = self.eldb.run_named_query("most_active_authors_stats", ".*", params)
        expected = {
            "ChangeCommentedEvent": {
                "count_avg": 1,
                "count_median": 1,
                "items": [{"doc_count": 1, "key": "jane"}],
                "total": 1,
                "total_hits": 1,
            },
            "ChangeCreatedEvent": {
                "count_avg": 2,
                "count_median": 2,
                "items": [{"doc_count": 2, "key": "jane"}],
                "total": 1,
                "total_hits": 2,
            },
            "ChangeMergedEvent": {
                "count_avg": 1,
                "count_median": 1,
                "items": [{"doc_count": 1, "key": "jane"}],
                "total": 1,
                "total_hits": 1,
            },
            "ChangeReviewedEvent": {
                "count_avg": 1,
                "count_median": 1,
                "items": [{"doc_count": 1, "key": "jane"}],
                "total": 1,
                "total_hits": 1,
            },
        }
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

    def test_repos_summary(self):
        """ Test query: repos_summary """
        params = set_params({})
        ret = self.eldb.run_named_query("repos_summary", ".*", params)
        expected = {
            "summary": {
                "unit/repo1": {
                    "changes": 1,
                    "changes_abandoned": 0,
                    "changes_merged": 1,
                    "changes_open": 0,
                },
                "unit/repo2": {
                    "changes": 3,
                    "changes_abandoned": 0,
                    "changes_merged": 2,
                    "changes_open": 1,
                },
            }
        }
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)
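# A minimal standalone sketch of the query flow exercised by TestQueries
# above (not part of the suite). It reuses the helpers the tests rely on;
# the index name and prefix are the test values, so treat them as example
# assumptions rather than production settings.
def example_named_query():
    db = get_db_cnx("monocle-unittest", "monocle.test.")
    params = set_params({"state": "MERGED"})
    ret = db.run_named_query("last_changes", "unit/repo[12]", params)
    return ret["total"]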
def task_data():
    if request.method == "POST":
        index, crawler_config = task_data_endpoint_check_input_env(
            request, check_auth=True, check_content_type=True
        )
        json_data: List = request.get_json()
        if not isinstance(json_data, list):
            returnAPIError("Input data is not a List", 400)
        if len(json_data) > INPUT_TASK_DATA_LIMIT:
            returnAPIError(
                "Input data List over limit (%s items)" % INPUT_TASK_DATA_LIMIT,
                400,
            )
        try:
            extracted_data = createInputTaskData(json_data, crawler_config.name)
        except Exception as exc:
            returnAPIError(
                "Unable to extract input data due to wrong input format: %s" % exc,
                400,
            )
        # Find changes in EL ids that match urls
        change_urls = [e.change_url for e in extracted_data]
        db = create_db_connection(index)
        mc = db.get_changes_by_url(change_urls, INPUT_TASK_DATA_LIMIT)
        mc = {
            r["url"]: {
                "id": r["id"],
                "td": createELTaskData(r.get("tasks_data", [])),
            }
            for r in mc
        }
        # Prepare input data set
        update_docs: List[Union[TaskDataForEL, OrphanTaskDataForEL]] = []
        for input_task_data in extracted_data:
            if input_task_data.change_url in mc:
                # First check if a td matches the input one
                prev_td = [
                    td
                    for td in mc[input_task_data.change_url]["td"]
                    if td.url == input_task_data.url
                ]
                if len(prev_td) > 1:
                    raise RuntimeError("Multiple td match in previous td")
                # Remove the previous outdated one if any
                if prev_td:
                    mc[input_task_data.change_url]["td"].remove(prev_td[0])
                # Add the new one to the list
                mc[input_task_data.change_url]["td"].append(input_task_data)
            else:
                update_docs.append(
                    OrphanTaskDataForEL(
                        _id=input_task_data.url,
                        task_data=input_task_data,
                    )
                )
        total_orphans_to_update = len(update_docs)
        for _mc in mc.values():
            update_docs.append(
                TaskDataForEL(
                    _id=_mc["id"],
                    tasks_data=_mc["td"],
                )
            )
        total_changes_to_update = len(update_docs) - total_orphans_to_update
        # Now insert the data
        err = db.update_task_data(source_it=update_docs)
        # https://github.com/elastic/elasticsearch-py/blob/f4447bf996bdee47a0eb4c736bd39dea20a4486e/elasticsearch/helpers/actions.py#L177
        if err:
            returnAPIError("Unable to update tasks data", 500, str(err))
        db.set_task_crawler_metadata(
            crawler_config.name,
            push_infos={
                "last_post_at": datetime.utcnow().replace(microsecond=0),
                "total_docs_posted": len(extracted_data),
                "total_changes_updated": total_changes_to_update,
                "total_orphans_updated": total_orphans_to_update,
            },
        )
        return jsonify([])

    if request.method == "GET":
        index, crawler_config = task_data_endpoint_check_input_env(
            request, check_auth=False, check_content_type=False
        )
        db = create_db_connection(index)
        metadata = db.get_task_crawler_metadata(crawler_config.name)
        if request.args.get("details") == "true":
            return jsonify(metadata)
        if not metadata.get("last_commit_at"):
            commit_date = crawler_config.updated_since.strftime("%Y-%m-%dT%H:%M:%S")
        else:
            commit_date = metadata["last_commit_at"]
        return jsonify(commit_date + "Z")
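# A minimal HTTP client sketch for the endpoint above (illustration only).
# The route path and host are assumptions, as is the absence of extra auth
# headers (task_data_endpoint_check_input_env validates the request server
# side); the payload fields mirror the TaskData fields used in the tests.
def example_task_data_client():
    import requests

    url = "https://monocle.example.com/api/0/task_data"  # assumed route
    payload = [
        {
            "change_url": "https://tests.com/unit/repo1/pull/1",
            "url": "https://bugtracker.domain.dom/123",
            "tid": "123",
            "ttype": ["BUG"],
            "title": "It does not work",
            "updated_at": "2020-01-01T00:00:00Z",
            "priority": "HIGH",
        }
    ]
    # POST new task data; the server answers an empty JSON list on success
    resp = requests.post(url, json=payload)
    resp.raise_for_status()
    # GET returns the commit date the crawler should resume from
    return requests.get(url).json()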