Example #1
0
 def setUp(self):
     """Create a dedicated test index, load the datasets and build the
     orphan task data fixtures used by the test methods."""
     self.db = get_db_cnx(self.index, "monocle.test.1.")
     for dataset in self.datasets:
         index_dataset(self.db, dataset)
     # (bugtracker url, update date, change url, tid) rows describing the
     # three orphan task data documents built below
     fixture_rows = [
         (
             "https://bugtracker.domain.dom/123",
             "2020-01-01T00:00:00Z",
             "https://tests.com/unit/repo1/pull/1",
             "123",
         ),
         (
             "https://bugtracker.domain.dom/124",
             "2020-01-02T00:00:00Z",
             "https://tests.com/unit/repo1/pull/1",
             "124",
         ),
         (
             "https://bugtracker.domain.dom/125",
             "2020-01-03T00:00:00Z",
             "https://tests.com/unit/repo2/pull/2",
             "125",
         ),
     ]
     self.otds = [
         OrphanTaskDataForEL(
             _id=bt_url,
             task_data=TaskData(
                 crawler_name="mycrawler",
                 updated_at=datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ"),
                 change_url=change_url,
                 ttype=["BUG"],
                 tid=tid,
                 url=bt_url,
                 title="It does not work",
             ),
         )
         for bt_url, date, change_url, tid in fixture_rows
     ]
Example #2
0
    def test_update_change_and_events_with_orphan_tds(self):
        """Check adoption of orphan task data by changes and change events."""
        extra_otd = OrphanTaskDataForEL(
            _id="https://bugtracker.domain.dom/126",
            task_data=TaskData(
                crawler_name="mycrawler",
                updated_at=datetime.strptime(
                    "2020-01-04T00:00:00Z", "%Y-%m-%dT%H:%M:%SZ"
                ),
                change_url="https://tests.com/unit/repomissing/pull/1",
                ttype=["BUG"],
                tid="126",
                url="https://bugtracker.domain.dom/126",
                title="It does not work",
            ),
        )
        self.otds.append(extra_otd)
        self.db.update_task_data(self.otds)
        # Map change urls to the ids of their change and event documents
        adoption_map = {
            "https://tests.com/unit/repo1/pull/1": ["c1", "c1_e2"],
            "https://tests.com/unit/repo2/pull/2": ["c2"],
            "https://tests.com/unit/repo2/pull/3": ["c3"],
        }
        self.db.update_change_and_events_with_orphan_tds(adoption_map)
        all_urls = [
            "https://tests.com/unit/repo1/pull/1",
            "https://tests.com/unit/repo2/pull/2",
            "https://tests.com/unit/repo2/pull/3",
            "https://tests.com/unit/repomissing/pull/1",
        ]
        changes = self.db.get_changes_by_url(all_urls, size=100)
        self.assertEqual(len(changes), 3)

        def first_with_suffix(suffix):
            # Pick the unique change whose url ends with the given suffix
            return next(c for c in changes if c["url"].endswith(suffix))

        r1p1 = first_with_suffix("repo1/pull/1")
        r2p2 = first_with_suffix("repo2/pull/2")
        r2p3 = first_with_suffix("repo2/pull/3")

        # Ensure task data got assigned to the right changes
        self.assertEqual(len(r1p1["tasks_data"]), 2)
        self.assertEqual(len(r2p2["tasks_data"]), 1)
        self.assertEqual(len(r2p3.get("tasks_data", [])), 0)

        events = self.db.get_change_events_by_url(
            ["https://tests.com/unit/repo1/pull/1"]
        )
        events_with_td = [e for e in events if "tasks_data" in e]
        self.assertEqual(len(events_with_td), 1)
        self.assertEqual(events_with_td[0]["id"], "c1_e2")
        self.assertListEqual(
            sorted(td["tid"] for td in events_with_td[0]["tasks_data"]),
            sorted(["123", "124"]),
        )

        # Ensure no more orphan task data remain in the DB besides the one
        # pointing at the missing repository
        remaining = self.db.get_orphan_tds_by_change_urls(all_urls)
        self.assertEqual(len(remaining), 1)
Example #3
0
def task_data_add(request: AddRequest) -> AddResponse:
    """Ingest a crawler's task data items and attach them to known changes.

    Validates the crawler request, then for every input item either merges
    the task data into the matching change (replacing a previously attached
    td with the same url) or stores it as an orphan task data document when
    no change matches its change_url. Change events inherit the full td list
    of their parent change. Finally, push statistics are recorded in the
    crawler metadata.

    Returns an AddResponse carrying an error on failure, empty on success.
    """
    (error, result) = check_crawler_request(request.index, request.crawler,
                                            request.apikey)
    if error:
        return AddResponse(error=result)
    # Reject empty payloads and payloads above the per-request limit
    if not (0 < len(request.items) <= INPUT_TASK_DATA_LIMIT):
        return AddResponse(error=TD.AddFailed)
    extracted_data = request.items
    crawler_config = result
    index = request.index
    # Find changes in EL ids that match urls
    change_urls = [e.change_url for e in extracted_data]
    db = create_db_connection(index)
    matched_changes = db.get_changes_by_url(change_urls, INPUT_TASK_DATA_LIMIT)
    me = db.get_change_events_by_url(change_urls)
    # Index the matched changes by url, keeping the EL document id and the
    # task data already attached to each change
    mc = {
        r["url"]: {
            "id": r["id"],
            "td": createELTaskData(r.get("tasks_data", [])),
        }
        for r in matched_changes
    }
    # Prepare input data set
    update_docs: Any = []
    for input_task_data in extracted_data:
        td = toTaskData(request.crawler, input_task_data)
        if input_task_data.change_url in mc:
            # First check if a previously attached td matches the input one
            # (loop variable renamed: it used to shadow `td` above)
            prev_td = [
                existing_td
                for existing_td in mc[input_task_data.change_url]["td"]
                if existing_td.url == input_task_data.url
            ]
            if len(prev_td) > 1:
                raise RuntimeError("Multiple td match in previous td")
            # Remove the previous outdated one if any
            if prev_td:
                mc[input_task_data.change_url]["td"].remove(prev_td[0])
            # Add the new one to the list
            mc[input_task_data.change_url]["td"].append(td)
        else:
            # No matching change: store the item as an orphan td document
            update_docs.append(
                OrphanTaskDataForEL(_id=input_task_data.url, task_data=td))
    total_orphans_to_update = len(update_docs)
    for _mc in mc.values():
        update_docs.append(TaskDataForEL(
            _id=_mc["id"],
            tasks_data=_mc["td"],
        ))
    total_changes_to_update = len(update_docs) - total_orphans_to_update
    # NOTE(review): this assumes every event url returned by
    # get_change_events_by_url is also a key of mc; an event whose change
    # was not returned (e.g. truncated by INPUT_TASK_DATA_LIMIT) would
    # raise KeyError here — confirm with the DB layer's guarantees.
    for _me in me:
        update_docs.append(
            TaskDataForEL(_id=_me["id"], tasks_data=mc[_me["url"]]["td"]))
    total_change_events_to_update = (len(update_docs) -
                                     total_orphans_to_update -
                                     total_changes_to_update)
    # Now insert the data
    err = db.update_task_data(source_it=update_docs)
    # https://github.com/elastic/elasticsearch-py/blob/f4447bf996bdee47a0eb4c736bd39dea20a4486e/elasticsearch/helpers/actions.py#L177
    if err:
        return AddResponse(error=TD.AddFailed)
    db.set_task_crawler_metadata(
        crawler_config.name,
        push_infos={
            "last_post_at": datetime.utcnow().replace(microsecond=0),
            "total_docs_posted": len(extracted_data),
            "total_changes_updated": total_changes_to_update,
            "total_change_events_updated": total_change_events_to_update,
            "total_orphans_updated": total_orphans_to_update,
        },
    )
    return AddResponse()
Example #4
0
class TestQueries(unittest.TestCase):
    """Tests for the named ES queries, run against a small fixture index.

    The index is created once per class (see setUpClass below) from the two
    dataset files, and the orphan task data fixtures are attached to the
    changes c1/c2/c3 before any test runs.
    """

    # Name of the ES test index
    index = "monocle-unittest"
    # JSON dataset files indexed by setUpClass
    datasets = [
        "objects/unit_repo1.json",
        "objects/unit_repo2.json",
    ]

    # Orphan task data fixtures: three bugtracker items pointing at changes
    # in repo1 and repo2, with distinct priorities and types so the
    # task_priority / task_type query params can be exercised
    otds = [
        OrphanTaskDataForEL(
            _id="https://bugtracker.domain.dom/123",
            task_data=TaskData(
                crawler_name="mycrawler",
                updated_at=datetime.strptime("2020-01-01T00:00:00Z",
                                             "%Y-%m-%dT%H:%M:%SZ"),
                change_url="https://tests.com/unit/repo1/pull/1",
                ttype=["BUG", "CLIENT_IMPACT"],
                tid="123",
                url="https://bugtracker.domain.dom/123",
                title="It does not work",
                priority="HIGH",
            ),
        ),
        OrphanTaskDataForEL(
            _id="https://bugtracker.domain.dom/124",
            task_data=TaskData(
                crawler_name="mycrawler",
                updated_at=datetime.strptime("2020-01-02T00:00:00Z",
                                             "%Y-%m-%dT%H:%M:%SZ"),
                change_url="https://tests.com/unit/repo1/pull/1",
                ttype=["FutureFeature"],
                tid="124",
                url="https://bugtracker.domain.dom/124",
                title="It does not work",
                priority="MEDIUM",
            ),
        ),
        OrphanTaskDataForEL(
            _id="https://bugtracker.domain.dom/125",
            task_data=TaskData(
                crawler_name="mycrawler",
                updated_at=datetime.strptime("2020-01-03T00:00:00Z",
                                             "%Y-%m-%dT%H:%M:%SZ"),
                change_url="https://tests.com/unit/repo2/pull/2",
                ttype=["BUG", "DOC"],
                tid="125",
                url="https://bugtracker.domain.dom/125",
                title="It does not work",
                priority="LOW",
            ),
        ),
    ]

    @classmethod
    def setUpClass(cls):
        """Index the datasets once and attach the orphan task data."""
        logging.basicConfig(
            level=logging.DEBUG,
            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        )
        log = logging.getLogger(__name__)
        # log to stderr
        log.addHandler(logging.StreamHandler())
        cls.eldb = get_db_cnx(cls.index, "monocle.test.")
        for dataset in cls.datasets:
            index_dataset(cls.eldb, dataset)
        cls.eldb.update_task_data(cls.otds)
        # Change url -> change document id, used to adopt the orphan tds
        url_to_change_id = {
            "https://tests.com/unit/repo1/pull/1": "c1",
            "https://tests.com/unit/repo2/pull/2": "c2",
            "https://tests.com/unit/repo2/pull/3": "c3",
        }
        cls.eldb.update_changes_with_orphan_tds(url_to_change_id)

    @classmethod
    def tearDownClass(cls):
        """Drop the test index created by setUpClass."""
        full_index_name = cls.eldb.prefix + cls.index
        cls.eldb.es.indices.delete(index=full_index_name)

    def test_unknown_query(self):
        """
        Running an unknown named query must raise UnknownQueryException
        """
        params = set_params({})
        with self.assertRaises(UnknownQueryException):
            self.eldb.run_named_query("unknown", "unit/repo1", params)

    def test_all_queries(self):
        """
        Test all public queries

        Every public named query must return a dict, list, tuple or int;
        collect the offenders so a single run reports them all.
        """
        failing = []
        for query in queries.public_queries:
            params = set_params({})
            ret = self.eldb.run_named_query(query, "unit/repo1", params)
            # Single isinstance with a tuple of types replaces the chained
            # `not isinstance(...) and not isinstance(...)` conditions
            if not isinstance(ret, (dict, list, tuple, int)):
                failing.append((query, ret))
        self.assertEqual(failing, [])

    def test_scan(self):
        """
        Test internal query: _scan
        """
        results = queries._scan(
            self.eldb.es, self.eldb.index, "unit/repo1", set_params({})
        )
        scanned_ids = [obj["id"] for obj in results]
        self.assertCountEqual(
            scanned_ids, ["c1_e1", "c1_e2", "c1_e3", "c1_e4", "c1_e5"]
        )

    def test_first_created_event(self):
        """
        Test internal query: _first_created_event
        """
        first_date = queries._first_created_event(
            self.eldb.es, self.eldb.index, "unit/repo1", set_params({})
        )
        self.assertEqual(first_date, "2020-01-01T00:00:00Z")

    def test_events_top(self):
        """
        Test internal query: _events_top

        Aggregates the repo1 events by their "type" field and compares the
        whole result structure (items, totals, avg/median) with DeepDiff.
        """
        params = set_params({})
        ret = queries._events_top(self.eldb.es, self.eldb.index, "unit/repo1",
                                  "type", params)
        # Expected aggregation result; the odd line breaks below come from
        # the project's formatter and are kept as-is
        expected = {
            "count_avg":
            1.25,
            "count_median":
            1.0,
            "items": [
                {
                    "doc_count": 2,
                    "key": "ChangeReviewedEvent"
                },
                {
                    "doc_count": 1,
                    "key": "ChangeCommentedEvent"
                },
                {
                    "doc_count": 1,
                    "key": "ChangeCreatedEvent"
                },
                {
                    "doc_count": 1,
                    "key": "ChangeMergedEvent"
                },
            ],
            "total":
            4,
            "total_hits":
            5,
        }
        # DeepDiff is falsy (empty) when both structures are equal
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

    def test_count_events(self):
        """
        Test query: count_events
        """
        count = self.eldb.run_named_query(
            "count_events", "unit/repo1", set_params({})
        )
        self.assertEqual(count, 5)

    def test_count_authors(self):
        """
        Test query: count_authors
        """
        count = self.eldb.run_named_query(
            "count_authors", "unit/repo1", set_params({})
        )
        self.assertEqual(count, 2)

        # Restricting to change creation events leaves a single author
        count = self.eldb.run_named_query(
            "count_authors", "unit/repo1",
            set_params({"type": "ChangeCreatedEvent"})
        )
        self.assertEqual(count, 1)

    def test_events_histo(self):
        """
        Test query: events_histo

        Over a two-day window the query returns daily buckets plus a
        trailing float — presumably the average events per day; the whole
        tuple is compared with DeepDiff.
        """
        params = set_params({"gte": "2020-01-01", "lte": "2020-01-02"})
        ret = self.eldb.run_named_query("events_histo", "unit/repo1", params)
        expected = (
            [
                {
                    "doc_count": 4,
                    "key": 1577836800000,
                    "key_as_string": "2020-01-01"
                },
                {
                    "doc_count": 1,
                    "key": 1577923200000,
                    "key_as_string": "2020-01-02"
                },
            ],
            2.5,
        )
        # DeepDiff is falsy (empty) when both structures are equal
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

    def test_authors_histo(self):
        """
        Test query: authors_histo

        Over a two-day window the query returns per-day author buckets plus
        avg/total author counts; the structure is compared with DeepDiff.
        """
        params = set_params({"gte": "2020-01-01", "lte": "2020-01-02"})
        ret = self.eldb.run_named_query("authors_histo", "unit/repo1", params)
        expected = {
            "avg_authors":
            1.5,
            "buckets": [
                {
                    "authors": ["jane", "john"],
                    "doc_count": 2,
                    "key": 1577836800000,
                    "key_as_string": "2020-01-01",
                },
                {
                    "authors": ["jane"],
                    "doc_count": 1,
                    "key": 1577923200000,
                    "key_as_string": "2020-01-02",
                },
            ],
            "total_authors":
            2,
        }
        # DeepDiff is falsy (empty) when both structures are equal
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

    def test_events_top_authors(self):
        """
        Test query: events_top_authors

        Aggregates repo1 events per author and compares the full result
        (items, totals, avg/median) with DeepDiff.
        """
        params = set_params({})
        ret = self.eldb.run_named_query("events_top_authors", "unit/repo1",
                                        params)
        # Expected aggregation; formatting kept as produced by the project's
        # formatter
        expected = {
            "count_avg":
            2.5,
            "count_median":
            2.5,
            "items": [{
                "doc_count": 3,
                "key": "jane"
            }, {
                "doc_count": 2,
                "key": "john"
            }],
            "total":
            2,
            "total_hits":
            5,
        }
        # DeepDiff is falsy (empty) when both structures are equal
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

    def test_repos_top_merged(self):
        """
        Test query: repos_top (restricted to MERGED changes)

        Ranks repositories by number of merged changes across repo1/repo2
        and compares the full result with DeepDiff.
        """
        params = set_params({"state": "MERGED"})
        ret = self.eldb.run_named_query("repos_top", "unit/repo[12]", params)
        expected = {
            "items": [
                {
                    "key": "unit/repo2",
                    "doc_count": 2
                },
                {
                    "key": "unit/repo1",
                    "doc_count": 1
                },
            ],
            "count_avg":
            1.5,
            "count_median":
            1.5,
            "total":
            2,
            "total_hits":
            3,
        }
        # DeepDiff is falsy (empty) when both structures are equal
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

    def test_files_param(self):
        """
        Test files param: last_changes
        """
        result = self.eldb.run_named_query(
            "last_changes", ".*", set_params({"files": r".*backend.py"})
        )
        self.assertEqual(result["total"], 1, result)

    def test_state_param(self):
        """
        Test state param: changes_and_events
        """
        result = self.eldb.run_named_query(
            "changes_and_events", "unit/repo[12]",
            set_params({"state": "MERGED"})
        )
        self.assertEqual(result["total"], 3, result)

    def test_approvals_param(self):
        """
        Test approvals param: changes_and_events
        """
        def run(approvals):
            # All cases share the same query, date bound and repo pattern
            params = set_params({"approvals": approvals, "gte": "2020-01-01"})
            return self.eldb.run_named_query(
                "changes_and_events", "unit/repo[12]", params
            )

        result = run("Code-Review+2")
        self.assertEqual(result["total"], 2, result)
        self.assertCountEqual(
            [item["id"] for item in result["items"]], ["c1", "c1_e4"]
        )

        result = run("CHANGES_REQUESTED,APPROVED")
        self.assertEqual(result["total"], 4, result)
        self.assertCountEqual(
            [item["id"] for item in result["items"]],
            ["c2", "c2_e4", "c3", "c3_e2"],
        )

    def test_task_params(self):
        """
        Test task related params (task_priority / task_type)
        """
        # (raw params, expected total) pairs, asserted in order
        cases = [
            ({"task_priority": "HIGH"}, 1),
            ({"task_priority": "HIGH,MEDIUM,LOW"}, 2),
            ({"task_type": "BUG"}, 2),
            ({"task_type": "BUG,CLIENT_IMPACT"}, 2),
            ({"task_priority": "LOW", "task_type": "BUG,CLIENT_IMPACT"}, 1),
        ]
        for raw_params, expected_total in cases:
            result = self.eldb.run_named_query(
                "last_changes", ".*", set_params(raw_params)
            )
            self.assertEqual(result["total"], expected_total, result)

    def test_exclude_approvals_param(self):
        """
        Test exclude_approvals param: last_changes
        """
        result = self.eldb.run_named_query(
            "last_changes", "unit/repo1",
            set_params({
                "exclude_approvals": "Verified-1",
                "gte": "2020-01-01",
            })
        )
        self.assertEqual(result["total"], 0, result)

        # Combining approvals with exclude_approvals still excludes the change
        result = self.eldb.run_named_query(
            "last_changes", "unit/repo1",
            set_params({
                "approvals": "Code-Review+2",
                "exclude_approvals": "Verified-1",
                "gte": "2020-01-01",
            })
        )
        self.assertEqual(result["total"], 0, result)

    def test_get_indices(self):
        """
        Test get_indices
        """
        indices = self.eldb.get_indices()
        self.assertEqual(indices, [self.index])

    def test_branch_param(self):
        """
        Test target_branch param: last_changes / changes_and_events
        """
        # No merged change targets the 'maintainance' branch
        result = self.eldb.run_named_query(
            "last_changes", "unit/repo[12]",
            set_params({"state": "MERGED", "target_branch": "maintainance"})
        )
        self.assertEqual(result["total"], 0, result)
        # Filtering on 'master' must match as many docs as no filter at all
        on_master = self.eldb.run_named_query(
            "changes_and_events", "unit/repo[12]",
            set_params({"target_branch": "master"})
        )
        unfiltered = self.eldb.run_named_query(
            "changes_and_events", "unit/repo[12]", set_params({})
        )
        self.assertEqual(on_master["total"], unfiltered["total"])

    def test_change_and_events(self):
        """
        Test change_and_events query
        """
        result = self.eldb.run_named_query(
            "changes_and_events", "unit/repo1", set_params({})
        )
        self.assertEqual(result["total"], 6)
        # Pick the single Change document among the events
        change = next(c for c in result["items"] if c["type"] == "Change")
        self.assertTrue(change["tests_included"])
        self.assertTrue(change["has_issue_tracker_links"])
        self.assertListEqual(
            change["issue_tracker_links"][0],
            ["#42", "https://github.com/unit/repo1/issues/42"],
        )

    def test_last_changes(self):
        """
        Test last_changes query
        """
        open_changes = self.eldb.run_named_query(
            "last_changes", "unit/repo[12]", set_params({"state": "OPEN"})
        )
        self.assertEqual(open_changes["total"], 1)
        self.assertFalse(open_changes["items"][0]["tests_included"])

        merged_changes = self.eldb.run_named_query(
            "last_changes", "unit/repo[12]", set_params({"state": "MERGED"})
        )
        self.assertEqual(merged_changes["total"], 3)
        for change in merged_changes["items"]:
            self.assertIn("tests_included", list(change.keys()))

    def test_self_merged_param(self):
        """
        Test self_merged param: last_changes
        """
        result = self.eldb.run_named_query(
            "last_changes", "unit/repo[12]",
            set_params({"state": "MERGED", "self_merged": True})
        )
        self.assertEqual(result["total"], 1)
        change = result["items"][0]
        self.assertEqual(change["author"], change["merged_by"])

    def test_tests_included_param(self):
        """
        Test tests_included param: last_changes
        """
        with_tests = self.eldb.run_named_query(
            "last_changes", "unit/repo[12]",
            set_params({"tests_included": True})
        )
        self.assertEqual(with_tests["total"], 1, with_tests)
        unfiltered = self.eldb.run_named_query(
            "last_changes", "unit/repo[12]", set_params({})
        )
        self.assertEqual(unfiltered["total"], 4, unfiltered)

    def test_has_issue_tracker_links_param(self):
        """
        Test has_issue_tracker_links param: last_changes
        """
        filtered = self.eldb.run_named_query(
            "last_changes", "unit/repo[12]",
            set_params({"has_issue_tracker_links": "github.com"})
        )
        self.assertEqual(filtered["total"], 1, filtered)
        unfiltered = self.eldb.run_named_query(
            "last_changes", "unit/repo[12]", set_params({})
        )
        self.assertEqual(unfiltered["total"], 4, unfiltered)

    def test_changes_lifecycle_stats(self):
        """
        Test changes_lifecycle_stats query
        """
        params = set_params({"gte": "2020-01-01", "lte": "2020-01-03"})
        ret = self.eldb.run_named_query("changes_lifecycle_stats", ".*",
                                        params)
        expected = {
            "ChangeCommitForcePushedEvent": {
                "authors_count": 0,
                "events_count": 0
            },
            "ChangeCommitPushedEvent": {
                "authors_count": 1,
                "events_count": 1
            },
            "ChangeCreatedEvent": {
                "authors_count": 2,
                "events_count": 2
            },
            "abandoned": 0,
            "self_merged": 0,
            "commits": 1.0,
            "duration": 86400.0,
            "duration_variability": 0.0,
            "histos": {
                "ChangeAbandonedEvent": (
                    [
                        {
                            "doc_count": 0,
                            "key": 1577836800000,
                            "key_as_string": "2020-01-01",
                        },
                        {
                            "doc_count": 0,
                            "key": 1577923200000,
                            "key_as_string": "2020-01-02",
                        },
                        {
                            "doc_count": 0,
                            "key": 1578009600000,
                            "key_as_string": "2020-01-03",
                        },
                    ],
                    0,
                ),
                "ChangeCommitForcePushedEvent": (
                    [
                        {
                            "doc_count": 0,
                            "key": 1577836800000,
                            "key_as_string": "2020-01-01",
                        },
                        {
                            "doc_count": 0,
                            "key": 1577923200000,
                            "key_as_string": "2020-01-02",
                        },
                        {
                            "doc_count": 0,
                            "key": 1578009600000,
                            "key_as_string": "2020-01-03",
                        },
                    ],
                    0,
                ),
                "ChangeCommitPushedEvent": (
                    [
                        {
                            "doc_count": 0,
                            "key": 1577836800000,
                            "key_as_string": "2020-01-01",
                        },
                        {
                            "doc_count": 0,
                            "key": 1577923200000,
                            "key_as_string": "2020-01-02",
                        },
                        {
                            "doc_count": 1,
                            "key": 1578009600000,
                            "key_as_string": "2020-01-03",
                        },
                    ],
                    0.3333333333333333,
                ),
                "ChangeCreatedEvent": (
                    [
                        {
                            "doc_count": 1,
                            "key": 1577836800000,
                            "key_as_string": "2020-01-01",
                        },
                        {
                            "doc_count": 0,
                            "key": 1577923200000,
                            "key_as_string": "2020-01-02",
                        },
                        {
                            "doc_count": 1,
                            "key": 1578009600000,
                            "key_as_string": "2020-01-03",
                        },
                    ],
                    0.6666666666666666,
                ),
                "ChangeMergedEvent": (
                    [
                        {
                            "doc_count": 0,
                            "key": 1577836800000,
                            "key_as_string": "2020-01-01",
                        },
                        {
                            "doc_count": 1,
                            "key": 1577923200000,
                            "key_as_string": "2020-01-02",
                        },
                        {
                            "doc_count": 0,
                            "key": 1578009600000,
                            "key_as_string": "2020-01-03",
                        },
                    ],
                    0.3333333333333333,
                ),
            },
            "merged": 1,
            "opened": 1,
            "ratios": {
                "abandoned/created": 0.0,
                "iterations/created": 1.5,
                "merged/created": 50.0,
                "self_merged/created": 0.0,
            },
            "tests": 50.0,
        }

        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

        params = set_params({
            "gte": "2020-01-01",
            "lte": "2020-01-03",
            "authors": "john,jane"
        })
        ret = self.eldb.run_named_query("changes_lifecycle_stats", ".*",
                                        params)
        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

        params = set_params({
            "gte": "2020-01-01",
            "lte": "2020-01-03",
            "authors": "john"
        })
        ret = self.eldb.run_named_query("changes_lifecycle_stats", ".*",
                                        params)
        expected = {
            "ChangeCommitForcePushedEvent": {
                "authors_count": 0,
                "events_count": 0
            },
            "ChangeCommitPushedEvent": {
                "authors_count": 0,
                "events_count": 0
            },
            "ChangeCreatedEvent": {
                "authors_count": 1,
                "events_count": 1
            },
            "abandoned": 0,
            "self_merged": 0,
            "commits": 1.0,
            "duration": 86400.0,
            "duration_variability": 0.0,
            "histos": {
                "ChangeAbandonedEvent": (
                    [
                        {
                            "doc_count": 0,
                            "key": 1577836800000,
                            "key_as_string": "2020-01-01",
                        },
                        {
                            "doc_count": 0,
                            "key": 1577923200000,
                            "key_as_string": "2020-01-02",
                        },
                        {
                            "doc_count": 0,
                            "key": 1578009600000,
                            "key_as_string": "2020-01-03",
                        },
                    ],
                    0,
                ),
                "ChangeCommitForcePushedEvent": (
                    [
                        {
                            "doc_count": 0,
                            "key": 1577836800000,
                            "key_as_string": "2020-01-01",
                        },
                        {
                            "doc_count": 0,
                            "key": 1577923200000,
                            "key_as_string": "2020-01-02",
                        },
                        {
                            "doc_count": 0,
                            "key": 1578009600000,
                            "key_as_string": "2020-01-03",
                        },
                    ],
                    0,
                ),
                "ChangeCommitPushedEvent": (
                    [
                        {
                            "doc_count": 0,
                            "key": 1577836800000,
                            "key_as_string": "2020-01-01",
                        },
                        {
                            "doc_count": 0,
                            "key": 1577923200000,
                            "key_as_string": "2020-01-02",
                        },
                        {
                            "doc_count": 0,
                            "key": 1578009600000,
                            "key_as_string": "2020-01-03",
                        },
                    ],
                    0,
                ),
                "ChangeCreatedEvent": (
                    [
                        {
                            "doc_count": 1,
                            "key": 1577836800000,
                            "key_as_string": "2020-01-01",
                        },
                        {
                            "doc_count": 0,
                            "key": 1577923200000,
                            "key_as_string": "2020-01-02",
                        },
                        {
                            "doc_count": 0,
                            "key": 1578009600000,
                            "key_as_string": "2020-01-03",
                        },
                    ],
                    0.3333333333333333,
                ),
                "ChangeMergedEvent": (
                    [
                        {
                            "doc_count": 0,
                            "key": 1577836800000,
                            "key_as_string": "2020-01-01",
                        },
                        {
                            "doc_count": 1,
                            "key": 1577923200000,
                            "key_as_string": "2020-01-02",
                        },
                        {
                            "doc_count": 0,
                            "key": 1578009600000,
                            "key_as_string": "2020-01-03",
                        },
                    ],
                    0.3333333333333333,
                ),
            },
            "merged": 1,
            "opened": 0,
            "ratios": {
                "abandoned/created": 0.0,
                "iterations/created": 1.0,
                "merged/created": 100.0,
                "self_merged/created": 0.0,
            },
            "tests": 100.0,
        }

        ddiff = DeepDiff(ret, expected)
        if ddiff:
            raise DiffException(ddiff)

    def test_most_active_authors_stats(self):
        """
        Test query: most_active_authors_stats
        """
        def bucket(author, count):
            # One terms-aggregation bucket, as returned by the query.
            return {"doc_count": count, "key": author}

        def stats(avg, median, buckets, total, hits):
            # Full per-event-type stats entry; `buckets` is a list of
            # (author, doc_count) pairs in expected order.
            return {
                "count_avg": avg,
                "count_median": median,
                "items": [bucket(a, c) for a, c in buckets],
                "total": total,
                "total_hits": hits,
            }

        query_params = set_params({})
        result = self.eldb.run_named_query("most_active_authors_stats", ".*",
                                           query_params)
        # NOTE: numeric types matter to DeepDiff (1 vs 1.0), so the
        # builders are fed exactly the original literal values.
        expected = {
            "ChangeCommentedEvent": stats(
                1, 1.0,
                [("jane", 1), ("steve", 1)],
                2, 2),
            "ChangeCreatedEvent": stats(
                1.3333333333333333, 1,
                [("jane", 2), ("john", 1), ("steve", 1)],
                3, 4),
            "ChangeMergedEvent": stats(
                1, 1,
                [("jane", 1), ("john", 1), ("steve", 1)],
                3, 3),
            "ChangeReviewedEvent": stats(
                1.3333333333333333, 1,
                [("john", 2), ("jane", 1), ("steve", 1)],
                3, 4),
        }

        delta = DeepDiff(result, expected)
        if delta:
            raise DiffException(delta)

        # Same query, restricted to a single author.
        query_params = set_params({"authors": "jane"})
        result = self.eldb.run_named_query("most_active_authors_stats", ".*",
                                           query_params)
        expected = {
            "ChangeCommentedEvent": stats(1, 1, [("jane", 1)], 1, 1),
            "ChangeCreatedEvent": stats(2, 2, [("jane", 2)], 1, 2),
            "ChangeMergedEvent": stats(1, 1, [("jane", 1)], 1, 1),
            "ChangeReviewedEvent": stats(1, 1, [("jane", 1)], 1, 1),
        }

        delta = DeepDiff(result, expected)
        if delta:
            raise DiffException(delta)

    def test_repos_summary(self):
        """
        Test query: repos_summary
        """
        result = self.eldb.run_named_query("repos_summary", ".*",
                                           set_params({}))

        # Per-repository change counts expected from the fixture dataset.
        expected = {
            "summary": {
                "unit/repo1": {
                    "changes": 1,
                    "changes_abandoned": 0,
                    "changes_merged": 1,
                    "changes_open": 0,
                },
                "unit/repo2": {
                    "changes": 3,
                    "changes_abandoned": 0,
                    "changes_merged": 2,
                    "changes_open": 1,
                },
            }
        }

        delta = DeepDiff(result, expected)
        if delta:
            raise DiffException(delta)
# Exemple #5
# 0
def task_data():
    """Crawler task-data endpoint.

    POST: ingest a JSON list of task-data items. Items whose change_url
    matches an existing change in the index are attached to that change
    document; items with no matching change are stored as orphan task
    data. Crawler push metadata is updated on success.

    GET: return the crawler's last commit date as an ISO string with a
    "Z" suffix, or the full crawler metadata when the "details" query
    arg is "true".

    NOTE(review): the code after each returnAPIError() call assumes that
    helper aborts the request (raises) — confirm against its definition;
    otherwise execution would fall through on error.
    """
    if request.method == "POST":
        index, crawler_config = task_data_endpoint_check_input_env(
            request, check_auth=True, check_content_type=True)
        json_data: List = request.get_json()
        if not isinstance(json_data, list):
            returnAPIError("Input data is not a List", 400)
        if len(json_data) > INPUT_TASK_DATA_LIMIT:
            returnAPIError(
                "Input data List over limit (%s items)" %
                (INPUT_TASK_DATA_LIMIT),
                400,
            )
        try:
            extracted_data = createInputTaskData(json_data,
                                                 crawler_config.name)
        except Exception as exc:
            returnAPIError(
                "Unable to extract input data due to wrong input format: %s" %
                exc, 400)
        # Find changes in EL ids that match urls
        change_urls = [e.change_url for e in extracted_data]
        db = create_db_connection(index)
        matched = db.get_changes_by_url(change_urls, INPUT_TASK_DATA_LIMIT)
        # Map change url -> EL doc id plus its currently attached task data
        # (dict comprehension instead of dict() over a list of tuples).
        mc = {
            r["url"]: {
                "id": r["id"],
                "td": createELTaskData(r.get("tasks_data", [])),
            }
            for r in matched
        }
        # Prepare input data set
        update_docs: List[Union[TaskDataForEL, OrphanTaskDataForEL]] = []
        for input_task_data in extracted_data:
            if input_task_data.change_url in mc:
                # First check if a td match the input one
                prev_td = [
                    td for td in mc[input_task_data.change_url]["td"]
                    if td.url == input_task_data.url
                ]
                if len(prev_td) > 1:
                    raise RuntimeError("Multiple td match in previous td")
                # Remove the previous outdated one if any
                if prev_td:
                    mc[input_task_data.change_url]["td"].remove(prev_td[0])
                # Add the new one to the list
                mc[input_task_data.change_url]["td"].append(input_task_data)
            else:
                # No matching change: keep it as an orphan keyed by its url
                update_docs.append(
                    OrphanTaskDataForEL(
                        _id=input_task_data.url,
                        task_data=input_task_data,
                    ))
        # Orphans were appended first, so the current length is their count.
        total_orphans_to_update = len(update_docs)
        for _mc in mc.values():
            update_docs.append(
                TaskDataForEL(
                    _id=_mc["id"],
                    tasks_data=_mc["td"],
                ))
        total_changes_to_update = len(update_docs) - total_orphans_to_update
        # Now insert the data
        err = db.update_task_data(source_it=update_docs)
        # https://github.com/elastic/elasticsearch-py/blob/f4447bf996bdee47a0eb4c736bd39dea20a4486e/elasticsearch/helpers/actions.py#L177
        if err:
            returnAPIError("Unable to update tasks data", 500, str(err))
        db.set_task_crawler_metadata(
            crawler_config.name,
            push_infos={
                "last_post_at": datetime.utcnow().replace(microsecond=0),
                "total_docs_posted": len(extracted_data),
                "total_changes_updated": total_changes_to_update,
                "total_orphans_updated": total_orphans_to_update,
            },
        )
        return jsonify([])
    if request.method == "GET":
        index, crawler_config = task_data_endpoint_check_input_env(
            request, check_auth=False, check_content_type=False)
        db = create_db_connection(index)
        metadata = db.get_task_crawler_metadata(crawler_config.name)
        # args.get() returns None when the key is absent, so the former
        # extra `"details" in request.args` membership test was redundant.
        if request.args.get("details") == "true":
            return jsonify(metadata)
        if not metadata.get("last_commit_at"):
            # Never committed yet: fall back to the configured start date
            commit_date = crawler_config.updated_since.strftime(
                "%Y-%m-%dT%H:%M:%S")
        else:
            commit_date = metadata["last_commit_at"]
        return jsonify(commit_date + "Z")