Example #1
0
        def _update_idents(obj: Dict) -> Tuple[Optional[Union[Change, Event]], bool]:
            """Pass every ident carried by ``obj`` through ``update_ident``.

            ``obj`` is modified in place. The object's hash is taken before and
            after the rewrite; when it differs the object is converted with
            ``dict_to_change_or_event`` and returned together with ``True``,
            otherwise ``(None, False)`` is returned.
            """
            hash_before = get_obj_hash(obj)

            if obj["type"] == "Change":
                obj["author"] = update_ident(obj["author"])
                # Optional single-ident fields are only rewritten when present.
                for field in ("committer", "merged_by"):
                    if field in obj:
                        obj[field] = update_ident(obj[field])
                if "assignees" in obj:
                    obj["assignees"] = [
                        update_ident(assignee) for assignee in obj["assignees"]
                    ]
                for commit in obj.get("commits", ()):
                    commit["author"] = update_ident(commit["author"])
                    commit["committer"] = update_ident(commit["committer"])

            if obj["type"] in get_events_list():
                for field in ("author", "on_author"):
                    if field in obj:
                        obj[field] = update_ident(obj[field])

            # Unchanged hash: nothing was rewritten, so there is nothing to return.
            if hash_before == get_obj_hash(obj):
                return None, False
            return dict_to_change_or_event(obj), True
Example #2
0
 def test_get_change_events_by_url(self):
     # Query the events attached to two change URLs.
     urls = [
         "https://tests.com/unit/repo1/pull/1",
         "https://tests.com/unit/repo2/pull/2",
     ]
     found = self.db.get_change_events_by_url(urls)
     found_count = len(found)
     self.assertEqual(9, found_count)
     # Every returned object must carry one of the known event types.
     event_count = sum(
         1 for item in found if item["type"] in get_events_list()
     )
     self.assertEqual(found_count, event_count)
Example #3
0
def dict_to_change_or_event(d: Dict) -> Union[Change, Event]:
    """Build a ``Change`` or ``Event`` instance from a raw dict.

    The ``id`` and ``type`` keys are renamed to ``_id`` and ``_type`` before
    the data is handed to ``from_dict``. The renaming is performed on a
    shallow copy, so the caller's dict is left untouched and can safely be
    reused or converted a second time.

    Args:
        d: raw object; must contain the ``type`` and ``id`` keys.

    Returns:
        A ``Change`` when the type is "Change", an ``Event`` when the type is
        one of the names returned by ``get_events_list()``.

    Raises:
        KeyError: if ``d`` lacks the ``type`` or ``id`` key.
        Exception: if the type is neither "Change" nor a known event type.
    """
    _type = d["type"]
    # Work on a copy: renaming keys in place would corrupt the caller's dict.
    data = dict(d)
    for source_key, target_key in (("id", "_id"), ("type", "_type")):
        data[target_key] = data.pop(source_key)
    if _type == "Change":
        return from_dict(data_class=Change, data=data)
    elif _type in get_events_list():
        return from_dict(data_class=Event, data=data)
    else:
        raise Exception("Unknown DB item id: %s" % _type)
Example #4
0
def changes_and_events(es, index, repository_fullname, params):
    """Return one page of changes and events, oldest first.

    ``params`` is deep-copied before its ``etype`` entry is overwritten with
    "Change" plus every name from ``get_events_list()``, so the caller's dict
    is not modified. ``params["size"]`` and ``params["from"]`` drive the
    pagination of the query.

    Returns a dict with the enhanced hits under ``items`` and the hit count
    (as computed by ``totalc``) under ``total``.
    """
    query_params = deepcopy(params)
    query_params["etype"] = ["Change"] + get_events_list()

    body = {"sort": [{"created_at": {"order": "asc"}}]}
    body["size"] = query_params["size"]
    body["from"] = query_params["from"]
    body["query"] = generate_filter(es, index, repository_fullname, query_params)

    response = run_query(es, index, body)
    hits = response["hits"]
    sources = [hit["_source"] for hit in hits["hits"]]
    return {
        "items": enhance_changes(sources),
        "total": totalc(hits["total"]),
    }
Example #5
0
def string_ident_to_ident(elastic_conn, index) -> None:
    """Rewrite plain-string idents of an index into ident dicts.

    Objects are read through a client opened with ``previous_schema=True``,
    converted in memory by ``update_obj`` and written back through a second
    client in batches of ``bulk_size`` objects. Events that carry no ``url``
    get the (stripped) url of their change, looked up by ``change_id``; when
    the change cannot be found the url is set to "https://undefined".

    Args:
        elastic_conn: connection argument forwarded to ``ELmonocleDB``.
        index: index argument forwarded to ``ELmonocleDB``.
    """
    bulk_size = 7500
    client = ELmonocleDB(elastic_conn, index, previous_schema=True)
    client2 = ELmonocleDB(elastic_conn, index)
    # Cache of change_id -> stripped change url, filled lazily per batch.
    changes_url_lookup: Dict[str, str] = {}
    to_update: List = []
    need_url_update: List[Dict] = []
    total_objects_updated = 0

    def bulk_update(to_update: List) -> None:
        """Write a batch of converted objects through the new-schema client."""
        client2.update(to_update)

    def update_changes_url_lookup(objs: List[Dict]) -> None:
        """Add to the url cache the changes referenced by ``objs``."""
        # De-duplicate and skip the change ids that are already cached.
        change_ids = [
            _id
            for _id in {o["change_id"] for o in objs}
            if _id not in changes_url_lookup
        ]
        print("Updating change_url_lookup for %s changes ..." %
              len(change_ids))
        params = {"change_ids": change_ids, "size": 10000, "from": 0}
        result = client.run_named_query("changes", ".*", params=params)
        for change in result["items"]:
            changes_url_lookup[change["change_id"]] = utils.strip_url(
                change["url"])
        print("%s entries in changes_url_lookup" % len(changes_url_lookup))

    def update_obj(obj: Dict) -> Dict:
        """Convert one object in place to the ident-dict format and return it."""

        url = utils.strip_url(obj["url"])

        def update_approval_type(approval):
            """Return the approval as a list with the ``None`` entries removed."""
            ret = [approval] if isinstance(approval, str) else approval
            return [r for r in ret if r is not None]

        def create_ident_dict(url: str, uid: str) -> Dict:
            """Build the ident dict of ``uid``, prefixed with the url's domain."""
            domain = urlparse(url).netloc
            uid = prefix(domain, uid)
            return {
                "uid": uid,
                "muid": create_muid_from_uid(uid),
            }

        def to_ident(value: Optional[str]) -> Optional[Dict]:
            """Convert a non-empty ident string; empty or ``None`` gives ``None``."""
            if value:
                return create_ident_dict(url, value)
            return None

        if obj["type"] == "Change":
            obj["author"] = to_ident(obj["author"])
            obj["committer"] = to_ident(obj.get("committer"))
            obj["merged_by"] = to_ident(obj.get("merged_by"))
            obj["assignees"] = [
                to_ident(assignee) for assignee in obj.get("assignees", [])
            ]
            for commit in obj.get("commits", []):
                # A commit without author inherits the (already converted)
                # author of the change.
                if "author" not in commit:
                    commit["author"] = obj["author"]
                else:
                    commit["author"] = to_ident(commit["author"])
                # A commit without committer falls back to its own author.
                if "committer" not in commit:
                    commit["committer"] = commit["author"]
                else:
                    commit["committer"] = to_ident(commit["committer"])
        else:
            obj["author"] = to_ident(obj.get("author"))
            obj["on_author"] = to_ident(obj.get("on_author"))
            # Also fix missing created_at date on ChangeCommitPushedEvent
            if obj["type"] == "ChangeCommitPushedEvent" and obj[
                    "created_at"] is None:
                obj["created_at"] = obj["on_created_at"]
        # Also fix approval format if needed
        if obj.get("approval"):
            obj["approval"] = update_approval_type(obj["approval"])
        # Ensure we have the stripped url
        obj["url"] = url

        return obj

    def proceed():
        """Convert and write the objects currently queued in ``to_update``."""
        if need_url_update:
            update_changes_url_lookup(need_url_update)
        # Each entry of need_url_update is the very dict that was also queued
        # in to_update, so it can be patched directly; this avoids a linear
        # membership scan (with dict comparisons) for every queued object.
        for o in need_url_update:
            if o["change_id"] in changes_url_lookup:
                o["url"] = changes_url_lookup[o["change_id"]]
            else:
                print("Warning - unable to find change %s" %
                      o["change_id"])
                o["url"] = "https://undefined"
        updated = [update_obj(o) for o in to_update]
        print("Updating %s objects ..." % len(to_update))
        bulk_update([dict_to_change_or_event(o) for o in updated])

    # NOTE(review): looked up once instead of twice per object; assumes
    # get_events_list() returns the same names on every call.
    event_types = utils.get_events_list()

    for _obj in client.iter_index():
        obj = _obj["_source"]
        is_event = obj["type"] in event_types
        if is_event and "url" not in obj:
            need_url_update.append(obj)
        if is_event or obj["type"] == "Change":
            to_update.append(obj)

        if len(to_update) == bulk_size:
            proceed()
            total_objects_updated += len(to_update)
            print("Total objects updated: %s" % total_objects_updated)
            need_url_update = []
            to_update = []

    # Flush the last, possibly incomplete, batch.
    proceed()
    total_objects_updated += len(to_update)
    print("Total objects updated: %s" % total_objects_updated)