Exemple #1
0
def process_pull_request_file(
    prf_data,
    via="webhook",
    fetched_at=None,
    commit=True,
    pull_request_id=None,
):
    """
    Create or update the ``PullRequestFile`` row described by ``prf_data``.

    :param prf_data: mapping describing one pull request file; must carry a
        truthy ``"sha"`` entry.
    :param via: replication channel name. The fetch time is stored on the
        ``last_replicated_via_<via>_at`` attribute when the model has one.
    :param fetched_at: when the data was fetched; ``datetime.now()`` is used
        when this is falsy.
    :param commit: when true, commit the DB session before returning.
    :param pull_request_id: ID of the pull request the file belongs to.
    :returns: the ``PullRequestFile`` instance, already added to the session.
    :raises NothingToDo: ``prf_data`` has no SHA.
    :raises MissingData: ``pull_request_id`` is falsy.
    :raises StaleData: the row's ``last_replicated_at`` is later than
        ``fetched_at``.
    """
    file_sha = prf_data.get("sha")
    if not file_sha:
        # Github reports moved files (for example /tmp/a.txt renamed to
        # /tmp/b.txt) without a SHA. That is not an error, there is simply
        # nothing to replicate.
        raise NothingToDo("no pull request file SHA")

    if not pull_request_id:
        raise MissingData("no pull_request_id", obj=prf_data)

    # reuse the stored row when there is one, otherwise start a fresh one
    record = PullRequestFile.query.get((pull_request_id, file_sha))
    if not record:
        record = PullRequestFile(sha=file_sha, pull_request_id=pull_request_id)

    # refuse to overwrite data that was replicated more recently
    if not fetched_at:
        fetched_at = datetime.now()
    if record.last_replicated_at > fetched_at:
        raise StaleData()

    # copy over every known attribute that the payload actually contains
    copied_attrs = (
        "filename",
        "status",
        "additions",
        "deletions",
        "changes",
        "patch",
    )
    for attr in copied_attrs:
        if attr not in prf_data:
            continue
        setattr(record, attr, prf_data[attr])

    # remember when this channel last replicated the row
    stamp_attr = "last_replicated_via_{}_at".format(via)
    if hasattr(record, stamp_attr):
        setattr(record, stamp_attr, fetched_at)

    # stage the row; committing is left to the caller unless asked for
    db.session.add(record)
    if commit:
        db.session.commit()

    return record
Exemple #2
0
def process_repository(repo_data, via="webhook", fetched_at=None, commit=True,
                       requestor_id=None):
    """
    Create or update the ``Repository`` row described by ``repo_data``.

    Besides the repository itself, the ``owner`` and ``organization`` users
    found in the payload are replicated through ``process_user``, and the
    requestor's permissions are stored on a ``UserRepoAssociation`` row.

    NOTE: ``process_user`` is called without a ``commit`` argument, so it
    uses its own default and may commit the session even when ``commit`` is
    false here.

    :param repo_data: mapping describing the repository; needs a truthy
        ``"id"``.
    :param via: replication channel name. The fetch time is stored on the
        ``last_replicated_via_<via>_at`` attribute when the model has one.
    :param fetched_at: when the data was fetched; ``datetime.now()`` is used
        when this is falsy.
    :param commit: when true, commit the DB session before returning.
    :param requestor_id: ID of the user the payload was fetched for; only
        used to store the ``permissions`` entry of the payload.
    :returns: the ``Repository`` instance, already added to the session.
    :raises MissingData: ``repo_data`` has no ID.
    :raises StaleData: the row's ``last_replicated_at`` is later than
        ``fetched_at``.
    """
    repo_id = repo_data.get("id")
    if not repo_id:
        raise MissingData("no repo ID")

    # reuse the stored row when there is one, otherwise start a fresh one
    repo = Repository.query.get(repo_id)
    if not repo:
        repo = Repository(id=repo_id)

    # refuse to overwrite data that was replicated more recently
    if not fetched_at:
        fetched_at = datetime.now()
    if repo.last_replicated_at > fetched_at:
        raise StaleData()

    # plain attributes are copied verbatim when the payload has them
    plain_attrs = (
        "name", "private", "description", "fork", "homepage", "size",
        "stargazers_count", "watchers_count", "language", "has_issues",
        "has_downloads", "has_wiki", "has_pages", "forks_count",
        "open_issues_count", "default_branch",
    )
    for attr in plain_attrs:
        if attr not in repo_data:
            continue
        setattr(repo, attr, repo_data[attr])

    # timestamps are parsed and stored with their tzinfo removed
    for attr in ("created_at", "updated_at", "pushed_at"):
        raw_value = repo_data.get(attr)
        if raw_value:
            setattr(repo, attr, parse_date(raw_value).replace(tzinfo=None))

    # user references
    for role in ("owner", "organization"):
        if role not in repo_data:
            continue
        person = repo_data[role]
        id_attr = "{}_id".format(role)
        login_attr = "{}_login".format(role)
        if not person:
            # the payload explicitly says there is no such user
            setattr(repo, id_attr, None)
            if hasattr(repo, login_attr):
                setattr(repo, login_attr, None)
            continue
        setattr(repo, id_attr, person["id"])
        if hasattr(repo, login_attr):
            setattr(repo, login_attr, person["login"])
        try:
            process_user(person, via=via, fetched_at=fetched_at)
        except StaleData:
            # the stored user is newer than this payload; keep it
            pass

    # remember when this channel last replicated the row
    stamp_attr = "last_replicated_via_{}_at".format(via)
    if hasattr(repo, stamp_attr):
        setattr(repo, stamp_attr, fetched_at)

    # stage the row so that it will be committed
    db.session.add(repo)

    # store the requestor's permissions, if we know both
    if requestor_id and repo_data.get("permissions"):
        granted = repo_data["permissions"]
        link_key = (requestor_id, repo_id)
        link = UserRepoAssociation.query.get(link_key)
        if not link:
            link = UserRepoAssociation(user_id=requestor_id, repo_id=repo_id)
        for perm in ("admin", "push", "pull"):
            if perm not in granted:
                continue
            setattr(link, "can_{perm}".format(perm=perm), granted[perm])
        db.session.add(link)

    if commit:
        db.session.commit()

    return repo
Exemple #3
0
def process_user(user_data, via="webhook", fetched_at=None, commit=True):
    """
    Create or update the ``User`` row described by ``user_data``.

    :param user_data: mapping describing the user; needs a truthy ``"id"``.
    :param via: replication channel name. The fetch time is stored on the
        ``last_replicated_via_<via>_at`` attribute when the model has one.
    :param fetched_at: when the data was fetched; ``datetime.now()`` is used
        when this is falsy.
    :param commit: when true, commit the DB session before returning.
    :returns: the ``User`` instance, already added to the session.
    :raises MissingData: ``user_data`` has no ID.
    :raises StaleData: the row's ``last_replicated_at`` is later than
        ``fetched_at``.
    """
    user_id = user_data.get("id")
    if not user_id:
        raise MissingData("no user ID")

    # reuse the stored row when there is one, otherwise start a fresh one
    user = User.query.get(user_id)
    if not user:
        user = User(id=user_id)

    # refuse to overwrite data that was replicated more recently
    if not fetched_at:
        fetched_at = datetime.now()
    if user.last_replicated_at > fetched_at:
        raise StaleData()

    # Github API names whose model attribute is spelled differently;
    # anything not listed here keeps its API name.
    renamed = {
        "public_repos": "public_repos_count",
        "public_gists": "public_gists_count",
        "followers": "followers_count",
        "following": "following_count",
    }

    # copy over every known attribute that the payload actually contains
    api_names = (
        "login",
        "site_admin",
        "name",
        "company",
        "blog",
        "location",
        "email",
        "hireable",
        "bio",
        "public_repos",
        "public_gists",
        "followers",
        "following",
    )
    for api_name in api_names:
        if api_name not in user_data:
            continue
        model_attr = renamed.get(api_name, api_name)
        setattr(user, model_attr, user_data[api_name])

    # timestamps are parsed and stored with their tzinfo removed
    for attr in ("created_at", "updated_at"):
        raw_value = user_data.get(attr)
        if raw_value:
            setattr(user, attr, parse_date(raw_value).replace(tzinfo=None))

    # remember when this channel last replicated the row
    stamp_attr = "last_replicated_via_{}_at".format(via)
    if hasattr(user, stamp_attr):
        setattr(user, stamp_attr, fetched_at)

    # stage the row; committing is left to the caller unless asked for
    db.session.add(user)
    if commit:
        db.session.commit()

    return user
Exemple #4
0
def process_milestone(milestone_data,
                      via="webhook",
                      fetched_at=None,
                      commit=True,
                      repo_id=None):
    """
    Create or update the ``Milestone`` row described by ``milestone_data``.

    When ``repo_id`` is not given, the repository is looked up from the
    owner and name found in the milestone's ``"url"`` entry.

    NOTE: ``process_user`` is called without a ``commit`` argument, so it
    uses its own default and may commit the session even when ``commit`` is
    false here.

    :param milestone_data: mapping describing the milestone; needs a truthy
        ``"number"``.
    :param via: replication channel name. The fetch time is stored on the
        ``last_replicated_via_<via>_at`` attribute when the model has one.
    :param fetched_at: when the data was fetched; ``datetime.now()`` is used
        when this is falsy.
    :param commit: when true, commit the DB session before returning.
    :param repo_id: ID of the owning repository, if already known.
    :returns: the ``Milestone`` instance, already added to the session.
    :raises MissingData: the number is missing, or the url is missing while
        ``repo_id`` was not supplied.
    :raises DatabaseError: the repository lookup matched several rows.
    :raises NotFound: the repository lookup matched nothing.
    :raises StaleData: the row's ``last_replicated_at`` is later than
        ``fetched_at``.
    """
    number = milestone_data.get("number")
    if not number:
        raise MissingData("no milestone number")

    if not repo_id:
        url = milestone_data.get("url")
        if not url:
            raise MissingData("no milestone url")

        # the url path is expected to look like /repos/<owner>/<name>/...
        segments = URLObject(url).path.segments
        assert segments[0] == "repos"
        repo_owner, repo_name = segments[1], segments[2]

        # context attached to either lookup failure
        failure_info = {
            "type": "milestone",
            "owner": repo_owner,
            "repo": repo_name,
        }

        # fetch repo from database
        try:
            repo = Repository.get(repo_owner, repo_name)
        except MultipleResultsFound:
            raise DatabaseError(
                "Repo {owner}/{repo} found multiple times!".format(
                    owner=repo_owner,
                    repo=repo_name,
                ),
                failure_info,
            )
        if not repo:
            raise NotFound(
                "Repo {owner}/{repo} not loaded in webhookdb".format(
                    owner=repo_owner,
                    repo=repo_name,
                ),
                failure_info,
            )
        repo_id = repo.id

    # reuse the stored row when there is one, otherwise start a fresh one
    milestone = Milestone.query.get((repo_id, number))
    if not milestone:
        milestone = Milestone(repo_id=repo_id, number=number)

    # refuse to overwrite data that was replicated more recently
    if not fetched_at:
        fetched_at = datetime.now()
    if milestone.last_replicated_at > fetched_at:
        raise StaleData()

    # Github API names whose model attribute is spelled differently;
    # anything not listed here keeps its API name.
    renamed = {
        "open_issues": "open_issues_count",
        "closed_issues": "closed_issues_count",
        "due_on": "due_at",
    }

    # copy over every known attribute that the payload actually contains
    api_names = (
        "state",
        "title",
        "description",
        "open_issues",
        "closed_issues",
    )
    for api_name in api_names:
        if api_name not in milestone_data:
            continue
        setattr(milestone, renamed.get(api_name, api_name),
                milestone_data[api_name])

    # timestamps are parsed and stored with their tzinfo removed
    for api_name in ("created_at", "updated_at", "closed_at", "due_on"):
        raw_value = milestone_data.get(api_name)
        if not raw_value:
            continue
        parsed = parse_date(raw_value).replace(tzinfo=None)
        setattr(milestone, renamed.get(api_name, api_name), parsed)

    # user references
    for role in ("creator", ):
        if role not in milestone_data:
            continue
        person = milestone_data[role]
        id_attr = "{}_id".format(role)
        login_attr = "{}_login".format(role)
        if not person:
            # the payload explicitly says there is no such user
            setattr(milestone, id_attr, None)
            if hasattr(milestone, login_attr):
                setattr(milestone, login_attr, None)
            continue
        setattr(milestone, id_attr, person["id"])
        if hasattr(milestone, login_attr):
            setattr(milestone, login_attr, person["login"])
        try:
            process_user(person, via=via, fetched_at=fetched_at)
        except StaleData:
            # the stored user is newer than this payload; keep it
            pass

    # remember when this channel last replicated the row
    stamp_attr = "last_replicated_via_{}_at".format(via)
    if hasattr(milestone, stamp_attr):
        setattr(milestone, stamp_attr, fetched_at)

    # stage the row; committing is left to the caller unless asked for
    db.session.add(milestone)
    if commit:
        db.session.commit()

    return milestone
Exemple #5
0
def process_repository_hook(hook_data, via="webhook", fetched_at=None, commit=True,
                            requestor_id=None, repo_id=None):
    """
    Create or update the ``RepositoryHook`` row described by ``hook_data``.

    When ``repo_id`` is not given, the repository is looked up from the
    owner and name found in the hook's top-level ``"url"`` entry.

    :param hook_data: mapping describing the hook; needs a truthy ``"id"``.
    :param via: replication channel name. The fetch time is stored on the
        ``last_replicated_via_<via>_at`` attribute when the model has one.
    :param fetched_at: when the data was fetched; ``datetime.now()`` is used
        when this is falsy.
    :param commit: when true, commit the DB session before returning.
    :param requestor_id: accepted for signature compatibility; not used by
        this function.
    :param repo_id: ID of the owning repository, if already known. It is
        only applied when a new hook row is created.
    :returns: the ``RepositoryHook`` instance, already added to the session.
    :raises MissingData: the ID is missing, or the url is missing while
        ``repo_id`` was not supplied.
    :raises NotFound: the repository lookup matched nothing.
    :raises DatabaseError: the repository lookup matched several rows.
    :raises StaleData: the row's ``last_replicated_at`` is later than
        ``fetched_at``.
    """
    hook_id = hook_data.get("id")
    if not hook_id:
        raise MissingData("no hook ID")

    if not repo_id:
        url = hook_data.get("url")
        if not url:
            raise MissingData("no hook url")

        # parse repo info from url
        path = URLObject(url).path
        # NOTE(review): assert statements are stripped under ``python -O``;
        # consider an explicit check if this validation must always run.
        assert path.segments[0] == "repos"
        repo_owner = path.segments[1]
        repo_name = path.segments[2]

        # fetch repo from database
        repo_query = (Repository.query
            .filter(Repository.owner_login == repo_owner)
            .filter(Repository.name == repo_name)
        )
        try:
            repo = repo_query.one()
        except NoResultFound:
            msg = "Repo {owner}/{repo} not loaded in webhookdb".format(
                owner=repo_owner, repo=repo_name,
            )
            raise NotFound(msg, {
                "type": "repo_hook",
                "owner": repo_owner,
                "repo": repo_name,
            })
        except MultipleResultsFound:
            msg = "Repo {owner}/{repo} found multiple times!".format(
                owner=repo_owner, repo=repo_name,
            )
            raise DatabaseError(msg, {
                "type": "repo_hook",
                "owner": repo_owner,
                "repo": repo_name,
            })
        repo_id = repo.id

    # fetch the object from the database,
    # or create it if it doesn't exist in the DB
    hook = RepositoryHook.query.get(hook_id)
    if not hook:
        hook = RepositoryHook(id=hook_id, repo_id=repo_id)

    # should we update the object?
    fetched_at = fetched_at or datetime.now()
    if hook.last_replicated_at > fetched_at:
        raise StaleData()

    # update the object
    fields = (
        "name", "config", "events", "active", "last_response",
    )
    for field in fields:
        if field in hook_data:
            setattr(hook, field, hook_data[field])
    dt_fields = ("created_at", "updated_at")
    for field in dt_fields:
        if hook_data.get(field):
            dt = parse_date(hook_data[field]).replace(tzinfo=None)
            setattr(hook, field, dt)

    # `url` is special -- it's the value in the `config` object,
    # NOT the top-level `url` property.
    # `or {}` also covers a payload whose "config" key is present but null,
    # which would otherwise fail with AttributeError on None.
    hook.url = (hook_data.get("config") or {}).get("url")

    # update replication timestamp
    replicated_dt_field = "last_replicated_via_{}_at".format(via)
    if hasattr(hook, replicated_dt_field):
        setattr(hook, replicated_dt_field, fetched_at)

    # add to DB session, so that it will be committed
    db.session.add(hook)

    if commit:
        db.session.commit()

    return hook
Exemple #6
0
def process_label(label_data,
                  via="webhook",
                  fetched_at=None,
                  commit=True,
                  repo_id=None):
    """
    Create or update the ``IssueLabel`` row described by ``label_data``.

    When ``repo_id`` is not given, the repository is looked up from the
    owner and name found in the label's ``"url"`` entry.

    :param label_data: mapping describing the label; needs a truthy
        ``"name"``.
    :param via: replication channel name. The fetch time is stored on the
        ``last_replicated_via_<via>_at`` attribute when the model has one.
    :param fetched_at: when the data was fetched; ``datetime.now()`` is used
        when this is falsy.
    :param commit: when true, commit the DB session before returning.
    :param repo_id: ID of the owning repository, if already known.
    :returns: the ``IssueLabel`` instance, already added to the session.
    :raises MissingData: the name is missing, or the url is missing while
        ``repo_id`` was not supplied.
    :raises DatabaseError: the repository lookup matched several rows.
    :raises NotFound: the repository lookup matched nothing.
    :raises StaleData: the row's ``last_replicated_at`` is later than
        ``fetched_at``.
    """
    name = label_data.get("name")
    if not name:
        raise MissingData("no label name")

    if not repo_id:
        url = label_data.get("url")
        if not url:
            raise MissingData("no label url")

        # the url path is expected to look like /repos/<owner>/<name>/...
        segments = URLObject(url).path.segments
        assert segments[0] == "repos"
        repo_owner, repo_name = segments[1], segments[2]

        # context attached to either lookup failure
        failure_info = {
            "type": "label",
            "owner": repo_owner,
            "repo": repo_name,
        }

        # fetch repo from database
        try:
            repo = Repository.get(repo_owner, repo_name)
        except MultipleResultsFound:
            raise DatabaseError(
                "Repo {owner}/{repo} found multiple times!".format(
                    owner=repo_owner,
                    repo=repo_name,
                ),
                failure_info,
            )
        if not repo:
            raise NotFound(
                "Repo {owner}/{repo} not loaded in webhookdb".format(
                    owner=repo_owner,
                    repo=repo_name,
                ),
                failure_info,
            )
        repo_id = repo.id

    # reuse the stored row when there is one, otherwise start a fresh one
    label = IssueLabel.query.get((repo_id, name))
    if not label:
        label = IssueLabel(repo_id=repo_id, name=name)

    # refuse to overwrite data that was replicated more recently
    if not fetched_at:
        fetched_at = datetime.now()
    if label.last_replicated_at > fetched_at:
        raise StaleData()

    # the payload carries the color as bare hex digits; an empty value
    # clears the stored color
    if "color" in label_data:
        hex_digits = label_data["color"]
        label.color = (
            Color("#{hex}".format(hex=hex_digits)) if hex_digits else None
        )

    # remember when this channel last replicated the row
    stamp_attr = "last_replicated_via_{}_at".format(via)
    if hasattr(label, stamp_attr):
        setattr(label, stamp_attr, fetched_at)

    # stage the row; committing is left to the caller unless asked for
    db.session.add(label)
    if commit:
        db.session.commit()

    return label
Exemple #7
0
def process_pull_request(pr_data, via="webhook", fetched_at=None, commit=True):
    """
    Create or update the ``PullRequest`` row described by ``pr_data``.

    The ``user``, ``assignee`` and ``merged_by`` users are replicated
    through ``process_user``, and the ``base`` and ``head`` repositories
    through ``process_repository``.

    NOTE: those nested calls are made without a ``commit`` argument, so they
    use their own defaults and may commit the session even when ``commit``
    is false here.

    :param pr_data: mapping describing the pull request; needs a truthy
        ``"id"``.
    :param via: replication channel name. The fetch time is stored on the
        ``last_replicated_via_<via>_at`` attribute when the model has one.
    :param fetched_at: when the data was fetched; ``datetime.now()`` is used
        when this is falsy.
    :param commit: when true, commit the DB session before returning.
    :returns: the ``PullRequest`` instance, already added to the session.
    :raises MissingData: ``pr_data`` has no ID.
    :raises StaleData: the row's ``last_replicated_at`` is later than
        ``fetched_at``.
    """
    pr_id = pr_data.get("id")
    if not pr_id:
        raise MissingData("no pull_request ID", obj=pr_data)

    # reuse the stored row when there is one, otherwise start a fresh one
    pr = PullRequest.query.get(pr_id)
    if not pr:
        pr = PullRequest(id=pr_id)

    # refuse to overwrite data that was replicated more recently
    if not fetched_at:
        fetched_at = datetime.now()
    if pr.last_replicated_at > fetched_at:
        raise StaleData()

    # Github API names whose model attribute is spelled differently;
    # anything not listed here keeps its API name.
    renamed = {
        "comments": "comments_count",
        "review_comments": "review_comments_count",
        "commits": "commits_count",
    }

    # copy over every known attribute that the payload actually contains
    api_names = (
        "number", "state", "locked", "title", "body", "merged", "mergeable",
        "comments", "review_comments", "commits", "additions", "deletions",
        "changed_files",
    )
    for api_name in api_names:
        if api_name not in pr_data:
            continue
        setattr(pr, renamed.get(api_name, api_name), pr_data[api_name])

    # timestamps are parsed and stored with their tzinfo removed
    for api_name in ("created_at", "updated_at", "closed_at", "merged_at"):
        raw_value = pr_data.get(api_name)
        if not raw_value:
            continue
        parsed = parse_date(raw_value).replace(tzinfo=None)
        setattr(pr, renamed.get(api_name, api_name), parsed)

    # user references
    for role in ("user", "assignee", "merged_by"):
        if role not in pr_data:
            continue
        person = pr_data[role]
        id_attr = "{}_id".format(role)
        login_attr = "{}_login".format(role)
        if not person:
            # the payload explicitly says there is no such user
            setattr(pr, id_attr, None)
            if hasattr(pr, login_attr):
                setattr(pr, login_attr, None)
            continue
        setattr(pr, id_attr, person["id"])
        if hasattr(pr, login_attr):
            setattr(pr, login_attr, person["login"])
        try:
            process_user(person, via=via, fetched_at=fetched_at)
        except StaleData:
            # the stored user is newer than this payload; keep it
            pass

    # repository references
    for side in ("base", "head"):
        if side not in pr_data:
            continue
        side_data = pr_data[side]
        setattr(pr, "{}_ref".format(side), side_data["ref"])
        side_repo = side_data["repo"]
        repo_id_attr = "{}_repo_id".format(side)
        if not side_repo:
            # the payload has no repository for this side
            setattr(pr, repo_id_attr, None)
            continue
        setattr(pr, repo_id_attr, side_repo["id"])
        try:
            process_repository(side_repo, via=via, fetched_at=fetched_at)
        except StaleData:
            # the stored repository is newer than this payload; keep it
            pass

    # remember when this channel last replicated the row
    stamp_attr = "last_replicated_via_{}_at".format(via)
    if hasattr(pr, stamp_attr):
        setattr(pr, stamp_attr, fetched_at)

    # stage the row; committing is left to the caller unless asked for
    db.session.add(pr)
    if commit:
        db.session.commit()

    return pr
Exemple #8
0
def process_issue(issue_data, via="webhook", fetched_at=None, commit=True):
    """
    Create or update the ``Issue`` row described by ``issue_data``.

    The ``user``, ``assignee`` and ``closed_by`` users are replicated
    through ``process_user``; labels and the milestone are replicated
    through ``process_label`` and ``process_milestone`` with
    ``commit=False``. Exceptions raised by the label and milestone calls
    are not caught here.

    NOTE: ``process_user`` is called without a ``commit`` argument, so it
    uses its own default and may commit the session even when ``commit`` is
    false here.

    :param issue_data: mapping describing the issue; needs a truthy
        ``"id"``.
    :param via: replication channel name. The fetch time is stored on the
        ``last_replicated_via_<via>_at`` attribute when the model has one.
    :param fetched_at: when the data was fetched; ``datetime.now()`` is used
        when this is falsy.
    :param commit: when true, commit the DB session before returning.
    :returns: the ``Issue`` instance, already added to the session.
    :raises MissingData: ``issue_data`` has no ID.
    :raises StaleData: the row's ``last_replicated_at`` is later than
        ``fetched_at``.
    """
    issue_id = issue_data.get("id")
    if not issue_id:
        raise MissingData("no issue ID", obj=issue_data)

    # reuse the stored row when there is one, otherwise start a fresh one
    issue = Issue.query.get(issue_id)
    if not issue:
        issue = Issue(id=issue_id)

    # refuse to overwrite data that was replicated more recently
    if not fetched_at:
        fetched_at = datetime.now()
    if issue.last_replicated_at > fetched_at:
        raise StaleData()

    # Github API names whose model attribute is spelled differently;
    # anything not listed here keeps its API name.
    renamed = {
        "comments": "comments_count",
    }

    # copy over every known attribute that the payload actually contains
    api_names = (
        "number", "state", "title", "body", "comments",
    )
    for api_name in api_names:
        if api_name not in issue_data:
            continue
        setattr(issue, renamed.get(api_name, api_name), issue_data[api_name])

    # timestamps are parsed and stored with their tzinfo removed
    for api_name in ("created_at", "updated_at", "closed_at"):
        raw_value = issue_data.get(api_name)
        if not raw_value:
            continue
        parsed = parse_date(raw_value).replace(tzinfo=None)
        setattr(issue, renamed.get(api_name, api_name), parsed)

    # user references
    for role in ("user", "assignee", "closed_by"):
        if role not in issue_data:
            continue
        person = issue_data[role]
        id_attr = "{}_id".format(role)
        login_attr = "{}_login".format(role)
        if not person:
            # the payload explicitly says there is no such user
            setattr(issue, id_attr, None)
            if hasattr(issue, login_attr):
                setattr(issue, login_attr, None)
            continue
        setattr(issue, id_attr, person["id"])
        if hasattr(issue, login_attr):
            setattr(issue, login_attr, person["login"])
        try:
            process_user(person, via=via, fetched_at=fetched_at)
        except StaleData:
            # the stored user is newer than this payload; keep it
            pass

    # The first label (or the milestone) resolves the repository; the ID is
    # then handed to the following calls so they can skip their own lookup.
    repo_id = None

    # label reference
    if "labels" in issue_data:
        resolved_labels = []
        # a falsy "labels" value leaves the list empty
        for raw_label in issue_data["labels"] or ():
            label = process_label(
                raw_label, via=via, fetched_at=fetched_at, commit=False,
                repo_id=repo_id,
            )
            if not repo_id:
                repo_id = label.repo_id
            resolved_labels.append(label)
        issue.labels = resolved_labels

    # milestone reference
    if "milestone" in issue_data:
        raw_milestone = issue_data["milestone"]
        if not raw_milestone:
            issue.milestone = None
        else:
            milestone = process_milestone(
                raw_milestone, via=via, fetched_at=fetched_at, commit=False,
                repo_id=repo_id,
            )
            if not repo_id:
                repo_id = milestone.repo_id
            issue.milestone_number = milestone.number

    # remember when this channel last replicated the row
    stamp_attr = "last_replicated_via_{}_at".format(via)
    if hasattr(issue, stamp_attr):
        setattr(issue, stamp_attr, fetched_at)

    # stage the row; committing is left to the caller unless asked for
    db.session.add(issue)
    if commit:
        db.session.commit()

    return issue