Example 1
def aggregate_build_stats_for_job(job_id: UUID):
    """
    Given a job, aggregate its data upwards into the Build.

    This should generally be fired upon a job's completion, or
    alternatively it can be used to repair aggregate data.
    """
    lock_key = "job:{job_id}".format(job_id=job_id)
    with redis.lock(lock_key):
        job = (Job.query.unrestricted_unsafe().with_for_update(
            nowait=True).filter(Job.id == job_id).first())
        if not job:
            raise ValueError("Unable to find job with id = {}".format(job_id))

        auth.set_current_tenant(
            auth.RepositoryTenant(repository_id=job.repository_id))

        # we need to handle the race between when the mutations were made to <Job> and
        # when the only remaining artifact may have finished processing
        if job.status == Status.collecting_results:
            if not has_unprocessed_artifacts(job):
                job.status = Status.finished
                if not job.date_finished:
                    job.date_finished = timezone.now()
                db.session.add(job)
                db.session.commit()
            else:
                pending_artifact_ids = db.session.query(PendingArtifact.id).filter(
                    PendingArtifact.repository_id == job.repository_id,
                    PendingArtifact.provider == job.provider,
                    PendingArtifact.external_build_id == job.build.external_id,
                    PendingArtifact.external_job_id == job.external_id,
                )
                # the single-column query yields row tuples, so unpack the id
                for (pa_id,) in pending_artifact_ids:
                    process_pending_artifact.delay(pending_artifact_id=pa_id)

        # record any job-specific stats that might not have been taken care of elsewhere
        if job.status == Status.finished:
            record_test_stats(job.id)
            record_style_violation_stats(job.id)
            record_bundle_stats(job.id)
            record_failure_reasons(job)
            db.session.commit()

    lock_key = "aggstatsbuild:{build_id}".format(build_id=job.build_id.hex)
    with redis.lock(lock_key):
        aggregate_build_stats.delay(build_id=job.build_id)
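
The `redis.lock(...)` helper these tasks use is project-specific, but the pattern it guards maps onto redis-py's built-in distributed lock. A minimal, self-contained sketch under that assumption (the wrapper's `timeout`/`expire`/`nowait` keywords are its own; redis-py spells them differently):

import redis

client = redis.Redis()

def guarded_update(job_id: str) -> None:
    # `timeout` caps how long the lock may be held before auto-expiring;
    # `blocking_timeout` caps how long we wait to acquire it
    with client.lock("job:{}".format(job_id), timeout=30, blocking_timeout=5):
        pass  # the locked read-modify-write of the Job row goes here

redis-py raises `redis.exceptions.LockError` when the lock cannot be acquired within `blocking_timeout`, which is roughly the behavior the `nowait=True` variants elsewhere in these examples appear to rely on.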
Example 2
def upsert_build(hook: Hook, external_id: str, data: dict = None) -> Response:
    provider_name = hook.get_provider().get_name(hook.config)
    lock_key = "hook:build:{repo_id}:{provider}:{build_xid}".format(
        repo_id=hook.repository_id, provider=provider_name, build_xid=external_id
    )
    # TODO (here and in other upsert_* functions): it's better to move all the locking
    # code to async tasks.
    with redis.lock(lock_key, timeout=BUILD_LOCK_TIMEOUT, expire=30):
        json = data.copy() if data else {}
        json["external_id"] = external_id
        json["provider"] = provider_name
        json["hook_id"] = str(hook.id)

        build = Build.query.filter(
            Build.provider == provider_name, Build.external_id == external_id
        ).first()

        if build:
            return client.put(
                "/repos/{}/builds/{}".format(
                    hook.repository.get_full_name(), build.number
                ),
                json=json,
            )

        return client.post(
            "/repos/{}/builds".format(hook.repository.get_full_name()), json=json
        )
Example 3
def identify_revision(repository: Repository, treeish: str):
    """
    Attempt to transform a commit-like reference into a valid revision.
    """
    # try to find it from the database first
    if len(treeish) == 40:
        revision = Revision.query.filter(
            Revision.repository_id == repository.id, Revision.sha == treeish
        ).first()
        if revision:
            return revision

    try:
        vcs = repository.get_vcs()
    except UnknownRepositoryBackend:
        return None

    vcs.ensure(update_if_exists=False)

    lock_key = "sync_repo:{repo_id}".format(repo_id=repository.id)
    # lock this update to avoid piling up duplicate fetch/save calls
    with redis.lock(lock_key, expire=30):
        try:
            commit = next(vcs.log(parent=treeish, limit=1))
        except UnknownRevision:
            vcs.update()
            commit = next(vcs.log(parent=treeish, limit=1))

        revision, _ = commit.save(repository)

    return revision
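
The inner try/except above is a "look up, refresh once, retry" idiom: consult local state first, and only pay for a `vcs.update()` when the reference is unknown. The same shape as a generic helper (names here are hypothetical, for illustration only):

def lookup_with_refresh(lookup, refresh, miss_exc):
    # cheap local lookup first; on a miss, refresh local state once
    # and retry, letting a second miss propagate to the caller
    try:
        return lookup()
    except miss_exc:
        refresh()
        return lookup()

Under those names, the lookup above would read: `commit = lookup_with_refresh(lambda: next(vcs.log(parent=treeish, limit=1)), vcs.update, UnknownRevision)`.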
Example 4
def upsert_job(build: Build,
               hook: Hook,
               external_id: str,
               data: dict = None) -> Response:
    provider_name = hook.get_provider().get_name(hook.config)
    lock_key = "upsert:job:{build_id}:{provider}:{job_xid}".format(
        build_id=build.id, provider=provider_name, job_xid=external_id)
    with redis.lock(lock_key):
        json = data.copy() if data else {}
        json["external_id"] = external_id
        json["provider"] = provider_name
        json["hook_id"] = str(hook.id)

        job = Job.query.filter(
            Job.provider == provider_name,
            Job.external_id == external_id,
            Job.build_id == build.id,
        ).first()

        if job:
            return client.put(
                "/repos/{}/builds/{}/jobs/{}".format(
                    build.repository.get_full_name(), job.build.number,
                    job.number),
                json=json,
            )

        return client.post(
            "/repos/{}/builds/{}/jobs".format(build.repository.get_full_name(),
                                              build.number),
            json=json,
        )
Example 5
def upsert_job(build: Build,
               provider: str,
               external_id: str,
               data: dict = None) -> Response:
    lock_key = 'upsert:job:{build_id}:{provider}:{job_xid}'.format(
        build_id=build.id,
        provider=provider,
        job_xid=external_id,
    )
    with redis.lock(lock_key):
        json = data.copy() if data else {}
        json['external_id'] = external_id
        json['provider'] = provider

        job = Job.query.filter(
            Job.provider == provider,
            Job.external_id == external_id,
            Job.build_id == build.id,
        ).first()

        if job:
            return client.put(
                '/repos/{}/builds/{}/jobs/{}'.format(
                    build.repository.get_full_name(),
                    job.build.number,
                    job.number,
                ),
                json=json,
            )

        return client.post(
            '/repos/{}/builds/{}/jobs'.format(
                build.repository.get_full_name(),
                build.number,
            ),
            json=json,
        )
Example 6
def aggregate_build_stats_for_job(job_id: UUID):
    """
    Given a job, aggregate its data upwards into the Build.

    This should generally be fired upon a job's completion, or
    alternatively it can be used to repair aggregate data.
    """
    job = Job.query.unrestricted_unsafe().options(
        joinedload('project')).filter(Job.id == job_id).first()
    if not job:
        raise ValueError('Unable to find job with id = {}'.format(job_id))

    auth.set_current_tenant(
        auth.Tenant(
            organization_ids=[job.organization_id],
            project_ids=[job.project_id],
            repository_ids=[job.project.repository_id],
        ))

    # record any job-specific stats that might not have been taken care of elsewhere
    # (we might want to move TestResult's stats here as well, or move coverage's
    # stats elsewhere)
    record_coverage_stats(job)

    lock_key = 'aggstatsbuild:{build_id}'.format(build_id=job.build_id.hex)
    with redis.lock(lock_key):
        aggregate_build_stats(job.build_id)
Example 7
def upsert_change_request(repository: Repository,
                          provider: str,
                          external_id: str,
                          data: dict = None) -> Response:
    lock_key = "hook:cr:{repo_id}:{provider}:{cr_xid}".format(
        repo_id=repository.id, provider=provider, cr_xid=external_id)
    with redis.lock(lock_key):
        json = data.copy() if data else {}
        json["external_id"] = external_id
        json["provider"] = provider

        cr = ChangeRequest.query.filter(
            ChangeRequest.repository_id == repository.id,
            ChangeRequest.provider == provider,
            ChangeRequest.external_id == external_id,
        ).first()

        if cr:
            return client.put(
                "/repos/{}/change-requests/{}".format(
                    repository.get_full_name(), cr.number),
                json=json,
            )

        return client.post(
            "/repos/{}/change-requests".format(repository.get_full_name()),
            json=json,
        )
Example 8
def upsert_build(repository: Repository,
                 provider: str,
                 external_id: str,
                 data: dict = None) -> Response:
    lock_key = 'hook:build:{repo_id}:{provider}:{build_xid}'.format(
        repo_id=repository.id,
        provider=provider,
        build_xid=external_id,
    )
    with redis.lock(lock_key):
        json = data.copy() if data else {}
        json['external_id'] = external_id
        json['provider'] = provider

        build = Build.query.filter(
            Build.provider == provider,
            Build.external_id == external_id,
        ).first()

        if build:
            return client.put(
                '/repos/{}/builds/{}'.format(
                    repository.get_full_name(),
                    build.number,
                ),
                json=json,
            )

        return client.post(
            '/repos/{}/builds'.format(repository.get_full_name()),
            json=json,
        )
Example 9
def resolve_ref_for_change_request(change_request_id: UUID):
    lock_key = f"resolve-cr-ref:{change_request_id}"
    with redis.lock(lock_key, timeout=60.0, nowait=True):
        cr = ChangeRequest.query.unrestricted_unsafe().get(change_request_id)
        if not cr:
            raise ValueError(
                "Unable to find change request with id = {}".format(
                    change_request_id))

        auth.set_current_tenant(
            auth.RepositoryTenant(repository_id=cr.repository_id))

        if not cr.parent_revision_sha and cr.parent_ref:
            try:
                revision = revisions.identify_revision(cr.repository,
                                                       cr.parent_ref,
                                                       with_vcs=True)
            except InvalidPublicKey:
                raise
            cr.parent_revision_sha = revision.sha
            db.session.add(cr)
            db.session.commit()

        if not cr.head_revision_sha and cr.head_ref:
            revision = revisions.identify_revision(cr.repository,
                                                   cr.head_ref,
                                                   with_vcs=True)
            cr.head_revision_sha = revision.sha
            if not cr.authors and revision.authors:
                cr.authors = revision.authors
            db.session.add(cr)
            db.session.commit()
Example 10
def upsert_build(hook: Hook, external_id: str, data: dict = None) -> Response:
    provider_name = hook.get_provider().get_name(hook.config)
    lock_key = "hook:build:{repo_id}:{provider}:{build_xid}".format(
        repo_id=hook.repository_id,
        provider=provider_name,
        build_xid=external_id)
    with redis.lock(lock_key):
        json = data.copy() if data else {}
        json["external_id"] = external_id
        json["provider"] = provider_name
        json["hook_id"] = str(hook.id)

        build = Build.query.filter(Build.provider == provider_name,
                                   Build.external_id == external_id).first()

        if build:
            return client.put(
                "/repos/{}/builds/{}".format(hook.repository.get_full_name(),
                                             build.number),
                json=json,
            )

        return client.post(
            "/repos/{}/builds".format(hook.repository.get_full_name()),
            json=json,
        )
Example 11
def aggregate_build_stats_for_job(job_id: UUID):
    """
    Given a job, aggregate its data upwards into the Build.

    This should generally be fired upon a job's completion, or
    alternatively it can be used to repair aggregate data.
    """
    lock_key = 'job:{job_id}'.format(job_id=job_id)
    with redis.lock(lock_key):
        job = Job.query.unrestricted_unsafe().with_for_update(
            nowait=True).filter(Job.id == job_id).first()
        if not job:
            raise ValueError('Unable to find job with id = {}'.format(job_id))

        auth.set_current_tenant(
            auth.Tenant(repository_ids=[job.repository_id]))

        # we need to handle the race between when the mutations were made to <Job> and
        # when the only remaining artifact may have finished processing
        if job.status == Status.collecting_results and not has_unprocessed_artifacts(
                job.id):
            job.status = Status.finished
            if not job.date_finished:
                job.date_finished = timezone.now()
            db.session.add(job)
            db.session.commit()

        # record any job-specific stats that might not have been taken care of elsewhere
        # (we might want to move TestResult's stats here as well)
        if job.status == Status.finished:
            record_test_stats(job.id)
            record_style_violation_stats(job.id)
            record_bundle_stats(job.id)
            record_failure_reasons(job)

    lock_key = 'aggstatsbuild:{build_id}'.format(build_id=job.build_id.hex)
    with redis.lock(lock_key):
        aggregate_build_stats.delay(build_id=job.build_id)
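
The `.delay(...)` calls scattered through these tasks follow Celery's convention for enqueuing a task asynchronously instead of running it inline. Purely as an illustration of that convention (the project may use its own task runner with a compatible API), a minimal Celery sketch:

from celery import Celery

app = Celery("worker", broker="redis://localhost:6379/0")

@app.task
def aggregate_build_stats(build_id):
    ...  # aggregation body (see Example 16)

# callers fire-and-forget rather than blocking on the work:
# aggregate_build_stats.delay(build_id=some_build_id)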
Example 12
def resolve_ref_for_build(build_id: UUID):
    lock_key = f"resolve-build-ref:{build_id}"
    with redis.lock(lock_key, timeout=60.0, nowait=True):
        build = Build.query.unrestricted_unsafe().get(build_id)
        if not build:
            raise ValueError(
                "Unable to find build with id = {}".format(build_id))

        if build.revision_sha:
            return

        auth.set_current_tenant(
            auth.RepositoryTenant(repository_id=build.repository_id))

        revision: Optional[Revision] = None
        try:
            revision = revisions.identify_revision(build.repository,
                                                   build.ref,
                                                   with_vcs=True)
        except UnknownRevision:
            build.result = Result.errored
            build.status = Status.finished
            try:
                with db.session.begin_nested():
                    db.session.add(
                        FailureReason(
                            repository_id=build.repository_id,
                            build_id=build.id,
                            reason=FailureReason.Reason.unresolvable_ref,
                        ))
                    db.session.flush()
            except IntegrityError as exc:
                if "duplicate" not in str(exc):
                    raise

        except InvalidPublicKey:
            pass

        if revision:
            build.revision_sha = revision.sha
            if not build.authors and revision.authors:
                build.authors = revision.authors
            if not build.label:
                build.label = revision.message.split("\n")[0]
        db.session.add(build)
        db.session.commit()

    data = build_schema.dump(build)
    publish("builds", "build.update", data)
Example 13
    def process(self, fp):
        results = self.get_coverage(fp)

        for result in results:
            try:
                with db.session.begin_nested():
                    db.session.add(result)
            except IntegrityError:
                lock_key = "coverage:{build_id}:{file_hash}".format(
                    build_id=result.build_id.hex,
                    file_hash=sha1(result.filename.encode("utf-8")).hexdigest(),
                )
                with redis.lock(lock_key):
                    result = self.merge_coverage(result)
                    db.session.add(result)
            db.session.flush()

        return results
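
The `begin_nested()` block is the SAVEPOINT-based "optimistic insert, fall back to merge" idiom: the INSERT runs inside a savepoint so a unique-constraint violation rolls back only that statement, and the existing row is then merged under a lock. A stripped-down sketch of the idiom in plain SQLAlchemy (the `merge_existing` callable is hypothetical):

from sqlalchemy.exc import IntegrityError

def insert_or_merge(session, row, merge_existing):
    try:
        with session.begin_nested():  # emits SAVEPOINT
            session.add(row)
            session.flush()  # force the INSERT so conflicts surface here
    except IntegrityError:
        # only the savepoint rolled back; fold our data into the row
        # that won the race instead of inserting a duplicate
        row = merge_existing(row)
        session.add(row)
    return row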
Example 14
    def process(self, fp):
        results = self.get_coverage(fp)

        for result in results:
            try:
                with db.session.begin_nested():
                    db.session.add(result)
            except IntegrityError:
                lock_key = 'coverage:{job_id}:{file_hash}'.format(
                    job_id=result.job_id.hex,
                    file_hash=sha1(
                        result.filename.encode('utf-8')).hexdigest(),
                )
                with redis.lock(lock_key):
                    result = self.merge_coverage(result)
                    db.session.add(result)
            db.session.commit()

        return results
Example 15
    def post(self):
        """
        Activate a GitHub repository.
        """
        repo_name = (request.get_json() or {}).get('name')
        if not repo_name:
            return self.error('missing repo_name parameter')

        owner_name, repo_name = repo_name.split('/', 1)

        user = auth.get_current_user()
        provider = GitHubRepositoryProvider(cache=False)
        try:
            repo_data = provider.get_repo(user=user,
                                          owner_name=owner_name,
                                          repo_name=repo_name)
        except IdentityNeedsUpgrade as exc:
            return self.respond(
                {
                    'provider': 'github',
                    'error': 'identity_needs_upgrade',
                    'url': exc.get_upgrade_url(),
                }, 401)

        if not repo_data['admin']:
            return self.respond(
                {'message': 'Insufficient permissions to activate repository'},
                403)

        lock_key = 'repo:{provider}/{owner_name}/{repo_name}'.format(
            provider='github',
            owner_name=owner_name,
            repo_name=repo_name,
        )
        with redis.lock(lock_key):
            try:
                with db.session.begin_nested():
                    # bind various github specific attributes
                    repo = Repository(
                        backend=RepositoryBackend.git,
                        provider=RepositoryProvider.github,
                        status=RepositoryStatus.active,
                        external_id=str(repo_data['id']),
                        owner_name=owner_name,
                        name=repo_name,
                        url=repo_data['url'],
                        data=repo_data['config'],
                    )
                    db.session.add(repo)
                    db.session.flush()
            except IntegrityError:
                repo = Repository.query.unrestricted_unsafe().filter(
                    Repository.provider == RepositoryProvider.github,
                    Repository.external_id == str(repo_data['id']),
                ).first()
                # it's possible to get here if the "full name" already exists
                assert repo
                needs_configured = repo.status == RepositoryStatus.inactive
                if needs_configured:
                    repo.status = RepositoryStatus.active
                    db.session.add(repo)
            else:
                needs_configured = True
            if needs_configured:
                # generate a new private key for use on github
                key = ssh.generate_key()
                db.session.add(
                    ItemOption(
                        item_id=repo.id,
                        name='auth.private-key',
                        value=key.private_key,
                    ))

                # register key with github
                provider.add_key(
                    user=user,
                    repo_name=repo_name,
                    owner_name=owner_name,
                    key=key,
                )

                # we need to commit before firing off the task
                db.session.commit()

                import_repo.delay(repo_id=repo.id)

        try:
            with db.session.begin_nested():
                db.session.add(
                    RepositoryAccess(
                        repository_id=repo.id,
                        user_id=user.id,
                    ))
                db.session.flush()
        except IntegrityError:
            pass

        db.session.commit()

        return self.respond_with_schema(repo_schema, repo, 201)
Example 16
def aggregate_build_stats(build_id: UUID):
    """
    Updates various denormalized / aggregate attributes on Build per its
    jobs. These attributes include start and completion dates, as well as
    the status and result.
    """
    # now we pull in the entirety of the build's data to aggregate state upward
    lock_key = "build:{build_id}".format(build_id=build_id)
    with redis.lock(lock_key):
        build = (Build.query.unrestricted_unsafe().with_for_update(
            nowait=True).get(build_id))
        if not build:
            raise ValueError(
                "Unable to find build with id = {}".format(build_id))

        auth.set_current_tenant(
            auth.RepositoryTenant(repository_id=build.repository_id))

        record_coverage_stats(build.id)

        # materialize the jobs once; a lazy query would re-execute on every
        # pass below and is always truthy in the emptiness check
        job_list = list(Job.query.filter(Job.build_id == build.id))

        was_finished = build.status == Status.finished
        is_finished = all(p.status == Status.finished for p in job_list)

        # ensure build's dates are reflective of jobs
        build.date_started = safe_agg(min,
                                      (j.date_started
                                       for j in job_list if j.date_started))

        if is_finished:
            build.date_finished = safe_agg(
                max, (j.date_finished for j in job_list if j.date_finished))
        else:
            build.date_finished = None

        # if there's any failure, the build failed
        if any(j.result is Result.failed for j in job_list
               if not j.allow_failure):
            build.result = Result.failed
        # else, if we're finished, we can aggregate from results
        elif is_finished:
            if not job_list:
                build.result = Result.errored
            elif not any(j for j in job_list if not j.allow_failure):
                build.result = Result.passed
            else:
                build.result = aggregate_result(
                    (j.result for j in job_list if not j.allow_failure))
        # we should never get here as long as we've got jobs and correct data
        else:
            build.result = Result.unknown

        if is_finished:
            build.status = Status.finished
        else:
            # ensure we don't set the status to finished unless it actually is
            new_status = aggregate_status((j.status for j in job_list))
            if build.status != new_status:
                build.status = new_status

        db.session.add(build)
        db.session.commit()

        # we don't bother aggregating stats unless we're finished
        if build.status == Status.finished and not was_finished:
            for stat in AGGREGATED_BUILD_STATS:
                aggregate_stat_for_build(build, stat)
            db.session.commit()
            send_build_notifications.delay(build_id=build.id)
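
`safe_agg` is not defined in these examples; judging from its call sites it applies an aggregate such as `min` or `max` to a possibly-empty sequence, returning a default instead of raising. A plausible sketch under that assumption:

def safe_agg(func, iterable, default=None):
    # materialize so emptiness can be tested; the call sites already
    # filter out None values in their generator expressions
    values = list(iterable)
    return func(values) if values else default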
Example 17
    def post(self):
        """
        Activate a GitHub repository.
        """
        repo_name = (request.get_json() or {}).get("name")
        if not repo_name:
            return self.error("missing repo_name parameter")

        owner_name, repo_name = repo_name.split("/", 1)

        user = auth.get_current_user()
        provider = GitHubRepositoryProvider(cache=False)
        try:
            repo_data = provider.get_repo(user=user,
                                          owner_name=owner_name,
                                          repo_name=repo_name)
        except IdentityNeedsUpgrade as exc:
            return self.respond(
                {
                    "provider": "github",
                    "error": "identity_needs_upgrade",
                    "url": exc.get_upgrade_url(),
                },
                401,
            )

        if Permission.admin not in repo_data["permission"]:
            return self.respond(
                {"message": "Insufficient permissions to activate repository"},
                403)

        lock_key = Repository.get_lock_key(RepositoryProvider.github,
                                           owner_name, repo_name)
        with redis.lock(lock_key):
            try:
                with db.session.begin_nested():
                    # bind various github specific attributes
                    repo = Repository(
                        backend=RepositoryBackend.git,
                        provider=RepositoryProvider.github,
                        status=RepositoryStatus.active,
                        external_id=str(repo_data["id"]),
                        owner_name=owner_name,
                        name=repo_name,
                        url=repo_data["url"],
                        data=repo_data["config"],
                    )
                    db.session.add(repo)
                    db.session.flush()
            except IntegrityError:
                repo = (Repository.query.unrestricted_unsafe().filter(
                    Repository.provider == RepositoryProvider.github,
                    Repository.external_id == str(repo_data["id"]),
                ).first())
                # it's possible to get here if the "full name" already exists
                assert repo
                needs_configured = repo.status == RepositoryStatus.inactive
                if needs_configured:
                    repo.status = RepositoryStatus.active
                    db.session.add(repo)
            else:
                needs_configured = True
            if needs_configured:
                # generate a new private key for use on github
                key = ssh.generate_key()
                db.session.add(
                    ItemOption(item_id=repo.id,
                               name="auth.private-key",
                               value=key.private_key))

                # register key with github
                # TODO(dcramer): we should store this key reference so we can delete it
                # when the user deactivates the repo
                provider.add_key(user=user,
                                 repo_name=repo_name,
                                 owner_name=owner_name,
                                 key=key)

                db.session.commit()

        try:
            with db.session.begin_nested():
                db.session.add(
                    RepositoryAccess(
                        repository_id=repo.id,
                        user_id=user.id,
                        permission=repo_data["permission"],
                    ))
                db.session.flush()
        except IntegrityError:
            pass

        db.session.commit()

        return self.respond_with_schema(repo_schema, repo, 201)