def execute(self, job):
    job.status = Status.queued
    db.session.add(job)

    phase, created = get_or_create(
        JobPhase,
        where={"job": job, "label": job.label},
        defaults={"status": Status.queued, "project": job.project},
    )

    step, created = get_or_create(
        JobStep,
        where={"phase": phase, "label": job.label},
        defaults={"status": Status.pending_allocation, "job": phase.job, "project": phase.project},
    )

    for index, command in enumerate(self.iter_all_commands(job)):
        command_model, created = get_or_create(
            CommandModel,
            where={"jobstep": step, "order": index},
            defaults={
                "label": command.script.splitlines()[0][:128],
                "status": Status.queued,
                "script": command.script,
                "env": command.env,
                "cwd": command.path,
                "artifacts": command.artifacts,
            },
        )

    db.session.commit()

    sync_job_step.delay(step_id=step.id.hex, task_id=step.id.hex, parent_task_id=job.id.hex)
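# Every snippet in this collection leans on the same get_or_create(model, where, defaults)
# helper, which returns an (instance, created) tuple. A minimal sketch of what such a helper
# might look like, assuming a SQLAlchemy-style db.session; the real implementation in the
# codebase may differ (e.g. in how it handles races, flushing, and commits).
def get_or_create(model, where, defaults=None):
    # Look for an existing row matching the `where` columns.
    instance = model.query.filter_by(**where).first()
    if instance is not None:
        return instance, False

    # Not found: build a new instance from `where` plus any `defaults` and stage it.
    kwargs = dict(where)
    kwargs.update(defaults or {})
    instance = model(**kwargs)
    db.session.add(instance)
    return instance, True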
def simulate_local_repository():
    # Identify if we're in a git or hg repo
    backend = identify_local_vcs()

    # Simulate the repository in a new project
    repository = mock.repository(backend=backend, status=RepositoryStatus.active)
    project = mock.project(repository)
    plan = mock.plan(project)

    get_or_create(Snapshot, where={
        'project': project,
        'status': SnapshotStatus.active,
    })
    get_or_create(Snapshot, where={
        'project': project,
        'status': SnapshotStatus.pending,
    })

    # Create some build data based off commits in the local repository
    print 'Creating data based on {0} repository in {1}'.format(
        backend, os.getcwd())

    vcs = get_vcs(repository)
    for lazy_revision in vcs.log(limit=10):
        revision, created, source = lazy_revision.save(repository)
        print ' Created revision {0} in {1}'.format(revision.sha, revision.branches)
        build = add(project, revision, source)
        print ' Inserted build {0} into {1}'.format(build.id, project.slug)
def _get_node(self, label):
    node, created = get_or_create(Node, {'label': label})
    if not created:
        return node

    try:
        response = self._get_raw_response(
            '/computer/{}/config.xml'.format(label))
    except NotFound:
        return node

    # lxml expects the response to be in bytes, so let's assume it's utf-8
    # and send it back as the original format
    response = response.encode('utf-8')

    xml = objectify.fromstring(response)
    cluster_names = xml.label.text.split(' ')

    for cluster_name in cluster_names:
        # remove swarm client as a cluster label as it's not useful
        if cluster_name == 'swarm':
            continue

        cluster, _ = get_or_create(Cluster, {'label': cluster_name})
        get_or_create(ClusterNode, {'node': node, 'cluster': cluster})

    return node
def _get_node(self, master_base_url, label):
    node, created = get_or_create(Node, {'label': label})
    if not created:
        return node

    try:
        response = self._get_text_response(
            master_base_url=master_base_url,
            path='/computer/{}/config.xml'.format(label),
        )
    except NotFound:
        return node

    # lxml expects the response to be in bytes, so let's assume it's utf-8
    # and send it back as the original format
    response = response.encode('utf-8')

    xml = objectify.fromstring(response)
    cluster_names = xml.label.text.split(' ')

    for cluster_name in cluster_names:
        # remove swarm client as a cluster label as it's not useful
        if cluster_name == 'swarm':
            continue

        cluster, _ = get_or_create(Cluster, {'label': cluster_name})
        get_or_create(ClusterNode, {'node': node, 'cluster': cluster})

    return node
def create_build(
    project,
    collection_id,
    label,
    target,
    message,
    author,
    change=None,
    patch=None,
    cause=None,
    source=None,
    sha=None,
    source_data=None,
    tag=None,
    snapshot_id=None,
    no_snapshot=False,
):
    assert sha or source

    repository = project.repository

    if source is None:
        if patch:
            source, _ = get_or_create(
                Source,
                where={"patch": patch},
                defaults={"repository": repository, "revision_sha": sha, "data": source_data or {}},
            )
        else:
            source, _ = get_or_create(
                Source,
                where={"repository": repository, "patch": None, "revision_sha": sha},
                defaults={"data": source_data or {}},
            )

    statsreporter.stats().incr("new_api_build")

    build = Build(
        project=project,
        project_id=project.id,
        collection_id=collection_id,
        source=source,
        source_id=source.id if source else None,
        status=Status.queued,
        author=author,
        author_id=author.id if author else None,
        label=label,
        target=target,
        message=message,
        cause=cause,
        tags=[tag] if tag else [],
    )

    db.session.add(build)
    db.session.commit()

    execute_build(build=build, snapshot_id=snapshot_id, no_snapshot=no_snapshot)

    return build
def loop():
    repository = mock.repository()
    project = mock.project(repository)
    plan = mock.plan()

    get_or_create(ProjectPlan, where={"plan": plan, "project": project})

    while True:
        build = gen(project)
        print "Pushed build {0} on {1}".format(build.id, project.slug)
        time.sleep(0.1)
def execute(self, job):
    job.status = Status.pending_allocation
    db.session.add(job)

    phase, created = get_or_create(JobPhase, where={
        'job': job,
        'label': job.label,
    }, defaults={
        'status': Status.pending_allocation,
        'project': job.project,
    })

    step, created = get_or_create(JobStep, where={
        'phase': phase,
        'label': job.label,
    }, defaults={
        'status': Status.pending_allocation,
        'job': phase.job,
        'project': phase.project,
        'data': {
            'release': self.release,
            'max_executors': self.max_executors,
            'cpus': self.resources['cpus'],
            'mem': self.resources['mem'],
        },
    })

    # HACK(dcramer): we need to filter out non-setup commands
    # if we're running a snapshot build
    is_snapshot = job.build.cause == Cause.snapshot
    index = 0
    for future_command in self.iter_all_commands(job):
        if is_snapshot and future_command.type not in (CommandType.setup, CommandType.teardown):
            continue

        index += 1
        command = future_command.as_command(
            jobstep=step,
            order=index,
        )
        db.session.add(command)

    # TODO(dcramer): improve error handling here
    assert index != 0, "No commands were registered for build plan"

    db.session.commit()

    sync_job_step.delay(
        step_id=step.id.hex,
        task_id=step.id.hex,
        parent_task_id=job.id.hex,
    )
def create_build(project, collection_id, label, target, message, author,
                 change=None, patch=None, cause=None, source=None, sha=None,
                 source_data=None, tag=None, snapshot_id=None,
                 no_snapshot=False, selective_testing_policy=None):
    assert sha or source

    repository = project.repository

    if source is None:
        if patch:
            source, _ = get_or_create(Source, where={
                'patch': patch,
            }, defaults={
                'repository': repository,
                'revision_sha': sha,
                'data': source_data or {},
            })
        else:
            source, _ = get_or_create(Source, where={
                'repository': repository,
                'patch': None,
                'revision_sha': sha,
            }, defaults={
                'data': source_data or {},
            })

    statsreporter.stats().incr('new_api_build')

    build = Build(
        project=project,
        project_id=project.id,
        collection_id=collection_id,
        source=source,
        source_id=source.id if source else None,
        status=Status.queued,
        author=author,
        author_id=author.id if author else None,
        label=label,
        target=target,
        message=message,
        cause=cause,
        tags=[tag] if tag else [],
        selective_testing_policy=selective_testing_policy,
    )

    db.session.add(build)
    db.session.commit()

    execute_build(build=build, snapshot_id=snapshot_id, no_snapshot=no_snapshot)

    return build
def loop():
    repository = mock.repository()
    project = mock.project(repository)
    plan = mock.plan()

    get_or_create(ProjectPlan, where={
        'plan': plan,
        'project': project,
    })

    while True:
        build = gen(project)
        print 'Pushed build {0} on {1}'.format(build.id, project.slug)
        time.sleep(1)
def process(self, fp, artifact):
    target_name = self._get_target_name(artifact)
    target, _ = get_or_create(BazelTarget, where={
        'step_id': self.step.id,
        'job_id': self.step.job.id,
        'name': target_name,
        'result_source': ResultSource.from_self,
    })

    test_suites = self.get_test_suites(fp)
    tests = self.aggregate_tests_from_suites(test_suites)
    manager = TestResultManager(self.step, artifact)
    manager.save(tests)

    # update target metadata
    # TODO handle multiple files per target, i.e. sharding and running multiple times
    target.status = Status.finished
    target.result = aggregate_result([t.result for t in tests])
    duration = 0
    for t in test_suites:
        if t.duration is None:
            duration = None
            break
        duration += t.duration
    target.duration = duration
    target.date_created = min([t.date_created for t in test_suites])
    db.session.add(target)
    db.session.commit()

    return tests
def expand_jobs(self, step, phase_config):
    """
    Creates and runs JobSteps for a set of tests, based on a phase config.

    This phase config comes from a tests.json file that the collection
    jobstep should generate. This method is then called by the TestsJsonHandler.
    """
    assert phase_config['cmd']
    assert '{test_names}' in phase_config['cmd']
    assert 'tests' in phase_config

    num_tests = len(phase_config['tests'])
    test_stats, avg_test_time = TestsExpander.get_test_stats(self.get_test_stats_from() or step.project.slug)

    phase, _ = get_or_create(JobPhase, where={
        'job': step.job,
        'project': step.project,
        'label': phase_config.get('phase') or 'Test',
    }, defaults={
        'status': Status.queued
    })
    db.session.commit()

    # If there are no tests to run, the phase is done.
    if num_tests == 0:
        phase.status = Status.finished
        phase.result = Result.passed
        db.session.add(phase)
        db.session.commit()
        return

    # Check for whether a previous run of this task has already
    # created JobSteps for us, since doing it again would create a
    # double-sharded build.
    steps = JobStep.query.filter_by(phase_id=phase.id, replacement_id=None).all()
    if steps:
        step_shard_counts = [s.data.get('shard_count', 1) for s in steps]
        assert len(set(step_shard_counts)) == 1, "Mixed shard counts in phase!"
        assert len(steps) == step_shard_counts[0]
    else:
        # Create all of the job steps and commit them together.
        groups = TestsExpander.shard_tests(phase_config['tests'], self.max_shards, test_stats, avg_test_time)
        steps = [
            self._create_jobstep(phase, phase_config['cmd'], phase_config.get('path', ''), weight, test_list, len(groups))
            for weight, test_list in groups
        ]
        assert len(steps) == len(groups)
        db.session.commit()

    # Now that that database transaction is done, we'll do the slow work of
    # creating jenkins builds.
    for step in steps:
        self._create_jenkins_build(step)
        sync_job_step.delay_if_needed(
            step_id=step.id.hex,
            task_id=step.id.hex,
            parent_task_id=phase.job.id.hex,
        )
def _create_jobstep(self, phase, phase_cmd, phase_path, weight, test_list,
                    shard_count=1, force_create=False):
    """
    Create a JobStep in the database for a single shard.

    This creates the JobStep, but does not commit the transaction.

    Args:
        phase (JobPhase): The phase this step will be part of.
        phase_cmd (str): Command configured for the collection step.
        phase_path (str): Path configured for the collection step.
        weight (int): The weight of this shard.
        test_list (list): The list of test names for this shard.
        shard_count (int): The total number of shards in this JobStep's phase.
        force_create (bool): Force this JobStep to be created (rather than
            retrieved). This is used when replacing a JobStep to make sure
            we don't just get the old one.

    Returns:
        JobStep: the (possibly-newly-created) JobStep.
    """
    test_names = ' '.join(test_list)
    label = md5(test_names).hexdigest()

    where = {
        'job': phase.job,
        'project': phase.project,
        'phase': phase,
        'label': label,
    }
    if force_create:
        # uuid is unique so forces JobStep to be created
        where['id'] = uuid.uuid4()

    step, created = get_or_create(JobStep, where=where, defaults={
        'data': {
            'cmd': phase_cmd,
            'path': phase_path,
            'tests': test_list,
            'expanded': True,
            'shard_count': shard_count,
            'job_name': self.job_name,
            'build_no': None,
            'weight': weight,
        },
        'status': Status.queued,
    })
    assert created or not force_create

    BuildStep.handle_debug_infra_failures(step, self.debug_config, 'expanded')
    db.session.add(step)
    return step
def _sync_artifact_as_log(self, artifact):
    jobstep = artifact.step
    job = artifact.job

    logsource, created = get_or_create(LogSource, where={
        'name': artifact.data['displayPath'],
        'job': job,
        'step': jobstep,
    }, defaults={
        'job': job,
        'project': job.project,
        'date_created': job.date_started,
    })

    offset = 0
    with closing(self.fetch_artifact(jobstep, artifact.data)) as resp:
        iterator = resp.iter_content()
        for chunk in chunked(iterator, LOG_CHUNK_SIZE):
            chunk_size = len(chunk)
            chunk, _ = create_or_update(LogChunk, where={
                'source': logsource,
                'offset': offset,
            }, values={
                'job': job,
                'project': job.project,
                'size': chunk_size,
                'text': chunk,
            })
            offset += chunk_size
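# The log-syncing snippets here and below feed resp.iter_content() through a chunked()
# helper so LogChunk rows are written in bounded pieces of roughly LOG_CHUNK_SIZE bytes.
# A minimal sketch of what such a helper might look like; this is an assumption about its
# behaviour, not the codebase's actual implementation.
def chunked(iterator, chunk_size):
    """Re-buffer an iterator of arbitrarily sized strings into chunks of at most chunk_size."""
    buf = ''
    for piece in iterator:
        buf += piece
        while len(buf) >= chunk_size:
            yield buf[:chunk_size]
            buf = buf[chunk_size:]
    if buf:
        yield buf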
def _create_job_step(self, phase, job_name=None, build_no=None, label=None, **kwargs):
    # TODO(dcramer): we make an assumption that the job step label is unique
    # but it's not guaranteed to be the case. We can ignore this assumption
    # by guaranteeing that the JobStep.id value is used for builds instead
    # of the Job.id value.
    defaults = {
        'data': {
            'job_name': job_name,
            'build_no': build_no,
        },
    }
    defaults.update(kwargs)

    data = defaults['data']
    if data['job_name'] and not label:
        label = '{0} #{1}'.format(data['job_name'], data['build_no'] or data['item_id'])

    assert label

    step, created = get_or_create(JobStep, where={
        'job': phase.job,
        'project': phase.project,
        'phase': phase,
        'label': label,
    }, defaults=defaults)

    return step
def _expand_job(self, phase, label, cmd, replaces=None):
    where = {
        'job': phase.job,
        'project': phase.project,
        'phase': phase,
        'label': label,
    }
    if replaces:
        # uuid is unique which forces jobstep to be created
        where['id'] = uuid.uuid4()

    step, created = get_or_create(JobStep, where=where, defaults={
        'data': {
            'cmd': cmd,
            'job_name': self.job_name,
            'build_no': None,
            'expanded': True,
        },
        'status': Status.queued,
    })
    assert created or not replaces

    BuildStep.handle_debug_infra_failures(step, self.debug_config, 'expanded')

    if replaces:
        replaces.replacement_id = step.id
        db.session.add(replaces)

    builder = self.get_builder()
    builder.create_jenkins_build(step, job_name=step.data['job_name'], script=step.data['cmd'])

    sync_job_step.delay_if_needed(
        step_id=step.id.hex,
        task_id=step.id.hex,
        parent_task_id=phase.job.id.hex,
    )
    return step
def _create_job_step(self, phase, data, **defaults):
    # TODO(dcramer): we make an assumption that the job step label is unique
    # but it's not guaranteed to be the case. We can ignore this assumption
    # by guaranteeing that the JobStep.id value is used for builds instead
    # of the Job.id value.
    assert 'master' in data
    assert 'job_name' in data
    assert 'build_no' in data or 'item_id' in data

    if not defaults.get('label'):
        label = '{0} #{1}'.format(data['job_name'], data['build_no'] or data['item_id'])

    assert label

    defaults['data'] = data

    step, created = get_or_create(JobStep, where={
        'job': phase.job,
        'project': phase.project,
        'phase': phase,
        'label': label,
    }, defaults=defaults)

    return step
def _sync_phase(self, job, stage_type, stage_list):
    phase, _ = get_or_create(
        JobPhase,
        where={"job_id": job.id, "label": stage_type.title()},
        defaults={"repository_id": job.build.repository_id, "project_id": job.project_id},
    )
    phase.date_started = self._get_start_time(stage_list)
    phase.date_finished = self._get_end_time(stage_list)

    # for stage in (s for s in stages if s['status'] == 'failed'):
    if phase.date_started and phase.date_finished:
        if all(s["status"] == "passed" for s in stage_list):
            phase.result = Result.passed
        else:
            phase.result = Result.failed
        phase.status = Status.finished
    elif phase.date_started:
        if any(s["status"] == "failed" for s in stage_list):
            phase.result = Result.failed
        else:
            phase.result = Result.unknown
        phase.status = Status.in_progress
    else:
        phase.status = Status.queued
        phase.result = Result.unknown

    db.session.add(phase)
    db.session.commit()
    return phase
def create_job(self, job, replaces=None):
    """
    Creates a job within Jenkins.

    Due to the way the API works, this consists of two steps:
        - Submitting the job
        - Polling for the newly created job to associate either a queue ID
          or a finalized build number.
    """
    phase, created = get_or_create(JobPhase, where={
        'job': job,
        'label': self.get_default_job_phase_label(job, self.job_name),
        'project': job.project,
    }, defaults={
        'status': job.status,
    })
    assert not created or not replaces

    step = self._create_job_step(
        phase=phase,
        data={'job_name': self.job_name},
        status=job.status,
        force_create=bool(replaces),
        cluster=self.cluster,
    )
    if replaces:
        replaces.replacement_id = step.id
        db.session.add(replaces)

    db.session.commit()

    # now create the jenkins build
    # we don't commit immediately because we also want to update the job
    # and jobstep using the job_data we get from jenkins
    job_data = self.create_jenkins_build(step, commit=False)
    if job_data['queued']:
        job.status = Status.queued
    else:
        job.status = Status.in_progress
    db.session.add(job)

    assert 'master' in step.data
    assert 'job_name' in step.data
    assert 'build_no' in step.data or 'item_id' in step.data

    # now we have the build_no/item_id and can set the full jobstep label
    step.label = '{0} #{1}'.format(step.data['job_name'], step.data['build_no'] or step.data['item_id'])
    db.session.add(step)

    db.session.commit()

    sync_job_step.delay(
        step_id=step.id.hex,
        task_id=step.id.hex,
        parent_task_id=job.id.hex,
    )

    return step
def _handle_generic_artifact(self, jobstep, artifact, skip_checks=False):
    if not skip_checks:
        if artifact['fileName'].endswith('.log') and not self.sync_log_artifacts:
            return

        if artifact['fileName'].endswith(XUNIT_FILENAMES) and not self.sync_xunit_artifacts:
            return

        if artifact['fileName'].endswith(COVERAGE_FILENAMES) and not self.sync_coverage_artifacts:
            return

    artifact, created = get_or_create(Artifact, where={
        'step': jobstep,
        'name': artifact['fileName'],
    }, defaults={
        'project': jobstep.project,
        'job': jobstep.job,
        'data': artifact,
    })
    if not created:
        db.session.commit()

    sync_artifact.delay_if_needed(
        artifact_id=artifact.id.hex,
        task_id=artifact.id.hex,
        parent_task_id=jobstep.id.hex,
    )
def get(self):
    redirect_uri = url_for(self.authorized_url, _external=True)
    state = request.args.get("state", "")
    flow = get_auth_flow(redirect_uri=redirect_uri, state=state)
    resp = flow.step2_exchange(request.args['code'])

    if current_app.config['GOOGLE_DOMAIN']:
        # TODO(dcramer): confirm this is actually what this value means
        if resp.id_token.get('hd') != current_app.config['GOOGLE_DOMAIN']:
            # TODO(dcramer): this should show some kind of error
            return redirect(url_for(self.complete_url, finished_login='******'))

    user, _ = get_or_create(User, where={
        'email': resp.id_token['email'],
    })

    if current_app.config['DEBUG']:
        user.is_admin = True
        db.session.add(user)

    session['uid'] = user.id.hex
    session['access_token'] = resp.access_token
    session['email'] = resp.id_token['email']

    if 'state' in request.args:
        originating_url = base64.urlsafe_b64decode(request.args['state'].encode('utf-8'))
        # add a query parameter. It shouldn't be this cumbersome...
        url_parts = list(urlparse.urlparse(originating_url))
        query = dict(urlparse.parse_qsl(url_parts[4]))
        query['finished_login'] = '******'
        url_parts[4] = urllib.urlencode(query)
        return redirect(urlparse.urlunparse(url_parts))

    return redirect(url_for(self.complete_url, finished_login='******'))
def delay_if_needed(self, **kwargs):
    """
    Enqueue this task if it's new or hasn't checked in in a reasonable
    amount of time.

    >>> task.delay_if_needed(
    >>>     task_id='33846695b2774b29a71795a009e8168a',
    >>>     parent_task_id='659974858dcf4aa08e73a940e1066328',
    >>> )
    """
    kwargs.setdefault("task_id", uuid4().hex)

    fn_kwargs = dict((k, v) for k, v in kwargs.iteritems() if k not in ("task_id", "parent_task_id"))

    task, created = get_or_create(
        Task,
        where={"task_name": self.task_name, "task_id": kwargs["task_id"]},
        defaults={
            "parent_id": kwargs.get("parent_task_id"),
            "data": {"kwargs": fn_kwargs},
            "status": Status.queued,
        },
    )

    if created or self.needs_requeued(task):
        if not created:
            task.date_modified = datetime.utcnow()
            db.session.add(task)

        db.session.commit()

        queue.delay(self.task_name, kwargs=kwargs, countdown=_DEFAULT_COUNTDOWN)

    if created:
        self._report_created()
def _expand_job(self, phase, job_config):
    label = job_config.get('name') or md5(job_config['cmd']).hexdigest()

    step, created = get_or_create(JobStep, where={
        'job': phase.job,
        'project': phase.project,
        'phase': phase,
        'label': label,
    }, defaults={
        'data': {
            'cmd': job_config['cmd'],
            'job_name': self.job_name,
            'build_no': None,
            'expanded': True,
        },
        'status': Status.queued,
    })

    # TODO(dcramer): due to no unique constraints this section of code
    # presents a race condition when run concurrently
    if not step.data.get('build_no'):
        builder = self.get_builder()
        params = builder.get_job_parameters(step.job, changes_bid=step.id.hex, script=step.data['cmd'])

        success = False
        exn = None
        for _ in range(0, 3):
            try:
                job_data = builder.create_job_from_params(
                    changes_bid=step.id.hex,
                    params=params,
                    job_name=step.data['job_name'],
                )
                step.data.update(job_data)
                db.session.add(step)
                db.session.commit()
                success = True
                break
            except Exception as ex:
                logging.exception("Failed to create jobstep")
                exn = ex

        if not success:
            step.status = Status.finished
            step.result = Result.infra_failed
            db.session.add(step)
            db.session.commit()
            if exn:
                raise exn

    sync_job_step.delay_if_needed(
        step_id=step.id.hex,
        task_id=step.id.hex,
        parent_task_id=phase.job.id.hex,
    )
def _sync_artifact_as_log(self, jobstep, job_name, build_no, artifact):
    job = jobstep.job

    logsource, created = get_or_create(LogSource, where={
        'name': artifact['displayPath'],
        'job': job,
        'step': jobstep,
    }, defaults={
        'project': job.project,
        'date_created': job.date_started,
    })

    url = '{base}/job/{job}/{build}/artifact/{artifact}'.format(
        base=self.base_url,
        job=job_name,
        build=build_no,
        artifact=artifact['relativePath'],
    )

    offset = 0
    resp = requests.get(url, stream=True, timeout=15)
    iterator = resp.iter_content()
    for chunk in chunked(iterator, LOG_CHUNK_SIZE):
        chunk_size = len(chunk)
        chunk, _ = create_or_update(LogChunk, where={
            'source': logsource,
            'offset': offset,
        }, values={
            'job': job,
            'project': job.project,
            'size': chunk_size,
            'text': chunk,
        })
        offset += chunk_size

        publish_logchunk_update(chunk)
def expand_jobs(self, step, phase_config):
    """
    Creates and runs JobSteps for a set of tests, based on a phase config.

    This phase config comes from a tests.json file that the collection
    jobstep should generate. This method is then called by the TestsJsonHandler.
    """
    if not phase_config.get("cmd"):
        raise ArtifactParseError("No cmd attribute")
    if "{test_names}" not in phase_config["cmd"]:
        raise ArtifactParseError("No {test_names} in cmd")
    if "tests" not in phase_config:
        raise ArtifactParseError("No tests attribute")

    num_tests = len(phase_config["tests"])
    test_stats, avg_test_time = TestsExpander.get_test_stats(self.get_test_stats_from() or step.project.slug)

    phase, _ = get_or_create(
        JobPhase,
        where={"job": step.job, "project": step.project, "label": phase_config.get("phase") or "Test"},
        defaults={"status": Status.queued},
    )
    db.session.commit()

    # If there are no tests to run, the phase is done.
    if num_tests == 0:
        phase.status = Status.finished
        phase.result = Result.passed
        db.session.add(phase)
        db.session.commit()
        return

    # Check for whether a previous run of this task has already
    # created JobSteps for us, since doing it again would create a
    # double-sharded build.
    steps = JobStep.query.filter_by(phase_id=phase.id, replacement_id=None).all()
    if steps:
        step_shard_counts = [s.data.get("shard_count", 1) for s in steps]
        if len(set(step_shard_counts)) != 1:
            raise Exception("Mixed shard counts in phase!")
        elif len(steps) != step_shard_counts[0]:
            raise Exception("Shard count incorrect")
    else:
        # Create all of the job steps and commit them together.
        groups = TestsExpander.shard_tests(phase_config["tests"], self.max_shards, test_stats, avg_test_time)
        steps = [
            self._create_jobstep(
                phase, phase_config["cmd"], phase_config.get("path", ""), weight, test_list, len(groups)
            )
            for weight, test_list in groups
        ]
        if len(steps) != len(groups):
            raise Exception("Didn't create correct number of shards")
        db.session.commit()

    # Now that that database transaction is done, we'll do the slow work of
    # creating jenkins builds.
    for step in steps:
        self._create_jenkins_build(step)
        sync_job_step.delay_if_needed(step_id=step.id.hex, task_id=step.id.hex, parent_task_id=phase.job.id.hex)
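# Based on the validation in expand_jobs above, a phase config emitted into tests.json
# might look roughly like the following dict. The command and test names are illustrative
# assumptions, not a documented schema; only cmd, {test_names}, and tests are required.
example_phase_config = {
    'phase': 'Test',               # optional; defaults to 'Test'
    'cmd': 'pytest {test_names}',  # must contain the {test_names} placeholder
    'path': '',                    # optional working path for the shard
    'tests': [                     # required list of test names to shard
        'tests/test_foo.py',
        'tests/test_bar.py',
    ],
}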
def get(self):
    state = ""
    if 'orig_url' in request.args:
        # we'll later redirect the user back to the page they were on after
        # logging in
        state = base64.urlsafe_b64encode(request.args['orig_url'].encode('utf-8'))

    # If we're in the PP world, try to log in based on that header.
    if current_app.config.get('PP_AUTH', False):
        email = request.headers.get('X-PP-USER')
        if email is None:
            return current_app.make_response(('Expected PP auth!', 401, {}))
        # All we need to do is make sure that the user exists in the
        # database for get_current_user() to find.
        user, _ = get_or_create(User, where={
            'email': email,
        })
        return get_orig_url_redirect(state)

    # if refresh token available, log in without prompt
    if 'refresh_token' in request.cookies and 'refresh_email' in request.cookies:
        resp = auth_with_refresh_token(request.cookies)
        email = Fernet(current_app.config['COOKIE_ENCRYPTION_KEY']).decrypt(str(request.cookies['refresh_email']))
        set_session_state(access_token=resp['access_token'], email=email)
        return get_orig_url_redirect(state)

    redirect_uri = url_for(self.authorized_url, _external=True)
    flow = get_auth_flow(redirect_uri=redirect_uri, state=state)
    auth_uri = flow.step1_get_authorize_url()
    return redirect(auth_uri)
def _create_job_step(self, phase, data, force_create=False, **defaults):
    """
    Gets or creates the primary JobStep for a Jenkins Job.

    Args:
        phase (JobPhase): JobPhase the JobStep should be part of.
        data (dict): JSON-serializable data associated with the Jenkins build.
        force_create (bool): Force this JobStep to be created (rather than
            retrieved). This is used when replacing a JobStep to make sure
            we don't just get the old one.

    Returns:
        JobStep: The JobStep that was retrieved or created.
    """
    defaults['data'] = data

    # TODO(kylec): Get rid of the kwargs.
    if not defaults.get('label'):
        # we update this once we have the build_no for this jobstep
        defaults['label'] = '<Creating Jenkins build>'

    where = {
        'job': phase.job,
        'project': phase.project,
        'phase': phase,
    }
    if force_create:
        # uuid is unique which forces jobstep to be created
        where['id'] = uuid.uuid4()

    step, created = get_or_create(JobStep, where=where, defaults=defaults)
    assert created or not force_create

    BuildStep.handle_debug_infra_failures(step, self.debug_config, 'primary')

    return step
def create_build(project, label, target, message, author, change=None,
                 patch=None, cause=None, source=None, sha=None,
                 source_data=None):
    assert sha or source

    repository = project.repository

    if source is None:
        source, _ = get_or_create(Source, where={
            'repository': repository,
            'patch': patch,
            'revision_sha': sha,
            'data': source_data or {},
        })

    build = Build(
        project=project,
        project_id=project.id,
        source=source,
        source_id=source.id if source else None,
        status=Status.queued,
        author=author,
        author_id=author.id if author else None,
        label=label,
        target=target,
        message=message,
        cause=cause,
    )

    db.session.add(build)
    db.session.commit()

    execute_build(build=build)

    return build
def sync_step(self, step):
    if step.data.get('job_name') != self.job_name:
        return super(JenkinsFactoryBuilder, self).sync_step(step)

    # for any downstream jobs, pull their results using xpath magic
    for downstream_job_name in self.downstream_job_names:
        downstream_build_nos = self._get_downstream_jobs(step, downstream_job_name)
        if not downstream_build_nos:
            continue

        phase, created = get_or_create(JobPhase, where={
            'job': step.job,
            'label': downstream_job_name,
        }, defaults={
            'project_id': step.job.project_id,
        })
        db.session.commit()

        for build_no in downstream_build_nos:
            # XXX(dcramer): ideally we would grab this with the first query
            # but because we don't want to rely on an XML parser, we're doing
            # a second http request for build details
            downstream_step = self._create_job_step(
                phase, downstream_job_name, build_no)

            db.session.commit()

            sync_job_step.delay_if_needed(
                step_id=downstream_step.id.hex,
                task_id=downstream_step.id.hex,
                parent_task_id=step.job.id.hex,
            )

    return super(JenkinsFactoryBuilder, self).sync_step(step)
def delay(self, **kwargs):
    """
    Enqueue this task.

    >>> task.delay(
    >>>     task_id='33846695b2774b29a71795a009e8168a',
    >>>     parent_task_id='659974858dcf4aa08e73a940e1066328',
    >>> )
    """
    kwargs.setdefault("task_id", uuid4().hex)

    fn_kwargs = dict((k, v) for k, v in kwargs.iteritems() if k not in ("task_id", "parent_task_id"))

    task, created = get_or_create(
        Task,
        where={"task_name": self.task_name, "task_id": kwargs["task_id"]},
        defaults={
            "parent_id": kwargs.get("parent_task_id"),
            "data": {"kwargs": fn_kwargs},
            "status": Status.queued,
        },
    )

    if not created:
        task.date_modified = datetime.utcnow()
        db.session.add(task)

    db.session.commit()

    queue.delay(self.task_name, kwargs=kwargs, countdown=CONTINUE_COUNTDOWN)
def execute(self, job):
    job.status = Status.queued
    db.session.add(job)

    phase, created = get_or_create(JobPhase, where={
        'job': job,
        'label': job.label,
    }, defaults={
        'status': Status.queued,
        'project': job.project,
    })

    step, created = get_or_create(JobStep, where={
        'phase': phase,
        'label': job.label,
    }, defaults={
        'status': Status.pending_allocation,
        'job': phase.job,
        'project': phase.project,
    })

    for index, command in enumerate(self.iter_all_commands(job)):
        command_model, created = get_or_create(CommandModel, where={
            'jobstep': step,
            'order': index,
        }, defaults={
            'label': command.script.splitlines()[0][:128],
            'status': Status.queued,
            'script': command.script,
            'env': command.env,
            'cwd': command.path,
            'artifacts': command.artifacts,
        })

    db.session.commit()

    sync_job_step.delay(
        step_id=step.id.hex,
        task_id=step.id.hex,
        parent_task_id=job.id.hex,
    )
def create_job(self, job, replaces=None):
    """
    Creates a job within Jenkins.

    Due to the way the API works, this consists of two steps:
        - Submitting the job
        - Polling for the newly created job to associate either a queue ID
          or a finalized build number.
    """
    phase, created = get_or_create(JobPhase, where={
        'job': job,
        'label': self.get_default_job_phase_label(job, self.job_name),
        'project': job.project,
    }, defaults={
        'status': job.status,
    })
    assert not created or not replaces

    step = self._create_job_step(
        phase=phase,
        data={'job_name': self.job_name},
        status=job.status,
        force_create=bool(replaces),
    )
    if replaces:
        replaces.replacement_id = step.id
        db.session.add(replaces)

    db.session.commit()

    # now create the jenkins build
    # we don't commit immediately because we also want to update the job
    # and jobstep using the job_data we get from jenkins
    job_data = self.create_jenkins_build(step, commit=False)
    if job_data['queued']:
        job.status = Status.queued
    else:
        job.status = Status.in_progress
    db.session.add(job)

    assert 'master' in step.data
    assert 'job_name' in step.data
    assert 'build_no' in step.data or 'item_id' in step.data

    # now we have the build_no/item_id and can set the full jobstep label
    step.label = '{0} #{1}'.format(step.data['job_name'], step.data['build_no'] or step.data['item_id'])
    db.session.add(step)

    db.session.commit()

    sync_job_step.delay(
        step_id=step.id.hex,
        task_id=step.id.hex,
        parent_task_id=job.id.hex,
    )

    return step
def create_job(self, job):
    """
    Creates a job within Jenkins.

    Due to the way the API works, this consists of two steps:
        - Submitting the job
        - Polling for the newly created job to associate either a queue ID
          or a finalized build number.
    """
    # We want to use the JobStep ID for the CHANGES_BID so that JobSteps can be easily
    # associated with Jenkins builds, but the JobStep probably doesn't exist yet and
    # requires information about the Jenkins build to be created.
    # So, we generate the JobStep ID before we create the build on Jenkins, and we
    # deterministically derive it from the Job ID to be sure that we don't create new
    # builds/JobSteps when this method is retried.
    # TODO(kylec): The process described above seems too complicated. Try to fix that
    # so we can delete the comment.
    jobstep_id = uuid.uuid5(JOB_NAMESPACE_UUID, job.id.hex)
    params = self.get_job_parameters(job, changes_bid=jobstep_id.hex)
    is_diff = not job.source.is_commit()
    job_data = self.create_job_from_params(
        changes_bid=jobstep_id.hex,
        params=params,
        is_diff=is_diff
    )

    if job_data['queued']:
        job.status = Status.queued
    else:
        job.status = Status.in_progress
    db.session.add(job)

    phase, created = get_or_create(JobPhase, where={
        'job': job,
        'label': self.get_default_job_phase_label(job, job_data),
        'project': job.project,
    }, defaults={
        'status': job.status,
    })

    if not created:
        return

    # TODO(dcramer): due to no unique constraints this section of code
    # presents a race condition when run concurrently
    step = self._create_job_step(
        id=jobstep_id,
        phase=phase,
        status=job.status,
        data=job_data,
    )
    db.session.commit()

    sync_job_step.delay(
        step_id=step.id.hex,
        task_id=step.id.hex,
        parent_task_id=job.id.hex,
    )
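# The retry-safety argument in create_job above rests on uuid.uuid5 being deterministic:
# hashing the same namespace and name always yields the same UUID, so a retried call derives
# the same JobStep ID. A quick illustration; uuid.NAMESPACE_DNS and the hex string below are
# stand-ins for JOB_NAMESPACE_UUID and a real Job ID.
import uuid

namespace = uuid.NAMESPACE_DNS  # stand-in for JOB_NAMESPACE_UUID
job_id_hex = '33846695b2774b29a71795a009e8168a'
assert uuid.uuid5(namespace, job_id_hex) == uuid.uuid5(namespace, job_id_hex)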
def __call__(self, value):
    parsed = self.parse(value)
    if not parsed:
        raise ValueError(value)

    name, email = parsed

    author, _ = get_or_create(Author, where={"email": email}, defaults={"name": name})

    return author
def loop():
    repository = mock.repository()
    project = mock.project(repository)
    plan = mock.plan(project)

    get_or_create(Snapshot, where={
        'project': project,
        'status': SnapshotStatus.active,
    })
    get_or_create(Snapshot, where={
        'project': project,
        'status': SnapshotStatus.pending,
    })

    print('Looping indefinitely, creating data for', project.slug)
    while True:
        build = gen(project)
        print ' Pushed build {0} on {1}'.format(build.id, project.slug)
        time.sleep(0.1)
def _sync_log(self, jobstep, name, job_name, build_no):
    job = jobstep.job

    logsource, created = get_or_create(LogSource, where={
        'name': name,
        'step': jobstep,
    }, defaults={
        'job': job,
        'project': jobstep.project,
        'date_created': jobstep.date_started,
    })
    if created:
        offset = 0
    else:
        offset = jobstep.data.get('log_offset', 0)

    url = '{base}/job/{job}/{build}/logText/progressiveText/'.format(
        base=jobstep.data['master'],
        job=job_name,
        build=build_no,
    )

    session = self.http_session
    with closing(session.get(url, params={'start': offset}, stream=True, timeout=15)) as resp:
        log_length = int(resp.headers['X-Text-Size'])

        # When you request an offset that doesn't exist in the build log, Jenkins
        # will instead return the entire log. Jenkins also seems to provide us
        # with X-Text-Size which indicates the total size of the log
        if offset > log_length:
            return

        # XXX: requests doesn't seem to guarantee chunk_size, so we force it
        # with our own helper
        iterator = resp.iter_content()
        for chunk in chunked(iterator, LOG_CHUNK_SIZE):
            chunk_size = len(chunk)
            chunk, _ = create_or_update(LogChunk, where={
                'source': logsource,
                'offset': offset,
            }, values={
                'job': job,
                'project': job.project,
                'size': chunk_size,
                'text': chunk,
            })
            offset += chunk_size

        # Jenkins will suggest to us that there is more data when the job has
        # yet to complete
        has_more = resp.headers.get('X-More-Data') == 'true'

    # We **must** track the log offset externally as Jenkins embeds encoded
    # links and we can't accurately predict the next `start` param.
    jobstep.data['log_offset'] = log_length
    db.session.add(jobstep)

    return True if has_more else None
def _expand_jobs(self, step, artifact):
    builder = self.get_builder()
    artifact_data = builder.fetch_artifact(step, artifact.data)
    phase_config = artifact_data.json()

    assert phase_config['cmd']
    assert '{test_names}' in phase_config['cmd']
    assert phase_config['tests']

    test_stats, avg_test_time = self.get_test_stats(step.project)

    def get_test_duration(test_name):
        segments = self._normalize_test_segments(test_name)
        result = test_stats.get(segments)
        if result is None:
            if test_stats:
                self.logger.info('No existing duration found for test %r', test_name)
            result = avg_test_time
        return result

    group_tests = [[] for _ in range(self.max_shards)]
    group_weights = [0 for _ in range(self.max_shards)]
    weights = [0] * self.max_shards
    weighted_tests = [(get_test_duration(t), t) for t in phase_config['tests']]
    for weight, test in sorted(weighted_tests, reverse=True):
        low_index, _ = min(enumerate(weights), key=itemgetter(1))
        weights[low_index] += 1 + weight
        group_tests[low_index].append(test)
        group_weights[low_index] += 1 + weight

    phase, created = get_or_create(JobPhase, where={
        'job': step.job,
        'project': step.project,
        'label': phase_config.get('phase') or 'Test',
    }, defaults={
        'status': Status.queued,
    })
    db.session.commit()

    assert len(group_tests) == self.max_shards
    assert len(group_weights) == self.max_shards

    for test_list, weight in itertools.izip(group_tests, group_weights):
        self._expand_job(phase, {
            'tests': test_list,
            'cmd': phase_config['cmd'],
            'path': phase_config.get('path', ''),
            'weight': weight,
        })
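# A small standalone illustration of the greedy sharding used in _expand_jobs above:
# tests are sorted by estimated duration (longest first) and each is assigned to the
# currently lightest shard. The test names and durations below are made up for the demo.
from operator import itemgetter

max_shards = 2
weighted_tests = [(30, 'test_a'), (20, 'test_b'), (10, 'test_c'), (5, 'test_d')]

group_tests = [[] for _ in range(max_shards)]
weights = [0] * max_shards
for weight, test in sorted(weighted_tests, reverse=True):
    low_index, _ = min(enumerate(weights), key=itemgetter(1))
    weights[low_index] += 1 + weight
    group_tests[low_index].append(test)

# group_tests -> [['test_a', 'test_d'], ['test_b', 'test_c']]; weights -> [37, 32]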
def _sync_log(self, jobstep, name, job_name, build_no):
    job = jobstep.job

    # TODO(dcramer): this doesn't handle concurrency
    logsource, created = get_or_create(LogSource, where={
        'name': name,
        'job': job,
    }, defaults={
        'step': jobstep,
        'project': jobstep.project,
        'date_created': jobstep.date_started,
    })
    if created:
        offset = 0
    else:
        offset = jobstep.data.get('log_offset', 0)

    url = '{base}/job/{job}/{build}/logText/progressiveHtml/'.format(
        base=self.base_url,
        job=job_name,
        build=build_no,
    )

    resp = requests.get(
        url, params={'start': offset}, stream=True, timeout=15)
    log_length = int(resp.headers['X-Text-Size'])

    # When you request an offset that doesn't exist in the build log, Jenkins
    # will instead return the entire log. Jenkins also seems to provide us
    # with X-Text-Size which indicates the total size of the log
    if offset > log_length:
        return

    iterator = resp.iter_content()

    # XXX: requests doesn't seem to guarantee chunk_size, so we force it
    # with our own helper
    for chunk in chunked(iterator, LOG_CHUNK_SIZE):
        chunk_size = len(chunk)
        chunk, _ = create_or_update(LogChunk, where={
            'source': logsource,
            'offset': offset,
        }, values={
            'job': job,
            'project': job.project,
            'size': chunk_size,
            'text': chunk,
        })
        offset += chunk_size

        publish_logchunk_update(chunk)

    # We **must** track the log offset externally as Jenkins embeds encoded
    # links and we can't accurately predict the next `start` param.
    jobstep.data['log_offset'] = log_length
    db.session.add(jobstep)

    # Jenkins will suggest to us that there is more data when the job has
    # yet to complete
    return True if resp.headers.get('X-More-Data') == 'true' else None
def test_suite(job, name='default'):
    suite, _ = get_or_create(TestSuite, where={
        'job': job,
        'name': name,
    }, defaults={
        'project': job.project,
    })

    return suite
def _expand_jobs(self, step, artifact):
    builder = self.get_builder()
    artifact_data = builder.fetch_artifact(step, artifact.data)
    phase_config = artifact_data.json()

    assert phase_config['cmd']
    assert '{test_names}' in phase_config['cmd']
    assert phase_config['tests']

    test_stats, avg_test_time = self.get_test_stats(step.project)

    phase, created = get_or_create(JobPhase, where={
        'job': step.job,
        'project': step.project,
        'label': phase_config.get('phase') or 'Test',
    }, defaults={
        'status': Status.queued,
    })
    db.session.commit()

    # Check for whether a previous run of this task has already
    # created JobSteps for us, since doing it again would create a
    # double-sharded build.
    steps = JobStep.query.filter_by(phase_id=phase.id).all()
    if steps:
        step_shard_counts = [s.data.get('shard_count', 1) for s in steps]
        assert len(set(step_shard_counts)) == 1, "Mixed shard counts in phase!"
        assert len(steps) == step_shard_counts[0]
    else:
        # Create all of the job steps and commit them together.
        groups = self._shard_tests(phase_config['tests'], self.max_shards, test_stats, avg_test_time)
        steps = [
            self._create_jobstep(phase, phase_config, weight, test_list, len(groups))
            for weight, test_list in groups
        ]
        assert len(steps) == len(groups)
        db.session.commit()

    # Now that that database transaction is done, we'll do the slow work of
    # creating jenkins builds.
    for step in steps:
        self._create_jenkins_build(phase, step)
        sync_job_step.delay_if_needed(
            step_id=step.id.hex,
            task_id=step.id.hex,
            parent_task_id=phase.job.id.hex,
        )
def _handle_generic_artifact(self, jobstep, artifact):
    artifact, created = get_or_create(Artifact, where={
        'step': jobstep,
        'name': self._get_artifact_path(artifact),
    }, defaults={
        'project': jobstep.project,
        'job': jobstep.job,
        'data': artifact,
    })
    if not created:
        db.session.commit()
def simulate_local_repository():
    # Identify if we're in a git or hg repo
    backend = identify_local_vcs()

    # Simulate the repository in a new project
    repository = mock.repository(
        backend=backend,
        status=RepositoryStatus.active,
        url=os.getcwd(),
    )
    print 'Created repository for {0} repository in {1} ({2})'.format(
        repository.backend, repository.url, repository.id)

    project = mock.project(repository)
    plan = mock.plan(project)

    get_or_create(Snapshot, where={
        'project': project,
        'status': SnapshotStatus.active,
    })
    get_or_create(Snapshot, where={
        'project': project,
        'status': SnapshotStatus.pending,
    })

    # make sure we clone the repo
    vcs = repository.get_vcs()
    if vcs.exists():
        vcs.update()
    else:
        vcs.clone()

    # Create some build data based off commits in the local repository
    for lazy_revision in vcs.log(limit=10):
        revision, created, source = lazy_revision.save(repository)
        print ' Created revision {0} in {1}'.format(revision.sha, revision.branches)
        build = add(project, revision, source)
        print ' Inserted build {0} into {1}'.format(build.id, project.slug)

    return repository
def __call__(self, value):
    parsed = self.parse(value)
    if not parsed:
        raise ValueError(value)

    name, email = parsed

    author, _ = get_or_create(Author, where={
        'email': email,
    }, defaults={
        'name': name,
    })

    return author
def _expand_job(self, phase, job_config):
    assert job_config['tests']

    test_names = ' '.join(job_config['tests'])
    label = md5(test_names).hexdigest()

    step, created = get_or_create(JobStep, where={
        'job': phase.job,
        'project': phase.project,
        'phase': phase,
        'label': label,
    }, defaults={
        'data': {
            'cmd': job_config['cmd'],
            'path': job_config['path'],
            'tests': job_config['tests'],
            'expanded': True,
            'job_name': self.job_name,
            'build_no': None,
            'weight': job_config['weight'],
        },
        'status': Status.queued,
    })

    # TODO(dcramer): due to no unique constraints this section of code
    # presents a race condition when run concurrently
    if not step.data.get('build_no'):
        builder = self.get_builder()
        params = builder.get_job_parameters(
            step.job,
            script=step.data['cmd'].format(
                test_names=test_names,
            ),
            target_id=step.id.hex,
            path=step.data['path'],
        )

        is_diff = not step.job.source.is_commit()
        job_data = builder.create_job_from_params(
            target_id=step.id.hex,
            params=params,
            job_name=step.data['job_name'],
            is_diff=is_diff)

        step.data.update(job_data)
        db.session.add(step)
        db.session.commit()

    sync_job_step.delay_if_needed(
        step_id=step.id.hex,
        task_id=step.id.hex,
        parent_task_id=phase.job.id.hex,
    )
def create_job(self, job):
    """
    Creates a job within Jenkins.

    Due to the way the API works, this consists of two steps:
        - Submitting the job
        - Polling for the newly created job to associate either a queue ID
          or a finalized build number.
    """
    params = self.get_job_parameters(job)

    job_data = self.create_job_from_params(
        target_id=job.id.hex,
        params=params,
    )

    if job_data['queued']:
        job.status = Status.queued
    else:
        job.status = Status.in_progress
    db.session.add(job)

    phase, created = get_or_create(JobPhase, where={
        'job': job,
        'label': self.get_default_job_phase_label(job, job_data),
        'project': job.project,
    }, defaults={
        'status': job.status,
    })

    if not created:
        return

    # TODO(dcramer): due to no unique constraints this section of code
    # presents a race condition when run concurrently
    step = self._create_job_step(
        phase=phase,
        status=job.status,
        data=job_data,
    )
    db.session.commit()

    sync_job_step.delay(
        step_id=step.id.hex,
        task_id=step.id.hex,
        parent_task_id=job.id.hex,
    )
def execute(self, job):
    job.status = Status.pending_allocation
    db.session.add(job)

    phase, _ = get_or_create(JobPhase, where={
        'job': job,
        'label': job.label,
    }, defaults={
        'status': Status.pending_allocation,
        'project': job.project,
    })

    self._setup_jobstep(phase, job)
def _process_test_report(self, step, test_report):
    test_list = []

    if not test_report:
        return test_list

    for suite_data in test_report['suites']:
        suite_name = suite_data.get('name', 'default')
        # TODO(dcramer): this is not specific to Jenkins and should be
        # abstracted
        suite, _ = get_or_create(TestSuite, where={
            'job': step.job,
            'name_sha': sha1(suite_name).hexdigest(),
        }, defaults={
            'name': suite_name,
            'project': step.job.project,
        })

        for case in suite_data['cases']:
            message = []
            if case['errorDetails']:
                message.append('Error\n-----')
                message.append(case['errorDetails'] + '\n')
            if case['errorStackTrace']:
                message.append('Stacktrace\n----------')
                message.append(case['errorStackTrace'] + '\n')
            if case['skippedMessage']:
                message.append(case['skippedMessage'] + '\n')

            if case['status'] in ('PASSED', 'FIXED'):
                result = Result.passed
            elif case['status'] in ('FAILED', 'REGRESSION'):
                result = Result.failed
            elif case['status'] == 'SKIPPED':
                result = Result.skipped
            else:
                raise ValueError('Invalid test result: %s' % (case['status'],))

            test_result = TestResult(
                step=step,
                suite=suite,
                name=case['name'],
                package=case['className'] or None,
                duration=int(case['duration'] * 1000),
                message='\n'.join(message).strip(),
                result=result,
            )
            test_list.append(test_result)

    return test_list
def post(self, step_id):
    jobstep = JobStep.query.options(
        joinedload('project', innerjoin=True),
    ).get(step_id)
    if jobstep is None:
        return '', 404

    args = self.post_parser.parse_args()

    current_datetime = args.date or datetime.utcnow()

    if args.result:
        jobstep.result = Result[args.result]

    if args.status:
        jobstep.status = Status[args.status]

        # if we've finished this job, let's ensure we have set date_finished
        if jobstep.status == Status.finished and jobstep.date_finished is None:
            jobstep.date_finished = current_datetime
        elif jobstep.status != Status.finished and jobstep.date_finished:
            jobstep.date_finished = None

        if jobstep.status != Status.queued and jobstep.date_started is None:
            jobstep.date_started = current_datetime
        elif jobstep.status == Status.queued and jobstep.date_started:
            jobstep.date_started = None

    if args.node:
        node, _ = get_or_create(Node, where={
            'label': args.node,
        })
        jobstep.node_id = node.id

    db.session.add(jobstep)
    if db.session.is_modified(jobstep):
        db.session.commit()

    # TODO(dcramer): this is a little bit hacky, but until we can entirely
    # move to push APIs we need a good way to handle the existing sync
    job = jobstep.job
    sync_job.delay_if_needed(
        task_id=job.id.hex,
        parent_task_id=job.id.hex,
        job_id=job.build_id.hex,
    )

    return self.respond(jobstep)
def _create_jobstep(self, phase, phase_config, weight, test_list, shard_count=1):
    """
    Create a JobStep in the database for a single shard.

    This creates the JobStep, but does not commit the transaction.

    Args:
        phase (JobPhase): The phase this step will be part of.
        phase_config (dict): Configuration data gathered from the collection step.
        weight (int): The weight of this shard.
        test_list (list): The list of test names for this shard.
        shard_count (int): The total number of shards in this JobStep's phase.

    Returns:
        JobStep: the (possibly-newly-created) JobStep.
    """
    test_names = ' '.join(test_list)
    label = md5(test_names).hexdigest()

    step, created = get_or_create(JobStep, where={
        'job': phase.job,
        'project': phase.project,
        'phase': phase,
        'label': label,
    }, defaults={
        'data': {
            'cmd': phase_config['cmd'],
            'path': phase_config.get('path', ''),
            'tests': test_list,
            'expanded': True,
            'shard_count': shard_count,
            'job_name': self.job_name,
            'build_no': None,
            'weight': weight,
        },
        'status': Status.queued,
    })

    db.session.add(step)
    return step
def sync_step(self, step):
    if step.data.get('job_name') != self.job_name:
        return super(JenkinsFactoryBuilder, self).sync_step(step)

    job = step.job

    # for any downstream jobs, pull their results using xpath magic
    for downstream_job_name in self.downstream_job_names:
        downstream_build_nos = self._get_downstream_jobs(
            step, downstream_job_name)
        if not downstream_build_nos:
            continue

        phase, created = get_or_create(JobPhase, where={
            'job': job,
            'label': downstream_job_name,
        }, defaults={
            'project_id': job.project_id,
        })
        db.session.commit()

        for build_no in downstream_build_nos:
            # XXX(dcramer): ideally we would grab this with the first query
            # but because we don't want to rely on an XML parser, we're doing
            # a second http request for build details
            downstream_step = self._create_job_step(
                phase, data={
                    'job_name': downstream_job_name,
                    'build_no': build_no,
                    'queued': False,
                    'master': step.data['master'],
                })

            db.session.commit()

            sync_job_step.delay_if_needed(
                step_id=downstream_step.id.hex,
                task_id=downstream_step.id.hex,
                parent_task_id=job.id.hex,
            )

    return super(JenkinsFactoryBuilder, self).sync_step(step)
def _get_author(self, value):
    match = re.match(r'^(.+) <([^>]+)>$', value)
    if not match:
        if '@' in value:
            name, email = value, value
        else:
            name, email = value, '{0}@localhost'.format(value)
    else:
        name, email = match.group(1), match.group(2)

    author, _ = get_or_create(Author, where={
        'email': email,
    }, defaults={
        'name': name,
    })

    return author
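# To make the fallbacks in _get_author above concrete, the three input shapes would be
# parsed as follows (illustrative values only; the Author row itself still comes from
# get_or_create keyed on the email):
#
#   'Jane Doe <jane@example.com>'  ->  name='Jane Doe', email='jane@example.com'
#   'jane@example.com'             ->  name='jane@example.com', email='jane@example.com'
#   'janedoe'                      ->  name='janedoe', email='janedoe@localhost'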