def create_job(self):
    from fastlane.models.job import Job

    job_id = ObjectId()
    j = Job(id=job_id, job_id=str(job_id))
    j.task = self
    j.save()
    self.jobs.append(j)
    self.save()

    return j

def create_job(self):
    from fastlane.models.job import Job

    job_id = uuid4()
    j = Job(task_id=str(self.task_id), job_id=str(job_id))
    j.task = self
    j.save()
    self.jobs.append(j)
    self.save()

    return j

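# A minimal usage sketch for create_job (illustrative only; assumes an active
# mongoengine connection and that Task.create_task, used in the tests below,
# has persisted the task first):
#
#     task = Task.create_task(str(uuid4()))
#     job = task.create_job()
#     assert job.task_id == task.task_id
#     assert job in task.jobs
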
def test_job_get_by_job_id(client):
    """Test getting a job by id"""
    task_id = str(uuid4())
    t = Task.create_task(task_id)
    j = t.create_job()

    topic = Job.get_by_id(task_id, j.job_id)
    expect(topic).not_to_be_null()
    expect(topic.job_id).to_equal(str(j.id))

    topic = Job.get_by_id("invalid", "invalid")
    expect(topic).to_be_null()

def create_or_update_job(self, job_id):
    from fastlane.models.job import Job

    jobs = list(filter(lambda job: str(job.job_id) == job_id, self.jobs))

    if not jobs:
        j = Job(task_id=str(self.task_id), job_id=str(job_id))
        j.task = self
        j.save()
        self.jobs.append(j)
        self.save()
    else:
        j = jobs[0]

    return j

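# Sketch of the upsert behaviour above (illustrative): calling
# create_or_update_job twice with the same id yields the same job rather
# than a duplicate.
#
#     job_id = str(uuid4())
#     first = task.create_or_update_job(job_id)
#     second = task.create_or_update_job(job_id)
#     assert first.job_id == second.job_id
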
def stop_job(task_id, job_id):
    logger = g.logger.bind(operation="stop", task_id=task_id, job_id=job_id)

    logger.debug("Getting job...")
    job = Job.get_by_id(task_id=task_id, job_id=job_id)

    if job is None:
        logger.error("Job not found in task.")
        abort(404)

        return

    execution = job.get_last_execution()

    if execution is not None and execution.status == JobExecution.Status.running:
        logger.debug("Stopping current execution...")
        executor = current_app.load_executor()
        executor.stop_job(job.task, job, execution)
        logger.debug("Current execution stopped.")

    scheduler = Scheduler("jobs", connection=current_app.redis)

    if "enqueued_id" in job.metadata and job.metadata["enqueued_id"] in scheduler:
        scheduler.cancel(job.metadata["enqueued_id"])
        job.scheduled = False
        job.save()

    logger.debug("Job stopped.")

    return get_job_summary(task_id, job_id)

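# Usage sketch for the handler above (the route path and port are
# assumptions; they depend on where this handler is mounted):
#
#     curl -X POST http://localhost:10000/tasks/<task_id>/jobs/<job_id>/stop
#
# Stopping first kills a running execution, then cancels any scheduled
# re-enqueue so the job does not come back.
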
def get_response(task_id, job_id, get_data_fn):
    logger = g.logger.bind(operation="get_response", task_id=task_id, job_id=job_id)

    logger.debug("Getting job...")
    job = Job.get_by_id(task_id=task_id, job_id=job_id)

    if job is None:
        logger.error("Job not found in task.")
        abort(404)

        return

    if not job.executions:
        logger.error("No executions found in job.")
        abort(400)

        return

    execution = job.get_last_execution()

    headers = {"Fastlane-Exit-Code": str(execution.exit_code)}

    return Response(headers=headers, response=get_data_fn(execution), status=200)

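# A plausible sketch of how callers reuse get_response for different payloads
# (the handler names are hypothetical; the execution.log and execution.error
# fields are the ones populated by monitor_job below):
#
#     def get_job_stdout(task_id, job_id):
#         return get_response(task_id, job_id, lambda execution: execution.log)
#
#     def get_job_stderr(task_id, job_id):
#         return get_response(task_id, job_id, lambda execution: execution.error)
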
def ws(ws, task_id, job_id):
    executor = current_app.executor
    logger = current_app.logger.bind(task_id=task_id, job_id=job_id)

    job = Job.get_by_id(task_id=task_id, job_id=job_id)

    if job is None:
        logger.error("Job not found in task.")
        ws.close()

        return

    ex = job.get_last_execution()

    if ex is None:
        logger.error("No executions found in job.")
        ws.close(message="wsretry")

        return

    # Stream logs from a separate process so this handler can keep polling
    # the socket state and tear the streamer down when the client disconnects.
    p = Process(target=stream_log, args=(executor, task_id, job, ex, ws))
    p.start()

    while not ws.closed:
        time.sleep(10)

    p.terminate()

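# Minimal client sketch using the websocket-client package (the route path
# is an assumption; adjust to wherever this handler is mounted):
#
#     import websocket
#
#     conn = websocket.create_connection(
#         f"ws://localhost:10000/tasks/{task_id}/jobs/{job_id}/ws"
#     )
#     while True:
#         print(conn.recv())
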
def test_enqueue7(client):
    """Test enqueue with metadata"""
    app = client.application
    app.redis.flushall()

    task_id = str(uuid4())
    data = {"image": "ubuntu", "command": "ls", "metadata": {"a": 123, "b": 456}}
    options = dict(
        data=dumps(data),
        headers={"Content-Type": "application/json"},
        follow_redirects=True,
    )

    rv = client.post(f"/tasks/{task_id}", **options)
    expect(rv.status_code).to_equal(200)
    obj = loads(rv.data)
    job_id = obj["jobId"]
    expect(job_id).not_to_be_null()
    expect(obj["queueJobId"]).not_to_be_null()

    j = Job.get_by_id(task_id, job_id)
    expect(j.metadata).to_include("custom")

    metadata = j.metadata["custom"]
    expect(metadata).to_be_like(data["metadata"])

def status():
    executor = current_app.load_executor()
    status = {"hosts": [], "containers": {"running": []}}

    containers = executor.get_running_containers()

    for host, port, container_id in containers["running"]:
        status["containers"]["running"].append(
            {"host": host, "port": port, "id": container_id}
        )

    status["hosts"] = containers["available"]

    status["queues"] = {"jobs": {}, "monitor": {}, "error": {}}

    for queue in ["jobs", "monitor", "error"]:
        jobs_queue_size = current_app.redis.llen(f"rq:queue:{queue}")
        status["queues"][queue]["length"] = jobs_queue_size

    status["tasks"] = {"count": Task.objects.count()}

    status["jobs"] = {"count": Job.objects.count()}
    status["jobs"]["scheduled"] = []
    scheduled_jobs = Job.objects(scheduled=True).all()

    for job in scheduled_jobs:
        j = job.to_dict(include_executions=False)
        itr = croniter.croniter(job.metadata["cron"], datetime.utcnow())
        j["nextScheduledAt"] = itr.get_next(datetime).isoformat()

        task_id = job.task.task_id

        job_url = url_for(
            "task.get_job", task_id=task_id, job_id=str(job.id), _external=True
        )
        j["url"] = job_url

        stop_job_url = url_for(
            "task.stop_job", task_id=task_id, job_id=str(job.id), _external=True
        )
        j["stopUrl"] = stop_job_url

        task_url = url_for("task.get_task", task_id=task_id, _external=True)
        del j["taskId"]
        j["task"] = {"id": task_id, "url": task_url}

        status["jobs"]["scheduled"].append(j)

    return jsonify(status), 200

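# Illustrative shape of the JSON this endpoint returns (all values
# hypothetical):
#
#     {
#         "hosts": ["localhost:2375"],
#         "containers": {
#             "running": [{"host": "localhost", "port": 2375, "id": "..."}]
#         },
#         "queues": {
#             "jobs": {"length": 0},
#             "monitor": {"length": 0},
#             "error": {"length": 0}
#         },
#         "tasks": {"count": 10},
#         "jobs": {"count": 20, "scheduled": [...]}
#     }
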
def create_job(self, image, command):
    from fastlane.models.job import Job

    job_id = uuid4()
    j = Job(task_id=str(self.task_id), job_id=str(job_id))
    j.task = self
    j.image = image
    j.command = command
    j.save()
    self.jobs.append(j)
    self.save()

    return j

def retry_job(task_id, job_id):
    logger = g.logger.bind(operation="retry", task_id=task_id, job_id=job_id)

    logger.debug("Getting job...")
    job = Job.get_by_id(task_id=task_id, job_id=job_id)

    if job is None:
        logger.error("Job not found in task.")
        abort(404)

        return

    execution = job.get_last_execution()

    if execution is None:
        logger.error("No execution yet to retry.")
        abort(Response(response="No execution yet to retry.", status=400))

        return

    scheduler = Scheduler("jobs", connection=current_app.redis)

    if "enqueued_id" in job.metadata and job.metadata["enqueued_id"] in scheduler:
        msg = "Can't retry a scheduled job."
        logger.error(msg)
        abort(Response(response=msg, status=400))

        return

    if execution.status == JobExecution.Status.running:
        logger.debug("Stopping current execution...")
        executor = current_app.load_executor()
        executor.stop_job(job.task, job, execution)
        logger.debug("Current execution stopped.")

    execution.status = JobExecution.Status.failed
    job.save()

    logger.debug("Enqueuing job execution...")
    args = [task_id, job_id, execution.image, execution.command]
    result = current_app.job_queue.enqueue(run_job, *args, timeout=-1)
    job.metadata["enqueued_id"] = result.id
    job.save()
    logger.info("Job execution enqueued successfully.")

    return get_job_summary(task_id, job_id)

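# Usage sketch (route path assumed, mirroring the stop endpoint above):
#
#     curl -X POST http://localhost:10000/tasks/<task_id>/jobs/<job_id>/retry
#
# Retrying marks the last execution as failed (stopping it first if it is
# still running) and enqueues a fresh run with the same image and command.
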
def test_enqueue6(client):
    """Test enqueue with webhooks"""
    app = client.application
    app.redis.flushall()

    task_id = str(uuid4())
    data = {
        "image": "ubuntu",
        "command": "ls",
        "webhooks": {
            "succeeds": [{"method": "GET", "url": "http://some.test.url"}],
            "fails": [{"method": "GET", "url": "http://some.test.url"}],
            "finishes": [{"method": "POST", "url": "http://some.test.url"}],
        },
    }
    options = dict(
        data=dumps(data),
        headers={"Content-Type": "application/json"},
        follow_redirects=True,
    )

    rv = client.post(f"/tasks/{task_id}", **options)
    expect(rv.status_code).to_equal(200)
    obj = loads(rv.data)
    job_id = obj["jobId"]
    expect(job_id).not_to_be_null()
    expect(obj["queueJobId"]).not_to_be_null()

    j = Job.get_by_id(task_id, job_id)
    expect(j.metadata).to_include("webhooks")

    wh = j.metadata["webhooks"]
    expect(wh).to_be_like(data["webhooks"])

def get_job(task_id, job_id):
    logger = g.logger.bind(operation="get_job", task_id=task_id, job_id=job_id)
    logger.debug("Getting job...")
    job = Job.get_by_id(task_id=task_id, job_id=job_id)

    if job is None:
        logger.error("Job not found in task.")
        abort(404)

        return

    logger.debug("Job retrieved successfully...")

    details = job.to_dict(
        include_log=True,
        include_error=True,
        blacklist=current_app.config["ENV_BLACKLISTED_WORDS"].lower().split(","),
    )

    task_url = url_for("task.get_task", task_id=task_id, _external=True)

    return jsonify({"task": {"id": task_id, "url": task_url}, "job": details})

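# Usage sketch (route path assumed):
#
#     curl http://localhost:10000/tasks/<task_id>/jobs/<job_id>
#
# Response body (shape illustrative):
#
#     {"task": {"id": "...", "url": "..."}, "job": {...}}
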
def run_job(task_id, job_id, image, command):
    app = current_app
    logger = app.logger.bind(task_id=task_id, job_id=job_id, image=image, command=command)

    try:
        executor = app.load_executor()

        job = Job.get_by_id(task_id, job_id)

        if job is None:
            logger.error("Job was not found with task id and job id.")

            return False

        if not validate_max_concurrent(executor, task_id, job, image, command, logger):
            return False

        # Split an optional tag off the image name, defaulting to "latest".
        tag = "latest"

        if ":" in image:
            image, tag = image.split(":")

        logger = logger.bind(image=image, tag=tag)

        logger.debug("Changing job status...", status=JobExecution.Status.pulling)
        ex = job.create_execution(image=image, command=command)
        ex.status = JobExecution.Status.enqueued
        job.save()
        logger.debug("Job status changed successfully.", status=JobExecution.Status.pulling)

        logger = logger.bind(execution_id=ex.execution_id)
    except Exception as err:
        logger.error("Failed to create job execution. Skipping job...", error=err)
        current_app.report_error(
            err,
            metadata=dict(task_id=task_id, job_id=job_id, image=image, command=command),
        )

        return False

    try:
        if not validate_expiration(job, ex, logger):
            return False

        logger.info("Started processing job.")

        if not download_image(executor, job, ex, image, tag, command, logger):
            return False

        if not run_container(executor, job, ex, image, tag, command, logger):
            return False

        logger.debug("Changing job status...", status=JobExecution.Status.running)
        ex.status = JobExecution.Status.running
        job.save()
        logger.debug("Job status changed successfully.", status=JobExecution.Status.running)

        app.monitor_queue.enqueue(monitor_job, task_id, job_id, ex.execution_id, timeout=-1)

        return True
    except Exception as err:
        logger.error("Failed to run job", error=err)
        ex.status = JobExecution.Status.failed
        ex.error = f"Job failed to run with error: {str(err)}"
        job.save()

        current_app.report_error(
            err,
            metadata=dict(
                operation="Running Container",
                task_id=task_id,
                job_id=job_id,
                execution_id=ex.execution_id,
                image=image,
                tag=tag,
                command=command,
            ),
        )

        return False

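# Execution lifecycle implied by run_job and monitor_job below (illustrative):
# a new execution starts as enqueued, moves through pulling while the image
# downloads, then running, and monitor_job finishes it as done, failed or
# timedout.
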
def monitor_job(task_id, job_id, execution_id):
    app = current_app
    logger = app.logger.bind(task_id=task_id, job_id=job_id)

    try:
        executor = app.load_executor()
        job = Job.get_by_id(task_id, job_id)

        if job is None:
            logger.error("Failed to retrieve task or job.")

            return False

        execution = job.get_execution_by_id(execution_id)
        result = executor.get_result(job.task, job, execution)
        logger.info(
            "Container result obtained.",
            container_status=result.status,
            container_exit_code=result.exit_code,
        )

        if result.status in (
            ExecutionResult.Status.created,
            ExecutionResult.Status.running,
        ):
            elapsed = (datetime.utcnow() - execution.started_at).total_seconds()

            if elapsed > job.metadata["timeout"]:
                execution.finished_at = datetime.utcnow()
                execution.status = JobExecution.Status.timedout
                execution.error = f"Job execution timed out after {elapsed} seconds."
                executor.stop_job(job.task, job, execution)

                logger.debug(
                    "Job execution timed out. Storing job details in mongo db.",
                    status=execution.status,
                    elapsed=elapsed,
                    error=result.error,
                )

                job.save()
                logger.info("Job execution timed out.", status=execution.status)

                return False

            scheduler = Scheduler("monitor", connection=app.redis)
            interval = timedelta(seconds=5)
            logger.info(
                "Job has not finished. Retrying monitoring in the future.",
                container_status=result.status,
                seconds=interval.total_seconds(),
            )
            scheduler.enqueue_in(interval, monitor_job, task_id, job_id, execution_id)

            return True

        if (result.exit_code != 0 and "retry_count" in job.metadata
                and job.metadata["retry_count"] < job.metadata["retries"]):
            retry_logger = logger.bind(
                exit_code=result.exit_code,
                retry_count=job.metadata["retry_count"],
                retries=job.metadata["retries"],
            )
            retry_logger.debug("Job failed. Enqueuing job retry...")
            job.metadata["retry_count"] += 1

            scheduler = Scheduler("jobs", connection=current_app.redis)

            args = [task_id, job_id, execution.image, execution.command]

            factor = app.config["EXPONENTIAL_BACKOFF_FACTOR"]
            min_backoff = app.config["EXPONENTIAL_BACKOFF_MIN_MS"] / 1000.0
            delta = timedelta(seconds=min_backoff)

            if job.metadata["retries"] > 0:
                delta = timedelta(
                    seconds=math.pow(factor, job.metadata["retry_count"]) * min_backoff
                )

            dt = datetime.utcnow() + delta
            enqueued = scheduler.enqueue_at(dt, run_job, *args)
            job.metadata["enqueued_id"] = enqueued.id
            job.save()
            retry_logger.info("Job execution enqueued successfully.")

        # still need to finish current execution as the retry
        # will be a new execution
        execution.finished_at = datetime.utcnow()
        execution.exit_code = result.exit_code
        execution.status = (
            JobExecution.Status.done
            if execution.exit_code == 0
            else JobExecution.Status.failed
        )
        execution.log = result.log.decode("utf-8")
        execution.error = result.error.decode("utf-8")

        logger.debug(
            "Job finished. Storing job details in mongo db.",
            status=execution.status,
            log=result.log,
            error=result.error,
        )
        job.save()
        logger.info("Job details stored in mongo db.", status=execution.status)

        return True
    except Exception as err:
        logger.error("Failed to monitor job", error=err)
        current_app.report_error(
            err,
            metadata=dict(
                operation="Monitoring Job",
                task_id=task_id,
                job_id=job_id,
                execution_id=execution_id,
            ),
        )

        raise err

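# A self-contained sketch of the retry backoff schedule computed above,
# assuming factor=2 and EXPONENTIAL_BACKOFF_MIN_MS=1000 (both hypothetical
# values; the real ones come from app.config):
#
#     import math
#
#     factor = 2
#     min_backoff = 1000 / 1000.0  # seconds
#
#     for retry_count in range(1, 5):
#         delay = math.pow(factor, retry_count) * min_backoff
#         print(f"retry {retry_count}: wait {delay:.0f}s")
#
#     # retry 1: wait 2s
#     # retry 2: wait 4s
#     # retry 3: wait 8s
#     # retry 4: wait 16s
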
def send_email(task_id, job_id, execution_id, subject, to_email):
    app = current_app
    job = Job.get_by_id(task_id, job_id)
    logger = app.logger.bind(
        operation="send_email",
        task_id=task_id,
        job_id=job_id,
        to_email=to_email,
        execution_id=execution_id,
        subject=subject,
    )

    if job is None:
        logger.error("Failed to retrieve task or job.")

        return False

    execution = job.get_execution_by_id(execution_id)
    logger.info("Execution loaded successfully")

    smtp_host = app.config["SMTP_HOST"]
    smtp_port = app.config["SMTP_PORT"]
    smtp_from = app.config["SMTP_FROM"]

    if smtp_host is None or smtp_port is None or smtp_from is None:
        logger.error(
            "SMTP_HOST, SMTP_PORT and SMTP_FROM must be configured. Skipping sending e-mail."
        )

        return False

    try:
        smtp_port = int(smtp_port)
        logger = logger.bind(smtp_host=smtp_host, smtp_port=smtp_port)

        logger.info("Connecting to SMTP Server...")
        server = smtplib.SMTP(smtp_host, smtp_port)
        server.set_debuglevel(0)

        if app.config.get("SMTP_USE_SSL"):
            logger.info("Starting TLS...")
            server.starttls()

        smtp_user = app.config.get("SMTP_USER")
        smtp_password = app.config.get("SMTP_PASSWORD")

        if smtp_user and smtp_password:
            logger.info("Authenticating with SMTP...", smtp_user=smtp_user)
            server.login(smtp_user, smtp_password)

        from_email = app.config["SMTP_FROM"]
        task_url = url_for("task.get_task", task_id=task_id, _external=True)
        job_url = url_for("task.get_job", task_id=task_id, job_id=job_id, _external=True)

        job_data = json.dumps(
            execution.to_dict(include_log=True, include_error=True),
            indent=4,
            sort_keys=True,
        )

        body = (
            """
Automatic message. Please do not reply to this!

Job Details:
%s
"""
            % job_data
        )

        subj = "[Fastlane] %s" % subject

        msg = MIMEMultipart("alternative")
        msg["Subject"] = subj
        msg["From"] = from_email
        msg["To"] = to_email

        part1 = MIMEText(body, "plain")

        html_body = """<html><body>
<h2>Job Details:</h2>
<div><pre><code>%s</code></pre></div>
<div>---</div>
<p><a href="%s">[View Task Details]</a> | <a href="%s">[View Job Details]</a></p>
<div>---</div>
<p>Automatic message. Please do not reply to this!</p>
</body></html>
""" % (
            job_data,
            task_url,
            job_url,
        )
        part2 = MIMEText(html_body, "html")

        msg.attach(part1)
        msg.attach(part2)

        logger.info("Sending email...")
        server.sendmail(from_email, to_email, msg.as_string())
        server.quit()
        logger.info("Email sent successfully.")
    except Exception as exc:
        error = traceback.format_exc()
        logger.error("Sending e-mail failed with exception!", error=error)

        raise exc

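# Hypothetical settings for exercising send_email locally (e.g. against a
# MailHog container listening on port 1025; the names mirror the config keys
# read above):
#
#     SMTP_HOST = "localhost"
#     SMTP_PORT = 1025
#     SMTP_FROM = "fastlane@example.com"
#     SMTP_USE_SSL = False
#     SMTP_USER = None
#     SMTP_PASSWORD = None
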
def send_webhook(
    task_id, job_id, execution_id, method, url, headers, retries, retry_count
):
    app = current_app
    job = Job.get_by_id(task_id, job_id)
    logger = app.logger.bind(
        operation="send_webhook",
        task_id=task_id,
        job_id=job_id,
        execution_id=execution_id,
        method=method,
        url=url,
        headers=headers,
        retries=retries,
        retry_count=retry_count,
    )

    if job is None:
        logger.error("Failed to retrieve task or job.")

        return False

    execution = job.get_execution_by_id(execution_id)
    logger.info("Execution loaded successfully")

    data = execution.to_dict(include_log=True, include_error=True)
    data = json.loads(json.dumps(data))

    if "webhookDispatch" in data["metadata"]:
        del data["metadata"]["webhookDispatch"]

    data["metadata"]["custom"] = job.metadata.get("custom", {})
    data = json.dumps(data)

    try:
        w = WebhooksDispatcher()
        response = w.dispatch(method, url, data, headers)
        execution.metadata.setdefault("webhookDispatch", [])
        execution.metadata["webhookDispatch"].append(
            {
                "timestamp": datetime.utcnow().isoformat(),
                "url": url,
                "statusCode": response.status_code,
                "body": response.body,
                "headers": response.headers,
            }
        )
        job.save()
        logger.info("Webhook dispatched successfully.")
    except WebhooksDispatchError as err:
        error = traceback.format_exc()
        execution.metadata.setdefault("webhookDispatch", [])
        execution.metadata["webhookDispatch"].append(
            {
                "timestamp": datetime.utcnow().isoformat(),
                "url": url,
                "statusCode": err.status_code,
                "body": err.body,
                "headers": err.headers,
                "error": error,
            }
        )
        job.save()
        logger.error("Failed to dispatch webhook.", err=error)

        if retry_count < retries:
            logger.debug("Retrying...")
            args = [
                task_id,
                job_id,
                execution_id,
                method,
                url,
                headers,
                retries,
                retry_count + 1,
            ]
            scheduler = Scheduler("webhooks", connection=current_app.redis)

            factor = app.config["WEBHOOKS_EXPONENTIAL_BACKOFF_FACTOR"]
            min_backoff = app.config["WEBHOOKS_EXPONENTIAL_BACKOFF_MIN_MS"] / 1000.0
            delta = timedelta(seconds=math.pow(factor, retry_count) * min_backoff)
            scheduler.enqueue_in(delta, send_webhook, *args)
            logger.info("Webhook dispatch retry scheduled.", date=delta)

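# Illustrative enqueue of the dispatcher above for a "finishes" webhook,
# using the metadata shape from test_enqueue6 (the webhooks_queue attribute
# and the retry count are assumptions, not fastlane's actual call site):
#
#     for webhook in job.metadata["webhooks"].get("finishes", []):
#         current_app.webhooks_queue.enqueue(
#             send_webhook,
#             task_id,
#             job_id,
#             execution.execution_id,
#             webhook["method"],
#             webhook["url"],
#             webhook.get("headers", {}),
#             3,  # retries
#             0,  # retry_count
#         )
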