def get_pipeline_schedules():
    """
    Endpoint for getting the pipeline schedules
    """
    project = Project.find()
    schedule_service = ScheduleService(project)
    schedules = list(map(dict, schedule_service.schedules()))

    for schedule in schedules:
        finder = JobFinder(schedule["name"])
        state_job = finder.latest(db.session)
        schedule["has_error"] = state_job.has_error() if state_job else False
        schedule["is_running"] = state_job.is_running() if state_job else False
        schedule["job_id"] = schedule["name"]
        schedule["started_at"] = state_job.started_at if state_job else None
        schedule["ended_at"] = state_job.ended_at if state_job else None
        schedule["trigger"] = state_job.trigger if state_job else None

        state_job_success = finder.latest_success(db.session)
        schedule["has_ever_succeeded"] = (
            state_job_success.is_success() if state_job_success else None
        )

        schedule["start_date"] = (
            schedule["start_date"].date().isoformat()
            if schedule["start_date"]
            else None
        )

    return jsonify(schedules)
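# Hedged illustration (not from the source): an example of the JSON a client
# receives from the endpoint above, with made-up values. Only the keys set in
# the loop are shown; the remaining schedule fields come from ScheduleService.
example_response = [
    {
        "name": "gitlab-to-postgres",  # hypothetical schedule name
        "job_id": "gitlab-to-postgres",
        "has_error": False,
        "is_running": True,
        "started_at": "2021-03-01T12:00:00",
        "ended_at": None,
        "trigger": "ui",  # illustrative value
        "has_ever_succeeded": True,
        "start_date": "2021-01-01",
    }
]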
def job_state() -> Response:
    """
    Endpoint for getting the status of N jobs
    """
    project = Project.find()
    poll_payload = request.get_json()
    job_ids = poll_payload["job_ids"]

    jobs = []
    for job_id in job_ids:
        finder = JobFinder(job_id)
        state_job = finder.latest(db.session)
        # Validate existence first as a job may not be queued yet as a result of
        # another prerequisite async process (dbt installation for example)
        if state_job:
            state_job_success = finder.latest_success(db.session)
            jobs.append(
                {
                    "job_id": job_id,
                    "is_complete": state_job.is_complete(),
                    "has_error": state_job.has_error(),
                    "started_at": state_job.started_at,
                    "ended_at": state_job.ended_at,
                    "has_ever_succeeded": state_job_success.is_success()
                    if state_job_success
                    else None,
                }
            )

    return jsonify({"jobs": jobs})
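# Hedged usage sketch (not from the source): the endpoint above reads a JSON body
# with a "job_ids" list and returns one status object per job that already exists.
# The route below is an assumption for illustration only.
import requests

resp = requests.post(
    "http://localhost:5000/api/v1/orchestrations/jobs/state",  # hypothetical route
    json={"job_ids": ["gitlab-to-postgres", "salesforce-to-snowflake"]},
)
for job in resp.json()["jobs"]:
    print(job["job_id"], "complete" if job["is_complete"] else "running")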
def job_log(job_id) -> Response:
    """
    Endpoint for getting the most recent log generated by a job with job_id
    """
    project = Project.find()

    try:
        log_service = JobLoggingService(project)
        log = log_service.get_latest_log(job_id)
        has_log_exceeded_max_size = False
    except SizeThresholdJobLogException:
        log = None
        has_log_exceeded_max_size = True

    finder = JobFinder(job_id)
    state_job = finder.latest(db.session)
    state_job_success = finder.latest_success(db.session)

    return jsonify(
        {
            "job_id": job_id,
            "log": log,
            "has_log_exceeded_max_size": has_log_exceeded_max_size,
            "has_error": state_job.has_error() if state_job else False,
            "started_at": state_job.started_at if state_job else None,
            "ended_at": state_job.ended_at if state_job else None,
            "trigger": state_job.trigger if state_job else None,
            "has_ever_succeeded": state_job_success.is_success()
            if state_job_success
            else None,
        }
    )
def get_pipeline_schedules():
    """
    Endpoint for getting the pipeline schedules
    """
    project = Project.find()
    schedule_service = ScheduleService(project)
    schedules = [s._asdict() for s in schedule_service.schedules()]

    for schedule in schedules:
        finder = JobFinder(schedule["name"])
        state_job = finder.latest(db.session)
        schedule["has_error"] = state_job.has_error() if state_job else False
        schedule["is_running"] = state_job.is_running() if state_job else False
        schedule["job_id"] = state_job.job_id if state_job else None

    return jsonify(schedules)
def job_log(job_id) -> Response:
    """
    Endpoint for getting the most recent log generated by a job with job_id
    """
    project = Project.find()
    log_service = JobLoggingService(project)
    log = log_service.get_latest_log(job_id)

    finder = JobFinder(job_id)
    state_job = finder.latest(db.session)

    return jsonify(
        {
            "job_id": job_id,
            "log": log,
            "has_error": state_job.has_error() if state_job else False,
        }
    )
def restore_bookmark(self, session, tap: PluginInvoker):
    if self.context.job is None:
        logging.info(
            "Running outside a Job context: incremental state could not be loaded."
        )
        return

    # the `state.json` is stored in the database
    finder = JobFinder(self.context.job.job_id)
    state_job = finder.latest_with_payload(session, flags=SingerPayload.STATE)

    if state_job and "singer_state" in state_job.payload:
        logging.info(f"Found state from {state_job.started_at}.")
        with tap.files["state"].open("w+") as state:
            json.dump(state_job.payload["singer_state"], state)
    else:
        logging.warning("No state was found, complete import.")
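# Hedged illustration (assumption, not from the source): the "singer_state"
# payload written above is a plain JSON object following the Singer spec,
# typically carrying per-stream bookmarks. Dumping it to the tap's state file
# looks roughly like this; the stream and bookmark names are made up.
import json

singer_state = {
    "bookmarks": {
        "issues": {"since": "2021-01-01T00:00:00Z"},  # hypothetical stream bookmark
    }
}

with open("state.json", "w+") as state_file:
    json.dump(singer_state, state_file)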
async def _run_job(project, job, session, context_builder, force=False):
    StaleJobFailer(job.job_id).fail_stale_jobs(session)

    if not force:
        existing = JobFinder(job.job_id).latest_running(session)
        if existing:
            raise CliError(
                f"Another '{job.job_id}' pipeline is already running which started at {existing.started_at}. "
                + "To ignore this check use the '--force' option."
            )

    async with job.run(session):
        job_logging_service = JobLoggingService(project)
        with job_logging_service.create_log(job.job_id, job.run_id) as log_file:
            output_logger = OutputLogger(log_file)

            await _run_elt(project, context_builder, output_logger)
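# Hedged usage sketch (assumption, not from the source): how a caller might react
# to the already-running guard above. run_or_skip is a hypothetical helper; the
# skip-and-log decision is illustrative.
async def run_or_skip(project, job, session, context_builder):
    try:
        await _run_job(project, job, session, context_builder)
    except CliError as err:
        # Another pipeline with the same job_id is still running; either wait
        # and retry later, or deliberately bypass the check with force=True.
        print(f"Skipping run: {err}")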
def last_successful_run(self, session):
    return JobFinder(self.name).latest_success(session)
def look_up_state(self, plugin_invoker, exec_args=[]):
    if "state" not in plugin_invoker.capabilities:
        raise PluginLacksCapabilityError(
            f"Extractor '{self.name}' does not support incremental state"
        )

    state_path = plugin_invoker.files["state"]

    try:
        # Delete state left over from different pipeline run for same extractor
        state_path.unlink()
    except FileNotFoundError:
        pass

    elt_context = plugin_invoker.context
    if not elt_context or not elt_context.job:
        # Running outside pipeline context: incremental state could not be loaded
        return

    if elt_context.full_refresh:
        logger.info(
            "Performing full refresh, ignoring state left behind by any previous runs."
        )
        return

    custom_state_filename = plugin_invoker.plugin_config_extras["_state"]
    if custom_state_filename:
        custom_state_path = plugin_invoker.project.root.joinpath(custom_state_filename)

        try:
            shutil.copy(custom_state_path, state_path)
            logger.info(f"Found state in {custom_state_filename}")
        except FileNotFoundError as err:
            raise PluginExecutionError(
                f"Could not find state file {custom_state_path}"
            ) from err

        return

    # the `state.json` is stored in the database
    state = {}
    incomplete_since = None
    finder = JobFinder(elt_context.job.job_id)

    state_job = finder.latest_with_payload(elt_context.session, flags=Payload.STATE)
    if state_job:
        logger.info(f"Found state from {state_job.started_at}.")
        incomplete_since = state_job.ended_at
        if "singer_state" in state_job.payload:
            merge(state_job.payload["singer_state"], state)

    incomplete_state_jobs = finder.with_payload(
        elt_context.session, flags=Payload.INCOMPLETE_STATE, since=incomplete_since
    )
    for state_job in incomplete_state_jobs:
        logger.info(
            f"Found and merged incomplete state from {state_job.started_at}."
        )
        if "singer_state" in state_job.payload:
            merge(state_job.payload["singer_state"], state)

    if state:
        with state_path.open("w") as state_file:
            json.dump(state, state_file, indent=2)
    else:
        logger.warning("No state was found, complete import.")
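# Hedged sketch of the merge behavior assumed above: `merge` is a Meltano utility
# not shown here, so deep_merge below is an illustrative stand-in that layers
# newer (incomplete) state on top of the last complete state.
def deep_merge(src: dict, dest: dict) -> dict:
    for key, value in src.items():
        if isinstance(value, dict) and isinstance(dest.get(key), dict):
            deep_merge(value, dest[key])
        else:
            dest[key] = value
    return dest

complete_state = {"bookmarks": {"users": {"updated_at": "2021-01-01"}}}    # made-up values
incomplete_state = {"bookmarks": {"orders": {"updated_at": "2021-02-01"}}}  # made-up values

state = {}
deep_merge(complete_state, state)
deep_merge(incomplete_state, state)
# state now carries bookmarks for both streams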
"retry_delay": timedelta(minutes=5), "concurrency": 1, } engine_uri = f"sqlite:///{os.getcwd()}/.meltano/meltano.db" engine, Session = project_engine(project, engine_uri, default=True) session = Session() for schedule in schedule_service.schedules(): if schedule.interval == "@once": logging.info( f"No DAG created for schedule '{schedule.name}' because its interval is set to `@once`." ) continue finder = JobFinder(schedule.name) last_successful_run = finder.latest_success(session) if not last_successful_run: logging.info( f"No DAG created for schedule '{schedule.name}' because it hasn't had a successful (manual) run yet." ) continue args = default_args.copy() if schedule.start_date: args["start_date"] = coerce_datetime(schedule.start_date) dag_id = f"meltano_{schedule.name}" # from https://airflow.apache.org/docs/stable/scheduler.html#backfill-and-catchup #