Example #1
def get_pipeline_schedules():
    """
    Endpoint for getting the pipeline schedules
    """
    project = Project.find()
    schedule_service = ScheduleService(project)
    schedules = list(map(dict, schedule_service.schedules()))
    for schedule in schedules:
        finder = JobFinder(schedule["name"])
        state_job = finder.latest(db.session)
        schedule["has_error"] = state_job.has_error() if state_job else False
        schedule["is_running"] = state_job.is_running() if state_job else False
        schedule["job_id"] = schedule["name"]
        schedule["started_at"] = state_job.started_at if state_job else None
        schedule["ended_at"] = state_job.ended_at if state_job else None
        schedule["trigger"] = state_job.trigger if state_job else None

        state_job_success = finder.latest_success(db.session)
        schedule["has_ever_succeeded"] = (state_job_success.is_success()
                                          if state_job_success else None)

        schedule["start_date"] = (schedule["start_date"].date().isoformat()
                                  if schedule["start_date"] else None)

    return jsonify(schedules)
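
For orientation, here is a minimal sketch of the JobFinder surface these examples rely on, inferred purely from the calls made across the snippets; the method names and signatures match the usage, but the bodies and docstrings are hypothetical placeholders, not the real implementation.

class JobFinder:
    """Hypothetical stub inferred from usage; the real class queries the
    job table through the SQLAlchemy session passed to each method."""

    def __init__(self, job_id):
        self.job_id = job_id

    def latest(self, session):
        """Most recent run for this job_id, or None."""
        ...

    def latest_success(self, session):
        """Most recent successful run, or None."""
        ...

    def latest_running(self, session):
        """Most recent run still marked as running, or None."""
        ...

    def latest_with_payload(self, session, flags=0):
        """Most recent run whose payload has the given flags set, or None."""
        ...

    def with_payload(self, session, flags=0, since=None):
        """All matching runs, optionally restricted to those after `since`."""
        ...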
Example #2
def job_state() -> Response:
    """
    Endpoint for getting the status of N jobs
    """
    project = Project.find()
    poll_payload = request.get_json()
    job_ids = poll_payload["job_ids"]

    jobs = []
    for job_id in job_ids:
        finder = JobFinder(job_id)
        state_job = finder.latest(db.session)
        # Check that the job exists first: it may not have been queued yet
        # because a prerequisite async process (dbt installation, for
        # example) is still running.
        if state_job:
            state_job_success = finder.latest_success(db.session)
            jobs.append({
                "job_id": job_id,
                "is_complete": state_job.is_complete(),
                "has_error": state_job.has_error(),
                "started_at": state_job.started_at,
                "ended_at": state_job.ended_at,
                "has_ever_succeeded": (state_job_success.is_success()
                                       if state_job_success else None),
            })

    return jsonify({"jobs": jobs})
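
A possible client-side counterpart to this polling endpoint is sketched below; the base URL and route path are assumptions (the snippet does not show where the handler is registered), while the {"job_ids": [...]} payload and {"jobs": [...]} response shape come straight from the handler.

import requests

def poll_job_state(base_url, job_ids):
    """Hypothetical poller; adjust the route to wherever job_state is mounted."""
    response = requests.post(
        f"{base_url}/api/v1/jobs/state",  # assumed route, not from the snippet
        json={"job_ids": job_ids},        # payload shape read by the handler
    )
    response.raise_for_status()
    return response.json()["jobs"]        # response shape built by the handler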
Example #3
def job_log(job_id) -> Response:
    """
    Endpoint for getting the most recent log generated by a job with job_id
    """
    project = Project.find()
    try:
        log_service = JobLoggingService(project)
        log = log_service.get_latest_log(job_id)
        has_log_exceeded_max_size = False
    except SizeThresholdJobLogException:
        log = None
        has_log_exceeded_max_size = True

    finder = JobFinder(job_id)
    state_job = finder.latest(db.session)
    state_job_success = finder.latest_success(db.session)

    return jsonify({
        "job_id": job_id,
        "log": log,
        "has_log_exceeded_max_size": has_log_exceeded_max_size,
        "has_error": state_job.has_error() if state_job else False,
        "started_at": state_job.started_at if state_job else None,
        "ended_at": state_job.ended_at if state_job else None,
        "trigger": state_job.trigger if state_job else None,
        "has_ever_succeeded": (state_job_success.is_success()
                               if state_job_success else None),
    })
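
The try/except above implies that JobLoggingService.get_latest_log raises SizeThresholdJobLogException when a log is too large to return inline. A minimal sketch of such a guard, with the threshold constant and file-based layout as assumptions:

from pathlib import Path

MAX_LOG_SIZE = 2 * 1024 * 1024  # assumed threshold; the real value is not shown

class SizeThresholdJobLogException(Exception):
    """Raised when a job log exceeds the size we are willing to return."""

def read_log_with_size_guard(log_path: Path) -> str:
    """Hypothetical helper mirroring the behavior the endpoint relies on."""
    if log_path.stat().st_size > MAX_LOG_SIZE:
        raise SizeThresholdJobLogException(
            f"{log_path} exceeds {MAX_LOG_SIZE} bytes")
    return log_path.read_text()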
Example #4
def get_pipeline_schedules():
    """
    Endpoint for getting the pipeline schedules
    """
    project = Project.find()
    schedule_service = ScheduleService(project)
    schedules = [s._asdict() for s in schedule_service.schedules()]
    for schedule in schedules:
        finder = JobFinder(schedule["name"])
        state_job = finder.latest(db.session)
        schedule["has_error"] = state_job.has_error() if state_job else False
        schedule["is_running"] = state_job.is_running() if state_job else False
        schedule["job_id"] = state_job.job_id if state_job else None

    return jsonify(schedules)
Example #5
def job_log(job_id) -> Response:
    """
    Endpoint for getting the most recent log generated by a job with job_id
    """
    project = Project.find()
    log_service = JobLoggingService(project)
    log = log_service.get_latest_log(job_id)

    finder = JobFinder(job_id)
    state_job = finder.latest(db.session)

    return jsonify({
        "job_id": job_id,
        "log": log,
        "has_error": state_job.has_error() if state_job else False,
    })
Example #6
    def restore_bookmark(self, session, tap: PluginInvoker):
        if self.context.job is None:
            logging.info(
                "Running outside a Job context: incremental state could not be loaded."
            )
            return

        # the `state.json` is stored in the database
        finder = JobFinder(self.context.job.job_id)
        state_job = finder.latest_with_payload(session,
                                               flags=SingerPayload.STATE)

        if state_job and "singer_state" in state_job.payload:
            logging.info(f"Found state from {state_job.started_at}.")
            with tap.files["state"].open("w+") as state:
                json.dump(state_job.payload["singer_state"], state)
        else:
            logging.warning("No state was found; performing a complete import.")
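
For context, the payload restore_bookmark reads might look like the sketch below; only the "singer_state" key is confirmed by the snippet — the bookmark contents follow the usual Singer convention and are illustrative.

example_payload = {
    "singer_state": {
        "bookmarks": {
            # stream name and replication value are hypothetical
            "my_stream": {"replication_key_value": "2021-01-01T00:00:00Z"},
        },
    },
}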
Example #7
async def _run_job(project, job, session, context_builder, force=False):
    StaleJobFailer(job.job_id).fail_stale_jobs(session)

    if not force:
        existing = JobFinder(job.job_id).latest_running(session)
        if existing:
            raise CliError(
                f"Another '{job.job_id}' pipeline is already running which started at {existing.started_at}. "
                + "To ignore this check use the '--force' option.")

    async with job.run(session):
        job_logging_service = JobLoggingService(project)
        with job_logging_service.create_log(job.job_id,
                                            job.run_id) as log_file:
            output_logger = OutputLogger(log_file)

            await _run_elt(project, context_builder, output_logger)
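
As a usage note, the CliError raised above is how a concurrent run of the same job_id surfaces to the caller. A hedged sketch of driving _run_job and handling that case; every object here (project, job, session, context_builder) is assumed to be supplied by the surrounding application.

import logging

async def run_once(project, job, session, context_builder, force=False):
    """Hypothetical wrapper around the _run_job coroutine above."""
    try:
        await _run_job(project, job, session, context_builder, force=force)
    except CliError as err:
        # Another pipeline with this job_id is already running.
        logging.warning(str(err))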
Example #8
    def last_successful_run(self, session):
        return JobFinder(self.name).latest_success(session)
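
A quick usage sketch, assuming self here is a schedule-like object whose name doubles as the pipeline's job_id (consistent with Examples #1 and #4):

# Hypothetical caller; `schedule` and `session` come from the application.
last_run = schedule.last_successful_run(session)
if last_run:
    print(f"Last successful run ended at {last_run.ended_at}")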
Example #9
    def look_up_state(self, plugin_invoker, exec_args=None):  # avoid a mutable default argument
        if "state" not in plugin_invoker.capabilities:
            raise PluginLacksCapabilityError(
                f"Extractor '{self.name}' does not support incremental state")

        state_path = plugin_invoker.files["state"]

        try:
            # Delete state left over from different pipeline run for same extractor
            state_path.unlink()
        except FileNotFoundError:
            pass

        elt_context = plugin_invoker.context
        if not elt_context or not elt_context.job:
            # Running outside pipeline context: incremental state could not be loaded
            return

        if elt_context.full_refresh:
            logger.info(
                "Performing full refresh, ignoring state left behind by any previous runs."
            )
            return

        custom_state_filename = plugin_invoker.plugin_config_extras["_state"]
        if custom_state_filename:
            custom_state_path = plugin_invoker.project.root.joinpath(
                custom_state_filename)

            try:
                shutil.copy(custom_state_path, state_path)
                logger.info(f"Found state in {custom_state_filename}")
            except FileNotFoundError as err:
                raise PluginExecutionError(
                    f"Could not find state file {custom_state_path}") from err

            return

        # the `state.json` is stored in the database
        state = {}
        incomplete_since = None
        finder = JobFinder(elt_context.job.job_id)

        state_job = finder.latest_with_payload(elt_context.session,
                                               flags=Payload.STATE)
        if state_job:
            logger.info(f"Found state from {state_job.started_at}.")
            incomplete_since = state_job.ended_at
            if "singer_state" in state_job.payload:
                merge(state_job.payload["singer_state"], state)

        incomplete_state_jobs = finder.with_payload(
            elt_context.session,
            flags=Payload.INCOMPLETE_STATE,
            since=incomplete_since)
        for state_job in incomplete_state_jobs:
            logger.info(
                f"Found and merged incomplete state from {state_job.started_at}."
            )
            if "singer_state" in state_job.payload:
                merge(state_job.payload["singer_state"], state)

        if state:
            with state_path.open("w") as state_file:
                json.dump(state, state_file, indent=2)
        else:
            logger.warning("No state was found; performing a complete import.")
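
look_up_state layers incomplete states on top of the latest complete one through merge(src, dest). The helper's implementation is not shown; a plausible recursive merge with that call convention (src wins, nested dicts are merged in place into dest) would be:

def merge(src, dest):
    """Hypothetical sketch of the merge helper used above."""
    for key, value in src.items():
        if isinstance(value, dict) and isinstance(dest.get(key), dict):
            merge(value, dest[key])
        else:
            dest[key] = value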
Example #10
    "retry_delay": timedelta(minutes=5),
    "concurrency": 1,
}

engine_uri = f"sqlite:///{os.getcwd()}/.meltano/meltano.db"
engine, Session = project_engine(project, engine_uri, default=True)
session = Session()

for schedule in schedule_service.schedules():
    if schedule.interval == "@once":
        logging.info(
            f"No DAG created for schedule '{schedule.name}' because its interval is set to `@once`."
        )
        continue

    finder = JobFinder(schedule.name)
    last_successful_run = finder.latest_success(session)
    if not last_successful_run:
        logging.info(
            f"No DAG created for schedule '{schedule.name}' because it hasn't had a successful (manual) run yet."
        )
        continue

    args = default_args.copy()
    if schedule.start_date:
        args["start_date"] = coerce_datetime(schedule.start_date)

    dag_id = f"meltano_{schedule.name}"

    # from https://airflow.apache.org/docs/stable/scheduler.html#backfill-and-catchup
    #