Exemple #1
0
def elt(project, extractor, loader, dry, transform, job_id):
    """
    meltano elt EXTRACTOR_NAME LOADER_NAME

    extractor_name: Which extractor should be used in this extraction
    loader_name: Which loader should be used in this extraction
    """
    # NOTE(review): the docstring above is kept verbatim because it may be
    # surfaced as the command's help text -- confirm before rewording it.
    #
    # Flow: make sure the plugins are installed, run extract & load unless
    # `transform` is "only", then run the transform step when the built
    # context exposes a transformer. Any failure is logged and turned into
    # `click.Abort`; usage is tracked only when the run completes.

    job_logging_service = JobLoggingService(project)
    # Fall back to a timestamp-based ID when no job ID was supplied.
    job = Job(
        job_id=job_id
        or f'job_{datetime.datetime.now().strftime("%Y%m%d-%H:%M:%S.%f")}')

    _, Session = project_engine(project)
    session = Session()
    try:
        with job.run(session), job_logging_service.create_log(
                job.job_id, job.run_id) as log_file, OutputLogger(log_file):
            try:
                success = install_missing_plugins(project, extractor, loader,
                                                  transform)

                if not success:
                    raise click.Abort()

                elt_context = (ELTContextBuilder(project).with_job(
                    job).with_extractor(extractor).with_loader(
                        loader).with_transform(transform).context(session))

                if transform != "only":
                    run_extract_load(elt_context, session, dry_run=dry)
                else:
                    click.secho("Extract & load skipped.", fg="yellow")

                if elt_context.transformer:
                    # Use a new session for the Transform Part to address the last
                    # update for Job state not being saved in the DB
                    transform_session = Session()
                    try:
                        run_transform(elt_context,
                                      transform_session,
                                      dry_run=dry)
                    finally:
                        transform_session.close()
                else:
                    click.secho("Transformation skipped.", fg="yellow")
            except Exception as err:
                logging.error(
                    f"ELT could not complete, an error happened during the process: {err}"
                )
                # Chain explicitly so the abort is reported as caused by `err`
                # rather than as a second error raised while handling it.
                raise click.Abort() from err
    finally:
        session.close()
    # fmt: on

    tracker = GoogleAnalyticsTracker(project)
    tracker.track_meltano_elt(extractor=extractor,
                              loader=loader,
                              transform=transform)
Exemple #2
0
    def test_run_id(self, session):
        """A new job has no run ID; after saving, its run ID is a UUID."""
        pinned_uuid = uuid.uuid4()
        job = Job()

        # Keep uuid.uuid4 pinned to a known value while the job is saved.
        uuid_patch = mock.patch("uuid.uuid4", return_value=pinned_uuid)
        with uuid_patch:
            assert job.run_id is None

            job.save(session)
            assert isinstance(job.run_id, uuid.UUID)
Exemple #3
0
    def complete_job(self, session):
        """Return a saved job with ID "other" that was started and then marked successful."""
        finished = Job(job_id="other")

        # Move the job through its lifecycle before persisting it.
        finished.start()
        finished.success()

        finished.save(session)
        return finished
Exemple #4
0
    def test_elt_already_running(self, cli_runner, tap, target,
                                 project_plugins_service, session):
        """Invoking `elt` with the ID of a job saved as RUNNING exits with code 1."""
        job_id = "already_running"
        cli_args = ["elt", "--job_id", job_id, tap.name, target.name]

        # Persist a job in the RUNNING state under the same ID beforehand.
        running_job = Job(job_id=job_id, state=State.RUNNING)
        running_job.save(session)

        plugins_service_patch = mock.patch(
            "meltano.cli.elt.ProjectPluginsService",
            return_value=project_plugins_service,
        )
        # Make the command reuse the test session.
        engine_patch = mock.patch(
            "meltano.cli.elt.project_engine",
            return_value=(None, lambda: session),
        )

        with plugins_service_patch, engine_patch:
            result = cli_runner.invoke(cli, cli_args)

            expected_message = f"Another '{job_id}' pipeline is already running"
            assert result.exit_code == 1
            assert expected_message in str(result.exception)
Exemple #5
0
    def other_stale_job(self, session):
        """Return a saved, started job with ID "other" whose heartbeat is ten minutes old."""
        stale = Job(job_id="other")
        stale.start()

        # Backdate the heartbeat relative to the current (naive) UTC time.
        heartbeat_age = timedelta(minutes=10)
        stale.last_heartbeat_at = datetime.utcnow() - heartbeat_age

        stale.save(session)
        return stale
Exemple #6
0
    def subject(self, session, elt_context):
        """Save a successful job carrying Singer state, then return a runner for `elt_context`."""
        singer_payload = {"singer_state": {"bookmarks": []}}

        seeded_job = Job(
            job_id=TEST_JOB_ID,
            state=State.SUCCESS,
            payload_flags=SingerPayload.STATE,
            payload=singer_payload,
        )
        seeded_job.save(session)

        return SingerRunner(elt_context)
Exemple #7
0
    def subject(self, session, mkdtemp, elt_context):
        """Prepare config directories and a saved successful job, then return the runner.

        One temporary directory is created for the extractor and one for the
        loader; both are handed to the returned `SingerRunner`.
        """
        tap_dir = mkdtemp()
        target_dir = mkdtemp()

        create_plugin_files(tap_dir, elt_context.extractor.install)
        create_plugin_files(target_dir, elt_context.loader.install)

        # Seed a successful job that carries Singer state.
        singer_payload = {"singer_state": {"bookmarks": []}}
        seeded_job = Job(
            job_id=TEST_JOB_ID,
            state=State.SUCCESS,
            payload_flags=SingerPayload.STATE,
            payload=singer_payload,
        )
        seeded_job.save(session)

        runner = SingerRunner(
            elt_context,
            TEST_JOB_ID,
            tap_config_dir=tap_dir,
            target_config_dir=target_dir,
        )
        return runner
Exemple #8
0
def elt(
    project,
    extractor,
    loader,
    transform,
    dry,
    full_refresh,
    select,
    exclude,
    catalog,
    state,
    dump,
    job_id,
    force,
):
    """
    meltano elt EXTRACTOR_NAME LOADER_NAME

    extractor_name: Which extractor should be used in this extraction
    loader_name: Which loader should be used in this extraction
    """
    # NOTE(review): the docstring above is kept verbatim because it may be
    # surfaced as the command's help text -- confirm before rewording it.

    # Exclusions are appended to the selections as patterns prefixed with "!".
    select_filter = [*select, *(f"!{entity}" for entity in exclude)]

    # Without an explicit job ID, derive one from the current UTC time and the
    # plugin names. `utcnow()` is deprecated; an aware UTC datetime formats to
    # the same string here because the pattern has no timezone field.
    if job_id:
        effective_job_id = job_id
    else:
        timestamp = datetime.datetime.now(
            datetime.timezone.utc).strftime("%Y-%m-%dT%H%M%S")
        effective_job_id = f'{timestamp}--{extractor}--{loader}'

    job = Job(job_id=effective_job_id)

    _, Session = project_engine(project)
    session = Session()
    try:
        plugins_service = ProjectPluginsService(project)

        context_builder = _elt_context_builder(
            project,
            job,
            session,
            extractor,
            loader,
            transform,
            dry_run=dry,
            full_refresh=full_refresh,
            select_filter=select_filter,
            catalog=catalog,
            state=state,
            plugins_service=plugins_service,
        )

        # With `dump` set, only the requested file is dumped; the job is not run.
        if dump:
            dump_file(context_builder, dump)
        else:
            run_async(
                _run_job(project, job, session, context_builder, force=force))
    finally:
        session.close()

    # Reached only when no exception propagated from the block above.
    tracker = GoogleAnalyticsTracker(project)
    tracker.track_meltano_elt(extractor=extractor,
                              loader=loader,
                              transform=transform)
Exemple #9
0
    def elt_context(self, project, session, tap, target, elt_context_builder):
        """Build a context for `tap` and `target` tied to a "pytest_test_runner" job."""
        runner_job = Job(job_id="pytest_test_runner")

        # Same builder calls as a single chain, applied one step at a time.
        builder = elt_context_builder.with_extractor(tap.name)
        builder = builder.with_job(runner_job)
        builder = builder.with_loader(target.name)

        return builder.context(session)
Exemple #10
0
 def sample_job(self, payload=None):
     """Return an idle job with ID "meltano:sample-elt" carrying `payload`.

     When `payload` is omitted (or None), a fresh empty dict is created for
     each call, so jobs built by separate calls never share one payload
     object the way a mutable default argument would.
     """
     if payload is None:
         payload = {}

     return Job(job_id="meltano:sample-elt",
                state=State.IDLE,
                payload=payload)
Exemple #11
0
 def create_job():
     """Yield a started job that reuses `job.job_id`, then save it on resumption."""
     started_job = Job(job_id=job.job_id)
     started_job.start()

     yield started_job

     # Runs only once the generator is resumed after the yield.
     started_job.save(session)
Exemple #12
0
    async def test_look_up_state(
        self,
        subject,
        project,
        session,
        plugin_invoker_factory,
        elt_context_builder,
        monkeypatch,
    ):
        """Check the state file found after `look_up_state` as jobs accumulate.

        Jobs are saved one after another under the same job ID, so every
        assertion depends on all jobs saved before it: the order of the steps
        below is significant.
        """
        job = Job(job_id="pytest_test_runner")
        elt_context = (
            elt_context_builder.with_session(session)
            .with_extractor(subject.name)
            .with_job(job)
            .context()
        )

        invoker = plugin_invoker_factory(subject, context=elt_context)

        @contextmanager
        def create_job():
            # Hand a freshly started job to the `with` body, then save it
            # once the body finishes without raising.
            new_job = Job(job_id=job.job_id)
            new_job.start()
            yield new_job
            new_job.save(session)

        def assert_state(state):
            # Expect the state file to hold exactly `state`, or to be absent
            # when `state` is falsy.
            with invoker.prepared(session):
                subject.look_up_state(invoker, [])

            if state:
                assert invoker.files["state"].exists()
                # Close the file handle deterministically instead of leaking it.
                with invoker.files["state"].open() as state_file:
                    assert json.load(state_file) == state
            else:
                assert not invoker.files["state"].exists()

        # No state by default
        assert_state(None)

        # Running jobs with state are not considered
        with create_job() as job:
            job.payload["singer_state"] = {"success": True}
            job.payload_flags = Payload.STATE

        assert_state(None)

        # Successful jobs without state are not considered
        with create_job() as job:
            job.success()

        assert_state(None)

        # Successful jobs with incomplete state are considered
        with create_job() as job:
            job.payload["singer_state"] = {"incomplete_success": True}
            job.payload_flags = Payload.INCOMPLETE_STATE
            job.success()

        assert_state({"incomplete_success": True})

        # Successful jobs with state are considered
        with create_job() as job:
            job.payload["singer_state"] = {"success": True}
            job.payload_flags = Payload.STATE
            job.success()

        assert_state({"success": True})

        # Running jobs with state are not considered, so the state from the
        # earlier successful job is still the one found
        with create_job() as job:
            job.payload["singer_state"] = {"success": True}
            job.payload_flags = Payload.STATE

        assert_state({"success": True})

        # Failed jobs without state are not considered
        with create_job() as job:
            job.fail("Whoops")

        assert_state({"success": True})

        # Failed jobs with state are considered
        with create_job() as job:
            job.payload["singer_state"] = {"failed": True}
            job.payload_flags = Payload.STATE
            job.fail("Whoops")

        assert_state({"failed": True})

        # Successful jobs with incomplete state are considered
        with create_job() as job:
            job.payload["singer_state"] = {"success": True}
            job.payload_flags = Payload.INCOMPLETE_STATE
            job.success()

        # Incomplete state is merged into complete state
        assert_state({"failed": True, "success": True})

        # Failed jobs with incomplete state are considered
        with create_job() as job:
            job.payload["singer_state"] = {"failed_again": True}
            job.payload_flags = Payload.INCOMPLETE_STATE
            job.fail("Whoops")

        # Incomplete state is merged into complete state
        assert_state({"failed": True, "success": True, "failed_again": True})

        # Custom state takes precedence
        custom_state_filename = "custom_state.json"
        custom_state_path = project.root.joinpath(custom_state_filename)
        custom_state_path.write_text('{"custom": true}')

        monkeypatch.setitem(
            invoker.settings_service.config_override, "_state", custom_state_filename
        )

        assert_state({"custom": True})

        # With a full refresh, no state is considered
        elt_context.full_refresh = True
        assert_state(None)
Exemple #13
0
    def live_job(self, session):
        """Return a saved job with ID "test" that has been started."""
        running = Job(job_id="test")
        running.start()

        # Persist the started job before handing it out.
        running.save(session)
        return running