Python StepUpdate Examples, cjwstate.clientside.StepUpdate Python Examples

Example #1

0

Show file

File: step.py Project: admariner/cjworkbench

def _step_delete_secret_and_build_delta(
    workflow: Workflow, step: Step, param: str
) -> Optional[clientside.Update]:
    """Remove the secret stored under `param` from `step`, or raise.

    Return a `clientside.Update` carrying the step's new secret metadata, or
    `None` if the database is not modified (the Step no longer exists, or it
    holds no secret for `param`).

    Raise Workflow.DoesNotExist if the Workflow was deleted.
    """
    with workflow.cooperative_lock():  # raises Workflow.DoesNotExist
        try:
            step.refresh_from_db()
        except Step.DoesNotExist:
            # The Step is gone: nothing to delete.
            return None

        if step.secrets.get(param) is None:
            # Nothing is stored under `param`: nothing to delete.
            return None

        # Work on a shallow copy so the freshly-loaded mapping is never
        # mutated in place; then assign the copy back and persist it.
        remaining_secrets = dict(step.secrets)
        del remaining_secrets[param]
        step.secrets = remaining_secrets
        step.save(update_fields=["secrets"])

        step_update = clientside.StepUpdate(secrets=step.secret_metadata)
        return clientside.Update(steps={step.id: step_update})

Example #2

0

Show file

    def test_mark_result_unchanged(self, send_update):
        """Check `save.mark_result_unchanged()` on a busy step with an old error.

        Afterwards the step must have no stored objects, keep its previous
        `fetch_error`, be not-busy and stamped with `now` -- both in memory
        and in the database -- and clients must be sent the matching update.
        """
        send_update.side_effect = async_noop
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        wf_module = first_tab.wf_modules.create(
            order=0, slug="step-1", is_busy=True, fetch_error="previous error"
        )
        now = timezone.datetime(2019, 10, 22, 12, 22, tzinfo=timezone.utc)

        coroutine = save.mark_result_unchanged(workflow.id, wf_module, now)
        self.run_with_async_db(coroutine)
        self.assertEqual(wf_module.stored_objects.count(), 0)

        def assert_step_state():
            self.assertEqual(wf_module.fetch_error, "previous error")
            self.assertEqual(wf_module.is_busy, False)
            self.assertEqual(wf_module.last_update_check, now)

        assert_step_state()  # the object we passed in ...
        wf_module.refresh_from_db()
        assert_step_state()  # ... and the row in the database

        expected_step_update = clientside.StepUpdate(
            is_busy=False, last_fetched_at=now
        )
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={wf_module.id: expected_step_update}),
        )

Example #3

0

Show file

    def test_mark_result_unchanged(self, send_update):
        """Check `save.mark_result_unchanged()` on a busy step with fetch errors.

        Afterwards the step must have no stored objects, keep its previous
        `fetch_errors`, be not-busy and stamped with `now` -- both in memory
        and in the database -- and clients must be sent the matching update.
        """
        send_update.side_effect = async_noop
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(
            order=0,
            slug="step-1",
            is_busy=True,
            fetch_errors=[RenderError(I18nMessage("foo", {}, "module"))],
        )
        now = datetime.datetime(2019, 10, 22, 12, 22)

        coroutine = save.mark_result_unchanged(workflow.id, step, now)
        self.run_with_async_db(coroutine)
        self.assertEqual(step.stored_objects.count(), 0)

        def assert_step_state():
            self.assertEqual(
                step.fetch_errors, [RenderError(I18nMessage("foo", {}, "module"))]
            )
            self.assertEqual(step.is_busy, False)
            self.assertEqual(step.last_update_check, now)

        assert_step_state()  # the object we passed in ...
        step.refresh_from_db()
        assert_step_state()  # ... and the row in the database

        expected_step_update = clientside.StepUpdate(
            is_busy=False, last_fetched_at=now
        )
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={step.id: expected_step_update}),
        )

Example #4

0

Show file

File: autoupdate.py Project: vishalbelsare/cjworkbench

async def queue_fetches(pg_render_locker: PgRenderLocker):
    """Queue every pending fetch in RabbitMQ.

    Each Step is marked busy (and clients are told so) right before its fetch
    is queued, so we don't send double-fetches. Steps whose workflow is
    currently locked for rendering are skipped.
    """
    pending = await load_pending_steps()

    for workflow_id, step_id in pending:
        # Skip workflows that are rendering right now.
        #
        # A fetch can still be scheduled while a render is merely _queued_,
        # so this solves no races; it should just lower the number of fetches
        # of resource-intensive workflows.
        #
        # Going through pg_render_locker means a fetch can only be queued
        # _between_ renders. We don't check whether the fetch/render queues
        # are empty; we simply give the renderers a chance to tackle some
        # backlog.
        try:
            async with pg_render_locker.render_lock(workflow_id) as render_lock:
                # Getting here means the workflow isn't rendering: pass
                # through and queue the fetch.
                await render_lock.stall_others()  # required by the PgRenderLocker API

            logger.info("Queue fetch of step(%d, %d)", workflow_id, step_id)
            await set_step_busy(step_id)
            busy_update = clientside.Update(
                steps={step_id: clientside.StepUpdate(is_busy=True)}
            )
            await rabbitmq.send_update_to_workflow_clients(workflow_id, busy_update)
            await rabbitmq.queue_fetch(workflow_id, step_id)
        except WorkflowAlreadyLocked:
            # Leave this Step alone: the next query for pending fetches will
            # pick it up again.
            continue

Example #5

0

Show file

File: upload.py Project: zhiliangpersonal/cjworkbench

def _do_finish_upload(
    workflow: Workflow, wf_module: WfModule, uuid: uuidgen.UUID, filename: str
) -> clientside.Update:
    """Convert the in-progress upload `uuid` into an uploaded file.

    Return a `clientside.Update` carrying the step's new file list.

    Raise HandlerError if `wf_module` has no unfinished upload with this
    `uuid`, or if converting it raises FileNotFoundError.
    """
    with workflow.cooperative_lock():
        wf_module.refresh_from_db()

        lookup = {"id": uuid, "is_completed": False}
        try:
            in_progress_upload = wf_module.in_progress_uploads.get(**lookup)
        except InProgressUpload.DoesNotExist:
            raise HandlerError(
                "BadRequest: key is not being uploaded for this WfModule right now. "
                "(Even a valid key becomes invalid after you create, finish or abort "
                "an upload on its WfModule.)"
            )

        try:
            in_progress_upload.convert_to_uploaded_file(filename)
        except FileNotFoundError:
            raise HandlerError(
                "BadRequest: file not found. "
                "You must upload the file before calling finish_upload."
            )

        step_update = clientside.StepUpdate(files=wf_module.to_clientside().files)
        return clientside.Update(steps={wf_module.id: step_update})

Example #6

0

Show file

File: test_step.py Project: vishalbelsare/cjworkbench

    def test_fetch(self, queue_fetch, send_update):
        """The `fetch` handler marks the step busy, queues a fetch and notifies."""
        # Both mocked coroutine functions return an already-resolved Future so
        # the handler can `await` them.
        resolved = asyncio.Future()
        resolved.set_result(None)
        for mocked in (queue_fetch, send_update):
            mocked.return_value = resolved

        user = User.objects.create(username="******", email="*****@*****.**")
        workflow = Workflow.create_and_init(owner=user)
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(order=0, slug="step-1")

        response = self.run_handler(
            fetch, user=user, workflow=workflow, stepId=step.id
        )
        self.assertResponse(response, data=None)

        step.refresh_from_db()
        self.assertEqual(step.is_busy, True)
        queue_fetch.assert_called_with(workflow.id, step.id)

        expected_update = clientside.Update(
            steps={step.id: clientside.StepUpdate(is_busy=True)}
        )
        send_update.assert_called_with(workflow.id, expected_update)

Example #7

0

Show file

File: step.py Project: admariner/cjworkbench

async def fetch(workflow: Workflow, step: Step, **kwargs):
    """Mark `step` busy, queue a fetch for it, then tell clients it is busy."""
    await _set_step_busy(step)
    await rabbitmq.queue_fetch(workflow.id, step.id)

    busy_update = clientside.Update(
        steps={step.id: clientside.StepUpdate(is_busy=True)}
    )
    await rabbitmq.send_update_to_workflow_clients(workflow.id, busy_update)

Example #8

0

Show file

File: wf_module.py Project: brandonrobertz/cjworkbench

async def fetch(workflow: Workflow, wf_module: WfModule, **kwargs):
    """Mark `wf_module` busy, queue a fetch for it, then tell clients it is busy."""
    await _set_wf_module_busy(wf_module)
    await rabbitmq.queue_fetch(workflow.id, wf_module.id)

    busy_update = clientside.Update(
        steps={wf_module.id: clientside.StepUpdate(is_busy=True)}
    )
    await rabbitmq.send_update_to_workflow_clients(workflow.id, busy_update)

Example #9

0

Show file

    def to_clientside(
        self, *, force_module_zipfile: Optional[ModuleZipfile] = None
    ) -> clientside.StepUpdate:
        """Build a complete `clientside.StepUpdate` describing this step.

        `force_module_zipfile`, when given, is used instead of the latest
        zipfile in MODULE_REGISTRY. If no zipfile can be found, `params` is
        `{}`. If the migrated params fail schema validation, the failure is
        logged and the schema's default is used instead.
        """
        # Pick the module zipfile (needed for params).
        module_zipfile = force_module_zipfile
        if not module_zipfile:
            from cjwstate.models.module_registry import MODULE_REGISTRY

            try:
                module_zipfile = MODULE_REGISTRY.latest(self.module_id_name)
            except KeyError:
                module_zipfile = None

        if module_zipfile is not None:
            from cjwstate.params import get_migrated_params

            schema = module_zipfile.get_spec().param_schema
            # raise ModuleError
            params = get_migrated_params(self, module_zipfile=module_zipfile)
            try:
                schema.validate(params)
            except ValueError:
                logger.exception(
                    "%s.migrate_params() gave invalid output: %r",
                    self.module_id_name,
                    params,
                )
                params = schema.default
        else:
            params = {}

        # A missing render result is sent as the explicit `clientside.Null`.
        render_result = self._build_cached_render_result_fresh_or_not()
        if render_result is None:
            render_result = clientside.Null

        return clientside.StepUpdate(
            id=self.id,
            slug=self.slug,
            module_slug=self.module_id_name,
            tab_slug=self.tab_slug,
            is_busy=self.is_busy,
            render_result=render_result,
            files=self._get_clientside_files(module_zipfile),
            params=params,
            secrets=self.secret_metadata,
            is_collapsed=self.is_collapsed,
            notes=self.notes,
            is_auto_fetch=self.auto_update_data,
            fetch_interval=self.update_interval,
            last_fetched_at=self.last_update_check,
            is_notify_on_change=self.notifications,
            last_relevant_delta_id=self.last_relevant_delta_id,
            versions=self._get_clientside_fetched_version_list(module_zipfile),
        )

Example #10

0

Show file

async def _notify_websockets(workflow_id: int, step: Step) -> None:
    """Send delta to client, syncing all `step` fields fetcher can edit."""
    step_update = clientside.StepUpdate(
        is_busy=step.is_busy, last_fetched_at=step.last_update_check
    )
    await rabbitmq.send_update_to_workflow_clients(
        workflow_id, clientside.Update(steps={step.id: step_update})
    )

Example #11

0

Show file

    def test_execute_mark_unreachable(self, send_update):
        """Steps after an erroring step get empty, error-free render results.

        All three steps use a module whose render() returns an error string.
        step1 must carry that error; step2 and step3 must have empty output
        and no errors (step2's status is checked to be "unreachable"). The
        last update sent to clients must be step3's.
        """
        resolved = asyncio.Future()
        resolved.set_result(None)
        send_update.return_value = resolved

        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        create_module_zipfile(
            "mod",
            spec_kwargs={"loads_data": True},
            python_code='def render(table, params): return "error, not warning"',
        )
        step1, step2, step3 = [
            tab.steps.create(
                order=position, slug=f"step-{position + 1}", module_id_name="mod"
            )
            for position in range(3)
        ]

        self._execute(workflow)

        def assert_empty_output(step, expected_errors):
            with open_cached_render_result(step.cached_render_result) as result:
                self.assertEqual(result.path.read_bytes(), b"")
                self.assertEqual(step.cached_render_result.errors, expected_errors)

        # step1: error
        step1.refresh_from_db()
        assert_empty_output(step1, [RenderError(TODO_i18n("error, not warning"))])

        # step2, step3: unreachable (no errors, no table data)
        step2.refresh_from_db()
        self.assertEqual(step2.cached_render_result.status, "unreachable")
        assert_empty_output(step2, [])

        step3.refresh_from_db()
        assert_empty_output(step3, [])

        expected_step_update = clientside.StepUpdate(
            render_result=step3.cached_render_result, module_slug="mod"
        )
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={step3.id: expected_step_update}),
        )

Example #12

0

Show file

File: wf_module.py Project: brandonrobertz/cjworkbench

def _wf_module_set_secret_and_build_delta(
    workflow: Workflow, wf_module: WfModule, param: str, secret: str
) -> Optional[clientside.Update]:
    """
    Store `secret` under `param` on `wf_module`, or raise.

    The secret is saved together with a "name": the current time, formatted
    with millisecond precision and a trailing "Z".

    Return a `clientside.Update`, or `None` if the database is not modified
    (the WfModule no longer exists, or it already holds this exact secret).

    Raise Workflow.DoesNotExist if the Workflow was deleted.
    Raise HandlerError if the module is not in MODULE_REGISTRY or declares no
    secret parameter whose provider is "string".
    """
    with workflow.cooperative_lock():  # raises Workflow.DoesNotExist
        try:
            wf_module.refresh_from_db()
        except WfModule.DoesNotExist:
            return None  # no-op

        current_secret = wf_module.secrets.get(param, {}).get("secret")
        if current_secret == secret:
            return None  # no-op

        try:
            module_zipfile = MODULE_REGISTRY.latest(wf_module.module_id_name)
        except KeyError:
            raise HandlerError(
                f"BadRequest: ModuleZipfile {wf_module.module_id_name} does not exist"
            )

        # NOTE(review): this accepts the request when *any* field of the
        # module is a secret-string parameter; it never compares the field
        # against `param`. Confirm whether that is intended.
        has_secret_string_field = False
        for field in module_zipfile.get_spec().param_fields:
            if field.type == "secret" and field.secret_logic.provider == "string":
                has_secret_string_field = True
                break
        if not has_secret_string_field:
            raise HandlerError(f"BadRequest: param is not a secret string parameter")

        # "%f" is microseconds (six digits); dropping the last three leaves
        # milliseconds.
        created_at = timezone.now()
        created_at_str = created_at.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"

        updated_secrets = dict(wf_module.secrets)
        updated_secrets[param] = {"name": created_at_str, "secret": secret}
        wf_module.secrets = updated_secrets
        wf_module.save(update_fields=["secrets"])

        step_update = clientside.StepUpdate(secrets=wf_module.secret_metadata)
        return clientside.Update(steps={wf_module.id: step_update})

Example #13

0

Show file

File: test_step.py Project: vishalbelsare/cjworkbench

    def test_try_set_autofetch_disable_autofetch(self, update_user, update_workflow):
        """Turning autofetch off clears `next_update` and zeroes the fetch quota.

        The step starts with autofetch on; after the handler runs with
        isAutofetch=False, the step, the workflow's `fetches_per_day` and the
        updates sent to the workflow and to the user must all reflect that.
        """
        for mocked in (update_user, update_workflow):
            mocked.side_effect = async_noop

        user = User.objects.create(username="******", email="*****@*****.**")
        UserProfile.objects.create(user=user)
        workflow = Workflow.create_and_init(owner=user, fetches_per_day=72.0)
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(
            order=0,
            slug="step-1",
            auto_update_data=True,
            update_interval=1200,
            next_update=datetime.datetime.now(),
        )

        handler_kwargs = dict(
            user=user,
            workflow=workflow,
            stepSlug="step-1",
            isAutofetch=False,
            fetchInterval=300,
        )
        response = self.run_handler(try_set_autofetch, **handler_kwargs)
        self.assertResponse(response, data=None)

        step.refresh_from_db()
        self.assertEqual(step.auto_update_data, False)
        self.assertEqual(step.update_interval, 300)
        self.assertIsNone(step.next_update)

        workflow.refresh_from_db()
        self.assertEqual(workflow.fetches_per_day, 0.0)

        expected_workflow_update = clientside.Update(
            workflow=clientside.WorkflowUpdate(fetches_per_day=0.0),
            steps={
                step.id: clientside.StepUpdate(is_auto_fetch=False, fetch_interval=300)
            },
        )
        update_workflow.assert_called_with(workflow.id, expected_workflow_update)

        expected_user_update = clientside.UserUpdate(
            usage=UserUsage(fetches_per_day=0.0)
        )
        update_user.assert_called_with(user.id, expected_user_update)

Example #14

0

Show file

File: test_step.py Project: vishalbelsare/cjworkbench

    def test_try_set_autofetch_happy_path(self, update_user, update_workflow):
        """Turning autofetch on schedules the next update and raises the quota.

        After the handler runs with fetchInterval=19200, the step must be set
        to autofetch with `next_update` about 19200 seconds away, the workflow
        must report 4.5 fetches per day, and both the user and the workflow
        must be sent the matching updates.
        """
        for mocked in (update_user, update_workflow):
            mocked.side_effect = async_noop

        user = User.objects.create(username="******", email="*****@*****.**")
        UserProfile.objects.create(user=user)
        workflow = Workflow.create_and_init(owner=user)
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(order=0, slug="step-1")

        handler_kwargs = dict(
            user=user,
            workflow=workflow,
            stepSlug="step-1",
            isAutofetch=True,
            fetchInterval=19200,
        )
        response = self.run_handler(try_set_autofetch, **handler_kwargs)
        self.assertResponse(response, data=None)

        step.refresh_from_db()
        self.assertEqual(step.auto_update_data, True)
        self.assertEqual(step.update_interval, 19200)
        # next_update must fall within two seconds of "19200 seconds from now".
        latest_allowed = datetime.datetime.now() + datetime.timedelta(seconds=19202)
        self.assertLess(step.next_update, latest_allowed)
        earliest_allowed = datetime.datetime.now() + datetime.timedelta(seconds=19198)
        self.assertGreater(step.next_update, earliest_allowed)

        workflow.refresh_from_db()
        self.assertEqual(workflow.fetches_per_day, 4.5)

        expected_user_update = clientside.UserUpdate(
            usage=UserUsage(fetches_per_day=4.5)
        )
        update_user.assert_called_with(user.id, expected_user_update)

        expected_workflow_update = clientside.Update(
            workflow=clientside.WorkflowUpdate(fetches_per_day=4.5),
            steps={
                step.id: clientside.StepUpdate(is_auto_fetch=True, fetch_interval=19200)
            },
        )
        update_workflow.assert_called_with(workflow.id, expected_workflow_update)

Example #15

0

Show file

File: test_save.py Project: brandonrobertz/cjworkbench

    def test_create_result(self, send_update):
        """Check `save.create_result()` on a busy step that had fetch errors.

        Afterwards the step must own one stored object, have no fetch errors,
        be not-busy and stamped with `now` -- both in memory and in the
        database -- clients must be sent the matching update, and the
        workflow's last delta must be a ChangeDataVersionCommand.
        """
        send_update.side_effect = async_noop

        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        wf_module = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            is_busy=True,
            fetch_errors=[RenderError(I18nMessage("foo", {}, "module"))],
        )
        now = timezone.datetime(2019, 10, 22, 12, 22, tzinfo=timezone.utc)

        with parquet_file({"A": [1], "B": ["x"]}) as parquet_path:
            fetch_result = FetchResult(parquet_path)
            coroutine = save.create_result(workflow.id, wf_module, fetch_result, now)
            self.run_with_async_db(coroutine)
        self.assertEqual(wf_module.stored_objects.count(), 1)

        def assert_step_state():
            self.assertEqual(wf_module.fetch_errors, [])
            self.assertEqual(wf_module.is_busy, False)
            self.assertEqual(wf_module.last_update_check, now)

        assert_step_state()  # the object we passed in ...
        wf_module.refresh_from_db()
        assert_step_state()  # ... and the row in the database

        expected_step_update = clientside.StepUpdate(
            is_busy=False, last_fetched_at=now
        )
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={wf_module.id: expected_step_update}),
        )

        workflow.refresh_from_db()
        self.assertIsInstance(workflow.last_delta, ChangeDataVersionCommand)

Example #16

0

Show file

    def test_create_result(self, send_update):
        """Check `save.create_result()` on a busy step that had fetch errors.

        Afterwards the step must own one stored object, have no fetch errors,
        be not-busy and stamped with `now` -- both in memory and in the
        database -- clients must be sent the matching update, and the
        workflow's last delta must be named after SetStepDataVersion.
        """
        send_update.side_effect = async_noop

        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(
            order=0,
            slug="step-1",
            is_busy=True,
            fetch_errors=[RenderError(I18nMessage("foo", {}, "module"))],
        )
        now = datetime.datetime(2019, 10, 22, 12, 22)

        with parquet_file({"A": [1], "B": ["x"]}) as parquet_path:
            fetch_result = FetchResult(parquet_path)
            coroutine = save.create_result(workflow.id, step, fetch_result, now)
            self.run_with_async_db(coroutine)
        self.assertEqual(step.stored_objects.count(), 1)

        def assert_step_state():
            self.assertEqual(step.fetch_errors, [])
            self.assertEqual(step.is_busy, False)
            self.assertEqual(step.last_update_check, now)

        assert_step_state()  # the object we passed in ...
        step.refresh_from_db()
        assert_step_state()  # ... and the row in the database

        expected_step_update = clientside.StepUpdate(
            is_busy=False, last_fetched_at=now
        )
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={step.id: expected_step_update}),
        )

        workflow.refresh_from_db()
        last_delta = workflow.deltas.last()
        self.assertEqual(last_delta.command_name, SetStepDataVersion.__name__)

Example #17

0

Show file

File: WfModule.py Project: brandonrobertz/cjworkbench

    def to_clientside(self) -> clientside.StepUpdate:
        """Build a complete `clientside.StepUpdate` describing this step.

        Params come from the latest zipfile in MODULE_REGISTRY; if there is
        none, `params` is `{}`. If the migrated params fail schema validation,
        the failure is logged and the params are coerced to the schema.
        Uploaded files are listed by descending `created_at`, fetched
        versions by descending `stored_at`.
        """
        # params
        from cjwstate.models.module_registry import MODULE_REGISTRY

        try:
            module_zipfile = MODULE_REGISTRY.latest(self.module_id_name)
        except KeyError:
            module_zipfile = None

        if module_zipfile is not None:
            from cjwstate.params import get_migrated_params

            schema = module_zipfile.get_spec().get_param_schema()
            # raise ModuleError
            params = get_migrated_params(self, module_zipfile=module_zipfile)
            try:
                schema.validate(params)
            except ValueError:
                logger.exception(
                    "%s.migrate_params() gave invalid output: %r",
                    self.module_id_name,
                    params,
                )
                params = schema.coerce(params)
        else:
            params = {}

        # A missing render result is sent as the explicit `clientside.Null`.
        render_result = self._build_cached_render_result_fresh_or_not()
        if render_result is None:
            render_result = clientside.Null

        return clientside.StepUpdate(
            id=self.id,
            slug=self.slug,
            module_slug=self.module_id_name,
            tab_slug=self.tab_slug,
            is_busy=self.is_busy,
            render_result=render_result,
            files=[
                clientside.UploadedFile(
                    name=file_name,
                    uuid=file_uuid,
                    size=file_size,
                    created_at=file_created_at,
                )
                for (
                    file_name,
                    file_uuid,
                    file_size,
                    file_created_at,
                ) in self.uploaded_files.order_by("-created_at").values_list(
                    "name", "uuid", "size", "created_at"
                )
            ],
            params=params,
            secrets=self.secret_metadata,
            is_collapsed=self.is_collapsed,
            notes=self.notes,
            is_auto_fetch=self.auto_update_data,
            fetch_interval=self.update_interval,
            last_fetched_at=self.last_update_check,
            is_notify_on_change=self.notifications,
            has_unseen_notification=self.has_unseen_notification,
            last_relevant_delta_id=self.last_relevant_delta_id,
            versions=clientside.FetchedVersionList(
                versions=[
                    clientside.FetchedVersion(created_at=stored_at, is_seen=was_read)
                    for stored_at, was_read in self.stored_objects.order_by(
                        "-stored_at"
                    ).values_list("stored_at", "read")
                ],
                selected=self.stored_data_version,
            ),
        )

Example #18

0

Show file

    def test_execute_mark_unreachable(self, send_update, fake_load_module):
        """Steps after an erroring step are cached as "unreachable".

        The fake module's render() returns an error result. The first step
        must be cached with status "error" and that result; the second and
        third with status "unreachable" and an empty RenderResult. The last
        update sent to clients must be the third step's.
        """
        resolved = asyncio.Future()
        resolved.set_result(None)
        send_update.return_value = resolved

        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        delta_id = workflow.last_delta_id
        ModuleVersion.create_or_replace_from_spec(
            {"id_name": "mod", "name": "Mod", "category": "Clean", "parameters": []}
        )
        wf_module1, wf_module2, wf_module3 = [
            tab.wf_modules.create(
                order=position,
                slug=f"step-{position + 1}",
                last_relevant_delta_id=delta_id,
                module_id_name="mod",
            )
            for position in range(3)
        ]

        # Every step loads the same fake module, whose render() always errors.
        error_result = RenderResult(
            errors=[RenderError(I18nMessage.TODO_i18n("error, not warning"))]
        )
        fake_module = Mock(LoadedModule)
        fake_module.migrate_params.return_value = {}
        fake_module.render.return_value = error_result
        fake_load_module.return_value = fake_module

        self._execute(workflow)

        expectations = [
            (wf_module1, "error", error_result),
            (wf_module2, "unreachable", RenderResult()),
            (wf_module3, "unreachable", RenderResult()),
        ]
        for wf_module, expected_status, expected_result in expectations:
            wf_module.refresh_from_db()
            self.assertEqual(wf_module.cached_render_result.status, expected_status)
            with open_cached_render_result(wf_module.cached_render_result) as result:
                assert_render_result_equals(result, expected_result)

        expected_step_update = clientside.StepUpdate(
            render_result=wf_module3.cached_render_result
        )
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={wf_module3.id: expected_step_update}),
        )

Example #19

0

Show file

async def execute_wfmodule(
    chroot_context: ChrootContext,
    workflow: Workflow,
    wf_module: WfModule,
    params: Dict[str, Any],
    tab: Tab,
    input_result: RenderResult,
    tab_results: Dict[Tab, Optional[RenderResult]],
    output_path: Path,
) -> RenderResult:
    """
    Render a single WfModule; cache, broadcast and return output.

    All arguments are passed through unchanged to `_render_wfmodule()`. Its
    result is saved with `_execute_wfmodule_save()`, the resulting cached
    render result is sent to the workflow's clients, and -- if saving
    produced an output delta -- an email notification is sent from an
    executor thread. The render result is returned.

    CONCURRENCY NOTES: This function is reasonably concurrency-friendly:

    * It returns a valid cache result immediately.
    * It checks with the database that `wf_module` hasn't been deleted from
      its workflow.
    * It checks with the database that `wf_module` hasn't been deleted from
      the database entirely.
    * It checks with the database that `wf_module` hasn't been modified. (It
      is very common for a user to request a module's output -- kicking off a
      sequence of `execute_wfmodule` -- and then change a param in a prior
      module, making all those calls obsolete.)
    * It locks the workflow while collecting `render()` input data.
    * When writing results to the database, it avoids writing if the module has
      changed.

    These guarantees mean:

    * TODO It's relatively cheap to render twice.
    * Users who modify a WfModule while it's rendering will be stalled -- for
      as short a duration as possible.
    * When a user changes a workflow significantly, all prior renders will end
      relatively cheaply.

    Raises `UnneededExecution` when the input WfModule should not be rendered.
    """
    # may raise UnneededExecution
    result = await _render_wfmodule(
        chroot_context,
        workflow,
        wf_module,
        params,
        tab,
        input_result,
        tab_results,
        output_path,
    )

    # may raise UnneededExecution
    crr, output_delta = await _execute_wfmodule_save(workflow, wf_module, result)

    update = clientside.Update(
        steps={wf_module.id: clientside.StepUpdate(render_result=crr)}
    )
    await rabbitmq.send_update_to_workflow_clients(workflow.id, update)

    # Email notification if data has changed. Do this outside of the database
    # lock, because SMTP can be slow, and Django's email backend is
    # synchronous.
    if output_delta:
        loop = asyncio.get_event_loop()
        await loop.run_in_executor(
            None,
            notifications.email_output_delta,
            output_delta,
            datetime.datetime.now(),
        )

    # TODO if there's no change, is it possible for us to skip the render
    # of future modules, setting their cached_render_result_delta_id =
    # last_relevant_delta_id?  Investigate whether this is worthwhile.
    return result

Example #20

0

Show file

File: step.py Project: vishalbelsare/cjworkbench

async def execute_step(
    *,
    chroot_context: ChrootContext,
    workflow: Workflow,
    step: Step,
    module_zipfile: Optional[ModuleZipfile],
    params: Dict[str, Any],
    tab_name: str,
    input_path: Path,
    input_table_columns: List[Column],
    tab_results: Dict[Tab, Optional[StepResult]],
    output_path: Path,
) -> StepResult:
    """Render one Step, then cache, broadcast and return its output.

    The render is delegated to `_render_step()` (which receives `params` as
    `raw_params`) and saved with `_execute_step_save()`. The cached render
    result is then sent to the workflow's clients. If saving produced an
    output delta and the workflow has an owner, an email notification is sent
    from an executor thread.

    CONCURRENCY NOTES: This function is reasonably concurrency-friendly:

    * It returns a valid cache result immediately.
    * It checks with the database that `step` hasn't been deleted from
      its workflow.
    * It checks with the database that `step` hasn't been deleted from
      the database entirely.
    * It checks with the database that `step` hasn't been modified. (It
      is very common for a user to request a module's output -- kicking off a
      sequence of `execute_step` -- and then change a param in a prior
      module, making all those calls obsolete.)
    * It locks the workflow while collecting `render()` input data.
    * When writing results to the database, it avoids writing if the module has
      changed.

    These guarantees mean:

    * TODO It's relatively cheap to render twice.
    * Users who modify a Step while it's rendering will be stalled -- for
      as short a duration as possible.
    * When a user changes a workflow significantly, all prior renders will end
      relatively cheaply.

    Raises `UnneededExecution` when the input Step should not be rendered.
    """
    render_kwargs = dict(
        chroot_context=chroot_context,
        workflow=workflow,
        step=step,
        module_zipfile=module_zipfile,
        raw_params=params,
        tab_name=tab_name,
        input_path=input_path,
        input_table_columns=input_table_columns,
        tab_results=tab_results,
        output_path=output_path,
    )
    # may raise UnneededExecution
    rendered = await _render_step(**render_kwargs)

    # may raise UnneededExecution
    crr, output_delta = await _execute_step_save(workflow, step, rendered)

    step_update = clientside.StepUpdate(
        render_result=crr, module_slug=step.module_id_name
    )
    await rabbitmq.send_update_to_workflow_clients(
        workflow.id, clientside.Update(steps={step.id: step_update})
    )

    # Email notification if data has changed. This happens outside of the
    # database lock, because SMTP can be slow and Django's email backend is
    # synchronous -- hence the executor.
    if output_delta and workflow.owner_id is not None:
        await asyncio.get_event_loop().run_in_executor(
            None,
            notifications.email_output_delta,
            output_delta,
            datetime.datetime.now(),
        )

    # TODO if there's no change, is it possible for us to skip the render
    # of future modules, setting their cached_render_result_delta_id =
    # last_relevant_delta_id?  Investigate whether this is worthwhile.
    return StepResult(path=rendered.path, columns=rendered.columns)

Example #21

0

Show file

def finish_authorize(request: HttpRequest) -> HttpResponse:
    """
    Save a freshly granted OAuth token as a step's parameter secret.

    The external service (e.g., Google) redirects the user here after
    start_authorize() redirected the user to it. That service requires a
    fixed redirect URL, so everything identifying what is being authorized
    is read from the session's "oauth-flow" entry instead of the URL.

    Responds 403 when the session entry is missing or malformed, when the
    token cannot be acquired, or when the workflow lookup fails. Responds 404
    when the service is not configured or the step/parameter is gone. On
    success, sends the step's new secret metadata to workflow clients and
    responds with a small HTML page telling the user to close the window.
    """
    session = request.session
    try:
        flow = session["oauth-flow"]
    except KeyError:
        return HttpResponseForbidden(
            "Missing auth session. Please try connecting again."
        )

    try:
        scope = Scope(**flow)
    except TypeError:
        # Normally this would deserve a crash. It is tolerated because
        # sessions may have been in progress during a deploy [2018-12-21].
        # TODO remove this `except` to keep the code clean.
        logger.exception("Malformed auth session. Data: %r", flow)
        return HttpResponseForbidden(
            "Malformed auth session. Please try connecting again."
        )

    service = oauth.OAuthService.lookup_or_none(scope.service_id)
    if not service:
        return HttpResponseNotFound("Service not configured")

    # The helper returns either a token or a `str` describing the failure.
    offline_token = service.acquire_refresh_token_or_str_error(
        request.GET, scope.state
    )
    if isinstance(offline_token, str):
        return HttpResponseForbidden(offline_token)

    username = service.extract_username_from_token(offline_token)

    try:
        with Workflow.authorized_lookup_and_cooperative_lock(
            "owner",  # only owner can modify params
            request.user,
            session,
            pk=scope.workflow_id,
        ) as workflow:
            # raises Step.DoesNotExist, ModuleVersion.DoesNotExist
            step, _ = _load_step_and_service(workflow, scope.step_id, scope.param)

            # Work on a copy, then assign it back as a brand-new dict.
            updated_secrets = dict(step.secrets)
            updated_secrets[scope.param] = {
                "name": username,
                "secret": offline_token,
            }
            step.secrets = updated_secrets
            step.save(update_fields=["secrets"])
    except Workflow.DoesNotExist as err:
        # Possibilities:
        # str(err) = 'owner access denied'
        # str(err) = 'Workflow matching query does not exist'
        return HttpResponseForbidden(str(err))
    except (ModuleVersion.DoesNotExist, Step.DoesNotExist):
        return HttpResponseNotFound("Step or parameter was deleted.")

    # Tell connected clients about the step's new secret metadata.
    step_update = clientside.StepUpdate(secrets=step.secret_metadata)
    update = clientside.Update(steps={step.id: step_update})
    notify_clients = async_to_sync(rabbitmq.send_update_to_workflow_clients)
    notify_clients(workflow.id, update)

    success_page = b"""<!DOCTYPE html>
            <html lang="en-US">
                <head>
                    <title>Authorized</title>
                </head>
                <body>
                    <p class="success">
                        You have logged in. You may close this window now.
                    </p>
                </body>
            </html>
        """
    response = HttpResponse(success_page, content_type="text/html; charset=utf-8")
    response["Cache-Control"] = "no-cache"
    return response

Example #22

0

Show file

File: test_workflow.py Project: brandonrobertz/cjworkbench

    def test_execute_mark_unreachable(self, send_update):
        """Steps that follow an erroring step get "unreachable" results."""
        # Make the mocked send_update return an already-resolved Future.
        resolved = asyncio.Future()
        resolved.set_result(None)
        send_update.return_value = resolved

        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        delta_id = workflow.last_delta_id
        # A module whose render() returns a plain string.
        create_module_zipfile(
            "mod",
            python_code='def render(table, params): return "error, not warning"',
        )
        # Three steps in a row, all using that module.
        step1, step2, step3 = [
            tab.wf_modules.create(
                order=order,
                slug=slug,
                last_relevant_delta_id=delta_id,
                module_id_name="mod",
            )
            for order, slug in enumerate(("step-1", "step-2", "step-3"))
        ]

        expected_error = RenderResult(
            errors=[RenderError(I18nMessage.TODO_i18n("error, not warning"))]
        )

        self._execute(workflow)

        # The first step holds the error itself.
        step1.refresh_from_db()
        self.assertEqual(step1.cached_render_result.status, "error")
        with open_cached_render_result(step1.cached_render_result) as result:
            assert_render_result_equals(result, expected_error)

        # Both later steps are unreachable and hold an empty result.
        for later_step in (step2, step3):
            later_step.refresh_from_db()
            self.assertEqual(later_step.cached_render_result.status, "unreachable")
            with open_cached_render_result(later_step.cached_render_result) as result:
                assert_render_result_equals(result, RenderResult())

        # The most recent client update is about the last step.
        last_step_update = clientside.StepUpdate(
            render_result=step3.cached_render_result,
            module_slug="mod",
        )
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={step3.id: last_step_update}),
        )

Example #23

0

Show file

File: step.py Project: admariner/cjworkbench

async def try_set_autofetch(
    workflow: Workflow,
    stepSlug: str,
    isAutofetch: bool,
    fetchInterval: int,
    scope,
    **kwargs,
):
    """Set step's autofetch settings, or not; respond with temporary data.

    Client-side, the amalgam of races looks like:

        1. Submit form with these `try_set_autofetch()` parameters.
        2. Server sends three pieces of data in parallel:
            a. Update the client state's step
            b. Update the client state's user usage
            c. Respond "ok"
        3. Client waits for all three messages, and shows "busy" until then.
        4. Client resets the form (because the state holds correct data now).

    Unfortunately, our client/server mechanism doesn't have a way to wait for
    _both_ 2a and 2b. (We have a "mutation" mechanism, but it can only wait
    for 2a, not both 2a and 2b.) [2021-06-17] this problem occurs nowhere else
    in our codebase, so we aren't inspired to build a big solution.

    Our hack: we assume that in practice, the client will usually receive
    2a+2b+2c nearly at the same time (since RabbitMQ is fast and the Internet
    is slow). So the client (3) waits for 2c and then waits a fixed duration;
    then (4) assumes 2a and 2b have arrived and resets the form.

    Raise HandlerError("BadRequest: ...") if `fetchInterval` cannot be
    converted to an integer.

    Raise HandlerError("AutofetchQuotaExceeded") if `_do_try_set_autofetch()`
    raises AutofetchQuotaExceeded.
    """
    step_slug = str(stepSlug)
    auto_update_data = bool(isAutofetch)
    try:
        requested_interval = int(fetchInterval)
    except (ValueError, TypeError, OverflowError):
        # OverflowError covers a float infinity, which int() cannot convert.
        # Raise, as the quota branch below does, rather than return the
        # exception object as if it were the handler's result.
        raise HandlerError("BadRequest: fetchInterval must be an integer") from None
    # Never go below settings.MIN_AUTOFETCH_INTERVAL.
    update_interval = max(settings.MIN_AUTOFETCH_INTERVAL, requested_interval)

    try:
        step, usage = await _do_try_set_autofetch(
            scope, workflow, step_slug, auto_update_data, update_interval
        )  # updates workflow, step
    except AutofetchQuotaExceeded:
        raise HandlerError("AutofetchQuotaExceeded")

    # Send the user's new usage, then the workflow/step changes.
    await rabbitmq.send_user_update_to_user_clients(
        workflow.owner_id, clientside.UserUpdate(usage=usage)
    )
    await rabbitmq.send_update_to_workflow_clients(
        workflow.id,
        clientside.Update(
            workflow=clientside.WorkflowUpdate(
                fetches_per_day=workflow.fetches_per_day
            ),
            steps={
                step.id: clientside.StepUpdate(
                    is_auto_fetch=step.auto_update_data,
                    fetch_interval=step.update_interval,
                )
            },
        ),
    )