Example #1
0
 async def test_two_clients_get_messages_on_same_workflow(
         self, communicate):
     """Two sockets on the same workflow must both receive a broadcast.

     Each client connects and discards its initial workflow delta. Then an
     empty ``clientside.Update`` is sent to the workflow, and every client
     is expected to receive an empty "apply-delta" message.
     """
     url = f"/workflows/{self.workflow.id}/"
     clients = [communicate(self.application, url) for _ in range(2)]

     for client in clients:
         is_connected, _ = await client.connect()
         self.assertTrue(is_connected)
         await client.receive_from()  # ignore initial workflow delta

     async with self.global_rabbitmq_connection():
         await send_update_to_workflow_clients(self.workflow.id,
                                               clientside.Update())

     expected_message = {"type": "apply-delta", "data": {}}
     for client in clients:
         raw_message = await client.receive_from()
         self.assertEqual(json.loads(raw_message), expected_message)
Example #2
0
def _do_finish_upload(
    workflow: Workflow, wf_module: WfModule, uuid: uuidgen.UUID, filename: str
) -> clientside.Update:
    """Turn an in-progress upload into an uploaded file and report the files.

    Everything happens inside `workflow.cooperative_lock()`, after
    `wf_module` has been refreshed from the database.

    Return a `clientside.Update` carrying the `files` of `wf_module`.

    Raise HandlerError if `uuid` does not identify an incomplete upload on
    `wf_module`, or if converting the upload raises FileNotFoundError.
    """
    with workflow.cooperative_lock():
        wf_module.refresh_from_db()

        # Only an upload that has not been completed may be finished.
        try:
            upload = wf_module.in_progress_uploads.get(id=uuid, is_completed=False)
        except InProgressUpload.DoesNotExist:
            raise HandlerError(
                "BadRequest: key is not being uploaded for this WfModule right now. "
                "(Even a valid key becomes invalid after you create, finish or abort "
                "an upload on its WfModule.)"
            )

        try:
            upload.convert_to_uploaded_file(filename)
        except FileNotFoundError:
            raise HandlerError(
                "BadRequest: file not found. "
                "You must upload the file before calling finish_upload."
            )

        step_update = clientside.StepUpdate(files=wf_module.to_clientside().files)
        return clientside.Update(steps={wf_module.id: step_update})
Example #3
0
async def queue_fetches(pg_render_locker: PgRenderLocker):
    """Send every pending fetch to RabbitMQ.

    Each step is flagged is_busy=True as it is queued, so the same fetch is
    not sent twice.
    """
    for workflow_id, step_id in await load_pending_steps():
        # Skip workflows that are rendering right now.
        #
        # A fetch can still be scheduled while a render is merely _queued_,
        # so this solves no race. It should, however, reduce the number of
        # fetches of resource-intensive workflows.
        #
        # Going through pg_render_locker means a fetch can only be queued
        # _between_ renders. The fetch/render queues may be non-empty (we
        # don't check), but the renderers get a chance to work through some
        # of their backlog.
        try:
            async with pg_render_locker.render_lock(workflow_id) as lock:
                # The workflow is not rendering at this moment: fall through
                # and queue the fetch.
                await lock.stall_others()  # required by the PgRenderLocker API

            logger.info("Queue fetch of step(%d, %d)", workflow_id, step_id)
            await set_step_busy(step_id)

            busy_update = clientside.Update(
                steps={step_id: clientside.StepUpdate(is_busy=True)}
            )
            await rabbitmq.send_update_to_workflow_clients(workflow_id, busy_update)
            await rabbitmq.queue_fetch(workflow_id, step_id)
        except WorkflowAlreadyLocked:
            # No fetch this time. The Step comes up again on the next query
            # for pending fetches.
            continue
0
def _step_delete_secret_and_build_delta(
    workflow: Workflow, step: Step, param: str
) -> Optional[clientside.Update]:
    """Remove the secret stored under `param` from `step`, or raise.

    Return a `clientside.Update` holding the step's secret metadata after the
    deletion, or `None` if the database is not modified (the step no longer
    exists, or it holds no secret for `param`).

    Raise Workflow.DoesNotExist if the Workflow was deleted.
    """
    with workflow.cooperative_lock():  # raises Workflow.DoesNotExist
        try:
            step.refresh_from_db()
        except Step.DoesNotExist:
            return None  # step is gone: nothing to delete

        if step.secrets.get(param) is None:
            return None  # no secret to delete

        # Assign a fresh dict without `param` rather than mutating in place.
        step.secrets = {
            name: value for name, value in step.secrets.items() if name != param
        }
        step.save(update_fields=["secrets"])

        step_update = clientside.StepUpdate(secrets=step.secret_metadata)
        return clientside.Update(steps={step.id: step_update})
Example #5
0
async def fetch(workflow: Workflow, step: Step, **kwargs):
    """Mark `step` busy, queue its fetch, then notify the workflow's clients."""
    await _set_step_busy(step)
    await rabbitmq.queue_fetch(workflow.id, step.id)

    busy_update = clientside.Update(
        steps={step.id: clientside.StepUpdate(is_busy=True)}
    )
    await rabbitmq.send_update_to_workflow_clients(workflow.id, busy_update)
Example #6
0
    def _get_workflow_as_clientside_update(self) -> WorkflowUpdateData:
        """Build a full snapshot of the requested workflow.

        Return WorkflowUpdateData(clientside.Update, delta_id). The update
        carries the user (None when the scope's user is anonymous), the
        workflow, its live tabs keyed by slug and its live steps keyed by id.

        Raise Workflow.DoesNotExist if a race deletes the Workflow.
        """
        locked_lookup = (
            self._lookup_requested_workflow_with_auth_and_cooperative_lock()
        )
        with locked_lookup as workflow:
            requester = self.scope["user"]
            if requester.is_anonymous:
                user = None
            else:
                lock_user_by_id(requester.id, for_write=False)
                user = query_clientside_user(requester.id)

            workflow_data = workflow.to_clientside()
            tabs_by_slug = {
                tab.slug: tab.to_clientside() for tab in workflow.live_tabs
            }
            steps_by_id = {
                step.id: step.to_clientside()
                for step in Step.live_in_workflow(workflow)
            }
            update = clientside.Update(
                user=user,
                workflow=workflow_data,
                tabs=tabs_by_slug,
                steps=steps_by_id,
            )
            return WorkflowUpdateData(update, workflow.last_delta_id)
Example #7
0
    def test_mark_result_unchanged(self, send_update):
        """mark_result_unchanged() clears is_busy and keeps the fetch error.

        The expected state is checked on the in-memory object and again after
        reloading it from the database.
        """
        send_update.side_effect = async_noop
        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        wf_module = first_tab.wf_modules.create(
            order=0, slug="step-1", is_busy=True, fetch_error="previous error"
        )
        now = timezone.datetime(2019, 10, 22, 12, 22, tzinfo=timezone.utc)

        def assert_expected_state():
            self.assertEqual(wf_module.fetch_error, "previous error")
            self.assertEqual(wf_module.is_busy, False)
            self.assertEqual(wf_module.last_update_check, now)

        self.run_with_async_db(save.mark_result_unchanged(workflow.id, wf_module, now))
        self.assertEqual(wf_module.stored_objects.count(), 0)

        assert_expected_state()  # in-memory object
        wf_module.refresh_from_db()
        assert_expected_state()  # values stored in the database

        expected_step_update = clientside.StepUpdate(
            is_busy=False, last_fetched_at=now
        )
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={wf_module.id: expected_step_update}),
        )
Example #8
0
    def test_fetch(self, queue_fetch, send_update):
        """The `fetch` handler marks the step busy, queues it and notifies."""
        # Hand both mocks an already-resolved future as their return value.
        resolved = asyncio.Future()
        resolved.set_result(None)
        queue_fetch.return_value = resolved
        send_update.return_value = resolved

        user = User.objects.create(username="******", email="*****@*****.**")
        workflow = Workflow.create_and_init(owner=user)
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(order=0, slug="step-1")

        response = self.run_handler(
            fetch, user=user, workflow=workflow, stepId=step.id
        )
        self.assertResponse(response, data=None)

        step.refresh_from_db()
        self.assertEqual(step.is_busy, True)
        queue_fetch.assert_called_with(workflow.id, step.id)

        expected_update = clientside.Update(
            steps={step.id: clientside.StepUpdate(is_busy=True)}
        )
        send_update.assert_called_with(workflow.id, expected_update)
Example #9
0
    def test_mark_result_unchanged(self, send_update):
        """mark_result_unchanged() clears is_busy and keeps the fetch errors.

        The expected state is checked on the in-memory object and again after
        reloading it from the database.
        """
        send_update.side_effect = async_noop

        def previous_errors():
            # A fresh list per call, so comparisons never share an object
            # with the value handed to the model.
            return [RenderError(I18nMessage("foo", {}, "module"))]

        workflow = Workflow.create_and_init()
        step = workflow.tabs.first().steps.create(
            order=0,
            slug="step-1",
            is_busy=True,
            fetch_errors=previous_errors(),
        )
        now = datetime.datetime(2019, 10, 22, 12, 22)

        def assert_expected_state():
            self.assertEqual(step.fetch_errors, previous_errors())
            self.assertEqual(step.is_busy, False)
            self.assertEqual(step.last_update_check, now)

        self.run_with_async_db(
            save.mark_result_unchanged(workflow.id, step, now))
        self.assertEqual(step.stored_objects.count(), 0)

        assert_expected_state()  # in-memory object
        step.refresh_from_db()
        assert_expected_state()  # values stored in the database

        expected_step_update = clientside.StepUpdate(is_busy=False,
                                                     last_fetched_at=now)
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={step.id: expected_step_update}),
        )
Example #10
0
def _get_workflow_as_clientside_update(user, session,
                                       workflow_id: int) -> WorkflowUpdateData:
    """
    Return (clientside.Update, delta_id).

    Raise Workflow.DoesNotExist if a race deletes the Workflow.

    The purpose of this method is to hide races from users who disconnect
    and reconnect while changes are being made. It's okay for things to be
    slightly off, as long as users don't notice. (Long-term, we can build a
    better, more-correct synchronization strategy.)
    """
    locked_lookup = Workflow.authorized_lookup_and_cooperative_lock(
        "read", user, session, pk=workflow_id)
    with locked_lookup as workflow_lock:
        workflow = workflow_lock.workflow

        workflow_data = workflow.to_clientside()
        tabs_by_slug = {
            tab.slug: tab.to_clientside()
            for tab in workflow.live_tabs
        }
        steps_by_id = {
            step.id: step.to_clientside()
            for step in WfModule.live_in_workflow(workflow)
        }
        snapshot = clientside.Update(workflow=workflow_data,
                                     tabs=tabs_by_slug,
                                     steps=steps_by_id)
        return WorkflowUpdateData(snapshot, workflow.last_delta_id)
Example #11
0
async def fetch(workflow: Workflow, wf_module: WfModule, **kwargs):
    """Mark `wf_module` busy, queue its fetch, then notify workflow clients."""
    await _set_wf_module_busy(wf_module)
    await rabbitmq.queue_fetch(workflow.id, wf_module.id)

    busy_update = clientside.Update(
        steps={wf_module.id: clientside.StepUpdate(is_busy=True)})
    await rabbitmq.send_update_to_workflow_clients(workflow.id, busy_update)
Example #12
0
 async def test_message(self, communicate):
     """A connected client receives an update sent to its workflow."""
     path = f"/workflows/{self.workflow.id}/"
     client = communicate(self.application, path)

     is_connected, _ = await client.connect()
     self.assertTrue(is_connected)
     await client.receive_from()  # ignore initial workflow delta

     await send_update_to_workflow_clients(self.workflow.id, clientside.Update())

     message = json.loads(await client.receive_from())
     self.assertEqual(message, {"type": "apply-delta", "data": {}})
Example #13
0
    def load_clientside_update(self, delta: "Delta") -> clientside.Update:
        """Build the state update clients receive over Websockets.

        Called synchronously; it may access the database. Overrides should
        call super() so the most basic data is still included.

        This must be called in a `workflow.cooperative_lock()`.
        """
        workflow_update = clientside.WorkflowUpdate(
            updated_at=delta.workflow.updated_at)
        return clientside.Update(workflow=workflow_update)
Example #14
0
async def _notify_websockets(workflow_id: int, step: Step) -> None:
    """Send delta to client, syncing all `step` fields fetcher can edit.

    The delta carries `is_busy` and `last_fetched_at`, the latter taken from
    `step.last_update_check`.
    """
    step_update = clientside.StepUpdate(
        is_busy=step.is_busy, last_fetched_at=step.last_update_check
    )
    await rabbitmq.send_update_to_workflow_clients(
        workflow_id, clientside.Update(steps={step.id: step_update})
    )
Example #15
0
def _wf_module_set_secret_and_build_delta(
        workflow: Workflow, wf_module: WfModule, param: str,
        secret: str) -> Optional[clientside.Update]:
    """
    Write a new secret to `wf_module`, or raise.

    The secret is stored under `param` as a dict with two keys: "name" (the
    creation timestamp, millisecond precision, "Z" suffix) and "secret".

    Return a `clientside.Update`, or `None` if the database is not modified
    (the WfModule no longer exists, or it already holds this exact secret).

    Raise HandlerError if the module is missing from MODULE_REGISTRY or its
    spec has no secret parameter with the "string" provider.

    Raise Workflow.DoesNotExist if the Workflow was deleted.
    """
    with workflow.cooperative_lock():  # raises Workflow.DoesNotExist
        try:
            wf_module.refresh_from_db()
        except WfModule.DoesNotExist:
            return None  # no-op

        if wf_module.secrets.get(param, {}).get("secret") == secret:
            return None  # no-op

        try:
            module_zipfile = MODULE_REGISTRY.latest(wf_module.module_id_name)
        except KeyError:
            raise HandlerError(
                f"BadRequest: ModuleZipfile {wf_module.module_id_name} does not exist"
            )
        module_spec = module_zipfile.get_spec()
        # NOTE(review): this only checks that the module declares *some*
        # string-secret parameter; it never compares against `param`.
        # Confirm whether the check should be restricted to `param`.
        if not any(p.type == "secret" and p.secret_logic.provider == "string"
                   for p in module_spec.param_fields):
            # Plain string: the message has no placeholders to interpolate.
            raise HandlerError(
                "BadRequest: param is not a secret string parameter")

        # "%f" is always six digits (microseconds); dropping the last three
        # leaves milliseconds.
        created_at = timezone.now()
        created_at_str = created_at.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"

        # Assign a new dict rather than mutating the existing one in place.
        wf_module.secrets = {
            **wf_module.secrets,
            param: {
                "name": created_at_str,
                "secret": secret
            },
        }
        wf_module.save(update_fields=["secrets"])

        return clientside.Update(steps={
            wf_module.id:
            clientside.StepUpdate(secrets=wf_module.secret_metadata)
        })
Example #16
0
    def test_execute_mark_unreachable(self, send_update):
        """An error in the first step leaves the following steps empty.

        step1 is expected to cache an error with no table data; step2 and
        step3 are expected to cache empty results without errors, and the
        last update sent to clients describes step3.
        """
        resolved = asyncio.Future()
        resolved.set_result(None)
        send_update.return_value = resolved

        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        create_module_zipfile(
            "mod",
            spec_kwargs={"loads_data": True},
            python_code='def render(table, params): return "error, not warning"',
        )
        step1 = tab.steps.create(order=0, slug="step-1", module_id_name="mod")
        step2 = tab.steps.create(order=1, slug="step-2", module_id_name="mod")
        step3 = tab.steps.create(order=2, slug="step-3", module_id_name="mod")

        self._execute(workflow)

        # step1: error
        step1.refresh_from_db()
        with open_cached_render_result(step1.cached_render_result) as rendered:
            self.assertEqual(rendered.path.read_bytes(), b"")
            expected_errors = [RenderError(TODO_i18n("error, not warning"))]
            self.assertEqual(step1.cached_render_result.errors, expected_errors)

        # step2, step3: unreachable (no errors, no table data)
        step2.refresh_from_db()
        self.assertEqual(step2.cached_render_result.status, "unreachable")
        with open_cached_render_result(step2.cached_render_result) as rendered:
            self.assertEqual(rendered.path.read_bytes(), b"")
            self.assertEqual(step2.cached_render_result.errors, [])

        step3.refresh_from_db()
        with open_cached_render_result(step3.cached_render_result) as rendered:
            self.assertEqual(rendered.path.read_bytes(), b"")
            self.assertEqual(step3.cached_render_result.errors, [])

        expected_step_update = clientside.StepUpdate(
            render_result=step3.cached_render_result, module_slug="mod")
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={step3.id: expected_step_update}),
        )
Example #17
0
    def test_try_set_autofetch_disable_autofetch(self, update_user,
                                                 update_workflow):
        """Turning autofetch off clears next_update and zeroes the usage."""
        update_user.side_effect = async_noop
        update_workflow.side_effect = async_noop

        user = User.objects.create(username="******", email="*****@*****.**")
        UserProfile.objects.create(user=user)
        workflow = Workflow.create_and_init(owner=user, fetches_per_day=72.0)
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(
            order=0,
            slug="step-1",
            auto_update_data=True,
            update_interval=1200,
            next_update=datetime.datetime.now(),
        )

        handler_kwargs = dict(
            user=user,
            workflow=workflow,
            stepSlug="step-1",
            isAutofetch=False,
            fetchInterval=300,
        )
        response = self.run_handler(try_set_autofetch, **handler_kwargs)
        self.assertResponse(response, data=None)

        # The step keeps the new interval but is no longer scheduled.
        step.refresh_from_db()
        self.assertEqual(step.auto_update_data, False)
        self.assertEqual(step.update_interval, 300)
        self.assertIsNone(step.next_update)
        workflow.refresh_from_db()
        self.assertEqual(workflow.fetches_per_day, 0.0)

        expected_workflow_update = clientside.Update(
            workflow=clientside.WorkflowUpdate(fetches_per_day=0.0),
            steps={
                step.id: clientside.StepUpdate(is_auto_fetch=False,
                                               fetch_interval=300)
            },
        )
        update_workflow.assert_called_with(workflow.id,
                                           expected_workflow_update)

        expected_user_update = clientside.UserUpdate(
            usage=UserUsage(fetches_per_day=0.0))
        update_user.assert_called_with(user.id, expected_user_update)
Example #18
0
    def test_try_set_autofetch_happy_path(self, update_user, update_workflow):
        """Turning autofetch on schedules the step and reports the usage."""
        update_user.side_effect = async_noop
        update_workflow.side_effect = async_noop

        user = User.objects.create(username="******", email="*****@*****.**")
        UserProfile.objects.create(user=user)
        workflow = Workflow.create_and_init(owner=user)
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(order=0, slug="step-1")

        handler_kwargs = dict(
            user=user,
            workflow=workflow,
            stepSlug="step-1",
            isAutofetch=True,
            fetchInterval=19200,
        )
        response = self.run_handler(try_set_autofetch, **handler_kwargs)
        self.assertResponse(response, data=None)

        step.refresh_from_db()
        self.assertEqual(step.auto_update_data, True)
        self.assertEqual(step.update_interval, 19200)
        # next_update must fall within 2 seconds of "now + interval".
        latest_allowed = datetime.datetime.now() + datetime.timedelta(
            seconds=19202)
        self.assertLess(step.next_update, latest_allowed)
        earliest_allowed = datetime.datetime.now() + datetime.timedelta(
            seconds=19198)
        self.assertGreater(step.next_update, earliest_allowed)
        workflow.refresh_from_db()
        self.assertEqual(workflow.fetches_per_day, 4.5)

        expected_user_update = clientside.UserUpdate(
            usage=UserUsage(fetches_per_day=4.5))
        update_user.assert_called_with(user.id, expected_user_update)

        expected_workflow_update = clientside.Update(
            workflow=clientside.WorkflowUpdate(fetches_per_day=4.5),
            steps={
                step.id: clientside.StepUpdate(is_auto_fetch=True,
                                               fetch_interval=19200)
            },
        )
        update_workflow.assert_called_with(workflow.id,
                                           expected_workflow_update)
Example #19
0
    def _get_workflow_as_clientside_update(self) -> WorkflowUpdateData:
        """Build a full snapshot of the requested workflow.

        Return WorkflowUpdateData(clientside.Update, delta_id). The update
        carries the workflow, its live tabs keyed by slug and its live steps
        keyed by id.

        Raise Workflow.DoesNotExist if a race deletes the Workflow.
        """
        locked_lookup = (
            self._lookup_requested_workflow_with_auth_and_cooperative_lock()
        )
        with locked_lookup as workflow_lock:
            workflow = workflow_lock.workflow

            workflow_data = workflow.to_clientside()
            tabs_by_slug = {
                tab.slug: tab.to_clientside() for tab in workflow.live_tabs
            }
            steps_by_id = {
                step.id: step.to_clientside()
                for step in Step.live_in_workflow(workflow)
            }
            snapshot = clientside.Update(
                workflow=workflow_data,
                tabs=tabs_by_slug,
                steps=steps_by_id,
            )
            return WorkflowUpdateData(snapshot, workflow.last_delta_id)
Example #20
0
    def test_create_result(self, send_update):
        """create_result() stores the fetch, clears errors and notifies.

        The expected state is checked on the in-memory object and again after
        reloading it from the database.
        """
        send_update.side_effect = async_noop

        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        wf_module = first_tab.wf_modules.create(
            order=0,
            slug="step-1",
            is_busy=True,
            fetch_errors=[RenderError(I18nMessage("foo", {}, "module"))],
        )
        now = timezone.datetime(2019, 10, 22, 12, 22, tzinfo=timezone.utc)

        def assert_expected_state():
            self.assertEqual(wf_module.fetch_errors, [])
            self.assertEqual(wf_module.is_busy, False)
            self.assertEqual(wf_module.last_update_check, now)

        with parquet_file({"A": [1], "B": ["x"]}) as parquet_path:
            fetch_result = FetchResult(parquet_path)
            self.run_with_async_db(
                save.create_result(workflow.id, wf_module, fetch_result, now)
            )
        self.assertEqual(wf_module.stored_objects.count(), 1)

        assert_expected_state()  # in-memory object
        wf_module.refresh_from_db()
        assert_expected_state()  # values stored in the database

        expected_step_update = clientside.StepUpdate(
            is_busy=False, last_fetched_at=now
        )
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={wf_module.id: expected_step_update}),
        )

        workflow.refresh_from_db()
        self.assertIsInstance(workflow.last_delta, ChangeDataVersionCommand)
Example #21
0
    def test_create_result(self, send_update):
        """create_result() stores the fetch, clears errors and notifies.

        The expected state is checked on the in-memory object and again after
        reloading it from the database.
        """
        send_update.side_effect = async_noop

        workflow = Workflow.create_and_init()
        first_tab = workflow.tabs.first()
        step = first_tab.steps.create(
            order=0,
            slug="step-1",
            is_busy=True,
            fetch_errors=[RenderError(I18nMessage("foo", {}, "module"))],
        )
        now = datetime.datetime(2019, 10, 22, 12, 22)

        def assert_expected_state():
            self.assertEqual(step.fetch_errors, [])
            self.assertEqual(step.is_busy, False)
            self.assertEqual(step.last_update_check, now)

        with parquet_file({"A": [1], "B": ["x"]}) as parquet_path:
            fetch_result = FetchResult(parquet_path)
            self.run_with_async_db(
                save.create_result(workflow.id, step, fetch_result, now))
        self.assertEqual(step.stored_objects.count(), 1)

        assert_expected_state()  # in-memory object
        step.refresh_from_db()
        assert_expected_state()  # values stored in the database

        expected_step_update = clientside.StepUpdate(is_busy=False,
                                                     last_fetched_at=now)
        send_update.assert_called_with(
            workflow.id,
            clientside.Update(steps={step.id: expected_step_update}),
        )

        workflow.refresh_from_db()
        last_command_name = workflow.deltas.last().command_name
        self.assertEqual(last_command_name, SetStepDataVersion.__name__)
Example #22
0
async def execute_wfmodule(
    chroot_context: ChrootContext,
    workflow: Workflow,
    wf_module: WfModule,
    params: Dict[str, Any],
    tab: Tab,
    input_result: RenderResult,
    tab_results: Dict[Tab, Optional[RenderResult]],
    output_path: Path,
) -> RenderResult:
    """
    Render a single WfModule; cache, broadcast and return output.

    The steps are: render via `_render_wfmodule()`, save via
    `_execute_wfmodule_save()`, send the cached render result to the
    workflow's clients, and -- if saving produced an output delta -- email
    a notification from an executor thread.

    CONCURRENCY NOTES: This function is reasonably concurrency-friendly:

    * It returns a valid cache result immediately.
    * It checks with the database that `wf_module` hasn't been deleted from
      its workflow.
    * It checks with the database that `wf_module` hasn't been deleted from
      the database entirely.
    * It checks with the database that `wf_module` hasn't been modified. (It
      is very common for a user to request a module's output -- kicking off a
      sequence of `execute_wfmodule` -- and then change a param in a prior
      module, making all those calls obsolete.)
    * It locks the workflow while collecting `render()` input data.
    * When writing results to the database, it avoids writing if the module has
      changed.

    These guarantees mean:

    * TODO It's relatively cheap to render twice.
    * Users who modify a WfModule while it's rendering will be stalled -- for
      as short a duration as possible.
    * When a user changes a workflow significantly, all prior renders will end
      relatively cheaply.

    Raises `UnneededExecution` when the input WfModule should not be rendered.
    """
    # may raise UnneededExecution
    result = await _render_wfmodule(
        chroot_context,
        workflow,
        wf_module,
        params,
        tab,
        input_result,
        tab_results,
        output_path,
    )

    # may raise UnneededExecution
    crr, output_delta = await _execute_wfmodule_save(workflow, wf_module, result)

    update = clientside.Update(
        steps={wf_module.id: clientside.StepUpdate(render_result=crr)}
    )
    await rabbitmq.send_update_to_workflow_clients(workflow.id, update)

    # Email notification if data has changed. Do this outside of the database
    # lock, because SMTP can be slow, and Django's email backend is
    # synchronous.
    if output_delta:
        loop = asyncio.get_event_loop()
        await loop.run_in_executor(
            None,
            notifications.email_output_delta,
            output_delta,
            datetime.datetime.now(),
        )

    # TODO if there's no change, is it possible for us to skip the render
    # of future modules, setting their cached_render_result_delta_id =
    # last_relevant_delta_id?  Investigate whether this is worthwhile.
    return result
Example #23
0
async def execute_step(
    *,
    chroot_context: ChrootContext,
    workflow: Workflow,
    step: Step,
    module_zipfile: Optional[ModuleZipfile],
    params: Dict[str, Any],
    tab_name: str,
    input_path: Path,
    input_table_columns: List[Column],
    tab_results: Dict[Tab, Optional[StepResult]],
    output_path: Path,
) -> StepResult:
    """Render a single Step; cache, broadcast and return output.

    The steps are: render via `_render_step()`, save via
    `_execute_step_save()`, send the cached render result to the workflow's
    clients, and -- if saving produced an output delta and the workflow has
    an owner -- email a notification from an executor thread.

    CONCURRENCY NOTES: This function is reasonably concurrency-friendly:

    * It returns a valid cache result immediately.
    * It checks with the database that `step` hasn't been deleted from
      its workflow.
    * It checks with the database that `step` hasn't been deleted from
      the database entirely.
    * It checks with the database that `step` hasn't been modified. (It
      is very common for a user to request a module's output -- kicking off a
      sequence of `execute_step` -- and then change a param in a prior
      module, making all those calls obsolete.)
    * It locks the workflow while collecting `render()` input data.
    * When writing results to the database, it avoids writing if the module has
      changed.

    These guarantees mean:

    * TODO It's relatively cheap to render twice.
    * Users who modify a Step while it's rendering will be stalled -- for
      as short a duration as possible.
    * When a user changes a workflow significantly, all prior renders will end
      relatively cheaply.

    Raises `UnneededExecution` when the input Step should not be rendered.
    """
    # may raise UnneededExecution
    rendered = await _render_step(
        chroot_context=chroot_context,
        workflow=workflow,
        step=step,
        module_zipfile=module_zipfile,
        raw_params=params,
        tab_name=tab_name,
        input_path=input_path,
        input_table_columns=input_table_columns,
        tab_results=tab_results,
        output_path=output_path,
    )

    # may raise UnneededExecution
    cached_result, output_delta = await _execute_step_save(workflow, step, rendered)

    step_update = clientside.StepUpdate(
        render_result=cached_result, module_slug=step.module_id_name
    )
    await rabbitmq.send_update_to_workflow_clients(
        workflow.id, clientside.Update(steps={step.id: step_update})
    )

    # Email notification if data has changed. Do this outside of the database
    # lock, because SMTP can be slow, and Django's email backend is
    # synchronous.
    if output_delta and workflow.owner_id is not None:
        await asyncio.get_event_loop().run_in_executor(
            None,
            notifications.email_output_delta,
            output_delta,
            datetime.datetime.now(),
        )

    # TODO if there's no change, is it possible for us to skip the render
    # of future modules, setting their cached_render_result_delta_id =
    # last_relevant_delta_id?  Investigate whether this is worthwhile.
    return StepResult(path=rendered.path, columns=rendered.columns)
Example #24
0
    def test_duplicate_empty_tab(self, send_update, queue_render):
        """Duplicating an empty tab adds it; undo removes it; redo restores it.

        After each of the three commands the test checks the tab's
        `is_deleted` flag and the last update sent to clients. No render
        should ever be queued.
        """
        send_update.side_effect = async_noop
        workflow = Workflow.create_and_init()
        source_tab = workflow.tabs.first()

        def assert_sent_tab_2_added():
            # Reload first: the expected update echoes the stored updated_at.
            workflow.refresh_from_db()
            new_tab_update = clientside.TabUpdate(
                slug="tab-2",
                name="Tab 2",
                step_ids=[],
                selected_step_index=None,
            )
            send_update.assert_called_with(
                workflow.id,
                clientside.Update(
                    workflow=clientside.WorkflowUpdate(
                        updated_at=workflow.updated_at,
                        tab_slugs=["tab-1", "tab-2"]),
                    tabs={"tab-2": new_tab_update},
                ),
            )

        duplicate = commands.do(
            DuplicateTab,
            workflow_id=workflow.id,
            from_tab=source_tab,
            slug="tab-2",
            name="Tab 2",
        )
        cmd = self.run_with_async_db(duplicate)

        # Adds new tab
        cmd.tab.refresh_from_db()
        self.assertFalse(cmd.tab.is_deleted)
        self.assertEqual(cmd.tab.slug, "tab-2")
        self.assertEqual(cmd.tab.name, "Tab 2")
        assert_sent_tab_2_added()

        # Backward: should delete tab
        self.run_with_async_db(commands.undo(workflow.id))
        cmd.tab.refresh_from_db()
        self.assertTrue(cmd.tab.is_deleted)
        workflow.refresh_from_db()
        expected_removal = clientside.Update(
            workflow=clientside.WorkflowUpdate(
                updated_at=workflow.updated_at, tab_slugs=["tab-1"]),
            clear_tab_slugs=frozenset(["tab-2"]),
        )
        send_update.assert_called_with(workflow.id, expected_removal)

        # Forward: should bring us back
        self.run_with_async_db(commands.redo(workflow.id))
        cmd.tab.refresh_from_db()
        self.assertFalse(cmd.tab.is_deleted)
        assert_sent_tab_2_added()

        # There should never be a render: we aren't changing any module
        # outputs.
        queue_render.assert_not_called()
Example #25
0
async def try_set_autofetch(
    workflow: Workflow,
    stepSlug: str,
    isAutofetch: bool,
    fetchInterval: int,
    scope,
    **kwargs,
):
    """Set step's autofetch settings, or not; respond with temporary data.

    Client-side, the amalgam of races looks like:

        1. Submit form with these `try_set_autofetch()` parameters.
        2. Server sends three pieces of data in parallel:
            a. Update the client state's step
            b. Update the client state's user usage
            c. Respond "ok"
        3. Client waits for all three messages, and shows "busy" until then.
        4. Client resets the form (because the state holds correct data now).

    Unfortunately, our client/server mechanism doesn't have a way to wait for
    _both_ 2a and 2b. (We have a "mutation" mechanism, but it can only wait
    for 2a, not both 2a and 2b.) [2021-06-17] this problem occurs nowhere else
    in our codebase, so we aren't inspired to build a big solution.

    Our hack: we assume that in practice, the client will usually receive
    2a+2b+2c nearly at the same time (since RabbitMQ is fast and the Internet
    is slow). So the client (3) waits for 2c and then waits a fixed duration;
    then (4) assumes 2a and 2b have arrived and resets the form.

    Parameters:
        workflow: workflow that contains the step.
        stepSlug: slug of the step to modify (coerced with `str()`).
        isAutofetch: whether autofetch should be enabled (coerced with
            `bool()`).
        fetchInterval: requested interval; coerced with `int()` and raised to
            at least `settings.MIN_AUTOFETCH_INTERVAL`.
        scope: passed through to `_do_try_set_autofetch()`.
        **kwargs: accepted and ignored.

    Raises:
        HandlerError: if `fetchInterval` cannot be converted to an integer, or
            if `_do_try_set_autofetch()` raises `AutofetchQuotaExceeded`.
    """
    step_slug = str(stepSlug)
    auto_update_data = bool(isAutofetch)
    try:
        update_interval = max(settings.MIN_AUTOFETCH_INTERVAL, int(fetchInterval))
    except (ValueError, TypeError):
        # Raise (do not return) the error: returning the exception object
        # would hand it back to the caller as if it were a successful result.
        raise HandlerError("BadRequest: fetchInterval must be an integer")

    try:
        step, usage = await _do_try_set_autofetch(
            scope, workflow, step_slug, auto_update_data, update_interval
        )  # updates workflow, step
    except AutofetchQuotaExceeded:
        raise HandlerError("AutofetchQuotaExceeded")

    # 2b: tell the workflow owner's clients about the new usage.
    await rabbitmq.send_user_update_to_user_clients(
        workflow.owner_id, clientside.UserUpdate(usage=usage)
    )
    # 2a: tell the workflow's clients about the new workflow+step settings.
    await rabbitmq.send_update_to_workflow_clients(
        workflow.id,
        clientside.Update(
            workflow=clientside.WorkflowUpdate(
                fetches_per_day=workflow.fetches_per_day
            ),
            steps={
                step.id: clientside.StepUpdate(
                    is_auto_fetch=step.auto_update_data,
                    fetch_interval=step.update_interval,
                )
            },
        ),
    )
    def test_execute_mark_unreachable(self, send_update):
        """Steps after an errored step are cached as "unreachable".

        Three steps all use module "mod", whose render() returns the string
        "error, not warning". After executing the workflow, step 1 must hold
        the error result, steps 2 and 3 must hold empty "unreachable" results,
        and the most recent update sent must describe step 3.
        """
        # send_update is awaited by the code under test: give it a
        # already-resolved future to return.
        resolved = asyncio.Future()
        resolved.set_result(None)
        send_update.return_value = resolved

        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        latest_delta_id = workflow.last_delta_id
        create_module_zipfile(
            "mod",
            python_code='def render(table, params): return "error, not warning"'
        )
        # Created in order, so `order` matches each step's position.
        step1, step2, step3 = (
            tab.wf_modules.create(
                order=position,
                slug=slug,
                last_relevant_delta_id=latest_delta_id,
                module_id_name="mod",
            )
            for position, slug in enumerate(("step-1", "step-2", "step-3"))
        )

        expected_error = RenderResult(
            errors=[RenderError(I18nMessage.TODO_i18n("error, not warning"))])

        self._execute(workflow)

        # First step: rendered, and the render produced an error.
        step1.refresh_from_db()
        self.assertEqual(step1.cached_render_result.status, "error")
        with open_cached_render_result(
                step1.cached_render_result) as rendered:
            assert_render_result_equals(rendered, expected_error)

        # Later steps: never rendered; cached as empty "unreachable" results.
        for blocked_step in (step2, step3):
            blocked_step.refresh_from_db()
            self.assertEqual(blocked_step.cached_render_result.status,
                             "unreachable")
            with open_cached_render_result(
                    blocked_step.cached_render_result) as rendered:
                assert_render_result_equals(rendered, RenderResult())

        expected_update = clientside.Update(
            steps={
                step3.id:
                clientside.StepUpdate(
                    render_result=step3.cached_render_result,
                    module_slug="mod")
            })
        send_update.assert_called_with(workflow.id, expected_update)
Example #27
0
    def test_execute_mark_unreachable(self, send_update, fake_load_module):
        """Steps after an errored step are cached as "unreachable".

        Three steps all use module "mod"; the loaded module is a mock whose
        render() returns an error result. After executing the workflow, step 1
        must hold that error, steps 2 and 3 must hold empty "unreachable"
        results, and the most recent update sent must describe step 3.
        """
        # send_update is awaited by the code under test: give it a
        # already-resolved future to return.
        resolved = asyncio.Future()
        resolved.set_result(None)
        send_update.return_value = resolved

        workflow = Workflow.create_and_init()
        tab = workflow.tabs.first()
        latest_delta_id = workflow.last_delta_id
        module_spec = {
            "id_name": "mod",
            "name": "Mod",
            "category": "Clean",
            "parameters": []
        }
        ModuleVersion.create_or_replace_from_spec(module_spec)
        # Created in order, so `order` matches each step's position.
        step1, step2, step3 = (
            tab.wf_modules.create(
                order=position,
                slug=slug,
                last_relevant_delta_id=latest_delta_id,
                module_id_name="mod",
            )
            for position, slug in enumerate(("step-1", "step-2", "step-3"))
        )

        # Every module load yields the same mock, which always renders an
        # error.
        loaded_module = Mock(LoadedModule)
        fake_load_module.return_value = loaded_module
        loaded_module.migrate_params.return_value = {}
        expected_error = RenderResult(
            errors=[RenderError(I18nMessage.TODO_i18n("error, not warning"))])
        loaded_module.render.return_value = expected_error

        self._execute(workflow)

        # First step: rendered, and the render produced an error.
        step1.refresh_from_db()
        self.assertEqual(step1.cached_render_result.status, "error")
        with open_cached_render_result(
                step1.cached_render_result) as rendered:
            assert_render_result_equals(rendered, expected_error)

        # Later steps: cached as empty "unreachable" results.
        for blocked_step in (step2, step3):
            blocked_step.refresh_from_db()
            self.assertEqual(blocked_step.cached_render_result.status,
                             "unreachable")
            with open_cached_render_result(
                    blocked_step.cached_render_result) as rendered:
                assert_render_result_equals(rendered, RenderResult())

        expected_update = clientside.Update(
            steps={
                step3.id:
                clientside.StepUpdate(
                    render_result=step3.cached_render_result)
            })
        send_update.assert_called_with(workflow.id, expected_update)
Example #28
0
def finish_authorize(request: HttpRequest) -> HttpResponse:
    """
    Save the token an external OAuth service granted as a step's secret.

    The external service redirects the browser here after start_authorize()
    redirected the browser to it. The service (e.g., Google) requires a fixed
    redirect URL, so the URL cannot carry a pk: the details of what is being
    authorized are read from the session's "oauth-flow" entry instead.

    Returns HttpResponseForbidden when the session entry is missing or
    malformed, when the service yields an error string instead of a token, or
    when the workflow lookup fails; HttpResponseNotFound when the service is
    not configured or the step/parameter no longer exists. Otherwise stores
    the secret, notifies the workflow's clients of the step's new secret
    metadata and returns a small "you may close this window" HTML page.
    """
    try:
        flow_data = request.session["oauth-flow"]
    except KeyError:
        return HttpResponseForbidden(
            "Missing auth session. Please try connecting again.")

    try:
        scope = Scope(**flow_data)
    except TypeError:
        # This would _normally_ be a crash-worthy exception. But there might
        # be sessions in progress as we deploy, [2018-12-21]. TODO nix this
        # `except` to keep our code clean.
        logger.exception("Malformed auth session. Data: %r", flow_data)
        return HttpResponseForbidden(
            "Malformed auth session. Please try connecting again.")

    service = oauth.OAuthService.lookup_or_none(scope.service_id)
    if not service:
        return HttpResponseNotFound("Service not configured")

    # A str result is an error message; anything else is the token.
    token_or_error = service.acquire_refresh_token_or_str_error(
        request.GET, scope.state)
    if isinstance(token_or_error, str):
        return HttpResponseForbidden(token_or_error)
    refresh_token = token_or_error

    account_name = service.extract_username_from_token(refresh_token)

    try:
        with Workflow.authorized_lookup_and_cooperative_lock(
                "owner",  # only owner can modify params
                request.user,
                request.session,
                pk=scope.workflow_id,
        ) as workflow:
            # raises Step.DoesNotExist, ModuleVersion.DoesNotExist
            step, _ = _load_step_and_service(workflow, scope.step_id,
                                             scope.param)
            # Copy the existing secrets, then set/overwrite this parameter's.
            merged_secrets = {**step.secrets}
            merged_secrets[scope.param] = {
                "name": account_name,
                "secret": refresh_token,
            }
            step.secrets = merged_secrets
            step.save(update_fields=["secrets"])
    except Workflow.DoesNotExist as err:
        # Possibilities:
        # str(err) = 'owner access denied'
        # str(err) = 'Workflow matching query does not exist'
        return HttpResponseForbidden(str(err))
    except (ModuleVersion.DoesNotExist, Step.DoesNotExist):
        return HttpResponseNotFound("Step or parameter was deleted.")

    # Notify the workflow's clients, sending only the secret's metadata.
    step_update = clientside.StepUpdate(secrets=step.secret_metadata)
    update = clientside.Update(steps={step.id: step_update})
    send_update = async_to_sync(rabbitmq.send_update_to_workflow_clients)
    send_update(workflow.id, update)

    success_page = HttpResponse(
        b"""<!DOCTYPE html>
            <html lang="en-US">
                <head>
                    <title>Authorized</title>
                </head>
                <body>
                    <p class="success">
                        You have logged in. You may close this window now.
                    </p>
                </body>
            </html>
        """,
        content_type="text/html; charset=utf-8",
    )
    success_page["Cache-Control"] = "no-cache"
    return success_page