def test_force_stop_migration(self, mock_app_control_revoke):
    """Force-stopping without rollback marks the migration errored and logs one error.

    Also checks that the injected revoke mock was invoked exactly once.
    """
    migration = create_async_migration()

    force_stop_migration(migration, rollback=False)
    migration.refresh_from_db()

    mock_app_control_revoke.assert_called_once()
    self.assertEqual(migration.status, MigrationStatus.Errored)

    # Exactly one error row should be attached to this migration,
    # carrying the "force stopped" description.
    recorded_errors = AsyncMigrationError.objects.filter(async_migration=migration)
    self.assertEqual(recorded_errors.count(), 1)
    self.assertEqual(recorded_errors[0].description, "Force stopped by user")
def _force_stop(self, rollback: bool):
    """Force-stop the migration returned by ``self.get_object()``.

    Args:
        rollback: Forwarded to ``force_stop_migration`` as its ``rollback`` keyword.

    Returns:
        A 200 response with ``{"success": True}`` when the migration was
        running and has been stopped; otherwise a 400 response carrying an
        error message.
    """
    migration = self.get_object()

    if migration.status == MigrationStatus.Running:
        force_stop_migration(migration, rollback=rollback)
        return response.Response({"success": True}, status=200)

    # Only a running migration can be stopped.
    failure_payload = {
        "success": False,
        "error": "Can't stop a migration that isn't running.",
    }
    return response.Response(failure_payload, status=400)
def check_async_migration_health() -> None:
    """Check on the currently running async migration, if there is one.

    Steps, in order:

    1. Look up the migration whose status is ``Running``; do nothing if none exists.
    2. Do nothing unless its Celery task state is ``STARTED``.
    3. If no worker reports the task as active, either re-trigger the
       migration (when ``ASYNC_MIGRATIONS_AUTO_CONTINUE`` is truthy) or
       record a crash error.
    4. Otherwise run the migration healthcheck: force-stop on failure,
       update the progress on success.
    """
    from posthog.models.async_migration import AsyncMigration, MigrationStatus

    try:
        running: AsyncMigration = AsyncMigration.objects.get(status=MigrationStatus.Running)
    except AsyncMigration.DoesNotExist:
        return

    # Only "supposedly running" tasks matter here:
    #  - failures and successes are handled elsewhere
    #  - pending means the task hasn't been picked up yet
    #  - retry is not possible as max_retries == 0
    celery_state = AsyncResult(running.celery_task_id).state
    if celery_state != states.STARTED:
        return

    # Collect the ids of every task that any node reports as active.
    tasks_by_node = app.control.inspect().active() or {}
    active_task_ids = [
        task["id"]
        for node_tasks in tasks_by_node.values()
        for task in node_tasks
    ]

    # A STARTED task that no worker is executing means the worker crashed;
    # this is where that gets detected and handled.
    if running.celery_task_id not in active_task_ids:
        if getattr(config, "ASYNC_MIGRATIONS_AUTO_CONTINUE"):
            trigger_migration(running, fresh_start=False)
        else:
            process_error(running, "Celery worker crashed while running migration.")
        return

    ok, error = run_migration_healthcheck(running)
    if ok:
        update_migration_progress(running)
        return

    force_stop_migration(running, f"Healthcheck failed with error: {error}")