예제 #1
0
    def test_force_stop_migration(self, mock_app_control_revoke):
        """Force-stopping without rollback leaves the migration errored with one error row."""
        migration = create_async_migration()

        force_stop_migration(migration, rollback=False)
        migration.refresh_from_db()

        # The injected mock (presumably a patch of `app.control.revoke`, applied
        # by a decorator outside this view) must have been called exactly once.
        mock_app_control_revoke.assert_called_once()
        self.assertEqual(migration.status, MigrationStatus.Errored)

        # Exactly one error is recorded, carrying the description asserted below.
        recorded_errors = AsyncMigrationError.objects.filter(async_migration=migration)
        self.assertEqual(recorded_errors.count(), 1)
        self.assertEqual(recorded_errors[0].description, "Force stopped by user")
예제 #2
0
 def _force_stop(self, rollback: bool):
     """Force-stop the migration returned by ``self.get_object()``.

     If the migration is in the ``Running`` state it is handed to
     ``force_stop_migration`` with the given ``rollback`` flag and an HTTP 200
     success response is returned. Otherwise nothing is stopped and an
     HTTP 400 response with an error message is returned.
     """
     migration = self.get_object()

     if migration.status == MigrationStatus.Running:
         force_stop_migration(migration, rollback=rollback)
         return response.Response({"success": True}, status=200)

     # Only a running migration can be stopped; reject anything else.
     rejection = {
         "success": False,
         "error": "Can't stop a migration that isn't running.",
     }
     return response.Response(rejection, status=400)
예제 #3
0
def check_async_migration_health() -> None:
    """Check on the migration currently marked as running, if there is one.

    Steps, in order:

    1. Look up the ``AsyncMigration`` whose status is ``Running``; return if
       none exists.
    2. Return unless Celery reports the migration's task state as ``STARTED``.
    3. If the task id is not among the tasks the workers report as active,
       either re-trigger the migration (when ``ASYNC_MIGRATIONS_AUTO_CONTINUE``
       is truthy) or record an error, then return.
    4. Otherwise run the migration healthcheck: force-stop the migration on
       failure, or update its progress on success.
    """
    # Function-scope import kept as in the original (presumably to avoid a
    # circular import at module load time - confirm before hoisting).
    from posthog.models.async_migration import AsyncMigration, MigrationStatus

    try:
        running_migration: AsyncMigration = AsyncMigration.objects.get(status=MigrationStatus.Running)
    except AsyncMigration.DoesNotExist:
        return

    task_id = running_migration.celery_task_id

    # we only care about "supposedly running" tasks here:
    # failures and successes are handled elsewhere,
    # pending means we haven't picked up the task yet,
    # and retry is not possible as max_retries == 0
    if AsyncResult(task_id).state != states.STARTED:
        return

    # `active()` may yield a falsy result, in which case no task is
    # considered active.
    tasks_by_node = app.control.inspect().active()
    active_task_ids = [
        task["id"]
        for node_tasks in (tasks_by_node or {}).values()
        for task in node_tasks
    ]

    # the worker crashed - this is how we find out and process the error
    if task_id not in active_task_ids:
        if getattr(config, "ASYNC_MIGRATIONS_AUTO_CONTINUE"):
            trigger_migration(running_migration, fresh_start=False)
        else:
            process_error(running_migration, "Celery worker crashed while running migration.")
        return

    healthy, failure = run_migration_healthcheck(running_migration)
    if healthy:
        update_migration_progress(running_migration)
        return

    force_stop_migration(running_migration, f"Healthcheck failed with error: {failure}")