Example 1
def test_counter():
    @traced
    async def foo():
        pass

    @traced
    async def bar():
        pass

    async def call_foo(num):
        await asyncio.gather(*[foo() for _ in range(num)])

    async def call_bar(num):
        await asyncio.gather(*[bar() for _ in range(num)])

    async def run():
        await call_foo(10)
        await call_foo(10)
        await call_bar(10)

    traced_counter.set(Counter())
    loop = asyncio.get_event_loop()
    loop.run_until_complete(run())
    counter = traced_counter.get()
    assert isinstance(counter, Counter)
    counts = counter.counts()
    assert counts["foo"] == 20
    assert counts["bar"] == 10
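Counter, traced, and traced_counter in these examples come from Dagster's internal tracing utilities. As a rough mental model only (a minimal sketch, not Dagster's actual implementation, which among other things records qualified names such as DagsterInstance.get_run_records), they behave like a thread-safe tally stored in a contextvars.ContextVar, with traced bumping the tally on every call to the decorated function:

import functools
import threading
from contextvars import ContextVar

class Counter:
    # Minimal stand-in: a thread-safe mapping of name -> call count.
    def __init__(self):
        self._lock = threading.Lock()
        self._counts = {}

    def increment(self, key):
        with self._lock:
            self._counts[key] = self._counts.get(key, 0) + 1

    def counts(self):
        with self._lock:
            return dict(self._counts)

traced_counter = ContextVar("traced_counter", default=None)

def traced(func):
    # Record a call against the ambient counter (if one has been set) each
    # time the decorated coroutine runs; sync functions are omitted for brevity.
    @functools.wraps(func)
    async def inner(*args, **kwargs):
        counter = traced_counter.get()
        if counter is not None:
            counter.increment(func.__name__)
        return await func(*args, **kwargs)

    return inner

With these stand-ins, the test above passes: foo is awaited 10 + 10 = 20 times and bar 10 times, and the asserts read those tallies back from the same context.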
Example 2
def test_asset_batching():
    with instance_for_test() as instance:
        repo = get_asset_repo()
        foo_job = repo.get_job("foo_job")
        for _ in range(3):
            foo_job.execute_in_process(instance=instance)
        with define_out_of_process_context(__file__, "asset_repo", instance) as context:
            traced_counter.set(Counter())
            result = execute_dagster_graphql(
                context,
                ASSET_RUNS_QUERY,
                variables={"assetKey": {"path": ["foo"]}},
            )
            assert result.data
            assert "assetOrError" in result.data
            assert "assetMaterializations" in result.data["assetOrError"]
            materializations = result.data["assetOrError"]["assetMaterializations"]
            assert len(materializations) == 3
            counter = traced_counter.get()
            counts = counter.counts()
            assert counts
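            # All 3 materialization runs should be resolved via one batched run-records call.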
            assert counts.get("DagsterInstance.get_run_records") == 1
Example 3
def test_repository_batching(graphql_context):
    instance = graphql_context.instance
    if not instance.supports_batch_tick_queries or not instance.supports_bucket_queries:
        pytest.skip("storage cannot batch fetch")

    traced_counter.set(Counter())
    selector = infer_repository_selector(graphql_context)
    result = execute_dagster_graphql(
        graphql_context,
        REPOSITORY_SENSORS_QUERY,
        variables={"repositorySelector": selector},
    )
    assert result.data
    assert "repositoryOrError" in result.data
    assert "sensors" in result.data["repositoryOrError"]
    counter = traced_counter.get()
    counts = counter.counts()
    assert counts
    assert len(counts) == 3

    # We should have a single batch call to fetch run records (to fetch sensor runs) and a single
    # batch call to fetch instigator state, instead of separate calls for each sensor (~5 distinct
    # sensors in the repo)
    # 1) `get_run_records` is fetched to instantiate GrapheneRun
    # 2) `all_instigator_state` is fetched to instantiate GrapheneSensor
    # 3) `get_batch_ticks` is fetched to batch-load the sensors' tick history
    assert counts.get("DagsterInstance.get_run_records") == 1
    assert counts.get("DagsterInstance.all_instigator_state") == 1
    assert counts.get("DagsterInstance.get_batch_ticks") == 1
Example 4
def test_repository_batching():
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline("foo_pipeline")
        evolving_pipeline = repo.get_pipeline("evolving_pipeline")
        foo_run_ids = [execute_pipeline(foo_pipeline, instance=instance).run_id for _ in range(3)]
        evolving_run_ids = [
            execute_pipeline(evolving_pipeline, instance=instance).run_id for _ in range(2)
        ]
        with define_out_of_process_context(__file__, "get_repo_at_time_1", instance) as context:
            traced_counter.set(Counter())
            result = execute_dagster_graphql(
                context,
                REPOSITORY_RUNS_QUERY,
                variables={"repositorySelector": infer_repository_selector(context)},
            )
            assert result.data
            assert "repositoryOrError" in result.data
            assert "pipelines" in result.data["repositoryOrError"]
            pipelines = result.data["repositoryOrError"]["pipelines"]
            assert len(pipelines) == 2
            pipeline_runs = {pipeline["name"]: pipeline["runs"] for pipeline in pipelines}
            assert len(pipeline_runs["foo_pipeline"]) == 3
            assert len(pipeline_runs["evolving_pipeline"]) == 2
            assert set(foo_run_ids) == set(run["runId"] for run in pipeline_runs["foo_pipeline"])
            assert set(evolving_run_ids) == set(
                run["runId"] for run in pipeline_runs["evolving_pipeline"]
            )
            counter = traced_counter.get()
            counts = counter.counts()
            assert counts
            assert len(counts) == 1
            # We should have a single batch call to fetch run records, instead of 3 separate calls
            # to fetch run records (which is fetched to instantiate GrapheneRun)
            assert counts.get("DagsterInstance.get_run_records") == 1
Example 5
    async def __call__(self, scope, receive, send):
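        # Seed a fresh Counter so each request tallies only its own instrumented calls.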
        traced_counter.set(Counter())

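        # Wrap send() so the accumulated counts ride back on the response headers.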
        def send_wrapper(message: Message):
            if message["type"] == "http.response.start":
                counter = traced_counter.get()
                if counter and isinstance(counter, Counter):
                    headers = MutableHeaders(scope=message)
                    headers.append("x-dagster-call-counts", json.dumps(counter.counts()))

            return send(message)

        await self.app(scope, receive, send_wrapper)
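The snippet above is an ASGI middleware: it seeds a fresh counter for each request, then intercepts the http.response.start message to serialize the accumulated counts into an x-dagster-call-counts response header. A caller can read them back roughly as follows (a hedged sketch; the URL and the choice of the requests library are illustrative, not taken from the Dagster codebase):

import json

import requests

# Hypothetical endpoint served behind the middleware above.
response = requests.get("http://localhost:3000/graphql")
call_counts = json.loads(response.headers["x-dagster-call-counts"])
print(call_counts)  # e.g. {"DagsterInstance.get_run_records": 1}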
Example 6
def test_repository_batching(graphql_context):
    traced_counter.set(Counter())
    selector = infer_repository_selector(graphql_context)
    result = execute_dagster_graphql(
        graphql_context,
        REPOSITORY_SCHEDULES_QUERY,
        variables={"repositorySelector": selector},
    )
    assert result.data
    assert "repositoryOrError" in result.data
    assert "schedules" in result.data["repositoryOrError"]
    counter = traced_counter.get()
    counts = counter.counts()
    assert counts
    assert len(counts) == 2

    # We should have a single batch call to fetch run records (to fetch schedule runs) and a single
    # batch call to fetch instigator state, instead of separate calls for each schedule (~18
    # distinct schedules in the repo)
    # 1) `get_run_records` is fetched to instantiate GrapheneRun
    # 2) `all_instigator_state` is fetched to instantiate GrapheneSchedule
    assert counts.get("DagsterInstance.get_run_records") == 1
    assert counts.get("DagsterInstance.all_instigator_state") == 1
Example 7
File: app.py Project: keyz/dagster
def initialize_counts():
    traced_counter.set(Counter())
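initialize_counts only seeds a fresh Counter; to keep counts scoped to a single request it would typically run at the start of request handling. One plausible wiring, assuming a Flask app (hypothetical; the actual hookup in app.py may differ):

from flask import Flask

app = Flask(__name__)

@app.before_request
def initialize_counts():
    # Give each incoming request its own Counter in the current context.
    traced_counter.set(Counter())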