Ejemplo n.º 1
0
def test_fetch_by_tags(run_storage_factory_cm_fn):
    """Check that get_runs honours tag filters.

    Three runs are stored: two tagged runs sharing ``mytag2=world`` and one
    run without tags. The test then filters on both tags, on the shared tag
    only, and on an empty tag dict.
    """
    with run_storage_factory_cm_fn() as storage:
        assert storage
        one, two, three = (str(uuid.uuid4()) for _ in range(3))

        tagged = (
            (one, {'mytag': 'hello', 'mytag2': 'world'}),
            (two, {'mytag': 'goodbye', 'mytag2': 'world'}),
        )
        for run_id, run_tags in tagged:
            storage.add_run(
                build_run(run_id=run_id, pipeline_name='some_pipeline', tags=run_tags)
            )
        # The third run carries no tags at all.
        storage.add_run(build_run(run_id=three, pipeline_name='some_pipeline'))
        assert len(storage.get_runs()) == 3

        # Both tags must match: only the first run qualifies.
        both_tags = PipelineRunsFilter(tags={'mytag': 'hello', 'mytag2': 'world'})
        exact_matches = storage.get_runs(both_tags)
        assert len(exact_matches) == 1
        assert exact_matches[0].run_id == one

        # The shared tag alone matches both tagged runs.
        shared_matches = storage.get_runs(PipelineRunsFilter(tags={'mytag2': 'world'}))
        assert len(shared_matches) == 2
        shared_ids = [run.run_id for run in shared_matches]
        assert one in shared_ids
        assert two in shared_ids

        # An empty tag filter is expected to return every run.
        unfiltered = storage.get_runs(PipelineRunsFilter(tags={}))
        assert len(unfiltered) == 3
Ejemplo n.º 2
0
def test_fetch_count_by_tag(clean_storage):
    """Check that get_runs_count honours tag filters.

    Stores two tagged runs plus one untagged run, then counts runs matching
    both tags, the shared tag only, and a default (empty) filter.
    """
    storage = clean_storage
    one, two, three = (str(uuid.uuid4()) for _ in range(3))

    tagged = (
        (one, {'mytag': 'hello', 'mytag2': 'world'}),
        (two, {'mytag': 'goodbye', 'mytag2': 'world'}),
    )
    for run_id, run_tags in tagged:
        storage.add_run(
            build_run(run_id=run_id, pipeline_name='some_pipeline', tags=run_tags))
    # Untagged run, only counted by the unfiltered query.
    storage.add_run(build_run(run_id=three, pipeline_name='some_pipeline'))
    assert len(storage.get_runs()) == 3

    both_tags = PipelineRunsFilter(tags={'mytag': 'hello', 'mytag2': 'world'})
    assert storage.get_runs_count(both_tags) == 1

    shared_tag = PipelineRunsFilter(tags={'mytag2': 'world'})
    assert storage.get_runs_count(shared_tag) == 2

    assert storage.get_runs_count(PipelineRunsFilter()) == 3
Ejemplo n.º 3
0
def test_fetch_count_by_tag(run_storage_factory_cm_fn):
    """Check tag-filtered run counts and the aggregated run tags.

    Stores two tagged runs plus one untagged run, verifies get_runs_count
    with and without a filter, and finally compares get_run_tags() against
    the expected (key, set-of-values) pairs.
    """
    with run_storage_factory_cm_fn() as storage:
        assert storage
        one, two, three = (str(uuid.uuid4()) for _ in range(3))

        tagged = (
            (one, {'mytag': 'hello', 'mytag2': 'world'}),
            (two, {'mytag': 'goodbye', 'mytag2': 'world'}),
        )
        for run_id, run_tags in tagged:
            storage.add_run(
                build_run(run_id=run_id, pipeline_name='some_pipeline', tags=run_tags)
            )
        # Untagged run, only counted by the unfiltered query.
        storage.add_run(build_run(run_id=three, pipeline_name='some_pipeline'))
        assert len(storage.get_runs()) == 3

        both_tags = PipelineRunsFilter(tags={'mytag': 'hello', 'mytag2': 'world'})
        assert storage.get_runs_count(filters=both_tags) == 1

        shared_tag = PipelineRunsFilter(tags={'mytag2': 'world'})
        assert storage.get_runs_count(filters=shared_tag) == 2

        # No filter argument at all: every run is counted.
        assert storage.get_runs_count() == 3

        expected_tags = [('mytag', {'hello', 'goodbye'}), ('mytag2', {'world'})]
        assert storage.get_run_tags() == expected_tags
Ejemplo n.º 4
0
def test_fetch_by_status(clean_storage):
    """Check that get_runs filters runs by their status.

    Four runs are stored (one NOT_STARTED, two STARTED, one FAILURE); each
    status filter must return exactly the matching run ids, and SUCCESS must
    return nothing.
    """
    storage = clean_storage
    one, two, three, four = (str(uuid.uuid4()) for _ in range(4))

    seeded = (
        (one, PipelineRunStatus.NOT_STARTED),
        (two, PipelineRunStatus.STARTED),
        (three, PipelineRunStatus.STARTED),
        (four, PipelineRunStatus.FAILURE),
    )
    for run_id, run_status in seeded:
        storage.add_run(
            build_run(run_id=run_id,
                      pipeline_name='some_pipeline',
                      status=run_status))

    def ids_with_status(wanted):
        """Return the set of run ids stored with the given status."""
        matches = storage.get_runs(PipelineRunsFilter(status=wanted))
        return {run.run_id for run in matches}

    assert ids_with_status(PipelineRunStatus.NOT_STARTED) == {one}
    assert ids_with_status(PipelineRunStatus.STARTED) == {two, three}
    assert ids_with_status(PipelineRunStatus.FAILURE) == {four}
    # No run was stored with SUCCESS, so the result must be empty.
    assert ids_with_status(PipelineRunStatus.SUCCESS) == set()
Ejemplo n.º 5
0
def test_basic_start_pipeline_execution_with_tags():
    """Start a pipeline through GraphQL with a tag and verify it is persisted.

    The tag must appear both in the GraphQL response and on the run fetched
    back from the instance with a tag filter.
    """
    instance = DagsterInstance.ephemeral()
    context = define_test_context(instance=instance)
    execution_params = {
        'selector': {'name': 'csv_hello_world'},
        'environmentConfigData': csv_hello_world_solids_config(),
        'executionMetadata': {'tags': [{'key': 'dagster/test_key', 'value': 'test_value'}]},
        'mode': 'default',
    }
    result = execute_dagster_graphql(
        context,
        START_PIPELINE_EXECUTION_QUERY,
        variables={'executionParams': execution_params},
    )

    assert not result.errors
    assert result.data
    payload = result.data['startPipelineExecution']
    assert payload['__typename'] == 'StartPipelineExecutionSuccess'

    run = payload['run']
    run_id = run['runId']
    assert len(run['tags']) > 0
    assert any(
        tag['key'] == 'dagster/test_key' and tag['value'] == 'test_value' for tag in run['tags']
    )

    # The same tag must be queryable from run storage.
    tag_filter = PipelineRunsFilter(tags={'dagster/test_key': 'test_value'})
    stored_runs = instance.get_runs(filters=tag_filter)
    assert len(stored_runs) == 1
    assert stored_runs[0].run_id == run_id
Ejemplo n.º 6
0
 def resolve_runs(self, graphene_info):
     """Return 'PipelineRun' schema objects for runs of this pipeline.

     Runs are fetched from the context's instance, filtered by the name of
     ``self._pipeline``.
     """
     name_filter = PipelineRunsFilter(pipeline_name=self._pipeline.name)
     resolved = []
     for run in graphene_info.context.instance.get_runs(filters=name_filter):
         # Look the type up per run, exactly as the schema is queried per item.
         resolved.append(graphene_info.schema.type_named('PipelineRun')(run))
     return resolved
Ejemplo n.º 7
0
 def resolve_runs(self, graphene_info, **kwargs):
     """Return 'PipelineRun' schema objects for runs tagged with this schedule.

     Runs are selected by the 'dagster/schedule_name' tag; an optional
     ``limit`` keyword argument is forwarded to ``get_runs`` (None if absent).
     """
     schedule_filter = PipelineRunsFilter(tags={'dagster/schedule_name': self._schedule.name})
     max_runs = kwargs.get('limit')
     resolved = []
     for run in graphene_info.context.instance.get_runs(filters=schedule_filter, limit=max_runs):
         resolved.append(graphene_info.schema.type_named('PipelineRun')(run))
     return resolved
Ejemplo n.º 8
0
def test_fetch_by_status_cursored(run_storage_factory_cm_fn):
    """Check that a status filter combines correctly with cursor and limit.

    Runs one, two and four are STARTED and run three is NOT_STARTED. The
    assertions expect a cursor to restrict results to STARTED runs added
    before the cursor run.
    """
    with run_storage_factory_cm_fn() as storage:
        assert storage
        one, two, three, four = (str(uuid.uuid4()) for _ in range(4))

        seeded = (
            (one, PipelineRunStatus.STARTED),
            (two, PipelineRunStatus.STARTED),
            (three, PipelineRunStatus.NOT_STARTED),
            (four, PipelineRunStatus.STARTED),
        )
        for run_id, run_status in seeded:
            storage.add_run(
                build_run(run_id=run_id, pipeline_name='some_pipeline', status=run_status)
            )

        def started_runs(**paging):
            """Fetch STARTED runs, forwarding cursor/limit keyword arguments."""
            return storage.get_runs(
                PipelineRunsFilter(status=PipelineRunStatus.STARTED), **paging
            )

        after_four = started_runs(cursor=four)
        assert len(after_four) == 2
        assert {run.run_id for run in after_four} == {one, two}

        after_two = started_runs(cursor=two)
        assert len(after_two) == 1
        assert {run.run_id for run in after_two} == {one}

        # Nothing was stored before the first run.
        assert not started_runs(cursor=one)

        after_four_limited = started_runs(cursor=four, limit=1)
        assert len(after_four_limited) == 1
        assert after_four_limited[0].run_id == two
Ejemplo n.º 9
0
def test_run_priority_pipeline():
    """Check that the 'hi_pipeline' run starts later but finishes earlier than 'low_pipeline'.

    Both pipelines are submitted from daemon threads before the celery
    worker is started; once both threads signal completion, the run stats
    of the two runs are compared.
    """
    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir)

        # Events handed to the worker threads; the wait loop below polls them.
        low_done = threading.Event()
        hi_done = threading.Event()

        # enqueue low-priority tasks
        # NOTE(review): the third argument (-3 here, 3 below) is presumably the
        # run priority -- confirm against execute_on_thread.
        low_thread = threading.Thread(target=execute_on_thread,
                                      args=(tempdir, 'low_pipeline', -3,
                                            low_done))
        # Daemon thread, so a stuck submission does not keep the process alive.
        low_thread.daemon = True
        low_thread.start()

        time.sleep(
            1)  # sleep so that we don't hit any sqlite concurrency issues

        # enqueue hi-priority tasks
        hi_thread = threading.Thread(target=execute_on_thread,
                                     args=(tempdir, 'hi_pipeline', 3, hi_done))
        hi_thread.daemon = True
        hi_thread.start()

        time.sleep(5)  # sleep to give queue time to prioritize tasks

        # The worker is only started after both pipelines have been enqueued.
        with start_celery_worker():
            # Poll until both threads have set their event. There is no
            # timeout, so this loops forever if either event is never set.
            while not low_done.is_set() or not hi_done.is_set():
                time.sleep(1)

            # Exactly one run is expected per pipeline.
            low_runs = instance.get_runs(filters=PipelineRunsFilter(
                pipeline_name='low_pipeline'))
            assert len(low_runs) == 1
            low_run = low_runs[0]
            lowstats = instance.get_run_stats(low_run.run_id)
            hi_runs = instance.get_runs(filters=PipelineRunsFilter(
                pipeline_name='hi_pipeline'))
            assert len(hi_runs) == 1
            hi_run = hi_runs[0]
            histats = instance.get_run_stats(hi_run.run_id)

            # The low run started first (it was submitted first) yet must end
            # after the hi run.
            assert lowstats.start_time < histats.start_time
            assert lowstats.end_time > histats.end_time
Ejemplo n.º 10
0
def test_slice(clean_storage):
    """Check cursor/limit slicing with no filter, a name filter and a tag filter.

    Three runs with sorted ids are stored in order. For every filter variant
    all three runs must be returned, and ``cursor=three, limit=1`` must
    return only run ``two``.
    """
    storage = clean_storage
    one, two, three = sorted(str(uuid.uuid4()) for _ in range(3))

    for run_id in (one, two, three):
        storage.add_run(
            build_run(run_id=run_id,
                      pipeline_name='some_pipeline',
                      tags={'mytag': 'hello'}))

    # Each entry holds the positional arguments passed to get_runs.
    filter_variants = (
        (),
        (PipelineRunsFilter(pipeline_name='some_pipeline'), ),
        (PipelineRunsFilter(tags={'mytag': 'hello'}), ),
    )
    for filter_args in filter_variants:
        everything = storage.get_runs(*filter_args)
        assert len(everything) == 3

        page = storage.get_runs(*filter_args, cursor=three, limit=1)
        assert len(page) == 1
        assert page[0].run_id == two
Ejemplo n.º 11
0
def test_fetch_by_pipeline(run_storage_factory_cm_fn):
    """Check that get_runs filters runs by pipeline name."""
    with run_storage_factory_cm_fn() as storage:
        assert storage
        one, two = (str(uuid.uuid4()) for _ in range(2))

        for run_id, pipeline in ((one, 'some_pipeline'), (two, 'some_other_pipeline')):
            storage.add_run(build_run(run_id=run_id, pipeline_name=pipeline))
        assert len(storage.get_runs()) == 2

        # Only the run belonging to 'some_pipeline' may come back.
        name_filter = PipelineRunsFilter(pipeline_name='some_pipeline')
        matches = storage.get_runs(name_filter)
        assert len(matches) == 1
        assert matches[0].run_id == one
Ejemplo n.º 12
0
def test_fetch_by_pipeline(clean_storage):
    """Check that get_runs filters runs by pipeline name."""
    storage = clean_storage
    one, two = (str(uuid.uuid4()) for _ in range(2))

    for run_id, pipeline in ((one, 'some_pipeline'), (two, 'some_other_pipeline')):
        storage.add_run(build_run(run_id=run_id, pipeline_name=pipeline))
    assert len(storage.get_runs()) == 2

    # Only the run belonging to 'some_pipeline' may come back.
    name_filter = PipelineRunsFilter(pipeline_name='some_pipeline')
    matches = storage.get_runs(name_filter)
    assert len(matches) == 1
    assert matches[0].run_id == one
Ejemplo n.º 13
0
 def to_selector(self):
     """Build a PipelineRunsFilter from this object's fields.

     A truthy ``self.status`` is used as a key into PipelineRunStatus;
     otherwise the filter's status is None.
     """
     status = PipelineRunStatus[self.status] if self.status else None
     filter_fields = dict(
         run_id=self.runId,
         pipeline=self.pipeline,
         tag_key=self.tagKey,
         tag_value=self.tagValue,
         status=status,
     )
     return PipelineRunsFilter(**filter_fields)
Ejemplo n.º 14
0
def _fetch_runs_by_partition(instance, partition_set_def):
    """Group the runs of a partition set by their 'dagster/partition' tag.

    Runs are selected through the 'dagster/partition_set' tag, using the
    name of ``partition_set_def``. Returns a ``defaultdict(list)`` mapping
    each partition tag value to its runs.
    """
    set_filter = PipelineRunsFilter(
        tags={'dagster/partition_set': partition_set_def.name})

    grouped = defaultdict(list)
    for run in instance.get_runs(set_filter):
        partition_key = run.tags['dagster/partition']
        grouped[partition_key].append(run)
    return grouped
Ejemplo n.º 15
0
def test_add_get_postgres_run_storage(clean_storage):
    """Exercise add / get / has / filter / wipe on the run storage."""
    run_storage = clean_storage
    run_id = str(uuid.uuid4())
    expected = build_run(pipeline_name='pipeline_name', run_id=run_id)
    assert run_storage.add_run(expected)

    # The stored run must round-trip unchanged.
    fetched = run_storage.get_run_by_id(run_id)
    assert expected == fetched

    assert run_storage.has_run(run_id)
    unknown_id = str(uuid.uuid4())
    assert not run_storage.has_run(unknown_id)

    assert run_storage.get_runs() == [expected]
    matching = PipelineRunsFilter(pipeline_name='pipeline_name')
    assert run_storage.get_runs(matching) == [expected]
    non_matching = PipelineRunsFilter(pipeline_name='nope')
    assert run_storage.get_runs(non_matching) == []

    # After wiping, no runs may remain.
    run_storage.wipe()
    assert run_storage.get_runs() == []
Ejemplo n.º 16
0
    def to_selector(self):
        """Build a PipelineRunsFilter from this object's fields.

        A truthy ``self.status`` is used as a key into PipelineRunStatus, and
        truthy ``self.tags`` (items with 'key' and 'value' entries) become a
        dict; either one is None when the attribute is falsy.
        """
        status = PipelineRunStatus[self.status] if self.status else None

        tags = None
        if self.tags:
            # self.tags is wrapped in a list because dauphin.List is not marked as iterable.
            tags = {}
            for tag in list(self.tags):
                tags[tag['key']] = tag['value']

        return PipelineRunsFilter(
            run_id=self.run_id, pipeline_name=self.pipeline_name, tags=tags, status=status,
        )
Ejemplo n.º 17
0
def test_paginated_fetch(run_storage_factory_cm_fn):
    """Check cursor/limit pagination with no filter, a name filter and a tag filter.

    Three runs are stored in order (one, two, three). For every filter
    variant all three runs must be returned, and ``cursor=three, limit=1``
    must return only run ``two``.
    """
    # The storage under test comes solely from the fixture's context manager.
    with run_storage_factory_cm_fn() as storage:
        assert storage
        one, two, three = [str(uuid.uuid4()), str(uuid.uuid4()), str(uuid.uuid4())]
        storage.add_run(
            build_run(run_id=one, pipeline_name='some_pipeline', tags={'mytag': 'hello'})
        )
        storage.add_run(
            build_run(run_id=two, pipeline_name='some_pipeline', tags={'mytag': 'hello'})
        )
        storage.add_run(
            build_run(run_id=three, pipeline_name='some_pipeline', tags={'mytag': 'hello'})
        )

        # Without any filter.
        all_runs = storage.get_runs()
        assert len(all_runs) == 3
        sliced_runs = storage.get_runs(cursor=three, limit=1)
        assert len(sliced_runs) == 1
        assert sliced_runs[0].run_id == two

        # Filtered by pipeline name.
        all_runs = storage.get_runs(PipelineRunsFilter(pipeline_name='some_pipeline'))
        assert len(all_runs) == 3
        sliced_runs = storage.get_runs(
            PipelineRunsFilter(pipeline_name='some_pipeline'), cursor=three, limit=1
        )
        assert len(sliced_runs) == 1
        assert sliced_runs[0].run_id == two

        # Filtered by tag.
        all_runs = storage.get_runs(PipelineRunsFilter(tags={'mytag': 'hello'}))
        assert len(all_runs) == 3
        sliced_runs = storage.get_runs(
            PipelineRunsFilter(tags={'mytag': 'hello'}), cursor=three, limit=1
        )
        assert len(sliced_runs) == 1
        assert sliced_runs[0].run_id == two
Ejemplo n.º 18
0
    def get_runs(self, filters=None, cursor=None, limit=None):
        """Return the runs matching ``filters``, optionally paginated.

        Args:
            filters: Optional PipelineRunsFilter; an empty filter is used when None.
            cursor: Optional string cursor, forwarded to the cursor/limit helper.
            limit: Optional int limit, forwarded to the cursor/limit helper.

        Returns:
            The result of ``self._rows_to_runs`` applied to the query rows.
        """
        filters = check.opt_inst_param(
            filters, 'filters', PipelineRunsFilter, default=PipelineRunsFilter()
        )
        check.opt_str_param(cursor, 'cursor')
        check.opt_int_param(limit, 'limit')

        # A tags filter requires selecting from the runs table joined with the tags table.
        source = RunsTable
        if filters.tags:
            source = RunsTable.outerjoin(RunTagsTable, RunsTable.c.run_id == RunTagsTable.c.run_id)
        base_query = db.select([RunsTable.c.run_body]).select_from(source)

        filtered = self._add_filters_to_query(base_query, filters)
        paged = self._add_cursor_limit_to_query(filtered, cursor, limit)
        return self._rows_to_runs(self.execute(paged))
Ejemplo n.º 19
0
def last_empty_partition(context, partition_set_def):
    """Return the last partition that has no run with SUCCESS status.

    Partitions from ``partition_set_def.get_partitions()`` are walked in
    reverse order; runs are looked up through the 'dagster/partition' and
    'dagster/partition_set' tags. Returns None when there are no partitions
    or when every partition already has a successful run.
    """
    check.inst_param(context, 'context', ScheduleExecutionContext)
    partition_set_def = check.inst_param(
        partition_set_def, 'partition_set_def', PartitionSetDefinition
    )

    partitions = partition_set_def.get_partitions()
    if not partitions:
        return None

    for candidate in reversed(partitions):
        tag_filter = PipelineRunsFilter(
            tags={
                "dagster/partition": candidate.name,
                'dagster/partition_set': partition_set_def.name,
            }
        )
        runs = context.instance.get_runs(tag_filter)
        succeeded = any(run.status == PipelineRunStatus.SUCCESS for run in runs)
        if not succeeded:
            # First partition (from the end) without a successful run.
            return candidate
    return None
Ejemplo n.º 20
0
    def get_runs_count(self, filters=None):
        """Return the number of rows matching ``filters``.

        Args:
            filters: Optional PipelineRunsFilter; an empty filter is used when None.

        Returns:
            The first column of the first row of the count query.
        """
        filters = check.opt_inst_param(
            filters, 'filters', PipelineRunsFilter, default=PipelineRunsFilter()
        )

        # A tags filter requires selecting from the runs table joined with the tags table.
        source = RunsTable
        if filters.tags:
            source = RunsTable.outerjoin(RunTagsTable, RunsTable.c.run_id == RunTagsTable.c.run_id)
        matching = self._add_filters_to_query(db.select([1]).select_from(source), filters)

        # Postgres requires subqueries to be aliased, hence the alias.
        aliased = matching.alias("subquery")

        count_query = db.select([db.func.count()]).select_from(aliased)
        rows = self.execute(count_query)
        return rows[0][0]
Ejemplo n.º 21
0
 def resolve_runs_count(self, graphene_info):
     """Return the count of runs tagged with this schedule's 'dagster/schedule_id'."""
     # NOTE(review): this call uses the keyword `filter` and a list of
     # (key, value) tuples for tags -- confirm both match the instance and
     # PipelineRunsFilter API version in use.
     schedule_filter = PipelineRunsFilter(tags=[("dagster/schedule_id", self._schedule.schedule_id)])
     instance = graphene_info.context.instance
     return instance.get_runs_count(filter=schedule_filter)
Ejemplo n.º 22
0
 def resolve_runs_count(self, graphene_info):
     """Return the count of runs tagged with this schedule's 'dagster/schedule_name'."""
     schedule_filter = PipelineRunsFilter(
         tags={"dagster/schedule_name": self._schedule.name})
     instance = graphene_info.context.instance
     return instance.get_runs_count(filters=schedule_filter)
Ejemplo n.º 23
0
def test_fetch_by_filter(clean_storage):
    """Check get_runs and get_runs_count against every filter combination.

    Three runs are stored:
      * one:   'some_pipeline', tags tag=hello / tag2=world, SUCCESS
      * two:   'some_pipeline', tag tag=hello, FAILURE
      * three: 'other_pipeline', no tags, SUCCESS
    Each filter is applied to both get_runs and get_runs_count, and the two
    results must agree.
    """
    storage = clean_storage
    one, two, three = (str(uuid.uuid4()) for _ in range(3))

    storage.add_run(
        build_run(
            run_id=one,
            pipeline_name='some_pipeline',
            tags={'tag': 'hello', 'tag2': 'world'},
            status=PipelineRunStatus.SUCCESS,
        ))
    storage.add_run(
        build_run(
            run_id=two,
            pipeline_name='some_pipeline',
            tags={'tag': 'hello'},
            status=PipelineRunStatus.FAILURE,
        ))
    storage.add_run(
        build_run(run_id=three,
                  pipeline_name='other_pipeline',
                  status=PipelineRunStatus.SUCCESS))

    assert len(storage.get_runs()) == 3

    def query(**criteria):
        """Fetch the runs, then the count, building a fresh filter for each call."""
        matched = storage.get_runs(PipelineRunsFilter(**criteria))
        total = storage.get_runs_count(PipelineRunsFilter(**criteria))
        return matched, total

    # By run id.
    matched, total = query(run_id=one)
    assert len(matched) == 1
    assert total == 1
    assert matched[0].run_id == one

    # By pipeline name.
    matched, total = query(pipeline_name='some_pipeline')
    assert len(matched) == 2
    assert total == 2
    matched_ids = [run.run_id for run in matched]
    assert one in matched_ids
    assert two in matched_ids

    # By status.
    matched, total = query(status=PipelineRunStatus.SUCCESS)
    assert len(matched) == 2
    assert total == 2
    matched_ids = [run.run_id for run in matched]
    assert one in matched_ids
    assert three in matched_ids

    # By a single tag.
    matched, total = query(tags={'tag': 'hello'})
    assert len(matched) == 2
    assert total == 2
    matched_ids = [run.run_id for run in matched]
    assert one in matched_ids
    assert two in matched_ids

    # By two tags.
    matched, total = query(tags={'tag': 'hello', 'tag2': 'world'})
    assert len(matched) == 1
    assert total == 1
    assert matched[0].run_id == one

    # By pipeline name and tag.
    matched, total = query(pipeline_name="some_pipeline", tags={'tag': 'hello'})
    assert len(matched) == 2
    assert total == 2
    matched_ids = [run.run_id for run in matched]
    assert one in matched_ids
    assert two in matched_ids

    # By pipeline name, tag and status.
    matched, total = query(
        pipeline_name="some_pipeline",
        tags={'tag': 'hello'},
        status=PipelineRunStatus.SUCCESS,
    )
    assert len(matched) == 1
    assert total == 1
    assert matched[0].run_id == one

    # All filters
    matched, total = query(
        run_id=one,
        pipeline_name="some_pipeline",
        tags={'tag': 'hello'},
        status=PipelineRunStatus.SUCCESS,
    )
    assert len(matched) == 1
    assert total == 1
    assert matched[0].run_id == one

    # Empty filter: everything is returned.
    matched, total = query()
    assert len(matched) == 3
    assert total == 3