Ejemplo n.º 1
0
    def test_single_write_read_with_snapshot(self, storage):
        """Store a pipeline snapshot plus a run that references it, then wipe both.

        The test only applies to ``InMemoryRunStorage``; any other storage
        implementation is skipped.
        """
        if not isinstance(storage, InMemoryRunStorage):
            pytest.skip()

        run_id = 'lkasjdflkjasdf'
        empty_pipeline = PipelineDefinition(name='some_pipeline', solid_defs=[])
        snap = empty_pipeline.get_pipeline_snapshot()
        snap_id = create_pipeline_snapshot_id(snap)

        run = PipelineRun.create_empty_run(
            run_id=run_id,
            pipeline_name=empty_pipeline.name,
            pipeline_snapshot_id=snap_id,
        )

        # Before anything is written the snapshot must be unknown.
        assert not storage.has_pipeline_snapshot(snap_id)

        # Adding the snapshot hands back the id computed above, and the
        # snapshot can then be read back unchanged.
        assert storage.add_pipeline_snapshot(snap) == snap_id
        assert storage.get_pipeline_snapshot(snap_id) == snap

        # The run referencing the snapshot round-trips as well.
        storage.add_run(run)
        assert storage.get_run_by_id(run_id) == run

        # wipe() must drop both the snapshot and the run.
        storage.wipe()
        assert not storage.has_pipeline_snapshot(snap_id)
        assert not storage.has_run(run_id)
Ejemplo n.º 2
0
    def create_run_with_snapshot(self, create_run_args):
        """Create a run that references the pipeline snapshot carried by the args.

        The snapshot is written to run storage first when storage does not
        already have it.

        Args:
            create_run_args (InstanceCreateRunArgs): Carries the pipeline
                snapshot and the fields forwarded to ``PipelineRun``.

        Returns:
            The result of ``self.create_run`` for the constructed run.
        """
        check.inst_param(create_run_args, 'create_run_args',
                         InstanceCreateRunArgs)

        from dagster.core.snap.pipeline_snapshot import create_pipeline_snapshot_id

        pipeline_snapshot = create_run_args.pipeline_snapshot
        snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

        run_storage = self._run_storage
        if not run_storage.has_pipeline_snapshot(snapshot_id):
            stored_id = run_storage.add_pipeline_snapshot(pipeline_snapshot)
            # Storage must report the same id that was computed locally.
            check.invariant(snapshot_id == stored_id)

        new_run = PipelineRun(
            pipeline_name=pipeline_snapshot.name,
            pipeline_snapshot_id=snapshot_id,
            run_id=create_run_args.run_id,
            environment_dict=create_run_args.environment_dict,
            mode=create_run_args.mode,
            selector=create_run_args.selector,
            step_keys_to_execute=create_run_args.step_keys_to_execute,
            status=create_run_args.status,
            tags=create_run_args.tags,
            parent_run_id=create_run_args.parent_run_id,
            root_run_id=create_run_args.root_run_id,
        )
        return self.create_run(new_run)
Ejemplo n.º 3
0
 def add_pipeline_snapshot(self, pipeline_snapshot):
     """Store ``pipeline_snapshot`` through ``self._add_snapshot``.

     The snapshot is registered under the id computed by
     ``create_pipeline_snapshot_id`` with type ``SnapshotType.PIPELINE``.

     Returns:
         Whatever ``self._add_snapshot`` returns.
     """
     check.inst_param(
         pipeline_snapshot, 'pipeline_snapshot', PipelineSnapshot)
     computed_id = create_pipeline_snapshot_id(pipeline_snapshot)
     return self._add_snapshot(
         snapshot_id=computed_id,
         snapshot_obj=pipeline_snapshot,
         snapshot_type=SnapshotType.PIPELINE,
     )
Ejemplo n.º 4
0
def test_empty_pipeline_snap_props(snapshot):
    """Check the basic properties and snapshot output of the no-op pipeline."""
    snap = PipelineSnapshot.from_pipeline_def(get_noop_pipeline())

    # Name is set; description and tags are empty.
    assert snap.name == 'noop_pipeline'
    assert snap.description is None
    assert snap.tags == {}

    # The snapshot survives a serialization round trip.
    round_tripped = serialize_rt(snap)
    assert snap == round_tripped

    # First the pretty-printed body, then the snapshot id (order matters
    # because each assert_match call records its own entry).
    pretty_body = serialize_pp(snap)
    snapshot.assert_match(pretty_body)
    snap_id = create_pipeline_snapshot_id(snap)
    snapshot.assert_match(snap_id)
Ejemplo n.º 5
0
 def add_pipeline_snapshot(self, pipeline_snapshot):
     """Insert ``pipeline_snapshot`` into ``SnapshotsTable`` and return its id.

     The snapshot is serialized with ``serialize_dagster_namedtuple``,
     encoded to bytes, zlib-compressed and stored with type ``'PIPELINE'``.

     Returns:
         The id computed by ``create_pipeline_snapshot_id``.
     """
     check.inst_param(
         pipeline_snapshot, 'pipeline_snapshot', PipelineSnapshot)
     with self.connect() as conn:
         snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
         serialized = serialize_dagster_namedtuple(pipeline_snapshot)
         compressed_body = zlib.compress(serialized.encode())
         insert_stmt = SnapshotsTable.insert().values(  # pylint: disable=no-value-for-parameter
             snapshot_id=snapshot_id,
             snapshot_body=compressed_body,
             snapshot_type='PIPELINE',
         )
         conn.execute(insert_stmt)
         return snapshot_id
Ejemplo n.º 6
0
def test_create_noop_execution_plan_with_tags(snapshot):
    """Snapshot the execution plan of a one-solid pipeline whose solid has tags."""
    # The decorated names below become part of the definitions, so they are
    # kept exactly as they are.
    @solid(tags={'foo': 'bar', 'bar': 'baaz'})
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    plan = create_execution_plan(noop_pipeline)
    pipeline_snapshot_id = create_pipeline_snapshot_id(
        noop_pipeline.get_pipeline_snapshot())
    plan_snapshot = snapshot_from_execution_plan(plan, pipeline_snapshot_id)

    snapshot.assert_match(serialize_pp(plan_snapshot))
Ejemplo n.º 7
0
def test_create_noop_execution_plan(snapshot):
    """Snapshot the execution plan of a pipeline containing one no-op solid."""
    # The decorated names below become part of the definitions, so they are
    # kept exactly as they are.
    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    plan = create_execution_plan(noop_pipeline)
    pipeline_snapshot_id = create_pipeline_snapshot_id(
        noop_pipeline.get_pipeline_snapshot())
    plan_snapshot = snapshot_from_execution_plan(plan, pipeline_snapshot_id)

    snapshot.assert_match(serialize_pp(plan_snapshot))
Ejemplo n.º 8
0
def test_create_pipeline_snapshot():
    """Executing a pipeline records the pipeline's snapshot id on the stored run."""
    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    temp_instance = DagsterInstance.local_temp()

    execution_result = execute_pipeline(noop_pipeline, instance=temp_instance)
    assert execution_result.success

    stored_run = temp_instance.get_run_by_id(execution_result.run_id)
    recorded_id = stored_run.pipeline_snapshot_id

    # The id on the run must equal the id computed from the definition.
    expected_id = create_pipeline_snapshot_id(
        noop_pipeline.get_pipeline_snapshot())
    assert recorded_id == expected_id
Ejemplo n.º 9
0
def test_pipeline_snap_all_props(snapshot):
    """Check name, description and tags on the snapshot of a described, tagged pipeline."""
    @solid
    def noop_solid(_):
        pass

    @pipeline(description='desc', tags={'key': 'value'})
    def noop_pipeline():
        noop_solid()

    snap = PipelineSnapshot.from_pipeline_def(noop_pipeline)

    # Values passed to @pipeline must show up on the snapshot.
    assert snap.name == 'noop_pipeline'
    assert snap.description == 'desc'
    assert snap.tags == {'key': 'value'}

    # The snapshot survives a serialization round trip.
    round_tripped = serialize_rt(snap)
    assert snap == round_tripped

    # Body first, id second: each assert_match call records its own entry.
    pretty_body = serialize_pp(snap)
    snapshot.assert_match(pretty_body)
    snap_id = create_pipeline_snapshot_id(snap)
    snapshot.assert_match(snap_id)
Ejemplo n.º 10
0
def test_basic_dep_fan_out(snapshot):
    """One upstream output feeding two aliased downstream solids."""
    @solid
    def return_one(_):
        return 1

    @solid(input_defs=[InputDefinition('value', int)])
    def passthrough(_, value):
        return value

    @pipeline
    def single_dep_pipeline():
        one = return_one()
        passthrough.alias('passone')(one)
        passthrough.alias('passtwo')(one)

    dep_structure_snapshot = build_dep_structure_snapshot_from_icontains_solids(
        single_dep_pipeline)
    index = DependencyStructureIndex(dep_structure_snapshot)

    # Every invocation is present in the index.
    for invocation_name in ('return_one', 'passone', 'passtwo'):
        assert index.get_invocation(invocation_name)

    # Both aliases read their 'value' input from return_one's 'result' output.
    for alias in ('passone', 'passtwo'):
        assert index.get_upstream_output(alias, 'value') == OutputHandleSnap(
            'return_one', 'result')

    # ...and return_one's output fans out to exactly those two inputs.
    expected_downstream = {
        InputHandle('passthrough', 'passone', 'value'),
        InputHandle('passthrough', 'passtwo', 'value'),
    }
    assert set(index.get_downstream_inputs('return_one', 'result')) == expected_downstream

    # The dependency structure survives a JSON round trip.
    serialized = serialize_dagster_namedtuple(dep_structure_snapshot)
    assert deserialize_json_to_dagster_namedtuple(serialized) == dep_structure_snapshot

    snap = PipelineSnapshot.from_pipeline_def(single_dep_pipeline)
    assert snap == serialize_rt(snap)

    # Body first, id second: each assert_match call records its own entry.
    snapshot.assert_match(serialize_pp(snap))
    snapshot.assert_match(create_pipeline_snapshot_id(snap))
Ejemplo n.º 11
0
def test_two_invocations_deps_snap(snapshot):
    """The same solid invoked under two aliases yields two invocations."""
    @solid
    def noop_solid(_):
        pass

    @pipeline
    def two_solid_pipeline():
        noop_solid.alias('one')()
        noop_solid.alias('two')()

    dep_structure = build_dep_structure_snapshot_from_icontains_solids(
        two_solid_pipeline)
    index = DependencyStructureIndex(dep_structure)

    # Both aliases must be resolvable.
    for alias in ('one', 'two'):
        assert index.get_invocation(alias)

    snap = PipelineSnapshot.from_pipeline_def(two_solid_pipeline)
    round_tripped = serialize_rt(snap)
    assert snap == round_tripped

    # Body first, id second: each assert_match call records its own entry.
    snapshot.assert_match(serialize_pp(snap))
    snapshot.assert_match(create_pipeline_snapshot_id(snap))
Ejemplo n.º 12
0
def test_create_execution_plan_with_dep(snapshot):
    """Snapshot the execution plan of a two-solid pipeline with one dependency."""
    @solid
    def solid_one(_):
        return 1

    @solid
    def solid_two(_, num):
        return num + 1

    @pipeline
    def noop_pipeline():
        upstream = solid_one()
        solid_two(upstream)

    plan = create_execution_plan(noop_pipeline)
    pipeline_snapshot_id = create_pipeline_snapshot_id(
        noop_pipeline.get_pipeline_snapshot())
    plan_snapshot = snapshot_from_execution_plan(plan, pipeline_snapshot_id)

    snapshot.assert_match(serialize_pp(plan_snapshot))
Ejemplo n.º 13
0
def test_create_with_composite(snapshot):
    """Snapshot the execution plan of a pipeline built from two composite solids."""
    @solid(
        output_defs=[OutputDefinition(name='out_num', dagster_type=int)],
    )
    def return_one(_):
        return 1

    @solid(
        input_defs=[InputDefinition(name='num', dagster_type=int)],
        output_defs=[OutputDefinition(int)],
    )
    def add_one(_, num):
        return num + 1

    # Two structurally identical composites, each wrapping add_one(return_one()).
    @composite_solid(
        output_defs=[OutputDefinition(name='named_output', dagster_type=int)],
    )
    def comp_1():
        return add_one(return_one())

    @composite_solid(
        output_defs=[OutputDefinition(name='named_output', dagster_type=int)],
    )
    def comp_2():
        return add_one(return_one())

    @solid
    def add(_, num_one, num_two):
        return num_one + num_two

    @pipeline
    def do_comps():
        first = comp_1()
        second = comp_2()
        add(num_one=first, num_two=second)

    plan = create_execution_plan(do_comps)
    pipeline_snapshot_id = create_pipeline_snapshot_id(
        do_comps.get_pipeline_snapshot())
    plan_snapshot = snapshot_from_execution_plan(plan, pipeline_snapshot_id)

    snapshot.assert_match(serialize_pp(plan_snapshot))
Ejemplo n.º 14
0
def test_basic_fan_in(snapshot):
    """Two aliased upstream outputs feeding a single fan-in input."""
    @solid(output_defs=[OutputDefinition(Nothing)])
    def return_nothing(_):
        return None

    @solid(input_defs=[InputDefinition('nothing', Nothing)])
    def take_nothings(_):
        return None

    @pipeline
    def fan_in_test():
        first = return_nothing.alias('nothing_one')()
        second = return_nothing.alias('nothing_two')()
        take_nothings([first, second])

    dep_structure_snapshot = build_dep_structure_snapshot_from_icontains_solids(
        fan_in_test)
    index = DependencyStructureIndex(dep_structure_snapshot)

    for invocation_name in ('nothing_one', 'take_nothings'):
        assert index.get_invocation(invocation_name)

    # The fan-in input lists both upstream outputs, in invocation order.
    expected_upstream = [
        OutputHandleSnap(alias, 'result')
        for alias in ('nothing_one', 'nothing_two')
    ]
    assert index.get_upstream_outputs('take_nothings', 'nothing') == expected_upstream

    # The dependency structure survives a JSON round trip.
    serialized = serialize_dagster_namedtuple(dep_structure_snapshot)
    assert deserialize_json_to_dagster_namedtuple(serialized) == dep_structure_snapshot

    snap = PipelineSnapshot.from_pipeline_def(fan_in_test)
    assert snap == serialize_rt(snap)

    # Body first, id second: each assert_match call records its own entry.
    snapshot.assert_match(serialize_pp(snap))
    snapshot.assert_match(create_pipeline_snapshot_id(snap))
Ejemplo n.º 15
0
 def add_pipeline_snapshot(self, pipeline_snapshot):
     """Keep ``pipeline_snapshot`` in ``self._pipeline_snapshots`` and return its id.

     The key is the id computed by ``create_pipeline_snapshot_id``; an
     existing entry under the same id is overwritten.
     """
     check.inst_param(
         pipeline_snapshot, 'pipeline_snapshot', PipelineSnapshot)
     snapshot_key = create_pipeline_snapshot_id(pipeline_snapshot)
     self._pipeline_snapshots[snapshot_key] = pipeline_snapshot
     return snapshot_key
Ejemplo n.º 16
0
    def get_or_create_run(
        self,
        pipeline_name=None,
        run_id=None,
        environment_dict=None,
        mode=None,
        selector=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot=None,
        execution_plan_snapshot=None,
    ):
        """Return the stored run for ``run_id``, adding it to run storage if absent.

        A ``PipelineRun`` is built from the arguments. When snapshots are
        supplied they are written to run storage (if storage does not already
        have them) and their ids are attached to the run.

        Args:
            pipeline_name, run_id, environment_dict, mode, selector,
            step_keys_to_execute, status, tags, root_run_id, parent_run_id:
                Forwarded unchanged to ``PipelineRun``.
            pipeline_snapshot: Optional pipeline snapshot to persist and
                reference from the run.
            execution_plan_snapshot: Optional execution plan snapshot to
                persist and reference from the run. Requires
                ``pipeline_snapshot``, and its ``pipeline_snapshot_id`` must
                equal the id computed for ``pipeline_snapshot``.

        Returns:
            The already-stored run when one with the same id exists and does
            not differ from the constructed run; otherwise the result of
            ``self._run_storage.add_run``.

        Raises:
            DagsterRunConflict: A run with the same id exists but differs
                from the constructed run.
        """
        # NOTE: this writes the execution-date tag into the caller's ``tags``
        # mapping in place; kept as-is because callers may rely on it.
        if tags and IS_AIRFLOW_INGEST_PIPELINE_STR in tags:
            if AIRFLOW_EXECUTION_DATE_STR not in tags:
                tags[AIRFLOW_EXECUTION_DATE_STR] = get_current_datetime_in_utc(
                ).isoformat()

        pipeline_run = PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode,
            selector=selector,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
        )

        # Bound up front: the execution-plan branch below reads this name even
        # when no pipeline snapshot was supplied, which previously raised
        # UnboundLocalError.
        pipeline_snapshot_id = None

        if pipeline_snapshot is not None:
            from dagster.core.snap.pipeline_snapshot import create_pipeline_snapshot_id

            pipeline_snapshot_id = create_pipeline_snapshot_id(
                pipeline_snapshot)

            if not self._run_storage.has_pipeline_snapshot(
                    pipeline_snapshot_id):
                returned_pipeline_snapshot_id = self._run_storage.add_pipeline_snapshot(
                    pipeline_snapshot)

                # Storage must report the same id that was computed locally.
                check.invariant(
                    pipeline_snapshot_id == returned_pipeline_snapshot_id)

            pipeline_run = pipeline_run.with_pipeline_snapshot_id(
                pipeline_snapshot_id)

        if execution_plan_snapshot is not None:
            from dagster.core.snap.execution_plan_snapshot import create_execution_plan_snapshot_id

            # An execution plan snapshot is only meaningful alongside the
            # pipeline snapshot it was derived from.
            check.invariant(
                pipeline_snapshot_id is not None,
                'execution_plan_snapshot was provided without a pipeline_snapshot',
            )
            check.invariant(execution_plan_snapshot.pipeline_snapshot_id ==
                            pipeline_snapshot_id)

            execution_plan_snapshot_id = create_execution_plan_snapshot_id(
                execution_plan_snapshot)

            if not self._run_storage.has_execution_plan_snapshot(
                    execution_plan_snapshot_id):
                returned_execution_plan_snapshot_id = self._run_storage.add_execution_plan_snapshot(
                    execution_plan_snapshot)

                # Storage must report the same id that was computed locally.
                check.invariant(execution_plan_snapshot_id ==
                                returned_execution_plan_snapshot_id)

            pipeline_run = pipeline_run.with_execution_plan_snapshot_id(
                execution_plan_snapshot_id)

        if self.has_run(pipeline_run.run_id):
            candidate_run = self.get_run_by_id(pipeline_run.run_id)

            # A non-empty diff means the stored run is not the one requested.
            field_diff = _check_run_equality(pipeline_run, candidate_run)

            if field_diff:
                raise DagsterRunConflict(
                    'Found conflicting existing run with same id {run_id}. Runs differ in:'
                    '\n{field_diff}'.format(
                        run_id=pipeline_run.run_id,
                        field_diff=_format_field_diff(field_diff),
                    ), )
            return candidate_run

        return self._run_storage.add_run(pipeline_run)