Code Example #1
    def add_schedule(self, repository, schedule):
        check.inst_param(repository, 'repository', RepositoryDefinition)
        check.inst_param(schedule, 'schedule', Schedule)

        with self.connect() as conn:
            try:
                schedule_insert = ScheduleTable.insert().values(  # pylint: disable=no-value-for-parameter
                    repository_name=repository.name,
                    schedule_name=schedule.name,
                    status=schedule.status.value,
                    schedule_body=serialize_dagster_namedtuple(schedule),
                )
                conn.execute(schedule_insert)
            except db.exc.IntegrityError as exc:
                six.raise_from(
                    DagsterInvariantViolationError(
                        'Schedule {schedule_name} for repository {repository_name} is already present '
                        'in storage'.format(
                            schedule_name=schedule.name, repository_name=repository.name
                        )
                    ),
                    exc,
                )

        return schedule
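
Every storage method in this listing relies on the same serdes round trip. As a self-contained sketch of that round trip (assuming whitelist_for_serdes is importable from dagster.serdes, as in recent releases; some older ones exposed it from dagster.core.serdes, and MyPayload is a made-up namedtuple for illustration):

from collections import namedtuple

from dagster.serdes import (
    deserialize_json_to_dagster_namedtuple,
    serialize_dagster_namedtuple,
    whitelist_for_serdes,
)


# Registering the class is what lets serdes rehydrate it by name later.
@whitelist_for_serdes
class MyPayload(namedtuple('_MyPayload', 'name cron_schedule')):
    pass


payload = MyPayload(name='nightly', cron_schedule='0 0 * * *')
json_str = serialize_dagster_namedtuple(payload)  # returns a plain JSON string
assert deserialize_json_to_dagster_namedtuple(json_str) == payload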
Code Example #2
def test_kitchen_sink():
    kitchen_sink = List[Dict({
        'opt_list_of_int': Field(List[int], is_optional=True),
        'tuple_of_things': Field(Tuple[int, str]),
        'nested_dict': Field(
            Dict({
                'list_list': Field(List[List[int]]),
                'nested_selector': Field(
                    Selector({
                        'some_field': Field(int),
                        'set': Field(Optional[Set[bool]]),
                    })
                ),
            })
        ),
    })]

    kitchen_sink_meta = meta_from_dagster_type(kitchen_sink)

    rehydrated_meta = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(kitchen_sink_meta))
    assert kitchen_sink_meta == rehydrated_meta
Code Example #3
    def add_run(self, pipeline_run):
        check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

        with self.connect() as conn:
            try:
                runs_insert = RunsTable.insert().values(  # pylint: disable=no-value-for-parameter
                    run_id=pipeline_run.run_id,
                    pipeline_name=pipeline_run.pipeline_name,
                    status=pipeline_run.status.value,
                    run_body=serialize_dagster_namedtuple(pipeline_run),
                )
                conn.execute(runs_insert)
            except db.exc.IntegrityError as exc:
                six.raise_from(DagsterRunAlreadyExists, exc)

            if pipeline_run.tags and len(pipeline_run.tags) > 0:
                conn.execute(
                    RunTagsTable.insert(),  # pylint: disable=no-value-for-parameter
                    [
                        dict(run_id=pipeline_run.run_id, key=k, value=v)
                        for k, v in pipeline_run.tags.items()
                    ],
                )

        return pipeline_run
Code Example #4
    def store_event(self, event):
        '''Store an event corresponding to a pipeline run.

        Args:
            event (EventRecord): The event to store.
        '''
        check.inst_param(event, 'event', EventRecord)

        dagster_event_type = None
        if event.is_dagster_event:
            dagster_event_type = event.dagster_event.event_type_value

        run_id = event.run_id

        with self.connect() as conn:
            # https://stackoverflow.com/a/54386260/324449
            event_insert = SqlEventLogStorageTable.insert().values(  # pylint: disable=no-value-for-parameter
                run_id=run_id,
                event=serialize_dagster_namedtuple(event),
                dagster_event_type=dagster_event_type,
                timestamp=datetime.datetime.fromtimestamp(event.timestamp),
            )
            result_proxy = conn.execute(
                event_insert.returning(SqlEventLogStorageTable.c.run_id,
                                       SqlEventLogStorageTable.c.id))
            res = result_proxy.fetchone()
            result_proxy.close()
            conn.execute(
                '''NOTIFY {channel}, %s; '''.format(channel=CHANNEL_NAME),
                (res[0] + '_' + str(res[1]), ),
            )
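
The NOTIFY issued above only matters if something is listening on the other end. A minimal consumer sketch using psycopg2's documented notification loop; the connection string and channel name are placeholders for whatever the storage's conn_string and CHANNEL_NAME are configured with:

import select

import psycopg2
import psycopg2.extensions

conn = psycopg2.connect('postgresql://localhost/dagster')  # placeholder DSN
conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
with conn.cursor() as curs:
    curs.execute('LISTEN run_events;')  # channel must match CHANNEL_NAME

while True:
    # Block until the connection's socket is readable, then drain notifications.
    if select.select([conn], [], [], 5.0) == ([], [], []):
        continue  # timed out; loop and wait again
    conn.poll()
    while conn.notifies:
        notify = conn.notifies.pop(0)
        # store_event's payload is '<run_id>_<event_id>'; run ids contain no underscores.
        run_id, event_id = notify.payload.rsplit('_', 1)
        print('event {} for run {}'.format(event_id, run_id))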
Code Example #5
    def handle_run_event(self, run_id, event):
        check.str_param(run_id, 'run_id')
        check.inst_param(event, 'event', DagsterEvent)

        lookup = {
            DagsterEventType.PIPELINE_START: PipelineRunStatus.STARTED,
            DagsterEventType.PIPELINE_SUCCESS: PipelineRunStatus.SUCCESS,
            DagsterEventType.PIPELINE_FAILURE: PipelineRunStatus.FAILURE,
        }

        if event.event_type not in lookup:
            return

        run = self.get_run_by_id(run_id)
        if not run:
            # TODO log?
            return

        SQL_UPDATE = '''
        UPDATE runs
        SET status = %s, run_body = %s
        WHERE run_id = %s
        '''
        conn = get_conn(self.conn_string)
        new_pipeline_status = lookup[event.event_type]
        with conn.cursor() as curs:
            curs.execute(
                SQL_UPDATE,
                (
                    str(new_pipeline_status),
                    serialize_dagster_namedtuple(
                        run.run_with_status(new_pipeline_status)),
                    run_id,
                ),
            )
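
These psycopg2-based examples call a get_conn helper that is not shown. A plausible shape, given that no cursor here ever commits explicitly, is an autocommit connection per call; this is an assumption, not the project's actual helper:

import psycopg2


def get_conn(conn_string):
    # Hypothetical: the callers never commit, so autocommit is assumed.
    conn = psycopg2.connect(conn_string)
    conn.set_session(autocommit=True)
    return conn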
Code Example #6
    def handle_run_event(self, run_id, event):
        check.str_param(run_id, 'run_id')
        check.inst_param(event, 'event', DagsterEvent)

        lookup = {
            DagsterEventType.PIPELINE_START: PipelineRunStatus.STARTED,
            DagsterEventType.PIPELINE_SUCCESS: PipelineRunStatus.SUCCESS,
            DagsterEventType.PIPELINE_FAILURE: PipelineRunStatus.FAILURE,
        }

        if event.event_type not in lookup:
            return

        run = self.get_run_by_id(run_id)
        if not run:
            # TODO log?
            return

        new_pipeline_status = lookup[event.event_type]

        with self.connect() as conn:
            conn.execute(
                RunsTable.update()  # pylint: disable=no-value-for-parameter
                .where(RunsTable.c.run_id == run_id)
                .values(
                    status=new_pipeline_status.value,
                    run_body=serialize_dagster_namedtuple(run.run_with_status(new_pipeline_status)),
                    update_timestamp=datetime.now(),
                )
            )
Code Example #7
File: runs.py Project: ghosthamlet/dagster
    def _write_metadata_to_file(self, pipeline_run):
        metadata_filepath = self.filepath_for_run_id(pipeline_run.run_id)

        with self._lock:
            with io.open(metadata_filepath, 'w', encoding='utf-8') as f:
                f.write(six.text_type(serialize_dagster_namedtuple(pipeline_run)))

            return metadata_filepath
Code Example #8
File: event_log.py Project: laudehenri/dagster
    def store_event(self, event):
        check.inst_param(event, 'event', EventRecord)
        run_id = event.run_id
        if run_id not in self._known_run_ids:
            with self._connect(run_id) as conn:
                conn.cursor().execute(CREATE_EVENT_LOG_SQL)
                self._known_run_ids.add(run_id)
        with self._connect(run_id) as conn:
            conn.cursor().execute(INSERT_EVENT_SQL, (serialize_dagster_namedtuple(event),))
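
The _connect helper and the SQL constants live elsewhere in that file. For orientation only, a hypothetical per-run connection helper with the same contract (a context manager yielding a sqlite connection keyed by run id):

import os
import sqlite3
from contextlib import contextmanager


@contextmanager
def connect_for_run(base_dir, run_id):
    # Hypothetical layout: one database file per run; commit on a clean
    # exit from the with-block, and always close the connection.
    conn = sqlite3.connect(os.path.join(base_dir, '{}.db'.format(run_id)))
    try:
        yield conn
        conn.commit()
    finally:
        conn.close()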
Code Example #9
    def _write_schedule_to_file(self, schedule):
        metadata_file = os.path.join(
            self._base_dir,
            self._repository_name,
            '{}_{}.json'.format(schedule.name, schedule.schedule_id),
        )
        with io.open(metadata_file, 'w', encoding='utf-8') as f:
            f.write(six.text_type(serialize_dagster_namedtuple(schedule)))

        return metadata_file
Code Example #10
def test_basic_solid_definition():
    @solid
    def noop_solid(_):
        pass

    solid_snap = build_solid_def_snap(noop_solid)

    assert solid_snap
    assert (deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(solid_snap)) == solid_snap)
Code Example #11
    def update_schedule_tick(self, repository, tick):
        check.inst_param(repository, 'repository', RepositoryDefinition)
        check.inst_param(tick, 'tick', ScheduleTick)

        with self.connect() as conn:
            conn.execute(
                ScheduleTickTable.update()  # pylint: disable=no-value-for-parameter
                .where(ScheduleTickTable.c.id == tick.tick_id)
                .values(
                    status=tick.status.value,
                    tick_body=serialize_dagster_namedtuple(tick.schedule_tick_data),
                )
            )

        return tick
Code Example #12
File: tasks.py Project: nikie/dagster
    def _execute_query(_self, handle_dict, variables, instance_ref_dict):
        instance_ref = InstanceRef.from_dict(instance_ref_dict)
        handle = ExecutionTargetHandle.from_dict(handle_dict)

        events = execute_execute_plan_mutation(
            handle=handle,
            variables=variables,
            instance_ref=instance_ref,
        )
        serialized_events = [
            serialize_dagster_namedtuple(event) for event in events
        ]
        return serialized_events
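
Returning plain JSON strings keeps the Celery result payload broker-friendly; the caller rehydrates on receipt. A hypothetical consumer side, where async_result stands in for whatever apply_async returned (serdes import path as in the sketch under Code Example #1):

from dagster.serdes import deserialize_json_to_dagster_namedtuple

serialized_events = async_result.get()  # hypothetical handle; yields the list of JSON strings
events = [deserialize_json_to_dagster_namedtuple(e) for e in serialized_events]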
Code Example #13
    def add_run(self, pipeline_run):
        check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

        conn = get_conn(self.conn_string)
        with conn.cursor() as curs:
            curs.execute(
                INSERT_RUN_SQL,
                (
                    pipeline_run.run_id,
                    pipeline_run.pipeline_name,
                    str(pipeline_run.status),
                    serialize_dagster_namedtuple(pipeline_run),
                ),
            )
Code Example #14
    def store_event(self, event):
        '''Store an event corresponding to a pipeline run.

        Args:
            event (EventRecord): The event to store.
        '''

        check.inst_param(event, 'event', EventRecord)

        with get_conn(self.conn_string).cursor() as curs:
            curs.execute(
                'INSERT INTO event_log (run_id, event_body) VALUES (%s, %s)',
                (event.run_id, serialize_dagster_namedtuple(event)),
            )
Code Example #15
    def _write_schedule_to_file(self, repository, schedule):
        check.inst_param(repository, 'repository', RepositoryDefinition)
        check.inst_param(schedule, 'schedule', Schedule)

        repository_folder = os.path.join(self._base_dir, repository.name)
        utils.mkdir_p(repository_folder)

        metadata_file = os.path.join(
            repository_folder,
            '{}.json'.format(schedule.name),
        )

        with io.open(metadata_file, 'w+', encoding='utf-8') as f:
            f.write(six.text_type(serialize_dagster_namedtuple(schedule)))

        return metadata_file
Code Example #16
    def update_schedule(self, repository, schedule):
        check.inst_param(repository, 'repository', RepositoryDefinition)
        check.inst_param(schedule, 'schedule', Schedule)

        if not self.get_schedule_by_name(repository, schedule.name):
            raise DagsterInvariantViolationError(
                'Schedule {name} for repository {repository_name} is not present in storage'
                .format(name=schedule.name, repository_name=repository.name))

        with self.connect() as conn:
            conn.execute(
                ScheduleTable.update()  # pylint: disable=no-value-for-parameter
                .where(ScheduleTable.c.repository_name == repository.name)
                .where(ScheduleTable.c.schedule_name == schedule.name)
                .values(
                    status=schedule.status.value,
                    schedule_body=serialize_dagster_namedtuple(schedule),
                )
            )
Code Example #17
File: event_log.py Project: databill86/dagster
    def store_event(self, event):
        '''Store an event corresponding to a pipeline run.

        Args:
            event (EventRecord): The event to store.
        '''

        check.inst_param(event, 'event', EventRecord)

        with get_conn(self.conn_string).cursor() as curs:
            event_body = serialize_dagster_namedtuple(event)
            curs.execute(
                '''INSERT INTO event_log (run_id, event_body) VALUES (%s, %s);
                NOTIFY {channel}, %s; '''.format(channel=CHANNEL_NAME),
                (event.run_id, event_body, event_body),
            )
Code Example #18
def test_simple_pipeline_smoke_test():
    @solid
    def solid_without_config(_):
        pass

    @pipeline
    def single_solid_pipeline():
        solid_without_config()

    config_schema_snapshot = build_config_schema_snapshot(
        single_solid_pipeline)
    assert config_schema_snapshot.all_config_snaps_by_key

    serialized = serialize_dagster_namedtuple(config_schema_snapshot)
    rehydrated_config_schema_snapshot = deserialize_json_to_dagster_namedtuple(
        serialized)
    assert config_schema_snapshot == rehydrated_config_schema_snapshot
Code Example #19
    def store_event(self, event):
        check.inst_param(event, 'event', EventRecord)
        run_id = event.run_id
        if run_id not in self._known_run_ids:
            with self._connect(run_id) as conn:
                conn.cursor().execute(CREATE_EVENT_LOG_SQL)
                conn.cursor().execute('PRAGMA journal_mode=WAL;')
                self._known_run_ids.add(run_id)
        with self._connect(run_id) as conn:
            dagster_event_type = None
            if event.is_dagster_event:
                dagster_event_type = event.dagster_event.event_type_value

            conn.cursor().execute(
                INSERT_EVENT_SQL,
                (serialize_dagster_namedtuple(event), dagster_event_type, event.timestamp),
            )
Code Example #20
def test_kitchen_sink():
    kitchen_sink = resolve_to_config_type([{
        'opt_list_of_int': Field(int, is_optional=True),
        'nested_dict': {
            'list_list': [[int]],
            'nested_selector': Field(Selector({
                'some_field': int,
                'more_list': Noneable([bool]),
            })),
        },
    }])

    kitchen_sink_meta = meta_from_dagster_type(kitchen_sink)

    rehydrated_meta = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(kitchen_sink_meta))
    assert kitchen_sink_meta == rehydrated_meta
Code Example #21
    def add_run(self, pipeline_run):
        check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

        with self.connect() as conn:
            runs_insert = RunsTable.insert().values(  # pylint: disable=no-value-for-parameter
                run_id=pipeline_run.run_id,
                pipeline_name=pipeline_run.pipeline_name,
                status=pipeline_run.status.value,
                run_body=serialize_dagster_namedtuple(pipeline_run),
            )
            conn.execute(runs_insert)
            if pipeline_run.tags and len(pipeline_run.tags) > 0:
                conn.execute(
                    RunTagsTable.insert(),  # pylint: disable=no-value-for-parameter
                    [
                        dict(run_id=pipeline_run.run_id, key=k, value=v)
                        for k, v in pipeline_run.tags.items()
                    ],
                )

        return pipeline_run
Code Example #22
File: sql_event_log.py Project: yetudada/dagster
    def store_event(self, event):
        '''Store an event corresponding to a pipeline run.

        Args:
            event (EventRecord): The event to store.
        '''
        check.inst_param(event, 'event', EventRecord)

        dagster_event_type = None
        if event.is_dagster_event:
            dagster_event_type = event.dagster_event.event_type_value

        run_id = event.run_id

        # https://stackoverflow.com/a/54386260/324449
        event_insert = SqlEventLogStorageTable.insert().values(  # pylint: disable=no-value-for-parameter
            run_id=run_id,
            event=serialize_dagster_namedtuple(event),
            dagster_event_type=dagster_event_type,
            timestamp=datetime.datetime.fromtimestamp(event.timestamp),
        )

        with self.connect(run_id) as conn:
            conn.execute(event_insert)
Code Example #23
    def create_schedule_tick(self, repository, schedule_tick_data):
        check.inst_param(repository, 'repository', RepositoryDefinition)
        check.inst_param(schedule_tick_data, 'schedule_tick_data',
                         ScheduleTickData)

        with self.connect() as conn:
            try:
                tick_insert = ScheduleTickTable.insert().values(  # pylint: disable=no-value-for-parameter
                    repository_name=repository.name,
                    schedule_name=schedule_tick_data.schedule_name,
                    status=schedule_tick_data.status.value,
                    tick_body=serialize_dagster_namedtuple(schedule_tick_data),
                )
                result = conn.execute(tick_insert)
                tick_id = result.inserted_primary_key[0]
                return ScheduleTick(tick_id, schedule_tick_data)
            except db.exc.IntegrityError as exc:
                six.raise_from(
                    DagsterInvariantViolationError(
                        'Unable to insert ScheduleTick for schedule {schedule_name} in storage'.format(
                            schedule_name=schedule_tick_data.schedule_name
                        )
                    ),
                    exc,
                )
Code Example #24
File: log.py Project: ghosthamlet/dagster
    def to_json(self):
        return serialize_dagster_namedtuple(self)
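
The natural inverse is not shown in the snippet; a hypothetical counterpart using the matching deserializer:

from dagster.serdes import deserialize_json_to_dagster_namedtuple


def from_json(json_str):
    # Hypothetical inverse of to_json: rehydrate the whitelisted namedtuple
    # that serialize_dagster_namedtuple produced.
    return deserialize_json_to_dagster_namedtuple(json_str)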
Code Example #25
def test_solid_definition_kitchen_sink():
    @solid(
        input_defs=[
            InputDefinition('arg_one', str, description='desc1'),
            InputDefinition('arg_two', int),
        ],
        output_defs=[
            OutputDefinition(name='output_one', dagster_type=str),
            OutputDefinition(name='output_two',
                             dagster_type=int,
                             description='desc2',
                             is_required=False),
        ],
        config={'foo': int},
        description='a description',
        tags={'a_tag': 'yup'},
        required_resource_keys={'a_resource'},
    )
    def kitchen_sink_solid(_, arg_two,
                           arg_one):  # out of order to test positional_inputs
        assert arg_one
        assert arg_two
        raise Exception('should not execute')

    kitchen_sink_solid_snap = build_solid_def_snap(kitchen_sink_solid)

    assert kitchen_sink_solid_snap
    assert kitchen_sink_solid_snap.name == 'kitchen_sink_solid'
    assert len(kitchen_sink_solid_snap.input_def_snaps) == 2
    assert [inp.name for inp in kitchen_sink_solid_snap.input_def_snaps
            ] == ['arg_one', 'arg_two']
    assert [
        inp.dagster_type_key for inp in kitchen_sink_solid_snap.input_def_snaps
    ] == [
        'String',
        'Int',
    ]

    assert kitchen_sink_solid_snap.get_input_snap(
        'arg_one').description == 'desc1'

    assert [out.name for out in kitchen_sink_solid_snap.output_def_snaps] == [
        'output_one',
        'output_two',
    ]

    assert [
        out.dagster_type_key
        for out in kitchen_sink_solid_snap.output_def_snaps
    ] == [
        'String',
        'Int',
    ]

    assert kitchen_sink_solid_snap.get_output_snap(
        'output_two').description == 'desc2'
    assert kitchen_sink_solid_snap.get_output_snap(
        'output_two').is_required is False

    assert (kitchen_sink_solid_snap.config_field_snap.type_key ==
            kitchen_sink_solid.config_field.config_type.key)

    assert kitchen_sink_solid_snap.required_resource_keys == ['a_resource']
    assert kitchen_sink_solid_snap.tags == {'a_tag': 'yup'}
    assert kitchen_sink_solid.positional_inputs == ['arg_two', 'arg_one']

    assert (deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(kitchen_sink_solid_snap)) ==
            kitchen_sink_solid_snap)