def add_schedule(self, repository, schedule):
    '''Insert a row for ``schedule`` into ``ScheduleTable``.

    Args:
        repository (RepositoryDefinition): Repository whose name is stored with the row.
        schedule (Schedule): Schedule to store; its name, status value and
            serialized form are written.

    Returns:
        Schedule: The ``schedule`` argument that was passed in.

    Raises:
        DagsterInvariantViolationError: When the insert raises
            ``db.exc.IntegrityError``; the original error is chained as the cause.
    '''
    check.inst_param(repository, 'repository', RepositoryDefinition)
    check.inst_param(schedule, 'schedule', Schedule)

    with self.connect() as conn:
        try:
            conn.execute(
                ScheduleTable.insert().values(  # pylint: disable=no-value-for-parameter
                    repository_name=repository.name,
                    schedule_name=schedule.name,
                    status=schedule.status.value,
                    schedule_body=serialize_dagster_namedtuple(schedule),
                )
            )
        except db.exc.IntegrityError as exc:
            already_present = DagsterInvariantViolationError(
                'Schedule {schedule_name} for repository {repository_name} is already present '
                'in storage'.format(
                    schedule_name=schedule.name, repository_name=repository.name
                )
            )
            six.raise_from(already_present, exc)

    return schedule
def test_kitchen_sink():
    '''Check that the meta of a nested config type survives a serialize/deserialize round trip.'''
    entry_type = Dict({
        'opt_list_of_int': Field(List[int], is_optional=True),
        'tuple_of_things': Field(Tuple[int, str]),
        'nested_dict': Field(
            Dict({
                'list_list': Field(List[List[int]]),
                'nested_selector': Field(
                    Selector({
                        'some_field': Field(int),
                        'set': Field(Optional[Set[bool]]),
                    })
                ),
            })
        ),
    })
    kitchen_sink = List[entry_type]

    kitchen_sink_meta = meta_from_dagster_type(kitchen_sink)

    # Serialize, then rebuild from the serialized form.
    serialized_meta = serialize_dagster_namedtuple(kitchen_sink_meta)
    rehydrated_meta = deserialize_json_to_dagster_namedtuple(serialized_meta)

    assert kitchen_sink_meta == rehydrated_meta
def add_run(self, pipeline_run):
    '''Insert a row for ``pipeline_run`` into ``RunsTable`` and store its tags.

    Args:
        pipeline_run (PipelineRun): Run to store; its run id, pipeline name,
            status value and serialized form are written.

    Returns:
        PipelineRun: The ``pipeline_run`` argument that was passed in.

    Raises:
        DagsterRunAlreadyExists: When the run insert raises
            ``db.exc.IntegrityError``; the original error is chained as the cause.
    '''
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

    with self.connect() as conn:
        try:
            runs_insert = RunsTable.insert().values(  # pylint: disable=no-value-for-parameter
                run_id=pipeline_run.run_id,
                pipeline_name=pipeline_run.pipeline_name,
                status=pipeline_run.status.value,
                run_body=serialize_dagster_namedtuple(pipeline_run),
            )
            conn.execute(runs_insert)
        except db.exc.IntegrityError as exc:
            six.raise_from(DagsterRunAlreadyExists, exc)

        # Truthiness already covers both None and an empty mapping, so no
        # separate len() check is needed.
        if pipeline_run.tags:
            # One row per tag, inserted with a single multi-row execute.
            conn.execute(
                RunTagsTable.insert(),  # pylint: disable=no-value-for-parameter
                [
                    dict(run_id=pipeline_run.run_id, key=k, value=v)
                    for k, v in pipeline_run.tags.items()
                ],
            )

    return pipeline_run
def store_event(self, event):
    '''Insert an event row and send a notification on ``CHANNEL_NAME``.

    The notification payload is the inserted row's ``run_id`` and ``id``
    joined with an underscore.

    Args:
        event (EventRecord): The event to store.
    '''
    check.inst_param(event, 'event', EventRecord)

    # Only dagster events carry an event type value; other records store None.
    dagster_event_type = event.dagster_event.event_type_value if event.is_dagster_event else None

    with self.connect() as conn:
        # https://stackoverflow.com/a/54386260/324449
        insert_stmt = SqlEventLogStorageTable.insert().values(  # pylint: disable=no-value-for-parameter
            run_id=event.run_id,
            event=serialize_dagster_namedtuple(event),
            dagster_event_type=dagster_event_type,
            timestamp=datetime.datetime.fromtimestamp(event.timestamp),
        )
        returning_stmt = insert_stmt.returning(
            SqlEventLogStorageTable.c.run_id, SqlEventLogStorageTable.c.id
        )

        result_proxy = conn.execute(returning_stmt)
        inserted_row = result_proxy.fetchone()
        result_proxy.close()

        payload = inserted_row[0] + '_' + str(inserted_row[1])
        conn.execute(
            '''NOTIFY {channel}, %s; '''.format(channel=CHANNEL_NAME),
            (payload,),
        )
def handle_run_event(self, run_id, event):
    '''Update a stored run's status in response to a pipeline start/success/failure event.

    Events of any other type, and events for a run that ``get_run_by_id``
    does not return, are ignored.

    Args:
        run_id (str): Id of the run the event belongs to.
        event (DagsterEvent): The event to handle.
    '''
    check.str_param(run_id, 'run_id')
    check.inst_param(event, 'event', DagsterEvent)

    status_by_event_type = {
        DagsterEventType.PIPELINE_START: PipelineRunStatus.STARTED,
        DagsterEventType.PIPELINE_SUCCESS: PipelineRunStatus.SUCCESS,
        DagsterEventType.PIPELINE_FAILURE: PipelineRunStatus.FAILURE,
    }

    new_pipeline_status = status_by_event_type.get(event.event_type)
    if new_pipeline_status is None:
        return

    run = self.get_run_by_id(run_id)
    if not run:
        # TODO log?
        return

    update_sql = ''' UPDATE runs SET status = %s, run_body = %s WHERE run_id = %s '''
    updated_run_body = serialize_dagster_namedtuple(run.run_with_status(new_pipeline_status))

    conn = get_conn(self.conn_string)
    with conn.cursor() as curs:
        curs.execute(update_sql, (str(new_pipeline_status), updated_run_body, run_id))
def handle_run_event(self, run_id, event):
    '''Update a run's row in ``RunsTable`` for a pipeline start/success/failure event.

    Events of any other type, and events for a run that ``get_run_by_id``
    does not return, are ignored. The update writes the new status value, the
    re-serialized run and ``update_timestamp``.

    Args:
        run_id (str): Id of the run the event belongs to.
        event (DagsterEvent): The event to handle.
    '''
    check.str_param(run_id, 'run_id')
    check.inst_param(event, 'event', DagsterEvent)

    status_by_event_type = {
        DagsterEventType.PIPELINE_START: PipelineRunStatus.STARTED,
        DagsterEventType.PIPELINE_SUCCESS: PipelineRunStatus.SUCCESS,
        DagsterEventType.PIPELINE_FAILURE: PipelineRunStatus.FAILURE,
    }

    new_pipeline_status = status_by_event_type.get(event.event_type)
    if new_pipeline_status is None:
        return

    run = self.get_run_by_id(run_id)
    if not run:
        # TODO log?
        return

    with self.connect() as conn:
        update_stmt = (
            RunsTable.update()  # pylint: disable=no-value-for-parameter
            .where(RunsTable.c.run_id == run_id)
            .values(
                status=new_pipeline_status.value,
                run_body=serialize_dagster_namedtuple(run.run_with_status(new_pipeline_status)),
                update_timestamp=datetime.now(),
            )
        )
        conn.execute(update_stmt)
def _write_metadata_to_file(self, pipeline_run):
    '''Write the serialized ``pipeline_run`` to its metadata file while holding ``self._lock``.

    Args:
        pipeline_run: Run to serialize; its ``run_id`` determines the file path.

    Returns:
        The path returned by ``self.filepath_for_run_id`` for this run.
    '''
    target_path = self.filepath_for_run_id(pipeline_run.run_id)

    with self._lock, io.open(target_path, 'w', encoding='utf-8') as out_file:
        serialized_run = serialize_dagster_namedtuple(pipeline_run)
        out_file.write(six.text_type(serialized_run))

    return target_path
def store_event(self, event):
    '''Store the serialized ``event`` using the connection for its run.

    The first time a run id is seen, ``CREATE_EVENT_LOG_SQL`` is executed and
    the run id is recorded in ``self._known_run_ids``, so that step is skipped
    on later calls for the same run.

    Args:
        event (EventRecord): The event to store.
    '''
    check.inst_param(event, 'event', EventRecord)
    run_id = event.run_id

    # `not in` is the dedicated membership operator; it reads unambiguously,
    # unlike `not run_id in ...`.
    if run_id not in self._known_run_ids:
        with self._connect(run_id) as conn:
            conn.cursor().execute(CREATE_EVENT_LOG_SQL)
            self._known_run_ids.add(run_id)

    with self._connect(run_id) as conn:
        conn.cursor().execute(INSERT_EVENT_SQL, (serialize_dagster_namedtuple(event),))
def _write_schedule_to_file(self, schedule):
    '''Write the serialized ``schedule`` to a JSON file and return the file's path.

    The file is named ``<schedule.name>_<schedule.schedule_id>.json`` and is
    placed under ``self._base_dir`` / ``self._repository_name``.

    Args:
        schedule: Schedule to serialize.

    Returns:
        str: Path of the file that was written.
    '''
    file_name = '{}_{}.json'.format(schedule.name, schedule.schedule_id)
    metadata_file = os.path.join(self._base_dir, self._repository_name, file_name)

    with io.open(metadata_file, 'w', encoding='utf-8') as out_file:
        serialized_schedule = serialize_dagster_namedtuple(schedule)
        out_file.write(six.text_type(serialized_schedule))

    return metadata_file
def test_basic_solid_definition():
    '''Check that a snapshot of a no-op solid is truthy and round-trips through serialization.'''

    @solid
    def noop_solid(_):
        pass

    solid_snap = build_solid_def_snap(noop_solid)
    assert solid_snap

    serialized_snap = serialize_dagster_namedtuple(solid_snap)
    rehydrated_snap = deserialize_json_to_dagster_namedtuple(serialized_snap)
    assert rehydrated_snap == solid_snap
def update_schedule_tick(self, repository, tick):
    '''Update the ``ScheduleTickTable`` row whose id equals ``tick.tick_id``.

    The row's status and serialized tick data are overwritten.

    Args:
        repository (RepositoryDefinition): Type-checked only; not otherwise used here.
        tick (ScheduleTick): Tick whose status value and ``schedule_tick_data`` are written.

    Returns:
        ScheduleTick: The ``tick`` argument that was passed in.
    '''
    check.inst_param(repository, 'repository', RepositoryDefinition)
    check.inst_param(tick, 'tick', ScheduleTick)

    with self.connect() as conn:
        update_stmt = (
            ScheduleTickTable.update()  # pylint: disable=no-value-for-parameter
            .where(ScheduleTickTable.c.id == tick.tick_id)
            .values(
                status=tick.status.value,
                tick_body=serialize_dagster_namedtuple(tick.schedule_tick_data),
            )
        )
        conn.execute(update_stmt)

    return tick
def _execute_query(_self, handle_dict, variables, instance_ref_dict):
    '''Run the execute-plan mutation and return its events in serialized form.

    Args:
        handle_dict: Dict form of an ``ExecutionTargetHandle``.
        variables: Passed through to ``execute_execute_plan_mutation``.
        instance_ref_dict: Dict form of an ``InstanceRef``.

    Returns:
        list: One ``serialize_dagster_namedtuple`` result per event.
    '''
    instance_ref = InstanceRef.from_dict(instance_ref_dict)
    handle = ExecutionTargetHandle.from_dict(handle_dict)

    events = execute_execute_plan_mutation(
        handle=handle,
        variables=variables,
        instance_ref=instance_ref,
    )

    return [serialize_dagster_namedtuple(event) for event in events]
def add_run(self, pipeline_run):
    '''Insert ``pipeline_run`` by executing ``INSERT_RUN_SQL``.

    The bound parameters are the run id, the pipeline name, ``str()`` of the
    status and the serialized run, in that order.

    Args:
        pipeline_run (PipelineRun): The run to store.
    '''
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

    conn = get_conn(self.conn_string)
    with conn.cursor() as curs:
        insert_params = (
            pipeline_run.run_id,
            pipeline_run.pipeline_name,
            str(pipeline_run.status),
            serialize_dagster_namedtuple(pipeline_run),
        )
        curs.execute(INSERT_RUN_SQL, insert_params)
def store_event(self, event):
    '''Store an event corresponding to a pipeline run.

    Inserts the event's ``run_id`` and its serialized body into ``event_log``.

    Args:
        event (EventRecord): The event to store.
    '''
    check.inst_param(event, 'event', EventRecord)

    with get_conn(self.conn_string).cursor() as curs:
        insert_params = (event.run_id, serialize_dagster_namedtuple(event))
        curs.execute(
            'INSERT INTO event_log (run_id, event_body) VALUES (%s, %s)',
            insert_params,
        )
def _write_schedule_to_file(self, repository, schedule):
    '''Write the serialized ``schedule`` to ``<base_dir>/<repository.name>/<schedule.name>.json``.

    The repository folder is created through ``utils.mkdir_p`` before writing.

    Args:
        repository (RepositoryDefinition): Repository whose name selects the folder.
        schedule (Schedule): Schedule to serialize.

    Returns:
        str: Path of the file that was written.
    '''
    check.inst_param(repository, 'repository', RepositoryDefinition)
    check.inst_param(schedule, 'schedule', Schedule)

    repository_folder = os.path.join(self._base_dir, repository.name)
    utils.mkdir_p(repository_folder)

    file_name = '{}.json'.format(schedule.name)
    metadata_file = os.path.join(repository_folder, file_name)

    with io.open(metadata_file, 'w+', encoding='utf-8') as out_file:
        serialized_schedule = serialize_dagster_namedtuple(schedule)
        out_file.write(six.text_type(serialized_schedule))

    return metadata_file
def update_schedule(self, repository, schedule):
    '''Overwrite the stored status and body of an existing schedule.

    Args:
        repository (RepositoryDefinition): Repository the schedule belongs to.
        schedule (Schedule): Schedule carrying the new status and body; the row is
            matched on repository name and schedule name.

    Raises:
        DagsterInvariantViolationError: When ``get_schedule_by_name`` returns a
            falsy result for this repository and schedule name.
    '''
    check.inst_param(repository, 'repository', RepositoryDefinition)
    check.inst_param(schedule, 'schedule', Schedule)

    existing_schedule = self.get_schedule_by_name(repository, schedule.name)
    if not existing_schedule:
        raise DagsterInvariantViolationError(
            'Schedule {name} for repository {repository_name} is not present in storage'.format(
                name=schedule.name, repository_name=repository.name
            )
        )

    with self.connect() as conn:
        update_stmt = (
            ScheduleTable.update()  # pylint: disable=no-value-for-parameter
            .where(ScheduleTable.c.repository_name == repository.name)
            .where(ScheduleTable.c.schedule_name == schedule.name)
            .values(
                status=schedule.status.value,
                schedule_body=serialize_dagster_namedtuple(schedule),
            )
        )
        conn.execute(update_stmt)
def store_event(self, event):
    '''Store an event corresponding to a pipeline run and notify ``CHANNEL_NAME``.

    One ``execute`` call inserts the event into ``event_log`` and issues a
    ``NOTIFY`` whose payload is the serialized event body.

    Args:
        event (EventRecord): The event to store.
    '''
    check.inst_param(event, 'event', EventRecord)

    with get_conn(self.conn_string).cursor() as curs:
        event_body = serialize_dagster_namedtuple(event)
        insert_and_notify_sql = '''INSERT INTO event_log (run_id, event_body) VALUES (%s, %s); NOTIFY {channel}, %s; '''.format(
            channel=CHANNEL_NAME
        )
        # event_body is bound twice: once for the insert, once as the payload.
        curs.execute(insert_and_notify_sql, (event.run_id, event_body, event_body))
def test_simple_pipeline_smoke_test():
    '''Check that a single-solid pipeline's config schema snapshot round-trips through serialization.'''

    @solid
    def solid_without_config(_):
        pass

    @pipeline
    def single_solid_pipeline():
        solid_without_config()

    config_schema_snapshot = build_config_schema_snapshot(single_solid_pipeline)
    assert config_schema_snapshot.all_config_snaps_by_key

    rehydrated_config_schema_snapshot = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(config_schema_snapshot)
    )
    assert config_schema_snapshot == rehydrated_config_schema_snapshot
def store_event(self, event):
    '''Store the serialized ``event`` with its event type and timestamp.

    The first time a run id is seen, ``CREATE_EVENT_LOG_SQL`` and
    ``PRAGMA journal_mode=WAL;`` are executed and the run id is recorded in
    ``self._known_run_ids``, so that setup is skipped on later calls.

    Args:
        event (EventRecord): The event to store.
    '''
    check.inst_param(event, 'event', EventRecord)
    run_id = event.run_id

    # `not in` is the dedicated membership operator; it reads unambiguously,
    # unlike `not run_id in ...`.
    if run_id not in self._known_run_ids:
        with self._connect(run_id) as conn:
            conn.cursor().execute(CREATE_EVENT_LOG_SQL)
            conn.cursor().execute('PRAGMA journal_mode=WAL;')
            self._known_run_ids.add(run_id)

    with self._connect(run_id) as conn:
        # Only dagster events carry an event type value; other records store None.
        dagster_event_type = None
        if event.is_dagster_event:
            dagster_event_type = event.dagster_event.event_type_value

        conn.cursor().execute(
            INSERT_EVENT_SQL,
            (serialize_dagster_namedtuple(event), dagster_event_type, event.timestamp),
        )
def test_kitchen_sink():
    '''Check that the meta of a nested shorthand config schema round-trips through serialization.'''
    entry_schema = {
        'opt_list_of_int': Field(int, is_optional=True),
        'nested_dict': {
            'list_list': [[int]],
            'nested_selector': Field(
                Selector({
                    'some_field': int,
                    'more_list': Noneable([bool]),
                })
            ),
        },
    }
    kitchen_sink = resolve_to_config_type([entry_schema])

    kitchen_sink_meta = meta_from_dagster_type(kitchen_sink)

    # Serialize, then rebuild from the serialized form.
    serialized_meta = serialize_dagster_namedtuple(kitchen_sink_meta)
    rehydrated_meta = deserialize_json_to_dagster_namedtuple(serialized_meta)

    assert kitchen_sink_meta == rehydrated_meta
def add_run(self, pipeline_run):
    '''Insert a row for ``pipeline_run`` into ``RunsTable`` and store its tags.

    Args:
        pipeline_run (PipelineRun): Run to store; its run id, pipeline name,
            status value and serialized form are written.

    Returns:
        PipelineRun: The ``pipeline_run`` argument that was passed in.
    '''
    check.inst_param(pipeline_run, 'pipeline_run', PipelineRun)

    with self.connect() as conn:
        runs_insert = RunsTable.insert().values(  # pylint: disable=no-value-for-parameter
            run_id=pipeline_run.run_id,
            pipeline_name=pipeline_run.pipeline_name,
            status=pipeline_run.status.value,
            run_body=serialize_dagster_namedtuple(pipeline_run),
        )
        conn.execute(runs_insert)

        # Truthiness already covers both None and an empty mapping, so no
        # separate len() check is needed.
        if pipeline_run.tags:
            # One row per tag, inserted with a single multi-row execute.
            conn.execute(
                RunTagsTable.insert(),  # pylint: disable=no-value-for-parameter
                [
                    dict(run_id=pipeline_run.run_id, key=k, value=v)
                    for k, v in pipeline_run.tags.items()
                ],
            )

    return pipeline_run
def store_event(self, event):
    '''Insert an event row using the connection opened for the event's run id.

    Args:
        event (EventRecord): The event to store.
    '''
    check.inst_param(event, 'event', EventRecord)

    # Only dagster events carry an event type value; other records store None.
    dagster_event_type = event.dagster_event.event_type_value if event.is_dagster_event else None
    run_id = event.run_id

    # https://stackoverflow.com/a/54386260/324449
    insert_stmt = SqlEventLogStorageTable.insert().values(  # pylint: disable=no-value-for-parameter
        run_id=run_id,
        event=serialize_dagster_namedtuple(event),
        dagster_event_type=dagster_event_type,
        timestamp=datetime.datetime.fromtimestamp(event.timestamp),
    )

    with self.connect(run_id) as conn:
        conn.execute(insert_stmt)
def create_schedule_tick(self, repository, schedule_tick_data):
    '''Insert a row into ``ScheduleTickTable`` and return the resulting tick.

    Args:
        repository (RepositoryDefinition): Repository whose name is stored with the row.
        schedule_tick_data (ScheduleTickData): Tick data; its schedule name, status
            value and serialized form are written.

    Returns:
        ScheduleTick: Built from the first element of the insert's
        ``inserted_primary_key`` and ``schedule_tick_data``.

    Raises:
        DagsterInvariantViolationError: When ``db.exc.IntegrityError`` is raised;
            the original error is chained as the cause.
    '''
    check.inst_param(repository, 'repository', RepositoryDefinition)
    check.inst_param(schedule_tick_data, 'schedule_tick_data', ScheduleTickData)

    with self.connect() as conn:
        try:
            insert_result = conn.execute(
                ScheduleTickTable.insert().values(  # pylint: disable=no-value-for-parameter
                    repository_name=repository.name,
                    schedule_name=schedule_tick_data.schedule_name,
                    status=schedule_tick_data.status.value,
                    tick_body=serialize_dagster_namedtuple(schedule_tick_data),
                )
            )
            return ScheduleTick(insert_result.inserted_primary_key[0], schedule_tick_data)
        except db.exc.IntegrityError as exc:
            insert_failed = DagsterInvariantViolationError(
                'Unable to insert ScheduleTick for schedule {schedule_name} in storage'.format(
                    schedule_name=schedule_tick_data.schedule_name,
                )
            )
            six.raise_from(insert_failed, exc)
def to_json(self):
    '''Return the result of ``serialize_dagster_namedtuple`` applied to this object.'''
    serialized = serialize_dagster_namedtuple(self)
    return serialized
def test_solid_definition_kitchen_sink():
    '''Snapshot a solid that sets every definition option and verify each snapshot field.

    Also checks that the snapshot round-trips through serialization.
    '''

    @solid(
        input_defs=[
            InputDefinition('arg_one', str, description='desc1'),
            InputDefinition('arg_two', int),
        ],
        output_defs=[
            OutputDefinition(name='output_one', dagster_type=str),
            OutputDefinition(
                name='output_two', dagster_type=int, description='desc2', is_required=False
            ),
        ],
        config={'foo': int},
        description='a description',
        tags={'a_tag': 'yup'},
        required_resource_keys={'a_resource'},
    )
    def kitchen_sink_solid(_, arg_two, arg_one):  # out of order to test positional_inputs
        assert arg_one
        assert arg_two
        raise Exception('should not execute')

    snap = build_solid_def_snap(kitchen_sink_solid)

    assert snap
    assert snap.name == 'kitchen_sink_solid'

    # Inputs: count, names, type keys and description.
    assert len(snap.input_def_snaps) == 2
    input_names = [inp.name for inp in snap.input_def_snaps]
    assert input_names == ['arg_one', 'arg_two']
    input_type_keys = [inp.dagster_type_key for inp in snap.input_def_snaps]
    assert input_type_keys == ['String', 'Int']
    assert snap.get_input_snap('arg_one').description == 'desc1'

    # Outputs: names, type keys, description and required flag.
    output_names = [out.name for out in snap.output_def_snaps]
    assert output_names == ['output_one', 'output_two']
    output_type_keys = [out.dagster_type_key for out in snap.output_def_snaps]
    assert output_type_keys == ['String', 'Int']
    assert snap.get_output_snap('output_two').description == 'desc2'
    assert snap.get_output_snap('output_two').is_required is False

    # Config, resources and tags.
    assert snap.config_field_snap.type_key == kitchen_sink_solid.config_field.config_type.key
    assert snap.required_resource_keys == ['a_resource']
    assert snap.tags == {'a_tag': 'yup'}

    assert kitchen_sink_solid.positional_inputs == ['arg_two', 'arg_one']

    rehydrated_snap = deserialize_json_to_dagster_namedtuple(serialize_dagster_namedtuple(snap))
    assert rehydrated_snap == snap