def test_serialize_deserialize():
    """Round-trip the string ``'foo'`` through the file-based pickle helpers.

    The value is written with ``serialize_to_file`` and read back with
    ``deserialize_from_file`` inside a pipeline execution context built from
    an empty ``PipelineDefinition``.
    """
    # Local import so this block does not depend on the module's import list.
    import os

    # ``NamedTemporaryFile`` keeps its handle open, and re-opening that path
    # by name is not possible on Windows. Create the file, release the
    # handle, and remove the file ourselves instead.
    handle, path = tempfile.mkstemp()
    os.close(handle)
    try:
        with yield_pipeline_execution_context(
            PipelineDefinition([]), {}, RunConfig()
        ) as context:
            serialize_to_file(context, PickleSerializationStrategy(), 'foo', path)
            assert (
                deserialize_from_file(context, PickleSerializationStrategy(), path)
                == 'foo'
            )
    finally:
        os.remove(path)
def test_gcs_object_store(gcs_bucket):
    """Exercise set / has / get / cp / rm on a ``GCSObjectStore``.

    A small bytes payload is stored under a random key, read back, copied to
    a second random key, and then both keys are removed.
    """
    store = GCSObjectStore(gcs_bucket)
    payload = b"this is a test"
    # BytesIO(initial_bytes) starts positioned at offset 0.
    stream = BytesIO(payload)
    strategy = PickleSerializationStrategy()

    source_key = "test-file-{}".format(uuid.uuid4().hex)
    store.set_object(source_key, stream, strategy)
    assert store.has_object(source_key)

    fetched = store.get_object(source_key, strategy)
    assert fetched[0].read() == payload

    copy_key = "test-file-{}".format(uuid.uuid4().hex)
    store.cp_object(source_key, copy_key)
    assert store.has_object(copy_key)

    store.rm_object(source_key)
    store.rm_object(copy_key)
    assert not store.has_object(source_key)
    assert not store.has_object(copy_key)
def load_parameter(input_name, input_value):
    """Turn a parameter passed to the notebook into the value for an input.

    Args:
        input_name: Name of the input being loaded.
        input_value: The raw parameter value. For pickle-serializable inputs
            it is passed to ``deserialize_from_file`` as the file to read.

    Returns:
        The value for the input: ``read_value`` of the input's runtime type
        when the manager has a solid definition; otherwise the raw value for
        scalar / JSON-serializable inputs, or the unpickled object for
        pickle-serializable inputs.

    Raises:
        DagstermillError: If there is no solid definition and the registered
            type of the input is none of the supported kinds.
    """
    manager = MANAGER_FOR_NOTEBOOK_INSTANCE
    check.invariant(manager.populated_by_papermill, 'populated_by_papermill')

    # With a solid definition available, the runtime type does the reading.
    if manager.solid_def is not None:
        input_def = manager.solid_def.input_def_named(input_name)
        return read_value(input_def.runtime_type, input_value)

    check.invariant(
        manager.input_name_type_dict is not None,
        'input_name_type_dict must not be None if solid_def is not defined!',
    )
    runtime_type_enum = manager.input_name_type_dict[input_name]

    passthrough_kinds = (
        SerializableRuntimeType.SCALAR,
        SerializableRuntimeType.JSON_SERIALIZABLE,
    )
    if runtime_type_enum in passthrough_kinds:
        return input_value

    if runtime_type_enum == SerializableRuntimeType.PICKLE_SERIALIZABLE:
        return deserialize_from_file(
            MANAGER_FOR_NOTEBOOK_INSTANCE.context,
            PickleSerializationStrategy(),
            input_value,
        )

    raise DagstermillError(
        "loading parameter {input_name} resulted in an error".format(
            input_name=input_name
        )
    )
def yield_result(self, value, output_name='result'):
    """Record ``value`` as the notebook's output named ``output_name``.

    Args:
        value: The output value produced in the notebook.
        output_name: Name of the output to record. Defaults to ``'result'``.
            The default is new and backward compatible: existing calls that
            pass the name explicitly behave as before.

    Returns:
        ``value`` unchanged when the manager was not populated by papermill;
        otherwise ``None`` after the output has been recorded via
        ``pm.record``.

    Raises:
        DagstermillError: If the output name is unknown, or if there is no
            solid definition and the output's registered type is not one of
            the kinds handled here.
    """
    # Outside of a papermill-driven run there is nothing to record.
    if not self.populated_by_papermill:
        return value

    if self.solid_def is None:
        # No solid definition: fall back to the name -> type-enum mapping.
        if output_name not in self.output_name_type_dict:
            raise DagstermillError(
                'Solid {solid_name} does not have output named {output_name}'
                .format(solid_name=self.solid_def_name, output_name=output_name))

        runtime_type_enum = self.output_name_type_dict[output_name]
        if runtime_type_enum == SerializableRuntimeType.SCALAR:
            pm.record(output_name, value)
        elif (runtime_type_enum == SerializableRuntimeType.ANY
              and is_json_serializable(value)):
            pm.record(output_name, value)
        elif runtime_type_enum == SerializableRuntimeType.PICKLE_SERIALIZABLE:
            # Pickled values go to a file; only the file path is recorded.
            out_file = os.path.join(self.marshal_dir,
                                    'output-{}'.format(output_name))
            serialize_to_file(
                MANAGER_FOR_NOTEBOOK_INSTANCE.context,
                PickleSerializationStrategy(),
                value,
                out_file,
            )
            pm.record(output_name, out_file)
        else:
            # Also reached for ANY-typed outputs whose value is not JSON
            # serializable.
            raise DagstermillError(
                'Output Definition for output {output_name} requires repo registration '
                'since it has a complex serialization format'.format(
                    output_name=output_name))
    else:
        if not self.solid_def.has_output(output_name):
            raise DagstermillError(
                'Solid {solid_name} does not have output named {output_name}'
                .format(solid_name=self.solid_def.name, output_name=output_name))

        runtime_type = self.solid_def.output_def_named(
            output_name).runtime_type
        out_file = os.path.join(self.marshal_dir,
                                'output-{}'.format(output_name))
        # Record whatever write_value returns for this runtime type.
        pm.record(output_name, write_value(runtime_type, value, out_file))
def yield_result(self, value, output_name='result'):
    """Attach ``value`` to the notebook as the output ``output_name``.

    Args:
        value: The output value produced in the notebook.
        output_name: Name of the output to record (defaults to ``'result'``).

    Returns:
        ``value`` itself when the manager was not populated by papermill;
        otherwise ``None`` once the output has been glued with scrapbook.

    Raises:
        DagstermillError: If the output name is unknown, or if there is no
            solid definition and the output's registered type is not one of
            the kinds handled here.
    """
    if not self.populated_by_papermill:
        return value

    solid_def = self.solid_def
    if solid_def is not None:
        # A solid definition is available: its runtime type does the writing.
        if not solid_def.has_output(output_name):
            raise DagstermillError(
                'Solid {solid_name} does not have output named {output_name}'
                .format(solid_name=solid_def.name, output_name=output_name)
            )
        runtime_type = solid_def.output_def_named(output_name).runtime_type
        target_path = os.path.join(
            self.marshal_dir, 'output-{}'.format(output_name)
        )
        written = write_value(runtime_type, value, target_path)
        scrapbook.glue(output_name, written)
        return None

    # No solid definition: rely on the name -> type-enum mapping.
    if output_name not in self.output_name_type_dict:
        raise DagstermillError(
            'Solid {solid_name} does not have output named {output_name}'
            .format(solid_name=self.solid_def_name, output_name=output_name)
        )

    kind = self.output_name_type_dict[output_name]
    glue_directly = kind == SerializableRuntimeType.SCALAR or (
        kind == SerializableRuntimeType.ANY and is_json_serializable(value)
    )
    if glue_directly:
        scrapbook.glue(output_name, value)
    elif kind == SerializableRuntimeType.PICKLE_SERIALIZABLE:
        # Pickle to a file and glue only the file path.
        target_path = os.path.join(
            self.marshal_dir, 'output-{}'.format(output_name)
        )
        PickleSerializationStrategy().serialize_to_file(value, target_path)
        scrapbook.glue(output_name, target_path)
    else:
        raise DagstermillError(
            # Discuss this in the docs and improve error message
            # https://github.com/dagster-io/dagster/issues/1275
            # https://github.com/dagster-io/dagster/issues/1276
            'Output Definition for output {output_name} requires repo registration '
            'since it has a complex serialization format'.format(
                output_name=output_name
            )
        )
    return None
def test_gcs_object_store(gcs_bucket):
    """Store, fetch, copy and delete a bytes payload via ``GCSObjectStore``."""
    object_store = GCSObjectStore(gcs_bucket)
    expected = b'this is a test'
    # BytesIO(initial_bytes) starts positioned at offset 0.
    buffer = BytesIO(expected)
    pickle_strategy = PickleSerializationStrategy()

    first_key = 'test-file-{}'.format(uuid.uuid4().hex)
    object_store.set_object(first_key, buffer, pickle_strategy)
    assert object_store.has_object(first_key)

    retrieved = object_store.get_object(first_key, pickle_strategy)
    assert retrieved.obj.read() == expected

    second_key = 'test-file-{}'.format(uuid.uuid4().hex)
    object_store.cp_object(first_key, second_key)
    assert object_store.has_object(second_key)

    # Remove both objects, then confirm neither is left behind.
    object_store.rm_object(first_key)
    object_store.rm_object(second_key)
    assert not object_store.has_object(first_key)
    assert not object_store.has_object(second_key)
def test_serialization_strategy():
    """Round-trip ``'foo'`` through ``PickleSerializationStrategy`` file I/O."""
    # Local import so this block does not depend on the module's import list.
    import os

    serialization_strategy = PickleSerializationStrategy()

    # ``NamedTemporaryFile`` keeps its handle open, and re-opening that path
    # by name is not possible on Windows. Create the file, release the
    # handle, and remove the file ourselves instead.
    handle, path = tempfile.mkstemp()
    os.close(handle)
    try:
        serialization_strategy.serialize_to_file('foo', path)
        assert serialization_strategy.deserialize_from_file(path) == 'foo'
    finally:
        os.remove(path)
Tuple[str, str]: The fully qualified key of the source object and the fully qualified destination key. """ @abstractmethod def uri_for_key(self, key, protocol=None): """Implement this method to get a URI for a key in the object store. Should return a URI as a string.""" def key_for_paths(self, path_fragments): """Joins path fragments into a key using the object-store specific path separator.""" return self.sep.join(path_fragments) DEFAULT_SERIALIZATION_STRATEGY = PickleSerializationStrategy() class InMemoryObjectStore(ObjectStore): def __init__(self): self.values = {} super(InMemoryObjectStore, self).__init__(name="memory") def set_object(self, key, obj, serialization_strategy=None): check.str_param(key, "key") self.values[key] = obj return key def get_object(self, key, serialization_strategy=DEFAULT_SERIALIZATION_STRATEGY):
def test_serialization_strategy():
    """Check that a pickled string written to a temp path reads back equal."""
    strategy = PickleSerializationStrategy()
    with safe_tempfile_path() as path:
        strategy.serialize_to_file("foo", path)
        round_tripped = strategy.deserialize_from_file(path)
        assert round_tripped == "foo"