Esempio n. 1
0
    def set_object(self,
                   key,
                   obj,
                   serialization_strategy=DEFAULT_SERIALIZATION_STRATEGY):
        """
        Serialize ``obj`` to the file at path ``key``.

        Anything already present at ``key`` is unlinked first (a warning is logged).

        Args:
            key (str): Destination path.
            obj: Arbitrary Python object; intentionally not type-checked.
            serialization_strategy (SerializationStrategy): Performs the write via
                ``serialize_to_file``.

        Returns:
            ObjectStoreOperation: A SET_OBJECT record carrying ``key`` and ``obj``.
        """
        check.str_param(key, 'key')
        check.inst_param(serialization_strategy, 'serialization_strategy',
                         SerializationStrategy)

        already_present = os.path.exists(key)
        if already_present:
            logging.warning('Removing existing path {path}'.format(path=key))
            os.unlink(key)

        # The parent directory has to be in place before the file is written.
        parent_dir = os.path.dirname(key)
        mkdir_p(parent_dir)

        serialization_strategy.serialize_to_file(obj, key)

        operation_fields = dict(
            op=ObjectStoreOperationType.SET_OBJECT,
            key=key,
            dest_key=None,
            obj=obj,
            serialization_strategy_name=serialization_strategy.name,
            object_store_name=self.name,
        )
        return ObjectStoreOperation(**operation_fields)
Esempio n. 2
0
    def get_intermediate_from_address(
        self,
        context,
        dagster_type=None,
        step_output_handle=None,
        address=None,
    ):
        """
        Load an intermediate value directly from ``address`` (experimental).

        This method will likely be merged into `get_intermediate_object` once that
        method accepts an `address` argument.

        Returns:
            ObjectStoreOperation: A GET_OBJECT record holding the loaded object.

        Raises:
            DagsterAddressIOError: When the read raises ``IOError`` or ``OSError``.
        """
        dagster_type = resolve_dagster_type(dagster_type)
        check.opt_inst_param(context, "context", SystemExecutionContext)
        check.inst_param(dagster_type, "dagster_type", DagsterType)
        check.inst_param(step_output_handle, "step_output_handle",
                         StepOutputHandle)
        check.str_param(address, "address")

        # type_storage_plugin_registry is not supported on this path yet.
        try:
            strategy = dagster_type.serialization_strategy
            loaded, location = self.object_store.get_object(
                key=address,
                serialization_strategy=strategy,
            )
            return ObjectStoreOperation(
                op=ObjectStoreOperationType.GET_OBJECT,
                key=location,
                dest_key=None,
                obj=loaded,
                serialization_strategy_name=strategy.name,
                object_store_name=self.object_store.name,
            )
        except (IOError, OSError) as io_error:
            raise DagsterAddressIOError(str(io_error))
Esempio n. 3
0
    def set_object(self, key, obj, serialization_strategy=None):
        """
        Serialize ``obj`` into an in-memory buffer and upload it to the GCS blob ``key``.

        An existing object at ``key`` is removed first. Both the removal and the upload
        are invoked through ``backoff`` with ``retry_on=(TooManyRequests, )``.

        Args:
            key (str): Blob key inside ``self.bucket_obj``.
            obj: Arbitrary Python object; intentionally not type-checked.
            serialization_strategy (SerializationStrategy): Required despite the
                ``None`` default.

        Returns:
            ObjectStoreOperation: A SET_OBJECT record keyed by the object's URI.
        """
        check.str_param(key, 'key')

        logging.info('Writing GCS object at: ' + self.uri_for_key(key))

        # None is rejected here even though it is the declared default.
        check.inst_param(serialization_strategy, 'serialization_strategy',
                         SerializationStrategy)

        if self.has_object(key):
            logging.warning('Removing existing GCS key: {key}'.format(key=key))
            backoff(self.rm_object, args=[key], retry_on=(TooManyRequests, ))

        # Binary strategies (and any strategy on Python 2) get a bytes buffer;
        # everything else gets a text buffer.
        needs_bytes = (serialization_strategy.write_mode == 'wb'
                       or sys.version_info < (3, 0))
        buffer_factory = BytesIO if needs_bytes else StringIO

        with buffer_factory() as payload:
            serialization_strategy.serialize(obj, payload)
            payload.seek(0)
            upload = self.bucket_obj.blob(key).upload_from_file
            backoff(upload, args=[payload], retry_on=(TooManyRequests, ))

        return ObjectStoreOperation(
            op=ObjectStoreOperationType.SET_OBJECT,
            key=self.uri_for_key(key),
            dest_key=None,
            obj=obj,
            serialization_strategy_name=serialization_strategy.name,
            object_store_name=self.name,
        )
Esempio n. 4
0
    def set_intermediate_object(self, dagster_type, step_output_handle, value, version=None):
        """
        Write ``value`` to the object store under the key derived from ``step_output_handle``.

        Any exception raised by the underlying ``set_object`` call is re-raised as a
        ``DagsterObjectStoreError`` chained to the original error.

        Returns:
            ObjectStoreOperation: A SET_OBJECT record carrying ``value`` and ``version``.
        """
        check.inst_param(dagster_type, "dagster_type", DagsterType)
        check.inst_param(step_output_handle, "step_output_handle", StepOutputHandle)

        path_parts = self._get_paths(step_output_handle)
        check.param_invariant(len(path_parts) > 0, "paths")

        target_key = self.object_store.key_for_paths([self.root] + path_parts)

        try:
            stored_at = self.object_store.set_object(
                target_key,
                value,
                serialization_strategy=dagster_type.serialization_strategy,
            )
        except Exception as error:  # pylint: disable=broad-except
            failure_message = _object_store_operation_error_message(
                step_output_handle=step_output_handle,
                op=ObjectStoreOperationType.SET_OBJECT,
                object_store_name=self.object_store.name,
                serialization_strategy_name=dagster_type.serialization_strategy.name,
            )
            raise DagsterObjectStoreError(failure_message) from error

        return ObjectStoreOperation(
            op=ObjectStoreOperationType.SET_OBJECT,
            key=stored_at,
            dest_key=None,
            obj=value,
            serialization_strategy_name=dagster_type.serialization_strategy.name,
            object_store_name=self.object_store.name,
            version=version,
        )
Esempio n. 5
0
def _set_intermediates(step_context, step_output, step_output_handle, output,
                       version):
    """
    Persist a step output and yield the resulting storage event, if any.

    When ``step_output.asset_store_handle`` is truthy the value goes through
    ``_set_addressable_asset``; otherwise it goes to the intermediate storage.
    An event is yielded only when the result has the expected operation type.
    """
    asset_store_handle = step_output.asset_store_handle

    if not asset_store_handle:
        # No asset store handle on this output: use intermediate storage.
        stored = step_context.intermediate_storage.set_intermediate(
            context=step_context,
            dagster_type=step_output.dagster_type,
            step_output_handle=step_output_handle,
            value=output.value,
            version=version,
        )
        if isinstance(stored, ObjectStoreOperation):
            serializable_op = ObjectStoreOperation.serializable(
                stored, value_name=output.output_name)
            yield DagsterEvent.object_store_operation(step_context,
                                                      serializable_op)
        return

    # An asset store handle is present on the step output: use it.
    stored = _set_addressable_asset(step_context, step_output_handle,
                                    asset_store_handle, output.value)
    if isinstance(stored, AssetStoreOperation):
        yield DagsterEvent.asset_store_operation(step_context, stored)
Esempio n. 6
0
    def rm_object(self, key):
        """
        Delete every S3 object listed under the prefix ``key`` in ``self.bucket``.

        Nothing is deleted when ``self.has_object(key)`` is falsy. Listings are
        followed page by page while ``IsTruncated`` is set.

        Returns:
            ObjectStoreOperation: An RM_OBJECT record keyed by the object's URI.
        """
        check.str_param(key, 'key')
        check.param_invariant(len(key) > 0, 'key')

        if self.has_object(key):
            listing_args = {'Bucket': self.bucket, 'Prefix': key}
            while True:
                page = self.s3.list_objects_v2(**listing_args)
                doomed = [{'Key': entry['Key']} for entry in page['Contents']]
                self.s3.delete_objects(Bucket=self.bucket, Delete={'Objects': doomed})

                if not page['IsTruncated']:
                    break
                # Resume the listing where this page stopped.
                listing_args['ContinuationToken'] = page['NextContinuationToken']

        return ObjectStoreOperation(
            op=ObjectStoreOperationType.RM_OBJECT,
            key=self.uri_for_key(key),
            dest_key=None,
            obj=None,
            serialization_strategy_name=None,
            object_store_name=self.name,
        )
Esempio n. 7
0
    def get_object(self, key, serialization_strategy=None):
        """
        Download the S3 object at ``key`` and deserialize it.

        The body is wrapped in a ``BytesIO`` when the strategy's ``read_mode`` is
        ``'rb'``; otherwise it is decoded with the strategy's ``encoding`` and wrapped
        in a ``StringIO``.

        Args:
            key (str): Non-empty object key.
            serialization_strategy (SerializationStrategy): Required despite the
                ``None`` default.

        Returns:
            ObjectStoreOperation: A GET_OBJECT record holding the deserialized object.
        """
        check.str_param(key, 'key')
        check.param_invariant(len(key) > 0, 'key')
        # None is rejected here even though it is the declared default.
        check.inst_param(
            serialization_strategy, 'serialization_strategy', SerializationStrategy
        )

        # FIXME we need better error handling for object store
        wants_bytes = serialization_strategy.read_mode == 'rb'
        raw_body = self.s3.get_object(Bucket=self.bucket, Key=key)['Body'].read()
        if wants_bytes:
            source = BytesIO(raw_body)
        else:
            source = StringIO(raw_body.decode(serialization_strategy.encoding))

        loaded = serialization_strategy.deserialize(source)

        return ObjectStoreOperation(
            op=ObjectStoreOperationType.GET_OBJECT,
            key=self.uri_for_key(key),
            dest_key=None,
            obj=loaded,
            serialization_strategy_name=serialization_strategy.name,
            object_store_name=self.name,
        )
Esempio n. 8
0
    def set_intermediate_object(self,
                                dagster_type,
                                step_output_handle,
                                value,
                                version=None):
        """
        Write ``value`` to the object store under the key derived from ``step_output_handle``.

        Errors raised by the underlying ``set_object`` call propagate unchanged.

        Returns:
            ObjectStoreOperation: A SET_OBJECT record carrying ``value`` and ``version``.
        """
        check.inst_param(dagster_type, "dagster_type", DagsterType)
        check.inst_param(step_output_handle, "step_output_handle",
                         StepOutputHandle)

        path_parts = self._get_paths(step_output_handle)
        check.param_invariant(len(path_parts) > 0, "paths")

        target_key = self.object_store.key_for_paths([self.root] + path_parts)
        stored_at = self.object_store.set_object(
            target_key,
            value,
            serialization_strategy=dagster_type.serialization_strategy,
        )

        operation_fields = dict(
            op=ObjectStoreOperationType.SET_OBJECT,
            key=stored_at,
            dest_key=None,
            obj=value,
            serialization_strategy_name=dagster_type.serialization_strategy.name,
            object_store_name=self.object_store.name,
            version=version,
        )
        return ObjectStoreOperation(**operation_fields)
Esempio n. 9
0
    def set_object(self, key, obj, serialization_strategy=None):
        """
        Serialize ``obj`` in memory and upload the bytes to the S3 object at ``key``.

        An existing object at ``key`` is removed first. Text-mode strategies
        (``write_mode == "w"`` on Python 3) are serialized into a ``StringIO`` whose
        contents are UTF-8 encoded before upload; all others write directly to bytes.

        Args:
            key (str): Object key inside ``self.bucket``.
            obj: Arbitrary Python object; intentionally not type-checked.
            serialization_strategy (SerializationStrategy): Required despite the
                ``None`` default.

        Returns:
            ObjectStoreOperation: A SET_OBJECT record keyed by the object's URI.
        """
        check.str_param(key, "key")

        logging.info("Writing S3 object at: " + self.uri_for_key(key))

        # cannot check obj since could be arbitrary Python object
        check.inst_param(serialization_strategy, "serialization_strategy",
                         SerializationStrategy)  # cannot be none here

        if self.has_object(key):
            logging.warning("Removing existing S3 key: {key}".format(key=key))
            self.rm_object(key)

        with BytesIO() as bytes_io:
            if serialization_strategy.write_mode == "w" and sys.version_info >= (
                    3, 0):
                # Use the buffer the ``with`` statement manages. Rebinding the name
                # to a second StringIO left the managed one unused and the real one
                # unclosed.
                with StringIO() as string_io:
                    serialization_strategy.serialize(obj, string_io)
                    bytes_io.write(string_io.getvalue().encode("utf-8"))
            else:
                serialization_strategy.serialize(obj, bytes_io)
            bytes_io.seek(0)
            self.s3.put_object(Bucket=self.bucket, Key=key, Body=bytes_io)

        return ObjectStoreOperation(
            op=ObjectStoreOperationType.SET_OBJECT,
            key=self.uri_for_key(key),
            dest_key=None,
            obj=obj,
            serialization_strategy_name=serialization_strategy.name,
            object_store_name=self.name,
        )
Esempio n. 10
0
def _set_intermediates(step_context, step_output, step_output_handle, output,
                       version):
    """
    Persist a step output and yield the events produced by doing so.

    When ``step_context.using_asset_store(step_output_handle)`` is truthy, every item
    produced by ``_set_addressable_asset`` is translated into an event according to
    its type. Otherwise the value goes to the intermediate storage.
    """
    if not step_context.using_asset_store(step_output_handle):
        stored = step_context.intermediate_storage.set_intermediate(
            context=step_context,
            dagster_type=step_output.dagster_type,
            step_output_handle=step_output_handle,
            value=output.value,
            version=version,
        )
        if isinstance(stored, ObjectStoreOperation):
            serializable_op = ObjectStoreOperation.serializable(
                stored, value_name=output.output_name)
            yield DagsterEvent.object_store_operation(step_context,
                                                      serializable_op)
        return

    produced = _set_addressable_asset(step_context, step_output_handle,
                                      output.value)
    for item in produced:
        # The two type checks are independent of each other, not mutually exclusive.
        if isinstance(item, AssetStoreOperation):
            yield DagsterEvent.asset_store_operation(step_context, item)
        if isinstance(item, AssetMaterialization):
            yield DagsterEvent.step_materialization(step_context, item)
Esempio n. 11
0
    def set_object(self, key, obj, serialization_strategy=None):
        """
        Serialize ``obj`` into a bytes buffer and upload it to the GCS blob ``key``.

        An existing object at ``key`` is removed first (a warning is logged).

        Args:
            key (str): Blob key inside ``self.bucket_obj``.
            obj: Arbitrary Python object; intentionally not type-checked.
            serialization_strategy (SerializationStrategy): Required despite the
                ``None`` default.

        Returns:
            ObjectStoreOperation: A SET_OBJECT record keyed by the object's URI.
        """
        check.str_param(key, 'key')

        logging.info('Writing GCS object at: ' + self.uri_for_key(key))

        # None is rejected here even though it is the declared default.
        check.inst_param(serialization_strategy, 'serialization_strategy',
                         SerializationStrategy)

        key_in_use = self.has_object(key)
        if key_in_use:
            logging.warning('Removing existing GCS key: {key}'.format(key=key))
            self.rm_object(key)

        with BytesIO() as payload:
            serialization_strategy.serialize(obj, payload)
            # Rewind so the upload starts from the first byte.
            payload.seek(0)
            target_blob = self.bucket_obj.blob(key)
            target_blob.upload_from_file(payload)

        operation_fields = dict(
            op=ObjectStoreOperationType.SET_OBJECT,
            key=self.uri_for_key(key),
            dest_key=None,
            obj=obj,
            serialization_strategy_name=serialization_strategy.name,
            object_store_name=self.name,
        )
        return ObjectStoreOperation(**operation_fields)
Esempio n. 12
0
    def get_object(self, key, serialization_strategy=None):
        """
        Download the GCS blob at ``key`` and deserialize it.

        Binary strategies (``read_mode == 'rb'``) are downloaded into a ``BytesIO``.
        Otherwise the blob is fetched as a string, decoded with the strategy's
        ``encoding`` and wrapped in a ``StringIO``.

        Args:
            key (str): Non-empty blob key.
            serialization_strategy (SerializationStrategy): Required despite the
                ``None`` default.

        Returns:
            ObjectStoreOperation: A GET_OBJECT record holding the deserialized object.
        """
        check.str_param(key, 'key')
        check.param_invariant(len(key) > 0, 'key')
        # None is rejected here even though it is the declared default.
        check.inst_param(serialization_strategy, 'serialization_strategy',
                         SerializationStrategy)

        binary_mode = serialization_strategy.read_mode == 'rb'
        if not binary_mode:
            downloaded = self.bucket_obj.blob(key).download_as_string()
            source = StringIO(
                downloaded.decode(serialization_strategy.encoding))
        else:
            source = BytesIO()
            self.bucket_obj.blob(key).download_to_file(source)

        # Rewind before handing the buffer to the deserializer.
        source.seek(0)
        loaded = serialization_strategy.deserialize(source)

        return ObjectStoreOperation(
            op=ObjectStoreOperationType.GET_OBJECT,
            key=self.uri_for_key(key),
            dest_key=None,
            obj=loaded,
            serialization_strategy_name=serialization_strategy.name,
            object_store_name=self.name,
        )
Esempio n. 13
0
def _set_objects(step_context, step_output, step_output_handle, output):
    """
    Hand a step output to its output manager and yield the resulting events.

    If the manager returns an ``ObjectStoreOperation``, a single object-store event
    is yielded. Otherwise the result is converted by ``_materializations_to_events``
    and a SET_ASSET asset-store event is yielded afterwards.
    """
    from dagster.core.storage.asset_store import AssetStoreHandle

    output_def = step_output.output_def
    manager = step_context.get_output_manager(step_output_handle)
    context_for_output = step_context.get_output_context(step_output_handle)
    handled = manager.handle_output(context_for_output, output.value)

    # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
    if isinstance(handled, ObjectStoreOperation):
        serializable_op = ObjectStoreOperation.serializable(
            handled, value_name=step_output_handle.output_name)
        yield DagsterEvent.object_store_operation(step_context,
                                                  serializable_op)
        return

    for event in _materializations_to_events(step_context,
                                             step_output_handle, handled):
        yield event

    # SET_ASSET operation by AssetStore
    set_asset_op = AssetStoreOperation(
        AssetStoreOperationType.SET_ASSET,
        step_output_handle,
        AssetStoreHandle(output_def.manager_key, output_def.metadata),
    )
    yield DagsterEvent.asset_store_operation(step_context, set_asset_op)
Esempio n. 14
0
def _set_intermediates(step_context, step_output, step_output_handle, output):
    """
    Store a step output in the intermediate storage.

    Yields one object-store event when the storage returns an
    ``ObjectStoreOperation``; yields nothing otherwise.
    """
    storage = step_context.intermediate_storage
    stored = storage.set_intermediate(
        context=step_context,
        dagster_type=step_output.dagster_type,
        step_output_handle=step_output_handle,
        value=output.value,
    )
    if not isinstance(stored, ObjectStoreOperation):
        return

    serializable_op = ObjectStoreOperation.serializable(
        stored, value_name=output.output_name)
    yield DagsterEvent.object_store_operation(step_context, serializable_op)
Esempio n. 15
0
    def cp_object(self, src, dst):
        """
        Copy the GCS blob ``src`` to ``dst`` within ``self.bucket_obj``.

        Returns:
            ObjectStoreOperation: A CP_OBJECT record with source and destination URIs.
        """
        check.str_param(src, 'src')
        check.str_param(dst, 'dst')

        bucket = self.bucket_obj
        # Source and destination bucket are the same object.
        bucket.copy_blob(bucket.blob(src), bucket, dst)

        source_uri = self.uri_for_key(src)
        destination_uri = self.uri_for_key(dst)
        return ObjectStoreOperation(
            op=ObjectStoreOperationType.CP_OBJECT,
            key=source_uri,
            dest_key=destination_uri,
            object_store_name=self.name,
        )
Esempio n. 16
0
    def cp_object(self, src, dst):
        """
        Copy the S3 object ``src`` to ``dst`` within ``self.bucket``.

        Returns:
            ObjectStoreOperation: A CP_OBJECT record with source and destination URIs.
        """
        check.str_param(src, 'src')
        check.str_param(dst, 'dst')

        # The copy stays inside one bucket: source and target share ``self.bucket``.
        copy_source = {'Bucket': self.bucket, 'Key': src}
        self.s3.copy_object(Bucket=self.bucket, Key=dst, CopySource=copy_source)

        source_uri = self.uri_for_key(src)
        destination_uri = self.uri_for_key(dst)
        return ObjectStoreOperation(
            op=ObjectStoreOperationType.CP_OBJECT,
            key=source_uri,
            dest_key=destination_uri,
            object_store_name=self.name,
        )
Esempio n. 17
0
    def rm_object(self, key):
        """
        Delete the GCS blob at ``key`` if it exists.

        A missing blob is not an error; the RM_OBJECT record is returned either way.

        Returns:
            ObjectStoreOperation: An RM_OBJECT record keyed by the object's URI.
        """
        check.str_param(key, 'key')
        check.param_invariant(len(key) > 0, 'key')

        bucket = self.bucket_obj
        blob_present = bucket.blob(key).exists()
        if blob_present:
            bucket.blob(key).delete()

        operation_fields = dict(
            op=ObjectStoreOperationType.RM_OBJECT,
            key=self.uri_for_key(key),
            dest_key=None,
            obj=None,
            serialization_strategy_name=None,
            object_store_name=self.name,
        )
        return ObjectStoreOperation(**operation_fields)
Esempio n. 18
0
    def get_object(self, key, serialization_strategy=DEFAULT_SERIALIZATION_STRATEGY):
        """
        Deserialize the object stored in the file at path ``key``.

        Args:
            key (str): Non-empty file path.
            serialization_strategy (SerializationStrategy): Reads the file via
                ``deserialize_from_file``.

        Returns:
            ObjectStoreOperation: A GET_OBJECT record holding the deserialized object.
        """
        check.str_param(key, "key")
        check.param_invariant(len(key) > 0, "key")
        check.inst_param(serialization_strategy, "serialization_strategy", SerializationStrategy)

        loaded = serialization_strategy.deserialize_from_file(key)

        operation_fields = dict(
            op=ObjectStoreOperationType.GET_OBJECT,
            key=key,
            dest_key=None,
            obj=loaded,
            serialization_strategy_name=serialization_strategy.name,
            object_store_name=self.name,
        )
        return ObjectStoreOperation(**operation_fields)
Esempio n. 19
0
    def rm_object(self, key):
        """
        Delete ``key`` through ``self.file_system_client``.

        Returns:
            ObjectStoreOperation: An RM_OBJECT record keyed by the object's URI.
        """
        check.str_param(key, "key")
        check.param_invariant(len(key) > 0, "key")

        # Per the original author's note, delete_file already operates recursively,
        # so a single call is enough.
        client = self.file_system_client
        client.delete_file(key)

        operation_fields = dict(
            op=ObjectStoreOperationType.RM_OBJECT,
            key=self.uri_for_key(key),
            dest_key=None,
            obj=None,
            serialization_strategy_name=None,
            object_store_name=self.name,
        )
        return ObjectStoreOperation(**operation_fields)
Esempio n. 20
0
    def get_object(self, key, serialization_strategy=None):
        """
        Download the S3 object at ``key`` and deserialize it from a bytes buffer.

        Args:
            key (str): Non-empty object key.
            serialization_strategy: Must provide ``deserialize`` and ``name``. It is
                not validated here, so leaving the ``None`` default fails on
                attribute access.

        Returns:
            ObjectStoreOperation: A GET_OBJECT record holding the deserialized object.
        """
        check.str_param(key, 'key')
        check.param_invariant(len(key) > 0, 'key')

        # FIXME we need better error handling for object store
        deserialize = serialization_strategy.deserialize
        response = self.s3.get_object(Bucket=self.bucket, Key=key)
        raw_body = response['Body'].read()
        loaded = deserialize(BytesIO(raw_body))

        return ObjectStoreOperation(
            op=ObjectStoreOperationType.GET_OBJECT,
            key=self.uri_for_key(key),
            dest_key=None,
            obj=loaded,
            serialization_strategy_name=serialization_strategy.name,
            object_store_name=self.name,
        )
Esempio n. 21
0
    def copy_intermediate_from_run(self, context, run_id, step_output_handle):
        """
        Copy the intermediate for ``step_output_handle`` from run ``run_id`` into this run.

        The source key is rooted at ``self.root_for_run_id(run_id)`` and the destination
        key at ``self.root``; both share the same path suffix.

        Returns:
            ObjectStoreOperation: A CP_OBJECT record with source and destination URIs.
        """
        check.opt_inst_param(context, "context", SystemExecutionContext)
        check.str_param(run_id, "run_id")
        check.inst_param(step_output_handle, "step_output_handle", StepOutputHandle)

        path_parts = self._get_paths(step_output_handle)
        store = self.object_store

        source_key = store.key_for_paths([self.root_for_run_id(run_id)] + path_parts)
        target_key = store.key_for_paths([self.root] + path_parts)

        source_uri, target_uri = store.cp_object(source_key, target_key)

        return ObjectStoreOperation(
            op=ObjectStoreOperationType.CP_OBJECT,
            key=source_uri,
            dest_key=target_uri,
            object_store_name=self.object_store.name,
        )
Esempio n. 22
0
    def rm_intermediate(self, context, step_output_handle):
        """
        Remove the stored intermediate for ``step_output_handle`` from the object store.

        Returns:
            ObjectStoreOperation: An RM_OBJECT record keyed by whatever the object
            store's ``rm_object`` returned.
        """
        check.opt_inst_param(context, "context", SystemExecutionContext)
        check.inst_param(step_output_handle, "step_output_handle", StepOutputHandle)

        path_parts = self._get_paths(step_output_handle)
        check.param_invariant(len(path_parts) > 0, "paths")

        target_key = self.object_store.key_for_paths([self.root] + path_parts)
        removed_uri = self.object_store.rm_object(target_key)

        operation_fields = dict(
            op=ObjectStoreOperationType.RM_OBJECT,
            key=removed_uri,
            dest_key=None,
            obj=None,
            serialization_strategy_name=None,
            object_store_name=self.object_store.name,
        )
        return ObjectStoreOperation(**operation_fields)
Esempio n. 23
0
    def get_object(self, key, serialization_strategy=None):
        """
        Download the GCS blob at ``key`` into memory and deserialize it.

        Args:
            key (str): Non-empty blob key.
            serialization_strategy: Must provide ``deserialize`` and ``name``. It is
                not validated here, so leaving the ``None`` default fails on
                attribute access.

        Returns:
            ObjectStoreOperation: A GET_OBJECT record holding the deserialized object.
        """
        check.str_param(key, 'key')
        check.param_invariant(len(key) > 0, 'key')

        downloaded = BytesIO()
        source_blob = self.bucket_obj.blob(key)
        source_blob.download_to_file(downloaded)
        # Rewind so deserialization starts at the first byte.
        downloaded.seek(0)

        loaded = serialization_strategy.deserialize(downloaded)

        operation_fields = dict(
            op=ObjectStoreOperationType.GET_OBJECT,
            key=self.uri_for_key(key),
            dest_key=None,
            obj=loaded,
            serialization_strategy_name=serialization_strategy.name,
            object_store_name=self.name,
        )
        return ObjectStoreOperation(**operation_fields)
Esempio n. 24
0
def copy_required_intermediates_for_execution(pipeline_context,
                                              execution_plan):
    """
    Copy intermediates from the parent run that the current execution plan also
    produces, yielding one object-store event per copy.

    Nothing is yielded when the pipeline run has no ``parent_run_id``. Handles whose
    intermediate already exists in the current storage are skipped.
    """
    check.inst_param(pipeline_context, "pipeline_context",
                     SystemExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)

    parent_run_id = pipeline_context.pipeline_run.parent_run_id
    if not parent_run_id:
        return

    parent_run_logs = pipeline_context.instance.all_logs(parent_run_id)

    current_handles = output_handles_from_execution_plan(execution_plan)
    previous_handles = output_handles_from_event_logs(parent_run_logs)
    shared_handles = current_handles.intersection(previous_handles)

    # Group the handles to copy by the step that produced them.
    handles_by_step_key = {}
    for shared_handle in shared_handles:
        handles_by_step_key.setdefault(shared_handle.step_key,
                                       []).append(shared_handle)

    intermediate_storage = pipeline_context.intermediate_storage
    for step in execution_plan.get_all_steps_in_topo_order():
        step_handles = handles_by_step_key.get(step.key, [])

        # Skip steps with nothing to copy so that no context is built from an
        # UnresolvedExecutionStep.
        if not step_handles:
            continue

        step_context = pipeline_context.for_step(step)
        for step_handle in step_handles:
            already_stored = intermediate_storage.has_intermediate(
                pipeline_context, step_handle)
            if already_stored:
                continue

            copy_op = intermediate_storage.copy_intermediate_from_run(
                pipeline_context, parent_run_id, step_handle)
            serializable_op = ObjectStoreOperation.serializable(
                copy_op, value_name=step_handle.output_name)
            yield DagsterEvent.object_store_operation(step_context,
                                                      serializable_op)
Esempio n. 25
0
    def rm_object(self, key):
        """
        Remove the file or directory tree at path ``key`` if the store has it.

        Nothing is removed when ``self.has_object(key)`` is falsy, or when the path
        is neither a regular file nor a directory.

        Returns:
            ObjectStoreOperation: An RM_OBJECT record carrying ``key``.
        """
        check.str_param(key, 'key')
        check.param_invariant(len(key) > 0, 'key')

        present = self.has_object(key)
        if present and os.path.isfile(key):
            os.unlink(key)
        elif present and os.path.isdir(key):
            shutil.rmtree(key)

        operation_fields = dict(
            op=ObjectStoreOperationType.RM_OBJECT,
            key=key,
            dest_key=None,
            obj=None,
            serialization_strategy_name=None,
            object_store_name=self.name,
        )
        return ObjectStoreOperation(**operation_fields)
Esempio n. 26
0
def _set_objects(step_context, step_output, step_output_handle, output):
    """
    Hand a step output to its output manager and yield the resulting events.

    ``handle_output`` runs inside ``user_code_error_boundary`` configured with
    ``DagsterExecutionHandleOutputError``; ``Failure`` and ``RetryRequested`` are
    passed as control-flow exceptions.

    If the manager returns an ``ObjectStoreOperation``, a single object-store event
    is yielded. Otherwise the result is converted by ``_materializations_to_events``
    and a serializable SET_ASSET asset-store event is yielded afterwards.
    """
    from dagster.core.storage.asset_store import AssetStoreHandle

    output_def = step_output.output_def
    output_manager = step_context.get_output_manager(step_output_handle)
    output_context = step_context.get_output_context(step_output_handle)
    with user_code_error_boundary(
            DagsterExecutionHandleOutputError,
            control_flow_exceptions=[Failure, RetryRequested],
            # Message fix: the original text read "during the the handling".
            msg_fn=lambda:
        ("Error occurred during the handling of step output:"
         f'    step key: "{step_context.step.key}"'
         f'    output name: "{output_context.name}"'),
            step_key=step_context.step.key,
            output_name=output_context.name,
    ):
        materializations = output_manager.handle_output(
            output_context, output.value)

    # TODO yuhan retire ObjectStoreOperation https://github.com/dagster-io/dagster/issues/3043
    if isinstance(materializations, ObjectStoreOperation):
        yield DagsterEvent.object_store_operation(
            step_context,
            ObjectStoreOperation.serializable(
                materializations, value_name=step_output_handle.output_name),
        )
    else:
        for evt in _materializations_to_events(step_context,
                                               step_output_handle,
                                               materializations):
            yield evt

        # SET_ASSET operation by AssetStore
        yield DagsterEvent.asset_store_operation(
            step_context,
            AssetStoreOperation.serializable(
                AssetStoreOperation(
                    AssetStoreOperationType.SET_ASSET,
                    step_output_handle,
                    AssetStoreHandle(output_def.manager_key,
                                     output_def.metadata),
                )),
        )
Esempio n. 27
0
    def get_object(self,
                   key,
                   serialization_strategy=DEFAULT_SERIALIZATION_STRATEGY):
        """
        Load the object stored in the file at path ``key``.

        With a truthy ``serialization_strategy`` the file is read through its
        ``deserialize_from_file``. With a falsy one (e.g. ``None``) the raw bytes of
        the file are returned as the object.

        Args:
            key (str): Non-empty file path.
            serialization_strategy: Strategy used to read the file, or a falsy value
                to read raw bytes.

        Returns:
            ObjectStoreOperation: A GET_OBJECT record. Its
            ``serialization_strategy_name`` is ``None`` when no strategy was used.
        """
        check.str_param(key, 'key')
        check.param_invariant(len(key) > 0, 'key')

        if serialization_strategy:
            obj = serialization_strategy.deserialize_from_file(key)
            strategy_name = serialization_strategy.name
        else:
            with open(key, 'rb') as f:
                obj = f.read()
            # No strategy was used, so there is no name to report. Reading
            # ``.name`` unconditionally used to raise AttributeError here.
            strategy_name = None

        return ObjectStoreOperation(
            op=ObjectStoreOperationType.GET_OBJECT,
            key=key,
            dest_key=None,
            obj=obj,
            serialization_strategy_name=strategy_name,
            object_store_name=self.name,
        )
Esempio n. 28
0
    def get_object(self, key, serialization_strategy=None):
        """
        Download the file at ``key`` through ``self.file_system_client`` and deserialize it.

        The content is wrapped in a ``BytesIO`` when the strategy's ``read_mode`` is
        ``"rb"``; otherwise it is decoded with the strategy's ``encoding`` and wrapped
        in a ``StringIO``.

        Args:
            key (str): Non-empty file key.
            serialization_strategy (SerializationStrategy): Required despite the
                ``None`` default.

        Returns:
            ObjectStoreOperation: A GET_OBJECT record holding the deserialized object.
        """
        check.str_param(key, "key")
        check.param_invariant(len(key) > 0, "key")
        # None is rejected here even though it is the declared default.
        check.inst_param(serialization_strategy, "serialization_strategy",
                         SerializationStrategy)

        # FIXME we need better error handling for object store
        file_client = self.file_system_client.get_file_client(key)
        download = file_client.download_file()

        if serialization_strategy.read_mode == "rb":
            source = BytesIO(download.readall())
        else:
            source = StringIO(
                download.readall().decode(serialization_strategy.encoding))

        loaded = serialization_strategy.deserialize(source)

        return ObjectStoreOperation(
            op=ObjectStoreOperationType.GET_OBJECT,
            key=self.uri_for_key(key),
            dest_key=None,
            obj=loaded,
            serialization_strategy_name=serialization_strategy.name,
            object_store_name=self.name,
        )
Esempio n. 29
0
    def cp_object(self, src, dst):
        """
        Copy the file or directory tree at ``src`` to ``dst`` on the local filesystem.

        ``dst`` must not exist yet; this is enforced with ``check.invariant``. If
        ``src`` is neither a regular file nor a directory, ``check.failed`` is called.

        Returns:
            ObjectStoreOperation: A CP_OBJECT record with ``src`` and ``dst``.
        """
        destination_taken = os.path.exists(dst)
        check.invariant(not destination_taken, "Path already exists {}".format(dst))

        # The destination's parent directory has to be in place before copying.
        destination_parent = os.path.dirname(dst)
        mkdir_p(destination_parent)

        if os.path.isfile(src):
            shutil.copy(src, dst)
        elif os.path.isdir(src):
            shutil.copytree(src, dst)
        else:
            check.failed("should not get here")

        operation_fields = dict(
            op=ObjectStoreOperationType.CP_OBJECT,
            key=src,
            dest_key=dst,
            obj=None,
            serialization_strategy_name=None,
            object_store_name=self.name,
        )
        return ObjectStoreOperation(**operation_fields)
Esempio n. 30
0
    def set_object(self, key, obj, serialization_strategy=None):
        """
        Serialize ``obj`` in memory and upload the bytes to the ADLS2 file at ``key``.

        An existing object at ``key`` is removed first. The file is created, a lease
        of ``self.lease_duration`` is acquired, and the data is uploaded under that
        lease. Text-mode strategies (``write_mode == "w"`` on Python 3) are serialized
        into a ``StringIO`` whose contents are UTF-8 encoded before upload; all others
        write directly to bytes.

        Args:
            key (str): File key inside the file system client.
            obj: Arbitrary Python object; intentionally not type-checked.
            serialization_strategy (SerializationStrategy): Required despite the
                ``None`` default.

        Returns:
            ObjectStoreOperation: A SET_OBJECT record keyed by the object's URI.
        """
        check.str_param(key, "key")

        logging.info("Writing ADLS2 object at: " + self.uri_for_key(key))

        # cannot check obj since could be arbitrary Python object
        check.inst_param(serialization_strategy, "serialization_strategy",
                         SerializationStrategy)  # cannot be none here

        if self.has_object(key):
            logging.warning(
                "Removing existing ADLS2 key: {key}".format(key=key))
            self.rm_object(key)

        file = self.file_system_client.create_file(key)
        with file.acquire_lease(self.lease_duration) as lease:
            with BytesIO() as bytes_io:
                if serialization_strategy.write_mode == "w" and sys.version_info >= (
                        3, 0):
                    # Use the buffer the ``with`` statement manages. Rebinding the
                    # name to a second StringIO left the managed one unused and the
                    # real one unclosed.
                    with StringIO() as string_io:
                        serialization_strategy.serialize(obj, string_io)
                        bytes_io.write(string_io.getvalue().encode("utf-8"))
                else:
                    serialization_strategy.serialize(obj, bytes_io)
                bytes_io.seek(0)
                file.upload_data(bytes_io, lease=lease, overwrite=True)

        return ObjectStoreOperation(
            op=ObjectStoreOperationType.SET_OBJECT,
            key=self.uri_for_key(key),
            dest_key=None,
            obj=obj,
            serialization_strategy_name=serialization_strategy.name,
            object_store_name=self.name,
        )