Esempio n. 1
0
    def save(
        self,
        type,
        name,
        metadata=None,
        description=None,
        aliases=None,
        labels=None,
        use_after_commit=False,
    ):
        """Create the artifact on the server and queue its upload and commit.

        Args:
            type: Artifact type, forwarded to ``create_artifact``.
            name: Primary artifact collection name.
            metadata: Optional metadata forwarded to ``create_artifact``.
            description: Optional description forwarded to ``create_artifact``.
            aliases: Optional list of alias strings. A plain alias is attached
                to ``name``; the ``'secondaryName:alias'`` notation attaches
                ``alias`` to the collection ``secondaryName`` instead.
            labels: Optional labels forwarded to ``create_artifact``.
            use_after_commit: When true, ``use_artifact`` is called with the
                artifact id, either immediately (artifact already committed
                or committing) or from the commit callback.

        Returns:
            The server artifact returned by ``create_artifact``.

        Raises:
            Exception: If the server reports a state other than COMMITTED,
                COMMITTING, PENDING or DELETED.
        """
        alias_specs = []
        for alias in aliases or []:
            # Users can explicitly alias this artifact to names other than
            # the primary one passed in by using the 'secondaryName:alias'
            # notation. Split on the first ':' so the full collection name is
            # kept (slicing to ``idx - 1`` used to drop its last character).
            collection_name, separator, tag = alias.partition(":")
            if not separator:
                collection_name, tag = name, alias
            alias_specs.append({
                "artifactCollectionName": collection_name,
                "alias": tag,
            })

        self._server_artifact, latest = self._api.create_artifact(
            type,
            name,
            self._digest,
            metadata=metadata,
            aliases=alias_specs,
            labels=labels,
            description=description,
            is_user_created=self._is_user_created,
        )

        # TODO(artifacts):
        #   if it's committed, all is good. If it's committing, just moving ahead isn't necessarily
        #   correct. It may be better to poll until it's committed or failed, and then decide what
        #   to do
        artifact_id = self._server_artifact["id"]
        latest_artifact_id = latest["id"] if latest else None
        state = self._server_artifact["state"]
        if state in ("COMMITTED", "COMMITTING"):
            # Nothing left to upload for an artifact the server already has.
            # TODO: update aliases, labels, description etc?
            if use_after_commit:
                self._api.use_artifact(artifact_id)
            return self._server_artifact
        if state not in ("PENDING", "DELETED"):
            raise Exception('Unknown artifact state "{}"'.format(state))

        # Register the manifest up front with an empty digest and no upload;
        # before_commit() below re-creates it with the final digest.
        self._api.create_artifact_manifest(
            "wandb_manifest.json",
            "",
            artifact_id,
            base_artifact_id=latest_artifact_id,
            include_upload=False,
        )

        step_prepare = wandb.filesync.step_prepare.StepPrepare(
            self._api, 0.1, 0.01, 1000)  # TODO: params
        step_prepare.start()

        # Upload Artifact "L1" files, the actual artifact contents
        self._file_pusher.store_manifest_files(
            self._manifest,
            artifact_id,
            lambda entry, progress_callback: self._manifest.storage_policy.
            store_file(artifact_id,
                       entry,
                       step_prepare,
                       progress_callback=progress_callback),
        )

        def before_commit():
            # Serialize the manifest to a temporary JSON file so it can be
            # hashed and then uploaded.
            with tempfile.NamedTemporaryFile("w+",
                                             suffix=".json",
                                             delete=False) as fp:
                path = os.path.abspath(fp.name)
                json.dump(self._manifest.to_manifest_json(), fp, indent=4)
            digest = wandb.util.md5_file(path)
            # We're duplicating the file upload logic a little, which isn't great.
            resp = self._api.create_artifact_manifest(
                "wandb_manifest.json",
                digest,
                artifact_id,
                base_artifact_id=latest_artifact_id,
            )
            upload_url = resp["uploadUrl"]
            upload_headers = resp["uploadHeaders"]
            # Each header arrives as a single "key:value" string; split on the
            # first ':' only so values containing ':' stay intact.
            extra_headers = {}
            for upload_header in upload_headers:
                key, val = upload_header.split(":", 1)
                extra_headers[key] = val
            with open(path, "rb") as fp:
                self._api.upload_file_retry(upload_url,
                                            fp,
                                            extra_headers=extra_headers)

        def on_commit():
            if use_after_commit:
                self._api.use_artifact(artifact_id)
            step_prepare.shutdown()

        # This will queue the commit. It will only happen after all the file uploads are done
        self._file_pusher.commit_artifact(artifact_id,
                                          before_commit=before_commit,
                                          on_commit=on_commit)
        return self._server_artifact
Esempio n. 2
0
    def save(
        self,
        type,
        name,
        distributed_id=None,
        finalize=True,
        metadata=None,
        description=None,
        aliases=None,
        labels=None,
        use_after_commit=False,
        incremental=False,
    ):
        """Create the artifact on the server, upload it and wait for the commit.

        Args:
            type: Artifact type, forwarded to ``create_artifact``.
            name: Primary artifact collection name.
            distributed_id: Optional id forwarded to ``create_artifact``. When
                set (and ``incremental`` is false) a ``PATCH`` manifest is
                used instead of a ``FULL`` one.
            finalize: Forwarded to ``commit_artifact``; ``use_artifact`` is
                only called from the commit callback when this is true.
            metadata: Optional metadata forwarded to ``create_artifact``.
            description: Optional description forwarded to ``create_artifact``.
            aliases: Optional list of alias strings. A plain alias is attached
                to ``name``; the ``'secondaryName:alias'`` notation attaches
                ``alias`` to the collection ``secondaryName`` instead.
            labels: Optional labels forwarded to ``create_artifact``.
            use_after_commit: When true, ``use_artifact`` is called with the
                artifact id, either immediately (artifact already committed
                or committing) or from the commit callback.
            incremental: When true an ``INCREMENTAL`` manifest is used; this
                takes precedence over ``distributed_id``.

        Returns:
            The server artifact returned by ``create_artifact``.

        Raises:
            Exception: If the server reports a state other than COMMITTED,
                COMMITTING, PENDING or DELETED.
        """
        alias_specs = []
        for alias in aliases or []:
            # Users can explicitly alias this artifact to names other than
            # the primary one passed in by using the 'secondaryName:alias'
            # notation. Split on the first ':' so the full collection name is
            # kept (slicing to ``idx - 1`` used to drop its last character).
            collection_name, separator, tag = alias.partition(":")
            if not separator:
                collection_name, tag = name, alias
            alias_specs.append(
                {"artifactCollectionName": collection_name, "alias": tag}
            )

        self._server_artifact, latest = self._api.create_artifact(
            type,
            name,
            self._digest,
            metadata=metadata,
            aliases=alias_specs,
            labels=labels,
            description=description,
            is_user_created=self._is_user_created,
            distributed_id=distributed_id,
        )

        # TODO(artifacts):
        #   if it's committed, all is good. If it's committing, just moving ahead isn't necessarily
        #   correct. It may be better to poll until it's committed or failed, and then decide what
        #   to do
        assert self._server_artifact is not None  # mypy optionality unwrapper
        artifact_id = self._server_artifact["id"]
        latest_artifact_id = latest["id"] if latest else None
        state = self._server_artifact["state"]
        if state in ("COMMITTED", "COMMITTING"):
            # Nothing left to upload for an artifact the server already has.
            # TODO: update aliases, labels, description etc?
            if use_after_commit:
                self._api.use_artifact(artifact_id)
            return self._server_artifact
        if state not in ("PENDING", "DELETED"):
            raise Exception('Unknown artifact state "{}"'.format(state))

        # Pick the manifest flavour; incremental wins over distributed.
        manifest_type = "FULL"
        manifest_filename = "wandb_manifest.json"
        if incremental:
            manifest_type = "INCREMENTAL"
            manifest_filename = "wandb_manifest.incremental.json"
        elif distributed_id:
            manifest_type = "PATCH"
            manifest_filename = "wandb_manifest.patch.json"
        # Register the manifest up front with an empty digest and no upload;
        # before_commit() below supplies the final digest.
        artifact_manifest_id, _ = self._api.create_artifact_manifest(
            manifest_filename,
            "",
            artifact_id,
            base_artifact_id=latest_artifact_id,
            include_upload=False,
            type=manifest_type,
        )

        step_prepare = wandb.filesync.step_prepare.StepPrepare(
            self._api, 0.1, 0.01, 1000
        )  # TODO: params
        step_prepare.start()

        # Upload Artifact "L1" files, the actual artifact contents
        self._file_pusher.store_manifest_files(
            self._manifest,
            artifact_id,
            lambda entry, progress_callback: self._manifest.storage_policy.store_file(
                artifact_id,
                artifact_manifest_id,
                entry,
                step_prepare,
                progress_callback=progress_callback,
            ),
        )

        # Set by on_commit() so this method can block until the commit is done.
        commit_event = threading.Event()

        def before_commit():
            # Serialize the manifest to a temporary JSON file so it can be
            # hashed and then uploaded.
            with tempfile.NamedTemporaryFile("w+", suffix=".json", delete=False) as fp:
                path = os.path.abspath(fp.name)
                json.dump(self._manifest.to_manifest_json(), fp, indent=4)
            digest = wandb.util.md5_file(path)
            if distributed_id or incremental:
                # If we're in the distributed flow, we want to update the
                # patch manifest we created with our finalized digest.
                _, resp = self._api.update_artifact_manifest(
                    artifact_manifest_id, digest=digest,
                )
            else:
                # In the regular flow, we can recreate the full manifest with the
                # updated digest.
                #
                # NOTE: We do this for backwards compatibility with older backends
                # that don't support the 'updateArtifactManifest' API.
                _, resp = self._api.create_artifact_manifest(
                    manifest_filename,
                    digest,
                    artifact_id,
                    base_artifact_id=latest_artifact_id,
                )

            # We're duplicating the file upload logic a little, which isn't great.
            upload_url = resp["uploadUrl"]
            upload_headers = resp["uploadHeaders"]
            # Each header arrives as a single "key:value" string; split on the
            # first ':' only so values containing ':' stay intact.
            extra_headers = {}
            for upload_header in upload_headers:
                key, val = upload_header.split(":", 1)
                extra_headers[key] = val
            with open(path, "rb") as fp:  # type: ignore
                self._api.upload_file_retry(upload_url, fp, extra_headers=extra_headers)

        def on_commit():
            if finalize and use_after_commit:
                self._api.use_artifact(artifact_id)
            step_prepare.shutdown()
            commit_event.set()

        # This will queue the commit. It will only happen after all the file uploads are done
        self._file_pusher.commit_artifact(
            artifact_id,
            finalize=finalize,
            before_commit=before_commit,
            on_commit=on_commit,
        )

        # Block until all artifact files are uploaded and the
        # artifact is committed.
        while not commit_event.is_set():
            commit_event.wait()

        return self._server_artifact
Esempio n. 3
0
    def save(
        self,
        type,
        name,
        metadata=None,
        description=None,
        aliases=None,
        labels=None,
        use_after_commit=False,
    ):
        """Create the artifact on the server and queue its upload and commit.

        Args:
            type: Artifact type, forwarded to ``create_artifact``.
            name: Primary artifact collection name.
            metadata: Optional metadata forwarded to ``create_artifact``.
            description: Optional description forwarded to ``create_artifact``.
            aliases: Optional list of alias strings. A plain alias is attached
                to ``name``; the ``'secondaryName:alias'`` notation attaches
                ``alias`` to the collection ``secondaryName`` instead.
            labels: Optional labels forwarded to ``create_artifact``.
            use_after_commit: When true, ``use_artifact`` is called with the
                artifact id, either immediately (artifact already committed
                or committing) or from the commit callback.

        Returns:
            The server artifact returned by ``create_artifact``.

        Raises:
            Exception: If the server reports a state other than COMMITTED,
                COMMITTING, PENDING or DELETED.
        """
        alias_specs = []
        for alias in aliases or []:
            # Users can explicitly alias this artifact to names other than
            # the primary one passed in by using the 'secondaryName:alias'
            # notation. Split on the first ':' so the full collection name is
            # kept (slicing to ``idx - 1`` used to drop its last character).
            collection_name, separator, tag = alias.partition(":")
            if not separator:
                collection_name, tag = name, alias
            alias_specs.append({
                "artifactCollectionName": collection_name,
                "alias": tag,
            })

        self._server_artifact = self._api.create_artifact(
            type,
            name,
            self._digest,
            metadata=metadata,
            aliases=alias_specs,
            labels=labels,
            description=description,
            is_user_created=self._is_user_created,
        )
        # TODO(artifacts):
        #   if it's committed, all is good. If it's committing, just moving ahead isn't necessarily
        #   correct. It may be better to poll until it's committed or failed, and then decide what
        #   to do
        artifact_id = self._server_artifact["id"]
        state = self._server_artifact["state"]
        if state in ("COMMITTED", "COMMITTING"):
            # Nothing left to upload for an artifact the server already has.
            # TODO: update aliases, labels, description etc?
            if use_after_commit:
                self._api.use_artifact(artifact_id)
            return self._server_artifact
        if state not in ("PENDING", "DELETED"):
            raise Exception('Unknown artifact state "{}"'.format(state))

        # Upload Artifact "L0" files. This should only be wandb_manifest.json. We need to use
        # the use_prepare_flow, so that the file entry is created in our database before the
        # upload to cloud storage commences
        with tempfile.NamedTemporaryFile("w+", suffix=".json",
                                         delete=False) as fp:
            json.dump(self._manifest.to_manifest_json(), fp, indent=4)
        # The file was created with delete=False, so it is still on disk here
        # for the file pusher to read.
        self._file_pusher.file_changed(
            save_name="wandb_manifest.json",
            path=os.path.abspath(fp.name),
            artifact_id=artifact_id,
            use_prepare_flow=True,
        )

        step_prepare = wandb.filesync.step_prepare.StepPrepare(
            self._api, 0.1, 0.01, 1000)  # TODO: params
        step_prepare.start()

        # Upload Artifact "L1" files, the actual artifact contents
        self._file_pusher.store_manifest_files(
            self._manifest,
            artifact_id,
            lambda entry, progress_callback: self._manifest.storage_policy.
            store_file(artifact_id,
                       entry,
                       step_prepare,
                       progress_callback=progress_callback),
        )

        def on_commit():
            if use_after_commit:
                self._api.use_artifact(artifact_id)
            step_prepare.shutdown()

        # This will queue the commit. It will only happen after all the file uploads are done
        self._file_pusher.commit_artifact(artifact_id, on_commit=on_commit)
        return self._server_artifact