Example #1
0
    def upload_artifact(self,
                        name,
                        artifact_object=None,
                        metadata=None,
                        preview=None,
                        delete_after_upload=False,
                        auto_pickle=True):
        # type: (str, Optional[object], Optional[dict], Optional[str], bool, bool) -> bool
        if not Session.check_min_api_version('2.3'):
            LoggerRoot.get_base_logger().warning(
                'Artifacts not supported by your TRAINS-server version, '
                'please upgrade to the latest server version')
            return False

        if name in self._artifacts_container:
            raise ValueError(
                "Artifact by the name of {} is already registered, use register_artifact"
                .format(name))

        # cast preview to string
        if preview:
            preview = str(preview)

        # convert string to object if try is a file/folder (dont try to serialize long texts
        if isinstance(artifact_object,
                      six.string_types) and len(artifact_object) < 2048:
            # noinspection PyBroadException
            try:
                artifact_path = Path(artifact_object)
                if artifact_path.exists():
                    artifact_object = artifact_path
                elif '*' in artifact_object or '?' in artifact_object:
                    # hackish, detect wildcard in tr files
                    folder = Path('').joinpath(*artifact_path.parts[:-1])
                    if folder.is_dir() and folder.parts:
                        wildcard = artifact_path.parts[-1]
                        if list(Path(folder).rglob(wildcard)):
                            artifact_object = artifact_path
            except Exception:
                pass

        artifact_type_data = tasks.ArtifactTypeData()
        artifact_type_data.preview = ''
        override_filename_in_uri = None
        override_filename_ext_in_uri = None
        uri = None
        if np and isinstance(artifact_object, np.ndarray):
            artifact_type = 'numpy'
            artifact_type_data.content_type = 'application/numpy'
            artifact_type_data.preview = preview or str(
                artifact_object.__repr__())
            override_filename_ext_in_uri = '.npz'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            np.savez_compressed(local_filename, **{name: artifact_object})
            delete_after_upload = True
        elif pd and isinstance(artifact_object, pd.DataFrame):
            artifact_type = 'pandas'
            artifact_type_data.content_type = 'text/csv'
            artifact_type_data.preview = preview or str(
                artifact_object.__repr__())
            override_filename_ext_in_uri = self._save_format
            override_filename_in_uri = name
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            artifact_object.to_csv(local_filename,
                                   compression=self._compression)
            delete_after_upload = True
        elif isinstance(artifact_object, Image.Image):
            artifact_type = 'image'
            artifact_type_data.content_type = 'image/png'
            desc = str(artifact_object.__repr__())
            artifact_type_data.preview = preview or desc[1:desc.find(' at ')]
            override_filename_ext_in_uri = '.png'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            artifact_object.save(local_filename)
            delete_after_upload = True
        elif isinstance(artifact_object, dict):
            artifact_type = 'JSON'
            artifact_type_data.content_type = 'application/json'
            preview = preview or json.dumps(
                artifact_object, sort_keys=True, indent=4)
            override_filename_ext_in_uri = '.json'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.write(fd, bytes(preview.encode()))
            os.close(fd)
            if len(preview) < self.max_preview_size_bytes:
                artifact_type_data.preview = preview
            else:
                artifact_type_data.preview = '# full json too large to store, storing first {}kb\n{}'.format(
                    self.max_preview_size_bytes // 1024,
                    preview[:self.max_preview_size_bytes])

            delete_after_upload = True
        elif isinstance(artifact_object, (
                Path,
                pathlib_Path,
        ) if pathlib_Path is not None else (Path, )):
            # check if single file
            artifact_object = Path(artifact_object)

            artifact_object.expanduser().absolute()
            # noinspection PyBroadException
            try:
                create_zip_file = not artifact_object.is_file()
            except Exception:  # Hack for windows pathlib2 bug, is_file isn't valid.
                create_zip_file = True
            else:  # We assume that this is not Windows os
                if artifact_object.is_dir():
                    # change to wildcard
                    artifact_object /= '*'

            if create_zip_file:
                folder = Path('').joinpath(*artifact_object.parts[:-1])
                if not folder.is_dir() or not folder.parts:
                    raise ValueError(
                        "Artifact file/folder '{}' could not be found".format(
                            artifact_object.as_posix()))

                wildcard = artifact_object.parts[-1]
                files = list(Path(folder).rglob(wildcard))
                override_filename_ext_in_uri = '.zip'
                override_filename_in_uri = folder.parts[
                    -1] + override_filename_ext_in_uri
                fd, zip_file = mkstemp(
                    prefix=quote(folder.parts[-1], safe="") + '.',
                    suffix=override_filename_ext_in_uri)
                try:
                    artifact_type_data.content_type = 'application/zip'
                    archive_preview = 'Archive content {}:\n'.format(
                        artifact_object.as_posix())

                    with ZipFile(zip_file,
                                 'w',
                                 allowZip64=True,
                                 compression=ZIP_DEFLATED) as zf:
                        for filename in sorted(files):
                            if filename.is_file():
                                relative_file_name = filename.relative_to(
                                    folder).as_posix()
                                archive_preview += '{} - {}\n'.format(
                                    relative_file_name,
                                    humanfriendly.format_size(
                                        filename.stat().st_size))
                                zf.write(filename.as_posix(),
                                         arcname=relative_file_name)
                except Exception as e:
                    # failed uploading folder:
                    LoggerRoot.get_base_logger().warning(
                        'Exception {}\nFailed zipping artifact folder {}'.
                        format(folder, e))
                    return False
                finally:
                    os.close(fd)
                artifact_type_data.preview = preview or archive_preview
                artifact_object = zip_file
                artifact_type = 'archive'
                artifact_type_data.content_type = mimetypes.guess_type(
                    artifact_object)[0]
                local_filename = artifact_object
                delete_after_upload = True
            else:
                if not artifact_object.is_file():
                    raise ValueError(
                        "Artifact file '{}' could not be found".format(
                            artifact_object.as_posix()))

                override_filename_in_uri = artifact_object.parts[-1]
                artifact_type_data.preview = preview or '{} - {}\n'.format(
                    artifact_object,
                    humanfriendly.format_size(artifact_object.stat().st_size))
                artifact_object = artifact_object.as_posix()
                artifact_type = 'custom'
                artifact_type_data.content_type = mimetypes.guess_type(
                    artifact_object)[0]
                local_filename = artifact_object
        elif (isinstance(artifact_object, six.string_types)
              and len(artifact_object) < 4096
              and urlparse(artifact_object).scheme in remote_driver_schemes):
            # we should not upload this, just register
            local_filename = None
            uri = artifact_object
            artifact_type = 'custom'
            artifact_type_data.content_type = mimetypes.guess_type(
                artifact_object)[0]
        elif isinstance(artifact_object, six.string_types):
            # if we got here, we should store it as text file.
            artifact_type = 'string'
            artifact_type_data.content_type = 'text/plain'
            if preview:
                artifact_type_data.preview = preview
            elif len(artifact_object) < self.max_preview_size_bytes:
                artifact_type_data.preview = artifact_object
            else:
                artifact_type_data.preview = '# full text too large to store, storing first {}kb\n{}'.format(
                    self.max_preview_size_bytes // 1024,
                    artifact_object[:self.max_preview_size_bytes])
            delete_after_upload = True
            override_filename_ext_in_uri = '.txt'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            # noinspection PyBroadException
            try:
                with open(local_filename, 'wt') as f:
                    f.write(artifact_object)
            except Exception:
                # cleanup and raise exception
                os.unlink(local_filename)
                raise
        elif auto_pickle:
            # if we are here it means we do not know what to do with the object, so we serialize it with pickle.
            artifact_type = 'pickle'
            artifact_type_data.content_type = 'application/pickle'
            # noinspection PyBroadException
            try:
                artifact_type_data.preview = preview or str(
                    artifact_object.__repr__())[:self.max_preview_size_bytes]
            except Exception:
                artifact_type_data.preview = preview or ''
            delete_after_upload = True
            override_filename_ext_in_uri = '.pkl'
            override_filename_in_uri = name + override_filename_ext_in_uri
            fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.',
                                         suffix=override_filename_ext_in_uri)
            os.close(fd)
            # noinspection PyBroadException
            try:
                with open(local_filename, 'wb') as f:
                    pickle.dump(artifact_object, f)
            except Exception:
                # cleanup and raise exception
                os.unlink(local_filename)
                raise
        else:
            raise ValueError("Artifact type {} not supported".format(
                type(artifact_object)))

        # remove from existing list, if exists
        for artifact in self._task_artifact_list:
            if artifact.key == name:
                if artifact.type == self._pd_artifact_type:
                    raise ValueError(
                        "Artifact of name {} already registered, "
                        "use register_artifact instead".format(name))

                self._task_artifact_list.remove(artifact)
                break

        if not local_filename:
            file_size = None
            file_hash = None
        else:
            # check that the file to upload exists
            local_filename = Path(local_filename).absolute()
            if not local_filename.exists() or not local_filename.is_file():
                LoggerRoot.get_base_logger().warning(
                    'Artifact upload failed, cannot find file {}'.format(
                        local_filename.as_posix()))
                return False

            file_hash, _ = self.sha256sum(local_filename.as_posix())
            file_size = local_filename.stat().st_size

            uri = self._upload_local_file(
                local_filename,
                name,
                delete_after_upload=delete_after_upload,
                override_filename=override_filename_in_uri,
                override_filename_ext=override_filename_ext_in_uri)

        timestamp = int(time())

        artifact = tasks.Artifact(
            key=name,
            type=artifact_type,
            uri=uri,
            content_size=file_size,
            hash=file_hash,
            timestamp=timestamp,
            type_data=artifact_type_data,
            display_data=[(str(k), str(v))
                          for k, v in metadata.items()] if metadata else None)

        # update task artifacts
        with self._task_edit_lock:
            self._task_artifact_list.append(artifact)
            self._task.set_artifacts(self._task_artifact_list)

        return True