def test_add_reference_local_file(runner):
    """Check the manifest entry produced by a ``file://`` reference.

    Writes a five-byte file, adds it to an artifact by reference, and asserts
    the artifact digest plus the entry's digest, ref and size.
    """
    with runner.isolated_filesystem():
        # Use a context manager so the data is flushed and the handle closed
        # before the artifact reads the file.
        with open("file1.txt", "w") as f:
            f.write("hello")
        artifact = wandb.Artifact(type="dataset", name="my-arty")
        artifact.add_reference("file://file1.txt")

        assert artifact.digest == "a00c2239f036fb656c1dcbf9a32d89b4"
        manifest = artifact.manifest.to_manifest_json()
        assert manifest["contents"]["file1.txt"] == {
            "digest": "XUFAKrxLKna5cZ2REBfFkg==",
            "ref": "file://file1.txt",
            "size": 5,
        }
def test_add_reference_s3_no_checksum(runner):
    """Check the manifest entry for an ``s3://`` reference with ``checksum=False``.

    The expected entry carries the reference URI as both ``digest`` and
    ``ref`` and has no ``size`` key.
    """
    with runner.isolated_filesystem():
        # Close the handle deterministically instead of relying on garbage
        # collection to flush and release it.
        with open("file1.txt", "w") as f:
            f.write("hello")
        artifact = wandb.Artifact(type="dataset", name="my-arty")
        # TODO: Should we require name in this case?
        artifact.add_reference("s3://my_bucket/file1.txt", checksum=False)

        assert artifact.digest == "52631787ed3579325f985dc0f2374040"
        manifest = artifact.manifest.to_manifest_json()
        assert manifest["contents"]["file1.txt"] == {
            "digest": "s3://my_bucket/file1.txt",
            "ref": "s3://my_bucket/file1.txt",
        }
Beispiel #3
0
 def log_model(self, path, opt, epoch, fitness_score, best_model=False):
     """Log ``path / 'last.pt'`` as a W&B model artifact for the current run.

     Arguments:
     path (Path) -- directory holding ``last.pt``; also recorded as ``original_url``
     opt (namespace) -- run options; ``save_period``, ``project`` and ``epochs`` are stored as metadata
     epoch (int) -- zero-based epoch index; ``epoch + 1`` is stored and printed
     fitness_score -- value stored under ``fitness_score`` in the metadata
     best_model (bool) -- when true the ``best`` alias is attached, otherwise an empty alias string
     """
     artifact_name = 'run_' + wandb.run.id + '_model'
     artifact_metadata = {
         'original_url': str(path),
         'epochs_trained': epoch + 1,
         'save period': opt.save_period,
         'project': opt.project,
         'total_epochs': opt.epochs,
         'fitness_score': fitness_score,
     }
     model_artifact = wandb.Artifact(artifact_name, type='model', metadata=artifact_metadata)
     model_artifact.add_file(str(path / 'last.pt'), name='last.pt')

     # The third alias is deliberately the empty string when this is not the best model.
     best_alias = 'best' if best_model else ''
     alias_list = ['latest', 'epoch ' + str(self.current_epoch), best_alias]
     wandb.log_artifact(model_artifact, aliases=alias_list)
     print("Saving model artifact on epoch ", epoch + 1)
def test_add_named_dir(runner):
    """Check that ``add_dir`` with a ``name`` prefixes manifest paths.

    A single file in the working directory is added under ``subdir`` and is
    expected to appear in the manifest as ``subdir/file1.txt``.
    """
    with runner.isolated_filesystem():
        # Use a context manager so the file is fully written and closed
        # before the directory is scanned.
        with open("file1.txt", "w") as f:
            f.write("hello")
        artifact = wandb.Artifact(type="dataset", name="my-arty")
        artifact.add_dir(".", name="subdir")

        assert artifact.digest == "a757208d042e8627b2970d72a71bed5b"

        manifest = artifact.manifest.to_manifest_json()
        assert manifest["contents"]["subdir/file1.txt"] == {
            "digest": "XUFAKrxLKna5cZ2REBfFkg==",
            "size": 5,
        }
def test_table_slice_reference_artifact():
    """Round-trip table slices through a chain of three artifacts.

    Run 1 logs a source table, run 2 logs two tables built from slices of it,
    run 3 logs tables built from those slices again, and run 4 reads the final
    artifact back. The final tables must equal the original slices, and the
    intermediate artifact's default root must not exist on disk.
    """
    # Run 1: publish the source table.
    with wandb.init() as run:
        source_artifact = wandb.Artifact("table_data", "data")
        source_table = _make_wandb_table()
        source_artifact.add(source_table, "table")
        run.log_artifact(source_artifact)

    # Run 2: split the source rows into a head and a tail table.
    with wandb.init() as run:
        artifact_1 = run.use_artifact("table_data:latest")
        t1 = artifact_1.get("table")
        intermediate = wandb.Artifact("intermediate_data", "data")
        head_table = wandb.Table(t1.columns, t1.data[:1])
        tail_table = wandb.Table(t1.columns, t1.data[1:])
        intermediate.add(head_table, "table1")
        intermediate.add(tail_table, "table2")
        run.log_artifact(intermediate)

    # Run 3: rebuild both tables from the intermediate artifact's data.
    with wandb.init() as run:
        artifact_2 = run.use_artifact("intermediate_data:latest")
        head_table = artifact_2.get("table1")
        tail_table = artifact_2.get("table2")
        reference = wandb.Artifact("reference_data", "data")
        rebuilt_head = wandb.Table(t1.columns, head_table.data)
        rebuilt_tail = wandb.Table(t1.columns, tail_table.data)
        reference.add(rebuilt_head, "table1")
        reference.add(rebuilt_tail, "table2")
        run.log_artifact(reference)

    _cleanup()

    # Run 4: read the final artifact back.
    with wandb.init() as run:
        artifact_3 = run.use_artifact("reference_data:latest")
        table1 = artifact_3.get("table1")
        table2 = artifact_3.get("table2")

    intermediate_root = artifact_2._default_root()
    assert not os.path.isdir(intermediate_root)
    # Disabled checks kept for reference:
    # assert os.path.islink(os.path.join(artifact_3._default_root(), "media", "images", "test.png"))
    # assert os.path.islink(os.path.join(artifact_3._default_root(), "media", "images", "test2.png"))
    assert t1.data[:1] == table1.data
    assert t1.data[1:] == table2.data
def test_artifact_finish_distributed_id(runner, live_mock_server,
                                        test_settings):
    """Finishing an artifact with a ``distributed_id`` completes without error."""
    # NOTE: this runs against a mock server, so it exercises the internal
    # flow only, not the actual data transfer.
    artifact_type = "dataset"
    artifact_name = "distributed_artifact_" + str(round(time.time()))
    group_name = "test_group_" + str(round(np.random.rand()))

    run = wandb.init(settings=test_settings)
    dataset_artifact = wandb.Artifact(artifact_name, type=artifact_type)
    run.finish_artifact(dataset_artifact, distributed_id=group_name)
    run.finish()
Beispiel #7
0
def save_model_to_artifact(model, path, name, artifact_path="final_model"):
    """Save a model's state dict to disk and log it as a wandb.Artifact.

    Must be called during an active wandb.Run. The state dict is written to
    ``path`` with ``torch.save`` and added to a ``trained-model`` artifact
    under ``artifact_path``.

    Returns the artifact's identifier as ``entity/project/artifact_name``.

    See PyTorch documentation for details on saving and loading models:
      https://pytorch.org/tutorials/beginner/saving_loading_models.html
    """
    model_artifact = wandb.Artifact(name=name, type="trained-model")

    torch.save(model.state_dict(), path)
    model_artifact.add_file(path, artifact_path)

    active_run = wandb.run
    active_run.log_artifact(model_artifact)

    identifier_parts = [active_run.entity, active_run.project, model_artifact.name]
    return "/".join(identifier_parts)
    def on_train_end(self, trainer, pl_module):
        """Bundle checkpoint files into an ``experiment-ckpts`` artifact at the end of training.

        When ``self.upload_best_only`` is set, only the checkpoint callback's
        best model path is added. Otherwise every ``*.ckpt`` file found
        recursively under ``self.ckpt_dir`` is added.
        """
        experiment = get_wandb_logger(trainer=trainer).experiment
        ckpts = wandb.Artifact("experiment-ckpts", type="checkpoints")

        if self.upload_best_only:
            ckpt_paths = [trainer.checkpoint_callback.best_model_path]
        else:
            pattern = os.path.join(self.ckpt_dir, "**/*.ckpt")
            ckpt_paths = glob.glob(pattern, recursive=True)

        for ckpt_path in ckpt_paths:
            ckpts.add_file(ckpt_path)

        # NOTE(review): the artifact is handed to `use_artifact`, not
        # `log_artifact` -- confirm that marking it as a run input is intended.
        experiment.use_artifact(ckpts)
Beispiel #9
0
def store_model_artifact(path: str, name: str):
    """
    Log a saved TensorFlow model directory to WandB as a ``model`` artifact.

    Args:
        path: Directory of the saved model (e.g., /path/to/model.tf/); its
            contents are added to the artifact.
        name: Name of the WandB artifact. If an artifact with this name
            already exists, a new version is stored.
    """
    saved_model = wandb.Artifact(name, type="model")
    saved_model.add_dir(path)
    wandb.log_artifact(saved_model)
Beispiel #10
0
def test_add_reference_named_local_file(runner):
    """Check that a ``file://`` reference added with ``name`` is stored under that name.

    The manifest key is expected to be the given name, while ``ref`` keeps
    the original URI.
    """
    with runner.isolated_filesystem():
        # Use a context manager so the data is flushed and the handle closed
        # before the artifact reads the file.
        with open("file1.txt", "w") as f:
            f.write("hello")
        artifact = wandb.Artifact(type="dataset", name="my-arty")
        artifact.add_reference("file://file1.txt", name="great-file.txt")

        assert artifact.digest == "585b9ada17797e37c9cbab391e69b8c5"
        manifest = artifact.manifest.to_manifest_json()
        assert manifest["contents"]["great-file.txt"] == {
            "digest": "XUFAKrxLKna5cZ2REBfFkg==",
            "ref": "file://file1.txt",
            "size": 5,
        }
Beispiel #11
0
def test_audio_refs():
    """An Audio object built from a URL serializes with the expected type and caption."""
    reference_url = (
        "https://wandb-artifacts-refs-public-test.s3-us-west-2.amazonaws.com/StarWars3.wav"
    )
    audio = wandb.Audio(reference_url)
    artifact = wandb.Artifact("audio_ref_test", "dataset")
    artifact.add(audio, "audio_ref")

    # Only these keys of the serialized form are compared.
    expected_subset = {"_type": "audio-file", "caption": None}
    serialized = audio.to_json(artifact)
    assert utils.subdict(serialized, expected_subset) == expected_subset
Beispiel #12
0
    def setup_training(self, opt):
        """
        Setup the necessary processes for training YOLO models:
          - Attempt to download model checkpoint and dataset artifacts if opt.resume starts with WANDB_ARTIFACT_PREFIX
          - Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded
          - Setup log_dict, initialize bbox_interval

        NOTE(review): this method only checks that opt.resume is a str; the
        prefix check presumably happens inside download_model_artifact -- confirm.

        arguments:
        opt (namespace) -- commandline arguments for this run; several of its
                           attributes are overwritten in place (see below)

        Nothing is returned; results are stored on self and on opt.
        """
        # Reset per-run logging state.
        self.log_dict, self.current_epoch = {}, 0
        self.bbox_interval = opt.bbox_interval
        if isinstance(opt.resume, str):
            modeldir, _ = self.download_model_artifact(opt)
            if modeldir:
                # Resume from the downloaded checkpoint and take the training
                # settings from the run's stored config instead of opt.
                self.weights = Path(modeldir) / "last.pt"
                config = self.wandb_run.config
                opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp = str(
                    self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs, \
                                                                                                       config.hyp
        # Same dict object as self.data_dict, so the edits below also change self.data_dict.
        data_dict = self.data_dict
        if self.val_artifact is None:  # If --upload_dataset is set, use the existing artifact, don't download
            self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(
                data_dict.get('train'), opt.artifact_alias)
            self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(
                data_dict.get('val'), opt.artifact_alias)

        # Point the train/val entries at the images inside the artifact directories, when available.
        if self.train_artifact_path is not None:
            train_path = Path(self.train_artifact_path) / 'data/images/'
            data_dict['train'] = str(train_path)
        if self.val_artifact_path is not None:
            val_path = Path(self.val_artifact_path) / 'data/images/'
            data_dict['val'] = str(val_path)

        if self.val_artifact is not None:
            # Prepare the artifact and table that hold per-epoch evaluation results.
            self.result_artifact = wandb.Artifact(
                "run_" + wandb.run.id + "_progress", "evaluation")
            columns = ["epoch", "id", "ground truth", "prediction"]
            columns.extend(self.data_dict['names'])
            self.result_table = wandb.Table(columns)
            self.val_table = self.val_artifact.get("val")
            if self.val_table_path_map is None:
                self.map_val_table_path()
        if opt.bbox_interval == -1:
            # -1 selects a default: a tenth of the epochs, or 1 for runs of 10 epochs or fewer.
            self.bbox_interval = opt.bbox_interval = (
                opt.epochs // 10) if opt.epochs > 10 else 1
        train_from_artifact = self.train_artifact_path is not None and self.val_artifact_path is not None
        # Update the data_dict to point to local artifacts dir
        if train_from_artifact:
            self.data_dict = data_dict
def test_artifact_references_internal(
    runner,
    mocked_run,
    mock_server,
    internal_sm,
    backend_interface,
    parse_ctx,
    test_settings,
):
    """Send an artifact that reuses an already-logged table through the internal sender.

    The test passes if the final ``send_artifact`` call completes without raising.
    """
    with runner.isolated_filesystem():
        mock_server.set_context("max_cli_version", "0.11.0")

        # Log a first artifact holding an empty table through the public API.
        run = wandb.init(settings=test_settings)
        empty_table = wandb.Table(columns=[], data=[])
        logged_artifact = wandb.Artifact("A", "dataset")
        logged_artifact.add(empty_table, "t1")
        run.log_artifact(logged_artifact)
        run.finish()

        # A second, not-yet-logged artifact reuses the same table object.
        pending_artifact = wandb.Artifact("A_PENDING", "dataset")
        pending_artifact.add(empty_table, "t1")

        with backend_interface() as interface:
            proto_run = interface._make_run(mocked_run)
            run_record = interface._make_record(run=proto_run)
            internal_sm.send_run(run_record)

            # Build the artifact proto by hand, tied to the mocked run.
            proto_artifact = interface._make_artifact(pending_artifact)
            proto_artifact.run_id = proto_run.run_id
            proto_artifact.project = proto_run.project
            proto_artifact.entity = proto_run.entity
            proto_artifact.user_created = False
            proto_artifact.use_after_commit = False
            proto_artifact.finalize = True
            proto_artifact.aliases.append("latest")

            request = pb.LogArtifactRequest()
            request.artifact.CopyFrom(proto_artifact)

            internal_sm.send_artifact(request)
def test_artifact_run_lookup_apis():
    """Check ``used_by`` / ``logged_by`` lookups across four runs.

    Run 1 logs artifacts a1 and a2, run 2 logs a second version of a1, and
    runs 3 and 4 each use the latest version of both.
    """
    artifact_1_name = "a1-" + str(time.time())
    artifact_2_name = "a2-" + str(time.time())

    def log_random_image(run, artifact_name):
        # Log a fresh 10x10 random image under the given artifact name.
        image_artifact = wandb.Artifact(artifact_name, "test_type")
        image_artifact.add(wandb.Image(np.random.randint(0, 255, (10, 10))), "image")
        run.log_artifact(image_artifact)

    # Run 1 creates the first version of both artifacts.
    run_1 = wandb.init()
    log_random_image(run_1, artifact_1_name)
    log_random_image(run_1, artifact_2_name)
    run_1.finish()

    # Run 2 creates a second version of a1 only.
    run_2 = wandb.init()
    log_random_image(run_2, artifact_1_name)
    run_2.finish()

    latest_1 = artifact_1_name + ":latest"
    latest_2 = artifact_2_name + ":latest"

    # Run 3 is the first consumer of both.
    run_3 = wandb.init()
    a1 = run_3.use_artifact(latest_1)
    assert _runs_eq(a1.used_by(), [run_3])
    assert _run_eq(a1.logged_by(), run_2)
    a2 = run_3.use_artifact(latest_2)
    assert _runs_eq(a2.used_by(), [run_3])
    assert _run_eq(a2.logged_by(), run_1)
    run_3.finish()

    # Run 4 is the second consumer; both runs must now be listed.
    run_4 = wandb.init()
    a1 = run_4.use_artifact(latest_1)
    assert _runs_eq(a1.used_by(), [run_3, run_4])
    a2 = run_4.use_artifact(latest_2)
    assert _runs_eq(a2.used_by(), [run_3, run_4])
    run_4.finish()
def test_artifact_creation_with_diff_type():
    """Logging an existing artifact name under a different type raises ValueError.

    After the rejected attempt, the latest version must still contain only
    the content logged under the original type.
    """
    artifact_name = "a1-" + str(time.time())

    def random_image():
        return wandb.Image(np.random.randint(0, 255, (10, 10)))

    # First pass creates the artifact, second pass updates it; both use the original type.
    for _ in range(2):
        with wandb.init() as run:
            artifact = wandb.Artifact(artifact_name, "artifact_type_1")
            artifact.add(random_image(), "image")
            run.log_artifact(artifact)

    # Invalid: same name, different type.
    with wandb.init() as run:
        artifact = wandb.Artifact(artifact_name, "artifact_type_2")
        artifact.add(random_image(), "image_2")
        error_message = None
        try:
            run.log_artifact(artifact)
        except ValueError as err:
            error_message = str(err)
        # A None message means no ValueError was raised at all.
        assert error_message == "Expected artifact type artifact_type_1, got artifact_type_2"

    with wandb.init() as run:
        artifact = run.use_artifact(artifact_name + ":latest")
        # Content from the valid versions is present.
        assert artifact.get("image") is not None
        # Content from the rejected version is absent.
        assert artifact.get("image_2") is None
Beispiel #16
0
def test_add_http_reference_path(runner):
    """Check the manifest entry for an ``http://`` reference built from mocked headers."""
    with runner.isolated_filesystem():
        artifact = wandb.Artifact(type="dataset", name="my-arty")
        response_headers = {"ETag": '"abc"', "Content-Length": "256"}
        mock_http(artifact, headers=response_headers)
        artifact.add_reference("http://example.com/file1.txt")

        assert artifact.digest == "48237ccc050a88af9dcd869dd5a7e9f4"

        # Expected: quotes stripped from the digest but kept in the etag.
        expected_entry = {
            "digest": "abc",
            "ref": "http://example.com/file1.txt",
            "size": 256,
            "extra": {"etag": '"abc"'},
        }
        contents = artifact.manifest.to_manifest_json()["contents"]
        assert contents["file1.txt"] == expected_entry
Beispiel #17
0
def test_deduplicate_wbimagemask_from_array(runner):
    """Two masks built from the same array share one manifest entry.

    Two masks from different arrays produce four entries; two masks from the
    same array produce three.
    """
    im_data_1 = np.random.randint(0, 10, (300, 300))
    im_data_2 = np.random.randint(0, 10, (300, 300))

    def manifest_entry_count(first_array, second_array):
        # Add one mask per array to a fresh artifact and count its entries.
        with runner.isolated_filesystem():
            artifact = wandb.Artifact(type="dataset", name="artifact")
            first_mask = data_types.ImageMask({"mask_data": first_array}, key="test")
            second_mask = data_types.ImageMask({"mask_data": second_array}, key="test2")
            artifact.add(first_mask, "my-imagemask_1")
            artifact.add(second_mask, "my-imagemask_2")
            return len(artifact.manifest.entries)

    assert manifest_entry_count(im_data_1, im_data_2) == 4
    assert manifest_entry_count(im_data_1, im_data_1) == 3
Beispiel #18
0
def test_add_gs_reference_object(runner, mocker):
    """Check the manifest entry for a ``gs://`` object reference against a mocked GCS."""
    object_uri = "gs://my-bucket/my_object.pb"
    with runner.isolated_filesystem():
        artifact = wandb.Artifact(type="dataset", name="my-arty")
        mock_gcs(artifact)
        artifact.add_reference(object_uri)

        assert artifact.digest == "8aec0d6978da8c2b0bf5662b3fd043a4"

        expected_entry = {
            "digest": "1234567890abcde",
            "ref": "gs://my-bucket/my_object.pb",
            "extra": {"etag": "1234567890abcde", "versionID": "1"},
            "size": 10,
        }
        contents = artifact.manifest.to_manifest_json()["contents"]
        assert contents["my_object.pb"] == expected_entry
Beispiel #19
0
def test_add_gs_reference_object_with_name(runner, mocker):
    """Check that a ``gs://`` reference added with ``name`` is stored under that name."""
    object_uri = "gs://my-bucket/my_object.pb"
    with runner.isolated_filesystem():
        artifact = wandb.Artifact(type="dataset", name="my-arty")
        mock_gcs(artifact)
        artifact.add_reference(object_uri, name="renamed.pb")

        assert artifact.digest == "bd85fe009dc9e408a5ed9b55c95f47b2"

        # The key follows the given name; ``ref`` keeps the original URI.
        expected_entry = {
            "digest": "1234567890abcde",
            "ref": "gs://my-bucket/my_object.pb",
            "extra": {"etag": "1234567890abcde", "versionID": "1"},
            "size": 10,
        }
        contents = artifact.manifest.to_manifest_json()["contents"]
        assert contents["renamed.pb"] == expected_entry
Beispiel #20
0
    def _scan_and_log_checkpoints(
            self,
            checkpoint_callback: "ReferenceType[ModelCheckpoint]") -> None:
        """Log each new or modified checkpoint file as a W&B ``model`` artifact.

        Candidates are the callback's last, best and best-k checkpoint paths.
        A candidate is logged when it exists on disk and either was never
        logged or has a newer modification time than the one recorded in
        ``self._logged_model_time``. Checkpoints are logged oldest first.
        """
        # Candidate checkpoints and their scores, keyed by path.
        score_by_path = {
            checkpoint_callback.last_model_path: checkpoint_callback.current_score,
            checkpoint_callback.best_model_path: checkpoint_callback.best_model_score,
            **checkpoint_callback.best_k_models,
        }

        # (mtime, path, score) for candidates present on disk, sorted by mtime.
        on_disk = sorted(
            (Path(ckpt_path).stat().st_mtime, ckpt_path, score)
            for ckpt_path, score in score_by_path.items()
            if Path(ckpt_path).is_file()
        )

        # Drop checkpoints already logged at their current modification time.
        pending = [
            (mtime, ckpt_path, score)
            for mtime, ckpt_path, score in on_disk
            if ckpt_path not in self._logged_model_time
            or self._logged_model_time[ckpt_path] < mtime
        ]

        callback_arg_names = [
            "monitor",
            "mode",
            "save_last",
            "save_top_k",
            "save_weights_only",
            "_every_n_train_steps",
            "_every_n_val_epochs",
        ]

        for mtime, ckpt_path, score in pending:
            if _WANDB_GREATER_EQUAL_0_10_22:
                metadata = {
                    "score": score,
                    "original_filename": Path(ckpt_path).name,
                    "ModelCheckpoint": {
                        arg_name: getattr(checkpoint_callback, arg_name)
                        for arg_name in callback_arg_names
                        # ensure it does not break if `ModelCheckpoint` args change
                        if hasattr(checkpoint_callback, arg_name)
                    },
                }
            else:
                metadata = None

            artifact = wandb.Artifact(
                name=f"model-{self.experiment.id}",
                type="model",
                metadata=metadata,
            )
            artifact.add_file(ckpt_path, name="model.ckpt")

            aliases = ["latest"]
            if ckpt_path == checkpoint_callback.best_model_path:
                aliases.append("best")
            self.experiment.log_artifact(artifact, aliases=aliases)

            # Remember the mtime: the filename may stay the same between saves
            # (last checkpoint or custom name).
            self._logged_model_time[ckpt_path] = mtime
Beispiel #21
0
def test_artifact_upsert_group_id(runner, live_mock_server, test_settings):
    """Upserting an artifact from a run that belongs to a group completes without error."""
    # NOTE: this runs against a mock server, so it exercises the internal
    # flow only, not the actual data transfer.
    artifact_type = "dataset"
    artifact_name = "distributed_artifact_" + str(round(time.time()))
    group_name = "test_group_" + str(round(np.random.rand()))

    run = wandb.init(group=group_name, settings=test_settings)
    grouped_artifact = wandb.Artifact(name=artifact_name, type=artifact_type)
    random_image = wandb.Image(np.random.randint(0, 255, (10, 10)))
    grouped_artifact.add(random_image, "image_1")
    run.upsert_artifact(grouped_artifact)
    run.finish()
Beispiel #22
0
def log_plot_as_wandb_artifact(wand_run,
                               fig,
                               fig_name,
                               temp_dir=None):
    """Write ``fig`` to an HTML file and log that file as a ``result`` artifact.

    Args:
        wand_run: Run object; its ``id`` is used in the artifact name and its
            ``log_artifact`` method receives the artifact.
        fig: Object exposing ``write_html(file)`` (presumably a Plotly
            figure -- confirm against callers).
        fig_name: Base name of the plot; ``.html`` is appended.
        temp_dir: Directory to write the HTML file into. When omitted, a new
            temporary directory is created for each call. The previous
            default was evaluated once at import time, which created a temp
            file as a side effect and shared one path between all calls.
    """
    if temp_dir is None:
        temp_dir = Path(tempfile.mkdtemp())
    else:
        temp_dir = Path(temp_dir)
    temp_dir.mkdir(exist_ok=True)

    fig_name = f"{fig_name}.html"
    filepath = temp_dir / fig_name
    # Close (and flush) the file before the artifact reads it.
    with open(filepath, "w", encoding="utf-8") as html_file:
        fig.write_html(html_file)

    artifact = wandb.Artifact("run_" + wand_run.id + fig_name, type='result')
    # Add a file to the artifact's contents
    artifact.add_file(str(filepath))
    # Save the artifact version to W&B and mark it as the output of this run
    wand_run.log_artifact(artifact)
def test_artifact_finish_no_id(runner, live_mock_server, test_settings):
    """Calling ``finish_artifact`` without a ``distributed_id`` raises TypeError."""
    with runner.isolated_filesystem():
        # NOTE: this runs against a mock server, so it exercises the internal
        # flow only, not the actual data transfer.
        artifact_type = "dataset"
        artifact_name = "distributed_artifact_" + str(round(time.time()))
        # Unused below; kept so the random draw still happens.
        group_name = "test_group_" + str(round(np.random.rand()))

        run = wandb.init(settings=test_settings)
        dataset_artifact = wandb.Artifact(artifact_name, type=artifact_type)
        with pytest.raises(TypeError):
            run.finish_artifact(dataset_artifact)
        run.finish()
Beispiel #24
0
def test_add_reference_local_file_no_checksum(runner):
    """Check the manifest entry for a ``file://`` reference with ``checksum=False``.

    The expected digest is ``md5_string`` of the file size rendered as a
    string, rather than a hash of the file contents.
    """
    with runner.isolated_filesystem():
        # The file must be flushed and closed before its size is read below,
        # so write it inside a context manager.
        with open("file1.txt", "w") as f:
            f.write("hello")
        size = os.path.getsize("file1.txt")
        artifact = wandb.Artifact(type="dataset", name="my-arty")
        artifact.add_reference("file://file1.txt", checksum=False)

        assert artifact.digest == "415f3bca4b095cbbbbc47e0d44079e05"
        manifest = artifact.manifest.to_manifest_json()
        assert manifest["contents"]["file1.txt"] == {
            "digest": md5_string(str(size)),
            "ref": "file://file1.txt",
            "size": size,
        }
Beispiel #25
0
    def log_artifact(
        self,
        tag: str,
        artifact: object = None,
        path_to_artifact: str = None,
        scope: str = None,
        # experiment info
        run_key: str = None,
        global_epoch_step: int = 0,
        global_batch_step: int = 0,
        global_sample_step: int = 0,
        # stage info
        stage_key: str = None,
        stage_epoch_len: int = 0,
        stage_epoch_step: int = 0,
        stage_batch_step: int = 0,
        stage_sample_step: int = 0,
        # loader info
        loader_key: str = None,
        loader_batch_len: int = 0,
        loader_sample_len: int = 0,
        loader_batch_step: int = 0,
        loader_sample_step: int = 0,
    ) -> None:
        """Logs artifact (arbitrary file like audio, video, model weights) to the logger.

        Exactly one source is uploaded. When ``artifact`` is given it is
        pickled to ``wandb/<run id>/artifact_dumps/<tag>`` and that file is
        added; otherwise the existing file at ``path_to_artifact`` is added.
        ``stage_key``, ``loader_key`` and ``scope`` are stored as metadata;
        the remaining step/length arguments are not used here.

        Args:
            tag: file name used for the pickled dump of ``artifact``.
            artifact: in-memory object to pickle and upload.
            path_to_artifact: path of an existing file to upload; used only
                when ``artifact`` is None.

        Raises:
            ValueError: if both ``artifact`` and ``path_to_artifact`` are None.
        """
        if artifact is None and path_to_artifact is None:
            raise ValueError("Both artifact and path_to_artifact cannot be None")

        # Keep the W&B container in its own variable: rebinding ``artifact``
        # here would pickle the container instead of the caller's object and
        # make the ``path_to_artifact`` branch unreachable.
        wandb_artifact = wandb.Artifact(
            # The "_aritfacts" spelling is kept so existing artifact names stay stable.
            name=self.run.id + "_aritfacts",
            type="artifact",
            metadata={
                "stage_key": stage_key,
                "loader_key": loader_key,
                "scope": scope,
            },
        )

        if artifact is not None:
            art_file_dir = os.path.join("wandb", self.run.id, "artifact_dumps")
            os.makedirs(art_file_dir, exist_ok=True)

            art_file_path = os.path.join(art_file_dir, tag)
            # The context manager closes the dump even if pickling fails.
            with open(art_file_path, "wb") as art_file:
                pickle.dump(artifact, art_file)

            wandb_artifact.add_file(str(art_file_path))
        else:
            wandb_artifact.add_file(path_to_artifact)
        self.run.log_artifact(wandb_artifact)
def test_image_reference_with_preferred_path():
    """Images referenced through a table survive a second artifact hop and download.

    ``artifact_1`` holds one image pre-added at a custom path plus a table
    referencing it and a second image. ``artifact_2`` holds a table built from
    the images read back from ``artifact_1``. The test passes if downloading
    ``artifact_2`` completes without raising.
    """
    assets_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "assets")
    orig_im_path = os.path.join(assets_dir, "test.png")
    orig_im_path_2 = os.path.join(assets_dir, "test2.png")
    desired_artifact_path = "images/sample.png"

    with wandb.init(project=WANDB_PROJECT) as run:
        first_artifact = wandb.Artifact("artifact_1", type="test_artifact")
        # Place the first image at the preferred path by hand.
        first_artifact.add_file(orig_im_path, desired_artifact_path)
        # This image wraps the same file, which should not be added a second time.
        preadded_image = wandb.Image(orig_im_path)
        # This image has no pre-added path.
        fresh_image = wandb.Image(orig_im_path_2)
        image_table = wandb.Table(["image"], data=[[preadded_image], [fresh_image]])
        first_artifact.add(image_table, "table")
        run.log_artifact(first_artifact)

    _cleanup()
    with wandb.init(project=WANDB_PROJECT) as run:
        artifact_1 = run.use_artifact("artifact_1:latest")
        original_table = artifact_1.get("table")

        second_artifact = wandb.Artifact("artifact_2", type="test_artifact")

        # Wrap the stored images so they are added by reference.
        referenced_image = wandb.Image(original_table.data[0][0])
        referenced_image_2 = wandb.Image(original_table.data[1][0])
        reference_table = wandb.Table(["image"], data=[[referenced_image], [referenced_image_2]])
        second_artifact.add(reference_table, "table")
        run.log_artifact(second_artifact)

    _cleanup()
    with wandb.init(project=WANDB_PROJECT) as run:
        artifact_2 = run.use_artifact("artifact_2:latest")
        artifact_2.download()
Beispiel #27
0
 def create_dataset_table(self, dataset, class_to_id, name='dataset'):
     """Build (but do not log) a dataset artifact holding files and an annotated table.

     Arguments:
     dataset -- object with ``path`` and ``img_files`` attributes; iterating it
                yields ``(img, labels, paths, shapes)`` tuples
     class_to_id (dict) -- maps integer class id to class name
     name (str) -- artifact name, also used as the table's name inside the artifact

     Returns the wandb.Artifact containing the image/label files and the table.
     """
     # TODO: Explore multiprocessing to split this loop in parallel | This is essential for speeding up the logging
     artifact = wandb.Artifact(name=name, type="dataset")
     # Use the dataset directory itself when dataset.path is a directory string,
     # otherwise fall back to the individual image files.
     img_files = tqdm([dataset.path]) if isinstance(
         dataset.path, str) and Path(dataset.path).is_dir() else None
     img_files = tqdm(dataset.img_files) if not img_files else img_files
     for img_file in img_files:
         if Path(img_file).is_dir():
             artifact.add_dir(img_file, name='data/images')
             # Labels directory: the last 'images' in the path replaced by 'labels'.
             labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
             artifact.add_dir(labels_path, name='data/labels')
         else:
             artifact.add_file(img_file,
                               name='data/images/' + Path(img_file).name)
             label_file = Path(img2label_paths([img_file])[0])
             # Add the label file only if it exists.
             artifact.add_file(
                 str(label_file), name='data/labels/' +
                 label_file.name) if label_file.exists() else None
     table = wandb.Table(columns=["id", "train_image", "Classes", "name"])
     class_set = wandb.Classes([{
         'id': id,
         'name': name
     } for id, name in class_to_id.items()])
     for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)):
         box_data, img_classes = [], {}
         # Each row of labels[:, 1:] is unpacked as a class id followed by box values,
         # which are used as middle x, middle y, width and height.
         for cls, *xywh in labels[:, 1:].tolist():
             cls = int(cls)
             box_data.append({
                 "position": {
                     "middle": [xywh[0], xywh[1]],
                     "width": xywh[2],
                     "height": xywh[3]
                 },
                 "class_id": cls,
                 "box_caption": "%s" % (class_to_id[cls])
             })
             img_classes[cls] = class_to_id[cls]
         boxes = {
             "ground_truth": {
                 "box_data": box_data,
                 "class_labels": class_to_id
             }
         }  # inference-space
         # One row per sample: index, annotated image, class names present, file name.
         table.add_data(si,
                        wandb.Image(paths, classes=class_set, boxes=boxes),
                        list(img_classes.values()),
                        Path(paths).name)
     artifact.add(table, name)
     return artifact
Beispiel #28
0
 def log_dataset_artifact(self, dataset, class_to_id, name='dataset'):
     """Log a dataset artifact with the image directory, an annotated table and zipped labels.

     Arguments:
     dataset -- object with a ``path`` attribute (image directory); iterating it
                yields ``(img, labels, paths, shapes)`` tuples
     class_to_id (dict) -- maps integer class id to class name
     name (str) -- artifact name, table name and prefix of the labels zip file

     Note: ``labels`` yielded by the dataset is modified in place (columns 2 onward).
     """
     artifact = wandb.Artifact(name=name, type="dataset")
     image_path = dataset.path
     artifact.add_dir(image_path, name='data/images')
     table = wandb.Table(columns=["id", "train_image", "Classes"])
     class_set = wandb.Classes([{
         'id': id,
         'name': name
     } for id, name in class_to_id.items()])
     for si, (img, labels, paths, shapes) in enumerate(dataset):
         height, width = shapes[0]
         # Convert box columns with xywh2xyxy, then scale by image width/height.
         # NOTE(review): presumably normalized xywh -> pixel xyxy; confirm against xywh2xyxy.
         labels[:, 2:] = (xywh2xyxy(labels[:, 2:].view(-1, 4)))
         labels[:, 2:] *= torch.Tensor([width, height, width, height])
         box_data = []
         img_classes = {}
         # Each row of labels[:, 1:] is unpacked as a class id followed by four box values.
         for cls, *xyxy in labels[:, 1:].tolist():
             cls = int(cls)
             box_data.append({
                 "position": {
                     "minX": xyxy[0],
                     "minY": xyxy[1],
                     "maxX": xyxy[2],
                     "maxY": xyxy[3]
                 },
                 "class_id": cls,
                 "box_caption": "%s" % (class_to_id[cls]),
                 "scores": {
                     "acc": 1
                 },
                 "domain": "pixel"
             })
             img_classes[cls] = class_to_id[cls]
         boxes = {
             "ground_truth": {
                 "box_data": box_data,
                 "class_labels": class_to_id
             }
         }  # inference-space
         # One row per sample: index, annotated image, JSON of the classes present.
         table.add_data(si,
                        wandb.Image(paths, classes=class_set, boxes=boxes),
                        json.dumps(img_classes))
     artifact.add(table, name)
     # Labels directory: the last 'images' in the image path replaced by 'labels'.
     labels_path = 'labels'.join(image_path.rsplit('images', 1))
     zip_path = Path(labels_path).parent / (name + '_labels.zip')
     if not zip_path.is_file():  # make_archive won't check if file exists
         shutil.make_archive(zip_path.with_suffix(''), 'zip', labels_path)
     artifact.add_file(str(zip_path), name='data/labels.zip')
     wandb.log_artifact(artifact)
     print("Saving data to W&B...")
Beispiel #29
0
def log_model(path, name=None, metadata=None):
    """Log a model file to the active W&B run as a ``model`` artifact.

    Args:
        path: Path of the model file to upload.
        name: Artifact name; defaults to ``run-<run id>-model``.
        metadata: Optional dict stored with the artifact. It is passed to
            ``_format_metadata`` first; that call's return value is not used,
            so it presumably formats the dict in place (confirm).

    Raises:
        ValueError: If no run is active, or ``path`` is not an existing file.
    """
    if wandb.run is None:
        raise ValueError('You must call wandb.init() before log_model()')
    path = Path(path)
    if not path.is_file():
        # Raising a bare string is itself a TypeError, so raise a real exception.
        raise ValueError(f'path must be a valid file: {path}')
    name = ifnone(name, f'run-{wandb.run.id}-model')
    # Fresh dict per call: a mutable default would be shared across calls.
    if metadata is None:
        metadata = {}
    _format_metadata(metadata)
    artifact_model = wandb.Artifact(name=name,
                                    type='model',
                                    description='trained model',
                                    metadata=metadata)
    artifact_model.add_file(str(path.resolve()))
    wandb.run.log_artifact(artifact_model)
Beispiel #30
0
def test_add_obj_wbimage_no_classes(runner):
    """Adding an Image that has masks but no class labels raises ValueError."""
    test_folder = os.path.dirname(os.path.realpath(__file__))
    im_path = os.path.join(test_folder, "..", "assets", "2x2.png")
    # The mask uses the same asset file as the image itself.
    mask_path = os.path.join(test_folder, "..", "assets", "2x2.png")
    with runner.isolated_filesystem():
        artifact = wandb.Artifact(type="dataset", name="my-arty")
        masks = {"ground_truth": {"path": mask_path}}
        wb_image = wandb.Image(im_path, masks=masks)
        with pytest.raises(ValueError):
            artifact.add(wb_image, "my-image")