def test_joined_table_add_by_path():
    src_image_1 = _make_wandb_image()
    src_image_2 = _make_wandb_image()
    src_image_3 = _make_wandb_image()
    src_image_4 = _make_wandb_image()
    src_table_1 = wandb.Table(["id", "image"],
                              [[1, src_image_1], [2, src_image_2]])
    src_table_2 = wandb.Table(["id", "image"],
                              [[1, src_image_3], [2, src_image_4]])
    with wandb.init() as run:
        tables = wandb.Artifact("tables", "database")
        tables.add(src_table_1, "src_table_1")
        tables.add(src_table_2, "src_table_2")

        # Should be able to add by name directly
        jt = wandb.JoinedTable("src_table_1.table.json",
                               "src_table_2.table.json", "id")
        tables.add(jt, "jt")

        # Make sure it errors when you are not referencing the correct table names
        jt_bad = wandb.JoinedTable("bad_table_name.table.json",
                                   "bad_table_name.table.json", "id")
        got_err = False
        try:
            tables.add(jt_bad, "jt_bad")
        except ValueError:
            got_err = True
        assert got_err

        run.log_artifact(tables)

    _cleanup()
    with wandb.init() as run:
        tables_2 = wandb.Artifact("tables_2", "database")
        upstream = run.use_artifact("tables:latest")

        # Able to add by reference
        jt = wandb.JoinedTable(upstream.get_path("src_table_1"),
                               upstream.get_path("src_table_2"), "id")
        tables_2.add(jt, "jt")
        run.log_artifact(tables_2)

    _cleanup()
    with wandb.init() as run:
        tables_2 = run.use_artifact("tables_2:latest")
        jt_2 = tables_2.get("jt")
        assert wandb.JoinedTable(upstream.get("src_table_1"),
                                 upstream.get("src_table_2"), "id") == jt_2
def end_epoch(self, best_result=False):
    if self.wandb_run:
        # Log the metrics accumulated during the epoch, then reset the buffer
        wandb.log(self.log_dict)
        self.log_dict = {}
        if self.result_artifact:
            # Join the ground-truth table with this epoch's predictions on the image id
            train_results = wandb.JoinedTable(self.val_table, self.result_table, "id")
            self.result_artifact.add(train_results, 'result')
            wandb.log_artifact(self.result_artifact,
                               aliases=['latest', 'epoch ' + str(self.current_epoch),
                                        ('best' if best_result else '')])
            # Start a fresh result table and artifact for the next epoch
            self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"])
            self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
Example #3
def finish_run(self):
    if self.wandb_run:
        if self.result_artifact:
            print("Add Training Progress Artifact")
            self.result_artifact.add(self.result_table, 'result')
            # Join the validation table stored in the test-set artifact with the final predictions
            train_results = wandb.JoinedTable(self.testset_artifact.get("val"), self.result_table, "id")
            self.result_artifact.add(train_results, 'joined_result')
            wandb.log_artifact(self.result_artifact)
        if self.log_dict:
            wandb.log(self.log_dict)
        wandb.run.finish()
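The end_epoch and finish_run methods above assume a handful of attributes set up elsewhere on the logger (self.wandb_run, self.log_dict, self.val_table, self.result_table, self.result_artifact, self.testset_artifact, self.current_epoch). A minimal sketch of a constructor that could provide them; the class name and initialization details are assumptions, not the original code:

import wandb

class WandbLogger:
    def __init__(self):
        # Reuse an active run if one exists, otherwise start one (assumption).
        self.wandb_run = wandb.run or wandb.init(job_type="training")
        self.log_dict = {}              # metrics buffered between wandb.log calls
        self.current_epoch = 0
        self.testset_artifact = None    # artifact holding the validation table ("val")
        self.val_table = None           # ground-truth table, e.g. testset_artifact.get("val")
        self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"])
        self.result_artifact = wandb.Artifact("run_" + self.wandb_run.id + "_progress", "evaluation")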
Example #4
def test_joined_table_logging(mocked_run, live_mock_server, test_settings, api):
    run = wandb.init(settings=test_settings)
    art = wandb.Artifact("A", "dataset")
    t1 = wandb.Table(
        columns=["id", "a"], data=[[1, wandb.Image(np.ones(shape=(32, 32)))]],
    )
    t2 = wandb.Table(
        columns=["id", "a"], data=[[1, wandb.Image(np.ones(shape=(32, 32)))]],
    )
    art.add(t1, "t1")
    art.add(t2, "t2")
    jt = wandb.JoinedTable(t1, t2, "id")
    art.add(jt, "jt")
    run.log_artifact(art)
    run.log({"logged_table": jt})
    run.finish()
    assert True
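Note that the joined table is recorded twice in this test: once as an entry of the artifact via art.add(jt, "jt"), and once directly in the run history via run.log({"logged_table": jt}), so it is available both as an artifact entry and as logged run media.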
Example #5
def _make_wandb_joinedtable():
    return wandb.JoinedTable(_make_wandb_table(), _make_wandb_table(), "id")
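_make_wandb_table() and _make_wandb_image() are test helpers that are not shown in these snippets. A minimal sketch of what they might look like; the column names and image contents are assumptions, chosen so that both join keys used in these examples ("id" and "index") exist:

import numpy as np
import wandb

def _make_wandb_image():
    # Any small image payload is enough for serialization round-trips (assumed content).
    return wandb.Image(np.ones(shape=(32, 32)))

def _make_wandb_table():
    # Assumed columns: include both "id" and "index" so either can serve as a join key.
    return wandb.Table(
        columns=["id", "index", "image"],
        data=[[1, 0, _make_wandb_image()], [2, 1, _make_wandb_image()]],
    )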
Example #6
def main():
    try:
        # Download the data if not already
        download_data()

        # Initialize the run
        with wandb.init(
                project=WANDB_PROJECT,  # The project to register this Run to
                job_type="create_dataset",  # The type of this Run. Runs of the same type can be grouped together in the UI
                config={  # Custom configuration parameters which you might want to tune or adjust for the Run
                    "num_examples": NUM_EXAMPLES,  # The number of raw samples to include
                    "scale_factor": 2,  # The scaling factor for the images
                }) as run:

            # Setup a WandB Classes object. This will give additional metadata for visuals
            class_set = wandb.Classes([{
                'name': name,
                'id': id
            } for name, id in zip(BDD_CLASSES, BDD_IDS)])

            # Setup a WandB Table object to hold our dataset
            table = wandb.Table(columns=[
                "id", "train_image", "colored_image", "label_mask",
                "dominant_class"
            ])

            # Fill up the table
            for ndx in range(run.config["num_examples"]):

                # First, we will build a wandb.Image to act as our raw example object
                #    classes: the classes which map to masks and/or box metadata
                #    masks: the mask metadata. In this case, we use a 2d array where each cell corresponds to the label (this comes directly from the dataset)
                #    boxes: the bounding box metadata. For example's sake, we create bounding boxes by looking at the mask data and creating boxes which fully enclose each class.
                #           The data is an array of objects like:
                #               {
                #                   "position": {
                #                       "minX": minX,
                #                       "maxX": maxX,
                #                       "minY": minY,
                #                       "maxY": maxY,
                #                   },
                #                   "class_id": id_num,
                #               }
                example = wandb.Image(
                    get_scaled_train_image(ndx, run.config.scale_factor),
                    classes=class_set,
                    masks={
                        "ground_truth": {
                            "mask_data": get_scaled_mask_label(ndx, run.config.scale_factor)
                        },
                    },
                    boxes={
                        "ground_truth": {
                            "box_data": get_scaled_bounding_boxes(ndx, run.config.scale_factor)
                        }
                    })

                # Next, we create two additional images which may be helpful during analysis. Notice that the additional metadata is optional.
                color_label = wandb.Image(
                    get_scaled_color_mask(ndx, run.config.scale_factor))
                label_mask = wandb.Image(
                    get_scaled_mask_label(ndx, run.config.scale_factor))

                # Finally, we add a row of our newly constructed data.
                table.add_data(train_ids[ndx], example, color_label,
                               label_mask, get_dominant_class(label_mask))

            # Create an Artifact (versioned folder)
            artifact = wandb.Artifact(name="raw_data", type="dataset")

            # add the table to the artifact
            artifact.add(table, "raw_examples")

            # Finally, log the artifact
            run.log_artifact(artifact)
        print("Step 1/5 Complete")

        # This step should look familiar by now:
        with wandb.init(project=WANDB_PROJECT,
                        job_type="split_dataset",
                        config={
                            "train_pct": 0.7,
                        }) as run:

            # Get the latest version of the artifact. Notice the name alias follows this convention: "<ARTIFACT_NAME>:<VERSION>"
            # when version is set to "latest", then the latest version will always be used. However, you can pin to a version by
            # using an alias such as "raw_data:v0"
            dataset_artifact = run.use_artifact("raw_data:latest")

            # Next, we "get" the table by the same name that we saved it in the last run.
            data_table = dataset_artifact.get("raw_examples")

            # Now we can build two separate artifacts for later use. We will first split the raw table into two parts,
            # then create two different artifacts, each of which will hold our new tables. We create two artifacts so that
            # in future runs, we can selectively decide which subsets of data to download.

            # Create the tables
            train_count = int(len(data_table.data) * run.config.train_pct)
            train_table = wandb.Table(columns=data_table.columns,
                                      data=data_table.data[:train_count])
            test_table = wandb.Table(columns=data_table.columns,
                                     data=data_table.data[train_count:])

            # Create the artifacts
            train_artifact = wandb.Artifact("train_data", "dataset")
            test_artifact = wandb.Artifact("test_data", "dataset")

            # Save the tables to the artifacts
            train_artifact.add(train_table, "train_table")
            test_artifact.add(test_table, "test_table")

            # Log the artifacts out as outputs of the run
            run.log_artifact(train_artifact)
            run.log_artifact(test_artifact)
        print("Step 2/5 Complete")

        # Again, create a run.
        with wandb.init(project=WANDB_PROJECT, job_type="model_train") as run:

            # Similar to before, we will load in the artifact and asset we need. In this case, the training data
            train_artifact = run.use_artifact("train_data:latest")
            train_table = train_artifact.get("train_table")

            # Next, we split out the labels and train the model
            train_data, mask_data = make_datasets(train_table, n_classes)
            model = ExampleSegmentationModel(n_classes)
            model.train(train_data, mask_data)

            # Finally, we score the model. Behind the scenes, we score each mask on its IOU.
            scores, results = score_model(model, train_data, mask_data,
                                          n_classes)

            # Let's create a new table. Notice that we create many columns - an evaluation score for each class type.
            results_table = wandb.Table(
                columns=["id", "pred_mask", "dominant_pred"] + BDD_CLASSES,

                # Data construction is similar to before, but we now use the predicted masks and bounding boxes.
                data=[[
                    train_table.data[ndx][0],
                    wandb.Image(train_table.data[ndx][1],
                                masks={
                                    "train_predicted_truth": {
                                        "mask_data": results[ndx],
                                    },
                                },
                                boxes={
                                    "ground_truth": {
                                        "box_data": mask_to_bounding(
                                            results[ndx])
                                    }
                                }),
                    BDD_CLASSES[get_dominant_id_ndx(results[ndx])],
                ] + list(row) for ndx, row in enumerate(scores)])

            # We create an artifact, add the table, and log it as part of the run.
            results_artifact = wandb.Artifact("train_results", "dataset")
            results_artifact.add(results_table, "train_iou_score_table")
            run.log_artifact(results_artifact)

            # Finally, let's save the model as a flat file and add it to its own artifact.
            model.save("model.pkl")
            model_artifact = wandb.Artifact("trained_model", "model")
            model_artifact.add_file("model.pkl")
            run.log_artifact(model_artifact)
        print("Step 3/5 Complete")

        with wandb.init(project=WANDB_PROJECT, job_type="model_eval") as run:

            # Retrieve the test data
            test_artifact = run.use_artifact("test_data:latest")
            test_table = test_artifact.get("test_table")
            test_data, mask_data = make_datasets(test_table, n_classes)

            # Download the saved model file.
            model_artifact = run.use_artifact("trained_model:latest")
            path = model_artifact.get_path("model.pkl").download()

            # Load the model from the file and score it
            model = ExampleSegmentationModel.load(path)
            scores, results = score_model(model, test_data, mask_data,
                                          n_classes)

            # Create a predicted score table similar to step 3.
            results_artifact = wandb.Artifact("test_results", "dataset")
            data = [[
                test_table.data[ndx][0],
                wandb.Image(test_table.data[ndx][1],
                            masks={
                                "test_predicted_truth": {
                                    "mask_data": results[ndx],
                                },
                            },
                            boxes={
                                "ground_truth": {
                                    "box_data": mask_to_bounding(results[ndx])
                                }
                            }),
                BDD_CLASSES[get_dominant_id_ndx(results[ndx])],
            ] + list(row) for ndx, row in enumerate(scores)]

            # And log out the results.
            results_artifact.add(
                wandb.Table(["id", "pred_mask_test", "dominant_pred_test"] +
                            BDD_CLASSES,
                            data=data), "test_iou_score_table")
            run.log_artifact(results_artifact)
        print("Step 4/5 Complete")

        with wandb.init(project=WANDB_PROJECT,
                        job_type="model_result_analysis") as run:

            # Retrieve the original raw dataset
            dataset_artifact = run.use_artifact("raw_data:latest")
            data_table = dataset_artifact.get("raw_examples")

            # Retrieve the train and test score tables
            train_artifact = run.use_artifact("train_results:latest")
            train_table = train_artifact.get("train_iou_score_table")

            test_artifact = run.use_artifact("test_results:latest")
            test_table = test_artifact.get("test_iou_score_table")

            # Join the tables on ID column and log them as outputs.
            train_results = wandb.JoinedTable(train_table, data_table, "id")
            test_results = wandb.JoinedTable(test_table, data_table, "id")
            artifact = wandb.Artifact("summary_results", "dataset")
            artifact.add(train_results, "train_results")
            artifact.add(test_results, "test_results")
            run.log_artifact(artifact)
        print("Step 5/5 Complete")

        if WANDB_PROJECT_ENV is not None:
            os.environ["WANDB_PROJECT"] = WANDB_PROJECT_ENV

        if WANDB_SILENT_ENV is not None:
            os.environ["WANDB_SILENT"] = WANDB_SILENT_ENV
    finally:
        cleanup()
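The pipeline above ends by versioning two joined views inside the "summary_results" artifact. A short sketch of how a later run could pull them back down for inspection; the artifact and table names follow the code above, while the "analysis" job type is an assumption:

with wandb.init(project=WANDB_PROJECT, job_type="analysis") as run:
    summary = run.use_artifact("summary_results:latest")
    # get() rebuilds the stored JoinedTable objects from the artifact contents.
    train_results = summary.get("train_results")
    test_results = summary.get("test_results")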
Example #8
def _make_joined_table():
    table_1 = _make_wandb_table()
    table_2 = _make_wandb_table()
    return wandb.JoinedTable(table_1, table_2, "index")