def test_joined_table_add_by_path():
    src_image_1 = _make_wandb_image()
    src_image_2 = _make_wandb_image()
    src_image_3 = _make_wandb_image()
    src_image_4 = _make_wandb_image()
    src_table_1 = wandb.Table(["id", "image"], [[1, src_image_1], [2, src_image_2]])
    src_table_2 = wandb.Table(["id", "image"], [[1, src_image_3], [2, src_image_4]])

    with wandb.init() as run:
        tables = wandb.Artifact("tables", "database")
        tables.add(src_table_1, "src_table_1")
        tables.add(src_table_2, "src_table_2")

        # Should be able to add by name directly
        jt = wandb.JoinedTable("src_table_1.table.json", "src_table_2.table.json", "id")
        tables.add(jt, "jt")

        # Make sure it errors when you are not referencing the correct table names
        jt_bad = wandb.JoinedTable(
            "bad_table_name.table.json", "bad_table_name.table.json", "id")
        got_err = False
        try:
            tables.add(jt_bad, "jt_bad")
        except ValueError:
            got_err = True
        assert got_err

        run.log_artifact(tables)
    _cleanup()

    with wandb.init() as run:
        tables_2 = wandb.Artifact("tables_2", "database")
        upstream = run.use_artifact("tables:latest")

        # Able to add by reference
        jt = wandb.JoinedTable(
            upstream.get_path("src_table_1"), upstream.get_path("src_table_2"), "id")
        tables_2.add(jt, "jt")
        run.log_artifact(tables_2)
    _cleanup()

    with wandb.init() as run:
        tables_2 = run.use_artifact("tables_2:latest")
        jt_2 = tables_2.get("jt")
        assert jt_2 == wandb.JoinedTable(
            upstream.get("src_table_1"), upstream.get("src_table_2"), "id")
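# The test above assumes two module-level helpers. The versions below are
# illustrative stand-ins only (a minimal sketch; the real fixtures in the test
# suite may differ):
import numpy as np
import wandb

def _make_wandb_image():
    # A tiny random RGB image is enough for round-trip tests.
    return wandb.Image(np.random.randint(0, 255, (32, 32, 3), dtype=np.uint8))

def _cleanup():
    # Placeholder for per-test teardown, e.g. clearing local wandb state.
    pass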
def end_epoch(self, best_result=False):
    if self.wandb_run:
        wandb.log(self.log_dict)
        self.log_dict = {}
        if self.result_artifact:
            train_results = wandb.JoinedTable(self.val_table, self.result_table, "id")
            self.result_artifact.add(train_results, 'result')
            wandb.log_artifact(self.result_artifact,
                               aliases=[
                                   'latest', 'epoch ' + str(self.current_epoch),
                                   ('best' if best_result else '')
                               ])
            self.result_table = wandb.Table(
                ["epoch", "id", "prediction", "avg_confidence"])
            self.result_artifact = wandb.Artifact(
                "run_" + wandb.run.id + "_progress", "evaluation")
def finish_run(self):
    if self.wandb_run:
        if self.result_artifact:
            print("Add Training Progress Artifact")
            self.result_artifact.add(self.result_table, 'result')
            train_results = wandb.JoinedTable(
                self.testset_artifact.get("val"), self.result_table, "id")
            self.result_artifact.add(train_results, 'joined_result')
            wandb.log_artifact(self.result_artifact)
        if self.log_dict:
            wandb.log(self.log_dict)
        wandb.run.finish()
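# A minimal follow-up sketch showing how the joined result logged by
# finish_run() could be read back in a later analysis run. The artifact name
# "run_abc123_progress" is hypothetical; substitute the real run id.
import wandb

def load_joined_results(artifact_name="run_abc123_progress:latest"):
    with wandb.init(job_type="analysis") as run:
        art = run.use_artifact(artifact_name)
        # "joined_result" is the entry name used by finish_run() above.
        return art.get("joined_result")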
def test_joined_table_logging(mocked_run, live_mock_server, test_settings, api):
    run = wandb.init(settings=test_settings)
    art = wandb.Artifact("A", "dataset")
    t1 = wandb.Table(
        columns=["id", "a"],
        data=[[1, wandb.Image(np.ones(shape=(32, 32)))]],
    )
    t2 = wandb.Table(
        columns=["id", "a"],
        data=[[1, wandb.Image(np.ones(shape=(32, 32)))]],
    )
    art.add(t1, "t1")
    art.add(t2, "t2")
    jt = wandb.JoinedTable(t1, t2, "id")
    art.add(jt, "jt")
    run.log_artifact(art)
    run.log({"logged_table": jt})
    run.finish()
    assert True
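# A short companion sketch (illustrative only): fetching the joined table back
# from the logged artifact to verify it round-trips, mirroring the equality
# check in test_joined_table_add_by_path above.
def _fetch_logged_joined_table():
    with wandb.init() as run:
        art = run.use_artifact("A:latest")
        return art.get("jt")  # the wandb.JoinedTable stored by the test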
def _make_wandb_joinedtable():
    return wandb.JoinedTable(_make_wandb_table(), _make_wandb_table(), "id")
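# A minimal sketch of the _make_wandb_table helper assumed above (hypothetical;
# the real fixture may differ). It includes both join keys used in this file:
# "id" here and "index" in _make_joined_table below.
def _make_wandb_table():
    return wandb.Table(
        columns=["id", "index", "label"],
        data=[[1, 0, "cat"], [2, 1, "dog"]],
    )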
def main():
    try:
        # Download the data if not already present
        download_data()

        # Initialize the run
        with wandb.init(
                project=WANDB_PROJECT,  # The project to register this Run to
                job_type="create_dataset",  # The type of this Run. Runs of the same type can be grouped together in the UI
                config={  # Custom configuration parameters which you might want to tune or adjust for the Run
                    "num_examples": NUM_EXAMPLES,  # The number of raw samples to include
                    "scale_factor": 2,  # The scaling factor for the images
                }) as run:

            # Set up a wandb.Classes object. This will give additional metadata for visuals
            class_set = wandb.Classes([{
                'name': name,
                'id': id
            } for name, id in zip(BDD_CLASSES, BDD_IDS)])

            # Set up a wandb.Table object to hold our dataset
            table = wandb.Table(columns=[
                "id", "train_image", "colored_image", "label_mask",
                "dominant_class"
            ])

            # Fill up the table
            for ndx in range(run.config["num_examples"]):
                # First, we will build a wandb.Image to act as our raw example object:
                #   classes: the classes which map to masks and/or box metadata
                #   masks: the mask metadata. In this case, we use a 2d array where each
                #     cell corresponds to the label (this comes directly from the dataset)
                #   boxes: the bounding box metadata. For example's sake, we create
                #     bounding boxes by looking at the mask data and creating boxes which
                #     fully enclose each class. The data is an array of objects like:
                #       {
                #           "position": {
                #               "minX": minX,
                #               "maxX": maxX,
                #               "minY": minY,
                #               "maxY": maxY,
                #           },
                #           "class_id": id_num,
                #       }
                example = wandb.Image(
                    get_scaled_train_image(ndx, run.config.scale_factor),
                    classes=class_set,
                    masks={
                        "ground_truth": {
                            "mask_data":
                            get_scaled_mask_label(ndx, run.config.scale_factor)
                        },
                    },
                    boxes={
                        "ground_truth": {
                            "box_data":
                            get_scaled_bounding_boxes(ndx, run.config.scale_factor)
                        }
                    })

                # Next, we create two additional images which may be helpful during
                # analysis. Notice that the additional metadata is optional.
                color_label = wandb.Image(
                    get_scaled_color_mask(ndx, run.config.scale_factor))
                label_mask = wandb.Image(
                    get_scaled_mask_label(ndx, run.config.scale_factor))

                # Finally, we add a row of our newly constructed data.
                table.add_data(train_ids[ndx], example, color_label, label_mask,
                               get_dominant_class(label_mask))

            # Create an Artifact (versioned folder)
            artifact = wandb.Artifact(name="raw_data", type="dataset")

            # Add the table to the artifact
            artifact.add(table, "raw_examples")

            # Finally, log the artifact
            run.log_artifact(artifact)

        print("Step 1/5 Complete")

        # This step should look familiar by now:
        with wandb.init(project=WANDB_PROJECT,
                        job_type="split_dataset",
                        config={
                            "train_pct": 0.7,
                        }) as run:
            # Get the latest version of the artifact. Notice the name alias follows the
            # convention "<ARTIFACT_NAME>:<VERSION>". When the version is set to
            # "latest", the latest version will always be used; however, you can pin to
            # a specific version by using an alias such as "raw_data:v0".
            dataset_artifact = run.use_artifact("raw_data:latest")

            # Next, we "get" the table by the same name we saved it under in the last run.
            data_table = dataset_artifact.get("raw_examples")

            # Now we can build two separate artifacts for later use. We will first split
            # the raw table into two parts, then create two different artifacts, each of
            # which will hold our new tables. We create two artifacts so that in future
            # runs, we can selectively decide which subsets of data to download.
            # Create the tables
            train_count = int(len(data_table.data) * run.config.train_pct)
            train_table = wandb.Table(columns=data_table.columns,
                                      data=data_table.data[:train_count])
            test_table = wandb.Table(columns=data_table.columns,
                                     data=data_table.data[train_count:])

            # Create the artifacts
            train_artifact = wandb.Artifact("train_data", "dataset")
            test_artifact = wandb.Artifact("test_data", "dataset")

            # Save the tables to the artifacts
            train_artifact.add(train_table, "train_table")
            test_artifact.add(test_table, "test_table")

            # Log the artifacts out as outputs of the run
            run.log_artifact(train_artifact)
            run.log_artifact(test_artifact)

        print("Step 2/5 Complete")

        # Again, create a run.
        with wandb.init(project=WANDB_PROJECT, job_type="model_train") as run:
            # Similar to before, we will load in the artifact and asset we need - in
            # this case, the training data.
            train_artifact = run.use_artifact("train_data:latest")
            train_table = train_artifact.get("train_table")

            # Next, we split out the labels and train the model
            train_data, mask_data = make_datasets(train_table, n_classes)
            model = ExampleSegmentationModel(n_classes)
            model.train(train_data, mask_data)

            # Finally we score the model. Behind the scenes, we score each mask on its
            # IOU score.
            scores, results = score_model(model, train_data, mask_data, n_classes)

            # Let's create a new table. Notice that we create many columns - an
            # evaluation score for each class type.
            results_table = wandb.Table(
                columns=["id", "pred_mask", "dominant_pred"] + BDD_CLASSES,
                # Data construction is similar to before, but we now use the predicted
                # masks and bounding boxes.
                data=[[
                    train_table.data[ndx][0],
                    wandb.Image(train_table.data[ndx][1],
                                masks={
                                    "train_predicted_truth": {
                                        "mask_data": results[ndx],
                                    },
                                },
                                boxes={
                                    "ground_truth": {
                                        "box_data": mask_to_bounding(results[ndx])
                                    }
                                }),
                    BDD_CLASSES[get_dominant_id_ndx(results[ndx])],
                ] + list(row) for ndx, row in enumerate(scores)])

            # We create an artifact, add the table, and log it as part of the run.
            results_artifact = wandb.Artifact("train_results", "dataset")
            results_artifact.add(results_table, "train_iou_score_table")
            run.log_artifact(results_artifact)

            # Finally, let's save the model as a flat file and add that to its own
            # artifact.
            model.save("model.pkl")
            model_artifact = wandb.Artifact("trained_model", "model")
            model_artifact.add_file("model.pkl")
            run.log_artifact(model_artifact)

        print("Step 3/5 Complete")

        with wandb.init(project=WANDB_PROJECT, job_type="model_eval") as run:
            # Retrieve the test data
            test_artifact = run.use_artifact("test_data:latest")
            test_table = test_artifact.get("test_table")
            test_data, mask_data = make_datasets(test_table, n_classes)

            # Download the saved model file.
            model_artifact = run.use_artifact("trained_model:latest")
            path = model_artifact.get_path("model.pkl").download()

            # Load the model from the file and score it
            model = ExampleSegmentationModel.load(path)
            scores, results = score_model(model, test_data, mask_data, n_classes)

            # Create a predicted score table similar to step 3.
            results_artifact = wandb.Artifact("test_results", "dataset")
            data = [[
                test_table.data[ndx][0],
                wandb.Image(test_table.data[ndx][1],
                            masks={
                                "test_predicted_truth": {
                                    "mask_data": results[ndx],
                                },
                            },
                            boxes={
                                "ground_truth": {
                                    "box_data": mask_to_bounding(results[ndx])
                                }
                            }),
                BDD_CLASSES[get_dominant_id_ndx(results[ndx])],
            ] + list(row) for ndx, row in enumerate(scores)]

            # And log out the results.
            results_artifact.add(
                wandb.Table(["id", "pred_mask_test", "dominant_pred_test"] +
                            BDD_CLASSES,
                            data=data), "test_iou_score_table")
            run.log_artifact(results_artifact)

        print("Step 4/5 Complete")

        with wandb.init(project=WANDB_PROJECT,
                        job_type="model_result_analysis") as run:
            # Retrieve the original raw dataset
            dataset_artifact = run.use_artifact("raw_data:latest")
            data_table = dataset_artifact.get("raw_examples")

            # Retrieve the train and test score tables
            train_artifact = run.use_artifact("train_results:latest")
            train_table = train_artifact.get("train_iou_score_table")
            test_artifact = run.use_artifact("test_results:latest")
            test_table = test_artifact.get("test_iou_score_table")

            # Join the tables on the "id" column and log them as outputs.
            train_results = wandb.JoinedTable(train_table, data_table, "id")
            test_results = wandb.JoinedTable(test_table, data_table, "id")
            artifact = wandb.Artifact("summary_results", "dataset")
            artifact.add(train_results, "train_results")
            artifact.add(test_results, "test_results")
            run.log_artifact(artifact)

        print("Step 5/5 Complete")

        # Restore the original environment variables, if they were set.
        if WANDB_PROJECT_ENV is not None:
            os.environ["WANDB_PROJECT"] = WANDB_PROJECT_ENV
        if WANDB_SILENT_ENV is not None:
            os.environ["WANDB_SILENT"] = WANDB_SILENT_ENV
    finally:
        cleanup()
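# A short follow-up sketch: reading the joined summary tables back in a later
# run. The artifact and entry names match those logged in step 5 above; the
# function itself is illustrative, not part of the original script.
def load_summary_results():
    with wandb.init(project=WANDB_PROJECT, job_type="analysis") as run:
        art = run.use_artifact("summary_results:latest")
        return art.get("train_results"), art.get("test_results")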
def _make_joined_table():
    table_1 = _make_wandb_table()
    table_2 = _make_wandb_table()
    return wandb.JoinedTable(table_1, table_2, "index")
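# Example usage of the helper above (illustrative), logging the joined table to
# an artifact the same way the other snippets in this file do.
def _log_joined_table_example():
    with wandb.init() as run:
        art = wandb.Artifact("joined_tables", "dataset")
        art.add(_make_joined_table(), "joined_table")
        run.log_artifact(art)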