def on_validation_epoch_end(self, trainer: 'pl.Trainer',
                            pl_module: 'pl.LightningModule') -> None:
    confusion = self.confusion_matrix.compute()
    confusion = confusion.type(torch.int)
    confusion_table = wandb.Table(data=confusion.tolist(),
                                  columns=self.name_classes)
    pl_module.logger.experiment.log({'confusion': confusion_table})
    table = []
    for image, pred, label in zip(self.images.unbind(0),
                                  self.mlp_preds.unbind(0),
                                  self.labels.unbind(0)):
        table.append((self.epoch, label.cpu(), pred.cpu(),
                      wandb.Image(image[0:3].detach().cpu())))
    table = wandb.Table(data=table, columns=['epoch', 'label', 'pred', 'image'])
    pl_module.logger.experiment.log({'validation_sample': table})
    self.epoch += 1
    self.confusion_matrix = ConfusionMatrix(self.num_classes).to(pl_module.device)
    self.time_to_sample = True

def log_examples(e_in, d_in):
    enc_input = e_in.transpose(0, 2)[0].detach().cpu().numpy()
    dec_input = d_in.transpose(0, 2)[0].detach().cpu().numpy()
    columns = [
        "Encoder Input: " + str(enc_input.shape),
        "Decoder Input: " + str(dec_input.shape)
    ]
    inputs = (enc_input, dec_input)
    table = wandb.Table(columns=columns)
    table.add_data(*inputs)
    wandb.log({"Inputs": table})

def test_table_logging(mocked_run, live_mock_server, test_settings, api):
    run = wandb.init(settings=test_settings)
    run.log({
        "logged_table": wandb.Table(
            columns=["a"],
            data=[[wandb.Image(np.ones(shape=(32, 32)))]],
        )
    })
    run.finish()
    assert True

def test(model, rank, test_loader):
    model.eval()
    # define the ``wandb`` table columns; hook into the model to collect
    # gradients and the topology
    columns = ["id", "image", "guess", "truth",
               *[f"score_{i}" for i in range(10)]]
    if rank == 0:
        my_table = wandb.Table(columns=columns)
        wandb.watch(model)
    test_loss = 0
    correct = 0
    log_counter = 0
    # disable gradient computation
    with torch.no_grad():
        # loop through the test data loader
        total = 0.
        for images, targets in test_loader:
            total += len(targets)
            images, targets = images.to(rank), targets.to(rank)  # device conversion
            outputs = model(images)  # forward pass -- generate predictions
            test_loss += F.nll_loss(outputs, targets,
                                    reduction="sum").item()  # sum up batch loss
            _, predicted = torch.max(outputs.data, 1)  # index of the max log-probability
            correct += (predicted == targets).sum().item()  # compare predictions to true labels
            # log predictions to the ``wandb`` table
            if log_counter < NUM_BATCHES_TO_LOG:
                if rank == 0:
                    log_test_predictions(images, targets, outputs, predicted,
                                         my_table, log_counter)
                log_counter += 1
    # compute the average loss
    test_loss /= total
    accuracy = float(correct) / total
    if rank == 0:
        print("\ntest_loss={:.4f}\naccuracy={:.4f}\n".format(test_loss, accuracy))
        # log the average loss, accuracy, and table
        wandb.log({
            "test_loss": test_loss,
            "accuracy": accuracy,
            "mnist_predictions": my_table
        })
    return accuracy

def _log_feature_importance(self, model):
    fi = model.get_score(importance_type=self.importance_type)
    fi_data = [[k, fi[k]] for k in fi]
    table = wandb.Table(data=fi_data, columns=["Feature", "Importance"])
    wandb.log({
        "Feature Importance":
        wandb.plot.bar(table, "Feature", "Importance",
                       title="Feature Importance")
    })

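# A minimal, hedged usage sketch for the XGBoost importance logger above. The
# tiny random DMatrix, the "gain" importance type, and the standalone call
# (rather than the callback's `self.importance_type`) are illustrative
# assumptions, not part of the snippet above.
import numpy as np
import wandb
import xgboost as xgb

wandb.init(project="xgb-feature-importance-demo")
dtrain = xgb.DMatrix(np.random.rand(20, 3), label=np.random.randint(0, 2, 20))
bst = xgb.train({"objective": "binary:logistic"}, dtrain, num_boost_round=5)
fi = bst.get_score(importance_type="gain")  # same Booster API the logger relies on
table = wandb.Table(data=[[k, v] for k, v in fi.items()],
                    columns=["Feature", "Importance"])
wandb.log({"Feature Importance": wandb.plot.bar(
    table, "Feature", "Importance", title="Feature Importance")})
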
def confusion_matrix(preds=None, y_true=None, class_names=None):
    """
    Computes a multi-run confusion matrix.

    Arguments:
        preds (arr): Array of predicted label indices (0-indexed).
        y_true (arr): Array of label indices (0-indexed).
        class_names (arr): Array of class names.

    Returns:
        Nothing. To see plots, go to your W&B run page then expand the
        'media' tab under 'auto visualizations'.

    Example:
        wandb.log({'pr': wandb.plot.confusion_matrix(preds, y_true, labels)})
    """
    np = util.get_module(
        "numpy",
        required="confusion matrix requires the numpy library, "
        "install with `pip install numpy`",
    )
    assert len(preds) == len(y_true), \
        "Number of predictions and label indices must match"
    if class_names is not None:
        n_classes = len(class_names)
        # indices are 0-based, so the largest valid index is n_classes - 1
        assert max(preds) < len(class_names), \
            "Higher predicted index than number of classes"
        assert max(y_true) < len(class_names), \
            "Higher label class index than number of classes"
    else:
        n_classes = max(max(preds), max(y_true)) + 1
        class_names = ["Class_{}".format(i) for i in range(1, n_classes + 1)]
    counts = np.zeros((n_classes, n_classes))
    for i in range(len(preds)):
        counts[y_true[i], preds[i]] += 1
    data = []
    for i in range(n_classes):
        data.extend([class_names[i], class_names[j], counts[i, j]]
                    for j in range(n_classes))
    fields = {
        "Actual": "Actual",
        "Predicted": "Predicted",
        "nPredicted": "Count",
    }
    return wandb.plot_table(
        "wandb/confusion_matrix/v0",
        wandb.Table(columns=["Actual", "Predicted", "Count"], data=data),
        fields,
    )

def end_epoch(self, best_result=False):
    if self.wandb_run:
        wandb.log(self.log_dict)
        self.log_dict = {}
        if self.result_artifact:
            train_results = wandb.JoinedTable(self.val_table,
                                              self.result_table, "id")
            self.result_artifact.add(train_results, 'result')
            wandb.log_artifact(self.result_artifact,
                               aliases=['latest',
                                        'epoch ' + str(self.current_epoch),
                                        ('best' if best_result else '')])
            self.result_table = wandb.Table(
                ["epoch", "id", "prediction", "avg_confidence"])
            self.result_artifact = wandb.Artifact(
                "run_" + wandb.run.id + "_progress", "evaluation")

def wandb_table(txt_file, epoch=0, num_classes=8, fold_index=None):
    metrics, data, auc, balanced_accuracy = _log_output(txt_file, num_classes)
    if fold_index is None:
        wandb.log({
            "Evaluation Metrics": wandb.Table(data=data, columns=metrics),
            'epoch': epoch
        })
    else:
        wandb.log({
            f"Evaluation Metrics - Fold {fold_index}":
            wandb.Table(data=data, columns=metrics),
            'epoch': epoch
        })
    return auc, balanced_accuracy

def setup_training(self, opt):
    """
    Setup the necessary processes for training YOLO models:
      - Attempt to download model checkpoint and dataset artifacts if
        opt.resume starts with WANDB_ARTIFACT_PREFIX
      - Update data_dict to contain info of the previous run if resumed,
        and the paths of the dataset artifact if downloaded
      - Setup log_dict, initialize bbox_interval

    arguments:
        opt (namespace) -- commandline arguments for this run
    """
    self.log_dict, self.current_epoch = {}, 0
    self.bbox_interval = opt.bbox_interval
    if isinstance(opt.resume, str):
        modeldir, _ = self.download_model_artifact(opt)
        if modeldir:
            self.weights = Path(modeldir) / "last.pt"
            config = self.wandb_run.config
            (opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval,
             opt.epochs, opt.hyp, opt.imgsz) = (
                 str(self.weights), config.save_period, config.batch_size,
                 config.bbox_interval, config.epochs, config.hyp, config.imgsz)
    data_dict = self.data_dict
    if self.val_artifact is None:
        # If --upload_dataset is set, use the existing artifact, don't download
        self.train_artifact_path, self.train_artifact = \
            self.download_dataset_artifact(data_dict.get('train'),
                                           opt.artifact_alias)
        self.val_artifact_path, self.val_artifact = \
            self.download_dataset_artifact(data_dict.get('val'),
                                           opt.artifact_alias)
    if self.train_artifact_path is not None:
        train_path = Path(self.train_artifact_path) / 'data/images/'
        data_dict['train'] = str(train_path)
    if self.val_artifact_path is not None:
        val_path = Path(self.val_artifact_path) / 'data/images/'
        data_dict['val'] = str(val_path)
    if self.val_artifact is not None:
        self.result_artifact = wandb.Artifact(
            "run_" + wandb.run.id + "_progress", "evaluation")
        columns = ["epoch", "id", "ground truth", "prediction"]
        columns.extend(self.data_dict['names'])
        self.result_table = wandb.Table(columns)
        self.val_table = self.val_artifact.get("val")
        if self.val_table_path_map is None:
            self.map_val_table_path()
    if opt.bbox_interval == -1:
        self.bbox_interval = opt.bbox_interval = \
            (opt.epochs // 10) if opt.epochs > 10 else 1
        if opt.evolve:
            self.bbox_interval = opt.bbox_interval = opt.epochs + 1
    train_from_artifact = (self.train_artifact_path is not None
                           and self.val_artifact_path is not None)
    # Update the data_dict to point to the local artifacts dir
    if train_from_artifact:
        self.data_dict = data_dict

def plot_heat_map(y_true, y_pred, num_labels):
    cm = confusion_matrix(y_true, y_pred)
    columns: List[str] = [label_mapping[i] for i in range(num_labels)]
    df = pd.DataFrame(cm, index=columns, columns=columns)
    df.to_csv("eval_cm.csv")
    # seaborn returns an Axes; grab its Figure so wandb.Image can render it
    ax = sns.heatmap(df, annot=True, fmt='d', cmap="Reds")
    wandb.log({
        "confusion_matrix_plot": wandb.Image(ax.get_figure()),
        "confusion_matrix": wandb.Table(dataframe=df)
    })

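# Hedged usage sketch for plot_heat_map above: `label_mapping` is referenced
# but not defined in the function, so a toy mapping is assumed here (it would
# need to live in the same module as plot_heat_map to be resolved).
import wandb

label_mapping = {0: "negative", 1: "positive"}  # assumed, for illustration
wandb.init(project="confusion-matrix-demo")
plot_heat_map(y_true=[0, 1, 1, 0, 1], y_pred=[0, 1, 0, 0, 1], num_labels=2)
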
def test_image_reference_with_preferred_path():
    orig_im_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                "..", "assets", "test.png")
    orig_im_path_2 = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  "..", "assets", "test2.png")
    desired_artifact_path = "images/sample.png"
    with wandb.init() as run:
        artifact = wandb.Artifact("artifact_1", type="test_artifact")
        # manually add the image to a desired path
        artifact.add_file(orig_im_path, desired_artifact_path)
        # create an image that uses this image (it should be smart enough
        # not to add the image twice)
        image = wandb.Image(orig_im_path)
        image_2 = wandb.Image(orig_im_path_2)  # this one does not have the path pre-added
        # add the images to a table
        table = wandb.Table(["image"], data=[[image], [image_2]])
        # add the table to the artifact
        artifact.add(table, "table")
        run.log_artifact(artifact)
    _cleanup()
    with wandb.init() as run:
        artifact_1 = run.use_artifact("artifact_1:latest")
        original_table = artifact_1.get("table")
        artifact = wandb.Artifact("artifact_2", type="test_artifact")
        # add the images by reference
        image = wandb.Image(original_table.data[0][0])
        image_2 = wandb.Image(original_table.data[1][0])
        # add the images to a table
        table = wandb.Table(["image"], data=[[image], [image_2]])
        # add the table to the artifact
        artifact.add(table, "table")
        run.log_artifact(artifact)
    _cleanup()
    with wandb.init() as run:
        artifact_2 = run.use_artifact("artifact_2:latest")
        artifact_2.download()

def report(model, wandb_logger):
    # https://donatstudios.com/CsvToMarkdownTable
    model.eval()
    model = model.cpu()
    y_pred = []
    y_true = []
    for x, y in model.val_dataloader():
        res = torch.max(F.softmax(model(x), dim=1), 1)[1].numpy()
        y_pred.extend(res)
        y_true.extend(y.numpy())
        break
    unique_label = np.unique([y_true, y_pred])
    cmtx = pd.DataFrame(
        confusion_matrix(y_true, y_pred, labels=unique_label),
        index=['true:{:}'.format(x) for x in unique_label],
        columns=['pred:{:}'.format(x) for x in unique_label])
    report = pd.DataFrame(
        classification_report(y_true, y_pred, output_dict=True))
    # build one table row per metric: precision, recall, f1-score, support
    wreport = []
    for metric_name, row in zip(['precision', 'recall', 'f1-score', 'support'],
                                report.values):
        tmp = [str(item) for item in row]
        tmp.insert(0, metric_name)
        wreport.append(tmp)
    print(report, cmtx)
    hreport = report.columns
    hreport = hreport.insert(0, '')
    if wandb_logger:
        wandb_logger.log_metrics({
            'confusion_matrix':
            wandb.plots.HeatMap(unique_label, unique_label, cmtx.values,
                                show_text=True),
            'classification_report':
            wandb.Table(data=wreport, columns=hreport.values)
        })

def test(model, device, test_loader):
    # ``wandb`` tabular columns
    columns = ["id", "image", "guess", "truth"]
    for digit in range(10):
        columns.append("score_" + str(digit))
    my_table = wandb.Table(columns=columns)
    model.eval()
    # hook into the model to collect gradients and the topology
    wandb.watch(model)
    test_loss = 0
    correct = 0
    log_counter = 0
    # disable gradient computation
    with torch.no_grad():
        # loop through the test data loader
        for images, targets in test_loader:
            images, targets = images.to(device), targets.to(device)  # device conversion
            outputs = model(images)  # forward pass -- generate predictions
            test_loss += F.nll_loss(outputs, targets,
                                    reduction="sum").item()  # sum up batch loss
            _, predicted = torch.max(outputs.data, 1)  # index of the max log-probability
            correct += (predicted == targets).sum().item()  # compare predictions to true labels
            # log predictions to the ``wandb`` table
            if log_counter < NUM_BATCHES_TO_LOG:
                log_test_predictions(images, targets, outputs, predicted,
                                     my_table, log_counter)
                log_counter += 1
    # compute the average loss
    test_loss /= len(test_loader.dataset)
    accuracy = float(correct) / len(test_loader.dataset)
    print("\naccuracy={:.4f}\n".format(accuracy))
    # log the average loss, accuracy, and table
    wandb.log({
        "test_loss": test_loss,
        "accuracy": accuracy,
        "mnist_predictions": my_table
    })
    return accuracy

def evaluate_and_print_metrics(
    predicted_path: str,
    gold_path: str,
    language='ru',
    max_count=None,
    is_multiple_ref=False,
    detokenize_after=False,
    tokenize_after=False,
    lower=False,
    are_clusters_used=False,
):
    hyps = []
    refs = []
    table = wandb.Table(columns=[
        'Reference', 'Prediction', 'Reference processed', 'Prediction processed'
    ])
    with open(gold_path, "r") as gold, open(predicted_path, "r") as pred:
        for i, (ref, hyp) in enumerate(zip(gold, pred)):
            if i % 500 == 0:
                ref_before = ref
                hyp_before = hyp
            if max_count is not None and i >= max_count:
                break
            ref, hyp = postprocess(ref, hyp, language, is_multiple_ref,
                                   detokenize_after, tokenize_after, lower)
            if not hyp:
                print("Empty hyp for ref: ", ref)
                continue
            if not ref:
                continue
            if i % 500 == 0:
                table.add_data(ref_before, hyp_before, ref, hyp)
            refs.append(ref)
            hyps.append(hyp)
    if are_clusters_used:
        wandb.run.summary.update({'Examples with multiple references': table})
    else:
        wandb.run.summary.update({'Examples': table})
    print_metrics(refs, hyps, language=language,
                  are_clusters_used=are_clusters_used)

def line_series(xs, ys, keys=None, title=None, xname=None):
    """
    Construct a line series plot.

    Arguments:
        xs (array of arrays, or array): Array of arrays of x values
        ys (array of arrays): Array of y values
        keys (array, optional): Array of series names; defaults to "key_{i}"
        title (string): Plot title.
        xname: Title of x-axis

    Returns:
        A plot object, to be passed to wandb.log()

    Example:
        ```
        # When logging a single array for x, all ys are plotted against that x
        x = [i for i in range(10)]
        ys = [
            [i for i in range(10)],
            [i**2 for i in range(10)]
        ]
        wandb.log({'line-series-plot1':
                   wandb.plot.line_series(x, ys, title="title", xname="step")})

        # xs can also be an array of arrays, giving each metric its own steps
        xs = [[i for i in range(10)], [2*i for i in range(10)]]
        ys = [
            [i for i in range(10)],
            [i**2 for i in range(10)]
        ]
        wandb.log({'line-series-plot1':
                   wandb.plot.line_series(xs, ys, title="title", xname="step")})
        ```
    """
    data = []
    if not isinstance(xs[0], Sequence):
        xs = [xs for _ in range(len(ys))]
    assert len(xs) == len(ys), "Number of x-lines and y-lines must match"
    for i, series in enumerate([list(zip(xs[i], ys[i]))
                                for i in range(len(xs))]):
        for x, y in series:
            key = "key_{}".format(i) if keys is None else keys[i]
            data.append([x, key, y])
    table = wandb.Table(data=data, columns=["step", "lineKey", "lineVal"])
    return wandb.plot_table(
        "wandb/lineseries/v0",
        table,
        {"step": "step", "lineKey": "lineKey", "lineVal": "lineVal"},
        {"title": title, "xname": xname or "x"},
    )

def test_table_slice_reference_artifact():
    with wandb.init(project=WANDB_PROJECT) as run:
        artifact = wandb.Artifact("table_data", "data")
        table = _make_wandb_table()
        artifact.add(table, "table")
        run.log_artifact(artifact)

    with wandb.init(project=WANDB_PROJECT) as run:
        artifact_1 = run.use_artifact("table_data:latest")
        t1 = artifact_1.get("table")
        artifact = wandb.Artifact("intermediate_data", "data")
        i1 = wandb.Table(t1.columns, t1.data[:1])
        i2 = wandb.Table(t1.columns, t1.data[1:])
        artifact.add(i1, "table1")
        artifact.add(i2, "table2")
        run.log_artifact(artifact)

    with wandb.init(project=WANDB_PROJECT) as run:
        artifact_2 = run.use_artifact("intermediate_data:latest")
        i1 = artifact_2.get("table1")
        i2 = artifact_2.get("table2")
        artifact = wandb.Artifact("reference_data", "data")
        table1 = wandb.Table(t1.columns, i1.data)
        table2 = wandb.Table(t1.columns, i2.data)
        artifact.add(table1, "table1")
        artifact.add(table2, "table2")
        run.log_artifact(artifact)
    _cleanup()

    with wandb.init(project=WANDB_PROJECT) as run:
        artifact_3 = run.use_artifact("reference_data:latest")
        table1 = artifact_3.get("table1")
        table2 = artifact_3.get("table2")
        assert not os.path.isdir(os.path.join(artifact_2._default_root()))
        # assert os.path.islink(os.path.join(artifact_3._default_root(), "media", "images", "test.png"))
        # assert os.path.islink(os.path.join(artifact_3._default_root(), "media", "images", "test2.png"))
        assert t1.data[:1] == table1.data
        assert t1.data[1:] == table2.data

def test_add_table_from_dataframe(runner, live_mock_server, test_settings):
    with runner.isolated_filesystem():
        import pandas as pd

        # note: np.float / np.bool were removed in NumPy 1.24;
        # the builtin types are equivalent
        df_float = pd.DataFrame([[1, 2.0, 3.0]], dtype=float)
        df_float32 = pd.DataFrame([[1, 2.0, 3.0]], dtype=np.float32)
        df_bool = pd.DataFrame([[True, False, True]], dtype=bool)

        wb_table_float = wandb.Table(dataframe=df_float)
        wb_table_float32 = wandb.Table(dataframe=df_float32)
        wb_table_float32_recast = wandb.Table(dataframe=df_float32.astype(float))
        wb_table_bool = wandb.Table(dataframe=df_bool)

        run = wandb.init(settings=test_settings)
        artifact = wandb.Artifact("table-example", "dataset")
        artifact.add(wb_table_float, "wb_table_float")
        artifact.add(wb_table_float32_recast, "wb_table_float32_recast")
        artifact.add(wb_table_float32, "wb_table_float32")
        artifact.add(wb_table_bool, "wb_table_bool")
        run.log_artifact(artifact)
        run.finish()

def log_predictions(
    self,
    predictions,
    prediction_col_name="output",
    val_ndx_col_name="val_row",
    table_name="validation_predictions",
    commit=False,
):
    """Logs a set of predictions.

    Intended usage:
        vl.log_predictions(vl.make_predictions(self.model.predict))

    Args:
        predictions (Sequence | Dict[str, Sequence]): A list of prediction
            vectors or a dictionary of lists of prediction vectors
        prediction_col_name (str, optional): the name of the prediction
            column. Defaults to "output".
        val_ndx_col_name (str, optional): The name of the column linking the
            prediction table to the validation data table. Defaults to "val_row".
        table_name (str, optional): name of the prediction table. Defaults
            to "validation_predictions".
        commit (bool, optional): determines if commit should be called on
            the logged data. Defaults to False.
    """
    if self.local_validation_artifact is not None:
        self.local_validation_artifact.wait()

    pred_table = wandb.Table(columns=[], data=[])
    if isinstance(predictions, dict):
        for col_name in predictions:
            pred_table.add_column(col_name, predictions[col_name])
    else:
        pred_table.add_column(prediction_col_name, predictions)
    pred_table.add_column(val_ndx_col_name, self.validation_indexes)

    if self.prediction_row_processor is None and self.infer_missing_processors:
        example_prediction = _make_example(predictions)
        example_input = _make_example(self.validation_inputs)
        if example_prediction is not None and example_input is not None:
            self.prediction_row_processor = _infer_prediction_row_processor(
                example_prediction,
                example_input,
                self.class_labels_table,
                self.input_col_name,
                prediction_col_name,
            )

    if self.prediction_row_processor is not None:
        pred_table.add_computed_columns(self.prediction_row_processor)

    # pass `commit` through so the parameter actually takes effect
    wandb.log({table_name: pred_table}, commit=commit)
    return pred_table

def main(argv):
    # test to ensure logged table media still resolves after changing
    # the working directory
    run = wandb.init()
    run_project = run.project
    run_id = run.id
    print("Started run {}/{}".format(run_project, run_id))
    try:
        os.makedirs('./chdir_test')
    except Exception:
        pass
    os.chdir('./chdir_test')

    # log some table data, which is saved in the media folder:
    # 10 rows each of (class, precision, recall) per class/recall combination
    pr_data = (
        [['setosa', 1.0, 1.0] for _ in range(10)]
        + [['setosa', 1.0, 0.0] for _ in range(10)]
        + [['versicolor', 1.0, 1.0] for _ in range(10)]
        + [['versicolor', 1.0, 0.0] for _ in range(10)]
    )
    # convert the data to a table
    pr_table = wandb.Table(data=pr_data, columns=["class", "precision", "recall"])
    wandb.log({'pr_table': pr_table})
    wandb.finish()

    # Check results
    api = wandb.Api()
    last_run = api.run("%s/%s" % (run_project, run_id))
    media_path = last_run.summary_metrics["pr_table"]["path"]
    media_file = last_run.file(media_path)
    assert media_file.size > 0
    print("Success")

def upload_history_to_wandb(history):
    """Convenience function to upload a Keras history to W&B

    Parameters
    ----------
    history : tf.keras.callbacks.History
        History object obtained from training
    """
    # Turn into a DataFrame
    history_df = pd.DataFrame.from_dict(history.history)
    # Turn into a wandb Table
    history_table = wandb.Table(dataframe=history_df)
    # Log
    wandb.log({"history": history_table})

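# Hedged usage sketch for upload_history_to_wandb: a throwaway Keras model is
# fit for two epochs so the History object has something to tabulate; the
# model, data, and project name are assumptions for illustration.
import tensorflow as tf
import wandb

wandb.init(project="keras-history-demo")
model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer="adam", loss="mse")
history = model.fit(tf.zeros((8, 4)), tf.zeros((8, 1)), epochs=2, verbose=0)
upload_history_to_wandb(history)  # logs one row per epoch of training metrics
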
def validation_end(self, outputs):
    val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean()
    val_ppl = torch.exp(val_loss_mean)
    adjusted_val_loss = val_loss_mean * (
        (self.val_dataset.n_tokens - 1) /
        (self.val_dataset.n_original_tokens - 1))
    adjusted_val_ppl = torch.exp(adjusted_val_loss)

    if self.args.accelerator != "TPU":
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        prompt = torch.tensor(
            self.tokenizer.encode("<|endoftext|> ")).unsqueeze(0).to(device)
        outputs = self.model.generate(
            input_ids=prompt,
            max_length=self.args.sample_len,
            temperature=self.args.temperature,
            top_k=self.args.top_k,
            top_p=self.args.top_p,
            repetition_penalty=self.args.repetition_penalty,
            num_return_sequences=1)
        outputs = self.tokenizer.decode(outputs[0].cpu().numpy(),
                                        skip_special_tokens=True)
        print("\nSampling:")
        print(outputs)
        print("\n")
        self.table_data.append([f'{self.trainer.current_epoch}', outputs])

    metrics = {
        'epoch': self.trainer.current_epoch,
        'val_loss': val_loss_mean,
        'val_ppl': val_ppl,
        'adjusted_val_ppl': adjusted_val_ppl,
        "log": {
            'epoch': self.trainer.current_epoch,
            'val_loss': val_loss_mean,
            'val_ppl': val_ppl,
            'adjusted_val_ppl': adjusted_val_ppl,
            "samples": wandb.Table(columns=['Epoch', 'Text'],
                                   data=self.table_data)
        }
    }
    return metrics

def _make_wandb_table():
    return wandb.Table(
        columns=columns,
        data=[
            ["string", True, 1, 1.4, _make_wandb_image()],
            ["string", True, 1, 1.4, _make_wandb_image()],
            ["string2", False, -0, -1.4, _make_wandb_image("2")],
            ["string2", False, -0, -1.4, _make_wandb_image("2")],
        ],
    )

def test_reference_table_artifacts(mocked_run, live_mock_server, test_settings, api):
    live_mock_server.set_ctx({"max_cli_version": "0.11.0"})
    run = wandb.init(settings=test_settings)
    t = wandb.Table(
        columns=["a"],
        data=[[wandb.Image(np.ones(shape=(32, 32)))]],
    )

    art = wandb.Artifact("A", "dataset")
    art.add(t, "table")
    run.log_artifact(art)

    art = wandb.Artifact("A", "dataset")
    art.add(t, "table")
    run.log_artifact(art)

    run.finish()
    assert True

def exp_generative_train(train_file, val_file, test_file, reconstruct_strategy,
                         max_len, epochs, results_dir, model_conf_params,
                         n_pretrain_steps=50, batch_size=256, lr=0.0005,
                         betas=(0.5, 0.999)):
    wandb.init(project="generative train")
    if not os.path.exists(results_dir):
        os.mkdir(results_dir)
    training_log_dir = os.path.join(results_dir, 'training/')
    if not os.path.exists(training_log_dir):
        os.mkdir(training_log_dir)

    vocab = my_vocab.Vocab()
    vocab.build_from_formula_file(train_file)
    vocab.write_vocab_to_file(os.path.join(results_dir, 'vocab.txt'))

    device = torch.device('cuda')
    train_batches, _ = my_batch_builder.build_ordered_batches(
        train_file, vocab, batch_size, device)
    valid_batches, _ = my_batch_builder.build_ordered_batches(
        val_file, vocab, batch_size, device)

    model_params = my_model.ModelParams(vocab=vocab, vocab_size=vocab.size(),
                                        device=device, **model_conf_params)
    model = my_model.FormulaVARE(model_params)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=betas)

    n_formulas_to_sample = 2000
    use_for_train_fraction = 0.2
    table = wandb.Table(columns=[
        "max_len", "epochs", "batch_size", "learning_rate",
        "n_formulas_sampled", "chosen_for_train_fraction", "n_pretrain_steps"
    ])
    table.add_data(max_len, epochs, batch_size, lr, n_formulas_to_sample,
                   use_for_train_fraction, n_pretrain_steps)
    wandb.log({'configs': table})

    my_generative_train.generative_train(model, vocab, optimizer, epochs,
                                         device, batch_size,
                                         n_formulas_to_sample, 'sample',
                                         max_len, use_for_train_fraction,
                                         n_pretrain_steps, train_batches,
                                         valid_batches)

def wandb_log_gen_obs(self, outputs: List[List[List[str]]],
                      table_title: str) -> None:
    flat_outputs = [item for sublist in outputs for item in sublist]
    data = (
        random.sample(flat_outputs, self.hparams.sample_k_gen_obs)  # type: ignore
        if len(flat_outputs) >= self.hparams.sample_k_gen_obs  # type: ignore
        else flat_outputs)
    self.logger.experiment.log({
        table_title:
        wandb.Table(data=data,
                    columns=["Groundtruth", "Predicted", "Decoded"])
    })

def _log_feature_importance(model: "Booster") -> None:
    """Log feature importance."""
    feat_imps = model.feature_importance()
    feats = model.feature_name()
    fi_data = [[feat, feat_imp] for feat, feat_imp in zip(feats, feat_imps)]
    table = wandb.Table(data=fi_data, columns=["Feature", "Importance"])
    wandb.log(
        {
            "Feature Importance":
            wandb.plot.bar(table, "Feature", "Importance",
                           title="Feature Importance")
        },
        commit=False,
    )

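# Hedged usage sketch for the LightGBM importance logger above; the tiny
# random Dataset and the training parameters are illustrative assumptions.
import lightgbm as lgb
import numpy as np
import wandb

wandb.init(project="lgb-feature-importance-demo")
train_set = lgb.Dataset(np.random.rand(50, 3), label=np.random.randint(0, 2, 50))
booster = lgb.train({"objective": "binary"}, train_set, num_boost_round=5)
_log_feature_importance(booster)
wandb.log({})  # the helper passes commit=False, so commit the step here
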
def get(self):
    clz_name, clz_ap = self._metric.get()
    table = [['Class', 'AP']] + list(zip(clz_name, clz_ap))
    table = AsciiTable(table)
    table.justify_columns[1] = 'right'
    if self._log_flag:
        logging.info('\n' + table.table)
        if wandb.run:
            headers = table.table_data[0]
            # skip the last row: it holds the overall mAP, logged separately below
            data = table.table_data[1:-1]
            wandb_table = wandb.Table(columns=headers, data=data)
            wandb.log({'mAP': clz_ap[-1], 'APs': wandb_table}, commit=False)
        self._log_flag = False
    return clz_name[-1], clz_ap[-1]

def test_table_explicit_types():
    table = wandb.Table(columns=["a", "b"], dtype=int)
    table.add_data(None, None)
    table.add_data(1, 2)
    with pytest.raises(TypeError):
        table.add_data(1, "a")

    table = wandb.Table(columns=["a", "b"], optional=False, dtype=[int, str])
    with pytest.raises(TypeError):
        table.add_data(None, None)
    table.add_data(1, "a")
    with pytest.raises(TypeError):
        table.add_data("a", "a")

    table = wandb.Table(columns=["a", "b"], optional=[False, True],
                        dtype=[int, str])
    with pytest.raises(TypeError):
        table.add_data(None, None)
    with pytest.raises(TypeError):
        table.add_data(None, "a")
    table.add_data(1, None)
    table.add_data(1, "a")
    with pytest.raises(TypeError):
        table.add_data("a", "a")

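# Illustration of the dtype/optional semantics exercised by the test above:
# a table that requires ints in "a" and allows missing strings in "b".
import wandb

typed = wandb.Table(columns=["a", "b"], optional=[False, True], dtype=[int, str])
typed.add_data(3, None)   # ok: "b" is optional
typed.add_data(3, "ok")   # ok: matches the declared dtypes
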
def get_classification_report(self):
    table = wandb.Table(columns=[
        "class", "accuracy", "precision", "recall", "f1-score", "support"
    ])
    # per-class accuracy: diagonal of the row-normalized confusion matrix
    accuracies = self.confusion_matrix.astype(
        "float") / self.confusion_matrix.sum(axis=1)[:, np.newaxis]
    accuracies = accuracies.diagonal()
    for i in range(len(self.classes)):
        table.add_data(self.classes[i], accuracies[i],
                       self.classification_report[str(i)]['precision'],
                       self.classification_report[str(i)]['recall'],
                       self.classification_report[str(i)]['f1-score'],
                       self.classification_report[str(i)]['support'])
    return table

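# Minimal sketch of consuming get_classification_report: `tracker` stands in
# for whatever evaluation object holds the confusion matrix, class list, and
# per-class stats; the name is an assumption for illustration.
report_table = tracker.get_classification_report()
wandb.log({"classification_report": report_table})
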
def create_dataset_table(self, dataset, class_to_id, name='dataset'):
    # TODO: Explore multiprocessing to split this loop across workers --
    # essential for speeding up the logging
    artifact = wandb.Artifact(name=name, type="dataset")
    img_files = tqdm([dataset.path]) if isinstance(
        dataset.path, str) and Path(dataset.path).is_dir() else None
    img_files = tqdm(dataset.img_files) if not img_files else img_files
    for img_file in img_files:
        if Path(img_file).is_dir():
            artifact.add_dir(img_file, name='data/images')
            labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
            artifact.add_dir(labels_path, name='data/labels')
        else:
            artifact.add_file(img_file, name='data/images/' + Path(img_file).name)
            label_file = Path(img2label_paths([img_file])[0])
            if label_file.exists():
                artifact.add_file(str(label_file),
                                  name='data/labels/' + label_file.name)
    table = wandb.Table(columns=["id", "train_image", "Classes", "name"])
    class_set = wandb.Classes([{'id': id, 'name': name}
                               for id, name in class_to_id.items()])
    for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)):
        box_data, img_classes = [], {}
        for cls, *xywh in labels[:, 1:].tolist():
            cls = int(cls)
            box_data.append({
                "position": {
                    "middle": [xywh[0], xywh[1]],
                    "width": xywh[2],
                    "height": xywh[3]
                },
                "class_id": cls,
                "box_caption": "%s" % (class_to_id[cls])
            })
            img_classes[cls] = class_to_id[cls]
        boxes = {"ground_truth": {"box_data": box_data,
                                  "class_labels": class_to_id}}  # inference-space
        table.add_data(si, wandb.Image(paths, classes=class_set, boxes=boxes),
                       list(img_classes.values()), Path(paths).name)
    artifact.add(table, name)
    return artifact