Example #1
 def on_validation_epoch_end(self, trainer: 'pl.Trainer',
                             pl_module: 'pl.LightningModule') -> None:
     confusion = self.confusion_matrix.compute()
     confusion = confusion.type(torch.int)
     confusion_table = wandb.Table(data=confusion.tolist(),
                                   columns=self.name_classes)
     pl_module.logger.experiment.log({'confusion': confusion_table})
     table = []
     for image, pred, label in zip(self.images.unbind(0),
                                   self.mlp_preds.unbind(0),
                                   self.labels.unbind(0)):
         table.append((self.epoch, label.cpu(), pred.cpu(),
                       wandb.Image(image[0:3].detach().cpu())))
     table = wandb.Table(data=table,
                         columns=['epoch', 'label', 'pred', 'image'])
     pl_module.logger.experiment.log({'validation_sample': table})
     self.epoch += 1
     self.confusion_matrix = ConfusionMatrix(self.num_classes).to(
         pl_module.device)
     self.time_to_sample = True
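This method comes from a PyTorch Lightning callback; a minimal sketch of attaching such a callback to a Trainer (the ValidationLogger class name and its constructor arguments are assumptions for illustration):

import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger

# ``ValidationLogger`` is hypothetical: a Callback subclass implementing the
# ``on_validation_epoch_end`` hook shown above.
trainer = pl.Trainer(
    logger=WandbLogger(project="my-project"),  # exposes logger.experiment as the wandb run
    callbacks=[ValidationLogger(num_classes=10,
                                name_classes=[str(i) for i in range(10)])],
)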
Example #2
 def log_examples(e_in, d_in):
     enc_input = e_in.transpose(0, 2)[0].detach().cpu().numpy()
     dec_input = d_in.transpose(0, 2)[0].detach().cpu().numpy()
     columns = [
         "Encoder Input: " + str(enc_input.shape),
         "Decoder Input: " + str(dec_input.shape)
     ]
     inputs = (enc_input, dec_input)
     table = wandb.Table(columns=columns)
     table.add_data(*inputs)
     wandb.log({"Inputs": table})
Example #3
def test_table_logging(mocked_run, live_mock_server, test_settings, api):
    run = wandb.init(settings=test_settings)
    run.log(
        {
            "logged_table": wandb.Table(
                columns=["a"], data=[[wandb.Image(np.ones(shape=(32, 32)))]],
            )
        }
    )
    run.finish()
    assert True
Example #4
def test(model, rank, test_loader):
    model.eval()

    # define ``wandb`` tabular columns and hooks into the model to collect gradients and the topology
    columns = [
        "id", "image", "guess", "truth", *[f"score_{i}" for i in range(10)]
    ]
    if rank == 0:
        my_table = wandb.Table(columns=columns)
        wandb.watch(model)

    test_loss = 0
    correct = 0
    log_counter = 0

    # disable gradient
    with torch.no_grad():

        # loop through the test data loader
        total = 0.
        for images, targets in test_loader:
            total += len(targets)
            images, targets = images.to(rank), targets.to(
                rank)  # device conversion
            outputs = model(images)  # forward pass -- generate predictions
            test_loss += F.nll_loss(
                outputs, targets, reduction="sum").item()  # sum up batch loss
            _, predicted = torch.max(
                outputs.data, 1)  # get the index of the max log-probability
            correct += (predicted == targets
                        ).sum().item()  # compare predictions to true label

            # log predictions to the ``wandb`` table
            if log_counter < NUM_BATCHES_TO_LOG:
                if rank == 0:
                    log_test_predictions(images, targets, outputs, predicted,
                                         my_table, log_counter)
                log_counter += 1

    # compute the average loss
    test_loss /= total
    accuracy = float(correct) / total

    if rank == 0:
        print("\ntest_loss={:.4f}\naccuracy={:.4f}\n".format(
            test_loss, accuracy))
        # log the average loss, accuracy, and table
        wandb.log({
            "test_loss": test_loss,
            "accuracy": accuracy,
            "mnist_predictions": my_table
        })

    return accuracy
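Both this example and Example #13 below call a log_test_predictions helper and read a NUM_BATCHES_TO_LOG constant that are not shown; a minimal sketch of what they might look like, assuming the 14-column table defined above and an active wandb run:

NUM_BATCHES_TO_LOG = 10  # assumed module-level constant

def log_test_predictions(images, targets, outputs, predicted, table, batch_idx):
    # hypothetical reconstruction: add one table row per example in the batch
    scores = F.softmax(outputs, dim=1)  # normalize (log-)scores to probabilities
    for i, (image, pred, target, score) in enumerate(
            zip(images, predicted, targets, scores)):
        table.add_data(f"{batch_idx}_{i}", wandb.Image(image.cpu()),
                       pred.item(), target.item(),
                       *score.detach().cpu().tolist())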
Example #5
 def _log_feature_importance(self, model):
     fi = model.get_score(importance_type=self.importance_type)
     fi_data = [[k, fi[k]] for k in fi]
     table = wandb.Table(data=fi_data, columns=["Feature", "Importance"])
     wandb.log({
         "Feature Importance":
         wandb.plot.bar(table,
                        "Feature",
                        "Importance",
                        title="Feature Importance")
     })
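This method appears to belong to an XGBoost logging callback; a standalone sketch of the same idea against a plain Booster (the training call and dtrain are assumptions):

import wandb
import xgboost as xgb

booster = xgb.train({"objective": "reg:squarederror"}, dtrain)  # dtrain assumed in scope
fi = booster.get_score(importance_type="gain")
table = wandb.Table(data=[[k, v] for k, v in fi.items()],
                    columns=["Feature", "Importance"])
wandb.log({"Feature Importance":
           wandb.plot.bar(table, "Feature", "Importance",
                          title="Feature Importance")})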
Example #6
def confusion_matrix(preds=None, y_true=None, class_names=None):
    """
    Computes a multi-run confusion matrix.

    Arguments:
    preds (arr): Array of predicted label indices.
    y_true (arr): Array of label indices.
    class_names (arr): Array of class names.

    Returns:
    A plot object, to be passed to wandb.log(). To see the rendered plot, go to
    your W&B run page and expand the 'media' tab under 'auto visualizations'.

    Example:
    wandb.log({'pr': wandb.plot.confusion_matrix(preds, y_true, labels)})
    """

    np = util.get_module(
        "numpy",
        required=
        "confusion matrix requires the numpy library, install with `pip install numpy`",
    )
    assert len(preds) == len(
        y_true), "Number of predictions and label indices must match"
    if class_names is not None:
        n_classes = len(class_names)
        assert max(preds) < len(
            class_names), "Higher predicted index than number of classes"
        assert max(y_true) < len(
            class_names), "Higher label class index than number of classes"
    else:
        # +1 because label indices are zero-based
        n_classes = max(max(preds), max(y_true)) + 1
        class_names = ["Class_{}".format(i) for i in range(1, n_classes + 1)]

    counts = np.zeros((n_classes, n_classes))
    for i in range(len(preds)):
        counts[y_true[i], preds[i]] += 1

    data = []
    for i in range(n_classes):
        data.extend([class_names[i], class_names[j], counts[i, j]]
                    for j in range(n_classes))

    fields = {
        "Actual": "Actual",
        "Predicted": "Predicted",
        "nPredicted": "Count"
    }

    return wandb.plot_table(
        "wandb/confusion_matrix/v0",
        wandb.Table(columns=["Actual", "Predicted", "Count"], data=data),
        fields,
    )
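A minimal call with hypothetical label arrays; the returned plot object goes straight into wandb.log on an active run, as the docstring's example shows:

preds = [0, 2, 1, 1, 0]
y_true = [0, 1, 1, 2, 0]
wandb.log({"conf_mat": confusion_matrix(preds, y_true,
                                        class_names=["cat", "dog", "bird"])})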
Example #7
 def end_epoch(self, best_result=False):
     if self.wandb_run:
         wandb.log(self.log_dict)
         self.log_dict = {}
         if self.result_artifact:
             train_results = wandb.JoinedTable(self.val_table, self.result_table, "id")
             self.result_artifact.add(train_results, 'result')
             wandb.log_artifact(self.result_artifact, aliases=['latest', 'epoch ' + str(self.current_epoch),
                                                               ('best' if best_result else '')])
             self.result_table = wandb.Table(["epoch", "id", "prediction", "avg_confidence"])
             self.result_artifact = wandb.Artifact("run_" + wandb.run.id + "_progress", "evaluation")
Example #8
def wandb_table(txt_file, epoch=0, num_classes=8, fold_index=None):

    metrics, data, auc, balanced_accuracy = _log_output(txt_file, num_classes)

    if fold_index is None:
        wandb.log({
            "Evaluation Metrics": wandb.Table(data=data, columns=metrics),
            'epoch': epoch
        })
    else:
        wandb.log({
            f"Evaluation Metrics - Fold {fold_index}":
                wandb.Table(data=data, columns=metrics),
            'epoch': epoch
        })

    return auc, balanced_accuracy
Example #9
    def setup_training(self, opt):
        """
        Setup the necessary processes for training YOLO models:
          - Attempt to download model checkpoint and dataset artifacts if opt.resume starts with WANDB_ARTIFACT_PREFIX
          - Update data_dict to contain info about the previous run if resumed, and the paths of the dataset artifact if downloaded
          - Setup log_dict, initialize bbox_interval

        arguments:
        opt (namespace) -- commandline arguments for this run

        """
        self.log_dict, self.current_epoch = {}, 0
        self.bbox_interval = opt.bbox_interval
        if isinstance(opt.resume, str):
            modeldir, _ = self.download_model_artifact(opt)
            if modeldir:
                self.weights = Path(modeldir) / "last.pt"
                config = self.wandb_run.config
                opt.weights, opt.save_period, opt.batch_size, opt.bbox_interval, opt.epochs, opt.hyp, opt.imgsz = str(
                    self.weights), config.save_period, config.batch_size, config.bbox_interval, config.epochs,\
                    config.hyp, config.imgsz
        data_dict = self.data_dict
        if self.val_artifact is None:  # If --upload_dataset is set, use the existing artifact, don't download
            self.train_artifact_path, self.train_artifact = self.download_dataset_artifact(
                data_dict.get('train'), opt.artifact_alias)
            self.val_artifact_path, self.val_artifact = self.download_dataset_artifact(
                data_dict.get('val'), opt.artifact_alias)

        if self.train_artifact_path is not None:
            train_path = Path(self.train_artifact_path) / 'data/images/'
            data_dict['train'] = str(train_path)
        if self.val_artifact_path is not None:
            val_path = Path(self.val_artifact_path) / 'data/images/'
            data_dict['val'] = str(val_path)

        if self.val_artifact is not None:
            self.result_artifact = wandb.Artifact(
                "run_" + wandb.run.id + "_progress", "evaluation")
            columns = ["epoch", "id", "ground truth", "prediction"]
            columns.extend(self.data_dict['names'])
            self.result_table = wandb.Table(columns)
            self.val_table = self.val_artifact.get("val")
            if self.val_table_path_map is None:
                self.map_val_table_path()
        if opt.bbox_interval == -1:
            self.bbox_interval = opt.bbox_interval = (
                opt.epochs // 10) if opt.epochs > 10 else 1
            if opt.evolve:
                self.bbox_interval = opt.bbox_interval = opt.epochs + 1
        train_from_artifact = self.train_artifact_path is not None and self.val_artifact_path is not None
        # Update the data_dict to point to local artifacts dir
        if train_from_artifact:
            self.data_dict = data_dict
Example #10
def plot_heat_map(y_true, y_pred, num_labels):
    cm = confusion_matrix(y_true, y_pred)
    columns: List[str] = [label_mapping[i] for i in range(num_labels)]
    df = pd.DataFrame(cm, index=columns, columns=columns)
    df.to_csv("eval_cm.csv")

    fig = sns.heatmap(df, annot=True, fmt='d', cmap="Reds")

    wandb.log({
        "confusion_matrix_plot": wandb.Image(fig),
        "confusion_matrix": wandb.Table(dataframe=df)
    })
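A hedged usage sketch: label_mapping is a module-level dict the function relies on (index to class name; the values here are invented), and an active wandb run plus the sklearn/seaborn/pandas imports are assumed:

label_mapping = {0: "negative", 1: "neutral", 2: "positive"}
plot_heat_map(y_true=[0, 1, 2, 2, 1], y_pred=[0, 1, 1, 2, 1], num_labels=3)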
Example #11
def test_image_reference_with_preferred_path():
    orig_im_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                "..", "assets", "test.png")
    orig_im_path_2 = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                  "..", "assets", "test2.png")
    desired_artifact_path = "images/sample.png"
    with wandb.init() as run:
        artifact = wandb.Artifact("artifact_1", type="test_artifact")
        # manually add the image to a desired path
        artifact.add_file(orig_im_path, desired_artifact_path)
        # create an image that uses this image (it should be smart enough not to add the image twice)
        image = wandb.Image(orig_im_path)
        image_2 = wandb.Image(
            orig_im_path_2)  # this one does not have the path preadded
        # add the image to the table
        table = wandb.Table(["image"], data=[[image], [image_2]])
        # add the table to the artifact
        artifact.add(table, "table")
        run.log_artifact(artifact)

    _cleanup()
    with wandb.init() as run:
        artifact_1 = run.use_artifact("artifact_1:latest")
        original_table = artifact_1.get("table")

        artifact = wandb.Artifact("artifact_2", type="test_artifact")

        # add the image by reference
        image = wandb.Image(original_table.data[0][0])
        image_2 = wandb.Image(original_table.data[1][0])
        # add the image to the table
        table = wandb.Table(["image"], data=[[image], [image_2]])
        # add the table to the artifact
        artifact.add(table, "table")
        run.log_artifact(artifact)

    _cleanup()
    with wandb.init() as run:
        artifact_2 = run.use_artifact("artifact_2:latest")
        artifact_2.download()
Example #12
def report(model, wandb_logger):
    #https://donatstudios.com/CsvToMarkdownTable

    model.eval()
    model = model.cpu()
    y_pred = []
    y_true = []

    for x, y in model.val_dataloader():

        res = torch.max(F.softmax(model(x), dim=1), 1)[1].numpy()
        y_pred.extend(res)
        y_true.extend(y.numpy())
        break

    unique_label = np.unique([y_true, y_pred])
    cmtx = pd.DataFrame(confusion_matrix(y_true, y_pred, labels=unique_label),
                        index=['true:{:}'.format(x) for x in unique_label],
                        columns=['pred:{:}'.format(x) for x in unique_label])

    report = pd.DataFrame(
        classification_report(y_true, y_pred, output_dict=True))

    # build the report rows for the wandb table, one metric per row
    wreport = []
    for row_name, row in zip(['precision', 'recall', 'f1-score', 'support'],
                             report.values):
        wreport.append([row_name] + [str(item) for item in row])

    print(report, cmtx)

    hreport = report.columns
    hreport = hreport.insert(0, '')

    if wandb_logger:
        wandb_logger.log_metrics({
            'confusion_matrix':
            wandb.plots.HeatMap(unique_label,
                                unique_label,
                                cmtx.values,
                                show_text=True),
            'classification_report':
            wandb.Table(data=wreport, columns=hreport.values)
        })
Example #13
def test(model, device, test_loader):
    # ``wandb`` tabular columns
    columns = ["id", "image", "guess", "truth"]
    for digit in range(10):
        columns.append("score_" + str(digit))
    my_table = wandb.Table(columns=columns)

    model.eval()

    # hooks into the model to collect gradients and the topology
    wandb.watch(model)

    test_loss = 0
    correct = 0
    log_counter = 0

    # disable gradient
    with torch.no_grad():

        # loop through the test data loader
        for images, targets in test_loader:
            images, targets = images.to(device), targets.to(
                device)  # device conversion
            outputs = model(images)  # forward pass -- generate predictions
            test_loss += F.nll_loss(
                outputs, targets, reduction="sum").item()  # sum up batch loss
            _, predicted = torch.max(
                outputs.data, 1)  # get the index of the max log-probability
            correct += ((predicted == targets).sum().item()
                        )  # compare predictions to true label

            # log predictions to the ``wandb`` table
            if log_counter < NUM_BATCHES_TO_LOG:
                log_test_predictions(images, targets, outputs, predicted,
                                     my_table, log_counter)
                log_counter += 1

    # compute the average loss
    test_loss /= len(test_loader.dataset)

    print("\naccuracy={:.4f}\n".format(
        float(correct) / len(test_loader.dataset)))
    accuracy = float(correct) / len(test_loader.dataset)

    # log the average loss, accuracy, and table
    wandb.log({
        "test_loss": test_loss,
        "accuracy": accuracy,
        "mnist_predictions": my_table
    })

    return accuracy
Example #14
def evaluate_and_print_metrics(
    predicted_path: str,
    gold_path: str,
    language='ru',
    max_count=None,
    is_multiple_ref=False,
    detokenize_after=False,
    tokenize_after=False,
    lower=False,
    are_clusters_used=False,
):
    hyps = []
    refs = []

    table = wandb.Table(columns=[
        'Reference', 'Prediction', 'Reference processed',
        'Prediction processed'
    ])

    with open(gold_path, "r") as gold, open(predicted_path, "r") as pred:
        for i, (ref, hyp) in enumerate(zip(gold, pred)):

            if i % 500 == 0:
                ref_before = ref
                hyp_before = hyp

            if max_count is not None and i >= max_count:
                break

            ref, hyp = postprocess(ref, hyp, language, is_multiple_ref,
                                   detokenize_after, tokenize_after, lower)
            if not hyp:
                print("Empty hyp for ref: ", ref)
                continue
            if not ref:
                continue

            if i % 500 == 0:
                table.add_data(ref_before, hyp_before, ref, hyp)

            refs.append(ref)
            hyps.append(hyp)

    if are_clusters_used:
        wandb.run.summary.update({'Examples with multiple references': table})
    else:
        wandb.run.summary.update({'Examples': table})

    print_metrics(refs,
                  hyps,
                  language=language,
                  are_clusters_used=are_clusters_used)
Example #15
def line_series(xs, ys, keys=None, title=None, xname=None):
    """
    Construct a line series plot.

    Arguments:
        xs (array of arrays, or array): Array of arrays of x values
        ys (array of arrays): Array of y values
        keys (array): Labels for each line; defaults to "key_{i}" when omitted.
        title (string): Plot title.
        xname: Title of x-axis

    Returns:
        A plot object, to be passed to wandb.log()

    Example:
        ```
        # When logging a singular array for x, all ys are plotted against that x
        x = [i for i in range(10)]
        ys = [
            [i for i in range(10)],
            [i**2 for i in range(10)]
        ]
        wandb.log({'line-series-plot1': wandb.plot.line_series(x, ys, "title", "step")})

        # xs can also contain an array of arrays for having different steps for each metric
        xs = [[i for i in range(10)], [2*i for i in range(10)]]
        ys = [
            [i for i in range(10)],
            [i**2 for i in range(10)]
        ]
        wandb.log({'line-series-plot1': wandb.plot.line_series(xs, ys, "title", "step")})
        ```
    """
    data = []
    if not isinstance(xs[0], Sequence):
        xs = [xs for _ in range(len(ys))]
    assert len(xs) == len(ys), "Number of x-lines and y-lines must match"
    for i, series in enumerate([list(zip(xs[i], ys[i])) for i in range(len(xs))]):
        for x, y in series:
            if keys is None:
                key = "key_{}".format(i)
            else:
                key = keys[i]
            data.append([x, key, y])

    table = wandb.Table(data=data, columns=["step", "lineKey", "lineVal"])

    return wandb.plot_table(
        "wandb/lineseries/v0",
        table,
        {"step": "step", "lineKey": "lineKey", "lineVal": "lineVal"},
        {"title": title, "xname": xname or "x"},
    )
Example #16
def test_table_slice_reference_artifact():
    with wandb.init(project=WANDB_PROJECT) as run:
        artifact = wandb.Artifact("table_data", "data")
        table = _make_wandb_table()
        artifact.add(table, "table")
        run.log_artifact(artifact)

    with wandb.init(project=WANDB_PROJECT) as run:
        artifact_1 = run.use_artifact("table_data:latest")
        t1 = artifact_1.get("table")
        artifact = wandb.Artifact("intermediate_data", "data")
        i1 = wandb.Table(t1.columns, t1.data[:1])
        i2 = wandb.Table(t1.columns, t1.data[1:])
        artifact.add(i1, "table1")
        artifact.add(i2, "table2")
        run.log_artifact(artifact)

    with wandb.init(project=WANDB_PROJECT) as run:
        artifact_2 = run.use_artifact("intermediate_data:latest")
        i1 = artifact_2.get("table1")
        i2 = artifact_2.get("table2")
        artifact = wandb.Artifact("reference_data", "data")
        table1 = wandb.Table(t1.columns, i1.data)
        table2 = wandb.Table(t1.columns, i2.data)
        artifact.add(table1, "table1")
        artifact.add(table2, "table2")
        run.log_artifact(artifact)

    _cleanup()
    with wandb.init(project=WANDB_PROJECT) as run:
        artifact_3 = run.use_artifact("reference_data:latest")
        table1 = artifact_3.get("table1")
        table2 = artifact_3.get("table2")

    assert not os.path.isdir(os.path.join(artifact_2._default_root()))
    # assert os.path.islink(os.path.join(artifact_3._default_root(), "media", "images", "test.png"))
    # assert os.path.islink(os.path.join(artifact_3._default_root(), "media", "images", "test2.png"))
    assert t1.data[:1] == table1.data
    assert t1.data[1:] == table2.data
Example #17
def test_add_table_from_dataframe(runner, live_mock_server, test_settings):
    with runner.isolated_filesystem():
        import pandas as pd

        df_float = pd.DataFrame([[1, 2.0, 3.0]], dtype=float)
        df_float32 = pd.DataFrame([[1, 2.0, 3.0]], dtype=np.float32)
        df_bool = pd.DataFrame([[True, False, True]], dtype=bool)

        wb_table_float = wandb.Table(dataframe=df_float)
        wb_table_float32 = wandb.Table(dataframe=df_float32)
        wb_table_float32_recast = wandb.Table(
            dataframe=df_float32.astype(float))
        wb_table_bool = wandb.Table(dataframe=df_bool)

        run = wandb.init(settings=test_settings)
        artifact = wandb.Artifact("table-example", "dataset")
        artifact.add(wb_table_float, "wb_table_float")
        artifact.add(wb_table_float32_recast, "wb_table_float32_recast")
        artifact.add(wb_table_float32, "wb_table_float32")
        artifact.add(wb_table_bool, "wb_table_bool")
        run.log_artifact(artifact)
        run.finish()
Example #18
    def log_predictions(
        self,
        predictions,
        prediction_col_name = "output",
        val_ndx_col_name = "val_row",
        table_name = "validation_predictions",
        commit = False,
    ):
        """Logs a set of predictions.

        Intended usage:

        vl.log_predictions(vl.make_predictions(self.model.predict))

        Args:
            predictions (Sequence | Dict[str, Sequence]): A list of prediction vectors or dictionary
                of lists of prediction vectors
            prediction_col_name (str, optional): the name of the prediction column. Defaults to "output".
            val_ndx_col_name (str, optional): The name of the column linking the prediction table
                to the validation data table. Defaults to "val_row".
            table_name (str, optional): name of the prediction table. Defaults to "validation_predictions".
            commit (bool, optional): determines if commit should be called on the logged data. Defaults to False.
        """
        if self.local_validation_artifact is not None:
            self.local_validation_artifact.wait()

        pred_table = wandb.Table(columns=[], data=[])
        if isinstance(predictions, dict):
            for col_name in predictions:
                pred_table.add_column(col_name, predictions[col_name])
        else:
            pred_table.add_column(prediction_col_name, predictions)
        pred_table.add_column(val_ndx_col_name, self.validation_indexes)

        if self.prediction_row_processor is None and self.infer_missing_processors:
            example_prediction = _make_example(predictions)
            example_input = _make_example(self.validation_inputs)
            if example_prediction is not None and example_input is not None:
                self.prediction_row_processor = _infer_prediction_row_processor(
                    example_prediction,
                    example_input,
                    self.class_labels_table,
                    self.input_col_name,
                    prediction_col_name,
                )

        if self.prediction_row_processor is not None:
            pred_table.add_computed_columns(self.prediction_row_processor)

        wandb.log({table_name: pred_table})
        return pred_table
Example #19
def main(argv):
    # test to ensure table media is still tracked correctly after changing the working directory
    run = wandb.init()
    run_project = run.project
    run_id = run.id
    print("Started run {}/{}".format(run_project, run_id))

    os.makedirs('./chdir_test', exist_ok=True)

    os.chdir('./chdir_test')
    # log some table data, which is saved in the media folder
    # 40 rows: 10 of each (class, precision, recall) combination
    pr_data = ([['setosa', 1.0, 1.0]] * 10 + [['setosa', 1.0, 0.0]] * 10 +
               [['versicolor', 1.0, 1.0]] * 10 +
               [['versicolor', 1.0, 0.0]] * 10)

    # convert the data to a table
    pr_table = wandb.Table(data=pr_data,
                           columns=["class", "precision", "recall"])
    wandb.log({'pr_table': pr_table})
    wandb.finish()

    # Check results
    api = wandb.Api()
    last_run = api.run("%s/%s" % (run_project, run_id))
    media_path = last_run.summary_metrics["pr_table"]["path"]
    media_file = last_run.file(media_path)
    assert media_file.size > 0
    print("Success")
Example #20
def upload_history_to_wandb(history):
    """Convenience function to upload a Keras history to W&B

    Parameters
    ----------
    history : tf.keras.callbacks.History
        History object obtained from training
    """
    # Turn into df
    history_df = pd.DataFrame.from_dict(history.history)
    # Turn into wandb Table
    history_table = wandb.Table(dataframe=history_df)
    # Log
    wandb.log({"history": history_table})
Example #21
    def validation_end(self, outputs):
        val_loss_mean = torch.stack([x['val_loss'] for x in outputs]).mean()
        val_ppl = torch.exp(val_loss_mean)
        adjusted_val_loss = val_loss_mean * \
            ((self.val_dataset.n_tokens - 1) /
             (self.val_dataset.n_original_tokens - 1))
        adjusted_val_ppl = torch.exp(adjusted_val_loss)

        if self.args.accelerator != "TPU":
            device = torch.device(
                "cuda" if torch.cuda.is_available() else "cpu")

            prompt = torch.tensor(self.tokenizer.encode(
                "<|endoftext|> ")).unsqueeze(0).to(device)
            outputs = self.model.generate(
                input_ids=prompt,
                max_length=self.args.sample_len,
                temperature=self.args.temperature,
                top_k=self.args.top_k,
                top_p=self.args.top_p,
                repetition_penalty=self.args.repetition_penalty,
                num_return_sequences=1)
            outputs = self.tokenizer.decode(outputs[0].cpu().numpy(),
                                            skip_special_tokens=True)
            print("\nSampling:")
            print(outputs)
            print("\n")

            self.table_data.append([f'{self.trainer.current_epoch}', outputs])

        metrics = {
            'epoch': self.trainer.current_epoch,
            'val_loss': val_loss_mean,
            'val_ppl': val_ppl,
            'adjusted_val_ppl': adjusted_val_ppl,
            "log": {
                'epoch':
                self.trainer.current_epoch,
                'val_loss':
                val_loss_mean,
                'val_ppl':
                val_ppl,
                'adjusted_val_ppl':
                adjusted_val_ppl,
                "samples":
                wandb.Table(columns=['Epoch', 'Text'], data=self.table_data)
            }
        }

        return metrics
Example #22
def _make_wandb_table():
    return wandb.Table(
        columns=columns,
        data=[
            ["string", True, 1, 1.4,
             _make_wandb_image()],
            ["string", True, 1, 1.4,
             _make_wandb_image()],
            ["string2", False, -0, -1.4,
             _make_wandb_image("2")],
            ["string2", False, -0, -1.4,
             _make_wandb_image("2")],
        ],
    )
Example #23
def test_reference_table_artifacts(mocked_run, live_mock_server, test_settings, api):
    live_mock_server.set_ctx({"max_cli_version": "0.11.0"})
    run = wandb.init(settings=test_settings)
    t = wandb.Table(columns=["a"], data=[[wandb.Image(np.ones(shape=(32, 32)))]],)

    art = wandb.Artifact("A", "dataset")
    art.add(t, "table")
    run.log_artifact(art)
    art = wandb.Artifact("A", "dataset")
    art.add(t, "table")
    run.log_artifact(art)

    run.finish()
    assert True
Example #24
def exp_generative_train(train_file,
                         val_file,
                         test_file,
                         reconstruct_strategy,
                         max_len,
                         epochs,
                         results_dir,
                         model_conf_params,
                         n_pretrain_steps=50,
                         batch_size=256,
                         lr=0.0005,
                         betas=(0.5, 0.999)):
    wandb.init(project="generative train")
    if not os.path.exists(results_dir):
        os.mkdir(results_dir)
    training_log_dir = os.path.join(results_dir, 'training/')
    if not os.path.exists(training_log_dir):
        os.mkdir(training_log_dir)

    vocab = my_vocab.Vocab()
    vocab.build_from_formula_file(train_file)
    vocab.write_vocab_to_file(os.path.join(results_dir, 'vocab.txt'))
    device = torch.device('cuda')
    train_batches, _ = my_batch_builder.build_ordered_batches(
        train_file, vocab, batch_size, device)
    valid_batches, _ = my_batch_builder.build_ordered_batches(
        val_file, vocab, batch_size, device)

    model_params = my_model.ModelParams(vocab=vocab,
                                        vocab_size=vocab.size(),
                                        device=device,
                                        **model_conf_params)
    model = my_model.FormulaVARE(model_params)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=betas)
    n_formulas_to_sample = 2000
    use_for_train_fraction = 0.2
    table = wandb.Table(columns=[
        "max_len", "epochs", "batch_size", "learning_rate",
        "n_formulas_sampled", "chosen_for_train_fraction", "n_pretrain_steps"
    ])
    table.add_data(max_len, epochs, batch_size, lr, n_formulas_to_sample,
                   use_for_train_fraction, n_pretrain_steps)
    wandb.log({'configs': table})
    my_generative_train.generative_train(model, vocab, optimizer, epochs,
                                         device, batch_size,
                                         n_formulas_to_sample, 'sample',
                                         max_len, use_for_train_fraction,
                                         n_pretrain_steps, train_batches,
                                         valid_batches)
Example #25
 def wandb_log_gen_obs(self, outputs: List[List[List[str]]],
                       table_title: str) -> None:
     flat_outputs = [item for sublist in outputs for item in sublist]
     data = (random.sample(flat_outputs,
                           self.hparams.sample_k_gen_obs)  # type: ignore
             if len(flat_outputs) >= self.hparams.sample_k_gen_obs  # type: ignore
             else flat_outputs)
     self.logger.experiment.log({
         table_title:
         wandb.Table(data=data,
                     columns=["Groundtruth", "Predicted", "Decoded"])
     })
Example #26
def _log_feature_importance(model: "Booster") -> None:
    """Log feature importance."""
    feat_imps = model.feature_importance()
    feats = model.feature_name()
    fi_data = [[feat, feat_imp] for feat, feat_imp in zip(feats, feat_imps)]
    table = wandb.Table(data=fi_data, columns=["Feature", "Importance"])
    wandb.log(
        {
            "Feature Importance":
            wandb.plot.bar(
                table, "Feature", "Importance", title="Feature Importance")
        },
        commit=False,
    )
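A hedged usage sketch for a LightGBM Booster (train_set is assumed); since the call above uses commit=False, the chart is only written out with the next committed log:

import lightgbm as lgb

booster = lgb.train({"objective": "regression"}, train_set)
_log_feature_importance(booster)
wandb.log({})  # empty committed log flushes the pending chart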
Example #27
 def get(self):
     clz_name, clz_ap = self._metric.get()
     table = [['Class', 'AP']] + list(zip(clz_name, clz_ap))
     table = AsciiTable(table)
     table.justify_columns[1] = 'right'
     if self._log_flag:
         logging.info('\n' + table.table)
         if wandb.run:
             headers = table.table_data[0]
             data = table.table_data[1:-1]
             wandb_table = wandb.Table(columns=headers, data=data)
             wandb.log({'mAP': clz_ap[-1], 'APs': wandb_table}, commit=False)
         self._log_flag = False
     return clz_name[-1], clz_ap[-1]
Example #28
def test_table_explicit_types():
    table = wandb.Table(columns=["a", "b"], dtype=int)
    table.add_data(None, None)
    table.add_data(1, 2)
    with pytest.raises(TypeError):
        table.add_data(1, "a")

    table = wandb.Table(columns=["a", "b"], optional=False, dtype=[int, str])
    with pytest.raises(TypeError):
        table.add_data(None, None)
    table.add_data(1, "a")
    with pytest.raises(TypeError):
        table.add_data("a", "a")

    table = wandb.Table(columns=["a", "b"], optional=[False, True], dtype=[int, str])
    with pytest.raises(TypeError):
        table.add_data(None, None)
    with pytest.raises(TypeError):
        table.add_data(None, "a")
    table.add_data(1, None)
    table.add_data(1, "a")
    with pytest.raises(TypeError):
        table.add_data("a", "a")
Example #29
 def get_classification_report(self):
     table = wandb.Table(columns=[
         "class", "accuracy", "precision", "recall", "f1-score", "support"
     ])
     accuracies = self.confusion_matrix.astype(
         "float") / self.confusion_matrix.sum(axis=1)[:, np.newaxis]
     accuracies = accuracies.diagonal()
     for i in range(len(self.classes)):
         table.add_data(self.classes[i], accuracies[i],
                        self.classification_report[str(i)]['precision'],
                        self.classification_report[str(i)]['recall'],
                        self.classification_report[str(i)]['f1-score'],
                        self.classification_report[str(i)]['support'])
     return table
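The returned table is then logged by the caller, e.g. (tracker is a hypothetical instance carrying the confusion_matrix, classes, and classification_report attributes used above):

wandb.log({"classification_report": tracker.get_classification_report()})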
Example #30
 def create_dataset_table(self, dataset, class_to_id, name='dataset'):
     # TODO: Explore multiprocessing to split this loop in parallel; this is essential for speeding up the logging
     artifact = wandb.Artifact(name=name, type="dataset")
     img_files = tqdm([dataset.path]) if isinstance(
         dataset.path, str) and Path(dataset.path).is_dir() else None
     img_files = tqdm(dataset.img_files) if not img_files else img_files
     for img_file in img_files:
         if Path(img_file).is_dir():
             artifact.add_dir(img_file, name='data/images')
             labels_path = 'labels'.join(dataset.path.rsplit('images', 1))
             artifact.add_dir(labels_path, name='data/labels')
         else:
             artifact.add_file(img_file,
                               name='data/images/' + Path(img_file).name)
             label_file = Path(img2label_paths([img_file])[0])
             if label_file.exists():
                 artifact.add_file(str(label_file),
                                   name='data/labels/' + label_file.name)
     table = wandb.Table(columns=["id", "train_image", "Classes", "name"])
     class_set = wandb.Classes([{
         'id': id,
         'name': name
     } for id, name in class_to_id.items()])
     for si, (img, labels, paths, shapes) in enumerate(tqdm(dataset)):
         box_data, img_classes = [], {}
         for cls, *xywh in labels[:, 1:].tolist():
             cls = int(cls)
             box_data.append({
                 "position": {
                     "middle": [xywh[0], xywh[1]],
                     "width": xywh[2],
                     "height": xywh[3]
                 },
                 "class_id": cls,
                 "box_caption": "%s" % (class_to_id[cls])
             })
             img_classes[cls] = class_to_id[cls]
         boxes = {
             "ground_truth": {
                 "box_data": box_data,
                 "class_labels": class_to_id
             }
         }  # inference-space
         table.add_data(si,
                        wandb.Image(paths, classes=class_set, boxes=boxes),
                        list(img_classes.values()),
                        Path(paths).name)
     artifact.add(table, name)
     return artifact