Example #1
0
def make_grad_attn_viz(grads, attention, threshold=0.005):
    """
    Build a custom wandb chart pairing per-step gradients with attention weights.

    One table row is produced for every (source_step, target_step) pair. Each
    row holds the two step indices, the gradient of the target step and the
    attention weight between the two steps.

    :param grads: list of gradient values length T
    :param attention: TxT matrix of attention weights
    :param threshold: accepted for interface compatibility; not used here
    :return: the object returned by ``wandb.plot_table`` for the
        "kylegoyette/loss-gradient-attention-propagation" preset
    """
    steps = range(attention.shape[0])
    # Row order is source-major: all targets of source 0, then source 1, ...
    rows = [
        [source, target, grads[target], attention[source, target]]
        for source in steps
        for target in steps
    ]
    column_names = ['source_step', 'target_step', 'grad', 'attn']
    # Maps the chart preset's field names onto the table's column names.
    chart_fields = {
        "source step": "source_step",
        "target step": "target_step",
        "grad": "grad",
        "attn": "attn",
    }
    return wandb.plot_table(
        vega_spec_name="kylegoyette/loss-gradient-attention-propagation",
        data_table=wandb.Table(data=rows, columns=column_names),
        fields=chart_fields)
Example #2
0
def bar(table, label, value, title=None):
    """
    Build a bar chart from a wandb.Table using the "wandb/bar/v0" preset.

    Arguments:
    table (wandb.Table): Table of data.
    label (string): Name of the column that supplies each bar's label.
    value (string): Name of the column that supplies each bar's value.
    title (string): Plot title.

    Returns:
    A plot object, to be passed to wandb.log()

    Example:
    table = wandb.Table(data=[
        ['car', random.random()],
        ['bus', random.random()],
        ['road', random.random()],
        ['person', random.random()],
        ], columns=["class", "acc"])
    wandb.log({'bar-plot1': wandb.plot.bar(table, "class", "acc")})
    """
    # Chart field name -> table column name.
    column_fields = {"label": label, "value": value}
    # Free-text settings forwarded to the preset.
    text_fields = {"title": title}
    return wandb.plot_table("wandb/bar/v0", table, column_fields, text_fields)
Example #3
0
def confusion_matrix(preds=None, y_true=None, class_names=None):
    """
    Builds a confusion matrix chart from predicted and true label indices.

    Arguments:
    preds (arr): Array of predicted label indices (zero-based).
    y_true (arr): Array of label indices (zero-based).
    class_names (arr): Array of class names, indexed by label index. When
      omitted, names "Class_1" ... "Class_N" are generated, where N is the
      largest index seen plus one (index 0 is shown as "Class_1").

    Returns:
    A plot object, to be passed to wandb.log()

    Example:
    wandb.log({'conf_mat': wandb.plot.confusion_matrix(preds, y_true, labels)})
    """

    np = util.get_module(
        "numpy",
        required=
        "confusion matrix requires the numpy library, install with `pip install numpy`",
    )
    assert len(preds) == len(
        y_true), "Number of predictions and label indices must match"
    if class_names is not None:
        n_classes = len(class_names)
        # Indices are zero-based, so the largest valid index is n_classes - 1.
        assert max(
            preds) < n_classes, "Higher predicted index than number of classes"
        assert max(
            y_true
        ) < n_classes, "Higher label class index than number of classes"
    else:
        # +1 because indices are zero-based; without it the largest index
        # would fall outside the counts matrix below.
        n_classes = max(max(preds), max(y_true)) + 1
        class_names = ["Class_{}".format(i) for i in range(1, n_classes + 1)]

    # counts[actual, predicted] = number of examples with that combination.
    counts = np.zeros((n_classes, n_classes))
    for actual, predicted in zip(y_true, preds):
        counts[actual, predicted] += 1

    # One table row per cell of the matrix, in row-major order.
    data = [[class_names[i], class_names[j], counts[i, j]]
            for i in range(n_classes)
            for j in range(n_classes)]

    fields = {
        "Actual": "Actual",
        "Predicted": "Predicted",
        "nPredicted": "Count"
    }

    return wandb.plot_table(
        "wandb/confusion_matrix/v0",
        wandb.Table(columns=["Actual", "Predicted", "Count"], data=data),
        fields,
    )
Example #4
0
def line_series(xs, ys, keys=None, title=None, xname=None):
    """
    Construct a line series plot.

    Arguments:
        xs (array of arrays, or array): Array of arrays of x values, one per
            line, or a single array of x values shared by every line.
        ys (array of arrays): Array of arrays of y values, one per line.
        keys (array of strings): Name of each line, in the same order as ys.
            Defaults to "key_0", "key_1", ...
        title (string): Plot title.
        xname: Title of x-axis. Defaults to "x".

    Returns:
        A plot object, to be passed to wandb.log()

    Example:
        ```
        When logging a singular array for x, all ys are plotted against that x
        x = [i for i in range(10)]
        ys = [
            [i for i in range(10)],
            [i**2 for i in range(10)]
        ]
        wandb.log({'line-series-plot1': wandb.plot.line_series(x, ys, title="title", xname="step")})

        xs can also contain an array of arrays for having different steps for each metric
        xs = [[i for i in range(10)], [2*i for i in range(10)]]
        ys = [
            [i for i in range(10)],
            [i**2 for i in range(10)]
        ]
        wandb.log({'line-series-plot1': wandb.plot.line_series(xs, ys, title="title", xname="step")})
        ```
    """
    # A flat xs (first element is not itself a sequence) is shared by all lines.
    if not isinstance(xs[0], Sequence):
        xs = [xs] * len(ys)
    assert len(xs) == len(ys), "Number of x-lines and y-lines must match"
    # Fail up front with a clear message instead of an IndexError mid-loop.
    assert keys is None or len(keys) >= len(
        ys), "Number of keys must cover every y-line"

    data = []
    for i, (x_values, y_values) in enumerate(zip(xs, ys)):
        key = "key_{}".format(i) if keys is None else keys[i]
        # zip stops at the shorter of the two arrays for this line.
        data.extend([x, key, y] for x, y in zip(x_values, y_values))

    table = wandb.Table(data=data, columns=["step", "lineKey", "lineVal"])

    return wandb.plot_table(
        "wandb/lineseries/v0",
        table,
        {"step": "step", "lineKey": "lineKey", "lineVal": "lineVal"},
        {"title": title, "xname": xname or "x"},
    )
Example #5
0
def histogram(table, value, title=None):
    """
    Build a histogram from a wandb.Table using the "wandb/histogram/v0" preset.

    Arguments:
    table (wandb.Table): Table of data.
    value (string): Name of column to use as data for bucketing.
    title (string): Plot title.

    Returns:
    A plot object, to be passed to wandb.log()

    Example:
    data = [[i, random.random() + math.sin(i / 10)] for i in range(100)]
    table = wandb.Table(data=data, columns=["step", "height"])
    wandb.log({'histogram-plot1': wandb.plot.histogram(table, "height")})
    """
    # Chart field name -> table column name.
    column_fields = {'value': value}
    # Free-text settings forwarded to the preset.
    text_fields = {'title': title}
    return wandb.plot_table('wandb/histogram/v0', table, column_fields,
                            text_fields)
Example #6
0
def line_series(xs, ys, keys=None, title=None, xname=None):
    """
    Construct a line series plot.

    Arguments:
        xs (array of arrays, or array): Array of arrays of x values, one per
            line, or a single array of x values shared by every line.
        ys (array of arrays): Array of arrays of y values, one per line.
        keys (array of strings): Name of each line, in the same order as ys.
            Defaults to "key_0", "key_1", ...
        title (string): Plot title.
        xname: Title of x-axis. Defaults to "x".

    Returns:
        A plot object, to be passed to wandb.log()
    """
    # A flat xs (first element is not itself a sequence) is shared by all lines.
    if not isinstance(xs[0], Sequence):
        xs = [xs] * len(ys)
    assert len(xs) == len(ys), "Number of x-lines and y-lines must match"
    # Fail up front with a clear message instead of an IndexError mid-loop.
    assert keys is None or len(keys) >= len(
        ys), "Number of keys must cover every y-line"

    data = []
    for line_index, (x_values, y_values) in enumerate(zip(xs, ys)):
        if keys is None:
            key = "key_{}".format(line_index)
        else:
            key = keys[line_index]
        # zip stops at the shorter of the two arrays for this line.
        for x, y in zip(x_values, y_values):
            data.append([x, key, y])

    table = wandb.Table(data=data, columns=["step", "lineKey", "lineVal"])

    return wandb.plot_table(
        "wandb/lineseries/v0",
        table,
        {"step": "step", "lineKey": "lineKey", "lineVal": "lineVal"},
        {"title": title, "xname": xname or "x"},
    )
Example #7
0
def scatter(table, x, y, title=None):
    """
    Build a scatter plot from a wandb.Table using the "wandb/scatter/v0" preset.

    Arguments:
    table (wandb.Table): Table of data.
    x (string): Name of the column to use for x-axis values.
    y (string): Name of the column to use for y-axis values.
    title (string): Plot title.

    Returns:
    A plot object, to be passed to wandb.log()

    Example:
    data = [[i, random.random() + math.sin(i / 10)] for i in range(100)]
    table = wandb.Table(data=data, columns=["step", "height"])
    wandb.log({'scatter-plot1': wandb.plot.scatter(table, "step", "height")})
    """
    # Chart field name -> table column name.
    column_fields = {"x": x, "y": y}
    # Free-text settings forwarded to the preset.
    text_fields = {"title": title}
    return wandb.plot_table("wandb/scatter/v0", table, column_fields,
                            text_fields)
Example #8
0
def line(table, x, y, stroke=None, title=None):
    """
    Build a line plot from a wandb.Table using the "wandb/line/v0" preset.

    Arguments:
    table (wandb.Table): Table of data.
    x (string): Name of the column to use for x-axis values.
    y (string): Name of the column to use for y-axis values.
    stroke (string): Name of column to map to the line stroke scale.
    title (string): Plot title.

    Returns:
    A plot object, to be passed to wandb.log()

    Example:
    data = [[i, random.random() + math.sin(i / 10)] for i in range(100)]
    table = wandb.Table(data=data, columns=["step", "height"])
    wandb.log({'line-plot1': wandb.plot.line(table, "step", "height")})
    """
    # Chart field name -> table column name; stroke is forwarded even when None.
    column_fields = {"x": x, "y": y, "stroke": stroke}
    # Free-text settings forwarded to the preset.
    text_fields = {"title": title}
    return wandb.plot_table("wandb/line/v0", table, column_fields, text_fields)
Example #9
0
def roc_curve(y_true=None, y_probas=None, labels=None, classes_to_plot=None):
    """
    Calculates receiver operating characteristic scores and visualizes them as the
        ROC curve.

    Arguments:
        y_true (arr): Test set labels.
        y_probas (arr): Test set predicted probabilities. Column i is used as
                        the score for the i-th sorted unique value of y_true.
        labels (list): Named labels for target variable (y). Makes plots easier to
                        read by replacing target values with corresponding index.
                        For example labels= ['dog', 'cat', 'owl'] all 0s are
                        replaced by 'dog', 1s by 'cat'. Only applied when the
                        class values are integers.
        classes_to_plot (list): Subset of the values in y_true to draw curves
                        for. Defaults to every class found in y_true.

    Returns:
        A plot object, to be passed to wandb.log(), or None when the
        test_missing / test_types input checks do not pass.

    Example:
        wandb.log({'roc-curve': wandb.plot.roc_curve(y_true, y_probas, labels)})
    """
    np = util.get_module(
        "numpy",
        required=
        "roc requires the numpy library, install with `pip install numpy`")
    util.get_module(
        "sklearn",
        required=
        "roc requires the scikit library, install with `pip install scikit-learn`"
    )
    # Aliased so the import does not shadow this function's own name.
    from sklearn.metrics import roc_curve as sklearn_roc_curve

    if not (test_missing(y_true=y_true, y_probas=y_probas)
            and test_types(y_true=y_true, y_probas=y_probas)):
        return None

    y_true = np.array(y_true)
    probas = np.array(y_probas)
    classes = np.unique(y_true)

    if classes_to_plot is None:
        classes_to_plot = classes

    # Boolean mask over `classes` marking which ones get a curve.
    indices_to_plot = np.in1d(classes, classes_to_plot)

    data = []
    truncated = False
    for i, to_plot in enumerate(indices_to_plot):
        if not to_plot:
            # Skip the ROC computation entirely for classes that are not drawn.
            continue
        fpr_values, tpr_values, _ = sklearn_roc_curve(y_true,
                                                      probas[:, i],
                                                      pos_label=classes[i])
        # Resolve the display name once per class rather than once per point.
        if labels is not None and isinstance(classes[i], (int, np.integer)):
            class_label = labels[classes[i]]
        else:
            class_label = classes[i]

        for fpr_value, tpr_value in zip(fpr_values, tpr_values):
            data.append(
                [class_label,
                 round(fpr_value, 3),
                 round(tpr_value, 3)])
            if len(data) >= chart_limit:
                truncated = True
                break
        if truncated:
            # Stop the outer loop too, so the table stays within the limit and
            # the warning is emitted exactly once.
            wandb.termwarn(
                "wandb uses only the first %d datapoints to create the plots."
                % wandb.Table.MAX_ROWS)
            break

    table = wandb.Table(columns=['class', 'fpr', 'tpr'], data=data)
    return wandb.plot_table('wandb/area-under-curve/v0', table, {
        'x': 'fpr',
        'y': 'tpr',
        'class': 'class'
    }, {
        'title': 'ROC',
        'x-axis-title': 'False positive rate',
        'y-axis-title': 'True positive rate'
    })
Example #10
0
def pr_curve(y_true=None, y_probas=None, labels=None, classes_to_plot=None):
    """
    Computes the tradeoff between precision and recall for different thresholds.
        A high area under the curve represents both high recall and high precision,
        where high precision relates to a low false positive rate, and high recall
        relates to a low false negative rate. High scores for both show that the
        classifier is returning accurate results (high precision), as well as
        returning a majority of all positive results (high recall).
        PR curve is useful when the classes are very imbalanced.

    Each plotted curve is down-sampled to 20 evenly spaced points.

    Arguments:
    y_true (arr): Test set labels.
    y_probas (arr): Test set predicted probabilities. Column i is used as
      the score for the i-th sorted unique value of y_true.
    labels (list): Named labels for target variable (y). Makes plots easier to
      read by replacing target values with corresponding index.
      For example labels= ['dog', 'cat', 'owl'] all 0s are
      replaced by 'dog', 1s by 'cat'. Only applied when the class values
      are integers.
    classes_to_plot (list): Subset of the values in y_true to draw curves
      for. Defaults to every class found in y_true.

    Returns:
    A plot object, to be passed to wandb.log(), or None when the
    test_missing / test_types input checks do not pass.

    Example:
    wandb.log({'pr-curve': wandb.plot.pr_curve(y_true, y_probas, labels)})
    """
    np = util.get_module(
        "numpy",
        required="roc requires the numpy library, install with `pip install numpy`",
    )
    scikit = util.get_module(
        "sklearn",
        "roc requires the scikit library, install with `pip install scikit-learn`",
    )

    # Converted before the input checks, so the checks see numpy arrays.
    y_true = np.array(y_true)
    y_probas = np.array(y_probas)

    if not (
        test_missing(y_true=y_true, y_probas=y_probas)
        and test_types(y_true=y_true, y_probas=y_probas)
    ):
        return None

    classes = np.unique(y_true)
    probas = y_probas

    if classes_to_plot is None:
        classes_to_plot = classes

    # Boolean mask over `classes` marking which ones get a curve.
    indices_to_plot = np.in1d(classes, classes_to_plot)

    samples = 20
    data = []
    truncated = False
    for i, to_plot in enumerate(indices_to_plot):
        if not to_plot:
            continue
        precision, recall, _ = scikit.metrics.precision_recall_curve(
            y_true, probas[:, i], pos_label=classes[i]
        )

        # Keep `samples` evenly spaced points of the full curve.
        sample_precision = [
            precision[int(len(precision) * k / samples)] for k in range(samples)
        ]
        sample_recall = [
            recall[int(len(recall) * k / samples)] for k in range(samples)
        ]

        # Resolve the display name once per class. Doing it per row would
        # re-map the name again whenever `labels` itself contains integers.
        class_name = classes[i]
        if labels is not None and isinstance(class_name, (int, np.integer)):
            class_label = labels[class_name]
        else:
            # Non-integer classes (string, float, date) or no labels given:
            # use the class value itself.
            class_label = class_name

        for p, r in zip(sample_precision, sample_recall):
            data.append([class_label, round(p, 3), round(r, 3)])
            if len(data) >= chart_limit:
                truncated = True
                break
        if truncated:
            # Stop the outer loop too, so the table stays within the limit and
            # the warning is emitted exactly once.
            wandb.termwarn(
                "wandb uses only the first %d datapoints to create the plots."
                % wandb.Table.MAX_ROWS
            )
            break

    table = wandb.Table(columns=["class", "precision", "recall"], data=data)
    return wandb.plot_table(
        "wandb/area-under-curve/v0",
        table,
        {"x": "recall", "y": "precision", "class": "class"},
        {"title": "Precision v. Recall"},
    )
Example #11
0
def confusion_matrix(probs=None,
                     y_true=None,
                     preds=None,
                     class_names=None,
                     title=None):
    """
    Builds a confusion matrix chart from class probabilities or predictions.

    Exactly one of probs and preds must be given. When probs is given, the
    predicted class of each example is the argmax over its row.

    Arguments:
        probs (2-d arr): Shape [n_examples, n_classes]
        y_true (arr): Array of label indices.
        preds (arr): Array of predicted label indices.
        class_names (arr): Array of class names, indexed by label index
            (zero-based). When omitted, the distinct indices found in preds
            and y_true are sorted and named "Class_1" ... "Class_N".
        title (string): Plot title. Defaults to an empty string.

    Returns:
        A plot object, to be passed to wandb.log()

    Example:
        ```
        vals = np.random.uniform(size=(10, 5))
        probs = np.exp(vals)/np.sum(np.exp(vals), keepdims=True, axis=1)
        y_true = np.random.randint(0, 5, size=(10))
        labels = ["Cat", "Dog", "Bird", "Fish", "Horse"]
        wandb.log({'confusion_matrix': wandb.plot.confusion_matrix(probs, y_true=y_true, class_names=labels)})
        ```
    """

    np = util.get_module(
        "numpy",
        required=
        "confusion matrix requires the numpy library, install with `pip install numpy`",
    )
    # change warning
    assert probs is None or len(probs.shape) == 2, (
        "confusion_matrix has been updated to accept"
        " probabilities as the default first argument. Use preds=...")

    assert (probs is None or preds is None) and not (
        probs is None and preds is None
    ), "Must provide probabilties or predictions but not both to confusion matrix"

    if probs is not None:
        preds = np.argmax(probs, axis=1).tolist()

    assert len(preds) == len(
        y_true), "Number of predictions and label indices must match"

    if class_names is not None:
        n_classes = len(class_names)
        class_inds = list(range(n_classes))
        # Indices are zero-based, so the largest valid index is n_classes - 1.
        # An index equal to n_classes would otherwise surface later as a
        # KeyError in the class mapping below.
        assert max(
            preds) < n_classes, "Higher predicted index than number of classes"
        assert max(
            y_true
        ) < n_classes, "Higher label class index than number of classes"
    else:
        class_inds = set(preds).union(set(y_true))
        n_classes = len(class_inds)
        class_names = ["Class_{}".format(i) for i in range(1, n_classes + 1)]

    # get mapping of inds to class index in case user has weird prediction indices
    class_mapping = {val: i for i, val in enumerate(sorted(class_inds))}

    # counts[actual, predicted] = number of examples with that combination.
    counts = np.zeros((n_classes, n_classes))
    for actual, predicted in zip(y_true, preds):
        counts[class_mapping[actual], class_mapping[predicted]] += 1

    # One table row per cell of the matrix, in row-major order.
    data = [[class_names[i], class_names[j], counts[i, j]]
            for i in range(n_classes)
            for j in range(n_classes)]

    fields = {
        "Actual": "Actual",
        "Predicted": "Predicted",
        "nPredictions": "nPredictions",
    }
    title = title or ""
    return wandb.plot_table(
        "wandb/confusion_matrix/v1",
        wandb.Table(columns=["Actual", "Predicted", "nPredictions"],
                    data=data),
        fields,
        {"title": title},
    )
Example #12
0
def run():
    """
    Train and evaluate an NMT model on Multi30k (German source, English
    target), logging losses and an attention visualization to wandb.

    Steps, in order:
      1. Parse command-line arguments from the module-level ``parser``.
      2. Start a wandb run and derive ``config.save_dir`` from the run name.
      3. Load the spaCy 'en' and 'de' models, downloading them on OSError.
      4. Build the torchtext fields, dataset splits, vocabularies and iterators.
      5. For ``config.nepochs`` epochs: train, evaluate, and log an attention
         table for one training example.

    Takes no arguments and returns nothing.
    """
    args = parser.parse_args()
    # Defaults handed to wandb.init as the initial run config.
    hyper_parameter_defaults = dict(opt='RMSProp',
                                    nonlin='relu',
                                    batch_size=12,
                                    learning_rate=0.0002,
                                    betas=(0.5, 0.999),
                                    alpha=0.9)
    # args.device is treated as a CUDA device index when provided.
    if args.device is not None:
        args.device = torch.device(f'cuda:{args.device}')

    # wandb
    # NOTE: the local name `run` shadows this function's own name from here on.
    # NOTE(review): the two branches log to different projects
    # ("gradientsandtranslation2" vs "gradientsandtranslation") - confirm
    # that this is intended.
    if args.name is None:
        run = wandb.init(project="gradientsandtranslation2",
                         config=hyper_parameter_defaults)
        wandb.config["more"] = "custom"
        # save run to get readable run name
        run.save()
        run.name = os.path.join('NMT', run.name)
        config = wandb.config
        # run.name already carries the 'NMT' prefix here, so 'NMT' appears
        # twice in save_dir on this branch.
        config.save_dir = os.path.join('experiments', 'NMT', run.name)
        run.save()
    else:
        run = wandb.init(project="gradientsandtranslation",
                         config=hyper_parameter_defaults,
                         name=args.name)
        wandb.config["more"] = "custom"
        run.name = os.path.join('NMT', run.name)
        config = wandb.config
        config.save_dir = os.path.join('experiments', 'NMT', args.name)
        run.save()

    # update config object with args
    wandb.config.update(args, allow_val_change=True)

    # set up language
    # Each spaCy model is loaded once; on OSError it is downloaded via a
    # shell command and loaded again.
    try:
        spacy_en = spacy.load('en')
    except OSError as e:
        print(e)
        print('Downloading model...')
        os.system('python -m spacy download en')
        spacy_en = spacy.load('en')
    try:
        spacy_de = spacy.load('de')
    except OSError as e:
        print(e)
        print('Downloading model...')
        os.system('python -m spacy download de')
        spacy_de = spacy.load('de')

    def tokenize_de(text):
        """
        Tokenizes German text from a string into a list of strings (tokens).
        The token order is kept as-is; the reversal is commented out below.
        """

        return [tok.text for tok in spacy_de.tokenizer(text)]  #[::-1]

    def tokenize_en(text):
        """
        Tokenizes English text from a string into a list of strings (tokens)
        """

        return [tok.text for tok in spacy_en.tokenizer(text)]

    # Only the 'Trans' model uses batch-first tensors.
    if args.model == 'Trans':
        batch_first = True
    else:
        batch_first = False
    # Source field: German, lower-cased, wrapped in <sos>/<eos>.
    SRC = Field(tokenize_de,
                init_token='<sos>',
                eos_token='<eos>',
                lower=True,
                batch_first=batch_first)

    # Target field: English, lower-cased, wrapped in <sos>/<eos>.
    TRG = Field(tokenize_en,
                init_token='<sos>',
                eos_token='<eos>',
                lower=True,
                batch_first=batch_first)

    train_data, val_data, test_data = Multi30k.splits(exts=('.de', '.en'),
                                                      fields=(SRC, TRG))

    # Vocabularies are built from the training split only, with min_freq=2.
    SRC.build_vocab(train_data, min_freq=2)
    TRG.build_vocab(train_data, min_freq=2)

    # Store padding indices and vocabulary sizes on the config, which is
    # passed to NMTExperiment and the train/eval helpers below.
    config.SRCPADIDX = SRC.vocab.stoi[SRC.pad_token]
    config.TRGPADIDX = TRG.vocab.stoi[TRG.pad_token]
    train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
        (train_data, val_data, test_data), batch_size=config.batch_size)
    config.inp_size = len(SRC.vocab)
    config.out_size = len(TRG.vocab)

    # create experiment management object
    experiment = NMTExperiment(config)
    model = experiment.model
    wandb.watch(model)
    # Padding positions in the target are excluded from the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=config.TRGPADIDX)
    for i in range(config.nepochs):

        train_loss = train_nmt(experiment.model, train_iterator,
                               experiment.optimizer, criterion, config, run,
                               SRC, TRG)
        val_loss = eval_nmt(model, valid_iterator, criterion, config, run, SRC,
                            TRG)
        # visualize an example
        # Only training example 8 is visualized after each epoch.
        for example_idx in [8]:
            src = vars(train_data.examples[example_idx])['src']
            trg = vars(train_data.examples[example_idx])['trg']
            translation_inds, translation, attention = translate_sentence(
                src, SRC, TRG, spacy_de, model, config, max_len=50)
            # Add the <sos>/<eos> markers so src lines up with the attention
            # columns indexed by n below.
            src = [SRC.init_token] + src + [SRC.eos_token]
            # Take index 0 of the first axis and average over the next one.
            # Presumably (batch, heads, target, source) - TODO confirm.
            attn = attention[0, :, :, :].mean(dim=0).cpu().numpy()
            # One row per (target position m, source position n) pair.
            attn_data = []
            for m in range(attn.shape[0]):
                for n in range(attn.shape[1]):
                    attn_data.append(
                        [n, m, src[n], translation[m], attn[m, n]])
            data_table = wandb.Table(
                data=attn_data,
                columns=["s_ind", "t_ind", "s_word", "t_word", "attn"])
            # Chart field name -> table column name.
            fields = {
                "sindex": "s_ind",
                "tindex": "t_ind",
                "sword": "s_word",
                "tword": "t_word",
                "attn": "attn"
            }
            wandb.log({
                "my_nlp_viz_id":
                wandb.plot_table("kylegoyette/nlp-attention-visualization",
                                 data_table, fields)
            })

        print(f'Epoch: {i} Train Loss: {train_loss} Val Loss {val_loss}')
Example #13
0
import math
import random

# NOTE(review): `wandb` is assumed to be imported earlier in the file - confirm.

# Start a new run
run = wandb.init(project='custom-charts',
                 notes='Custom stacked bar chart')
# Constant shift applied to every row's height.
offset = random.random()

# Set up data to log in custom charts: one [step, height] row per step.
# The rows are built under the name `data_1`, which the table below reads.
data_1 = [
    [i, random.random() + math.log(1 + i) + offset + random.random()]
    for i in range(100)
]

# Create a table with the columns to plot
table = wandb.Table(data=data_1, columns=["step", "height"])

# Map from the table's columns to the chart's fields
fields = {"x": "step",
          "value": "height"}

# Use the table to populate the new custom chart preset
my_custom_chart = wandb.plot_table(vega_spec_name="carey/stacked_bar_chart",
              data_table=table,
              fields=fields,
              )

# Log the plot to have it show up in the UI
wandb.log({"custom_chart": my_custom_chart})

# Finally, end the run. We only need this line in Jupyter notebooks.
run.finish()