Esempio n. 1
0
    def add(self, preds, targets, info, precomputed_metrics=None):
        """
        Add one batch of preds and targets to the accumulated state.

        If you've already computed metrics for the batch (e.g. loss) you can pass
        them in via precomputed_metrics and the running, batch-size-weighted average
        kept in self.global_metrics is updated. Note: the same set of precomputed
        metric names must be provided on every call.

        All ValueError checks run before any state is modified, so a batch that is
        rejected with ValueError does not leave a partial update behind.

        args:
            preds    predictions for the batch, or a dict mapping task name to
                predictions. A non-dict value is stored under the task "primary".
            targets  targets in the same form as preds. Must cover the same tasks
                as preds and match preds in batch size (as reported by
                get_batch_size) for every task.
            info     (iterable) entries for the batch; appended to self.info
            precomputed_metrics (dict or None) metric name -> value already
                computed for the batch. None (the default) means no metrics.
        raises:
            ValueError if preds and targets cover different tasks, if their batch
                sizes differ for some task, or if the precomputed metric names
                differ from those supplied on earlier calls.
        """
        # None instead of a shared mutable `{}` default
        if precomputed_metrics is None:
            precomputed_metrics = {}

        # wrap single-task inputs so the rest of the method only handles dicts;
        # isinstance (rather than an exact type comparison) keeps dict subclasses
        # such as OrderedDict from being wrapped a second time
        if not isinstance(preds, dict):
            preds = {"primary": preds}
        if not isinstance(targets, dict):
            targets = {"primary": targets}
        if preds.keys() != targets.keys():
            raise ValueError("Predictions and targets over different tasks.")

        # the first task's targets define the batch size used for averaging
        batch_size = get_batch_size(list(targets.values())[0])

        # validate the whole batch before touching any state
        for task, task_preds in preds.items():
            if get_batch_size(targets[task]) != get_batch_size(task_preds):
                raise ValueError("preds must match targets in first dim.")

        batch_keys = set(precomputed_metrics)
        if (self.precomputed_keys is not None
                and self.precomputed_keys != batch_keys):
            raise ValueError("must always supply same precomputed metrics.")

        # NOTE(review): self.preds[task] / self.targets[task] must already hold a
        # list for each task (presumably defaultdict(list)) - confirm in __init__
        for task, task_preds in preds.items():
            self.preds[task].append(place_on_cpu(task_preds))
            self.targets[task].append(place_on_cpu(targets[task]))

        self.info.extend(info)

        # the first call fixes the set of precomputed metric names
        if self.precomputed_keys is None:
            self.precomputed_keys = batch_keys

        # fold the batch value into the running average, weighting by the number
        # of examples seen so far versus in this batch
        # NOTE(review): reads self.global_metrics[key] before the first write, so
        # it presumably provides a default value - confirm
        for key, value in precomputed_metrics.items():
            self.global_metrics[key] = (
                (self.total_size * self.global_metrics[key] +
                 batch_size * value) / (batch_size + self.total_size))
        self.total_size += batch_size
    def generate_gradients(self,
                           inputs,
                           targets,
                           target_task=None,
                           token_idx=None,
                           device=0):
        """
        Generates gradients through the scan for the first element in the batch
        specified by inputs and targets. If batch size is greater than 1, only gradients
        for the first example will be computed. Supports multi-task output via the
        target_task argument. Supports tasks with multiple outputs (as in masked language
        modeling or natural language generation) via the token_idx argument.

        @inputs (dict or torch.Tensor) the input scan. if dict, should contain key "scan".
        @targets (dict or torch.Tensor) target tensor, or dict of targets keyed by task
        @target_task    (None or str) task whose target and output are used;
                        required if targets is a dict
        @token_idx      (None or int) optional: index along the second dimension of
                        the targets/outputs selecting which output to differentiate
        @device         device that the inputs and self.model are moved to

        returns (grad, scan): the gradient of the selected output with respect to
        the scan, and the scan itself, both as returned by place_on_cpu.

        Side effects: self.model is moved to device, its gradients are zeroed, and
        self.forward_relu_outputs is reset to an empty list.
        """
        inputs = place_on_gpu(inputs, device=device)
        self.model = self.model.to(device=device)

        # require gradient on scan so we can backpropagate through the pixels
        scan = inputs["scan"] if isinstance(inputs, dict) else inputs
        scan.requires_grad = True
        # backward() accumulates into .grad; clear anything left by an earlier
        # pass over the same tensor so the result reflects this call only
        scan.grad = None

        # forward pass
        output = self.model(inputs, targets)
        self.model.zero_grad()

        # backward pass: pick the target of the first example in the batch
        if isinstance(targets, dict):
            targets = targets[target_task]
        if token_idx is not None:
            target_class = targets[0, token_idx]
        else:
            target_class = targets[0]

        # if not an index but a softmax (probabilistic case)
        if len(target_class.shape) > 0:
            target_class = target_class.argmax()

        if target_task is not None:
            output = output[target_task]
        # isinstance so that dict subclasses (e.g. an OrderedDict of model
        # outputs) are unwrapped too, not only exact dict instances
        if isinstance(output, dict):
            output = output['out']
        output = output[0, token_idx] if token_idx is not None else output[0]

        output[target_class].backward()

        # reset the flag on the tensor we changed before copying it to the cpu
        scan.requires_grad = False
        grad, scan = place_on_cpu([scan.grad, scan])

        # empty relu outputs so we don't leak CPU memory
        self.forward_relu_outputs = []

        return grad, scan
Esempio n. 3
0
    def from_output_tensor(self, batch_output):
        """ Places batch output on cpu and converts it to tokens ignoring -1's and padding.
        args:
            batch_output    (tensor)   (batch_size, max_len)
        returns:
            a list with one entry per row of batch_output; each entry is the list
            of tokens looked up in self.idx_to_token for that row, skipping
            indices equal to -1 and tokens equal to "[PAD]"
        """
        # place_on_cpu returns the moved value (other call sites use its return
        # value), so the result must be kept for the move to take effect
        batch_output = place_on_cpu(batch_output)

        sents = []
        for output in batch_output:
            # -1 entries are dropped before the lookup, padding tokens after it
            indices = (idx.item() for idx in output)
            tokens = (self.idx_to_token[idx] for idx in indices if idx != -1)
            sents.append([token for token in tokens if token != "[PAD]"])
        return sents
def generate_integrated_gradient(model,
                                 inputs,
                                 targets,
                                 target_task="primary",
                                 steps=50,
                                 baseline=None):
    """
    Approximates the integrated gradients of the target-class output with respect
    to the scan.

    steps + 1 inputs are interpolated linearly from baseline to the scan and run
    through the model as a single batch. The gradients at the first `steps`
    interpolation points are averaged (a left Riemann sum) and multiplied by
    (scan - baseline).

    NOTE(review): averaging over dim 0 of the stacked inputs presumes the scan has
    a leading batch dimension of size 1 - confirm against callers.

    @model    callable invoked as model(expanded_scan, targets); must also provide
              zero_grad()
    @inputs   (dict or torch.Tensor) the input scan. if dict, should contain key "scan".
    @targets  (dict or torch.Tensor) target tensor, or dict of targets keyed by task;
              only the target of the first example is used
    @target_task  (str) key used to select from targets/outputs when they are dicts
    @steps    (int) number of interpolation intervals between baseline and scan;
              must be at least 1
    @baseline (None or torch.Tensor) starting point of the interpolation; defaults
              to zeros shaped like the scan

    returns (integrated_grad, scan): the attribution tensor and the scan as
    returned by place_on_cpu.
    raises ValueError if steps is smaller than 1.
    """
    if steps < 1:
        raise ValueError("steps must be at least 1.")

    scan = inputs["scan"] if isinstance(inputs, dict) else inputs

    if baseline is None:
        baseline = torch.zeros_like(scan)

    delta = scan - baseline
    # detach so the stacked inputs are a leaf tensor whose .grad gets populated
    # even if the scan itself already tracks gradients
    expanded_scan = torch.cat(
        [baseline + (float(i) / steps) * delta for i in range(steps + 1)],
        dim=0).detach()

    # require gradient on the interpolated scans so we can backpropagate through
    # the pixels
    expanded_scan.requires_grad = True

    # forward pass
    output = model(expanded_scan, targets)
    model.zero_grad()

    # backward pass
    targets = targets[target_task] if isinstance(targets, dict) else targets
    target_class = targets[0]

    # if not an index but a softmax (probabilistic case)
    if len(target_class.shape) > 0:
        target_class = target_class.argmax()

    if isinstance(output, dict):
        output = output[target_task]
    # a second unwrap for outputs that are themselves dicts (or dict subclasses
    # such as OrderedDict) holding the prediction under 'out'
    if isinstance(output, dict):
        output = output['out']

    # backward() without an explicit gradient only accepts a scalar, so reduce the
    # per-step target outputs with a sum. Assuming the model treats batch elements
    # independently, each interpolated input still receives its own gradient.
    output[:, target_class].sum().backward()

    grad, scan = place_on_cpu([expanded_scan.grad, scan])
    # drop the last interpolation point (the scan itself): left Riemann sum
    avg_grad = torch.mean(grad[:-1], dim=0)
    # the baseline may still live on the original device; match the scan
    baseline = baseline.to(scan.device)
    integrated_grad = (scan - baseline) * avg_grad

    # plain copy detached from the autograd graph
    integrated_grad = integrated_grad.clone().detach()

    scan.requires_grad = False

    return integrated_grad, scan
def generate_gradient(model, inputs, targets, target_task="primary"):
    """
    Generates the gradient of the target-class output with respect to the scan for
    the first element in the batch.

    @model    callable invoked as model(inputs, targets); must also provide
              zero_grad()
    @inputs   (dict or torch.Tensor) the input scan. if dict, should contain key "scan".
    @targets  (dict or torch.Tensor) target tensor, or dict of targets keyed by task;
              only the target of the first example is used
    @target_task  (str) key used to select from targets/outputs when they are dicts

    returns (grad, scan): a detached copy of the gradient through the scan and the
    scan itself, both as returned by place_on_cpu.
    """
    # require gradient on scan so we can backpropagate through the pixels
    scan = inputs["scan"] if isinstance(inputs, dict) else inputs
    scan.requires_grad = True
    # backward() accumulates into .grad; clear anything left by an earlier pass
    # over the same tensor so the result reflects this call only
    scan.grad = None

    # forward pass
    output = model(inputs, targets)
    model.zero_grad()

    # backward pass
    targets = targets[target_task] if isinstance(targets, dict) else targets
    target_class = targets[0]

    # if not an index but a softmax (probabilistic case)
    if len(target_class.shape) > 0:
        target_class = target_class.argmax()

    if isinstance(output, dict):
        output = output[target_task]
    # a second unwrap for outputs that are themselves dicts (or dict subclasses
    # such as OrderedDict) holding the prediction under 'out'
    if isinstance(output, dict):
        output = output['out']

    # get first element in batch
    output = output[0]

    output[target_class].backward()

    # reset the flag on the tensor we changed, before it is copied to the cpu
    # (same order as generate_gradients)
    scan.requires_grad = False
    grad, scan = place_on_cpu([scan.grad, scan])
    # plain copy detached from the autograd graph
    grad = grad.clone().detach()

    return grad, scan