Example #1
    def mask_and_tensorize(self, batch):
        batch = list(batch)
        if not batch:
            return torch.Tensor()
        masked_sources = []
        masked_targets = []

        for tokens in batch:
            dec_source, dec_target = self.mask.gen_masked_source_target(
                tokens, vocab=self.vocab
            )

            masked_sources.append(dec_source)
            masked_targets.append(dec_target)

        return (
            cuda.tensor(
                pad(masked_sources, pad_token=self.vocab.get_pad_index()),
                dtype=torch.long,
            ),
            cuda.tensor(
                pad(masked_targets, pad_token=self.vocab.get_pad_index()),
                dtype=torch.long,
            ),
        )
Example #2
    def get_weights_context(self, tensor_dict):
        batch_size = tensor_dict["doc_labels"].size()[0]
        # Fall back to the configured default loss weights when the batch does
        # not carry per-example doc/word weights.
        return {
            "doc_weight": tensor_dict.get("doc_weight")
            or cuda_util.tensor(
                [self.default_doc_loss_weight] * batch_size, dtype=torch.float
            ),
            "word_weight": tensor_dict.get("word_weight")
            or cuda_util.tensor(
                [self.default_word_loss_weight] * batch_size, dtype=torch.float
            ),
        }
Example #3
def pad_and_tensorize(batch, pad_token=0, pad_shape=None, dtype=torch.long):
    batch = list(batch)
    if not batch:
        return torch.Tensor()

    return cuda.tensor(pad(batch, pad_token=pad_token, pad_shape=pad_shape),
                       dtype=dtype)
Example #4
    def tensorize(self, batch):
        # Pad a minibatch of dictionary features to be
        # batch_size * max_number_of_words * max_number_of_features.
        # Unpack the minibatch.
        feats, weights, lengths = zip(*batch)
        lengths_flattened = [l for l_list in lengths for l in l_list]
        seq_lens = [len(l_list) for l_list in lengths]
        max_ex_len = max(seq_lens)
        max_feat_len = max(lengths_flattened)
        all_lengths, all_feats, all_weights = [], [], []
        for i, seq_len in enumerate(seq_lens):
            ex_feats, ex_weights, ex_lengths = [], [], []
            feats_lengths, feats_vals, feats_weights = lengths[i], feats[i], weights[i]
            max_feat_len_example = max(feats_lengths)
            r_offset = 0
            for _ in feats_lengths:
                # The dict feats obtained from the featurizer will have necessary
                # padding at the utterance level. Therefore we move the offset by
                # max feature length in the example.
                ex_feats.extend(feats_vals[r_offset : r_offset + max_feat_len_example])
                ex_feats.extend(
                    [self.vocab.get_pad_index()] * (max_feat_len - max_feat_len_example)
                )
                ex_weights.extend(
                    feats_weights[r_offset : r_offset + max_feat_len_example]
                )
                ex_weights.extend([0.0] * (max_feat_len - max_feat_len_example))
                r_offset += max_feat_len_example
            ex_lengths.extend(feats_lengths)
            # Pad examples
            ex_padding = (max_ex_len - seq_len) * max_feat_len
            ex_feats.extend([self.vocab.get_pad_index()] * ex_padding)
            ex_weights.extend([0.0] * ex_padding)
            ex_lengths.extend([1] * (max_ex_len - seq_len))
            all_feats.append(ex_feats)
            all_weights.append(ex_weights)
            all_lengths.append(ex_lengths)
        return (
            cuda.tensor(all_feats, torch.long),
            cuda.tensor(all_weights, torch.float),
            cuda.tensor(all_lengths, torch.long),
        )
Example #5
def get_label_weights(vocab_dict: Dict[str, int], label_weights: Dict[str, float]):
    # Prune label_weights to drop labels that do not exist in the dataset.
    pruned_label_weights = {
        vocab_dict[k]: v for (k, v) in label_weights.items() if k in vocab_dict
    }
    if len(pruned_label_weights) != len(label_weights):
        filtered_labels = [k for k in label_weights if k not in vocab_dict]
        print(
            "Warning: these labels are filtered out of the original label "
            f"weights: {filtered_labels}"
        )
    if len(pruned_label_weights) == 0:
        return None

    # All unspecified classes get a weight of 1.
    weights_tensor = [1] * len(vocab_dict)
    for k, v in pruned_label_weights.items():
        weights_tensor[k] = v
    return tensor(weights_tensor, dtype=torch.float)
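A minimal usage sketch for get_label_weights with made-up labels, assuming tensor is the device-aware constructor imported by the surrounding module:

vocab_dict = {"NEGATIVE": 0, "NEUTRAL": 1, "POSITIVE": 2}
label_weights = {"POSITIVE": 2.0, "UNKNOWN": 5.0}  # "UNKNOWN" is pruned away
weights = get_label_weights(vocab_dict, label_weights)
# weights is tensor([1.0, 1.0, 2.0]); unspecified labels keep a weight of 1.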
Example #6
    def report_realtime_metric(self, stage):
        if stage != Stage.TRAIN:
            return

        if cuda.DISTRIBUTED_WORLD_SIZE > 1:
            all_reduce_stats = cuda.tensor(
                [
                    self.last_batch_tps,
                    self.last_batch_loss,
                    self.aggregate_loss,
                    self.total_masked_tokens,
                    self.realtime_meters["tps"].n,
                ],
                dtype=torch.float32,
            )
            total_elapsed_time = self.realtime_meters["tps"].elapsed_time

            torch.distributed.all_reduce(all_reduce_stats)
            # average last_batch_loss by distributed_world_size
            all_reduce_stats[1:2].div_(cuda.DISTRIBUTED_WORLD_SIZE)
            [
                last_batch_tps,
                last_batch_loss,
                aggregate_loss,
                total_masked_tokens,
                total_tokens,
            ] = all_reduce_stats.tolist()
            tps = total_tokens / total_elapsed_time
        else:
            last_batch_tps = self.last_batch_tps
            last_batch_loss = self.last_batch_loss
            aggregate_loss = self.aggregate_loss
            total_masked_tokens = self.total_masked_tokens
            tps = self.realtime_meters["tps"].avg

        print(
            f"Tokens/s: {last_batch_tps:.0f}, "
            f"batch ppl: {math.exp(last_batch_loss):.2f}, "
            f"agg ppl: {math.exp(self._calculate_loss(aggregate_loss, total_masked_tokens)):.2f}, "
            f"number of batches: {self.total_batches:.0f}, "
            f"accumulated tokens/s: {tps:.0f}",
            flush=True,
        )
        # TODO: remove GPU0 report
        print(
            f"GPU-0 tokens/s: {self.last_batch_tps:.0f}, "
            f"batch ppl: {math.exp(self.last_batch_loss):.2f}, "
            f"agg ppl: {math.exp(self.calculate_loss()):.2f}, "
            f"number of batches: {self.total_batches}, "
            f"accumulated tokens/s: {self.realtime_meters['tps'].avg:.0f}",
            flush=True,
        )

        if self.pep_format:
            # used for pep regression benchmark
            print(
                "PyTorchObserver " + json.dumps({
                    "type": "MLM",
                    "metric": "tps",
                    "unit": "token/sec",
                    "value": f"{tps:.0f}",
                }),
                flush=True,
            )
Example #7
    def tensorize(self, batch):
        # The batch is already numeric; build a float tensor on the active device.
        return cuda.tensor(batch, torch.float)
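All of the examples above funnel plain Python lists through cuda.tensor(data, dtype). A minimal sketch of what such a helper could look like, written as an assumption for readability rather than the library's actual implementation:

import torch

# Sketch of an assumed cuda.tensor helper: build a tensor from nested Python
# lists and move it to the GPU when one is available. Not the real PyText code.
CUDA_ENABLED = torch.cuda.is_available()

def tensor(data, dtype):
    out = torch.tensor(data, dtype=dtype)
    return out.cuda() if CUDA_ENABLED else out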