class TestCopyGeneratorLoss(unittest.TestCase):
    """Grid-test CopyGeneratorLoss over all init/parameter combinations."""

    INIT_CASES = list(product_dict(
        vocab_size=[172],
        unk_index=[0, 39],
        ignore_index=[1, 17],  # pad idx
        force_copy=[True, False]
    ))
    PARAMS = list(product_dict(
        batch_size=[1, 14],
        tgt_max_len=[50],
        n_extra_words=[107]
    ))

    @classmethod
    def dummy_inputs(cls, params, init_case):
        """Build random (scores, align, target) tensors for one case."""
        n_unique_src_words = 13
        n_rows = params["batch_size"] * params["tgt_max_len"]
        full_vocab = init_case["vocab_size"] + n_unique_src_words
        scores = softmax(torch.randn((n_rows, full_vocab)), dim=1)
        align = torch.randint(0, n_unique_src_words, (n_rows,))
        target = torch.randint(0, init_case["vocab_size"], (n_rows,))
        # guarantee at least one unk and one ignored position in the target
        target[0] = init_case["unk_index"]
        target[1] = init_case["ignore_index"]
        return scores, align, target

    @classmethod
    def expected_shape(cls, params, init_case):
        """Loss is per-position: one value per (batch, tgt step) pair."""
        return (params["batch_size"] * params["tgt_max_len"],)

    def test_copy_loss_forward_shape(self):
        for params, init_case in itertools.product(
                self.PARAMS, self.INIT_CASES):
            loss_fn = CopyGeneratorLoss(**init_case)
            inputs = self.dummy_inputs(params, init_case)
            result = loss_fn(*inputs)
            self.assertEqual(result.shape,
                             self.expected_shape(params, init_case),
                             str(init_case))

    def test_copy_loss_ignore_index_is_ignored(self):
        for params, init_case in itertools.product(
                self.PARAMS, self.INIT_CASES):
            loss_fn = CopyGeneratorLoss(**init_case)
            scores, align, target = self.dummy_inputs(params, init_case)
            result = loss_fn(scores, align, target)
            ignored_positions = (target == init_case["ignore_index"]).nonzero()
            assert len(ignored_positions) > 0  # otherwise not testing anything
            self.assertTrue(
                result[ignored_positions].allclose(torch.tensor(0.0)))

    def test_copy_loss_output_range_is_positive(self):
        for params, init_case in itertools.product(
                self.PARAMS, self.INIT_CASES):
            loss_fn = CopyGeneratorLoss(**init_case)
            result = loss_fn(*self.dummy_inputs(params, init_case))
            self.assertTrue((result >= 0).all())
class TestTextMultiField(unittest.TestCase):
    # NOTE(review): this class appears truncated by a bad paste — the
    # initialize_case helper below builds `case` but never returns it, and
    # no test methods follow before the next (unrelated) text. Recover the
    # full class from upstream history before relying on it.
    INIT_CASES = list(
        product_dict(base_name=["base_field", "zbase_field"],
                     base_field=[Field],
                     feats_fields=[[], [("a", Field)],
                                   [("r", Field), ("b", Field)]]))

    PARAMS = list(product_dict(include_lengths=[False, True]))

    @classmethod
    def initialize_case(cls, init_case, params):
        # initialize fields at the top of each unit test to prevent
        # any undesired stateful effects
        case = deepcopy(init_case)
        # instantiate the base field class with this run's include_lengths
        case["base_field"] = case["base_field"](
            include_lengths=params["include_lengths"])
        # replace each (name, field_class) pair with (name, instance)
        for i, (n, f_cls) in enumerate(case["feats_fields"]):
            case["feats_fields"][i] = (n, f_cls(sequential=True))
        # NOTE(review): presumably `return case` belongs here — TODO confirm
# Esempio n. 3 / 0 — paste artifact (not Python); commented out so the file parses
class TestEmbeddings(unittest.TestCase):
    """Case grid for the Embeddings module, with a filter for invalid combos."""

    INIT_CASES = list(product_dict(
        word_vec_size=[172],
        word_vocab_size=[319],
        word_padding_idx=[17],
        position_encoding=[False, True],
        feat_merge=["first", "concat", "sum", "mlp"],
        feat_vec_exponent=[-1, 1.1, 0.7],
        feat_vec_size=[0, 200],
        feat_padding_idx=[[], [29], [0, 1]],
        feat_vocab_sizes=[[], [39], [401, 39]],
        dropout=[0, 0.5],
        fix_word_vecs=[False, True]
    ))
    PARAMS = list(product_dict(
        batch_size=[1, 14],
        max_seq_len=[23]
    ))

    @classmethod
    def case_is_degenerate(cls, case):
        """Return True for init cases that are internally inconsistent.

        The product of all option lists includes combinations the real
        Embeddings constructor would reject; those are filtered here.
        """
        merge = case["feat_merge"]
        exponent = case["feat_vec_exponent"]
        vec_size = case["feat_vec_size"]
        has_feats = len(case["feat_vocab_sizes"]) > 0
        # "first" merge is exactly the no-features mode: they must co-occur
        if (merge == "first") == has_feats:
            return True
        # an explicit exponent only makes sense for exponent-driven merges
        if exponent != -1 and (merge == "concat" or not has_feats):
            return True
        # every feature vocab needs a matching padding index
        if len(case["feat_vocab_sizes"]) != len(case["feat_padding_idx"]):
            return True
        # exponent-derived sizes require a positive exponent
        if vec_size == 0 and exponent <= 0:
            return True
        # "sum" merge fixes the feature size, so neither knob may be set
        if merge == "sum" and (exponent != -1 or vec_size != 0):
            return True
        # an explicit size and an explicit exponent are mutually exclusive
        if vec_size != 0 and exponent != -1:
            return True
        return False
class TestCopyGenerator(unittest.TestCase):
    """Grid-test the CopyGenerator output layer."""

    INIT_CASES = list(
        product_dict(
            input_size=[172],
            output_size=[319],
            pad_idx=[0, 39],
        ))
    PARAMS = list(
        product_dict(batch_size=[1, 14],
                     max_seq_len=[23],
                     tgt_max_len=[50],
                     n_extra_words=[107]))

    @classmethod
    def dummy_inputs(cls, params, init_case):
        """Random (hidden, attn, src_map) tensors for one configuration."""
        n_rows = params["batch_size"] * params["tgt_max_len"]
        hidden = torch.randn((n_rows, init_case["input_size"]))
        attn = torch.randn((n_rows, params["max_seq_len"]))
        src_map = torch.randn((params["max_seq_len"], params["batch_size"],
                               params["n_extra_words"]))
        return hidden, attn, src_map

    @classmethod
    def expected_shape(cls, params, init_case):
        """Output covers the fixed vocab plus the batch's extra words."""
        n_rows = params["tgt_max_len"] * params["batch_size"]
        return n_rows, init_case["output_size"] + params["n_extra_words"]

    def test_copy_gen_forward_shape(self):
        for params, init_case in itertools.product(self.PARAMS,
                                                   self.INIT_CASES):
            generator = CopyGenerator(**init_case)
            outp = generator(*self.dummy_inputs(params, init_case))
            self.assertEqual(outp.shape,
                             self.expected_shape(params, init_case),
                             str(init_case))

    def test_copy_gen_outp_has_no_prob_of_pad(self):
        for params, init_case in itertools.product(self.PARAMS,
                                                   self.INIT_CASES):
            generator = CopyGenerator(**init_case)
            outp = generator(*self.dummy_inputs(params, init_case))
            # the pad column must carry (numerically) zero probability
            self.assertTrue(outp[:, init_case["pad_idx"]].allclose(
                torch.tensor(0.0)))

    def test_copy_gen_trainable_params_update(self):
        for params, init_case in itertools.product(self.PARAMS,
                                                   self.INIT_CASES):
            generator = CopyGenerator(**init_case)
            trainable = {name: p
                         for name, p in generator.named_parameters()
                         if p.requires_grad}
            assert len(trainable) > 0  # sanity check
            before = deepcopy(trainable)
            outp = generator(*self.dummy_inputs(params, init_case))
            pretend_loss = outp.sum()
            pretend_loss.backward()
            # one SGD step with lr=1 should visibly move every parameter
            torch.optim.SGD(trainable.values(), 1).step()
            for name, old in before.items():
                self.assertTrue(trainable[name].ne(old).any(),
                                name + " " + str(init_case))
# Esempio n. 5 / 0 — paste artifact (not Python); commented out so the file parses
class TestAudioField(unittest.TestCase):
    """Tests for AudioSeqField's pad, numericalize, and process methods."""

    INIT_CASES = list(
        product_dict(pad_index=[0, 32],
                     batch_first=[False, True],
                     include_lengths=[True, False]))

    PARAMS = list(
        product_dict(batch_size=[1, 17],
                     max_len=[23],
                     full_length_seq=[0, 5, 16],
                     nfeats=[1, 5]))

    @classmethod
    def degenerate_case(cls, init_case, params):
        """Return True when ``full_length_seq`` is not a valid batch index.

        The input builders assign ``lengths[full_length_seq]``, and valid
        indices run 0..batch_size-1, so any ``full_length_seq >= batch_size``
        is out of range. The previous ``<`` comparison missed the equality
        case, which would raise an IndexError if a parameter grid ever
        included batch_size == full_length_seq.
        """
        if params["batch_size"] <= params["full_length_seq"]:
            return True
        return False

    @classmethod
    def pad_inputs(cls, params):
        """Build a list of variable-length (nfeats, len) tensors plus lengths.

        Exactly one sequence (index ``full_length_seq``) is forced to
        ``max_len`` so padding has a full-length reference in every batch.
        """
        # torch.randint's high bound is exclusive, so lengths < max_len here
        lengths = torch.randint(1, params["max_len"],
                                (params["batch_size"], )).tolist()
        lengths[params["full_length_seq"]] = params["max_len"]
        fake_input = [
            torch.randn((params["nfeats"], lengths[b]))
            for b in range(params["batch_size"])
        ]
        return fake_input, lengths

    @classmethod
    def numericalize_inputs(cls, init_case, params):
        """Build an already-padded (bs, 1, nfeats, max_len) batch tensor.

        Positions past each sequence's length are filled with the case's
        pad_index; returns ``(fake_input, lengths)`` where ``fake_input``
        is a (tensor, lengths) pair when include_lengths is set.
        """
        bs = params["batch_size"]
        max_len = params["max_len"]
        lengths = torch.randint(1, max_len, (bs, ))
        lengths[params["full_length_seq"]] = max_len
        nfeats = params["nfeats"]
        fake_input = torch.full((bs, 1, nfeats, max_len),
                                init_case["pad_index"])
        for b in range(bs):
            # overwrite the valid prefix with random data; the tail stays pad
            fake_input[b, :, :, :lengths[b]] = torch.randn(
                (1, nfeats, lengths[b]))
        if init_case["include_lengths"]:
            fake_input = (fake_input, lengths)
        return fake_input, lengths

    def test_pad_shape_and_lengths(self):
        for init_case, params in itertools.product(self.INIT_CASES,
                                                   self.PARAMS):
            if not self.degenerate_case(init_case, params):
                field = AudioSeqField(**init_case)
                fake_input, lengths = self.pad_inputs(params)
                outp = field.pad(fake_input)
                if init_case["include_lengths"]:
                    outp, _ = outp
                expected_shape = (params["batch_size"], 1, params["nfeats"],
                                  params["max_len"])
                self.assertEqual(outp.shape, expected_shape)

    def test_pad_returns_correct_lengths(self):
        for init_case, params in itertools.product(self.INIT_CASES,
                                                   self.PARAMS):
            if not self.degenerate_case(init_case, params) and \
                    init_case["include_lengths"]:
                field = AudioSeqField(**init_case)
                fake_input, lengths = self.pad_inputs(params)
                _, outp_lengths = field.pad(fake_input)
                self.assertEqual(outp_lengths, lengths)

    def test_pad_pads_right_places_and_uses_correct_index(self):
        for init_case, params in itertools.product(self.INIT_CASES,
                                                   self.PARAMS):
            if not self.degenerate_case(init_case, params):
                field = AudioSeqField(**init_case)
                fake_input, lengths = self.pad_inputs(params)
                outp = field.pad(fake_input)
                if init_case["include_lengths"]:
                    outp, _ = outp
                # every position past a sequence's true length must be pad
                for b in range(params["batch_size"]):
                    for s in range(lengths[b], params["max_len"]):
                        self.assertTrue(outp[b, :, :, s].allclose(
                            torch.tensor(float(init_case["pad_index"]))))

    def test_numericalize_shape(self):
        for init_case, params in itertools.product(self.INIT_CASES,
                                                   self.PARAMS):
            if not self.degenerate_case(init_case, params):
                field = AudioSeqField(**init_case)
                fake_input, lengths = self.numericalize_inputs(
                    init_case, params)
                outp = field.numericalize(fake_input)
                if init_case["include_lengths"]:
                    outp, _ = outp
                # batch_first controls whether batch or time leads the shape
                if init_case["batch_first"]:
                    expected_shape = (params["batch_size"], 1,
                                      params["nfeats"], params["max_len"])
                else:
                    expected_shape = (params["max_len"], params["batch_size"],
                                      1, params["nfeats"])
                self.assertEqual(expected_shape, outp.shape,
                                 init_case.__str__())

    def test_process_shape(self):
        # tests pad and numericalize integration
        for init_case, params in itertools.product(self.INIT_CASES,
                                                   self.PARAMS):
            if not self.degenerate_case(init_case, params):
                field = AudioSeqField(**init_case)
                fake_input, lengths = self.pad_inputs(params)
                outp = field.process(fake_input)
                if init_case["include_lengths"]:
                    outp, _ = outp
                if init_case["batch_first"]:
                    expected_shape = (params["batch_size"], 1,
                                      params["nfeats"], params["max_len"])
                else:
                    expected_shape = (params["max_len"], params["batch_size"],
                                      1, params["nfeats"])
                self.assertEqual(expected_shape, outp.shape,
                                 init_case.__str__())

    def test_process_lengths(self):
        # tests pad and numericalize integration
        for init_case, params in itertools.product(self.INIT_CASES,
                                                   self.PARAMS):
            if not self.degenerate_case(init_case, params):
                if init_case["include_lengths"]:
                    field = AudioSeqField(**init_case)
                    fake_input, lengths = self.pad_inputs(params)
                    lengths = torch.tensor(lengths, dtype=torch.int)
                    _, outp_lengths = field.process(fake_input)
                    self.assertTrue(outp_lengths.eq(lengths).all())