Beispiel #1
0
    def test_tok2idx_unique_tokens(self):
        """tok2idx with unique tokens

        Feeds two short sentences containing no repeated tokens to
        ``utils.tok2idx`` and checks two things about the returned mapping:
        every input token is present as a key, and its values include every
        integer from 0 up to (but excluding) the total number of tokens.
        """
        sentences = [
            ["This", "is", "a", "sentence", "."],
            ["Over", "there", "!"],
        ]
        mapping = utils.tok2idx(sentences)

        # Each token of each sentence has to show up as a key of the mapping.
        for word in (w for sentence in sentences for w in sentence):
            self.assertIn(word, mapping)

        # With all tokens unique, one index per token is expected, so the
        # values must cover 0 .. total_tokens - 1 without gaps.
        assigned = set(mapping.values())
        total_tokens = sum(map(len, sentences))
        for expected_index in range(total_tokens):
            self.assertIn(expected_index, assigned)
Beispiel #2
0
    def test_idx2tok(self):
        """idx2tok

        Builds a token-to-index mapping with ``utils.tok2idx``, passes it to
        ``utils.idx2tok``, and checks that the result has every integer from
        0 up to (but excluding) the total token count among its keys and
        every input token among its values.
        """
        sentences = [
            ["This", "is", "a", "sentence", "."],
            ["Over", "there", "!"],
        ]
        forward = utils.tok2idx(sentences)
        backward = utils.idx2tok(forward)

        # The keys of the reversed mapping must cover 0 .. total_tokens - 1
        # without gaps.
        known_indices = set(backward.keys())
        total_tokens = sum(map(len, sentences))
        for expected_index in range(total_tokens):
            self.assertIn(expected_index, known_indices)

        # Every original token must be recoverable as a value.
        known_tokens = set(backward.values())
        for word in (w for sentence in sentences for w in sentence):
            self.assertIn(word, known_tokens)