Exemple #1
0
    def test_create_mapping(self):
        """Check that WordMap.create_mapping() gives every word a distinct id.

        Resets the WordMap class-level state, constructs two test documents,
        rebuilds the mapping, and then verifies the mapping's keys and ids.
        """
        Preprocessor.load_models()

        # Reset the class-level vocabulary before the documents are built.
        WordMap.word_set = set()
        WordMap.word_to_id = {}

        # The instances are discarded; presumably constructing a Document
        # registers its words with WordMap -- TODO confirm.
        Document("TST_ENG_20190101.0001")
        Document("TST_ENG_20190101.0002")

        WordMap.create_mapping()
        mapping = WordMap.get_mapping()

        # Each word in word_set got added to the dictionary.
        # NOTE(review): this reads self.word_set rather than WordMap.word_set;
        # confirm the test class defines the expected word set.
        self.assertCountEqual(self.word_set, mapping.keys())

        # Each id value in the dict is unique. Only the values are compared:
        # (word, id) pairs are always distinct because dict keys are, so
        # counting items() could never detect a duplicated id.
        ids = list(mapping.values())
        self.assertEqual(len(ids), len(set(ids)))
Exemple #2
0
class VectorsTests(unittest.TestCase):
    """Tests for the sentence/topic matrices produced by Vectors.

    The fixture below is built in the class body, so it runs once when the
    class is defined and is shared by every test method.
    """

    Preprocessor.load_models()
    # A single topic (id 1) holding the two test documents.
    topics = {
        1: [
            Document(doc_id)
            for doc_id in ('TST_ENG_20190101.0001', 'TST_ENG_20190101.0002')
        ],
    }
    WordMap.create_mapping()
    mapping = WordMap.get_mapping()
    topic_one = topics.get(1)  # the list of Documents for topic 1

    def test_create_freq_vectors(self):
        """Every document matrix has the expected number of rows."""
        Vectors().create_freq_vectors(self.topics)
        # One row per sentence; each fixture document is expected to give 3.
        expected_rows = 3
        for documents in self.topics.values():
            for document in documents:
                row_count = document.vectors.get_shape()[0]
                self.assertEqual(row_count, expected_rows)

    def test_sentence_vector(self):
        """Every token of a sentence has a non-zero entry in its vector."""
        # Second sentence of the second document in topic 1 (a Sentence object).
        # Its text: 'He loves playing so he liked to run around with the other
        # dogs playing fetch.'
        sentence = self.topics.get(1)[1].sens[1]

        # NOTE(review): 'playing' occurs twice in the quoted text, yet the
        # expected column sum is 1 -- confirm what the vector entries count.
        playing_id = WordMap.id_of('playing')
        playing_total = sentence.vector.getcol(playing_id).sum()
        self.assertEqual(playing_total, 1)

        for token in sentence.tokens:
            token_column = sentence.vector.getcol(WordMap.id_of(token))
            self.assertGreater(token_column.sum(), 0)

    def test_get_topic_matrix(self):
        """All sentences from all of the topic's documents reach the matrix."""
        expected_num_sentences = 6
        matrix = Vectors().get_topic_matrix(self.topic_one)
        row_count = matrix.get_shape()[0]
        self.assertEqual(expected_num_sentences, row_count)
Exemple #3
0
 def __init__(self):
     """Record the number of entries in the current WordMap mapping."""
     word_mapping = WordMap.get_mapping()
     self.num_unique_words = len(word_mapping)