Example #1
0
    def test_init_no_id(self):
        # Build a corpus from words only; the assertions below expect the id
        # to be unset and the document form to carry just the 'words' key.
        expected_words = ["keiffster", "ABCDEF123", "PASSWORD123"]
        corpus = Corpus(words=list(expected_words))

        self.assertIsNotNone(corpus)
        self.assertIsNone(corpus.id)
        self.assertEqual(expected_words, corpus.words)

        expected_document = {'words': list(expected_words)}
        self.assertEqual(expected_document, corpus.to_document())
Example #2
0
 def test_from_document_no_id(self):
     # A document lacking '_id' is expected to produce a corpus whose id
     # is None while the words are carried over unchanged.
     expected_words = ['keiffster', 'ABCDEF123', 'PASSWORD123']
     document = {'words': list(expected_words)}

     loaded = Corpus.from_document(document)

     self.assertIsNotNone(loaded)
     self.assertIsNone(loaded.id)
     self.assertEqual(expected_words, loaded.words)
Example #3
0
    def upload_from_file(self,
                         filename,
                         format=Store.TEXT_FORMAT,
                         commit=True,
                         verbose=False):
        """Load a spelling corpus from a text file and store it as one document.

        The file is read line by line and split on whitespace; every
        resulting word is collected into a single ``Corpus`` that is passed
        to ``self.add_document``.

        Args:
            filename: Path of the text file to read.
            format: Not used by this method; kept in the signature for
                callers that pass it.
            commit: When exactly ``True``, ``self.commit()`` is called after
                the document has been added.
            verbose: When exactly ``True``, the collected words are printed.

        Returns:
            A ``(count, count)`` tuple where ``count`` is the number of words
            read, or ``(0, 0)`` if any exception was raised along the way
            (the exception is logged, not re-raised).
        """
        YLogger.debug(self, "Uploading spelling corpus file [%s] to Mongo",
                      filename)

        count = 0
        try:
            corpus_words = []
            with open(filename, "r") as text_file:
                for line in text_file:
                    # split() with no separator splits on any whitespace
                    # run, so trailing newlines and the empty tokens that
                    # split(' ') produced never reach the corpus.
                    corpus_words.extend(line.split())

            corpus = Corpus(words=corpus_words)
            self.add_document(corpus)

            if verbose is True:
                print(corpus_words)

            if commit is True:
                self.commit()

            count = len(corpus_words)

        except Exception as excep:
            YLogger.exception(self, "Failed to load spelling corpus from [%s]",
                              excep, filename)

        # Assume all words loaded are success, no need for additional count
        return count, count
Example #4
0
    def test_from_document(self):
        # Builds two corpora via Corpus.from_document: one from a document
        # without an '_id' and one from a document that carries it.
        expected_words = ['keiffster', 'ABCDEF123', 'PASSWORD123']

        without_id = Corpus.from_document({'words': list(expected_words)})
        self.assertIsNotNone(without_id)
        self.assertIsNone(without_id.id)
        self.assertEqual(expected_words, without_id.words)

        document_with_id = {'_id': '666', 'words': list(expected_words)}
        with_id = Corpus.from_document(document_with_id)
        self.assertIsNotNone(with_id)
        self.assertEqual("666", with_id.id)
        self.assertEqual(expected_words, with_id.words)
Example #5
0
    def _read_corpus_from_file(self, filename, verbose):
        """Read a corpus text file and store its words as a single document.

        Args:
            filename: Path of the text file to read.
            verbose: Not used by this method; kept in the signature for
                callers that pass it.

        Returns:
            ``(count, count)`` where ``count`` is the number of words read,
            or ``(0, 0)`` when ``self.add_document`` returns ``False``.
        """
        corpus_words = []
        with open(filename, "r") as text_file:
            for line in text_file:
                # split() with no separator splits on any whitespace run,
                # so trailing newlines and the empty tokens that split(' ')
                # produced never reach the corpus.
                corpus_words.extend(line.split())

        corpus = Corpus(words=corpus_words)
        if self.add_document(corpus) is False:
            return 0, 0

        count = len(corpus_words)
        return count, count
Example #6
0
    def test_repr(self):
        # A corpus loaded from a document without '_id' is expected to
        # render its id as 'n/a' in its string form.
        corpus1 = Corpus.from_document(
            {'words': ['keiffster', 'ABCDEF123', 'PASSWORD123']})

        # assertEquals is a deprecated alias that was removed in Python 3.12;
        # assertEqual is the supported spelling.
        self.assertEqual("<Corpus(id='n/a')>", str(corpus1))
Example #7
0
 def test_from_document_no_words(self):
     # A document carrying only '_id' is expected to yield a corpus with
     # that id and an empty word list.
     document = {'_id': '666'}
     loaded = Corpus.from_document(document)

     self.assertIsNotNone(loaded)
     self.assertEqual("666", loaded.id)
     self.assertEqual([], loaded.words)