Esempio n. 1
0
    def test_safe_to_use(self):
        # Checks Book.safe_to_use for one baseline book (expected True) and
        # three variants that each change a single field (expected False).
        content_path = '/tmp/mauve/isbn___Author___title.txt'

        def make_book(author='Author', year_published=2020, num_ratings=100):
            # Only the fields that vary between the cases are parameters.
            return Book(title='t',
                        author=author,
                        year_published=year_published,
                        num_ratings=num_ratings,
                        content_path=content_path)

        # Baseline: author spelled as in the file name, recent, rated.
        self.assertTrue(make_book().safe_to_use)

        # Variant: zero ratings.
        self.assertFalse(make_book(num_ratings=0).safe_to_use)

        # Variant: publication year 1700.
        self.assertFalse(make_book(year_published=1700).safe_to_use)

        # Variant: author spelled differently from the file name.
        self.assertFalse(make_book(author='Arthor').safe_to_use)
Esempio n. 2
0
    def test_to_array(self):
        # to_array() is expected to return exactly what iterating over an
        # identically loaded collection yields.
        base = AlwaysIncludeBaseTaggedDocs()
        books = [
            Book(title='1', author='a', year_published=0),
            Book(title='2', author='a', year_published=0)
        ]
        for book in books:
            base.load(book)

        # The iteration side runs on a deep copy, presumably so that
        # consuming it cannot influence ``base`` -- TODO confirm.
        copied_base = copy.deepcopy(base)

        self.assertEqual(base.to_array(), list(copied_base))
Esempio n. 3
0
    def test_iter(self):
        # Iterating the loaded collection yields objects with a ``tags``
        # attribute: the first must be tagged '1_0', any later one '2_0'.
        base = AlwaysIncludeBaseTaggedDocs()
        books = [
            Book(title=title, author='a', year_published=0)
            for title in ('1', '2')
        ]
        for book in books:
            base.load(book)

        for position, doc in enumerate(base):
            expected_tags = ['1_0'] if position == 0 else ['2_0']
            self.assertEqual(doc.tags, expected_tags)
Esempio n. 4
0
 def test_is_genre(self):
     # With tags 'something' and 'something else', is_genre must accept
     # the full name 'something' and reject the partial name 'som'.
     genre_tags = Tags()
     for tag_name in ('something', 'something else'):
         genre_tags.append(Tag(name=tag_name))

     book = Book(title='t', author='a', year_published=1, tags=genre_tags)

     self.assertFalse(book.is_genre('som'))
     self.assertTrue(book.is_genre('something'))
Esempio n. 5
0
 def test_get_lexical_diversity_dictionary(self):
     # Lexical diversity restricted to dictionary words. The expected
     # value of 1/2 depends on whatever tokens back '/tmp/mauve_tok';
     # that setup is not visible in this method.
     book = Book(title='t',
                 author='a',
                 year_published=1,
                 content_path='/tmp/mauve_tok')
     # assertEqual, not assertEquals: the alias was removed in Python 3.12.
     self.assertEqual(
         1 / 2., book.get_lexical_diversity(only_dictionary_words=True))
Esempio n. 6
0
def iter_books(source='goodreads') -> Iterator:
    """
    Lazily build one Book per metadata record of the given source.

    :kwarg source: name of the metadata source; passed unchanged to
                   get_metadata
    :return: generator of book objects
    """
    # Imported inside the function rather than at module level, presumably
    # to avoid a circular import -- TODO confirm
    from mauve.models.books.book import Book
    for book_meta in get_metadata(source=source):
        # 'original_filename' is read with [] so, unlike the optional
        # fields below, it has no fallback when missing
        content_path = os.path.join(TEXT_PATH, book_meta['original_filename'])

        # Each genre listed in the metadata becomes a Tag on the book
        tags = Tags()
        for genre in book_meta.get('genres', []):
            tags.append(Tag(name=genre))

        # Optional fields that are absent from the metadata become None
        yield Book(title=book_meta.get('book_title'),
                   isbn=book_meta.get('isbn'),
                   isbn13=book_meta.get('isbn13'),
                   year_published=book_meta.get('year_first_published'),
                   author=book_meta.get('author'),
                   avg_rating=book_meta.get('average_rating'),
                   tags=tags,
                   num_ratings=book_meta.get('num_ratings'),
                   content_path=content_path)
Esempio n. 7
0
 def test_load(self):
     # A new collection starts with no items; load() is expected to add
     # the book to ``items`` and raise ``num_items`` to 1.
     docs = AlwaysIncludeBaseTaggedDocs()
     self.assertEqual(docs.items, [])
     self.assertEqual(docs.num_items, 0)

     loaded_book = Book(title='t', author='a', year_published=0)
     docs.load(loaded_book)

     self.assertEqual(docs.items, [loaded_book])
     self.assertEqual(docs.num_items, 1)
Esempio n. 8
0
 def test_get_token_type_score(self):
     # Expected scores are a fraction scaled by 10000. The fractions
     # depend on the tokens behind '/tmp/mauve_tok', which are set up
     # outside this method.
     book = Book(title='t',
                 author='a',
                 year_published=1,
                 content_path='/tmp/mauve_tok')

     expected_by_type = (
         ('adjective', (2 / 3.) * 10000),
         ('verb', (1 / 3.) * 10000),
     )
     for token_type, expected_score in expected_by_type:
         self.assertAlmostEqual(expected_score,
                                book.get_token_type_score(token_type))
Esempio n. 9
0
 def test_set_reviews(self):
     # Reviews passed to the constructor must come back from
     # book.reviews.serialize() as a list of plain dicts.
     review = Review(user='******', score=5)
     book = Book(title='t',
                 author='Arthur',
                 year_published=2020,
                 num_ratings=100,
                 reviews=Reviews(data=[review]))

     expected = [{'score': 5, 'user': '******'}]
     self.assertEqual(book.reviews.serialize(), expected)
Esempio n. 10
0
 def test_sentences_tokens(self):
     # sentences_tokens is expected to be a list of sentences, each a
     # list of (token, part-of-speech tag) pairs. The text behind
     # '/tmp/mauve_tok' is set up outside this method.
     book = Book(title='t',
                 author='a',
                 year_published=1,
                 content_path='/tmp/mauve_tok')
     # assertEqual, not assertEquals: the alias was removed in Python 3.12.
     self.assertEqual(book.sentences_tokens,
                      [[('I', 'PRP'), ("am", 'VBP'), ('a', 'DT'),
                        ('little', 'JJ'), ('teapot', 'NN'), ('.', '.')],
                       [('Really', 'RB'), (',', ','), ('I', 'PRP'),
                        ('am', 'VBP'), ('!', '.')],
                       [('Right', 'NNP'), ('?', '.')]])
Esempio n. 11
0
    def test_serialize(self):
        # NOTE: This will probably change a fair bit over time but just
        #       want to be aware if it changes
        book = Book(title='t',
                    author='Arthur',
                    year_published=2020,
                    num_ratings=100,
                    content_path='/tmp/mauve_tok/NOPE___AUTHOR___TITLE.txt')

        serialized = book.serialize()

        # Fields describing the book and its author.
        expected_metadata = {
            'analysis_version': int(constants.ANALYSIS_VERSION),
            'author_similarity': False,
            'title': 't',
            'author': 'Arthur',
            'author_gender': 'male',
            'year_published': 2020,
            'publisher': None,
            'isbn': None,
            'isbn13': None,
            'subtitle': None,
            'avg_rating': None,
            'author_nationality': None,
            'author_birth_year': None,
            'num_ratings': 100,
            'tags': [],
            'reviews': [],
        }

        # Fields computed from the book's text.
        expected_analysis = {
            'word_count': 3,
            'lexical_diversity': 1.0,
            'avg_word_len': 2.6666666666666665,
            'profanity_score': 0,
            'avg_sentence_word_len': 3,
            'avg_sentence_char_len': 10,
            'adverb_score': 0.0,
            'interjection_score': 0.0,
            'adjective_score': 3333.3333333333335,
            'top_adjectives': {'blue': 1},
            'top_nouns': {},
            'top_verbs': {'is': 1},
            'flesch_reading_ease_score': 119.19,
            'reading_difficulty': 0,
            'reading_time': 0.7201152184349495,
            'sentiment': 0,
            'cliche_score': 0,
        }

        self.assertEqual(serialized,
                         {**expected_metadata, **expected_analysis})
Esempio n. 12
0
    def test_author_similarity(self):
        # Checks author_similarity for several pairings of the ``author``
        # argument and the author-looking segment of the content file name.
        def similarity_of(author, content_path):
            return Book(title='t',
                        author=author,
                        year_published=1,
                        content_path=content_path).author_similarity

        # Identical spelling.
        self.assertTrue(
            similarity_of('Author', '/tmp/mauve/isbn___Author___title.txt'))

        # File name carries an extra trailing initial.
        self.assertTrue(
            similarity_of('Author', '/tmp/mauve/isbn___Author M___title.txt'))

        # NOTE(review): exact repeat of the previous case; a different
        # variation may have been intended -- confirm with the author.
        self.assertTrue(
            similarity_of('Author', '/tmp/mauve/isbn___Author M___title.txt'))

        # Different spelling.
        self.assertFalse(
            similarity_of('Arthor', '/tmp/mauve/isbn___Author___title.txt'))
Esempio n. 13
0
    def test_compress_file(self):
        # compress_file() on a token pickle must create a '.bz' sibling
        # whose content, read via get_file_content, equals the original.
        book = Book(title='t',
                    author='a',
                    year_published=0,
                    content_path=self.clean_epub_1)
        # Accessed only for their side effect; presumably this is what
        # writes the token pickles to disk -- TODO confirm.
        book.all_tokens
        book.word_tokens

        pickle_path = book.all_tokens_pickle_path
        bz_path = pickle_path + '.bz'

        compress_file(pickle_path)
        self.assertTrue(os.path.exists(bz_path))

        plain_content = get_file_content(pickle_path)
        compressed_content = get_file_content(bz_path)
        self.assertEqual(plain_content, compressed_content)

        # compress_file is expected to refuse self.text_path_1.
        with self.assertRaises(NotImplementedError):
            compress_file(self.text_path_1)
Esempio n. 14
0
    def test_compress_files(self):
        # compress() must replace the word-token pickle with a '.bz' file
        # whose content reads back the same as the original pickle.
        book = Book(title='t',
                    author='a',
                    year_published=0,
                    content_path=self.clean_epub_1)
        # Accessed only for their side effect; presumably this is what
        # writes the token pickles to disk -- TODO confirm.
        book.all_tokens
        book.word_tokens

        pickle_path = book.word_tokens_pickle_path
        bz_path = pickle_path + '.bz'

        # Capture the content first: the pickle is gone after compress().
        original_content = get_file_content(pickle_path)

        compress(num_processes=1)

        # make sure files deleted and bz created
        self.assertTrue(os.path.exists(bz_path))
        self.assertFalse(os.path.exists(pickle_path))

        self.assertEqual(original_content, get_file_content(bz_path))
Esempio n. 15
0
 def test_get_top_adjectives(self):
     # get_top_adjectives(10) is expected to return a mapping from
     # adjective to count. The tokens behind '/tmp/mauve_tok' are set up
     # outside this method.
     book = Book(title='t',
                 author='a',
                 year_published=1,
                 content_path='/tmp/mauve_tok')
     # assertEqual, not assertEquals: the alias was removed in Python 3.12.
     self.assertEqual(book.get_top_adjectives(10), {'happy': 2, 'big': 1})
Esempio n. 16
0
 def test_get_top_verbs(self):
     # get_top_verbs(10) is expected to return a mapping from verb to
     # count. The tokens behind '/tmp/mauve_tok' are set up outside this
     # method.
     book = Book(title='t',
                 author='a',
                 year_published=1,
                 content_path='/tmp/mauve_tok')
     # assertEqual, not assertEquals: the alias was removed in Python 3.12.
     self.assertEqual(book.get_top_verbs(10), {'go': 2, 'run': 1})
Esempio n. 17
0
 def test_get_lexical_diversity_2(self):
     # Lexical diversity over all words. The expected value of 1/3
     # depends on the tokens behind '/tmp/mauve_tok', which are set up
     # outside this method.
     book = Book(title='t',
                 author='a',
                 year_published=1,
                 content_path='/tmp/mauve_tok')
     # assertEqual, not assertEquals: the alias was removed in Python 3.12.
     self.assertEqual(1 / 3., book.get_lexical_diversity())
Esempio n. 18
0
 def test_get_profanity_score_3(self):
     # The profanity score is expected to be a fraction scaled by 10000.
     # The fraction (2/3 here) depends on the tokens behind
     # '/tmp/mauve_tok', which are set up outside this method.
     book = Book(title='t',
                 author='a',
                 year_published=1,
                 content_path='/tmp/mauve_tok')

     expected_score = (2 / 3.) * 10000
     actual_score = book.get_profanity_score()
     self.assertAlmostEqual(expected_score, actual_score)
Esempio n. 19
0
 def test_get_top_nouns(self):
     # get_top_nouns(10) is expected to return a mapping from noun to
     # count. The tokens behind '/tmp/mauve_tok' are set up outside this
     # method.
     book = Book(title='t',
                 author='a',
                 year_published=1,
                 content_path='/tmp/mauve_tok')
     # assertEqual, not assertEquals: the alias was removed in Python 3.12.
     self.assertEqual(book.get_top_nouns(10), {'pencil': 2, 'house': 1})
Esempio n. 20
0
 def test_adjectives(self):
     # ``adjectives`` is expected to be the list of adjectives in the
     # book's text. The tokens behind '/tmp/mauve_tok' are set up outside
     # this method.
     book = Book(title='t',
                 author='a',
                 year_published=1,
                 content_path='/tmp/mauve_tok')
     # assertEqual, not assertEquals: the alias was removed in Python 3.12.
     self.assertEqual(book.adjectives, ['blue'])
Esempio n. 21
0
 def test_adverbs(self):
     # ``adverbs`` is expected to be the list of adverbs in the book's
     # text. The tokens behind '/tmp/mauve_tok' are set up outside this
     # method.
     book = Book(title='t',
                 author='a',
                 year_published=1,
                 content_path='/tmp/mauve_tok')
     # assertEqual, not assertEquals: the alias was removed in Python 3.12.
     self.assertEqual(book.adverbs, ['quietly'])