def test_safe_to_use(self):
    book = Book(
        title='t',
        author='Author',
        year_published=2020,
        num_ratings=100,
        content_path='/tmp/mauve/isbn___Author___title.txt'
    )
    self.assertTrue(book.safe_to_use)

    book = Book(
        title='t',
        author='Author',
        year_published=2020,
        num_ratings=0,
        content_path='/tmp/mauve/isbn___Author___title.txt'
    )
    self.assertFalse(book.safe_to_use)

    book = Book(
        title='t',
        author='Author',
        year_published=1700,
        num_ratings=100,
        content_path='/tmp/mauve/isbn___Author___title.txt'
    )
    self.assertFalse(book.safe_to_use)

    book = Book(
        title='t',
        author='Arthor',
        year_published=2020,
        num_ratings=100,
        content_path='/tmp/mauve/isbn___Author___title.txt'
    )
    self.assertFalse(book.safe_to_use)
def test_to_array(self):
    base = AlwaysIncludeBaseTaggedDocs()
    books = [
        Book(title='1', author='a', year_published=0),
        Book(title='2', author='a', year_published=0)
    ]
    base.load(books[0])
    base.load(books[1])
    copied_base = copy.deepcopy(base)
    self.assertEqual(base.to_array(), list(copied_base))
def test_iter(self):
    base = AlwaysIncludeBaseTaggedDocs()
    books = [
        Book(title='1', author='a', year_published=0),
        Book(title='2', author='a', year_published=0)
    ]
    base.load(books[0])
    base.load(books[1])
    for idx, doc in enumerate(base):
        if idx == 0:
            self.assertEqual(doc.tags, ['1_0'])
        else:
            self.assertEqual(doc.tags, ['2_0'])
def test_is_genre(self):
    tags = Tags()
    tags.append(Tag(name='something'))
    tags.append(Tag(name='something else'))
    book = Book(title='t', author='a', year_published=1, tags=tags)
    self.assertFalse(book.is_genre('som'))
    self.assertTrue(book.is_genre('something'))
def test_get_lexical_diversity_dictionary(self):
    book = Book(
        title='t',
        author='a',
        year_published=1,
        content_path='/tmp/mauve_tok'
    )
    self.assertEqual(
        1 / 2.,
        book.get_lexical_diversity(only_dictionary_words=True)
    )
def iter_books(source='goodreads') -> Iterator:
    """
    Iterate over books built from the stored metadata.

    :kwarg source: the metadata source to pull book metadata from
    :return: generator of Book objects
    """
    from mauve.models.books.book import Book
    for book_meta in get_metadata(source=source):
        content_path = os.path.join(
            TEXT_PATH,
            book_meta['original_filename']
        )
        genres = book_meta.get('genres', [])
        tags = Tags()
        for genre in genres:
            tags.append(Tag(name=genre))
        book = Book(
            title=book_meta.get('book_title', None),
            isbn=book_meta.get('isbn', None),
            isbn13=book_meta.get('isbn13', None),
            year_published=book_meta.get('year_first_published', None),
            author=book_meta.get('author', None),
            avg_rating=book_meta.get('average_rating', None),
            tags=tags,
            num_ratings=book_meta.get('num_ratings', None),
            content_path=content_path
        )
        yield book
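# A minimal usage sketch, not part of the original module: it assumes the
# metadata and text files are already in place under TEXT_PATH and that
# iter_books is in scope. The helper name below is hypothetical.
def _print_safe_books():
    for book in iter_books(source='goodreads'):
        # safe_to_use covers ratings count, publication year and
        # author/filename agreement, as exercised in test_safe_to_use above
        if book.safe_to_use:
            print(book.title, book.author)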
def test_load(self):
    base = AlwaysIncludeBaseTaggedDocs()
    self.assertEqual(base.items, [])
    self.assertEqual(base.num_items, 0)
    book = Book(title='t', author='a', year_published=0)
    base.load(book)
    self.assertEqual(base.items, [book])
    self.assertEqual(base.num_items, 1)
def test_get_token_type_score(self):
    book = Book(
        title='t',
        author='a',
        year_published=1,
        content_path='/tmp/mauve_tok'
    )
    self.assertAlmostEqual(
        (2 / 3.) * 10000,
        book.get_token_type_score('adjective')
    )
    self.assertAlmostEqual(
        (1 / 3.) * 10000,
        book.get_token_type_score('verb')
    )
def test_set_reviews(self):
    book = Book(
        title='t',
        author='Arthur',
        year_published=2020,
        num_ratings=100,
        reviews=Reviews(data=[Review(user='******', score=5)])
    )
    self.assertEqual(
        book.reviews.serialize(),
        [{'score': 5, 'user': '******'}]
    )
def test_sentences_tokens(self):
    book = Book(
        title='t',
        author='a',
        year_published=1,
        content_path='/tmp/mauve_tok'
    )
    self.assertEqual(
        book.sentences_tokens,
        [
            [('I', 'PRP'), ('am', 'VBP'), ('a', 'DT'), ('little', 'JJ'), ('teapot', 'NN'), ('.', '.')],
            [('Really', 'RB'), (',', ','), ('I', 'PRP'), ('am', 'VBP'), ('!', '.')],
            [('Right', 'NNP'), ('?', '.')]
        ]
    )
def test_serialize(self):
    # NOTE: This will probably change a fair bit over time but just
    # want to be aware if it changes
    book = Book(
        title='t',
        author='Arthur',
        year_published=2020,
        num_ratings=100,
        content_path='/tmp/mauve_tok/NOPE___AUTHOR___TITLE.txt'
    )
    self.assertEqual(
        book.serialize(),
        {
            'analysis_version': int(constants.ANALYSIS_VERSION),
            'author_similarity': False,
            'title': 't',
            'author': 'Arthur',
            'author_gender': 'male',
            'year_published': 2020,
            'publisher': None,
            'isbn': None,
            'isbn13': None,
            'subtitle': None,
            'avg_rating': None,
            'author_nationality': None,
            'author_birth_year': None,
            'num_ratings': 100,
            'tags': [],
            'reviews': [],
            'word_count': 3,
            'lexical_diversity': 1.0,
            'avg_word_len': 2.6666666666666665,
            'profanity_score': 0,
            'avg_sentence_word_len': 3,
            'avg_sentence_char_len': 10,
            'adverb_score': 0.0,
            'interjection_score': 0.0,
            'adjective_score': 3333.3333333333335,
            'top_adjectives': {'blue': 1},
            'top_nouns': {},
            'top_verbs': {'is': 1},
            'flesch_reading_ease_score': 119.19,
            'reading_difficulty': 0,
            'reading_time': 0.7201152184349495,
            'sentiment': 0,
            'cliche_score': 0
        }
    )
def test_author_similarity(self):
    book = Book(
        title='t',
        author='Author',
        year_published=1,
        content_path='/tmp/mauve/isbn___Author___title.txt'
    )
    self.assertTrue(book.author_similarity)

    book = Book(
        title='t',
        author='Author',
        year_published=1,
        content_path='/tmp/mauve/isbn___Author M___title.txt'
    )
    self.assertTrue(book.author_similarity)

    book = Book(
        title='t',
        author='Arthor',
        year_published=1,
        content_path='/tmp/mauve/isbn___Author___title.txt'
    )
    self.assertFalse(book.author_similarity)
def test_compress_file(self):
    book = Book(
        title='t',
        author='a',
        year_published=0,
        content_path=self.clean_epub_1
    )
    # touch the token properties so the token pickle files are written
    book.all_tokens
    book.word_tokens

    compress_file(book.all_tokens_pickle_path)
    self.assertTrue(os.path.exists(book.all_tokens_pickle_path + '.bz'))

    content_pickle = get_file_content(book.all_tokens_pickle_path)
    content_bz = get_file_content(book.all_tokens_pickle_path + '.bz')
    self.assertEqual(content_pickle, content_bz)

    with self.assertRaises(NotImplementedError):
        compress_file(self.text_path_1)
def test_compress_files(self):
    book = Book(
        title='t',
        author='a',
        year_published=0,
        content_path=self.clean_epub_1
    )
    # touch the token properties so the token pickle files are written
    book.all_tokens
    book.word_tokens
    content_pickle = get_file_content(book.word_tokens_pickle_path)

    compress(num_processes=1)

    # make sure files deleted and bz created
    self.assertTrue(os.path.exists(book.word_tokens_pickle_path + '.bz'))
    self.assertFalse(os.path.exists(book.word_tokens_pickle_path))
    content_bz = get_file_content(book.word_tokens_pickle_path + '.bz')
    self.assertEqual(content_pickle, content_bz)
def test_get_top_adjectives(self):
    book = Book(
        title='t',
        author='a',
        year_published=1,
        content_path='/tmp/mauve_tok'
    )
    self.assertEqual(book.get_top_adjectives(10), {'happy': 2, 'big': 1})
def test_get_top_verbs(self):
    book = Book(
        title='t',
        author='a',
        year_published=1,
        content_path='/tmp/mauve_tok'
    )
    self.assertEqual(book.get_top_verbs(10), {'go': 2, 'run': 1})
def test_get_lexical_diversity_2(self):
    book = Book(
        title='t',
        author='a',
        year_published=1,
        content_path='/tmp/mauve_tok'
    )
    self.assertEqual(1 / 3., book.get_lexical_diversity())
def test_get_profanity_score_3(self):
    book = Book(
        title='t',
        author='a',
        year_published=1,
        content_path='/tmp/mauve_tok'
    )
    self.assertAlmostEqual((2 / 3.) * 10000, book.get_profanity_score())
def test_get_top_nouns(self):
    book = Book(
        title='t',
        author='a',
        year_published=1,
        content_path='/tmp/mauve_tok'
    )
    self.assertEqual(book.get_top_nouns(10), {'pencil': 2, 'house': 1})
def test_adjectives(self):
    book = Book(
        title='t',
        author='a',
        year_published=1,
        content_path='/tmp/mauve_tok'
    )
    self.assertEqual(book.adjectives, ['blue'])
def test_adverbs(self):
    book = Book(
        title='t',
        author='a',
        year_published=1,
        content_path='/tmp/mauve_tok'
    )
    self.assertEqual(book.adverbs, ['quietly'])