Exemple #1
0
def _get_epub_standard_word_count(iterator, lang='en'):
    '''
    Count the individual words (rather than pages) in a book's text.

    The text comes from ``_read_epub_contents(iterator, strip_html=True)``.
    It is counted with the first calibre word counter that works, tried in
    this order:

    1. ``calibre.spell.break_iterator.count_words``
    2. ``calibre.spell.break_iterator.split_into_words_and_positions``
    3. ``calibre.utils.wordcount.get_wordcount_obj``

    :param iterator: passed straight through to ``_read_epub_contents``.
    :param lang: language identifier handed to the two ``break_iterator``
        counters (default ``'en'``); the last fallback does not use it.
    :return: the word count reported by whichever counter succeeded.
    :raises Exception: whatever the last fallback raises, since nothing
        is left to fall back to at that point.
    '''
    book_text = _read_epub_contents(iterator, strip_html=True)

    # ``except Exception`` (not a bare ``except``) keeps the best-effort
    # fallback for import and runtime failures while still letting
    # KeyboardInterrupt and SystemExit propagate.
    try:
        from calibre.spell.break_iterator import count_words
        wordcount = count_words(book_text, lang)
        logger.debug('\tWord count - count_words method:%s', wordcount)
    except Exception:
        # count_words was new as of 08/01/2016 and may be missing from the
        # installed calibre, so fall back to an older break_iterator API.
        try:
            from calibre.spell.break_iterator import split_into_words_and_positions
            wordcount = len(split_into_words_and_positions(book_text, lang))
            logger.debug(
                '\tWord count - split_into_words_and_positions method:%s',
                wordcount)
        except Exception:
            # Last resort: the original word count helper.
            from calibre.utils.wordcount import get_wordcount_obj
            wordcount = get_wordcount_obj(book_text).words
            logger.debug('\tWord count - old method:%s', wordcount)

    return wordcount
def _get_epub_standard_word_count(iterator):
    '''
    Return the number of individual words (not pages) in the book.

    The text is fetched with ``_read_epub_contents(iterator,
    strip_html=True)`` and handed to calibre's ``get_wordcount_obj``; the
    ``words`` attribute of the resulting object is returned.
    '''
    from calibre.utils.wordcount import get_wordcount_obj

    stripped_text = _read_epub_contents(iterator, strip_html=True)
    return get_wordcount_obj(stripped_text).words
Exemple #3
0
def _get_epub_standard_word_count(iterator, lang='en'):
    '''
    Count the individual words (rather than pages) in a book's text.

    The text is read with ``_read_epub_contents(iterator, strip_html=True)``.
    Three calibre counters are attempted in turn, and the first one that
    succeeds provides the result:

    * ``calibre.spell.break_iterator.count_words``
    * ``calibre.spell.break_iterator.split_into_words_and_positions``
      (the number of entries it returns is used as the count)
    * ``calibre.utils.wordcount.get_wordcount_obj`` (its ``words`` attribute)

    :param iterator: forwarded unchanged to ``_read_epub_contents``.
    :param lang: language identifier given to the ``break_iterator``
        functions (default ``'en'``); ignored by the final fallback.
    :return: the word count from the first counter that succeeded.
    '''
    book_text = _read_epub_contents(iterator, strip_html=True)

    try:
        from calibre.spell.break_iterator import count_words
        wordcount = count_words(book_text, lang)
        logger.debug('\tWord count - count_words method:%s', wordcount)
    except Exception:
        # A bare ``except`` would also trap KeyboardInterrupt/SystemExit;
        # ``Exception`` still covers a missing import or a counter failure.
        # count_words was new as of 08/01/2016, so try an older method.
        try:
            from calibre.spell.break_iterator import split_into_words_and_positions
            wordcount = len(split_into_words_and_positions(book_text, lang))
            logger.debug('\tWord count - split_into_words_and_positions method:%s', wordcount)
        except Exception:
            # Neither break_iterator API worked; use the original counter.
            from calibre.utils.wordcount import get_wordcount_obj
            wordcount = get_wordcount_obj(book_text).words
            logger.debug('\tWord count - old method:%s', wordcount)

    return wordcount
Exemple #4
0
 def get_word_count(self, html):
     '''
     Return the word count of *html* after its markup has been removed.

     The whole ``<head>`` element is dropped first, then every remaining
     tag is stripped, and the leftover text is passed to
     ``get_wordcount_obj``, whose ``words`` attribute is returned.
     '''
     without_head = re.sub(r'(?s)<head[^>]*>.*?</head>', '', html)
     plain_text = re.sub(r'<[^>]*>', '', without_head)
     return get_wordcount_obj(plain_text).words
Exemple #5
0
 def get_word_count(self, html):
     '''
     Count the words in *html*, ignoring the head section and all tags.

     Two substitutions are applied in order: the first removes the
     ``<head>...</head>`` element, the second removes any remaining tag.
     The resulting text goes to ``get_wordcount_obj`` and the ``words``
     attribute of its result is returned.
     '''
     text = html
     # Order matters: the head element must go before its tags are stripped.
     for pattern in (r'(?s)<head[^>]*>.*?</head>', r'<[^>]*>'):
         text = re.sub(pattern, '', text)
     return get_wordcount_obj(text).words