def __init__(self, *args, **kwargs):
    """Set up the fixtures stored on this test case instance.

    All positional and keyword arguments are forwarded unchanged to the
    parent class constructor.  The following attributes are then set:

    * ``exclude_re`` -- pattern string matching names that contain
      "equation" or "eqn".
    * ``stemmer`` -- a fresh ``hrefliterals.Stemmer``.
    * ``words`` -- ``hrefliterals.WordsDict`` built from the system word
      list.
    * ``source`` -- a small LaTeX sample.
    * ``document`` -- the result of parsing ``source`` under the name 'f'
      relative to the current working directory.
    """
    super(LiteralFunctionsTest, self).__init__(*args, **kwargs)

    self.exclude_re = '.*equation.*|.*eqn.*'
    self.stemmer = hrefliterals.Stemmer()

    # NOTE(review): the meaning of the second argument (4) is defined by
    # hrefliterals.WordsDict and is not visible here -- confirm there.
    dictionaryPath = '/usr/share/dict/words'
    self.words = hrefliterals.WordsDict(dictionaryPath, 4)

    # Sample input: plain words, a '%' inside "wo% rd", and an equation
    # environment (which exclude_re is presumably meant to filter out).
    self.source = r""" \begin{document}words 1 word2 wo% rd \begin{equation}word\end{equation} \end{document} """

    sourceStream = StringIO.StringIO(self.source)
    workingDir = os.getcwd()
    self.document = hrefliterals.parseDocument('f', sourceStream, workingDir)
def __init__(self, *args, **kwargs):
    """Initialise the test case and build the objects it stores on itself.

    ``*args`` and ``**kwargs`` are passed straight through to the parent
    constructor.  Afterwards the instance carries ``exclude_re``,
    ``stemmer``, ``words``, ``source`` and ``document``; the last is
    produced by ``hrefliterals.parseDocument`` from ``source``.
    """
    super(LiteralFunctionsTest, self).__init__(*args, **kwargs)

    # Pattern string matching anything that mentions "equation" or "eqn".
    self.exclude_re = '.*equation.*|.*eqn.*'

    self.stemmer = hrefliterals.Stemmer()

    # NOTE(review): what the trailing 4 controls is decided inside
    # hrefliterals.WordsDict -- verify against that class.
    wordListFile = '/usr/share/dict/words'
    self.words = hrefliterals.WordsDict(wordListFile, 4)

    # LaTeX snippet used as the parser input.
    self.source = r""" \begin{document}words 1 word2 wo% rd \begin{equation}word\end{equation} \end{document} """

    # The snippet is parsed under the name 'f', with the current working
    # directory as the third argument.
    inputStream = StringIO.StringIO(self.source)
    currentDir = os.getcwd()
    self.document = hrefliterals.parseDocument('f', inputStream, currentDir)
def findLiterals(self, source, literals, notLiterals):
    """Run ``hrefliterals.findLiterals`` on a LaTeX source string.

    ``source`` is parsed as a document named 'f' (relative to the current
    working directory), text information is extracted from it using
    ``self.exclude_re``, and the entry stored under 'f' is handed to
    ``hrefliterals.findLiterals`` together with ``literals``,
    ``notLiterals``, ``self.words`` and ``self.stemmer``.

    Returns whatever ``hrefliterals.findLiterals`` returns.
    """
    stream = StringIO.StringIO(source)
    parsed = hrefliterals.parseDocument('f', stream, os.getcwd())

    # NOTE(review): the empty-string third argument is interpreted by
    # hrefliterals.extractTextInfo; its meaning is not visible here.
    extracted = hrefliterals.extractTextInfo(parsed, self.exclude_re, '')

    # NOTE(review): the final 0 is an option of hrefliterals.findLiterals
    # whose semantics are defined in that module -- confirm there.
    return hrefliterals.findLiterals(
        extracted['f'],
        literals,
        notLiterals,
        self.words,
        self.stemmer,
        0,
    )