Exemple #1
0
 def __init__(self, headercoef=2.5):
     """
     Constructor. Stores the header coefficient and creates the helper
     objects kept on this instance.
     @param headercoef: lower border of element's visibility to be handled
                        as header
     """
     self.headercoef = headercoef

     # Helper objects are created in a fixed order: generalizer, entity
     # extractor, BibTeX parser, crawler, MIME handler.
     self.generalizer = _RRSPropertyGeneralizer()
     self.ee = EntityExtractor()
     self.bibtexparser = BibTeXParser()

     page_crawler = Crawler()
     self.crawler = page_crawler
     self.mime_handler = MIMEhandler()

     # FileDownloader is handed over as-is (the class itself, not an
     # instance) to the crawler's set_handler().
     page_crawler.set_handler(FileDownloader)
Exemple #2
0
 def __init__(self, xmlcompatibility='db09'):
     """
     Constructor. Creates the helper objects kept on this instance and
     records the requested XML compatibility tag.
     @param xmlcompatibility: compatibility tag (default 'db09'); it is
                              stored, forwarded to CitationEntityExtractor,
                              and its remainder after stripping leading
                              'd'/'b' characters must be parseable by int()
     """
     self.xmlcompatibility = xmlcompatibility
     self._publ_list = list()

     # Helper objects, created in a fixed order.
     self.seqwrapper = HTMLSequenceWrapper(
         childcoef=7.0,
         headercoef=3.0,
         mintextlen=30,
     )
     self.citaextractor = CitationEntityExtractor(
         ALL,
         xmlcompatibility=xmlcompatibility,
     )
     self.ee = EntityExtractor()
     self.mime = MIMEhandler()
     self.crawler = Crawler()
     self.bibtex = BibTeXParser()

     # lstrip('db') removes any leading run of the characters 'd' and 'b'
     # (a character set, not the literal prefix "db"); the rest is parsed
     # as an integer, e.g. 'db09' -> 9.
     version_digits = xmlcompatibility.lstrip('db')
     self._xmlvalid = int(version_digits)