def __init__(self, headercoef=2.5):
    """
    Constructor. Creates the helper objects this instance works with.

    @param headercoef: lower border of an element's visibility for the
                       element to be handled as a header
    """
    self.generalizer = _RRSPropertyGeneralizer()
    self.ee = EntityExtractor()
    self.headercoef = headercoef
    self.bibtexparser = BibTeXParser()

    # Build the crawler first, then the MIME handler, and only afterwards
    # register FileDownloader as the crawler's handler.
    file_crawler = Crawler()
    self.crawler = file_crawler
    self.mime_handler = MIMEhandler()
    file_crawler.set_handler(FileDownloader)
def __init__(self, xmlcompatibility='db09'):
    """
    Constructor. Creates the helper objects and stores the XML
    compatibility setting.

    @param xmlcompatibility: compatibility identifier (default 'db09');
                             forwarded to CitationEntityExtractor, kept on
                             the instance, and also stored in numeric form
    """
    wrapper_options = {
        'childcoef': 7.0,
        'headercoef': 3.0,
        'mintextlen': 30
    }
    self.seqwrapper = HTMLSequenceWrapper(**wrapper_options)
    self.citaextractor = CitationEntityExtractor(
        ALL, xmlcompatibility=xmlcompatibility
    )
    self.ee = EntityExtractor()
    self.mime = MIMEhandler()
    self.crawler = Crawler()
    self.bibtex = BibTeXParser()
    self.xmlcompatibility = xmlcompatibility

    # lstrip('db') drops every leading 'd' or 'b' character (it takes a
    # character set, not a literal prefix), so 'db09' leaves '09', which
    # int() turns into 9.
    version_digits = xmlcompatibility.lstrip('db')
    self._xmlvalid = int(version_digits)

    self._publ_list = []