def __init__(self, install_dir='.'):
    """Register the singleton instance and build its token filters.

    Arguments:
    install_dir -- base directory; the stop-word list is read from
                   <install_dir>/<LIBRARYNAME>/Core/Tag/stop_snowball.filter.
                   Defaults to the current directory ('.').

    Raises RuntimeError if an instance has already been registered in
    TermExtraction.__single.
    """
    # Singleton guard: a second construction attempt is an error.
    if TermExtraction.__single is not None:
        raise RuntimeError("TermExtraction is singleton")
    TermExtraction.__single = self

    # Location of the stop-word list, relative to the install directory.
    path_parts = (install_dir, LIBRARYNAME, 'Core', 'Tag',
                  'stop_snowball.filter')
    stopwords_path = os.path.join(*path_parts)
    self.stopwords_filter = StopwordsFilter(stopwordsfilename=stopwords_path)

    # Matches any string containing at least one digit.
    self.containsdigits_filter = re.compile(r'\d', flags=re.UNICODE)
    # Matches strings made up of digits only; note that '\d*' also
    # accepts the empty string.
    self.alldigits_filter = re.compile(r'^\d*$', flags=re.UNICODE)
    # Matches strings that begin with 's', two digits, 'e', two digits
    # (e.g. "s01e02"); only the start of the string is anchored.
    self.isepisode_filter = re.compile(r'^s\d{2}e\d{2}', flags=re.UNICODE)

    # Fixed set of domain-name fragments: 'www', 'net', 'com', 'org'.
    self.domain_terms = set('www net com org'.split())
def __init__(self):
    """Register the singleton instance and build its token filters.

    The stop-word list is read from
    <install dir>/<LIBRARYNAME>/Core/Tag/stop_snowball.filter, where the
    install directory is obtained from the Tribler Session instance.

    Raises RuntimeError if an instance has already been registered in
    TermExtraction.__single.
    """
    # Singleton guard: a second construction attempt is an error.
    if TermExtraction.__single is not None:
        # Call-style raise is valid on both Python 2 and Python 3; the
        # former "raise E, msg" form is a SyntaxError on Python 3.
        raise RuntimeError("TermExtraction is singleton")
    TermExtraction.__single = self

    # Imported at call time rather than at module level.
    # NOTE(review): presumably to avoid a circular import -- confirm.
    from Tribler.Core.Session import Session
    session = Session.get_instance()

    # Location of the stop-word list, relative to the install directory.
    filterfn = os.path.join(session.get_install_dir(), LIBRARYNAME,
                            'Core', 'Tag', 'stop_snowball.filter')
    self.stopwords_filter = StopwordsFilter(stopwordsfilename=filterfn)

    # Matches any string containing at least one digit.
    self.containsdigits_filter = re.compile(r'\d', re.UNICODE)
    # Matches strings made up of digits only; note that '\d*' also
    # accepts the empty string.
    self.alldigits_filter = re.compile(r'^\d*$', re.UNICODE)
    # Matches strings that begin with 's', two digits, 'e', two digits
    # (e.g. "s01e02"); only the start of the string is anchored.
    self.isepisode_filter = re.compile(r'^s\d{2}e\d{2}', re.UNICODE)

    # Fixed set of domain-name fragments: 'www', 'net', 'com', 'org'.
    self.domain_terms = set('www net com org'.split())