Ejemplo n.º 1
0
    def __init__(self, install_dir='.'):
        """Create the single allowed TermExtraction instance.

        Args:
            install_dir: Directory under which
                ``<LIBRARYNAME>/Core/Tag/stop_snowball.filter`` is looked up.
                Defaults to the current directory.

        Raises:
            RuntimeError: If an instance has already been registered.
        """
        # Singleton guard: refuse a second construction, then register
        # ourselves before any further setup is attempted.
        if TermExtraction.__single is not None:
            raise RuntimeError("TermExtraction is singleton")
        TermExtraction.__single = self

        # Stop-word filter, built from the filter file under install_dir.
        filter_parts = (LIBRARYNAME, 'Core', 'Tag', 'stop_snowball.filter')
        stopwords_path = os.path.join(install_dir, *filter_parts)
        self.stopwords_filter = StopwordsFilter(
            stopwordsfilename=stopwords_path)

        # Pre-compiled patterns, all Unicode-aware.
        regex_flags = re.UNICODE
        # Any digit anywhere in the string.
        self.containsdigits_filter = re.compile(r'\d', regex_flags)
        # Digits only (the empty string also matches).
        self.alldigits_filter = re.compile(r'^\d*$', regex_flags)
        # Leading episode marker such as "s01e02".
        self.isepisode_filter = re.compile(r'^s\d{2}e\d{2}', regex_flags)

        domain_words = 'www net com org'.split()
        self.domain_terms = set(domain_words)
Ejemplo n.º 2
0
 def __init__(self):
     """Create the single allowed TermExtraction instance.

     The install directory is taken from the Session instance and used to
     locate ``<LIBRARYNAME>/Core/Tag/stop_snowball.filter``.

     Raises:
         RuntimeError: If an instance has already been registered.
     """
     # Singleton guard. The call form of ``raise`` is used because the
     # ``raise Exc, "msg"`` statement form is a syntax error on Python 3.
     if TermExtraction.__single is not None:
         raise RuntimeError("TermExtraction is singleton")
     TermExtraction.__single = self

     # Imported at call time rather than at module level
     # (presumably to avoid a circular import -- confirm before moving).
     from Tribler.Core.Session import Session
     session = Session.get_instance()

     filterfn = os.path.join(session.get_install_dir(), LIBRARYNAME,
                             'Core', 'Tag', 'stop_snowball.filter')
     self.stopwords_filter = StopwordsFilter(stopwordsfilename=filterfn)

     # Pre-compiled, Unicode-aware patterns:
     # any digit anywhere in the string
     self.containsdigits_filter = re.compile(r'\d', re.UNICODE)
     # digits only (the empty string also matches)
     self.alldigits_filter = re.compile(r'^\d*$', re.UNICODE)
     # leading episode marker such as "s01e02"
     self.isepisode_filter = re.compile(r'^s\d{2}e\d{2}', re.UNICODE)

     self.domain_terms = set('www net com org'.split())