Beispiel #1
0
    def __init__(self,
                 tokenizer=None,
                 pos_tagger=None,
                 np_extractor=None,
                 analyzer=None,
                 parser=None,
                 classifier=None):
        """Store the NLP components, building a default for each one omitted.

        :param tokenizer: Tokenizer to use. When ``None``, a new
            ``NLTKPunktTokenizer`` is created.
        :param pos_tagger: Part-of-speech tagger. When ``None``, a
            ``PatternTagger`` is created around the resolved tokenizer.
        :param np_extractor: Noun-phrase extractor. When ``None``, a
            ``PatternParserNPExtractor`` is created around the resolved
            tokenizer.
        :param analyzer: Analyzer. When ``None``, a ``PatternAnalyzer`` is
            created around the resolved tokenizer.
        :param parser: Parser. When ``None``, a ``PatternParser`` is created
            around the resolved tokenizer.
        :param classifier: Classifier; stored as given (no default is built).
        """
        # The tokenizer is resolved first because every default component
        # below is constructed with it.
        if tokenizer is None:
            tokenizer = NLTKPunktTokenizer()
        self.tokenizer = tokenizer

        if pos_tagger is None:
            pos_tagger = PatternTagger(tokenizer=self.tokenizer)
        self.pos_tagger = pos_tagger

        if np_extractor is None:
            np_extractor = PatternParserNPExtractor(tokenizer=self.tokenizer)
        self.np_extractor = np_extractor

        if analyzer is None:
            analyzer = PatternAnalyzer(tokenizer=self.tokenizer)
        self.analyzer = analyzer

        if parser is None:
            parser = PatternParser(tokenizer=self.tokenizer)
        self.parser = parser

        # No fallback exists for the classifier: ``None`` stays ``None``.
        self.classifier = classifier

        _initialize_models(self, self.tokenizer, self.pos_tagger,
                           self.np_extractor, self.analyzer, self.parser,
                           self.classifier)
    def __init__(self, tokenizer=None, pos_tagger=None, np_extractor=None,
                 analyzer=None, parser=None, classifier=None):
        """Attach the NLP components to the instance, defaulting missing ones.

        Any component passed as ``None`` (except ``classifier``) is replaced
        by a default that is built with the resolved tokenizer; the
        tokenizer itself defaults to a new ``NLTKPunktTokenizer``.

        :param tokenizer: Tokenizer, or ``None`` for ``NLTKPunktTokenizer()``.
        :param pos_tagger: Tagger, or ``None`` for a ``PatternTagger``.
        :param np_extractor: Noun-phrase extractor, or ``None`` for a
            ``PatternParserNPExtractor``.
        :param analyzer: Analyzer, or ``None`` for a ``PatternAnalyzer``.
        :param parser: Parser, or ``None`` for a ``PatternParser``.
        :param classifier: Classifier, kept exactly as passed.
        """
        self.tokenizer = (
            NLTKPunktTokenizer() if tokenizer is None else tokenizer)
        self.pos_tagger = (
            PatternTagger(tokenizer=self.tokenizer)
            if pos_tagger is None else pos_tagger)
        self.np_extractor = (
            PatternParserNPExtractor(tokenizer=self.tokenizer)
            if np_extractor is None else np_extractor)
        self.analyzer = (
            PatternAnalyzer(tokenizer=self.tokenizer)
            if analyzer is None else analyzer)
        self.parser = (
            PatternParser(tokenizer=self.tokenizer)
            if parser is None else parser)
        # The classifier has no default implementation.
        self.classifier = classifier

        # Hand the resolved components to the module-level initializer.
        resolved = (self.tokenizer, self.pos_tagger, self.np_extractor,
                    self.analyzer, self.parser, self.classifier)
        _initialize_models(self, *resolved)
Beispiel #3
0
    def __init__(self,
                 text,
                 tokenizer=None,
                 pos_tagger=None,
                 np_extractor=None,
                 analyzer=None,
                 parser=None,
                 classifier=None,
                 clean_html=False):
        """Validate ``text``, store it, and set up the NLP components.

        Arguments are validated before any default component is built, so an
        invalid call fails immediately instead of first constructing a
        tokenizer, tagger, extractor, analyzer and parser.

        :param text: The string to wrap. Stored as ``raw`` and ``string``;
            ``stripped`` holds ``lowerstrip(text, all=True)``.
        :param tokenizer: Tokenizer, or ``None`` for a new
            ``NLTKPunktTokenizer``.
        :param pos_tagger: Tagger, or ``None`` for a ``PatternTagger`` built
            with the resolved tokenizer.
        :param np_extractor: Noun-phrase extractor, or ``None`` for a
            ``PatternParserNPExtractor`` built with the resolved tokenizer.
        :param analyzer: Analyzer, or ``None`` for a ``PatternAnalyzer``
            built with the resolved tokenizer.
        :param parser: Parser, or ``None`` for a ``PatternParser`` built with
            the resolved tokenizer.
        :param classifier: Classifier; stored as given (no default).
        :param clean_html: Deprecated. Any truthy value raises
            ``NotImplementedError``.
        :raises TypeError: If ``text`` is not a ``basestring`` instance.
        :raises NotImplementedError: If ``clean_html`` is truthy.
        """
        # Fail fast: reject bad arguments before building default models.
        # NOTE(review): ``basestring`` is not a Python 3 builtin; presumably
        # it is supplied by a compat import elsewhere in the module - confirm.
        if not isinstance(text, basestring):
            raise TypeError('The `text` argument passed to `__init__(text)` '
                            'must be a string, not {0}'.format(type(text)))
        if clean_html:
            raise NotImplementedError(
                "clean_html has been deprecated. "
                "To remove HTML markup, use BeautifulSoup's "
                "get_text() function")

        # The tokenizer is resolved first; every default below is built
        # around it.
        self.tokenizer = tokenizer if tokenizer is not None \
            else NLTKPunktTokenizer()
        self.pos_tagger = pos_tagger if pos_tagger is not None \
            else PatternTagger(tokenizer=self.tokenizer)
        self.np_extractor = np_extractor if np_extractor is not None \
            else PatternParserNPExtractor(tokenizer=self.tokenizer)
        self.analyzer = analyzer if analyzer is not None \
            else PatternAnalyzer(tokenizer=self.tokenizer)
        self.parser = parser if parser is not None \
            else PatternParser(tokenizer=self.tokenizer)
        # No default classifier exists; ``None`` is kept as-is.
        self.classifier = classifier

        self.raw = self.string = text
        self.stripped = lowerstrip(self.raw, all=True)
        _initialize_models(self, self.tokenizer, self.pos_tagger,
                           self.np_extractor, self.analyzer, self.parser,
                           self.classifier)
    def __init__(self, text, tokenizer=None,
                 pos_tagger=None,
                 np_extractor=None,
                 analyzer=None,
                 parser=None,
                 classifier=None, clean_html=False):
        """Check the arguments, keep the text, and wire up the NLP components.

        The argument checks run first so that a rejected call does not pay
        for constructing the default tokenizer, tagger, noun-phrase
        extractor, analyzer and parser.

        :param text: String to wrap. Assigned to both ``raw`` and ``string``;
            ``stripped`` is set to ``lowerstrip(text, all=True)``.
        :param tokenizer: Tokenizer; ``None`` selects a new
            ``NLTKPunktTokenizer``.
        :param pos_tagger: Tagger; ``None`` selects a ``PatternTagger`` that
            uses the resolved tokenizer.
        :param np_extractor: Noun-phrase extractor; ``None`` selects a
            ``PatternParserNPExtractor`` that uses the resolved tokenizer.
        :param analyzer: Analyzer; ``None`` selects a ``PatternAnalyzer``
            that uses the resolved tokenizer.
        :param parser: Parser; ``None`` selects a ``PatternParser`` that uses
            the resolved tokenizer.
        :param classifier: Classifier; stored unchanged, no default is built.
        :param clean_html: Deprecated flag; a truthy value raises
            ``NotImplementedError``.
        :raises TypeError: If ``text`` is not an instance of ``basestring``.
        :raises NotImplementedError: If ``clean_html`` is truthy.
        """
        # Validate up front so invalid input never triggers model creation.
        # NOTE(review): ``basestring`` must come from the module's own
        # imports (it is not a Python 3 builtin) - confirm.
        if not isinstance(text, basestring):
            raise TypeError('The `text` argument passed to `__init__(text)` '
                            'must be a string, not {0}'.format(type(text)))
        if clean_html:
            raise NotImplementedError(
                "clean_html has been deprecated. "
                "To remove HTML markup, use BeautifulSoup's "
                "get_text() function")

        # Resolve the tokenizer before anything else: each default component
        # is constructed with it.
        if tokenizer is None:
            tokenizer = NLTKPunktTokenizer()
        self.tokenizer = tokenizer

        if pos_tagger is None:
            pos_tagger = PatternTagger(tokenizer=self.tokenizer)
        self.pos_tagger = pos_tagger

        if np_extractor is None:
            np_extractor = PatternParserNPExtractor(tokenizer=self.tokenizer)
        self.np_extractor = np_extractor

        if analyzer is None:
            analyzer = PatternAnalyzer(tokenizer=self.tokenizer)
        self.analyzer = analyzer

        if parser is None:
            parser = PatternParser(tokenizer=self.tokenizer)
        self.parser = parser

        # There is no fallback classifier.
        self.classifier = classifier

        self.raw = self.string = text
        self.stripped = lowerstrip(self.raw, all=True)
        _initialize_models(
            self,
            self.tokenizer,
            self.pos_tagger,
            self.np_extractor,
            self.analyzer,
            self.parser,
            self.classifier)