Python NlpPipeline.sentence_segmentationの例

プログラミング言語: Python

名前空間/パッケージ名: preprocessing.nlp_pipeline

クラス/型: NlpPipeline

メソッド/関数: sentence_segmentation

hotexamples.comのコード掲載数: 2

Python NlpPipeline.sentence_segmentation - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのpreprocessing.nlp_pipeline.NlpPipeline.sentence_segmentationの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

NlpPipeline(7)

parse_text(4)

pos_tag(3)

process_word(1)

sentence_segmentation(1)

コード例 #1

ファイルを表示

class PlaintextParser(AbstractParser):
    """Parser that turns a plain-text (``.txt``) file into ``Text`` objects.

    Every line of the file is split into sentences by an ``NlpPipeline``;
    lines starting with ``TEXT_SEPARATOR`` mark the boundary between two
    consecutive texts.
    """

    def __init__(self, filename):
        """Set up the parser for *filename*.

        If ``wants_this_file()`` is false, initialisation stops right after
        the base-class constructor: neither the progress counter nor
        ``self.nlp_pipeline`` is created, so ``parse()`` must not be called
        on such an instance.
        """
        super(PlaintextParser, self).__init__(filename)
        if not self.wants_this_file():
            return
        self._init_line_count_progress()
        self.nlp_pipeline = NlpPipeline()

    def _wanted_file_endings(self):
        """Return the tuple of file endings this parser accepts."""
        # NOTE(review): presumably consulted by wants_this_file() in the
        # base class -- confirm against AbstractParser.
        return (".txt", )

    def parse(self):
        """Yield one ``Text`` per separator-delimited section of the file.

        ``self._progress`` is incremented once for every line read,
        including separator lines. A ``Text`` is only yielded when it holds
        at least one sentence, so leading, trailing or repeated separators
        never produce empty texts.

        Raises:
            UnicodeDecodeError: if a line is not valid UTF-8.
        """
        text = Text()

        # Read bytes and decode explicitly. Calling ``.encode('utf8')`` on
        # the raw line (as was done before) triggers an implicit ASCII
        # decode on Python 2 and fails on the first non-ASCII character.
        with open(self.filename, "rb") as file_:
            for raw_line in file_:
                self._progress += 1
                line = raw_line.decode("utf8")
                if line.startswith(TEXT_SEPARATOR):
                    if len(text.sentences) > 0:
                        yield text
                        text = Text()
                    # A separator is never content: skip it even when no
                    # sentences have been collected yet, otherwise the
                    # marker itself would be segmented as a sentence.
                    continue
                for sentence in self.nlp_pipeline.sentence_segmentation(line):
                    s = Sentence()
                    s.set_sentence_text(sentence)
                    s.set_tokens(self.nlp_pipeline.parse_text(sentence))
                    text.add_sentence(s)
        # Flush the last section, which has no closing separator.
        if len(text.sentences) > 0:
            yield text

    def progress(self):
        """Return the value of ``self._line_count_progress()``."""
        return self._line_count_progress()

コード例 #2

ファイルを表示

ファイル: plaintext_parser.py プロジェクト: anukat2015/sentence-boundary-detection-nn

class PlaintextParser(AbstractParser):
    """Parser that turns a plain-text (``.txt``) file into ``Text`` objects.

    Every line of the file is split into sentences by an ``NlpPipeline``;
    lines starting with ``TEXT_SEPARATOR`` mark the boundary between two
    consecutive texts.
    """

    def __init__(self, filename):
        """Set up the parser for *filename*.

        If ``wants_this_file()`` is false, initialisation stops right after
        the base-class constructor: neither the progress counter nor
        ``self.nlp_pipeline`` is created, so ``parse()`` must not be called
        on such an instance.
        """
        super(PlaintextParser, self).__init__(filename)
        if not self.wants_this_file():
            return
        self._init_line_count_progress()
        self.nlp_pipeline = NlpPipeline()

    def _wanted_file_endings(self):
        """Return the tuple of file endings this parser accepts."""
        # NOTE(review): presumably consulted by wants_this_file() in the
        # base class -- confirm against AbstractParser.
        return (".txt",)

    def parse(self):
        """Yield one ``Text`` per separator-delimited section of the file.

        ``self._progress`` is incremented once for every line read,
        including separator lines. A ``Text`` is only yielded when it holds
        at least one sentence, so leading, trailing or repeated separators
        never produce empty texts.

        Raises:
            UnicodeDecodeError: if a line is not valid UTF-8.
        """
        text = Text()

        # Read bytes and decode explicitly. Calling ``.encode('utf8')`` on
        # the raw line (as was done before) triggers an implicit ASCII
        # decode on Python 2 and fails on the first non-ASCII character.
        with open(self.filename, "rb") as file_:
            for raw_line in file_:
                self._progress += 1
                line = raw_line.decode("utf8")
                if line.startswith(TEXT_SEPARATOR):
                    if len(text.sentences) > 0:
                        yield text
                        text = Text()
                    # A separator is never content: skip it even when no
                    # sentences have been collected yet, otherwise the
                    # marker itself would be segmented as a sentence.
                    continue
                for sentence in self.nlp_pipeline.sentence_segmentation(line):
                    s = Sentence()
                    s.set_sentence_text(sentence)
                    s.set_tokens(self.nlp_pipeline.parse_text(sentence))
                    text.add_sentence(s)
        # Flush the last section, which has no closing separator.
        if len(text.sentences) > 0:
            yield text

    def progress(self):
        """Return the value of ``self._line_count_progress()``."""
        return self._line_count_progress()