def chunked(files=items, chunk_types=('NP',)):
    """
    @param files: One or more conll2000 files to be processed
    @type files: L{string} or L{tuple(string)}
    @param chunk_types: The chunk types to be parsed into subtrees
    @rtype: iterator over L{tree}
    """
    # Just one file to process?  If so convert to a tuple so we can iterate
    if type(files) is str:
        files = (files,)
    for file in files:
        path = os.path.join(get_basedir(), "conll2000", file + ".txt")
        s = open(path).read()
        # Sentences are separated by blank lines.
        for sent in tokenize.blankline(s):
            yield tree.conll_chunk(sent, chunk_types)
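# A minimal usage sketch, not part of the original module: print the chunk
# tree of the first sentence of the 'train' file.  The file name 'train' is
# an assumption and must name an installed conll2000 data file; the names
# os, tokenize, tree, get_basedir and items are assumed to be provided by
# the surrounding nltk_lite module.
def _demo_conll_chunked():
    for sent_tree in chunked(files='train', chunk_types=('NP',)):
        print sent_tree
        break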
def demo():
    """
    A demonstration that shows the output of several different
    tokenizers on the same string.
    """
    from nodebox_linguistics_extended.parser.nltk_lite import tokenize

    # Define the test string.
    s = "Good muffins cost $3.88\nin New York. Please buy me\ntwo of them.\n\nThanks."
    print 'Input text:'
    print `s`
    print
    print 'Tokenize using whitespace:'
    _display(tokenize.whitespace(s))
    print
    print 'Tokenize sequences of alphanumeric characters:'
    _display(tokenize.regexp(s, pattern=r'\w+', gaps=False))
    print
    print 'Tokenize sequences of letters and sequences of nonletters:'
    _display(tokenize.wordpunct(s))
    print
    print 'Tokenize by lines:'
    _display(tokenize.line(s))
    print
    print 'Tokenize by blank lines:'
    _display(tokenize.blankline(s))
    print
    print 'A simple sentence tokenizer:'
    _display(tokenize.regexp(s, pattern=r'\.(\s+|$)', gaps=True))
    print
def tagged(files=items):
    """
    @param files: One or more conll2000 files to be processed
    @type files: L{string} or L{tuple(string)}
    @rtype: iterator over L{list(tuple(string, string))}
    """
    # Just one file to process?  If so convert to a tuple so we can iterate
    if type(files) is str:
        files = (files,)
    for file in files:
        path = os.path.join(get_basedir(), "conll2000", file + ".txt")
        s = open(path).read()
        for sent in tokenize.blankline(s):
            # Keep the (word, tag) pairs; discard the chunk column.
            yield [(word, tag) for (word, tag, chunk) in _list_sent(sent)]
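# A minimal usage sketch (assumption: 'train' names an installed conll2000
# data file): show that each yielded sentence is a list of (word, tag) pairs.
def _demo_conll_tagged():
    for sent in tagged(files='train'):
        print sent[:5]
        break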
def _read(files, conversion_function):
    # Just one file to process?  If so convert to a tuple so we can iterate
    if type(files) is str:
        files = (files,)
    for file in files:
        path = os.path.join(get_basedir(), "brown", file)
        f = open(path).read()
        # Each blank-line-separated block is one sentence.
        for sent in tokenize.blankline(f):
            yield conversion_function(sent)
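# Hedged sketch of how a public reader could wrap the Brown _read helper
# above (the module's real wrappers may differ): the conversion function
# receives one blank-line-separated sentence string and may return any
# representation of it.
def _demo_brown_raw(files='a'):
    # 'a' is an illustrative Brown section name; splitting on whitespace is
    # a trivial stand-in for the module's real conversion functions.
    return _read(files, lambda sent: sent.split())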
def chunked(files='chunked'):
    """
    @param files: One or more treebank files to be processed
    @type files: L{string} or L{tuple(string)}
    @rtype: iterator over L{tree}
    """
    # Just one file to process?  If so convert to a tuple so we can iterate
    if type(files) is str:
        files = (files,)
    for file in files:
        path = os.path.join(get_basedir(), "treebank", file)
        s = open(path).read()
        for t in tokenize.blankline(s):
            yield tree.chunk(t)
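# Usage sketch for the treebank reader above (assumes the default 'chunked'
# data file is installed under the NLTK-lite data directory): print the
# first chunk tree.
def _demo_treebank_chunked():
    for t in chunked():
        print t
        break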
def _read(files, conversion_function):
    # Just one file to process?  If so convert to a tuple so we can iterate
    if type(files) is str:
        files = (files,)
    # Strip the <...>_CODE markup and the sentence _ID line before parsing.
    rx_pattern = re.compile(r"""
            <.*>_CODE
            |\s.*_ID
    """, re.VERBOSE | re.UNICODE)
    for file in files:
        path = os.path.join(get_basedir(), "ycoe/pos", file)
        f = open(path).read()
        for sent in tokenize.blankline(f):
            sent = re.sub(rx_pattern, '', sent)
            if sent != "":
                yield conversion_function(sent, sep="_")
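# Worked example of the markup stripping above.  The sample sentence is
# illustrative, not taken from the corpus: the <...>_CODE token and the
# line-final ..._ID token are removed, leaving only word_TAG pairs for
# conversion_function to split on sep="_".
def _demo_ycoe_strip():
    rx = re.compile(r"""
            <.*>_CODE
            |\s.*_ID
    """, re.VERBOSE | re.UNICODE)
    sample = "<O2.3>_CODE\n+Da_C com_VBDI se_D here_N\ncoadrian.o34,2.9_ID"
    # Prints only the word_TAG tokens: "+Da_C com_VBDI se_D here_N"
    print re.sub(rx, '', sample)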
def raw(files='raw'):
    """
    @param files: One or more treebank files to be processed
    @type files: L{string} or L{tuple(string)}
    @rtype: iterator over L{list(string)}
    """
    # Just one file to process?  If so convert to a tuple so we can iterate
    if type(files) is str:
        files = (files,)
    for file in files:
        path = os.path.join(get_basedir(), "treebank", file)
        f = open(path).read()
        for sent in tokenize.blankline(f):
            # Each sentence becomes a list of whitespace-delimited tokens.
            yield list(tokenize.whitespace(sent))
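# Usage sketch (assumes the default 'raw' treebank data file is installed):
# print the token list of the first sentence.
def _demo_treebank_raw():
    for sent in raw():
        print sent
        break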