Python sublexicalize 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: experiment_support.preprocessing

메소드/함수: sublexicalize

hotexamples.com에서의 예제들: 9

Python sublexicalize - 9개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python experiment_support.preprocessing.sublexicalize의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: test_preprocessing.py 프로젝트: andrely/sublexical-features

    def test_sublexicalize(self):
        """Check sublexicalize for a single n-gram order and for a pair of orders.

        The expected strings are space-separated character n-grams in which
        the spaces of the input appear as underscores. With ``order=(3, 4)``
        the 3-gram and 4-gram starting at each position are interleaved.
        """
        text = "abc def ghi"

        # assertEqual is used instead of the deprecated assertEquals alias,
        # which was removed from unittest in Python 3.12.
        self.assertEqual(sublexicalize(text, order=3),
                         "abc bc_ c_d _de def ef_ f_g _gh ghi")

        self.assertEqual(sublexicalize(text, order=4),
                         "abc_ bc_d c_de _def def_ ef_g f_gh _ghi")

        self.assertEqual(sublexicalize(text, order=(3, 4)),
                         "abc bc_ abc_ c_d bc_d _de c_de def _def ef_ def_ f_g ef_g _gh f_gh ghi _ghi")

예제 #2

파일 보기

파일: test_preprocessing.py 프로젝트: johndpope/sublexical-features

    def test_sublexicalize(self):
        """Verify sublexicalize output for orders 3, 4 and the pair (3, 4).

        Expected values are space-separated character n-grams where input
        spaces show up as underscores; for ``order=(3, 4)`` the n-grams of
        both orders are interleaved position by position.
        """
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in unittest as of Python 3.12.
        result = sublexicalize("abc def ghi", order=3)
        self.assertEqual(result, "abc bc_ c_d _de def ef_ f_g _gh ghi")

        result = sublexicalize("abc def ghi", order=4)
        self.assertEqual(result, "abc_ bc_d c_de _def def_ ef_g f_gh _ghi")

        result = sublexicalize("abc def ghi", order=(3, 4))
        self.assertEqual(
            result,
            "abc bc_ abc_ c_d bc_d _de c_de def _def ef_ def_ f_g ef_g _gh f_gh ghi _ghi"
        )

예제 #3

파일 보기

파일: experiment_runner.py 프로젝트: andrely/sublexical-features

def process(args):
    """Sublexicalize one work item packed into a single tuple.

    Args:
        args: A ``(text, clean_func, order)`` triple. ``text`` is an iterable
            of strings that is joined with single spaces; ``clean_func``, when
            truthy, is applied to the joined string; ``order`` is forwarded
            to ``sublexicalize``.

    Returns:
        Whatever ``sublexicalize`` returns when called with ``join=False``.
    """
    pieces, clean_func, order = args

    joined = ' '.join(pieces)
    prepared = clean_func(joined) if clean_func else joined

    return sublexicalize(prepared, order=order, join=False)

예제 #4

파일 보기

파일: experiment_runner.py 프로젝트: johndpope/sublexical-features

def process(args):
    """Join, optionally clean, and sublexicalize a packed ``(text, clean_func, order)`` item.

    The single tuple argument is unpacked here. The ``text`` element is
    joined with spaces, passed through ``clean_func`` if one is supplied
    (any truthy value), and then handed to ``sublexicalize`` with the given
    ``order`` and ``join=False``. The result of that call is returned.
    """
    tokens, clean_func, order = args
    document = ' '.join(tokens)

    if not clean_func:
        return sublexicalize(document, order=order, join=False)

    return sublexicalize(clean_func(document), order=order, join=False)

예제 #5

파일 보기

def main():
    """Sublexicalize stdin line by line and write the results to stdout.

    Command-line options:
        -e / --encoding: optional codec name. When given, stdin and stdout
            are replaced with codecs reader/writer wrappers for that codec.
        -o / --order: n-gram order specification (default ``"3"``), parsed
            by ``parse_ngram_order``.

    Each input line produces one output line.
    """
    parser = ArgumentParser()
    parser.add_argument('-e', '--encoding')
    parser.add_argument('-o', '--order', default="3")
    args = parser.parse_args()

    encoding = args.encoding
    order = parse_ngram_order(args.order)

    if encoding:
        # codecs stream wrappers encode to / decode from bytes. On Python 3
        # the byte streams are exposed as `.buffer`; wrapping the text stream
        # directly would raise TypeError on the first write. Where there is
        # no `.buffer` attribute (Python 2) the stream itself is wrapped, as
        # before.
        sys.stdout = codecs.getwriter(encoding)(getattr(sys.stdout, 'buffer', sys.stdout))
        sys.stdin = codecs.getreader(encoding)(getattr(sys.stdin, 'buffer', sys.stdin))

    for line in sys.stdin:
        # NOTE(review): `line` still carries its trailing newline when passed
        # to sublexicalize -- confirm that is intended.
        sys.stdout.write(sublexicalize(line, order=order))
        sys.stdout.write('\n')

예제 #6

파일 보기

파일: experiment_runner.py 프로젝트: andrely/sublexical-features

def clean_c6(text_str):
    """Return the order-6 sublexicalization of ``text_str`` after ``mahoney_clean``."""
    cleaned = mahoney_clean(text_str)
    return sublexicalize(cleaned, order=6)

예제 #7

파일 보기

from optparse import OptionParser
import os
import re
import sys

# Directory part of this script's path; the file-name part is discarded.
cur_path, _ = os.path.split(__file__)
# Append <script dir>/../Experiments to the module search path
# (presumably where the experiment_support package lives -- verify).
sys.path.append(os.path.join(cur_path, '..', 'Experiments'))

from experiment_support.preprocessing import sublexicalize

# Size argument passed to each sys.stdin.read() call in the main loop.
BUF_SIZE = 8192

if __name__ == '__main__':
    # Stream stdin through sublexicalize in BUF_SIZE-sized reads, writing
    # each chunk's output to stdout followed by a single space.
    parser = OptionParser()
    parser.add_option("-n", "--ngram-order", default=3)
    opts, args = parser.parse_args()

    order = int(opts.ngram_order)
    # Number of trailing characters carried into the next chunk so that
    # n-grams spanning a chunk boundary are still produced.
    overlap = order - 1

    in_str = sys.stdin.read(BUF_SIZE)
    rest_str = ""

    while in_str:
        out_str = sublexicalize(rest_str + in_str.rstrip('\n'), order=order)

        if overlap > 0:
            # The tail is taken from the output, so underscores are mapped
            # back to spaces before it is fed in again as input.
            rest_str = out_str[-overlap:].replace('_', ' ')
        else:
            # With order 1 the slice would be out_str[-0:], i.e. the whole
            # string, and all previous output would be re-emitted; carry
            # nothing over instead.
            rest_str = ""

        sys.stdout.write(out_str + " ")

        in_str = sys.stdin.read(BUF_SIZE)

예제 #8

파일 보기

파일: sublexicalize.py 프로젝트: andrely/sublexical-features

from optparse import OptionParser
import os
import re
import sys

# Keep only the directory component of this script's path.
cur_path, _ = os.path.split(__file__)
# Extend the module search path with <script dir>/../Experiments
# (assumed to contain the experiment_support package -- TODO confirm).
sys.path.append(os.path.join(cur_path, '..', 'Experiments'))

from experiment_support.preprocessing import sublexicalize

# Size passed to sys.stdin.read() for every read in the main loop.
BUF_SIZE = 8192

if __name__ == '__main__':
    # Read stdin in BUF_SIZE pieces, sublexicalize each piece, and write the
    # result to stdout with a trailing space after every piece.
    parser = OptionParser()
    parser.add_option("-n", "--ngram-order", default=3)
    opts, args = parser.parse_args()

    order = int(opts.ngram_order)

    in_str = sys.stdin.read(BUF_SIZE)
    rest_str = ""

    while in_str:
        out_str = sublexicalize(rest_str + in_str.rstrip('\n'), order=order)

        # Carry the last (order - 1) output characters into the next piece so
        # n-grams that straddle the boundary are generated. Underscores are
        # turned back into spaces because the tail comes from the output.
        # For order 1 nothing may be carried: out_str[-0:] would select the
        # entire string and duplicate all earlier output.
        if order > 1:
            rest_str = out_str[-(order - 1):].replace('_', ' ')
        else:
            rest_str = ""

        sys.stdout.write(out_str + " ")

        in_str = sys.stdin.read(BUF_SIZE)

예제 #9

파일 보기

파일: experiment_runner.py 프로젝트: johndpope/sublexical-features

def clean_c6(text_str):
    """Apply ``mahoney_clean`` to ``text_str`` and sublexicalize the result with order 6."""
    normalized = mahoney_clean(text_str)
    six_grams = sublexicalize(normalized, order=6)
    return six_grams