Python Sandhisplitter.split 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: sandhisplitter

클래스/타입: Sandhisplitter

메소드/함수: split

hotexamples.com에서의 예제들: 4

Python Sandhisplitter.split - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python sandhisplitter.Sandhisplitter.split의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가하시면 예제 품질 개선에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Sandhisplitter(3)

split(3)

get_info(1)

get_module_name(1)

join(1)

set_model(1)

예제 #1

파일 보기

class TestSandhisplitter(TestCase):
    """Train a model from the bundled samples and check the splitter on them."""

    def setUp(self):
        """Create a fresh model and splitter and open the samples file."""
        super(TestSandhisplitter, self).setUp()
        self.model = Model(depth=3, skip=1)
        self.SS = Sandhisplitter()
        testcases = resource_filename("sandhisplitter.tests",
                                      "resources/samples.txt")
        self.entries = open(testcases, "r", encoding='utf-8')
        # Release the file handle after every test, pass or fail.
        self.addCleanup(self.entries.close)

    def test_splits(self):
        """Feed every sample to the model, then verify split() on each one."""
        # The samples are walked twice (train, then verify), so they must be
        # materialised: a lazy ``map`` object would be exhausted by the first
        # loop and the assertions below would silently never run.
        entries = [line.strip() for line in self.entries]

        for line in entries:
            (word, splits, locs) = extract(line)
            self.model.add_entry(word, splits, locs)
        self.SS.set_model(self.model.serialize())

        for line in entries:
            (word, splits, locs) = extract(line)
            obtained, pos = self.SS.split(word)
            self.assertEqual(locs, pos)
            self.assertEqual(splits, obtained)

    def test_details(self):
        """The splitter reports its expected module name and description."""
        self.assertEqual(self.SS.get_module_name(), "Sandhi-Splitter")
        self.assertEqual(self.SS.get_info(), "Sandhi-splitter for malayalam")

    def test_instance(self):
        """getInstance() hands back a Sandhisplitter object."""
        self.assertIsInstance(getInstance(), Sandhisplitter)

예제 #2

파일 보기

def sandhi_split(token_words):
    """Run each word through a Sandhisplitter, echoing and collecting results.

    For every word in ``token_words`` the first element of the value returned
    by ``Sandhisplitter.split`` is printed and appended to the result list.

    Returns:
        A list with one entry per input word, in input order.
    """
    pieces = []
    splitter = Sandhisplitter()
    for token in token_words:
        # Only the first element of the split result is kept.
        first = splitter.split(token)[0]
        pieces.append(first)
        print(first)
    return pieces

예제 #3

파일 보기

class Malayalam(BaseMalayalam, object):
    """Spellchecker that falls back on a Sandhisplitter for unknown words."""

    def __init__(self):
        super(Malayalam, self).__init__()
        # Splitter consulted by check() and suggest() below.
        self.sandhi = Sandhisplitter()

    def check(self, word):
        """Return True if the word, or every part of its split, passes.

        The base checker is tried on the whole word first; only when that
        fails is the word split and each part checked individually.
        """
        base = super(Malayalam, self)
        if base.check(word):
            return True

        # Accept the word only if every part of the split is accepted.
        parts, _ = self.sandhi.split(word)
        return all(base.check(part) for part in parts)

    def suggest(self, word, n=5):
        """Return at most ``n`` suggestions for ``word``, closest first.

        Candidates are the base checker's suggestions plus every re-joined
        combination of per-part corrections; they are ordered by the base
        checker's ``levenshtein_distance`` to ``word``.
        """
        base = super(Malayalam, self)

        # Begin with whatever the base checker proposes for the whole word.
        pool = base.suggest(word, n)

        # One list of alternatives per part: the part itself when it is
        # accepted by the base checker, otherwise its suggestions.
        parts, _ = self.sandhi.split(word)
        alternatives = [
            [part] if base.check(part) else base.suggest(part, n)
            for part in parts
        ]

        # Join every combination of alternatives back into a single word.
        pool.extend(self.sandhi.join(combo)
                    for combo in product(*alternatives))

        # Rank by (distance, suggestion) and keep the best n.
        ranked = sorted(
            (base.levenshtein_distance(candidate, word), candidate)
            for candidate in pool
        )
        return [candidate for _, candidate in ranked][:n]

예제 #4

파일 보기

파일: Malayalam.py 프로젝트: jerinphilip/spellchecker

class Malayalam(BaseMalayalam, object):
    """Base spellchecker extended with Sandhisplitter-based fallbacks."""

    def __init__(self):
        super(Malayalam, self).__init__()
        # Used to break a word into parts when the base checker rejects it.
        self.sandhi = Sandhisplitter()

    def check(self, word):
        """Tell whether ``word`` is accepted directly or part by part.

        Returns True when the base checker accepts the whole word, or when
        it accepts every part produced by splitting the word.
        """
        parent = super(Malayalam, self)

        # Fast path: the base checker already accepts the word.
        if parent.check(word):
            return True

        pieces, _ = self.sandhi.split(word)
        for piece in pieces:
            if parent.check(piece):
                continue
            return False
        return True

    def suggest(self, word, n=5):
        """Produce up to ``n`` suggestions for ``word``, nearest first.

        The base checker's suggestions are combined with words rebuilt from
        corrected parts of the split, then sorted by the base checker's
        ``levenshtein_distance`` to ``word``.
        """
        parent = super(Malayalam, self)

        # Suggestions for the word as a whole come first.
        pool = parent.suggest(word, n)

        # Collect the choices available for each part of the split.
        pieces, _ = self.sandhi.split(word)
        options = []
        for piece in pieces:
            if parent.check(piece):
                choices = [piece]
            else:
                choices = parent.suggest(piece, n)
            options.append(choices)

        # Every combination of choices is joined into one candidate word.
        pool.extend(self.sandhi.join(combo) for combo in product(*options))

        # Order by distance to the input; ties fall back to the word itself.
        distances = [parent.levenshtein_distance(item, word) for item in pool]
        ranked = sorted(zip(distances, pool))

        # Slicing is a no-op when there are n or fewer candidates.
        return [item for _, item in ranked][:n]