Code Example #1
import random  # needed for random.sample below; read_csv, helper and get_words are project helpers assumed imported elsewhere


def get_words_from_random_n_roots(number_of_roots: int = 1000):
    nepali_dictionary = read_csv(helper.get_root_pos_rule_csv_path())
    nepali_dictionary = random.sample(nepali_dictionary, number_of_roots)
    for d in nepali_dictionary:
        word = d.get('word').strip()
        # materialise the rules once: a bare filter object is always truthy,
        # and it would be exhausted after the first list() call below
        rules = list(filter(None, d.get('rules').strip().split(',')))
        if not rules:
            continue
        get_words(rules, [(word, rules)])
Code Example #2
def _make_dict_trie():
    trie = Trie()
    csv_path = get_root_pos_rule_csv_path()
    root_data = read_csv(csv_path)
    # insert every root word together with its word-formation rules
    for d in root_data:
        word = d.get('word').strip()
        rules = list(filter(None, d.get('rules').strip().split(',')))
        trie.insert(word, rules)
    return trie
Code Example #3
def make_dict_trie() -> TrieNode:
    root = TrieNode('*')
    csv_path = helper.get_root_pos_rule_csv_path()
    root_data = read_csv(csv_path)
    for d in root_data:
        word = d.get('word').strip()
        # the part-of-speech column may be empty; normalise it to None
        pos = d.get('pos').strip() or None
        rules = list(filter(None, d.get('rules').strip().split(',')))
        add(root, word, rules, pos)
    return root
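
The add() helper used above is not part of this excerpt. A minimal sketch of what it could look like, assuming a hypothetical TrieNode layout (illustrative only, not the project's actual code):

class TrieNode:
    # illustrative node layout; the project's real class may differ
    def __init__(self, char):
        self.char = char
        self.children = {}
        self.rules = None
        self.pos = None


def add(root, word, rules, pos):
    # walk (and extend) the trie one character at a time, then attach the
    # word-formation rules and part-of-speech tag to the terminal node
    node = root
    for letter in word:
        node = node.children.setdefault(letter, TrieNode(letter))
    node.rules = rules
    node.pos = pos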
Code Example #4
def get_rules_for_words_starting_with(
        starting: str, full_word: str) -> Tuple[Set, List[Tuple[str, List]]]:
    """
    get all words starting with a letter along with their rules for word formation
    :param starting: staring letter of word to be lemmatized, स
    :param full_word: word to be lemmatized; सम्बन्धमा
    :return: set of rules for all words founds, list of (word, rules)
    """
    rules_only = set()
    words = list()
    csv_path = helper.get_root_pos_rule_csv_path()
    root_data = read_csv(csv_path)
    for d in root_data:
        word = d.get('word').strip()
        rules = list(filter(None, d.get('rules').strip().split(',')))
        if rules and word.startswith(starting) and len(word) <= len(full_word):
            rules_only = rules_only.union(set(rules))
            words.append((word, rules))
    return rules_only, words
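
For illustration, the filtering done in the loop behaves as follows on a small hypothetical sample (inline data, not the project CSV):

sample = [
    {'word': 'सम्बन्ध', 'rules': 'मा,को'},
    {'word': 'समय', 'rules': 'मा'},
    {'word': 'गर', 'rules': ''},
]
rules_only, words = set(), []
for d in sample:
    word = d['word'].strip()
    rules = list(filter(None, d['rules'].strip().split(',')))
    if rules and word.startswith('स') and len(word) <= len('सम्बन्धमा'):
        rules_only |= set(rules)
        words.append((word, rules))
# rules_only -> {'मा', 'को'}; words -> [('सम्बन्ध', ['मा', 'को']), ('समय', ['मा'])]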
Code Example #5
File: trie_v2.py Project: dpakpdl/NepaliLemmatizer
    def insert(self, word):
        node = self
        for letter in word:
            if letter not in node.children:
                node.children[letter] = TrieNode()

            node = node.children[letter]

        node.word = word


# read dictionary file into a trie
trie = TrieNode()
WordCount = 0

for d in read_csv(get_root_pos_rule_csv_path()):
    WordCount += 1
    trie.insert(d.get('word').strip())

# NodeCount is assumed to be a module-level counter maintained by TrieNode.__init__
print("Read %d words into %d nodes" % (WordCount, NodeCount))


# The search function returns a list of all words that are less than the given
# maximum distance from the target word
def search(word, max_cost):
    # build first row
    current_row = list(range(len(word) + 1))
    print(current_row)
    results = []

    # recursively search each branch of the trie
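
The excerpt above stops before the recursive part of the search. For reference, a self-contained sketch of this row-by-row Levenshtein search over a trie (names and structure here are illustrative, not necessarily the project's):

class Node:
    def __init__(self):
        self.word = None
        self.children = {}

    def insert(self, word):
        node = self
        for letter in word:
            node = node.children.setdefault(letter, Node())
        node.word = word


def search(trie, word, max_cost):
    # first row of the Levenshtein matrix: distance of each prefix of word from the empty string
    current_row = list(range(len(word) + 1))
    results = []
    for letter, child in trie.children.items():
        _search_recursive(child, letter, word, current_row, results, max_cost)
    return results


def _search_recursive(node, letter, word, previous_row, results, max_cost):
    columns = len(word) + 1
    current_row = [previous_row[0] + 1]
    # build one new row of the matrix for the character stored at this node
    for column in range(1, columns):
        insert_cost = current_row[column - 1] + 1
        delete_cost = previous_row[column] + 1
        replace_cost = previous_row[column - 1] + (word[column - 1] != letter)
        current_row.append(min(insert_cost, delete_cost, replace_cost))
    # the last cell holds the edit distance between word and the entry ending at this node
    if current_row[-1] <= max_cost and node.word is not None:
        results.append((node.word, current_row[-1]))
    # prune: only descend if some cell in this row is still within max_cost
    if min(current_row) <= max_cost:
        for next_letter, child in node.children.items():
            _search_recursive(child, next_letter, word, current_row, results, max_cost)


trie = Node()
for w in ('सम्बन्ध', 'सम्बन्धित', 'समय'):
    trie.insert(w)
print(search(trie, 'सम्बन्धमा', 2))  # roots within edit distance 2 of the inflected form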
Code Example #6
File: base.py Project: dpakpdl/NepaliLemmatizer
    def __init__(self):
        csv_path = helper.get_root_pos_rule_csv_path()
        self.data = read_csv(csv_path)