def build_test_data():
    """Create the small and large DAWG fixture files under dev_data/."""
    # Small fixtures: one file per DAWG flavour.
    dawg.CompletionDAWG(['f', 'bar', 'foo', 'foobar']).save(
        'dev_data/small/completion.dawg')
    dawg.CompletionDAWG([]).save('dev_data/small/completion-empty.dawg')

    bytes_data = (('foo', b'data1'), ('bar', b'data2'),
                  ('foo', b'data3'), ('foobar', b'data4'))
    dawg.BytesDAWG(bytes_data).save('dev_data/small/bytes.dawg')

    record_data = (('foo', (3, 2, 256)), ('bar', (3, 1, 0)),
                   ('foo', (3, 2, 1)), ('foobar', (6, 3, 0)))
    dawg.RecordDAWG(str(">3H"), record_data).save('dev_data/small/record.dawg')

    int_data = {'foo': 1, 'bar': 5, 'foobar': 3}
    dawg.IntDAWG(int_data).save('dev_data/small/int_dawg.dawg')
    dawg.IntCompletionDAWG(int_data).save(
        'dev_data/small/int_completion_dawg.dawg')

    # Prediction fixtures reuse the shared test corpus.
    dawg.DAWG(TestPrediction.DATA).save('dev_data/small/prediction.dawg')
    dawg.RecordDAWG(
        str("=H"),
        [(key, (len(key),)) for key in TestPrediction.DATA],
    ).save('dev_data/small/prediction-record.dawg')

    # Large fixtures are produced by the create_* helpers.
    create_dawg().save('dev_data/large/dawg.dawg')
    create_bytes_dawg().save('dev_data/large/bytes_dawg.dawg')
    create_record_dawg().save('dev_data/large/record_dawg.dawg')
    create_int_dawg().save('dev_data/large/int_dawg.dawg')
def prefix_suffix(given_word, prnt_res):
    """Find the longest known prefix and suffix of *given_word* and, when
    both exist, delegate to sandhi_maker; otherwise return [0, '', ''].

    NOTE(review): prefix_removed / suffix_removed / prefix_map / suffix_map
    and sandhi_maker come from module scope — not visible in this chunk.
    """
    word_len = len(given_word)
    prefix_match = ''
    suffix_match = ''
    prefix_dawg = dawg.CompletionDAWG(prefix_removed)
    suffix_dawg = dawg.CompletionDAWG(suffix_removed)
    # Longest prefix of length >= 2 present in the prefix DAWG
    # (later, longer matches overwrite earlier ones).
    for size in range(2, word_len):
        candidate = given_word[:size]
        if candidate in prefix_dawg:
            prefix_match = candidate
    # Longest suffix of length >= 2 present in the suffix DAWG.
    for size in range(2, word_len):
        candidate = given_word[word_len - size:word_len]
        if candidate in suffix_dawg:
            suffix_match = candidate
    if prefix_match != '' and suffix_match != '':
        print("printing prefix match")
        print(prefix_match)
        print(prefix_map[prefix_match])
        print("printing suffix match")
        print(suffix_match)
        print(suffix_map[suffix_match])
        return sandhi_maker(prefix_match, suffix_match, word_len, prnt_res)
    # No usable split: status 0 and two empty components.
    return [0, '', '']
def build_pathset(collection: Collection, cache_path: Path = None,
                  log=_LOG) -> dawg.CompletionDAWG:
    """
    Build a combined set (in dawg form) of all dataset paths in the given
    index and filesystem.

    Optionally use the given cache directory to cache repeated builds.
    """
    cache_file = None
    if cache_path:
        cache_file = cache_path.joinpath(query_name(collection.query),
                                         'locations.dawg')
        fileutils.mkdir_p(str(cache_file.parent))
    log = log.bind(collection_name=collection.name)

    # Fast path: a fresh-enough cached trie exists on disk.
    if cache_file and not cache_is_too_old(cache_file):
        path_set = dawg.CompletionDAWG()
        log.debug("paths.trie.cache.load", file=cache_file)
        path_set.load(str(cache_file))
        return path_set

    # Slow path: rebuild from the index and the filesystem.
    log.info("paths.trie.build")
    path_set = dawg.CompletionDAWG(
        chain(collection.iter_index_uris(), collection.iter_fs_uris()))
    log.info("paths.trie.done")
    if cache_file is not None:
        log.debug("paths.trie.cache.create", file=cache_file)
        # Atomic save so a crashed build never leaves a half-written cache.
        with fileutils.atomic_save(str(cache_file)) as f:
            path_set.write(f)
    return path_set
def load_txt_file(self, file_path):
    """Load a word list from *file_path* into self.possible_words.

    Each line is lower-cased and stripped to alphabetic characters;
    single-character results are dropped.  On any failure the DAWG is
    left empty (best-effort load, preserved from the original).
    """
    try:
        unique_words = set()
        with open(file_path, 'r', encoding='UTF-8', newline='') as txt_file:
            for line in txt_file:
                cleaned = ''.join(ch.lower() for ch in line if ch.isalpha())
                unique_words.add(cleaned)
        candidates = (w for w in sorted(unique_words) if len(w) > 1)
        self.possible_words = dawg.CompletionDAWG(candidates)
    except Exception:
        self.possible_words = dawg.CompletionDAWG()
def laodDist():
    """Load the per-player dictionary file into the module-level
    ``completion_dawg``.

    Reads ``<cwd>/Players/<sys.argv[5]>/dictionary.txt``, one word per line.
    NOTE(review): the name keeps the original "laod" typo because callers
    depend on it.
    """
    global completion_dawg
    # Proper join components instead of string concatenation inside
    # os.path.join (the original concatenated '/' manually).
    path = os.path.join(os.getcwd(), 'Players', sys.argv[5], 'dictionary.txt')
    # Context manager closes the handle even if read() raises; the original
    # leaked the file descriptor on error.
    with open(path, 'r') as f:
        entries = f.read().split('\n')
    completion_dawg = dawg.CompletionDAWG(entries)
def spell_check(to_check, direction, begin_letter):
    """Return candidate corrections for *to_check* at edit distance 1.

    :param to_check: the (possibly misspelled) word.
    :param direction: 'r' to search the reversed dictionary, anything else
        to search the forward dictionary.
    :param begin_letter: prefix used to restrict the DAWG scan.
    :returns: list of suggested words (may contain duplicates).

    Side effect (preserved from the original): each call re-reads the
    dictionary files and APPENDS to the module-level dictWords /
    rev_dictWords lists, so repeated calls accumulate duplicates.
    """
    suggests = []
    # Context managers close the dictionary files; the original leaked
    # all three handles.
    with open("dictionary_15k.txt", "r") as fwd_file:
        for entry in fwd_file:
            dictWords.append(entry.strip())
    # read the store the reverse dictionary
    with open("reverse_dictionary_15k.txt", "r") as rev_file:
        for entry in rev_file:
            rev_dictWords.append(entry.strip())

    # Build both DAWGs, pick the one matching the search direction.
    rev_dict_dawg = dawg.CompletionDAWG(rev_dictWords)
    dict_dawg = dawg.CompletionDAWG(dictWords)
    used_dawg = rev_dict_dawg if direction == 'r' else dict_dawg

    # Only scan words sharing the given first letter(s).
    for word in used_dawg.keys(begin_letter):
        if distance(to_check, word) == 1:
            suggests.append(word)

    # Also consider morphological forms.
    with open("morph_forms.txt", "r") as morph_file:
        morph_words = [entry.strip() for entry in morph_file]
    for word in morph_words:
        if distance(to_check, word) == 1:
            suggests.append(word)
    return suggests
def _load_bytes_dawg(self) -> None:
    """Restore persisted word data from the workdir, if present.

    Loads the BytesDAWG saved under DictionaryManager.persist_name,
    rebuilds self.word_dict from it, and refreshes self.completion_dawg
    over the resulting keys.  Silently does nothing when the workdir or
    the persisted file is missing.
    """
    if not (self.workdir and os.path.isdir(self.workdir)):
        return
    persist_path = os.path.join(self.workdir, DictionaryManager.persist_name)
    if not os.path.isfile(persist_path):
        return
    stored = dawg.BytesDAWG()
    stored.load(persist_path)
    for key, raw in stored.iteritems():
        self.word_dict[key] = WordData.frombytes(raw)
    self.completion_dawg = dawg.CompletionDAWG(self.word_dict.keys())
def __init__(self, filename, featname, format=None):
    """Load a DAWG-backed global feature from *filename*.

    With format=None a plain CompletionDAWG is used; otherwise a
    RecordDAWG with the given struct format string.
    """
    import dawg
    self.data = (dawg.CompletionDAWG() if format is None
                 else dawg.RecordDAWG(format))
    self.data.load(filename)
    self.filename = filename
    super(DAWGGlobalFeature, self).__init__(self.data, featname)
def __init__(self, model):
    """ words is a list of words sorted in the same order as the corresponding breakpoints in breakpoints """
    # model behaves like a sorted mapping: it supports last_key() and a
    # sliceable keys() view — presumably a sortedcontainers.SortedDict or
    # similar; TODO confirm against the caller.
    self._encodemap = model
    oldval = 0  # NOTE(review): unused — looks like leftover code.
    # All keys except the last become the completion trie of tokens.
    self._tokens = dawg.CompletionDAWG(model.keys()[:-1])
    # The value stored under the last key acts as the upper bound.
    self._upper = model[model.last_key()]
    # Code size in bits: enough for _upper plus two extra bits.
    self._size = self._upper.bit_length() + 2
    # Splitter keeps the delimiter: any char that is not alnum or "'".
    self._split_re = re.compile("([^a-zA-Z0-9'])")
def check_indict(word):
    """Return 1 if *word* is in dictionary_15k.txt, else 0.

    Side effect (preserved): appends every dictionary line to the
    module-level dictWords list on each call.
    """
    # Context manager closes the file; the original leaked this handle and
    # additionally opened reverse_dictionary_15k.txt without ever reading
    # it — that dead open is removed.
    with open("dictionary_15k.txt", "r") as f:
        for line in f:
            dictWords.append(line.strip())
    # DAWG membership test is O(len(word)).
    dict_dawg = dawg.CompletionDAWG(dictWords)
    return 1 if word in dict_dawg else 0
def __init__(self, words=None, model=None, entity=None, case_sensitive=None,
             lexicon=None):
    """
    :param list(list(string)) words: list of words, each of which is a
        list of tokens.

    Any keyword left as None falls back to the class-level default of the
    same name; passing words triggers an immediate build().
    """
    self._dawg = dawg.CompletionDAWG()
    # Each assignment keeps the class default when the argument is None
    # (same instance-attribute semantics as the original).
    self.model = self.model if model is None else model
    self.entity = self.entity if entity is None else entity
    self.case_sensitive = (self.case_sensitive if case_sensitive is None
                           else case_sensitive)
    self.lexicon = self.lexicon if lexicon is None else lexicon
    self._loaded_model = False
    if words is not None:
        self.build(words)
def to_dawg(df, columns=None, format=None):
    """
    Encode ``pandas.DataFrame`` with GeoNames data (loaded using
    :func:`read_geonames` and maybe filtered in some way) to ``dawg.DAWG``
    or ``dawg.RecordDAWG``.

    ``dawg.DAWG`` is created if ``columns`` and ``format`` are both None.
    """
    import dawg
    if columns is not None:
        return dawg.RecordDAWG(format, _iter_geonames_items(df, columns))
    assert format is None
    names = _split_names_into_rows(df)
    return dawg.CompletionDAWG(iter(names.name))
def create_dawgs(self, list_of_filepaths, force_pickle=True):
    """Build a (DAWG, CompletionDAWG) pair for each word file.

    :param list_of_filepaths: files to read words from (via file_handler).
    :param force_pickle: when True, pickle each pair as it is built.
    :returns: list of (base_dawg, completion_dawg) tuples, one per file.
    """
    all_dawgs = []
    total = len(list_of_filepaths)
    # enumerate() replaces list_of_filepaths.index(file): index() was an
    # O(n) scan per iteration and returns the FIRST occurrence, so
    # duplicate paths reported (and pickled under) the wrong position.
    for position, file in enumerate(list_of_filepaths, start=1):
        s_time = time.time()
        words = self.file_handler.get_words(file)
        base_dawg = dawg.DAWG(words)
        completion_dawg = dawg.CompletionDAWG(words)
        all_dawgs.append((base_dawg, completion_dawg))
        print(
            f"Created DAWGs {position}/{total} "
            f"| TIME: {time.time() - s_time}")
        if force_pickle:
            self.pickle_dawg(f"base_dawg_{position}.pkl", base_dawg)
            self.pickle_dawg(f"completion_dawg_{position}.pkl",
                             completion_dawg)
    return all_dawgs
def add_word(word, prnt_res):
    """Add *word* to both dictionary files if not already present.

    :param word: word to add; transliterated with to_roman when
        prnt_res == 3.
    :param prnt_res: output-mode flag (3 means romanize first).
    :returns: always 1 (preserved from the original).

    Side effect (preserved): re-reads the dictionary and appends every
    line to the module-level dictWords list.
    """
    if prnt_res == 3:
        word = to_roman(word)
    # Context managers replace the original's manual open/close pairs,
    # so the appends are flushed/closed even if a write raises.
    with open("dictionary_15k.txt", "a") as fwd, \
            open("reverse_dictionary_15k.txt", "a") as rev:
        if check_indict(word) == 0:
            fwd.write("%s\n" % word)
            rev.write("%s\n" % word[::-1])
    # Reload the (possibly extended) dictionary into the global list.
    with open("dictionary_15k.txt", "r") as fwd:
        for line in fwd:
            dictWords.append(line.strip())
    # NOTE(review): this local DAWG is built and discarded — kept to
    # preserve behavior, but it has no effect outside this function.
    dict_dawg = dawg.CompletionDAWG(dictWords)
    return 1
def build(self, words):
    """Construct dictionary DAWG from tokenized words."""
    normalized = list(map(self._normalize, words))
    self._dawg = dawg.CompletionDAWG(normalized)
    self._loaded_model = True
def build_completion_dawg(geohash_list):
    """Build a dawg.CompletionDAWG over *geohash_list* and wrap it in the
    project's CompletionDAWG adapter."""
    return CompletionDAWG(dawg.CompletionDAWG(geohash_list))
def empty_dawg(self):
    """Return a fresh, empty CompletionDAWG."""
    return dawg.CompletionDAWG()
def dawg(self):
    """Build and return a CompletionDAWG over self.keys."""
    return dawg.CompletionDAWG(self.keys)
'''
String data in a DAWG may take 200x less memory than in a standard Python dict
and the raw lookup speed is comparable; it also provides fast advanced methods
like prefix search.
'''
import dawg

words = [u'foo', u'bar', u'foobar', u'foö', u'bör']
base_dawg = dawg.DAWG(words)
completion_dawg = dawg.CompletionDAWG(words)

print("foo" in base_dawg)
print(completion_dawg.has_keys_with_prefix(u'f'))
print(base_dawg.prefixes(u'foobarz'))
# Resolved an unresolved git merge conflict that was committed here:
# CompletionDAWG has no .has() method (the prefix test is
# has_keys_with_prefix, used above), so the HEAD side that commented the
# call out is the correct resolution.
# print(completion_dawg.has(u'f'))
def _process_list(self, words):
    """Build a forward and a reversed-key CompletionDAWG from *words*.

    Returns the pair (forward_dawg, reversed_dawg).
    """
    forward = dawg.CompletionDAWG(words)
    backward = dawg.CompletionDAWG([word[::-1] for word in words])
    return forward, backward
def save(self):
    """Persist self.tokens as '<name>.dawg'; no-op when there are none."""
    if not self.tokens:
        return
    dawg.CompletionDAWG(self.tokens).save(r'{}.dawg'.format(self.name))
import sys
from successordict import SuccessorDict as sdict
import json
import gzip
import copy
import dawg
from operator import mul
import time
import progressbar
from repoze.lru import lru_cache

# Word-count model keyed by word; sdict keeps key order for the DAWG below.
with gzip.open('engmodel1.json.gz', 'rb') as f:
    countdict = json.load(f, object_pairs_hook=sdict)

# Completion trie over the model's vocabulary, used for prefix lookups.
ddawg = dawg.CompletionDAWG(countdict.keys())


@lru_cache(30000)
def tokenize(word):
    """Return every segmentation of *word* into known vocabulary words.

    Each result is a list of tokens; the whole word itself is included
    when it is in the vocabulary.  Memoized via lru_cache.
    """
    # print word
    prefices = ddawg.prefixes(word)
    l = []
    # `in countdict` instead of `in countdict.keys()`: direct mapping
    # membership avoids materializing/scanning the key collection on
    # every (memoized) call.
    if word in countdict:
        l = [[word]]
    for prefix in prefices[:-1]:
        suffix = word[len(prefix):]
        for tokenlist in tokenize(suffix):
            # tokenlist is part of a memoized return value, so we must not
            # modify it. CAN'T USE tokenlist.insert!
            newlist = [prefix]
            newlist.extend(tokenlist)
            if newlist not in l:
                l.append(newlist)
    return l
def create_DAWG(self, corpus):
    '''
    I: corpus of documents (text)
    O: DAWG structure made for prefix lookup, counter dictionary
    '''
    # Delegate straight to the library constructor.
    return dawg.CompletionDAWG(corpus)
def test_no_segfaults_on_empty_dawg(self):
    """Building a CompletionDAWG from no keys must work and yield no keys."""
    empty = dawg.CompletionDAWG([])
    assert empty.keys() == []
def laodDist():
    """Load ./BackPyScripts/dictionary.txt into the module-level
    ``completion_dawg`` (one word per line).

    NOTE(review): the name keeps the original "laod" typo because callers
    depend on it.
    """
    global completion_dawg
    # Context manager closes the handle even if read() raises; the
    # original leaked the file descriptor on error.
    with open('./BackPyScripts/dictionary.txt', 'r') as f:
        entries = f.read().split('\n')
    completion_dawg = dawg.CompletionDAWG(entries)
def semantic(given_word, prnt_res):
    # Subject-verb agreement checker for a Kannada sentence.
    # Each word of *given_word* is looked up in a morphology table
    # (hello_stripped.txt appears to pair a word line with its tag on the
    # FOLLOWING line — TODO confirm the file layout) and the collected
    # noun/verb/pronoun tags drive tense, gender and person agreement
    # messages.  prnt_res == 3 selects Kannada-language output (with
    # roman <-> unicode transliteration of tokens); any other value takes
    # the English-output branch below, which is a near-duplicate.
    # Returns [1, messages] on analysis, or
    # [0, <spell-error message>, <suggested sentences>] when a token is
    # not in the morphology vocabulary.
    # NOTE(review): relies on module-level helpers to_roman/to_uni/distance
    # and the global dict_dawg built elsewhere in this module.
    if prnt_res == 3:
        list1 = []  # NOTE(review): unused
        diccy = open("hello_stripped.txt", "r")
        morphology = open("morphology1.txt", "r")
        to_the_file = []
        lines = diccy.readlines()
        dictionaryWords = []
        d = 0
        correct_input = ""  # NOTE(review): unused
        for line in morphology:
            dictionaryWords.append(line)
        #putting each word in list
        # (appends a stripped copy of the first 305570 entries — the count
        # is hard-coded to the morphology file size; TODO confirm)
        for a in range(0, 305570):
            dictionaryWords.append(dictionaryWords[a].strip())
        completion_dawg = dawg.CompletionDAWG(dictionaryWords)
        #print(given_word)
        to_the_file.append(given_word)
        word = given_word.split()
        pronoun_value = ""
        verb = 0
        noun = 0
        tag_pronoun = ""
        noun_value = ""
        tag_noun = ""
        pronoun = 0
        found = 0
        flag = 0
        tag_verb = ""
        # "letter" is actually a word token of the sentence.
        for letter in word:
            suggest_word = []
            suggest_line = []
            ret_list = []
            letter = to_roman(letter)
            if letter not in completion_dawg:
                # Unknown token: suggest vocabulary entries at edit
                # distance < 2 and bail out with a spell-error result.
                for inflection in dictionaryWords:
                    d = distance(letter, inflection)
                    if d < 2:
                        suggest_word.append(inflection)
                if suggest_word:
                    suggest_word = list(set(suggest_word))
                    for suggestion in suggest_word:
                        suggest_line.append(
                            given_word.replace(to_uni(letter),
                                               to_uni(suggestion)))
                ret_list.append(0)
                ret_list.insert(1, "ಈ ವಾಕ್ಯದ ಪದಬರಿಗೆಯಲ್ಲಿ ತಪ್ಪಿದೆ!")
                ret_list.insert(2, suggest_line)
                return ret_list
            nlines = 0
            for line in lines:
                nlines += 1
                # Word found in the table: its tag is lines[nlines]
                # (the next line).
                if (((letter + "\n") == line) == True):
                    line_no = nlines
                    if noun_value != "" and flag == 0:
                        tag_noun = lines[nlines]
                        flag = 1
                        noun = 1
                    # Reset a provisional noun when a better N-tagged
                    # candidate shows up.
                    if flag == 1 and (line[:-1].endswith(
                        ('iMda', 'u')) == True) and lines[nlines].startswith('N'):
                        tag_noun = ""
                        noun_value = ""
                        flag = 0
                        noun = 0
                    if (lines[nlines].startswith('N') and noun == 0
                            and (line[:-1].endswith(('iMda', 'u')) == True)
                            and (line[:-1].endswith('annu') == False)
                            and noun_value == ""):
                        #to_the_file.append("ನಾಮಪದವು "+letter)
                        tag_noun = lines[nlines]
                        noun = 1
                    elif ((lines[nlines].startswith('ITER')
                           or lines[nlines].startswith('PROG')
                           or lines[nlines].startswith('ABS'))):
                        #to_the_file.append("ಕ್ರಿಯಾಪದವು " +lines[nlines-1])
                        tag_verb = lines[nlines]
                        verb = 1
                    elif (lines[nlines].startswith('PRO') and pronoun == 0):
                        pronoun_value = lines[nlines - 1]
                        if pronoun_value[:-1] == "nanna":
                            number = word.index(letter) + 1
                            noun_value = word[number]
                            # dict_dawg: module-level dictionary DAWG —
                            # TODO confirm it is built before this runs.
                            if noun_value not in dict_dawg:
                                noun_value = ""
                        #to_the_file.append("ಆಡುಗಪದವು " +lines[nlines-1])
                        tag_pronoun = lines[nlines]
                        pronoun = 1
                    # Stop scanning once both a noun and a verb are tagged.
                    if (noun == 1 and verb == 1):
                        break
            nlines = line_no
        # Fall back to the pronoun tag when no noun tag was found.
        if ((tag_pronoun == "") == False) and (tag_noun == ""):
            tag_noun = tag_pronoun
            noun = pronoun
        if verb == 0 or noun == 0:
            found = 1
        ret_list.insert(0, 1)
        # Tense report from the verb tag.
        if ('PAST' in tag_verb):
            to_the_file.append("ಇದು ಭೂತಕಾಲ ವಾಕ್ಯ ")
        elif ('PRES' in tag_verb):
            to_the_file.append("ಇದು ವರ್ತಮಾನಕಾಲ ವಾಕ್ಯ ")
        elif ('FUT' in tag_verb):
            to_the_file.append("ಇದು ಭವಿಷ್ಯತ್ಕಾಲ ವಾಕ್ಯ")
        #print('N.SL' in tag_noun)
        # Gender/number agreement between the noun tag and the verb tag.
        if (('N.SL' in tag_noun and 'N.SL' in tag_verb)
                or ('N.PL' in tag_noun and 'N.PL' in tag_verb)
                or (('M.SL' in tag_noun or 'MFN.SL' in tag_noun)
                    and ('M.SL' in tag_verb or 'MFN.SL' in tag_verb))
                or (('M.PL' in tag_noun or 'MFN.PL' in tag_noun)
                    and ('M.PL' in tag_verb or 'MFN.PL' in tag_verb))
                or (('F.SL' in tag_noun or 'MFN.SL' in tag_noun)
                    and ('F.SL' in tag_verb or 'MFN.SL' in tag_verb))
                or (('F.PL' in tag_noun or 'MFN.PL' in tag_noun)
                    and ('F.PL' in tag_verb or 'MFN.PL' in tag_verb))):
            to_the_file.append("ಈ ವಾಕ್ಯದಲ್ಲಿ ಕಾಲ ಮತ್ತು ಲಿಂಗ ಸರಿಯಾಗಿದೆ!")
        # Person (P1/P2/P3) agreement messages.
        if ('P1' in tag_noun and 'P1' in tag_verb):
            to_the_file.append(
                "ಈ ವಾಕ್ಯ ಉತ್ತಮ ಪುರುಷದಲ್ಲಿದೆ ಮತ್ತು ಇದರ ಕತೃ ಕ್ರಿಯಾಪದಗಳು ಹೊಂದಿಕೊಂಡಿವೆ"
            )
        elif ('P2' in tag_noun and 'P2' in tag_verb):
            to_the_file.append(
                "ಈ ವಾಕ್ಯ ಮಧ್ಯಮ ಪುರುಷದಲ್ಲಿದೆ ಮತ್ತು ಇದರ ಕತೃ ಕ್ರಿಯಾಪದಗಳು ಹೊಂದಿಕೊಂಡಿವೆ"
            )
        elif (('P3' in tag_noun and 'P3' in tag_verb) or ('P3' in tag_verb)):
            to_the_file.append(
                "ಈ ವಾಕ್ಯ ಪ್ರಥಮ ಪುರುಷದಲ್ಲಿದೆ ಮತ್ತು ಇದರ ಕತೃ ಕ್ರಿಯಾಪದಗಳು ಹೊಂದಿಕೊಂಡಿವೆ"
            )
        elif ('P1' in tag_noun and 'P3' in tag_verb):
            # NOTE(review): trailing 't' in this message is present in the
            # original string and is preserved byte-for-byte.
            to_the_file.append(
                "ಈ ವಾಕ್ಯ ಉತ್ತಮ ಪುರುಷದಲ್ಲಿದೆ ಮತ್ತು ಇದರ ಕತೃ ಕ್ರಿಯಾಪದಗಳು ಹೊಂದಿಕೊಂಡಿವೆt"
            )
        elif ('P2' in tag_noun and 'P3' in tag_verb):
            to_the_file.append(
                "ಈ ವಾಕ್ಯ ಮಧ್ಯಮ ಪುರುಷದಲ್ಲಿದೆ ಮತ್ತು ಇದರ ಕತೃ ಕ್ರಿಯಾಪದಗಳು ಹೊಂದಿಕೊಂಡಿವೆ"
            )
        elif ('P1' in tag_verb or 'P2' in tag_verb):
            to_the_file.append(
                "ವಾಕ್ಯದ ಪುರುಷ ಪ್ರಯೋಗದಲ್ಲಿ ತಪ್ಪಿದೆ ಮತ್ತು ಇದರ ಕತೃ ಕ್ರಿಯಾಪದಗಳು ಹೊಂದಿಕೊಂಡಿಲ್ಲ"
            )
        elif found == 0:
            to_the_file.append("ಈ ವಾಕ್ಯದ ಲಿಂಗ ಪ್ರಯೋಗದಲ್ಲಿ ತಪ್ಪಿದೆ.!!!!")
        elif found == 1:
            to_the_file.append(
                "ಈ ವಾಕ್ಯದಲ್ಲಿ ಕತೃ ಅಥವಾ ಕ್ರಿಯಾಪದ ಇಲ್ಲ. ದಯವಿಟ್ಟು ಪುನಃ ಪ್ರಯತ್ನಿಸಿರಿ"
            )
        to_the_file.append("ಫಲಿತಾಂಶ subject-verb_agreement.txt ಯಲ್ಲಿದೆ !")
        #to_the_file.append("------------------------------------------------------")
        ret_list.insert(1, to_the_file)
        # Append the full report to the output file.
        output_file = "subject-verb_agreement.txt"
        out_file = open(output_file, "a")
        for word in to_the_file:
            out_file.write("%s\n" % word)
        return ret_list
    else:
        # English-output branch: same analysis, English messages and no
        # transliteration of the input tokens.
        list1 = []  # NOTE(review): unused
        diccy = open("hello_stripped.txt", "r")
        morphology = open("morphology1.txt", "r")
        to_the_file = []
        lines = diccy.readlines()
        d = 0
        correct_input = ""  # NOTE(review): unused
        dictionaryWords = []
        for line in morphology:
            dictionaryWords.append(line)
        #putting each word in list
        for a in range(0, 305570):
            dictionaryWords.append(dictionaryWords[a].strip())
        completion_dawg = dawg.CompletionDAWG(dictionaryWords)
        #print(given_word)
        to_the_file.append(given_word)
        word = given_word.split()
        pronoun_value = ""
        verb = 0
        noun = 0
        tag_verb = ""
        tag_pronoun = ""
        noun_value = ""
        tag_noun = ""
        pronoun = 0
        found = 0
        flag = 0
        tag_verb = ""
        for letter in word:
            suggest_word = []
            suggest_line = []
            ret_list = []
            #print(letter)
            if letter not in completion_dawg:
                for inflection in dictionaryWords:
                    d = distance(letter, inflection)
                    if d < 2:
                        suggest_word.append(inflection)
                #print(suggest_word)
                if suggest_word:
                    suggest_word = list(set(suggest_word))
                    for suggestion in suggest_word:
                        suggest_line.append(
                            given_word.replace(letter, suggestion))
                ret_list.append(0)
                ret_list.insert(1, "The sentence has a spell error!")
                ret_list.insert(2, suggest_line)
                return ret_list
            nlines = 0
            for line in lines:
                nlines += 1
                if (((letter + "\n") == line) == True):
                    line_no = nlines
                    if noun_value != "" and flag == 0:
                        tag_noun = lines[nlines]
                        flag = 1
                        noun = 1
                    if flag == 1 and (line[:-1].endswith(
                        ('iMda', 'u')) == True) and lines[nlines].startswith('N'):
                        tag_noun = ""
                        noun_value = ""
                        flag = 0
                        noun = 0
                    if (lines[nlines].startswith('N') and noun == 0
                            and (line[:-1].endswith(('iMda', 'u')) == True)
                            and (line[:-1].endswith('annu') == False)
                            and noun_value == ""):
                        #to_the_file.append("Noun is "+ letter)
                        tag_noun = lines[nlines]
                        noun = 1
                    elif ((lines[nlines].startswith('ITER')
                           or lines[nlines].startswith('PROG')
                           or lines[nlines].startswith('ABS'))):
                        #to_the_file.append("Verb is " + lines[nlines-1])
                        tag_verb = lines[nlines]
                        verb = 1
                    elif (lines[nlines].startswith('PRO') and pronoun == 0):
                        pronoun_value = lines[nlines - 1]
                        if pronoun_value[:-1] == "nanna":
                            number = word.index(letter) + 1
                            noun_value = word[number]
                            if noun_value not in dict_dawg:
                                noun_value = ""
                        #to_the_file.append("Pronoun is " + lines[nlines-1])
                        tag_pronoun = lines[nlines]
                        pronoun = 1
                    if (noun == 1 and verb == 1):
                        break
            nlines = line_no
        if ((tag_pronoun == "") == False) and (tag_noun == ""):
            tag_noun = tag_pronoun
            noun = pronoun
        if verb == 0 or noun == 0:
            found = 1
        ret_list.insert(0, 1)
        if ('PAST' in tag_verb):
            to_the_file.append("The sentence is in past tense")
        elif ('PRES' in tag_verb):
            to_the_file.append("The sentence is in present tense")
        elif ('FUT' in tag_verb):
            to_the_file.append("The sentence is in future tense")
        #print('N.SL' in tag_noun)
        if (('N.SL' in tag_noun and 'N.SL' in tag_verb)
                or ('N.PL' in tag_noun and 'N.PL' in tag_verb)
                or (('M.SL' in tag_noun or 'MFN.SL' in tag_noun)
                    and ('M.SL' in tag_verb or 'MFN.SL' in tag_verb))
                or (('M.PL' in tag_noun or 'MFN.PL' in tag_noun)
                    and ('M.PL' in tag_verb or 'MFN.PL' in tag_verb))
                or (('F.SL' in tag_noun or 'MFN.SL' in tag_noun)
                    and ('F.SL' in tag_verb or 'MFN.SL' in tag_verb))
                or (('F.PL' in tag_noun or 'MFN.PL' in tag_noun)
                    and ('F.PL' in tag_verb or 'MFN.PL' in tag_verb))):
            to_the_file.append("The Gender and the Tense match!")
        if ('P1' in tag_noun and 'P1' in tag_verb):
            to_the_file.append(
                "The person is P1 and the sentence has subject-verb agreement"
            )
        elif ('P2' in tag_noun and 'P2' in tag_verb):
            to_the_file.append(
                "The person is P2 and the sentence has subject-verb agreement"
            )
        elif (('P3' in tag_noun and 'P3' in tag_verb) or ('P3' in tag_verb)):
            to_the_file.append(
                "The person is P3 and the sentence has subject-verb agreement"
            )
        elif ('P1' in tag_noun and 'P3' in tag_verb):
            to_the_file.append(
                "The person is P1 and the sentence has subject-verb agreement"
            )
        elif ('P2' in tag_noun and 'P3' in tag_verb):
            to_the_file.append(
                "The person is P2 and the sentence has subject-verb agreement"
            )
        elif ('P1' in tag_verb or 'P2' in tag_verb):
            to_the_file.append(
                "Wrong person relationship and the sentence has NO subject-verb agreement"
            )
        elif found == 0:
            print("Error!!Gender in subject and verb dont match!!\n")
            to_the_file.append(
                "Error!!Gender in subject and verb dont match!!\n")
        elif found == 1:
            to_the_file.append(
                "Verb or Subject is missing in the sentence!! Please check again"
            )
        to_the_file.append(
            "The output file subject-verb_agreement.txt is generated!")
        to_the_file.append(
            "------------------------------------------------------")
        ret_list.insert(1, to_the_file)
        output_file = "subject-verb_agreement.txt"
        out_file = open(output_file, "a")
        for word in to_the_file:
            out_file.write("%s\n" % word)
        return ret_list
# --- module-level dictionary setup -----------------------------------------
done = 0
final_suggests = []
# holder for reverse dictionary
rev_dictWords = []

# Context managers close the dictionary files; the original leaked both
# handles for the life of the process.
with open("dictionary_15k.txt", "r") as to_merge_file:
    for line1 in to_merge_file:
        dictWords.append(line1.strip())
# read the store the reverse dictionary
with open("reverse_dictionary_15k.txt", "r") as rev_f:
    for line in rev_f:
        rev_dictWords.append(line.strip())

# creating the reverse dawg
rev_dict_dawg = dawg.CompletionDAWG(rev_dictWords)
dict_dawg = dawg.CompletionDAWG(dictWords)

# Five empty slots for recently used entries (literal instead of the
# original's five consecutive appends).
recent = ["", "", "", "", ""]


def set_done():
    """Mark the spell-check pass as finished via the module-level flag."""
    global spell_done
    # print("setting done")
    spell_done = 1
import timeit
import dawg
import gaddag

# Load the Collins word list once; keep the ordered list for the DAWGs and
# a set for the GADDAG constructor.
with open('collins.txt', 'r') as inFile:
    wordy = [x.strip() for x in inFile.readlines()]
words = set(wordy)

normalgaddag = gaddag.GADDAG(words)
normaldawg = dawg.DAWG(wordy)
complete = dawg.CompletionDAWG(wordy)

# print(normalgaddag.root["b"]['a'])
# So we can do single letter word follows GOOD

# Setup code re-executed by timeit before each timed statement.
s = '''
import dawg;
import gaddag;
with open('collins.txt','r') as inFile:
    wordy = [x.strip() for x in inFile.readlines()];
words = set(wordy);
normalgaddag = gaddag.GADDAG(words);
normaldawg = dawg.DAWG(wordy);
complete = dawg.CompletionDAWG(wordy);
'''


# NOTE(review): the body of timest continues beyond this chunk — it is
# truncated here and left as-is.
def timest(stmt):
def __init__(self):
    """Initialise with an empty word DAWG.

    The seed set is empty, so the length filter below yields nothing and
    self.possible_words starts out as an empty CompletionDAWG (words are
    loaded later).
    """
    possible_words_set = set()
    candidates = [w for w in sorted(possible_words_set) if len(w) > 1]
    self.possible_words = dawg.CompletionDAWG(candidates)
def load(self):
    """Load and return the CompletionDAWG stored at '<name>.dawg'."""
    return dawg.CompletionDAWG().load(r'{}.dawg'.format(self.name))