Example #1
def build_test_data():

    dawg.CompletionDAWG(['f', 'bar', 'foo',
                         'foobar']).save('dev_data/small/completion.dawg')
    dawg.CompletionDAWG([]).save('dev_data/small/completion-empty.dawg')

    bytes_data = (('foo', b'data1'), ('bar', b'data2'), ('foo', b'data3'),
                  ('foobar', b'data4'))
    dawg.BytesDAWG(bytes_data).save('dev_data/small/bytes.dawg')

    record_data = (('foo', (3, 2, 256)), ('bar', (3, 1, 0)),
                   ('foo', (3, 2, 1)), ('foobar', (6, 3, 0)))
    dawg.RecordDAWG(str(">3H"), record_data).save('dev_data/small/record.dawg')

    int_data = {'foo': 1, 'bar': 5, 'foobar': 3}
    dawg.IntDAWG(int_data).save('dev_data/small/int_dawg.dawg')
    dawg.IntCompletionDAWG(int_data).save(
        'dev_data/small/int_completion_dawg.dawg')

    dawg.DAWG(TestPrediction.DATA).save('dev_data/small/prediction.dawg')
    dawg.RecordDAWG(str("=H"), [
        (k, (len(k), )) for k in TestPrediction.DATA
    ]).save('dev_data/small/prediction-record.dawg')

    create_dawg().save('dev_data/large/dawg.dawg')
    create_bytes_dawg().save('dev_data/large/bytes_dawg.dawg')
    create_record_dawg().save('dev_data/large/record_dawg.dawg')
    create_int_dawg().save('dev_data/large/int_dawg.dawg')
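
A quick sketch of reading the small fixtures back, assuming build_test_data() has already written the files under dev_data/small/:

import dawg

# Reload two of the small fixtures created above and query them.
completion = dawg.CompletionDAWG()
completion.load('dev_data/small/completion.dawg')
print('foo' in completion)        # membership test
print(completion.keys('foo'))     # keys with the prefix 'foo', e.g. ['foo', 'foobar']

# RecordDAWG needs the same struct format (">3H") that was used when saving.
record = dawg.RecordDAWG(">3H")
record.load('dev_data/small/record.dawg')
print(record['foo'])              # list of the 3-tuples stored under 'foo'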
Example #2
def prefix_suffix(given_word, prnt_res):
    i = 2
    prefix_match = ''
    suffix_match = ''

    prefix_dawg = dawg.CompletionDAWG(prefix_removed)
    suffix_dawg = dawg.CompletionDAWG(suffix_removed)
    len_given_word = len(given_word)
    while i < len_given_word:
        prefix_check_word = given_word[:i]
        if prefix_check_word in prefix_dawg:
            prefix_match = prefix_check_word
        i = i + 1
    i = 2
    while i < len_given_word:
        suffix_check_word = given_word[len_given_word - i:len_given_word]
        if suffix_check_word in suffix_dawg:
            suffix_match = suffix_check_word
        i = i + 1
    if prefix_match != '' and suffix_match != '':
        print("printing prefix match")
        print(prefix_match)
        print(prefix_map[prefix_match])
        print("printing suffix match")
        print(suffix_match)
        print(suffix_map[suffix_match])

        return sandhi_maker(prefix_match, suffix_match, len_given_word,
                            prnt_res)
    else:
        found_print = []
        found_print.append(0)
        found_print.append('')
        found_print.append('')
        return found_print
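
The loops above scan growing prefixes (and suffixes) and keep the longest one found in the DAWG. A minimal, self-contained sketch of that idea, using a hypothetical word list in place of prefix_removed:

import dawg

# Hypothetical stand-in for the real prefix data.
prefix_dawg = dawg.CompletionDAWG([u'ab', u'abc', u'abcd'])

def longest_known_prefix(word):
    match = ''
    for i in range(2, len(word)):      # same bounds as the while-loop above
        candidate = word[:i]
        if candidate in prefix_dawg:   # plain membership test against the DAWG
            match = candidate          # keep the longest hit seen so far
    return match

print(longest_known_prefix(u'abcdef'))  # 'abcd'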
Example #3
def build_pathset(collection: Collection,
                  cache_path: Path = None,
                  log=_LOG) -> dawg.CompletionDAWG:
    """
    Build a combined set (in dawg form) of all dataset paths in the given index and filesystem.

    Optionally use the given cache directory to cache repeated builds.
    """
    locations_cache = cache_path.joinpath(query_name(
        collection.query), 'locations.dawg') if cache_path else None
    if locations_cache:
        fileutils.mkdir_p(str(locations_cache.parent))

    log = log.bind(collection_name=collection.name)
    if locations_cache and not cache_is_too_old(locations_cache):
        path_set = dawg.CompletionDAWG()
        log.debug("paths.trie.cache.load", file=locations_cache)
        path_set.load(str(locations_cache))
    else:
        log.info("paths.trie.build")
        path_set = dawg.CompletionDAWG(
            chain(collection.iter_index_uris(), collection.iter_fs_uris()))
        log.info("paths.trie.done")
        if locations_cache is not None:
            log.debug("paths.trie.cache.create", file=locations_cache)
            with fileutils.atomic_save(str(locations_cache)) as f:
                path_set.write(f)
    return path_set
Example #4
    def load_txt_file(self, file_path):
        possible_words_set = set()
        try:
            with open(file_path, 'r', encoding='UTF-8',
                      newline='') as txt_file:
                for row in txt_file:
                    word = ''.join(x.lower() for x in row if x.isalpha())
                    possible_words_set.add(word)

            possible_words_list = filter(lambda x: len(x) > 1,
                                         sorted(list(possible_words_set)))
            self.possible_words = dawg.CompletionDAWG(possible_words_list)
        except Exception:
            self.possible_words = dawg.CompletionDAWG()
Example #5
def laodDist():
    f = open(
        os.path.join(os.getcwd() + '/Players/' + sys.argv[5] +
                     '/dictionary.txt'), 'r')
    dictArray = (f.read()).split('\n')
    global completion_dawg
    completion_dawg = dawg.CompletionDAWG(dictArray)
    f.close()
Example #6
def spell_check(to_check, direction, begin_letter):
    suggests = []

    to_merge_file = open("dictionary_15k.txt", "r")
    for line1 in to_merge_file:
        dictWords.append(line1.strip())

    # read and store the reverse dictionary
    rev_f = open("reverse_dictionary_15k.txt", "r")
    for line in rev_f:
        rev_dictWords.append(line.strip())


#creating the reverse dawg
    rev_dict_dawg = dawg.CompletionDAWG(rev_dictWords)
    dict_dawg = dawg.CompletionDAWG(dictWords)

    # the suggestions to be returned, in order

    if direction == 'r':
        dictionary = rev_dictWords
        used_dawg = rev_dict_dawg
    else:
        dictionary = dictWords
        used_dawg = dict_dawg

    # only the required words from the dictionary (those starting with begin_letter)
    required_words = used_dawg.keys(begin_letter)

    for word in required_words:
        d = distance(to_check, word)
        if d == 1:
            suggests.append(word)

    #adding morphology
    morphWords = []
    morph_suggests = []
    file_morph = open("morph_forms.txt", "r")
    for line in file_morph:
        morphWords.append(line.strip())
    for word in morphWords:
        d = distance(to_check, word)
        if d == 1:
            suggests.append(word)

    return suggests
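
The reversed dictionary above is what makes suffix-oriented lookups cheap: storing every word reversed turns CompletionDAWG.keys(prefix) into a suffix index. A small sketch with a hypothetical word list:

import dawg

words = [u'cat', u'hat', u'dog']
rev_dawg = dawg.CompletionDAWG([w[::-1] for w in words])

# All words ending in 't': query the reversed DAWG with prefix 't', then un-reverse the hits.
ending_in_t = [k[::-1] for k in rev_dawg.keys(u't')]
print(ending_in_t)   # ['cat', 'hat'] (order follows the reversed keys)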
Example #7
 def _load_bytes_dawg(self) -> None:
     if self.workdir and os.path.isdir(self.workdir):
         bytes_dawg_path = os.path.join(self.workdir, DictionaryManager.persist_name)
         if os.path.isfile(bytes_dawg_path):
             bytes_dawg = dawg.BytesDAWG()
             bytes_dawg.load(bytes_dawg_path)
             for k, v in bytes_dawg.iteritems():
                 self.word_dict[k] = WordData.frombytes(v)
             self.completion_dawg = dawg.CompletionDAWG(self.word_dict.keys())
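
_load_bytes_dawg() only reads a BytesDAWG that was written elsewhere. A minimal sketch of the writing side, with a made-up file name and byte payloads standing in for WordData.tobytes():

import dawg

entries = [(u'apple', b'\x01\x00'), (u'apply', b'\x02\x00')]   # (key, payload) pairs
dawg.BytesDAWG(entries).save('dictionary.bytesdawg')           # hypothetical file name

# Reading it back mirrors the loop in the example above.
reloaded = dawg.BytesDAWG()
reloaded.load('dictionary.bytesdawg')
for key, payload in reloaded.iteritems():
    print(key, payload)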
Example #8
    def __init__(self, filename, featname, format=None):
        import dawg

        if format is None:
            self.data = dawg.CompletionDAWG()
        else:
            self.data = dawg.RecordDAWG(format)
        self.data.load(filename)

        self.filename = filename
        super(DAWGGlobalFeature, self).__init__(self.data, featname)
Example #9
    def __init__(self, model):
        """
        words is a list of words sorted in the same order as the corresponding breakpoints in breakpoints
        """

        self._encodemap = model
        oldval = 0

        self._tokens = dawg.CompletionDAWG(model.keys()[:-1])
        self._upper = model[model.last_key()]
        self._size = self._upper.bit_length() + 2
        self._split_re = re.compile("([^a-zA-Z0-9'])")
Example #10
def check_indict(word):
    # read the dictionary and append its words to dictWords
    f = open("dictionary_15k.txt", "r")
    for line in f:
        dictWords.append(line.strip())
    f.close()

    # create a DAWG for a fast membership lookup
    dict_dawg = dawg.CompletionDAWG(dictWords)
    if word in dict_dawg:
        return 1
    return 0
Example #11
    def __init__(self, words=None, model=None, entity=None, case_sensitive=None, lexicon=None):
        """

        :param list(list(string)) words: list of words, each of which is a list of tokens.
        """
        self._dawg = dawg.CompletionDAWG()
        self.model = model if model is not None else self.model
        self.entity = entity if entity is not None else self.entity
        self.case_sensitive = case_sensitive if case_sensitive is not None else self.case_sensitive
        self.lexicon = lexicon if lexicon is not None else self.lexicon
        self._loaded_model = False
        if words is not None:
            self.build(words)
Example #12
def to_dawg(df, columns=None, format=None):
    """
    Encode ``pandas.DataFrame`` with GeoNames data
    (loaded using :func:`read_geonames` and maybe filtered in some way)
    to ``dawg.DAWG`` or ``dawg.RecordDAWG``. ``dawg.DAWG`` is created
    if ``columns`` and ``format`` are both None.
    """
    import dawg
    if columns is None:
        assert format is None
        df = _split_names_into_rows(df)
        return dawg.CompletionDAWG(iter(df.name))

    return dawg.RecordDAWG(format, _iter_geonames_items(df, columns))
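
The format string handed to RecordDAWG is a struct format describing how each value tuple is packed. A small sketch with hypothetical GeoNames-like records (">Ii" here means one big-endian unsigned int and one signed int per record):

import dawg

items = [(u'Berlin', (2950159, 3426354)), (u'Bergen', (3161732, 271949))]
geo_dawg = dawg.RecordDAWG(">Ii", items)

print(geo_dawg[u'Berlin'])    # list of (id, value) tuples stored under the key
print(geo_dawg.keys(u'Ber'))  # prefix search over the place names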
Example #13
    def create_dawgs(self, list_of_filepaths, force_pickle=True):
        all_dawgs = []
        for i, file in enumerate(list_of_filepaths, start=1):
            s_time = time.time()
            words = self.file_handler.get_words(file)
            base_dawg = dawg.DAWG(words)
            completion_dawg = dawg.CompletionDAWG(words)
            all_dawgs.append((base_dawg, completion_dawg))

            print(f"Created DAWGs {i}/{len(list_of_filepaths)} "
                  f"| TIME: {time.time() - s_time}")

            if force_pickle:
                self.pickle_dawg(f"base_dawg_{i}.pkl", base_dawg)
                self.pickle_dawg(f"completion_dawg_{i}.pkl", completion_dawg)
        return all_dawgs
Example #14
def add_word(word, prnt_res):
    f = open("dictionary_15k.txt", "a")
    rf = open("reverse_dictionary_15k.txt", "a")
    if prnt_res == 3:
        word = to_roman(word)
    found = check_indict(word)
    if found == 0:
        f.write("%s\n" % word)
        rf.write("%s\n" % word[::-1])
    f.close()
    rf.close()

    # re-read the dictionary into the global dictWords and rebuild the DAWG
    f = open("dictionary_15k.txt", "r")
    for line in f:
        dictWords.append(line.strip())
    dict_dawg = dawg.CompletionDAWG(dictWords)
    f.close()
    return 1
Example #15
 def build(self, words):
     """Construct dictionary DAWG from tokenized words."""
     words = [self._normalize(tokens) for tokens in words]
     self._dawg = dawg.CompletionDAWG(words)
     self._loaded_model = True
Example #16
def build_completion_dawg(geohash_list):
    d = dawg.CompletionDAWG(geohash_list)
    return CompletionDAWG(d)
Example #17
 def empty_dawg(self):
     return dawg.CompletionDAWG()
Example #18
 def dawg(self):
     return dawg.CompletionDAWG(self.keys)
Example #19
'''
String data in a DAWG may take 200x less memory than in a standard Python dict and the raw lookup speed is comparable;
it also provides fast advanced methods like prefix search.
'''

import dawg
words = [u'foo', u'bar', u'foobar', u'foö', u'bör']
base_dawg = dawg.DAWG(words)
completion_dawg = dawg.CompletionDAWG(words)

print("foo" in base_dawg)
print(completion_dawg.has_keys_with_prefix(u'f'))
print(base_dawg.prefixes(u'foobarz'))
# print(completion_dawg.has(u'f'))  # no such method on CompletionDAWG; use has_keys_with_prefix()
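
A few more of the prefix-search calls the note above refers to, on the same word list (a sketch; the output comments are indicative only):

import dawg

words = [u'foo', u'bar', u'foobar', u'foö', u'bör']
completion_dawg = dawg.CompletionDAWG(words)

print(completion_dawg.keys(u'foo'))                # all keys starting with 'foo'
print(list(completion_dawg.iterkeys(u'b')))        # lazy variant of keys()
print(completion_dawg.has_keys_with_prefix(u'z'))  # False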
Example #20
 def _process_list(self, words):
     rev_words = [w[::-1] for w in words]
     norm_dawg = dawg.CompletionDAWG(words)
     rev_dawg = dawg.CompletionDAWG(rev_words)
     return norm_dawg, rev_dawg
Example #21
 def save(self):
     if self.tokens:
         c_dawg = dawg.CompletionDAWG(self.tokens)
         c_dawg.save(r'{}.dawg'.format(self.name))
Example #22
import sys
from successordict import SuccessorDict as sdict
import json
import gzip
import copy
import dawg
from operator import mul
import time
import progressbar
from repoze.lru import lru_cache
    
with gzip.open('engmodel1.json.gz', 'rb') as f:
    countdict = json.load(f,object_pairs_hook=sdict)
ddawg = dawg.CompletionDAWG(countdict.keys())

@lru_cache(30000)
def tokenize(word):
    #print word
    prefices = ddawg.prefixes(word)
    l = []
    if word in countdict.keys():
        l = [[word]]
    for prefix in prefices[:-1]:
        suffix = word[len(prefix):]
        for tokenlist in tokenize(suffix):
            newlist = [prefix]
            newlist.extend(tokenlist) #tokenlist is an element of a memoized return value, so we must not modify it. CAN'T USE tokenlist.insert!
            if newlist not in l:
                l.append(newlist)
    return l
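
tokenize() above leans on DAWG.prefixes(), which returns every stored key that is a prefix of the argument; that is what drives the recursive segmentation. A tiny sketch with a hypothetical vocabulary:

import dawg

ddawg = dawg.CompletionDAWG([u'a', u'an', u'and', u'roid', u'android'])
print(ddawg.prefixes(u'android'))   # ['a', 'an', 'and', 'android']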
Example #23
 def create_DAWG(self, corpus):
     '''
     I: corpus of documents (text)
     O: DAWG structure made for prefix lookup, counter dictionary
     '''
     return dawg.CompletionDAWG(corpus)
Example #24
 def test_no_segfaults_on_empty_dawg(self):
     d = dawg.CompletionDAWG([])
     assert d.keys() == []
Example #25
def laodDist():
    f = open('./BackPyScripts/dictionary.txt', 'r')
    dictArray = (f.read()).split('\n')
    global completion_dawg
    completion_dawg = dawg.CompletionDAWG(dictArray)
    f.close()
Example #26
def semantic(given_word, prnt_res):
    if prnt_res == 3:

        list1 = []

        diccy = open("hello_stripped.txt", "r")
        morphology = open("morphology1.txt", "r")
        to_the_file = []
        lines = diccy.readlines()
        dictionaryWords = []
        d = 0
        correct_input = ""

        for line in morphology:
            dictionaryWords.append(line)

        #putting each word in list
        for a in range(0, 305570):
            dictionaryWords.append(dictionaryWords[a].strip())

        completion_dawg = dawg.CompletionDAWG(dictionaryWords)
        #print(given_word)
        to_the_file.append(given_word)
        word = given_word.split()
        pronoun_value = ""
        verb = 0
        noun = 0
        tag_pronoun = ""
        noun_value = ""
        tag_noun = ""
        pronoun = 0
        found = 0
        flag = 0
        tag_verb = ""
        for letter in word:
            suggest_word = []
            suggest_line = []
            ret_list = []
            letter = to_roman(letter)
            if letter not in completion_dawg:
                for inflection in dictionaryWords:
                    d = distance(letter, inflection)
                    if d < 2:
                        suggest_word.append(inflection)
                if suggest_word:

                    suggest_word = list(set(suggest_word))
                    for suggestion in suggest_word:
                        suggest_line.append(
                            given_word.replace(to_uni(letter),
                                               to_uni(suggestion)))
                    ret_list.append(0)
                    ret_list.insert(1, "ಈ ವಾಕ್ಯದ ಪದಬರಿಗೆಯಲ್ಲಿ ತಪ್ಪಿದೆ!")
                    ret_list.insert(2, suggest_line)
                    return ret_list
            nlines = 0
            for line in lines:
                nlines += 1

                if (((letter + "\n") == line) == True):

                    line_no = nlines

                    if noun_value != "" and flag == 0:
                        tag_noun = lines[nlines]

                        flag = 1
                        noun = 1
                    if flag == 1 and (line[:-1].endswith(
                        ('iMda',
                         'u')) == True) and lines[nlines].startswith('N'):
                        tag_noun = ""
                        noun_value = ""
                        flag = 0
                        noun = 0
                    if (lines[nlines].startswith('N') and noun == 0
                            and (line[:-1].endswith(('iMda', 'u')) == True)
                            and (line[:-1].endswith('annu') == False)
                            and noun_value == ""):

                        #to_the_file.append("ನಾಮಪದವು "+letter)

                        tag_noun = lines[nlines]

                        noun = 1
                    elif ((lines[nlines].startswith('ITER')
                           or lines[nlines].startswith('PROG')
                           or lines[nlines].startswith('ABS'))):

                        #to_the_file.append("ಕ್ರಿಯಾಪದವು  " +lines[nlines-1])

                        tag_verb = lines[nlines]
                        verb = 1
                    elif (lines[nlines].startswith('PRO') and pronoun == 0):

                        pronoun_value = lines[nlines - 1]
                        if pronoun_value[:-1] == "nanna":
                            number = word.index(letter) + 1
                            noun_value = word[number]

                            if noun_value not in dict_dawg:
                                noun_value = ""

                        #to_the_file.append("ಆಡುಗಪದವು " +lines[nlines-1])

                        tag_pronoun = lines[nlines]
                        pronoun = 1

                if (noun == 1 and verb == 1):
                    break
        nlines = line_no
        if ((tag_pronoun == "") == False) and (tag_noun == ""):
            tag_noun = tag_pronoun
            noun = pronoun
        if verb == 0 or noun == 0:
            found = 1
        ret_list.insert(0, 1)

        if ('PAST' in tag_verb):

            to_the_file.append("ಇದು ಭೂತಕಾಲ ವಾಕ್ಯ  ")
        elif ('PRES' in tag_verb):

            to_the_file.append("ಇದು ವರ್ತಮಾನಕಾಲ ವಾಕ್ಯ ")
        elif ('FUT' in tag_verb):

            to_the_file.append("ಇದು ಭವಿಷ್ಯತ್‌ಕಾಲ ವಾಕ್ಯ")
        #print('N.SL' in tag_noun)
        if (('N.SL' in tag_noun and 'N.SL' in tag_verb)
                or ('N.PL' in tag_noun and 'N.PL' in tag_verb)
                or (('M.SL' in tag_noun or 'MFN.SL' in tag_noun) and
                    ('M.SL' in tag_verb or 'MFN.SL' in tag_verb))
                or (('M.PL' in tag_noun or 'MFN.PL' in tag_noun) and
                    ('M.PL' in tag_verb or 'MFN.PL' in tag_verb))
                or (('F.SL' in tag_noun or 'MFN.SL' in tag_noun) and
                    ('F.SL' in tag_verb or 'MFN.SL' in tag_verb))
                or (('F.PL' in tag_noun or 'MFN.PL' in tag_noun) and
                    ('F.PL' in tag_verb or 'MFN.PL' in tag_verb))):

            to_the_file.append("ಈ ವಾಕ್ಯದಲ್ಲಿ ಕಾಲ ಮತ್ತು ಲಿಂಗ ಸರಿಯಾಗಿದೆ!")
            if ('P1' in tag_noun and 'P1' in tag_verb):

                to_the_file.append(
                    "ಈ ವಾಕ್ಯ ಉತ್ತಮ ಪುರುಷದಲ್ಲಿದೆ ಮತ್ತು ಇದರ ಕತೃ ಕ್ರಿಯಾಪದಗಳು ಹೊಂದಿಕೊಂಡಿವೆ"
                )
            elif ('P2' in tag_noun and 'P2' in tag_verb):

                to_the_file.append(
                    "ಈ ವಾಕ್ಯ ಮಧ್ಯಮ ಪುರುಷದಲ್ಲಿದೆ ಮತ್ತು ಇದರ ಕತೃ ಕ್ರಿಯಾಪದಗಳು ಹೊಂದಿಕೊಂಡಿವೆ"
                )
            elif (('P3' in tag_noun and 'P3' in tag_verb)
                  or ('P3' in tag_verb)):

                to_the_file.append(
                    "ಈ ವಾಕ್ಯ ಪ್ರಥಮ ಪುರುಷದಲ್ಲಿದೆ ಮತ್ತು ಇದರ ಕತೃ ಕ್ರಿಯಾಪದಗಳು ಹೊಂದಿಕೊಂಡಿವೆ"
                )
            elif ('P1' in tag_noun and 'P3' in tag_verb):

                to_the_file.append(
                    "ಈ ವಾಕ್ಯ ಉತ್ತಮ ಪುರುಷದಲ್ಲಿದೆ ಮತ್ತು ಇದರ ಕತೃ ಕ್ರಿಯಾಪದಗಳು ಹೊಂದಿಕೊಂಡಿವೆt"
                )
            elif ('P2' in tag_noun and 'P3' in tag_verb):

                to_the_file.append(
                    "ಈ ವಾಕ್ಯ ಮಧ್ಯಮ ಪುರುಷದಲ್ಲಿದೆ ಮತ್ತು ಇದರ ಕತೃ ಕ್ರಿಯಾಪದಗಳು ಹೊಂದಿಕೊಂಡಿವೆ"
                )
            elif ('P1' in tag_verb or 'P2' in tag_verb):

                to_the_file.append(
                    "ವಾಕ್ಯದ ಪುರುಷ ಪ್ರಯೋಗದಲ್ಲಿ ತಪ್ಪಿದೆ ಮತ್ತು ಇದರ ಕತೃ ಕ್ರಿಯಾಪದಗಳು ಹೊಂದಿಕೊಂಡಿಲ್ಲ"
                )
        elif found == 0:

            to_the_file.append("ಈ ವಾಕ್ಯದ ಲಿಂಗ ಪ್ರಯೋಗದಲ್ಲಿ ತಪ್ಪಿದೆ.!!!!")
        elif found == 1:

            to_the_file.append(
                "ಈ ವಾಕ್ಯದಲ್ಲಿ ಕತೃ ಅಥವಾ ಕ್ರಿಯಾಪದ ಇಲ್ಲ. ದಯವಿಟ್ಟು ಪುನಃ ಪ್ರಯತ್ನಿಸಿರಿ"
            )
        to_the_file.append("ಫಲಿತಾಂಶ  subject-verb_agreement.txt ಯಲ್ಲಿದೆ !")

        #to_the_file.append("------------------------------------------------------")
        ret_list.insert(1, to_the_file)
        output_file = "subject-verb_agreement.txt"
        out_file = open(output_file, "a")
        for word in to_the_file:
            out_file.write("%s\n" % word)
        return ret_list

    else:

        list1 = []

        diccy = open("hello_stripped.txt", "r")
        morphology = open("morphology1.txt", "r")
        to_the_file = []
        lines = diccy.readlines()

        d = 0
        correct_input = ""
        dictionaryWords = []

        for line in morphology:
            dictionaryWords.append(line)

        #putting each word in list
        for a in range(0, 305570):
            dictionaryWords.append(dictionaryWords[a].strip())

        completion_dawg = dawg.CompletionDAWG(dictionaryWords)
        #print(given_word)
        to_the_file.append(given_word)
        word = given_word.split()
        pronoun_value = ""
        verb = 0
        noun = 0
        tag_verb = ""
        tag_pronoun = ""
        noun_value = ""
        tag_noun = ""
        pronoun = 0
        found = 0
        flag = 0
        tag_verb = ""
        for letter in word:
            suggest_word = []
            suggest_line = []
            ret_list = []
            #print(letter)
            if letter not in completion_dawg:
                for inflection in dictionaryWords:
                    d = distance(letter, inflection)
                    if d < 2:
                        suggest_word.append(inflection)
                #print(suggest_word)
                if suggest_word:
                    suggest_word = list(set(suggest_word))
                    for suggestion in suggest_word:
                        suggest_line.append(
                            given_word.replace(letter, suggestion))
                    ret_list.append(0)
                    ret_list.insert(1, "The sentence has a spell error!")
                    ret_list.insert(2, suggest_line)
                    return ret_list
            nlines = 0
            for line in lines:
                nlines += 1

                if (((letter + "\n") == line) == True):

                    line_no = nlines

                    if noun_value != "" and flag == 0:
                        tag_noun = lines[nlines]

                        flag = 1
                        noun = 1
                    if flag == 1 and (line[:-1].endswith(
                        ('iMda',
                         'u')) == True) and lines[nlines].startswith('N'):
                        tag_noun = ""
                        noun_value = ""
                        flag = 0
                        noun = 0
                    if (lines[nlines].startswith('N') and noun == 0
                            and (line[:-1].endswith(('iMda', 'u')) == True)
                            and (line[:-1].endswith('annu') == False)
                            and noun_value == ""):

                        #to_the_file.append("Noun is "+ letter)

                        tag_noun = lines[nlines]

                        noun = 1
                    elif ((lines[nlines].startswith('ITER')
                           or lines[nlines].startswith('PROG')
                           or lines[nlines].startswith('ABS'))):

                        #to_the_file.append("Verb is " + lines[nlines-1])

                        tag_verb = lines[nlines]
                        verb = 1
                    elif (lines[nlines].startswith('PRO') and pronoun == 0):

                        pronoun_value = lines[nlines - 1]
                        if pronoun_value[:-1] == "nanna":
                            number = word.index(letter) + 1
                            noun_value = word[number]

                            if noun_value not in dict_dawg:
                                noun_value = ""

                        #to_the_file.append("Pronoun is " + lines[nlines-1])

                        tag_pronoun = lines[nlines]
                        pronoun = 1

                if (noun == 1 and verb == 1):
                    break
        nlines = line_no
        if ((tag_pronoun == "") == False) and (tag_noun == ""):
            tag_noun = tag_pronoun
            noun = pronoun
        if verb == 0 or noun == 0:
            found = 1
        ret_list.insert(0, 1)

        if ('PAST' in tag_verb):

            to_the_file.append("The sentence is in past tense")
        elif ('PRES' in tag_verb):

            to_the_file.append("The sentence is in present tense")
        elif ('FUT' in tag_verb):

            to_the_file.append("The sentence is in future tense")
        #print('N.SL' in tag_noun)
        if (('N.SL' in tag_noun and 'N.SL' in tag_verb)
                or ('N.PL' in tag_noun and 'N.PL' in tag_verb)
                or (('M.SL' in tag_noun or 'MFN.SL' in tag_noun) and
                    ('M.SL' in tag_verb or 'MFN.SL' in tag_verb))
                or (('M.PL' in tag_noun or 'MFN.PL' in tag_noun) and
                    ('M.PL' in tag_verb or 'MFN.PL' in tag_verb))
                or (('F.SL' in tag_noun or 'MFN.SL' in tag_noun) and
                    ('F.SL' in tag_verb or 'MFN.SL' in tag_verb))
                or (('F.PL' in tag_noun or 'MFN.PL' in tag_noun) and
                    ('F.PL' in tag_verb or 'MFN.PL' in tag_verb))):

            to_the_file.append("The Gender and the Tense match!")
            if ('P1' in tag_noun and 'P1' in tag_verb):

                to_the_file.append(
                    "The person is P1 and the sentence has subject-verb agreement"
                )
            elif ('P2' in tag_noun and 'P2' in tag_verb):

                to_the_file.append(
                    "The person is P2 and the sentence has subject-verb agreement"
                )
            elif (('P3' in tag_noun and 'P3' in tag_verb)
                  or ('P3' in tag_verb)):

                to_the_file.append(
                    "The person is P3 and the sentence has subject-verb agreement"
                )
            elif ('P1' in tag_noun and 'P3' in tag_verb):

                to_the_file.append(
                    "The person is P1 and the sentence has subject-verb agreement"
                )
            elif ('P2' in tag_noun and 'P3' in tag_verb):

                to_the_file.append(
                    "The person is P2 and the sentence has subject-verb agreement"
                )
            elif ('P1' in tag_verb or 'P2' in tag_verb):

                to_the_file.append(
                    "Wrong person relationship and the sentence has NO subject-verb agreement"
                )
        elif found == 0:
            print("Error!!Gender in subject and verb dont match!!\n")
            to_the_file.append(
                "Error!!Gender in subject and verb dont match!!\n")
        elif found == 1:

            to_the_file.append(
                "Verb or Subject is missing in the sentence!! Please check again"
            )
        to_the_file.append(
            "The output file subject-verb_agreement.txt is generated!")

        to_the_file.append(
            "------------------------------------------------------")
        ret_list.insert(1, to_the_file)
        output_file = "subject-verb_agreement.txt"
        out_file = open(output_file, "a")
        for word in to_the_file:
            out_file.write("%s\n" % word)
        return ret_list
Example #27
done = 0
final_suggests = []
#holder for reverse dictionary
rev_dictWords = []

to_merge_file = open("dictionary_15k.txt", "r")
for line1 in to_merge_file:
    dictWords.append(line1.strip())

# read and store the reverse dictionary
rev_f = open("reverse_dictionary_15k.txt", "r")
for line in rev_f:
    rev_dictWords.append(line.strip())

#creating the reverse dawg
rev_dict_dawg = dawg.CompletionDAWG(rev_dictWords)
dict_dawg = dawg.CompletionDAWG(dictWords)

recent = ["", "", "", "", ""]  # five empty slots


def set_done():
    global spell_done
    #print("setting done")
    spell_done = 1
Example #28
import timeit

import dawg
import gaddag

with open('collins.txt', 'r') as inFile:
    wordy = [x.strip() for x in inFile.readlines()]
    words = set(wordy)

normalgaddag = gaddag.GADDAG(words)
normaldawg = dawg.DAWG(wordy)
complete = dawg.CompletionDAWG(wordy)

# print(normalgaddag.root["b"]['a'])
# So we can do single letter word follows GOOD

s = '''
import dawg;
import gaddag;

with open('collins.txt','r') as inFile:
    wordy = [x.strip() for x in inFile.readlines()];
    words = set(wordy);

normalgaddag = gaddag.GADDAG(words);
normaldawg = dawg.DAWG(wordy);
complete = dawg.CompletionDAWG(wordy);
'''


def timest(stmt):
Example #29
 def __init__(self):
     possible_words_set = set()
     possible_words_list = filter(lambda x: len(x) > 1,
                                  sorted(list(possible_words_set)))
     self.possible_words = dawg.CompletionDAWG(possible_words_list)
Example #30
 def load(self):
     c_dawg = dawg.CompletionDAWG()
     return c_dawg.load(r'{}.dawg'.format(self.name))