Ejemplo n.º 1
0
def parse_wordfile(source_file, target_json):
    """Merge the words of a tab-separated phrase file into a JSON word index.

    Each non-blank line of ``source_file`` is split on a tab into two
    fields (English, Spanish). Every space-separated word of both fields is
    passed through ``parse_in_dictionary.tame`` and then handed to
    ``parse_in_dictionary.get_or_make_entry`` together with the index data.
    The resulting data is written to ``target_json`` as UTF-8 JSON.

    Args:
        source_file: Path of the tab-separated phrase file to read.
        target_json: Path of the JSON index. If it is missing or empty, a
            fresh ``{"word_to_index": {}, "index_to_word": {}}`` structure is
            used; otherwise its content is loaded and extended.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            tab-separated fields, or if ``target_json`` holds invalid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    # Load the existing index only when there is real content to load. An
    # empty file (e.g. left over from an interrupted earlier run) is treated
    # like a missing one instead of crashing json.load.
    if os.path.exists(target_json) and os.path.getsize(target_json) > 0:
        # Read with the same encoding the file is written with below;
        # ensure_ascii=False means it can contain raw non-ASCII characters.
        with open(target_json, encoding='utf-8') as json_file:
            data = json.load(json_file)
    else:
        data = {"word_to_index": {}, "index_to_word": {}}

    # NOTE(review): the source file is still opened with the platform default
    # encoding, as before — confirm whether it should be forced to UTF-8.
    with open(source_file, "rt") as source_file_object:
        for line in source_file_object:
            line = line.strip()
            if not line:
                # Blank lines (such as a trailing newline) carry no phrases.
                continue
            english, spanish = line.split("\t")
            for word in english.split(" ") + spanish.split(" "):
                word = parse_in_dictionary.tame(word)
                parse_in_dictionary.get_or_make_entry(data, word)

    # Write the (possibly extended) index back out.
    with open(target_json, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
Ejemplo n.º 2
0
 def _word_to_index(self, word):
     """Return the ``"index"`` field of the entry for ``word``.

     The word is first passed through ``parse_in_dictionary.tame``; the
     result is handed to ``parse_in_dictionary.get_or_make_entry`` together
     with ``self.words`` (judging by the helper's name, a missing entry is
     created on the fly — confirm against that module).
     """
     tamed = parse_in_dictionary.tame(word)
     entry = parse_in_dictionary.get_or_make_entry(self.words, tamed)
     return entry["index"]
Ejemplo n.º 3
0
def word_to_index(words, word):
    """Return the ``"index"`` field of the entry for ``word`` in ``words``.

    ``word`` is passed through ``parse_in_dictionary.tame`` before the entry
    is fetched with ``parse_in_dictionary.get_or_make_entry`` (which, going
    by its name, presumably creates the entry when absent — confirm against
    that module).
    """
    entry = parse_in_dictionary.get_or_make_entry(
        words, parse_in_dictionary.tame(word)
    )
    return entry["index"]