def build_objects(results, data_collection, files_dict, new_string):
    """Build an AutoCompleteData object for every matched (file_key, line) pair.

    :param results: iterable of (file_key, line_number) pairs
    :param data_collection: unused here; kept for interface compatibility
    :param files_dict: maps file keys to file paths
    :param new_string: search string forwarded to the scorer
    :return: list of AutoCompleteData objects
    """
    completions = []
    for pair in results:
        path = files_dict[pair[0]]
        # linecache returns the raw line (trailing newline included)
        text = linecache.getline(path, pair[1])
        # NOTE(review): get_score is called with a single argument here,
        # unlike the two-argument calls elsewhere — confirm intended signature.
        completions.append(
            AutoCompleteData(text, path, pair[1],
                             AutoCompleteData.get_score(new_string)))
    return completions
def load_data_from_files(list_, file_dict, prefix):
    """Load AutoCompleteData objects keyed by the '*'-joined pair string.

    :param list_: iterable of (file_key, line_number) pairs
    :param file_dict: maps file keys to file paths
    :param prefix: search prefix forwarded to the scorer
    :return: dict mapping "key*line" -> AutoCompleteData
    """
    data = {}
    for pair in list_:
        path = file_dict[pair[0]]
        # drop the trailing newline getline keeps on the sentence
        sentence = linecache.getline(path, pair[1])[:-1]
        key = '*'.join(map(str, pair))
        data[key] = AutoCompleteData(
            sentence,
            path[:path.index(".")],  # source name without its extension
            pair[1],
            AutoCompleteData.get_score(prefix, sentence),
        )
    return data
def complete(prefix):
    """Return completions for *prefix*: exact matches plus fixed-word matches.

    Exact matches are scored ``len(prefix) * 2``; mistake-corrected matches
    carry the score produced by ``fix_word``.

    :param prefix: the raw user-typed prefix
    :return: list of AutoCompleteData objects (exact matches first)
    """
    prefix_ignore_del = ignore_delimiters(prefix)
    # was: [word for word in get_sentIndex(...)] — an identity comprehension
    results = list(get_sentIndex(prefix_ignore_del))
    exact_score = len(prefix) * 2  # invariant; hoisted out of the loop
    best_completions = [
        AutoCompleteData(get_sentence(res), get_sentence_src(res),
                         offset(res), exact_score)
        for res in results
    ]
    for sent in fix_word(prefix_ignore_del, results):
        best_completions.append(
            AutoCompleteData(get_sentence(sent["sentence_index"]),
                             get_sentence_src(sent["src"]),
                             sent["offset"], sent["score"]))
    return best_completions
def find_identical_completions(data, prefix, completion_list):
    """Append one AutoCompleteData per exact match in *data* to completion_list.

    NOTE(review): AutoCompleteData receives three arguments here, unlike the
    four-argument calls elsewhere — confirm the intended constructor signature.

    :param data: iterable of (sentence, source) pairs
    :param prefix: used only for scoring: every match scores len(prefix) * 2
    :param completion_list: list mutated in place and also returned
    """
    score = len(prefix) * 2  # invariant per call; hoisted
    for sentence in data:
        completion_list.append(AutoCompleteData(sentence[0], sentence[1], score))
    return completion_list
def create_auto_complete(completion_list, search_input):
    """Build the final AutoCompleteData objects from raw completion dicts.

    :param completion_list: dicts with "id", "score" and "offset" keys
    :param search_input: the original search string, stored on every object
    :return: list of AutoCompleteData objects
    """
    # data_dict is a module-level lookup from sentence id to sentence text
    return [
        AutoCompleteData(data_dict[str(entry["id"])], search_input,
                         entry["score"], entry["offset"])
        for entry in completion_list
    ]
def create_auto_complete_data(match_sentences_indexes, score_list):
    """Pair each matched sentence index with its score as AutoCompleteData.

    :param match_sentences_indexes: keys into the module-level file_data_dict
    :param score_list: scores aligned by position with the indexes
    :return: list of AutoCompleteData objects
    """
    completed = []
    for position, index in enumerate(match_sentences_indexes):
        entry = file_data_dict[index]  # one lookup instead of three
        completed.append(
            AutoCompleteData(
                extract_string_from_file(entry.name, entry.line),
                entry.name,
                entry.line,
                score_list[position],
            ))
    return completed
def add_complete_sentence(self, sentence, score):
    """Append unseen completions for *sentence* until the result list is full.

    :param sentence: sub-sentence key looked up in self.data.sub_sentences
    :param score: score assigned to every completion added by this call
    :return: True once len(results) reaches self.LEN_RESULT, else False
    """
    # was: `if sentence in self.data.sub_sentences.keys()` followed by a
    # second lookup — .get with an empty default does one lookup, no .keys()
    for id_ in self.data.sub_sentences.get(sentence, ()):
        record = self.data.data[id_]  # (text, source, offset) — one lookup
        if not self.is_exist(record[0]):
            self.__complete_sentences.append(
                AutoCompleteData(record[0], record[1], record[2], score))
            if len(self.__complete_sentences) == self.LEN_RESULT:
                return True
    return False
def get_auto_complete_list(map_files, substring, list_file_id_file_line, less_score=0):
    """Build an AutoCompleteData object for each (file_id, line) location.

    :param map_files: maps a file id to its path
    :param substring: the search string, stored on every object
    :param list_file_id_file_line: iterable of (file_id, line_number) pairs
    :param less_score: score value forwarded to every object (default 0)
    :return: list of AutoCompleteData objects
    """
    completions = []
    for file_id, line_number in list_file_id_file_line:
        path = map_files[file_id]
        text = read_sentence_from_file(path, line_number)
        completions.append(
            AutoCompleteData(text, substring, path, line_number, less_score))
    return completions
def create_auto_complete(self, completion_list: List[dict], search_input: str) -> List[AutoCompleteData]:
    """Return the end result of the sentences.

    Builds one AutoCompleteData per completion dict, resolving the sentence
    text through self.data_dict.

    :param completion_list: dicts with "id", "score" and "offset" keys
    :param search_input: the original search string, stored on every object
    :return: list of AutoCompleteData objects
    """
    # was: `""""` (four quotes) opening the docstring, and a manual
    # append loop — fixed delimiter, loop replaced by a comprehension
    return [
        AutoCompleteData(
            self.data_dict[str(completion["id"])],
            search_input,
            completion["score"],
            completion["offset"],
        )
        for completion in completion_list
    ]
def read_txt_file():
    """Walk the docs tree and register every .txt line in all_sentences.

    Each line becomes an AutoCompleteData(text, filename, 1-based offset)
    appended to the module-level all_sentences list.
    """
    docs_root = "./my_files/python-3.8.4-docs-text/"
    for root, _dirs, names in os.walk(docs_root):
        for name in names:
            if not name.endswith(".txt"):
                continue
            with open(os.path.join(root, name), encoding="utf8") as handle:
                line_no = 0
                for raw_line in handle.readlines():
                    # lines from readlines keep their newline, so this is
                    # effectively always true; kept for behavior parity
                    if raw_line:
                        line_no += 1
                        all_sentences.append(
                            AutoCompleteData(raw_line.strip(), name, line_no))
def init_sentences_dict():
    """Populate sentences_dict from every file under the data directory.

    NOTE(review): keys restart at 0 for each file, so later files overwrite
    earlier entries in sentences_dict — confirm this is intended.
    """
    for root, _dirs, names in os.walk("../--data--technology_texts", topdown=True):
        for name in names:
            with open(os.path.join(root, name), mode='r', encoding='utf-8') as handle:
                stripped = [line.strip() for line in handle.readlines()]
            source = name.rpartition(".")[0]  # filename without its extension
            for index, text in enumerate(stripped):
                sentences_dict[index] = AutoCompleteData(text, source)
def load_data_from_files(file_dict, prefix):
    """Load AutoCompleteData objects for *prefix* from its per-letter JSON index.

    Opens "{JSON}/<first letter>.json" and resolves each stored
    (file_key, line, score) triple into an object.

    :param file_dict: maps file keys to file paths
    :param prefix: search prefix; blank prefixes return None
    :return: dict mapping "key*line*score" -> AutoCompleteData, or None
    """
    if not prefix.strip():
        return None  # nothing to look up for a blank prefix
    with open(f"{JSON}/{prefix[0]}.json", encoding='utf-8') as data_file:
        data_collection = json.load(data_file)
    data = {}
    for pair in data_collection.get(prefix, None) or ():
        path = file_dict[pair[0]]
        # drop the trailing newline getline keeps on the sentence
        sentence = linecache.getline(path, pair[1])[:-1]
        data['*'.join(map(str, pair))] = AutoCompleteData(
            sentence,
            path[:path.index(".")],  # source name without its extension
            pair[1],
            pair[2],  # score was precomputed and stored in the index
        )
    return data
def search_best_comp(prefix: str, data_trie: Trie):
    """Return the best 5 completions for *prefix* from the trie.

    Exact matches are rescored by prefix length; when fewer than 5 exist,
    mistake-tolerant matches are added before ranking.

    :param prefix: the raw user-typed prefix
    :param data_trie: trie indexing all sentences
    :return: up to 5 AutoCompleteData objects, best score first
    """
    edited_prefix = edit_sentence(prefix)
    comp_list = data_trie.search(edited_prefix)
    for comp_dict in comp_list:
        # get_score may not be pure, so it is deliberately called per item
        comp_dict["score"] = get_score(len(prefix))
    if len(comp_list) < 5:
        comp_list += search_with_mistake(prefix, data_trie)
    # a set de-duplicates identical completion objects
    unique_completions = set()
    for comp_dict in comp_list:
        sentence = get_sentence_from_path(comp_dict["path"], comp_dict["line_num"])
        unique_completions.add(
            AutoCompleteData(sentence, comp_dict["path"],
                             comp_dict["offset"], comp_dict["score"]))
    return sort_res_by_score(unique_completions)[:5]
def get_best_k_completions(string):
    """Return up to RESULT_LEN completions for *string*.

    Exact matches come first. When there are not enough, correction
    strategies are tried in decreasing order of the score they yield:
      1. replace any character except the first
      2. delete the 4th character
      3. add a character at the 4th position
      4. replace the first character
      5. delete any character
      6. add any character

    :param string: the user-typed search string
    :return: list of at most RESULT_LEN AutoCompleteData objects
    """
    detraction = 0
    senten = data_dict[string][:RESULT_LEN]
    result = [AutoCompleteData(sentences[index.id].sentence,
                               sentences[index.id].path,
                               index.offset,
                               get_score(string, detraction))
              for index in senten]
    # (min_len, fixer, start, end): try fixer(string, start, end) only when
    # len(string) > min_len. min_len == -1 means "always applicable".
    # Table replaces six copy-pasted guard+fix+add blocks; order matters.
    strategies = (
        (1, replace_char, 1, len(string)),
        (3, delete_unnecessary_char, 3, 4),
        (3, add_missed_char, 3, 4),
        (0, replace_char, 0, 1),
        (-1, delete_unnecessary_char, 0, len(string)),
        (-1, add_missed_char, 0, len(string)),
    )
    for min_len, fixer, start, end in strategies:
        if len(result) >= RESULT_LEN:
            break  # already have enough completions
        if len(string) > min_len:
            fix_word, detraction = fixer(string, start, end)
            add_to_result(senten, fix_word, result, detraction, string)
    return result[:RESULT_LEN]
def omit_character(data_dict, prefix, length, completion_list):
    """Find completions reachable by omitting one character of *prefix*.

    Tries every single-character deletion (scanned from the end of the
    prefix), skipping results already present in *completion_list*.

    :param data_dict: maps candidate strings to lists of (text, offset, ...)
    :param length: stop and return as soon as this many matches are found
    :param completion_list: existing results, used only for de-duplication
    :return: list of newly built AutoCompleteData objects
    """
    reversed_prefix = prefix[::-1]
    matches = []  # was `list` — never shadow the builtin
    seen = [[item.source_text, item.offset] for item in completion_list]
    for i in range(len(prefix)):
        candidate = (reversed_prefix[:i] + reversed_prefix[i + 1:])[::-1]
        # was a bare `except:` around the whole loop body, which silently
        # swallowed every error — only the dict lookup is expected to fail
        try:
            path_list = data_dict[candidate]
        except KeyError:
            continue
        score = calculate_omit_add_score(len(prefix) - 1, len(prefix) - i)
        for sentence in path_list:
            key = [sentence[0], sentence[1]]
            if key not in seen:
                matches.append(AutoCompleteData(sentence[0], sentence[1], score))
                seen.append(key)
                if len(matches) == length:
                    return matches
    return matches
def from_sentence_to_auto_complete(sentence_list):
    """Convert sentence records to AutoCompleteData with zeroed score/offset.

    :param sentence_list: objects exposing completed_sentence and source_text
    :return: list of AutoCompleteData objects
    """
    return [
        AutoCompleteData(record.completed_sentence, record.source_text, 0, 0)
        for record in sentence_list
    ]
def remove_existing_sentences(old_auto_complete_data_list, new_auto_complete_data_list):
    """Return the new items whose source_text is absent from the old list.

    :param old_auto_complete_data_list: previously collected results
    :param new_auto_complete_data_list: candidate results to filter
    :return: new list (order preserved) without source_text duplicates
    """
    return [
        new_item
        for new_item in new_auto_complete_data_list
        if not any(old_item.source_text == new_item.source_text
                   for old_item in old_auto_complete_data_list)
    ]


if __name__ == "__main__":
    auto_complete_list = [
        AutoCompleteData("aaa aaa", "a 1", 0, 0),
        AutoCompleteData("aaa bbb", "a 2", 0, 0),
        AutoCompleteData("bbb bbb", "b 1", 0, 0),
    ]
    new_auto_complete_list = [
        AutoCompleteData("aaa aaa", "b 2", 0, 0),
        AutoCompleteData("aaa bbb", "a 2", 0, 0),
        AutoCompleteData("bbb bbb", "a 3", 0, 0),
    ]
    # NOTE(review): `new_sentences` is not defined in this file — possibly an
    # older name for remove_existing_sentences; confirm before running.
    print(new_sentences(auto_complete_list, new_auto_complete_list))
def add_to_result(senten, fix_word, result, detraction, string):
    """Extend *result* in place with completions for the corrected word.

    :param senten: the exact-match list; only its length is used here to cap
        how many corrected completions are added (up to RESULT_LEN total)
    :param fix_word: corrected search word used as the data_dict key
    :param result: completion list mutated in place
    :param detraction: score penalty for this correction strategy
    :param string: the original search string, forwarded to get_score
    """
    remaining = RESULT_LEN - len(senten)
    for index in data_dict[fix_word][:remaining]:
        record = sentences[index.id]
        result.append(
            AutoCompleteData(record.sentence, record.path, index.offset,
                             get_score(string, detraction)))