def build_objects(results, data_collection, files_dict, new_string):
    """Build one AutoCompleteData per (file_key, line_number) result.

    Args:
        results: iterable of pairs — results[i][0] is a key into files_dict,
            results[i][1] is a 1-based line number in that file.
        data_collection: unused here; kept for interface compatibility.
        files_dict: maps file keys to file paths readable by linecache.
        new_string: the query string, passed to AutoCompleteData.get_score.

    Returns:
        list of AutoCompleteData objects, one per result.
    """
    # NOTE(review): the sibling load_data_from_files calls
    # AutoCompleteData.get_score(prefix, sentence) with TWO arguments —
    # confirm get_score's real signature before relying on this call.
    completions = []
    for sentence in results:  # dropped unused enumerate() index
        path = files_dict[sentence[0]]  # hoist the repeated dict lookup
        line = linecache.getline(path, sentence[1])
        completions.append(
            AutoCompleteData(line, path, sentence[1],
                             AutoCompleteData.get_score(new_string)))
    return completions
def load_data_from_files(list_, file_dict, prefix):
    """Map '<file_key>*<line_no>[*...]' keys to AutoCompleteData objects.

    Each pair in *list_* names a file (via file_dict) and a line number;
    the line is fetched with linecache and scored against *prefix*.
    """
    data = {}
    for pair in list_:
        path = file_dict[pair[0]]
        # getline keeps the trailing newline; slice it off
        sentence = linecache.getline(path, pair[1])[:-1]
        key = '*'.join(map(str, pair))
        source_name = path[:path.index(".")]
        data[key] = AutoCompleteData(
            sentence, source_name, pair[1],
            AutoCompleteData.get_score(prefix, sentence))
    return data
예제 #3
0
def complete(prefix):
    """Collect AutoCompleteData completions for *prefix*.

    Exact matches (score = 2 * len(prefix)) come first, followed by
    typo-corrected matches produced by fix_word.
    """
    cleaned = ignore_delimiters(prefix)
    matches = list(get_sentIndex(cleaned))

    completions = [
        AutoCompleteData(get_sentence(idx), get_sentence_src(idx),
                         offset(idx), len(prefix) * 2)
        for idx in matches
    ]

    for fixed in fix_word(cleaned, matches):
        completions.append(
            AutoCompleteData(get_sentence(fixed["sentence_index"]),
                             get_sentence_src(fixed["src"]),
                             fixed["offset"], fixed["score"]))

    return completions
def find_identical_completions(data, prefix, completion_list):
    """Append an AutoCompleteData for every entry in *data* and return the list.

    All entries get the same score, 2 * len(prefix). Mutates and returns
    *completion_list*.
    """
    score = len(prefix) * 2
    completion_list.extend(
        AutoCompleteData(entry[0], entry[1], score)
        for entry in data)
    return completion_list
예제 #5
0
def create_auto_complete(completion_list, search_input):
    """Turn raw completion dicts into AutoCompleteData objects.

    Each dict must provide "id" (key into the module-level data_dict),
    "score" and "offset".
    """
    return [
        AutoCompleteData(data_dict[str(item["id"])], search_input,
                         item["score"], item["offset"])
        for item in completion_list
    ]
예제 #6
0
def create_auto_complete_data(match_sentences_indexes, score_list):
    """Pair each matched sentence index with its score as AutoCompleteData.

    score_list[i] scores match_sentences_indexes[i]; file_data_dict maps an
    index to an object with .name (file) and .line attributes.
    """
    completions = []
    for i, idx in enumerate(match_sentences_indexes):
        record = file_data_dict[idx]  # hoist the repeated dict lookup
        text = extract_string_from_file(record.name, record.line)
        completions.append(
            AutoCompleteData(text, record.name, record.line, score_list[i]))
    return completions
    def add_complete_sentence(self, sentence, score):
        """Append completions matching *sentence* to the private result list.

        Args:
            sentence: sub-sentence key looked up in self.data.sub_sentences.
            score: score assigned to every AutoCompleteData appended here.

        Returns:
            True as soon as the result list reaches self.LEN_RESULT,
            otherwise False.
        """
        # membership test directly on the dict — `.keys()` was redundant
        if sentence in self.data.sub_sentences:
            for id_ in self.data.sub_sentences[sentence]:
                record = self.data.data[id_]
                # skip sentences already present in the result list
                if not self.is_exist(record[0]):
                    self.__complete_sentences.append(
                        AutoCompleteData(record[0], record[1], record[2],
                                         score))
                    if len(self.__complete_sentences) == self.LEN_RESULT:
                        return True

        return False
예제 #8
0
def get_auto_complete_list(map_files,
                           substring,
                           list_file_id_file_line,
                           less_score=0):
    """Build one AutoCompleteData per (file_id, line) pair.

    map_files resolves a file id to its path; the sentence text is read
    from that path at the given line. Every result carries *less_score*.
    """
    completions = []
    for file_id, line_no in list_file_id_file_line:
        path = map_files[file_id]
        text = read_sentence_from_file(path, line_no)
        completions.append(
            AutoCompleteData(text, substring, path, line_no, less_score))
    return completions
예제 #9
0
 def create_auto_complete(self, completion_list: List[dict],
                          search_input: str) -> List[AutoCompleteData]:
     """Return the end result of the sentences as AutoCompleteData objects.

     Args:
         completion_list: raw completion dicts with "id", "score", "offset";
             "id" keys into self.data_dict.
         search_input: the user's query string.

     Returns:
         One AutoCompleteData per input dict, in order.
     """
     # Fixed: the docstring previously opened with `""""` (four quotes),
     # leaking a stray quote character into the documentation text.
     object_completion_list = []
     for completion in completion_list:
         object_completion_list.append(
             AutoCompleteData(self.data_dict[str(completion["id"])],
                              search_input, completion["score"],
                              completion["offset"]))
     return object_completion_list
예제 #10
0
def read_txt_file():
    """Walk the bundled Python docs tree and load every .txt line.

    Appends an AutoCompleteData(stripped_line, filename, line_number) to the
    module-level all_sentences list for each non-empty line.
    """
    for root, dirs, files in os.walk("./my_files/python-3.8.4-docs-text/"):
        for name in files:
            if not name.endswith(".txt"):
                continue
            with open(os.path.join(root, name), encoding="utf8") as handle:
                line_no = 0
                for raw_line in handle.readlines():
                    if raw_line:
                        line_no += 1
                        all_sentences.append(
                            AutoCompleteData(raw_line.strip(), name,
                                             line_no))
예제 #11
0
def init_sentences_dict():
    """Populate the global sentences_dict with one AutoCompleteData per line.

    Walks the data directory, reads every file, and stores each stripped
    line as AutoCompleteData(line, filename_without_extension).

    Bug fixed: the key index previously restarted at 0 for every file, so
    each file's entries overwrote the previous file's. The key now keeps
    growing across files (continuing after any pre-existing entries).
    """
    next_key = len(sentences_dict)

    # read sentences from files
    for (root, dirs, files) in os.walk("../--data--technology_texts",
                                       topdown=True):
        for file in files:
            with open(os.path.join(root, file), mode='r',
                      encoding='utf-8') as f:
                for line in f.readlines():
                    sentences_dict[next_key] = AutoCompleteData(
                        line.strip(),
                        file.rpartition(".")[0])
                    next_key += 1
def load_data_from_files(file_dict, prefix):
    """Load AutoCompleteData objects for *prefix* from its per-letter JSON index.

    Args:
        file_dict: maps file keys to file paths readable by linecache.
        prefix: query string; its first character names the JSON index file.

    Returns:
        dict keyed by '*'.join(pair) → AutoCompleteData, or {} when the
        prefix is blank or absent from the index.
    """
    # Bug fixed: a blank prefix previously skipped the `with` block and left
    # data_collection unbound, raising NameError at the .get() call below.
    if not prefix.strip():
        return {}

    with open(f"{JSON}/{prefix[0]}.json", encoding='utf-8') as data_file:
        data_collection = json.load(data_file)

    data = {}
    list_ = data_collection.get(prefix, None)
    if list_:
        for pair in list_:
            # load autocomplete objects into dict from file data
            path = file_dict[pair[0]]
            sentence = linecache.getline(path, pair[1])[:-1]  # drop newline
            data['*'.join(map(str, pair))] = AutoCompleteData(
                sentence, path[:path.index(".")], pair[1], pair[2])
    return data
예제 #13
0
def search_best_comp(prefix: str, data_trie: Trie):
    """Return the 5 best-scoring AutoCompleteData completions for *prefix*.

    Exact trie matches are scored by prefix length; if fewer than 5 exist,
    typo-tolerant matches are added. Duplicates are collapsed via a set.
    """
    edited = edit_sentence(prefix)
    candidates = data_trie.search(edited)

    for candidate in candidates:
        candidate["score"] = get_score(len(prefix))

    # too few exact matches -> fall back to the typo-tolerant search
    if len(candidates) < 5:
        candidates += search_with_mistake(prefix, data_trie)

    unique_results = set()
    for candidate in candidates:
        text = get_sentence_from_path(candidate["path"],
                                      candidate["line_num"])
        unique_results.add(
            AutoCompleteData(text, candidate["path"],
                             candidate["offset"], candidate["score"]))

    return sort_res_by_score(unique_results)[:5]
예제 #14
0
def get_best_k_completions(string):
    """Return up to RESULT_LEN AutoCompleteData completions for *string*.

    Exact matches come first. If there are not enough suitable sentences,
    progressively cheaper corrections are tried, best score first:
      1. replace any character except the first,
      2. delete the 4th character,
      3. insert at the 4th position,
      4. replace the first character,
      5. delete any character,
      6. insert a character anywhere.

    Refactor: the six copy-pasted `if len(result) < RESULT_LEN` blocks are
    collapsed into one table-driven loop with identical ordering/conditions.
    """
    senten = data_dict[string][:RESULT_LEN]
    result = [AutoCompleteData(sentences[index.id].sentence,
                               sentences[index.id].path,
                               index.offset,
                               get_score(string, 0))  # exact: no detraction
              for index in senten]

    # (fixer, start, end, minimum len(string) strictly required to try it;
    #  -1 means the step is always attempted)
    fallbacks = (
        (replace_char, 1, len(string), 1),
        (delete_unnecessary_char, 3, 4, 3),
        (add_missed_char, 3, 4, 3),
        (replace_char, 0, 1, 0),
        (delete_unnecessary_char, 0, len(string), -1),
        (add_missed_char, 0, len(string), -1),
    )
    for fixer, start, end, min_len in fallbacks:
        if len(result) >= RESULT_LEN:
            break  # already have enough completions
        if len(string) > min_len:
            fix_word, detraction = fixer(string, start, end)
            add_to_result(senten, fix_word, result, detraction, string)

    return result[:RESULT_LEN]
def omit_character(data_dict, prefix, length, completion_list):
    """Find completions for *prefix* with one character omitted.

    Tries every single-character deletion of *prefix* (generated from the
    reversed string so scoring favors omissions near the end), collecting
    up to *length* AutoCompleteData results not already present in
    *completion_list* (matched on (source_text, offset)).

    Fixes: the local result list no longer shadows the builtin `list`, and
    the bare `except:` that silently swallowed every error is replaced by
    an explicit dict lookup with .get().
    """
    reversed_prefix = prefix[::-1]
    matches = []
    seen = [[x.source_text, x.offset] for x in completion_list]

    for i in range(len(prefix)):
        candidate = (reversed_prefix[:i] + reversed_prefix[i + 1:])[::-1]
        path_list = data_dict.get(candidate)
        if path_list is None:
            # this deletion produces no known prefix — try the next one
            continue
        score = calculate_omit_add_score(len(prefix) - 1, len(prefix) - i)

        for sentence in path_list:
            key = [sentence[0], sentence[1]]
            if key not in seen:
                matches.append(
                    AutoCompleteData(sentence[0], sentence[1], score))
                seen.append(key)
                if len(matches) == length:
                    return matches
    return matches
def from_sentence_to_auto_complete(sentence_list):
    """Wrap each sentence item in an AutoCompleteData with zero offset/score."""
    return [
        AutoCompleteData(item.completed_sentence, item.source_text, 0, 0)
        for item in sentence_list
    ]
예제 #17
0

def remove_existing_sentences(old_auto_complete_data_list,
                              new_auto_complete_data_list):
    """Return the new items whose source_text does not appear in the old list.

    Order of the new list is preserved; items are compared solely by their
    source_text attribute.
    """
    return [
        candidate for candidate in new_auto_complete_data_list
        if not any(candidate.source_text == existing.source_text
                   for existing in old_auto_complete_data_list)
    ]


if __name__ == "__main__":
    # Quick manual check: build two completion lists that overlap on
    # sentence text and print which new entries survive de-duplication.
    auto_complete_list = []
    auto_complete_list.append(AutoCompleteData("aaa aaa", "a 1", 0, 0))
    auto_complete_list.append(AutoCompleteData("aaa bbb", "a 2", 0, 0))
    auto_complete_list.append(AutoCompleteData("bbb bbb", "b 1", 0, 0))

    new_auto_complete_list = []
    new_auto_complete_list.append(AutoCompleteData("aaa aaa", "b 2", 0, 0))
    new_auto_complete_list.append(AutoCompleteData("aaa bbb", "a 2", 0, 0))
    new_auto_complete_list.append(AutoCompleteData("bbb bbb", "a 3", 0, 0))

    # NOTE(review): `new_sentences` is not defined in this file as shown —
    # presumably an alias of remove_existing_sentences; confirm before running.
    print(new_sentences(auto_complete_list, new_auto_complete_list))
예제 #18
0
def add_to_result(senten, fix_word, result, detraction, string):
    """Extend *result* in place with completions found for *fix_word*.

    Takes at most RESULT_LEN - len(senten) candidates from the global
    data_dict and scores each with get_score(string, detraction).
    """
    remaining = data_dict[fix_word][:(RESULT_LEN - len(senten))]
    result.extend(
        AutoCompleteData(sentences[index.id].sentence,
                         sentences[index.id].path,
                         index.offset,
                         get_score(string, detraction))
        for index in remaining)