Example #1
    def setUp(self):
        self.file_name = 'sukuntest.csv'
        self.sukun_words = ExcelHelperMethod.read_csv_file(path + self.file_name)
        self.chars_count = WordLetterProcessingHelperMethod.get_chars_count_for_each_word_in_this(deepcopy(self.sukun_words))
        self.Chars = WordLetterProcessingHelperMethod.convert_list_of_words_to_list_of_chars(deepcopy(self.sukun_words))

        self.Chars_And_Its_Location = WordLetterProcessingHelperMethod.get_location_of_each_char(
            self.Chars, self.chars_count)
def create_netcdf_target_classes():
    execute_create_netcdf_target_classes_start_time = datetime.datetime.now()
    searchCounter = 0
    targetClass = []
    beforeWhileLoop = datetime.datetime.now()
    for each_item in selected_letters_in_this_loop:
        # index 7 holds the diacritized label used for the lookup below
        yourLabel = each_item[7]
        OneHotTargetClassNotFound = True

        decomposed_letter = WordLetterProcessingHelperMethod.decompose_diac_char_into_char_and_diacritics(
            yourLabel)
        if len(decomposed_letter) == 2 and decomposed_letter[1] == u'ّ':
            # a bare shadda is expanded to shadda + fatha before the lookup
            decomposed_letter[1] = u'َّ'
            yourLabel = WordLetterProcessingHelperMethod.attach_diacritics_to_chars(
                [decomposed_letter[0]], [decomposed_letter[1]])[0]

        while OneHotTargetClassNotFound:
            try:
                if listOfDiacritizedCharacter[searchCounter][1] == yourLabel:
                    OneHotTargetClassNotFound = False
                    targetClass.append(
                        listOfDiacritizedCharacter[searchCounter][0])
                    searchCounter = 0
                else:
                    searchCounter += 1
            except IndexError:
                # label missing from the lookup table; stop searching so the
                # while loop cannot spin forever past the end of the list
                OneHotTargetClassNotFound = False
                searchCounter = 0
    afterWhileLoop = datetime.datetime.now()
    print "While Loop takes : ", afterWhileLoop - beforeWhileLoop

    global purified_target_class
    purified_target_class = np.array(targetClass)
    execute_create_netcdf_target_class_end_time = datetime.datetime.now()
    print "createNetCDFTargetClasses takes : ", \
        execute_create_netcdf_target_class_end_time - execute_create_netcdf_target_classes_start_time
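For context, create_netcdf_target_classes assumes listOfDiacritizedCharacter is a module-level lookup list pairing each target class with the diacritized character it encodes (index 0 is appended to targetClass, index 1 is compared against the label). A minimal sketch of that assumed shape, with hypothetical values:

# Assumed layout of the lookup table (values are illustrative only):
# entry[0] is the target class (e.g. a one-hot row), entry[1] the diacritized char.
listOfDiacritizedCharacter = [
    ([1, 0, 0], u'\u0628\u064e'),  # beh + fatha
    ([0, 1, 0], u'\u0628\u064f'),  # beh + damma
    ([0, 0, 1], u'\u0628\u0650'),  # beh + kasra
]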
Example #3
def do_we_need_to_search_in_dictionary(dictionary, word):

    decomposed_act = WordLetterProcessingHelperMethod.decompose_word_into_letters(
        word)
    norm_act = WordLetterProcessingHelperMethod.normalize(decomposed_act)

    # first pass: an order-insensitive exact match means no search is needed
    for each_word in dictionary:
        decomposed_dict = WordLetterProcessingHelperMethod.decompose_word_into_letters(
            each_word)
        norm_dict = WordLetterProcessingHelperMethod.normalize(decomposed_dict)

        if len(norm_dict) != len(norm_act):
            raise ValueError(
                "do_we_need_to_search_in_dictionary: dictionary word and "
                "input word normalize to different lengths")

        if sorted(norm_dict) == sorted(norm_act):
            return False

    # second pass: a version that differs only in its last letter also means
    # no search, since the last letter depends on context
    for each_word in dictionary:
        decomposed_dict = WordLetterProcessingHelperMethod.decompose_word_into_letters(
            each_word)
        norm_dict = WordLetterProcessingHelperMethod.normalize(decomposed_dict)

        for x in range(0, len(norm_act)):
            if x < (len(norm_act) - 1):
                # compare letters before the last letter
                if norm_dict[x] != norm_act[x]:
                    # difference is in a first or middle letter; try next word
                    break
            else:
                # all letters before the last one matched; ignore the last
                return False

    return True
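A minimal usage sketch, assuming the candidate list comes from the DB helper used in the other examples; undiac_word and rnn_word are hypothetical placeholders, and every candidate must normalize to the same length as the input word or the function raises ValueError:

# Hypothetical usage: True means no dictionary version matches the rnn word
# (exactly or up to the last letter), so a distance search is still needed.
candidates = DBHelperMethod.get_dictionary_all_diacritized_version_of(undiac_word)
candidates = SukunCorrection.sukun_correction_for_list_of_words(candidates)
if do_we_need_to_search_in_dictionary(candidates, rnn_word):
    pass  # run the edit-distance search over the candidates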
Example #4
def prepare_master_object(selected_sentence, rnn_op, exp_op, location,
                          undiac_words):

    list_of_master_object = []
    list_of_word_len = []
    total_length = 0
    letter_counter = 0
    if not (len(rnn_op) == len(exp_op) == len(location)):
        raise Exception("bug found in data: rnn_op, exp_op and location "
                        "must have equal lengths")

    for each_word in undiac_words:
        if each_word != 'space':
            decomposed_word = WordLetterProcessingHelperMethod.decompose_word_into_letters(
                each_word)
            total_length += len(decomposed_word)
            list_of_word_len.append(total_length)

    master = MasterObject()
    for (each_rnn_char, each_exp_char,
         each_location) in zip(rnn_op, exp_op, location):
        if each_rnn_char == 'space':
            continue

        master.undiac_char = WordLetterProcessingHelperMethod.remove_diacritics_from_this(
            each_rnn_char)
        master.rnn_diac_char = each_rnn_char
        decomposed_result_1 = WordLetterProcessingHelperMethod.decompose_diac_char_into_char_and_diacritics(
            each_rnn_char)

        if len(decomposed_result_1) == 1:
            master.rnn_diac = ''

        elif len(decomposed_result_1) == 2 and decomposed_result_1[1] != u'ْ':
            master.rnn_diac = decomposed_result_1[1]
        elif len(decomposed_result_1) == 2 and decomposed_result_1[1] == u'ْ':
            master.rnn_diac = ''
            master.rnn_diac_char = master.undiac_char
        elif len(decomposed_result_1) == 3:
            master.rnn_diac = decomposed_result_1[1] + decomposed_result_1[2]

        master.exp_diac_char = each_exp_char
        decomposed_result_2 = WordLetterProcessingHelperMethod.decompose_diac_char_into_char_and_diacritics(
            each_exp_char)

        if len(decomposed_result_2) == 1:
            master.exp_diac = ''
        elif len(decomposed_result_2) == 2 and decomposed_result_2[1] != u'ْ':
            master.exp_diac = decomposed_result_2[1]
        elif len(decomposed_result_2) == 2 and decomposed_result_2[1] == u'ْ':
            master.exp_diac = ''
            master.exp_diac_char = master.undiac_char
        elif len(decomposed_result_2) == 3:
            master.exp_diac = decomposed_result_2[1] + decomposed_result_2[2]

        master.location_in_word = each_location
        master.location_in_sent = letter_counter
        master.sentence = selected_sentence

        index = 0
        for each_length_index in range(0, len(list_of_word_len)):
            index = each_length_index
            if list_of_word_len[each_length_index] >= (letter_counter + 1):
                break

        master.undiac_word = undiac_words[index]

        if each_location == 'first' and list_of_word_len[index] != 1:
            master.has_next_char = True
            master.has_prev_char = False

        elif each_location == 'first' and len(list(master.undiac_word)) == 1:
            master.has_next_char = False
            master.has_prev_char = False

        elif each_location == 'middle':
            master.has_next_char = True
            master.has_prev_char = True

        elif each_location == 'last':
            master.has_next_char = False
            master.has_prev_char = True

        list_of_master_object.append(deepcopy(master))
        letter_counter += 1

    list_of_rnn_words = WordLetterProcessingHelperMethod.reform_word_from_version_2(
        list_of_master_object)
    st_range = 0
    for each_number in list_of_word_len:
        en_range = each_number

        for index in range(st_range, en_range):
            try:
                list_of_master_object[index].rnn_diac_word = list_of_rnn_words[0]
                list_of_master_object[index].exp_diac_word = selected_sentence[0]
            except IndexError:
                # word boundaries can disagree with the char counts; skip
                pass
        del list_of_rnn_words[0]
        del selected_sentence[0]
        st_range = en_range

    return list_of_master_object
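A hedged sketch of calling prepare_master_object; the argument values are hypothetical, but per the length check above, rnn_op, exp_op and location must be per-letter sequences of equal length, with 'space' entries marking word boundaries:

# Hypothetical call (variable names are illustrative):
masters = prepare_master_object(selected_sentence, rnn_op_chars,
                                expected_chars, char_locations, undiac_words)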
Example #5
        selected_sentence = DBHelperMethod.get_sentence_by(sentence_number)

        rnn_output = ExcelHelperMethod.read_rnn_op_csv_file(path + file_name)
        neurons_with_highest_probability, neurons_op_value = RNNOPProcessingHelperMethod.get_neurons_numbers_with_highest_output_value(
            rnn_output)

        list_of_available_diac_chars = DBHelperMethod.get_available_diacritized_chars()
        RNN_Predicted_Diac_Chars = RNNOPProcessingHelperMethod.\
            deduce_from_rnn_op_predicted_chars(list_of_available_diac_chars, neurons_with_highest_probability)
        # Expected OP
        OP_Diac_Chars = DBHelperMethod.get_diacritized_chars_by(
            sentence_number, type)

        RNN_Predicted_Diac_Chars = WordLetterProcessingHelperMethod.check_target_and_output_letters_are_same(
            deepcopy(RNN_Predicted_Diac_Chars), OP_Diac_Chars)
        RNN_Predicted_Chars_Count = WordLetterProcessingHelperMethod.get_chars_count_for_each_word_in_this(
            selected_sentence)
        RNN_Predicted_Chars_And_Its_Location = WordLetterProcessingHelperMethod.get_location_of_each_char(
            RNN_Predicted_Diac_Chars, RNN_Predicted_Chars_Count)
        WordLetterProcessingHelperMethod.append_neuron_op_value(
            RNN_Predicted_Chars_And_Its_Location, neurons_op_value)
        # Post Processing
        RNN_Predicted_Chars_After_Sukun = SukunCorrection.sukun_correction(
            deepcopy(RNN_Predicted_Chars_And_Its_Location))
        RNN_Predicted_Chars_After_Fatha = FathaCorrection.fatha_correction(
            deepcopy(RNN_Predicted_Chars_After_Sukun))
        RNN_Predicted_Chars_After_Dictionary = DictionaryCorrection.get_diac_version_with_smallest_dist(
            deepcopy(RNN_Predicted_Chars_After_Fatha), sentence_number)
Example #6
def fatha_correction(list_of_objects_of_chars_and_its_location):
    counter = 0
    actual_letters_after_fatha_correction = []

    prev_char_object = WordLetterProcessingHelperMethod.LetterPosition()
    prev_prev_char_object = WordLetterProcessingHelperMethod.LetterPosition()
    next_char_object = WordLetterProcessingHelperMethod.LetterPosition()

    for each_letter_object in list_of_objects_of_chars_and_its_location:

        actual_letters_after_fatha_correction.append(each_letter_object)
        character = remove_diacritics(each_letter_object.letter)

        if (character in letters_of_fatha_correction) and (each_letter_object.location != 'first'):

            letter_caused_fatha_correction = character

            if (counter - 1) >= 0:
                prev_char_object = list_of_objects_of_chars_and_its_location[counter - 1]
                prev_char_object.letter = unicodedata2.normalize('NFC', prev_char_object.letter)
            if (counter - 2) >= 0:
                prev_prev_char_object = list_of_objects_of_chars_and_its_location[counter - 2]
                prev_prev_char_object.letter = unicodedata2.normalize('NFC', prev_prev_char_object.letter)
            if ((counter + 1) <= (len(list_of_objects_of_chars_and_its_location) - 1)) and (each_letter_object.location != 'last'):
                next_char_object = list_of_objects_of_chars_and_its_location[counter + 1]

            corrected_char = prev_char_object.letter
            if letter_caused_fatha_correction == u'ة':
                corrected_char = correct_teh_marbota_prev_char(prev_char_object)

            elif letter_caused_fatha_correction == u'ا':

                if each_letter_object.location == 'middle':
                    if remove_diacritics(prev_char_object.letter) == u'ب':
                        # , بِاتِّخَاذِكُمُ ,وَبِالْآخِرَةِ , بِالْعُدْوَةِ
                        if u'ّ' in next_char_object.letter or\
                                        next_char_object.letter == remove_diacritics(next_char_object.letter):
                            corrected_char = correct_alef_prev_char_ba2_maksora(prev_char_object)

                        # بَالِغَةٌ , بَاسِرَةٌ
                        else:
                            corrected_char = correct_alef_prev_char_normal_case(prev_char_object)

                    elif remove_diacritics(prev_char_object.letter) == u'ل':
                        if prev_char_object.location == 'first':
                            # do not handle this case
                            # special case with no law (these are contradict) لَا , لِامْرَأَتِهِ
                            corrected_char = prev_char_object.letter

                        elif prev_prev_char_object.letter == u'ا':
                            # do not handle this case
                            # special case with no law (these are contradict)  الِاسْمُ
                            corrected_char = prev_char_object.letter
                        else:
                            corrected_char = correct_alef_prev_char_normal_case(prev_char_object)
                    # مِائَةَ , مِائَتَيْنِ
                    elif remove_diacritics(prev_char_object.letter) == u'م' \
                            and prev_char_object.location == 'first' \
                            and next_char_object.letter == u'ئَ':

                        corrected_char = correct_alef_prev_char_mem(prev_char_object)

                    else:
                        corrected_char = correct_alef_prev_char_normal_case(prev_char_object)

                elif each_letter_object.location == 'last' or each_letter_object.location == 'first':
                    corrected_char = prev_char_object.letter

                else:
                    corrected_char = correct_alef_prev_char_normal_case(prev_char_object)

            elif letter_caused_fatha_correction == u'ى':

                # طُوًى, ضُحًى
                if prev_prev_char_object.location == 'first' and u'ُ' in prev_prev_char_object.letter and \
                                each_letter_object.location == 'last':

                    corrected_char = correct_alef_maksora_prev_char_tanween_case(prev_char_object)

                # أَبَى
                else:
                    corrected_char = correct_alef_maksora_prev_char_normal_case(prev_char_object)

            actual_letters_after_fatha_correction[counter - 1].letter = corrected_char

        # counter advances once per letter, corrected or not
        counter += 1
    return actual_letters_after_fatha_correction
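A short sketch of how fatha_correction is invoked in the surrounding examples: the input is the list of letter/location objects produced by get_location_of_each_char, usually after sukun correction (diac_chars and counts are hypothetical placeholders):

# Build letter/location objects, run sukun correction, then fatha correction.
chars = WordLetterProcessingHelperMethod.get_location_of_each_char(diac_chars, counts)
after_sukun = SukunCorrection.sukun_correction(deepcopy(chars))
after_fatha = FathaCorrection.fatha_correction(deepcopy(after_sukun))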
Example #7
        location = loc[start_range:end_range]

        # Post Processing
        RNN_Predicted_Chars_And_Its_Location = dp.create_letter_location_object(
            nn_op_letters, location)
        RNN_Predicted_Chars_After_Sukun = SukunCorrection.sukun_correction(
            deepcopy(RNN_Predicted_Chars_And_Its_Location))
        RNN_Predicted_Chars_After_Fatha = FathaCorrection.fatha_correction(
            deepcopy(RNN_Predicted_Chars_After_Sukun))
        RNN_Predicted_Chars_After_Dictionary = DictionaryCorrection.get_diac_version_with_smallest_dist_no_db_access(
            RNN_Predicted_Chars_After_Fatha, undiac_words,
            dic_words_for_selected_sent)

        # Expected OP
        OP_Diac_Chars_Count = WordLetterProcessingHelperMethod.get_chars_count_for_each_word_in_this(
            selected_sentence)
        OP_Diac_Chars_And_Its_Location = WordLetterProcessingHelperMethod.get_location_of_each_char(
            expected_letters, OP_Diac_Chars_Count, True)
        OP_Diac_Chars_After_Sukun = SukunCorrection.sukun_correction(
            deepcopy(OP_Diac_Chars_And_Its_Location))

        # DER Calculation
        error = DERCalculationHelperMethod.get_diacritization_error(
            RNN_Predicted_Chars_After_Dictionary, OP_Diac_Chars_After_Sukun, selected_sentence)

        error_without_last_letter = DERCalculationHelperMethod.get_diacritization_error_without_counting_last_letter(
            RNN_Predicted_Chars_After_Dictionary, OP_Diac_Chars_After_Sukun, selected_sentence)

        # write error in excel file
        excel_1 = current_row_1
        current_row_1 = ExcelHelperMethod.write_data_into_excel_file(
Example #8
    for file_name, sentence_number in zip(result, list_of_sentence_numbers):

        selected_sentence = DBHelperMethod.get_sentence_by(sentence_number)

        rnn_output = ExcelHelperMethod.read_rnn_op_csv_file(path + file_name)
        neurons_with_highest_probability = RNNOPProcessingHelperMethod.get_neurons_numbers_with_highest_output_value(
            rnn_output)

        list_of_available_diacritics = DBHelperMethod.get_all_diacritics()
        RNN_Predicted_diacritics = RNNOPProcessingHelperMethod.\
            deduce_from_rnn_op_predicted_chars(list_of_available_diacritics, neurons_with_highest_probability)

        IP_Undiacritized_Chars = DBHelperMethod.get_un_diacritized_chars_by(
            sentence_number, type)
        RNN_Predicted_chars = WordLetterProcessingHelperMethod.attach_diacritics_to_chars(
            IP_Undiacritized_Chars, RNN_Predicted_diacritics)

        RNN_Predicted_Chars_Count = WordLetterProcessingHelperMethod.get_chars_count_for_each_word_in_this(
            selected_sentence)
        RNN_Predicted_Chars_And_Its_Location = WordLetterProcessingHelperMethod.get_location_of_each_char(
            RNN_Predicted_chars, RNN_Predicted_Chars_Count)

        # Post Processing
        RNN_Predicted_Chars_After_Sukun = SukunCorrection.sukun_correction(
            deepcopy(RNN_Predicted_Chars_And_Its_Location))
        RNN_Predicted_Chars_After_Fatha = FathaCorrection.fatha_correction(
            deepcopy(RNN_Predicted_Chars_After_Sukun))
        RNN_Predicted_Chars_After_Dictionary = DictionaryCorrection.get_diac_version_with_smallest_dist(
            deepcopy(RNN_Predicted_Chars_After_Fatha), sentence_number)

        # Expected OP
Example #9
def get_diac_version_with_smallest_dist(list_of_objects):

    list_of_actual_words_after_dictionary_correction = []
    list_of_undiac_objects = WordLetterProcessingHelperMethod.remove_diacritics_from(
        list_of_objects)
    list_of_undiac_words = WordLetterProcessingHelperMethod.reform_word_from(
        list_of_undiac_objects)

    diacritized_rnn_op_words = WordLetterProcessingHelperMethod.reform_word_from(
        list_of_objects)

    if len(diacritized_rnn_op_words) != len(list_of_undiac_words):
        raise Exception(
            "get_diac_version_with_smallest_dist: diacritized and "
            "undiacritized word counts differ")

    for each_corrected_word, each_un_diacritized_word in zip(
            diacritized_rnn_op_words, list_of_undiac_words):

        minimum_error = 100000000
        dictionary_diacritized_words = DBHelperMethod.\
            get_dictionary_all_diacritized_version_of(each_un_diacritized_word)

        if len(dictionary_diacritized_words) == 0:
            dictionary_diacritized_words.append(each_corrected_word)

        dictionary_diacritized_words_after_sukun_correction = SukunCorrection.\
            sukun_correction_for_list_of_words(dictionary_diacritized_words)

        if do_we_need_to_search_in_dictionary(
                dictionary_diacritized_words_after_sukun_correction,
                each_corrected_word):

            for each_word in dictionary_diacritized_words_after_sukun_correction:
                error_count = 0

                decomposed_dic_word = WordLetterProcessingHelperMethod.decompose_word_into_letters(
                    each_word)
                decomposed_act_word = WordLetterProcessingHelperMethod.decompose_word_into_letters(
                    each_corrected_word)

                norm_dic_word = WordLetterProcessingHelperMethod.normalize(
                    decomposed_dic_word)
                norm_act_word = WordLetterProcessingHelperMethod.normalize(
                    decomposed_act_word)

                for each_diacritized_version_letter, each_current_word_letter in zip(
                        norm_dic_word, norm_act_word):

                    # a length mismatch of one or two diacritics counts as
                    # that many errors; otherwise compare element-wise
                    length_diff = abs(len(each_diacritized_version_letter) -
                                      len(each_current_word_letter))
                    if length_diff in (1, 2):
                        error_count += length_diff
                    else:
                        for each_item_in_diacritized_version, each_item_in_current_word in \
                                zip(each_diacritized_version_letter, each_current_word_letter):
                            if each_item_in_diacritized_version != each_item_in_current_word:
                                error_count += 1

                if error_count < minimum_error:
                    minimum_error = error_count

                    selected_dictionary_word = each_word

            list_of_actual_words_after_dictionary_correction.append(
                selected_dictionary_word)
        else:
            list_of_actual_words_after_dictionary_correction.append(
                each_corrected_word)

    chars_after_dic_correction = WordLetterProcessingHelperMethod.convert_list_of_words_to_list_of_chars(
        list_of_actual_words_after_dictionary_correction)
    if len(list_of_objects) != len(chars_after_dic_correction):
        raise Exception("character count changed after dictionary correction")

    for x in range(0, len(list_of_objects)):
        list_of_objects[x].letter = chars_after_dic_correction[x]

    return list_of_objects
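This one-argument variant closes the same post-processing chain the other examples use (they call a variant that also takes a sentence_number). A condensed sketch, with chars_and_locations as a hypothetical placeholder:

# Sukun correction, then fatha correction, then dictionary correction.
after_sukun = SukunCorrection.sukun_correction(deepcopy(chars_and_locations))
after_fatha = FathaCorrection.fatha_correction(deepcopy(after_sukun))
after_dict = get_diac_version_with_smallest_dist(deepcopy(after_fatha))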
Example #10
        selected_sentence = DBHelperMethod.get_sentence_by(sentence_number)
        rnn_input = DBHelperMethod.get_un_diacritized_chars_by(sentence_number, type)

        rnn_output = ExcelHelperMethod.read_rnn_op_csv_file(path + file_name)
        neurons_with_highest_probability, neurons_op_value = RNNOPProcessingHelperMethod.\
            get_neurons_numbers_with_highest_output_value(rnn_output)

        list_of_available_diac_chars = DBHelperMethod.get_available_diacritics_and_un_diacritized_chars()
        RNN_Predicted_Diac_Chars = RNNOPProcessingHelperMethod.\
            deduce_from_rnn_op_predicted_chars(list_of_available_diac_chars, neurons_with_highest_probability)
        # Expected OP
        OP_Diac_Chars = DBHelperMethod.get_diacritized_chars_by(sentence_number, type)

        # RNN_Predicted_Diac_Chars = WordLetterProcessingHelperMethod.check_target_and_output_letters_are_same(deepcopy(RNN_Predicted_Diac_Chars), OP_Diac_Chars)

        RNN_Predicted_Chars_Count = WordLetterProcessingHelperMethod.get_chars_count_for_each_word_in_this(selected_sentence)
        RNN_Predicted_Chars_And_Its_Location = WordLetterProcessingHelperMethod.get_location_of_each_char(RNN_Predicted_Diac_Chars, RNN_Predicted_Chars_Count)

        WordLetterProcessingHelperMethod.append_neuron_op_value(RNN_Predicted_Chars_And_Its_Location, neurons_op_value)
        WordLetterProcessingHelperMethod.append_diacritics_with_un_diacritized_char(RNN_Predicted_Chars_And_Its_Location, rnn_input, list_of_available_diac_chars)
        # Post Processing
        RNN_Predicted_Chars_After_Sukun = SukunCorrection.sukun_correction(deepcopy(RNN_Predicted_Chars_And_Its_Location))
        RNN_Predicted_Chars_After_Fatha = FathaCorrection.fatha_correction(deepcopy(RNN_Predicted_Chars_After_Sukun))
        RNN_Predicted_Chars_After_Dictionary = DictionaryCorrection.get_diac_version_with_smallest_dist(deepcopy(RNN_Predicted_Chars_After_Fatha), sentence_number)

        # Expected OP
        OP_Diac_Chars = DBHelperMethod.get_diacritized_chars_by(sentence_number, type)
        OP_Diac_Chars_Count = WordLetterProcessingHelperMethod.get_chars_count_for_each_word_in_this(
Example #11
def get_diac_version_with_smallest_dist_no_db_access_version_2(
        master_object, dic_words):

    # rnn_diac_words = [each_object.rnn_diac_word for each_object in master_object]
    # rnn_diac_words = [x[0] for x in groupby(rnn_diac_words1)]
    # rnn_diac_words = master_object[0].sentence
    undiac_words = []
    rnn_diac_words = []
    for each_word in master_object[0].sentence:
        undiac_words.append(
            WordLetterProcessingHelperMethod.remove_diacritics_from_this_word(
                each_word))

    for each_word in undiac_words:
        for each_object in master_object:
            if each_object.undiac_word == each_word:
                rnn_diac_words.append(each_object.rnn_diac_word)
                break

    #undiac_words = [each_object.undiac_word for each_object in master_object]
    #undiac_words = [x[0] for x in groupby(undiac_words)]

    selected_dictionary_word = ''
    selected_norm_dictionary_word = ''
    output = []
    for each_word, undiac_word in zip(rnn_diac_words, undiac_words):
        rows, cols = np.where(dic_words == undiac_word)

        dictionary_diacritized_words = (dic_words[rows, 0]).tolist()

        # no dictionary data found: keep the rnn word unchanged
        if len(dictionary_diacritized_words) == 0:
            norm_word = WordLetterProcessingHelperMethod.normalize(
                WordLetterProcessingHelperMethod.decompose_word_into_letters(each_word))
            output.append(extract_data_in_req_format(norm_word, each_word))

        else:
            dict_words_after_sukun_correction = SukunCorrection. \
                sukun_correction_for_list_of_words(dictionary_diacritized_words)

            if not do_we_need_to_search_in_dictionary(
                    dict_words_after_sukun_correction, each_word):
                norm_word = WordLetterProcessingHelperMethod.normalize(
                    WordLetterProcessingHelperMethod.decompose_word_into_letters(each_word))
                output.append(extract_data_in_req_format(norm_word, each_word))

            else:
                minimum_error = 100000000
                for each_dic_word in dict_words_after_sukun_correction:
                    error_count = 0

                    norm_dic_word = WordLetterProcessingHelperMethod.normalize(
                        WordLetterProcessingHelperMethod.decompose_word_into_letters(each_dic_word))

                    norm_act_word = WordLetterProcessingHelperMethod.normalize(
                        WordLetterProcessingHelperMethod.decompose_word_into_letters(each_word))

                    # unify the last char because it depends on context
                    norm_dic_word[-1] = norm_act_word[-1]

                    for each_dic_letter, each_act_letter in zip(
                            norm_dic_word, norm_act_word):
                        if (each_dic_letter[0] != each_act_letter[0]
                                or each_dic_letter[1] != each_act_letter[1]):
                            error_count += 1

                    if error_count < minimum_error:
                        minimum_error = error_count

                        selected_norm_dictionary_word = norm_dic_word
                        selected_dictionary_word = each_dic_word

                output.append(
                    extract_data_in_req_format(selected_norm_dictionary_word,
                                               selected_dictionary_word))

    merged = list(itertools.chain(*output))
    if len(merged) != len(master_object):
        raise Exception("bug found: merged output length does not match master_object")

    for index, (each_merged_object) in enumerate(merged):
        master_object[index].rnn_diac_char = each_merged_object.letter
        master_object[index].rnn_diac = each_merged_object.diac
        master_object[index].rnn_diac_word = each_merged_object.word

    return master_object
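From the np.where lookup above, dic_words appears to be a 2-D numpy array whose column 0 holds the diacritized version of a word and whose other column holds the undiacritized form. A hedged sketch of that assumed layout, with hypothetical values:

# Assumed layout (illustrative): column 0 diacritized, column 1 undiacritized,
# so np.where(dic_words == undiac_word) selects the rows whose column-0 entries
# are the candidate diacritized versions of that word.
import numpy as np
dic_words = np.array([
    [u'\u0643\u064e\u062a\u064e\u0628\u064e', u'\u0643\u062a\u0628'],  # kataba / ktb
    [u'\u0643\u064f\u062a\u0650\u0628\u064e', u'\u0643\u062a\u0628'],  # kutiba / ktb
])
corrected = get_diac_version_with_smallest_dist_no_db_access_version_2(
    master_objects, dic_words)  # master_objects: hypothetical list of MasterObject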