def abbreviating():
    """Rewrite master-dictionary values through ``RefList.quotedToAbbrev``.

    Every ``(key, value)`` pair of ``trans_finder.master_dic`` is passed to
    ``RefList(msg=value).quotedToAbbrev(key)``.  Non-empty results replace
    the stored value.  When at least one entry changed, the whole dictionary
    is written to the test JSON file via ``trans_finder.writeJSONDic``.

    The previous version also carried a ``remove_items`` list that was never
    filled, so its deletion loop could not run; that dead branch is gone.
    """
    # Collect the replacements first so the dictionary is never modified
    # while it is being iterated.
    new_items = {}
    for k, v in trans_finder.master_dic.items():
        new_v = RefList(msg=v).quotedToAbbrev(k)
        if new_v:  # None or empty means "nothing to change" for this key
            new_items[k] = new_v

    if not new_items:
        return

    trans_finder.master_dic.update(new_items)
    dic_file = '/Users/hoangduytran/blender_manual/test_dic.json'
    print(
        f'Writing changes to: {dic_file}, number of records:{len(new_items)}'
    )
    trans_finder.writeJSONDic(dict_list=trans_finder.master_dic,
                              file_name=dic_file)
Example #2
0
    def isIgnoredWord(text_line: str):
        """Tell whether ``text_line`` matches one of the ignore patterns.

        The patterns in ``df.ignore_list`` are compiled case-insensitively on
        first use and cached in ``df.runtime_ignore_list``.

        Args:
            text_line: text to test.

        Returns:
            True when ``text_line`` is None/empty or matches any pattern.
            False otherwise, including when matching raises (the error is
            logged through ``df.LOG``).
        """
        if not text_line:
            return True

        if df.runtime_ignore_list is None:
            # Build the whole list before publishing it, so a pattern that
            # fails to compile cannot leave a half-filled cache behind.
            # Empty patterns are skipped: they would match every string.
            df.runtime_ignore_list = [
                re.compile(pattern, flags=re.I)
                for pattern in df.ignore_list
                if len(pattern) > 0
            ]

        # Track the pattern being tried so the error log can name it (the
        # old code always logged ``pattern:[None]``).
        current_pattern = None
        try:
            for matcher in df.runtime_ignore_list:
                current_pattern = matcher.pattern
                if matcher.search(text_line) is not None:
                    dd(f'isIgnoredWord: pattern:[{matcher.pattern}] [{text_line}]')
                    return True
        except Exception as e:
            df.LOG(f'{e}; text_line:[{text_line}]; pattern:[{current_pattern}]',
                   error=True)
        return False
Example #3
0
 def isReverseOrder(msg):
     """Report whether ``msg`` matches a pattern in ``Ignore.reverse_order_list``.

     Each pattern is applied with a case-insensitive ``re.search``.  The
     first pattern that matches is written to the debug output and ends the
     scan with True; False is returned when none matches.
     """
     for pattern in Ignore.reverse_order_list:
         if re.search(pattern, msg, flags=re.I) is None:
             continue
         dd(f'isReverseOrder -> pattern:[{pattern}] msg:[{msg}]')
         return True
     return False
Example #4
0
    def isFilePath(text_line: str):
        """Heuristically decide whether ``text_line`` looks like a file path.

        The text counts as a path when it contains no space and at least one
        of these holds: it matches ``df.PATH_CHAR`` (and does not contain
        'kbd'), it starts with '~', or it matches ``df.ENDS_WITH_EXTENSION``.

        A text with path characters is rejected outright when any part
        obtained by splitting on ``dirsep`` is title-cased, e.g.
        Selected/Unselected, which is a list of options rather than a path.

        Returns:
            bool; False for None or empty input.
        """
        if not text_line:
            return False

        has_separator_chars = (
            df.PATH_CHAR.search(text_line) is not None
            and 'kbd' not in text_line
        )

        # A title-cased component means this is not a path.
        if has_separator_chars and any(
                part.istitle() for part in text_line.split(dirsep)):
            return False

        tilde_prefixed = text_line.startswith('~')
        has_extension = df.ENDS_WITH_EXTENSION.search(text_line) is not None
        no_spaces = " " not in text_line

        is_path = no_spaces and (has_separator_chars or tilde_prefixed
                                 or has_extension)
        if is_path:
            dd("isFilePath", text_line)

        return is_path
Example #5
0
    def findByReduction(self, msg):
        """Run every reduction strategy on ``msg`` and keep the best result.

        Each ``(function, params)`` pair in ``self.tran_find_func_list`` is
        called with ``msg`` (plus the two extra parameters when either of
        them is truthy) and must return ``(new_text, trans, cover_length)``.
        The winner has the largest ``cover_length``; ties are broken by the
        longest ``new_text`` and then by list order.

        Returns:
            ``(new_text, trans, cover_length)`` of the winning strategy.
            ``trans`` is None when that strategy found no translation;
            otherwise it has been passed through ``cm.patchingBeforeReturn``.

        Raises:
            Any exception raised along the way, after logging it with
            ``df.LOG``.
        """
        candidates = []
        try:
            start_non_alpha, _mid, end_non_alpha = cm.getTextWithin(msg)
            for func, params in self.tran_find_func_list:
                func_name = func.__name__
                dd(f'findByReduction(): trying function:[{func_name}]')
                txt, param1, param2 = params
                if param1 or param2:
                    outcome = func(msg, param1, param2)
                else:
                    outcome = func(msg)
                new_text, trans, cover_length = outcome
                # Sort key is (cover_length, remaining text length): the
                # less that was cut off, the better.
                candidates.append(
                    (cover_length, len(new_text), new_text, trans, func_name))

            ranked = sorted(candidates, key=OP.itemgetter(0, 1), reverse=True)
            cover_length, _kept_len, new_text, trans, function_name = ranked[0]
            if trans is None:
                return new_text, None, cover_length

            # NOTE(review): `txt` still holds the value unpacked for the LAST
            # strategy tried, not necessarily the winning one -- confirm that
            # this is what patchingBeforeReturn expects.
            trans = cm.patchingBeforeReturn(start_non_alpha, end_non_alpha, trans, txt)
            dd(f'findByReduction: looking for: [{msg}] trans:[{trans}] function_name:[{function_name}]')
            return new_text, trans, cover_length
        except Exception as e:
            df.LOG(f'{e}; msg:{msg}', error=True)
            raise
Example #6
0
def get_hd_url(crawler, count=200):
    """Collect HD download URLs for videos that are not downloaded yet.

    Up to ``count`` rows with ``Video.downloaded == 0`` are selected and
    their ``view_id`` values are handed to ``crawler.get_hd_detail``.  If
    the lookup fails, ``crawler.get_pending_data()`` is used instead.  The
    URLs are written one per line to ``data/urls.txt``.

    Args:
        crawler: object providing ``get_hd_detail(view_ids, raw=True)`` and
            ``get_pending_data()``.
        count: maximum number of pending videos to query.

    Returns:
        List of download URLs; empty when nothing is pending.
    """
    # Bind both names up front: the old code hit a NameError whenever no
    # video was pending or the query failed before ``view_ids`` was set.
    view_ids = []
    result = []
    try:
        videos = Video.select().where(Video.downloaded == 0).limit(count)
        if videos.count() > 0:
            view_ids = [video.view_id for video in videos]
            result = crawler.get_hd_detail(view_ids, raw=True)
            dd(result)
    except Exception:
        # Best-effort fallback.  ``Exception`` instead of ``BaseException``
        # so Ctrl-C / SystemExit are no longer swallowed.
        result = crawler.get_pending_data()

    # Post-processing lives outside ``finally``: a ``return`` inside
    # ``finally`` would silently discard any in-flight exception.
    details = [detail for detail in (result or []) if detail is not None]
    urls = [detail['download_url'] for detail in details]

    requested = set(view_ids)
    deleted = requested - {detail['view_id'] for detail in details}
    deleted_count = len(deleted)
    # NOTE: flagging the deleted videos in the database (downloaded=-1) is
    # currently disabled; only the count is reported.

    print('craw succeed count {}, deleted count {}'.format(
        len(requested) - deleted_count, deleted_count))

    with open('data/urls.txt', 'w') as f:
        f.writelines(url + "\n" for url in urls)
    return urls
Example #7
0
    def isIgnored(msg):
        """Decide whether ``msg`` should be skipped.

        An empty ``msg`` is ignored.  A message accepted by ``Ignore.isKeep``
        or ``Ignore.isKeepContains`` (tested on the lower-cased text) is never
        ignored.  Otherwise the message is ignored when it is a link path, a
        function reference, an ignored word, a DOS command or starts with an
        ignored prefix.  The checks run in that order and stop at the first
        hit.

        Returns:
            True for empty input, False for kept messages, otherwise the
            (truthy or falsy) outcome of the ignore checks.

        Raises:
            Any exception from the checks, after logging it with ``df.LOG``.
        """
        if not msg:
            return True

        try:
            if isinstance(msg, MatcherRecord):
                dd('debug')

            find_msg = msg.lower()
            if Ignore.isKeep(find_msg) or Ignore.isKeepContains(find_msg):
                return False

            # Each check only runs while every earlier one came back falsy.
            is_function = is_ignore_word = is_dos_command = is_ignore_start = False
            is_ref_link = cm.isLinkPath(find_msg)
            if not is_ref_link:
                is_function = (df.FUNCTION.search(find_msg) is not None)
            if not (is_ref_link or is_function):
                is_ignore_word = Ignore.isIgnoredWord(find_msg)
            if not (is_ref_link or is_function or is_ignore_word):
                is_dos_command = Ignore.isDosCommand(find_msg)
            if not (is_ref_link or is_function or is_ignore_word
                    or is_dos_command):
                is_ignore_start = Ignore.isIgnoredIfStartsWith(find_msg)

            is_ignore = (is_function or is_ignore_word or is_dos_command
                         or is_ignore_start or is_ref_link)
            if not is_ignore:
                return is_ignore

            # Report which rule fired.
            dict_ignore = {
                "is_ignore_word": is_ignore_word,
                "is_dos_command": is_dos_command,
                "is_ignore_start": is_ignore_start,
                "is_function": is_function,
                "is_ref_link": is_ref_link
            }
            dd("IGNORING:", msg)
            pp(dict_ignore)
            return is_ignore
        except Exception as e:
            df.LOG(f'{e}; msg:{msg}', error=True)
            raise
Example #8
0
    def tranByPartitioning(self, sl_txt):
        """Translate ``sl_txt`` piece by piece.

        The parts produced by ``cm.genmap(sl_txt)`` are tried in order.  A
        part is skipped when its location is already marked as used.  Each
        remaining part goes through ``self.simpleFuzzyTranslate``; a
        single-word part that fails there gets a second chance with
        ``self.findByChangeSuffix``.  Translated parts are marked as used on
        a ``LocationObserver`` and finally spliced into a copy of the text
        with ``cm.jointText``.

        Returns:
            ``(translation, untranslated)``.  ``translation`` is None when
            nothing changed or when an error occurred (the error is logged
            with ``df.LOG``).  ``untranslated`` is the observer's
            ``getUnmarkedPartsAsDict()`` result.
        """
        fname = INP.currentframe().f_code.co_name
        ft_map = cm.genmap(sl_txt)
        ft_obs = LocationObserver(sl_txt)
        found = []  # (location, translation) pairs
        part_txt = None
        try:
            ft_translation = str(sl_txt)
            for loc, word in ft_map:
                if ft_obs.isCompletelyUsed():
                    break
                if ft_obs.isLocUsed(loc):
                    continue

                part_txt = word
                tran, _item, _ratio, _untran = self.simpleFuzzyTranslate(
                    word, acceptable_rate=df.FUZZY_ACCEPTABLE_RATIO)
                if not tran:
                    # Only single words are retried with a changed suffix.
                    if len(word.split()) != 1:
                        continue
                    _chopped, tran = self.findByChangeSuffix(word)

                if tran:
                    ft_obs.markLocAsUsed(loc)
                    found.append((loc, tran))

            # Splice from the highest location downwards (presumably so the
            # earlier offsets stay valid while the text changes length).
            for loc, tran in sorted(found, reverse=True):
                ft_translation = cm.jointText(ft_translation, tran, loc)

            return_tran = ft_translation if ft_translation != sl_txt else None
            dd(f'{fname}() msg:[{sl_txt}] tran_sub_text:[{ft_translation}]')
        except Exception as e:
            return_tran = None
            df.LOG(f'{e}; [{sl_txt}] dealing with: [{part_txt}]', error=True)
        return return_tran, ft_obs.getUnmarkedPartsAsDict()
    def makeNonSRRecord(self, txt, root_location):
        """Build and register a plain-text (no sentence structure) record.

        A fresh record is obtained from ``self.reproduce()``.  If ``txt`` is
        already a key of ``self.processed_list`` nothing more happens and
        None is returned.  Otherwise the record is re-initialised for
        ``txt``, set up, asked for its to-be-translated list and stored in
        ``self.global_sr_list`` under its ``tran_sl_txt``.

        Returns:
            The new record, or None when ``txt`` was already processed.
        """
        sr = self.reproduce()
        print(f'IS TEXT:[{txt}]')
        if txt in self.processed_list.keys():
            dd(f'makeNonSRRecord: [{txt}] is already processed')
            return None

        init_args = dict(
            root_loc=root_location,
            tran_sl_txt=txt,
            translation_engine=self.tf,
            processed_dict=self.processed_list,
            glob_sr=self.global_sr_list,
        )
        sr.__init__(**init_args)
        sr.setupRecords()
        sr.getTextListTobeTranslated()

        registration = {sr.tran_sl_txt: sr}
        self.global_sr_list.update(registration)
        return sr
Example #10
0
    def makeSRRecord(self, txt, root_location):
        """Build a sentence-structure record for ``txt`` when a pattern fits.

        ``self.getDict().getSentStructPattern(txt)`` supplies the recognised
        pattern together with the source/target dictionary data.  No record
        is made when no pattern was found or when the dictionary source text
        is already a key of ``self.processed_list``.  The record is stored in
        ``self.global_sr_list`` only when it reports text that still needs
        translating.

        Returns:
            The new record, or None when the text is ignored or an error
            occurred (errors are logged with ``df.LOG``, not raised).
        """
        try:
            dict_sl_pat, struct_parts = self.getDict().getSentStructPattern(txt)
            (dict_sl_txt, dict_sl_word_list, dict_sl_mm, dict_tl_txt,
             dict_tl_word_list, dict_tl_mm, sent_sl_mm) = struct_parts

            if dict_sl_txt in self.processed_list.keys():
                dd(f'makeSRRecord: [{txt}] is already processed')
                return None
            if not dict_sl_pat:
                return None

            print(
                f'IS STRUCTURE:[txt:{txt}] => sl:[{dict_sl_txt}] tl:[{dict_tl_txt}] pat:[{dict_sl_pat}]'
            )
            sr = self.reproduce()
            sr.__init__(
                root_loc=root_location,
                dict_sl_txt=dict_sl_txt,
                dict_sl_word_list=dict_sl_word_list,
                dict_sl_rec=dict_sl_mm,
                dict_tl_rec=dict_tl_mm,
                dict_tl_word_list=dict_tl_word_list,
                dict_tl_txt=dict_tl_txt,
                tran_sl_txt=txt,
                tran_sl_rec=sent_sl_mm,
                recog_pattern=dict_sl_pat,
                translation_engine=self.tf,
                processed_dict=self.processed_list,
                glob_sr=self.global_sr_list,
            )
            sr.setupRecords()

            need_tran = sr.getTextListTobeTranslated()
            if not need_tran:
                df.LOG('NO need translations, PROVIDED or LEAVE AS IS!')
                return sr

            df.LOG(f'needed tran:{need_tran}')
            self.global_sr_list.update({sr.tran_sl_txt: sr})
            return sr
        except Exception as e:
            df.LOG(e, error=True)
            return None
Example #11
0
    def translateMatcherRecord(self, mm: MatcherRecord):
        """Translate one matched reference with the translator for its type.

        Dispatch on ``mm.type``:

        * ``KBD``            -> ``self.tf.translateKeyboard``
        * ``ABBR``           -> ``self.tf.translateAbbrev``
        * ``MENUSELECTION``  -> ``self.tf.translateMenuSelection``
        * any quote type     -> ``self.tf.translateQuoted``
        * ``OSL_ATTRIB``, ``PYTHON_FORMAT``, ``FUNCTION`` -> left untouched
        * everything else    -> ``self.tf.translateRefWithLink``

        Args:
            mm: the matcher record to translate; it is passed to the chosen
                translator as-is.

        Returns:
            None.  Errors are logged with ``df.LOG`` and not re-raised.
        """
        # Bound before the ``try`` so the error log below can always print
        # it; previously a failure in ``mm.getSubText()`` raised a NameError
        # from inside the handler.
        ref_type = None
        try:
            ref_txt = mm.getSubText()
            ref_type = mm.type

            quoted_types = (
                RefType.AST_QUOTE,
                RefType.DBL_QUOTE,
                RefType.SNG_QUOTE,
                RefType.DBL_AST_QUOTE,
                RefType.BLANK_QUOTE,
            )
            untranslated_types = (
                RefType.OSL_ATTRIB,
                RefType.PYTHON_FORMAT,
                RefType.FUNCTION,
            )

            if ref_type == RefType.KBD:
                dd(f'translateRefItem: is_kbd:{ref_txt}')
                self.tf.translateKeyboard(mm)
            elif ref_type == RefType.ABBR:
                dd(f'translateRefItem: is_abbr:{ref_txt}')
                self.tf.translateAbbrev(mm)
            elif ref_type == RefType.MENUSELECTION:
                dd(f'translateRefItem: is_menu:{ref_txt}')
                self.tf.translateMenuSelection(mm)
            elif ref_type in quoted_types:
                dd(f'translateRefItem: is_quoted:{ref_txt}')
                self.tf.translateQuoted(mm)
            elif ref_type in untranslated_types:
                return
            else:
                self.tf.translateRefWithLink(mm)
        except Exception as e:
            df.LOG(f'{e} ref_item:{mm}, ref_type:{ref_type}', error=True)
Example #12
0
    def __init__(self,
                 input_k=None,
                 item_found=None,
                 item_left=None,
                 item_right=None,
                 input_k_left=None,
                 input_k_right=None):
        """Store the given parts and derive the middle part of ``input_k``.

        All arguments are kept unchanged on the instance under their own
        names.  ``input_k_mid`` and ``input_k_mid_loc`` are then filled from
        ``self.findExpVarPart(input_k, input_k_left, input_k_right)`` and
        written to the debug output.
        """
        # Placeholders so both attributes exist while findExpVarPart() runs.
        self.input_k_mid = self.input_k_mid_loc = None

        self.input_k = input_k
        self.item_found = item_found
        self.item_left, self.item_right = item_left, item_right
        self.input_k_left, self.input_k_right = input_k_left, input_k_right

        mid_and_loc = self.findExpVarPart(input_k, input_k_left, input_k_right)
        self.input_k_mid, self.input_k_mid_loc = mid_and_loc
        dd(f'FuzzyExpVarRecord() - self.input_k_mid:[{self.input_k_mid}]; self.input_k_mid_loc:[{self.input_k_mid_loc}]'
           )
Example #13
0
    def getTranBySlittingSymbols(self, input_txt):
        """Translate ``input_txt``, splitting it at symbols when needed.

        The whole text is first given to ``self.tranByPartitioning``.  If
        that yields nothing, the text is split with
        ``cm.splitWordAtToList`` using ``df.NON_SPACE_SYMBOLS`` and then
        ``df.SYMBOLS``.  Every piece is translated on its own and the
        translated pieces are spliced back with ``cm.jointText``.

        Args:
            input_txt: the text to translate.

        Returns:
            The translated text, or None when nothing could be translated.
        """
        fname = INP.currentframe().f_code.co_name

        tran, _untran = self.tranByPartitioning(input_txt)
        if tran:
            return tran

        translated_list = []
        for pat in (df.NON_SPACE_SYMBOLS, df.SYMBOLS):
            word_list = cm.splitWordAtToList(pat, input_txt)
            if not word_list:
                continue

            for loc, txt in word_list:
                tran, _untran = self.tranByPartitioning(txt)
                # tranByPartitioning (as defined in this file) reports
                # "nothing translated" as None.  The old ``tran != txt``
                # test treated that None as a translation and queued
                # ``(loc, None)`` for splicing.
                if tran and tran != txt:
                    translated_list.append((loc, tran))

        # Splice from the highest location downwards.
        translation = str(input_txt)
        translated_list.sort(reverse=True)
        for loc, tran in translated_list:
            translation = cm.jointText(translation, tran, loc)

        if translation == input_txt:
            return None

        dd(f'{fname}(): input_txt:[{input_txt}]=>[{translation}]')
        return translation
Example #14
0
    def findPattern(self, pattern_list: list, txt: str):
        """Apply every ``(pattern, ref_type)`` pair to ``txt``, last pair first.

        Each pair is handed to ``self.findOnePattern`` together with a
        ``LocationObserver`` for ``txt``.  Afterwards
        ``self.validateFoundEntries()`` is called and, when this container is
        not empty, its content is dumped to the debug output.

        Args:
            pattern_list: sequence of ``(pattern, ref_type)`` pairs.  It is
                no longer reversed in place, so the caller's list keeps its
                order across calls.
            txt: the text to scan.

        Returns:
            ``(count_item, unmarked)``.  ``count_item`` is always 0 (it was
            never incremented in the original either) and ``unmarked`` is
            ``obs.getUnmarkedPartsAsDict()``.
        """
        count_item = 0
        obs = LocationObserver(txt)

        # reversed() walks the list back to front without mutating it.
        for pattern, ref_type in reversed(pattern_list):
            self.findOnePattern(obs, obs.blank, pattern, ref_type)
        self.validateFoundEntries()

        if len(self):
            dd('List of refs found:')
            dd('-' * 80)
            pp(self)
            dd('-' * 80)
        return count_item, obs.getUnmarkedPartsAsDict()
def build_finished(app, exeption):
    """Write both chosen dictionaries once the build has finished.

    Calls ``trans_finder.writeChosenDict`` for the master dictionary and then
    for the non-master one.

    Args:
        app: unused here.
        exeption: unused here.  The misspelled name is kept so the signature
            stays unchanged for existing callers.

    NOTE(review): the ``(app, exception)`` signature looks like a Sphinx
    ``build-finished`` event handler -- confirm where it is connected.
    """
    trans_finder.writeChosenDict(is_master=True)
    trans_finder.writeChosenDict(is_master=False)
    dd('DEBUG')
Example #16
0
    def getSentStructPattern(self, key):
        """Find the sentence-structure pattern that best fits ``key``.

        Every key of ``self.sentence_struct_dict`` is treated as a regular
        expression and searched (case-insensitively) in ``key``.  For each
        pattern that matches, the matched parts are checked against the
        structure modes stored with the pattern; accepted patterns are ranked
        by ``fuzz.ratio`` between the dictionary source text and ``key``.

        Successful results are cached in
        ``self.local_text_and_chosen_sent_struct_list`` under the lower-cased
        key; "no match" results are not cached.

        Returns:
            ``(pattern, value)`` where ``pattern`` is the compiled regular
            expression and ``value`` is the 6-tuple stored in
            ``self.sentence_struct_dict`` extended with the match record of
            ``key`` as 7th element.  ``(None, (None,) * 7)`` when no pattern
            qualifies.

        Raises:
            Any exception raised while matching, after logging it with
            ``df.LOG``.
        """
        def isMatchedStructMode(pat_matched_text_pair_list):
            """Check the matched parts against their structure-mode rules.

            ``pat_matched_text_pair_list`` is a pair
            ``(s_mode_dict, input_txt_list)``.  The values of ``s_mode_dict``
            are paired by position with the matched text parts.  Returns True
            when no rule failed (also when no rule applied at all).
            """
            is_ok_list=[]
            (s_mode_dict, input_txt_list) = pat_matched_text_pair_list
            s_mode_dict_list = list(s_mode_dict.values())
            for index, matched_part in enumerate(input_txt_list):
                # An IndexError here (more parts than modes) is caught by the
                # caller and counts as "not accepted".
                smode_item = s_mode_dict_list[index]
                (dict_sl_txt, structure_mode_list) = smode_item
                is_txt_only = (structure_mode_list is None)
                if is_txt_only:
                    continue

                smode_rec: SMODEREC = None
                for smode_rec in structure_mode_list:
                    structure_mode = smode_rec.smode
                    extra_param = smode_rec.extra_param

                    # Each branch records one verdict and moves on to the
                    # next rule; a mode matching no branch is skipped.
                    is_digits_only = (structure_mode == SMODE.NUMBER_ONLY)
                    if is_digits_only:
                        is_number = (df.NUMBERS.search(matched_part) is not None)
                        is_ok_list.append(is_number)
                        continue

                    is_no_full_stop = (structure_mode == SMODE.NO_FULL_STOP)
                    if is_no_full_stop:
                        no_fullstop = (df.FULLSTOP_IN_BETWEEN.search(matched_part) is None)
                        is_ok_list.append(no_fullstop)
                        continue

                    is_no_punctuation = (structure_mode == SMODE.NO_PUNCTUATION)
                    if is_no_punctuation:
                        no_punct = (df.PUNCT_IN_BETWEEN.search(matched_part) is None)
                        is_ok_list.append(no_punct)
                        continue

                    is_max_upto = (structure_mode == SMODE.MAX_UPTO)
                    if is_max_upto:
                        # extra_param is used as the maximum word count.
                        wc = cm.wordCount(matched_part)
                        is_max_upto = (wc <= extra_param)
                        is_ok_list.append(is_max_upto)
                        continue

                    is_no_conjunctives = (structure_mode == SMODE.NO_CONJUNCTIVES)
                    if is_no_conjunctives:
                        no_conjunctives = (df.BASIC_CONJUNCTS.search(matched_part) is None)
                        is_ok_list.append(no_conjunctives)
                        continue

            ok = (False not in is_ok_list)
            return ok

        def filterKeyChosenSet(pat_item):
            """Return True when ``pat_item``, used as a regex, is found in ``key``."""
            match = re.compile(pat_item, flags=re.I).search(key)
            is_found = (match is not None )
            return is_found

        try:
            # Value part of the "nothing found" answer: one None per element
            # of the 7-tuple returned on success.
            default_value = (None, None, None, None, None, None, None)
            klower = key.lower()
            has_cached_key_value_set = (klower in self.local_text_and_chosen_sent_struct_list)
            if has_cached_key_value_set:
                set_value = self.local_text_and_chosen_sent_struct_list[klower]
                (pat, value) = set_value
                return (pat, value)

            selective_match = []
            key_list = self.sentence_struct_dict.keys()
            # pat_list = [x for x in pat_list if 'i\\.e\\.' in x]
            chosen_key_list = list(filter(filterKeyChosenSet, key_list))

            if not chosen_key_list:
                return (None, default_value)

            df.LOG(f'matched key: [{key}]')
            pp(chosen_key_list, width=200)

            # list_of_sent_struct = list(self.sentence_struct_dict.items())
            for pat in chosen_key_list:
                value = self.sentence_struct_dict[pat]
                pattern = re.compile(pat, flags=re.I)
                # match_list = re.findall(pat, key)
                matcher_dict = cm.patternMatchAll(pattern, key)
                # Only the first match record is used.
                sent_sl_record = list(matcher_dict.values())[0]
                loc_text_list = sent_sl_record.getSubEntriesAsList()
                # Drop the first entry; fall back to the full list when
                # nothing else remains.
                # NOTE(review): the first entry is presumably the whole
                # match -- confirm against getSubEntriesAsList().
                interest_part = loc_text_list[1:]
                if not interest_part:
                    interest_part = loc_text_list
                unique_parts = cm.removeDuplicationFromlistLocText(interest_part)
                temp_dict = OrderedDict(unique_parts)

                matched_text_group = temp_dict.values()

                # matched_text_group = matcher.groups()
                (dict_sl_txt, dict_sl_word_list, dict_sl_mm, dict_tl_txt, dict_tl_word_list, dict_tl_mm) = value
                s_mode_list = dict_sl_mm.smode
                df.LOG(f'matched_text_group:[{matched_text_group}]')
                pattern_and_matched_text_pair_list = (s_mode_list, matched_text_group)
                df.LOG(f'pattern_and_matched_text_pair_list:')
                df.LOG('-' * 80)
                pp(pattern_and_matched_text_pair_list)
                df.LOG('-' * 80)
                try:
                    is_accept = isMatchedStructMode(pattern_and_matched_text_pair_list)
                except Exception as e:
                    # Any failure while checking the rules rejects this
                    # pattern; the error itself is not logged.
                    is_accept = False

                if not is_accept:
                    continue

                match_rate = fuzz.ratio(dict_sl_txt, key)
                # Reduce the record to the de-duplicated parts and append it
                # to the dictionary value as its 7th element.
                sent_sl_record.clear()
                sent_sl_record.update(unique_parts)
                value = (*value, sent_sl_record)

                entry=(match_rate, pat, value)
                selective_match.append(entry)

            if not selective_match:
                return (None, default_value)

            # Highest match rate first; ties fall through to comparing the
            # pattern text (and then the value tuples).
            selective_match.sort(reverse=True)
            dd('-' * 80)
            pp(selective_match)
            dd('-' * 80)
            first_entry = selective_match[0]
            (match_rate, pat, value) = first_entry
            pattern = re.compile(pat, flags=re.I)

            # Remember the winner so the same text is answered from the cache
            # next time.
            cached_entry = {klower: (pattern, value)}
            self.local_text_and_chosen_sent_struct_list.update(cached_entry)

            return (pattern, value)
        except Exception as e:
            df.LOG(f'{e};', error=True)
            raise e
Example #17
0
    def simpleFuzzyTranslate(self, msg: str, acceptable_rate=df.FUZZY_ACCEPTABLE_RATIO):
        def getItemPart(item_txt):
            item_word_list = item_txt.split()
            item_first_word = item_word_list[0]
            item_part = item_first_word[:max_k_length]
            return item_part

        def validate(item):
            item_part = getItemPart(item)
            is_found = (item_part.lower() == k_part.lower())
            if not is_found:
                return -1, None

            allowable_length = (k_length * (1 + df.MAX_FUZZY_TEST_LENGTH))
            if is_k_single_word:
                item_len = len(item)
                acceptable = (item_len <= allowable_length)
            else:

                word_count = len(item.split())
                item_word_count_is_greater_or_equal_k_word_count = (word_count >= k_word_count)
                item_word_count_is_smaller_than_allowable_k_word_count = (word_count <= allowed_k_word_count)
                acceptable = (item_word_count_is_greater_or_equal_k_word_count
                               and
                               item_word_count_is_smaller_than_allowable_k_word_count)

            return_result = (1 if acceptable else 0)
            return return_result, item

        def findListOfCandidates():
            subset = []
            index = cm.binarySearch(key_list, k, key=getItemPart)
            is_found = (index >= 0)
            if not is_found:
                return subset

            found_list = []
            ss = index
            ee = index
            # dd(f'simpleFuzzyTranslate(): start index: [{index}]')
            for i in range(index-1, 0, -1):
                item = key_list[i]
                cond, found_item = validate(item)
                is_break = (cond == -1)
                if is_break:
                    # dd(f'simpleFuzzyTranslate(): traverse backward, stopped at: [{i}], item:[{item}]')
                    break
                is_accepted = (cond == 1)
                if is_accepted:
                    found_list.append(item)
            ss = i
            # dd(f'simpleFuzzyTranslate(): backward to index: [{i}]')
            for i in range(index, len(key_list)):
                item = key_list[i]
                cond, found_item = validate(item)
                is_break = (cond == -1)
                if is_break:
                    # dd(f'simpleFuzzyTranslate(): traverse forward, stopped at: [{i}], item:[{item}]')
                    break
                is_accepted = (cond == 1)
                if is_accepted:
                    found_list.append(item)
            ee = i
            found_list.sort(key=lambda x: len(x), reverse=True)

            for found_item in found_list:
                ratio = fuzz.ratio(found_item, k)
                is_found = (ratio >= df.FUZZY_LOW_ACCEPTABLE_RATIO)
                if not is_found:
                    continue

                entry = (ratio, found_item)
                subset.append(entry)

            subset.sort(reverse=True)
            return subset

        def simpleFindListOfCandidates():
            found_list = []
            for found_item in key_list:
                ratio = fuzz.ratio(found_item, k)
                partial_ratio = fuzz.partial_ratio(found_item, k)
                entry = (ratio, partial_ratio, found_item)
                found_list.append(entry)
            if found_list:
                if is_k_single_word:
                    found_list.sort(key=OP.itemgetter(1, 0), reverse=True)
                else:
                    found_list.sort(key=OP.itemgetter(0, 1), reverse=True)
                return [found_list[0]]
            else:
                return found_list

        def isKeyListTextValid(dict_item):
            is_same = (k == dict_item.lower())
            is_starts_with_k_part  = (dict_item.startswith(k_part))
            is_word_len_acceptable = (len(dict_item.split()) <= k_word_count * 1.5)
            key_len = len(dict_item)
            is_total_len_acceptable = (0.5 <= key_len <= k_length * 1.5)
            return is_same or (is_starts_with_k_part and is_word_len_acceptable and is_total_len_acceptable)

        def binSearchFunction(item):
            return item[:max_k_length]

        def simpleKeyListGetting():
            """
            Collect the keys of ``self.local_keys`` that are candidates for ``k``.

            ``cm.binarySearch`` is asked for an index whose prefix (as computed
            by ``binSearchFunction``) matches ``k``. From that index the list
            is scanned backward and then forward; every key accepted by
            ``isKeyListTextValid`` is collected, and each scan stops at the
            first rejected key whose prefix differs from ``k_part``.

            Returns:
                list: accepted keys, backward-scan results first (in descending
                index order) followed by forward-scan results. Empty when the
                binary search reports no hit (negative index).
            """
            def findInRange(start_index, end_index, step):
                """Scan ``dict_keys`` over ``range(start_index, end_index, step)`` and return accepted keys."""
                local_found = []
                for i in range(start_index, end_index, step):
                    dict_item = dict_keys[i]
                    if isKeyListTextValid(dict_item):
                        local_found.append(dict_item)
                    elif binSearchFunction(dict_item) != k_part:
                        # Left the run of keys sharing the prefix; nothing
                        # further in this direction can match.
                        break
                return local_found

            # NOTE(review): presumably self.local_keys is sorted so that the
            # binary search and the prefix-run scan are meaningful -- confirm.
            dict_keys = self.local_keys
            index = cm.binarySearch(dict_keys, k, key=binSearchFunction)
            if index < 0:
                return []

            # The stop value must be -1 (exclusive) so that index 0 is also
            # examined; a stop of 0 silently skipped the first key, including
            # the case where the binary search hit index 0 itself.
            found_list = findInRange(index, -1, -1)
            found_list.extend(findInRange(index + 1, len(dict_keys), 1))
            return found_list

        untran_word_dic = {}
        left, k, right = cm.getTextWithin(msg)
        k = k.lower()

        k_length = len(k)
        k_word_list = k.split()
        k_word_count = len(k_word_list)
        is_k_single_word = (k_word_count == 1)
        allowed_k_word_count = int(k_word_count * 1.7)

        k_matching_length = int(ceil(k_length * 0.5))
        if not is_k_single_word:
            first_word = k_word_list[0]
            first_word_len = len(first_word)
            is_two_small = (first_word_len < 3)
            k_matching_length = int(ceil(first_word_len * df.MAX_FUZZY_TEST_LENGTH))
            if is_two_small:
                try:
                    second_word = k_word_list[1]
                    second_word_len = len(second_word)
                    k_matching_length = int(ceil((first_word_len + second_word_len + 1) * df.MAX_FUZZY_TEST_LENGTH))
                except Exception as e:
                    pass

        first_word = k_word_list[0]
        first_word_len = len(first_word)
        if is_k_single_word:
            max_k_length = int(first_word_len * 0.5)
        else:
            max_k_length = int(first_word_len * 0.8)
        k_part = first_word[:max_k_length]

        # key_list = [x for x in self.local_keys if x.startswith(k_part)]
        is_cached = (k in self.local_keylist_cache)
        if is_cached:
            key_list = self.local_keylist_cache[k]
        else:
            tic = time.perf_counter()
            # key_list = list(filter(isKeyListTextValid, self.local_keys))
            key_list = simpleKeyListGetting()
            tok = time.perf_counter()
            taken = (tok - tic)
            self.local_keylist_cache.update({k: key_list})
        # pp(key_list)

        subset = key_list

        # subset = findListOfCandidates()
        subset = simpleFindListOfCandidates()
        found_candidates = (len(subset) > 0)
        if not found_candidates:
            return_tran = None
            rat = 0
            selected_item = None
            return return_tran, selected_item, rat, untran_word_dic

        matched_ratio, partial_ratio, selected_item = subset[0]
        is_accepted = ((partial_ratio >= acceptable_rate) if is_k_single_word else (matched_ratio >= acceptable_rate))
        if not is_accepted:
            perfect_match_percent = cm.matchTextPercent(k, selected_item)
            is_accepted = (perfect_match_percent > df.FUZZY_PERFECT_MATCH_PERCENT)
            # dd(f'simpleFuzzyTranslate(): perfect_match_percent:[{perfect_match_percent}] k:[{k}] => selected_item:[{selected_item}]; is_accepted:[{is_accepted}]')
            if not is_accepted:
                return_tran = None
                rat = 0
                selected_item = None
                return return_tran, selected_item, rat, untran_word_dic

        translation_txt = self[selected_item]
        lower_msg = msg.lower()
        try:
            loc, new_selected = cm.locRemain(lower_msg, selected_item)
            translation = lower_msg.replace(new_selected, translation_txt)
            untran_word_dic = cm.getRemainedWord(lower_msg, new_selected)
        except Exception as e:
            # fname = INP.currentframe().f_code.co_name
            # dd(f'{fname}() {e}')
            dd(f'FAILED TO REPLACE: [{lower_msg}] by [{selected_item}] with trans: [{translation_txt}], matched_ratio:[{matched_ratio}]')
            can_accept = (matched_ratio >= acceptable_rate)
            if can_accept:
                translation = translation_txt

                left, mid, right = cm.getTextWithin(lower_msg)
                had_the_same_right = (right and translation.endswith(right))
                had_the_same_left = (left and translation.startswith(left))

                if left and not had_the_same_left:
                    translation = left + translation

                if right and not had_the_same_right:
                    translation = translation + right

                dd(f'SIMPLE PATCHING: left:[{left}] right:[{right}] trans: [{translation}]')
                untran_word_dic = cm.getRemainedWord(lower_msg, selected_item)
            else:
                fname = INP.currentframe().f_code.co_name
                dd(f'{fname}() Unable to locate translation for {msg}')
                translation = None

        if translation:
            translation = self.replaceTranRef(translation)

        return translation, selected_item, partial_ratio, untran_word_dic
Example #18
0
    def parseMessage(self):
        """
        Parse ``self.msg`` into reference records.

        If ``self.tf.isInDict`` already yields a translation for the whole
        message it is stored via ``setTranslation`` and parsing stops.
        Messages that ``cm.isLinkPath`` reports as link paths are skipped.
        Otherwise the message is matched against ``df.pattern_list`` and the
        unparsed remainder is registered with ``addUnparsedDict``; the
        resulting records are then written to the debug log.
        """
        known_translation = self.tf.isInDict(self.msg)
        if known_translation:
            self.setTranslation(known_translation, False, False)
            return

        if cm.isLinkPath(self.msg):
            dd(f'parseMessage(): IGNORED [{self.msg}]; is_full_path')
            return

        _, unparsed_dict = self.findPattern(df.pattern_list, str(self.msg))
        self.addUnparsedDict(unparsed_dict)
        if not len(self):
            return

        # Dump every parsed record for debugging.
        separator = '-' * 80
        dd('Finishing parseMessage:')
        dd(separator)
        for loc, mm_rec in self.items():
            dd(f'{loc}')
            dd(f'{mm_rec.txt}')
            dd(separator)
def doctree_resolved(app, doctree, docname):
    def abbreviating():
        """
        Rewrite master-dictionary values through ``RefList.quotedToAbbrev``.

        Every value for which ``quotedToAbbrev`` returns a non-empty result is
        replaced by that result; when at least one value changed, the whole
        master dictionary is written to a hard-coded JSON test file.
        """
        # Never populated in this function, so the deletion loop below does nothing.
        remove_items = []
        new_items = {}

        for k, v in trans_finder.master_dic.items():
            new_v = RefList(msg=v).quotedToAbbrev(k)
            if new_v is None or len(new_v) == 0:
                continue
            new_items[k] = new_v

        for k in remove_items:
            dd(f'Delete from dictionary:[{k}]')
            del trans_finder.master_dic[k]

        if len(new_items) == 0:
            return

        trans_finder.master_dic.update(new_items)
        dic_file = '/Users/hoangduytran/blender_manual/test_dic.json'
        print(
            f'Writing changes to: {dic_file}, number of records:{len(new_items)}'
        )
        trans_finder.writeJSONDic(dict_list=trans_finder.master_dic,
                                  file_name=dic_file)

    def checkDictForMultipleMeaningsInTrans():
        """
        Print master-dictionary entries whose slash-joined word groups differ.

        For each multi-word key, the number of ``word/word[/word...]`` groups
        found in the key is compared with the number found in its value;
        entries where the counts differ are printed for manual inspection.
        """
        slash_group = re.compile(r'(\w+(/\w+)+)')
        short_rule = '-' * 3
        for k, v in trans_finder.master_dic.items():
            k_found_list = slash_group.findall(k)
            v_found_list = slash_group.findall(v)

            counts_differ = (len(k_found_list) != len(v_found_list))
            is_multi_word = (' ' in k)
            if not (counts_differ and is_multi_word):
                continue

            PP(k_found_list)
            print(short_rule)
            PP(v_found_list)
            print(short_rule)
            print(f'{k}\n\n{v}')
            print('-' * 40)

    def trimmingText(text):
        """
        Strip the parts of ``text`` matched by the trimmable patterns.

        Returns:
            tuple: ``(text, False)`` unchanged when neither
            ``cm.TRIMMABLE_ENDING`` nor ``cm.TRIMMABLE_BEGINNING`` matches;
            otherwise ``(trimmed_text, True)`` with both patterns removed.
        """
        trimmers = (cm.TRIMMABLE_ENDING, cm.TRIMMABLE_BEGINNING)
        if not any(rx.search(text) is not None for rx in trimmers):
            return text, False

        trimmed = cm.TRIMMABLE_BEGINNING.sub('', text)
        trimmed = cm.TRIMMABLE_ENDING.sub('', trimmed)
        return trimmed, True

    def removeDictBeginAndEndingPuncts():
        """
        Replace master-dictionary entries by their trimmed forms.

        Each key and value is passed through ``trimmingText``. Entries where
        either side changed are deleted and re-added in trimmed form. If
        anything changed, the dictionary is further cleaned with
        ``cleanupLeadingTrailingPunct`` and written to a hard-coded JSON file.
        Every entry, changed or not, is printed for inspection.
        """
        remove_set = {}
        add_set = {}

        for k, v in trans_finder.master_dic.items():
            trimmed_k, is_trimmed_k = trimmingText(k)
            trimmed_v, is_trimmed_v = trimmingText(v)

            if is_trimmed_k or is_trimmed_v:
                remove_set[k] = v
                add_set[trimmed_k] = trimmed_v

            print(f'[{k}]')
            print(f'[{trimmed_k}]')
            print('-' * 3)
            print(f'[{v}]')
            print(f'[{trimmed_v}]')
            print('-' * 40)

        # Mutate the master dictionary only after the iteration above is done.
        for k, v in remove_set.items():
            remove_entry = {k: v}
            print(f'remove:{remove_entry}')
            del trans_finder.master_dic[k]

        for k, v in add_set.items():
            add_entry = {k: v}
            trans_finder.master_dic.update(add_entry)
            print(f'added: {add_entry}')

        nothing_changed = (len(remove_set) == 0) and (len(add_set) == 0)
        if nothing_changed:
            return

        new_dict = cleanupLeadingTrailingPunct(trans_finder.master_dic)
        test_to_file = '/Users/hoangduytran/blender_manual/ref_dict_0005.json'
        trans_finder.writeJSONDic(dict_list=new_dict,
                                  file_name=test_to_file)

    def removeDuplication(txt_with_punct):
        """
        Report whether the text, minus surrounding punctuation, is already known.

        The text is cropped with ``cm.beginAndEndPunctuation`` first with
        ``is_single=True`` and, if that yields no hit, with ``is_single=False``;
        each cropped form is looked up through ``trans_finder.isInList``.

        Returns:
            bool: True as soon as one lookup returns a non-None result.
        """
        for single_mode in (True, False):
            cropped_txt, _begin_puncts, _end_puncts = cm.beginAndEndPunctuation(
                txt_with_punct, is_single=single_mode)
            if trans_finder.isInList(cropped_txt) is not None:
                return True
        return False

    def cleanupLeadingTrailingPunct(d_dict):
        """
        Return a copy of ``d_dict`` with keys and values reduced to their core text.

        For each key and value, group 1 of the first ``cm.WORD_WITHOUT_QUOTE``
        match is used when the pattern matches; otherwise ``str()`` of the
        original is kept. Entries whose cleaned keys collide overwrite each
        other, the later one winning.
        """
        def core_of(value):
            # Fall back to the plain string form when the pattern does not match.
            hit = cm.WORD_WITHOUT_QUOTE.search(value)
            return hit.group(1) if hit else str(value)

        return_dict = {}
        for k, v in d_dict.items():
            cleaned_k = core_of(k)
            cleaned_v = core_of(v)
            return_dict[cleaned_k] = cleaned_v
        return return_dict

    def refToDictItems(ref_list):
        """
        Build a dictionary of text pairs extracted from parsed references.

        Each reference contributes entries depending on its origin type:

        * ``RefType.MENUSELECTION``: the matches of ``cm.MENU_TEXT_REVERSE``
          are consumed in pairs; the second item of each pair becomes the key
          and the first the value. A trailing unpaired item is keyed by the
          reference's origin text.
        * ``RefType.ABBR``: the first two matches of
          ``cm.ABBREV_TEXT_REVERSE`` are an abbreviation and its definition;
          whichever part is ASCII becomes the key.
        * REF / DOC / GA / TERM: the first ``cm.REF_TEXT_REVERSE`` match is a
          ``(value, key)`` pair.

        Other types are reported through ``dd`` and contribute nothing.

        Args:
            ref_list: iterable of reference records.

        Returns:
            dict: the entries of ALL references, passed through
            ``cleanupLeadingTrailingPunct``. Empty for an empty ``ref_list``.
        """
        ref_dict = {}
        interest_ref = [
            RefType.REF,
            RefType.DOC,
            RefType.GA,
            RefType.TERM,
        ]
        for ref in ref_list:
            ref_type = ref.getOrigin().getRefType()
            ref_text = ref.getRefItemByIndex(0).getText()

            # Debugger anchor for one specific entry.
            is_debug = ('Poor mans steadycam' in ref_text)
            if is_debug:
                dd('DEBUG')

            d_dict = {}
            if ref_type == RefType.MENUSELECTION:
                print(f'MENUSELECTION:{ref_type}')
                text_list = cm.MENU_TEXT_REVERSE.findall(ref_text)
                length = len(text_list)
                # Items come in (tran, orig) pairs, hence the step of 2.
                for i_index in range(0, length, 2):
                    tran = text_list[i_index]
                    if i_index + 1 < length:
                        orig = text_list[i_index + 1]
                    else:
                        print('ERROR: Orig is NOT THERE, use original')
                        orig = ref.getOrigin().getText()

                    entry = {orig: tran}
                    print(f'menu:{entry}')
                    d_dict.update(entry)

            elif ref_type == RefType.ABBR:
                print(f'ABBR:{ref_type}')
                text_list = cm.ABBREV_TEXT_REVERSE.findall(ref_text)
                # NOTE(review): assumes the pattern yields at least two
                # matches; fewer raises IndexError -- confirm against callers.
                abbr = text_list[0]
                defin = text_list[1]
                has_further_explanation = (': ' in defin)
                if has_further_explanation:
                    exp_list = defin.split(': ')
                    orig_part = exp_list[0]
                    further_exp = exp_list[1]
                    print(
                        f'abbr:{abbr}; orig_part:{orig_part}; further_exp:{further_exp}'
                    )

                    if abbr.isascii():
                        entry = {abbr: f'{orig_part}, {further_exp}'}
                    elif orig_part.isascii():
                        entry = {orig_part: f'{further_exp}, {abbr}'}
                    else:
                        entry = {further_exp: f'{orig_part}, {abbr}'}
                    d_dict.update(entry)
                else:
                    print(f'abbr:{abbr}; defin:{defin}')
                    if defin.isascii():
                        entry = {defin: abbr}
                    else:
                        entry = {abbr: defin}
                    d_dict.update(entry)

            elif ref_type in interest_ref:
                print(f'GENERIC_REF:{ref_type}')
                text_list = cm.REF_TEXT_REVERSE.findall(ref_text)
                has_text = (len(text_list) > 0)
                if not has_text:
                    origin_text = ref.getOrigin().getText()
                    print(f'ERROR: origin_text:{origin_text}')
                else:
                    vn_part, en_part = text_list[0]
                    print(f'en_part:{en_part} vn_part:{vn_part}')
                    entry = {en_part: vn_part}
                    d_dict.update(entry)
            else:
                dd(f'{ref_type} is not the type we are looking for.')
            ref_dict.update(d_dict)

        # Clean the accumulated dictionary. Cleaning the per-reference d_dict
        # here would drop every reference but the last and would fail with
        # UnboundLocalError when ref_list is empty.
        return cleanupLeadingTrailingPunct(ref_dict)

    def listDictRefsToDict():
        """
        Extract reference pairs from every master-dictionary value into a JSON file.

        Each value is parsed as a ``RefList``; references of the listed types
        are converted with ``refToDictItems`` and merged into one dictionary,
        which is written to a hard-coded file when it is not empty.
        """
        wanted_types = [
            RefType.MENUSELECTION,
            RefType.REF,
            RefType.DOC,
            RefType.GA,
            RefType.TERM,
            RefType.ABBR,
        ]
        ref_dict_filename = '/Users/hoangduytran/blender_manual/ref_dict_refsonly.json'

        ref_dict = {}
        for _key, v in trans_finder.master_dic.items():
            ref_list = RefList(msg=v, keep_orig=False, tf=trans_finder)
            ref_list.parseMessage()

            inter_ref_list = ref_list.getListOfRefType(wanted_types)
            if len(inter_ref_list) > 0:
                ref_dict.update(refToDictItems(inter_ref_list))

        if len(ref_dict) > 0:
            trans_finder.writeJSONDic(dict_list=ref_dict,
                                      file_name=ref_dict_filename)

    def tranRef(msg, is_keep_original):
        """
        Translate ``msg`` through a ``RefList`` and return the result.

        Args:
            msg: the message text to parse and translate.
            is_keep_original: forwarded to ``RefList`` as ``keep_orig``.

        Returns:
            Whatever ``RefList.getTranslation`` yields after parsing and
            translating the reference list.
        """
        parsed_refs = RefList(msg=msg,
                              keep_orig=is_keep_original,
                              tf=trans_finder)
        parsed_refs.parseMessage()
        parsed_refs.translateRefList()
        return parsed_refs.getTranslation()

    # def fuzzyTextSimilar(txt1 : str, txt2 : str, accept_ratio):
    #     try:
    #         similar_ratio = LE.ratio(txt1, txt2)
    #         is_similar = (similar_ratio >= accept_ratio)
    #         return is_similar
    #     except Exception as e:
    #         print(e)
    #         return False

    def getTimeNow(self):
        """
        Return the current time, localized to Europe/London, as text.

        The format is ``YYYY-MM-DD HH:MM`` followed by the UTC offset.
        ``self`` is accepted but not used.
        """
        london = timezone('Europe/London')
        stamp = london.localize(datetime.datetime.now())
        return stamp.strftime('%Y-%m-%d %H:%M%z')

    # is_running = runAppOrNot()
    # if not is_running:
    #     return

    # correctingDictionary()
    # checkDictKeyboard()
    # checkDictRef()
    # checkNonTranslatedDictWords()
    # checkDictForMultipleMeaningsInTrans()
    # removeDictBeginAndEndingPuncts()
    # listDictRefsToDict()
    # trans_finder.saveMasterDict()
    # exit(0)

    try:

        is_debug = ('vr_scene_inspection' in docname)
        if is_debug:
            dd('DEBUG')

        ex_env_key = 'EX_PO_TRANS'
        is_ex_env_set = (ex_env_key in os.environ)
        if not is_ex_env_set:
            return
        ex_env_key_value = os.environ[ex_env_key]
        is_ex_set_true = (ex_env_key_value.lower() == 'true')
        if not is_ex_set_true:
            return

        debug_file = cm.debug_file
        if debug_file:
            is_debug_file = (debug_file in docname)
            if not is_debug_file:
                return

        build_dir = "build/rstdoc"
        po_vi_dir = "locale/vi/LC_MESSAGES"

        po_file_path = "{}.po".format(docname)
        local_path = os.path.dirname(os.path.abspath(__file__))
        blender_docs_path = cm.BLENDER_DOCS  # os.path.dirname(local_path)

        locale_vi_path = "locale/vi/LC_MESSAGES"

        po_path = os.path.join(blender_docs_path,
                               os.path.join(locale_vi_path, po_file_path))

        if not os.path.isfile(po_path):
            msg = f'po_path: {po_path} NOT FOUND!'
            print(msg)
            raise Exception(msg)
            exit(0)

        # #loading local po file to get translation if any
        po_dic, current_po_cat = trans_finder.loadPOAsDic(po_path)
        trans_finder.flatPOFile(po_path)

        rst_output_location = os.path.join(blender_docs_path, build_dir)
        output_path = os.path.join(rst_output_location, po_file_path)

        local_time = timezone(TIME_ZONE)
        time_now = local_time.localize(datetime.datetime.now())

        local_locale = locale.getlocale()[0]
        current_header = current_po_cat._get_header_comment()
        new_po_cat = Catalog(locale="vi",
                             header_comment=current_header,
                             project=current_po_cat.project,
                             version=current_po_cat.version,
                             copyright_holder=YOUR_ID,
                             creation_date=current_po_cat.creation_date,
                             revision_date=time_now,
                             last_translator=YOUR_ID,
                             language_team=YOUR_TRANSLATION_TEAM)

        dd("#" * 80)
        dd("filename: {}".format(output_path))

        # msgid = "Lines should be less than 120 characters long."
        # msgstr = "Số chữ trong các dòng phải ít hơn 120 ký tự de lam gi."
        # trans_finder.addDictEntry((msgid, msgstr), False)
        # exit(0)

        for node, msg in extract_messages(doctree):
            msg = msg.strip()
            dd("=" * 80)
            dd("msgid:[{}]".format(msg))

            # clean up po file

            is_inline = isinstance(node, nodes.inline)
            is_emphasis = isinstance(node, nodes.emphasis)
            is_title = isinstance(node, nodes.title)
            is_term = isinstance(node, nodes.term)
            is_rubric = isinstance(node, nodes.rubric)
            is_field_name = isinstance(node, nodes.field_name)
            is_reference = isinstance(node, nodes.reference)
            is_strong = isinstance(node, nodes.strong)

            is_keep_original = (is_inline or is_emphasis or is_title or is_term
                                or is_rubric or is_field_name or is_reference
                                or is_strong)

            tran = None
            # is_debug = ('Get involved in discussions' in msg)
            # if is_debug:
            #     dd('DEBUG')
            is_ignore = ig.isIgnored(msg)
            if is_ignore:
                print(f'IGNORED: {msg}')
                continue

            # is_added = False
            tran, is_ignore = trans_finder.findTranslation(msg)
            if is_ignore:
                continue

            has_translation = (tran is not None)
            if not has_translation:
                is_debug = ('is based on the OpenXR specification' in msg)
                if is_debug:
                    dd('Debug')

                ref_list = RefList(msg=msg,
                                   keep_orig=is_keep_original,
                                   tf=trans_finder)
                ref_list.parseMessage()
                ref_list.translateRefList()
                tran = ref_list.getTranslation()
                # tran = tranRef(msg, is_keep_original)
                has_translation = (tran is not None)
                if not has_translation:
                    tran = po_dic[msg]

            has_translation = (tran is not None)
            if has_translation:
                has_month = ('Tháng ' in tran)
                has_original = (msg.lower() in tran.lower())
                has_link = (cm.REF_LINK.search(tran) is not None)
                can_ignore = (has_month or has_original or has_link)
                is_repeat = is_keep_original and not can_ignore
                if is_repeat:
                    print('Repeating MSG')
                    tran = cm.matchCase(msg, tran)
                    tran = f'{tran} -- {msg}'
                    print(f'Repeating MSG:{tran}')

            if tran is not None:
                new_po_cat.add(msg, string=tran)
            else:
                new_po_cat.add(msg, string="")

            print(f'msgid \"{msg}\"')
            if tran is not None:
                print(f'msgstr \"{tran}\"')
            else:
                print('msgstr \"\"')

        print("Output to the path:", new_po_cat, output_path)
        c.dump_po(output_path, new_po_cat)
        # dd('DEBUG')
    except Exception as e:
        df.LOG(f'{e}', error=True)
    def refToDictItems(ref_list):
        """
        Build a dictionary of text pairs extracted from parsed references.

        Each reference contributes entries depending on its origin type:

        * ``RefType.MENUSELECTION``: the matches of ``cm.MENU_TEXT_REVERSE``
          are consumed in pairs; the second item of each pair becomes the key
          and the first the value. A trailing unpaired item is keyed by the
          reference's origin text.
        * ``RefType.ABBR``: the first two matches of
          ``cm.ABBREV_TEXT_REVERSE`` are an abbreviation and its definition;
          whichever part is ASCII becomes the key.
        * REF / DOC / GA / TERM: the first ``cm.REF_TEXT_REVERSE`` match is a
          ``(value, key)`` pair.

        Other types are reported through ``dd`` and contribute nothing.

        Args:
            ref_list: iterable of reference records.

        Returns:
            dict: the entries of ALL references, passed through
            ``cleanupLeadingTrailingPunct``. Empty for an empty ``ref_list``.
        """
        ref_dict = {}
        interest_ref = [
            RefType.REF,
            RefType.DOC,
            RefType.GA,
            RefType.TERM,
        ]
        for ref in ref_list:
            ref_type = ref.getOrigin().getRefType()
            ref_text = ref.getRefItemByIndex(0).getText()

            # Debugger anchor for one specific entry.
            is_debug = ('Poor mans steadycam' in ref_text)
            if is_debug:
                dd('DEBUG')

            d_dict = {}
            if ref_type == RefType.MENUSELECTION:
                print(f'MENUSELECTION:{ref_type}')
                text_list = cm.MENU_TEXT_REVERSE.findall(ref_text)
                length = len(text_list)
                # Items come in (tran, orig) pairs, hence the step of 2.
                for i_index in range(0, length, 2):
                    tran = text_list[i_index]
                    if i_index + 1 < length:
                        orig = text_list[i_index + 1]
                    else:
                        print('ERROR: Orig is NOT THERE, use original')
                        orig = ref.getOrigin().getText()

                    entry = {orig: tran}
                    print(f'menu:{entry}')
                    d_dict.update(entry)

            elif ref_type == RefType.ABBR:
                print(f'ABBR:{ref_type}')
                text_list = cm.ABBREV_TEXT_REVERSE.findall(ref_text)
                # NOTE(review): assumes the pattern yields at least two
                # matches; fewer raises IndexError -- confirm against callers.
                abbr = text_list[0]
                defin = text_list[1]
                has_further_explanation = (': ' in defin)
                if has_further_explanation:
                    exp_list = defin.split(': ')
                    orig_part = exp_list[0]
                    further_exp = exp_list[1]
                    print(
                        f'abbr:{abbr}; orig_part:{orig_part}; further_exp:{further_exp}'
                    )

                    if abbr.isascii():
                        entry = {abbr: f'{orig_part}, {further_exp}'}
                    elif orig_part.isascii():
                        entry = {orig_part: f'{further_exp}, {abbr}'}
                    else:
                        entry = {further_exp: f'{orig_part}, {abbr}'}
                    d_dict.update(entry)
                else:
                    print(f'abbr:{abbr}; defin:{defin}')
                    if defin.isascii():
                        entry = {defin: abbr}
                    else:
                        entry = {abbr: defin}
                    d_dict.update(entry)

            elif ref_type in interest_ref:
                print(f'GENERIC_REF:{ref_type}')
                text_list = cm.REF_TEXT_REVERSE.findall(ref_text)
                has_text = (len(text_list) > 0)
                if not has_text:
                    origin_text = ref.getOrigin().getText()
                    print(f'ERROR: origin_text:{origin_text}')
                else:
                    vn_part, en_part = text_list[0]
                    print(f'en_part:{en_part} vn_part:{vn_part}')
                    entry = {en_part: vn_part}
                    d_dict.update(entry)
            else:
                dd(f'{ref_type} is not the type we are looking for.')
            ref_dict.update(d_dict)

        # Clean the accumulated dictionary. Cleaning the per-reference d_dict
        # here would drop every reference but the last and would fail with
        # UnboundLocalError when ref_list is empty.
        return cleanupLeadingTrailingPunct(ref_dict)