def get_verse_alignment_mp(self, verse_nums, edition_pairs):
    """Gather verse alignments for every requested pair of editions.

    For each ``(edition_1, edition_2)`` pair one of three things happens:

    * both editions map to the same language: an empty dict is recorded;
    * both editions are listed in ``self.bert_files``: the work is delegated
      to the parent class' ``get_verse_alignment``;
    * otherwise: the alignment and index for the ordered language pair are
      loaded through ``self.get_alignment`` / ``self.get_index`` and one
      entry per verse found in the index is built with
      ``self.create_ordered_alignment``.

    Args:
        verse_nums: iterable of verse identifiers to look up.
        edition_pairs: iterable of ``(edition_1, edition_2)`` tuples.

    Returns:
        A list of ``(edition_1, edition_2, alignments)`` tuples, in the same
        order as ``edition_pairs``.
    """
    results = []

    # Remember the last language pair that was loaded: consecutive edition
    # pairs over the same two languages reuse the already loaded data.
    cached_src_lang = cached_tgt_lang = None
    lang_index = loaded_alignments = None

    for edition_1, edition_2 in edition_pairs:
        verse_aligns = {}

        lang_1 = self.get_lang_from_edition(edition_1)
        lang_2 = self.get_lang_from_edition(edition_2)
        if lang_1 == lang_2:
            # Same language on both sides: nothing to align.
            results.append((edition_1, edition_2, verse_aligns))
            continue

        if edition_1 in self.bert_files and edition_2 in self.bert_files:
            LOG.info("going to super aglingment for: {}, {}".format(
                edition_1, edition_2))
            inherited = super().get_verse_alignment(
                verse_nums,
                self.lang_prf_map[edition_1],
                self.lang_prf_map[edition_2])
            results.append((edition_1, edition_2, inherited))
            continue

        LOG.info("getting eflomal aglingment for: {} , {}".format(
            edition_1, edition_2))
        s_lang, t_lang, s_edition, t_edition = self.get_ordered_editions(
            edition_1, edition_2)
        s_lang_file = self.edition_file_mapping[s_edition]
        t_lang_file = self.edition_file_mapping[t_edition]

        # The ordered source edition being the caller's second edition means
        # the pair was swapped, which is passed on as the revert flag.
        revert = s_edition == edition_2

        if not (s_lang == cached_src_lang and t_lang == cached_tgt_lang):
            loaded_alignments = self.get_alignment(s_lang, t_lang)
            lang_index = self.get_index(s_lang, t_lang)
            cached_src_lang, cached_tgt_lang = s_lang, t_lang

        pair_index = None
        if s_lang_file in lang_index and t_lang_file in lang_index[s_lang_file]:
            pair_index = lang_index[s_lang_file][t_lang_file]

        if pair_index is None:
            LOG.warning("couldn't find index for: " + s_edition + ", " + t_edition)
        else:
            LOG.info(
                "getting verse, {}, {}, {}, {}, {}, {}, {}, {}".format(
                    edition_1, edition_2, s_lang, t_lang,
                    cached_src_lang, cached_tgt_lang,
                    len(lang_index), len(pair_index)))
            for verse in verse_nums:
                if verse not in pair_index:
                    continue
                verse_aligns[verse] = self.create_ordered_alignment(
                    loaded_alignments, pair_index[verse], revert)
            LOG.info("verses got")

        results.append((edition_1, edition_2, verse_aligns))

    return results
def read_langs_order_file(self):
    """Return the stripped lines of the file at ``self.lang_order_file_path``.

    Every line of the file yields one entry, with surrounding whitespace
    removed. If the file does not exist, a warning is logged and an empty
    list is returned.
    """
    try:
        with open(self.lang_order_file_path, 'r') as order_file:
            return [raw_line.strip() for raw_line in order_file]
    except FileNotFoundError:
        LOG.warning("Langs order file not found")
        return []
def read_dict_file(self, file_path, do_lower=False):
    """Read a tab-separated key/value file into a dict.

    Lines starting with ``#`` are treated as comments and skipped, as are
    blank lines. For every other line the first tab-separated field becomes
    the key and the second the value, both stripped of surrounding
    whitespace. Later lines overwrite earlier ones with the same key.

    Args:
        file_path: path of the file to read.
        do_lower: when true, each line is lower-cased before it is split,
            so both keys and values end up lower-case.

    Returns:
        The resulting dict; empty if the file does not exist (a warning is
        logged in that case).

    Raises:
        IndexError: if a non-blank, non-comment line contains no tab.
    """
    res = {}
    try:
        with open(file_path, "r") as mapping_list:
            for line in mapping_list:
                if line.startswith('#'):
                    continue
                if do_lower:
                    # str.lower() returns a new string; the result has to be
                    # rebound or the lower-casing is silently lost.
                    line = line.lower()
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline) carry no mapping.
                    continue
                pair = line.split('\t')
                res[pair[0].strip()] = pair[1].strip()
    except FileNotFoundError:
        LOG.warning(f"file {file_path} not found")
    return res