Exemple #1
0
    def get_verse_alignment_mp(self, verse_nums, edition_pairs):
        """Collect verse alignments for every requested pair of editions.

        For each ``(edition_1, edition_2)`` pair one of three things happens:

        * both editions map to the same language (per
          ``get_lang_from_edition``): the pair gets an empty dict;
        * both editions are present in ``self.bert_files``: the work is
          delegated to the parent class' ``get_verse_alignment``;
        * otherwise: the alignment and index for the ordered language pair
          are loaded via ``get_alignment`` / ``get_index`` and each requested
          verse found in the index is converted with
          ``create_ordered_alignment``.

        Args:
            verse_nums: iterable of verse identifiers to look up.
            edition_pairs: iterable of ``(edition_1, edition_2)`` pairs.

        Returns:
            A list of ``(edition_1, edition_2, aligns)`` tuples, one per input
            pair and in input order.
        """
        results = []
        # Remember the last loaded language pair so that consecutive edition
        # pairs over the same languages reuse the already loaded data.
        cached_src_lang, cached_tgt_lang = None, None
        lang_index, alignments = None, None

        for edition_1, edition_2 in edition_pairs:
            lang_1 = self.get_lang_from_edition(edition_1)
            lang_2 = self.get_lang_from_edition(edition_2)
            if lang_1 == lang_2:
                # Same language on both sides: nothing to align.
                results.append((edition_1, edition_2, {}))
                continue

            both_in_bert = (edition_1 in self.bert_files
                            and edition_2 in self.bert_files)
            if both_in_bert:
                LOG.info("going to super aglingment for: {}, {}".format(
                    edition_1, edition_2))
                parent_aligns = super().get_verse_alignment(
                    verse_nums,
                    self.lang_prf_map[edition_1],
                    self.lang_prf_map[edition_2])
                results.append((edition_1, edition_2, parent_aligns))
                continue

            LOG.info("getting eflomal aglingment for: {} , {}".format(
                edition_1, edition_2))
            ordered = self.get_ordered_editions(edition_1, edition_2)
            s_lang, t_lang, s_edition, t_edition = ordered
            s_lang_file = self.edition_file_mapping[s_edition]
            t_lang_file = self.edition_file_mapping[t_edition]

            # The ordering helper may have swapped the editions; in that case
            # the alignment has to be reverted for the caller's orientation.
            revert = bool(s_edition == edition_2)

            if s_lang != cached_src_lang or t_lang != cached_tgt_lang:
                alignments = self.get_alignment(s_lang, t_lang)
                lang_index = self.get_index(s_lang, t_lang)
                cached_src_lang, cached_tgt_lang = s_lang, t_lang

            index = None
            if (s_lang_file in lang_index
                    and t_lang_file in lang_index[s_lang_file]):
                index = lang_index[s_lang_file][t_lang_file]

            if index is None:
                LOG.warning("couldn't find index for: " + s_edition + ", " +
                            t_edition)
                verse_aligns = {}
            else:
                LOG.info(
                    "getting verse, {}, {}, {}, {}, {}, {}, {}, {}".format(
                        edition_1, edition_2, s_lang, t_lang,
                        cached_src_lang, cached_tgt_lang,
                        len(lang_index), len(index)))
                verse_aligns = {
                    verse: self.create_ordered_alignment(
                        alignments, index[verse], revert)
                    for verse in verse_nums
                    if verse in index
                }
                LOG.info("verses got")

            results.append((edition_1, edition_2, verse_aligns))
        return results
Exemple #2
0
 def read_langs_order_file(self):
     """Read the language-order file into a list of stripped lines.

     The file path is taken from ``self.lang_order_file_path``. Every line
     is whitespace-stripped and kept in file order.

     Returns:
         The list of stripped lines; an empty list (after logging a
         warning) when the file does not exist.
     """
     languages = []
     try:
         with open(self.lang_order_file_path, 'r') as order_file:
             languages.extend(line.strip() for line in order_file)
     except FileNotFoundError:
         LOG.warning("Langs order file not found")
     return languages
Exemple #3
0
    def read_dict_file(self, file_path, do_lower=False):
        """Load a tab-separated key/value file into a dict.

        Lines starting with ``#`` are treated as comments and skipped, as are
        blank (whitespace-only) lines. For every other line the first
        tab-separated field becomes the key and the second the value, both
        whitespace-stripped. Later lines overwrite earlier ones with the same
        key.

        Args:
            file_path: path of the mapping file to read.
            do_lower: when true, lower-case each line before splitting it, so
                both keys and values end up lower-cased.

        Returns:
            The key -> value dict; an empty dict (after logging a warning)
            when the file does not exist.

        Raises:
            IndexError: if a non-blank, non-comment line has no second
                tab-separated field.
        """
        res = {}
        try:
            with open(file_path, "r") as mapping_list:
                for line in mapping_list:
                    if line.startswith('#'):
                        continue

                    # str.lower() returns a new string, so the result has to
                    # be kept; calling it for its side effect does nothing.
                    entry = line.lower() if do_lower else line
                    entry = entry.strip()

                    # A blank line (e.g. a trailing newline at the end of the
                    # file) carries no mapping and must not crash the load.
                    if not entry:
                        continue

                    pair = entry.split('\t')
                    res[pair[0].strip()] = pair[1].strip()
        except FileNotFoundError:
            LOG.warning(f"file {file_path} not found")
        return res