Ejemplo n.º 1
0
class Frame_aligner(Block):
    """Extract verb frames from two zones of each bundle and link them.

    For every bundle, frame instances are extracted from the tree whose zone
    equals ``a_lang_mark`` and from the tree whose zone equals
    ``b_lang_mark``.  One line of word alignments is read from the alignment
    file and handed, together with both lists of frame instances, to the
    linker, which returns frame pairs.  The frame types and frame instances
    of every pair are then linked.  After a document has been processed,
    both verb dictionaries are pickled into a single file.

    Subclasses are expected to replace the frame extractors, the language
    marks and the examiner that are set to defaults at the end of
    ``__init__``.
    """

    def __init__(self,
                 align_file_name="",
                 output_folder="",
                 output_name="",
                 obl_ratio_limit="0.5",
                 min_obl_inst_num=2,
                 **kwargs):
        """Overridden Block method.

        Args:
            align_file_name: path of the word-alignment file.  When it is a
                non-empty string the file is opened for reading; one line is
                consumed per processed bundle.  When empty, ``align_file``
                stays ``None``.
            output_folder: first part of the pickle output path.
            output_name: last part of the pickle output path.
            obl_ratio_limit: converted with ``float()`` and stored; passed
                to ``Extraction_finalizer`` in ``_finalize_dictionary``.
            min_obl_inst_num: converted with ``int()`` and stored; passed
                to ``Extraction_finalizer`` in ``_finalize_dictionary``.
            **kwargs: forwarded unchanged to the parent constructor.

        Raises:
            SystemExit: with status 1 when ``align_file_name`` is given but
                the file does not exist.
        """
        super().__init__(**kwargs)

        self.align_file = None
        self.output_folder = output_folder
        self.output_name = output_name
        self.obl_ratio_limit = float(obl_ratio_limit)
        self.min_obl_inst_num = int(min_obl_inst_num)
        # Flagged with "!!!" by the original author; the reason is not
        # recorded in this file.
        self.linker = Linker()
        if align_file_name != "":
            # The handle is deliberately kept open: process_bundle reads it
            # line by line for the whole lifetime of the block.
            try:
                self.align_file = open(align_file_name, 'r')
            except FileNotFoundError:
                print("ERROR: Alignment file " + align_file_name +
                      " not found.")
                # sys.exit with a non-zero status, so that the failure is
                # visible to the calling shell (the bare site helper
                # ``exit()`` terminated with status 0).
                sys.exit(1)

        # Defaults, to be overridden by subclasses.
        self.a_frame_extractor = Frame_extractor()
        self.b_frame_extractor = Frame_extractor()
        self.a_lang_mark = ""
        self.b_lang_mark = ""
        self.examiner = Modal_examiner()

    def process_bundle(self, bundle):
        """Overridden Block method.

        Extracts frame instances from the trees of both zones, reads the
        next line of the alignment file, pairs the frame instances and
        links the paired frame types and frame instances.  The number of
        frame pairs found is printed to standard output.

        Args:
            bundle: object whose ``trees`` attribute is iterated; every tree
                root is compared by its ``zone`` attribute to the two
                language marks.

        Returns:
            A tuple ``(a_frame_insts, b_frame_insts, frame_pairs)``.  The
            original author marked this return value as temporary ("added
            for modals, delete").

        Raises:
            AttributeError: when no alignment file was opened in
                ``__init__`` (``align_file`` is ``None``).
        """
        a_frame_insts = []
        b_frame_insts = []
        for tree_root in bundle.trees:
            # If several trees share a zone, the last one wins.
            if tree_root.zone == self.a_lang_mark:
                a_frame_insts = \
                    self.a_frame_extractor.process_tree(tree_root)
                self.examiner.examine_sentence(tree_root, self.a_lang_mark)
            elif tree_root.zone == self.b_lang_mark:
                b_frame_insts = \
                    self.b_frame_extractor.process_tree(tree_root)
                self.examiner.examine_sentence(tree_root, self.b_lang_mark)

        # Exactly one alignment line is consumed per bundle, so the file
        # has to be ordered like the bundles.
        # NOTE(review): align_file is None when no align_file_name was
        # given; this line then raises AttributeError.
        word_alignments = self.align_file.readline().split()

        frame_pairs = self.linker.find_frame_pairs(a_frame_insts,
                                                   b_frame_insts,
                                                   word_alignments)
        print(len(frame_pairs))

        for frame_pair in frame_pairs:
            # Link the frame types; reuse an existing link if there is one.
            a_frame_type = frame_pair.a_frame_type
            b_frame_type = frame_pair.b_frame_type
            frame_type_link = a_frame_type.find_link_with(b_frame_type)
            if frame_type_link is None:
                # Presumably the constructor registers the new link with
                # both frame types - verify in Frame_type_link.
                frame_type_link = Frame_type_link(a_frame_type,
                                                  b_frame_type)
            # Link the frame instances through the frame type link.
            frame_type_link.link_frame_insts(frame_pair.a_frame_inst,
                                             frame_pair.b_frame_inst)

        return a_frame_insts, b_frame_insts, frame_pairs

    def after_process_document(self, doc):
        """Overridden Block method.

        Prints the examiner statistics, pickles both verb dictionaries and
        then delegates to the parent implementation.

        ``end_obl``, ``_finalize_dictionary`` and ``_output_control`` are
        not called from here; the dictionaries are pickled as extracted.
        """
        self.examiner.print_stats()
        a_dict_of_verbs = self.a_frame_extractor.get_dict_of_verbs()
        b_dict_of_verbs = self.b_frame_extractor.get_dict_of_verbs()
        self._pickle_dict(a_dict_of_verbs, b_dict_of_verbs)
        super().after_process_document(doc)

    def end_obl(self, dict_of_verbs):
        """Print the total number of instances of all "obl" arguments.

        Walks every verb record in ``dict_of_verbs``, every frame type of
        the record and every argument of the frame type, and sums
        ``len(arg.insts)`` over the arguments whose ``deprel`` is "obl".
        """
        suma = sum(len(frame_type_arg.insts)
                   for verb_record in dict_of_verbs.values()
                   for frame_type in verb_record.frame_types
                   for frame_type_arg in frame_type.args
                   if frame_type_arg.deprel == "obl")
        print(suma)

    def _finalize_dictionary(self, dict_of_verbs, lang_mark):
        """Run an ``Extraction_finalizer`` over all verb records.

        The finalizer is built from the dictionary, the stored
        ``obl_ratio_limit`` and ``min_obl_inst_num`` and the language mark.
        Each verb record is finalized with it first, then the finalizer's
        own ``finalize_extraction`` is called.
        """
        extraction_finalizer = \
            Extraction_finalizer(dict_of_verbs, self.obl_ratio_limit,
                                 self.min_obl_inst_num, lang_mark)
        for verb_record in dict_of_verbs.values():
            verb_record.finalize_extraction(extraction_finalizer)
        extraction_finalizer.finalize_extraction()

    def _pickle_dict(self, a_dict_of_verbs, b_dict_of_verbs):
        """Pickle both verb dictionaries as one tuple into one file.

        Called from ``after_process_document``.  The current recursion
        limit and the shallow size of the tuple are logged, the recursion
        limit is raised to 50000 for the rest of the process (it is not
        restored), and the new limit is logged as well.
        """
        a_b_dicts_of_verbs = a_dict_of_verbs, b_dict_of_verbs
        logging.info(sys.getrecursionlimit())
        logging.info(sys.getsizeof(a_b_dicts_of_verbs))
        # Presumably the linked frame objects nest deeper than the default
        # limit allows the pickler to recurse - TODO confirm.
        sys.setrecursionlimit(50000)
        logging.info(sys.getrecursionlimit())
        # NOTE(review): folder and name are joined with "_" and not with a
        # path separator; kept as is because existing outputs depend on it.
        a_b_output_name = self.output_folder + "_" + self.output_name
        # The context manager guarantees that the file is flushed and
        # closed even when pickling fails.
        with open(a_b_output_name, 'wb') as output_file:
            pickle.dump(a_b_dicts_of_verbs, output_file)

    def _output_control(self):
        """Print the form and the argument link of every frame argument.

        Debugging aid.  Iterates the verb records of the first extractor
        (``a_frame_extractor.dict_of_verbs``), their frame types, the
        instances of each type and the arguments of each instance, and
        prints ``node.form`` together with ``frame_inst_arg_link``.
        """
        a_dict_of_verbs = self.a_frame_extractor.dict_of_verbs
        for verb_record in a_dict_of_verbs.values():
            for frame_type in verb_record.frame_types:
                for frame_inst in frame_type.insts:
                    for frame_inst_arg in frame_inst.args:
                        arg_link = frame_inst_arg.frame_inst_arg_link
                        print(frame_inst_arg.node.form, arg_link)