Beispiel #1
0
 def recategorize_file(self, file_path):
     """Read AMRs from *file_path*, recategorize each graph in place, and yield it.

     Logs a progress message every 1000 examples and a final 'Done.' line.
     """
     count = 0
     for amr in AMRIO.read(file_path):
         count += 1
         self.recategorize_graph(amr)
         yield amr
         if count % 1000 == 0:
             logger.info('Processed {} examples.'.format(count))
     logger.info('Done.\n')
Beispiel #2
0
 def _get_senseless_node_counter(amr_train_files):
     """Count occurrences of sense-less attribute values across AMR train files.

     Returns a Counter mapping each sense-less attribute value to its frequency.
     """
     logger.info('Building the senseless node counter.')
     counter = Counter()
     for amr_file in amr_train_files:
         for amr in AMRIO.read(amr_file):
             for node in amr.graph.get_nodes():
                 # Only the value matters for the count; the attribute name is ignored.
                 counter.update(value for _, value in node.get_senseless_attributes())
     return counter
Beispiel #3
0
 def _update_counter_from_train_files(self, amr_train_files, base_freq=1):
     """Populate the bidirectional (lemma, frame) co-occurrence counters.

     For every frame attribute found in the training AMRs, strips the word
     sense suffix to get the lemma and records the pairing in both
     ``lemma_frame_counter`` and ``frame_lemma_counter`` with weight *base_freq*.
     """
     logger.info('Updating (lemma, frame) counter from AMR train files.')
     for path in amr_train_files:
         for amr in AMRIO.read(path):
             for node in amr.graph.get_nodes():
                 for _, frame in node.get_frame_attributes():
                     lemma = re.sub(WORDSENSE_RE, '', frame)
                     # Record the pairing in both directions.
                     self._update_counter(self.lemma_frame_counter, lemma, frame, base_freq)
                     self._update_counter(self.frame_lemma_counter, frame, lemma, base_freq)
Beispiel #4
0
    def from_json(cls, file_path: str) -> 'TextAnonymizor':
        """Construct a TextAnonymizor from a JSON rule file.

        The JSON object stores the underscore-prefixed constructor fields
        (``_VNE``, ``_LOCEN1``, ...) under keys without the leading underscore.
        """
        with open(file_path, encoding="utf-8") as f:
            rules = json.load(f)
        kwargs = {
            "text_maps": rules["text_maps"],
            "priority_lists": rules["priority_lists"],
        }
        # These fields are serialized without their leading underscore.
        for key in ("VNE", "LOCEN1", "LOCEN2", "N", "M", "R", "INVP", "INVS"):
            kwargs["_" + key] = rules[key]
        return cls(**kwargs)


if __name__ == "__main__":
    import argparse

    arg_parser = argparse.ArgumentParser("text_anonymizor.py")
    arg_parser.add_argument('--amr_file', required=True, help="File to anonymize.")
    arg_parser.add_argument('--util_dir')
    args = arg_parser.parse_args()

    # Anonymization rules are shipped as a JSON file inside the utility dir.
    anonymizor = TextAnonymizor.from_json(
        os.path.join(args.util_dir, "text_anonymization_rules.json"))

    output_path = args.amr_file + ".recategorize"
    with open(output_path, "w", encoding="utf-8") as out:
        for amr in AMRIO.read(args.amr_file):
            amr.abstract_map = anonymizor(amr)
            out.write(str(amr) + "\n\n")
Beispiel #5
0
    for key, value in type_counter.items():
        max_ner = None
        max_ner_count = None
        for ner, count in value.items():
            if max_ner is None:
                max_ner = ner
                max_ner_count = count
                continue
            if count > max_ner_count:
                max_ner = ner
                max_ner_count = count
        type_ner_mapper[key] = max_ner

    for file_path in args.amr_files:
        for amr in AMRIO.read(file_path):
            for key, value in amr.abstract_map.items():
                value_type = value["type"]
                value_span = value["span"]

                if len(value_span) <= 1:
                    continue

                if (value_type == 'named-entity' or value_type == 'ordinal-entity') \
                        and distance(value_span, value["ops"]) > 3:
                    continue

                value["ner"] = type_ner_mapper.get(
                    value_span.lower(), {
                        "named-entity": "PERSON",
                        "url-entity": "URL",
Beispiel #6
0
 def restore_file(self, file_path):
     """Read AMRs from *file_path*, restore each instance in place, and yield it."""
     for graph in AMRIO.read(file_path):
         self.restore_instance(graph)
         yield graph