예제 #1
0
    def __init__(self, model: Model, training_set_path=None, test_set_path=None, nodes_path=None, mappings_avail=False):
        """Load the example/node tables and set up the label-to-id mappings.

        Args:
            model: Stored unchanged on ``self.model``.
            training_set_path: Tab-separated file read into
                ``self.training_examples`` using the column names in
                ``globConst.COL_NAMES_SAMPLES``. If falsy, an empty frame with
                those columns is used instead.
            test_set_path: Same as ``training_set_path``, for
                ``self.test_examples``.
            nodes_path: Tab-separated file read into ``self.nodes`` using the
                column names in ``globConst.COL_NAMES_NODES``. If ``None``,
                ``self.nodes`` is ``None``.
            mappings_avail: If false, the mappings are built from the loaded
                tables and passed to ``io.write_mappings``. If true, they are
                read from files below ``globConst.WORKING_DIR`` /
                ``evalConst.EVAL_OUTPUT_FOLDER_NAME`` / ``evalConst.MODEL_DIR``.

        After construction ``self.node_label_to_id``, ``self.node_types_to_id``
        and ``self.relation_label_to_id`` are always defined;
        ``self.node_types_to_id`` is ``None`` when no node-type information is
        available.
        """
        self.model = model

        sample_columns = globConst.COL_NAMES_SAMPLES
        if training_set_path:
            self.training_examples = pandas.read_csv(training_set_path, sep="\t", names=sample_columns)
        else:
            self.training_examples = pandas.DataFrame(columns=sample_columns)
        if test_set_path:
            self.test_examples = pandas.read_csv(test_set_path, sep="\t", names=sample_columns)
        else:
            self.test_examples = pandas.DataFrame(columns=sample_columns)
        if nodes_path is not None:
            self.nodes = pandas.read_csv(nodes_path, sep="\t", names=globConst.COL_NAMES_NODES)
        else:
            self.nodes = None

        if not mappings_avail:
            # Relation labels are collected from both example tables.
            relation_labels = set()
            relation_labels.update(set(self.test_examples[globConst.EDGE_TYPE_COL_NAME]))
            relation_labels.update(set(self.training_examples[globConst.EDGE_TYPE_COL_NAME]))

            self.node_label_to_id = None
            self.node_types_to_id = None
            self.relation_label_to_id = utils.create_mappings(relation_labels)
            if self.nodes is not None:
                # With a node table, the mappings come from the unique values
                # of its first column (labels) and second column (types).
                self.node_label_to_id = utils.create_mappings(np.unique(self.nodes.values[:, 0]))
                self.node_types_to_id = utils.create_mappings(np.unique(self.nodes.values[:, 1]))
            else:
                # Without a node table, node labels are taken from both node
                # columns of the example tables; node types stay None.
                node_labels = set()
                node_labels.update(set(self.test_examples[globConst.NODE1_ID_COL_NAME]))
                node_labels.update(set(self.test_examples[globConst.NODE2_ID_COL_NAME]))
                node_labels.update(set(self.training_examples[globConst.NODE1_ID_COL_NAME]))
                node_labels.update(set(self.training_examples[globConst.NODE2_ID_COL_NAME]))
                self.node_label_to_id = utils.create_mappings(node_labels)

            # output mappings
            io.write_mappings(
                node_label_to_id=self.node_label_to_id,
                node_types_to_id=self.node_types_to_id,
                relation_label_to_id=self.relation_label_to_id,
            )

        else:
            # testme
            output_directory = os.path.join(
                globConst.WORKING_DIR, evalConst.EVAL_OUTPUT_FOLDER_NAME, evalConst.MODEL_DIR
            )
            self.node_label_to_id = io.read_mapping(
                os.path.join(output_directory, evalConst.MODEL_ENTITY_NAME_MAPPING_NAME)
            )
            node_type_path = os.path.join(output_directory, evalConst.MODEL_ENTITY_TYPE_MAPPING_NAME)
            if os.path.exists(node_type_path):
                self.node_types_to_id = io.read_mapping(node_type_path)
            elif self.nodes is not None:
                # No stored type mapping: rebuild it from the node table and
                # write it out.
                self.node_types_to_id = utils.create_mappings(np.unique(self.nodes.values[:, 1]))
                io.write_mappings(node_types_to_id=self.node_types_to_id)
            else:
                # fixme: nodes should be provided when evaluating.
                # Assign None explicitly so the attribute always exists, as in
                # the branch above when no node table is given; otherwise any
                # later access would raise AttributeError.
                self.node_types_to_id = None
            self.relation_label_to_id = io.read_mapping(
                os.path.join(output_directory, evalConst.MODEL_RELATION_TYPE_MAPPING_NAME)
            )
예제 #2
0
 def create_mappings(self, relations, node_types, node_labels=None):
     """Build the three label-to-id mappings and store them on the instance.

     Each argument is passed as-is to ``utils.create_mappings``; the results
     are stored on ``self.node_label_to_id`` (from ``node_labels``),
     ``self.node_types_to_id`` (from ``node_types``) and
     ``self.relation_label_to_id`` (from ``relations``).

     NOTE(review): ``node_labels`` defaults to ``None`` and is forwarded
     unchanged - confirm that ``utils.create_mappings`` accepts ``None``.
     """
     # (target attribute, source labels), processed in this order.
     targets = (
         ("node_label_to_id", node_labels),
         ("node_types_to_id", node_types),
         ("relation_label_to_id", relations),
     )
     for attribute_name, labels in targets:
         setattr(self, attribute_name, utils.create_mappings(labels))