def __init__(
    self,
    model: Model,
    training_set_path=None,
    test_set_path=None,
    nodes_path=None,
    mappings_avail=False,
):
    """Load the sample/node tables and build or load the label-to-id mappings.

    Args:
        model: Stored unchanged as ``self.model``.
        training_set_path: Tab-separated samples file read with the column
            names ``globConst.COL_NAMES_SAMPLES``. If falsy, an empty frame
            with those columns is used instead.
        test_set_path: Same format and fallback as ``training_set_path``.
        nodes_path: Tab-separated nodes file read with the column names
            ``globConst.COL_NAMES_NODES``. If ``None``, ``self.nodes`` is
            ``None``.
        mappings_avail: If false, the mappings are created from the loaded
            tables and handed to ``io.write_mappings``. If true, they are
            read back from the model directory below the evaluation output
            folder.

    After construction ``node_label_to_id``, ``node_types_to_id`` and
    ``relation_label_to_id`` are always set; ``node_types_to_id`` is ``None``
    when no node-type information is available.
    """
    self.model = model

    # Sample tables: fall back to an empty frame so the column lookups
    # further down work even when a path was not supplied.
    if training_set_path:
        self.training_examples = pandas.read_csv(
            training_set_path, sep="\t", names=globConst.COL_NAMES_SAMPLES
        )
    else:
        self.training_examples = pandas.DataFrame(columns=globConst.COL_NAMES_SAMPLES)

    if test_set_path:
        self.test_examples = pandas.read_csv(
            test_set_path, sep="\t", names=globConst.COL_NAMES_SAMPLES
        )
    else:
        self.test_examples = pandas.DataFrame(columns=globConst.COL_NAMES_SAMPLES)

    if nodes_path is not None:
        self.nodes = pandas.read_csv(nodes_path, sep="\t", names=globConst.COL_NAMES_NODES)
    else:
        self.nodes = None

    if not mappings_avail:
        # Collect labels from the test samples first, then the training
        # samples.
        sample_frames = (self.test_examples, self.training_examples)

        relation_labels = set()
        for frame in sample_frames:
            relation_labels.update(set(frame[globConst.EDGE_TYPE_COL_NAME]))

        self.node_label_to_id = None
        self.node_types_to_id = None
        self.relation_label_to_id = utils.create_mappings(relation_labels)

        if self.nodes is not None:
            # First column of the nodes table -> node labels,
            # second column -> node types.
            self.node_label_to_id = utils.create_mappings(np.unique(self.nodes.values[:, 0]))
            self.node_types_to_id = utils.create_mappings(np.unique(self.nodes.values[:, 1]))
        else:
            # Without a nodes table, node labels come from both endpoint
            # columns of the samples; node types stay None.
            node_labels = set()
            for frame in sample_frames:
                for column in (globConst.NODE1_ID_COL_NAME, globConst.NODE2_ID_COL_NAME):
                    node_labels.update(set(frame[column]))
            self.node_label_to_id = utils.create_mappings(node_labels)

        # output mappings
        io.write_mappings(
            node_label_to_id=self.node_label_to_id,
            node_types_to_id=self.node_types_to_id,
            relation_label_to_id=self.relation_label_to_id,
        )
    else:
        # testme
        output_directory = os.path.join(
            globConst.WORKING_DIR,
            evalConst.EVAL_OUTPUT_FOLDER_NAME,
            evalConst.MODEL_DIR,
        )
        self.node_label_to_id = io.read_mapping(
            os.path.join(output_directory, evalConst.MODEL_ENTITY_NAME_MAPPING_NAME)
        )

        node_type_path = os.path.join(output_directory, evalConst.MODEL_ENTITY_TYPE_MAPPING_NAME)
        if os.path.exists(node_type_path):
            self.node_types_to_id = io.read_mapping(node_type_path)
        elif self.nodes is not None:
            # No stored node-type mapping: derive it from the nodes table
            # and hand it to io.write_mappings.
            self.node_types_to_id = utils.create_mappings(np.unique(self.nodes.values[:, 1]))
            io.write_mappings(node_types_to_id=self.node_types_to_id)
        else:
            # Neither a stored mapping nor a nodes table is available.
            # Set the attribute explicitly so later reads see None (as in
            # the branch above) instead of raising AttributeError.
            # FIXME: nodes should be provided when evaluating; consider
            # raising an error here.
            self.node_types_to_id = None

        self.relation_label_to_id = io.read_mapping(
            os.path.join(output_directory, evalConst.MODEL_RELATION_TYPE_MAPPING_NAME)
        )
def create_mappings(self, relations, node_types, node_labels=None):
    """Rebuild the three label-to-id mappings from the given collections.

    Each argument is passed to ``utils.create_mappings`` and the result is
    stored on the instance:

    * ``node_labels`` -> ``self.node_label_to_id``
    * ``node_types``  -> ``self.node_types_to_id``
    * ``relations``   -> ``self.relation_label_to_id``

    NOTE(review): ``node_labels`` defaults to ``None`` and is forwarded
    as-is; confirm that ``utils.create_mappings`` accepts ``None``.
    """
    build_mapping = utils.create_mappings

    self.node_label_to_id = build_mapping(node_labels)
    self.node_types_to_id = build_mapping(node_types)
    self.relation_label_to_id = build_mapping(relations)