def construct_model(args):
    """
    Build the ConvE embedding-based learning framework.

    :param args: run configuration; ``args.model`` is inspected for a
        ``'.gc'`` suffix, and the whole object is forwarded to the
        knowledge graph, the ConvE scorer and the learning framework.
    :return: an ``EmbeddingBasedMethod`` wrapping the graph and the scorer.
    """
    graph = KnowledgeGraph(args)
    needs_fuzzy_facts = args.model.endswith('.gc')
    if needs_fuzzy_facts:
        graph.load_fuzzy_facts()
    scorer = ConvE(args, graph.num_entities)
    return EmbeddingBasedMethod(args, graph, scorer)
def construct_model(args):
    """
    Construct NN graph.

    Dispatches on ``args.model``:

    - ``'point'`` / ``'point.gc'``: policy-gradient walker.
    - ``'point.rs.<fn_model>'``: reward-shaping policy gradient whose
      scorer ``<fn_model>`` is one of ``complex``, ``distmult``, ``conve``,
      ``tucker`` or ``PTransE``.
    - ``'complex'``, ``'distmult'``, ``'tucker'``, ``'conve'``, ``'TransE'``,
      ``'PTransE'``: plain embedding-based method.

    Model names ending in ``'.gc'`` additionally load fuzzy facts into the
    knowledge graph.

    :param args: run configuration (must expose ``model``).
    :return: the constructed learning framework.
    :raises NotImplementedError: if the model name, or the reward-shaping
        scorer name, is not supported.
    """
    kg = KnowledgeGraph(args)
    if args.model.endswith('.gc'):
        kg.load_fuzzy_facts()

    if args.model in ['point', 'point.gc']:
        pn = GraphSearchPolicy(args)
        lf = PolicyGradient(args, kg, pn)
    elif args.model.startswith('point.rs'):
        # The scorer gets its own copy of the arguments so that overriding
        # `model` / `relation_only` does not leak into the policy setup.
        fn_model = args.model.split('.')[2]
        fn_args = copy.deepcopy(args)
        fn_args.model = fn_model
        fn_args.relation_only = False
        if fn_model == 'complex':
            fn = ComplEx(fn_args)
        elif fn_model == 'distmult':
            fn = DistMult(fn_args)
        elif fn_model == 'conve':
            fn = ConvE(fn_args, kg.num_entities)
        elif fn_model == 'tucker':
            fn = TuckER(fn_args, kg.num_entities)
        elif fn_model == 'PTransE':
            fn = PTransE(fn_args)
        else:
            # Previously an unknown scorer fell through and crashed later
            # with an UnboundLocalError on `fn` / `fn_kg`.
            raise NotImplementedError(
                'Unsupported reward shaping model: {}'.format(fn_model))
        # Every scorer uses a separate knowledge graph built from fn_args.
        fn_kg = KnowledgeGraph(fn_args)
        pn = GraphSearchPolicy(args, fn_kg=fn_kg, fn=fn)
        lf = RewardShapingPolicyGradient(args, kg, pn, fn_kg, fn)
    elif args.model == 'complex':
        fn = ComplEx(args)
        lf = EmbeddingBasedMethod(args, kg, fn)
    elif args.model == 'distmult':
        fn = DistMult(args)
        lf = EmbeddingBasedMethod(args, kg, fn)
    elif args.model == 'tucker':
        fn = TuckER(args, kg.num_entities)
        lf = EmbeddingBasedMethod(args, kg, fn)
    elif args.model == 'conve':
        fn = ConvE(args, kg.num_entities)
        lf = EmbeddingBasedMethod(args, kg, fn)
    elif args.model == 'TransE':
        fn = TransE(args)
        lf = EmbeddingBasedMethod(args, kg, fn)
    elif args.model == 'PTransE':
        fn = PTransE(args)
        lf = EmbeddingBasedMethod(args, kg, fn)
    else:
        raise NotImplementedError
    return lf
def construct_model(args):
    """
    Construct NN graph.

    Only the ``'point.fusion.<fn_model>'`` family is handled here, and the
    only supported ``<fn_model>`` is ``'conve'``.

    :param args: run configuration (must expose ``model``).
    :return: a ``RewardShapingPolicyGradient`` learning framework.
    :raises NotImplementedError: if ``args.model`` is not a supported
        ``point.fusion`` variant. Previously such inputs crashed with an
        UnboundLocalError on ``lf`` / ``fn``.
    """
    kg = KnowledgeGraph(args)
    if not args.model.startswith('point.fusion'):
        raise NotImplementedError(
            'Unsupported model: {}'.format(args.model))

    pn = GraphSearchPolicy(args)
    # The scorer gets its own copy of the arguments so that overriding
    # `model` / `relation_only` does not affect the policy network setup.
    fn_model = args.model.split('.')[2]
    fn_args = copy.deepcopy(args)
    fn_args.model = fn_model
    fn_args.relation_only = False
    if fn_model != 'conve':
        raise NotImplementedError(
            'Unsupported fusion model: {}'.format(fn_model))

    fn = ConvE(fn_args, kg.num_entities)
    fn_kg = KnowledgeGraph(fn_args)
    lf = RewardShapingPolicyGradient(args, kg, pn, fn_kg, fn)
    return lf
def update_path(self, action: Action, kg: KnowledgeGraph, offset=None):
    """
    Append the encoding of the latest action to the path history.

    :param action (r, e): (Variable:batch) indices of the most recent action
        - r is the most recently traversed edge;
        - e is the destination entity.
    :param kg: Knowledge graph environment.
    :param offset: (Variable:batch) if not None, every stored history entry
        is first re-indexed with this offset (used for search).
    """

    def select(entry):
        # Tuple entries are indexed on their second dimension, everything
        # else on its first dimension.
        if type(entry) is tuple:
            return tuple([part[:, offset, :] for part in entry])
        return entry[offset, :]

    # Embed the action first: relation only, or the full action embedding.
    if self.relation_only_in_path:
        step_embedding = kg.get_relation_embeddings(action.rel)
    else:
        step_embedding = self.get_action_embedding(action, kg)

    # Re-index the stored history in place, entry by entry.
    if offset is not None:
        history = self.path
        for position, entry in enumerate(history):
            history[position] = select(entry)

    # Feed the new step through the path encoder, continuing from the most
    # recent stored entry; element [1] of the encoder output is kept.
    previous_state = self.path[-1]
    encoded = self.path_encoder(step_embedding.unsqueeze(1), previous_state)
    self.path.append(encoded[1])
def get_action_embedding(self, action: Action, kg: KnowledgeGraph):
    """
    Return the (batch) embedding of an action.

    With ``self.relation_only`` set, this is just the embedding of the
    traversed edge; otherwise the edge embedding is concatenated with the
    embedding of the target node along the last dimension.

    :param action (r, e): (Variable:batch) indices of the most recent action
        - r is the most recently traversed edge
        - e is the destination entity.
    :param kg: Knowledge graph environment.
    """
    edge_part = kg.get_relation_embeddings(action.rel)
    if self.relation_only:
        return edge_part
    node_part = kg.get_entity_embeddings(action.ent)
    return torch.cat([edge_part, node_part], dim=-1)
def construct_model(args):
    """
    Construct NN graph.

    Dispatches on ``args.model``:

    - ``'point'`` / ``'point.gc'``: policy-gradient walker (the policy
      network's parameter names and sizes are printed).
    - ``'point.rs.<fn_model>'``: reward-shaping policy gradient whose
      scorer ``<fn_model>`` is one of ``complex``, ``distmult``, ``conve``
      or ``cpg-conve``.
    - ``'complex'``, ``'distmult'``, ``'conve'``, ``'cpg-conve'``: plain
      embedding-based method.

    Model names ending in ``'.gc'`` additionally load fuzzy facts into the
    knowledge graph.

    :param args: run configuration (must expose ``model``).
    :return: the constructed learning framework.
    :raises NotImplementedError: if the model name, or the reward-shaping
        scorer name, is not supported.
    """
    kg = KnowledgeGraph(args)
    if args.model.endswith('.gc'):
        kg.load_fuzzy_facts()

    if args.model in ['point', 'point.gc']:
        pn = GraphSearchPolicy(args)
        # Report the policy network parameters for inspection.
        for name, value in pn.named_parameters():
            print('parameter: {} | size: {}'.format(name, value.size()))
        lf = PolicyGradient(args, kg, pn)
    elif args.model.startswith('point.rs'):
        pn = GraphSearchPolicy(args)
        # The scorer gets its own copy of the arguments so that overriding
        # `model` / `relation_only` does not leak into the policy setup.
        fn_model = args.model.split('.')[2]
        fn_args = copy.deepcopy(args)
        fn_args.model = fn_model
        fn_args.relation_only = False
        if fn_model == 'complex':
            fn = ComplEx(fn_args)
        elif fn_model == 'distmult':
            fn = DistMult(fn_args)
        elif fn_model == 'conve':
            fn = ConvE(fn_args, kg.num_entities)
        elif fn_model == 'cpg-conve':
            fn = CPG_ConvE(fn_args, kg.num_entities)
        else:
            # Previously an unknown scorer fell through and crashed later
            # with an UnboundLocalError on `fn` / `fn_kg`.
            raise NotImplementedError(
                'Unsupported reward shaping model: {}'.format(fn_model))
        # Every scorer uses a separate knowledge graph built from fn_args.
        fn_kg = KnowledgeGraph(fn_args)
        lf = RewardShapingPolicyGradient(args, kg, pn, fn_kg, fn)
    elif args.model == 'complex':
        fn = ComplEx(args)
        lf = EmbeddingBasedMethod(args, kg, fn)
    elif args.model == 'distmult':
        fn = DistMult(args)
        lf = EmbeddingBasedMethod(args, kg, fn)
    elif args.model == 'conve':
        fn = ConvE(args, kg.num_entities)
        lf = EmbeddingBasedMethod(args, kg, fn)
    elif args.model == 'cpg-conve':
        fn = CPG_ConvE(args, kg.num_entities)
        lf = EmbeddingBasedMethod(args, kg, fn)
    else:
        raise NotImplementedError
    return lf
def get_ground_truth_edge_mask(self, current_nodes, r_space, e_space,
                               obs: Observation, kg: KnowledgeGraph):
    """
    Mark the actions that directly reproduce the queried edge.

    An action is marked when the current node, the action's relation and
    the action's target entity match the query triple, either in the
    forward direction (source --q--> target) or in the inverse direction
    (target --inv(q)--> source). Rows whose query source entity equals
    ``kg.dummy_e`` are zeroed out.

    :param current_nodes: entities the walker currently sits on
        (presumably [batch] - TODO confirm).
    :param r_space: relation ids of the candidate actions
        (presumably [batch, num_actions] - TODO confirm).
    :param e_space: target entity ids of the candidate actions, aligned
        with ``r_space``.
    :param obs: observation providing ``source_entity``, ``target_entity``
        and ``query_relation``.
    :param kg: Knowledge graph environment.
    :return: float tensor mask over the action space.
    """
    source = obs.source_entity
    target = obs.target_entity
    query = obs.query_relation

    def edge_matches(from_nodes, to_nodes, relation):
        at_start = (current_nodes == from_nodes).unsqueeze(1)
        same_relation = r_space == relation.unsqueeze(1)
        reaches_end = e_space == to_nodes.unsqueeze(1)
        return at_start * same_relation * reaches_end

    forward_mask = edge_matches(source, target, query)
    inverse_query = kg.get_inv_relation_id(query)
    inverse_mask = edge_matches(target, source, inverse_query)

    real_source = source.unsqueeze(1) != kg.dummy_e
    combined = (forward_mask + inverse_mask) * real_source
    return combined.float()
def initialize_path(self, action: Action, kg: KnowledgeGraph):
    """
    Reset the path history so it holds only the encoding of the first action.

    :param action: the initial action; only ``action.rel`` is embedded when
        ``self.relation_only_in_path`` is set, otherwise the full action
        embedding is used.
    :param kg: Knowledge graph environment.
    """
    # [batch_size, action_dim]
    if self.relation_only_in_path:
        first_step = kg.get_relation_embeddings(action.rel)
    else:
        first_step = self.get_action_embedding(action, kg)
    # Insert dimension 1 in place before feeding the encoder.
    first_step.unsqueeze_(1)

    # [num_layers, batch_size, dim]
    # Two separate zero tensors: hidden state first, then cell state.
    init_h, init_c = (
        zeros_var_cuda([
            self.history_num_layers,
            len(first_step),
            self.history_dim,
        ])
        for _ in range(2)
    )

    # Element [1] of the encoder output is what the history keeps.
    encoded = self.path_encoder(first_step, (init_h, init_c))
    self.path = [encoded[1]]
def construct_model(args):
    """
    Construct NN graph.

    Dispatches on ``args.model``:

    - ``"point"`` / ``"point.gc"``: policy-gradient walker.
    - ``"point.rs.<fn_model>"``: reward-shaping policy gradient whose
      scorer (the last dot-separated component of the model name) is one of
      ``complex``, ``distmult`` or ``conve``.
    - ``"complex"``, ``"distmult"``, ``"conve"``, ``"TransE"``: plain
      embedding-based method.

    Model names ending in ``".gc"`` additionally load fuzzy facts into the
    knowledge graph.

    :param args: run configuration (must expose ``model``).
    :return: the constructed learning framework.
    :raises NotImplementedError: if the model name, or the reward-shaping
        scorer name, is not supported.
    """
    kg = KnowledgeGraph(args)  # NOTE: initialize a KG instance
    # CAVEAT: not sure what model needs fuzzy facts
    if args.model.endswith(".gc"):
        kg.load_fuzzy_facts()

    if args.model in ["point", "point.gc"]:
        pn = GraphSearchPolicy(args)
        lf = PolicyGradient(args, kg, pn)
    elif args.model.startswith("point.rs"):
        pn = GraphSearchPolicy(args)
        # The scorer gets its own copy of the arguments so that overriding
        # `model` / `relation_only` does not leak into the policy setup.
        fn_model = args.model.split(".")[-1]
        fn_args = copy.deepcopy(args)
        fn_args.model = fn_model
        fn_args.relation_only = False
        if fn_model == "complex":
            fn = ComplEx(fn_args)
        elif fn_model == "distmult":
            fn = DistMult(fn_args)
        elif fn_model == "conve":
            fn = ConvE(fn_args, kg.num_entities)
        else:
            # Previously an unknown scorer fell through and crashed later
            # with an UnboundLocalError on `fn` / `fn_kg`.
            raise NotImplementedError(
                "Unsupported reward shaping model: {}".format(fn_model))
        # Every scorer uses a separate knowledge graph built from fn_args.
        fn_kg = KnowledgeGraph(fn_args)
        lf = RewardShapingPolicyGradient(args, kg, pn, fn_kg, fn)
    elif args.model == "complex":
        fn = ComplEx(args)
        lf = EmbeddingBasedMethod(args, kg, fn)
    elif args.model == "distmult":
        print("jxtu: embedding model: distmult")
        fn = DistMult(args)
        # NOTE: embedding-based learning framework
        lf = EmbeddingBasedMethod(args, kg, fn)
    elif args.model == "conve":
        fn = ConvE(args, kg.num_entities)
        lf = EmbeddingBasedMethod(args, kg, fn)
    elif args.model == "TransE":
        fn = TransE(args)
        lf = EmbeddingBasedMethod(args, kg, fn)
    else:
        raise NotImplementedError
    return lf
neptune.append_tag('pytorch') if args.gpu: neptune.append_tag('gpu') if args.use_proteins: neptune.append_tag('proteins') if args.reversed: neptune.append_tag('reversed') neptune.append_tag('real data') neptune.append_tag('trivec') use_cuda = args.gpu and torch.cuda.is_available() device = torch.device("cuda" if args.gpu else "cpu") print(f'Use device: {device}') kg = KnowledgeGraph(data_path=DATA_CONST['work_dir'], use_proteins=args.use_proteins, use_proteins_on_validation=False, use_reversed_edges=args.reversed) # Pos loaders train_pos_loader = data.DataLoader(torch.Tensor( np.array(kg.get_data_by_type('train'))), batch_size=parameters['batch_size'], shuffle=True) val_pos_loader = data.DataLoader(torch.Tensor( np.array(kg.get_data_by_type('val'))), batch_size=parameters['batch_size'], shuffle=False) test_pos_loader = data.DataLoader(torch.Tensor( np.array(kg.get_data_by_type('test'))),
# Record the run configuration taken from the command line.
# NOTE: the original line for 'embed_dim' ended with a trailing comma, which
# stored a 1-tuple `(args.embed_dim,)` instead of the value itself.
parameters['embed_dim'] = args.embed_dim
parameters['epoch'] = args.epoch
parameters['learning_rate'] = args.learning_rate
parameters['regularization'] = args.regularization
parameters['use_proteins'] = args.use_proteins
parameters['reversed'] = args.reversed
parameters['metrics_separately'] = args.metrics_separately
parameters['random_val_neg_sampler'] = args.random_val_neg_sampler
parameters['val_regenerate'] = args.val_regenerate

# Only select CUDA when it was requested AND is actually available, so that
# running with the GPU flag on a CPU-only machine falls back to the CPU
# instead of failing when the model is moved to the device.
use_cuda = args.gpu and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(f'Use device: {device}')

kg = KnowledgeGraph(data_path=DATA_CONST['work_dir'],
                    use_proteins=args.use_proteins,
                    use_proteins_on_validation=False,
                    use_reversed_edges=args.reversed)

model = TriVec(ent_total=kg.get_num_of_ent('train'),
               rel_total=kg.get_num_of_rel('train'))
model = model.to(device)
loss_func = NegativeSoftPlusLoss()

print('Test')
# map_location lets a checkpoint saved on a GPU be restored on the selected
# device (e.g. a CPU-only machine).
checkpoint = torch.load(args.model_path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

# Freeze the model and put it into evaluation mode before testing.
switch_grad_mode(model, requires_grad=False)
switch_model_mode(model, train=False)
model.eval()
test_trivec_model('seen')
print("===========================")
def get_action_space_in_buckets(
    self,
    current_entity: torch.Tensor,
    obs: Observation,
    kg: KnowledgeGraph,
    collapse_entities=False,
):
    """
    To compute the search operation in batch, we group the action spaces of
    different states (i.e. the set of outgoing edges of different nodes)
    into buckets based on their sizes to save the memory consumption of
    paddings.

    For example, in large knowledge graphs, certain nodes may have thousands
    of outgoing edges while a long tail of nodes only have a small amount of
    outgoing edges. If a batch contains a node with 1000 outgoing edges
    while the rest of the nodes have a maximum of 5 outgoing edges, we need
    to pad the action spaces of all nodes to 1000, which consumes lots of
    memory.

    With the bucketing approach, each bucket is padded separately. In this
    case the node with 1000 outgoing edges will be in its own bucket and the
    rest of the nodes will suffer little from padding the action space to 5.

    Once we grouped the action spaces in buckets, the policy network
    computation is carried out for every bucket iteratively. Once all the
    computation is done, we concatenate the results of all buckets and
    restore their original order in the batch. The computation outside the
    policy network module is thus unaffected.

    :param current_entity: entities whose action spaces are requested
        (one entry per example in the batch).
    :param obs: observation for the batch; sliced per bucket.
    :param kg: Knowledge graph environment providing the bucket lookup.
    :param collapse_entities: must be False (not implemented).

    :return db_action_spaces:
        [action_space_b0, action_space_b1, ..., action_space_bn]
        A list with one ``ActionSpace`` per bucket, each built from the
        bucket slice's (forks, r_space, e_space) and the action mask
        computed for it. Presumably the first dimensions of the buckets
        add up to ``current_entity.size(0)`` - TODO confirm.
    :return db_references:
        [l_batch_refs0, l_batch_refs1, ..., l_batch_refsn]
        l_batch_refsi stores the indices of the examples in bucket i in the
        current batch, which is used later to restore the output results to
        the original order.
    """
    assert not collapse_entities, 'collapse_entities is not implemented'

    db_action_spaces, db_references = [], []
    bucket_ids, inbucket_ids = kg.get_bucket_and_inbucket_ids(current_entity)

    for b_key in set(bucket_ids.tolist()):
        # reshape(-1) always yields a 1-D index tensor, so tolist() returns
        # a list even when a single example falls into this bucket
        # (squeeze() used to collapse that case to a bare scalar).
        inthisbucket_indices = torch.nonzero(
            bucket_ids.eq(b_key)).reshape(-1).tolist()
        inbucket_ids_of_entities_inthisbucket = inbucket_ids[
            inthisbucket_indices].tolist()

        bucket_action_space = kg.bucketid2ActionSpace[b_key]
        e_b = current_entity[inthisbucket_indices]
        obs_b = obs.get_slice(inthisbucket_indices)
        as_bucket = bucket_action_space.get_slice(
            inbucket_ids_of_entities_inthisbucket)

        action_mask = self.apply_action_masks(as_bucket, e_b, obs_b, kg)
        action_space_b = ActionSpace(as_bucket.forks, as_bucket.r_space,
                                     as_bucket.e_space, action_mask)

        db_action_spaces.append(action_space_b)
        db_references.append(inthisbucket_indices)

    return db_action_spaces, db_references