def run_kelpie(train_samples):
    print("Wrapping the original model in a Kelpie explainable model...")
    # use model_to_explain to initialize the Kelpie model
    kelpie_model = KelpieComplEx(model=original_model, dataset=kelpie_dataset, init_size=1e-3)  # type: KelpieComplEx
    kelpie_model.to('cuda')

    ########### BUILD THE OPTIMIZER AND RUN POST-TRAINING
    print("Running post-training on the Kelpie model...")
    optimizer = KelpieMultiClassNLLptimizer(model=kelpie_model,
                                            optimizer_name=args.optimizer,
                                            batch_size=args.batch_size,
                                            learning_rate=args.learning_rate,
                                            decay1=args.decay1,
                                            decay2=args.decay2,
                                            regularizer_name="N3",
                                            regularizer_weight=args.reg)
    optimizer.train(train_samples=train_samples, max_epochs=args.max_epochs)

    ########### EXTRACT RESULTS
    print("\nExtracting results...")
    kelpie_entity_id = kelpie_dataset.kelpie_entity_id
    kelpie_sample_tuple = (kelpie_entity_id, relation_id, tail_id) if args.perspective == "head" \
        else (head_id, relation_id, kelpie_entity_id)
    kelpie_sample = numpy.array(kelpie_sample_tuple)

    ### Evaluation on original entity
    # Kelpie model on original fact
    scores, ranks, predictions = kelpie_model.predict_sample(sample=original_sample, original_mode=True)
    original_direct_score, original_inverse_score = scores[0], scores[1]
    original_head_rank, original_tail_rank = ranks[0], ranks[1]
    print("\nKelpie model on the original test fact: <%s, %s, %s>" % original_triple)
    print("\tDirect fact score: %f; Inverse fact score: %f" % (original_direct_score, original_inverse_score))
    print("\tHead Rank: %f" % original_head_rank)
    print("\tTail Rank: %f" % original_tail_rank)

    # Kelpie model on all facts containing the original entity
    print("\nKelpie model on all test facts containing the original entity:")
    mrr, h1 = KelpieEvaluator(kelpie_model).eval(samples=original_test_samples, original_mode=True)
    print("\tMRR: %f\n\tH@1: %f" % (mrr, h1))

    ### Evaluation on kelpie entity
    # results on kelpie fact
    scores, ranks, _ = kelpie_model.predict_sample(sample=kelpie_sample, original_mode=False)
    kelpie_direct_score, kelpie_inverse_score = scores[0], scores[1]
    kelpie_head_rank, kelpie_tail_rank = ranks[0], ranks[1]
    print("\nKelpie model on the Kelpie test fact: <%s, %s, %s>" % kelpie_sample_tuple)
    print("\tDirect fact score: %f; Inverse fact score: %f" % (kelpie_direct_score, kelpie_inverse_score))
    print("\tHead Rank: %f" % kelpie_head_rank)
    print("\tTail Rank: %f" % kelpie_tail_rank)

    # results on all facts containing the kelpie entity
    print("\nKelpie model on all test facts containing the Kelpie entity:")
    mrr, h1 = KelpieEvaluator(kelpie_model).eval(samples=kelpie_test_samples, original_mode=False)
    print("\tMRR: %f\n\tH@1: %f" % (mrr, h1))

    return kelpie_direct_score, kelpie_inverse_score, kelpie_head_rank, kelpie_tail_rank
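# --- Hedged usage sketch (not part of the original script) ---
# run_kelpie() assumes that module-level variables (original_model, kelpie_dataset, args,
# head_id, relation_id, tail_id, original_sample, original_triple, original_test_samples,
# kelpie_test_samples) have already been set up. A hypothetical call could post-train on
# all training samples of the kelpie entity and unpack the returned scores and ranks:
kelpie_direct, kelpie_inverse, kelpie_head_rank, kelpie_tail_rank = \
    run_kelpie(train_samples=kelpie_dataset.kelpie_train_samples)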
# check that the fact to explain is actually a test fact
assert (original_sample in complex_dataset.test_samples)

kelpie_dataset = KelpieDataset(dataset=complex_dataset, entity_id=original_entity_id)
kelpie_entity_id = kelpie_dataset.kelpie_entity_id
kelpie_triple = (kelpie_entity_id, relation_id, tail_id) if perspective == 'head' \
    else (head_id, relation_id, kelpie_entity_id)
kelpie_sample = numpy.array(kelpie_triple)

print("Wrapping the original model in a Kelpie model...")
kelpie_model = KelpieComplEx(dataset=kelpie_dataset, model=original_model, init_size=1e-3)  # type: KelpieComplEx
kelpie_model.to('cuda')

print("Running post-training on the Kelpie model...")
optimizer = KelpieMultiClassNLLptimizer(model=kelpie_model,
                                        optimizer_name=optimizer_name,
                                        batch_size=batch_size,
                                        learning_rate=learning_rate,
                                        decay1=decay1,
                                        decay2=decay2,
                                        regularizer_name=regularizer_name,
                                        regularizer_weight=regularizer_weight)
optimizer.train(train_samples=kelpie_dataset.kelpie_train_samples, max_epochs=max_epochs)
# check that the fact to explain is actually a test fact
assert(original_sample in original_dataset.test_samples)

############# INITIALIZE MODELS AND THEIR STRUCTURES
print("Loading model at location %s..." % args.model_path)
# instantiate and load the original model from filesystem
original_model = ComplEx(dataset=original_dataset,
                         dimension=args.dimension,
                         init_random=True,
                         init_size=args.init)  # type: ComplEx
original_model.load_state_dict(torch.load(args.model_path))
original_model.to('cuda')

print("Wrapping the original model in a Kelpie explainable model...")
# use model_to_explain to initialize the Kelpie model
kelpie_dataset = KelpieDataset(dataset=original_dataset, entity_id=original_entity_id)
kelpie_model = KelpieComplEx(model=original_model, dataset=kelpie_dataset, init_size=1e-3)  # type: KelpieComplEx
kelpie_model.to('cuda')

############ EXTRACT TEST FACTS AND TRAINING FACTS
print("Extracting train and test samples for the original and the kelpie entities...")
# extract all training facts and test facts involving the entity to explain
# and replace the id of the entity to explain with the id of the fake kelpie entity
original_entity_test_samples = kelpie_dataset.original_test_samples
kelpie_test_samples = kelpie_dataset.kelpie_test_samples

########### BUILD THE OPTIMIZER AND RUN POST-TRAINING
print("Running post-training on the Kelpie model...")
tail_id = complex_dataset.get_id_for_entity_name(tail)
original_entity_id = head_id if perspective == 'head' else tail_id
original_triple = (head_id, relation_id, tail_id)
original_sample = numpy.array(original_triple)

# check that the fact to explain is actually a test fact
assert(original_sample in complex_dataset.test_samples)

kelpie_dataset = KelpieDataset(dataset=complex_dataset, entity_id=original_entity_id)
kelpie_entity_id = kelpie_dataset.kelpie_entity_id
kelpie_triple = (kelpie_entity_id, relation_id, tail_id) if perspective == 'head' \
    else (head_id, relation_id, kelpie_entity_id)
kelpie_sample = numpy.array(kelpie_triple)

print("Wrapping the original model in a Kelpie model...")
kelpie_model = KelpieComplEx(dataset=kelpie_dataset, model=original_model, init_size=1e-3)  # type: KelpieComplEx
kelpie_model.to('cuda')

print("Running post-training on the Kelpie model...")
optimizer = KelpieMultiClassNLLptimizer(model=kelpie_model,
                                        optimizer_name=optimizer_name,
                                        batch_size=batch_size,
                                        learning_rate=learning_rate,
                                        decay1=decay1,
                                        decay2=decay2,
                                        regularizer_name=regularizer_name,
                                        regularizer_weight=regularizer_weight)
optimizer.train(train_samples=kelpie_dataset.kelpie_train_samples, max_epochs=max_epochs)

original_direct_samples = numpy.vstack((kelpie_dataset.original_train_samples,
def rbo(original_model: ComplEx,
        kelpie_model: KelpieComplEx,
        original_samples: numpy.array,
        kelpie_samples: numpy.array):

    _, original_ranks, original_predictions = original_model.predict_samples(original_samples)
    _, kelpie_ranks, kelpie_predictions = kelpie_model.predict_samples(samples=kelpie_samples, original_mode=False)

    all_original_ranks = []
    for (a, b) in original_ranks:
        all_original_ranks.append(a)
        all_original_ranks.append(b)

    all_kelpie_ranks = []
    for (a, b) in kelpie_ranks:
        all_kelpie_ranks.append(a)
        all_kelpie_ranks.append(b)

    original_mrr = mrr(all_original_ranks)
    kelpie_mrr = mrr(all_kelpie_ranks)
    original_h1 = hits_k(all_original_ranks, 1)
    kelpie_h1 = hits_k(all_kelpie_ranks, 1)

    rbos = []
    for i in range(len(original_samples)):
        _original_sample = original_samples[i]
        _kelpie_sample = kelpie_samples[i]

        original_target_head, _, original_target_tail = _original_sample
        kelpie_target_head, _, kelpie_target_tail = _kelpie_sample

        original_target_head_index, original_target_tail_index = \
            int(original_ranks[i][0] - 1), int(original_ranks[i][1] - 1)
        kelpie_target_head_index, kelpie_target_tail_index = \
            int(kelpie_ranks[i][0] - 1), int(kelpie_ranks[i][1] - 1)

        # get head and tail predictions
        original_head_predictions = original_predictions[i][0]
        kelpie_head_predictions = kelpie_predictions[i][0]
        original_tail_predictions = original_predictions[i][1]
        kelpie_tail_predictions = kelpie_predictions[i][1]

        assert original_head_predictions[original_target_head_index] == original_target_head
        assert kelpie_head_predictions[kelpie_target_head_index] == kelpie_target_head
        assert original_tail_predictions[original_target_tail_index] == original_target_tail
        assert kelpie_tail_predictions[kelpie_target_tail_index] == kelpie_target_tail

        # replace the target head id with the same value (-1 in this case)
        original_head_predictions[original_target_head_index] = -1
        kelpie_head_predictions[kelpie_target_head_index] = -1

        # cut each list at the rank that the target head obtained in that model
        original_head_predictions = original_head_predictions[:original_target_head_index + 1]
        kelpie_head_predictions = kelpie_head_predictions[:kelpie_target_head_index + 1]

        # replace the target tail id with the same value (-1 in this case)
        original_tail_predictions[original_target_tail_index] = -1
        kelpie_tail_predictions[kelpie_target_tail_index] = -1

        # cut each list at the rank that the target tail obtained in that model
        original_tail_predictions = original_tail_predictions[:original_target_tail_index + 1]
        kelpie_tail_predictions = kelpie_tail_predictions[:kelpie_target_tail_index + 1]

        rbos.append(ranking_similarity.rank_biased_overlap(original_head_predictions, kelpie_head_predictions))
        rbos.append(ranking_similarity.rank_biased_overlap(original_tail_predictions, kelpie_tail_predictions))

    avg_rbo = float(sum(rbos)) / float(len(rbos))
    return avg_rbo, original_mrr, kelpie_mrr, original_h1, kelpie_h1
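# --- Hedged sketch of the ranking-metric helpers used by rbo() (assumed; they are not
# shown in the original snippet). mrr() is taken to be the mean reciprocal rank of a list
# of 1-based ranks, and hits_k() the fraction of ranks within the top k. The call
# ranking_similarity.rank_biased_overlap is assumed to be the project's implementation of
# rank-biased overlap (Webber et al., 2010) and is not re-implemented here. ---
def mrr(ranks):
    # mean of 1/rank over all ranks
    return sum(1.0 / float(r) for r in ranks) / float(len(ranks))

def hits_k(ranks, k):
    # fraction of ranks that are at most k
    return sum(1 for r in ranks if r <= k) / float(len(ranks))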
# check that the fact to explain is actually a test fact
assert (original_sample in complex_dataset.test_samples)

kelpie_dataset = KelpieDataset(dataset=complex_dataset, entity_id=original_entity_id)
kelpie_entity_id = kelpie_dataset.kelpie_entity_id
kelpie_triple = (kelpie_entity_id, relation_id, tail_id) if perspective == 'head' \
    else (head_id, relation_id, kelpie_entity_id)
kelpie_sample = numpy.array(kelpie_triple)

print("Wrapping the original model in a Kelpie model...")
kelpie_model = KelpieComplEx(dataset=kelpie_dataset, model=original_model, init_size=1e-3)  # type: KelpieComplEx
kelpie_model.to('cuda')

print("Extracting samples...")
original_train_samples = kelpie_dataset.original_train_samples
original_valid_samples = kelpie_dataset.original_valid_samples
original_test_samples = kelpie_dataset.original_test_samples
kelpie_train_samples = kelpie_dataset.kelpie_train_samples
kelpie_valid_samples = kelpie_dataset.kelpie_valid_samples
kelpie_test_samples = kelpie_dataset.kelpie_test_samples

all_original_samples = numpy.vstack((original_train_samples, original_valid_samples, original_test_samples))
all_kelpie_samples = numpy.vstack((kelpie_train_samples, kelpie_valid_samples, kelpie_test_samples))
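# --- Hedged usage sketch (assumption, not part of the original snippet): the stacked
# sample arrays above would presumably be passed to the rbo() helper defined earlier,
# comparing the original model and the post-trained Kelpie model fact by fact:
avg_rbo, original_mrr, kelpie_mrr, original_h1, kelpie_h1 = rbo(
    original_model=original_model,
    kelpie_model=kelpie_model,
    original_samples=all_original_samples,
    kelpie_samples=all_kelpie_samples)
print("Average RBO: %f" % avg_rbo)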