Example #1
def run_kelpie(train_samples):
    # relies on module-level state set up by the surrounding script:
    # original_model, kelpie_dataset, args, head_id, relation_id, tail_id,
    # original_sample, original_triple, original_test_samples, kelpie_test_samples
    print("Wrapping the original model in a Kelpie explainable model...")
    # initialize the Kelpie model from the pre-trained original model
    kelpie_model = KelpieComplEx(model=original_model, dataset=kelpie_dataset, init_size=1e-3) # type: KelpieComplEx
    kelpie_model.to('cuda')

    ###########  BUILD THE OPTIMIZER AND RUN POST-TRAINING
    print("Running post-training on the Kelpie model...")
    optimizer = KelpieMultiClassNLLptimizer(model=kelpie_model,
                                            optimizer_name=args.optimizer,
                                            batch_size=args.batch_size,
                                            learning_rate=args.learning_rate,
                                            decay1=args.decay1,
                                            decay2=args.decay2,
                                            regularizer_name="N3",
                                            regularizer_weight=args.reg)

    optimizer.train(train_samples=train_samples, max_epochs=args.max_epochs)

    ###########  EXTRACT RESULTS

    print("\nExtracting results...")
    kelpie_entity_id = kelpie_dataset.kelpie_entity_id
    kelpie_sample_tuple = (kelpie_entity_id, relation_id, tail_id) if args.perspective == "head" else (head_id, relation_id, kelpie_entity_id)
    kelpie_sample = numpy.array(kelpie_sample_tuple)

    ### Evaluation on original entity

    # Kelpie model on original fact
    scores, ranks, predictions = kelpie_model.predict_sample(sample=original_sample, original_mode=True)
    original_direct_score, original_inverse_score = scores[0], scores[1]
    original_head_rank, original_tail_rank = ranks[0], ranks[1]
    print("\nKelpie model on the original test fact: <%s, %s, %s>" % original_triple)
    print("\tDirect fact score: %f; Inverse fact score: %f" % (original_direct_score, original_inverse_score))
    print("\tHead Rank: %f" % original_head_rank)
    print("\tTail Rank: %f" % original_tail_rank)

    # Kelpie model on all facts containing the original entity
    print("\nKelpie model on all test facts containing the original entity:")
    mrr, h1 = KelpieEvaluator(kelpie_model).eval(samples=original_test_samples, original_mode=True)
    print("\tMRR: %f\n\tH@1: %f" % (mrr, h1))


    ### Evaluation on kelpie entity

    # results on kelpie fact
    scores, ranks, _ = kelpie_model.predict_sample(sample=kelpie_sample, original_mode=False)
    kelpie_direct_score, kelpie_inverse_score = scores[0], scores[1]
    kelpie_head_rank, kelpie_tail_rank = ranks[0], ranks[1]
    print("\nKelpie model on the Kelpie test fact: <%s, %s, %s>" % kelpie_sample_tuple)
    print("\tDirect fact score: %f; Inverse fact score: %f" % (kelpie_direct_score, kelpie_inverse_score))
    print("\tHead Rank: %f" % kelpie_head_rank)
    print("\tTail Rank: %f" % kelpie_tail_rank)

    # results on all facts containing the kelpie entity
    print("\nKelpie model on all test facts containing the Kelpie entity:")
    mrr, h1 = KelpieEvaluator(kelpie_model).eval(samples=kelpie_test_samples, original_mode=False)
    print("\tMRR: %f\n\tH@1: %f" % (mrr, h1))

    return kelpie_direct_score, kelpie_inverse_score, kelpie_head_rank, kelpie_tail_rank
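
A hedged driver sketch, not part of the original script: Kelpie-style explanations post-train the clone entity once on all of its training facts and once with a candidate fact removed, then read the score drop as that fact's influence. The candidate chosen below is an arbitrary assumption.

base_direct, base_inverse, base_head_rank, base_tail_rank = run_kelpie(
    kelpie_dataset.kelpie_train_samples)

# ablate one (arbitrary, hypothetical) candidate training fact of the clone
candidate = kelpie_dataset.kelpie_train_samples[0]
reduced_samples = numpy.array([s for s in kelpie_dataset.kelpie_train_samples
                               if not numpy.array_equal(s, candidate)])
ablated_direct, _, _, _ = run_kelpie(reduced_samples)

print("direct score drop after removing the candidate fact: %f"
      % (base_direct - ablated_direct))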
Example #2
    # check that the fact to explain is actually a test fact
    assert (original_sample in complex_dataset.test_samples)

    kelpie_dataset = KelpieDataset(dataset=complex_dataset,
                                   entity_id=original_entity_id)
    kelpie_entity_id = kelpie_dataset.kelpie_entity_id
    kelpie_triple = (kelpie_entity_id, relation_id,
                     tail_id) if perspective == 'head' else (head_id,
                                                             relation_id,
                                                             kelpie_entity_id)
    kelpie_sample = numpy.array(kelpie_triple)

    print("Wrapping the original model in a Kelpie model...")
    kelpie_model = KelpieComplEx(dataset=kelpie_dataset,
                                 model=original_model,
                                 init_size=1e-3)  # type: KelpieComplEx
    kelpie_model.to('cuda')

    print("Running post-training on the Kelpie model...")
    optimizer = KelpieMultiClassNLLptimizer(
        model=kelpie_model,
        optimizer_name=optimizer_name,
        batch_size=batch_size,
        learning_rate=learning_rate,
        decay1=decay1,
        decay2=decay2,
        regularizer_name=regularizer_name,
        regularizer_weight=regularizer_weight)

    optimizer.train(train_samples=kelpie_dataset.kelpie_train_samples,
                    max_epochs=max_epochs)
Example #3
# check that the fact to explain is actually a test fact
assert(original_sample in original_dataset.test_samples)


#############   INITIALIZE MODELS AND THEIR STRUCTURES
print("Loading model at location %s..." % args.model_path)
# instantiate and load the original model from filesystem
original_model = ComplEx(dataset=original_dataset, dimension=args.dimension, init_random=True, init_size=args.init) # type: ComplEx
original_model.load_state_dict(torch.load(args.model_path))
original_model.to('cuda')

print("Wrapping the original model in a Kelpie explainable model...")
# use model_to_explain to initialize the Kelpie model
kelpie_dataset = KelpieDataset(dataset=original_dataset, entity_id=original_entity_id)
kelpie_model = KelpieComplEx(model=original_model, dataset=kelpie_dataset, init_size=1e-3) # type: KelpieComplEx
kelpie_model.to('cuda')


############ EXTRACT TEST FACTS AND TRAINING FACTS

print("Extracting train and test samples for the original and the kelpie entities...")
# extract all training facts and test facts involving the entity to explain,
# replacing the id of the entity to explain with the id of the fake kelpie
# entity (a toy sketch of this replacement follows the snippet)
original_entity_test_samples = kelpie_dataset.original_test_samples
kelpie_test_samples = kelpie_dataset.kelpie_test_samples


###########  BUILD THE OPTIMIZER AND RUN POST-TRAINING

print("Running post-training on the Kelpie model...")
Example #4
    head_id, relation_id, tail_id = complex_dataset.get_id_for_entity_name(head), \
                                    complex_dataset.get_id_for_relation_name(relation), \
                                    complex_dataset.get_id_for_entity_name(tail)

    original_entity_id = head_id if perspective == 'head' else tail_id
    original_triple = (head_id, relation_id, tail_id)
    original_sample = numpy.array(original_triple)

    # check that the fact to explain is actually a test fact
    assert(original_sample in complex_dataset.test_samples)

    kelpie_dataset = KelpieDataset(dataset=complex_dataset, entity_id=original_entity_id)
    kelpie_entity_id = kelpie_dataset.kelpie_entity_id
    kelpie_triple = (kelpie_entity_id, relation_id, tail_id) if perspective == 'head' else (head_id, relation_id, kelpie_entity_id)
    kelpie_sample = numpy.array(kelpie_triple)

    print("Wrapping the original model in a Kelpie model...")
    kelpie_model = KelpieComplEx(dataset=kelpie_dataset, model=original_model, init_size=1e-3) # type: KelpieComplEx
    kelpie_model.to('cuda')

    print("Running post-training on the Kelpie model...")
    optimizer = KelpieMultiClassNLLptimizer(model=kelpie_model,
                                            optimizer_name=optimizer_name,
                                            batch_size=batch_size,
                                            learning_rate=learning_rate,
                                            decay1=decay1,
                                            decay2=decay2,
                                            regularizer_name=regularizer_name,
                                            regularizer_weight=regularizer_weight)

    optimizer.train(train_samples=kelpie_dataset.kelpie_train_samples, max_epochs=max_epochs)

    original_direct_samples = numpy.vstack((kelpie_dataset.original_train_samples,
Example #5
def rbo(original_model: ComplEx, kelpie_model: KelpieComplEx,
        original_samples: numpy.ndarray, kelpie_samples: numpy.ndarray):
    # compare the original and post-trained Kelpie models: returns the average
    # rank-biased overlap of their prediction lists, plus MRR and Hits@1 of both

    _, original_ranks, original_predictions = original_model.predict_samples(
        original_samples)
    _, kelpie_ranks, kelpie_predictions = kelpie_model.predict_samples(
        samples=kelpie_samples, original_mode=False)

    all_original_ranks = []
    for (a, b) in original_ranks:
        all_original_ranks.append(a)
        all_original_ranks.append(b)

    all_kelpie_ranks = []
    for (a, b) in kelpie_ranks:
        all_kelpie_ranks.append(a)
        all_kelpie_ranks.append(b)

    original_mrr = mrr(all_original_ranks)
    kelpie_mrr = mrr(all_kelpie_ranks)
    original_h1 = hits_k(all_original_ranks, 1)
    kelpie_h1 = hits_k(all_kelpie_ranks, 1)

    rbos = []
    for i in range(len(original_samples)):
        _original_sample = original_samples[i]
        _kelpie_sample = kelpie_samples[i]

        original_target_head, _, original_target_tail = _original_sample
        kelpie_target_head, _, kelpie_target_tail = _kelpie_sample

        # convert the 1-based ranks into 0-based indices in the prediction lists
        original_target_head_index = int(original_ranks[i][0] - 1)
        original_target_tail_index = int(original_ranks[i][1] - 1)
        kelpie_target_head_index = int(kelpie_ranks[i][0] - 1)
        kelpie_target_tail_index = int(kelpie_ranks[i][1] - 1)

        # get head and tail predictions
        original_head_predictions = original_predictions[i][0]
        kelpie_head_predictions = kelpie_predictions[i][0]
        original_tail_predictions = original_predictions[i][1]
        kelpie_tail_predictions = kelpie_predictions[i][1]

        assert original_head_predictions[original_target_head_index] == original_target_head
        assert kelpie_head_predictions[kelpie_target_head_index] == kelpie_target_head
        assert original_tail_predictions[original_target_tail_index] == original_target_tail
        assert kelpie_tail_predictions[kelpie_target_tail_index] == kelpie_target_tail

        # replace the target head id with the same placeholder (-1) in both lists,
        # so the target itself counts as a match in the overlap computation
        original_head_predictions[original_target_head_index] = -1
        kelpie_head_predictions[kelpie_target_head_index] = -1
        # cut each list just past the rank at which its target head was found
        original_head_predictions = original_head_predictions[:original_target_head_index + 1]
        kelpie_head_predictions = kelpie_head_predictions[:kelpie_target_head_index + 1]

        # replace the target tail id with the same placeholder (-1) in both lists
        original_tail_predictions[original_target_tail_index] = -1
        kelpie_tail_predictions[kelpie_target_tail_index] = -1
        # cut each list just past the rank at which its target tail was found
        original_tail_predictions = original_tail_predictions[:original_target_tail_index + 1]
        kelpie_tail_predictions = kelpie_tail_predictions[:kelpie_target_tail_index + 1]

        rbos.append(
            ranking_similarity.rank_biased_overlap(original_head_predictions,
                                                   kelpie_head_predictions))
        rbos.append(
            ranking_similarity.rank_biased_overlap(original_tail_predictions,
                                                   kelpie_tail_predictions))

    avg_rbo = float(sum(rbos)) / float(len(rbos))
    return avg_rbo, original_mrr, kelpie_mrr, original_h1, kelpie_h1
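
The helpers mrr, hits_k and ranking_similarity.rank_biased_overlap are imported from elsewhere in the repository; minimal sketches of the metrics they compute could look like the following (a truncated, weight-normalized form of rank-biased overlap from Webber et al. 2010, with an assumed persistence parameter p):

def mrr(ranks):
    # mean reciprocal rank: average of 1/rank over all target ranks
    return sum(1.0 / r for r in ranks) / float(len(ranks))

def hits_k(ranks, k):
    # Hits@k: fraction of targets ranked in the top k
    return sum(1 for r in ranks if r <= k) / float(len(ranks))

def rank_biased_overlap(list_a, list_b, p=0.9):
    # weighted average of prefix overlaps; the weight p**(d-1) makes earlier
    # (higher-ranked) agreements count more than later ones
    depth = min(len(list_a), len(list_b))
    if depth == 0:
        return 0.0
    seen_a, seen_b = set(), set()
    score, weight_sum = 0.0, 0.0
    for d in range(1, depth + 1):
        seen_a.add(list_a[d - 1])
        seen_b.add(list_b[d - 1])
        agreement = len(seen_a & seen_b) / float(d)
        score += (p ** (d - 1)) * agreement
        weight_sum += p ** (d - 1)
    return score / weight_sum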
Example #6
    # check that the fact to explain is actually a test fact
    assert (original_sample in complex_dataset.test_samples)

    kelpie_dataset = KelpieDataset(dataset=complex_dataset,
                                   entity_id=original_entity_id)
    kelpie_entity_id = kelpie_dataset.kelpie_entity_id
    kelpie_triple = (kelpie_entity_id, relation_id,
                     tail_id) if perspective == 'head' else (head_id,
                                                             relation_id,
                                                             kelpie_entity_id)
    kelpie_sample = numpy.array(kelpie_triple)

    print("Wrapping the original model in a Kelpie model...")
    kelpie_model = KelpieComplEx(dataset=kelpie_dataset,
                                 model=original_model,
                                 init_size=1e-3)  # type: KelpieComplEx
    kelpie_model.to('cuda')

    print("Extracting samples...")
    original_train_samples = kelpie_dataset.original_train_samples
    original_valid_samples = kelpie_dataset.original_valid_samples
    original_test_samples = kelpie_dataset.original_test_samples
    kelpie_train_samples = kelpie_dataset.kelpie_train_samples
    kelpie_valid_samples = kelpie_dataset.kelpie_valid_samples
    kelpie_test_samples = kelpie_dataset.kelpie_test_samples

    all_original_samples = numpy.vstack(
        (original_train_samples, original_valid_samples,
         original_test_samples))
    all_kelpie_samples = numpy.vstack(
        (kelpie_train_samples, kelpie_valid_samples, kelpie_test_samples))
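
A plausible continuation (an assumption, since the snippet is truncated here): feed the stacked sample arrays to the rbo() helper from Example #5 to compare the two models.

avg_rbo, original_mrr, kelpie_mrr, original_h1, kelpie_h1 = rbo(
    original_model, kelpie_model, all_original_samples, all_kelpie_samples)
print("average RBO: %f" % avg_rbo)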
Example #7
    # check that the fact to explain is actually a test fact
    assert (original_sample in complex_dataset.test_samples)

    kelpie_dataset = KelpieDataset(dataset=complex_dataset,
                                   entity_id=original_entity_id)
    kelpie_entity_id = kelpie_dataset.kelpie_entity_id
    kelpie_triple = (kelpie_entity_id, relation_id,
                     tail_id) if perspective == 'head' else (head_id,
                                                             relation_id,
                                                             kelpie_entity_id)
    kelpie_sample = numpy.array(kelpie_triple)

    print("Wrapping the original model in a Kelpie model...")
    kelpie_model = KelpieComplEx(dataset=kelpie_dataset,
                                 model=original_model,
                                 init_size=1e-3)  # type: KelpieComplEx
    kelpie_model.to('cuda')

    print("Running post-training on the Kelpie model...")
    optimizer = KelpieMultiClassNLLptimizer(
        model=kelpie_model,
        optimizer_name=optimizer_name,
        batch_size=batch_size,
        learning_rate=learning_rate,
        decay1=decay1,
        decay2=decay2,
        regularizer_name=regularizer_name,
        regularizer_weight=regularizer_weight)

    optimizer.train(train_samples=kelpie_dataset.kelpie_train_samples,
                    max_epochs=max_epochs)
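
What typically follows the post-training step, per Example #1 (the lines below are an assumption, since this snippet is truncated): score the kelpie fact with the post-trained clone.

    scores, ranks, _ = kelpie_model.predict_sample(sample=kelpie_sample,
                                                   original_mode=False)
    print("direct fact score: %f; inverse fact score: %f" % (scores[0], scores[1]))
    print("head rank: %f; tail rank: %f" % (ranks[0], ranks[1]))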