Example #1
import numpy as np
import tensorflow as tf  # TensorFlow 1.x API

# NOTE: assumed import paths for the inferbeddings project; adjust if the
# package layout differs.
from inferbeddings.knowledgebase import Fact, KnowledgeBaseParser
from inferbeddings.parse import parse_clause
from inferbeddings.models import base as models
from inferbeddings.models import similarities
from inferbeddings.adversarial import Adversarial


def _test_adversarial():
    triples = [('john', 'friendOf', 'mark'), ('mark', 'friendOf', 'aleksi'),
               ('mark', 'friendOf', 'dazdrasmygda')]

    def fact(s, p, o):
        return Fact(predicate_name=p, argument_names=[s, o])

    facts = [fact(s, p, o) for s, p, o in triples]
    parser = KnowledgeBaseParser(facts)
    clauses = [parse_clause('friendOf(X, Y) :- friendOf(Y, X)')]

    nb_entities = len(parser.entity_vocabulary)
    nb_predicates = len(parser.predicate_vocabulary)

    entity_embedding_size = 100
    predicate_embedding_size = 100

    entity_embedding_layer = tf.get_variable(
        'entities',
        shape=[nb_entities + 1, entity_embedding_size],
        initializer=tf.contrib.layers.xavier_initializer())

    predicate_embedding_layer = tf.get_variable(
        'predicates',
        shape=[nb_predicates + 1, predicate_embedding_size],
        initializer=tf.contrib.layers.xavier_initializer())

    model_class = models.get_function('TransE')

    similarity_function = similarities.get_function('l1')
    model_parameters = dict(similarity_function=similarity_function)

    batch_size = 1000

    adversarial = Adversarial(
        clauses=clauses,
        parser=parser,
        entity_embedding_layer=entity_embedding_layer,
        predicate_embedding_layer=predicate_embedding_layer,
        model_class=model_class,
        model_parameters=model_parameters,
        batch_size=batch_size)

    init_op = tf.global_variables_initializer()

    with tf.Session() as session:
        session.run(init_op)
        # One set of adversarial (violating) entity embeddings per clause variable (X and Y)
        assert len(adversarial.parameters) == 2
        for violating_embeddings in adversarial.parameters:
            shape = session.run(tf.shape(violating_embeddings))
            assert (shape == (batch_size, entity_embedding_size)).all()

        loss_value = session.run(adversarial.loss)
        errors_value = session.run(adversarial.errors)

        var1 = adversarial.parameters[0]
        var2 = adversarial.parameters[1]

        X_values = session.run(var1 if "X" in var1.name else var2)
        Y_values = session.run(var2 if "Y" in var2.name else var1)

        p_value = session.run(
            tf.nn.embedding_lookup(predicate_embedding_layer, 1))

        assert X_values.shape == (batch_size, entity_embedding_size)
        assert Y_values.shape == (batch_size, entity_embedding_size)
        assert p_value.shape == (predicate_embedding_size,)

        head_scores = -np.sum(np.abs((X_values + p_value) - Y_values), axis=1)
        body_scores = -np.sum(np.abs((Y_values + p_value) - X_values), axis=1)

        assert int(errors_value) == np.sum(
            (head_scores < body_scores).astype(int))

        linear_losses = body_scores - head_scores
        np_loss_values = np.sum(linear_losses * (linear_losses > 0))
        assert np.abs(loss_value - np_loss_values) < 1e-3

    tf.reset_default_graph()
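
The assertions above recompute the adversarial loss by hand: under TransE with the `l1` similarity, a triple (s, p, o) is scored as -||s + p - o||_1, a clause violation occurs whenever the body atom outscores the head atom, and the loss sums the positive margins. A minimal NumPy sketch of that check, with hypothetical shapes and values:

import numpy as np

X = np.random.randn(5, 10)   # batch of violating subject embeddings
Y = np.random.randn(5, 10)   # batch of violating object embeddings
p = np.random.randn(10)      # embedding of the friendOf predicate

# TransE score with L1 similarity: higher means more plausible
head_scores = -np.sum(np.abs((X + p) - Y), axis=1)  # friendOf(X, Y)
body_scores = -np.sum(np.abs((Y + p) - X), axis=1)  # friendOf(Y, X)

errors = np.sum(head_scores < body_scores)                 # violation count
loss = np.sum(np.maximum(body_scores - head_scores, 0.0))  # hinge-style loss
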
Example #2
logger = logging.getLogger(__name__)

triples = [('a', 'p', 'b'), ('c', 'p', 'd'), ('a', 'q', 'b')]
facts = [Fact(predicate_name=p, argument_names=[s, o]) for s, p, o in triples]
parser = KnowledgeBaseParser(facts)

nb_entities = len(parser.entity_to_index)
nb_predicates = len(parser.predicate_to_index)

# Clauses
clause_str = 'q(X, Y) :- p(X, Y)'
clauses = [parse_clause(clause_str)]

# Instantiating the model parameters
model_class = models.get_function('TransE')
similarity_function = similarities.get_function('l2_sqr')

model_parameters = dict(similarity_function=similarity_function)


@pytest.mark.closedform
def test_transe_unit_cube():
    for seed in range(32):
        tf.reset_default_graph()

        np.random.seed(seed)
        tf.set_random_seed(seed)

        entity_embedding_size = np.random.randint(low=1, high=5)
        predicate_embedding_size = entity_embedding_size
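
The snippet is truncated here; the test name refers to constraining entity embeddings to the unit cube. Example #5 below applies such constraints through `constraints.unit_cube` and `constraints.unit_sphere`; a minimal TensorFlow 1.x sketch of what such projection ops could look like (an assumption, not the library's actual implementation):

import tensorflow as tf  # TensorFlow 1.x API

def unit_cube(embedding_layer):
    # Clip every coordinate into [0, 1]
    return tf.assign(embedding_layer, tf.clip_by_value(embedding_layer, 0.0, 1.0))

def unit_sphere(embedding_layer, norm=1.0):
    # Rescale each embedding row to the given L2 norm
    rows = tf.nn.l2_normalize(embedding_layer, axis=1)
    return tf.assign(embedding_layer, norm * rows)

Running the returned op after each optimisation step keeps the embeddings inside the feasible region.
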
Example #3
logger = logging.getLogger(__name__)

triples = [('a', 'p', 'b'), ('c', 'p', 'd'), ('a', 'q', 'b')]
facts = [Fact(predicate_name=p, argument_names=[s, o]) for s, p, o in triples]
parser = KnowledgeBaseParser(facts)

nb_entities = len(parser.entity_to_index)
nb_predicates = len(parser.predicate_to_index)

# Clauses
clause_str = 'q(X, Y) :- p(Y, X)'
clauses = [parse_clause(clause_str)]

# Instantiating the model parameters
model_class = models.get_function('ComplEx')
similarity_function = similarities.get_function('dot')

model_parameters = dict(similarity_function=similarity_function)


@pytest.mark.closedform
def test_complex_unit_sphere():
    for seed in range(32):
        tf.reset_default_graph()

        np.random.seed(seed)
        tf.set_random_seed(seed)

        entity_embedding_size = np.random.randint(low=1, high=5) * 2
        predicate_embedding_size = entity_embedding_size
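
The snippet is truncated here. Note the `* 2` on the embedding size: ComplEx represents each embedding as a complex vector stored as real and imaginary halves, so the dimensionality must be even. A NumPy sketch of the ComplEx scoring function Re(<s, p, conj(o)>), assuming the first-half/second-half storage convention:

import numpy as np

def complex_score(s, p, o):
    # Split each embedding into its real and imaginary halves
    k = s.shape[-1] // 2
    s_re, s_im = s[:k], s[k:]
    p_re, p_im = p[:k], p[k:]
    o_re, o_im = o[:k], o[k:]
    # Re(<s, p, conj(o)>), the ComplEx trilinear score
    return (np.sum(p_re * s_re * o_re)
            + np.sum(p_re * s_im * o_im)
            + np.sum(p_im * s_re * o_im)
            - np.sum(p_im * s_im * o_re))

Unlike DistMult, this score is asymmetric in s and o, which is what lets ComplEx handle the inverse clause q(X, Y) :- p(Y, X) tested here.
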
Example #4
logger = logging.getLogger(__name__)

triples = [('a', 'p', 'b'), ('c', 'p', 'd'), ('a', 'q', 'b')]
facts = [Fact(predicate_name=p, argument_names=[s, o]) for s, p, o in triples]
parser = KnowledgeBaseParser(facts)

nb_entities = len(parser.entity_to_index)
nb_predicates = len(parser.predicate_to_index)

# Clauses
clause_str = 'q(X, Y) :- p(X, Y)'
clauses = [parse_clause(clause_str)]

# Instantiating the model parameters
model_class = models.get_function('DistMult')
similarity_function = similarities.get_function('dot')

model_parameters = dict(similarity_function=similarity_function)


@pytest.mark.closedform
def test_distmult_unit_sphere():
    for seed in range(32):
        tf.reset_default_graph()

        np.random.seed(seed)
        tf.set_random_seed(seed)

        entity_embedding_size = np.random.randint(low=1, high=5)
        predicate_embedding_size = entity_embedding_size
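
The snippet is truncated here. DistMult with the `dot` similarity scores a triple as a trilinear product, as in this one-line NumPy sketch:

import numpy as np

def distmult_score(s, p, o):
    # DistMult: <s, p, o> = sum_i s_i * p_i * o_i
    return np.sum(s * p * o)

The score is symmetric in s and o, so DistMult cannot distinguish p(X, Y) from p(Y, X); accordingly, this test uses the symmetric clause q(X, Y) :- p(X, Y).
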
Example #5
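This snippet references module-level fixtures that the excerpt omits: the imports, the toy knowledge base, the entity indices a_idx and b_idx, the hyperparams grid, and the cartesian_product helper. A minimal reconstruction, assuming the same toy knowledge base as Examples #2 to #4 (every name below is an assumption, not the original fixture):

import itertools

import numpy as np
import tensorflow as tf  # TensorFlow 1.x API

# NOTE: assumed import paths, mirroring Example #1; adjust to the actual layout.
from inferbeddings.knowledgebase import Fact, KnowledgeBaseParser
from inferbeddings.parse import parse_clause
from inferbeddings.models import base as models
from inferbeddings.models import similarities
from inferbeddings.models.training import constraints
from inferbeddings.adversarial import GroundLoss
from inferbeddings.adversarial.closedform import ClosedForm

triples = [('a', 'p', 'b'), ('c', 'p', 'd'), ('a', 'q', 'b')]
facts = [Fact(predicate_name=p, argument_names=[s, o]) for s, p, o in triples]
parser = KnowledgeBaseParser(facts)

nb_entities = len(parser.entity_to_index)
nb_predicates = len(parser.predicate_to_index)

a_idx = parser.entity_to_index['a']
b_idx = parser.entity_to_index['b']

# Hypothetical hyperparameter grid
hyperparams = dict(clause=['q(X, Y) :- p(X, Y)'],
                   model_name=['TransE', 'DistMult', 'ComplEx'],
                   unit_cube=[True, False])


def cartesian_product(dicts):
    # Yield one dict per combination of the per-key value lists
    return (dict(zip(dicts, values)) for values in itertools.product(*dicts.values()))
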
def test_losses():

    hyperparam_configurations = list(cartesian_product(hyperparams))

    for hyperparam_configuration in hyperparam_configurations:
        # Clauses
        clause = parse_clause(hyperparam_configuration['clause'])

        # Instantiating the model parameters
        model_class = models.get_function(hyperparam_configuration['model_name'])
        similarity_function = similarities.get_function('dot')

        unit_cube = hyperparam_configuration['unit_cube']

        for seed in range(4):
            print('Seed {}, Evaluating {}'.format(seed, str(hyperparam_configuration)))

            tf.reset_default_graph()

            np.random.seed(seed)
            tf.set_random_seed(seed)

            entity_embedding_size = np.random.randint(low=1, high=5) * 2
            predicate_embedding_size = entity_embedding_size

            # Instantiating entity and predicate embedding layers
            entity_embedding_layer = tf.get_variable('entities',
                                                     shape=[nb_entities + 1, entity_embedding_size],
                                                     initializer=tf.contrib.layers.xavier_initializer())

            predicate_embedding_layer = tf.get_variable('predicates',
                                                        shape=[nb_predicates + 1, predicate_embedding_size],
                                                        initializer=tf.contrib.layers.xavier_initializer())

            # Project entities onto the chosen feasible region
            if unit_cube:
                entity_projection = constraints.unit_cube(entity_embedding_layer)
            else:
                entity_projection = constraints.unit_sphere(entity_embedding_layer, norm=1.0)

            entity_inputs = tf.placeholder(tf.int32, shape=[None, 2])
            walk_inputs = tf.placeholder(tf.int32, shape=[None, None])

            entity_embeddings = tf.nn.embedding_lookup(entity_embedding_layer, entity_inputs)
            predicate_embeddings = tf.nn.embedding_lookup(predicate_embedding_layer, walk_inputs)

            model_parameters = dict(entity_embeddings=entity_embeddings,
                                    predicate_embeddings=predicate_embeddings,
                                    similarity_function=similarity_function)

            model = model_class(**model_parameters)
            score = model()

            closed_form_lifted = ClosedForm(parser=parser,
                                            predicate_embedding_layer=predicate_embedding_layer,
                                            model_class=model_class,
                                            model_parameters=model_parameters,
                                            is_unit_cube=unit_cube)
            opt_adversarial_loss = closed_form_lifted(clause)

            v_optimizer = tf.train.AdagradOptimizer(learning_rate=1e-2)
            v_training_step = v_optimizer.minimize(opt_adversarial_loss, var_list=[predicate_embedding_layer])

            init_op = tf.global_variables_initializer()

            with tf.Session() as session:
                session.run(init_op)

                session.run([entity_projection])

                def scoring_function(args):
                    return session.run(score, feed_dict={walk_inputs: args[0], entity_inputs: args[1]})

                ground_loss = GroundLoss(clauses=[clause], parser=parser, scoring_function=scoring_function)
                feed_dict = {'X': a_idx, 'Y': b_idx}
                continuous_loss_0 = ground_loss.continuous_error(clause, feed_dict=feed_dict)

                for epoch in range(1, 100 + 1):
                    _ = session.run([v_training_step])
                    print(ground_loss.continuous_error(clause, feed_dict=feed_dict))

                continuous_loss_final = ground_loss.continuous_error(clause, feed_dict=feed_dict)

                assert continuous_loss_0 <= .0 or continuous_loss_final <= continuous_loss_0

        tf.reset_default_graph()
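
For intuition on what `ClosedForm` computes: instead of optimising violating entity embeddings by gradient ascent, it maximises the margin body_score - head_score analytically over the entity domain. For DistMult and the clause q(X, Y) :- p(X, Y), this means maximising x^T diag(p - q) y, which has a closed form; the sketch below is a derivation under those assumptions, not necessarily the library's exact formula:

import numpy as np

def closed_form_distmult(p, q, unit_cube=True):
    # Worst-case violation of q(X, Y) :- p(X, Y) under DistMult:
    # maximise x^T diag(p - q) y over all admissible entity vectors x, y.
    d = p - q
    if unit_cube:
        # x, y in [0, 1]^k: activate exactly the coordinates where d > 0
        return np.sum(np.maximum(d, 0.0))
    # x, y on the unit sphere: the largest singular value of diag(d)
    return np.max(np.abs(d))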