def test_split_train_test_set():
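    """Compare the three splitting variants on the same association list.

    The train half should be identical no matter which variant is used,
    while the test half shrinks as the disjointness constraint tightens.
    """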
    vr = split_training_test_set(associations)
    train, test = vr
    logger.info("just random: train: %d, test: %d", len(train), len(test))
    vtnd = split_training_test_set(associations, variant='target_node_disjoint')
    train, test = vtnd
    logger.info("target node disjoint: train: %d, test: %d",
                len(train), len(test))
    vnd = split_training_test_set(associations, variant='node_disjoint')
    train, test = vnd
    logger.info("node disjoint: train: %d, test: %d", len(train), len(test))

    assert vr[0] == vtnd[0] == vnd[0], \
        "train set shouldn't be influenced by the splitting variant"
    assert set(vr[1]) > set(vtnd[1]) > set(vnd[1]), \
        "test set expected to shrink for more restrictive splitting variants"
def main():
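    """Predict targets for the held-out test pairs and report their ranks."""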
    semantic_associations = get_semantic_associations(
        config.GT_ASSOCIATIONS_FILENAME)
    assocs_train, assocs_test = split_training_test_set(
        semantic_associations, variant='random'
    )

    # setup node expander
    sparql = SPARQLWrapper(config.SPARQL_ENDPOINT)

    predict_list = assocs_test

    # degree, pagerank and hits
    for method, query in sorted(prediction_queries.items()):
        target_idxs = []
        for source, target in predict_list:
            logger.info(
                'method: %s, predicting targets for %s, ground truth: %s',
                method, source.n3(), target.n3())
            prediction = predict_target_with_query(sparql, query, source)
            idx = find_in_prediction(prediction, target)
            logger.info(
                format_prediction_results(method, prediction, target, idx))
            target_idxs.append(idx)
        print("'%s': %s," % (method, target_idxs))

    # milne-witten relatedness
    for method, pred in (('mw_wl', 'dbo:wikiPageWikiLink'),):
        target_idxs = []
        for source, target in predict_list:
            logger.info(
                'method: %s, predicting targets for %s, ground truth: %s',
                method, source.n3(), target.n3())
            prediction = predict_target_with_milne_witten(sparql, pred, source)
            idx = find_in_prediction(prediction, target)
            logger.info(
                format_prediction_results(method, prediction, target, idx))
            target_idxs.append(idx)
        print("'%s': %s," % (method, target_idxs))
import logging

import rdflib
from rdflib import URIRef
from rdflib import Variable

from gp_learner import mutate_increase_dist
from gp_learner import mutate_merge_var
from gp_learner import mutate_simplify_pattern
from graph_pattern import GraphPattern
from graph_pattern import SOURCE_VAR
from graph_pattern import TARGET_VAR
from ground_truth_tools import get_semantic_associations
from ground_truth_tools import split_training_test_set
from gtp_scores import GTPScores

logger = logging.getLogger(__name__)

dbp = rdflib.Namespace('http://dbpedia.org/resource/')
wikilink = URIRef('http://dbpedia.org/ontology/wikiPageWikiLink')

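# module-level fixtures: the ground truth train split and its GTPScores
# bookkeeping, shared by the mutation tests below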
ground_truth_pairs = get_semantic_associations()
ground_truth_pairs, _ = split_training_test_set(ground_truth_pairs)
gtp_scores = GTPScores(ground_truth_pairs)


def test_mutate_increase_dist():
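    """Increasing the distance should grow the pattern diameter by one."""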
    gp = GraphPattern([(SOURCE_VAR, wikilink, TARGET_VAR)])
    res = mutate_increase_dist(gp)
    assert gp != res
    assert gp.diameter() + 1 == res.diameter()
    assert gp.vars_in_graph == {SOURCE_VAR, TARGET_VAR}


def test_mutate_merge_var():
    p = Variable('p')
    q = Variable('q')
    gp = GraphPattern([(SOURCE_VAR, p, TARGET_VAR),
                       (SOURCE_VAR, q, TARGET_VAR)])
    # assumed completion of a truncated snippet: merging p/q must keep anchors
    res = mutate_merge_var(gp)
    assert res.vars_in_graph >= {SOURCE_VAR, TARGET_VAR}