Ejemplo n.º 1
0
def jaccard_similarity_kg(graph: KatanaGraph,
                          compare_node: NodeID) -> NumpyVectorType:
    """Return the Jaccard node property for ``compare_node`` as a NumPy array.

    The values live on ``graph.value`` under the node property named
    ``"jaccard_prop_with_<compare_node>"``. ``jaccard`` is invoked only when
    ``has_node_prop`` reports that this property is missing, so an existing
    property is reused rather than recomputed.

    Args:
        graph: Wrapper whose ``value`` attribute is the graph handed to
            ``jaccard`` and queried for the property.
        compare_node: Node passed to ``jaccard``; its ``str()`` form is the
            suffix of the property name.

    Returns:
        The node property converted with ``to_numpy()``.
    """
    # ``!s`` forces ``str()`` so the property name is built exactly as before.
    prop_name = f"jaccard_prop_with_{compare_node!s}"

    already_computed = has_node_prop(graph.value, prop_name)
    if not already_computed:
        jaccard(graph.value, compare_node, prop_name)

    return graph.value.get_node_property(prop_name).to_numpy()
Ejemplo n.º 2
0
def test_jaccard_sorted(graph: Graph):
    """Run ``jaccard`` with ``JaccardPlan.sorted()`` and spot-check the output.

    The graph's edges are sorted by destination before the algorithm runs
    (presumably a precondition of the sorted plan -- confirm against the
    library documentation).
    """
    prop = "NewProp"
    source_node = 0

    # Sorting must happen before the algorithm is invoked.
    sort_all_edges_by_dest(graph)
    jaccard(graph, source_node, prop, JaccardPlan.sorted())

    jaccard_assert_valid(graph, source_node, prop)

    scores: np.ndarray = graph.get_node_property(prop).to_numpy()

    # The compared node's own entry is expected to be exactly 1.
    assert scores[source_node] == 1
    # Sample nodes whose stored value is expected to be (approximately) zero.
    for zero_node in (1917, 2812):
        assert scores[zero_node] == approx(0.0)
Ejemplo n.º 3
0
def test_jaccard(graph: Graph):
    """Run ``jaccard`` with its default plan and verify the resulting property.

    Checks, in order: the new property is the last entry of the loaded node
    schema, ``jaccard_assert_valid`` accepts the result, the summary values
    reported by ``JaccardStatistics`` match the expected numbers, and a few
    individual node entries have the expected values.
    """
    prop = "NewProp"
    source_node = 0

    jaccard(graph, source_node, prop)

    # The freshly written property should sit at the last schema index.
    schema: Schema = graph.loaded_node_schema()
    last_index = len(schema) - 1
    assert schema.names[last_index] == prop

    jaccard_assert_valid(graph, source_node, prop)

    summary = JaccardStatistics(graph, source_node, prop)
    assert summary.max_similarity == approx(1)
    assert summary.min_similarity == approx(0)
    assert summary.average_similarity == approx(0.000552534)

    scores: np.ndarray = graph.get_node_property(prop).to_numpy()

    # The compared node's own entry is expected to be exactly 1.
    assert scores[source_node] == 1
    # Sample nodes whose stored value is expected to be (approximately) zero.
    for zero_node in (1917, 2812):
        assert scores[zero_node] == approx(0.0)