def calculate(neo4j: Neo4JConnection, config: Config):
    """Compute basic size statistics of the graph and pass them to ``csv.write``.

    Four rows are produced:

    * ``volume``      - number of relationships of the known relationship types
    * ``nr_vertices`` - number of nodes carrying at least one known label
    * ``size``        - ``volume + nr_vertices``
    * ``fill``        - ``volume / nr_vertices^2``

    Args:
        neo4j: Connection whose ``query`` method runs both counting queries.
        config: Not used by this function.
    """
    nodetypes = ["Event", "Common", "Entity", "Log"]
    relationtypes = ["E_C", "DF", "E_EN", "L_E"]

    # Cypher alternation of relationship types: E_C|DF|E_EN|L_E
    relation_pattern = "|".join(relationtypes)
    # Every label needs exactly one `n:` prefix. Folding with
    # f'n:{a} OR n:{b}' re-prefixes the accumulator on each step and yields
    # `n:n:n:Event OR ...`, which no longer matches plain Event nodes.
    label_predicate = " OR ".join(f"n:{label}" for label in nodetypes)

    volume = neo4j.query(
        f"""
        MATCH()-[e:{relation_pattern}]->()
        return count(e)
    """, "Calculating Volume")[0][0]

    nr_vertices = neo4j.query(f"""
        MATCH (n)
        WHERE {label_predicate}
        return count(n)
    """)[0][0]

    # An empty graph has no meaningful fill ratio; report 0.0 rather than
    # aborting the whole statistics run with ZeroDivisionError.
    fill = volume / (nr_vertices * nr_vertices) if nr_vertices else 0.0

    results = [
        ["volume", volume, "edges"],
        ["nr_vertices", nr_vertices, "vertices"],
        ["size", volume + nr_vertices, "vertices + edges"],
        ["fill", fill, "edges/vertices^2"],
    ]

    csv.write(results, ["Statistic", "Value", "Unit"], "basic")
# Example #2
# 0
def calculate(neo4j: Neo4JConnection):
    """Count relationships per type and nodes per label set.

    Runs two aggregate queries in order (edges first, then nodes) and hands
    each result to ``csv.write`` right after it has been fetched.

    Args:
        neo4j: Connection whose ``query`` method executes the Cypher text.
    """
    # (cypher, progress description, csv header, output name)
    jobs = (
        (
            """
        match ()-[n]->() return type(n), count(n)
    """,
            "calculating counts per edge type",
            ["type", "count"],
            "counts-per-edge-type",
        ),
        (
            """
        match (n) return labels(n), count(n)
    """,
            "calculating counts per node type",
            ["labels", "count"],
            "counts-per-node-type",
        ),
    )

    for cypher, description, header, output_name in jobs:
        rows = neo4j.query(cypher, description)
        csv.write(rows, header, output_name)
def __retrieve_relationship_data(neo4j: Neo4JConnection):
    """Fetch every relationship with its endpoints and pass the rows to ``csv.write``.

    Each row holds: source node id, source label, target node id, target
    label and relationship type. Only the first label of each node is kept;
    a node without labels gets an empty string as its label.

    Args:
        neo4j: Connection whose ``query`` method executes the Cypher text.
            Rows are assumed to be mutable lists - TODO confirm against
            ``Neo4JConnection.query``.
    """
    results = neo4j.query("""
        match (s)-[r]->(t)
        return ID(s) as sourceID, labels(s) as sourceLabels, ID(t) as targetID, labels(t) as targetLabels, type(r) as relationType
    """)

    for result in results:
        # labels() returns a list, which is empty for an unlabeled node.
        # Indexing [0] unconditionally would raise IndexError there.
        result[1] = result[1][0] if result[1] else ''
        result[3] = result[3][0] if result[3] else ''

    csv.write(
        results,
        ['source_id', 'source_label', 'target_id', 'target_label', 'rel_type'],
        'relationships')
def calculate(neo4j: Neo4JConnection, config: Config):
    """Collect in/out degree statistics per node type and pass them to ``csv.write``.

    For every node type and direction a summary row and a set of histogram
    rows are gathered. For ``Event`` and ``Entity`` nodes the same is repeated
    once per entry of ``config['entity']``, restricted by that entry's
    ``'label'``.

    Args:
        neo4j: Connection forwarded to the ``__simple`` and
            ``__histogram_query_data`` helpers.
        config: Mapping with an ``'entity'`` sequence whose items expose a
            ``'label'`` key.
    """
    summary_rows = []
    histogram_rows = []

    def collect(row_label, direction_name, *query_args):
        # Summary query first, histogram query second - for every variant.
        summary = __simple(neo4j, *query_args)
        summary_rows.append([row_label, direction_name] + summary[0])
        for bucket in __histogram_query_data(neo4j, *query_args):
            histogram_rows.append([row_label, direction_name] + bucket)

    for node_label in ("Event", "Common", "Entity", "Log"):
        for arrow, direction_name in (("<", "in"), (">", "out")):
            collect(node_label, direction_name, node_label, arrow)

            if node_label not in ("Event", "Entity"):
                continue

            for entity_type in config['entity']:
                entity_label = entity_type['label']
                collect(f'{node_label}: {entity_label}', direction_name,
                        node_label, arrow, entity_label)

    summary_rows.sort(key=lambda row: row[0])

    # Successive stable sorts, least significant column first. Final order:
    # node type ascending, direction descending, degree ascending.
    for column, descending in ((2, False), (1, True), (0, False)):
        histogram_rows.sort(key=lambda row, column=column: row[column],
                            reverse=descending)

    csv.write(summary_rows,
              ['node type', 'direction', 'average', 'stdev', 'max', 'min'],
              "degrees")
    csv.write(histogram_rows, ['node type', 'direction', 'degree', 'count'],
              "degrees_histogram")
def calculate(neo4j: Neo4JConnection, config: Config):
    """Gather DF path length statistics per entity type and pass them to ``csv.write``.

    For each entry of ``config['entity']`` one summary row and a set of
    histogram rows are collected via the ``__simple`` and ``__histogram``
    helpers. The summary table is written first; the histogram table is
    written after being ordered by entity type, then by path length.

    Args:
        neo4j: Connection forwarded to the helper queries.
        config: Mapping with an ``'entity'`` sequence whose items expose a
            ``'label'`` key.
    """
    summary_rows = []
    histogram_rows = []

    for entity_type in config['entity']:
        entity_label = entity_type['label']

        summary = __simple(neo4j, entity_label)
        summary_rows.append([entity_label] + summary[0])

        buckets = __histogram(neo4j, entity_label)
        histogram_rows.extend([[entity_label] + bucket for bucket in buckets])

    csv.write(summary_rows,
              ['entity type', 'average', 'stdev', 'max', 'min'],
              'df-path-length')

    # Two stable passes: secondary key (path length) first, then entity type.
    by_length = sorted(histogram_rows, key=lambda row: row[1])
    ordered_rows = sorted(by_length, key=lambda row: row[0])
    csv.write(ordered_rows, ['entity type', 'df_path_length', 'count'],
              'df-path-length_histogram')
def calculate(neo4j: Neo4JConnection):
    """Compute the graph diameter from the exported relationship table.

    The relationship CSV is reused when it already exists; otherwise it is
    retrieved first through ``__retrieve_relationship_data``. Relationships
    whose type contains ``'-'`` are dropped, then those whose type contains
    ``'L_E'``, and the remainder is handed to ``__calc``. The single result
    row is passed to ``csv.write`` under the name ``'diameter'``.

    Args:
        neo4j: Connection used only when the relationship data has to be
            retrieved.
    """
    # Single source of truth for the cache location, so the existence check,
    # the read and the user-facing hint cannot drift apart.
    relationships_file = "statistics-csv.bpi14/relationships.csv"

    if not path.exists(relationships_file):
        __retrieve_relationship_data(neo4j)
    else:
        print("Found existing relationship data, not retrieving new data")
        print(
            f"Delete the file '{relationships_file}' and rerun the diameter calculation to retrieve new data"
        )

    diameters = []

    relationships = pd.read_csv(relationships_file)
    rel_filter = ~relationships['rel_type'].str.contains('-')
    relationships = relationships[rel_filter]

    # Unrestricted variant is currently disabled:
    # diameters.append(['none', __calc(relationships)])

    le_filter = ~(relationships['rel_type'].str.contains('L_E'))
    relationships = relationships[le_filter]
    diameters.append(['no L_E', __calc(relationships)])

    csv.write(diameters, ['restriction', 'diameter'], 'diameter')