def calculate(neo4j: Neo4JConnection, config: Config):
    """Compute basic size statistics of the graph and write them to 'basic'.

    Four rows are written, each as ``[statistic, value, unit]``:

    * ``volume``      - number of relationships of type E_C, DF, E_EN or L_E
    * ``nr_vertices`` - number of nodes labelled Event, Common, Entity or Log
    * ``size``        - ``volume + nr_vertices``
    * ``fill``        - ``volume / nr_vertices**2``

    Args:
        neo4j: Connection whose ``query`` result is indexed as ``[0][0]`` to
            obtain the single count returned by each query.
        config: Not used by this function; kept so the signature matches the
            other ``calculate`` entry points.
    """
    nodetypes = ["Event", "Common", "Entity", "Log"]
    relationtypes = ["E_C", "DF", "E_EN", "L_E"]

    # Cypher alternatives: "E_C|DF|E_EN|L_E" for the relationship pattern and
    # "n:Event OR n:Common OR ..." for the label predicate. Each label gets
    # exactly one "n:" prefix; folding with reduce() re-prefixed the
    # accumulator and yielded "n:n:n:Event OR ...", which silently excluded
    # Event nodes from the count.
    relation_pattern = "|".join(relationtypes)
    label_predicate = " OR ".join(f"n:{label}" for label in nodetypes)

    volume = neo4j.query(
        f""" MATCH()-[e:{relation_pattern}]->() return count(e) """,
        "Calculating Volume")[0][0]
    nr_vertices = neo4j.query(
        f""" MATCH (n) WHERE {label_predicate} return count(n) """)[0][0]

    # An empty graph has no meaningful edges/vertices^2 ratio; report 0.0
    # instead of failing with ZeroDivisionError.
    fill = volume / (nr_vertices * nr_vertices) if nr_vertices else 0.0

    results = [
        ["volume", volume, "edges"],
        ["nr_vertices", nr_vertices, "vertices"],
        ["size", volume + nr_vertices, "vertices + edges"],
        ["fill", fill, "edges/vertices^2"],
    ]
    # NOTE(review): `csv` is expected to be the project's CSV helper (the
    # standard-library module has no `write`) - confirm against the imports.
    csv.write(results, ["Statistic", "Value", "Unit"], "basic")
def calculate(neo4j: Neo4JConnection):
    """Export occurrence counts per relationship type and per node label set.

    Runs one aggregate query for relationships and one for nodes, writing each
    result to its own CSV ('counts-per-edge-type' and 'counts-per-node-type').

    Args:
        neo4j: Connection used to run the two Cypher queries.
    """
    # (cypher, progress description, CSV header, CSV name) - processed in
    # order, so the edge export is queried and written before the node export.
    exports = (
        (
            """ match ()-[n]->() return type(n), count(n) """,
            "calculating counts per edge type",
            ["type", "count"],
            "counts-per-edge-type",
        ),
        (
            """ match (n) return labels(n), count(n) """,
            "calculating counts per node type",
            ["labels", "count"],
            "counts-per-node-type",
        ),
    )
    for cypher, description, header, target in exports:
        rows = neo4j.query(cypher, description)
        csv.write(rows, header, target)
def __retrieve_relationship_data(neo4j: Neo4JConnection):
    """Dump every relationship as a (source, target, type) row to 'relationships'.

    Each query row holds the source node id, source labels, target node id,
    target labels and relationship type. The two label lists are reduced to
    their first entry before the rows are written.

    Args:
        neo4j: Connection used to run the Cypher query; its rows must be
            mutable sequences because they are updated in place.
    """
    header = [
        'source_id', 'source_label', 'target_id', 'target_label', 'rel_type'
    ]
    rows = neo4j.query(
        """ match (s)-[r]->(t) return ID(s) as sourceID, labels(s) as sourceLabels, ID(t) as targetID, labels(t) as targetLabels, type(r) as relationType """
    )

    # Columns 1 and 3 carry label lists; keep only the first label of each.
    label_columns = (1, 3)
    for row in rows:
        for column in label_columns:
            row[column] = row[column][0]

    csv.write(rows, header, 'relationships')
def calculate(neo4j: Neo4JConnection, config: Config):
    """Collect in/out degree statistics per node type and write two CSVs.

    For every node type (Event, Common, Entity, Log) and both directions a
    summary row and the histogram rows are queried. For Event and Entity the
    same is repeated once per configured entity type. The summaries go to
    'degrees', the histogram to 'degrees_histogram'.

    Args:
        neo4j: Connection handed through to the query helpers.
        config: Mapping whose 'entity' entry is iterated; each item's 'label'
            selects the entity type.
    """
    summary_rows = []
    histogram_rows = []

    def record(row_label, direction, *query_args):
        """Run both degree queries for one selection and store their rows."""
        prefix = [row_label, direction]
        summary_rows.append(prefix + __simple(neo4j, *query_args)[0])
        for bucket in __histogram_query_data(neo4j, *query_args):
            histogram_rows.append(prefix + bucket)

    for node_label in ("Event", "Common", "Entity", "Log"):
        for arrow, direction in (("<", "in"), (">", "out")):
            record(f'{node_label}', direction, node_label, arrow)

            # Only Event and Entity nodes are additionally split per entity type.
            if node_label not in ("Event", "Entity"):
                continue
            for entity_type in config['entity']:
                record(f'{node_label}: {entity_type["label"]}', direction,
                       node_label, arrow, entity_type['label'])

    summary_rows.sort(key=lambda row: row[0])

    # Successive stable sorts, least significant key first: the final order is
    # node type ascending, then direction descending, then degree ascending.
    for column, descending in ((2, False), (1, True), (0, False)):
        histogram_rows.sort(key=lambda row, column=column: row[column],
                            reverse=descending)

    csv.write(summary_rows,
              ['node type', 'direction', 'average', 'stdev', 'max', 'min'],
              "degrees")
    csv.write(histogram_rows,
              ['node type', 'direction', 'degree', 'count'],
              "degrees_histogram")
def calculate(neo4j: Neo4JConnection, config: Config):
    """Write DF path length statistics per configured entity type.

    For each entity type one summary row and its histogram rows are queried,
    each prefixed with the entity label. Summaries go to 'df-path-length';
    the histogram, ordered by entity type and then path length, goes to
    'df-path-length_histogram'.

    Args:
        neo4j: Connection handed through to the query helpers.
        config: Mapping whose 'entity' entry is iterated; each item's 'label'
            selects the entity type.
    """
    summary_rows, histogram_rows = [], []

    for entity_type in config['entity']:
        entity_label = entity_type['label']
        summary_rows.append([entity_label] + __simple(neo4j, entity_label)[0])
        histogram_rows.extend(
            [entity_label] + bucket
            for bucket in __histogram(neo4j, entity_label))

    csv.write(summary_rows,
              ['entity type', 'average', 'stdev', 'max', 'min'],
              'df-path-length')

    # Two stable sorts, minor key first: entity type, then path length.
    for column in (1, 0):
        histogram_rows.sort(key=lambda row, column=column: row[column])

    csv.write(histogram_rows,
              ['entity type', 'df_path_length', 'count'],
              'df-path-length_histogram')
def calculate(neo4j: Neo4JConnection):
    """Compute the graph diameter without L_E relationships and write it out.

    The relationship list is read from a CSV file. If that file does not exist
    yet it is first exported from the database; otherwise the existing file is
    reused and a hint on how to force a refresh is printed.

    Args:
        neo4j: Connection used only when the relationship data has to be
            exported.
    """
    # Single source of truth for the path, so the existence check, the hint
    # shown to the user and the read can never disagree (the hint previously
    # named 'statistics/relationships.csv', a different file).
    relationships_file = "statistics-csv.bpi14/relationships.csv"

    if not path.exists(relationships_file):
        # NOTE(review): assumes the export ends up at `relationships_file`;
        # the target directory is decided by the csv helper - confirm.
        __retrieve_relationship_data(neo4j)
    else:
        print("Found existing relationship data, not retrieving new data")
        print(
            f"Delete the file '{relationships_file}' and rerun the diameter "
            "calculation to retrieve new data"
        )

    relationships = pd.read_csv(relationships_file)

    # Drop every relationship whose type name contains a '-'.
    rel_filter = ~relationships['rel_type'].str.contains('-')
    relationships = relationships[rel_filter]

    # NOTE: only the 'no L_E' restriction is reported; a diameter over the
    # unrestricted ('none') relationship set is intentionally not computed.
    le_filter = ~(relationships['rel_type'].str.contains('L_E'))
    relationships = relationships[le_filter]

    diameters = [['no L_E', __calc(relationships)]]
    csv.write(diameters, ['restriction', 'diameter'], 'diameter')