def average_neighbour_degree_corrected(input_dataframe_dict, metric_variant_id):
    results_temp = copy_swap_columns_and_append(input_dataframe_dict['edges'],
                                                ['node_target', 'node_source'])
    results_temp = __average_neighbour_degree_corrected(input_dataframe_dict,
                                                        metric_variant_id,
                                                        results_temp)
    append_result_nmr(results_temp, metric_variant_id)

def iterated_average_neighbour_degree_default(input_dataframe_dict, metric_variant_id):
    results_temp = __iterated_average_neighbour_degree(input_dataframe_dict)
    results_temp = __join_node_degree_on_target(results_temp, input_dataframe_dict)
    results_temp.columns = ['node_source', metric_variant_id]
    results_temp = __average_node_degree_fix_lost_rows(input_dataframe_dict, results_temp)
    append_result_nmr(results_temp, metric_variant_id)

def normalise_min_max(input_dataframe_dict, metric_variant_id):
    # Min-max normalisation: rescale the dependency column into the range [0, 1].
    nmr_deps = input_dataframe_dict['nmr-dependencies']
    column_key_input = metric_variant_id.replace("corrected-and-normalised",
                                                 "corrected").replace("normalised", "default")
    if column_key_input not in nmr_deps:
        sys.exit('Missing data for node metric result dependency: ' + column_key_input)
    column_min = nmr_deps[column_key_input].min()
    column_max = nmr_deps[column_key_input].max()
    if column_max != column_min:
        nmr_deps[metric_variant_id] = (nmr_deps[column_key_input] - column_min) / (column_max - column_min)
    else:
        # a constant column cannot be rescaled, so it is mapped to 0
        nmr_deps[metric_variant_id] = 0
    append_result_nmr(nmr_deps, metric_variant_id, replace=True)
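
# Illustrative worked example (assumed values, not part of the module): min-max normalisation
# maps a column x to (x - min(x)) / (max(x) - min(x)); the values [2, 4, 10] become
# [0.0, 0.25, 1.0]. When all values are identical the denominator would be 0, hence the
# constant-0 fallback above.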

def local_clustering_coefficient_corrected(input_dataframe_dict, metric_variant_id):
    # corrected LCC: lcc_default + lcc_default * node_degree / 4
    nmr_deps = input_dataframe_dict['nmr-dependencies']
    nmr_deps[metric_variant_id] = (
        nmr_deps['local-clustering-coefficients--default']
        + nmr_deps['local-clustering-coefficients--default'] * nmr_deps['node-degree--default'] / 4)
    append_result_nmr(nmr_deps, metric_variant_id, replace=True)
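
# Equivalent closed form of the correction above: lcc_corrected = lcc_default * (1 + degree / 4).
# Illustrative worked example (assumed values, not part of the module): lcc_default = 0.5 and
# node degree = 4 give 0.5 + 0.5 * 4 / 4 = 1.0.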

def node_degree(input_dataframe_dict, metric_variant_id):
    if USE_CUDA:
        results_temp = input_dataframe_dict['graph'].out_degree()[['vertex', 'degree']]
        results_temp.columns = ['node_source', metric_variant_id]
    else:
        results_temp = dict(input_dataframe_dict['graph'].degree())
        results_temp = transform_networkx_result(results_temp, metric_variant_id)
    append_result_nmr(results_temp, metric_variant_id)

def unified_risk_score(input_dataframe_dict, metric_variant_id):
    # weighted sum of six normalised node metrics
    nmr_deps = input_dataframe_dict['nmr-dependencies']
    nmr_deps[metric_variant_id] = (
        0.25 * nmr_deps['node-degree--normalised']
        + 0.15 * nmr_deps['average-neighbour-degree--corrected-and-normalised']
        + 0.1 * nmr_deps['iterated-average-neighbour-degree--corrected-and-normalised']
        + 0.25 * nmr_deps['betweenness-centrality--normalised']
        + 0.125 * nmr_deps['eccentricity--normalised']
        + 0.125 * nmr_deps['average-shortest-path-length--normalised'])
    append_result_nmr(nmr_deps, metric_variant_id, replace=True)
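
# Note on the weighting above: 0.25 + 0.15 + 0.1 + 0.25 + 0.125 + 0.125 = 1.0, so the unified
# risk score stays within [0, 1] as long as every input metric is normalised to [0, 1].
# Illustrative worked example (assumed values, not part of the module): inputs of
# 0.8, 0.4, 0.2, 0.6, 0.4 and 0.4 give 0.2 + 0.06 + 0.02 + 0.15 + 0.05 + 0.05 = 0.53.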

def average_neighbour_degree_default(input_dataframe_dict, metric_variant_id):
    if USE_CUDA:
        results_temp = copy_swap_columns_and_append(input_dataframe_dict['edges'],
                                                    ['node_target', 'node_source'])
        results_temp = __join_node_degree_on_target(results_temp, input_dataframe_dict)
        results_temp.columns = ['node_source', metric_variant_id]
    else:
        from coria_lib.coria_config import nx
        results_temp = nx.average_neighbor_degree(input_dataframe_dict['graph'])
        results_temp = transform_networkx_result(results_temp, metric_variant_id)
    append_result_nmr(results_temp, metric_variant_id)

def betweenness_centrality(input_dataframe_dict, metric_variant_id):
    if USE_CUDA:
        from coria_lib.coria_config import cugraph
        results_temp = cugraph.betweenness_centrality(input_dataframe_dict['graph'],
                                                      normalized=False)
        results_temp = results_temp[['vertex', 'betweenness_centrality']]
        results_temp.columns = ['node_source', metric_variant_id]
    else:
        from coria_lib.coria_config import nx
        results_temp = nx.betweenness_centrality(input_dataframe_dict['graph'],
                                                 normalized=False)
        results_temp = transform_networkx_result(results_temp, metric_variant_id)
    append_result_nmr(results_temp, metric_variant_id)

def connectivity_risk_classification(input_dataframe_dict, metric_variant_id):
    parameters_dict = get_metric_parameters(metric_variant_id)
    if 'threshold-low' not in parameters_dict:
        parameters_dict['threshold-low'] = 0.45
    else:
        parameters_dict['threshold-low'] = float(parameters_dict['threshold-low'])
    if 'threshold-high' not in parameters_dict:
        parameters_dict['threshold-high'] = 0.55
    else:
        parameters_dict['threshold-high'] = float(parameters_dict['threshold-high'])
    nmr_deps = input_dataframe_dict['nmr-dependencies']
    # lcc_mask contains 0.25 for every node whose corrected-and-normalised local clustering
    # coefficient is at least 0.25, and 0 for every other node.
    lcc_mask = (nmr_deps['local-clustering-coefficients--corrected-and-normalised'] >= 0.25).astype(float) * 0.25
    nmr_deps[metric_variant_id] = 0
    # if URS + lcc_mask < threshold-low then -1
    nmr_deps[metric_variant_id].mask(
        nmr_deps['unified-risk-score--default'] + lcc_mask < parameters_dict['threshold-low'],
        -1,
        inplace=True)
    # if URS - lcc_mask >= threshold-high then 1
    nmr_deps[metric_variant_id].mask(
        nmr_deps['unified-risk-score--default'] - lcc_mask >= parameters_dict['threshold-high'],
        1,
        inplace=True)
    append_result_nmr(nmr_deps, metric_variant_id, replace=True)
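
# Illustrative worked example (assumed values, not part of the module): with the default
# thresholds threshold-low = 0.45 and threshold-high = 0.55,
#   URS = 0.30, lcc_mask = 0.00 -> 0.30 + 0.00 < 0.45, so the node is classified as -1
#   URS = 0.90, lcc_mask = 0.25 -> 0.90 - 0.25 >= 0.55, so the node is classified as 1
#   URS = 0.50, lcc_mask = 0.25 -> neither condition holds, so the node keeps class 0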

def shortest_path_length(input_dataframe_dict, metric_variant_id, spl_table_requested_for_export, spl_dependent_metrics):
    ecc_requested = 'eccentricity--default' in spl_dependent_metrics
    aspl_requested = 'average-shortest-path-length--default' in spl_dependent_metrics
    iandd_requested = 'iterated-average-neighbour-degree--default' in spl_dependent_metrics
    iandc_requested = 'iterated-average-neighbour-degree--corrected' in spl_dependent_metrics
    results_spl = []
    # collects up to four node metric results per source node (one list per node)
    results_nmr_from_spl = []
    from time import time_ns
    tstamp_start = time_ns()
    # the inner function appends values in the fixed order ECC, ASPL, IAND/Default,
    # IAND/Corrected, so these column names must follow the same order
    result_column_names = ['node_source'] + spl_dependent_metrics
    node_degree_lookup = None
    if ecc_requested or aspl_requested or iandd_requested or iandc_requested:
        node_degree_lookup = input_dataframe_dict['nmr-dependencies'][[
            'node_source', 'node-degree--default'
        ]]

    def _shortest_path_length_inner(_df1):
        # node_source is taken from the enclosing loop over all source nodes
        if ecc_requested or aspl_requested or iandd_requested or iandc_requested:
            nmr_row_results = [node_source]
            # ECC
            if ecc_requested:
                ecc = _df1['distance'].max()
                nmr_row_results.append(ecc)
            # ASPL
            if aspl_requested:
                aspl = _df1['distance'].mean()
                nmr_row_results.append(aspl)
            # IAND/Default
            if iandd_requested or iandc_requested:
                _results_temp = _df1[_df1['distance'] == 2][['vertex']]
                if _results_temp.empty:
                    if iandd_requested:
                        nmr_row_results.append(0)
                    if iandc_requested:
                        nmr_row_results.append(0)
                else:
                    _results_temp = pd.merge(_results_temp,
                                             node_degree_lookup,
                                             how='inner',
                                             left_on='vertex',
                                             right_on='node_source',
                                             sort=False)
                    ndeg_series = _results_temp['node-degree--default']
                    ndeg_mean = ndeg_series.mean()
                    if iandd_requested:
                        nmr_row_results.append(ndeg_mean)
                    # IAND/Corrected
                    if iandc_requested:
                        iand_corrected = ndeg_mean
                        ndeg_std = ndeg_series.std(ddof=0)
                        ndeg_count = ndeg_series.count()
                        ndeg_median = ndeg_series.median()
                        if ndeg_std != 0 and ndeg_count != 0:
                            iand_corrected = ndeg_mean + (
                                ((ndeg_median - ndeg_mean) / ndeg_std) / ndeg_count) * ndeg_mean
                        nmr_row_results.append(iand_corrected)
            results_nmr_from_spl.append(nmr_row_results)
        if spl_table_requested_for_export:
            # filter out all redundant entries; we expect an undirected graph, hence dist(A, B) == dist(B, A)
            _df1 = _df1[_df1['vertex'] > node_source].copy()
            # add column
            _df1['node_source'] = node_source
            # store result
            results_spl.append(_df1)

    if USE_CUDA:
        # Combination of SPL, ECC, ASPL, IAND/Default and IAND/Corrected, with storing the shortest paths being optional.
        # TODO /3 Test all metrics with a disconnected node too
        # Storing all shortest paths requires a lot of GPU memory { O( 3 * |N| * (|N|-1) / 2) }, therefore we avoid it
        # unless explicitly requested by the corresponding command line parameter "-o [...]___shortestpathlength[...]"
        from coria_lib.coria_config import cugraph
        for node_source in input_dataframe_dict['graph'].nodes().values_host:
            # calculate all shortest paths starting from node_source
            df1 = cugraph.sssp(input_dataframe_dict['graph'], node_source)
            # remove the unused column 'predecessor'
            df1.drop('predecessor', axis=1, inplace=True)
            if not df1.empty:
                _shortest_path_length_inner(df1)
    else:
        from coria_lib.coria_config import nx
        from numpy import int64  # casts string to long
        spl = nx.shortest_path(input_dataframe_dict['graph'])
        for _node_source in spl:
            node_source = int64(_node_source)
            df1 = []
            for _node_target in spl[_node_source]:
                df1.append({
                    'vertex': int64(_node_target),
                    'distance': len(spl[_node_source][_node_target]) - 1
                })
            df1 = pd.DataFrame(df1)
            if not df1.empty:
                _shortest_path_length_inner(df1)

    # Store the node metric results
    if len(results_nmr_from_spl) > 0:
        results_temp = pd.DataFrame(results_nmr_from_spl, columns=result_column_names)
        append_result_nmr(results_temp, result_column_names[1:])
        for variant_id in result_column_names[1:]:
            metric_execution_timestamps[variant_id] = (round(tstamp_start / 1e6),
                                                       round(time_ns() / 1e6))

    # Store the shortest path lengths
    if spl_table_requested_for_export:
        # merge all dataframes into one, resulting in |V|*(|V|-1)/2 rows
        results_spl = pd.concat(results_spl)
        # rename columns
        results_spl.rename(columns={'vertex': 'node_target'}, inplace=True)
        # reorder columns
        results_spl = results_spl.reindex(columns=['node_source', 'node_target', 'distance'])
        # reset row index
        results_spl.reset_index(drop=True, inplace=True)
        # write into input structure
        input_dataframe_dict['shortest-path-lengths'] = results_spl
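
# Note on the IAND/Corrected formula used in _shortest_path_length_inner above:
#   iand_corrected = mean + ((median - mean) / std) / count * mean
# computed over the degrees of all nodes at distance 2 (population std, ddof=0).
# Illustrative worked example (assumed values, not part of the module): degrees [1, 2, 9] give
# mean = 4, median = 2, std ~= 3.56, count = 3, so
# iand_corrected = 4 + ((2 - 4) / 3.56) / 3 * 4 ~= 3.25.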

def __average_shortest_path_length_postprocessing(results_temp, metric_variant_id):
    results_temp = results_temp[['node_source', metric_variant_id]]
    results_temp.columns = results_temp.columns.get_level_values(0)
    append_result_nmr(results_temp, metric_variant_id)

def iterated_average_neighbour_degree_corrected(input_dataframe_dict, metric_variant_id):
    results_temp = __iterated_average_neighbour_degree(input_dataframe_dict)
    results_temp = __average_neighbour_degree_corrected(input_dataframe_dict,
                                                        metric_variant_id,
                                                        results_temp)
    append_result_nmr(results_temp, metric_variant_id)

def local_clustering_coefficient_default(input_dataframe_dict, metric_variant_id):
    if USE_CUDA:
        edges = copy_swap_columns_and_append(input_dataframe_dict['edges'],
                                             ['node_target', 'node_source'])
        # join the edge list (with reversed copies appended) with itself to enumerate 2-hop walks
        results_temp = pd.merge(edges, edges, how='inner',
                                left_on='node_target', right_on='node_source', sort=False)
        results_temp.columns = [
            'node_source', 'node_neighbour_1hop', 'node_neighbour_1hop_copy', 'node_neighbour_2hop'
        ]
        results_temp = results_temp[
            (results_temp['node_source'] != results_temp['node_neighbour_1hop'])
            & (results_temp['node_source'] != results_temp['node_neighbour_2hop'])]
        # join once more to extend the walks to 3 hops
        results_temp = pd.merge(results_temp, edges, how='inner',
                                left_on='node_neighbour_2hop', right_on='node_source', sort=False)
        results_temp.columns = [
            'node_source', 'node_neighbour_1hop', 'node_neighbour_1hop_copy',
            'node_neighbour_2hop', 'node_neighbour_2hop_copy', 'node_neighbour_3hop'
        ]
        # keep only walks that close a triangle, i.e. return to the source node
        results_temp = results_temp[
            (results_temp['node_neighbour_1hop'] != results_temp['node_neighbour_3hop'])
            & (results_temp['node_source'] == results_temp['node_neighbour_3hop'])]
        results_temp = results_temp[['node_source', 'node_neighbour_3hop']]
        if results_temp['node_source'].count() == 0:
            # no triangles in the graph: every node gets a clustering coefficient of 0
            results_temp = edges[['node_source']].drop_duplicates()
            results_temp[metric_variant_id] = 0
        else:
            results_temp = results_temp.groupby('node_source', as_index=False).agg(['count'])
            results_temp.columns = results_temp.columns.get_level_values(0)
            results_temp = pd.merge(results_temp,
                                    input_dataframe_dict['nmr-dependencies'][[
                                        'node_source', 'node-degree--default'
                                    ]],
                                    how='right',
                                    on='node_source',
                                    sort=False).fillna(0)
            results_temp[metric_variant_id] = 0.0
            df_subset_filter = results_temp['node-degree--default'] > 1
            if df_subset_filter.any():
                results_temp[metric_variant_id].mask(
                    df_subset_filter,
                    results_temp['node_neighbour_3hop'] /
                    (results_temp['node-degree--default'] * (results_temp['node-degree--default'] - 1.0)),
                    inplace=True)
            results_temp = results_temp[['node_source', metric_variant_id]]
    else:
        from coria_lib.coria_config import nx
        results_temp = nx.clustering(input_dataframe_dict['graph'])
        results_temp = transform_networkx_result(results_temp, metric_variant_id)
    append_result_nmr(results_temp, metric_variant_id)
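
# Note on the GPU implementation of local_clustering_coefficient_default above: the two self-joins
# count, per node, the closed walks source -> neighbour_1 -> neighbour_2 -> source. Each triangle
# containing the node contributes two such walks (one per direction), so dividing the count by
# k * (k - 1) yields the standard local clustering coefficient 2T / (k * (k - 1)).
# Illustrative worked example (assumed values, not part of the module): a node of degree 3 that
# lies in exactly one triangle produces 2 closed walks, giving 2 / (3 * 2) = 1/3.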