Example #1
def average_neighbour_degree_corrected(input_dataframe_dict,
                                       metric_variant_id):
    # append the reversed edges so each edge is present in both directions
    results_temp = copy_swap_columns_and_append(input_dataframe_dict['edges'],
                                                ['node_target', 'node_source'])
    results_temp = __average_neighbour_degree_corrected(
        input_dataframe_dict, metric_variant_id, results_temp)
    append_result_nmr(results_temp, metric_variant_id)
Example #2
def iterated_average_neighbour_degree_default(input_dataframe_dict,
                                              metric_variant_id):
    results_temp = __iterated_average_neighbour_degree(input_dataframe_dict)
    results_temp = __join_node_degree_on_target(results_temp,
                                                input_dataframe_dict)
    results_temp.columns = ['node_source', metric_variant_id]
    results_temp = __average_node_degree_fix_lost_rows(input_dataframe_dict,
                                                       results_temp)
    append_result_nmr(results_temp, metric_variant_id)
Example #3
def normalise_min_max(input_dataframe_dict, metric_variant_id):
    # Min-max normalisation. The input column key is derived from the variant
    # id, e.g. "node-degree--normalised" depends on "node-degree--default".
    column_key_input = (metric_variant_id
                        .replace("corrected-and-normalised", "corrected")
                        .replace("normalised", "default"))
    nmr = input_dataframe_dict['nmr-dependencies']
    if column_key_input not in nmr:
        sys.exit('Missing data for node metric result dependency: ' +
                 column_key_input)
    col_min = nmr[column_key_input].min()
    col_max = nmr[column_key_input].max()
    if col_max != col_min:
        nmr[metric_variant_id] = (nmr[column_key_input] - col_min) / (col_max - col_min)
    else:
        # constant column: map everything to 0 instead of dividing by zero
        nmr[metric_variant_id] = 0
    append_result_nmr(nmr, metric_variant_id, replace=True)
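For reference, the rule above is plain min-max scaling; a minimal, self-contained sketch with toy data (the column names here are illustrative only, not taken from a real run):

import pandas as pd

df = pd.DataFrame({'node-degree--default': [2, 5, 11]})
col = df['node-degree--default']
if col.max() != col.min():
    df['node-degree--normalised'] = (col - col.min()) / (col.max() - col.min())
else:
    df['node-degree--normalised'] = 0  # a constant column would divide by zero
print(df['node-degree--normalised'].tolist())  # [0.0, 0.333..., 1.0]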
Example #4
def local_clustering_coefficient_corrected(input_dataframe_dict,
                                           metric_variant_id):
    # Corrected LCC: lcc + lcc * degree / 4, i.e. lcc * (1 + degree / 4)
    nmr = input_dataframe_dict['nmr-dependencies']
    lcc = nmr['local-clustering-coefficients--default']
    nmr[metric_variant_id] = lcc + lcc * nmr['node-degree--default'] / 4
    append_result_nmr(nmr, metric_variant_id, replace=True)
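A quick numeric check of the correction with illustrative values (not from a real dataset): since the formula is equivalent to lcc * (1 + degree / 4), a degree of 4 doubles the default coefficient.

lcc, degree = 0.5, 4
lcc_corrected = lcc + lcc * degree / 4
print(lcc_corrected)  # 1.0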
Example #5
def node_degree(input_dataframe_dict, metric_variant_id):
    if USE_CUDA:
        # cugraph: for the undirected graph the out-degree equals the degree
        results_temp = input_dataframe_dict['graph'].out_degree()[[
            'vertex', 'degree'
        ]]
        results_temp.columns = ['node_source', metric_variant_id]
    else:
        results_temp = dict(input_dataframe_dict['graph'].degree())
        results_temp = transform_networkx_result(results_temp,
                                                 metric_variant_id)
    append_result_nmr(results_temp, metric_variant_id)
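The non-CUDA branch goes through transform_networkx_result; a minimal sketch of what such a dict-to-frame conversion presumably looks like (toy graph; the column layout is an assumption inferred from the CUDA branch):

import networkx as nx
import pandas as pd

g = nx.Graph([(0, 1), (1, 2)])
deg = dict(g.degree())  # {0: 1, 1: 2, 2: 1}
df = pd.DataFrame({'node_source': list(deg.keys()),
                   'node-degree--default': list(deg.values())})
print(df)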
Example #6
def unified_risk_score(input_dataframe_dict, metric_variant_id):
    # Weighted sum of six normalised metrics; the weights add up to 1.0
    nmr = input_dataframe_dict['nmr-dependencies']
    nmr[metric_variant_id] = (
        0.25 * nmr['node-degree--normalised'] +
        0.15 * nmr['average-neighbour-degree--corrected-and-normalised'] +
        0.1 * nmr['iterated-average-neighbour-degree--corrected-and-normalised'] +
        0.25 * nmr['betweenness-centrality--normalised'] +
        0.125 * nmr['eccentricity--normalised'] +
        0.125 * nmr['average-shortest-path-length--normalised'])
    append_result_nmr(nmr, metric_variant_id, replace=True)
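Because the six weights form a convex combination, the score stays within [0, 1] as long as every input metric is min-max normalised; a one-line sanity check:

weights = [0.25, 0.15, 0.1, 0.25, 0.125, 0.125]
assert abs(sum(weights) - 1.0) < 1e-12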
Example #7
def average_neighbour_degree_default(input_dataframe_dict, metric_variant_id):
    if USE_CUDA:
        results_temp = copy_swap_columns_and_append(
            input_dataframe_dict['edges'], ['node_target', 'node_source'])
        results_temp = __join_node_degree_on_target(results_temp,
                                                    input_dataframe_dict)
        results_temp.columns = ['node_source', metric_variant_id]
    else:
        from coria_lib.coria_config import nx
        results_temp = nx.average_neighbor_degree(
            input_dataframe_dict['graph'])
        results_temp = transform_networkx_result(results_temp,
                                                 metric_variant_id)
    append_result_nmr(results_temp, metric_variant_id)
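A toy cross-check of the NetworkX fallback (path graph 0-1-2: each endpoint's only neighbour has degree 2, while the middle node's neighbours both have degree 1):

import networkx as nx

g = nx.path_graph(3)
print(nx.average_neighbor_degree(g))  # {0: 2.0, 1: 1.0, 2: 2.0}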
Example #8
def betweenness_centrality(input_dataframe_dict, metric_variant_id):
    if USE_CUDA:
        from coria_lib.coria_config import cugraph
        results_temp = cugraph.betweenness_centrality(
            input_dataframe_dict['graph'], normalized=False)
        results_temp = results_temp[['vertex', 'betweenness_centrality']]
        results_temp.columns = ['node_source', metric_variant_id]
    else:
        from coria_lib.coria_config import nx
        results_temp = nx.betweenness_centrality(input_dataframe_dict['graph'],
                                                 normalized=False)
        results_temp = transform_networkx_result(results_temp,
                                                 metric_variant_id)
    append_result_nmr(results_temp, metric_variant_id)
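Toy check of the unnormalised variant: in a path graph 0-1-2 only the middle node lies on a shortest path between two other nodes, so it scores 1 and the endpoints score 0.

import networkx as nx

g = nx.path_graph(3)
print(nx.betweenness_centrality(g, normalized=False))  # {0: 0.0, 1: 1.0, 2: 0.0}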
Example #9
def connectivity_risk_classification(input_dataframe_dict, metric_variant_id):
    parameters_dict = get_metric_parameters(metric_variant_id)
    # default thresholds, unless overridden via the metric parameters
    parameters_dict['threshold-low'] = float(
        parameters_dict.get('threshold-low', 0.45))
    parameters_dict['threshold-high'] = float(
        parameters_dict.get('threshold-high', 0.55))

    lcc_mask = (input_dataframe_dict['nmr-dependencies']
                ['local-clustering-coefficients--corrected-and-normalised'] >=
                0.25).astype(float) * 0.25
    # lcc_mask now contains either 0 or 0.25.

    input_dataframe_dict['nmr-dependencies'][metric_variant_id] = 0

    # classify as -1 where URS + lcc_mask < threshold-low
    input_dataframe_dict['nmr-dependencies'][metric_variant_id].mask(
        input_dataframe_dict['nmr-dependencies']['unified-risk-score--default']
        + lcc_mask < parameters_dict['threshold-low'],
        -1,
        inplace=True)

    # classify as 1 where URS - lcc_mask >= threshold-high
    input_dataframe_dict['nmr-dependencies'][metric_variant_id].mask(
        input_dataframe_dict['nmr-dependencies']['unified-risk-score--default']
        - lcc_mask >= parameters_dict['threshold-high'],
        1,
        inplace=True)

    append_result_nmr(input_dataframe_dict['nmr-dependencies'],
                      metric_variant_id,
                      replace=True)
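A self-contained sketch of the same three-way split with toy values (0.45 and 0.55 are the defaults above; lcc_bonus stands in for lcc_mask):

import pandas as pd

urs = pd.Series([0.1, 0.5, 0.9])
lcc_bonus = pd.Series([0.25, 0.0, 0.0])
result = pd.Series(0, index=urs.index)
result = result.mask(urs + lcc_bonus < 0.45, -1)
result = result.mask(urs - lcc_bonus >= 0.55, 1)
print(result.tolist())  # [-1, 0, 1]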
Example #10
def shortest_path_length(input_dataframe_dict, metric_variant_id,
                         spl_table_requested_for_export,
                         spl_dependent_metrics):
    ecc_requested = 'eccentricity--default' in spl_dependent_metrics
    aspl_requested = 'average-shortest-path-length--default' in spl_dependent_metrics
    iandd_requested = 'iterated-average-neighbour-degree--default' in spl_dependent_metrics
    iandc_requested = 'iterated-average-neighbour-degree--corrected' in spl_dependent_metrics
    results_spl = []
    # one row per source node: the node id followed by up to four metric values
    results_nmr_from_spl = []

    from time import time_ns
    tstamp_start = time_ns()

    result_column_names = ['node_source'] + spl_dependent_metrics
    node_degree_lookup = None

    if ecc_requested or aspl_requested or iandd_requested or iandc_requested:
        node_degree_lookup = input_dataframe_dict['nmr-dependencies'][[
            'node_source', 'node-degree--default'
        ]]

    def _shortest_path_length_inner(_df1):
        # node_source is read from the enclosing scope; the loops below assign
        # it before each call
        if ecc_requested or aspl_requested or iandd_requested or iandc_requested:
            nmr_row_results = [node_source]

            # ECC
            if ecc_requested:
                ecc = _df1['distance'].max()
                nmr_row_results.append(ecc)

            # ASPL
            if aspl_requested:
                aspl = _df1['distance'].mean()
                nmr_row_results.append(aspl)

            # IAND/Default
            if iandd_requested or iandc_requested:
                _results_temp = _df1[_df1['distance'] == 2][['vertex']]
                if _results_temp.empty:
                    if iandd_requested:
                        nmr_row_results.append(0)
                    if iandc_requested:
                        nmr_row_results.append(0)
                else:
                    _results_temp = pd.merge(_results_temp,
                                             node_degree_lookup,
                                             how='inner',
                                             left_on='vertex',
                                             right_on='node_source',
                                             sort=False)
                    ndeg_series = _results_temp['node-degree--default']
                    ndeg_mean = ndeg_series.mean()
                    if iandd_requested:
                        nmr_row_results.append(ndeg_mean)

                    # IAND/Corrected
                    if iandc_requested:
                        iand_corrected = ndeg_mean
                        ndeg_std = ndeg_series.std(ddof=0)
                        ndeg_count = ndeg_series.count()
                        ndeg_median = ndeg_series.median()
                        if ndeg_std != 0 and ndeg_count != 0:
                            iand_corrected = ndeg_mean + (
                                ((ndeg_median - ndeg_mean) / ndeg_std) /
                                ndeg_count) * ndeg_mean
                        nmr_row_results.append(iand_corrected)

            results_nmr_from_spl.append(nmr_row_results)

        if spl_table_requested_for_export:
            # filter out redundant entries: the graph is undirected, hence dist(A, B) == dist(B, A)
            _df1 = _df1[_df1['vertex'] > node_source].copy()

            # add column
            _df1['node_source'] = node_source

            # store result
            results_spl.append(_df1)

    if USE_CUDA:
        # Combined computation of SPL, ECC, ASPL, IAND/Default and IAND/Corrected;
        # storing the shortest paths themselves is optional.
        # TODO /3 Test all metrics with a disconnected node too
        # Keeping all shortest paths costs O(3 * |N| * (|N| - 1) / 2) GPU memory,
        # so they are only stored when explicitly requested via the command line
        # parameter "-o [...]___shortestpathlength[...]"
        from coria_lib.coria_config import cugraph

        for node_source in input_dataframe_dict['graph'].nodes().values_host:
            # calculate all shortest paths starting from n
            df1 = cugraph.sssp(input_dataframe_dict['graph'], node_source)

            # remove unused column predecessor
            df1.drop('predecessor', axis=1, inplace=True)

            if not df1.empty:
                _shortest_path_length_inner(df1)

    else:
        from coria_lib.coria_config import nx
        from numpy import int64  # casts string to long

        spl = nx.shortest_path(input_dataframe_dict['graph'])
        for _node_source in spl:
            node_source = int64(_node_source)
            df1 = []
            for _node_target in spl[_node_source]:
                df1.append({
                    'vertex': int64(_node_target),
                    'distance': len(spl[_node_source][_node_target]) - 1
                })
            df1 = pd.DataFrame(df1)
            if not df1.empty:
                _shortest_path_length_inner(df1)

    # Store the node metric results
    if len(results_nmr_from_spl) > 0:
        results_temp = pd.DataFrame(results_nmr_from_spl,
                                    columns=result_column_names)
        append_result_nmr(results_temp, result_column_names[1:])
        for variant_id in result_column_names[1:]:
            # record start and end timestamps in milliseconds per derived metric
            metric_execution_timestamps[variant_id] = (round(
                tstamp_start / 1e6), round(time_ns() / 1e6))

    # Store the shortest path lengths
    if spl_table_requested_for_export:
        # merge all dataframes into one, resulting in |V|*(|V|-1)/2 rows
        results_spl = pd.concat(results_spl)

        # rename columns
        results_spl.rename(columns={'vertex': 'node_target'}, inplace=True)

        # reorder columns
        results_spl = results_spl.reindex(
            columns=['node_source', 'node_target', 'distance'])

        # reset row index
        results_spl.reset_index(drop=True, inplace=True)

        # write into input structure
        input_dataframe_dict['shortest-path-lengths'] = results_spl
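Standalone check of the IAND/Corrected adjustment from _shortest_path_length_inner, on a toy series of 2-hop neighbour degrees (values illustrative):

import pandas as pd

ndeg = pd.Series([2, 3, 7])
mean, median = ndeg.mean(), ndeg.median()
std, count = ndeg.std(ddof=0), ndeg.count()
corrected = mean + (((median - mean) / std) / count) * mean
print(round(corrected, 4))  # 3.3828 — pulled below the mean of 4.0 by the skew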
Example #11
def __average_shortest_path_length_postprocessing(results_temp,
                                                  metric_variant_id):
    # keep only the id and result columns and flatten the MultiIndex columns
    # left over from the grouped aggregation
    results_temp = results_temp[['node_source', metric_variant_id]]
    results_temp.columns = results_temp.columns.get_level_values(0)
    append_result_nmr(results_temp, metric_variant_id)
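Minimal illustration of the column flattening: get_level_values(0) reduces MultiIndex columns to their first level (the column names here are illustrative):

import pandas as pd

cols = pd.MultiIndex.from_tuples([('node_source', ''), ('aspl', 'mean')])
df = pd.DataFrame([[0, 1.5]], columns=cols)
df.columns = df.columns.get_level_values(0)
print(df.columns.tolist())  # ['node_source', 'aspl']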
Example #12
def iterated_average_neighbour_degree_corrected(input_dataframe_dict,
                                                metric_variant_id):
    results_temp = __iterated_average_neighbour_degree(input_dataframe_dict)
    results_temp = __average_neighbour_degree_corrected(
        input_dataframe_dict, metric_variant_id, results_temp)
    append_result_nmr(results_temp, metric_variant_id)
Example #13
def local_clustering_coefficient_default(input_dataframe_dict,
                                         metric_variant_id):
    if USE_CUDA:
        # Join-based triangle counting: make the edge list symmetric, then
        # self-join twice to enumerate 3-hop walks and keep those that return
        # to their start node.
        edges = copy_swap_columns_and_append(input_dataframe_dict['edges'],
                                             ['node_target', 'node_source'])
        results_temp = pd.merge(edges,
                                edges,
                                how='inner',
                                left_on='node_target',
                                right_on='node_source',
                                sort=False)
        results_temp.columns = [
            'node_source', 'node_neighbour_1hop', 'node_neighbour_1hop_copy',
            'node_neighbour_2hop'
        ]
        results_temp = results_temp[(
            results_temp['node_source'] != results_temp['node_neighbour_1hop'])
                                    & (results_temp['node_source'] !=
                                       results_temp['node_neighbour_2hop'])]
        results_temp = pd.merge(results_temp,
                                edges,
                                how='inner',
                                left_on='node_neighbour_2hop',
                                right_on='node_source',
                                sort=False)
        results_temp.columns = [
            'node_source', 'node_neighbour_1hop', 'node_neighbour_1hop_copy',
            'node_neighbour_2hop', 'node_neighbour_2hop_copy',
            'node_neighbour_3hop'
        ]

        # keep only closed walks, i.e. triangles whose third hop returns to
        # the source node
        results_temp = results_temp[(results_temp['node_neighbour_1hop'] !=
                                     results_temp['node_neighbour_3hop'])
                                    & (results_temp['node_source'] ==
                                       results_temp['node_neighbour_3hop'])]
        results_temp = results_temp[['node_source', 'node_neighbour_3hop']]
        if results_temp['node_source'].count() == 0:
            # no triangles anywhere in the graph: every coefficient is 0
            results_temp = edges[['node_source']].drop_duplicates()
            results_temp[metric_variant_id] = 0
        else:
            # count closed 3-walks per source node (each triangle is counted
            # twice, once per orientation), then divide by k * (k - 1)
            results_temp = results_temp.groupby('node_source',
                                                as_index=False).agg(['count'])
            results_temp.columns = results_temp.columns.get_level_values(0)
            results_temp = pd.merge(results_temp,
                                    input_dataframe_dict['nmr-dependencies'][[
                                        'node_source', 'node-degree--default'
                                    ]],
                                    how='right',
                                    on='node_source',
                                    sort=False).fillna(0)
            results_temp[metric_variant_id] = 0.0
            df_subset_filter = results_temp['node-degree--default'] > 1
            if df_subset_filter.any():
                results_temp[metric_variant_id].mask(
                    df_subset_filter,
                    results_temp['node_neighbour_3hop'] /
                    (results_temp['node-degree--default'] *
                     (results_temp['node-degree--default'] - 1.0)),
                    inplace=True)
            results_temp = results_temp[['node_source', metric_variant_id]]
    else:
        from coria_lib.coria_config import nx
        results_temp = nx.clustering(input_dataframe_dict['graph'])
        results_temp = transform_networkx_result(results_temp,
                                                 metric_variant_id)
    append_result_nmr(results_temp, metric_variant_id)
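Quick cross-check on a toy graph: nx.clustering yields the values the join-based CUDA branch is meant to reproduce (one triangle 0-1-2 plus a pendant edge 2-3):

import networkx as nx

g = nx.Graph([(0, 1), (1, 2), (0, 2), (2, 3)])
print(nx.clustering(g))  # {0: 1.0, 1: 1.0, 2: 0.333..., 3: 0}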