def print_results(row_labels, prediction, labels, graph: gc.Graph):
    print("connected")
    for i in range(len(labels)):
        if labels[i] == 1:
            print(row_labels[i], "Predicted: ", prediction[i], "actual:", labels[i],
                  "correct:", prediction[i] == labels[i],
                  "Degree of node:", graph.degree(row_labels[i]))
    print("not connected")
    for i in range(len(labels)):
        if labels[i] == 0:
            print(row_labels[i], "Predicted: ", prediction[i], "actual:", labels[i],
                  "correct:", prediction[i] == labels[i],
                  "Degree of node:", graph.degree(row_labels[i]))

def test_per_node(nodes_to_train_on: List[int], graph: gc.Graph, save_info: sl.MemoryAccess,
                  feature_type: ft.FeatureType, num_of_bins: int,
                  limit_num_training_graphs: Optional[int], sampling_strategy: Optional,
                  c, removed_node: int):
    """Trains and evaluates a classifier for a single removed node and returns
    its test metrics (as a pd.Series) together with the node id."""
    if nodes_to_train_on is not None:
        tr_node_list = nodes_to_train_on[removed_node]
    else:
        raise ValueError("Training node list is required but was not given")

    train_data = save_info.load_list_of_training_data(
        removed_node=removed_node, graph=graph.delete_node(removed_node),
        feature_type=feature_type, num_of_bins=num_of_bins,
        limit_num=limit_num_training_graphs, tr_node_list=tr_node_list)
    utils.assert_df_no_nan(train_data, text=f'Training data for removed node {removed_node}')

    test_data = save_info.load_test_data(removed_node=removed_node,
                                         feature_type=feature_type,
                                         num_of_bins=num_of_bins)
    utils.assert_df_no_nan(test_data, text=f'Test data for removed node {removed_node}')

    tr_labels, tr_predicted, tr_probabilities, te_labels, te_predicted, te_probabilities = \
        _train(c, train_data=train_data, test_data=test_data,
               sampling_strategy=sampling_strategy)

    train_results = evaluate(tr_labels, tr_predicted, tr_probabilities)
    test_results = evaluate(te_labels, te_predicted, te_probabilities)

    # add some additional information
    test_results["degree"] = graph.degree(removed_node)
    test_results["avg_neighbour_degree"] = graph.average_neighbour_degree(removed_node)
    test_results["avg dist to pos pred"] = \
        calculate_avg_distance_to_positive_predicted_nodes(graph=graph,
                                                           removed_node=removed_node,
                                                           labels=test_data.index.values,
                                                           predicted=te_predicted)
    test_results["num training features"] = len(train_data)
    test_results["num test features"] = len(test_data)
    test_results["train false negative"] = train_results["false negative"]
    test_results["train true positive"] = train_results["true positive"]
    test_results["train accuracy"] = train_results["accuracy"]
    test_results["train precision"] = train_results["precision"]
    test_results["train recall"] = train_results["recall"]
    test_results["train auc"] = train_results["auc"]

    return pd.Series(test_results), removed_node

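# Hypothetical usage sketch (not part of this module's call chain): since
# test_per_node returns `(pd.Series, removed_node)`, per-node results can be
# collected into a single DataFrame. The names `removed_nodes`, `limit`, etc.
# are placeholders that would come from the surrounding pipeline.
#
#   results = [test_per_node(nodes_to_train_on, graph, save_info, feature_type,
#                            num_of_bins, limit, sampling_strategy, c, node)
#              for node in removed_nodes]
#   per_node_df = pd.DataFrame({node: series for series, node in results}).T
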
def compute_degrees(graph: gc.Graph, labels: List[int]):
    degrees = pd.DataFrame(0, index=labels, columns=["degree"])
    for label in labels:
        degrees.loc[label] = graph.degree(label)

    # normalise degrees to [0, 1]
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(degrees.astype(float))
    return pd.DataFrame(x_scaled, index=labels, columns=["degree"])

def __create_degree_column_for_feature(graph: gc.Graph, row_labels: List[int]):
    degrees = pd.DataFrame(0, index=row_labels, columns=["degree"])
    for label in row_labels:
        degrees.loc[label] = graph.degree(label)

    # normalise degrees by the maximum degree; note this maps values to
    # [min/max, 1], unlike the MinMaxScaler used in compute_degrees
    # (see the sketch below)
    degrees = degrees / degrees.values.max()
    return degrees

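# Illustration (hypothetical helper, not called anywhere in the pipeline):
# the two degree normalisations above are not equivalent. Dividing by the
# maximum maps degrees to [min/max, 1], while the MinMaxScaler used in
# compute_degrees maps them to [0, 1].
def _demo_normalisation_difference():
    degrees = pd.Series([2.0, 5.0, 11.0])
    print((degrees / degrees.max()).round(2).tolist())  # [0.18, 0.45, 1.0]
    min_max = (degrees - degrees.min()) / (degrees.max() - degrees.min())
    print(min_max.round(2).tolist())                    # [0.0, 0.33, 1.0]
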
def get_sample_with_degree(graph: gc.Graph, node_list: List[int], degree: int, quantity: int):
    """Samples `quantity` nodes from `node_list` whose degree is as close as
    possible to `degree`: exact matches first, then widening the allowed
    degree difference one step at a time."""
    degrees = np.array([graph.degree(n) for n in node_list])
    samples_to_find = quantity

    # exact matches first; subsample if there are more than needed
    candidates = np.where(degrees == degree)[0]
    if len(candidates) > samples_to_find:
        candidates = np.random.choice(candidates, size=samples_to_find, replace=False)
    sample = candidates
    samples_to_find -= len(candidates)

    # widen the degree window until enough samples are found
    offset = 1
    while samples_to_find > 0:
        # print(f"Sampling range {offset}")
        if degree - offset < 0 and degree + offset > degrees.max():
            # the whole degree range is exhausted, no more candidates exist
            raise ValueError(f'Not enough training samples: required {quantity}, '
                             f'got {quantity - samples_to_find}')
        candidates = np.concatenate([np.where(degrees == (degree + offset))[0],
                                     np.where(degrees == (degree - offset))[0]])
        if len(candidates) > samples_to_find:
            candidates = np.random.choice(candidates, size=samples_to_find, replace=False)
        sample = np.concatenate([sample, candidates])
        samples_to_find -= len(candidates)
        offset += 1

    return [node_list[s] for s in sample]

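# Minimal sketch of get_sample_with_degree on a stub graph. `_StubGraph` is a
# hypothetical stand-in that implements only the `degree`/`neighbours` calls
# used in this file; it is NOT the real gc.Graph.
class _StubGraph:
    def __init__(self, adjacency):
        self._adjacency = adjacency

    def degree(self, node):
        return len(self._adjacency[node])

    def neighbours(self, node):
        return self._adjacency[node]


def _demo_degree_sampling():
    # a path graph 0-1-2-3: the endpoints have degree 1, the inner nodes degree 2
    path = _StubGraph({0: [1], 1: [0, 2], 2: [1, 3], 3: [2]})
    # asking for 3 nodes of degree 2 finds [1, 2] exactly, then widens the
    # degree window by 1 and picks one of the endpoints at random
    print(get_sample_with_degree(path, [0, 1, 2, 3], degree=2, quantity=3))
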
def _get_avg_degree_of_neighbours(graph: gc.Graph, node: int):
    # assumes `node` has at least one neighbour, otherwise this divides by zero
    neighbour_degrees = [graph.degree(n) for n in graph.neighbours(node)]
    return sum(neighbour_degrees) / len(neighbour_degrees)

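# Illustration, reusing the hypothetical _StubGraph above: in a star graph the
# centre's neighbours are all leaves of degree 1, so their average degree is 1.0.
def _demo_avg_neighbour_degree():
    star = _StubGraph({0: [1, 2, 3], 1: [0], 2: [0], 3: [0]})
    print(_get_avg_degree_of_neighbours(star, 0))  # 1.0
    print(_get_avg_degree_of_neighbours(star, 1))  # 3.0 (its only neighbour is the centre)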