def _split_tail(items, proportion):
    """Split *items* into (head, tail) where tail holds ~proportion of them."""
    n_items = len(items)
    # Clamp so out-of-range proportions cannot produce a negative cut index.
    n_tail = min(max(int(n_items * proportion), 0), n_items)
    # An explicit cut index is used instead of ``items[:-n_tail]`` because
    # ``items[:-0]`` is empty and ``items[-0:]`` is the whole list, which
    # would swap the roles of the two parts whenever n_tail == 0.
    cut = n_items - n_tail
    return items[:cut], items[cut:]


def make_train_test_set(graph, radius, test_proportion=.3, ratio_neg_to_pos=10):
    """Build labelled train and test sets of subgraphs from node pairs.

    Positive pairs are the edges of ``graph``; negative pairs are sampled
    from its non-edges. Both lists are shuffled, the negatives are capped at
    ``ratio_neg_to_pos`` times the number of positives, and the last
    ``test_proportion`` of each list is held out for testing. The held-out
    positive edges are removed from a copy of ``graph`` and every subgraph
    (train and test alike) is produced from that reduced copy via
    ``_make_subgraph_set``.

    Parameters
    ----------
    graph : graph object
        Must provide ``edges()``, ``copy()`` and ``remove_edges_from()`` and
        be accepted by ``nx.non_edges``.
    radius : passed through unchanged to ``_make_subgraph_set``.
    test_proportion : float
        Fraction of positive and of negative pairs held out for testing.
        If the resulting count rounds down to zero, the test part is empty
        and all pairs are used for training.
    ratio_neg_to_pos : int or float
        Maximum number of negative pairs kept per positive pair.

    Returns
    -------
    ((tr_graphs, tr_targets), (te_graphs, te_targets))
        Graph lists as returned by ``paired_shuffle`` and ``numpy`` arrays of
        targets (1 for pairs that are edges, 0 for non-edges).
    """
    pos = list(graph.edges())
    neg = list(nx.non_edges(graph))
    random.shuffle(pos)
    random.shuffle(neg)

    # Cap the negatives; int() keeps the slice bound valid when the ratio
    # is fractional.
    max_n_neg = min(int(len(pos) * ratio_neg_to_pos), len(neg))
    neg = neg[:max_n_neg]

    tr_pos, te_pos = _split_tail(pos, test_proportion)
    tr_neg, te_neg = _split_tail(neg, test_proportion)

    # Remove the held-out positive edges so they are absent from the graph
    # that all subgraphs are generated from.
    tr_graph = graph.copy()
    tr_graph.remove_edges_from(te_pos)

    tr_pos_graphs = list(_make_subgraph_set(tr_graph, radius, tr_pos))
    tr_neg_graphs = list(_make_subgraph_set(tr_graph, radius, tr_neg))
    te_pos_graphs = list(_make_subgraph_set(tr_graph, radius, te_pos))
    te_neg_graphs = list(_make_subgraph_set(tr_graph, radius, te_neg))

    tr_graphs = tr_pos_graphs + tr_neg_graphs
    te_graphs = te_pos_graphs + te_neg_graphs
    tr_targets = [1] * len(tr_pos_graphs) + [0] * len(tr_neg_graphs)
    te_targets = [1] * len(te_pos_graphs) + [0] * len(te_neg_graphs)

    # Shuffle graphs and targets together so the classes are interleaved
    # while each graph stays aligned with its label.
    tr_graphs, tr_targets = paired_shuffle(tr_graphs, tr_targets)
    te_graphs, te_targets = paired_shuffle(te_graphs, te_targets)
    return (tr_graphs, np.array(tr_targets)), (te_graphs, np.array(te_targets))
def learning_curve(self, graphs, targets, cv=5, n_steps=10, start_fraction=0.1):
    """Compute learning-curve scores for ``self.model`` on the given graphs.

    The graphs and targets are shuffled together with ``paired_shuffle``,
    the graphs are converted with ``self.transform``, and the result is
    handed to the module-level ``learning_curve`` function (presumably
    scikit-learn's -- confirm against the file's imports) using ROC AUC
    scoring and ``self.n_jobs`` parallel jobs.

    Parameters
    ----------
    graphs, targets : paired inputs accepted by ``paired_shuffle``.
    cv : forwarded as the ``cv`` argument of the module-level function.
    n_steps : int
        Number of evenly spaced training-set fractions to evaluate.
    start_fraction : float
        Smallest training-set fraction; the largest is always 1.0.

    Returns
    -------
    (train_sizes, train_scores, test_scores) as produced by the
    module-level ``learning_curve`` function.
    """
    shuffled_graphs, shuffled_targets = paired_shuffle(graphs, targets)
    features = self.transform(shuffled_graphs)
    fractions = np.linspace(start_fraction, 1.0, n_steps)
    # Inside a method body the bare name resolves to the module-level
    # function, not to this method, so this is not a recursive call.
    sizes, scores_train, scores_test = learning_curve(
        self.model,
        features,
        shuffled_targets,
        cv=cv,
        train_sizes=fractions,
        scoring='roc_auc',
        n_jobs=self.n_jobs,
    )
    return sizes, scores_train, scores_test
def learning_curve(self, graphs, targets, cv=5, n_steps=10, start_fraction=0.1):
    """Compute learning-curve scores for ``self.model`` on the given graphs.

    The graphs and targets are shuffled together with ``paired_shuffle``,
    the graphs are converted with ``self.transform``, and the result is
    handed to the module-level ``learning_curve`` function (presumably
    scikit-learn's -- confirm against the file's imports) using ROC AUC
    scoring.

    Parameters
    ----------
    graphs, targets : paired inputs accepted by ``paired_shuffle``.
    cv : forwarded as the ``cv`` argument of the module-level function.
    n_steps : int
        Number of evenly spaced training-set fractions to evaluate.
    start_fraction : float
        Smallest training-set fraction; the largest is always 1.0.

    Returns
    -------
    (train_sizes, train_scores, test_scores) as produced by the
    module-level ``learning_curve`` function.
    """
    shuffled_graphs, shuffled_targets = paired_shuffle(graphs, targets)
    features = self.transform(shuffled_graphs)
    fractions = np.linspace(start_fraction, 1.0, n_steps)
    # Inside a method body the bare name resolves to the module-level
    # function, not to this method, so this is not a recursive call.
    sizes, scores_train, scores_test = learning_curve(
        self.model,
        features,
        shuffled_targets,
        cv=cv,
        train_sizes=fractions,
        scoring='roc_auc',
    )
    return sizes, scores_train, scores_test