# Accumulate one directed edge (winner -> loser) per row of the schedule.
# Iterating the two columns directly avoids building a Series per row and the
# unused row index that `iterrows()` produced.
for winner, loser in zip(schedule["Winner"], schedule["Loser"]):
    adjacency_df.loc[winner, loser] += 1

# Row sums count wins, because rows are indexed by the winning team.
remove_winless = False
n_wins = adjacency_df.sum(axis=1)
if remove_winless:
    # Keep only teams with at least one win. `n_wins` is left as computed
    # above: the adjacency matrix has not changed, so recomputing it here (as
    # the previous version did) gave the same values.
    # NOTE(review): assumes `teams` is ordered like `adjacency_df.index` — confirm.
    teams = teams[n_wins > 0]
adjacency_df = adjacency_df.reindex(index=teams, columns=teams)
n_teams = len(teams)

ax, _ = adjplot(
    adjacency_df.values, plot_type="scattermap", sizes=(10, 10), marker="s"
)
ax.set_title("2020 NCAA Football Season Graph", fontsize=25)
ax.set(xlabel="Losing team", ylabel="Winning team")
plt.savefig(output_path / "unsorted_adjacency.png", **savefig_kws)

print(f"Number of teams {n_teams}")

#%% [markdown]
# ## Matching to a flat upper triangular matrix
# Under a given sorting (permutation) of the adjacency matrix, any game (edge) that is
# an upset will fall in the lower triangle, because a lower-ranked team beat a higher-ranked
# team. We can therefore create a ranking by graph matching the adjacency matrix to a
# flat upper triangular matrix, thereby inducing a sorting/ranking that minimizes the
# number of upsets.
for node in anytree.PreOrderIter(mt): node.probability_estimate_ = probs[node.name] mat = mt.full_probability_matrix.values from graspologic.simulations import sample_edges adj = sample_edges(mat, directed=False, loops=False) #%% fig, ax = plt.subplots(1, 1, figsize=(10, 10)) from graspologic.simulations import er_np # adj = er_np(sum(n_per_leaf), 0.1) _, divider = adjplot(mat, plot_type="heatmap", ax=ax) top_ax = divider.append_axes("top", size="40%", pad=0, sharex=ax) plot_dendrogram( top_ax, mt, index_key="adjacency_index", orientation="v", linewidth=2, markersize=25, linecolor="grey", markercolor="white", ) left_ax = divider.append_axes("left", size="40%", pad=0, sharey=ax) plot_dendrogram(
ax.axis("off")

#%% [markdown]
# ## Plot the network as an adjacency matrix
#%%
# Sort the tree's node table by `labels_0`, then `labels_1`. The sort is done
# in place on the object returned by `lt.node_data`, and the new position of
# each node is recorded alongside it.
node_data = lt.node_data
node_data.sort_values(by=["labels_0", "labels_1"], inplace=True)
node_data["sorted_adjacency_index"] = range(len(node_data))

# Apply the same permutation to the rows and columns of the adjacency matrix.
node_order = node_data["adjacency_index"]
sorted_adjacency = adjacency[np.ix_(node_order, node_order)]

fig, ax = plt.subplots(figsize=(16, 16))
ax, divider = adjplot(
    sorted_adjacency, ax=ax, plot_type="scattermap", sizes=(0.01, 0.01)
)

# Dendrograms share an axis with the matrix so they stay aligned with it.
margin_kws = dict(size="10%", pad=0)
left_ax = divider.append_axes("left", sharey=ax, **margin_kws)
plot_dendrogram(left_ax, lt, orientation="h")
top_ax = divider.append_axes("top", sharex=ax, **margin_kws)
plot_dendrogram(top_ax, lt, orientation="v")

stashfig("hleiden-adjplot")

#%% [markdown]
# ## Create a flat set of labels (leaves of the hierarchical clustering tree) for plotting
#%%
# Work on a copy so the tree's own node table is not re-indexed.
node_data = lt.node_data.copy().set_index(["labels_0", "labels_1"])
flat_labels = node_data.index.to_flat_index()
# NOTE(review): `probability_matrix` and `n_trials` are defined before this
# point; the next four statements look like the body of a per-trial loop —
# confirm their indentation against the enclosing code.
lt = LeidenTree(max_levels=2, trials=5, verbose=False)
lt.fit(adjacency)
lt.estimate_parameters(adjacency)
trial_estimate = lt.full_probability_matrix.values
probability_matrix += trial_estimate / n_trials

# Bare expression: shows the distinct probability values as cell output.
np.unique(probability_matrix)

#%%
# Sort the node table in place by `labels_0`, then `labels_1`, and record each
# node's new position.
node_data = lt.node_data
node_data.sort_values(by=["labels_0", "labels_1"], inplace=True)
node_data["sorted_adjacency_index"] = range(len(node_data))

# Permute rows and columns of the adjacency matrix into the sorted order.
node_order = node_data["adjacency_index"]
sorted_adjacency = adjacency[np.ix_(node_order, node_order)]

fig, ax = plt.subplots(figsize=(16, 16))
ax, divider = adjplot(sorted_adjacency, ax=ax, plot_type="heatmap")

margin_kws = dict(size="10%", pad=0)
left_ax = divider.append_axes("left", sharey=ax, **margin_kws)
plot_dendrogram(left_ax, lt, orientation="h")
top_ax = divider.append_axes("top", sharex=ax, **margin_kws)
plot_dendrogram(top_ax, lt, orientation="v")

#%%
# Draw a fresh SBM sample and refit the tree on it.
adjacency, labels = sbm(ns, B, return_labels=True)
lt = LeidenTree(max_levels=2, trials=5, verbose=False)
lt.fit(adjacency)
lt.estimate_parameters(adjacency)

#%%
A = np.arange(64).reshape(8, 8)
n_games = adj_matched.sum() print(f"Number of games: {n_games}") print(f"Number of non-upsets (graph matching score): {gm.score_}") print(f"Number of upsets: {upsets}") print(f"Upset ratio: {upsets/n_games}") print() print("Ranking:") print(teams[perm_inds]) #%% [markdown] # ## Plotting the matched (ranked) graph #%% ax, _ = adjplot(adj_matched, plot_type="scattermap", sizes=(10, 10), marker="s") ax.plot([0, n_teams], [0, n_teams], linewidth=1, color="black", linestyle="-") ylabel = r"$\leftarrow$ Ranked low " ylabel += "Winning team " ylabel += r"Ranked high $\rightarrow$" ax.set_ylabel(ylabel, fontsize="large") ax.set(xlabel="Losing team") ax.set_title("2020 NCAA Footbal Season Graph", fontsize=25) ax.fill_between( [0, n_teams], [0, n_teams], [n_teams, n_teams], zorder=-1, alpha=0.4, color="lightgrey",
# NOTE(review): `cut` is defined before this point; if this call belongs to a
# loop body, restore its indentation to match the enclosing code.
ax.axhline(cut - 1, color="grey", linestyle=":", linewidth=1)
ax.axis("off")

#%% [markdown]
# ## Plot the network as an adjacency matrix
#%%
# Sort the tree's node table in place by `labels_0`, then `labels_1`, and
# record each node's new position.
node_data = lt.node_data
node_data.sort_values(by=["labels_0", "labels_1"], inplace=True)
node_data["sorted_adjacency_index"] = range(len(node_data))

# Reorder rows and columns of the adjacency matrix with the same permutation.
node_order = node_data["adjacency_index"]
sorted_adjacency = adjacency[np.ix_(node_order, node_order)]

fig, ax = plt.subplots(figsize=(16, 16))
ax, divider = adjplot(
    sorted_adjacency, ax=ax, plot_type="scattermap", sizes=(0.1, 0.1)
)

# Dendrograms share an axis with the matrix so they stay aligned with it.
margin_kws = dict(size="10%", pad=0)
left_ax = divider.append_axes("left", sharey=ax, **margin_kws)
plot_dendrogram(left_ax, lt, orientation="h")
top_ax = divider.append_axes("top", sharex=ax, **margin_kws)
plot_dendrogram(top_ax, lt, orientation="v")

stashfig("hleiden-adjplot")

#%% [markdown]
# ## Create a flat set of labels (leaves of the hierarchical clustering tree) for plotting
#%%
# Work on a copy so the tree's own node table is not re-indexed, then store the
# flattened (labels_0, labels_1) index as a column.
node_data = lt.node_data.copy().set_index(["labels_0", "labels_1"])
flat_labels = node_data.index.to_flat_index()
node_data["labels_flat"] = flat_labels