예제 #1
0
# Tally one directed edge (winner -> loser) per scheduled game; repeated
# matchups accumulate in the same cell.
for winner, loser in zip(schedule["Winner"], schedule["Loser"]):
    adjacency_df.loc[winner, loser] += 1

# Optionally drop teams with no wins, then put rows and columns of the
# adjacency matrix in the order given by `teams`.
remove_winless = False
n_wins = adjacency_df.sum(axis=1)  # row sums: wins per team
if remove_winless:
    # Boolean mask over `teams`.
    # NOTE(review): assumes `teams` is in the same order as adjacency_df's
    # index — confirm where `teams` is built.
    teams = teams[n_wins > 0]
adjacency_df = adjacency_df.reindex(index=teams, columns=teams)
if remove_winless:
    # Row sums must be taken after the reindex; taken before it they would
    # still cover the dropped teams and count wins against them.
    n_wins = adjacency_df.sum(axis=1)

n_teams = len(teams)

# Plot the adjacency matrix in its current team order and save it to disk.
ax, _ = adjplot(
    adjacency_df.values,
    plot_type="scattermap",
    sizes=(10, 10),
    marker="s",
)
# Title spelling corrected ("Football").
ax.set_title("2020 NCAA Football Season Graph", fontsize=25)
# Rows are indexed by the winner and columns by the loser.
ax.set(ylabel="Winning team", xlabel="Losing team")
plt.savefig(output_path / "unsorted_adjacency.png", **savefig_kws)

print(f"Number of teams {n_teams}")

#%% [markdown]
# ## Matching to a flat upper triangular matrix
# Under a given sorting (permutation) of the adjacency matrix, any game (edge) that is
# an upset will fall in the lower triangle, because a lower-ranked team beat a higher-ranked
# team. We can therefore create a ranking by graph matching the adjacency matrix to a
# flat upper triangular matrix, thereby inducing a sorting/ranking that minimizes the
# number of upsets.
예제 #2
0
# Walk the tree `mt` and give every node the probability stored in `probs`
# under that node's name.
for node in anytree.PreOrderIter(mt):
    node.probability_estimate_ = probs[node.name]

# Pull the full probability matrix out as a plain array.
probability_frame = mt.full_probability_matrix
mat = probability_frame.values

from graspologic.simulations import sample_edges

# Sample an undirected, loop-free graph from the probability matrix.
adj = sample_edges(
    mat,
    directed=False,
    loops=False,
)

#%%
# Heatmap of the probability matrix with a dendrogram in an axis above it.
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
from graspologic.simulations import er_np

# adj = er_np(sum(n_per_leaf), 0.1)

_, divider = adjplot(mat, plot_type="heatmap", ax=ax)

# New axis on top of the heatmap; sharex=ax ties its x-axis to the heatmap's.
top_ax = divider.append_axes("top", size="40%", pad=0, sharex=ax)
top_dendrogram_kws = dict(
    index_key="adjacency_index",
    orientation="v",
    linewidth=2,
    markersize=25,
    linecolor="grey",
    markercolor="white",
)
plot_dendrogram(top_ax, mt, **top_dendrogram_kws)

left_ax = divider.append_axes("left", size="40%", pad=0, sharey=ax)
plot_dendrogram(
예제 #3
0
    ax.axis("off")


#%% [markdown]
# ## Plot the network as an adjacency matrix
#%%
# Sort nodes by first-level, then second-level label. The sort happens in
# place on the frame returned by `lt.node_data`.
node_data = lt.node_data
node_data.sort_values(by=["labels_0", "labels_1"], inplace=True)
node_data["sorted_adjacency_index"] = range(len(node_data))

# Apply the same node order to both the rows and the columns.
node_order = node_data["adjacency_index"]
sorted_adjacency = adjacency[np.ix_(node_order, node_order)]

fig, ax = plt.subplots(1, 1, figsize=(16, 16))
ax, divider = adjplot(
    sorted_adjacency,
    plot_type="scattermap",
    sizes=(0.01, 0.01),
    ax=ax,
)

# Dendrogram margins: horizontal on the left (shared y), vertical on top
# (shared x).
left_ax = divider.append_axes("left", size="10%", pad=0, sharey=ax)
plot_dendrogram(left_ax, lt, orientation="h")

top_ax = divider.append_axes("top", size="10%", pad=0, sharex=ax)
plot_dendrogram(top_ax, lt, orientation="v")

stashfig("hleiden-adjplot")

#%% [markdown]
# ## Create a flat set of labels (leaves of the hierarchical clustering tree) for plotting
#%%
# Work on a copy of the node table, indexed by the (labels_0, labels_1) pair.
node_data = lt.node_data.copy().set_index(["labels_0", "labels_1"])
# Flatten the two-level index into a single index.
flat_labels = node_data.index.to_flat_index()
예제 #4
0
    lt = LeidenTree(trials=5, verbose=False, max_levels=2)
    lt.fit(adjacency)
    lt.estimate_parameters(adjacency)
    probability_matrix += lt.full_probability_matrix.values / n_trials

np.unique(probability_matrix)

#%%

# Sort nodes by first-level, then second-level label. The sort happens in
# place on the frame returned by `lt.node_data`.
node_data = lt.node_data
node_data.sort_values(by=["labels_0", "labels_1"], inplace=True)
node_data["sorted_adjacency_index"] = range(len(node_data))

# Apply the same node order to both the rows and the columns.
node_order = node_data["adjacency_index"]
sorted_adjacency = adjacency[np.ix_(node_order, node_order)]

fig, ax = plt.subplots(1, 1, figsize=(16, 16))
ax, divider = adjplot(
    sorted_adjacency,
    plot_type="heatmap",
    ax=ax,
)

# Dendrogram margins: horizontal on the left (shared y), vertical on top
# (shared x).
left_ax = divider.append_axes("left", size="10%", pad=0, sharey=ax)
plot_dendrogram(left_ax, lt, orientation="h")

top_ax = divider.append_axes("top", size="10%", pad=0, sharex=ax)
plot_dendrogram(top_ax, lt, orientation="v")

#%%
# Generate a graph with `sbm` from `ns` and `B`; return_labels=True makes the
# call return a second value, stored in `labels`.
# NOTE(review): presumably a stochastic block model sampler with block sizes
# `ns` and block-probability matrix `B` — confirm against the file's imports.
adjacency, labels = sbm(ns, B, return_labels=True)
# NOTE(review): max_levels=2 presumably caps the hierarchy depth — confirm
# against LeidenTree's definition.
lt = LeidenTree(trials=5, verbose=False, max_levels=2)
lt.fit(adjacency)
# Parameters are estimated from the same adjacency the tree was fit on.
lt.estimate_parameters(adjacency)

#%%
A = np.arange(8 * 8).reshape((8, 8))
예제 #5
0
n_games = adj_matched.sum()

# Report the matching statistics, one per line.
for summary_line in (
    f"Number of games: {n_games}",
    f"Number of non-upsets (graph matching score): {gm.score_}",
    f"Number of upsets: {upsets}",
    f"Upset ratio: {upsets/n_games}",
):
    print(summary_line)

# Blank separator line, then the teams in matched order.
print()
print("Ranking:")
print(teams[perm_inds])

#%% [markdown]
# ## Plotting the matched (ranked) graph
#%%
# Plot the matched adjacency matrix with annotated axes.
ax, _ = adjplot(
    adj_matched,
    plot_type="scattermap",
    sizes=(10, 10),
    marker="s",
)
# Main diagonal from (0, 0) to (n_teams, n_teams).
ax.plot([0, n_teams], [0, n_teams], linewidth=1, color="black", linestyle="-")
# The padding spaces spread the three label parts along the y-axis.
ylabel = (
    r"$\leftarrow$ Ranked low         "
    "Winning team           "
    r"Ranked high $\rightarrow$"
)
ax.set_ylabel(ylabel, fontsize="large")
ax.set(xlabel="Losing team")
# Title spelling corrected ("Football").
ax.set_title("2020 NCAA Football Season Graph", fontsize=25)
ax.fill_between(
    [0, n_teams],
    [0, n_teams],
    [n_teams, n_teams],
    zorder=-1,
    alpha=0.4,
    color="lightgrey",
예제 #6
0
            ax.axhline(cut - 1, linewidth=1, color="grey", linestyle=":")

    ax.axis("off")


#%% [markdown]
# ## Plot the network as an adjacency matrix
#%%
# Sort nodes by first-level, then second-level label. The sort happens in
# place on the frame returned by `lt.node_data`.
node_data = lt.node_data
node_data.sort_values(by=["labels_0", "labels_1"], inplace=True)
node_data["sorted_adjacency_index"] = range(len(node_data))

# Apply the same node order to both the rows and the columns.
node_order = node_data["adjacency_index"]
sorted_adjacency = adjacency[np.ix_(node_order, node_order)]

fig, ax = plt.subplots(1, 1, figsize=(16, 16))
ax, divider = adjplot(
    sorted_adjacency,
    plot_type="scattermap",
    sizes=(0.1, 0.1),
    ax=ax,
)

# Dendrogram margins: horizontal on the left (shared y), vertical on top
# (shared x).
left_ax = divider.append_axes("left", size="10%", pad=0, sharey=ax)
plot_dendrogram(left_ax, lt, orientation="h")

top_ax = divider.append_axes("top", size="10%", pad=0, sharex=ax)
plot_dendrogram(top_ax, lt, orientation="v")

stashfig("hleiden-adjplot")

#%% [markdown]
# ## Create a flat set of labels (leaves of the hierarchical clustering tree) for plotting
#%%
# Work on a copy of the node table, indexed by the (labels_0, labels_1) pair.
node_data = lt.node_data.copy().set_index(["labels_0", "labels_1"])
# Flatten the two-level index into a single index and keep it as a column too.
flat_labels = node_data.index.to_flat_index()
node_data["labels_flat"] = flat_labels