Example #1
0
import convokit
import numpy as np
import matplotlib.pyplot as plt

# Resolve the location of the small Reddit corpus and load it.
print("Loading corpus")
corpus_path = convokit.download("reddit-corpus-small")
corpus = convokit.Corpus(filename=corpus_path)

# Run the hypergraph transformer over the corpus before any embedding step.
print("Computing hypergraph features")
hc = convokit.HyperConvo()
hc.fit_transform(corpus)

# Thread embedding first (7 components), then a t-SNE community embedding
# grouped by the "subreddit" key.
print("Computing low-dimensional embeddings")
te = convokit.ThreadEmbedder(n_components=7)
te.fit_transform(corpus)

ce = convokit.CommunityEmbedder(community_key="subreddit", method="tsne")
ce.fit_transform(corpus)

# The plot data is read back from the corpus-level metadata entry
# "communityEmbedder": "pts" holds the points, "labels" their names.
community_meta = corpus.get_meta()["communityEmbedder"]
pts = community_meta["pts"]
labels = community_meta["labels"]

# Each point unpacks into an (x, y) pair; scatter them and tag every point
# with the label found at the same position.
xs, ys = zip(*pts)
plt.scatter(xs, ys)
for idx, name in enumerate(labels):
    plt.annotate(name, (xs[idx], ys[idx]))
plt.savefig("tsne")
plt.show()
Example #2
0
import convokit
import numpy as np
import matplotlib.pyplot as plt

# Resolve the location of the full Reddit corpus and load it.
print("Loading corpus")
corpus_path = convokit.download("reddit-corpus")
corpus = convokit.Corpus(filename=corpus_path)

# Per-thread hypergraph features, requested with prefix_len=10.
print("Computing hypergraph features")
hc = convokit.HyperConvo(corpus)
threads_feats = hc.retrieve_feats(prefix_len=10)
# Feature names are taken from the first entry of threads_feats
# (presumably every thread carries the same feature keys -- confirm).
first_key = list(threads_feats.keys())[0]
feat_names = sorted(threads_feats[first_key].keys())

print("Computing low-dimensional embeddings")
X_threads, roots, components = hc.embed_threads(
    threads_feats, return_components=True
)
X_communities, subreddits = hc.embed_communities(threads_feats, "subreddit")

# For each of the first 7 embedding dimensions, list the ten threads with the
# smallest coordinate followed by the ten with the largest (largest first).
print("TOP THREADS")
for dim in range(7):
    print("dimension {}".format(dim))
    print("- most-negative threads")
    # Ascending by this dimension's coordinate; each item is (root, vector).
    by_coord = sorted(zip(roots, X_threads), key=lambda pair: pair[1][dim])
    for root, vec in by_coord[:10]:
        print("\t{}  {:.4f}".format(root, vec[dim]))
    print("- most-positive threads")
    for root, vec in by_coord[-10:][::-1]:
        print("\t{}  {:.4f}".format(root, vec[dim]))
    print()
    print()

print("TOP SUBREDDITS")
Example #3
0
import convokit
import numpy as np
import matplotlib.pyplot as plt

# Resolve the location of the small Reddit corpus and load it.
print("Loading corpus")
corpus_path = convokit.download("reddit-corpus-small")
corpus = convokit.Corpus(filename=corpus_path)

# Run the hypergraph transformer; its output is read back below from each
# conversation's "hyperconvo" metadata entry.
print("Computing hypergraph features")
hc = convokit.HyperConvo(prefix_len=10, include_root=False)
hc.fit_transform(corpus)

# Merge the per-conversation feature dicts into one mapping.
threads_feats = {}
convos = corpus.iter_conversations()

for conversation in convos:
    threads_feats.update(conversation.meta['hyperconvo'])

# Feature names are taken from the first merged entry
# (presumably every entry carries the same feature keys -- confirm).
first_key = list(threads_feats.keys())[0]
feat_names = sorted(threads_feats[first_key].keys())

# Thread embedding: results are read from the "threadEmbedder" corpus metadata.
print("Computing low-dimensional embeddings")
te = convokit.ThreadEmbedder(return_components=True)
te.fit_transform(corpus)
thread_meta = corpus.get_meta()["threadEmbedder"]
X_threads = thread_meta["X"]
roots = thread_meta["roots"]
components = thread_meta["components"]

# Community embedding keyed by subreddit: results are read from the
# "communityEmbedder" corpus metadata.
ce = convokit.CommunityEmbedder(community_key="subreddit")
ce.fit_transform(corpus)
community_meta = corpus.get_meta()["communityEmbedder"]
X_communities = community_meta["pts"]
subreddits = community_meta["labels"]
import convokit
# Print the imported module object so the run shows which convokit is in use.
print(convokit)
import numpy as np

# Resolve the location of the small Reddit corpus and build the corpus object.
corpus_path = convokit.download("reddit-corpus-small")
corpus = convokit.Corpus(filename=corpus_path)

# _make_hypergraph is a private HyperConvo helper that normally would not be
# called from outside; it is used directly here only so the Hypergraph
# methods can be demonstrated on the resulting object.
hyperconvo = convokit.HyperConvo()
G = hyperconvo._make_hypergraph(corpus)


def summarize_dist(name, l):
    """Print the minimum, mean and maximum of a collection of numbers.

    Args:
        name: Label printed in front of the statistics.
        l: Iterable of numeric values. Any iterable is accepted, including a
            one-shot generator.

    An empty collection is reported as ``"<name>: no values"`` instead of
    raising ``ValueError`` from ``min``.
    """
    # Materialize once: the statistics need three passes over the data, which
    # would exhaust a generator after the first one.
    values = list(l)
    if not values:
        print("{}: no values".format(name))
        return
    print("{}: min {}, mean {:.4f}, max {}".format(
        name, min(values), np.mean(values), max(values)))


# Summarize the in- and outdegree distributions of the hypergraph G.
# Each row is (label, degree method, positional flags). The method is only
# called when its row is reached, so every distribution is computed and
# printed in the order listed.
# NOTE(review): judging by the labels, the two flags presumably select user
# (True) versus comment (False) nodes for each end of an edge -- confirm
# against the Hypergraph.indegrees / outdegrees signatures.
degree_reports = (
    ("user to user indegrees", G.indegrees, (True, True)),
    ("user to user outdegrees", G.outdegrees, (True, True)),
    ("user to comment indegrees", G.indegrees, (True, False)),
    ("user to comment outdegrees", G.outdegrees, (True, False)),
    ("comment to comment indegrees", G.indegrees, (False, False)),
    ("comment to comment outdegrees", G.outdegrees, (False, False)),
)
for label, degree_of, flags in degree_reports:
    summarize_dist(label, degree_of(*flags))
print()


def summarize_motifs(name, l):
    print("{}: count {}".format(name, len(l)))