def main(argv=None):
    """Plot the semantic shift of each requested word.

    Parses the command-line options, loads the embeddings with
    ``SequentialEmbedding.load`` and then, for every word passed via
    ``--words``, fits a t-SNE projection of the word's lookups, draws the
    words and annotations through ``helpers`` and calls ``helpers.savefig``.
    The per-period similarities returned by ``helpers.get_time_sims`` are
    printed afterwards.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. ``None`` (the default) keeps the normal
            command-line behaviour.
    """
    parser = argparse.ArgumentParser(
        description="Plot semantic shift of words")
    parser.add_argument('-w',
                        '--words',
                        nargs='+',
                        help='List of words to plot',
                        required=True)
    # Not marked as required: a required option never falls back to its
    # default, which made ``default=15`` dead configuration.
    parser.add_argument("-n",
                        "--neighbors",
                        type=int,
                        default=15,
                        help="Number of neighbors to plot")
    # Restricting the choices makes argparse reject unknown protocol types
    # up front instead of failing later with an unbound ``embeddings``.
    parser.add_argument(
        "--protocol_type",
        type=str,
        choices=("RT", "BRD"),
        help=
        "Whether to run test for Reichstagsprotokolle (RT) or Bundestagsprotokolle (BRD)",
        required=True)
    # NOTE(review): the folder is optional here but is always handed to
    # SequentialEmbedding.load -- confirm that load() accepts None.
    parser.add_argument("--model_folder",
                        type=str,
                        help="Folder where word2vec models are located",
                        required=False)

    args = parser.parse_args(argv)
    words_to_plot = args.words
    n = args.neighbors

    # Both protocol types load their models the same way; the type itself is
    # only forwarded to the plotting and saving helpers below.
    embeddings = SequentialEmbedding.load(args.model_folder)

    for word1 in words_to_plot:
        helpers.clear_figure()
        try:
            time_sims, lookups, nearests, sims = helpers.get_time_sims(
                embeddings, word1, topn=n)

            words = list(lookups.keys())
            values = [lookups[word] for word in words]
            fitted = helpers.fit_tsne(values)
            # len() rather than truthiness: the projection may be an array.
            if not len(fitted):
                print(f"Couldn't model word {word1}")
                continue

            # draw the words onto the graph
            cmap = helpers.get_cmap(len(time_sims))
            annotations = helpers.plot_words(word1, words, fitted, cmap, sims,
                                             len(embeddings.embeds) + 1,
                                             args.protocol_type)
            print(f'Annotations:{annotations}')

            if annotations:
                helpers.plot_annotations(annotations)

            helpers.savefig(word1, args.protocol_type, n)
            for year, sim in time_sims.items():
                print(year, sim)
        except KeyError:
            # A KeyError anywhere in the pipeline is reported as the word
            # being absent from the embedding space; the loop then moves on.
            print(f'{word1} is not in the embedding space.')
# Example no. 2
# 0
import collections
from sklearn.manifold import TSNE

import numpy as np
import matplotlib.pyplot as plt

# Words to process, as returned by helpers.get_words().
WORDS = helpers.get_words()
if __name__ == "__main__":
    embeddings = helpers.load_embeddings()

    for word1 in WORDS:
        time_sims, lookups, nearests, sims = helpers.get_time_sims(
            embeddings, word1)

        helpers.clear_figure()

        # We remove word1 from our words because we just want to plot the
        # different related words: a key is dropped when the part before
        # its first "|" equals word1. This is built as a real list because
        # it is used for the vector lookup below AND passed to plot_words;
        # a lazy filter() object would already be exhausted on Python 3.
        words = [
            word for word in lookups.keys() if word.split("|")[0] != word1
        ]

        values = [lookups[word] for word in words]
        fitted = helpers.fit_tsne(values)
        # len() rather than truthiness: the projection may be an array.
        if not len(fitted):
            # %-formatting prints the same text on Python 2 and Python 3.
            print("Couldn't model word %s" % word1)
            continue

        cmap = helpers.get_cmap(len(time_sims))
        annotations = helpers.plot_words(word1, words, fitted, cmap, sims)
import collections
from sklearn.manifold import TSNE


import numpy as np
import matplotlib.pyplot as plt

# Words to process, as returned by helpers.get_words().
WORDS = helpers.get_words()
if __name__ == "__main__":
    embeddings = helpers.load_embeddings()

    for word1 in WORDS:
        time_sims, lookups, nearests, sims = helpers.get_time_sims(embeddings, word1)

        helpers.clear_figure()

        # We remove word1 from our words because we just want to plot the
        # different related words: a key is dropped when the part before
        # its first "|" equals word1. This is built as a real list because
        # it is used for the vector lookup below AND passed to plot_words;
        # a lazy filter() object would already be exhausted on Python 3.
        words = [
            word for word in lookups.keys() if word.split("|")[0] != word1
        ]

        values = [lookups[word] for word in words]
        fitted = helpers.fit_tsne(values)
        # len() rather than truthiness: the projection may be an array.
        if not len(fitted):
            # %-formatting prints the same text on Python 2 and Python 3.
            print("Couldn't model word %s" % word1)
            continue

        cmap = helpers.get_cmap(len(time_sims))
        annotations = helpers.plot_words(word1, words, fitted, cmap, sims)

        # The figure for each word is saved under "<word>_shaded".
        helpers.savefig("%s_shaded" % word1)