def main():
    """Plot how the nearest neighbours of each requested word shift over time.

    Command-line arguments:
        -w / --words       one or more words to plot (required)
        -n / --neighbors   number of neighbours to plot (default: 15)
        --protocol_type    'RT' (Reichstagsprotokolle) or 'BRD'
                           (Bundestagsprotokolle) (required)
        --model_folder     folder where the word2vec models are located

    For every word, a t-SNE projection of its neighbours is drawn, saved via
    ``helpers.savefig`` and the per-period similarities are printed. Words
    that raise ``KeyError`` while being processed are reported as missing
    from the embedding space and skipped.
    """
    parser = argparse.ArgumentParser(
        description="Plot semantic shift of words")
    parser.add_argument('-w', '--words', nargs='+',
                        help='List of words to plot', required=True)
    # A default only takes effect on an optional argument. The original
    # combined default=15 with required=True, which made the default
    # unreachable; passing -n explicitly keeps working as before.
    parser.add_argument("-n", "--neighbors", type=int, default=15,
                        help="Number of neighbors to plot")
    # Restricting the choices makes argparse reject an unknown protocol type
    # up front instead of failing later on an unbound `embeddings` name.
    parser.add_argument(
        "--protocol_type", type=str, choices=("RT", "BRD"),
        help="Whether to run test for Reichstagsprotokolle (RT) or "
             "Bundestagsprotokolle (BRD)",
        required=True)
    # NOTE(review): this stays optional as in the original, but the value is
    # handed straight to SequentialEmbedding.load -- confirm that load copes
    # with None, otherwise this argument should become required.
    parser.add_argument("--model_folder", type=str,
                        help="Folder where word2vec models are located",
                        required=False)
    args = parser.parse_args()

    neighbor_count = args.neighbors
    # Both protocol types load from the same folder, so a single call
    # replaces the two identical branches of the original.
    embeddings = SequentialEmbedding.load(args.model_folder)

    for word1 in args.words:
        helpers.clear_figure()
        try:
            time_sims, lookups, _nearests, sims = helpers.get_time_sims(
                embeddings, word1, topn=neighbor_count)

            words = list(lookups.keys())
            values = [lookups[word] for word in words]
            fitted = helpers.fit_tsne(values)
            if not len(fitted):
                print(f"Couldn't model word {word1}")
                continue

            # draw the words onto the graph
            cmap = helpers.get_cmap(len(time_sims))
            annotations = helpers.plot_words(
                word1, words, fitted, cmap, sims,
                len(embeddings.embeds) + 1, args.protocol_type)
            print(f'Annotations:{annotations}')
            if annotations:
                helpers.plot_annotations(annotations)

            helpers.savefig(word1, args.protocol_type, neighbor_count)
            for year, sim in time_sims.items():
                print(year, sim)
        except KeyError:
            print(f'{word1} is not in the embedding space.')
import collections
from sklearn.manifold import TSNE
import numpy as np
import matplotlib.pyplot as plt

# Words to visualise, as provided by helpers.get_words().
WORDS = helpers.get_words()

if __name__ == "__main__":
    embeddings = helpers.load_embeddings()
    for word1 in WORDS:
        time_sims, lookups, nearests, sims = helpers.get_time_sims(
            embeddings, word1)

        helpers.clear_figure()

        # We remove word1 from our words because we just want to plot the
        # different related words. Lookup keys are compared on the part
        # before the first "|". The result must be a real list: it is read
        # twice below (for `values` and by plot_words), and on Python 3
        # filter() would hand back a one-shot iterator that is exhausted
        # after the first pass.
        words = [word for word in lookups.keys()
                 if word.split("|")[0] != word1]
        values = [lookups[word] for word in words]
        fitted = helpers.fit_tsne(values)
        if not len(fitted):
            # %-formatting inside print(...) gives the same output on
            # Python 2 and Python 3; the bare print statement only parses
            # on Python 2.
            print("Couldn't model word %s" % word1)
            continue

        cmap = helpers.get_cmap(len(time_sims))
        annotations = helpers.plot_words(word1, words, fitted, cmap, sims)
import collections
from sklearn.manifold import TSNE
import numpy as np
import matplotlib.pyplot as plt

# Words to visualise, as provided by helpers.get_words().
WORDS = helpers.get_words()

if __name__ == "__main__":
    embeddings = helpers.load_embeddings()
    for word1 in WORDS:
        time_sims, lookups, nearests, sims = helpers.get_time_sims(
            embeddings, word1)

        helpers.clear_figure()

        # We remove word1 from our words because we just want to plot the
        # different related words. The filtered names are materialised as a
        # list because they are consumed twice (to build `values` and again
        # by plot_words); a Python 3 filter() object could only be iterated
        # once.
        words = [
            candidate for candidate in lookups.keys()
            if candidate.split("|")[0] != word1
        ]
        values = [lookups[candidate] for candidate in words]
        fitted = helpers.fit_tsne(values)
        if not len(fitted):
            # Function-call form with %-formatting prints the same text on
            # Python 2 and Python 3; the print statement is Python 2 only.
            print("Couldn't model word %s" % word1)
            continue

        cmap = helpers.get_cmap(len(time_sims))
        annotations = helpers.plot_words(word1, words, fitted, cmap, sims)

        # One figure per word, saved under the name "<word>_shaded".
        helpers.savefig("%s_shaded" % word1)