def main_freq() -> None:
    """Run the frequency-based word-vector pipeline on the ``ag_news`` train split.

    Steps, in order: load the dataset, build a ``Vocabulary`` from the
    training texts and draw its charts, compute a PPMI matrix over the same
    texts, reduce the matrix's dimensionality, and plot the resulting word
    vectors.

    NOTE(review): ``main_freq`` is defined a second time immediately after
    this definition; the later ``def`` rebinds the name, so this body is
    never the one that runs. Consider deleting one of the two.
    """
    logging.info("Loading dataset")
    dataset = load_dataset("ag_news")
    # Extract the training texts once; the vocabulary and the PPMI step both
    # consume this same list, so there is no need to walk the dataset again.
    dataset_text = [r['text'] for r in dataset['train']]

    logging.info("Building vocabulary")
    vocab = Vocabulary(dataset_text)
    vocab.make_vocab_charts()
    plt.close()
    # NOTE(review): pausing right after close() — presumably to let the GUI
    # event loop process the close; confirm whether this is still needed.
    plt.pause(0.01)

    logging.info("Computing PPMI matrix")
    PPMI = compute_ppmi_matrix(dataset_text, vocab)

    logging.info("Performing Truncated SVD to reduce dimensionality")
    word_vectors = dim_reduce(PPMI)

    logging.info("Preparing T-SNE plot")
    plot_word_vectors_tsne(word_vectors, vocab)


def main_freq() -> None:
    """Run the frequency-based word-vector pipeline on the ``ag_news`` train split.

    Steps, in order: load the dataset, build a ``Vocabulary`` from the
    training texts and draw its charts, compute a PPMI matrix over the same
    texts, reduce the matrix's dimensionality, and plot the resulting word
    vectors.

    NOTE(review): an earlier definition of ``main_freq`` directly precedes
    this one in the module; this later definition is the one that stays
    bound to the name.
    """
    logging.info("Loading dataset")
    dataset = load_dataset("ag_news")
    # Extract the training texts once; the vocabulary and the PPMI step both
    # consume this same list, so there is no need to walk the dataset again.
    dataset_text = [r['text'] for r in dataset['train']]

    logging.info("Building vocabulary")
    # For a quick debugging run, build the vocabulary from a small slice
    # (e.g. ``dataset_text[:5]``) or a couple of hand-written sentences.
    vocab = Vocabulary(dataset_text)
    vocab.make_vocab_charts()
    plt.close()

    logging.info("Computing PPMI matrix")
    PPMI = compute_ppmi_matrix(dataset_text, vocab)

    logging.info("Performing Truncated SVD to reduce dimensionality")
    word_vectors = dim_reduce(PPMI)

    logging.info("Preparing T-SNE plot")
    plot_word_vectors_tsne(word_vectors, vocab)