Example #1
def displayTopics():
    dictionary = corpora.Dictionary.load(dictionary_path)
    corpus = corpora.BleiCorpus(corpus_path)
    lda = LdaMulticore.load(lda_model_path)
    for i, topic in enumerate(lda.show_topics(lda_num_topics)):
        print('Topic #' + str(i) + ': ' + str(topic))
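Example #1 relies on module-level names defined elsewhere in its source file; a minimal sketch of the assumed setup (the paths and topic count below are illustrative, not from the original):

from gensim import corpora
from gensim.models import LdaMulticore

dictionary_path = 'models/reviews.dict'    # hypothetical path
corpus_path = 'models/reviews.lda-c'       # hypothetical BleiCorpus (LDA-C) file
lda_model_path = 'models/reviews.lda'      # hypothetical path
lda_num_topics = 50                        # hypothetical topic count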
Example #2
def ldaplot():
    model = LdaMulticore.load('lda.model')
    with open('bow_dict.pk', 'rb') as f:
        bow, dictionary = pickle.load(f)
    # pdb.set_trace()
    vis = pyLDAvis.gensim.prepare(model, bow, dictionary)
    # pyLDAvis.display(vis)
    # pdb.set_trace()
    pyLDAvis.save_html(vis, 'lda.html')
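One portability note: newer pyLDAvis releases moved the gensim helpers to pyLDAvis.gensim_models, so pyLDAvis.gensim may fail to import depending on the installed version. A hedged compatibility import:

import pyLDAvis
try:
    import pyLDAvis.gensim_models as gensimvis  # pyLDAvis >= 3.x
except ImportError:
    import pyLDAvis.gensim as gensimvis         # older pyLDAvis releases
# vis = gensimvis.prepare(model, bow, dictionary)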
Example #3
    def __init__(self, lda_path, dict_path):
        """
        lda_path - path to lda model
        dict_path - path to dict

        param: lda_path str
        param: dict_path str
        """
        self.dictionary = corpora.Dictionary.load(dict_path)
        self.lda = LdaMulticore.load(lda_path)
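A minimal usage sketch for this constructor, assuming the enclosing class is named TopicModel (a hypothetical name; the original class name is not shown):

tm = TopicModel('models/lda.model', 'models/lda.dict')    # hypothetical paths
bow = tm.dictionary.doc2bow(['great', 'battery', 'life'])
print(tm.lda[bow])    # (topic id, probability) pairs for the document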
Example #4
 def run(self):
     (model_file, dictionary_file), data_file = self.input()
     model = GensimLdaModel.load(model_file.path)
     dictionary = corpora.Dictionary.load(dictionary_file.path)
     data = pd.read_pickle(data_file.path)
     features = data.apply(self.get_features,
                           model=model,
                           dictionary=dictionary,
                           axis=1)
     features.to_pickle(self.output().path)
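The task assumes a get_features method defined elsewhere on the class; a hedged sketch of what it might look like (the method body and the 'tokens' column name are assumptions, not from the original):

 def get_features(self, row, model, dictionary):
     # Map a row's tokens to a bag-of-words vector, then to a topic mixture.
     bow = dictionary.doc2bow(row['tokens'])    # column name is an assumption
     return dict(model.get_document_topics(bow, minimum_probability=0.0))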
Example #5
 def eval(self, dataset):
     path = '../models.nosync/lda/model'
     model = LdaMulticore.load(path)
     corpus, dictionary = self._prepare(dataset)
     x = model.log_perplexity(corpus)
     print(x)
     for i, (d, t) in enumerate(zip(corpus, dataset.titles)):
         print(t)
          for topic_id, _ in model.get_document_topics(d):
              # print a few top words for each topic assigned to the document
              for word, _ in model.show_topic(topic_id, topn=5):
                  print(word, end=' ')
         print('\n')
Example #6
def getLDA(topics):
    corp_d = Dictionary.load(MODEL_DIR + "corpus_dict.model")
    lda = LdaMulticore.load(MODEL_DIR + "lda.model")
    pp_docs = [lemma_pp(topic) for topic in topics]
    dtm = [corp_d.doc2bow(doc) for doc in pp_docs]
    tfidf = TfidfModel(dtm)
    corp_tfidf = tfidf[dtm]
    return list(lda[corp_tfidf])
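Worth noting: gensim's LDA inference is defined over bag-of-words counts, so passing TF-IDF weights through lda[...] is unusual (gensim accepts the float weights, but the model was presumably trained on raw counts). A sketch of the more conventional call, under that assumption:

# Conventional inference directly on the bag-of-words vectors:
topic_dists = [lda.get_document_topics(bow) for bow in dtm]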
Example #7
def create_dist_matrix(model_a_dest, model_b_dest, distance='jaccard', num_words=300, normed=True):
    
    a = LdaMulticore.load(model_a_dest)
    b = LdaMulticore.load(model_b_dest)

    mdiff_a_b, annotation_a_b = a.diff(b, distance=distance, num_words=num_words, normed=normed)
    mdiff_b_a, annotation_b_a = b.diff(a, distance=distance, num_words=num_words, normed=normed)
    #topic_diff_a_b = np.ones(mdiff_a_b.shape) - mdiff_a_b
    #topic_diff_b_a = np.ones(mdiff_a_b.shape) - mdiff_b_a
    topic_diff_a_b = mdiff_a_b
    topic_diff_b_a = mdiff_b_a

    a_ones = np.ones(topic_diff_a_b.shape)
    b_ones = np.ones(topic_diff_a_b.shape)

    first_half = np.concatenate((a_ones, topic_diff_b_a), axis=0)
    second_half = np.concatenate((topic_diff_a_b, b_ones), axis=0)
    total = np.concatenate((first_half, second_half), axis=1)
    
    return total
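A hedged usage sketch for the block matrix this returns (the model paths and plot styling are illustrative assumptions):

import matplotlib.pyplot as plt

total = create_dist_matrix('models/lda_a.model', 'models/lda_b.model')  # hypothetical paths
plt.imshow(total, cmap='gray')   # off-diagonal blocks hold the cross-model distances
plt.colorbar()
plt.savefig('topic_distance_matrix.png')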
Example #8
def main():

    lda_model = LdaMulticore.load('lda.model')
    print('Successfully Loaded')
    print(lda_model)

    with open('cnn_text.pickle', 'rb') as f:
        test_data1 = pickle.load(f)

    with open('test_dataset.txt', 'rb') as f:
        test_data2 = pickle.load(f)
Example #9
 def load(self,
          fname,
          return_dense=True,
          max_df=0.5,
          min_df=5,
          *args,
          **kwargs):
      lda = LdaMulticore.load(fname, *args, **kwargs)
     alpha = lda.alpha
     eta = lda.eta
     iterations = lda.iterations
     random_seed = lda.random_state
     workers = lda.workers
     num_topics = lda.num_topics
     return GensimLDAVectorizer(num_topics, alpha, eta, workers, iterations,
                                return_dense, max_df, min_df, random_seed)
Example #10
def visual_lda():
    lda = LdaMulticore.load("../model/lda.model")
    with open("../result/ad_issue_reviews") as fin:
        reviews = json.load(fin)
    # build bag-of-words, corpus
    reviews = [[word for word in review if word not in stopwords.words('english')] for review in reviews]
    from collections import defaultdict
    freq = defaultdict(int)
    for review in reviews:
        for token in review:
            freq[token] += 1
    reviews = [[token for token in review if freq[token] > 1] for review in reviews]
    dictionary = corpora.Dictionary(reviews)
    corpus = [dictionary.doc2bow(review) for review in reviews]
    import pyLDAvis.gensim as gensimvis
    import pyLDAvis
    vis_data = gensimvis.prepare(lda, corpus, dictionary)
    pyLDAvis.display(vis_data)
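pyLDAvis.display only renders inside a Jupyter notebook; when visual_lda runs as a plain script, saving to HTML is the usual alternative (the output filename is an assumption):

pyLDAvis.save_html(vis_data, 'ad_issue_lda.html')   # hypothetical output path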
Example #11
def load_resources():
    """
    loads all of the serialized objects for the recommender to work. Book identifiers exist in
    two realms: there is the Gutenberg book id and the index where the book exists in the corpus.
    These are not identical.

    :return: model object, corpus vects object, list of gutenberg ids,
             dictionary of book index number to id
    """
    model = LdaMulticore.load(path.join(_RELATIVE_DIR, _CURRENT_MODEL))
    corpus = _unpickle(path.join(_RELATIVE_DIR, _CURRENT_CORPUS))
    ids = _unpickle(path.join(_RELATIVE_DIR, _CURRENT_TITLES))
    ids_to_ind_dict = {
        int(id_loop): ind_loop
        for ind_loop, id_loop in enumerate(ids)
    }
    distance_mat_location = path.join(_RELATIVE_DIR,
                                      _CURRENT_MODEL + '.distance_matrix.pkl')
    if path.isfile(distance_mat_location):
        dist_mat = _unpickle(distance_mat_location, True)
    else:
        dist_mat = None
    return model, corpus, ids, ids_to_ind_dict, dist_mat
Example #12
import gensim
import pandas as pd
from gensim import corpora
from gensim.models import LdaMulticore
import string

# loading the dataframe
cleaned_data = pd.read_csv('./dataframes/final_df.csv',
                           index_col=0,
                           nrows=1000)

# selecting the 3grams_reviews for topic modeling
cleaned_data['3gram_reviews'] = cleaned_data['3gram_reviews'].map(
    lambda x: ''.join(c for c in x
                      if c == '_' or c not in string.punctuation).split())

# selecting the model
lda_model = LdaMulticore.load('./model/model.model')
documents = list(cleaned_data['3gram_reviews'])
dictionary = gensim.corpora.Dictionary(documents)

# loading the word dict
dictionary_saved = gensim.corpora.Dictionary.load(
    './model/model.model.id2word')
corpus = [dictionary_saved.doc2bow(text) for text in documents]

# making the topic dict
topicDictionary = {
    '0': 'Network Performance',
    '1': 'Overall Experience',
    '2': 'Gameplay Mechanics',
    '3': 'Content/Value',
    '4': 'NO TOPIC',
}
Example #13
    def load_from_file(self):
        from gensim.models import LdaMulticore

        return LdaMulticore.load("lda_out_%d.model" % self.id)
Example #14
    if not args.load_preprocess:
        comments_corpus = text2corpus(comments_text_filtered, dictionary)
        del comments_text_filtered
        with open("temp_corpus.pickle", "wb") as f:
            pickle.dump((comments_corpus, dictionary), f)
    else:
        with open("temp_corpus.pickle", "rb") as f:
            comments_corpus, dictionary = pickle.load(f)

    print("created corpus")
    print('Number of unique tokens: %d' % len(dictionary))
    print('Number of documents: %d' % len(comments_corpus))

    num_topics = 150
    if args.load:
        model = LdaMulticore.load("topic_models/model_comments")
    else:
        model = LdaMulticore(comments_corpus, id2word=dictionary, num_topics=num_topics)
        print("model done")
        model.save("topic_models/model_comments")

    print(model.print_topics(20))

    top_topics = model.top_topics(comments_corpus) #, num_words=20)

    # Average topic coherence is the sum of topic coherences of all topics, divided by the number of topics.
    avg_topic_coherence = sum([t[1] for t in top_topics]) / num_topics
    print('Average topic coherence: %.4f.' % avg_topic_coherence)

    #from pprint import pprint
    #pprint(top_topics)
Example #15
import logging
import sys

from gensim.corpora import Dictionary
from gensim.models import LdaMulticore, CoherenceModel
from model.lda.preprocess import Preprocessor
from model.util.file_parser import parse_dir_json

if __name__ == '__main__':
    init_logger()
    log = logging.getLogger('lda_model')

    config = LdaConfig(sys.argv[1], 'lda_model').get_current_config()

    _, docs = zip(*parse_dir_json(config['data_path']))

    preprocessed_docs = Preprocessor(
        max_workers=config['max_workers']).process_docs(docs)

    log.info("Loading model from %s", config['model_path'])
    lda_model = LdaMulticore.load(config['model_path'])
    log.info("Loading dictionary from %s", config['dict_path'])
    dictionary = Dictionary.load(config['dict_path'])

    coherence_model_lda = CoherenceModel(model=lda_model,
                                         texts=preprocessed_docs,
                                         dictionary=dictionary,
                                         coherence='c_v')

    coherence_lda = coherence_model_lda.get_coherence()

    import csv

    with open(config['coherence_path'], "a") as csv_file:
        writer = csv.writer(csv_file, delimiter=';')
        writer.writerow([config['topics'], coherence_lda])
Example #16
def LDA_model_out_of_time_tpot(df,
                               features,
                               target,
                               dest_all_model,
                               downsample=False):

    X = df[features]
    y = df[target].astype('bool')

    scores = {'acc': [], 'f1': []}
    cf_matrix_val = np.zeros((2, 2), dtype=int)

    tbcv = TimeBasedCV(train_period=3, test_period=1, freq='years')
    tbcv_folds = tbcv.split(df,
                            validation_split_date=datetime.date(2008, 12, 31),
                            date_column='sec_filing_date')
    k_folds = len(tbcv_folds)
    for k_index, (train_index, test_index) in enumerate(tbcv_folds):

        dest_train, dest_val = dest_all_model[str(k_index + 1)]

        data_train = X.loc[train_index].drop('sec_filing_date', axis=1)
        target_train = y.loc[train_index]

        data_test = X.loc[test_index].drop('sec_filing_date', axis=1)
        target_test = y.loc[test_index]

        print("=========================================")
        print("==== K Fold Validation step => %d/%d ======" %
              (k_index + 1, k_folds))
        print("=========================================")

        lda_model_train = LdaMulticore.load(lda_data_dir + dest_train)

        if downsample:

            try:
                data_train = pickle.load(
                    open(
                        main_dir + 'data/topic_predictions/' +
                        str(k_index + 1) + '_downsample_data_train.list',
                        "rb"))
            except FileNotFoundError:
                print("Prepare Train data")
                data_train = get_topic_proba(data_train, lda_model_train)
                pickle.dump(
                    data_train,
                    open(
                        main_dir + 'data/topic_predictions/' +
                        str(k_index + 1) + '_downsample_data_train.list',
                        "wb"))

            try:
                data_test = pickle.load(
                    open(
                        main_dir + 'data/topic_predictions/' +
                        str(k_index + 1) + '_downsample_data_test.list', "rb"))
            except FileNotFoundError:
                print("Prepare Test data")
                data_test = get_topic_proba(data_test, lda_model_train)
                pickle.dump(
                    data_test,
                    open(
                        main_dir + 'data/topic_predictions/' +
                        str(k_index + 1) + '_downsample_data_test.list', "wb"))

        else:

            try:
                data_train = pickle.load(
                    open(
                        main_dir + 'data/topic_predictions/' +
                        str(k_index + 1) + '_data_train.list', "rb"))
            except FileNotFoundError:
                print("Prepare Train data")
                data_train = get_topic_proba(data_train, lda_model_train)
                pickle.dump(
                    data_train,
                    open(
                        main_dir + 'data/topic_predictions/' +
                        str(k_index + 1) + '_data_train.list', "wb"))

            try:
                data_test = pickle.load(
                    open(
                        main_dir + 'data/topic_predictions/' +
                        str(k_index + 1) + '_data_test.list', "rb"))
            except FileNotFoundError:
                print("Prepare Test data")
                data_test = get_topic_proba(data_test, lda_model_train)
                pickle.dump(
                    data_test,
                    open(
                        main_dir + 'data/topic_predictions/' +
                        str(k_index + 1) + '_data_test.list', "wb"))

        clf = TPOTClassifier(generations=5,
                             population_size=50,
                             verbosity=2,
                             max_time_mins=5)
        clf.fit(data_train, target_train.values.ravel())
        preds = clf.predict(data_test)

        # accuracy for the current fold only
        score = clf.score(data_test, target_test)

        f1 = f1_score(target_test, preds)

        cf_matrix_val += confusion_matrix(target_test, preds)
        scores['acc'].append(score)
        scores['f1'].append(f1)

    print("Cross Validation Score: " +
          str(sum(scores['acc']) / len(scores['acc'])))

    #Visualize confusion matrix for cross-val data
    labels = ['True Neg', 'False Pos', 'False Neg', 'True Pos']
    categories = ['No Downgrade', 'Downgrade']
    make_confusion_matrix(cf_matrix_val,
                          group_names=labels,
                          categories=categories,
                          cbar=False,
                          title='Confusion Matrix: TPOT',
                          figsize=(10, 10))

    return scores, clf, cf_matrix_val
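The four near-identical try/except blocks above implement a load-or-compute cache; a hedged refactor sketch of that pattern (the helper name and exact behavior are my reading of the intent, not from the original):

def load_or_compute_topics(cache_path, data, lda_model):
    # Return cached topic probabilities if present; otherwise compute and cache them.
    try:
        with open(cache_path, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        features = get_topic_proba(data, lda_model)
        with open(cache_path, "wb") as f:
            pickle.dump(features, f)
        return features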
Example #17
 def load_model(self, model_path, dict_path):
     logger.info("Loading model from {}".format(model_path))
     self.model = LdaMulticore.load(model_path)
     self.dictionary = Dictionary.load(dict_path)
Example #18
 def pretrained(self, lda, dicti):
     dictionary = corpora.Dictionary.load(dicti)
     model = LdaMulticore.load(lda)
     return dictionary, model
Example #19
def get_lda_model(mode=QUESTION):
    return LdaMulticore.load(os.path.join(MODEL_DIR, modes[mode].model))
Example #20
    res_reduced = results[indices]
    plot_sentiment_time(res_reduced,
                        time_filtered,
                        title_suffix=", for fluff flair",
                        bins=60)

    time_filtered, indices = filter_comment_flair(comments_list, "created_utc",
                                                  "IMAGE")
    res_reduced = results[indices]
    plot_sentiment_time(res_reduced,
                        time_filtered,
                        title_suffix=", for image flair",
                        bins=60)

    if args.topic_model:
        topic_model = LdaMulticore.load(args.topic_model)

        if not args.load_preprocess:
            comments_text = load_all_comments(db_name=args.comments_cached,
                                              only="body")

            comments_text_filtered, dictionary, comments_tokenized = preprocess_text(
                comments_text)
            print("filtered text")

            comments_corpus = text2corpus(comments_text_filtered, dictionary)
            del comments_text_filtered
            with open("temp_corpus.pickle", "wb") as f:
                pickle.dump((comments_corpus, dictionary), f)
        else:
            with open("temp_corpus.pickle", "rb") as f:
Example #21
            tokens_or_none = [
                tok.lower_ for tok in doc if not tok.is_stop and tok.is_alpha
            ]
        else:
            tokens_or_none = None
    else:
        tokens_or_none = None
    return (tokens_or_none, component)


docs = defaultdict(list)
models = defaultdict()

print("Loading models...")
for component in components:
    models[component] = LdaMulticore.load(models_path + component +
                                          '.model.topic')

# Compute cross-corpus topic similarities
diffs = defaultdict(defaultdict)

for (train, test) in itertools.product(components, repeat=2):
    print(f'Computing topic diff for {train} on {test}...')
    diff, annotation = models[train].diff(models[test])
    diffs[train][test] = diff.tolist()

    fig, ax = plt.subplots(1, 1)

    img = plt.imshow(diff, cmap='gray')

    ax.set_title("Topic Model Difference Matrix")
    ax.set_ylabel(train)
Example #22
 def getModel(self):
     dictionary_file = get_current_state()["dictionary"]
     lda_file = get_current_state()["lda"]
     dictionary = corpora.Dictionary.load("models/" + dictionary_file)
     model = LdaMulticore.load("models/" + lda_file)
     return dictionary, model
Example #23
import string
import gensim
from gensim.models import LdaMulticore
from gensim import corpora
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

# read the cleaned data frame
model_data_frame = pd.read_csv('./dataframes/model2_df.csv', index_col=0)

# load the trained model
lda_model = LdaMulticore.load('./models/nouns_only/model/model.model')

# Turn the required columns of data into lists to be used in creating a dictionary (using doc2bow) in the next few steps 
model_data_frame['clean_reviews'] = model_data_frame['clean_reviews'].map(
    lambda x: ''.join(c for c in x if c == '_' or c not in string.punctuation).split())
model_data_frame['3grams_nouns'] = model_data_frame['3grams_nouns'].map(
    lambda x: ''.join(c for c in x if c == '_' or c not in string.punctuation).split())

documents = list(model_data_frame['3grams_nouns'])
dictionary = gensim.corpora.Dictionary(documents)
# reload the dictionary that was created during model creation
dictionary_saved = gensim.corpora.Dictionary.load(
    './models/nouns_only/model/model.model.id2word')  # this dictionary already had filter_extremes() applied during
# training step
corpus = [dictionary_saved.doc2bow(text) for text in documents]
# print the keywords associated with each topic
topic_list = lda_model.print_topics(num_topics=5, num_words=15)
Example #24
corpus = [dictionary.doc2bow(review["words"]) for review in corpus_list]
BleiCorpus.serialize(corpus_path, corpus, id2word=dictionary)

corpus = corpora.BleiCorpus(corpus_path)
print "running lda"
lda = gensim.models.LdaMulticore(corpus,
                                 num_topics=lda_num_topics,
                                 id2word=dictionary,
                                 minimum_probability=0.,
                                 workers=8)
lda.save(lda_model_path)
print "done lda"

dictionary = corpora.Dictionary.load(dictionary_path)
corpus = corpora.BleiCorpus(corpus_path)
lda = LdaMulticore.load(lda_model_path)
for i, topic in enumerate(lda.show_topics(lda_num_topics)):
    print(type(topic))
    print('Topic #' + str(i) + ': ' + str(topic))

nwords_per_topic = 10
topic_words_set = set()
for i in range(lda_num_topics):
    topic_terms = lda.get_topic_terms(i, topn=nwords_per_topic)
    for term in topic_terms:
        topic_words_set.add(term[0])
# print('Topic_Words = ', topic_words_set)
print('Topic_Words_len = ', len(topic_words_set))
topic_word_id = dict(zip(topic_words_set, range(len(topic_words_set))))
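A hedged sketch of how topic_word_id might be used downstream, e.g. to build a dense document-by-topic-word count matrix (this use is an assumption about the intent, not from the original):

import numpy as np

docs = list(corpus)   # materialize the streamed corpus
features = np.zeros((len(docs), len(topic_word_id)))
for d, bow in enumerate(docs):
    for word_id, count in bow:
        if word_id in topic_word_id:
            features[d, topic_word_id[word_id]] = count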
Example #25
def summarize(csv_content):

    cleaned_data = pd.read_csv(csv_content, index_col=0, nrows=1500)
    cleaned_data['3gram_reviews'] = cleaned_data['3gram_reviews'].map(
        lambda x: ''.join(c for c in x
                          if c == '_' or c not in string.punctuation).split())
    lda_model = LdaMulticore.load('model.model')
    documents = list(cleaned_data['3gram_reviews'])
    dictionary = gensim.corpora.Dictionary(documents)
    dictionary_saved = gensim.corpora.Dictionary.load('model.model.id2word')
    corpus = [dictionary_saved.doc2bow(text) for text in documents]

    topicDictionary = {
        '0': 'Performance',
        '1': 'Overall_Experience',
        '2': 'Gameplay_Mechanics',
        '3': 'Content_Value',
        '4': 'NO_TOPIC',
    }

    def identifyReviewTopics(ldamodel=lda_model,
                             corpus=corpus,
                             documents=documents):
        reviewTopicsDataframe = pd.DataFrame()
        # Get main topic in each document
        for i, row in enumerate(ldamodel[corpus]):
            row = sorted(row, key=lambda x: (x[1]), reverse=True)
            # Get the Dominant topic, Perc Contribution and Keywords for each document
            for j, (topicNumber, prop_topic) in enumerate(row):
                if j == 0:
                    wp = ldamodel.show_topic(topicNumber)
                    topicKeywords = ", ".join([word for word, prop in wp])
                    reviewTopicsDataframe = reviewTopicsDataframe.append(
                        pd.Series([
                            topicDictionary[str(topicNumber)],
                            round(prop_topic, 4), topicKeywords
                        ]),
                        ignore_index=True
                    )  # replaced int(topicNumber) with str(topicNumber)
                else:
                    break
        reviewTopicsDataframe.columns = [
            'Dominant_Topic', 'Contribution_Percentage', 'Keywords'
        ]
        # Add original text to the end of the output
        originalDataframe = pd.DataFrame(
            cleaned_data[['review', '3gram_reviews']])
        reviewTopicsDataframe = pd.concat(
            [reviewTopicsDataframe, originalDataframe], axis=1)
        return reviewTopicsDataframe

    reviewTopicsDf = identifyReviewTopics()
    sentAnalyzer = SentimentIntensityAnalyzer()
    reviewTopicsDf['compound_sentiment'] = reviewTopicsDf['review'].map(
        lambda x: sentAnalyzer.polarity_scores(x)['compound'])
    sentimentDictionary = {}

    for topic in list(topicDictionary.values()):
        isCurrentTopic = reviewTopicsDf['Dominant_Topic'] == topic
        topicDf = reviewTopicsDf[isCurrentTopic]
        sentimentList = topicDf['compound_sentiment']
        positiveList = [x for x in sentimentList if x > 0.1]
        negativeList = [x for x in sentimentList if x < -0.1]
        neutralList = [
            x for x in sentimentList
            if x not in positiveList and x not in negativeList
        ]
        totalList = len(sentimentList)
        print(topic)
        print(totalList)
        if (totalList > 0):
            positivePercentage = len(positiveList) / totalList
            neutralPercentage = len(neutralList) / totalList
            negativePercentage = len(negativeList) / totalList
            sentimentDictionary[topic] = [
                round(positivePercentage, 3),
                round(neutralPercentage, 3),
                round(negativePercentage, 3)
            ]
            # output percentages as positive, neutral, then negative
    print("\nBy review\n", sentimentDictionary)

    return sentimentDictionary
Example #26
 def __init__(self):
     self.dictionary = corpora.Dictionary.load(dictionary_path)
     self.lda = LdaMulticore.load(lda_model_path)
Example #27
    def load_from_file(self):
        from gensim.models import LdaMulticore

        return LdaMulticore.load("lda_out_%d.model" % self.id)
Example #28
 def infer(self, dataset):
     path = '../models.nosync/lda/model'
     model = LdaMulticore.load(path)
     corpus, dictionary = self._prepare(dataset)
     return corpus, model
Example #29
def load_lda_model_multicores(filepath):
    return LdaMulticore.load(filepath)
Example #30
def model(n_topics,
          alpha=None,
          beta=None,
          saved=False,
          pyldavis=False,
          wordclouds=False,
          rep_letters=False,
          plots=False) -> dict:
    assert n_topics >= 2
    """
	aux functions to make sure it's loading the desired model
	"""
    def verify_alpha(lda_model, given):
        actual: list = lda_model.alpha
        if given == "asymmetric":
            return not np.isclose(actual[0], actual[-1])
        elif given == "symmetric":
            return np.isclose(actual[0], actual[-1])
        else:
            return np.isclose(given, actual[0]) and np.isclose(
                given, actual[-1])

    def verify_beta(lda_model, given):
        actual = lda_model.eta
        if isinstance(given, float):
            return np.isclose(given, actual[0]) and np.isclose(
                given, actual[-1])  # plain == comparison is unreliable for floats
        else:
            return False

    print(f"Building LDA model for {n_topics} topics.")

    if saved:
        lda = LdaMulticore.load(f"{TRAINED_LDA}{n_topics}")

        # if not (verify_alpha(lda, alpha) and verify_beta(lda, beta)):
        # print("Loaded model didn't pass parameter verification; train it from scratch or load the correct one.")
        # return

        print(f"Trained LDA model with {n_topics} topics loaded successfully.")

    else:
        lda = LdaMulticore(
            corpus,
            num_topics=n_topics,
            id2word=dictionary,
            passes=20,
            alpha=alpha if alpha is not None else "symmetric",  # default
            eta=beta,
            random_state=1,
            iterations=100,
            eval_every=5,
            workers=3,
            per_word_topics=True)

        lda.save(f"{TRAINED_LDA}{n_topics}")
        print(
            f"LDA model with {n_topics} topics trained and saved successfully."
        )
    """
	save per-word-topics 3D matrix
	[!] alters global variable
	"""
    V = len(dictionary)
    K = n_topics
    N = len(corpus)
    global pwt
    pwt = np.zeros((V, K, N))
    """
	save topic assignment info in dataframes
	[!] alters global variables
	"""
    global vw
    global vws
    vws = get_topic_dists_dataframe(lda)
    vw, vws = set_main_topics(vw, vws)
    """
	coherence and silhouette scores
	"""
    coherence = CoherenceModel(model=lda,
                               texts=letters,
                               dictionary=dictionary,
                               coherence='c_v').get_coherence()
    print(f"Coherence score: {coherence}")  # the higher the better

    avg_silhouette = plot_silhouette(vws)
    print(f"Average silhouette coefficient: {avg_silhouette}"
          )  # the higher the better
    """
	other validation methods
	"""
    if pyldavis:
        vis = pyLDAvis.gensim.prepare(topic_model=lda,
                                      corpus=corpus,
                                      dictionary=dictionary,
                                      n_jobs=3)
        pyLDAvis.save_html(vis, f"{PYLDAVIS_PATH}/lda{n_topics}.html")

    if rep_letters:
        save_representative_letters(vws, 3)

    if wordclouds:
        save_topic_wordclouds(pwt)

    if plots:
        plot_topics_per_year(vw)
        plot_topics_per_recipient(vw)

    return {
        "model": lda,
        "num_topics": n_topics,
        "alpha": alpha,
        "beta": beta,
        "coherence": coherence,
        "silhouette": avg_silhouette,
        "vws": vws,
        "pwt": pwt
    }
Example #31
from gensim.models import LdaMulticore as LDA
import os
import argparse

parser = argparse.ArgumentParser(description='View generated topics')
parser.add_argument('--n_topics', help='Number of Topics')
parser.add_argument('--gram', help='unigram or both')
args = parser.parse_args()

model = LDA.load(os.getcwd() +
                 "/LDA models/{}/{}-topics".format(args.gram, args.n_topics))

for (a, b) in model.show_topics():
    print("Topic-{} \n".format(a))
    print(b)
    print("\n")