def ldamodel(doc_clean, n_topics, n_words, description, tfidfmodel=False, unseen_docs=None):
    """Train an LDA model, print diagnostics and persist the artefacts.

    Parameters
    ----------
    doc_clean : iterable of str
        Raw documents; each one is passed through ``min_char`` and split on
        whitespace before being indexed.
    n_topics : int
        Number of topics to train and to print.
    n_words : int
        Number of top terms printed per topic.
    description : str
        Prefix used for the three output files
        (``<description>.pkl``, ``<description>dictionary.gensim`` and
        ``<description>_ldamodel.gensim``).
    tfidfmodel : bool, optional
        When true, the bag-of-words corpus is re-weighted with a TF-IDF model
        (``smartirs='ntc'``) before training.
    unseen_docs : list, optional
        Extra documents to run inference on.
        NOTE(review): they are fed straight to ``doc2bow``, so they are
        presumably already tokenised -- confirm against callers.

    Returns
    -------
    None
        Results are printed and written to disk.
    """
    doc_clean = [min_char(doc).split() for doc in doc_clean]
    dictionary = corpora.Dictionary(doc_clean)

    # Converting list of documents (corpus) into Document Term Matrix using
    # dictionary prepared above.
    corpus = [dictionary.doc2bow(doc) for doc in doc_clean]
    compute_coherence_values(dictionary=dictionary, corpus=corpus,
                             texts=doc_clean, start=2, limit=40, step=6)

    if tfidfmodel:
        tfidf = TfidfModel(corpus, id2word=dictionary, smartirs='ntc')
        corpus = tfidf[corpus]

    # Train with the requested number of topics so that the printing loop
    # below (and print_result) never asks for a topic the model lacks.
    lda = LdaModel(corpus, num_topics=n_topics, id2word=dictionary,
                   random_state=1, passes=50, per_word_topics=True)

    print("#Tópicos LDA")
    for topic_id in range(n_topics):
        terms = lda.show_topic(topic_id, n_words)
        print("Topic #" + str(topic_id) + ": ",
              ", ".join([word + '*' + str(weight) for word, weight in terms]))

    print('Bound: ', lda.bound(corpus))
    # Compute Perplexity
    print('Perplexity: ', lda.log_perplexity(corpus))

    # Compute Coherence Score
    coherence_model_lda = CoherenceModel(model=lda, texts=doc_clean,
                                         dictionary=dictionary, coherence='c_v')
    coherence_lda = coherence_model_lda.get_coherence()
    print('\nCoherence Score: ', coherence_lda)

    if unseen_docs:
        corpus_new = [dictionary.doc2bow(doc) for doc in unseen_docs]
        for i, unseen_doc in enumerate(corpus_new):
            topic = None
            score = 0
            # With per_word_topics=True the model returns a tuple; element 0
            # is the (topic_id, probability) list for the document.
            inference_doc = lda[unseen_doc]
            print(unseen_docs[i])
            for index, tmp_score in inference_doc[0]:
                if tmp_score > score:
                    score = tmp_score
                    topic = lda.print_topic(index, 5)
            print("Score: {}\t Topic: {}".format(score, topic))
        print("Log perplexity for new corpus is", lda.log_perplexity(corpus_new))

    print_result(lda, doc_clean, corpus, n_topics, description)

    # Close the pickle file deterministically instead of leaking the handle.
    with open(description + '.pkl', 'wb') as corpus_file:
        pickle.dump(corpus, corpus_file)
    dictionary.save(description + 'dictionary.gensim')
    lda.save(description + '_ldamodel.gensim')
def lda_model_(documents, custom_stopwords_list, num_topics):
    """Fit an LDA topic model and score it.

    Each document is cleaned with ``preprocess_`` (using
    ``custom_stopwords_list``), split on whitespace, indexed into a gensim
    dictionary and converted to bag-of-words before training.

    Parameters
    ----------
    documents : iterable
        Raw documents to model.
    custom_stopwords_list :
        Stop words forwarded to ``preprocess_``.
    num_topics : int
        Number of topics to train.

    Returns
    -------
    tuple
        ``(model, coherence, log_perplexity)`` where ``coherence`` is the
        ``c_v`` coherence and ``log_perplexity`` is
        ``model.log_perplexity`` evaluated on the training corpus.
    """
    log.info('In LDA for Topic Modelling function.')
    cleaned_documents = [
        preprocess_(document, custom_stopwords_list).split()
        for document in documents
    ]
    document_dictionary = corpora.Dictionary(cleaned_documents)
    td_matrix = [
        document_dictionary.doc2bow(document)
        for document in cleaned_documents
    ]
    model = LdaModel(corpus=td_matrix, num_topics=num_topics,
                     id2word=document_dictionary, passes=100)
    coherence_model = CoherenceModel(model=model, texts=cleaned_documents,
                                     dictionary=document_dictionary,
                                     coherence='c_v')

    # Score once and reuse: both calls are expensive, and evaluating them a
    # second time for the return value could report numbers that differ from
    # the ones that were logged.
    coherence = coherence_model.get_coherence()
    perplexity = model.log_perplexity(td_matrix)
    log.debug('Coherence Score: {}\nPerplexity Score: {}'.format(
        coherence, perplexity))
    return model, coherence, perplexity
def find_optimal_number_topics(corpus, id2word, min, max, step, texts):
    """Sweep the number of topics and record coherence and log perplexity.

    One LDA model is trained for every value in ``np.arange(min, max, step)``.
    For each model the ``c_v`` coherence and ``log_perplexity`` on ``corpus``
    are stored, and both curves are drawn with ``plt.plot`` once the sweep
    has finished.

    Returns
    -------
    tuple of dict
        ``(range_coherence, range_perplexity)``, each keyed by topic count.
    """
    coherence_by_n = {}
    perplexity_by_n = {}

    for n in np.arange(min, max, step):
        print(n)
        model = LdaModel(
            corpus=corpus,
            id2word=id2word,
            num_topics=n,
            random_state=100,
            update_every=5,
            chunksize=100,
            passes=10,
            alpha='auto',
            per_word_topics=True,
        )

        # Perplexity is evaluated before coherence, as in the original flow.
        log_perplexity = model.log_perplexity(corpus)
        coherence = CoherenceModel(
            model=model,
            texts=texts,
            dictionary=id2word,
            coherence='c_v',
        ).get_coherence()

        coherence_by_n[n] = coherence
        perplexity_by_n[n] = log_perplexity

    plt.plot(list(coherence_by_n.keys()), list(coherence_by_n.values()))
    plt.plot(list(perplexity_by_n.keys()), list(perplexity_by_n.values()))
    return coherence_by_n, perplexity_by_n
def compete_number_of_words(detoken_data, token_data, min_num, max_num, step, random_state=None):
    '''
    Sweep ``CountVectorizer(min_df=...)`` and score an LDA model per setting.

    Parameters :
    -------------
    detoken_data : detokenized documents, given as a list
    token_data : tokenized documents used to compute the coherence value
    min_num : first ``min_df`` value tried
    max_num : upper end of the range (``np.arange`` excludes it)
    step : increment between successive ``min_df`` values
    random_state : seed for reproducibility, default = None

    Output :
    -------------
    coherence_value : DataFrame with one row per ``min_df`` value and the
                      columns 'min_df', 'Perplexity Value', 'Coherence Value'
    '''
    coherence_value = pd.DataFrame(
        columns=['min_df', 'Perplexity Value', 'Coherence Value'])

    candidates = list(np.arange(min_num, max_num, step))
    for row, m in enumerate(candidates):
        print("{} 번째, min_df = {}".format(row + 1, m))

        # Build the document-term counts for this min_df threshold.
        vectorizer = CountVectorizer(min_df=m)
        counts = vectorizer.fit_transform(detoken_data)

        dictionary = corpora.Dictionary([vectorizer.get_feature_names()])
        corpus = Sparse2Corpus(counts.T)

        lda_model = LdaModel(corpus=corpus, id2word=dictionary,
                             random_state=random_state)
        coherence_lda = CoherenceModel(model=lda_model, texts=token_data,
                                       dictionary=dictionary, coherence='c_v')

        coherence_value.loc[row] = [
            m,
            lda_model.log_perplexity(corpus),
            coherence_lda.get_coherence(),
        ]

    return coherence_value
def perform_experiment(train_n_dw_matrix, test_n_dw_matrix, T, num_2_token):
    """Compare a gensim LDA model with the project's own optimizer.

    For each of four fixed seeds a gensim ``LdaModel`` with ``T`` topics is
    trained on the training matrix, the project optimizer is run from a
    seeded random ``phi`` / uniform ``theta`` start, and both results are
    handed to the experiment callback, whose metrics are then printed.

    Parameters
    ----------
    train_n_dw_matrix, test_n_dw_matrix :
        Document-word count matrices. Rows are read through ``.indices`` and
        ``.data`` -- presumably scipy CSR matrices; confirm against callers.
    T : int
        Number of topics.
    num_2_token :
        Id-to-token mapping passed to gensim as ``id2word``.
    """
    # Materialise every row: on Python 3 ``zip`` is a one-shot iterator, and
    # this corpus is traversed many times (training, theta extraction and
    # perplexity, for every seed).
    train_corpus = [list(zip(row.indices, row.data)) for row in train_n_dw_matrix]

    for seed in [42, 7, 777, 12]:
        model = LdaModel(train_corpus, alpha='auto', id2word=num_2_token,
                         num_topics=T, iterations=500, random_state=seed)
        gensim_phi = exp_common.get_phi(model)
        gensim_theta = exp_common.get_theta(train_corpus, model)

        print('gensim perplexity')
        # NOTE(review): gensim itself reports perplexity as 2 ** (-bound);
        # np.exp is kept here to preserve the printed values -- confirm which
        # base is intended.
        print(np.exp(-model.log_perplexity(train_corpus)))

        D, W = train_n_dw_matrix.shape
        random_gen = np.random.RandomState(seed)
        phi = common.get_prob_matrix_by_counters(
            random_gen.uniform(size=(T, W)).astype(np.float64))
        theta = common.get_prob_matrix_by_counters(
            np.ones(shape=(D, T)).astype(np.float64))
        phi, theta = default.Optimizer(
            [regularizers.Additive(0.1, 0.)] * 100,
            verbose=False).run(train_n_dw_matrix, phi, theta)

        callback = experiments.default_callback(
            train_n_dw_matrix=train_n_dw_matrix,
            test_n_dw_matrix=test_n_dw_matrix,
            top_pmi_sizes=[5, 10, 20, 30],
            top_avg_jaccard_sizes=[10, 50, 100, 200],
            measure_time=True)
        callback.start_launch()
        # Slot 0 holds the project optimizer's result, slot 1 gensim's.
        callback(0, phi, theta)
        callback(1, gensim_phi, gensim_theta)

        print('artm')
        for name, values in callback.launch_result.items():
            print('\t{}: {}'.format(name, values[0]))
        print('gensim')
        for name, values in callback.launch_result.items():
            print('\t{}: {}'.format(name, values[1]))
def build_model(raw_file, ret_file):
    """Train an LDA model on cleaned tweets and write per-tweet topic scores.

    The number of topics ``k`` is taken from the digits that follow
    ``tweets_lda_`` in ``ret_file``. Every tweet gets an ``'lda<k>'`` entry
    mapping topic id (as a string) to its probability, and the tweets are
    written to ``ret_file`` as one JSON object per line.

    :param raw_file: path handed to ``load_all_tweets``
    :param ret_file: output path; must contain ``tweets_lda_<k>``
    :return: None
    :raises ValueError: if ``ret_file`` does not contain ``tweets_lda_<k>``
    """
    import re

    all_tweets = load_all_tweets(raw_file)

    # Read the whole run of digits so multi-digit topic counts work, and fail
    # loudly when the marker is missing instead of indexing an unrelated
    # character of the path.
    match = re.search(r'tweets_lda_(\d+)', ret_file)
    if match is None:
        raise ValueError(
            "cannot determine the number of topics: expected "
            "'tweets_lda_<k>' in {!r}".format(ret_file))
    k = int(match.group(1))
    print('k={}'.format(k))

    idx2twetid = []
    common_texts = []
    for key, tweet in all_tweets.items():
        idx2twetid.append(key)
        tokens = tweet['cleaned'].split(' ')
        common_texts.append(
            [token for token in tokens if token not in punc_words])

    common_dictionary = Dictionary(common_texts)
    common_corpus = [common_dictionary.doc2bow(text) for text in common_texts]

    print('begin to train')
    lda_model = LdaModel(common_corpus, id2word=common_dictionary,
                         num_topics=k, random_state=13)
    pprint(lda_model.print_topics(num_words=20))
    print('\nPerplexity: ', lda_model.log_perplexity(common_corpus))

    with open(ret_file, 'w', encoding='utf-8') as fout:
        # idx2twetid and common_corpus were built in the same order, so they
        # can be walked in lockstep.
        for tweetid, bow in zip(idx2twetid, common_corpus):
            lda_score = {
                str(topic_id): float(prob) for topic_id, prob in lda_model[bow]
            }
            all_tweets[tweetid]['lda' + str(k)] = lda_score
            fout.write(json.dumps(all_tweets[tweetid]))
            fout.write('\n')
class CustomLda(object):
    """Convenience wrapper around a gensim LDA model.

    Holds the tokenised data, its dictionary and bag-of-words corpus, the
    training hyper-parameters, the trained model and an optional coherence
    model, and offers helpers for visualisation, persistence and prediction.
    """

    def __init__(self, data=None, dictionary=None):
        """Build the dictionary and corpus from ``data``.

        ``data`` should always be provided; it may only be omitted when the
        instance is about to be restored by unpickling.

        :param data: tokenised documents (list of token lists), or None
        :param dictionary: optional prebuilt gensim ``Dictionary``; it is
            only used when ``data`` is given
        """
        self.data = data
        self.dictionary = dictionary
        if self.data is not None:
            if self.dictionary is None:
                self.dictionary = Dictionary(self.data)
            self.corpus = [self.dictionary.doc2bow(text) for text in self.data]
        else:
            # Without data there is nothing to index; a supplied dictionary
            # is deliberately dropped, as before.
            self.dictionary = None
            self.corpus = None

        # Training hyper-parameters, filled in by train().
        # NOTE: ``chuncksize`` is misspelled but kept, since it is a public
        # attribute and part of previously pickled instances.
        self.distributed = None
        self.chuncksize = None
        self.passes = None
        self.update_every = None
        self.alpha = None
        self.eta = None
        self.decay = None
        self.offset = None
        self.eval_every = None
        self.gamma_threshold = None
        self.minimum_probability = None
        self.ns_conf = None
        self.minimum_phi_value = None
        self.per_word_topics = None
        self.num_topics = None
        self.iterations = None
        self.random_state = None
        self.workers = None

        # Results.
        self.model = None
        self.coherence_model = None
        self.coherence = None
        self.coherence_type = None

    def train(self,
              num_topics,
              iterations=1500,
              random_state=1,
              distributed=False,
              chunksize=2000,
              passes=1,
              update_every=1,
              alpha='symmetric',
              eta=None,
              decay=0.5,
              offset=1.0,
              eval_every=10,
              gamma_threshold=0.001,
              minimum_probability=0.01,
              ns_conf=None,
              minimum_phi_value=0.01,
              per_word_topics=False,
              workers=1):
        """Train the LDA model on ``self.corpus``.

        If ``workers`` > 1 an ``LdaMulticore`` model with that many workers
        is trained; otherwise a single-core ``LdaModel``. All arguments are
        stored on the instance and forwarded to gensim. ``distributed``,
        ``update_every`` and ``ns_conf`` are only forwarded to ``LdaModel``.
        """
        self.distributed = distributed
        self.chuncksize = chunksize
        self.passes = passes
        self.update_every = update_every
        self.alpha = alpha
        self.eta = eta
        self.decay = decay
        self.offset = offset
        self.eval_every = eval_every
        self.gamma_threshold = gamma_threshold
        self.minimum_probability = minimum_probability
        self.ns_conf = ns_conf
        self.minimum_phi_value = minimum_phi_value
        self.per_word_topics = per_word_topics
        self.num_topics = num_topics
        self.iterations = iterations
        self.random_state = random_state
        self.workers = workers

        if self.workers > 1:
            # Honour the requested worker count instead of a fixed value.
            self.model = LdaMulticore(
                workers=self.workers,
                corpus=self.corpus,
                id2word=self.dictionary,
                iterations=self.iterations,
                num_topics=self.num_topics,
                random_state=self.random_state,
                chunksize=self.chuncksize,
                passes=self.passes,
                alpha=self.alpha,
                eta=self.eta,
                decay=self.decay,
                offset=self.offset,
                eval_every=self.eval_every,
                gamma_threshold=self.gamma_threshold,
                minimum_probability=self.minimum_probability,
                minimum_phi_value=self.minimum_phi_value,
                per_word_topics=self.per_word_topics)
        else:
            self.model = LdaModel(
                corpus=self.corpus,
                id2word=self.dictionary,
                iterations=self.iterations,
                num_topics=self.num_topics,
                random_state=self.random_state,
                distributed=self.distributed,
                chunksize=self.chuncksize,
                passes=self.passes,
                update_every=self.update_every,
                alpha=self.alpha,
                eta=self.eta,
                decay=self.decay,
                offset=self.offset,
                eval_every=self.eval_every,
                gamma_threshold=self.gamma_threshold,
                minimum_probability=self.minimum_probability,
                ns_conf=self.ns_conf,
                minimum_phi_value=self.minimum_phi_value,
                per_word_topics=self.per_word_topics)
        print('Trained!')

    def _train_coherence_model(self, coherence_type='u_mass'):
        """Build a coherence model on top of the trained model.

        ``coherence_type`` could be 'u_mass' or 'c_v'.
        """
        self.coherence_model = CoherenceModel(model=self.model,
                                              texts=self.data,
                                              dictionary=self.dictionary,
                                              coherence=coherence_type)

    def _calculate_coherence(self, coherence_type='u_mass'):
        """Compute the coherence and remember which type it belongs to."""
        self._train_coherence_model(coherence_type=coherence_type)
        self.coherence = self.coherence_model.get_coherence()
        # Record the type so get_coherence() can reuse the stored value.
        self.coherence_type = coherence_type

    def get_coherence(self, coherence_type='u_mass'):
        """Return the coherence, recomputing only when the type changes."""
        if coherence_type != self.coherence_type:
            self._calculate_coherence(coherence_type=coherence_type)
        return self.coherence

    def get_topic_terms(self, num, topn=10):
        """Return the model's ``get_topic_terms(num, topn=topn)``."""
        return self.model.get_topic_terms(num, topn=topn)

    def get_preplexity(self):
        """Return ``log_perplexity`` of the model on the training corpus."""
        return self.model.log_perplexity(self.corpus)

    # Correctly spelled alias; the misspelled name stays for existing callers.
    get_perplexity = get_preplexity

    def get_topics(self, num):
        """Return the model's ``show_topics(num)``."""
        return self.model.show_topics(num)

    def _make_visualization(self):
        """prepare visualisation for display/saving"""
        return pyLDAvis.gensim.prepare(self.model,
                                       self.corpus,
                                       self.dictionary,
                                       sort_topics=False)

    def display(self):
        """display LDAvis in notebook"""
        visualisation = self._make_visualization()
        return pyLDAvis.display(visualisation)

    def save_ldavis(self, filename='topic.html'):
        """save LDAvis to .html"""
        ldavis = self._make_visualization()
        pyLDAvis.save_html(ldavis, filename)

    def save_lda(self, filename):
        """save lda model only"""
        self.model.save(filename)

    def pickle(self, filename):
        """save class instance to file"""
        # The context manager closes the file even if pickling fails.
        with open(filename, 'wb') as f:
            pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)

    @staticmethod
    def unpickle(filename):
        """read class instance from file

        NOTE: unpickling executes arbitrary code; only load trusted files.
        """
        with open(filename, 'rb') as f:
            return pickle.load(f)

    def predict_topic(self, doc_list):
        """Predict topics for a list of documents (strings).

        Each document is split on whitespace and converted to bag-of-words.

        :return: one list per document of (topic_id, probability) pairs,
            sorted by decreasing probability
        """
        topic_list = []
        for doc in doc_list:
            bow = self.dictionary.doc2bow(str(doc).split())
            topics_probs = self.model.get_document_topics(bow)
            topics_probs.sort(key=lambda tup: tup[1], reverse=True)
            topic_list.append(topics_probs)
        return topic_list
# In[18]: # calculate coherence metric for each of n topics in test set coherence_model_1_per_topic = coherence_model_1test.get_coherence_per_topic() # uncomment to print coherence_model_1_per_topic # print(coherence_model_1_per_topic) # #### Model #1 - Evaluate - Perplexity # Calculate perplexity metric. Metric calculates and returns per-word likelihood bound using a chunk of documents as evaluation corpus. Output calculated statistics, including the perplexity=2^(-bound), to log at INFO level. Returns the variational bound score calculated for each word # In[19]: # calculate perplexity metric for model_1 train set (1000 pats dataset) perplexity_model_1train = model_1.log_perplexity(corpus_1000train) print(perplexity_model_1train) # In[20]: # calculate perplexity metric for model_1 test set (1000 pats dataset) perplexity_model_1test = model_1.log_perplexity(corpus_1000test) print(perplexity_model_1test) # ### Model #1 - Predict # #### Model #1 - Predict - Pickle model # In[21]: # update path with location to save pickled model
"""Sweep the number of LDA topics and report coherence and perplexity.

Usage: ``python <script> DATA_FILE ALPHA``

DATA_FILE is read with ``word2vec.LineSentence``. ALPHA is the Dirichlet
prior handed to ``LdaModel``.
"""
import sys

import numpy as np
from gensim.corpora import Dictionary
from gensim.models.ldamodel import LdaModel, CoherenceModel
from gensim.models import word2vec

data_file = sys.argv[1]
alpha = float(sys.argv[2])

# Sentences with fewer than two tokens are dropped.
sentences = [s for s in word2vec.LineSentence(data_file) if len(s) >= 2]
dic = Dictionary(sentences)
corpus = [dic.doc2bow(s) for s in sentences]

for i in range(1, 31):
    # Use the alpha given on the command line rather than a fixed constant.
    lda = LdaModel(corpus=corpus, id2word=dic, num_topics=i, alpha=alpha,
                   random_state=1)
    cm = CoherenceModel(model=lda, corpus=corpus, coherence='u_mass')
    coherence = cm.get_coherence()
    # log_perplexity returns the per-word bound; perplexity is 2 ** (-bound).
    perwordbound = lda.log_perplexity(corpus)
    perplexity = np.exp2(-perwordbound)
    print(f"num_topics = {i}, coherence = {coherence}, perplexity = {perplexity}")
def LDA_pd(data=data_path,
           list_keys=keywords,
           num_topics=num_topics,
           iterations=iterations,
           alpha=alpha,
           eta=eta,
           embeddings=embeddings,
           top=topn,
           output_path=output,
           use_keywords=use_keywords):
    """Run (partial-data) LDA and write a report, a CSV and log messages.

    Parameters
    ----------
    data :
        Data source handed to ``word_lists`` / ``word_lists_no_keywords``.
    list_keys : list of str
        Keywords used to select the data when ``use_keywords`` is true.
    num_topics, iterations, alpha, eta :
        Forwarded to ``LdaModel``.
    embeddings : bool
        When true, the keyword list is supplemented with the ``top`` most
        similar words from the embedding model loaded via ``load_model``.
    top : int
        Number of similar words requested per keyword.
    output_path : str
        Prefix of the report file (``<output_path>.output``).
    use_keywords : bool
        If false, LDA is performed on all data (NOT Partial Data LDA).

    Returns
    -------
    LdaModel
        The trained model.

    Notes
    -----
    The module-level settings ``model_path``, ``bigrams``, ``coherence`` and
    ``output_csv`` are still read from the enclosing module.
    """
    # Remember the size of the caller's keyword list before it is possibly
    # extended with embedding neighbours below.
    n_initial_keywords = len(list_keys)

    # The context manager guarantees the report is flushed and closed, even
    # when training fails part-way through.
    with open(output_path + '.output', 'w') as output:
        output.write("Generating {} topics from {} initial keywords \n".format(
            num_topics, n_initial_keywords))
        output.write(
            "LDA model parameters:\n(1) alpha {}\n(2) eta {}\n(3) running {} iterations. \n"
            .format(alpha, eta, iterations))

        if use_keywords:
            data_words = list(word_lists(data, list_keys))
            output.write("Standard set of keywords includes:\n" +
                         ', '.join(i for i in list_keys))
            if embeddings:
                display_log("Loading word embeddings")
                model = load_model(model_path)
                most_similar = grab_most_similar(list_keys, model=model, top=top)
                # NOTE(review): the supplemented list is only reported; the
                # data selection above used the original keywords -- confirm
                # that this ordering is intended.
                list_keys = add_similar(list_keys, most_similar)
                output.write("Supplemented keyword list includes:\n" +
                             ', '.join(i for i in list_keys))
                output.write('\n')
                output.write(
                    "Top {} most similar words added from word emeddings (if found) \n"
                    .format(top))
        else:
            data_words = list(word_lists_no_keywords(data))
        display_log("Created data word list of size {}".format(
            str(len(data_words))))

        # generate bigrams
        if bigrams:
            data_words = make_bigrams(data_words)
            display_log("Created bigrams word list")
            output.write("Topic integrates bigrams.\n\n")

        # create dictionary
        id2word = corpora.Dictionary(data_words)
        display_log("Created dictionary")

        # TDF
        corpus = [id2word.doc2bow(text) for text in data_words]
        display_log("Created corpus")

        # LDA model
        lda_model = LdaModel(corpus=corpus,
                             id2word=id2word,
                             num_topics=num_topics,
                             random_state=100,
                             update_every=1,
                             chunksize=60,
                             passes=25,
                             alpha=alpha,
                             eta=eta,
                             iterations=iterations)
        display_log("Created LDA model")

        topic_header = ["Topic " + str(i + 1) for i in range(num_topics)]
        # After the transpose, index 0 selects the words and index 1 the
        # scores, each laid out with one column per topic.
        topic_array = np.array(
            [lda_model.show_topic(i) for i in range(num_topics)]).T

        output.write("Topics\n-----------------------\n")
        output.write(
            tabulate(topic_array[0], headers=topic_header, tablefmt='github'))
        output.write("\n\n")
        output.write("Similarity Scores\n-----------------------\n")
        output.write(
            tabulate(topic_array[1], headers=topic_header, tablefmt='github'))
        output.write("\n\n")
        display_log("printed table into output file " + output_path)

        df_all = pd.DataFrame()
        topics_transposed = topic_array.T
        for i in range(num_topics):
            new = pd.DataFrame(topics_transposed[i],
                               columns=['Topic ' + str(i), 'score'])
            df_all = pd.concat([df_all, new], axis=1)
        df_all.to_csv(output_csv, index=False, encoding='utf-16')
        display_log("Exported topics and scores into csv file " + output_csv +
                    '.csv')

        # coherence for LDA-PDs
        if coherence:
            coherence_model_lda = CoherenceModel(model=lda_model,
                                                 texts=data_words,
                                                 dictionary=id2word,
                                                 coherence='c_v')
            coherence_lda = coherence_model_lda.get_coherence()
            output.write(
                "Coherence and Preplexity Scores\n-----------------------\n")
            output.write(
                "LDA-PD Model with {} keywords: \n Perplexity: {} \n Coherence: {}"
                .format(n_initial_keywords, lda_model.log_perplexity(corpus),
                        coherence_lda))
            display_log("Coherence and Perplexity calculated, see " +
                        output_path + '.output')

    display_log("Log saved in " + output_path + '.log')
    display_log("Output saved in " + output_path + '.output')
    display_log("Topics saved in " + output_path + '.csv')
    return lda_model
# Identifier of this run, built from the hyper-parameters; it is substituted
# into the output file template below.
run_id = "ldaU_K{K}_a{alpha_frac}-K_b{beta}_iter{iter}.gensim".format(K=num_topics, alpha_frac=alpha_frac, beta=beta, iter=num_iterations)
print run_id

output_file = output_file_template.format(run_id=run_id)

# Train and save
print 'Training...'
model = LdaModel(corpus,
                 alpha=alpha, eta=beta,
                 id2word=dictionary, num_topics=num_topics, iterations=num_iterations
                 )
# Multicore alternative, currently disabled:
# model = LdaMulticore(corpus,
#                      alpha=alpha, eta=beta,
#                      id2word=dictionary, num_topics=num_topics, iterations=num_iterations, workers=2
#                      )
print 'Done training.'
model.save(output_file)

# Print the top words of each topic, if desired (at most 100 topics are
# requested from show_topics).
if print_topics:
    topics = model.show_topics(num_topics=100, formatted=False)
    for topic in topics:
        # topic[1] holds the (word, weight) pairs of the topic.
        for tup in topic[1]:
            print tup[0] + ": " + str(tup[1])
        print '\n'

# Evaluate perplexity on the held-out corpus: log_perplexity returns the
# per-word bound, which is converted to a perplexity as 2 ** (-bound).
ll = model.log_perplexity(test_corpus)
print "LL: "+str(ll)
print "Perp: "+str(np.exp2(-ll))
lda_model.update(corpus=train_data, decay=learning_decay, iterations=valid_iter) train_s.append( CoherenceModel(model=lda_model, corpus=train_data, dictionary=dictionary, coherence='u_mass').get_coherence()) test_s.append( CoherenceModel(model=lda_model, corpus=test_data, dictionary=dictionary, coherence='u_mass').get_coherence()) train_p.append(lda_model.log_perplexity(train_data)) test_p.append(lda_model.log_perplexity(test_data)) train_scores.append(train_s) test_scores.append(test_s) train_perplexities.append(train_p) test_perplexities.append(test_p) print "train_scores: ", train_scores[-1], " test_scores: ", test_scores[-1], \ " train_perplexities: ", train_perplexities[-1], " test_perplexities: ", test_perplexities[-1] dict_num_topic[str(n_component) + '_topics'] = { "max_iter": max_iter, "valid_iter": valid_iter, "train_scores": train_scores, "test_scores": test_scores,
output_file = output_file_template.format(run_id=run_id) # Train and save print 'Training...' model = LdaModel(corpus, alpha=alpha, eta=beta, id2word=dictionary, num_topics=num_topics, iterations=num_iterations) # model = LdaMulticore(corpus, # alpha=alpha, eta=beta, # id2word=dictionary, num_topics=num_topics, iterations=num_iterations, workers=2 # ) print 'Done training.' model.save(output_file) # Print top 10 words in topics, if desired if print_topics: topics = model.show_topics(num_topics=100, formatted=False) for topic in topics: for tup in topic[1]: print tup[0] + ": " + str(tup[1]) print '\n' # Evaluate perplexity ll = model.log_perplexity(test_corpus) print "LL: " + str(ll) print "Perp: " + str(np.exp2(-ll))
# Build LDA model print('Training LDA model...') model = LdaModel( corpus=corpus, id2word=id2word, num_topics=options.num_topics, random_state=100, update_every=1, chunksize=100, passes=options.iterations, alpha='auto', per_word_topics=True ) print('...done') print('Saving model...') model.save(model_path) print('...done') print('Topics found:') for i in range(options.num_topics): print(i, ' -> ', model.print_topic(i)) doc_lda = model[corpus] # Compute Perplexity print('Perplexity: ', model.log_perplexity(corpus)) # a measure of how good the model is. lower the better. # Compute Coherence Score coherence_model_lda = CoherenceModel(model=model, texts=texts, dictionary=id2word, coherence='c_v') coherence_lda = coherence_model_lda.get_coherence() print('Coherence Score: ', coherence_lda)
# Tokenise every document: lower-case it, split on the separators below,
# keep only alphabetic characters and drop stop words and short tokens.
for idx, doc in enumerate(allmydocs):
    # if idx > num_docs:
    #     break
    doc = doc.lower()
    # The parentheses must be escaped: unescaped they form an empty capture
    # group, and on Python 3.7+ re.split then splits at every position, so
    # no token would survive the length filter below.
    doc = re.split(r' |, |\n|: |\(|\)', doc)
    doc = [elt for elt in doc if elt is not None]
    tokens = []
    for words in doc:
        cleaned = ''.join([i for i in words if i.isalpha()])
        if cleaned not in stop_words and 2 < len(cleaned):
            tokens.append(cleaned)
    cleaned_docs.append(tokens[:])

# Create a corpus from a list of texts
common_dictionary = Dictionary(cleaned_docs)
common_corpus = [common_dictionary.doc2bow(text) for text in cleaned_docs]

random.shuffle(common_corpus)
# NOTE(review): the 80/20 split is computed, but the model below is trained
# and evaluated on the full corpus -- confirm whether train/test should be
# used instead.
train = common_corpus[:int(len(common_corpus)*0.8)]
test = common_corpus[int(len(common_corpus)*0.8):]

lda = LdaModel(common_corpus, num_topics=25, iterations=10000, eval_every=2, chunksize=10000, passes=10)
perplex = lda.log_perplexity(common_corpus)
print('perplex', perplex)

# Save model to disk.
temp_file = datapath("model")
lda.save(temp_file)
pprint(model_lda.show_topic(1, topn=10)) # In[89]: pprint(model_lda.show_topics(num_topics=5, num_words=10)) # ### Evaluate - model #1 # In[91]: # calculate perplexity metrics perplexity = model_lda.log_perplexity(corpus_train) perplexity # In[92]: # TODO (Lee) - confirm that filtered_data is indeed the correct dataset to pass to texts param # calculate coherence metric coherence = CoherenceModel(model=model_lda, texts=filtered_data, dictionary=id_to_word, coherence='c_v') coherence_1 = coherence.get_coherence() coherence_1 # In[94]:
def find_best_model_log_perp(n_topic_range,
                             texts,
                             id2word,
                             corpus,
                             threshold=None,
                             random_state=42,
                             plot=True,
                             verbose=False):
    """
    Searches for the best model in a given range by log perplexity value

    Parameters:
    - `n_topic_range` a `range` of values for the `num_topics` parameter of a
      gensim LDA model to try (its `start` and `step` attributes are used)
    - `texts` a list of documents broken into words (not used by the search
      itself)
    - `id2word` a dictionary containing word encodings
    - `corpus` the result of mapping each word in `texts` to its value in
      `id2word`
    - `threshold` a float that specifies a log perplexity value; as soon as a
      model scores below it the function returns early with that model
    - `random_state` a random state for use in a gensim LDA model
    - `plot` a boolean specifying whether or not to plot log perplexity values
      against each `num_topics` value
    - `verbose` a boolean specifying whether or not to print updates

    Returns: a tuple containing the best model (the one with the smallest
    log perplexity value, or the one that crossed `threshold`), the list of
    all models attempted, and a list of all log perplexity values obtained,
    respectively.

    Raises: `ValueError` if `n_topic_range` is empty.

    NOTE(review): gensim's `log_perplexity` returns a per-word likelihood
    bound; selecting the minimum follows the original contract of this
    function -- confirm that "smaller is better" is what callers expect.
    """
    models = []
    perp_vals = []

    # Width of the range, used only for progress reporting. It does not
    # change between iterations, so it is computed once up front.
    diff = None
    if verbose and len(n_topic_range) > 0:
        diff = max(n_topic_range) - n_topic_range.start

    for n_topics in n_topic_range:
        # Print percentage progress
        if verbose:
            # A single-element range has zero width; report 0% rather than
            # dividing by zero.
            if diff:
                progress = 100 * (n_topics - n_topic_range.start) / diff
            else:
                progress = 0.0
            print(str(round(progress, 1)) + "% done")

        lda_model = LdaModel(corpus=corpus,
                             id2word=id2word,
                             num_topics=n_topics,
                             random_state=random_state,
                             update_every=1,
                             chunksize=100,
                             passes=10,
                             alpha='auto',
                             per_word_topics=True)
        p = lda_model.log_perplexity(corpus)
        models.append(lda_model)
        perp_vals.append(p)

        if threshold is not None and p < threshold:
            if verbose:
                print('Returning early with a log perplexity value of ' +
                      str(p))
            if plot:
                # The portion of the range that was actually iterated through
                actual_range = range(n_topic_range.start,
                                     n_topics + n_topic_range.step,
                                     n_topic_range.step)
                plt.plot(actual_range, perp_vals, 'b')
                plt.show()
            return lda_model, models, perp_vals

    if plot:
        plt.plot(n_topic_range, perp_vals, 'b')
        plt.show()
    return models[np.argmin(perp_vals)], models, perp_vals