def get_president_sentences(president):
    # gather this president's speech files, read and preprocess them,
    # and return the merged list of tokenized sentences
    files = sorted([file for file in os.listdir() if president.lower() in file.lower()])
    speeches = [read_file(file) for file in files]
    processed_speeches = process_speeches(speeches)
    all_sentences = merge_speeches(processed_speeches)
    return all_sentences
def get_presidents_sentences(presidents):
    # same as get_president_sentences, but pooled over several presidents
    all_sentences = list()
    for president in presidents:
        files = sorted([file for file in os.listdir() if president.lower() in file.lower()])
        speeches = [read_file(file) for file in files]
        processed_speeches = process_speeches(speeches)
        all_prez_sentences = merge_speeches(processed_speeches)
        all_sentences.extend(all_prez_sentences)
    return all_sentences
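# The remaining helpers imported from president_helper in the scripts below
# (read_file, process_speeches, merge_speeches, most_frequent_words) are not
# shown in this section. What follows is a minimal sketch of plausible
# implementations, assuming plain-text speech files and whitespace
# tokenization; the function names match the imports, but everything inside
# the bodies is an assumption, not the module's actual code.
from collections import Counter

def read_file(file_name):
    # read one speech file in as a single string (UTF-8 assumed)
    with open(file_name, 'r', encoding='utf-8') as speech_file:
        return speech_file.read()

def process_speeches(speeches):
    # split each speech into sentences, each sentence into lowercase tokens
    processed_speeches = []
    for speech in speeches:
        sentences = speech.replace('?', '.').replace('!', '.').split('.')
        processed_speeches.append(
            [[word.lower() for word in sentence.split()]
             for sentence in sentences if sentence.strip()])
    return processed_speeches

def merge_speeches(processed_speeches):
    # flatten the per-speech lists into one list of tokenized sentences
    return [sentence for speech in processed_speeches for sentence in speech]

def most_frequent_words(all_sentences):
    # count every token across all sentences, most common first
    all_words = [word for sentence in all_sentences for word in sentence]
    return Counter(all_words).most_common()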
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, get_presidents_sentences, most_frequent_words
from nltk.corpus import stopwords
from collections import Counter

stop_words = stopwords.words('english')

# get list of all speech files
files = sorted([file for file in os.listdir() if file[-4:] == '.txt'])

# read each speech file
speeches = [read_file(file) for file in files]

# preprocess each speech
processed_speeches = process_speeches(speeches)

# merge speeches
all_sentences = merge_speeches(processed_speeches)
# print(all_sentences)

# view most frequently used words
most_freq_words = most_frequent_words(all_sentences)
# print(most_freq_words)
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, get_presidents_sentences, most_frequent_words

# get list of all speech files
files = sorted([file for file in os.listdir() if file[-4:] == '.txt'])
# print(files)

# read each speech file
speeches = list()
for file in files:
    speeches.append(read_file(file))

# preprocess each speech
processed_speeches = process_speeches(speeches)

# merge speeches
all_sentences = merge_speeches(processed_speeches)

# view most frequently used words
most_freq_words = most_frequent_words(all_sentences)
# print(most_freq_words)

# create gensim skip-gram (sg=1) model of all speeches
# (note: the size parameter was renamed vector_size in gensim 4.0)
all_prez_embeddings = gensim.models.Word2Vec(all_sentences, size=96, window=5, min_count=1, workers=2, sg=1)
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, \
    get_presidents_sentences, most_frequent_words

# get list of all speech files
files = sorted([file for file in os.listdir() if file[-4:] == '.txt'])
print(files)

# read each speech file
speeches = [read_file(item) for item in files]
# print(speeches)

# preprocess each speech
processed_speeches = process_speeches(speeches)

# merge speeches
all_sentences = merge_speeches(processed_speeches)
# print(all_sentences)

# view most frequently used words
most_freq_words = most_frequent_words(all_sentences)
print(most_freq_words)

# create gensim model of all speeches
all_prez_embeddings = gensim.models.Word2Vec(all_sentences, size=96, window=5, min_count=1, workers=2,
                                             sg=1)
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, get_presidents_sentences, most_frequent_words

# get list of all speech files
files = sorted([file for file in os.listdir() if file[-4:] == '.txt'])
# print(files)

# read each speech file
speeches = list()
for txt in files:
    speeches.append(read_file(txt))
# print(speeches)

# preprocess each speech
processed_speeches = process_speeches(speeches)
# print(processed_speeches)

# merge speeches
all_sentences = merge_speeches(processed_speeches)
# print(all_sentences)

# view most frequently used words
most_freq_words = most_frequent_words(all_sentences)
# print(most_freq_words)

# create gensim model of all speeches
all_prez_embeddings = gensim.models.Word2Vec(all_sentences, size=96, window=5,
                                             min_count=1, workers=2, sg=1)
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, get_presidents_sentences, most_frequent_words

# get list of all speech files
files = sorted([file for file in os.listdir() if file[-4:] == '.txt'])

# read each speech file
speeches = [read_file(speech) for speech in files]

# preprocess each speech
processed_speeches = process_speeches(speeches)

# merge speeches
all_sentences = merge_speeches(processed_speeches)

# view most frequently used words
most_freq_words = most_frequent_words(all_sentences)
# print(most_freq_words)

# create gensim model of all speeches
all_prez_embeddings = gensim.models.Word2Vec(all_sentences, size=96, window=5, min_count=1, workers=2, sg=1)

# view words similar to freedom
similar_to_freedom = all_prez_embeddings.wv.most_similar('freedom', topn=20)
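# A short follow-up sketch, not part of the original script: besides listing
# nearest neighbors, gensim's KeyedVectors can score a specific word pair by
# cosine similarity. The word 'liberty' is an illustrative choice.
print(similar_to_freedom)  # list of (word, cosine similarity) pairs
print(all_prez_embeddings.wv.similarity('freedom', 'liberty'))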
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, get_presidents_sentences, most_frequent_words

# get list of all speech files
files = sorted([file for file in os.listdir() if file[-4:] == '.txt'])
# print(files)

# read each speech file, collecting all of them in one list
speeches = list()
for fil in files:
    speeches.append(read_file(fil))
# print(speeches)

# preprocess each speech
processed_speeches = process_speeches(speeches)

# merge speeches
all_sentences = merge_speeches(processed_speeches)

# view most frequently used words
most_freq_words = most_frequent_words(all_sentences)
# print(most_freq_words)

# create gensim model of all speeches
all_prez_embeddings = gensim.models.Word2Vec(all_sentences, size=96, window=5, min_count=1, workers=2, sg=1)
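# A minimal sketch of training per-president embeddings with the
# get_president_sentences helper imported above. The president name (and the
# file-naming convention it relies on) and the query word are illustrative
# assumptions; under gensim 4.x, pass vector_size=96 in place of size=96.
washington_sentences = get_president_sentences('washington')
washington_embeddings = gensim.models.Word2Vec(washington_sentences, size=96,
                                               window=5, min_count=1,
                                               workers=2, sg=1)
print(washington_embeddings.wv.most_similar('government', topn=10))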