def get_president_sentences(president):
    """Return every processed sentence from this president's speech files.

    Scans the current directory for files whose name contains *president*
    (case-insensitive), reads each matching speech in sorted filename
    order, runs them through process_speeches, and merges the per-speech
    sentence lists into a single flat list.
    """
    needle = president.lower()
    matching_files = sorted(f for f in os.listdir() if needle in f.lower())
    raw_speeches = [read_file(f) for f in matching_files]
    cleaned_speeches = process_speeches(raw_speeches)
    return merge_speeches(cleaned_speeches)
def get_presidents_sentences(presidents):
    """Return the combined processed sentences for several presidents.

    Iterates over *presidents* in the given order and concatenates each
    president's sentence list. Delegates the per-president work
    (file matching, reading, preprocessing, merging) to
    get_president_sentences so that logic lives in exactly one place
    instead of being duplicated here.
    """
    all_sentences = []
    for president in presidents:
        # Same result as the old inlined copy: files are matched
        # case-insensitively and read in sorted order per president.
        all_sentences.extend(get_president_sentences(president))
    return all_sentences
stop_words = stopwords.words('english') # get list of all speech files files = sorted([file for file in os.listdir() if file[-4:] == '.txt']) # print(speeches) # read each speech file speeches = [read_file(file) for file in files] # print(process_speeches) # print(all_sentences) # preprocess each speech processed_speeches = process_speeches(speeches) # merge speeches all_sentences = merge_speeches(processed_speeches) # print(all_sentences) # view most frequently used words most_freq_words = most_frequent_words(all_sentences) # print(most_freq_words) # create gensim model of all speeches all_prez_embeddings = gensim.models.Word2Vec(all_sentences, size=96, window=5, min_count=1,