Beispiel #1
0
def get_president_sentences(president):
    """Build the merged speech output for a single president.

    Every entry of the current working directory whose name contains
    ``president`` (case-insensitive substring match) is read with
    ``read_file`` in sorted file-name order, passed through
    ``process_speeches`` and finally combined by ``merge_speeches``.

    Args:
        president: Name fragment (string) to look for in the file names.

    Returns:
        Whatever ``merge_speeches`` returns for the processed speeches
        (presumably a list of sentences -- confirm against that helper).
    """
    matching = []
    for name in os.listdir():
        if president.lower() in name.lower():
            matching.append(name)
    matching.sort()

    raw_texts = [read_file(name) for name in matching]
    return merge_speeches(process_speeches(raw_texts))
Beispiel #2
0
def get_presidents_sentences(presidents):
    """Collect the merged speech output for several presidents.

    The current working directory is listed once. For each entry of
    ``presidents`` the files whose name contains it (case-insensitive
    substring match) are read in sorted file-name order, run through
    ``process_speeches`` and ``merge_speeches``, and the result is appended
    to one combined list.

    Args:
        presidents: Iterable of name fragments (strings) to look for in the
            file names.

    Returns:
        list: Concatenation of each president's merged output, in the order
        the presidents were given. A file whose name matches more than one
        president contributes once per match.
    """
    # The directory listing and the lower-cased names do not depend on the
    # president, so compute them a single time. Sorting up front keeps every
    # per-president selection in sorted file-name order.
    candidates = [(name, name.lower()) for name in sorted(os.listdir())]

    all_sentences = []
    for president in presidents:
        needle = president.lower()
        files = [name for name, lowered in candidates if needle in lowered]
        speeches = [read_file(name) for name in files]
        all_sentences.extend(merge_speeches(process_speeches(speeches)))
    return all_sentences
Beispiel #3
0
# Gather the names of all '.txt' files in the current working directory,
# ordered by file name.
files = [name for name in os.listdir() if name.endswith('.txt')]
files.sort()

# Load the contents of every speech file.
speeches = list(map(read_file, files))

# Run the project's preprocessing over the loaded speeches.
processed_speeches = process_speeches(speeches)

# Combine the processed speeches into a single corpus.
all_sentences = merge_speeches(processed_speeches)

# Most frequently used words across the merged corpus.
most_freq_words = most_frequent_words(all_sentences)

# Train a Word2Vec model on the whole corpus (sg=1 selects skip-gram;
# min_count=1 keeps every word, however rare).
# NOTE(review): `size` is the gensim < 4.0 keyword; gensim 4.x renamed it to
# `vector_size` -- confirm which gensim version this project pins.
all_prez_embeddings = gensim.models.Word2Vec(
    all_sentences,
    size=96,
    window=5,
    min_count=1,
    workers=2,
    sg=1,
)
# view words similar to freedom