コード例 #1
0
def get_president_sentences(president):
    """Return the merged, processed sentences from every speech file
    whose name contains *president* (case-insensitive match).

    Relies on module-level helpers: read_file, process_speeches,
    merge_speeches.
    """
    matching = [f for f in os.listdir() if president.lower() in f.lower()]
    matching.sort()
    raw_speeches = []
    for name in matching:
        raw_speeches.append(read_file(name))
    return merge_speeches(process_speeches(raw_speeches))
コード例 #2
0
def get_presidents_sentences(presidents):
    """Return processed sentences gathered across *presidents* (an
    iterable of names), concatenated in the order given.

    For each president, files are matched case-insensitively by name
    and read in sorted order.
    """
    combined = []
    for name in presidents:
        needle = name.lower()
        matches = sorted(f for f in os.listdir() if needle in f.lower())
        texts = [read_file(f) for f in matches]
        combined += merge_speeches(process_speeches(texts))
    return combined
コード例 #3
0
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, get_presidents_sentences, most_frequent_words
from nltk.corpus import stopwords
from collections import Counter

# English stop words, available for later filtering.
stop_words = stopwords.words('english')

# Every .txt speech transcript in the working directory, sorted by name.
files = sorted(f for f in os.listdir() if f[-4:] == '.txt')

# Raw text of each speech.
speeches = [read_file(f) for f in files]

# Tokenize / normalize each speech.
processed_speeches = process_speeches(speeches)

# Flatten all speeches into a single list of sentences.
all_sentences = merge_speeches(processed_speeches)

# Frequency-ranked words across the whole corpus.
most_freq_words = most_frequent_words(all_sentences)
コード例 #4
0
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, get_presidents_sentences, most_frequent_words

# get list of all speech files, sorted for deterministic processing order
files = sorted([file for file in os.listdir() if file[-4:] == '.txt'])

# read each speech file
# (idiom fix: replaced a manual append loop with a list comprehension)
speeches = [read_file(file) for file in files]

# preprocess each speech
processed_speeches = process_speeches(speeches)

# merge speeches into one flat list of sentences
all_sentences = merge_speeches(processed_speeches)

# view most frequently used words
most_freq_words = most_frequent_words(all_sentences)

# create gensim skip-gram (sg=1) model of all speeches
# NOTE(review): `size` was renamed `vector_size` in gensim 4.x — confirm the
# installed gensim version before running.
all_prez_embeddings = gensim.models.Word2Vec(all_sentences,
                                             size=96,
                                             window=5,
                                             min_count=1,
                                             workers=2,
                                             sg=1)
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, \
    get_presidents_sentences, most_frequent_words

# All .txt speech transcripts, alphabetically sorted.
files = sorted(f for f in os.listdir() if f[-4:] == '.txt')
print(files)

# Raw text of every speech.
speeches = [read_file(f) for f in files]

# Tokenized / normalized speeches.
processed_speeches = process_speeches(speeches)

# One flat list of sentences across all speeches.
all_sentences = merge_speeches(processed_speeches)

# Frequency-ranked vocabulary of the corpus.
most_freq_words = most_frequent_words(all_sentences)
print(most_freq_words)

# create gensim model of all speeches
all_prez_embeddings = gensim.models.Word2Vec(all_sentences,
                                             size=96,
                                             window=5,
                                             min_count=1,
                                             workers=2,
コード例 #6
0
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, get_presidents_sentences, most_frequent_words

# Every .txt file in the working directory, alphabetically sorted.
files = sorted(f for f in os.listdir() if f[-4:] == '.txt')

# Read the contents of each speech file.
speeches = [read_file(path) for path in files]

# Tokenize and normalize each speech.
processed_speeches = process_speeches(speeches)

# Combine all speeches into one flat sentence list.
all_sentences = merge_speeches(processed_speeches)

# Frequency-ranked vocabulary across the corpus.
most_freq_words = most_frequent_words(all_sentences)

# create gensim model of all speeches
all_prez_embeddings = gensim.models.Word2Vec(all_sentences,
                                             size=96,
                                             window=5,
コード例 #7
0
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, get_presidents_sentences, most_frequent_words

# All .txt speech transcripts, sorted alphabetically.
files = sorted(f for f in os.listdir() if f[-4:] == '.txt')

# Raw text of every speech.
speeches = [read_file(path) for path in files]

# Tokenized / normalized speeches.
processed_speeches = process_speeches(speeches)

# One flat list of sentences across all speeches.
all_sentences = merge_speeches(processed_speeches)

# Frequency-ranked vocabulary of the corpus.
most_freq_words = most_frequent_words(all_sentences)

# Train skip-gram (sg=1) word embeddings on the full corpus.
all_prez_embeddings = gensim.models.Word2Vec(
    all_sentences, size=96, window=5, min_count=1, workers=2, sg=1)

# The twenty embeddings nearest to 'freedom'.
similar_to_freedom = all_prez_embeddings.wv.most_similar('freedom', topn=20)
コード例 #8
0
import os
import gensim
import spacy
from president_helper import read_file, process_speeches, merge_speeches, get_president_sentences, get_presidents_sentences, most_frequent_words

# get list of all speech files, sorted for deterministic order
files = sorted([file for file in os.listdir() if file[-4:] == '.txt'])

# read each speech file
# BUG FIX: the original loop rebound `speeches = read_file(fil)` on every
# iteration, so only the LAST file's text reached the pipeline; collect
# every speech instead.
speeches = [read_file(fil) for fil in files]

# preprocess each speech
processed_speeches = process_speeches(speeches)

# merge speeches into one flat list of sentences
all_sentences = merge_speeches(processed_speeches)

# view most frequently used words
most_freq_words = most_frequent_words(all_sentences)

# create gensim skip-gram (sg=1) model of all speeches
# NOTE(review): `size` was renamed `vector_size` in gensim 4.x — confirm the
# installed gensim version before running.
all_prez_embeddings = gensim.models.Word2Vec(all_sentences,
                                             size=96,
                                             window=5,
                                             min_count=1,
                                             workers=2,
                                             sg=1)