forked from dpalinsk/CFS-Summarizer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
129 lines (103 loc) · 3.89 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# a driver for our summarizer.
# authors: Kirsten Vail, Derek Palinski, Lee Schumann
from nltk.corpus import reuters
from summarizer import Summarizer
import random
import paragraphs
def main():
    """Entry point: build a summarizer and print two demo summaries."""
    summarizer = Summarizer()
    getSummaries(summarizer, 2)
def getSummaries(cfs, number_of_summaries):
    """Print `number_of_summaries` summaries of randomly drawn Reuters articles.

    Each article must have at least 5 sentences (shorter ones are redrawn);
    the summary length is half the article's sentence count.
    """
    for _ in range(number_of_summaries):
        # keep drawing until the article is long enough to summarize
        fileid = random.choice(reuters.fileids())
        while len(reuters.sents(fileid)) < 5:
            fileid = random.choice(reuters.fileids())
        target_len = len(reuters.sents(fileid)) // 2
        print_summary(cfs.summarize(fileid, target_len))
def filesForEval(cfs):
    """Write evaluation files pairing real and imitation summaries, plus a key.

    For each of 10 random Reuters articles, writes our algorithm's summary and
    a randomly selected set of sentences — in shuffled order — to
    'random_order_summaries_2.txt', and records which block is which
    ('summ' or 'rand') in 'key_2.txt'.
    """
    # number of sentences in each generated summary
    length = 5
    # number of article entries written to the files
    number_of_summaries = 10
    # context managers guarantee both files are closed even if summarization
    # raises part-way through (the original leaked both handles in that case)
    with open('random_order_summaries_2.txt', 'w') as summFile, \
            open('key_2.txt', 'w') as keyFile:
        for n in range(number_of_summaries):
            # draw articles until one is of the appropriate length: at least
            # four times the summary length, so the random imitation has
            # enough sentences to sample from
            article = random.choice(reuters.fileids())
            while len(reuters.sents(article)) < length * 4:
                article = random.choice(reuters.fileids())
            # article header in the summary document...
            summFile.write('Article #' + str(n) + '\n')
            summFile.write('\nfileid: ' + article + '\n\n')
            # ...and the matching header in the key document
            keyFile.write('Article #' + str(n) + '\n')
            keyFile.write('fileid: ' + article + '\n\n')
            # our algorithm's summary, tagged so the key can identify it
            summSents = cfs.summarize(article, length)
            summSents.insert(0, 'summ')
            # random imitation summary, tagged likewise
            randSents = getRandom(article, length)
            randSents.insert(0, 'rand')
            # shuffle so an evaluator cannot tell which is which by position
            summs = [summSents, randSents]
            random.shuffle(summs)
            # write both blocks; element 0 of each is the marker ('summ'/'rand')
            for summ in summs:
                for i, sentence in enumerate(summ):
                    if i == 0:
                        # the marker goes only into the key
                        keyFile.write(sentence)
                    else:
                        for word in sentence:
                            summFile.write(word + ' ')
                        summFile.write('\n')
                summFile.write('\n')
                keyFile.write(' ')
            summFile.write('\n')
            keyFile.write('\n')
def getRandom(article, length):
    """Return a random "imitation summary" for *article*.

    The article's first sentence (presumably the headline/lead) is always
    kept, plus `length` further sentences sampled without replacement, all
    returned in original document order.

    NOTE(review): this yields length + 1 sentences — one more than the real
    summaries produced by cfs.summarize — confirm that is intended.
    """
    # pair each sentence with its position so we can restore document order
    indexed = list(enumerate(reuters.sents(article)))
    lead, rest = indexed[0], indexed[1:]
    # sample without replacement (replaces the original choice/remove loop);
    # also drops the leftover debug print of the sentence count
    chosen = [lead] + random.sample(rest, length)
    # indices are unique, so sorting the (index, sentence) pairs restores
    # document order without ever comparing sentence lists
    chosen.sort()
    return [sentence for _, sentence in chosen]
def print_summary(summary):
    """Print one summary, one sentence per line.

    `summary` is a list of sentences, each sentence a list of word strings
    (Summaries[Sentences[Words]]); a blank line follows the summary.
    """
    for sentence in summary:
        # every word carries a trailing space, matching word-by-word output
        print(''.join(word + ' ' for word in sentence))
    print()
main()