# NOTE: every print below uses the parenthesised single-argument form, which
# produces the same output under Python 2's print statement and Python 3's
# print function.

# Echo the fifth entry of `sent`, labelled "5".
print('5 ' + doc.getSentenceOrginal(sent[4]))

# Write the first five sentences of `sent` to "<filename>_output.txt",
# each followed by a single space (no newlines).
with open(str(filename) + '_output.txt', 'w') as out_file:
    for rank in range(5):
        out_file.write(doc.getSentenceOrginal(sent[rank]) + ' ')
print(' ')

## LDA
from collections import Counter

## Get the top sentence for every LDA topic and count how often each
## sentence text is chosen.
sentence_votes = Counter()
# A single pass for now; raising the range makes the votes accumulate over
# repeated getLDA() calls.
for n in range(1):
    print(n)
    # NOTE(review): 6 is presumably the number of topics -- confirm against
    # Document.getLDA.
    doc.getLDA(6)
    for i in range(doc.topics):
        sentences = doc.setencesByLDAFreqCloseness(i)
        # Only the first sentence returned for topic i gets a vote.
        # Counter.update() counts the elements of any iterable, so no
        # intermediate Counter is needed.
        sentence_votes.update(
            doc.getSentenceOrginal(sentence) for sentence in sentences[:1]
        )

# Keep the (at most) six most frequently chosen sentence texts, most common
# first.
popular = [text for text, _count in sentence_votes.most_common(6)]

# A dedicated loop name is used so the `sent` list above is not overwritten
# by a sentence string.
for popular_sentence in popular:
    print(popular_sentence)
from collections import Counter

import numpy as np

from Project import Document

# To prompt for the input instead: filename = raw_input("file name ")
filename = 'economist1'
doc = Document(filename + '.txt')

## Get key sentence
# The first element returned by setencesByFreqCloseness() is treated as the
# key sentence; its original text is printed.
sent = doc.setencesByFreqCloseness()
maxSent = sent[0]
print(doc.getSentenceOrginal(maxSent))

## Topic of the key sentence
doc.getLDA(5)
topicAndScore = doc.getTopicAndScore()
# Each entry unpacks into a (topic, score) pair.
keySentenceEntry = topicAndScore[maxSent]
maxTopic, maxScore = keySentenceEntry
print(keySentenceEntry)

## Group sentence keys by topic: {topic: [key, ...]}
sentByTopics = {}
for key in topicAndScore:
    topic = topicAndScore[key][0]
    sentByTopics.setdefault(topic, []).append(key)