Python Document Examples

Programming Language: Python

Namespace/Package Name: Project

Class/Type: Document

Examples at hotexamples.com: 4

Python Document - 4 examples found. These are the top-rated real-world Python examples of Project.Document extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

getSentenceOrginal(3)

getLDA(2)

getTopicAndScore(1)

parameterize(1)

setencesByFreqCloseness(1)

setencesByLDAFreqCloseness(1)

Example #1

Show file

File: ErrorTesting.py Project: jmaguire/CS221

    def compute(filename):
        """Score a randomly chosen set of sentences against the gold file.

        Builds one Document from ``LDATester.PATH + filename + "_gold.txt"``
        and one from ``LDATester.PATH + filename + ".txt"``.  A random
        selection of the latter's sentences -- as many as the gold document
        has, or all of them if it has fewer -- is passed through
        ``getSentenceOrginal``, joined with single spaces, and compared to the
        gold document's ``document`` attribute via ``BLEU.computeNormalize``.

        Returns whatever ``BLEU.computeNormalize`` returns.
        """
        reference = Document(LDATester.PATH + filename + "_gold.txt")
        candidate = Document(LDATester.PATH + filename + ".txt")

        ## Random baseline: shuffle every sentence position, then keep only the
        ## first len(reference.sentences) of them (the slice never over-runs).
        order = list(range(len(candidate.sentences)))
        random.shuffle(order)
        chosen = order[: len(reference.sentences)]

        summary = " ".join(
            [candidate.getSentenceOrginal(candidate.sentences[pos]) for pos in chosen]
        )
        return BLEU.computeNormalize(reference.document, summary)

Example #2

Show file

File: kmeansTest.py Project: jmaguire/CS221

from Project import Document
# Driver: call Document.parameterize on every sentence of one article.
#
# Optional debugging steps, left disabled:
#   file = raw_input("file name ")
#   doc = Document(file + '.txt')
#   for i in range(len(doc.paragraphs)):
#       print doc.paragraphs[i]
#   print doc.getParagraphLocation(doc.sentences[7])
#   print doc.getWordFreqBins()
#   print doc.getLengthBins()
article_path = 'economist1.txt'
doc = Document(article_path)

for current in doc.sentences:
    doc.parameterize(current)

Example #3

Show file

File: test.py Project: jmaguire/CS221

from Project import Document
# filename = raw_input("file name ")
filename = 'newyorker1'
doc = Document(filename + '.txt')
# doc = Document('economist1.txt')


## Test Freq Distribution
print 'Frequency Test'
print
print 'freq of \'long\'', doc.freq_dist.freq('long')
print 'most common word', doc.freq_dist.max()
print 'num words', doc.freq_dist.N()


print 'Conditional Test previous'
print
## Test Conditional Frequency Distribution Previous
print 'most common word to follow Start', doc.cfdistPrev['Start'].max() ## most common word after Start
print 'most common word after',doc.freq_dist.max(),doc.cfdistPrev[doc.freq_dist.max()].max() ## most common word after long

print 'Conditional Test after'
print

## Test Conditional Frequency Distribution Next
print 'most common word to precede End', doc.cfdistNext['End'].max() ## most common word after Start
print 'most common word before',doc.freq_dist.max(),doc.cfdistNext[doc.freq_dist.max()].max() ## most common word after long

## get closest sentences to doc freq dist. WE WANT LDA DIST
sent = doc.setencesByFreqCloseness()
print '1', doc.getSentenceOrginal(sent[0])

Example #4

Show file

File: LDATest.py Project: jmaguire/CS221

from Project import Document
from collections import Counter
import numpy as np
# filename = raw_input("file name ")
filename = 'economist1'
doc = Document(filename + '.txt')

## Get key sentence
sent = doc.setencesByFreqCloseness()
maxSent = sent[0]
print doc.getSentenceOrginal(maxSent)



doc.getLDA(5)
topicAndScore = doc.getTopicAndScore()
maxTopic, maxScore = topicAndScore[maxSent]
print topicAndScore[maxSent]

sentByTopics = {}

for key in topicAndScore:
    
    value = topicAndScore[key]
    topic = value[0]
    if topic in sentByTopics:

        sentByTopics[topic] += [key]
    else:

        sentByTopics[topic] = [key]