def buildcorpus(corpus, rootpath, filelimit=0):
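    """Build `corpus` from the text files under `rootpath`.

    Each file is read with texter.readnewstext, split into sentences and
    words, and turned into a Text of Sentence objects; distinct Word
    instances are collected in hugewordlist together with per-document
    term frequencies. Afterwards tf*idf is computed for every word and
    the words are pickle-dumped. If filelimit > 0, at most filelimit
    files are processed. Relies on module-level imports of os, nltk,
    texter, and the Text / Sentence / Word classes.
    """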
    
    fileids = os.listdir(rootpath)
    
    hugewordlist = list(corpus.words)   # will contain the distinct Word instances of the whole corpus

    numoffiles = 0
    
    # name the corpus after the number of texts that will actually be processed
    numoftexts = min(filelimit, len(fileids)) if filelimit else len(fileids)
    corpus.set_corpusname(str(numoftexts) + "texts")
    
    for fileid in fileids:
        allwords = nltk.FreqDist()    # will contain all words in this text
        
        doc_id = fileid.split(".")[0]
        # corpus.inserttext(doc_id)    # TODO: should pass the Text object itself
        newtext = Text(doc_id)
        
        path = os.path.join(rootpath, fileid)
    
        rawtext = texter.readnewstext(path)
        lines = texter.splitToSentences(rawtext)
        
        sntindex = 0
        # each line is a sentence
        for line in lines:
            words = line.split()
            words = texter.eliminatepunctuation(words)
            # drop tokens left empty (or whitespace-only) by punctuation removal
            words = [word for word in words if word and not word.isspace()]

            for word in words:
                allwords.inc(word)

                # register each distinct word only once across all documents;
                # its sentence ids are attached in the loop below
                newword = Word(word)
                if newword not in hugewordlist:
                    hugewordlist.append(newword)
                    
            sentence = Sentence(sntindex)

            # open question from the original author: should the sentence
            # store the Word itself or the word's index? (currently the index)
            for word in words:
                index = hugewordlist.index(Word(word))
                hugewordlist[index].insertsentenceid(doc_id + "_" + str(sntindex))
                sentence.insertword(index)

            sntindex = sntindex + 1
                
            newtext.insertsentence(sentence)
            
        if len(allwords) != 0:    # skip files that yielded no words
            corpus.inserttext(newtext)

            print numoffiles, ": finished handling the words-snts-txts", doc_id
    
                
            numofwords = sum(allwords.values())    # total token count in this text
            
            # record this document's term count for every known word
            # (words absent from this text get a count of 0)
            for word in hugewordlist:
                cnt = allwords[word.literal]
                word.assigntermfreq(cnt, numofwords, doc_id)
        
        numoffiles = numoffiles + 1
        if filelimit == numoffiles:
            break       

        
    # end for - docs
    

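    # tf*idf note: assuming Word.computeinvdocfreq stores the standard
    # idf(w) = log(N / df(w)) and computeTFIDF multiplies it with the stored
    # term frequencies, each word ends up with tfidf(w, d) = tf(w, d) * idf(w).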
    numofdocs = numoffiles    # number of documents actually processed, not len(fileids)
    print "computing tf*idf"
    for word in hugewordlist:
        word.computeinvdocfreq(numofdocs)
        word.computeTFIDF()
        
    corpus.assignwords(hugewordlist)
    print "corpus length ",str(len(corpus.words))," words"
    print "huges length ",str(len(hugewordlist))," words"
    print "exiting buildcorpus()"
    
    print "pickle-dumping words"
    corpus.pickledumpwords()
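
# Minimal usage sketch (hypothetical: assumes a Corpus class exposing the
# methods called above, e.g. set_corpusname / inserttext / assignwords /
# pickledumpwords, and a directory of plain-text news files):
#
#   corpus = Corpus()
#   buildcorpus(corpus, "/path/to/newstexts", filelimit=100)
#   # corpus.words now holds Word instances with tf*idf scores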