Ejemplo n.º 1
0
def summaryGenerator(class_name, tweets, folder_mode, ranker):
    """Produce the ILP-selected summary sentences for one class of tweets.

    Steps, in order:
      1. ``tweetCleaner`` is applied to ``tweets``.
      2. ``wg.retrieveNewSentences`` generates candidate sentences from the
         cleaned tweets and the module-level ``stopwords``.
      3. ``createDict`` loads word scores from
         ``<mainDatafolder>/<folder_mode['Extract']>/<class_name>/<class_name>_weight.txt``.
      4. ``wg.solveILP`` selects the sentences that form the summary.

    Relies on the module-level names ``wg``, ``stopwords``, ``lm``,
    ``mainDatafolder``, ``tweetCleaner`` and ``createDict``.

    Args:
        class_name: used both as the sub-folder name and as the prefix of
            the weight file.
        tweets: the tweets to summarise (passed to ``tweetCleaner``).
        folder_mode: mapping; only the ``'Extract'`` key is read here.
        ranker: forwarded unchanged to ``wg.solveILP``.

    Returns:
        The value returned by ``wg.solveILP``.
    """
    cleaned = tweetCleaner(tweets)
    candidates = wg.retrieveNewSentences(cleaned, stopwords)

    # Weight file lives under the 'Extract' folder, in a per-class directory.
    weight_path = "/".join((
        mainDatafolder,
        folder_mode['Extract'],
        class_name,
        class_name + "_weight.txt",
    ))
    scores = createDict(weight_path)

    # This is where the ILP selects the best sentences to form the summary.
    return wg.solveILP(
        candidates,
        scores,
        lm,
        stopwords,
        ranker,
        intraGenSimThreshold=0.25,
        l_max=200,
    )
Ejemplo n.º 2
0
def summaryGenerator(class_name, tweets, folder_mode, ranker):
    """Clean the tweets, generate candidate sentences and pick a summary.

    The word-score file is read with ``createDict`` from
    ``mainDatafolder/<folder_mode['Extract']>/<class_name>/<class_name>_weight.txt``;
    the final selection is delegated to ``wg.solveILP`` with a fixed
    ``intraGenSimThreshold`` of 0.25 and ``l_max`` of 200.

    Depends on names defined outside this function: ``tweetCleaner``,
    ``createDict``, ``wg``, ``stopwords``, ``lm`` and ``mainDatafolder``.

    Args:
        class_name: sub-folder name and weight-file prefix.
        tweets: input handed to ``tweetCleaner``.
        folder_mode: mapping from which the ``'Extract'`` entry is read.
        ranker: passed through to ``wg.solveILP`` as-is.

    Returns:
        The sentences retained by ``wg.solveILP`` (returned unchanged).
    """
    tweets = tweetCleaner(tweets)
    generated = wg.retrieveNewSentences(tweets, stopwords)

    # <mainDatafolder>/<Extract folder>/<class_name>/<class_name>_weight.txt
    class_dir = mainDatafolder + "/" + folder_mode['Extract'] + "/" + class_name
    weights_file = class_dir + "/" + class_name + "_weight.txt"
    word_scores = createDict(weights_file)

    # The ILP step chooses the best sentences and forms the summary.
    solver_options = dict(intraGenSimThreshold=0.25, l_max=200)
    retained = wg.solveILP(generated, word_scores, lm, stopwords, ranker,
                           **solver_options)
    return retained
Ejemplo n.º 3
0
                #for text in v:
                origSentences.append(text)
                text=re.sub(p, '', text)
                text=text.strip().split('./PUNCT')
                
                for r in text:
                    if len(r.strip())!=0:
                        tweets.append(r.strip()+' ./PUNCT')
            tweets=set(tweets)
            
            #print len(tweets), tweets
            genSentences=WGGraph.retrieveNewSentences(tweets, english_postagger, stopwords)
            gengroupList.append(genSentences)
            print "Done with ", clustNum
        print gengroupList
        print 'Num of clusters', len(gengroupList)    
        finalSentencesRetained=WGGraph.solveILP(gengroupList,lm, stopwords, origSentences, intraGenSimThreshold=0.2, l_max=10)
 
        txtSummary=""
        for sent in finalSentencesRetained:
            sent=sentenceCapitalize(sent)
                
            txtSummary=txtSummary+"\n"+sent
    #print 'Number of tweets', len(tweets) , tweets 
        txtSummary=txtSummary.strip()
        writeAbstracts.write("==========="+eventfile+"~"+str(i)+":========\n"+txtSummary+"\n\n")
    writeAbstracts.close()   



Ejemplo n.º 4
0
                for r in text:
                    if len(r.strip()) != 0:
                        tweets.append(r.strip() + ' ./PUNCT')
            tweets = set(tweets)

            #print len(tweets), tweets
            genSentences = WGGraph.retrieveNewSentences(
                tweets, english_postagger, stopwords)
            gengroupList.append(genSentences)
            print "Done with ", clustNum
        print gengroupList
        print 'Num of clusters', len(gengroupList)
        finalSentencesRetained = WGGraph.solveILP(gengroupList,
                                                  lm,
                                                  stopwords,
                                                  origSentences,
                                                  intraGenSimThreshold=0.2,
                                                  l_max=10)

        txtSummary = ""
        for sent in finalSentencesRetained:
            sent = sentenceCapitalize(sent)

            txtSummary = txtSummary + "\n" + sent
    #print 'Number of tweets', len(tweets) , tweets
        txtSummary = txtSummary.strip()
        writeAbstracts.write("===========" + eventfile + "~" + str(i) +
                             ":========\n" + txtSummary + "\n\n")
    writeAbstracts.close()