Example #1
0
def main():
    """Train the two classifiers, then classify tweets typed in or sent by telnet.

    Command-line options (parsed with getopt from sys.argv[1:]):
        -k FILE  keywords file; its words are lower-cased, passed through
                 clean.cleanText and appended to the stopword list
        -s FILE  stopwords file
        -d       print sample/term counts after the corpus is loaded
        -t       also accept tweets over a TCP socket
        -p PORT  TCP port used with -t (default 50000)
        -n       with -t, do not read tweets from standard input

    Side effects: assigns the module globals subTerms, polTerms,
    subjectSVMmodel, polarityBayesModel and showDetails.

    Every line of input is handed to detection() and the result is printed
    (and sent back to the client when it came from a socket).  Without -t the
    line "QUIT" ends the program; with -t the line ":QUIT" on standard input
    ends the program and ":QUIT" from a client closes only that connection.
    End-of-file on standard input ends the program without -t and merely
    stops the stdin prompt with -t.

    Single-argument print(...) calls are used throughout; they behave the
    same under the Python 2 print statement this file is written for.
    """
    global subTerms
    global polTerms
    global subjectSVMmodel
    global polarityBayesModel
    global showDetails

    subjectFilename = "../corpus/subject-tf-pos-words.csv"
    polarityFilename = "../corpus/polarity-tf-uni-bigrams.csv"

    filekeywords = ""
    filestopwords = ""
    showDetails = False
    telnetServer = False
    telnetPort = 50000
    noPrompt = False

    opts, _ = getopt.getopt(sys.argv[1:], "k:s:dtp:n")
    for o, a in opts:
        if o == "-k":
            filekeywords = a
        elif o == "-s":
            filestopwords = a
        elif o == "-d":
            showDetails = True
        elif o == "-t":
            telnetServer = True
        elif o == "-p":
            telnetPort = int(a)
        elif o == "-n":
            noPrompt = True

    ##
    # Reading keywords and stopwords file
    ##
    keywords = load.readStopwords(filekeywords)
    keywords = [clean.cleanText(w.lower()) for w in keywords]

    # NOTE(review): stopwords is a local that is never read again in this
    # function and is not declared global -- confirm whether detection()
    # was meant to see it.
    stopwords = load.readStopwords(filestopwords)
    stopwords.extend(keywords)

    backlog = 5
    size = 1024      # maximum number of bytes read from a client per recv()
    server = None
    watched = []     # objects handed to select(): optional stdin, server, clients

    try:
        ##
        # Setting server
        ##
        if telnetServer:
            server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            # Registered before bind() so the finally clause closes it even
            # when bind()/listen() fails.
            watched.append(server)
            server.bind(('', telnetPort))
            server.listen(backlog)
            if not noPrompt:
                # stdin stays first in the list, as before
                watched.insert(0, sys.stdin)

        print("Loading system... please wait until we work!")

        ##
        # Recovering CORPUS
        ##
        subTerms, sublabels, subvalues, _, _ = load.readCSV(subjectFilename)
        polTerms, pollabels, polvalues, _, _ = load.readCSV(polarityFilename)

        print("Corpus loaded.")

        if showDetails:
            print("%d samples loaded for subjectivity." % len(subvalues))
            print("%d terms loaded subjectivity." % len(subTerms))
            print("%d samples loaded for polarity." % len(pollabels))
            print("%d terms loaded for polarity." % len(polTerms))

        # TF for subjectivity: Gaussian-kernel SVM, POS + words
        subjectSVMparam = svm_parameter(svm_type=C_SVC, C=10, kernel_type=RBF)
        subjectSVMproblem = svm_problem(sublabels, subvalues)
        subjectSVMmodel = svm_model(subjectSVMproblem, subjectSVMparam)

        # TF for polarity: Bayes, uni + bigrams.  Each training row becomes a
        # {term: value} dict paired with its label.
        train = [[dict(zip(polTerms, freq)), label]
                 for freq, label in zip(polvalues, pollabels)]
        polarityBayesModel = NaiveBayesClassifier.train(train)

        print("Classifier trained.")

        if telnetServer:
            if noPrompt:
                print("Insert a tweet by telnet running 'telnet localhost %s'" % (telnetPort))
            else:
                print("Insert a tweet here or by telnet (run 'telnet localhost %s'): " % (telnetPort))
        else:
            print("Insert a tweet: ")

        running = True
        while running:
            if not telnetServer:
                try:
                    data = raw_input()
                except EOFError:
                    # stdin was closed: nothing more can arrive, so stop
                    break
                if data == "QUIT":
                    running = False
                else:
                    print(detection(data))
                continue

            inputready, _, _ = select.select(watched, [], [])

            for s in inputready:
                if s is server:
                    # handle the server socket: a new client is connecting
                    client, _ = server.accept()
                    watched.append(client)

                elif s is sys.stdin:
                    # handle standard input
                    try:
                        data = raw_input()
                    except EOFError:
                        # stdin is at end-of-file (e.g. the program runs in
                        # the background); keep serving socket clients
                        watched.remove(s)
                        continue

                    if data == ":QUIT":
                        running = False
                    else:
                        print(detection(data))

                else:
                    # handle all other sockets
                    data = s.recv(size)
                    print(data)
                    # Empty data means the peer disconnected.  The quit
                    # command is compared after stripping so that both
                    # CRLF and bare LF line endings are recognised.
                    if not data or data.strip() == ":QUIT":
                        s.close()
                        watched.remove(s)
                    else:
                        resp = detection(data)
                        print(resp)
                        # sendall() keeps writing until the whole reply is
                        # out; send() may transmit only part of it
                        s.sendall(resp)
    finally:
        # Release the listening socket and every client still connected,
        # whether the loop ended normally or an exception is propagating.
        for s in watched:
            if s is not sys.stdin:
                s.close()

    print("Exit succesful.")
Example #2
0
# Command-line switches: -d first CSV, -k fold count, -c classifier name,
# -t kernel type, -e optional second CSV.  All values are kept as the raw
# strings getopt returns.
# NOTE(review): no defaults for these variables are visible in this excerpt;
# confirm they (filename2 in particular) are initialised before this point.
opts, args = getopt.getopt(sys.argv[1:], "d:k:c:t:e:")
for o, a in opts:
    if o == "-d":
        filename = a
    elif o == "-k":
        Kfold = a
    elif o == "-c":
        classifier = a
    elif o == "-t":
        kerneltype = a
    elif o == "-e":
        filename2 = a


terms,labels,values,presenceindocs,sumcounters = load.readCSV(filename)

if filename2 is not None:
    # A second CSV contributes extra columns: its per-term lists are appended
    # to the first file's lists and each of its rows is appended to the row
    # at the same index.  labels2 is read but not used here.
    terms2,labels2,values2,presenceindocs2,sumcounters2 = load.readCSV(filename2)

    for _dst, _src in ((terms, terms2),
                       (presenceindocs, presenceindocs2),
                       (sumcounters, sumcounters2)):
        _dst.extend(_src)

    for i, _row2 in enumerate(values2):
        values[i].extend(_row2)

    # Label naming both inputs
    filename = "%s + %s" % (filename, filename2)


nsamples = len(values)