def _parse_options(argv):
    """Parse the command-line flags understood by main().

    Recognised flags (getopt spec "k:s:dtp:n"):
      -k FILE  keywords file
      -s FILE  stopwords file
      -d       show details about the loaded corpus
      -t       accept tweets through a TCP (telnet) server
      -p PORT  TCP port for the server (default 50000)
      -n       do not read tweets from standard input

    Returns a dict keyed by the option names used in main().
    Raises getopt.GetoptError on an unknown flag, as before.
    """
    options = {
        "filekeywords": "",
        "filestopwords": "",
        "showDetails": False,
        "telnetServer": False,
        "telnetPort": 50000,
        "noPrompt": False,
    }
    opts, _args = getopt.getopt(argv, "k:s:dtp:n")
    for flag, value in opts:
        if flag == "-k":
            options["filekeywords"] = value
        elif flag == "-s":
            options["filestopwords"] = value
        elif flag == "-d":
            options["showDetails"] = True
        elif flag == "-t":
            options["telnetServer"] = True
        elif flag == "-p":
            options["telnetPort"] = int(value)
        elif flag == "-n":
            options["noPrompt"] = True
    return options


def _open_server(port, backlog=5):
    """Return a TCP socket listening on every interface at *port*.

    The socket is closed again if bind/listen fails, so a failed start-up
    does not leak the descriptor.
    """
    server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        server.bind(('', port))
        server.listen(backlog)
    except socket.error:
        server.close()
        raise
    return server


def _handle_client(client, size):
    """Process one readable client socket.

    Returns True when the connection should stay open and False when it has
    to be dropped (peer closed, peer sent ":QUIT", or a socket error).
    """
    try:
        data = client.recv(size)
    except socket.error:
        # A broken connection must not take the whole server down.
        return False
    print(data)
    if not data or data == ":QUIT\r\n":
        return False
    resp = detection(data)
    print(resp)
    try:
        # sendall: plain send() may transmit only part of the answer.
        client.sendall(resp)
    except socket.error:
        return False
    return True


def _serve(server, watch_stdin, size=1024):
    """Answer tweets coming from telnet clients and, optionally, from stdin.

    Runs until ":QUIT" (or end-of-file) is read from standard input. Every
    client socket that is still open is closed on the way out; the listening
    socket itself is left to the caller.
    """
    # Named 'watched' rather than 'input' so the builtin is not shadowed.
    if watch_stdin:
        watched = [sys.stdin, server]
    else:
        watched = [server]
    running = True
    try:
        while running:
            ready, _w, _x = select.select(watched, [], [])
            for s in ready:
                if s is server:
                    client, _address = server.accept()
                    watched.append(client)
                elif s is sys.stdin:
                    try:
                        data = raw_input()
                    except EOFError:
                        # stdin was closed: stop instead of crashing.
                        data = ":QUIT"
                    if data == ":QUIT":
                        running = False
                    else:
                        print(detection(data))
                elif not _handle_client(s, size):
                    s.close()
                    watched.remove(s)
    finally:
        for s in watched:
            if s is not server and s is not sys.stdin:
                s.close()


def _console_loop():
    """Answer tweets typed on standard input until "QUIT" or end-of-file."""
    # NOTE(review): the quit word here is "QUIT" while the telnet-mode stdin
    # handler expects ":QUIT"; kept as found -- confirm which one is intended.
    while True:
        try:
            data = raw_input()
        except EOFError:
            break
        if data == "QUIT":
            break
        print(detection(data))


def main():
    """Train the subjectivity and polarity classifiers, then answer tweets.

    Steps, in order:
      1. parse the command line (see _parse_options);
      2. read the keywords and stopwords files;
      3. if -t was given, start listening on the telnet port;
      4. load both corpora and train an RBF C-SVC model for subjectivity and
         a NaiveBayesClassifier for polarity;
      5. feed every received line to detection() and print (and, for telnet
         clients, send back) its answer.

    Publishes subTerms, polTerms, subjectSVMmodel, polarityBayesModel and
    showDetails as module globals. The listening socket is always closed,
    also when loading, training or serving raises.

    print is written in the single-argument parenthesised form, which behaves
    the same under the Python 2 print statement this file relies on.
    """
    global subTerms
    global polTerms
    global subjectSVMmodel
    global polarityBayesModel
    global showDetails

    subjectFilename = "../corpus/subject-tf-pos-words.csv"
    polarityFilename = "../corpus/polarity-tf-uni-bigrams.csv"

    options = _parse_options(sys.argv[1:])
    showDetails = options["showDetails"]
    telnetServer = options["telnetServer"]
    telnetPort = options["telnetPort"]
    noPrompt = options["noPrompt"]

    # Reading keywords and stopwords files.
    keywords = load.readStopwords(options["filekeywords"])
    keywords = [clean.cleanText(w.lower()) for w in keywords]
    stopwords = load.readStopwords(options["filestopwords"])
    # NOTE(review): 'stopwords' is local and not used again in this function;
    # if detection() is meant to see it, it has to be exported -- confirm.
    stopwords.extend(keywords)

    # The port is bound before the (slow) corpus load, as in the original.
    server = None
    if telnetServer:
        server = _open_server(telnetPort)

    try:
        print("Loading system... please wait until we work!")

        # Recovering the corpora; only terms, labels and values are used here.
        subTerms, sublabels, subvalues, _subpresence, _subsums = \
            load.readCSV(subjectFilename)
        polTerms, pollabels, polvalues, _polpresence, _polsums = \
            load.readCSV(polarityFilename)
        print("Corpus loaded.")

        if showDetails:
            print("%d samples loaded for subjectivity." % (len(subvalues)))
            print("%d terms loaded subjectivity." % (len(subTerms)))
            print("%d samples loaded for polarity." % (len(pollabels)))
            print("%d terms loaded for polarity." % (len(polTerms)))

        # TF for subjectivity: SVM with Gaussian (RBF) kernel, POS + words.
        subjectSVMparam = svm_parameter(svm_type=C_SVC, C=10, kernel_type=RBF)
        subjectSVMproblem = svm_problem(sublabels, subvalues)
        subjectSVMmodel = svm_model(subjectSVMproblem, subjectSVMparam)

        # TF for polarity: naive Bayes over uni+bigrams. Each sample becomes
        # a {term: frequency} dict paired with its label.
        dictrain = [dict(zip(polTerms, freq)) for freq in polvalues]
        train = [[dd, label] for dd, label in zip(dictrain, pollabels)]
        polarityBayesModel = NaiveBayesClassifier.train(train)
        print("Classifier trained.")

        if telnetServer:
            if noPrompt:
                print("Insert a tweet by telnet running 'telnet localhost %s'" % (telnetPort))
            else:
                print("Insert a tweet here or by telnet (run 'telnet localhost %s'): " % (telnetPort))
            _serve(server, watch_stdin=not noPrompt)
        else:
            print("Insert a tweet: ")
            _console_loop()
    finally:
        if server is not None:
            server.close()

    print("Exit succesful.")
# Read the command-line flags: -d -> filename, -e -> filename2,
# -c -> classifier, -k -> Kfold, -t -> kerneltype (all taken as strings).
opts, args = getopt.getopt(sys.argv[1:], "d:k:c:t:e:")
for o, a in opts:
    if o == "-k":
        Kfold = a
    elif o == "-t":
        kerneltype = a
    elif o == "-c":
        classifier = a
    elif o == "-e":
        filename2 = a
    elif o == "-d":
        filename = a

terms, labels, values, presenceindocs, sumcounters = load.readCSV(filename)

if filename2 is not None:
    # Widen the first corpus with the columns of the second one. labels2 is
    # read but not merged: the labels of the first file are kept.
    terms2, labels2, values2, presenceindocs2, sumcounters2 = load.readCSV(filename2)
    for merged, extra in ((terms, terms2),
                          (presenceindocs, presenceindocs2),
                          (sumcounters, sumcounters2)):
        merged.extend(extra)
    # Row i of the second file is appended to row i of the first file.
    for i, extra_row in enumerate(values2):
        values[i].extend(extra_row)
    filename = "%s + %s" % (filename, filename2)

nsamples = len(values)