def testDetect(self):
    """Check that LangDetect reports the expected language for each sample.

    Every sample text is passed to ``LangDetect.detect``; the first element
    of each result is collected and the full list of detected codes is
    compared against the list of expected codes in a single assertion.
    """
    # (sample text, expected language code) pairs. The empty string is
    # expected to be reported as 'other'.
    samples = [
        (u"The quick brown", 'en'),
        (u"Le renard brun rapide saute par-dessus le chien paresseux", 'fr'),
        (u"@Ja_Nina HERRLICH :) ich hab nix auf planeten gefunden..deine version klingt absolut logisch :D", 'de'),
        (u"En Google somos plenamente conscientes de la confianza que los usuarios depositan ", 'es'),
        (u"Noi di Google siamo perfettamente consapevoli della fiducia che riponi in noi e della ", 'it'),
        (u'русский язык', 'ru'),
        (u'', 'other'),
    ]

    detector = LangDetect(languages=supportedLangs)

    # Only element 0 of each detect() result is compared with the expectation.
    detected = [detector.detect(sample)[0] for sample, _ in samples]
    expected = [code for _, code in samples]

    assert detected == expected
def loadCls():
    """Create the classifiers and store them as ThreadedTCPServer class attributes.

    Sets ``ThreadedTCPServer.langCls`` to a ``LangDetect`` built from
    ``supportedLangs`` and ``ThreadedTCPServer.moodCls`` to a ``MoodDetect``
    built from a fresh ``MoodDetectTrainer``. Returns nothing.
    """
    language_detector = LangDetect(supportedLangs)
    ThreadedTCPServer.langCls = language_detector

    # The mood classifier is built only after the language detector has been
    # attached, so langCls is already set if this construction raises.
    trainer = MoodDetectTrainer()
    mood_detector = MoodDetect(trainer)
    ThreadedTCPServer.moodCls = mood_detector
import sys

# Extend the import search path before the tracker.* imports below
# (presumably so the package resolves when run from this directory -- confirm).
sys.path.append('../../')

import socket
import os
from tracker.lib.moodClassifierClient import MoodClassifierTCPClient
from tracker.lib.lang_detection import LangDetect
from tracker.lib.supportedLangs import supportedLangs
import cPickle
import linecache

# Client for the mood classifier service; the remote host is kept for reference.
#MCC = MoodClassifierTCPClient('srv1.cyhex.com',6666)
MCC = MoodClassifierTCPClient('127.0.0.1', 6666)

# Per-key counters, all starting at 1.
# NOTE(review): the meaning of the keys is not visible in this chunk.
cls_data = {
    'nc': 1,
    'pc': 1,
    'n': 1,
    'p': 1,
    'n#': 1,
    'p#': 1,
}

langClassifier = LangDetect(supportedLangs)

# Data files holding the positive / negative test tweets.
tweetsPFile = "/home/gx/Sites/SMM/trunk/tracker/data/tweets_positive_test.dat"
tweetsNFile = "/home/gx/Sites/SMM/trunk/tracker/data/tweets_negative_test.dat"


def stripSmiles(text):
    """Return ``text`` with every listed emoticon substring removed.

    The emoticons are removed one after another, in the order listed, using
    plain substring replacement. Matching is exact, so only the lowercase
    forms ':d' and ':p' are stripped.
    """
    emoticons = (
        ':)', ':-)', ';-)', ': )', ':d', '=)', ':p', ';)', '<3',
        ':(', ':-(', ': (',
    )

    cleaned = text
    for emoticon in emoticons:
        cleaned = cleaned.replace(emoticon, "")
    return cleaned