Beispiel #1
0
 def testDetect(self):
     """Check LangDetect.detect against a fixed set of labelled samples.

     Each sample pairs a text with the language code expected as the
     first element of the value returned by ``detect``. The empty string
     is expected to yield 'other'.
     """
     samples = [
          (u"The quick brown",'en'),
          (u"Le renard brun rapide saute par-dessus le chien paresseux",'fr'),
          (u"@Ja_Nina HERRLICH :) ich hab nix auf planeten gefunden..deine version klingt absolut logisch :D",'de'),
          (u"En Google somos plenamente conscientes de la confianza que los usuarios depositan ",'es'),
          (u"Noi di Google siamo perfettamente consapevoli della fiducia che riponi in noi e della ",'it'),
          (u'русский язык','ru'),
          (u'','other')
     ]

     detector = LangDetect(languages = supportedLangs)

     # Run detection over every sample, in order, keeping only the first
     # element of each result; compare the whole list at once.
     detected = [detector.detect(sample)[0] for sample, _ in samples]
     expected = [code for _, code in samples]
     assert detected==expected
 def loadCls():
     """Create the classifiers and attach them to ThreadedTCPServer.

     Sets two class attributes on ``ThreadedTCPServer``: ``langCls`` (a
     LangDetect built from ``supportedLangs``) and ``moodCls`` (a
     MoodDetect built from a fresh MoodDetectTrainer).
     """
     # Language classifier is built and published first, as before.
     lang_detector = LangDetect(supportedLangs)
     ThreadedTCPServer.langCls = lang_detector

     trainer = MoodDetectTrainer()
     mood_detector = MoodDetect(trainer)
     ThreadedTCPServer.moodCls = mood_detector
Beispiel #3
0
import sys
sys.path.append('../../')
import socket
import os
from tracker.lib.moodClassifierClient import MoodClassifierTCPClient
from tracker.lib.lang_detection import LangDetect
from tracker.lib.supportedLangs import supportedLangs
import cPickle
import linecache

# Module-level setup: classifier client, counters, language detector and
# the locations of the test data files.

# Alternative (disabled) endpoint on a remote host:
#MCC = MoodClassifierTCPClient('srv1.cyhex.com',6666)
# Mood classifier client pointed at localhost, port 6666.
MCC = MoodClassifierTCPClient('127.0.0.1', 6666)

# Counters keyed by short labels, every one starting at 1.
# NOTE(review): presumably 'n'/'p' stand for negative/positive and the
# 'c' / '#' suffixes for related tallies -- confirm against the code that
# updates this dict.
cls_data = {'nc': 1, 'pc': 1, 'n': 1, 'p': 1, 'n#': 1, 'p#': 1}

# Language detector constructed from the project's supportedLangs value.
langClassifier = LangDetect(supportedLangs)

# Absolute paths to the positive / negative tweet test files. These point
# into one developer's home directory and will not exist elsewhere.
tweetsPFile = "/home/gx/Sites/SMM/trunk/tracker/data/tweets_positive_test.dat"
tweetsNFile = "/home/gx/Sites/SMM/trunk/tracker/data/tweets_negative_test.dat"


def stripSmiles(text):
    """Return *text* with every listed emoticon removed.

    Emoticons are removed one after another in the order listed, each via
    a plain substring replacement. Matching is case-sensitive, so only the
    lower-case forms ':d' and ':p' are stripped. Because each pattern is
    applied exactly once, an emoticon that only forms after an earlier
    removal is stripped only if its pattern comes later in the list.
    """
    emoticons = (
        ':)', ':-)', ';-)', ': )', ':d', '=)', ':p', ';)', '<3', ':(', ':-(',
        ': (',
    )

    cleaned = text
    for emoticon in emoticons:
        cleaned = cleaned.replace(emoticon, "")
    return cleaned