Esempio n. 1
0
    def _messageCB(self, con, msg):
        """Handle an incoming message and answer its sender.

        Messages with an empty body are ignored.  Otherwise the body is
        word-segmented, handed to the bot for a reply and for learning,
        and the UTF-8 encoded reply is displayed to the sender after a
        random delay of less than ``self._maxdelay`` seconds.

        ``con`` is accepted but not used by this handler.
        """
        if not msg.getBody():
            # Nothing to answer for a blank message.
            return

        # A full Jabber ID looks like "username@server/resource".  Alice
        # uses the session name as the file name for its log/session
        # files, so everything from the first slash onwards is dropped,
        # leaving just "username@server".
        sender = str(msg.getFrom()).partition("/")[0]

        try:
            # Thai text needs to be segmented into words first.
            segmented = wordseg(msg.getBody())
            answer = self._bot.reply(segmented)
            response = answer.encode("utf-8")
            # Let the bot learn from what it was just told.
            self._bot.learn(segmented)
        except UnicodeEncodeError:
            response = "Unicode Encoding Error!"

        # Wait a random fraction of the configured maximum delay before
        # showing the reply.
        pause = random.random() * self._maxdelay
        time.sleep(pause)
        self.display(response, sender)
Esempio n. 2
0
def test_wordseg():
    """Call ``wordseg`` once on every entry of ``hdata``, ignoring results."""
    for tag in hdata:
        wordseg(tag)
Esempio n. 3
0
def test_wordseg():
    """Call ``wordseg`` once on every entry of ``hdata``, ignoring results."""
    for tag in hdata:
        wordseg(tag)


def test_wordsegment():
    """Call ``segment`` once on every entry of ``hdata``, ignoring results."""
    for tag in hdata:
        segment(tag)


# Load the whitespace-separated hashtags.  The context manager closes the
# file as soon as its contents have been read instead of leaving the
# handle open for the rest of the run.
with open('hashtags.txt') as hashtag_file:
    hdata = hashtag_file.read().split()

# Number of times each benchmark function is executed by timeit.
n = 100

print("==wordseg==\t\t==wordsegment==")

# Show both segmentations side by side.  Only the first element of the
# wordseg() result is joined, while the segment() result is joined whole.
for hashtag in hdata:
    print("[" + " ".join(wordseg(hashtag)[0]) + "]\t\t[" +
          " ".join(segment(hashtag)) + "]")

print("==wordseg==\t\t==wordsegment==")

# Time n runs of each benchmark function; the setup string imports the
# function from the running script so timeit can see it.
wordseg_seconds = timeit.timeit("test_wordseg()",
                                number=n,
                                setup="from __main__ import test_wordseg")
wordsegment_seconds = timeit.timeit(
    "test_wordsegment()",
    number=n,
    setup="from __main__ import test_wordsegment")

print(str(wordseg_seconds) + "\t\t" + str(wordsegment_seconds))
Esempio n. 4
0
File: t.py Progetto: wlmqgzm/wordseg
#!/opt/local/bin/python2.7
# -*- coding: utf-8 -*-
import sys
sys.path+=['.']
import wordseg
s=wordseg.wordseg()
s.load_dict("data/dict.txt")
for l in open("data/p.txt"):
	l=l.strip()
	if len(l)>0:
		b=s.segment(l,1,0)
		print l, " => ",
		for i in b:
			print i,
		print