コード例 #1
0
def predict(client):
    """Classify the last shogun of each family and print the predictions.

    One ``classify`` request is sent to ``client`` per query name. For each
    query the label of the highest-scoring estimate is printed, followed by
    the queried name.
    """
    # Names of the last shoguns; these are the prediction queries.
    last_shoguns = (u'慶喜', u'義昭', u'守時')
    for query in [datum([('name', given)], []) for given in last_shoguns]:
        estimates = client.classify(name, [query])[0]
        # Keep only the best-scoring estimate for this query.
        best = max(estimates, key=lambda estimate: estimate.score)
        print(best.label, query.string_values[0][1])
コード例 #2
0
ファイル: shogun.py プロジェクト: Epictetus/jubatus-example
def predict(client):
    # predict the last shogun
    data = [
        datum([('name', u'慶喜')], []),
        datum([('name', u'義昭')], []),
        datum([('name', u'守時')], []),
        ]
    for d in data:
        res = client.classify(name, [d])
        # get the predicted shogun name
        print max(res[0], key = lambda x: x.score).label, d.string_values[0][1]
コード例 #3
0
ファイル: train.py プロジェクト: Epictetus/jubatus-example
    def on_status(self, status):
        """Train the classifier with one geotagged status.

        The status is ignored when it has no ``text`` attribute, no usable
        ``coordinates`` mapping, or when its coordinates are inside none of
        ``self.locations``. Otherwise the text, with hashtags removed, is
        sent as the ``text`` feature of a datum labelled with the matching
        location's name, and the trained text is echoed to the console.

        Args:
            status: Status object; ``text``, ``coordinates`` and
                ``entities['hashtags']`` are read from it.
        """
        if not hasattr(status, 'text'):
            return
        if not hasattr(status, 'coordinates'):
            return
        if not status.coordinates or 'coordinates' not in status.coordinates:
            return

        # The coordinate pair does not depend on the candidate location, so
        # it is looked up once instead of on every loop iteration.
        coordinates = status.coordinates['coordinates']

        loc = None
        for candidate in self.locations:
            if candidate.is_inside(coordinates[0], coordinates[1]):
                loc = candidate
                break
        if not loc:
            # Unknown location
            return
        hashtags = status.entities['hashtags']
        detagged_text = remove_hashtags_from_tweet(status.text, hashtags)

        # Create datum for Jubatus
        d = types.datum([], [])
        d.string_values = [('text', detagged_text)]

        # Send training data to Jubatus
        self.classifier.train(instance_name, [(loc.name, d)])

        # Print trained tweet. A single parenthesised argument prints the
        # same way on Python 2 and Python 3.
        print_green(loc.name, ' ')
        print(detagged_text)
コード例 #4
0
    def on_status(self, status):
        """Train the classifier with one geotagged status.

        The status is ignored when it has no ``text`` attribute, no usable
        ``coordinates`` mapping, or when its coordinates are inside none of
        ``self.locations``. Otherwise the text, with hashtags removed, is
        sent as the ``text`` feature of a datum labelled with the matching
        location's name, and the trained text is echoed to the console.

        Args:
            status: Status object; ``text``, ``coordinates`` and
                ``entities['hashtags']`` are read from it.
        """
        if not hasattr(status, 'text'):
            return
        if not hasattr(status, 'coordinates'):
            return
        if not status.coordinates or 'coordinates' not in status.coordinates:
            return

        # The coordinate pair does not depend on the candidate location, so
        # it is looked up once instead of on every loop iteration.
        coordinates = status.coordinates['coordinates']

        loc = None
        for candidate in self.locations:
            if candidate.is_inside(coordinates[0], coordinates[1]):
                loc = candidate
                break
        if not loc:
            # Unknown location
            return
        hashtags = status.entities['hashtags']
        detagged_text = remove_hashtags_from_tweet(status.text, hashtags)

        # Create datum for Jubatus
        d = types.datum([], [])
        d.string_values = [('text', detagged_text)]

        # Send training data to Jubatus
        self.classifier.train(instance_name, [(loc.name, d)])

        # Print trained tweet. A single parenthesised argument prints the
        # same way on Python 2 and Python 3.
        print_green(loc.name, ' ')
        print(detagged_text)
コード例 #5
0
    def characters(self, content):
        """Train the classifier with one chunk of character data.

        Nothing happens while ``self.read`` is false. Otherwise ``content``
        is sent as the ``text`` feature of a datum labelled ``self.label``,
        ``self.count`` is incremented, and a progress line is printed every
        1000 trained chunks.

        Args:
            content: Text to train on.
        """
        if not self.read:
            return

        d = types.datum([], [])
        d.string_values = [
            ['text', content],
        ]
        self.classifier.train(instance_name, [[self.label, d]])
        self.count += 1
        if self.count % 1000 == 0:
            # A single parenthesised argument prints the same way on
            # Python 2 and Python 3.
            print("Training(%s): %d ..." % (self.label, self.count))
コード例 #6
0
    def characters(self, content):
        """Train the classifier with one chunk of character data.

        Nothing happens while ``self.read`` is false. Otherwise ``content``
        is sent as the ``text`` feature of a datum labelled ``self.label``,
        ``self.count`` is incremented, and a progress line is printed every
        1000 trained chunks.

        Args:
            content: Text to train on.
        """
        if not self.read:
            return

        d = types.datum([], [])
        d.string_values = [
            ['text', content],
        ]
        self.classifier.train(instance_name, [[self.label, d]])
        self.count += 1
        if self.count % 1000 == 0:
            # A single parenthesised argument prints the same way on
            # Python 2 and Python 3.
            print("Training(%s): %d ..." % (self.label, self.count))
コード例 #7
0
    def on_status(self, status):
        """Classify one status and print it prefixed by its best label.

        Statuses without a ``text`` attribute are ignored, and nothing is
        printed when the classifier returns no estimate. The text is printed
        through ``print_red`` when the best label equals ``self.highlight``
        and through plain ``print`` otherwise.
        """
        if not hasattr(status, 'text'):
            return

        query = types.datum([], [])
        query.string_values = [['text', status.text]]
        result = self.classifier.classify(instance_name, [query])

        # Bail out unless at least one estimate came back for the query.
        if not (len(result) > 0 and len(result[0]) > 0):
            return

        # Highest score first; the leading entry is the prediction.
        ranked = sorted(result[0], key=lambda item: item.score, reverse=True)
        top = ranked[0]

        print_green(top.label, end=" ")
        if top.label == self.highlight:
            print_red(status.text)
        else:
            print(status.text)
コード例 #8
0
    def on_status(self, status):
        """Classify one status and print it prefixed by its best label.

        Statuses without a ``text`` attribute are ignored, and nothing is
        printed when the classifier returns no estimate. The text is printed
        through ``print_red`` when the best label equals ``self.highlight``
        and through plain ``print`` otherwise.
        """
        if not hasattr(status, 'text'):
            return

        query = types.datum([], [])
        query.string_values = [['text', status.text]]
        result = self.classifier.classify(instance_name, [query])

        # Bail out unless at least one estimate came back for the query.
        if not (len(result) > 0 and len(result[0]) > 0):
            return

        # Highest score first; the leading entry is the prediction.
        ranked = sorted(result[0], key=lambda item: item.score, reverse=True)
        top = ranked[0]

        print_green(top.label, end=" ")
        if top.label == self.highlight:
            print_red(status.text)
        else:
            print(status.text)
コード例 #9
0
def estimate_location_for(text):
    """Print the estimated locations for ``text``, best score first.

    A classifier client is created from the module-level ``host`` and
    ``port``, ``text`` is sent as the ``text`` feature of a single datum,
    and every returned estimate is printed as ``label (score)``. A hint is
    printed instead when no estimate is returned.

    Args:
        text: Text whose location should be estimated.
    """
    classifier = client.classifier(host, port)

    # Create datum for Jubatus
    d = types.datum([], [])
    d.string_values = [('text', text)]

    # Send estimation query to Jubatus
    result = classifier.classify(instance_name, [d])

    # Check the outer list as well as the estimate list, so that an empty
    # response cannot raise IndexError on result[0].
    if not result or not result[0]:
        # No estimation results; maybe we haven't trained enough
        print("No estimation results available.")
        print("Train more tweets or try using another text.")
        return

    # Sort results by score
    est = sorted(result[0], key=lambda e: e.score, reverse=True)

    # Print the result. Each print takes a single parenthesised argument,
    # which prints the same way on Python 2 and Python 3.
    print("Estimated Location for %s:" % text)
    for e in est:
        print("  " + e.label + " (" + str(e.score) + ")")
コード例 #10
0
ファイル: classify.py プロジェクト: Epictetus/jubatus-example
def estimate_location_for(text):
    """Print the estimated locations for ``text``, best score first.

    A classifier client is created from the module-level ``host`` and
    ``port``, ``text`` is sent as the ``text`` feature of a single datum,
    and every returned estimate is printed as ``label (score)``. A hint is
    printed instead when no estimate is returned.

    Args:
        text: Text whose location should be estimated.
    """
    classifier = client.classifier(host, port)

    # Create datum for Jubatus
    d = types.datum([], [])
    d.string_values = [('text', text)]

    # Send estimation query to Jubatus
    result = classifier.classify(instance_name, [d])

    # Check the outer list as well as the estimate list, so that an empty
    # response cannot raise IndexError on result[0].
    if not result or not result[0]:
        # No estimation results; maybe we haven't trained enough
        print("No estimation results available.")
        print("Train more tweets or try using another text.")
        return

    # Sort results by score
    est = sorted(result[0], key=lambda e: e.score, reverse=True)

    # Print the result. Each print takes a single parenthesised argument,
    # which prints the same way on Python 2 and Python 3.
    print("Estimated Location for %s:" % text)
    for e in est:
        print("  " + e.label + " (" + str(e.score) + ")")
コード例 #11
0
def train(client):
    """Train ``client`` with shogun given names labelled by family name.

    Every training example is a ``(family, datum)`` pair whose datum has the
    given name as its ``name`` string feature. The examples are shuffled and
    then sent to ``client.train`` in a single call.
    """
    # (family label, given names) in training order. The last shogun of each
    # family is left out on purpose: those names are the prediction targets.
    families = (
        # held out for prediction: u'慶喜'
        (u'徳川', (u'家康', u'秀忠', u'家光', u'家綱', u'綱吉', u'家宣',
                   u'家継', u'吉宗', u'家重', u'家治', u'家斉', u'家慶',
                   u'家定', u'家茂')),
        # held out for prediction: u'義昭'
        (u'足利', (u'尊氏', u'義詮', u'義満', u'義持', u'義量', u'義教',
                   u'義勝', u'義政', u'義尚', u'義稙', u'義澄', u'義稙',
                   u'義晴', u'義輝', u'義栄')),
        # held out for prediction: u'守時'
        (u'北条', (u'時政', u'義時', u'泰時', u'経時', u'時頼', u'長時',
                   u'政村', u'時宗', u'貞時', u'師時', u'宗宣', u'煕時',
                   u'基時', u'高時', u'貞顕')),
    )

    train_data = []
    for family, given_names in families:
        for given in given_names:
            train_data.append((family, datum([('name', given)], [])))

    # Online learning is order-sensitive, so the examples must be shuffled.
    random.shuffle(train_data)

    # Send everything in one training request.
    client.train(name, train_data)
コード例 #12
0
ファイル: test.py プロジェクト: TkrUdagawa/jubatus-example
#!/usr/bin/env python

import json, commands
from jubatus.classifier import client
from jubatus.classifier import types

# Interactive loop: classify each typed line until an empty line is entered.
for buf in iter(lambda: raw_input("> "), ""):
    # NOTE(review): a new client is created for every query; presumably this
    # avoids reusing a connection that idled at the prompt -- confirm.
    classifier = client.classifier("127.0.0.1", 9199)
    datum = types.datum([["text", buf.rstrip()]], [])
    result = classifier.classify("", [datum])
    estimates = result[0]
    if not estimates:
        print("nothing")
        continue
    # Highest score first.
    estimates.sort(key=lambda x: x.score, reverse=True)
    for res in estimates:
        print(res.label + " -> " + str(res.score))
コード例 #13
0
    # Connect to the classifier server given on the command line.
    classifier = client.classifier(options.server_ip,options.server_port)

    pname = options.name

    # Show the server configuration and status before doing any work.
    print classifier.get_config(pname)
    print classifier.get_status(pname)
    # Regular expression that separates the columns of the input files.
    splitter = re.compile(options.column_delimiter)

    trained_count=0
    # Whether to run training
    if(options.train=="true"):
        for line in open(options.train_file):
            array=splitter.split(line)
            # Only lines that split into exactly two columns are used.
            if(len(array)==2):
                # NOTE(review): `line` is not stripped, so `dat` keeps its
                # trailing newline unless the delimiter pattern consumes it.
                label, dat=array
                datum = types.datum(  [[options.key, dat]], [] )
                classifier.train(pname,[(label,datum)])
                trained_count=trained_count+1

        print classifier.get_status(pname)

        # Save the trained model under the same name as the instance.
        print classifier.save(pname, options.name)

    # Load the saved model; this runs whether or not training was done above.
    print classifier.load(pname, options.name)

    print classifier.get_config(pname)
    # Counters for the evaluation pass over the test file.
    total=0.0
    hit=0.0
    for line in open(options.test_file):
        array=splitter.split(line)
        if(len(array)==2):
コード例 #14
0
ファイル: test.py プロジェクト: beam2d/jubatus-example
#!/usr/bin/env python

import json, commands
from jubatus.classifier import client
from jubatus.classifier import types

# Interactive loop: classify each typed line until an empty line is entered.
for buf in iter(lambda: raw_input("> "), ""):
    # NOTE(review): a new client is created for every query; presumably this
    # avoids reusing a connection that idled at the prompt -- confirm.
    classifier = client.classifier("127.0.0.1", 9199)
    datum = types.datum([["text", buf.rstrip()]], [])
    result = classifier.classify("", [datum])
    estimates = result[0]
    if not estimates:
        print("nothing")
        continue
    # Highest score first.
    estimates.sort(key=lambda x: x.score, reverse=True)
    for res in estimates:
        print(res.label + " -> " + str(res.score))
コード例 #15
0
ファイル: gender.py プロジェクト: Epictetus/jubatus-example
#!/usr/bin/env python

# Connection settings for the classifier server, and the name passed with
# every request below.
host = '127.0.0.1'
port = 9199
name = 'test'

import jubatus
from jubatus.classifier.types import datum

# Client bound to the host and port configured above.
client = jubatus.Classifier(host, port)

# Labelled training examples as (label, datum) pairs. Each datum is built
# from a list of (key, string) pairs and a list of (key, number) pairs.
train_data = [
    ('male',   datum([('hair', 'short'), ('top', 'sweater'), ('bottom', 'jeans')], [('height', 1.70)])),
    ('female', datum([('hair', 'long'),  ('top', 'shirt'),   ('bottom', 'skirt')], [('height', 1.56)])),
    ('male',   datum([('hair', 'short'), ('top', 'jacket'),  ('bottom', 'chino')], [('height', 1.65)])),
    ('female', datum([('hair', 'short'), ('top', 'T shirt'), ('bottom', 'jeans')], [('height', 1.72)])),
    ('male',   datum([('hair', 'long'),  ('top', 'T shirt'), ('bottom', 'jeans')], [('height', 1.82)])),
    ('female', datum([('hair', 'long'),  ('top', 'jacket'),  ('bottom', 'skirt')], [('height', 1.43)])),
    # The two examples below are disabled and are not sent for training.
#    ('male',   datum([('hair', 'short'), ('top', 'jacket'),  ('bottom', 'jeans')], [('height', 1.76)])),
#    ('female', datum([('hair', 'long'),  ('top', 'sweater'), ('bottom', 'skirt')], [('height', 1.52)])),
    ]

# Send all training examples in a single request.
client.train(name, train_data)

# Unlabelled data to classify.
test_data = [
    datum([('hair', 'short'), ('top', 'T shirt'), ('bottom', 'jeans')], [('height', 1.81)]),
    datum([('hair', 'long'),  ('top', 'shirt'),   ('bottom', 'skirt')], [('height', 1.50)]),
]

# Classify every test datum in one request; the outcome is kept in `results`.
results = client.classify(name, test_data)
コード例 #16
0
                 dest='server_port', type='int', default='9199')
    p.add_option('-n', '--name', action='store',
                 dest='name', type='string', default='tutorial')
    return p.parse_args()

if __name__ == '__main__':
	options,remainder=parse_args()
  	classifier=client.classifier(options.server_ip,options.server_port)
   	pname=options.name

   	print classifier.get_config(pname)
    	print classifier.get_status(pname)

       	for line in open('adult.data'):
    		age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income=line[:-1].split(',')
    		datum=types.datum([('workclass',workclass),('sex',sex),('occupation',occupation),('education',education),('marital_status',marital_status),('native_country',native_country),('race',race),('relationship',relationship)],[('age',float(age)),('hours_per_week',float(hours_per_week)),('education_num',float(education_num))])
    		classifier.train(pname,[(income,datum)])
    		pass

    	print classifier.get_status(pname)

	print classifier.save(pname, "tutorial")

	print classifier.load(pname, "tutorial")

	print classifier.get_config(pname)

	total_num=0.00
	ok_num=0.00
	start_time=time.clock()
	for line in open('adult.test'):
コード例 #17
0
#!/usr/bin/env python

# Connection settings for the classifier server, and the request name.
host = '127.0.0.1'
port = 9199
name = 'test'

import jubatus
from jubatus.classifier.types import datum

# Client bound to the host and port configured above.
client = jubatus.Classifier(host, port)

# Labelled training examples as (label, datum) pairs. Each datum is built
# from a list of (key, string) pairs and a list of (key, number) pairs.
train_data = [
    ('male',
     datum([('hair', 'short'), ('top', 'sweater'), ('bottom', 'jeans')],
           [('height', 1.70)])),
    ('female',
     datum([('hair', 'long'), ('top', 'shirt'), ('bottom', 'skirt')],
           [('height', 1.56)])),
    ('male',
     datum([('hair', 'short'), ('top', 'jacket'), ('bottom', 'chino')],
           [('height', 1.65)])),
    ('female',
     datum([('hair', 'short'), ('top', 'T shirt'), ('bottom', 'jeans')],
           [('height', 1.72)])),
    ('male',
     datum([('hair', 'long'), ('top', 'T shirt'), ('bottom', 'jeans')],
           [('height', 1.82)])),
    ('female',
     datum([('hair', 'long'), ('top', 'jacket'), ('bottom', 'skirt')],
           [('height', 1.43)])),
    # The example below is disabled and is not part of the training data.
    #    ('male',   datum([('hair', 'short'), ('top', 'jacket'),  ('bottom', 'jeans')], [('height', 1.76)])),
コード例 #18
0
ファイル: train.py プロジェクト: beam2d/jubatus-example
#!/usr/bin/env python

import sys, json, subprocess
import random
from jubatus.classifier import client
from jubatus.classifier import types

import glob

# Name passed with every training request.
NAME = "a"
classifier = client.classifier("127.0.0.1", 9199)

# Training files are the entries of the current directory whose name contains
# "_train.txt". Globbing replaces the former `ls | grep` shell pipeline: it
# needs no shell and yields an empty list, not an error, when nothing matches.
file_list = sorted(glob.glob("*_train.txt*"))

# One [label, open file] entry per training file; the label is the file name
# with "_train.txt" removed. A real list is required here because entries are
# picked with random.choice() and removed while looping.
fds = [[x.replace("_train.txt", ""), open(x, "r")] for x in file_list]
while fds:
    # Pick a random file for each line so that labels are interleaved.
    entry = random.choice(fds)
    label, fd = entry
    text = fd.readline()
    if text == "":
        # End of file: release the handle and stop drawing from this label.
        fd.close()
        fds.remove(entry)
        print("finished train of label %s \n" % (label))
        continue
    text_strip = text.rstrip()
    datum = types.datum([["text", text_strip]], [])
    print("train %s : %s ..." % (label, text_strip))
    classifier.train(NAME, [(label, datum)])
コード例 #19
0
ファイル: shogun.py プロジェクト: Epictetus/jubatus-example
def train(client):
    """Train ``client`` with shogun given names labelled by family name.

    Every training example is a ``(family, datum)`` pair whose datum has the
    given name as its ``name`` string feature. The examples are shuffled and
    then sent to ``client.train`` in a single call.
    """
    # (family label, given names) in training order. The last shogun of each
    # family is left out on purpose: those names are the prediction targets.
    families = (
        # held out for prediction: u'慶喜'
        (u'徳川', (u'家康', u'秀忠', u'家光', u'家綱', u'綱吉', u'家宣',
                   u'家継', u'吉宗', u'家重', u'家治', u'家斉', u'家慶',
                   u'家定', u'家茂')),
        # held out for prediction: u'義昭'
        (u'足利', (u'尊氏', u'義詮', u'義満', u'義持', u'義量', u'義教',
                   u'義勝', u'義政', u'義尚', u'義稙', u'義澄', u'義稙',
                   u'義晴', u'義輝', u'義栄')),
        # held out for prediction: u'守時'
        (u'北条', (u'時政', u'義時', u'泰時', u'経時', u'時頼', u'長時',
                   u'政村', u'時宗', u'貞時', u'師時', u'宗宣', u'煕時',
                   u'基時', u'高時', u'貞顕')),
    )

    train_data = []
    for family, given_names in families:
        for given in given_names:
            train_data.append((family, datum([('name', given)], [])))

    # Online learning is order-sensitive, so the examples must be shuffled.
    random.shuffle(train_data)

    # Send everything in one training request.
    client.train(name, train_data)
コード例 #20
0
def train(client):
    """Train ``client`` with UTF-8 encoded shogun names labelled by family.

    Every training example is a ``(family, datum)`` pair in which both the
    family label and the ``name`` string feature are encoded with
    ``'utf_8'``. The examples are shuffled and then sent to ``client.train``
    in a single call.
    """
    # (family label, given names) in training order. The last shogun of each
    # family is left out on purpose: those names are the prediction targets.
    families = (
        # held out for prediction: u'慶喜'
        (u'徳川', (u'家康', u'秀忠', u'家光', u'家綱', u'綱吉', u'家宣',
                   u'家継', u'吉宗', u'家重', u'家治', u'家斉', u'家慶',
                   u'家定', u'家茂')),
        # held out for prediction: u'義昭'
        (u'足利', (u'尊氏', u'義詮', u'義満', u'義持', u'義量', u'義教',
                   u'義勝', u'義政', u'義尚', u'義稙', u'義澄', u'義稙',
                   u'義晴', u'義輝', u'義栄')),
        # held out for prediction: u'守時'
        (u'北条', (u'時政', u'義時', u'泰時', u'経時', u'時頼', u'長時',
                   u'政村', u'時宗', u'貞時', u'師時', u'宗宣', u'煕時',
                   u'基時', u'高時', u'貞顕')),
    )

    # Encode label and feature value separately for every example.
    train_data = [
        (family.encode('utf_8'),
         datum([('name', given.encode('utf_8'))], []))
        for family, given_names in families
        for given in given_names
    ]

    # Online learning is order-sensitive, so the examples must be shuffled.
    random.shuffle(train_data)

    # Send everything in one training request.
    client.train(name, train_data)
コード例 #21
0
ファイル: train.py プロジェクト: TkrUdagawa/jubatus-example
#!/usr/bin/env python

import sys, json, commands, pprint
import random
from jubatus.classifier import client
from jubatus.classifier import types

import glob

# Name passed with every training request.
NAME = "a"
classifier = client.classifier("127.0.0.1", 9199)

# Training files are the entries of the current directory whose name contains
# "_train.txt". Globbing replaces the former `ls|grep` shell pipeline, whose
# output split into [''] when nothing matched and then failed in open('').
file_list = sorted(glob.glob("*_train.txt*"))
pp = pprint.PrettyPrinter()

# One [label, open file] entry per training file; the label is the file name
# with "_train.txt" removed.
fds = [[x.replace("_train.txt", ""), open(x, "r")] for x in file_list]
while fds:
    # Pick a random file for each line so that labels are interleaved.
    entry = random.choice(fds)
    label, fd = entry
    text = fd.readline()
    if text == "":
        # End of file: release the handle and stop drawing from this label.
        fd.close()
        fds.remove(entry)
        # A single parenthesised argument prints the same way on Python 2
        # and Python 3.
        print("finished train of label %s \n" % (label))
        continue
    text_strip = text.rstrip()
    datum = types.datum([["text", text_strip]], [])
    print("train %s : %s ..." % (label, text_strip))
    classifier.train(NAME, [(label, datum)])