def predict(client):
    """Classify the held-out names and print the best label for each.

    Each name is wrapped in a datum and sent to ``client.classify`` under
    the module-level ``name``. The label with the highest score is printed
    followed by the queried name.
    """
    # predict the last shogun
    held_out_names = (u'慶喜', u'義昭', u'守時')
    queries = [datum([('name', given)], []) for given in held_out_names]

    for query in queries:
        estimates = client.classify(name, [query])[0]
        # get the predicted shogun name
        best = max(estimates, key=lambda estimate: estimate.score)
        print(best.label, query.string_values[0][1])
def predict(client):
    """Classify three held-out names and print the top-scoring label.

    For every query datum, ``client.classify`` is called with the
    module-level ``name`` and the label whose ``score`` is highest is
    printed together with the name that was queried.

    The print call uses the function form (a SyntaxError-free spelling on
    Python 3), matching the other ``predict`` variant in this file.
    """
    # predict the last shogun
    data = [
        datum([('name', u'慶喜')], []),
        datum([('name', u'義昭')], []),
        datum([('name', u'守時')], []),
    ]
    for d in data:
        res = client.classify(name, [d])
        # get the predicted shogun name
        top = max(res[0], key=lambda x: x.score)
        print(top.label, d.string_values[0][1])
def on_status(self, status):
    """Train the classifier with a status whose point lies in a known location.

    The status is ignored when it has no ``text``, no usable
    ``coordinates`` mapping, or when none of ``self.locations`` reports the
    point as inside. Otherwise the text, with its hashtags removed, is sent
    to ``self.classifier.train`` labelled with the matching location name,
    and the location name and text are printed.
    """
    if not hasattr(status, 'text'):
        return
    if not hasattr(status, 'coordinates'):
        return
    if not status.coordinates or 'coordinates' not in status.coordinates:
        return

    # The point does not change per location, so look it up once.
    point = status.coordinates['coordinates']
    loc = next(
        (area for area in self.locations
         if area.is_inside(point[0], point[1])),
        None
    )
    if not loc:
        # Unknown location
        return

    hashtags = status.entities['hashtags']
    detagged_text = remove_hashtags_from_tweet(status.text, hashtags)

    # Create datum for Jubatus
    d = types.datum([], [])
    d.string_values = [('text', detagged_text)]

    # Send training data to Jubatus
    self.classifier.train(instance_name, [(loc.name, d)])

    # Print trained tweet
    print_green(loc.name, ' ')
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(detagged_text)
def characters(self, content):
    """Train the classifier with one chunk of character data.

    Does nothing unless ``self.read`` is truthy. Otherwise ``content`` is
    sent to ``self.classifier.train`` under ``self.label``, ``self.count``
    is incremented, and a progress line is printed every 1000 items.
    """
    if not self.read:
        return

    d = types.datum([], [])
    d.string_values = [
        ['text', content],
    ]
    self.classifier.train(instance_name, [[self.label, d]])

    self.count += 1
    if self.count % 1000 == 0:
        # Parenthesised single-argument form works on Python 2 and Python 3.
        print("Training(%s): %d ..." % (self.label, self.count))
def on_status(self, status):
    """Classify a status text and print it prefixed by its best label.

    Statuses without a ``text`` attribute are ignored, and nothing is
    printed when the classifier returns no estimates. The top label is
    passed to ``print_green``; the text goes to ``print_red`` when that
    label equals ``self.highlight`` and to ``print`` otherwise.
    """
    if not hasattr(status, 'text'):
        return

    d = types.datum([], [])
    d.string_values = [
        ['text', status.text],
    ]

    result = self.classifier.classify(instance_name, [d])
    if not result or not result[0]:
        return

    # Only the highest-scoring estimate is needed, so take the maximum
    # instead of sorting the whole list. On ties both pick the earliest.
    best = max(result[0], key=lambda estimate: estimate.score)
    print_green(best.label, end=" ")
    if best.label == self.highlight:
        print_red(status.text)
    else:
        print(status.text)
def on_status(self, status):
    """Label an incoming status with the classifier and print the outcome.

    A status lacking ``text`` is skipped. When the classifier yields at
    least one estimate, the best-scoring label is handed to
    ``print_green`` and the text is emitted through ``print_red`` if the
    label matches ``self.highlight``, or through ``print`` if not.
    """
    if not hasattr(status, 'text'):
        return

    query = types.datum([], [])
    query.string_values = [['text', status.text]]
    result = self.classifier.classify(instance_name, [query])

    if len(result) == 0 or len(result[0]) == 0:
        return

    # sort the result in order of score
    ranked = list(result[0])
    ranked.sort(key=lambda candidate: candidate.score, reverse=True)
    top_label = ranked[0].label

    print_green(top_label, end=" ")
    if top_label == self.highlight:
        print_red(status.text)
    else:
        print(status.text)
def estimate_location_for(text):
    """Query the classifier for ``text`` and print the scored labels.

    A classifier client is created from the module-level ``host`` and
    ``port``. Estimates are printed in descending score order. If no
    estimates come back, a hint to train more data is printed instead.
    """
    classifier = client.classifier(host, port)

    # Create datum for Jubatus
    d = types.datum([], [])
    d.string_values = [('text', text)]

    # Send estimation query to Jubatus
    result = classifier.classify(instance_name, [d])

    # Guard against an empty reply before indexing into it.
    if result and len(result[0]) > 0:
        # Sort results by score
        est = sorted(result[0], key=lambda e: e.score, reverse=True)

        # Print the result
        print("Estimated Location for %s:" % text)
        for e in est:
            print(" " + e.label + " (" + str(e.score) + ")")
    else:
        # No estimation results; maybe we haven't trained enough
        print("No estimation results available.")
        print("Train more tweets or try using another text.")
def train(client):
    """Send the shuffled (family, given name) samples to ``client.train``.

    The samples are built family by family, shuffled in place with
    ``random.shuffle`` and submitted in one call under the module-level
    ``name``.
    """
    # prepare training data
    # The last name of each family (慶喜, 義昭, 守時) is intentionally
    # absent: those are the ones left to be predicted.
    families = [
        (u'徳川', [
            u'家康', u'秀忠', u'家光', u'家綱', u'綱吉', u'家宣', u'家継',
            u'吉宗', u'家重', u'家治', u'家斉', u'家慶', u'家定', u'家茂',
        ]),
        (u'足利', [
            u'尊氏', u'義詮', u'義満', u'義持', u'義量', u'義教', u'義勝',
            u'義政', u'義尚', u'義稙', u'義澄', u'義稙', u'義晴', u'義輝',
            u'義栄',
        ]),
        (u'北条', [
            u'時政', u'義時', u'泰時', u'経時', u'時頼', u'長時', u'政村',
            u'時宗', u'貞時', u'師時', u'宗宣', u'煕時', u'基時', u'高時',
            u'貞顕',
        ]),
    ]

    train_data = []
    for family, given_names in families:
        for given in given_names:
            train_data.append((family, datum([('name', given)], [])))

    # training data must be shuffled on online learning!
    random.shuffle(train_data)

    # run train
    client.train(name, train_data)
#!/usr/bin/env python
# Interactive prompt: reads a line, classifies it as a "text" datum on the
# Jubatus classifier at 127.0.0.1:9199, and prints every label with its
# score in descending score order. An empty line or end-of-input exits.
import json, commands
from jubatus.classifier import client
from jubatus.classifier import types

while True:
    try:
        buf = raw_input("> ")
    except EOFError:
        # Ctrl-D or exhausted piped input: leave without a traceback.
        break
    if buf == "":
        break

    # A fresh client is created for every query.
    # NOTE(review): presumably this avoids reusing a connection that went
    # idle while waiting at the prompt -- confirm before hoisting it.
    classifier = client.classifier("127.0.0.1", 9199)
    datum = types.datum([["text", buf.rstrip()]], [])
    result = classifier.classify("", [datum])

    if len(result[0]) == 0:
        print("nothing")
        continue

    result[0].sort(key=lambda x: x.score, reverse=True)
    for res in result[0]:
        print(res.label + " -> " + str(res.score))
classifier = client.classifier(options.server_ip,options.server_port) pname = options.name print classifier.get_config(pname) print classifier.get_status(pname) splitter = re.compile(options.column_delimiter) trained_count=0 #是否进行训练 if(options.train=="true"): for line in open(options.train_file): array=splitter.split(line) if(len(array)==2): label, dat=array datum = types.datum( [[options.key, dat]], [] ) classifier.train(pname,[(label,datum)]) trained_count=trained_count+1 print classifier.get_status(pname) print classifier.save(pname, options.name) print classifier.load(pname, options.name) print classifier.get_config(pname) total=0.0 hit=0.0 for line in open(options.test_file): array=splitter.split(line) if(len(array)==2):
#!/usr/bin/env python
# Read-classify-print loop against the Jubatus classifier at
# 127.0.0.1:9199. Each input line becomes a "text" datum; the returned
# labels are printed with their scores, highest first. The loop stops on
# an empty line or when input ends.
import json, commands
from jubatus.classifier import client
from jubatus.classifier import types

while True:
    try:
        buf = raw_input("> ")
    except EOFError:
        # End of input (Ctrl-D / closed pipe) is treated like an empty line.
        break
    if buf == "":
        break

    # One client per query, as in the original flow.
    # NOTE(review): likely intentional so an idle connection is never
    # reused between prompts -- verify against the server's timeout.
    classifier = client.classifier("127.0.0.1", 9199)
    datum = types.datum([["text", buf.rstrip()]], [])
    result = classifier.classify("", [datum])

    estimates = result[0]
    if len(estimates) == 0:
        print("nothing")
        continue

    estimates.sort(key=lambda x: x.score, reverse=True)
    for res in estimates:
        print(res.label + " -> " + str(res.score))
#!/usr/bin/env python
# Trains a Jubatus classifier with a few labelled samples (hair, top,
# bottom, height) and then classifies two unlabelled samples.
import jubatus
from jubatus.classifier.types import datum

host = '127.0.0.1'
port = 9199
name = 'test'


def _person(hair, top, bottom, height):
    """Build a datum with three string features and a numeric height."""
    return datum(
        [('hair', hair), ('top', top), ('bottom', bottom)],
        [('height', height)],
    )


client = jubatus.Classifier(host, port)

train_data = [
    ('male', _person('short', 'sweater', 'jeans', 1.70)),
    ('female', _person('long', 'shirt', 'skirt', 1.56)),
    ('male', _person('short', 'jacket', 'chino', 1.65)),
    ('female', _person('short', 'T shirt', 'jeans', 1.72)),
    ('male', _person('long', 'T shirt', 'jeans', 1.82)),
    ('female', _person('long', 'jacket', 'skirt', 1.43)),
    # ('male', _person('short', 'jacket', 'jeans', 1.76)),
    # ('female', _person('long', 'sweater', 'skirt', 1.52)),
]
client.train(name, train_data)

test_data = [
    _person('short', 'T shirt', 'jeans', 1.81),
    _person('long', 'shirt', 'skirt', 1.50),
]
results = client.classify(name, test_data)
dest='server_port', type='int', default='9199') p.add_option('-n', '--name', action='store', dest='name', type='string', default='tutorial') return p.parse_args() if __name__ == '__main__': options,remainder=parse_args() classifier=client.classifier(options.server_ip,options.server_port) pname=options.name print classifier.get_config(pname) print classifier.get_status(pname) for line in open('adult.data'): age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income=line[:-1].split(',') datum=types.datum([('workclass',workclass),('sex',sex),('occupation',occupation),('education',education),('marital_status',marital_status),('native_country',native_country),('race',race),('relationship',relationship)],[('age',float(age)),('hours_per_week',float(hours_per_week)),('education_num',float(education_num))]) classifier.train(pname,[(income,datum)]) pass print classifier.get_status(pname) print classifier.save(pname, "tutorial") print classifier.load(pname, "tutorial") print classifier.get_config(pname) total_num=0.00 ok_num=0.00 start_time=time.clock() for line in open('adult.test'):
#!/usr/bin/env python host = '127.0.0.1' port = 9199 name = 'test' import jubatus from jubatus.classifier.types import datum client = jubatus.Classifier(host, port) train_data = [ ('male', datum([('hair', 'short'), ('top', 'sweater'), ('bottom', 'jeans')], [('height', 1.70)])), ('female', datum([('hair', 'long'), ('top', 'shirt'), ('bottom', 'skirt')], [('height', 1.56)])), ('male', datum([('hair', 'short'), ('top', 'jacket'), ('bottom', 'chino')], [('height', 1.65)])), ('female', datum([('hair', 'short'), ('top', 'T shirt'), ('bottom', 'jeans')], [('height', 1.72)])), ('male', datum([('hair', 'long'), ('top', 'T shirt'), ('bottom', 'jeans')], [('height', 1.82)])), ('female', datum([('hair', 'long'), ('top', 'jacket'), ('bottom', 'skirt')], [('height', 1.43)])), # ('male', datum([('hair', 'short'), ('top', 'jacket'), ('bottom', 'jeans')], [('height', 1.76)])),
#!/usr/bin/env python
# Trains the Jubatus classifier at 127.0.0.1:9199 from every
# "<label>_train.txt" file in the current directory. Lines are drawn from
# a randomly chosen file on each step, so labels are interleaved, until
# every file has been read to the end.
import glob
import json
import random
import subprocess
import sys

from jubatus.classifier import client
from jubatus.classifier import types

NAME = "a"

classifier = client.classifier("127.0.0.1", 9199)

# glob replaces the former `ls | grep` shell pipeline: it matches the
# suffix literally, yields an empty list (not an error) when nothing
# matches, and returns text on both Python 2 and Python 3.
file_list = sorted(glob.glob("*_train.txt"))

# A real list is required: it is mutated and passed to random.choice.
fds = [[path.replace("_train.txt", ""), open(path, "r")] for path in file_list]

while fds:
    entry = random.choice(fds)
    label, fd = entry
    text = fd.readline()
    if text == "":
        # End of this file: release the handle and stop drawing from it.
        fd.close()
        fds.remove(entry)
        print("finished train of label %s \n" % (label))
        continue

    text_strip = text.rstrip()
    datum = types.datum([["text", text_strip]], [])
    print("train %s : %s ..." % (label, text_strip))
    classifier.train(NAME, [(label, datum)])
def train(client):
    """Submit shuffled, UTF-8 encoded (family, given name) training samples.

    Both the label and the name inside each datum are encoded with
    ``'utf_8'`` before being placed in the sample list. The list is
    shuffled in place and sent through ``client.train`` under the
    module-level ``name``.
    """
    # prepare training data
    # The final name of each family (慶喜, 義昭, 守時) is deliberately
    # missing: those are reserved for prediction.
    roster = (
        (u'徳川', (
            u'家康', u'秀忠', u'家光', u'家綱', u'綱吉', u'家宣', u'家継',
            u'吉宗', u'家重', u'家治', u'家斉', u'家慶', u'家定', u'家茂',
        )),
        (u'足利', (
            u'尊氏', u'義詮', u'義満', u'義持', u'義量', u'義教', u'義勝',
            u'義政', u'義尚', u'義稙', u'義澄', u'義稙', u'義晴', u'義輝',
            u'義栄',
        )),
        (u'北条', (
            u'時政', u'義時', u'泰時', u'経時', u'時頼', u'長時', u'政村',
            u'時宗', u'貞時', u'師時', u'宗宣', u'煕時', u'基時', u'高時',
            u'貞顕',
        )),
    )

    train_data = [
        (family.encode('utf_8'), datum([('name', given.encode('utf_8'))], []))
        for family, given_names in roster
        for given in given_names
    ]

    # training data must be shuffled on online learning!
    random.shuffle(train_data)

    # run train
    client.train(name, train_data)
#!/usr/bin/env python
# Feeds every "<label>_train.txt" file in the current directory to the
# Jubatus classifier at 127.0.0.1:9199, one line at a time. The file to
# read from is picked at random on each step so labels are interleaved.
import sys, json, commands, pprint
import glob
import random
from jubatus.classifier import client
from jubatus.classifier import types

NAME = "a"

classifier = client.classifier("127.0.0.1", 9199)

# glob instead of `ls|grep`: with no matching files the old pipeline
# produced [""] and open("") failed; glob simply returns an empty list and
# matches the suffix literally rather than as a regular expression.
file_list = sorted(glob.glob("*_train.txt"))
pp = pprint.PrettyPrinter()

fds = [[x.replace("_train.txt", ""), open(x, "r")] for x in file_list]

while fds != []:
    entry = random.choice(fds)
    label, fd = entry
    text = fd.readline()
    if text == "":
        # This file is exhausted: close it and drop it from the pool.
        fd.close()
        fds.remove(entry)
        # Parenthesised single-argument form is valid on Python 2 and 3.
        print("finished train of label %s \n" % (label))
        continue

    text_strip = text.rstrip()
    datum = types.datum([["text", text_strip]], [])
    print("train %s : %s ..." % (label, text_strip))
    classifier.train(NAME, [(label, datum)])