def setUp(self):
    """Write the classifier config, fork the classifier process and connect.

    The configuration is kept on ``self.config``, the forked process handle
    on ``self.srv`` and the client on ``self.cli``. If the client cannot be
    created, the forked process is killed before the error propagates.
    """
    converter_settings = {
        "string_filter_types": {},
        "string_filter_rules": [],
        "num_filter_types": {},
        "num_filter_rules": [],
        "string_types": {},
        "string_rules": [
            {
                "key": "*",
                "type": "str",
                "sample_weight": "bin",
                "global_weight": "bin",
            },
        ],
        "num_types": {},
        "num_rules": [{"key": "*", "type": "num"}],
    }
    self.config = {
        "method": "AROW",
        "converter": converter_settings,
        "parameter": {"regularization_weight": 1.001},
    }

    config_file = 'config_classifier.json'
    TestUtil.write_file(config_file, json.dumps(self.config))
    self.srv = TestUtil.fork_process('classifier', port, config_file)
    try:
        self.cli = Classifier(host, port, "name")
    except:
        # Catch-all on purpose: whatever went wrong, the forked process
        # must not be left running; the original error is re-raised.
        TestUtil.kill_process(self.srv)
        raise
def run(self):
    """Call ``save`` on a classifier client ``self.count`` times.

    Each call uses ``self.name`` followed by the iteration index as the
    model name. Start and finish are logged at debug level.
    """
    start_message = 'Start running with name: {0}, count: {1}'.format(
        self.name, self.count)
    logging.debug(start_message)

    client = Classifier('127.0.0.1', 9199, 'test')
    for index in range(self.count):
        model_name = self.name + str(index)
        client.save(model_name)

    logging.debug('Finished running')
def main():
    """Issue 10000 ``do_mix`` calls, printing every node's RSS each 100 calls.

    Each printed line is tab-separated: iteration index, node name, RSS.
    """
    args = parse_options()
    client = Classifier('127.0.0.1', args.port, 'test', 0)

    for iteration in range(10000):
        client.do_mix()
        if iteration % 100 != 0:
            continue
        # Every 100th iteration (including the first): report status.
        status = client.get_status()
        for node, info in status.items():
            print('\t'.join([str(iteration), node, info['RSS']]))
def main():
    """Classify one datum with a random 'key' value and print the labels.

    The datum carries ``key`` = 1.0 or 2.0, chosen by a coin flip. The
    classification result and the server's label set are printed.
    """
    args = parse_options()
    client = Classifier('127.0.0.1', args.port, 'test', 0)

    sample = Datum()

    # Learn same data
    coin = random.randint(0, 1)
    if coin:
        value = 1.0
    else:
        value = 2.0
    sample.add_number('key', value)

    print(client.classify([sample]))
    print(client.get_labels())
def setUp(self):
    """Write the classifier config, fork the classifier process and connect.

    Stores the configuration on ``self.config``, the forked process handle
    on ``self.srv`` and the client on ``self.cli``. The forked process is
    killed if constructing the client fails.
    """
    string_rule = {
        "key": "*",
        "type": "str",
        "sample_weight": "bin",
        "global_weight": "bin",
    }
    num_rule = {"key": "*", "type": "num"}
    self.config = {
        "method": "AROW",
        "converter": {
            "string_filter_types": {},
            "string_filter_rules": [],
            "num_filter_types": {},
            "num_filter_rules": [],
            "string_types": {},
            "string_rules": [string_rule],
            "num_types": {},
            "num_rules": [num_rule],
        },
        "parameter": {"regularization_weight": 1.001},
    }

    serialized = json.dumps(self.config)
    TestUtil.write_file('config_classifier.json', serialized)
    self.srv = TestUtil.fork_process(
        'classifier', port, 'config_classifier.json')
    try:
        self.cli = Classifier(host, port, "name")
    except:
        # Deliberate catch-all: clean up the forked process for any
        # failure, then let the original error propagate.
        TestUtil.kill_process(self.srv)
        raise
def main():
    """Train the classifier one million times on a random two-class datum.

    Each sample has ``key`` = 1.0 labelled 'Pos' or ``key`` = 2.0 labelled
    'Neg', chosen by a coin flip. Progress is printed every 10000 samples.
    """
    args = parse_options()
    client = Classifier('127.0.0.1', args.port, 'test', 0)

    for count in range(1000000):
        sample = Datum()

        # Learn same data
        coin = random.randint(0, 1)
        if coin:
            value, label = 1.0, 'Pos'
        else:
            value, label = 2.0, 'Neg'
        sample.add_number('key', value)
        client.train([LabeledDatum(label, sample)])

        if count % 10000 == 0:
            print('train ' + str(count) + ' data')
def get_classify_data(usr):
    """Train a classifier for a user and split RSS entries by its verdict.

    Training uses the rows from ``select_Interrest_Blog`` (label taken from
    ``CATEGORY``, text from ``TITLE``) plus RSS entries of the categories
    social/fun/entertainment/game, which are all labelled ``'no'``. RSS
    entries of the categories it/popular/life/knowledge are then classified
    by title.

    Args:
        usr: mapping with at least a ``'display_name'`` entry; it is also
            passed through to ``get_rss_data_from_catlist``.

    Returns:
        A tuple ``(ret1, ret2)``: entries whose most likely label is
        ``'yes'``, and the remaining classified entries.
    """
    user = usr['display_name']
    print(user)
    options, _ = parse_args()
    classifier = Classifier(options.server_ip, options.server_port,
                            options.name, 10.0)

    # train
    for row in select_Interrest_Blog(user):
        datum = Datum({"message": row['TITLE']})
        classifier.train([LabeledDatum(row['CATEGORY'], datum)])

    negatives = get_rss_data_from_catlist(
        usr, ['social', 'fun', 'entertainment', 'game'])
    for data in negatives:
        datum = Datum({"message": data["title"]})
        classifier.train([LabeledDatum('no', datum)])

    # print classifier.get_status()
    # print classifier.save("tutorial")
    # print classifier.load("tutorial")
    # print classifier.get_config()

    ret1 = []
    ret2 = []
    candidates = get_rss_data_from_catlist(
        usr, ['it', 'popular', 'life', 'knowledge'])
    for data in candidates:
        datum = Datum({"message": data["title"]})
        ans = classifier.classify([datum])
        # Identity test is the correct way to detect a missing result.
        if ans is not None:
            estm = get_most_likely(ans[0])
            if estm[0] == 'yes':
                ret1.append(data)
            else:
                ret2.append(data)

    print(ret1)
    print("")
    print(ret2)
    return ret1, ret2
x_vector = numpy.array(dat) if first_flag == 1: train_data = numpy.hstack((train_data, x_vector)) train_label = numpy.array(y_vector) first_flag = 0 else: train_data = numpy.vstack((train_data, x_vector)) train_label = numpy.array(y_vector) train_list = [train_data, train_label] return train_list if __name__ == '__main__': options, remainder = parse_args() classifier = Classifier(options.server_ip,options.server_port, options.name, 10.0) train_list = cross_validation_python() data_train, data_test, label_train, label_test = train_test_split(train_list[0], train_list[1]) for label, dat in izip(label_train, data_train): data_dict = json.loads(dat[0]) datum = Datum(data_dict) classifier.train([LabeledDatum(label, datum)]) count_ok = 0
#!/usr/bin/env python
from jubatus.classifier.client import Classifier

# 49 rounds, each with a freshly created client issuing 10000 do_mix calls.
# Every 1000th call the RSS of each node is printed, tab-separated, together
# with a running call counter.
for round_no in xrange(1, 50):
    client = Classifier('127.0.0.1', 9199, 'test')
    base = round_no * 10000
    for step in xrange(1, 10001):
        client.do_mix()
        if step % 1000 != 0:
            continue
        status = client.get_status()
        for node, info in status.items():
            print('\t'.join([str(base + step), node, info['RSS']]))
#!/usr/bin/env python
import random
import time

from jubatus.classifier.client import Classifier
from jubatus.classifier.types import LabeledDatum
from jubatus.common import Datum

# Build 100000 labelled samples; each has 20 numeric features whose keys
# are unique to the sample ("<feature>-<sample>") and whose values are
# drawn from [1.0, 2.0).
samples = []
for sample_no in xrange(0, 100000):
    datum = Datum()
    for feature_no in xrange(0, 20):
        key = "{0}-{1}".format(feature_no, sample_no)
        datum.add_number(key, random.random() + 1.0)
    if random.randint(0, 1):
        label = "Pos"
    else:
        label = "Neg"
    samples.append(LabeledDatum(label, datum))

client = Classifier("127.0.0.1", 9199, "test", 0)

# Time a single train() call over the whole batch.
started = time.time()
client.train(samples)
finished = time.time()

elapsed_msec = (finished - started) * 1000
print(str(len(samples)) + " ... " + str(elapsed_msec) + " msec")
#!/usr/bin/env python
from jubatus.classifier.client import Classifier
import time

# 49 rounds, each with a freshly created client. get_status is called on
# every one of the 10000 inner iterations, but the per-node RSS is only
# printed on every 1000th.
for round_no in xrange(1, 50):
    client = Classifier("127.0.0.1", 9199, "test")
    base = round_no * 10000
    for step in xrange(1, 10001):
        status = client.get_status()
        if step % 1000 != 0:
            continue
        for node, info in status.items():
            print("\t".join([str(base + step), node, info["RSS"]]))
#!/usr/bin/env python
import random
import time

from jubatus.classifier.client import Classifier
from jubatus.classifier.types import LabeledDatum
from jubatus.common import Datum

# Build 100000 labelled samples with 20 numeric features each. Feature keys
# are unique per sample ('<feature>-<sample>') and values lie in [1.0, 2.0).
batch = []
for sample_no in xrange(0, 100000):
    datum = Datum()
    for feature_no in xrange(0, 20):
        key = '{0}-{1}'.format(feature_no, sample_no)
        datum.add_number(key, random.random() + 1.0)
    if random.randint(0, 1):
        label = 'Pos'
    else:
        label = 'Neg'
    batch.append(LabeledDatum(label, datum))

client = Classifier('127.0.0.1', 9199, 'test', 0)

# Measure one train() call over the full batch.
t_begin = time.time()
client.train(batch)
t_end = time.time()

elapsed_msec = (t_end - t_begin) * 1000
print(str(len(batch)) + ' ... ' + str(elapsed_msec) + ' msec')
class ClassifierTest(unittest.TestCase):
    """Exercise the classifier client against a forked classifier process."""

    def setUp(self):
        """Write the config file, fork the classifier process and connect.

        The forked process is killed if the client cannot be created.
        """
        self.config = {
            "method": "AROW",
            "converter": {
                "string_filter_types": {},
                "string_filter_rules": [],
                "num_filter_types": {},
                "num_filter_rules": [],
                "string_types": {},
                "string_rules": [{"key": "*", "type": "str", "sample_weight": "bin", "global_weight": "bin"}],
                "num_types": {},
                "num_rules": [{"key": "*", "type": "num"}],
            },
            "parameter": {"regularization_weight": 1.001},
        }

        TestUtil.write_file("config_classifier.json", json.dumps(self.config))
        self.srv = TestUtil.fork_process("classifier", port, "config_classifier.json")
        try:
            self.cli = Classifier(host, port, "name")
        except:
            # Deliberate catch-all: never leave the forked process running;
            # the original error is re-raised.
            TestUtil.kill_process(self.srv)
            raise

    def tearDown(self):
        """Close the client connection and always kill the forked process."""
        try:
            if self.cli:
                self.cli.get_client().close()
        finally:
            # Kill the process even when closing the client raises, so a
            # failing test cannot leave the forked process running.
            TestUtil.kill_process(self.srv)

    def test_get_client(self):
        """The wrapped client is a msgpackrpc client."""
        self.assertTrue(isinstance(self.cli.get_client(), msgpackrpc.client.Client))

    def test_get_config(self):
        """The server reports the configuration that was written in setUp."""
        config = self.cli.get_config()
        # Compare key-sorted JSON so key order does not matter.
        self.assertEqual(
            json.dumps(json.loads(config), sort_keys=True),
            json.dumps(self.config, sort_keys=True))

    def test_train(self):
        """Training one labelled datum returns 1."""
        d = Datum({"skey1": "val1", "skey2": "val2", "nkey1": 1.0, "nkey2": 2.0})
        data = [["label", d]]
        self.assertEqual(self.cli.train(data), 1)

    def test_classify(self):
        """Classifying a datum completes without raising."""
        d = Datum({"skey1": "val1", "skey2": "val2", "nkey1": 1.0, "nkey2": 2.0})
        self.cli.classify([d])

    def test_set_label(self):
        """Setting a label returns True."""
        self.assertEqual(self.cli.set_label("label"), True)

    def test_get_labels(self):
        """A freshly set label is listed with the value 0."""
        self.cli.set_label("label")
        self.assertEqual(self.cli.get_labels(), {"label": 0})

    def test_delete_label(self):
        """Deleting an existing label returns True."""
        self.cli.set_label("label")
        self.assertEqual(self.cli.delete_label("label"), True)

    def test_save(self):
        """Saving returns a result with exactly one entry."""
        self.assertEqual(len(self.cli.save("classifier.save_test.model")), 1)

    def test_load(self):
        """A model that was just saved can be loaded again."""
        model_name = "classifier.load_test.model"
        self.cli.save(model_name)
        self.assertEqual(self.cli.load(model_name), True)

    def test_get_status(self):
        """Fetching the status completes without raising."""
        self.cli.get_status()

    def test_str(self):
        """EstimateResult has the expected string representation."""
        self.assertEqual(
            "estimate_result{label: label, score: 1.0}",
            str(EstimateResult("label", 1.0)))
client.train(train_data) result = client.classify([predict_data[0]]) predicted = max(result[0], key=lambda x: x.score).label if answer == predicted: print('correct', end="\t") else: print('wrong', end="\t") print(answer, predicted, result, sep="\t") if __name__ == '__main__': try: exclude = sys.argv[3] training = sys.argv[2] port = int(sys.argv[1]) except: sys.stderr.write( "Usage: jubatus.py port_number training.tsv exclude name\n") sys.exit(7) localhost = '127.0.0.1' if len(sys.argv) > 4: name = sys.argv[4] else: name = 'Coded by Kohji' client = Classifier(localhost, port, name) # connect to Jubatus train_and_predict(client, training)
#!/usr/bin/env python
from jubatus.classifier.client import Classifier

# Repeats 49 rounds; every round creates a new client and sends 10000
# do_mix requests. After each 1000th request the RSS of every node is
# printed as "<running counter>\t<node>\t<RSS>".
for round_index in xrange(1, 50):
    client = Classifier('127.0.0.1', 9199, 'test')
    for call_index in xrange(1, 10001):
        client.do_mix()
        if call_index % 1000 == 0:
            counter = str((round_index * 10000) + call_index)
            status = client.get_status()
            for node in status:
                print('\t'.join([counter, node, status[node]['RSS']]))
import argparse
import socket

from jubatus.classifier.client import Classifier

# Command-line tool: ask the classifier server to save its model under the
# name given with -n/--name.
parser = argparse.ArgumentParser()
parser.add_argument("-n", "--name", help="set the name of the file to be saved")
parser.add_argument("--host", help="set the host address")
# Parse the port as an integer so a user-supplied value has the same type
# as the built-in default (9199) instead of arriving as a string.
parser.add_argument("--port", type=int, help="set the port number")
args = parser.parse_args()
print(args)

# Fall back to this machine's own address and the default port.
host_ip = args.host if args.host else socket.gethostbyname(socket.gethostname())
port = args.port if args.port is not None else 9199
client = Classifier(host_ip, port, '')
if args.name:
    client.save(args.name)
    print("file saved at /tmp of the "+host_ip+" unless you specified output path with -d/--datadir when you started server process.")
else:
    print("[Error] specify the model's name to be saved!")
#!/usr/bin/env python
from jubatus.classifier.client import Classifier

# Send 10000 consecutive do_mix requests over a single client.
mix_client = Classifier('127.0.0.1', 9199, 'test')
for _ in xrange(10000):
    mix_client.do_mix()
def main():
    """Send an empty classify request using the module-level port and timeout."""
    address = "127.0.0.1"
    cluster_name = "sleeping"
    client = Classifier(address, port, cluster_name, timeout)
    no_data = []
    client.classify(no_data)
class ClassifierTest(unittest.TestCase):
    """Exercise the classifier client against a forked classifier process."""

    def setUp(self):
        """Write the config file, fork the classifier process and connect.

        The forked process is killed if the client cannot be created.
        """
        self.config = {
            "method": "AROW",
            "converter": {
                "string_filter_types": {},
                "string_filter_rules": [],
                "num_filter_types": {},
                "num_filter_rules": [],
                "string_types": {},
                "string_rules": [{
                    "key": "*",
                    "type": "str",
                    "sample_weight": "bin",
                    "global_weight": "bin"
                }],
                "num_types": {},
                "num_rules": [{
                    "key": "*",
                    "type": "num"
                }]
            },
            "parameter": {
                "regularization_weight": 1.001
            }
        }

        TestUtil.write_file('config_classifier.json', json.dumps(self.config))
        self.srv = TestUtil.fork_process('classifier', port,
                                         'config_classifier.json')
        try:
            self.cli = Classifier(host, port, "name")
        except:
            # Deliberate catch-all: never leave the forked process running;
            # the original error is re-raised.
            TestUtil.kill_process(self.srv)
            raise

    def tearDown(self):
        """Close the client connection and always kill the forked process."""
        try:
            if self.cli:
                self.cli.get_client().close()
        finally:
            # Kill the process even when closing the client raises, so a
            # failing test cannot leave the forked process running.
            TestUtil.kill_process(self.srv)

    def test_get_client(self):
        """The wrapped client is a msgpackrpc client."""
        self.assertTrue(
            isinstance(self.cli.get_client(), msgpackrpc.client.Client))

    def test_get_config(self):
        """The server reports the configuration that was written in setUp."""
        config = self.cli.get_config()
        # Compare key-sorted JSON so key order does not matter.
        self.assertEqual(json.dumps(json.loads(config), sort_keys=True),
                         json.dumps(self.config, sort_keys=True))

    def test_train(self):
        """Training one labelled datum returns 1."""
        d = Datum({
            "skey1": "val1",
            "skey2": "val2",
            "nkey1": 1.0,
            "nkey2": 2.0
        })
        data = [["label", d]]
        self.assertEqual(self.cli.train(data), 1)

    def test_classify(self):
        """Classifying a datum completes without raising."""
        d = Datum({
            "skey1": "val1",
            "skey2": "val2",
            "nkey1": 1.0,
            "nkey2": 2.0
        })
        self.cli.classify([d])

    def test_set_label(self):
        """Setting a label returns True."""
        self.assertEqual(self.cli.set_label("label"), True)

    def test_get_labels(self):
        """A freshly set label is listed with the value 0."""
        self.cli.set_label("label")
        self.assertEqual(self.cli.get_labels(), {"label": 0})

    def test_delete_label(self):
        """Deleting an existing label returns True."""
        self.cli.set_label("label")
        self.assertEqual(self.cli.delete_label("label"), True)

    def test_save(self):
        """Saving returns a result with exactly one entry."""
        self.assertEqual(len(self.cli.save("classifier.save_test.model")), 1)

    def test_load(self):
        """A model that was just saved can be loaded again."""
        model_name = "classifier.load_test.model"
        self.cli.save(model_name)
        self.assertEqual(self.cli.load(model_name), True)

    def test_get_status(self):
        """Fetching the status completes without raising."""
        self.cli.get_status()

    def test_str(self):
        """EstimateResult has the expected string representation."""
        self.assertEqual("estimate_result{label: label, score: 1.0}",
                         str(EstimateResult("label", 1.0)))
result = {} result[0] = '' result[1] = 0 for res in estm: if prob == None or res.score > prob: ans = res.label prob = res.score result[0] = ans result[1] = prob return result if __name__ == '__main__': options, remainder = parse_args() classifier = Classifier(options.server_ip, options.server_port, options.name, 10.0) print classifier.get_config() print classifier.get_status() for line in open('train.dat'): label, file = line[:-1].split(',') dat = open(file).read() datum = Datum({"message": dat}) classifier.train([LabeledDatum(label, datum)]) print classifier.get_status() print classifier.save("tutorial") print classifier.load("tutorial")
result[0] = '' result[1] = 0 for res in estm: if prob == None or res.score > prob : ans = res.label prob = res.score result[0] = ans result[1] = prob return result if __name__ == '__main__': options, remainder = parse_args() classifier = Classifier(options.server_ip,options.server_port, options.name, 10.0) print classifier.get_config() print classifier.get_status() for line in open('train.dat'): label, file = line[:-1].split(',') dat = open(file).read() datum = Datum({"message": dat}) classifier.train([LabeledDatum(label, datum)]) print classifier.get_status() print classifier.save("tutorial")
#!/usr/bin/env python
from jubatus.classifier.client import Classifier

# Send one do_mix request to the classifier at 127.0.0.1:9000; the fourth
# constructor argument is passed as 0.
server_host, server_port = '127.0.0.1', 9000
mix_client = Classifier(server_host, server_port, 'test', 0)
mix_client.do_mix()
def run(self):
    """Save the model ``self.count`` times under indexed names.

    The model name for each call is ``self.name`` with the iteration index
    appended. Debug log lines mark the start and the end of the run.
    """
    logging.debug(
        'Start running with name: {0}, count: {1}'.format(self.name, self.count))

    saver = Classifier('127.0.0.1', 9199, 'test')
    for sequence_no in range(self.count):
        saver.save(self.name + str(sequence_no))

    logging.debug('Finished running')
import argparse
import socket

from jubatus.classifier.client import Classifier

# Command-line tool: ask the classifier server to load the model whose name
# is given with -n/--name.
parser = argparse.ArgumentParser()
parser.add_argument("-n", "--name", help="set the name of the file to load")
parser.add_argument("--host", help="set the host address")
# Parse the port as an integer so a user-supplied value has the same type
# as the built-in default (9199) instead of arriving as a string.
parser.add_argument("--port", type=int, help="set the port number")
args = parser.parse_args()
print(args)

# Fall back to this machine's own address and the default port.
host_ip = args.host if args.host else socket.gethostbyname(
    socket.gethostname())
port = args.port if args.port is not None else 9199
client = Classifier(host_ip, port, "")
if args.name:
    print(args.name)
    client.load(args.name)
    print("model " + args.name + " has been loaded")
else:
    print("[Error] specify the model's name to be loaded!")