def test_str(self):
    """Check the ``str()`` rendering of a Datum with one value of each kind.

    Two renderings of the binary value are accepted: with and without a
    ``b`` prefix.
    """
    d = Datum()
    d.add_string('name', 'john')
    d.add_number('age', 20)
    d.add_binary('image', b('0101'))

    expected = (
        "datum{string_values: [['name', 'john']], "
        "num_values: [['age', 20.0]], "
        "binary_values: [['image', '0101']]}",
        "datum{string_values: [['name', 'john']], "
        "num_values: [['age', 20.0]], "
        "binary_values: [['image', b'0101']]}",
    )
    # assertIn reports the actual string on failure, unlike assertTrue on a
    # boolean expression.
    self.assertIn(str(d), expected)
def test_str(self):
    """Check the ``str()`` rendering of a Datum with one value of each kind."""
    d = Datum()
    d.add_string('name', 'john')
    d.add_number('age', 20)
    d.add_binary('image', '0101')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        "datum{string_values: [['name', 'john']], "
        "num_values: [['age', 20.0]], "
        "binary_values: [['image', '0101']]}",
        str(d))
def make_datum(data, headers):
    """Create a datum from a list of headers and a single row of data.

    Every key in ``headers`` is looked up in ``data`` and the value found
    there is added to the datum as a number.
    """
    datum = Datum()
    for key in headers:
        value = data[key]
        datum.add_number(key, value)
    return datum
def test_str(self):
    """Check the ``str()`` rendering of a Datum with one value of each kind.

    Two renderings of the binary value are accepted: with and without a
    ``b`` prefix.
    """
    d = Datum()
    d.add_string('name', 'john')
    d.add_number('age', 20)
    d.add_binary('image', b('0101'))

    expected = (
        "datum{string_values: [['name', 'john']], "
        "num_values: [['age', 20.0]], "
        "binary_values: [['image', '0101']]}",
        "datum{string_values: [['name', 'john']], "
        "num_values: [['age', 20.0]], "
        "binary_values: [['image', b'0101']]}",
    )
    # assertIn reports the actual string on failure, unlike assertTrue on a
    # boolean expression.
    self.assertIn(str(d), expected)
def main():
    """Classify one single-feature datum and print the result and the labels.

    The classifier at 127.0.0.1 is reached on the port returned by
    ``parse_options()``. The datum carries one numeric feature, ``key``,
    whose value is 1.0 or 2.0 chosen at random.
    """
    args = parse_options()
    client = Classifier('127.0.0.1', args.port, 'test', 0)

    d = Datum()
    rand = random.randint(0, 1)
    d.add_number('key', 1.0 if rand else 2.0)

    # print() with a single argument behaves the same on Python 2 and 3.
    print(client.classify([d]))
    print(client.get_labels())
def convert(self, args):
    """Build a Datum from a flat ``[key, value, key, value, ...]`` sequence.

    Each value is first tried as a number via ``float()``; if that path
    raises ``ValueError`` the raw value is added as a string instead.

    Returns:
        A ``(len(args), datum)`` tuple.

    Raises:
        ValueError: if ``args`` has an odd number of items, i.e. the last
            key has no value.
    """
    if len(args) % 2 != 0:
        raise ValueError(
            'value for the last datum key ({0}) is missing'.format(args[-1]))

    d = Datum()
    # Even positions are keys, odd positions are their values.
    for feat_key, feat_val in zip(args[::2], args[1::2]):
        try:
            d.add_number(feat_key, float(feat_val))
        except ValueError:
            d.add_string(feat_key, feat_val)
    return (len(args), d)
def main():
    """Train a regression model ten times, then print one estimate.

    Connects to the regression server at 127.0.0.1:9199, trains with the
    target value 10.0, and prints the estimate for a datum with x=1, y=4.
    """
    cl = Regression("127.0.0.1", 9199, "test")

    d = Datum()
    # NOTE(review): ``d`` is created once and extended on every iteration,
    # so it presumably accumulates repeated x/y entries -- confirm intended.
    # range() is used instead of the Python 2-only xrange().
    for _ in range(10):
        d.add_number('x', 1)
        d.add_number('y', 4)
        cl.train([[10.0, d]])

    d = Datum()
    d.add_number('x', 1)
    d.add_number('y', 4)
    result = cl.estimate([d])
    print("{0:30.30f}".format(result[0]))
def convert(self, args):
    """Build a Datum from a flat ``[key, value, key, value, ...]`` sequence.

    Each value is first tried as a number via ``float()``; if that path
    raises ``ValueError`` the raw value is added as a string instead.

    Returns:
        A ``(len(args), datum)`` tuple.

    Raises:
        ValueError: if ``args`` has an odd number of items, i.e. the last
            key has no value.
    """
    if len(args) % 2 != 0:
        raise ValueError(
            'value for the last datum key ({0}) is missing'.format(args[-1]))

    d = Datum()
    # Even positions are keys, odd positions are their values.
    for feat_key, feat_val in zip(args[::2], args[1::2]):
        try:
            d.add_number(feat_key, float(feat_val))
        except ValueError:
            d.add_string(feat_key, feat_val)
    return (len(args), d)
def main():
    """Train the classifier on one million random single-feature datums.

    Each datum has one numeric feature ``key``: 1.0 with label 'Pos' or
    2.0 with label 'Neg', chosen at random. Progress is printed every
    10000 iterations.
    """
    args = parse_options()
    client = Classifier('127.0.0.1', args.port, 'test', 0)

    for i in range(0, 1000000):
        d = Datum()
        rand = random.randint(0, 1)
        d.add_number('key', 1.0 if rand else 2.0)
        ld = LabeledDatum('Pos' if rand else 'Neg', d)
        client.train([ld])

        if i % 10000 == 0:
            # print() with a single argument works on Python 2 and 3.
            print('train ' + str(i) + ' data')
def main():
    """Fill a recommender with ten rows and print the rows similar to a datum.

    Connects to the recommender at localhost:9199, clears it, registers
    rows "0".."9", then queries the five rows most similar to a datum with
    x=1, y=4 and prints each id and score.
    """
    # cl = jubatus.NearestNeighbor("localhost", 9199, "nn")
    cl = client.Recommender("localhost", 9199, "test")
    cl.clear()

    d = Datum()
    # NOTE(review): ``d`` is created once and extended on every iteration,
    # so later rows presumably carry repeated x/y entries -- confirm intended.
    # range() is used instead of the Python 2-only xrange().
    for i in range(10):
        d.add_number('x', 1)
        d.add_number('y', 4)
        cl.update_row(str(i), d)

    # scores = cl.similar_row_from_id(str(0), 10)
    # for score in scores:
    #     print("{0} {1:30.30f}".format(score.id, score.score))

    d = Datum()
    d.add_number('x', 1)
    d.add_number('y', 4)
    predicts = cl.similar_row_from_datum(d, 5)
    print(predicts)
    for predict in predicts:
        print("{0} {1:30.30f}".format(predict.id, predict.score))
def main():
    """Train a classifier ten times with one label, then classify a datum.

    Connects to the classifier at localhost:9199, clears it, trains with
    the label "label1", then classifies a datum with x=1, y=4 and prints
    the first label and score of each result.
    """
    # cl = jubatus.NearestNeighbor("localhost", 9199, "nn")
    cl = Classifier("localhost", 9199, "test")
    cl.clear()

    d = Datum()
    # NOTE(review): ``d`` is created once and extended on every iteration,
    # so it presumably accumulates repeated x/y entries -- confirm intended.
    # range() is used instead of the Python 2-only xrange().
    for _ in range(10):
        d.add_number('x', 1)
        d.add_number('y', 4)
        cl.train([LabeledDatum("label1", d)])
        # cl.set_row(str(i), d)

    # scores = cl.similar_row_from_id(str(0), 10)
    # for score in scores:
    #     print("{0} {1:30.30f}".format(score.id, score.score))

    d = Datum()
    d.add_number('x', 1)
    d.add_number('y', 4)
    predict = cl.classify([d])
    print(predict)
    for score in predict:
        print("{0} {1:30.30f}".format(score[0].label, score[0].score))
def _juba_proc(self, clock, datadict, method="add"):
    """Send one sample to the anomaly model, retrying on RPC failures.

    A Datum is built from ``datadict``: ``hostid`` is divided by
    ``ZBX_ITEMID_DIGITS``, ``weekday`` and ``hour`` are passed through
    unchanged, and every other key goes through ``self.norm``.

    With ``method="add"`` the datum is added to the model. With
    ``method="calc"`` its score is computed; ``self.alarm_on`` is switched
    on when the score is infinite or above ``ML_LIMIT`` and switched off
    otherwise, and each switch is logged through ``cf.log``.

    Args:
        clock: timestamp passed to ``cf.clock2strjst`` for log messages.
        datadict: mapping of feature name to raw value.
        method: ``"add"`` or ``"calc"``; any other value does nothing.

    Raises:
        msgpackrpc.error.TransportError, msgpackrpc.error.TimeoutError:
            re-raised once ``JUBA_RETRY_MAX`` attempts have failed.
    """
    datum = Datum()
    for k in datadict:
        if k == "hostid":
            datum.add_number(str(k), int(datadict[k]) * 1.0 / ZBX_ITEMID_DIGITS)
        elif k == "weekday" or k == "hour":
            datum.add_number(str(k), datadict[k])
        else:
            datum.add_number(str(k), self.norm(k, datadict[k]))

    # One counter name throughout: the original initialised ``retry_cnt``
    # but decremented ``retry_count``, so the first RPC error raised
    # UnboundLocalError instead of retrying.
    retries_left = JUBA_RETRY_MAX
    while True:
        try:
            if method == "add":
                print(datum)
                self.anom.add(datum)
                # NOTE(review): exit() ends the process right after the
                # first successful add; it looks like a debugging leftover
                # -- confirm before removing.
                exit()
            if method == "calc":
                print(datum)
                score = self.anom.calc_score(datum)
                # Explicit == False / == True comparisons are kept: a
                # non-bool alarm_on value matches neither branch.
                if score == float('Inf') or score > ML_LIMIT:
                    if self.alarm_on == False:
                        self.alarm_on = True
                        cf.log("[%s] score=%f" % (cf.clock2strjst(clock), score))
                else:
                    if self.alarm_on == True:
                        self.alarm_on = False
                        cf.log("[%s] score recovered to normal:score=%f" % (cf.clock2strjst(clock), score))
            break
        except (msgpackrpc.error.TransportError, msgpackrpc.error.TimeoutError) as e:
            retries_left -= 1
            if retries_left <= 0:
                raise
            # Drop the broken connection and reconnect before retrying.
            self.anom.get_client().close()
            self.set_anom()
            print(e)
            time.sleep(JUBA_RETRY_INTERVAL)
def test_add_int(self):
    """An int given to add_number serializes like a Datum built from 1.0."""
    datum = Datum()
    datum.add_number('key', 1)

    expected = Datum({'key': 1.0}).to_msgpack()
    actual = datum.to_msgpack()
    self.assertEqual(expected, actual)
def make_datum():
    """Return a Datum holding one string, one number and one binary value."""
    datum = Datum()
    datum.add_string('string-key', 'str')
    datum.add_number('number-key', 1.0)
    datum.add_binary('binary-key', b'bin')
    return datum
#!/usr/bin/env python
# Benchmark: build 100000 labeled datums of 20 random numeric features each
# and time a single Classifier.train() call over all of them.

import random
import time

from jubatus.classifier.client import Classifier
from jubatus.classifier.types import LabeledDatum
from jubatus.common import Datum

data = []
# range()/print() are used instead of the Python 2-only xrange/print
# statement so the script runs on Python 2 and 3.
for i in range(0, 100000):
    d = Datum()
    for j in range(0, 20):
        # Feature names are unique per datum: "<feature index>-<datum index>".
        d.add_number(str(j) + "-" + str(i), random.random() + 1.0)
    ld = LabeledDatum("Pos" if random.randint(0, 1) else "Neg", d)
    data.append(ld)

client = Classifier("127.0.0.1", 9199, "test", 0)

# Only the train() call itself is timed.
start_time = time.time()
client.train(data)
end_time = time.time()

print(str(len(data)) + " ... " + str((end_time - start_time) * 1000) + " msec")
def test_add_int(self):
    """An int given to add_number serializes like a Datum built from 1.0."""
    d = Datum()
    d.add_number('key', 1)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(Datum({'key': 1.0}).to_msgpack(), d.to_msgpack())
# 2. Prepare the training data
mongo_dic = convertMongo()
dic = mongo_dic.getDic()
name = ''
value = 0
for line in dic:
    name = dic[line]['name']
    value = dic[line]['value']

    datum = Datum()
    # Only the numeric 'value' feature is used; adding 'name' as a string
    # feature was disabled in the original code.
    datum.add_number('value', value)

    # 3. Train on the data (update the learning model)
    ret = anom.add(datum)

    # 4. Output the results
    # Infinite scores are skipped; a score of exactly 1.0 is recorded as
    # 'nomaly', anything else as 'anomaly'.
    # NOTE(review): 'nomaly' looks like a typo for 'normal', but it is a
    # stored value -- confirm with consumers before changing it.
    if ret.score != float('Inf'):
        result = 'anomaly' if ret.score != 1.0 else 'nomaly'
        col.insert({'result': result, 'value': value})
        # Formats like the Python 2 statement ``print ret, value`` but also
        # runs on Python 3.
        print('{0} {1}'.format(ret, value))
#!/usr/bin/env python
# Benchmark: build 100000 labeled datums of 20 random numeric features each
# and time a single Classifier.train() call over all of them.

import random
import time

from jubatus.classifier.client import Classifier
from jubatus.classifier.types import LabeledDatum
from jubatus.common import Datum

data = []
# range()/print() are used instead of the Python 2-only xrange/print
# statement so the script runs on Python 2 and 3.
for i in range(0, 100000):
    d = Datum()
    for j in range(0, 20):
        # Feature names are unique per datum: '<feature index>-<datum index>'.
        d.add_number(str(j) + '-' + str(i), random.random() + 1.0)
    ld = LabeledDatum('Pos' if random.randint(0, 1) else 'Neg', d)
    data.append(ld)

client = Classifier('127.0.0.1', 9199, 'test', 0)

# Only the train() call itself is timed.
start_time = time.time()
client.train(data)
end_time = time.time()

print(str(len(data)) + ' ... ' + str((end_time - start_time) * 1000) + ' msec')
import random

import jubatus
from jubatus.common import Datum


def _random_datum(length):
    """Return a Datum with keys "0".."length-1", each a random.random() value."""
    datum = Datum()
    for x in range(length):
        datum.add_number("{}".format(x), random.random())
    return datum


cl = jubatus.Recommender('127.0.0.1', 9199, 'test', 0)
random.seed(1)  # fixed seed so the generated rows are reproducible
datum_length = 100

# Register three rows with ids "0", "1" and "2".
for i in range(3):
    d = _random_datum(datum_length)
    cl.update_row(str(i), d)
print('ids:{}'.format(','.join(cl.get_all_rows())))

# Add a fourth row; the original note here says one id gets unlearned
# (presumably by the server's unlearner setting -- not visible here).
d = _random_datum(datum_length)
cl.update_row('3', d)
print('ids:{}'.format(','.join(cl.get_all_rows())))

# Save, clear and reload the model; the ids should match those printed
# before the save.
cl.save('test')
cl.clear()
cl.load('test')
print('ids:{}'.format(','.join(cl.get_all_rows())))

d = Datum()
from jubatus.common import Datum
import jubatus

# Ask the weight server at 127.0.0.1:9199 for the weights of one datum
# made of two numeric and two string features, and print the result.
client = jubatus.Weight("127.0.0.1", 9199, "")

d = Datum()
for key, number in (("user/age", 25), ("user/income", 1000)):
    d.add_number(key, number)
for key, text in (("user/name", "Loren"), ("message", "Hello")):
    d.add_string(key, text)

res = client.calc_weight(d)
print(res)
float(dst_host_srv_count)], ["dst_host_same_srv_rate", float(dst_host_same_srv_rate)], [ "dst_host_same_src_port_rate", float(dst_host_same_src_port_rate) ], ["dst_host_diff_srv_rate", float(dst_host_diff_srv_rate)], [ "dst_host_srv_diff_host_rate", float(dst_host_srv_diff_host_rate) ], ["dst_host_serror_rate", float(dst_host_serror_rate)], ["dst_host_srv_serror_rate", float(dst_host_srv_serror_rate)], ["dst_host_rerror_rate", float(dst_host_rerror_rate)], ["dst_host_srv_rerror_rate", float(dst_host_srv_rerror_rate)], ]: datum.add_number(k, v) # 3. train data and update jubatus model ret = anom.add(datum) # 4. output results if (ret.score != float('Inf')) and (ret.score != 1.0): print(ret, label)