def test_str(self):
    """Check the text produced by ``str()`` for a Datum with all value kinds.

    One string, one number and one binary value are added. The binary value
    may show up either as ``'0101'`` or as ``b'0101'``; both are accepted.
    """
    d = Datum()
    d.add_string('name', 'john')
    d.add_number('age', 20)
    d.add_binary('image', b('0101'))
    accepted = (
        "datum{string_values: [['name', 'john']], num_values: [['age', 20.0]], binary_values: [['image', '0101']]}",
        "datum{string_values: [['name', 'john']], num_values: [['age', 20.0]], binary_values: [['image', b'0101']]}",
    )
    # assertIn reports the actual string on failure, which
    # assertTrue(a == s or b == s) cannot do.
    self.assertIn(str(d), accepted)
 def test_str(self):
     """Check the text produced by ``str()`` for a Datum with all value kinds."""
     d = Datum()
     d.add_string('name', 'john')
     d.add_number('age', 20)
     d.add_binary('image', '0101')
     expected = "datum{string_values: [['name', 'john']], num_values: [['age', 20.0]], binary_values: [['image', '0101']]}"
     # assertEquals is a deprecated alias of assertEqual and no longer exists
     # in Python 3.12+.
     self.assertEqual(expected, str(d))
def make_datum(data, headers):
    """Create a Datum from a list of headers and one row of data.

    For every header ``h`` in ``headers`` the value ``data[h]`` is registered
    under the key ``h`` with ``add_number``. The filled Datum is returned.
    """
    datum = Datum()
    for column in headers:
        value = data[column]
        datum.add_number(column, value)
    return datum
Beispiel #4
0
 def test_str(self):
     """Check the text produced by ``str()`` for a Datum with all value kinds.

     The binary value may show up either as ``'0101'`` or as ``b'0101'``;
     both renderings are accepted.
     """
     d = Datum()
     d.add_string('name', 'john')
     d.add_number('age', 20)
     d.add_binary('image', b('0101'))
     accepted = (
         "datum{string_values: [['name', 'john']], num_values: [['age', 20.0]], binary_values: [['image', '0101']]}",
         "datum{string_values: [['name', 'john']], num_values: [['age', 20.0]], binary_values: [['image', b'0101']]}",
     )
     # assertIn shows the offending string when the check fails; the former
     # assertTrue(... or ...) only reported "False is not true".
     self.assertIn(str(d), accepted)
Beispiel #5
0
def main():
  """Classify one random single-feature datum and print the outcome.

  Connects to 127.0.0.1 on the port taken from the parsed options, classifies
  a Datum whose 'key' feature is 1.0 or 2.0 (chosen at random), then prints
  the classify() result followed by the value of get_labels().
  """
  args = parse_options()

  client = Classifier('127.0.0.1', args.port, 'test', 0)

  d = Datum()

  # Pick one of the two feature values at random.
  rand = random.randint(0, 1)
  d.add_number('key', 1.0 if rand else 2.0)

  # print(<single expression>) prints the same text on Python 2 and Python 3;
  # the former print statement was a SyntaxError on Python 3.
  print(client.classify([d]))
  print(client.get_labels())
Beispiel #6
0
def main():
    """Classify one random single-feature datum and print the outcome.

    Connects to 127.0.0.1 on the port taken from the parsed options,
    classifies a Datum whose 'key' feature is 1.0 or 2.0 (chosen at random),
    then prints the classify() result followed by the value of get_labels().
    """
    args = parse_options()

    client = Classifier('127.0.0.1', args.port, 'test', 0)

    d = Datum()

    # Pick one of the two feature values at random.
    rand = random.randint(0, 1)
    d.add_number('key', 1.0 if rand else 2.0)

    # print(<single expression>) prints the same text on Python 2 and 3; the
    # former print statement was a SyntaxError on Python 3.
    print(client.classify([d]))
    print(client.get_labels())
Beispiel #7
0
  def convert(self, args):
    """Turn a flat ``[key, value, key, value, ...]`` sequence into a Datum.

    Every pair is first tried as a number via ``float(value)``; when that
    attempt raises ValueError the pair is stored as a string instead.

    Returns a tuple ``(len(args), datum)``.
    Raises ValueError when ``args`` holds an odd number of elements.
    """
    total = len(args)
    if total % 2 != 0:
      last_key = args[total - 1]
      raise ValueError('value for the last datum key ({0}) is missing'.format(last_key))

    datum = Datum()
    # Walk the even positions; the value sits right after each key.
    for pos in range(0, total, 2):
      name = args[pos]
      raw = args[pos + 1]
      try:
        datum.add_number(name, float(raw))
      except ValueError:
        datum.add_string(name, raw)
    return (total, datum)
def main():
    """Train a regression model ten times and print one estimate.

    Each training call sends the target 10.0 together with a Datum carrying
    'x' = 1 and 'y' = 4. Afterwards a fresh Datum with the same features is
    estimated and the first result is printed with 30 decimal places.
    """
    cl = Regression("127.0.0.1", 9199, "test")

    # NOTE(review): the same Datum instance is reused, so add_number() is
    # called on it again in every pass -- confirm whether a fresh Datum per
    # iteration was intended.
    d = Datum()
    # range() replaces the Python 2-only xrange(); for 10 items there is no
    # practical difference on Python 2 either.
    for _ in range(10):
        d.add_number('x', 1)
        d.add_number('y', 4)
        cl.train([[10.0, d]])

    d = Datum()
    d.add_number('x', 1)
    d.add_number('y', 4)
    result = cl.estimate([d])
    print("{0:30.30f}".format(result[0]))
Beispiel #9
0
    def convert(self, args):
        """Build a Datum from alternating key/value command arguments.

        A value is stored with ``add_number`` when ``float(value)`` succeeds;
        if that attempt raises ValueError it is stored with ``add_string``.

        Returns ``(len(args), datum)``.
        Raises ValueError if the last key has no matching value, i.e. when
        ``args`` has odd length.
        """
        count = len(args)
        if count % 2 != 0:
            message = 'value for the last datum key ({0}) is missing'
            raise ValueError(message.format(args[count - 1]))

        datum = Datum()
        index = 0
        while index < count:
            key = args[index]
            text = args[index + 1]
            try:
                datum.add_number(key, float(text))
            except ValueError:
                datum.add_string(key, text)
            index += 2
        return (count, datum)
Beispiel #10
0
def main():
    """Train the classifier with one million random single-feature samples.

    Each sample has the feature 'key' set to 1.0 with label 'Pos' or to 2.0
    with label 'Neg', chosen at random. A progress line is printed every
    10000 samples, starting with sample 0.
    """
    args = parse_options()

    client = Classifier('127.0.0.1', args.port, 'test', 0)

    for i in range(0, 1000000):
        d = Datum()

        # The label is tied to the feature value: 1.0 -> 'Pos', 2.0 -> 'Neg'.
        rand = random.randint(0, 1)
        d.add_number('key', 1.0 if rand else 2.0)
        ld = LabeledDatum('Pos' if rand else 'Neg', d)

        client.train([ld])

        if i % 10000 == 0:
            # print(<single expression>) works on both Python 2 and 3; the
            # former print statement was a SyntaxError on Python 3.
            print('train ' + str(i) + ' data')
Beispiel #11
0
def main():
  """Train the classifier with one million random single-feature samples.

  Each sample has the feature 'key' set to 1.0 with label 'Pos' or to 2.0
  with label 'Neg', chosen at random. A progress line is printed every 10000
  samples, starting with sample 0.
  """
  args = parse_options()

  client = Classifier('127.0.0.1', args.port, 'test', 0)

  for i in range(0, 1000000):
    d = Datum()

    # The label is tied to the feature value: 1.0 -> 'Pos', 2.0 -> 'Neg'.
    rand = random.randint(0, 1)
    d.add_number('key', 1.0 if rand else 2.0)
    ld = LabeledDatum('Pos' if rand else 'Neg', d)

    client.train([ld])

    if i % 10000 == 0:
      # print(<single expression>) works on both Python 2 and 3; the former
      # print statement was a SyntaxError on Python 3.
      print('train ' + str(i) + ' data')
def main():
    """Fill a recommender with ten rows and print the rows similar to a query.

    Rows "0" to "9" are updated with a Datum carrying 'x' = 1 and 'y' = 4.
    A fresh Datum with the same features is then passed to
    similar_row_from_datum() with a size of 5; the raw result and each
    entry's id and score are printed.
    """
    # cl = jubatus.NearestNeighbor("localhost", 9199, "nn")
    cl = client.Recommender("localhost", 9199, "test")
    cl.clear()

    # NOTE(review): the same Datum instance is reused, so add_number() is
    # called on it again in every pass -- confirm whether that is intended.
    d = Datum()
    # range() replaces the Python 2-only xrange().
    for i in range(10):
        d.add_number('x', 1)
        d.add_number('y', 4)

        cl.update_row(str(i), d)

    # scores = cl.similar_row_from_id(str(0), 10)
    # for score in scores:
    #     print("{0} {1:30.30f}".format(score.id, score.score))

    d = Datum()
    d.add_number('x', 1)
    d.add_number('y', 4)
    predicts = cl.similar_row_from_datum(d, 5)
    print(predicts)
    for predict in predicts:
        print("{0} {1:30.30f}".format(predict.id, predict.score))
Beispiel #13
0
def main():
    """Train a classifier ten times with one label and print a classification.

    Every training call sends a LabeledDatum with label "label1" and a Datum
    carrying 'x' = 1 and 'y' = 4. A fresh Datum with the same features is then
    classified; the raw result and, for each result entry, the label and
    score of its first element are printed.
    """
    # cl = jubatus.NearestNeighbor("localhost", 9199, "nn")
    cl = Classifier("localhost", 9199, "test")
    cl.clear()

    # NOTE(review): the same Datum instance is reused, so add_number() is
    # called on it again in every pass -- confirm whether that is intended.
    d = Datum()
    # range() replaces the Python 2-only xrange().
    for _ in range(10):
        d.add_number('x', 1)
        d.add_number('y', 4)
        cl.train([LabeledDatum("label1", d)])
        # cl.set_row(str(i), d)

    # scores = cl.similar_row_from_id(str(0), 10)
    # for score in scores:
    #     print("{0} {1:30.30f}".format(score.id, score.score))

    d = Datum()
    d.add_number('x', 1)
    d.add_number('y', 4)
    predict = cl.classify([d])
    print(predict)
    for score in predict:
        print("{0} {1:30.30f}".format(score[0].label, score[0].score))
    def _juba_proc(self, clock, datadict, method="add"):
        """Build a Datum from ``datadict`` and send it to the anomaly client.

        Args:
            clock: timestamp handed to cf.clock2strjst() for log messages.
            datadict: mapping of feature name to value. "hostid" is converted
                with int() and divided by ZBX_ITEMID_DIGITS, "weekday" and
                "hour" are passed through unchanged, and every other key is
                passed through self.norm(key, value).
            method: "add" calls self.anom.add(); "calc" calls
                self.anom.calc_score() and toggles self.alarm_on, logging
                when the state changes.

        Raises:
            msgpackrpc.error.TransportError, msgpackrpc.error.TimeoutError:
                re-raised once JUBA_RETRY_MAX attempts have failed.
        """
        datum = Datum()
        for k, v in datadict.items():
            if k == "hostid":
                datum.add_number(str(k), int(v) * 1.0 / ZBX_ITEMID_DIGITS)
            elif k == "weekday" or k == "hour":
                datum.add_number(str(k), v)
            else:
                datum.add_number(str(k), self.norm(k, v))

        # The counter used to be assigned as ``retry_cnt`` but decremented as
        # ``retry_count``, which raised UnboundLocalError on the first
        # transport failure instead of retrying.
        retry_count = JUBA_RETRY_MAX
        while True:
            try:
                if method == "add":
                    print(datum)
                    self.anom.add(datum)
                    # NOTE(review): this ends the process right after the
                    # first add; it looks like a debugging leftover -- confirm
                    # before removing.
                    exit()
                if method == "calc":
                    print(datum)
                    score = self.anom.calc_score(datum)
                    if score == float('Inf') or score > ML_LIMIT:
                        # Log only on the transition into the alarm state.
                        if self.alarm_on == False:
                            self.alarm_on = True
                            cf.log("[%s] score=%f" % (cf.clock2strjst(clock), score))
                    else:
                        # Log only on the transition back to normal.
                        if self.alarm_on == True:
                            self.alarm_on = False
                            cf.log("[%s] score recovered to normal:score=%f" % (cf.clock2strjst(clock), score))

                break
            except (msgpackrpc.error.TransportError, msgpackrpc.error.TimeoutError) as e:
                retry_count -= 1
                if retry_count <= 0:
                    raise
                # Close the current client and call set_anom() before the next
                # attempt (presumably this re-creates the connection).
                self.anom.get_client().close()
                self.set_anom()

                print(e)
                time.sleep(JUBA_RETRY_INTERVAL)
Beispiel #15
0
 def test_add_int(self):
     """add_number() with the int 1 must serialize like Datum({'key': 1.0})."""
     datum = Datum()
     datum.add_number('key', 1)
     expected = Datum({'key': 1.0}).to_msgpack()
     actual = datum.to_msgpack()
     self.assertEqual(expected, actual)
Beispiel #16
0
def make_datum():
    """Return a Datum holding one string, one number and one binary entry."""
    datum = Datum()
    datum.add_string('string-key', 'str')
    datum.add_number('number-key', 1.0)
    datum.add_binary('binary-key', b'bin')
    return datum
Beispiel #17
0
#!/usr/bin/env python

import random
import time

from jubatus.classifier.client import Classifier
from jubatus.classifier.types import LabeledDatum
from jubatus.common import Datum

# Build 100000 labeled samples. Each sample gets 20 numeric features whose
# keys ("<j>-<i>") include the sample index, with values in [1.0, 2.0).
data = []
# range() replaces the Python 2-only xrange().
for i in range(0, 100000):
    d = Datum()
    for j in range(0, 20):
        d.add_number(str(j) + "-" + str(i), random.random() + 1.0)

    ld = LabeledDatum("Pos" if random.randint(0, 1) else "Neg", d)
    data.append(ld)

client = Classifier("127.0.0.1", 9199, "test", 0)


# Time one bulk train() call over the whole data set.
start_time = time.time()
client.train(data)
end_time = time.time()

# print(<single expression>) prints the same text on Python 2 and Python 3.
print(str(len(data)) + " ... " + str((end_time - start_time) * 1000) + " msec")
 def test_add_int(self):
     """add_number() with the int 1 must serialize like Datum({'key': 1.0})."""
     d = Datum()
     d.add_number('key', 1)
     # assertEquals is a deprecated alias of assertEqual and no longer exists
     # in Python 3.12+.
     self.assertEqual(Datum({'key': 1.0}).to_msgpack(),
                      d.to_msgpack())
Beispiel #19
0
    # 2. Prepare the training data
    mongo_dic =  convertMongo()
    dic = mongo_dic.getDic()
    name = '' 
    value = 0
    for line in dic:
        # Each entry of dic is read through its 'name' and 'value' keys.
        name = dic[line]['name']
        value = dic[line]['value']
        datum = Datum()

        # for (k, v) in [
        #         ['name', name],
        #         ]:
        #     datum.add_string(k, v)
        
        # Only 'value' is sent as a feature; 'name' is read but not used.
        for (k, v) in [
                ['value', value],
                ]:
            datum.add_number(k, v)
        
        # 3. Train on the data (update the learning model)
        ret = anom.add(datum)
        
        # 4. Output the results
        # A score of Inf is neither stored nor printed. A finite score other
        # than 1.0 is stored as 'anomaly'; a score of exactly 1.0 as 'nomaly'.
        # NOTE(review): 'nomaly' may be a typo for 'normal' -- confirm against
        # the consumers of this collection before changing the stored value.
        if (ret.score != float('Inf')) and (ret.score != 1.0):
            col.insert({'result':'anomaly', 'value':value})
            print ret, value
        elif (ret.score != float('Inf')) and (ret.score == 1.0):
            col.insert({'result':'nomaly', 'value':value})
            print ret, value
Beispiel #20
0
#!/usr/bin/env python

import random
import time

from jubatus.classifier.client import Classifier
from jubatus.classifier.types import LabeledDatum
from jubatus.common import Datum

# Build 100000 labeled samples. Each sample gets 20 numeric features whose
# keys ('<j>-<i>') include the sample index, with values in [1.0, 2.0).
data = []
# range() replaces the Python 2-only xrange().
for i in range(0, 100000):
    d = Datum()
    for j in range(0, 20):
        d.add_number(str(j) + '-' + str(i), random.random() + 1.0)

    ld = LabeledDatum('Pos' if random.randint(0, 1) else 'Neg', d)
    data.append(ld)

client = Classifier('127.0.0.1', 9199, 'test', 0)

# Time one bulk train() call over the whole data set.
start_time = time.time()
client.train(data)
end_time = time.time()

# print(<single expression>) prints the same text on Python 2 and Python 3.
print(str(len(data)) + ' ... ' + str((end_time - start_time) * 1000) + ' msec')
Beispiel #21
0
import jubatus
from jubatus.common import Datum

import random

cl = jubatus.Recommender('127.0.0.1', 9199, 'test', 0)

# Fixed seed so every run produces the same feature values.
random.seed(1)
datum_length = 100

# Register rows '0', '1' and '2', each with datum_length random features
# keyed by the feature index.
for i in range(3):
    d = Datum()
    for x in range(datum_length):
        d.add_number(str(x), random.random())
    cl.update_row(str(i), d)

print('ids:{}'.format(','.join(cl.get_all_rows())))  # ids after the first three rows

# Add one more row under the id '3'.
d = Datum()
for x in range(datum_length):
    d.add_number(str(x), random.random())
cl.update_row('3', d)
# Expected to show that one id was unlearned (presumably depends on the
# server's unlearner configuration -- confirm).
print('ids:{}'.format(','.join(cl.get_all_rows())))

# Round-trip the model through save / clear / load.
cl.save('test')
cl.clear()
cl.load('test')

print('ids:{}'.format(','.join(cl.get_all_rows())))  # should match the ids printed before save

d = Datum()
from jubatus.common import Datum
import jubatus

# Client for the weight service at 127.0.0.1:9199, with an empty name.
client = jubatus.Weight("127.0.0.1", 9199, "")

d = Datum()

# Two numeric features first, then two string features.
for key, number in (("user/age", 25), ("user/income", 1000)):
    d.add_number(key, number)
for key, text in (("user/name", "Loren"), ("message", "Hello")):
    d.add_string(key, text)

# Ask the server for the weights of this datum and show them.
res = client.calc_weight(d)
print(res)
Beispiel #23
0
                 float(dst_host_srv_count)],
                ["dst_host_same_srv_rate",
                 float(dst_host_same_srv_rate)],
                [
                    "dst_host_same_src_port_rate",
                    float(dst_host_same_src_port_rate)
                ],
                ["dst_host_diff_srv_rate",
                 float(dst_host_diff_srv_rate)],
                [
                    "dst_host_srv_diff_host_rate",
                    float(dst_host_srv_diff_host_rate)
                ],
                ["dst_host_serror_rate",
                 float(dst_host_serror_rate)],
                ["dst_host_srv_serror_rate",
                 float(dst_host_srv_serror_rate)],
                ["dst_host_rerror_rate",
                 float(dst_host_rerror_rate)],
                ["dst_host_srv_rerror_rate",
                 float(dst_host_srv_rerror_rate)],
            ]:
                datum.add_number(k, v)

            # 3. train data and update jubatus model
            ret = anom.add(datum)

            # 4. output results
            if (ret.score != float('Inf')) and (ret.score != 1.0):
                print(ret, label)