Esempio n. 1
0
 def _generate_id(self):
     """
     Build a slave id of the form ``<host>-<8 characters>``.

     The suffix is the first 8 characters of
     ``Encrypt.md5(host + ip + str(time.time()))``, so two calls at
     different times yield different ids for the same host.

     :return: slave_id
     """
     hostname = Metric.get_host()
     address = Metric.get_ip()
     seed = hostname + address + str(time.time())
     suffix = Encrypt.md5(seed)[:8]
     return "-".join((hostname, suffix))
Esempio n. 2
0
 def predict_model_cold_users(self):
     """
     Score every (user, item) pair stored in ``self.rg.testColdUserSet_u``.

     Each raw prediction is mapped back through ``denormalize`` using the
     configured ``min_val`` / ``max_val`` and then passed to
     ``self.checkRatingBoundary``.

     :return: ``(rmse, mae)`` as computed by ``Metric.RMSE`` and
         ``Metric.MAE`` over ``[user, item, rating, prediction]`` rows
     """
     rows = []
     for user, rated_items in self.rg.testColdUserSet_u.items():
         for item, rating in rated_items.items():
             raw = self.predict(user, item)
             # denormalize
             scaled = denormalize(raw, self.config.min_val, self.config.max_val)
             rows.append([user, item, rating, self.checkRatingBoundary(scaled)])
     return Metric.RMSE(rows), Metric.MAE(rows)
Esempio n. 3
0
    def _register(self):
        """
        Register this slave with the master by creating its ZooKeeper node.

        If a node already exists under ``/jetsearch/slaves/`` for the current
        id, a replacement id is taken from ``self._generate_id()`` first. The
        node content is the ``str()`` of a dict with the slave's id, type,
        host, address and heartbeat.
        """
        slaves_root = "/jetsearch/slaves/"
        if self.zk.exists(slaves_root + self.id):
            # id already taken: switch to a generated one
            self.id = self._generate_id()

        payload = str({
            "id": self.id,
            "type": self.type,
            "host": Metric.get_host(),
            "addr": Metric.get_ip(),
            "heartbeat": Metric.get_heartbeat()
        })
        self.zk.create(slaves_root + self.id, payload)
Esempio n. 4
0
    def extractFeatures(self, tasks):
        """
        Build the normalized feature vector for ``tasks``.

        The moment features (``self.computeMoments``) and the Fourier
        features (``self.computeFourier``) are each scaled to unit Euclidean
        norm, concatenated (moments first), and the concatenation is scaled
        to unit norm again. The result is stored on ``self.features``.

        :param tasks: task collection forwarded to the two feature helpers
        :return: 1-D numpy array holding the normalized features
        """
        # Added to every norm so an all-zero vector does not divide by zero.
        eps = 1e-15

        # Convert explicitly: the helpers may hand back plain lists, and
        # list / scalar only works by accident of numpy's reflected division.
        moments = np.asarray(self.computeMoments(tasks), dtype=float)
        fourier = np.asarray(self.computeFourier(tasks), dtype=float)

        moments = moments / (np.linalg.norm(moments) + eps)
        fourier = fourier / (np.linalg.norm(fourier) + eps)

        features = np.concatenate((moments, fourier))

        # Possible extension of the feature vector, if necessary: the number
        # of weakly interactive tasks (interaction metric >= 1/4) and of
        # interactive tasks (interaction metric >= 1/2).

        # normalize the features to make learning more robust
        features = features / (np.linalg.norm(features) + eps)

        self.features = features

        return self.features
    def valid_model(self):
        """
        Evaluate the model on the validation set.

        Every ``(user, item, rating)`` entry from ``self.rg.validSet()`` is
        predicted, denormalized with the configured ``min_val`` / ``max_val``
        and passed through ``self.checkRatingBoundary``. The resulting scores
        are appended to ``self.iter_rmse`` / ``self.iter_mae``.

        :return: ``(rmse, mae)`` from ``Metric.RMSE`` and ``Metric.MAE``
        """
        rows = []
        for user, item, rating in self.rg.validSet():
            raw = self.predict(user, item)
            # denormalize
            scaled = denormalize(raw, self.config.min_val, self.config.max_val)
            rows.append([user, item, rating, self.checkRatingBoundary(scaled)])

        rmse, mae = Metric.RMSE(rows), Metric.MAE(rows)
        # per-call history, kept for plotting
        self.iter_rmse.append(rmse)
        self.iter_mae.append(mae)
        return rmse, mae
Esempio n. 6
0
 def predict_model(self):
     """
     Predict ratings for the users in the test set.

     Entries whose user has at most ``self.config.coldUserRating`` ratings
     in ``self.rg.trainSet_u`` (cold-start users) are skipped. When
     ``self.config.verbose`` is set, each prediction is printed together
     with the wall-clock seconds the ``predict`` call took.

     :return: ``(rmse, mae)`` from ``Metric.RMSE`` and ``Metric.MAE`` over
         the ``[user, item, rating, prediction]`` rows
     """
     res = []
     for user, item, rating in self.rg.testSet():
         # cold-start users are not scored
         if len(self.rg.trainSet_u[user]) <= self.config.coldUserRating:
             continue
         started = time.time()
         prediction = self.predict(user, item)
         # end minus start; the reverse order yields a negative duration
         pre_time = time.time() - started
         if self.config.verbose:
             print(user, item, rating, prediction, pre_time)
         res.append([user, item, rating, prediction])
     rmse = Metric.RMSE(res)
     mae = Metric.MAE(res)
     return rmse, mae
Esempio n. 7
0
    def predict_model(self):
        """
        Evaluate the model on the test set, leaving out cold-start users.

        A test entry is skipped when its user has at most
        ``self.config.coldUserRating`` ratings in ``self.rg.trainSet_u``.
        Remaining predictions are denormalized with the configured
        ``min_val`` / ``max_val`` and passed through
        ``self.checkRatingBoundary``. The scores are also appended to
        ``self.iter_rmse`` / ``self.iter_mae``.

        :return: ``(rmse, mae)`` from ``Metric.RMSE`` and ``Metric.MAE``
        """
        rows = []
        for user, item, rating in self.rg.testSet():
            # remove cold start users for test
            known_ratings = len(self.rg.trainSet_u[user])
            if known_ratings > self.config.coldUserRating:
                raw = self.predict(user, item)
                # denormalize
                scaled = denormalize(raw, self.config.min_val, self.config.max_val)
                rows.append([user, item, rating, self.checkRatingBoundary(scaled)])

        rmse, mae = Metric.RMSE(rows), Metric.MAE(rows)
        # per-call history, kept for plotting
        self.iter_rmse.append(rmse)
        self.iter_mae.append(mae)
        return rmse, mae
Esempio n. 8
0
 def run(self):
     """
     Keep refreshing this slave's ZooKeeper node until ``self.end`` is set.

     Each pass reads the node under ``/jetsearch/slaves/``, updates its
     ``heartbeat`` and ``task_status`` fields, writes it back and then
     sleeps for one second.

     :return:
     """
     while not self.end:
         node_path = "/jetsearch/slaves/" + self.slave_id
         stored = self.zk.get(node_path)[0]
         # NOTE(review): eval() executes whatever text is stored in the node;
         # anyone able to write to ZooKeeper can run code here - confirm the
         # ensemble is trusted or move to a data-only format.
         state = eval(str(stored))
         state['heartbeat'] = Metric.get_heartbeat()
         state['task_status'] = self.status
         self.zk.set(node_path, str(state))
         time.sleep(1)
Esempio n. 9
0
    def __init__(self, master='127.0.0.1:2181', type='spider'):
        """
        Slave (child) node executor.

        Connects to ZooKeeper, registers the slave, loads the shared
        configuration, connects to redis and mongodb, starts the heartbeat
        and installs a watch on the job node.

        :param master: address of the master node (ZooKeeper ``hosts`` string)
        :param type: category of this node
        :return:
        """
        self.type = type
        self.job = None
        self.task = None
        # Counters handed to the health checker below.
        self.job_status = {
            "total": 0,
            "success": 0,
            "fail": 0
        }
        self.wait_task_time = 0

        # Connect to the master's zookeeper-server.
        # The default is a standalone-mode zk at 127.0.0.1:2181.
        self.zk = KazooClient(hosts=master)
        self.zk.start()

        # Generate the slave id and register with the master.
        # The host name is the initial id; presumably _register() may swap in
        # a different one - confirm against its implementation.
        self.id = Metric.get_host()
        self._register()
        # Create the heartbeat thread.
        self.health_check = Health(self.zk, self.id)
        self.health_check.update(self.job_status)

        # Fetch the distributed configuration.
        # NOTE(review): eval() executes the node content as Python code;
        # confirm that only trusted parties can write /jetsearch/config.
        self.config = eval(self.zk.get("/jetsearch/config")[0])

        # Connect to the redis message queue ("host:port" in the config).
        # NOTE(review): the port is passed as the string produced by split(),
        # unlike the mongodb port below - confirm redis.Redis accepts that.
        redis_host, redis_port = self.config.get("redis_url").split(":")
        self.redis = redis.Redis(host=redis_host, port=redis_port)

        # Connect to the mongodb database ("host:port" in the config).
        mongo_host, mongo_port = self.config.get("mongodb_url").split(":")
        self.mongodb = MongoClient(host=mongo_host, port=int(mongo_port))

        # Start the heartbeat.
        self.health_check.start()
        log("[SUCCESS] slave init with id %s and type %s" % (self.id, self.type))

        # Watch for published jobs.
        @self.zk.DataWatch("/jetsearch/job")
        def job_watch(data, stat, event):
            # Non-empty node data becomes the current job; empty data clears it.
            if data:
                # NOTE(review): eval() on node data - same trust concern as
                # for the configuration above.
                self.job = eval(data)
                log("[JOB] receve job: %s" % data)
            else:
                if self.job:
                    self.job = None
Esempio n. 10
0
 def predict_model(self, top_n=5):
     """
     Predict ratings for the test set and derive per-user recommendations.

     Entries whose user has at most ``self.config.coldUserRating`` ratings
     in ``self.rg.trainSet_u`` (cold-start users) are skipped. When
     ``self.config.verbose`` is set, each prediction is printed together
     with the wall-clock seconds the ``predict`` call took.

     :param top_n: maximum number of items recommended per user
     :return: ``(rmse, mae, finalRes)``; ``finalRes`` maps every scored user
         to at most ``top_n`` of that user's test items, ordered by
         predicted rating with the highest first
     """
     res = []
     for user, item, rating in self.rg.testSet():
         # cold-start users are not scored
         if len(self.rg.trainSet_u[user]) <= self.config.coldUserRating:
             continue
         started = time.time()
         prediction = self.predict(user, item)
         # end minus start; the reverse order yields a negative duration
         pre_time = time.time() - started
         if self.config.verbose:
             print(user, item, rating, prediction, pre_time)
         res.append([user, item, rating, prediction])

     # user -> {item: predicted rating}; a repeated item keeps its last score
     scores_by_user = {}
     for user, item, _rating, prediction in res:
         scores_by_user.setdefault(user, {})[item] = prediction

     # Rank each user's items by their own predicted rating, best first.
     finalRes = {}
     for user, item_scores in scores_by_user.items():
         ranked = sorted(item_scores.items(), key=lambda pair: pair[1],
                         reverse=True)
         finalRes[user] = [item for item, _score in ranked[:top_n]]

     rmse = Metric.RMSE(res)
     mae = Metric.MAE(res)
     return rmse, mae, finalRes
         
         
Esempio n. 11
0
    def collectStat(self, ticks):
        """
        Write one sample, tagged with ``ticks``, to each statistics file.

        The objective value goes to ``self.objValFile``, the scheduler's
        alpha to ``self.alphaFile`` and the workload features to
        ``self.workloadFile``, one line per call in each file.

        :param ticks: integer written as the first field of every line
        """
        scheduler = self.scheduler
        tasks = scheduler.getAllTaks()
        alpha = scheduler.getAlpha()
        features = WorkloadFeatures(tasks).getFeatures()
        objVal = Metric.objFunction(tasks, scheduler.getCurTime())

        # (destination file, line format, value) in write order
        samples = (
            (self.objValFile, "%d %f\n", objVal),
            (self.alphaFile, "%d %s\n", str(alpha)),
            (self.workloadFile, "%d %s\n", str(features)),
        )
        for sink, line_format, value in samples:
            sink.write(line_format % (ticks, value))
Esempio n. 12
0
File: metric.py Progetto: Granha/AAS
 def test_objFunction(self):
     """
     Check ``Metric.objFunction`` for two tasks with hand-set counters,
     evaluated with second argument 10, against the expected 11.1428.
     """
     short_io = IOList([IO(10, 5), IO(13, 7)])
     long_io = IOList([IO(1, 3), IO(2, 6), IO(5, 8), IO(6, 8)])
     first = Task("Task 1", 0, 3, 20, short_io)
     second = Task("Task 1", 0, 5, 40, long_io)

     # (task, used cpu time, times blocked, total ready wait, times scheduled)
     counters = (
         (first, 14, 2, 4, 2),
         (second, 7, 4, 4, 2),
     )
     for task, cpu_time, blocked, ready_wait, scheduled in counters:
         task.setUsedCpuTime(cpu_time)
         task.setTimesBlocked(blocked)
         task.setTotalReadyWait(ready_wait)
         task.setTimesScheduled(scheduled)

     observed = Metric.objFunction([first, second], 10)
     self.assertTrue(isclose(11.1428, observed))
Esempio n. 13
0
    def computeMoments(self, tasks):
        """
        Fill ``self.moments`` with the raw moments of the interaction metric.

        For each order ``k`` from 1 to ``WorkloadFeatures.NumMoment`` the mean
        of ``value ** k`` over ``Metric.interacMetric(tasks)`` is stored at
        index ``k - 1``.

        :param tasks: tasks passed to ``Metric.interacMetric``
        :return: ``self.moments`` after the update
        :raises ZeroDivisionError: if the interaction vector is empty
        """
        interactVec = Metric.interacMetric(tasks)

        # With the counting measure every entry has weight 1, so each moment
        # is the plain average of the k-th powers.
        n = float(len(interactVec))

        # range() rather than xrange(): the latter does not exist on Python 3.
        for order in range(1, WorkloadFeatures.NumMoment + 1):
            self.moments[order - 1] = sum(
                inter ** order for inter in interactVec) / n

        return self.moments
Esempio n. 14
0
    def computeFourier(self, tasks):
        """
        Compute the two Fourier features of the interaction metric.

        The features are the index of the largest-magnitude FFT coefficient
        divided by the number of coefficients, and that largest magnitude.
        The coefficient at index 0 is excluded. The pair is stored on
        ``self.fourier``.

        :param tasks: non-empty task collection
        :return: ``self.fourier`` as ``[maxFreq / n, maxCoeff]``
        """
        interactVec = Metric.interacMetric(tasks)

        assert len(tasks) > 0

        magnitudes = np.abs(fft(interactVec))

        # frequency zero corresponds to the integral of the original signal,
        # so it is left out of the search for the dominant frequency
        magnitudes[0] = 0

        peak_index = np.argmax(magnitudes)
        peak_value = np.max(magnitudes)
        count = float(len(magnitudes))

        self.fourier = [peak_index / count, peak_value]

        return self.fourier
Esempio n. 15
0
    def probe(self):
        """
        Advance the probing phase by one step.

        The first call while probing only installs the current alpha on the
        scheduler. Every later call records a
        ``(workload features, alpha, objective value)`` sample in
        ``self.relation``, advances the alpha indices and installs the next
        alpha. Probing stops once the indices have overflowed
        ``WorkloadProber.AlphaMult`` times.

        :return: ``True`` only on the call that ends the probing phase,
            ``False`` otherwise
        """
        if not self._isProbing:
            # nothing to do
            return False

        tasks = self.scheduler.getAllTaks()
        current = self.getCurAlpha()

        if self.alpha is None:
            # first probe: nothing has been measured under this alpha yet
            self.alpha = current
            self.scheduler.setAlpha(current)
            return False

        sample = (
            WorkloadFeatures(tasks),
            current,
            Metric.objFunction(tasks, self.scheduler.getCurTime()),
        )
        self.relation.append(sample)

        wrapped = self.incCurIndices()
        upcoming = self.getCurAlpha()
        self.scheduler.setAlpha(upcoming)
        self.alpha = upcoming

        if not wrapped:
            return False

        self.nPasses += 1
        if self.nPasses != WorkloadProber.AlphaMult:
            return False

        # end probing phase
        self._isProbing = False
        return True
Esempio n. 16
0
from metrics.metric import Metric

# Manual smoke check: print what Metric.hits returns for two small dicts that
# are both keyed by 0 (a nested {1: 1} dict and a one-element list [(1, 1)]).
# NOTE(review): presumably the first argument is the ground truth and the
# second the recommendation list per user - confirm against Metric.hits.
print(Metric.hits({0: {1: 1}}, {0: [(1, 1)]}))