/
master.py
125 lines (102 loc) · 3.76 KB
/
master.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#coding=utf8
import Queue
import threading
import traceback
import time
import common
from common import TaskStatus
import conf
from worker_manager import WorkerManager
from task_manager import TaskManager
from log import logging
class CheckWorkersThread(threading.Thread):
    """Background thread that periodically has the master purge dead workers.

    The thread loops forever: it sleeps ``conf.DIE_THRESHOLD`` seconds and
    then calls ``master.clean_death_workers()``.
    """

    def __init__(self, master):
        """Create the checker.

        Args:
            master: object exposing ``clean_death_workers()``; it is called
                once per check cycle.
        """
        threading.Thread.__init__(self)
        # run() never returns, so a non-daemon thread would keep the whole
        # process alive after the main thread has exited (e.g. on Ctrl-C).
        self.daemon = True
        self.master = master

    def run(self):
        """Sleep, clean dead workers, repeat; never returns."""
        while True:
            try:
                time.sleep(conf.DIE_THRESHOLD)
                self.master.clean_death_workers()
            except Exception:
                # Keep the checker alive whatever a single cycle raises;
                # the traceback is printed so the failure is still visible.
                traceback.print_exc()
class Master(object):
    """Coordinator that hands tasks from a TaskManager to registered workers.

    Worker bookkeeping is delegated to ``WorkerManager`` and task bookkeeping
    to ``TaskManager``; this class wires the two together and pushes tasks to
    workers through ``common.RPCServerProxy``.
    """

    def __init__(self, task_loader, conf):
        """Build the managers and load the initial tasks.

        Args:
            task_loader: passed unchanged to ``TaskManager``.
            conf: configuration object, passed to ``WorkerManager`` and kept
                on ``self.conf``. Note that inside this method the name
                shadows the module-level ``conf`` import.
        """
        # lock and running_tasks are not used by any method in this class;
        # they are kept because code outside this file may read them.
        self.lock = threading.Lock()
        self.workerManager = WorkerManager(conf)
        self.taskManager = TaskManager(task_loader)
        self.running_tasks = {}
        self.conf = conf
        self.load_tasks()

    def get_status(self):
        """Return a dict with the keys 'total_workers', 'tasks' and
        'idle_workers', filled from the worker and task managers."""
        return {
            'total_workers': self.workerManager.get_workers(),
            'tasks': self.taskManager.get_tasks_stats(),
            'idle_workers': self.workerManager.get_idle_workers()
        }

    def clean_death_workers(self):
        """Purge dead workers and fail the tasks that were tied to them.

        Intended to be called periodically (see CheckWorkersThread). Every
        task reported by the worker manager is marked failed with
        ``TaskStatus.notReturned``.

        Returns:
            The dead workers as reported by
            ``WorkerManager.clean_death_workers()``.
        """
        workers, tasks = self.workerManager.clean_death_workers()
        logging.info("death workers:%s; relatedTasks:%s", workers, tasks)
        for task in tasks:
            # NOTE(review): this reads ``task.uuid`` while remove_worker()
            # calls ``task.get_uuid()`` -- confirm that the task type
            # supports both spellings.
            self.taskManager.fail_task(task.uuid, TaskStatus.notReturned)
        return workers

    def register_worker(self, worker):
        """Register a worker node.

        Returns:
            "OK" when the worker was handed to the worker manager,
            "Invalid" when ``worker`` is None.
        """
        logging.info("%s come in", worker)
        if worker is None:
            return "Invalid"
        self.workerManager.add_worker(worker)
        return "OK"

    def remove_worker(self, worker):
        """Unregister a worker and fail the tasks reported for it.

        Returns:
            "Invalid" when ``worker`` is None, "NOT EXISTS" when the worker
            manager returned no worker for its uuid, otherwise "OK".
        """
        if worker is None:
            return "Invalid"
        identifier = worker.get_uuid()
        removed, tasks = self.workerManager.remove_worker(identifier)
        # Defensive: tolerate a None task collection (e.g. for an unknown
        # worker) so that the "NOT EXISTS" status below is still reachable
        # instead of a TypeError being raised by the loop.
        for task in tasks or ():
            self.taskManager.fail_task(task.get_uuid(), TaskStatus.notReturned)
        if removed is None:
            return "NOT EXISTS"
        return "OK"

    def task_complete(self, worker, taskResult):
        """Record that a worker finished a task.

        The result is passed to the worker manager, the worker's record is
        refreshed, and the task is marked finished or, when
        ``taskResult.is_success()`` is false, failed with
        ``TaskStatus.failedToExecute``.

        Returns:
            Always True.
        """
        self.workerManager.finish_task(worker, taskResult)
        self.workerManager.update_worker(worker)
        task_uuid = taskResult.get_task_uuid()
        if taskResult.is_success():
            self.taskManager.finish_task(task_uuid)
        else:
            self.taskManager.fail_task(task_uuid, TaskStatus.failedToExecute)
        return True

    def heartbeat(self, worker):
        """Handle a heartbeat by refreshing the worker's record.

        Returns:
            Always True.
        """
        self.workerManager.update_worker(worker)
        return True

    def lookup_spider(self, spider):
        """Placeholder; currently does nothing and returns None."""
        pass

    def load_tasks(self):
        """Ask the task manager to load its tasks."""
        self.taskManager.load_tasks()

    def schedule_next(self):
        """Assign the next task to the next worker and push it over RPC.

        The assignment is recorded in the worker manager before the RPC call.
        If obtaining the proxy or the remote call raises, the traceback is
        printed and the worker is removed through remove_worker(), which also
        fails the tasks the worker manager reports for it.
        """
        logging.info('tasks: %s', self.taskManager.get_tasks_stats())
        task = self.taskManager.next_task()
        worker = self.workerManager.next_worker()
        self.workerManager.assign_task(worker, task)
        try:
            proxy = common.RPCServerProxy.get_proxy(worker)
            proxy.assign_task(task)
        except Exception:
            traceback.print_exc()
            self.remove_worker(worker)

    def serve_forever(self):
        """Start the dead-worker checker thread and schedule tasks forever.

        Exceptions raised by a scheduling round are printed and the loop
        continues; this method does not return normally.
        """
        check_thread = CheckWorkersThread(self)
        check_thread.start()
        while True:
            try:
                self.schedule_next()
            except Exception:
                # NOTE(review): if schedule_next() fails immediately and
                # repeatedly this loop spins without any pause -- confirm
                # that next_task()/next_worker() block when nothing is
                # available.
                traceback.print_exc()