def __init__(self):
    """Initialise counters, per-task bookkeeping and the task scheduler."""
    # Counters.
    self.total_partitions = 0
    self.task_id = 0
    self.partition_id = 4  # NOTE(review): hard-coded start id — confirm intent
    # Per-task bookkeeping: progress records, accumulated results and
    # user-supplied merge callables, all keyed by task id.
    self.task_progress = {}
    self.result = {}
    self.merger = {}
    self.scheduler = Scheduler()
def post(self):
    """Build a crawl task from the submitted form fields, run it through a
    fresh Scheduler, then send the client back to the index page."""
    task = {
        "start_url": self.get_argument("start_url"),
        "allowed_domain": self.get_argument("allowed_domain"),
    }
    Scheduler(task).run()
    self.redirect("/")
import os
import re
import sys
import logging
import logging.config
from optparse import OptionParser
from time import sleep

logging.config.fileConfig('logging.conf')
root_logger = logging.getLogger('root')

from master.scheduler import Scheduler


def print_error(message):
    """Print *message* plus a usage hint to stderr and exit with status 2."""
    # Fixed: original used Python-2 `print >> sys.stderr` statements, which
    # are a SyntaxError under Python 3 (the rest of this file uses print()).
    print(message, file=sys.stderr)
    print("Use --help to show usage.", file=sys.stderr)
    sys.exit(2)  # sys.exit instead of the interactive-only exit() builtin


if __name__ == "__main__":
    parser = OptionParser(usage="Usage: %prog [options]")
    parser.add_option("-c", "--config", help="configuration path",
                      dest="config_path")
    (options, args) = parser.parse_args()

    # Argument validation: a readable configuration path is mandatory.
    if options.config_path:
        if not os.path.exists(options.config_path):
            print_error("Configuration path error: file not exists.")
    else:
        print_error("Configuration required")

    master = Scheduler(options.config_path)
    master.listen()
class ServerHandler:
    """Master-side RPC handler: splits submitted tasks into partitions,
    schedules them onto slaves, and merges partial results as they arrive."""

    # Partition count used when a caller expresses no preference.
    DEFAULT_PARTITIONS = 8

    def __init__(self):
        self.total_partitions = 0  # slots advertised by connected slaves
        self.task_id = 0           # monotonically increasing task counter
        self.partition_id = 4     # NOTE(review): hard-coded start id — confirm intent
        self.task_progress = {}    # task_id -> {"total": n, "progress": done}
        self.result = {}           # task_id -> merged result or list of partials
        self.scheduler = Scheduler()
        self.merger = {}           # task_id -> user-supplied merge callable

    def hand_shake(self, paritions):
        """Acknowledge a slave connection.

        NOTE(review): the offered partition count is currently ignored (see
        the commented-out accumulation) — confirm whether it should count.
        """
        # self.total_partitions += paritions
        print("Slave Connected")

    def schedule_task(self, df, func, partitions, merger):
        """Deserialize a task, split it into partitions and enqueue them.

        Returns the new task id so the caller can poll progress/results.
        """
        self.task_id += 1
        print("Task %d Scheduled with partitions preference = %d"
              % (self.task_id, partitions))
        df, func, merger = deserialize(df), deserialize(func), deserialize(merger)
        task = {"df": df, "func": func, "priority": 0, "task_id": self.task_id}
        self.task_progress.setdefault(task["task_id"], {"total": 0, "progress": 0})
        splitted_task = partition.split_task(task, partitions)
        self.task_progress[task["task_id"]]["total"] += len(splitted_task)
        if merger:
            self.merger[task["task_id"]] = merger
        self.scheduler.schedule_tasks(splitted_task)
        return self.task_id

    def submit_result(self, partition_id, res):
        """Record one finished partition and fold its result into the task.

        If a merger was registered for the task the partials are combined
        pairwise; otherwise raw partial results are accumulated in a list.
        """
        print("Partition %s submitted" % partition_id)
        task_id = self.scheduler.finish_task(partition_id)
        # Fixed: was `task_id != None`; identity comparison is the correct
        # idiom for None checks.
        if task_id is not None:
            res = deserialize(res)
            self.task_progress[task_id]["progress"] += 1
            if task_id in self.merger:
                if task_id in self.result:
                    self.result[task_id] = self.merger[task_id](
                        self.result[task_id], res)
                else:
                    self.result[task_id] = res
            else:
                # No merger registered: keep every partial result.
                self.result.setdefault(task_id, [])
                self.result[task_id].append(res)

    def offer_resources(self, partitions):
        """Hand out up to *partitions* pending tasks, serialized for transport."""
        assigned_tasks = self.scheduler.select_tasks(partitions)
        return serialize(assigned_tasks)

    def ready(self, task_id):
        """True once every partition of *task_id* has reported a result."""
        record = self.task_progress[task_id]
        return record["total"] == record["progress"]

    def progress(self, task_id):
        """Completion percentage (0.0-100.0) for *task_id*."""
        record = self.task_progress[task_id]
        return record["progress"] * 100.0 / record["total"]

    def collect(self, task_id):
        """Return the (serialized) accumulated result for *task_id*."""
        return serialize(self.result[task_id])
def __init__(self, config: MasterConfig) -> None:
    """Wire up the backend, engine and scheduler described by *config*."""
    self.config = config
    backends = MasterBackends(config.plugins.backends_dir)
    self.backend = backends.construct_backend(config.backend, config.backend_config)
    self.engine = Engine(self.backend)
    self.scheduler = Scheduler(self.backend, self.engine)
class MasterApp:
    """Master application: wires config/backend/engine/scheduler together and
    exposes the HTTP request handlers for graph structures and instances."""

    def __init__(self, config: MasterConfig) -> None:
        self.config = config
        backends = MasterBackends(config.plugins.backends_dir)
        self.backend = backends.construct_backend(config.backend, config.backend_config)
        self.engine = Engine(self.backend)
        self.scheduler = Scheduler(self.backend, self.engine)

    def shutdown(self):
        """Stop the scheduler."""
        self.scheduler.shutdown()

    @staticmethod
    def ping(*args):
        """Liveness probe."""
        return ResultOk('pong')

    def list_graphs(self, args: dict, request: Request):
        """Page through known graph structures, optionally with full info."""
        graph_name = args.get('graph_name', None)
        with_info = args.get('with_info', '1') == '1'
        limit = int(args.get('limit', '-1'))
        offset = int(args.get('offset', '0'))
        stop = None if limit < 0 else offset + limit
        structs = self.backend.list_graph_struct(graph_name=graph_name,
                                                 with_info=with_info)
        payload = []
        for name, revision, struct in islice(structs, offset, stop):
            if struct:
                payload.append(struct.to_json())
            else:
                payload.append({'graph_name': name, 'revision': revision})
        return ResultOk(payload)

    def create_graph(self, args: dict, request: Request):
        """Register a new revision of a graph structure."""
        graph_name = request.match_info.get('graph_name',
                                            args.get('graph_name', None))
        if not graph_name:
            return ResultError(error='Graph name should be non-empty string')
        rev = self.engine.add_graph_struct(graph_name=graph_name, graph_struct=args)
        return ResultOk({'graph_name': graph_name, 'revision': rev})

    def read_graph(self, args: dict, request: Request):
        """Fetch one graph structure by name and (optional) revision."""
        graph_name = request.match_info.get('graph_name', None)
        revision = int(request.match_info.get('revision',
                                              args.get('revision', -1)))
        if not graph_name:
            return ResultError(error='graph_name should be set')
        try:
            return ResultOk(self.backend.read_graph_struct(graph_name, revision).to_json())
        except KeyError:
            return ResultNotFound(error='Graph with revision not found',
                                  name=graph_name, revision=revision)
        except GraphStructureNotFound as ex:
            return ResultNotFound(error=str(ex), name=graph_name)

    def launch_graph(self, args: dict, request: Request):
        """Create and register a new instance of the named graph."""
        graph_name = request.match_info.get('graph_name', None)
        revision = int(request.match_info.get('revision', -1))
        if not graph_name:
            return ResultError(error='graph_name should be set')
        struct = self.backend.read_graph_struct(graph_name, revision)
        instance = self.engine.add_graph_instance(uuid4().hex, struct)
        return ResultOk(instance.to_json())

    def list_instances(self, args: dict, request: Request):
        """Page through graph instances, optionally with full info."""
        with_info = args.get('with_info', '1') == '1'
        limit = int(args.get('limit', '-1'))
        offset = int(args.get('offset', '0'))
        stop = None if limit < 0 else offset + limit
        infos = self.backend.list_graph_instance_info(with_info=with_info)
        payload = []
        for instance_id, info in islice(infos, offset, stop):
            payload.append(info.to_json() if info else {'instance_id': instance_id})
        return ResultOk(payload)

    def read_instance(self, args: dict, request: Request):
        """Fetch the info record of one instance by id."""
        instance_id = request.match_info.get('instance_id', None)
        if not instance_id:
            return ResultError(error='Instance id should be set')
        try:
            return ResultOk(self.backend.read_graph_instance_info(instance_id).to_json())
        except KeyError:
            return ResultNotFound(error='Instance with needed id is not found',
                                  instance_id=instance_id)

    def start_instance(self, args: dict, request: Request):
        """Transition an instance to the running state."""
        instance_id = request.match_info.get('instance_id', None)
        if not instance_id:
            return ResultError(error='instance_id field should be set')
        return self._set_instance_state(instance_id, GraphInstanceState.running)

    def stop_instance(self, args: dict, request: Request):
        """Transition an instance to the stopped state."""
        instance_id = request.match_info.get('instance_id', None)
        if not instance_id:
            return ResultError(error='instance_id field should be set')
        return self._set_instance_state(instance_id, GraphInstanceState.stopped)

    def _set_instance_state(self, instance_id: str, instance_state_name: str):
        """Apply the state change via the engine; report previous and new state."""
        prev_state = self.engine.set_graph_instance_state(instance_id,
                                                          instance_state_name)
        return ResultOk(prev_state=prev_state, new_state=instance_state_name)

    # TODO: move this proxy to storage layer
    def instance_logs(self, args: dict, request: Request):
        """Proxy stdout/stderr logs for one task execution on one host."""
        instance_id = request.match_info.get('instance_id', None)
        task_name = request.match_info.get('task_name', None)
        host = request.match_info.get('host', None)
        log_type = request.match_info.get('log_type', None)
        if not (instance_id and task_name and host and log_type):
            return ResultError(error='All fields from (instance_id, task_name, host, log_type) should be set')
        if log_type not in ('err', 'out'):
            return ResultError(error='Log type can be only from (err, out)')
        info = self.backend.read_graph_instance_info(instance_id)
        task_info = info.exec_stats.per_task_execution_info.get(task_name)
        if not task_info:
            return ResultNotFound(error='Graph instance doesn\'t have task with this name',
                                  instance_id=instance_id, task_name=task_name)
        host_info = task_info.per_host_info.get(host)
        if not host_info or not host_info.task_id:
            return ResultNotFound(error='Specified task doesn\'t have an execution entry on specified host',
                                  instance_id=instance_id, task_name=task_name,
                                  host=host)
        # FIXME: Port is not passed
        client = WorkerApiClient(worker_host=host)
        return ResultOk(instance_id=instance_id, task_name=task_name, host=host,
                        log_type=log_type,
                        data=client.get_task_log(host_info.task_id, log_type))