Beispiel #1
0
 def __init__(self):
     """Initialise counters, empty per-task bookkeeping and a fresh Scheduler."""
     # Numeric state: both counters start at zero, partition_id starts at 4.
     self.total_partitions = self.task_id = 0
     self.partition_id = 4
     # Per-task dictionaries start out empty (two distinct dict objects).
     self.task_progress, self.result = {}, {}
     self.scheduler = Scheduler()
     self.merger = {}
Beispiel #2
0
    def post(self):
        """Run a Scheduler on the submitted task, then redirect to "/".

        The task is a dict built from the ``start_url`` and ``allowed_domain``
        request arguments (read in that order via ``self.get_argument``).
        """
        start_url = self.get_argument("start_url")
        allowed_domain = self.get_argument("allowed_domain")

        Scheduler({
            "start_url": start_url,
            "allowed_domain": allowed_domain,
        }).run()
        self.redirect("/")
Beispiel #3
0
    def post(self):
        """Handle a form submission: schedule the described task and go back to "/".

        Reads the ``start_url`` and ``allowed_domain`` arguments, wraps them in a
        task dict, runs a ``Scheduler`` constructed from that dict and finally
        issues a redirect to the root path.
        """
        task = dict(
            start_url=self.get_argument("start_url"),
            allowed_domain=self.get_argument("allowed_domain"),
        )

        runner = Scheduler(task)
        runner.run()
        self.redirect("/")
Beispiel #4
0
import re
from time import sleep
import logging
import logging.config
# Runs at import time: configure logging from the 'logging.conf' file
# (a relative path, so it depends on the current working directory).
logging.config.fileConfig('logging.conf')
# Module-wide logger, looked up under the name 'root'.
root_logger = logging.getLogger('root')

from master.scheduler import Scheduler


def print_error(message):
    """Report a fatal command-line error and terminate the process.

    Writes *message* followed by a usage hint to standard error, one per
    line, then exits with status 2.

    The output goes through ``sys.stderr.write`` instead of the Python 2
    ``print >> stream`` statement: that statement still parses on Python 3
    but raises ``TypeError`` when executed. ``sys.exit`` replaces the
    ``exit`` helper, which is only injected by the ``site`` module.

    :param message: the error text; any object is accepted and formatted
        with ``%s``.
    :raises SystemExit: always, with exit code 2.
    """
    sys.stderr.write("%s\n" % (message,))
    sys.stderr.write("Use --help to show usage.\n")
    sys.exit(2)


if __name__ == "__main__":
    # Script entry point: parse the command line, validate the configuration
    # path, then start the master scheduler listening.
    parser = OptionParser(usage="Usage: %prog [options]")
    parser.add_option(
        "-c", "--config",
        dest="config_path",
        help="configuration path",
    )
    options, args = parser.parse_args()

    # Argument validation: the config option is mandatory and must point to
    # an existing path. print_error() terminates the process on failure.
    if not options.config_path:
        print_error("Configuration required")
    elif not os.path.exists(options.config_path):
        print_error("Configuration path error: file not exists.")

    master = Scheduler(options.config_path)
    master.listen()
Beispiel #5
0
class ServerHandler:
    """Accept tasks, split them into partitions and gather partition results.

    Per-instance state:

    * ``task_progress`` -- ``task_id -> {"total": partitions scheduled,
      "progress": partitions finished}``
    * ``result`` -- ``task_id -> merged value`` when a merger was supplied
      for the task, otherwise ``task_id -> list of partition results``
    * ``merger`` -- ``task_id -> callable(accumulated, new_result)``
    * ``scheduler`` -- the ``Scheduler`` that queues and hands out partitions
    """

    DEFAULT_PARTITIONS = 8

    def __init__(self):
        """Start with no tasks, no results and a fresh ``Scheduler``."""
        self.total_partitions = 0
        self.task_id = 0  # last task id handed out by schedule_task()
        self.partition_id = 4
        self.task_progress = {}
        self.result = {}
        self.scheduler = Scheduler()
        self.merger = {}

    def hand_shake(self, paritions):
        """Acknowledge a connecting slave.

        The ``paritions`` argument (spelling kept for caller compatibility)
        is currently ignored; the method only prints a notice.
        """
        print("Slave Connected")

    def schedule_task(self, df, func, partitions, merger):
        """Register a new task, split it and queue the pieces.

        :param df: serialized data, passed through ``deserialize``.
        :param func: serialized function, passed through ``deserialize``.
        :param partitions: partition preference forwarded to
            ``partition.split_task``.
        :param merger: serialized merger; if it deserializes to a truthy
            value it is used by ``submit_result`` to combine results.
        :return: the id assigned to the task.
        """
        self.task_id += 1
        print("Task %d Scheduled with partitions preference = %d" %
              (self.task_id, partitions))
        df, func, merger = deserialize(df), deserialize(func), deserialize(
            merger)
        task = {"df": df, "func": func, "priority": 0, "task_id": self.task_id}
        progress = self.task_progress.setdefault(task["task_id"], {
            "total": 0,
            "progress": 0
        })
        splitted_task = partition.split_task(task, partitions)
        progress["total"] += len(splitted_task)
        if merger:
            self.merger[task["task_id"]] = merger
        self.scheduler.schedule_tasks(splitted_task)
        return self.task_id

    def submit_result(self, partition_id, res):
        """Record the result of one finished partition.

        ``scheduler.finish_task`` maps the partition to its task id; when it
        returns ``None`` the submission is dropped. Otherwise the task's
        progress counter is bumped and the deserialized result is either
        merged into the running value (merger registered) or appended to the
        task's result list.
        """
        print("Partition %s submitted" % partition_id)
        task_id = self.scheduler.finish_task(partition_id)
        # Identity check: a task id of 0 (or any falsy id) is still valid.
        if task_id is None:
            return
        res = deserialize(res)
        self.task_progress[task_id]["progress"] += 1
        if task_id not in self.merger:
            self.result.setdefault(task_id, []).append(res)
        elif task_id in self.result:
            self.result[task_id] = self.merger[task_id](
                self.result[task_id], res)
        else:
            # First result for a merged task seeds the accumulator.
            self.result[task_id] = res

    def offer_resources(self, partitions):
        """Return the serialized tasks the scheduler selects for ``partitions``."""
        assigned_tasks = self.scheduler.select_tasks(partitions)
        return serialize(assigned_tasks)

    def ready(self, task_id):
        """Return True when every scheduled partition of the task has finished.

        :raises KeyError: if ``task_id`` is unknown.
        """
        entry = self.task_progress[task_id]
        return entry["total"] == entry["progress"]

    def progress(self, task_id):
        """Return the completion percentage (0.0 - 100.0) of the task.

        A task with zero scheduled partitions is reported as 100.0, matching
        ``ready()`` (which is True for it), instead of dividing by zero.

        :raises KeyError: if ``task_id`` is unknown.
        """
        entry = self.task_progress[task_id]
        if not entry["total"]:
            return 100.0
        return entry["progress"] * 100.0 / entry["total"]

    def collect(self, task_id):
        """Return the serialized result stored for the task.

        :raises KeyError: if no result has been recorded for ``task_id``.
        """
        return serialize(self.result[task_id])
Beispiel #6
0
 def __init__(self, config: MasterConfig) -> None:
     """Wire up backend, engine and scheduler from *config*.

     The backend is built by ``MasterBackends`` (loaded from
     ``config.plugins.backends_dir``) using ``config.backend`` and
     ``config.backend_config``; the engine wraps the backend and the
     scheduler receives both.
     """
     self.config = config
     backend_factory = MasterBackends(config.plugins.backends_dir)
     self.backend = backend_factory.construct_backend(
         config.backend,
         config.backend_config,
     )
     self.engine = Engine(self.backend)
     self.scheduler = Scheduler(self.backend, self.engine)
Beispiel #7
0
class MasterApp:
    """Request handlers for graph structures and graph instances.

    Each public handler receives the request arguments (``args``) and the
    request object, and returns a ``ResultOk`` / ``ResultError`` /
    ``ResultNotFound`` value. Path parameters are read from
    ``request.match_info``.
    """

    def __init__(self, config: MasterConfig) -> None:
        """Build the backend selected by *config*, then the engine and scheduler."""
        self.config = config
        self.backend = MasterBackends(config.plugins.backends_dir).construct_backend(config.backend,
                                                                                     config.backend_config)
        self.engine = Engine(self.backend)
        self.scheduler = Scheduler(self.backend, self.engine)

    def shutdown(self):
        """Shut the scheduler down."""
        self.scheduler.shutdown()

    @staticmethod
    def ping(*args):
        """Answer ``'pong'``; all arguments are ignored."""
        return ResultOk('pong')

    @staticmethod
    def _slice_bounds(args: dict):
        """Return ``(start, stop)`` for ``islice`` from ``offset`` / ``limit``.

        ``limit`` defaults to ``-1``; any negative limit means "no upper
        bound" (``stop`` is ``None``). ``offset`` defaults to ``0``.

        :raises ValueError: if either argument is not an integer literal.
        """
        limit = int(args.get('limit', '-1'))
        offset = int(args.get('offset', '0'))
        return offset, (offset + limit if limit >= 0 else None)

    def list_graphs(self, args: dict, request: Request):
        """List graph structures, optionally filtered by ``graph_name``.

        Recognised ``args``: ``graph_name``, ``with_info`` (``'1'`` by
        default), ``limit`` and ``offset``. Entries without a structure
        object are reported as a bare ``{'graph_name', 'revision'}`` dict.
        """
        graph_name = args.get('graph_name', None)
        with_info = (args.get('with_info', '1') == '1')
        start, stop = self._slice_bounds(args)
        it = islice(self.backend.list_graph_struct(graph_name=graph_name, with_info=with_info),
                    start, stop)
        return ResultOk([
            graph_struct.to_json() if graph_struct else {'graph_name': name, 'revision': revision}
            for name, revision, graph_struct in it
        ])

    def create_graph(self, args: dict, request: Request):
        """Store ``args`` as a new graph structure and return its revision.

        The graph name comes from the URL match info, falling back to
        ``args['graph_name']``; an empty name yields ``ResultError``.
        """
        graph_name = request.match_info.get('graph_name', args.get('graph_name', None))
        if not graph_name:
            return ResultError(error='Graph name should be non-empty string')
        rev = self.engine.add_graph_struct(graph_name=graph_name, graph_struct=args)
        return ResultOk({'graph_name': graph_name, 'revision': rev})

    def read_graph(self, args: dict, request: Request):
        """Return the JSON form of a graph structure.

        ``revision`` is taken from the match info, then from ``args``, and
        defaults to ``-1``. Lookup failures are reported as
        ``ResultNotFound``.
        """
        graph_name = request.match_info.get('graph_name', None)
        revision = int(request.match_info.get('revision', args.get('revision', -1)))
        if not graph_name:
            return ResultError(error='graph_name should be set')
        try:
            graph_struct = self.backend.read_graph_struct(graph_name, revision)
            return ResultOk(graph_struct.to_json())
        except KeyError:
            return ResultNotFound(error='Graph with revision not found', name=graph_name, revision=revision)
        except GraphStructureNotFound as ex:
            return ResultNotFound(error=str(ex), name=graph_name)

    def launch_graph(self, args: dict, request: Request):
        """Create a new instance of a graph structure and return it as JSON.

        The instance id is a fresh ``uuid4().hex``. A missing graph or
        revision is reported as ``ResultNotFound``, the same way
        ``read_graph`` reports it, instead of letting the backend exception
        escape the handler.
        """
        graph_name = request.match_info.get('graph_name', None)
        revision = int(request.match_info.get('revision', -1))
        if not graph_name:
            return ResultError(error='graph_name should be set')
        # Only the lookup is guarded, so errors raised while creating the
        # instance are not mistaken for "graph not found".
        try:
            graph_struct = self.backend.read_graph_struct(graph_name, revision)
        except KeyError:
            return ResultNotFound(error='Graph with revision not found', name=graph_name, revision=revision)
        except GraphStructureNotFound as ex:
            return ResultNotFound(error=str(ex), name=graph_name)
        return ResultOk(self.engine.add_graph_instance(uuid4().hex, graph_struct).to_json())

    def list_instances(self, args: dict, request: Request):
        """List graph instances.

        Recognised ``args``: ``with_info`` (``'1'`` by default), ``limit``
        and ``offset``. Entries without an info object are reported as a
        bare ``{'instance_id'}`` dict.
        """
        with_info = (args.get('with_info', '1') == '1')
        start, stop = self._slice_bounds(args)
        it = islice(self.backend.list_graph_instance_info(with_info=with_info),
                    start, stop)
        return ResultOk([
            instance_info.to_json() if instance_info else {'instance_id': instance_id}
            for instance_id, instance_info in it
        ])

    def read_instance(self, args: dict, request: Request):
        """Return the JSON info of one graph instance, or ``ResultNotFound``."""
        instance_id = request.match_info.get('instance_id', None)
        if not instance_id:
            return ResultError(error='Instance id should be set')
        try:
            return ResultOk(self.backend.read_graph_instance_info(instance_id).to_json())
        except KeyError:
            return ResultNotFound(error='Instance with needed id is not found', instance_id=instance_id)

    def start_instance(self, args: dict, request: Request):
        """Switch the instance to ``GraphInstanceState.running``."""
        instance_id = request.match_info.get('instance_id', None)
        if not instance_id:
            return ResultError(error='instance_id field should be set')
        return self._set_instance_state(instance_id, GraphInstanceState.running)

    def stop_instance(self, args: dict, request: Request):
        """Switch the instance to ``GraphInstanceState.stopped``."""
        instance_id = request.match_info.get('instance_id', None)
        if not instance_id:
            return ResultError(error='instance_id field should be set')
        return self._set_instance_state(instance_id, GraphInstanceState.stopped)

    def _set_instance_state(self, instance_id: str, instance_state_name: str):
        """Ask the engine to change the state and report previous and new state."""
        prev_state = self.engine.set_graph_instance_state(instance_id, instance_state_name)
        return ResultOk(prev_state=prev_state, new_state=instance_state_name)

    # TODO: move this proxy to storage layer
    def instance_logs(self, args: dict, request: Request):
        """Fetch a task's stdout/stderr log from the worker that ran it.

        Requires ``instance_id``, ``task_name``, ``host`` and ``log_type``
        (``'err'`` or ``'out'``) in the match info. The log is fetched
        through ``WorkerApiClient`` using the task id recorded for that
        host. An unknown instance, task or host entry yields
        ``ResultNotFound``.
        """
        instance_id = request.match_info.get('instance_id', None)
        task_name = request.match_info.get('task_name', None)
        host = request.match_info.get('host', None)
        log_type = request.match_info.get('log_type', None)
        if not instance_id or not task_name or not host or not log_type:
            return ResultError(error='All fields from (instance_id, task_name, host, log_type) should be set')
        if log_type not in ('err', 'out'):
            return ResultError(error='Log type can be only from (err, out)')
        # Same not-found handling as read_instance for the same backend call.
        try:
            info = self.backend.read_graph_instance_info(instance_id)
        except KeyError:
            return ResultNotFound(error='Instance with needed id is not found', instance_id=instance_id)
        task_info = info.exec_stats.per_task_execution_info.get(task_name)
        if not task_info:
            return ResultNotFound(error='Graph instance doesn\'t have task with this name',
                                  instance_id=instance_id, task_name=task_name)
        host_info = task_info.per_host_info.get(host)
        if not host_info or not host_info.task_id:
            return ResultNotFound(error='Specified task doesn\'t have an execution entry on specified host',
                                  instance_id=instance_id, task_name=task_name, host=host)
        # FIXME: Port is not passed
        return ResultOk(instance_id=instance_id, task_name=task_name, host=host, log_type=log_type,
                        data=WorkerApiClient(worker_host=host).get_task_log(host_info.task_id, log_type))