Example #1
    def test_001_stop_etcd_daemon(self):
        """
        Test to stop etcd through daemon
        :return: None
        """
        cls = self.__class__
        proxy = EtcdProxy("http://localhost:2005", get_logger("Test", None, None))

        # Stop etcd1 with SIGINT and wait for the daemon process to exit
        os.kill(cls.etcd1.pid, signal.SIGINT)
        cls.etcd1.join()
        time.sleep(10)
        self.assertEqual(
            proxy.add_and_get_members(None, None),
            {"etcd2": "http://localhost:2002", "etcd3": "http://localhost:2004"}
        )

        # Stop etcd2 and verify that only etcd3 remains in the cluster
        os.kill(cls.etcd2.pid, signal.SIGINT)
        cls.etcd2.join()
        time.sleep(10)
        self.assertEqual(
            proxy.add_and_get_members(None, None),
            {"etcd3": "http://localhost:2004"}
        )

        return
Example #2
    def test_000_run_etcd_daemon(self):
        """
        Test running etcd through the etcd daemon
        :return: None
        """
        cls = self.__class__

        # Start all etcd daemons
        cls.etcd1 = multiprocessing.Process(target=run, args=("Etcd1", "etcd1/conf.json"))
        cls.etcd1.daemon = True
        cls.etcd1.start()
        time.sleep(2)
        cls.etcd4 = multiprocessing.Process(target=run, args=("Etcd4", "etcd4/conf.json"))
        cls.etcd4.daemon = True
        cls.etcd4.start()
        time.sleep(2)
        cls.etcd2 = multiprocessing.Process(target=run, args=("Etcd2", "etcd2/conf.json"))
        cls.etcd2.daemon = True
        cls.etcd2.start()
        cls.etcd3 = multiprocessing.Process(target=run, args=("Etcd3", "etcd3/conf.json"))
        cls.etcd3.daemon = True
        cls.etcd3.start()

        time.sleep(20)

        # Test if every etcd has joined the cluster
        proxy = EtcdProxy("http://localhost:2001", get_logger("Test", None, None))
        self.assertEqual(
            proxy.add_and_get_members(None, None),
            {"etcd1": "http://localhost:2000", "etcd2": "http://localhost:2002", "etcd3": "http://localhost:2004"}
        )
        proxy.set("key", "value")
        self.assertEqual(proxy.get("key"), "value")
        return
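
The fixed time.sleep(20) makes this test timing-sensitive. A small polling helper, sketched below on the assumption that add_and_get_members(None, None) returns the current member dict (as the assertion suggests) and may raise while the cluster is still forming, would be more robust:

import time

def wait_for_members(proxy, expected, timeout=30, interval=1):
    # Poll cluster membership instead of sleeping for a fixed time
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            if proxy.add_and_get_members(None, None) == expected:
                return True
        except Exception:
            pass  # etcd may not be reachable yet
        time.sleep(interval)
    return False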
Example #3
    def test_000_run_mongodb_daemon(self):
        """
        Test running mongodb through the mongodb daemon
        :return: None
        """
        cls = self.__class__

        time.sleep(10)

        # Start all mongodb daemons
        cls.mongodb1 = multiprocessing.Process(target=run,
                                               args=("Mongodb1",
                                                     "etcd/conf.json",
                                                     "mongodb1/conf.json"))
        cls.mongodb1.daemon = True
        cls.mongodb1.start()
        cls.mongodb2 = multiprocessing.Process(target=run,
                                               args=("Mongodb2",
                                                     "etcd/conf.json",
                                                     "mongodb2/conf.json"))
        cls.mongodb2.daemon = True
        cls.mongodb2.start()
        cls.mongodb3 = multiprocessing.Process(target=run,
                                               args=("Mongodb3",
                                                     "etcd/conf.json",
                                                     "mongodb3/conf.json"))
        cls.mongodb3.daemon = True
        cls.mongodb3.start()

        time.sleep(20)

        # Test if every mongodb has joined the replica set
        cls.etcd = EtcdProxy("http://localhost:2001",
                             get_logger("Test", None, None))
        registered_member = list(cls.etcd.get("mongodb/register/").values())
        cls.mongo1 = pymongo.MongoClient("localhost", 3000)
        cls.mongo1_rs = pymongo.MongoClient("localhost", 3000, replicaSet="rs")
        conf = cls.mongo1.admin.command("replSetGetConfig",
                                        1)["config"]["members"]
        self.assertEqual(set([x["host"] for x in conf]),
                         set(registered_member))

        cls.mongo2 = pymongo.MongoClient("localhost", 3001)
        cls.mongo2_rs = pymongo.MongoClient("localhost", 3001, replicaSet="rs")
        cls.mongo3 = pymongo.MongoClient("localhost", 3002)
        cls.mongo3_rs = pymongo.MongoClient("localhost", 3002, replicaSet="rs")
        cls.res = cls.mongo2_rs.client["test_db"][
            "test_collection"].insert_one({"value": 0})
        self.assertEqual(
            cls.mongo1_rs.client["test_db"]["test_collection"].find_one(
                {"_id": cls.res.inserted_id})["value"], 0)
        self.assertEqual(
            cls.mongo3_rs.client["test_db"]["test_collection"].find_one(
                {"_id": cls.res.inserted_id})["value"], 0)
        return
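
The 20-second sleep is again a guess at how long replica set election takes. An alternative sketch, using standard pymongo calls to poll replSetGetStatus until a PRIMARY appears (the timeout here is an arbitrary choice):

import time
import pymongo
from pymongo.errors import PyMongoError

def wait_for_primary(host, port, timeout=60, interval=1):
    # Poll replica set status until some member reports the PRIMARY state
    client = pymongo.MongoClient(host, port)
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            status = client.admin.command("replSetGetStatus")
            if any(m["stateStr"] == "PRIMARY" for m in status["members"]):
                return True
        except PyMongoError:
            pass  # replica set may not be initialized yet
        time.sleep(interval)
    return False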
Example #4
    def __init__(self,
                 module_name="Gateway",
                 etcd_conf_path="config/etcd.json",
                 uwsgi_conf_path="config/uwsgi.json"):
        """
        Initialize the object with logger and configuration
        :param module_name:
        :param etcd_conf_path:
        :param uwsgi_conf_path:
        """
        # Load configuration
        with open(uwsgi_conf_path, "r") as f:
            self.conf = json.load(f)["server"]
        self.module_name = module_name

        super().__init__(__name__, template_folder=self.conf["template"])

        # Generate a logger
        if "log" in self.conf:
            get_logger(self.logger, self.conf["log"]["info"],
                       self.conf["log"]["error"])
        else:
            get_logger(self.logger, None, None)
        self.logger.info("%s server program started." % self.module_name)

        # Set flask configuration
        self.jinja_env.variable_start_string = "[["
        self.jinja_env.variable_end_string = "]]"
        self.config["MAX_CONTENT_LENGTH"] = TASK_DICTIONARY_MAX_SIZE

        # Generate proxy for etcd
        with open(etcd_conf_path, "r") as f:
            self.local_etcd = generate_local_etcd_proxy(
                json.load(f)["etcd"], self.logger)

        self.load_response_function()
        return
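
Changing the Jinja2 delimiters to [[ and ]] leaves {{ }} free for a client-side template framework. A minimal standalone sketch of the same setting (plain Flask, hypothetical template string):

from flask import Flask, render_template_string

app = Flask(__name__)
app.jinja_env.variable_start_string = "[["
app.jinja_env.variable_end_string = "]]"

with app.app_context():
    # Server-side values now use [[ ]]; {{ }} passes through to the client untouched
    print(render_template_string("Hello, [[ name ]]!", name="world"))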
Example #5
    @classmethod
    def setUpClass(cls):
        """
        Initialization function
        :return: None
        """
        print("Initializing environment.\n")

        # Create temporary data directories
        for d in ["etcd", "mongodb1", "mongodb2", "mongodb3"]:
            os.mkdir(d)

        # Configure and start etcd
        etcd_conf = {
            "exe": "etcd",
            "name": "etcd",
            "data_dir": "etcd",
            "listen": {
                "address": "0.0.0.0",
                "peer_port": "2000",
                "client_port": "2001"
            },
            "advertise": {
                "address": "localhost",
                "peer_port": "2000",
                "client_port": "2001"
            },
            "cluster": {
                "type": "init"
            }
        }
        cls.etcd_proc = subprocess.Popen(
            etcd_generate_run_command(etcd_conf),
            stdout=sys.stdout,
            stderr=subprocess.STDOUT
        )

        # Generate mongodb configuration
        cls.mongodb1_conf = {
            "exe": "mongod",
            "name": "mongodb1",
            "data_dir": "mongodb1",
            "listen": {
                "address": "0.0.0.0",
                "port": "3000"
            },
            "advertise": {
                "address": "localhost",
                "port": "3000"
            },
            "replica_set": "rs"
        }
        cls.mongodb2_conf = {
            "exe": "mongod",
            "name": "mongodb2",
            "data_dir": "mongodb2",
            "listen": {
                "address": "0.0.0.0",
                "port": "3001"
            },
            "advertise": {
                "address": "localhost",
                "port": "3001"
            },
            "replica_set": "rs"
        }
        cls.mongodb3_conf = {
            "exe": "mongod",
            "name": "mongodb3",
            "data_dir": "mongodb3",
            "listen": {
                "address": "0.0.0.0",
                "port": "3002"
            },
            "advertise": {
                "address": "localhost",
                "port": "3002"
            },
            "replica_set": "rs"
        }
        cls.init_key = "mongodb/init"
        cls.reg_path = "mongodb/register/"

        cls.mongodb1_proc = None
        cls.mongodb2_proc = None
        cls.mongodb3_proc = None

        # Generate a logger and etcd proxy
        cls.logger = get_logger("Test", None, None)
        cls.etcd = EtcdProxy("http://localhost:2001", cls.logger)
        return
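
The matching tearDownClass is not shown; a minimal counterpart, assuming the daemons respond to terminate() and that shutil is imported, might look like:

    @classmethod
    def tearDownClass(cls):
        # Stop every started process and remove the temporary data directories
        for proc in (cls.mongodb1_proc, cls.mongodb2_proc,
                     cls.mongodb3_proc, cls.etcd_proc):
            if proc:
                proc.terminate()
                proc.wait()
        for d in ["etcd", "mongodb1", "mongodb2", "mongodb3"]:
            shutil.rmtree(d, ignore_errors=True)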
Example #6
def run(service_list, module_name, module_description, conf_path="config/templates/boot.json"):
    """
    Parse args and start services with periodic status checks
    :param service_list: List of all services in the starting order
    :param module_name: The name of the running module
    :param module_description: The description of the module
    :param conf_path: Path to boot config
    :return: None
    """
    # Get a parser and add common args
    parser = argparse.ArgumentParser(description=module_description)
    parser.add_argument("--docker-sock", dest="docker_sock", default=None,
                        help="Path to mapped docker sock file")
    parser.add_argument("--retry-times", type=int, dest="retry_times", default=None,
                        help="Total retry time of key operations")
    parser.add_argument("--retry-interval", type=int, dest="retry_interval", default=None,
                        help="Interval between retries of key operations")

    parser.add_argument("--boot-check-interval", type=int, dest="boot_check_interval", default=None,
                        help="Interval between services check in boot module")
    parser.add_argument("--boot-print-log", dest="boot_print_log", action="store_const", const=True, default=False,
                        help="Print the log of boot module to stdout")

    # Add args for services
    for s in service_list:
        s["generator"] = s["args_parser"](parser, *s.get("args", tuple()), **s.get("kwargs", {}))

    # Parse args
    args = parser.parse_args()

    # Load configuration
    with open(conf_path, "r") as f:
        config = json_comment.load(f)
    if args.boot_check_interval is not None:
        config["check_interval"] = args.boot_check_interval
    if args.boot_print_log:
        config.pop("log", None)

    # Generate logger
    if "log" in config:
        logger = get_logger("boot", config["log"]["info"], config["log"]["error"])
    else:
        logger = get_logger("boot", None, None)
    logger.info("%s boot program started." % module_name)

    # If docker-sock is given, generate a docker client for it
    client = docker.APIClient(base_url=args.docker_sock) if args.docker_sock else None
    # Dictionary for services
    services = {}
    # Start order of services
    start_order = []

    # Load and modify config for all services by calling config generator
    for s in service_list:
        # Load config template
        with open(s["config_template"], "r") as f:
            config_sub = json_comment.load(f)
        # Modify services config
        s["generator"](args, config_sub, client, services, start_order)
        # Write config
        with open(s["config"], "w") as f:
            f.write(json.dumps(config_sub, indent=4))
        logger.info("Service %s configuration loaded." % s["name"])

    # Regularly check whether the service daemons are running
    try:
        while True:
            for s in start_order:
                logger.info("Checking status of service %s." % s)
                # Try to open the pid file of the service
                try:
                    # Lock the pid file
                    with filelock.FileLock(services[s]["pid_file"] + ".lock", timeout=0):
                        logger.debug("Locked pid file %s." % services[s]["pid_file"])

                        # If the process is None or the process has exited, (re)start it and rewrite the pid file
                        if (not services[s]["process"]) or services[s]["process"].poll() is not None:
                            logger.warning("Service %s is down." % s)
                            # Start the service and write the pid file
                            with open(services[s]["pid_file"], "w") as f:
                                logger.info("Starting the service %s and writing pid file." % s)
                                services[s]["process"] = subprocess.Popen(services[s]["command"])
                                f.write(str(services[s]["process"].pid))
                        else:
                            logger.info("Service %s is running." % s)
                    logger.debug("Unlocked pid file %s." % services[s]["pid_file"])
                except KeyboardInterrupt:
                    raise
                except filelock.Timeout:
                    logger.warning("Failed to obtain lock for service %s. Skipping check." % s, exc_info=True)
                except:
                    logger.error("Failed to check status for service %s." % s, exc_info=True)

            time.sleep(config["check_interval"])

    except KeyboardInterrupt:
        logger.info("Received SIGINT. Stopping service check.", exc_info=True)

    # Clean all services
    for s in start_order[::-1]:
        if services[s]["process"]:
            logger.info("Killing service %s." % s)
            os.kill(services[s]["process"].pid, signal.SIGINT)
            services[s]["process"].wait()
            logger.info("Killed service %s." % s)

    logger.info("%s boot program exiting." % module_name)
    return
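
The service descriptor contract is implicit in the loops above: args_parser(parser, *args, **kwargs) must return a config generator, and that generator must fill services[name] with at least pid_file, command, and process, and append the name to start_order. A hedged sketch of one entry (all names below are hypothetical illustrations, not the project's real helpers):

def etcd_args_parser(parser):
    # Add service-specific command line arguments
    parser.add_argument("--etcd-name", dest="etcd_name", default=None,
                        help="Name of the etcd instance")
    return etcd_config_generator

def etcd_config_generator(args, config_sub, client, services, start_order):
    # Patch the loaded config template and register the service
    if args.etcd_name is not None:
        config_sub["etcd"]["name"] = args.etcd_name
    services["etcd"] = {
        "pid_file": "run/etcd.pid",
        "command": ["python3", "etcd_daemon.py"],
        "process": None,
    }
    start_order.append("etcd")

service_list = [{
    "name": "etcd",
    "args_parser": etcd_args_parser,
    "config_template": "config/templates/etcd.json",
    "config": "config/etcd.json",
}]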
Example #7
def run(module_name="Judicator",
        etcd_conf_path="config/etcd.json",
        mongodb_conf_path="config/mongodb.json"):
    """
    Load config and run mongodb
    :param module_name: Name of the caller module
    :param etcd_conf_path: Path to etcd config file
    :param mongodb_conf_path: Path to mongodb config file
    :return: None
    """
    global working

    # Load configuration
    with open(mongodb_conf_path, "r") as f:
        config = json.load(f)
    retry_times = config["daemon"]["retry"]["times"]
    retry_interval = config["daemon"]["retry"]["interval"]

    # Generate logger for daemon
    if "log_daemon" in config["daemon"]:
        daemon_logger = get_logger("mongodb_daemon",
                                   config["daemon"]["log_daemon"]["info"],
                                   config["daemon"]["log_daemon"]["error"])
    else:
        daemon_logger = get_logger("mongodb_daemon", None, None)
    daemon_logger.info("%s mongodb_daemon program started." % module_name)

    # Generate logger for mongodb forwarding raw log output to designated place
    if "log_mongodb" in config["daemon"]:
        mongodb_logger = get_logger("mongodb",
                                    config["daemon"]["log_mongodb"]["info"],
                                    config["daemon"]["log_mongodb"]["error"],
                                    True)
    else:
        mongodb_logger = get_logger("mongodb", None, None, True)

    # Get an etcd proxy for replica set operations
    with open(etcd_conf_path, "r") as f:
        local_etcd = generate_local_etcd_proxy(
            json.load(f)["etcd"], daemon_logger)
    # Get a local mongodb proxy
    local_mongodb = generate_local_mongodb_proxy(config["mongodb"], local_etcd,
                                                 daemon_logger)

    # Check whether the data dir of mongodb is empty
    # If not, delete it and create a new one
    if not check_empty_dir(config["mongodb"]["data_dir"]):
        shutil.rmtree(config["mongodb"]["data_dir"])
        os.mkdir(config["mongodb"]["data_dir"])
        daemon_logger.info(
            "Previous data directory deleted with a new one created.")

    # Check whether the init data dir of mongodb is empty
    # If not, copy it to the data dir
    if not check_empty_dir(config["mongodb"]["data_init_dir"]):
        shutil.rmtree(config["mongodb"]["data_dir"])
        shutil.copytree(config["mongodb"]["data_init_dir"],
                        config["mongodb"]["data_dir"])
        daemon_logger.info("Found existing data initialize directory.")

    # Generate command and run mongodb instance as a subprocess
    command = mongodb_generate_run_command(config["mongodb"])
    daemon_logger.info("Starting mongodb with command: %s" % " ".join(command))
    mongodb_proc = subprocess.Popen(command,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)

    # Create and start register thread
    register_thread = threading.Thread(target=register,
                                       args=(config, local_etcd, local_mongodb,
                                             mongodb_proc, daemon_logger))
    register_thread.daemon = True
    register_thread.start()

    # Log the output of mongodb instance until EOF or terminated
    try:
        log_output(mongodb_logger, mongodb_proc.stdout,
                   config["daemon"]["raw_log_symbol_pos"])
        daemon_logger.info("Received EOF from mongodb.")
    except KeyboardInterrupt:
        daemon_logger.info("Received SIGINT. Cleaning up and exiting.",
                           exc_info=True)
        # Stop the register thread
        working = False
        register_thread.join()
        # Kill the process
        if not local_mongodb.shutdown_and_close():
            daemon_logger.error("Graceful shutdown failed. Killing the process.")
            mongodb_proc.terminate()
        mongodb_proc.wait()
        daemon_logger.info("Mongodb process exited.")
        # Cancel the registration of this mongodb on etcd
        try_with_times(retry_times, retry_interval, False, daemon_logger,
                       "cancel registration of mongodb",
                       local_mongodb.cancel_registration,
                       config["daemon"]["etcd_path"]["register"])
        daemon_logger.info("Removed local mongodb registration on etcd.")
    except:
        daemon_logger.error(
            "Accidentally terminated. Killing mongodb process.", exc_info=True)
        mongodb_proc.terminate()
    # Wait until mongodb process exit
    mongodb_proc.wait()

    working = False
    daemon_logger.info("%s mongodb_daemon program exiting." % module_name)
    return
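
try_with_times is used throughout these daemons but not shown. Judging from its call sites, it is a retry wrapper returning a (success, result) pair; the sketch below infers that shape, and the meaning of the third boolean argument is an assumption:

import time

def try_with_times(times, interval, raise_on_fail, logger, description,
                   func, *args):
    # Retry func(*args) up to `times` times, sleeping `interval` seconds between tries
    for attempt in range(times):
        try:
            return True, func(*args)
        except Exception:
            logger.warning("Failed to %s (attempt %d/%d)." %
                           (description, attempt + 1, times), exc_info=True)
            time.sleep(interval)
    if raise_on_fail:  # assumed semantics of the third argument
        raise RuntimeError("Failed to %s after %d attempts." % (description, times))
    return False, None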
Example #8
def run(module_name, etcd_conf_path="config/etcd.json"):
    """
    Load config and run etcd
    :param module_name: Name of the caller module
    :param etcd_conf_path: Path to etcd config file
    :return: None
    """
    # Load config file
    with open(etcd_conf_path, "r") as f:
        config = json.load(f)
    retry_times = config["daemon"]["retry"]["times"]
    retry_interval = config["daemon"]["retry"]["interval"]

    # Generate daemon logger from config file
    # If logger arguments exist in config file, write the log to the designated file
    # Else, forward the log to standard output
    if "log_daemon" in config["daemon"]:
        daemon_logger = get_logger(
            "etcd_daemon",
            config["daemon"]["log_daemon"]["info"],
            config["daemon"]["log_daemon"]["error"]
        )
    else:
        daemon_logger = get_logger("etcd_daemon", None, None)
    daemon_logger.info("%s etcd_daemon program started." % module_name)

    # Generate etcd logger forwarding etcd log to designated location
    if "log_etcd" in config["daemon"]:
        etcd_logger = get_logger(
            "etcd",
            config["daemon"]["log_etcd"]["info"],
            config["daemon"]["log_etcd"]["error"],
            True
        )
    else:
        etcd_logger = get_logger("etcd", None, None, True)

    # Generate an etcd proxy for the local etcd
    local_etcd = generate_local_etcd_proxy(config["etcd"], daemon_logger)

    # Check whether the data dir of etcd is empty
    # If not, delete it and create a new one
    if not check_empty_dir(config["etcd"]["data_dir"]):
        shutil.rmtree(config["etcd"]["data_dir"])
        os.mkdir(config["etcd"]["data_dir"])
        daemon_logger.info("Previous data directory deleted with a new one created.")

    # Check whether the init data dir of etcd is empty
    # If not, copy it to the data dir and skip the cluster initialization information
    if "data_init_dir" in config["etcd"] and not check_empty_dir(config["etcd"]["data_init_dir"]):
        del config["etcd"]["cluster"]
        shutil.rmtree(config["etcd"]["data_dir"])
        shutil.copytree(config["etcd"]["data_init_dir"], config["etcd"]["data_dir"])
        daemon_logger.info("Found existing data initialize directory. Skipped cluster parameters.")

    # If cluster config exists, this node is going to either explicitly join a cluster or initialize one
    if "cluster" in config["etcd"]:
        # If a service name for etcd member detection is specified, use it to find a member client
        if "service" in config["etcd"]["cluster"]:
            daemon_logger.info("Searching etcd member using docker service and dns.")
            # Get all running tasks of the specified service
            services = []
            try:
                client = docker.APIClient(base_url=config["daemon"]["docker_sock"])
                services = [
                    "http://" + config["etcd"]["cluster"]["service"] + "." + str(x["Slot"]) + "." + x["ID"] + ":" +
                    config["etcd"]["cluster"]["client_port"]
                    for x in sorted(
                        client.tasks({"service": config["etcd"]["cluster"]["service"]}), key=lambda x: x["CreatedAt"]
                    )
                    if x["DesiredState"] == "running"
                ]
            except:
                daemon_logger.error("Failed to connect to docker engine.", exc_info=True)
                daemon_logger.error("%s etcd_daemon program exiting." % module_name)
                exit(1)

            daemon_logger.debug("Found following members with service name %s:" % config["etcd"]["cluster"]["service"])
            for x in services:
                daemon_logger.debug("%s" % x)

            # When the local etcd is not in proxy mode, try to remove the current task from the service list
            # If it cannot be found, exit, as that should never happen
            if "proxy" not in config["etcd"]:
                try:
                    services.remove(
                        "http://" + config["etcd"]["advertise"]["address"] +
                        ":" + config["etcd"]["advertise"]["client_port"]
                    )
                except:
                    daemon_logger.error("Failed to find current docker task in list.", exc_info=True)
                    daemon_logger.error("%s etcd_daemon program exiting." % module_name)
                    exit(1)
            else:
                daemon_logger.info("Detected proxy mode. Skipped removing current docker task from service list.")

            # If one or more docker service tasks are running, join the first created one
            # Else, initialize the cluster by itself if it is not in proxy mode, or exit
            if services:
                daemon_logger.info("Found following available members:")
                for x in services:
                    daemon_logger.info("%s" % x)
                config["etcd"]["cluster"] = {"type": "join", "client": services[0]}
                daemon_logger.info("Selected %s as member client." % config["etcd"]["cluster"]["client"])
            else:
                daemon_logger.warning("No available member detected.")
                if "proxy" not in config["etcd"]:
                    config["etcd"]["cluster"] = {"type": "init"}
                    daemon_logger.info("Initializing cluster by local etcd.")
                else:
                    daemon_logger.info("%s etcd_daemon program exiting." % module_name)
                    exit(1)

    # If the node is going to join a cluster without knowing the existing members,
    # add itself to the cluster if not in proxy mode,
    # and then (in proxy mode or not) get all member information for the etcd startup command
    if config["etcd"]["cluster"]["type"] == "join" and "member" not in config["etcd"]["cluster"]:
        # Generate an etcd proxy for the remote etcd to be joined
            remote_etcd = EtcdProxy(config["etcd"]["cluster"]["client"], daemon_logger)
            # Join the cluster by adding self information
            success, res = try_with_times(
                retry_times,
                retry_interval,
                False,
                daemon_logger,
                "get (and add) member to etcd cluster status",
                remote_etcd.add_and_get_members,
                config["etcd"]["name"],
                "http://" + config["etcd"]["advertise"]["address"] + ":" + config["etcd"]["advertise"]["peer_port"],
                "proxy" in config["etcd"]
            )
            if not success:
                daemon_logger.error("Failed to get (and add) member information to remote client. Exiting.")
                exit(1)
            # Generate member argument for the joining command
            config["etcd"]["cluster"]["member"] = ",".join([(k + "=" + v) for k, v in res.items()])
            daemon_logger.info("Existing members of cluster received.")
            daemon_logger.info("Etcd will be started with member arguments: %s." % config["etcd"]["cluster"]["member"])

    # Generate running command
    command = etcd_generate_run_command(config["etcd"])
    daemon_logger.info("Starting etcd with command: %s" % " ".join(command))
    # Run etcd in a subprocess.
    etcd_proc = subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT
    )

    # Create a check thread to check if etcd has been started up
    check_thread = threading.Thread(
        target=check,
        args=(retry_times, retry_interval, etcd_proc, local_etcd, daemon_logger)
    )
    check_thread.daemon = True
    check_thread.start()

    # Log the raw output of etcd until it exits or is terminated
    try:
        log_output(etcd_logger, etcd_proc.stdout, config["daemon"]["raw_log_symbol_pos"])
        daemon_logger.info("Received EOF from etcd.")
    except KeyboardInterrupt:
        daemon_logger.info("Received SIGINT. Cleaning up and exiting.", exc_info=True)
        if "proxy" not in config["etcd"]:
            try_with_times(
                retry_times,
                retry_interval,
                False,
                daemon_logger,
                "remove etcd from cluster",
                local_etcd.remove_member,
                config["etcd"]["name"],
                "http://" + config["etcd"]["advertise"]["address"] + ":" + config["etcd"]["advertise"]["peer_port"]
            )
        else:
            daemon_logger.info("Detected proxy mode. Skipped removing local etcd from cluster.")
        os.kill(etcd_proc.pid, signal.SIGINT)
    except:
        daemon_logger.error("Accidentally terminated. Killing etcd process.", exc_info=True)
        etcd_proc.terminate()
    # Wait for the subprocess
    etcd_proc.wait()

    daemon_logger.info("%s etcd_daemon program exiting." % module_name)
    return
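
For reference, the member string built for the join case ("name=peer_url,...") matches the format of etcd's --initial-cluster flag. A sketch of what etcd_generate_run_command presumably emits for a joining node (the real helper is not shown, so the exact flags are an assumption):

members = {"etcd1": "http://localhost:2000", "etcd2": "http://localhost:2002"}
member_arg = ",".join(k + "=" + v for k, v in members.items())
command = [
    "etcd",
    "--name", "etcd3",
    "--initial-cluster-state", "existing",
    "--initial-cluster", member_arg,  # "etcd1=http://localhost:2000,..."
]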
Example #9
def run(module_name="Judicator",
        etcd_conf_path="config/etcd.json",
        mongodb_conf_path="config/mongodb.json",
        main_conf_path="config/main.json"):
    """
    Load config and run judicator main program
    :param module_name: Name of the caller module
    :param etcd_conf_path: Path to etcd config file
    :param mongodb_conf_path: Path to mongodb config file
    :param main_conf_path: Path to main config file
    :return: None
    """
    global working

    # Load configuration
    with open(main_conf_path, "r") as f:
        config = json.load(f)

    # Generate logger
    if "log" in config:
        logger = get_logger("main", config["log"]["info"],
                            config["log"]["error"])
    else:
        logger = get_logger("main", None, None)
    logger.info("%s main program started." % module_name)

    # Generate proxy for etcd and mongodb
    with open(etcd_conf_path, "r") as f:
        local_etcd = generate_local_etcd_proxy(json.load(f)["etcd"], logger)
    with open(mongodb_conf_path, "r") as f:
        local_mongodb = generate_local_mongodb_proxy(
            json.load(f)["mongodb"], local_etcd, logger)
    # Get connections to both the task and executor collections in mongodb
    mongodb_task = local_mongodb.client[config["task"]["database"]][
        config["task"]["collection"]]
    mongodb_executor = local_mongodb.client[config["executor"]["database"]][
        config["executor"]["collection"]]

    # Create and start the register thread
    register_thread = threading.Thread(target=register,
                                       args=(config, local_etcd, local_mongodb,
                                             logger))
    register_thread.daemon = True
    register_thread.start()

    # Create and start the lead thread
    lead_thread = threading.Thread(target=lead,
                                   args=(config, local_etcd, local_mongodb,
                                         mongodb_task, mongodb_executor,
                                         logger))
    lead_thread.daemon = True
    lead_thread.start()

    # Start the rpc server and serve until terminated
    server = TServer.TThreadedServer(
        Judicator.Processor(RPCService(logger, mongodb_task,
                                       mongodb_executor)),
        TSocket.TServerSocket(config["listen"]["address"],
                              int(config["listen"]["port"])),
        TTransport.TBufferedTransportFactory(),
        TBinaryProtocol.TBinaryProtocolFactory())
    try:
        logger.info("Starting rpc server.")
        server.serve()
    except KeyboardInterrupt:
        logger.info(
            "Received SIGINT. Cancelling judicator registration on etcd.",
            exc_info=True)
        # Wait for the register thread to delete registration and then stop
        working = False
        register_thread.join()
    except:
        logger.error("Accidentally terminated.", exc_info=True)

    working = False
    logger.info("%s main program exiting." % module_name)
    return
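
A client for this RPC server follows the usual Thrift pattern. The sketch below assumes the generated Judicator service exposes a ping() method and that the server listens on port 9000; both are illustrations, since the actual interface and config are not shown:

from thrift.transport import TSocket, TTransport
from thrift.protocol import TBinaryProtocol

transport = TTransport.TBufferedTransport(TSocket.TSocket("localhost", 9000))
protocol = TBinaryProtocol.TBinaryProtocol(transport)
client = Judicator.Client(protocol)

transport.open()
client.ping()  # hypothetical method on the generated service
transport.close()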
Example #10
def run(module_name="Executor",
        etcd_conf_path="config/etcd.json",
        main_conf_path="config/main.json"):
    """
    Load config and run executor main program
    :param module_name: Name of the caller module
    :param etcd_conf_path: Path to etcd config file
    :param main_conf_path: Path to main config file
    :return: None
    """
    global tasks, lock

    # Load configuration
    with open(main_conf_path, "r") as f:
        config = json.load(f)
    retry_times = config["retry"]["times"]
    retry_interval = config["retry"]["interval"]

    # Generate logger
    if "log" in config:
        logger = get_logger("main", config["log"]["info"],
                            config["log"]["error"])
    else:
        logger = get_logger("main", None, None)
    logger.info("%s main program started." % module_name)

    # Generate proxy for local etcd
    with open(etcd_conf_path, "r") as f:
        local_etcd = generate_local_etcd_proxy(json.load(f)["etcd"], logger)

    # Check whether the data dir of main is empty
    # If not, delete it and create a new one
    if not check_empty_dir(config["data_dir"]):
        shutil.rmtree(config["data_dir"])
        os.mkdir(config["data_dir"])
        logger.info("Previous data directory deleted with a new one created.")
    os.chmod(config["data_dir"], 0o700)
    logger.info("Data directory privilege changed to 0700.")

    # If task user id and group id are not specified, use the current user and group
    if "user" not in config["task"]:
        config["task"]["user"] = {"uid": os.getuid(), "gid": os.getgid()}
    logger.info("Task execution uid: %d, gid: %d." %
                (config["task"]["user"]["uid"], config["task"]["user"]["gid"]))

    # Report task execution status regularly
    logger.info("Starting executor routines.")
    try:
        while True:
            time.sleep(config["report_interval"])

            # Collect things to report
            logger.info("Collecting report content.")
            complete, executing = [], []
            vacant = config["task"]["vacant"]
            # Acquire lock first before modifying global variable
            with lock:
                try:
                    for t in tasks:
                        if not tasks[t]["cancel"]:
                            if tasks[t]["thread"] and not tasks[t][
                                    "thread"].is_alive():
                                complete.append(
                                    generate(tasks[t], False, False, False))
                                logger.info("Task %s added to complete list." %
                                            t)
                            else:
                                executing.append(generate(tasks[t], True))
                                vacant -= 1
                                logger.info(
                                    "Task %s added to executing list." % t)
                except KeyboardInterrupt:
                    raise
                except:
                    logger.error("Failed to collect report content.",
                                 exc_info=True)

            # Try to report to judicator and get response
            logger.info("Executor current vacancy: %d." % vacant)
            logger.info("Reporting to judicator.")
            success, res = try_with_times(retry_times, retry_interval, False,
                                          logger, "report to judicator",
                                          report, complete, executing, vacant,
                                          local_etcd, config, logger)
            if not success:
                logger.error(
                    "Failed to report to judicator. Skipping tasks update.")
                continue
            cancel = [extract(x, brief=True) for x in res[0]]
            assign = [extract(x) for x in res[1]]
            logger.info("Reported to judicator with response:")
            logger.info("Cancel list: %s." % str([t["id"] for t in cancel]))
            logger.info("Assign list: %s." % str([t["id"] for t in assign]))

            # Update tasks information
            logger.info("Updating tasks information.")
            # Acquire lock first before modifying global variable
            with lock:
                try:
                    # Cancel tasks
                    logger.info("Checking tasks to be cancelled.")
                    for t in cancel:
                        logger.info("Cancelling task %s." % t["id"])
                        if not t["id"] in tasks:
                            continue
                        tasks[t["id"]]["cancel"] = True
                        # If the subprocess is still running, kill it
                        if tasks[t["id"]]["process"] and tasks[
                                t["id"]]["process"].poll() is None:
                            logger.info("Killing subprocess of task %s." %
                                        t["id"])
                            tasks[t["id"]]["process"].kill()
                        else:
                            logger.info("No subprocess to kill for task %s." %
                                        t["id"])

                    # Clean up finished tasks
                    # A list of task ids must be built beforehand, as tasks will change during iteration
                    tasks_list = tuple(tasks.keys())
                    logger.info("Checking tasks to be deleted.")
                    for t in tasks_list:
                        if tasks[t]["thread"] and not tasks[t][
                                "thread"].is_alive():
                            # A task can only be considered fully done (and thus deleted)
                            # when thread.is_alive() is False (the daemon thread has finished)
                            # and cancel is True (the judicator has received the result)
                            if tasks[t]["cancel"]:
                                del tasks[t]
                                logger.info("Deleted task %s." % t)

                    # Handle newly assigned
                    logger.info("Checking tasks to be assigned.")
                    for t in assign:
                        logger.info("Assigned task %s." % t["id"])

                        tasks[t["id"]] = t
                        tasks[t["id"]]["process"] = None
                        tasks[t["id"]]["cancel"] = False

                        # Generate a thread and start it
                        tasks[t["id"]]["thread"] = threading.Thread(
                            target=execute, args=(t["id"], config, logger))
                        tasks[t["id"]]["thread"].setDaemon(True)
                        tasks[t["id"]]["thread"].start()
                except KeyboardInterrupt:
                    raise
                except:
                    logger.error("Failed to update tasks.", exc_info=True)

            logger.info("Finished executor routine work.")

    except KeyboardInterrupt:
        logger.info("Received SIGINT. Cleaning up all subprocess.",
                    exc_info=True)

    # Clean up and kill all subprocesses
    # Hold the lock until exit to ensure that no more subprocesses are spawned
    lock.acquire()
    for t in tasks:
        if tasks[t]["process"]:
            tasks[t]["process"].kill()
            tasks[t]["process"].wait()
            logger.info("Killed subprocess of task %s." % t)
        else:
            logger.info("Task %s has no subprocess running." % t)

    logger.info("%s main program exiting." % module_name)
    return
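
For readability, this is the shape of a tasks entry as inferred purely from how this function reads and writes it (the module that defines tasks is not shown):

tasks["task-id"] = {
    "id": "task-id",    # filled by extract() from the judicator response
    "thread": None,     # worker thread running execute()
    "process": None,    # subprocess spawned by the worker, if any
    "cancel": False,    # set once the judicator cancels or acknowledges the task
}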