Example #1
0
 def __init_coordination(self, coordination_url):
     """Instantiate the coordination backend selected by the URL scheme.

     ``advert://`` and ``sqlasyncadvert://`` select the ADVERT backend,
     ``redis://`` the Redis backend and ``tcp://`` the ZMQ backend. Each
     backend module is imported only inside its matching branch.

     Args:
         coordination_url: URL of the coordination service. It is split by
             ``self.__parse_url`` into scheme, username, password, host,
             port and dbtype.

     Returns:
         The object created by calling the selected ``bigjob_coordination``.

     Raises:
         RuntimeError: If the scheme is not supported or the backend module
             could not be imported.
     """
     # Pre-bind the name: a failed import or an unknown scheme is then
     # reported explicitly below instead of as an UnboundLocalError.
     bigjob_coordination = None
     if (coordination_url.startswith("advert://")
             or coordination_url.startswith("sqlasyncadvert://")):
         try:
             from coordination.bigjob_coordination_advert import bigjob_coordination
             logger.debug("Utilizing ADVERT Backend")
         except Exception:
             logger.error("Advert Backend could not be loaded")
     elif coordination_url.startswith("redis://"):
         try:
             from coordination.bigjob_coordination_redis import bigjob_coordination
             logger.debug("Utilizing Redis Backend")
         except Exception:
             logger.error("Error loading pyredis.")
     elif coordination_url.startswith("tcp://"):
         try:
             from coordination.bigjob_coordination_zmq import bigjob_coordination
             logger.debug("Utilizing ZMQ Backend")
         except Exception:
             logger.error("ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                   +"PYZMQ (http://zeromq.github.com/pyzmq/)")
     else:
         logger.error("No suitable coordination backend found.")

     if bigjob_coordination is None:
         # Only the scheme is reported; the full URL may carry credentials.
         raise RuntimeError("Could not initialize coordination subsystem for URL scheme: "
                            + coordination_url.split("://", 1)[0])

     logger.debug("Parsing URL: " + coordination_url)
     scheme, username, password, host, port, dbtype = self.__parse_url(coordination_url)

     # A port of -1 from the URL parser is handed to the backend as None.
     if port == -1:
         port = None
     return bigjob_coordination(server=host, server_port=port, username=username,
                                password=password, dbtype=dbtype, url_prefix=scheme)
Example #2
0
    def __init_coordination(self, coordination_url):
        """Instantiate the coordination backend selected by the URL scheme.

        ``advert://`` and ``sqlasyncadvert://`` select the ADVERT backend,
        ``redis://`` the Redis backend and ``tcp://`` the ZMQ backend. Each
        backend module is imported only inside its matching branch.

        Args:
            coordination_url: URL of the coordination service. It is split by
                ``self.__parse_url`` into scheme, username, password, host,
                port and dbtype.

        Returns:
            The object created by calling the selected ``bigjob_coordination``.

        Raises:
            BigJobError: If the scheme is not supported or the backend module
                could not be imported.
        """
        # Stays None when the import fails or no branch matches.
        bigjob_coordination = None
        if (coordination_url.startswith("advert://")
                or coordination_url.startswith("sqlasyncadvert://")):
            try:
                from coordination.bigjob_coordination_advert import bigjob_coordination
                logger.debug("Utilizing ADVERT Backend")
            except Exception:
                logger.error("Advert Backend could not be loaded")
                self.__print_traceback()
        elif coordination_url.startswith("redis://"):
            try:
                from coordination.bigjob_coordination_redis import bigjob_coordination
                logger.debug("Utilizing Redis Backend")
            except Exception:
                logger.error("Error loading pyredis.")
                self.__print_traceback()
        elif coordination_url.startswith("tcp://"):
            try:
                from coordination.bigjob_coordination_zmq import bigjob_coordination
                logger.debug("Utilizing ZMQ Backend")
            except Exception:
                logger.error(
                    "ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                    + "PYZMQ (http://zeromq.github.com/pyzmq/)")
                self.__print_traceback()
        else:
            logger.error("No suitable coordination backend found.")

        # check whether coordination subsystem could be initialized
        if bigjob_coordination is None:
            # Report the actual scheme (not a fixed backend name); the full
            # URL is left out because it may carry credentials.
            raise BigJobError(
                "Could not initialize coordination subsystem for URL scheme: "
                + coordination_url.split("://", 1)[0])

        logger.debug("Parsing URL: " + coordination_url)
        scheme, username, password, host, port, dbtype = self.__parse_url(
            coordination_url)

        # A port of -1 from the URL parser is handed to the backend as None.
        if port == -1:
            port = None
        return bigjob_coordination(server=host,
                                   server_port=port,
                                   username=username,
                                   password=password,
                                   dbtype=dbtype,
                                   url_prefix=scheme)
    def __init__(self, args):
        """Set up the agent and start its launcher and monitoring threads.

        Reads the configuration defaults, initializes the resource manager,
        connects to the coordination backend selected by the scheme of the
        coordination URL, sets the pilot state to running and starts two
        background threads.

        Args:
            args: Argument sequence. ``args[1]`` is the coordination URL and
                ``args[2]`` the pilot (base) URL.

        Raises:
            KeyError: If the config defaults lack ``cpr``, ``shell`` or
                ``mpirun``.
            RuntimeError: If no coordination backend could be loaded for the
                scheme of the coordination URL.
        """
        self.coordination_url = args[1]
        # objects to store running jobs and processes
        self.jobs = []
        self.processes = {}
        self.freenodes = []
        self.busynodes = []
        self.restarted = {}

        # read config file (looked up one directory above this module)
        conf_file = os.path.dirname(os.path.abspath(__file__)) + "/../" + CONFIG_FILE
        config = ConfigParser.ConfigParser()
        logging.debug("read configfile: " + conf_file)
        config.read(conf_file)
        default_dict = config.defaults()
        self.CPR = default_dict["cpr"]
        self.SHELL = default_dict["shell"]
        self.MPIRUN = default_dict["mpirun"]
        logging.debug("cpr: " + self.CPR + " mpi: " + self.MPIRUN + " shell: " + self.SHELL)

        # init rms (SGE/PBS)
        self.init_rms()

        self.failed_polls = 0

        ##############################################################################
        # initialization of coordination and communication subsystem
        self.base_url = args[2]
        logging.debug("BigJob Agent arguments: " + str(args))
        logging.debug("Initialize C&C subsystem to pilot-url: " + self.base_url)

        # Pre-bind the name: a failed import or an unknown scheme is then
        # reported explicitly below instead of as an UnboundLocalError.
        bigjob_coordination = None
        if self.coordination_url.startswith("advert://"):
            try:
                from coordination.bigjob_coordination_advert import bigjob_coordination

                logging.debug("Utilizing ADVERT Backend: " + self.coordination_url)
            except Exception:
                logging.error("Advert Backend could not be loaded")
        elif self.coordination_url.startswith("redis://"):
            try:
                from coordination.bigjob_coordination_redis import bigjob_coordination

                logging.debug("Utilizing Redis Backend: " + self.coordination_url + ".")
            except Exception:
                # Uses ``logging`` like every other call in this method.
                logging.error("Error loading pyredis. Please verify Redis is configured properly.")
        elif self.coordination_url.startswith("tcp://"):
            try:
                from coordination.bigjob_coordination_zmq import bigjob_coordination

                logging.debug("Utilizing ZMQ Backend")
            except Exception:
                logging.error(
                    "ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                    + "PYZMQ (http://zeromq.github.com/pyzmq/)"
                )

        if bigjob_coordination is None:
            # Only the scheme is reported; the full URL may carry credentials.
            raise RuntimeError(
                "Could not initialize coordination subsystem for URL scheme: "
                + self.coordination_url.split("://", 1)[0]
            )

        self.coordination = bigjob_coordination(server_connect_url=self.coordination_url)

        # update state of pilot job to running
        self.coordination.set_pilot_state(self.base_url, str(bigjob.state.Running), False)

        ##############################################################################
        # start background thread for polling new jobs and monitoring current jobs
        self.resource_lock = threading.RLock()
        self.threadpool = ThreadPool(THREAD_POOL_SIZE)

        self.launcher_thread = threading.Thread(target=self.dequeue_new_jobs)
        self.launcher_thread.start()

        self.monitoring_thread = threading.Thread(target=self.start_background_thread)
        self.monitoring_thread.start()
Example #4
0
    def __init__(self, args):
        """Set up the BigJob agent and start its worker threads.

        Reads the agent configuration, creates and enters the per-BigJob
        working directory, connects to the coordination backend selected by
        the scheme of the coordination URL, sets the pilot state to running,
        determines the launch method, initializes the resource manager and
        starts the launcher and monitoring threads.

        Args:
            args: Argument sequence. ``args[1]`` is the coordination URL and
                ``args[2]`` the pilot (base) URL; when exactly four items are
                passed, ``args[3]`` is stored as the external queue URL.

        Raises:
            RuntimeError: If no coordination backend could be loaded for the
                scheme of the coordination URL.
        """
        self.coordination_url = args[1]
        # objects to store running jobs and processes
        self.jobs = []
        self.processes = {}
        self.freenodes = []
        self.busynodes = []
        self.restarted = {}

        # read config file: first next to the package, then under sys.prefix
        conf_file = os.path.dirname(
            os.path.abspath(__file__)) + "/../" + CONFIG_FILE
        if not os.path.exists(conf_file):
            conf_file = os.path.join(sys.prefix, CONFIG_FILE)
        logging.debug("read configfile: " + conf_file)
        config = ConfigParser.ConfigParser()
        config.read(conf_file)
        default_dict = config.defaults()
        self.CPR = False
        if "cpr" in default_dict:
            self.CPR = default_dict["cpr"]
        self.SHELL = "/bin/bash"
        if "shell" in default_dict:
            self.SHELL = default_dict["shell"]
        self.MPIRUN = "mpirun"
        # On TACC resources the default MPICH is
        # linked under mpirun_rsh
        if "mpirun" in default_dict:
            self.MPIRUN = default_dict["mpirun"]

        # Start from the module-level default. A separate local is used on
        # purpose: assigning to THREAD_POOL_SIZE inside this method would make
        # that name local and leave it unbound whenever no override is set.
        thread_pool_size = THREAD_POOL_SIZE
        if "number_executor_threads" in default_dict:
            thread_pool_size = int(default_dict["number_executor_threads"])

        self.OUTPUT_TAR = False
        if "create_output_tar" in default_dict:
            # NOTE(review): eval() executes whatever expression the config
            # file contains (presumably a boolean literal); consider a
            # stricter parser if the config file is not fully trusted.
            self.OUTPUT_TAR = eval(default_dict["create_output_tar"])
            logger.debug("Create output tar: %r", self.OUTPUT_TAR)

        self.failed_polls = 0

        ##############################################################################
        # initialization of coordination and communication subsystem
        self.base_url = args[2]
        self.cds_queue_url = None
        if len(args) == 4:
            self.cds_queue_url = args[3]
        logger.debug("External queue: " + str(self.cds_queue_url))
        self.id = self.__get_bj_id(self.base_url)
        logger.debug("BigJob Agent arguments: " + str(args))
        logger.debug("Initialize C&C subsystem to pilot-url: " + self.base_url)
        logger.debug("BigJob ID: %s" % self.id)

        # create bj directory
        self.work_dir = os.getcwd()
        if self.id not in self.work_dir:
            # working directory does not contain the BJ id yet: use a
            # sub-directory named after the id
            self.bj_dir = os.path.join(os.getcwd(), self.id)
            logger.debug("Agent working directory: %s" % self.bj_dir)
            try:
                os.makedirs(self.bj_dir)
            except OSError:
                logger.debug("Directory already exists.")
        else:
            # working directory already contains the BJ id
            self.bj_dir = os.getcwd()

        os.chdir(self.bj_dir)

        # Pre-bind the name: a failed import or an unknown scheme is then
        # reported explicitly below instead of as an UnboundLocalError.
        bigjob_coordination = None
        if (self.coordination_url.startswith("advert://")
                or self.coordination_url.startswith("sqlasyncadvert://")):
            try:
                from coordination.bigjob_coordination_advert import bigjob_coordination
                logging.debug("Utilizing ADVERT Backend: " +
                              self.coordination_url)
            except Exception:
                logger.error("Advert Backend could not be loaded")
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_exc(file=sys.stderr)
                traceback.print_tb(exc_traceback, file=sys.stderr)
        elif self.coordination_url.startswith("redis://"):
            try:
                from coordination.bigjob_coordination_redis import bigjob_coordination
                logger.debug("Utilizing Redis Backend: " +
                             self.coordination_url + ".")
            except Exception:
                logger.error(
                    "Error loading pyredis. Check configuration in bigjob_coordination_redis.py."
                )
        elif self.coordination_url.startswith("tcp://"):
            try:
                from coordination.bigjob_coordination_zmq import bigjob_coordination
                logger.debug("Utilizing ZMQ Backend")
            except Exception:
                logger.error(
                    "ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                    + "PYZMQ (http://zeromq.github.com/pyzmq/)")

        if bigjob_coordination is None:
            # Only the scheme is reported; the full URL may carry credentials.
            raise RuntimeError(
                "Could not initialize coordination subsystem for URL scheme: "
                + self.coordination_url.split("://", 1)[0])

        ###
        # Initiate coordination sub-system of both BJ agent and Pilot Data
        self.coordination = bigjob_coordination(
            server_connect_url=self.coordination_url)
        try:
            # initialize coordination subsystem of pilot data
            self.pilot_data_service = PilotDataService(
                coordination_url=self.coordination_url)
        except Exception:
            # Pilot-Data is treated as optional: failure is only logged.
            logger.warn("Pilot-Data could not be initialized.")

        # update state of pilot job to running
        logger.debug("set state to : " + str(bigjob.state.Running))
        self.coordination.set_pilot_state(self.base_url,
                                          str(bigjob.state.Running), False)
        self.pilot_description = self.coordination.get_pilot_description(
            self.base_url)
        try:
            self.pilot_description = ast.literal_eval(self.pilot_description)
        except Exception:
            logger.warn("Unable to parse pilot description")
            self.pilot_description = None

        ############################################################################
        # Detect launch method
        self.LAUNCH_METHOD = "ssh"
        if "launch_method" in default_dict:
            self.LAUNCH_METHOD = default_dict["launch_method"]

        self.LAUNCH_METHOD = self.__get_launch_method(self.LAUNCH_METHOD)

        logging.debug("Launch Method: " + self.LAUNCH_METHOD + " mpi: " +
                      self.MPIRUN + " shell: " + self.SHELL)

        # init rms (SGE/PBS)
        self.init_rms()

        ##############################################################################
        # start background thread for polling new jobs and monitoring current jobs
        # a threadpool size in the pilot description overrides the config value
        if (self.pilot_description is not None
                and "number_executor_threads" in self.pilot_description):
            thread_pool_size = int(
                self.pilot_description["number_executor_threads"])
        logger.debug("Creating executor thread pool of size: %d" %
                     (thread_pool_size))
        self.resource_lock = threading.RLock()
        self.threadpool = ThreadPool(thread_pool_size)

        self.launcher_thread = threading.Thread(target=self.dequeue_new_jobs)
        self.launcher_thread.start()

        self.monitoring_thread = threading.Thread(
            target=self.start_background_thread)
        self.monitoring_thread.start()
Example #5
0
    def __init__(self, args):
        """Set up the agent and start its launcher and monitoring threads.

        Reads the configuration defaults, initializes the resource manager,
        connects to the coordination backend selected by the scheme of the
        coordination URL, sets the pilot state to running and starts two
        background threads.

        Args:
            args: Argument sequence. ``args[1]`` is the coordination URL and
                ``args[2]`` the pilot (base) URL.

        Raises:
            KeyError: If the config defaults lack ``cpr``, ``shell`` or
                ``mpirun``.
            RuntimeError: If no coordination backend could be loaded for the
                scheme of the coordination URL.
        """
        self.coordination_url = args[1]
        # objects to store running jobs and processes
        self.jobs = []
        self.processes = {}
        self.freenodes = []
        self.busynodes = []
        self.restarted = {}

        # read config file (looked up one directory above this module)
        conf_file = os.path.dirname(
            os.path.abspath(__file__)) + "/../" + CONFIG_FILE
        config = ConfigParser.ConfigParser()
        logging.debug("read configfile: " + conf_file)
        config.read(conf_file)
        default_dict = config.defaults()
        self.CPR = default_dict["cpr"]
        self.SHELL = default_dict["shell"]
        self.MPIRUN = default_dict["mpirun"]
        logging.debug("cpr: " + self.CPR + " mpi: " + self.MPIRUN +
                      " shell: " + self.SHELL)

        # init rms (SGE/PBS)
        self.init_rms()

        self.failed_polls = 0

        ##############################################################################
        # initialization of coordination and communication subsystem
        self.base_url = args[2]
        logging.debug("BigJob Agent arguments: " + str(args))
        logging.debug("Initialize C&C subsystem to pilot-url: " +
                      self.base_url)

        # Pre-bind the name: a failed import or an unknown scheme is then
        # reported explicitly below instead of as an UnboundLocalError.
        bigjob_coordination = None
        if self.coordination_url.startswith("advert://"):
            try:
                from coordination.bigjob_coordination_advert import bigjob_coordination
                logging.debug("Utilizing ADVERT Backend: " +
                              self.coordination_url)
            except Exception:
                logging.error("Advert Backend could not be loaded")
        elif self.coordination_url.startswith("redis://"):
            try:
                from coordination.bigjob_coordination_redis import bigjob_coordination
                logging.debug(
                    "Utilizing Redis Backend: " + self.coordination_url +
                    ". Please make sure Redis server is configured in bigjob_coordination_redis.py"
                )
            except Exception:
                logging.error("Error loading pyredis.")
        elif self.coordination_url.startswith("tcp://"):
            try:
                from coordination.bigjob_coordination_zmq import bigjob_coordination
                logging.debug("Utilizing ZMQ Backend")
            except Exception:
                logging.error(
                    "ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                    + "PYZMQ (http://zeromq.github.com/pyzmq/)")

        if bigjob_coordination is None:
            # Only the scheme is reported; the full URL may carry credentials.
            raise RuntimeError(
                "Could not initialize coordination subsystem for URL scheme: "
                + self.coordination_url.split("://", 1)[0])

        self.coordination = bigjob_coordination(
            server_connect_url=self.coordination_url)

        # update state of pilot job to running
        self.coordination.set_pilot_state(self.base_url,
                                          str(bigjob.state.Running), False)

        ##############################################################################
        # start background thread for polling new jobs and monitoring current jobs
        self.resource_lock = threading.RLock()
        self.threadpool = ThreadPool(THREAD_POOL_SIZE)

        self.launcher_thread = threading.Thread(target=self.dequeue_new_jobs)
        self.launcher_thread.start()

        self.monitoring_thread = threading.Thread(
            target=self.start_background_thread)
        self.monitoring_thread.start()
Example #6
0
    def __init__(self, args):
        """Set up the BigJob agent and start its worker threads.

        Reads the agent configuration, initializes the resource manager,
        creates and enters the per-BigJob working directory, connects to the
        coordination backend selected by the scheme of the coordination URL,
        sets the pilot state to running and starts the launcher and
        monitoring threads.

        Args:
            args: Argument sequence. ``args[1]`` is the coordination URL and
                ``args[2]`` the pilot (base) URL; when exactly four items are
                passed, ``args[3]`` is stored as the external queue URL.

        Raises:
            RuntimeError: If no coordination backend could be loaded for the
                scheme of the coordination URL.
        """
        self.coordination_url = args[1]
        # objects to store running jobs and processes
        self.jobs = []
        self.processes = {}
        self.freenodes = []
        self.busynodes = []
        self.restarted = {}

        # read config file: first next to the package, then under sys.prefix
        conf_file = os.path.dirname(os.path.abspath(__file__)) + "/../" + CONFIG_FILE
        if not os.path.exists(conf_file):
            conf_file = os.path.join(sys.prefix, CONFIG_FILE)
        logging.debug("read configfile: " + conf_file)
        config = ConfigParser.ConfigParser()
        config.read(conf_file)
        default_dict = config.defaults()
        self.CPR = False
        if "cpr" in default_dict:
            self.CPR = default_dict["cpr"]
        self.SHELL = "/bin/bash"
        if "shell" in default_dict:
            self.SHELL = default_dict["shell"]
        self.MPIRUN = "mpirun"
        # On TACC resources the default MPICH is
        # linked under mpirun_rsh
        if "mpirun" in default_dict:
            self.MPIRUN = default_dict["mpirun"]
        self.OUTPUT_TAR = False
        if "create_output_tar" in default_dict:
            # NOTE(review): eval() executes whatever expression the config
            # file contains (presumably a boolean literal); consider a
            # stricter parser if the config file is not fully trusted.
            self.OUTPUT_TAR = eval(default_dict["create_output_tar"])
            logger.debug("Create output tar: %r", self.OUTPUT_TAR)

        # "ssh" is used as-is when the config names no launch method
        self.LAUNCH_METHOD = "ssh"
        if "launch_method" in default_dict:
            self.LAUNCH_METHOD = self.__get_launch_method(default_dict["launch_method"])

        logging.debug("Launch Method: " + self.LAUNCH_METHOD + " mpi: " + self.MPIRUN + " shell: " + self.SHELL)

        # init rms (SGE/PBS)
        self.init_rms()
        self.failed_polls = 0

        ##############################################################################
        # initialization of coordination and communication subsystem
        self.base_url = args[2]
        self.cds_queue_url = None
        if len(args) == 4:
            self.cds_queue_url = args[3]
        logger.debug("External queue: " + str(self.cds_queue_url))
        self.id = self.__get_bj_id(self.base_url)
        logger.debug("BigJob Agent arguments: " + str(args))
        logger.debug("Initialize C&C subsystem to pilot-url: " + self.base_url)
        logger.debug("BigJob ID: %s"%self.id)

        # create bj directory
        self.work_dir = os.getcwd()
        if self.id not in self.work_dir:
            # working directory does not contain the BJ id yet: use a
            # sub-directory named after the id
            self.bj_dir = os.path.join(os.getcwd(), self.id)
            logger.debug("Agent working directory: %s"%self.bj_dir)
            try:
                os.makedirs(self.bj_dir)
            except OSError:
                logger.debug("Directory already exists.")
        else:
            # working directory already contains the BJ id
            self.bj_dir = os.getcwd()

        os.chdir(self.bj_dir)

        # Pre-bind the name: a failed import or an unknown scheme is then
        # reported explicitly below instead of as an UnboundLocalError.
        bigjob_coordination = None
        if (self.coordination_url.startswith("advert://")
                or self.coordination_url.startswith("sqlasyncadvert://")):
            try:
                from coordination.bigjob_coordination_advert import bigjob_coordination
                logging.debug("Utilizing ADVERT Backend: " + self.coordination_url)
            except Exception:
                logger.error("Advert Backend could not be loaded")
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_exc(file=sys.stderr)
                traceback.print_tb(exc_traceback, file=sys.stderr)
        elif self.coordination_url.startswith("redis://"):
            try:
                from coordination.bigjob_coordination_redis import bigjob_coordination
                logger.debug("Utilizing Redis Backend: " + self.coordination_url + ". Please make sure Redis server is configured in bigjob_coordination_redis.py")
            except Exception:
                logger.error("Error loading pyredis.")
        elif self.coordination_url.startswith("tcp://"):
            try:
                from coordination.bigjob_coordination_zmq import bigjob_coordination
                logger.debug("Utilizing ZMQ Backend")
            except Exception:
                logger.error("ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                      +"PYZMQ (http://zeromq.github.com/pyzmq/)")

        if bigjob_coordination is None:
            # Only the scheme is reported; the full URL may carry credentials.
            raise RuntimeError("Could not initialize coordination subsystem for URL scheme: "
                               + self.coordination_url.split("://", 1)[0])

        ###
        # Initiate coordination sub-system of both BJ agent and Pilot Data
        self.coordination = bigjob_coordination(server_connect_url=self.coordination_url)
        try:
            # initialize coordination subsystem of pilot data
            self.pilot_data_service = PilotDataService(coordination_url=self.coordination_url)
        except Exception:
            # Pilot-Data is treated as optional: failure is only logged.
            logger.warn("Pilot-Data could not be initialized.")

        # update state of pilot job to running
        logger.debug("set state to : " +  str(bigjob.state.Running))
        self.coordination.set_pilot_state(self.base_url, str(bigjob.state.Running), False)
        self.pilot_description = self.coordination.get_pilot_description(self.base_url)

        ##############################################################################
        # start background thread for polling new jobs and monitoring current jobs
        self.resource_lock = threading.RLock()
        self.threadpool = ThreadPool(THREAD_POOL_SIZE)

        self.launcher_thread = threading.Thread(target=self.dequeue_new_jobs)
        self.launcher_thread.start()

        self.monitoring_thread = threading.Thread(target=self.start_background_thread)
        self.monitoring_thread.start()
Example #7
0
    def __init__(self, args):
        """Set up the BigJob agent and start its worker threads.

        Reads the configuration defaults, initializes the resource manager,
        creates and enters a working directory named after the BigJob id,
        connects to the coordination backend selected by the scheme of the
        coordination URL, sets the pilot state to running and starts the
        launcher and monitoring threads.

        Args:
            args: Argument sequence. ``args[1]`` is the coordination URL and
                ``args[2]`` the pilot (base) URL.

        Raises:
            KeyError: If the config defaults lack ``cpr``, ``shell``,
                ``mpirun`` or ``launch_method``.
            RuntimeError: If no coordination backend could be loaded for the
                scheme of the coordination URL.
        """
        self.coordination_url = args[1]
        # objects to store running jobs and processes
        self.jobs = []
        self.processes = {}
        self.freenodes = []
        self.busynodes = []
        self.restarted = {}

        # read config file (looked up one directory above this module)
        conf_file = os.path.dirname(os.path.abspath(__file__)) + "/../" + CONFIG_FILE
        config = ConfigParser.ConfigParser()
        logging.debug("read configfile: " + conf_file)
        config.read(conf_file)
        default_dict = config.defaults()
        self.CPR = default_dict["cpr"]
        self.SHELL = default_dict["shell"]
        self.MPIRUN = default_dict["mpirun"]
        self.LAUNCH_METHOD = self.__get_launch_method(default_dict["launch_method"])

        logging.debug("Launch Method: " + self.LAUNCH_METHOD + " mpi: " + self.MPIRUN + " shell: " + self.SHELL)

        # init rms (SGE/PBS)
        self.init_rms()
        self.failed_polls = 0

        ##############################################################################
        # initialization of coordination and communication subsystem
        self.base_url = args[2]
        self.id = self.__get_bj_id(self.base_url)
        logger.debug("BigJob Agent arguments: " + str(args))
        logger.debug("Initialize C&C subsystem to pilot-url: " + self.base_url)
        logger.debug("BigJob ID: %s"%self.id)

        # create bj directory
        self.bj_dir = os.path.join(os.getcwd(), self.id)
        try:
            os.makedirs(self.bj_dir)
        except OSError:
            logger.debug("Directory already exists.")

        os.chdir(self.bj_dir)

        # Pre-bind the name: a failed import or an unknown scheme is then
        # reported explicitly below instead of as an UnboundLocalError.
        bigjob_coordination = None
        if (self.coordination_url.startswith("advert://")
                or self.coordination_url.startswith("sqlasyncadvert://")):
            try:
                from coordination.bigjob_coordination_advert import bigjob_coordination
                logging.debug("Utilizing ADVERT Backend: " + self.coordination_url)
            except Exception:
                logger.error("Advert Backend could not be loaded")
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_exc(file=sys.stderr)
                traceback.print_tb(exc_traceback, file=sys.stderr)
        elif self.coordination_url.startswith("redis://"):
            try:
                from coordination.bigjob_coordination_redis import bigjob_coordination
                logger.debug("Utilizing Redis Backend: " + self.coordination_url + ". Please make sure Redis server is configured in bigjob_coordination_redis.py")
            except Exception:
                logger.error("Error loading pyredis.")
        elif self.coordination_url.startswith("tcp://"):
            try:
                from coordination.bigjob_coordination_zmq import bigjob_coordination
                logger.debug("Utilizing ZMQ Backend")
            except Exception:
                logger.error("ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                      +"PYZMQ (http://zeromq.github.com/pyzmq/)")

        if bigjob_coordination is None:
            # Only the scheme is reported; the full URL may carry credentials.
            raise RuntimeError("Could not initialize coordination subsystem for URL scheme: "
                               + self.coordination_url.split("://", 1)[0])

        self.coordination = bigjob_coordination(server_connect_url=self.coordination_url)

        # update state of pilot job to running
        self.coordination.set_pilot_state(self.base_url, str(bigjob.state.Running), False)

        ##############################################################################
        # start background thread for polling new jobs and monitoring current jobs
        self.resource_lock = threading.RLock()
        self.threadpool = ThreadPool(THREAD_POOL_SIZE)

        self.launcher_thread = threading.Thread(target=self.dequeue_new_jobs)
        self.launcher_thread.start()

        self.monitoring_thread = threading.Thread(target=self.start_background_thread)
        self.monitoring_thread.start()