def __init_coordination(self, coordination_url):
    """Load the coordination backend matching *coordination_url* and
    return an initialized coordination object.

    Supported URL schemes: advert:// and sqlasyncadvert:// (SAGA Advert),
    redis:// (Redis), tcp:// (ZeroMQ).

    Raises BigJobError when no backend could be loaded for the URL.
    """
    # Import only the backend matching the URL scheme -- each backend
    # module pulls in its own optional third-party dependencies.
    bigjob_coordination = None
    if (coordination_url.startswith("advert://")
            or coordination_url.startswith("sqlasyncadvert://")):
        try:
            from coordination.bigjob_coordination_advert import bigjob_coordination
            logger.debug("Utilizing ADVERT Backend")
        except:
            logger.error("Advert Backend could not be loaded")
    elif coordination_url.startswith("redis://"):
        try:
            from coordination.bigjob_coordination_redis import bigjob_coordination
            logger.debug("Utilizing Redis Backend")
        except:
            logger.error("Error loading pyredis.")
    elif coordination_url.startswith("tcp://"):
        try:
            from coordination.bigjob_coordination_zmq import bigjob_coordination
            logger.debug("Utilizing ZMQ Backend")
        except:
            logger.error("ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                         + "PYZMQ (http://zeromq.github.com/pyzmq/)")
    else:
        logger.error("No suitable coordination backend found.")

    # BUGFIX: previously an unknown scheme (or a failed backend import)
    # fell through to the constructor call below and died with a bare
    # NameError. Fail fast with a descriptive error instead (same guard
    # pattern as the newer version of this method).
    if bigjob_coordination is None:
        raise BigJobError("Could not initialize coordination subsystem for URL: "
                          + coordination_url)

    logger.debug("Parsing URL: " + coordination_url)
    scheme, username, password, host, port, dbtype = self.__parse_url(coordination_url)
    if port == -1:  # __parse_url signals "no port in URL" with -1
        port = None
    coordination = bigjob_coordination(server=host, server_port=port,
                                       username=username, password=password,
                                       dbtype=dbtype, url_prefix=scheme)
    return coordination
def __init_coordination(self, coordination_url):
    """Pick, import and instantiate the coordination backend for the
    given URL (advert/sqlasyncadvert, redis or zmq tcp)."""
    # Lazily import only the selected backend so that the dependencies
    # of the other backends need not be installed.
    bigjob_coordination = None
    if coordination_url.startswith(("advert://", "sqlasyncadvert://")):
        try:
            from coordination.bigjob_coordination_advert import bigjob_coordination
            logger.debug("Utilizing ADVERT Backend")
        except:
            logger.error("Advert Backend could not be loaded")
    elif coordination_url.startswith("redis://"):
        try:
            from coordination.bigjob_coordination_redis import bigjob_coordination
            logger.debug("Utilizing Redis Backend")
        except:
            logger.error("Error loading pyredis.")
            self.__print_traceback()
    elif coordination_url.startswith("tcp://"):
        try:
            from coordination.bigjob_coordination_zmq import bigjob_coordination
            logger.debug("Utilizing ZMQ Backend")
        except:
            logger.error("ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                         + "PYZMQ (http://zeromq.github.com/pyzmq/)")
    else:
        logger.error("No suitable coordination backend found.")

    # check whether coordination subsystem could be initialized
    if bigjob_coordination is None:
        raise BigJobError("Could not initialize coordination subsystem (Redis)")

    logger.debug("Parsing URL: " + coordination_url)
    scheme, username, password, host, port, dbtype = self.__parse_url(coordination_url)
    if port == -1:  # -1 means the URL carried no port
        port = None
    backend = bigjob_coordination(server=host, server_port=port,
                                  username=username, password=password,
                                  dbtype=dbtype, url_prefix=scheme)
    return backend
def __init__(self, args):
    """BigJob agent constructor.

    args[1] -- coordination URL (advert://, redis:// or tcp://)
    args[2] -- pilot (base) URL this agent serves

    Reads the agent config file, initializes the resource management
    system, connects the coordination backend, marks the pilot as
    Running and starts the launcher/monitoring background threads.
    """
    self.coordination_url = args[1]
    # objects to store running jobs and processes
    self.jobs = []
    self.processes = {}
    self.freenodes = []
    self.busynodes = []
    self.restarted = {}

    # read config file (located one directory above this module)
    conf_file = os.path.dirname(os.path.abspath(__file__)) + "/../" + CONFIG_FILE
    config = ConfigParser.ConfigParser()
    logging.debug("read configfile: " + conf_file)
    config.read(conf_file)
    default_dict = config.defaults()
    self.CPR = default_dict["cpr"]
    self.SHELL = default_dict["shell"]
    self.MPIRUN = default_dict["mpirun"]
    logging.debug("cpr: " + self.CPR + " mpi: " + self.MPIRUN + " shell: " + self.SHELL)

    # init rms (SGE/PBS)
    self.init_rms()
    self.failed_polls = 0

    ##########################################################################
    # initialization of coordination and communication subsystem
    self.base_url = args[2]
    logging.debug("BigJob Agent arguments: " + str(args))
    logging.debug("Initialize C&C subsystem to pilot-url: " + self.base_url)

    # Import only the coordination backend matching the URL scheme.
    bigjob_coordination = None
    if self.coordination_url.startswith("advert://"):
        try:
            from coordination.bigjob_coordination_advert import bigjob_coordination
            logging.debug("Utilizing ADVERT Backend: " + self.coordination_url)
        except:
            logging.error("Advert Backend could not be loaded")
    elif self.coordination_url.startswith("redis://"):
        try:
            from coordination.bigjob_coordination_redis import bigjob_coordination
            logging.debug("Utilizing Redis Backend: " + self.coordination_url + ".")
        except:
            # CONSISTENCY FIX: this branch used logger.error while the
            # rest of this method uses the logging module directly.
            logging.error("Error loading pyredis. Please verify Redis is configured properly.")
    elif self.coordination_url.startswith("tcp://"):
        try:
            from coordination.bigjob_coordination_zmq import bigjob_coordination
            logging.debug("Utilizing ZMQ Backend")
        except:
            logging.error("ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                          + "PYZMQ (http://zeromq.github.com/pyzmq/)")

    # BUGFIX: fail fast with a clear message instead of a bare NameError
    # when the scheme is unknown or the backend import failed.
    if bigjob_coordination is None:
        raise RuntimeError("Could not initialize coordination backend for URL: "
                           + self.coordination_url)

    self.coordination = bigjob_coordination(server_connect_url=self.coordination_url)
    # update state of pilot job to running
    self.coordination.set_pilot_state(self.base_url, str(bigjob.state.Running), False)

    ##########################################################################
    # start background threads for polling new jobs and monitoring current jobs
    self.resource_lock = threading.RLock()
    self.threadpool = ThreadPool(THREAD_POOL_SIZE)
    self.launcher_thread = threading.Thread(target=self.dequeue_new_jobs)
    self.launcher_thread.start()
    self.monitoring_thread = threading.Thread(target=self.start_background_thread)
    self.monitoring_thread.start()
def __init__(self, args):
    """BigJob agent constructor.

    args[1] -- coordination URL (advert://, sqlasyncadvert://, redis:// or tcp://)
    args[2] -- pilot (base) URL this agent serves
    args[3] -- optional external CDS queue URL

    Reads the agent config file, creates/enters the agent working
    directory, connects the coordination backend (and Pilot-Data),
    marks the pilot as Running and starts the launcher/monitoring
    background threads.
    """
    self.coordination_url = args[1]
    # objects to store running jobs and processes
    self.jobs = []
    self.processes = {}
    self.freenodes = []
    self.busynodes = []
    self.restarted = {}

    # read config file; fall back to the installation prefix when the
    # source-tree location does not exist
    conf_file = os.path.dirname(os.path.abspath(__file__)) + "/../" + CONFIG_FILE
    if not os.path.exists(conf_file):
        conf_file = os.path.join(sys.prefix, CONFIG_FILE)
    logging.debug("read configfile: " + conf_file)
    config = ConfigParser.ConfigParser()
    config.read(conf_file)
    default_dict = config.defaults()

    self.CPR = False
    if default_dict.has_key("cpr"):
        self.CPR = default_dict["cpr"]
    self.SHELL = "/bin/bash"
    if default_dict.has_key("shell"):
        self.SHELL = default_dict["shell"]
    self.MPIRUN = "mpirun"
    # On TACC resources the default MPICH is linked under mpirun_rsh
    if default_dict.has_key("mpirun"):
        self.MPIRUN = default_dict["mpirun"]

    # BUGFIX: the original assigned THREAD_POOL_SIZE here (and again from
    # the pilot description below), which makes the name function-local
    # and raises UnboundLocalError at ThreadPool(...) whenever neither
    # source sets number_executor_threads. Use a distinct local seeded
    # from the module-level default instead.
    thread_pool_size = THREAD_POOL_SIZE
    if default_dict.has_key("number_executor_threads"):
        thread_pool_size = int(default_dict["number_executor_threads"])

    self.OUTPUT_TAR = False
    if default_dict.has_key("create_output_tar"):
        # NOTE(review): eval() on a config value executes arbitrary code;
        # expected values are "True"/"False" -- consider ast.literal_eval.
        self.OUTPUT_TAR = eval(default_dict["create_output_tar"])
    logger.debug("Create output tar: %r", self.OUTPUT_TAR)

    self.failed_polls = 0

    ##########################################################################
    # initialization of coordination and communication subsystem
    self.base_url = args[2]
    self.cds_queue_url = None
    if len(args) == 4:
        self.cds_queue_url = args[3]
        logger.debug("External queue: " + str(self.cds_queue_url))

    self.id = self.__get_bj_id(self.base_url)
    logger.debug("BigJob Agent arguments: " + str(args))
    logger.debug("Initialize C&C subsystem to pilot-url: " + self.base_url)
    logger.debug("BigJob ID: %s" % self.id)

    # create bj directory (unless the cwd already contains the BJ id)
    self.work_dir = os.getcwd()
    if self.work_dir.find(self.id) == -1:
        self.bj_dir = os.path.join(os.getcwd(), self.id)
        logger.debug("Agent working directory: %s" % self.bj_dir)
        try:
            os.makedirs(self.bj_dir)
        except:
            logger.debug("Directory already exists.")
    else:
        self.bj_dir = os.getcwd()
    os.chdir(self.bj_dir)

    # import only the coordination backend matching the URL scheme
    if (self.coordination_url.startswith("advert://")
            or self.coordination_url.startswith("sqlasyncadvert://")):
        try:
            from coordination.bigjob_coordination_advert import bigjob_coordination
            logging.debug("Utilizing ADVERT Backend: " + self.coordination_url)
        except:
            logger.error("Advert Backend could not be loaded")
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exc(file=sys.stderr)
            traceback.print_tb(exc_traceback, file=sys.stderr)
    elif self.coordination_url.startswith("redis://"):
        try:
            from coordination.bigjob_coordination_redis import bigjob_coordination
            logger.debug("Utilizing Redis Backend: " + self.coordination_url + ".")
        except:
            logger.error("Error loading pyredis. Check configuration in bigjob_coordination_redis.py.")
    elif self.coordination_url.startswith("tcp://"):
        try:
            from coordination.bigjob_coordination_zmq import bigjob_coordination
            logger.debug("Utilizing ZMQ Backend")
        except:
            logger.error("ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                         + "PYZMQ (http://zeromq.github.com/pyzmq/)")

    ###
    # Initiate coordination sub-system of both BJ agent and Pilot Data
    self.coordination = bigjob_coordination(server_connect_url=self.coordination_url)
    try:
        # initialize coordination subsystem of pilot data
        self.pilot_data_service = PilotDataService(coordination_url=self.coordination_url)
    except:
        logger.warn("Pilot-Data could not be initialized.")

    # update state of pilot job to running
    logger.debug("set state to : " + str(bigjob.state.Running))
    self.coordination.set_pilot_state(self.base_url, str(bigjob.state.Running), False)
    self.pilot_description = self.coordination.get_pilot_description(self.base_url)
    try:
        # the description is stored as a string repr of a dict
        self.pilot_description = ast.literal_eval(self.pilot_description)
    except:
        logger.warn("Unable to parse pilot description")
        self.pilot_description = None

    ##########################################################################
    # Detect launch method
    self.LAUNCH_METHOD = "ssh"
    if default_dict.has_key("launch_method"):
        self.LAUNCH_METHOD = default_dict["launch_method"]
    self.LAUNCH_METHOD = self.__get_launch_method(self.LAUNCH_METHOD)
    logging.debug("Launch Method: " + self.LAUNCH_METHOD + " mpi: "
                  + self.MPIRUN + " shell: " + self.SHELL)

    # init rms (SGE/PBS)
    self.init_rms()

    ##########################################################################
    # start background threads for polling new jobs and monitoring current jobs
    # a thread pool size in the pilot description overrides the config file
    if self.pilot_description != None and self.pilot_description.has_key("number_executor_threads"):
        thread_pool_size = int(self.pilot_description["number_executor_threads"])
    logger.debug("Creating executor thread pool of size: %d" % (thread_pool_size))
    self.resource_lock = threading.RLock()
    self.threadpool = ThreadPool(thread_pool_size)
    self.launcher_thread = threading.Thread(target=self.dequeue_new_jobs)
    self.launcher_thread.start()
    self.monitoring_thread = threading.Thread(target=self.start_background_thread)
    self.monitoring_thread.start()
def __init__(self, args):
    """Set up the agent: load the config file, initialize the RMS,
    connect the coordination backend and spawn the worker threads.

    args[1] -- coordination URL, args[2] -- pilot (base) URL.
    """
    self.coordination_url = args[1]

    # bookkeeping for jobs/processes and node allocation
    self.jobs = []
    self.processes = {}
    self.freenodes = []
    self.busynodes = []
    self.restarted = {}

    # the agent config file lives one directory above this module
    cfg_path = os.path.dirname(os.path.abspath(__file__)) + "/../" + CONFIG_FILE
    parser = ConfigParser.ConfigParser()
    logging.debug("read configfile: " + cfg_path)
    parser.read(cfg_path)
    defaults = parser.defaults()
    self.CPR = defaults["cpr"]
    self.SHELL = defaults["shell"]
    self.MPIRUN = defaults["mpirun"]
    logging.debug("cpr: " + self.CPR + " mpi: " + self.MPIRUN + " shell: " + self.SHELL)

    # init rms (SGE/PBS)
    self.init_rms()
    self.failed_polls = 0

    ##########################################################################
    # coordination / communication subsystem
    self.base_url = args[2]
    logging.debug("BigJob Agent arguments: " + str(args))
    logging.debug("Initialize C&C subsystem to pilot-url: " + self.base_url)

    # import only the backend that matches the URL scheme
    url = self.coordination_url
    if url.startswith("advert://"):
        try:
            from coordination.bigjob_coordination_advert import bigjob_coordination
            logging.debug("Utilizing ADVERT Backend: " + self.coordination_url)
        except:
            logging.error("Advert Backend could not be loaded")
    elif url.startswith("redis://"):
        try:
            from coordination.bigjob_coordination_redis import bigjob_coordination
            logging.debug("Utilizing Redis Backend: " + self.coordination_url
                          + ". Please make sure Redis server is configured in bigjob_coordination_redis.py")
        except:
            logging.error("Error loading pyredis.")
    elif url.startswith("tcp://"):
        try:
            from coordination.bigjob_coordination_zmq import bigjob_coordination
            logging.debug("Utilizing ZMQ Backend")
        except:
            logging.error("ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                          + "PYZMQ (http://zeromq.github.com/pyzmq/)")

    self.coordination = bigjob_coordination(server_connect_url=self.coordination_url)
    # advertise the pilot as running
    self.coordination.set_pilot_state(self.base_url, str(bigjob.state.Running), False)

    ##########################################################################
    # background threads: one dequeues new jobs, one monitors running ones
    self.resource_lock = threading.RLock()
    self.threadpool = ThreadPool(THREAD_POOL_SIZE)
    self.launcher_thread = threading.Thread(target=self.dequeue_new_jobs)
    self.launcher_thread.start()
    self.monitoring_thread = threading.Thread(target=self.start_background_thread)
    self.monitoring_thread.start()
def __init__(self, args):
    """BigJob agent constructor.

    args[1] -- coordination URL (advert://, sqlasyncadvert://, redis:// or tcp://)
    args[2] -- pilot (base) URL this agent serves
    args[3] -- optional external CDS queue URL

    Reads the agent config file (with installation-prefix fallback),
    creates/enters the agent working directory, connects the
    coordination backend (and Pilot-Data), marks the pilot as Running
    and starts the launcher/monitoring background threads.
    """
    self.coordination_url = args[1]
    # objects to store running jobs and processes
    self.jobs = []
    self.processes = {}
    self.freenodes = []
    self.busynodes = []
    self.restarted = {}

    # read config file; fall back to the installation prefix when the
    # source-tree location does not exist
    conf_file = os.path.dirname(os.path.abspath(__file__)) + "/../" + CONFIG_FILE
    if not os.path.exists(conf_file):
        conf_file = os.path.join(sys.prefix, CONFIG_FILE)
    logging.debug("read configfile: " + conf_file)
    config = ConfigParser.ConfigParser()
    config.read(conf_file)
    default_dict = config.defaults()

    # config values, each with a built-in default
    self.CPR = False
    if default_dict.has_key("cpr"):
        self.CPR = default_dict["cpr"]
    self.SHELL = "/bin/bash"
    if default_dict.has_key("shell"):
        self.SHELL = default_dict["shell"]
    self.MPIRUN = "mpirun"
    # On TACC resources the default MPICH is linked under mpirun_rsh
    if default_dict.has_key("mpirun"):
        self.MPIRUN = default_dict["mpirun"]
    self.OUTPUT_TAR = False
    if default_dict.has_key("create_output_tar"):
        # NOTE(review): eval() on a config value executes arbitrary code;
        # expected values are "True"/"False" -- consider ast.literal_eval.
        self.OUTPUT_TAR = eval(default_dict["create_output_tar"])
    logger.debug("Create output tar: %r", self.OUTPUT_TAR)
    self.LAUNCH_METHOD = "ssh"
    if default_dict.has_key("launch_method"):
        self.LAUNCH_METHOD = self.__get_launch_method(default_dict["launch_method"])
    logging.debug("Launch Method: " + self.LAUNCH_METHOD + " mpi: "
                  + self.MPIRUN + " shell: " + self.SHELL)

    # init rms (SGE/PBS)
    self.init_rms()
    self.failed_polls = 0

    ##########################################################################
    # initialization of coordination and communication subsystem
    self.base_url = args[2]
    self.cds_queue_url = None
    if len(args) == 4:
        self.cds_queue_url = args[3]
        logger.debug("External queue: " + str(self.cds_queue_url))

    self.id = self.__get_bj_id(self.base_url)
    logger.debug("BigJob Agent arguments: " + str(args))
    logger.debug("Initialize C&C subsystem to pilot-url: " + self.base_url)
    logger.debug("BigJob ID: %s" % self.id)

    # create bj directory (unless the cwd already contains the BJ id)
    self.work_dir = os.getcwd()
    if self.work_dir.find(self.id) == -1:  # working directory already contains BJ id
        self.bj_dir = os.path.join(os.getcwd(), self.id)
        logger.debug("Agent working directory: %s" % self.bj_dir)
        try:
            os.makedirs(self.bj_dir)
        except:
            logger.debug("Directory already exists.")
    else:
        self.bj_dir = os.getcwd()
    os.chdir(self.bj_dir)

    # import only the coordination backend matching the URL scheme; each
    # backend has its own optional third-party dependencies
    if (self.coordination_url.startswith("advert://")
            or self.coordination_url.startswith("sqlasyncadvert://")):
        try:
            from coordination.bigjob_coordination_advert import bigjob_coordination
            logging.debug("Utilizing ADVERT Backend: " + self.coordination_url)
        except:
            logger.error("Advert Backend could not be loaded")
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exc(file=sys.stderr)
            traceback.print_tb(exc_traceback, file=sys.stderr)
    elif (self.coordination_url.startswith("redis://")):
        try:
            from coordination.bigjob_coordination_redis import bigjob_coordination
            logger.debug("Utilizing Redis Backend: " + self.coordination_url
                         + ". Please make sure Redis server is configured in bigjob_coordination_redis.py")
        except:
            logger.error("Error loading pyredis.")
    elif (self.coordination_url.startswith("tcp://")):
        try:
            from coordination.bigjob_coordination_zmq import bigjob_coordination
            logger.debug("Utilizing ZMQ Backend")
        except:
            logger.error("ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                         + "PYZMQ (http://zeromq.github.com/pyzmq/)")

    ###
    # Initiate coordination sub-system of both BJ agent and Pilot Data
    # NOTE(review): if no scheme matched (or the import failed) the next
    # line raises NameError -- unguarded in this version.
    self.coordination = bigjob_coordination(server_connect_url=self.coordination_url)
    try:
        # initialize coordination subsystem of pilot data
        self.pilot_data_service = PilotDataService(coordination_url=self.coordination_url)
    except:
        logger.warn("Pilot-Data could not be initialized.")

    # update state of pilot job to running
    logger.debug("set state to : " + str(bigjob.state.Running))
    self.coordination.set_pilot_state(self.base_url, str(bigjob.state.Running), False)
    # raw pilot description string as stored in the coordination backend
    self.pilot_description = self.coordination.get_pilot_description(self.base_url)

    ##########################################################################
    # start background threads for polling new jobs and monitoring current jobs
    self.resource_lock = threading.RLock()
    self.threadpool = ThreadPool(THREAD_POOL_SIZE)
    self.launcher_thread = threading.Thread(target=self.dequeue_new_jobs)
    self.launcher_thread.start()
    self.monitoring_thread = threading.Thread(target=self.start_background_thread)
    self.monitoring_thread.start()
def __init__(self, args):
    """BigJob agent constructor.

    args[1] -- coordination URL (advert://, sqlasyncadvert://, redis:// or tcp://)
    args[2] -- pilot (base) URL this agent serves

    Reads the agent config file, creates/enters the per-BigJob working
    directory, connects the coordination backend, marks the pilot as
    Running and starts the launcher/monitoring background threads.
    """
    self.coordination_url = args[1]
    # objects to store running jobs and processes
    self.jobs = []
    self.processes = {}
    self.freenodes = []
    self.busynodes = []
    self.restarted = {}

    # read config file (located one directory above this module)
    conf_file = os.path.dirname(os.path.abspath(__file__)) + "/../" + CONFIG_FILE
    config = ConfigParser.ConfigParser()
    logging.debug("read configfile: " + conf_file)
    config.read(conf_file)
    default_dict = config.defaults()
    # NOTE(review): unlike newer versions, these lookups have no defaults
    # and raise KeyError when a key is missing from the config file.
    self.CPR = default_dict["cpr"]
    self.SHELL = default_dict["shell"]
    self.MPIRUN = default_dict["mpirun"]
    self.LAUNCH_METHOD = self.__get_launch_method(default_dict["launch_method"])
    logging.debug("Launch Method: " + self.LAUNCH_METHOD + " mpi: "
                  + self.MPIRUN + " shell: " + self.SHELL)

    # init rms (SGE/PBS)
    self.init_rms()
    self.failed_polls = 0

    ##########################################################################
    # initialization of coordination and communication subsystem
    self.base_url = args[2]
    self.id = self.__get_bj_id(self.base_url)
    logger.debug("BigJob Agent arguments: " + str(args))
    logger.debug("Initialize C&C subsystem to pilot-url: " + self.base_url)
    logger.debug("BigJob ID: %s" % self.id)

    # create bj directory and make it the working directory
    self.bj_dir = os.path.join(os.getcwd(), self.id)
    try:
        os.makedirs(self.bj_dir)
    except:
        logger.debug("Directory already exists.")
    os.chdir(self.bj_dir)

    # import only the coordination backend matching the URL scheme
    if (self.coordination_url.startswith("advert://")
            or self.coordination_url.startswith("sqlasyncadvert://")):
        try:
            from coordination.bigjob_coordination_advert import bigjob_coordination
            logging.debug("Utilizing ADVERT Backend: " + self.coordination_url)
        except:
            logger.error("Advert Backend could not be loaded")
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_exc(file=sys.stderr)
            traceback.print_tb(exc_traceback, file=sys.stderr)
    elif (self.coordination_url.startswith("redis://")):
        try:
            from coordination.bigjob_coordination_redis import bigjob_coordination
            logger.debug("Utilizing Redis Backend: " + self.coordination_url
                         + ". Please make sure Redis server is configured in bigjob_coordination_redis.py")
        except:
            logger.error("Error loading pyredis.")
    elif (self.coordination_url.startswith("tcp://")):
        try:
            from coordination.bigjob_coordination_zmq import bigjob_coordination
            logger.debug("Utilizing ZMQ Backend")
        except:
            logger.error("ZMQ Backend not found. Please install ZeroMQ (http://www.zeromq.org/intro:get-the-software) and "
                         + "PYZMQ (http://zeromq.github.com/pyzmq/)")

    # NOTE(review): if no scheme matched (or the import failed) the next
    # line raises NameError -- unguarded in this version.
    self.coordination = bigjob_coordination(server_connect_url=self.coordination_url)
    # update state of pilot job to running
    self.coordination.set_pilot_state(self.base_url, str(bigjob.state.Running), False)

    ##########################################################################
    # start background threads for polling new jobs and monitoring current jobs
    self.resource_lock = threading.RLock()
    self.threadpool = ThreadPool(THREAD_POOL_SIZE)
    self.launcher_thread = threading.Thread(target=self.dequeue_new_jobs)
    self.launcher_thread.start()
    self.monitoring_thread = threading.Thread(target=self.start_background_thread)
    self.monitoring_thread.start()