class Premium:
    """Test harness: consumes messages from the ocs_dmcs_consume queue
    and prints each delivery to stdout."""

    def __init__(self):
        logging.basicConfig()
        # autoAck=true: the broker drops messages as they are delivered.
        broker_url = 'amqp://*****:*****@141.142.238.160:5672/%2Fbunny?autoAck=true'
        self._cons = Consumer(broker_url, 'ocs_dmcs_consume', "YAML")
        try:
            _thread.start_new_thread(self.do_it, ("thread-1", 2,))
        except Exception as e:
            # BUG FIX: original `except e:` referenced an undefined name and
            # would raise NameError instead of reporting the thread failure.
            print("Cannot start thread")
            print(e)

    def mycallback(self, ch, methon, properties, body):
        """Print every delivered message: method frame, properties, and body."""
        print(" ")
        print(">>>>>>>>>>>>>>><<<<<<<<<<<<<<<<")
        print((" [x] method Received %r" % methon))
        print((" [y] properties Received %r" % properties))
        print((" [z] body Received %r" % body))
        print("Message done")
        print("Still listening...")

    def do_it(self, threadname, delay):
        """Consumer-thread entry point; Consumer.run blocks inside this thread."""
        print("Before run call")
        self._cons.run(self.mycallback)
        print("After run call - not blocking")
class Premium:
    """Test harness: consumes from the ocs_dmcs_consume queue and prints
    each delivered message, holding the main thread alive for ~7 minutes."""

    def __init__(self):
        logging.basicConfig()
        # autoAck=true: the broker drops messages as they are delivered.
        broker_url = 'amqp://*****:*****@141.142.238.160:5672/%2Fbunny?autoAck=true'
        self._cons = Consumer(broker_url, 'ocs_dmcs_consume', "YAML")
        try:
            _thread.start_new_thread(self.do_it, ("thread-1", 2,))
        except Exception as e:
            # BUG FIX: original `except e:` referenced an undefined name and
            # would raise NameError instead of reporting the thread failure.
            print("Cannot start thread")
            print(e)
        # Keep the main thread alive while the daemon consumer thread listens.
        time.sleep(420)

    def mycallback(self, ch, methon, properties, body):
        """Print every delivered message: method frame, properties, and body."""
        print(" ")
        print(">>>>>>>>>>>>>>><<<<<<<<<<<<<<<<")
        print((" [x] method Received %r" % methon))
        print((" [y] properties Received %r" % properties))
        print((" [z] body Received %r" % body))
        print("Message done")
        print("Still listening...")

    def do_it(self, threadname, delay):
        """Consumer-thread entry point; Consumer.run blocks inside this thread."""
        print("Before run call")
        self._cons.run(self.mycallback)
        print("After run call - not blocking")
class Premium:
    """Test harness: listens on both the ar_foreman_consume and
    pp_foreman_consume queues (one thread each) and prints deliveries."""

    def __init__(self):
        logging.basicConfig()
        broker_url = 'amqp://*****:*****@141.142.238.160:5672/%2Fbunny'

        self._cons = Consumer(broker_url, 'ar_foreman_consume', "YAML")
        try:
            _thread.start_new_thread(self.do_it, ("thread-1", 2,))
        except Exception as e:
            # BUG FIX: original `except e:` referenced an undefined name and
            # would raise NameError instead of reporting the thread failure.
            print("Cannot start thread")
            print(e)

        self._cons2 = Consumer(broker_url, 'pp_foreman_consume', "YAML")
        try:
            _thread.start_new_thread(self.do_it2, ("thread-3", 2,))
        except Exception as e:
            # BUG FIX: same undefined-name defect as above.
            print("Cannot start thread")
            print(e)

        cdm = toolsmod.intake_yaml_file("L1SystemCfg.yaml")
        self.ccd_list = cdm['ROOT']['CCD_LIST']

    def mycallback(self, ch, methon, properties, body):
        """Print deliveries arriving on ar_foreman_consume."""
        print(" ")
        print(">>>>>>>>>>>>>>><<<<<<<<<<<<<<<<")
        print((" [z] body Received %r" % body))
        print(">>>>>>>>>>>>>>><<<<<<<<<<<<<<<<")

    def mycallback2(self, ch, methon, properties, body):
        """Print deliveries arriving on pp_foreman_consume."""
        print(" ")
        print("++++++++++++++-----------+++++++++++++++")
        print((" [z] body Received %r" % body))
        print("++++++++++++++-----------+++++++++++++++")

    def do_it(self, threadname, delay):
        """Thread entry for the ar_foreman consumer; run() blocks here."""
        print("Before run call")
        self._cons.run(self.mycallback)
        print("After run call - not blocking")

    def do_it2(self, threadname, delay):
        """Thread entry for the pp_foreman consumer; run() blocks here."""
        print("Before run call")
        self._cons2.run(self.mycallback2)
        print("After run call - not blocking")
class Premium:
    """Ack test harness: publishes via sp1 and listens on the
    ar_foreman_ack_publish queue, printing each delivery."""

    def __init__(self):
        logging.basicConfig()
        self.ack_test = True
        broker_url = 'amqp://*****:*****@141.142.208.191:5672/%2Fbunny'
        self.sp1 = SimplePublisher('amqp://*****:*****@141.142.208.191:5672/%2Fbunny')

        cdm = toolsmod.intake_yaml_file('ForemanCfg.yaml')
        # NOTE(review): bare ROOT here (vs the 'ROOT' string key used in the
        # sibling harness) — assumed to be a module-level constant; confirm.
        self.fdict = cdm[ROOT]['XFER_COMPONENTS']['ARCHIVE_FORWARDERS']
        self.fwdrs = list(self.fdict.keys())

        self._cons = Consumer(broker_url, 'ar_foreman_ack_publish', "YAML")
        try:
            _thread.start_new_thread(self.do_it, ("thread-1", 2,))
        except Exception as e:
            # BUG FIX: original `except e:` referenced an undefined name and
            # would raise NameError instead of reporting the thread failure.
            print("Cannot start thread")
            print(e)

    def mycallback(self, ch, methon, properties, body):
        """Print every delivered message: method frame, properties, and body."""
        print(" ")
        print(">>>>>>>>>>>>>>><<<<<<<<<<<<<<<<")
        print((" [x] method Received %r" % methon))
        print((" [y] properties Received %r" % properties))
        print((" [z] body Received %r" % body))
        print("Message done")
        print("Still listening...")

    def do_it(self, threadname, delay):
        """Consumer-thread entry point; Consumer.run blocks inside this thread."""
        print("Before run call")
        self._cons.run(self.mycallback)
        print("After run call - not blocking")
class Forwarder:
    """Presents a vanilla L1 Forwarder personality.

    In nightly operation, at least 21 of these components will be available
    at any time (one for each raft).
    """

    def __init__(self):
        self._registered = False
        # cfg data map... (with-block fixes the leaked file handle)
        with open('ForwarderCfg.yaml') as f:
            cdm = yaml.safe_load(f)
        try:
            self._name = cdm[NAME]
            self._passwd = cdm[PASSWD]
            self._fqn = cdm[FQN]
            self._base_broker_addr = cdm[BASE_BROKER_ADDR]
            self._consume_queue = cdm[CONSUME_QUEUE]
            #self._publish_queue = cdm[PUBLISH_QUEUE]
            self._hostname = cdm[HOSTNAME]
            self._ip_addr = cdm[IP_ADDR]
            self._DAQ_PATH = cdm['DAQ_PATH']
            ## XXX FIX: Put in config file
            self.CHECKSUM_ENABLED = False
        except KeyError:
            print("Missing base keywords in yaml file... Bailing out...")
            sys.exit(99)

        self._home_dir = "/home/" + self._name + "/"
        self._base_broker_url = ("amqp://" + self._name + ":" + self._passwd +
                                 "@" + str(self._base_broker_addr))

        self._msg_actions = {
            FORWARDER_HEALTH_CHECK: self.process_health_check,
            FORWARDER_JOB_PARAMS: self.process_job_params,
            'AR_FWDR_XFER_PARAMS': self.process_job_params,  # Here if AR case needs different handler
            'AR_FWDR_READOUT': self.process_foreman_readout,
            FORWARDER_READOUT: self.process_foreman_readout
        }

        self.setup_publishers()
        self.setup_consumers()
        self._job_scratchpad = Scratchpad(self._base_broker_url)

    def setup_publishers(self):
        """Create the publisher used for acks and transfer results."""
        self._publisher = SimplePublisher(self._base_broker_url)

    def setup_consumers(self):
        """Start the consume-queue listener in its own thread."""
        threadname = "thread-" + self._consume_queue
        print("Threadname is %s" % threadname)
        self._consumer = Consumer(self._base_broker_url, self._consume_queue)
        try:
            _thread.start_new_thread(self.run_consumer, (threadname, 2,))
            print("Started Consumer Thread")
        except Exception:
            # Narrowed from a bare except so SystemExit/KeyboardInterrupt propagate.
            sys.exit(99)

    def run_consumer(self, threadname, delay):
        """Thread entry point; Consumer.run blocks inside this thread."""
        self._consumer.run(self.on_message)

    def on_message(self, ch, method, properties, body):
        """Ack the delivery and dispatch to the handler for its MSG_TYPE."""
        # BUG FIX: original called ch.basic_ack(delivery_tag) with an
        # undefined name; the tag lives on the method frame.
        ch.basic_ack(method.delivery_tag)
        print("INcoming PARAMS, body is:\n%s" % body)
        msg_dict = body
        # NOTE(review): an unknown MSG_TYPE makes handler None and raises
        # TypeError below — original behavior preserved.
        handler = self._msg_actions.get(msg_dict[MSG_TYPE])
        result = handler(msg_dict)

    def process_health_check(self, params):
        """Reply to a foreman health check with an ack."""
        self.send_ack_response("FORWARDER_HEALTH_CHECK_ACK", params)

    def process_job_params(self, params):
        """
        The structure of the incoming job params is identical to the way
        job params are sent to prompt processing forwarders:
        MSG_TYPE: AR_FWDR_XFER_PARAMS
        JOB_NUM: .....
        ACK_ID: x1
        REPLY_QUEUE: .....
        FITS: FITS metadata someday?
        TRANSFER_PARAMS:
            DISTRIBUTOR:
                FQN: Name of entity receivine file
                NAME: login name for receiving entity
                HOSTNAME: Full host name for receiving entity
                IP_ADDR: ip addr of archive
                TARGET_DIR: Where to put file
            ## Below might be better as 'xfer_unit_list' for ccds or rafts, or other
            CCD_LIST: for example...[1,2,3,7,10,14]
            XFER_UNIT: CCD
            FITS: FITS metadata someday?

        After the xfer params arrive, and ack is returned, we set up some
        short cut helpers, such as:
        1) Make a filename stub for job that leaves out all but the CCD number
        2) Put together the scp/bbftp string with login name and ip addr,
           plus target dir
        """
        job_params = copy.deepcopy(params)
        xfer_params = job_params['TRANSFER_PARAMS']

        # Also RM fits files in xfer_dir
        cmd = "rm " + self._DAQ_PATH + "*.fits"
        os.system(cmd)

        filename_stub = (str(job_params['JOB_NUM']) + "_" +
                         str(job_params['VISIT_ID']) + "_" +
                         str(job_params['IMAGE_ID']) + "_")
        login_str = (str(xfer_params['DISTRIBUTOR']['NAME']) + "@" +
                     str(xfer_params['DISTRIBUTOR']['IP_ADDR']) + ":")
        target_dir = str(xfer_params['DISTRIBUTOR']['TARGET_DIR'])

        # Everything needed for the job, keyed for the scratchpad.
        s_params = {}
        s_params['CCD_LIST'] = xfer_params['CCD_LIST']
        s_params['LOGIN_STR'] = login_str
        s_params['TARGET_DIR'] = target_dir
        s_params['FILENAME_STUB'] = filename_stub

        print("S_params are: %s" % s_params)

        # Now, s_params should have all we need for job. Place as value for job_num key
        self._job_scratchpad.set_job_transfer_params(params[JOB_NUM], s_params)
        self._job_scratchpad.set_job_state(params['JOB_NUM'], "READY_WITH_PARAMS")

        self.send_ack_response('FORWARDER_JOB_PARAMS_ACK', params)

    def process_foreman_readout(self, params):
        """Fetch, format, and forward the files for a job, then ack the result."""
        reply_queue = params['REPLY_QUEUE']
        job_number = params[JOB_NUM]
        # Check and see if scratchpad has this job_num
        if job_number not in list(self._job_scratchpad.keys()):
            # Raise holy hell...
            pass
        # raw_files_dict is of the form { ccd: filename}
        # like { 2: /home/F1/xfer_dir/ccd_2.data
        raw_files_dict = self.fetch(job_number)
        final_filenames = self.format(job_number, raw_files_dict)
        results = self.forward(job_number, final_filenames)

        msg = {}
        msg['MSG_TYPE'] = 'AR_ITEMS_XFERD_ACK'
        msg['JOB_NUM'] = job_number
        msg['IMAGE_ID'] = params['IMAGE_ID']
        msg['COMPONENT'] = self._fqn
        msg['ACK_ID'] = params['ACK_ID']
        msg['ACK_BOOL'] = True  # See if num keys of results == len(ccd_list) from orig msg params
        msg['RESULT_LIST'] = results
        self._publisher.publish_message(reply_queue, msg)

    def fetch(self, job_num):
        """Return {ccd: raw_filename} for every CCD recorded for this job."""
        raw_files_dict = {}
        ccd_list = self._job_scratchpad.get_job_value(job_num, 'CCD_LIST')
        for ccd in ccd_list:
            filename = "ccd_" + str(ccd) + ".data"
            raw_files_dict[ccd] = filename
        print("In Forwarder Fetch method, raw_files_dict is: \n%s" % raw_files_dict)
        return raw_files_dict

    def _format_fits_from_metadata(self, file_list, mdata):
        """Format raw files to fits file with header data.

        BUG FIX: this was originally a second `def format(...)` that was
        silently shadowed (and thus dead) because another `format` is defined
        below; renamed so both remain importable without ambiguity.

        :param file_list: dictionary of file_name against raw file name
        :param mdata: primary meta data stream fetched from camera daq
        """
        final_filenames = []
        for ccd_id, raw_file_name in file_list.items():
            image_array = np.fromfile(raw_file_name, dtype=np.int32)
            header_data = mdata[ccd_id]["primary_metadata_chunk"]
            secondary_data = mdata[ccd_id]["secondary_metadata_chunk"]
            header_data.update(secondary_data)
            primary_header = pyfits.Header()
            for key, value in header_data.items():
                primary_header[key] = value
            fits_file = pyfits.PrimaryHDU(header=primary_header, data=image_array)
            fits_file.writeto(ccd_id + ".fits")
            final_filenames.append(ccd_id + ".fits")
        return final_filenames

    def format(self, job_num, raw_files_dict):
        """Concatenate header + raw data (+ timestamp) into per-CCD fits files.

        Returns {ccd: final_filename}.
        """
        keez = list(raw_files_dict.keys())
        filename_stub = self._job_scratchpad.get_job_value(job_num, 'FILENAME_STUB')
        final_filenames = {}
        for kee in keez:
            final_filename = filename_stub + "_" + kee + ".fits"
            target = self._DAQ_PATH + final_filename
            print("Final filename is %s" % final_filename)
            print("target is %s" % target)
            cmd1 = 'cat ' + self._DAQ_PATH + "ccd.header" + " >> " + target
            cmd2 = 'cat ' + self._DAQ_PATH + raw_files_dict[kee] + " >> " + target
            dte = get_epoch_timestamp()
            print("DTE IS %s" % dte)
            cmd3 = 'echo ' + str(dte) + " >> " + target
            print("cmd1 is %s" % cmd1)
            print("cmd2 is %s" % cmd2)
            os.system(cmd1)
            os.system(cmd2)
            os.system(cmd3)
            final_filenames[kee] = final_filename
        print("Done in format()...file list is: %s" % final_filenames)
        print("In format method, final_filenames are:\n%s" % final_filenames)
        return final_filenames

    def forward(self, job_num, final_filenames):
        """scp each formatted file to the distributor; return CCD/file/checksum lists."""
        print("Start Time of READOUT IS: %s" % get_timestamp())
        login_str = self._job_scratchpad.get_job_value(job_num, 'LOGIN_STR')
        target_dir = self._job_scratchpad.get_job_value(job_num, 'TARGET_DIR')
        results = {}
        CCD_LIST = []
        FILENAME_LIST = []
        CHECKSUM_LIST = []
        for ccd in list(final_filenames.keys()):
            final_file = final_filenames[ccd]
            pathway = self._DAQ_PATH + final_file
            # BUG FIX: read as bytes — hashlib.md5 rejects text-mode str data.
            with open(pathway, 'rb') as file_to_calc:
                if self.CHECKSUM_ENABLED:
                    data = file_to_calc.read()
                    resulting_md5 = hashlib.md5(data).hexdigest()
                else:
                    resulting_md5 = '0'
            CCD_LIST.append(ccd)
            CHECKSUM_LIST.append(resulting_md5)
            FILENAME_LIST.append(target_dir + final_file)
            cmd = 'scp ' + pathway + " " + login_str + target_dir + final_file
            # NOTE(review): this "Finish" stamp is printed before the scp runs.
            print("Finish Time of SCP'ing %s IS: %s" % (pathway, get_timestamp()))
            print("In forward() method, cmd is %s" % cmd)
            os.system(cmd)
        results['CCD_LIST'] = CCD_LIST
        results['FILENAME_LIST'] = FILENAME_LIST
        results['CHECKSUM_LIST'] = CHECKSUM_LIST
        print("END Time of READOUT XFER IS: %s" % get_timestamp())
        print("In forward method, results are: \n%s" % results)
        return results

    def send_ack_response(self, type, params):
        """Publish an ack of the given type back on the caller's RESPONSE_QUEUE."""
        timed_ack = params.get("ACK_ID")
        job_num = params.get(JOB_NUM)
        response_queue = params['RESPONSE_QUEUE']
        msg_params = {}
        msg_params[MSG_TYPE] = type
        msg_params[JOB_NUM] = job_num
        msg_params['COMPONENT'] = self._fqn
        msg_params[ACK_BOOL] = "TRUE"
        msg_params[ACK_ID] = timed_ack
        self._publisher.publish_message(response_queue, msg_params)

    def register(self):
        """Placeholder for future registration logic."""
        pass
import copy
import sys

sys.path.append("../ctrl_iip/python/lsst/iip/")
from SimplePublisher import SimplePublisher
from Consumer import Consumer

# Publisher used to send acknowledgements back toward the OCS bridge.
pub = SimplePublisher("amqp://*****:*****@140.252.32.128:5672/%2ftest_at")


def on_message(ch, method, properties, body):
    """Ack the delivery, then echo the message back with *_ACK markers set."""
    ch.basic_ack(method.delivery_tag)
    msg = copy.deepcopy(body)
    msg["MSG_TYPE"] = body["MSG_TYPE"] + "_ACK"
    msg["ACK_BOOL"] = 1
    msg["ACK_STATEMENT"] = "everything went well"
    pub.publish_message("dmcs_ocs_publish", msg)


# Blocking consume loop on the OCS -> DMCS queue.
sub = Consumer("amqp://*****:*****@140.252.32.128:5672/%2ftest_at",
               "ocs_dmcs_consume", "hello", on_message, "YAML")
sub.run()
class Distributor:
    """This is a basic Distributor class.

    The cadence of the file is very similar to its workmate the Forwarder
    class and begins to violate the DRY rule. It may be that this class and
    the Forwarder class are eventually combined into a single class so that a
    personality can be chosen at the time of initialization. Or a parent
    class for both may be a better approach... but at this point, they are
    separate classes until it is certain that individual classes are
    definitely not necessary.
    """

    def __init__(self):
        LOGGER.info("Initializing Distributor object")
        self._registered = False
        # data map (with-block fixes the leaked file handle)
        with open('DistributorCfg.yaml') as f:
            cdm = yaml.safe_load(f)
        try:
            self._name = cdm[NAME]
            self._passwd = cdm[PASSWD]
            self._fqn_name = cdm[FQN]
            self._ncsa_broker_addr = cdm[NCSA_BROKER_ADDR]
            self._consume_queue = cdm[CONSUME_QUEUE]
            self._publish_queue = cdm[PUBLISH_QUEUE]
            self._hostname = cdm[HOSTNAME]
            self._ip_addr = cdm[IP_ADDR]
            self._target_dir = cdm[TARGET_DIR]
            self._sentinel_file = cdm[SENTINEL_FILE]
        except KeyError as e:
            LOGGER.critical(e)
            print("Key error reading cfg file.")
            print("Bailing out...")
            sys.exit(99)

        self._home_dir = "/home/" + self._name + "/"
        self._ncsa_broker_url = ("amqp://" + self._name + ":" + self._passwd +
                                 "@" + str(self._ncsa_broker_addr))

        self._msg_actions = {
            DISTRIBUTOR_HEALTH_CHECK: self.process_health_check,
            DISTRIBUTOR_JOB_PARAMS: self.process_job_params,
            DISTRIBUTOR_READOUT: self.process_foreman_readout
        }

        self.setup_publishers()
        self.setup_consumers()
        self._job_scratchpad = Scratchpad(self._ncsa_broker_url)

    def setup_publishers(self):
        """Create the publisher used for acks and reports."""
        LOGGER.info('Setting up publisher for Distributor on %s', self._ncsa_broker_url)
        self._publisher = SimplePublisher(self._ncsa_broker_url)

    def setup_consumers(self):
        """Start the consume-queue listener in its own thread."""
        LOGGER.info('Distributor %s setting up consumer on %s', self._name, self._ncsa_broker_url)
        LOGGER.info('Starting new thread on consumer method')
        threadname = "thread-" + self._consume_queue
        self._consumer = Consumer(self._ncsa_broker_url, self._consume_queue)
        try:
            _thread.start_new_thread(self.run_consumer, (threadname, 2,))
            LOGGER.info('Started distributor consumer thread %s', threadname)
        except Exception:
            # Narrowed from a bare except so SystemExit/KeyboardInterrupt propagate.
            LOGGER.critical('Cannot start Distributor consumer thread, exiting...')
            sys.exit(107)

    def run_consumer(self, threadname, delay):
        """Thread entry point; Consumer.run blocks inside this thread."""
        self._consumer.run(self.on_message)

    def on_message(self, ch, method, properties, body):
        """Ack the delivery and dispatch to the handler for its MSG_TYPE."""
        ch.basic_ack(method.delivery_tag)
        # BUG FIX: yaml.load without a Loader is unsafe on wire data; the
        # rest of this code uses safe_load, so be consistent here too.
        msg_dict = yaml.safe_load(body)
        LOGGER.info('In %s message callback', self._name)
        LOGGER.debug('Thread in %s callback is %s', self._name, _thread.get_ident())
        LOGGER.debug('%s callback message body is: %s', self._name, str(msg_dict))
        handler = self._msg_actions.get(msg_dict[MSG_TYPE])
        result = handler(msg_dict)

    def process_health_check(self, params):
        """Record the job and reply to a foreman health check with an ack."""
        job_number = params[JOB_NUM]
        self._job_scratchpad.set_job_value(job_number, "STATE", "ADD_JOB")
        self._job_scratchpad.set_job_value(job_number, "ADD_JOB_TIME", get_timestamp())
        self.send_ack_response("DISTRIBUTOR_HEALTH_ACK", params)

    def process_job_params(self, params):
        """Store the job's transfer params and mark it ready."""
        # BUG FIX: job_number was never assigned, so the set_job_value calls
        # below raised NameError on every invocation.
        job_number = params[JOB_NUM]
        transfer_params = params[TRANSFER_PARAMS]
        self._job_scratchpad.set_job_transfer_params(job_number, transfer_params)
        self._job_scratchpad.set_job_value(job_number, "STATE", "READY_WITH_PARAMS")
        self._job_scratchpad.set_job_value(job_number, "READY_WITH_PARAMS_TIME",
                                           get_timestamp())
        self.send_ack_response(DISTRIBUTOR_JOB_PARAMS_ACK, params)

    def process_foreman_readout(self, params):
        """Wait on the sentinel check, report transfer results, and ack readout."""
        LOGGER.info('At Top of Distributor readout')
        job_number = params[JOB_NUM]
        cmd = self._target_dir + "check_sentinel.sh"
        result = subprocess.check_output(cmd, shell=True)
        LOGGER.info('check_sentinel test is complete')
        # xfer complete
        """
          ###########XXXXXXXXXXXXXXX###############
          #### Checking for and processing image file goes here
        """
        command = "cat " + self._target_dir + "rcv_logg.test"
        cat_result = subprocess.check_output(command, shell=True)

        msg = {}
        msg[MSG_TYPE] = 'XFER_TIME'
        msg[NAME] = self._name
        msg[JOB_NUM] = job_number
        msg['COMPONENT'] = "DISTRIBUTOR"
        msg['COMMENT1'] = "Result from xfer command is: %s" % result
        msg['COMMENT2'] = "cat_result is --> %s" % cat_result
        msg['COMMENT3'] = "Command used to call check_sentinel.sh is %s" % cmd
        self._publisher.publish_message("reports", yaml.dump(msg))

        readout_dict = {}
        readout_dict[MSG_TYPE] = "DISTRIBUTOR_READOUT_ACK"
        readout_dict[JOB_NUM] = params[JOB_NUM]
        readout_dict["COMPONENT"] = self._fqn_name
        readout_dict["ACK_BOOL"] = True
        readout_dict["ACK_ID"] = params["TIMED_ACK_ID"]
        self._publisher.publish_message(self._publish_queue, yaml.dump(readout_dict))

    def send_ack_response(self, type, params):
        """Publish an ack of the given type on the reports queue (if params are sane)."""
        timed_ack = params.get("TIMED_ACK_ID")
        job_num = params.get(JOB_NUM)
        if timed_ack is None:
            LOGGER.info('%s failed, missing TIMED_ACK_ID', type)
        elif job_num is None:
            # BUG FIX: the original format string had two placeholders but
            # only one argument, which raises inside the logging machinery.
            LOGGER.info('%s failed, missing JOB_NUM for ACK ID: %s', type, timed_ack)
        else:
            msg_params = {}
            msg_params[MSG_TYPE] = type
            msg_params[JOB_NUM] = job_num
            msg_params[NAME] = "DISTRIBUTOR_" + self._name
            msg_params[ACK_BOOL] = "TRUE"
            msg_params[TIMED_ACK] = timed_ack
            self._publisher.publish_message("reports", yaml.dump(msg_params))
            LOGGER.info('%s sent for ACK ID: %s and JOB_NUM: %s', type, timed_ack, job_num)

    def register(self):
        """Placeholder for future registration logic."""
        pass
from Consumer import Consumer

# Smoke-test driver: create a consumer named 'ConsumerLow' (second argument
# '3' — presumably a priority/level tag; confirm against Consumer.__init__)
# and enter its blocking run loop.
ConsumerLow = Consumer('ConsumerLow', '3')
ConsumerLow.run()
class Foreman:
    """Parent class for Base and NCSA Foremen.

    Contains set up for forwarder and distributor machines along with code
    that was common between them.
    """

    def parent_init(self, db_num, prog_name, type):
        """Shared initialization: machine scoreboard, broker URL, publisher,
        and the machine-message consumer thread.

        :param db_num: redis database number for the machine Scoreboard
        :param prog_name: program name passed to the Scoreboard
        :param type: scoreboard type tag (shadows builtin; kept for API compat)
        """
        custom_print.define_new_name(self.PROGRAM_NAME)

        # Create machine scoreboard
        self._sb_mach = Scoreboard(db_num, prog_name, type)

        # Messaging URL (rabbitmq server IP)
        self._broker_url = ("amqp://" + AMQP_BF_USER + ":" + AMQP_BF_PSWD + "@" +
                            AMQP_BROKER_ADDR + ":" + AMQP_BROKER_PORT + "/" +
                            AMQP_BROKER_VHOST)

        # Publisher object for sending messages to rabbit
        printc("Creating publisher...")
        self._publisher = SimplePublisher(self._broker_url)

        # Machine messages
        self._msg_actions_mach = {
            'TRANSFER_DONE': self.process_transfer_done,  # Machine done with the current job
            'REGISTER': self.process_register,            # New machine wants to join
            'DEREGISTER': self.process_deregister,        # Machine is leaving
            'STATE_UPDATE': self.process_state_update     # Machine updating us on its state
        }

        # Machines register with us and let us know how they are doing
        printc("Creating machine consumer...")
        self._mach_consumer = Consumer(self._broker_url, self._machine_publish_q)
        try:
            thread.start_new_thread(self.run_mach_consumer, ())
        except Exception:
            # Narrowed from a bare except so SystemExit/KeyboardInterrupt propagate.
            printc("Thread run_mach_consumer failed, quitting...")
            sys.exit()
        return

    def run_mach_consumer(self):
        """Consume machine messages continuously (blocking; runs in its own thread)."""
        printc("Machine message consumer is running...")
        self._mach_consumer.run(self.on_mach_message)
        return

    def on_mach_message(self, ch, method, properties, body):
        """Callback from consumer to process machine messages."""
        # Load the message which came in yaml format.
        # BUG FIX: yaml.load on wire data can construct arbitrary objects;
        # the sibling components use safe_load, so be consistent here too.
        msg_dict = yaml.safe_load(body)

        # Determine which function needs to be called for this message type
        try:
            af_handler = self._msg_actions_mach.get(msg_dict[MSG_TYPE])
        except Exception:
            # Malformed body (non-dict, or missing MSG_TYPE): ack and drop it.
            printc("Bad machine message received...")
            ch.basic_ack(delivery_tag=method.delivery_tag)
            return

        # Call that function and provide it with the message
        af_handler(msg_dict)

        # Acknowledge that we processed the message so rabbit can remove it from the queue
        ch.basic_ack(delivery_tag=method.delivery_tag)
        return

    def process_register(self, msg_params):
        """Process a request for a machine that is registering with us."""
        printc("Processing name request...")
        tmp_name = self.pick_name()
        # Keep drawing names until the scoreboard accepts one as unused.
        while False == self._sb_mach.register_machine(tmp_name):
            tmp_name = self.pick_name()
        self._sb_mach._redis.hset(tmp_name, 'IP_ADDR', msg_params['IP_ADDR'])
        printc("%s has registered. (%s)" % (tmp_name, msg_params['IP_ADDR']))
        msg = {}
        msg[MSG_TYPE] = 'REGISTRATION'
        msg[NAME] = tmp_name
        self._publisher.publish_message(self._machine_consume_q, yaml.dump(msg))
        return

    def pick_name(self):
        """Return a prefixed random machine name of NAME_LENGTH ascii letters."""
        tmp_name = ''.join(random.choice(string.ascii_letters)
                           for x in range(NAME_LENGTH))
        return self._machine_prefix + tmp_name

    def process_deregister(self, msg_params):
        """Machine is deregistering with us."""
        printc("%s has deregistered." % msg_params[NAME])
        self._sb_mach.machine_deregister(msg_params[NAME])
        return

    def process_state_update(self, msg_params):
        """Machine is updating us on something, report it in the Scoreboard."""
        self._sb_mach.machine_update(msg_params['KEY'], msg_params['FIELD'],
                                     msg_params['VALUE'])
        return

    def process_transfer_done(self, msg_params):
        """Placeholder: machine reports its transfer finished."""
        return
class BaseForeman: FWD_SCBD = None JOB_SCBD = None ACK_SCBD = None ACK_PUBLISH = "ack_publish" YAML = 'YAML' def __init__(self, filename=None): toolsmod.singleton(self) self._config_file = 'ForemanCfg.yaml' if filename != None: self._config_file = filename cdm = toolsmod.intake_yaml_file(self._config_file) try: self._base_name = cdm[ROOT][BASE_BROKER_NAME] # Message broker user & passwd self._base_passwd = cdm[ROOT][BASE_BROKER_PASSWD] self._ncsa_name = cdm[ROOT][NCSA_BROKER_NAME] self._ncsa_passwd = cdm[ROOT][NCSA_BROKER_PASSWD] self._base_broker_addr = cdm[ROOT][BASE_BROKER_ADDR] self._ncsa_broker_addr = cdm[ROOT][NCSA_BROKER_ADDR] forwarder_dict = cdm[ROOT][XFER_COMPONENTS][FORWARDERS] except KeyError as e: print("Dictionary error") print("Bailing out...") sys.exit(99) if 'QUEUE_PURGES' in cdm[ROOT]: self.purge_broker(cdm['ROOT']['QUEUE_PURGES']) self._base_msg_format = self.YAML self._ncsa_msg_format = self.YAML if 'BASE_MSG_FORMAT' in cdm[ROOT]: self._base_msg_format = cdm[ROOT][BASE_MSG_FORMAT] if 'NCSA_MSG_FORMAT' in cdm[ROOT]: self._ncsa_msg_format = cdm[ROOT][NCSA_MSG_FORMAT] self._base_broker_url = 'amqp_url' self._ncsa_broker_url = 'amqp_url' self._next_timed_ack_id = 0 # Create Redis Forwarder table with Forwarder info self.FWD_SCBD = ForwarderScoreboard(forwarder_dict) self.JOB_SCBD = JobScoreboard() self.ACK_SCBD = AckScoreboard() self._msg_actions = { 'NEW_JOB': self.process_dmcs_new_job, 'READOUT': self.process_dmcs_readout, 'NCSA_RESOURCE_QUERY_ACK': self.process_ack, 'NCSA_STANDBY_ACK': self.process_ack, 'NCSA_READOUT_ACK': self.process_ack, 'FORWARDER_HEALTH_ACK': self.process_ack, 'FORWARDER_JOB_PARAMS_ACK': self.process_ack, 'FORWARDER_READOUT_ACK': self.process_ack, 'NEW_JOB_ACK': self.process_ack } self._base_broker_url = "amqp://" + self._base_name + ":" + self._base_passwd + "@" + str(self._base_broker_addr) self._ncsa_broker_url = "amqp://" + self._ncsa_name + ":" + self._ncsa_passwd + "@" + str(self._ncsa_broker_addr) 
LOGGER.info('Building _base_broker_url. Result is %s', self._base_broker_url) LOGGER.info('Building _ncsa_broker_url. Result is %s', self._ncsa_broker_url) self.setup_publishers() self.setup_consumers() #self._ncsa_broker_url = "" #self.setup_federated_exchange() def setup_consumers(self): """This method sets up a message listener from each entity with which the BaseForeman has contact here. These listeners are instanced in this class, but their run methods are each called as a separate thread. While pika does not claim to be thread safe, the manner in which the listeners are invoked below is a safe implementation that provides non-blocking, fully asynchronous messaging to the BaseForeman. The code in this file expects message bodies to arrive as YAML'd python dicts, while in fact, message bodies are sent on the wire as XML; this way message format can be validated, versioned, and specified in just one place. To make this work, there is an object that translates the params dict to XML, and visa versa. The translation object is instantiated by the consumer and acts as a filter before sending messages on to the registered callback for processing. 
""" LOGGER.info('Setting up consumers on %s', self._base_broker_url) LOGGER.info('Running start_new_thread on all consumer methods') self._dmcs_consumer = Consumer(self._base_broker_url, self.DMCS_PUBLISH, self._base_msg_format) try: _thread.start_new_thread( self.run_dmcs_consumer, ("thread-dmcs-consumer", 2,) ) except: LOGGER.critical('Cannot start DMCS consumer thread, exiting...') sys.exit(99) self._forwarder_consumer = Consumer(self._base_broker_url, self.FORWARDER_PUBLISH, self._base_msg_format) try: _thread.start_new_thread( self.run_forwarder_consumer, ("thread-forwarder-consumer", 2,) ) except: LOGGER.critical('Cannot start FORWARDERS consumer thread, exiting...') sys.exit(100) self._ncsa_consumer = Consumer(self._base_broker_url, self.NCSA_PUBLISH, self._base_msg_format) try: _thread.start_new_thread( self.run_ncsa_consumer, ("thread-ncsa-consumer", 2,) ) except: LOGGER.critical('Cannot start NCSA consumer thread, exiting...') sys.exit(101) self._ack_consumer = Consumer(self._base_broker_url, self.ACK_PUBLISH, self._base_msg_format) try: _thread.start_new_thread( self.run_ack_consumer, ("thread-ack-consumer", 2,) ) except: LOGGER.critical('Cannot start ACK consumer thread, exiting...') sys.exit(102) LOGGER.info('Finished starting all three consumer threads') def run_dmcs_consumer(self, threadname, delay): self._dmcs_consumer.run(self.on_dmcs_message) def run_forwarder_consumer(self, threadname, delay): self._forwarder_consumer.run(self.on_forwarder_message) def run_ncsa_consumer(self, threadname, delay): self._ncsa_consumer.run(self.on_ncsa_message) def run_ack_consumer(self, threadname, delay): self._ack_consumer.run(self.on_ack_message) def setup_publishers(self): LOGGER.info('Setting up Base publisher on %s using %s', self._base_broker_url, self._base_msg_format) LOGGER.info('Setting up NCSA publisher on %s using %s', self._ncsa_broker_url, self._ncsa_msg_format) self._base_publisher = SimplePublisher(self._base_broker_url, self._base_msg_format) 
self._ncsa_publisher = SimplePublisher(self._ncsa_broker_url, self._ncsa_msg_format) # def setup_federated_exchange(self): # # Set up connection URL for NCSA Broker here. # self._ncsa_broker_url = "amqp://" + self._name + ":" + self._passwd + "@" + str(self._ncsa_broker_addr) # LOGGER.info('Building _ncsa_broker_url. Result is %s', self._ncsa_broker_url) # pass def on_dmcs_message(self, ch, method, properties, body): ch.basic_ack(method.delivery_tag) #msg_dict = yaml.load(body) msg_dict = body LOGGER.info('In DMCS message callback') LOGGER.debug('Thread in DMCS callback is %s', _thread.get_ident()) LOGGER.info('Message from DMCS callback message body is: %s', str(msg_dict)) handler = self._msg_actions.get(msg_dict[MSG_TYPE]) result = handler(msg_dict) def on_forwarder_message(self, ch, method, properties, body): ch.basic_ack(method.delivery_tag) LOGGER.info('In Forwarder message callback, thread is %s', _thread.get_ident()) LOGGER.info('forwarder callback msg body is: %s', str(body)) pass def on_ncsa_message(self,ch, method, properties, body): ch.basic_ack(method.delivery_tag) LOGGER.info('In ncsa message callback, thread is %s', _thread.get_ident()) #msg_dict = yaml.load(body) msg_dict = body LOGGER.info('ncsa msg callback body is: %s', str(msg_dict)) handler = self._msg_actions.get(msg_dict[MSG_TYPE]) result = handler(msg_dict) def on_ack_message(self, ch, method, properties, body): ch.basic_ack(method.delivery_tag) msg_dict = body LOGGER.info('In ACK message callback') LOGGER.debug('Thread in ACK callback is %s', _thread.get_ident()) LOGGER.info('Message from ACK callback message body is: %s', str(msg_dict)) handler = self._msg_actions.get(msg_dict[MSG_TYPE]) result = handler(msg_dict) def process_dmcs_new_job(self, params): input_params = params needed_workers = len(input_params[RAFTS]) ack_id = self.forwarder_health_check(input_params) self.ack_timer(7) # This is a HUGE num seconds for now..final setting will be milliseconds healthy_forwarders = 
self.ACK_SCBD.get_components_for_timed_ack(timed_ack) num_healthy_forwarders = len(healthy_forwarders) if needed_workers > num_healthy_forwarders: result = self.insufficient_base_resources(input_params, healthy_forwarders) return result else: healthy_status = {"STATUS": "HEALTHY", "STATE":"READY_WITHOUT_PARAMS"} self.FWD_SCBD.set_forwarder_params(healthy_forwarders, healthy_status) ack_id = self.ncsa_resources_query(input_params, healthy_forwarders) self.ack_timer(3) #Check ACK scoreboard for response from NCSA ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id) if ncsa_response: pairs = {} ack_bool = None try: ack_bool = ncsa_response[ACK_BOOL] if ack_bool == True: pairs = ncsa_response[PAIRS] except KeyError as e: pass # Distribute job params and tell DMCS I'm ready. if ack_bool == TRUE: fwd_ack_id = self.distribute_job_params(input_params, pairs) self.ack_timer(3) fwd_params_response = self.ACK_SCBD.get_components_for_timed_ack(fwd_ack_id) if fwd_params_response and (len(fwd_params_response) == len(fwders)): self.JOB_SCBD.set_value_for_job(job_num, "STATE", "BASE_TASK_PARAMS_SENT") self.JOB_SCBD.set_value_for_job(job_num, "TIME_BASE_TASK_PARAMS_SENT", get_timestamp()) in_ready_state = {'STATE':'READY_WITH_PARAMS'} self.FWD_SCBD.set_forwarder_params(fwders, in_ready_state) # Tell DMCS we are ready result = self.accept_job(job_num) else: #not enough ncsa resources to do job - Notify DMCS idle_param = {'STATE': 'IDLE'} self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_params) result = self.insufficient_ncsa_resources(ncsa_response) return result else: result = self.ncsa_no_response(input_params) idle_param = {'STATE': 'IDLE'} self.FWD_SCBD.set_forwarder_params(list(forwarder_candidate_dict.keys()), idle_params) return result def forwarder_health_check(self, params): job_num = str(params[JOB_NUM]) raft_list = params['RAFTS'] needed_workers = len(raft_list) self.JOB_SCBD.add_job(job_num, needed_workers) 
self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED", get_timestamp()) self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED_E", get_epoch_timestamp()) LOGGER.info('Received new job %s. Needed workers is %s', job_num, needed_workers) # run forwarder health check # get timed_ack_id timed_ack = self.get_next_timed_ack_id("FORWARDER_HEALTH_CHECK_ACK") forwarders = self.FWD_SCBD.return_available_forwarders_list() # Mark all healthy Forwarders Unknown state_status = {"STATE": "HEALTH_CHECK", "STATUS": "UNKNOWN"} self.FWD_SCBD.set_forwarder_params(forwarders, state_status) # send health check messages ack_params = {} ack_params[MSG_TYPE] = FORWARDER_HEALTH_CHECK ack_params["ACK_ID"] = timed_ack ack_params[JOB_NUM] = job_num self.JOB_SCBD.set_value_for_job(job_num, "STATE", "BASE_RESOURCE_QUERY") self.JOB_SCBD.set_value_for_job(job_num, "TIME_BASE_RESOURCE_QUERY", get_timestamp()) audit_params = {} audit_params['DATA_TYPE'] = 'FOREMAN_ACK_REQUEST' audit_params['SUB_TYPE'] = 'FORWARDER_HEALTH_CHECK_ACK' audit_params['ACK_ID'] = timed_ack audit_parsms['COMPONENT_NAME'] = 'BASE_FOREMAN' audit_params['TIME'] = get_epoch_timestamp() for forwarder in forwarders: self._base_publisher.publish_message(self.FWD_SCBD.get_value_for_forwarder(forwarder,"CONSUME_QUEUE"), ack_params) return timed_ack def insufficient_base_resources(self, params, healthy_forwarders): # send response msg to dmcs refusing job job_num = str(params[JOB_NUM]) raft_list = params[RAFTS] ack_id = params['ACK_ID'] needed_workers = len(raft_list) LOGGER.info('Reporting to DMCS that there are insufficient healthy forwarders for job #%s', job_num) dmcs_params = {} fail_dict = {} dmcs_params[MSG_TYPE] = NEW_JOB_ACK dmcs_params[JOB_NUM] = job_num dmcs_params[ACK_BOOL] = False dmcs_params[ACK_ID] = ack_id ### NOTE FOR DMCS ACK PROCESSING: ### if ACK_BOOL == True, there will NOT be a FAIL_DETAILS section ### If ACK_BOOL == False, there will always be a FAIL_DICT to examine AND there will always be a ### 
BASE_RESOURCES inside the FAIL_DICT ### If ACK_BOOL == False, and the BASE_RESOURCES inside FAIL_DETAILS == 0, ### there will be only NEEDED and AVAILABLE Forwarder params - nothing more ### If ACK_BOOL == False and BASE_RESOURCES inside FAIL_DETAILS == 1, there will always be a ### NCSA_RESOURCES inside FAIL_DETAILS set to either 0 or 'NO_RESPONSE' ### if NCSA_RESPONSE == 0, there will be NEEDED and AVAILABLE Distributor params ### if NCSA_RESOURCES == 'NO_RESPONSE' there will be nothing else fail_dict['BASE_RESOURCES'] = '0' fail_dict[NEEDED_FORWARDERS] = str(needed_workers) fail_dict[AVAILABLE_FORWARDERS] = str(len(healthy_forwarders)) dmcs_params['FAIL_DETAILS'] = fail_dict self._base_publisher.publish_message("dmcs_consume", dmcs_params) # mark job refused, and leave Forwarders in Idle state self.JOB_SCBD.set_value_for_job(job_num, "STATE", "JOB_ABORTED") self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ABORTED_BASE_RESOURCES", get_timestamp()) idle_state = {"STATE": "IDLE"} self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_state) return False def ncsa_resources_query(self, params, healthy_forwarders): job_num = str(params[JOB_NUM]) raft_list = params[RAFTS] needed_workers = len(raft_list) LOGGER.info('Sufficient forwarders have been found. Checking NCSA') self._pairs_dict = {} forwarder_candidate_dict = {} for i in range (0, needed_workers): forwarder_candidate_dict[healthy_forwarders[i]] = raft_list[i] self.FWD_SCBD.set_forwarder_status(healthy_forwarders[i], NCSA_RESOURCES_QUERY) # Call this method for testing... 
# There should be a message sent to NCSA here asking for available resources timed_ack_id = self.get_next_timed_ack_id("NCSA_Ack") ncsa_params = {} ncsa_params[MSG_TYPE] = "NCSA_RESOURCES_QUERY" ncsa_params[JOB_NUM] = job_num #ncsa_params[RAFT_NUM] = needed_workers ncsa_params[ACK_ID] = timed_ack_id ncsa_params["FORWARDERS"] = forwarder_candidate_dict self.JOB_SCBD.set_value_for_job(job_num, "STATE", "NCSA_RESOURCES_QUERY_SENT") self.JOB_SCBD.set_value_for_job(job_num, "TIME_NCSA_RESOURCES_QUERY_SENT", get_timestamp()) self._ncsa_publisher.publish_message(self.NCSA_CONSUME, ncsa_params) LOGGER.info('The following forwarders have been sent to NCSA for pairing:') LOGGER.info(forwarder_candidate_dict) return timed_ack_id def distribute_job_params(self, params, pairs): #ncsa has enough resources... job_num = str(params[JOB_NUM]) self.JOB_SCBD.set_pairs_for_job(job_num, pairs) self.JOB_SCBD.set_value_for_job(job_num, "TIME_PAIRS_ADDED", get_timestamp()) LOGGER.info('The following pairs will be used for Job #%s: %s', job_num, pairs) fwd_ack_id = self.get_next_timed_ack_id("FWD_PARAMS_ACK") fwders = list(pairs.keys()) fwd_params = {} fwd_params[MSG_TYPE] = "FORWARDER_JOB_PARAMS" fwd_params[JOB_NUM] = job_num fwd_params[ACK_ID] = fwd_ack_id for fwder in fwders: fwd_params["TRANSFER_PARAMS"] = pairs[fwder] route_key = self.FWD_SCBD.get_value_for_forwarder(fwder, "CONSUME_QUEUE") self._base_publisher.publish_message(route_key, fwd_params) return fwd_ack_id def accept_job(self, job_num): dmcs_message = {} dmcs_message[JOB_NUM] = job_num dmcs_message[MSG_TYPE] = NEW_JOB_ACK dmcs_message[ACK_BOOL] = True self.JOB_SCBD.set_value_for_job(job_num, STATE, "JOB_ACCEPTED") self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ACCEPTED", get_timestamp()) self._base_publisher.publish_message("dmcs_consume", dmcs_message) return True def insufficient_ncsa_resources(self, ncsa_response): dmcs_params = {} dmcs_params[MSG_TYPE] = "NEW_JOB_ACK" dmcs_params[JOB_NUM] = job_num 
dmcs_params[ACK_BOOL] = False dmcs_params[BASE_RESOURCES] = '1' dmcs_params[NCSA_RESOURCES] = '0' dmcs_params[NEEDED_DISTRIBUTORS] = ncsa_response[NEEDED_DISTRIBUTORS] dmcs_params[AVAILABLE_DISTRIBUTORS] = ncsa_response[AVAILABLE_DISTRIBUTORS] #try: FIXME - catch exception self._base_publisher.publish_message("dmcs_consume", dmcs_params ) #except L1MessageError e: # return False return True def ncsa_no_response(self,params): #No answer from NCSA... job_num = str(params[JOB_NUM]) raft_list = params[RAFTS] needed_workers = len(raft_list) dmcs_params = {} dmcs_params[MSG_TYPE] = "NEW_JOB_ACK" dmcs_params[JOB_NUM] = job_num dmcs_params[ACK_BOOL] = False dmcs_params[BASE_RESOURCES] = '1' dmcs_params[NCSA_RESOURCES] = 'NO_RESPONSE' self._base_publisher.publish_message("dmcs_consume", dmcs_params ) def process_dmcs_readout(self, params): job_number = params[JOB_NUM] pairs = self.JOB_SCBD.get_pairs_for_job(job_number) date - get_timestamp() self.JOB_SCBD.set_value_for_job(job_number, TIME_START_READOUT, date) # The following line extracts the distributor FQNs from pairs dict using # list comprehension values; faster than for loops distributors = [v['FQN'] for v in list(pairs.values())] forwarders = list(pairs.keys()) ack_id = self.get_next_timed_ack_id('NCSA_READOUT') ### Send READOUT to NCSA with ACK_ID ncsa_params = {} ncsa_params[MSG_TYPE] = 'NCSA_READOUT' ncsa_params[ACK_ID] = ack_id self._ncsa_publisher.publish_message(NCSA_CONSUME, yaml.dump(ncsa_params)) self.ack_timer(4) ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id) if ncsa_response: if ncsa_response['ACK_BOOL'] == True: #inform forwarders fwd_ack_id = self.get_next_timed_ack_id('FORWARDER_READOUT') for forwarder in forwarders: name = self.FWD_SCBD.get_value_for_forwarder(forwarder, NAME) routing_key = self.FWD_SCBD.get_routing_key(forwarder) msg_params = {} msg_params[MSG_TYPE] = 'FORWARDER_READOUT' msg_params[JOB_NUM] = job_number msg_params['ACK_ID'] = fwd_ack_id 
self.FWD_SCBD.set_forwarder_state(forwarder, START_READOUT) self._publisher.publish_message(routing_key, yaml.dump(msg_params)) self.ack_timer(4) forwarder_responses = self.ACK_SCBD.get_components_for_timed_ack(fwd_ack_id) if len(forwarder_responses) == len(forwarders): dmcs_params = {} dmcs_params[MSG_TYPE] = 'READOUT_ACK' dmcs_params[JOB_NUM] = job_number dmcs_params['ACK_BOOL'] = True dmcs_params['COMMENT'] = "Readout begun at %s" % get_timestamp() self._publisher.publish_message('dmcs_consume', yaml.dump(dmcs_params)) else: #send problem with ncsa to DMCS dmcs_params = {} dmcs_params[MSG_TYPE] = 'READOUT_ACK' dmcs_params[JOB_NUM] = job_number dmcs_params['ACK_BOOL'] = False dmcs_params['COMMENT'] = 'Readout Failed: Problem at NCSA - Expected Distributor Acks is %s, Number of Distributor Acks received is %s' % (ncsa_response['EXPECTED_DISTRIBUTOR_ACKS'], ncsa_response['RECEIVED_DISTRIBUTOR_ACKS']) self._base_publisher.publish_message('dmcs_consume', yaml.dump(dmcs_params)) else: #send 'no response from ncsa' to DMCS ) dmcs_params = {} dmcs_params[MSG_TYPE] = 'READOUT_ACK' dmcs_params[JOB_NUM] = job_number dmcs_params['ACK_BOOL'] = False dmcs_params['COMMENT'] = "Readout Failed: No Response from NCSA" self._base_publisher.publish_message('dmcs_consume', yaml.dump(dmcs_params)) def process_ack(self, params): self.ACK_SCBD.add_timed_ack(params) def get_next_timed_ack_id(self, ack_type): self._next_timed_ack_id = self._next_timed_ack_id + 1 retval = ack_type + "_" + str(self._next_timed_ack_id).zfill(6) return retval def ack_timer(self, seconds): sleep(seconds) return True def purge_broker(self, queues): for q in queues: cmd = "rabbitmqctl -p /tester purge_queue " + q os.system(cmd)
from Consumer import Consumer ConsumerMedium = Consumer('ConsumerMedium', '3') ConsumerMedium.run()
class Premium:
    """Test harness that consumes 'f1_consume' and 'ncsa_consume' on two
    threads and answers NCSA start-integration requests with synthesized
    distributor pairings."""

    def __init__(self):
        logging.basicConfig()
        self.sp2 = SimplePublisher('amqp://*****:*****@141.142.238.160:5672/%2Fbunny', "YAML")
        time.sleep(3)
        broker_url = 'amqp://*****:*****@141.142.238.160:5672/%2Fbunny'
        self._cons = Consumer(broker_url, 'f1_consume', "YAML")
        self._cons2 = Consumer(broker_url, 'ncsa_consume', "YAML")
        # FIX: both handlers below were 'except e:', which raises NameError
        # instead of reporting the thread-start failure.
        try:
            _thread.start_new_thread(self.do_it, ("thread-1", 2,))
        except Exception as e:
            print("Cannot start thread")
            print(e)
        try:
            _thread.start_new_thread(self.do_it2, ("thread-2", 2,))
        except Exception as e:
            print("Cannot start thread")
            print(e)

    def mycallback(self, ch, methon, properties, body):
        """Print messages arriving on the f1_consume queue."""
        print(" ")
        print("+++++++++++++=========++++++++++++++++")
        print(" f1_consume msg:")
        print(body)

    def mycallback2(self, ch, methon, properties, body):
        """Answer an NCSA start-integration message by attaching one fake
        distributor to each forwarder and acking on pp_foreman_ack_publish.
        NEXT_VISIT messages are printed and ignored."""
        print(" ")
        print(">>>>>>>>>>>>>>><<<<<<<<<<<<<<<<")
        print(" f2_consume msg:")  # NOTE(review): label says f2 but the queue is ncsa_consume
        print(body)
        if body['MSG_TYPE'] == 'NEXT_VISIT':
            return
        msg = {}
        msg['ACK_ID'] = body['ACK_ID']
        msg['MSG_TYPE'] = 'NCSA_START_INTEGRATION_ACK'
        msg['COMPONENT_NAME'] = 'NCSA_FOREMAN'
        fwdrs = copy.deepcopy(body['FORWARDERS'])
        pp = pprint.PrettyPrinter(indent=2)
        print("In callback2, fwdrs dict is:")
        pp.pprint(fwdrs)
        fwdrs_keys = list(fwdrs.keys())
        i = 1
        for fwdr in fwdrs_keys:
            # Synthesize a distributor partner for each forwarder.
            dists = {}
            dists['FQN'] = "Distributor_" + str(i)
            dists['NAME'] = "D" + str(i)
            dists['HOSTNAME'] = "D" + str(i)
            dists['TARGET_DIR'] = "/dev/null"
            dists['IP_ADDR'] = "141.142.237.16" + str(i)
            fwdrs[fwdr]['DISTRIBUTOR'] = dists
            i = i + 1
        msg['PAIRS'] = fwdrs
        msg['ACK_BOOL'] = True
        msg['JOB_NUM'] = body['JOB_NUM']
        msg['IMAGE_ID'] = body['IMAGE_ID']
        msg['VISIT_ID'] = body['VISIT_ID']
        msg['SESSION_ID'] = body['SESSION_ID']
        self.sp2.publish_message("pp_foreman_ack_publish", msg)

    def do_it(self, threadname, delay):
        """Blocking consume loop for f1_consume."""
        print("Before run call")
        self._cons.run(self.mycallback)

    def do_it2(self, threadname, delay):
        """Blocking consume loop for ncsa_consume."""
        print("Before run call")
        self._cons2.run(self.mycallback2)
class BaseForeman(Foreman):
    # BaseForeman receives messages from DMCS
    # and coordinates with forwarders and NCSAForeman.
    # NOTE(review): this class is Python-2 era (`thread`, `raw_input`,
    # `string.replace`); those calls are preserved as-is.

    PROGRAM_NAME = "BASE"

    def __init__(self):
        printc("Starting...")
        # Job Scoreboard
        # FIX: bare PROGRAM_NAME is a class attribute and is not visible inside
        # methods; the original raised NameError here. Qualified with self.
        self._sb_job = Scoreboard(SCOREBOARD_DB_JOB, self.PROGRAM_NAME, 'NONE')
        # ACK Scoreboard
        self._sb_ack = Scoreboard(SCOREBOARD_DB_ACK, self.PROGRAM_NAME, 'NONE')
        # Clean redis job and ack database, done only by BaseForeman right at the start
        self._sb_job.flush_db()
        # Start internal job number at 0
        self._sb_job.reset_internal_job()
        self._machine_prefix = 'F:'
        self._machine_publish_q = Q_FORW_PUBLISH
        self._machine_consume_q = Q_FORW_CONSUME
        # Types of messages we expect to receive from each kind of queue
        # DMCS messages
        self._msg_actions_dmcs = {
            'JOB': self.process_dmcs_job,
            'STANDBY': self.process_dmcs_standby,
            'READOUT': self.process_dmcs_readout,
            'CANCEL': self.process_dmcs_cancel,
        }
        # NCSA messages
        # FIX: was the SET literal { 'None' }; on_ncsa_message calls .get() on
        # this, and sets have no .get() -- an empty dict was intended.
        self._msg_actions_ncsa = {}
        # ACK messages
        self._msg_actions_ack = {'ACK_RECEIVED': self.process_ack_received}
        # Run parent init, starts forwarder scoreboard and consumer
        self.parent_init(SCOREBOARD_DB_FORW, self.PROGRAM_NAME, REGISTER_FORWARDER)
        # DMCS consumer gets messages about new jobs and changing job states
        printc("Creating DMCS consumer...")
        self._dmcs_consumer = Consumer(self._broker_url, Q_DMCS_PUBLISH)
        try:
            thread.start_new_thread(self.run_dmcs_consumer, ())
        except:
            printc("Thread run_dmcs_consumer failed, quitting...")
            sys.exit()
        # NCSA Foreman is monitoring the distributors and reports pairings
        printc("Creating NCSA consumer...")
        self._ncsa_consumer = Consumer(self._broker_url, Q_NCSA_PUBLISH)
        try:
            thread.start_new_thread(self.run_ncsa_consumer, ())
        except:
            printc("Thread run_ncsa_consumer failed, quitting...")
            sys.exit()
        # ACK consumer gets messages from the forwarders and NCSA confirming
        # if the original message was received
        printc("Creating ACK consumer...")
        self._ack_consumer = Consumer(self._broker_url, Q_ACK_PUBLISH)
        try:
            thread.start_new_thread(self.run_ack_consumer, ())
        except:
            printc("Thread run_ack_consumer failed, quitting...")
            sys.exit()
        return

    # Message consumer functions, each blocks while waiting for a new message
    def run_dmcs_consumer(self):
        printc("DMCS message consumer is running...")
        self._dmcs_consumer.run(self.on_dmcs_message)
        printc("Exiting dmcs consumer")
        return

    def run_ncsa_consumer(self):
        printc("NCSA message consumer is running...")
        self._ncsa_consumer.run(self.on_ncsa_message)
        return

    def run_ack_consumer(self):
        printc("ACK message consumer is running...")
        self._ack_consumer.run(self.on_ack_message)
        return

    # Forwarder-specific messaging
    def process_transfer_done(self, msg_params):
        """A forwarder finished its current job; decrement the job's worker
        count and close the job out when the last worker reports in."""
        forw_finished = msg_params[NAME]
        # Only continue if the forw reported finish on the job it is on
        if (msg_params[JOB_NUM] == self._sb_mach.get_machine_job_num(forw_finished)):
            self._sb_mach.change_machine_status_to_idle(forw_finished)
            cur_workers = int(self._sb_job.get_job_value(msg_params[JOB_NUM], 'ASSIGNED_WORKERS')) - 1
            self._sb_job.add_job_value(msg_params[JOB_NUM], 'ASSIGNED_WORKERS', cur_workers)
            # If this was the last worker, the job is finished
            if 0 == cur_workers:
                self._sb_job.add_job_value(msg_params[JOB_NUM], 'TIME_FINISHED', self._sb_job._redis.time()[0])
                self._sb_job.add_job_value(msg_params[JOB_NUM], 'STATUS', 'INACTIVE')
                self._sb_job.set_job_state(msg_params[JOB_NUM], FINISHED)
        return

    # DMCS messaging
    def on_dmcs_message(self, ch, method, properties, body):
        """Consumer callback: dispatch a DMCS message by MSG_TYPE, always ack."""
        # NOTE(review): yaml.load on broker input is unsafe for untrusted
        # senders; yaml.safe_load is preferable.
        msg_dict = yaml.load(body)
        try:
            af_handler = self._msg_actions_dmcs.get(msg_dict[MSG_TYPE])
        except:
            printc("Bad DMCS message received...")
            ch.basic_ack(delivery_tag=method.delivery_tag)
            return
        af_handler(msg_dict)
        ch.basic_ack(delivery_tag=method.delivery_tag)
        return

    def process_dmcs_job(self, msg_params):
        """DMCS is sending a new job: reserve forwarders, negotiate distributor
        pairs with NCSA, then accept or refuse the job back to DMCS."""
        # DMCS's job number
        external_job_val = str(msg_params[JOB_NUM])
        # BaseForeman's internal job value
        current_job = self._sb_job.new_job()
        current_raft = int(msg_params[RAFT_NUM])
        # Add the current job to the Job Scoreboard
        self._sb_job.add_job_value(current_job, 'TIME_STARTED', self._sb_job._redis.time()[0])
        self._sb_job.add_job_value(current_job, RAFTS, current_raft)
        self._sb_job.add_job_value(current_job, 'EXT_JOB_NUM', external_job_val)
        self._sb_job.add_job_value(current_job, 'STATUS', 'ACTIVE')
        self._sb_job.set_job_state(current_job, CHECKING_RESOURCES)
        self._sb_job.add_job(current_job)
        # Check if the forwarders needed are available
        # Number of rafts is number of forwarders we need
        forwarders_needed = current_raft
        # Get the total amount of idle forwarders
        num_healthy_forwarders = self._sb_mach.count_idle(LIST_FORWARDERS)
        printc("DMCS is requesting %d pairs, have %d pairs available." % (forwarders_needed, num_healthy_forwarders))
        # If there are not enough forwarders then
        # tell DMCS we cannot accept this job right now
        if forwarders_needed > num_healthy_forwarders:
            self._sb_job.set_job_state(current_job, 'JOB_FAILED_INSUF_FORWDRS')
            self._sb_job.add_job_value(current_job, 'TIME_FAILED', self._sb_job._redis.time()[0])
            # Send message to DMCS that we cannot do this job yet.
            failed_msg = {}
            failed_msg[MSG_TYPE] = INSUFFICIENT_FORWARDERS
            failed_msg[JOB_NUM] = msg_params[JOB_NUM]
            failed_msg[NEEDED_WORKERS] = str(forwarders_needed)
            failed_msg[AVAILABLE_FORWARDERS] = str(num_healthy_forwarders)
            self._publisher.publish_message(Q_DMCS_CONSUME, yaml.dump(failed_msg))
            return
        # Otherwise, we have the needed forwarders.
        # Now we need to ask NCSA if they have the needed distributors
        else:
            # Get number of forwarders we need and set them to BUSY from IDLE
            forw_list = self._sb_mach.get_idle_list(LIST_FORWARDERS, forwarders_needed, current_job)
            # Update the Job Scoreboard
            self._sb_job.add_job_value(current_job, 'AVAIL_FORW', num_healthy_forwarders)
            self._sb_job.add_job_value(current_job, 'FORW_NEEDED', forwarders_needed)
            self._sb_job.set_job_state(current_job, 'WAITING_FOR_NCSA_RESP')
            # Send NCSA a Job Request
            # This is a heads up, we do not check for a response to this
            job_request = {}
            job_request[MSG_TYPE] = JOB_REQUEST
            printc("Sending Job Request to NCSA...")
            self._publisher.publish_message(Q_NCSA_CONSUME, yaml.dump(job_request))
            # Send forwarders New Job message (i.e. Health Check)
            # This is a timed event
            timer_id = 'ACK:1_Health:' + current_job
            new_job = {}
            new_job[MSG_TYPE] = JOB
            new_job[ACK_ID] = timer_id
            new_job[ACK_TYPE] = HEALTH
            new_job[JOB_NUM] = current_job
            printc("Sending New Job Request to the Forwarders...")
            for forw in forw_list:
                routing_key = forw + "_consume"
                self._publisher.publish_message(routing_key, yaml.dump(new_job))
            printc("Starting the New Job Request Timer")
            if not self.timer(3, timer_id, forw_list):
                printc("Timer Expired without all Forwarders reporting...")
                # Check Ack SB to see which ones did not report
                # Tell DMCS we cannot do this job
                failed_msg = {}
                failed_msg[MSG_TYPE] = INSUFFICIENT_FORWARDERS
                failed_msg[JOB_NUM] = msg_params[JOB_NUM]
                failed_msg[NEEDED_WORKERS] = str(forwarders_needed)
                failed_msg[AVAILABLE_FORWARDERS] = str(self._sb_ack.count_ack(timer_id, forw_list))
                self._publisher.publish_message(Q_DMCS_CONSUME, yaml.dump(failed_msg))
                return
            # Send NCSA a distributor request message
            # This is a timed event
            # Must reset the global dict
            timer_id = 'ACK:2_Dist_req:' + current_job
            global glb_pair_list
            glb_pair_list = None
            ncsa_dist_request = {}
            ncsa_dist_request[MSG_TYPE] = DISTRIBUTOR_REQUEST
            ncsa_dist_request[DIST_NEEDED] = forwarders_needed
            ncsa_dist_request['FORW_LIST'] = forw_list
            ncsa_dist_request[JOB_NUM] = msg_params[JOB_NUM]
            ncsa_dist_request[ACK_ID] = timer_id
            ncsa_dist_request[ACK_TYPE] = 'PAIR'
            self._publisher.publish_message(Q_NCSA_CONSUME, yaml.dump(ncsa_dist_request))
            printc("Starting Timer for NCSA Reporting...")
            if not self.timer(2, timer_id, {'PAIRING'}):
                printc("Timer Expired without NCSA reporting")
                # Tell DMCS we reject the job
                failed_msg = {}
                failed_msg[MSG_TYPE] = 'NO_NCSA_RESP_TO_DIST_REQ'
                failed_msg[JOB_NUM] = msg_params[JOB_NUM]
                self._publisher.publish_message(Q_DMCS_CONSUME, yaml.dump(failed_msg))
                return
            # Check if pair list is still set to None
            # This means NCSA responded with ACK_BOOL as FALSE
            if glb_pair_list is None:
                printc("No pair list, NCSA didn't have enough distributors.")
                # NCSA did not have enough distributors available, reject this job for now
                # FIX: the original reassigned current_job = msg_params[JOB_NUM]
                # (the EXTERNAL number) before this lookup, but the machines were
                # tagged with the INTERNAL job id, so they were never released.
                forw_list = self._sb_mach.machine_find_all_m(LIST_FORWARDERS, current_job)
                # Set forwarders we reserved for this job from BUSY back to IDLE
                self._sb_mach.set_list_to_idle(forw_list)
                self._sb_job.set_job_state(current_job, 'STANDBY_JOB_DENIED_INSUF_DIST')
                self._sb_job.add_job_value(current_job, 'TIME_FAILED', self._sb_job._redis.time()[0])
                self._sb_job.add_job_value(current_job, 'STATUS', 'INACTIVE')
                # Tell DMCS we reject the job
                failed_msg = {}
                failed_msg[MSG_TYPE] = 'INSUFFICIENT_DISTRIBUTORS'
                failed_msg[JOB_NUM] = msg_params[JOB_NUM]
                self._publisher.publish_message(Q_DMCS_CONSUME, yaml.dump(failed_msg))
                return
            # Quick sanity check to make sure we
            # got back the number we asked for
            if len(glb_pair_list) != forwarders_needed:
                printc("Invalid pair list, failed to accept job.")
                # Tell DMCS we reject the job
                failed_msg = {}
                failed_msg[MSG_TYPE] = 'INVALID_PAIR_LIST'
                failed_msg[JOB_NUM] = msg_params[JOB_NUM]
                self._publisher.publish_message(Q_DMCS_CONSUME, yaml.dump(failed_msg))
                return
            printc("Pair list is %r" % glb_pair_list)
            # Update Job scoreboard
            self._sb_job.add_job_value(current_job, 'ASSIGNED_WORKERS', len(glb_pair_list))
            self._sb_job.add_job_value(current_job, 'PAIRS', glb_pair_list)
            # Report to DMCS that we accept the job
            accept_job_msg = {}
            accept_job_msg[MSG_TYPE] = 'JOB_ACCEPTED'
            accept_job_msg[JOB_NUM] = msg_params[JOB_NUM]
            self._publisher.publish_message(Q_DMCS_CONSUME, yaml.dump(accept_job_msg))
            self._sb_job.set_job_state(current_job, 'JOB_ACCEPTED')
            return

    def process_dmcs_standby(self, msg_params):
        """A job moved to STANDBY: forwarders start pulling header data and
        distributors get ready to catch."""
        external_job_val = str(msg_params[JOB_NUM])
        # Get the internal job value that correlates with this external job
        current_job = self._sb_job.find_ext_job(external_job_val)
        if "NOT_FOUND" == current_job:
            printc("External job %s is not on the job scoreboard." % external_job_val)
            printc("STANDBY failed.")
            return
        # Generate a file name (for testing only)
        xfer_file_main = ("%04d%02d%02d-%02d%02d%02d-%06d" %
                          (datetime.datetime.today().year,
                           datetime.datetime.today().month,
                           datetime.datetime.today().day,
                           datetime.datetime.today().hour,
                           datetime.datetime.today().minute,
                           datetime.datetime.today().second,
                           datetime.datetime.today().microsecond))
        # Update job scoreboard
        self._sb_job.add_job_value(current_job, XFER_FILE, xfer_file_main)
        self._sb_job.set_job_state(current_job, 'STANDBY_FORW_DIST_ALERTING')
        # Alert NCSA Foreman this job is entering STANDBY
        # This is a timed event
        timer_id = 'ACK:3_Standby:' + current_job
        ncsa_standby_alert = {}
        ncsa_standby_alert[MSG_TYPE] = STANDBY
        ncsa_standby_alert[JOB_NUM] = current_job
        ncsa_standby_alert[XFER_FILE] = xfer_file_main
        ncsa_standby_alert[ACK_ID] = timer_id
        ncsa_standby_alert[ACK_NAME] = 'STANDBY'
        printc("Telling NCSA we are moving to STANDBY")
        self._publisher.publish_message(Q_NCSA_CONSUME, yaml.dump(ncsa_standby_alert))
        # Send STANDBY to all the forwarders in this job
        global glb_pair_list
        # pairs = self._sb_mach.machine_find_all_pairs(current_job)
        pairs = glb_pair_list
        # FIX: pairs.keys() is a view in Python 3 and has no .append(); a real
        # list is required for the 'NCSA_STANDBY' append below.
        forwarders = list(pairs.keys())
        for forwarder in forwarders:
            printc("Sending %s standby..." % forwarder)
            fw_msg = {}
            fw_msg[MSG_TYPE] = STANDBY
            fw_msg[MATE] = pairs[forwarder]
            fw_msg[JOB_NUM] = current_job
            # NOTE(review): string.replace is Python-2 only; preserved as-is.
            fw_msg[XFER_FILE] = string.replace(xfer_file_main + '_' + pairs[forwarder] + '.raw', "D:", "")
            fw_msg[ACK_ID] = timer_id
            fw_msg[ACK_TYPE] = STANDBY
            routing_key = forwarder + "_consume"
            self._publisher.publish_message(routing_key, yaml.dump(fw_msg))
        # Append STANDBY to the expected acks list
        forwarders.append('NCSA_STANDBY')
        printc("Starting the STANDBY timer")
        if not self.timer(4, timer_id, forwarders):
            printc("Timer Expired without NCSA and FORWARDERS reporting in STANDBY")
            # Check ACK SB to see who did not report
            if not self._sb_ack.check_ack(timer_id, 'NCSA_STANDBY'):
                failed_msg = {}
                failed_msg[MSG_TYPE] = 'NO_NCSA_RESP_TO_STANDBY'
                failed_msg[JOB_NUM] = msg_params[JOB_NUM]
                self._publisher.publish_message(Q_DMCS_CONSUME, yaml.dump(failed_msg))
                return
            else:
                failed_msg = {}
                failed_msg[MSG_TYPE] = 'MISSING_FORW_STANDBY'
                failed_msg[JOB_NUM] = msg_params[JOB_NUM]
                failed_msg['MISSING'] = str(self._sb_ack.missing_acks(timer_id, forwarders))
                self._publisher.publish_message(Q_DMCS_CONSUME, yaml.dump(failed_msg))
                return
        # Report to DMCS that the job is still good to go
        standby_job_msg = {}
        standby_job_msg[MSG_TYPE] = 'STANDBY_COMPLETE'
        standby_job_msg[JOB_NUM] = msg_params[JOB_NUM]
        self._publisher.publish_message(Q_DMCS_CONSUME, yaml.dump(standby_job_msg))
        self._sb_job.set_job_state(current_job, 'STANDBY')
        return

    def process_dmcs_readout(self, msg_params):
        """A job moved to READOUT: forwarders pull images from the camera
        buffer, append data, and ship image files to the distributors."""
        external_job_val = str(msg_params[JOB_NUM])
        # Get the internal job value that correlates with this external job
        current_job = self._sb_job.find_ext_job(external_job_val)
        if "NOT_FOUND" == current_job:
            printc("External job %s is not on the job scoreboard." % external_job_val)
            printc("READOUT failed.")
            return
        # If job was not in STANDBY, recover by calling that function first
        if 'STANDBY' != self._sb_job.get_job_state(current_job):
            printc("READOUT without STANDBY for Job ID %s, calling STANDBY first." % current_job)
            self.process_dmcs_standby(msg_params)
        printc("READOUT processing for Job ID %s." % current_job)
        self._sb_job.set_job_state(current_job, 'READOUT')
        # Alert NCSA Foreman we are entering READOUT
        # This is a timed event
        timer_id = 'ACK:4_Readout:' + current_job
        ncsa_readout_alert = {}
        ncsa_readout_alert[MSG_TYPE] = READOUT
        ncsa_readout_alert[JOB_NUM] = current_job
        ncsa_readout_alert[ACK_ID] = timer_id
        ncsa_readout_alert[ACK_TYPE] = READOUT
        self._publisher.publish_message(Q_NCSA_CONSUME, yaml.dump(ncsa_readout_alert))
        printc("Starting the NCSA READOUT timer...")
        if not self.timer(4, timer_id, {'READOUT'}):
            printc("Timer expired without NCSA reporting in READOUT")
            failed_msg = {}
            failed_msg[MSG_TYPE] = 'NO_NCSA_RESP_TO_READOUT'
            failed_msg[JOB_NUM] = msg_params[JOB_NUM]
            self._publisher.publish_message(Q_DMCS_CONSUME, yaml.dump(failed_msg))
            return
        # Send READOUT to forwarders
        # This is a timed event
        # In this prototype, we will wait for forwarders acks
        # instead of Condor, since we do not have that created
        timer_id = 'ACK:5_Condor:' + current_job
        pairs = self._sb_mach.machine_find_all_pairs(current_job)
        printc("%r" % pairs)
        forwarders = pairs.keys()
        for forwarder in forwarders:
            printc("Sending %s readout..." % forwarder)
            fw_msg = {}
            fw_msg[MSG_TYPE] = READOUT
            fw_msg[JOB_NUM] = msg_params[JOB_NUM]
            fw_msg[ACK_ID] = timer_id
            fw_msg[ACK_TYPE] = READOUT
            routing_key = forwarder + "_consume"
            self._publisher.publish_message(routing_key, yaml.dump(fw_msg))
        printc("Starting the timer for Condor ACK... ")
        # For now it is just waiting for the acks from the forwarders
        if not self.timer(4, timer_id, forwarders):
            printc("Timer Expired without Condor reporting in READOUT")
            # Can check ACK_SB to see who did not report
            failed_msg = {}
            failed_msg[MSG_TYPE] = 'MISSING_CONDOR_RESP'
            failed_msg[JOB_NUM] = msg_params[JOB_NUM]
            # FIX: 'forwarders_needed' is not defined in this method; the
            # expected count is the number of paired forwarders.
            failed_msg[NEEDED_WORKERS] = str(len(forwarders))
            failed_msg['MISSING'] = str(self._sb_ack.missing_acks(timer_id, forwarders))
            self._publisher.publish_message(Q_DMCS_CONSUME, yaml.dump(failed_msg))
            return
        # Report to DMCS that the job was completed
        completed_job_msg = {}
        completed_job_msg[MSG_TYPE] = 'JOB_COMPLETE'
        completed_job_msg[JOB_NUM] = msg_params[JOB_NUM]
        self._publisher.publish_message(Q_DMCS_CONSUME, yaml.dump(completed_job_msg))
        self._sb_job.set_job_state(current_job, 'JOB_COMPLETE')
        return

    def process_dmcs_cancel(self, msg_params):
        """Job was canceled, attempt to stop it on NCSA and the forwarders."""
        job_to_stop = self._sb_job.find_ext_job(msg_params[JOB_NUM])
        if "NOT_FOUND" == job_to_stop:
            # FIX: was 'external_job_val', a name never bound in this method.
            printc("External job %s is not on the job scoreboard." % msg_params[JOB_NUM])
            printc("CANCEL failed.")
            return
        cur_state = self._sb_job.get_job_value(job_to_stop, 'STATE')
        if FINISHED == cur_state:
            printc("Can't cancel this job, it is already done.")
            return
        printc("Canceling external job %s (internal job %s)..." % (msg_params[JOB_NUM], job_to_stop))
        self._sb_job.set_job_state(job_to_stop, 'JOB_CANCELED')
        self._sb_job.add_job_value(job_to_stop, 'TIME_CANCELED', self._sb_job._redis.time()[0])
        self._sb_job.add_job_value(job_to_stop, 'STATUS', 'INACTIVE')
        # Tell NCSA Foreman we are canceling this job
        stop_msg = {}
        stop_msg[MSG_TYPE] = 'CANCEL'
        stop_msg[JOB_NUM] = str(job_to_stop)
        self._publisher.publish_message(Q_NCSA_CONSUME, yaml.dump(stop_msg))
        # Tell forwarders we are canceling this job
        list_of_q = self._sb_mach.machine_find_job(LIST_FORWARDERS, job_to_stop)
        for q in list_of_q:
            self._publisher.publish_message(q, yaml.dump(stop_msg))
        printc("Job canceled. (Hopefully)")
        return

    # NCSA messaging
    def on_ncsa_message(self, ch, method, properties, body):
        """Consumer callback for NCSA messages; always acks."""
        msg_dict = yaml.load(body)
        af_handler = self._msg_actions_ncsa.get(msg_dict[MSG_TYPE])
        # FIX: guard unknown MSG_TYPE (mirrors on_ack_message); the original
        # called a None handler when no action was registered.
        if af_handler is not None:
            af_handler(msg_dict)
        ch.basic_ack(delivery_tag=method.delivery_tag)
        return

    # Acknowledgment messaging
    def on_ack_message(self, ch, method, properties, body):
        """Consumer callback for ACK messages; always acks."""
        msg_dict = yaml.load(body)
        af_handler = self._msg_actions_ack.get(msg_dict.get(MSG_TYPE))
        if af_handler is not None:
            af_handler(msg_dict)
        ch.basic_ack(delivery_tag=method.delivery_tag)
        return

    def process_ack_received(self, msg_params):
        """Record an ack on the ACK scoreboard; capture the pair list when
        NCSA answers a distributor request positively."""
        ack_id = msg_params.get(ACK_ID)
        ack_type = msg_params.get(ACK_TYPE)
        ack_name = msg_params.get(ACK_NAME)
        ack_bool = msg_params.get(ACK_BOOL)
        printc("Received ACK with name %s with ID %s" % (ack_name, ack_id))
        # Update ACK Scoreboard
        if not self._sb_ack.update_ack(ack_id, ack_name):
            printc("Unable to add the ack...")
        if PAIRS in msg_params and ack_bool:
            printc("Updating the pair list")
            global glb_pair_list
            glb_pair_list = msg_params.get(PAIRS)
        return

    def timer(self, delay, ack_id, ack_expected):
        """Poll the ACK scoreboard for up to `delay` seconds; return True
        once all expected acks arrived, False on timeout."""
        count = delay * TIMER_PRECISION
        while (count and not self._sb_ack.check_ack(ack_id, ack_expected)):
            time.sleep(1 / float(TIMER_PRECISION))
            count = count - 1
        return self._sb_ack.check_ack(ack_id, ack_expected)
from Consumer import Consumer ConsumerHigh= Consumer('ConsumerHigh','3') ConsumerHigh.run()
class DMCS:
    """DMCS simulator: an interactive console for driving the test system.

    Presents a numbered menu and publishes the corresponding control
    messages (JOB / STANDBY / READOUT / CANCEL) to the DMCS publish queue,
    while a background thread consumes and prints replies arriving on the
    DMCS consume queue.
    """
    # NOTE(review): this module uses Python 2 `thread`/`raw_input` while
    # other parts of the project use `_thread`; confirm target interpreter.

    def __init__(self):
        printc("Setting up DMCS...")
        # Menu text shown on every prompt of the run() loop.
        self._options = ("1 - (READY) Send Job Information\n"
                         "2 - (SET) Send Standby Message\n"
                         "3 - (GO) Send Readout Message\n"
                         "4 - (RESET) Cancel a Job\n"
                         "0 - (EXIT) Quit DMCS Simulator\n")
        self._broker_url = 'amqp://' + AMQP_DMCS_USER + ':' + AMQP_DMCS_PSWD + '@' + AMQP_BROKER_ADDR + ':' + AMQP_BROKER_PORT + '/' + AMQP_BROKER_VHOST
        printc('Using broker url: %s' % self._broker_url)
        printc("Declaring and binding exchanges...")
        printc("Attempting to create a consumer for the '%s' queue." % (Q_DMCS_CONSUME))
        self._dmcs_consumer = Consumer(self._broker_url, Q_DMCS_CONSUME)
        try:
            printc("Attempting to start the consumer thread...")
            thread.start_new_thread(self.run_dmcs_consumer, ())
        except:
            printc("Failed to start consumer thread, quitting...")
            sys.exit()
        printc("Done setting up consumer thread.")
        printc("Setting up publisher...")
        self._publisher = SimplePublisher(self._broker_url)
        printc("Done creating publisher.")
        # Message templates reused (and mutated) by the menu branches below.
        self._job_msg = {}
        self._job_msg['MSG_TYPE'] = 'JOB'
        self._job_msg['JOB_NUM'] = 0
        self._job_msg['RAFT_NUM'] = 1
        self._standby_msg = {}
        self._standby_msg['MSG_TYPE'] = 'STANDBY'
        self._readout_msg = {}
        self._readout_msg['MSG_TYPE'] = 'READOUT'
        self._stop_msg = {}
        self._stop_msg['MSG_TYPE'] = 'CANCEL'
        self._shutdown_msg = {}
        self._shutdown_msg['MSG_TYPE'] = 'SHUTDOWN'

    def on_dmcs_messages(self, ch, method, properties, body):
        """Consumer callback: print any message arriving for the DMCS."""
        msg_dict = yaml.load(body)
        printc("Received: %r" % msg_dict)
        ch.basic_ack(delivery_tag=method.delivery_tag)
        return

    def run_dmcs_consumer(self):
        """Thread target: blocking consume loop for DMCS replies."""
        self._dmcs_consumer.run(self.on_dmcs_messages)
        return

    def _prompt_int(self, prompt):
        """Ask the operator for an integer.

        Returns the integer, or None (after printing "Bad input...") when
        the entry is not parseable -- mirrors the original inline
        good_input / try-except pattern in each menu branch.
        """
        try:
            return int(raw_input(prompt))
        except:
            printc("Bad input...")
            return None

    def run(self):
        """Interactive menu loop; returns when the operator chooses 0."""
        keep_running = True
        while keep_running:
            try:
                user_input = int(raw_input(self._options))
            except:
                user_input = -1  # falls through to "Invalid input"
            # New Job
            if 1 == user_input:
                new_raft_num = self._prompt_int("How many pairs? \n: ")
                if new_raft_num is not None:
                    self._job_msg['JOB_NUM'] = self._job_msg['JOB_NUM'] + 1
                    self._job_msg['RAFT_NUM'] = new_raft_num
                    self._publisher.publish_message(Q_DMCS_PUBLISH, yaml.dump(self._job_msg))
            # Standby
            elif 2 == user_input:
                new_job_num = self._prompt_int("STANDBY which job? : ")
                if new_job_num is not None:
                    self._standby_msg['JOB_NUM'] = str(new_job_num)
                    self._publisher.publish_message(Q_DMCS_PUBLISH, yaml.dump(self._standby_msg))
            # Readout
            elif 3 == user_input:
                new_job_num = self._prompt_int("READOUT which job? : ")
                if new_job_num is not None:
                    self._readout_msg['JOB_NUM'] = str(new_job_num)
                    self._publisher.publish_message(Q_DMCS_PUBLISH, yaml.dump(self._readout_msg))
            # Cancel
            elif 4 == user_input:
                job_cancel = self._prompt_int("Cancel which job? : ")
                if job_cancel is not None:
                    # CONSISTENCY FIX: previously published as an int while
                    # every other command sends JOB_NUM as a string.
                    self._stop_msg['JOB_NUM'] = str(job_cancel)
                    self._publisher.publish_message(Q_DMCS_PUBLISH, yaml.dump(self._stop_msg))
            # Exit
            elif 0 == user_input:
                keep_running = False
            else:
                printc("Invalid input...\n")
        return
class NCSAForeman(Foreman):
    """NCSA-side foreman.

    Receives job-lifecycle messages from the BaseForeman and coordinates
    the distributor machines: pairing distributors with forwarders,
    fanning out STANDBY/READOUT/CANCEL, and ACKing back.
    """

    PROGRAM_NAME = "NCSA"

    def __init__(self):
        printc("Starting...")
        custom_print.define_new_name(self.PROGRAM_NAME)
        # NOTE(review): this binds the Scoreboard *class*, not an instance
        # (no parentheses).  Every later call (count_idle, get_idle_list,
        # machine_find_*) goes through the class object -- confirm that is
        # intended, or whether `Scoreboard()` was meant.
        self._sb_mach = Scoreboard
        self._machine_prefix = 'D:'
        self._machine_publish_q = Q_DIST_PUBLISH
        self._machine_consume_q = Q_DIST_CONSUME
        # Messages we can receive from BaseForeman -> handler dispatch.
        self._msg_actions_bf = {
            JOB_REQUEST: self.process_bf_job_request,
            DISTRIBUTOR_REQUEST: self.process_bf_distributor_request,
            STANDBY: self.process_bf_standby,
            READOUT: self.process_bf_readout,
            CANCEL: self.process_bf_cancel,
            TRANSFER_DONE: self.process_bf_transfer_done
        }
        # Run parent init; starts distributor scoreboard and consumer.
        # BUG FIX: PROGRAM_NAME is a class attribute, so the bare name used
        # here previously raised NameError (unless a same-named module
        # global happened to exist).
        self.parent_init(SCOREBOARD_DB_DIST, self.PROGRAM_NAME, REGISTER_DISTRIBUTOR)
        # BaseForeman consumer runs on its own thread.
        printc("Creating BaseForeman consumer...")
        self._bf_consumer = Consumer(self._broker_url, Q_NCSA_CONSUME)
        try:
            thread.start_new_thread(self.run_bf_consumer, ())
        except:
            printc("Thread run_bf_consumer failed, quitting...")
            sys.exit()
        return

    def run_bf_consumer(self):
        """Thread target: blocking consume loop for BaseForeman messages."""
        printc("BaseForeman message consumer is running...")
        self._bf_consumer.run(self.on_bf_message)
        return

    # BaseForeman messaging
    def on_bf_message(self, ch, method, properties, body):
        """BaseForeman consumer callback: dispatch on MSG_TYPE, then ack."""
        msg_dict = yaml.load(body)
        try:
            af_handler = self._msg_actions_bf.get(msg_dict[MSG_TYPE])
        except:
            # Malformed message (no MSG_TYPE key, or body not a mapping):
            # ack it so it is not redelivered, and drop it.
            printc("Bad message received...")
            ch.basic_ack(delivery_tag=method.delivery_tag)
            return
        af_handler(msg_dict)
        ch.basic_ack(delivery_tag=method.delivery_tag)
        return

    def process_bf_job_request(self, msg_params):
        # NCSA would perform any clean up needed at this point to prepare
        # for a new job.
        printc("NCSA Foreman is online and ready for the Job")
        return

    def process_bf_distributor_request(self, msg_params):
        """BaseForeman asks for distributors to pair with its forwarders.

        Replies on the ACK queue with either ACK_BOOL=False (not enough
        idle distributors) or ACK_BOOL=True plus the forwarder->distributor
        pairing map.
        """
        dist_needed = int(msg_params[DIST_NEEDED])
        num_healthy_distributors = self._sb_mach.count_idle(LIST_DISTRIBUTORS)
        printc("%d pairs available, %d were requested."
               % (num_healthy_distributors, dist_needed))
        if dist_needed > num_healthy_distributors:
            # Not enough IDLE distributors: NACK the pairing request.
            printc("Not enough distributors.")
            insuff_dist_msg = {}
            insuff_dist_msg[MSG_TYPE] = ACK_RECEIVED
            insuff_dist_msg[JOB_NUM] = msg_params[JOB_NUM]
            insuff_dist_msg[PAIRS] = None
            insuff_dist_msg[ACK_ID] = msg_params[ACK_ID]
            insuff_dist_msg[ACK_NAME] = PAIRING
            insuff_dist_msg[ACK_BOOL] = False
            self._publisher.publish_message(Q_ACK_PUBLISH, yaml.dump(insuff_dist_msg))
        else:
            # Take an IDLE distributor for each forwarder sent to us and
            # send the pairing list back.
            self._dist_list = self._sb_mach.get_idle_list(LIST_DISTRIBUTORS,
                                                          dist_needed,
                                                          msg_params[JOB_NUM])
            printc("Generating pairs list...")
            forw_list = msg_params['FORW_LIST']
            current_pairs = {}
            for counter, each in enumerate(forw_list):
                current_pairs[each] = self._dist_list[counter]
            pair_msg = {}
            pair_msg[MSG_TYPE] = ACK_RECEIVED
            pair_msg[JOB_NUM] = msg_params[JOB_NUM]
            pair_msg[PAIRS] = current_pairs
            pair_msg[ACK_ID] = msg_params[ACK_ID]
            pair_msg[ACK_NAME] = PAIRING
            pair_msg[ACK_BOOL] = True
            self._publisher.publish_message(Q_ACK_PUBLISH, yaml.dump(pair_msg))
        return

    def process_bf_standby(self, msg_params):
        """Job moved to STANDBY: forward STANDBY (with a per-distributor
        transfer-file name) to every distributor on the job, then ACK."""
        job_num_tmp = str(msg_params[JOB_NUM])
        # Name of the file the distributors will be receiving.
        xfer_file_main = msg_params[XFER_FILE]
        # Confirm distributors are working on this job.
        if self._sb_mach.machine_find_all_m_check(LIST_DISTRIBUTORS, job_num_tmp) > 0:
            distributors = self._sb_mach.machine_find_all_m(LIST_DISTRIBUTORS, job_num_tmp)
            for distributor in distributors:
                ds_msg = {}
                ds_msg[MSG_TYPE] = STANDBY
                # Per-distributor file name; the 'D:' machine prefix is
                # stripped.  (string.replace is the Python 2 string-module
                # function -- confirm interpreter version.)
                ds_msg[XFER_FILE] = string.replace(xfer_file_main + '_' + distributor + '.raw', "D:", "")
                routing_key = distributor + "_consume"
                self._publisher.publish_message(routing_key, yaml.dump(ds_msg))
            printc("Distributors have been sent the STANDBY message.")
            # Would probably wait and after getting ACK's from Distributors
            printc("Sending the STANDBY ACK...")
            ack_msg = {}
            ack_msg[ACK_ID] = msg_params[ACK_ID]
            ack_msg[ACK_NAME] = 'NCSA_STANDBY'
            ack_msg[MSG_TYPE] = ACK_RECEIVED
            ack_msg[JOB_NUM] = msg_params[JOB_NUM]
            self._publisher.publish_message(Q_ACK_PUBLISH, yaml.dump(ack_msg))
        else:
            printc("No distributors are assigned to job %s, no STANDBY sent." % job_num_tmp)
        return

    def process_bf_readout(self, msg_params):
        """Job moved to READOUT: forward READOUT to every distributor on
        the job, then ACK back to BaseForeman."""
        job_num_tmp = str(msg_params[JOB_NUM])
        # Confirm there are distributors working on this job and alert them.
        if self._sb_mach.machine_find_all_m_check(LIST_DISTRIBUTORS, job_num_tmp) > 0:
            distributors = self._sb_mach.machine_find_all_m(LIST_DISTRIBUTORS, job_num_tmp)
            for distributor in distributors:
                dist_start = {}
                dist_start[MSG_TYPE] = READOUT
                dist_start[JOB_NUM] = msg_params[JOB_NUM]
                routing_key = distributor + "_consume"
                self._publisher.publish_message(routing_key, yaml.dump(dist_start))
            printc("Distributors have been sent the READOUT message.")
            printc("Sending the READOUT ACK...")
            ack_msg = {}
            ack_msg[ACK_ID] = msg_params[ACK_ID]
            ack_msg[ACK_NAME] = READOUT
            ack_msg[MSG_TYPE] = ACK_RECEIVED
            ack_msg[JOB_NUM] = msg_params[JOB_NUM]
            self._publisher.publish_message(Q_ACK_PUBLISH, yaml.dump(ack_msg))
        else:
            printc("No distributors are assigned to job %s, no READOUT sent."
                   % job_num_tmp)
        return

    def process_bf_cancel(self, msg_params):
        """A job was canceled: tell every distributor on it to stop."""
        job_to_stop = int(msg_params[JOB_NUM])
        printc("Telling distributors to cancel job %d..." % job_to_stop)
        stop_msg = {}
        stop_msg[MSG_TYPE] = 'CANCEL'
        stop_msg[JOB_NUM] = str(job_to_stop)
        # Find the distributors on that job and message each one's queue.
        list_of_q = self._sb_mach.machine_find_job(LIST_DISTRIBUTORS, job_to_stop)
        for q in list_of_q:
            self._publisher.publish_message(q, yaml.dump(stop_msg))
        return

    def process_bf_transfer_done(self, msg_params):
        # Distributors should know when they are done and tell us,
        # probably no need for Base Foreman to do it
        return
class AuditListener:
    """Consumes audit messages from the 'audit_consume' queue and writes
    them to InfluxDB as time-series points, dispatching on DATA_TYPE and,
    for job-scoreboard messages, on SUB_TYPE."""

    def __init__(self, filename=None):
        """Load broker/audit settings from a YAML config file.

        :param filename: path to the L1 system config file; defaults to
                         'L1SystemCfg.yaml' in the current directory.
        :raises L1Error: if the config file cannot be opened.
        """
        # BUG FIX: the `filename` argument used to be ignored -- a
        # caller-supplied path left the local (builtin-shadowing) `file`
        # variable undefined and open() raised NameError.
        if filename is None:
            filename = 'L1SystemCfg.yaml'
        try:
            f = open(filename)
        except IOError:
            print("Can't open %s" % filename)
            raise L1Error
        self.cdm = yaml.safe_load(f)
        broker_address = self.cdm['ROOT']['BASE_BROKER_ADDR']
        name = self.cdm['ROOT']['AUDIT_BROKER_NAME']
        passwd = self.cdm['ROOT']['AUDIT_BROKER_PASSWD']
        self.broker_url = "amqp://" + name + ":" + passwd + "@" + str(broker_address)
        # Hard-coded test database name; the configured value is disabled.
        self.influx_db = 'MMM'
        #self.influx_db = self.cdm['ROOT']['INFLUX_DB']
        self.audit_format = "YAML"
        if 'AUDIT_MSG_FORMAT' in self.cdm['ROOT']:
            self.audit_format = self.cdm['ROOT']['AUDIT_MSG_FORMAT']
        # DATA_TYPE -> handler dispatch table.
        self.msg_actions = {
            'ACK_SCOREBOARD_DB': self.process_ack_scbd,
            'DIST_SCOREBOARD_DB': self.process_dist_scbd,
            'FWD_SCOREBOARD_DB': self.process_fwd_scbd,
            'JOB_SCOREBOARD_DB': self.process_job_scbd,
            'DMCS_SCOREBOARD_DB': self.process_dmcs_scbd,
            'BACKLOG_SCOREBOARD_DB': self.process_backlog_scbd,
            'FOREMAN_ACK_REQUEST': self.process_foreman_ack_request
        }
        # SUB_TYPE -> handler table for JOB_SCOREBOARD_DB messages.
        self.job_sub_actions = {
            'SESSION': self.process_job_session,
            'VISIT': self.process_job_visit,
            'JOB_STATE': self.process_job_state,
            'JOB_STATUS': self.process_job_status,
            'JOB_PAIRS': self.process_job_pairs
        }
        self.influx_client = InfluxDBClient('localhost', 8086)
        self.influx_client.switch_database(self.influx_db)
        self.start_consumer(self.broker_url, self.audit_format)

    def start_consumer(self, broker_url, format):
        """Create the audit-queue consumer and start its thread."""
        self.influx_consumer = Consumer(self.broker_url, "audit_consume", format)
        try:
            _thread.start_new_thread(self.run_influx_consumer, ("thread-influx-consumer", 2,))
        except:
            LOGGER.critical('Cannot start influx consumer thread, exiting...')
            sys.exit(99)

    def run_influx_consumer(self, threadname, delay):
        """Thread target: blocking consume loop."""
        self.influx_consumer.run(self.on_influx_message)

    def on_influx_message(self, ch, method, properties, msg):
        """Consumer callback: ack, then dispatch on msg['DATA_TYPE'].

        NOTE(review): `msg` is indexed as a mapping, so the consumer is
        assumed to deliver an already-decoded dict -- confirm against
        Consumer's format handling.
        """
        ch.basic_ack(method.delivery_tag)
        handler = self.msg_actions.get(msg['DATA_TYPE'])
        result = handler(msg)

    def _write_point(self, measurement, time, fields, tags=None):
        """Write one point to InfluxDB; omit 'tags' entirely when None."""
        point = {"measurement": measurement, "time": time, "fields": fields}
        if tags is not None:
            point["tags"] = tags
        self.influx_client.write_points([point])

    def process_ack_scbd(self, msg):
        """Record an ACK-scoreboard event in the 'acks' measurement."""
        tags_dict = {}
        tags_dict['ack_type'] = msg['SUB_TYPE']
        tags_dict['component'] = msg['COMPONENT']
        tags_dict['job'] = msg['JOB_NUM']
        tags_dict['ack_id'] = msg['ACK_ID']
        tags_dict['image_id'] = msg['IMAGE_ID']
        self._write_point('acks', msg['TIME'],
                          {'ack_result': msg['ACK_BOOL']}, tags_dict)

    def process_dist_scbd(self, body):
        pass

    def process_fwd_scbd(self, msg):
        pass

    def process_job_scbd(self, msg):
        """Dispatch a job-scoreboard message by its SUB_TYPE."""
        handler = self.job_sub_actions.get(msg['SUB_TYPE'])
        result = handler(msg)

    def process_job_state(self, msg):
        """Record a job STATE change, tagged by job/session/visit/image."""
        tags_dict = {}
        tags_dict['job'] = msg['JOB_NUM']
        tags_dict['session'] = msg['SESSION_ID']
        tags_dict['visit'] = msg['VISIT_ID']
        tags_dict['image_id'] = msg['IMAGE_ID']
        self._write_point(msg['SUB_TYPE'], msg['TIME'],
                          {'state': msg['STATE']}, tags_dict)

    def process_job_status(self, msg):
        """Record a job STATUS change, tagged by job/session/visit/image."""
        tags_dict = {}
        tags_dict['job'] = msg['JOB_NUM']
        tags_dict['session'] = msg['SESSION_ID']
        tags_dict['visit'] = msg['VISIT_ID']
        tags_dict['image_id'] = msg['IMAGE_ID']
        self._write_point(msg['SUB_TYPE'], msg['TIME'],
                          {'status': msg['STATUS']}, tags_dict)

    def process_job_session(self, msg):
        """Record a new session.  (Tag value is the original placeholder.)"""
        self._write_point(msg['SUB_TYPE'], msg['TIME'],
                          {'session': msg['SESSION_ID']},
                          {'sessions': "wha?"})

    def process_job_visit(self, msg):
        """Record a new visit, tagged by session."""
        self._write_point(msg['SUB_TYPE'], msg['TIME'],
                          {'visit': msg['VISIT_ID']},
                          {'session': msg['SESSION_ID']})

    def process_foreman_ack_request(self, msg):
        """Record a foreman ACK request.

        NOTE(review): the original builds a tags dict (ack_type/component)
        but never attaches it to the point; that behavior is preserved here
        (no 'tags' key written) -- confirm whether tags were intended.
        """
        tags_dict = {}
        tags_dict['ack_type'] = msg['SUB_TYPE']
        tags_dict['component'] = msg['COMPONENT']
        self._write_point(msg['SUB_TYPE'], msg['TIME'],
                          {'ack_id': msg['ACK_ID']})

    def process_job_pairs(self, msg):
        pass

    def process_dmcs_scbd(self, msg):
        pass

    def process_backlog_scbd(self, msg):
        pass

    def run(self):
        """Keep the main thread alive while the consumer thread works.

        NOTE(review): busy-wait loop, burns a core; preserved as-is.
        """
        print("Starting AuditListener...")
        while (1):
            pass
class Machine:
    """Parent class for forwarders and distributors.

    Handles the lifecycle they share: registering with the foreman to get
    a unique name, setting up the publisher/consumer pair, running the
    blocking consume loop, and deregistering on shutdown.
    """

    def __init__(self):
        try:
            # Create a lock for critical sections (used when canceling jobs)
            self._lock = thread.allocate_lock()
            # Let the subclass set its type and queues before registration.
            self.child_init()
            # Create a temporary random name until a unique name is assigned.
            c_name = ''.join(random.choice(string.ascii_letters)
                             for x in range(NAME_LENGTH))
            c_name = self._type + "_" + c_name
            custom_print.define_new_name(c_name)
            printc("Starting...")
            # Register the machine with foreman before doing anything else.
            self._register = Registration(self._publish_queue, self._consume_queue)
            # Block until a unique name is received.
            self._name = self._register.request_name()
        except:
            # NOTE(review): broad except preserved -- any failure during
            # registration silently aborts construction.
            return
        try:
            # Debug print setup
            custom_print.define_new_name(self._name)
            # Internal variable of the current state
            self._current_state = IDLE
            # Messaging setup
            self._broker_url = "amqp://" + AMQP_MACH_USER + ":" + AMQP_MACH_PSWD + "@" + AMQP_BROKER_ADDR + ":" + AMQP_BROKER_PORT + "/" + AMQP_BROKER_VHOST
            self._consume_queue = self._name + "_consume"
            self._home_dir = XFER_DIR
            # Messages foreman can send to us -> handler dispatch.
            self._msg_actions = {
                JOB: self.process_foreman_job,
                STANDBY: self.process_foreman_standby,
                READOUT: self.process_foreman_readout,
                CANCEL: self.process_foreman_cancel
            }
            # Publisher to send messages to foreman
            printc("Setting up publisher...")
            self._publisher = SimplePublisher(self._broker_url)
            # Consumer for getting messages from foreman
            printc("Setting up consumer...")
            self._consumer = Consumer(self._broker_url, self._consume_queue)
        except:
            pass
        # Run blocking consumer; returns when the consumer stops.
        try:
            self.run_consumer()
        except:
            pass
        # Alert foreman this machine is shutting down.
        self.deregister()
        return

    def run_consumer(self):
        """Blocking consume loop; dispatches messages to on_message."""
        self._consumer.run(self.on_message)
        return

    def child_init(self):
        """Hook for subclasses to override to add variables during init.

        BUG FIX: the default implementation assigned a *local* variable
        `_type` instead of `self._type`, so __init__'s use of self._type
        raised AttributeError (swallowed by the broad except, silently
        aborting construction) for any subclass that did not override it.
        """
        self._type = 'UNSET'
        return

    def deregister(self):
        """Tell the foreman this machine is going away."""
        msg = {}
        msg[MSG_TYPE] = 'DEREGISTER'
        msg[NAME] = self._name
        self._publisher.publish_message(self._publish_queue, yaml.dump(msg))
        return

    def state_update(self, key, field, value):
        """Send the foreman a scoreboard state update (key/field/value)."""
        msg = {}
        msg[MSG_TYPE] = 'STATE_UPDATE'
        msg['KEY'] = key
        msg['FIELD'] = field
        msg['VALUE'] = value
        self._publisher.publish_message(self._publish_queue, yaml.dump(msg))
        return

    # Foreman messaging
    def on_message(self, ch, method, properties, body):
        """Consumer callback: dispatch on MSG_TYPE, ack, then auto-ACK
        back to the foreman if the message carried an ACK_ID."""
        printc("Processing message...")
        msg_dict = yaml.load(body)
        try:
            af_handler = self._msg_actions.get(msg_dict[MSG_TYPE])
        except KeyError:
            printc("Invalid message received, cannot process.")
            ch.basic_ack(delivery_tag=method.delivery_tag)
            return
        # ROBUSTNESS FIX: an unknown (but present) MSG_TYPE made .get()
        # return None, which was then called and crashed the callback.
        if af_handler is None:
            printc("Invalid message received, cannot process.")
            ch.basic_ack(delivery_tag=method.delivery_tag)
            return
        af_handler(msg_dict)
        ch.basic_ack(delivery_tag=method.delivery_tag)
        # If there was an ACK_ID in this message, they want a response.
        # Initial idea is ACK to be sent after the callback has been called.
        if 'ACK_ID' in msg_dict:
            self.send_ack(msg_dict['ACK_ID'], msg_dict['ACK_TYPE'])
        return

    def send_ack(self, session_id, type = None):
        """Publish an ACK_RECEIVED for session_id (no-op if it is None)."""
        if session_id is None:
            return
        if type is None:
            type = 'UNDEFINED'
        ack_msg = {}
        ack_msg['MSG_TYPE'] = 'ACK_RECEIVED'
        ack_msg['ACK_ID'] = str(session_id)
        ack_msg['ACK_NAME'] = str(self._name)
        ack_msg['ACK_TYPE'] = str(type)
        ack_msg['ACK_BOOL'] = 'TRUE'
        self._publisher.publish_message(Q_ACK_PUBLISH, yaml.dump(ack_msg))
        return

    # Default no-op handlers; subclasses override the ones they need.
    def process_foreman_job(self, msg_params):
        return

    def process_foreman_standby(self, msg_params):
        return

    def process_foreman_readout(self, msg_params):
        return

    def process_foreman_cancel(self, msg_params):
        return