class BaseForeman:
    """Coordinate new-job and readout requests between DMCS, Forwarders and NCSA.

    On construction the foreman reads its YAML configuration, creates the
    Forwarder/Job/Ack scoreboards, builds the two broker URLs, creates the
    publishers and starts one consumer thread per inbound queue.  Incoming
    messages are dispatched on their MSG_TYPE through ``self._msg_actions``.
    """

    FWD_SCBD = None
    JOB_SCBD = None
    ACK_SCBD = None
    ACK_PUBLISH = "ack_publish"
    # These queue names are read through ``self.`` below but were never
    # defined on the class, so construction failed with AttributeError.
    # The NCSA_*/FORWARDER_* values match the sibling PromptProcessDevice.
    # NOTE(review): DMCS_PUBLISH follows the same naming pattern but is an
    # assumption - confirm against the broker configuration.
    DMCS_PUBLISH = "dmcs_publish"
    FORWARDER_PUBLISH = "forwarder_publish"
    NCSA_PUBLISH = "ncsa_publish"
    NCSA_CONSUME = "ncsa_consume"
    YAML = 'YAML'

    def __init__(self, filename=None):
        """Load configuration, build scoreboards, publishers and consumers.

        :param filename: optional path of the YAML config file; defaults to
            'ForemanCfg.yaml'.  Exits the process with code 99 when a
            required configuration key is missing.
        """
        toolsmod.singleton(self)

        self._config_file = 'ForemanCfg.yaml'
        if filename is not None:
            self._config_file = filename

        cdm = toolsmod.intake_yaml_file(self._config_file)

        try:
            # Message broker user & passwd
            self._base_name = cdm[ROOT][BASE_BROKER_NAME]
            self._base_passwd = cdm[ROOT][BASE_BROKER_PASSWD]
            self._ncsa_name = cdm[ROOT][NCSA_BROKER_NAME]
            self._ncsa_passwd = cdm[ROOT][NCSA_BROKER_PASSWD]
            self._base_broker_addr = cdm[ROOT][BASE_BROKER_ADDR]
            self._ncsa_broker_addr = cdm[ROOT][NCSA_BROKER_ADDR]
            forwarder_dict = cdm[ROOT][XFER_COMPONENTS][FORWARDERS]
        except KeyError as e:
            LOGGER.critical('Missing required configuration key: %s', e)
            print("Dictionary error")
            print("Bailing out...")
            sys.exit(99)

        # Use the same ROOT key for the membership test and the lookup.
        if 'QUEUE_PURGES' in cdm[ROOT]:
            self.purge_broker(cdm[ROOT]['QUEUE_PURGES'])

        # Message formats default to YAML unless the config overrides them.
        # The key tested and the key read are now the same literal.
        self._base_msg_format = cdm[ROOT].get('BASE_MSG_FORMAT', self.YAML)
        self._ncsa_msg_format = cdm[ROOT].get('NCSA_MSG_FORMAT', self.YAML)

        self._next_timed_ack_id = 0

        # Create Redis Forwarder table with Forwarder info
        self.FWD_SCBD = ForwarderScoreboard(forwarder_dict)
        self.JOB_SCBD = JobScoreboard()
        self.ACK_SCBD = AckScoreboard()

        self._msg_actions = {
            'NEW_JOB': self.process_dmcs_new_job,
            'READOUT': self.process_dmcs_readout,
            'NCSA_RESOURCE_QUERY_ACK': self.process_ack,
            'NCSA_STANDBY_ACK': self.process_ack,
            'NCSA_READOUT_ACK': self.process_ack,
            'FORWARDER_HEALTH_ACK': self.process_ack,
            'FORWARDER_JOB_PARAMS_ACK': self.process_ack,
            'FORWARDER_READOUT_ACK': self.process_ack,
            'NEW_JOB_ACK': self.process_ack,
        }

        self._base_broker_url = "amqp://" + self._base_name + ":" + \
            self._base_passwd + "@" + str(self._base_broker_addr)
        self._ncsa_broker_url = "amqp://" + self._ncsa_name + ":" + \
            self._ncsa_passwd + "@" + str(self._ncsa_broker_addr)
        # NOTE(review): these URLs embed the broker password, so the log
        # lines below write credentials to the log.
        LOGGER.info('Building _base_broker_url. Result is %s',
                    self._base_broker_url)
        LOGGER.info('Building _ncsa_broker_url. Result is %s',
                    self._ncsa_broker_url)

        self.setup_publishers()
        self.setup_consumers()

    def setup_consumers(self):
        """This method sets up a message listener from each entity
        with which the BaseForeman has contact here. These
        listeners are instanced in this class, but their run
        methods are each called as a separate thread. While
        pika does not claim to be thread safe, the manner in which
        the listeners are invoked below is a safe implementation
        that provides non-blocking, fully asynchronous messaging
        to the BaseForeman.

        The code in this file expects message bodies to arrive as
        YAML'd python dicts, while in fact, message bodies are sent
        on the wire as XML; this way message format can be validated,
        versioned, and specified in just one place. To make this work,
        there is an object that translates the params dict to XML, and
        vice versa. The translation object is instantiated by the consumer
        and acts as a filter before sending messages on to the registered
        callback for processing.
        """
        LOGGER.info('Setting up consumers on %s', self._base_broker_url)
        LOGGER.info('Running start_new_thread on all consumer methods')

        self._dmcs_consumer = Consumer(self._base_broker_url,
                                       self.DMCS_PUBLISH,
                                       self._base_msg_format)
        self._start_consumer_thread(self.run_dmcs_consumer,
                                    "thread-dmcs-consumer", 'DMCS', 99)

        self._forwarder_consumer = Consumer(self._base_broker_url,
                                            self.FORWARDER_PUBLISH,
                                            self._base_msg_format)
        self._start_consumer_thread(self.run_forwarder_consumer,
                                    "thread-forwarder-consumer",
                                    'FORWARDERS', 100)

        self._ncsa_consumer = Consumer(self._base_broker_url,
                                       self.NCSA_PUBLISH,
                                       self._base_msg_format)
        self._start_consumer_thread(self.run_ncsa_consumer,
                                    "thread-ncsa-consumer", 'NCSA', 101)

        self._ack_consumer = Consumer(self._base_broker_url,
                                      self.ACK_PUBLISH,
                                      self._base_msg_format)
        self._start_consumer_thread(self.run_ack_consumer,
                                    "thread-ack-consumer", 'ACK', 102)

        LOGGER.info('Finished starting all four consumer threads')

    def _start_consumer_thread(self, target, threadname, label, exit_code):
        """Start ``target`` in a new thread; exit with ``exit_code`` on failure."""
        try:
            _thread.start_new_thread(target, (threadname, 2,))
        except Exception:
            # A bare ``except:`` would also trap KeyboardInterrupt/SystemExit.
            LOGGER.critical('Cannot start %s consumer thread, exiting...',
                            label)
            sys.exit(exit_code)

    def run_dmcs_consumer(self, threadname, delay):
        """Thread body: consume DMCS messages until the consumer stops."""
        self._dmcs_consumer.run(self.on_dmcs_message)

    def run_forwarder_consumer(self, threadname, delay):
        """Thread body: consume Forwarder messages until the consumer stops."""
        self._forwarder_consumer.run(self.on_forwarder_message)

    def run_ncsa_consumer(self, threadname, delay):
        """Thread body: consume NCSA messages until the consumer stops."""
        self._ncsa_consumer.run(self.on_ncsa_message)

    def run_ack_consumer(self, threadname, delay):
        """Thread body: consume ACK messages until the consumer stops."""
        self._ack_consumer.run(self.on_ack_message)

    def setup_publishers(self):
        """Create the Base and NCSA publishers from the broker URLs."""
        LOGGER.info('Setting up Base publisher on %s using %s',
                    self._base_broker_url, self._base_msg_format)
        LOGGER.info('Setting up NCSA publisher on %s using %s',
                    self._ncsa_broker_url, self._ncsa_msg_format)
        self._base_publisher = SimplePublisher(self._base_broker_url,
                                               self._base_msg_format)
        self._ncsa_publisher = SimplePublisher(self._ncsa_broker_url,
                                               self._ncsa_msg_format)

    def _dispatch(self, msg_dict):
        """Route ``msg_dict`` to the handler registered for its MSG_TYPE.

        Unknown message types are logged and dropped instead of raising
        ``TypeError`` ('NoneType' is not callable) inside the consumer thread.
        """
        msg_type = msg_dict[MSG_TYPE]
        handler = self._msg_actions.get(msg_type)
        if handler is None:
            LOGGER.error('No handler registered for message type %s; '
                         'message dropped', msg_type)
            return None
        return handler(msg_dict)

    def on_dmcs_message(self, ch, method, properties, body):
        """Callback for messages arriving from DMCS; acks then dispatches."""
        ch.basic_ack(method.delivery_tag)
        msg_dict = body
        LOGGER.info('In DMCS message callback')
        LOGGER.debug('Thread in DMCS callback is %s', _thread.get_ident())
        LOGGER.info('Message from DMCS callback message body is: %s',
                    str(msg_dict))
        self._dispatch(msg_dict)

    def on_forwarder_message(self, ch, method, properties, body):
        """Callback for Forwarder messages; acks and logs only."""
        ch.basic_ack(method.delivery_tag)
        LOGGER.info('In Forwarder message callback, thread is %s',
                    _thread.get_ident())
        LOGGER.info('forwarder callback msg body is: %s', str(body))

    def on_ncsa_message(self, ch, method, properties, body):
        """Callback for messages arriving from NCSA; acks then dispatches."""
        ch.basic_ack(method.delivery_tag)
        LOGGER.info('In ncsa message callback, thread is %s',
                    _thread.get_ident())
        msg_dict = body
        LOGGER.info('ncsa msg callback body is: %s', str(msg_dict))
        self._dispatch(msg_dict)

    def on_ack_message(self, ch, method, properties, body):
        """Callback for ACK messages; acks then dispatches."""
        ch.basic_ack(method.delivery_tag)
        msg_dict = body
        LOGGER.info('In ACK message callback')
        LOGGER.debug('Thread in ACK callback is %s', _thread.get_ident())
        LOGGER.info('Message from ACK callback message body is: %s',
                    str(msg_dict))
        self._dispatch(msg_dict)

    @staticmethod
    def _ack_is_true(value):
        """Return True when an ACK_BOOL value signals acceptance.

        Accepts both boolean True and the string "TRUE", which is the form
        the Forwarder in this code base puts on the wire.
        """
        return value == True or value == "TRUE"  # noqa: E712

    def process_dmcs_new_job(self, params):
        """Handle a NEW_JOB request from DMCS.

        Runs a forwarder health check, asks NCSA for resources, distributes
        the job params to the paired forwarders and finally tells DMCS
        whether the job is accepted.

        :param params: message dict; must contain JOB_NUM and RAFTS.
        :return: True when the job was accepted, otherwise the result of the
            corresponding refusal method (False when forwarders failed to
            acknowledge their job params).
        """
        input_params = params
        job_num = str(input_params[JOB_NUM])
        needed_workers = len(input_params[RAFTS])

        ack_id = self.forwarder_health_check(input_params)
        # This is a HUGE num seconds for now..final setting will be
        # milliseconds
        self.ack_timer(7)

        # The original looked up an undefined ``timed_ack`` here.
        healthy_forwarders = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        if healthy_forwarders is None:
            healthy_forwarders = []

        if needed_workers > len(healthy_forwarders):
            return self.insufficient_base_resources(input_params,
                                                    healthy_forwarders)

        healthy_status = {"STATUS": "HEALTHY",
                          "STATE": "READY_WITHOUT_PARAMS"}
        self.FWD_SCBD.set_forwarder_params(healthy_forwarders, healthy_status)

        ack_id = self.ncsa_resources_query(input_params, healthy_forwarders)
        self.ack_timer(3)

        # Check ACK scoreboard for response from NCSA
        ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        idle_params = {'STATE': 'IDLE'}

        if not ncsa_response:
            result = self.ncsa_no_response(input_params)
            # The original referenced an undefined forwarder_candidate_dict;
            # every healthy forwarder was marked READY_WITHOUT_PARAMS above,
            # so all of them are returned to IDLE.
            self.FWD_SCBD.set_forwarder_params(healthy_forwarders,
                                               idle_params)
            return result

        try:
            ack_bool = ncsa_response[ACK_BOOL]
        except KeyError:
            ack_bool = None

        if not self._ack_is_true(ack_bool):
            # not enough ncsa resources to do job - Notify DMCS
            self.FWD_SCBD.set_forwarder_params(healthy_forwarders,
                                               idle_params)
            return self.insufficient_ncsa_resources(ncsa_response, job_num)

        try:
            pairs = ncsa_response[PAIRS]
        except KeyError:
            LOGGER.warning('NCSA accepted job %s but sent no pairs', job_num)
            pairs = {}

        # Distribute job params and tell DMCS I'm ready.
        fwders = list(pairs.keys())
        fwd_ack_id = self.distribute_job_params(input_params, pairs)
        self.ack_timer(3)

        fwd_params_response = self.ACK_SCBD.get_components_for_timed_ack(
            fwd_ack_id)
        if fwd_params_response and \
                (len(fwd_params_response) == len(fwders)):
            self.JOB_SCBD.set_value_for_job(job_num, "STATE",
                                            "BASE_TASK_PARAMS_SENT")
            self.JOB_SCBD.set_value_for_job(job_num,
                                            "TIME_BASE_TASK_PARAMS_SENT",
                                            get_timestamp())
            in_ready_state = {'STATE': 'READY_WITH_PARAMS'}
            self.FWD_SCBD.set_forwarder_params(fwders, in_ready_state)
            # Tell DMCS we are ready
            return self.accept_job(job_num)

        LOGGER.error('Job %s: not all forwarders acknowledged their job '
                     'params; job not accepted', job_num)
        return False

    def forwarder_health_check(self, params):
        """Register the job and send a health check to available forwarders.

        :param params: message dict; must contain JOB_NUM and 'RAFTS'.
        :return: the timed ack id the forwarders are asked to answer with.
        """
        job_num = str(params[JOB_NUM])
        raft_list = params['RAFTS']
        needed_workers = len(raft_list)

        self.JOB_SCBD.add_job(job_num, needed_workers)
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED",
                                        get_timestamp())
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED_E",
                                        get_epoch_timestamp())
        LOGGER.info('Received new job %s. Needed workers is %s',
                    job_num, needed_workers)

        # run forwarder health check
        # get timed_ack_id
        timed_ack = self.get_next_timed_ack_id("FORWARDER_HEALTH_CHECK_ACK")

        forwarders = self.FWD_SCBD.return_available_forwarders_list()
        # Mark all healthy Forwarders Unknown
        state_status = {"STATE": "HEALTH_CHECK", "STATUS": "UNKNOWN"}
        self.FWD_SCBD.set_forwarder_params(forwarders, state_status)

        # send health check messages
        ack_params = {}
        ack_params[MSG_TYPE] = FORWARDER_HEALTH_CHECK
        ack_params["ACK_ID"] = timed_ack
        ack_params[JOB_NUM] = job_num

        self.JOB_SCBD.set_value_for_job(job_num, "STATE",
                                        "BASE_RESOURCE_QUERY")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_BASE_RESOURCE_QUERY",
                                        get_timestamp())

        # NOTE(review): the audit record is assembled but not published
        # anywhere in this method. The 'audit_parsms' typo that raised
        # NameError here is fixed.
        audit_params = {}
        audit_params['DATA_TYPE'] = 'FOREMAN_ACK_REQUEST'
        audit_params['SUB_TYPE'] = 'FORWARDER_HEALTH_CHECK_ACK'
        audit_params['ACK_ID'] = timed_ack
        audit_params['COMPONENT_NAME'] = 'BASE_FOREMAN'
        audit_params['TIME'] = get_epoch_timestamp()

        for forwarder in forwarders:
            self._base_publisher.publish_message(
                self.FWD_SCBD.get_value_for_forwarder(forwarder,
                                                      "CONSUME_QUEUE"),
                ack_params)

        return timed_ack

    def insufficient_base_resources(self, params, healthy_forwarders):
        """Refuse the job towards DMCS: too few healthy forwarders.

        :param params: original NEW_JOB message (JOB_NUM, RAFTS, 'ACK_ID').
        :param healthy_forwarders: forwarders that answered the health check.
        :return: always False.
        """
        # send response msg to dmcs refusing job
        job_num = str(params[JOB_NUM])
        raft_list = params[RAFTS]
        ack_id = params['ACK_ID']
        needed_workers = len(raft_list)
        LOGGER.info('Reporting to DMCS that there are insufficient healthy '
                    'forwarders for job #%s', job_num)
        dmcs_params = {}
        fail_dict = {}
        dmcs_params[MSG_TYPE] = NEW_JOB_ACK
        dmcs_params[JOB_NUM] = job_num
        dmcs_params[ACK_BOOL] = False
        dmcs_params[ACK_ID] = ack_id

        ### NOTE FOR DMCS ACK PROCESSING:
        ### if ACK_BOOL == True, there will NOT be a FAIL_DETAILS section
        ### If ACK_BOOL == False, there will always be a FAIL_DICT to examine
        ###    AND there will always be a BASE_RESOURCES inside the FAIL_DICT
        ### If ACK_BOOL == False, and the BASE_RESOURCES inside FAIL_DETAILS
        ###    == 0, there will be only NEEDED and AVAILABLE Forwarder params
        ###    - nothing more
        ### If ACK_BOOL == False and BASE_RESOURCES inside FAIL_DETAILS == 1,
        ###    there will always be a NCSA_RESOURCES inside FAIL_DETAILS set
        ###    to either 0 or 'NO_RESPONSE'
        ### if NCSA_RESPONSE == 0, there will be NEEDED and AVAILABLE
        ###    Distributor params
        ### if NCSA_RESOURCES == 'NO_RESPONSE' there will be nothing else
        fail_dict['BASE_RESOURCES'] = '0'
        fail_dict[NEEDED_FORWARDERS] = str(needed_workers)
        fail_dict[AVAILABLE_FORWARDERS] = str(len(healthy_forwarders))
        dmcs_params['FAIL_DETAILS'] = fail_dict
        self._base_publisher.publish_message("dmcs_consume", dmcs_params)

        # mark job refused, and leave Forwarders in Idle state
        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "JOB_ABORTED")
        self.JOB_SCBD.set_value_for_job(job_num,
                                        "TIME_JOB_ABORTED_BASE_RESOURCES",
                                        get_timestamp())
        idle_state = {"STATE": "IDLE"}
        self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_state)
        return False

    def ncsa_resources_query(self, params, healthy_forwarders):
        """Ask NCSA to pair the candidate forwarders with distributors.

        One healthy forwarder is paired with each raft in ``params[RAFTS]``.

        :return: the timed ack id NCSA is asked to answer with.
        """
        job_num = str(params[JOB_NUM])
        raft_list = params[RAFTS]
        LOGGER.info('Sufficient forwarders have been found. Checking NCSA')
        self._pairs_dict = {}
        forwarder_candidate_dict = {}
        # zip() stops after the last raft, i.e. after needed_workers items.
        for forwarder, raft in zip(healthy_forwarders, raft_list):
            forwarder_candidate_dict[forwarder] = raft
            self.FWD_SCBD.set_forwarder_status(forwarder,
                                               NCSA_RESOURCES_QUERY)

        # There should be a message sent to NCSA here asking for available
        # resources
        timed_ack_id = self.get_next_timed_ack_id("NCSA_Ack")
        ncsa_params = {}
        ncsa_params[MSG_TYPE] = "NCSA_RESOURCES_QUERY"
        ncsa_params[JOB_NUM] = job_num
        ncsa_params[ACK_ID] = timed_ack_id
        ncsa_params["FORWARDERS"] = forwarder_candidate_dict
        self.JOB_SCBD.set_value_for_job(job_num, "STATE",
                                        "NCSA_RESOURCES_QUERY_SENT")
        self.JOB_SCBD.set_value_for_job(job_num,
                                        "TIME_NCSA_RESOURCES_QUERY_SENT",
                                        get_timestamp())
        self._ncsa_publisher.publish_message(self.NCSA_CONSUME, ncsa_params)
        LOGGER.info(
            'The following forwarders have been sent to NCSA for pairing:')
        LOGGER.info(forwarder_candidate_dict)
        return timed_ack_id

    def distribute_job_params(self, params, pairs):
        """Send each paired forwarder its transfer params.

        :param params: original NEW_JOB message (JOB_NUM is read).
        :param pairs: mapping of forwarder -> transfer params from NCSA.
        :return: the timed ack id the forwarders are asked to answer with.
        """
        # ncsa has enough resources...
        job_num = str(params[JOB_NUM])
        self.JOB_SCBD.set_pairs_for_job(job_num, pairs)
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_PAIRS_ADDED",
                                        get_timestamp())
        LOGGER.info('The following pairs will be used for Job #%s: %s',
                    job_num, pairs)
        fwd_ack_id = self.get_next_timed_ack_id("FWD_PARAMS_ACK")
        for fwder, transfer_params in pairs.items():
            # A fresh dict per forwarder so no message shares mutable state
            # with the next one.
            fwd_params = {}
            fwd_params[MSG_TYPE] = "FORWARDER_JOB_PARAMS"
            fwd_params[JOB_NUM] = job_num
            fwd_params[ACK_ID] = fwd_ack_id
            fwd_params["TRANSFER_PARAMS"] = transfer_params
            route_key = self.FWD_SCBD.get_value_for_forwarder(
                fwder, "CONSUME_QUEUE")
            self._base_publisher.publish_message(route_key, fwd_params)

        return fwd_ack_id

    def accept_job(self, job_num):
        """Mark the job accepted and send a positive NEW_JOB_ACK to DMCS.

        :return: always True.
        """
        dmcs_message = {}
        dmcs_message[JOB_NUM] = job_num
        dmcs_message[MSG_TYPE] = NEW_JOB_ACK
        dmcs_message[ACK_BOOL] = True
        # "STATE" literal, consistent with every other scoreboard update here.
        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "JOB_ACCEPTED")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ACCEPTED",
                                        get_timestamp())
        self._base_publisher.publish_message("dmcs_consume", dmcs_message)
        return True

    def insufficient_ncsa_resources(self, ncsa_response, job_num=None):
        """Refuse the job towards DMCS: NCSA reported too few distributors.

        :param ncsa_response: NCSA ack; NEEDED_DISTRIBUTORS and
            AVAILABLE_DISTRIBUTORS are read from it.
        :param job_num: job being refused.  The original body read an
            undefined ``job_num``; when omitted it is taken from
            ``ncsa_response[JOB_NUM]`` if present.
        :return: always True (the refusal was published).
        """
        if job_num is None:
            try:
                job_num = ncsa_response[JOB_NUM]
            except KeyError:
                job_num = None
        dmcs_params = {}
        dmcs_params[MSG_TYPE] = "NEW_JOB_ACK"
        dmcs_params[JOB_NUM] = job_num
        dmcs_params[ACK_BOOL] = False
        dmcs_params[BASE_RESOURCES] = '1'
        dmcs_params[NCSA_RESOURCES] = '0'
        dmcs_params[NEEDED_DISTRIBUTORS] = ncsa_response[NEEDED_DISTRIBUTORS]
        dmcs_params[AVAILABLE_DISTRIBUTORS] = \
            ncsa_response[AVAILABLE_DISTRIBUTORS]
        # FIXME - catch publisher exception and return False
        self._base_publisher.publish_message("dmcs_consume", dmcs_params)
        return True

    def ncsa_no_response(self, params):
        """Refuse the job towards DMCS: NCSA did not answer in time."""
        # No answer from NCSA...
        job_num = str(params[JOB_NUM])
        dmcs_params = {}
        dmcs_params[MSG_TYPE] = "NEW_JOB_ACK"
        dmcs_params[JOB_NUM] = job_num
        dmcs_params[ACK_BOOL] = False
        dmcs_params[BASE_RESOURCES] = '1'
        dmcs_params[NCSA_RESOURCES] = 'NO_RESPONSE'
        self._base_publisher.publish_message("dmcs_consume", dmcs_params)

    def process_dmcs_readout(self, params):
        """Handle a READOUT request from DMCS.

        Sends NCSA_READOUT to NCSA; on a positive ack tells every paired
        forwarder to read out and reports the outcome to DMCS.

        :param params: message dict; JOB_NUM is read.
        """
        job_number = params[JOB_NUM]
        pairs = self.JOB_SCBD.get_pairs_for_job(job_number)
        # The original had ``date - get_timestamp()``: a subtraction on an
        # undefined name instead of an assignment.
        date = get_timestamp()
        self.JOB_SCBD.set_value_for_job(job_number, TIME_START_READOUT, date)
        forwarders = list(pairs.keys())

        ack_id = self.get_next_timed_ack_id('NCSA_READOUT')
        ### Send READOUT to NCSA with ACK_ID
        ncsa_params = {}
        ncsa_params[MSG_TYPE] = 'NCSA_READOUT'
        ncsa_params[ACK_ID] = ack_id
        # Messages are handed to the publisher as dicts, as everywhere else
        # in this class; the publisher was built with the message format.
        self._ncsa_publisher.publish_message(self.NCSA_CONSUME, ncsa_params)

        self.ack_timer(4)

        ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)

        if not ncsa_response:
            # send 'no response from ncsa' to DMCS
            dmcs_params = {}
            dmcs_params[MSG_TYPE] = 'READOUT_ACK'
            dmcs_params[JOB_NUM] = job_number
            dmcs_params['ACK_BOOL'] = False
            dmcs_params['COMMENT'] = "Readout Failed: No Response from NCSA"
            self._base_publisher.publish_message('dmcs_consume', dmcs_params)
            return

        if ncsa_response['ACK_BOOL'] != True:  # noqa: E712
            # send problem with ncsa to DMCS
            dmcs_params = {}
            dmcs_params[MSG_TYPE] = 'READOUT_ACK'
            dmcs_params[JOB_NUM] = job_number
            dmcs_params['ACK_BOOL'] = False
            dmcs_params['COMMENT'] = (
                'Readout Failed: Problem at NCSA - Expected Distributor Acks '
                'is %s, Number of Distributor Acks received is %s' % (
                    ncsa_response['EXPECTED_DISTRIBUTOR_ACKS'],
                    ncsa_response['RECEIVED_DISTRIBUTOR_ACKS']))
            self._base_publisher.publish_message('dmcs_consume', dmcs_params)
            return

        # inform forwarders
        fwd_ack_id = self.get_next_timed_ack_id('FORWARDER_READOUT')
        for forwarder in forwarders:
            routing_key = self.FWD_SCBD.get_routing_key(forwarder)
            msg_params = {}
            msg_params[MSG_TYPE] = 'FORWARDER_READOUT'
            msg_params[JOB_NUM] = job_number
            msg_params['ACK_ID'] = fwd_ack_id
            self.FWD_SCBD.set_forwarder_state(forwarder, START_READOUT)
            # ``self._publisher`` never existed on this class.
            self._base_publisher.publish_message(routing_key, msg_params)
        self.ack_timer(4)

        forwarder_responses = self.ACK_SCBD.get_components_for_timed_ack(
            fwd_ack_id)
        if forwarder_responses and \
                len(forwarder_responses) == len(forwarders):
            dmcs_params = {}
            dmcs_params[MSG_TYPE] = 'READOUT_ACK'
            dmcs_params[JOB_NUM] = job_number
            dmcs_params['ACK_BOOL'] = True
            dmcs_params['COMMENT'] = "Readout begun at %s" % get_timestamp()
            self._base_publisher.publish_message('dmcs_consume', dmcs_params)
        else:
            LOGGER.warning('Job %s: not all forwarders acknowledged READOUT',
                           job_number)

    def process_ack(self, params):
        """Record an incoming ack on the Ack scoreboard."""
        self.ACK_SCBD.add_timed_ack(params)

    def get_next_timed_ack_id(self, ack_type):
        """Return ``<ack_type>_<n>`` with n a zero-padded 6-digit counter."""
        self._next_timed_ack_id = self._next_timed_ack_id + 1
        retval = ack_type + "_" + str(self._next_timed_ack_id).zfill(6)
        return retval

    def ack_timer(self, seconds):
        """Block for ``seconds`` to give acks time to arrive; returns True."""
        sleep(seconds)
        return True

    def purge_broker(self, queues):
        """Purge each named queue on the /tester vhost via rabbitmqctl."""
        for q in queues:
            cmd = "rabbitmqctl -p /tester purge_queue " + q
            os.system(cmd)
class Forwarder:
    """Presents a vanilla L1 Forwarder personality. In
       nightly operation, at least 21 of these
       components will be available at any time (one for each raft).

       Configuration is read from 'ForwarderCfg.yaml'; incoming messages are
       dispatched on their MSG_TYPE through ``self._msg_actions``.
    """

    def __init__(self):
        """Read the config file, then set up publisher, scratchpad, consumer.

        Exits the process with code 99 when a required config key is missing.
        """
        self._registered = False
        # ``with`` closes the config file; the original leaked the handle.
        with open('ForwarderCfg.yaml') as f:
            # cfg data map...
            cdm = yaml.safe_load(f)
        try:
            self._name = cdm[NAME]
            self._passwd = cdm[PASSWD]
            self._fqn = cdm[FQN]
            self._base_broker_addr = cdm[BASE_BROKER_ADDR]
            self._consume_queue = cdm[CONSUME_QUEUE]
            self._hostname = cdm[HOSTNAME]
            self._ip_addr = cdm[IP_ADDR]
            self._DAQ_PATH = cdm['DAQ_PATH']
            # XXX FIX: Put in config file
            self.CHECKSUM_ENABLED = False
        except KeyError:
            print("Missing base keywords in yaml file... Bailing out...")
            sys.exit(99)

        self._home_dir = "/home/" + self._name + "/"
        self._base_broker_url = "amqp://%s:%s@%s" % (
            self._name, self._passwd, self._base_broker_addr)

        self._msg_actions = {
            FORWARDER_HEALTH_CHECK: self.process_health_check,
            FORWARDER_JOB_PARAMS: self.process_job_params,
            # Here if AR case needs different handler
            'AR_FWDR_XFER_PARAMS': self.process_job_params,
            'AR_FWDR_READOUT': self.process_foreman_readout,
            FORWARDER_READOUT: self.process_foreman_readout,
        }

        self.setup_publishers()
        # The scratchpad must exist before the consumer thread starts, or
        # the first incoming message can hit a missing attribute.
        self._job_scratchpad = Scratchpad(self._base_broker_url)
        self.setup_consumers()

    def setup_publishers(self):
        """Create the publisher used for all outgoing messages."""
        self._publisher = SimplePublisher(self._base_broker_url)

    def setup_consumers(self):
        """Create the consumer on the configured queue and start its thread.

        Exits the process with code 99 if the thread cannot be started.
        """
        threadname = "thread-" + self._consume_queue
        print("Threadname is %s" % threadname)

        self._consumer = Consumer(self._base_broker_url, self._consume_queue)
        try:
            _thread.start_new_thread(self.run_consumer, (threadname, 2,))
            print("Started Consumer Thread")
        except Exception as e:
            print("Cannot start consumer thread %s: %s" % (threadname, e))
            sys.exit(99)

    def run_consumer(self, threadname, delay):
        """Thread body: consume messages until the consumer stops."""
        self._consumer.run(self.on_message)

    def on_message(self, ch, method, properties, body):
        """Consumer callback: ack the delivery and dispatch on MSG_TYPE."""
        # The original passed an undefined ``delivery_tag`` (NameError).
        ch.basic_ack(method.delivery_tag)
        print("INcoming PARAMS, body is:\n%s" % body)
        msg_dict = body

        handler = self._msg_actions.get(msg_dict[MSG_TYPE])
        if handler is None:
            # Unknown type: report it rather than calling None.
            print("No handler for message type %s; message dropped"
                  % msg_dict[MSG_TYPE])
            return
        handler(msg_dict)

    @staticmethod
    def _lookup_reply_queue(params, primary, fallback):
        """Return the reply queue named in ``params``.

        Messages in this system carry the queue under either
        'RESPONSE_QUEUE' or 'REPLY_QUEUE'; ``primary`` is tried first.

        :raises KeyError: (for ``primary``) when neither key is present.
        """
        if primary in params:
            return params[primary]
        if fallback in params:
            return params[fallback]
        raise KeyError(primary)

    def process_health_check(self, params):
        """Answer a health check with a FORWARDER_HEALTH_CHECK_ACK."""
        self.send_ack_response("FORWARDER_HEALTH_CHECK_ACK", params)

    def process_job_params(self, params):
        """ The structure of the incoming job params is identical to the way job params
            are sent to prompt processing forwarders:
               MSG_TYPE: AR_FWDR_XFER_PARAMS
               JOB_NUM: .....
               ACK_ID: x1
               REPLY_QUEUE: .....
               FITS: FITS metadata someday?
               TRANSFER_PARAMS:
                   DISTRIBUTOR:
                       FQN: Name of entity receiving file
                       NAME: login name for receiving entity
                       HOSTNAME: Full host name for receiving entity
                       IP_ADDR: ip addr of archive
                       TARGET_DIR: Where to put file
                    ##  Below might be better as 'xfer_unit_list' for ccds or rafts, or other
                    CCD_LIST: for example...[1,2,3,7,10,14]
                    XFER_UNIT: CCD
                    FITS: FITS metadata someday?

              After the xfer params arrive, and ack is returned, we set up some short cut helpers, such as:
                 1) Make a filename stub for job that leaves out all but the CCD number
                 2) Put together the scp/bbftp string with login name and ip addr, plus target dir
        """
        job_params = copy.deepcopy(params)
        xfer_params = job_params['TRANSFER_PARAMS']

        # Also RM fits files in xfer_dir
        cmd = "rm " + self._DAQ_PATH + "*.fits"
        os.system(cmd)

        filename_stub = "%s_%s_%s_" % (job_params['JOB_NUM'],
                                       job_params['VISIT_ID'],
                                       job_params['IMAGE_ID'])

        login_str = "%s@%s:" % (xfer_params['DISTRIBUTOR']['NAME'],
                                xfer_params['DISTRIBUTOR']['IP_ADDR'])

        target_dir = str(xfer_params['DISTRIBUTOR']['TARGET_DIR'])

        s_params = {}
        s_params['CCD_LIST'] = xfer_params['CCD_LIST']
        s_params['LOGIN_STR'] = login_str
        s_params['TARGET_DIR'] = target_dir
        s_params['FILENAME_STUB'] = filename_stub

        print("S_params are: %s" % s_params)

        # Now, s_params should have all we need for job. Place as value for
        # job_num key
        self._job_scratchpad.set_job_transfer_params(params[JOB_NUM],
                                                     s_params)
        self._job_scratchpad.set_job_state(params['JOB_NUM'],
                                           "READY_WITH_PARAMS")

        self.send_ack_response('FORWARDER_JOB_PARAMS_ACK', params)

    def process_foreman_readout(self, params):
        """Fetch, format and forward the files of a job, then report back.

        :param params: readout message; REPLY_QUEUE (or RESPONSE_QUEUE),
            JOB_NUM, 'IMAGE_ID' and 'ACK_ID' are read.
        """
        reply_queue = self._lookup_reply_queue(params, 'REPLY_QUEUE',
                                               'RESPONSE_QUEUE')

        job_number = params[JOB_NUM]
        # Check and see if scratchpad has this job_num
        if job_number not in self._job_scratchpad.keys():
            # Without stored transfer params nothing below can succeed.
            print("Readout requested for unknown job %s; ignoring"
                  % job_number)
            return

        # raw_files_dict is of the form { ccd: filename} like
        # { 2: /home/F1/xfer_dir/ccd_2.data
        raw_files_dict = self.fetch(job_number)

        # format2 takes (job_num, raw_files_dict) and returns the
        # {ccd: filename} dict that forward() needs; format() takes
        # different arguments and returns a list.
        final_filenames = self.format2(job_number, raw_files_dict)

        results = self.forward(job_number, final_filenames)

        msg = {}
        msg['MSG_TYPE'] = 'AR_ITEMS_XFERD_ACK'
        msg['JOB_NUM'] = job_number
        msg['IMAGE_ID'] = params['IMAGE_ID']
        msg['COMPONENT'] = self._fqn
        msg['ACK_ID'] = params['ACK_ID']
        # See if num keys of results == len(ccd_list) from orig msg params
        msg['ACK_BOOL'] = True
        msg['RESULT_LIST'] = results
        self._publisher.publish_message(reply_queue, msg)

    def fetch(self, job_num):
        """Return ``{ccd: "ccd_<ccd>.data"}`` for every CCD of the job."""
        ccd_list = self._job_scratchpad.get_job_value(job_num, 'CCD_LIST')
        raw_files_dict = {ccd: "ccd_" + str(ccd) + ".data"
                          for ccd in ccd_list}

        print("In Forwarder Fetch method, raw_files_dict is: \n%s"
              % raw_files_dict)
        return raw_files_dict

    def format(self, file_list, mdata):
        """Format raw files to fits files with header data.

        :param file_list: dictionary of file_name against raw file name
        :param mdata: primary meta data stream fetched from camera daq
        :return: list of the fits file names written.
        """
        final_filenames = []
        for ccd_id, raw_file_name in file_list.items():
            image_array = np.fromfile(raw_file_name, dtype=np.int32)
            header_data = mdata[ccd_id]["primary_metadata_chunk"]
            secondary_data = mdata[ccd_id]["secondary_metadata_chunk"]
            header_data.update(secondary_data)

            primary_header = pyfits.Header()
            for key, value in header_data.items():
                primary_header[key] = value
            fits_file = pyfits.PrimaryHDU(header=primary_header,
                                          data=image_array)
            # str() so integer CCD ids do not raise TypeError.
            fits_name = str(ccd_id) + ".fits"
            fits_file.writeto(fits_name)
            final_filenames.append(fits_name)
        return final_filenames

    def format2(self, job_num, raw_files_dict):
        """Assemble one file per CCD from ccd.header, raw data and a timestamp.

        :param job_num: job whose FILENAME_STUB is read from the scratchpad.
        :param raw_files_dict: ``{ccd: raw file name}`` as built by fetch().
        :return: ``{ccd: final file name}`` (names are relative to DAQ_PATH).
        """
        filename_stub = self._job_scratchpad.get_job_value(job_num,
                                                           'FILENAME_STUB')
        final_filenames = {}
        for kee, raw_name in raw_files_dict.items():
            # str() so integer CCD ids do not raise TypeError.
            final_filename = filename_stub + "_" + str(kee) + ".fits"
            target = self._DAQ_PATH + final_filename
            print("Final filename is %s" % final_filename)
            print("target is %s" % target)
            cmd1 = 'cat ' + self._DAQ_PATH + "ccd.header" + " >> " + target
            cmd2 = 'cat ' + self._DAQ_PATH + raw_name + " >> " + target
            dte = get_epoch_timestamp()
            print("DTE IS %s" % dte)
            cmd3 = 'echo ' + str(dte) + " >> " + target
            print("cmd1 is %s" % cmd1)
            print("cmd2 is %s" % cmd2)
            os.system(cmd1)
            os.system(cmd2)
            os.system(cmd3)
            final_filenames[kee] = final_filename

            print("Done in format()...file list is: %s" % final_filenames)

        print("In format method, final_filenames are:\n%s" % final_filenames)
        return final_filenames

    def forward(self, job_num, final_filenames):
        """scp every formatted file to the job's target and collect results.

        :param final_filenames: ``{ccd: file name relative to DAQ_PATH}``.
        :return: dict with parallel lists 'CCD_LIST', 'FILENAME_LIST' and
            'CHECKSUM_LIST' ('0' entries when checksums are disabled).
        """
        print("Start Time of READOUT IS: %s" % get_timestamp())
        login_str = self._job_scratchpad.get_job_value(job_num, 'LOGIN_STR')
        target_dir = self._job_scratchpad.get_job_value(job_num,
                                                        'TARGET_DIR')
        results = {}
        CCD_LIST = []
        FILENAME_LIST = []
        CHECKSUM_LIST = []
        for ccd, final_file in final_filenames.items():
            pathway = self._DAQ_PATH + final_file
            if self.CHECKSUM_ENABLED:
                # md5 needs bytes, so read the file in binary mode.
                with open(pathway, 'rb') as file_to_calc:
                    resulting_md5 = hashlib.md5(
                        file_to_calc.read()).hexdigest()
            else:
                resulting_md5 = '0'
            CCD_LIST.append(ccd)
            CHECKSUM_LIST.append(resulting_md5)
            FILENAME_LIST.append(target_dir + final_file)
            cmd = 'scp ' + pathway + " " + login_str + target_dir + final_file
            print("In forward() method, cmd is %s" % cmd)
            os.system(cmd)
            # Printed after the copy so the timestamp is the finish time.
            print("Finish Time of SCP'ing %s IS: %s"
                  % (pathway, get_timestamp()))
        results['CCD_LIST'] = CCD_LIST
        results['FILENAME_LIST'] = FILENAME_LIST
        results['CHECKSUM_LIST'] = CHECKSUM_LIST

        print("END Time of READOUT XFER IS: %s" % get_timestamp())
        print("In forward method, results are: \n%s" % results)
        return results

    def send_ack_response(self, type, params):
        """Publish an ack of message type ``type`` to the sender's reply queue.

        :param type: MSG_TYPE of the ack to send.
        :param params: the message being acknowledged; ACK_ID and JOB_NUM
            are echoed back, and the reply queue is read from
            'RESPONSE_QUEUE' (or 'REPLY_QUEUE' when that is absent).
        """
        timed_ack = params.get("ACK_ID")
        job_num = params.get(JOB_NUM)
        response_queue = self._lookup_reply_queue(params, 'RESPONSE_QUEUE',
                                                  'REPLY_QUEUE')
        msg_params = {}
        msg_params[MSG_TYPE] = type
        msg_params[JOB_NUM] = job_num
        msg_params['COMPONENT'] = self._fqn
        msg_params[ACK_BOOL] = "TRUE"
        msg_params[ACK_ID] = timed_ack
        self._publisher.publish_message(response_queue, msg_params)

    def register(self):
        """Placeholder; registration is not implemented."""
        pass
# Test-driver fragment: publishes a start-integration message and then an
# end-readout message to the "ocs_dmcs_consume" queue, pausing before each.
# `cred` and `user` are defined outside the visible part of the script.
passwd = cred.getPasswd('service_passwd')
# NOTE(review): the host part of this URL template looks garbled. "%[" is not
# a valid %-format directive, so this line is expected to raise ValueError
# when it runs; presumably it was "%s@<broker-host>" - restore the real host.
url = 'amqp://%s:%[email protected]:5672/%%2Ftest_at' % (user, passwd)
# Publisher that sends YAML-formatted messages to the broker at `url`.
sp1 = SimplePublisher(url, "YAML")

# --- Message 1: start integration for image 2 of a 3-image sequence ---
msg = {}
msg['MSG_TYPE'] = "DMCS_AT_START_INTEGRATION"
msg['IMAGE_ID'] = 'AT_O_20190315_000003'
msg['IMAGE_INDEX'] = '2'
msg['IMAGE_SEQUENCE_NAME'] = 'MAIN'
msg['IMAGES_IN_SEQUENCE'] = '3'
msg['ACK_ID'] = 'START_INT_ACK_76'
# This message names its reply queue under 'REPLY_QUEUE'.
msg['REPLY_QUEUE'] = "dmcs_ack_consume"
time.sleep(8)
print("Start Integration Message")
sp1.publish_message("ocs_dmcs_consume", msg)

# --- Message 2: end readout for the same image ---
msg = {}
msg['MSG_TYPE'] = "DMCS_AT_END_READOUT"
msg['IMAGE_ID'] = 'AT_O_20190315_000003'
msg['IMAGE_INDEX'] = '2'
msg['IMAGE_SEQUENCE_NAME'] = 'MAIN'
msg['IMAGES_IN_SEQUENCE'] = '3'
# Unlike message 1, the reply queue is given under 'RESPONSE_QUEUE' here.
msg['RESPONSE_QUEUE'] = "dmcs_ack_consume"
msg['ACK_ID'] = 'READOUT_ACK_77'
time.sleep(5)
print("READOUT Message")
sp1.publish_message("ocs_dmcs_consume", msg)

# Start of the next (header) message; its fields are set past the end of
# the visible source.
print("Sending HEADER1 information")
msg = {}
class PromptProcessDevice(iip_base):
    """Foreman for the Prompt Process device.

    Registers callbacks for DMCS, NCSA and ACK queues, keeps job, forwarder
    and ack state in scoreboards, and publishes messages to forwarders, the
    NCSA foreman and the DMCS.
    """

    PP_JOB_SCBD = None
    PP_FWD_SCBD = None
    PP_ACK_SCBD = None
    COMPONENT_NAME = 'PROMPT_PROCESS_FOREMAN'
    PP_FOREMAN_CONSUME = "pp_foreman_consume"
    PP_FOREMAN_ACK_PUBLISH = "pp_foreman_ack_publish"
    PP_START_INTEGRATION_ACK = "PP_START_INTEGRATION_ACK"
    NCSA_PUBLISH = "ncsa_publish"
    NCSA_CONSUME = "ncsa_consume"
    NCSA_NO_RESPONSE = 5705
    FORWARDER_NO_RESPONSE = 5605
    FORWARDER_PUBLISH = "forwarder_publish"
    ERROR_CODE_PREFIX = 5500
    prp = toolsmod.prp

    def __init__(self, filename=None):
        """Read the config file, then start publishers, scoreboards and consumers.

        Calls ``sys.exit`` with ``ERROR_CODE_PREFIX`` plus an offset when the
        configuration, the publishers or the consumer threads cannot be set up.

        :param filename: config file passed on to ``extract_config_values``.
        """
        toolsmod.singleton(self)

        LOGGER.info('Extracting values from Config dictionary')
        try:
            self.extract_config_values(filename)
        except Exception as e:
            # _config_file may not exist yet if the failure happened early on.
            cfg_name = getattr(self, '_config_file', filename)
            LOGGER.error("PP_Device problem configuring with file %s: %s" % (cfg_name, e))
            print("PP_Device unable to read Config file %s: %s" % (cfg_name, e))
            sys.exit(self.ERROR_CODE_PREFIX + 20)

        # Maps an incoming MSG_TYPE to the bound method that handles it.
        self._msg_actions = {
            'PP_NEW_SESSION': self.set_session,
            'PP_NEXT_VISIT': self.set_visit,
            'PP_START_INTEGRATION': self.process_start_integration,
            'PP_READOUT': self.process_dmcs_readout,
            'NCSA_RESOURCE_QUERY_ACK': self.process_ack,
            'NCSA_START_INTEGRATION_ACK': self.process_ack,
            'NCSA_READOUT_ACK': self.process_ack,
            'PP_FWDR_HEALTH_CHECK_ACK': self.process_ack,
            'PP_FWDR_XFER_PARAMS_ACK': self.process_ack,
            'PP_FWDR_READOUT_ACK': self.process_ack,
            'PENDING_ACK': self.process_pending_ack,
            'NCSA_NEXT_VISIT_ACK': self.process_ack,
        }

        self._next_timed_ack_id = 0

        try:
            self.setup_publishers()
        except L1PublisherError as e:
            LOGGER.error("PP_Device unable to start Publishers: %s" % e)
            print("PP_Device unable to start Publishers: %s" % e)
            sys.exit(self.ERROR_CODE_PREFIX + 31)

        self.setup_scoreboards()

        LOGGER.info('pp foreman consumer setup')
        self.thread_manager = None
        try:
            self.setup_consumer_threads()
        except L1Exception as e:
            LOGGER.error("PP_Device unable to launch ThreadManager: %s" % e)
            print("PP_Device unable to launch ThreadManager: %s" % e)
            sys.exit(self.ERROR_CODE_PREFIX + 1)

        LOGGER.info('Prompt Process Foreman Init complete')

    def setup_publishers(self):
        """Create the base and NCSA publishers from the configured credentials.

        :raises L1PublisherError: if either publisher cannot be created.
        """
        self._pub_base_broker_url = "amqp://" + self._pub_name + ":" + \
                                    self._pub_passwd + "@" + \
                                    str(self._base_broker_addr)
        self._pub_ncsa_broker_url = "amqp://" + self._pub_ncsa_name + ":" + \
                                    self._pub_ncsa_passwd + "@" + \
                                    str(self._ncsa_broker_addr)

        try:
            LOGGER.info('Setting up Base publisher on %s using %s',
                        self._pub_base_broker_url, self._base_msg_format)
            self._base_publisher = SimplePublisher(self._pub_base_broker_url,
                                                   self._base_msg_format)

            LOGGER.info('Setting up NCSA publisher on %s using %s',
                        self._pub_ncsa_broker_url, self._ncsa_msg_format)
            self._ncsa_publisher = SimplePublisher(self._pub_ncsa_broker_url,
                                                   self._ncsa_msg_format)
        except Exception as e:
            LOGGER.error("PP_Device unable to start Publishers: %s" % e)
            print("PP_Device unable to start Publishers: %s" % e)
            raise L1PublisherError("Critical Error: Unable to create Publishers: %s" % e) from e

    def _dispatch_message(self, msg_dict):
        """Run the handler registered for ``msg_dict[MSG_TYPE]``.

        An unregistered message type is logged and ignored instead of
        attempting to call ``None``.
        """
        msg_type = msg_dict[MSG_TYPE]
        handler = self._msg_actions.get(msg_type)
        if handler is None:
            LOGGER.error('No handler registered for message type %s', msg_type)
            return None
        return handler(msg_dict)

    def on_dmcs_message(self, ch, method, properties, body):
        """Callback for the DMCS queue: ack the delivery and dispatch the body."""
        ch.basic_ack(method.delivery_tag)
        msg_dict = body
        LOGGER.info('In DMCS message callback')
        LOGGER.info('Message from DMCS callback message body is: %s', str(msg_dict))
        self._dispatch_message(msg_dict)

    def on_forwarder_message(self, ch, method, properties, body):
        """Callback for forwarder messages: ack the delivery and log the body."""
        ch.basic_ack(method.delivery_tag)
        LOGGER.info('In Forwarder message callback, thread is %s', _thread.get_ident())
        LOGGER.info('forwarder callback msg body is: %s', str(body))

    def on_ncsa_message(self, ch, method, properties, body):
        """Callback for the NCSA queue: ack the delivery and dispatch the body."""
        ch.basic_ack(method.delivery_tag)
        msg_dict = body
        LOGGER.info('ncsa msg callback body is: %s', str(msg_dict))
        self._dispatch_message(msg_dict)

    def on_ack_message(self, ch, method, properties, body):
        """Callback for the ACK queue: ack the delivery and dispatch the body."""
        ch.basic_ack(method.delivery_tag)
        msg_dict = body
        LOGGER.info('In ACK message callback')
        LOGGER.info('Message from ACK callback message body is: %s', str(msg_dict))
        self._dispatch_message(msg_dict)

    def set_session(self, params):
        """Store the new session id and acknowledge it on ``REPLY_QUEUE``."""
        self.JOB_SCBD.set_session(params['SESSION_ID'])
        msg = {
            'MSG_TYPE': 'PP_NEW_SESSION_ACK',
            'COMPONENT': self.COMPONENT_NAME,
            'ACK_ID': params['ACK_ID'],
            'ACK_BOOL': True,
        }
        self._base_publisher.publish_message(params['REPLY_QUEUE'], msg)

    def set_visit(self, params):
        """Store visit id and boresight, forward them to NCSA, then ack the sender."""
        bore_sight = params['BORE_SIGHT']
        visit_id = params['VISIT_ID']
        self.JOB_SCBD.set_visit_id(visit_id, bore_sight)
        self.send_visit_boresight_to_ncsa(visit_id, bore_sight)

        msg = {
            'MSG_TYPE': 'PP_NEXT_VISIT_ACK',
            'COMPONENT': self.COMPONENT_NAME,
            'ACK_ID': params['ACK_ID'],
            'ACK_BOOL': True,
        }
        self._base_publisher.publish_message(params['REPLY_QUEUE'], msg)

    def send_visit_boresight_to_ncsa(self, visit_id, bore_sight):
        """Publish NCSA_NEXT_VISIT and register its ack as a pending non-blocking ack."""
        ack_id = self.get_next_timed_ack_id('NCSA_NEXT_VISIT_ACK')
        msg = {
            'MSG_TYPE': 'NCSA_NEXT_VISIT',
            'VISIT_ID': visit_id,
            'BORE_SIGHT': bore_sight,
            'SESSION_ID': self.JOB_SCBD.get_current_session(),
            'ACK_ID': ack_id,
            'REPLY_QUEUE': self.PP_FOREMAN_ACK_PUBLISH,
        }
        self._ncsa_publisher.publish_message(self.NCSA_CONSUME, msg)

        wait_time = 4
        self.set_pending_nonblock_acks([ack_id], wait_time)

    def process_start_integration(self, input_params):
        """Handle PP_START_INTEGRATION.

        Add job to Job Scoreboard
        Check forwarder health
        Mark Forwarder scoreboard as a result of above
        Divide work and assemble as a forwarder dictionary for NCSA
        Send work division to NCSA
        Persist pairings to Job Scoreboard
        Send params to Forwarders
        Confirm Forwarder Acks
        Send confirm to DMCS

        :raises L1ForwarderError: no forwarder answered the health check.
        :raises L1NcsaForemanError: forwarders did not ack their job params.
        """
        ccd_list = input_params['CCD_LIST']
        job_num = str(input_params[JOB_NUM])
        visit_id = input_params['VISIT_ID']
        image_id = input_params['IMAGE_ID']
        self.JOB_SCBD.add_job(job_num, image_id, visit_id, ccd_list)

        unknown_status = {"STATUS": "UNKNOWN", "STATE": "UNRESPONSIVE"}
        self.FWD_SCBD.setall_forwarder_params(unknown_status)

        ack_id = self.forwarder_health_check(input_params)
        self.ack_timer(2.5)
        healthy_forwarders = self.ACK_SCBD.get_components_for_timed_ack(ack_id)

        # An empty result is treated like no result: there is nobody to give work to.
        if not healthy_forwarders:
            self.JOB_SCBD.set_job_state(job_num, 'SCRUBBED')
            self.JOB_SCBD.set_job_status(job_num, 'INACTIVE')
            self.send_fault("No Response From Forwarders",
                            self.FORWARDER_NO_RESPONSE, job_num, self.COMPONENT_NAME)
            raise L1ForwarderError("No response from any Forwarder when sending job params")

        healthy_forwarders_list = list(healthy_forwarders.keys())
        for forwarder in healthy_forwarders_list:
            self.FWD_SCBD.set_forwarder_state(forwarder, 'BUSY')
            self.FWD_SCBD.set_forwarder_status(forwarder, 'HEALTHY')

        ready_status = {"STATUS": "HEALTHY", "STATE": "READY_WITHOUT_PARAMS"}
        self.FWD_SCBD.set_forwarder_params(healthy_forwarders_list, ready_status)

        work_schedule = self.divide_work(healthy_forwarders_list, ccd_list)

        ack_id = self.ncsa_resources_query(input_params, work_schedule)
        ncsa_response = self.progressive_ack_timer(ack_id, 1, 2.0)

        idle_params = {'STATE': 'IDLE'}
        if ncsa_response:
            pairs = ncsa_response['NCSA_FOREMAN']['PAIRS']

            # Distribute job params and tell DMCS we are ready.
            fwd_ack_id = self.distribute_job_params(input_params, pairs)
            fwdr_params_response = self.progressive_ack_timer(fwd_ack_id, len(pairs), 3.0)

            if fwdr_params_response:
                self.JOB_SCBD.set_value_for_job(job_num, "STATE", "FWDR_PARAMS_RECEIVED")
                in_ready_state = {'STATE': 'READY_WITH_PARAMS'}
                self.FWD_SCBD.set_forwarder_params(healthy_forwarders_list, in_ready_state)
                # Tell DMCS we are ready
                result = self.accept_job(input_params['ACK_ID'], job_num)
            else:
                self.FWD_SCBD.set_forwarder_params(healthy_forwarders_list, idle_params)
                # NOTE(review): the fault text, code and exception all name the NCSA
                # foreman although this branch is reached when forwarders fail to ack
                # their params -- kept as-is for callers; confirm intent.
                self.send_fault("No RESPONSE FROM NCSA FOREMAN",
                                self.NCSA_NO_RESPONSE, job_num, self.COMPONENT_NAME)
                raise L1NcsaForemanError("No Response From NCSA Foreman")
        else:
            result = self.ncsa_no_response(input_params)
            self.FWD_SCBD.set_forwarder_params(healthy_forwarders_list, idle_params)

        return result

    def forwarder_health_check(self, params):
        """Send PP_FWDR_HEALTH_CHECK to every known forwarder.

        :return: the timed ack id the forwarders are expected to answer with.
        """
        timed_ack = self.get_next_timed_ack_id("PP_FWDR_HEALTH_CHECK_ACK")
        forwarders = self.FWD_SCBD.return_forwarders_list()
        job_num = params[JOB_NUM]

        msg_params = {}
        msg_params[MSG_TYPE] = 'PP_FWDR_HEALTH_CHECK'
        msg_params['ACK_ID'] = timed_ack
        msg_params['REPLY_QUEUE'] = self.PP_FOREMAN_ACK_PUBLISH
        msg_params[JOB_NUM] = job_num

        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "HEALTH_CHECK")
        for forwarder in forwarders:
            self._base_publisher.publish_message(
                self.FWD_SCBD.get_routing_key(forwarder), msg_params)
        return timed_ack

    def divide_work(self, fwdrs_list, ccd_list):
        """Split ``ccd_list`` across ``fwdrs_list``.

        :return: dict with ``FORWARDER_LIST`` and ``CCD_LIST``; ``CCD_LIST`` is
            a list of CCD lists whose index matches ``FORWARDER_LIST``.

        With one forwarder it receives every CCD. With no more CCDs than
        forwarders, the first ``len(ccd_list)`` forwarders get one CCD each.
        Otherwise every forwarder gets ``len(ccd_list) // len(fwdrs_list)``
        consecutive CCDs and the first forwarder also takes the remainder.
        """
        num_fwdrs = len(fwdrs_list)
        num_ccds = len(ccd_list)
        forwarders = []
        ccd_groups = []

        if num_fwdrs == 0:
            # Nothing to assign to; avoid dividing by zero below.
            LOGGER.warning('divide_work called without forwarders; returning empty schedule')
        elif num_fwdrs == 1:
            forwarders.append(fwdrs_list[0])
            ccd_groups.append(ccd_list)
        elif num_ccds <= num_fwdrs:
            # zip stops at the shorter sequence, i.e. after num_ccds forwarders.
            for fwdr, ccd in zip(fwdrs_list, ccd_list):
                forwarders.append(fwdr)
                ccd_groups.append([ccd])
        else:
            ccds_per_fwdr, remainder_ccds = divmod(num_ccds, num_fwdrs)
            start = 0
            for index, fwdr in enumerate(fwdrs_list):
                size = ccds_per_fwdr + (remainder_ccds if index == 0 else 0)
                forwarders.append(fwdr)
                ccd_groups.append(list(ccd_list[start:start + size]))
                start += size

        return {'FORWARDER_LIST': forwarders, 'CCD_LIST': ccd_groups}

    def ncsa_resources_query(self, params, work_schedule):
        """Publish NCSA_START_INTEGRATION with the work schedule; return its ack id."""
        job_num = str(params[JOB_NUM])
        timed_ack_id = self.get_next_timed_ack_id("NCSA_START_INTEGRATION_ACK")
        ncsa_params = {}
        ncsa_params[MSG_TYPE] = "NCSA_START_INTEGRATION"
        ncsa_params[JOB_NUM] = job_num
        ncsa_params['VISIT_ID'] = params['VISIT_ID']
        ncsa_params['IMAGE_ID'] = params['IMAGE_ID']
        ncsa_params['SESSION_ID'] = params['SESSION_ID']
        ncsa_params['REPLY_QUEUE'] = self.PP_FOREMAN_ACK_PUBLISH
        ncsa_params[ACK_ID] = timed_ack_id
        ncsa_params["FORWARDERS"] = work_schedule
        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "NCSA_START_INT_SENT")
        self._ncsa_publisher.publish_message(self.NCSA_CONSUME, ncsa_params)
        LOGGER.info('The following forwarders schedule has been sent to NCSA for pairing:')
        LOGGER.info(work_schedule)
        return timed_ack_id

    def distribute_job_params(self, params, pairs):
        """Persist ``pairs`` for the job and send each forwarder its transfer params.

        :param pairs: list of dicts, each with ``FORWARDER``, ``CCD_LIST`` and
            ``DISTRIBUTOR`` keys.
        :return: the timed ack id the forwarders are expected to answer with.
        """
        job_num = str(params[JOB_NUM])
        self.JOB_SCBD.set_pairs_for_job(job_num, pairs)
        LOGGER.info('The following pairs will be used for Job #%s: %s', job_num, pairs)
        fwd_ack_id = self.get_next_timed_ack_id("FWD_PARAMS_ACK")

        fwd_params = {}
        fwd_params[MSG_TYPE] = "PP_FWDR_XFER_PARAMS"
        fwd_params[JOB_NUM] = job_num
        fwd_params['IMAGE_ID'] = params['IMAGE_ID']
        fwd_params['VISIT_ID'] = params['VISIT_ID']
        fwd_params['REPLY_QUEUE'] = self.PP_FOREMAN_ACK_PUBLISH
        fwd_params[ACK_ID] = fwd_ack_id
        fwd_params['XFER_PARAMS'] = {}

        for pair in pairs:
            fwdr = pair['FORWARDER']
            # The same message dict is reused; only the per-forwarder part changes.
            fwd_params['XFER_PARAMS']['CCD_LIST'] = pair['CCD_LIST']
            fwd_params['XFER_PARAMS']['DISTRIBUTOR'] = pair['DISTRIBUTOR']
            route_key = self.FWD_SCBD.get_value_for_forwarder(fwdr, "CONSUME_QUEUE")
            self._base_publisher.publish_message(route_key, fwd_params)

        return fwd_ack_id

    def accept_job(self, ack_id, job_num):
        """Mark the job accepted and send PP_START_INTEGRATION_ACK to the DMCS.

        :return: ``True``.
        """
        dmcs_message = {}
        dmcs_message[JOB_NUM] = job_num
        dmcs_message[MSG_TYPE] = self.PP_START_INTEGRATION_ACK
        dmcs_message['COMPONENT'] = self.COMPONENT_NAME
        dmcs_message[ACK_BOOL] = True
        dmcs_message['ACK_ID'] = ack_id
        self.JOB_SCBD.set_value_for_job(job_num, STATE, "JOB_ACCEPTED")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ACCEPTED", get_timestamp())
        self._base_publisher.publish_message("dmcs_ack_consume", dmcs_message)
        return True

    def _send_readout_ack(self, params, job_number, ack_bool):
        """Publish PP_READOUT_ACK with ``ack_bool`` to the requester's REPLY_QUEUE."""
        dmcs_params = {}
        dmcs_params[MSG_TYPE] = 'PP_READOUT_ACK'
        dmcs_params[JOB_NUM] = job_number
        dmcs_params['COMPONENT'] = self.COMPONENT_NAME
        dmcs_params['ACK_BOOL'] = ack_bool
        dmcs_params['ACK_ID'] = params['ACK_ID']
        self._base_publisher.publish_message(params['REPLY_QUEUE'], dmcs_params)

    def process_dmcs_readout(self, params):
        """Handle PP_READOUT: notify NCSA, then the paired forwarders, then ack.

        A PP_READOUT_ACK is always sent to ``params['REPLY_QUEUE']``; its
        ``ACK_BOOL`` is ``True`` only when NCSA acked positively and every
        paired forwarder answered in time.
        """
        job_number = params[JOB_NUM]
        pairs = self.JOB_SCBD.get_pairs_for_job(job_number)

        # Send READOUT to NCSA with ACK_ID
        ack_id = self.get_next_timed_ack_id('NCSA_READOUT_ACK')
        ncsa_params = {}
        ncsa_params[MSG_TYPE] = 'NCSA_READOUT'
        ncsa_params['JOB_NUM'] = job_number
        ncsa_params['VISIT_ID'] = params['VISIT_ID']
        ncsa_params['SESSION_ID'] = params['SESSION_ID']
        ncsa_params['IMAGE_ID'] = params['IMAGE_ID']
        ncsa_params['REPLY_QUEUE'] = self.PP_FOREMAN_ACK_PUBLISH
        ncsa_params[ACK_ID] = ack_id
        self._ncsa_publisher.publish_message(self.NCSA_CONSUME, ncsa_params)

        ncsa_response = self.progressive_ack_timer(ack_id, 1, 3.0)

        # No response from NCSA, or NCSA reported a problem.
        # '== True' is deliberate: only a real boolean/1 counts, not any truthy value.
        if not ncsa_response or not (ncsa_response['NCSA_FOREMAN']['ACK_BOOL'] == True):
            self._send_readout_ack(params, job_number, False)
            return

        # Inform forwarders
        fwd_ack_id = self.get_next_timed_ack_id('PP_FWDR_READOUT_ACK')
        for pair in pairs:
            forwarder = pair['FORWARDER']
            routing_key = self.FWD_SCBD.get_routing_key(forwarder)
            msg_params = {}
            msg_params[MSG_TYPE] = 'PP_FWDR_READOUT'
            msg_params[JOB_NUM] = job_number
            msg_params['REPLY_QUEUE'] = self.PP_FOREMAN_ACK_PUBLISH
            msg_params['ACK_ID'] = fwd_ack_id
            self.FWD_SCBD.set_forwarder_state(forwarder, 'START_READOUT')
            self._base_publisher.publish_message(routing_key, msg_params)

        forwarder_responses = self.progressive_ack_timer(fwd_ack_id, len(pairs), 4.0)
        # Report forwarder silence too, so the requester is never left without an answer.
        self._send_readout_ack(params, job_number, bool(forwarder_responses))

    def process_ack(self, params):
        """Record an incoming ack on the ack scoreboard."""
        self.ACK_SCBD.add_timed_ack(params)

    def get_next_timed_ack_id(self, ack_type):
        """Return ``<ack_type>_<counter>`` with the counter zero-padded to six digits."""
        self._next_timed_ack_id = self._next_timed_ack_id + 1
        return ack_type + "_" + str(self._next_timed_ack_id).zfill(6)

    def ack_timer(self, seconds):
        """Sleep for ``seconds`` and return ``True``."""
        sleep(seconds)
        return True

    def progressive_ack_timer(self, ack_id, expected_replies, seconds):
        """Poll the ack scoreboard every 0.5 s until all replies arrive or time is up.

        :return: the scoreboard entry once it holds exactly ``expected_replies``
            components, otherwise ``None`` after ``seconds`` plus one last check.
        """
        counter = 0.0
        while counter < seconds:
            counter = counter + 0.5
            sleep(0.5)
            response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
            if response is not None and len(response) == expected_replies:
                return response

        # Try one final time
        response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        if response is not None and len(response) == expected_replies:
            return response
        return None

    def set_pending_nonblock_acks(self, acks, wait_time):
        """Publish one PENDING_ACK message per ack id, expiring ``wait_time`` s from now."""
        start_time = datetime.datetime.now().time()
        expiry_time = self.add_seconds(start_time, wait_time)
        ack_msg = {}
        ack_msg[MSG_TYPE] = 'PENDING_ACK'
        ack_msg['EXPIRY_TIME'] = expiry_time
        for ack in acks:
            ack_msg[ACK_ID] = ack
            self._base_publisher.publish_message(self.PP_FOREMAN_ACK_PUBLISH, ack_msg)

    def process_pending_ack(self, params):
        """Record a pending non-blocking ack on the ack scoreboard."""
        self.ACK_SCBD.add_pending_nonblock_ack(params)

    def add_seconds(self, intime, secs):
        """Return ``intime`` (a ``datetime.time``) advanced by ``secs`` seconds.

        Only hour, minute and second of ``intime`` are used; the result is a
        time of day, so it wraps around midnight.
        """
        basetime = datetime.datetime(100, 1, 1, intime.hour, intime.minute, intime.second)
        newtime = basetime + datetime.timedelta(seconds=secs)
        return newtime.time()

    def extract_config_values(self, filename):
        """Load the YAML config and copy the needed values onto the instance.

        :raises L1ConfigIOError: the config file could not be opened.
        :raises L1ConfigKeyError: a required key is missing.
        """
        # _config_file is not assigned in this class; fall back to the argument.
        cfg_name = getattr(self, '_config_file', filename)
        LOGGER.info('Reading YAML Config file %s' % cfg_name)
        try:
            cdm = self.loadConfigFile(filename)
        except IOError as e:
            cfg_name = getattr(self, '_config_file', filename)
            LOGGER.critical("Unable to find CFG Yaml file %s\n" % cfg_name)
            print("Unable to find CFG Yaml file %s\n" % cfg_name)
            raise L1ConfigIOError("Trouble opening CFG Yaml file %s: %s" % (cfg_name, e)) from e

        try:
            self._sub_name = cdm[ROOT][PFM_BROKER_NAME]      # Message broker user & passwd
            self._sub_passwd = cdm[ROOT][PFM_BROKER_PASSWD]
            self._pub_name = cdm[ROOT]['PFM_BROKER_PUB_NAME']      # Message broker user & passwd
            self._pub_passwd = cdm[ROOT]['PFM_BROKER_PUB_PASSWD']
            self._sub_ncsa_name = cdm[ROOT]['PFM_NCSA_BROKER_NAME']
            self._sub_ncsa_passwd = cdm[ROOT]['PFM_NCSA_BROKER_PASSWD']
            self._pub_ncsa_name = cdm[ROOT]['PFM_NCSA_BROKER_PUB_NAME']
            self._pub_ncsa_passwd = cdm[ROOT]['PFM_NCSA_BROKER_PUB_PASSWD']
            self._base_broker_addr = cdm[ROOT][BASE_BROKER_ADDR]
            self._ncsa_broker_addr = cdm[ROOT][NCSA_BROKER_ADDR]
            self._forwarder_dict = cdm[ROOT][XFER_COMPONENTS]['PP_FORWARDERS']
            self._scbd_dict = cdm[ROOT]['SCOREBOARDS']
            self.DMCS_FAULT_QUEUE = cdm[ROOT]['DMCS_FAULT_QUEUE']
            self._policy_max_ccds_per_fwdr = int(cdm[ROOT]['POLICY']['MAX_CCDS_PER_FWDR'])
        except KeyError as e:
            LOGGER.critical("CDM Dictionary Key error")
            LOGGER.critical("Offending Key is %s", str(e))
            LOGGER.critical("Bailing out...")
            print("KeyError when reading CFG file. Check logs...exiting...")
            raise L1ConfigKeyError("Key Error when reading config file: %s" % e) from e

        # Message formats default to YAML unless the config overrides them.
        self._base_msg_format = 'YAML'
        self._ncsa_msg_format = 'YAML'

        if 'BASE_MSG_FORMAT' in cdm[ROOT]:
            self._base_msg_format = cdm[ROOT][BASE_MSG_FORMAT]

        if 'NCSA_MSG_FORMAT' in cdm[ROOT]:
            self._ncsa_msg_format = cdm[ROOT][NCSA_MSG_FORMAT]

    def setup_consumer_threads(self):
        """Describe the three message consumers and start them via ThreadManager.

        :raises L1ConsumerError: a thread error prevented the launch.
        :raises L1Error: any other failure while launching consumers.
        """
        LOGGER.info('Building _base_broker_url')
        base_broker_url = "amqp://" + self._sub_name + ":" + \
                          self._sub_passwd + "@" + \
                          str(self._base_broker_addr)

        ncsa_broker_url = "amqp://" + self._sub_ncsa_name + ":" + \
                          self._sub_ncsa_passwd + "@" + \
                          str(self._ncsa_broker_addr)

        self.shutdown_event = threading.Event()
        self.shutdown_event.clear()

        # (thread name, broker url, queue, callback, test_val) per consumer.
        consumers = (
            ('Thread-pp_foreman_consume', base_broker_url,
             'pp_foreman_consume', self.on_dmcs_message, None),
            ('Thread-pp_foreman_ack_publish', base_broker_url,
             'pp_foreman_ack_publish', self.on_ack_message, 'test_it'),
            ('Thread-ncsa_publish', ncsa_broker_url,
             'ncsa_publish', self.on_ncsa_message, 'test_it'),
        )
        kws = {}
        for name, amqp_url, queue, callback, test_val in consumers:
            kws[name] = {
                'amqp_url': amqp_url,
                'name': name,
                'queue': queue,
                'callback': callback,
                'format': "YAML",
                'test_val': test_val,
            }

        try:
            self.thread_manager = ThreadManager('thread-manager', kws, self.shutdown_event)
            self.thread_manager.start()
        except ThreadError as e:
            LOGGER.error("PP_Device unable to launch Consumers - Thread Error: %s" % e)
            print("PP_Device unable to launch Consumers - Thread Error: %s" % e)
            raise L1ConsumerError("Thread problem preventing Consumer launch: %s" % e) from e
        except Exception as e:
            LOGGER.error("PP_Device unable to launch Consumers: %s" % e)
            print("PP_Device unable to launch Consumers: %s" % e)
            raise L1Error("PP_Device unable to launch Consumers - Rabbit Problem?: %s" % e) from e

    def setup_scoreboards(self):
        """Create the forwarder, job and ack scoreboards; exit the process on failure."""
        try:
            # Create Redis Forwarder table with Forwarder info
            self.FWD_SCBD = ForwarderScoreboard('PP_FWD_SCBD',
                                                self._scbd_dict['PP_FWD_SCBD'],
                                                self._forwarder_dict)
            self.JOB_SCBD = JobScoreboard('PP_JOB_SCBD', self._scbd_dict['PP_JOB_SCBD'])
            self.ACK_SCBD = AckScoreboard('PP_ACK_SCBD', self._scbd_dict['PP_ACK_SCBD'])
        except L1RabbitConnectionError as e:
            LOGGER.error("PP_Device unable to complete setup_scoreboards-No Rabbit Connect: %s" % e)
            print("PP_Device unable to complete setup_scoreboards - No Rabbit Connection: %s" % e)
            sys.exit(self.ERROR_CODE_PREFIX + 11)
        except L1RedisError as e:
            LOGGER.error("PP_Device unable to complete setup_scoreboards - no Redis connect: %s" % e)
            print("PP_Device unable to complete setup_scoreboards - no Redis connection: %s" % e)
            sys.exit(self.ERROR_CODE_PREFIX + 12)
        except Exception as e:
            LOGGER.error("PP_Device init unable to complete setup_scoreboards: %s" % e)
            print("PP_Device unable to complete setup_scoreboards: %s" % e)
            sys.exit(self.ERROR_CODE_PREFIX + 10)

    def send_fault(self, error_string, error_code, job_num, component_name):
        """Publish a FAULT message describing the error to ``DMCS_FAULT_QUEUE``."""
        msg = {}
        msg['MSG_TYPE'] = 'FAULT'
        msg['COMPONENT'] = component_name
        msg['JOB_NUM'] = job_num
        msg['ERROR_CODE'] = str(error_code)
        msg["DESCRIPTION"] = error_string
        self._base_publisher.publish_message(self.DMCS_FAULT_QUEUE, msg)

    def purge_broker(self, queues):
        """Run ``rabbitmqctl purge_queue`` on vhost ``/tester`` for every queue name."""
        for q in queues:
            cmd = "rabbitmqctl -p /tester purge_queue " + q
            os.system(cmd)

    def shutdown(self):
        """Signal consumer threads to stop, then terminate the process immediately."""
        LOGGER.debug("PromptProcessDevice: Shutting down Consumer threads.")
        self.shutdown_event.set()
        LOGGER.debug("Thread Manager shutting down and app exiting...")
        print("\n")
        os._exit(0)
class PromptProcessDevice(iip_base): PP_JOB_SCBD = None PP_FWD_SCBD = None PP_ACK_SCBD = None COMPONENT_NAME = 'PROMPT_PROCESS_FOREMAN' PP_FOREMAN_CONSUME = "pp_foreman_consume" PP_FOREMAN_ACK_PUBLISH = "pp_foreman_ack_publish" PP_START_INTEGRATION_ACK = "PP_START_INTEGRATION_ACK" NCSA_PUBLISH = "ncsa_publish" NCSA_CONSUME = "ncsa_consume" NCSA_NO_RESPONSE = 5705 FORWARDER_NO_RESPONSE = 5605 FORWARDER_PUBLISH = "forwarder_publish" ERROR_CODE_PREFIX = 5500 prp = toolsmod.prp def __init__(self, filename=None): toolsmod.singleton(self) LOGGER.info('Extracting values from Config dictionary') try: self.extract_config_values(filename) except Exception as e: LOGGER.error("PP_Device problem configuring with file %s: %s" % (self._config_file, e.arg)) print("PP_Device unable to read Config file %s: %s" % (self._config_file, e.arg)) sys.exit(self.ErrorCodePrefix + 20) #self.purge_broker(cdm['ROOT']['QUEUE_PURGES']) self._msg_actions = { 'PP_NEW_SESSION': self.set_session, 'PP_NEXT_VISIT': self.set_visit, 'PP_START_INTEGRATION': self.process_start_integration, 'PP_READOUT': self.process_dmcs_readout, 'NCSA_RESOURCE_QUERY_ACK': self.process_ack, 'NCSA_START_INTEGRATION_ACK': self.process_ack, 'NCSA_READOUT_ACK': self.process_ack, 'PP_FWDR_HEALTH_CHECK_ACK': self.process_ack, 'PP_FWDR_XFER_PARAMS_ACK': self.process_ack, 'PP_FWDR_READOUT_ACK': self.process_ack, 'PENDING_ACK': self.process_pending_ack, 'NCSA_NEXT_VISIT_ACK': self.process_ack } self._next_timed_ack_id = 0 try: self.setup_publishers() except L1PublisherError as e: LOGGER.error("PP_Device unable to start Publishers: %s" % e.arg) print("PP_Device unable to start Publishers: %s" % e.arg) sys.exit(self.ErrorCodePrefix + 31) self.setup_scoreboards() LOGGER.info('pp foreman consumer setup') self.thread_manager = None try: self.setup_consumer_threads() except L1Exception as e: LOGGER.error("PP_Device unable to launch ThreadManager: %s" % e.arg) print("PP_Device unable to launch ThreadManager: %s" % e.arg) 
sys.exit(self.ErrorCodePrefix + 1) LOGGER.info('Prompt Process Foreman Init complete') def setup_publishers(self): self._pub_base_broker_url = "amqp://" + self._pub_name + ":" + \ self._pub_passwd + "@" + \ str(self._base_broker_addr) self._pub_ncsa_broker_url = "amqp://" + self._pub_ncsa_name + ":" + \ self._pub_ncsa_passwd + "@" + \ str(self._ncsa_broker_addr) try: LOGGER.info('Setting up Base publisher on %s using %s', \ self._pub_base_broker_url, self._base_msg_format) self._base_publisher = SimplePublisher(self._pub_base_broker_url, self._base_msg_format) LOGGER.info('Setting up NCSA publisher on %s using %s', \ self._pub_ncsa_broker_url, self._ncsa_msg_format) self._ncsa_publisher = SimplePublisher(self._pub_ncsa_broker_url, self._ncsa_msg_format) except Exception as e: LOGGER.error("PP_Device unable to start Publishers: %s" % e.arg) print("PP_Device unable to start Publishers: %s" % e.arg) raise L1PublisherError( "Critical Error: Unable to create Publishers: %s" % e.arg) def on_dmcs_message(self, ch, method, properties, body): ch.basic_ack(method.delivery_tag) #msg_dict = yaml.load(body) msg_dict = body LOGGER.info('In DMCS message callback') LOGGER.info('Message from DMCS callback message body is: %s', str(msg_dict)) handler = self._msg_actions.get(msg_dict[MSG_TYPE]) result = handler(msg_dict) def on_forwarder_message(self, ch, method, properties, body): ch.basic_ack(method.delivery_tag) LOGGER.info('In Forwarder message callback, thread is %s', _thread.get_ident()) LOGGER.info('forwarder callback msg body is: %s', str(body)) pass def on_ncsa_message(self, ch, method, properties, body): ch.basic_ack(method.delivery_tag) msg_dict = body LOGGER.info('ncsa msg callback body is: %s', str(msg_dict)) handler = self._msg_actions.get(msg_dict[MSG_TYPE]) result = handler(msg_dict) def on_ack_message(self, ch, method, properties, body): ch.basic_ack(method.delivery_tag) msg_dict = body LOGGER.info('In ACK message callback') LOGGER.info('Message from ACK callback 
message body is: %s', str(msg_dict)) handler = self._msg_actions.get(msg_dict[MSG_TYPE]) result = handler(msg_dict) def set_session(self, params): self.JOB_SCBD.set_session(params['SESSION_ID']) ack_id = params['ACK_ID'] msg = {} msg['MSG_TYPE'] = 'PP_NEW_SESSION_ACK' msg['COMPONENT'] = self.COMPONENT_NAME msg['ACK_ID'] = ack_id msg['ACK_BOOL'] = True route_key = params['REPLY_QUEUE'] self._base_publisher.publish_message(route_key, msg) def set_visit(self, params): bore_sight = params['BORE_SIGHT'] visit_id = params['VISIT_ID'] self.JOB_SCBD.set_visit_id(visit_id, bore_sight) ack_id = params['ACK_ID'] msg = {} ncsa_result = self.send_visit_boresight_to_ncsa(visit_id, bore_sight) msg['MSG_TYPE'] = 'PP_NEXT_VISIT_ACK' msg['COMPONENT'] = self.COMPONENT_NAME msg['ACK_ID'] = ack_id msg['ACK_BOOL'] = True route_key = params['REPLY_QUEUE'] self._base_publisher.publish_message(route_key, msg) def send_visit_boresight_to_ncsa(self, visit_id, bore_sight): msg = {} msg['MSG_TYPE'] = 'NCSA_NEXT_VISIT' msg['VISIT_ID'] = visit_id msg['BORE_SIGHT'] = bore_sight msg['SESSION_ID'] = self.JOB_SCBD.get_current_session() ack_id = self.get_next_timed_ack_id('NCSA_NEXT_VISIT_ACK') msg['ACK_ID'] = ack_id msg['REPLY_QUEUE'] = self.PP_FOREMAN_ACK_PUBLISH self._ncsa_publisher.publish_message(self.NCSA_CONSUME, msg) wait_time = 4 acks = [] acks.append(ack_id) self.set_pending_nonblock_acks(acks, wait_time) def process_start_integration(self, input_params): """ Add job to Job Scoreboard Check forwarder health Check Policy, bail if necessary Mark Forwarder scoreboard as a result of above Divide work and assemble as a forwarder dictionary for NCSA Send work division to NCSA Check Policy, bail if necessary Persist pairings to Job Scoreboard Send params to Forwarders Confirm Forwarder Acks Send confirm to DMCS """ ccd_list = input_params['CCD_LIST'] job_num = str(input_params[JOB_NUM]) visit_id = input_params['VISIT_ID'] image_id = input_params['IMAGE_ID'] self.JOB_SCBD.add_job(job_num, image_id, 
visit_id, ccd_list) unknown_status = {"STATUS": "UNKNOWN", "STATE": "UNRESPONSIVE"} self.FWD_SCBD.setall_forwarder_params(unknown_status) ack_id = self.forwarder_health_check(input_params) self.ack_timer(2.5) healthy_forwarders = self.ACK_SCBD.get_components_for_timed_ack(ack_id) if healthy_forwarders == None: self.JOB_SCBD.set_job_state(job_number, 'SCRUBBED') self.JOB_SCBD.set_job_status(job_number, 'INACTIVE') self.send_fault("No Response From Forwarders", self.FORWARDER_NO_RESPONSE, job_num, self.COMPONENT_NAME) raise L1ForwarderError( "No response from any Forwarder when sending job params") healthy_forwarders_list = list(healthy_forwarders.keys()) for forwarder in healthy_forwarders_list: self.FWD_SCBD.set_forwarder_state(forwarder, 'BUSY') self.FWD_SCBD.set_forwarder_status(forwarder, 'HEALTHY') num_healthy_forwarders = len(healthy_forwarders_list) ready_status = {"STATUS": "HEALTHY", "STATE": "READY_WITHOUT_PARAMS"} self.FWD_SCBD.set_forwarder_params(healthy_forwarders_list, ready_status) work_schedule = self.divide_work(healthy_forwarders_list, ccd_list) ack_id = self.ncsa_resources_query(input_params, work_schedule) ncsa_response = self.progressive_ack_timer(ack_id, 1, 2.0) #Check ACK scoreboard for response from NCSA if ncsa_response: pairs = [] pairs = ncsa_response['NCSA_FOREMAN']['PAIRS'] # Distribute job params and tell DMCS we are ready. 
fwd_ack_id = self.distribute_job_params(input_params, pairs) num_fwdrs = len(pairs) fwdr_params_response = self.progressive_ack_timer( fwd_ack_id, num_fwdrs, 3.0) if fwdr_params_response: self.JOB_SCBD.set_value_for_job(job_num, "STATE", "FWDR_PARAMS_RECEIVED") in_ready_state = {'STATE': 'READY_WITH_PARAMS'} self.FWD_SCBD.set_forwarder_params(healthy_forwarders_list, in_ready_state) # Tell DMCS we are ready result = self.accept_job(input_params['ACK_ID'], job_num) else: idle_params = {'STATE': 'IDLE'} self.FWD_SCBD.set_forwarder_params(needed_forwarders, idle_params) self.send_fault("No RESPONSE FROM NCSA FOREMAN", self.NCSA_NO_RESPONSE, job_num, self.COMPONENT_NAME) raise L1NcsaForemanError("No Response From NCSA Foreman") else: result = self.ncsa_no_response(input_params) idle_params = {'STATE': 'IDLE'} self.FWD_SCBD.set_forwarder_params(needed_forwarders, idle_params) return result def forwarder_health_check(self, params): # get timed_ack_id timed_ack = self.get_next_timed_ack_id("PP_FWDR_HEALTH_CHECK_ACK") forwarders = self.FWD_SCBD.return_forwarders_list() job_num = params[JOB_NUM] # send health check messages msg_params = {} msg_params[MSG_TYPE] = 'PP_FWDR_HEALTH_CHECK' msg_params['ACK_ID'] = timed_ack msg_params['REPLY_QUEUE'] = self.PP_FOREMAN_ACK_PUBLISH msg_params[JOB_NUM] = job_num self.JOB_SCBD.set_value_for_job(job_num, "STATE", "HEALTH_CHECK") for forwarder in forwarders: self._base_publisher.publish_message( self.FWD_SCBD.get_routing_key(forwarder), msg_params) return timed_ack def divide_work(self, fwdrs_list, ccd_list): num_fwdrs = len(fwdrs_list) num_ccds = len(ccd_list) schedule = {} schedule['FORWARDER_LIST'] = [] schedule['CCD_LIST'] = [ ] # A list of ccd lists; index of main list matches same forwarder list index FORWARDER_LIST = [] CCD_LIST = [] # This is a 'list of lists' if num_fwdrs == 1: FORWARDER_LIST.append(fwdrs_list[0]) CCD_LIST.append(ccd_list) schedule['FORWARDERS_LIST'] = FORWARDER_LIST schedule['CCD_LIST'] = CCD_LIST return 
schedule if num_ccds <= num_fwdrs: for k in range(0, num_ccds): little_list = [] FORWARDER_LIST.append(fwdrs_list[k]) little_list.append(ccd_list[k]) CCD_LIST.append(list(little_list)) # Need a copy here... schedule['FORWARDER_LIST'] = FORWARDER_LIST schedule['CCD_LIST'] = CCD_LIST else: ccds_per_fwdr = len(ccd_list) // num_fwdrs remainder_ccds = len(ccd_list) % num_fwdrs offset = 0 for i in range(0, num_fwdrs): tmp_list = [] for j in range(offset, (ccds_per_fwdr + offset)): if (j) >= num_ccds: break tmp_list.append(ccd_list[j]) # CCD_LIST.append(ccd_list[j]) offset = offset + ccds_per_fwdr if remainder_ccds != 0 and i == 0: for k in range(offset, offset + remainder_ccds): tmp_list.append(ccd_list[k]) offset = offset + remainder_ccds FORWARDER_LIST.append(fwdrs_list[i]) CCD_LIST.append(list(tmp_list)) #schedule[fwdrs_list[i]] = {} #schedule[fwdrs_list[i]]['CCD_LIST'] = tmp_list schedule['FORWARDER_LIST'] = FORWARDER_LIST schedule['CCD_LIST'] = CCD_LIST return schedule def ncsa_resources_query(self, params, work_schedule): job_num = str(params[JOB_NUM]) timed_ack_id = self.get_next_timed_ack_id("NCSA_START_INTEGRATION_ACK") ncsa_params = {} ncsa_params[MSG_TYPE] = "NCSA_START_INTEGRATION" ncsa_params[JOB_NUM] = job_num ncsa_params['VISIT_ID'] = params['VISIT_ID'] ncsa_params['IMAGE_ID'] = params['IMAGE_ID'] ncsa_params['SESSION_ID'] = params['SESSION_ID'] ncsa_params['REPLY_QUEUE'] = self.PP_FOREMAN_ACK_PUBLISH ncsa_params[ACK_ID] = timed_ack_id ncsa_params["FORWARDERS"] = work_schedule self.JOB_SCBD.set_value_for_job(job_num, "STATE", "NCSA_START_INT_SENT") self._ncsa_publisher.publish_message(self.NCSA_CONSUME, ncsa_params) LOGGER.info( 'The following forwarders schedule has been sent to NCSA for pairing:' ) LOGGER.info(work_schedule) return timed_ack_id def distribute_job_params(self, params, pairs): """ pairs param is a list of dicts. (look at messages.yaml, search for 'PAIR' key, and copy here """ #ncsa has enough resources... 
job_num = str(params[JOB_NUM]) self.JOB_SCBD.set_pairs_for_job(job_num, pairs) LOGGER.info('The following pairs will be used for Job #%s: %s', job_num, pairs) fwd_ack_id = self.get_next_timed_ack_id("FWD_PARAMS_ACK") fwd_params = {} fwd_params[MSG_TYPE] = "PP_FWDR_XFER_PARAMS" fwd_params[JOB_NUM] = job_num fwd_params['IMAGE_ID'] = params['IMAGE_ID'] fwd_params['VISIT_ID'] = params['VISIT_ID'] fwd_params['REPLY_QUEUE'] = self.PP_FOREMAN_ACK_PUBLISH fwd_params[ACK_ID] = fwd_ack_id fwd_params['XFER_PARAMS'] = {} for i in range(0, len(pairs)): ddict = {} ddict = pairs[i] fwdr = ddict['FORWARDER'] fwd_params['XFER_PARAMS']['CCD_LIST'] = ddict['CCD_LIST'] fwd_params['XFER_PARAMS']['DISTRIBUTOR'] = ddict['DISTRIBUTOR'] route_key = self.FWD_SCBD.get_value_for_forwarder( fwdr, "CONSUME_QUEUE") self._base_publisher.publish_message(route_key, fwd_params) return fwd_ack_id def accept_job(self, ack_id, job_num): dmcs_message = {} dmcs_message[JOB_NUM] = job_num dmcs_message[MSG_TYPE] = self.PP_START_INTEGRATION_ACK dmcs_message['COMPONENT'] = self.COMPONENT_NAME dmcs_message[ACK_BOOL] = True dmcs_message['ACK_ID'] = ack_id self.JOB_SCBD.set_value_for_job(job_num, STATE, "JOB_ACCEPTED") self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ACCEPTED", get_timestamp()) self._base_publisher.publish_message("dmcs_ack_consume", dmcs_message) return True def process_dmcs_readout(self, params): job_number = params[JOB_NUM] pairs = self.JOB_SCBD.get_pairs_for_job(job_number) ### Send READOUT to NCSA with ACK_ID ack_id = self.get_next_timed_ack_id('NCSA_READOUT_ACK') ncsa_params = {} ncsa_params[MSG_TYPE] = 'NCSA_READOUT' ncsa_params['JOB_NUM'] = job_number ncsa_params['VISIT_ID'] = params['VISIT_ID'] ncsa_params['SESSION_ID'] = params['SESSION_ID'] ncsa_params['IMAGE_ID'] = params['IMAGE_ID'] ncsa_params['REPLY_QUEUE'] = 'pp_foreman_ack_publish' ncsa_params[ACK_ID] = ack_id self._ncsa_publisher.publish_message(self.NCSA_CONSUME, ncsa_params) ncsa_response = 
self.progressive_ack_timer(ack_id, 1, 3.0) if ncsa_response: if ncsa_response['NCSA_FOREMAN']['ACK_BOOL'] == True: #inform forwarders fwd_ack_id = self.get_next_timed_ack_id('PP_FWDR_READOUT_ACK') len_pairs = len(pairs) for i in range(0, len_pairs): forwarder = pairs[i]['FORWARDER'] routing_key = self.FWD_SCBD.get_routing_key(forwarder) msg_params = {} msg_params[MSG_TYPE] = 'PP_FWDR_READOUT' msg_params[JOB_NUM] = job_number msg_params['REPLY_QUEUE'] = 'pp_foreman_ack_publish' msg_params['ACK_ID'] = fwd_ack_id self.FWD_SCBD.set_forwarder_state(forwarder, 'START_READOUT') self._base_publisher.publish_message( routing_key, msg_params) forwarder_responses = self.progressive_ack_timer( fwd_ack_id, len_pairs, 4.0) if forwarder_responses: dmcs_params = {} dmcs_params[MSG_TYPE] = 'PP_READOUT_ACK' dmcs_params[JOB_NUM] = job_number dmcs_params['COMPONENT'] = self.COMPONENT_NAME dmcs_params['ACK_BOOL'] = True dmcs_params['ACK_ID'] = params['ACK_ID'] self._base_publisher.publish_message( params['REPLY_QUEUE'], dmcs_params) else: #send problem with ncsa to DMCS dmcs_params = {} dmcs_params[MSG_TYPE] = 'PP_READOUT_ACK' dmcs_params[JOB_NUM] = job_number dmcs_params['COMPONENT'] = self.COMPONENT_NAME dmcs_params['ACK_BOOL'] = False dmcs_params['ACK_ID'] = params['ACK_ID'] self._base_publisher.publish_message('dmcs_ack_consume', dmcs_params) else: #send 'no response from ncsa' to DMCS ) dmcs_params = {} dmcs_params[MSG_TYPE] = 'PP_READOUT_ACK' dmcs_params[JOB_NUM] = job_number dmcs_params['COMPONENT'] = self.COMPONENT_NAME dmcs_params['ACK_BOOL'] = False dmcs_params['ACK_ID'] = params['ACK_ID'] self._base_publisher.publish_message(params['REPLY_QUEUE'], dmcs_params) def process_ack(self, params): self.ACK_SCBD.add_timed_ack(params) def get_next_timed_ack_id(self, ack_type): self._next_timed_ack_id = self._next_timed_ack_id + 1 return (ack_type + "_" + str(self._next_timed_ack_id).zfill(6)) def ack_timer(self, seconds): sleep(seconds) return True def progressive_ack_timer(self, 
ack_id, expected_replies, seconds): counter = 0.0 while (counter < seconds): counter = counter + 0.5 sleep(0.5) response = self.ACK_SCBD.get_components_for_timed_ack(ack_id) if response == None: continue if len(list(response.keys())) == expected_replies: return response ## Try one final time response = self.ACK_SCBD.get_components_for_timed_ack(ack_id) if response == None: return None elif len(list(response.keys())) == expected_replies: return response else: return None def set_pending_nonblock_acks(self, acks, wait_time): start_time = datetime.datetime.now().time() expiry_time = self.add_seconds(start_time, wait_time) ack_msg = {} ack_msg[MSG_TYPE] = 'PENDING_ACK' ack_msg['EXPIRY_TIME'] = expiry_time for ack in acks: ack_msg[ACK_ID] = ack self._base_publisher.publish_message(self.PP_FOREMAN_ACK_PUBLISH, ack_msg) def process_pending_ack(self, params): self.ACK_SCBD.add_pending_nonblock_ack(params) def add_seconds(self, intime, secs): basetime = datetime.datetime(100, 1, 1, intime.hour, intime.minute, intime.second) newtime = basetime + datetime.timedelta(seconds=secs) return newtime.time() def extract_config_values(self, filename): LOGGER.info('Reading YAML Config file %s' % self._config_file) try: cdm = self.loadConfigFile(filename) except IOError as e: LOGGER.critical("Unable to find CFG Yaml file %s\n" % self._config_file) print("Unable to find CFG Yaml file %s\n" % self._config_file) raise L1ConfigIOError("Trouble opening CFG Yaml file %s: %s" % (self._config_file, e.arg)) try: self._sub_name = cdm[ROOT][ PFM_BROKER_NAME] # Message broker user & passwd self._sub_passwd = cdm[ROOT][PFM_BROKER_PASSWD] self._pub_name = cdm[ROOT][ 'PFM_BROKER_PUB_NAME'] # Message broker user & passwd self._pub_passwd = cdm[ROOT]['PFM_BROKER_PUB_PASSWD'] self._sub_ncsa_name = cdm[ROOT]['PFM_NCSA_BROKER_NAME'] self._sub_ncsa_passwd = cdm[ROOT]['PFM_NCSA_BROKER_PASSWD'] self._pub_ncsa_name = cdm[ROOT]['PFM_NCSA_BROKER_PUB_NAME'] self._pub_ncsa_passwd = 
cdm[ROOT]['PFM_NCSA_BROKER_PUB_PASSWD'] self._base_broker_addr = cdm[ROOT][BASE_BROKER_ADDR] self._ncsa_broker_addr = cdm[ROOT][NCSA_BROKER_ADDR] self._forwarder_dict = cdm[ROOT][XFER_COMPONENTS]['PP_FORWARDERS'] self._scbd_dict = cdm[ROOT]['SCOREBOARDS'] self.DMCS_FAULT_QUEUE = cdm[ROOT]['DMCS_FAULT_QUEUE'] self._policy_max_ccds_per_fwdr = int( cdm[ROOT]['POLICY']['MAX_CCDS_PER_FWDR']) except KeyError as e: LOGGER.critical("CDM Dictionary Key error") LOGGER.critical("Offending Key is %s", str(e)) LOGGER.critical("Bailing out...") print("KeyError when reading CFG file. Check logs...exiting...") raise L1ConfigKeyError("Key Error when reading config file: %s" % e.arg) self._base_msg_format = 'YAML' self._ncsa_msg_format = 'YAML' if 'BASE_MSG_FORMAT' in cdm[ROOT]: self._base_msg_format = cdm[ROOT][BASE_MSG_FORMAT] if 'NCSA_MSG_FORMAT' in cdm[ROOT]: self._ncsa_msg_format = cdm[ROOT][NCSA_MSG_FORMAT] def setup_consumer_threads(self): LOGGER.info('Building _base_broker_url') base_broker_url = "amqp://" + self._sub_name + ":" + \ self._sub_passwd + "@" + \ str(self._base_broker_addr) ncsa_broker_url = "amqp://" + self._sub_ncsa_name + ":" + \ self._sub_ncsa_passwd + "@" + \ str(self._ncsa_broker_addr) self.shutdown_event = threading.Event() self.shutdown_event.clear() # Set up kwargs that describe consumers to be started # The Archive Device needs three message consumers kws = {} md = {} md['amqp_url'] = base_broker_url md['name'] = 'Thread-pp_foreman_consume' md['queue'] = 'pp_foreman_consume' md['callback'] = self.on_dmcs_message md['format'] = "YAML" md['test_val'] = None kws[md['name']] = md md = {} md['amqp_url'] = base_broker_url md['name'] = 'Thread-pp_foreman_ack_publish' md['queue'] = 'pp_foreman_ack_publish' md['callback'] = self.on_ack_message md['format'] = "YAML" md['test_val'] = 'test_it' kws[md['name']] = md md = {} md['amqp_url'] = ncsa_broker_url md['name'] = 'Thread-ncsa_publish' md['queue'] = 'ncsa_publish' md['callback'] = self.on_ncsa_message 
md['format'] = "YAML" md['test_val'] = 'test_it' kws[md['name']] = md try: self.thread_manager = ThreadManager('thread-manager', kws, self.shutdown_event) self.thread_manager.start() except ThreadError as e: LOGGER.error( "PP_Device unable to launch Consumers - Thread Error: %s" % e.arg) print("PP_Device unable to launch Consumers - Thread Error: %s" % e.arg) raise L1ConsumerError( "Thread problem preventing Consumer launch: %s" % e.arg) except Exception as e: LOGGER.error("PP_Device unable to launch Consumers: %s" % e.arg) print("PP_Device unable to launch Consumers: %s" % e.arg) raise L1Error( "PP_Device unable to launch Consumers - Rabbit Problem?: %s" % e.arg) def setup_scoreboards(self): try: # Create Redis Forwarder table with Forwarder info self.FWD_SCBD = ForwarderScoreboard('PP_FWD_SCBD', self._scbd_dict['PP_FWD_SCBD'], self._forwarder_dict) self.JOB_SCBD = JobScoreboard('PP_JOB_SCBD', self._scbd_dict['PP_JOB_SCBD']) self.ACK_SCBD = AckScoreboard('PP_ACK_SCBD', self._scbd_dict['PP_ACK_SCBD']) except L1RabbitConnectionError as e: LOGGER.error( "PP_Device unable to complete setup_scoreboards-No Rabbit Connect: %s" % e.arg) print( "PP_Device unable to complete setup_scoreboards - No Rabbit Connection: %s" % e.arg) sys.exit(self.ErrorCodePrefix + 11) except L1RedisError as e: LOGGER.error( "PP_Device unable to complete setup_scoreboards - no Redis connect: %s" % e.arg) print( "PP_Device unable to complete setup_scoreboards - no Redis connection: %s" % e.arg) sys.exit(self.ErrorCodePrefix + 12) except Exception as e: LOGGER.error( "PP_Device init unable to complete setup_scoreboards: %s" % e.arg) print("PP_Device unable to complete setup_scoreboards: %s" % e.arg) sys.exit(self.ErrorCodePrefix + 10) def send_fault(error_string, error_code, job_num, component_name): msg = {} msg['MSG_TYPE'] = 'FAULT' msg['COMPONENT'] = component_name msg['JOB_NUM'] = job_num msg['ERROR_CODE'] = str(error_code) msg["DESCRIPTION"] = error_string 
self._base_publisher.publish_message(self.DMCS_FAULT_QUEUE, msg) def purge_broker(self, queues): for q in queues: cmd = "rabbitmqctl -p /tester purge_queue " + q os.system(cmd) def shutdown(self): LOGGER.debug("PromptProcessDevice: Shutting down Consumer threads.") self.shutdown_event.set() LOGGER.debug("Thread Manager shutting down and app exiting...") print("\n") os._exit(0)
class ArchiveDevice(iip_base): """ The Archive Device is a commandable device which coordinates the ingest of images from the telescope camera and then the transfer of those images to the base site archive storage. It receives jobs and divides and assigns the work to forwarders, records state and status change of forwarders, and sends messages accordingly. """ COMPONENT_NAME = 'ARCHIVE_FOREMAN' AR_FOREMAN_CONSUME = "ar_foreman_consume" ARCHIVE_CTRL_PUBLISH = "archive_ctrl_publish" ARCHIVE_CTRL_CONSUME = "archive_ctrl_consume" AR_FOREMAN_ACK_PUBLISH = "ar_foreman_ack_publish" START_INTEGRATION_XFER_PARAMS = {} prp = toolsmod.prp DP = toolsmod.DP def __init__(self, filename=None): """ Create a new instance of the Archive Device class. Instantiate the instance, raise assertion error if already instantiated. Extract config values from yaml file. Store handler methods for each message type. Set up base broker url, publishers, and scoreboards. Consumer threads are started within a Thread Manager object so that they can be monitored for health and shutdown/joined cleanly when the app exits. :params filename: configuration file :return: None. 
""" toolsmod.singleton(self) LOGGER.info('Extracting values from Config dictionary') self.extract_config_values(filename) #self.purge_broker(cdm['ROOT']['QUEUE_PURGES']) self._msg_actions = { 'AR_NEW_SESSION': self.set_session, 'AR_NEXT_VISIT': self.process_next_visit, 'AR_READOUT': self.process_dmcs_readout, 'AR_FWDR_HEALTH_CHECK_ACK': self.process_ack, 'AR_FWDR_XFER_PARAMS_ACK': self.process_ack, 'AR_FWDR_READOUT_ACK': self.process_ack, 'AR_FWDR_TAKE_IMAGES_DONE_ACK': self.process_ack, 'AR_ITEMS_XFERD_ACK': self.process_ack, 'NEW_ARCHIVE_ITEM_ACK': self.process_ack, 'AR_TAKE_IMAGES': self.take_images, 'AR_HEADER_READY': self.process_header_ready_event, 'AR_END_READOUT': self.process_end_readout, 'AR_TAKE_IMAGES_DONE': self.take_images_done } self._next_timed_ack_id = 0 self.setup_publishers() self.setup_scoreboards() LOGGER.info('ar foreman consumer setup') self.thread_manager = None self.setup_consumer_threads() LOGGER.info('Archive Foreman Init complete') def setup_publishers(self): """ Set up base publisher with pub_base_broker_url by creating a new instance of SimplePublisher class with yaml format :params: None. :return: None. """ self.pub_base_broker_url = "amqp://" + self._msg_pub_name + ":" + \ self._msg_pub_passwd + "@" + \ str(self._base_broker_addr) LOGGER.info('Setting up Base publisher on %s using %s', self.pub_base_broker_url, self._base_msg_format) self._publisher = SimplePublisher(self.pub_base_broker_url, self._base_msg_format) def on_ar_foreman_message(self, ch, method, properties, body): """ Calls the appropriate AR message action handler according to message type. :params ch: Channel to message broker, unused unless testing. :params method: Delivery method from Pika, unused unless testing. :params properties: Properties from DMCS to AR Foreman callback message body, unused unless testing. :params body: A dictionary that stores the message body. :return: None. 
""" #msg_dict = yaml.load(body) ch.basic_ack(method.delivery_tag) msg_dict = body LOGGER.info('In AR Foreman message callback') LOGGER.info( 'Message from DMCS to AR Foreman callback message body is: %s', str(msg_dict)) handler = self._msg_actions.get(msg_dict[MSG_TYPE]) result = handler(msg_dict) def on_archive_message(self, ch, method, properties, body): """ Calls the appropriate AR message action handler according to message type. :params ch: Channel to message broker, unused unless testing. :params method: Delivery method from Pika, unused unless testing. :params properties: Properties from AR CTRL callback message body, unused unless testing. :params body: A dictionary that stores the message body. :return: None. """ ch.basic_ack(method.delivery_tag) LOGGER.info('AR CTRL callback msg body is: %s', str(body)) handler = self._msg_actions.get(msg_dict[MSG_TYPE]) result = handler(msg_dict) def on_ack_message(self, ch, method, properties, body): """ Calls the appropriate AR message action handler according to message type. :params ch: Channel to message broker, unused unless testing. :params method: Delivery method from Pika, unused unless testing. :params properties: Properties from ACK callback message body, unused unless testing. :params body: A dictionary that stores the message body. :return: None. 
""" ch.basic_ack(method.delivery_tag) msg_dict = body LOGGER.info('In ACK message callback') LOGGER.info('Message from ACK callback message body is: %s', str(msg_dict)) print("In AR_DEV ack handler, msg is: ") self.prp.pprint(body) print("--------------------------- ") handler = self._msg_actions.get(msg_dict[MSG_TYPE]) result = handler(msg_dict) def process_next_visit(self, params): # When this method is invoked, the following must happen: # 0) Store new VISIT_ID in Scoreboard # 1) Health check all forwarders # 2) Divide work and generate dict of forwarders and which rafts/ccds they are fetching # 3) Get Archive info from ArchiveController # 4) Inform each forwarder which rafts they are responsible for ra = params['RA'] dec = params['DEC'] angle = params['ANGLE'] visit_id = params['VISIT_ID'] self.JOB_SCBD.set_visit_id(params['VISIT_ID'], ra, dec, angle) # receive new job_number and image_id; session and visit are current # and deep copy it with some additions such as session and visit session_id = self.get_current_session() visit_id = self.get_current_visit() job_number = params[JOB_NUM] raft_list = params['RAFT_LIST'] raft_ccd_list = params['RAFT_CCD_LIST'] next_visit_reply_queue = params['REPLY_QUEUE'] next_visit_ack_id = params[ACK_ID] # next, run health check health_check_ack_id = self.get_next_timed_ack_id('AR_FWDR_HEALTH_ACK') num_fwdrs_checked = self.fwdr_health_check(health_check_ack_id) # Add job scbd entry self.JOB_SCBD.add_job(job_number, visit_id, raft_list, raft_ccd_list) self.JOB_SCBD.set_value_for_job(job_number, 'VISIT_ID', visit_id) self.ack_timer(1.5) healthy_fwdrs = self.ACK_SCBD.get_components_for_timed_ack( health_check_ack_id) if healthy_fwdrs == None: self.refuse_job(params, "No forwarders available") self.JOB_SCBD.set_job_state(job_number, 'SCRUBBED') self.JOB_SCBD.set_job_status(job_number, 'INACTIVE') ### FIX send error code for this... 
return for forwarder in healthy_fwdrs: self.FWD_SCBD.set_forwarder_state(forwarder, 'BUSY') self.FWD_SCBD.set_forwarder_status(forwarder, 'HEALTHY') # send new_archive_item msg to archive controller new_items_params = {} ac_timed_ack = self.get_next_timed_ack_id('AR_CTRL_NEW_ITEM') new_items_params[MSG_TYPE] = 'NEW_ARCHIVE_ITEM' new_items_params['ACK_ID'] = ac_timed_ack new_items_params['JOB_NUM'] = job_number new_items_params['SESSION_ID'] = session_id new_items_params['VISIT_ID'] = visit_id new_items_params['REPLY_QUEUE'] = self.AR_FOREMAN_ACK_PUBLISH self.JOB_SCBD.set_job_state(job_number, 'AR_NEW_ITEM_QUERY') self._publisher.publish_message(self.ARCHIVE_CTRL_CONSUME, new_items_params) #ar_response = self.progressive_ack_timer(ac_timed_ack, 1, 2.0) ### FIX - Go back to orig timer val ar_response = self.progressive_ack_timer(ac_timed_ack, 1, 6.0) if ar_response == None: # FIXME raise L1 exception and bail out print("B-B-BAD Trouble; no ar_response") #target_location = ar_response['ARCHIVE_CTRL']['TARGET_LOCATION'] target_location = "/tmp/gunk" self.JOB_SCBD.set_job_params(job_number, { 'STATE': 'AR_NEW_ITEM_RESPONSE', 'TARGET_LOCATION': target_location }) # divide image fetch across forwarders list_of_fwdrs = list(healthy_fwdrs.keys()) print("Just before divide_work...list_of_fwdrs is:") self.prp.pprint(list_of_fwdrs) print("------------------------------") work_schedule = self.divide_work(list_of_fwdrs, raft_list, raft_ccd_list) if self.DP: print( "Here is the work schedule hot off of the divide_work stack:") self.prp.pprint(work_schedule) print("------------- Done Printing Work Schedule --------------") # send target dir, and job, session,visit and work to do to healthy forwarders self.JOB_SCBD.set_value_for_job(job_number, 'STATE', 'SENDING_XFER_PARAMS') set_sched_result = self.JOB_SCBD.set_work_schedule_for_job( job_number, work_schedule) if set_sched_result == False: # FIXME Raise L1 exception and bail print("BIG PROBLEM - CANNOT SET WORK SCHED IN SCBD") 
xfer_params_ack_id = self.get_next_timed_ack_id("AR_FWDR_PARAMS_ACK") fwdr_new_target_params = {} fwdr_new_target_params['XFER_PARAMS'] = {} fwdr_new_target_params[MSG_TYPE] = 'AR_FWDR_XFER_PARAMS' fwdr_new_target_params[SESSION_ID] = session_id fwdr_new_target_params[VISIT_ID] = visit_id fwdr_new_target_params[JOB_NUM] = job_number fwdr_new_target_params[ACK_ID] = xfer_params_ack_id fwdr_new_target_params[REPLY_QUEUE] = self.AR_FOREMAN_ACK_PUBLISH final_target_location = self.archive_name + "@" + self.archive_ip + ":" + target_location fwdr_new_target_params['TARGET_LOCATION'] = final_target_location len_fwdrs_list = len(work_schedule['FORWARDER_LIST']) for i in range(0, len_fwdrs_list): fwdr = work_schedule['FORWARDER_LIST'][i] xfer_params_dict = {} xfer_params_dict['RAFT_LIST'] = work_schedule['RAFT_LIST'][i] xfer_params_dict['RAFT_CCD_LIST'] = work_schedule['RAFT_CCD_LIST'][ i] #fwdr_new_target_params['RAFT_LIST'] = work_schedule['RAFT_LIST'][i] #fwdr_new_target_params['RAFT_CCD_LIST'] = work_schedule['RAFT_CCD_LIST'][i] # record work order in scoreboard self.FWD_SCBD.set_work_by_job(fwdr, job_number, xfer_params_dict) xfer_params_dict['AR_FWDR'] = fwdr fwdr_new_target_params['XFER_PARAMS'] = xfer_params_dict route_key = self.FWD_SCBD.get_value_for_forwarder( fwdr, "CONSUME_QUEUE") print(" sending xfer_params...route_key is %s" % route_key) print(" sending xfer_params...fwdr is %s" % fwdr) print("Publishing string xfger params... 
%s" % str(fwdr_new_target_params)) self._publisher.publish_message(route_key, fwdr_new_target_params) # receive ack back from forwarders that they have job params params_acks = self.progressive_ack_timer(xfer_params_ack_id, len_fwdrs_list, 3.0) ### FIX # if params_acks == None: # raise L1Exception and bail self.JOB_SCBD.set_value_for_job(job_number, 'STATE', 'XFER_PARAMS_SENT') # accept job by Ach'ing True ar_next_visit_ack = {} ar_next_visit_ack['MSG_TYPE'] = 'AR_NEXT_VISIT_ACK' ar_next_visit_ack['ACK_ID'] = next_visit_ack_id ar_next_visit_ack['ACK_BOOL'] = True ar_next_visit_ack['COMPONENT'] = self.COMPONENT_NAME self.accept_job(next_visit_reply_queue, ar_next_visit_ack) self.JOB_SCBD.set_value_for_job(job_number, STATE, "JOB_ACCEPTED") fscbd_params = {'STATE': 'AWAITING_READOUT'} self.FWD_SCBD.set_forwarder_params(healthy_fwdrs, fscbd_params) def fwdr_health_check(self, ack_id): """ Send AR_FWDR_HEALTH_CHECK message to ar_foreman_ack_publish queue. Retrieve available forwarders from ForwarderScoreboard, set their state to HEALTH_CHECK, status to UNKNOWN, and publish the message. :params ack_id: Ack id for AR forwarder health check. :return: Number of health checks sent. """ msg_params = {} msg_params[MSG_TYPE] = 'AR_FWDR_HEALTH_CHECK' msg_params[ACK_ID] = ack_id msg_params[REPLY_QUEUE] = self.AR_FOREMAN_ACK_PUBLISH forwarders = self.FWD_SCBD.return_available_forwarders_list() state_status = {"STATE": "HEALTH_CHECK", "STATUS": "UNKNOWN"} self.FWD_SCBD.set_forwarder_params(forwarders, state_status) for forwarder in forwarders: self._publisher.publish_message( self.FWD_SCBD.get_value_for_forwarder(forwarder, "CONSUME_QUEUE"), msg_params) return len(forwarders) def divide_work(self, fwdrs_list, raft_list, raft_ccd_list): """ Divide work (ccds) among forwarders. If only one forwarder available, give it all the work. If have less or equal ccds then forwarders, give the first few forwarders one ccd each. 
Else, evenly distribute ccds among forwarders, and give extras to the first forwarder, make sure that ccd list for each forwarder is continuous. :params fwdrs_list: List of available forwarders for the job. :params ccd_list: List of ccds to be distributed. :return schedule: Distribution of ccds among forwarders. """ num_fwdrs = len(fwdrs_list) num_rafts = len(raft_list) schedule = {} schedule['FORWARDER_LIST'] = [] schedule['CCD_LIST'] = [ ] # A list of ccd lists; index of main list matches same forwarder list index FORWARDER_LIST = [] RAFT_LIST = [] # This is a 'list of lists' tmp_raft_list = [] RAFT_CCD_LIST = [] # This is a 'list of lists of lists' tmp_raft_ccd_list = [] if num_fwdrs == 1: FORWARDER_LIST.append(fwdrs_list[0]) for item in raft_list: tmp_raft_list.append(item) RAFT_LIST.append(list(tmp_raft_list)) for item in raft_ccd_list: tmp_raft_ccd_list.append(list(item)) RAFT_CCD_LIST.append(list(tmp_raft_ccd_list)) schedule['FORWARDER_LIST'] = FORWARDER_LIST schedule['RAFT_LIST'] = RAFT_LIST schedule['RAFT_CCD_LIST'] = RAFT_CCD_LIST if self.DP: print("In divide work one fwdr case, finished schedule is:") self.prp.pprint(schedule) print("Finished divide work one fwdr case") return schedule if num_rafts <= num_fwdrs: for k in range(0, num_rafts): FORWARDER_LIST.append(fwdrs_list[k]) #little_list.append(ccd_list[k]) RAFT_LIST.append(raft_list[k]) # Need a copy here... 
RAFT_CCD_LIST.append = deepcopy(raft_ccd_list[k]) schedule['FORWARDER_LIST'] = FORWARDER_LIST schedule['RAFT_LIST'] = RAFT_LIST schedule['RAFT_CCD_LIST'] = RAFT_CCD_LIST else: rafts_per_fwdr = len(raft_list) // num_fwdrs remainder_rafts = len(raft_list) % num_fwdrs offset = 0 for i in range(0, num_fwdrs): tmp_raft_list = [] tmp_raft_ccd_list = [] for j in range(offset, (rafts_per_fwdr + offset)): if (j) >= num_rafts: break tmp_raft_list.append(raft_list[j]) tmp_raft_ccd_list.append(deepcopy(raft_ccd_list[j])) offset = offset + rafts_per_fwdr # If num_fwdrs divided into num_rafts equally, we are done...else, deal with remainder if remainder_rafts != 0 and i == 0: for k in range(offset, offset + remainder_rafts): tmp_list.append(raft_list[k]) tmp_raft_list.append(deepcopy(raft_ccd_list[k])) offset = offset + remainder_rafts FORWARDER_LIST.append(fwdrs_list[i]) RAFT_LIST.append(list(tmp_raft_list)) RAFT_CCD_LIST.append(list(tmp_raft_ccd_list)) schedule['FORWARDER_LIST'] = FORWARDER_LIST schedule['RAFT_LIST'] = RAFT_LIST schedule['RAFT_CCD_LIST'] = RAFT_CCD_LIST if self.DP: print("In divide work one fwdr case, finished schedule is:") self.prp.pprint(schedule) print("Finished divide work one fwdr case") return schedule def accept_job(self, reply_queue, dmcs_message): """ Send AR_START_INTEGRATION_ACK message with ack_bool equals True (job accepted) and other job specs to dmcs_ack_consume queue. :params dmcs_message: A dictionary that stores info of a job. :return: None. """ self._publisher.publish_message(reply_queue, dmcs_message) def refuse_job(self, params, fail_details): """ Send AR_START_INTEGRATION_ACK message with ack_bool equals False (job refused) and other job specs to dmcs_ack_consume queue. Set job state as JOB_REFUSED in JobScoreboard. :params parmas: A dictionary that stores info of a job. :params fail_details: A string that describes what went wrong, not used for now. :return: None. 
""" dmcs_message = {} dmcs_message[JOB_NUM] = params[JOB_NUM] dmcs_message[MSG_TYPE] = 'AR_START_INTEGRATION_ACK' dmcs_message['ACK_ID'] = params['ACK_ID'] dmcs_message['SESSION_ID'] = params['SESSION_ID'] dmcs_message['VISIT_ID'] = params['VISIT_ID'] dmcs_message[ACK_BOOL] = False dmcs_message['COMPONENT'] = self.COMPONENT_NAME self.JOB_SCBD.set_value_for_job(params[JOB_NUM], STATE, "JOB_REFUSED") self._publisher.publish_message("dmcs_ack_consume", dmcs_message) def take_images(self, params): num_images = params['NUM_IMAGES'] job_num = params[JOB_NUM] self.JOB_SCBD.set_value_for_job(job_num, 'NUM_IMAGES', num_images) work_sched = self.JOB_SCBD.get_work_schedule_for_job(job_num) fwdrs = work_sched['FORWARDER_LIST'] msg = {} msg['MSG_TYPE'] = 'AR_FWDR_TAKE_IMAGES' msg['NUM_IMAGES'] = num_images for fwdr in fwdrs: route_key = self.FWD_SCBD.get_value_for_forwarder( fwdr, 'CONSUME_QUEUE') self._publisher.publish_message(route_key, msg) #################################################################### ### XXX FIX, must associate each header with each fwdr raft/ccd_list def process_header_ready_event(self, params): fname = params['FILENAME'] msg = {} msg['MSG_TYPE'] = 'AR_FWDR_HEADER_READY' msg['FILENAME'] = fname job_num = params[JOB_NUM] self.JOB_SCBD.set_value_for_job(job_num, 'HDR_FNAME', fname) work_sched = self.JOB_SCBD.get_work_schedule_for_job(job_num) fwdrs = work_sched['FORWARDER_LIST'] for fwdr in fwdrs: route_key = self.FWD_SCBD.get_value_for_forwarder( fwdr, 'CONSUME_QUEUE') self._publisher.publish_message(route_key, msg) ### NOTE: Deprecated... def process_dmcs_readout(self, params): """ Set job state as PREPARE_READOUT in JobScoreboard. Send readout to forwarders. Set job state as READOUT_STARTED in JobScoreboard. Wait to retrieve and process readout responses. :params parmas: A dictionary that stores info of a job. :return: None. 
""" reply_queue = params['REPLY_QUEUE'] readout_ack_id = params[ACK_ID] job_number = params[JOB_NUM] image_id = params[IMAGE_ID] # send readout to forwarders self.JOB_SCBD.set_value_for_job(job_number, 'STATE', 'PREPARE_READOUT') fwdr_readout_ack = self.get_next_timed_ack_id("AR_FWDR_READOUT_ACK") work_schedule = self.JOB_SCBD.get_work_schedule_for_job(job_number) fwdrs = work_schedule['FORWARDER_LIST'] self.send_readout(params, fwdrs, fwdr_readout_ack) self.JOB_SCBD.set_value_for_job(job_number, 'STATE', 'READOUT_STARTED') readout_responses = self.progressive_ack_timer(fwdr_readout_ack, len(fwdrs), 4.0) # if readout_responses == None: # raise L1 exception self.process_readout_responses(readout_ack_id, reply_queue, image_id, readout_responses) def process_end_readout(self, params): """ Set job state as PREPARE_READOUT in JobScoreboard. Send readout to forwarders. Set job state as READOUT_STARTED in JobScoreboard. Wait to retrieve and process readout responses. :params parmas: A dictionary that stores info of a job. :return: None. 
""" reply_queue = params['REPLY_QUEUE'] readout_ack_id = params[ACK_ID] job_number = params[JOB_NUM] image_id = params[IMAGE_ID] # send readout to forwarders self.JOB_SCBD.set_value_for_job(job_number, 'STATE', 'READOUT') fwdr_readout_ack = self.get_next_timed_ack_id( "AR_FWDR_END_READOUT_ACK") work_schedule = self.JOB_SCBD.get_work_schedule_for_job(job_number) fwdrs = work_schedule['FORWARDER_LIST'] len_fwdrs = len(fwdrs) msg = {} msg[MSG_TYPE] = 'AR_FWDR_END_READOUT' msg[JOB_NUM] = job_number msg[IMAGE_ID] = image_id for i in range(0, len_fwdrs): route_key = self.FWD_SCBD.get_value_for_forwarder( fwdrs[i], 'CONSUME_QUEUE') self._publisher.publish_message(route_key, msg) #readout_responses = self.progressive_ack_timer(fwdr_readout_ack, len(fwdrs), 4.0) # if readout_responses == None: # raise L1 exception #self.process_readout_responses(readout_ack_id, reply_queue, image_id, readout_responses) def process_readout_responses(self, readout_ack_id, reply_queue, image_id, readout_responses): """ From readout_responses param, retrieve image_id and job_number, and create list of ccd, filename, and checksum from all forwarders. Store into xfer_list_msg and send to archive to confirm each file made it intact. Send AR_READOUT_ACK message with results and ack_bool equals True to dmcs_ack_comsume queue. :params readout_ack_id: Ack id for AR_READOUT_ACK message. :params image_id: :params readout_responses: Readout responses from AckScoreboard. :return: None. 
""" job_number = None image_id = None confirm_ack = self.get_next_timed_ack_id('AR_ITEMS_XFERD_ACK') fwdrs = list(readout_responses.keys()) CCD_LIST = [] FILENAME_LIST = [] CHECKSUM_LIST = [] for fwdr in fwdrs: ccds = readout_responses[fwdr]['RESULT_LIST']['CCD_LIST'] num_ccds = len(ccds) fnames = readout_responses[fwdr]['RESULT_LIST']['FILENAME_LIST'] csums = readout_responses[fwdr]['RESULT_LIST']['CHECKSUM_LIST'] for i in range(0, num_ccds): msg = {} CCD_LIST.append(ccds[i]) FILENAME_LIST.append(fnames[i]) CHECKSUM_LIST.append(csums[i]) job_number = readout_responses[fwdr][JOB_NUM] image_id = readout_responses[fwdr]['IMAGE_ID'] xfer_list_msg = {} xfer_list_msg[MSG_TYPE] = 'AR_ITEMS_XFERD' xfer_list_msg[ACK_ID] = confirm_ack xfer_list_msg['IMAGE_ID'] = image_id xfer_list_msg['REPLY_QUEUE'] = self.AR_FOREMAN_ACK_PUBLISH xfer_list_msg['RESULT_LIST'] = {} xfer_list_msg['RESULT_LIST']['CCD_LIST'] = CCD_LIST xfer_list_msg['RESULT_LIST']['FILENAME_LIST'] = FILENAME_LIST xfer_list_msg['RESULT_LIST']['CHECKSUM_LIST'] = CHECKSUM_LIST self._publisher.publish_message(self.ARCHIVE_CTRL_CONSUME, xfer_list_msg) xfer_check_responses = self.progressive_ack_timer(confirm_ack, 1, 4.0) # if xfer_check_responses == None: # raise L1 exception and bail results = xfer_check_responses['ARCHIVE_CTRL']['RESULT_LIST'] ack_msg = {} ack_msg['MSG_TYPE'] = 'AR_READOUT_ACK' ack_msg['JOB_NUM'] = job_number ack_msg['COMPONENT'] = self.COMPONENT_NAME ack_msg['ACK_ID'] = readout_ack_id ack_msg['ACK_BOOL'] = True ack_msg['RESULT_LIST'] = results self._publisher.publish_message(reply_queue, ack_msg) ### FIXME Set state as complete for Job def send_readout(self, params, fwdrs, readout_ack): """ Send AR_FWDR_READOUT message to each forwarder working on the job with ar_foreman_ack_publish queue as reply queue. :params params: A dictionary that stores info of a job. :params readout_ack: Ack id for AR_FWDR_READOUT message. :return: None. 
""" ro_params = {} job_number = params['JOB_NUM'] ro_params['MSG_TYPE'] = 'AR_FWDR_READOUT' ro_params['JOB_NUM'] = job_number ro_params['SESSION_ID'] = self.get_current_session() ro_params['VISIT_ID'] = self.get_current_visit() ro_params['IMAGE_ID'] = params['IMAGE_ID'] ro_params['ACK_ID'] = readout_ack ro_params['REPLY_QUEUE'] = self.AR_FOREMAN_ACK_PUBLISH for fwdr in fwdrs: route_key = self.FWD_SCBD.get_value_for_forwarder( fwdr, "CONSUME_QUEUE") self._publisher.publish_message(route_key, ro_params) def take_images_done(self, params): reply_queue = params['REPLY_QUEUE'] readout_ack_id = params[ACK_ID] job_number = params[JOB_NUM] self.JOB_SCBD.set_value_for_job(job_number, 'STATE', 'TAKE_IMAGES_DONE') fwdr_readout_ack = self.get_next_timed_ack_id( "AR_FWDR_TAKE_IMAGES_DONE_ACK") work_schedule = self.JOB_SCBD.get_work_schedule_for_job(job_number) fwdrs = work_schedule['FORWARDER_LIST'] len_fwdrs = len(fwdrs) msg = {} msg[MSG_TYPE] = 'AR_FWDR_TAKE_IMAGES_DONE' msg[JOB_NUM] = job_number msg['REPLY_QUEUE'] = self.AR_FOREMAN_ACK_PUBLISH msg[ACK_ID] = fwdr_readout_ack for i in range(0, len_fwdrs): route_key = self.FWD_SCBD.get_value_for_forwarder( fwdrs[i], 'CONSUME_QUEUE') self._publisher.publish_message(route_key, msg) ### FIX Check Archive Controller # wait up to 15 sec for readout responses fwdr_readout_responses = self.progressive_ack_timer( fwdr_readout_ack, len_fwdrs, 15.0) fwdr_responses = list(fwdr_readout_responses.keys()) RESULT_SET = {} RESULT_SET['IMAGE_ID_LIST'] = [] RESULT_SET['CHECKSUM_LIST'] = [] RESULT_SET['FILENAME_LIST'] = {} for fwdr_comp in fwdr_responses: RESULT_SET['RAFT_PLUS_CCD_LIST'] += fwdr_comp['RESULT_SET'][ 'RAFT_PLUS_CCD_LIST'] RESULT_SET['CHECKSUM_LIST'] += fwdr_comp['RESULT_SET'][ 'CHECKSUM_LIST'] RESULT_SET['FILENAME_LIST'] += fwdr_comp['RESULT_SET'][ 'FILENAME_LIST'] ar_xferd_ack = self.get_next_timed_ack_id("AR_ITEMS_XFERD_ACK") arc_msg = {} arc_msg['MSG_TYPE'] = 'AR_ITEMS_XFERD' arc_msg['ACK_ID'] = ar_xferd_ack 
arc_msg['REPLY_QUEUE'] = self.AR_FOREMAN_ACK_PUBLISH arc_msg['RESULT_SET'] = RESULT_SET self._publisher.publish_message(self.ARCHIVE_CTRL_CONSUME, arc_msg) ar_ctrl_response = self.progressive_ack_timer(ar_xferd_ack, 1, 11.0) # wait up to 15 sec for Ar Ctrl response ### FIX Add Final Response to DMCS #send result set to DMCS #num_images - dmcs_msg = {} dmcs_msg['AR_TAKE_IMAGES_DONE_ACK'] dmcs_msg['ACK_ID'] = readout_ack_id dmcs_msg['ACK_BOOL'] = True dmcs_msg['JOB_NUM'] = job_number dmcs_msg['COMPONENT'] = self.COMPONENT_NAME dmcs_msg['RESULT_SET'] = ar_ctrl_response['RESULT_SET'] self._publisher.publish_message(reply_queue, dmcs_msg) def process_ack(self, params): """ Add new ACKS for a particular ACK_ID to the Ack Scoreboards where they are collated. :params: New ack to be checked in. :return: None. """ self.ACK_SCBD.add_timed_ack(params) def get_next_timed_ack_id(self, ack_type): """ Increment ack id by 1, and store it. Return ack id with ack type as a string. :params ack_type: Informational string to prepend Ack ID. :return retval: String with ack type followed by next ack id. """ self._next_timed_ack_id = self._next_timed_ack_id + 1 return (ack_type + "_" + str(self._next_timed_ack_id).zfill(6)) def set_session(self, params): """ Record new session in JobScoreboard. Send AR_NEW_SESSION_ACK message with ack_bool equals True to specified reply queue. :params params: Dictionary with info about new session. :return: None. """ self.JOB_SCBD.set_session(params['SESSION_ID']) ack_id = params['ACK_ID'] msg = {} msg['MSG_TYPE'] = 'AR_NEW_SESSION_ACK' msg['COMPONENT'] = self.COMPONENT_NAME msg['ACK_ID'] = ack_id msg['ACK_BOOL'] = True route_key = params['REPLY_QUEUE'] self._publisher.publish_message(route_key, msg) def get_current_session(self): """ Retreive current session from JobSocreboard. :params: None. :return: Current session returned by JobSocreboard. 
""" return self.JOB_SCBD.get_current_session() def set_visit(self, params): """ Set current visit_id in JobScoreboard. Send AR_NEXT_VISIT_ACK message with ack_bool equals True to specified reply queue. :params params: Message dictionary with info about new visit. :return: None. """ bore_sight = params['BORE_SIGHT'] self.JOB_SCBD.set_visit_id(params['VISIT_ID'], bore_sight) ack_id = params['ACK_ID'] msg = {} ## XXX FIXME Do something with the bore sight in params['BORE_SIGHT'] msg['MSG_TYPE'] = 'AR_NEXT_VISIT_ACK' msg['COMPONENT'] = self.COMPONENT_NAME msg['ACK_ID'] = ack_id msg['ACK_BOOL'] = True route_key = params['REPLY_QUEUE'] self._publisher.publish_message(route_key, msg) def get_current_visit(self): """ Retrieve current visit from JobSocreboard. :params: None. :return: Current visit returned by JobSocreboard. """ return self.JOB_SCBD.get_current_visit() def ack_timer(self, seconds): """ Sleeps for user-defined seconds. :params seconds: Time to sleep in seconds. :return: True. """ sleep(seconds) return True def progressive_ack_timer(self, ack_id, expected_replies, seconds): """ Sleeps for user-defined seconds, or less if everyone has reported back in. :params ack_id: Ack ID to wait for. :params expected_replies: Number of components expected to ack.. :params seconds: Maximum time to wait in seconds. :return: The dictionary that represents the responses from the components ack'ing. Note: If only one component will ack, this method breaks out of its loop after the one ack shows up - effectively beating the maximum wait time. 
""" counter = 0.0 while (counter < seconds): counter = counter + 0.5 sleep(0.5) response = self.ACK_SCBD.get_components_for_timed_ack(ack_id) if response == None: continue if len(list(response.keys())) == expected_replies: return response ## Try one final time response = self.ACK_SCBD.get_components_for_timed_ack(ack_id) if response == None: return None elif len(list(response.keys())) == expected_replies: return response else: return None def extract_config_values(self, filename): """ Parse system config yaml file. Throw error messages if Yaml file or key not found. :params: None. :return: True. """ LOGGER.info('Reading YAML Config file') try: cdm = loadYAMLConfigFile(filename) except IOError as e: LOGGER.critical("Unable to find CFG Yaml file %s\n" % self._config_file) sys.exit(101) try: self._msg_name = cdm[ROOT][ AFM_BROKER_NAME] # Message broker user & passwd self._msg_passwd = cdm[ROOT][AFM_BROKER_PASSWD] self._msg_pub_name = cdm[ROOT][ 'AFM_BROKER_PUB_NAME'] # Message broker user & passwd self._msg_pub_passwd = cdm[ROOT]['AFM_BROKER_PUB_PASSWD'] self._ncsa_name = cdm[ROOT][NCSA_BROKER_NAME] self._ncsa_passwd = cdm[ROOT][NCSA_BROKER_PASSWD] self._base_broker_addr = cdm[ROOT][BASE_BROKER_ADDR] self._ncsa_broker_addr = cdm[ROOT][NCSA_BROKER_ADDR] self._forwarder_dict = cdm[ROOT][XFER_COMPONENTS][ 'ARCHIVE_FORWARDERS'] self._scbd_dict = cdm[ROOT]['SCOREBOARDS'] # Placeholder until eventually worked out by Data Backbone team self.archive_fqn = cdm[ROOT]['ARCHIVE']['ARCHIVE_NAME'] self.archive_name = cdm[ROOT]['ARCHIVE']['ARCHIVE_LOGIN'] self.archive_ip = cdm[ROOT]['ARCHIVE']['ARCHIVE_IP'] except KeyError as e: print("Dictionary error") print("Bailing out...") sys.exit(99) self._base_msg_format = 'YAML' if 'BASE_MSG_FORMAT' in cdm[ROOT]: self._base_msg_format = cdm[ROOT]['BASE_MSG_FORMAT'] def setup_consumer_threads(self): """ Create ThreadManager object with base broker url and kwargs to setup consumers. :params: None. :return: None. 
""" base_broker_url = "amqp://" + self._msg_name + ":" + \ self._msg_passwd + "@" + \ str(self._base_broker_addr) LOGGER.info('Building _base_broker_url. Result is %s', base_broker_url) self.shutdown_event = threading.Event() self.shutdown_event.clear() # Set up kwargs that describe consumers to be started # The Archive Device needs three message consumers kws = {} md = {} md['amqp_url'] = base_broker_url md['name'] = 'Thread-ar_foreman_consume' md['queue'] = 'ar_foreman_consume' md['callback'] = self.on_ar_foreman_message md['format'] = "YAML" md['test_val'] = None kws[md['name']] = md md = {} md['amqp_url'] = base_broker_url md['name'] = 'Thread-ar_foreman_ack_publish' md['queue'] = 'ar_foreman_ack_publish' md['callback'] = self.on_ack_message md['format'] = "YAML" md['test_val'] = 'test_it' kws[md['name']] = md md = {} md['amqp_url'] = base_broker_url md['name'] = 'Thread-archive_ctrl_publish' md['queue'] = 'archive_ctrl_publish' md['callback'] = self.on_archive_message md['format'] = "YAML" md['test_val'] = 'test_it' kws[md['name']] = md self.thread_manager = ThreadManager('thread-manager', kws, self.shutdown_event) self.thread_manager.start() def setup_scoreboards(self): """ Create Redis Forwarder table with Forwarder info. Create Job and Ack Scoreboard objects with values retrieved from configuration file. :params: None. :return: None. """ # Create Redis Forwarder table with Forwarder info self.FWD_SCBD = ForwarderScoreboard('AR_FWD_SCBD', self._scbd_dict['AR_FWD_SCBD'], self._forwarder_dict) self.JOB_SCBD = JobScoreboard('AR_JOB_SCBD', self._scbd_dict['AR_JOB_SCBD']) self.ACK_SCBD = AckScoreboard('AR_ACK_SCBD', self._scbd_dict['AR_ACK_SCBD']) def shutdown(self): LOGGER.info("Shutting down Consumer threads.") self.shutdown_event.set() LOGGER.debug("Thread Manager shutting down and app exiting...") print("\n") os._exit(0)
class NcsaForeman(iip_base):
    """Foreman on the NCSA side that pairs forwarders with distributors.

    Incoming messages are dispatched on their MSG_TYPE through
    ``self._msg_actions``. Acks to the requester are published with
    ``self._base_publisher``; messages to distributors are published with
    ``self._ncsa_publisher``.
    """
    # NOTE(review): this file contains a second, later definition of
    # NcsaForeman; the later one is the name bound at import time.

    NCSA_CONSUME = "ncsa_consume"
    NCSA_PUBLISH = "ncsa_publish"
    COMPONENT_NAME = 'NCSA_FOREMAN'
    DISTRIBUTOR_PUBLISH = "distributor_publish"
    ACK_PUBLISH = "ack_publish"
    prp = toolsmod.prp

    def __init__(self, filename):
        """Load the config, then set up logging, publishers, scoreboards and consumers.

        :params filename: Path of the YAML configuration file.
        """
        toolsmod.singleton(self)

        print('Extracting values from configuation dictionary %s' % filename)
        cdm = self.extract_config_values(filename)

        logging_dir = cdm[ROOT].get('LOGGING_DIR', None)
        log_file = self.setupLogging(logging_dir, 'NcsaForeman.log')
        print("Logs will be written to %s" % log_file)

        # MSG_TYPE -> bound handler; every handler takes the message dict.
        self._msg_actions = {
            'NCSA_NEXT_VISIT': self.set_visit,
            'NCSA_NEW_SESSION': self.set_session,
            'NCSA_START_INTEGRATION': self.process_start_integration,
            'NCSA_READOUT': self.process_readout,
            'DISTRIBUTOR_HEALTH_CHECK_ACK': self.process_ack,
            'DISTRIBUTOR_XFER_PARAMS_ACK': self.process_ack,
            'DISTRIBUTOR_READOUT_ACK': self.process_ack
        }

        self._next_timed_ack_id = 10000

        # Each setup step runs exactly once. Running the publisher and consumer
        # setup twice created duplicate connections and a second ThreadManager
        # consuming the same queues.
        self.setup_publishers()
        self.setup_scoreboards()

        self.thread_manager = None
        self.setup_consumer_threads()
        LOGGER.info('Ncsa foreman consumer setup')

        LOGGER.info('Ncsa Foreman Init complete')

    def setup_publishers(self):
        """Create the Base and NCSA SimplePublisher objects from the stored credentials."""
        self._pub_base_broker_url = "amqp://" + self._pub_base_name + ":" + \
                                                self._pub_base_passwd + "@" + \
                                                str(self._base_broker_addr)

        self._pub_ncsa_broker_url = "amqp://" + self._pub_ncsa_name + ":" + \
                                                self._pub_ncsa_passwd + "@" + \
                                                str(self._ncsa_broker_addr)

        # Log only the broker address: the full URL embeds the password.
        LOGGER.info('Setting up Base publisher on %s using %s',
                    self._base_broker_addr, self._base_msg_format)
        self._base_publisher = SimplePublisher(self._pub_base_broker_url,
                                               self._base_msg_format)

        LOGGER.info('Setting up NCSA publisher on %s using %s',
                    self._ncsa_broker_addr, self._ncsa_msg_format)
        self._ncsa_publisher = SimplePublisher(self._pub_ncsa_broker_url,
                                               self._ncsa_msg_format)

    def _dispatch(self, msg_dict):
        """Route msg_dict to the handler registered for its MSG_TYPE.

        An unregistered message type is logged and dropped instead of raising
        TypeError by calling None inside the consumer callback.
        """
        msg_type = msg_dict[MSG_TYPE]
        handler = self._msg_actions.get(msg_type)
        if handler is None:
            LOGGER.error('No handler registered for message type %s; message dropped',
                         msg_type)
            return None
        return handler(msg_dict)

    def on_pp_message(self, ch, method, properties, body):
        """Consumer callback for the 'ncsa_consume' queue: ack the delivery, then dispatch."""
        ch.basic_ack(method.delivery_tag)
        msg_dict = body
        LOGGER.debug('Message from PP callback message body is: %s',
                     self.prp.pformat(msg_dict))
        self._dispatch(msg_dict)

    def on_ack_message(self, ch, method, properties, body):
        """Consumer callback for 'ncsa_foreman_ack_publish': ack the delivery, then dispatch."""
        ch.basic_ack(method.delivery_tag)
        msg_dict = body
        LOGGER.info('In ACK message callback')
        LOGGER.debug('Message from ACK callback message body is: %s',
                     self.prp.pformat(msg_dict))
        self._dispatch(msg_dict)

    def set_visit(self, params):
        """Record the visit id and bore sight in JOB_SCBD and reply with NCSA_NEXT_VISIT_ACK.

        :params params: Message dict; 'BORE_SIGHT', 'VISIT_ID', 'ACK_ID' and
                        'REPLY_QUEUE' are read from it.
        """
        bore_sight = params['BORE_SIGHT']
        visit_id = params['VISIT_ID']
        self.JOB_SCBD.set_visit_id(visit_id, bore_sight)
        ###
        ### Send Boresight to Someone here...
        ###
        msg = {
            'MSG_TYPE': 'NCSA_NEXT_VISIT_ACK',
            'COMPONENT': self.COMPONENT_NAME,
            'ACK_ID': params['ACK_ID'],
            'ACK_BOOL': True,
        }
        self._base_publisher.publish_message(params['REPLY_QUEUE'], msg)

    def set_session(self, params):
        """Record the session id in JOB_SCBD and reply with NCSA_NEW_SESSION_ACK.

        :params params: Message dict; 'SESSION_ID', 'ACK_ID' and 'REPLY_QUEUE'
                        are read from it.
        """
        self.JOB_SCBD.set_session(params['SESSION_ID'])
        msg = {
            'MSG_TYPE': 'NCSA_NEW_SESSION_ACK',
            'COMPONENT': self.COMPONENT_NAME,
            'ACK_ID': params['ACK_ID'],
            'ACK_BOOL': True,
        }
        self._base_publisher.publish_message(params['REPLY_QUEUE'], msg)

    def process_start_integration(self, params):
        """Health-check the distributors, pair them with forwarders, and ack the request.

        If fewer distributors answer the health check than there are forwarders,
        the job is deleted and a refusal (ACK_BOOL False) is published.
        Otherwise one distributor is assigned per forwarder, each paired
        distributor is sent its transfer parameters, and
        NCSA_START_INTEGRATION_ACK with the pairs goes to params['REPLY_QUEUE'].

        :params params: Message dict; JOB_NUM, 'IMAGE_ID', 'VISIT_ID',
                        'SESSION_ID', 'ACK_ID', 'REPLY_QUEUE' and 'FORWARDERS'
                        are read from it.
        """
        job_num = str(params[JOB_NUM])
        image_id = params['IMAGE_ID']
        visit_id = params['VISIT_ID']
        response_timed_ack_id = params["ACK_ID"]
        LOGGER.info('NCSA received Start Integration message from Base')
        LOGGER.debug('NCSA Start Integration incoming message: %s' % params)

        forwarders_list = params['FORWARDERS']['FORWARDER_LIST']
        ccd_list = params['FORWARDERS']['CCD_LIST']  # A list of lists...
        len_forwarders_list = len(forwarders_list)
        self.JOB_SCBD.add_job(job_num, image_id, visit_id, ccd_list)
        LOGGER.info('Received new job %s. Needed workers is %s', job_num, str(len_forwarders_list))

        # run distributor health check
        # get timed_ack_id
        timed_ack = self.get_next_timed_ack_id("DISTRIBUTOR_HEALTH_CHECK_ACK")

        distributors = self.DIST_SCBD.return_distributors_list()
        # Mark all healthy distributors Unknown
        state_unknown = {"STATE": "HEALTH_CHECK", "STATUS": "UNKNOWN"}
        self.DIST_SCBD.set_distributor_params(distributors, state_unknown)

        # send health check messages
        ack_params = {}
        ack_params[MSG_TYPE] = "DISTRIBUTOR_HEALTH_CHECK"
        ack_params['REPLY_QUEUE'] = 'ncsa_foreman_ack_publish'
        ack_params["ACK_ID"] = timed_ack
        ack_params[JOB_NUM] = job_num
        for distributor in distributors:
            route_key = self.DIST_SCBD.get_value_for_distributor(distributor, "CONSUME_QUEUE")
            self._ncsa_publisher.publish_message(route_key, ack_params)

        # start timers
        self.ack_timer(2)

        # at end of timer, get list of distributors
        # The scoreboard lookup may yield None (progressive_ack_timer guards
        # for that too); treat it as "nobody answered".
        dicts_of_distributors = self.ACK_SCBD.get_components_for_timed_ack(timed_ack)
        healthy_distributors = list(dicts_of_distributors.keys()) if dicts_of_distributors else []

        # update distributor scoreboard with healthy distributors
        healthy_status = {"STATUS": "HEALTHY"}
        self.DIST_SCBD.set_distributor_params(healthy_distributors, healthy_status)

        num_healthy_distributors = len(healthy_distributors)
        if len_forwarders_list > num_healthy_distributors:
            print("Cannot Do Job - more fwdrs than dists")
            # send response msg to base refusing job
            LOGGER.info('Reporting to base insufficient healthy distributors for job #%s', job_num)
            ncsa_params = {}
            ncsa_params[MSG_TYPE] = "NCSA_RESOURCES_QUERY_ACK"
            ncsa_params[JOB_NUM] = job_num
            ncsa_params["ACK_BOOL"] = False
            ncsa_params["ACK_ID"] = response_timed_ack_id
            # NCSA_PUBLISH is a class attribute, so it must be reached via self.
            # NOTE(review): every other publish in this class passes the dict
            # itself and targets params['REPLY_QUEUE']; confirm the yaml.dump
            # and the queue used here are intended.
            self._base_publisher.publish_message(self.NCSA_PUBLISH, yaml.dump(ncsa_params))
            # delete job and leave distributors in Idle state
            self.JOB_SCBD.delete_job(job_num)
            idle_state = {"STATE": "IDLE"}
            self.DIST_SCBD.set_distributor_params(healthy_distributors, idle_state)
            return

        Pairs = self.assemble_pairs(forwarders_list, ccd_list, healthy_distributors)
        self.JOB_SCBD.set_pairs_for_job(job_num, Pairs)

        # send pair info to each distributor
        job_params_ack = self.get_next_timed_ack_id('DISTRIBUTOR_XFER_PARAMS_ACK')
        for pair in Pairs:
            tmp_msg = {}
            tmp_msg[MSG_TYPE] = 'DISTRIBUTOR_XFER_PARAMS'
            tmp_msg['XFER_PARAMS'] = pair
            tmp_msg[JOB_NUM] = job_num
            tmp_msg[ACK_ID] = job_params_ack
            tmp_msg['REPLY_QUEUE'] = 'ncsa_foreman_ack_publish'
            tmp_msg['VISIT_ID'] = visit_id
            tmp_msg['IMAGE_ID'] = image_id
            fqn = pair['DISTRIBUTOR']['FQN']
            route_key = self.DIST_SCBD.get_value_for_distributor(fqn, 'CONSUME_QUEUE')
            self._ncsa_publisher.publish_message(route_key, tmp_msg)

        self.DIST_SCBD.set_distributor_params(healthy_distributors, {STATE: IN_READY_STATE})

        # Only the paired distributors were sent parameters, so only that many
        # acks can arrive; expecting one per healthy distributor would always
        # time out when there are more healthy distributors than forwarders.
        dist_params_response = self.progressive_ack_timer(job_params_ack, len(Pairs), 2.0)

        if dist_params_response is None:
            print("RECEIVED NO ACK RESPONSES FROM DISTRIBUTORS AFTER SENDING XFER PARAMS")
            # Do something such as raise a system wide exception

        # Now inform PP Foreman that all is in ready state
        ncsa_params = {}
        ncsa_params[MSG_TYPE] = "NCSA_START_INTEGRATION_ACK"
        ncsa_params[JOB_NUM] = job_num
        ncsa_params['IMAGE_ID'] = image_id
        ncsa_params['VISIT_ID'] = visit_id
        ncsa_params['SESSION_ID'] = params['SESSION_ID']
        ncsa_params['COMPONENT'] = 'NCSA_FOREMAN'
        ncsa_params[ACK_BOOL] = True
        ncsa_params["ACK_ID"] = response_timed_ack_id
        ncsa_params["PAIRS"] = Pairs
        self._base_publisher.publish_message(params['REPLY_QUEUE'], ncsa_params)
        LOGGER.info('Sufficient distributors and workers are available. Informing Base')
        # This is the ack being sent, not the incoming request.
        LOGGER.debug('NCSA Start Integration outgoing ack message: %s', ncsa_params)

        LOGGER.info('The following pairings have been sent to the Base for job %s:' % job_num)
        LOGGER.info(Pairs)

    def assemble_pairs(self, forwarders_list, ccd_list, healthy_distributors):
        """Pair the i-th forwarder and CCD list with the i-th healthy distributor.

        :params forwarders_list: Forwarders needing a distributor.
        :params ccd_list: One CCD list per forwarder, in the same order.
        :params healthy_distributors: Distributor FQNs; needs at least as many
                                      entries as forwarders_list.

        :return: List of dicts with keys 'FORWARDER', 'CCD_LIST' and
                 'DISTRIBUTOR' (FQN, HOSTNAME, NAME, IP_ADDR, TARGET_DIR).
        """
        lookup = self.DIST_SCBD.get_value_for_distributor
        PAIRS = []
        for i, forwarder in enumerate(forwarders_list):
            distributor = healthy_distributors[i]
            PAIRS.append({
                'FORWARDER': forwarder,
                'CCD_LIST': ccd_list[i],
                'DISTRIBUTOR': {
                    'FQN': distributor,
                    'HOSTNAME': lookup(distributor, HOSTNAME),
                    'NAME': lookup(distributor, NAME),
                    'IP_ADDR': lookup(distributor, IP_ADDR),
                    'TARGET_DIR': lookup(distributor, TARGET_DIR),
                },
            })
        return PAIRS

    def process_readout(self, params):
        """Tell each paired distributor to read out, then ack the collated results.

        Publishes NCSA_READOUT_ACK to params['REPLY_QUEUE'] with ACK_BOOL True
        and the merged CCD and receipt lists when every distributor answers in
        time, or with ACK_BOOL False and empty lists otherwise.

        :params params: Message dict; JOB_NUM, ACK_ID, 'IMAGE_ID', 'VISIT_ID',
                        'SESSION_ID' and 'REPLY_QUEUE' are read from it.
        """
        job_number = params[JOB_NUM]
        response_ack_id = params[ACK_ID]
        pairs = self.JOB_SCBD.get_pairs_for_job(job_number)
        sleep(3)
        len_pairs = len(pairs)
        # Message types are written as string literals, matching the keys
        # registered in self._msg_actions.
        ack_id = self.get_next_timed_ack_id('DISTRIBUTOR_READOUT_ACK')

        # The request is identical for every distributor; only the route differs.
        msg_params = {}
        msg_params[MSG_TYPE] = 'DISTRIBUTOR_READOUT'
        msg_params[JOB_NUM] = job_number
        msg_params['REPLY_QUEUE'] = 'ncsa_foreman_ack_publish'
        msg_params[ACK_ID] = ack_id
        for pair in pairs:  # Pairs is a list of dictionaries
            distributor = pair['DISTRIBUTOR']['FQN']
            routing_key = self.DIST_SCBD.get_routing_key(distributor)
            self.DIST_SCBD.set_distributor_state(distributor, 'START_READOUT')
            self._ncsa_publisher.publish_message(routing_key, msg_params)

        distributor_responses = self.progressive_ack_timer(ack_id, len_pairs, 24)

        ncsa_params = {}
        ncsa_params[MSG_TYPE] = 'NCSA_READOUT_ACK'
        ncsa_params[JOB_NUM] = job_number
        ncsa_params['IMAGE_ID'] = params['IMAGE_ID']
        ncsa_params['VISIT_ID'] = params['VISIT_ID']
        ncsa_params['SESSION_ID'] = params['SESSION_ID']
        # Same key as every other ack built in this class.
        ncsa_params['COMPONENT'] = self.COMPONENT_NAME
        ncsa_params[ACK_ID] = response_ack_id

        if distributor_responses is not None:
            CCD_LIST = []
            RECEIPT_LIST = []
            for response in distributor_responses.values():
                result_list = response['RESULT_LIST']
                # Keep the two lists parallel: one receipt per CCD.
                for ccd, receipt in zip(result_list['CCD_LIST'], result_list['RECEIPT_LIST']):
                    CCD_LIST.append(ccd)
                    RECEIPT_LIST.append(receipt)
            ncsa_params[ACK_BOOL] = True
            ncsa_params['RESULT_LIST'] = {'CCD_LIST': CCD_LIST,
                                          'RECEIPT_LIST': RECEIPT_LIST}
        else:
            ncsa_params[ACK_BOOL] = False
            ncsa_params['RESULT_LIST'] = {'CCD_LIST': None,
                                          'RECEIPT_LIST': None}

        # Publish the ack built above, not the distributor request.
        self._base_publisher.publish_message(params['REPLY_QUEUE'], ncsa_params)

    def process_ack(self, params):
        """Check an incoming ack into ACK_SCBD."""
        self.ACK_SCBD.add_timed_ack(params)

    def get_next_timed_ack_id(self, ack_type):
        """Increment the ack counter and return ack_type + '_' + the zero-padded counter."""
        self._next_timed_ack_id = self._next_timed_ack_id + 1
        retval = ack_type + "_" + str(self._next_timed_ack_id).zfill(6)
        return retval

    def ack_timer(self, seconds):
        """Sleep for the given number of seconds and return True."""
        sleep(seconds)
        return True

    def progressive_ack_timer(self, ack_id, expected_replies, seconds):
        """Poll ACK_SCBD every 0.5 s until expected_replies components have acked.

        :params ack_id: Ack ID to wait for.
        :params expected_replies: Number of components expected to ack.
        :params seconds: Maximum time to wait in seconds.

        :return: The response dict once it holds exactly expected_replies
                 entries, or None if that has not happened by the deadline.
        """
        counter = 0.0
        while counter < seconds:
            counter = counter + 0.5
            sleep(0.5)
            response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
            if response is not None and len(response) == expected_replies:
                return response

        ## Try one final time
        response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        if response is not None and len(response) == expected_replies:
            return response
        return None

    def extract_config_values(self, filename):
        """Load the config file and store broker, scoreboard and distributor settings on self.

        Exits the process with status 101 if the file cannot be read and with
        status 99 if a required key is missing.

        :params filename: Path of the YAML configuration file.

        :return: The loaded configuration dictionary.
        """
        cdm = None
        try:
            cdm = self.loadConfigFile(filename)
        except IOError as e:
            # The format string needs the file name as its argument.
            LOGGER.critical("Unable to find CFG Yaml file %s\n", filename)
            sys.exit(101)

        try:
            self._base_broker_addr = cdm[ROOT][BASE_BROKER_ADDR]
            self._ncsa_broker_addr = cdm[ROOT][NCSA_BROKER_ADDR]

            self._sub_base_name = cdm[ROOT]['NFM_BASE_BROKER_NAME']  # Message broker user & passwd
            self._sub_base_passwd = cdm[ROOT]['NFM_BASE_BROKER_PASSWD']
            self._sub_ncsa_name = cdm[ROOT]['NFM_NCSA_BROKER_NAME']  # Message broker user & passwd
            self._sub_ncsa_passwd = cdm[ROOT]['NFM_NCSA_BROKER_PASSWD']

            self._pub_base_name = cdm[ROOT]['BASE_BROKER_PUB_NAME']
            self._pub_base_passwd = cdm[ROOT]['BASE_BROKER_PUB_PASSWD']
            self._pub_ncsa_name = cdm[ROOT]['NCSA_BROKER_PUB_NAME']
            self._pub_ncsa_passwd = cdm[ROOT]['NCSA_BROKER_PUB_PASSWD']

            self._scbd_dict = cdm[ROOT]['SCOREBOARDS']
            self.distributor_dict = cdm[ROOT][XFER_COMPONENTS][DISTRIBUTORS]
        except KeyError as e:
            LOGGER.critical("CDM Dictionary error - missing Key")
            LOGGER.critical("Offending Key is %s", str(e))
            LOGGER.critical("Bailing Out...")
            sys.exit(99)

        # Message formats are optional in the config; read them with the same
        # string keys that the presence test used. YAML is the default.
        self._base_msg_format = cdm[ROOT].get('BASE_MSG_FORMAT', 'YAML')
        self._ncsa_msg_format = cdm[ROOT].get('NCSA_MSG_FORMAT', 'YAML')

        return cdm

    def setup_consumer_threads(self):
        """Start a ThreadManager and register the two consumers this foreman needs."""
        LOGGER.info('Building _base_broker_url')
        base_broker_url = "amqp://" + self._sub_base_name + ":" + \
                                      self._sub_base_passwd + "@" + \
                                      str(self._base_broker_addr)

        ncsa_broker_url = "amqp://" + self._sub_ncsa_name + ":" + \
                                      self._sub_ncsa_passwd + "@" + \
                                      str(self._ncsa_broker_addr)

        self.shutdown_event = threading.Event()

        # Set up kwargs that describe consumers to be started
        # The NCSA Foreman needs two message consumers
        consumers = (
            (ncsa_broker_url, 'ncsa_foreman_ack_publish', self.on_ack_message),
            (base_broker_url, 'ncsa_consume', self.on_pp_message),
        )
        kws = {}
        for amqp_url, queue, callback in consumers:
            name = 'Thread-' + queue
            kws[name] = {
                'amqp_url': amqp_url,
                'name': name,
                'queue': queue,
                'callback': callback,
                'format': "YAML",
                'test_val': 'test_it',
            }

        self.thread_manager = ThreadManager('thread-manager', self.shutdown_event)
        self.thread_manager.start()
        self.thread_manager.add_thread_groups(kws)

    def setup_scoreboards(self):
        """Create the Distributor, Job and Ack scoreboards from the SCOREBOARDS config section."""
        # Create Redis Distributor table with Distributor info
        self.DIST_SCBD = DistributorScoreboard('NCSA_DIST_SCBD',
                                               self._scbd_dict['NCSA_DIST_SCBD'],
                                               self.distributor_dict)
        self.JOB_SCBD = JobScoreboard('NCSA_JOB_SCBD', self._scbd_dict['NCSA_JOB_SCBD'])
        self.ACK_SCBD = AckScoreboard('NCSA_ACK_SCBD', self._scbd_dict['NCSA_ACK_SCBD'])

    def shutdown(self):
        """Signal the consumer threads to stop, then terminate the process with os._exit(0)."""
        LOGGER.debug("NCSA Foreman: Shutting down Consumer threads.")
        self.shutdown_event.set()
        LOGGER.debug("Thread Manager shutting down and app exiting...")
        print("\n")
        os._exit(0)
class NcsaForeman(iip_base):
    """Foreman on the NCSA side that pairs forwarders with distributors.

    Incoming messages are dispatched on their MSG_TYPE through
    ``self._msg_actions``. Acks to the requester are published with
    ``self._base_publisher``; messages to distributors are published with
    ``self._ncsa_publisher``.
    """
    # NOTE(review): an earlier definition of NcsaForeman precedes this one in
    # the file; this later definition is the name bound at import time.

    NCSA_CONSUME = "ncsa_consume"
    NCSA_PUBLISH = "ncsa_publish"
    COMPONENT_NAME = 'NCSA_FOREMAN'
    DISTRIBUTOR_PUBLISH = "distributor_publish"
    ACK_PUBLISH = "ack_publish"
    prp = toolsmod.prp

    def __init__(self, filename):
        """Load the config, then set up logging, publishers, scoreboards and consumers.

        :params filename: Path of the YAML configuration file.
        """
        toolsmod.singleton(self)

        print('Extracting values from configuation dictionary %s' % filename)
        cdm = self.extract_config_values(filename)

        logging_dir = cdm[ROOT].get('LOGGING_DIR', None)
        log_file = self.setupLogging(logging_dir, 'NcsaForeman.log')
        print("Logs will be written to %s" % log_file)

        # MSG_TYPE -> bound handler; every handler takes the message dict.
        self._msg_actions = {
            'NCSA_NEXT_VISIT': self.set_visit,
            'NCSA_NEW_SESSION': self.set_session,
            'NCSA_START_INTEGRATION': self.process_start_integration,
            'NCSA_READOUT': self.process_readout,
            'DISTRIBUTOR_HEALTH_CHECK_ACK': self.process_ack,
            'DISTRIBUTOR_XFER_PARAMS_ACK': self.process_ack,
            'DISTRIBUTOR_READOUT_ACK': self.process_ack
        }

        self._next_timed_ack_id = 10000

        # Each setup step runs exactly once. Running the publisher and consumer
        # setup twice created duplicate connections and a second ThreadManager
        # consuming the same queues.
        self.setup_publishers()
        self.setup_scoreboards()

        self.thread_manager = None
        self.setup_consumer_threads()
        LOGGER.info('Ncsa foreman consumer setup')

        LOGGER.info('Ncsa Foreman Init complete')

    def setup_publishers(self):
        """Create the Base and NCSA SimplePublisher objects from the stored credentials."""
        self._pub_base_broker_url = "amqp://" + self._pub_base_name + ":" + \
                                                self._pub_base_passwd + "@" + \
                                                str(self._base_broker_addr)

        self._pub_ncsa_broker_url = "amqp://" + self._pub_ncsa_name + ":" + \
                                                self._pub_ncsa_passwd + "@" + \
                                                str(self._ncsa_broker_addr)

        # Log only the broker address: the full URL embeds the password.
        LOGGER.info('Setting up Base publisher on %s using %s',
                    self._base_broker_addr, self._base_msg_format)
        self._base_publisher = SimplePublisher(self._pub_base_broker_url,
                                               self._base_msg_format)

        LOGGER.info('Setting up NCSA publisher on %s using %s',
                    self._ncsa_broker_addr, self._ncsa_msg_format)
        self._ncsa_publisher = SimplePublisher(self._pub_ncsa_broker_url,
                                               self._ncsa_msg_format)

    def _dispatch(self, msg_dict):
        """Route msg_dict to the handler registered for its MSG_TYPE.

        An unregistered message type is logged and dropped instead of raising
        TypeError by calling None inside the consumer callback.
        """
        msg_type = msg_dict[MSG_TYPE]
        handler = self._msg_actions.get(msg_type)
        if handler is None:
            LOGGER.error('No handler registered for message type %s; message dropped',
                         msg_type)
            return None
        return handler(msg_dict)

    def on_pp_message(self, ch, method, properties, body):
        """Consumer callback for the 'ncsa_consume' queue: ack the delivery, then dispatch."""
        ch.basic_ack(method.delivery_tag)
        msg_dict = body
        LOGGER.debug('Message from PP callback message body is: %s',
                     self.prp.pformat(msg_dict))
        self._dispatch(msg_dict)

    def on_ack_message(self, ch, method, properties, body):
        """Consumer callback for 'ncsa_foreman_ack_publish': ack the delivery, then dispatch."""
        ch.basic_ack(method.delivery_tag)
        msg_dict = body
        LOGGER.info('In ACK message callback')
        LOGGER.debug('Message from ACK callback message body is: %s',
                     self.prp.pformat(msg_dict))
        self._dispatch(msg_dict)

    def set_visit(self, params):
        """Record the visit id and bore sight in JOB_SCBD and reply with NCSA_NEXT_VISIT_ACK.

        :params params: Message dict; 'BORE_SIGHT', 'VISIT_ID', 'ACK_ID' and
                        'REPLY_QUEUE' are read from it.
        """
        bore_sight = params['BORE_SIGHT']
        visit_id = params['VISIT_ID']
        self.JOB_SCBD.set_visit_id(visit_id, bore_sight)
        ###
        ### Send Boresight to Someone here...
        ###
        msg = {
            'MSG_TYPE': 'NCSA_NEXT_VISIT_ACK',
            'COMPONENT': self.COMPONENT_NAME,
            'ACK_ID': params['ACK_ID'],
            'ACK_BOOL': True,
        }
        self._base_publisher.publish_message(params['REPLY_QUEUE'], msg)

    def set_session(self, params):
        """Record the session id in JOB_SCBD and reply with NCSA_NEW_SESSION_ACK.

        :params params: Message dict; 'SESSION_ID', 'ACK_ID' and 'REPLY_QUEUE'
                        are read from it.
        """
        self.JOB_SCBD.set_session(params['SESSION_ID'])
        msg = {
            'MSG_TYPE': 'NCSA_NEW_SESSION_ACK',
            'COMPONENT': self.COMPONENT_NAME,
            'ACK_ID': params['ACK_ID'],
            'ACK_BOOL': True,
        }
        self._base_publisher.publish_message(params['REPLY_QUEUE'], msg)

    def process_start_integration(self, params):
        """Health-check the distributors, pair them with forwarders, and ack the request.

        If fewer distributors answer the health check than there are forwarders,
        the job is deleted and a refusal (ACK_BOOL False) is published.
        Otherwise one distributor is assigned per forwarder, each paired
        distributor is sent its transfer parameters, and
        NCSA_START_INTEGRATION_ACK with the pairs goes to params['REPLY_QUEUE'].

        :params params: Message dict; JOB_NUM, 'IMAGE_ID', 'VISIT_ID',
                        'SESSION_ID', 'ACK_ID', 'REPLY_QUEUE' and 'FORWARDERS'
                        are read from it.
        """
        job_num = str(params[JOB_NUM])
        image_id = params['IMAGE_ID']
        visit_id = params['VISIT_ID']
        response_timed_ack_id = params["ACK_ID"]
        LOGGER.info('NCSA received Start Integration message from Base')
        LOGGER.debug('NCSA Start Integration incoming message: %s' % params)

        forwarders_list = params['FORWARDERS']['FORWARDER_LIST']
        ccd_list = params['FORWARDERS']['CCD_LIST']  # A list of lists...
        len_forwarders_list = len(forwarders_list)
        self.JOB_SCBD.add_job(job_num, image_id, visit_id, ccd_list)
        LOGGER.info('Received new job %s. Needed workers is %s', job_num, str(len_forwarders_list))

        # run distributor health check
        # get timed_ack_id
        timed_ack = self.get_next_timed_ack_id("DISTRIBUTOR_HEALTH_CHECK_ACK")

        distributors = self.DIST_SCBD.return_distributors_list()
        # Mark all healthy distributors Unknown
        state_unknown = {"STATE": "HEALTH_CHECK", "STATUS": "UNKNOWN"}
        self.DIST_SCBD.set_distributor_params(distributors, state_unknown)

        # send health check messages
        ack_params = {}
        ack_params[MSG_TYPE] = "DISTRIBUTOR_HEALTH_CHECK"
        ack_params['REPLY_QUEUE'] = 'ncsa_foreman_ack_publish'
        ack_params["ACK_ID"] = timed_ack
        ack_params[JOB_NUM] = job_num
        for distributor in distributors:
            route_key = self.DIST_SCBD.get_value_for_distributor(distributor, "CONSUME_QUEUE")
            self._ncsa_publisher.publish_message(route_key, ack_params)

        # start timers
        self.ack_timer(2)

        # at end of timer, get list of distributors
        # The scoreboard lookup may yield None (progressive_ack_timer guards
        # for that too); treat it as "nobody answered".
        dicts_of_distributors = self.ACK_SCBD.get_components_for_timed_ack(timed_ack)
        healthy_distributors = list(dicts_of_distributors.keys()) if dicts_of_distributors else []

        # update distributor scoreboard with healthy distributors
        healthy_status = {"STATUS": "HEALTHY"}
        self.DIST_SCBD.set_distributor_params(healthy_distributors, healthy_status)

        num_healthy_distributors = len(healthy_distributors)
        if len_forwarders_list > num_healthy_distributors:
            print("Cannot Do Job - more fwdrs than dists")
            # send response msg to base refusing job
            LOGGER.info('Reporting to base insufficient healthy distributors for job #%s', job_num)
            ncsa_params = {}
            ncsa_params[MSG_TYPE] = "NCSA_RESOURCES_QUERY_ACK"
            ncsa_params[JOB_NUM] = job_num
            ncsa_params["ACK_BOOL"] = False
            ncsa_params["ACK_ID"] = response_timed_ack_id
            # NCSA_PUBLISH is a class attribute, so it must be reached via self.
            # NOTE(review): every other publish in this class passes the dict
            # itself and targets params['REPLY_QUEUE']; confirm the yaml.dump
            # and the queue used here are intended.
            self._base_publisher.publish_message(self.NCSA_PUBLISH, yaml.dump(ncsa_params))
            # delete job and leave distributors in Idle state
            self.JOB_SCBD.delete_job(job_num)
            idle_state = {"STATE": "IDLE"}
            self.DIST_SCBD.set_distributor_params(healthy_distributors, idle_state)
            return

        Pairs = self.assemble_pairs(forwarders_list, ccd_list, healthy_distributors)
        self.JOB_SCBD.set_pairs_for_job(job_num, Pairs)

        # send pair info to each distributor
        job_params_ack = self.get_next_timed_ack_id('DISTRIBUTOR_XFER_PARAMS_ACK')
        for pair in Pairs:
            tmp_msg = {}
            tmp_msg[MSG_TYPE] = 'DISTRIBUTOR_XFER_PARAMS'
            tmp_msg['XFER_PARAMS'] = pair
            tmp_msg[JOB_NUM] = job_num
            tmp_msg[ACK_ID] = job_params_ack
            tmp_msg['REPLY_QUEUE'] = 'ncsa_foreman_ack_publish'
            tmp_msg['VISIT_ID'] = visit_id
            tmp_msg['IMAGE_ID'] = image_id
            fqn = pair['DISTRIBUTOR']['FQN']
            route_key = self.DIST_SCBD.get_value_for_distributor(fqn, 'CONSUME_QUEUE')
            self._ncsa_publisher.publish_message(route_key, tmp_msg)

        self.DIST_SCBD.set_distributor_params(healthy_distributors, {STATE: IN_READY_STATE})

        # Only the paired distributors were sent parameters, so only that many
        # acks can arrive; expecting one per healthy distributor would always
        # time out when there are more healthy distributors than forwarders.
        dist_params_response = self.progressive_ack_timer(job_params_ack, len(Pairs), 2.0)

        if dist_params_response is None:
            print("RECEIVED NO ACK RESPONSES FROM DISTRIBUTORS AFTER SENDING XFER PARAMS")
            # Do something such as raise a system wide exception

        # Now inform PP Foreman that all is in ready state
        ncsa_params = {}
        ncsa_params[MSG_TYPE] = "NCSA_START_INTEGRATION_ACK"
        ncsa_params[JOB_NUM] = job_num
        ncsa_params['IMAGE_ID'] = image_id
        ncsa_params['VISIT_ID'] = visit_id
        ncsa_params['SESSION_ID'] = params['SESSION_ID']
        ncsa_params['COMPONENT'] = 'NCSA_FOREMAN'
        ncsa_params[ACK_BOOL] = True
        ncsa_params["ACK_ID"] = response_timed_ack_id
        ncsa_params["PAIRS"] = Pairs
        self._base_publisher.publish_message(params['REPLY_QUEUE'], ncsa_params)
        LOGGER.info('Sufficient distributors and workers are available. Informing Base')
        # This is the ack being sent, not the incoming request.
        LOGGER.debug('NCSA Start Integration outgoing ack message: %s', ncsa_params)

        LOGGER.info('The following pairings have been sent to the Base for job %s:' % job_num)
        LOGGER.info(Pairs)

    def assemble_pairs(self, forwarders_list, ccd_list, healthy_distributors):
        """Pair the i-th forwarder and CCD list with the i-th healthy distributor.

        :params forwarders_list: Forwarders needing a distributor.
        :params ccd_list: One CCD list per forwarder, in the same order.
        :params healthy_distributors: Distributor FQNs; needs at least as many
                                      entries as forwarders_list.

        :return: List of dicts with keys 'FORWARDER', 'CCD_LIST' and
                 'DISTRIBUTOR' (FQN, HOSTNAME, NAME, IP_ADDR, TARGET_DIR).
        """
        lookup = self.DIST_SCBD.get_value_for_distributor
        PAIRS = []
        for i, forwarder in enumerate(forwarders_list):
            distributor = healthy_distributors[i]
            PAIRS.append({
                'FORWARDER': forwarder,
                'CCD_LIST': ccd_list[i],
                'DISTRIBUTOR': {
                    'FQN': distributor,
                    'HOSTNAME': lookup(distributor, HOSTNAME),
                    'NAME': lookup(distributor, NAME),
                    'IP_ADDR': lookup(distributor, IP_ADDR),
                    'TARGET_DIR': lookup(distributor, TARGET_DIR),
                },
            })
        return PAIRS

    def process_readout(self, params):
        """Tell each paired distributor to read out, then ack the collated results.

        Publishes NCSA_READOUT_ACK to params['REPLY_QUEUE'] with ACK_BOOL True
        and the merged CCD and receipt lists when every distributor answers in
        time, or with ACK_BOOL False and empty lists otherwise.

        :params params: Message dict; JOB_NUM, ACK_ID, 'IMAGE_ID', 'VISIT_ID',
                        'SESSION_ID' and 'REPLY_QUEUE' are read from it.
        """
        job_number = params[JOB_NUM]
        response_ack_id = params[ACK_ID]
        pairs = self.JOB_SCBD.get_pairs_for_job(job_number)
        sleep(3)
        len_pairs = len(pairs)
        # Message types are written as string literals, matching the keys
        # registered in self._msg_actions.
        ack_id = self.get_next_timed_ack_id('DISTRIBUTOR_READOUT_ACK')

        # The request is identical for every distributor; only the route differs.
        msg_params = {}
        msg_params[MSG_TYPE] = 'DISTRIBUTOR_READOUT'
        msg_params[JOB_NUM] = job_number
        msg_params['REPLY_QUEUE'] = 'ncsa_foreman_ack_publish'
        msg_params[ACK_ID] = ack_id
        for pair in pairs:  # Pairs is a list of dictionaries
            distributor = pair['DISTRIBUTOR']['FQN']
            routing_key = self.DIST_SCBD.get_routing_key(distributor)
            self.DIST_SCBD.set_distributor_state(distributor, 'START_READOUT')
            self._ncsa_publisher.publish_message(routing_key, msg_params)

        distributor_responses = self.progressive_ack_timer(ack_id, len_pairs, 24)

        ncsa_params = {}
        ncsa_params[MSG_TYPE] = 'NCSA_READOUT_ACK'
        ncsa_params[JOB_NUM] = job_number
        ncsa_params['IMAGE_ID'] = params['IMAGE_ID']
        ncsa_params['VISIT_ID'] = params['VISIT_ID']
        ncsa_params['SESSION_ID'] = params['SESSION_ID']
        # Same key as every other ack built in this class.
        ncsa_params['COMPONENT'] = self.COMPONENT_NAME
        ncsa_params[ACK_ID] = response_ack_id

        if distributor_responses is not None:
            CCD_LIST = []
            RECEIPT_LIST = []
            for response in distributor_responses.values():
                result_list = response['RESULT_LIST']
                # Keep the two lists parallel: one receipt per CCD.
                for ccd, receipt in zip(result_list['CCD_LIST'], result_list['RECEIPT_LIST']):
                    CCD_LIST.append(ccd)
                    RECEIPT_LIST.append(receipt)
            ncsa_params[ACK_BOOL] = True
            ncsa_params['RESULT_LIST'] = {'CCD_LIST': CCD_LIST,
                                          'RECEIPT_LIST': RECEIPT_LIST}
        else:
            ncsa_params[ACK_BOOL] = False
            ncsa_params['RESULT_LIST'] = {'CCD_LIST': None,
                                          'RECEIPT_LIST': None}

        # Publish the ack built above, not the distributor request.
        self._base_publisher.publish_message(params['REPLY_QUEUE'], ncsa_params)

    def process_ack(self, params):
        """Check an incoming ack into ACK_SCBD."""
        self.ACK_SCBD.add_timed_ack(params)

    def get_next_timed_ack_id(self, ack_type):
        """Increment the ack counter and return ack_type + '_' + the zero-padded counter."""
        self._next_timed_ack_id = self._next_timed_ack_id + 1
        retval = ack_type + "_" + str(self._next_timed_ack_id).zfill(6)
        return retval

    def ack_timer(self, seconds):
        """Sleep for the given number of seconds and return True."""
        sleep(seconds)
        return True

    def progressive_ack_timer(self, ack_id, expected_replies, seconds):
        """Poll ACK_SCBD every 0.5 s until expected_replies components have acked.

        :params ack_id: Ack ID to wait for.
        :params expected_replies: Number of components expected to ack.
        :params seconds: Maximum time to wait in seconds.

        :return: The response dict once it holds exactly expected_replies
                 entries, or None if that has not happened by the deadline.
        """
        counter = 0.0
        while counter < seconds:
            counter = counter + 0.5
            sleep(0.5)
            response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
            if response is not None and len(response) == expected_replies:
                return response

        ## Try one final time
        response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        if response is not None and len(response) == expected_replies:
            return response
        return None

    def extract_config_values(self, filename):
        """Load the config file and store broker, scoreboard and distributor settings on self.

        Exits the process with status 101 if the file cannot be read and with
        status 99 if a required key is missing.

        :params filename: Path of the YAML configuration file.

        :return: The loaded configuration dictionary.
        """
        cdm = None
        try:
            cdm = self.loadConfigFile(filename)
        except IOError as e:
            # The format string needs the file name as its argument.
            LOGGER.critical("Unable to find CFG Yaml file %s\n", filename)
            sys.exit(101)

        try:
            self._base_broker_addr = cdm[ROOT][BASE_BROKER_ADDR]
            self._ncsa_broker_addr = cdm[ROOT][NCSA_BROKER_ADDR]

            self._sub_base_name = cdm[ROOT]['NFM_BASE_BROKER_NAME']  # Message broker user & passwd
            self._sub_base_passwd = cdm[ROOT]['NFM_BASE_BROKER_PASSWD']
            self._sub_ncsa_name = cdm[ROOT]['NFM_NCSA_BROKER_NAME']  # Message broker user & passwd
            self._sub_ncsa_passwd = cdm[ROOT]['NFM_NCSA_BROKER_PASSWD']

            self._pub_base_name = cdm[ROOT]['BASE_BROKER_PUB_NAME']
            self._pub_base_passwd = cdm[ROOT]['BASE_BROKER_PUB_PASSWD']
            self._pub_ncsa_name = cdm[ROOT]['NCSA_BROKER_PUB_NAME']
            self._pub_ncsa_passwd = cdm[ROOT]['NCSA_BROKER_PUB_PASSWD']

            self._scbd_dict = cdm[ROOT]['SCOREBOARDS']
            self.distributor_dict = cdm[ROOT][XFER_COMPONENTS][DISTRIBUTORS]
        except KeyError as e:
            LOGGER.critical("CDM Dictionary error - missing Key")
            LOGGER.critical("Offending Key is %s", str(e))
            LOGGER.critical("Bailing Out...")
            sys.exit(99)

        # Message formats are optional in the config; read them with the same
        # string keys that the presence test used. YAML is the default.
        self._base_msg_format = cdm[ROOT].get('BASE_MSG_FORMAT', 'YAML')
        self._ncsa_msg_format = cdm[ROOT].get('NCSA_MSG_FORMAT', 'YAML')

        return cdm

    def setup_consumer_threads(self):
        """Start a ThreadManager and register the two consumers this foreman needs."""
        LOGGER.info('Building _base_broker_url')
        base_broker_url = "amqp://" + self._sub_base_name + ":" + \
                                      self._sub_base_passwd + "@" + \
                                      str(self._base_broker_addr)

        ncsa_broker_url = "amqp://" + self._sub_ncsa_name + ":" + \
                                      self._sub_ncsa_passwd + "@" + \
                                      str(self._ncsa_broker_addr)

        self.shutdown_event = threading.Event()

        # Set up kwargs that describe consumers to be started
        # The NCSA Foreman needs two message consumers
        consumers = (
            (ncsa_broker_url, 'ncsa_foreman_ack_publish', self.on_ack_message),
            (base_broker_url, 'ncsa_consume', self.on_pp_message),
        )
        kws = {}
        for amqp_url, queue, callback in consumers:
            name = 'Thread-' + queue
            kws[name] = {
                'amqp_url': amqp_url,
                'name': name,
                'queue': queue,
                'callback': callback,
                'format': "YAML",
                'test_val': 'test_it',
            }

        self.thread_manager = ThreadManager('thread-manager', self.shutdown_event)
        self.thread_manager.start()
        self.thread_manager.add_thread_groups(kws)

    def setup_scoreboards(self):
        """Create the Distributor, Job and Ack scoreboards from the SCOREBOARDS config section."""
        # Create Redis Distributor table with Distributor info
        self.DIST_SCBD = DistributorScoreboard('NCSA_DIST_SCBD',
                                               self._scbd_dict['NCSA_DIST_SCBD'],
                                               self.distributor_dict)
        self.JOB_SCBD = JobScoreboard('NCSA_JOB_SCBD', self._scbd_dict['NCSA_JOB_SCBD'])
        self.ACK_SCBD = AckScoreboard('NCSA_ACK_SCBD', self._scbd_dict['NCSA_ACK_SCBD'])

    def shutdown(self):
        """Signal the consumer threads to stop, then terminate the process with os._exit(0)."""
        LOGGER.debug("NCSA Foreman: Shutting down Consumer threads.")
        self.shutdown_event.set()
        LOGGER.debug("Thread Manager shutting down and app exiting...")
        print("\n")
        os._exit(0)
class BaseForeman:
    """Coordinate jobs between the DMCS, the Forwarders and the NCSA Foreman.

    The foreman reads its broker credentials and forwarder table from a YAML
    config file, starts one consumer thread per incoming queue, and reacts to
    NEW_JOB / READOUT messages by running timed acknowledgement rounds against
    the Forwarders and NCSA.
    """

    FWD_SCBD = None
    JOB_SCBD = None
    ACK_SCBD = None
    ACK_PUBLISH = "ack_publish"
    YAML = 'YAML'

    # NOTE(review): setup_consumers(), ncsa_resources_query() and
    # process_dmcs_readout() read self.DMCS_PUBLISH, self.FORWARDER_PUBLISH,
    # self.NCSA_PUBLISH and self.NCSA_CONSUME, but none of those queue names is
    # defined in this class. They must be supplied (class attributes or a
    # subclass) before the foreman can start - confirm the intended values.

    def __init__(self, filename=None):
        """Load the config, build scoreboards, publishers and consumers.

        :param filename: path of the YAML config file; defaults to
            'ForemanCfg.yaml' when None.

        Exits the process with status 99 when a required config key is missing.
        """
        toolsmod.singleton(self)

        self._config_file = 'ForemanCfg.yaml' if filename is None else filename
        cdm = toolsmod.intake_yaml_file(self._config_file)

        try:
            root = cdm[ROOT]
            # Message broker user & passwd
            self._base_name = root[BASE_BROKER_NAME]
            self._base_passwd = root[BASE_BROKER_PASSWD]
            self._ncsa_name = root[NCSA_BROKER_NAME]
            self._ncsa_passwd = root[NCSA_BROKER_PASSWD]
            self._base_broker_addr = root[BASE_BROKER_ADDR]
            self._ncsa_broker_addr = root[NCSA_BROKER_ADDR]
            forwarder_dict = root[XFER_COMPONENTS][FORWARDERS]
        except KeyError as e:
            print("Dictionary error - missing key %s" % e)
            print("Bailing out...")
            sys.exit(99)

        if 'QUEUE_PURGES' in root:
            self.purge_broker(root['QUEUE_PURGES'])

        self._base_msg_format = self.YAML
        self._ncsa_msg_format = self.YAML

        if 'BASE_MSG_FORMAT' in root:
            self._base_msg_format = root[BASE_MSG_FORMAT]

        if 'NCSA_MSG_FORMAT' in root:
            self._ncsa_msg_format = root[NCSA_MSG_FORMAT]

        self._next_timed_ack_id = 0

        # Create Redis Forwarder table with Forwarder info
        self.FWD_SCBD = ForwarderScoreboard(forwarder_dict)
        self.JOB_SCBD = JobScoreboard()
        self.ACK_SCBD = AckScoreboard()

        # Message type -> handler; every *_ACK type is simply recorded.
        self._msg_actions = {
            'NEW_JOB': self.process_dmcs_new_job,
            'READOUT': self.process_dmcs_readout,
            'NCSA_RESOURCE_QUERY_ACK': self.process_ack,
            'NCSA_STANDBY_ACK': self.process_ack,
            'NCSA_READOUT_ACK': self.process_ack,
            'FORWARDER_HEALTH_ACK': self.process_ack,
            'FORWARDER_JOB_PARAMS_ACK': self.process_ack,
            'FORWARDER_READOUT_ACK': self.process_ack,
            'NEW_JOB_ACK': self.process_ack,
        }

        self._base_broker_url = "amqp://" + self._base_name + ":" + \
            self._base_passwd + "@" + str(self._base_broker_addr)
        self._ncsa_broker_url = "amqp://" + self._ncsa_name + ":" + \
            self._ncsa_passwd + "@" + str(self._ncsa_broker_addr)
        # Never write the broker password to the log.
        LOGGER.info('Building _base_broker_url. Result is %s',
                    self._mask_credentials(self._base_broker_url))
        LOGGER.info('Building _ncsa_broker_url. Result is %s',
                    self._mask_credentials(self._ncsa_broker_url))

        self.setup_publishers()
        self.setup_consumers()

    @staticmethod
    def _mask_credentials(url):
        """Return *url* with the password part of 'user:password@' masked."""
        scheme, sep, rest = url.partition("://")
        if not sep:
            return url
        creds, at, host = rest.rpartition("@")
        if not at:
            return url
        user = creds.partition(":")[0]
        return scheme + "://" + user + ":****@" + host

    def _start_consumer_thread(self, target, thread_name, label, exit_code):
        """Start *target* in a new thread; exit with *exit_code* on failure."""
        try:
            _thread.start_new_thread(target, (thread_name, 2,))
        except Exception:
            LOGGER.critical('Cannot start %s consumer thread, exiting...', label)
            sys.exit(exit_code)

    def setup_consumers(self):
        """This method sets up a message listener from each entity
           with which the BaseForeman has contact here. These
           listeners are instanced in this class, but their run
           methods are each called as a separate thread. While
           pika does not claim to be thread safe, the manner in which
           the listeners are invoked below is a safe implementation
           that provides non-blocking, fully asynchronous messaging
           to the BaseForeman.

           The code in this file expects message bodies to arrive as
           YAML'd python dicts, while in fact, message bodies are sent
           on the wire as XML; this way message format can be validated,
           versioned, and specified in just one place. To make this work,
           there is an object that translates the params dict to XML, and
           vice versa. The translation object is instantiated by the consumer
           and acts as a filter before sending messages on to the registered
           callback for processing.
        """
        LOGGER.info('Setting up consumers on %s',
                    self._mask_credentials(self._base_broker_url))
        LOGGER.info('Running start_new_thread on all consumer methods')

        self._dmcs_consumer = Consumer(self._base_broker_url, self.DMCS_PUBLISH,
                                       self._base_msg_format)
        self._start_consumer_thread(self.run_dmcs_consumer,
                                    "thread-dmcs-consumer", 'DMCS', 99)

        self._forwarder_consumer = Consumer(self._base_broker_url,
                                            self.FORWARDER_PUBLISH,
                                            self._base_msg_format)
        self._start_consumer_thread(self.run_forwarder_consumer,
                                    "thread-forwarder-consumer", 'FORWARDERS', 100)

        self._ncsa_consumer = Consumer(self._base_broker_url, self.NCSA_PUBLISH,
                                       self._base_msg_format)
        self._start_consumer_thread(self.run_ncsa_consumer,
                                    "thread-ncsa-consumer", 'NCSA', 101)

        self._ack_consumer = Consumer(self._base_broker_url, self.ACK_PUBLISH,
                                      self._base_msg_format)
        self._start_consumer_thread(self.run_ack_consumer,
                                    "thread-ack-consumer", 'ACK', 102)

        LOGGER.info('Finished starting all four consumer threads')

    def run_dmcs_consumer(self, threadname, delay):
        """Thread body: consume DMCS messages."""
        self._dmcs_consumer.run(self.on_dmcs_message)

    def run_forwarder_consumer(self, threadname, delay):
        """Thread body: consume Forwarder messages."""
        self._forwarder_consumer.run(self.on_forwarder_message)

    def run_ncsa_consumer(self, threadname, delay):
        """Thread body: consume NCSA messages."""
        self._ncsa_consumer.run(self.on_ncsa_message)

    def run_ack_consumer(self, threadname, delay):
        """Thread body: consume acknowledgement messages."""
        self._ack_consumer.run(self.on_ack_message)

    def setup_publishers(self):
        """Create the publishers for the Base and NCSA brokers."""
        LOGGER.info('Setting up Base publisher on %s using %s',
                    self._mask_credentials(self._base_broker_url),
                    self._base_msg_format)
        LOGGER.info('Setting up NCSA publisher on %s using %s',
                    self._mask_credentials(self._ncsa_broker_url),
                    self._ncsa_msg_format)
        self._base_publisher = SimplePublisher(self._base_broker_url,
                                               self._base_msg_format)
        self._ncsa_publisher = SimplePublisher(self._ncsa_broker_url,
                                               self._ncsa_msg_format)

    def _dispatch(self, msg_dict):
        """Run the handler registered for the message's MSG_TYPE.

        Unknown or missing message types are logged and ignored instead of
        raising inside the consumer thread.
        """
        msg_type = msg_dict.get(MSG_TYPE)
        handler = self._msg_actions.get(msg_type)
        if handler is None:
            LOGGER.error('No handler registered for message type %s', msg_type)
            return None
        return handler(msg_dict)

    def on_dmcs_message(self, ch, method, properties, body):
        """Consumer callback for messages published by the DMCS."""
        ch.basic_ack(method.delivery_tag)
        msg_dict = body
        LOGGER.info('In DMCS message callback')
        LOGGER.debug('Thread in DMCS callback is %s', _thread.get_ident())
        LOGGER.info('Message from DMCS callback message body is: %s', str(msg_dict))
        self._dispatch(msg_dict)

    def on_forwarder_message(self, ch, method, properties, body):
        """Consumer callback for Forwarder messages; only logs them."""
        ch.basic_ack(method.delivery_tag)
        LOGGER.info('In Forwarder message callback, thread is %s', _thread.get_ident())
        LOGGER.info('forwarder callback msg body is: %s', str(body))

    def on_ncsa_message(self, ch, method, properties, body):
        """Consumer callback for messages published by NCSA."""
        ch.basic_ack(method.delivery_tag)
        LOGGER.info('In ncsa message callback, thread is %s', _thread.get_ident())
        msg_dict = body
        LOGGER.info('ncsa msg callback body is: %s', str(msg_dict))
        self._dispatch(msg_dict)

    def on_ack_message(self, ch, method, properties, body):
        """Consumer callback for acknowledgement messages."""
        ch.basic_ack(method.delivery_tag)
        msg_dict = body
        LOGGER.info('In ACK message callback')
        LOGGER.debug('Thread in ACK callback is %s', _thread.get_ident())
        LOGGER.info('Message from ACK callback message body is: %s', str(msg_dict))
        self._dispatch(msg_dict)

    def process_dmcs_new_job(self, params):
        """Handle a NEW_JOB request from the DMCS.

        Runs a forwarder health check, asks NCSA for matching resources, sends
        the resulting pairs to the forwarders and finally accepts or refuses
        the job towards the DMCS.

        :param params: NEW_JOB message dict; must hold JOB_NUM and RAFTS.
        :return: result of the accept/refuse helper that was invoked, or False
            when the forwarders did not all acknowledge their job params.
        """
        input_params = params
        job_num = str(input_params[JOB_NUM])
        needed_workers = len(input_params[RAFTS])

        ack_id = self.forwarder_health_check(input_params)
        # This is a HUGE num seconds for now..final setting will be milliseconds
        self.ack_timer(7)

        # The scoreboard yields nothing when no forwarder answered.
        # NOTE(review): assumes the result iterates over forwarder names (the
        # sibling foreman calls .keys() on it) - confirm against AckScoreboard.
        health_acks = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        healthy_forwarders = list(health_acks) if health_acks else []

        if needed_workers > len(healthy_forwarders):
            return self.insufficient_base_resources(input_params, healthy_forwarders)

        healthy_status = {"STATUS": "HEALTHY", "STATE": "READY_WITHOUT_PARAMS"}
        self.FWD_SCBD.set_forwarder_params(healthy_forwarders, healthy_status)

        ack_id = self.ncsa_resources_query(input_params, healthy_forwarders)
        self.ack_timer(3)

        # Check ACK scoreboard for response from NCSA
        ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        idle_params = {'STATE': 'IDLE'}

        if not ncsa_response:
            result = self.ncsa_no_response(input_params)
            self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_params)
            return result

        # NOTE(review): the response is read as the NCSA message itself
        # (ACK_BOOL / PAIRS at top level) - confirm against AckScoreboard.
        ack_bool = ncsa_response.get(ACK_BOOL)
        ncsa_accepts = (ack_bool == True)  # noqa: E712 - only a real True/1 accepts
        pairs = ncsa_response.get(PAIRS, {}) if ncsa_accepts else {}

        if not ncsa_accepts:
            # not enough ncsa resources to do job - Notify DMCS
            self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_params)
            return self.insufficient_ncsa_resources(ncsa_response, job_num)

        # Distribute job params and tell DMCS I'm ready.
        fwders = list(pairs.keys())
        fwd_ack_id = self.distribute_job_params(input_params, pairs)
        self.ack_timer(3)

        fwd_params_response = self.ACK_SCBD.get_components_for_timed_ack(fwd_ack_id)
        if fwd_params_response and (len(fwd_params_response) == len(fwders)):
            self.JOB_SCBD.set_value_for_job(job_num, "STATE", "BASE_TASK_PARAMS_SENT")
            self.JOB_SCBD.set_value_for_job(job_num, "TIME_BASE_TASK_PARAMS_SENT",
                                            get_timestamp())
            in_ready_state = {'STATE': 'READY_WITH_PARAMS'}
            self.FWD_SCBD.set_forwarder_params(fwders, in_ready_state)
            # Tell DMCS we are ready
            return self.accept_job(job_num)

        LOGGER.error('Job %s: not every forwarder acknowledged its job params',
                     job_num)
        return False

    def forwarder_health_check(self, params):
        """Register the job and send a health check to every available forwarder.

        :param params: NEW_JOB message dict; must hold JOB_NUM and 'RAFTS'.
        :return: the timed ack id the forwarders are expected to answer with.
        """
        job_num = str(params[JOB_NUM])
        raft_list = params['RAFTS']
        needed_workers = len(raft_list)

        self.JOB_SCBD.add_job(job_num, needed_workers)
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED", get_timestamp())
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED_E",
                                        get_epoch_timestamp())
        LOGGER.info('Received new job %s. Needed workers is %s', job_num, needed_workers)

        # run forwarder health check
        # get timed_ack_id
        timed_ack = self.get_next_timed_ack_id("FORWARDER_HEALTH_CHECK_ACK")

        forwarders = self.FWD_SCBD.return_available_forwarders_list()
        # Mark all healthy Forwarders Unknown
        state_status = {"STATE": "HEALTH_CHECK", "STATUS": "UNKNOWN"}
        self.FWD_SCBD.set_forwarder_params(forwarders, state_status)

        # send health check messages
        ack_params = {}
        ack_params[MSG_TYPE] = FORWARDER_HEALTH_CHECK
        ack_params["ACK_ID"] = timed_ack
        ack_params[JOB_NUM] = job_num

        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "BASE_RESOURCE_QUERY")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_BASE_RESOURCE_QUERY",
                                        get_timestamp())

        # NOTE(review): the audit record is built but never published anywhere.
        audit_params = {}
        audit_params['DATA_TYPE'] = 'FOREMAN_ACK_REQUEST'
        audit_params['SUB_TYPE'] = 'FORWARDER_HEALTH_CHECK_ACK'
        audit_params['ACK_ID'] = timed_ack
        audit_params['COMPONENT_NAME'] = 'BASE_FOREMAN'
        audit_params['TIME'] = get_epoch_timestamp()

        for forwarder in forwarders:
            queue = self.FWD_SCBD.get_value_for_forwarder(forwarder, "CONSUME_QUEUE")
            self._base_publisher.publish_message(queue, ack_params)

        return timed_ack

    def insufficient_base_resources(self, params, healthy_forwarders):
        """Refuse the job towards the DMCS: too few healthy forwarders.

        :param params: NEW_JOB message dict (JOB_NUM, RAFTS, 'ACK_ID').
        :param healthy_forwarders: forwarders that answered the health check.
        :return: always False.
        """
        # send response msg to dmcs refusing job
        job_num = str(params[JOB_NUM])
        ack_id = params['ACK_ID']
        needed_workers = len(params[RAFTS])
        LOGGER.info('Reporting to DMCS that there are insufficient healthy forwarders for job #%s', job_num)

        dmcs_params = {}
        dmcs_params[MSG_TYPE] = NEW_JOB_ACK
        dmcs_params[JOB_NUM] = job_num
        dmcs_params[ACK_BOOL] = False
        dmcs_params[ACK_ID] = ack_id

        ### NOTE FOR DMCS ACK PROCESSING:
        ### if ACK_BOOL == True, there will NOT be a FAIL_DETAILS section
        ### If ACK_BOOL == False, there will always be a FAIL_DICT to examine AND
        ###     there will always be a BASE_RESOURCES inside the FAIL_DICT
        ### If ACK_BOOL == False, and the BASE_RESOURCES inside FAIL_DETAILS == 0,
        ###     there will be only NEEDED and AVAILABLE Forwarder params - nothing more
        ### If ACK_BOOL == False and BASE_RESOURCES inside FAIL_DETAILS == 1, there
        ###     will always be a NCSA_RESOURCES inside FAIL_DETAILS set to either
        ###     0 or 'NO_RESPONSE'
        ### if NCSA_RESPONSE == 0, there will be NEEDED and AVAILABLE Distributor params
        ### if NCSA_RESOURCES == 'NO_RESPONSE' there will be nothing else
        fail_dict = {}
        fail_dict['BASE_RESOURCES'] = '0'
        fail_dict[NEEDED_FORWARDERS] = str(needed_workers)
        fail_dict[AVAILABLE_FORWARDERS] = str(len(healthy_forwarders))
        dmcs_params['FAIL_DETAILS'] = fail_dict
        self._base_publisher.publish_message("dmcs_consume", dmcs_params)

        # mark job refused, and leave Forwarders in Idle state
        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "JOB_ABORTED")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ABORTED_BASE_RESOURCES",
                                        get_timestamp())
        idle_state = {"STATE": "IDLE"}
        self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_state)
        return False

    def ncsa_resources_query(self, params, healthy_forwarders):
        """Ask NCSA to pair one healthy forwarder with each requested raft.

        :param params: NEW_JOB message dict (JOB_NUM, RAFTS).
        :param healthy_forwarders: iterable of forwarder names; the caller
            guarantees there is at least one per raft.
        :return: the timed ack id NCSA is expected to answer with.
        """
        job_num = str(params[JOB_NUM])
        raft_list = params[RAFTS]
        LOGGER.info('Sufficient forwarders have been found. Checking NCSA')
        self._pairs_dict = {}

        # One forwarder per raft, taken in order.
        forwarder_candidate_dict = {}
        for forwarder, raft in zip(healthy_forwarders, raft_list):
            forwarder_candidate_dict[forwarder] = raft
            self.FWD_SCBD.set_forwarder_status(forwarder, NCSA_RESOURCES_QUERY)

        timed_ack_id = self.get_next_timed_ack_id("NCSA_Ack")

        ncsa_params = {}
        ncsa_params[MSG_TYPE] = "NCSA_RESOURCES_QUERY"
        ncsa_params[JOB_NUM] = job_num
        ncsa_params[ACK_ID] = timed_ack_id
        ncsa_params["FORWARDERS"] = forwarder_candidate_dict
        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "NCSA_RESOURCES_QUERY_SENT")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_NCSA_RESOURCES_QUERY_SENT",
                                        get_timestamp())
        self._ncsa_publisher.publish_message(self.NCSA_CONSUME, ncsa_params)
        LOGGER.info('The following forwarders have been sent to NCSA for pairing:')
        LOGGER.info('%s', forwarder_candidate_dict)
        return timed_ack_id

    def distribute_job_params(self, params, pairs):
        """Send each paired forwarder its transfer parameters.

        :param params: NEW_JOB message dict (JOB_NUM).
        :param pairs: dict mapping forwarder name -> transfer params.
        :return: the timed ack id the forwarders are expected to answer with.
        """
        # ncsa has enough resources...
        job_num = str(params[JOB_NUM])
        self.JOB_SCBD.set_pairs_for_job(job_num, pairs)
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_PAIRS_ADDED", get_timestamp())
        LOGGER.info('The following pairs will be used for Job #%s: %s', job_num, pairs)

        fwd_ack_id = self.get_next_timed_ack_id("FWD_PARAMS_ACK")
        fwd_params = {}
        fwd_params[MSG_TYPE] = "FORWARDER_JOB_PARAMS"
        fwd_params[JOB_NUM] = job_num
        fwd_params[ACK_ID] = fwd_ack_id
        for fwder in pairs:
            fwd_params["TRANSFER_PARAMS"] = pairs[fwder]
            route_key = self.FWD_SCBD.get_value_for_forwarder(fwder, "CONSUME_QUEUE")
            self._base_publisher.publish_message(route_key, fwd_params)

        return fwd_ack_id

    def accept_job(self, job_num):
        """Tell the DMCS the job is accepted and record that on the scoreboard.

        :return: always True.
        """
        dmcs_message = {}
        dmcs_message[JOB_NUM] = job_num
        dmcs_message[MSG_TYPE] = NEW_JOB_ACK
        dmcs_message[ACK_BOOL] = True
        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "JOB_ACCEPTED")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ACCEPTED", get_timestamp())
        self._base_publisher.publish_message("dmcs_consume", dmcs_message)
        return True

    def insufficient_ncsa_resources(self, ncsa_response, job_num=None):
        """Refuse the job towards the DMCS: NCSA lacks distributors.

        :param ncsa_response: NCSA acknowledgement; must hold
            NEEDED_DISTRIBUTORS and AVAILABLE_DISTRIBUTORS.
        :param job_num: job being refused; when omitted it is read from the
            JOB_NUM entry of *ncsa_response*.
        :return: always True (the refusal was published).
        """
        if job_num is None:
            job_num = ncsa_response.get(JOB_NUM)

        dmcs_params = {}
        dmcs_params[MSG_TYPE] = "NEW_JOB_ACK"
        dmcs_params[JOB_NUM] = job_num
        dmcs_params[ACK_BOOL] = False
        dmcs_params[BASE_RESOURCES] = '1'
        dmcs_params[NCSA_RESOURCES] = '0'
        dmcs_params[NEEDED_DISTRIBUTORS] = ncsa_response[NEEDED_DISTRIBUTORS]
        dmcs_params[AVAILABLE_DISTRIBUTORS] = ncsa_response[AVAILABLE_DISTRIBUTORS]
        # FIXME - catch publish failures and return False
        self._base_publisher.publish_message("dmcs_consume", dmcs_params)
        return True

    def ncsa_no_response(self, params):
        """Refuse the job towards the DMCS: NCSA never answered."""
        job_num = str(params[JOB_NUM])
        dmcs_params = {}
        dmcs_params[MSG_TYPE] = "NEW_JOB_ACK"
        dmcs_params[JOB_NUM] = job_num
        dmcs_params[ACK_BOOL] = False
        dmcs_params[BASE_RESOURCES] = '1'
        dmcs_params[NCSA_RESOURCES] = 'NO_RESPONSE'
        self._base_publisher.publish_message("dmcs_consume", dmcs_params)

    def _send_readout_ack(self, job_number, ack_bool, comment):
        """Publish a READOUT_ACK with the given outcome to the DMCS."""
        dmcs_params = {}
        dmcs_params[MSG_TYPE] = 'READOUT_ACK'
        dmcs_params[JOB_NUM] = job_number
        dmcs_params['ACK_BOOL'] = ack_bool
        dmcs_params['COMMENT'] = comment
        # The publisher takes the params dict and encodes it itself.
        self._base_publisher.publish_message('dmcs_consume', dmcs_params)

    def process_dmcs_readout(self, params):
        """Handle a READOUT request from the DMCS.

        Sends NCSA_READOUT to NCSA; when NCSA acknowledges positively, sends
        FORWARDER_READOUT to every forwarder of the job and reports success to
        the DMCS once all of them have answered. NCSA failures and timeouts are
        reported to the DMCS as a negative READOUT_ACK.

        :param params: READOUT message dict; must hold JOB_NUM.
        """
        job_number = params[JOB_NUM]
        pairs = self.JOB_SCBD.get_pairs_for_job(job_number)
        date = get_timestamp()
        self.JOB_SCBD.set_value_for_job(job_number, "TIME_START_READOUT", date)
        forwarders = list(pairs.keys())

        ### Send READOUT to NCSA with ACK_ID
        ack_id = self.get_next_timed_ack_id('NCSA_READOUT')
        ncsa_params = {}
        ncsa_params[MSG_TYPE] = 'NCSA_READOUT'
        ncsa_params[ACK_ID] = ack_id
        self._ncsa_publisher.publish_message(self.NCSA_CONSUME, ncsa_params)

        self.ack_timer(4)

        ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        if not ncsa_response:
            # send 'no response from ncsa' to DMCS
            self._send_readout_ack(job_number, False,
                                   "Readout Failed: No Response from NCSA")
            return

        if ncsa_response['ACK_BOOL'] != True:  # noqa: E712
            # send problem with ncsa to DMCS
            comment = 'Readout Failed: Problem at NCSA - Expected Distributor Acks is %s, Number of Distributor Acks received is %s' % (
                ncsa_response['EXPECTED_DISTRIBUTOR_ACKS'],
                ncsa_response['RECEIVED_DISTRIBUTOR_ACKS'])
            self._send_readout_ack(job_number, False, comment)
            return

        # inform forwarders
        fwd_ack_id = self.get_next_timed_ack_id('FORWARDER_READOUT')
        for forwarder in forwarders:
            routing_key = self.FWD_SCBD.get_routing_key(forwarder)
            msg_params = {}
            msg_params[MSG_TYPE] = 'FORWARDER_READOUT'
            msg_params[JOB_NUM] = job_number
            msg_params['ACK_ID'] = fwd_ack_id
            self.FWD_SCBD.set_forwarder_state(forwarder, START_READOUT)
            self._base_publisher.publish_message(routing_key, msg_params)

        self.ack_timer(4)

        forwarder_responses = self.ACK_SCBD.get_components_for_timed_ack(fwd_ack_id)
        if forwarder_responses and len(forwarder_responses) == len(forwarders):
            self._send_readout_ack(job_number, True,
                                   "Readout begun at %s" % get_timestamp())
        else:
            LOGGER.warning('Job %s: not every forwarder acknowledged readout',
                           job_number)

    def process_ack(self, params):
        """Record an incoming acknowledgement on the ACK scoreboard."""
        self.ACK_SCBD.add_timed_ack(params)

    def get_next_timed_ack_id(self, ack_type):
        """Return a new ack id of the form '<ack_type>_<6-digit counter>'."""
        self._next_timed_ack_id = self._next_timed_ack_id + 1
        retval = ack_type + "_" + str(self._next_timed_ack_id).zfill(6)
        return retval

    def ack_timer(self, seconds):
        """Block for *seconds* while acknowledgements arrive; returns True."""
        sleep(seconds)
        return True

    def purge_broker(self, queues):
        """Purge every queue named in *queues* on the '/tester' vhost.

        Runs rabbitmqctl with an argument list (no shell), so a queue name can
        never be interpreted as shell syntax. A failure to launch rabbitmqctl
        is logged and does not abort start-up.
        """
        import subprocess

        for q in queues:
            try:
                subprocess.call(["rabbitmqctl", "-p", "/tester", "purge_queue", str(q)])
            except OSError as e:
                LOGGER.error('Unable to purge queue %s: %s', q, e)
class Premium:
    """Stand-in NCSA foreman used for manual testing.

    Listens on 'f1_consume' and 'ncsa_consume'; every message on the second
    queue (except NEXT_VISIT) is answered with a canned
    NCSA_START_INTEGRATION_ACK that pairs each forwarder with a fake
    distributor.
    """

    def __init__(self):
        logging.basicConfig()
        broker_url = 'amqp://*****:*****@141.142.238.160:5672/%2Fbunny'
        self.sp2 = SimplePublisher(broker_url, "YAML")
        time.sleep(3)
        self._cons = Consumer(broker_url, 'f1_consume', "YAML")
        self._cons2 = Consumer(broker_url, 'ncsa_consume', "YAML")

        # One listener thread per consumer; a failed start is only reported.
        for worker, label in ((self.do_it, "thread-1"), (self.do_it2, "thread-2")):
            try:
                _thread.start_new_thread(worker, (label, 2))
            except Exception as err:
                print("Cannot start thread")
                print(err)

    def mycallback(self, ch, methon, properties, body):
        """Print every message received on 'f1_consume'."""
        for line in (" ", "+++++++++++++=========++++++++++++++++", " f1_consume msg:"):
            print(line)
        print(body)

    def mycallback2(self, ch, methon, properties, body):
        """Print the message and reply with a canned positive acknowledgement.

        NEXT_VISIT messages are printed but not answered.
        """
        for line in (" ", ">>>>>>>>>>>>>>><<<<<<<<<<<<<<<<", " f2_consume msg:"):
            print(line)
        print(body)
        if body['MSG_TYPE'] == 'NEXT_VISIT':
            return

        reply = {
            'ACK_ID': body['ACK_ID'],
            'MSG_TYPE': 'NCSA_START_INTEGRATION_ACK',
            'COMPONENT_NAME': 'NCSA_FOREMAN',
        }

        # Work on a copy so the incoming message is left untouched.
        pairs = copy.deepcopy(body['FORWARDERS'])
        print("In callback2, fwdrs dict is:")
        pprint.PrettyPrinter(indent=2).pprint(pairs)

        # Give the n-th forwarder the n-th fake distributor (counting from 1).
        for number, forwarder in enumerate(list(pairs.keys()), start=1):
            tag = str(number)
            pairs[forwarder]['DISTRIBUTOR'] = {
                'FQN': "Distributor_" + tag,
                'NAME': "D" + tag,
                'HOSTNAME': "D" + tag,
                'TARGET_DIR': "/dev/null",
                'IP_ADDR': "141.142.237.16" + tag,
            }

        reply['PAIRS'] = pairs
        reply['ACK_BOOL'] = True
        for key in ('JOB_NUM', 'IMAGE_ID', 'VISIT_ID', 'SESSION_ID'):
            reply[key] = body[key]
        self.sp2.publish_message("pp_foreman_ack_publish", reply)

    def do_it(self, threadname, delay):
        """Thread body for the 'f1_consume' listener."""
        print("Before run call")
        self._cons.run(self.mycallback)

    def do_it2(self, threadname, delay):
        """Thread body for the 'ncsa_consume' listener."""
        print("Before run call")
        self._cons2.run(self.mycallback2)
print("Before run call") self._cons2.run(self.mycallback2) if __name__ == "__main__": premium = Premium() sp1 = SimplePublisher( 'amqp://*****:*****@141.142.238.160:5672/%2Fbunny', "YAML") msg = {} msg['MSG_TYPE'] = "NEW_SESSION" msg['SESSION_ID'] = 'session_RZ_22' msg['RESPONSE_QUEUE'] = 'dmcs_consume' msg['ACK_ID'] = 'NEW_SESSION_ACK_14' time.sleep(4) sp1.publish_message("pp_foreman_consume", msg) msg = {} msg['MSG_TYPE'] = "NEXT_VISIT" msg['SESSION_ID'] = 'session_RZ_22' msg['VISIT_ID'] = 'XX_28272' msg['BORE_SIGHT'] = 'A LITTLE TO THE LEFT' msg['RESPONSE_QUEUE'] = 'dmcs_consume' msg['ACK_ID'] = 'NEXT_VISIT_ACK_15' time.sleep(4) sp1.publish_message("pp_foreman_consume", msg) ccd_list = [ 1, 2, 12, 17, 9, 22, 43, 44, 46, 47, 55, 71, 15, 78, 79, 82, 84, 85 ] msg = {}
class AuxDeviceTestCase():
    """Scripted sender that plays the DMCS, Forwarder and ArchiveController
    sides of an Aux-device exchange by publishing canned messages.
    """

    AUX_ACK_QUEUE = 'at_foreman_ack_publish'
    DMCS_PUBLISH_QUEUE = 'at_foreman_consume'
    DMCS_ACK_QUEUE = 'dmcs_ack_consume'
    ARCHIVE_PUBLISH_QUEUE = 'archive_ctrl_publish'

    # FIXME: load from standard configuration area
    CFG_FILE = '../etc/config/L1SystemCfg_Test.yaml'
    FORWARDER = 'FORWARDER_99'
    ARCHIVE_CTRL = 'ARCHIVE_CTRL'
    JOB_NUM = 'job_100'
    SESSION_ID = 'session_100'
    IMAGE_ID = 'IMG_100'
    IMAGE_INDEX = 0
    IMAGE_SEQUENCE_NAME = 'seq_1000'
    IMAGES_IN_SEQUENCE = 1

    def setUp(self):
        """Read credentials and config, then create the three publishers."""
        cred = Credentials("iip_cred.yaml")
        user = cred.getUser()
        passwd = cred.getPasswd()

        # read CFG file
        self._cfg_root = toolsmod.intake_yaml_file(self.CFG_FILE)['ROOT']
        forwarders = self._cfg_root['XFER_COMPONENTS']['AUX_FORWARDERS']
        self._fwdr_cfg = forwarders[self.FORWARDER]

        url_template = 'amqp://%s:%s@%s'

        # setup forwarder publisher
        self._fwdr_amqp = url_template % (user, passwd,
                                          self._cfg_root['BASE_BROKER_ADDR'])
        self._fwdr_publisher = SimplePublisher(self._fwdr_amqp, 'YAML')

        # setup dmcs publisher
        self._dmcs_amqp = url_template % (user, passwd,
                                          self._cfg_root['BASE_BROKER_ADDR'])
        self._dmcs_publisher = SimplePublisher(self._dmcs_amqp, 'YAML')

        # setup archiveController publisher
        self._at_ctrl_amqp = url_template % (user, passwd,
                                             self._cfg_root['BASE_BROKER_ADDR'])
        self._at_ctrl_publisher = SimplePublisher(self._at_ctrl_amqp, 'YAML')

    def tearDown(self):
        """Nothing to clean up."""
        pass

    def test_at_start_integration(self):
        """DMCS -> Aux: AT_START_INTEGRATION."""
        kind = 'AT_START_INTEGRATION'
        payload = {
            'MSG_TYPE': kind,
            'JOB_NUM': 'job_6',
            'SESSION_ID': self.SESSION_ID,
            'IMAGE_ID': 'IMG_100',
            'IMAGE_INDEX': 0,
            'IMAGE_SEQUENCE_NAME': 'seq_1',
            'IMAGES_IN_SEQUENCE': 1,
            'RAFT_LIST': ['ats'],
            'RAFT_CCD_LIST': [['wfs_ccd']],
            'REPLY_QUEUE': self.DMCS_ACK_QUEUE,
            'ACK_ID': kind + '_100',
        }
        self._dmcs_publisher.publish_message(self.DMCS_PUBLISH_QUEUE, payload)

    def test_at_new_session(self):
        """DMCS -> Aux: AT_NEW_SESSION."""
        kind = 'AT_NEW_SESSION'
        payload = {
            'MSG_TYPE': kind,
            'SESSION_ID': self.SESSION_ID,
            'REPLY_QUEUE': self.DMCS_ACK_QUEUE,
            'ACK_ID': kind + '_100',
        }
        self._dmcs_publisher.publish_message(self.DMCS_PUBLISH_QUEUE, payload)

    def test_at_fwdr_health_check_ack(self):
        """Forwarder -> Aux: AT_FWDR_HEALTH_CHECK_ACK."""
        kind = 'AT_FWDR_HEALTH_CHECK_ACK'
        payload = {
            'MSG_TYPE': kind,
            'ACK_BOOL': 'True',
            'COMPONENT': self.FORWARDER,
            'ACK_ID': kind + '_100',
        }
        self._fwdr_publisher.publish_message(self.AUX_ACK_QUEUE, payload)

    def test_at_fwdr_xfer_params_ack(self):
        """Forwarder -> Aux: AT_FWDR_XFER_PARAMS_ACK."""
        kind = 'AT_FWDR_XFER_PARAMS_ACK'
        payload = {
            'MSG_TYPE': kind,
            'COMPONENT': self.FORWARDER,
            'ACK_BOOL': "true",
            'ACK_ID': kind + '_100',
        }
        self._fwdr_publisher.publish_message(self.AUX_ACK_QUEUE, payload)

    def test_at_fwdr_end_readout_ack(self):
        """Forwarder -> Aux: AT_FWDR_END_READOUT_ACK."""
        # Currently not used
        kind = 'AT_FWDR_END_READOUT_ACK'
        payload = {
            'MSG_TYPE': kind,
            'COMPONENT': self.FORWARDER,
            'JOB_NUM': self.JOB_NUM,
            'SESSION_ID': self.SESSION_ID,
            'IMAGE_ID': self.IMAGE_ID,
            'ACK_ID': kind + '_101',
            'ACK_BOOL': 'True',
            'RESULT_SET': {
                'FILENAME_LIST': 'xxx',
                'CHECKSUM_LIST': 'csum1lk123lkj',
            },
        }
        self._fwdr_publisher.publish_message(self.AUX_ACK_QUEUE, payload)

    def test_at_items_xferd_ack(self):
        """ArchiveController -> Aux: AT_ITEMS_XFERD_ACK."""
        # Currently not used
        kind = 'AT_ITEMS_XFERD_ACK'
        payload = {
            'MSG_TYPE': kind,
            'COMPONENT': self.ARCHIVE_CTRL,
            'ACK_ID': kind + '_100',
            'ACK_BOOL': True,
            'REPLY_QUEUE': self.AUX_ACK_QUEUE,
            'RESULT_SET': {
                'RECEIPT_LIST': ['receipt1'],
                'FILENAME_LIST': ['file1'],
            },
        }
        self._at_ctrl_publisher.publish_message(self.AUX_ACK_QUEUE, payload)

    def test_at_header_ready(self):
        """DMCS -> Aux: AT_HEADER_READY."""
        kind = 'AT_HEADER_READY'
        payload = {
            'MSG_TYPE': kind,
            'IMAGE_ID': 'IMG_100',
            'FILENAME': 'http://141.142.238.177:8000/AT_O_20190312_000007.header',
            'ACK_ID': kind + '_100',
            'REPLY_QUEUE': 'at_foreman_ack_publish',
        }
        self._dmcs_publisher.publish_message(self.DMCS_PUBLISH_QUEUE, payload)

    def test_at_fwdr_header_ready_ack(self):
        """Forwarder -> Aux: AT_FWDR_HEADER_READY_ACK."""
        kind = 'AT_FWDR_HEADER_READY_ACK'
        payload = {
            'MSG_TYPE': kind,
            'COMPONENT': self.FORWARDER,
            'ACK_ID': kind + '_100',
            'ACK_BOOL': 'True',
        }
        self._fwdr_publisher.publish_message(self.AUX_ACK_QUEUE, payload)

    def test_new_archive_item_ack(self):
        # don't think this is used
        pass

    def test_new_ar_archive_item_ack(self):
        # don't think this is used
        pass

    def test_new_at_archive_item_ack(self):
        """ArchiveController -> archive queue: NEW_AT_ARCHIVE_ITEM_ACK."""
        kind = 'NEW_AT_ARCHIVE_ITEM_ACK'
        payload = {
            'MSG_TYPE': kind,
            'COMPONENT': self.ARCHIVE_CTRL,
            'ACK_ID': kind + '_100',
            'TARGET_DIR': '/tmp/source',
            'ACK_BOOL': True,
            'JOB_NUM': 'job_100',
            'IMAGE_ID': 'IMG_100',
        }
        self._at_ctrl_publisher.publish_message(self.ARCHIVE_PUBLISH_QUEUE, payload)

    def test_at_end_readout(self):
        """DMCS -> Aux: AT_END_READOUT."""
        kind = 'AT_END_READOUT'
        payload = {
            'MSG_TYPE': kind,
            'JOB_NUM': self.JOB_NUM,
            'IMAGE_ID': self.IMAGE_ID,
            'IMAGE_INDEX': self.IMAGE_INDEX,
            'IMAGE_SEQUENCE_NAME': self.IMAGE_SEQUENCE_NAME,
            'IMAGES_IN_SEQUENCE': self.IMAGES_IN_SEQUENCE,
            'SESSION_ID': self.SESSION_ID,
            'REPLY_QUEUE': self.DMCS_ACK_QUEUE,
            'ACK_ID': kind + '_100',
        }
        self._dmcs_publisher.publish_message(self.DMCS_PUBLISH_QUEUE, payload)

    def run(self):
        """Play the whole exchange in order, pausing 5 seconds after each step."""
        # (line printed before the step, step, line printed after the pause)
        script = (
            (None,
             self.setUp,
             '[x] Finished setting up publishers.'),
            (None,
             self.test_at_new_session,
             '[x] Finished setting up new session. Ready to receive params'),
            ('[x] DMCS sending start integration to Aux',
             self.test_at_start_integration,
             None),
            ('[x] Aux: Checking if there are healthy forwarders',
             self.test_at_fwdr_health_check_ack,
             '[x] Received one healthy ack from forwarder'),
            ('[x] Aux: Sending items to archive to ArchiveController',
             self.test_new_at_archive_item_ack,
             '[x] Received ack from ArchiveController'),
            ('[x] Aux: Sending parameters for file transfer to Forwarder',
             self.test_at_fwdr_xfer_params_ack,
             '[x] Received transfer param ack from Forwarder'),
            ('[x] DMCS sending header ready msg to Aux',
             self.test_at_header_ready,
             None),
            ('[x] Aux: Sending header ready message to Forwarder',
             self.test_at_fwdr_header_ready_ack,
             '[x] Received header ready ack from Forwarder'),
            ('[x] DMCS sending end readout to Aux',
             self.test_at_end_readout,
             '[DONE] Sender testing complete'),
        )
        for before, step, after in script:
            if before is not None:
                print(before)
            step()
            time.sleep(5)
            if after is not None:
                print(after)
        self.tearDown()
class Forwarder:
    """Presents a vanilla L1 Forwarder personality.

    In nightly operation, at least 21 of these components will be available
    at any time (one for each raft).

    A Forwarder reads its settings from ``ForwarderCfg.yaml``, consumes
    messages from its consume queue on a separate thread, and dispatches
    each message to a handler selected by the message's MSG_TYPE value.
    """

    def __init__(self):
        """Load the YAML config and start the publisher and consumer.

        Exits the process with status 99 when the config file is empty or a
        required key is missing.
        """
        self._registered = False

        # Context manager so the config file handle is always released.
        with open('ForwarderCfg.yaml') as f:
            # cfg data map...
            cdm = yaml.safe_load(f)

        try:
            self._name = cdm[NAME]
            self._passwd = cdm[PASSWD]
            self._fqn = cdm[FQN]
            self._base_broker_addr = cdm[BASE_BROKER_ADDR]
            self._consume_queue = cdm[CONSUME_QUEUE]
            self._hostname = cdm[HOSTNAME]
            self._ip_addr = cdm[IP_ADDR]
            self._DAQ_PATH = cdm['DAQ_PATH']
            # XXX FIX: Put in config file
            self.CHECKSUM_ENABLED = False
        except (KeyError, TypeError) as e:
            # TypeError covers an empty config file (safe_load returns None).
            print("Missing base keywords in yaml file... Bailing out...")
            print("Config problem was: %s" % e)
            sys.exit(99)

        self._home_dir = "/home/" + self._name + "/"
        self._base_broker_url = "amqp://%s:%s@%s" % (
            self._name, self._passwd, self._base_broker_addr)

        self._msg_actions = {
            FORWARDER_HEALTH_CHECK: self.process_health_check,
            FORWARDER_JOB_PARAMS: self.process_job_params,
            # Here if AR case needs different handler
            'AR_FWDR_XFER_PARAMS': self.process_job_params,
            'AR_FWDR_READOUT': self.process_foreman_readout,
            FORWARDER_READOUT: self.process_foreman_readout,
        }

        self.setup_publishers()
        # The scratchpad must exist before the consumer thread starts;
        # otherwise a message handled immediately would hit a missing
        # attribute.
        self._job_scratchpad = Scratchpad(self._base_broker_url)
        self.setup_consumers()

    def setup_publishers(self):
        """Create the publisher used for every outgoing message."""
        self._publisher = SimplePublisher(self._base_broker_url)

    def setup_consumers(self):
        """Create the consumer and run it on a new thread.

        Exits the process with status 99 if the thread cannot be started.
        """
        threadname = "thread-" + self._consume_queue
        print("Threadname is %s" % threadname)

        self._consumer = Consumer(self._base_broker_url, self._consume_queue)
        try:
            _thread.start_new_thread(self.run_consumer, (threadname, 2,))
            print("Started Consumer Thread")
        except Exception as e:
            # Report the reason instead of exiting silently.
            print("Unable to start consumer thread: %s" % e)
            sys.exit(99)

    def run_consumer(self, threadname, delay):
        """Thread entry point: run the consumer with on_message as callback.

        :param threadname: unused; kept for the existing thread call signature
        :param delay: unused; kept for the existing thread call signature
        """
        self._consumer.run(self.on_message)

    def on_message(self, ch, method, properties, body):
        """Acknowledge an incoming message and dispatch it to its handler.

        :param ch: channel the message arrived on
        :param method: delivery information; supplies the delivery tag
        :param properties: message properties (unused)
        :param body: message body, indexed by MSG_TYPE to pick the handler
        """
        # The delivery tag lives on the method frame; the bare name
        # 'delivery_tag' was never defined.
        ch.basic_ack(method.delivery_tag)
        print("INcoming PARAMS, body is:\n%s" % body)
        msg_dict = body

        handler = self._msg_actions.get(msg_dict[MSG_TYPE])
        if handler is None:
            # Unknown message types are reported rather than calling None.
            print("No handler for message type %s; ignoring message"
                  % msg_dict[MSG_TYPE])
            return
        handler(msg_dict)

    def process_health_check(self, params):
        """Answer a health check with a FORWARDER_HEALTH_CHECK_ACK."""
        self.send_ack_response("FORWARDER_HEALTH_CHECK_ACK", params)

    def process_job_params(self, params):
        """Store the transfer parameters for a job and acknowledge them.

        The structure of the incoming job params is identical to the way job
        params are sent to prompt processing forwarders:

            MSG_TYPE: AR_FWDR_XFER_PARAMS
            JOB_NUM: .....
            VISIT_ID: .....
            IMAGE_ID: .....
            ACK_ID: x1
            REPLY_QUEUE: .....
            FITS: FITS metadata someday?
            TRANSFER_PARAMS:
                DISTRIBUTOR:
                    FQN: Name of entity receiving file
                    NAME: login name for receiving entity
                    HOSTNAME: Full host name for receiving entity
                    IP_ADDR: ip addr of archive
                    TARGET_DIR: Where to put file
                ## Below might be better as 'xfer_unit_list' for ccds or
                ## rafts, or other
                CCD_LIST: for example...[1,2,3,7,10,14]
                XFER_UNIT: CCD
                FITS: FITS metadata someday?

        After the xfer params arrive, and ack is returned, we set up some
        short cut helpers, such as:
            1) Make a filename stub for job that leaves out all but the
               CCD number
            2) Put together the scp/bbftp string with login name and
               ip addr, plus target dir

        :param params: incoming message dict laid out as above
        :raises KeyError: when a field read below is absent from the message
        """
        job_params = copy.deepcopy(params)
        xfer_params = job_params['TRANSFER_PARAMS']
        distributor = xfer_params['DISTRIBUTOR']

        # Also RM fits files in xfer_dir. The path comes from the local
        # config file, and the shell is needed to expand the wildcard.
        cmd = "rm " + self._DAQ_PATH + "*.fits"
        os.system(cmd)

        filename_stub = "%s_%s_%s_" % (job_params['JOB_NUM'],
                                       job_params['VISIT_ID'],
                                       job_params['IMAGE_ID'])
        login_str = "%s@%s:" % (distributor['NAME'], distributor['IP_ADDR'])
        target_dir = str(distributor['TARGET_DIR'])

        s_params = {
            'CCD_LIST': xfer_params['CCD_LIST'],
            'LOGIN_STR': login_str,
            'TARGET_DIR': target_dir,
            'FILENAME_STUB': filename_stub,
        }
        print("S_params are: %s" % s_params)

        # Now, s_params should have all we need for job.
        # Place as value for job_num key
        self._job_scratchpad.set_job_transfer_params(params[JOB_NUM], s_params)
        self._job_scratchpad.set_job_state(params['JOB_NUM'],
                                           "READY_WITH_PARAMS")

        self.send_ack_response('FORWARDER_JOB_PARAMS_ACK', params)

    def process_foreman_readout(self, params):
        """Fetch, format and forward a job's files, then report the result.

        Publishes an AR_ITEMS_XFERD_ACK message carrying the result lists to
        the queue named by the incoming REPLY_QUEUE field.

        :param params: incoming readout message dict; REPLY_QUEUE, JOB_NUM,
            IMAGE_ID and ACK_ID are read from it
        """
        reply_queue = params['REPLY_QUEUE']
        job_number = params[JOB_NUM]

        # Check and see if scratchpad has this job_num
        if job_number not in self._job_scratchpad.keys():
            # Previously this case was ignored without a trace; processing
            # still continues as before, but the condition is now visible.
            print("WARNING: readout requested for unknown job %s" % job_number)

        # raw_files_dict is of the form { ccd: filename}
        # like { 2: /home/F1/xfer_dir/ccd_2.data
        raw_files_dict = self.fetch(job_number)

        # format2() takes (job_num, raw_files_dict) and returns the
        # {ccd: filename} dict that forward() iterates; format() has a
        # different signature and return type and cannot be used here.
        final_filenames = self.format2(job_number, raw_files_dict)

        results = self.forward(job_number, final_filenames)

        msg = {
            'MSG_TYPE': 'AR_ITEMS_XFERD_ACK',
            'JOB_NUM': job_number,
            'IMAGE_ID': params['IMAGE_ID'],
            'COMPONENT': self._fqn,
            'ACK_ID': params['ACK_ID'],
            'ACK_BOOL': True,
            # See if num keys of results == len(ccd_list) from orig msg params
            'RESULT_LIST': results,
        }
        self._publisher.publish_message(reply_queue, msg)

    def fetch(self, job_num):
        """Build the raw file name for every CCD stored for a job.

        :param job_num: job whose CCD_LIST is read from the scratchpad
        :return: dict mapping each ccd to "ccd_<ccd>.data"
        """
        ccd_list = self._job_scratchpad.get_job_value(job_num, 'CCD_LIST')
        raw_files_dict = {}
        for ccd in ccd_list:
            raw_files_dict[ccd] = "ccd_" + str(ccd) + ".data"

        print("In Forwarder Fetch method, raw_files_dict is: \n%s"
              % raw_files_dict)
        return raw_files_dict

    def format(self, file_list, mdata):
        """Format raw files to fits files with header data.

        :param file_list: dictionary of file_name against raw file name
        :param mdata: primary meta data stream fetched from camera daq;
            indexed by the same keys as file_list, each entry holding
            "primary_metadata_chunk" and "secondary_metadata_chunk"
        :return: list of the fits file names written
        """
        final_filenames = []
        for ccd_id, raw_file_name in file_list.items():
            image_array = np.fromfile(raw_file_name, dtype=np.int32)

            # Merge into a fresh dict so the caller's metadata is untouched.
            header_data = dict(mdata[ccd_id]["primary_metadata_chunk"])
            header_data.update(mdata[ccd_id]["secondary_metadata_chunk"])

            primary_header = pyfits.Header()
            for key, value in header_data.items():
                primary_header[key] = value

            fits_file = pyfits.PrimaryHDU(header=primary_header,
                                          data=image_array)
            # str() so that non-string ids still produce a file name.
            fits_name = str(ccd_id) + ".fits"
            fits_file.writeto(fits_name)
            final_filenames.append(fits_name)
        return final_filenames

    def format2(self, job_num, raw_files_dict):
        """Assemble one output file per CCD from header, raw data and time.

        For each entry the file "ccd.header", the raw file and an epoch
        timestamp are appended, in that order, to a target file under the
        DAQ path.

        :param job_num: job whose FILENAME_STUB is read from the scratchpad
        :param raw_files_dict: dict of ccd -> raw file name
        :return: dict of ccd -> final file name (relative to the DAQ path)
        """
        import shlex  # local import: only needed for shell quoting here

        filename_stub = self._job_scratchpad.get_job_value(
            job_num, 'FILENAME_STUB')
        final_filenames = {}
        for kee, raw_file in raw_files_dict.items():
            # str() because CCD ids may arrive as integers.
            final_filename = filename_stub + "_" + str(kee) + ".fits"
            target = self._DAQ_PATH + final_filename
            print("Final filename is %s" % final_filename)
            print("target is %s" % target)

            # File names derive from message fields, so quote them before
            # they reach the shell. Ordinary path characters are unchanged.
            quoted_target = shlex.quote(target)
            cmd1 = ('cat ' + shlex.quote(self._DAQ_PATH + "ccd.header")
                    + " >> " + quoted_target)
            cmd2 = ('cat ' + shlex.quote(self._DAQ_PATH + raw_file)
                    + " >> " + quoted_target)
            dte = get_epoch_timestamp()
            print("DTE IS %s" % dte)
            cmd3 = 'echo ' + str(dte) + " >> " + quoted_target
            print("cmd1 is %s" % cmd1)
            print("cmd2 is %s" % cmd2)
            os.system(cmd1)
            os.system(cmd2)
            os.system(cmd3)
            final_filenames[kee] = final_filename

            print("Done in format()...file list is: %s" % final_filenames)

        print("In format method, final_filenames are:\n%s" % final_filenames)
        return final_filenames

    def forward(self, job_num, final_filenames):
        """Copy each final file to the job's target with scp.

        :param job_num: job whose LOGIN_STR and TARGET_DIR are read from the
            scratchpad
        :param final_filenames: dict of ccd -> file name under the DAQ path
        :return: dict with parallel lists CCD_LIST, FILENAME_LIST and
            CHECKSUM_LIST; a checksum is '0' when checksums are disabled
        """
        import shlex  # local import: only needed for shell quoting here

        print("Start Time of READOUT IS: %s" % get_timestamp())
        login_str = self._job_scratchpad.get_job_value(job_num, 'LOGIN_STR')
        target_dir = self._job_scratchpad.get_job_value(job_num, 'TARGET_DIR')

        ccd_list = []
        filename_list = []
        checksum_list = []
        for ccd, final_file in final_filenames.items():
            pathway = self._DAQ_PATH + final_file
            # Binary mode: hashlib needs bytes, and the data is not text.
            with open(pathway, 'rb') as file_to_calc:
                if self.CHECKSUM_ENABLED:
                    data = file_to_calc.read()
                    resulting_md5 = hashlib.md5(data).hexdigest()
                else:
                    resulting_md5 = '0'

            ccd_list.append(ccd)
            checksum_list.append(resulting_md5)
            filename_list.append(target_dir + final_file)

            # Destination parts come from the job message; quote them.
            cmd = ('scp ' + shlex.quote(pathway) + " "
                   + shlex.quote(login_str + target_dir + final_file))
            print("In forward() method, cmd is %s" % cmd)
            os.system(cmd)
            # Reported after the copy so the timestamp really is the finish.
            print("Finish Time of SCP'ing %s IS: %s"
                  % (pathway, get_timestamp()))

        results = {
            'CCD_LIST': ccd_list,
            'FILENAME_LIST': filename_list,
            'CHECKSUM_LIST': checksum_list,
        }

        print("END Time of READOUT XFER IS: %s" % get_timestamp())
        print("In forward method, results are: \n%s" % results)
        return results

    def send_ack_response(self, type, params):
        """Publish an acknowledgement for an incoming message.

        The destination is the message's RESPONSE_QUEUE field; when that is
        absent the REPLY_QUEUE field is used instead, since that is the name
        the other handlers in this class read.

        :param type: MSG_TYPE value of the acknowledgement
        :param params: incoming message dict being acknowledged
        :raises KeyError: when neither queue field is present
        """
        timed_ack = params.get("ACK_ID")
        job_num = params.get(JOB_NUM)
        response_queue = params.get('RESPONSE_QUEUE')
        if response_queue is None:
            response_queue = params['REPLY_QUEUE']

        msg_params = {}
        msg_params[MSG_TYPE] = type
        msg_params[JOB_NUM] = job_num
        msg_params['COMPONENT'] = self._fqn
        msg_params[ACK_BOOL] = "TRUE"
        msg_params[ACK_ID] = timed_ack
        self._publisher.publish_message(response_queue, msg_params)

    def register(self):
        """Placeholder; registration is not implemented."""
        pass