def forward(self, job_num, final_filenames):
    """Checksum each CCD file of a job and copy it to the target with scp.

    'LOGIN_STR' and 'TARGET_DIR' are read from the job scratchpad for
    ``job_num``. Each file is looked up under ``self._DAQ_PATH`` and copied
    to ``LOGIN_STR + TARGET_DIR + <file name>``.

    :param job_num: job identifier used for the scratchpad lookups.
    :param final_filenames: mapping of CCD identifier -> file name
        (appended to ``self._DAQ_PATH``).
    :return: dict with the parallel lists 'CCD_LIST', 'FILENAME_LIST' and
        'CHECKSUM_LIST'; the checksum is '0' when checksums are disabled.
    """
    print("Start Time of READOUT IS: %s" % get_timestamp())
    login_str = self._job_scratchpad.get_job_value(job_num, 'LOGIN_STR')
    target_dir = self._job_scratchpad.get_job_value(job_num, 'TARGET_DIR')
    results = {}
    CCD_LIST = []
    FILENAME_LIST = []
    CHECKSUM_LIST = []

    for ccd, final_file in final_filenames.items():
        pathway = self._DAQ_PATH + final_file
        # Binary mode: hashlib only accepts bytes, and decoding image data
        # as text can fail. The file is opened even when checksums are
        # disabled so that a missing file still raises before any copy.
        with open(pathway, 'rb') as file_to_calc:
            if self.CHECKSUM_ENABLED:
                md5 = hashlib.md5()
                # Hash in chunks so large files are not held in memory.
                for chunk in iter(lambda: file_to_calc.read(65536), b''):
                    md5.update(chunk)
                resulting_md5 = md5.hexdigest()
            else:
                resulting_md5 = '0'

        CCD_LIST.append(ccd)
        CHECKSUM_LIST.append(resulting_md5)
        FILENAME_LIST.append(target_dir + final_file)

        # NOTE(review): the command is handed to a shell as one string; file
        # names containing spaces or shell metacharacters would break it.
        cmd = 'scp ' + pathway + " " + login_str + target_dir + final_file
        print("In forward() method, cmd is %s" % cmd)
        exit_status = os.system(cmd)
        if exit_status != 0:
            print("In forward() method, scp of %s FAILED with status %s"
                  % (pathway, exit_status))
        # Reported after the copy so the timestamp really is the finish time.
        print("Finish Time of SCP'ing %s IS: %s" % (pathway, get_timestamp()))

    results['CCD_LIST'] = CCD_LIST
    results['FILENAME_LIST'] = FILENAME_LIST
    results['CHECKSUM_LIST'] = CHECKSUM_LIST
    print("END Time of READOUT XFER IS: %s" % get_timestamp())
    print("In forward method, results are: \n%s" % results)
    return results
def forward(self, job_num, final_filenames):
    """Checksum each CCD file of a job and copy it to the target with scp.

    'LOGIN_STR' and 'TARGET_DIR' are read from the job scratchpad for
    ``job_num``. Each file is looked up under ``self._DAQ_PATH`` and copied
    to ``LOGIN_STR + TARGET_DIR + <file name>``.

    :param job_num: job identifier used for the scratchpad lookups.
    :param final_filenames: mapping of CCD identifier -> file name
        (appended to ``self._DAQ_PATH``).
    :return: dict with the parallel lists 'CCD_LIST', 'FILENAME_LIST' and
        'CHECKSUM_LIST'; the checksum is '0' when checksums are disabled.
    """
    print("Start Time of READOUT IS: %s" % get_timestamp())
    login_str = self._job_scratchpad.get_job_value(job_num, 'LOGIN_STR')
    target_dir = self._job_scratchpad.get_job_value(job_num, 'TARGET_DIR')
    results = {}
    CCD_LIST = []
    FILENAME_LIST = []
    CHECKSUM_LIST = []

    for ccd, final_file in final_filenames.items():
        pathway = self._DAQ_PATH + final_file
        # Binary mode is required: hashlib.md5() rejects str input, and
        # text-mode decoding of image data can raise. The open happens even
        # with checksums disabled so a missing file still fails early.
        with open(pathway, 'rb') as file_to_calc:
            if self.CHECKSUM_ENABLED:
                digest = hashlib.md5()
                # Chunked reads keep memory use flat for large files.
                for block in iter(lambda: file_to_calc.read(65536), b''):
                    digest.update(block)
                resulting_md5 = digest.hexdigest()
            else:
                resulting_md5 = '0'

        CCD_LIST.append(ccd)
        CHECKSUM_LIST.append(resulting_md5)
        FILENAME_LIST.append(target_dir + final_file)

        # NOTE(review): shell command built by concatenation; paths with
        # spaces or shell metacharacters are not escaped.
        cmd = 'scp ' + pathway + " " + login_str + target_dir + final_file
        print("In forward() method, cmd is %s" % cmd)
        exit_status = os.system(cmd)
        if exit_status != 0:
            print("In forward() method, scp of %s FAILED with status %s"
                  % (pathway, exit_status))
        # Printed after the transfer so "Finish Time" is accurate.
        print("Finish Time of SCP'ing %s IS: %s" % (pathway, get_timestamp()))

    results['CCD_LIST'] = CCD_LIST
    results['FILENAME_LIST'] = FILENAME_LIST
    results['CHECKSUM_LIST'] = CHECKSUM_LIST
    print("END Time of READOUT XFER IS: %s" % get_timestamp())
    print("In forward method, results are: \n%s" % results)
    return results
def insufficient_base_resources(self, params, healthy_forwarders):
    """Refuse a job because too few healthy forwarders answered.

    Publishes a negative NEW_JOB_ACK to "dmcs_consume", marks the job as
    JOB_ABORTED on the job scoreboard and sets the given forwarders to IDLE.

    :param params: job message; JOB_NUM, RAFTS and 'ACK_ID' are read from it.
    :param healthy_forwarders: forwarders that answered the health check.
    :return: always False.
    """
    job_id = str(params[JOB_NUM])
    rafts = params[RAFTS]
    ack_id = params['ACK_ID']
    required_count = len(rafts)
    LOGGER.info('Reporting to DMCS that there are insufficient healthy forwarders for job #%s', job_id)

    # NOTE FOR DMCS ACK PROCESSING:
    #   * ACK_BOOL == True: there is no FAIL_DETAILS section.
    #   * ACK_BOOL == False: there is always a FAIL_DETAILS section and it
    #     always contains BASE_RESOURCES.
    #   * BASE_RESOURCES == 0: only the NEEDED and AVAILABLE forwarder
    #     counts follow - nothing more.
    #   * BASE_RESOURCES == 1: NCSA_RESOURCES is always present and is
    #     either 0 or 'NO_RESPONSE'.
    #       - NCSA_RESOURCES == 0: NEEDED and AVAILABLE distributor counts
    #         follow.
    #       - NCSA_RESOURCES == 'NO_RESPONSE': nothing else follows.
    failure_details = {
        'BASE_RESOURCES': '0',
        NEEDED_FORWARDERS: str(required_count),
        AVAILABLE_FORWARDERS: str(len(healthy_forwarders)),
    }
    refusal = {
        MSG_TYPE: NEW_JOB_ACK,
        JOB_NUM: job_id,
        ACK_BOOL: False,
        ACK_ID: ack_id,
        'FAIL_DETAILS': failure_details,
    }
    self._base_publisher.publish_message("dmcs_consume", refusal)

    # Record the refusal and hand the forwarders back as idle.
    scoreboard = self.JOB_SCBD
    scoreboard.set_value_for_job(job_id, "STATE", "JOB_ABORTED")
    scoreboard.set_value_for_job(job_id, "TIME_JOB_ABORTED_BASE_RESOURCES", get_timestamp())
    self.FWD_SCBD.set_forwarder_params(healthy_forwarders, {"STATE": "IDLE"})
    return False
def ncsa_resources_query(self, params, healthy_forwarders):
    """Ask NCSA for resources to pair with the candidate forwarders.

    One forwarder (taken by position from ``healthy_forwarders``) is
    assigned to each raft in ``params[RAFTS]`` and flagged with the
    NCSA_RESOURCES_QUERY status, then the candidate mapping is published
    to ``self.NCSA_CONSUME``.

    :param params: job message; JOB_NUM and RAFTS are read from it.
    :param healthy_forwarders: indexable collection of forwarder names.
    :return: the timed ack id under which the NCSA reply is expected.
    """
    job_id = str(params[JOB_NUM])
    rafts = params[RAFTS]
    worker_count = len(rafts)
    LOGGER.info('Sufficient forwarders have been found. Checking NCSA')
    self._pairs_dict = {}

    # Forwarder name -> raft it would handle.
    candidates = {}
    for idx in range(worker_count):
        fwd = healthy_forwarders[idx]
        candidates[fwd] = rafts[idx]
        self.FWD_SCBD.set_forwarder_status(fwd, NCSA_RESOURCES_QUERY)

    ack = self.get_next_timed_ack_id("NCSA_Ack")
    query = {
        MSG_TYPE: "NCSA_RESOURCES_QUERY",
        JOB_NUM: job_id,
        ACK_ID: ack,
        "FORWARDERS": candidates,
    }

    scoreboard = self.JOB_SCBD
    scoreboard.set_value_for_job(job_id, "STATE", "NCSA_RESOURCES_QUERY_SENT")
    scoreboard.set_value_for_job(job_id, "TIME_NCSA_RESOURCES_QUERY_SENT", get_timestamp())
    self._ncsa_publisher.publish_message(self.NCSA_CONSUME, query)

    LOGGER.info('The following forwarders have been sent to NCSA for pairing:')
    LOGGER.info(candidates)
    return ack
def accept_job(self, job_num):
    """Mark a job as accepted and send a positive NEW_JOB_ACK to DMCS.

    :param job_num: identifier of the accepted job.
    :return: always True.
    """
    scoreboard = self.JOB_SCBD
    scoreboard.set_value_for_job(job_num, STATE, "JOB_ACCEPTED")
    scoreboard.set_value_for_job(job_num, "TIME_JOB_ACCEPTED", get_timestamp())

    ack_message = {
        JOB_NUM: job_num,
        MSG_TYPE: NEW_JOB_ACK,
        ACK_BOOL: True,
    }
    self._base_publisher.publish_message("dmcs_consume", ack_message)
    return True
def process_job_params(self, params):
    """Store a job's transfer parameters and acknowledge their receipt.

    The transfer parameters are saved on the job scratchpad, the job is
    moved to the READY_WITH_PARAMS state (with a timestamp) and a
    DISTRIBUTOR_JOB_PARAMS_ACK response is sent.

    :param params: incoming message; JOB_NUM and TRANSFER_PARAMS are read.
    """
    transfer_params = params[TRANSFER_PARAMS]
    # The job number was previously referenced without ever being assigned.
    job_number = params[JOB_NUM]

    self._job_scratchpad.set_job_transfer_params(job_number, transfer_params)
    self._job_scratchpad.set_job_value(job_number, "STATE", "READY_WITH_PARAMS")
    self._job_scratchpad.set_job_value(job_number, "READY_WITH_PARAMS_TIME", get_timestamp())
    self.send_ack_response(DISTRIBUTOR_JOB_PARAMS_ACK, params)
def build_monitor_data(self, params):
    """Return a copy of ``params`` extended with monitoring metadata.

    The keys 'SESSION_ID', 'VISIT_ID', 'TIME' and 'DATA_TYPE' are added to
    (or overwritten in) the copy; ``params`` itself is left untouched.

    :param params: mapping of values to report.
    :return: new dict containing the entries of ``params`` plus metadata.
    """
    enriched = {key: params[key] for key in list(params.keys())}
    enriched.update({
        'SESSION_ID': self.get_current_session(),
        'VISIT_ID': self.get_current_visit(),
        'TIME': get_timestamp(),
        'DATA_TYPE': self.DB_TYPE,
    })
    return enriched
def accept_job(self, ack_id, job_num):
    """Mark a job as accepted and acknowledge it on "dmcs_ack_consume".

    :param ack_id: ack identifier echoed back in the message.
    :param job_num: identifier of the accepted job.
    :return: always True.
    """
    scoreboard = self.JOB_SCBD
    scoreboard.set_value_for_job(job_num, STATE, "JOB_ACCEPTED")
    scoreboard.set_value_for_job(job_num, "TIME_JOB_ACCEPTED", get_timestamp())

    ack_message = {
        JOB_NUM: job_num,
        MSG_TYPE: self.PP_START_INTEGRATION_ACK,
        'COMPONENT': self.COMPONENT_NAME,
        ACK_BOOL: True,
        'ACK_ID': ack_id,
    }
    self._base_publisher.publish_message("dmcs_ack_consume", ack_message)
    return True
def forwarder_health_check(self, params):
    """Register a new job and send a health check to every available forwarder.

    The job is added to the job scoreboard, all available forwarders are
    put into the HEALTH_CHECK / UNKNOWN state, and a FORWARDER_HEALTH_CHECK
    message is published to each forwarder's consume queue.

    :param params: job message; JOB_NUM and 'RAFTS' are read from it.
    :return: the timed ack id under which forwarder replies are expected.
    """
    job_num = str(params[JOB_NUM])
    raft_list = params['RAFTS']
    needed_workers = len(raft_list)

    self.JOB_SCBD.add_job(job_num, needed_workers)
    self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED", get_timestamp())
    self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED_E", get_epoch_timestamp())
    LOGGER.info('Received new job %s. Needed workers is %s', job_num, needed_workers)

    # Run the forwarder health check under a fresh timed ack id.
    timed_ack = self.get_next_timed_ack_id("FORWARDER_HEALTH_CHECK_ACK")
    forwarders = self.FWD_SCBD.return_available_forwarders_list()

    # Mark every available forwarder as unknown until it answers.
    state_status = {"STATE": "HEALTH_CHECK", "STATUS": "UNKNOWN"}
    self.FWD_SCBD.set_forwarder_params(forwarders, state_status)

    # Health check message sent to each forwarder.
    ack_params = {}
    ack_params[MSG_TYPE] = FORWARDER_HEALTH_CHECK
    ack_params["ACK_ID"] = timed_ack
    ack_params[JOB_NUM] = job_num

    self.JOB_SCBD.set_value_for_job(job_num, "STATE", "BASE_RESOURCE_QUERY")
    self.JOB_SCBD.set_value_for_job(job_num, "TIME_BASE_RESOURCE_QUERY", get_timestamp())

    # TODO(review): this audit record is assembled but never published or
    # stored anywhere in this method - confirm where it should be sent.
    audit_params = {}
    audit_params['DATA_TYPE'] = 'FOREMAN_ACK_REQUEST'
    audit_params['SUB_TYPE'] = 'FORWARDER_HEALTH_CHECK_ACK'
    audit_params['ACK_ID'] = timed_ack
    # Was misspelled as "audit_parsms", which raised NameError at runtime.
    audit_params['COMPONENT_NAME'] = 'BASE_FOREMAN'
    audit_params['TIME'] = get_epoch_timestamp()

    for forwarder in forwarders:
        consume_queue = self.FWD_SCBD.get_value_for_forwarder(forwarder, "CONSUME_QUEUE")
        self._base_publisher.publish_message(consume_queue, ack_params)

    return timed_ack
def distribute_job_params(self, params, pairs):
    """Send each paired forwarder its transfer parameters for a job.

    Called once NCSA has reported enough resources. The pairs are stored
    on the job scoreboard and a FORWARDER_JOB_PARAMS message is published
    to the consume queue of every forwarder in ``pairs``.

    :param params: job message; JOB_NUM is read from it.
    :param pairs: mapping of forwarder name -> its transfer parameters.
    :return: the timed ack id under which forwarder replies are expected.
    """
    job_id = str(params[JOB_NUM])
    self.JOB_SCBD.set_pairs_for_job(job_id, pairs)
    self.JOB_SCBD.set_value_for_job(job_id, "TIME_PAIRS_ADDED", get_timestamp())
    LOGGER.info('The following pairs will be used for Job #%s: %s', job_id, pairs)

    ack = self.get_next_timed_ack_id("FWD_PARAMS_ACK")
    forwarder_names = list(pairs.keys())

    # A single message dict is reused; only TRANSFER_PARAMS changes per
    # forwarder.
    message = {
        MSG_TYPE: "FORWARDER_JOB_PARAMS",
        JOB_NUM: job_id,
        ACK_ID: ack,
    }
    for name in forwarder_names:
        message["TRANSFER_PARAMS"] = pairs[name]
        queue = self.FWD_SCBD.get_value_for_forwarder(name, "CONSUME_QUEUE")
        self._base_publisher.publish_message(queue, message)

    return ack
def process_health_check(self, params):
    """Record the job on the scratchpad and answer the health check.

    :param params: incoming health check message; JOB_NUM is read from it
        and the whole message is passed on to the ack response.
    """
    job_id = params[JOB_NUM]
    scratchpad = self._job_scratchpad
    scratchpad.set_job_value(job_id, "STATE", "ADD_JOB")
    scratchpad.set_job_value(job_id, "ADD_JOB_TIME", get_timestamp())
    self.send_ack_response("DISTRIBUTOR_HEALTH_ACK", params)
def process_dmcs_readout(self, params):
    """Relay a DMCS readout request to NCSA and then to the job's forwarders.

    Flow:
      1. Send NCSA_READOUT to NCSA and wait for the timed ack.
      2. No NCSA response, or a negative one: publish a failed READOUT_ACK
         to 'dmcs_consume'.
      3. Positive NCSA response: send FORWARDER_READOUT to every forwarder
         paired for the job, wait, and publish a positive READOUT_ACK when
         every forwarder has answered.

    :param params: DMCS message; JOB_NUM is read from it.
    """
    job_number = params[JOB_NUM]
    pairs = self.JOB_SCBD.get_pairs_for_job(job_number)
    # Was "date - get_timestamp()", which raised NameError.
    date = get_timestamp()
    self.JOB_SCBD.set_value_for_job(job_number, TIME_START_READOUT, date)
    forwarders = list(pairs.keys())

    # Send READOUT to NCSA with a timed ACK_ID.
    ack_id = self.get_next_timed_ack_id('NCSA_READOUT')
    ncsa_params = {
        MSG_TYPE: 'NCSA_READOUT',
        ACK_ID: ack_id,
    }
    self._ncsa_publisher.publish_message(NCSA_CONSUME, yaml.dump(ncsa_params))

    self.ack_timer(4)
    ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)

    if not ncsa_response:
        # Tell DMCS that NCSA never answered.
        dmcs_params = {
            MSG_TYPE: 'READOUT_ACK',
            JOB_NUM: job_number,
            'ACK_BOOL': False,
            'COMMENT': "Readout Failed: No Response from NCSA",
        }
        self._base_publisher.publish_message('dmcs_consume', yaml.dump(dmcs_params))
        return

    if ncsa_response['ACK_BOOL'] != True:
        # Tell DMCS that NCSA reported a problem.
        comment = (
            'Readout Failed: Problem at NCSA - Expected Distributor Acks is %s, Number of Distributor Acks received is %s'
            % (ncsa_response['EXPECTED_DISTRIBUTOR_ACKS'],
               ncsa_response['RECEIVED_DISTRIBUTOR_ACKS'])
        )
        dmcs_params = {
            MSG_TYPE: 'READOUT_ACK',
            JOB_NUM: job_number,
            'ACK_BOOL': False,
            'COMMENT': comment,
        }
        self._base_publisher.publish_message('dmcs_consume', yaml.dump(dmcs_params))
        return

    # NCSA is ready: inform the forwarders.
    # NOTE(review): this success path publishes through self._publisher
    # while the failure paths use self._base_publisher - confirm both exist.
    fwd_ack_id = self.get_next_timed_ack_id('FORWARDER_READOUT')
    for forwarder in forwarders:
        routing_key = self.FWD_SCBD.get_routing_key(forwarder)
        msg_params = {
            MSG_TYPE: 'FORWARDER_READOUT',
            JOB_NUM: job_number,
            'ACK_ID': fwd_ack_id,
        }
        self.FWD_SCBD.set_forwarder_state(forwarder, START_READOUT)
        self._publisher.publish_message(routing_key, yaml.dump(msg_params))

    self.ack_timer(4)
    forwarder_responses = self.ACK_SCBD.get_components_for_timed_ack(fwd_ack_id)

    # Guard against a missing response set before calling len() on it.
    if forwarder_responses is not None and len(forwarder_responses) == len(forwarders):
        dmcs_params = {
            MSG_TYPE: 'READOUT_ACK',
            JOB_NUM: job_number,
            'ACK_BOOL': True,
            'COMMENT': "Readout begun at %s" % get_timestamp(),
        }
        self._publisher.publish_message('dmcs_consume', yaml.dump(dmcs_params))
    # NOTE(review): when some forwarders fail to answer, nothing is reported
    # to DMCS - confirm whether a failed READOUT_ACK should be sent here.
def process_dmcs_new_job(self, params):
    """Handle a new job request from DMCS end to end.

    Steps:
      1. Health-check the forwarders; refuse the job when fewer healthy
         forwarders answered than there are rafts.
      2. Ask NCSA for matching resources; refuse the job when NCSA does not
         answer or answers negatively.
      3. Distribute the job parameters to the paired forwarders and accept
         the job once every one of them has acknowledged.

    :param params: DMCS job message; JOB_NUM and RAFTS are read from it.
    :return: the result of the accept/refuse helper that handled the job,
        or False when the forwarders did not all acknowledge their params.
    """
    input_params = params
    # String form, matching how the helper methods key the job scoreboard.
    job_num = str(input_params[JOB_NUM])
    needed_workers = len(input_params[RAFTS])

    ack_id = self.forwarder_health_check(input_params)
    self.ack_timer(7)  # This is a HUGE num seconds for now..final setting will be milliseconds

    # Look up replies under the id returned by the health check (an
    # undefined "timed_ack" was used here before).
    healthy_forwarders = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
    if healthy_forwarders is None:
        # Presumably the scoreboard returns None when nobody answered -
        # TODO confirm; treat it as "no healthy forwarders".
        healthy_forwarders = []

    if needed_workers > len(healthy_forwarders):
        return self.insufficient_base_resources(input_params, healthy_forwarders)

    healthy_status = {"STATUS": "HEALTHY", "STATE": "READY_WITHOUT_PARAMS"}
    self.FWD_SCBD.set_forwarder_params(healthy_forwarders, healthy_status)

    ack_id = self.ncsa_resources_query(input_params, healthy_forwarders)
    self.ack_timer(3)

    # Check the ACK scoreboard for a response from NCSA.
    ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
    idle_params = {'STATE': 'IDLE'}

    if not ncsa_response:
        result = self.ncsa_no_response(input_params)
        # The candidate mapping built inside ncsa_resources_query() is not
        # visible here, so all healthy forwarders are released instead.
        self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_params)
        return result

    pairs = {}
    ack_bool = None
    try:
        ack_bool = ncsa_response[ACK_BOOL]
        if ack_bool == True:
            pairs = ncsa_response[PAIRS]
    except KeyError:
        # A response without ACK_BOOL is handled as a refusal below; one
        # without PAIRS proceeds with no pairs.
        pass

    if ack_bool != True:
        # Not enough NCSA resources to do the job - notify DMCS.
        self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_params)
        return self.insufficient_ncsa_resources(ncsa_response)

    # Distribute job params and tell DMCS we are ready.
    fwders = list(pairs.keys())
    fwd_ack_id = self.distribute_job_params(input_params, pairs)
    self.ack_timer(3)

    fwd_params_response = self.ACK_SCBD.get_components_for_timed_ack(fwd_ack_id)
    if fwd_params_response and (len(fwd_params_response) == len(fwders)):
        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "BASE_TASK_PARAMS_SENT")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_BASE_TASK_PARAMS_SENT", get_timestamp())
        in_ready_state = {'STATE': 'READY_WITH_PARAMS'}
        self.FWD_SCBD.set_forwarder_params(fwders, in_ready_state)
        return self.accept_job(job_num)

    # NOTE(review): some forwarders never acknowledged their params. This
    # path used to fall through with no result bound; it now reports the
    # job as not accepted. Confirm whether DMCS should be notified too.
    return False