Beispiel #1
0
    def forward(self, job_num, final_filenames):
        """Copy each CCD file for a job to its target directory with scp.

        For every entry in ``final_filenames`` the file located at
        ``self._DAQ_PATH + <file name>`` is opened, optionally checksummed
        (MD5) when ``self.CHECKSUM_ENABLED`` is truthy, and then copied with
        ``scp`` to ``LOGIN_STR + TARGET_DIR + <file name>``, where both values
        are read from ``self._job_scratchpad`` for ``job_num``.

        Args:
            job_num: Job identifier used to look up 'LOGIN_STR' and
                'TARGET_DIR' in the job scratchpad.
            final_filenames: Mapping of CCD identifier to file name.

        Returns:
            dict: Empty when ``final_filenames`` is empty. Otherwise holds
            three parallel lists under 'CCD_LIST', 'FILENAME_LIST' (target
            path of each file) and 'CHECKSUM_LIST' (MD5 hex digest, or '0'
            when checksums are disabled).

        Raises:
            OSError: If a source file cannot be opened.
        """
        print("Start Time of READOUT IS: %s" % get_timestamp())
        login_str = self._job_scratchpad.get_job_value(job_num, 'LOGIN_STR')
        target_dir = self._job_scratchpad.get_job_value(job_num, 'TARGET_DIR')
        results = {}
        ccd_list = []
        filename_list = []
        checksum_list = []
        for ccd, final_file in list(final_filenames.items()):
            pathway = self._DAQ_PATH + final_file
            destination = target_dir + final_file
            # Binary mode: hashlib only accepts bytes, and a text-mode read
            # would try to decode the file. The file is opened even when
            # checksums are disabled so a missing file still fails here,
            # before any copy is attempted.
            with open(pathway, 'rb') as file_to_calc:
                if self.CHECKSUM_ENABLED:
                    resulting_md5 = hashlib.md5(file_to_calc.read()).hexdigest()
                else:
                    resulting_md5 = '0'

            ccd_list.append(ccd)
            checksum_list.append(resulting_md5)
            filename_list.append(destination)

            cmd = 'scp ' + pathway + " " + login_str + destination
            print("In forward() method, cmd is %s" % cmd)
            exit_status = os.system(cmd)
            # Report the finish time only once the copy has actually run.
            print("Finish Time of SCP'ing %s IS: %s" %
                  (pathway, get_timestamp()))
            if exit_status != 0:
                print("scp of %s returned non-zero exit status %s" %
                      (pathway, exit_status))

            results['CCD_LIST'] = ccd_list
            results['FILENAME_LIST'] = filename_list
            results['CHECKSUM_LIST'] = checksum_list

        print("END Time of READOUT XFER IS: %s" % get_timestamp())
        print("In forward method, results are: \n%s" % results)
        return results
Beispiel #2
0
    def forward(self, job_num, final_filenames):
        """Transfer the files of a job to the target host using scp.

        Each file named in ``final_filenames`` is read from
        ``self._DAQ_PATH``, checksummed with MD5 if ``self.CHECKSUM_ENABLED``
        is truthy, and copied to ``LOGIN_STR + TARGET_DIR + <file name>``.
        'LOGIN_STR' and 'TARGET_DIR' come from ``self._job_scratchpad`` for
        ``job_num``.

        Args:
            job_num: Job identifier for the scratchpad lookups.
            final_filenames: Mapping of CCD identifier to file name.

        Returns:
            dict: ``{}`` if there were no files. Otherwise the parallel lists
            'CCD_LIST', 'FILENAME_LIST' (target paths) and 'CHECKSUM_LIST'
            (MD5 hex digests, or '0' when checksums are disabled).

        Raises:
            OSError: If a source file cannot be opened.
        """
        print("Start Time of READOUT IS: %s" % get_timestamp())
        login_str = self._job_scratchpad.get_job_value(job_num, 'LOGIN_STR')
        target_dir = self._job_scratchpad.get_job_value(job_num, 'TARGET_DIR')
        results = {}
        ccd_list = []
        filename_list = []
        checksum_list = []
        for ccd, final_file in list(final_filenames.items()):
            pathway = self._DAQ_PATH + final_file
            remote_path = target_dir + final_file
            # hashlib requires bytes, so the file must be read in binary
            # mode. Opening it regardless of CHECKSUM_ENABLED keeps the
            # fail-fast behaviour for missing source files.
            with open(pathway, 'rb') as file_to_calc:
                if self.CHECKSUM_ENABLED:
                    resulting_md5 = hashlib.md5(file_to_calc.read()).hexdigest()
                else:
                    resulting_md5 = '0'

            ccd_list.append(ccd)
            checksum_list.append(resulting_md5)
            filename_list.append(remote_path)

            cmd = 'scp ' + pathway + " " + login_str + remote_path
            print("In forward() method, cmd is %s" % cmd)
            exit_status = os.system(cmd)
            # The finish timestamp is taken after scp has returned.
            print("Finish Time of SCP'ing %s IS: %s" % (pathway, get_timestamp()))
            if exit_status != 0:
                print("scp of %s returned non-zero exit status %s" % (pathway, exit_status))

            results['CCD_LIST'] = ccd_list
            results['FILENAME_LIST'] = filename_list
            results['CHECKSUM_LIST'] = checksum_list

        print("END Time of READOUT XFER IS: %s" % get_timestamp())
        print("In forward method, results are: \n%s" % results)
        return results
    def insufficient_base_resources(self, params, healthy_forwarders):
        """Refuse a job because too few healthy forwarders responded.

        Publishes a negative NEW_JOB_ACK to the "dmcs_consume" queue, marks
        the job as aborted on the job scoreboard and sets the given
        forwarders back to the IDLE state.

        Args:
            params: Job message; JOB_NUM, RAFTS and 'ACK_ID' are read.
            healthy_forwarders: Forwarders that answered the health check.

        Returns:
            bool: Always False.
        """
        job_id = str(params[JOB_NUM])
        rafts = params[RAFTS]
        request_ack_id = params['ACK_ID']
        forwarders_needed = len(rafts)
        LOGGER.info('Reporting to DMCS that there are insufficient healthy forwarders for job #%s', job_id)

        refusal = {
            MSG_TYPE: NEW_JOB_ACK,
            JOB_NUM: job_id,
            ACK_BOOL: False,
            ACK_ID: request_ack_id,
        }

        # Protocol notes for DMCS ack processing (from the original author):
        #  * ACK_BOOL == True: the message has no FAIL_DETAILS section.
        #  * ACK_BOOL == False: FAIL_DETAILS is always present and always
        #    contains BASE_RESOURCES.
        #  * BASE_RESOURCES == 0: only the NEEDED and AVAILABLE forwarder
        #    counts accompany it, nothing more.
        #  * BASE_RESOURCES == 1: NCSA_RESOURCES is always present and is
        #    either 0 or 'NO_RESPONSE'. With 0, NEEDED and AVAILABLE
        #    distributor counts follow; with 'NO_RESPONSE' nothing else does.
        failure_details = {
            'BASE_RESOURCES': '0',
            NEEDED_FORWARDERS: str(forwarders_needed),
            AVAILABLE_FORWARDERS: str(len(healthy_forwarders)),
        }
        refusal['FAIL_DETAILS'] = failure_details
        self._base_publisher.publish_message("dmcs_consume", refusal)

        # Record the abort on the job, then release the forwarders.
        job_board = self.JOB_SCBD
        job_board.set_value_for_job(job_id, "STATE", "JOB_ABORTED")
        job_board.set_value_for_job(job_id, "TIME_JOB_ABORTED_BASE_RESOURCES", get_timestamp())
        self.FWD_SCBD.set_forwarder_params(healthy_forwarders, {"STATE": "IDLE"})
        return False
 def ncsa_resources_query(self, params, healthy_forwarders):
     """Ask NCSA whether it has resources for a job's forwarder candidates.

     One forwarder from ``healthy_forwarders`` is assigned to each raft in
     ``params[RAFTS]`` by position. The resulting forwarder-to-raft mapping
     is published to ``self.NCSA_CONSUME`` and the job scoreboard is updated.

     Args:
         params: Job message; JOB_NUM and RAFTS are read.
         healthy_forwarders: Indexable collection of forwarders, expected
             to hold at least as many entries as there are rafts.

     Returns:
         The timed ack id attached to the query.
     """
     job_id = str(params[JOB_NUM])
     rafts = params[RAFTS]
     raft_count = len(rafts)
     LOGGER.info('Sufficient forwarders have been found. Checking NCSA')
     self._pairs_dict = {}

     candidates = {}
     for position in range(raft_count):
         raft = rafts[position]
         forwarder = healthy_forwarders[position]
         candidates[forwarder] = raft
         self.FWD_SCBD.set_forwarder_status(forwarder, NCSA_RESOURCES_QUERY)

     # Request a timed ack id, then assemble the query for NCSA.
     timed_ack_id = self.get_next_timed_ack_id("NCSA_Ack")
     query = {
         MSG_TYPE: "NCSA_RESOURCES_QUERY",
         JOB_NUM: job_id,
         ACK_ID: timed_ack_id,
         "FORWARDERS": candidates,
     }

     job_board = self.JOB_SCBD
     job_board.set_value_for_job(job_id, "STATE", "NCSA_RESOURCES_QUERY_SENT")
     job_board.set_value_for_job(job_id, "TIME_NCSA_RESOURCES_QUERY_SENT", get_timestamp())
     self._ncsa_publisher.publish_message(self.NCSA_CONSUME, query)

     LOGGER.info('The following forwarders have been sent to NCSA for pairing:')
     LOGGER.info(candidates)
     return timed_ack_id
 def accept_job(self, job_num):
     """Tell DMCS that a job has been accepted.

     Marks the job as "JOB_ACCEPTED" on the job scoreboard, records the
     acceptance time, and publishes a positive NEW_JOB_ACK to "dmcs_consume".

     Args:
         job_num: Identifier of the accepted job.

     Returns:
         bool: Always True.
     """
     acceptance = {
         JOB_NUM: job_num,
         MSG_TYPE: NEW_JOB_ACK,
         ACK_BOOL: True,
     }
     job_board = self.JOB_SCBD
     job_board.set_value_for_job(job_num, STATE, "JOB_ACCEPTED")
     job_board.set_value_for_job(job_num, "TIME_JOB_ACCEPTED", get_timestamp())
     self._base_publisher.publish_message("dmcs_consume", acceptance)
     return True
Beispiel #6
0
 def process_job_params(self, params):
     """Store a job's transfer parameters and acknowledge them.

     Saves ``params[TRANSFER_PARAMS]`` in the job scratchpad, marks the job
     "READY_WITH_PARAMS" with a timestamp, and sends a
     DISTRIBUTOR_JOB_PARAMS_ACK response.

     Args:
         params: Incoming message; TRANSFER_PARAMS and JOB_NUM are read.
     """
     transfer_params = params[TRANSFER_PARAMS]
     # Bind the job number once; the scratchpad calls below all key on it.
     job_number = params[JOB_NUM]
     self._job_scratchpad.set_job_transfer_params(job_number,
                                                  transfer_params)
     self._job_scratchpad.set_job_value(job_number, "STATE",
                                        "READY_WITH_PARAMS")
     self._job_scratchpad.set_job_value(job_number,
                                        "READY_WITH_PARAMS_TIME",
                                        get_timestamp())
     self.send_ack_response(DISTRIBUTOR_JOB_PARAMS_ACK, params)
 def build_monitor_data(self, params):
     """Return a copy of ``params`` extended with monitoring metadata.

     The copy is shallow. 'SESSION_ID', 'VISIT_ID', 'TIME' and 'DATA_TYPE'
     are added, overwriting entries of the same name from ``params``.

     Args:
         params: Mapping whose entries are copied into the result.

     Returns:
         dict: The copied entries plus the four metadata fields.
     """
     record = {field: params[field] for field in params.keys()}
     record.update({
         'SESSION_ID': self.get_current_session(),
         'VISIT_ID': self.get_current_visit(),
         'TIME': get_timestamp(),
         'DATA_TYPE': self.DB_TYPE,
     })
     return record
Beispiel #8
0
 def accept_job(self, ack_id, job_num):
     """Acknowledge to DMCS that a job has been accepted.

     Marks the job as "JOB_ACCEPTED" on the job scoreboard with a timestamp
     and publishes a positive ``self.PP_START_INTEGRATION_ACK`` message,
     carrying this component's name and the ack id, to "dmcs_ack_consume".

     Args:
         ack_id: Ack id to echo back in the message.
         job_num: Identifier of the accepted job.

     Returns:
         bool: Always True.
     """
     acceptance = {
         JOB_NUM: job_num,
         MSG_TYPE: self.PP_START_INTEGRATION_ACK,
         'COMPONENT': self.COMPONENT_NAME,
         ACK_BOOL: True,
         'ACK_ID': ack_id,
     }
     job_board = self.JOB_SCBD
     job_board.set_value_for_job(job_num, STATE, "JOB_ACCEPTED")
     accepted_at = get_timestamp()
     job_board.set_value_for_job(job_num, "TIME_JOB_ACCEPTED", accepted_at)
     self._base_publisher.publish_message("dmcs_ack_consume", acceptance)
     return True
    def forwarder_health_check(self, params):
        """Register a new job and send a health check to every available forwarder.

        Adds the job to the job scoreboard, marks all available forwarders
        as being in "HEALTH_CHECK" state with "UNKNOWN" status, and publishes
        a FORWARDER_HEALTH_CHECK message to each forwarder's consume queue.

        Args:
            params: Job message; JOB_NUM and 'RAFTS' are read.

        Returns:
            The timed ack id that forwarders are expected to answer with.
        """
        job_num = str(params[JOB_NUM])
        raft_list = params['RAFTS']
        needed_workers = len(raft_list)

        self.JOB_SCBD.add_job(job_num, needed_workers)
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED", get_timestamp())
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_JOB_ADDED_E", get_epoch_timestamp())
        LOGGER.info('Received new job %s. Needed workers is %s', job_num, needed_workers)

        # Every health-check reply is collected under this timed ack id.
        timed_ack = self.get_next_timed_ack_id("FORWARDER_HEALTH_CHECK_ACK")

        forwarders = self.FWD_SCBD.return_available_forwarders_list()
        # Status stays UNKNOWN until a forwarder answers the health check.
        state_status = {"STATE": "HEALTH_CHECK", "STATUS": "UNKNOWN"}
        self.FWD_SCBD.set_forwarder_params(forwarders, state_status)

        ack_params = {
            MSG_TYPE: FORWARDER_HEALTH_CHECK,
            "ACK_ID": timed_ack,
            JOB_NUM: job_num,
        }

        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "BASE_RESOURCE_QUERY")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_BASE_RESOURCE_QUERY", get_timestamp())

        # NOTE(review): this audit record is assembled but never published or
        # stored in this method - confirm where it is meant to be sent.
        audit_params = {
            'DATA_TYPE': 'FOREMAN_ACK_REQUEST',
            'SUB_TYPE': 'FORWARDER_HEALTH_CHECK_ACK',
            'ACK_ID': timed_ack,
            'COMPONENT_NAME': 'BASE_FOREMAN',
            'TIME': get_epoch_timestamp(),
        }

        for forwarder in forwarders:
            consume_queue = self.FWD_SCBD.get_value_for_forwarder(forwarder, "CONSUME_QUEUE")
            self._base_publisher.publish_message(consume_queue, ack_params)

        return timed_ack
    def distribute_job_params(self, params, pairs):
        """Send each paired forwarder its transfer parameters for a job.

        Stores ``pairs`` on the job scoreboard, then publishes one
        "FORWARDER_JOB_PARAMS" message per forwarder to that forwarder's
        consume queue. All messages share one timed ack id.

        Args:
            params: Job message; JOB_NUM is read.
            pairs: Mapping of forwarder to the transfer parameters that
                forwarder should receive.

        Returns:
            The timed ack id attached to the published messages.
        """
        # Reached once NCSA has reported enough resources.
        job_id = str(params[JOB_NUM])
        job_board = self.JOB_SCBD
        job_board.set_pairs_for_job(job_id, pairs)
        job_board.set_value_for_job(job_id, "TIME_PAIRS_ADDED", get_timestamp())
        LOGGER.info('The following pairs will be used for Job #%s: %s',
                     job_id, pairs)

        params_ack_id = self.get_next_timed_ack_id("FWD_PARAMS_ACK")
        # One message dict is reused; only TRANSFER_PARAMS changes per forwarder.
        message = {
            MSG_TYPE: "FORWARDER_JOB_PARAMS",
            JOB_NUM: job_id,
            ACK_ID: params_ack_id,
        }
        for forwarder in tuple(pairs.keys()):
            message["TRANSFER_PARAMS"] = pairs[forwarder]
            queue_name = self.FWD_SCBD.get_value_for_forwarder(forwarder, "CONSUME_QUEUE")
            self._base_publisher.publish_message(queue_name, message)

        return params_ack_id
Beispiel #11
0
 def process_health_check(self, params):
     """Record a new job in the scratchpad and answer the health check.

     Sets the job's state to "ADD_JOB", stores the current timestamp under
     "ADD_JOB_TIME", and sends a "DISTRIBUTOR_HEALTH_ACK" response.

     Args:
         params: Incoming health-check message; JOB_NUM is read.
     """
     job_id = params[JOB_NUM]
     scratchpad = self._job_scratchpad
     scratchpad.set_job_value(job_id, "STATE", "ADD_JOB")
     added_at = get_timestamp()
     scratchpad.set_job_value(job_id, "ADD_JOB_TIME", added_at)
     self.send_ack_response("DISTRIBUTOR_HEALTH_ACK", params)
    def process_dmcs_readout(self, params):
        """Relay a DMCS readout request to NCSA and then to the job's forwarders.

        Steps:
          1. Record the readout start time on the job scoreboard.
          2. Publish 'NCSA_READOUT' and wait (``self.ack_timer(4)``) for the
             NCSA ack.
          3. If NCSA did not answer or answered negatively, publish a
             negative 'READOUT_ACK' to 'dmcs_consume' and stop.
          4. Otherwise publish 'FORWARDER_READOUT' to every forwarder paired
             with the job, wait again, and publish a positive 'READOUT_ACK'
             only if the number of forwarder acks equals the number of
             forwarders.

        Args:
            params: Readout message from DMCS; JOB_NUM is read.

        Returns:
            None. If not every forwarder acknowledges, nothing is sent to
            DMCS.
        """
        job_number = params[JOB_NUM]
        pairs = self.JOB_SCBD.get_pairs_for_job(job_number)
        date = get_timestamp()
        self.JOB_SCBD.set_value_for_job(job_number, TIME_START_READOUT, date)
        forwarders = list(pairs.keys())

        # Send READOUT to NCSA with a timed ACK_ID.
        ack_id = self.get_next_timed_ack_id('NCSA_READOUT')
        ncsa_params = {
            MSG_TYPE: 'NCSA_READOUT',
            ACK_ID: ack_id,
        }
        self._ncsa_publisher.publish_message(NCSA_CONSUME, yaml.dump(ncsa_params))

        self.ack_timer(4)

        ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        if not ncsa_response:
            # No response from NCSA: report the failure to DMCS.
            dmcs_params = {
                MSG_TYPE: 'READOUT_ACK',
                JOB_NUM: job_number,
                'ACK_BOOL': False,
                'COMMENT': "Readout Failed: No Response from NCSA",
            }
            self._base_publisher.publish_message('dmcs_consume', yaml.dump(dmcs_params))
            return

        ncsa_accepted = ncsa_response['ACK_BOOL'] == True
        if not ncsa_accepted:
            # NCSA answered but could not start the readout: report to DMCS.
            comment = ('Readout Failed: Problem at NCSA - Expected Distributor Acks is %s, '
                       'Number of Distributor Acks received is %s'
                       % (ncsa_response['EXPECTED_DISTRIBUTOR_ACKS'],
                          ncsa_response['RECEIVED_DISTRIBUTOR_ACKS']))
            dmcs_params = {
                MSG_TYPE: 'READOUT_ACK',
                JOB_NUM: job_number,
                'ACK_BOOL': False,
                'COMMENT': comment,
            }
            self._base_publisher.publish_message('dmcs_consume', yaml.dump(dmcs_params))
            return

        # NCSA is ready: inform the forwarders. The message is identical for
        # every forwarder, so it is serialized once.
        fwd_ack_id = self.get_next_timed_ack_id('FORWARDER_READOUT')
        readout_msg = yaml.dump({
            MSG_TYPE: 'FORWARDER_READOUT',
            JOB_NUM: job_number,
            'ACK_ID': fwd_ack_id,
        })
        # NOTE(review): the forwarder and success messages go out through
        # self._publisher while the failure paths above use
        # self._base_publisher - confirm both attributes exist on the class.
        for forwarder in forwarders:
            routing_key = self.FWD_SCBD.get_routing_key(forwarder)
            self.FWD_SCBD.set_forwarder_state(forwarder, START_READOUT)
            self._publisher.publish_message(routing_key, readout_msg)

        self.ack_timer(4)

        forwarder_responses = self.ACK_SCBD.get_components_for_timed_ack(fwd_ack_id)
        # The ack lookup can return a falsy value when nothing arrived (see
        # the NCSA check above), so test that before calling len().
        if forwarder_responses and len(forwarder_responses) == len(forwarders):
            dmcs_params = {
                MSG_TYPE: 'READOUT_ACK',
                JOB_NUM: job_number,
                'ACK_BOOL': True,
                'COMMENT': "Readout begun at %s" % get_timestamp(),
            }
            self._publisher.publish_message('dmcs_consume', yaml.dump(dmcs_params))
    def process_dmcs_new_job(self, params):
        """Handle a new-job request from DMCS end to end.

        Sequence:
          1. Health-check the forwarders and collect the ones that answer.
          2. If fewer answered than there are rafts, refuse the job via
             ``insufficient_base_resources``.
          3. Otherwise ask NCSA for resources (``ncsa_resources_query``).
          4. If NCSA does not answer, or answers negatively, set the healthy
             forwarders back to IDLE and report via ``ncsa_no_response`` or
             ``insufficient_ncsa_resources``.
          5. Otherwise distribute the job parameters to the paired
             forwarders and, once every one of them has acknowledged,
             accept the job.

        Args:
            params: New-job message from DMCS; RAFTS and JOB_NUM are read
                here, and the message is passed on to the helper methods.

        Returns:
            The result of whichever helper concluded the job
            (``accept_job``, ``insufficient_base_resources``,
            ``insufficient_ncsa_resources`` or ``ncsa_no_response``), or
            False when the forwarders did not all acknowledge their job
            parameters.
        """
        input_params = params
        needed_workers = len(input_params[RAFTS])
        # Stringified to match how the helper methods key the job scoreboard.
        job_num = str(input_params[JOB_NUM])
        ack_id = self.forwarder_health_check(input_params)

        self.ack_timer(7)  # This is a HUGE num seconds for now..final setting will be milliseconds
        healthy_forwarders = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        if healthy_forwarders is None:
            # No forwarder answered at all; treat as zero healthy forwarders.
            healthy_forwarders = []

        if needed_workers > len(healthy_forwarders):
            return self.insufficient_base_resources(input_params, healthy_forwarders)

        healthy_status = {"STATUS": "HEALTHY", "STATE": "READY_WITHOUT_PARAMS"}
        self.FWD_SCBD.set_forwarder_params(healthy_forwarders, healthy_status)

        ack_id = self.ncsa_resources_query(input_params, healthy_forwarders)

        self.ack_timer(3)

        idle_params = {'STATE': 'IDLE'}

        # Check ACK scoreboard for response from NCSA
        ncsa_response = self.ACK_SCBD.get_components_for_timed_ack(ack_id)
        if not ncsa_response:
            result = self.ncsa_no_response(input_params)
            # The NCSA candidates are drawn from healthy_forwarders, so
            # idling that whole list releases every candidate.
            self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_params)
            return result

        pairs = {}
        ack_bool = None
        try:
            ack_bool = ncsa_response[ACK_BOOL]
            if ack_bool == True:
                pairs = ncsa_response[PAIRS]
        except KeyError as e:
            # A malformed response is tolerated but no longer silent.
            LOGGER.error('NCSA response for job #%s is missing key %s', job_num, e)

        if ack_bool != True:
            # Not enough NCSA resources to do the job - notify DMCS.
            self.FWD_SCBD.set_forwarder_params(healthy_forwarders, idle_params)
            return self.insufficient_ncsa_resources(ncsa_response)

        # Distribute job params and tell DMCS I'm ready.
        fwd_ack_id = self.distribute_job_params(input_params, pairs)
        self.ack_timer(3)

        fwders = list(pairs.keys())
        fwd_params_response = self.ACK_SCBD.get_components_for_timed_ack(fwd_ack_id)
        if not fwd_params_response or len(fwd_params_response) != len(fwders):
            LOGGER.error('Not all forwarders acknowledged job params for job #%s', job_num)
            return False

        self.JOB_SCBD.set_value_for_job(job_num, "STATE", "BASE_TASK_PARAMS_SENT")
        self.JOB_SCBD.set_value_for_job(job_num, "TIME_BASE_TASK_PARAMS_SENT", get_timestamp())
        in_ready_state = {'STATE': 'READY_WITH_PARAMS'}
        self.FWD_SCBD.set_forwarder_params(fwders, in_ready_state)
        # Tell DMCS we are ready
        return self.accept_job(job_num)
Beispiel #14
0
 def process_job_params(self, params):
     """Save the transfer parameters of a job and send the params ack.

     The transfer parameters are written to the job scratchpad, the job is
     marked "READY_WITH_PARAMS" together with the current timestamp, and a
     DISTRIBUTOR_JOB_PARAMS_ACK response is sent.

     Args:
         params: Incoming message; TRANSFER_PARAMS and JOB_NUM are read.
     """
     transfer_params = params[TRANSFER_PARAMS]
     # Bound here so every scratchpad call below uses the same key.
     job_number = params[JOB_NUM]
     self._job_scratchpad.set_job_transfer_params(job_number, transfer_params)
     self._job_scratchpad.set_job_value(job_number, "STATE", "READY_WITH_PARAMS")
     self._job_scratchpad.set_job_value(job_number, "READY_WITH_PARAMS_TIME", get_timestamp())
     self.send_ack_response(DISTRIBUTOR_JOB_PARAMS_ACK, params)
Beispiel #15
0
 def process_health_check(self, params):
     """Register the job named in a health-check message and acknowledge it.

     Writes state "ADD_JOB" and an "ADD_JOB_TIME" timestamp to the job
     scratchpad, then sends a "DISTRIBUTOR_HEALTH_ACK" response.

     Args:
         params: Incoming health-check message; JOB_NUM is read.
     """
     job_id = params[JOB_NUM]
     updates = (("STATE", lambda: "ADD_JOB"),
                ("ADD_JOB_TIME", get_timestamp))
     # Values are produced lazily so the timestamp is taken right before
     # it is stored, after the state has been written.
     for field, make_value in updates:
         self._job_scratchpad.set_job_value(job_id, field, make_value())
     self.send_ack_response("DISTRIBUTOR_HEALTH_ACK", params)