Example #1
0
    def create_workers(self, worker_spec_list):
        """
        Report newly created workers to APFMon.

        The worker specs are split into shards of 20 and every shard is sent
        as one JSON list in a PUT request to ``<base_url>/jobs``. Workers
        without a batchID are skipped. Nothing is sent when APFMon reporting
        is not active. Errors raised while building or sending the reports
        are logged instead of being raised to the caller.

        :param worker_spec_list: worker specs to report; each one is read for
            batchID, workerID, computingSite, computingElement and
            workAttributes
        :return: None
        """
        start_time = time.time()
        tmp_log = core_utils.make_logger(_base_logger,
                                         'harvester_id={0}'.format(
                                             self.harvester_id),
                                         method_name='create_workers')

        if not self.__active:
            tmp_log.debug('APFMon reporting not enabled')
            return

        try:
            tmp_log.debug('start')

            url = '{0}/jobs'.format(self.base_url)

            for worker_spec_shard in generic_utils.create_shards(
                    worker_spec_list, 20):
                apfmon_workers = []
                for worker_spec in worker_spec_shard:
                    batch_id = worker_spec.batchID
                    worker_id = worker_spec.workerID
                    if not batch_id:
                        tmp_log.debug(
                            'no batchID found for workerID {0}... skipping'.
                            format(worker_id))
                        continue
                    factory = self.harvester_id
                    computingsite = worker_spec.computingSite
                    try:
                        ce = clean_ce(worker_spec.computingElement)
                    except AttributeError:
                        # fall back to the placeholder CE so the label can
                        # still be built
                        tmp_log.debug(
                            'no CE found for workerID {0} batchID {1}'.format(
                                worker_id, batch_id))
                        ce = NO_CE

                    # extract the log URLs; any URL that is not present in
                    # the work attributes is reported as an empty string
                    work_attribs = worker_spec.workAttributes or {}
                    stdout_url = work_attribs.get('stdOut', '')
                    stderr_url = work_attribs.get('stdErr', '')
                    log_url = work_attribs.get('batchLog', '')
                    jdl_url = work_attribs.get('jdl', '')

                    apfmon_worker = {
                        'cid': batch_id,
                        'factory': factory,
                        'label': '{0}-{1}'.format(computingsite, ce),
                        'jdlurl': jdl_url,
                        'stdouturl': stdout_url,
                        'stderrurl': stderr_url,
                        'logurl': log_url
                    }
                    tmp_log.debug('packed worker: {0}'.format(apfmon_worker))
                    apfmon_workers.append(apfmon_worker)

                payload = json.dumps(apfmon_workers)

                # a failing request only affects its own shard; the remaining
                # shards are still sent
                try:
                    r = requests.put(url,
                                     data=payload,
                                     timeout=self.__worker_timeout)
                    tmp_log.debug(
                        'worker creation for {0} ended with {1} {2}'.format(
                            apfmon_workers, r.status_code, r.text))
                except Exception:
                    tmp_log.debug(
                        'worker creation for {0} failed with {1}'.format(
                            apfmon_workers, traceback.format_exc()))

            end_time = time.time()
            tmp_log.debug('done (took {0})'.format(end_time - start_time))
        except Exception:
            tmp_log.error('Excepted with: {0}'.format(traceback.format_exc()))
Example #2
0
    def create_labels(self):
        """
        Create or update the APFMon labels (= panda queue + CE).

        The active queues are taken from the queue config mapper, split into
        shards of 20 and every shard is sent as one JSON list in a PUT request
        to ``<base_url>/labels``. Nothing is sent when APFMon reporting is not
        active. Errors for a single site or a single shard are logged and do
        not stop the processing of the remaining sites or shards.

        :return: None
        """
        start_time = time.time()
        tmp_log = core_utils.make_logger(_base_logger,
                                         'harvester_id={0}'.format(
                                             self.harvester_id),
                                         method_name='create_labels')

        if not self.__active:
            tmp_log.debug('APFMon reporting not enabled')
            return

        try:
            tmp_log.debug('start')

            url = '{0}/labels'.format(self.base_url)

            # get the active queues from the config mapper
            all_sites = self.queue_config_mapper.get_active_queues().keys()
            panda_queues_dict = PandaQueuesDict()

            # publish the active queues to APF mon in shards
            for sites in generic_utils.create_shards(all_sites, 20):
                labels = []
                for site in sites:
                    try:
                        site_info = panda_queues_dict.get(site, dict())
                        if not site_info:
                            tmp_log.warning(
                                'No site info for {0}'.format(site))
                            continue

                        # when no CEs associated to a queue, e.g. P1, HPCs, etc. Try to see if there is something
                        # in local configuration, otherwise set it to a dummy value
                        try:
                            ce = self.queue_config_mapper.queueConfig[
                                site].submitter['ceEndpoint']
                            queues = [{'ce_endpoint': ce}]
                        except KeyError:
                            # a missing or empty 'queues' entry falls back to
                            # the dummy CE instead of failing the whole site
                            queues = site_info.get('queues') or [{
                                'ce_endpoint': NO_CE
                            }]

                        for queue in queues:
                            try:
                                ce = clean_ce(queue['ce_endpoint'])
                            except Exception:
                                # no usable CE endpoint: the label name ends
                                # with an empty CE part
                                ce = ''

                            ce_queue_id = queue.get('ce_queue_id', 0)

                            labels.append({
                                'name': '{0}-{1}'.format(site, ce),
                                'wmsqueue': site,
                                'ce_queue_id': ce_queue_id,
                                'factory': self.harvester_id
                            })
                    except Exception:
                        tmp_log.error('Excepted for site {0} with: {1}'.format(
                            site, traceback.format_exc()))
                        continue

                payload = json.dumps(labels)

                # a failing request only affects its own shard; the remaining
                # shards are still sent
                try:
                    r = requests.put(url,
                                     data=payload,
                                     timeout=self.__label_timeout)
                    tmp_log.debug(
                        'label creation for {0} ended with {1} {2}'.format(
                            sites, r.status_code, r.text))
                except Exception:
                    tmp_log.error(
                        'label creation for {0} failed with {1}'.format(
                            sites, traceback.format_exc()))

            end_time = time.time()
            tmp_log.debug('done (took {0})'.format(end_time - start_time))
        except Exception:
            tmp_log.error('Excepted with: {0}'.format(traceback.format_exc()))