Beispiel #1
0
    def get_new_history_entry(self, req_dict, depth=0):
        """
        Form a new history entry dictionary for given request.

        The entry looks like ``{'Time': <unix timestamp>, 'Datasets': {...}}``
        where each dataset name maps to ``{'Type': ..., 'Events': ...}``.
        Output datasets that the DBS dataset list does not return are
        recorded with type 'NONE' and 0 events.

        :param req_dict: request dictionary; 'OutputDatasets' is read from it
            and '_id' is used in the error message.
        :param depth: not used by this method, kept so existing calls that
            pass it keep working.
        :return: the history entry, or None when the request has no output
            datasets or when the number of collected datasets differs from
            the number of output datasets.
        """
        output_datasets = req_dict.get('OutputDatasets', [])
        if not output_datasets:
            return None

        # Output dataset names that DBS has not reported yet
        missing_datasets = set(output_datasets)
        history_entry = {'Time': int(time.time()), 'Datasets': {}}
        dataset_list_url = '/dbs/prod/global/DBSReader/datasetlist'
        dbs_dataset_list = make_cmsweb_request(dataset_list_url, {
            'dataset': output_datasets,
            'detail': 1
        })
        for dbs_dataset in dbs_dataset_list:
            dataset_name = dbs_dataset['dataset']
            history_entry['Datasets'][dataset_name] = {
                'Type': dbs_dataset['dataset_access_type'],
                'Events': self.get_event_count_from_dbs(dataset_name)
            }
            # discard() rather than remove(): a name that is repeated or was
            # never requested must not raise KeyError here, the count check
            # below is what reports such a mismatch
            missing_datasets.discard(dataset_name)

        for dataset in missing_datasets:
            history_entry['Datasets'][dataset] = {'Type': 'NONE', 'Events': 0}

        if len(history_entry['Datasets']) != len(output_datasets):
            self.logger.error(
                'Wrong number of datasets for %s, returning None',
                req_dict['_id'])
            return None

        return history_entry
Beispiel #2
0
 def get_new_dict_from_reqmgr2(self, request_name):
     """
     Fetch a request document from RequestManager and reduce it to the
     attributes that are kept, plus a few derived fields.

     Derived fields are '_id' (the request name), 'TotalEvents',
     'Campaigns', sorted 'OutputDatasets', an empty 'EventNumberHistory'
     and 'RequestPriority' converted to int (0 when absent).
     """
     raw_request = make_cmsweb_request(
         '/couchdb/reqmgr_workload_cache/%s' % (request_name))
     # Both values are computed from the full document, before trimming
     total_events = self.get_expected_events_with_dict(raw_request)
     campaign_list = self.get_campaigns_from_request(raw_request)
     kept_attributes = [
         'AcquisitionEra', 'InputDataset', 'Memory', 'OutputDatasets',
         'PrepID', 'RequestName', 'RequestPriority', 'RequestTransition',
         'RequestType', 'SizePerEvent', 'TimePerEvent'
     ]
     request = pick_attributes(raw_request, kept_attributes)

     # Keep only status and time of every transition
     transitions = []
     for transition in request.get('RequestTransition', []):
         transitions.append({
             'Status': transition['Status'],
             'UpdateTime': transition['UpdateTime']
         })

     request['RequestTransition'] = transitions
     request.update({
         '_id': request_name,
         'TotalEvents': total_events,
         'Campaigns': campaign_list,
         'OutputDatasets': self.sort_datasets(request['OutputDatasets']),
         'EventNumberHistory': [],
         'RequestPriority': int(request.get('RequestPriority', 0)),
     })
     return request
Beispiel #3
0
    def get_event_count_from_dbs(self, dataset_name):
        """
        Get event count for specified dataset from DBS.

        :param dataset_name: dataset name that is put into the filesummaries
            query.
        :return: 'num_event' of the first file summary as int, or 0 when
            there are no summaries or the event count is empty.
        """
        query_url = '/dbs/prod/global/DBSReader/filesummaries?dataset=%s' % (
            dataset_name)
        filesummaries = make_cmsweb_request(query_url)
        # Truthiness covers an empty list as well as a missing (None) response
        if not filesummaries:
            return 0

        # "or 0": int() would raise on a null event count
        return int(filesummaries[0]['num_event'] or 0)
Beispiel #4
0
 def get_updated_dataset_list_from_dbs(self, since_timestamp=0):
     """
     Fetch names of datasets from the DBS 'datasets' endpoint, passing
     the given timestamp as 'min_ldate' and any access type.

     Returns a list with the 'dataset' value of every returned item.
     """
     url_template = '/dbs/prod/global/DBSReader/datasets?min_ldate=%d&dataset_access_type=*'
     query = url_template % (since_timestamp)
     start_message = 'Getting the list of modified datasets since %d from %s' % (
         since_timestamp, query)
     self.logger.info(start_message)

     names = []
     for entry in make_cmsweb_request(query):
         names.append(entry['dataset'])

     self.logger.info('Got %d datasets' % (len(names)))
     return names
Beispiel #5
0
    def get_expected_events_with_dict(self, req_dict):
        """
        Get number of expected events of a request.

        Filter efficiency is taken from the request itself, then 'Task1',
        then 'Step1', and defaults to 1.

        For requests that are not resubmissions the first usable source wins:
        1. 'TotalInputEvents' scaled by filter efficiency, when
           'TotalInputFiles' is greater than zero;
        2. 'RequestNumEvents' of the request, then 'Task1', then 'Step1';
        3. DBS event count of 'InputDataset' of 'Task1', then 'Step1'.

        For resubmissions, requests with the same PrepID are fetched from
        reqmgr2 and 'TotalInputEvents' of the first one that is not a
        resubmission is scaled by filter efficiency.

        Null (None) values are treated the same as absent ones.

        :param req_dict: request dictionary.
        :return: expected number of events as int, or -1 (with an error
            logged) when it cannot be determined.
        """
        # First task/step dictionaries in lookup order
        first_steps = [req_dict[name] for name in ('Task1', 'Step1') if name in req_dict]
        sections = [req_dict] + first_steps

        f = 1.
        for section in sections:
            if 'FilterEfficiency' in section:
                f = float(section['FilterEfficiency'])
                break

        req_type = req_dict.get('RequestType', '').lower()
        if req_type != 'resubmission':
            # "or 0" keeps the comparison valid when the value is null
            if (req_dict.get('TotalInputFiles') or 0) > 0:
                total_input_events = req_dict.get('TotalInputEvents')
                if total_input_events is not None:
                    return int(f * total_input_events)

            for section in sections:
                num_events = section.get('RequestNumEvents')
                # A null value in one place must not hide a value further down
                if num_events is not None:
                    return int(num_events)

            for section in first_steps:
                if 'InputDataset' in section:
                    return self.get_event_count_from_dbs(section['InputDataset'])

        else:
            prep_id = req_dict.get('PrepID')
            # Without a PrepID there is nothing to look the original up by
            if prep_id:
                url = '/reqmgr2/data/request?mask=TotalInputEvents&mask=RequestType&prep_id=%s' % (
                    prep_id)
                ret = make_cmsweb_request(url)
                ret = ret['result']
                if ret:
                    for request in ret[0].values():
                        if request['RequestType'].lower() == 'resubmission':
                            continue

                        if request['TotalInputEvents'] is not None:
                            return int(f * request['TotalInputEvents'])

        self.logger.error('%s does not have total events!',
                          req_dict.get('_id', req_dict.get('RequestName')))
        return -1
Beispiel #6
0
    def get_active_requests_from_wmstats(self):
        """
        Get list of requests which are currently putting data to DBS.

        :return: list with 'RequestName' of every item in 'result' of the
            wmstats response; an empty list when there is no response.
        """
        self.logger.info(
            'Will get list of requests which are currently putting data to DBS'
        )
        url = '/wmstatsserver/data/filtered_requests?mask=RequestName'
        response = make_cmsweb_request(url)
        # Calling .get() on a missing response would raise AttributeError,
        # report the problem and give back nothing instead
        if response is None:
            self.logger.error('Could not get list of requests from wmstats')
            return []

        request_list = [
            request['RequestName'] for request in response.get('result', [])
        ]

        self.logger.info(
            'Found %d requests which are currently putting data to DBS',
            len(request_list))
        return request_list
Beispiel #7
0
    def get_new_dict_from_reqmgr2(self, workflow_name):
        """
        Fetch a workflow document from RequestManager and reduce it to the
        attributes that are kept, plus a few derived fields.

        'InputDataset' and 'ProcessingString' are overwritten with the value
        from 'Task1' (or else 'Step1') when one is present there. A
        'ProcessingString' that is not a string and a 'PrepID' that is None
        are removed from the result.
        """
        raw_workflow = make_cmsweb_request(f'/couchdb/reqmgr_workload_cache/{workflow_name}')
        # All three values are computed from the full document, before trimming
        total_events = self.get_expected_events_with_dict(raw_workflow)
        campaign_list = self.get_campaigns_from_workflow(raw_workflow)
        request_list = self.get_requests_from_workflow(raw_workflow)

        # First task/step value wins over the top level one
        for attribute in ('InputDataset', 'ProcessingString'):
            for section in ('Task1', 'Step1'):
                if section in raw_workflow and attribute in raw_workflow[section]:
                    raw_workflow[attribute] = raw_workflow[section][attribute]
                    break

        workflow = pick_attributes(raw_workflow, [
            'AcquisitionEra',
            'CMSSWVersion',
            'InputDataset',
            'OutputDatasets',
            'PrepID',
            'ProcessingString',
            'RequestName',
            'RequestPriority',
            'RequestTransition',
            'RequestType',
            'SizePerEvent',
            'TimePerEvent',
        ])

        # Keep only status and time of every transition
        transitions = []
        for transition in workflow.get('RequestTransition', []):
            transitions.append({'Status': transition['Status'],
                                'UpdateTime': transition['UpdateTime']})

        workflow['RequestTransition'] = transitions
        workflow.update({
            '_id': workflow_name,
            'TotalEvents': total_events,
            'Campaigns': campaign_list,
            'Requests': request_list,
            'OutputDatasets': self.sort_datasets(self.flat_list(workflow['OutputDatasets'])),
            'EventNumberHistory': [],
            'RequestPriority': int(workflow.get('RequestPriority', 0)),
        })

        # The str default means an absent key is left alone
        if not isinstance(workflow.get('ProcessingString', ''), str):
            del workflow['ProcessingString']

        # The non-None default means an absent key is left alone
        if workflow.get('PrepID', '') is None:
            del workflow['PrepID']

        return workflow
Beispiel #8
0
    def get_expected_events_with_dict(self, wf_dict):
        """
        Get number of expected events of a workflow.

        Filter efficiency is taken from the workflow itself, then 'Task1',
        then 'Step1', and defaults to 1.

        For workflows that are not resubmissions the first usable source wins:
        1. 'TotalInputEvents' scaled by filter efficiency, when
           'TotalInputFiles' is greater than zero;
        2. 'RequestNumEvents' of the workflow, then 'Task1', then 'Step1';
        3. DBS event count of 'InputDataset' of 'Task1', then 'Step1'.

        For resubmissions, workflows with the same PrepID are fetched from
        reqmgr2 and 'TotalInputEvents' of the first one that is not a
        resubmission is scaled by filter efficiency.

        Null (None) values are treated the same as absent ones.

        :param wf_dict: workflow dictionary.
        :return: expected number of events as int, or -1 (with an error
            logged) when it cannot be determined.
        """
        # First task/step dictionaries in lookup order
        first_steps = [wf_dict[name] for name in ('Task1', 'Step1') if name in wf_dict]
        sections = [wf_dict] + first_steps

        filter_eff = 1.
        for section in sections:
            if 'FilterEfficiency' in section:
                filter_eff = float(section['FilterEfficiency'])
                break

        wf_type = wf_dict.get('RequestType', '').lower()
        if wf_type != 'resubmission':
            # "or 0" keeps the comparison valid when the value is null
            if (wf_dict.get('TotalInputFiles') or 0) > 0:
                total_input_events = wf_dict.get('TotalInputEvents')
                if total_input_events is not None:
                    return int(filter_eff * total_input_events)

            for section in sections:
                num_events = section.get('RequestNumEvents')
                # A null value in one place must not hide a value further down
                if num_events is not None:
                    return int(num_events)

            for section in first_steps:
                if 'InputDataset' in section:
                    return self.get_event_count_from_dbs(section['InputDataset'])

        else:
            prep_id = wf_dict.get('PrepID')
            # Without a PrepID there is nothing to look the original up by
            if prep_id:
                url = f'/reqmgr2/data/request?mask=TotalInputEvents&mask=RequestType&prep_id={prep_id}'
                ret = make_cmsweb_request(url)
                ret = ret['result']
                if ret:
                    for workflow in ret[0].values():
                        if workflow['RequestType'].lower() == 'resubmission':
                            continue

                        if workflow['TotalInputEvents'] is not None:
                            return int(filter_eff * workflow['TotalInputEvents'])

        self.logger.error('%s does not have total events!',
                          wf_dict.get('_id', wf_dict.get('RequestName')))
        return -1
Beispiel #9
0
 def get_list_of_changed_workflows(self):
     """
     Read the RequestManager '_changes' feed starting at the stored
     'last_reqmgr_sequence' setting and split it into updated and deleted
     workflow ids.

     Ids that contain '_design' are left out of both lists. Returns a
     tuple of (updated ids, deleted ids, 'last_seq' of the response as int).
     """
     since_seq = self.database.get_setting('last_reqmgr_sequence', 0)
     url = f'/couchdb/reqmgr_workload_cache/_changes?since={since_seq}'
     self.logger.info('Getting the list of all workflows since %d from %s', since_seq, url)
     response = make_cmsweb_request(url)
     last_seq = int(response['last_seq'])

     updated_ids = []
     deleted_ids = []
     for change in response['results']:
         # Entries flagged as deleted go to their own list
         bucket = deleted_ids if change.get('deleted', False) else updated_ids
         wf_id = change['id']
         if '_design' not in wf_id:
             bucket.append(wf_id)

     self.logger.info('Got %d updated workflows. Got %d deleted workflows.',
                      len(updated_ids),
                      len(deleted_ids))
     return updated_ids, deleted_ids, last_seq
Beispiel #10
0
    def get_active_workflows_from_wmstats(self):
        """
        Ask wmstats for workflows which are currently putting data to DBS.

        Returns a list with 'RequestName' of every item in 'result' of the
        response. An AttributeError raised by the request is logged, and an
        empty list is returned when there is no response.
        """
        self.logger.info('Will get list of workflows which are currently putting data to DBS')
        response = None
        try:
            response = make_cmsweb_request(
                '/wmstatsserver/data/filtered_requests?mask=RequestName',
                timeout=600,
                keep_open=False)
        except AttributeError as error:
            # response stays None and is handled right below
            self.logger.error(error)

        if response is None:
            self.logger.error('Could not get list of workflows from wmstats')
            return []

        names = []
        for entry in response.get('result', []):
            names.append(entry['RequestName'])

        self.logger.info('Found %d workflows which are currently putting data to DBS',
                         len(names))
        return names