def get_new_history_entry(self, req_dict, depth=0):
    """
    Form a new history entry dictionary for given request.

    The entry has the shape
    ``{'Time': <int unix time>, 'Datasets': {<name>: {'Type': ..., 'Events': ...}}}``.
    Every name listed in ``req_dict['OutputDatasets']`` gets an entry: datasets
    reported by the DBS ``datasetlist`` call carry their ``dataset_access_type``
    and the event count from ``get_event_count_from_dbs``; datasets DBS did not
    report are recorded with type ``'NONE'`` and 0 events.

    :param req_dict: request dictionary; ``OutputDatasets`` is read, ``_id`` is
                     used only for the error message
    :param depth: not used by this method, kept for interface compatibility
    :return: the history entry, or None when the request has no output
             datasets or the number of collected datasets does not match the
             number of requested ones
    """
    output_datasets = req_dict.get('OutputDatasets', [])
    # Covers both an empty list and an explicit None value
    if not output_datasets:
        return None

    # Names that DBS has not reported (yet)
    missing_datasets = set(output_datasets)
    history_entry = {'Time': int(time.time()), 'Datasets': {}}
    dataset_list_url = '/dbs/prod/global/DBSReader/datasetlist'
    dbs_dataset_list = make_cmsweb_request(dataset_list_url,
                                           {'dataset': output_datasets,
                                            'detail': 1})
    for dbs_dataset in dbs_dataset_list:
        dataset_name = dbs_dataset['dataset']
        history_entry['Datasets'][dataset_name] = {
            'Type': dbs_dataset['dataset_access_type'],
            'Events': self.get_event_count_from_dbs(dataset_name)
        }
        # discard, not remove: a name that DBS repeats or that was never
        # requested must not raise KeyError here. An unrequested name is
        # caught by the length check below instead.
        missing_datasets.discard(dataset_name)

    for dataset in missing_datasets:
        history_entry['Datasets'][dataset] = {'Type': 'NONE', 'Events': 0}

    if len(history_entry['Datasets']) != len(output_datasets):
        self.logger.error('Wrong number of datasets for %s, returning None',
                          req_dict.get('_id'))
        return None

    return history_entry
def get_new_dict_from_reqmgr2(self, request_name):
    """
    Fetch a request document from the RequestManager cache and build the
    trimmed dictionary for it.

    Expected events and campaigns are derived from the full document first.
    The document is then passed through ``pick_attributes`` with a fixed
    attribute list (presumably keeping only those keys - confirm against the
    helper) and the derived fields are added.

    :param request_name: name of the request, also stored as ``_id``
    :return: the trimmed request dictionary
    """
    full_request = make_cmsweb_request(
        '/couchdb/reqmgr_workload_cache/%s' % (request_name))

    # Both values are computed from the untrimmed document
    total_events = self.get_expected_events_with_dict(full_request)
    campaign_list = self.get_campaigns_from_request(full_request)

    kept_attributes = ['AcquisitionEra',
                       'InputDataset',
                       'Memory',
                       'OutputDatasets',
                       'PrepID',
                       'RequestName',
                       'RequestPriority',
                       'RequestTransition',
                       'RequestType',
                       'SizePerEvent',
                       'TimePerEvent']
    trimmed = pick_attributes(full_request, kept_attributes)

    # Keep only status and timestamp of every transition
    transitions = []
    for transition in trimmed.get('RequestTransition', []):
        transitions.append({'Status': transition['Status'],
                            'UpdateTime': transition['UpdateTime']})

    trimmed['RequestTransition'] = transitions
    trimmed['_id'] = request_name
    trimmed['TotalEvents'] = total_events
    trimmed['Campaigns'] = campaign_list
    trimmed['OutputDatasets'] = self.sort_datasets(trimmed['OutputDatasets'])
    trimmed['EventNumberHistory'] = []
    trimmed['RequestPriority'] = int(trimmed.get('RequestPriority', 0))
    return trimmed
def get_event_count_from_dbs(self, dataset_name):
    """
    Return the event count DBS reports for a dataset.

    Queries the DBS ``filesummaries`` endpoint and reads ``num_event`` of the
    first summary.

    :param dataset_name: dataset name placed into the query string as-is
    :return: ``num_event`` of the first summary as int, or 0 when the
             response is empty
    """
    summaries = make_cmsweb_request(
        '/dbs/prod/global/DBSReader/filesummaries?dataset=%s' % (dataset_name))
    if len(summaries) > 0:
        first_summary = summaries[0]
        return int(first_summary['num_event'])

    return 0
def get_updated_dataset_list_from_dbs(self, since_timestamp=0):
    """
    Return names of datasets returned by the DBS ``datasets`` endpoint when
    queried with ``min_ldate`` set to the given timestamp and any access type.

    :param since_timestamp: number formatted into the ``min_ldate`` query
                            parameter
    :return: list with the ``dataset`` value of every returned item
    """
    endpoint = '/dbs/prod/global/DBSReader/datasets?min_ldate=%d&dataset_access_type=*' % (
        since_timestamp)
    start_message = 'Getting the list of modified datasets since %d from %s' % (
        since_timestamp, endpoint)
    self.logger.info(start_message)

    dataset_names = []
    for item in make_cmsweb_request(endpoint):
        dataset_names.append(item['dataset'])

    self.logger.info('Got %d datasets' % (len(dataset_names)))
    return dataset_names
def get_expected_events_with_dict(self, req_dict):
    """
    Get number of expected events of a request.

    Filter efficiency is taken from the request itself, then from ``Task1``,
    then from ``Step1``; it defaults to 1.

    For every request type except resubmission the first usable source wins:

    1. ``TotalInputEvents`` scaled by filter efficiency, only when
       ``TotalInputFiles`` is greater than 0
    2. ``RequestNumEvents`` of the request, of ``Task1``, of ``Step1``
    3. DBS event count of the ``InputDataset`` of ``Task1``, of ``Step1``

    For a resubmission, requests with the same ``PrepID`` are fetched from
    RequestManager and the first one that is not a resubmission and has
    ``TotalInputEvents`` is used, scaled by filter efficiency. The lookup is
    skipped when the request has no ``PrepID``.

    Null values are treated the same as missing values, so they no longer
    end in a ``TypeError``.

    :param req_dict: request dictionary
    :return: expected number of events as int, or -1 (after logging an
             error) when no source provided a value
    """
    task1 = req_dict.get('Task1', {})
    step1 = req_dict.get('Step1', {})

    filter_eff = 1.
    for section in (req_dict, task1, step1):
        if 'FilterEfficiency' in section:
            filter_eff = float(section['FilterEfficiency'])
            break

    # RequestType may be present but null
    req_type = (req_dict.get('RequestType') or '').lower()
    if req_type != 'resubmission':
        total_input_events = req_dict.get('TotalInputEvents')
        has_input_files = (req_dict.get('TotalInputFiles') or 0) > 0
        if has_input_files and total_input_events is not None:
            return int(filter_eff * total_input_events)

        for section in (req_dict, task1, step1):
            # Same null check on every level, not only on the request itself
            if 'RequestNumEvents' in section and section['RequestNumEvents'] is not None:
                return int(section['RequestNumEvents'])

        for section in (task1, step1):
            if 'InputDataset' in section:
                return self.get_event_count_from_dbs(section['InputDataset'])

    else:
        prep_id = req_dict.get('PrepID')
        # Without a PrepID there is nothing to look the original request up by
        if prep_id is not None:
            url = '/reqmgr2/data/request?mask=TotalInputEvents&mask=RequestType&prep_id=%s' % (
                prep_id)
            ret = make_cmsweb_request(url)
            ret = ret['result']
            if len(ret) > 0:
                ret = ret[0]
                for r in ret:
                    if ret[r]['RequestType'].lower() != 'resubmission' and ret[
                            r]['TotalInputEvents'] is not None:
                        return int(filter_eff * ret[r]['TotalInputEvents'])

    self.logger.error('%s does not have total events!', req_dict.get('_id'))
    return -1
def get_active_requests_from_wmstats(self):
    """
    Get list of requests which are currently putting data to DBS.

    Reads the wmstatsserver ``filtered_requests`` endpoint with a
    ``RequestName`` mask and collects the names from the ``result`` list.

    :return: list of request names, empty when the response has no ``result``
    """
    self.logger.info(
        'Will get list of requests which are currently putting data to DBS')
    response = make_cmsweb_request(
        '/wmstatsserver/data/filtered_requests?mask=RequestName')

    request_names = []
    for item in response.get('result', []):
        request_names.append(item['RequestName'])

    found_message = 'Found %d requests which are currently putting data to DBS' % (
        len(request_names))
    self.logger.info(found_message)
    return request_names
def get_new_dict_from_reqmgr2(self, workflow_name):
    """
    Fetch a workflow document from the RequestManager cache and build the
    trimmed dictionary for it.

    Expected events, campaigns and requests are derived from the full
    document. ``InputDataset`` and ``ProcessingString`` found in ``Task1``
    (or, failing that, ``Step1``) are copied to the top level before the
    document is passed through ``pick_attributes``. A ``ProcessingString``
    that is not a string and a ``PrepID`` that is None are dropped from the
    result.

    :param workflow_name: name of the workflow, also stored as ``_id``
    :return: the trimmed workflow dictionary
    """
    full_workflow = make_cmsweb_request(
        f'/couchdb/reqmgr_workload_cache/{workflow_name}')

    # Derived values come from the untouched document
    total_events = self.get_expected_events_with_dict(full_workflow)
    campaign_list = self.get_campaigns_from_workflow(full_workflow)
    request_list = self.get_requests_from_workflow(full_workflow)

    # Task1 takes precedence over Step1 for both hoisted attributes
    for hoisted_key in ('InputDataset', 'ProcessingString'):
        for section in ('Task1', 'Step1'):
            if section in full_workflow and hoisted_key in full_workflow[section]:
                full_workflow[hoisted_key] = full_workflow[section][hoisted_key]
                break

    trimmed = pick_attributes(full_workflow, ['AcquisitionEra',
                                              'CMSSWVersion',
                                              'InputDataset',
                                              'OutputDatasets',
                                              'PrepID',
                                              'ProcessingString',
                                              'RequestName',
                                              'RequestPriority',
                                              'RequestTransition',
                                              'RequestType',
                                              'SizePerEvent',
                                              'TimePerEvent'])

    # Keep only status and timestamp of every transition
    transitions = []
    for transition in trimmed.get('RequestTransition', []):
        transitions.append({'Status': transition['Status'],
                            'UpdateTime': transition['UpdateTime']})

    trimmed['RequestTransition'] = transitions
    trimmed['_id'] = workflow_name
    trimmed['TotalEvents'] = total_events
    trimmed['Campaigns'] = campaign_list
    trimmed['Requests'] = request_list
    flat_datasets = self.flat_list(trimmed['OutputDatasets'])
    trimmed['OutputDatasets'] = self.sort_datasets(flat_datasets)
    trimmed['EventNumberHistory'] = []
    trimmed['RequestPriority'] = int(trimmed.get('RequestPriority', 0))

    # The '' default stands for "key is absent", which must not be deleted
    if not isinstance(trimmed.get('ProcessingString', ''), str):
        del trimmed['ProcessingString']

    if trimmed.get('PrepID', '') is None:
        del trimmed['PrepID']

    return trimmed
def get_expected_events_with_dict(self, wf_dict):
    """
    Get number of expected events of a workflow.

    Filter efficiency is taken from the workflow itself, then from ``Task1``,
    then from ``Step1``; it defaults to 1.

    For every workflow type except resubmission the first usable source wins:

    1. ``TotalInputEvents`` scaled by filter efficiency, only when
       ``TotalInputFiles`` is greater than 0
    2. ``RequestNumEvents`` of the workflow, of ``Task1``, of ``Step1``
    3. DBS event count of the ``InputDataset`` of ``Task1``, of ``Step1``

    For a resubmission, workflows with the same ``PrepID`` are fetched from
    RequestManager and the first one that is not a resubmission and has
    ``TotalInputEvents`` is used, scaled by filter efficiency. The lookup is
    skipped when the workflow has no ``PrepID``.

    Null values are treated the same as missing values, so they no longer
    end in a ``TypeError``.

    :param wf_dict: workflow dictionary
    :return: expected number of events as int, or -1 (after logging an
             error) when no source provided a value
    """
    task1 = wf_dict.get('Task1', {})
    step1 = wf_dict.get('Step1', {})

    filter_eff = 1.
    for section in (wf_dict, task1, step1):
        if 'FilterEfficiency' in section:
            filter_eff = float(section['FilterEfficiency'])
            break

    # RequestType may be present but null
    wf_type = (wf_dict.get('RequestType') or '').lower()
    if wf_type != 'resubmission':
        total_input_events = wf_dict.get('TotalInputEvents')
        has_input_files = (wf_dict.get('TotalInputFiles') or 0) > 0
        if has_input_files and total_input_events is not None:
            return int(filter_eff * total_input_events)

        for section in (wf_dict, task1, step1):
            # Same null check on every level, not only on the workflow itself
            if 'RequestNumEvents' in section and section['RequestNumEvents'] is not None:
                return int(section['RequestNumEvents'])

        for section in (task1, step1):
            if 'InputDataset' in section:
                return self.get_event_count_from_dbs(section['InputDataset'])

    else:
        prep_id = wf_dict.get('PrepID')
        # Without a PrepID there is nothing to look the original workflow up by
        if prep_id is not None:
            url = f'/reqmgr2/data/request?mask=TotalInputEvents&mask=RequestType&prep_id={prep_id}'
            ret = make_cmsweb_request(url)
            ret = ret['result']
            if ret:
                ret = ret[0]
                for request_name in ret:
                    request = ret[request_name]
                    if request['RequestType'].lower() != 'resubmission' and request['TotalInputEvents'] is not None:
                        return int(filter_eff * request['TotalInputEvents'])

    self.logger.error('%s does not have total events!', wf_dict.get('_id'))
    return -1
def get_list_of_changed_workflows(self):
    """
    Get workflows that changed in RequestManager since the stored sequence.

    Reads the ``last_reqmgr_sequence`` setting (0 when unset), asks the
    ``_changes`` feed of the workload cache for everything after it and
    splits the returned ids into updated and deleted ones. Ids containing
    ``_design`` are left out of both lists.

    :return: tuple ``(updated ids, deleted ids, last_seq of the response as int)``
    """
    previous_seq = self.database.get_setting('last_reqmgr_sequence', 0)
    changes_url = f'/couchdb/reqmgr_workload_cache/_changes?since={previous_seq}'
    self.logger.info('Getting the list of all workflows since %d from %s',
                     previous_seq,
                     changes_url)
    changes = make_cmsweb_request(changes_url)
    newest_seq = int(changes['last_seq'])

    updated_ids = []
    deleted_ids = []
    for change in changes['results']:
        # An entry goes to exactly one of the two lists
        bucket = deleted_ids if change.get('deleted', False) else updated_ids
        workflow_id = change['id']
        if '_design' in workflow_id:
            continue

        bucket.append(workflow_id)

    self.logger.info('Got %d updated workflows. Got %d deleted workflows.',
                     len(updated_ids),
                     len(deleted_ids))
    return updated_ids, deleted_ids, newest_seq
def get_active_workflows_from_wmstats(self):
    """
    Get list of workflows which are currently putting data to DBS.

    Reads the wmstatsserver ``filtered_requests`` endpoint with a
    ``RequestName`` mask, using a 600 timeout and ``keep_open=False``.
    An ``AttributeError`` raised by the request is logged and handled like
    a missing response.

    :return: list of workflow names; empty list when no response was
             obtained or the response has no ``result``
    """
    self.logger.info('Will get list of workflows which are currently putting data to DBS')
    response = None
    try:
        response = make_cmsweb_request('/wmstatsserver/data/filtered_requests?mask=RequestName',
                                       timeout=600,
                                       keep_open=False)
    except AttributeError as error:
        self.logger.error(error)

    if response is None:
        self.logger.error('Could not get list of workflows from wmstats')
        return []

    workflow_names = [item['RequestName'] for item in response.get('result', [])]
    self.logger.info('Found %d workflows which are currently putting data to DBS',
                     len(workflow_names))
    return workflow_names