def get_jobdetail(self):
    """
    Return the job detail for this workflow, fetching it on first use.

    The response of the ``wmstatsserver`` ``jobdetail`` endpoint is stored
    in ``self.jobdetail_``; later calls return the stored value as long as
    it is not ``None``.

    :returns: whatever ``get_json`` returned for the job detail request
    """
    if self.jobdetail_ is not None:
        return self.jobdetail_

    endpoint = f"/wmstatsserver/data/jobdetail/{self.name_}"
    self.jobdetail_ = get_json(self.url_, endpoint, use_cert=True)
    return self.jobdetail_
def get_workflow_parameters(self):
    """
    Get the workflow parameters from ReqMgr2.

    No caching is done here: every call issues a new request.

    See the `ReqMgr 2 wiki <https://github.com/dmwm/WMCore/wiki/reqmgr2-apis>`_
    for more details.

    :returns: Parameters for the workflow from ReqMgr2, or ``None`` when the
              request fails or the response has no entry keyed by the
              workflow name.
    :rtype: dict
    """
    try:
        result = get_json(self.url, '/reqmgr2/data/request',
                          params={'name': self.workflow},
                          use_https=True, use_cert=True)

        # ``items()`` works on both Python 2 and 3, unlike ``iteritems()``.
        for params in result['result']:
            for key, item in params.items():
                if key == self.workflow:
                    return item

    except Exception as error:  # deliberate best-effort: report and return None
        # Single-argument print calls give the same output on Python 2 and 3.
        print('Failed to get from reqmgr %s' % self.workflow)
        print(str(error))

    return None
def explain_errors(workflow, errorcode):
    """
    Get example errors for a given workflow and errorcode

    :param str workflow: is the workflow name
    :param str errorcode: is the error code
    :returns: the error samples found under every step and site of the
              workflow's job detail for the given error code, concatenated
              into one list
    :rtype: list
    """
    result = get_json('cmsweb.cern.ch',
                      '/wmstatsserver/data/jobdetail/%s' % workflow,
                      use_cert=True)

    output = []

    # The payload key is 'result' (it is read that way just below); the
    # previous guard looked up 'results' instead.
    if not result['result']:
        return output

    for stepdata in result['result'][0].get(workflow, {}).values():
        for sitedata in stepdata.get('jobfailed', {}).get(errorcode, {}).values():
            # Only the first sample of each site is inspected.
            for samples in sitedata['samples'][0]['errors'].values():
                output.extend(samples)

    return output
def get_workflow_parameters(self):
    """
    Ask ReqMgr2 for the parameters of this workflow.

    Each call issues a new request; nothing is stored on the instance.
    See the `ReqMgr 2 wiki <https://github.com/dmwm/WMCore/wiki/reqmgr2-apis>`_
    for more details.

    :returns: The value stored under the workflow name in the first
              ``result`` entry that has it, or ``None`` if there is no such
              entry or anything raised while fetching or scanning.
    :rtype: dict
    """
    try:
        response = get_json(self.url, '/reqmgr2/data/request',
                            params={'name': self.workflow},
                            use_https=True, use_cert=True)

        matches = (
            parameters
            for entry in response['result']
            for name, parameters in entry.items()
            if name == self.workflow
        )
        # First match wins; no match yields None.
        return next(matches, None)

    except Exception as error:
        print('Failed to get from reqmgr', self.workflow)
        print(str(error))

    return None
def __init__(self, prepid, url="cmsweb.cern.ch"):
    """
    Store the prep ID and host, then load the matching ReqMgr2 request data.

    :param prepid: the prep ID to look up; stored as ``self.name_``
    :param str url: host to query; stored as ``self.url_``
    """
    self.name_ = prepid
    self.url_ = url

    query = {'prep_id': self.name_, 'detail': 'true'}
    response = get_json(self.url_, '/reqmgr2/data/request',
                        params=query, use_cert=True)

    # Keep only the first entry of 'result'; fall back to an empty dict.
    matches = response.get('result', [])
    if matches:
        self.data_ = matches[0]
    else:
        self.data_ = {}
def _get_jobdetail(self):
    """
    Request the job detail of this workflow from the wmstatsserver.

    This method itself does no caching.

    :returns: whatever ``get_json`` returns for the jobdetail endpoint
    :rtype: dict
    """
    endpoint = '/wmstatsserver/data/jobdetail/%s' % self.workflow
    return get_json(self.url, endpoint, use_cert=True)
def get_jobdetail(self):
    """Return the wmstatsserver job detail, fetching it when none is stored.

    A falsy ``self.jobdetail_`` (``None``, empty dict, ...) triggers a new
    request whose response replaces the stored value.

    :return: job details
    :rtype: dict
    """
    if self.jobdetail_:
        return self.jobdetail_

    endpoint = f"/wmstatsserver/data/jobdetail/{self.name_}"
    self.jobdetail_ = webtools.get_json(self.url_, endpoint, use_cert=True)
    return self.jobdetail_
def open_statuses(location):
    """
    Load JSON from a local file or, failing that, from a URL.

    :param str location: path of an existing file, otherwise treated as a URL
    :returns: the parsed JSON content
    """
    if not os.path.isfile(location):
        # Not a local file: split the URL and fetch it over HTTPS, passing
        # along the cookie settings from the 'data' section of the config.
        parsed = urlparse.urlparse(location)
        cookie_settings = serverconfig.config_dict()['data']
        return get_json(parsed.netloc, parsed.path,
                        use_https=True,
                        cookie_file=cookie_settings.get('cookie_file'),
                        cookie_pem=cookie_settings.get('cookie_pem'),
                        cookie_key=cookie_settings.get('cookie_key'))

    with open(location, 'r') as handle:
        return json.load(handle)
def open_location(data_location):
    """
    This function assumes that the contents of the location is in JSON format.
    It opens the data location and returns the dictionary.

    If the server configuration has an ``oracle`` entry, ``data_location`` is
    ignored and the workflows whose status contains "manual" are read from
    the database and passed to ``errors_from_list``.

    :param str data_location: The location of the file or url
    :returns: ``None`` if the location is neither an existing file nor a
              valid URL; the JSON content itself unless its first value is a
              list; otherwise the result of ``errors_from_list`` for the
              workflows having a status that contains ``'manual'``.
    :rtype: dict
    """
    config_dict = serverconfig.config_dict()

    if 'oracle' in config_dict:
        oracle_db_conn = cx_Oracle.connect(*config_dict['oracle'])  # pylint:disable=c-extension-no-member
        try:
            oracle_cursor = oracle_db_conn.cursor()
            oracle_cursor.execute(
                "SELECT NAME FROM CMS_UNIFIED_ADMIN.workflow WHERE lower(STATUS) LIKE '%manual%'"
            )
            wkfs = [row for row, in oracle_cursor]
        finally:
            # Close the connection even when the query raises.
            oracle_db_conn.close()
        return errors_from_list(wkfs)

    raw = None

    if os.path.isfile(data_location):
        with open(data_location, 'r') as input_file:
            raw = json.load(input_file)

    elif validators.url(data_location):
        components = urlparse.urlparse(data_location)

        # Anything we need for the Shibboleth cookie could be in the config file
        cookie_stuff = config_dict['data']

        raw = get_json(components.netloc, components.path,
                       use_https=True,
                       cookie_file=cookie_stuff.get('cookie_file'),
                       cookie_pem=cookie_stuff.get('cookie_pem'),
                       cookie_key=cookie_stuff.get('cookie_key'))

    if raw is None:
        return raw

    # Peek at the first value without indexing ``keys()``: dict views are
    # not subscriptable on Python 3. An empty dict yields None here and is
    # returned unchanged, as before.
    first_value = next(iter(raw.values()), None)
    if not isinstance(first_value, list):
        return raw

    return errors_from_list([
        workflow for workflow, statuses in raw.items()
        if any('manual' in status for status in statuses)
    ])
def _get_acdc_response(wfstr):
    """
    Debug helper: query the ACDC ``byCollectionName`` view on cmsweb.cern.ch.

    :param wfstr: value sent, wrapped in double quotes, as the view key
    :returns: whatever ``get_json`` returns for the request
    """
    from cmstoolbox.webtools import get_json

    query = {
        'key': '"{0}"'.format(wfstr),
        'include_docs': 'true',
        'reduce': 'false',
    }
    return get_json(
        'cmsweb.cern.ch',
        '/couchdb/acdcserver/_design/ACDC/_view/byCollectionName',
        query,
        use_cert=True,
    )
def get_reqdetail(self):
    """
    Return the request detail for this workflow, fetching it on first use.

    The detail is requested from the ``wmstatsserver`` ``request`` endpoint
    and stored in ``self.reqdetail_``. When the response has no ``result``
    key, or an empty ``result`` list, a placeholder
    ``{self.name_: {}}`` is returned and nothing is stored, so the next call
    tries again.

    :returns: a dict with the workflow name as its only key
    :rtype: dict
    """
    if self.reqdetail_ is None:
        req_detail = {self.name_: {}}
        raw = get_json(self.url_,
                       f'/wmstatsserver/data/request/{self.name_}',
                       use_cert=True)

        result = raw.get('result', None)
        # Covers both a missing key and an empty list; the latter used to
        # raise IndexError on ``result[0]``.
        if not result:
            return req_detail

        req_detail[self.name_] = result[0].get(self.name_, {})
        self.reqdetail_ = req_detail

    return self.reqdetail_
def open_location(data_location):
    """
    This function assumes that the contents of the location is in JSON format.
    It opens the data location and returns the dictionary.

    If the server configuration has an ``oracle`` entry, ``data_location`` is
    ignored and the workflows whose status contains "manual" are read from
    the database and passed to ``errors_from_list``.

    :param str data_location: The location of the file or url
    :returns: ``None`` if the location is neither an existing file nor a
              valid URL; the JSON content itself unless its first value is a
              list; otherwise the result of ``errors_from_list`` for the
              workflows having a status that contains ``'manual'``.
    :rtype: dict
    """
    config_dict = serverconfig.config_dict()

    if 'oracle' in config_dict:
        oracle_db_conn = cx_Oracle.connect(*config_dict['oracle'])  # pylint:disable=c-extension-no-member
        try:
            oracle_cursor = oracle_db_conn.cursor()
            oracle_cursor.execute(
                "SELECT NAME FROM CMS_UNIFIED_ADMIN.workflow WHERE lower(STATUS) LIKE '%manual%'")
            wkfs = [row for row, in oracle_cursor]
        finally:
            # Close the connection even when the query raises.
            oracle_db_conn.close()
        return errors_from_list(wkfs)

    raw = None

    if os.path.isfile(data_location):
        with open(data_location, 'r') as input_file:
            raw = json.load(input_file)

    elif validators.url(data_location):
        components = urlparse.urlparse(data_location)

        # Anything we need for the Shibboleth cookie could be in the config file
        cookie_stuff = config_dict['data']

        raw = get_json(components.netloc, components.path,
                       use_https=True,
                       cookie_file=cookie_stuff.get('cookie_file'),
                       cookie_pem=cookie_stuff.get('cookie_pem'),
                       cookie_key=cookie_stuff.get('cookie_key'))

    if raw is None:
        return raw

    # Peek at the first value without indexing ``keys()``: dict views are
    # not subscriptable on Python 3. An empty dict yields None here and is
    # returned unchanged, as before.
    first_value = next(iter(raw.values()), None)
    if not isinstance(first_value, list):
        return raw

    return errors_from_list([
        workflow for workflow, statuses in raw.items()
        if any('manual' in status for status in statuses)
    ])
def get_recovery_info(self):
    """
    Get the recovery info for this workflow.

    The ACDC ``byCollectionName`` view is queried for the workflow, and the
    files of every returned document are folded into one entry per
    ``fileset_name``.

    :returns: a dictionary containing the information used in recovery.
              The keys in this dictionary are arranged like the following::

                  {
                    task: {
                      'sites_to_run': list(sites),
                      'missing_to_run': int()
                    }
                  }

    :rtype: dict
    """
    recovery_info = {}

    docs = get_json(
        self.url,
        '/couchdb/acdcserver/_design/ACDC/_view/byCollectionName',
        params={
            'key': '"%s"' % self.workflow,
            'include_docs': 'true',
            'reduce': 'false'
        },
        use_cert=True)

    recovery_docs = [row['doc'] for row in docs.get('rows', [])]

    # Tolerate a missing parameter set or a missing 'SiteWhitelist' key by
    # using an empty whitelist instead of raising.
    parameters = self.get_workflow_parameters() or {}
    site_white_list = set(parameters.get('SiteWhitelist', []))

    for doc in recovery_docs:
        task = doc['fileset_name']

        # ``items()`` works on both Python 2 and 3, unlike ``iteritems()``.
        for replica, info in doc['files'].items():
            # For fake files, just return the site whitelist
            if replica.startswith('MCFakeFile'):
                locations = site_white_list
            else:
                locations = set(info['locations'])

            # The entry is created on the first file seen for the task, so
            # a task without files gets no entry at all.
            entry = recovery_info.setdefault(
                task, {'sites_to_run': [], 'missing_to_run': 0})
            entry['sites_to_run'] = list(set(entry['sites_to_run']) | locations)
            entry['missing_to_run'] += info['events']

    return recovery_info
def get_requests(self):
    """
    Look up this object's Prep ID through the ReqMgr2 request API.

    :returns: The first entry of the ``result`` list, or ``None`` when the
              Prep ID is the ``'NoPrepID'`` placeholder or the list is empty
    :rtype: dict
    """
    if self.prep_id == 'NoPrepID':
        return None

    query = {'prep_id': self.prep_id, 'detail': 'true'}
    response = get_json(self.url, '/reqmgr2/data/request',
                        params=query, use_cert=True)

    matches = response['result']
    return matches[0] if matches else None
def list_workflows(status):
    """
    List the workflows that are currently in the given status.

    The ReqMgr2 request API on cmsweb.cern.ch is queried with
    ``detail=false``. For a list of valid requests, visit the
    `Request Manager Interface <https://cmsweb.cern.ch/reqmgr2/>`_.

    :param str status: The status of the workflow lists being looked for
    :returns: The ``result`` entry of the response
    :rtype: list
    """
    query = {'status': status, 'detail': 'false'}
    response = get_json('cmsweb.cern.ch', '/reqmgr2/data/request',
                        params=query, use_cert=True)
    return response['result']
def get_recovery_info(self):
    """
    Get the recovery info for this workflow.

    The ACDC ``byCollectionName`` view is queried for the workflow, and the
    files of every returned document are folded into one entry per
    ``fileset_name``.

    :returns: a dictionary containing the information used in recovery.
              The keys in this dictionary are arranged like the following::

                  {
                    task: {
                      'sites_to_run': list(sites),
                      'missing_to_run': int()
                    }
                  }

    :rtype: dict
    """
    recovery_info = {}

    docs = get_json(self.url,
                    '/couchdb/acdcserver/_design/ACDC/_view/byCollectionName',
                    params={'key': '"%s"' % self.workflow,
                            'include_docs': 'true',
                            'reduce': 'false'},
                    use_cert=True)

    recovery_docs = [row['doc'] for row in docs.get('rows', [])]

    # Tolerate a missing parameter set or a missing 'SiteWhitelist' key by
    # using an empty whitelist instead of raising.
    parameters = self.get_workflow_parameters() or {}
    site_white_list = set(parameters.get('SiteWhitelist', []))

    for doc in recovery_docs:
        task = doc['fileset_name']

        # ``items()`` works on both Python 2 and 3, unlike ``iteritems()``.
        for replica, info in doc['files'].items():
            # For fake files, just return the site whitelist
            if replica.startswith('MCFakeFile'):
                locations = site_white_list
            else:
                locations = set(info['locations'])

            # The entry is created on the first file seen for the task, so
            # a task without files gets no entry at all.
            entry = recovery_info.setdefault(
                task, {'sites_to_run': [], 'missing_to_run': 0})
            entry['sites_to_run'] = list(set(entry['sites_to_run']) | locations)
            entry['missing_to_run'] += info['events']

    return recovery_info
def get_errors(self):
    """
    Summarise the errors of this workflow per step, error code and site.

    Non-zero ``errorCount`` values are taken from the ``jobfailed`` section
    of the job detail. Every site listed for a file in the ACDC documents of
    the workflow is then added under the ``'NotReported'`` code with a count
    of 0. Steps whose name contains ``LogCollect`` or ``Cleanup`` are left
    out of the result.

    :returns: a dictionary of the form
              ``{step: {errorcode: {site: number_errors}}}``
    :rtype: dict
    """
    summary = {}

    jobdetail = self.get_jobdetail()
    if jobdetail.get('result', None):
        steps = jobdetail['result'][0].get(self.name_, {})
        for step, stepdata in steps.items():
            failures = {}
            for code, codedata in stepdata.get('jobfailed', {}).items():
                counts = {
                    site: sitedata['errorCount']
                    for site, sitedata in codedata.items()
                    if sitedata['errorCount']
                }
                if counts:
                    failures[code] = counts
            if failures:
                summary[step] = failures

    acdc_query = {
        'key': f'"{self.name_}"',
        'include_docs': 'true',
        'reduce': 'false'
    }
    acdc_response = get_json(
        self.url_,
        '/couchdb/acdcserver/_design/ACDC/_view/byCollectionName',
        acdc_query,
        use_cert=True)

    for row in acdc_response.get('rows', []):
        document = row['doc']
        task_errors = summary.setdefault(document['fileset_name'], {})
        not_reported = task_errors.setdefault('NotReported', {})
        for file_replica in document['files'].values():
            for site in file_replica['locations']:
                not_reported[site] = 0

    skipped = ('LogCollect', 'Cleanup')
    for step in list(summary):
        if any(steptype in step for steptype in skipped):
            del summary[step]

    return summary
def get_reqparams(self):
    """Return the workflow parameters from reqmgr2, fetching them if needed.

    A falsy ``self.reqparams_`` triggers a request; the value found under
    the workflow name in the response is then stored on the instance. If
    several ``result`` entries carry the name, the last one is kept.

    example:
    https://cmsweb.cern.ch/reqmgr2/data/request?name=pdmvserv_task_B2G-RunIIFall17wmLHEGS-00287__v1_T_180427_163824_4799

    :return: workflow parameters
    :rtype: dict
    """
    if self.reqparams_:
        return self.reqparams_

    response = webtools.get_json(self.url_, '/reqmgr2/data/request',
                                 params={'name': self.name_},
                                 use_https=True, use_cert=True)

    for entry in response['result']:
        for name, parameters in entry.items():
            if name == self.name_:
                self.reqparams_ = parameters

    return self.reqparams_
def _get_reqdetail(self):
    """
    Request the detail of this workflow from the wmstatsserver.

    This method itself does no caching.

    :returns: ``{workflow: detail}``, where ``detail`` is an empty dict if
              the response has no usable ``result`` entry
    :rtype: dict
    """
    response = get_json(self.url,
                        '/wmstatsserver/data/request/%s' % self.workflow,
                        use_cert=True)

    entries = response.get('result', [])
    detail = entries[0].get(self.workflow, {}) if entries else {}
    return {self.workflow: detail}
def get_errors(self, get_unreported=False):
    """
    A wrapper for :py:func:`errors_for_workflow` if you happen to have
    a :py:class:`WorkflowInfo` object already.

    With ``get_unreported`` set, every site listed for a file in the ACDC
    documents of the workflow is added under the ``'NotReported'`` error
    code with a count of 0. Steps whose name contains ``LogCollect`` or
    ``Cleanup`` are always removed from the result.

    :param bool get_unreported: Get the unreported errors from ACDC server
    :returns: a dictionary containing error codes in the following format::

              {step: {errorcode: {site: number_errors}}}

    :rtype: dict
    """
    output = errors_for_workflow(self.workflow, self.url)

    if get_unreported:
        # Query the same host as ``errors_for_workflow`` above rather than
        # a hard-coded one, so both halves of the result share a source.
        acdc_server_response = get_json(
            self.url,
            '/couchdb/acdcserver/_design/ACDC/_view/byCollectionName',
            {'key': '"%s"' % self.workflow,
             'include_docs': 'true',
             'reduce': 'false'},
            use_cert=True)

        # A response without 'rows' is treated as having no documents.
        for row in acdc_server_response.get('rows', []):
            task = row['doc']['fileset_name']
            not_reported = output.setdefault(task, {}).setdefault('NotReported', {})
            for file_replica in row['doc']['files'].values():
                for site in file_replica['locations']:
                    not_reported[site] = 0

    # Iterate over a copy of the keys because entries are removed in place.
    for step in list(output):
        if any(steptype in step for steptype in ('LogCollect', 'Cleanup')):
            output.pop(step)

    return output
def get_reqdetail(self):
    """fetch request details from wmstatsserver, available for running workflows.

    The detail is stored in ``self.reqdetail_`` and reused while that value
    is truthy. When the response has no ``result`` key, or an empty
    ``result`` list, a placeholder ``{self.name_: {}}`` is returned and
    nothing is stored, so the next call tries again.

    :return: request detail
    :rtype: dict
    """
    if not self.reqdetail_:
        req_detail = {self.name_: {}}
        raw = webtools.get_json(
            self.url_,
            f'/wmstatsserver/data/request/{self.name_}',
            use_cert=True)

        result = raw.get('result', None)
        # Covers both a missing key and an empty list; the latter used to
        # raise IndexError on ``result[0]``.
        if not result:
            return req_detail

        req_detail[self.name_] = result[0].get(self.name_, {})
        self.reqdetail_ = req_detail

    return self.reqdetail_
def get_files(site, dataset):
    """
    List the file replicas of a dataset that are present at a site.

    The PhEDEx ``filereplicas`` API on cmsweb.cern.ch is queried.

    :param str site: The name of the site to check
    :param str dataset: The name of the dataset to check
    :returns: The ``name`` of every file in every block of the response
    :rtype: list
    """
    query = {'node': site, 'dataset': dataset}
    response = get_json('cmsweb.cern.ch',
                        '/phedex/datasvc/json/prod/filereplicas',
                        query,
                        use_https=True)

    names = []
    for block in response['phedex']['block']:
        for fileinfo in block['file']:
            names.append(fileinfo['name'])
    return names
def get_errors(self, get_unreported=False):
    """
    A wrapper for :py:func:`errors_for_workflow` if you happen to have
    a :py:class:`WorkflowInfo` object already.

    With ``get_unreported`` set, every site listed for a file in the ACDC
    documents of the workflow is added under the ``'NotReported'`` error
    code with a count of 0. Steps whose name contains ``LogCollect`` or
    ``Cleanup`` are always removed from the result.

    :param bool get_unreported: Get the unreported errors from ACDC server
    :returns: a dictionary containing error codes in the following format::

              {step: {errorcode: {site: number_errors}}}

    :rtype: dict
    """
    output = errors_for_workflow(self.workflow, self.url)

    if get_unreported:
        # Query the same host as ``errors_for_workflow`` above rather than
        # a hard-coded one, so both halves of the result share a source.
        acdc_server_response = get_json(
            self.url,
            '/couchdb/acdcserver/_design/ACDC/_view/byCollectionName',
            {'key': '"%s"' % self.workflow,
             'include_docs': 'true',
             'reduce': 'false'},
            use_cert=True)

        # A response without 'rows' is treated as having no documents.
        for row in acdc_server_response.get('rows', []):
            task = row['doc']['fileset_name']
            not_reported = output.setdefault(task, {}).setdefault('NotReported', {})
            for file_replica in row['doc']['files'].values():
                for site in file_replica['locations']:
                    not_reported[site] = 0

    # Iterate over a copy of the keys because entries are removed in place.
    for step in list(output):
        if any(steptype in step for steptype in ('LogCollect', 'Cleanup')):
            output.pop(step)

    return output
def errors_for_workflow(workflow, url='cmsweb.cern.ch'):
    """
    Get the useful status information from a workflow

    Only sites with a non-zero ``errorCount`` are kept, and error codes or
    steps left without any such site are omitted.

    :param str workflow: the name of the workflow request
    :param str url: the base url to find the information at
    :returns: a dictionary containing error codes in the following format::

              {step: {errorcode: {site: number_errors}}}

    :rtype: dict
    """
    result = get_json(url,
                      '/wmstatsserver/data/jobdetail/%s' % workflow,
                      use_cert=True)

    output = {}

    if not result['result']:
        return output

    # ``items()`` works on both Python 2 and 3, unlike ``iteritems()``.
    for step, stepdata in result['result'][0].get(workflow, {}).items():
        errors = {}
        for code, codedata in stepdata.get('jobfailed', {}).items():
            sites = {
                site: sitedata['errorCount']
                for site, sitedata in codedata.items()
                if sitedata['errorCount']
            }
            if sites:
                errors[code] = sites

        if errors:
            output[step] = errors

    return output
def errors_for_workflow(workflow, url='cmsweb.cern.ch'):
    """
    Summarise the failed-job error counts of a workflow.

    The job detail is read from the wmstatsserver. Only sites with a
    non-zero ``errorCount`` are kept, and error codes or steps left without
    any such site are omitted.

    :param str workflow: the name of the workflow request
    :param str url: the base url to find the information at
    :returns: a dictionary containing error codes in the following format::

              {step: {errorcode: {site: number_errors}}}

    :rtype: dict
    """
    response = get_json(url,
                        '/wmstatsserver/data/jobdetail/%s' % workflow,
                        use_cert=True)

    if not response['result']:
        return {}

    summary = {}
    steps = response['result'][0].get(workflow, {})
    for step, stepdata in steps.items():
        failures = {}
        for code, codedata in stepdata.get('jobfailed', {}).items():
            counts = {
                site: sitedata['errorCount']
                for site, sitedata in codedata.items()
                if sitedata['errorCount']
            }
            if counts:
                failures[code] = counts

        if failures:
            summary[step] = failures

    return summary
def deletion_requests(site):
    """
    Collect the datasets that have approved deletion requests at a site.

    Only requests created within the number of days given by the
    **IgnoreAge** configuration parameter (0 when unset) are asked for.
    The request is done via the PhEDEx ``deleterequests`` API.

    :param str site: The site that we want the list of deletion requests for.
    :returns: Datasets that are in deletion requests; empty when the
              response is falsy
    :rtype: set
    """
    now = time.time()
    ignore_days = float(config.config_dict().get('IgnoreAge', 0))
    created_since = int(now - ignore_days * 24 * 3600)

    # Get deletion requests in PhEDEx
    response = get_json('cmsweb.cern.ch',
                        '/phedex/datasvc/json/prod/deleterequests',
                        {
                            'node': site,
                            'approval': 'approved',
                            'create_since': created_since
                        },
                        use_https=True)

    if not response:
        return set()

    approved = response['phedex']['request']

    # Block names look like "<dataset>#<suffix>": keep the part before '#'.
    from_blocks = [
        block['name'].split('#')[0]
        for request in approved
        for block in request['data']['dbs']['block']
    ]
    from_datasets = [
        dataset['name']
        for request in approved
        for dataset in request['data']['dbs']['dataset']
    ]

    return set(from_blocks + from_datasets)