Beispiel #1
0
def agentsSites(url):
    """
    Return the set of sites that are ready in the CMS production WMAgents.

    The JSON document fetched from `url` is expected to provide a 'rows' list
    whose items carry the agent record under the 'value' key. A site is
    reported when its threshold state is 'Normal' in at least one agent that
    belongs to the 'production' team and whose status is 'ok'.

    :param url: address of the service that reports the agents information
    :return: set of site names
    """
    handler = RequestHandler()
    response = handler.getdata(url,
                               params={},
                               headers={'Accept': 'application/json'},
                               ckey=ckey(),
                               cert=cert())
    records = [row['value'] for row in json.loads(response)['rows']]
    # only agents of the production team are of interest
    productionAgents = [rec for rec in records if rec['agent_team'] == 'production']

    readySites = set()
    for agent in productionAgents:
        if agent['status'] == 'ok':
            thresholds = agent['WMBS_INFO']['thresholds']
            for site, sinfo in viewitems(thresholds):
                if sinfo['state'] == 'Normal':
                    readySites.add(site)
    return readySites
Beispiel #2
0
 def postRequest(self, apiName, params):
     """
     Perform a POST request to the given API of our MicroService.

     :param apiName: name of the API, appended to self.url after a slash
     :param params: payload handed over to the request manager
     :return: whatever self.mgr.getdata returns for the call
     """
     jsonHeaders = {'Content-type': 'application/json'}
     apiPath = "/%s" % apiName
     endpoint = self.url + apiPath
     response = self.mgr.getdata(endpoint,
                                 params=params,
                                 headers=jsonHeaders,
                                 verb='POST',
                                 cert=cert(),
                                 ckey=ckey(),
                                 encode=True,
                                 decode=True)
     print("### post call data %s" % response)
     return response
Beispiel #3
0
 def getSpec(self, request, reqSpecs=None):
     """
     Get the spec of the given request, preferring already fetched specs.

     :param request: request dictionary, must provide the 'RequestName' key
     :param reqSpecs: optional dictionary of specs keyed by request name; when
         it holds the request name its entry is returned without any remote call
     :return: the spec found in reqSpecs or the un-pickled payload fetched
         from the workload cache
     """
     if reqSpecs:
         reqName = request['RequestName']
         if reqName in reqSpecs:
             return reqSpecs[reqName]
     specUrl = str('%s/%s/spec' % (self.msConfig['reqmgrCacheUrl'],
                                   request['RequestName']))
     handler = RequestHandler()
     payload = handler.getdata(specUrl, params={}, cert=cert(), ckey=ckey())
     # NOTE(review): un-pickling data received over the network executes
     # whatever the payload dictates; the remote end must be trusted
     return pickle.loads(payload)
Beispiel #4
0
 def _getRequestSpecs(self, requestNames):
     """
     Helper function to fetch the specs of all given request names at once.

     :param requestNames: iterable of request names
     :return: dictionary of un-pickled specs keyed by request name
     """
     specUrls = []
     for name in requestNames:
         specUrls.append(
             str('%s/%s/spec' % (self.msConfig['reqmgrCacheUrl'], name)))
     responses = multi_getdata(specUrls, ckey(), cert())
     # URLs end with <request name>/spec, hence the request name is the
     # second to last path component of the URL reported back with each row
     return {
         row['url'].split('/')[-2]: pickle.loads(row['data'])
         for row in responses
     }
Beispiel #5
0
def getNodes(kind):
    """
    Get the names of the PhEDEx nodes of the given kind.

    :param kind: value compared against the 'kind' attribute of every node
    :return: list of names of the matching nodes
    """
    url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/nodes'
    handler = RequestHandler()
    response = handler.getdata(url,
                               params={},
                               headers={'Accept': 'application/json'},
                               ckey=ckey(),
                               cert=cert())
    matching = []
    for node in json.loads(response)['phedex']['node']:
        if node['kind'] == kind:
            matching.append(node['name'])
    return matching
Beispiel #6
0
def getNodeQueues():
    """
    Helper function to fetch nodes usage from PhEDEx data service.

    For every node reported by the nodeusagehistory API the 'miss_bytes' of
    each usage record are converted to TB, truncated to an integer and summed.

    :return: defaultdict(int) mapping node name to the accumulated amount of
        missing data in TB; nodes without usage records get no entry
    """
    # one TB expressed in bytes; the divisor used to be 1023.**4 which is
    # neither a binary nor a decimal unit and slightly inflated the result
    bytesPerTB = 1024.**4
    headers = {'Accept': 'application/json'}
    params = {}
    mgr = RequestHandler()
    url = 'https://cmsweb.cern.ch/phedex/datasvc/json/prod/nodeusagehistory'
    res = mgr.getdata(url,
                      params=params,
                      headers=headers,
                      ckey=ckey(),
                      cert=cert())
    data = json.loads(res)
    ret = defaultdict(int)
    for node in data['phedex']['node']:
        for usage in node['usage']:
            ret[node['name']] += int(usage['miss_bytes'] / bytesPerTB)  # in TB
    return ret
Beispiel #7
0
 def _getRequestWorkflows(self, requestNames):
     """
     Helper function to fetch the workflow records of all given request names.

     Rows which cannot be decoded, or which do not provide the expected
     {'result': [workflow]} structure, are logged and left out of the result.

     :param requestNames: iterable of request names
     :return: dictionary of workflow records keyed by request name
     """
     urls = []
     for name in requestNames:
         urls.append(
             str('%s/data/request/%s' % (self.msConfig['reqmgr2Url'], name)))
     self.logger.debug("getRequestWorkflows")
     for u in urls:
         self.logger.debug("url %s", u)

     workflows = {}
     for row in multi_getdata(urls, ckey(), cert()):
         # the request name is the last path component of the row URL
         reqName = row['url'].split('/')[-1]
         try:
             payload = json.loads(row['data'])
             # we get back {'result': [workflow]} dict
             workflows[reqName] = payload['result'][0]
         except Exception as exp:
             self.logger.error("fail to process row %s", row)
             self.logger.exception(
                 "fail to load data as json record, error=%s", str(exp))
     return workflows
Beispiel #8
0
    def getMSOutputTransferInfo(self, wflow):
        """
        Fetches the transfer information from the MSOutput REST interface for
        the given workflow and updates the workflow transfer flags:
         * 'TransferDone' is set to True when the MSOutput transfer document
           reports 'TransferStatus' equal to 'done'
         * 'TransferTape' is set to True when the transfer document has a non
           empty 'OutputMap' and none of its tape rules is missing in Rucio or
           in a state different than 'OK' (records without a tape rule id are
           considered completed)
        Any failure while fetching or decoding the MSOutput record is logged
        and the workflow is returned with its flags untouched.
        :param  wflow:   A MSRuleCleaner workflow representation
        :return:         The workflow object
        """
        headers = {'Accept': 'application/json'}
        params = {}
        url = '%s/data/info?request=%s' % (self.msConfig['msOutputUrl'],
                                           wflow['RequestName'])
        # Must be bound before the try block: when the call below fails the
        # exception is only logged and the code after it still reads the name.
        transferInfo = None
        try:
            res = self.curlMgr.getdata(url, params=params, headers=headers, ckey=ckey(), cert=cert())
            data = json.loads(res)['result'][0]
            transferInfo = data['transferDoc']
        except Exception as ex:
            msg = "General exception while fetching TransferInfo from MSOutput for %s. "
            msg += "Error: %s"
            self.logger.exception(msg, wflow['RequestName'], str(ex))

        # Nothing to evaluate without a transfer document
        if transferInfo is None:
            return wflow

        # Set Transfer status - information fetched from MSOutput only
        if transferInfo['TransferStatus'] == 'done':
            wflow['TransferDone'] = True

        # Set Tape rules status - information fetched from Rucio (tape rule ids from MSOutput)
        if transferInfo['OutputMap']:
            tapeRulesStatusList = []
            # For setting 'TransferTape' = True we require either no tape rules for the
            # workflow have been created or all existing tape rules to be in status 'OK',
            # so every empty TapeRuleID we consider as completed.
            for mapRecord in transferInfo['OutputMap']:
                if not mapRecord['TapeRuleID']:
                    continue
                rucioRule = self.rucio.getRule(mapRecord['TapeRuleID'])
                if not rucioRule:
                    tapeRulesStatusList.append(False)
                    msg = "Tape rule: %s not found for workflow: %s "
                    msg += "Possible server side error."
                    self.logger.error(msg, mapRecord['TapeRuleID'], wflow['RequestName'])
                    continue
                if rucioRule['state'] == 'OK':
                    tapeRulesStatusList.append(True)
                    msg = "Tape rule: %s in final state: %s for workflow: %s"
                    self.logger.info(msg, mapRecord['TapeRuleID'], rucioRule['state'], wflow['RequestName'])
                else:
                    tapeRulesStatusList.append(False)
                    msg = "Tape rule: %s in non final state: %s for workflow: %s"
                    self.logger.info(msg, mapRecord['TapeRuleID'], rucioRule['state'], wflow['RequestName'])
            # all() of an empty list is True, which covers the "no tape rules" case
            if all(tapeRulesStatusList):
                wflow['TransferTape'] = True

        return wflow
Beispiel #9
0
 def fetch(self):
     """
     Fetch information about sites from various CMS data-services.

     All URLs are fetched through multi_getdata and every response is stored
     under a key derived from the URL it came from: 'ssb_<id>' for the
     dashboard SSB columns, 'gwmsmon_*' for the gwmsmon views, 'mcore',
     'detox_sites' (kept as received, not JSON decoded), 'mss_usage' and
     'stuck_<id>'. A response which fails to decode as JSON is logged and
     stored as an empty dictionary. The PhEDEx node queues are fetched once,
     after all rows are processed, and stored under 'site_queues'.

     :return: dictionary with the collected site information
     """
     dashboardUrl = "http://dashb-ssb.cern.ch/dashboard/request.py"
     ssbids = [
         '106', '107', '108', '109', '136', '158', '159', '160', '237'
     ]
     sids = ['1', '2', 'm1', 'm3', 'm4', 'm5', 'm6']
     # one dashboard URL per SSB column id
     urls = [
         '%s/getplotdata?columnid=%s&batch=1&lastdata=1' % (dashboardUrl, ssbid)
         for ssbid in ssbids
     ]
     urls.extend([
         ### FIXME: these calls to gwmsmon are failing pretty badly with
         ### "302 Found" and failing to decode, causing a huge error dump
         ### to the logs
         # 'https://cms-gwmsmon.cern.ch/totalview/json/site_summary',
         # 'https://cms-gwmsmon.cern.ch/prodview/json/site_summary',
         # 'https://cms-gwmsmon.cern.ch/poolview/json/totals',
         # 'https://cms-gwmsmon.cern.ch/prodview/json/maxusedcpus',
         'http://cmsgwms-frontend-global.cern.ch/vofrontend/stage/mcore_siteinfo.json',
         'http://t3serv001.mit.edu/~cmsprod/IntelROCCS/Detox/SitesInfo.txt',
         'http://cmsmonitoring.web.cern.ch/cmsmonitoring/storageoverview/latest/StorageOverview.json',
     ])
     siteInfo = {}
     # The temporary file holds the SSO cookie for the gwmsmon URLs. It has to
     # stay around until all rows are consumed and is removed right after.
     with tempfile.NamedTemporaryFile() as tfile:
         cookie = {}
         for url in urls:
             if 'gwmsmon' in url:
                 cern_sso_cookie(url, tfile.name, cert(), ckey())
                 cookie.update({url: tfile.name})
         for row in multi_getdata(urls, ckey(), cert(), cookie=cookie):
             if 'Detox' in row['url']:
                 data = row['data']
             else:
                 try:
                     data = json.loads(row['data'])
                 except Exception as exc:
                     self.logger.exception('error %s for row %s', str(exc), row)
                     data = {}
             if 'ssb' in row['url']:
                 for ssbid in ssbids:
                     if ssbid in row['url']:
                         siteInfo['ssb_%s' % ssbid] = data
             elif 'prodview/json/site_summary' in row['url']:
                 siteInfo['gwmsmon_prod_site_summary'] = data
             elif 'totalview/json/site_summary' in row['url']:
                 siteInfo['gwmsmon_site_summary'] = data
             elif 'totals' in row['url']:
                 siteInfo['gwmsmon_totals'] = data
             elif 'maxusedcpus' in row['url']:
                 siteInfo['gwmsmon_prod_maxused'] = data
             elif 'mcore' in row['url']:
                 siteInfo['mcore'] = data
             elif 'Detox' in row['url']:
                 siteInfo['detox_sites'] = data
             elif 'monitoring' in row['url']:
                 siteInfo['mss_usage'] = data
             elif 'stuck' in row['url']:
                 # NOTE(review): plain substring test, short ids such as '1'
                 # match many URLs; no 'stuck' URL is currently requested
                 for sid in sids:
                     if sid in row['url']:
                         siteInfo['stuck_%s' % sid] = data
     # The node queues do not depend on the rows above; fetch them only once
     # instead of repeating the remote call for every processed row.
     siteInfo['site_queues'] = getNodeQueues()
     return siteInfo