Example #1
0
    def __init__(self, msConfig, **kwargs):
        """
        Provides setup for MSTransferor and MSMonitor classes

        :param msConfig: MS service configuration
        :param kwargs: can be used to skip the initialization of specific services, such as:
            logger: logger object
            skipReqMgr: boolean to skip ReqMgr initialization
            skipReqMgrAux: boolean to skip ReqMgrAux initialization
            skipRucio: boolean to skip Rucio initialization
        """
        verbose = getattr(msConfig, 'verbose', False)
        self.logger = getMSLogger(verbose, kwargs.get("logger"))
        self.msConfig = msConfig
        self.logger.info("Configuration including default values:\n%s", self.msConfig)

        # optional service clients, skipped on request (e.g. in unit tests)
        skipReqMgr = kwargs.get("skipReqMgr", False)
        skipReqMgrAux = kwargs.get("skipReqMgrAux", False)
        skipRucio = kwargs.get("skipRucio", False)
        if not skipReqMgr:
            self.reqmgr2 = ReqMgr(self.msConfig['reqmgr2Url'], logger=self.logger)
        if not skipReqMgrAux:
            self.reqmgrAux = ReqMgrAux(self.msConfig['reqmgr2Url'],
                                       httpDict={'cacheduration': 1.0}, logger=self.logger)

        self.phedex = None
        self.rucio = None
        if not skipRucio:
            self.rucio = Rucio(acct=self.msConfig['rucioAccount'],
                               hostUrl=self.msConfig['rucioUrl'],
                               authUrl=self.msConfig['rucioAuthUrl'],
                               configDict={"logger": self.logger, "user_agent": "wmcore-microservices"})
Example #2
0
 def __init__(self, nworkers=10, name='TaskManager', logger=None):
     """
     Set up the shared task queue and spawn the worker thread pool.

     :param nworkers: number of Worker threads to create
     :param name: name given to this manager and its workers
     :param logger: logger object
     """
     self.logger = getMSLogger(verbose=True, logger=logger)
     self.name = name
     self.pids = set()
     self.uids = UidSet()
     self.tasks = Queue()
     # every Worker shares the same task queue and pid/uid registries
     self.workers = [Worker(name, self.tasks, self.pids, self.uids, logger)
                     for _ in range(nworkers)]
Example #3
0
 def __init__(self, name, taskq, pidq, uidq, logger=None):
     """
     Create a daemon worker thread bound to the shared queues and start it.

     :param name: thread name
     :param taskq: shared task queue consumed by this worker
     :param pidq: shared set of process ids
     :param uidq: shared set of user ids
     :param logger: logger object
     """
     self.logger = getMSLogger(verbose=True, logger=logger)
     threading.Thread.__init__(self, name=name)
     # shared state handed in by the manager
     self.tasks, self.pids, self.uids = taskq, pidq, uidq
     self.exit = 0  # flipped to non-zero to request shutdown of the run loop
     self.daemon = True  # do not block interpreter exit
     self.start()  # the worker begins consuming tasks immediately
Example #4
0
    def __init__(self, reqName, reqData, logger=None, verbose=False):
        """
        Cache the request name and data and initialize the data placeholders.

        :param reqName: request name string
        :param reqData: dictionary with the request description
        :param logger: logger object
        :param verbose: logger verbosity
        """
        self.reqName = reqName
        self.data = reqData
        self.logger = getMSLogger(verbose, logger)

        # primary/parent input data resolved from the request description
        self.inputDataset = ""
        self.parentDataset = ""
        # pileup (secondary) datasets and their candidate RSE locations
        self.pileupDatasets = set()
        self.pileupRSEList = set()

        self.campaigns = set()
        self.dataCampaignMap = []
        # block structures keyed by block name, valued by block size
        self.primaryBlocks = {}
        self.parentBlocks = {}
        # somewhat redundant, but required to link an input block to its parent block(s)
        self.childToParentBlocks = {}
        # pileup is not resolved into blocks; only total size and location are kept
        self.secondarySummaries = {}

        # populate the structures above from the request data
        self.setDataCampaignMap()
        self.setInputData()
Example #5
0
    def __init__(self, dataAcct, quotaFraction, **kwargs):
        """
        Executes a basic setup, including proper logging.

        :param dataAcct: string with the Rucio account
        :param quotaFraction: float point number representing the fraction of the quota
        :param kwargs: the supported keyword arguments are:
          minimumThreshold: integer value defining the minimum available space required
          verbose: logger verbosity
          logger: logger object
        """
        self.dataAcct = dataAcct
        self.quotaFraction = quotaFraction
        # required keyword argument: a missing value raises KeyError on purpose
        self.minimumSpace = kwargs["minimumThreshold"]
        self.logger = getMSLogger(kwargs.get("verbose"), kwargs.get("logger"))
        self.logger.info("RSEQuotas started with parameters: dataAcct=%s, quotaFraction=%s, "
                         "minimumThreshold=%s GB",
                         dataAcct, quotaFraction, gigaBytes(self.minimumSpace))

        # per-node usage and derived availability sets, filled later
        self.nodeUsage = {}
        self.availableRSEs = set()
        self.outOfSpaceNodes = set()
Example #6
0
 def __init__(self, mode=None, logger=None):
     """
     Load the site information, unless running in test mode.

     :param mode: 'test' starts with an empty cache instead of fetching
     :param logger: logger object
     """
     self.logger = getMSLogger(verbose=True, logger=logger)
     # test mode avoids hitting the backend data source
     self.siteInfo = {} if mode == 'test' else self.fetch()
Example #7
0
    def __init__(self, uConfig, mode=None, logger=None):
        """
        Build the full site-status picture: classify sites as ready or not,
        derive per-tier site lists, and fetch storage/queue/glidein metrics.

        :param uConfig: configuration object providing get() access to settings
        :param mode: passed through to SiteCache ('test' avoids backend fetches)
        :param logger: logger object
        """
        self.logger = getMSLogger(verbose=True, logger=logger)
        self.siteCache = SiteCache(mode, logger)
        self.config = uConfig

        # sites reported as ready by the agents; used to override SSB status below
        self.sites_ready_in_agent = self.siteCache.get('ready_in_agent', [])

        self.sites_ready = []
        self.sites_not_ready = []
        self.all_sites = []
        self.sites_banned = self.config.get('sites_banned', [])

        # NOTE(review): dead code kept for reference — used to read sites_full.json
        #         try:
        #             sites_full = json.loads(open('sites_full.json').read())
        #         except:
        #             pass

        # classify every T0-T3 site from the SSB metric 237 CSV dump:
        # agent readiness (when available) wins over the SSB 'Status' column
        data = self.siteCache.get('ssb_237', {'csvdata': []})
        for siteInfo in data['csvdata']:
            if not siteInfo['Tier'] in [0, 1, 2, 3]:
                continue
            self.all_sites.append(siteInfo['VOName'])
            if siteInfo['VOName'] in self.sites_banned:
                continue
            if self.sites_ready_in_agent and siteInfo[
                    'VOName'] in self.sites_ready_in_agent:
                self.sites_ready.append(siteInfo['VOName'])
            elif self.sites_ready_in_agent and not siteInfo[
                    'VOName'] in self.sites_ready_in_agent:
                self.sites_not_ready.append(siteInfo['VOName'])
            elif siteInfo['Status'] == 'enabled':
                self.sites_ready.append(siteInfo['VOName'])
            else:
                self.sites_not_ready.append(siteInfo['VOName'])

        self.sites_auto_approve = self.config.get('sites_auto_approve')

        # per-tier views of the ready sites (and of all sites, suffix _all)
        self.sites_eos = [s for s in self.sites_ready \
                if s in ['T2_CH_CERN', 'T2_CH_CERN_HLT']]
        self.sites_T3s = [s for s in self.sites_ready if s.startswith('T3_')]
        self.sites_T2s = [s for s in self.sites_ready if s.startswith('T2_')]
        self.sites_T1s = [s for s in self.sites_ready if s.startswith('T1_')]
        self.sites_T0s = [s for s in self.sites_ready if s.startswith('T0_')]

        self.sites_T3s_all = [s for s in self.all_sites if s.startswith('T3_')]
        self.sites_T2s_all = [s for s in self.all_sites if s.startswith('T2_')]
        self.sites_T1s_all = [s for s in self.all_sites if s.startswith('T1_')]
        self.sites_T0s_all = [s for s in self.all_sites if s.startswith('T0_')]

        # sites usable for remote reads (AAA); the HLT farm is excluded
        self.sites_AAA = list(set(self.sites_ready) - set(['T2_CH_CERN_HLT']))
        ## could this be an SSB metric ?
        self.sites_with_goodIO = self.config.get('sites_with_goodIO', [])
        #restrict to those that are actually ON
        self.sites_with_goodIO = [
            s for s in self.sites_with_goodIO if s in self.sites_ready
        ]
        self.sites_veto_transfer = [
        ]  ## do not prevent any transfer by default

        ## new site lists for better matching
        self.sites_with_goodAAA = self.sites_with_goodIO \
                + ['T3_IN_TIFRCloud', 'T3_US_NERSC'] ## like this for now
        self.sites_with_goodAAA = [
            s for s in self.sites_with_goodAAA if s in self.sites_ready
        ]

        # per-site metric accumulators, all defaulting to 0
        self.storage = defaultdict(int)
        self.disk = defaultdict(int)
        self.queue = defaultdict(int)
        self.free_disk = defaultdict(int)
        self.quota = defaultdict(int)
        self.locked = defaultdict(int)
        self.cpu_pledges = defaultdict(int)
        # CE name -> SE name overrides for sites that share another site's storage
        self.addHocStorage = {
            'T2_CH_CERN_T0': 'T2_CH_CERN',
            'T2_CH_CERN_HLT': 'T2_CH_CERN',
            'T2_CH_CERN_AI': 'T2_CH_CERN',
            'T3_IN_TIFRCloud': 'T2_IN_TIFR',
            #'T3_US_NERSC' : 'T1_US_FNAL_Disk'
        }
        ## list here the site which can accomodate high memory requests
        self.sites_memory = {}

        # multicore-ready sites: intersection of the mcore mask and ready sites
        self.sites_mcore_ready = []
        mcore_mask = self.siteCache.get('mcore')
        if mcore_mask:
            self.sites_mcore_ready = \
                    [s for s in mcore_mask['sites_for_mcore'] if s in self.sites_ready]
        else:
            pass

        # seed placeholders; real values are pulled from SSB further below
        for sname in self.all_sites:
            self.cpu_pledges[sname] = 1  # will get it later from SSB
            self.disk[self.ce2SE(sname)] = 0  # will get it later from SSB

        # register all non-banned tape (MSS) endpoints with zero usage
        tapes = getNodes('MSS')
        for mss in tapes:
            if mss in self.sites_banned:
                continue  # not using these tapes for MC familly
            self.storage[mss] = 0

        ## and get SSB sync
        self.fetch_ssb_info()

        # overwrite tape storage with the 'Usable' numbers from the MSS usage
        # report; explicit per-site overrides from the config win over both
        mss_usage = self.siteCache.get('mss_usage')
        sites_space_override = self.config.get('sites_space_override', {})
        if mss_usage:
            use_field = 'Usable'
            for mss in self.storage:
                if not mss in mss_usage['Tape'][use_field]:
                    self.storage[mss] = 0
                else:
                    self.storage[mss] = mss_usage['Tape'][use_field][mss]
                if mss in sites_space_override:
                    self.storage[mss] = sites_space_override[mss]

        self.fetch_queue_info()
        ## and detox info
        self.fetch_detox_info(\
                buffer_level=self.config.get('DDM_buffer_level', None),\
                sites_space_override=sites_space_override)

        ## transform no disks in veto transfer
        for dse, free in viewitems(self.disk):
            if free <= 0:
                if not dse in self.sites_veto_transfer:
                    self.sites_veto_transfer.append(dse)

        ## and glidein info
        self.fetch_glidein_info()
Example #8
0
    def __init__(self, config=None, logger=None):
        """
        Initialize MSManager class with given configuration,
        logger, ReqMgr2/ReqMgrAux/PhEDEx/Rucio objects,
        and start transferor and monitoring threads.

        Each service listed in self.services gets its own MS* instance and a
        polling thread created via start_new_thread.

        :param config: reqmgr2ms service configuration
        :param logger: logger object; a default MS logger is created when None
        """
        self.config = config
        self.logger = getMSLogger(getattr(config, 'verbose', False), logger)
        self._parseConfig(config)
        self.logger.info("Configuration including default values:\n%s",
                         self.msConfig)
        # per-service status dictionaries, updated by the polling threads
        self.statusTrans = {}
        self.statusMon = {}
        self.statusOutput = {}
        self.statusRuleCleaner = {}
        self.statusUnmerged = {}

        # initialize transferor module
        if 'transferor' in self.services:
            self.msTransferor = MSTransferor(self.msConfig, logger=self.logger)
            thname = 'MSTransferor'
            # polls requests in 'assigned' status every msConfig['interval'] seconds
            self.transfThread = start_new_thread(
                thname, daemon, (self.transferor, 'assigned',
                                 self.msConfig['interval'], self.logger))
            self.logger.info("### Running %s thread %s", thname,
                             self.transfThread.running())

        # initialize monitoring module
        if 'monitor' in self.services:
            self.msMonitor = MSMonitor(self.msConfig, logger=self.logger)
            thname = 'MSMonitor'
            # polls requests in 'staging' status
            self.monitThread = start_new_thread(
                thname, daemon, (self.monitor, 'staging',
                                 self.msConfig['interval'], self.logger))
            self.logger.info("+++ Running %s thread %s", thname,
                             self.monitThread.running())

        # initialize output module
        if 'output' in self.services:
            reqStatus = ['closed-out', 'announced']
            # thread safe cache to keep the last X requests processed in MSOutput;
            # shared between the producer and the consumer instances below
            requestNamesCached = deque(
                maxlen=self.msConfig.get("cacheRequestSize", 10000))

            thname = 'MSOutputConsumer'
            self.msOutputConsumer = MSOutput(self.msConfig,
                                             mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            # set the consumer to run twice faster than the producer
            consumerInterval = self.msConfig['interval'] // 2
            self.outputConsumerThread = start_new_thread(
                thname, daemon, (self.outputConsumer, reqStatus,
                                 consumerInterval, self.logger))
            self.logger.info("=== Running %s thread %s", thname,
                             self.outputConsumerThread.running())

            thname = 'MSOutputProducer'
            self.msOutputProducer = MSOutput(self.msConfig,
                                             mode=thname,
                                             reqCache=requestNamesCached,
                                             logger=self.logger)
            self.outputProducerThread = start_new_thread(
                thname, daemon, (self.outputProducer, reqStatus,
                                 self.msConfig['interval'], self.logger))
            self.logger.info("=== Running %s thread %s", thname,
                             self.outputProducerThread.running())

        # initialize rule cleaner module
        if 'ruleCleaner' in self.services:
            reqStatus = ['announced', 'aborted-completed', 'rejected']
            self.msRuleCleaner = MSRuleCleaner(self.msConfig,
                                               logger=self.logger)
            thname = 'MSRuleCleaner'
            self.ruleCleanerThread = start_new_thread(
                thname, daemon, (self.ruleCleaner, reqStatus,
                                 self.msConfig['interval'], self.logger))
            self.logger.info("--- Running %s thread %s", thname,
                             self.ruleCleanerThread.running())

        # initialize unmerged module
        if 'unmerged' in self.services:
            self.msUnmerged = MSUnmerged(self.msConfig, logger=self.logger)
            thname = 'MSUnmerged'
            # NOTE(review): this thread uses daemonOpt (no status argument)
            # instead of daemon used by the other services — presumably because
            # unmerged cleanup is not driven by a request status; confirm.
            self.unmergedThread = start_new_thread(
                thname, daemonOpt,
                (self.unmerged, self.msConfig['interval'], self.logger))
            self.logger.info("--- Running %s thread %s", thname,
                             self.unmergedThread.running())