Example #1
0
 def test_crab(self):
     "Test crab functions"
     print ""
     expected = 'T2_US_Nebraska'
     crab = CRABService(config=self.config)
     query = 'GLIDEIN_CMSSite =?= "%s"' % ('T2_US_Nebraska')
     attributes = ["GLIDEIN_CMSSite"]
     ads = crab.fetch_cluster_ads(query=query, attributes=attributes)
     for ad in ads:
         self.assertEqual(ad['GLIDEIN_CMSSite'], expected)
Example #2
0
 def __init__(self, config=dict()):
     self.logger = logging.getLogger(__name__)
     self.config = config
     self.intelroccs = IntelROCCSService(self.config)
     self.crab = CRABService(self.config)
     self.storage = StorageManager(self.config)
Example #3
0
class SiteManager(object):
    """
    Keep track of site data
    """
    def __init__(self, config=dict()):
        self.logger = logging.getLogger(__name__)
        self.config = config
        self.intelroccs = IntelROCCSService(self.config)
        self.crab = CRABService(self.config)
        self.storage = StorageManager(self.config)

    def initiate_db(self):
        """
        Initiate Site database
        Does exactly the same as update_db
        """
        self.update_db()

    def update_db(self):
        """
        Initiate site data in database
        Get general data about all sites
        """
        api = 'Detox'
        file_ = 'SitesInfo.txt'
        intelroccs_data = self.intelroccs.fetch(api=api, params=file_, secure=False)
        for site_data in get_json(intelroccs_data, 'data'):
            self.insert_site_data(site_data)

    def insert_site_data(self, site_data):
        """
        Insert site into database
        """
        coll = 'site_data'
        site_name = str(site_data[4])
        site_status = int(site_data[0])
        site_quota = int(site_data[1])*10**3
        query = {'name':site_name}
        data = {'$set':{'name':site_name, 'status':site_status, 'quota_gb':site_quota}}
        self.storage.update_data(coll=coll, query=query, data=data, upsert=True)

    def update_cpu(self):
        """
        Update maximum CPU capacity for site
        """
        active_sites = self.get_active_sites()
        for site_name in active_sites:
            # remove older values
            date = datetime.datetime.utcnow() - datetime.timedelta(days=30)
            coll = 'site_data'
            query = {'name':site_name}
            data = {'$pull':{'cpu_data':{'date':{'$lt':date}}}}
            self.storage.update_data(coll=coll, query=query, data=data)
            # get CRAB data about site
            query = 'GLIDEIN_CMSSite =?= "%s" && CPUs > 0' % (site_name)
            attributes = ['GLIDEIN_CMSSite', 'CPUs']
            ads = self.crab.fetch_cluster_ads(query, attributes=attributes)
            cpus = 0
            for ad in ads:
                cpus += ad['CPUs']
            # insert new data
            date = datetime.datetime.utcnow()
            query = {'name':site_name}
            data = {'$push':{'cpu_data':{'date':date, 'cpus':cpus}}}
            self.storage.update_data(coll=coll, query=query, data=data)

    def get_active_sites(self):
        """
        Get all sites which are active, includes sites which are not available for replication
        """
        coll = 'site_data'
        pipeline = list()
        match = {'$match':{'status':{'$in':[1, 2]}}}
        pipeline.append(match)
        project = {'$project':{'name':1, '_id':0}}
        pipeline.append(project)
        sites_data = self.storage.get_data(coll=coll, pipeline=pipeline)
        return [site_data['name'] for site_data in sites_data]

    def get_available_sites(self):
        """
        Get all sites which are available for replication
        """
        coll = 'site_data'
        pipeline = list()
        match = {'$match':{'status':1}}
        pipeline.append(match)
        project = {'$project':{'name':1, '_id':0}}
        pipeline.append(project)
        data = self.storage.get_data(coll=coll, pipeline=pipeline)
        return [site['name'] for site in data]

    def get_available_storage(self, site_name):
        """
        Get total AnalysisOps storage available at the site
        """
        coll = 'dataset_data'
        pipeline = list()
        match = {'$match':{'replicas':site_name}}
        pipeline.append(match)
        group = {'$group':{'_id':None, 'size_bytes':{'$sum':'$size_bytes'}}}
        pipeline.append(group)
        project = {'$project':{'size_bytes':1, '_id':0}}
        pipeline.append(project)
        data = self.storage.get_data(coll=coll, pipeline=pipeline)
        try:
            size = data[0]['size_bytes']/10**9
        except:
            return 0
        coll = 'site_data'
        pipeline = list()
        match = {'$match':{'name':site_name}}
        pipeline.append(match)
        project = {'$project':{'quota_gb':1, '_id':0}}
        pipeline.append(project)
        data = self.storage.get_data(coll=coll, pipeline=pipeline)
        quota = data[0]['quota_gb']
        available_gb = (0.95*quota) - size
        return available_gb

    def get_performance(self, site_name):
        """
        Get the maximum number of CPU's for site in last 30 days
        """
        # get maximum number of CPU's and quota
        coll = 'site_data'
        pipeline = list()
        match = {'$match':{'name':site_name}}
        pipeline.append(match)
        unwind = {'$unwind':'$cpu_data'}
        pipeline.append(unwind)
        group = {'$group':{'_id':'$name', 'quota_gb':{'$max':'$quota_gb'}, 'max_cpus':{'$max':'$cpu_data.cpus'}}}
        pipeline.append(group)
        project = {'$project':{'quota_gb':1, 'max_cpus':1, '_id':0}}
        pipeline.append(project)
        data = self.storage.get_data(coll=coll, pipeline=pipeline)
        try:
            max_cpus = data[0]['max_cpus']
            quota = float(data[0]['quota_gb'])/10**3
        except:
            self.logger.warning('Could not get site performance for %s', site_name)
            max_cpus = 0
            quota = 0
        try:
            performance = float(max_cpus)/float(quota)
        except:
            performance = 0.0
        if not (performance > 0):
            performance = 0.0
        return performance