Example #1
0
 def clean(self, dataset_rankings, site_rankings):
     """
     Suggest deletions based on dataset and site rankings

     On every pass the dataset with the smallest rank is taken and one of
     the ranked sites that hosts it is selected through weighted_choice.
     Both dicts passed in are modified in place:

     * a site's value is reduced by the size of each dataset suggested for
       deletion from it, and the site is removed once the value is <= 0
     * a dataset's rank is incremented by one after each suggestion
     * a dataset with no remaining candidate site is removed

     :param dataset_rankings: dict, dataset name -> rank
     :param site_rankings: dict, site name -> value decremented by the
         size (GB) of every dataset suggested for deletion from the site
     :return: list of (dataset_name, site_name) tuples
     """
     deletions = list()
     # dataset name -> sites already suggested for it during this call.
     # get_sites() is not affected by a suggestion, so without this the
     # same (dataset, site) pair could be returned, and counted, repeatedly.
     suggested_sites = dict()
     deleted_gb = 0
     # min() raises ValueError on an empty sequence, so also stop once every
     # dataset has been dropped from the rankings.
     while site_rankings and dataset_rankings:
         # items() instead of iteritems() keeps this runnable on Python 2 and 3
         dataset_name, _ = min(dataset_rankings.items(), key=operator.itemgetter(1))
         size_gb = self.datasets.get_size(dataset_name)
         already_suggested = suggested_sites.setdefault(dataset_name, set())
         candidate_sites = set(self.datasets.get_sites(dataset_name)) - already_suggested
         # Keep only the hosting sites that are still ranked
         tmp_site_rankings = {
             site_name: site_rankings[site_name]
             for site_name in candidate_sites
             if site_name in site_rankings
         }
         if not tmp_site_rankings:
             # nothing left to delete for this dataset, try the next lowest
             del dataset_rankings[dataset_name]
             continue
         site_name = weighted_choice(tmp_site_rankings)
         already_suggested.add(site_name)
         deletions.append((dataset_name, site_name))
         deleted_gb += size_gb
         site_rankings[site_name] -= size_gb
         dataset_rankings[dataset_name] += 1
         if site_rankings[site_name] <= 0:
             del site_rankings[site_name]
     self.logger.info("Deleted %dGB", deleted_gb)
     return deletions
Example #2
0
 def balance(self):
     """
     Balance system by creating new replicas based on popularity

     Datasets are drawn with weighted_choice from the dataset rankings
     until self.max_gb has been subscribed, the drawn dataset ranks below
     self.min_rank, or no datasets/sites are left. For every drawn dataset
     a destination is chosen with weighted_choice among the sites that do
     not already host it, have a rank > 0 and report enough available
     storage for it.

     :return: list of (dataset_name, site_name) tuples
     """
     subscriptions = list()
     dataset_rankings = self.rankings.dataset_rankings()
     site_rankings = self.rankings.site_rankings()
     subscribed_gb = 0
     while (subscribed_gb < self.max_gb) and dataset_rankings and site_rankings:
         dataset_name = weighted_choice(dataset_rankings)
         if (not dataset_name) or (dataset_rankings[dataset_name] < self.min_rank):
             break
         size_gb = self.datasets.get_size(dataset_name)
         sites_with_replica = set(self.datasets.get_sites(dataset_name))
         # Build a fresh candidate dict for every dataset. Filtering
         # site_rankings itself would drop sites for all later datasets too.
         tmp_site_rankings = dict()
         for site_name, site_rank in site_rankings.items():
             if (site_name in sites_with_replica) or (site_rank <= 0):
                 continue
             if self.sites.get_available_storage(site_name) < size_gb:
                 continue
             tmp_site_rankings[site_name] = site_rank
         if not tmp_site_rankings:
             # Only this dataset cannot be placed; drop it and draw another
             del dataset_rankings[dataset_name]
             continue
         site_name = weighted_choice(tmp_site_rankings)
         subscriptions.append((dataset_name, site_name))
         subscribed_gb += size_gb
         avail_storage = self.sites.get_available_storage(site_name)
         self.logger.info('rank: %s\tsize: %.2f\tdataset: %s', dataset_rankings[dataset_name], size_gb, dataset_name)
         self.logger.info('rank: %s\tstorage: %d\tsite: %s', site_rankings[site_name], avail_storage, site_name)
         new_avail_storage = avail_storage - size_gb
         # NOTE(review): these branches look inverted. As written a site's
         # rank ends up <= 0 after any subscription, so it is not selected
         # again during this call. Kept as is because that also prevents
         # over-filling a site; confirm the intent before changing it.
         if new_avail_storage > 0:
             new_rank = 0.0
         else:
             new_rank = (site_rankings[site_name]/avail_storage)*new_avail_storage
         site_rankings[site_name] = new_rank
         del dataset_rankings[dataset_name]
     self.logger.info('Subscribed %dGB', subscribed_gb)
     return subscriptions
Example #3
0
 def replicate(self, dataset_rankings, site_rankings):
     """
     Balance system by creating new replicas based on popularity

     On every pass the highest ranked dataset is taken and a destination is
     selected with weighted_choice among the sites that do not already host
     it (or were already chosen for it in this call), have a rank > 0 and
     still have room for it. Stops when self.max_gb has been subscribed,
     the best dataset rank drops below 1, or either ranking is exhausted.

     Both dicts passed in are modified in place: a dataset's rank is
     decremented by one per subscription, datasets that cannot be placed
     are removed, and sites whose remaining storage reaches zero are
     removed.

     :param dataset_rankings: dict, dataset name -> rank
     :param site_rankings: dict, site name -> rank
     :return: list of (dataset_name, site_name) tuples
     """
     subscriptions = list()
     # dataset name -> sites already chosen for it during this call.
     # get_sites() is not affected by a new subscription, so without this
     # the same (dataset, site) pair could be subscribed repeatedly.
     planned_sites = dict()
     subscribed_gb = 0
     sites_available_storage_gb = self.sites.get_all_available_storage()
     # max() raises ValueError on an empty sequence, so also stop once every
     # dataset has been dropped from the rankings.
     while (subscribed_gb < self.max_gb) and site_rankings and dataset_rankings:
         # items() instead of iteritems() keeps this runnable on Python 2 and 3
         dataset_name, dataset_rank = max(dataset_rankings.items(), key=operator.itemgetter(1))
         if (not dataset_name) or (dataset_rank < 1):
             break
         size_gb = self.datasets.get_size(dataset_name)
         unavailable_sites = set(self.datasets.get_sites(dataset_name))
         unavailable_sites.update(planned_sites.get(dataset_name, ()))
         tmp_site_rankings = dict()
         for site_name, site_rank in site_rankings.items():
             if (site_name in unavailable_sites) or (site_rank <= 0):
                 continue
             # Check against the locally tracked storage so space taken by
             # earlier subscriptions in this call is accounted for. Sites
             # missing from the snapshot cannot be budgeted and are skipped.
             avail_gb = sites_available_storage_gb.get(site_name)
             if (avail_gb is None) or (avail_gb < size_gb):
                 continue
             tmp_site_rankings[site_name] = site_rank
         if not tmp_site_rankings:
             # this dataset cannot be placed anywhere, try the next best one
             del dataset_rankings[dataset_name]
             continue
         site_name = weighted_choice(tmp_site_rankings)
         planned_sites.setdefault(dataset_name, set()).add(site_name)
         subscriptions.append((dataset_name, site_name))
         subscribed_gb += size_gb
         sites_available_storage_gb[site_name] -= size_gb
         self.logger.info("%s : added", dataset_name)
         if sites_available_storage_gb[site_name] <= 0:
             del site_rankings[site_name]
         dataset_rankings[dataset_name] -= 1
     self.logger.info("Subscribed %dGB", subscribed_gb)
     return subscriptions
Example #4
0
 def replicate(self, dataset_rankings, site_rankings):
     """
     Balance system by creating new replicas based on popularity

     The highest ranked dataset is processed first. Its destination is
     picked with weighted_choice from the sites that have a rank > 0, have
     enough remaining storage, do not host the dataset yet and were not
     already picked for it earlier in this call. The loop ends when
     self.max_gb has been subscribed, the best dataset rank is below 1, or
     no datasets or sites remain.

     Side effects on the arguments: each subscription lowers the dataset's
     rank by one; datasets without any eligible site are deleted from
     dataset_rankings; sites whose remaining storage drops to zero or
     below are deleted from site_rankings.

     :param dataset_rankings: dict, dataset name -> rank
     :param site_rankings: dict, site name -> rank
     :return: list of (dataset_name, site_name) tuples
     """
     subscriptions = list()
     subscribed_gb = 0
     sites_available_storage_gb = self.sites.get_all_available_storage()
     # Sites picked per dataset in this call; get_sites() does not reflect
     # subscriptions made here, so this prevents duplicate pairs.
     chosen_sites = dict()
     # The dataset_rankings guard avoids max() raising ValueError once all
     # datasets have been removed.
     while (subscribed_gb < self.max_gb) and site_rankings and dataset_rankings:
         # items() works on both Python 2 and 3, iteritems() only on 2
         dataset = max(dataset_rankings.items(), key=operator.itemgetter(1))
         dataset_name = dataset[0]
         dataset_rank = dataset[1]
         if (not dataset_name) or (dataset_rank < 1):
             break
         size_gb = self.datasets.get_size(dataset_name)
         unavailable_sites = set(self.datasets.get_sites(dataset_name))
         unavailable_sites |= chosen_sites.get(dataset_name, set())
         tmp_site_rankings = dict()
         for site_name, site_rank in site_rankings.items():
             if site_rank <= 0 or site_name in unavailable_sites:
                 continue
             # Use the locally decremented storage figures so earlier
             # subscriptions from this call count against the site; a site
             # without a storage entry cannot be budgeted and is skipped.
             remaining_gb = sites_available_storage_gb.get(site_name)
             if remaining_gb is None or remaining_gb < size_gb:
                 continue
             tmp_site_rankings[site_name] = site_rank
         if not tmp_site_rankings:
             # no eligible site for this dataset, move on to the next one
             del dataset_rankings[dataset_name]
             continue
         site_name = weighted_choice(tmp_site_rankings)
         chosen_sites.setdefault(dataset_name, set()).add(site_name)
         subscriptions.append((dataset_name, site_name))
         subscribed_gb += size_gb
         sites_available_storage_gb[site_name] -= size_gb
         self.logger.info('%s : added', dataset_name)
         if sites_available_storage_gb[site_name] <= 0:
             del site_rankings[site_name]
         dataset_rankings[dataset_name] -= 1
     self.logger.info('Subscribed %dGB', subscribed_gb)
     return subscriptions
Example #5
0
 def test_weighted_choice(self):
     """Test weighted_choice function

     The value returned for a two-entry weight dict must be one of its keys.
     """
     weights = {'foo': 1.5, 'bar': 5.9}
     valid_keys = weights.keys()
     picked = weighted_choice(weights)
     is_valid = picked in valid_keys
     self.assertTrue(is_valid)
Example #6
0
 def test_weighted_choice(self):
     """Test weighted_choice function

     Passes when the selected item is a key of the weight mapping.
     """
     weight_by_name = {'foo': 1.5, 'bar': 5.9}
     allowed = weight_by_name.keys()
     selection = weighted_choice(weight_by_name)
     found = selection in allowed
     self.assertTrue(found)