def check_did(self, did):
    decision = {'did': ':'.join(did)}

    # Skip DIDs for which a replica was already added recently.
    if self._added_cache.check_dataset(':'.join(did)):
        decision['error_reason'] = 'already added replica for this did in the last 24h'
        return decision

    # Only data and MC datasets are considered.
    if (not did[0].startswith('data')) and (not did[0].startswith('mc')):
        decision['error_reason'] = 'not a data or mc dataset'
        return decision

    # Filter on the datatype token embedded in the dataset name.
    datatype = did[1].split('.')[4].split('_')[0]
    if datatype not in self._datatypes:
        decision['error_reason'] = 'wrong datatype'
        return decision

    try:
        meta = get_did(did[0], did[1])
    except DataIdentifierNotFound:
        decision['error_reason'] = 'did does not exist'
        return decision
    if meta['length'] is None:
        meta['length'] = 0
    if meta['bytes'] is None:
        meta['bytes'] = 0
    logging.debug('got %s:%s, num_files: %d, bytes: %d' % (did[0], did[1], meta['length'], meta['bytes']))

    decision['length'] = meta['length']
    decision['bytes'] = meta['bytes']

    # Throttle on the total bytes and files added during the current hour.
    total_added_bytes = sum(self._added_bytes.get_series('total'))
    total_added_files = sum(self._added_files.get_series('total'))
    logging.debug("total_added_bytes: %d" % total_added_bytes)
    logging.debug("total_added_files: %d" % total_added_files)

    if (total_added_bytes + meta['bytes']) > self._max_bytes_hour:
        decision['error_reason'] = 'above bytes limit of %d bytes' % self._max_bytes_hour
        return decision
    if (total_added_files + meta['length']) > self._max_files_hour:
        decision['error_reason'] = 'above files limit of %d files' % self._max_files_hour
        return decision

    # Record the access in the DID cache and keep the previous access count.
    last_accesses = self._dc.get_did(did)
    self._dc.add_did(did)

    decision['last_accesses'] = last_accesses

    try:
        # Treat a missing popularity value as 0.0 so the comparison below is safe.
        pop = get_popularity(did) or 0.0
        decision['popularity'] = pop
    except Exception:
        decision['error_reason'] = 'problems connecting to ES'
        return decision

    if (last_accesses < self._min_recent_requests) and (pop < self._min_popularity):
        decision['error_reason'] = 'did not popular enough'
        return decision

    return decision
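
# Hedged sketch, not part of the original algorithm: a hypothetical helper that
# isolates the hourly throttling gate applied in check_did() above. A dataset is
# only considered if the bytes and files already added this hour, plus the new
# dataset, stay within the configured limits. Name and signature are assumptions
# made for illustration only.
def _within_hourly_budget(added_bytes, added_files, new_bytes, new_files,
                          max_bytes_hour, max_files_hour):
    """Return True if adding this dataset keeps the hourly totals within budget."""
    return ((added_bytes + new_bytes) <= max_bytes_hour and
            (added_files + new_files) <= max_files_hour)

# Example with made-up numbers: 2 TB and 500 files already added this hour, a
# candidate dataset of 300 GB / 100 files, and limits of 5 TB / 1000 files:
#   _within_hourly_budget(2e12, 500, 3e11, 100, 5e12, 1000) -> True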
def place(self, did):
    self.__update_penalties()
    decision = {'did': ':'.join(did)}

    # Only data and MC datasets are considered.
    if (not did[0].startswith('data')) and (not did[0].startswith('mc')):
        decision['error_reason'] = 'not a data or mc dataset'
        return decision

    try:
        meta = get_did(did[0], did[1])
    except DataIdentifierNotFound:
        decision['error_reason'] = 'did does not exist'
        return decision
    if meta['length'] is None:
        meta['length'] = 0
    if meta['bytes'] is None:
        meta['bytes'] = 0
    logging.debug('got %s:%s, num_files: %d, bytes: %d' % (did[0], did[1], meta['length'], meta['bytes']))

    decision['length'] = meta['length']
    decision['bytes'] = meta['bytes']

    last_accesses = self._dc.get_did(did)
    self._dc.add_did(did)

    decision['last_accesses'] = last_accesses

    # Treat a missing popularity value as 0.0 so the comparison below is safe.
    pop = get_popularity(did) or 0.0
    decision['popularity'] = pop

    if (last_accesses < 5) and (pop < 10.0):
        decision['error_reason'] = 'did not popular enough'
        return decision

    # Collect the available DATADISK replicas and remove those RSEs from the
    # candidate list. Work on a copy so the shared RSE list is not mutated.
    free_rses = list(self._rses)
    available_reps = []
    reps = list_dataset_replicas(did[0], did[1])
    num_reps = 0
    for rep in reps:
        rse_attr = list_rse_attributes(rep['rse'])
        if 'type' not in rse_attr:
            continue
        if rse_attr['type'] != 'DATADISK':
            continue
        if rep['state'] == ReplicaState.AVAILABLE:
            if rep['rse'] in free_rses:
                free_rses.remove(rep['rse'])
            available_reps.append(rep['rse'])
            num_reps += 1

    decision['replica_rses'] = available_reps
    decision['num_replicas'] = num_reps
    if num_reps >= 5:
        decision['error_reason'] = 'more than 4 replicas already exist'
        return decision

    # Rank the remaining RSEs by free-space percentage, weighted down by the
    # current penalty of each RSE, and pick the best one.
    rse_ratios = {}
    space_info = self._fsc.get_rse_space()
    for rse in free_rses:
        rse_space = space_info[rse]
        penalty = self._penalties[rse]
        rse_ratios[rse] = float(rse_space['free']) / float(rse_space['total']) * 100.0 / penalty

    sorted_rses = sorted(rse_ratios.items(), key=itemgetter(1), reverse=True)
    decision['destination_rse'] = sorted_rses[0][0]
    decision['rse_ratios'] = sorted_rses
    # Penalise the chosen RSE so consecutive placements spread over different sites.
    self._penalties[sorted_rses[0][0]] = 10.0

    return decision
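
# Hedged sketch, not part of the original module: a standalone illustration of
# the RSE ranking used at the end of place() above. Candidate RSEs are scored by
# free-space percentage divided by their current penalty, and the highest score
# wins. The function name and the space/penalty values below are made up for
# the example.
from operator import itemgetter


def rank_rses(space_info, penalties):
    """Return (rse, score) pairs sorted best-first by free/total * 100 / penalty."""
    ratios = {}
    for rse, space in space_info.items():
        ratios[rse] = float(space['free']) / float(space['total']) * 100.0 / penalties[rse]
    return sorted(ratios.items(), key=itemgetter(1), reverse=True)

# Example with made-up numbers:
#   rank_rses({'SITE_A_DATADISK': {'free': 400, 'total': 1000},
#              'SITE_B_DATADISK': {'free': 300, 'total': 500}},
#             {'SITE_A_DATADISK': 1.0, 'SITE_B_DATADISK': 10.0})
#   -> [('SITE_A_DATADISK', 40.0), ('SITE_B_DATADISK', 6.0)]
# SITE_A wins despite the lower free ratio because SITE_B carries a penalty.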