from datetime import timedelta
from itertools import chain

from django.contrib.admin.models import ADDITION, CHANGE, DELETION
from django.db import connection
from django.db.models import Avg, Count, Max, Min, StdDev
from django.utils.dateparse import parse_datetime
from django.utils.timezone import now

# The models (MirrorUrl, MirrorLog, Package, Repo, Arch, Update), the
# DEFAULT_CUTOFF constant, the logger, and the database_vendor() helper are
# project-level names assumed to be importable alongside these functions.


def status_data(cutoff=DEFAULT_CUTOFF, mirror_id=None):
    cutoff_time = now() - cutoff

    if mirror_id is not None:
        params = [cutoff_time, mirror_id]
        mirror_where = 'AND u.mirror_id = %s'
    else:
        params = [cutoff_time]
        mirror_where = ''

    vendor = database_vendor(MirrorUrl)
    if vendor == 'sqlite':
        sql = """
SELECT l.url_id, u.mirror_id,
    COUNT(l.id) AS check_count,
    COUNT(l.last_sync) AS success_count,
    MAX(l.last_sync) AS last_sync,
    MAX(l.check_time) AS last_check,
    AVG(l.duration) AS duration_avg,
    0.0 AS duration_stddev,
    AVG(STRFTIME('%%s', check_time) - STRFTIME('%%s', last_sync)) AS delay
FROM mirrors_mirrorlog l
JOIN mirrors_mirrorurl u ON u.id = l.url_id
WHERE l.check_time >= %s
""" + mirror_where + """
GROUP BY l.url_id, u.mirror_id
"""
    else:
        sql = """
SELECT l.url_id, u.mirror_id,
    COUNT(l.id) AS check_count,
    COUNT(l.last_sync) AS success_count,
    MAX(l.last_sync) AS last_sync,
    MAX(l.check_time) AS last_check,
    AVG(l.duration) AS duration_avg,
    STDDEV(l.duration) AS duration_stddev,
    AVG(check_time - last_sync) AS delay
FROM mirrors_mirrorlog l
JOIN mirrors_mirrorurl u ON u.id = l.url_id
WHERE l.check_time >= %s
""" + mirror_where + """
GROUP BY l.url_id, u.mirror_id
"""

    cursor = connection.cursor()
    cursor.execute(sql, params)
    url_data = dictfetchall(cursor)

    # sqlite loves to return less than ideal types
    if vendor == 'sqlite':
        for item in url_data:
            if item['delay'] is not None:
                item['delay'] = timedelta(seconds=item['delay'])
            if item['last_sync'] is not None:
                item['last_sync'] = parse_datetime(item['last_sync'])
            item['last_check'] = parse_datetime(item['last_check'])

    return {item['url_id']: item for item in url_data}
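
# status_data() leans on a dictfetchall() helper that is not shown above.
# The standard pattern from the Django documentation for turning raw cursor
# rows into dicts keyed by column name looks like this:
def dictfetchall(cursor):
    # return all rows from a cursor as a list of dicts,
    # keyed by the column names of the SELECT
    columns = [col[0] for col in cursor.description]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]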

def get_mirror_statuses(cutoff=DEFAULT_CUTOFF):
    cutoff_time = now() - cutoff
    # I swear, this actually has decent performance...
    urls = MirrorUrl.objects.select_related('mirror', 'protocol').filter(
        mirror__active=True, mirror__public=True,
        logs__check_time__gte=cutoff_time).annotate(
        check_count=Count('logs'),
        success_count=Count('logs__duration'),
        last_sync=Max('logs__last_sync'),
        last_check=Max('logs__check_time'),
        duration_avg=Avg('logs__duration'))

    vendor = database_vendor(MirrorUrl)
    if vendor != 'sqlite':
        urls = urls.annotate(duration_stddev=StdDev('logs__duration'))

    # The Django ORM makes it really hard to get actual average delay in the
    # above query, so run a separate query for it and we will process the
    # results here.
    times = MirrorLog.objects.filter(is_success=True, last_sync__isnull=False,
            check_time__gte=cutoff_time)
    delays = {}
    for log in times:
        delay = log.check_time - log.last_sync
        delays.setdefault(log.url_id, []).append(delay)

    if urls:
        last_check = max([u.last_check for u in urls])
        num_checks = max([u.check_count for u in urls])
        check_info = MirrorLog.objects.filter(
            check_time__gte=cutoff_time).aggregate(
            mn=Min('check_time'), mx=Max('check_time'))
        if num_checks > 1:
            check_frequency = (check_info['mx'] - check_info['mn']) \
                / (num_checks - 1)
        else:
            check_frequency = None
    else:
        last_check = None
        num_checks = 0
        check_frequency = None

    for url in urls:
        # fake the standard deviation for local testing setups
        if vendor == 'sqlite':
            setattr(url, 'duration_stddev', 0.0)
        annotate_url(url, delays)

    return {
        'cutoff': cutoff,
        'last_check': last_check,
        'num_checks': num_checks,
        'check_frequency': check_frequency,
        'urls': urls,
    }
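
# annotate_url() is also not shown here. A minimal sketch of what it
# presumably does, under the assumption that it attaches a success rate and
# the mean of the collected delays to each URL; the attribute names and any
# extra scoring in the real helper are assumptions:
def annotate_url(url, delays):
    # percentage of checks within the cutoff window that succeeded
    url.success_rate = url.success_count * 100 / url.check_count
    url_delays = delays.get(url.id)
    if url_delays:
        # average the per-check timedeltas gathered by the caller
        url.delay = sum(url_delays, timedelta()) / len(url_delays)
    else:
        url.delay = None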

def run(self):
    logger.debug("starting threads")
    for thread in self.threads:
        thread.start()
    logger.debug("joining on all threads")
    self.tasks.join()

    logger.debug("processing %d log entries", len(self.logs))
    if database_vendor(MirrorLog, mode='write') == 'sqlite':
        for log in self.logs:
            log.save(force_insert=True)
    else:
        MirrorLog.objects.bulk_create(self.logs)
    logger.debug("log entries saved")
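
# run() assumes surrounding scaffolding: a task queue feeding worker threads,
# each appending an unsaved MirrorLog to self.logs. A minimal sketch of that
# setup, assuming a hypothetical check_mirror_url() that probes one mirror
# and returns a MirrorLog describing the result; the run() method above
# would live on this class:
from queue import Queue
from threading import Thread


class MirrorCheckPool:
    def __init__(self, urls, num_threads=10):
        self.tasks = Queue()
        self.logs = []
        for url in urls:
            self.tasks.put(url)
        self.threads = []
        for _ in range(num_threads):
            thread = Thread(target=self.worker)
            # daemon threads let the process exit once tasks.join() returns
            thread.daemon = True
            self.threads.append(thread)

    def worker(self):
        while True:
            url = self.tasks.get()
            try:
                self.logs.append(check_mirror_url(url))  # hypothetical
            finally:
                self.tasks.task_done()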

def multilib_differences():
    # Query for checking multilib out of date-ness
    if database_vendor(Package) == 'sqlite':
        pkgname_sql = """
            CASE WHEN ml.pkgname LIKE %s
                THEN SUBSTR(ml.pkgname, 7)
            WHEN ml.pkgname LIKE %s
                THEN SUBSTR(ml.pkgname, 1, LENGTH(ml.pkgname) - 9)
            ELSE
                ml.pkgname
            END
        """
    else:
        pkgname_sql = """
            CASE WHEN ml.pkgname LIKE %s
                THEN SUBSTRING(ml.pkgname, 7)
            WHEN ml.pkgname LIKE %s
                THEN SUBSTRING(ml.pkgname FROM 1 FOR CHAR_LENGTH(ml.pkgname) - 9)
            ELSE
                ml.pkgname
            END
        """
    sql = """
SELECT ml.id, reg.id
    FROM packages ml
    JOIN packages reg
    ON (
        reg.pkgname = (""" + pkgname_sql + """)
        AND reg.pkgver != ml.pkgver
    )
    JOIN repos r ON reg.repo_id = r.id
    WHERE ml.repo_id = %s
    AND r.testing = %s
    AND r.staging = %s
    AND reg.arch_id = %s
    ORDER BY ml.last_update
"""
    multilib = Repo.objects.get(name__iexact='multilib')
    i686 = Arch.objects.get(name='i686')
    params = ['lib32-%', '%-multilib', multilib.id, False, False, i686.id]

    cursor = connection.cursor()
    cursor.execute(sql, params)
    results = cursor.fetchall()

    # fetch all of the necessary packages
    to_fetch = set(chain.from_iterable(results))
    pkgs = Package.objects.normal().in_bulk(to_fetch)

    return [(pkgs[ml], pkgs[reg]) for ml, reg in results]
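
# A hypothetical consumer, e.g. a report view, iterating the
# (multilib package, regular package) pairs; full_version stands in for
# whatever version formatting the Package model provides:
for ml, reg in multilib_differences():
    # ml is the lib32-*/*-multilib package, reg its regular i686 counterpart
    print('%s: %s (multilib) vs. %s (i686)' % (
        reg.pkgname, ml.full_version, reg.full_version))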

def batched_bulk_create(model, all_objects):
    # for short lists, just bulk_create as we should be fine
    if len(all_objects) < 20:
        return model.objects.bulk_create(all_objects)

    if database_vendor(model, mode='write') == 'sqlite':
        # 999 max variables in each SQL statement
        incr = 999 // len(model._meta.fields)
    else:
        incr = 1000

    def chunks():
        offset = 0
        while offset < len(all_objects):
            yield all_objects[offset:offset + incr]
            offset += incr

    for items in chunks():
        model.objects.bulk_create(items)
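
# Usage matches bulk_create(); for example, persisting the log entries from a
# check run without tripping sqlite's 999 bound-variable limit (logs is
# assumed to be a large list of unsaved MirrorLog instances):
#     batched_bulk_create(MirrorLog, logs)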

def log_update(self, old_pkg, new_pkg):
    '''Utility method to help log an update. This will determine the type
    based on how many packages are passed in, and will pull the relevant
    necessary fields off the given packages. Note that in some cases, this
    is a no-op if we know this database type supports triggers to add these
    rows instead.'''
    if database_vendor(Package, 'write') in ('sqlite', 'postgresql'):
        # we log updates using database triggers for these backends
        return

    update = Update()
    if new_pkg:
        update.action_flag = ADDITION
        update.package = new_pkg
        update.arch = new_pkg.arch
        update.repo = new_pkg.repo
        update.pkgname = new_pkg.pkgname
        update.pkgbase = new_pkg.pkgbase
        update.new_pkgver = new_pkg.pkgver
        update.new_pkgrel = new_pkg.pkgrel
        update.new_epoch = new_pkg.epoch
    if old_pkg:
        if new_pkg:
            update.action_flag = CHANGE
            # ensure we should even be logging this
            if (old_pkg.pkgver == new_pkg.pkgver and
                    old_pkg.pkgrel == new_pkg.pkgrel and
                    old_pkg.epoch == new_pkg.epoch):
                # all relevant fields were the same; e.g. a force update
                return
        else:
            update.action_flag = DELETION
            update.arch = old_pkg.arch
            update.repo = old_pkg.repo
            update.pkgname = old_pkg.pkgname
            update.pkgbase = old_pkg.pkgbase
        update.old_pkgver = old_pkg.pkgver
        update.old_pkgrel = old_pkg.pkgrel
        update.old_epoch = old_pkg.epoch

    update.save(force_insert=True)
    return update
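
# Hypothetical call sites, assuming a repo-sync loop that diffs a fresh repo
# database against the stored packages:
#     self.log_update(None, new_pkg)     # package appeared in the repo
#     self.log_update(old_pkg, new_pkg)  # version changed (no-op if equal)
#     self.log_update(old_pkg, None)     # package was removed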