def check_mirror_url(mirror_url):
    url = mirror_url.url + 'lastsync'
    logger.info("checking URL %s", url)
    log = MirrorLog(url=mirror_url, check_time=utc_now())
    try:
        start = time.time()
        result = urllib2.urlopen(url, timeout=10)
        data = result.read()
        result.close()
        end = time.time()
        # lastsync should be an epoch value created by us
        parsed_time = None
        try:
            parsed_time = datetime.utcfromtimestamp(int(data))
            parsed_time = parsed_time.replace(tzinfo=utc)
        except ValueError:
            # it is bad news to try logging the lastsync value;
            # sometimes we get a crazy-encoded web page.
            pass

        log.last_sync = parsed_time
        # if we couldn't parse a time, this is a failure
        if parsed_time is None:
            log.error = "Could not parse time from lastsync"
            log.is_success = False
        log.duration = end - start
        logger.debug("success: %s, %.2f", url, log.duration)
    except urllib2.HTTPError, e:
        if e.code == 404:
            # we have a duration, just not a success
            end = time.time()
            log.duration = end - start
        log.is_success = False
        log.error = str(e)
        logger.debug("failed: %s, %s", url, log.error)

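# Hedged usage sketch (not part of the original module): one way the check
# above could be driven for every active, public mirror URL. The filter
# fields mirror those used in the status queries elsewhere in this codebase;
# the simple sequential loop is an assumption -- the real checker may run
# these concurrently. Note that check_mirror_url() as excerpted builds a
# MirrorLog but does not save or return it.
def check_all_mirror_urls():
    urls = MirrorUrl.objects.select_related('protocol').filter(
            mirror__active=True, mirror__public=True)
    for mirror_url in urls:
        check_mirror_url(mirror_url)
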
def populate_files(dbpkg, repopkg, force=False):
    if not force:
        if not pkg_same_version(repopkg, dbpkg):
            logger.info("DB version (%s) didn't match repo version "
                    "(%s) for package %s, skipping file list addition",
                    dbpkg.full_version, repopkg.full_version, dbpkg.pkgname)
            return
        if not dbpkg.files_last_update or not dbpkg.last_update:
            pass
        elif dbpkg.files_last_update > dbpkg.last_update:
            return

    # only delete files if we are reading a DB that contains them
    if repopkg.has_files:
        delete_pkg_files(dbpkg)
        logger.info("adding %d files for package %s",
                len(repopkg.files), dbpkg.pkgname)
        pkg_files = []
        for f in repopkg.files:
            dirname, filename = f.rsplit('/', 1)
            if filename == '':
                filename = None
            pkgfile = PackageFile(pkg=dbpkg,
                    is_directory=(filename is None),
                    directory=dirname + '/',
                    filename=filename)
            pkg_files.append(pkgfile)
        PackageFile.objects.bulk_create(pkg_files)

        dbpkg.files_last_update = utc_now()
        dbpkg.save()

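# delete_pkg_files() is referenced above (and in db_update() below) but not
# included in this excerpt. A minimal sketch of what it plausibly does,
# assuming only the PackageFile `pkg` foreign key already used in
# populate_files():
def delete_pkg_files(dbpkg):
    # drop any previously imported file rows for this package
    PackageFile.objects.filter(pkg=dbpkg).delete()
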
def get_mirror_url_for_download(cutoff=default_cutoff):
    '''Find a good mirror URL to use for package downloads. If we have mirror
    status data available, it is used to determine a good choice by looking
    at the last batch of status rows.'''
    cutoff_time = utc_now() - cutoff
    status_data = MirrorLog.objects.filter(
            check_time__gte=cutoff_time).aggregate(
            Max('check_time'), Max('last_sync'))
    if status_data['check_time__max'] is not None:
        min_check_time = status_data['check_time__max'] - timedelta(minutes=5)
        min_sync_time = status_data['last_sync__max'] - timedelta(minutes=30)
        best_logs = MirrorLog.objects.filter(is_success=True,
                check_time__gte=min_check_time,
                last_sync__gte=min_sync_time,
                url__mirror__public=True, url__mirror__active=True,
                url__protocol__protocol__iexact='HTTP').order_by(
                'duration')[:1]
        if best_logs:
            return MirrorUrl.objects.get(id=best_logs[0].url_id)

    mirror_urls = MirrorUrl.objects.filter(
            mirror__public=True, mirror__active=True,
            protocol__protocol__iexact='HTTP')
    # look first for an 'Any' URL, then fall back to any HTTP URL
    filtered_urls = mirror_urls.filter(mirror__country='Any')[:1]
    if not filtered_urls:
        filtered_urls = mirror_urls[:1]
    if not filtered_urls:
        return None
    return filtered_urls[0]

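# Hedged usage sketch: how a download view might consume the helper above.
# The view name and the bare redirect to the mirror root are illustrative
# assumptions only; the real views presumably append a repo/arch/filename
# path before redirecting.
def download_redirect(request):
    mirror_url = get_mirror_url_for_download()
    if mirror_url is None:
        raise Http404
    return redirect(mirror_url.url)
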
def set_news_fields(sender, **kwargs):
    news = kwargs['instance']
    now = utc_now()
    news.last_modified = now
    if not news.postdate:
        news.postdate = now
        # http://diveintomark.org/archives/2004/05/28/howto-atom-id
        news.guid = 'tag:%s,%s:%s' % (Site.objects.get_current(),
                now.strftime('%Y-%m-%d'), news.get_absolute_url())

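# set_news_fields() reads kwargs['instance'], which matches Django's pre_save
# signal signature. A hedged sketch of wiring it up; the News model name and
# the dispatch_uid string are assumptions based on the fields used above.
from django.db.models.signals import pre_save

pre_save.connect(set_news_fields, sender=News,
        dispatch_uid="news_set_news_fields")
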
# Note: this is a nested helper from a package-flagging view; it closes over
# pkgs, pkg, email, message, ip_addr, version, flagged_pkgs and request from
# the enclosing scope rather than taking them as arguments.
def perform_updates():
    now = utc_now()
    pkgs.update(flag_date=now)
    # store our flag request
    flag_request = FlagRequest(created=now,
            user_email=email, message=message,
            ip_address=ip_addr, pkgbase=pkg.pkgbase,
            version=version, repo=pkg.repo,
            num_packages=len(flagged_pkgs))
    if request.user.is_authenticated():
        flag_request.user = request.user
    flag_request.save()

def get_mirror_statuses(cutoff=default_cutoff):
    cutoff_time = utc_now() - cutoff
    protocols = list(MirrorProtocol.objects.filter(is_download=True))
    # I swear, this actually has decent performance...
    urls = MirrorUrl.objects.select_related('mirror', 'protocol').filter(
            mirror__active=True, mirror__public=True,
            protocol__in=protocols,
            logs__check_time__gte=cutoff_time).annotate(
            check_count=Count('logs'),
            success_count=Count('logs__duration'),
            last_sync=Max('logs__last_sync'),
            last_check=Max('logs__check_time'),
            duration_avg=Avg('logs__duration'),
            duration_stddev=StdDev('logs__duration')
            ).order_by('-last_sync', '-duration_avg')

    # The Django ORM makes it really hard to get actual average delay in the
    # above query, so run a separate query for it and we will process the
    # results here.
    times = MirrorLog.objects.filter(is_success=True,
            last_sync__isnull=False, check_time__gte=cutoff_time)
    delays = {}
    for log in times:
        delay = log.check_time - log.last_sync
        delays.setdefault(log.url_id, []).append(delay)

    if urls:
        last_check = max([u.last_check for u in urls])
        num_checks = max([u.check_count for u in urls])
        check_info = MirrorLog.objects.filter(
                check_time__gte=cutoff_time).aggregate(
                mn=Min('check_time'), mx=Max('check_time'))
        if num_checks > 1:
            check_frequency = (check_info['mx'] - check_info['mn']) \
                    / (num_checks - 1)
        else:
            check_frequency = None
    else:
        last_check = None
        num_checks = 0
        check_frequency = None

    for url in urls:
        annotate_url(url, delays)

    return {
        'cutoff': cutoff,
        'last_check': last_check,
        'num_checks': num_checks,
        'check_frequency': check_frequency,
        'urls': urls,
    }

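# annotate_url() is called above but not part of this excerpt. A minimal
# sketch under the assumption that it folds each URL's list of sync delays
# into attributes the status page can display; the attribute names (has_sync,
# delay) are guesses, and the real helper may also compute a standard
# deviation or other statistics.
def annotate_url(url, delays):
    url.has_sync = url.id in delays
    url.delay = None
    if url.has_sync:
        url_delays = delays[url.id]
        # average sync delay as a timedelta
        url.delay = sum(url_delays, timedelta()) / len(url_delays)
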
def get_mirror_errors(cutoff=default_cutoff):
    cutoff_time = utc_now() - cutoff
    errors = MirrorLog.objects.filter(
            is_success=False, check_time__gte=cutoff_time,
            url__mirror__active=True, url__mirror__public=True).values(
            'url__url', 'url__country', 'url__protocol__protocol',
            'url__mirror__country', 'error').annotate(
            error_count=Count('error'), last_occurred=Max('check_time')
            ).order_by('-last_occurred', '-error_count')
    errors = list(errors)
    for err in errors:
        err['country'] = err['url__country'] or err['url__mirror__country']
    return errors

def clock(request):
    devs = User.objects.filter(is_active=True).order_by(
            'first_name', 'last_name').select_related('userprofile')

    now = utc_now()
    page_dict = {
        'developers': devs,
        'utc_now': now,
    }

    response = direct_to_template(request, 'devel/clock.html', page_dict)
    if not response.has_header('Expires'):
        expire_time = now.replace(second=0, microsecond=0)
        expire_time += timedelta(minutes=1)
        expire_time = time.mktime(expire_time.timetuple())
        response['Expires'] = http_date(expire_time)
    return response

def handle(self, *args, **options):
    parser = IsoListParser()
    isonames = Iso.objects.values_list('name', flat=True)
    active_isos = parser.parse(settings.ISO_LIST_URL)

    for iso in active_isos:
        # create any names that don't already exist
        if iso not in isonames:
            new = Iso(name=iso, active=True)
            new.save()
        # update those that do if they were marked inactive
        else:
            existing = Iso.objects.get(name=iso)
            if not existing.active:
                existing.active = True
                existing.removed = None
                existing.save()

    now = utc_now()
    # and then mark all other names as no longer active
    Iso.objects.filter(active=True).exclude(name__in=active_isos).update(
            active=False, removed=now)

def signoff_package(request, name, repo, arch, revoke=False):
    packages = get_list_or_404(Package, pkgbase=name,
            arch__name=arch, repo__name__iexact=repo, repo__testing=True)
    package = packages[0]

    spec = SignoffSpecification.objects.get_or_default_from_package(package)

    if revoke:
        try:
            signoff = Signoff.objects.get_from_package(
                    package, request.user, False)
        except Signoff.DoesNotExist:
            raise Http404
        signoff.revoked = utc_now()
        signoff.save()
        created = False
    else:
        # ensure we should even be accepting signoffs
        if spec.known_bad or not spec.enabled:
            return render(request, '403.html', status=403)
        signoff, created = Signoff.objects.get_or_create_from_package(
                package, request.user)

    all_signoffs = Signoff.objects.for_package(package)

    if request.is_ajax():
        data = {
            'created': created,
            'revoked': bool(signoff.revoked),
            'approved': approved_by_signoffs(all_signoffs, spec),
            'required': spec.required,
            'enabled': spec.enabled,
            'known_bad': spec.known_bad,
            'user': str(request.user),
        }
        return HttpResponse(simplejson.dumps(data, ensure_ascii=False),
                mimetype='application/json')

    return redirect('package-signoffs')

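# approved_by_signoffs() is referenced above but not shown in this excerpt.
# A hedged sketch, assuming approval simply means "at least spec.required
# non-revoked signoffs exist":
def approved_by_signoffs(signoffs, spec):
    if signoffs:
        good_signoffs = sum(1 for s in signoffs if not s.revoked)
        return good_signoffs >= spec.required
    return False
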
def report(request, report_name, username=None):
    title = 'Developer Report'

    packages = Package.objects.normal()
    names = attrs = user = None

    if username:
        user = get_object_or_404(User, username=username, is_active=True)
        maintained = PackageRelation.objects.filter(user=user,
                type=PackageRelation.MAINTAINER).values('pkgbase')
        packages = packages.filter(pkgbase__in=maintained)

    maints = User.objects.filter(id__in=PackageRelation.objects.filter(
        type=PackageRelation.MAINTAINER).values('user'))

    if report_name == 'old':
        title = 'Packages last built more than one year ago'
        cutoff = utc_now() - timedelta(days=365)
        packages = packages.filter(
                build_date__lt=cutoff).order_by('build_date')
    elif report_name == 'long-out-of-date':
        title = 'Packages marked out-of-date more than 90 days ago'
        cutoff = utc_now() - timedelta(days=90)
        packages = packages.filter(
                flag_date__lt=cutoff).order_by('flag_date')
    elif report_name == 'big':
        title = 'Packages with compressed size > 50 MiB'
        cutoff = 50 * 1024 * 1024
        packages = packages.filter(
                compressed_size__gte=cutoff).order_by('-compressed_size')
        names = ['Compressed Size', 'Installed Size']
        attrs = ['compressed_size_pretty', 'installed_size_pretty']
        # Format the compressed and installed sizes with MB/GB/etc suffixes
        for package in packages:
            package.compressed_size_pretty = filesizeformat(
                    package.compressed_size)
            package.installed_size_pretty = filesizeformat(
                    package.installed_size)
    elif report_name == 'badcompression':
        title = 'Packages that have little need for compression'
        cutoff = 0.90 * F('installed_size')
        packages = packages.filter(compressed_size__gt=0,
                installed_size__gt=0,
                compressed_size__gte=cutoff).order_by('-compressed_size')
        names = ['Compressed Size', 'Installed Size', 'Ratio', 'Type']
        attrs = ['compressed_size_pretty', 'installed_size_pretty',
                'ratio', 'compress_type']
        # Format the compressed and installed sizes with MB/GB/etc suffixes
        for package in packages:
            package.compressed_size_pretty = filesizeformat(
                    package.compressed_size)
            package.installed_size_pretty = filesizeformat(
                    package.installed_size)
            ratio = package.compressed_size / float(package.installed_size)
            package.ratio = '%.2f' % ratio
            package.compress_type = package.filename.split('.')[-1]
    elif report_name == 'uncompressed-man':
        title = 'Packages with uncompressed manpages'
        # checking for all '.0'...'.9' + '.n' extensions
        bad_files = PackageFile.objects.filter(is_directory=False,
                directory__contains='/man/',
                filename__regex=r'\.[0-9n]').exclude(
                filename__endswith='.gz').exclude(
                filename__endswith='.xz').exclude(
                filename__endswith='.bz2').exclude(
                filename__endswith='.html')
        if username:
            pkg_ids = set(packages.values_list('id', flat=True))
            bad_files = bad_files.filter(pkg__in=pkg_ids)
        bad_files = bad_files.values_list('pkg_id', flat=True).distinct()
        packages = packages.filter(id__in=set(bad_files))
    elif report_name == 'uncompressed-info':
        title = 'Packages with uncompressed infopages'
        # we don't worry about looking for '*.info-1', etc., given that an
        # uncompressed root page probably exists in the package anyway
        bad_files = PackageFile.objects.filter(is_directory=False,
                directory__endswith='/info/', filename__endswith='.info')
        if username:
            pkg_ids = set(packages.values_list('id', flat=True))
            bad_files = bad_files.filter(pkg__in=pkg_ids)
        bad_files = bad_files.values_list('pkg_id', flat=True).distinct()
        packages = packages.filter(id__in=set(bad_files))
    elif report_name == 'unneeded-orphans':
        title = 'Orphan packages required by no other packages'
        owned = PackageRelation.objects.all().values('pkgbase')
        required = PackageDepend.objects.all().values('depname')
        # The two separate calls to exclude are required to do the right thing
        packages = packages.exclude(pkgbase__in=owned).exclude(
                pkgname__in=required)
    elif report_name == 'mismatched-signature':
        title = 'Packages with mismatched signatures'
        names = ['Signature Date', 'Signed By', 'Packager']
        attrs = ['sig_date', 'sig_by', 'packager']
        cutoff = timedelta(hours=24)
        finder = UserFinder()
        filtered = []
        packages = packages.filter(pgp_signature__isnull=False)
        for package in packages:
            sig_date = package.signature.datetime.replace(tzinfo=pytz.utc)
            package.sig_date = sig_date.date()
            key_id = package.signature.key_id
            signer = finder.find_by_pgp_key(key_id)
            package.sig_by = signer or key_id
            if signer is None or signer.id != package.packager_id:
                filtered.append(package)
            elif sig_date > package.build_date + cutoff:
                filtered.append(package)
        packages = filtered
    else:
        raise Http404

    context = {
        'all_maintainers': maints,
        'title': title,
        'maintainer': user,
        'packages': packages,
        'column_names': names,
        'column_attrs': attrs,
    }
    return direct_to_template(request, 'devel/packages.html', context)

def db_update(archname, reponame, pkgs, force=False):
    """
    Parses a list of packages and updates the packages database accordingly.
    """
    logger.info('Updating %s (%s)', reponame, archname)
    dbpkgs = update_common(archname, reponame, pkgs, True)
    repository = Repo.objects.get(name__iexact=reponame)
    architecture = Arch.objects.get(name__iexact=archname)

    # This makes our inner loop where we find packages by name *way* more
    # efficient by not having to go to the database for each package to
    # SELECT them by name.
    dbdict = dict((dbpkg.pkgname, dbpkg) for dbpkg in dbpkgs)

    dbset = set(dbdict.keys())
    syncset = set([pkg.name for pkg in pkgs])

    in_sync_not_db = syncset - dbset
    logger.info("%d packages in sync not db", len(in_sync_not_db))
    # packages in syncdb and not in database (add to database)
    for pkg in (pkg for pkg in pkgs if pkg.name in in_sync_not_db):
        logger.info("Adding package %s", pkg.name)
        dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository)
        try:
            with transaction.commit_on_success():
                populate_pkg(dbpkg, pkg, timestamp=utc_now())
        except IntegrityError:
            logger.warning("Could not add package %s; "
                    "not fatal if another thread beat us to it.",
                    pkg.name, exc_info=True)

    # packages in database and not in syncdb (remove from database)
    for pkgname in (dbset - syncset):
        logger.info("Removing package %s", pkgname)
        dbpkg = dbdict[pkgname]
        with transaction.commit_on_success():
            # no race condition here as long as simultaneous threads both
            # issue deletes; second delete will be a no-op
            delete_pkg_files(dbpkg)
            dbpkg.delete()

    # packages in both database and in syncdb (update in database)
    pkg_in_both = syncset & dbset
    for pkg in (x for x in pkgs if x.name in pkg_in_both):
        logger.debug("Checking package %s", pkg.name)
        dbpkg = dbdict[pkg.name]
        timestamp = None
        # for a force, we don't want to update the timestamp.
        # for a non-force, we don't want to do anything at all.
        if not force and pkg_same_version(pkg, dbpkg):
            continue
        elif not force:
            timestamp = utc_now()

        # The odd select_for_update song and dance here is to ensure
        # simultaneous updates don't happen on a package, causing
        # files/depends/all related items to be double-imported.
        with transaction.commit_on_success():
            dbpkg = Package.objects.select_for_update().get(id=dbpkg.id)
            if not force and pkg_same_version(pkg, dbpkg):
                logger.debug("Package %s was already updated", pkg.name)
                continue
            logger.info("Updating package %s", pkg.name)
            populate_pkg(dbpkg, pkg, force=force, timestamp=timestamp)

    logger.info('Finished updating arch: %s', archname)

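# pkg_same_version() is used above and in populate_files() but not shown.
# A hedged sketch based only on the `full_version` attribute both objects
# expose in the logging above; the real comparison may instead check epoch,
# pkgver and pkgrel individually.
def pkg_same_version(pkg, dbpkg):
    return pkg.full_version == dbpkg.full_version
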
def generate_report(email, repo_name):
    repo = Repo.objects.get(name__iexact=repo_name)
    # Collect all existing signoffs for these packages
    signoff_groups = sorted(get_signoff_groups([repo]),
            key=attrgetter('target_repo', 'arch', 'pkgbase'))
    disabled = []
    bad = []
    complete = []
    incomplete = []
    new = []
    old = []

    new_hours = 24
    old_days = 14
    now = utc_now()
    new_cutoff = now - timedelta(hours=new_hours)
    old_cutoff = now - timedelta(days=old_days)

    if len(signoff_groups) == 0:
        # no need to send an email at all
        return

    for group in signoff_groups:
        spec = group.specification
        if spec.known_bad:
            bad.append(group)
        elif not spec.enabled:
            disabled.append(group)
        elif group.approved():
            complete.append(group)
        else:
            incomplete.append(group)

        if group.package.last_update > new_cutoff:
            new.append(group)
        if group.package.last_update < old_cutoff:
            old.append(group)

    old.sort(key=attrgetter('last_update'))

    proto = 'https'
    domain = Site.objects.get_current().domain
    signoffs_url = '%s://%s%s' % (proto, domain, reverse('package-signoffs'))

    # and the fun bit
    Leader = namedtuple('Leader', ['user', 'count'])
    leaders = Signoff.objects.filter(created__gt=new_cutoff,
            revoked__isnull=True).values_list('user').annotate(
            signoff_count=Count('pk')).order_by('-signoff_count')[:5]
    users = User.objects.in_bulk([l[0] for l in leaders])
    leaders = (Leader(users[l[0]], l[1]) for l in leaders)

    subject = 'Signoff report for [%s]' % repo.name.lower()
    t = loader.get_template('packages/signoff_report.txt')
    c = Context({
        'repo': repo,
        'signoffs_url': signoffs_url,
        'disabled': disabled,
        'bad': bad,
        'all': signoff_groups,
        'incomplete': incomplete,
        'complete': complete,
        'new': new,
        'new_hours': new_hours,
        'old': old,
        'old_days': old_days,
        'leaders': leaders,
    })
    from_addr = 'Arch Website Notification <*****@*****.**>'
    send_mail(subject, t.render(c), from_addr, [email])

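# Hedged usage sketch: generate_report() looks designed to be driven from a
# management command or cron job. The command class and positional-argument
# handling below are illustrative assumptions, not the project's actual
# command.
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    help = "Send a signoff report for the given repo to the given address."

    def handle(self, *args, **options):
        # assumed positional arguments: destination address, then repo name
        email, repo_name = args[0], args[1]
        generate_report(email, repo_name)
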