Example #1
def check_mirror_url(mirror_url):
    url = mirror_url.url + 'lastsync'
    logger.info("checking URL %s", url)
    log = MirrorLog(url=mirror_url, check_time=utc_now())
    try:
        start = time.time()
        result = urllib2.urlopen(url, timeout=10)
        data = result.read()
        result.close()
        end = time.time()
        # lastsync should be an epoch value created by us
        parsed_time = None
        try:
            parsed_time = datetime.utcfromtimestamp(int(data))
            parsed_time = parsed_time.replace(tzinfo=utc)
        except ValueError:
            # it is bad news to try logging the lastsync value;
            # sometimes we get a crazy-encoded web page.
            pass

        log.last_sync = parsed_time
        # if we couldn't parse a time, this is a failure
        if parsed_time is None:
            log.error = "Could not parse time from lastsync"
            log.is_success = False
        log.duration = end - start
        logger.debug("success: %s, %.2f", url, log.duration)
    except urllib2.HTTPError as e:
        if e.code == 404:
            # we have a duration, just not a success
            end = time.time()
            log.duration = end - start
        log.is_success = False
        log.error = str(e)
        logger.debug("failed: %s, %s", url, log.error)
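Every example in this listing calls a small utc_now() helper that is not shown; a minimal sketch of such a helper, assuming pytz supplies the UTC tzinfo used above, might look like this:

from datetime import datetime
from pytz import utc

def utc_now():
    # assumed helper, not taken verbatim from the project:
    # return a timezone-aware datetime pinned to UTC
    return datetime.utcnow().replace(tzinfo=utc)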
Example #2
def populate_files(dbpkg, repopkg, force=False):
    if not force:
        if not pkg_same_version(repopkg, dbpkg):
            logger.info("DB version (%s) didn't match repo version "
                    "(%s) for package %s, skipping file list addition",
                    dbpkg.full_version, repopkg.full_version, dbpkg.pkgname)
            return
        if not dbpkg.files_last_update or not dbpkg.last_update:
            pass
        elif dbpkg.files_last_update > dbpkg.last_update:
            return

    # only delete files if we are reading a DB that contains them
    if repopkg.has_files:
        delete_pkg_files(dbpkg)
        logger.info("adding %d files for package %s",
                len(repopkg.files), dbpkg.pkgname)
        pkg_files = []
        for f in repopkg.files:
            dirname, filename = f.rsplit('/', 1)
            if filename == '':
                filename = None
            pkgfile = PackageFile(pkg=dbpkg,
                    is_directory=(filename is None),
                    directory=dirname + '/',
                    filename=filename)
            pkg_files.append(pkgfile)
        PackageFile.objects.bulk_create(pkg_files)
        dbpkg.files_last_update = utc_now()
        dbpkg.save()
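Both this example and Example #12 rely on a pkg_same_version() helper that is not shown; a hypothetical sketch, assuming both objects expose the full_version attribute used in the log message above:

def pkg_same_version(pkg, dbpkg):
    # hypothetical helper: treat the packages as the same version when
    # their full version strings (epoch:pkgver-pkgrel) match
    return pkg.full_version == dbpkg.full_version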
Example #3
def get_mirror_url_for_download(cutoff=default_cutoff):
    '''Find a good mirror URL to use for package downloads. If we have mirror
    status data available, it is used to determine a good choice by looking at
    the last batch of status rows.'''
    cutoff_time = utc_now() - cutoff
    status_data = MirrorLog.objects.filter(
            check_time__gte=cutoff_time).aggregate(
            Max('check_time'), Max('last_sync'))
    if status_data['check_time__max'] is not None:
        min_check_time = status_data['check_time__max'] - timedelta(minutes=5)
        min_sync_time = status_data['last_sync__max'] - timedelta(minutes=30)
        best_logs = MirrorLog.objects.filter(is_success=True,
                check_time__gte=min_check_time, last_sync__gte=min_sync_time,
                url__mirror__public=True, url__mirror__active=True,
                url__protocol__protocol__iexact='HTTP').order_by(
                'duration')[:1]
        if best_logs:
            return MirrorUrl.objects.get(id=best_logs[0].url_id)

    mirror_urls = MirrorUrl.objects.filter(
            mirror__public=True, mirror__active=True,
            protocol__protocol__iexact='HTTP')
    # look first for an 'Any' URL, then fall back to any HTTP URL
    filtered_urls = mirror_urls.filter(mirror__country='Any')[:1]
    if not filtered_urls:
        filtered_urls = mirror_urls[:1]
    if not filtered_urls:
        return None
    return filtered_urls[0]
Example #4
def set_news_fields(sender, **kwargs):
    news = kwargs['instance']
    now = utc_now()
    news.last_modified = now
    if not news.postdate:
        news.postdate = now
        # http://diveintomark.org/archives/2004/05/28/howto-atom-id
        news.guid = 'tag:%s,%s:%s' % (Site.objects.get_current(),
                now.strftime('%Y-%m-%d'), news.get_absolute_url())
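set_news_fields is written as a Django signal receiver; a hedged sketch of how it might be wired up (the News model name is inferred from the code above, and the choice of pre_save is an assumption):

from django.db.models.signals import pre_save

# connect the receiver so the fields are filled in before each save
pre_save.connect(set_news_fields, sender=News,
        dispatch_uid='news_set_fields')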
Example #5
# nested helper excerpted from a larger view; pkgs, email, message, ip_addr,
# pkg, version, flagged_pkgs and request come from the enclosing scope
def perform_updates():
    now = utc_now()
    pkgs.update(flag_date=now)
    # store our flag request
    flag_request = FlagRequest(created=now,
            user_email=email, message=message,
            ip_address=ip_addr, pkgbase=pkg.pkgbase,
            version=version, repo=pkg.repo,
            num_packages=len(flagged_pkgs))
    if request.user.is_authenticated():
        flag_request.user = request.user
    flag_request.save()
Example #6
def get_mirror_statuses(cutoff=default_cutoff):
    cutoff_time = utc_now() - cutoff
    protocols = list(MirrorProtocol.objects.filter(is_download=True))
    # I swear, this actually has decent performance...
    urls = MirrorUrl.objects.select_related('mirror', 'protocol').filter(
            mirror__active=True, mirror__public=True,
            protocol__in=protocols,
            logs__check_time__gte=cutoff_time).annotate(
            check_count=Count('logs'),
            success_count=Count('logs__duration'),
            last_sync=Max('logs__last_sync'),
            last_check=Max('logs__check_time'),
            duration_avg=Avg('logs__duration'),
            duration_stddev=StdDev('logs__duration')
            ).order_by('-last_sync', '-duration_avg')

    # The Django ORM makes it really hard to get actual average delay in the
    # above query, so run a separate query for it and process the
    # results here.
    times = MirrorLog.objects.filter(is_success=True, last_sync__isnull=False,
            check_time__gte=cutoff_time)
    delays = {}
    for log in times:
        delay = log.check_time - log.last_sync
        delays.setdefault(log.url_id, []).append(delay)

    if urls:
        last_check = max([u.last_check for u in urls])
        num_checks = max([u.check_count for u in urls])
        check_info = MirrorLog.objects.filter(
                check_time__gte=cutoff_time).aggregate(
                mn=Min('check_time'), mx=Max('check_time'))
        if num_checks > 1:
            check_frequency = (check_info['mx'] - check_info['mn']) \
                    / (num_checks - 1)
        else:
            check_frequency = None
    else:
        last_check = None
        num_checks = 0
        check_frequency = None

    for url in urls:
        annotate_url(url, delays)

    return {
        'cutoff': cutoff,
        'last_check': last_check,
        'num_checks': num_checks,
        'check_frequency': check_frequency,
        'urls': urls,
    }
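Example #6 hands the per-URL delay lists to an annotate_url() helper that is not shown; a minimal sketch of what such a helper might compute (the delay attribute name is an assumption):

from datetime import timedelta

def annotate_url(url, delays):
    # hypothetical helper: attach the average sync delay observed for this
    # URL, or None if no successful checks fell inside the cutoff window
    url_delays = delays.get(url.id, [])
    if url_delays:
        total = sum(url_delays, timedelta())
        url.delay = total / len(url_delays)
    else:
        url.delay = None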
Example #7
def get_mirror_errors(cutoff=default_cutoff):
    cutoff_time = utc_now() - cutoff
    errors = MirrorLog.objects.filter(
            is_success=False, check_time__gte=cutoff_time,
            url__mirror__active=True, url__mirror__public=True).values(
            'url__url', 'url__country', 'url__protocol__protocol',
            'url__mirror__country', 'error').annotate(
            error_count=Count('error'), last_occurred=Max('check_time')
            ).order_by('-last_occurred', '-error_count')
    errors = list(errors)
    for err in errors:
        err['country'] = err['url__country'] or err['url__mirror__country']
    return errors
Example #8
def clock(request):
    devs = User.objects.filter(is_active=True).order_by(
            'first_name', 'last_name').select_related('userprofile')

    now = utc_now()
    page_dict = {
            'developers': devs,
            'utc_now': now,
    }

    response = direct_to_template(request, 'devel/clock.html', page_dict)
    if not response.has_header('Expires'):
        expire_time = now.replace(second=0, microsecond=0)
        expire_time += timedelta(minutes=1)
        # expire_time is UTC-aware, so convert with calendar.timegm() rather
        # than time.mktime(), which would interpret it as local time
        expire_time = calendar.timegm(expire_time.utctimetuple())
        response['Expires'] = http_date(expire_time)
    return response
Example #9
    def handle(self, *args, **options):
        parser = IsoListParser()
        isonames = Iso.objects.values_list('name', flat=True)
        active_isos = parser.parse(settings.ISO_LIST_URL)

        for iso in active_isos:
            # create any names that don't already exist
            if iso not in isonames:
                new = Iso(name=iso, active=True)
                new.save()
            # update those that do if they were marked inactive
            else:
                existing = Iso.objects.get(name=iso)
                if not existing.active:
                    existing.active = True
                    existing.removed = None
                    existing.save()
        now = utc_now()
        # and then mark all other names as no longer active
        Iso.objects.filter(active=True).exclude(name__in=active_isos).update(
                active=False, removed=now)
Example #10
def signoff_package(request, name, repo, arch, revoke=False):
    packages = get_list_or_404(Package, pkgbase=name,
            arch__name=arch, repo__name__iexact=repo, repo__testing=True)
    package = packages[0]

    spec = SignoffSpecification.objects.get_or_default_from_package(package)

    if revoke:
        try:
            signoff = Signoff.objects.get_from_package(
                    package, request.user, False)
        except Signoff.DoesNotExist:
            raise Http404
        signoff.revoked = utc_now()
        signoff.save()
        created = False
    else:
        # ensure we should even be accepting signoffs
        if spec.known_bad or not spec.enabled:
            return render(request, '403.html', status=403)
        signoff, created = Signoff.objects.get_or_create_from_package(
                package, request.user)

    all_signoffs = Signoff.objects.for_package(package)

    if request.is_ajax():
        data = {
            'created': created,
            'revoked': bool(signoff.revoked),
            'approved': approved_by_signoffs(all_signoffs, spec),
            'required': spec.required,
            'enabled': spec.enabled,
            'known_bad': spec.known_bad,
            'user': str(request.user),
        }
        return HttpResponse(simplejson.dumps(data, ensure_ascii=False),
                mimetype='application/json')

    return redirect('package-signoffs')
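The approved_by_signoffs() call above is not shown in this listing; a hedged sketch of what it likely checks, assuming required comes from the SignoffSpecification:

def approved_by_signoffs(signoffs, spec):
    # assumed logic: approved once enough non-revoked signoffs exist
    if signoffs:
        good_signoffs = sum(1 for s in signoffs if not s.revoked)
        return good_signoffs >= spec.required
    return False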
Example #11
def report(request, report_name, username=None):
    title = 'Developer Report'
    packages = Package.objects.normal()
    names = attrs = user = None

    if username:
        user = get_object_or_404(User, username=username, is_active=True)
        maintained = PackageRelation.objects.filter(user=user,
                type=PackageRelation.MAINTAINER).values('pkgbase')
        packages = packages.filter(pkgbase__in=maintained)

    maints = User.objects.filter(id__in=PackageRelation.objects.filter(
        type=PackageRelation.MAINTAINER).values('user'))

    if report_name == 'old':
        title = 'Packages last built more than one year ago'
        cutoff = utc_now() - timedelta(days=365)
        packages = packages.filter(
                build_date__lt=cutoff).order_by('build_date')
    elif report_name == 'long-out-of-date':
        title = 'Packages marked out-of-date more than 90 days ago'
        cutoff = utc_now() - timedelta(days=90)
        packages = packages.filter(
                flag_date__lt=cutoff).order_by('flag_date')
    elif report_name == 'big':
        title = 'Packages with compressed size > 50 MiB'
        cutoff = 50 * 1024 * 1024
        packages = packages.filter(
                compressed_size__gte=cutoff).order_by('-compressed_size')
        names = [ 'Compressed Size', 'Installed Size' ]
        attrs = [ 'compressed_size_pretty', 'installed_size_pretty' ]
        # Format the compressed and installed sizes with MB/GB/etc suffixes
        for package in packages:
            package.compressed_size_pretty = filesizeformat(
                package.compressed_size)
            package.installed_size_pretty = filesizeformat(
                package.installed_size)
    elif report_name == 'badcompression':
        title = 'Packages that have little need for compression'
        cutoff = 0.90 * F('installed_size')
        packages = packages.filter(compressed_size__gt=0, installed_size__gt=0,
                compressed_size__gte=cutoff).order_by('-compressed_size')
        names = [ 'Compressed Size', 'Installed Size', 'Ratio', 'Type' ]
        attrs = [ 'compressed_size_pretty', 'installed_size_pretty',
                'ratio', 'compress_type' ]
        # Format the compressed and installed sizes with MB/GB/etc suffixes
        for package in packages:
            package.compressed_size_pretty = filesizeformat(
                package.compressed_size)
            package.installed_size_pretty = filesizeformat(
                package.installed_size)
            ratio = package.compressed_size / float(package.installed_size)
            package.ratio = '%.2f' % ratio
            package.compress_type = package.filename.split('.')[-1]
    elif report_name == 'uncompressed-man':
        title = 'Packages with uncompressed manpages'
        # checking for all '.0'...'.9' + '.n' extensions
        bad_files = PackageFile.objects.filter(is_directory=False,
                directory__contains='/man/',
                filename__regex=r'\.[0-9n]').exclude(
                filename__endswith='.gz').exclude(
                filename__endswith='.xz').exclude(
                filename__endswith='.bz2').exclude(
                filename__endswith='.html')
        if username:
            pkg_ids = set(packages.values_list('id', flat=True))
            bad_files = bad_files.filter(pkg__in=pkg_ids)
        bad_files = bad_files.values_list('pkg_id', flat=True).distinct()
        packages = packages.filter(id__in=set(bad_files))
    elif report_name == 'uncompressed-info':
        title = 'Packages with uncompressed infopages'
        # we don't worry about looking for '*.info-1', etc., given that an
        # uncompressed root page probably exists in the package anyway
        bad_files = PackageFile.objects.filter(is_directory=False,
                directory__endswith='/info/', filename__endswith='.info')
        if username:
            pkg_ids = set(packages.values_list('id', flat=True))
            bad_files = bad_files.filter(pkg__in=pkg_ids)
        bad_files = bad_files.values_list('pkg_id', flat=True).distinct()
        packages = packages.filter(id__in=set(bad_files))
    elif report_name == 'unneeded-orphans':
        title = 'Orphan packages required by no other packages'
        owned = PackageRelation.objects.all().values('pkgbase')
        required = PackageDepend.objects.all().values('depname')
        # The two separate calls to exclude are required to do the right thing
        packages = packages.exclude(pkgbase__in=owned).exclude(
                pkgname__in=required)
    elif report_name == 'mismatched-signature':
        title = 'Packages with mismatched signatures'
        names = [ 'Signature Date', 'Signed By', 'Packager' ]
        attrs = [ 'sig_date', 'sig_by', 'packager' ]
        cutoff = timedelta(hours=24)
        finder = UserFinder()
        filtered = []
        packages = packages.filter(pgp_signature__isnull=False)
        for package in packages:
            sig_date = package.signature.datetime.replace(tzinfo=pytz.utc)
            package.sig_date = sig_date.date()
            key_id = package.signature.key_id
            signer = finder.find_by_pgp_key(key_id)
            package.sig_by = signer or key_id
            if signer is None or signer.id != package.packager_id:
                filtered.append(package)
            elif sig_date > package.build_date + cutoff:
                filtered.append(package)
        packages = filtered
    else:
        raise Http404

    context = {
        'all_maintainers': maints,
        'title': title,
        'maintainer': user,
        'packages': packages,
        'column_names': names,
        'column_attrs': attrs,
    }
    return direct_to_template(request, 'devel/packages.html', context)
Example #12
def db_update(archname, reponame, pkgs, force=False):
    """
    Parses a list of packages and updates the packages database accordingly.
    """
    logger.info('Updating %s (%s)', reponame, archname)
    dbpkgs = update_common(archname, reponame, pkgs, True)
    repository = Repo.objects.get(name__iexact=reponame)
    architecture = Arch.objects.get(name__iexact=archname)

    # This makes our inner loop where we find packages by name *way* more
    # efficient by not having to go to the database for each package to
    # SELECT them by name.
    dbdict = dict((dbpkg.pkgname, dbpkg) for dbpkg in dbpkgs)

    dbset = set(dbdict.keys())
    syncset = set([pkg.name for pkg in pkgs])

    in_sync_not_db = syncset - dbset
    logger.info("%d packages in sync not db", len(in_sync_not_db))
    # packages in syncdb and not in database (add to database)
    for pkg in (pkg for pkg in pkgs if pkg.name in in_sync_not_db):
        logger.info("Adding package %s", pkg.name)
        dbpkg = Package(pkgname=pkg.name, arch=architecture, repo=repository)
        try:
            with transaction.commit_on_success():
                populate_pkg(dbpkg, pkg, timestamp=utc_now())
        except IntegrityError:
            logger.warning("Could not add package %s; "
                    "not fatal if another thread beat us to it.",
                    pkg.name, exc_info=True)

    # packages in database and not in syncdb (remove from database)
    for pkgname in (dbset - syncset):
        logger.info("Removing package %s", pkgname)
        dbpkg = dbdict[pkgname]
        with transaction.commit_on_success():
            # no race condition here as long as simultaneous threads both
            # issue deletes; second delete will be a no-op
            delete_pkg_files(dbpkg)
            dbpkg.delete()

    # packages in both database and in syncdb (update in database)
    pkg_in_both = syncset & dbset
    for pkg in (x for x in pkgs if x.name in pkg_in_both):
        logger.debug("Checking package %s", pkg.name)
        dbpkg = dbdict[pkg.name]
        timestamp = None
        # if not forcing and the versions match, skip the package entirely;
        # if forcing, keep the existing timestamp; otherwise stamp the
        # update with the current time.
        if not force and pkg_same_version(pkg, dbpkg):
            continue
        elif not force:
            timestamp = utc_now()

        # The odd select_for_update song and dance here is to ensure
        # simultaneous updates don't happen on a package, causing
        # files/depends/all related items to be double-imported.
        with transaction.commit_on_success():
            dbpkg = Package.objects.select_for_update().get(id=dbpkg.id)
            if not force and pkg_same_version(pkg, dbpkg):
                logger.debug("Package %s was already updated", pkg.name)
                continue
            logger.info("Updating package %s", pkg.name)
            populate_pkg(dbpkg, pkg, force=force, timestamp=timestamp)

    logger.info('Finished updating arch: %s', archname)
Example #13
def generate_report(email, repo_name):
    repo = Repo.objects.get(name__iexact=repo_name)
    # Collect all existing signoffs for these packages
    signoff_groups = sorted(get_signoff_groups([repo]),
            key=attrgetter('target_repo', 'arch', 'pkgbase'))
    disabled = []
    bad = []
    complete = []
    incomplete = []
    new = []
    old = []

    new_hours = 24
    old_days = 14
    now = utc_now()
    new_cutoff = now - timedelta(hours=new_hours)
    old_cutoff = now - timedelta(days=old_days)

    if len(signoff_groups) == 0:
        # no need to send an email at all
        return

    for group in signoff_groups:
        spec = group.specification
        if spec.known_bad:
            bad.append(group)
        elif not spec.enabled:
            disabled.append(group)
        elif group.approved():
            complete.append(group)
        else:
            incomplete.append(group)

        if group.package.last_update > new_cutoff:
            new.append(group)
        if group.package.last_update < old_cutoff:
            old.append(group)

    old.sort(key=attrgetter('last_update'))

    proto = 'https'
    domain = Site.objects.get_current().domain
    signoffs_url = '%s://%s%s' % (proto, domain, reverse('package-signoffs'))

    # and the fun bit
    Leader = namedtuple('Leader', ['user', 'count'])
    leaders = Signoff.objects.filter(created__gt=new_cutoff,
            revoked__isnull=True).values_list('user').annotate(
                    signoff_count=Count('pk')).order_by('-signoff_count')[:5]
    users = User.objects.in_bulk([l[0] for l in leaders])
    leaders = (Leader(users[l[0]], l[1]) for l in leaders)

    subject = 'Signoff report for [%s]' % repo.name.lower()
    t = loader.get_template('packages/signoff_report.txt')
    c = Context({
        'repo': repo,
        'signoffs_url': signoffs_url,
        'disabled': disabled,
        'bad': bad,
        'all': signoff_groups,
        'incomplete': incomplete,
        'complete': complete,
        'new': new,
        'new_hours': new_hours,
        'old': old,
        'old_days': old_days,
        'leaders': leaders,
    })
    from_addr = 'Arch Website Notification <*****@*****.**>'
    send_mail(subject, t.render(c), from_addr, [email])
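A hedged usage sketch for Example #13: a management command handle() could drive the report; the argument handling below is an assumption, not taken from the original code.

from django.core.management.base import BaseCommand, CommandError

class Command(BaseCommand):
    help = "Send a signoff report for the given repo to the given address."

    def handle(self, *args, **options):
        if len(args) != 2:
            raise CommandError("an email address and a repo name are required")
        generate_report(args[0], args[1])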