def datachecks(superficial, autofix):
    u"""
    Checks that Region, District, Municipality and Neighbourhood relations are consistent.

    Three lookups are run one after another. Each reports rows whose own reference differs
    from the one reachable through the related unit:

     -- ``Municipality.region`` versus ``Municipality.district.region``;
     -- ``Neighbourhood.district`` versus ``Neighbourhood.municipality.district``;
     -- ``Neighbourhood.region`` versus ``Neighbourhood.district.region``.

    If ``superficial`` is true, every lookup reads at most six rows and yields at most one
    ``datacheck.Error`` in which the individual findings are joined with ``; ``. When six
    rows were read, the sixth finding is replaced with a generic "More ..." note. Otherwise
    one error is yielded per offending row. ``autofix`` is not used by this check.
    """
    limit = 5

    def collect(rows, describe, overflow):
        # Returns the message texts (without the trailing period) for one lookup.
        if superficial:
            # One extra row is fetched only to learn whether more than ``limit`` exist.
            rows = rows[:limit + 1]
        texts = [describe(row) for row in rows]
        if superficial and texts:
            if len(texts) > limit:
                texts[-1] = overflow
            texts = [u'; '.join(texts)]
        return texts

    # municipality.region must be municipality.district.region
    mismatched = Municipality.objects.filter(~Q(district__region=F(u'region')))
    mismatched = mismatched.select_related(u'district')
    texts = collect(
        mismatched,
        lambda m: u'{} has region_id="{}" but district.region_id="{}"'.format(
            m, m.region_id, m.district.region_id),
        u'More municipalities have invalid region references')
    for text in texts:
        yield datacheck.Error(text + u'.')

    # neighbourhood.district must be neighbourhood.municipality.district
    mismatched = Neighbourhood.objects.filter(~Q(municipality__district=F(u'district')))
    mismatched = mismatched.select_related(u'municipality')
    texts = collect(
        mismatched,
        lambda n: u'{} has district_id="{}" but municipality.district_id="{}"'.format(
            n, n.district_id, n.municipality.district_id),
        u'More neighbourhoods have invalid district references')
    for text in texts:
        yield datacheck.Error(text + u'.')

    # neighbourhood.region must be neighbourhood.district.region
    mismatched = Neighbourhood.objects.filter(~Q(district__region=F(u'region')))
    mismatched = mismatched.select_related(u'district')
    texts = collect(
        mismatched,
        lambda n: u'{} has region_id="{}" but district.region_id="{}"'.format(
            n, n.region_id, n.district.region_id),
        u'More neighbourhoods have invalid region references')
    for text in texts:
        yield datacheck.Error(text + u'.')
def datachecks(superficial, autofix):
    u"""
    Checks that every ``Attachment`` instance has its file working, and there are not any
    orphaned attachment files.

    Yields a ``datacheck.Error`` for each attachment whose file cannot be opened, and a
    ``datacheck.Info`` for each file in the upload directory that is older than five days
    and is not referenced by any ``Attachment``. Nothing is checked when ``superficial`` is
    true. ``autofix`` is not used by this check.
    """
    # The check touches every attachment file, so it is skipped for superficial runs.
    if superficial:
        return

    attachments = Attachment.objects.all()
    known_names = set(a.file.name for a in attachments)

    # Part 1: every attachment must have a file that can be opened.
    for attachment in attachments:
        try:
            try:
                attachment.file.open(u'rb')
            finally:
                # Closed unconditionally so that no handle stays open after the probe.
                attachment.file.close()
        except IOError:
            yield datacheck.Error(u'%r is missing its file: "%s".',
                                  attachment, attachment.file.name)

    # Part 2: files in the upload directory with no matching attachment.
    field = Attachment._meta.get_field(u'file')
    if not field.storage.exists(field.upload_to):
        return

    max_age = datetime.timedelta(days=5)
    stored_files = field.storage.listdir(field.upload_to)[1]
    for file_name in stored_files:
        attachment_name = u'%s/%s' % (field.upload_to, file_name)
        modified = field.storage.modified_time(attachment_name)
        age = utc_now() - utc_datetime_from_local(modified)
        # Young files are ignored: NOTE(review) presumably they may belong to an upload
        # that has no Attachment row yet -- confirm.
        if age <= max_age or attachment_name in known_names:
            continue
        yield datacheck.Info(
            u'There is no Attachment instance for file: "%s". The file is %d days old, so you can probably remove it.',
            attachment_name, age.days)
def datachecks(superficial, autofix):
    u"""
    Checks that all obligee subgroups have their parent groups.

    A group counts as a subgroup when its ``key`` contains ``/``. The expected parent key
    is everything before the last ``/``. A ``datacheck.Error`` is yielded for every
    subgroup whose parent key does not belong to any existing ``ObligeeGroup``.

    Neither ``superficial`` nor ``autofix`` is used by this check.
    """
    groups = ObligeeGroup.objects.all()
    keys = {g.key for g in groups}
    for group in groups:
        if u'/' not in group.key:
            # Top-level group; it has no parent to look for.
            continue
        parent_key = group.key.rsplit(u'/', 1)[0]
        if parent_key not in keys:
            # The template uses ``str.format`` placeholders, so it is formatted here and
            # handed over as a finished message. NOTE(review): other checks pass extra
            # arguments only together with %-style templates -- confirm that is what
            # ``datacheck.Error`` expects.
            yield datacheck.Error(
                u'{} has key="{}" but there is no group with key="{}"'.format(
                    group, group.key, parent_key))
def datachecks(superficial, autofix):
    u"""
    Checks that no ``Message`` is related to more than one ``Inforequest``.

    Messages are annotated with the number of their inforequests and those with a count
    greater than one are reported. If ``superficial`` is true, at most six rows are read
    and a single combined ``datacheck.Error`` is yielded; otherwise one error is yielded
    per message. ``autofix`` is not used by this check.
    """
    limit = 5

    messages = Message.objects.annotate(Count(u'inforequest'))
    messages = messages.filter(inforequest__count__gt=1)
    if superficial:
        # One extra row tells us whether more than ``limit`` messages are affected.
        messages = messages[:limit + 1]

    texts = [
        u'{} is assigned to {} inforequests'.format(message, message.inforequest__count)
        for message in messages
    ]

    if superficial and texts:
        if len(texts) > limit:
            texts[-1] = u'More messages are assigned to multiple inforequests'
        yield datacheck.Error(u'; '.join(texts) + u'.')
        return

    for text in texts:
        yield datacheck.Error(text + u'.')
def datachecks(superficial, autofix):
    u"""
    Checks that every ``Inforequest`` instance has exactly one main branch.

    Inforequests are annotated with a count of their branches restricted by
    ``branch__advanced_by=None``, and every inforequest whose count is not 1 is reported.
    If ``superficial`` is true, at most six rows are read and the findings are merged into
    a single ``datacheck.Error``; otherwise one error is yielded per inforequest.
    ``autofix`` is not used by this check.
    """
    limit = 5

    main_branches = Count(u'branch', only=Q(branch__advanced_by=None))
    offenders = Inforequest.objects.annotate(main_branches).filter(~Q(branch__count=1))
    if superficial:
        # Read one row past the limit to detect that further offenders exist.
        offenders = offenders[:limit + 1]

    texts = []
    for inforequest in offenders:
        texts.append(u'%r has %d main branches' % (inforequest, inforequest.branch__count))

    if superficial and texts:
        if len(texts) > limit:
            texts[-1] = u'More inforequests have invalid number of main branches'
        texts = [u'; '.join(texts)]

    for text in texts:
        yield datacheck.Error(text + u'.')
def datachecks(superficial, autofix):
    u"""
    Checks that every ``Action.email`` is assigned to ``Action.branch.inforequest``.

    Only actions with an email are considered. Each is annotated with a count over
    ``branch__inforequest__email_set`` restricted to entries equal to the action email,
    and actions with a zero count are reported. If ``superficial`` is true, at most six
    rows are read and a single combined ``datacheck.Error`` is yielded; otherwise one
    error is yielded per action. ``autofix`` is not used by this check.
    """
    limit = 5

    matching_emails = Count(
        u'branch__inforequest__email_set',
        only=Q(branch__inforequest__email_set=F(u'email')))
    actions = Action.objects.filter(email__isnull=False)
    actions = actions.annotate(matching_emails)
    actions = actions.filter(branch__inforequest__email_set__count=0)
    if superficial:
        # The extra row only signals that more than ``limit`` actions are affected.
        actions = actions[:limit + 1]

    texts = [u'{} email is assigned to another inforequest'.format(action)
             for action in actions]

    if superficial and texts:
        if len(texts) > limit:
            texts[-1] = u'More action emails are assigned to other inforequests'
        yield datacheck.Error(u'; '.join(texts) + u'.')
        return

    for text in texts:
        yield datacheck.Error(text + u'.')
def datachecks(superficial, autofix):
    u"""
    Checks that every advanced ``Branch`` instance is advanced by an action from the same
    inforequest.

    Branches with ``advanced_by`` set are reported when the inforequest of the advancing
    action's branch differs from the branch's own inforequest. If ``superficial`` is true,
    at most six rows are read and the findings are merged into one ``datacheck.Error``;
    otherwise one error is yielded per branch. ``autofix`` is not used by this check.
    """
    limit = 5

    same_inforequest = Q(advanced_by__branch__inforequest=F(u'inforequest'))
    branches = Branch.objects.filter(advanced_by__isnull=False)
    branches = branches.filter(~same_inforequest)
    # The message reads advanced_by.branch, so it is fetched together with the branch.
    branches = branches.select_related('advanced_by__branch')
    if superficial:
        branches = branches[:limit + 1]

    template = u'{} has inforequest_id = {} but advanced_by.branch.inforequest_id = {}'
    texts = []
    for branch in branches:
        texts.append(template.format(
            branch, branch.inforequest_id, branch.advanced_by.branch.inforequest_id))

    if superficial and texts:
        if len(texts) > limit:
            texts[-1] = u'More branches have invalid advanced by references'
        texts = [u'; '.join(texts)]

    for text in texts:
        yield datacheck.Error(text + u'.')
def _check_rec(lang, basedir, rootdir, curdir, autofix):
    u"""
    Recursively checks one page directory and yields the issues found in it.

    The directory may contain a ``page.conf`` config, a ``page.html`` template, entries
    whose names match ``pages.slug_regex`` (subpage directories or redirect symlinks) and,
    in the root directory only, the ``@`` rootlink. Anything else is reported as an
    unexpected file. Entries are processed in sorted order so that the report is
    deterministic.

    Arguments:
     -- lang: language code; used here only in the rootlink message.
     -- basedir: directory against which the paths shown in messages are made relative.
     -- rootdir: root of the page tree; redirects must stay inside it and links containing
        ``/@/`` are resolved relative to it.
     -- curdir: directory to check.
     -- autofix: if true, autofixable issues are repaired on disk after being reported.

    Yields ``datacheck.Error`` and ``datacheck.Warning`` instances.
    """
    # Names are removed from this set as they are recognised; the rest is unexpected.
    filenames = set(os.listdir(curdir))

    # Configuration file
    filename = u'page.conf'
    if filename in filenames:
        filenames.remove(filename)
        filepath = os.path.join(curdir, filename)
        filerel = os.path.relpath(filepath, basedir)
        if not os.path.isfile(filepath) or os.path.islink(filepath):
            yield datacheck.Error(u'Page config /%s is not a regular file', filerel)
        else:
            try:
                config = pages.Config(filepath)
            except pages.ParseConfigError as e:
                yield datacheck.Error(u'Page config /%s parse error: %s', filerel, e)
            else:
                # Check lang_* options
                config_fixes = {}
                for trans_lang, _ in settings.LANGUAGES:
                    trans_key = u'lang_%s' % trans_lang
                    trans_path = config.get(trans_key)
                    if trans_path is not None:
                        try:
                            trans_page = pages.Page(trans_path, trans_lang)
                        except pages.InvalidPageError as e:
                            yield datacheck.Error(
                                u'Page /%s has invalid %s translation: %s',
                                filerel, trans_lang.upper(), e)
                        else:
                            if trans_page.path != trans_path:
                                yield datacheck.Warning(
                                    u'Page /%s %s translation is %s but its canonical form is %s',
                                    filerel, trans_lang.upper(), trans_path, trans_page.path,
                                    autofixable=True)
                                if autofix:
                                    config_fixes[trans_key] = trans_page.path
                # All fixes for this config are written in one go, after every language
                # has been checked.
                if config_fixes:
                    config.set_multiple(**config_fixes)
                    config.write(filepath)

    # Template file
    filename = u'page.html'
    if filename in filenames:
        filenames.remove(filename)
        filepath = os.path.join(curdir, filename)
        filerel = os.path.relpath(filepath, basedir)
        if not os.path.isfile(filepath) or os.path.islink(filepath):
            yield datacheck.Error(u'Page template /%s is not a regular file', filerel)
        else:
            try:
                with open(filepath) as f:
                    template = f.read()
            except IOError as e:
                yield datacheck.Error(u'Page template /%s read error: %s', filerel, e)
            else:
                try:
                    # Compiled only to detect syntax errors; the result is not needed.
                    Template(template)
                except TemplateSyntaxError as e:
                    yield datacheck.Error(u'Page template /%s parse error: %s', filerel, e)

    # Subpages and redirects
    # ``sorted`` returns a copy, so removing from ``filenames`` inside the loop is safe.
    for filename in sorted(filenames):
        if pages.slug_regex.match(filename):
            filenames.remove(filename)
            filepath = os.path.join(curdir, filename)
            filerel = os.path.relpath(filepath, basedir)
            if os.path.islink(filepath):
                link = os.readlink(filepath)
                if u'/@/' in link:
                    # The part after the first ``/@/`` is resolved relative to the root.
                    target = os.path.realpath(
                        os.path.join(rootdir, link.split(u'/@/', 1)[1]))
                else:
                    target = os.path.realpath(os.path.join(curdir, link))
                # Only meaningful when ``target`` lies inside ``rootdir``; the first
                # branch below rejects the other case before either value is used.
                target_path = target[len(rootdir):] + u'/'
                target_link = os.path.relpath(rootdir, curdir) + u'/@' + target_path
                if not target.startswith(rootdir + os.sep) and target != rootdir:
                    yield datacheck.Error(
                        u'Redirect /%s goes outside root dir %s', filerel, link)
                elif not os.path.isdir(target):
                    yield datacheck.Error(
                        u'Redirect /%s points to %s which expands to %s which is not a directory',
                        filerel, link, target_path)
                elif not pages.path_regex.match(target_path):
                    yield datacheck.Error(
                        u'Redirect /%s points to %s which expands to %s which is not a valid path',
                        filerel, link, target_path)
                elif link != target_link:
                    yield datacheck.Warning(
                        u'Redirect /%s points to %s but its canonical form is %s',
                        filerel, link, target_link, autofixable=True)
                    if autofix:
                        # Replace the symlink with its canonical form.
                        os.remove(filepath)
                        os.symlink(target_link, filepath)
            elif os.path.isdir(filepath):
                for issue in _check_rec(lang, basedir, rootdir, filepath, autofix):
                    yield issue
            else:
                yield datacheck.Error(
                    u'Subpage /%s is not a directory or a symlink', filerel)

    # Rootlinks
    if curdir == rootdir:
        filenames.discard(u'@')
        rootlink = os.path.join(rootdir, u'@')
        if not os.path.islink(rootlink) or os.readlink(rootlink) != u'.':
            yield datacheck.Warning(u'Invalid or missing rootlink /%s/@', lang,
                                    autofixable=True)
            if autofix:
                # ``lexists`` is also true for a broken symlink, which must be removed
                # before the new link can be created.
                if os.path.lexists(rootlink):
                    os.remove(rootlink)
                os.symlink(u'.', rootlink)

    # Unexpected files
    if filenames:
        dirrel = os.path.relpath(curdir, basedir)
        yield datacheck.Warning(u'Unexpected files in /%s: %s',
                                dirrel, u', '.join(sorted(filenames)))