class InAppMessage(models.Model):
    """
    A message, composed in the Django admin interface, displayed to regular
    users within the application
    """
    uid = KpiUidField(uid_prefix="iam")
    title = models.CharField(max_length=255)
    # Both Markdown fields are rendered to HTML by the `html` property below
    snippet = MarkdownxField()
    body = MarkdownxField()
    # Could change to `django.contrib.auth.get_user_model()` in Django 1.11+
    published = models.BooleanField(
        default=False,
        help_text='When published, this message appears to all users. '
                  'It otherwise appears only to the last editor')
    # Make the author deliberately set these dates to something valid
    valid_from = models.DateTimeField(default=EPOCH_BEGINNING)
    valid_until = models.DateTimeField(default=EPOCH_BEGINNING)
    # FIX: `on_delete` is a required argument for ForeignKey since Django 2.0
    # (omitting it raises TypeError at class definition time). CASCADE matches
    # the other user FKs in this file.
    last_editor = models.ForeignKey(settings.AUTH_USER_MODEL,
                                    on_delete=models.CASCADE)

    def __str__(self):
        return '{} ({})'.format(self.title, self.uid)

    @property
    def html(self):
        """
        Return a dict mapping each Markdown field name to its rendered HTML.
        """
        # TODO: Djangerz template processing...
        # Make `request.user.extra_detail` available in the context as `user`
        MARKDOWN_FIELDS_TO_CONVERT = ('snippet', 'body')
        result = {}
        for field in MARKDOWN_FIELDS_TO_CONVERT:
            result[field] = markdownify(getattr(self, field))
        return result
class UserCollectionSubscription(models.Model):
    """
    Record a user's subscription to a publicly-discoverable collection,
    i.e. one that has `discoverable_when_public = True`
    """
    # The collection being subscribed to; the subscription is removed with it
    collection = models.ForeignKey(Collection, on_delete=models.CASCADE)
    # The subscribing user; the subscription is removed with the account
    user = models.ForeignKey('auth.User', on_delete=models.CASCADE)
    # NOTE(review): the 'b' prefix is shared with `UserAssetSubscription` —
    # presumably intentional, but confirm before relying on prefix uniqueness
    uid = KpiUidField(uid_prefix='b')

    class Meta:
        # A user may subscribe to a given collection at most once
        unique_together = ('collection', 'user')
class UserAssetSubscription(models.Model):
    """
    Record a user's subscription to a publicly-discoverable asset,
    i.e. one where the anonymous user has been granted `discover_asset`
    """
    # The asset being subscribed to; the subscription is removed with it
    asset = models.ForeignKey(Asset, on_delete=models.CASCADE)
    # The subscribing user; the subscription is removed with the account
    user = models.ForeignKey('auth.User', on_delete=models.CASCADE)
    # NOTE(review): the 'b' prefix is shared with
    # `UserCollectionSubscription` — presumably intentional; verify
    uid = KpiUidField(uid_prefix='b')

    class Meta:
        # A user may subscribe to a given asset at most once
        unique_together = ('asset', 'user')
class AssetVersion(models.Model):
    """
    A snapshot of an `Asset`'s content at a point in time. Rows are created
    by `Asset.save()`; `deployed` marks versions that were pushed to the
    deployment backend.
    """
    uid = KpiUidField(uid_prefix='v')
    asset = models.ForeignKey('Asset', related_name='asset_versions',
                              on_delete=models.CASCADE)
    name = models.CharField(null=True, max_length=255)
    date_modified = models.DateTimeField(default=timezone.now)

    # preserving _reversion_version in case we don't save all that we
    # need to in the first migration from reversion to AssetVersion
    _reversion_version = models.OneToOneField(
        Version,
        null=True,
        on_delete=models.SET_NULL,
    )
    # The asset content as it was when this version was created
    version_content = JSONBField()
    uid_aliases = JSONBField(null=True)
    # Content as actually deployed, when it differs from `version_content`
    deployed_content = JSONBField(null=True)
    _deployment_data = JSONBField(default=dict)
    deployed = models.BooleanField(default=False)

    class Meta:
        # Newest version first
        ordering = ['-date_modified']

    def _deployed_content(self):
        """
        Return the content to deploy: the stored `deployed_content` if
        present, otherwise `version_content` converted to an XLSForm-style
        structure. Versions migrated from reversion use the deprecated
        autoname behavior.
        """
        if self.deployed_content is not None:
            return self.deployed_content
        legacy_names = self._reversion_version is not None
        if legacy_names:
            return to_xlsform_structure(self.version_content,
                                        deprecated_autoname=True)
        else:
            return to_xlsform_structure(self.version_content,
                                        move_autonames=True)

    def to_formpack_schema(self):
        # Schema dict in the shape formpack expects; this version's uid
        # doubles as the formpack version identifier
        return {
            'content': expand_content(self._deployed_content()),
            'version': self.uid,
            'version_id_key': '__version__',
        }

    @property
    def content_hash(self):
        # used to determine changes in the content from version to version
        # not saved, only compared with other asset_versions
        _json_string = json.dumps(self.version_content, sort_keys=True)
        return hashlib.sha1(hashable_str(_json_string)).hexdigest()

    def __str__(self):
        return '{}@{} T{}{}'.format(
            self.asset.uid, self.uid,
            self.date_modified.strftime('%Y-%m-%d %H:%M'),
            ' (deployed)' if self.deployed else '')
class AssetExportSettings(models.Model):
    """
    A named set of saved export settings for an `Asset`, stored as JSON in
    `export_settings`.
    """
    uid = KpiUidField(uid_prefix='es')
    asset = models.ForeignKey('Asset', related_name='asset_export_settings',
                              on_delete=models.CASCADE)
    # No default/auto_now: always overwritten in `save()` below
    date_modified = models.DateTimeField()
    name = models.CharField(max_length=255, blank=True, default='')
    export_settings = JSONBField(default=dict)

    def save(self, *args, **kwargs):
        # Refresh the modification timestamp on every save
        self.date_modified = timezone.now()
        super().save(*args, **kwargs)

    class Meta:
        ordering = ['-date_modified']
        # Setting names are unique per asset (blank names included)
        unique_together = ('asset', 'name')

    def __str__(self):
        return f'{self.name} {self.uid}'
class AssetFile(models.Model):
    """
    A file (currently only a map layer) attached to an `Asset`.
    """
    # More to come!
    MAP_LAYER = 'map_layer'
    TYPE_CHOICES = ((MAP_LAYER, MAP_LAYER), )

    uid = KpiUidField(uid_prefix='af')
    asset = models.ForeignKey('Asset', related_name='asset_files',
                              on_delete=models.CASCADE)
    # Keep track of the uploading user, who could be anyone with `change_asset`
    # rights, not just the asset owner
    user = models.ForeignKey('auth.User', related_name='asset_files',
                             on_delete=models.CASCADE)
    file_type = models.CharField(choices=TYPE_CHOICES, max_length=32)
    name = models.CharField(max_length=255)
    date_created = models.DateTimeField(default=timezone.now)
    # TODO: Handle deletion! The file won't be deleted automatically when the
    # object is removed from the database
    content = PrivateFileField(upload_to=upload_to, max_length=380)
    metadata = JSONBField(default=dict)
class Collection(ObjectPermissionMixin, TagStringMixin, MPTTModel):
    """
    A tree-structured (MPTT) container of `Asset`s and other `Collection`s,
    with object-level permissions.
    """
    name = models.CharField(max_length=255)
    # MPTT tree parent; deleting a parent removes the whole subtree
    parent = TreeForeignKey('self', null=True, blank=True,
                            related_name='children',
                            on_delete=models.CASCADE)
    owner = models.ForeignKey('auth.User', related_name='owned_collections',
                              on_delete=models.CASCADE)
    editors_can_change_permissions = models.BooleanField(default=True)
    # When True, anonymous users can find this collection (see
    # `UserCollectionSubscription`)
    discoverable_when_public = models.BooleanField(default=False)
    uid = KpiUidField(uid_prefix='c')
    date_created = models.DateTimeField(auto_now_add=True)
    date_modified = models.DateTimeField(auto_now=True)
    objects = CollectionManager()
    tags = TaggableManager(manager=KpiTaggableManager)
    permissions = GenericRelation(ObjectPermission)

    @property
    def kind(self):
        # Discriminator used by serializers to distinguish collections
        # from assets
        return 'collection'

    class Meta:
        ordering = ('-date_modified', )

        permissions = (
            # change_, add_, and delete_collection are provided automatically
            # by Django
            (PERM_VIEW_COLLECTION, 'Can view collection'),
            (PERM_SHARE_COLLECTION,
             "Can change this collection's sharing settings"),
        )

        # Since Django 2.1, 4 permissions are added for each registered model:
        # - add
        # - change
        # - delete
        # - view
        # See https://docs.djangoproject.com/en/2.2/topics/auth/default/#default-permissions
        # for more detail.
        # `view_collection` clashes with newly built-in one.
        # The simplest way to fix this is to keep old behaviour
        default_permissions = ('add', 'change', 'delete')

    # Assignable permissions that are stored in the database
    ASSIGNABLE_PERMISSIONS = (PERM_VIEW_COLLECTION, PERM_CHANGE_COLLECTION)
    # Calculated permissions that are neither directly assignable nor stored
    # in the database, but instead implied by assignable permissions
    CALCULATED_PERMISSIONS = (PERM_SHARE_COLLECTION, PERM_DELETE_COLLECTION)
    # Granting some permissions implies also granting other permissions
    IMPLIED_PERMISSIONS = {
        # Format: explicit: (implied, implied, ...)
        PERM_CHANGE_COLLECTION: (PERM_VIEW_COLLECTION, ),
    }

    def get_ancestors_or_none(self):
        # ancestors are ordered from farthest to nearest
        ancestors = self.get_ancestors()
        if ancestors.exists():
            return ancestors
        else:
            return None

    def get_mixed_children(self):
        """ Returns all children, both Assets and Collections """
        return CollectionChildrenQuerySet(self)

    def __str__(self):
        return self.name
class Asset(ObjectPermissionMixin,
            TagStringMixin,
            DeployableMixin,
            XlsExportable,
            FormpackXLSFormUtils,
            models.Model):
    """
    The central KPI content model. `asset_type` distinguishes surveys,
    templates, blocks, and questions (see `ASSET_TYPES`). Content is stored
    as JSON; every save creates an `AssetVersion`.
    """
    name = models.CharField(max_length=255, blank=True, default='')
    date_created = models.DateTimeField(auto_now_add=True)
    date_modified = models.DateTimeField(auto_now=True)
    # The form definition itself, normalized by `adjust_content_on_save()`
    content = JSONBField(default=dict)
    # Derived metadata about `content`; rebuilt by `_populate_summary()`
    summary = JSONBField(default=dict)
    report_styles = JSONBField(default=dict)
    report_custom = JSONBField(default=dict)
    map_styles = LazyDefaultJSONBField(default=dict)
    map_custom = LazyDefaultJSONBField(default=dict)
    asset_type = models.CharField(
        choices=ASSET_TYPES, max_length=20, default=ASSET_TYPE_SURVEY)
    parent = models.ForeignKey('Collection', related_name='assets',
                               null=True, blank=True,
                               on_delete=models.CASCADE)
    owner = models.ForeignKey('auth.User', related_name='assets', null=True,
                              on_delete=models.CASCADE)
    # TODO: remove this flag; support for it has been removed from
    # ObjectPermissionMixin
    editors_can_change_permissions = models.BooleanField(default=False)
    uid = KpiUidField(uid_prefix='a')
    tags = TaggableManager(manager=KpiTaggableManager)
    settings = JSONBField(default=dict)
    # _deployment_data should be accessed through the `deployment` property
    # provided by `DeployableMixin`
    _deployment_data = JSONBField(default=dict)

    permissions = GenericRelation(ObjectPermission)

    objects = AssetManager()

    @property
    def kind(self):
        # Discriminator used by serializers to distinguish assets from
        # collections
        return 'asset'

    class Meta:
        ordering = ('-date_modified', )

        permissions = (
            # change_, add_, and delete_asset are provided automatically
            # by Django
            (PERM_VIEW_ASSET, _('Can view asset')),
            (PERM_SHARE_ASSET, _("Can change asset's sharing settings")),
            # Permissions for collected data, i.e. submissions
            (PERM_ADD_SUBMISSIONS, _('Can submit data to asset')),
            (PERM_VIEW_SUBMISSIONS, _('Can view submitted data for asset')),
            (PERM_PARTIAL_SUBMISSIONS, _('Can make partial actions on '
                                         'submitted data for asset '
                                         'for specific users')),
            (PERM_CHANGE_SUBMISSIONS,
             _('Can modify submitted data for asset')),
            (PERM_DELETE_SUBMISSIONS,
             _('Can delete submitted data for asset')),
            (PERM_SHARE_SUBMISSIONS, _("Can change sharing settings for "
                                       "asset's submitted data")),
            (PERM_VALIDATE_SUBMISSIONS,
             _("Can validate submitted data asset")),
            # TEMPORARY Issue #1161: A flag to indicate that permissions came
            # solely from `sync_kobocat_xforms` and not from any user
            # interaction with KPI
            (PERM_FROM_KC_ONLY, 'INTERNAL USE ONLY; DO NOT ASSIGN'))

        # Since Django 2.1, 4 permissions are added for each registered model:
        # - add
        # - change
        # - delete
        # - view
        # See https://docs.djangoproject.com/en/2.2/topics/auth/default/#default-permissions
        # for more detail.
        # `view_asset` clashes with newly built-in one.
        # The simplest way to fix this is to keep old behaviour
        default_permissions = ('add', 'change', 'delete')

    # Labels for each `asset_type` as they should be presented to users
    ASSET_TYPE_LABELS = {
        ASSET_TYPE_SURVEY: _('form'),
        ASSET_TYPE_TEMPLATE: _('template'),
        ASSET_TYPE_BLOCK: _('block'),
        ASSET_TYPE_QUESTION: _('question'),
        ASSET_TYPE_TEXT: _('text'),  # unused?
        ASSET_TYPE_EMPTY: _('empty'),  # unused?
        #ASSET_TYPE_COLLECTION: _('collection'),
    }

    # Assignable permissions that are stored in the database.
    # The labels are templates used by `get_label_for_permission()`, which you
    # should call instead of accessing this dictionary directly
    ASSIGNABLE_PERMISSIONS_WITH_LABELS = {
        PERM_VIEW_ASSET: _('View ##asset_type_label##'),
        PERM_CHANGE_ASSET: _('Edit ##asset_type_label##'),
        PERM_ADD_SUBMISSIONS: _('Add submissions'),
        PERM_VIEW_SUBMISSIONS: _('View submissions'),
        PERM_PARTIAL_SUBMISSIONS: _(
            'View submissions only from specific users'),
        PERM_CHANGE_SUBMISSIONS: _('Edit submissions'),
        PERM_DELETE_SUBMISSIONS: _('Delete submissions'),
        PERM_VALIDATE_SUBMISSIONS: _('Validate submissions'),
    }
    ASSIGNABLE_PERMISSIONS = tuple(ASSIGNABLE_PERMISSIONS_WITH_LABELS.keys())
    # Depending on our `asset_type`, only some permissions might be applicable
    ASSIGNABLE_PERMISSIONS_BY_TYPE = {
        ASSET_TYPE_SURVEY: ASSIGNABLE_PERMISSIONS,  # all of them
        ASSET_TYPE_TEMPLATE: (PERM_VIEW_ASSET, PERM_CHANGE_ASSET),
        ASSET_TYPE_BLOCK: (PERM_VIEW_ASSET, PERM_CHANGE_ASSET),
        ASSET_TYPE_QUESTION: (PERM_VIEW_ASSET, PERM_CHANGE_ASSET),
        ASSET_TYPE_TEXT: (),  # unused?
        ASSET_TYPE_EMPTY: (),  # unused?
        #ASSET_TYPE_COLLECTION: # tbd
    }

    # Calculated permissions that are neither directly assignable nor stored
    # in the database, but instead implied by assignable permissions
    CALCULATED_PERMISSIONS = (PERM_SHARE_ASSET, PERM_DELETE_ASSET,
                              PERM_SHARE_SUBMISSIONS)
    # Certain Collection permissions carry over to Asset
    MAPPED_PARENT_PERMISSIONS = {
        PERM_VIEW_COLLECTION: PERM_VIEW_ASSET,
        PERM_CHANGE_COLLECTION: PERM_CHANGE_ASSET
    }
    # Granting some permissions implies also granting other permissions
    IMPLIED_PERMISSIONS = {
        # Format: explicit: (implied, implied, ...)
        PERM_CHANGE_ASSET: (PERM_VIEW_ASSET, ),
        PERM_ADD_SUBMISSIONS: (PERM_VIEW_ASSET, ),
        PERM_VIEW_SUBMISSIONS: (PERM_VIEW_ASSET, ),
        PERM_PARTIAL_SUBMISSIONS: (PERM_VIEW_ASSET, ),
        PERM_CHANGE_SUBMISSIONS: (PERM_VIEW_SUBMISSIONS, ),
        PERM_DELETE_SUBMISSIONS: (PERM_VIEW_SUBMISSIONS, ),
        PERM_VALIDATE_SUBMISSIONS: (PERM_VIEW_SUBMISSIONS, )
    }

    # Pairs of permissions that cannot be held simultaneously; assigning one
    # removes the other (see `_update_partial_permissions`)
    CONTRADICTORY_PERMISSIONS = {
        PERM_PARTIAL_SUBMISSIONS: (
            PERM_VIEW_SUBMISSIONS,
            PERM_CHANGE_SUBMISSIONS,
            PERM_DELETE_SUBMISSIONS,
            PERM_VALIDATE_SUBMISSIONS,
        ),
        PERM_VIEW_SUBMISSIONS: (PERM_PARTIAL_SUBMISSIONS, ),
        PERM_CHANGE_SUBMISSIONS: (PERM_PARTIAL_SUBMISSIONS, ),
        PERM_DELETE_SUBMISSIONS: (PERM_PARTIAL_SUBMISSIONS, ),
        PERM_VALIDATE_SUBMISSIONS: (PERM_PARTIAL_SUBMISSIONS, ),
    }

    # Some permissions must be copied to KC
    KC_PERMISSIONS_MAP = {  # keys are KPI's codenames, values are KC's
        PERM_CHANGE_SUBMISSIONS: 'change_xform',  # "Can Edit" in KC UI
        PERM_VIEW_SUBMISSIONS: 'view_xform',  # "Can View" in KC UI
        PERM_ADD_SUBMISSIONS: 'report_xform',  # "Can submit to" in KC UI
        PERM_DELETE_SUBMISSIONS: 'delete_data_xform',  # "Can Delete Data" in KC UI
        PERM_VALIDATE_SUBMISSIONS: 'validate_xform',  # "Can Validate" in KC UI
    }
    KC_CONTENT_TYPE_KWARGS = {'app_label': 'logger', 'model': 'xform'}
    # KC records anonymous access as flags on the `XForm`
    KC_ANONYMOUS_PERMISSIONS_XFORM_FLAGS = {
        PERM_VIEW_SUBMISSIONS: {'shared': True, 'shared_data': True}
    }

    def __str__(self):
        return '{} ({})'.format(self.name, self.uid)

    def adjust_content_on_save(self):
        """
        This is called on save by default if content exists.
        Can be disabled / skipped by calling with parameter:
        asset.save(adjust_content=False)
        """
        # Normalization pipeline provided by FormpackXLSFormUtils; each step
        # mutates `self.content` in place
        self._standardize(self.content)
        self._make_default_translation_first(self.content)
        self._strip_empty_rows(self.content)
        self._assign_kuids(self.content)
        self._autoname(self.content)
        self._unlink_list_items(self.content)
        self._remove_empty_expressions(self.content)

        settings = self.content['settings']
        _title = settings.pop('form_title', None)
        id_string = settings.get('id_string')
        filename = self.summary.pop('filename', None)
        if filename:
            # if we have filename available, set the id_string
            # and/or form_title from the filename.
            if not id_string:
                id_string = sluggify_label(filename)
                settings['id_string'] = id_string
            if not _title:
                _title = filename
        if self.asset_type not in [ASSET_TYPE_SURVEY, ASSET_TYPE_TEMPLATE]:
            # instead of deleting the settings, simply clear them out
            self.content['settings'] = {}

        if _title is not None:
            self.name = _title

    def clone(self, version_uid=None):
        # not currently used, but this is how "to_clone_dict" should work
        return Asset.objects.create(**self.to_clone_dict(version_uid))

    @property
    def deployed_versions(self):
        # Only versions that were actually deployed, newest first
        return self.asset_versions.filter(
            deployed=True).order_by('-date_modified')

    def get_ancestors_or_none(self):
        # ancestors are ordered from farthest to nearest
        if self.parent is not None:
            return self.parent.get_ancestors(include_self=True)
        else:
            return None

    def get_filters_for_partial_perm(self, user_id,
                                     perm=PERM_VIEW_SUBMISSIONS):
        """
        Returns the list of filters for a specific permission `perm`
        and this specific asset.
        :param user_id:
        :param perm: see `constants.*_SUBMISSIONS`
        :return:
        """
        # Only concrete `*_submissions` perms are valid here;
        # `partial_submissions` itself is the container, not a queryable perm
        if not perm.endswith(
                SUFFIX_SUBMISSIONS_PERMS) or perm == PERM_PARTIAL_SUBMISSIONS:
            raise BadPermissionsException(
                _('Only partial permissions for '
                  'submissions are supported'))

        perms = self.get_partial_perms(user_id, with_filters=True)
        if perms:
            return perms.get(perm)
        return None

    def get_label_for_permission(self, permission_or_codename):
        """
        Return the user-facing label for a permission (object or codename),
        with `##asset_type_label##` substituted for this asset's type label.
        """
        try:
            codename = permission_or_codename.codename
            permission = permission_or_codename
        except AttributeError:
            # A plain codename string was passed, not a Permission object
            codename = permission_or_codename
            permission = None
        try:
            label = self.ASSIGNABLE_PERMISSIONS_WITH_LABELS[codename]
        except KeyError:
            if not permission:
                # Seems expensive. Cache it?
                permission = Permission.objects.get(
                    # `content_type` and `codename` are `unique_together`
                    # https://github.com/django/django/blob/e893c0ad8b0b5b0a1e5be3345c287044868effc4/django/contrib/auth/models.py#L69
                    content_type=ContentType.objects.get_for_model(self),
                    codename=codename)
            label = permission.name
        label = label.replace(
            '##asset_type_label##',
            # Raises TypeError if not coerced explicitly
            str(self.ASSET_TYPE_LABELS[self.asset_type]))
        return label

    def get_partial_perms(self, user_id, with_filters=False):
        """
        Returns the list of permissions the user is restricted to,
        for this specific asset.
        If `with_filters` is `True`, it returns a dict of permissions (as
        keys) and the filters (as values) to apply on query to narrow down
        the results.

        For example:
        `get_partial_perms(user1_obj.id)` would return
        ```
        ['view_submissions',]
        ```

        `get_partial_perms(user1_obj.id, with_filters=True)` would return
        ```
        {
            'view_submissions: [
                {'_submitted_by': {'$in': ['user1', 'user2']}},
                {'_submitted_by': 'user3'}
            ],
        }
        ```

        If user doesn't have any partial permissions, it returns `None`.

        :param user_obj: auth.User
        :param with_filters: boolean. Optional
        :return: list|dict|None
        """
        perms = self.asset_partial_permissions.filter(user_id=user_id)\
            .values_list("permissions", flat=True).first()

        if perms:
            if with_filters:
                return perms
            else:
                # Keys only: the list of restricted permission codenames
                return list(perms)

        return None

    @property
    def has_active_hooks(self):
        """
        Returns if asset has active hooks.
        Useful to update `kc.XForm.has_kpi_hooks` field.
        :return: {boolean}
        """
        return self.hooks.filter(active=True).exists()

    @property
    def latest_deployed_version(self):
        return self.deployed_versions.first()

    @property
    def latest_version(self):
        """
        The most recent `AssetVersion`, using the prefetched list from
        `optimize_queryset_for_list()` when available; `None` if there are
        no versions.
        """
        versions = None
        try:
            versions = self.prefetched_latest_versions
        except AttributeError:
            versions = self.asset_versions.order_by('-date_modified')
        try:
            return versions[0]
        except IndexError:
            return None

    @staticmethod
    def optimize_queryset_for_list(queryset):
        """ Used by serializers to improve performance when listing assets """
        queryset = queryset.defer(
            # Avoid pulling these from the database because they are often
            # huge and we don't need them for list views.
            'content', 'report_styles'
        ).select_related(
            # We only need `username`, but `select_related('owner__username')`
            # actually pulled in the entire `auth_user` table under Django
            # 1.8. In Django 1.9+, "select_related() prohibits non-relational
            # fields for nested relations."
            'owner',
        ).prefetch_related(
            # We previously prefetched `permissions__content_object`, but that
            # actually pulled the entirety of each permission's linked asset
            # from the database! For now, the solution is to remove
            # `content_object` here *and* from
            # `ObjectPermissionNestedSerializer`.
            'permissions__permission',
            'permissions__user',
            # `Prefetch(..., to_attr='prefetched_list')` stores the prefetched
            # related objects in a list (`prefetched_list`) that we can use in
            # other methods to avoid additional queries; see:
            # https://docs.djangoproject.com/en/1.8/ref/models/querysets/#prefetch-objects
            Prefetch('tags', to_attr='prefetched_tags'),
            Prefetch(
                'asset_versions',
                queryset=AssetVersion.objects.order_by(
                    '-date_modified').only(
                        'uid', 'asset', 'date_modified', 'deployed'),
                to_attr='prefetched_latest_versions',
            ),
        )
        return queryset

    def rename_translation(self, _from, _to):
        # Requires at least two translations in the content; delegates to
        # FormpackXLSFormUtils._rename_translation
        if not self._has_translations(self.content, 2):
            raise ValueError('no translations available')
        self._rename_translation(self.content, _from, _to)

    # todo: test and implement this method
    # todo 2019-04-25: Still needed, `revert_to_version` does the same?
    # def restore_version(self, uid):
    #     _version_to_restore = self.asset_versions.get(uid=uid)
    #     self.content = _version_to_restore.version_content
    #     self.name = _version_to_restore.name

    def revert_to_version(self, version_uid):
        # Replace current content with a previous version's and save (which
        # creates a new AssetVersion)
        av = self.asset_versions.get(uid=version_uid)
        self.content = av.version_content
        self.save()

    def save(self, *args, **kwargs):
        """
        Normalize content, refresh summary and report styles, then save and
        (unless `create_version=False`) create a new `AssetVersion`.
        Extra kwargs: `adjust_content` (default True), `create_version`
        (default True) — both are popped before calling Django's save.
        """
        if self.content is None:
            self.content = {}

        # in certain circumstances, we don't want content to
        # be altered on save. (e.g. on asset.deploy())
        if kwargs.pop('adjust_content', True):
            self.adjust_content_on_save()

        # populate summary
        self._populate_summary()

        # infer asset_type only between question and block
        if self.asset_type in [ASSET_TYPE_QUESTION, ASSET_TYPE_BLOCK]:
            try:
                row_count = int(self.summary.get('row_count'))
            except TypeError:
                # `row_count` missing from summary; leave asset_type alone
                pass
            else:
                if row_count == 1:
                    self.asset_type = ASSET_TYPE_QUESTION
                elif row_count > 1:
                    self.asset_type = ASSET_TYPE_BLOCK

        self._populate_report_styles()

        _create_version = kwargs.pop('create_version', True)
        super().save(*args, **kwargs)

        if _create_version:
            self.asset_versions.create(
                name=self.name,
                version_content=self.content,
                _deployment_data=self._deployment_data,
                # asset_version.deployed is set in the
                # DeploymentSerializer
                deployed=False,
            )

    @property
    def snapshot(self):
        # Reuse an existing snapshot for the latest version if there is one
        return self._snapshot(regenerate=False)

    def to_clone_dict(self, version_uid=None, version=None):
        """
        Returns a dictionary of the asset based on version_uid or version.
        If `version` is specified, there are no needs to provide `version_uid`
        and make another request to DB.
        :param version_uid: string
        :param version: AssetVersion
        :return: dict
        """
        if not isinstance(version, AssetVersion):
            if version_uid:
                version = self.asset_versions.get(uid=version_uid)
            else:
                # Default to the most recent version (Meta.ordering)
                version = self.asset_versions.first()

        return {
            'name': version.name,
            'content': version.version_content,
            'asset_type': self.asset_type,
            'tag_string': self.tag_string,
        }

    def to_ss_structure(self):
        return flatten_content(self.content, in_place=False)

    @property
    def version__content_hash(self):
        # Avoid reading the property `self.latest_version` more than once,
        # since it may execute a database query each time it's read
        latest_version = self.latest_version
        if latest_version:
            return latest_version.content_hash

    @property
    def version_id(self):
        # Avoid reading the property `self.latest_version` more than once,
        # since it may execute a database query each time it's read
        latest_version = self.latest_version
        if latest_version:
            return latest_version.uid

    def _populate_report_styles(self):
        """
        Ensure every survey row has a `$kuid` and an entry in
        `report_styles`, then rebuild the `report_styles` dict.
        """
        default = self.report_styles.get(DEFAULT_REPORTS_KEY, {})
        specifieds = self.report_styles.get(SPECIFIC_REPORTS_KEY, {})
        kuids_to_variable_names = self.report_styles.get('kuid_names', {})
        for (index, row) in enumerate(self.content.get('survey', [])):
            if '$kuid' not in row:
                # Deterministic kuid: derived from the row name when
                # available, otherwise from position + row content
                if 'name' in row:
                    row['$kuid'] = json_hash([self.uid, row['name']])
                else:
                    row['$kuid'] = json_hash([self.uid, index, row])
            _identifier = row.get('name', row['$kuid'])
            kuids_to_variable_names[_identifier] = row['$kuid']
            if _identifier not in specifieds:
                specifieds[_identifier] = {}
        self.report_styles = {
            DEFAULT_REPORTS_KEY: default,
            SPECIFIC_REPORTS_KEY: specifieds,
            'kuid_names': kuids_to_variable_names,
        }

    def _populate_summary(self):
        # Rebuild `summary` from `content`; empty content yields an empty
        # summary
        if self.content is None:
            self.content = {}
            self.summary = {}
            return
        analyzer = AssetContentAnalyzer(**self.content)
        self.summary = analyzer.summary

    @transaction.atomic
    def _snapshot(self, regenerate=True):
        """
        Return an `AssetSnapshot` for the latest version, creating one if
        none exists (or if `regenerate` is True). Runs in a transaction to
        keep the get/delete/create sequence consistent.
        """
        asset_version = self.latest_version

        try:
            snapshot = AssetSnapshot.objects.get(
                asset=self, asset_version=asset_version)
            if regenerate:
                snapshot.delete()
                snapshot = False
        except AssetSnapshot.MultipleObjectsReturned:
            # how did multiple snapshots get here?
            snaps = AssetSnapshot.objects.filter(
                asset=self, asset_version=asset_version)
            snaps.delete()
            snapshot = False
        except AssetSnapshot.DoesNotExist:
            snapshot = False

        if not snapshot:
            if self.name != '':
                form_title = self.name
            else:
                # Fall back to the id_string from content settings
                _settings = self.content.get('settings', {})
                form_title = _settings.get('id_string', 'Untitled')
            self._append(self.content, settings={
                'form_title': form_title,
            })
            snapshot = AssetSnapshot.objects.create(
                asset=self, asset_version=asset_version,
                source=self.content)
        return snapshot

    def _update_partial_permissions(self, user_id, perm, remove=False,
                                    partial_perms=None):
        """
        Updates partial permissions relation table according to `perm`.

        If `perm` == `PERM_PARTIAL_SUBMISSIONS`, then
        If `partial_perms` is not `None`, it should be a dict with filters
        mapped to their corresponding permission.
        Each filter is used to narrow down results when querying Mongo.
        e.g.:
        ```
        {
            'view_submissions': [{
                '_submitted_by': {
                    '$in': [
                        'someuser',
                        'anotheruser'
                    ]
                }
            }],
        }
        ```

        Even if we can only restrict an user to view another's submissions
        so far, this code wants to be future-proof and supports other
        permissions such as:
            - `change_submissions`
            - `validate_submissions`
        `partial_perms` could be passed as:
        ```
        {
            'change_submissions': [{
                '_submitted_by': {
                    '$in': [
                        'someuser',
                        'anotheruser'
                    ]
                }
            }]
            'validate_submissions': [{
                '_submitted_by': 'someuser'
            }],
        }
        ```

        :param user_id: int.
        :param perm: str. see Asset.ASSIGNABLE_PERMISSIONS
        :param remove: boolean. Default is false.
        :param partial_perms: dict. Default is None.
        :return:
        """

        def clean_up_table():
            # Because of the unique constraint, there should be only
            # one record that matches this query.
            # We don't look for record existence to avoid extra query.
            self.asset_partial_permissions.filter(user_id=user_id).delete()

        if perm == PERM_PARTIAL_SUBMISSIONS:

            if remove:
                clean_up_table()
                return

            if user_id == self.owner.pk:
                raise BadPermissionsException(
                    _("Can not assign '{}' permission to owner".format(perm)))

            if not partial_perms:
                raise BadPermissionsException(
                    _("Can not assign '{}' permission. "
                      "Partial permissions are missing.".format(perm)))

            # Expand each granted partial perm with its implied
            # `*_submissions` perms so the stored dict is self-contained
            new_partial_perms = {}
            for partial_perm, filters in partial_perms.items():
                implied_perms = [
                    implied_perm
                    for implied_perm in self.get_implied_perms(partial_perm)
                    if implied_perm.endswith(SUFFIX_SUBMISSIONS_PERMS)
                ]
                implied_perms.append(partial_perm)
                for implied_perm in implied_perms:
                    if implied_perm not in new_partial_perms:
                        new_partial_perms[implied_perm] = []
                    new_partial_perms[implied_perm] += filters

            AssetUserPartialPermission.objects.update_or_create(
                asset_id=self.pk,
                user_id=user_id,
                defaults={'permissions': new_partial_perms})

        elif perm in self.CONTRADICTORY_PERMISSIONS.get(
                PERM_PARTIAL_SUBMISSIONS):
            # Granting a full submissions perm revokes any partial one
            clean_up_table()
class AssetFile(OpenRosaManifestInterface, models.Model):
    """
    A file attached to an `Asset` (map layer or form media), exposed through
    the OpenRosa manifest. Form-media files are soft-deleted by default so
    previously-deployed forms keep working.
    """

    # More to come!
    MAP_LAYER = 'map_layer'
    FORM_MEDIA = 'form_media'

    TYPE_CHOICES = (
        (MAP_LAYER, MAP_LAYER),
        (FORM_MEDIA, FORM_MEDIA),
    )

    ALLOWED_MIME_TYPES = {
        FORM_MEDIA: ('image', 'audio', 'video', 'text/csv',
                     'application/xml'),
        MAP_LAYER: (
            'text/csv',
            'application/vnd.google-earth.kml+xml',
            'application/vnd.google-earth.kmz',
            'application/wkt',
            'application/geo+json',
            'application/json',
        ),
    }

    uid = KpiUidField(uid_prefix='af')
    asset = models.ForeignKey('Asset', related_name='asset_files',
                              on_delete=models.CASCADE)
    # Keep track of the uploading user, who could be anyone with
    # `change_asset` rights, not just the asset owner
    user = models.ForeignKey('auth.User', related_name='asset_files',
                             on_delete=models.CASCADE)
    file_type = models.CharField(choices=TYPE_CHOICES, max_length=32)
    description = models.CharField(max_length=255)
    date_created = models.DateTimeField(default=timezone.now)
    # Nullable because the file may instead live at a remote URL recorded in
    # `metadata['redirect_url']` (see `is_remote_url`)
    content = PrivateFileField(upload_to=upload_to, max_length=380, null=True)
    metadata = JSONBField(default=dict)
    # Soft-deletion timestamp for form media (see `delete()`)
    date_deleted = models.DateTimeField(null=True, default=None)

    def delete(self, using=None, keep_parents=False, force=False):
        """
        Hard-delete the row (and the stored file) when `force` is True or
        the file is not form media; otherwise only set `date_deleted`.
        """
        # Delete object and files on storage if `force` is True or file type
        # is anything else than 'form_media'
        if force or self.file_type != self.FORM_MEDIA:
            if not self.is_remote_url:
                self.content.delete(save=False)
            return super().delete(using=using, keep_parents=keep_parents)

        # Otherwise, just flag the file as deleted.
        self.date_deleted = timezone.now()
        self.save(update_fields=['date_deleted'])

    @property
    def filename(self):
        """
        Implements `OpenRosaManifestInterface.filename()`
        """
        # FIX: memoize under a single-underscore attribute. The previous
        # `hasattr(self, '__filename')` check could never succeed because
        # `self.__filename = ...` inside the class body is name-mangled to
        # `self._AssetFile__filename`, so `set_filename()` re-ran on every
        # access.
        if hasattr(self, '_filename'):
            return self._filename
        self.set_filename()
        self._filename = self.metadata['filename']
        return self._filename

    def get_download_url(self, request):
        """
        Implements `OpenRosaManifestInterface.get_download_url()`
        """
        return reverse('asset-file-content',
                       args=(self.asset.uid, self.uid),
                       request=request)

    @staticmethod
    def get_path(asset, file_type, filename):
        # Storage path: <owner>/asset_files/<asset uid>/<type>/<filename>
        return posixpath.join(
            asset.owner.username,
            'asset_files',
            asset.uid,
            file_type,
            filename
        )

    @property
    def hash(self):
        """
        Implements `OpenRosaManifestInterface.hash()`
        """
        # FIX: same name-mangling issue as `filename` above; cache under a
        # single-underscore attribute so memoization actually works.
        if hasattr(self, '_hash'):
            return self._hash
        self.set_hash()
        self._hash = self.metadata['hash']
        return self._hash

    @property
    def is_remote_url(self):
        # True when the file lives at `metadata['redirect_url']` instead of
        # local storage
        try:
            self.metadata['redirect_url']
        except KeyError:
            return False
        return True

    def save(self, force_insert=False, force_update=False, using=None,
             update_fields=None):
        # Populate derived metadata only on first save
        if self.pk is None:
            self.set_filename()
            self.set_hash()
            self.set_mimetype()
        return super().save(force_insert, force_update, using, update_fields)

    def set_filename(self):
        # Default the metadata filename to the stored file's name
        if not self.metadata.get('filename'):
            self.metadata['filename'] = self.content.name

    def set_hash(self):
        # MD5 of the remote URL string or of the local file contents
        if not self.metadata.get('hash'):
            if self.is_remote_url:
                md5_hash = get_hash(self.metadata['redirect_url'])
            else:
                md5_hash = get_hash(self.content.file.read())
            self.metadata['hash'] = f'md5:{md5_hash}'

    def set_mimetype(self):
        # Guessed from the filename extension; may be None
        mimetype, _ = guess_type(self.metadata['filename'])
        self.metadata['mimetype'] = mimetype
class ExportTaskBase(ImportExportTask):
    """
    An (asynchronous) submission data export job. The instantiator must set
    the `data` attribute to a dictionary with the following keys:
    * `type`: required; `xls`, `csv`, or `spss_labels`
    * `source`: required; URL of a deployed `Asset`
    * `lang`: optional; the name of the translation to be used for headers
      and response values. Specify `_xml` to use question and choice names
      instead of labels. Leave unset, or use `_default` for labels in the
      default language
    * `hierarchy_in_labels`: optional; when `true`, include the labels for
      all ancestor groups in each field label, separated by `group_sep`.
      Defaults to `False`
    * `group_sep`: optional; separator to use when labels contain group
      hierarchy. Defaults to `/`
    * `fields_from_all_versions`: optional; defaults to `True`. When `False`,
      only fields from the latest deployed version are included
    * `tag_cols_for_header`: optional; a list of tag columns in the form
      definition to include as header rows in the export. For example, given
      the following form definition:

       | type    | name      | label                   | hxl       |
       |---------|-----------|-------------------------|-----------|
       | integer | displaced | How many are displaced? | #affected |

      an export with `tag_cols_for_header = ['hxl']` might look like:

       | How many persons are displaced? |
       | #affected                       |
       |---------------------------------|
       | 123                             |

      The default is `['hxl']`
    """
    uid = KpiUidField(uid_prefix='e')
    # Timestamp of the newest submission seen while streaming the export;
    # written by `_record_last_submission_time()`
    last_submission_time = models.DateTimeField(null=True)
    # The generated export file itself
    result = PrivateFileField(upload_to=export_upload_to, max_length=380)

    # Submission-level fields copied into the export alongside form fields
    COPY_FIELDS = (
        IdCopyField,
        '_uuid',
        SubmissionTimeCopyField,
        ValidationStatusCopyField,
        NotesCopyField,
        # '_status' is always 'submitted_via_web' unless the submission was
        # made via KoBoCAT's bulk-submission-form; in that case, it's 'zip':
        # https://github.com/kobotoolbox/kobocat/blob/78133d519f7b7674636c871e3ba5670cd64a7227/onadata/apps/logger/import_tools.py#L67
        '_status',
        '_submitted_by',
        TagsCopyField,
    )

    # It's not very nice to ask our API users to submit `null` or `false`,
    # so replace friendlier language strings with the constants that formpack
    # expects
    API_LANGUAGE_TO_FORMPACK_LANGUAGE = {
        '_default': formpack.constants.UNTRANSLATED,
        '_xml': formpack.constants.UNSPECIFIED_TRANSLATION,
    }

    # Key under which each submission carries its timestamp
    TIMESTAMP_KEY = '_submission_time'
    # Above 244 seems to cause 'Download error' in Chrome 64/Linux
    MAXIMUM_FILENAME_LENGTH = 240

    class InaccessibleData(Exception):
        # Raised when the source asset does not exist or the requesting
        # user lacks (even partial) view permission on its submissions
        def __str__(self):
            return t(
                'This data does not exist or you do not have access to it')

    class Meta:
        abstract = True
        ordering = ['-date_created']

    def _build_export_filename(self, export, export_type):
        """
        Internal method to build the export filename based on the export
        title (which should be set when calling the `FormPack()`
        constructor), whether the latest or all versions are included, the
        label language, the current date and time, and the appropriate
        extension for the given `export_type`
        """
        if export_type == 'xls':
            extension = 'xlsx'
        elif export_type == 'spss_labels':
            extension = 'zip'
        else:
            extension = export_type

        if export_type == 'spss_labels':
            lang = 'SPSS Labels'
        elif export.lang == formpack.constants.UNTRANSLATED:
            lang = 'labels'
        else:
            lang = export.lang

        # TODO: translate this? Would we have to delegate to the front end?
        if self._fields_from_all_versions:
            version = 'all versions'
        else:
            version = 'latest version'

        # Double braces keep `title`/`lang` as placeholders for the second
        # `.format()` pass below
        filename_template = (
            '{{title}} - {version} - {{lang}} - {date:%Y-%m-%d-%H-%M-%S}'
            '.{ext}'.format(version=version, date=utcnow(), ext=extension))
        title = export.title
        filename = filename_template.format(title=title, lang=lang)
        overrun = len(filename) - self.MAXIMUM_FILENAME_LENGTH
        if overrun <= 0:
            return filename

        # TODO: trim the title in a right-to-left-friendly way
        # TODO: deal with excessively long language names
        title = ellipsize(title, len(title) - overrun)
        filename = filename_template.format(title=title, lang=lang)
        return filename

    def _build_export_options(self, pack: formpack.FormPack) -> Dict:
        """
        Internal method to build formpack `Export` constructor arguments
        based on the options set in `self.data`
        """
        group_sep = self.data.get('group_sep', '/')
        multiple_select = self.data.get('multiple_select', 'both')
        translations = pack.available_translations
        # Fall back to the pack's first available translation (or None)
        lang = self.data.get('lang', None) or next(iter(translations), None)
        fields = self.data.get('fields', [])
        xls_types_as_text = self.data.get('xls_types_as_text', True)
        include_media_url = self.data.get('include_media_url', False)
        # `_index` is forced on when exporting all fields or when it was
        # explicitly requested
        force_index = True if not fields or '_index' in fields else False
        try:
            # If applicable, substitute the constants that formpack expects
            # for friendlier language strings used by the API
            lang = self.API_LANGUAGE_TO_FORMPACK_LANGUAGE[lang]
        except KeyError:
            pass
        tag_cols_for_header = self.data.get('tag_cols_for_header', ['hxl'])

        return {
            'versions': pack.versions.keys(),
            'group_sep': group_sep,
            'multiple_select': multiple_select,
            'lang': lang,
            'hierarchy_in_labels': self._hierarchy_in_labels,
            'copy_fields': self.COPY_FIELDS,
            'force_index': force_index,
            'tag_cols_for_header': tag_cols_for_header,
            'filter_fields': fields,
            'xls_types_as_text': xls_types_as_text,
            'include_media_url': include_media_url,
        }

    @property
    def _fields_from_all_versions(self) -> bool:
        # `data` option normalized to a bool
        fields_from_versions = self.data.get('fields_from_all_versions', True)
        # v1 exports expects a string
        if isinstance(fields_from_versions, str):
            return fields_from_versions.lower() == 'true'
        return fields_from_versions

    @staticmethod
    def _get_fields_and_groups(fields: List[str]) -> List[str]:
        """
        Ensure repeat groups are included when filtering for specific fields
        by appending the path items. For example, a field with path of
        `group1/group2/field` will be added to the list as:
        ['group1/group2/field', 'group1/group2', 'group1']

        NOTE: extends (mutates) and returns the `fields` list passed in.
        """
        if not fields:
            return []

        # Some fields are attached to the submission and must be included in
        # addition to the user-selected fields
        additional_fields = ['_attachments']

        field_groups = set()
        for field in fields:
            if '/' not in field:
                continue
            items = []
            # Walk up the path one level at a time, collecting each
            # ancestor group
            while field:
                _path = split(field)[0]
                if _path:
                    items.append(_path)
                field = _path
            field_groups.update(items)
        fields += list(field_groups) + additional_fields
        return fields

    @property
    def _hierarchy_in_labels(self) -> bool:
        # `data` option normalized to a bool
        hierarchy_in_labels = self.data.get('hierarchy_in_labels', False)
        # v1 exports expects a string
        if isinstance(hierarchy_in_labels, str):
            return hierarchy_in_labels.lower() == 'true'
        return hierarchy_in_labels

    def _record_last_submission_time(self, submission_stream):
        """
        Internal generator that yields each submission in the given
        `submission_stream` while recording the most recent submission
        timestamp in `self.last_submission_time`
        """
        # FIXME: Mongo has only per-second resolution. Brutal.
        for submission in submission_stream:
            try:
                timestamp = submission[self.TIMESTAMP_KEY]
            except KeyError:
                # Submissions without a timestamp are passed through
                # untouched
                pass
            else:
                timestamp = dateutil.parser.parse(timestamp)
                # Mongo timestamps are UTC, but their string representation
                # does not indicate that
                timestamp = timestamp.replace(tzinfo=pytz.UTC)
                if (self.last_submission_time is None
                        or timestamp > self.last_submission_time):
                    self.last_submission_time = timestamp
            yield submission

    def _run_task(self, messages):
        """
        Generate the export and store the result in the `self.result`
        `PrivateFileField`. Should be called by the `run()` method of the
        superclass. The `submission_stream` method is provided for testing
        """
        # NOTE(review): `source_url` is unused here — the source is
        # re-resolved inside `get_export_object()`
        source_url = self.data.get('source', False)
        flatten = self.data.get('flatten', True)
        export_type = self.data.get('type', '').lower()
        if export_type == 'xlsx':
            # Excel exports are always returned in XLSX format, but they're
            # referred to internally as `xls`
            export_type = 'xls'
        if export_type not in ('xls', 'csv', 'geojson', 'spss_labels'):
            raise NotImplementedError(
                'only `xls`, `csv`, `geojson`, and `spss_labels` '
                'are valid export types')

        export, submission_stream = self.get_export_object()

        filename = self._build_export_filename(export, export_type)
        # Create an empty file first so that storage assigns it a name
        self.result.save(filename, ContentFile(''))
        # FileField files are opened read-only by default and must be
        # closed and reopened to allow writing
        # https://code.djangoproject.com/ticket/13809
        self.result.close()
        self.result.file.close()

        with self.result.storage.open(self.result.name, 'wb') as output_file:
            if export_type == 'csv':
                for line in export.to_csv(submission_stream):
                    output_file.write((line + "\r\n").encode('utf-8'))
            elif export_type == 'geojson':
                for line in export.to_geojson(submission_stream,
                                              flatten=flatten):
                    output_file.write(line.encode('utf-8'))
            elif export_type == 'xls':
                # XLSX export actually requires a filename (limitation of
                # pyexcelerate?)
                with tempfile.NamedTemporaryFile(
                        prefix='export_xlsx',
                        mode='rb') as xlsx_output_file:
                    export.to_xlsx(xlsx_output_file.name, submission_stream)
                    # TODO: chunk again once
                    # https://github.com/jschneier/django-storages/issues/449
                    # is fixed
                    # TODO: Check if monkey-patch (line 57) can restore
                    # writing by chunk
                    """
                    while True:
                        chunk = xlsx_output_file.read(5 * 1024 * 1024)
                        if chunk:
                            output_file.write(chunk)
                        else:
                            break
                    """
                    output_file.write(xlsx_output_file.read())
            elif export_type == 'spss_labels':
                export.to_spss_labels(output_file)

        # Restore the FileField to its typical state
        self.result.open('rb')
        self.save(update_fields=['last_submission_time'])

    def delete(self, *args, **kwargs):
        # removing exported file from storage before deleting the row
        self.result.delete(save=False)
        super().delete(*args, **kwargs)

    def get_export_object(
        self, source: Optional[Asset] = None
    ) -> Tuple[formpack.reporting.Export, Generator]:
        """
        Get the formpack Export object and submission stream for processing.

        Raises `InaccessibleData` when the source cannot be resolved or the
        user lacks view permission on its submissions.
        """
        fields = self.data.get('fields', [])
        query = self.data.get('query', {})
        submission_ids = self.data.get('submission_ids', [])
        if source is None:
            source_url = self.data.get('source', False)
            if not source_url:
                raise Exception('no source specified for the export')
            try:
                source = resolve_url_to_asset(source_url)
            except Asset.DoesNotExist:
                raise self.InaccessibleData

        source_perms = source.get_perms(self.user)
        if (PERM_VIEW_SUBMISSIONS not in source_perms
                and PERM_PARTIAL_SUBMISSIONS not in source_perms):
            raise self.InaccessibleData

        if not source.has_deployment:
            raise Exception('the source must be deployed prior to export')

        # Include the group name in `fields` for Mongo to correctly filter
        # for repeat groups
        fields = self._get_fields_and_groups(fields)
        submission_stream = source.deployment.get_submissions(
            user=self.user,
            fields=fields,
            submission_ids=submission_ids,
            query=query,
        )

        pack, submission_stream = build_formpack(
            source, submission_stream, self._fields_from_all_versions)

        # Wrap the submission stream in a generator that records the most
        # recent timestamp
        submission_stream = self._record_last_submission_time(
            submission_stream)

        options = self._build_export_options(pack)
        return pack.export(**options), submission_stream

    @classmethod
    @transaction.atomic
    def log_and_mark_stuck_as_errored(cls, user, source):
        """
        Set the status to ERROR and log a warning for any export that's
        been in an incomplete state for too long. `source` is the source
        URL as included in the `data` attribute.
        """
        # How long can an export possibly run, not including time spent
        # waiting in the Celery queue?
        max_export_run_time = getattr(
            settings, 'CELERY_TASK_TIME_LIMIT', 2100)
        # Allow a generous grace period
        max_allowed_export_age = datetime.timedelta(
            seconds=max_export_run_time * 4)
        this_moment = datetime.datetime.now(tz=pytz.UTC)
        oldest_allowed_timestamp = this_moment - max_allowed_export_age
        stuck_exports = cls.objects.filter(
            user=user,
            date_created__lt=oldest_allowed_timestamp,
            data__source=source,
        ).exclude(status__in=(cls.COMPLETE, cls.ERROR))
        for stuck_export in stuck_exports:
            logging.warning(
                'Stuck export {}: type {}, username {}, source {}, '
                'age {}'.format(
                    stuck_export.uid,
                    stuck_export.data.get('type'),
                    stuck_export.user.username,
                    stuck_export.data.get('source'),
                    this_moment - stuck_export.date_created,
                ))
            stuck_export.status = cls.ERROR
            stuck_export.save()

    @classmethod
    @transaction.atomic
    def remove_excess(cls, user, source):
        """
        Remove a user's oldest exports if they have more than
        settings.MAXIMUM_EXPORTS_PER_USER_PER_FORM exports for a particular
        form. `source` is the source URL as included in the `data`
        attribute. Nothing is returned.
        """
        user_source_exports = cls.objects.filter(
            user=user, data__source=source).order_by('-date_created')
        excess_exports = user_source_exports[
            settings.MAXIMUM_EXPORTS_PER_USER_PER_FORM:]
        for export in excess_exports:
            # `delete()` also removes the result file from storage
            export.delete()
class ImportTask(ImportExportTask):
    """
    An (asynchronous) task that creates or updates assets from imported
    content. `data` must contain one of `url`, `single_xls_url`, or
    `base64Encoded`; see `_run_task()` for the dispatch logic.
    """
    uid = KpiUidField(uid_prefix='i')
    """
    Something that would be done after the file has uploaded
    ...although we probably would need to store the file in a blob
    """

    def _run_task(self, messages):
        """
        Dispatch the import according to the keys present in `self.data`,
        appending progress information to the `messages` dict.
        """
        self.status = self.PROCESSING
        self.save(update_fields=['status'])
        # `dest_item` stays False when importing into a new asset rather
        # than updating an existing one
        dest_item = has_necessary_perm = False

        if 'destination' in self.data and self.data['destination']:
            _d = self.data.get('destination')
            dest_item = resolve_url_to_asset(_d)
            if not dest_item.has_perm(self.user, PERM_CHANGE_ASSET):
                raise exceptions.PermissionDenied('user cannot update asset')
            else:
                has_necessary_perm = True

        if 'url' in self.data:
            # Retrieve file name from URL
            self._load_assets_from_url(
                messages=messages,
                url=self.data.get('url'),
                destination=dest_item,
                has_necessary_perm=has_necessary_perm,
            )
            return

        # Get filename
        try:
            filename = self.data['filename']
        except KeyError:
            filename = None

        if 'single_xls_url' in self.data:
            # Retrieve file name from URL
            # TODO: merge with `url` handling above; currently kept separate
            # because `_load_assets_from_url()` uses complex logic to deal
            # with multiple XLS files in a directory structure within a ZIP
            # archive
            response = requests.get(self.data['single_xls_url'])
            response.raise_for_status()
            encoded_xls = to_str(base64.b64encode(response.content))

            # if filename is empty or None, try to retrieve
            # file name from the response headers
            if not filename:
                filename_from_header = parse_options_header(
                    response.headers['Content-Disposition'])

                try:
                    filename = filename_from_header[1]['filename']
                except (TypeError, IndexError, KeyError):
                    pass

            # Fall through to the `base64Encoded` branch below
            self.data['base64Encoded'] = encoded_xls

        if 'base64Encoded' in self.data:
            # When a file is uploaded as base64,
            # no name is provided in the encoded string
            # We should rely on self.data.get(:filename:)
            self._parse_b64_upload(
                base64_encoded_upload=self.data['base64Encoded'],
                filename=filename,
                messages=messages,
                library=self.data.get('library', False),
                desired_type=self.data.get('desired_type', None),
                destination=dest_item,
                has_necessary_perm=has_necessary_perm,
            )
            return

        raise Exception(
            'ImportTask data must contain `base64Encoded`, `url`, or '
            '`single_xls_url`')

    def _load_assets_from_url(self, url, messages, **kwargs):
        """
        Fetch `url`, parse its contents into assets/collections, persist
        them, and record outcomes in `messages`.
        """
        destination = kwargs.get('destination', False)
        has_necessary_perm = kwargs.get('has_necessary_perm', False)
        req = requests.get(url, allow_redirects=True)
        fif = HttpContentParse(request=req).parse()
        fif.remove_invalid_assets()
        fif.remove_empty_collections()

        destination_collection = destination \
            if destination.asset_type == ASSET_TYPE_COLLECTION else False

        if destination_collection and not has_necessary_perm:
            # redundant check
            raise exceptions.PermissionDenied(
                'user cannot load assets into this collection')

        # Parent assignments are deferred until every item has an ORM object
        collections_to_assign = []
        for item in fif._parsed:
            extra_args = {
                'owner': self.user,
                'name': item._name_base,
            }

            if item.get_type() == 'collection':
                # FIXME: seems to allow importing nested collections, even
                # though uploading from a file does not
                # (`_parse_b64_upload()` raises `NotImplementedError`)
                item._orm = create_assets(item.get_type(), extra_args)
            elif item.get_type() == 'asset':
                # Try the newer XLSX parser first, falling back to XLS
                try:
                    kontent = xlsx_to_dict(item.readable)
                except InvalidFileException:
                    kontent = xls_to_dict(item.readable)

                if not destination:
                    extra_args['content'] = _strip_header_keys(kontent)
                    item._orm = create_assets(item.get_type(), extra_args)
                else:
                    # The below is copied from `_parse_b64_upload` pretty
                    # much as is
                    # TODO: review and test carefully
                    asset = destination
                    asset.content = kontent
                    asset.save()
                    messages['updated'].append({
                        'uid': asset.uid,
                        'kind': 'asset',
                        'owner__username': self.user.username,
                    })

            if item.parent:
                collections_to_assign.append([
                    item._orm,
                    item.parent._orm,
                ])
            elif destination_collection:
                collections_to_assign.append([
                    item._orm,
                    destination_collection,
                ])

        for (orm_obj, parent_item) in collections_to_assign:
            orm_obj.parent = parent_item
            orm_obj.save()

    def _parse_b64_upload(self, base64_encoded_upload, messages, **kwargs):
        """
        Decode a base64-encoded XLS(X) upload and either create a new
        asset/collection or update the `destination` asset, recording
        outcomes in `messages`.
        """
        filename = kwargs.get('filename', False)
        desired_type = kwargs.get('desired_type')
        # don't try to splitext() on None, False, etc.
        if filename:
            filename = splitext(filename)[0]
        else:
            filename = ''
        library = kwargs.get('library')
        survey_dict = _b64_xls_to_dict(base64_encoded_upload)
        survey_dict_keys = survey_dict.keys()

        destination = kwargs.get('destination', False)
        has_necessary_perm = kwargs.get('has_necessary_perm', False)

        if destination and not has_necessary_perm:
            # redundant check
            raise exceptions.PermissionDenied('user cannot update item')

        if destination and destination.asset_type == ASSET_TYPE_COLLECTION:
            raise NotImplementedError('cannot import into a collection at'
                                      ' this time')

        if 'library' in survey_dict_keys:
            if not library:
                raise ValueError('a library cannot be imported into the'
                                 ' form list')
            if destination:
                raise SyntaxError('libraries cannot be imported into assets')
            collection = _load_library_content({
                'content': survey_dict,
                'owner': self.user,
                'name': filename
            })
            messages['created'].append({
                'uid': collection.uid,
                'kind': 'collection',
                'owner__username': self.user.username,
            })
        elif 'survey' in survey_dict_keys:
            if not destination:
                # Creating a brand-new asset; infer its type
                if desired_type:
                    asset_type = desired_type
                elif library and len(survey_dict.get('survey')) > 1:
                    asset_type = 'block'
                elif library:
                    asset_type = 'question'
                else:
                    asset_type = 'survey'

                if asset_type in [ASSET_TYPE_SURVEY, ASSET_TYPE_TEMPLATE]:
                    _append_kobo_locking_profiles(
                        base64_encoded_upload, survey_dict)
                asset = Asset.objects.create(
                    owner=self.user,
                    content=survey_dict,
                    asset_type=asset_type,
                    summary={'filename': filename},
                )
                msg_key = 'created'
            else:
                # Updating an existing asset in place
                asset = destination
                if not asset.name:
                    asset.name = filename
                if asset.asset_type == ASSET_TYPE_EMPTY:
                    asset.asset_type = ASSET_TYPE_SURVEY
                if asset.asset_type in [
                    ASSET_TYPE_SURVEY,
                    ASSET_TYPE_TEMPLATE
                ]:
                    _append_kobo_locking_profiles(
                        base64_encoded_upload, survey_dict)
                asset.content = survey_dict
                asset.save()
                msg_key = 'updated'

            messages[msg_key].append({
                'uid': asset.uid,
                'summary': asset.summary,
                'kind': 'asset',
                'owner__username': self.user.username,
            })
        else:
            raise SyntaxError(
                'xls upload must have one of these sheets: {}'.format(
                    'survey, library'))
class AssetFile(models.Model, AbstractFormMedia):
    """
    A file attached to an `Asset` (form media, map layer, or paired data),
    implementing the OpenRosa manifest and backend media-sync interfaces.
    """

    # More to come!
    MAP_LAYER = 'map_layer'
    FORM_MEDIA = 'form_media'
    PAIRED_DATA = 'paired_data'

    TYPE_CHOICES = (
        (MAP_LAYER, MAP_LAYER),
        (FORM_MEDIA, FORM_MEDIA),
        (PAIRED_DATA, PAIRED_DATA),
    )

    # MIME types accepted for each file type; bare prefixes like 'image'
    # presumably match any subtype (e.g. 'image/png') — enforcement happens
    # outside this block
    ALLOWED_MIME_TYPES = {
        FORM_MEDIA: (
            'image',
            'audio',
            'video',
            'text/csv',
            'application/xml',
            'application/zip',
        ),
        PAIRED_DATA: ('application/xml', ),
        MAP_LAYER: (
            'text/csv',
            'application/vnd.google-earth.kml+xml',
            'application/vnd.google-earth.kmz',
            'application/wkt',
            'application/geo+json',
            'application/json',
        ),
    }

    uid = KpiUidField(uid_prefix='af')
    asset = models.ForeignKey('Asset', related_name='asset_files',
                              on_delete=models.CASCADE)
    # Keep track of the uploading user, who could be anyone with
    # `change_asset` rights, not just the asset owner
    user = models.ForeignKey('auth.User', related_name='asset_files',
                             on_delete=models.CASCADE)
    file_type = models.CharField(choices=TYPE_CHOICES, max_length=32)
    description = models.CharField(max_length=255)
    date_created = models.DateTimeField(default=timezone.now)
    content = PrivateFileField(upload_to=upload_to, max_length=380, null=True)
    metadata = JSONBField(default=dict)
    # When set, the file is soft-deleted (see `delete()`)
    date_deleted = models.DateTimeField(null=True, default=None)
    date_modified = models.DateTimeField(default=timezone.now)
    # Cleared whenever the file changes so the deployment backend re-syncs
    synced_with_backend = models.BooleanField(default=False)

    @property
    def backend_media_id(self):
        """
        Implements `SyncBackendMediaInterface.backend_media_id()`
        """
        return (self.metadata['redirect_url']
                if self.is_remote_url
                else self.filename)

    def delete(self, using=None, keep_parents=False, force=False):
        # Delete object and files on storage if `force` is True or file
        # type is anything else than 'form_media'
        if force or self.file_type != self.FORM_MEDIA:
            if not self.is_remote_url:
                self.content.delete(save=False)
            return super().delete(using=using, keep_parents=keep_parents)

        # Otherwise, just flag the file as deleted.
        self.date_deleted = timezone.now()
        self.synced_with_backend = False
        self.save(update_fields=['date_deleted', 'synced_with_backend'])

    @property
    def deleted_at(self):
        """
        Implements:
        - `SyncBackendMediaInterface.deleted_at()`
        """
        return self.date_deleted

    @property
    def filename(self):
        """
        Implements:
        - `OpenRosaManifestInterface.filename()`
        - `SyncBackendMediaInterface.filename()`
        """
        # `set_filename()` is a no-op once metadata is populated
        self.set_filename()
        return self.metadata['filename']

    def get_download_url(self, request):
        """
        Implements `OpenRosaManifestInterface.get_download_url()`
        """
        return reverse('asset-file-content',
                       args=(self.asset.uid, self.uid),
                       request=request)

    @staticmethod
    def get_path(asset, file_type, filename):
        # Storage layout: <owner>/asset_files/<asset uid>/<file type>/<name>
        return posixpath.join(
            asset.owner.username,
            'asset_files',
            asset.uid,
            file_type,
            filename
        )

    @property
    def md5_hash(self):
        """
        Implements:
        - `OpenRosaManifestInterface.md5_hash()`
        - `SyncBackendMediaInterface.md5_hash()`
        """
        if not self.metadata.get('hash'):
            self.set_md5_hash()
        return self.metadata['hash']

    @property
    def is_remote_url(self):
        """
        Implements `SyncBackendMediaInterface.is_remote_url()`
        """
        # A file is "remote" when its metadata carries a `redirect_url`
        # instead of locally-stored content
        try:
            self.metadata['redirect_url']
        except KeyError:
            return False
        return True

    @property
    def mimetype(self):
        """
        Implements `SyncBackendMediaInterface.mimetype()`
        """
        self.set_mimetype()
        return self.metadata['mimetype']

    def save(self, force_insert=False, force_update=False, using=None,
             update_fields=None):
        # Derived metadata is populated on insert; `date_modified` is
        # refreshed on every subsequent save
        if self.pk is None:
            self.set_filename()
            self.set_md5_hash()
            self.set_mimetype()
        else:
            self.date_modified = timezone.now()
        return super().save(force_insert, force_update, using, update_fields)

    def set_filename(self):
        # No-op once a filename is recorded in `metadata`
        if not self.metadata.get('filename'):
            self.metadata['filename'] = self.content.name

    def set_md5_hash(self, md5_hash: Optional[str] = None):
        """
        Calculate the md5 hash and store it in the `metadata` field if it
        does not exist or is empty.
        The value can also be set directly with the optional `md5_hash`
        parameter. If `md5_hash` is an empty string, the hash is
        recalculated.
        """
        if md5_hash is not None:
            self.metadata['hash'] = md5_hash

        if not self.metadata.get('hash'):
            if self.is_remote_url:
                md5_hash = calculate_hash(self.metadata['redirect_url'],
                                          prefix=True)
            else:
                # `calculate_hash` raises ValueError when the content file
                # cannot be read; record no hash in that case
                try:
                    md5_hash = calculate_hash(self.content.file.read(),
                                              prefix=True)
                except ValueError:
                    md5_hash = None

            self.metadata['hash'] = md5_hash

    def set_mimetype(self):
        # No-op once a mimetype is recorded in `metadata`
        if not self.metadata.get('mimetype'):
            mimetype, _ = guess_type(self.filename)
            self.metadata['mimetype'] = mimetype
class Asset(ObjectPermissionMixin, DeployableMixin, XlsExportableMixin, FormpackXLSFormUtilsMixin, models.Model): name = models.CharField(max_length=255, blank=True, default='') date_created = models.DateTimeField(auto_now_add=True) date_modified = models.DateTimeField(auto_now=True) content = JSONBField(default=dict) summary = JSONBField(default=dict) report_styles = JSONBField(default=dict) report_custom = JSONBField(default=dict) map_styles = LazyDefaultJSONBField(default=dict) map_custom = LazyDefaultJSONBField(default=dict) asset_type = models.CharField(choices=ASSET_TYPES, max_length=20, default=ASSET_TYPE_SURVEY) parent = models.ForeignKey('Asset', related_name='children', null=True, blank=True, on_delete=models.CASCADE) owner = models.ForeignKey('auth.User', related_name='assets', null=True, on_delete=models.CASCADE) uid = KpiUidField(uid_prefix='a') tags = TaggableManager(manager=KpiTaggableManager) settings = JSONBField(default=dict) # `_deployment_data` must **NOT** be touched directly by anything except # the `deployment` property provided by `DeployableMixin`. # ToDo Move the field to another table with one-to-one relationship _deployment_data = JSONBField(default=dict) # JSON with subset of fields to share # { # 'enable': True, # 'fields': [] # shares all when empty # } data_sharing = LazyDefaultJSONBField(default=dict) # JSON with source assets' information # { # <source_uid>: { # 'fields': [] # includes all fields shared by source when empty # 'paired_data_uid': 'pdxxxxxxx' # auto-generated read-only # 'filename: 'xxxxx.xml' # }, # ... # <source_uid>: { # 'fields': [] # 'paired_data_uid': 'pdxxxxxxx' # 'filename: 'xxxxx.xml' # } # } paired_data = LazyDefaultJSONBField(default=dict) objects = AssetManager() @property def kind(self): return 'asset' class Meta: # Example in Django documentation represents `ordering` as a list # (even if it can be a list or a tuple). 
We enforce the type to `list` # because `rest_framework.filters.OrderingFilter` work with lists. # `AssetOrderingFilter` inherits from this class and it is used ` # in `AssetViewSet to sort the result. # It avoids back and forth between types and/or coercing where # ordering is needed ordering = [ '-date_modified', ] permissions = ( # change_, add_, and delete_asset are provided automatically # by Django (PERM_VIEW_ASSET, t('Can view asset')), (PERM_DISCOVER_ASSET, t('Can discover asset in public lists')), (PERM_MANAGE_ASSET, t('Can manage all aspects of asset')), # Permissions for collected data, i.e. submissions (PERM_ADD_SUBMISSIONS, t('Can submit data to asset')), (PERM_VIEW_SUBMISSIONS, t('Can view submitted data for asset')), (PERM_PARTIAL_SUBMISSIONS, t('Can make partial actions on ' 'submitted data for asset ' 'for specific users')), (PERM_CHANGE_SUBMISSIONS, t('Can modify submitted data for asset')), (PERM_DELETE_SUBMISSIONS, t('Can delete submitted data for asset')), (PERM_VALIDATE_SUBMISSIONS, t("Can validate submitted data asset")), # TEMPORARY Issue #1161: A flag to indicate that permissions came # solely from `sync_kobocat_xforms` and not from any user # interaction with KPI (PERM_FROM_KC_ONLY, 'INTERNAL USE ONLY; DO NOT ASSIGN')) # Since Django 2.1, 4 permissions are added for each registered model: # - add # - change # - delete # - view # See https://docs.djangoproject.com/en/2.2/topics/auth/default/#default-permissions # for more detail. # `view_asset` clashes with newly built-in one. # The simplest way to fix this is to keep old behaviour default_permissions = ('add', 'change', 'delete') # Labels for each `asset_type` as they should be presented to users. Can be # strings or callables if special logic is needed. 
Callables receive the # codename of the permission for which a label is being created ASSET_TYPE_LABELS_FOR_PERMISSIONS = { ASSET_TYPE_SURVEY: (lambda p: t('project') if p == PERM_MANAGE_ASSET else t('form')), ASSET_TYPE_TEMPLATE: t('template'), ASSET_TYPE_BLOCK: t('block'), ASSET_TYPE_QUESTION: t('question'), ASSET_TYPE_TEXT: t('text'), # unused? ASSET_TYPE_EMPTY: t('empty'), # unused? ASSET_TYPE_COLLECTION: t('collection'), } # Assignable permissions that are stored in the database. # The labels are templates used by `get_label_for_permission()`, which you # should call instead of accessing this dictionary directly ASSIGNABLE_PERMISSIONS_WITH_LABELS = { PERM_VIEW_ASSET: t('View ##asset_type_label##'), PERM_CHANGE_ASSET: t('Edit ##asset_type_label##'), PERM_DISCOVER_ASSET: t('Discover ##asset_type_label##'), PERM_MANAGE_ASSET: t('Manage ##asset_type_label##'), PERM_ADD_SUBMISSIONS: t('Add submissions'), PERM_VIEW_SUBMISSIONS: t('View submissions'), PERM_PARTIAL_SUBMISSIONS: { 'default': t('Act on submissions only from specific users'), PERM_VIEW_SUBMISSIONS: t('View submissions only from specific users'), PERM_CHANGE_SUBMISSIONS: t('Edit submissions only from specific users'), PERM_DELETE_SUBMISSIONS: t('Delete submissions only from specific users'), PERM_VALIDATE_SUBMISSIONS: t('Validate submissions only from specific users'), }, PERM_CHANGE_SUBMISSIONS: t('Edit submissions'), PERM_DELETE_SUBMISSIONS: t('Delete submissions'), PERM_VALIDATE_SUBMISSIONS: t('Validate submissions'), } ASSIGNABLE_PERMISSIONS = tuple(ASSIGNABLE_PERMISSIONS_WITH_LABELS.keys()) # Depending on our `asset_type`, only some permissions might be applicable ASSIGNABLE_PERMISSIONS_BY_TYPE = { ASSET_TYPE_SURVEY: tuple((p for p in ASSIGNABLE_PERMISSIONS if p != PERM_DISCOVER_ASSET)), ASSET_TYPE_TEMPLATE: ( PERM_VIEW_ASSET, PERM_CHANGE_ASSET, PERM_MANAGE_ASSET, ), ASSET_TYPE_BLOCK: ( PERM_VIEW_ASSET, PERM_CHANGE_ASSET, PERM_MANAGE_ASSET, ), ASSET_TYPE_QUESTION: ( PERM_VIEW_ASSET, 
    # NOTE(review): tail of the assignable-permissions-by-asset-type mapping;
    # the mapping opens on an earlier line outside this chunk.
    PERM_CHANGE_ASSET, PERM_MANAGE_ASSET, ),
    ASSET_TYPE_TEXT: (),  # unused?
    ASSET_TYPE_EMPTY: (
        PERM_VIEW_ASSET,
        PERM_CHANGE_ASSET,
        PERM_MANAGE_ASSET,
    ),
    ASSET_TYPE_COLLECTION: (
        PERM_VIEW_ASSET,
        PERM_CHANGE_ASSET,
        PERM_DISCOVER_ASSET,
        PERM_MANAGE_ASSET,
    ),
    }

    # Calculated permissions that are neither directly assignable nor stored
    # in the database, but instead implied by assignable permissions
    CALCULATED_PERMISSIONS = (PERM_DELETE_ASSET, )
    # Only certain permissions can be inherited
    HERITABLE_PERMISSIONS = {
        # parent permission: child permission
        PERM_VIEW_ASSET: PERM_VIEW_ASSET,
        PERM_CHANGE_ASSET: PERM_CHANGE_ASSET
    }
    # Granting some permissions implies also granting other permissions
    IMPLIED_PERMISSIONS = {
        # Format: explicit: (implied, implied, ...)
        PERM_CHANGE_ASSET: (PERM_VIEW_ASSET, ),
        PERM_DISCOVER_ASSET: (PERM_VIEW_ASSET, ),
        # `manage_asset` implies every assignable permission except itself and
        # `partial_submissions` (which contradicts it; see below)
        PERM_MANAGE_ASSET: tuple(
            (p for p in ASSIGNABLE_PERMISSIONS
             if p not in (PERM_MANAGE_ASSET, PERM_PARTIAL_SUBMISSIONS))
        ),
        PERM_ADD_SUBMISSIONS: (PERM_VIEW_ASSET, ),
        PERM_VIEW_SUBMISSIONS: (PERM_VIEW_ASSET, ),
        PERM_PARTIAL_SUBMISSIONS: (PERM_VIEW_ASSET, ),
        PERM_CHANGE_SUBMISSIONS: (
            PERM_VIEW_SUBMISSIONS,
            PERM_ADD_SUBMISSIONS,
        ),
        PERM_DELETE_SUBMISSIONS: (PERM_VIEW_SUBMISSIONS, ),
        PERM_VALIDATE_SUBMISSIONS: (PERM_VIEW_SUBMISSIONS, ),
    }

    # Permission pairs that may never be assigned to the same user on the
    # same asset at the same time
    CONTRADICTORY_PERMISSIONS = {
        PERM_PARTIAL_SUBMISSIONS: (
            PERM_VIEW_SUBMISSIONS,
            PERM_CHANGE_SUBMISSIONS,
            PERM_DELETE_SUBMISSIONS,
            PERM_VALIDATE_SUBMISSIONS,
            PERM_MANAGE_ASSET,
        ),
        PERM_VIEW_SUBMISSIONS: (PERM_PARTIAL_SUBMISSIONS, ),
        PERM_CHANGE_SUBMISSIONS: (PERM_PARTIAL_SUBMISSIONS, ),
        PERM_DELETE_SUBMISSIONS: (PERM_PARTIAL_SUBMISSIONS, ),
        PERM_VALIDATE_SUBMISSIONS: (PERM_PARTIAL_SUBMISSIONS, ),
    }

    # Some permissions must be copied to KC
    KC_PERMISSIONS_MAP = {  # keys are KPI's codenames, values are KC's
        PERM_CHANGE_SUBMISSIONS: 'change_xform',  # "Can Edit" in KC UI
        PERM_VIEW_SUBMISSIONS: 'view_xform',  # "Can View" in KC UI
        PERM_ADD_SUBMISSIONS: 'report_xform',  # "Can submit to" in KC UI
        PERM_DELETE_SUBMISSIONS: 'delete_data_xform',  # "Can Delete Data" in KC UI
        PERM_VALIDATE_SUBMISSIONS: 'validate_xform',  # "Can Validate" in KC UI
    }
    KC_CONTENT_TYPE_KWARGS = {'app_label': 'logger', 'model': 'xform'}
    # KC records anonymous access as flags on the `XForm`
    KC_ANONYMOUS_PERMISSIONS_XFORM_FLAGS = {
        PERM_VIEW_SUBMISSIONS: {'shared': True, 'shared_data': True}
    }

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The two attributes below keep a trace of the object state before any
        # alteration. See `__copy_hidden_fields()` for details.
        # They must start out with invalid values so that the comparisons
        # performed in `save()` against their real counterparts are accurate.
        self.__parent_id_copy = -1
        self.__deployment_data_copy = None
        self.__copy_hidden_fields()

    def __str__(self):
        return '{} ({})'.format(self.name, self.uid)

    def adjust_content_on_save(self):
        """
        This is called on save by default if content exists.
        Can be disabled / skipped by calling with parameter:
        asset.save(adjust_content=False)
        """
        # Normalize the content in place: standard structure, default
        # translation first, no empty rows/expressions, kuids assigned,
        # auto-generated names filled in.
        self._standardize(self.content)
        self._make_default_translation_first(self.content)
        self._strip_empty_rows(self.content)
        self._assign_kuids(self.content)
        self._autoname(self.content)
        self._unlink_list_items(self.content)
        self._remove_empty_expressions(self.content)
        settings = self.content['settings']
        _title = settings.pop('form_title', None)
        id_string = settings.get('id_string')
        filename = self.summary.pop('filename', None)
        if filename:
            # if we have filename available, set the id_string
            # and/or form_title from the filename.
            if not id_string:
                id_string = sluggify_label(filename)
                settings['id_string'] = id_string
            if not _title:
                _title = filename
        if self.asset_type not in [ASSET_TYPE_SURVEY, ASSET_TYPE_TEMPLATE]:
            # instead of deleting the settings, simply clear them out
            self.content['settings'] = {}
            strip_kobo_locking_profile(self.content)
        if _title is not None:
            self.name = _title

    def clone(self, version_uid=None):
        # not currently used, but this is how "to_clone_dict" should work
        return Asset.objects.create(**self.to_clone_dict(version=version_uid))

    def create_version(self) -> [AssetVersion, None]:
        """
        Create a version of current asset.
        Asset has to belong to `ASSET_TYPE_WITH_CONTENT`
        otherwise no version is created and `None` is returned.
        """
        if self.asset_type not in ASSET_TYPES_WITH_CONTENT:
            return

        return self.asset_versions.create(
            name=self.name,
            version_content=self.content,
            _deployment_data=self._deployment_data,
            # Any new version starts out as not-deployed,
            # even if the asset itself is already deployed.
            # Note: `asset_version.deployed` is set in the
            # serializer `DeploymentSerializer`
            deployed=False,
        )

    @property
    def deployed_versions(self):
        # Newest-first queryset of versions that have actually been deployed
        return self.asset_versions.filter(
            deployed=True).order_by('-date_modified')

    @property
    def discoverable_when_public(self):
        # This property is only needed when `self` is a collection.
        # We want to make a distinction between a collection which is not
        # discoverable and an asset which is not a collection
        # (which implies cannot be discoverable)
        if self.asset_type != ASSET_TYPE_COLLECTION:
            return None

        return self.permissions.filter(
            permission__codename=PERM_DISCOVER_ASSET,
            user_id=settings.ANONYMOUS_USER_ID).exists()

    def get_filters_for_partial_perm(
            self, user_id: int,
            perm: str = PERM_VIEW_SUBMISSIONS) -> Union[list, None]:
        """
        Returns the list of filters for a specific permission `perm`
        and this specific asset.

        `perm` can only be one of the submission permissions (and not
        `partial_submissions` itself).
        """
        if (not perm.endswith(SUFFIX_SUBMISSIONS_PERMS)
                or perm == PERM_PARTIAL_SUBMISSIONS):
            raise BadPermissionsException(
                t('Only partial permissions for '
                  'submissions are supported'))

        perms = self.get_partial_perms(user_id, with_filters=True)
        if perms:
            try:
                return perms[perm]
            except KeyError:
                # User has some partial permissions but not the good one.
                # Return a false condition to avoid showing any results.
                return [{'_id': -1}]

        return None

    def get_label_for_permission(
            self, permission_or_codename: Union[Permission, str]) -> str:
        """
        Get the correct label for a permission (object or codename) based on
        the type of this asset
        """
        # Accept either a `Permission` object or a bare codename string
        try:
            codename = permission_or_codename.codename
            permission = permission_or_codename
        except AttributeError:
            codename = permission_or_codename
            permission = None

        try:
            label = self.ASSIGNABLE_PERMISSIONS_WITH_LABELS[codename]
        except KeyError:
            if permission:
                label = permission.name
            else:
                cached_code_names = get_cached_code_names()
                label = cached_code_names[codename]['name']

        asset_type_label = self.ASSET_TYPE_LABELS_FOR_PERMISSIONS[
            self.asset_type]
        try:
            # Some labels may be callables
            asset_type_label = asset_type_label(codename)
        except TypeError:
            # Others are just strings
            pass

        # For partial permissions, label is a dict.
        # There is no replacements to do in the nested labels, but these lines
        # are there to support in case we need it one day
        if isinstance(label, dict):
            labels = copy.deepcopy(label)
            for key_ in labels.keys():
                labels[key_] = labels[key_].replace(
                    '##asset_type_label##',
                    # Raises TypeError if not coerced explicitly due to
                    # ugettext_lazy()
                    str(asset_type_label))
            return labels
        else:
            return label.replace(
                '##asset_type_label##',
                # Raises TypeError if not coerced explicitly due to
                # ugettext_lazy()
                str(asset_type_label))

    def get_partial_perms(
            self, user_id: int,
            with_filters: bool = False) -> Union[list, dict, None]:
        """
        Returns the list of permissions the user is restricted to,
        for this specific asset.
        If `with_filters` is `True`, it returns a dict of permissions (as keys)
        and the filters (as values) to apply on query to narrow down
        the results.

        For example:
        `get_partial_perms(user1_obj.id)` would return
        ```
        ['view_submissions',]
        ```

        `get_partial_perms(user1_obj.id, with_filters=True)` would return
        ```
        {
            'view_submissions': [
                {'_submitted_by': {'$in': ['user1', 'user2']}},
                {'_submitted_by': 'user3'}
            ],
        }
        ```

        If user doesn't have any partial permissions, it returns `None`.
        """
        perms = self.asset_partial_permissions.filter(user_id=user_id)\
            .values_list("permissions", flat=True).first()

        if perms:
            if with_filters:
                return perms
            else:
                return list(perms)

        return None

    @property
    def has_active_hooks(self):
        """
        Returns if asset has active hooks.
        Useful to update `kc.XForm.has_kpi_hooks` field.
        :return: {boolean}
        """
        return self.hooks.filter(active=True).exists()

    def has_subscribed_user(self, user_id):
        # This property is only needed when `self` is a collection.
        # We want to make a distinction between a collection which does not have
        # the subscribed user and an asset which is not a collection
        # (which implies cannot have subscriptions)
        if self.asset_type != ASSET_TYPE_COLLECTION:
            return None

        # ToDo: See if using a loop can reduce the number of SQL queries.
        return self.userassetsubscription_set.filter(user_id=user_id).exists()

    @property
    def latest_deployed_version(self):
        # Most recently modified deployed version, or `None`
        return self.deployed_versions.first()

    @property
    def latest_version(self):
        # Use versions prefetched by `optimize_queryset_for_list()` when
        # available; otherwise hit the database.
        versions = None
        try:
            versions = self.prefetched_latest_versions
        except AttributeError:
            versions = self.asset_versions.order_by('-date_modified')
        try:
            return versions[0]
        except IndexError:
            return None

    @staticmethod
    def optimize_queryset_for_list(queryset):
        """ Used by serializers to improve performance when listing assets """
        queryset = queryset.defer(
            # Avoid pulling these from the database because they are often huge
            # and we don't need them for list views.
            'content', 'report_styles'
        ).select_related(
            # We only need `username`, but `select_related('owner__username')`
            # actually pulled in the entire `auth_user` table under Django 1.8.
            # In Django 1.9+, "select_related() prohibits non-relational fields
            # for nested relations."
            'owner',
        ).prefetch_related(
            'permissions__permission',
            'permissions__user',
            # `Prefetch(..., to_attr='prefetched_list')` stores the prefetched
            # related objects in a list (`prefetched_list`) that we can use in
            # other methods to avoid additional queries; see:
            # https://docs.djangoproject.com/en/1.8/ref/models/querysets/#prefetch-objects
            Prefetch('tags', to_attr='prefetched_tags'),
            Prefetch(
                'asset_versions',
                queryset=AssetVersion.objects.order_by('-date_modified').only(
                    'uid', 'asset', 'date_modified', 'deployed'),
                to_attr='prefetched_latest_versions',
            ),
        )
        return queryset

    def refresh_from_db(self, using=None, fields=None):
        super().refresh_from_db(using=using, fields=fields)
        # Refresh hidden fields too
        self.__copy_hidden_fields(fields)

    def rename_translation(self, _from, _to):
        # Requires at least two translations to exist in the content
        if not self._has_translations(self.content, 2):
            raise ValueError('no translations available')
        self._rename_translation(self.content, _from, _to)

    # todo: test and implement this method
    # todo 2019-04-25: Still needed, `revert_to_version` does the same?
    # def restore_version(self, uid):
    #     _version_to_restore = self.asset_versions.get(uid=uid)
    #     self.content = _version_to_restore.version_content
    #     self.name = _version_to_restore.name

    def revert_to_version(self, version_uid):
        # Replace current content with the given version's content and persist
        av = self.asset_versions.get(uid=version_uid)
        self.content = av.version_content
        self.save()

    def save(self, force_insert=False, force_update=False, update_fields=None,
             adjust_content=True, create_version=True,
             update_parent_languages=True, *args, **kwargs):
        """
        Persist the asset, normalizing its content, inferring its type,
        guarding `_deployment_data` against direct writes, propagating
        language changes to (previous) parents, and optionally creating a new
        `AssetVersion`.
        """
        is_new = self.pk is None

        if self.asset_type not in ASSET_TYPES_WITH_CONTENT:
            # so long as all of the operations in this overridden `save()`
            # method pertain to content, bail out if it's impossible for this
            # asset to have content in the first place
            super().save(force_insert=force_insert, force_update=force_update,
                         update_fields=update_fields, *args, **kwargs)
            return

        if self.content is None:
            self.content = {}

        # in certain circumstances, we don't want content to
        # be altered on save. (e.g. on asset.deploy())
        if adjust_content:
            self.adjust_content_on_save()

        # populate summary
        self._populate_summary()

        # infer asset_type only between question and block
        if self.asset_type in [ASSET_TYPE_QUESTION, ASSET_TYPE_BLOCK]:
            try:
                row_count = int(self.summary.get('row_count'))
            except TypeError:
                pass
            else:
                if row_count == 1:
                    self.asset_type = ASSET_TYPE_QUESTION
                elif row_count > 1:
                    self.asset_type = ASSET_TYPE_BLOCK

        self._populate_report_styles()

        # Ensure `_deployment_data` is not saved directly: only the
        # deployment back end (which knows `stored_data_key`) may alter it.
        try:
            stored_data_key = self._deployment_data['_stored_data_key']
        except KeyError:
            if self._deployment_data != self.__deployment_data_copy:
                raise DeploymentDataException
        else:
            if stored_data_key != self.deployment.stored_data_key:
                raise DeploymentDataException
            else:
                self._deployment_data.pop('_stored_data_key', None)
                self.__copy_hidden_fields()

        super().save(force_insert=force_insert, force_update=force_update,
                     update_fields=update_fields, *args, **kwargs)

        # Update languages for parent and previous parent.
        # e.g. if a survey has been moved from one collection to another,
        # we want both collections to be updated.
        if self.parent is not None and update_parent_languages:
            if (self.parent_id != self.__parent_id_copy
                    and self.__parent_id_copy is not None):
                try:
                    previous_parent = Asset.objects.get(
                        pk=self.__parent_id_copy)
                    previous_parent.update_languages()
                    self.__parent_id_copy = self.parent_id
                except Asset.DoesNotExist:
                    pass

            # If object is new, we can add its languages to its parent without
            # worrying about removing its old values. It avoids an extra query.
            if is_new:
                self.parent.update_languages([self])
            else:
                # Otherwise, because we cannot know which languages are from
                # this object, update will be performed with all parent's
                # children.
                self.parent.update_languages()

        if self.has_deployment:
            self.deployment.sync_media_files(AssetFile.PAIRED_DATA)

        if create_version:
            self.create_version()

    @property
    def snapshot(self):
        # Cached (non-regenerating) snapshot of the latest version
        return self._snapshot(regenerate=False)

    @property
    def tag_string(self):
        # Comma-joined tag names; prefers tags prefetched by
        # `optimize_queryset_for_list()` to avoid a query.
        try:
            tag_list = self.prefetched_tags
        except AttributeError:
            tag_names = self.tags.values_list('name', flat=True)
        else:
            tag_names = [t.name for t in tag_list]
        return ','.join(tag_names)

    @tag_string.setter
    def tag_string(self, value):
        # Replaces the full tag set with the comma-separated names in `value`
        intended_tags = value.split(',')
        self.tags.set(*intended_tags)

    def to_clone_dict(self, version: Union[str, AssetVersion] = None) -> dict:
        """
        Returns a dictionary of the asset based on its version.

        :param version: Optional. It can be an object or its unique id
        :return dict
        """
        if not isinstance(version, AssetVersion):
            if version:
                version = self.asset_versions.get(uid=version)
            else:
                version = self.asset_versions.first()
                if not version:
                    version = self.create_version()

        return {
            'name': version.name,
            'content': version.version_content,
            'asset_type': self.asset_type,
            'tag_string': self.tag_string,
        }

    def to_ss_structure(self):
        # Flattened ("spreadsheet structure") copy of the content
        return flatten_content(self.content, in_place=False)

    def update_languages(self, children=None):
        """
        Updates object's languages by aggregating all its children's languages

        Args:
            children (list<Asset>): Optional. When specified, `children`'s
                languages are merged with `self`'s languages. Otherwise,
                when it's `None`, DB is fetched to build the list according
                to `self.children`
        """
        # If object is not a collection, it should not have any children.
        # No need to go further.
        if self.asset_type != ASSET_TYPE_COLLECTION:
            return

        obj_languages = self.summary.get('languages', [])
        languages = set()

        if children:
            languages = set(obj_languages)
            children_languages = [child.summary.get('languages')
                                  for child in children
                                  if child.summary.get('languages')]
        else:
            children_languages = list(self.children
                                      .values_list('summary__languages',
                                                   flat=True)
                                      .exclude(Q(summary__languages=[]) |
                                               Q(summary__languages=[None]))
                                      .order_by())

        if children_languages:
            # Flatten `children_languages` to 1-dimension list.
            languages.update(reduce(add, children_languages))

        languages.discard(None)

        # Object of type set is not JSON serializable
        languages = list(languages)

        # If languages are still the same, no need to update the object
        if sorted(obj_languages) == sorted(languages):
            return

        self.summary['languages'] = languages
        self.save(update_fields=['summary'])

    @property
    def version__content_hash(self):
        # Avoid reading the property `self.latest_version` more than once, since
        # it may execute a database query each time it's read
        latest_version = self.latest_version
        if latest_version:
            return latest_version.content_hash

    @property
    def version_id(self):
        # Avoid reading the property `self.latest_version` more than once, since
        # it may execute a database query each time it's read
        latest_version = self.latest_version
        if latest_version:
            return latest_version.uid

    @property
    def version_number_and_date(self) -> str:
        # Returns the count of all deployed versions (plus one for the current
        # version if it is not deployed) and the date the asset was last
        # modified
        count = self.deployed_versions.count()

        # NOTE(review): if the asset has no versions at all,
        # `self.latest_version` is `None` and this raises AttributeError —
        # confirm callers only use this on versioned assets.
        if not self.latest_version.deployed:
            count = count + 1

        return f'{count} {self.date_modified:(%Y-%m-%d %H:%M:%S)}'

    # TODO: take leading underscore off of `_snapshot()` and call it directly?
    # we would also have to remove or rename the `snapshot` property
    def versioned_snapshot(
        self, version_uid: str, root_node_name: Optional[str] = None
    ) -> AssetSnapshot:
        # Cached snapshot for a specific version (never regenerates)
        return self._snapshot(
            regenerate=False,
            version_uid=version_uid,
            root_node_name=root_node_name,
        )

    def _populate_report_styles(self):
        # Ensure every survey row has a `$kuid` and an entry in the
        # per-question report styles, then rewrite `self.report_styles`.
        default = self.report_styles.get(DEFAULT_REPORTS_KEY, {})
        specifieds = self.report_styles.get(SPECIFIC_REPORTS_KEY, {})
        kuids_to_variable_names = self.report_styles.get('kuid_names', {})
        for (index, row) in enumerate(self.content.get('survey', [])):
            if '$kuid' not in row:
                if 'name' in row:
                    row['$kuid'] = json_hash([self.uid, row['name']])
                else:
                    row['$kuid'] = json_hash([self.uid, index, row])
            _identifier = row.get('name', row['$kuid'])
            kuids_to_variable_names[_identifier] = row['$kuid']
            if _identifier not in specifieds:
                specifieds[_identifier] = {}
        self.report_styles = {
            DEFAULT_REPORTS_KEY: default,
            SPECIFIC_REPORTS_KEY: specifieds,
            'kuid_names': kuids_to_variable_names,
        }

    def _populate_summary(self):
        # Recompute `self.summary` from the content; empty content yields an
        # empty summary.
        if self.content is None:
            self.content = {}
            self.summary = {}
            return
        analyzer = AssetContentAnalyzer(**self.content)
        self.summary = analyzer.summary

    @transaction.atomic
    def _snapshot(
        self,
        regenerate: bool = True,
        version_uid: Optional[str] = None,
        root_node_name: Optional[str] = None,
    ) -> AssetSnapshot:
        """
        Return the `AssetSnapshot` for this asset (and optionally a specific
        version), creating or regenerating it as needed. Runs atomically to
        avoid racing snapshot deletions/creations.
        """
        if version_uid:
            asset_version = self.asset_versions.get(uid=version_uid)
        else:
            asset_version = self.latest_version

        try:
            snapshot = AssetSnapshot.objects.get(asset=self,
                                                 asset_version=asset_version)
            if regenerate:
                snapshot.delete()
                snapshot = False
        except AssetSnapshot.MultipleObjectsReturned:
            # how did multiple snapshots get here?
            snaps = AssetSnapshot.objects.filter(asset=self,
                                                 asset_version=asset_version)
            snaps.delete()
            snapshot = False
        except AssetSnapshot.DoesNotExist:
            snapshot = False

        if not snapshot:
            try:
                form_title = asset_version.form_title
                content = asset_version.version_content
            except AttributeError:
                # No version available; fall back to the asset itself
                form_title = self.form_title
                content = self.content

            settings_ = {'form_title': form_title}
            if root_node_name:
                # `name` may not sound like the right setting to control the
                # XML root node name, but it is, according to the XLSForm
                # specification:
                # https://xlsform.org/en/#specify-xforms-root-node-name
                settings_['name'] = root_node_name
                settings_['id_string'] = root_node_name
            self._append(content, settings=settings_)
            snapshot = AssetSnapshot.objects.create(asset=self,
                                                    asset_version=asset_version,
                                                    source=content)

        return snapshot

    def _update_partial_permissions(
        self,
        user: 'User',
        perm: str,
        remove: bool = False,
        partial_perms: Optional[dict] = None,
    ):
        """
        Stores, updates, and removes permissions that apply only to a subset
        of submissions in a project (also called row-level permissions or
        partial permissions).

        If `perm = PERM_PARTIAL_SUBMISSIONS`, it must be accompanied by
        `partial_perms`, which is a dictionary of permissions mapped to
        MongoDB filters. Each key of that dictionary is a permission string
        (codename), and each value is a list of MongoDB queries that specify
        which submissions the permission affects. A submission is affected if
        it matches *ANY* of the queries in the list.

        For example, to allow `user` to edit submissions made by 'alice' or
        'bob', and to allow `user` also to validate only submissions made by
        'bob', the following `partial_perms` could be used:
        ```
        {
            'change_submissions': [{
                '_submitted_by': {
                    '$in': [
                        'alice',
                        'bob'
                    ]
                }
            }],
            'validate_submissions': [{
                '_submitted_by': 'bob'
            }],
        }
        ```

        If `perm` is something other than `PERM_PARTIAL_SUBMISSIONS`, and
        that permission contradicts `PERM_PARTIAL_SUBMISSIONS`, *all* partial
        permission assignments for `user` on this asset are removed from the
        database. If the permission does not conflict, no action is taken.

        `remove = True` deletes all partial permissions assignments for
        `user` on this asset.
        """

        def clean_up_table():
            # Because of the unique constraint, there should be only
            # one record that matches this query.
            # We don't look for record existence to avoid extra query.
            self.asset_partial_permissions.filter(user_id=user.pk).delete()

        if perm == PERM_PARTIAL_SUBMISSIONS:
            if remove:
                clean_up_table()
                return

            if user.pk == self.owner.pk:
                raise BadPermissionsException(
                    t("Can not assign '{}' permission to owner".format(perm)))

            if not partial_perms:
                raise BadPermissionsException(
                    t("Can not assign '{}' permission. "
                      "Partial permissions are missing.".format(perm)))

            new_partial_perms = AssetUserPartialPermission\
                .update_partial_perms_to_include_implied(
                    self,
                    partial_perms
                )

            AssetUserPartialPermission.objects.update_or_create(
                asset_id=self.pk,
                user_id=user.pk,
                defaults={'permissions': new_partial_perms})

            # There are no real partial permissions for 'add_submissions' but
            # 'change_submissions' implies it. So if 'add_submissions' is in
            # the partial permissions list, it must be assigned to the user as
            # well to let them perform edit actions on their subset of data.
            # Otherwise, KC will reject some actions.
            if PERM_ADD_SUBMISSIONS in new_partial_perms:
                self.assign_perm(user_obj=user, perm=PERM_ADD_SUBMISSIONS,
                                 defer_recalc=True)

        elif perm in self.CONTRADICTORY_PERMISSIONS.get(
                PERM_PARTIAL_SUBMISSIONS):
            clean_up_table()

    def __copy_hidden_fields(self, fields: Optional[list] = None):
        """
        Save copies of `parent_id` and `_deployment_data` so that `save()`
        can compare them with the current values and detect alterations.

        - `self.__parent_id_copy` is used to detect whether asset is linked to
          a different parent
        - `self.__deployment_data_copy` is used to detect whether
          `_deployment_data` has been altered directly
        """
        # When fields are deferred, Django instantiates another copy
        # of the current Asset object to retrieve the value of the
        # requested field. Because we need to get a copy at the very
        # first beginning of the life of the object, this method is
        # called in the object constructor. Thus, trying to copy
        # deferred fields would create an infinite loop.
        # If `fields` is provided, fields are no longer deferred and should be
        # copied right away.
        if (
            fields is None and 'parent_id' not in self.get_deferred_fields()
            or fields and 'parent_id' in fields
        ):
            self.__parent_id_copy = self.parent_id
        if (
            fields is None
            and '_deployment_data' not in self.get_deferred_fields()
            or fields and '_deployment_data' in fields
        ):
            self.__deployment_data_copy = copy.deepcopy(
                self._deployment_data)
class AssetSnapshot(
    models.Model,
    AbstractFormList,
    XlsExportableMixin,
    FormpackXLSFormUtilsMixin,
):
    """
    This model serves as a cache of the XML that was exported by the installed
    version of pyxform.

    TODO: come up with a policy to clear this cache out.
    DO NOT: depend on these snapshots existing for more than a day
    until a policy is set.
    Done with https://github.com/kobotoolbox/kpi/pull/2434.
    Remove above lines when PR is merged
    """
    # Generated XForm XML (recomputed on every save)
    xml = models.TextField()
    # Normalized survey content the XML was generated from
    source = JSONBField(default=dict)
    # Outcome of the XML generation (status, warnings, errors)
    details = JSONBField(default=dict)
    owner = models.ForeignKey('auth.User', related_name='asset_snapshots',
                              null=True, on_delete=models.CASCADE)
    asset = models.ForeignKey('Asset', null=True, on_delete=models.CASCADE)
    _reversion_version_id = models.IntegerField(null=True)
    asset_version = models.OneToOneField('AssetVersion',
                                         on_delete=models.CASCADE,
                                         null=True)
    date_created = models.DateTimeField(auto_now_add=True)
    uid = KpiUidField(uid_prefix='s')

    @property
    def content(self):
        return self.source

    @property
    def description(self):
        """
        Implements `OpenRosaFormListInterface.description`
        """
        return self.asset.settings.get('description', '')

    @property
    def form_id(self):
        """
        Implements `OpenRosaFormListInterface.form_id()`
        """
        return self.uid

    def get_download_url(self, request):
        """
        Implements `OpenRosaFormListInterface.get_download_url()`
        """
        return reverse(
            viewname='assetsnapshot-detail',
            format='xml',
            kwargs={'uid': self.uid},
            request=request
        )

    def get_manifest_url(self, request):
        """
        Implements `OpenRosaFormListInterface.get_manifest_url()`
        """
        return reverse(
            viewname='assetsnapshot-manifest',
            format='xml',
            kwargs={'uid': self.uid},
            request=request
        )

    @property
    def md5_hash(self):
        """
        Implements `OpenRosaFormListInterface.md5_hash()`
        """
        return f'{calculate_hash(self.xml, prefix=True)}'

    @property
    def name(self):
        """
        Implements `OpenRosaFormListInterface.name()`
        """
        return self.asset.name

    def save(self, *args, **kwargs):
        """
        Fill in `source`/`owner` from the related asset when missing, then
        (re)generate `xml` and `details` from the normalized source before
        persisting.
        """
        if self.asset is not None:
            # Previously, `self.source` was a nullable field. It must now
            # either contain valid content or be an empty dictionary.
            # NOTE(review): this assert is redundant inside the enclosing
            # `if self.asset is not None:` branch.
            assert self.asset is not None
            if not self.source:
                if self.asset_version is None:
                    self.asset_version = self.asset.latest_version
                self.source = self.asset_version.version_content
            if self.owner is None:
                self.owner = self.asset.owner
        _note = self.details.pop('note', None)
        _source = copy.deepcopy(self.source)
        # Normalize the copied source the same way `Asset` content is
        # normalized before export
        self._standardize(_source)
        self._make_default_translation_first(_source)
        self._strip_empty_rows(_source)
        self._autoname(_source)
        self._remove_empty_expressions(_source)
        # TODO: move these inside `generate_xml_from_source()`?
        _settings = _source.get('settings', {})
        form_title = _settings.get('form_title')
        id_string = _settings.get('id_string')
        root_node_name = _settings.get('name')
        self.xml, self.details = self.generate_xml_from_source(
            _source,
            include_note=_note,
            root_node_name=root_node_name,
            form_title=form_title,
            id_string=id_string,
        )
        self.source = _source
        return super().save(*args, **kwargs)

    def generate_xml_from_source(self,
                                 source,
                                 include_note=False,
                                 root_node_name=None,
                                 form_title=None,
                                 id_string=None):
        """
        Build XForm XML from `source` via FormPack.

        Returns a `(xml, details)` tuple; on failure, `xml` is an empty
        string and `details` carries the error information (the exception is
        logged, not raised).
        """
        # Derive missing naming/title parameters from the related asset,
        # falling back to generic snapshot values
        if not root_node_name:
            if self.asset and self.asset.uid:
                root_node_name = self.asset.uid
            else:
                root_node_name = 'snapshot_xml'
        if not form_title:
            if self.asset and self.asset.name:
                form_title = self.asset.name
            else:
                form_title = 'Snapshot XML'
        if id_string is None:
            id_string = root_node_name

        if include_note and 'survey' in source:
            # Prepend a `note` row; label must be repeated per translation
            _translations = source.get('translations', [])
            _label = include_note
            if len(_translations) > 0:
                _label = [_label for t in _translations]
            source['survey'].append({'type': 'note',
                                     'name': 'prepended_note',
                                     'label': _label})

        source_copy = copy.deepcopy(source)
        self._expand_kobo_qs(source_copy)
        self._populate_fields_with_autofields(source_copy)
        self._strip_kuids(source_copy)
        allow_choice_duplicates(source_copy)
        warnings = []
        details = {}
        try:
            xml = FormPack({'content': source_copy},
                           root_node_name=root_node_name,
                           id_string=id_string,
                           title=form_title)[0].to_xml(warnings=warnings)
            details.update({
                'status': 'success',
                'warnings': warnings,
            })
        except Exception as err:
            err_message = str(err)
            logging.error('Failed to generate xform for asset', extra={
                'src': source,
                'id_string': id_string,
                'uid': self.uid,
                '_msg': err_message,
                'warnings': warnings,
            })
            xml = ''
            details.update({
                'status': 'failure',
                'error_type': type(err).__name__,
                'error': err_message,
                'warnings': warnings,
            })
        return xml, details
class HookLog(models.Model):
    """
    One delivery attempt record for a `Hook`: tracks which KC submission
    (`instance_id`) was sent, how many tries were made, and the outcome.
    """

    hook = models.ForeignKey("Hook", related_name="logs",
                             on_delete=models.CASCADE)
    uid = KpiUidField(uid_prefix="hl")
    instance_id = models.IntegerField(default=0,
                                      db_index=True)  # `kc.logger.Instance.id`.
    tries = models.PositiveSmallIntegerField(default=0)
    status = models.PositiveSmallIntegerField(
        default=HOOK_LOG_PENDING
    )  # Could use status_code, but will speed-up queries.
    status_code = models.IntegerField(default=KOBO_INTERNAL_ERROR_STATUS_CODE,
                                      null=True, blank=True)
    message = models.TextField(default="")
    date_created = models.DateTimeField(auto_now_add=True)
    date_modified = models.DateTimeField(auto_now_add=True)

    class Meta:
        ordering = ["-date_created"]

    def can_retry(self):
        """
        Returns whether instance can be resent to external endpoint.
        Notice: even if returns false, `self.retry()` can be triggered.

        :return: bool
        """
        if self.hook.active:
            seconds = HookLog.get_elapsed_seconds(
                constance.config.HOOK_MAX_RETRIES)
            threshold = timezone.now() - timedelta(seconds=seconds)
            # We can retry only if system has already tried 3 times.
            # If log is still pending after 3 times, there was an issue,
            # we allow the retry.
            return self.status == HOOK_LOG_FAILED or \
                (self.date_modified < threshold and
                 self.status == HOOK_LOG_PENDING)

        return False

    def change_status(self, status=HOOK_LOG_PENDING, message=None,
                      status_code=None):
        """
        Update status (and optionally message/status_code) without counting
        the save as another delivery attempt (`reset_status=True`).
        """
        self.status = status

        if message:
            self.message = message

        if status_code:
            self.status_code = status_code

        self.save(reset_status=True)

    @staticmethod
    def get_elapsed_seconds(retries_count):
        """
        Calculate number of elapsed seconds since first try.

        :param retries_count: int.
        :return: int. Number of seconds
        """
        # We need to sum all seconds between each retry.
        # FIX: the loop variable previously shadowed the `retries_count`
        # parameter; use a distinct name for clarity.
        seconds = 0
        for attempt in range(retries_count):  # Range is zero-indexed
            seconds += HookLog.get_remaining_seconds(attempt)

        return seconds

    @staticmethod
    def get_remaining_seconds(retries_count):
        """
        Calculate number of remaining seconds before next retry
        (exponential back-off: 60 * 10**n).

        :param retries_count: int.
        :return: int. Number of seconds
        """
        return 60 * (10 ** retries_count)

    def retry(self):
        """
        Retries to send data to external service.

        :return: boolean
        """
        try:
            ServiceDefinition = self.hook.get_service_definition()
            service_definition = ServiceDefinition(self.hook, self.instance_id)
            service_definition.send()
            self.refresh_from_db()
        except Exception as e:
            logging.error("HookLog.retry - {}".format(str(e)), exc_info=True)
            self.change_status(HOOK_LOG_FAILED)
            return False

        return True

    def save(self, *args, **kwargs):
        # Update date_modified each time object is saved
        self.date_modified = timezone.now()
        # We don't want to alter tries when we only change the status
        if kwargs.pop("reset_status", False) is False:
            self.tries += 1
            self.hook.reset_totals()
        super().save(*args, **kwargs)

    @property
    def status_str(self):
        # Human-readable status; implicitly `None` for unknown values
        if self.status == HOOK_LOG_PENDING:
            return "Pending"
        elif self.status == HOOK_LOG_FAILED:
            return "Failed"
        elif self.status == HOOK_LOG_SUCCESS:
            return "Success"

    def __str__(self):
        return "<HookLog {uid}>".format(uid=self.uid)
class Hook(models.Model):
    """
    An external REST endpoint to which an asset's submissions are forwarded,
    together with export-format and authentication configuration.
    """

    # Export types
    XML = "xml"
    JSON = "json"

    # Authentication levels
    NO_AUTH = "no_auth"
    BASIC_AUTH = "basic_auth"

    # Export types list
    EXPORT_TYPE_CHOICES = ((XML, XML), (JSON, JSON))

    # Authentication levels list
    AUTHENTICATION_LEVEL_CHOICES = ((NO_AUTH, NO_AUTH),
                                    (BASIC_AUTH, BASIC_AUTH))

    asset = models.ForeignKey("kpi.Asset", related_name="hooks",
                              on_delete=models.CASCADE)
    uid = KpiUidField(uid_prefix="h")
    name = models.CharField(max_length=255, blank=False)
    endpoint = models.CharField(max_length=500, blank=False)
    active = models.BooleanField(default=True)
    export_type = models.CharField(choices=EXPORT_TYPE_CHOICES, default=JSON,
                                   max_length=10)
    auth_level = models.CharField(choices=AUTHENTICATION_LEVEL_CHOICES,
                                  default=NO_AUTH, max_length=10)
    settings = JSONBField(default=dict)
    date_created = models.DateTimeField(default=timezone.now)
    date_modified = models.DateTimeField(default=timezone.now)

    class Meta:
        ordering = ["name"]

    def __init__(self, *args, **kwargs):
        # Per-instance cache of log totals; filled lazily by `_get_totals()`
        self.__totals = {}
        return super().__init__(*args, **kwargs)

    def save(self, *args, **kwargs):
        # Update date_modified each time object is saved
        self.date_modified = timezone.now()
        super().save(*args, **kwargs)

    def __str__(self):
        # FIX: was `__unicode__`, a Python 2 leftover that is never called
        # under Python 3, so `str(hook)` fell back to the default Model repr.
        return "%s:%s - %s" % (self.asset, self.name, self.endpoint)

    def get_service_definition(self):
        """
        Import and return the `ServiceDefinition` class matching this hook's
        export type (e.g. `service_json.ServiceDefinition`).
        """
        mod = import_module("kobo.apps.hook.services.service_{}".format(
            self.export_type))
        return getattr(mod, "ServiceDefinition")

    @property
    def success_count(self):
        if not self.__totals:
            self._get_totals()
        return self.__totals.get(HOOK_LOG_SUCCESS)

    @property
    def failed_count(self):
        if not self.__totals:
            self._get_totals()
        return self.__totals.get(HOOK_LOG_FAILED)

    @property
    def pending_count(self):
        if not self.__totals:
            self._get_totals()
        return self.__totals.get(HOOK_LOG_PENDING)

    def _get_totals(self):
        """
        Count this hook's logs per status and cache the result in
        `self.__totals`.
        """
        # TODO add some cache
        queryset = self.logs.values("status").annotate(
            values_count=models.Count("status"))

        # Ordering from `HookLog.Meta` would break the GROUP BY; drop it
        queryset.query.clear_ordering(True)

        # Initialize totals
        self.__totals = {
            HOOK_LOG_SUCCESS: 0,
            HOOK_LOG_FAILED: 0,
            HOOK_LOG_PENDING: 0,
        }
        for record in queryset:
            self.__totals[record.get("status")] = record.get("values_count")

    def reset_totals(self):
        # TODO remove cache when it's enabled
        self.__totals = {}
# NOTE(review): this appears to be an older variant of `AssetSnapshot`
# (plain `JSONField`, fixed 'data' root node, no OpenRosa interface) —
# presumably from a different module/revision than the class above; confirm
# whether both definitions are really needed.
class AssetSnapshot(models.Model, XlsExportable, FormpackXLSFormUtils):
    """
    This model serves as a cache of the XML that was exported by the installed
    version of pyxform.

    TODO: come up with a policy to clear this cache out.
    DO NOT: depend on these snapshots existing for more than a day
    until a policy is set.
    Done with https://github.com/kobotoolbox/kpi/pull/2434.
    Remove above lines when PR is merged
    """
    # Generated XForm XML (recomputed on every save)
    xml = models.TextField()
    # Survey content the XML is generated from
    source = JSONField(default=dict)
    # Outcome of the XML generation (status, warnings, errors)
    details = JSONField(default=dict)
    owner = models.ForeignKey('auth.User', related_name='asset_snapshots',
                              null=True, on_delete=models.CASCADE)
    asset = models.ForeignKey(Asset, null=True, on_delete=models.CASCADE)
    _reversion_version_id = models.IntegerField(null=True)
    asset_version = models.OneToOneField('AssetVersion',
                                         on_delete=models.CASCADE,
                                         null=True)
    date_created = models.DateTimeField(auto_now_add=True)
    uid = KpiUidField(uid_prefix='s')

    @property
    def content(self):
        return self.source

    def save(self, *args, **kwargs):
        """
        Fill in `source`/`owner` from the related asset when missing, then
        (re)generate `xml` and `details` from the normalized source before
        persisting. The XML root node name is always 'data' here.
        """
        if self.asset is not None:
            if self.source is None:
                if self.asset_version is None:
                    self.asset_version = self.asset.latest_version
                self.source = self.asset_version.version_content
            if self.owner is None:
                self.owner = self.asset.owner
        _note = self.details.pop('note', None)
        _source = copy.deepcopy(self.source)
        if _source is None:
            _source = {}
        # Normalize the copied source before export
        self._standardize(_source)
        self._make_default_translation_first(_source)
        self._strip_empty_rows(_source)
        self._autoname(_source)
        self._remove_empty_expressions(_source)
        _settings = _source.get('settings', {})
        form_title = _settings.get('form_title')
        id_string = _settings.get('id_string')
        (self.xml, self.details) = \
            self.generate_xml_from_source(_source,
                                          include_note=_note,
                                          root_node_name='data',
                                          form_title=form_title,
                                          id_string=id_string)
        self.source = _source
        return super().save(*args, **kwargs)

    def generate_xml_from_source(self,
                                 source,
                                 include_note=False,
                                 root_node_name='snapshot_xml',
                                 form_title=None,
                                 id_string=None):
        """
        Build XForm XML from `source` via FormPack.

        Returns a `(xml, details)` tuple; on failure, `xml` is an empty
        string and `details` carries the error information (the exception is
        logged, not raised).
        """
        if form_title is None:
            form_title = 'Snapshot XML'
        if id_string is None:
            id_string = 'snapshot_xml'

        if include_note and 'survey' in source:
            # Prepend a `note` row; label must be repeated per translation
            _translations = source.get('translations', [])
            _label = include_note
            if len(_translations) > 0:
                _label = [_label for t in _translations]
            source['survey'].append({
                'type': 'note',
                'name': 'prepended_note',
                'label': _label
            })

        source_copy = copy.deepcopy(source)
        self._expand_kobo_qs(source_copy)
        self._populate_fields_with_autofields(source_copy)
        self._strip_kuids(source_copy)
        warnings = []
        details = {}
        try:
            xml = FormPack({'content': source_copy},
                           root_node_name=root_node_name,
                           id_string=id_string,
                           title=form_title)[0].to_xml(warnings=warnings)
            details.update({
                'status': 'success',
                'warnings': warnings,
            })
        except Exception as err:
            err_message = str(err)
            logging.error('Failed to generate xform for asset', extra={
                'src': source,
                'id_string': id_string,
                'uid': self.uid,
                '_msg': err_message,
                'warnings': warnings,
            })
            xml = ''
            details.update({
                'status': 'failure',
                'error_type': type(err).__name__,
                'error': err_message,
                'warnings': warnings,
            })
        return xml, details