def hash(self, request):
    """
    Create an MD5 hash of the `version_id` of all assets accessible to
    the requesting user. Useful to detect changes between requests.

    :param request: rest_framework.request.Request
    :return: Response -- JSON payload `{'hash': <md5 hex digest or ''>}`
    :raises exceptions.NotAuthenticated: for anonymous users
    """
    # Use the `request` argument directly instead of `self.request`
    # (the original mixed both; they are the same object in DRF, but
    # ignoring the parameter was misleading)
    user = request.user
    if user.is_anonymous:
        raise exceptions.NotAuthenticated()

    accessible_assets = (
        get_objects_for_user(user, 'view_asset', Asset)
        .filter(asset_type=ASSET_TYPE_SURVEY)
        .order_by('uid')
    )

    assets_version_ids = [
        asset.version_id
        for asset in accessible_assets
        if asset.version_id is not None
    ]
    # Sort alphabetically so the hash is stable across requests
    assets_version_ids.sort()

    if assets_version_ids:
        hash_ = calculate_hash(''.join(assets_version_ids), algorithm='md5')
    else:
        hash_ = ''

    return Response({'hash': hash_})
def test_version_content_hash(self):
    """
    The hash of the latest version must equal the SHA-1 of the asset
    content serialized as JSON with sorted keys.
    """
    survey_content = {
        'survey': [
            {'type': 'note', 'label': 'Read me', 'name': 'n1'},
        ],
    }
    asset = Asset.objects.create(asset_type='survey', content=survey_content)
    serialized = json.dumps(asset.content, sort_keys=True)
    self.assertEqual(
        asset.latest_version.content_hash,
        calculate_hash(serialized, 'sha1'),
    )
    return asset
def set_md5_hash(self, md5_hash: Optional[str] = None):
    """
    Store an MD5 hash in the `metadata` field.

    If `md5_hash` is provided, it is written as-is. Whenever the stored
    hash is missing or empty (including when `md5_hash` is an empty
    string), the hash is (re)calculated from the remote URL or from the
    file content.
    """
    if md5_hash is not None:
        self.metadata['hash'] = md5_hash

    if self.metadata.get('hash'):
        # A non-empty hash is already present; nothing to compute
        return

    if self.is_remote_url:
        computed = calculate_hash(self.metadata['redirect_url'], prefix=True)
    else:
        try:
            computed = calculate_hash(self.content.file.read(), prefix=True)
        except ValueError:
            # No readable content available
            computed = None

    self.metadata['hash'] = computed
def md5_hash(self):
    """
    Implements:
    - `OpenRosaManifestInterface.md5_hash()`
    - `SyncBackendMediaInterface.md5_hash()`
    """
    if self.asset_file:
        # If an AssetFile object is attached to this object, return its hash
        return self.asset_file.md5_hash

    # Fallback on this custom hash which does NOT represent the real
    # content but changes everytime to force its synchronization with
    # the deployment back end.
    # AssetFile object will be created on call to 'xml-external' endpoint
    # (BUGFIX: removed the unreachable trailing
    # `return self.asset_file.md5_hash` that followed the if/else)
    return calculate_hash(
        f'{str(time.time())}.{self.backend_media_id}', prefix=True
    ) + '-time'
def content_hash(self):
    """
    SHA-1 over the JSON-serialized (sorted-keys) version content.

    Used to detect content changes from version to version; never
    persisted, only compared with other asset versions.
    """
    serialized = json.dumps(self.version_content, sort_keys=True)
    return calculate_hash(serialized, 'sha1')
def sluggify(_str, _opts):
    """
    Turn `_str` into a slug according to the options in `_opts`
    (merged over `DEFAULT_OPTS`).

    This method is ported over from coffeescript:
    jsapp/xlform/src/model.utils.coffee
    """
    _initial = _str
    if _str == '':
        return ''
    opts = dict(DEFAULT_OPTS, **_opts)

    # Whitespace stripping: both ends, left only, or right only
    if opts['lrstrip']:
        _str = _str.strip()
    elif opts['lstrip']:
        _str = _str.lstrip()
    elif opts['rstrip']:
        _str = _str.rstrip()

    if opts['lowerCase']:
        _str = _str.lower()

    if opts['underscores']:
        _str = re.sub(r'\s', '_', _str)
        # .replace(/[_]+/g, "_") <- replaces duplicates?

    if opts['replaceNonWordCharacters']:
        if opts['nonWordCharsExceptions']:
            regex = r'[^a-zA-Z0-9_{}]'.format(opts['nonWordCharsExceptions'])
        else:
            # Cannot use `\W`. Different behaviour with Python 2 & 3
            regex = r'[^a-zA-Z0-9_]+'
        _str = re.sub(regex, '_', _str)
        # Drop a trailing underscore left over from the substitution
        if _str != '_' and re.search('_$', _str):
            _str = re.sub('_$', '', _str)

    if opts['characterLimit']:
        _limit = opts['characterLimit']
        if opts['characterLimit_shorten_method'] == 'ends':
            _str = _shorten_long_name(_str, _limit, join_with='_')
        else:
            _str = _str[0:opts['characterLimit']]

    if opts['validXmlTag']:
        # XML tag names cannot start with a digit
        if re.search(r'^\d', _str):
            _str = '_' + _str

    if opts['preventDuplicateUnderscores']:
        while re.search('__', _str):
            _str = re.sub('__', '_', _str)

    names = opts.get('other_names', opts['preventDuplicates'])
    if isinstance(names, list):
        names_lc = [name.lower() for name in names]
        attempt_base = _str
        if len(attempt_base) == 0:
            # Empty string because arabic / cyrillic characters: fall back
            # to a hash-derived base name.
            # BUGFIX: the original assigned this fallback to `_str`, which
            # was then clobbered by `_str = attempt` below (with `attempt`
            # still the empty `attempt_base`), discarding the fallback
            # entirely. Assigning to `attempt_base` makes the fallback (and
            # its de-duplicated variants) actually take effect.
            attempt_base = 'h{}'.format(calculate_hash(_initial[0:7])[0:7])
        attempt = attempt_base
        incremented = 0
        while attempt.lower() in names_lc:
            incremented += 1
            attempt = "{0}_{1:03d}".format(attempt_base, incremented)
        _str = attempt

    return _str
def md5_hash(self):
    """Return the MD5 hash of this object's XML representation."""
    xml_content = self.xml
    return calculate_hash(xml_content)
def gravatar_url(email, https=True):
    """
    Build the Gravatar avatar URL (40px) for `email`.

    The address is lower-cased before hashing, per the Gravatar spec.
    """
    scheme = 'https' if https else 'http'
    email_hash = calculate_hash(email.lower())
    query = urlencode({'s': '40'})
    return '%s://www.gravatar.com/avatar/%s?%s' % (scheme, email_hash, query)
def external(self, request, paired_data_uid, **kwargs):
    """
    Returns an XML which contains data submitted to paired asset
    Creates the endpoints
    - /api/v2/assets/<parent_lookup_asset>/paired-data/<paired_data_uid>/external/
    - /api/v2/assets/<parent_lookup_asset>/paired-data/<paired_data_uid>/external.xml/
    """
    paired_data = self.get_object()

    # Retrieve the source if it exists
    source_asset = paired_data.get_source()

    if not source_asset:
        # We can enter this condition when source data sharing has been
        # deactivated after it has been paired with current form.
        # We don't want to keep zombie files on storage.
        try:
            asset_file = self.asset.asset_files.get(uid=paired_data_uid)
        except AssetFile.DoesNotExist:
            pass
        else:
            asset_file.delete()
        raise Http404

    if not source_asset.has_deployment or not self.asset.has_deployment:
        # Both ends of the pairing must be deployed to serve data
        raise Http404

    old_hash = None
    # Retrieve data from related asset file.
    # If data has already been fetched once, an `AssetFile` should exist.
    # Otherwise, we create one to store the generated XML.
    try:
        asset_file = self.asset.asset_files.get(uid=paired_data_uid)
    except AssetFile.DoesNotExist:
        asset_file = AssetFile(
            uid=paired_data_uid,
            asset=self.asset,
            file_type=AssetFile.PAIRED_DATA,
            user=self.asset.owner,
        )
        # When asset file is new, we consider its content as expired to
        # force its creation below
        has_expired = True
    else:
        if not asset_file.content:
            # if `asset_file` exists but does not have any content, it means
            # `paired_data` has changed since last time this endpoint has been
            # called. E.g.: Project owner has changed the questions they want
            # to include in the `xml-external` file
            has_expired = True
        else:
            # Remember the current hash so we can detect whether the
            # regenerated content differs and needs a media re-sync below
            old_hash = asset_file.md5_hash
            timedelta = timezone.now() - asset_file.date_modified
            has_expired = (
                timedelta.total_seconds() > settings.PAIRED_DATA_EXPIRATION
            )

    # ToDo evaluate adding headers for caching and a HTTP 304 status code
    if not has_expired:
        # Serve the cached XML as-is
        return Response(asset_file.content.file.read().decode())

    # If the content of `asset_file' has expired, let's regenerate the XML
    submissions = source_asset.deployment.get_submissions(
        self.asset.owner, format_type=SUBMISSION_FORMAT_TYPE_XML
    )
    parsed_submissions = []

    for submission in submissions:
        # Use `rename_root_node_to='data'` to rename the root node of each
        # submission to `data` so that form authors do not have to rewrite
        # their `xml-external` formulas any time the asset UID changes,
        # e.g. when cloning a form or creating a project from a template.
        # Set `use_xpath=True` because `paired_data.fields` uses full group
        # hierarchies, not just question names.
        parsed_submissions.append(
            strip_nodes(
                submission,
                paired_data.allowed_fields,
                use_xpath=True,
                rename_root_node_to='data',
            )
        )

    filename = paired_data.filename
    parsed_submissions_to_str = ''.join(parsed_submissions)
    root_tag_name = SubmissionXMLRenderer.root_tag_name
    xml_ = add_xml_declaration(
        f'<{root_tag_name}>'
        f'{parsed_submissions_to_str}'
        f'</{root_tag_name}>'
    )

    if not parsed_submissions:
        # We do not want to cache an empty file
        return Response(xml_)

    # We need to delete the current file (if it exists) when filename
    # has changed. Otherwise, it would leave an orphan file on storage
    if asset_file.pk and asset_file.content.name != filename:
        asset_file.content.delete()

    asset_file.content = ContentFile(xml_.encode(), name=filename)

    # `xml_` is already there in memory, let's use its content to get its
    # hash and store it within `asset_file` metadata
    asset_file.set_md5_hash(calculate_hash(xml_, prefix=True))
    asset_file.save()

    if old_hash != asset_file.md5_hash:
        # resync paired data to the deployment backend
        self.asset.deployment.sync_media_files(AssetFile.PAIRED_DATA)

    return Response(xml_)