def view_xml(request):
    """Stream the raw XML representation of the current resource."""
    resource_urn = request.ductus.resource.urn
    xml_iterator = get_resource_database().get_xml(resource_urn)
    return StreamingHttpResponse(xml_iterator,
                                 content_type='application/xml; charset=utf-8')
def validate(self):
    """Validate ``self.uri`` as a urn that names an existing Picture.

    On success the normalized urn is stored on ``self.urn``.  Raises
    ``forms.ValidationError`` when the urn is unknown to the resource
    database or does not represent a picture.
    """
    from ductus.resource import get_resource_database
    from ductus.resource.ductmodels import DuctModelMismatchError
    resource_database = get_resource_database()
    for regex in self._re_objects:
        m = regex.match(self.uri)
        if m is None:
            continue
        urn = "urn:%s:%s" % (m.group(1), m.group(2))
        if urn not in resource_database:
            raise forms.ValidationError(_(u"This urn cannot be found on the server you are currently accessing."))
        try:
            Picture.load(urn)
        except DuctModelMismatchError:
            raise forms.ValidationError(_(u"This urn represents content that is not a picture."))
        # fixme: handle exception raised by get_resource_object if it's
        # actually a blob
        self.urn = urn
        return
    # this should never be reached
    assert self.handles(self.uri)
def edit_textwiki(request):
    """Render the wikitext editor for the current page.

    Handles blueprint POSTs, then presents the current resource (if any)
    for editing, transparently upgrading legacy creole content to
    ductus-html5 first.
    """
    resource_database = get_resource_database()
    if request.method == 'POST':
        handle_blueprint_post(request, Wikitext)
    resource = None
    if hasattr(request, 'ductus') and getattr(request.ductus, 'resource', None):
        resource = request.ductus.resource
    # handle old creole content: make it look like ductus-html5 so we can edit it
    # content is not saved to creole anymore, only to ductus-html5
    # NOTE: guard on `resource` so a brand-new page (no resource yet) does not
    # blow up with AttributeError on `None.blob`
    if resource is not None and resource.blob.markup_language == 'creole-1.0':
        resource.blob.markup_language = 'ductus-html5'
        resource.text = creole(resource.text, resource.blob.natural_language)
    return render_to_response('textwiki/edit_wiki.html', {
        'resource_json': resource,
        'writable_directories': get_writable_directories_for_user(request.user),
    }, context_instance=RequestContext(request))
def view_xml(request):
    """Displays XML representation of resource."""
    resource_urn = request.ductus.resource.urn
    # materialize the iterator into a list -- see django #6527
    body = list(get_resource_database().get_xml(resource_urn))
    return HttpResponse(body, content_type='application/xml; charset=utf-8')
def view_xml_as_text(request):
    """Displays XML representation of resource in text/plain format."""
    resource_urn = request.ductus.resource.urn
    # materialize the iterator into a list -- see django #6527
    body = list(get_resource_database().get_xml(resource_urn))
    return HttpResponse(body, content_type='text/plain; charset=utf-8')
def _mediacache_view(pathname, query_string):
    """Serve a mediacache file, generating it on demand when missing.

    `pathname` encodes the blob urn, transformation args and extension;
    `query_string` must carry the urn of a resource referencing the blob
    when the cached file does not exist yet.
    """
    m = _pathname_re.match(pathname) or _alternate_pathname_re.match(pathname)
    if m is None:
        raise Http404("pathname does not match mediacache re")
    blob_urn = 'urn:%s:%s' % (m.group('hash_type'), m.group('digest'))
    additional_args = m.group('additional_args')
    try:
        mime_type = ext_to_mime[m.group('extension')]
    except KeyError:
        raise Http404("unknown extension")
    # if the file exists on the filesystem, serve it!
    data_iterator = get(blob_urn, mime_type, additional_args)
    if data_iterator:
        # fixme: possibly log a warning if we're in deploy mode
        response = StreamingHttpResponse(data_iterator, content_type=mime_type)
        response["X-Ductus-Mediacache"] = "served"
        return response
    if not query_string:
        raise Http404("the urn of the resource that references this blob should be given as the query string")
    try:
        referencing_resource = get_resource_database().get_resource_object(query_string)
    except KeyError:
        raise Http404("the query string does not reference an existing urn")
    return _do_mediacache_view_serve(blob_urn, mime_type, additional_args,
                                     referencing_resource)
def five_sec_widget(request, pagename):
    """display a `five seconds widget` as specified by the query parameters.
    Also handle POST requests from the widget, saving blueprints and
    performing related updates.
    """
    if request.method == 'POST':
        new_fc_urn = handle_blueprint_post(request, Flashcard)
        # temp hack for FSI, manually update the lesson we took the flashcard from
        from django.utils.safestring import mark_safe
        from ductus.resource.ductmodels import BlueprintSaveContext
        from ductus.wiki.views import _fully_handle_blueprint_post
        try:
            url = request.POST['fsi_url']
            card_index = int(request.POST['fsi_index'])
        except KeyError:
            raise ValidationError("the widget should provide FSI specific fields")
        page = WikiPage.objects.get(name=url)
        revision = page.get_latest_revision()
        urn = 'urn:' + revision.urn
        resource_database = get_resource_database()
        old_fcd = resource_database.get_resource_object(urn)
        fcd_bp = json.loads(resource_json(old_fcd))
        # remove href and add a @patch statement so that the blueprint updates the database
        fcd_bp['resource']['@patch'] = urn
        del fcd_bp['href']
        # set the flashcard href saved above
        fcd_bp['resource']['cards']['array'][card_index]['href'] = new_fc_urn.urn
        # remove all 'resource' keys in the blueprint as ResourceElement ignores the hrefs otherwise
        for fc in fcd_bp['resource']['cards']['array']:
            del fc['resource']
        for interaction in fcd_bp['resource']['interactions']['array']:
            del interaction['resource']
        request.POST = request.POST.copy()
        request.POST['blueprint'] = json.dumps(fcd_bp)
        request.POST['log_message'] = '5sec widget (subtitle)'
        prefix, pagename = url.split(':')
        return _fully_handle_blueprint_post(request, prefix, pagename)
    # define what to do when GETing /special/five-sec-widget?method=something
    if request.method == 'GET':
        methods = {
            'get-audio-to-subtitle': fsw_get_audio_to_subtitle,
            'get-phrase-to-record': fsw_get_phrase_to_record,
        }
        # look the handler up *before* calling it: the old
        # `try: methods[...](request) except KeyError: pass` silently
        # swallowed KeyErrors raised inside the handler itself
        handler = methods.get(request.GET.get('method', None))
        if handler is not None:
            return handler(request)
    return render_to_response('flashcards/five_sec_widget.html', {},
                              RequestContext(request))
def five_sec_widget(request, pagename):
    """display a `five seconds widget` as specified by the query parameters.
    Also handle POST requests from the widget, saving blueprints and
    performing related updates.
    """
    if request.method == 'POST':
        new_fc_urn = handle_blueprint_post(request, Flashcard)
        # temp hack for FSI, manually update the lesson we took the flashcard from
        from django.utils.safestring import mark_safe
        from ductus.resource.ductmodels import BlueprintSaveContext
        from ductus.wiki.views import _fully_handle_blueprint_post
        try:
            url = request.POST['fsi_url']
            card_index = int(request.POST['fsi_index'])
        except KeyError:
            raise ValidationError("the widget should provide FSI specific fields")
        page = WikiPage.objects.get(name=url)
        revision = page.get_latest_revision()
        urn = 'urn:' + revision.urn
        resource_database = get_resource_database()
        old_fcd = resource_database.get_resource_object(urn)
        fcd_bp = json.loads(resource_json(old_fcd))
        # remove href and add a @patch statement so that the blueprint updates the database
        fcd_bp['resource']['@patch'] = urn
        del fcd_bp['href']
        # set the flashcard href saved above
        fcd_bp['resource']['cards']['array'][card_index]['href'] = new_fc_urn.urn
        # remove all 'resource' keys in the blueprint as ResourceElement ignores the hrefs otherwise
        for fc in fcd_bp['resource']['cards']['array']:
            del fc['resource']
        for interaction in fcd_bp['resource']['interactions']['array']:
            del interaction['resource']
        request.POST = request.POST.copy()
        request.POST['blueprint'] = json.dumps(fcd_bp)
        request.POST['log_message'] = '5sec widget (subtitle)'
        prefix, pagename = url.split(':')
        return _fully_handle_blueprint_post(request, prefix, pagename)
    # define what to do when GETing /special/five-sec-widget?method=something
    if request.method == 'GET':
        methods = {
            'get-audio-to-subtitle': fsw_get_audio_to_subtitle,
            'get-phrase-to-record': fsw_get_phrase_to_record,
        }
        # look the handler up *before* calling it: the old
        # `try: methods[...](request) except KeyError: pass` silently
        # swallowed KeyErrors raised inside the handler itself
        handler = methods.get(request.GET.get('method', None))
        if handler is not None:
            return handler(request)
    return render_to_response('flashcards/five_sec_widget.html', {},
                              RequestContext(request))
def main_document_view(request, urn=None, wiki_page=None, wiki_revision=None):
    """Dispatches the appropriate view for a resource/page"""
    requested_view = request.GET.get('view', None)
    resource_database = get_resource_database()
    # raw view: serve the stored bytes directly, bypassing view dispatch
    if requested_view == 'raw':
        etag = __handle_etag(request, ['raw', urn], weak=False)
        # fixme: we may also want to set last-modified, expires, max-age
        try:
            data_iterator = resource_database[urn]
        except KeyError:
            raise Http404
        response = HttpResponse(list(data_iterator),  # see django #6527
                                content_type='application/octet-stream')
        response["ETag"] = etag
        return response
    if request.method == "GET":
        # compute both etag flavors up front; which one applies depends on
        # the Vary headers of the response we end up producing
        unvaried_etag = [urn, bool(wiki_page),
                         request.META.get("QUERY_STRING", "")]
        varied_etag = unvaried_etag + [request.LANGUAGE_CODE,
                                       bool(request.is_secure()),
                                       request.META.get("HTTP_COOKIE", "")]
        unvaried_etag = __handle_etag(request, unvaried_etag)
        varied_etag = __handle_etag(request, varied_etag)
    try:
        resource = resource_database.get_resource_object(urn)
    except KeyError:
        raise Http404("resource does not exist")
    except UnexpectedHeader as e:
        raise Http404(str(e))
    request.ductus = DuctusRequestInfo(resource, requested_view,
                                       wiki_page, wiki_revision)
    # model-specific views take precedence over the generic (None) registry
    view_func = None
    for registry_key in (resource.fqn, None):
        try:
            view_func = registered_views[registry_key][requested_view]
        except KeyError:
            continue
        break
    if view_func is None or not view_func.meets_requirements(request.ductus):
        return query_string_not_found(request)
    response = view_func(request)
    if request.method == "GET" and not response.has_header("ETag"):
        if getattr(response, "_unvarying", False):
            response["ETag"] = unvaried_etag
        else:
            vary_headers = set(h.strip().lower()
                               for h in response.get("Vary", "").split(',') if h)
            if vary_headers.issubset(set(['cookie', 'accept-language'])):
                response["ETag"] = varied_etag
    return response
def save(self, *args, **kwargs):
    """Validate and persist this revision.

    Raises ``exceptions.ValidationError`` when the urn is not stored in
    the resource database or when neither author nor author IP is set.
    """
    # fixme: See Django #6845. We may need to move these tests to a
    # special validation function some day.
    assert not self.urn.startswith('urn:')
    if self.urn and ('urn:%s' % self.urn) not in get_resource_database():
        raise exceptions.ValidationError(_("urn is not in database: urn:%s") % self.urn)
    if not (self.author or self.author_ip):
        raise exceptions.ValidationError(_("A user or IP address must be given when saving a revision"))
    return super(WikiRevision, self).save(*args, **kwargs)
def verify(collection, urn, current_wikipages_list, force_update=False):
    """Updates a urn's indexing info and returns the set of its recursive links.

    `collection`: the mongo collection to use as returned by
    ``get_indexing_mongo_database()``.
    `urn`: the urn to update the index for, starting with "urn:".
    `current_wikipages_list` is the sorted list of urls pointing to `urn`.
    `force_update`: set to True to update the index even if `urn` is
    already in the index (defaults to ``False``).
    """
    if not force_update:
        existing = collection.find_one({"urn": urn}, {"recursive_links": 1})
        if existing:
            # already indexed; reuse the stored recursive links
            return set(existing.get("recursive_links", ()))
    resource_database = get_resource_database()
    try:
        tree = resource_database.get_xml_tree(urn)
    except UnexpectedHeader:
        # it must be a blob
        perform_upsert(collection, urn, {"fqn": None})
        return set()
    xlink_href = '{http://www.w3.org/1999/xlink}href'
    parents_tag = '{http://ductus.us/ns/2009/ductus}parents'
    urn_prefix = 'urn:%s:' % hash_name
    direct_links = set()
    for event, element in etree.iterwalk(tree):
        if xlink_href not in element.attrib:
            continue
        # links under the <parents> element are lineage, not content links
        if element.getparent().tag == parents_tag:
            continue
        href = element.attrib[xlink_href]
        if href.startswith(urn_prefix):
            direct_links.add(href)
    recursive_links = set(direct_links)
    for link in direct_links:
        recursive_links |= verify(collection, link, [])
    resource = resource_database.get_resource_object(urn)
    assert resource.fqn is not None
    index_doc = {
        "fqn": resource.fqn,
        "links": list(direct_links),
        "recursive_links": sorted(recursive_links),
        "current_wikipages": sorted(current_wikipages_list),
    }
    try:
        index_doc["parents"] = sorted(parent.href for parent in resource.common.parents)
        index_doc["tags"] = sorted(tag.value for tag in resource.tags)
    except AttributeError:
        pass
    perform_upsert(collection, urn, index_doc)
    return recursive_links
def page_exists(self, pagename):
    """Return True when `pagename` is a urn present in the resource database."""
    m = self.__urn_colon_re.match(pagename)
    if m is None:
        return False
    urn = 'urn:%s:%s' % (m.group(1), m.group(2))
    # this will return True for blobs as well. do we really want this?
    return urn in get_resource_database()
def save(self, encoding=None):
    """Serialize this model to XML, store it, and return its urn."""
    if self.urn:
        # already persisted -- saving again is a no-op
        return self.urn
    self.validate()
    root = etree.Element(self.fqn, nsmap=self.nsmap)
    self.populate_xml_element(root, self.ns)
    self.urn = get_resource_database().store_xml_tree(root, encoding=encoding)
    return self.urn
def clean(self, data, initial=None):
    """Validate an uploaded audio file.

    Checks the blob size against the ResourceDatabase limit, sniffs and
    verifies the actual file type with libmagic, and converts wav uploads
    to ogg.  Returns the cleaned upload with `ductus_mime_type` set.
    Raises ``forms.ValidationError`` on oversized or unrecognized files.
    """
    rv = super(AudioField, self).clean(data, initial)
    # make sure the blob is small enough to fit in the ResourceDatabase
    # without raising SizeTooLargeError
    max_blob_size = get_resource_database().max_blob_size
    if data.size > max_blob_size:
        raise forms.ValidationError(self.error_messages['file_too_large'] % max_blob_size)
    filename_requires_cleanup = False
    oggfile_requires_cleanup = False
    try:
        if hasattr(data, 'temporary_file_path'):
            filename = data.temporary_file_path()
        else:
            # in-memory upload: spill it to a temp file so libmagic and the
            # type verifiers can operate on a real path
            fd, filename = mkstemp()
            filename_requires_cleanup = True
            f = os.fdopen(fd, 'wb')
            try:
                for chunk in data.chunks():
                    f.write(chunk)
            finally:
                f.close()
        from magic import Magic
        mime_type = Magic(mime=True).from_file(filename)
        try:
            logger.debug("Mime type detected: %s", mime_type)
            verify_file_type = verification_map[mime_type]
        except KeyError:
            raise forms.ValidationError(self.error_messages['unrecognized_file_type'])
        mime_type = verify_file_type(filename, self.error_messages)
        rv.ductus_mime_type = mime_type
        # convert Wav files to ogg, so we don't waste precious space
        # this code will disappear as soon as we have a clean way to compress
        # audio on the client
        if mime_type == 'audio/wav':
            oggfile_requires_cleanup = True
            ogg_filename = convert_wav_to_ogg(filename)
            rv.ductus_mime_type = 'audio/ogg'
            rv.content_type = 'audio/ogg'
            # open in binary mode: ogg is binary data, and the default text
            # mode would corrupt it on platforms that translate newlines
            rv.file = open(ogg_filename, 'rb')
        return rv
    finally:
        if filename_requires_cleanup:
            os.remove(filename)
        if oggfile_requires_cleanup:
            os.remove(ogg_filename)
def verify(collection, urn, current_wikipages_list, force_update=False):
    """Updates a urn's indexing info and returns the set of its recursive links.

    `collection`: the mongo collection to use as returned by
    ``get_indexing_mongo_database()``.
    `urn`: the urn to update the index for, starting with "urn:".
    `current_wikipages_list` is the sorted list of urls pointing to `urn`.
    `force_update`: set to True to update the index even if `urn` is
    already in the index (defaults to ``False``).
    """
    if not force_update:
        existing = collection.find_one({"urn": urn}, {"recursive_links": 1})
        if existing:
            # already indexed; reuse the stored recursive links
            return set(existing.get("recursive_links", ()))
    resource_database = get_resource_database()
    try:
        tree = resource_database.get_xml_tree(urn)
    except UnexpectedHeader:
        # it must be a blob
        perform_upsert(collection, urn, {"fqn": None})
        return set()
    xlink_href = '{http://www.w3.org/1999/xlink}href'
    parents_tag = '{http://ductus.us/ns/2009/ductus}parents'
    urn_prefix = 'urn:%s:' % hash_name
    direct_links = set()
    for event, element in etree.iterwalk(tree):
        if xlink_href not in element.attrib:
            continue
        # links under the <parents> element are lineage, not content links
        if element.getparent().tag == parents_tag:
            continue
        href = element.attrib[xlink_href]
        if href.startswith(urn_prefix):
            direct_links.add(href)
    recursive_links = set(direct_links)
    for link in direct_links:
        recursive_links |= verify(collection, link, [])
    resource = resource_database.get_resource_object(urn)
    assert resource.fqn is not None
    index_doc = {
        "fqn": resource.fqn,
        "links": list(direct_links),
        "recursive_links": sorted(recursive_links),
        "current_wikipages": sorted(current_wikipages_list),
    }
    try:
        index_doc["parents"] = sorted(parent.href for parent in resource.common.parents)
        index_doc["tags"] = sorted(tag.value for tag in resource.tags)
    except AttributeError:
        pass
    perform_upsert(collection, urn, index_doc)
    return recursive_links
def view_license_info(request):
    """List license info for the current resource and all of its subresources."""
    db = get_resource_database()
    main_resource = request.ductus.resource
    resources = [main_resource]
    resources.extend(db.get_resource_object(urn)
                     for urn in subview(main_resource).subresources())
    return render_to_response('wiki/all_license_info.html', {
        'resources': resources,
    }, context_instance=RequestContext(request))
def get(self):
    """Return the resource this element references, or None when unset.

    Results are memoized per-href on ``self._cached_resource``.
    """
    if self.href == "":
        # nothing saved yet; fall back to an attached unsaved resource
        return getattr(self, "_unsaved_resource", None)
    cached = getattr(self, "_cached_resource", None)
    if cached is not None and cached[0] == self.href:
        return cached[1]
    resource = get_resource_database().get_resource_object(self.href)
    self.__check_type(resource)
    self._cached_resource = (self.href, resource)
    return resource
def fsw_get_flashcard(request, extra_tags, prompt_side, answer_side):
    """return a JSON flashcard object

    extra_tags: a list of tags the flashcard deck must have
    prompt_side: the index (0 based) of the side to use as prompt (which cannot be empty)
    answer_side: the index (0 based) of the side that must be empty
    """
    if request.method != 'GET':
        raise ImmediateResponse(HttpTextResponseBadRequest('only GET is allowed'))
    # get the language to search for
    language = request.GET.get('language',
                               getattr(settings, "FIVE_SEC_WIDGET_DEFAULT_LANGUAGE", 'en'))
    search_tags = ['target-language:' + language] + extra_tags
    # get a list of pages tagged as we want
    url_list = search_pages(tags=search_tags)
    if not url_list:
        raise Http404('No material available for this language')
    #url_list = [url for url in url_list if url.split(':')[0] == language]
    # pick a randomly chosen flashcard that has no text transcript in side[0]
    resource_database = get_resource_database()
    while True:
        candidate = url_list[random.randint(0, len(url_list) - 1)]
        try:
            page = WikiPage.objects.get(name=candidate['absolute_pagename'])
        except WikiPage.DoesNotExist:
            # stale search result: drop it and retry while anything remains
            url_list.remove(candidate)
            if len(url_list) > 0:
                continue
            raise Http404('wikipage does not exist: ' + candidate['path'])
        revision = page.get_latest_revision()
        deck = resource_database.get_resource_object('urn:' + revision.urn)
        card_index = random.randint(0, len(deck.cards.array) - 1)
        card = deck.cards.array[card_index].get()
        prompt = card.sides.array[prompt_side].get()
        answer = card.sides.array[answer_side].get()
        if prompt and not answer:
            break
    # temporary hack for FSI: add the URL this flashcard is taken from
    flashcard_dict = json.loads(resource_json(card))
    flashcard_dict['fsi_url'] = candidate['absolute_pagename']
    flashcard_dict['fsi_index'] = card_index
    return render_json_response(flashcard_dict)
def get_joined_audio_mediacache_url(resource, audio_urn_list, mime_type):
    """Returns relative urls, meant to have the mediacache prefix prepended"""
    assert mime_type in ('audio/webm', 'audio/mp4')
    if not audio_urn_list:
        return None
    first_audio = get_resource_database().get_resource_object(audio_urn_list[0])
    if len(audio_urn_list) == 1:
        # a single clip is served straight from its own mediacache entry
        return resolve_relative_mediacache_url(first_audio)
    # multiple clips: key the concatenated file by a hash of the urn list
    joined_hash = hashlib.sha1(' '.join(audio_urn_list)).hexdigest()
    return resolve_relative_mediacache_url(resource, mime_type, joined_hash,
                                           first_audio.blob.href)
def fsw_get_flashcard(request, extra_tags, prompt_side, answer_side):
    """return a JSON flashcard object

    extra_tags: a list of tags the flashcard deck must have
    prompt_side: the index (0 based) of the side to use as prompt (which cannot be empty)
    answer_side: the index (0 based) of the side that must be empty
    """
    if request.method == 'GET':
        # get the language to search for
        language = request.GET.get('language',
                                   getattr(settings, "FIVE_SEC_WIDGET_DEFAULT_LANGUAGE", 'en'))
        search_tags = ['target-language:' + language] + extra_tags
        # get a list of pages tagged as we want
        try:
            url_list = search_pages(tags=search_tags)
        except IndexingError:
            raise Http404('Indexing error, contact the site administrator')
        if url_list != []:
            #url_list = [url for url in url_list if url.split(':')[0] == language]
            # pick a randomly chosen flashcard that has no text transcript in side[0]
            resource_database = get_resource_database()
            while True:
                candidate = url_list[random.randint(0, len(url_list) - 1)]
                try:
                    page = WikiPage.objects.get(name=candidate['absolute_pagename'])
                except WikiPage.DoesNotExist:
                    # stale search result: drop it and retry while anything remains
                    url_list.remove(candidate)
                    if len(url_list) > 0:
                        continue
                    raise Http404('wikipage does not exist: ' + candidate['path'])
                revision = page.get_latest_revision()
                deck = resource_database.get_resource_object('urn:' + revision.urn)
                card_index = random.randint(0, len(deck.cards.array) - 1)
                card = deck.cards.array[card_index].get()
                prompt = card.sides.array[prompt_side].get()
                answer = card.sides.array[answer_side].get()
                if prompt and not answer:
                    break
            # temporary hack for FSI: add the URL this flashcard is taken from
            flashcard_dict = json.loads(resource_json(card))
            flashcard_dict['fsi_url'] = candidate['absolute_pagename']
            flashcard_dict['fsi_index'] = card_index
            return render_json_response(flashcard_dict)
    raise Http404('No material available for this language')
def view_diff(request):
    """Render a diff between the current resource and another revision."""
    this = request.ductus.resource
    try:
        that = get_resource_database().get_resource_object(request.GET["diff"])
    except KeyError:
        # This could mean there is no "diff" in the query string, or that the
        # resource object doesn't exist.  Fall back to diffing against the
        # first parent, and fail out if there isn't one.
        parents = this.common.parents.array
        if not parents:
            return query_string_not_found(request)
        that = parents[0].get()
    return render_to_response("wiki/diff.html", {
        'diff': Diff(this, that),
    }, RequestContext(request))
def view_xml_as_html(request):
    """Displays HTML-formatted (syntax-highlighted) XML representation of resource."""
    urn = request.ductus.resource.urn
    xml_source = ''.join(get_resource_database().get_xml(urn))
    formatter = pygments.formatters.HtmlFormatter()
    highlighted = pygments.highlight(xml_source, pygments.lexers.XmlLexer(), formatter)
    # allow wrapping of long lines, then turn urns into clickable links
    html = urn_linkify(allow_line_wrap(highlighted), query_string='view=xml_as_html')
    css = formatter.get_style_defs('.highlight')
    return render_to_response('wiki/xml_display.html',
                              {'html': mark_safe(html), 'css': mark_safe(css)},
                              context_instance=RequestContext(request))
def get_joined_audio_mediacache_url(resource, audio_urn_list, mime_type):
    """Returns relative urls, meant to have the mediacache prefix prepended"""
    assert mime_type in ('audio/webm', 'audio/mp4')
    if not audio_urn_list:
        return None
    db = get_resource_database()
    first_audio = db.get_resource_object(audio_urn_list[0])
    if len(audio_urn_list) == 1:
        # a single clip is served straight from its own mediacache entry
        return resolve_relative_mediacache_url(first_audio)
    # multiple clips: key the concatenated file by a hash of the urn list
    joined_hash = hashlib.sha1(' '.join(audio_urn_list)).hexdigest()
    return resolve_relative_mediacache_url(resource, mime_type, joined_hash,
                                           first_audio.blob.href)
def clean(self, data, initial=None):
    """Validate an uploaded picture file.

    Checks the blob size against the ResourceDatabase limit and sniffs
    the mime type with libmagic.  Returns the cleaned upload with
    `ductus_mime_type` set.  Raises ``forms.ValidationError`` when the
    upload is too large.
    """
    rv = super(PictureFileField, self).clean(data, initial)
    if data is None:
        # this happens with flickr pictures, see https://code.ductus.us/ticket/187
        return rv
    # make sure the blob is small enough to fit in the ResourceDatabase
    # without raising SizeTooLargeError
    max_blob_size = get_resource_database().max_blob_size
    if data.size > max_blob_size:
        raise forms.ValidationError(self.error_messages['file_too_large'] % max_blob_size)
    filename_requires_cleanup = False
    try:
        if hasattr(data, 'temporary_file_path'):
            filename = data.temporary_file_path()
        else:
            # in-memory upload: spill it to a temp file so libmagic can
            # operate on a real path
            fd, filename = mkstemp()
            filename_requires_cleanup = True
            f = os.fdopen(fd, 'wb')
            try:
                for chunk in data.chunks():
                    f.write(chunk)
            finally:
                f.close()
        from magic import Magic
        mime_type = Magic(mime=True).from_file(filename)
        # the old try/except KeyError around this log call was dead code
        # (nothing inside it can raise KeyError; the verification_map lookup
        # from the audio variant was removed here), so it has been dropped
        logger.debug("Mime type detected: %s", mime_type)
        #TODO: double check the file type, like we do for audio files
        rv.ductus_mime_type = mime_type
        return rv
    finally:
        if filename_requires_cleanup:
            os.remove(filename)
def mediacache_cat_audio(first_blob_urn, audio_urn_list, mime_type):
    """Concatenate the audio resources in `audio_urn_list` into one file."""
    if len(audio_urn_list) < 2:
        # there's no reason to do concatenation...
        # get_joined_audio_mediacache_url() points to the original (single)
        # file anyway
        return None
    db = get_resource_database()
    audio_resources = [db.get_resource_object(audio_urn)
                       for audio_urn in audio_urn_list]
    # sanity check: the cache key must match the first clip's blob
    if audio_resources[0].blob.href != first_blob_urn:
        return None
    if mime_type == 'audio/webm':
        return _cat_webm(audio_resources)
    if mime_type == 'audio/mp4':
        return _cat_m4a(audio_resources)
    raise Exception("attempting to concatenate unsupported format: %s" % mime_type)
def save_blueprint(cls, blueprint, save_context):
    """`blueprint` is a json object.  Returns a URN"""
    # fixme: make sure the end result is compatible with the class. this
    # might actually be easy if we just make sure the @constructor will
    # make a class we want, but this would eliminate our ability to make a
    # @constructor that outputs a resource of some type that is unknown
    # before its construction
    resource_database = get_resource_database()
    blueprint_expects_dict(blueprint)
    if 'href' in blueprint:
        href = blueprint['href']
        blueprint_expects_string(href)
        # we ensure it exists and is not a blob, then return the urn
        resource_database.get_xml(href)
        return href
    if 'resource' not in blueprint:
        raise BlueprintError("blueprint needs either `href` or `resource`", blueprint)
    resource_blueprint = blueprint['resource']
    blueprint_expects_dict(resource_blueprint)
    resource_blueprint = dict(resource_blueprint)  # copy it so we can modify
    if '@patch' in resource_blueprint:
        # patch: clone the existing resource and apply the blueprint on top
        original_urn = resource_blueprint.pop('@patch')
        resource = resource_database.get_resource_object(original_urn).clone()
    elif '@create' in resource_blueprint:
        # create: instantiate a fresh resource of the requested model type
        fqn = resource_blueprint.pop('@create')
        try:
            resource_class = _registered_ductmodels[fqn]
        except KeyError:
            raise BlueprintError("invalid argument to `@create`", resource_blueprint)
        if not issubclass(resource_class, cls):
            raise BlueprintError("resource is not of an acceptable model type", resource_blueprint)
        resource = resource_class()
    else:
        raise BlueprintError("resource blueprint must contain '@patch' or '@create'", resource_blueprint)
    resource.patch_from_blueprint(resource_blueprint, save_context)
    return resource.save()
def clean(self, data, initial=None):
    """Validate an uploaded picture file.

    Checks the blob size against the ResourceDatabase limit and sniffs
    the mime type with libmagic.  Returns the cleaned upload with
    `ductus_mime_type` set.  Raises ``forms.ValidationError`` when the
    upload is too large.
    """
    rv = super(PictureFileField, self).clean(data, initial)
    if data is None:
        # this happens with flickr pictures, see https://code.ductus.us/ticket/187
        return rv
    # make sure the blob is small enough to fit in the ResourceDatabase
    # without raising SizeTooLargeError
    max_blob_size = get_resource_database().max_blob_size
    if data.size > max_blob_size:
        raise forms.ValidationError(self.error_messages['file_too_large'] % max_blob_size)
    filename_requires_cleanup = False
    try:
        if hasattr(data, 'temporary_file_path'):
            filename = data.temporary_file_path()
        else:
            # in-memory upload: spill it to a temp file so libmagic can
            # operate on a real path
            fd, filename = mkstemp()
            filename_requires_cleanup = True
            f = os.fdopen(fd, 'wb')
            try:
                for chunk in data.chunks():
                    f.write(chunk)
            finally:
                f.close()
        from magic import Magic
        mime_type = Magic(mime=True).from_file(filename)
        # the old try/except KeyError around this log call was dead code
        # (nothing inside it can raise KeyError; the verification_map lookup
        # from the audio variant was removed here), so it has been dropped
        logger.debug("Mime type detected: %s", mime_type)
        #TODO: double check the file type, like we do for audio files
        rv.ductus_mime_type = mime_type
        return rv
    finally:
        if filename_requires_cleanup:
            os.remove(filename)
def mediacache_cat_audio(first_blob_urn, audio_urn_list, mime_type):
    """Concatenate the audio resources in `audio_urn_list` into one file."""
    if len(audio_urn_list) < 2:
        # there's no reason to do concatenation...
        # get_joined_audio_mediacache_url() points to the original (single)
        # file anyway
        return None
    db = get_resource_database()
    audio_resources = [db.get_resource_object(audio_urn)
                       for audio_urn in audio_urn_list]
    # sanity check: the cache key must match the first clip's blob
    if audio_resources[0].blob.href != first_blob_urn:
        return None
    if mime_type == 'audio/webm':
        return _cat_webm(audio_resources)
    if mime_type == 'audio/mp4':
        return _cat_m4a(audio_resources)
    raise Exception("attempting to concatenate unsupported format: %s" % mime_type)
def validate(self):
    """Validate ``self.uri`` as a urn that names an existing Picture.

    On success the normalized urn is stored on ``self.urn``; raises
    ``forms.ValidationError`` otherwise.
    """
    from ductus.resource import get_resource_database
    from ductus.resource.ductmodels import DuctModelMismatchError
    resource_database = get_resource_database()
    for regex in self._re_objects:
        match = regex.match(self.uri)
        if match is None:
            continue
        hash_type, digest = match.group(1), match.group(2)
        urn = "urn:%s:%s" % (hash_type, digest)
        if urn not in resource_database:
            raise forms.ValidationError(_(u"This urn cannot be found on the server you are currently accessing."))
        try:
            Picture.load(urn)
        except DuctModelMismatchError:
            raise forms.ValidationError(_(u"This urn represents content that is not a picture."))
        # fixme: handle exception raised by get_resource_object if it's
        # actually a blob
        self.urn = urn
        return
    # this should never be reached
    assert self.handles(self.uri)
def view_jsonp(request):
    """Return the current resource as a JSONP payload wrapped in `callback(...)`."""
    resource = get_resource_database().get_resource_object(request.ductus.resource.urn)
    payload = "callback({});".format(json.dumps(resource.output_json_dict()))
    return HttpResponse(payload,
                        content_type='application/javascript; charset=utf-8')
def validate(self, strict=True):
    """Validate this element; in strict mode also type-check the referenced resource."""
    super(ResourceElement, self).validate(strict)
    if strict and self.href:
        self.__check_type(get_resource_database().get_resource_object(self.href))
def store(self, iterable):
    """Store `iterable` as a blob and record the resulting urn on this element."""
    db = get_resource_database()
    self.href = db.store_blob(iterable)
def __iter__(self):
    """Iterate over the referenced blob's data; empty when no href is set.

    Both branches now return an iterator: the original returned a bare
    tuple ``()`` from the empty branch, which is not an iterator, so
    ``iter(obj)`` raised ``TypeError: iter() returned non-iterator``.
    """
    if self.href:
        return get_resource_database().get_blob(self.href)
    return iter(())
def load(cls, urn):
    """Load `urn` and ensure it is an instance of exactly this model class.

    Raises ``DuctModelMismatchError`` when the stored resource is of a
    different model type.
    """
    resource = get_resource_database().get_resource_object(urn)
    if type(resource) != cls:
        raise DuctModelMismatchError("Expecting %s, got %s"
                                     % (cls, type(resource)))
    return resource
def handle_noargs(self, **options):
    # Management command: rebuild the mongodb urn index by walking every
    # resource in the resource database.
    from ductus.index import get_indexing_mongo_database
    indexing_db = get_indexing_mongo_database()
    if indexing_db is None:
        raise Exception
    collection = indexing_db.urn_index

    def perform_upsert(urn, obj, ignore=None):
        # Insert-or-replace the index document for `urn` and mark it verified.
        # REMEMBER that dictionary order matters in mongodb; we just ignore
        # it
        # fixme: first inspect element to see if things might already be
        # right. also check to make sure there aren't any unexpected
        # attributes on the toplevel element. and do the same thing for
        # blobs too.
        obj = dict(obj)
        obj["urn"] = urn
        collection.update({"urn": urn}, obj, upsert=True, safe=True)
        verified_urns.add(urn)

    logging.basicConfig(level=logging.INFO) # FIXME

    # create the mongodb indexes
    collection.ensure_index("urn", unique=True, drop_dups=True)
    collection.ensure_index("parents", sparse=True)
    collection.ensure_index("tags", sparse=True)
    collection.ensure_index("links")
    collection.ensure_index("recursive_links")

    # Begin actual code
    from lxml import etree
    from ductus.resource import get_resource_database, UnexpectedHeader, hash_name
    from ductus.wiki.models import WikiPage
    resource_database = get_resource_database()
    verified_urns = set()
    current_wikipages_map = {}
    # counter kept in a dict so the nested function can mutate it
    # (presumably because there is no `nonlocal` in Python 2 -- confirm)
    operations = {None: 0}

    def verify(urn):
        """Updates a urn's indexing info and returns the set of its recursive links
        """
        operations[None] += 1
        logger.info("operation %d: processing %s", operations[None], urn)
        if urn in verified_urns:
            # already processed during this run: reuse stored links
            q = collection.find_one({"urn": urn}, {"recursive_links": 1})
            try:
                return set(q["recursive_links"])
            except KeyError:
                return set()
        try:
            tree = resource_database.get_xml_tree(urn)
        except UnexpectedHeader:
            # it must be a blob
            perform_upsert(urn, {"fqn": None})
            return set()
        # collect xlink:href attributes, skipping lineage links under <parents>
        links = set()
        for event, element in etree.iterwalk(tree):
            if '{http://www.w3.org/1999/xlink}href' in element.attrib and element.getparent().tag != '{http://ductus.us/ns/2009/ductus}parents':
                link = element.attrib['{http://www.w3.org/1999/xlink}href']
                if link.startswith('urn:%s:' % hash_name):
                    links.add(link)
        # recurse into each direct link to accumulate the transitive closure
        recursive_links = set(links)
        for link in links:
            additional_links = verify(link)
            recursive_links.update(additional_links)
        resource = resource_database.get_resource_object(urn)
        assert resource.fqn is not None
        obj = {
            "fqn": resource.fqn,
            "links": list(links),
            "recursive_links": sorted(recursive_links),
            "current_wikipages": sorted(current_wikipages_map.get(urn, ())),
        }
        try:
            # these attributes exist only on some models; AttributeError
            # simply means the resource has no parents/tags to record
            obj["parents"] = sorted([parent.href for parent in resource.common.parents])
            obj["tags"] = sorted([tag.value for tag in resource.tags])
        except AttributeError:
            pass
        perform_upsert(urn, obj)
        return recursive_links

    # map each latest-revision urn to the set of wikipage names pointing at it
    for wikipage in WikiPage.objects.all():
        revision = wikipage.get_latest_revision()
        if revision is not None and revision.urn:
            urn = 'urn:' + revision.urn
            current_wikipages_map.setdefault(urn, set()).add(wikipage.name)

    # index every key in the resource database, logging (not aborting on) failures
    n_attempted = n_successful = 0
    for key in resource_database:
        n_attempted += 1
        try:
            verify(key)
        except Exception:
            logger.warning("Key failed: %s", key)
        else:
            n_successful += 1
    logger.info("Successfully processed %d of %d keys", n_successful, n_attempted)
#!/usr/bin/env python from lxml import etree import ductus.initialize from ductus.resource import get_resource_database, UnexpectedHeader from ductus.wiki.models import WikiRevision # This script loads everything into memory, which works for our purposes but # wouldn't if there were a higher number of objects in the database HREF_FQN = '{http://www.w3.org/1999/xlink}href' DEFAULT_LICENSE = 'http://creativecommons.org/licenses/by-sa/3.0/' rdb = get_resource_database() urn_update_map = {} def update_object(urn, default_author): if urn in urn_update_map: return urn_update_map[urn] new_urn = _update_object(urn, default_author) urn_update_map[urn] = new_urn print new_urn return new_urn def _update_object(urn, default_author): try: tree = rdb.get_xml_tree(urn) except UnexpectedHeader: # must be a blob
def handle_noargs(self, **options):
    """Regenerate the mongodb urn index over the whole resource database.

    Creates the needed indexes on the ``urn_index`` collection, then visits
    every stored key, upserting an index document with its fqn, direct and
    recursive links, parents, tags, and the wiki pages currently at it.
    """
    from ductus.index import get_indexing_mongo_database
    mongo_db = get_indexing_mongo_database()
    if mongo_db is None:
        raise Exception
    index_collection = mongo_db.urn_index

    done = set()            # urns whose index documents are already written
    wikipages_by_urn = {}   # urn -> set of wiki page names currently at it
    op_counter = {None: 0}  # mutable counter visible inside the closure

    def store(urn, obj, ignore=None):
        # REMEMBER that dictionary order matters in mongodb; we just ignore
        # it
        # fixme: first inspect element to see if things might already be
        # right. also check to make sure there aren't any unexpected
        # attributes on the toplevel element. and do the same thing for
        # blobs too.
        doc = dict(obj)
        doc["urn"] = urn
        index_collection.update({"urn": urn}, doc, upsert=True, safe=True)
        done.add(urn)

    logging.basicConfig(level=logging.INFO)  # FIXME

    # create the mongodb indexes
    index_collection.ensure_index("urn", unique=True, drop_dups=True)
    index_collection.ensure_index("parents", sparse=True)
    index_collection.ensure_index("tags", sparse=True)
    index_collection.ensure_index("links")
    index_collection.ensure_index("recursive_links")

    # Begin actual code
    from lxml import etree
    from ductus.resource import get_resource_database, UnexpectedHeader, hash_name
    from ductus.wiki.models import WikiPage

    rdb = get_resource_database()

    XLINK_HREF = '{http://www.w3.org/1999/xlink}href'
    PARENTS_TAG = '{http://ductus.us/ns/2009/ductus}parents'

    def index_urn(urn):
        """Upsert indexing info for *urn*; return its recursive link set."""
        op_counter[None] += 1
        logger.info("operation %d: processing %s", op_counter[None], urn)

        if urn in done:
            # already handled this run; read its recursive links back out
            doc = index_collection.find_one({"urn": urn},
                                            {"recursive_links": 1})
            try:
                return set(doc["recursive_links"])
            except KeyError:
                return set()

        try:
            tree = rdb.get_xml_tree(urn)
        except UnexpectedHeader:
            # not parseable xml, so it must be a blob
            store(urn, {"fqn": None})
            return set()

        # gather direct urn links, ignoring anything under <parents>
        direct = set()
        for _event, node in etree.iterwalk(tree):
            if XLINK_HREF not in node.attrib:
                continue
            if node.getparent().tag == PARENTS_TAG:
                continue
            target = node.attrib[XLINK_HREF]
            if target.startswith('urn:%s:' % hash_name):
                direct.add(target)

        recursive = set(direct)
        for target in direct:
            recursive |= index_urn(target)

        resource = rdb.get_resource_object(urn)
        assert resource.fqn is not None
        doc = {
            "fqn": resource.fqn,
            "links": list(direct),
            "recursive_links": sorted(recursive),
            "current_wikipages": sorted(wikipages_by_urn.get(urn, ())),
        }
        try:
            doc["parents"] = sorted([parent.href
                                     for parent in resource.common.parents])
            doc["tags"] = sorted([tag.value for tag in resource.tags])
        except AttributeError:
            # this resource type carries no parents/tags attributes
            pass
        store(urn, doc)
        return recursive

    # record which wiki pages currently point at each urn
    for page in WikiPage.objects.all():
        latest = page.get_latest_revision()
        if latest is not None and latest.urn:
            full_urn = 'urn:' + latest.urn
            wikipages_by_urn.setdefault(full_urn, set()).add(page.name)

    attempted = succeeded = 0
    for key in rdb:
        attempted += 1
        try:
            index_urn(key)
        except Exception:
            logger.warning("Key failed: %s", key)
        else:
            succeeded += 1
    logger.info("Successfully processed %d of %d keys", succeeded, attempted)
def view_json(request):
    """Return the resource's JSON representation.

    Looks up the resource object for the urn attached to the request and
    serializes its ``output_json_dict()``.
    """
    # NOTE(review): served as text/plain rather than application/json --
    # preserved as-is since it may be deliberate; confirm before changing.
    urn = request.ductus.resource.urn
    resource = get_resource_database().get_resource_object(urn)
    body = json.dumps(resource.output_json_dict())
    return HttpResponse(body, content_type='text/plain; charset=utf-8')