Exemple #1
0
def view_xml(request):
    """Stream the XML representation of the requested resource.

    The urn is taken from ``request.ductus.resource``; whatever the resource
    database's ``get_xml`` yields for it is handed to a
    ``StreamingHttpResponse`` served as ``application/xml``.
    """
    resource_urn = request.ductus.resource.urn
    xml_chunks = get_resource_database().get_xml(resource_urn)
    return StreamingHttpResponse(
        xml_chunks, content_type='application/xml; charset=utf-8')
Exemple #2
0
    def validate(self):
        """Resolve ``self.uri`` to the urn of an existing picture resource.

        The patterns in ``self._re_objects`` are tried in order; the first one
        that matches supplies the hash type (group 1) and digest (group 2)
        from which the urn is built.  On success the urn is stored in
        ``self.urn``.

        Raises ``forms.ValidationError`` when the urn is not in the resource
        database, or when loading it as a ``Picture`` raises
        ``DuctModelMismatchError``.
        """
        from ductus.resource import get_resource_database
        from ductus.resource.ductmodels import DuctModelMismatchError
        database = get_resource_database()

        for pattern in self._re_objects:
            found = pattern.match(self.uri)
            if found is None:
                continue

            candidate_urn = "urn:%s:%s" % (found.group(1), found.group(2))
            if candidate_urn not in database:
                raise forms.ValidationError(
                    _(u"This urn cannot be found on the server you are currently accessing."))
            try:
                Picture.load(candidate_urn)
            except DuctModelMismatchError:
                raise forms.ValidationError(
                    _(u"This urn represents content that is not a picture."))
            # fixme: handle exception raised by get_resource_object if it's
            # actually a blob
            self.urn = candidate_urn
            return

        # this should never be reached: no pattern matched, so the uri should
        # not have been accepted by handles() in the first place
        assert self.handles(self.uri)
Exemple #3
0
def edit_textwiki(request):
    """Render the wikitext editor page.

    On POST the request is first passed to ``handle_blueprint_post`` for the
    ``Wikitext`` model.  If the request carries a ductus resource it is given
    to the template as ``resource_json``, after legacy creole markup has been
    converted for editing.
    """
    # The returned database handle is not used below; the call itself is kept.
    get_resource_database()

    if request.method == 'POST':
        handle_blueprint_post(request, Wikitext)

    resource = None
    if hasattr(request, 'ductus'):
        resource = getattr(request.ductus, 'resource', None) or None

    if resource is not None:
        blob = resource.blob
        # handle old creole content: make it look like ductus-html5 so we can
        # edit it; content is not saved to creole anymore, only to ductus-html5
        if blob.markup_language == 'creole-1.0':
            blob.markup_language = 'ductus-html5'
            resource.text = creole(resource.text, blob.natural_language)

    template_context = {
        'resource_json': resource,
        'writable_directories': get_writable_directories_for_user(request.user),
    }
    return render_to_response('textwiki/edit_wiki.html', template_context,
                              context_instance=RequestContext(request))
Exemple #4
0
def view_xml(request):
    """Return the XML representation of the requested resource.

    The chunks produced by ``get_xml`` are collected into a list before being
    handed to ``HttpResponse`` (see django #6527).
    """
    resource_urn = request.ductus.resource.urn
    xml_chunks = list(get_resource_database().get_xml(resource_urn))  # see django #6527
    return HttpResponse(xml_chunks,
                        content_type='application/xml; charset=utf-8')
Exemple #5
0
def view_xml_as_text(request):
    """Return the resource's XML representation served as ``text/plain``.

    The chunks produced by ``get_xml`` are collected into a list before being
    handed to ``HttpResponse`` (see django #6527).
    """
    resource_urn = request.ductus.resource.urn
    xml_chunks = list(get_resource_database().get_xml(resource_urn))  # see django #6527
    return HttpResponse(xml_chunks,
                        content_type='text/plain; charset=utf-8')
Exemple #6
0
def _mediacache_view(pathname, query_string):
    """Serve the mediacache entry addressed by `pathname`.

    `pathname` must match one of the two mediacache path patterns; it yields
    the blob's hash type and digest, optional additional arguments and a file
    extension that is mapped to a mime type.
    `query_string` is the urn of the resource that references the blob; it is
    only needed when ``get`` returns nothing for the blob.

    Raises ``Http404`` for an unparsable pathname, an unknown extension, a
    missing query string, or a query string that names no existing urn.
    """
    parsed = _pathname_re.match(pathname) or _alternate_pathname_re.match(pathname)
    if parsed is None:
        raise Http404("pathname does not match mediacache re")

    hash_type, digest, additional_args, extension = parsed.group(
        'hash_type', 'digest', 'additional_args', 'extension')

    try:
        mime_type = ext_to_mime[extension]
    except KeyError:
        raise Http404("unknown extension")

    blob_urn = 'urn:%s:%s' % (hash_type, digest)

    # if the file exists on the filesystem, serve it!
    cached_chunks = get(blob_urn, mime_type, additional_args)
    if cached_chunks:
        # fixme: possibly log a warning if we're in deploy mode
        cached_response = StreamingHttpResponse(cached_chunks,
                                                content_type=mime_type)
        cached_response["X-Ductus-Mediacache"] = "served"
        return cached_response

    if not query_string:
        raise Http404("the urn of the resource that references this blob should be given as the query string")

    try:
        referencing_resource = get_resource_database().get_resource_object(query_string)
    except KeyError:
        raise Http404("the query string does not reference an existing urn")

    return _do_mediacache_view_serve(blob_urn, mime_type, additional_args,
                                     referencing_resource)
Exemple #7
0
def five_sec_widget(request, pagename):
    """display a `five seconds widget` as specified by the query parameters.
    Also handle POST requests from the widget, saving blueprints and performing related updates.

    POST: saves the posted ``Flashcard`` blueprint, then patches the lesson
    named by the ``fsi_url`` field so that the card at index ``fsi_index``
    points to the newly saved flashcard, and returns the response of that
    second save.  Raises ``ValidationError`` if ``fsi_url``/``fsi_index`` are
    missing or if ``fsi_index`` is not an integer.

    GET: dispatches on the ``method`` query parameter; an absent or unknown
    method renders the widget page itself.
    """
    if request.method == 'POST':

        new_fc_urn = handle_blueprint_post(request, Flashcard)
        # temp hack for FSI, manually update the lesson we took the flashcard from
        from ductus.wiki.views import _fully_handle_blueprint_post
        try:
            url = request.POST['fsi_url']
            card_index = int(request.POST['fsi_index'])
        except (KeyError, ValueError):
            # KeyError: a field is missing; ValueError: fsi_index is not an
            # integer.  Both mean the widget sent unusable FSI fields.
            raise ValidationError(
                "the widget should provide FSI specific fields")

        page = WikiPage.objects.get(name=url)
        revision = page.get_latest_revision()
        urn = 'urn:' + revision.urn
        resource_database = get_resource_database()
        old_fcd = resource_database.get_resource_object(urn)
        fcd_bp = json.loads(resource_json(old_fcd))

        # remove href and add a @patch statement so that the blueprint updates the database
        fcd_bp['resource']['@patch'] = urn
        del fcd_bp['href']

        # set the flashcard href saved above
        fcd_bp['resource']['cards']['array'][card_index][
            'href'] = new_fc_urn.urn
        # remove all 'resource' keys in the blueprint as ResourceElement ignores the hrefs otherwise
        for fc in fcd_bp['resource']['cards']['array']:
            del fc['resource']
        for interaction in fcd_bp['resource']['interactions']['array']:
            del interaction['resource']

        # request.POST is replaced by a copy so the extra fields can be set
        request.POST = request.POST.copy()
        request.POST['blueprint'] = json.dumps(fcd_bp)
        request.POST['log_message'] = '5sec widget (subtitle)'
        # the lesson's own prefix/pagename, not the `pagename` argument
        prefix, lesson_pagename = url.split(':')
        return _fully_handle_blueprint_post(request, prefix, lesson_pagename)

    # define what to do when GETing /special/five-sec-widget?method=something
    if request.method == 'GET':
        methods = {
            'get-audio-to-subtitle': fsw_get_audio_to_subtitle,
            'get-phrase-to-record': fsw_get_phrase_to_record,
        }
        # Look the handler up separately from calling it, so that a KeyError
        # raised inside a handler is not mistaken for "unknown method".
        handler = methods.get(request.GET.get('method', None))
        if handler is not None:
            return handler(request)

    return render_to_response('flashcards/five_sec_widget.html', {},
                              RequestContext(request))
Exemple #8
0
def five_sec_widget(request, pagename):
    """display a `five seconds widget` as specified by the query parameters.
    Also handle POST requests from the widget, saving blueprints and performing related updates.

    POST: saves the posted ``Flashcard`` blueprint, then patches the lesson
    named by the ``fsi_url`` field so that the card at index ``fsi_index``
    points to the newly saved flashcard, and returns the response of that
    second save.  Raises ``ValidationError`` if ``fsi_url``/``fsi_index`` are
    missing or if ``fsi_index`` is not an integer.

    GET: dispatches on the ``method`` query parameter; an absent or unknown
    method renders the widget page itself.
    """
    if request.method == 'POST':

        new_fc_urn = handle_blueprint_post(request, Flashcard)
        # temp hack for FSI, manually update the lesson we took the flashcard from
        from ductus.wiki.views import _fully_handle_blueprint_post
        try:
            url = request.POST['fsi_url']
            card_index = int(request.POST['fsi_index'])
        except (KeyError, ValueError):
            # a missing field (KeyError) and a non-integer index (ValueError)
            # are both reported as invalid widget input
            raise ValidationError("the widget should provide FSI specific fields")

        page = WikiPage.objects.get(name=url)
        revision = page.get_latest_revision()
        urn = 'urn:' + revision.urn
        resource_database = get_resource_database()
        old_fcd = resource_database.get_resource_object(urn)
        fcd_bp = json.loads(resource_json(old_fcd))

        # remove href and add a @patch statement so that the blueprint updates the database
        fcd_bp['resource']['@patch'] = urn
        del fcd_bp['href']

        # set the flashcard href saved above
        fcd_bp['resource']['cards']['array'][card_index]['href'] = new_fc_urn.urn
        # remove all 'resource' keys in the blueprint as ResourceElement ignores the hrefs otherwise
        for fc in fcd_bp['resource']['cards']['array']:
            del fc['resource']
        for interaction in fcd_bp['resource']['interactions']['array']:
            del interaction['resource']

        # request.POST is replaced by a copy so the extra fields can be set
        request.POST = request.POST.copy()
        request.POST['blueprint'] = json.dumps(fcd_bp)
        request.POST['log_message'] = '5sec widget (subtitle)'
        # the lesson's own prefix/pagename, not the `pagename` argument
        prefix, lesson_pagename = url.split(':')
        return _fully_handle_blueprint_post(request, prefix, lesson_pagename)

    # define what to do when GETing /special/five-sec-widget?method=something
    if request.method == 'GET':
        methods = {
            'get-audio-to-subtitle': fsw_get_audio_to_subtitle,
            'get-phrase-to-record': fsw_get_phrase_to_record,
        }
        # Only an unknown/absent method falls through to the widget page; a
        # KeyError raised inside a handler now propagates instead of being
        # swallowed.
        handler = methods.get(request.GET.get('method', None))
        if handler is not None:
            return handler(request)

    return render_to_response('flashcards/five_sec_widget.html', {
    }, RequestContext(request))
Exemple #9
0
def main_document_view(request, urn=None, wiki_page=None, wiki_revision=None):
    """Dispatch the request to the view registered for a resource/page.

    The ``view`` query parameter selects the view.  ``raw`` is answered
    directly from the resource database as ``application/octet-stream``.
    Any other view is looked up in ``registered_views``, first under the
    resource's fqn and then under ``None``; an unknown view, or one whose
    requirements are not met, yields ``query_string_not_found``.

    For GET requests an ETag is attached to responses that do not already
    carry one, provided the response is marked ``_unvarying`` or varies on
    nothing beyond cookie / accept-language.

    Raises ``Http404`` when the urn is not in the database or the stored
    data has an unexpected header.
    """
    view_name = request.GET.get('view', None)

    database = get_resource_database()

    if view_name == 'raw':
        etag = __handle_etag(request, ['raw', urn], weak=False)
        # fixme: we may also want to set last-modified, expires, max-age
        try:
            blob_chunks = database[urn]
        except KeyError:
            raise Http404
        raw_response = HttpResponse(list(blob_chunks),  # see django #6527
                                    content_type='application/octet-stream')
        raw_response["ETag"] = etag
        return raw_response

    if request.method == "GET":
        # Both component lists are built before either is passed to
        # __handle_etag, as the etag names are rebound to its return values.
        unvaried_parts = [urn, bool(wiki_page),
                          request.META.get("QUERY_STRING", "")]
        varied_parts = unvaried_parts + [request.LANGUAGE_CODE,
                                         bool(request.is_secure()),
                                         request.META.get("HTTP_COOKIE", "")]
        unvaried_etag = __handle_etag(request, unvaried_parts)
        varied_etag = __handle_etag(request, varied_parts)

    try:
        resource = database.get_resource_object(urn)
    except KeyError:
        raise Http404("resource does not exist")
    except UnexpectedHeader as e:
        raise Http404(str(e))
    request.ductus = DuctusRequestInfo(resource, view_name,
                                       wiki_page, wiki_revision)

    # model-specific views take precedence over the generic (None) ones
    for registry_key in (resource.fqn, None):
        try:
            view_func = registered_views[registry_key][view_name]
        except KeyError:
            continue
        break
    else:
        return query_string_not_found(request)

    if not view_func.meets_requirements(request.ductus):
        return query_string_not_found(request)
    response = view_func(request)

    if request.method == "GET" and not response.has_header("ETag"):
        if getattr(response, "_unvarying", False):
            response["ETag"] = unvaried_etag
        else:
            vary_headers = {
                header.strip().lower()
                for header in response.get("Vary", "").split(',')
                if header
            }
            if vary_headers <= {'cookie', 'accept-language'}:
                response["ETag"] = varied_etag
    return response
Exemple #10
0
 def save(self, *args, **kwargs):
     """Check the revision's urn and authorship, then save it.

     ``self.urn`` is stored without its ``urn:`` prefix.  A non-empty urn
     must be present in the resource database, and either ``author`` or
     ``author_ip`` must be set; otherwise ``exceptions.ValidationError`` is
     raised.
     """
     # fixme: See Django #6845.  We may need to move these tests to a
     # special validation function some day.
     assert not self.urn.startswith('urn:')
     if self.urn:
         prefixed_urn = 'urn:%s' % self.urn
         if prefixed_urn not in get_resource_database():
             raise exceptions.ValidationError(_("urn is not in database: urn:%s") % self.urn)
     if not (self.author or self.author_ip):
         raise exceptions.ValidationError(_("A user or IP address must be given when saving a revision"))
     return super(WikiRevision, self).save(*args, **kwargs)
Exemple #11
0
def verify(collection, urn, current_wikipages_list, force_update=False):
    """Updates a urn's indexing info and returns the set of its recursive links.

    `collection`: the mongo collection to use as returned by ``get_indexing_mongo_database()``.
    `urn`: the urn to update the index for, starting with "urn:".
    `current_wikipages_list`: the urls pointing to `urn`; stored sorted under ``current_wikipages``.
    `force_update`: set to True to update the index even if `urn` is already in the index (defaults to ``False``).

    Linked urns are verified recursively (without forcing an update and
    with an empty wikipages list).  A urn whose data raises
    ``UnexpectedHeader`` is indexed with ``fqn`` set to ``None`` and
    contributes no links.
    """
    if not force_update:
        q = collection.find_one({"urn": urn}, {"recursive_links": 1})
        if q:
            # Already indexed: reuse what is stored.  An entry may lack the
            # field (e.g. one upserted below with only ``fqn``).
            return set(q.get("recursive_links", ()))

    resource_database = get_resource_database()

    try:
        tree = resource_database.get_xml_tree(urn)
    except UnexpectedHeader:
        # it must be a blob
        perform_upsert(collection, urn, {"fqn": None})
        return set()

    href_attr = '{http://www.w3.org/1999/xlink}href'
    parents_tag = '{http://ductus.us/ns/2009/ductus}parents'
    urn_prefix = 'urn:%s:' % hash_name

    links = set()
    for _event, element in etree.iterwalk(tree):
        link = element.attrib.get(href_attr)
        if link is None or not link.startswith(urn_prefix):
            continue
        # Links listed under <parents> are not counted.  getparent() is None
        # for the root element, which therefore cannot be such a link.
        parent = element.getparent()
        if parent is not None and parent.tag == parents_tag:
            continue
        links.add(link)

    recursive_links = set(links)
    for link in links:
        recursive_links.update(verify(collection, link, []))

    resource = resource_database.get_resource_object(urn)

    assert resource.fqn is not None
    obj = {
        "fqn": resource.fqn,
        "links": list(links),
        "recursive_links": sorted(recursive_links),
        "current_wikipages": sorted(current_wikipages_list),
    }
    try:
        obj["parents"] = sorted(
            [parent.href for parent in resource.common.parents])
        obj["tags"] = sorted([tag.value for tag in resource.tags])
    except AttributeError:
        # best effort: a resource lacking these attributes is indexed
        # without them
        pass
    perform_upsert(collection, urn, obj)

    return recursive_links
Exemple #12
0
    def page_exists(self, pagename):
        """Tell whether `pagename` names a urn stored in the resource database.

        Returns ``False`` when `pagename` does not match the urn pattern;
        otherwise the urn is rebuilt from the pattern's first two groups and
        looked up in the database.
        """
        parsed = self.__urn_colon_re.match(pagename)
        if parsed is None:
            return False

        urn = 'urn:%s:%s' % parsed.group(1, 2)
        # this will return True for blobs as well.  do we really want this?
        return urn in get_resource_database()
Exemple #13
0
    def save(self, encoding=None):
        """Store this resource in the resource database and return its urn.

        A resource that already has a urn is left untouched (no-op).
        Otherwise it is validated, serialised into an XML element named after
        ``self.fqn``, stored with the given `encoding`, and the resulting urn
        is remembered in ``self.urn``.
        """
        if not self.urn:
            self.validate()
            root_element = etree.Element(self.fqn, nsmap=self.nsmap)
            self.populate_xml_element(root_element, self.ns)
            self.urn = get_resource_database().store_xml_tree(
                root_element, encoding=encoding)
        return self.urn
Exemple #14
0
    def clean(self, data, initial=None):
        """Validate an uploaded audio file and record its mime type.

        After the parent class's cleaning, the upload is rejected if it is
        larger than the resource database's ``max_blob_size``.  Its mime type
        is then detected with ``magic`` and checked by the matching entry of
        ``verification_map``; the resulting type is stored on the returned
        object as ``ductus_mime_type``.  ``audio/wav`` uploads are converted
        to ogg and the returned object's ``file`` is replaced by the
        converted file.

        Raises ``forms.ValidationError`` for oversized files and for mime
        types that have no entry in ``verification_map``.
        """
        rv = super(AudioField, self).clean(data, initial)

        # make sure the blob is small enough to fit in the ResourceDatabase
        # without raising SizeTooLargeError
        max_blob_size = get_resource_database().max_blob_size
        if data.size > max_blob_size:
            raise forms.ValidationError(self.error_messages['file_too_large'] %
                                        max_blob_size)

        filename_requires_cleanup = False
        # Set only once the conversion has produced a file, so the cleanup in
        # ``finally`` never refers to a name that was not assigned.
        ogg_filename = None

        try:
            if hasattr(data, 'temporary_file_path'):
                filename = data.temporary_file_path()
            else:
                # the upload is not on disk: spool it to a temporary file
                fd, filename = mkstemp()
                filename_requires_cleanup = True
                with os.fdopen(fd, 'wb') as f:
                    for chunk in data.chunks():
                        f.write(chunk)

            from magic import Magic
            mime_type = Magic(mime=True).from_file(filename)
            logger.debug("Mime type detected: %s", mime_type)
            try:
                verify_file_type = verification_map[mime_type]
            except KeyError:
                raise forms.ValidationError(
                    self.error_messages['unrecognized_file_type'])

            mime_type = verify_file_type(filename, self.error_messages)
            rv.ductus_mime_type = mime_type

            # convert Wav files to ogg, so we don't waste precious space
            # this code will disappear as soon as we have a clean way to compress
            # audio on the client
            if mime_type == 'audio/wav':
                ogg_filename = convert_wav_to_ogg(filename)
                rv.ductus_mime_type = 'audio/ogg'
                rv.content_type = 'audio/ogg'
                # audio data is binary, so it must not be opened in text mode
                rv.file = open(ogg_filename, 'rb')

            return rv

        finally:
            if filename_requires_cleanup:
                os.remove(filename)
            if ogg_filename is not None:
                # NOTE(review): the converted file is removed while rv.file
                # may still be open on it, as before; this relies on the
                # platform allowing removal of an open file.
                os.remove(ogg_filename)
Exemple #15
0
def verify(collection, urn, current_wikipages_list, force_update=False):
    """Updates a urn's indexing info and returns the set of its recursive links.

    `collection`: the mongo collection to use as returned by ``get_indexing_mongo_database()``.
    `urn`: the urn to update the index for, starting with "urn:".
    `current_wikipages_list`: the urls pointing to `urn`; stored sorted under ``current_wikipages``.
    `force_update`: set to True to update the index even if `urn` is already in the index (defaults to ``False``).

    Linked urns are verified recursively (without forcing an update and
    with an empty wikipages list).  A urn whose data raises
    ``UnexpectedHeader`` is indexed with ``fqn`` set to ``None`` and
    contributes no links.
    """
    if not force_update:
        q = collection.find_one({"urn": urn}, {"recursive_links": 1})
        if q:
            # Already indexed: reuse the stored links; entries without the
            # field (e.g. upserted below with only ``fqn``) have none.
            return set(q.get("recursive_links", ()))

    resource_database = get_resource_database()

    try:
        tree = resource_database.get_xml_tree(urn)
    except UnexpectedHeader:
        # it must be a blob
        perform_upsert(collection, urn, {"fqn": None})
        return set()

    xlink_href = '{http://www.w3.org/1999/xlink}href'
    ductus_parents = '{http://ductus.us/ns/2009/ductus}parents'
    local_urn_prefix = 'urn:%s:' % hash_name

    links = set()
    for _event, element in etree.iterwalk(tree):
        if xlink_href not in element.attrib:
            continue
        # getparent() returns None for the root element; guard against it
        # instead of failing on ``None.tag``.  Hrefs under <parents> are
        # skipped.
        parent = element.getparent()
        if parent is not None and parent.tag == ductus_parents:
            continue
        link = element.attrib[xlink_href]
        if link.startswith(local_urn_prefix):
            links.add(link)

    recursive_links = set(links)
    for link in links:
        recursive_links.update(verify(collection, link, []))

    resource = resource_database.get_resource_object(urn)

    assert resource.fqn is not None
    obj = {
        "fqn": resource.fqn,
        "links": list(links),
        "recursive_links": sorted(recursive_links),
        "current_wikipages": sorted(current_wikipages_list),
    }
    try:
        obj["parents"] = sorted([parent.href for parent in resource.common.parents])
        obj["tags"] = sorted([tag.value for tag in resource.tags])
    except AttributeError:
        # best effort: a resource lacking these attributes is indexed
        # without them
        pass
    perform_upsert(collection, urn, obj)

    return recursive_links
Exemple #16
0
def view_license_info(request):
    """Render license information for a resource and its subresources.

    The template receives the requested resource followed by the resource
    object of every urn reported by ``subview(resource).subresources()``.
    """
    database = get_resource_database()
    primary = request.ductus.resource

    resources = [primary]
    for sub_urn in subview(primary).subresources():
        resources.append(database.get_resource_object(sub_urn))

    template_context = {'resources': resources}
    return render_to_response('wiki/all_license_info.html', template_context,
                              context_instance=RequestContext(request))
Exemple #17
0
 def get(self):
     """Return the resource this element refers to.

     With an empty ``href`` the unsaved resource is returned if one is
     attached, else ``None``.  Otherwise the resource is loaded from the
     resource database, type-checked, and cached together with the href it
     was loaded for, so repeated calls with an unchanged href reuse it.
     """
     if self.href == "":
         return getattr(self, "_unsaved_resource", None)

     if hasattr(self, "_cached_resource"):
         cached_href, cached_resource = self._cached_resource[0], self._cached_resource[1]
         if cached_href == self.href:
             return cached_resource

     loaded = get_resource_database().get_resource_object(self.href)
     self.__check_type(loaded)
     self._cached_resource = (self.href, loaded)
     return loaded
Exemple #18
0
def fsw_get_flashcard(request, extra_tags, prompt_side, answer_side):
    """Pick a random flashcard and return it as a JSON response.

    extra_tags: a list of tags the flashcard deck must have, in addition to
        the ``target-language:<language>`` tag built from the ``language``
        query parameter
    prompt_side: the index (0 based) of the side to use as prompt (which cannot be empty)
    answer_side: the index (0 based) of the side that must be empty

    Only GET is accepted.  Raises ``Http404`` when the search yields no page,
    or when the candidate list runs out because the pages do not exist.
    """
    if request.method != 'GET':
        raise ImmediateResponse(
            HttpTextResponseBadRequest('only GET is allowed'))

    # get the language to search for
    fallback_language = getattr(settings, "FIVE_SEC_WIDGET_DEFAULT_LANGUAGE",
                                'en')
    language = request.GET.get('language', fallback_language)
    # get a list of pages tagged as we want
    url_list = search_pages(tags=['target-language:' + language] + extra_tags)

    if not url_list:
        raise Http404('No material available for this language')

    #url_list = [url for url in url_list if url.split(':')[0] == language]
    database = get_resource_database()
    # keep drawing until a card has a prompt side but no answer side
    while True:
        url = url_list[random.randrange(len(url_list))]
        try:
            page = WikiPage.objects.get(name=url['absolute_pagename'])
        except WikiPage.DoesNotExist:
            # drop the dangling entry and draw again, unless none are left
            url_list.remove(url)
            if not url_list:
                raise Http404('wikipage does not exist: ' + url['path'])
            continue

        deck = database.get_resource_object(
            'urn:' + page.get_latest_revision().urn)
        card_index = random.randrange(len(deck.cards.array))
        fc = deck.cards.array[card_index].get()
        prompt = fc.sides.array[prompt_side].get()
        answer = fc.sides.array[answer_side].get()
        if prompt and not answer:
            break

    payload = json.loads(resource_json(fc))
    # temporary hack for FSI: add the URL this flashcard is taken from
    payload['fsi_url'] = url['absolute_pagename']
    payload['fsi_index'] = card_index
    return render_json_response(payload)
Exemple #19
0
def get_joined_audio_mediacache_url(resource, audio_urn_list, mime_type):
    """Returns relative urls, meant to have the mediacache prefix prepended

    Returns ``None`` for an empty `audio_urn_list`.  A single urn resolves to
    the url of that audio resource itself; several urns resolve to a url on
    `resource` keyed by the SHA-1 of the space-joined urn list and the blob
    href of the first audio resource.  `mime_type` must be ``audio/webm`` or
    ``audio/mp4``.
    """
    assert mime_type in ('audio/webm', 'audio/mp4')

    if not audio_urn_list:
        return None

    database = get_resource_database()
    head_audio = database.get_resource_object(audio_urn_list[0])

    if len(audio_urn_list) == 1:
        return resolve_relative_mediacache_url(head_audio)

    joined_urns = ' '.join(audio_urn_list)
    urn_list_hash = hashlib.sha1(joined_urns).hexdigest()
    return resolve_relative_mediacache_url(
        resource, mime_type, urn_list_hash, head_audio.blob.href)
Exemple #20
0
def fsw_get_flashcard(request, extra_tags, prompt_side, answer_side):
    """Pick a random flashcard and return it as a JSON response.

    extra_tags: a list of tags the flashcard deck must have, in addition to
        the ``target-language:<language>`` tag built from the ``language``
        query parameter
    prompt_side: the index (0 based) of the side to use as prompt (which cannot be empty)
    answer_side: the index (0 based) of the side that must be empty

    Requests other than GET get no response object (``None`` is returned).
    Raises ``Http404`` on an indexing error, when the search yields no page,
    or when the candidate list runs out because the pages do not exist.
    """
    if request.method != 'GET':
        # nothing is done for other methods
        return None

    # get the language to search for
    fallback_language = getattr(settings, "FIVE_SEC_WIDGET_DEFAULT_LANGUAGE", 'en')
    language = request.GET.get('language', fallback_language)
    search_tags = ['target-language:' + language] + extra_tags
    # get a list of pages tagged as we want
    try:
        url_list = search_pages(tags=search_tags)
    except IndexingError:
        raise Http404('Indexing error, contact the site administrator')

    if url_list == []:
        raise Http404('No material available for this language')

    #url_list = [url for url in url_list if url.split(':')[0] == language]
    database = get_resource_database()
    # keep drawing until a card has a prompt side but no answer side
    while True:
        url = url_list[random.randrange(len(url_list))]
        try:
            page = WikiPage.objects.get(name=url['absolute_pagename'])
        except WikiPage.DoesNotExist:
            # drop the dangling entry and draw again, unless none are left
            url_list.remove(url)
            if not url_list:
                raise Http404('wikipage does not exist: ' + url['path'])
            continue

        deck = database.get_resource_object('urn:' + page.get_latest_revision().urn)
        card_index = random.randrange(len(deck.cards.array))
        fc = deck.cards.array[card_index].get()
        prompt = fc.sides.array[prompt_side].get()
        answer = fc.sides.array[answer_side].get()
        if prompt and not answer:
            break

    payload = json.loads(resource_json(fc))
    # temporary hack for FSI: add the URL this flashcard is taken from
    payload['fsi_url'] = url['absolute_pagename']
    payload['fsi_index'] = card_index
    return render_json_response(payload)
Exemple #21
0
def view_diff(request):
    """Render a diff between the requested resource and another one.

    The other resource is the one named by the ``diff`` query parameter.  If
    the parameter is missing or names no existing resource, the first parent
    of the requested resource is used instead; without parents the request is
    answered with ``query_string_not_found``.
    """
    current = request.ductus.resource
    try:
        database = get_resource_database()
        other = database.get_resource_object(request.GET["diff"])
    except KeyError:
        # This could mean there is no "diff" in the query string, or that the
        # resource object doesn't exist.  Fall back to the first parent, and
        # fail out if there is none.
        parent_links = current.common.parents.array
        if not parent_links:
            return query_string_not_found(request)
        other = parent_links[0].get()

    template_context = {'diff': Diff(current, other)}
    return render_to_response("wiki/diff.html", template_context,
                              RequestContext(request))
Exemple #22
0
    def view_xml_as_html(request):
        """Render the resource's XML as syntax-highlighted HTML.

        The XML is highlighted with pygments, passed through
        ``allow_line_wrap`` and ``urn_linkify``, and rendered together with
        the formatter's ``.highlight`` style definitions.
        """
        resource_urn = request.ductus.resource.urn
        xml_source = ''.join(get_resource_database().get_xml(resource_urn))

        html_formatter = pygments.formatters.HtmlFormatter()
        highlighted = pygments.highlight(
            xml_source, pygments.lexers.XmlLexer(), html_formatter)
        linked_html = urn_linkify(allow_line_wrap(highlighted),
                                  query_string='view=xml_as_html')
        stylesheet = html_formatter.get_style_defs('.highlight')

        template_context = {
            'html': mark_safe(linked_html),
            'css': mark_safe(stylesheet),
        }
        return render_to_response('wiki/xml_display.html', template_context,
                                  context_instance=RequestContext(request))
Exemple #23
0
def get_joined_audio_mediacache_url(resource, audio_urn_list, mime_type):
    """Returns relative urls, meant to have the mediacache prefix prepended

    An empty `audio_urn_list` gives ``None``.  One urn gives the url of that
    audio resource itself.  Several urns give a url on `resource` that is
    keyed by the SHA-1 of the space-joined urn list and by the blob href of
    the first audio resource.  `mime_type` must be ``audio/webm`` or
    ``audio/mp4``.
    """
    assert mime_type in ('audio/webm', 'audio/mp4')

    if not audio_urn_list:
        return None

    first_urn = audio_urn_list[0]
    first_audio = get_resource_database().get_resource_object(first_urn)

    if len(audio_urn_list) != 1:
        urn_list_hash = hashlib.sha1(' '.join(audio_urn_list)).hexdigest()
        return resolve_relative_mediacache_url(
            resource, mime_type, urn_list_hash, first_audio.blob.href)
    return resolve_relative_mediacache_url(first_audio)
Exemple #24
0
    def clean(self, data, initial=None):
        """Validate an uploaded picture file and record its mime type.

        After the parent class's cleaning, ``None`` data is passed through
        unchanged.  Otherwise the upload is rejected if it is larger than the
        resource database's ``max_blob_size``; its mime type is detected with
        ``magic`` and stored on the returned object as ``ductus_mime_type``.

        Raises ``forms.ValidationError`` for oversized files.
        """
        cleaned = super(PictureFileField, self).clean(data, initial)
        if data is None:
            # this happens with flickr pictures, see https://code.ductus.us/ticket/187
            return cleaned

        # make sure the blob is small enough to fit in the ResourceDatabase
        # without raising SizeTooLargeError
        size_limit = get_resource_database().max_blob_size
        if data.size > size_limit:
            raise forms.ValidationError(self.error_messages['file_too_large'] %
                                        size_limit)

        spooled_to_tempfile = False

        try:
            if hasattr(data, 'temporary_file_path'):
                filename = data.temporary_file_path()
            else:
                # the upload is not on disk: spool it to a temporary file
                fd, filename = mkstemp()
                spooled_to_tempfile = True
                with os.fdopen(fd, 'wb') as spool:
                    for chunk in data.chunks():
                        spool.write(chunk)

            from magic import Magic
            detected_type = Magic(mime=True).from_file(filename)
            # NOTE(review): only the logging call sits inside this try, so
            # the KeyError branch looks like a leftover — confirm.
            try:
                logger.debug("Mime type detected: %s", detected_type)
            except KeyError:
                raise forms.ValidationError(
                    self.error_messages['unrecognized_file_type'])

            #TODO: double check the file type, like we do for audio files
            cleaned.ductus_mime_type = detected_type
            return cleaned

        finally:
            if spooled_to_tempfile:
                os.remove(filename)
Exemple #25
0
def mediacache_cat_audio(first_blob_urn, audio_urn_list, mime_type):
    """Concatenate the audio resources in `audio_urn_list` for the mediacache.

    Returns ``None`` when there are fewer than two urns, or when the blob of
    the first audio resource is not `first_blob_urn`.  Otherwise the result
    of the webm or m4a concatenation helper is returned, depending on
    `mime_type`; any other mime type raises ``Exception``.
    """
    if len(audio_urn_list) < 2:
        # there's no reason to do concatenation...
        # get_joined_audio_mediacache_url() points to the original (single)
        # file anyway
        return None

    database = get_resource_database()
    audio_resources = []
    for audio_urn in audio_urn_list:
        audio_resources.append(database.get_resource_object(audio_urn))

    if audio_resources[0].blob.href != first_blob_urn:
        return None

    if mime_type == 'audio/webm':
        return _cat_webm(audio_resources)
    if mime_type == 'audio/mp4':
        return _cat_m4a(audio_resources)
    raise Exception("attempting to concatenate unsupported format: %s" % mime_type)
Exemple #26
0
    def save_blueprint(cls, blueprint, save_context):
        """`blueprint` is a json object. Returns a URN

        A blueprint with an ``href`` refers to an existing resource, whose
        urn is returned as-is.  Otherwise its ``resource`` entry must contain
        either ``@patch`` (urn of a resource to clone and modify) or
        ``@create`` (fqn of a registered model that is a subclass of `cls`);
        the resulting resource is patched from the remaining entries and
        saved.

        Raises ``BlueprintError`` when neither ``href`` nor ``resource`` is
        given, when ``@create`` names an unknown or unacceptable model, or
        when neither ``@patch`` nor ``@create`` is present.
        """
        # fixme: make sure the end result is compatible with the class.  this
        # might actually be easy if we just make sure the @constructor will
        # make a class we want, but this would eliminate our ability to make a
        # @constructor that outputs a resource of some type that is unknown
        # before its construction

        database = get_resource_database()

        blueprint_expects_dict(blueprint)

        if 'href' in blueprint:
            existing_urn = blueprint['href']
            blueprint_expects_string(existing_urn)
            # we ensure it exists and is not a blob, then return the urn
            database.get_xml(existing_urn)
            return existing_urn

        if 'resource' not in blueprint:
            raise BlueprintError("blueprint needs either `href` or `resource`", blueprint)
        blueprint_expects_dict(blueprint['resource'])
        spec = dict(blueprint['resource'])  # copy it so we can modify

        if '@patch' in spec:
            base_urn = spec.pop('@patch')
            resource = database.get_resource_object(base_urn).clone()
        elif '@create' in spec:
            fqn = spec.pop('@create')
            try:
                model_class = _registered_ductmodels[fqn]
            except KeyError:
                raise BlueprintError("invalid argument to `@create`", spec)
            if not issubclass(model_class, cls):
                raise BlueprintError("resource is not of an acceptable model type", spec)
            resource = model_class()
        else:
            raise BlueprintError("resource blueprint must contain '@patch' or '@create'", spec)

        resource.patch_from_blueprint(spec, save_context)
        return resource.save()
Exemple #27
0
def edit_textwiki(request):
    """Render the wikitext editing page.

    On POST the request is first passed to ``handle_blueprint_post`` for the
    ``Wikitext`` model (its return value is not used here).  If the request
    carries a ductus resource, it is passed to the template; legacy
    ``creole-1.0`` content is converted in memory so that it can be edited
    as ``ductus-html5``.
    """

    resource_database = get_resource_database()

    if request.method == 'POST':
        handle_blueprint_post(request, Wikitext)

    resource = None
    request_has_resource = (hasattr(request, 'ductus')
                            and getattr(request.ductus, 'resource', None))
    if request_has_resource:
        resource = request.ductus.resource
        # handle old creole content: make it look like ductus-html5 so we can
        # edit it.  content is not saved to creole anymore, only to
        # ductus-html5
        uses_legacy_creole = resource.blob.markup_language == 'creole-1.0'
        if uses_legacy_creole:
            resource.blob.markup_language = 'ductus-html5'
            resource.text = creole(resource.text, resource.blob.natural_language)

    template_context = {
        'resource_json': resource,
        'writable_directories': get_writable_directories_for_user(request.user),
    }
    return render_to_response('textwiki/edit_wiki.html', template_context,
                              context_instance=RequestContext(request))
Exemple #28
0
    def clean(self, data, initial=None):
        """Validate an uploaded picture file and record its detected mime type.

        Runs the parent field's ``clean`` first.  When `data` is None the
        parent's result is returned as is.  Otherwise the upload is rejected
        with the ``file_too_large`` message if it exceeds the resource
        database's ``max_blob_size``; the mime type is then detected with
        ``magic`` from a file on disk and stored on the returned value as
        ``ductus_mime_type``.
        """
        cleaned = super(PictureFileField, self).clean(data, initial)
        if data is None:
            # this happens with flickr pictures, see https://code.ductus.us/ticket/187
            return cleaned

        # make sure the blob is small enough to fit in the ResourceDatabase
        # without raising SizeTooLargeError
        size_limit = get_resource_database().max_blob_size
        if data.size > size_limit:
            raise forms.ValidationError(self.error_messages['file_too_large'] % size_limit)

        # only a temporary file created here is removed again in `finally`
        created_temp_file = False

        try:
            if hasattr(data, 'temporary_file_path'):
                path = data.temporary_file_path()
            else:
                # the upload has no path on disk: spool it to a temp file
                descriptor, path = mkstemp()
                created_temp_file = True
                with os.fdopen(descriptor, 'wb') as spool:
                    for chunk in data.chunks():
                        spool.write(chunk)

            import magic
            detected_type = magic.Magic(mime=True).from_file(path)
            # NOTE(review): this handler looks unreachable -- logging a
            # string is not expected to raise KeyError; presumably a mime
            # type lookup used to live here.  Confirm before removing.
            try:
                logger.debug("Mime type detected: %s", detected_type)
            except KeyError:
                raise forms.ValidationError(self.error_messages['unrecognized_file_type'])

            #TODO: double check the file type, like we do for audio files
            cleaned.ductus_mime_type = detected_type
            return cleaned

        finally:
            if created_temp_file:
                os.remove(path)
Exemple #29
0
def mediacache_cat_audio(first_blob_urn, audio_urn_list, mime_type):
    """Concatenate the audio resources named in `audio_urn_list`.

    Returns None when fewer than two urns are given, or when the blob href of
    the first loaded resource differs from `first_blob_urn`.  Otherwise the
    result of the webm or m4a concatenation helper is returned, chosen by
    `mime_type`; any other mime type raises Exception.
    """
    if len(audio_urn_list) < 2:
        # there's no reason to do concatenation...
        # get_joined_audio_mediacache_url() points to the original (single)
        # file anyway
        return None

    database = get_resource_database()
    loaded = []
    for audio_urn in audio_urn_list:
        loaded.append(database.get_resource_object(audio_urn))

    # the first resource's blob must be the one the caller named
    if loaded[0].blob.href != first_blob_urn:
        return None

    if mime_type == 'audio/webm':
        concatenate = _cat_webm
    elif mime_type == 'audio/mp4':
        concatenate = _cat_m4a
    else:
        raise Exception("attempting to concatenate unsupported format: %s" %
                        mime_type)
    return concatenate(loaded)
Exemple #30
0
    def validate(self):
        """Resolve ``self.uri`` to a picture urn and store it in ``self.urn``.

        The first pattern of ``self._re_objects`` that matches the uri
        supplies the hash type and digest of the urn.  A ValidationError is
        raised when that urn is not in the resource database or when loading
        it as a Picture reports a model mismatch.
        """
        from ductus.resource import get_resource_database
        from ductus.resource.ductmodels import DuctModelMismatchError
        resource_database = get_resource_database()

        for pattern in self._re_objects:
            found = pattern.match(self.uri)
            if found is None:
                continue

            # group 1 is the hash type, group 2 the digest
            urn = "urn:%s:%s" % (found.group(1), found.group(2))
            if urn not in resource_database:
                raise forms.ValidationError(_(u"This urn cannot be found on the server you are currently accessing."))
            try:
                Picture.load(urn)
            except DuctModelMismatchError:
                raise forms.ValidationError(_(u"This urn represents content that is not a picture."))
            # fixme: handle exception raised by get_resource_object if it's
            # actually a blob
            self.urn = urn
            return

        # this should never be reached
        assert self.handles(self.uri)
Exemple #31
0
def view_jsonp(request):
    """Return the requested resource's JSON dict wrapped in a ``callback(...)`` call.

    The response is served as ``application/javascript``.
    """
    resource_urn = request.ductus.resource.urn
    loaded = get_resource_database().get_resource_object(resource_urn)
    payload = json.dumps(loaded.output_json_dict())
    return HttpResponse("callback({});".format(payload),
                        content_type='application/javascript; charset=utf-8')
Exemple #32
0
 def validate(self, strict=True):
     """Validate this element; in strict mode also type-check its target.

     The parent class validation always runs.  When `strict` is true and
     ``self.href`` is set, the referenced resource is loaded from the
     resource database and handed to the private type check.
     """
     super(ResourceElement, self).validate(strict)
     if not (strict and self.href):
         return
     target = get_resource_database().get_resource_object(self.href)
     self.__check_type(target)
Exemple #33
0
 def store(self, iterable):
     """Store `iterable` as a blob and keep the value ``store_blob`` returns in ``self.href``."""
     stored_reference = get_resource_database().store_blob(iterable)
     self.href = stored_reference
Exemple #34
0
 def __iter__(self):
     """Iterate over the blob referenced by ``self.href``.

     Returns whatever ``get_blob`` yields for the href, or an empty iterator
     when no href is set.
     """
     if self.href:
         return get_resource_database().get_blob(self.href)
     # __iter__ must return an iterator; a bare tuple makes iter(self) raise
     # "TypeError: iter() returned non-iterator".
     return iter(())
Exemple #35
0
 def load(cls, urn):
     """Load `urn` from the resource database, requiring an exact type match.

     Returns the resource object when its type is exactly `cls`; otherwise
     raises DuctModelMismatchError naming the expected and actual types.
     """
     loaded = get_resource_database().get_resource_object(urn)
     actual_type = type(loaded)
     if actual_type != cls:
         message = "Expecting %s, got %s" % (cls, actual_type)
         raise DuctModelMismatchError(message)
     return loaded
Exemple #36
0
    def handle_noargs(self, **options):
        """Rebuild the ``urn_index`` collection of the indexing mongo database.

        Every key of the resource database is visited.  For each urn the
        index records its fqn, its direct links, its recursive links, the
        wiki pages whose latest revision points at it and, when the resource
        has them, its parents and tags.  Keys that fail are logged and
        skipped; a summary count is logged at the end.

        Raises:
            Exception: if ``get_indexing_mongo_database()`` returns None.
        """
        from ductus.index import get_indexing_mongo_database
        indexing_db = get_indexing_mongo_database()
        if indexing_db is None:
            raise Exception("get_indexing_mongo_database() returned None; "
                            "cannot update the urn index")
        collection = indexing_db.urn_index

        def perform_upsert(urn, obj, ignore=None):
            """Upsert `obj` (plus its urn) into the index and mark the urn verified.

            `ignore` is accepted but not used.
            """
            # REMEMBER that dictionary order matters in mongodb; we just ignore
            # it

            # fixme: first inspect element to see if things might already be
            # right.  also check to make sure there aren't any unexpected
            # attributes on the toplevel element.  and do the same thing for
            # blobs too.

            obj = dict(obj)  # copy so the caller's dict is not modified
            obj["urn"] = urn
            collection.update({"urn": urn}, obj, upsert=True, safe=True)
            verified_urns.add(urn)

        logging.basicConfig(level=logging.INFO)  # FIXME

        # create the mongodb indexes
        collection.ensure_index("urn", unique=True, drop_dups=True)
        collection.ensure_index("parents", sparse=True)
        collection.ensure_index("tags", sparse=True)
        collection.ensure_index("links")
        collection.ensure_index("recursive_links")

        # Begin actual code

        from lxml import etree

        from ductus.resource import get_resource_database, UnexpectedHeader, hash_name
        from ductus.wiki.models import WikiPage

        resource_database = get_resource_database()

        # urns already written to the index during this run
        verified_urns = set()
        # urn -> set of names of wiki pages whose latest revision is that urn
        current_wikipages_map = {}

        # counter kept in a dict so the nested function can update it in place
        operations = {None: 0}

        def verify(urn):
            """Updates a urn's indexing info and returns the set of its recursive links
            """
            operations[None] += 1
            logger.info("operation %d: processing %s", operations[None], urn)

            if urn in verified_urns:
                # already indexed in this run: reuse what was stored
                q = collection.find_one({"urn": urn}, {"recursive_links": 1})
                try:
                    return set(q["recursive_links"])
                except KeyError:
                    return set()

            try:
                tree = resource_database.get_xml_tree(urn)
            except UnexpectedHeader:
                # it must be a blob
                perform_upsert(urn, {"fqn": None})
                return set()

            # collect xlink:href values that are urns of this hash type,
            # skipping elements that sit directly under the `parents` element
            links = set()
            for event, element in etree.iterwalk(tree):
                if '{http://www.w3.org/1999/xlink}href' in element.attrib and element.getparent().tag != '{http://ductus.us/ns/2009/ductus}parents':
                    link = element.attrib['{http://www.w3.org/1999/xlink}href']
                    if link.startswith('urn:%s:' % hash_name):
                        links.add(link)

            # recurse first so each linked urn is indexed before this one
            recursive_links = set(links)
            for link in links:
                additional_links = verify(link)
                recursive_links.update(additional_links)

            resource = resource_database.get_resource_object(urn)

            assert resource.fqn is not None
            obj = {
                "fqn": resource.fqn,
                "links": list(links),
                "recursive_links": sorted(recursive_links),
                "current_wikipages": sorted(current_wikipages_map.get(urn, ())),
            }
            try:
                obj["parents"] = sorted([parent.href for parent in resource.common.parents])
                obj["tags"] = sorted([tag.value for tag in resource.tags])
            except AttributeError:
                # resources lacking these attributes are indexed without them
                pass
            perform_upsert(urn, obj)

            return recursive_links

        for wikipage in WikiPage.objects.all():
            revision = wikipage.get_latest_revision()
            if revision is not None and revision.urn:
                urn = 'urn:' + revision.urn
                current_wikipages_map.setdefault(urn, set()).add(wikipage.name)

        n_attempted = n_successful = 0
        for key in resource_database:
            n_attempted += 1
            try:
                verify(key)
            except Exception:
                # best effort: one bad key must not abort the whole run, but
                # keep the traceback so the failure can be diagnosed
                logger.warning("Key failed: %s", key, exc_info=True)
            else:
                n_successful += 1

        logger.info("Successfully processed %d of %d keys", n_successful,
                    n_attempted)
#!/usr/bin/env python

from lxml import etree

import ductus.initialize
from ductus.resource import get_resource_database, UnexpectedHeader
from ductus.wiki.models import WikiRevision

# This script loads everything into memory, which works for our purposes but
# wouldn't if there were a higher number of objects in the database

# Namespace-qualified name of the XLink ``href`` attribute.
HREF_FQN = '{http://www.w3.org/1999/xlink}href'
# URL of the Creative Commons BY-SA 3.0 license.
# NOTE(review): presumably applied where an object has no license -- confirm
# against its use further down the script.
DEFAULT_LICENSE = 'http://creativecommons.org/licenses/by-sa/3.0/'

# Resource database handle, obtained once when the script is loaded.
rdb = get_resource_database()

# Memo for update_object(): maps an already-processed urn to the urn that was
# returned for it.
urn_update_map = {}

def update_object(urn, default_author):
    """Return the updated urn for `urn`, computing it at most once.

    Results are memoized in ``urn_update_map``.  On a cache miss the work is
    delegated to ``_update_object`` and the new urn is printed before it is
    returned.
    """
    if urn in urn_update_map:
        return urn_update_map[urn]
    new_urn = _update_object(urn, default_author)
    urn_update_map[urn] = new_urn
    # parenthesized form prints the same on Python 2 and is valid on Python 3
    print(new_urn)
    return new_urn

def _update_object(urn, default_author):
    try:
        tree = rdb.get_xml_tree(urn)
    except UnexpectedHeader:
        # must be a blob
Exemple #38
0
    def handle_noargs(self, **options):
        """Rebuild the ``urn_index`` collection of the indexing mongo database.

        Every key of the resource database is visited.  For each urn the
        index records its fqn, its direct links, its recursive links, the
        wiki pages whose latest revision points at it and, when the resource
        has them, its parents and tags.  Keys that fail are logged and
        skipped; a summary count is logged at the end.

        Raises:
            Exception: if ``get_indexing_mongo_database()`` returns None.
        """
        from ductus.index import get_indexing_mongo_database
        indexing_db = get_indexing_mongo_database()
        if indexing_db is None:
            raise Exception("get_indexing_mongo_database() returned None; "
                            "cannot update the urn index")
        collection = indexing_db.urn_index

        def perform_upsert(urn, obj, ignore=None):
            """Upsert `obj` (plus its urn) into the index and mark the urn verified.

            `ignore` is accepted but not used.
            """
            # REMEMBER that dictionary order matters in mongodb; we just ignore
            # it

            # fixme: first inspect element to see if things might already be
            # right.  also check to make sure there aren't any unexpected
            # attributes on the toplevel element.  and do the same thing for
            # blobs too.

            obj = dict(obj)  # copy so the caller's dict is not modified
            obj["urn"] = urn
            collection.update({"urn": urn}, obj, upsert=True, safe=True)
            verified_urns.add(urn)

        logging.basicConfig(level=logging.INFO) # FIXME

        # create the mongodb indexes
        collection.ensure_index("urn", unique=True, drop_dups=True)
        collection.ensure_index("parents", sparse=True)
        collection.ensure_index("tags", sparse=True)
        collection.ensure_index("links")
        collection.ensure_index("recursive_links")

        # Begin actual code

        from lxml import etree

        from ductus.resource import get_resource_database, UnexpectedHeader, hash_name
        from ductus.wiki.models import WikiPage

        resource_database = get_resource_database()

        # urns already written to the index during this run
        verified_urns = set()
        # urn -> set of names of wiki pages whose latest revision is that urn
        current_wikipages_map = {}

        # counter kept in a dict so the nested function can update it in place
        operations = {None: 0}

        def verify(urn):
            """Updates a urn's indexing info and returns the set of its recursive links
            """
            operations[None] += 1
            logger.info("operation %d: processing %s", operations[None], urn)

            if urn in verified_urns:
                # already indexed in this run: reuse what was stored
                q = collection.find_one({"urn": urn}, {"recursive_links": 1})
                try:
                    return set(q["recursive_links"])
                except KeyError:
                    return set()

            try:
                tree = resource_database.get_xml_tree(urn)
            except UnexpectedHeader:
                # it must be a blob
                perform_upsert(urn, {"fqn": None})
                return set()

            # collect xlink:href values that are urns of this hash type,
            # skipping elements that sit directly under the `parents` element
            links = set()
            for event, element in etree.iterwalk(tree):
                if '{http://www.w3.org/1999/xlink}href' in element.attrib and element.getparent().tag != '{http://ductus.us/ns/2009/ductus}parents':
                    link = element.attrib['{http://www.w3.org/1999/xlink}href']
                    if link.startswith('urn:%s:' % hash_name):
                        links.add(link)

            # recurse first so each linked urn is indexed before this one
            recursive_links = set(links)
            for link in links:
                additional_links = verify(link)
                recursive_links.update(additional_links)

            resource = resource_database.get_resource_object(urn)

            assert resource.fqn is not None
            obj = {
                "fqn": resource.fqn,
                "links": list(links),
                "recursive_links": sorted(recursive_links),
                "current_wikipages": sorted(current_wikipages_map.get(urn, ())),
            }
            try:
                obj["parents"] = sorted([parent.href for parent in resource.common.parents])
                obj["tags"] = sorted([tag.value for tag in resource.tags])
            except AttributeError:
                # resources lacking these attributes are indexed without them
                pass
            perform_upsert(urn, obj)

            return recursive_links

        for wikipage in WikiPage.objects.all():
            revision = wikipage.get_latest_revision()
            if revision is not None and revision.urn:
                urn = 'urn:' + revision.urn
                current_wikipages_map.setdefault(urn, set()).add(wikipage.name)

        n_attempted = n_successful = 0
        for key in resource_database:
            n_attempted += 1
            try:
                verify(key)
            except Exception:
                # best effort: one bad key must not abort the whole run, but
                # keep the traceback so the failure can be diagnosed
                logger.warning("Key failed: %s", key, exc_info=True)
            else:
                n_successful += 1

        logger.info("Successfully processed %d of %d keys", n_successful, n_attempted)
Exemple #39
0
 def view_json(request):
     """Return the requested resource's JSON dict as a ``text/plain`` response."""
     resource_urn = request.ductus.resource.urn
     loaded = get_resource_database().get_resource_object(resource_urn)
     body = json.dumps(loaded.output_json_dict())
     return HttpResponse(body, content_type='text/plain; charset=utf-8')