Example #1
0
def download_info(request, datafile_id, format): #@ReservedAssignment
    """Return the dimensions of an image datafile as XML or JSON.

    Responds 404 when the datafile is absent, the user lacks download
    access, the file cannot be retrieved, or ``format`` is not one of
    'xml' or 'json'.
    """
    # Get datafile (and return 404 if absent)
    try:
        datafile = Dataset_File.objects.get(pk=datafile_id)
    except Dataset_File.DoesNotExist:
        return HttpResponseNotFound()
    # Check user has access to datafile; a 404 (not 403) avoids
    # revealing that the file exists to unauthorised users.
    if not has_datafile_download_access(request=request,
                                        dataset_file_id=datafile.id):
        return HttpResponseNotFound()

    file_obj = datafile.get_file()
    # PEP 8: compare against None with 'is', not '==' (fixes the
    # original '== None' comparison).
    if file_obj is None:
        return HttpResponseNotFound()
    from contextlib import closing
    # closing() guarantees the file handle is released even if the
    # image library raises while reading the header.
    with closing(file_obj) as f:
        with Image(file=f) as img:
            data = {'identifier': datafile.id,
                    'height': img.height,
                    'width': img.width}

    if format == 'xml':
        info = Element('info', nsmap=NSMAP)
        identifier = SubElement(info, 'identifier')
        identifier.text = datafile_id
        height = SubElement(info, 'height')
        height.text = str(data['height'])
        width = SubElement(info, 'width')
        width.text = str(data['width'])
        return HttpResponse(etree.tostring(info, method='xml'),
                            mimetype="application/xml")
    if format == 'json':
        return HttpResponse(json.dumps(data), mimetype="application/json")
    return HttpResponseNotFound()
Example #2
0
    def download_file(self, request, **kwargs):
        """Stream a datafile to the client as an attachment while
        recording download statistics.

        curl needs the -J switch to get the filename right;
        auth needs to be added manually here.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        if not has_datafile_download_access(
                request=request, datafile_id=kwargs['pk']):
            return HttpResponseForbidden()

        record = self._meta.queryset.get(pk=kwargs['pk'])
        bundle = self.build_bundle(obj=record, request=request)
        self.authorized_read_detail([record], bundle)

        # Chunked reads via FileWrapper keep memory usage flat; the
        # IteratorTracker layer reports download statistics as the
        # stream is consumed.
        stream = FileWrapper(record.get_file())
        tracking = dict(
            label='file',
            session_id=request.COOKIES.get('_ga'),
            ip=request.META.get('REMOTE_ADDR', ''),
            user=request.user,
            total_size=record.size,
            num_files=1,
            ua=request.META.get('HTTP_USER_AGENT', None))
        response = StreamingHttpResponse(
            IteratorTracker(stream, tracking),
            content_type=record.mimetype)
        response['Content-Length'] = record.size
        response['Content-Disposition'] = (
            'attachment; filename="%s"' % record.filename)
        self.log_throttled_access(request)
        return response
Example #3
0
    def download_file(self, request, **kwargs):
        """Stream the requested datafile back as an attachment.

        curl needs the -J switch to get the filename right;
        auth needs to be added manually here.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        if not has_datafile_download_access(request=request,
                                            datafile_id=kwargs['pk']):
            return HttpResponseForbidden()

        record = self._meta.queryset.get(pk=kwargs['pk'])
        bundle = self.build_bundle(obj=record, request=request)
        self.authorized_read_detail([record], bundle)
        # Chunked reads keep memory usage flat for large files
        stream = FileWrapper(record.get_file())
        response = StreamingHttpResponse(stream,
                                         content_type=record.mimetype)
        response['Content-Length'] = record.size
        response['Content-Disposition'] = (
            'attachment; filename="%s"' % record.filename)
        self.log_throttled_access(request)
        return response
Example #4
0
    def dfo_is_online(self, request, **kwargs):
        '''
        Return the online status of a DataFileObject stored in a
        Hierarchical Storage Management (HSM) system.

        Responds with JSON ``{'online': bool}`` on success, or a JSON
        error payload with a 500 status when the HSM query fails.
        '''
        from .exceptions import HsmException

        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        dfo = DataFileObject.objects.get(id=kwargs['pk'])
        if not has_datafile_download_access(
                request=request, datafile_id=dfo.datafile.id):
            return HttpResponseForbidden()

        self.authorized_read_detail(
            [dfo.datafile],
            self.build_bundle(obj=dfo.datafile, request=request))

        # Both branches below return, so no trailing fallback response
        # is needed (the original ended with an unreachable
        # HttpResponseServerError()).
        try:
            online_status = dfo_online(dfo)
            return JsonResponse({'online': online_status})
        except HsmException as err:
            return JsonResponse(
                {'error_message': "%s: %s" % (type(err), str(err))},
                status=HttpResponseServerError.status_code)
Example #5
0
    def recall_dfo(self, request, **kwargs):
        '''
        Recall an archived DataFileObject from the HSM system by
        queueing an asynchronous recall task.
        '''
        from .exceptions import HsmException

        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        dfo = DataFileObject.objects.get(id=kwargs['pk'])
        if not has_datafile_download_access(
                request=request, datafile_id=dfo.datafile.id):
            return HttpResponseForbidden()

        bundle = self.build_bundle(obj=dfo.datafile, request=request)
        self.authorized_read_detail([dfo.datafile], bundle)

        try:
            dfo_recall.apply_async(args=[dfo.id, request.user.id],
                                   priority=dfo.priority)
        except HsmException as exc:
            # Only reachable when CELERY_ALWAYS_EAGER is True, which
            # makes the task execute synchronously in-process.
            return JsonResponse(
                {'error_message': "%s: %s" % (type(exc), str(exc))},
                status=HttpResponseServerError.status_code)

        return JsonResponse({
            "message": "Recall requested for DFO %s" % dfo.id
        })
Example #6
0
def _create_download_response(request, datafile_id, disposition='attachment'):  # too complex # noqa
    """Build the HTTP response that delivers a single datafile.

    :param request: Django request; used for the access check and the
        ``ignore_verification_status`` query parameter
    :param datafile_id: primary key of the DataFile to serve
    :param disposition: 'attachment' to force a download, 'inline' to
        let browsers render supported images directly
    :returns: a streaming, redirect, or error HttpResponse
    """
    # Get datafile (and return 404 if absent)
    try:
        datafile = DataFile.objects.get(pk=datafile_id)
    except DataFile.DoesNotExist:
        return return_response_not_found(request)
    # Check users has access to datafile
    if not has_datafile_download_access(request=request,
                                        datafile_id=datafile.id):
        return return_response_error(request)
    # Send an image that can be seen in the browser
    if disposition == 'inline' and datafile.is_image():
        from tardis.tardis_portal.iiif import download_image
        args = (request, datafile.id, 'full', 'full', '0', 'native')
        # Send unconverted image if web-compatible
        if datafile.get_mimetype() in ('image/gif', 'image/jpeg', 'image/png'):
            return download_image(*args)
        # Send converted image
        return download_image(*args, format='png')
    # Send local file
    try:
        verified_only = True
        # Query parameter to allow download of unverified files
        ignore_verif = request.GET.get('ignore_verification_status', '0')
        # Ensure ignore_verification_status=0 etc works as expected
        # a bare ?ignore_verification_status is True
        if ignore_verif.lower() in [u'', u'1', u'true']:
            verified_only = False

        # Get file object for datafile
        file_obj = datafile.get_file(verified_only=verified_only)
        if not file_obj:
            # If file path doesn't resolve, return not found
            if verified_only:
                # 503: file exists but is still awaiting verification
                return render_error_message(request,
                                            "File is unverified, "
                                            "please try again later.",
                                            status=503)
            else:
                return return_response_not_found(request)
        # Stream in ~64 KB chunks instead of loading the file in memory
        wrapper = FileWrapper(file_obj, blksize=65535)
        response = StreamingHttpResponse(wrapper,
                                         content_type=datafile.get_mimetype())
        response['Content-Disposition'] = \
            '%s; filename="%s"' % (disposition, datafile.filename)
        return response
    except IOError:
        # If we can't read the file, return not found
        return return_response_not_found(request)
    except ValueError:  # raised when replica not verified TODO: custom excptn
        # Bounce back to the referrer (or site root) with a readable
        # explanation carried in the URL fragment.
        redirect = request.META.get('HTTP_REFERER',
                                    'http://%s/' %
                                    request.META.get('HTTP_HOST'))
        message = """The file you are trying to access has not yet been
                     verified. Verification is an automated background process.
                     Please try again later or contact the system
                     administrator if the issue persists."""
        message = ' '.join(message.split())  # removes spaces
        redirect = redirect + '#error:' + message
        return HttpResponseRedirect(redirect)
Example #7
0
def download_info(request, datafile_id, format): #@ReservedAssignment
    """Report the pixel dimensions of an image datafile.

    Serializes the identifier, height and width as XML or JSON
    depending on ``format``; any other format, a missing datafile, a
    failed access check, or an unavailable file yields a 404.
    """
    # Missing files and denied access both answer 404, so nothing is
    # revealed to unauthorised callers.
    try:
        datafile = DataFile.objects.get(pk=datafile_id)
    except DataFile.DoesNotExist:
        return HttpResponseNotFound()
    if not has_datafile_download_access(request=request,
                                        datafile_id=datafile.id):
        return HttpResponseNotFound()

    file_obj = datafile.get_file()
    if file_obj is None:
        return HttpResponseNotFound()
    from contextlib import closing
    # Read the image header to discover its dimensions, ensuring the
    # underlying file handle is closed afterwards.
    with closing(file_obj) as f:
        with Image(file=f) as img:
            data = {
                'identifier': datafile.id,
                'height': img.height,
                'width': img.width,
            }

    if format == 'xml':
        info = Element('info', nsmap=NSMAP)
        for tag, text in (('identifier', datafile_id),
                          ('height', str(data['height'])),
                          ('width', str(data['width']))):
            SubElement(info, tag).text = text
        return HttpResponse(etree.tostring(info, method='xml'),
                            content_type="application/xml")
    if format == 'json':
        return HttpResponse(json.dumps(data), content_type="application/json")
    return HttpResponseNotFound()
Example #8
0
def _create_download_response(request, datafile_id, disposition='attachment'):  # too complex # noqa
    """Build the HTTP response that delivers a single datafile.

    :param request: Django request; used for the access check and the
        ``ignore_verification_status`` query parameter
    :param datafile_id: primary key of the DataFile to serve
    :param disposition: 'attachment' to force a download, 'inline' to
        let browsers render supported images directly
    :returns: a streaming, redirect, or error HttpResponse
    """
    # Get datafile (and return 404 if absent)
    try:
        datafile = DataFile.objects.get(pk=datafile_id)
    except DataFile.DoesNotExist:
        return return_response_not_found(request)
    # Check users has access to datafile
    if not has_datafile_download_access(request=request,
                                        datafile_id=datafile.id):
        return return_response_error(request)
    # Send an image that can be seen in the browser
    if disposition == 'inline' and datafile.is_image():
        from tardis.tardis_portal.iiif import download_image
        args = (request, datafile.id, 'full', 'full', '0', 'native')
        # Send unconverted image if web-compatible
        if datafile.get_mimetype() in ('image/gif', 'image/jpeg', 'image/png'):
            return download_image(*args)
        # Send converted image
        return download_image(*args, format='png')
    # Send local file
    try:
        verified_only = True
        # Query parameter to allow download of unverified files
        ignore_verif = request.GET.get('ignore_verification_status', '0')
        # Ensure ignore_verification_status=0 etc works as expected
        # a bare ?ignore_verification_status is True
        if ignore_verif.lower() in [u'', u'1', u'true']:
            verified_only = False

        # Get file object for datafile
        file_obj = datafile.get_file(verified_only=verified_only)
        if not file_obj:
            # If file path doesn't resolve, return not found
            if verified_only:
                # 503: file exists but is still awaiting verification
                return render_error_message(request,
                                            "File is unverified, "
                                            "please try again later.",
                                            status=503)
            else:
                return return_response_not_found(request)
        # Stream in ~64 KB chunks instead of loading the file in memory
        wrapper = FileWrapper(file_obj, blksize=65535)
        response = StreamingHttpResponse(wrapper,
                                         content_type=datafile.get_mimetype())
        response['Content-Disposition'] = \
            '%s; filename="%s"' % (disposition, datafile.filename)
        return response
    except IOError:
        # If we can't read the file, return not found
        return return_response_not_found(request)
    except ValueError:  # raised when replica not verified TODO: custom excptn
        # Bounce back to the referrer (or site root) with a readable
        # explanation carried in the URL fragment.
        redirect = request.META.get('HTTP_REFERER',
                                    'http://%s/' %
                                    request.META.get('HTTP_HOST'))
        message = """The file you are trying to access has not yet been
                     verified. Verification is an automated background process.
                     Please try again later or contact the system
                     administrator if the issue persists."""
        message = ' '.join(message.split())  # removes spaces
        redirect = redirect + '#error:' + message
        return HttpResponseRedirect(redirect)
Example #9
0
    def download_file(self, request, **kwargs):
        """Serve the datafile identified by ``kwargs['pk']`` as a
        streaming attachment.

        curl needs the -J switch to get the filename right;
        auth needs to be added manually here.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        if not has_datafile_download_access(
                request=request, datafile_id=kwargs['pk']):
            return HttpResponseForbidden()

        datafile = self._meta.queryset.get(pk=kwargs['pk'])
        self.authorized_read_detail(
            [datafile],
            self.build_bundle(obj=datafile, request=request))
        # FileWrapper streams the content in chunks
        body = FileWrapper(datafile.get_file())
        resp = StreamingHttpResponse(body, content_type=datafile.mimetype)
        resp['Content-Length'] = datafile.size
        resp['Content-Disposition'] = (
            'attachment; filename="%s"' % datafile.filename)
        self.log_throttled_access(request)
        return resp
Example #10
0
def compute_etag(request, datafile_id, *args, **kwargs):
    """Compute an ETag for a datafile download view.

    Returns None (meaning "no ETag") when the datafile is absent or
    the user lacks download access, so caching headers never reveal
    anything to unauthorised callers.
    """
    try:
        datafile = Dataset_File.objects.get(pk=datafile_id)
    except Dataset_File.DoesNotExist:
        return None
    if not has_datafile_download_access(request=request,
                                        dataset_file_id=datafile.id):
        return None
    # OK, we can compute the Etag without giving anything away now.
    # Mix the view args into the signature so different renderings of
    # the same file get distinct ETags.
    signature = datafile.sha512sum + json.dumps((args, kwargs))
    import hashlib
    # hashlib requires bytes under Python 3; encoding is a no-op for
    # this ASCII signature under Python 2.
    return hashlib.sha1(signature.encode('utf-8')).hexdigest()
Example #11
0
def compute_etag(request, datafile_id, *args, **kwargs):
    """Compute an ETag for a datafile download view.

    Returns None (meaning "no ETag") when the datafile is absent or
    the user lacks download access, so caching headers never reveal
    anything to unauthorised callers.
    """
    try:
        datafile = Dataset_File.objects.get(pk=datafile_id)
    except Dataset_File.DoesNotExist:
        return None
    if not has_datafile_download_access(request=request,
                                        dataset_file_id=datafile.id):
        return None
    # OK, we can compute the Etag without giving anything away now.
    # Mix the view args into the signature so different renderings of
    # the same file get distinct ETags.
    signature = datafile.sha512sum + json.dumps((args, kwargs))
    import hashlib
    # hashlib requires bytes under Python 3; encoding is a no-op for
    # this ASCII signature under Python 2.
    return hashlib.sha1(signature.encode('utf-8')).hexdigest()
Example #12
0
def compute_etag(request, datafile_id, *args, **kwargs):
    """Compute an ETag for a datafile download view.

    Returns None (meaning "no ETag") when the datafile is absent or
    the user lacks download access, so caching headers never reveal
    anything to unauthorised callers.
    """
    try:
        datafile = DataFile.objects.get(pk=datafile_id)
    except DataFile.DoesNotExist:
        return None
    if not has_datafile_download_access(request=request,
                                        datafile_id=datafile.id):
        return None
    # OK, we can compute the Etag without giving anything away now
    # Calculating SHA-512 sums is now optional, so use MD5 sums
    # if SHA-512 sums are unavailable:
    checksum = datafile.sha512sum or datafile.md5sum
    # Mix the view args into the signature so different renderings of
    # the same file get distinct ETags.
    signature = checksum + json.dumps((args, kwargs))
    import hashlib
    # hashlib requires bytes under Python 3; encoding is a no-op for
    # this ASCII signature under Python 2.
    return hashlib.sha1(signature.encode('utf-8')).hexdigest()
Example #13
0
def compute_etag(request, datafile_id, *args, **kwargs):
    """Compute an ETag for a datafile download view.

    Returns None (meaning "no ETag") when the datafile is absent or
    the user lacks download access, so caching headers never reveal
    anything to unauthorised callers.
    """
    try:
        datafile = DataFile.objects.get(pk=datafile_id)
    except DataFile.DoesNotExist:
        return None
    if not has_datafile_download_access(request=request,
                                        datafile_id=datafile.id):
        return None
    # OK, we can compute the Etag without giving anything away now
    # Calculating SHA-512 sums is now optional, so use MD5 sums
    # if SHA-512 sums are unavailable:
    checksum = datafile.sha512sum or datafile.md5sum
    # Mix the view args into the signature so different renderings of
    # the same file get distinct ETags.
    signature = checksum + json.dumps((args, kwargs))
    import hashlib
    # hashlib requires bytes under Python 3; encoding is a no-op for
    # this ASCII signature under Python 2.
    return hashlib.sha1(signature.encode('utf-8')).hexdigest()
Example #14
0
def _create_download_response(request, datafile_id, disposition='attachment'):
    """Build the HTTP response that delivers a single datafile.

    :param request: Django request, used for the access check
    :param datafile_id: primary key of the Dataset_File to serve
    :param disposition: 'attachment' to force a download, 'inline' to
        let browsers render supported images directly
    :returns: a streaming, redirect, or error HttpResponse
    """
    #import ipdb; ipdb.set_trace()
    # Get datafile (and return 404 if absent)
    try:
        datafile = Dataset_File.objects.get(pk=datafile_id)
    except Dataset_File.DoesNotExist:
        return return_response_not_found(request)
    # Check users has access to datafile
    if not has_datafile_download_access(request=request,
                                        dataset_file_id=datafile.id):
        return return_response_error(request)
    # Send an image that can be seen in the browser
    if disposition == 'inline' and datafile.is_image():
        from tardis.tardis_portal.iiif import download_image
        args = (request, datafile.id, 'full', 'full', '0', 'native')
        # Send unconverted image if web-compatible
        if datafile.get_mimetype() in ('image/gif', 'image/jpeg', 'image/png'):
            return download_image(*args)
        # Send converted image
        return download_image(*args, format='png')
    # Send local file
    try:
        # Get file object for datafile
        file_obj = datafile.get_file()
        if not file_obj:
            # If file path doesn't resolve, return not found
            return return_response_not_found(request)
        # Stream in ~64 KB chunks instead of loading the file in memory
        wrapper = FileWrapper(file_obj, blksize=65535)
        response = StreamingHttpResponse(wrapper,
                                         mimetype=datafile.get_mimetype())
        response['Content-Disposition'] = \
            '%s; filename="%s"' % (disposition, datafile.filename)
        return response
    except IOError:
        # If we can't read the file, return not found
        return return_response_not_found(request)
    except ValueError:  # raised when replica not verified TODO: custom excptn
        # Bounce back to the referrer (or site root) with a readable
        # explanation carried in the URL fragment.
        redirect = request.META.get('HTTP_REFERER',
                                    'http://%s/' %
                                    request.META.get('HTTP_HOST'))
        message = """The file you are trying to access has not yet been
                     verified. Verification is an automated background process.
                     Please try again later or contact the system
                     administrator if the issue persists."""
        message = ' '.join(message.split())  # removes spaces
        redirect = redirect + '#error:' + message
        return HttpResponseRedirect(redirect)
Example #15
0
    def verify_file(self, request, **kwargs):
        """Trigger verification of a file, e.g. after a non-POST upload
        completes, by queueing a verify task for each stored object.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        if not has_datafile_download_access(
                request=request, datafile_id=kwargs['pk']):
            return HttpResponseForbidden()

        record = self._meta.queryset.get(pk=kwargs['pk'])
        bundle = self.build_bundle(obj=record, request=request)
        self.authorized_read_detail([record], bundle)
        # One asynchronous verification task per DataFileObject
        for file_object in record.file_objects.all():
            tasks.dfo_verify.delay(file_object.id)
        return HttpResponse()
Example #16
0
    def verify_file(self, request, **kwargs):
        """Queue verification tasks for every stored object of a file,
        e.g. after a non-POST upload completes.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        if not has_datafile_download_access(request=request,
                                            datafile_id=kwargs['pk']):
            return HttpResponseForbidden()

        datafile = self._meta.queryset.get(pk=kwargs['pk'])
        self.authorized_read_detail(
            [datafile],
            self.build_bundle(obj=datafile, request=request))
        # Fan out one asynchronous verify task per DataFileObject
        for dfo in datafile.file_objects.all():
            tasks.dfo_verify.delay(dfo.id)
        return HttpResponse()
Example #17
0
    def download_dfo(self, request, **kwargs):
        '''
        Download a DataFileObject by redirecting the client to a
        presigned S3 Object Store URL.
        '''
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        dfo = DataFileObject.objects.get(id=kwargs['pk'])
        if not has_datafile_download_access(
                request=request, datafile_id=dfo.datafile.id):
            return HttpResponseForbidden()

        bundle = self.build_bundle(obj=dfo.datafile, request=request)
        self.authorized_read_detail([dfo.datafile], bundle)

        # Hand the client a time-limited URL so the object store serves
        # the bytes directly instead of proxying them through Django.
        presigned_url = generate_presigned_url(dfo)
        response = redirect(presigned_url)
        response['Content-Disposition'] = (
            'attachment; filename="{}"'.format(dfo.datafile.filename))
        return response
Example #18
0
def download_datafiles(request):
    """
    takes string parameter "comptype" for compression method.
    Currently implemented: "zip" and "tar"
    The datafiles to be downloaded are selected using "datafile", "dataset"
    or "url" parameters.  An "expid" parameter may be supplied for use in
    the download archive name.  If "url" is used, the "expid" parameter
    is also used to limit the datafiles to be downloaded to a given experiment.
    """
    # Create the HttpResponse object with the appropriate headers.
    # TODO: handle no datafile, invalid filename, all http links
    # TODO: intelligent selection of temp file versus in-memory buffering.

    logger.error('In download_datafiles !!')
    comptype = "zip"
    organization = "classic"
    if 'comptype' in request.POST:
        comptype = request.POST['comptype']
    if 'organization' in request.POST:
        organization = request.POST['organization']

    if 'datafile' in request.POST or 'dataset' in request.POST:
        # BUGFIX: the original condition had a misplaced parenthesis --
        # (len(datafile) > 0 or len(dataset)) > 0 -- so test each list
        # for content separately.
        if (request.POST.getlist('datafile')
                or request.POST.getlist('dataset')):

            datasets = request.POST.getlist('dataset')
            datafiles = request.POST.getlist('datafile')

            # Generator to produce datafiles from dataset id
            def get_dataset_datafiles(dsid):
                for datafile in Dataset_File.objects.filter(dataset=dsid):
                    if has_datafile_download_access(
                            request=request, dataset_file_id=datafile.id):
                        yield datafile

            # Generator to produce datafile from datafile id
            def get_datafile(dfid):
                datafile = Dataset_File.objects.get(pk=dfid)
                if has_datafile_download_access(request=request,
                                                dataset_file_id=datafile.id):
                    yield datafile

            # Take chained generators and turn them into a set of datafiles
            df_set = set(chain(chain.from_iterable(map(get_dataset_datafiles,
                                                       datasets)),
                               chain.from_iterable(map(get_datafile,
                                                       datafiles))))
        else:
            return render_error_message(
                request,
                'No Datasets or Datafiles were selected for downloaded',
                status=404)

    elif 'url' in request.POST:
        # BUGFIX: the original guard was inverted ("if not len(...) == 0")
        # and rejected requests that *did* supply URLs; reject only an
        # empty selection.
        if len(request.POST.getlist('url')) == 0:
            return render_error_message(
                request,
                'No Datasets or Datafiles were selected for downloaded',
                status=404)

        # Start from an empty set so a failed access check yields the
        # 403 below instead of a NameError on df_set.
        df_set = set()
        for url in request.POST.getlist('url'):
            url = urllib.unquote(url)
            raw_path = url.partition('//')[2]
            experiment_id = request.POST['expid']
            datafile = Dataset_File.objects.filter(
                url__endswith=raw_path,
                dataset__experiment__id=experiment_id)[0]
            if has_datafile_download_access(request=request,
                                            dataset_file_id=datafile.id):
                # BUGFIX: accumulate rather than overwrite, so more than
                # one URL can be downloaded.
                df_set.add(datafile)
    else:
        return render_error_message(
            request, 'No Datasets or Datafiles were selected for downloaded',
            status=404)

    logger.info('Files for archive command: %s' % df_set)

    if len(df_set) == 0:
        return render_error_message(
            request,
            'You do not have download access for any of the '
            'selected Datasets or Datafiles ',
            status=403)

    rootdir = 'datasets'
    mapper = _make_mapper(organization, rootdir)
    if not mapper:
        return render_error_message(
            request, 'Unknown download organization: %s' % organization,
            status=400)
    msg = _check_download_limits(mapper, df_set, comptype)
    if msg:
        return render_error_message(
            request, 'Cannot download: %s' % msg, status=400)

    # Handle missing experiment ID - only need it for naming
    try:
        expid = request.POST['expid']
    except KeyError:
        # next(iter(...)) works on both Python 2 and 3, unlike .next()
        expid = next(iter(df_set)).dataset.get_first_experiment().id

    if comptype == "tar":
        reader = StreamingFile(_write_tar_func(mapper, df_set),
                               asynchronous_file_creation=True)
        response = StreamingHttpResponse(FileWrapper(reader),
                                mimetype='application/x-tar')
        response['Content-Disposition'] = \
            'attachment; filename="experiment%s-selection.tar"' % expid
    elif comptype == "zip":
        reader = StreamingFile(_write_zip_func(mapper, df_set),
                               asynchronous_file_creation=True)
        response = StreamingHttpResponse(FileWrapper(reader),
                                mimetype='application/zip')
        response['Content-Disposition'] = \
            'attachment; filename="experiment%s-selection.zip"' % expid
    else:
        response = render_error_message(
            request, 'Unsupported download format: %s' % comptype, status=404)
    return response
Example #19
0
 def get_dataset_datafiles(dsid):
     """Yield the datafiles of dataset *dsid* that the requesting user
     may download (``request`` comes from the enclosing scope).
     """
     for df in DataFile.objects.filter(dataset=dsid):
         if has_datafile_download_access(request=request,
                                         datafile_id=df.id):
             yield df
Example #20
0
 def get_datafile(dfid):
     """Yield the datafile with primary key *dfid* iff the requesting
     user has download access (a zero- or one-element generator).
     """
     df = DataFile.objects.get(pk=dfid)
     if has_datafile_download_access(request=request, datafile_id=df.id):
         yield df
Example #21
0
 def get_dataset_datafiles(dsid):
     """Generate the downloadable datafiles of dataset *dsid*, filtered
     by the access check for the enclosing ``request``.
     """
     for candidate in DataFile.objects.filter(dataset=dsid):
         if has_datafile_download_access(request=request,
                                         datafile_id=candidate.id):
             yield candidate
Example #22
0
def streaming_download_datafiles(request):  # too complex # noqa
    """
    takes string parameter "comptype" for compression method.
    Currently implemented: "tgz" and "tar"
    The datafiles to be downloaded are selected using "datafile", "dataset"
    or "url" parameters.  An "expid" parameter may be supplied for use in
    the download archive name.  If "url" is used, the "expid" parameter
    is also used to limit the datafiles to be downloaded to a given experiment.
    """
    # Create the HttpResponse object with the appropriate headers.
    # TODO: handle no datafile, invalid filename, all http links
    # TODO: intelligent selection of temp file versus in-memory buffering.
    logger.error('In download_datafiles !!')
    comptype = getattr(settings, 'DEFAULT_ARCHIVE_FORMATS', ['tar'])[0]
    organization = getattr(settings, 'DEFAULT_PATH_MAPPER', 'classic')
    if 'comptype' in request.POST:
        comptype = request.POST['comptype']
    if 'organization' in request.POST:
        organization = request.POST['organization']

    if 'datafile' in request.POST or 'dataset' in request.POST:
        if request.POST.getlist('datafile') or request.POST.getlist('dataset'):

            datasets = request.POST.getlist('dataset')
            datafiles = request.POST.getlist('datafile')

            # Generator to produce datafiles from dataset id
            def get_dataset_datafiles(dsid):
                for datafile in DataFile.objects.filter(dataset=dsid):
                    if has_datafile_download_access(request=request,
                                                    datafile_id=datafile.id):
                        yield datafile

            # Generator to produce datafile from datafile id
            def get_datafile(dfid):
                datafile = DataFile.objects.get(pk=dfid)
                if has_datafile_download_access(request=request,
                                                datafile_id=datafile.id):
                    yield datafile

            # Take chained generators and turn them into a set of datafiles
            df_set = set(
                chain(
                    chain.from_iterable(map(get_dataset_datafiles, datasets)),
                    chain.from_iterable(map(get_datafile, datafiles))))
        else:
            return render_error_message(
                request,
                'No Datasets or Datafiles were selected for downloaded',
                status=404)

    elif 'url' in request.POST:
        if not request.POST.getlist('url'):
            return render_error_message(
                request,
                'No Datasets or Datafiles were selected for downloaded',
                status=404)

        # Start from an empty set so a failed access check yields the
        # 403 below instead of a NameError on df_set.
        df_set = set()
        for url in request.POST.getlist('url'):
            url = urllib.unquote(url)
            raw_path = url.partition('//')[2]
            experiment_id = request.POST['expid']
            datafile = DataFile.objects.filter(
                url__endswith=raw_path,
                dataset__experiment__id=experiment_id)[0]
            if has_datafile_download_access(request=request,
                                            datafile_id=datafile.id):
                # BUGFIX: accumulate rather than overwrite, so more than
                # one URL can be downloaded.
                df_set.add(datafile)
    else:
        return render_error_message(
            request,
            'No Datasets or Datafiles were selected for downloaded',
            status=404)

    logger.info('Files for archive command: %s' % df_set)

    if not df_set:
        return render_error_message(
            request, 'You do not have download access for any of the '
            'selected Datasets or Datafiles ',
            status=403)

    # Fall back to any experiment attached to a selected datafile when
    # no (valid) expid was posted; only needed for naming the archive.
    try:
        expid = request.POST['expid']
        experiment = Experiment.objects.get(id=expid)
    except (KeyError, Experiment.DoesNotExist):
        # next(iter(...)) works on both Python 2 and 3, unlike .next()
        experiment = next(iter(df_set)).dataset.get_first_experiment()

    exp_title = get_filesystem_safe_experiment_name(experiment)
    filename = '%s-selection.tar' % exp_title
    rootdir = '%s-selection' % exp_title
    return _streaming_downloader(request, df_set, rootdir, filename, comptype,
                                 organization)
Example #23
0
def streaming_download_datafiles(request):  # too complex # noqa
    """
    Stream an archive of selected datafiles back to the client.

    Takes string parameter "comptype" for compression method.
    Currently implemented: "tgz" and "tar"
    The datafiles to be downloaded are selected using "datafile", "dataset"
    or "url" parameters.  An "expid" parameter may be supplied for use in
    the download archive name.  If "url" is used, the "expid" parameter
    is also used to limit the datafiles to be downloaded to a given experiment.
    """
    # Create the HttpResponse object with the appropriate headers.
    # TODO: handle no datafile, invalid filename, all http links
    # TODO: intelligent selection of temp file versus in-memory buffering.
    # Entry trace was logged at ERROR level; demoted to DEBUG.
    logger.debug('In download_datafiles !!')
    # Fall back to site-wide defaults when the client does not specify.
    comptype = getattr(settings, 'DEFAULT_ARCHIVE_FORMATS', ['tar'])[0]
    organization = getattr(settings, 'DEFAULT_PATH_MAPPER', 'classic')
    if 'comptype' in request.POST:
        comptype = request.POST['comptype']
    if 'organization' in request.POST:
        organization = request.POST['organization']

    if 'datafile' in request.POST or 'dataset' in request.POST:
        if request.POST.getlist('datafile') or request.POST.getlist('dataset'):

            datasets = request.POST.getlist('dataset')
            datafiles = request.POST.getlist('datafile')

            # Generator to produce the accessible datafiles of a dataset id
            def get_dataset_datafiles(dsid):
                for datafile in DataFile.objects.filter(dataset=dsid):
                    if has_datafile_download_access(
                            request=request, datafile_id=datafile.id):
                        yield datafile

            # Generator to produce a single datafile by id, if accessible
            def get_datafile(dfid):
                datafile = DataFile.objects.get(pk=dfid)
                if has_datafile_download_access(request=request,
                                                datafile_id=datafile.id):
                    yield datafile

            # Take chained generators and turn them into a set of datafiles
            df_set = set(chain(
                chain.from_iterable(map(get_dataset_datafiles, datasets)),
                chain.from_iterable(map(get_datafile, datafiles))))
        else:
            return render_error_message(
                request,
                'No Datasets or Datafiles were selected for downloaded',
                status=404)

    elif 'url' in request.POST:
        if not request.POST.getlist('url'):
            return render_error_message(
                request,
                'No Datasets or Datafiles were selected for downloaded',
                status=404)

        # BUG FIX: accumulate every accessible datafile instead of
        # overwriting df_set on each iteration, and initialise the set so
        # that a selection with no accessible files reaches the 403 branch
        # below instead of raising NameError.
        df_set = set()
        experiment_id = request.POST['expid']  # hoisted: loop-invariant
        for url in request.POST.getlist('url'):
            url = urllib.unquote(url)
            raw_path = url.partition('//')[2]
            matches = DataFile.objects.filter(
                url__endswith=raw_path,
                dataset__experiment__id=experiment_id)
            # Skip URLs matching no datafile rather than raising IndexError
            if not matches:
                continue
            datafile = matches[0]
            if has_datafile_download_access(request=request,
                                            datafile_id=datafile.id):
                df_set.add(datafile)
    else:
        return render_error_message(
            request, 'No Datasets or Datafiles were selected for downloaded',
            status=404)

    # Lazy %-style logger args avoid formatting when INFO is disabled.
    logger.info('Files for archive command: %s', df_set)

    if not df_set:
        return render_error_message(
            request,
            'You do not have download access for any of the '
            'selected Datasets or Datafiles ',
            status=403)

    # Prefer the explicitly supplied experiment for the archive name;
    # otherwise fall back to the first experiment of any selected file.
    try:
        expid = request.POST['expid']
        experiment = Experiment.objects.get(id=expid)
    except (KeyError, Experiment.DoesNotExist):
        # next(iter(...)) works on both Python 2 and 3, unlike .next()
        experiment = next(iter(df_set)).dataset.get_first_experiment()

    exp_title = get_filesystem_safe_experiment_name(experiment)
    filename = '%s-selection.tar' % exp_title
    rootdir = '%s-selection' % exp_title
    return _streaming_downloader(request, df_set, rootdir, filename,
                                 comptype, organization)
Example #24
0
def download_image(request, datafile_id, region, size, rotation,
                   quality, format=None): #@ReservedAssignment
    """
    Serve an image datafile inline with IIIF-style transformations.

    ``region`` ('full' or 'x,y,w,h'), ``size`` ('full' or a spec handled
    by ``_do_resize``), ``rotation`` (degrees), ``quality`` ('native' or
    'color' only) and ``format`` (file extension, optional) select the
    transformation.  Returns 404 for missing, inaccessible or
    undecodable files and 400-style errors for bad size/quality values.
    """
    # Get datafile (and return 404 if absent)
    try:
        datafile = Dataset_File.objects.get(pk=datafile_id)
    except Dataset_File.DoesNotExist:
        return HttpResponseNotFound()

    is_public = datafile.is_public()
    if not is_public:
        # Check user has access to datafile
        if not has_datafile_download_access(request=request,
                                            dataset_file_id=datafile.id):
            return HttpResponseNotFound()

    buf = StringIO()
    try:
        file_obj = datafile.get_image_data()
        if file_obj is None:
            return HttpResponseNotFound()
        from contextlib import closing
        with closing(file_obj) as f:
            with Image(file=f) as img:
                # Handle region
                if region != 'full':
                    # idiom fix: map(int, ...) replaces a redundant lambda
                    x, y, w, h = map(int, region.split(','))
                    img.crop(x, y, width=w, height=h)
                # Handle size
                if size != 'full':
                    # Check the image isn't empty
                    if 0 in (img.height, img.width):
                        return _bad_request('size', 'Cannot resize empty image')
                    # Attempt resize
                    if not _do_resize(img, size):
                        return _bad_request('size',
                                            'Invalid size argument: %s' % size)
                # Handle rotation
                if rotation:
                    img.rotate(float(rotation))
                # Handle quality (mostly by rejecting it)
                if quality not in ['native', 'color']:
                    return _get_iiif_error('quality',
                    'This server does not support greyscale or bitonal quality.')
                # Handle format
                if format:
                    mimetype = mimetypes.types_map['.%s' % format.lower()]
                    img.format = format
                    if mimetype not in ALLOWED_MIMETYPES:
                        return _invalid_media_response()
                else:
                    mimetype = datafile.get_mimetype()
                    # If the native format is not allowed, pretend it doesn't exist.
                    if mimetype not in ALLOWED_MIMETYPES:
                        return HttpResponseNotFound()
                img.save(file=buf)
                response = HttpResponse(buf.getvalue(), mimetype=mimetype)
                response['Content-Disposition'] = \
                    'inline; filename="%s.%s"' % (datafile.filename, format)
                # Set Cache headers: shared caches may keep public images
                if is_public:
                    patch_cache_control(response, public=True, max_age=MAX_AGE)
                else:
                    patch_cache_control(response, private=True, max_age=MAX_AGE)
                return response
    except MissingDelegateError:
        # Wand could not decode/encode the requested format
        if format:
            return _invalid_media_response()
        return HttpResponseNotFound()
    except ValueError:
        return HttpResponseNotFound()
Example #25
0
def download_image(request,
                   datafile_id,
                   region,
                   size,
                   rotation,
                   quality,
                   format=None):  #@ReservedAssignment
    """
    Serve an image datafile inline with IIIF-style transformations.

    ``region`` ('full' or 'x,y,w,h'), ``size`` ('full' or a spec handled
    by ``_do_resize``), ``rotation`` (degrees), ``quality`` ('native' or
    'color' only) and ``format`` (file extension, optional) select the
    transformation.  Returns 404 for missing, inaccessible or
    undecodable files and 400-style errors for bad size/quality values.
    Public images get a shared-cacheable Cache-Control header; private
    ones get ``private``.
    """
    # Get datafile (and return 404 if absent)
    try:
        datafile = DataFile.objects.get(pk=datafile_id)
    except DataFile.DoesNotExist:
        return HttpResponseNotFound()

    is_public = datafile.is_public()
    if not is_public:
        # Check users has access to datafile
        if not has_datafile_download_access(request=request,
                                            datafile_id=datafile.id):
            return HttpResponseNotFound()

    buf = StringIO()
    try:
        file_obj = datafile.get_image_data()
        if file_obj is None:
            return HttpResponseNotFound()
        from contextlib import closing
        with closing(file_obj) as f:
            with Image(file=f) as img:
                # Multi-frame images (e.g. animated GIF, multi-page TIFF):
                # serve only the first frame.
                if len(img.sequence) > 1:
                    img = Image(img.sequence[0])
                # Handle region
                if region != 'full':
                    x, y, w, h = map(int, region.split(','))
                    img.crop(x, y, width=w, height=h)
                # Handle size
                if size != 'full':
                    # Check the image isn't empty
                    if 0 in (img.height, img.width):
                        return _bad_request('size',
                                            'Cannot resize empty image')
                    # Attempt resize
                    if not _do_resize(img, size):
                        return _bad_request('size',
                                            'Invalid size argument: %s' % size)
                # Handle rotation
                if rotation:
                    img.rotate(float(rotation))
                # Handle quality (mostly by rejecting it)
                if quality not in ['native', 'color']:
                    return _get_iiif_error(
                        'quality',
                        'This server does not support greyscale or bitonal quality.'
                    )
                # Handle format
                if format:
                    # NOTE(review): an unknown extension raises KeyError here,
                    # which is not caught below -- confirm that is intended.
                    mimetype = mimetypes.types_map['.%s' % format.lower()]
                    img.format = format
                    if mimetype not in ALLOWED_MIMETYPES:
                        return _invalid_media_response()
                else:
                    mimetype = datafile.get_mimetype()
                    # If the native format is not allowed, pretend it doesn't exist.
                    if mimetype not in ALLOWED_MIMETYPES:
                        return HttpResponseNotFound()
                img.save(file=buf)
                response = HttpResponse(buf.getvalue(), content_type=mimetype)
                response['Content-Disposition'] = \
                    'inline; filename="%s.%s"' % (datafile.filename, format)
                # Set Cache
                if is_public:
                    patch_cache_control(response, public=True, max_age=MAX_AGE)
                else:
                    patch_cache_control(response,
                                        private=True,
                                        max_age=MAX_AGE)
                return response
    except WandException:
        # Wand failed to decode or transform the image data
        return HttpResponseNotFound()
    except ValueError:
        # e.g. malformed region or rotation parameter
        return HttpResponseNotFound()
    except IOError:
        return HttpResponseNotFound()
Example #26
0
 def get_datafile(dfid):
     """Yield the datafile with primary key *dfid* if, and only if,
     the requester has download access to it."""
     candidate = DataFile.objects.get(pk=dfid)
     allowed = has_datafile_download_access(request=request,
                                            datafile_id=candidate.id)
     if allowed:
         yield candidate