Exemple #1
0
def iter_images_and_pages(images):
    """Iterate over image files and yield every page they contain.

    OpenCV is not able to handle multipage TIFF files, so those are loaded
    with the SDAPS internal loader and converted into a BGR numpy array.
    Everything else is read with ``cv2.imread``.

    :param images: Iterable of image filenames.
    :returns: Generator yielding ``(img, filename, page)`` tuples, where
        ``page`` is the zero based page index inside ``filename``.
    """

    for filename in images:
        try:
            # Check whether this is a TIFF file (ie. try to retrieve the page
            # count); the AssertionError handler below treats the file as a
            # plain single page image.
            pages = image.get_tiff_page_count(filename)
            is_tiff = True
        except AssertionError:
            pages = 1
            is_tiff = False

        for page in range(pages):
            if not is_tiff:
                # NOTE(review): the result of cv2.imread is passed through
                # unchecked; callers have to cope with unreadable files.
                img = cv2.imread(filename)

            else:
                # TIFF pages are zero based
                surf = image.get_rgb24_from_tiff(filename, page, False)

                width = surf.get_width()
                height = surf.get_height()
                stride = surf.get_stride()

                # The stride is given in bytes, each pixel takes four bytes.
                # Floor division keeps this an integer even with true
                # division semantics.
                np_width = stride // 4

                # This converts by doing a copy; first create target numpy
                # array. One uint32 per pixel leaves room for the dummy alpha
                # channel.
                target = np.empty((height, np_width), dtype=np.uint32)

                tmp_surf = cairo.ImageSurface.create_for_data(
                    target.data, cairo.FORMAT_RGB24, width, height, stride)
                cr = cairo.Context(tmp_surf)
                cr.set_source_surface(surf)
                cr.paint()
                del cr
                tmp_surf.flush()
                del tmp_surf

                # Rows may carry padding beyond the image width; drop it so
                # that the shapes match the output array.
                pixels = target[:, :width]

                # Now, we need a bit of reshaping
                img = np.empty((height, width, 3), dtype=np.uint8)

                # order should be BGR
                img[:, :, 2] = 0xff & (pixels >> 16)
                img[:, :, 1] = 0xff & (pixels >> 8)
                img[:, :, 0] = 0xff & pixels

            yield img, filename, page
Exemple #2
0
def iter_images_and_pages(images):
    """Yield each page of each given image file as an OpenCV style array.

    As OpenCV is not able to handle multipage TIFF files, TIFFs are read
    through the SDAPS internal loading method and copied into a BGR numpy
    array. All other files are loaded with ``cv2.imread``.

    :param images: Iterable of image filenames.
    :returns: Generator of ``(img, filename, page)`` tuples; ``page`` is the
        zero based index of the page inside ``filename``.
    """

    for filename in images:
        try:
            # Check whether this is a TIFF file (ie. try to retrieve the page
            # count). An AssertionError means we fall back to a single page.
            pages = image.get_tiff_page_count(filename)
            is_tiff = True
        except AssertionError:
            pages = 1
            is_tiff = False

        for page in range(pages):
            if not is_tiff:
                # NOTE(review): no check is done on what cv2.imread returned.
                img = cv2.imread(filename)

            else:
                # TIFF pages are zero based
                surf = image.get_rgb24_from_tiff(filename, page, False)

                width = surf.get_width()
                height = surf.get_height()
                stride = surf.get_stride()

                # Stride is in bytes and a pixel occupies four of them. Use
                # floor division so the result is an integer regardless of
                # the division semantics in effect.
                np_width = stride // 4

                # This converts by doing a copy; first create target numpy
                # array. We need a dummy alpha channel, hence one uint32 per
                # pixel.
                target = np.empty((height, np_width), dtype=np.uint32)

                tmp_surf = cairo.ImageSurface.create_for_data(
                    target.data, cairo.FORMAT_RGB24, width, height, stride)
                cr = cairo.Context(tmp_surf)
                cr.set_source_surface(surf)
                cr.paint()
                del cr
                tmp_surf.flush()
                del tmp_surf

                # Cut off any per-row padding, otherwise the assignments
                # below fail with a shape mismatch when stride != 4 * width.
                pixels = target[:, :width]

                # Now, we need a bit of reshaping
                img = np.empty((height, width, 3), dtype=np.uint8)

                # order should be BGR
                img[:, :, 2] = 0xff & (pixels >> 16)
                img[:, :, 1] = 0xff & (pixels >> 8)
                img[:, :, 0] = 0xff & pixels

            yield img, filename, page
Exemple #3
0
def survey_image(request, slug, filenum, page):
    """Return one page of a scanned survey TIFF as a PNG response.

    This function does not open the real SDAPS survey, as unpickling the data
    is way too inefficient. The image is read straight from the survey
    directory instead.

    :param request: The incoming request, handed to ``get_survey_or_404``.
    :param slug: Survey identifier, handed to ``get_survey_or_404``.
    :param filenum: Base name of the TIFF file (``<filenum>.tif``) inside the
        survey directory.
    :param page: Page inside the TIFF file; converted with ``int()``.
    :returns: An ``HttpResponse`` of content type ``image/png`` that may be
        cached privately for one hour.
    :raises Http404: If the image file does not exist or no surface could be
        loaded for the requested page.
    """
    survey = get_survey_or_404(request, slug, review=True)

    image_file = os.path.join(survey.path, "%s.tif" % (filenum,))

    # Check the requested file itself; testing only the survey directory
    # would let requests for missing images slip through to the loader.
    if not os.path.exists(image_file):
        raise Http404

    surface = image.get_rgb24_from_tiff(image_file, int(page), False)
    if surface is None:
        raise Http404

    # Create PNG stream and return it
    response = HttpResponse(content_type='image/png')
    response['Cache-Control'] = 'private, max-age=3600'
    surface.write_to_png(response)

    return response
Exemple #4
0
def survey_image(request, slug, filenum, page):
    """Serve a single page of a survey's scanned TIFF file as PNG.

    The real SDAPS survey is not opened here, as unpickling the data is way
    too inefficient; only the survey path is used to locate the image.

    :param request: The incoming request (passed to ``get_survey_or_404``).
    :param slug: Survey identifier (passed to ``get_survey_or_404``).
    :param filenum: Name of the TIFF file without its ``.tif`` extension,
        relative to the survey directory.
    :param page: Page of the TIFF file to serve; converted with ``int()``.
    :returns: ``HttpResponse`` containing the PNG data, with a private one
        hour ``Cache-Control`` header.
    :raises Http404: When the image file is missing or the loader returns no
        surface for the page.
    """
    survey = get_survey_or_404(request, slug, review=True)

    image_file = os.path.join(survey.path, "%s.tif" % (filenum, ))

    # The check has to look at the image file; the survey directory existing
    # says nothing about whether this particular scan is there.
    if not os.path.exists(image_file):
        raise Http404

    surface = image.get_rgb24_from_tiff(image_file, int(page), False)
    if surface is None:
        raise Http404

    # Create PNG stream and return it
    response = HttpResponse(content_type='image/png')
    response['Cache-Control'] = 'private, max-age=3600'
    surface.write_to_png(response)

    return response
Exemple #5
0
def iter_images_and_pages(images):
    """Iterate over image files and yield every page they contain.

    OpenCV is not able to handle multipage TIFF files, so those are loaded
    with the SDAPS internal loading method. PDF files are opened through
    Poppler; anything else is handed to ``cv2.imread``.

    :param images: Iterable of filenames (TIFF, PDF or any other image file).
    :returns: Generator yielding ``(img, filename, page)`` tuples, where
        ``page`` is the zero based page index inside ``filename``.
    """

    # Maximum distance (in pt) between an embedded image and the page borders
    # for the image to still count as covering the full page.
    THRESH = 10  # pt

    # Cairo user space is in pt (1/72 inch). The float literal keeps the
    # factor at 300dpi even where "/" is integer division.
    scale = 300.0 / 72

    for filename in images:
        pages = 1
        is_tiff = False
        is_pdf = False

        try:
            # Check whether this is a TIFF file (ie. try to retrieve the page count)
            pages = image.get_tiff_page_count(filename)
            is_tiff = True
        except AssertionError:
            pass

        if not is_tiff:
            try:
                gfile = Gio.File.new_for_path(filename)
                pdf_doc = Poppler.Document.new_from_gfile(gfile, None, None)
                pages = pdf_doc.get_n_pages()
                is_pdf = True
            except Exception:
                # Either not PDF/damaged or poppler not installed properly.
                # Only Exception is caught so that KeyboardInterrupt and
                # SystemExit still propagate.
                pass

        for page in range(pages):
            if is_tiff:
                # TIFF pages are zero based
                surf = image.get_rgb24_from_tiff(filename, page, False)

                img = to_opencv(surf)

            elif is_pdf:
                # Try to retrieve a single fullpage image, if that fails,
                # render document at 300dpi.
                pdfpage = pdf_doc.get_page(page)
                page_width, page_height = pdfpage.get_size()

                # Kept under its own name so the "images" argument is not
                # rebound while it is being iterated.
                image_mapping = pdfpage.get_image_mapping()
                if len(image_mapping) == 1 and (
                        abs(image_mapping[0].area.x1) < THRESH and
                        abs(image_mapping[0].area.y1) < THRESH and
                        abs(image_mapping[0].area.x2 - page_width) < THRESH and
                        abs(image_mapping[0].area.y2 - page_height) < THRESH):
                    # Assume one full page image, and simply use that.
                    surf = pdfpage.get_image(image_mapping[0].image_id)

                else:
                    # Render page at 300dpi
                    surf = cairo.ImageSurface(cairo.FORMAT_RGB24,
                                              int(scale * page_width),
                                              int(scale * page_height))
                    cr = cairo.Context(surf)
                    cr.scale(scale, scale)
                    # White background
                    cr.set_source_rgb(1, 1, 1)
                    cr.paint()

                    pdfpage.render_for_printing(cr)

                    del cr

                img = to_opencv(surf)

            else:
                img = cv2.imread(filename)

            yield img, filename, page
Exemple #6
0
def iter_images_and_pages(images):
    """Yield each page of each given file as an OpenCV style image.

    As OpenCV is not able to handle multipage TIFF files, the SDAPS internal
    loading method is used for those. Files that Poppler can open are treated
    as PDF documents, and every remaining file goes through ``cv2.imread``.

    :param images: Iterable of filenames (TIFF, PDF or any other image file).
    :returns: Generator of ``(img, filename, page)`` tuples; ``page`` is the
        zero based index of the page inside ``filename``.
    """

    # An embedded image counts as "full page" if each of its edges is closer
    # than this (in pt) to the corresponding page border.
    THRESH = 10  # pt

    # Factor from pt (1/72 inch) to pixels at 300dpi. Written with a float so
    # that integer division cannot truncate it to 4.
    scale = 300.0 / 72

    for filename in images:
        pages = 1
        is_tiff = False
        is_pdf = False

        try:
            # Check whether this is a TIFF file (ie. try to retrieve the page count)
            pages = image.get_tiff_page_count(filename)
            is_tiff = True
        except AssertionError:
            pass

        if not is_tiff:
            try:
                gfile = Gio.File.new_for_path(filename)
                pdf_doc = Poppler.Document.new_from_gfile(gfile, None, None)
                pages = pdf_doc.get_n_pages()
                is_pdf = True
            except Exception:
                # Either not PDF/damaged or poppler not installed properly.
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                pass

        for page in range(pages):
            if is_tiff:
                # TIFF pages are zero based
                surf = image.get_rgb24_from_tiff(filename, page, False)

                img = to_opencv(surf)

            elif is_pdf:
                # Try to retrieve a single fullpage image, if that fails,
                # render document at 300dpi.
                pdfpage = pdf_doc.get_page(page)
                page_width, page_height = pdfpage.get_size()

                # Separate name: the "images" argument must stay untouched
                # while the outer loop is iterating over it.
                embedded = pdfpage.get_image_mapping()
                if len(embedded) == 1 and (
                        abs(embedded[0].area.x1) < THRESH and
                        abs(embedded[0].area.y1) < THRESH and
                        abs(embedded[0].area.x2 - page_width) < THRESH and
                        abs(embedded[0].area.y2 - page_height) < THRESH):
                    # Assume one full page image, and simply use that.
                    surf = pdfpage.get_image(embedded[0].image_id)

                else:
                    # Render page at 300dpi
                    surf = cairo.ImageSurface(cairo.FORMAT_RGB24,
                                              int(scale * page_width),
                                              int(scale * page_height))
                    cr = cairo.Context(surf)
                    cr.scale(scale, scale)
                    # Fill with white before rendering the page on top
                    cr.set_source_rgb(1, 1, 1)
                    cr.paint()

                    pdfpage.render_for_printing(cr)

                    del cr

                img = to_opencv(surf)

            else:
                img = cv2.imread(filename)

            yield img, filename, page
Exemple #7
0
def iter_images_and_pages(images):
    """Iterate over image files and yield every page they contain.

    OpenCV is not able to handle multipage TIFF files, so those are loaded
    with the SDAPS internal loading method. PDF files are opened through
    Poppler; anything else is handed to ``cv2.imread``.

    :param images: Iterable of filenames (TIFF, PDF or any other image file).
    :returns: Generator yielding ``(img, filename, page)`` tuples, where
        ``page`` is the zero based page index inside ``filename``.
    :raises IOError: With ``errno.ENOENT`` as soon as a filename that does
        not exist is reached.
    """

    # Maximum distance (in pt) between an embedded image and the page borders
    # for the image to still count as covering the full page.
    THRESH = 10  # pt

    for filename in images:
        if not os.path.exists(filename):
            raise IOError(errno.ENOENT, _("File does not exist"), filename)

        pages = 1
        is_tiff = False
        is_pdf = False

        try:
            # Check whether this is a TIFF file (ie. try to retrieve the page count)
            pages = image.get_tiff_page_count(filename)
            is_tiff = True
        except AssertionError:
            pass

        if not is_tiff:
            try:
                gfile = Gio.File.new_for_path(filename)
                pdf_doc = Poppler.Document.new_from_gfile(gfile, None, None)
                pages = pdf_doc.get_n_pages()
                is_pdf = True
            except Exception:
                # Either not PDF/damaged or poppler not installed properly.
                # Only Exception is caught so that KeyboardInterrupt and
                # SystemExit still propagate.
                pass

        for page in range(pages):
            if is_tiff:
                # TIFF pages are zero based
                surf = image.get_rgb24_from_tiff(filename, page, False)

                img = to_opencv(surf)

            elif is_pdf:
                # Try to retrieve a single fullpage image, if that fails,
                # render the document at the detected DPI (or 300dpi).
                pdfpage = pdf_doc.get_page(page)
                page_width, page_height = pdfpage.get_size()

                # Kept under its own name so the "images" argument is not
                # rebound while it is being iterated.
                mappings = pdfpage.get_image_mapping()
                if len(mappings) == 1 and (
                        abs(mappings[0].area.x1) < THRESH
                        and abs(mappings[0].area.y1) < THRESH
                        and abs(mappings[0].area.x2 - page_width) < THRESH
                        and abs(mappings[0].area.y2 - page_height) < THRESH):
                    # Assume one full page image, and simply use that.
                    surf = pdfpage.get_image(mappings[0].image_id)

                else:
                    dpi = 0
                    # Try to detect the DPI of the scan
                    for mapping in mappings:
                        area_width = mapping.area.x2 - mapping.area.x1
                        area_height = mapping.area.y2 - mapping.area.y1

                        # Only images covering at least half the page height
                        # are considered.
                        if area_height < page_height / 2:
                            continue

                        # Degenerate areas would divide by zero below.
                        if area_width <= 0 or area_height <= 0:
                            continue

                        embedded = pdfpage.get_image(mapping.image_id)
                        # Pixels per pt times 72 gives the DPI, once along
                        # each axis.
                        dpi_from_height = round(
                            embedded.get_height() / area_height * 72)
                        dpi_from_width = round(
                            embedded.get_width() / area_width * 72)
                        # Only trust the value if both axes agree.
                        if abs(dpi_from_height - dpi_from_width) <= 1:
                            dpi = max(dpi, dpi_from_height, dpi_from_width)

                    # Fall back to 300dpi for odd values
                    if dpi < 199 or dpi > 601:
                        dpi = 300

                    # Cairo user space is in pt (1/72 inch)
                    scale = dpi / 72

                    surf = cairo.ImageSurface(cairo.FORMAT_RGB24,
                                              int(scale * page_width),
                                              int(scale * page_height))
                    cr = cairo.Context(surf)
                    cr.scale(scale, scale)
                    # White background
                    cr.set_source_rgb(1, 1, 1)
                    cr.paint()

                    pdfpage.render_for_printing(cr)

                    del cr

                img = to_opencv(surf)

            else:
                img = cv2.imread(filename)

            yield img, filename, page
Exemple #8
0
def iter_images_and_pages(images):
    """Yield each page of each given file as an OpenCV style image.

    As OpenCV is not able to handle multipage TIFF files, the SDAPS internal
    loading method is used for those. Files that Poppler can open are treated
    as PDF documents, and every remaining file goes through ``cv2.imread``.

    :param images: Iterable of filenames (TIFF, PDF or any other image file).
    :returns: Generator of ``(img, filename, page)`` tuples; ``page`` is the
        zero based index of the page inside ``filename``.
    :raises IOError: With ``errno.ENOENT`` when a filename that does not
        exist is reached during iteration.
    """

    # An embedded image counts as "full page" if each of its edges is closer
    # than this (in pt) to the corresponding page border.
    THRESH = 10  # pt

    for filename in images:
        if not os.path.exists(filename):
            raise IOError(errno.ENOENT, _("File does not exist"), filename)

        pages = 1
        is_tiff = False
        is_pdf = False

        try:
            # Check whether this is a TIFF file (ie. try to retrieve the page count)
            pages = image.get_tiff_page_count(filename)
            is_tiff = True
        except AssertionError:
            pass

        if not is_tiff:
            try:
                gfile = Gio.File.new_for_path(filename)
                pdf_doc = Poppler.Document.new_from_gfile(gfile, None, None)
                pages = pdf_doc.get_n_pages()
                is_pdf = True
            except Exception:
                # Either not PDF/damaged or poppler not installed properly.
                # KeyboardInterrupt/SystemExit are deliberately not caught.
                pass

        for page in range(pages):
            if is_tiff:
                # TIFF pages are zero based
                surf = image.get_rgb24_from_tiff(filename, page, False)

                img = to_opencv(surf)

            elif is_pdf:
                # Try to retrieve a single fullpage image, if that fails,
                # render the document at the detected DPI (or 300dpi).
                pdfpage = pdf_doc.get_page(page)
                page_width, page_height = pdfpage.get_size()

                # Separate name: the "images" argument must stay untouched
                # while the outer loop is iterating over it.
                embedded = pdfpage.get_image_mapping()
                if len(embedded) == 1 and (
                        abs(embedded[0].area.x1) < THRESH and
                        abs(embedded[0].area.y1) < THRESH and
                        abs(embedded[0].area.x2 - page_width) < THRESH and
                        abs(embedded[0].area.y2 - page_height) < THRESH):
                    # Assume one full page image, and simply use that.
                    surf = pdfpage.get_image(embedded[0].image_id)

                else:
                    dpi = 0
                    # Try to detect the DPI of the scan
                    for entry in embedded:
                        entry_width = entry.area.x2 - entry.area.x1
                        entry_height = entry.area.y2 - entry.area.y1

                        # Skip images spanning less than half the page height.
                        if entry_height < page_height / 2:
                            continue

                        # Avoid dividing by zero on degenerate areas.
                        if entry_width <= 0 or entry_height <= 0:
                            continue

                        scan = pdfpage.get_image(entry.image_id)
                        # DPI is pixels per pt times 72; computed for both
                        # axes so they can be checked against each other.
                        dpi_vertical = round(scan.get_height() / entry_height * 72)
                        dpi_horizontal = round(scan.get_width() / entry_width * 72)
                        if abs(dpi_vertical - dpi_horizontal) <= 1:
                            dpi = max(dpi, dpi_vertical, dpi_horizontal)

                    # Fall back to 300dpi for odd values
                    if dpi < 199 or dpi > 601:
                        dpi = 300

                    # Factor from pt (1/72 inch) to pixels
                    scale = dpi / 72

                    surf = cairo.ImageSurface(cairo.FORMAT_RGB24,
                                              int(scale * page_width),
                                              int(scale * page_height))
                    cr = cairo.Context(surf)
                    cr.scale(scale, scale)
                    # Fill with white before rendering the page on top
                    cr.set_source_rgb(1, 1, 1)
                    cr.paint()

                    pdfpage.render_for_printing(cr)

                    del cr

                img = to_opencv(surf)

            else:
                img = cv2.imread(filename)

            yield img, filename, page