Ejemplo n.º 1
0
def existsExternalImageUrl(url):
    """ Check if the external URL exists (by issuing a HTTP HEAD request).

        Returns True if the request could be opened. Returns False if
        ``url`` is empty, does not start with 'http' or if the request
        fails for any reason (the failure is logged as a warning).
    """

    # empty values and URLs not starting with 'http' are never requested
    if not url or not url.startswith('http'):
        return False

    class HeadRequest(Request):
        """ Request subclass overriding the HTTP method with HEAD """
        def get_method(self):
            return "HEAD"

    try:
        response = urlopen(HeadRequest(url))
        # only the outcome of the request matters - release the connection
        response.close()
        return True
    except Exception as e:
        # deliberate best-effort: any failure means "does not exist"
        LOG.warn('External(?) image reference not found (%s)' % e)
        return False
Ejemplo n.º 2
0
def existsExternalImageUrl(url):
    """ Check if the external URL exists (by issuing a HTTP HEAD request).

        Returns True if the request could be opened. Returns False if
        ``url`` is empty, does not start with 'http' or if the request
        fails for any reason (the failure is logged as a warning).
    """

    # empty values and URLs not starting with 'http' are never requested
    if not url or not url.startswith('http'):
        return False

    class HeadRequest(Request):
        """ Request subclass overriding the HTTP method with HEAD """
        def get_method(self):
            return "HEAD"

    try:
        response = urlopen(HeadRequest(url))
        # only the outcome of the request matters - release the connection
        response.close()
        return True
    except Exception as e:
        # deliberate best-effort: any failure means "does not exist"
        LOG.warn('External(?) image reference not found (%s)' % e)
        return False
def makeImagesLocal(root, params):
    """ Deal with internal and external image references.

        Every <img> found below the document nodes of ``root`` is copied
        into ``params['destdir']`` and its 'src' attribute is rewritten to
        the name of the local file. Supplementary information per image is
        written to 'images.ini' inside the destination directory.

        root -- lxml tree of the document(s); it is modified in place
        params -- dict providing 'context' (used for looking up the
                  reference catalog) and 'destdir' (output directory)
    """

    ref_catalog = getToolByName(params['context'], 'reference_catalog')
    destdir = params['destdir']
    ini_filename = os.path.join(destdir, 'images.ini')
    images_seen = dict()

    # 'with' flushes and closes images.ini even if processing an image fails
    with open(ini_filename, 'w') as fp_ini:
        for document_node in CSSSelector('div.mode-flat.level-0,div.mode-nested.level-1')(root):
            document_obj = ref_catalog.lookupObject(document_node.get('uid'))

            for img in document_node.xpath(xpath_query(['img'])):
                # 'internal' images are marked with class="internal-resource"
                # in order to prevent image fetching later on
                if 'internal-resource' in (img.get('class') or '') or img.get('processed'):
                    continue

                src = img.get('src')
                LOG.info('Introspecting image: %s' % src)
                img_obj = resolveImage(document_obj, src)

                if img_obj is None:
                    # like some external image URL
                    _fetchRemoteImage(img, src, destdir, fp_ini, images_seen)
                    continue

                # resolved did find a local image
                LOG.info('  Local processing: %s' % src)

                # The scale is derived for every occurrence of the image,
                # otherwise repeated references to an already exported image
                # would end up with an empty 'originalscale'.
                scale = _scaleFromSrc(src)
                img_filename = images_seen.get(src)
                if not img_filename:
                    img_filename = _exportLocalImage(img_obj, destdir, fp_ini, scale)
                    if img_filename is None:
                        LOG.warn('No image found: %s - removed from output' % src)
                        # lxml elements have no extract() method; remove the
                        # node the same way as for unreachable remote images
                        img.getparent().remove(img)
                        continue
                    images_seen[src] = os.path.basename(img_filename)

                img.attrib['src'] = os.path.basename(img_filename)
                LOG.info('  Assigned new id: %s' % img.get('src'))
                img.attrib['originalscale'] = scale
                img.attrib['style'] = 'width: 100%'  # need for PrinceXML8
                img.attrib['processed'] = '1'

                _wrapImageInContainer(img, img_obj)


def _scaleFromSrc(src):
    """ Return the image scale encoded in the 'src' URL ('' if there is none) """
    src_parts = src.split('/')
    if '@@images' in src_parts:
        return src_parts[-1]
    if src_parts[-1].startswith('image_'):
        return src_parts[-1][6:]
    return ''


def _fetchRemoteImage(img, src, destdir, fp_ini, images_seen):
    """ Download ``src`` into ``destdir`` and point ``img`` to the local copy.

        The <img> node is removed from the tree if the download fails.
    """
    LOG.info('  Remote image fetching: %s' % src)

    try:
        response = urllib2.urlopen(str(src))
        try:
            img_data = response.read()
        finally:
            response.close()
    except (ValueError, urllib2.URLError) as e:
        LOG.warn('No image found: %s - removed from output (reason: %s)' % (src, e))
        img.getparent().remove(img)
        return

    img_basename = src.split('/')[-1]
    # NOTE(review): tempfile.mktemp() is documented as unsafe (race between
    # choosing the name and creating the file). It is kept because mkstemp()
    # would change the permissions of the exported files - confirm whether
    # that is acceptable before switching.
    tmpname = tempfile.mktemp(dir=destdir) + '_' + img_basename
    with open(tmpname, 'wb') as fp:
        fp.write(img_data)

    # write supplementary information to an .ini file per image
    img_id = os.path.basename(tmpname)
    fp_ini.write('[%s]\n' % img_id)
    fp_ini.write('id = %s\n' % img_id)
    fp_ini.write('filename = %s\n' % tmpname)
    fp_ini.write('url = %s\n' % str(src))
    fp_ini.write('scale = %s\n' % '')
    img.attrib['src'] = img_id
    img.attrib['originalscale'] = ''
    images_seen[src] = img_id
    LOG.info('  Assigned new id: %s' % img_id)


def _exportLocalImage(img_obj, destdir, fp_ini, scale):
    """ Export ``img_obj`` (and its preview scale) into ``destdir``.

        Returns the full path of the exported image or None if ``img_obj``
        provides neither a 'data' nor a '_data' attribute.
    """
    img_data = None
    for attr in ('data', '_data'):
        try:
            img_data = str(getattr(img_obj, attr))
            break  # first attribute found wins
        except AttributeError:
            pass
    if img_data is None:
        return None

    # NOTE(review): see the mktemp() remark in _fetchRemoteImage()
    tmpname = tempfile.mktemp(dir=destdir)
    with open(tmpname, 'wb') as fp:
        fp.write(img_data)

    # determine graphic format using PIL; the image object is dropped right
    # away so that it does not reference the file while it is renamed
    pil_image = PIL.Image.open(tmpname)
    img_format = pil_image.format.lower()
    del pil_image

    # generate unique and speaking image names: <id>, <id>-0.<format>, ...
    img_id = img_obj.getId()
    dest_img_name = os.path.join(destdir, img_id)
    count = 0
    while os.path.exists(dest_img_name):
        img_id = '%s-%d.%s' % (os.path.splitext(img_obj.getId())[0], count, img_format)
        dest_img_name = os.path.join(destdir, img_id)
        count += 1
    os.rename(tmpname, dest_img_name)
    LOG.info('  Exported to: %s' % dest_img_name)

    # now also export the preview scale as well
    # (needed for EPUB export/conversion)
    preview_filename = os.path.join(os.path.dirname(dest_img_name),
                                    'preview_' + os.path.basename(dest_img_name))
    try:
        image_field = img_obj.Schema().getField('image')
        preview_img = image_field.getScale(img_obj, scale='preview')
        if preview_img == '':  # no scales created?
            image_field.createScales(img_obj)
            preview_img = image_field.getScale(img_obj, scale='preview')
    except AttributeError:  # Schema (for News Item images)
        preview_img = None

    if preview_img and safe_hasattr(preview_img, 'data'):
        with open(preview_filename, 'wb') as fp:
            fp.write(str(preview_img.data))
        LOG.info('  Exported preview scale to: %s' % preview_filename)

    # Title and description are fetched together before anything is written
    # so that a partial failure can not produce a duplicate 'title' entry.
    try:
        title_line = 'title = %s' % img_obj.Title()
        description_line = 'description = %s' % img_obj.Description()
    except AttributeError:
        # NOTE(review): the literal 's' looks like a mangled '%s' - the
        # original output is preserved until the intended value is confirmed
        title_line = 'title = s'
        description_line = 'description = s'

    fp_ini.write('[%s]\n' % os.path.basename(dest_img_name))
    fp_ini.write('filename = %s\n' % dest_img_name)
    fp_ini.write('id = %s\n' % img_id)
    fp_ini.write(title_line + '\n')
    fp_ini.write(description_line + '\n')
    fp_ini.write('scale = %s\n' % scale)
    return dest_img_name


def _wrapImageInContainer(img, img_obj):
    """ Replace ``img`` by a <div class="image-container"> holding a copy of
        the image and (optionally) its caption.

        The replacement only takes place if ``img_obj`` has a
        'displayInline' field whose value is false.
    """
    # image scaling
    # don't add scale as style since the outer image-container
    # has the style set
    try:
        pdf_scale = img_obj.getField('pdfScale').get(img_obj)
    except AttributeError:
        pdf_scale = 100
    img.attrib['scale'] = str(pdf_scale)

    # now move <img> tag into a dedicated <div>
    div = lxml.html.Element('div')
    div.attrib['class'] = 'image-container'
    div.attrib['style'] = 'width: %d%%' % pdf_scale
    div.attrib['scale'] = str(pdf_scale)
    new_img = lxml.html.Element('img')
    new_img.attrib.update(img.attrib.items())
    div.insert(0, new_img)

    try:
        displayInline_field = img_obj.getField('displayInline')
    except AttributeError:
        displayInline_field = False
    if not displayInline_field or displayInline_field.get(img_obj):
        # no such field or image flagged as inline: <img> is left in place
        return

    # image caption
    img_caption_position = img_obj.getField('captionPosition').get(img_obj)
    img_caption = lxml.html.Element('div')
    img_caption.attrib['class'] = 'image-caption'

    # exclude from image enumeration
    exclude_field = img_obj.getField('excludeFromImageEnumeration')
    if exclude_field and not exclude_field.get(img_obj):

        # add description
        span = lxml.html.Element('span')
        description = unicode(img_obj.Description(), 'utf-8')
        if description:
            span.text = description
            span.attrib['class'] = 'image-caption-description image-caption-with-description'
        else:
            span.attrib['class'] = 'image-caption-description image-caption-without-description'
        img_caption.insert(0, span)

        if not description:
            warn = lxml.html.Element('span')
            warn.attrib['class'] = 'warning-no-description'
            warn.text = u'image has no description'
            img_caption.append(warn)

        # add title (inserted in front of the description); the CSS class
        # depends on the title itself, not on the description
        span = lxml.html.Element('span')
        title = unicode(img_obj.Title(), 'utf-8')
        if title:
            span.text = title
            span.attrib['class'] = 'image-caption-title image-caption-with-title'
        else:
            span.attrib['class'] = 'image-caption-title image-caption-without-title'
        img_caption.insert(0, span)

        if not title:
            warn = lxml.html.Element('span')
            warn.attrib['class'] = 'warning-no-title'
            warn.text = u'image has no title'
            img_caption.append(warn)

        # add title and description to container
        if img_caption_position == 'top':
            div.insert(0, img_caption)
        else:
            div.append(img_caption)

    # keep the text following the original <img> attached to the container
    div.tail = img.tail
    img.getparent().replace(img, div)
def makeImagesLocal(root, params):
    """ Deal with internal and external image references.

        Every <img> found below the document nodes of ``root`` is copied
        into ``params['destdir']`` and its 'src' attribute is rewritten to
        the name of the local file. Supplementary information per image is
        written to 'images.ini' inside the destination directory.

        root -- lxml tree of the document(s); it is modified in place
        params -- dict providing 'context' (used for looking up the
                  reference catalog) and 'destdir' (output directory)
    """

    ref_catalog = getToolByName(params['context'], 'reference_catalog')
    destdir = params['destdir']
    ini_filename = os.path.join(destdir, 'images.ini')
    images_seen = dict()

    # 'with' flushes and closes images.ini even if processing an image fails
    with open(ini_filename, 'w') as fp_ini:
        for document_node in CSSSelector(
                'div.mode-flat.level-0,div.mode-nested.level-1')(root):
            document_obj = ref_catalog.lookupObject(document_node.get('uid'))

            for img in document_node.xpath(xpath_query(['img'])):
                # 'internal' images are marked with class="internal-resource"
                # in order to prevent image fetching later on
                if 'internal-resource' in (img.get('class')
                                           or '') or img.get('processed'):
                    continue

                src = img.get('src')
                LOG.info('Introspecting image: %s' % src)
                img_obj = resolveImage(document_obj, src)

                if img_obj is None:
                    # like some external image URL
                    _fetchRemoteImage(img, src, destdir, fp_ini, images_seen)
                    continue

                # resolved did find a local image
                LOG.info('  Local processing: %s' % src)

                # The scale is derived for every occurrence of the image,
                # otherwise repeated references to an already exported image
                # would end up with an empty 'originalscale'.
                scale = _scaleFromSrc(src)
                img_filename = images_seen.get(src)
                if not img_filename:
                    img_filename = _exportLocalImage(img_obj, destdir, fp_ini,
                                                     scale)
                    if img_filename is None:
                        LOG.warn('No image found: %s - removed from output' % src)
                        # lxml elements have no extract() method; remove the
                        # node the same way as for unreachable remote images
                        img.getparent().remove(img)
                        continue
                    images_seen[src] = os.path.basename(img_filename)

                img.attrib['src'] = os.path.basename(img_filename)
                LOG.info('  Assigned new id: %s' % img.get('src'))
                img.attrib['originalscale'] = scale
                img.attrib['style'] = 'width: 100%'  # need for PrinceXML8
                img.attrib['processed'] = '1'

                _wrapImageInContainer(img, img_obj)


def _scaleFromSrc(src):
    """ Return the image scale encoded in the 'src' URL ('' if there is none) """
    src_parts = src.split('/')
    if '@@images' in src_parts:
        return src_parts[-1]
    if src_parts[-1].startswith('image_'):
        return src_parts[-1][6:]
    return ''


def _fetchRemoteImage(img, src, destdir, fp_ini, images_seen):
    """ Download ``src`` into ``destdir`` and point ``img`` to the local copy.

        The <img> node is removed from the tree if the download fails.
    """
    LOG.info('  Remote image fetching: %s' % src)

    try:
        response = urllib2.urlopen(str(src))
        try:
            img_data = response.read()
        finally:
            response.close()
    except (ValueError, urllib2.URLError) as e:
        LOG.warn(
            'No image found: %s - removed from output (reason: %s)'
            % (src, e))
        img.getparent().remove(img)
        return

    img_basename = src.split('/')[-1]
    # NOTE(review): tempfile.mktemp() is documented as unsafe (race between
    # choosing the name and creating the file). It is kept because mkstemp()
    # would change the permissions of the exported files - confirm whether
    # that is acceptable before switching.
    tmpname = tempfile.mktemp(dir=destdir) + '_' + img_basename
    with open(tmpname, 'wb') as fp:
        fp.write(img_data)

    # write supplementary information to an .ini file per image
    img_id = os.path.basename(tmpname)
    fp_ini.write('[%s]\n' % img_id)
    fp_ini.write('id = %s\n' % img_id)
    fp_ini.write('filename = %s\n' % tmpname)
    fp_ini.write('url = %s\n' % str(src))
    fp_ini.write('scale = %s\n' % '')
    img.attrib['src'] = img_id
    img.attrib['originalscale'] = ''
    images_seen[src] = img_id
    LOG.info('  Assigned new id: %s' % img_id)


def _exportLocalImage(img_obj, destdir, fp_ini, scale):
    """ Export ``img_obj`` (and its preview scale) into ``destdir``.

        Returns the full path of the exported image or None if ``img_obj``
        provides neither a 'data' nor a '_data' attribute.
    """
    img_data = None
    for attr in ('data', '_data'):
        try:
            img_data = str(getattr(img_obj, attr))
            break  # first attribute found wins
        except AttributeError:
            pass
    if img_data is None:
        return None

    # NOTE(review): see the mktemp() remark in _fetchRemoteImage()
    tmpname = tempfile.mktemp(dir=destdir)
    with open(tmpname, 'wb') as fp:
        fp.write(img_data)

    # determine graphic format using PIL; the image object is dropped right
    # away so that it does not reference the file while it is renamed
    pil_image = PIL.Image.open(tmpname)
    img_format = pil_image.format.lower()
    del pil_image

    # generate unique and speaking image names: <id>, <id>-0.<format>, ...
    img_id = img_obj.getId()
    dest_img_name = os.path.join(destdir, img_id)
    count = 0
    while os.path.exists(dest_img_name):
        img_id = '%s-%d.%s' % (os.path.splitext(img_obj.getId())[0], count,
                               img_format)
        dest_img_name = os.path.join(destdir, img_id)
        count += 1
    os.rename(tmpname, dest_img_name)
    LOG.info('  Exported to: %s' % dest_img_name)

    # now also export the preview scale as well
    # (needed for EPUB export/conversion)
    preview_filename = os.path.join(
        os.path.dirname(dest_img_name),
        'preview_' + os.path.basename(dest_img_name))
    try:
        image_field = img_obj.Schema().getField('image')
        preview_img = image_field.getScale(img_obj, scale='preview')
        if preview_img == '':  # no scales created?
            image_field.createScales(img_obj)
            preview_img = image_field.getScale(img_obj, scale='preview')
    except AttributeError:  # Schema (for News Item images)
        preview_img = None

    if preview_img and safe_hasattr(preview_img, 'data'):
        with open(preview_filename, 'wb') as fp:
            fp.write(str(preview_img.data))
        LOG.info('  Exported preview scale to: %s' %
                 preview_filename)

    # Title and description are fetched together before anything is written
    # so that a partial failure can not produce a duplicate 'title' entry.
    try:
        title_line = 'title = %s' % img_obj.Title()
        description_line = 'description = %s' % img_obj.Description()
    except AttributeError:
        # NOTE(review): the literal 's' looks like a mangled '%s' - the
        # original output is preserved until the intended value is confirmed
        title_line = 'title = s'
        description_line = 'description = s'

    fp_ini.write('[%s]\n' % os.path.basename(dest_img_name))
    fp_ini.write('filename = %s\n' % dest_img_name)
    fp_ini.write('id = %s\n' % img_id)
    fp_ini.write(title_line + '\n')
    fp_ini.write(description_line + '\n')
    fp_ini.write('scale = %s\n' % scale)
    return dest_img_name


def _wrapImageInContainer(img, img_obj):
    """ Replace ``img`` by a <div class="image-container"> holding a copy of
        the image and (optionally) its caption.

        The replacement only takes place if ``img_obj`` has a
        'displayInline' field whose value is false.
    """
    # image scaling
    # don't add scale as style since the outer image-container
    # has the style set
    try:
        pdf_scale = img_obj.getField('pdfScale').get(img_obj)
    except AttributeError:
        pdf_scale = 100
    img.attrib['scale'] = str(pdf_scale)

    # now move <img> tag into a dedicated <div>
    div = lxml.html.Element('div')
    div.attrib['class'] = 'image-container'
    div.attrib['style'] = 'width: %d%%' % pdf_scale
    div.attrib['scale'] = str(pdf_scale)
    new_img = lxml.html.Element('img')
    new_img.attrib.update(img.attrib.items())
    div.insert(0, new_img)

    try:
        displayInline_field = img_obj.getField('displayInline')
    except AttributeError:
        displayInline_field = False
    if not displayInline_field or displayInline_field.get(img_obj):
        # no such field or image flagged as inline: <img> is left in place
        return

    # image caption
    img_caption_position = img_obj.getField('captionPosition').get(
        img_obj)
    img_caption = lxml.html.Element('div')
    img_caption.attrib['class'] = 'image-caption'

    # exclude from image enumeration
    exclude_field = img_obj.getField('excludeFromImageEnumeration')
    if exclude_field and not exclude_field.get(img_obj):

        # add description
        span = lxml.html.Element('span')
        description = unicode(img_obj.Description(), 'utf-8')
        if description:
            span.text = description
            span.attrib['class'] = 'image-caption-description image-caption-with-description'
        else:
            span.attrib['class'] = 'image-caption-description image-caption-without-description'
        img_caption.insert(0, span)

        if not description:
            warn = lxml.html.Element('span')
            warn.attrib['class'] = 'warning-no-description'
            warn.text = u'image has no description'
            img_caption.append(warn)

        # add title (inserted in front of the description); the CSS class
        # depends on the title itself, not on the description
        span = lxml.html.Element('span')
        title = unicode(img_obj.Title(), 'utf-8')
        if title:
            span.text = title
            span.attrib['class'] = 'image-caption-title image-caption-with-title'
        else:
            span.attrib['class'] = 'image-caption-title image-caption-without-title'
        img_caption.insert(0, span)

        if not title:
            warn = lxml.html.Element('span')
            warn.attrib['class'] = 'warning-no-title'
            warn.text = u'image has no title'
            img_caption.append(warn)

        # add title and description to container
        if img_caption_position == 'top':
            div.insert(0, img_caption)
        else:
            div.append(img_caption)

    # keep the text following the original <img> attached to the container
    div.tail = img.tail
    img.getparent().replace(img, div)