Ejemplo n.º 1
0
    def __iter__(self):
        """Yield each item of ``self.previous`` with a normalized id and path.

        For every item the last segment of its path (and its "id", when
        present) is passed through ``normalize_url``.  Items for which no
        path key is found, and items whose id and path are already
        normalized, are yielded unchanged.

        When a path is changed, the mapping ``original path -> new path``
        is recorded in ``self.transmogrifier.sc_transmogrifier_changed_paths``
        (created on first use).
        """
        for item in self.previous:

            item_id = item.get("id", u"")
            pathkey = self.pathkey(*item.keys())[0]
            if not pathkey:
                # No path key matched this item: nothing to normalize.
                yield item
                continue
            path = item[pathkey]

            # Split off the last path segment.  ``rpartition`` leaves
            # ``separator`` empty when the path holds a single segment, so
            # a path like "foo" is rebuilt as "foo" and not as "foo/foo".
            parent, separator, last_segment = path.rstrip("/").rpartition("/")
            if not item_id:
                # Items without an explicit id take it from the path.
                item_id = last_segment

            # Removes accents from composed characters where possible
            # and suppresses non "alphanum_-" chars
            # (example: u"maçã" -> u"maca").
            normal_path = parent + separator + normalize_url(last_segment)
            normal_id = normalize_url(item_id)

            plone = get_plone_root(self.transmogrifier.context)
            if plone:
                s_id = str(normal_id)
                if (hasattr(plone, s_id) and
                        s_id not in plone.contentIds()):
                    # The id matches an attribute of the root object that
                    # is not one of its content ids.  This condition
                    # would raise a "BadRequest" exception, so pick
                    # another name.
                    normal_id += u"_1"
                    normal_path += u"_1"

            if normal_path == path and normal_id == item_id:
                # Already normalized: leave the item untouched.
                yield item
                continue

            if "id" in item:
                item["id"] = normal_id
            item[pathkey] = normal_path

            # Keep track of every renamed path on the transmogrifier.
            if not hasattr(self.transmogrifier,
                           "sc_transmogrifier_changed_paths"):
                self.transmogrifier.sc_transmogrifier_changed_paths = {}
            self.transmogrifier.sc_transmogrifier_changed_paths[path] = \
                normal_path

            # NOTE(review): this stores the *normalized* path under
            # "_orig_path"; confirm whether the pre-normalization ``path``
            # was intended here.
            if "_orig_path" not in item:
                item["_orig_path"] = normal_path
            yield item
    def transmogrify(self, item):
        path = self.get_path(item)
        text = item.get("text", u"")
        if not text:
            raise NothingToDoHere
        # One never knows how those json read
        # strings end up:
        if not isinstance(text, unicode):
            text = text.decode("utf-8")
        image_matches = re.finditer(ur"""<img.*?src\s*?=\s*?"(.*?)".*?>""", text,
                re.DOTALL | re.MULTILINE
                )
        images = []
        for match in reversed(list(image_matches)):

            # *** retrieve image URL and name data:

            start_tag, end_tag, img_title, rel_url, url = \
                    get_image_refs(match, path, self.source_prefix)

            if url == rel_url and not self.load_external_images:
                continue

            # *** retrieve remote image itself

            real_url, image, view_parts = get_remote_image(
                                           url, item, img_title,
                                           self.use_jsonmigrator)
            if real_url is None:
                continue

            image_filename = image["_filename"]

            # *** Fix image references in item's text html content:

            if real_url.startswith(self.source_prefix):
                img_path = real_url[len(self.source_prefix):]
            else: # the still-not-used external images case
                # FIXME: won't work for non folderish content types:
                img_path = path + "/" + url.rsplit("/")[-1]
            img_ref = img_path
            if self.embed_images:
                image["_orig_path"] = img_path
                img_path = path + "/" + image_filename
                img_ref = image_filename
                # Restore the reference to the
                # special view used in plone, after the
                # image file name:
                if view_parts:
                    img_ref += "/%s/" % ("/".join(view_parts))

            image["_path"] = img_path

            if self.replace_references and img_path != rel_url:
                img_ref_unicode = img_ref.decode("utf-8")
                # FIXME: For now, sc.transmgorgrifier
                # fix_path blueprint is not keeping
                # references in texts in sync
                # so, we assume it is being used on the pipeline
                # and normalize the url here
                # (should not affect well behaved urls anyway)
                img_ref_unicode = normalize_url(img_ref_unicode)
                text = text[:start_tag] + img_ref_unicode + text[end_tag:]

            # *** Set image to be put in the pipeline
            if self.use_wormhole:
                self.storage["wormhole"].push(image)
            else:
                images.append(image)
        item["text"] = text
        return item, images