Example #1
0
 def create_images_from_urls_in_content(self, body):
     """Create Image objects and transfer image files to media root.

     Every ``<img>`` found in *body* is downloaded with ``urlretrieve``,
     stored as an ``Image`` and its original URL is replaced in *body* by
     the URL of the stored file.

     :param body: HTML source of a post.
     :returns: *body* with the URLs of the imported images rewritten.
     """
     soup = BeautifulSoup(body, "html5lib")
     for img in soup.findAll('img'):
         # .get() avoids a KeyError on <img> tags that carry no src at all.
         old_url = img.get('src')
         if not old_url:
             continue  # Blank image
         # ``'width' in img`` would search the tag's children rather than
         # its attributes, so the attributes are tested explicitly. Both
         # must be present, otherwise the defaults are used.
         if img.has_attr('width') and img.has_attr('height'):
             width = img['width']
             height = img['height']
         else:
             width = 100
             height = 100
         file_ = os.path.split(old_url)[1]
         try:
             remote_image = urllib.request.urlretrieve(old_url)
         except (urllib.error.HTTPError, urllib.error.URLError,
                 UnicodeEncodeError):
             print("Unable to import " + old_url)
             continue
         image = Image(title=file_, width=width, height=height)
         try:
             # Close the downloaded file once it has been copied.
             with open(remote_image[0], 'rb') as downloaded:
                 image.file.save(file_, File(downloaded))
             image.save()
             new_url = image.file.url
             body = body.replace(old_url, new_url)
             body = self.convert_html_entities(body)
         except TypeError:
             print("Unable to import image {}".format(remote_image[0]))
     return body
Example #2
0
 def process_content_image(self, content):
     """Import the images referenced in *content* and embed them.

     Only ``<img>`` tags whose ``src`` contains ``wp-content`` or ``files``
     are handled. Falsy *content* is returned untouched; otherwise the
     serialised tree is returned.
     """
     self.stdout.write('\tGenerate and replace entry content images....')
     if not content:
         return content
     tree = lxml.html.fromstring(content)
     for node in tree.iter('img'):
         wrapper = node.getparent()
         src = node.attrib['src']
         if 'wp-content' not in src and 'files' not in src:
             continue
         downloaded = self._import_image(src)
         if not downloaded:
             continue
         attrs = node.attrib
         label = attrs.get('title') or attrs.get('alt')
         stored = WagtailImage(file=File(file=downloaded), title=label)
         stored.save()
         if wrapper.tag == 'a':
             # The image is wrapped in a link: put the embed after the
             # link and drop the link together with its content.
             wrapper.addnext(ET.XML(self._image_to_embed(stored)))
             wrapper.drop_tree()
         else:
             wrapper.append(ET.XML(self._image_to_embed(stored)))
             node.drop_tag()
     return ET.tostring(tree)
 def create_images_from_urls_in_content(self, body):
     """Create Image objects and transfer image files to media root.

     Every ``<img>`` found in *body* is downloaded with ``urlretrieve``,
     stored as an ``Image`` and its original URL is replaced in *body* by
     the URL of the stored file.

     :param body: HTML source of a post.
     :returns: *body* with the URLs of the imported images rewritten.
     """
     soup = BeautifulSoup(body, "html5lib")
     for img in soup.findAll('img'):
         # .get() avoids a KeyError on <img> tags that carry no src at all.
         old_url = img.get('src')
         if not old_url:
             continue  # Blank image
         # ``'width' in img`` would search the tag's children rather than
         # its attributes, so the attributes are tested explicitly. Both
         # must be present, otherwise the defaults are used.
         if img.has_attr('width') and img.has_attr('height'):
             width = img['width']
             height = img['height']
         else:
             width = 100
             height = 100
         file_ = os.path.split(old_url)[1]
         try:
             remote_image = urllib.request.urlretrieve(old_url)
         except urllib.error.HTTPError:
             print("Unable to import " + old_url)
             continue
         except urllib.error.URLError:
             print("URL error - try again " + old_url)
             continue
         image = Image(title=file_, width=width, height=height)
         # Close the downloaded file once it has been copied.
         with open(remote_image[0], 'rb') as downloaded:
             image.file.save(file_, File(downloaded))
         image.save()
         new_url = image.file.url
         body = body.replace(old_url, new_url)
         body = self.convert_html_entities(body)
     return body
    def create_images_from_urls_in_content(self, body):
        """Create Image objects and transfer image files to media root.

        Every remote ``<img>`` found in *body* is fetched with ``requests``,
        stored as an ``Image`` and its original URL is replaced in *body* by
        the URL of the stored file. Images that cannot be fetched are logged
        and left untouched.

        :param body: HTML source of a post.
        :returns: *body* with the URLs of the imported images rewritten.
        """
        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36 SE 2.X MetaSr 1.0"
        }
        soup = BeautifulSoup(body, "html5lib")
        for img in soup.findAll('img'):
            # .get() avoids a KeyError on <img> tags that carry no src at all.
            old_url = img.get('src')
            if not old_url:
                continue  # Blank image
            if old_url.startswith('data:'):
                continue  # Embedded image
            # ``'width' in img`` would search the tag's children rather than
            # its attributes, so the attributes are tested explicitly. Both
            # must be present, otherwise the defaults are used.
            if img.has_attr('width') and img.has_attr('height'):
                width = img['width']
                height = img['height']
            else:
                width = 100
                height = 100
            # The file name doubles as the title and is capped at 255 chars.
            file_ = os.path.split(old_url)[1][:255]
            try:
                req = requests.get(self.prepare_url(old_url), headers=headers, timeout=10)
            except (urllib.error.HTTPError,
                    urllib.error.URLError,
                    UnicodeEncodeError,
                    requests.exceptions.SSLError,
                    KeyError,
                    requests.exceptions.ConnectionError,
                    requests.exceptions.Timeout,
                    requests.exceptions.MissingSchema,
                    requests.exceptions.InvalidSchema,
                    requests.exceptions.InvalidURL):
                # The message uses old_url, not img['src'], so the handler
                # cannot raise a second KeyError itself.
                logging.warning("Unable to import image: " + old_url)
                continue
            try:
                if req.status_code == 200 and req.content:
                    image = Image(title=file_, width=width, height=height)
                    with tempfile.NamedTemporaryFile() as remote_image:
                        remote_image.write(req.content)
                        # Flush so the bytes are on disk before the file is
                        # re-opened by name.
                        remote_image.flush()
                        with open(remote_image.name, 'rb') as downloaded:
                            image.file.save(file_, File(downloaded))
                    image.save()
                    new_url = image.file.url
                    body = body.replace(old_url, new_url)
                else:
                    # Record images that need a manual fix.
                    logging.warning("Unable to import image: " + old_url)
                body = self.convert_html_entities(body)
            except TypeError:
                logging.warning("Unable to import image: " + old_url)
        return body
Example #5
0
    def import_entries(self):
        """Import every ``ZinniaEntry`` as an ``EntryPage`` under ``self.blogpage``.

        For each entry the header image and the content images served from
        ``settings.MEDIA_URL`` are copied into ``WagtailImage`` objects, the
        page is created when no page with the same slug exists, categories
        and tags are imported, and the page is recorded in ``self.entries``
        keyed by the entry's primary key.
        """
        self.stdout.write("Importing entries...")
        entries = ZinniaEntry.objects.all()
        for entry in entries:
            self.stdout.write(entry.title)
            # Header images
            if entry.image:
                header_image = WagtailImage(file=entry.image, title=os.path.basename(entry.image.url))
                self.stdout.write('\tImported header image: {}'.format(entry.image))
                header_image.save()
            else:
                header_image = None

            self.stdout.write('\tGenerate and replace entry content images....')
            if entry.content:
                root = lxml.html.fromstring(entry.content)
                for el in root.iter('img'):
                    # Tolerate <img> tags without a src attribute.
                    src = el.attrib.get('src', '')
                    if not src.startswith(settings.MEDIA_URL):
                        continue
                    # Strip only the leading MEDIA_URL; a blanket replace()
                    # would also eat later occurrences inside the path.
                    old_image = src[len(settings.MEDIA_URL):]
                    image_name = os.path.basename(old_image)
                    # Images are binary data: open in 'rb', not text mode.
                    with open('{}/{}'.format(settings.MEDIA_ROOT, old_image), 'rb') as image_file:
                        new_image = WagtailImage(file=File(file=image_file, name=image_name),
                                                 title=image_name)
                        new_image.save()
                        el.attrib['src'] = new_image.file.url
                        self.stdout.write('\t\t{}'.format(new_image.file.url))

                # New content with images replaced; serialised as text so
                # the page body is not handed a bytes object.
                content = lxml.html.tostring(root, pretty_print=True, encoding='unicode')
            else:
                content = entry.content

            # Create page
            try:
                page = EntryPage.objects.get(slug=entry.slug)
            except EntryPage.DoesNotExist:
                page = EntryPage(
                    title=entry.title,
                    body=content,
                    slug=entry.slug,
                    go_live_at=entry.start_publication,
                    expire_at=entry.end_publication,
                    first_published_at=entry.creation_date,
                    date=entry.creation_date,
                    owner=entry.authors.first(),
                    seo_title=entry.title,
                    search_description=entry.excerpt,
                    live=entry.is_visible,
                    header_image=header_image
                )
                self.blogpage.add_child(instance=page)
                revision = self.blogpage.save_revision()
                revision.publish()
            self.import_entry_categories(entry, page)
            self.import_entry_tags(entry, page)
            page.save()
            page.save_revision(changed=False)
            self.entries[entry.pk] = page
Example #6
0
 def create_blog_pages(self, posts, blog_index, *args, **options):
     """Create Blog post entries from wordpress data.

     For every post a ``BlogPage`` with the same slug is updated, or a new
     one is added below *blog_index*. The featured image (if any) is
     downloaded and attached as header image, then categories, tags and
     optionally comments are imported.

     :param posts: iterable of dict-like post records.
     :param blog_index: parent page receiving newly created blog pages.
     """
     for post in posts:
         post_id = post.get('ID')
         title = post.get('title')
         if title:
             title = self.convert_html_entities(title)
         slug = post.get('slug')
         description = post.get('description')
         if description:
             description = self.convert_html_entities(description)
         body = post.get('content')
         # get image info from content and create image objects
         body = self.create_images_from_urls_in_content(body)
         # author/user data
         author = post.get('author')
         user = self.create_user(author)
         categories = post.get('terms')
         # format the date
         date = post.get('date')[:10]
         try:
             new_entry = BlogPage.objects.get(slug=slug)
             new_entry.title = title
             new_entry.body = body
             new_entry.owner = user
             new_entry.save()
         except BlogPage.DoesNotExist:
             # Store the post's own description; previously the literal
             # word "description" was saved for every page.
             new_entry = blog_index.add_child(instance=BlogPage(
                 title=title, slug=slug,
                 search_description=description or "",
                 date=date, body=body, owner=user))
         header_image = None
         featured_image = post.get('featured_image')
         if featured_image is not None:
             image_title = featured_image['title']
             source = featured_image['source']
             # The file name is taken before the host is rewritten.
             file_ = os.path.split(source)[1]
             source = source.replace('stage.swoon', 'swoon')
             try:
                 remote_image = urllib.request.urlretrieve(
                     self.prepare_url(source))
                 candidate = Image(title=image_title, width=640, height=290)
                 # Close the downloaded file once it has been copied.
                 with open(remote_image[0], 'rb') as downloaded:
                     candidate.file.save(file_, File(downloaded))
                 candidate.save()
                 header_image = candidate
             except (UnicodeEncodeError, urllib.error.URLError):
                 # A failed download must not abort the whole import; the
                 # page is simply saved without a header image.
                 print('unable to set header image {}'.format(source))
         new_entry.header_image = header_image
         new_entry.save()
         if categories:
             self.create_categories_and_tags(new_entry, categories)
         if self.should_import_comments:
             self.import_comments(post_id, slug)
     def add_to_collection(item, collection):
         """Create or update the Movie/EBook child of *collection* for *item*.

         *item* is a file path. Optional side files ``<item>.description.txt``
         and ``<item>.thumbnail.jpeg`` provide the description and thumbnail.
         Files whose extension is in neither ``MOVIE_EXTENSIONS`` nor
         ``EBOOK_EXTENSIONS`` are ignored.
         """
         file_name = os.path.basename(item)
         extension = file_name.split(".")[-1]
         if extension in MOVIE_EXTENSIONS:
             cls = models.Movie
         elif extension in EBOOK_EXTENSIONS:
             cls = models.EBook
         else:
             # Unsupported type: leave before any thumbnail is stored.
             return

         description_file = item + ".description.txt"
         if os.path.isfile(description_file):
             with open(description_file, 'r') as handle:
                 description = handle.read()
         else:
             description = ""

         title = ".".join(file_name.split(".")[:-1])
         title = title.replace("_", " ")

         thumbnail_file = item + ".thumbnail.jpeg"
         if os.path.isfile(thumbnail_file):
             thumbnail = Image()
             # JPEG data is binary: open in 'rb', not text mode.
             with open(thumbnail_file, 'rb') as f:
                 thumbnail.file.save(file_name + ".jpeg", File(f))
             thumbnail.title = "Thumbnail for " + title
             thumbnail.save()
         else:
             thumbnail = None

         slug = slugify(title)
         path = collection.path + "{pos:s}".format(pos=str(collection.numchild + 1).zfill(4))
         try:
             obj = collection.get_children().filter(slug=slug)[0]
             # Ensure that other objects with same slug and path are deleted
             others = collection.get_children().filter(slug=slug).exclude(id=obj.id)
             if others.exists():
                 logger.warning("Other objects with same path existed and were deleted. File: " + item)
                 others.delete()
         except IndexError:
             obj = cls(path=path)
         obj.numchild = 0
         obj.depth = collection.depth + 1
         obj.show_in_menus = False
         obj.resource_link = item
         obj.url_path = os.path.join(collection.url_path, slug) + "/"
         obj.slug = slug
         obj.title = title
         obj.live = True
         obj.short_description = description
         obj.author = options["author"]
         obj.duration = ""
         obj.thumbnail = thumbnail
         obj.save()

         collection.numchild += 1
         collection.save()
Example #8
0
    def save_images_to_cms(self):
        '''Save images to the database with:
        - title: the file name
        - tags: the directory containing the image

        Iterates over ``self.imgs``, a sequence of
        ``(img_path, dirname, img_name)`` tuples.
        '''

        for img_path, dirname, img_name in self.imgs:
            # Keep the file open only while the image is being saved so the
            # handle is not leaked.
            with open(img_path, "rb") as handle:
                # NOTE(review): the docstring says the tag is the containing
                # directory, yet img_name is passed here - confirm the intent.
                image = Image(title=img_name,
                              file=ImageFile(handle, name=os.path.basename(img_path)),
                              tags=img_name)  # is this correct??
                image.save()
Example #9
0
 def import_header_image(self, entry, items, image_id):
     """Attach the attachment whose post id equals *image_id* to *entry*.

     Each matching element of *items* is imported as a ``WagtailImage``
     and saved as ``entry.header_image``.
     """
     self.stdout.write('\tImport header images....')
     for node in items:
         kind = node.find(u'{{{0:s}}}post_type'.format(WP_NS)).text
         if kind != 'attachment':
             continue
         attachment_id = node.find(u'{{{0:s}}}post_id'.format(WP_NS)).text
         if attachment_id != image_id:
             continue
         title = node.find('title').text
         image_url = node.find(u'{{{0:s}}}attachment_url'.format(WP_NS)).text
         downloaded = self._import_image(image_url)
         stored = WagtailImage(file=File(file=downloaded, name=title), title=title)
         stored.save()
         entry.header_image = stored
         entry.save()
Example #10
0
 def import_header_image(self, entry, items, image_id):
     """Attach the attachment whose post id equals *image_id* to *entry*.

     Each matching element of *items* is imported as a ``WagtailImage``
     and saved as ``entry.header_image``.
     """
     self.stdout.write('\tImport header images....')
     for node in items:
         kind = node.find(u'{{{0:s}}}post_type'.format(WP_NS)).text
         if kind != 'attachment':
             continue
         attachment_id = node.find(
             u'{{{0:s}}}post_id'.format(WP_NS)).text
         if attachment_id != image_id:
             continue
         title = node.find('title').text
         image_url = node.find(
             u'{{{0:s}}}attachment_url'.format(WP_NS)).text
         downloaded = self._import_image(image_url)
         wrapped = File(file=downloaded, name=title)
         stored = WagtailImage(file=wrapped, title=title)
         stored.save()
         entry.header_image = stored
         entry.save()
Example #11
0
def generate_photo(request, page):
    """Give a ``PostPage`` without photo a generated identicon.

    The identicon is derived from ``page.title``, rendered as PNG, stored
    as an ``Image`` and assigned to ``page.photo``. Pages of another type
    and pages that already have a photo are left alone.

    :param request: unused here.
    :param page: the page to process.
    """
    if not isinstance(page, PostPage) or page.photo:
        return
    im = Vizhash(page.title, 64).identicon()
    buffer = BytesIO()
    im.save(fp=buffer, format="PNG")
    png_bytes = buffer.getvalue()
    content_file = ContentFile(png_bytes)
    # Positional arguments are (file, field_name, name, content_type, size,
    # charset). The size is the byte count and there is no charset.
    # Previously the bound method ``content_file.tell`` was passed as size
    # and the byte count as charset.
    image_file = InMemoryUploadedFile(content_file, None, page.title,
                                      "image/png", len(png_bytes), None)
    image = Image(title=page.title,
                  file=image_file,
                  width=im.width,
                  height=im.height,
                  created_at=page.created,
                  file_size=len(png_bytes))
    image.save()
    page.photo = image
    page.save()
Example #12
0
 def process_content_image(self, content):
     """Import the images referenced in *content* and embed them.

     Only ``<img>`` tags whose ``src`` contains ``wp-content`` or ``files``
     are handled. Falsy *content* is returned untouched; otherwise the
     serialised tree is returned.
     """
     self.stdout.write('\tGenerate and replace entry content images....')
     if not content:
         return content
     tree = lxml.html.fromstring(content)
     for node in tree.iter('img'):
         wrapper = node.getparent()
         src = node.attrib['src']
         if 'wp-content' not in src and 'files' not in src:
             continue
         downloaded = self._import_image(src)
         attrs = node.attrib
         label = attrs.get('title') or attrs.get('alt')
         stored = WagtailImage(file=File(file=downloaded, name=label), title=label)
         stored.save()
         if wrapper.tag == 'a':
             # The image is wrapped in a link: put the embed after the
             # link and drop the link together with its content.
             wrapper.addnext(ET.XML(self._image_to_embed(stored)))
             wrapper.drop_tree()
         else:
             wrapper.append(ET.XML(self._image_to_embed(stored)))
             node.drop_tag()
     return ET.tostring(tree)
 def process_content_image(self, content):
     """Import the Blogspot-hosted images of *content* and embed them.

     Only ``<img>`` tags whose ``src`` contains ``bp.blogspot.com`` are
     handled; the last path segment of the URL is used as name and title.
     Falsy *content* is returned untouched; otherwise the serialised tree
     is returned.
     """
     self.stdout.write('\tGenerate and replace entry content images....')
     if not content:
         return content
     tree = lxml.html.fromstring(content)
     for node in tree.iter('img'):
         wrapper = node.getparent()
         src = node.attrib['src']
         if 'bp.blogspot.com' not in src:
             continue
         self.stdout.write('\t\t{}'.format(src))
         downloaded = self._import_image(src)
         label = src.rsplit('/', 1)[1]
         stored = WagtailImage(file=File(file=downloaded, name=label), title=label)
         stored.save()
         if wrapper.tag == 'a':
             # The image is wrapped in a link: put the embed after the
             # link and drop the link together with its content.
             wrapper.addnext(ET.XML(self._image_to_embed(stored)))
             wrapper.drop_tree()
         else:
             wrapper.append(ET.XML(self._image_to_embed(stored)))
             node.drop_tag()
     return ET.tostring(tree)
Example #14
0
def fetch_hut_images():
    """Fetch and store a meta image for every ``HutPage`` with a link URL.

    The linked page is downloaded. The image URL is taken from the first
    ``<img>`` inside the first ``a.fancybox-gallery`` element when such an
    anchor exists, otherwise from the page metadata. The image is then
    downloaded, saved as an ``Image`` and assigned to ``hpage.meta_image``.
    Request failures are logged and the hut is skipped.
    """
    for hpage in HutPage.objects.all():
        if not hpage.link_url:
            continue
        try:
            r = requests.get(hpage.link_url, timeout=settings.API_TIMEOUT)
        except requests.exceptions.RequestException as e:
            logger.exception(str(e))
            continue
        soup = BeautifulSoup(r.content, 'html5lib')
        # Reset per hut so a gallery anchor without <img> cannot reuse the
        # previous hut's URL (or hit an unbound name on the first hut).
        img_url = None
        a_tag = soup.find_all("a", {"class": "fancybox-gallery"})
        if a_tag:
            img_tag = a_tag[0].find_all("img")
            if img_tag:
                img_url = 'http://www.doc.govt.nz/%s' % img_tag[0].get(
                    'src')
                logger.debug("Hut %s using img %s from HTML body.",
                             str(hpage.pk), img_url)
        else:
            page = metadata_parser.MetadataParser(url=hpage.link_url)
            img_url = page.get_metadata_link('image')
            logger.debug("Hut %s using img %s from HTML meta",
                         str(hpage.pk), img_url)
        if not img_url:
            logger.debug("No img found for hut %s", str(hpage.pk))
            continue
        try:
            response = requests.get(img_url,
                                    timeout=settings.API_TIMEOUT)
        except requests.exceptions.RequestException as e:
            logger.exception(str(e))
            # Without a response there is nothing to store for this hut.
            continue
        image = Image(title=hpage.title,
                      file=ImageFile(BytesIO(response.content),
                                     name=img_url.split('/')[-1]))
        image.save()
        hpage.meta_image = image
        hpage.save()
    def handle(self, *args, **options):
        """Store a screenshot of every project's first public main link.

        Each ``ProjectPage`` with at least one public link of type ``main``
        is visited in a PhantomJS browser. The screenshot is cropped to the
        window size, saved as the ``Image`` titled ``"<project title>
        screenshot"`` (created when missing) and assigned to
        ``project.image``.

        :raises AssertionError: when a visited page does not report success.
        """
        browser = Browser('phantomjs')
        # try/finally makes sure the browser process is shut down even when
        # a visit or an image operation fails.
        try:
            dims = (1600, 1000)
            browser.driver.set_window_size(dims[0], dims[1])

            for project in ProjectPage.objects.all():
                links = project.links.filter(public=True, type='main')
                if not links:
                    continue
                # Use only the first link for now
                link = links[0]
                print("Visiting %s (%s)" % (link.url, link))
                browser.visit(link.url)
                # Raised explicitly (same exception type as before) so the
                # check also runs when assertions are disabled with -O.
                if not browser.status_code.is_success():
                    raise AssertionError(
                        "Visiting %s did not succeed" % link.url)
                time.sleep(5)

                with tempfile.NamedTemporaryFile(suffix='.png', prefix='project') as tmpf:
                    browser.driver.save_screenshot(tmpf.name)

                    pil_image = PILImage.open(tmpf)
                    pil_image = pil_image.crop((0, 0, dims[0], dims[1]))
                    # Overwrite the temp file with the cropped picture.
                    tmpf.seek(0)
                    tmpf.truncate(0)
                    pil_image.save(tmpf, format='PNG')

                    title = '%s screenshot' % project.title
                    try:
                        image = Image.objects.get(title=title)
                    except Image.DoesNotExist:
                        image = Image(title=title)
                    image.file = ImageFile(tmpf)
                    image.save()

                project.image = image
                project.save(update_fields=['image'])
        finally:
            browser.quit()
 def create_images_from_urls_in_content(self, body):
     """Create Image objects and transfer image files to media root.

     Every remote ``<img>`` found in *body* is downloaded with
     ``urlretrieve``, stored as an ``Image`` and its original URL is
     replaced in *body* by the URL of the stored file. Blank and ``data:``
     sources are skipped.

     :param body: HTML source of a post.
     :returns: *body* with the URLs of the imported images rewritten.
     """
     soup = BeautifulSoup(body, "html5lib")
     for img in soup.findAll('img'):
         # .get() avoids a KeyError on <img> tags that carry no src at all.
         old_url = img.get('src')
         if not old_url:
             continue  # Blank image
         if old_url.startswith('data:'):
             continue  # Embedded image
         # ``'width' in img`` would search the tag's children rather than
         # its attributes, so the attributes are tested explicitly. Both
         # must be present, otherwise the defaults are used.
         if img.has_attr('width') and img.has_attr('height'):
             width = img['width']
             height = img['height']
         else:
             width = 100
             height = 100
         file_ = os.path.split(old_url)[1]
         try:
             remote_image = urllib.request.urlretrieve(
                 self.prepare_url(old_url))
         except (urllib.error.HTTPError,
                 urllib.error.URLError,
                 UnicodeEncodeError,
                 ValueError):
             print("Unable to import " + old_url)
             continue
         image = Image(title=file_, width=width, height=height)
         try:
             # Close the downloaded file once it has been copied.
             with open(remote_image[0], 'rb') as downloaded:
                 image.file.save(file_, File(downloaded))
             image.save()
             new_url = image.file.url
             body = body.replace(old_url, new_url)
             body = self.convert_html_entities(body)
         except TypeError:
             print("Unable to import image {}".format(remote_image[0]))
     return body
Example #17
0
from io import BytesIO
import requests
from django.core.files.images import ImageFile
from wagtail.wagtailimages.models import Image

# event is a model object, substitute your model
# filename and title are up to you
# in my model, event.event_image is a ForeignKey to wagtailimages.Image

# Bound the wait and fail on HTTP errors so that an error page is never
# stored as if it were the image.
response = requests.get(url, timeout=10)
response.raise_for_status()
image = Image(title=title,
              file=ImageFile(BytesIO(response.content), name=filename))
image.save()
event.event_image = image
event.save()
Example #18
0
        def add_to_collection(item, collection):
            """Create or update the Movie/EBook child of *collection* for *item*.

            *item* is a file path. Optional side files
            ``<item>.description.txt`` and ``<item>.thumbnail.jpeg`` provide
            the description and thumbnail. Files whose extension is in
            neither ``MOVIE_EXTENSIONS`` nor ``EBOOK_EXTENSIONS`` are ignored.
            """
            file_name = os.path.basename(item)
            extension = file_name.split(".")[-1]
            if extension in MOVIE_EXTENSIONS:
                cls = models.Movie
            elif extension in EBOOK_EXTENSIONS:
                cls = models.EBook
            else:
                # Unsupported type: leave before any thumbnail is stored.
                return

            description_file = item + ".description.txt"
            if os.path.isfile(description_file):
                with open(description_file, 'r') as handle:
                    description = handle.read()
            else:
                description = ""

            title = ".".join(file_name.split(".")[:-1])
            title = title.replace("_", " ")

            thumbnail_file = item + ".thumbnail.jpeg"
            if os.path.isfile(thumbnail_file):
                thumbnail = Image()
                # JPEG data is binary: open in 'rb', not text mode.
                with open(thumbnail_file, 'rb') as f:
                    thumbnail.file.save(file_name + ".jpeg", File(f))
                thumbnail.title = "Thumbnail for " + title
                thumbnail.save()
            else:
                thumbnail = None

            slug = slugify(title)
            path = collection.path + "{pos:s}".format(
                pos=str(collection.numchild + 1).zfill(4))
            try:
                obj = collection.get_children().filter(slug=slug)[0]
                # Ensure that other objects with same slug and path are deleted
                others = collection.get_children().filter(slug=slug).exclude(
                    id=obj.id)
                if others.exists():
                    logger.warning(
                        "Other objects with same path existed and were deleted. File: "
                        + item)
                    others.delete()
            except IndexError:
                obj = cls(path=path)
            obj.numchild = 0
            obj.depth = collection.depth + 1
            obj.show_in_menus = False
            obj.resource_link = item
            obj.url_path = os.path.join(collection.url_path, slug) + "/"
            obj.slug = slug
            obj.title = title
            obj.live = True
            obj.short_description = description
            obj.author = options["author"]
            obj.duration = ""
            obj.thumbnail = thumbnail
            obj.save()

            collection.numchild += 1
            collection.save()
    def create_blog_pages(self, posts, blog_index, *args, **options):
        """Create Blog post entries from wordpress data.

        For every post a ``BlogPage`` with the same slug is updated, or a
        new one is added below *blog_index*. The featured image (if any) is
        downloaded and attached as header image, then categories, tags and
        optionally comments are imported.

        :param posts: iterable of dict-like post records.
        :param blog_index: parent page receiving newly created blog pages.
        """
        for post in posts:
            # Needed by import_comments() below; it was never assigned.
            post_id = post.get('ID')
            title = post.get('title')
            print(title)
            if title:
                title = self.convert_html_entities(title)
            # TODO: Fix hardcoded replacement
            slug = post.get('slug') + "-html"

            description = post.get('description')
            if description:
                description = self.convert_html_entities(description)
            body = post.get('content')
            # get image info from content and create image objects
            body = self.create_images_from_urls_in_content(body)
            body = self.format_code_in_content(body)
            body = self.replace_twilioinc_urls(body)
            # author/user data
            author = post.get('author')
            user = self.create_user(author)
            categories = post.get('terms')
            # format the date
            date = post.get('date')[:10]
            try:
                new_entry = BlogPage.objects.get(slug=slug)
                new_entry.title = title
                new_entry.body = body
                new_entry.owner = user
                new_entry.author = user
                new_entry.save()
            except BlogPage.DoesNotExist:
                # Store the post's own description; previously the literal
                # word "description" was saved for every page.
                new_entry = blog_index.add_child(instance=BlogPage(
                    title=title, slug=slug,
                    search_description=description or "",
                    date=date, body=body, owner=user, author=user))
                print("Owner:")
                print(new_entry.owner)
            featured_image = post.get('featured_image')
            header_image = None
            if featured_image is not None and "source" in featured_image:
                image_title = featured_image.get('title', "Featured Image")
                source = featured_image['source']
                # The file name is taken before the host is rewritten.
                file_ = os.path.split(source)[1]
                source = source.replace('stage.swoon', 'swoon')
                headers = {
                    'Content-Type': 'application/json',
                    'Accept': 'application/json',
                    'User-Agent': "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36 SE 2.X MetaSr 1.0"
                }
                try:
                    req = requests.get(self.prepare_url(source), headers=headers, timeout=10)
                    # Only a successful, non-empty response is an image.
                    if req.status_code == 200 and req.content:
                        candidate = Image(title=image_title, width=640, height=290)
                        with tempfile.NamedTemporaryFile() as remote_image:
                            remote_image.write(req.content)
                            # Flush so the bytes are on disk before the file
                            # is re-opened by name.
                            remote_image.flush()
                            with open(remote_image.name, 'rb') as downloaded:
                                candidate.file.save(file_, File(downloaded))
                        candidate.save()
                        header_image = candidate
                    else:
                        print('unable to set header image {}'.format(source))
                except (UnicodeEncodeError,
                        requests.exceptions.RequestException):
                    # A failed download must not abort the whole import; the
                    # page is simply saved without a header image.
                    header_image = None
                    print('unable to set header image {}'.format(source))
            new_entry.header_image = header_image
            new_entry.save()
            if categories:
                self.create_categories_and_tags(new_entry, categories)
            if self.should_import_comments:
                self.import_comments(post_id, slug)