Example #1
0
    def import_image(self, element):
        errors = {}

        # Get image info
        image_contentid = element.attrib['contentid']
        image_filename, errors['filename'] = text_from_elem(element, 'filename', length=255, textify=True)
        image_caption, errors['caption'] = text_from_elem(element, 'caption', length=255)

        image_metadata = element.find('imagemetadata')
        image_title, errors['title'] = text_from_elem(image_metadata, 'title', length=255)
        image_creator, errors['creator'] = text_from_elem(image_metadata, 'creator', length=255, textify=True)
        image_media, errors['media'] = text_from_elem(image_metadata, 'media', length=255, textify=True)
        image_photographer, errors['photographer'] = text_from_elem(image_metadata, 'photographer', length=255, textify=True)
        image_rights, errors['rights'] = text_from_elem(image_metadata, 'rights', length=255, textify=True)

        # Create image
        try:
            image = RcaImage.objects.get(rca_content_id=image_contentid)
        except RcaImage.DoesNotExist:
            image = RcaImage()
            image.rca_content_id = image_contentid

        image.title = image_title
        image.alt = image_caption
        image.creator = image_creator
        image.medium = image_media
        image.photographer = image_photographer
        image.permission = image_rights

        if self.save:
            # Load image file
            if not image.id:
                try:
                    with File(open(self.image_path + image_filename.encode('utf-8'), 'r')) as f:
                        image.file = f
                        image.save()
                except IOError as e:
                    print "I/O error({0}): {1}".format(e.errno, e.strerror)
                    print repr(image_filename)
                    return None, None
                except ValueError:
                    print "Could not convert data to an integer."
                    return None, None
                except:
                    import sys
                    print "Unexpected error:", sys.exc_info()[0]
                    raise
            else:
                image.save()

        return image, errors
Example #2
0
def doimport(**kwargs):
    path = kwargs.get('path', PATH)
    save = kwargs.get('save', False)
    image_path = kwargs.get('image_path', IMAGE_PATH)
    ruthless = kwargs.get('ruthless', False)
    newsindex = NEWS_INDEX
    tree = ET.parse(path)
    root = tree.getroot()
    errors = []
    images_errors = []
    for item in root.findall('news_item'):
        itemerrors = {}

        # sort out what instance this is
        news_contentid = item.attrib['contentid']
        title, itemerrors['title'] = text_from_elem(item, 'title', length=255)
        date = parse_date(item.find('goinglivedate').text.strip().replace('.','-')) or datetime.date.today()
        try:
            newsitem = NewsItem.objects.get(rca_content_id=news_contentid)
        except NewsItem.DoesNotExist:
            newsitem = NewsItem(rca_content_id=news_contentid)
        newsitem.title = title
        newsitem.date = date
        newsitem.intro = richtext_from_elem(item.find('intro'))
        newsitem.slug = make_slug(newsitem)

        # possibly delete any images that are embedded in the existing body
        if ruthless:
            soup = BeautifulSoup(newsitem.body, 'html.parser')
            to_delete_ids = []
            for x in soup.find_all('embed'):
                try:
                    to_delete_ids.append(int(x.attrs['id']))
                except ValueError:
                    pass
            if to_delete_ids:
                RcaImage.objects.filter(id__in=to_delete_ids).delete()

        # build the body
        strings = []
        if item.find('texts'):
            for elem in item.find('texts').findall('text'):
                html = richtext_from_elem(elem.find('content'))
                strings.append(html)
        newsitem.body = '\n'.join(strings)

        # save newsitem
        if save:
            if newsitem.id:
                newsitem.save()
            else:
                newsindex.add_child(newsitem)

        tobesaved = False
        if item.find('images') is not None:
            # first delete images that haven't got a contentid
            if ruthless:
                for c in NewsItemCarouselItem.objects.filter(page=newsitem):
                    c.image.delete()
                    c.delete()

            for image in item.find('images').findall('image'):
                imageerrors = {}
                metadata = image.find('imagemetadata')
                im_contentid = image.attrib['contentid']
                filename = urllib2.unquote(image.find('filename').text.strip())
                try:
                    theimage = RcaImage.objects.get(rca_content_id=im_contentid)
                except RcaImage.DoesNotExist:
                    theimage = RcaImage(rca_content_id=im_contentid)

                theimage.title, imageerrors['title'] = text_from_elem(metadata, 'title', length=255, textify=True)
                theimage.creator, imageerrors['creator'] = text_from_elem(metadata, 'creator', length=255, textify=True)
                theimage.medium, imageerrors['medium'] = text_from_elem(metadata, 'media', length=255, textify=True)
                theimage.photographer, imageerrors['photog'] = text_from_elem(metadata, 'photographer', length=255, textify=True)
                theimage.permission, imageerrors['perms'] = text_from_elem(metadata, 'rights', length=255, textify=True)

                caption, imageerrors['caption'] = text_from_elem(metadata, 'caption', length=255, textify=True)
                theimage.alt = caption

                #theimage.width, imageerrors['width'] = text_from_elem(metadata, 'width', length=255)
                #theimage.height, imageerrors['height'] = text_from_elem(metadata, 'height', length=255)

                try:
                    with File(open(image_path + filename.encode('utf-8'), 'r')) as f:
                        if theimage.id:
                            if save:
                                theimage.delete()
                        theimage.file = f
                        if save:
                            theimage.save()
                except IOError as e:
                    print "I/O error({0}): {1}".format(e.errno, e.strerror)
                    print repr(filename)
                except ValueError:
                    print "Could not convert data to an integer."
                except:
                    import sys
                    print "Unexpected error:", sys.exc_info()[0]
                    raise

                if save and theimage.is_landscape():
                    try:
                        carousel = NewsItemCarouselItem.objects.get(
                                page = newsitem,
                                image = theimage,
                                )
                    except NewsItemCarouselItem.DoesNotExist:
                        carousel = NewsItemCarouselItem(
                                page = newsitem,
                                image = theimage,
                                )
                        if save:
                            carousel.save()
                elif save and theimage.id:
                    imagestring = '<embed alt="%(alt)s" embedtype="image" format="right" id="%(id)s"/>' % {
                            'alt': theimage.alt,
                            'id': theimage.id,
                            }
                    newsitem.body = imagestring + newsitem.body
                    tobesaved = True

                imageerrordict = dict((k, v) for k, v in imageerrors.iteritems() if v)
                if imageerrordict:
                    images_errors.append({image: imageerrordict})
        if tobesaved and save:
            newsitem.save()

        errordict = dict((k, v) for k, v in itemerrors.iteritems() if v)
        if errordict:
            errors.append({item: errordict})
    return errors, images_errors
def import_image(element):
    errors = {}

    # Get image info
    image_contentid = element.attrib['contentid']
    image_filename, errors['filename'] = text_from_elem(element,
                                                        'filename',
                                                        length=255,
                                                        textify=True)
    image_caption, errors['caption'] = text_from_elem(element,
                                                      'caption',
                                                      length=255)

    image_metadata = element.find('imagemetadata')
    image_title, errors['title'] = text_from_elem(image_metadata,
                                                  'title',
                                                  length=255,
                                                  textify=True)
    image_creator, errors['creator'] = text_from_elem(image_metadata,
                                                      'creator',
                                                      length=255,
                                                      textify=True)
    image_media, errors['media'] = text_from_elem(image_metadata,
                                                  'media',
                                                  length=255,
                                                  textify=True)
    image_photographer, errors['photographer'] = text_from_elem(image_metadata,
                                                                'photographer',
                                                                length=255,
                                                                textify=True)
    image_rights, errors['rights'] = text_from_elem(image_metadata,
                                                    'rights',
                                                    length=255,
                                                    textify=True)

    # Create image
    try:
        image = RcaImage.objects.get(rca_content_id=image_contentid)
    except RcaImage.DoesNotExist:
        image = RcaImage()
        image.rca_content_id = image_contentid

    image.title = image_title
    image.alt = image_caption
    image.creator = image_creator
    image.medium = image_media
    image.photographer = image_photographer
    image.permission = image_rights

    # Load image file
    if not image.id:
        try:
            with File(open(IMAGE_PATH + image_filename.encode('utf-8'),
                           'r')) as f:
                image.file = f
                image.save()
        except IOError as e:
            print "I/O error({0}): {1}".format(e.errno, e.strerror)
            print repr(image_filename)
            return None, None
        except ValueError:
            print "Could not convert data to an integer."
            return None, None
        except:
            import sys
            print "Unexpected error:", sys.exc_info()[0]
            raise
    else:
        image.save()

    return image, errors
Example #4
0
def doimport(**kwargs):
    path = kwargs.get('path', PATH)
    save = kwargs.get('save', False)
    image_path = kwargs.get('image_path', IMAGE_PATH)
    ruthless = kwargs.get('ruthless', False)
    newsindex = NEWS_INDEX
    tree = ET.parse(path)
    root = tree.getroot()
    errors = []
    images_errors = []
    for item in root.findall('news_item'):
        itemerrors = {}

        # sort out what instance this is
        news_contentid = item.attrib['contentid']
        title, itemerrors['title'] = text_from_elem(item, 'title', length=255)
        date = parse_date(
            item.find('goinglivedate').text.strip().replace(
                '.', '-')) or datetime.date.today()
        try:
            newsitem = NewsItem.objects.get(rca_content_id=news_contentid)
        except NewsItem.DoesNotExist:
            newsitem = NewsItem(rca_content_id=news_contentid)
        newsitem.title = title
        newsitem.date = date
        newsitem.intro = richtext_from_elem(item.find('intro'))
        newsitem.slug = make_slug(newsitem)

        # possibly delete any images that are embedded in the existing body
        if ruthless:
            soup = BeautifulSoup(newsitem.body, 'html.parser')
            to_delete_ids = []
            for x in soup.find_all('embed'):
                try:
                    to_delete_ids.append(int(x.attrs['id']))
                except ValueError:
                    pass
            if to_delete_ids:
                RcaImage.objects.filter(id__in=to_delete_ids).delete()

        # build the body
        strings = []
        if item.find('texts'):
            for elem in item.find('texts').findall('text'):
                html = richtext_from_elem(elem.find('content'))
                strings.append(html)
        newsitem.body = '\n'.join(strings)

        # save newsitem
        if save:
            if newsitem.id:
                newsitem.save()
            else:
                newsindex.add_child(newsitem)

        tobesaved = False
        if item.find('images') is not None:
            # first delete images that haven't got a contentid
            if ruthless:
                for c in NewsItemCarouselItem.objects.filter(page=newsitem):
                    c.image.delete()
                    c.delete()

            for image in item.find('images').findall('image'):
                imageerrors = {}
                metadata = image.find('imagemetadata')
                im_contentid = image.attrib['contentid']
                filename = urllib2.unquote(image.find('filename').text.strip())
                try:
                    theimage = RcaImage.objects.get(
                        rca_content_id=im_contentid)
                except RcaImage.DoesNotExist:
                    theimage = RcaImage(rca_content_id=im_contentid)

                theimage.title, imageerrors['title'] = text_from_elem(
                    metadata, 'title', length=255, textify=True)
                theimage.creator, imageerrors['creator'] = text_from_elem(
                    metadata, 'creator', length=255, textify=True)
                theimage.medium, imageerrors['medium'] = text_from_elem(
                    metadata, 'media', length=255, textify=True)
                theimage.photographer, imageerrors['photog'] = text_from_elem(
                    metadata, 'photographer', length=255, textify=True)
                theimage.permission, imageerrors['perms'] = text_from_elem(
                    metadata, 'rights', length=255, textify=True)

                caption, imageerrors['caption'] = text_from_elem(metadata,
                                                                 'caption',
                                                                 length=255,
                                                                 textify=True)
                theimage.alt = caption

                #theimage.width, imageerrors['width'] = text_from_elem(metadata, 'width', length=255)
                #theimage.height, imageerrors['height'] = text_from_elem(metadata, 'height', length=255)

                try:
                    with File(open(image_path + filename.encode('utf-8'),
                                   'r')) as f:
                        if theimage.id:
                            if save:
                                theimage.delete()
                        theimage.file = f
                        if save:
                            theimage.save()
                except IOError as e:
                    print "I/O error({0}): {1}".format(e.errno, e.strerror)
                    print repr(filename)
                except ValueError:
                    print "Could not convert data to an integer."
                except:
                    import sys
                    print "Unexpected error:", sys.exc_info()[0]
                    raise

                if save and theimage.is_landscape():
                    try:
                        carousel = NewsItemCarouselItem.objects.get(
                            page=newsitem,
                            image=theimage,
                        )
                    except NewsItemCarouselItem.DoesNotExist:
                        carousel = NewsItemCarouselItem(
                            page=newsitem,
                            image=theimage,
                        )
                        if save:
                            carousel.save()
                elif save and theimage.id:
                    imagestring = '<embed alt="%(alt)s" embedtype="image" format="right" id="%(id)s"/>' % {
                        'alt': theimage.alt,
                        'id': theimage.id,
                    }
                    newsitem.body = imagestring + newsitem.body
                    tobesaved = True

                imageerrordict = dict(
                    (k, v) for k, v in imageerrors.iteritems() if v)
                if imageerrordict:
                    images_errors.append({image: imageerrordict})
        if tobesaved and save:
            newsitem.save()

        errordict = dict((k, v) for k, v in itemerrors.iteritems() if v)
        if errordict:
            errors.append({item: errordict})
    return errors, images_errors