Example #1
0
    def handle(self, *args, **options):
        """Create and publish an example blog page below the first page.

        Ensures a content type exists for ``puput.blogpage``, points the first
        ``Site`` at the first ``Page``, adds a "Blog" page as a child of that
        page and publishes a revision of the new page.
        """
        # The extra ``name`` default is only supplied on Django < 1.8.
        content_type_defaults = {'name': 'page'} if DJANGO_VERSION < (1, 8) else {}
        blog_content_type, _ = ContentType.objects.get_or_create(
            model='blogpage',
            app_label='puput',
            defaults=content_type_defaults)

        root = Page.objects.first()

        # Make the first site serve from that page.
        first_site = Site.objects.first()
        first_site.root_page = root
        first_site.save()

        blog = BlogPage(title="Blog", content_type=blog_content_type, slug='blog')

        # Attach the blog below the root page, then publish it.
        root.add_child(instance=blog)
        blog.save_revision().publish()
Example #2
0
    def get_blog_page(self, slug, title):
        """Load the blog page with this slug into ``self.blogpage``, creating it if needed.

        When no ``BlogPage`` has the slug, the first ``Site`` is pointed at the
        first ``Page``, a new ``BlogPage`` is added as a child of that page and
        a revision of the new page is published.

        :param slug: slug used to look up (or create) the blog page.
        :param title: title given to the blog page when it has to be created.
        """
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            # Create the blog page below the root page
            self.blogpage = BlogPage(title=title, slug=slug)
            rootpage.add_child(instance=self.blogpage)
            # Publish the page that was just created. The revision used to be
            # taken from the root page, which left the blog page itself
            # without any revision.
            revision = self.blogpage.save_revision()
            revision.publish()
Example #3
0
    def get_blog_page(self, slug, title):
        """Load the blog page with this slug into ``self.blogpage``, creating it if needed.

        When no ``BlogPage`` has the slug, the first ``Site`` is pointed at the
        first ``Page``, a new ``BlogPage`` is added as a child of that page and
        a revision of the new page is published.

        :param slug: slug used to look up (or create) the blog page.
        :param title: title given to the blog page when it has to be created.
        """
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            # Create the blog page below the root page
            self.blogpage = BlogPage(title=title, slug=slug)
            rootpage.add_child(instance=self.blogpage)
            # Publish the page that was just created. The revision used to be
            # taken from the root page, which left the blog page itself
            # without any revision.
            revision = self.blogpage.save_revision()
            revision.publish()
Example #4
0
class Command(LabelCommand):
    help = 'Import blog data from Wordpress'
    label = 'WXR file'
    args = 'wordpress.xml'

    SITE = Site.objects.get_current()

    def add_arguments(self, parser):
        """Register the optional ``--slug`` and ``--title`` options on the parser."""
        blog_options = (
            ('--slug', 'blog', "Slug of the blog."),
            ('--title', 'Blog', "Title of the blog."),
        )
        for flag, default_value, help_text in blog_options:
            parser.add_argument(flag, default=default_value, help=help_text)

    def handle_label(self, wxr_file, **options):
        """Import one WXR export into the blog page.

        Resolves the blog page, parses the file, binds the module-level
        ``WP_NS`` template to the detected WXR version and then imports
        authors, categories and entries, in that order.

        :param wxr_file: value handed to ``ET.parse``.
        :param options: command options; ``slug`` and ``title`` are read.
        """
        global WP_NS
        slug, title = options['slug'], options['title']
        self.get_blog_page(slug, title)
        self.tree = ET.parse(wxr_file)
        # NOTE(review): WP_NS is rebound in place, so a second call in the same
        # process would apply ``%`` to the already formatted value -- confirm
        # the command is only ever run with a single file.
        wxr_version = self.get_wordpress_version(self.tree)
        WP_NS = WP_NS % wxr_version
        self.import_authors(self.tree)
        category_path = u'channel/{{{0:s}}}category'.format(WP_NS)
        self.categories = self.import_categories(self.tree.findall(category_path))
        self.import_entries(self.tree.findall('channel/item'))

    def get_wordpress_version(self, tree):
        """
        Return the first WXR version (tried as 1.2, 1.1, 1.0) for which the
        tree has a ``wxr_version`` element under ``channel``.

        Raises ``CommandError`` when none of the versions matches.
        """
        for candidate in ('1.2', '1.1', '1.0'):
            version_path = u'channel/{{{0:s}}}wxr_version'.format(WP_NS % candidate)
            try:
                # ``find`` yields None for an unknown namespace, so touching
                # ``.text`` raises AttributeError for a non-matching version.
                tree.find(version_path).text
            except AttributeError:
                continue
            return candidate
        raise CommandError('Cannot resolve the wordpress namespace')

    def import_authors(self, tree):
        """Fill ``self.authors`` with one user per distinct creator of a ``post`` item.

        :param tree: parsed WXR document; its ``channel/item`` nodes are scanned.
        """
        self.stdout.write('Importing authors...')

        post_type_tag = u'{{{0:s}}}post_type'.format(WP_NS)
        creator_tag = '{http://purl.org/dc/elements/1.1/}creator'
        creators = {
            item.find(creator_tag).text
            for item in tree.findall('channel/item')
            if item.find(post_type_tag).text == 'post'
        }

        # The mapping stays keyed by the creator text as found in the file;
        # only the name handed to import_author has spaces turned into dashes.
        self.authors = {}
        for creator in creators:
            self.authors[creator] = self.import_author(creator.replace(' ', '-'))

    def import_author(self, author_name):
        """Interactively map a WordPress author onto a user and return that user.

        The operator chooses between picking an existing user and creating a
        new one. Typing ``back`` at either follow-up prompt restarts the
        dialogue from the first question.

        :param author_name: name shown in the prompts and used as the username
            when a new user is created.
        :returns: the selected or newly created user instance.
        """
        action_text = u"The author '{0:s}' needs to be migrated to an user:\n" \
                      u"1. Use an existing user ?\n" \
                      u"2. Create a new user ?\n" \
                      u"Please select a choice: ".format(author_name)
        User = get_user_model()
        while True:
            selection = str(input(action_text))
            # Compare against the two valid answers explicitly: a substring
            # test on '12' also accepts the input '12' and then silently
            # treats it as choice 2.
            if selection in ('1', '2'):
                break

        if selection == '2':
            create_text = u"2. Please type the email of " \
                          u"the '{0:s}' user or 'back': ".format(author_name)
            author_mail = input(create_text)
            if author_mail.strip() == 'back':
                return self.import_author(author_name)
            try:
                return User.objects.create_user(author_name, author_mail)
            except IntegrityError:
                # Presumably the username is already taken: fall back to the
                # user that carries it.
                return User.objects.get(**{User.USERNAME_FIELD: author_name})

        users = User.objects.all()
        usernames = [user.get_username() for user in users]
        if len(usernames) == 1:
            # A single user is always offered as the default answer.
            preselected_user = usernames[0]
        elif author_name in usernames:
            preselected_user = author_name
        else:
            preselected_user = None
        # The default answer, if any, is shown in square brackets.
        usernames_display = [
            u'[{0:s}]'.format(username) if username == preselected_user else username
            for username in usernames
        ]
        user_text = u"1. Select your user, by typing " \
                    u"one of theses usernames:\n" \
                    u"{0:s} or 'back'\n" \
                    u"Please select a choice: ".format(', '.join(usernames_display))
        while True:
            user_selected = input(user_text)
            if user_selected in usernames:
                break
            if user_selected == '' and preselected_user:
                user_selected = preselected_user
                break
            if user_selected.strip() == 'back':
                return self.import_author(author_name)
        return users.get(**{User.USERNAME_FIELD: user_selected})

    def get_blog_page(self, slug, title):
        """Load the blog page with this slug into ``self.blogpage``, creating it if needed.

        When no ``BlogPage`` has the slug, the first ``Site`` is pointed at the
        first ``Page``, a new ``BlogPage`` is added as a child of that page and
        a revision of the new page is published.

        :param slug: slug used to look up (or create) the blog page.
        :param title: title given to the blog page when it has to be created.
        """
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            # Create the blog page below the root page
            self.blogpage = BlogPage(title=title, slug=slug)
            rootpage.add_child(instance=self.blogpage)
            # Publish the page that was just created. The revision used to be
            # taken from the root page, which left the blog page itself
            # without any revision.
            revision = self.blogpage.save_revision()
            revision.publish()

    def import_categories(self, category_nodes):
        """Create or update a ``PuputCategory`` for every WXR category node.

        :param category_nodes: the ``category`` elements found under ``channel``.
        :returns: dict mapping each category title to its ``PuputCategory``.
        """
        self.stdout.write('Importing categories...')

        categories = {}
        # WordPress exports put the parent's nicename (slug) in
        # ``category_parent``, so parents must also be resolvable by slug;
        # a title-only lookup loses the link whenever slug and name differ.
        # Kept separate so the returned mapping stays keyed by title only.
        categories_by_slug = {}
        for category_node in category_nodes:
            title = category_node.find(u'{{{0:s}}}cat_name'.format(WP_NS)).text[:255]
            slug = category_node.find(u'{{{0:s}}}category_nicename'.format(WP_NS)).text[:255]
            try:
                parent = category_node.find(u'{{{0:s}}}category_parent'.format(WP_NS)).text[:255]
            except TypeError:
                # The element is present but empty: top-level category.
                parent = None
            self.stdout.write(u'\t\t{0:s}'.format(title))
            parent_category = categories.get(parent)
            if parent_category is None:
                parent_category = categories_by_slug.get(parent)
            category, created = PuputCategory.objects.update_or_create(name=title, defaults={
                'slug': slug, 'parent': parent_category
            })
            categories[title] = category
            categories_by_slug[slug] = category
        return categories

    def import_entry_tags(self, tags, page):
        """Add to the page a tag for every node whose domain contains ``tag`` and that has a nicename.

        :param tags: the ``category`` child nodes of a WXR item.
        :param page: entry page receiving the tags.
        """
        self.stdout.write("\tImporting tags...")
        for node in tags:
            attributes = node.attrib
            # Nodes without a domain are treated as plain categories.
            if 'tag' not in attributes.get('domain', 'category'):
                continue
            if not attributes.get('nicename'):
                continue
            self.stdout.write(u'\t\t{}'.format(node.text))
            puput_tag, _ = PuputTag.objects.update_or_create(name=node.text)
            page.entry_tags.add(PuputTagEntryPage(tag=puput_tag))

    def import_entry_categories(self, category_nodes, page):
        """Link the page to the ``PuputCategory`` named by each ``category``-domain node.

        :param category_nodes: the ``category`` child nodes of a WXR item.
        :param page: entry page to link the categories to.
        """
        for node in category_nodes:
            if node.attrib.get('domain') != 'category':
                continue
            puput_category = PuputCategory.objects.get(name=node.text)
            PuputCategoryEntryPage.objects.get_or_create(category=puput_category, page=page)

    def import_entry(self, title, content, items, item_node):
        """Create (if missing) the ``EntryPage`` for one WXR item and attach its metadata.

        The page is looked up by slug and, when absent, created as a child of
        ``self.blogpage``. Tags, categories and the header image are imported
        afterwards in both cases, then the page is saved with a new revision.

        :param title: post title, already truncated by the caller.
        :param content: post body, already run through ``process_content_image``.
        :param items: all ``channel/item`` nodes, searched for the header image.
        :param item_node: the WXR item being imported.
        """
        date_format = '%Y-%m-%d %H:%M:%S'
        creation_date = datetime.strptime(
            item_node.find(u'{{{0:s}}}post_date'.format(WP_NS)).text, date_format)
        try:
            entry_date = datetime.strptime(
                item_node.find(u'{{{0:s}}}post_date_gmt'.format(WP_NS)).text, date_format)
        except ValueError:
            # post_date_gmt is not a parseable date: fall back to post_date.
            entry_date = creation_date
        if settings.USE_TZ:
            gmt = pytz.timezone('GMT')
            creation_date = timezone.make_aware(creation_date, gmt)
            # go_live_at has to be aware as well; it used to be stored naive
            # while every other date on the page was aware.
            entry_date = timezone.make_aware(entry_date, gmt)

        # The path expands to ``{<WP_NS>excerpt/}encoded``: the excerpt
        # namespace is nested below the WordPress export namespace.
        excerpt_node = item_node.find(u'{{{0:s}excerpt/}}encoded'.format(WP_NS))
        # Items without an excerpt element are treated like an empty excerpt.
        excerpt_text = excerpt_node.text if excerpt_node is not None else None
        excerpt = strip_tags(excerpt_text or '')
        if not excerpt and content:
            excerpt = Truncator(content).words(50)
        slug = slugify(title)[:255] or u'post-{0:s}'.format(item_node.find(u'{{{0:s}}}post_id'.format(WP_NS)).text)
        creator = item_node.find('{http://purl.org/dc/elements/1.1/}creator').text
        # Create page
        try:
            page = EntryPage.objects.get(slug=slug)
        except EntryPage.DoesNotExist:
            page = EntryPage(
                title=title,
                body=content,
                excerpt=strip_tags(excerpt),
                slug=slug,
                go_live_at=entry_date,
                first_published_at=creation_date,
                date=creation_date,
                owner=self.authors.get(creator),
                seo_title=title,
                search_description=excerpt,
                live=item_node.find(u'{{{0:s}}}status'.format(WP_NS)).text == 'publish')
            self.blogpage.add_child(instance=page)
            revision = self.blogpage.save_revision()
            revision.publish()
        category_nodes = item_node.findall('category')
        self.import_entry_tags(category_nodes, page)
        self.import_entry_categories(category_nodes, page)
        # Import header image
        image_id = self.find_image_id(item_node.findall(u'{{{0:s}}}postmeta'.format(WP_NS)))
        if image_id:
            self.import_header_image(page, items, image_id)
        page.save()
        page.save_revision(changed=False)

    def find_image_id(self, metadatas):
        """Return the ``meta_value`` text of the first ``_thumbnail_id`` entry, or ``None``.

        :param metadatas: the ``postmeta`` nodes of a WXR item.
        """
        key_tag = u'{{{0:s}}}meta_key'.format(WP_NS)
        value_tag = u'{{{0:s}}}meta_value'.format(WP_NS)
        for meta in metadatas:
            if meta.find(key_tag).text != '_thumbnail_id':
                continue
            return meta.find(value_tag).text
        return None

    def import_entries(self, items):
        """Import every item that is a ``post`` with both a title and a body.

        :param items: the ``channel/item`` nodes of the WXR document.
        """
        self.stdout.write("Importing entries...")

        post_type_tag = u'{{{0:s}}}post_type'.format(WP_NS)
        content_tag = '{http://purl.org/rss/1.0/modules/content/}encoded'
        for item_node in items:
            title = (item_node.find('title').text or '')[:255]
            post_type = item_node.find(post_type_tag).text
            content = item_node.find(content_tag).text

            # Skip attachments, pages and posts lacking a title or a body.
            if post_type != 'post' or not content or not title:
                continue
            self.stdout.write(u'\t{0:s}'.format(title))
            processed_content = self.process_content_image(content)
            self.import_entry(title, processed_content, items, item_node)

    def _import_image(self, image_url):
        """Download an image into a temporary file.

        :param image_url: URL of the image to fetch.
        :returns: an open ``NamedTemporaryFile`` holding the response body, or
            ``None`` when the request fails or does not answer with status 200.
        """
        try:
            response = requests.get(image_url)
        except requests.exceptions.RequestException:
            # RequestException also covers malformed or scheme-less URLs (for
            # example a relative ``src``) and timeouts, so a single bad image
            # no longer aborts the whole import.
            self.stdout.write(
                'WARNING: Unable to connect to URL "{}". Image will be broken.'
                .format(image_url))
            return None
        if response.status_code != 200:
            return None
        # Allocate the temporary file only once there is a body to store, so
        # no file is created and dropped on the failure paths.
        image = NamedTemporaryFile(delete=True)
        image.write(response.content)
        image.flush()
        return image

    def import_header_image(self, entry, items, image_id):
        """Set the entry's header image from the attachment item with the given post id.

        :param entry: entry page receiving the header image.
        :param items: all ``channel/item`` nodes of the WXR document.
        :param image_id: ``post_id`` text of the attachment to import.
        """
        self.stdout.write('\tImport header images....')
        post_type_tag = u'{{{0:s}}}post_type'.format(WP_NS)
        post_id_tag = u'{{{0:s}}}post_id'.format(WP_NS)
        attachment_url_tag = u'{{{0:s}}}attachment_url'.format(WP_NS)
        for item in items:
            if item.find(post_type_tag).text != 'attachment':
                continue
            if item.find(post_id_tag).text != image_id:
                continue
            title = item.find('title').text
            image_url = item.find(attachment_url_tag).text
            image = self._import_image(image_url)
            if not image:
                # Download failed; leave the entry without a header image.
                continue
            new_image = WagtailImage(file=File(file=image), title=title)
            new_image.save()
            entry.header_image = new_image
            entry.save()

    def _image_to_embed(self, image):
        """Return the ``<embed>`` markup referencing the given image.

        The result is parsed as XML by the caller, so the title is quoted and
        escaped as an XML attribute value; a raw ``&``, ``<`` or ``"`` in the
        title would otherwise make the markup unparseable.

        :param image: object exposing ``title`` and ``id``.
        :returns: the embed element as text.
        """
        from xml.sax.saxutils import quoteattr

        # quoteattr returns the value already wrapped in matching quotes.
        alt_attribute = quoteattr(u'{}'.format(image.title))
        return u'<embed alt={} embedtype="image" format="fullwidth" id="{}"/>'.format(
            alt_attribute, image.id)

    def process_content_image(self, content):
        """Import the images referenced by an entry body and swap them for embeds.

        Every ``<img>`` whose ``src`` contains ``wp-content`` or ``files`` is
        downloaded and stored as a ``WagtailImage``; the tag (or the link
        wrapping it) is replaced by an ``<embed>`` element. Images that cannot
        be downloaded get a ``<pre>`` marker inserted after their parent.

        :param content: HTML body of the entry; a falsy value is returned as is.
        :returns: the rewritten markup as text.
        """
        self.stdout.write('\tGenerate and replace entry content images....')
        if not content:
            return content
        root = lxml.html.fromstring(content)
        # Snapshot the nodes first: the loop removes elements from the very
        # tree it would otherwise still be iterating over.
        for img_node in list(root.iter('img')):
            parent_node = img_node.getparent()
            # An <img> without src cannot match and must not raise KeyError.
            src = img_node.attrib.get('src') or ''
            if 'wp-content' not in src and 'files' not in src:
                continue
            if parent_node is None:
                # The image is the document root: there is nothing to attach
                # a replacement to, so it is left untouched.
                continue
            image = self._import_image(src)
            if image:
                # Fall back to the file name so the image never gets an
                # empty title when both attributes are missing.
                title = (img_node.attrib.get('title') or img_node.attrib.get('alt')
                         or src.rsplit('/', 1)[-1])
                new_image = WagtailImage(file=File(file=image), title=title)
                new_image.save()
                embed_node = ET.XML(self._image_to_embed(new_image))
                if parent_node.tag == 'a':
                    # Replace the whole link wrapping the image.
                    parent_node.addnext(embed_node)
                    parent_node.drop_tree()
                else:
                    parent_node.append(embed_node)
                    img_node.drop_tag()
            else:
                self.stdout.write(src)
                # Build the marker as an element: URLs commonly contain '&',
                # which is not valid inside hand-written XML text.
                missing_node = ET.Element('pre')
                missing_node.text = u'Image missing: {}'.format(src)
                parent_node.addnext(missing_node)
        # Serialise to text rather than bytes so the body is stored as markup.
        return ET.tostring(root, encoding='unicode')
Example #5
0
class Command(LabelCommand):
    help = 'Import blog data from Wordpress'
    label = 'WXR file'
    args = 'wordpress.xml'

    SITE = Site.objects.get_current()

    def add_arguments(self, parser):
        """Declare the positional ``wxr_file`` and the optional ``--slug`` / ``--title``."""
        parser.add_argument('wxr_file')
        blog_options = (
            ('--slug', 'blog', "Slug of the blog."),
            ('--title', 'Blog', "Title of the blog."),
        )
        for flag, default_value, help_text in blog_options:
            parser.add_argument(flag, default=default_value, help=help_text)

    def handle(self, wxr_file, **options):
        """Import one WXR export into the blog page.

        Resolves the blog page, parses the file, binds the module-level
        ``WP_NS`` template to the detected WXR version and then imports
        authors, categories and entries, in that order.

        :param wxr_file: value handed to ``ET.parse``.
        :param options: command options; ``slug`` and ``title`` are read.
        """
        global WP_NS
        slug, title = options['slug'], options['title']
        self.get_blog_page(slug, title)
        self.tree = ET.parse(wxr_file)
        # NOTE(review): WP_NS is rebound in place, so a second call in the same
        # process would apply ``%`` to the already formatted value -- confirm
        # the command is only ever run once per process.
        wxr_version = self.get_wordpress_version(self.tree)
        WP_NS = WP_NS % wxr_version
        self.import_authors(self.tree)
        category_path = u'channel/{{{0:s}}}category'.format(WP_NS)
        self.categories = self.import_categories(self.tree.findall(category_path))
        self.import_entries(self.tree.findall('channel/item'))

    def get_wordpress_version(self, tree):
        """
        Return the first WXR version (tried as 1.2, 1.1, 1.0) for which the
        tree has a ``wxr_version`` element under ``channel``.

        Raises ``CommandError`` when none of the versions matches.
        """
        for candidate in ('1.2', '1.1', '1.0'):
            version_path = u'channel/{{{0:s}}}wxr_version'.format(WP_NS % candidate)
            try:
                # ``find`` yields None for an unknown namespace, so touching
                # ``.text`` raises AttributeError for a non-matching version.
                tree.find(version_path).text
            except AttributeError:
                continue
            return candidate
        raise CommandError('Cannot resolve the wordpress namespace')

    def import_authors(self, tree):
        """Fill ``self.authors`` with one user per distinct creator of a ``post`` item.

        :param tree: parsed WXR document; its ``channel/item`` nodes are scanned.
        """
        self.stdout.write('Importing authors...')

        post_type_tag = u'{{{0:s}}}post_type'.format(WP_NS)
        creator_tag = '{http://purl.org/dc/elements/1.1/}creator'
        creators = {
            item.find(creator_tag).text
            for item in tree.findall('channel/item')
            if item.find(post_type_tag).text == 'post'
        }

        # The mapping stays keyed by the creator text as found in the file;
        # only the name handed to import_author has spaces turned into dashes.
        self.authors = {}
        for creator in creators:
            self.authors[creator] = self.import_author(creator.replace(' ', '-'))

    def import_author(self, author_name):
        """Interactively map a WordPress author onto a user and return that user.

        The operator chooses between picking an existing user and creating a
        new one. Typing ``back`` at either follow-up prompt restarts the
        dialogue from the first question.

        :param author_name: name shown in the prompts and used as the username
            when a new user is created.
        :returns: the selected or newly created user instance.
        """
        action_text = u"The author '{0:s}' needs to be migrated to an user:\n" \
                      u"1. Use an existing user ?\n" \
                      u"2. Create a new user ?\n" \
                      u"Please select a choice: ".format(author_name)
        User = get_user_model()
        while True:
            selection = str(input(action_text))
            # Compare against the two valid answers explicitly: a substring
            # test on '12' also accepts the input '12' and then silently
            # treats it as choice 2.
            if selection in ('1', '2'):
                break

        if selection == '2':
            create_text = u"2. Please type the email of " \
                          u"the '{0:s}' user or 'back': ".format(author_name)
            author_mail = input(create_text)
            if author_mail.strip() == 'back':
                return self.import_author(author_name)
            try:
                return User.objects.create_user(author_name, author_mail)
            except IntegrityError:
                # Presumably the username is already taken: fall back to the
                # user that carries it.
                return User.objects.get(**{User.USERNAME_FIELD: author_name})

        users = User.objects.all()
        usernames = [user.get_username() for user in users]
        if len(usernames) == 1:
            # A single user is always offered as the default answer.
            preselected_user = usernames[0]
        elif author_name in usernames:
            preselected_user = author_name
        else:
            preselected_user = None
        # The default answer, if any, is shown in square brackets.
        usernames_display = [
            u'[{0:s}]'.format(username) if username == preselected_user else username
            for username in usernames
        ]
        user_text = u"1. Select your user, by typing " \
                    u"one of theses usernames:\n" \
                    u"{0:s} or 'back'\n" \
                    u"Please select a choice: ".format(', '.join(usernames_display))
        while True:
            user_selected = input(user_text)
            if user_selected in usernames:
                break
            if user_selected == '' and preselected_user:
                user_selected = preselected_user
                break
            if user_selected.strip() == 'back':
                return self.import_author(author_name)
        return users.get(**{User.USERNAME_FIELD: user_selected})

    def get_blog_page(self, slug, title):
        """Load the blog page with this slug into ``self.blogpage``, creating it if needed.

        When no ``BlogPage`` has the slug, the first ``Site`` is pointed at the
        first ``Page``, a new ``BlogPage`` is added as a child of that page and
        a revision of the new page is published.

        :param slug: slug used to look up (or create) the blog page.
        :param title: title given to the blog page when it has to be created.
        """
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            # Create the blog page below the root page
            self.blogpage = BlogPage(title=title, slug=slug)
            rootpage.add_child(instance=self.blogpage)
            # Publish the page that was just created. The revision used to be
            # taken from the root page, which left the blog page itself
            # without any revision.
            revision = self.blogpage.save_revision()
            revision.publish()

    def import_categories(self, category_nodes):
        """Create or update a ``PuputCategory`` for every WXR category node.

        :param category_nodes: the ``category`` elements found under ``channel``.
        :returns: dict mapping each category title to its ``PuputCategory``.
        """
        self.stdout.write('Importing categories...')

        categories = {}
        # WordPress exports put the parent's nicename (slug) in
        # ``category_parent``, so parents must also be resolvable by slug;
        # a title-only lookup loses the link whenever slug and name differ.
        # Kept separate so the returned mapping stays keyed by title only.
        categories_by_slug = {}
        for category_node in category_nodes:
            title = category_node.find(
                u'{{{0:s}}}cat_name'.format(WP_NS)).text[:255]
            slug = category_node.find(
                u'{{{0:s}}}category_nicename'.format(WP_NS)).text[:255]
            try:
                parent = category_node.find(
                    u'{{{0:s}}}category_parent'.format(WP_NS)).text[:255]
            except TypeError:
                # The element is present but empty: top-level category.
                parent = None
            self.stdout.write(u'\t\t{0:s}'.format(title))
            parent_category = categories.get(parent)
            if parent_category is None:
                parent_category = categories_by_slug.get(parent)
            category, created = PuputCategory.objects.update_or_create(
                name=title,
                defaults={
                    'slug': slug,
                    'parent': parent_category
                })
            categories[title] = category
            categories_by_slug[slug] = category
        return categories

    def import_entry_tags(self, tags, page):
        """Add to the page a tag for every node whose domain contains ``tag`` and that has a nicename.

        :param tags: the ``category`` child nodes of a WXR item.
        :param page: entry page receiving the tags.
        """
        self.stdout.write("\tImporting tags...")
        for node in tags:
            attributes = node.attrib
            # Nodes without a domain are treated as plain categories.
            if 'tag' not in attributes.get('domain', 'category'):
                continue
            if not attributes.get('nicename'):
                continue
            self.stdout.write(u'\t\t{}'.format(node.text))
            puput_tag, _ = PuputTag.objects.update_or_create(name=node.text)
            page.entry_tags.add(PuputTagEntryPage(tag=puput_tag))

    def import_entry_categories(self, category_nodes, page):
        """Link the page to the ``PuputCategory`` named by each ``category``-domain node.

        :param category_nodes: the ``category`` child nodes of a WXR item.
        :param page: entry page to link the categories to.
        """
        for node in category_nodes:
            if node.attrib.get('domain') != 'category':
                continue
            puput_category = PuputCategory.objects.get(name=node.text)
            PuputCategoryEntryPage.objects.get_or_create(
                category=puput_category, page=page)

    def import_entry(self, title, content, items, item_node):
        """Create (if missing) the ``EntryPage`` for one WXR item and attach its metadata.

        The page is looked up by slug and, when absent, created as a child of
        ``self.blogpage``. Tags, categories and the header image are imported
        afterwards in both cases, then the page is saved with a new revision.

        :param title: post title, already truncated by the caller.
        :param content: post body, already run through ``process_content_image``.
        :param items: all ``channel/item`` nodes, searched for the header image.
        :param item_node: the WXR item being imported.
        """
        date_format = '%Y-%m-%d %H:%M:%S'
        creation_date = datetime.strptime(
            item_node.find(u'{{{0:s}}}post_date'.format(WP_NS)).text,
            date_format)
        try:
            entry_date = datetime.strptime(
                item_node.find(u'{{{0:s}}}post_date_gmt'.format(WP_NS)).text,
                date_format)
        except ValueError:
            # post_date_gmt is not a parseable date: fall back to post_date.
            entry_date = creation_date
        if settings.USE_TZ:
            gmt = pytz.timezone('GMT')
            creation_date = timezone.make_aware(creation_date, gmt)
            # go_live_at has to be aware as well; it used to be stored naive
            # while every other date on the page was aware.
            entry_date = timezone.make_aware(entry_date, gmt)

        # The path expands to ``{<WP_NS>excerpt/}encoded``: the excerpt
        # namespace is nested below the WordPress export namespace.
        excerpt_node = item_node.find(
            u'{{{0:s}excerpt/}}encoded'.format(WP_NS))
        # Items without an excerpt element are treated like an empty excerpt.
        excerpt_text = excerpt_node.text if excerpt_node is not None else None
        excerpt = strip_tags(excerpt_text or '')
        if not excerpt and content:
            excerpt = Truncator(content).words(50)
        slug = slugify(title)[:255] or u'post-{0:s}'.format(
            item_node.find(u'{{{0:s}}}post_id'.format(WP_NS)).text)
        creator = item_node.find(
            '{http://purl.org/dc/elements/1.1/}creator').text
        # Create page
        try:
            page = EntryPage.objects.get(slug=slug)
        except EntryPage.DoesNotExist:
            page = EntryPage(
                title=title,
                body=content,
                excerpt=strip_tags(excerpt),
                slug=slug,
                go_live_at=entry_date,
                first_published_at=creation_date,
                date=creation_date,
                owner=self.authors.get(creator),
                seo_title=title,
                search_description=excerpt,
                live=item_node.find(
                    u'{{{0:s}}}status'.format(WP_NS)).text == 'publish')
            self.blogpage.add_child(instance=page)
            revision = self.blogpage.save_revision()
            revision.publish()
        category_nodes = item_node.findall('category')
        self.import_entry_tags(category_nodes, page)
        self.import_entry_categories(category_nodes, page)
        # Import header image
        image_id = self.find_image_id(
            item_node.findall(u'{{{0:s}}}postmeta'.format(WP_NS)))
        if image_id:
            self.import_header_image(page, items, image_id)
        page.save()
        page.save_revision(changed=False)

    def find_image_id(self, metadatas):
        """Return the ``meta_value`` text of the first ``_thumbnail_id`` entry, or ``None``.

        :param metadatas: the ``postmeta`` nodes of a WXR item.
        """
        key_tag = u'{{{0:s}}}meta_key'.format(WP_NS)
        value_tag = u'{{{0:s}}}meta_value'.format(WP_NS)
        for meta in metadatas:
            if meta.find(key_tag).text != '_thumbnail_id':
                continue
            return meta.find(value_tag).text
        return None

    def import_entries(self, items):
        """Import every item that is a ``post`` with both a title and a body.

        :param items: the ``channel/item`` nodes of the WXR document.
        """
        self.stdout.write("Importing entries...")

        post_type_tag = u'{{{0:s}}}post_type'.format(WP_NS)
        content_tag = '{http://purl.org/rss/1.0/modules/content/}encoded'
        for item_node in items:
            title = (item_node.find('title').text or '')[:255]
            post_type = item_node.find(post_type_tag).text
            content = item_node.find(content_tag).text

            # Skip attachments, pages and posts lacking a title or a body.
            if post_type != 'post' or not content or not title:
                continue
            self.stdout.write(u'\t{0:s}'.format(title))
            processed_content = self.process_content_image(content)
            self.import_entry(title, processed_content, items, item_node)

    def _import_image(self, image_url):
        """Download an image into a temporary file.

        :param image_url: URL of the image to fetch.
        :returns: an open ``NamedTemporaryFile`` holding the response body, or
            ``None`` when the request fails or does not answer with status 200.
        """
        try:
            response = requests.get(image_url)
        except requests.exceptions.RequestException:
            # RequestException also covers malformed or scheme-less URLs (for
            # example a relative ``src``) and timeouts, so a single bad image
            # no longer aborts the whole import.
            self.stdout.write(
                'WARNING: Unable to connect to URL "{}". Image will be broken.'
                .format(image_url))
            return None
        if response.status_code != 200:
            return None
        # Allocate the temporary file only once there is a body to store, so
        # no file is created and dropped on the failure paths.
        image = NamedTemporaryFile(delete=True)
        image.write(response.content)
        image.flush()
        return image

    def import_header_image(self, entry, items, image_id):
        """Set the entry's header image from the attachment item with the given post id.

        :param entry: entry page receiving the header image.
        :param items: all ``channel/item`` nodes of the WXR document.
        :param image_id: ``post_id`` text of the attachment to import.
        """
        self.stdout.write('\tImport header images....')
        post_type_tag = u'{{{0:s}}}post_type'.format(WP_NS)
        post_id_tag = u'{{{0:s}}}post_id'.format(WP_NS)
        attachment_url_tag = u'{{{0:s}}}attachment_url'.format(WP_NS)
        for item in items:
            if item.find(post_type_tag).text != 'attachment':
                continue
            if item.find(post_id_tag).text != image_id:
                continue
            title = item.find('title').text
            image_url = item.find(attachment_url_tag).text
            image = self._import_image(image_url)
            if not image:
                # Download failed; leave the entry without a header image.
                continue
            new_image = WagtailImage(file=File(file=image), title=title)
            new_image.save()
            entry.header_image = new_image
            entry.save()

    def _image_to_embed(self, image):
        """Return the ``<embed>`` markup referencing the given image.

        The result is parsed as XML by the caller, so the title is quoted and
        escaped as an XML attribute value; a raw ``&``, ``<`` or ``"`` in the
        title would otherwise make the markup unparseable.

        :param image: object exposing ``title`` and ``id``.
        :returns: the embed element as text.
        """
        from xml.sax.saxutils import quoteattr

        # quoteattr returns the value already wrapped in matching quotes.
        alt_attribute = quoteattr(u'{}'.format(image.title))
        return u'<embed alt={} embedtype="image" format="fullwidth" id="{}"/>'.format(
            alt_attribute, image.id)

    def process_content_image(self, content):
        """Import the images referenced by an entry body and swap them for embeds.

        Every ``<img>`` whose ``src`` contains ``wp-content`` or ``files`` is
        downloaded and stored as a ``WagtailImage``; the tag (or the link
        wrapping it) is replaced by an ``<embed>`` element. Images that cannot
        be downloaded are left as they are.

        :param content: HTML body of the entry; a falsy value is returned as is.
        :returns: the rewritten markup as text.
        """
        self.stdout.write('\tGenerate and replace entry content images....')
        if not content:
            return content
        root = lxml.html.fromstring(content)
        # Snapshot the nodes first: the loop removes elements from the very
        # tree it would otherwise still be iterating over.
        for img_node in list(root.iter('img')):
            parent_node = img_node.getparent()
            # An <img> without src cannot match and must not raise KeyError.
            src = img_node.attrib.get('src') or ''
            if 'wp-content' not in src and 'files' not in src:
                continue
            if parent_node is None:
                # The image is the document root: there is nothing to attach
                # a replacement to, so it is left untouched.
                continue
            image = self._import_image(src)
            if not image:
                continue
            # Fall back to the file name so the image never gets an empty
            # title when both attributes are missing.
            title = (img_node.attrib.get('title') or img_node.attrib.get('alt')
                     or src.rsplit('/', 1)[-1])
            new_image = WagtailImage(file=File(file=image), title=title)
            new_image.save()
            embed_node = ET.XML(self._image_to_embed(new_image))
            if parent_node.tag == 'a':
                # Replace the whole link wrapping the image.
                parent_node.addnext(embed_node)
                parent_node.drop_tree()
            else:
                parent_node.append(embed_node)
                img_node.drop_tag()
        # Serialise to text rather than bytes so the body is stored as markup.
        return ET.tostring(root, encoding='unicode')
Example #6
0
class Command(BaseCommand):
    help = "Import blog data from Zinnia"
    entries = {}

    def add_arguments(self, parser):
        """Register the optional ``--slug`` and ``--title`` options on the parser."""
        blog_options = (
            ('--slug', 'blog', "Slug of the blog."),
            ('--title', 'Blog', "Title of the blog."),
        )
        for flag, default_value, help_text in blog_options:
            parser.add_argument(flag, default=default_value, help=help_text)

    def handle(self, *args, **options):
        """Resolve the blog page, then import categories, entries and related entries.

        :param options: command options; ``slug`` and ``title`` are read.
        """
        slug, title = options['slug'], options['title']
        self.get_blog_page(slug, title)
        # Order matters: entries look categories up by name, and related
        # entries need the pages recorded while importing entries.
        self.import_categories()
        self.import_entries()
        self.import_related_entries()

    def get_blog_page(self, slug, title):
        """Load the blog page with this slug into ``self.blogpage``, creating it if needed.

        When no ``BlogPage`` has the slug, the first ``Site`` is pointed at the
        first ``Page``, a new ``BlogPage`` is added as a child of that page and
        a revision of the new page is published.

        :param slug: slug used to look up (or create) the blog page.
        :param title: title given to the blog page when it has to be created.
        """
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            # Create the blog page below the root page
            self.blogpage = BlogPage(
                title=title,
                slug=slug,
            )
            rootpage.add_child(instance=self.blogpage)
            # Publish the page that was just created. The revision used to be
            # taken from the root page, which left the blog page itself
            # without any revision.
            revision = self.blogpage.save_revision()
            revision.publish()

    def import_categories(self):
        """Create or update one ``PuputCategory`` per Zinnia category, keyed by name."""
        self.stdout.write("Importing categories...")
        for category in ZinniaCategory.objects.all():
            self.stdout.write("\t{}".format(category))
            # Only the name identifies the category (entries later look it up
            # with ``get(name=...)``); slug and description are values to
            # update. Using all three as lookup keys created a second row as
            # soon as a description or slug changed between runs.
            PuputCategory.objects.update_or_create(
                name=category.title,
                defaults={
                    'slug': category.slug,
                    'description': category.description,
                })

    def import_entries(self):
        """Create an ``EntryPage`` for every Zinnia entry and record it in ``self.entries``.

        For each entry the header image and the images embedded in the content
        are copied into Wagtail, the page is created below ``self.blogpage``
        when no page with the entry's slug exists, and categories and tags are
        attached. ``self.entries`` maps the Zinnia primary key to the page.
        """
        self.stdout.write("Importing entries...")
        for entry in ZinniaEntry.objects.all():
            self.stdout.write(entry.title)
            # Header images
            if entry.image:
                header_image = WagtailImage(file=entry.image, title=os.path.basename(entry.image.url))
                self.stdout.write('\tImported header image: {}'.format(entry.image))
                header_image.save()
            else:
                header_image = None

            self.stdout.write('\tGenerate and replace entry content images....')
            if entry.content:
                # New content with images replaced
                content = self._replace_content_images(entry.content)
            else:
                content = entry.content

            # Create page
            try:
                page = EntryPage.objects.get(slug=entry.slug)
            except EntryPage.DoesNotExist:
                page = EntryPage(
                    title=entry.title,
                    body=content,
                    slug=entry.slug,
                    go_live_at=entry.start_publication,
                    expire_at=entry.end_publication,
                    first_published_at=entry.creation_date,
                    date=entry.creation_date,
                    owner=entry.authors.first(),
                    seo_title=entry.title,
                    search_description=entry.excerpt,
                    live=entry.is_visible,
                    header_image=header_image
                )
                self.blogpage.add_child(instance=page)
                revision = self.blogpage.save_revision()
                revision.publish()
            self.import_entry_categories(entry, page)
            self.import_entry_tags(entry, page)
            page.save()
            page.save_revision(changed=False)
            self.entries[entry.pk] = page

    def _replace_content_images(self, html):
        """Copy media images referenced in ``html`` into Wagtail and return the rewritten markup as text."""
        root = lxml.html.fromstring(html)
        for el in root.iter('img'):
            # An <img> without src cannot match and must not raise KeyError.
            src = el.attrib.get('src') or ''
            if not src.startswith(settings.MEDIA_URL):
                continue
            # Strip the prefix only; ``replace`` would also remove later
            # occurrences of MEDIA_URL inside the path.
            old_image = src[len(settings.MEDIA_URL):]
            # Images are binary: text mode fails to decode them on Python 3.
            with open('{}/{}'.format(settings.MEDIA_ROOT, old_image), 'rb') as image_file:
                new_image = WagtailImage(file=File(file=image_file, name=os.path.basename(old_image)),
                                         title=os.path.basename(old_image))
                new_image.save()
                el.attrib['src'] = new_image.file.url
                self.stdout.write('\t\t{}'.format(new_image.file.url))
        # Serialise to text rather than bytes so the body is stored as markup.
        return lxml.html.tostring(root, pretty_print=True, encoding='unicode')

    def import_related_entries(self):
        """Recreate Zinnia's related-entry links between the imported pages.

        Relies on ``self.entries`` mapping Zinnia primary keys to pages.
        """
        self.stdout.write("Importing related entries...")
        imported_pages = self.entries
        for entry in ZinniaEntry.objects.all():
            for related_entry in entry.related.all():
                EntryPageRelated.objects.get_or_create(
                    entrypage_from=imported_pages[entry.pk],
                    entrypage_to=imported_pages[related_entry.pk])

    def import_entry_categories(self, entry, page):
        """Link the page to the ``PuputCategory`` named after each of the entry's categories.

        :param entry: Zinnia entry whose categories are read.
        :param page: entry page to link the categories to.
        """
        self.stdout.write("\tImporting categories...")
        for zinnia_category in entry.categories.all():
            category_name = zinnia_category.title
            self.stdout.write('\t\tAdd category: {}'.format(category_name))
            puput_category = PuputCategory.objects.get(name=category_name)
            PuputCategoryEntryPage.objects.get_or_create(category=puput_category, page=page)

    def import_entry_tags(self, entry, page):
        """Add a ``PuputTag`` to the page for every item in the entry's ``tags_list``.

        :param entry: Zinnia entry whose ``tags_list`` is read.
        :param page: entry page receiving the tags.
        """
        self.stdout.write("\tImporting tags...")
        for tag_name in entry.tags_list:
            self.stdout.write('\t\t{}'.format(tag_name))
            puput_tag, _ = PuputTag.objects.update_or_create(name=tag_name)
            page.entry_tags.add(PuputTagEntryPage(tag=puput_tag))
Example #7
0
class Command(NoArgsCommand):
    """Import the posts of a Blogger blog into a Puput blog.

    Posts are fetched from the URL built from ``BLOGGER_URL``, their authors
    are mapped interactively to users, and every post becomes an
    ``EntryPage`` child of the target ``BlogPage``.
    """
    help = 'Import blog data from Blogger.'

    option_list = NoArgsCommand.option_list + (
        make_option('--slug', default='blog', help="Slug of the blog."),
        make_option('--title', default='Blog', help="Title of the blog."),
        make_option('--blogger_blog_id', dest='blogger_blog_id', default='', help='Id of the Blogger blog to import.'),
        make_option('--blogger_api_key', dest='blogger_api_key', default='',
                    help='API Key of the Blogger blog to import.')
    )

    # Evaluated while the class body runs; no method of this class reads it.
    SITE = Site.objects.get_current()

    def handle_noargs(self, **options):
        """Run the import: blog page, Blogger posts, authors, then entries."""
        self.blogger_blog_id = options.get('blogger_blog_id')
        self.blogger_api_key = options.get('blogger_api_key')
        self.get_blog_page(options['slug'], options['title'])
        self.blogger_entries = self.get_blogger_entries()
        self.import_authors()
        self.import_entries()

    def get_blog_page(self, slug, title):
        """Store in ``self.blogpage`` the blog page with ``slug``, creating it if missing.

        When created, the page is added under the first ``Page`` and the
        first ``Site`` is pointed at that same first page.
        """
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            self.blogpage = BlogPage(title=title, slug=slug)
            rootpage.add_child(instance=self.blogpage)
            # NOTE(review): the revision is taken from the root page, not from
            # the new blog page -- confirm this is intended.
            revision = rootpage.save_revision()
            revision.publish()

    def import_authors(self):
        """Map every distinct Blogger author to a user.

        Fills ``self.authors`` with ``{display name: user}``; the user name
        proposed to ``import_author`` is the display name with spaces
        replaced by hyphens.
        """
        self.stdout.write('Importing authors...')

        entry_authors = {
            entry['author']['displayName'] for entry in self.blogger_entries
        }

        self.stdout.write(u'{0:d} authors found.'.format(len(entry_authors)))
        self.authors = {}
        for entry_author in entry_authors:
            self.authors[entry_author] = self.import_author(entry_author.replace(' ', '-'))

    def import_author(self, author_name):
        """Interactively pick an existing user or create one for ``author_name``.

        Returns the selected or created user. Typing ``back`` at either
        sub-prompt restarts the whole dialogue.
        """
        action_text = u"The author '{0:s}' needs to be migrated to an user:\n" \
                      u"1. Use an existing user ?\n" \
                      u"2. Create a new user ?\n" \
                      u"Please select a choice: ".format(author_name)
        User = get_user_model()
        while True:
            selection = str(input(action_text))
            # Compare against the two valid answers explicitly: a substring
            # test against '12' would also accept the answer "12".
            if selection in ('1', '2'):
                break
        if selection == '1':
            users = User.objects.all()
            if users.count() == 1:
                username = users[0].get_username()
                preselected_user = username
                usernames = [username]
                usernames_display = [u'[{0:s}]'.format(username)]
            else:
                usernames = []
                usernames_display = []
                preselected_user = None
                for user in users:
                    username = user.get_username()
                    if username == author_name:
                        # Brackets mark the default chosen on empty input.
                        usernames_display.append(u'[{0:s}]'.format(username))
                        preselected_user = username
                    else:
                        usernames_display.append(username)
                    usernames.append(username)
            user_text = u"1. Select your user, by typing " \
                        u"one of theses usernames:\n" \
                        u"{0:s} or 'back'\n" \
                        u"Please select a choice: ".format(', '.join(usernames_display))
            while True:
                user_selected = input(user_text)
                if user_selected in usernames:
                    break
                if user_selected == '' and preselected_user:
                    user_selected = preselected_user
                    break
                if user_selected.strip() == 'back':
                    return self.import_author(author_name)
            return users.get(**{User.USERNAME_FIELD: user_selected})
        else:
            create_text = u"2. Please type the email of " \
                          u"the '{0:s}' user or 'back': ".format(author_name)
            author_mail = input(create_text)
            if author_mail.strip() == 'back':
                return self.import_author(author_name)
            try:
                return User.objects.create_user(author_name, author_mail)
            except IntegrityError:
                # A user with that name already exists: reuse it.
                return User.objects.get(**{User.USERNAME_FIELD: author_name})

    def get_blogger_entries(self):
        """Return the list of posts from the Blogger API.

        Returns an empty list (after reporting on stderr) when the request
        does not answer 200, and when the response carries no ``items`` key,
        so callers can always iterate the result.
        """
        res = requests.get(BLOGGER_URL.format(self.blogger_blog_id, self.blogger_api_key))
        if res.status_code != 200:
            self.stderr.write(
                'Blogger API request failed with status {}.'.format(res.status_code))
            return []
        return res.json().get('items', [])

    def import_entry_tags(self, tags, entry):
        """Add each tag name in ``tags`` to ``entry.entry_tags``."""
        for tag in tags:
            puput_tag, created = PuputTag.objects.update_or_create(name=tag)
            entry.entry_tags.add(PuputTagEntryPage(tag=puput_tag))

    def import_entries(self):
        """Create an ``EntryPage`` for every Blogger post not imported yet.

        Pages are matched by the slugified post title. Tags are (re)applied
        and the page saved for both new and already existing pages.
        """
        self.stdout.write('Importing entries...')

        for entry in self.blogger_entries:
            content = entry['content'] or ''
            content = self.process_content_image(content)
            excerpt = Truncator(content).words(50) or ''
            slug = slugify(entry['title'])
            try:
                page = EntryPage.objects.get(slug=slug)
            except EntryPage.DoesNotExist:
                page = EntryPage(
                    title=entry['title'],
                    body=content,
                    excerpt=strip_tags(excerpt),
                    slug=slug,
                    go_live_at=entry['published'],
                    first_published_at=entry['published'],
                    date=entry['published'],
                    # import_authors() keys self.authors by the raw display
                    # name, so look it up unmodified (the hyphenated form
                    # raised KeyError for names containing a space).
                    owner=self.authors[entry['author']['displayName']],
                    seo_title=entry['title'],
                    search_description=excerpt,
                    live=entry['published'])
                self.blogpage.add_child(instance=page)
                revision = self.blogpage.save_revision()
                revision.publish()
            self.import_entry_tags(entry.get('labels', []), page)
            page.save()

    def _import_image(self, image_url):
        """Download ``image_url`` into a temporary file.

        Returns the flushed temporary file, or ``None`` when the response
        status is not 200. The file is only created after a successful
        download so a failed request does not leave one open.
        """
        response = requests.get(image_url)
        if response.status_code != 200:
            return None
        image = NamedTemporaryFile(delete=True)
        image.write(response.content)
        image.flush()
        return image

    def _image_to_embed(self, image):
        """Return the ``<embed>`` markup referencing ``image`` by title and id."""
        return u'<embed alt="{}" embedtype="image" format="fullwidth" id="{}"/>'.format(image.title, image.id)

    def process_content_image(self, content):
        """Replace Blogspot-hosted ``<img>`` tags in ``content`` with image embeds.

        Each matching image is downloaded, saved as a ``WagtailImage`` and
        swapped for an ``<embed>`` element. Empty content is returned as is;
        otherwise the serialized tree is returned.
        """
        self.stdout.write('\tGenerate and replace entry content images....')
        if content:
            root = lxml.html.fromstring(content)
            # Snapshot the nodes first: the loop removes elements from the
            # tree, which is unsafe while iterating it lazily.
            for img_node in list(root.iter('img')):
                src = img_node.attrib.get('src', '')
                if 'bp.blogspot.com' not in src:
                    continue
                parent_node = img_node.getparent()
                if parent_node is None:
                    # The image is the root element: nothing to attach to.
                    continue
                if parent_node.tag == 'a' and parent_node.getparent() is None:
                    # The anchor is the root or was already dropped with a
                    # previous image; it cannot receive a sibling.
                    continue
                self.stdout.write('\t\t{}'.format(src))
                image = self._import_image(src)
                if not image:
                    continue
                title = src.rsplit('/', 1)[-1]
                new_image = WagtailImage(file=File(file=image), title=title)
                new_image.save()
                embed_node = ET.XML(self._image_to_embed(new_image))
                if parent_node.tag == 'a':
                    # Replace the whole link wrapping the image.
                    parent_node.addnext(embed_node)
                    parent_node.drop_tree()
                else:
                    parent_node.append(embed_node)
                    img_node.drop_tag()
            content = ET.tostring(root)
        return content
Example #8
0
class Command(NoArgsCommand):
    """Management command importing a Blogger blog into Puput.

    The command downloads the posts via ``BLOGGER_URL``, asks which user
    each Blogger author maps to, and stores every post as an ``EntryPage``
    under the selected ``BlogPage``.
    """
    help = 'Import blog data from Blogger.'

    option_list = NoArgsCommand.option_list + (
        make_option('--slug', default='blog', help="Slug of the blog."),
        make_option('--title', default='Blog', help="Title of the blog."),
        make_option('--blogger_blog_id',
                    dest='blogger_blog_id',
                    default='',
                    help='Id of the Blogger blog to import.'),
        make_option('--blogger_api_key',
                    dest='blogger_api_key',
                    default='',
                    help='API Key of the Blogger blog to import.'))

    # Computed when the class body is executed; unused by the methods below.
    SITE = Site.objects.get_current()

    def handle_noargs(self, **options):
        """Entry point: prepare the blog page, fetch posts, import authors and entries."""
        self.blogger_blog_id = options.get('blogger_blog_id')
        self.blogger_api_key = options.get('blogger_api_key')
        self.get_blog_page(options['slug'], options['title'])
        self.blogger_entries = self.get_blogger_entries()
        self.import_authors()
        self.import_entries()

    def get_blog_page(self, slug, title):
        """Load the ``BlogPage`` with ``slug`` into ``self.blogpage`` or create it.

        A newly created page becomes a child of the first ``Page``; the
        first ``Site`` gets that first page as its root page.
        """
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            self.blogpage = BlogPage(title=title, slug=slug)
            rootpage.add_child(instance=self.blogpage)
            # NOTE(review): this saves and publishes a revision of the root
            # page rather than of the new blog page -- confirm intent.
            revision = rootpage.save_revision()
            revision.publish()

    def import_authors(self):
        """Build ``self.authors``, a ``{display name: user}`` mapping.

        Each distinct author display name found in the fetched posts is
        resolved through ``import_author`` (with spaces turned into hyphens
        for the proposed user name).
        """
        self.stdout.write('Importing authors...')

        entry_authors = {
            entry['author']['displayName'] for entry in self.blogger_entries
        }

        self.stdout.write(u'{0:d} authors found.'.format(len(entry_authors)))
        self.authors = {}
        for entry_author in entry_authors:
            self.authors[entry_author] = self.import_author(
                entry_author.replace(' ', '-'))

    def import_author(self, author_name):
        """Ask on the console which user ``author_name`` maps to and return it.

        Choice 1 selects an existing user, choice 2 creates one from an
        e-mail address. Answering ``back`` restarts the dialogue.
        """
        action_text = u"The author '{0:s}' needs to be migrated to an user:\n" \
                      u"1. Use an existing user ?\n" \
                      u"2. Create a new user ?\n" \
                      u"Please select a choice: ".format(author_name)
        User = get_user_model()
        while True:
            selection = str(input(action_text))
            # Membership in a tuple, not in the string '12': the substring
            # test also let the answer "12" through.
            if selection in ('1', '2'):
                break
        if selection == '1':
            users = User.objects.all()
            if users.count() == 1:
                username = users[0].get_username()
                preselected_user = username
                usernames = [username]
                usernames_display = [u'[{0:s}]'.format(username)]
            else:
                usernames = []
                usernames_display = []
                preselected_user = None
                for user in users:
                    username = user.get_username()
                    if username == author_name:
                        # Bracketed name = default taken on empty input.
                        usernames_display.append(u'[{0:s}]'.format(username))
                        preselected_user = username
                    else:
                        usernames_display.append(username)
                    usernames.append(username)
            user_text = u"1. Select your user, by typing " \
                        u"one of theses usernames:\n" \
                        u"{0:s} or 'back'\n" \
                        u"Please select a choice: ".format(', '.join(usernames_display))
            while True:
                user_selected = input(user_text)
                if user_selected in usernames:
                    break
                if user_selected == '' and preselected_user:
                    user_selected = preselected_user
                    break
                if user_selected.strip() == 'back':
                    return self.import_author(author_name)
            return users.get(**{User.USERNAME_FIELD: user_selected})
        else:
            create_text = u"2. Please type the email of " \
                          u"the '{0:s}' user or 'back': ".format(author_name)
            author_mail = input(create_text)
            if author_mail.strip() == 'back':
                return self.import_author(author_name)
            try:
                return User.objects.create_user(author_name, author_mail)
            except IntegrityError:
                # The user name is already taken: fall back to that user.
                return User.objects.get(**{User.USERNAME_FIELD: author_name})

    def get_blogger_entries(self):
        """Fetch the Blogger posts and return them as a list.

        A non-200 response is reported on stderr and yields ``[]``; a
        response without an ``items`` key also yields ``[]``. The result is
        therefore always iterable.
        """
        res = requests.get(
            BLOGGER_URL.format(self.blogger_blog_id, self.blogger_api_key))
        if res.status_code != 200:
            self.stderr.write(
                'Blogger API request failed with status {}.'.format(
                    res.status_code))
            return []
        return res.json().get('items', [])

    def import_entry_tags(self, tags, entry):
        """Attach every tag name of ``tags`` to ``entry.entry_tags``."""
        for tag in tags:
            puput_tag, created = PuputTag.objects.update_or_create(name=tag)
            entry.entry_tags.add(PuputTagEntryPage(tag=puput_tag))

    def import_entries(self):
        """Import each fetched post as an ``EntryPage`` under ``self.blogpage``.

        An existing page with the same slug (slugified title) is reused.
        Tags are applied and the page saved in both cases.
        """
        self.stdout.write('Importing entries...')

        for entry in self.blogger_entries:
            content = entry['content'] or ''
            content = self.process_content_image(content)
            excerpt = Truncator(content).words(50) or ''
            slug = slugify(entry['title'])
            try:
                page = EntryPage.objects.get(slug=slug)
            except EntryPage.DoesNotExist:
                # self.authors is keyed by the unmodified display name (see
                # import_authors); the hyphenated form raised KeyError for
                # any author whose name contains a space.
                owner = self.authors[entry['author']['displayName']]
                page = EntryPage(title=entry['title'],
                                 body=content,
                                 excerpt=strip_tags(excerpt),
                                 slug=slug,
                                 go_live_at=entry['published'],
                                 first_published_at=entry['published'],
                                 date=entry['published'],
                                 owner=owner,
                                 seo_title=entry['title'],
                                 search_description=excerpt,
                                 live=entry['published'])
                self.blogpage.add_child(instance=page)
                revision = self.blogpage.save_revision()
                revision.publish()
            self.import_entry_tags(entry.get('labels', []), page)
            page.save()

    def _import_image(self, image_url):
        """Return a temporary file holding the image at ``image_url``, or ``None``.

        ``None`` is returned when the response status is not 200. The
        temporary file is created only after the download succeeded, so no
        file is left open on failure.
        """
        response = requests.get(image_url)
        if response.status_code != 200:
            return None
        image = NamedTemporaryFile(delete=True)
        image.write(response.content)
        image.flush()
        return image

    def _image_to_embed(self, image):
        """Build the ``<embed>`` markup for ``image`` from its title and id."""
        return u'<embed alt="{}" embedtype="image" format="fullwidth" id="{}"/>'.format(
            image.title, image.id)

    def process_content_image(self, content):
        """Swap Blogspot-hosted images in ``content`` for Wagtail image embeds.

        Every ``<img>`` whose ``src`` contains ``bp.blogspot.com`` is
        downloaded, stored as a ``WagtailImage`` and replaced by an
        ``<embed>`` element. Falsy content is returned unchanged; otherwise
        the serialized tree is returned.
        """
        self.stdout.write('\tGenerate and replace entry content images....')
        if content:
            root = lxml.html.fromstring(content)
            # Materialize the node list: elements are removed inside the
            # loop, which must not happen during lazy tree iteration.
            for img_node in list(root.iter('img')):
                src = img_node.attrib.get('src', '')
                if 'bp.blogspot.com' not in src:
                    continue
                parent_node = img_node.getparent()
                if parent_node is None:
                    # Root-level image: there is no parent to attach to.
                    continue
                if parent_node.tag == 'a' and parent_node.getparent() is None:
                    # Anchor is the root, or was dropped together with an
                    # earlier image; it cannot receive a sibling.
                    continue
                self.stdout.write('\t\t{}'.format(src))
                image = self._import_image(src)
                if not image:
                    continue
                title = src.rsplit('/', 1)[-1]
                new_image = WagtailImage(file=File(file=image),
                                         title=title)
                new_image.save()
                embed_node = ET.XML(self._image_to_embed(new_image))
                if parent_node.tag == 'a':
                    # The link around the image is replaced as a whole.
                    parent_node.addnext(embed_node)
                    parent_node.drop_tree()
                else:
                    parent_node.append(embed_node)
                    img_node.drop_tag()
            content = ET.tostring(root)
        return content
class Command(NoArgsCommand):
    """
    Command object for importing a Blogger blog
    into Puput via the Blogger v3 REST API.
    """
    help = 'Import a Blogger blog into Puput.'

    option_list = NoArgsCommand.option_list + (
        make_option('--blogger_title', dest='blogger_title', default='',
                    help='The tittle of blog the blogger'),
        make_option('--blogger_slug', dest='blogger_slug', default='',
                    help='The slug of blog the blogger'),
        make_option('--blogger_blog_id', dest='blogger_blog_id', default='',
                    help='The id of the Blogger blog to import'),
        make_option('--blogger_api_key', dest='blogger_api_key', default='',
                    help='The API of the Blogger blog to import'),
        make_option('--noautoexcerpt', action='store_false',
                    dest='auto_excerpt', default=True,
                    help='Do NOT generate an excerpt.'))

    # Evaluated when the class body runs; not read by the methods below.
    SITE = Site.objects.get_current()

    def handle_noargs(self, **options):
        """
        Read the options, prompt for a missing blog id / API key,
        then import the authors and the posts.

        Raises CommandError when the blog id or API key stays empty.
        """
        self.blogger_title = options.get('blogger_title')
        self.blogger_slug = options.get('blogger_slug')
        self.blogger_blog_id = options.get('blogger_blog_id')
        self.blogger_api_key = options.get('blogger_api_key')
        # The option is declared with dest='auto_excerpt'; the previous key
        # 'auto-excerpt' never matched, so --noautoexcerpt was ignored.
        self.auto_excerpt = options.get('auto_excerpt', True)

        # The old message ended in an unfilled '%s' placeholder.
        self.stdout.write("Starting migration from Blogger to Puput:\n")

        self.get_blog_page(options['blogger_slug'], options['blogger_title'])

        if not self.blogger_blog_id:
            self.blogger_blog_id = input('Blogger ID: ')
            if not self.blogger_blog_id:
                raise CommandError('Invalid Blogger ID')

        if not self.blogger_api_key:
            self.blogger_api_key = input('Blogger API Key: ')
            if not self.blogger_api_key:
                raise CommandError('Invalid Blogger API Key')

        self.import_authors()
        self.import_posts()

    def get_blog_page(self, slug, title):
        """
        Store in ``self.blogpage`` the blog page to import into,
        creating it under the first Page when it does not exist.

        An empty ``slug`` falls back to the slugified ``title``; the same
        slug is used for the lookup and for the creation so a second run
        finds the page created by the first one.
        """
        slug = slug or slugify(title)
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            self.blogpage = BlogPage(
                title=title,
                slug=slug,
            )
            rootpage.add_child(instance=self.blogpage)
            # NOTE(review): the revision belongs to the root page, not to
            # the new blog page -- confirm this is intended.
            revision = rootpage.save_revision()
            revision.publish()

    def import_authors(self):
        """
        Retrieve all the authors used in posts
        and convert it to new or existing author and
        store the conversion in ``self.authors``
        (keyed by the Blogger display name).
        """

        self.stdout.write('- Importing authors\n')

        post_authors = {
            post['author']['displayName'] for post in self.get_posts()
        }

        self.stdout.write(u'> {0:d} authors found.\n'.format(len(post_authors)))

        self.authors = {}
        for post_author in post_authors:
            self.authors[post_author] = self.migrate_author(post_author.replace(' ', '-'))

    def migrate_author(self, author_name):
        """
        Handle actions for migrating the authors.

        Prompts for an existing user (choice 1) or for the e-mail of a
        user to create (choice 2) and returns that user. Answering
        ``back`` restarts the dialogue.
        """

        action_text = u"The author '{0:s}' needs to be migrated to an user:\n" \
                      u"1. Use an existing user ?\n" \
                      u"2. Create a new user ?\n" \
                      u"Please select a choice: ".format(author_name)
        while True:
            selection = input(smart_str(action_text))
            # Tuple membership: the substring test against '12' also
            # accepted the answer "12".
            if selection in ('1', '2'):
                break
        if selection == '1':
            users = User.objects.all()
            if users.count() == 1:
                username = users[0].get_username()
                preselected_user = username
                usernames = [username]
                usernames_display = [u'[{0:s}]'.format(username)]
            else:
                usernames = []
                usernames_display = []
                preselected_user = None
                for user in users:
                    username = user.get_username()
                    if username == author_name:
                        # Bracketed name = default taken on empty input.
                        usernames_display.append(u'[{0:s}]'.format(username))
                        preselected_user = username
                    else:
                        usernames_display.append(username)
                    usernames.append(username)
            user_text = u"1. Select your user, by typing " \
                        u"one of theses usernames:\n" \
                        u"{0:s} or 'back'\n" \
                        u"Please select a choice: " \
                .format(u', '.join(usernames_display))
            while True:
                user_selected = input(smart_str(user_text))
                if user_selected in usernames:
                    break
                if user_selected == '' and preselected_user:
                    user_selected = preselected_user
                    break
                if user_selected.strip() == 'back':
                    return self.migrate_author(author_name)
            return users.get(**{User.USERNAME_FIELD: user_selected})
        else:
            create_text = u"2. Please type the email of " \
                          u"the '{0:s}' user or 'back': ".format(author_name)
            author_mail = input(smart_str(create_text))
            if author_mail.strip() == 'back':
                return self.migrate_author(author_name)
            try:
                return User.objects.create_user(author_name, author_mail)
            except IntegrityError:
                # The user name already exists: reuse that user.
                return User.objects.get(**{User.USERNAME_FIELD: author_name})

    def get_posts(self):
        """
        Fetch up to 500 posts of the blog from the Blogger v3 API.

        Returns the list of posts (empty when the response has no
        ``items`` key). Raises CommandError when the request does not
        answer 200, instead of returning None to callers that iterate.
        """
        url = 'https://www.googleapis.com/blogger/v3/blogs/{}/posts/?maxResults=500&key={}'.format(
            self.blogger_blog_id, self.blogger_api_key)
        res = requests.get(url)
        if res.status_code != 200:
            raise CommandError(
                'Blogger API request failed with status {}'.format(res.status_code))
        return res.json().get('items', [])

    def get_entry_tags(self, tags, entry):
        """Add each tag name in ``tags`` to ``entry.entry_tags``."""
        for tag in tags:
            puput_tag, created = PuputTag.objects.update_or_create(name=tag)
            entry.entry_tags.add(PuputTagEntryPage(tag=puput_tag))

    def import_posts(self):
        """
        Create an EntryPage for every post whose slug (slugified title)
        does not exist yet; existing pages are left untouched.
        """
        self.stdout.write('- Importing entries\n')

        for post in self.get_posts():
            content = post['content'] or ''
            content = self.process_content_image(content)
            if self.auto_excerpt:
                excerpt = Truncator(
                    strip_tags(smart_unicode(content))).words(50) or ''
            else:
                excerpt = ''
            slug = slugify(post['title'])

            try:
                entry = EntryPage.objects.get(slug=slug)
            except EntryPage.DoesNotExist:
                entry = EntryPage(
                    title=post['title'],
                    body=content,
                    excerpt=excerpt,
                    slug=slug,
                    go_live_at=post['published'],
                    first_published_at=post['published'],
                    date=post['published'],
                    # NOTE(review): the users collected in self.authors are
                    # not used here; every entry is owned by the first user.
                    owner=User.objects.first(),
                    seo_title=post['title'],
                    search_description=excerpt,
                    live=post['published'])
                self.blogpage.add_child(instance=entry)
                revision = self.blogpage.save_revision()
                revision.publish()
                self.get_entry_tags(post.get('labels', []), entry)
                entry.save()

    def _import_image(self, image_url):
        """
        Download ``image_url`` into a temporary file and return it,
        or return None when the response status is not 200.
        """
        response = requests.get(image_url)
        if response.status_code != 200:
            return None
        img = NamedTemporaryFile(delete=True)
        img.write(response.content)
        img.flush()
        return img

    def _image_to_embed(self, image):
        """Return the ``<embed>`` markup referencing ``image`` by title and id."""
        return '<embed alt="{}" embedtype="image" format="fullwidth" id="{}"/>'.format(image.title, image.id)

    def process_content_image(self, content):
        """
        Replace every ``<img>`` hosted on bp.blogspot.com by an embed of
        a newly saved WagtailImage and return the serialized content.
        Falsy content is returned unchanged.
        """
        self.stdout.write('\tGenerate and replace entry content images....')
        if content:
            root = lxml.html.fromstring(content)
            # Snapshot the nodes: elements are dropped inside the loop,
            # which is unsafe during lazy iteration of the tree.
            for img_node in list(root.iter('img')):
                src = img_node.attrib.get('src', '')
                if 'bp.blogspot.com' not in src:
                    continue
                parent_node = img_node.getparent()
                if parent_node is None:
                    # Root-level image: no parent to attach the embed to.
                    continue
                if parent_node.tag == 'a' and parent_node.getparent() is None:
                    # Anchor is the root or was already dropped with a
                    # previous image; it cannot receive a sibling.
                    continue
                self.stdout.write('\t\t{}'.format(src))
                image = self._import_image(src)
                if image is None:
                    continue
                title = src.rsplit('/', 1)[-1]
                new_image = WagtailImage(file=File(file=image, name=title), title=title)
                new_image.save()
                embed_node = ET.XML(self._image_to_embed(new_image))
                if parent_node.tag == 'a':
                    # Replace the whole link wrapping the image.
                    parent_node.addnext(embed_node)
                    parent_node.drop_tree()
                else:
                    parent_node.append(embed_node)
                    img_node.drop_tag()
            content = ET.tostring(root)
        return content
Example #10
0
class Command(BaseCommand):
    """Import categories, entries, tags and related-entry links from Zinnia.

    Every Zinnia entry becomes an ``EntryPage`` child of the target
    ``BlogPage``; the entry HTML is stored as a single ``html`` block of a
    JSON-encoded stream.
    """
    help = "Import blog data from Zinnia"
    # Maps Zinnia entry pk -> imported page. Class-level, hence shared by
    # all instances of the command.
    entries = {}

    def add_arguments(self, parser):
        """Declare the ``--slug`` and ``--title`` options of the target blog."""
        parser.add_argument('--slug', default='blog', help="Slug of the blog.")
        parser.add_argument('--title',
                            default='Blog',
                            help="Title of the blog.")

    def handle(self, *args, **options):
        """Run the import: blog page, categories, entries, related entries."""
        self.get_blog_page(options['slug'], options['title'])
        self.import_categories()
        self.import_entries()
        self.import_related_entries()

    def get_blog_page(self, slug, title):
        """Load the ``BlogPage`` with ``slug`` into ``self.blogpage`` or create it.

        A new page is added under the first ``Page``; the first ``Site``
        gets that first page as its root page.
        """
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            self.blogpage = BlogPage(
                title=title,
                slug=slug,
            )
            rootpage.add_child(instance=self.blogpage)
            # NOTE(review): the revision is taken from the root page, not
            # from the new blog page -- confirm this is intended.
            revision = rootpage.save_revision()
            revision.publish()

    def import_categories(self):
        """Create or update one Puput category per Zinnia category.

        Categories are matched by name (the key ``import_entry_categories``
        looks them up with); slug and description are passed as
        ``defaults`` so a changed description updates the existing row
        instead of being treated as a different category.
        """
        self.stdout.write("Importing categories...")
        for category in ZinniaCategory.objects.all():
            self.stdout.write("\t{}".format(category))
            PuputCategory.objects.update_or_create(
                name=category.title,
                defaults={
                    'slug': category.slug,
                    'description': category.description,
                })

    def _import_header_image(self, entry):
        """Return a saved ``WagtailImage`` for ``entry.image``, or ``None`` if unset."""
        if not entry.image:
            return None
        header_image = WagtailImage(file=entry.image,
                                    title=os.path.basename(
                                        entry.image.url))
        self.stdout.write('\tImported header image: {}'.format(
            entry.image))
        header_image.save()
        return header_image

    def _import_content(self, entry):
        """Return the entry HTML as text with local media images re-imported.

        Images whose ``src`` starts with ``settings.MEDIA_URL`` are saved as
        ``WagtailImage`` objects and their ``src`` rewritten. A failure on
        one image is reported and the image left as it was.
        """
        self.stdout.write(
            '\tGenerate and replace entry content images....')
        if not entry.content:
            return entry.content or ''

        root = lxml.html.fromstring(entry.content)
        for el in root.iter('img'):
            img_path = el.attrib.get('src', '')
            if not img_path.startswith(settings.MEDIA_URL):
                continue
            # fix media chunks path naming e.g. /media/chinks/media/stuff.jpg will fail
            old_image = img_path[len(settings.MEDIA_URL):]
            try:
                # Images are binary data: text mode tried to decode them.
                with open(
                        '{}/{}'.format(settings.MEDIA_ROOT,
                                       old_image),
                        'rb') as image_file:
                    new_image = WagtailImage(
                        file=File(
                            file=image_file,
                            name=os.path.basename(old_image)),
                        title=os.path.basename(old_image))
                    new_image.save()
                    el.attrib['src'] = new_image.file.url
                    self.stdout.write('\t\t{}'.format(
                        new_image.file.url))
            except Exception as e:
                # Deliberately best-effort: report and keep the old image.
                self.stdout.write(str(e))
                self.stdout.write(
                    "error handling image, move on... entry:" +
                    str(entry.id))

        # New content with images replaced; tostring() yields bytes.
        content = lxml.html.tostring(root, pretty_print=True)
        if isinstance(content, bytes):
            content = content.decode()
        return content

    def import_entries(self):
        """Create an ``EntryPage`` for every Zinnia entry not imported yet.

        Pages are matched by slug. Categories and tags are applied, the page
        saved and recorded in ``self.entries`` for new and existing pages
        alike.
        """
        import json

        self.stdout.write("Importing entries...")
        for entry in ZinniaEntry.objects.all():
            self.stdout.write(entry.title)
            header_image = self._import_header_image(entry)
            html = self._import_content(entry)

            # The body is a StreamField: wrap the HTML in one 'html' block.
            content = json.dumps([{'type': 'html', 'value': html}])

            # fix empty author entrys (puput will not render the page if no author is set)
            author = entry.authors.first()
            if author is None:
                from zinnia.models.author import Author
                author = Author.objects.first()

            # Create page
            try:
                page = EntryPage.objects.get(slug=entry.slug)
            except EntryPage.DoesNotExist:
                page = EntryPage(
                    title=entry.title,
                    body=content,
                    excerpt=entry.excerpt,
                    slug=entry.slug,
                    go_live_at=entry.start_publication,
                    expire_at=entry.end_publication,
                    first_published_at=entry.creation_date,
                    date=entry.creation_date,
                    owner=author,
                    seo_title=entry.title,
                    search_description=entry.excerpt,
                    live=entry.is_visible,
                    header_image=header_image)
                self.blogpage.add_child(instance=page)
                revision = self.blogpage.save_revision()
                revision.publish()
            self.import_entry_categories(entry, page)
            self.import_entry_tags(entry, page)
            page.save()
            page.save_revision(changed=False)
            self.entries[entry.pk] = page

    def import_related_entries(self):
        """Recreate the Zinnia related-entry links between the imported pages."""
        self.stdout.write("Importing related entries...")
        for entry in ZinniaEntry.objects.all():
            for related_entry in entry.related.all():
                EntryPageRelated.objects.get_or_create(
                    entrypage_from=self.entries[entry.pk],
                    entrypage_to=self.entries[related_entry.pk])

    def import_entry_categories(self, entry, page):
        """Link ``page`` to the Puput category named after each category of ``entry``."""
        self.stdout.write("\tImporting categories...")
        for category in entry.categories.all():
            self.stdout.write('\t\tAdd category: {}'.format(category.title))
            puput_category = PuputCategory.objects.get(name=category.title)
            PuputCategoryEntryPage.objects.get_or_create(
                category=puput_category, page=page)

    def import_entry_tags(self, entry, page):
        """Add every tag of ``entry.tags_list`` to ``page.entry_tags``."""
        self.stdout.write("\tImporting tags...")
        for tag in entry.tags_list:
            self.stdout.write('\t\t{}'.format(tag))
            puput_tag, created = PuputTag.objects.update_or_create(name=tag)
            page.entry_tags.add(PuputTagEntryPage(tag=puput_tag))