def handle(self, *args, **options):
    """Create and publish the example ``blog`` page under the first page.

    Looks up (or creates) the ``puput.blogpage`` content type, points the
    first ``Site`` at the first ``Page``, then adds a ``BlogPage`` titled
    "Blog" as a child of that page and publishes a revision of it.
    """
    # A ``name`` default is only supplied on Django versions older than 1.8.
    if DJANGO_VERSION < (1, 8):
        content_type_defaults = {'name': 'page'}
    else:
        content_type_defaults = {}
    blog_content_type = ContentType.objects.get_or_create(
        model='blogpage',
        app_label='puput',
        defaults=content_type_defaults,
    )[0]

    # The first page becomes the root page of the first site.
    root = Page.objects.first()
    first_site = Site.objects.first()
    first_site.root_page = root
    first_site.save()

    # Example blog page, attached below the root and published.
    blog = BlogPage(
        title="Blog",
        content_type=blog_content_type,
        slug='blog',
    )
    root.add_child(instance=blog)
    blog.save_revision().publish()
def get_blog_page(self, slug, title):
    """Store the blog page for ``slug`` on ``self.blogpage``, creating it if needed.

    :param slug: slug used to look up the existing ``BlogPage``; also the
        slug of the page created when none exists.
    :param title: title given to a newly created blog page.

    When the page has to be created, the first ``Site`` is pointed at the
    first ``Page`` and the new blog page is added as a child of that page.
    """
    try:
        self.blogpage = BlogPage.objects.get(slug=slug)
    except BlogPage.DoesNotExist:
        # Get root page
        rootpage = Page.objects.first()

        # Set site root page as root site page
        site = Site.objects.first()
        site.root_page = rootpage
        site.save()

        # Create the blog page below the root page
        self.blogpage = BlogPage(title=title, slug=slug)
        rootpage.add_child(instance=self.blogpage)

        # Revision and publish the page that was just created (not its
        # parent), so the new blog page itself has a published revision.
        revision = self.blogpage.save_revision()
        revision.publish()
class Command(LabelCommand):
    """Import authors, categories, tags, posts and images from a WordPress WXR file."""

    help = 'Import blog data from Wordpress'
    label = 'WXR file'
    args = 'wordpress.xml'

    # Evaluated once, when the class body is executed.
    SITE = Site.objects.get_current()

    def add_arguments(self, parser):
        """Register the ``--slug`` and ``--title`` options of the target blog page."""
        parser.add_argument('--slug', default='blog', help="Slug of the blog.")
        parser.add_argument('--title', default='Blog', help="Title of the blog.")

    def handle_label(self, wxr_file, **options):
        """Import one WXR file: blog page, authors, categories, then entries.

        :param wxr_file: path (or file object) handed to ``ET.parse``.
        :param options: parsed command options; ``slug`` and ``title`` are read.
        """
        global WP_NS
        self.get_blog_page(options['slug'], options['title'])
        self.tree = ET.parse(wxr_file)
        # NOTE(review): the module-level WP_NS template is overwritten with
        # its formatted value here, so a second label processed in the same
        # run would apply ``%`` to an already formatted string -- confirm
        # whether several files per invocation must be supported.
        WP_NS = WP_NS % self.get_wordpress_version(self.tree)
        self.import_authors(self.tree)
        self.categories = self.import_categories(
            self.tree.findall(u'channel/{{{0:s}}}category'.format(WP_NS)))
        self.import_entries(self.tree.findall('channel/item'))

    def get_wordpress_version(self, tree):
        """Return the WXR version ('1.2', '1.1' or '1.0') used by ``tree``.

        :raises CommandError: when no ``wxr_version`` element is found under
            any of the candidate namespaces.
        """
        for v in ('1.2', '1.1', '1.0'):
            try:
                # ``find`` returns None for a missing element, so ``.text``
                # raises AttributeError when this version does not match.
                tree.find(u'channel/{{{0:s}}}wxr_version'.format(WP_NS % v)).text
                return v
            except AttributeError:
                pass
        raise CommandError('Cannot resolve the wordpress namespace')

    def import_authors(self, tree):
        """Map every creator of a ``post`` item to a user in ``self.authors``.

        Keys of ``self.authors`` are the creator names as found in the file;
        the name passed to :meth:`import_author` has spaces replaced by ``-``.
        """
        self.stdout.write('Importing authors...')
        post_authors = set()
        for item in tree.findall('channel/item'):
            post_type = item.find(u'{{{0:s}}}post_type'.format(WP_NS)).text
            if post_type == 'post':
                post_authors.add(
                    item.find('{http://purl.org/dc/elements/1.1/}creator').text)
        self.authors = {}
        for post_author in post_authors:
            self.authors[post_author] = self.import_author(post_author.replace(' ', '-'))

    def import_author(self, author_name):
        """Interactively map ``author_name`` to an existing or a new user.

        :param author_name: username proposed for the author.
        :returns: the selected or created user instance.
        """
        action_text = u"The author '{0:s}' needs to be migrated to an user:\n" \
                      u"1. Use an existing user ?\n" \
                      u"2. Create a new user ?\n" \
                      u"Please select a choice: ".format(author_name)
        User = get_user_model()
        while True:
            selection = str(input(action_text))
            # Exact match only: a substring test against '12' would also
            # accept the answer "12".
            if selection in ('1', '2'):
                break
        if selection == '1':
            users = User.objects.all()
            if users.count() == 1:
                # A single user is always offered as the default answer.
                username = users[0].get_username()
                preselected_user = username
                usernames = [username]
                usernames_display = [u'[{0:s}]'.format(username)]
            else:
                usernames = []
                usernames_display = []
                preselected_user = None
                for user in users:
                    username = user.get_username()
                    if username == author_name:
                        # Bracketed name marks the default answer.
                        usernames_display.append(u'[{0:s}]'.format(username))
                        preselected_user = username
                    else:
                        usernames_display.append(username)
                    usernames.append(username)
            user_text = u"1. Select your user, by typing " \
                        u"one of theses usernames:\n" \
                        u"{0:s} or 'back'\n" \
                        u"Please select a choice: ".format(', '.join(usernames_display))
            while True:
                user_selected = input(user_text)
                if user_selected in usernames:
                    break
                if user_selected == '' and preselected_user:
                    user_selected = preselected_user
                    break
                if user_selected.strip() == 'back':
                    return self.import_author(author_name)
            return users.get(**{users[0].USERNAME_FIELD: user_selected})
        else:
            create_text = u"2. Please type the email of " \
                          u"the '{0:s}' user or 'back': ".format(author_name)
            author_mail = input(create_text)
            if author_mail.strip() == 'back':
                return self.import_author(author_name)
            try:
                return User.objects.create_user(author_name, author_mail)
            except IntegrityError:
                # The username already exists: reuse that user.
                return User.objects.get(**{User.USERNAME_FIELD: author_name})

    def get_blog_page(self, slug, title):
        """Store the blog page for ``slug`` on ``self.blogpage``, creating it if needed."""
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            # Create the blog page below the root page
            self.blogpage = BlogPage(title=title, slug=slug)
            rootpage.add_child(instance=self.blogpage)

            # Revision and publish the page that was just created, not its parent.
            revision = self.blogpage.save_revision()
            revision.publish()

    def import_categories(self, category_nodes):
        """Create or update a ``PuputCategory`` per node; return them keyed by name.

        A parent is resolved through the categories already processed in this
        call, so a parent listed after its child is not linked.
        """
        self.stdout.write('Importing categories...')
        categories = {}
        for category_node in category_nodes:
            title = category_node.find(u'{{{0:s}}}cat_name'.format(WP_NS)).text[:255]
            slug = category_node.find(u'{{{0:s}}}category_nicename'.format(WP_NS)).text[:255]
            try:
                parent = category_node.find(u'{{{0:s}}}category_parent'.format(WP_NS)).text[:255]
            except TypeError:
                # Empty element: ``text`` is None and cannot be sliced.
                parent = None
            self.stdout.write(u'\t\t{0:s}'.format(title))
            category, created = PuputCategory.objects.update_or_create(
                name=title,
                defaults={
                    'slug': slug,
                    'parent': categories.get(parent),
                })
            categories[title] = category
        return categories

    def import_entry_tags(self, tags, page):
        """Attach a ``PuputTag`` to ``page`` for every tag-domain ``category`` node."""
        self.stdout.write("\tImporting tags...")
        for tag in tags:
            domain = tag.attrib.get('domain', 'category')
            if 'tag' in domain and tag.attrib.get('nicename'):
                self.stdout.write(u'\t\t{}'.format(tag.text))
                puput_tag, created = PuputTag.objects.update_or_create(name=tag.text)
                page.entry_tags.add(PuputTagEntryPage(tag=puput_tag))

    def import_entry_categories(self, category_nodes, page):
        """Link ``page`` to the already imported category of each ``category`` node."""
        for category_node in category_nodes:
            domain = category_node.attrib.get('domain')
            if domain == 'category':
                puput_category = PuputCategory.objects.get(name=category_node.text)
                PuputCategoryEntryPage.objects.get_or_create(
                    category=puput_category, page=page)

    def import_entry(self, title, content, items, item_node):
        """Create (or reuse, by slug) the ``EntryPage`` for one post item.

        :param title: post title, already truncated by the caller.
        :param content: post body with images already processed.
        :param items: all ``channel/item`` nodes, searched for the header image.
        :param item_node: the post's own item node.
        """
        creation_date = datetime.strptime(
            item_node.find(u'{{{0:s}}}post_date'.format(WP_NS)).text,
            '%Y-%m-%d %H:%M:%S')
        if settings.USE_TZ:
            creation_date = timezone.make_aware(creation_date, pytz.timezone('GMT'))
        excerpt = strip_tags(
            item_node.find(u'{{{0:s}excerpt/}}encoded'.format(WP_NS)).text or '')
        if not excerpt and content:
            # No explicit excerpt: fall back to the first 50 words of the body.
            excerpt = Truncator(content).words(50)
        # Titles that slugify to nothing fall back to ``post-<post_id>``.
        slug = slugify(title)[:255] or u'post-{0:s}'.format(
            item_node.find(u'{{{0:s}}}post_id'.format(WP_NS)).text)
        creator = item_node.find('{http://purl.org/dc/elements/1.1/}creator').text
        try:
            entry_date = datetime.strptime(
                item_node.find(u'{{{0:s}}}post_date_gmt'.format(WP_NS)).text,
                '%Y-%m-%d %H:%M:%S')
        except ValueError:
            # Unparseable GMT date: use the plain post date instead.
            entry_date = datetime.strptime(
                item_node.find(u'{{{0:s}}}post_date'.format(WP_NS)).text,
                '%Y-%m-%d %H:%M:%S')
        # Create page
        try:
            page = EntryPage.objects.get(slug=slug)
        except EntryPage.DoesNotExist:
            page = EntryPage(
                title=title,
                body=content,
                excerpt=strip_tags(excerpt),
                slug=slug,
                go_live_at=entry_date,
                first_published_at=creation_date,
                date=creation_date,
                owner=self.authors.get(creator),
                seo_title=title,
                search_description=excerpt,
                live=item_node.find(u'{{{0:s}}}status'.format(WP_NS)).text == 'publish')
            self.blogpage.add_child(instance=page)
            # NOTE(review): this revisions and publishes the blog page, not
            # the new entry -- confirm that is intended.
            revision = self.blogpage.save_revision()
            revision.publish()
        self.import_entry_tags(item_node.findall('category'), page)
        self.import_entry_categories(item_node.findall('category'), page)
        # Import header image
        image_id = self.find_image_id(
            item_node.findall(u'{{{0:s}}}postmeta'.format(WP_NS)))
        if image_id:
            self.import_header_image(page, items, image_id)
        page.save()
        page.save_revision(changed=False)

    def find_image_id(self, metadatas):
        """Return the ``_thumbnail_id`` meta value, or None when there is none."""
        for meta in metadatas:
            if meta.find(u'{{{0:s}}}meta_key'.format(WP_NS)).text == '_thumbnail_id':
                return meta.find(u'{{{0:s}}}meta_value'.format(WP_NS)).text

    def import_entries(self, items):
        """Import every ``post`` item that has both a title and a body."""
        self.stdout.write("Importing entries...")
        for item_node in items:
            title = (item_node.find('title').text or '')[:255]
            post_type = item_node.find(u'{{{0:s}}}post_type'.format(WP_NS)).text
            content = item_node.find(
                '{http://purl.org/rss/1.0/modules/content/}encoded').text
            if post_type == 'post' and content and title:
                self.stdout.write(u'\t{0:s}'.format(title))
                content = self.process_content_image(content)
                self.import_entry(title, content, items, item_node)

    def _import_image(self, image_url):
        """Download ``image_url`` into a temporary file.

        :returns: the flushed temporary file on HTTP 200, ``False`` when the
            connection fails, ``None`` for any other status code.
        """
        try:
            response = requests.get(image_url)
        except requests.exceptions.ConnectionError:
            return False
        if response.status_code != 200:
            return None
        # Only create the temporary file once there is something to store,
        # so failed downloads do not leave an open file behind.
        image = NamedTemporaryFile(delete=True)
        image.write(response.content)
        image.flush()
        return image

    def import_header_image(self, entry, items, image_id):
        """Attach the attachment item whose ``post_id`` is ``image_id`` as header image."""
        self.stdout.write('\tImport header images....')
        for item in items:
            post_type = item.find(u'{{{0:s}}}post_type'.format(WP_NS)).text
            if post_type == 'attachment' and \
                    item.find(u'{{{0:s}}}post_id'.format(WP_NS)).text == image_id:
                title = item.find('title').text
                image_url = item.find(u'{{{0:s}}}attachment_url'.format(WP_NS)).text
                image = self._import_image(image_url)
                if image:
                    new_image = WagtailImage(file=File(file=image), title=title)
                    new_image.save()
                    entry.header_image = new_image
                    entry.save()

    def _image_to_embed(self, image):
        """Return the ``<embed>`` markup that references ``image``.

        The title is written as an escaped XML attribute so that characters
        such as ``&``, ``<`` or ``"`` do not break the later ``ET.XML`` parse.
        """
        from xml.sax.saxutils import quoteattr

        # quoteattr() returns the value already wrapped in quotes.
        return u'<embed alt={} embedtype="image" format="fullwidth" id="{}"/>'.format(
            quoteattr(u'{}'.format(image.title)), image.id)

    def process_content_image(self, content):
        """Replace WordPress-hosted ``<img>`` tags in ``content`` by image embeds.

        Images whose ``src`` contains ``wp-content`` or ``files`` are
        downloaded and stored; a failed download leaves an "Image missing"
        note after the image's parent. Empty content is returned unchanged,
        otherwise the serialised tree is returned.
        """
        from xml.sax.saxutils import escape

        self.stdout.write('\tGenerate and replace entry content images....')
        if content:
            root = lxml.html.fromstring(content)
            # The loop rewrites the tree, so walk a snapshot of the <img>
            # nodes instead of the live iterator.
            for img_node in list(root.iter('img')):
                parent_node = img_node.getparent()
                # An <img> without ``src`` is simply not a candidate.
                src = img_node.attrib.get('src', '')
                if parent_node is None:
                    # The image is the root node: there is nothing to attach
                    # the replacement to.
                    continue
                if 'wp-content' in src or 'files' in src:
                    image = self._import_image(src)
                    if image:
                        title = img_node.attrib.get('title') or img_node.attrib.get('alt')
                        new_image = WagtailImage(file=File(file=image), title=title)
                        new_image.save()
                        if parent_node.tag == 'a':
                            # Replace the whole link that wraps the image.
                            parent_node.addnext(ET.XML(self._image_to_embed(new_image)))
                            parent_node.drop_tree()
                        else:
                            parent_node.append(ET.XML(self._image_to_embed(new_image)))
                            img_node.drop_tag()
                    else:
                        self.stdout.write(src)
                        # The URL is escaped because it is parsed as XML.
                        parent_node.addnext(ET.XML(
                            u'<pre>Image missing: {}</pre>'.format(escape(src))))
            content = ET.tostring(root)
        return content
class Command(LabelCommand):
    """Import authors, categories, tags, posts and images from a WordPress WXR file."""

    help = 'Import blog data from Wordpress'
    label = 'WXR file'
    args = 'wordpress.xml'

    # Evaluated once, when the class body is executed.
    SITE = Site.objects.get_current()

    def add_arguments(self, parser):
        """Register the positional ``wxr_file`` and the ``--slug``/``--title`` options."""
        parser.add_argument('wxr_file')
        parser.add_argument('--slug', default='blog', help="Slug of the blog.")
        parser.add_argument('--title', default='Blog', help="Title of the blog.")

    def handle(self, wxr_file, **options):
        """Import one WXR file: blog page, authors, categories, then entries.

        :param wxr_file: path (or file object) handed to ``ET.parse``.
        :param options: parsed command options; ``slug`` and ``title`` are read.
        """
        global WP_NS
        self.get_blog_page(options['slug'], options['title'])
        self.tree = ET.parse(wxr_file)
        # NOTE(review): the module-level WP_NS template is overwritten with
        # its formatted value here, so running this twice in one process
        # would apply ``%`` to an already formatted string -- confirm.
        WP_NS = WP_NS % self.get_wordpress_version(self.tree)
        self.import_authors(self.tree)
        self.categories = self.import_categories(
            self.tree.findall(u'channel/{{{0:s}}}category'.format(WP_NS)))
        self.import_entries(self.tree.findall('channel/item'))

    def get_wordpress_version(self, tree):
        """Return the WXR version ('1.2', '1.1' or '1.0') used by ``tree``.

        :raises CommandError: when no ``wxr_version`` element is found under
            any of the candidate namespaces.
        """
        for v in ('1.2', '1.1', '1.0'):
            try:
                # ``find`` returns None for a missing element, so ``.text``
                # raises AttributeError when this version does not match.
                tree.find(u'channel/{{{0:s}}}wxr_version'.format(WP_NS % v)).text
                return v
            except AttributeError:
                pass
        raise CommandError('Cannot resolve the wordpress namespace')

    def import_authors(self, tree):
        """Map every creator of a ``post`` item to a user in ``self.authors``.

        Keys of ``self.authors`` are the creator names as found in the file;
        the name passed to :meth:`import_author` has spaces replaced by ``-``.
        """
        self.stdout.write('Importing authors...')
        post_authors = set()
        for item in tree.findall('channel/item'):
            post_type = item.find(u'{{{0:s}}}post_type'.format(WP_NS)).text
            if post_type == 'post':
                post_authors.add(
                    item.find(
                        '{http://purl.org/dc/elements/1.1/}creator').text)
        self.authors = {}
        for post_author in post_authors:
            self.authors[post_author] = self.import_author(
                post_author.replace(' ', '-'))

    def import_author(self, author_name):
        """Interactively map ``author_name`` to an existing or a new user.

        :param author_name: username proposed for the author.
        :returns: the selected or created user instance.
        """
        action_text = u"The author '{0:s}' needs to be migrated to an user:\n" \
                      u"1. Use an existing user ?\n" \
                      u"2. Create a new user ?\n" \
                      u"Please select a choice: ".format(author_name)
        User = get_user_model()
        while True:
            selection = str(input(action_text))
            # Exact match only: a substring test against '12' would also
            # accept the answer "12".
            if selection in ('1', '2'):
                break
        if selection == '1':
            users = User.objects.all()
            if users.count() == 1:
                # A single user is always offered as the default answer.
                username = users[0].get_username()
                preselected_user = username
                usernames = [username]
                usernames_display = [u'[{0:s}]'.format(username)]
            else:
                usernames = []
                usernames_display = []
                preselected_user = None
                for user in users:
                    username = user.get_username()
                    if username == author_name:
                        # Bracketed name marks the default answer.
                        usernames_display.append(u'[{0:s}]'.format(username))
                        preselected_user = username
                    else:
                        usernames_display.append(username)
                    usernames.append(username)
            user_text = u"1. Select your user, by typing " \
                        u"one of theses usernames:\n" \
                        u"{0:s} or 'back'\n" \
                        u"Please select a choice: ".format(', '.join(usernames_display))
            while True:
                user_selected = input(user_text)
                if user_selected in usernames:
                    break
                if user_selected == '' and preselected_user:
                    user_selected = preselected_user
                    break
                if user_selected.strip() == 'back':
                    return self.import_author(author_name)
            return users.get(**{users[0].USERNAME_FIELD: user_selected})
        else:
            create_text = u"2. Please type the email of " \
                          u"the '{0:s}' user or 'back': ".format(author_name)
            author_mail = input(create_text)
            if author_mail.strip() == 'back':
                return self.import_author(author_name)
            try:
                return User.objects.create_user(author_name, author_mail)
            except IntegrityError:
                # The username already exists: reuse that user.
                return User.objects.get(**{User.USERNAME_FIELD: author_name})

    def get_blog_page(self, slug, title):
        """Store the blog page for ``slug`` on ``self.blogpage``, creating it if needed."""
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            # Create the blog page below the root page
            self.blogpage = BlogPage(title=title, slug=slug)
            rootpage.add_child(instance=self.blogpage)

            # Revision and publish the page that was just created, not its parent.
            revision = self.blogpage.save_revision()
            revision.publish()

    def import_categories(self, category_nodes):
        """Create or update a ``PuputCategory`` per node; return them keyed by name.

        A parent is resolved through the categories already processed in this
        call, so a parent listed after its child is not linked.
        """
        self.stdout.write('Importing categories...')
        categories = {}
        for category_node in category_nodes:
            title = category_node.find(
                u'{{{0:s}}}cat_name'.format(WP_NS)).text[:255]
            slug = category_node.find(
                u'{{{0:s}}}category_nicename'.format(WP_NS)).text[:255]
            try:
                parent = category_node.find(
                    u'{{{0:s}}}category_parent'.format(WP_NS)).text[:255]
            except TypeError:
                # Empty element: ``text`` is None and cannot be sliced.
                parent = None
            self.stdout.write(u'\t\t{0:s}'.format(title))
            category, created = PuputCategory.objects.update_or_create(
                name=title,
                defaults={
                    'slug': slug,
                    'parent': categories.get(parent),
                })
            categories[title] = category
        return categories

    def import_entry_tags(self, tags, page):
        """Attach a ``PuputTag`` to ``page`` for every tag-domain ``category`` node."""
        self.stdout.write("\tImporting tags...")
        for tag in tags:
            domain = tag.attrib.get('domain', 'category')
            if 'tag' in domain and tag.attrib.get('nicename'):
                self.stdout.write(u'\t\t{}'.format(tag.text))
                puput_tag, created = PuputTag.objects.update_or_create(
                    name=tag.text)
                page.entry_tags.add(PuputTagEntryPage(tag=puput_tag))

    def import_entry_categories(self, category_nodes, page):
        """Link ``page`` to the already imported category of each ``category`` node."""
        for category_node in category_nodes:
            domain = category_node.attrib.get('domain')
            if domain == 'category':
                puput_category = PuputCategory.objects.get(
                    name=category_node.text)
                PuputCategoryEntryPage.objects.get_or_create(
                    category=puput_category, page=page)

    def import_entry(self, title, content, items, item_node):
        """Create (or reuse, by slug) the ``EntryPage`` for one post item.

        :param title: post title, already truncated by the caller.
        :param content: post body with images already processed.
        :param items: all ``channel/item`` nodes, searched for the header image.
        :param item_node: the post's own item node.
        """
        creation_date = datetime.strptime(
            item_node.find(u'{{{0:s}}}post_date'.format(WP_NS)).text,
            '%Y-%m-%d %H:%M:%S')
        if settings.USE_TZ:
            creation_date = timezone.make_aware(creation_date,
                                                pytz.timezone('GMT'))
        excerpt = strip_tags(
            item_node.find(u'{{{0:s}excerpt/}}encoded'.format(WP_NS)).text or
            '')
        if not excerpt and content:
            # No explicit excerpt: fall back to the first 50 words of the body.
            excerpt = Truncator(content).words(50)
        # Titles that slugify to nothing fall back to ``post-<post_id>``.
        slug = slugify(title)[:255] or u'post-{0:s}'.format(
            item_node.find(u'{{{0:s}}}post_id'.format(WP_NS)).text)
        creator = item_node.find(
            '{http://purl.org/dc/elements/1.1/}creator').text
        try:
            entry_date = datetime.strptime(
                item_node.find(u'{{{0:s}}}post_date_gmt'.format(WP_NS)).text,
                '%Y-%m-%d %H:%M:%S')
        except ValueError:
            # Unparseable GMT date: use the plain post date instead.
            entry_date = datetime.strptime(
                item_node.find(u'{{{0:s}}}post_date'.format(WP_NS)).text,
                '%Y-%m-%d %H:%M:%S')
        # Create page
        try:
            page = EntryPage.objects.get(slug=slug)
        except EntryPage.DoesNotExist:
            page = EntryPage(
                title=title,
                body=content,
                excerpt=strip_tags(excerpt),
                slug=slug,
                go_live_at=entry_date,
                first_published_at=creation_date,
                date=creation_date,
                owner=self.authors.get(creator),
                seo_title=title,
                search_description=excerpt,
                live=item_node.find(
                    u'{{{0:s}}}status'.format(WP_NS)).text == 'publish')
            self.blogpage.add_child(instance=page)
            # NOTE(review): this revisions and publishes the blog page, not
            # the new entry -- confirm that is intended.
            revision = self.blogpage.save_revision()
            revision.publish()
        self.import_entry_tags(item_node.findall('category'), page)
        self.import_entry_categories(item_node.findall('category'), page)
        # Import header image
        image_id = self.find_image_id(
            item_node.findall(u'{{{0:s}}}postmeta'.format(WP_NS)))
        if image_id:
            self.import_header_image(page, items, image_id)
        page.save()
        page.save_revision(changed=False)

    def find_image_id(self, metadatas):
        """Return the ``_thumbnail_id`` meta value, or None when there is none."""
        for meta in metadatas:
            if meta.find(u'{{{0:s}}}meta_key'.format(
                    WP_NS)).text == '_thumbnail_id':
                return meta.find(u'{{{0:s}}}meta_value'.format(WP_NS)).text

    def import_entries(self, items):
        """Import every ``post`` item that has both a title and a body."""
        self.stdout.write("Importing entries...")
        for item_node in items:
            title = (item_node.find('title').text or '')[:255]
            post_type = item_node.find(
                u'{{{0:s}}}post_type'.format(WP_NS)).text
            content = item_node.find(
                '{http://purl.org/rss/1.0/modules/content/}encoded').text
            if post_type == 'post' and content and title:
                self.stdout.write(u'\t{0:s}'.format(title))
                content = self.process_content_image(content)
                self.import_entry(title, content, items, item_node)

    def _import_image(self, image_url):
        """Download ``image_url`` into a temporary file.

        :returns: the flushed temporary file on HTTP 200, otherwise ``None``.
            A connection failure is reported on stdout.
        """
        try:
            response = requests.get(image_url)
        except requests.exceptions.ConnectionError:
            self.stdout.write(
                'WARNING: Unable to connect to URL "{}". Image will be broken.'
                .format(image_url))
            return None
        if response.status_code != 200:
            return None
        # Only create the temporary file once there is something to store,
        # so failed downloads do not leave an open file behind.
        image = NamedTemporaryFile(delete=True)
        image.write(response.content)
        image.flush()
        return image

    def import_header_image(self, entry, items, image_id):
        """Attach the attachment item whose ``post_id`` is ``image_id`` as header image."""
        self.stdout.write('\tImport header images....')
        for item in items:
            post_type = item.find(u'{{{0:s}}}post_type'.format(WP_NS)).text
            if post_type == 'attachment' and item.find(
                    u'{{{0:s}}}post_id'.format(WP_NS)).text == image_id:
                title = item.find('title').text
                image_url = item.find(
                    u'{{{0:s}}}attachment_url'.format(WP_NS)).text
                image = self._import_image(image_url)
                if image:
                    new_image = WagtailImage(file=File(file=image),
                                             title=title)
                    new_image.save()
                    entry.header_image = new_image
                    entry.save()

    def _image_to_embed(self, image):
        """Return the ``<embed>`` markup that references ``image``.

        The title is written as an escaped XML attribute so that characters
        such as ``&``, ``<`` or ``"`` do not break the later ``ET.XML`` parse.
        """
        from xml.sax.saxutils import quoteattr

        # quoteattr() returns the value already wrapped in quotes.
        return u'<embed alt={} embedtype="image" format="fullwidth" id="{}"/>'.format(
            quoteattr(u'{}'.format(image.title)), image.id)

    def process_content_image(self, content):
        """Replace WordPress-hosted ``<img>`` tags in ``content`` by image embeds.

        Images whose ``src`` contains ``wp-content`` or ``files`` are
        downloaded and stored; images that cannot be downloaded are left
        untouched. Empty content is returned unchanged, otherwise the
        serialised tree is returned.
        """
        self.stdout.write('\tGenerate and replace entry content images....')
        if content:
            root = lxml.html.fromstring(content)
            # The loop rewrites the tree, so walk a snapshot of the <img>
            # nodes instead of the live iterator.
            for img_node in list(root.iter('img')):
                parent_node = img_node.getparent()
                # An <img> without ``src`` is simply not a candidate.
                src = img_node.attrib.get('src', '')
                if parent_node is None:
                    # The image is the root node: there is nothing to attach
                    # the replacement to.
                    continue
                if 'wp-content' in src or 'files' in src:
                    image = self._import_image(src)
                    if image:
                        title = img_node.attrib.get(
                            'title') or img_node.attrib.get('alt')
                        new_image = WagtailImage(file=File(file=image),
                                                 title=title)
                        new_image.save()
                        if parent_node.tag == 'a':
                            # Replace the whole link that wraps the image.
                            parent_node.addnext(
                                ET.XML(self._image_to_embed(new_image)))
                            parent_node.drop_tree()
                        else:
                            parent_node.append(
                                ET.XML(self._image_to_embed(new_image)))
                            img_node.drop_tag()
            content = ET.tostring(root)
        return content
class Command(BaseCommand):
    """Import categories, entries, tags, images and related entries from Zinnia."""

    help = "Import blog data from Zinnia"
    # Zinnia entry pk -> imported EntryPage. Kept as a class attribute for
    # compatibility; ``handle`` replaces it with a per-run dict.
    entries = {}

    def add_arguments(self, parser):
        """Register the ``--slug`` and ``--title`` options of the target blog page."""
        parser.add_argument('--slug', default='blog', help="Slug of the blog.")
        parser.add_argument('--title', default='Blog', help="Title of the blog.")

    def handle(self, *args, **options):
        """Run the import: blog page, categories, entries, then related entries."""
        # Fresh mapping per run so state is not shared through the class.
        self.entries = {}
        self.get_blog_page(options['slug'], options['title'])
        self.import_categories()
        self.import_entries()
        self.import_related_entries()

    def get_blog_page(self, slug, title):
        """Store the blog page for ``slug`` on ``self.blogpage``, creating it if needed."""
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            # Create the blog page below the root page
            self.blogpage = BlogPage(
                title=title,
                slug=slug,
            )
            rootpage.add_child(instance=self.blogpage)

            # Revision and publish the page that was just created, not its parent.
            revision = self.blogpage.save_revision()
            revision.publish()

    def import_categories(self):
        """Create or update one ``PuputCategory`` per Zinnia category, keyed by name."""
        self.stdout.write("Importing categories...")
        for category in ZinniaCategory.objects.all():
            self.stdout.write("\t{}".format(category))
            # Only the name identifies the category (entries look it up by
            # name later); slug and description are values to write, so a
            # re-import updates the row instead of trying to create a twin.
            PuputCategory.objects.update_or_create(
                name=category.title,
                defaults={
                    'slug': category.slug,
                    'description': category.description,
                })

    def import_entries(self):
        """Create (or reuse, by slug) an ``EntryPage`` for every Zinnia entry.

        Header images and images referenced from ``settings.MEDIA_URL`` inside
        the content are stored as ``WagtailImage`` objects. Each resulting
        page is recorded in ``self.entries`` under the Zinnia entry pk.
        """
        self.stdout.write("Importing entries...")
        for entry in ZinniaEntry.objects.all():
            self.stdout.write(entry.title)
            # Header images
            if entry.image:
                header_image = WagtailImage(
                    file=entry.image, title=os.path.basename(entry.image.url))
                self.stdout.write('\tImported header image: {}'.format(entry.image))
                header_image.save()
            else:
                header_image = None

            self.stdout.write('\tGenerate and replace entry content images....')
            if entry.content:
                root = lxml.html.fromstring(entry.content)
                for el in root.iter('img'):
                    # An <img> without ``src`` is simply not a candidate.
                    src = el.attrib.get('src', '')
                    if src.startswith(settings.MEDIA_URL):
                        # Strip only the leading MEDIA_URL, not later
                        # occurrences of the same text inside the path.
                        old_image = src[len(settings.MEDIA_URL):]
                        # Image data is binary: open in 'rb'.
                        with open('{}/{}'.format(settings.MEDIA_ROOT, old_image),
                                  'rb') as image_file:
                            new_image = WagtailImage(
                                file=File(file=image_file,
                                          name=os.path.basename(old_image)),
                                title=os.path.basename(old_image))
                            new_image.save()
                        el.attrib['src'] = new_image.file.url
                        self.stdout.write('\t\t{}'.format(new_image.file.url))
                # New content with images replaced
                content = lxml.html.tostring(root, pretty_print=True)
            else:
                content = entry.content

            # Create page
            try:
                page = EntryPage.objects.get(slug=entry.slug)
            except EntryPage.DoesNotExist:
                page = EntryPage(
                    title=entry.title,
                    body=content,
                    slug=entry.slug,
                    go_live_at=entry.start_publication,
                    expire_at=entry.end_publication,
                    first_published_at=entry.creation_date,
                    date=entry.creation_date,
                    owner=entry.authors.first(),
                    seo_title=entry.title,
                    search_description=entry.excerpt,
                    live=entry.is_visible,
                    header_image=header_image
                )
                self.blogpage.add_child(instance=page)
                # NOTE(review): this revisions and publishes the blog page,
                # not the new entry -- confirm that is intended.
                revision = self.blogpage.save_revision()
                revision.publish()
            self.import_entry_categories(entry, page)
            self.import_entry_tags(entry, page)
            page.save()
            page.save_revision(changed=False)
            self.entries[entry.pk] = page

    def import_related_entries(self):
        """Recreate Zinnia's related-entry links between the imported pages."""
        self.stdout.write("Importing related entries...")
        for entry in ZinniaEntry.objects.all():
            for related_entry in entry.related.all():
                EntryPageRelated.objects.get_or_create(
                    entrypage_from=self.entries[entry.pk],
                    entrypage_to=self.entries[related_entry.pk])

    def import_entry_categories(self, entry, page):
        """Link ``page`` to the Puput category matching each Zinnia category name."""
        self.stdout.write("\tImporting categories...")
        for category in entry.categories.all():
            self.stdout.write('\t\tAdd category: {}'.format(category.title))
            puput_category = PuputCategory.objects.get(name=category.title)
            PuputCategoryEntryPage.objects.get_or_create(
                category=puput_category, page=page)

    def import_entry_tags(self, entry, page):
        """Attach a ``PuputTag`` to ``page`` for every tag in ``entry.tags_list``."""
        self.stdout.write("\tImporting tags...")
        for tag in entry.tags_list:
            self.stdout.write('\t\t{}'.format(tag))
            puput_tag, created = PuputTag.objects.update_or_create(name=tag)
            page.entry_tags.add(PuputTagEntryPage(tag=puput_tag))
class Command(NoArgsCommand):
    """Import authors, posts, labels and images of a Blogger blog through its API."""

    help = 'Import blog data from Blogger.'

    option_list = NoArgsCommand.option_list + (
        make_option('--slug', default='blog', help="Slug of the blog."),
        make_option('--title', default='Blog', help="Title of the blog."),
        make_option('--blogger_blog_id', dest='blogger_blog_id', default='',
                    help='Id of the Blogger blog to import.'),
        make_option('--blogger_api_key', dest='blogger_api_key', default='',
                    help='API Key of the Blogger blog to import.')
    )

    # Evaluated once, when the class body is executed.
    SITE = Site.objects.get_current()

    def handle_noargs(self, **options):
        """Run the import: blog page, Blogger entries, authors, then pages."""
        self.blogger_blog_id = options.get('blogger_blog_id')
        self.blogger_api_key = options.get('blogger_api_key')
        self.get_blog_page(options['slug'], options['title'])
        self.blogger_entries = self.get_blogger_entries()
        self.import_authors()
        self.import_entries()

    def get_blog_page(self, slug, title):
        """Store the blog page for ``slug`` on ``self.blogpage``, creating it if needed."""
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            # Create the blog page below the root page
            self.blogpage = BlogPage(title=title, slug=slug)
            rootpage.add_child(instance=self.blogpage)

            # Revision and publish the page that was just created, not its parent.
            revision = self.blogpage.save_revision()
            revision.publish()

    def import_authors(self):
        """Map every author display name to a user in ``self.authors``.

        Keys of ``self.authors`` are the display names as returned by the
        API; the name passed to :meth:`import_author` has spaces replaced
        by ``-``.
        """
        self.stdout.write('Importing authors...')
        entry_authors = set()
        for entry in self.blogger_entries:
            entry_authors.add(entry['author']['displayName'])
        self.stdout.write(u'{0:d} authors found.'.format(len(entry_authors)))
        self.authors = {}
        for entry_author in entry_authors:
            self.authors[entry_author] = self.import_author(entry_author.replace(' ', '-'))

    def import_author(self, author_name):
        """Interactively map ``author_name`` to an existing or a new user.

        :param author_name: username proposed for the author.
        :returns: the selected or created user instance.
        """
        action_text = u"The author '{0:s}' needs to be migrated to an user:\n" \
                      u"1. Use an existing user ?\n" \
                      u"2. Create a new user ?\n" \
                      u"Please select a choice: ".format(author_name)
        User = get_user_model()
        while True:
            selection = str(input(action_text))
            # Exact match only: a substring test against '12' would also
            # accept the answer "12".
            if selection in ('1', '2'):
                break
        if selection == '1':
            users = User.objects.all()
            if users.count() == 1:
                # A single user is always offered as the default answer.
                username = users[0].get_username()
                preselected_user = username
                usernames = [username]
                usernames_display = [u'[{0:s}]'.format(username)]
            else:
                usernames = []
                usernames_display = []
                preselected_user = None
                for user in users:
                    username = user.get_username()
                    if username == author_name:
                        # Bracketed name marks the default answer.
                        usernames_display.append(u'[{0:s}]'.format(username))
                        preselected_user = username
                    else:
                        usernames_display.append(username)
                    usernames.append(username)
            user_text = u"1. Select your user, by typing " \
                        u"one of theses usernames:\n" \
                        u"{0:s} or 'back'\n" \
                        u"Please select a choice: ".format(', '.join(usernames_display))
            while True:
                user_selected = input(user_text)
                if user_selected in usernames:
                    break
                if user_selected == '' and preselected_user:
                    user_selected = preselected_user
                    break
                if user_selected.strip() == 'back':
                    return self.import_author(author_name)
            return users.get(**{users[0].USERNAME_FIELD: user_selected})
        else:
            create_text = u"2. Please type the email of " \
                          u"the '{0:s}' user or 'back': ".format(author_name)
            author_mail = input(create_text)
            if author_mail.strip() == 'back':
                return self.import_author(author_name)
            try:
                return User.objects.create_user(author_name, author_mail)
            except IntegrityError:
                # The username already exists: reuse that user.
                return User.objects.get(**{User.USERNAME_FIELD: author_name})

    def get_blogger_entries(self):
        """Fetch the blog's posts and return the ``items`` list of the response.

        :returns: list of entry dicts; empty when the response has no ``items``.
        :raises CommandError: when the request does not answer with HTTP 200.
        """
        from django.core.management.base import CommandError

        res = requests.get(BLOGGER_URL.format(self.blogger_blog_id, self.blogger_api_key))
        if res.status_code != 200:
            # Fail with a readable message instead of handing ``None`` to
            # the loops that iterate over the entries.
            raise CommandError(
                'Unable to fetch the Blogger entries (HTTP status {0}).'.format(
                    res.status_code))
        return res.json().get('items', [])

    def import_entry_tags(self, tags, entry):
        """Attach a ``PuputTag`` to ``entry`` for every label in ``tags``."""
        for tag in tags:
            puput_tag, created = PuputTag.objects.update_or_create(name=tag)
            entry.entry_tags.add(PuputTagEntryPage(tag=puput_tag))

    def import_entries(self):
        """Create (or reuse, by slug) an ``EntryPage`` for every Blogger entry."""
        self.stdout.write('Importing entries...')
        for entry in self.blogger_entries:
            content = entry['content'] or ''
            content = self.process_content_image(content)
            excerpt = Truncator(content).words(50) or ''
            slug = slugify(entry['title'])
            try:
                page = EntryPage.objects.get(slug=slug)
            except EntryPage.DoesNotExist:
                page = EntryPage(
                    title=entry['title'],
                    body=content,
                    excerpt=strip_tags(excerpt),
                    slug=slug,
                    go_live_at=entry['published'],
                    first_published_at=entry['published'],
                    date=entry['published'],
                    # ``self.authors`` is keyed by the display name exactly
                    # as import_authors stored it (spaces included).
                    owner=self.authors[entry['author']['displayName']],
                    seo_title=entry['title'],
                    search_description=excerpt,
                    # ``published`` is also used as the date above; pass a
                    # real boolean for the flag.
                    live=bool(entry['published']))
                self.blogpage.add_child(instance=page)
                # NOTE(review): this revisions and publishes the blog page,
                # not the new entry -- confirm that is intended.
                revision = self.blogpage.save_revision()
                revision.publish()
            self.import_entry_tags(entry.get('labels', []), page)
            page.save()

    def _import_image(self, image_url):
        """Download ``image_url`` into a temporary file.

        :returns: the flushed temporary file on HTTP 200, otherwise ``None``.
            A connection failure is reported on stdout.
        """
        try:
            response = requests.get(image_url)
        except requests.exceptions.ConnectionError:
            self.stdout.write(
                u'WARNING: Unable to connect to URL "{}". Image will be broken.'
                .format(image_url))
            return None
        if response.status_code != 200:
            return None
        # Only create the temporary file once there is something to store,
        # so failed downloads do not leave an open file behind.
        image = NamedTemporaryFile(delete=True)
        image.write(response.content)
        image.flush()
        return image

    def _image_to_embed(self, image):
        """Return the ``<embed>`` markup that references ``image``.

        The title is written as an escaped XML attribute so that characters
        such as ``&``, ``<`` or ``"`` do not break the later ``ET.XML`` parse.
        """
        from xml.sax.saxutils import quoteattr

        # quoteattr() returns the value already wrapped in quotes.
        return u'<embed alt={} embedtype="image" format="fullwidth" id="{}"/>'.format(
            quoteattr(u'{}'.format(image.title)), image.id)

    def process_content_image(self, content):
        """Replace Blogger-hosted ``<img>`` tags in ``content`` by image embeds.

        Images whose ``src`` contains ``bp.blogspot.com`` are downloaded and
        stored; images that cannot be downloaded are left untouched. Empty
        content is returned unchanged, otherwise the serialised tree is
        returned.
        """
        self.stdout.write('\tGenerate and replace entry content images....')
        if content:
            root = lxml.html.fromstring(content)
            # The loop rewrites the tree, so walk a snapshot of the <img>
            # nodes instead of the live iterator.
            for img_node in list(root.iter('img')):
                parent_node = img_node.getparent()
                # An <img> without ``src`` is simply not a candidate.
                src = img_node.attrib.get('src', '')
                if parent_node is None:
                    # The image is the root node: there is nothing to attach
                    # the replacement to.
                    continue
                if 'bp.blogspot.com' in src:
                    self.stdout.write('\t\t{}'.format(src))
                    image = self._import_image(src)
                    if image:
                        # Last path segment of the URL is used as the title.
                        title = src.rsplit('/', 1)[-1]
                        new_image = WagtailImage(file=File(file=image), title=title)
                        new_image.save()
                        if parent_node.tag == 'a':
                            # Replace the whole link that wraps the image.
                            parent_node.addnext(ET.XML(self._image_to_embed(new_image)))
                            parent_node.drop_tree()
                        else:
                            parent_node.append(ET.XML(self._image_to_embed(new_image)))
                            img_node.drop_tag()
            content = ET.tostring(root)
        return content
class Command(NoArgsCommand):
    """Import authors, posts, labels and images of a Blogger blog through its API."""

    help = 'Import blog data from Blogger.'

    option_list = NoArgsCommand.option_list + (
        make_option('--slug', default='blog', help="Slug of the blog."),
        make_option('--title', default='Blog', help="Title of the blog."),
        make_option('--blogger_blog_id',
                    dest='blogger_blog_id',
                    default='',
                    help='Id of the Blogger blog to import.'),
        make_option('--blogger_api_key',
                    dest='blogger_api_key',
                    default='',
                    help='API Key of the Blogger blog to import.'))

    # Evaluated once, when the class body is executed.
    SITE = Site.objects.get_current()

    def handle_noargs(self, **options):
        """Run the import: blog page, Blogger entries, authors, then pages."""
        self.blogger_blog_id = options.get('blogger_blog_id')
        self.blogger_api_key = options.get('blogger_api_key')
        self.get_blog_page(options['slug'], options['title'])
        self.blogger_entries = self.get_blogger_entries()
        self.import_authors()
        self.import_entries()

    def get_blog_page(self, slug, title):
        """Store the blog page for ``slug`` on ``self.blogpage``, creating it if needed."""
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            # Create the blog page below the root page
            self.blogpage = BlogPage(title=title, slug=slug)
            rootpage.add_child(instance=self.blogpage)

            # Revision and publish the page that was just created, not its parent.
            revision = self.blogpage.save_revision()
            revision.publish()

    def import_authors(self):
        """Map every author display name to a user in ``self.authors``.

        Keys of ``self.authors`` are the display names as returned by the
        API; the name passed to :meth:`import_author` has spaces replaced
        by ``-``.
        """
        self.stdout.write('Importing authors...')
        entry_authors = set()
        for entry in self.blogger_entries:
            entry_authors.add(entry['author']['displayName'])
        self.stdout.write(u'{0:d} authors found.'.format(len(entry_authors)))
        self.authors = {}
        for entry_author in entry_authors:
            self.authors[entry_author] = self.import_author(
                entry_author.replace(' ', '-'))

    def import_author(self, author_name):
        """Interactively map ``author_name`` to an existing or a new user.

        :param author_name: username proposed for the author.
        :returns: the selected or created user instance.
        """
        action_text = u"The author '{0:s}' needs to be migrated to an user:\n" \
                      u"1. Use an existing user ?\n" \
                      u"2. Create a new user ?\n" \
                      u"Please select a choice: ".format(author_name)
        User = get_user_model()
        while True:
            selection = str(input(action_text))
            # Exact match only: a substring test against '12' would also
            # accept the answer "12".
            if selection in ('1', '2'):
                break
        if selection == '1':
            users = User.objects.all()
            if users.count() == 1:
                # A single user is always offered as the default answer.
                username = users[0].get_username()
                preselected_user = username
                usernames = [username]
                usernames_display = [u'[{0:s}]'.format(username)]
            else:
                usernames = []
                usernames_display = []
                preselected_user = None
                for user in users:
                    username = user.get_username()
                    if username == author_name:
                        # Bracketed name marks the default answer.
                        usernames_display.append(u'[{0:s}]'.format(username))
                        preselected_user = username
                    else:
                        usernames_display.append(username)
                    usernames.append(username)
            user_text = u"1. Select your user, by typing " \
                        u"one of theses usernames:\n" \
                        u"{0:s} or 'back'\n" \
                        u"Please select a choice: ".format(', '.join(usernames_display))
            while True:
                user_selected = input(user_text)
                if user_selected in usernames:
                    break
                if user_selected == '' and preselected_user:
                    user_selected = preselected_user
                    break
                if user_selected.strip() == 'back':
                    return self.import_author(author_name)
            return users.get(**{users[0].USERNAME_FIELD: user_selected})
        else:
            create_text = u"2. Please type the email of " \
                          u"the '{0:s}' user or 'back': ".format(author_name)
            author_mail = input(create_text)
            if author_mail.strip() == 'back':
                return self.import_author(author_name)
            try:
                return User.objects.create_user(author_name, author_mail)
            except IntegrityError:
                # The username already exists: reuse that user.
                return User.objects.get(**{User.USERNAME_FIELD: author_name})

    def get_blogger_entries(self):
        """Fetch the blog's posts and return the ``items`` list of the response.

        :returns: list of entry dicts; empty when the response has no ``items``.
        :raises CommandError: when the request does not answer with HTTP 200.
        """
        from django.core.management.base import CommandError

        res = requests.get(
            BLOGGER_URL.format(self.blogger_blog_id, self.blogger_api_key))
        if res.status_code != 200:
            # Fail with a readable message instead of handing ``None`` to
            # the loops that iterate over the entries.
            raise CommandError(
                'Unable to fetch the Blogger entries (HTTP status {0}).'.format(
                    res.status_code))
        return res.json().get('items', [])

    def import_entry_tags(self, tags, entry):
        """Attach a ``PuputTag`` to ``entry`` for every label in ``tags``."""
        for tag in tags:
            puput_tag, created = PuputTag.objects.update_or_create(name=tag)
            entry.entry_tags.add(PuputTagEntryPage(tag=puput_tag))

    def import_entries(self):
        """Create (or reuse, by slug) an ``EntryPage`` for every Blogger entry."""
        self.stdout.write('Importing entries...')
        for entry in self.blogger_entries:
            content = entry['content'] or ''
            content = self.process_content_image(content)
            excerpt = Truncator(content).words(50) or ''
            slug = slugify(entry['title'])
            try:
                page = EntryPage.objects.get(slug=slug)
            except EntryPage.DoesNotExist:
                page = EntryPage(title=entry['title'],
                                 body=content,
                                 excerpt=strip_tags(excerpt),
                                 slug=slug,
                                 go_live_at=entry['published'],
                                 first_published_at=entry['published'],
                                 date=entry['published'],
                                 # ``self.authors`` is keyed by the display
                                 # name exactly as import_authors stored it
                                 # (spaces included).
                                 owner=self.authors[
                                     entry['author']['displayName']],
                                 seo_title=entry['title'],
                                 search_description=excerpt,
                                 # ``published`` is also used as the date
                                 # above; pass a real boolean for the flag.
                                 live=bool(entry['published']))
                self.blogpage.add_child(instance=page)
                # NOTE(review): this revisions and publishes the blog page,
                # not the new entry -- confirm that is intended.
                revision = self.blogpage.save_revision()
                revision.publish()
            self.import_entry_tags(entry.get('labels', []), page)
            page.save()

    def _import_image(self, image_url):
        """Download ``image_url`` into a temporary file.

        :returns: the flushed temporary file on HTTP 200, otherwise ``None``.
            A connection failure is reported on stdout.
        """
        try:
            response = requests.get(image_url)
        except requests.exceptions.ConnectionError:
            self.stdout.write(
                u'WARNING: Unable to connect to URL "{}". Image will be broken.'
                .format(image_url))
            return None
        if response.status_code != 200:
            return None
        # Only create the temporary file once there is something to store,
        # so failed downloads do not leave an open file behind.
        image = NamedTemporaryFile(delete=True)
        image.write(response.content)
        image.flush()
        return image

    def _image_to_embed(self, image):
        """Return the ``<embed>`` markup that references ``image``.

        The title is written as an escaped XML attribute so that characters
        such as ``&``, ``<`` or ``"`` do not break the later ``ET.XML`` parse.
        """
        from xml.sax.saxutils import quoteattr

        # quoteattr() returns the value already wrapped in quotes.
        return u'<embed alt={} embedtype="image" format="fullwidth" id="{}"/>'.format(
            quoteattr(u'{}'.format(image.title)), image.id)

    def process_content_image(self, content):
        """Replace Blogger-hosted ``<img>`` tags in ``content`` by image embeds.

        Images whose ``src`` contains ``bp.blogspot.com`` are downloaded and
        stored; images that cannot be downloaded are left untouched. Empty
        content is returned unchanged, otherwise the serialised tree is
        returned.
        """
        self.stdout.write('\tGenerate and replace entry content images....')
        if content:
            root = lxml.html.fromstring(content)
            # The loop rewrites the tree, so walk a snapshot of the <img>
            # nodes instead of the live iterator.
            for img_node in list(root.iter('img')):
                parent_node = img_node.getparent()
                # An <img> without ``src`` is simply not a candidate.
                src = img_node.attrib.get('src', '')
                if parent_node is None:
                    # The image is the root node: there is nothing to attach
                    # the replacement to.
                    continue
                if 'bp.blogspot.com' in src:
                    self.stdout.write('\t\t{}'.format(src))
                    image = self._import_image(src)
                    if image:
                        # Last path segment of the URL is used as the title.
                        title = src.rsplit('/', 1)[-1]
                        new_image = WagtailImage(file=File(file=image),
                                                 title=title)
                        new_image.save()
                        if parent_node.tag == 'a':
                            # Replace the whole link that wraps the image.
                            parent_node.addnext(
                                ET.XML(self._image_to_embed(new_image)))
                            parent_node.drop_tree()
                        else:
                            parent_node.append(
                                ET.XML(self._image_to_embed(new_image)))
                            img_node.drop_tag()
            content = ET.tostring(root)
        return content
class Command(NoArgsCommand):
    """
    Command object for importing a Blogger blog into Puput.

    Posts are fetched from the Blogger v3 REST API (see ``get_posts``), so
    both a blog id and an API key are required; they are prompted for when
    not given on the command line.
    """
    help = 'Import a Blogger blog into Puput.'

    option_list = NoArgsCommand.option_list + (
        make_option('--blogger_title', dest='blogger_title', default='',
                    help='The tittle of blog the blogger'),
        make_option('--blogger_slug', dest='blogger_slug', default='',
                    help='The slug of blog the blogger'),
        make_option('--blogger_blog_id', dest='blogger_blog_id', default='',
                    help='The id of the Blogger blog to import'),
        make_option('--blogger_api_key', dest='blogger_api_key', default='',
                    help='The API of the Blogger blog to import'),
        make_option('--noautoexcerpt', action='store_false',
                    dest='auto_excerpt', default=True,
                    help='Do NOT generate an excerpt.'))

    # NOTE: evaluated when the class body runs (i.e. at import time).
    SITE = Site.objects.get_current()

    def handle_noargs(self, **options):
        """
        Entry point: resolve options, make sure the blog page exists, then
        import authors and posts.

        Raises CommandError when no Blogger id or API key is available.
        """
        self.blogger_title = options.get('blogger_title')
        self.blogger_slug = options.get('blogger_slug')
        self.blogger_blog_id = options.get('blogger_blog_id')
        self.blogger_api_key = options.get('blogger_api_key')
        # The option is declared with dest='auto_excerpt'; the previous
        # 'auto-excerpt' key never matched, so --noautoexcerpt was ignored.
        self.auto_excerpt = options.get('auto_excerpt', True)
        # Per-run cache filled by get_posts().
        self._posts = None

        self.stdout.write("Starting migration from Blogger to Puput:\n")
        self.get_blog_page(options['blogger_slug'], options['blogger_title'])

        if not self.blogger_blog_id:
            self.blogger_blog_id = input('Blogger ID: ')
            if not self.blogger_blog_id:
                raise CommandError('Invalid Blogger ID')

        if not self.blogger_api_key:
            self.blogger_api_key = input('Blogger API Key: ')
            if not self.blogger_api_key:
                raise CommandError('Invalid Blogger API Key')

        self.import_authors()
        self.import_posts()

    def get_blog_page(self, slug, title):
        """
        Load the blog page with the given slug into ``self.blogpage``,
        creating it under the first page when it does not exist.

        When ``slug`` is empty it is derived from ``title``. The same slug
        is used for the lookup and for the creation, so that running the
        command twice finds the page created by the first run.
        """
        slug = slug or slugify(title)
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            self.blogpage = BlogPage(title=title, slug=slug)
            rootpage.add_child(instance=self.blogpage)
            # NOTE(review): the revision is taken on the root page, not on
            # the new blog page -- kept as-is, confirm this is intended.
            revision = rootpage.save_revision()
            revision.publish()

    def import_authors(self):
        """
        Collect the display names of all post authors and map each one to
        a new or existing user.

        The mapping is stored in ``self.authors``, keyed by the Blogger
        display name.
        """
        self.stdout.write('- Importing authors\n')

        post_authors = set()
        for post in self.get_posts():
            post_authors.add(post['author']['displayName'])

        self.stdout.write(
            u'> {0:d} authors found.\n'.format(len(post_authors)))

        self.authors = {}
        for post_author in post_authors:
            # Spaces are replaced to build the candidate username.
            self.authors[post_author] = self.migrate_author(
                post_author.replace(' ', '-'))

    def migrate_author(self, author_name):
        """
        Interactively map ``author_name`` to a user and return that user.

        The operator either picks an existing user or creates a new one;
        typing 'back' at a sub-prompt restarts the dialogue.
        """
        action_text = u"The author '{0:s}' needs to be migrated to an user:\n" \
                      u"1. Use an existing user ?\n" \
                      u"2. Create a new user ?\n" \
                      u"Please select a choice: ".format(author_name)
        while True:
            selection = input(smart_str(action_text))
            # Tuple membership: a substring test against '12' would also
            # accept the answer '12'.
            if selection in ('1', '2'):
                break

        if selection == '1':
            users = User.objects.all()
            if users.count() == 1:
                username = users[0].get_username()
                preselected_user = username
                usernames = [username]
                usernames_display = [u'[{0:s}]'.format(username)]
            else:
                usernames = []
                usernames_display = []
                preselected_user = None
                for user in users:
                    username = user.get_username()
                    if username == author_name:
                        # Brackets mark the default picked on empty input.
                        usernames_display.append(
                            u'[{0:s}]'.format(username))
                        preselected_user = username
                    else:
                        usernames_display.append(username)
                    usernames.append(username)
            while True:
                user_text = u"1. Select your user, by typing " \
                            u"one of theses usernames:\n" \
                            u"{0:s} or 'back'\n" \
                            u"Please select a choice: " \
                            .format(u', '.join(usernames_display))
                user_selected = input(smart_str(user_text))
                if user_selected in usernames:
                    break
                if user_selected == '' and preselected_user:
                    user_selected = preselected_user
                    break
                if user_selected.strip() == 'back':
                    return self.migrate_author(author_name)
            return users.get(**{User.USERNAME_FIELD: user_selected})

        create_text = u"2. Please type the email of " \
                      u"the '{0:s}' user or 'back': ".format(author_name)
        author_mail = input(smart_str(create_text))
        if author_mail.strip() == 'back':
            return self.migrate_author(author_name)
        try:
            return User.objects.create_user(author_name, author_mail)
        except IntegrityError:
            # The username is already taken: reuse that user.
            return User.objects.get(**{User.USERNAME_FIELD: author_name})

    def get_posts(self):
        """
        Return the list of posts of the blog, as decoded from the API.

        The result is cached on the instance so the feed is fetched once
        per run. Raises CommandError when the API does not answer with
        HTTP 200; an answer without 'items' yields an empty list.
        """
        posts = getattr(self, '_posts', None)
        if posts is None:
            # NOTE(review): a single request with maxResults=500, no
            # pagination -- larger blogs are presumably truncated.
            res = requests.get('https://www.googleapis.com/blogger/v3/blogs/{}/posts/?maxResults=500&key={}'.format(self.blogger_blog_id, self.blogger_api_key))
            if res.status_code != 200:
                raise CommandError(
                    'Unable to fetch the Blogger posts (HTTP {}).'.format(
                        res.status_code))
            posts = res.json().get('items', [])
            self._posts = posts
        return posts

    def get_entry_tags(self, tags, entry):
        """
        Attach each name in ``tags`` to ``entry``, creating missing tags.
        """
        for tag in tags:
            puput_tag, created = PuputTag.objects.update_or_create(name=tag)
            entry.entry_tags.add(PuputTagEntryPage(tag=puput_tag))

    def import_posts(self):
        """
        Create an entry page for every post that has no page with the same
        slug yet, then (re)attach the post labels as tags.
        """
        self.stdout.write('- Importing entries\n')
        # import_authors() normally ran first; tolerate a direct call.
        authors = getattr(self, 'authors', {})

        for post in self.get_posts():
            content = self.process_content_image(post.get('content') or '')
            if self.auto_excerpt:
                excerpt = Truncator(
                    strip_tags(smart_unicode(content))).words(50)
            else:
                excerpt = ''
            slug = slugify(post['title'])
            try:
                entry = EntryPage.objects.get(slug=slug)
            except EntryPage.DoesNotExist:
                # Use the user chosen for this author during the author
                # migration; fall back to the first user otherwise.
                owner = authors.get(post['author']['displayName']) \
                    or User.objects.first()
                entry = EntryPage(
                    title=post['title'],
                    body=content,
                    excerpt=excerpt,
                    slug=slug,
                    go_live_at=post['published'],
                    first_published_at=post['published'],
                    date=post['published'],
                    owner=owner,
                    seo_title=post['title'],
                    search_description=excerpt,
                    # 'live' is a flag; the publication date is a string.
                    live=bool(post['published']))
                self.blogpage.add_child(instance=entry)
                revision = self.blogpage.save_revision()
                revision.publish()
            self.get_entry_tags(post.get('labels', []), entry)
            entry.save()

    def _import_image(self, image_url):
        """
        Download ``image_url`` into a temporary file and return it.

        Returns None when the server does not answer with HTTP 200, so an
        error page is never stored as an image.
        """
        response = requests.get(image_url)
        if response.status_code != 200:
            return None
        img = NamedTemporaryFile(delete=True)
        img.write(response.content)
        img.flush()
        return img

    def _image_to_embed(self, image):
        """Return the embed markup referencing the given image."""
        return '<embed alt="{}" embedtype="image" format="fullwidth" id="{}"/>'.format(image.title, image.id)

    def process_content_image(self, content):
        """
        Import the Blogger-hosted images found in ``content`` and replace
        them with embed tags.

        Returns the rewritten markup as text; empty content is returned
        unchanged. Images that cannot be downloaded are left untouched.
        """
        self.stdout.write('\tGenerate and replace entry content images....')
        if not content:
            return content

        root = lxml.html.fromstring(content)
        # Snapshot the nodes first: the tree is modified inside the loop.
        for img_node in list(root.iter('img')):
            src = img_node.attrib.get('src', '')
            parent_node = img_node.getparent()
            # An <img> that is the root element has no parent to edit.
            if parent_node is None or 'bp.blogspot.com' not in src:
                continue
            self.stdout.write('\t\t{}'.format(src))
            image = self._import_image(src)
            if image is None:
                continue
            title = src.rsplit('/', 1)[-1]
            new_image = WagtailImage(
                file=File(file=image, name=title), title=title)
            new_image.save()
            embed = ET.XML(self._image_to_embed(new_image))
            if parent_node.tag == 'a':
                # Replace the whole link wrapping the image.
                parent_node.addnext(embed)
                parent_node.drop_tree()
            else:
                parent_node.append(embed)
                img_node.drop_tag()

        content = ET.tostring(root)
        # Serialisation yields bytes; the page body expects text.
        if isinstance(content, bytes):
            content = content.decode('utf-8')
        return content
class Command(BaseCommand):
    """
    Import categories, entries (with their images, categories and tags)
    and related-entry links from Zinnia into a Puput blog page.
    """
    help = "Import blog data from Zinnia"
    # Kept for backward compatibility; handle() rebinds a fresh dict on
    # the instance so runs do not share state through the class.
    entries = {}

    def add_arguments(self, parser):
        """Declare the --slug and --title options of the target blog."""
        parser.add_argument('--slug', default='blog', help="Slug of the blog.")
        parser.add_argument('--title', default='Blog', help="Title of the blog.")

    def handle(self, *args, **options):
        """Run the import steps in dependency order."""
        # Maps a Zinnia entry pk to the Puput page imported for it.
        self.entries = {}
        self.get_blog_page(options['slug'], options['title'])
        self.import_categories()
        self.import_entries()
        self.import_related_entries()

    def get_blog_page(self, slug, title):
        """
        Load the blog page with the given slug into ``self.blogpage``,
        creating it under the first page when it does not exist.
        """
        try:
            self.blogpage = BlogPage.objects.get(slug=slug)
        except BlogPage.DoesNotExist:
            # Get root page
            rootpage = Page.objects.first()

            # Set site root page as root site page
            site = Site.objects.first()
            site.root_page = rootpage
            site.save()

            self.blogpage = BlogPage(title=title, slug=slug)
            rootpage.add_child(instance=self.blogpage)
            # NOTE(review): the revision is taken on the root page, not on
            # the new blog page -- kept as-is, confirm this is intended.
            revision = rootpage.save_revision()
            revision.publish()

    def import_categories(self):
        """Create or update one Puput category per Zinnia category."""
        self.stdout.write("Importing categories...")
        for category in ZinniaCategory.objects.all():
            self.stdout.write("\t{}".format(category))
            # Match on the name only and push the other fields through
            # 'defaults': with every field in the lookup, a changed slug
            # or description would try to create a second category.
            PuputCategory.objects.update_or_create(
                name=category.title,
                defaults={
                    'slug': category.slug,
                    'description': category.description,
                })

    def import_entries(self):
        """
        Import every Zinnia entry as an entry page.

        A page is created when none exists with the entry slug; categories
        and tags are attached in both cases, and the page is recorded in
        ``self.entries`` for the related-entries step.
        """
        import json

        self.stdout.write("Importing entries...")
        for entry in ZinniaEntry.objects.all():
            self.stdout.write(entry.title)
            header_image = self._import_header_image(entry)
            content = self._import_content_images(entry)

            # The body is written as stream data: a single 'html' block
            # holding the entry markup.
            body = json.dumps([{'type': 'html', 'value': content}])

            # Entries without author fall back to the first author
            # (puput will not render the page if no author is set).
            author = entry.authors.first()
            if author is None:
                from zinnia.models.author import Author
                author = Author.objects.first()

            try:
                page = EntryPage.objects.get(slug=entry.slug)
            except EntryPage.DoesNotExist:
                page = EntryPage(
                    title=entry.title,
                    body=body,
                    excerpt=entry.excerpt,
                    slug=entry.slug,
                    go_live_at=entry.start_publication,
                    expire_at=entry.end_publication,
                    first_published_at=entry.creation_date,
                    date=entry.creation_date,
                    owner=author,
                    seo_title=entry.title,
                    search_description=entry.excerpt,
                    live=entry.is_visible,
                    header_image=header_image)
                self.blogpage.add_child(instance=page)
                revision = self.blogpage.save_revision()
                revision.publish()
            self.import_entry_categories(entry, page)
            self.import_entry_tags(entry, page)
            page.save()
            page.save_revision(changed=False)
            self.entries[entry.pk] = page

    def _import_header_image(self, entry):
        """Store the entry header image and return it, or None if unset."""
        if not entry.image:
            return None
        header_image = WagtailImage(
            file=entry.image, title=os.path.basename(entry.image.url))
        header_image.save()
        self.stdout.write(
            '\tImported header image: {}'.format(entry.image))
        return header_image

    def _import_content_images(self, entry):
        """
        Import the media images referenced by the entry content and return
        the content, as text, with their 'src' pointing at the new files.
        """
        self.stdout.write('\tGenerate and replace entry content images....')
        if not entry.content:
            return entry.content or ''

        root = lxml.html.fromstring(entry.content)
        for el in root.iter('img'):
            src = el.attrib.get('src', '')
            if not src.startswith(settings.MEDIA_URL):
                continue
            # Strip only the leading MEDIA_URL: a path that contains the
            # prefix again further down must keep that part.
            old_image = src[len(settings.MEDIA_URL):].lstrip('/')
            image_name = os.path.basename(old_image)
            try:
                # Binary mode: image bytes are not decodable text.
                with open(os.path.join(settings.MEDIA_ROOT, old_image),
                          'rb') as image_file:
                    new_image = WagtailImage(
                        file=File(file=image_file, name=image_name),
                        title=image_name)
                    new_image.save()
                el.attrib['src'] = new_image.file.url
                self.stdout.write('\t\t{}'.format(new_image.file.url))
            except Exception as e:
                # Best effort: report the image and carry on.
                self.stderr.write(
                    "error handling image, move on... entry:{} ({})".format(
                        entry.id, e))

        content = lxml.html.tostring(root, pretty_print=True)
        # Serialisation yields bytes; the page body expects text.
        if isinstance(content, bytes):
            content = content.decode('utf-8')
        return content

    def import_related_entries(self):
        """Recreate the related-entry links between the imported pages."""
        self.stdout.write("Importing related entries...")
        for entry in ZinniaEntry.objects.all():
            for related_entry in entry.related.all():
                EntryPageRelated.objects.get_or_create(
                    entrypage_from=self.entries[entry.pk],
                    entrypage_to=self.entries[related_entry.pk])

    def import_entry_categories(self, entry, page):
        """Link ``page`` to the Puput categories named like the entry's."""
        self.stdout.write("\tImporting categories...")
        for category in entry.categories.all():
            self.stdout.write('\t\tAdd category: {}'.format(category.title))
            puput_category = PuputCategory.objects.get(name=category.title)
            PuputCategoryEntryPage.objects.get_or_create(
                category=puput_category, page=page)

    def import_entry_tags(self, entry, page):
        """Attach the tags of ``entry`` to ``page``, creating missing tags."""
        self.stdout.write("\tImporting tags...")
        for tag in entry.tags_list:
            self.stdout.write('\t\t{}'.format(tag))
            puput_tag, created = PuputTag.objects.update_or_create(name=tag)
            page.entry_tags.add(PuputTagEntryPage(tag=puput_tag))