def _TranslateComment(self, xml_comment, user_map):
    """Build a comment entry from a single XML comment element.

    Args:
      xml_comment: DOM element for the comment. Its 'id' and 'jitemid'
        attributes are read, as are its 'body' and 'date' child elements
        and, when present, its 'subject' child element.
      user_map: Object whose GetUser() is called with the comment id to
        obtain the author name.

    Returns:
      A gdata.GDataEntry categorised as a comment and carrying an InReplyTo
      extension element that names the parent post ('post-<jitemid>').
    """
    def first_text(tag):
        # Text of the first child element with the given tag name.
        return self._GetText(xml_comment.getElementsByTagName(tag)[0])

    comment_id = xml_comment.getAttribute('id')

    entry = gdata.GDataEntry()
    entry.id = atom.Id(text='comment-%s' % comment_id)
    # Both the 'self' and the 'alternate' link point at the dummy URI.
    for rel in ('self', 'alternate'):
        entry.link.append(
            atom.Link(href=DUMMY_URI, rel=rel, link_type=ATOM_TYPE))

    author_name = user_map.GetUser(comment_id)
    entry.author = atom.Author(atom.Name(text=author_name))
    entry.category.append(
        atom.Category(scheme=CATEGORY_KIND, term=COMMENT_KIND))

    body_html = self._TranslateContent(first_text('body'))
    entry.content = atom.Content(content_type='html', text=body_html)

    # The single 'date' element supplies both timestamps.
    entry.published = atom.Published(text=first_text('date'))
    entry.updated = atom.Updated(text=first_text('date'))

    # Use the explicit subject when there is one, otherwise a snippet of
    # the translated body.
    subject_nodes = xml_comment.getElementsByTagName('subject')
    if subject_nodes:
        title_text = self._GetText(subject_nodes[0])
    else:
        title_text = self._CreateSnippet(body_html)
    entry.title = atom.Title(text=title_text)

    parent_id = 'post-%s' % xml_comment.getAttribute('jitemid')
    entry.extension_elements.append(InReplyTo(parent_id))
    return entry
def _build_dummy_entry():
    """Return a placeholder atom.Entry attributed to 11870.com.

    The entry gets a fixed id, an empty title, empty content, an author
    named "11870.com" and a fixed updated timestamp of
    2000-01-01T00:00:00.000Z.
    """
    placeholder = atom.Entry()

    # Fixed identity and author.
    author_name = atom.Name("11870.com")
    placeholder.author = atom.Author(name=author_name)
    placeholder.id = atom.Id("http://11870.com")

    # Title and content are present but empty.
    placeholder.title = atom.Title()
    placeholder.content = atom.Content()

    placeholder.updated = atom.Updated(text="2000-01-01T00:00:00.000Z")
    return placeholder
def _TranslatePost(self, lj_event):
    """Translate one LiveJournal event into a Blogger post entry.

    Args:
      lj_event: Mapping describing the event. The keys 'itemid', 'url',
        'eventtime', 'event' and 'props' are read; 'subject' is optional.
        The 'event' value and the 'taglist' property may be
        xmlrpclib.Binary objects, in which case their .data is used.

    Returns:
      A gdata.GDataEntry categorised as a post, with one additional
      category per non-empty tag found in the 'taglist' property.
    """
    post_entry = gdata.GDataEntry()
    post_entry.id = atom.Id(text='post-%d' % lj_event['itemid'])
    post_entry.link.append(
        atom.Link(href=DUMMY_URI, rel='self', link_type=ATOM_TYPE))
    post_entry.link.append(
        atom.Link(href=lj_event['url'], rel='alternate', link_type=ATOM_TYPE))
    post_entry.author = atom.Author(atom.Name(text=self.username))
    post_entry.category.append(
        atom.Category(scheme=CATEGORY_KIND, term=POST_KIND))

    # The event time supplies both timestamps, so convert it only once.
    timestamp = self._ToBlogTime(self._FromLjTime(lj_event['eventtime']))
    post_entry.published = atom.Published(text=timestamp)
    post_entry.updated = atom.Updated(text=timestamp)

    content = lj_event['event']
    if isinstance(content, xmlrpclib.Binary):
        content = content.data
    post_entry.content = atom.Content(
        content_type='html', text=self._TranslateContent(content))

    # Fall back to a snippet of the content when there is no subject.
    subject = lj_event.get('subject', None)
    if not subject:
        subject = self._CreateSnippet(content)
    if not isinstance(subject, basestring):
        subject = str(subject)
    post_entry.title = atom.Title(text=subject)

    # Turn the taglist into individual labels.
    taglist = lj_event['props'].get('taglist', None)
    if isinstance(taglist, xmlrpclib.Binary):
        taglist = taglist.data
    elif taglist is not None and not isinstance(taglist, basestring):
        # A missing taglist must stay None: str(None) would yield the
        # truthy string 'None' and create a bogus "None" label.
        taglist = str(taglist)

    if taglist:
        for tag in taglist.split(','):
            tag = tag.strip()
            # Skip blanks produced by stray or trailing commas.
            if tag:
                post_entry.category.append(
                    atom.Category(scheme=CATEGORY_NS, term=tag))

    return post_entry
def Translate(self, infile, outfile):
    """Performs the actual translation to a Blogger export format.

    The input is read line by line. A line of eight dashes ends a post, a
    line of five dashes ends the current multi-line section, and every
    other line is either a "KEY: value" pair or content of the section
    currently being read.

    Args:
      infile: The input MovableType export file
      outfile: The output file that should receive the translated document
    """
    # Imported locally so this method does not rely on a module-level import.
    import calendar

    def add_labels(entry, csv_text):
        # Append each non-empty comma separated value as a label while the
        # entry holds fewer than 20 categories.
        for keyword in csv_text.split(','):
            keyword = keyword.strip()
            if keyword != '' and len(entry.category) < 20:
                entry.category.append(
                    atom.Category(scheme=CATEGORY_NS, term=keyword))

    # Create the top-level feed object
    feed = BloggerGDataFeed()

    # Fill in the feed object with the boilerplate metadata
    feed.generator = atom.Generator(text='Blogger')
    feed.title = atom.Title(text='MovableType blog')
    feed.link.append(
        atom.Link(href=DUMMY_URI, rel='self', link_type=ATOM_TYPE))
    feed.link.append(
        atom.Link(href=DUMMY_URI, rel='alternate', link_type=HTML_TYPE))

    # Calculate the last updated time by inspecting all of the posts
    last_updated = 0

    # These variables keep the state as we parse the file
    post_entry = None     # The current post atom.Entry to populate
    comment_entry = None  # The current comment atom.Entry to populate
    last_entry = None     # The previous post atom.Entry if exists
    tag_name = None       # The current name of multi-line values
    tag_contents = ''     # The contents of multi-line values

    # Loop through the text lines looking for key/value pairs
    for line in infile:
        # Remove whitespace, then a leading byte-order mark if present.
        # The mark is removed as a prefix: lstrip() would treat it as a set
        # of characters and could also eat the first byte of a legitimate
        # multi-byte character that starts the line.
        line = line.strip()
        if line.startswith(codecs.BOM_UTF8):
            line = line[len(codecs.BOM_UTF8):].lstrip()

        # Check for the post ending token
        if line == '-' * 8:
            if post_entry:
                # If the body tag is still being read, add what has been read.
                if tag_name == 'BODY':
                    post_entry.content = atom.Content(
                        content_type='html',
                        text=self._TranslateContents(tag_contents))

                # Add the post to our feed
                feed.entry.insert(0, post_entry)
                last_entry = post_entry

            # Reset the state variables
            post_entry = None
            comment_entry = None
            tag_name = None
            tag_contents = ''
            continue

        # Check for the tag ending separator
        elif line == '-' * 5:
            # Get the contents of the body and set the entry contents
            if tag_name == 'BODY':
                post_entry.content = atom.Content(
                    content_type='html',
                    text=self._TranslateContents(tag_contents))

            # This is the end of a COMMENT section. Fill in the comment
            # entry, link it to the original post and add it to the feed.
            elif tag_name == 'COMMENT':
                comment_entry.content = atom.Content(
                    content_type='html',
                    text=self._TranslateContents(tag_contents))
                comment_entry.title = atom.Title(
                    text=self._Encode(self._CreateSnippet(tag_contents)))
                comment_entry.extension_elements.append(
                    InReplyTo(post_entry.id.text))
                feed.entry.append(comment_entry)
                comment_entry = None

            # Get the contents of the extended body and append it to the
            # entry contents
            elif tag_name == 'EXTENDED BODY':
                if post_entry:
                    post_entry.content.text += (
                        '<br/>' + self._TranslateContents(tag_contents))
                elif last_entry and last_entry.content:
                    last_entry.content.text += (
                        '<br/>' + self._TranslateContents(tag_contents))

            # Convert any keywords (comma separated values) into Blogger
            # labels
            elif tag_name == 'KEYWORDS':
                add_labels(post_entry, tag_contents)

            # Reset the current tag and its contents
            tag_name = None
            tag_contents = ''
            continue

        # Split the line into key/value pairs
        elems = line.split(':')
        key = elems[0]
        value = ''
        if len(elems) > 1:
            value = ':'.join(elems[1:]).strip()

        # The author key indicates the start of a post as well as the author
        # of the post entry or comment
        if key == 'AUTHOR':
            # Create a new entry
            entry = gdata.GDataEntry()
            entry.link.append(
                atom.Link(href=DUMMY_URI, rel='self', link_type=ATOM_TYPE))
            entry.link.append(
                atom.Link(href=DUMMY_URI, rel='alternate',
                          link_type=HTML_TYPE))
            entry.id = atom.Id('post-' + self._GetNextId())

            # Add the author's name
            author_name = self._Encode(value)
            if not author_name:
                author_name = 'Anonymous'
            entry.author = atom.Author(atom.Name(text=author_name))

            # Add the appropriate kind, either a post or a comment
            if tag_name == 'COMMENT':
                entry.category.append(
                    atom.Category(scheme=CATEGORY_KIND, term=COMMENT_KIND))
                comment_entry = entry
            else:
                entry.category.append(
                    atom.Category(scheme=CATEGORY_KIND, term=POST_KIND))
                post_entry = entry

        # The title only applies to new posts
        elif key == 'TITLE' and tag_name != 'PING':
            post_entry.title = atom.Title(text=self._Encode(value))

        # If the status is a draft, mark it as so in the entry. If the
        # status is 'Published' there's nothing to do here
        elif key == 'STATUS':
            if value == 'Draft':
                post_entry.control = atom.Control(atom.Draft('yes'))

        # Turn categories into labels
        elif key == 'CATEGORY':
            if value != '' and len(post_entry.category) < 20:
                post_entry.category.append(
                    atom.Category(scheme=CATEGORY_NS, term=value))

        # Convert the date and specify it as the published/updated time
        elif key == 'DATE' and tag_name != 'PING':
            time_val = self._FromMtTime(value)
            entry = post_entry
            if tag_name == 'COMMENT':
                entry = comment_entry
            entry.published = atom.Published(self._ToBlogTime(time_val))
            entry.updated = atom.Updated(self._ToBlogTime(time_val))

            # Check to see if this was the last post published (so far).
            # calendar.timegm() is the inverse of the time.gmtime() call
            # used below, so the feed timestamp is not shifted by the local
            # UTC offset the way a time.mktime()/time.gmtime() pair would.
            seconds = calendar.timegm(time_val)
            last_updated = max(seconds, last_updated)

        # Convert all tags into Blogger labels
        elif key == 'TAGS':
            add_labels(post_entry, value)

        # Update the author's email if it is present and not empty
        elif tag_name == 'COMMENT' and key == 'EMAIL' and len(value) > 0:
            comment_entry.author.email = atom.Email(text=value)

        # Update the author's URI if it is present and not empty
        elif tag_name == 'COMMENT' and key == 'URL' and len(value) > 0:
            comment_entry.author.uri = atom.Uri(text=value)

        # If any of these keys are used, they contain information beyond
        # this key on following lines
        elif key in ('COMMENT', 'BODY', 'EXTENDED BODY', 'EXCERPT',
                     'KEYWORDS', 'PING'):
            tag_name = key

        # These lines can be safely ignored
        elif key in ('BASENAME', 'ALLOW COMMENTS', 'CONVERT BREAKS',
                     'ALLOW PINGS', 'PRIMARY CATEGORY', 'IP', 'URL',
                     'EMAIL'):
            continue

        # If the line is empty and we're processing the body, add an HTML
        # line break
        elif tag_name == 'BODY' and len(line) == 0:
            tag_contents += '<br/>'

        # This would be a line of content beyond a key/value pair. The test
        # is on the whole line rather than on the key so that content
        # starting with a colon is kept instead of being dropped.
        elif line:
            tag_contents += line + '\n'

    # Update the feed with the last updated time
    feed.updated = atom.Updated(
        self._ToBlogTime(time.gmtime(last_updated)))

    # Serialize the feed object
    outfile.write(str(feed))
def endComment_Author(self, content):
    """Append an author to the first entry in self.comments.

    Does nothing when self.comments is empty. An empty or missing name is
    replaced by 'Anonymous'.

    Args:
      content: The author name text.
    """
    if not self.comments:
        return
    author_name = content or 'Anonymous'
    newest_comment = self.comments[0]
    newest_comment.author.append(atom.Author(atom.Name(text=author_name)))
def endCreator(self, content):
    """Append an author to self.current_post.

    Does nothing when there is no current post. An empty or missing name is
    replaced by 'Anonymous'.

    Args:
      content: The creator name text.
    """
    if not self.current_post:
        return
    creator_name = content or 'Anonymous'
    author = atom.Author(atom.Name(text=creator_name))
    self.current_post.author.append(author)