Example #1
0
    def endItem(self, _):
        """Finish the item being parsed and reset the per-item state.

        If a post is in progress it is given one label category per collected
        category name, a kind category (page or post) and, when it has no
        published time yet, the current UTC time.  The post is then added to
        the feed, followed by the comments gathered for it.  The per-item
        state is cleared whether or not a post was in progress.

        Args:
          _: Unused handler argument.
        """
        post = self.current_post
        if post:
            # Labels gathered while the item was being parsed.
            labels = [atom.Category(name, CATEGORY_NS)
                      for name in self.categories]
            post.category.extend(labels)

            # Mark the entry as either a page or a regular post.
            kind_term = PAGE_KIND if self.is_page else POST_KIND
            post.category.append(
                atom.Category(scheme=CATEGORY_KIND, term=kind_term))

            # Fall back to "now" when the item carried no published time.
            if not post.published:
                now = time.gmtime(time.time())
                post.published = atom.Published(self._ToBlogTime(now))

            # The post goes first, its comments right behind it.
            entries = self.feed.entry
            entries.append(post)
            entries.extend(self.comments)

        # Start from a clean slate for the next item.
        self.comments = []
        self.is_page = False
        self.current_post = None
        self.categories = set()
Example #2
0
    def _TranslateComment(self, xml_comment, user_map):
        """Build a comment entry from an exported comment element.

        Args:
          xml_comment: DOM element of the comment.  Its 'id' and 'jitemid'
              attributes and its 'body', 'date' and optional 'subject' child
              elements are read.
          user_map: Object whose GetUser(comment_id) result is used as the
              author name.

        Returns:
          The populated gdata.GDataEntry, tagged with the comment kind and
          carrying an InReplyTo extension that points at 'post-<jitemid>'.
        """
        def first_text(tag):
            # Text of the first child element with the given tag name.
            return self._GetText(xml_comment.getElementsByTagName(tag)[0])

        comment_id = xml_comment.getAttribute('id')

        entry = gdata.GDataEntry()
        entry.id = atom.Id(text='comment-%s' % comment_id)
        # Both links are placeholders pointing at the dummy URI.
        for relation in ('self', 'alternate'):
            entry.link.append(
                atom.Link(href=DUMMY_URI, rel=relation, link_type=ATOM_TYPE))
        author_name = atom.Name(text=user_map.GetUser(comment_id))
        entry.author = atom.Author(author_name)
        kind = atom.Category(scheme=CATEGORY_KIND, term=COMMENT_KIND)
        entry.category.append(kind)

        body_html = self._TranslateContent(first_text('body'))
        entry.content = atom.Content(content_type='html', text=body_html)
        # The single 'date' element supplies both timestamps.
        entry.published = atom.Published(text=first_text('date'))
        entry.updated = atom.Updated(text=first_text('date'))

        # Use the explicit subject if present, else a snippet of the body.
        subject_nodes = xml_comment.getElementsByTagName('subject')
        if subject_nodes:
            title_text = self._GetText(subject_nodes[0])
        else:
            title_text = self._CreateSnippet(body_html)
        entry.title = atom.Title(text=title_text)

        parent_ref = 'post-%s' % xml_comment.getAttribute('jitemid')
        entry.extension_elements.append(InReplyTo(parent_ref))

        return entry
Example #3
0
    def _TranslatePost(self, lj_event):
        """Translate one event record into a post entry.

        Args:
          lj_event: Mapping describing the event.  The keys 'itemid', 'url',
              'eventtime', 'event' and 'props' are required; 'subject' is
              optional.  'event' and props['taglist'] may be
              xmlrpclib.Binary values, in which case their .data is used.

        Returns:
          The populated gdata.GDataEntry, tagged with the post kind and with
          one label category per non-empty tag in props['taglist'].

        Raises:
          KeyError: If one of the required keys is missing from lj_event.
        """
        post_entry = gdata.GDataEntry()
        post_entry.id = atom.Id(text='post-%d' % lj_event['itemid'])
        post_entry.link.append(
            atom.Link(href=DUMMY_URI, rel='self', link_type=ATOM_TYPE))
        post_entry.link.append(
            atom.Link(href=lj_event['url'],
                      rel='alternate',
                      link_type=ATOM_TYPE))
        post_entry.author = atom.Author(atom.Name(text=self.username))
        post_entry.category.append(
            atom.Category(scheme=CATEGORY_KIND, term=POST_KIND))

        # The event time is converted once and used for both timestamps.
        blog_time = self._ToBlogTime(self._FromLjTime(lj_event['eventtime']))
        post_entry.published = atom.Published(text=blog_time)
        post_entry.updated = atom.Updated(text=blog_time)

        content = lj_event['event']
        if isinstance(content, xmlrpclib.Binary):
            content = content.data
        post_entry.content = atom.Content(content_type='html',
                                          text=self._TranslateContent(content))

        # Fall back to a snippet of the content when there is no subject.
        subject = lj_event.get('subject')
        if not subject:
            subject = self._CreateSnippet(content)
        if not isinstance(subject, basestring):
            subject = str(subject)
        post_entry.title = atom.Title(text=subject)

        # Turn the taglist into individual labels
        taglist = lj_event['props'].get('taglist')
        if isinstance(taglist, xmlrpclib.Binary):
            taglist = taglist.data
        elif taglist is not None and not isinstance(taglist, basestring):
            # Non-string values are stringified.  A missing taglist must stay
            # None: str(None) would yield the bogus label 'None'.
            taglist = str(taglist)

        if taglist:
            for tag in taglist.split(','):
                tag = tag.strip()
                # Skip blanks produced by input such as "a,,b" or "a, ".
                if tag:
                    post_entry.category.append(
                        atom.Category(scheme=CATEGORY_NS, term=tag))
        return post_entry
Example #4
0
  def Translate(self, infile, outfile):
    """Performs the actual translation to a Blogger export format.

    The input is consumed line by line.  A line of eight dashes ends a post,
    a line of five dashes ends the current multi-line section, "KEY: value"
    lines carry metadata, and any other non-empty line is accumulated as
    content of the current multi-line section.  Posts are inserted at the
    front of the feed, comments are appended at its end, and the serialized
    feed is written to outfile.

    Args:
      infile: The input MovableType export file (iterated line by line)
      outfile: The output file that should receive the translated document
    """
    # Imported locally so this method does not depend on the module-level
    # import list, which may not include calendar.
    import calendar

    # Upper bound on the categories (kind + labels) attached to one post.
    max_categories = 20

    def add_label(entry, term):
      # Attach a label unless it is empty or the entry is already full.
      if term != '' and len(entry.category) < max_categories:
        entry.category.append(atom.Category(scheme=CATEGORY_NS, term=term))

    # Create the top-level feed object
    feed = BloggerGDataFeed()

    # Fill in the feed object with the boilerplate metadata
    feed.generator = atom.Generator(text='Blogger')
    feed.title = atom.Title(text='MovableType blog')
    feed.link.append(
        atom.Link(href=DUMMY_URI, rel='self', link_type=ATOM_TYPE))
    feed.link.append(
        atom.Link(href=DUMMY_URI, rel='alternate', link_type=HTML_TYPE))

    # Calculate the last updated time by inspecting all of the dated entries
    last_updated = 0

    # These variables keep the state as we parse the file
    post_entry = None    # The current post atom.Entry to populate
    comment_entry = None # The current comment atom.Entry to populate
    last_entry = None    # The previous post atom.Entry if exists
    tag_name = None      # The current name of multi-line values
    tag_contents = ''    # The contents of multi-line values

    # Loop through the text lines looking for key/value pairs
    for line in infile:

      # Remove whitespace and a leading byte-order mark.  The mark is removed
      # as a prefix: lstrip() treats its argument as a set of characters and
      # could also eat the first bytes of real content.
      line = line.strip()
      if line.startswith(codecs.BOM_UTF8):
        line = line[len(codecs.BOM_UTF8):].lstrip()

      # Check for the post ending token
      if line == '-' * 8:
        if post_entry:
          # If the body tag is still being read, add what has been read.
          if tag_name == 'BODY':
            post_entry.content = atom.Content(
                content_type='html', text=self._TranslateContents(tag_contents))

          # Add the post to our feed
          feed.entry.insert(0, post_entry)
          last_entry = post_entry

        # Reset the state variables
        post_entry = None
        comment_entry = None
        tag_name = None
        tag_contents = ''
        continue

      # Check for the tag ending separator
      elif line == '-' * 5:
        # Get the contents of the body and set the entry contents
        if tag_name == 'BODY':
          post_entry.content = atom.Content(
              content_type='html', text=self._TranslateContents(tag_contents))

        # This is the end of a COMMENT section.  Fill in the comment entry,
        # link it to the original post and add it to the feed.
        elif tag_name == 'COMMENT':
          comment_entry.content = atom.Content(
              content_type='html', text=self._TranslateContents(tag_contents))
          comment_entry.title = atom.Title(
              text=self._Encode(self._CreateSnippet(tag_contents)))
          comment_entry.extension_elements.append(InReplyTo(post_entry.id.text))
          feed.entry.append(comment_entry)
          comment_entry = None

        # Get the contents of the extended body and append it to the
        # entry contents (of the previous post if none is in progress)
        elif tag_name == 'EXTENDED BODY':
          if post_entry:
            post_entry.content.text += '<br/>' + self._TranslateContents(tag_contents)
          elif last_entry and last_entry.content:
            last_entry.content.text += '<br/>' + self._TranslateContents(tag_contents)

        # Convert any keywords (comma separated values) into Blogger labels
        elif tag_name == 'KEYWORDS':
          for keyword in tag_contents.split(','):
            add_label(post_entry, keyword.strip())

        # Reset the current tag and its contents
        tag_name = None
        tag_contents = ''
        continue

      # Split the line into a key and a value at the first colon; the value
      # keeps any further colons (e.g. in times or URLs).
      key, _, value = line.partition(':')
      value = value.strip()

      # The author key indicates the start of a post as well as the author of
      # the post entry or comment
      if key == 'AUTHOR':
        # Create a new entry
        entry = gdata.GDataEntry()
        entry.link.append(
            atom.Link(href=DUMMY_URI, rel='self', link_type=ATOM_TYPE))
        entry.link.append(
            atom.Link(href=DUMMY_URI, rel='alternate', link_type=HTML_TYPE))
        entry.id = atom.Id('post-' + self._GetNextId())
        # Add the author's name
        author_name = self._Encode(value)
        if not author_name:
          author_name = 'Anonymous'
        entry.author = atom.Author(atom.Name(text=author_name))

        # Add the appropriate kind, either a post or a comment
        if tag_name == 'COMMENT':
          entry.category.append(
              atom.Category(scheme=CATEGORY_KIND, term=COMMENT_KIND))
          comment_entry = entry
        else:
          entry.category.append(
              atom.Category(scheme=CATEGORY_KIND, term=POST_KIND))
          post_entry = entry

      # The title only applies to new posts
      elif key == 'TITLE' and tag_name != 'PING':
        post_entry.title = atom.Title(text=self._Encode(value))

      # If the status is a draft, mark it as so in the entry.  If the status
      # is 'Published' there's nothing to do here
      elif key == 'STATUS':
        if value == 'Draft':
          post_entry.control = atom.Control(atom.Draft('yes'))

      # Turn categories into labels
      elif key == 'CATEGORY':
        add_label(post_entry, value)

      # Convert the date and specify it as the published/updated time
      elif key == 'DATE' and tag_name != 'PING':
        time_val = self._FromMtTime(value)
        entry = post_entry
        if tag_name == 'COMMENT':
          entry = comment_entry
        entry.published = atom.Published(self._ToBlogTime(time_val))
        entry.updated = atom.Updated(self._ToBlogTime(time_val))

        # Check to see if this is the latest time seen (so far).
        # calendar.timegm is the inverse of the time.gmtime call used for
        # the feed below; time.mktime would interpret the value as local
        # time and shift the feed time by the machine's UTC offset.
        seconds = calendar.timegm(time_val)
        last_updated = max(seconds, last_updated)

      # Convert all tags into Blogger labels
      elif key == 'TAGS':
        for keyword in value.split(','):
          add_label(post_entry, keyword.strip())

      # Update the author's email if it is present and not empty
      elif tag_name == 'COMMENT' and key == 'EMAIL' and value:
        comment_entry.author.email = atom.Email(text=value)

      # Update the author's URI if it is present and not empty
      elif tag_name == 'COMMENT' and key == 'URL' and value:
        comment_entry.author.uri = atom.Uri(text=value)

      # If any of these keys are used, they contain information beyond this key
      # on following lines
      elif key in ('COMMENT', 'BODY', 'EXTENDED BODY', 'EXCERPT', 'KEYWORDS', 'PING'):
        tag_name = key

      # These lines can be safely ignored
      elif key in ('BASENAME', 'ALLOW COMMENTS', 'CONVERT BREAKS',
                   'ALLOW PINGS', 'PRIMARY CATEGORY', 'IP', 'URL', 'EMAIL'):
        continue

      # If the line is empty and we're processing the body, add an HTML line
      # break
      elif tag_name == 'BODY' and not line:
        tag_contents += '<br/>'

      # This would be a line of content beyond a key/value pair.  The whole
      # line is tested (not the key) so that content starting with a colon,
      # such as ":)", is kept.
      elif line:
        tag_contents += line + '\n'

    # Update the feed with the last updated time
    feed.updated = atom.Updated(self._ToBlogTime(time.gmtime(last_updated)))

    # Serialize the feed object
    outfile.write(str(feed))
Example #5
0
 def endComment_Date(self, content):
     """Set the published time of the first collected comment.

     Nothing happens when there are no comments, when the first comment
     already has a published time, or when the date text starts with
     '0000'.

     Args:
       content: Date text, converted with _WordpressDateToTime.
     """
     if not self.comments:
         return
     comment = self.comments[0]
     if comment.published:
         return
     # A date starting with '0000' is skipped rather than converted.
     if content[:4] == '0000':
         return
     parsed = self._WordpressDateToTime(content)
     comment.published = atom.Published(self._ToBlogTime(parsed))
Example #6
0
 def endPost_Date(self, content):
     """Set the published time of the post currently being parsed.

     Nothing happens when no post is in progress, when the post already
     has a published time, or when the date text starts with '0000'.

     Args:
       content: Date text, converted with _WordpressDateToTime.
     """
     post = self.current_post
     if not post or post.published or content[:4] == '0000':
         return
     parsed = self._WordpressDateToTime(content)
     post.published = atom.Published(self._ToBlogTime(parsed))
Example #7
0
 def endPubdate(self, content):
     """Apply a feed-level publication date to the feed.

     The date is only used while no post is in progress; in that case both
     the feed's published and updated times are set from it.

     Args:
       content: Date text, converted with _WordpressPubDateToTime.
     """
     if self.current_post:
         # Inside a post: this handler leaves the feed untouched.
         return
     published_text = self._ToBlogTime(self._WordpressPubDateToTime(content))
     self.feed.published = atom.Published(published_text)
     updated_text = self._ToBlogTime(self._WordpressPubDateToTime(content))
     self.feed.updated = atom.Updated(updated_text)