Beispiel #1
0
    def Translate(self, outfile):
        """Builds the Blogger export feed for a LiveJournal blog and writes it.

        The feed receives fixed generator, title and link metadata, is stamped
        with the current time, and is then filled with the entries returned by
        _GetPosts() followed by those returned by _GetComments().

        Args:
          outfile: The output file that should receive the translated
            document; the serialized feed is passed to its write() method.
        """
        blog_feed = BloggerGDataFeed()

        # Boilerplate metadata: generator, title, self/alternate links and
        # the time of this translation run.
        blog_feed.generator = atom.Generator(text='Blogger')
        blog_feed.title = atom.Title(text='LiveJournal blog')
        self_link = atom.Link(href=DUMMY_URI, rel='self', link_type=ATOM_TYPE)
        blog_feed.link.append(self_link)
        alternate_link = atom.Link(
            href=DUMMY_URI, rel='alternate', link_type=HTML_TYPE)
        blog_feed.link.append(alternate_link)
        timestamp = self._ToBlogTime(time.gmtime())
        blog_feed.updated = atom.Updated(text=timestamp)

        # Posts go in first, comments after them.
        blog_feed.entry.extend(self._GetPosts())
        blog_feed.entry.extend(self._GetComments())

        # Serialize the finished feed.
        serialized = str(blog_feed)
        outfile.write(serialized)
Beispiel #2
0
    def Translate(self, doc, outfile):
        """Performs the actual translation to a Blogger export format.

        Resets the per-translation state on this object and then runs the SAX
        parser over the document with this object as the content handler.

        This method has no return value.  The output file is only stored on
        self.outfile here; presumably the SAX callbacks (outside this method)
        write the translated document to it -- verify against the handler
        methods.

        Args:
          doc: The input WXR file as a string
          outfile: The output file that should receive the translated document

        Raises:
          RuntimeWarning: If the document cannot be parsed and ON_GAE is true.
            When ON_GAE is false the error description is printed instead and
            the method returns normally.
        """
        # Create the top-level feed object
        self.feed = BloggerGDataFeed()
        self.feed.generator = atom.Generator(text='Blogger')

        # Reset the state used while handling SAX events
        self.elem_stack = []
        self.contents = ''
        self.outfile = outfile
        self.current_post = None
        self.is_page = False
        self.categories = set()
        self.comments = []

        try:
            xml.sax.parseString(self.RemoveMetaData(doc), self)
        # "except ... as" and print(...) with a single argument behave the
        # same on Python 2.6+ and are also valid Python 3 syntax.
        except xml.sax.SAXParseException as e:
            error_string = self.GetSaxErrorString(doc, e.getLineNumber(),
                                                  e.getColumnNumber(), ON_GAE)
            if ON_GAE:
                raise RuntimeWarning(error_string)
            else:
                print(error_string)
Beispiel #3
0
  def Translate(self, infile, outfile):
    """Performs the actual translation to a Blogger export format.

    The MovableType export is read line by line.  Lines of the form
    "KEY: value" set metadata on the current post or comment, a line of five
    dashes ends a multi-line section (BODY, EXTENDED BODY, COMMENT, ...) and
    a line of eight dashes ends the current post.  Finished posts are inserted
    at the front of the feed; finished comments are appended to its end.

    Args:
      infile: The input MovableType export file, iterated line by line
      outfile: The output file that should receive the translated document
    """
    # Create the top-level feed object
    feed = BloggerGDataFeed()

    # Fill in the feed object with the boilerplate metadata
    feed.generator = atom.Generator(text='Blogger')
    feed.title = atom.Title(text='MovableType blog')
    feed.link.append(
        atom.Link(href=DUMMY_URI, rel='self', link_type=ATOM_TYPE))
    feed.link.append(
        atom.Link(href=DUMMY_URI, rel='alternate', link_type=HTML_TYPE))

    # Calculate the last updated time by inspecting all of the entries.  The
    # seconds value is only used to order the dates; the matching time value
    # is kept as-is so the feed is stamped the same way as the entry it came
    # from.  (Round-tripping through time.mktime()/time.gmtime() would shift
    # the result by the local UTC offset.)
    last_updated = 0
    last_updated_time = None

    # These variables keep the state as we parse the file
    post_entry = None    # The current post atom.Entry to populate
    comment_entry = None # The current comment atom.Entry to populate
    last_entry = None    # The previous post atom.Entry if exists
    tag_name = None      # The current name of multi-line values
    tag_contents = ''    # The contents of multi-line values

    # Loop through the text lines looking for key/value pairs
    for line in infile:

      # Remove whitespace, then a UTF-8 byte order mark if the line starts
      # with one.  Only the exact BOM prefix is removed: lstrip() would treat
      # the BOM as a set of characters and could eat the leading bytes of
      # legitimate text.
      line = line.strip()
      if line.startswith(codecs.BOM_UTF8):
        line = line[len(codecs.BOM_UTF8):]

      # Check for the post ending token
      if line == '-' * 8:
        if post_entry:
          # If the body tag is still being read, add what has been read.
          if tag_name == 'BODY':
            post_entry.content = atom.Content(
                content_type='html', text=self._TranslateContents(tag_contents))

          # Add the post to our feed
          feed.entry.insert(0, post_entry)
          last_entry = post_entry

        # Reset the state variables
        post_entry = None
        comment_entry = None
        tag_name = None
        tag_contents = ''
        continue

      # Check for the tag ending separator
      elif line == '-' * 5:
        # Get the contents of the body and set the entry contents
        if tag_name == 'BODY':
          post_entry.content = atom.Content(
              content_type='html', text=self._TranslateContents(tag_contents))

        # This is the end of a COMMENT section.  Fill in the comment entry
        # that was created by its AUTHOR line and link it to the current post.
        elif tag_name == 'COMMENT':
          comment_entry.content = atom.Content(
              content_type='html', text=self._TranslateContents(tag_contents))
          comment_entry.title = atom.Title(
            text=self._Encode(self._CreateSnippet(tag_contents)))
          comment_entry.extension_elements.append(InReplyTo(post_entry.id.text))
          feed.entry.append(comment_entry)
          comment_entry = None

        # Get the contents of the extended body and append it to the
        # entry contents
        elif tag_name == 'EXTENDED BODY':
          if post_entry:
            extended = self._TranslateContents(tag_contents)
            if post_entry.content is None:
              # No BODY section was seen for this post; keep the extended
              # body instead of failing on the missing content element.
              post_entry.content = atom.Content(
                  content_type='html', text=extended)
            else:
              post_entry.content.text += '<br/>' + extended
          elif last_entry and last_entry.content:
            last_entry.content.text += '<br/>' + self._TranslateContents(tag_contents)

        # Convert any keywords (comma separated values) into Blogger labels.
        # Nothing is added once the entry already carries 20 categories.
        elif tag_name == 'KEYWORDS':
          for keyword in tag_contents.split(','):
            keyword = keyword.strip()
            if keyword != '' and len(post_entry.category) < 20:
              post_entry.category.append(
                  atom.Category(scheme=CATEGORY_NS, term=keyword))

        # Reset the current tag and its contents
        tag_name = None
        tag_contents = ''
        continue

      # Split the line into a key/value pair at the first colon; the value
      # keeps any further colons (e.g. in URLs or times)
      key, _, value = line.partition(':')
      value = value.strip()

      # The author key indicates the start of a post as well as the author of
      # the post entry or comment
      if key == 'AUTHOR':
        # Create a new entry
        entry = gdata.GDataEntry()
        entry.link.append(
            atom.Link(href=DUMMY_URI, rel='self', link_type=ATOM_TYPE))
        entry.link.append(
            atom.Link(href=DUMMY_URI, rel='alternate', link_type=HTML_TYPE))
        entry.id = atom.Id('post-' + self._GetNextId())
        # Add the author's name
        author_name = self._Encode(value)
        if not author_name:
          author_name = 'Anonymous'
        entry.author = atom.Author(atom.Name(text=author_name))

        # Add the appropriate kind, either a post or a comment
        if tag_name == 'COMMENT':
          entry.category.append(
              atom.Category(scheme=CATEGORY_KIND, term=COMMENT_KIND))
          comment_entry = entry
        else:
          entry.category.append(
              atom.Category(scheme=CATEGORY_KIND, term=POST_KIND))
          post_entry = entry

      # The title only applies to new posts
      elif key == 'TITLE' and tag_name != 'PING':
        post_entry.title = atom.Title(text=self._Encode(value))

      # If the status is a draft, mark it as so in the entry.  If the status
      # is 'Published' there's nothing to do here
      elif key == 'STATUS':
        if value == 'Draft':
          post_entry.control = atom.Control(atom.Draft('yes'))

      # Turn categories into labels
      elif key == 'CATEGORY':
        if value != '' and len(post_entry.category) < 20:
          post_entry.category.append(
              atom.Category(scheme=CATEGORY_NS, term=value))

      # Convert the date and specify it as the published/updated time
      elif key == 'DATE' and tag_name != 'PING':
        time_val = self._FromMtTime(value)
        entry = post_entry
        if tag_name == 'COMMENT':
          entry = comment_entry
        entry.published = atom.Published(self._ToBlogTime(time_val))
        entry.updated = atom.Updated(self._ToBlogTime(time_val))

        # Check to see if this is the latest entry time seen (so far)
        seconds = time.mktime(time_val)
        if seconds > last_updated:
          last_updated = seconds
          last_updated_time = time_val

      # Convert all tags into Blogger labels
      elif key == 'TAGS':
        for keyword in value.split(','):
          keyword = keyword.strip()
          if keyword != '' and len(post_entry.category) < 20:
            post_entry.category.append(
                atom.Category(scheme=CATEGORY_NS, term=keyword))

      # Update the author's email if it is present and not empty
      elif tag_name == 'COMMENT' and key == 'EMAIL' and len(value) > 0:
        comment_entry.author.email = atom.Email(text=value)

      # Update the author's URI if it is present and not empty
      elif tag_name == 'COMMENT' and key == 'URL' and len(value) > 0:
        comment_entry.author.uri = atom.Uri(text=value)

      # If any of these keys are used, they contain information beyond this key
      # on following lines
      elif key in ('COMMENT', 'BODY', 'EXTENDED BODY', 'EXCERPT', 'KEYWORDS', 'PING'):
        tag_name = key

      # These lines can be safely ignored
      elif key in ('BASENAME', 'ALLOW COMMENTS', 'CONVERT BREAKS',
                   'ALLOW PINGS', 'PRIMARY CATEGORY', 'IP', 'URL', 'EMAIL'):
        continue

      # If the line is empty and we're processing the body, add an HTML line
      # break
      elif tag_name == 'BODY' and len(line) == 0:
        tag_contents += '<br/>'

      # This would be a line of content beyond a key/value pair
      elif len(key) != 0:
        tag_contents += line + '\n'


    # Update the feed with the last updated time, falling back to the epoch
    # when no DATE line produced a later time.
    # NOTE(review): assumes _ToBlogTime formats the time value it is given
    # unchanged, as it is already used that way for entry.published above.
    if last_updated_time is None:
      last_updated_time = time.gmtime(0)
    feed.updated = atom.Updated(self._ToBlogTime(last_updated_time))

    # Serialize the feed object
    outfile.write(str(feed))