Example #1
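Resolves the author of an Atom entry during a feed import: registered importer extensions get the first chance to identify the author, previously seen authors are reused via two caches keyed by e-mail and username, and a new Author is created and remembered only as a last resort.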
    def parse_author(self, entry):
        """Lookup the author for the given entry."""
        def _remember_author(author):
            if author.email is not None and \
               author.email not in self._authors_by_email:
                self._authors_by_email[author.email] = author
            if author.username is not None and \
               author.username not in self._authors_by_username:
                self._authors_by_username[author.username] = author

        author = entry.find(atom.author)
        email = author.findtext(atom.email)
        username = author.findtext(atom.name)

        for extension in self.extensions:
            rv = extension.lookup_author(author, entry, username, email)
            if rv is not None:
                _remember_author(rv)
                return rv

        if email is not None and email in self._authors_by_email:
            return self._authors_by_email[email]
        if username in self._authors_by_username:
            return self._authors_by_username[username]

        author = Author(username, email)
        _remember_author(author)
        self.authors.append(author)
        return author
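The lookup order above is a reusable pattern: each author is indexed under every non-None key it carries, extensions get first refusal, and the e-mail cache is consulted before the username cache. A minimal self-contained sketch of just the caching part, with a stand-in Author class (not Zine's):

class Author(object):
    def __init__(self, username, email):
        self.username = username
        self.email = email

class AuthorCache(object):
    """Reuse one Author object per e-mail address or username."""

    def __init__(self):
        self._by_email = {}
        self._by_username = {}

    def remember(self, author):
        # setdefault keeps the first Author seen under each key.
        if author.email is not None:
            self._by_email.setdefault(author.email, author)
        if author.username is not None:
            self._by_username.setdefault(author.username, author)

    def lookup(self, username, email):
        # E-mail is the stronger identity, so it is tried first.
        if email is not None and email in self._by_email:
            return self._by_email[email]
        return self._by_username.get(username)

cache = AuthorCache()
jace = Author('jace', 'jace@example.com')
cache.remember(jace)
assert cache.lookup('jace', None) is jace
assert cache.lookup(None, 'jace@example.com') is jace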
Example #2
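Lazily builds an Author from an imported user element, cached by dependency id so each user is constructed only once; imported privilege names are matched against the application's registry and unknown ones are dropped.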
    def _get_author(self, dependency):
        author = self._authors.get(dependency)
        if author is None:
            element = self._lookup_user(self._dependencies,
                                        id=str(dependency))[0]
            author = Author(element.findtext(zine.username),
                            element.findtext(zine.email),
                            element.findtext(zine.real_name),
                            element.findtext(zine.description),
                            element.findtext(zine.www),
                            element.findtext(zine.pw_hash),
                            _to_bool(element.findtext(zine.is_author)),
                            _pickle(element.findtext(zine.extra)))
            for privilege in element.findall(zine.privilege):
                p = self.app.privileges.get(privilege.text)
                if p is not None:
                    author.privileges.add(p)
            self._authors[dependency] = author
            self.parser.authors.append(author)
        return author
Example #3
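Imports a LiveJournal journal or community over XML-RPC: logs in, pages through syncitems for entry metadata, downloads bodies in batches via getevents, maps LiveJournal security levels onto Zine status flags, and converts each entry into a Post.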
    def import_livejournal(self,
                           username,
                           password,
                           import_what=IMPORT_JOURNAL,
                           community='',
                           security_custom=SECURITY_PROTECTED,
                           categories=[],
                           getcomments=True):
        """Import from LiveJournal using specified parameters."""
        yield _(u'<p>Beginning LiveJournal import. Attempting to log in...</p>')
        if import_what != IMPORT_JOURNAL:
            usejournal = community
        else:
            usejournal = None
        lj = LiveJournalConnect(username, password, usejournal)
        result = lj.login(getmoods=0)
        authors = {
            username:
            Author(username=username,
                   email='',
                   real_name=unicode(result['fullname'], 'utf-8'))
        }
        yield _(u'<p>Your name: <strong>%s</strong></p>') % \
                                                    authors[username].real_name
        moodlist = dict([(int(m['id']), unicode(str(m['name']), 'utf-8'))
                         for m in result['moods']])

        result = lj.getusertags()
        tags = dict([
            (tag, Tag(gen_slug(tag), tag))
            for tag in [unicode(t['name'], 'utf-8') for t in result['tags']]
        ])
        yield _(u'<p><strong>Tags:</strong> %s</p>') % _(u', ').join(
            tags.keys())

        ##result = lj.getdaycounts()
        ##daycounts = [(date(*strptime(item['date'], '%Y-%m-%d')[0:3]),
        ##              item['count']) for item in result['daycounts']]
        ##totalposts = sum([x[1] for x in daycounts])
        ##yield _(u'<p>Found <strong>%d</strong> posts on <strong>%d days'\
        ##        u'</strong> between %s and %s.</p>') % (
        ##                                totalposts,
        ##                                len(daycounts),
        ##                                daycounts[0][0].strftime('%Y-%m-%d'),
        ##                                daycounts[-1][0].strftime('%Y-%m-%d'))

        posts = {}

        # Process implemented as per
        # http://www.livejournal.com/doc/server/ljp.csp.entry_downloading.html
        yield _(u'<ul>')
        yield _(u'<li>Getting metadata...</li>')
        result = lj.syncitems()
        sync_items = []
        sync_total = int(result['total'])
        yield _(u'<li>%d items...</li>') % sync_total
        sync_items.extend(result['syncitems'])
        while len(sync_items) < sync_total:
            lastsync = max([
                parse_lj_date(item['time']) for item in sync_items
            ]).strftime('%Y-%m-%d %H:%M:%S')
            yield _(u'<li>Got %d items up to %s...</li>') % (len(sync_items),
                                                             lastsync)
            result = lj.syncitems(lastsync=lastsync)
            sync_items.extend(result['syncitems'])
        yield _(u'<li>Got all %d items.</li>') % len(sync_items)
        yield _(u'</ul>')
        #: Discard non-journal items.
        sync_items = [i for i in sync_items if i['item'].startswith('L-')]
        yield _(u'<p>Downloading <strong>%d</strong> entries...</p>') % len(
            sync_items)
        # Track what items we need to get
        sync_data = {}
        for item in sync_items:
            sync_data[int(item['item'][2:])] = {
                'downloaded': False,
                'time': parse_lj_date(item['time'])
            }

        # Start downloading bodies
        sync_left = [
            sync_data[x] for x in sync_data
            if sync_data[x]['downloaded'] is False
        ]
        if sync_left:
            lastsync = (min([x['time'] for x in sync_left]) -
                        timedelta(seconds=1)).strftime('%Y-%m-%d %H:%M:%S')
        while len(sync_left) > 0:
            yield _(u'<p>Getting a batch...</p>')
            try:
                result = lj.getevents(selecttype='syncitems',
                                      lastsync=lastsync)
            except xmlrpclib.Fault, fault:
                if fault.faultCode == 406:
                    # LJ doesn't like us. Go back one second and try again.
                    yield _(u'<p>LiveJournal says we are retrying the same '\
                            u'date and time too often. Trying again with the '\
                            u'time set behind by one second.</p>')
                    lastsync = (
                        parse_lj_date(lastsync) -
                        timedelta(seconds=1)).strftime('%Y-%m-%d %H:%M:%S')
                    continue
                else:
                    yield _(u'<p>Process failed. LiveJournal says: '\
                            u'(%d) %s</p>') % (fault.faultCode,
                                               fault.faultString)
                    break

            yield _(u'<ol start="%d">') % (len(posts) + 1)
            for item in result['events']:
                if sync_data[item['itemid']]['downloaded'] is True:
                    # Dupe, thanks to our lastsync time manipulation. Skip.
                    continue
                sync_data[item['itemid']]['downloaded'] = True
                sync_data[item['itemid']]['item'] = item

                subject = item.get('subject', '')
                if isinstance(subject, xmlrpclib.Binary):
                    subject = subject.data
                subject = unicode(str(subject), 'utf-8')
                #: LiveJournal subjects may contain HTML tags. Strip them and
                #: convert HTML entities to Unicode equivalents.
                subject = unescape(
                    tag_re.sub('', ljuser_re.sub('\\2', subject)))
                poster = item.get('poster', username)
                if poster != username and import_what != IMPORT_COMMUNITY_ALL:
                    # Discard, since we don't want this.
                    yield _(
                        u'<li><strong>Discarded:</strong> %s <em>(by %s)</em></li>'
                    ) % (subject, poster)
                    continue
                if poster not in authors:
                    authors[poster] = Author(poster, '', '')
                # Map LiveJournal security codes to Zine status flags
                security = item.get('security', 'public')
                if security == 'usemask' and item['allowmask'] == 1:
                    security = 'friends'
                if security == 'usemask':
                    status = {
                        SECURITY_DISCARD: None,
                        SECURITY_PUBLIC: STATUS_PUBLISHED,
                        SECURITY_PROTECTED: STATUS_PROTECTED,
                        SECURITY_PRIVATE: STATUS_PRIVATE
                    }[security_custom]
                    if status is None:
                        yield _(u'<li><strong>Discarded (masked):</strong> '\
                                u'%s</li>') % subject
                        continue
                else:
                    status = {
                        'public': STATUS_PUBLISHED,
                        'friends': STATUS_PROTECTED,
                        'private': STATUS_PRIVATE,
                    }[security]

                #: Read time as local timezone and then convert to UTC. Zine
                #: doesn't seem to like non-UTC timestamps in imports.
                pub_date = get_timezone().localize(
                    parse_lj_date(item['eventtime'])).astimezone(UTC)
                itemtags = [t.strip() for t in unicode(
                    item['props'].get('taglist', ''), 'utf-8').split(',')]
                itemtags = [tags[t] for t in itemtags if t]
                extras = {}

                def decode_prop(value):
                    #: Props arrive either as xmlrpclib.Binary or as
                    #: str; normalize both to unicode.
                    if isinstance(value, xmlrpclib.Binary):
                        return unicode(value.data, 'utf-8')
                    return unicode(str(value), 'utf-8')

                for prop in ('current_music', 'current_mood',
                             'current_coords', 'current_location',
                             'picture_keyword'):
                    if prop in item['props']:
                        extras[prop] = decode_prop(item['props'][prop])
                #: current_moodid applies only when no free-form mood is set.
                if 'current_mood' not in extras and \
                   'current_moodid' in item['props']:
                    extras['current_mood'] = moodlist[int(
                        item['props']['current_moodid'])]

                extras['lj_post_id'] = item['itemid']
                extras['original_url'] = item['url']
                posts[item['itemid']] = Post(
                    #: Generate slug. If there's no subject, use '-'+itemid.
                    #: Why the prefix? Because if the user wants %year%/%month%/
                    #: for the post url format and we end up creating a slug
                    #: like 2003/12/1059, it will conflict with the archive
                    #: access path format of %Y/%m/%d and the post will become
                    #: inaccessible, since archive paths take higher priority
                    #: to slugs in zine's urls.py.
                    slug=gen_timestamped_slug(
                        gen_slug(subject) or ('-' + str(item['itemid'])),
                        'entry', pub_date),
                    title=subject,
                    link=item['url'],
                    pub_date=pub_date,
                    author=authors[poster],
                    intro='',
                    body=(unicode(item['event'].data, 'utf-8')
                          if isinstance(item['event'], xmlrpclib.Binary)
                          else url_unquote_plus(str(item['event']))),
                    tags=itemtags,
                    categories=[Category(x) for x in categories],
                    comments=[],  # Will be updated later.
                    comments_enabled=not item['props'].get(
                        'opt_nocomments', False),
                    pings_enabled=False,  # LiveJournal did not support pings
                    uid='livejournal;%s;%d' %
                        (usejournal or username, item['itemid']),
                    parser=('html'
                            if item['props'].get('opt_preformatted', False)
                            else 'livejournal'),
                    status=status,
                    extra=extras)
                yield _(u'<li>%s <em>(by %s on %s)</em></li>') % (
                    subject, poster, pub_date.strftime('%Y-%m-%d %H:%M'))
            # Done processing batch.
            yield _(u'</ol>')
            sync_left = [
                sync_data[x] for x in sync_data
                if sync_data[x]['downloaded'] is False
            ]
            if sync_left:
                lastsync = (min([x['time'] for x in sync_left]) -
                            timedelta(seconds=1)).strftime('%Y-%m-%d %H:%M:%S')
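The heart of the download loop is the lastsync protocol: ask getevents for everything since lastsync, and when LiveJournal answers fault 406 (the same timestamp retried too often), step lastsync back one second and try again, skipping the duplicates the overlap produces. A condensed sketch of just that loop, assuming an lj proxy with the same getevents signature and the sync_data mapping built above (Python 2, like the examples):

import xmlrpclib
from datetime import datetime, timedelta

TIME_FMT = '%Y-%m-%d %H:%M:%S'

def fetch_all_events(lj, sync_data):
    """Drain sync_data, stepping lastsync back one second whenever
    LiveJournal raises fault 406 for a repeated timestamp."""
    def pending():
        return [v for v in sync_data.values() if not v['downloaded']]

    events, left = [], pending()
    if left:
        lastsync = (min(v['time'] for v in left) -
                    timedelta(seconds=1)).strftime(TIME_FMT)
    while left:
        try:
            result = lj.getevents(selecttype='syncitems', lastsync=lastsync)
        except xmlrpclib.Fault as fault:
            if fault.faultCode != 406:
                raise
            # Same lastsync retried too often; back off one more second.
            lastsync = (datetime.strptime(lastsync, TIME_FMT) -
                        timedelta(seconds=1)).strftime(TIME_FMT)
            continue
        for item in result['events']:
            entry = sync_data[item['itemid']]
            if not entry['downloaded']:  # the overlap produces duplicates
                entry['downloaded'] = True
                events.append(item)
        left = pending()
        if left:
            lastsync = (min(v['time'] for v in left) -
                        timedelta(seconds=1)).strftime(TIME_FMT)
    return events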
Example #4
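Imports a Quills (Plone) weblog over Zope's XML-RPC interface: fetches the export in a single call, then converts tags, authors, entries, and threaded replies into Tag, Author, Post, and Comment objects before queueing the assembled Blog for import.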
    def import_quills(self, blogurl, username, password):
        """Import from Quills using Zope's XML-RPC interface."""
        yield _(u'<p>Beginning Quills import. Attempting to get data...</p>')
        urlparts = urlparse.urlsplit(blogurl)
        urlnetloc = urlparts.netloc
        urlpath = urlparts.path
        if not urlpath.endswith('/'):
            urlpath += '/' # Trailing slash required for XML-RPC
        if username:
            #: We're using simple HTTP auth, which isn't the smartest thing to
            #: do, but Plone's default cookie-auth system is just a base64
            #: encoding of username:password, which isn't any better. Quills
            #: runs on Plone 2.1 and 2.5, neither of which shipped with a more
            #: secure auth mechanism, so we'll just go with what works. HTTP
            #: auth fallback has been supported by every Zope 2.x release.
            urlnetloc = '%s:%s@%s' % (username, password, urlnetloc)
        useblogurl = urlparse.urlunsplit((urlparts.scheme, urlnetloc, urlpath,
                                          '', ''))
        conn = xmlrpclib.ServerProxy(useblogurl)
        title = conn.Title()
        data = conn.zine_export()
        yield _(u'<p>Got data. Parsing for weblog entries and replies.</p>')

        tags = {}
        posts = {}
        authors = {}

        yield _(u'<ol>')
        for entry in data:
            itemtags = []
            for tag in entry['tags']:
                if tag in tags:
                    itemtags.append(tags[tag])
                else:
                    newtag = Tag(gen_slug(tag), tag)
                    tags[tag] = newtag
                    itemtags.append(newtag)
            if entry['author'] in authors:
                author = authors[entry['author']]
            else:
                author = Author(entry['author'], '', '')
                authors[entry['author']] = author
            status = PLONE_STATUS.get(entry['status'], STATUS_PUBLISHED)
            body = reunicode(entry['body'])
            description = reunicode(entry['description'])
            subject = reunicode(entry['title'])
            parser = PLONE_PARSERS.get(entry['format'], 'zeml')
            pub_date = parse_plone_date(entry['date'])

            if description:
                #: Assume description is text/plain. Anything else is unlikely
                if parser in ['zeml', 'html']:
                    body = u'<intro><p>%s</p></intro>%s' % (description, body)
                else:
                    # We don't know how this parser works, so just insert
                    # description before body, with a blank line in between
                    body = u'%s\n\n%s' % (description, body)

            comments = {}

            for comment in entry['replies']:
                c_author = comment['author']
                if c_author in authors:
                    c_author = authors[c_author]
                elif c_author.startswith('!'):
                    #: Fix for Jace's anon comments hack
                    c_author = c_author[1:]
                c_body = reunicode(comment['body'])
                c_subject = reunicode(comment['title'])
                if c_subject:
                    c_body = '%s\n\n%s' % (c_subject, c_body)

                comments[comment['id']] = Comment(
                    author=c_author,
                    body=c_body,
                    pub_date=parse_plone_date(
                        comment['date']).astimezone(UTC),
                    author_email=None,
                    author_url=None,
                    remote_addr=None,
                    parent=comment['parent'],
                    parser='text',
                    status=COMMENT_MODERATED)

            # Re-thread comments (see the sketch after this example)
            for comment in comments.values():
                comment.parent = comments.get(comment.parent)

            posts[entry['id']] = Post(
                slug=gen_timestamped_slug(entry['id'],
                                          'entry', pub_date),
                title=subject,
                link=entry['url'],
                pub_date=pub_date.astimezone(UTC),
                author=authors[entry['author']],
                intro=u'',
                body=body,
                tags=itemtags,
                categories=[],
                comments=comments.values(),
                comments_enabled=entry['allow_comments'],
                pings_enabled=True,
                uid=entry['id'],
                parser=parser,
                content_type='entry',
                status=status
                )
            yield _(u'<li><strong>%s</strong> (by %s; %d comments)</li>') % (
                subject, author.username, len(comments))

        yield _(u'</ol>')
        self.enqueue_dump(Blog(
            title,
            blogurl,
            '',
            'en',
            tags.values(),
            [],
            posts.values(),
            authors.values()))
        flash(_(u'Added imported items to queue.'))

        yield _(u'<p><strong>All done.</strong></p>')
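The two-pass re-threading used above is worth isolating: build every comment keyed by id while parents are still raw ids, then swap each id for its object in a second pass, letting unknown ids (a deleted or skipped parent) collapse to None. A self-contained sketch with a stand-in Comment class:

class Comment(object):
    #: Stand-in for the importer's Comment; only id, body, parent.
    def __init__(self, id, body, parent=None):
        self.id, self.body, self.parent = id, body, parent

def rethread(flat):
    """flat: (id, body, parent_id) tuples; returns {id: Comment}
    with parent pointing at a Comment object or None."""
    comments = {}
    for cid, body, parent_id in flat:
        comments[cid] = Comment(cid, body, parent_id)
    # Second pass: ids become objects; unknown ids collapse to None.
    for comment in comments.values():
        comment.parent = comments.get(comment.parent)
    return comments

threads = rethread([(1, 'first', None), (2, 'reply', 1), (3, 'orphan', 99)])
assert threads[2].parent is threads[1]
assert threads[3].parent is None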
Example #5
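A minimal memoizing factory: returns the cached Author for a name, creating and storing one on first use. Falsy names fall through and yield None.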
def get_author(name):
    #: Falsy names (None, '') fall through and return None.
    if name:
        author = authors.get(name)
        if author is None:
            author = authors[name] = Author(name, None)
        return author
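The same idiom can be written with dict.setdefault; a sketch with a stand-in Author, noting the trade-off that makes the guarded version above preferable when construction is expensive:

class Author(object):
    def __init__(self, username, email):
        self.username, self.email = username, email

authors = {}

def get_author(name):
    if name:
        # Terser, but setdefault evaluates Author(name, None) even on
        # a cache hit; the guarded version avoids that construction.
        return authors.setdefault(name, Author(name, None))

assert get_author('jace') is get_author('jace')
assert get_author('') is None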