Example #1
import click

# make_app is assumed to live at the top of the plnt package, next to
# plnt.database.
from plnt import make_app


def initdb():
    """Initialize the database"""
    from plnt.database import Blog, session
    make_app().init_database()
    # and now fill in some python blogs everybody should read (shamelessly
    # added my own blog too)
    blogs = [
        Blog('Armin Ronacher', 'http://lucumr.pocoo.org/',
             'http://lucumr.pocoo.org/cogitations/feed/'),
        Blog('Georg Brandl', 'http://pyside.blogspot.com/',
             'http://pyside.blogspot.com/feeds/posts/default'),
        Blog('Ian Bicking', 'http://blog.ianbicking.org/',
             'http://blog.ianbicking.org/feed/'),
        Blog('Amir Salihefendic', 'http://amix.dk/',
             'http://feeds.feedburner.com/amixdk'),
        Blog('Christopher Lenz', 'http://www.cmlenz.net/blog/',
             'http://www.cmlenz.net/blog/atom.xml'),
        Blog('Fredrik Lundh', 'http://online.effbot.org/',
             'http://online.effbot.org/rss.xml')
    ]
    # okay. got tired here.  if someone feels that they are missing, drop me
    # a line ;-)
    for blog in blogs:
        session.add(blog)
    session.commit()
    click.echo(
        'Initialized database, now run manage-plnt.py sync to get the posts')
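For context, here is a minimal sketch of how a function like initdb could be wired up as a command in the manage-plnt.py launcher. The click group name, the registration call, and the surrounding layout are assumptions for illustration, not the example's actual script.

import click


@click.group()
def cli():
    """Management script for the plnt application."""


# Hypothetical wiring: assumes initdb (the function above) is in scope here.
cli.command('initdb')(initdb)


if __name__ == '__main__':
    cli()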
Example #2
# make_app is assumed to live at the top of the plnt package, next to
# plnt.database.
from plnt import make_app


def action_initdb():
    """Initialize the database"""
    from plnt.database import Blog, session
    make_app().init_database()
    # and now fill in some python blogs everybody should read (shamelessly
    # added my own blog too)
    blogs = [
        Blog('Armin Ronacher', 'http://lucumr.pocoo.org/',
             'http://lucumr.pocoo.org/cogitations/feed/'),
        Blog('Georg Brandl', 'http://pyside.blogspot.com/',
             'http://pyside.blogspot.com/feeds/posts/default'),
        Blog('Ian Bicking', 'http://blog.ianbicking.org/',
             'http://blog.ianbicking.org/feed/'),
        Blog('Amir Salihefendic', 'http://amix.dk/',
             'http://feeds.feedburner.com/amixdk'),
        Blog('Christopher Lenz', 'http://www.cmlenz.net/blog/',
             'http://www.cmlenz.net/blog/atom.xml'),
        Blog('Fredrik Lundh', 'http://online.effbot.org/',
             'http://online.effbot.org/rss.xml')
    ]
    # okay. got tired here.  if someone feels that they are missing, drop me
    # a line ;-)
    for blog in blogs:
        session.add(blog)
    session.commit()
    print('Initialized database, now run manage-plnt.py sync to get the posts')
Example #3
import click

# make_app is assumed to live at the top of the plnt package, next to
# plnt.database.
from plnt import make_app


def initdb():
    """Initialize the database"""
    from plnt.database import Blog, session

    make_app().init_database()
    # and now fill in some python blogs everybody should read (shamelessly
    # added my own blog too)
    blogs = [
        Blog(
            "Armin Ronacher",
            "https://lucumr.pocoo.org/",
            "https://lucumr.pocoo.org/feed.atom",
        ),
        Blog(
            "Georg Brandl",
            "https://pyside.blogspot.com/",
            "https://pyside.blogspot.com/feeds/posts/default",
        ),
        Blog(
            "Ian Bicking",
            "https://blog.ianbicking.org/",
            "https://blog.ianbicking.org/feed/",
        ),
        Blog(
            "Amir Salihefendic",
            "http://amix.dk/",
            "https://feeds.feedburner.com/amixdk",
        ),
        Blog(
            "Christopher Lenz",
            "https://www.cmlenz.net/blog/",
            "https://www.cmlenz.net/blog/atom.xml",
        ),
        Blog(
            "Frederick Lundh",
            "https://effbot.org/",
            "https://effbot.org/rss.xml",
        ),
    ]
    # okay. got tired here.  if someone feels that they are missing, drop me
    # a line ;-)
    for blog in blogs:
        session.add(blog)
    session.commit()
    click.echo(
        "Initialized database, now run manage-plnt.py sync to get the posts")
Example #4
# Imports and the MIME set that sync() relies on. The plnt.* paths follow
# the example's package layout; pulling escape from markupsafe and the
# exact HTML_MIMETYPES values are assumptions.
from datetime import datetime

import feedparser
from markupsafe import escape

from plnt.database import Blog, Entry, session
from plnt.utils import nl2p, strip_tags

HTML_MIMETYPES = {'text/html', 'application/xhtml+xml'}


def sync():
    """
    Perform a synchronization. Articles that are already synchronized
    aren't touched again.
    """
    for blog in Blog.query.all():
        # parse the feed. feedparser.parse never raises an exception,
        # but the bozo bit might be set.
        feed = feedparser.parse(blog.feed_url)
        blog_author = feed.get('author') or blog.name
        blog_author_detail = feed.get('author_detail')

        for entry in feed.entries:
            # get the guid. either the id if specified, otherwise the link.
            # if none is available we skip the entry.
            guid = entry.get('id') or entry.get('link')
            if not guid:
                continue

            # get an old entry for the guid to check if we need to update
            # or recreate the item
            old_entry = Entry.query.filter_by(guid=guid).first()

            # get title, url and text. skip if no title or no text is
            # given. if the link is missing we use the blog link.
            if 'title_detail' in entry:
                title = entry.title_detail.get('value') or ''
                if entry.title_detail.get('type') in HTML_MIMETYPES:
                    title = strip_tags(title)
                else:
                    title = escape(title)
            else:
                title = entry.get('title')
            url = entry.get('link') or blog.blog_url
            text = ('content' in entry and entry.content[0]) or \
                   entry.get('summary_detail')

            if not title or not text:
                continue

            # if we have an html text we use that, otherwise we HTML
            # escape the text and use that one. We also handle XHTML
            # with our tag soup parser for the moment.
            if text.get('type') not in HTML_MIMETYPES:
                text = escape(nl2p(text.get('value') or ''))
            else:
                text = text.get('value') or ''

            # no text? continue
            if not text.strip():
                continue

            # get the pub date and updated date. This is rather complex
            # because different feeds do different stuff
            pub_date = entry.get('published_parsed') or \
                       entry.get('created_parsed') or \
                       entry.get('date_parsed')
            updated = entry.get('updated_parsed') or pub_date
            pub_date = pub_date or updated

            # if we don't have a pub_date we skip.
            if not pub_date:
                continue

            # convert the time tuples to datetime objects.
            pub_date = datetime(*pub_date[:6])
            updated = datetime(*updated[:6])
            if old_entry and updated <= old_entry.last_update:
                continue

            # create a new entry object based on the data collected or
            # update the old one.
            entry = old_entry or Entry()
            entry.blog = blog
            entry.guid = guid
            entry.title = title
            entry.url = url
            entry.text = text
            entry.pub_date = pub_date
            entry.last_update = updated
            session.add(entry)

    session.commit()
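sync() leans on two helpers, strip_tags and nl2p, that the example doesn't show. The sketch below is a plausible naive implementation of each, assuming simple regex-based behavior; the real plnt.utils versions may handle malformed markup more carefully.

import re

_tag_re = re.compile(r'<.*?>', re.S)
_par_re = re.compile(r'\n{2,}')


def strip_tags(text):
    """Remove HTML tags from a string (naive regex version)."""
    return _tag_re.sub('', text)


def nl2p(text):
    """Wrap blank-line-separated chunks of text in <p> tags."""
    return '\n'.join('<p>%s</p>' % p for p in _par_re.split(text))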