Exemple #1
0
            # NOTE(review): this is the tail of a branch whose condition is
            # above the visible lines; it stores a bare reference and returns.
            # Fall back to the URL itself when no title has been set.
            if not title:
                title = url
            # Article built from the URL and title only (no summary is passed).
            article = Article(
                url=url,
                title=title,
            )
            # Default to the http scheme when the URL carries no "://" at all.
            if not "://" in article.url:
                article.url = "http://" + article.url
            commit_to_feed(feed, article)
            # Log the stored article's uid together with the document's
            # content-type header, then stop: the parsing below is skipped.
            log("%s: %s/%s: Stored %s, reference to %s (%s)" % \
             (feed.key.name, feed.group.name, feed.name, article.uid, url, document.headers['content-type']))
            return

    # Document parsing: the output of parser.extract_body(document.text) is
    # handed to parser.summarise.
    try:
        article_content = parser.extract_body(document.text)
        summary = parser.summarise(article_content)
    # Any exception from the parser is logged and the document is abandoned.
    # NOTE: "except Exception, e" and e.message are Python 2-only spellings.
    except Exception, e:
        log("%s: %s: Error parsing %s: %s" %
            (feed.key.name, feed.group.name, url, e.message))
        return

    # Still no title: ask the parser for one from the document text.
    if not title:
        title = parser.extract_title(document.text)

    # Initial article object
    article = Article(url=url, title=title, summary=summary)

    # Default to the http scheme when the URL carries no "://" at all.
    if not "://" in article.url:
        article.url = "http://" + article.url
Exemple #2
0
            # NOTE(review): this is the tail of a branch whose condition is
            # above the visible lines; it stores a bare reference and returns.
            # Fall back to the URL itself when no title has been set.
            if not title:
                title = url
            # Article built from the URL and title only (no summary is passed).
            article = Article(
                url=url,
                title=title,
            )
            # Default to the http scheme when the URL carries no "://" at all.
            if not "://" in article.url:
                article.url = "http://" + article.url
            commit_to_feed(feed, article)
            # Log the stored article's uid together with the document's
            # content-type header, then stop: the parsing below is skipped.
            log("%s: %s/%s: Stored %s, reference to %s (%s)" % \
                (feed.key.name, feed.group.name, feed.name, article.uid, url, document.headers['content-type']))
            return

    # Document parsing: the output of parser.extract_body(document.text) is
    # handed to parser.summarise.
    try:
        article_content = parser.extract_body(document.text)
        summary = parser.summarise(article_content)
    # Any exception from the parser is logged and the document is abandoned.
    # NOTE: "except Exception, e" and e.message are Python 2-only spellings.
    except Exception, e:
        log("%s: %s: Error parsing %s: %s" %
            (feed.key.name, feed.group.name, url, e.message))
        return

    # Ensure a title and disregard dupes
    if not title:
        title = parser.extract_title(document.text)

    # With NO_DUPLICATE_TITLES enabled in app.config, return early when an
    # Article with this title and the same key as the feed already exists.
    if app.config['NO_DUPLICATE_TITLES']:
        if Article.query.filter(
                and_(Article.title == title, Article.key == feed.key)).first():
            return
Exemple #3
0
			# NOTE(review): this is the tail of a branch whose condition is
			# above the visible lines; it stores a bare reference and returns.
			# Fall back to the URL itself when no title has been set.
			if not title:
				title = url
			# Article built from the URL and title only (no summary is passed).
			article = Article(
				url=url,
				title=title,
			)
			# Default to the http scheme when the URL carries no "://" at all.
			if not "://" in article.url:
				article.url = "http://" + article.url
			commit_to_feed(feed, article)
			# Log the stored article's uid together with the document's
			# content-type header, then stop: the parsing below is skipped.
			log("%s: %s/%s: Stored %s, reference to %s (%s)" % \
				(feed.key.name, feed.group.name, feed.name, article.uid, url, document.headers['content-type']))
			return

	# Document parsing: the output of parser.extract_body(document.text) is
	# handed to parser.summarise.
	try:
		article_content = parser.extract_body(document.text)
		summary      = parser.summarise(article_content)
	# Any exception from the parser is logged and the document is abandoned.
	# NOTE: "except Exception, e" and e.message are Python 2-only spellings.
	except Exception, e:
		log("%s: %s: Error parsing %s: %s" % (feed.key.name, feed.group.name, url, e.message))
		return

	# Still no title: ask the parser for one from the document text.
	if not title:
		title = parser.extract_title(document.text)

	# Initial article object
	article = Article(
		url=url,
		title=title,
		summary=summary
	)
Exemple #4
0
            # NOTE(review): this is the tail of a branch whose condition is
            # above the visible lines; it stores a bare reference and returns.
            # Fall back to the URL itself when no title has been set.
            if not title:
                title = url
            # Article built from the URL and title only (no summary is passed).
            article = Article(
                url=url,
                title=title,
            )
            # Default to the http scheme when the URL carries no "://" at all.
            if not "://" in article.url:
                article.url = "http://" + article.url
            commit_to_feed(feed, article)
            # Log the stored article's uid together with the document's
            # content-type header, then stop: the parsing below is skipped.
            log("%s: %s/%s: Stored %s, reference to %s (%s)" % \
                (feed.key.name, feed.group.name, feed.name, article.uid, url, document.headers['content-type']))
            return

    # Document parsing: the output of parser.extract_body(document.text) is
    # handed to parser.summarise.
    try:
        article_content = parser.extract_body(document.text)
        summary      = parser.summarise(article_content)
    # Any exception from the parser is logged and the document is abandoned.
    # NOTE: "except Exception, e" and e.message are Python 2-only spellings.
    except Exception, e:
        log("%s: %s: Error parsing %s: %s" % (feed.key.name, feed.group.name, url, e.message))
        return

    # Ensure a title and disregard dupes
    if not title:
        title = parser.extract_title(document.text)

    # With NO_DUPLICATE_TITLES enabled in app.config, return early when an
    # Article with this title and the same key as the feed already exists.
    if app.config['NO_DUPLICATE_TITLES']:
        if Article.query.filter(
            and_(Article.title == title, Article.key == feed.key)
        ).first():
            return