def __init__(self):
    """Point the scraper at the news index and set up its helpers."""
    Scraper.__init__(self)
    # HTML entity decoder and persistence layer, then the target URL.
    self.html = HTMLParser()
    self.news_provider = NewsProvider()
    self.url = "https://berniesanders.com/news/"
class NewsScraper(Scraper): def __init__(self): Scraper.__init__(self) self.url = "https://berniesanders.com/news/" self.html = HTMLParser() self.news_provider = NewsProvider() def retrieve_article(self, url): for x in range(3): r = requests.get(url) if "https://berniesanders.com" not in r.url: return r.url, False, False if r.status_code == 200: soup = BeautifulSoup(r.text) soup = self.sanitize_soup(soup) image = soup.find('meta', {'property': 'og:image'})['content'] content = soup.article paragraphs = [self.html.unescape(self.replace_with_newlines(p)) for p in content.findAll("p")] text = "\n\n".join(paragraphs) html = "".join([str(p) for p in content.findAll("p")]) return text, html, image return False, False, False def go(self): soup = self.get(self.url) content = soup.find("section", {"id": "content"}) for article in content.findAll("article"): rec = { "news_id": article['id'], "image_url": "", "timestamp_publish": parser.parse(article.time["datetime"]), "site": "berniesanders.com", "lang": "en", "title": self.html.unescape(article.h2.text), "news_category": self.html.unescape(article.h1.string.strip()), "url": article.h2.a["href"] } if article.img is not None: rec["image_url"] = article.img["src"] # Pull excerpt if available try: rec["excerpt_html"] = str(article.p) rec["excerpt"] = self.html.unescape(article.p.text) except AttributeError: rec["excerpt"], rec["excerpt_html"] = "", "" # Determine Type if rec['news_category'].lower() in ["on the road", "news"]: rec['news_type'] = "News" elif rec['news_category'].lower() == "press release": rec['news_type'] = "PressRelease" else: rec['news_type'] = "Unknown" text, html, image = self.retrieve_article(rec["url"]) if text and not html: rec["body"], rec["body_html"] = text, text rec['news_type'] = "ExternalLink" rec["body_html_nostyle"] = "" elif text and html: rec["body"], rec["body_html"] = text, html no_style = self.remove_style(BeautifulSoup(html)) rec["body_html_nostyle"] = "".join([str(p) for p in 
no_style.findAll("p")]) try: article["image_url"] except KeyError: article["image_url"] = image msg = "" if self.news_provider.exists_by_news_id(rec["news_id"]): print "found" else: print "not found" msg = "Inserting '{0}', created {1}" self.news_provider.create(rec) logging.info(msg.format( rec["title"].encode("utf8"), str(rec["timestamp_publish"]) ))
def __init__(self, url):
    """Initialize scraper state for the given feed ``url``."""
    Scraper.__init__(self)
    # Decoder first, then providers, then the caller-supplied target.
    self.html = HTMLParser()
    self.news_provider = NewsProvider()
    self.push_provider = PushProvider()
    self.url = url
@app.route('/issue/<uuid:issue_uuid>', methods=['GET', 'POST'])
@auth.login_required
def issue_detail(issue_uuid):
    """Render an issue's detail page; on POST, apply the submitted update."""
    issue = issue_provider.read(issue_uuid)
    updated = False
    if request.method == 'POST' and issue_provider.update(issue, request):
        updated = True
    return render_template('issue.html', issue=issue, updated=updated)


if __name__ == '__main__':
    configfile = '/opt/bernie/config.yml'
    try:
        with open(configfile, 'r') as f:
            # safe_load: yaml.load on a config file is unsafe (arbitrary
            # object construction) and deprecated without a Loader.
            conf = yaml.safe_load(f)['flask']
    except IOError:
        # BUG FIX: the original logged `self.configfile`, but `self` does not
        # exist at module level — that NameError masked the real failure.
        logging.info("Could not open config file: {0}".format(configfile))
        raise
    else:
        event_provider = EventProvider()
        issue_provider = IssueProvider()
        video_provider = VideoProvider()
        article_provider = ArticleProvider()
        news_provider = NewsProvider()
        push_provider = PushProvider()
        users = {conf['httpauth_username']: conf['httpauth_password']}
        # BUG FIX: register Parse credentials BEFORE app.run(); app.run()
        # blocks, so the original register() call never executed while the
        # server was up.
        register(conf['parse_application_id'],
                 conf['parse_rest_api_key'],
                 conf['parse_master_key'])
        # Push.message("Good morning", channels=["Mike Testing"])
        app.run(host=conf['host'], debug=conf['debug'])